diff --git a/Cargo.lock b/Cargo.lock index 41c449b8a1..f5c07cbf55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4548,6 +4548,7 @@ dependencies = [ "enigo", "env_logger", "fantoccini", + "flate2", "fs2", "futures", "futures-util", diff --git a/Cargo.toml b/Cargo.toml index f7fc42c18b..4635060406 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,10 @@ hmac = "0.12" tar = "0.4" xz2 = { version = "0.1", features = ["static"] } zip = { version = "2", default-features = false, features = ["deflate"] } +# gzip decoder for the Piper tar.gz binary releases on macOS / Linux. Already +# pulled in transitively by zip's `deflate` feature; declared directly so +# the installer module can `use flate2::read::GzDecoder`. +flate2 = "1" # Real timeout for `node --version` probes in the runtime resolver. Guards # against a broken shim on PATH hanging the bootstrap forever. wait-timeout = "0.2" diff --git a/app/src-tauri/Cargo.lock b/app/src-tauri/Cargo.lock index cef4d8236b..e8f0c16112 100644 --- a/app/src-tauri/Cargo.lock +++ b/app/src-tauri/Cargo.lock @@ -4612,6 +4612,7 @@ dependencies = [ "dotenvy", "enigo", "env_logger", + "flate2", "fs2", "futures", "futures-util", diff --git a/app/src/chat/chatSendError.ts b/app/src/chat/chatSendError.ts index e74b1d73de..1c1b5bec8f 100644 --- a/app/src/chat/chatSendError.ts +++ b/app/src/chat/chatSendError.ts @@ -6,6 +6,8 @@ export type ChatSendErrorCode = | 'cloud_send_failed' | 'voice_transcription' | 'stt_not_ready' + | 'voice_synthesis' + | 'tts_not_ready' | 'microphone_unavailable' | 'microphone_recording' | 'microphone_access' diff --git a/app/src/components/settings/hooks/useSettingsNavigation.ts b/app/src/components/settings/hooks/useSettingsNavigation.ts index ab565fc424..7fad5fe64d 100644 --- a/app/src/components/settings/hooks/useSettingsNavigation.ts +++ b/app/src/components/settings/hooks/useSettingsNavigation.ts @@ -191,13 +191,13 @@ export const useSettingsNavigation = (): SettingsNavigationHook => { // Leaf panels under features case 'screen-intelligence': case 'autocomplete': - case 'voice': case 'messaging': case 'tools': return [settingsCrumb, featuresCrumb]; // Leaf panels under AI & Models case 'local-model': + case 'voice': return [settingsCrumb, aiModelsCrumb]; // Team sub-pages diff --git a/app/src/components/settings/panels/VoicePanel.tsx b/app/src/components/settings/panels/VoicePanel.tsx index 1d4023be55..cc2dbd106e 100644 --- a/app/src/components/settings/panels/VoicePanel.tsx +++ b/app/src/components/settings/panels/VoicePanel.tsx @@ -1,5 +1,12 @@ import { useEffect, useRef, useState } from 'react'; +import { + installPiper, + installWhisper, + piperInstallStatus, + type VoiceInstallStatus, + whisperInstallStatus, +} from '../../../services/api/voiceInstallApi'; import { openhumanGetVoiceServerSettings, openhumanLocalAiAssetsStatus, @@ -7,7 +14,9 @@ import { openhumanVoiceServerStart, openhumanVoiceServerStatus, openhumanVoiceServerStop, + openhumanVoiceSetProviders, openhumanVoiceStatus, + type VoiceProvidersSnapshot, type VoiceServerSettings, type VoiceServerStatus, type VoiceStatus, @@ -15,13 +24,41 @@ import { import SettingsHeader from '../components/SettingsHeader'; import { useSettingsNavigation } from '../hooks/useSettingsNavigation'; +// Curated Piper voice presets — a handful of well-known English voices +// covering male/female and US/GB accents at the recommended `medium` +// quality tier. The full catalogue at +// huggingface.co/rhasspy/piper-voices has 100+ voices; a dropdown of +// every option is unusable so we ship a starter set and keep the free- +// text input as an escape hatch via the "Other…" option. +const PIPER_VOICE_PRESETS: ReadonlyArray<{ id: string; label: string }> = [ + { id: 'en_US-lessac-medium', label: 'US · Lessac (neutral, recommended)' }, + { id: 'en_US-lessac-high', label: 'US · Lessac (higher quality, larger)' }, + { id: 'en_US-ryan-medium', label: 'US · Ryan (male)' }, + { id: 'en_US-amy-medium', label: 'US · Amy (female)' }, + { id: 'en_US-libritts-high', label: 'US · LibriTTS (multi-speaker)' }, + { id: 'en_GB-alan-medium', label: 'GB · Alan (male)' }, + { id: 'en_GB-jenny_dioco-medium', label: 'GB · Jenny Dioco (female)' }, + { id: 'en_GB-northern_english_male-medium', label: 'GB · Northern English (male)' }, +]; + const VoicePanel = () => { const { navigateBack, navigateToSettings, breadcrumbs } = useSettingsNavigation(); const [settings, setSettings] = useState(null); const [savedSettings, setSavedSettings] = useState(null); const [serverStatus, setServerStatus] = useState(null); - const [, setVoiceStatus] = useState(null); + const [voiceStatus, setVoiceStatus] = useState(null); const [sttReady, setSttReady] = useState(false); + // Local provider selectors — initialised from voice_status, persisted via + // openhumanVoiceSetProviders on change. Empty string until first load. + const [sttProvider, setSttProvider] = useState<'cloud' | 'whisper' | ''>(''); + const [ttsProvider, setTtsProvider] = useState<'cloud' | 'piper' | ''>(''); + const [sttModel, setSttModel] = useState(''); + const [ttsVoice, setTtsVoice] = useState(''); + const [isSavingProviders, setIsSavingProviders] = useState(false); + const [whisperInstall, setWhisperInstall] = useState(null); + const [piperInstall, setPiperInstall] = useState(null); + const [isInstallingWhisper, setIsInstallingWhisper] = useState(false); + const [isInstallingPiper, setIsInstallingPiper] = useState(false); const [, setIsLoading] = useState(true); const [isSaving, setIsSaving] = useState(false); const [isStarting, setIsStarting] = useState(false); @@ -47,12 +84,36 @@ const VoicePanel = () => { const loadData = async (forceSettings = false) => { try { - const [settingsResponse, serverResponse, voiceResponse, assetsResponse] = await Promise.all([ + const [ + settingsResponse, + serverResponse, + voiceResponse, + assetsResponse, + whisperStatusResponse, + piperStatusResponse, + ] = await Promise.all([ openhumanGetVoiceServerSettings(), openhumanVoiceServerStatus(), openhumanVoiceStatus(), openhumanLocalAiAssetsStatus(), + whisperInstallStatus().catch(err => { + // Status polls happen on a 2s loop; a single transient error + // shouldn't blow up the entire settings panel. Log + keep the + // previous snapshot. + if (process.env.NODE_ENV !== 'production') { + console.debug('[voice-install:whisper] status poll failed', err); + } + return null; + }), + piperInstallStatus().catch(err => { + if (process.env.NODE_ENV !== 'production') { + console.debug('[voice-install:piper] status poll failed', err); + } + return null; + }), ]); + if (whisperStatusResponse) setWhisperInstall(whisperStatusResponse); + if (piperStatusResponse) setPiperInstall(piperStatusResponse); const currentSettings = settingsRef.current; const currentSavedSettings = savedSettingsRef.current; if ( @@ -65,6 +126,25 @@ const VoicePanel = () => { setSavedSettings(settingsResponse.result); setServerStatus(serverResponse); setVoiceStatus(voiceResponse); + // Seed provider dropdowns from core state on first load. Use the + // functional updater form so the check reads *current* state rather + // than the stale closure captured when the interval was created — + // otherwise every poll tick could re-apply the server value and + // clobber an in-flight user edit. + if (voiceResponse.stt_provider) { + const seeded = voiceResponse.stt_provider === 'whisper' ? 'whisper' : 'cloud'; + setSttProvider(prev => prev || seeded); + } + if (voiceResponse.tts_provider) { + const seeded = voiceResponse.tts_provider === 'piper' ? 'piper' : 'cloud'; + setTtsProvider(prev => prev || seeded); + } + if (voiceResponse.stt_model_id) { + setSttModel(prev => prev || voiceResponse.stt_model_id); + } + if (voiceResponse.tts_voice_id) { + setTtsVoice(prev => prev || voiceResponse.tts_voice_id); + } const sttAssetState = assetsResponse.result.stt?.state; const sttAssetOk = sttAssetState === 'ready' || sttAssetState === 'ondemand'; if (process.env.NODE_ENV !== 'production') { @@ -187,16 +267,351 @@ const VoicePanel = () => { const disabled = !sttReady; const isRunning = serverStatus != null && serverStatus.state !== 'stopped'; + const persistProviders = async ( + update: Partial & { + stt_provider?: 'cloud' | 'whisper'; + tts_provider?: 'cloud' | 'piper'; + stt_model?: string; + tts_voice?: string; + } + ) => { + setIsSavingProviders(true); + setError(null); + try { + const snapshot = await openhumanVoiceSetProviders({ + stt_provider: update.stt_provider, + tts_provider: update.tts_provider, + stt_model: update.stt_model, + tts_voice: update.tts_voice, + }); + if (process.env.NODE_ENV !== 'production') { + console.debug('[VoicePanel:providers] saved', snapshot); + } + setNotice('Voice providers saved.'); + // Force a reload so the rest of the panel reflects the new state. + await loadData(true); + } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to save voice providers'; + setError(message); + } finally { + setIsSavingProviders(false); + } + }; + + const onSttProviderChange = (next: 'cloud' | 'whisper') => { + setSttProvider(next); + void persistProviders({ stt_provider: next }); + }; + const onTtsProviderChange = (next: 'cloud' | 'piper') => { + setTtsProvider(next); + void persistProviders({ tts_provider: next }); + }; + + /** + * Map an install status snapshot to a button label. Single source of + * truth for the four states the UI surfaces: Not installed / Install / + * Installing N% / Reinstall. + */ + const installButtonLabel = ( + status: VoiceInstallStatus | null, + busy: boolean, + _engine: 'Whisper' | 'Piper' + ): string => { + // Render based on the remote status — the install RPC is fire-and-forget, + // so the local `busy` flag only covers the brief moment between click and + // the RPC return. The real "is install running?" signal comes from the + // polled status table, which lags behind by at most one 2s tick. + if (status?.state === 'installing') { + const pct = typeof status.progress === 'number' ? `${status.progress}%` : '…'; + return `Installing ${pct}`; + } + if (busy) return 'Installing…'; + if (status?.state === 'installed') return 'Reinstall locally'; + if (status?.state === 'broken') return 'Repair'; + if (status?.state === 'error') return 'Retry locally'; + return 'Install locally'; + }; + + const handleInstallWhisper = async () => { + setIsInstallingWhisper(true); + setError(null); + setNotice(null); + try { + const force = whisperInstall?.state === 'installed'; + console.debug('[voice-install:whisper] install click force=%s', force); + const result = await installWhisper({ modelSize: sttModel || undefined, force }); + setWhisperInstall(result); + setNotice( + result.state === 'installed' + ? 'Whisper is ready.' + : `Whisper install started (${result.stage ?? 'queued'})` + ); + } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to install Whisper'; + setError(message); + } finally { + setIsInstallingWhisper(false); + await loadData(false); + } + }; + + const handleInstallPiper = async () => { + setIsInstallingPiper(true); + setError(null); + setNotice(null); + try { + const force = piperInstall?.state === 'installed'; + console.debug('[voice-install:piper] install click force=%s', force); + const result = await installPiper({ voiceId: ttsVoice || undefined, force }); + setPiperInstall(result); + setNotice( + result.state === 'installed' + ? 'Piper is ready.' + : `Piper install started (${result.stage ?? 'queued'})` + ); + } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to install Piper'; + setError(message); + } finally { + setIsInstallingPiper(false); + await loadData(false); + } + }; + + const whisperReady = whisperInstall?.state === 'installed'; + const piperReady = piperInstall?.state === 'installed'; + return (
+
+
+
+

Voice Providers

+

+ Choose where transcription and synthesis run. Use the Install locally buttons to + download the binaries and models into your workspace — no manual{' '} + WHISPER_BIN or PIPER_BIN setup required. +

+
+
+ + {sttProvider === 'whisper' && ( + + )} + + {ttsProvider === 'piper' && ( + + )} +
+
+
+
@@ -322,16 +737,9 @@ const VoicePanel = () => { )} {disabled && ( -
-
- Voice dictation is disabled until the local STT model is downloaded and ready. -
- +
+ Voice dictation is disabled until the local STT model is downloaded. Use the{' '} + Voice Providers section above to install Whisper.
)} diff --git a/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx b/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx index ec83e1d2ef..3a5936bf5a 100644 --- a/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx +++ b/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx @@ -1,6 +1,13 @@ import { fireEvent, screen, waitFor } from '@testing-library/react'; import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { + installPiper, + installWhisper, + piperInstallStatus, + type VoiceInstallStatus, + whisperInstallStatus, +} from '../../../../services/api/voiceInstallApi'; import { renderWithProviders } from '../../../../test/test-utils'; import { type CommandResponse, @@ -11,6 +18,7 @@ import { openhumanVoiceServerStart, openhumanVoiceServerStatus, openhumanVoiceServerStop, + openhumanVoiceSetProviders, openhumanVoiceStatus, type VoiceServerSettings, type VoiceServerStatus, @@ -25,16 +33,40 @@ vi.mock('../../../../utils/tauriCommands', () => ({ openhumanVoiceServerStart: vi.fn(), openhumanVoiceServerStatus: vi.fn(), openhumanVoiceServerStop: vi.fn(), + openhumanVoiceSetProviders: vi.fn(), openhumanVoiceStatus: vi.fn(), })); +vi.mock('../../../../services/api/voiceInstallApi', () => ({ + installWhisper: vi.fn(), + installPiper: vi.fn(), + whisperInstallStatus: vi.fn(), + piperInstallStatus: vi.fn(), +})); + type RuntimeHarness = { settings: VoiceServerSettings; serverStatus: VoiceServerStatus; voiceStatus: VoiceStatus; sttState: string; + whisperStatus: VoiceInstallStatus; + piperStatus: VoiceInstallStatus; }; +const makeInstallStatus = ( + engine: 'whisper' | 'piper', + overrides: Partial = {} +): VoiceInstallStatus => ({ + engine, + state: 'missing', + progress: null, + downloaded_bytes: null, + total_bytes: null, + stage: null, + error_detail: null, + ...overrides, +}); + const makeConfigSnapshot = (): CommandResponse => ({ result: { config: {}, @@ -78,8 +110,12 @@ describe('VoicePanel', () => { tts_voice_path: '/tmp/tts.onnx', whisper_in_process: true, llm_cleanup_enabled: true, + stt_provider: 'cloud', + tts_provider: 'cloud', }, sttState: 'ready', + whisperStatus: makeInstallStatus('whisper'), + piperStatus: makeInstallStatus('piper'), }; vi.mocked(openhumanGetVoiceServerSettings).mockImplementation(async () => ({ @@ -114,6 +150,40 @@ describe('VoicePanel', () => { runtime.serverStatus = { ...runtime.serverStatus, state: 'stopped' }; return { ...runtime.serverStatus }; }); + vi.mocked(openhumanVoiceSetProviders).mockImplementation(async update => { + if (update.stt_provider) runtime.voiceStatus.stt_provider = update.stt_provider; + if (update.tts_provider) runtime.voiceStatus.tts_provider = update.tts_provider; + if (update.stt_model) runtime.voiceStatus.stt_model_id = update.stt_model; + if (update.tts_voice) runtime.voiceStatus.tts_voice_id = update.tts_voice; + return { + stt_provider: runtime.voiceStatus.stt_provider, + tts_provider: runtime.voiceStatus.tts_provider, + stt_model_id: runtime.voiceStatus.stt_model_id, + tts_voice_id: runtime.voiceStatus.tts_voice_id, + }; + }); + + // Install-status polls return the current harness snapshot — tests + // mutate `runtime.whisperStatus` / `runtime.piperStatus` to simulate + // a real install cycle. + vi.mocked(whisperInstallStatus).mockImplementation(async () => ({ ...runtime.whisperStatus })); + vi.mocked(piperInstallStatus).mockImplementation(async () => ({ ...runtime.piperStatus })); + vi.mocked(installWhisper).mockImplementation(async () => { + runtime.whisperStatus = makeInstallStatus('whisper', { + state: 'installed', + progress: 100, + stage: 'install complete', + }); + return { ...runtime.whisperStatus }; + }); + vi.mocked(installPiper).mockImplementation(async () => { + runtime.piperStatus = makeInstallStatus('piper', { + state: 'installed', + progress: 100, + stage: 'install complete', + }); + return { ...runtime.piperStatus }; + }); }); it('disables the panel when STT assets are not ready', async () => { @@ -122,9 +192,8 @@ describe('VoicePanel', () => { renderWithProviders(, { initialEntries: ['/settings/voice'] }); - expect(await screen.findByText('Voice Dictation')).toBeInTheDocument(); expect( - screen.getByText(/Voice dictation is disabled until the local STT model is downloaded/) + await screen.findByText(/Voice dictation is disabled until the local STT model is downloaded/) ).toBeInTheDocument(); expect(screen.getByRole('button', { name: 'Start Voice Server' })).toBeDisabled(); }); @@ -193,4 +262,239 @@ describe('VoicePanel', () => { await screen.findByText('Voice server restarted with the new settings.') ).toBeInTheDocument(); }); + + it('renders the STT and TTS provider dropdowns with seeded values', async () => { + runtime.voiceStatus.stt_provider = 'whisper'; + runtime.voiceStatus.tts_provider = 'piper'; + + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement; + const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement; + // Initial load runs an extra interval tick; wait for the seeding effect. + await waitFor(() => expect(sttSelect.value).toBe('whisper')); + expect(ttsSelect.value).toBe('piper'); + // The Whisper model picker only appears when the STT provider is local. + expect(screen.getByTestId('stt-model-select')).toBeInTheDocument(); + // tts_voice_id is seeded to 'en_US-lessac-medium' which is a known preset, + // so the UI should render the preset select, not the free-text input. + expect(screen.getByTestId('tts-voice-select')).toBeInTheDocument(); + expect(screen.queryByTestId('tts-voice-input')).not.toBeInTheDocument(); + }); + + it('persists STT provider changes through openhumanVoiceSetProviders', async () => { + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement; + fireEvent.change(sttSelect, { target: { value: 'whisper' } }); + + await waitFor(() => + expect(vi.mocked(openhumanVoiceSetProviders)).toHaveBeenCalledWith( + expect.objectContaining({ stt_provider: 'whisper' }) + ) + ); + // Saved notice should surface for the user. + expect(await screen.findByText(/Voice providers saved/i)).toBeInTheDocument(); + }); + + it('persists TTS provider changes through openhumanVoiceSetProviders', async () => { + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement; + fireEvent.change(ttsSelect, { target: { value: 'piper' } }); + + await waitFor(() => + expect(vi.mocked(openhumanVoiceSetProviders)).toHaveBeenCalledWith( + expect.objectContaining({ tts_provider: 'piper' }) + ) + ); + }); + + it('renders the Install Whisper button when the engine is missing', async () => { + runtime.whisperStatus = makeInstallStatus('whisper'); // explicit missing + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const button = await screen.findByTestId('install-whisper-button'); + expect(button).toHaveTextContent('Install locally'); + expect(screen.getByTestId('whisper-install-state')).toHaveTextContent('Not installed'); + }); + + it('disables the Local Whisper STT option when the engine is missing', async () => { + runtime.whisperStatus = makeInstallStatus('whisper'); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement; + const whisperOption = sttSelect.querySelector( + 'option[value="whisper"]' + ) as HTMLOptionElement | null; + expect(whisperOption).not.toBeNull(); + expect(whisperOption!.disabled).toBe(true); + expect(whisperOption!.textContent).toMatch(/install required/i); + }); + + it('shows a Reinstall label once Whisper is installed', async () => { + runtime.whisperStatus = makeInstallStatus('whisper', { state: 'installed', progress: 100 }); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const button = await screen.findByTestId('install-whisper-button'); + await waitFor(() => expect(button).toHaveTextContent(/Reinstall locally/i)); + expect(screen.getByTestId('whisper-install-state')).toHaveTextContent('Installed'); + }); + + it('triggers installWhisper when the user clicks Install', async () => { + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const button = await screen.findByTestId('install-whisper-button'); + fireEvent.click(button); + + await waitFor(() => expect(vi.mocked(installWhisper)).toHaveBeenCalledTimes(1)); + // First-time install must NOT force re-download. + expect(vi.mocked(installWhisper)).toHaveBeenCalledWith( + expect.objectContaining({ force: false }) + ); + }); + + it('forces re-download when Reinstall is clicked on an installed engine', async () => { + runtime.whisperStatus = makeInstallStatus('whisper', { state: 'installed', progress: 100 }); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const button = await screen.findByTestId('install-whisper-button'); + await waitFor(() => expect(button).toHaveTextContent(/Reinstall locally/i)); + fireEvent.click(button); + + await waitFor(() => expect(vi.mocked(installWhisper)).toHaveBeenCalledTimes(1)); + expect(vi.mocked(installWhisper)).toHaveBeenCalledWith( + expect.objectContaining({ force: true }) + ); + }); + + it('renders the Install Piper button when the engine is missing', async () => { + runtime.piperStatus = makeInstallStatus('piper'); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const button = await screen.findByTestId('install-piper-button'); + expect(button).toHaveTextContent('Install locally'); + expect(screen.getByTestId('piper-install-state')).toHaveTextContent('Not installed'); + }); + + it('disables the Local Piper TTS option when the engine is missing', async () => { + runtime.piperStatus = makeInstallStatus('piper'); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement; + const piperOption = ttsSelect.querySelector( + 'option[value="piper"]' + ) as HTMLOptionElement | null; + expect(piperOption).not.toBeNull(); + expect(piperOption!.disabled).toBe(true); + expect(piperOption!.textContent).toMatch(/install required/i); + }); + + it('triggers installPiper when the user clicks Install', async () => { + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const button = await screen.findByTestId('install-piper-button'); + fireEvent.click(button); + + await waitFor(() => expect(vi.mocked(installPiper)).toHaveBeenCalledTimes(1)); + expect(vi.mocked(installPiper)).toHaveBeenCalledWith(expect.objectContaining({ force: false })); + }); + + it('shows the in-flight installing label and percentage', async () => { + runtime.whisperStatus = makeInstallStatus('whisper', { + state: 'installing', + progress: 42, + stage: 'downloading model', + }); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const stateSpan = await screen.findByTestId('whisper-install-state'); + await waitFor(() => expect(stateSpan).toHaveTextContent(/downloading model/i)); + }); + + it('surfaces an error_detail in the install state line', async () => { + runtime.piperStatus = makeInstallStatus('piper', { + state: 'error', + error_detail: 'network unreachable', + }); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + await waitFor(() => + expect(screen.getByTestId('piper-install-state')).toHaveTextContent('network unreachable') + ); + // Button label flips into the retry messaging. + expect(screen.getByTestId('install-piper-button')).toHaveTextContent(/Retry locally/i); + }); + + it('shows an error notice when installWhisper rejects', async () => { + // Freeze subsequent loadData calls so the error isn't cleared by the + // automatic reload that fires in the finally block. + vi.mocked(installWhisper).mockRejectedValueOnce(new Error('disk full')); + vi.mocked(openhumanGetVoiceServerSettings).mockImplementation( + () => new Promise(() => {}) // hang — prevents error being wiped by reload + ); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + // Wait for the initial load to complete (which uses the pre-hang impl) + await screen.findByTestId('install-whisper-button'); + // Now freeze subsequent calls and click + const button = screen.getByTestId('install-whisper-button'); + fireEvent.click(button); + + await waitFor(() => expect(screen.queryByText('disk full')).toBeInTheDocument()); + }); + + it('shows an error notice when installPiper rejects', async () => { + vi.mocked(installPiper).mockRejectedValueOnce(new Error('no space left')); + vi.mocked(openhumanGetVoiceServerSettings).mockImplementation( + () => new Promise(() => {}) // hang — prevents error being wiped by reload + ); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + await screen.findByTestId('install-piper-button'); + const button = screen.getByTestId('install-piper-button'); + fireEvent.click(button); + + await waitFor(() => expect(screen.queryByText('no space left')).toBeInTheDocument()); + }); + + it('shows an error when persistProviders fails', async () => { + vi.mocked(openhumanVoiceSetProviders).mockRejectedValueOnce(new Error('RPC timeout')); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement; + fireEvent.change(sttSelect, { target: { value: 'whisper' } }); + + await waitFor(() => expect(screen.getByText('RPC timeout')).toBeInTheDocument()); + }); + + it('shows a Piper installing label with percentage', async () => { + runtime.piperStatus = makeInstallStatus('piper', { + state: 'installing', + progress: 55, + stage: 'downloading voice', + }); + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const stateSpan = await screen.findByTestId('piper-install-state'); + await waitFor(() => expect(stateSpan).toHaveTextContent(/downloading voice/i)); + }); + + it('renders a preset select and auto-installs when a Piper voice preset is changed', async () => { + runtime.voiceStatus.tts_provider = 'piper'; + runtime.voiceStatus.tts_voice_id = 'en_US-lessac-medium'; + renderWithProviders(, { initialEntries: ['/settings/voice'] }); + + const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement; + await waitFor(() => expect(ttsSelect.value).toBe('piper')); + + const voiceSelect = (await screen.findByTestId('tts-voice-select')) as HTMLSelectElement; + fireEvent.change(voiceSelect, { target: { value: 'en_US-ryan-medium' } }); + + await waitFor(() => + expect(vi.mocked(openhumanVoiceSetProviders)).toHaveBeenCalledWith( + expect.objectContaining({ tts_voice: 'en_US-ryan-medium' }) + ) + ); + }); }); diff --git a/app/src/components/skills/VoiceSetupModal.tsx b/app/src/components/skills/VoiceSetupModal.tsx index 3d64b4d9a5..3925b3ef7b 100644 --- a/app/src/components/skills/VoiceSetupModal.tsx +++ b/app/src/components/skills/VoiceSetupModal.tsx @@ -56,7 +56,9 @@ export default function VoiceSetupModal({ onClose, skillStatus }: Props) { const handleGoToLocalModel = () => { onClose(); - navigate('/settings/local-model'); + // STT model install lives on the Voice settings panel (PR 2). The + // legacy `/settings/local-model` route handled Ollama assets only. + navigate('/settings/voice'); }; const handleGoToSettings = () => { diff --git a/app/src/features/human/MicCloudComposer.test.tsx b/app/src/features/human/MicComposer.test.tsx similarity index 84% rename from app/src/features/human/MicCloudComposer.test.tsx rename to app/src/features/human/MicComposer.test.tsx index 040d73fc4d..184a8b194d 100644 --- a/app/src/features/human/MicCloudComposer.test.tsx +++ b/app/src/features/human/MicComposer.test.tsx @@ -1,14 +1,14 @@ import { fireEvent, render, screen, waitFor } from '@testing-library/react'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { MicCloudComposer } from './MicCloudComposer'; +import { MicComposer } from './MicComposer'; -// transcribeCloud + encodeBlobToWav are the network/heavy boundaries — mock -// them here so we can drive the state machine without touching real APIs. -const transcribeCloudMock = vi.fn(); +// transcribeWithFactory + encodeBlobToWav are the network/heavy boundaries — +// mock them here so we can drive the state machine without touching real APIs. +const transcribeWithFactoryMock = vi.fn(); const encodeBlobToWavMock = vi.fn(); vi.mock('./voice/sttClient', () => ({ - transcribeCloud: (...args: unknown[]) => transcribeCloudMock(...args), + transcribeWithFactory: (...args: unknown[]) => transcribeWithFactoryMock(...args), })); vi.mock('./voice/wavEncoder', () => ({ encodeBlobToWav: (...args: unknown[]) => encodeBlobToWavMock(...args), @@ -44,7 +44,7 @@ function makeFakeRecorder(mime: string): FakeRecorder { const fakeStream = { getTracks: () => [{ stop: vi.fn() }] } as unknown as MediaStream; -describe('MicCloudComposer', () => { +describe('MicComposer', () => { let recorder: FakeRecorder; let getUserMediaMock: ReturnType; // Snapshot the descriptor so afterEach can restore it — without this, the @@ -57,7 +57,7 @@ describe('MicCloudComposer', () => { globalThis.navigator, 'mediaDevices' ); - transcribeCloudMock.mockReset(); + transcribeWithFactoryMock.mockReset(); encodeBlobToWavMock.mockReset(); recorder = makeFakeRecorder('audio/webm;codecs=opus'); @@ -93,26 +93,26 @@ describe('MicCloudComposer', () => { }); it('renders the idle "Tap and speak" state', () => { - render(); + render(); expect(screen.getByText('Tap and speak')).toBeInTheDocument(); }); it('shows a "Waiting" label when disabled', () => { - render(); + render(); expect(screen.getByText(/waiting/i)).toBeInTheDocument(); }); it('does not start recording when disabled', () => { - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); expect(getUserMediaMock).not.toHaveBeenCalled(); }); it('starts recording on tap, then transcribes + submits on second tap', async () => { - transcribeCloudMock.mockResolvedValueOnce('hello world'); + transcribeWithFactoryMock.mockResolvedValueOnce('hello world'); const onSubmit = vi.fn(); const onError = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(getUserMediaMock).toHaveBeenCalled()); @@ -131,19 +131,19 @@ describe('MicCloudComposer', () => { fireEvent.click(screen.getByRole('button', { name: /stop recording and send/i })); await waitFor(() => expect(onSubmit).toHaveBeenCalledWith('hello world')); - expect(transcribeCloudMock).toHaveBeenCalledTimes(1); + expect(transcribeWithFactoryMock).toHaveBeenCalledTimes(1); }); it('forwards the language prop to transcribeCloud', async () => { - transcribeCloudMock.mockResolvedValueOnce('hi'); - render(); + transcribeWithFactoryMock.mockResolvedValueOnce('hi'); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(screen.getByRole('button', { name: /stop recording and send/i })).toBeInTheDocument() ); fireEvent.click(screen.getByRole('button', { name: /stop recording and send/i })); - await waitFor(() => expect(transcribeCloudMock).toHaveBeenCalled()); - const opts = transcribeCloudMock.mock.calls[0][1]; + await waitFor(() => expect(transcribeWithFactoryMock).toHaveBeenCalled()); + const opts = transcribeWithFactoryMock.mock.calls[0][1]; expect(opts).toEqual({ language: 'es' }); }); @@ -151,7 +151,7 @@ describe('MicCloudComposer', () => { const err = Object.assign(new DOMException('', 'NotAllowedError')); getUserMediaMock.mockRejectedValueOnce(err); const onError = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(onError).toHaveBeenCalledWith(expect.stringMatching(/permission/i))); }); @@ -160,7 +160,7 @@ describe('MicCloudComposer', () => { const err = new DOMException('', 'OverconstrainedError'); getUserMediaMock.mockRejectedValueOnce(err); const onError = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(onError).toHaveBeenCalledWith(expect.stringMatching(/unavailable/i)) @@ -171,7 +171,7 @@ describe('MicCloudComposer', () => { const err = new DOMException('', 'NotReadableError'); getUserMediaMock.mockRejectedValueOnce(err); const onError = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(onError).toHaveBeenCalledWith(expect.stringMatching(/in use/i))); }); @@ -179,7 +179,7 @@ describe('MicCloudComposer', () => { it('surfaces a generic error for non-DOMException getUserMedia failures', async () => { getUserMediaMock.mockRejectedValueOnce(new Error('some other error')); const onError = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(onError).toHaveBeenCalledWith(expect.stringMatching(/microphone error/i)) @@ -187,14 +187,14 @@ describe('MicCloudComposer', () => { }); it('falls back to wav re-encode when the native attempt fails', async () => { - transcribeCloudMock + transcribeWithFactoryMock .mockRejectedValueOnce(new Error('codec not accepted')) .mockResolvedValueOnce('after fallback'); encodeBlobToWavMock.mockResolvedValueOnce( new Blob([new Uint8Array([0])], { type: 'audio/wav' }) ); const onSubmit = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(screen.getByRole('button', { name: /stop recording and send/i })).toBeInTheDocument() @@ -202,14 +202,14 @@ describe('MicCloudComposer', () => { fireEvent.click(screen.getByRole('button', { name: /stop recording and send/i })); await waitFor(() => expect(onSubmit).toHaveBeenCalledWith('after fallback')); expect(encodeBlobToWavMock).toHaveBeenCalledTimes(1); - expect(transcribeCloudMock).toHaveBeenCalledTimes(2); + expect(transcribeWithFactoryMock).toHaveBeenCalledTimes(2); }); it('reports an error when transcription returns empty text', async () => { - transcribeCloudMock.mockResolvedValueOnce(''); + transcribeWithFactoryMock.mockResolvedValueOnce(''); const onError = vi.fn(); const onSubmit = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); await waitFor(() => expect(screen.getByRole('button', { name: /stop recording and send/i })).toBeInTheDocument() @@ -228,7 +228,7 @@ describe('MicCloudComposer', () => { writable: true, }); const onError = vi.fn(); - render(); + render(); fireEvent.click(screen.getByRole('button', { name: /start recording/i })); expect(onError).toHaveBeenCalledWith(expect.stringMatching(/not available/i)); }); @@ -236,9 +236,9 @@ describe('MicCloudComposer', () => { // ── Spacebar shortcut (#1471) ──────────────────────────────────────────── it('spacebar starts recording when idle and stops + submits on second press', async () => { - transcribeCloudMock.mockResolvedValueOnce('voice via space'); + transcribeWithFactoryMock.mockResolvedValueOnce('voice via space'); const onSubmit = vi.fn(); - render(); + render(); fireEvent.keyDown(window, { code: 'Space' }); await waitFor(() => expect(getUserMediaMock).toHaveBeenCalled()); @@ -251,13 +251,13 @@ describe('MicCloudComposer', () => { }); it('spacebar ignores key repeat so holding the key does not flap the recorder', () => { - render(); + render(); fireEvent.keyDown(window, { code: 'Space', repeat: true }); expect(getUserMediaMock).not.toHaveBeenCalled(); }); it('spacebar ignores modifier combinations so Shift-Space etc. stay free', () => { - render(); + render(); fireEvent.keyDown(window, { code: 'Space', shiftKey: true }); fireEvent.keyDown(window, { code: 'Space', ctrlKey: true }); fireEvent.keyDown(window, { code: 'Space', metaKey: true }); @@ -269,7 +269,7 @@ describe('MicCloudComposer', () => { render( <> - + ); const input = screen.getByTestId('text-field'); @@ -282,7 +282,7 @@ describe('MicCloudComposer', () => { render( <>