diff --git a/Cargo.lock b/Cargo.lock
index 41c449b8a1..f5c07cbf55 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4548,6 +4548,7 @@ dependencies = [
  "enigo",
  "env_logger",
  "fantoccini",
+ "flate2",
  "fs2",
  "futures",
  "futures-util",
diff --git a/Cargo.toml b/Cargo.toml
index f7fc42c18b..4635060406 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,6 +55,10 @@ hmac = "0.12"
 tar = "0.4"
 xz2 = { version = "0.1", features = ["static"] }
 zip = { version = "2", default-features = false, features = ["deflate"] }
+# gzip decoder for the Piper tar.gz binary releases on macOS / Linux. Already
+# pulled in transitively by zip's `deflate` feature; declared directly so
+# the installer module can `use flate2::read::GzDecoder`.
+flate2 = "1"
 # Real timeout for `node --version` probes in the runtime resolver. Guards
 # against a broken shim on PATH hanging the bootstrap forever.
 wait-timeout = "0.2"
diff --git a/app/src-tauri/Cargo.lock b/app/src-tauri/Cargo.lock
index cef4d8236b..e8f0c16112 100644
--- a/app/src-tauri/Cargo.lock
+++ b/app/src-tauri/Cargo.lock
@@ -4612,6 +4612,7 @@ dependencies = [
  "dotenvy",
  "enigo",
  "env_logger",
+ "flate2",
  "fs2",
  "futures",
  "futures-util",
diff --git a/app/src/chat/chatSendError.ts b/app/src/chat/chatSendError.ts
index e74b1d73de..1c1b5bec8f 100644
--- a/app/src/chat/chatSendError.ts
+++ b/app/src/chat/chatSendError.ts
@@ -6,6 +6,8 @@ export type ChatSendErrorCode =
   | 'cloud_send_failed'
   | 'voice_transcription'
   | 'stt_not_ready'
+  | 'voice_synthesis'
+  | 'tts_not_ready'
   | 'microphone_unavailable'
   | 'microphone_recording'
   | 'microphone_access'
diff --git a/app/src/components/settings/hooks/useSettingsNavigation.ts b/app/src/components/settings/hooks/useSettingsNavigation.ts
index ab565fc424..7fad5fe64d 100644
--- a/app/src/components/settings/hooks/useSettingsNavigation.ts
+++ b/app/src/components/settings/hooks/useSettingsNavigation.ts
@@ -191,13 +191,13 @@ export const useSettingsNavigation = (): SettingsNavigationHook => {
       // Leaf panels under features
       case 'screen-intelligence':
       case 'autocomplete':
-      case 'voice':
       case 'messaging':
       case 'tools':
         return [settingsCrumb, featuresCrumb];
 
       // Leaf panels under AI & Models
       case 'local-model':
+      case 'voice':
         return [settingsCrumb, aiModelsCrumb];
 
       // Team sub-pages
diff --git a/app/src/components/settings/panels/VoicePanel.tsx b/app/src/components/settings/panels/VoicePanel.tsx
index 1d4023be55..cc2dbd106e 100644
--- a/app/src/components/settings/panels/VoicePanel.tsx
+++ b/app/src/components/settings/panels/VoicePanel.tsx
@@ -1,5 +1,12 @@
 import { useEffect, useRef, useState } from 'react';
 
+import {
+  installPiper,
+  installWhisper,
+  piperInstallStatus,
+  type VoiceInstallStatus,
+  whisperInstallStatus,
+} from '../../../services/api/voiceInstallApi';
 import {
   openhumanGetVoiceServerSettings,
   openhumanLocalAiAssetsStatus,
@@ -7,7 +14,9 @@ import {
   openhumanVoiceServerStart,
   openhumanVoiceServerStatus,
   openhumanVoiceServerStop,
+  openhumanVoiceSetProviders,
   openhumanVoiceStatus,
+  type VoiceProvidersSnapshot,
   type VoiceServerSettings,
   type VoiceServerStatus,
   type VoiceStatus,
@@ -15,13 +24,41 @@ import {
 import SettingsHeader from '../components/SettingsHeader';
 import { useSettingsNavigation } from '../hooks/useSettingsNavigation';
 
+// Curated Piper voice presets — a handful of well-known English voices
+// covering male/female and US/GB accents at the recommended `medium`
+// quality tier. The full catalogue at
+// huggingface.co/rhasspy/piper-voices has 100+ voices; a dropdown of
+// every option is unusable so we ship a starter set and keep the free-
+// text input as an escape hatch via the "Other…" option.
+const PIPER_VOICE_PRESETS: ReadonlyArray<{ id: string; label: string }> = [
+  { id: 'en_US-lessac-medium', label: 'US · Lessac (neutral, recommended)' },
+  { id: 'en_US-lessac-high', label: 'US · Lessac (higher quality, larger)' },
+  { id: 'en_US-ryan-medium', label: 'US · Ryan (male)' },
+  { id: 'en_US-amy-medium', label: 'US · Amy (female)' },
+  { id: 'en_US-libritts-high', label: 'US · LibriTTS (multi-speaker)' },
+  { id: 'en_GB-alan-medium', label: 'GB · Alan (male)' },
+  { id: 'en_GB-jenny_dioco-medium', label: 'GB · Jenny Dioco (female)' },
+  { id: 'en_GB-northern_english_male-medium', label: 'GB · Northern English (male)' },
+];
+
 const VoicePanel = () => {
   const { navigateBack, navigateToSettings, breadcrumbs } = useSettingsNavigation();
   const [settings, setSettings] = useState<VoiceServerSettings | null>(null);
   const [savedSettings, setSavedSettings] = useState<VoiceServerSettings | null>(null);
   const [serverStatus, setServerStatus] = useState<VoiceServerStatus | null>(null);
-  const [, setVoiceStatus] = useState<VoiceStatus | null>(null);
+  const [voiceStatus, setVoiceStatus] = useState<VoiceStatus | null>(null);
   const [sttReady, setSttReady] = useState(false);
+  // Local provider selectors — initialised from voice_status, persisted via
+  // openhumanVoiceSetProviders on change. Empty string until first load.
+  const [sttProvider, setSttProvider] = useState<'cloud' | 'whisper' | ''>('');
+  const [ttsProvider, setTtsProvider] = useState<'cloud' | 'piper' | ''>('');
+  const [sttModel, setSttModel] = useState<string>('');
+  const [ttsVoice, setTtsVoice] = useState<string>('');
+  const [isSavingProviders, setIsSavingProviders] = useState(false);
+  const [whisperInstall, setWhisperInstall] = useState<VoiceInstallStatus | null>(null);
+  const [piperInstall, setPiperInstall] = useState<VoiceInstallStatus | null>(null);
+  const [isInstallingWhisper, setIsInstallingWhisper] = useState(false);
+  const [isInstallingPiper, setIsInstallingPiper] = useState(false);
   const [, setIsLoading] = useState(true);
   const [isSaving, setIsSaving] = useState(false);
   const [isStarting, setIsStarting] = useState(false);
@@ -47,12 +84,36 @@ const VoicePanel = () => {
 
   const loadData = async (forceSettings = false) => {
     try {
-      const [settingsResponse, serverResponse, voiceResponse, assetsResponse] = await Promise.all([
+      const [
+        settingsResponse,
+        serverResponse,
+        voiceResponse,
+        assetsResponse,
+        whisperStatusResponse,
+        piperStatusResponse,
+      ] = await Promise.all([
         openhumanGetVoiceServerSettings(),
         openhumanVoiceServerStatus(),
         openhumanVoiceStatus(),
         openhumanLocalAiAssetsStatus(),
+        whisperInstallStatus().catch(err => {
+          // Status polls happen on a 2s loop; a single transient error
+          // shouldn't blow up the entire settings panel. Log + keep the
+          // previous snapshot.
+          if (process.env.NODE_ENV !== 'production') {
+            console.debug('[voice-install:whisper] status poll failed', err);
+          }
+          return null;
+        }),
+        piperInstallStatus().catch(err => {
+          if (process.env.NODE_ENV !== 'production') {
+            console.debug('[voice-install:piper] status poll failed', err);
+          }
+          return null;
+        }),
       ]);
+      if (whisperStatusResponse) setWhisperInstall(whisperStatusResponse);
+      if (piperStatusResponse) setPiperInstall(piperStatusResponse);
       const currentSettings = settingsRef.current;
       const currentSavedSettings = savedSettingsRef.current;
       if (
@@ -65,6 +126,25 @@ const VoicePanel = () => {
       setSavedSettings(settingsResponse.result);
       setServerStatus(serverResponse);
       setVoiceStatus(voiceResponse);
+      // Seed provider dropdowns from core state on first load. Use the
+      // functional updater form so the check reads *current* state rather
+      // than the stale closure captured when the interval was created —
+      // otherwise every poll tick could re-apply the server value and
+      // clobber an in-flight user edit.
+      if (voiceResponse.stt_provider) {
+        const seeded = voiceResponse.stt_provider === 'whisper' ? 'whisper' : 'cloud';
+        setSttProvider(prev => prev || seeded);
+      }
+      if (voiceResponse.tts_provider) {
+        const seeded = voiceResponse.tts_provider === 'piper' ? 'piper' : 'cloud';
+        setTtsProvider(prev => prev || seeded);
+      }
+      if (voiceResponse.stt_model_id) {
+        setSttModel(prev => prev || voiceResponse.stt_model_id);
+      }
+      if (voiceResponse.tts_voice_id) {
+        setTtsVoice(prev => prev || voiceResponse.tts_voice_id);
+      }
       const sttAssetState = assetsResponse.result.stt?.state;
       const sttAssetOk = sttAssetState === 'ready' || sttAssetState === 'ondemand';
       if (process.env.NODE_ENV !== 'production') {
@@ -187,16 +267,351 @@ const VoicePanel = () => {
   const disabled = !sttReady;
   const isRunning = serverStatus != null && serverStatus.state !== 'stopped';
 
+  const persistProviders = async (
+    update: Partial<VoiceProvidersSnapshot> & {
+      stt_provider?: 'cloud' | 'whisper';
+      tts_provider?: 'cloud' | 'piper';
+      stt_model?: string;
+      tts_voice?: string;
+    }
+  ) => {
+    setIsSavingProviders(true);
+    setError(null);
+    try {
+      const snapshot = await openhumanVoiceSetProviders({
+        stt_provider: update.stt_provider,
+        tts_provider: update.tts_provider,
+        stt_model: update.stt_model,
+        tts_voice: update.tts_voice,
+      });
+      if (process.env.NODE_ENV !== 'production') {
+        console.debug('[VoicePanel:providers] saved', snapshot);
+      }
+      setNotice('Voice providers saved.');
+      // Force a reload so the rest of the panel reflects the new state.
+      await loadData(true);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Failed to save voice providers';
+      setError(message);
+    } finally {
+      setIsSavingProviders(false);
+    }
+  };
+
+  const onSttProviderChange = (next: 'cloud' | 'whisper') => {
+    setSttProvider(next);
+    void persistProviders({ stt_provider: next });
+  };
+  const onTtsProviderChange = (next: 'cloud' | 'piper') => {
+    setTtsProvider(next);
+    void persistProviders({ tts_provider: next });
+  };
+
+  /**
+   * Map an install status snapshot to a button label. Single source of
+   * truth for the labels the UI surfaces: Installing N% / Installing… /
+   * Reinstall locally / Repair / Retry locally / Install locally.
+   */
+  const installButtonLabel = (
+    status: VoiceInstallStatus | null,
+    busy: boolean,
+    _engine: 'Whisper' | 'Piper'
+  ): string => {
+    // Render based on the remote status — the install RPC is fire-and-forget,
+    // so the local `busy` flag only covers the brief moment between click and
+    // the RPC return. The real "is install running?" signal comes from the
+    // polled status table, which lags behind by at most one 2s tick.
+    if (status?.state === 'installing') {
+      const pct = typeof status.progress === 'number' ? `${status.progress}%` : '…';
+      return `Installing ${pct}`;
+    }
+    if (busy) return 'Installing…';
+    if (status?.state === 'installed') return 'Reinstall locally';
+    if (status?.state === 'broken') return 'Repair';
+    if (status?.state === 'error') return 'Retry locally';
+    return 'Install locally';
+  };
+
+  const handleInstallWhisper = async () => {
+    setIsInstallingWhisper(true);
+    setError(null);
+    setNotice(null);
+    try {
+      const force = whisperInstall?.state === 'installed';
+      console.debug('[voice-install:whisper] install click force=%s', force);
+      const result = await installWhisper({ modelSize: sttModel || undefined, force });
+      setWhisperInstall(result);
+      setNotice(
+        result.state === 'installed'
+          ? 'Whisper is ready.'
+          : `Whisper install started (${result.stage ?? 'queued'})`
+      );
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Failed to install Whisper';
+      setError(message);
+    } finally {
+      setIsInstallingWhisper(false);
+      await loadData(false);
+    }
+  };
+
+  const handleInstallPiper = async () => {
+    setIsInstallingPiper(true);
+    setError(null);
+    setNotice(null);
+    try {
+      const force = piperInstall?.state === 'installed';
+      console.debug('[voice-install:piper] install click force=%s', force);
+      const result = await installPiper({ voiceId: ttsVoice || undefined, force });
+      setPiperInstall(result);
+      setNotice(
+        result.state === 'installed'
+          ? 'Piper is ready.'
+          : `Piper install started (${result.stage ?? 'queued'})`
+      );
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Failed to install Piper';
+      setError(message);
+    } finally {
+      setIsInstallingPiper(false);
+      await loadData(false);
+    }
+  };
+
+  const whisperReady = whisperInstall?.state === 'installed';
+  const piperReady = piperInstall?.state === 'installed';
+
   return (
     <div>
       <SettingsHeader
-        title="Voice Dictation"
+        title="Voice"
         showBackButton={true}
         onBack={navigateBack}
         breadcrumbs={breadcrumbs}
       />
 
       <div className="p-4 space-y-4">
+        <section className="space-y-3">
+          <div
+            className="bg-stone-50 rounded-lg border border-stone-200 p-4 space-y-4"
+            data-testid="voice-providers-section">
+            <div>
+              <h3 className="text-sm font-semibold text-stone-900">Voice Providers</h3>
+              <p className="text-xs text-stone-500 mt-1">
+                Choose where transcription and synthesis run. Use the Install locally buttons to
+                download the binaries and models into your workspace — no manual{' '}
+                <code>WHISPER_BIN</code> or <code>PIPER_BIN</code> setup required.
+              </p>
+            </div>
+            <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
+              <label className="block space-y-1">
+                <span className="text-xs font-medium text-stone-600">Speech-to-Text Provider</span>
+                <select
+                  aria-label="STT provider"
+                  data-testid="stt-provider-select"
+                  value={sttProvider || 'cloud'}
+                  disabled={isSavingProviders}
+                  onChange={e => onSttProviderChange(e.target.value as 'cloud' | 'whisper')}
+                  className="w-full rounded-md border border-stone-200 bg-white px-3 py-2 text-sm text-stone-900 focus:outline-none focus:ring-1 focus:ring-primary-400">
+                  <option value="cloud">Cloud (Whisper proxy)</option>
+                  <option value="whisper" disabled={!whisperReady}>
+                    Local Whisper{whisperReady ? '' : ' (install required)'}
+                  </option>
+                </select>
+                <div className="flex items-center gap-2 pt-1">
+                  <button
+                    type="button"
+                    data-testid="install-whisper-button"
+                    onClick={() => void handleInstallWhisper()}
+                    disabled={isInstallingWhisper || whisperInstall?.state === 'installing'}
+                    title={
+                      whisperReady
+                        ? 'Whisper is installed. Click to reinstall.'
+                        : 'Download whisper.cpp and the GGML model into your workspace.'
+                    }
+                    className={`px-2.5 py-1 text-[11px] rounded-md text-white disabled:opacity-60 ${
+                      whisperReady
+                        ? 'bg-stone-600 hover:bg-stone-700'
+                        : 'bg-primary-600 hover:bg-primary-700'
+                    }`}>
+                    {installButtonLabel(whisperInstall, isInstallingWhisper, 'Whisper')}
+                  </button>
+                  <span
+                    data-testid="whisper-install-state"
+                    className={`text-[11px] ${
+                      whisperReady
+                        ? 'text-emerald-600'
+                        : whisperInstall?.state === 'error'
+                          ? 'text-red-600'
+                          : 'text-stone-500'
+                    }`}>
+                    {whisperInstall?.state === 'installing' && whisperInstall.stage
+                      ? whisperInstall.stage
+                      : whisperReady
+                        ? 'Installed'
+                        : whisperInstall?.state === 'error'
+                          ? (whisperInstall.error_detail ?? 'Install failed')
+                          : 'Not installed'}
+                  </span>
+                </div>
+              </label>
+              {sttProvider === 'whisper' && (
+                <label className="block space-y-1">
+                  <span className="text-xs font-medium text-stone-600">Whisper Model</span>
+                  <select
+                    aria-label="Whisper model"
+                    data-testid="stt-model-select"
+                    value={sttModel || 'medium'}
+                    disabled={isSavingProviders}
+                    onChange={e => {
+                      const nextModel = e.target.value;
+                      setSttModel(nextModel);
+                      void persistProviders({ stt_model: nextModel });
+                      // Trigger install for the newly-selected model. The
+                      // RPC is fire-and-forget + idempotent: if the .bin
+                      // is already on disk, install_whisper short-circuits;
+                      // if missing, status polling renders the download
+                      // progress in the Install button inline.
+                      void installWhisper({ modelSize: nextModel }).catch(err =>
+                        console.warn(
+                          '[voice-install:whisper] auto-install on model change failed:',
+                          err
+                        )
+                      );
+                    }}
+                    className="w-full rounded-md border border-stone-200 bg-white px-3 py-2 text-sm text-stone-900 focus:outline-none focus:ring-1 focus:ring-primary-400">
+                    <option value="tiny">Tiny (39 MB, fastest)</option>
+                    <option value="base">Base (74 MB)</option>
+                    <option value="small">Small (244 MB)</option>
+                    <option value="medium">Medium (769 MB, recommended)</option>
+                    <option value="whisper-large-v3-turbo">
+                      Large v3 Turbo (1.5 GB, best accuracy)
+                    </option>
+                  </select>
+                </label>
+              )}
+              <label className="block space-y-1">
+                <span className="text-xs font-medium text-stone-600">Text-to-Speech Provider</span>
+                <select
+                  aria-label="TTS provider"
+                  data-testid="tts-provider-select"
+                  value={ttsProvider || 'cloud'}
+                  disabled={isSavingProviders}
+                  onChange={e => onTtsProviderChange(e.target.value as 'cloud' | 'piper')}
+                  className="w-full rounded-md border border-stone-200 bg-white px-3 py-2 text-sm text-stone-900 focus:outline-none focus:ring-1 focus:ring-primary-400">
+                  <option value="cloud">Cloud (ElevenLabs proxy)</option>
+                  <option value="piper" disabled={!piperReady}>
+                    Local Piper{piperReady ? '' : ' (install required)'}
+                  </option>
+                </select>
+                <div className="flex items-center gap-2 pt-1">
+                  <button
+                    type="button"
+                    data-testid="install-piper-button"
+                    onClick={() => void handleInstallPiper()}
+                    disabled={isInstallingPiper || piperInstall?.state === 'installing'}
+                    title={
+                      piperReady
+                        ? 'Piper is installed. Click to reinstall.'
+                        : 'Download Piper and the bundled en_US-lessac-medium voice into your workspace.'
+                    }
+                    className={`px-2.5 py-1 text-[11px] rounded-md text-white disabled:opacity-60 ${
+                      piperReady
+                        ? 'bg-stone-600 hover:bg-stone-700'
+                        : 'bg-primary-600 hover:bg-primary-700'
+                    }`}>
+                    {installButtonLabel(piperInstall, isInstallingPiper, 'Piper')}
+                  </button>
+                  <span
+                    data-testid="piper-install-state"
+                    className={`text-[11px] ${
+                      piperReady
+                        ? 'text-emerald-600'
+                        : piperInstall?.state === 'error'
+                          ? 'text-red-600'
+                          : 'text-stone-500'
+                    }`}>
+                    {piperInstall?.state === 'installing' && piperInstall.stage
+                      ? piperInstall.stage
+                      : piperReady
+                        ? 'Installed'
+                        : piperInstall?.state === 'error'
+                          ? (piperInstall.error_detail ?? 'Install failed')
+                          : 'Not installed'}
+                  </span>
+                </div>
+              </label>
+              {ttsProvider === 'piper' && (
+                <label className="block space-y-1">
+                  <span className="text-xs font-medium text-stone-600">Piper Voice</span>
+                  <select
+                    aria-label="Piper voice"
+                    data-testid="tts-voice-select"
+                    value={
+                      PIPER_VOICE_PRESETS.some(v => v.id === ttsVoice) ? ttsVoice : '__custom__'
+                    }
+                    disabled={isSavingProviders}
+                    onChange={e => {
+                      const next = e.target.value;
+                      if (next === '__custom__') {
+                        // NOTE(review): no state changes here, so if ttsVoice is a preset the
+                        // controlled select snaps back and the text input never renders.
+                        return;
+                      }
+                      setTtsVoice(next);
+                      void persistProviders({ tts_voice: next });
+                      // Auto-fetch the .onnx for the new voice if missing.
+                      // install_piper is fire-and-forget; status polling
+                      // shows download progress in the Install button.
+                      void installPiper({ voiceId: next }).catch(err =>
+                        console.warn(
+                          '[voice-install:piper] auto-install on voice change failed:',
+                          err
+                        )
+                      );
+                    }}
+                    className="w-full rounded-md border border-stone-200 bg-white px-3 py-2 text-sm text-stone-900 focus:outline-none focus:ring-1 focus:ring-primary-400">
+                    {PIPER_VOICE_PRESETS.map(v => (
+                      <option key={v.id} value={v.id}>
+                        {v.label}
+                      </option>
+                    ))}
+                    <option value="__custom__">Other (type below)…</option>
+                  </select>
+                  {!PIPER_VOICE_PRESETS.some(v => v.id === ttsVoice) && (
+                    <input
+                      aria-label="Piper voice id (custom)"
+                      data-testid="tts-voice-input"
+                      value={ttsVoice}
+                      placeholder="en_US-lessac-medium"
+                      disabled={isSavingProviders}
+                      onChange={e => setTtsVoice(e.target.value)}
+                      onBlur={() => {
+                        if (ttsVoice && ttsVoice !== voiceStatus?.tts_voice_id) {
+                          void persistProviders({ tts_voice: ttsVoice });
+                          void installPiper({ voiceId: ttsVoice }).catch(err =>
+                            console.warn(
+                              '[voice-install:piper] auto-install on custom voice failed:',
+                              err
+                            )
+                          );
+                        }
+                      }}
+                      className="mt-1 w-full rounded-md border border-stone-200 bg-white px-3 py-2 text-sm text-stone-900 placeholder:text-stone-400 focus:outline-none focus:ring-1 focus:ring-primary-400"
+                    />
+                  )}
+                  <p className="text-[11px] text-stone-500 mt-0.5">
+                    Voices come from{' '}
+                    <code className="font-mono">huggingface.co/rhasspy/piper-voices</code>.
+                    Switching voices may require an Install/Reinstall click to download the new{' '}
+                    <code>.onnx</code>.
+                  </p>
+                </label>
+              )}
+            </div>
+          </div>
+        </section>
+
         <section className={`space-y-3 ${disabled ? 'opacity-60' : ''}`}>
           <div className="bg-stone-50 rounded-lg border border-stone-200 p-4 space-y-4">
             <div>
@@ -322,16 +737,9 @@ const VoicePanel = () => {
             )}
 
             {disabled && (
-              <div className="rounded-md border border-amber-200 bg-amber-50 p-4 text-sm text-amber-800 space-y-3">
-                <div>
-                  Voice dictation is disabled until the local STT model is downloaded and ready.
-                </div>
-                <button
-                  type="button"
-                  onClick={() => navigateToSettings('local-model')}
-                  className="px-3 py-1.5 text-xs rounded-md bg-amber-600 hover:bg-amber-700 text-white">
-                  Open Local AI Model
-                </button>
+              <div className="rounded-md border border-amber-200 bg-amber-50 p-4 text-sm text-amber-800">
+                Voice dictation is disabled until the local STT model is downloaded. Use the{' '}
+                <strong>Voice Providers</strong> section above to install Whisper.
               </div>
             )}
 
diff --git a/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx b/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx
index ec83e1d2ef..3a5936bf5a 100644
--- a/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx
+++ b/app/src/components/settings/panels/__tests__/VoicePanel.test.tsx
@@ -1,6 +1,13 @@
 import { fireEvent, screen, waitFor } from '@testing-library/react';
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
+import {
+  installPiper,
+  installWhisper,
+  piperInstallStatus,
+  type VoiceInstallStatus,
+  whisperInstallStatus,
+} from '../../../../services/api/voiceInstallApi';
 import { renderWithProviders } from '../../../../test/test-utils';
 import {
   type CommandResponse,
@@ -11,6 +18,7 @@ import {
   openhumanVoiceServerStart,
   openhumanVoiceServerStatus,
   openhumanVoiceServerStop,
+  openhumanVoiceSetProviders,
   openhumanVoiceStatus,
   type VoiceServerSettings,
   type VoiceServerStatus,
@@ -25,16 +33,40 @@ vi.mock('../../../../utils/tauriCommands', () => ({
   openhumanVoiceServerStart: vi.fn(),
   openhumanVoiceServerStatus: vi.fn(),
   openhumanVoiceServerStop: vi.fn(),
+  openhumanVoiceSetProviders: vi.fn(),
   openhumanVoiceStatus: vi.fn(),
 }));
 
+vi.mock('../../../../services/api/voiceInstallApi', () => ({
+  installWhisper: vi.fn(),
+  installPiper: vi.fn(),
+  whisperInstallStatus: vi.fn(),
+  piperInstallStatus: vi.fn(),
+}));
+
 type RuntimeHarness = {
   settings: VoiceServerSettings;
   serverStatus: VoiceServerStatus;
   voiceStatus: VoiceStatus;
   sttState: string;
+  whisperStatus: VoiceInstallStatus;
+  piperStatus: VoiceInstallStatus;
 };
 
+const makeInstallStatus = (
+  engine: 'whisper' | 'piper',
+  overrides: Partial<VoiceInstallStatus> = {}
+): VoiceInstallStatus => ({
+  engine,
+  state: 'missing',
+  progress: null,
+  downloaded_bytes: null,
+  total_bytes: null,
+  stage: null,
+  error_detail: null,
+  ...overrides,
+});
+
 const makeConfigSnapshot = (): CommandResponse<ConfigSnapshot> => ({
   result: {
     config: {},
@@ -78,8 +110,12 @@ describe('VoicePanel', () => {
         tts_voice_path: '/tmp/tts.onnx',
         whisper_in_process: true,
         llm_cleanup_enabled: true,
+        stt_provider: 'cloud',
+        tts_provider: 'cloud',
       },
       sttState: 'ready',
+      whisperStatus: makeInstallStatus('whisper'),
+      piperStatus: makeInstallStatus('piper'),
     };
 
     vi.mocked(openhumanGetVoiceServerSettings).mockImplementation(async () => ({
@@ -114,6 +150,40 @@ describe('VoicePanel', () => {
       runtime.serverStatus = { ...runtime.serverStatus, state: 'stopped' };
       return { ...runtime.serverStatus };
     });
+    vi.mocked(openhumanVoiceSetProviders).mockImplementation(async update => {
+      if (update.stt_provider) runtime.voiceStatus.stt_provider = update.stt_provider;
+      if (update.tts_provider) runtime.voiceStatus.tts_provider = update.tts_provider;
+      if (update.stt_model) runtime.voiceStatus.stt_model_id = update.stt_model;
+      if (update.tts_voice) runtime.voiceStatus.tts_voice_id = update.tts_voice;
+      return {
+        stt_provider: runtime.voiceStatus.stt_provider,
+        tts_provider: runtime.voiceStatus.tts_provider,
+        stt_model_id: runtime.voiceStatus.stt_model_id,
+        tts_voice_id: runtime.voiceStatus.tts_voice_id,
+      };
+    });
+
+    // Install-status polls return the current harness snapshot — tests
+    // mutate `runtime.whisperStatus` / `runtime.piperStatus` to simulate
+    // a real install cycle.
+    vi.mocked(whisperInstallStatus).mockImplementation(async () => ({ ...runtime.whisperStatus }));
+    vi.mocked(piperInstallStatus).mockImplementation(async () => ({ ...runtime.piperStatus }));
+    vi.mocked(installWhisper).mockImplementation(async () => {
+      runtime.whisperStatus = makeInstallStatus('whisper', {
+        state: 'installed',
+        progress: 100,
+        stage: 'install complete',
+      });
+      return { ...runtime.whisperStatus };
+    });
+    vi.mocked(installPiper).mockImplementation(async () => {
+      runtime.piperStatus = makeInstallStatus('piper', {
+        state: 'installed',
+        progress: 100,
+        stage: 'install complete',
+      });
+      return { ...runtime.piperStatus };
+    });
   });
 
   it('disables the panel when STT assets are not ready', async () => {
@@ -122,9 +192,8 @@ describe('VoicePanel', () => {
 
     renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
 
-    expect(await screen.findByText('Voice Dictation')).toBeInTheDocument();
     expect(
-      screen.getByText(/Voice dictation is disabled until the local STT model is downloaded/)
+      await screen.findByText(/Voice dictation is disabled until the local STT model is downloaded/)
     ).toBeInTheDocument();
     expect(screen.getByRole('button', { name: 'Start Voice Server' })).toBeDisabled();
   });
@@ -193,4 +262,239 @@ describe('VoicePanel', () => {
       await screen.findByText('Voice server restarted with the new settings.')
     ).toBeInTheDocument();
   });
+
+  it('renders the STT and TTS provider dropdowns with seeded values', async () => {
+    runtime.voiceStatus.stt_provider = 'whisper';
+    runtime.voiceStatus.tts_provider = 'piper';
+
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement;
+    const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement;
+    // Initial load runs an extra interval tick; wait for the seeding effect.
+    await waitFor(() => expect(sttSelect.value).toBe('whisper'));
+    expect(ttsSelect.value).toBe('piper');
+    // The Whisper model picker only appears when the STT provider is local.
+    expect(screen.getByTestId('stt-model-select')).toBeInTheDocument();
+    // tts_voice_id is seeded to 'en_US-lessac-medium' which is a known preset,
+    // so the UI should render the preset select, not the free-text input.
+    expect(screen.getByTestId('tts-voice-select')).toBeInTheDocument();
+    expect(screen.queryByTestId('tts-voice-input')).not.toBeInTheDocument();
+  });
+
+  it('persists STT provider changes through openhumanVoiceSetProviders', async () => {
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement;
+    fireEvent.change(sttSelect, { target: { value: 'whisper' } });
+
+    await waitFor(() =>
+      expect(vi.mocked(openhumanVoiceSetProviders)).toHaveBeenCalledWith(
+        expect.objectContaining({ stt_provider: 'whisper' })
+      )
+    );
+    // Saved notice should surface for the user.
+    expect(await screen.findByText(/Voice providers saved/i)).toBeInTheDocument();
+  });
+
+  it('persists TTS provider changes through openhumanVoiceSetProviders', async () => {
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement;
+    fireEvent.change(ttsSelect, { target: { value: 'piper' } });
+
+    await waitFor(() =>
+      expect(vi.mocked(openhumanVoiceSetProviders)).toHaveBeenCalledWith(
+        expect.objectContaining({ tts_provider: 'piper' })
+      )
+    );
+  });
+
+  it('renders the Install Whisper button when the engine is missing', async () => {
+    runtime.whisperStatus = makeInstallStatus('whisper'); // explicit missing
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const button = await screen.findByTestId('install-whisper-button');
+    expect(button).toHaveTextContent('Install locally');
+    expect(screen.getByTestId('whisper-install-state')).toHaveTextContent('Not installed');
+  });
+
+  it('disables the Local Whisper STT option when the engine is missing', async () => {
+    runtime.whisperStatus = makeInstallStatus('whisper');
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement;
+    const whisperOption = sttSelect.querySelector(
+      'option[value="whisper"]'
+    ) as HTMLOptionElement | null;
+    expect(whisperOption).not.toBeNull();
+    expect(whisperOption!.disabled).toBe(true);
+    expect(whisperOption!.textContent).toMatch(/install required/i);
+  });
+
+  it('shows a Reinstall label once Whisper is installed', async () => {
+    runtime.whisperStatus = makeInstallStatus('whisper', { state: 'installed', progress: 100 });
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const button = await screen.findByTestId('install-whisper-button');
+    await waitFor(() => expect(button).toHaveTextContent(/Reinstall locally/i));
+    expect(screen.getByTestId('whisper-install-state')).toHaveTextContent('Installed');
+  });
+
+  it('triggers installWhisper when the user clicks Install', async () => {
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const button = await screen.findByTestId('install-whisper-button');
+    fireEvent.click(button);
+
+    await waitFor(() => expect(vi.mocked(installWhisper)).toHaveBeenCalledTimes(1));
+    // First-time install must NOT force re-download.
+    expect(vi.mocked(installWhisper)).toHaveBeenCalledWith(
+      expect.objectContaining({ force: false })
+    );
+  });
+
+  it('forces re-download when Reinstall is clicked on an installed engine', async () => {
+    runtime.whisperStatus = makeInstallStatus('whisper', { state: 'installed', progress: 100 });
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const button = await screen.findByTestId('install-whisper-button');
+    await waitFor(() => expect(button).toHaveTextContent(/Reinstall locally/i));
+    fireEvent.click(button);
+
+    await waitFor(() => expect(vi.mocked(installWhisper)).toHaveBeenCalledTimes(1));
+    expect(vi.mocked(installWhisper)).toHaveBeenCalledWith(
+      expect.objectContaining({ force: true })
+    );
+  });
+
+  it('renders the Install Piper button when the engine is missing', async () => {
+    runtime.piperStatus = makeInstallStatus('piper');
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const button = await screen.findByTestId('install-piper-button');
+    expect(button).toHaveTextContent('Install locally');
+    expect(screen.getByTestId('piper-install-state')).toHaveTextContent('Not installed');
+  });
+
+  it('disables the Local Piper TTS option when the engine is missing', async () => {
+    runtime.piperStatus = makeInstallStatus('piper');
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement;
+    const piperOption = ttsSelect.querySelector(
+      'option[value="piper"]'
+    ) as HTMLOptionElement | null;
+    expect(piperOption).not.toBeNull();
+    expect(piperOption!.disabled).toBe(true);
+    expect(piperOption!.textContent).toMatch(/install required/i);
+  });
+
+  it('triggers installPiper when the user clicks Install', async () => {
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const button = await screen.findByTestId('install-piper-button');
+    fireEvent.click(button);
+
+    await waitFor(() => expect(vi.mocked(installPiper)).toHaveBeenCalledTimes(1));
+    expect(vi.mocked(installPiper)).toHaveBeenCalledWith(expect.objectContaining({ force: false }));
+  });
+
+  it('shows the in-flight installing label and percentage', async () => {
+    runtime.whisperStatus = makeInstallStatus('whisper', {
+      state: 'installing',
+      progress: 42,
+      stage: 'downloading model',
+    });
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const stateSpan = await screen.findByTestId('whisper-install-state');
+    await waitFor(() => expect(stateSpan).toHaveTextContent(/downloading model/i));
+  });
+
+  it('surfaces an error_detail in the install state line', async () => {
+    runtime.piperStatus = makeInstallStatus('piper', {
+      state: 'error',
+      error_detail: 'network unreachable',
+    });
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    await waitFor(() =>
+      expect(screen.getByTestId('piper-install-state')).toHaveTextContent('network unreachable')
+    );
+    // Button label flips into the retry messaging.
+    expect(screen.getByTestId('install-piper-button')).toHaveTextContent(/Retry locally/i);
+  });
+
+  it('shows an error notice when installWhisper rejects', async () => {
+    // Make every settings fetch hang so the reload that presumably follows the
+    // install attempt can never resolve and clear the error asserted below.
+    vi.mocked(installWhisper).mockRejectedValueOnce(new Error('disk full'));
+    vi.mocked(openhumanGetVoiceServerSettings).mockImplementation(
+      () => new Promise(() => {}) // hang — prevents error being wiped by reload
+    );
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    // NOTE(review): mock hangs from first render, not only after this point — confirm.
+    await screen.findByTestId('install-whisper-button');
+    // Click install; the mocked installWhisper rejects once with 'disk full'.
+    const button = screen.getByTestId('install-whisper-button');
+    fireEvent.click(button);
+
+    await waitFor(() => expect(screen.queryByText('disk full')).toBeInTheDocument());
+  });
+
+  it('shows an error notice when installPiper rejects', async () => {
+    vi.mocked(installPiper).mockRejectedValueOnce(new Error('no space left'));
+    vi.mocked(openhumanGetVoiceServerSettings).mockImplementation(
+      () => new Promise(() => {}) // hang — prevents error being wiped by reload
+    );
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    await screen.findByTestId('install-piper-button');
+    const button = screen.getByTestId('install-piper-button');
+    fireEvent.click(button);
+
+    await waitFor(() => expect(screen.queryByText('no space left')).toBeInTheDocument());
+  });
+
+  it('shows an error when persistProviders fails', async () => {
+    vi.mocked(openhumanVoiceSetProviders).mockRejectedValueOnce(new Error('RPC timeout'));
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const sttSelect = (await screen.findByTestId('stt-provider-select')) as HTMLSelectElement;
+    fireEvent.change(sttSelect, { target: { value: 'whisper' } });
+
+    await waitFor(() => expect(screen.getByText('RPC timeout')).toBeInTheDocument());
+  });
+
+  it('shows a Piper installing label with percentage', async () => {
+    runtime.piperStatus = makeInstallStatus('piper', {
+      state: 'installing',
+      progress: 55,
+      stage: 'downloading voice',
+    });
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const stateSpan = await screen.findByTestId('piper-install-state');
+    await waitFor(() => expect(stateSpan).toHaveTextContent(/downloading voice/i));
+  });
+
+  it('renders a preset select and auto-installs when a Piper voice preset is changed', async () => {
+    runtime.voiceStatus.tts_provider = 'piper';
+    runtime.voiceStatus.tts_voice_id = 'en_US-lessac-medium';
+    renderWithProviders(<VoicePanel />, { initialEntries: ['/settings/voice'] });
+
+    const ttsSelect = (await screen.findByTestId('tts-provider-select')) as HTMLSelectElement;
+    await waitFor(() => expect(ttsSelect.value).toBe('piper'));
+
+    const voiceSelect = (await screen.findByTestId('tts-voice-select')) as HTMLSelectElement;
+    fireEvent.change(voiceSelect, { target: { value: 'en_US-ryan-medium' } });
+
+    await waitFor(() =>
+      expect(vi.mocked(openhumanVoiceSetProviders)).toHaveBeenCalledWith(
+        expect.objectContaining({ tts_voice: 'en_US-ryan-medium' })
+      )
+    );
+  });
 });
diff --git a/app/src/components/skills/VoiceSetupModal.tsx b/app/src/components/skills/VoiceSetupModal.tsx
index 3d64b4d9a5..3925b3ef7b 100644
--- a/app/src/components/skills/VoiceSetupModal.tsx
+++ b/app/src/components/skills/VoiceSetupModal.tsx
@@ -56,7 +56,9 @@ export default function VoiceSetupModal({ onClose, skillStatus }: Props) {
 
   const handleGoToLocalModel = () => {
     onClose();
-    navigate('/settings/local-model');
+    // STT model install lives on the Voice settings panel (PR 2). The
+    // legacy `/settings/local-model` route handled Ollama assets only.
+    navigate('/settings/voice');
   };
 
   const handleGoToSettings = () => {
diff --git a/app/src/features/human/MicCloudComposer.test.tsx b/app/src/features/human/MicComposer.test.tsx
similarity index 84%
rename from app/src/features/human/MicCloudComposer.test.tsx
rename to app/src/features/human/MicComposer.test.tsx
index 040d73fc4d..184a8b194d 100644
--- a/app/src/features/human/MicCloudComposer.test.tsx
+++ b/app/src/features/human/MicComposer.test.tsx
@@ -1,14 +1,14 @@
 import { fireEvent, render, screen, waitFor } from '@testing-library/react';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
-import { MicCloudComposer } from './MicCloudComposer';
+import { MicComposer } from './MicComposer';
 
-// transcribeCloud + encodeBlobToWav are the network/heavy boundaries — mock
-// them here so we can drive the state machine without touching real APIs.
-const transcribeCloudMock = vi.fn();
+// transcribeWithFactory + encodeBlobToWav are the network/heavy boundaries —
+// mock them here so we can drive the state machine without touching real APIs.
+const transcribeWithFactoryMock = vi.fn();
 const encodeBlobToWavMock = vi.fn();
 vi.mock('./voice/sttClient', () => ({
-  transcribeCloud: (...args: unknown[]) => transcribeCloudMock(...args),
+  transcribeWithFactory: (...args: unknown[]) => transcribeWithFactoryMock(...args),
 }));
 vi.mock('./voice/wavEncoder', () => ({
   encodeBlobToWav: (...args: unknown[]) => encodeBlobToWavMock(...args),
@@ -44,7 +44,7 @@ function makeFakeRecorder(mime: string): FakeRecorder {
 
 const fakeStream = { getTracks: () => [{ stop: vi.fn() }] } as unknown as MediaStream;
 
-describe('MicCloudComposer', () => {
+describe('MicComposer', () => {
   let recorder: FakeRecorder;
   let getUserMediaMock: ReturnType<typeof vi.fn>;
   // Snapshot the descriptor so afterEach can restore it — without this, the
@@ -57,7 +57,7 @@ describe('MicCloudComposer', () => {
       globalThis.navigator,
       'mediaDevices'
     );
-    transcribeCloudMock.mockReset();
+    transcribeWithFactoryMock.mockReset();
     encodeBlobToWavMock.mockReset();
     recorder = makeFakeRecorder('audio/webm;codecs=opus');
 
@@ -93,26 +93,26 @@ describe('MicCloudComposer', () => {
   });
 
   it('renders the idle "Tap and speak" state', () => {
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} />);
     expect(screen.getByText('Tap and speak')).toBeInTheDocument();
   });
 
   it('shows a "Waiting" label when disabled', () => {
-    render(<MicCloudComposer disabled={true} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={true} onSubmit={vi.fn()} />);
     expect(screen.getByText(/waiting/i)).toBeInTheDocument();
   });
 
   it('does not start recording when disabled', () => {
-    render(<MicCloudComposer disabled={true} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={true} onSubmit={vi.fn()} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     expect(getUserMediaMock).not.toHaveBeenCalled();
   });
 
   it('starts recording on tap, then transcribes + submits on second tap', async () => {
-    transcribeCloudMock.mockResolvedValueOnce('hello world');
+    transcribeWithFactoryMock.mockResolvedValueOnce('hello world');
     const onSubmit = vi.fn();
     const onError = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={onSubmit} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={onSubmit} onError={onError} />);
 
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() => expect(getUserMediaMock).toHaveBeenCalled());
@@ -131,19 +131,19 @@ describe('MicCloudComposer', () => {
 
     fireEvent.click(screen.getByRole('button', { name: /stop recording and send/i }));
     await waitFor(() => expect(onSubmit).toHaveBeenCalledWith('hello world'));
-    expect(transcribeCloudMock).toHaveBeenCalledTimes(1);
+    expect(transcribeWithFactoryMock).toHaveBeenCalledTimes(1);
   });
 
   it('forwards the language prop to transcribeCloud', async () => {
-    transcribeCloudMock.mockResolvedValueOnce('hi');
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} language="es" />);
+    transcribeWithFactoryMock.mockResolvedValueOnce('hi');
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} language="es" />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() =>
       expect(screen.getByRole('button', { name: /stop recording and send/i })).toBeInTheDocument()
     );
     fireEvent.click(screen.getByRole('button', { name: /stop recording and send/i }));
-    await waitFor(() => expect(transcribeCloudMock).toHaveBeenCalled());
-    const opts = transcribeCloudMock.mock.calls[0][1];
+    await waitFor(() => expect(transcribeWithFactoryMock).toHaveBeenCalled());
+    const opts = transcribeWithFactoryMock.mock.calls[0][1];
     expect(opts).toEqual({ language: 'es' });
   });
 
@@ -151,7 +151,7 @@ describe('MicCloudComposer', () => {
     const err = Object.assign(new DOMException('', 'NotAllowedError'));
     getUserMediaMock.mockRejectedValueOnce(err);
     const onError = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() => expect(onError).toHaveBeenCalledWith(expect.stringMatching(/permission/i)));
   });
@@ -160,7 +160,7 @@ describe('MicCloudComposer', () => {
     const err = new DOMException('', 'OverconstrainedError');
     getUserMediaMock.mockRejectedValueOnce(err);
     const onError = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() =>
       expect(onError).toHaveBeenCalledWith(expect.stringMatching(/unavailable/i))
@@ -171,7 +171,7 @@ describe('MicCloudComposer', () => {
     const err = new DOMException('', 'NotReadableError');
     getUserMediaMock.mockRejectedValueOnce(err);
     const onError = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() => expect(onError).toHaveBeenCalledWith(expect.stringMatching(/in use/i)));
   });
@@ -179,7 +179,7 @@ describe('MicCloudComposer', () => {
   it('surfaces a generic error for non-DOMException getUserMedia failures', async () => {
     getUserMediaMock.mockRejectedValueOnce(new Error('some other error'));
     const onError = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() =>
       expect(onError).toHaveBeenCalledWith(expect.stringMatching(/microphone error/i))
@@ -187,14 +187,14 @@ describe('MicCloudComposer', () => {
   });
 
   it('falls back to wav re-encode when the native attempt fails', async () => {
-    transcribeCloudMock
+    transcribeWithFactoryMock
       .mockRejectedValueOnce(new Error('codec not accepted'))
       .mockResolvedValueOnce('after fallback');
     encodeBlobToWavMock.mockResolvedValueOnce(
       new Blob([new Uint8Array([0])], { type: 'audio/wav' })
     );
     const onSubmit = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={onSubmit} />);
+    render(<MicComposer disabled={false} onSubmit={onSubmit} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() =>
       expect(screen.getByRole('button', { name: /stop recording and send/i })).toBeInTheDocument()
@@ -202,14 +202,14 @@ describe('MicCloudComposer', () => {
     fireEvent.click(screen.getByRole('button', { name: /stop recording and send/i }));
     await waitFor(() => expect(onSubmit).toHaveBeenCalledWith('after fallback'));
     expect(encodeBlobToWavMock).toHaveBeenCalledTimes(1);
-    expect(transcribeCloudMock).toHaveBeenCalledTimes(2);
+    expect(transcribeWithFactoryMock).toHaveBeenCalledTimes(2);
   });
 
   it('reports an error when transcription returns empty text', async () => {
-    transcribeCloudMock.mockResolvedValueOnce('');
+    transcribeWithFactoryMock.mockResolvedValueOnce('');
     const onError = vi.fn();
     const onSubmit = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={onSubmit} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={onSubmit} onError={onError} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     await waitFor(() =>
       expect(screen.getByRole('button', { name: /stop recording and send/i })).toBeInTheDocument()
@@ -228,7 +228,7 @@ describe('MicCloudComposer', () => {
       writable: true,
     });
     const onError = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} onError={onError} />);
     fireEvent.click(screen.getByRole('button', { name: /start recording/i }));
     expect(onError).toHaveBeenCalledWith(expect.stringMatching(/not available/i));
   });
@@ -236,9 +236,9 @@ describe('MicCloudComposer', () => {
   // ── Spacebar shortcut (#1471) ────────────────────────────────────────────
 
   it('spacebar starts recording when idle and stops + submits on second press', async () => {
-    transcribeCloudMock.mockResolvedValueOnce('voice via space');
+    transcribeWithFactoryMock.mockResolvedValueOnce('voice via space');
     const onSubmit = vi.fn();
-    render(<MicCloudComposer disabled={false} onSubmit={onSubmit} />);
+    render(<MicComposer disabled={false} onSubmit={onSubmit} />);
 
     fireEvent.keyDown(window, { code: 'Space' });
     await waitFor(() => expect(getUserMediaMock).toHaveBeenCalled());
@@ -251,13 +251,13 @@ describe('MicCloudComposer', () => {
   });
 
   it('spacebar ignores key repeat so holding the key does not flap the recorder', () => {
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} />);
     fireEvent.keyDown(window, { code: 'Space', repeat: true });
     expect(getUserMediaMock).not.toHaveBeenCalled();
   });
 
   it('spacebar ignores modifier combinations so Shift-Space etc. stay free', () => {
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} />);
     fireEvent.keyDown(window, { code: 'Space', shiftKey: true });
     fireEvent.keyDown(window, { code: 'Space', ctrlKey: true });
     fireEvent.keyDown(window, { code: 'Space', metaKey: true });
@@ -269,7 +269,7 @@ describe('MicCloudComposer', () => {
     render(
       <>
         <input data-testid="text-field" type="text" />
-        <MicCloudComposer disabled={false} onSubmit={vi.fn()} />
+        <MicComposer disabled={false} onSubmit={vi.fn()} />
       </>
     );
     const input = screen.getByTestId('text-field');
@@ -282,7 +282,7 @@ describe('MicCloudComposer', () => {
     render(
       <>
         <textarea data-testid="ta" />
-        <MicCloudComposer disabled={false} onSubmit={vi.fn()} />
+        <MicComposer disabled={false} onSubmit={vi.fn()} />
       </>
     );
     const ta = screen.getByTestId('ta');
@@ -297,7 +297,7 @@ describe('MicCloudComposer', () => {
         <div data-testid="ce" contentEditable suppressContentEditableWarning>
           x
         </div>
-        <MicCloudComposer disabled={false} onSubmit={vi.fn()} />
+        <MicComposer disabled={false} onSubmit={vi.fn()} />
       </>
     );
     const ce = screen.getByTestId('ce');
@@ -307,14 +307,14 @@ describe('MicCloudComposer', () => {
   });
 
   it('spacebar is a no-op while the composer is disabled', () => {
-    render(<MicCloudComposer disabled={true} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={true} onSubmit={vi.fn()} />);
     fireEvent.keyDown(window, { code: 'Space' });
     expect(getUserMediaMock).not.toHaveBeenCalled();
   });
 
   it('removes the window keydown listener on unmount', () => {
     const removeSpy = vi.spyOn(window, 'removeEventListener');
-    const { unmount } = render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} />);
+    const { unmount } = render(<MicComposer disabled={false} onSubmit={vi.fn()} />);
     unmount();
     expect(removeSpy).toHaveBeenCalledWith('keydown', expect.any(Function));
     removeSpy.mockRestore();
@@ -334,7 +334,7 @@ describe('MicCloudComposer', () => {
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
 
     await waitFor(() => expect(enumerateDevicesMock).toHaveBeenCalled());
     expect(await screen.findByRole('combobox', { name: /microphone device/i })).toBeInTheDocument();
@@ -355,7 +355,7 @@ describe('MicCloudComposer', () => {
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} />);
 
     await waitFor(() => {
       expect(
@@ -375,7 +375,7 @@ describe('MicCloudComposer', () => {
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
 
     const select = await screen.findByRole('combobox', { name: /microphone device/i });
     expect(select).toBeInTheDocument();
@@ -393,7 +393,7 @@ describe('MicCloudComposer', () => {
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
 
     await waitFor(() => expect(screen.queryByRole('combobox')).toBeInTheDocument());
     expect(screen.getByText('Microphone 1')).toBeInTheDocument();
@@ -405,14 +405,14 @@ describe('MicCloudComposer', () => {
       { kind: 'audioinput', deviceId: 'dev1', label: 'Built-in Mic' },
       { kind: 'audioinput', deviceId: 'dev2', label: 'USB Headset' },
     ]);
-    transcribeCloudMock.mockResolvedValueOnce('hello');
+    transcribeWithFactoryMock.mockResolvedValueOnce('hello');
     Object.defineProperty(globalThis.navigator, 'mediaDevices', {
       value: { getUserMedia: getUserMediaMock, enumerateDevices: enumerateDevicesMock },
       configurable: true,
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
 
     // Wait for the selector to appear and pick the second device
     const select = await screen.findByRole('combobox', { name: /microphone device/i });
@@ -439,14 +439,14 @@ describe('MicCloudComposer', () => {
         { kind: 'audioinput', deviceId: 'dev1', label: 'Built-in Mic' },
         { kind: 'audioinput', deviceId: 'dev2', label: 'USB Headset' },
       ]);
-    transcribeCloudMock.mockResolvedValueOnce('ok');
+    transcribeWithFactoryMock.mockResolvedValueOnce('ok');
     Object.defineProperty(globalThis.navigator, 'mediaDevices', {
       value: { getUserMedia: getUserMediaMock, enumerateDevices: enumerateDevicesMock },
       configurable: true,
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
 
     // Mount enumerate ran — labels are blank placeholders
     await waitFor(() => expect(screen.queryByRole('combobox')).toBeInTheDocument());
@@ -471,7 +471,7 @@ describe('MicCloudComposer', () => {
       writable: true,
     });
 
-    render(<MicCloudComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
+    render(<MicComposer disabled={false} onSubmit={vi.fn()} showDeviceSelector />);
 
     await waitFor(() => expect(enumerateDevicesMock).toHaveBeenCalled());
     // Selector requires >1 device; error yields 0 → selector stays hidden
diff --git a/app/src/features/human/MicCloudComposer.tsx b/app/src/features/human/MicComposer.tsx
similarity index 96%
rename from app/src/features/human/MicCloudComposer.tsx
rename to app/src/features/human/MicComposer.tsx
index a0d379535c..71fa26fadd 100644
--- a/app/src/features/human/MicCloudComposer.tsx
+++ b/app/src/features/human/MicComposer.tsx
@@ -1,7 +1,7 @@
 import debug from 'debug';
 import { useEffect, useRef, useState } from 'react';
 
-import { transcribeCloud } from './voice/sttClient';
+import { transcribeWithFactory } from './voice/sttClient';
 import { encodeBlobToWav } from './voice/wavEncoder';
 
 /** Minimal descriptor for an audio input device. */
@@ -29,7 +29,7 @@ function pickRecorderMime(): string {
   return '';
 }
 
-export interface MicCloudComposerProps {
+export interface MicComposerProps {
   /** Disabled while a turn is in flight or the welcome message is pending. */
   disabled: boolean;
   /** Receives the transcribed text — same callback the textarea send uses. */
@@ -48,20 +48,24 @@ type RecordingState = 'idle' | 'recording' | 'transcribing';
 
 /**
  * Tap-to-toggle mic composer for the mascot page. Captures audio via the
- * browser's `MediaRecorder`, hands the resulting Blob to the cloud STT proxy
- * (`openhuman.voice_cloud_transcribe`), then forwards the transcript through
- * `onSubmit` so it joins the agent's normal send pipeline.
+ * browser's `MediaRecorder`, hands the resulting Blob to the factory-
+ * dispatched STT RPC (`openhuman.voice_stt_dispatch`), then forwards the
+ * transcript through `onSubmit` so it joins the agent's normal send pipeline.
+ *
+ * The provider (cloud vs local Whisper) is resolved server-side from
+ * `config.local_ai.stt_provider`, so the renderer doesn't have to know
+ * which backend ran — it only sees `{ text, provider }`.
  *
  * Single button, single decision: tap once to start recording, tap again to
  * stop and send. No textarea — that's the whole point of the mascot tab.
  */
-export function MicCloudComposer({
+export function MicComposer({
   disabled,
   onSubmit,
   onError,
   language = 'en',
   showDeviceSelector = false,
-}: MicCloudComposerProps) {
+}: MicComposerProps) {
   const [state, setState] = useState<RecordingState>('idle');
   const [devices, setDevices] = useState<AudioInputDevice[]>([]);
   const [selectedDeviceId, setSelectedDeviceId] = useState<string>('');
@@ -390,7 +394,7 @@ export function MicCloudComposer({
         blob.type,
         language || 'auto'
       );
-      const text = await transcribeCloud(blob, opts);
+      const text = await transcribeWithFactory(blob, opts);
       composerLog('transcribe ok attempt=native ms=%d', Math.round(Date.now() - startedAt));
       return text;
     } catch (err) {
@@ -403,7 +407,7 @@ export function MicCloudComposer({
         wav.size,
         Math.round(Date.now() - reEncodeStart)
       );
-      const text = await transcribeCloud(wav, opts);
+      const text = await transcribeWithFactory(wav, opts);
       composerLog(
         'transcribe ok attempt=wav-fallback total_ms=%d',
         Math.round(Date.now() - startedAt)
@@ -489,4 +493,4 @@ export function MicCloudComposer({
   );
 }
 
-export default MicCloudComposer;
+export default MicComposer;
diff --git a/app/src/features/human/voice/sttClient.test.ts b/app/src/features/human/voice/sttClient.test.ts
index 7398d0a248..b4d93cc942 100644
--- a/app/src/features/human/voice/sttClient.test.ts
+++ b/app/src/features/human/voice/sttClient.test.ts
@@ -1,7 +1,7 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { callCoreRpc } from '../../../services/coreRpcClient';
-import { transcribeCloud } from './sttClient';
+import { transcribeCloud, transcribeWithFactory } from './sttClient';
 
 vi.mock('../../../services/coreRpcClient', () => ({ callCoreRpc: vi.fn() }));
 
@@ -110,3 +110,69 @@ describe('transcribeCloud', () => {
     await expect(transcribeCloud(blob)).rejects.toThrow(/upstream STT failed/);
   });
 });
+
+describe('transcribeWithFactory', () => {
+  beforeEach(() => {
+    (callCoreRpc as ReturnType<typeof vi.fn>).mockReset();
+  });
+
+  it('routes through openhuman.voice_stt_dispatch and returns text', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    mock.mockResolvedValueOnce({ text: 'hello via factory', provider: 'cloud' });
+    const blob = new Blob([new Uint8Array([1, 2, 3])], { type: 'audio/webm' });
+
+    const text = await transcribeWithFactory(blob);
+    expect(text).toBe('hello via factory');
+    const call = mock.mock.calls[0][0] as { method: string; params: Record<string, unknown> };
+    expect(call.method).toBe('openhuman.voice_stt_dispatch');
+    expect(call.params.mime_type).toBe('audio/webm');
+    expect(call.params.file_name).toBe('audio.webm');
+    // No provider override unless caller pins one.
+    expect(call.params.provider).toBeUndefined();
+  });
+
+  it('forwards an explicit provider override', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    mock.mockResolvedValueOnce({ text: 'local hi', provider: 'whisper' });
+    const blob = new Blob([new Uint8Array([1])], { type: 'audio/webm' });
+    await transcribeWithFactory(blob, { provider: 'whisper', model: 'whisper-large-v3-turbo' });
+    const params = mock.mock.calls[0][0].params as Record<string, unknown>;
+    expect(params.provider).toBe('whisper');
+    expect(params.model).toBe('whisper-large-v3-turbo');
+  });
+
+  it('rejects empty blobs without hitting the core', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    const blob = new Blob([], { type: 'audio/webm' });
+    await expect(transcribeWithFactory(blob)).rejects.toThrow(/empty/);
+    expect(mock).not.toHaveBeenCalled();
+  });
+
+  it('rewrites stale-sidecar "unknown method" errors', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    mock.mockRejectedValueOnce(new Error('unknown method: openhuman.voice_stt_dispatch'));
+    const blob = new Blob([new Uint8Array([1])], { type: 'audio/webm' });
+    await expect(transcribeWithFactory(blob)).rejects.toThrow(/Restart the OpenHuman desktop app/i);
+  });
+
+  it('passes through non-unknown-method errors verbatim', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    mock.mockRejectedValueOnce(new Error('whisper.cpp failed: model not found'));
+    const blob = new Blob([new Uint8Array([1])], { type: 'audio/webm' });
+    await expect(transcribeWithFactory(blob)).rejects.toThrow(/whisper.cpp failed/);
+  });
+
+  it('trims whitespace off the returned transcript', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    mock.mockResolvedValueOnce({ text: '  padded  ', provider: 'whisper' });
+    const blob = new Blob([new Uint8Array([1])], { type: 'audio/webm' });
+    expect(await transcribeWithFactory(blob)).toBe('padded');
+  });
+
+  it('returns empty string when provider yields no text', async () => {
+    const mock = callCoreRpc as ReturnType<typeof vi.fn>;
+    mock.mockResolvedValueOnce({ provider: 'whisper' });
+    const blob = new Blob([new Uint8Array([1])], { type: 'audio/webm' });
+    expect(await transcribeWithFactory(blob)).toBe('');
+  });
+});
diff --git a/app/src/features/human/voice/sttClient.ts b/app/src/features/human/voice/sttClient.ts
index 06062792b7..1700b067d0 100644
--- a/app/src/features/human/voice/sttClient.ts
+++ b/app/src/features/human/voice/sttClient.ts
@@ -84,6 +84,89 @@ export async function transcribeCloud(
   return text;
 }
 
+export interface FactoryTranscribeOptions {
+  /** BCP-47 language hint, e.g. `'en'`. */
+  language?: string;
+  /** Override the server-side provider resolution (`'cloud'` | `'whisper'`).
+   *  When unset the core reads `config.local_ai.stt_provider`. */
+  provider?: 'cloud' | 'whisper';
+  /** Whisper model id (whisper branch only). */
+  model?: string;
+  /** MIME type and file name overrides; both default to values derived from the blob. */
+  mimeType?: string;
+  fileName?: string;
+}
+
+export interface FactoryTranscribeResult {
+  text: string;
+  /** Provider that actually ran ('cloud' or 'whisper'). */
+  provider: string;
+}
+
+/**
+ * Factory-dispatched transcription. Hits `openhuman.voice_stt_dispatch`
+ * — the core resolves the provider from config (or `opts.provider` when
+ * the caller forces one). Returns the transcript only; the renderer
+ * surfaces the provider id via debug logs.
+ *
+ * Goes through the same base64 encoding path as `transcribeCloud` so the
+ * MicComposer can swap implementations without re-tooling the recorder.
+ */
+export async function transcribeWithFactory(
+  blob: Blob,
+  opts: FactoryTranscribeOptions = {}
+): Promise<string> {
+  if (!blob || blob.size === 0) {
+    throw new Error('audio blob is empty');
+  }
+  const encodeStart = Date.now();
+  const audio_base64 = await blobToBase64(blob);
+  const encodeMs = Math.round(Date.now() - encodeStart);
+
+  const params: Record<string, unknown> = { audio_base64 };
+  const mime = (opts.mimeType ?? blob.type ?? 'audio/webm').split(';')[0].trim() || 'audio/webm';
+  params.mime_type = mime;
+  params.file_name = opts.fileName ?? `audio.${guessExtension(mime)}`;
+  if (opts.provider) params.provider = opts.provider;
+  if (opts.model) params.model = opts.model;
+  if (opts.language) params.language = opts.language;
+
+  sttLog(
+    '[voice-stt] transcribe-factory bytes=%d mime=%s provider=%s base64_ms=%d',
+    blob.size,
+    mime,
+    opts.provider ?? '<config>',
+    encodeMs
+  );
+
+  const rpcStart = Date.now();
+  let result: FactoryTranscribeResult;
+  try {
+    result = await callCoreRpc<FactoryTranscribeResult>({
+      method: 'openhuman.voice_stt_dispatch',
+      params,
+    });
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    if (msg.includes('unknown method')) {
+      sttLog('[voice-stt] dispatch stale-sidecar path: %s', msg);
+      throw new Error(
+        'Voice transcription is unavailable in this build. Restart the OpenHuman desktop app to pick up the latest core sidecar.'
+      );
+    }
+    sttLog('[voice-stt] dispatch failed (passthrough): %O', err);
+    throw err;
+  }
+  const text = result?.text?.trim() ?? '';
+  sttLog(
+    '[voice-stt] transcribed provider=%s chars=%d rpc_ms=%d',
+    result?.provider ?? '<unknown>',
+    text.length,
+    Math.round(Date.now() - rpcStart)
+  );
+  return text;
+}
+
 async function blobToBase64(blob: Blob): Promise<string> {
   const buf = await blob.arrayBuffer();
   const bytes = new Uint8Array(buf);
diff --git a/app/src/pages/Conversations.tsx b/app/src/pages/Conversations.tsx
index f73a559e6a..68e6178c7d 100644
--- a/app/src/pages/Conversations.tsx
+++ b/app/src/pages/Conversations.tsx
@@ -10,7 +10,7 @@ import PillTabBar from '../components/PillTabBar';
 import UpsellBanner from '../components/upsell/UpsellBanner';
 import { dismissBanner, shouldShowBanner } from '../components/upsell/upsellDismissState';
 import UsageLimitModal from '../components/upsell/UsageLimitModal';
-import MicCloudComposer from '../features/human/MicCloudComposer';
+import MicComposer from '../features/human/MicComposer';
 // [#1123] Commented out — welcome-agent onboarding replaced by Joyride walkthrough
 // import { ONBOARDING_WELCOME_THREAD_LABEL } from '../constants/onboardingChat';
 import { useStickToBottom } from '../hooks/useStickToBottom';
@@ -1648,11 +1648,16 @@ const Conversations = ({ variant = 'page', composer = 'text' }: ConversationsPro
               </p>
               <div className="flex items-center gap-2 flex-shrink-0 ml-2">
                 {(sendError.code === 'stt_not_ready' ||
-                  sendError.code === 'voice_transcription') && (
+                  sendError.code === 'voice_transcription' ||
+                  sendError.code === 'tts_not_ready' ||
+                  sendError.code === 'voice_synthesis') && (
                   <button
                     onClick={() => {
                       setSendError(null);
-                      navigate('/settings/local-model');
+                      // STT/TTS provider settings live on the Voice panel
+                      // since PR 2; the legacy local-model route dates from
+                      // when speech assets were lumped in with Ollama.
+                      navigate('/settings/voice');
                     }}
                     className="text-xs text-primary-500 hover:text-primary-600 font-medium transition-colors">
                     Set up
@@ -1668,7 +1673,7 @@ const Conversations = ({ variant = 'page', composer = 'text' }: ConversationsPro
           )}
 
           {composer === 'mic-cloud' ? (
-            <MicCloudComposer
+            <MicComposer
               // Without `!selectedThreadId`, a mic submit before a thread is
               // ready hits `handleSendMessage`'s early return and the
               // transcript is silently dropped — the user spoke into the void.
diff --git a/app/src/pages/Settings.tsx b/app/src/pages/Settings.tsx
index 6a56df5fbb..2aa1e0b27c 100644
--- a/app/src/pages/Settings.tsx
+++ b/app/src/pages/Settings.tsx
@@ -212,6 +212,23 @@ const aiModelsSettingsItems = [
       </svg>
     ),
   },
+  {
+    id: 'voice',
+    title: 'Voice (STT & TTS)',
+    description:
+      'Choose between cloud and local providers for speech-to-text (Whisper) and text-to-speech (Piper)',
+    route: 'voice',
+    icon: (
+      <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+        <path
+          strokeLinecap="round"
+          strokeLinejoin="round"
+          strokeWidth={2}
+          d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z"
+        />
+      </svg>
+    ),
+  },
 ];
 
 const WrappedSettingsPage = ({ children }: { children: ReactNode }) => {
diff --git a/app/src/services/api/__tests__/voiceInstallApi.test.ts b/app/src/services/api/__tests__/voiceInstallApi.test.ts
new file mode 100644
index 0000000000..ce7f51c02a
--- /dev/null
+++ b/app/src/services/api/__tests__/voiceInstallApi.test.ts
@@ -0,0 +1,116 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import {
+  installPiper,
+  installWhisper,
+  piperInstallStatus,
+  type VoiceInstallStatus,
+  whisperInstallStatus,
+} from '../voiceInstallApi';
+
+vi.mock('../../coreRpcClient', () => ({ callCoreRpc: vi.fn() }));
+
+const buildStatus = (overrides: Partial<VoiceInstallStatus> = {}): VoiceInstallStatus => ({
+  engine: 'whisper',
+  state: 'installed',
+  progress: 100,
+  downloaded_bytes: null,
+  total_bytes: null,
+  stage: null,
+  error_detail: null,
+  ...overrides,
+});
+
+describe('voiceInstallApi', () => {
+  beforeEach(async () => {
+    const { callCoreRpc } = await import('../../coreRpcClient');
+    vi.mocked(callCoreRpc).mockReset();
+  });
+
+  describe('installWhisper', () => {
+    it('passes model_size and force flags through to the RPC', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockResolvedValueOnce(buildStatus({ engine: 'whisper' }));
+      const result = await installWhisper({ modelSize: 'tiny', force: true });
+      expect(callCoreRpc).toHaveBeenCalledWith({
+        method: 'openhuman.local_ai_install_whisper',
+        params: { model_size: 'tiny', force: true },
+      });
+      expect(result.engine).toBe('whisper');
+      expect(result.state).toBe('installed');
+    });
+
+    it('omits undefined params and lets the core apply defaults', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockResolvedValueOnce(buildStatus());
+      await installWhisper();
+      expect(callCoreRpc).toHaveBeenCalledWith({
+        method: 'openhuman.local_ai_install_whisper',
+        params: { model_size: undefined, force: undefined },
+      });
+    });
+
+    it('propagates a thrown RPC error so the UI can surface it', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockRejectedValueOnce(new Error('boom'));
+      await expect(installWhisper({ modelSize: 'tiny' })).rejects.toThrow('boom');
+    });
+  });
+
+  describe('installPiper', () => {
+    it('passes voice_id and force flags through to the RPC', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockResolvedValueOnce(
+        buildStatus({ engine: 'piper', state: 'installing', progress: 25 })
+      );
+      const result = await installPiper({ voiceId: 'en_US-lessac-medium', force: false });
+      expect(callCoreRpc).toHaveBeenCalledWith({
+        method: 'openhuman.local_ai_install_piper',
+        params: { voice_id: 'en_US-lessac-medium', force: false },
+      });
+      expect(result.state).toBe('installing');
+      expect(result.progress).toBe(25);
+    });
+
+    it('omits undefined params and lets the core apply defaults', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockResolvedValueOnce(buildStatus({ engine: 'piper' }));
+      await installPiper();
+      expect(callCoreRpc).toHaveBeenCalledWith({
+        method: 'openhuman.local_ai_install_piper',
+        params: { voice_id: undefined, force: undefined },
+      });
+    });
+  });
+
+  describe('whisperInstallStatus', () => {
+    it('calls the status RPC with empty params', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockResolvedValueOnce(
+        buildStatus({ engine: 'whisper', state: 'missing', progress: null })
+      );
+      const result = await whisperInstallStatus();
+      expect(callCoreRpc).toHaveBeenCalledWith({
+        method: 'openhuman.local_ai_whisper_install_status',
+        params: {},
+      });
+      expect(result.state).toBe('missing');
+    });
+  });
+
+  describe('piperInstallStatus', () => {
+    it('calls the status RPC with empty params', async () => {
+      const { callCoreRpc } = await import('../../coreRpcClient');
+      vi.mocked(callCoreRpc).mockResolvedValueOnce(
+        buildStatus({ engine: 'piper', state: 'error', error_detail: 'network down' })
+      );
+      const result = await piperInstallStatus();
+      expect(callCoreRpc).toHaveBeenCalledWith({
+        method: 'openhuman.local_ai_piper_install_status',
+        params: {},
+      });
+      expect(result.state).toBe('error');
+      expect(result.error_detail).toBe('network down');
+    });
+  });
+});
diff --git a/app/src/services/api/voiceInstallApi.ts b/app/src/services/api/voiceInstallApi.ts
new file mode 100644
index 0000000000..13c8de4d15
--- /dev/null
+++ b/app/src/services/api/voiceInstallApi.ts
@@ -0,0 +1,113 @@
+/**
+ * Voice engine installer API — wraps the four new `openhuman.local_ai_*` RPCs
+ * that start and report on downloads of the Whisper GGML model + binary and
+ * the Piper binary + bundled voice into the workspace.
+ *
+ * The renderer never touches HTTP URLs directly; everything funnels
+ * through the Rust core where streaming + atomic rename + SHA validation
+ * lives. From the UI's point of view a button click translates to a
+ * single RPC kick-off plus a polled status RPC for progress.
+ */
+import debug from 'debug';
+
+import { callCoreRpc } from '../coreRpcClient';
+
+const log = debug('voiceInstallApi');
+
+/**
+ * Stable wire shape of [`crate::openhuman::local_ai::voice_install_common::VoiceInstallState`].
+ *
+ * The Rust enum serializes via `#[serde(rename_all = "snake_case")]` so
+ * the TypeScript union mirrors the lowercase variants exactly.
+ */
+export type VoiceInstallState = 'missing' | 'installing' | 'installed' | 'broken' | 'error';
+
+/**
+ * Mirrors `VoiceInstallStatus` on the Rust side. The shape is identical
+ * for both `whisper` and `piper` so the renderer can share components.
+ */
+export interface VoiceInstallStatus {
+  /** `"whisper"` or `"piper"`. */
+  engine: string;
+  /** Current state — drives the button label / spinner. */
+  state: VoiceInstallState;
+  /** 0–100 percent, populated while `state === 'installing'`. */
+  progress: number | null;
+  /** Bytes received so far across the current download stage. */
+  downloaded_bytes: number | null;
+  /** Total bytes expected (may be null for chunked transfer encoding). */
+  total_bytes: number | null;
+  /** Free-text status line — e.g. "downloading model (ggml-tiny.bin)". */
+  stage: string | null;
+  /** Populated when `state === 'error'`. */
+  error_detail: string | null;
+}
+
+export interface InstallWhisperParams {
+  /** Whisper model size — `tiny | base | small | medium | large-v3-turbo`. */
+  modelSize?: string;
+  /** When true, blow away the existing model and re-download. */
+  force?: boolean;
+}
+
+export interface InstallPiperParams {
+  /** Piper voice id (e.g. `en_US-lessac-medium`). */
+  voiceId?: string;
+  /** When true, blow away the existing voice files and re-download. */
+  force?: boolean;
+}
+
+/**
+ * Kick off (or re-kick) a Whisper install. Resolves with the post-install
+ * status snapshot — the renderer should also poll `whisperInstallStatus`
+ * during the in-flight phase to update progress.
+ */
+export async function installWhisper(
+  params: InstallWhisperParams = {}
+): Promise<VoiceInstallStatus> {
+  log('[voice-install:whisper] kick-off %o', params);
+  const result = await callCoreRpc<VoiceInstallStatus>({
+    method: 'openhuman.local_ai_install_whisper',
+    params: { model_size: params.modelSize, force: params.force },
+  });
+  log('[voice-install:whisper] result state=%s stage=%s', result.state, result.stage ?? '<none>');
+  return result;
+}
+
+/**
+ * Kick off (or re-kick) a Piper install. See `installWhisper` for the
+ * mental model — same wire shape, different engine slot.
+ */
+export async function installPiper(params: InstallPiperParams = {}): Promise<VoiceInstallStatus> {
+  log('[voice-install:piper] kick-off %o', params);
+  const result = await callCoreRpc<VoiceInstallStatus>({
+    method: 'openhuman.local_ai_install_piper',
+    params: { voice_id: params.voiceId, force: params.force },
+  });
+  log('[voice-install:piper] result state=%s stage=%s', result.state, result.stage ?? '<none>');
+  return result;
+}
+
+/**
+ * Query the current Whisper installer state. Safe to call repeatedly —
+ * the core returns from an in-memory status table without touching disk
+ * unless the table is empty (first read after a process restart), in
+ * which case it falls back to a one-shot on-disk artifact check.
+ */
+export async function whisperInstallStatus(): Promise<VoiceInstallStatus> {
+  return await callCoreRpc<VoiceInstallStatus>({
+    method: 'openhuman.local_ai_whisper_install_status',
+    params: {},
+  });
+}
+
+/**
+ * Query the current Piper installer state. Same contract as
+ * `whisperInstallStatus`.
+ */
+export async function piperInstallStatus(): Promise<VoiceInstallStatus> {
+  return await callCoreRpc<VoiceInstallStatus>({
+    method: 'openhuman.local_ai_piper_install_status',
+    params: {},
+  });
+}
diff --git a/app/src/utils/tauriCommands/voice.ts b/app/src/utils/tauriCommands/voice.ts
index 92e7fe9812..db2fd2348a 100644
--- a/app/src/utils/tauriCommands/voice.ts
+++ b/app/src/utils/tauriCommands/voice.ts
@@ -33,6 +33,10 @@ export interface VoiceStatus {
   whisper_in_process: boolean;
   /** Whether LLM post-processing is enabled for transcription cleanup. */
   llm_cleanup_enabled: boolean;
+  /** Currently selected STT provider ('cloud' or 'whisper'). */
+  stt_provider: string;
+  /** Currently selected TTS provider ('cloud' or 'piper'). */
+  tts_provider: string;
 }
 
 export interface VoiceServerStatus {
@@ -109,6 +113,34 @@ export async function openhumanUpdateVoiceServerSettings(update: {
   });
 }
 
+export interface VoiceProvidersUpdate {
+  stt_provider?: 'cloud' | 'whisper';
+  tts_provider?: 'cloud' | 'piper';
+  stt_model?: string;
+  tts_voice?: string;
+}
+
+export interface VoiceProvidersSnapshot {
+  stt_provider: string;
+  tts_provider: string;
+  stt_model_id: string;
+  tts_voice_id: string;
+}
+
+/**
+ * Persist the STT / TTS provider selection. Maps to the
+ * `openhuman.voice_set_providers` RPC, which validates each value against
+ * the supported provider list and rejects unknown ids server-side.
+ */
+export async function openhumanVoiceSetProviders(
+  update: VoiceProvidersUpdate
+): Promise<VoiceProvidersSnapshot> {
+  return await callCoreRpc<VoiceProvidersSnapshot>({
+    method: 'openhuman.voice_set_providers',
+    params: update,
+  });
+}
+
 export async function openhumanVoiceTranscribe(
   audioPath: string,
   context?: string,
diff --git a/src/openhuman/about_app/catalog.rs b/src/openhuman/about_app/catalog.rs
index 3d84ef2b33..de75213fff 100644
--- a/src/openhuman/about_app/catalog.rs
+++ b/src/openhuman/about_app/catalog.rs
@@ -454,21 +454,27 @@ const CAPABILITIES: &[Capability] = &[
     },
     Capability {
         id: "local_ai.speech_to_text",
-        name: "Speech Recognition",
+        name: "Speech Recognition (Local)",
         domain: "local_ai",
         category: CapabilityCategory::LocalAI,
-        description: "Transcribe audio into text using local speech recognition.",
-        how_to: "Settings > Local AI Model > Advanced > Test Voice Input",
+        description:
+            "Transcribe audio into text using local whisper.cpp via the voice STT factory. \
+             Pick the model size (tiny / base / small / medium / large-v3-turbo) in \
+             Settings > Voice; the factory routes through WHISPER_BIN or the in-process engine.",
+        how_to: "Settings > Voice > STT Provider = Whisper",
         status: CapabilityStatus::Beta,
         privacy: None,
     },
     Capability {
         id: "local_ai.text_to_speech",
-        name: "Text to Speech",
+        name: "Text to Speech (Local)",
         domain: "local_ai",
         category: CapabilityCategory::LocalAI,
-        description: "Synthesize speech from text using local voice models.",
-        how_to: "Settings > Local AI Model > Advanced > Test Voice Output",
+        description:
+            "Synthesize speech locally with Piper via the voice TTS factory. PIPER_BIN points \
+             at the binary; the voice .onnx ships with the installer. Returns a synthetic \
+             viseme timeline (full forced-alignment lives behind the cloud provider for now).",
+        how_to: "Settings > Voice > TTS Provider = Piper",
         status: CapabilityStatus::Beta,
         privacy: None,
     },
@@ -492,6 +498,34 @@ const CAPABILITIES: &[Capability] = &[
         status: CapabilityStatus::Beta,
         privacy: None,
     },
+    Capability {
+        id: "local_ai.whisper_installer",
+        name: "Whisper Installer (Local STT)",
+        domain: "local_ai",
+        category: CapabilityCategory::LocalAI,
+        description:
+            "One-click download of the whisper.cpp GGML model (and on Windows the whisper-cli \
+             binary) into the workspace so local Speech-to-Text runs without manual setup. \
+             Streams to disk via a .part file + atomic rename so a crash never leaves a corrupt \
+             model behind.",
+        how_to: "Settings > Voice > Voice Providers > Install Whisper",
+        status: CapabilityStatus::Beta,
+        privacy: MODEL_DOWNLOAD,
+    },
+    Capability {
+        id: "local_ai.piper_installer",
+        name: "Piper Installer (Local TTS)",
+        domain: "local_ai",
+        category: CapabilityCategory::LocalAI,
+        description:
+            "One-click download of the Piper binary archive and the bundled en_US-lessac-medium \
+             voice (.onnx + .onnx.json) into the workspace so local Text-to-Speech runs without \
+             manual setup. Atomic rename guarantees no half-written voice files are ever read \
+             by the runtime.",
+        how_to: "Settings > Voice > Voice Providers > Install Piper",
+        status: CapabilityStatus::Beta,
+        privacy: MODEL_DOWNLOAD,
+    },
     Capability {
         id: "team.create",
         name: "Create Teams",
diff --git a/src/openhuman/config/schema/local_ai.rs b/src/openhuman/config/schema/local_ai.rs
index 5af23324ed..35344a8c45 100644
--- a/src/openhuman/config/schema/local_ai.rs
+++ b/src/openhuman/config/schema/local_ai.rs
@@ -65,8 +65,18 @@ pub struct LocalAiConfig {
     pub stt_model_id: String,
     #[serde(default = "default_stt_download_url")]
     pub stt_download_url: Option<String>,
+    /// Voice STT provider selector. `"cloud"` (default) routes through the
+    /// backend Whisper proxy; `"whisper"` runs local whisper.cpp via the
+    /// `WHISPER_BIN` env var. Surfaced in Settings → Voice.
+    #[serde(default = "default_stt_provider")]
+    pub stt_provider: String,
     #[serde(default = "default_tts_voice_id")]
     pub tts_voice_id: String,
+    /// Voice TTS provider selector. `"cloud"` (default) routes through the
+    /// backend ElevenLabs proxy and returns rich visemes; `"piper"` runs
+    /// local Piper via the `PIPER_BIN` env var.
+    #[serde(default = "default_tts_provider")]
+    pub tts_provider: String,
     #[serde(default = "default_tts_download_url")]
     pub tts_download_url: Option<String>,
     #[serde(default = "default_tts_config_download_url")]
@@ -146,6 +156,14 @@ fn default_tts_voice_id() -> String {
     "en_US-lessac-medium".to_string()
 }
 
+fn default_stt_provider() -> String {
+    "cloud".to_string()
+}
+
+fn default_tts_provider() -> String {
+    "cloud".to_string()
+}
+
 fn default_stt_download_url() -> Option<String> {
     Some(
         "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin?download=true"
@@ -244,7 +262,9 @@ impl Default for LocalAiConfig {
             embedding_model_id: default_embedding_model_id(),
             stt_model_id: default_stt_model_id(),
             stt_download_url: default_stt_download_url(),
+            stt_provider: default_stt_provider(),
             tts_voice_id: default_tts_voice_id(),
+            tts_provider: default_tts_provider(),
             tts_download_url: default_tts_download_url(),
             tts_config_download_url: default_tts_config_download_url(),
             quantization: default_quantization(),
diff --git a/src/openhuman/local_ai/install_piper.rs b/src/openhuman/local_ai/install_piper.rs
new file mode 100644
index 0000000000..c04ad6d1ea
--- /dev/null
+++ b/src/openhuman/local_ai/install_piper.rs
@@ -0,0 +1,656 @@
+//! Piper installer — downloads the platform-specific Piper binary
+//! archive and the bundled `en_US-lessac-medium` voice (`.onnx` +
+//! `.onnx.json` sidecar) into the workspace.
+//!
+//! Voice IDs other than the bundled default are intentionally out of
+//! scope; the VoicePanel exposes a free-text `tts_voice_id` input so
+//! advanced users can manually drop in additional `.onnx` files alongside
+//! the bundled one (see Voice TTS factory docs).
+
+use std::io::Read;
+use std::path::PathBuf;
+
+use crate::openhuman::config::Config;
+
+use super::paths;
+use super::voice_install_common::{
+    download_to_file, read_status, write_status, VoiceInstallState, VoiceInstallStatus,
+    ENGINE_PIPER,
+};
+
+const LOG_PREFIX: &str = "[voice-install:piper]";
+
+/// Default voice id shipped with the installer. Matches
+/// [`crate::openhuman::voice::factory::DEFAULT_PIPER_VOICE`].
+pub const DEFAULT_PIPER_VOICE: &str = "en_US-lessac-medium";
+
+/// Minimum bytes for the Piper release archive. The smallest historical
+/// build is ~7 MB; below 1 MB is almost certainly an error response.
+const MIN_BINARY_ARCHIVE_BYTES: u64 = 1024 * 1024;
+
+/// Minimum bytes for the voice `.onnx` model. `en_US-lessac-medium.onnx`
+/// is ~60 MB; allow some slack for CDN compression differences.
+const MIN_VOICE_BYTES: u64 = 30 * 1024 * 1024;
+
+/// Minimum bytes for the `.onnx.json` sidecar. The file is human-readable
+/// JSON, typically a few KB; anything below 256 bytes is almost certainly
+/// a 404 HTML response masquerading as JSON.
+const MIN_VOICE_JSON_BYTES: u64 = 256;
+
+/// Result of resolving the Piper binary archive URL for the host OS.
+struct BinaryAsset {
+    url: String,
+    /// Archive shape — drives the extraction strategy.
+    kind: ArchiveKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ArchiveKind {
+    Zip,
+    TarGz,
+}
+
+/// Resolve the Piper release archive for the host platform.
+///
+/// Assets are fetched from the upstream `latest` release alias and are
+/// named `piper_<os>_<arch>`: a `.zip` on Windows, a `.tar.gz` on macOS
+/// and Linux. The architecture comes from `std::env::consts::ARCH`;
+/// values this function does not recognise fall back to the x64 /
+/// x86_64 build for that OS.
+///
+/// Returns `None` when the host OS is not Windows, macOS, or Linux.
+fn binary_download_asset() -> Option<BinaryAsset> {
+    let base = "https://github.com/rhasspy/piper/releases/latest/download";
+    let arch = std::env::consts::ARCH;
+    let (file_name, kind) = if cfg!(target_os = "windows") {
+        // Windows always uses the amd64 archive, whatever the host arch.
+        ("piper_windows_amd64.zip".to_string(), ArchiveKind::Zip)
+    } else if cfg!(target_os = "macos") {
+        // Two assets exist (`piper_macos_x64.tar.gz` and
+        // `piper_macos_aarch64.tar.gz`). Pick based on the host arch.
+        let suffix = match arch {
+            "aarch64" | "arm64" => "macos_aarch64",
+            _ => "macos_x64",
+        };
+        (format!("piper_{suffix}.tar.gz"), ArchiveKind::TarGz)
+    } else if cfg!(target_os = "linux") {
+        let suffix = match arch {
+            "aarch64" | "arm64" => "linux_aarch64",
+            "armv7" | "arm" => "linux_armv7",
+            _ => "linux_x86_64",
+        };
+        (format!("piper_{suffix}.tar.gz"), ArchiveKind::TarGz)
+    } else {
+        return None;
+    };
+    Some(BinaryAsset {
+        url: format!("{base}/{file_name}"),
+        kind,
+    })
+}
+
+/// Build the HuggingFace download URLs for a Piper voice.
+///
+/// The `rhasspy/piper-voices` repo lays files out as
+/// `<lang>/<locale>/<name>/<quality>/<locale>-<name>-<quality>.onnx`
+/// with a `.onnx.json` sidecar next to it, e.g.
+/// `en/en_US/lessac/medium/en_US-lessac-medium.onnx`.
+///
+/// Returns `(onnx_url, onnx_json_url)`.
+fn voice_download_urls(voice_id: &str) -> (String, String) {
+    let (lang_short, locale, name, quality) = decode_voice_id(voice_id);
+    let base = format!(
+        "https://huggingface.co/rhasspy/piper-voices/resolve/main/{lang_short}/{locale}/{name}/{quality}"
+    );
+    let onnx_url = format!("{base}/{locale}-{name}-{quality}.onnx");
+    let json_url = format!("{onnx_url}.json");
+    (onnx_url, json_url)
+}
+
+/// Split a Piper voice id such as `en_US-lessac-medium` into the pieces
+/// that make up its path in the voices repo.
+///
+/// Returns `(short_lang, locale, voice_name, quality)`. The id is split
+/// on `-`: the first piece is the locale, the second the voice name, and
+/// everything after the second `-` is the quality. `short_lang` is the
+/// part of the locale before its first `_`.
+///
+/// Never panics on user-typed input: a blank id is replaced by
+/// [`DEFAULT_PIPER_VOICE`], and an id with fewer than three pieces
+/// decodes to `("en", "en_US", "lessac", "medium")` so the download
+/// URL built from the result is still well-formed. Note that such an
+/// id therefore resolves to the default voice's upstream files.
+fn decode_voice_id(voice_id: &str) -> (String, String, String, String) {
+    let id = match voice_id.trim() {
+        "" => DEFAULT_PIPER_VOICE,
+        other => other,
+    };
+    // `splitn(3, ..)` keeps any further `-` inside the quality piece.
+    let mut pieces = id.splitn(3, '-');
+    let parts = match (pieces.next(), pieces.next(), pieces.next()) {
+        (Some(locale), Some(name), Some(quality)) => {
+            let short_lang = locale.split('_').next().unwrap_or("en");
+            [short_lang, locale, name, quality]
+        }
+        _ => ["en", "en_US", "lessac", "medium"],
+    };
+    let [short_lang, locale, name, quality] = parts.map(str::to_string);
+    (short_lang, locale, name, quality)
+}
+
+/// Read the stored Piper install status, promoting a `Missing` snapshot
+/// to `Installed` when the configured voice and a workspace binary are
+/// already on disk.
+pub fn status(config: &Config) -> VoiceInstallStatus {
+    let mut current = read_status(ENGINE_PIPER);
+    let voice = crate::openhuman::local_ai::model_ids::effective_tts_voice_id(config);
+    let voice = voice.trim_end_matches(".onnx");
+    let missing = matches!(current.state, VoiceInstallState::Missing);
+    if missing && installed_artifacts_ok(config, voice) {
+        current.state = VoiceInstallState::Installed;
+        current.stage = Some("binary and voice present".to_string());
+    }
+    current
+}
+
+/// True when the artifacts for `voice_id` and a Piper binary are on disk.
+/// The voice's `.onnx` and `.onnx.json` must be files at or above their
+/// size floors, and some workspace binary candidate must be a file. The
+/// requested voice is checked (not the default) so switching voices installs.
+fn installed_artifacts_ok(config: &Config, voice_id: &str) -> bool {
+    let voice_ok = match paths::workspace_piper_voice_paths(config, voice_id) {
+        Some((onnx, json)) => {
+            let onnx_ok = std::fs::metadata(&onnx)
+                .is_ok_and(|meta| meta.is_file() && meta.len() >= MIN_VOICE_BYTES);
+            let json_ok = std::fs::metadata(&json)
+                .is_ok_and(|meta| meta.is_file() && meta.len() >= MIN_VOICE_JSON_BYTES);
+            log::debug!(
+                "{LOG_PREFIX} install check onnx={} onnx_ok={} json={} json_ok={}",
+                onnx.display(),
+                onnx_ok,
+                json.display(),
+                json_ok
+            );
+            onnx_ok && json_ok
+        }
+        None => false,
+    };
+    let candidates = paths::workspace_piper_binary_candidates(config);
+    let binary_ok = candidates.iter().any(|path| path.is_file());
+    log::debug!(
+        "{LOG_PREFIX} install check binary_ok={} voice_ok={}",
+        binary_ok,
+        voice_ok
+    );
+    binary_ok && voice_ok
+}
+
+/// Kick off (or re-kick) a Piper install and return the final status.
+///
+/// * `voice_id` — voice to install. Surrounding whitespace and a trailing
+///   `.onnx` are stripped (as [`status`] does for the configured voice);
+///   `None` or an id left empty means [`DEFAULT_PIPER_VOICE`].
+/// * `force_reinstall` — when `false`, artifacts that already pass
+///   validation short-circuit to a no-op success. When `true` that check
+///   is skipped and everything is downloaded again; this function does
+///   not delete existing files first.
+///
+/// Each outcome is recorded with `write_status`; failures return the
+/// error string, which is also stored as `error_detail`.
+pub async fn install_piper(
+    config: &Config,
+    voice_id: Option<String>,
+    force_reinstall: bool,
+) -> Result<VoiceInstallStatus, String> {
+    let voice = voice_id
+        .as_deref()
+        .map(|raw| raw.trim().trim_end_matches(".onnx").trim_end())
+        .filter(|id| !id.is_empty())
+        .unwrap_or(DEFAULT_PIPER_VOICE)
+        .to_string();
+    log::debug!("{LOG_PREFIX} install requested voice={voice} force_reinstall={force_reinstall}");
+
+    // Both success paths report the same snapshot apart from the stage.
+    let installed = |stage: &str| VoiceInstallStatus {
+        engine: ENGINE_PIPER.to_string(),
+        state: VoiceInstallState::Installed,
+        progress: Some(100),
+        downloaded_bytes: None,
+        total_bytes: None,
+        stage: Some(stage.to_string()),
+        error_detail: None,
+    };
+
+    if !force_reinstall && installed_artifacts_ok(config, &voice) {
+        log::debug!("{LOG_PREFIX} short-circuit: artifacts already present");
+        let snapshot = installed("already installed");
+        write_status(snapshot.clone());
+        return Ok(snapshot);
+    }
+
+    write_status(VoiceInstallStatus {
+        engine: ENGINE_PIPER.to_string(),
+        state: VoiceInstallState::Installing,
+        progress: Some(0),
+        downloaded_bytes: Some(0),
+        total_bytes: None,
+        stage: Some(format!("starting piper install ({voice})")),
+        error_detail: None,
+    });
+
+    let snapshot = match run_install(config, &voice).await {
+        Ok(()) => installed("install complete"),
+        Err(msg) => {
+            write_status(VoiceInstallStatus {
+                engine: ENGINE_PIPER.to_string(),
+                state: VoiceInstallState::Error,
+                progress: None,
+                downloaded_bytes: None,
+                total_bytes: None,
+                stage: None,
+                error_detail: Some(msg.clone()),
+            });
+            return Err(msg);
+        }
+    };
+    write_status(snapshot.clone());
+    Ok(snapshot)
+}
+
+/// Download the voice files and the platform binary archive, then unpack
+/// the binary. The archive is deleted afterwards even if unpacking fails.
+async fn run_install(config: &Config, voice: &str) -> Result<(), String> {
+    // 1) Voice files: `.onnx` (heavy) + `.onnx.json` (small sidecar).
+    let (onnx_url, json_url) = voice_download_urls(voice);
+    let (onnx_path, json_path) = paths::workspace_piper_voice_paths(config, voice)
+        .ok_or_else(|| format!("{LOG_PREFIX} could not resolve voice paths for '{voice}'"))?;
+
+    log::debug!("{LOG_PREFIX} downloading voice url={onnx_url}");
+    update_stage(format!("downloading {voice}.onnx"));
+    download_to_file(
+        &onnx_url,
+        &onnx_path,
+        None,
+        MIN_VOICE_BYTES,
+        LOG_PREFIX,
+        |downloaded, total| {
+            let progress = total
+                .filter(|t| *t > 0)
+                .map(|t| ((downloaded * 100) / t).min(100) as u8);
+            write_status(VoiceInstallStatus {
+                engine: ENGINE_PIPER.to_string(),
+                state: VoiceInstallState::Installing,
+                progress,
+                downloaded_bytes: Some(downloaded),
+                total_bytes: total,
+                stage: Some("downloading voice (.onnx)".to_string()),
+                error_detail: None,
+            });
+        },
+    )
+    .await?;
+    log::debug!("{LOG_PREFIX} voice .onnx staged at {}", onnx_path.display());
+    log::debug!("{LOG_PREFIX} downloading voice json url={json_url}");
+    update_stage(format!("downloading {voice}.onnx.json"));
+    download_to_file(
+        &json_url,
+        &json_path,
+        None,
+        MIN_VOICE_JSON_BYTES,
+        LOG_PREFIX,
+        |downloaded, total| {
+            let progress = total
+                .filter(|t| *t > 0)
+                .map(|t| ((downloaded * 100) / t).min(100) as u8);
+            write_status(VoiceInstallStatus {
+                engine: ENGINE_PIPER.to_string(),
+                state: VoiceInstallState::Installing,
+                progress,
+                downloaded_bytes: Some(downloaded),
+                total_bytes: total,
+                stage: Some("downloading voice (.onnx.json)".to_string()),
+                error_detail: None,
+            });
+        },
+    )
+    .await?;
+
+    // 2) Binary archive.
+    let asset = binary_download_asset()
+        .ok_or_else(|| format!("{LOG_PREFIX} no piper binary release for this OS/arch"))?;
+    let archive_name = asset
+        .url
+        .rsplit('/')
+        .next()
+        .unwrap_or("piper_archive")
+        .to_string();
+    let archive_path = paths::workspace_piper_dir(config).join(&archive_name);
+    log::debug!("{LOG_PREFIX} downloading binary url={}", asset.url);
+    update_stage("downloading piper binary".to_string());
+    download_to_file(
+        &asset.url,
+        &archive_path,
+        None,
+        MIN_BINARY_ARCHIVE_BYTES,
+        LOG_PREFIX,
+        |downloaded, total| {
+            let progress = total
+                .filter(|t| *t > 0)
+                .map(|t| ((downloaded * 100) / t).min(100) as u8);
+            write_status(VoiceInstallStatus {
+                engine: ENGINE_PIPER.to_string(),
+                state: VoiceInstallState::Installing,
+                progress,
+                downloaded_bytes: Some(downloaded),
+                total_bytes: total,
+                stage: Some("downloading binary".to_string()),
+                error_detail: None,
+            });
+        },
+    )
+    .await?;
+    update_stage("extracting piper binary".to_string());
+    let dest = paths::workspace_piper_dir(config);
+    let extracted = match asset.kind {
+        ArchiveKind::Zip => extract_zip(&archive_path, &dest),
+        ArchiveKind::TarGz => extract_tar_gz(&archive_path, &dest),
+    };
+    let _ = std::fs::remove_file(&archive_path);
+    extracted
+}
+
+fn update_stage(stage: String) {
+    let mut current = read_status(ENGINE_PIPER);
+    current.stage = Some(stage);
+    write_status(current);
+}
+
+/// Unpack the zip at `zip_path` into `dest_dir`, creating directories as
+/// needed; entries for which `enclosed_name()` is `None` are skipped.
+fn extract_zip(zip_path: &std::path::Path, dest_dir: &std::path::Path) -> Result<(), String> {
+    log::debug!(
+        "{LOG_PREFIX} extract_zip {} -> {}",
+        zip_path.display(),
+        dest_dir.display()
+    );
+    let file = std::fs::File::open(zip_path).map_err(|e| format!("{LOG_PREFIX} open zip: {e}"))?;
+    let mut archive =
+        zip::ZipArchive::new(file).map_err(|e| format!("{LOG_PREFIX} parse zip: {e}"))?;
+    std::fs::create_dir_all(dest_dir).map_err(|e| format!("{LOG_PREFIX} mkdir dest: {e}"))?;
+    for i in 0..archive.len() {
+        let mut entry = archive
+            .by_index(i)
+            .map_err(|e| format!("{LOG_PREFIX} zip entry {i}: {e}"))?;
+        let Some(out_path) = entry.enclosed_name().map(|rel| dest_dir.join(rel)) else {
+            continue;
+        };
+        if entry.is_dir() {
+            std::fs::create_dir_all(&out_path)
+                .map_err(|e| format!("{LOG_PREFIX} mkdir {}: {e}", out_path.display()))?;
+            continue;
+        }
+        if let Some(parent) = out_path.parent() {
+            std::fs::create_dir_all(parent)
+                .map_err(|e| format!("{LOG_PREFIX} mkdir {}: {e}", parent.display()))?;
+        }
+        let mut out = std::fs::File::create(&out_path)
+            .map_err(|e| format!("{LOG_PREFIX} create {}: {e}", out_path.display()))?;
+        std::io::copy(&mut entry, &mut out)
+            .map_err(|e| format!("{LOG_PREFIX} copy {}: {e}", out_path.display()))?;
+    }
+    Ok(())
+}
+
+/// Unpack the gzip-compressed tarball at `archive` into `dest_dir`,
+/// creating `dest_dir` first if needed. Errors are returned as strings
+/// prefixed with `LOG_PREFIX`.
+fn extract_tar_gz(archive: &std::path::Path, dest_dir: &std::path::Path) -> Result<(), String> {
+    log::debug!(
+        "{LOG_PREFIX} extract_tar_gz {} -> {}",
+        archive.display(),
+        dest_dir.display()
+    );
+    std::fs::create_dir_all(dest_dir).map_err(|e| format!("{LOG_PREFIX} mkdir dest: {e}"))?;
+    let file =
+        std::fs::File::open(archive).map_err(|e| format!("{LOG_PREFIX} open tar.gz: {e}"))?;
+    // The Piper tarball is gzipped, so it is handled in two steps: read
+    // the whole compressed file into memory and inflate it with
+    // `inflate_gzip` (backed by the `flate2` crate, which is declared
+    // directly in Cargo.toml), then hand the plain tar bytes to the
+    // `tar` crate through an in-memory cursor.
+    //
+    // Buffering everything is acceptable because the Piper archive is
+    // small (the smallest historical build is ~7 MB).
+    let mut gz = std::io::BufReader::new(file);
+    let mut compressed = Vec::new();
+    gz.read_to_end(&mut compressed)
+        .map_err(|e| format!("{LOG_PREFIX} read tar.gz: {e}"))?;
+    let decompressed =
+        inflate_gzip(&compressed).map_err(|e| format!("{LOG_PREFIX} inflate tar.gz: {e}"))?;
+    let mut tar = tar::Archive::new(std::io::Cursor::new(decompressed));
+    tar.unpack(dest_dir)
+        .map_err(|e| format!("{LOG_PREFIX} unpack tar: {e}"))?;
+    Ok(())
+}
+
+/// Inflate a complete in-memory gzip stream and return the decompressed
+/// bytes. Uses `flate2::read::GzDecoder`; `flate2` is declared as a
+/// direct dependency in Cargo.toml. Decode errors become `"gz decode: …"`.
+fn inflate_gzip(compressed: &[u8]) -> Result<Vec<u8>, String> {
+    // `&[u8]` implements `Read`, so the slice can be fed to the decoder
+    // as-is; `read_to_end` then drains it into `out`.
+    use flate2::read::GzDecoder;
+    let mut decoder = GzDecoder::new(compressed);
+    let mut out = Vec::new();
+    decoder
+        .read_to_end(&mut out)
+        .map_err(|e| format!("gz decode: {e}"))?;
+    Ok(out)
+}
+
+/// Look up the Piper binary installed inside the workspace.
+///
+/// Walks `paths::workspace_piper_binary_candidates` in order and returns
+/// the first path that is an existing file, or `None` when there is
+/// none. Used by `paths::resolve_piper_binary` to prefer the workspace
+/// install over `PIPER_BIN` / PATH.
+pub(crate) fn find_workspace_piper_binary(config: &Config) -> Option<PathBuf> {
+    let found = paths::workspace_piper_binary_candidates(config)
+        .into_iter()
+        .find(|candidate| candidate.is_file())?;
+    log::debug!(
+        "{LOG_PREFIX} found workspace piper binary at {}",
+        found.display()
+    );
+    Some(found)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::openhuman::local_ai::voice_install_common::reset_status;
+
+    fn temp_config() -> (tempfile::TempDir, Config) {
+        let dir = tempfile::tempdir().expect("tempdir");
+        let config = Config {
+            workspace_dir: dir.path().join("workspace"),
+            config_path: dir.path().join("config.toml"),
+            ..Config::default()
+        };
+        (dir, config)
+    }
+
+    #[test]
+    fn decode_voice_id_splits_correctly() {
+        assert_eq!(
+            decode_voice_id("en_US-lessac-medium"),
+            (
+                "en".to_string(),
+                "en_US".to_string(),
+                "lessac".to_string(),
+                "medium".to_string()
+            )
+        );
+        assert_eq!(
+            decode_voice_id("de_DE-thorsten-high"),
+            (
+                "de".to_string(),
+                "de_DE".to_string(),
+                "thorsten".to_string(),
+                "high".to_string()
+            )
+        );
+    }
+
+    #[test]
+    fn decode_voice_id_falls_back_for_garbage() {
+        // Malformed (too few pieces), empty, and whitespace-only ids must
+        // all decode to the bundled default voice.
+        for input in ["garbage", "", "   "] {
+            let (lang, locale, name, quality) = decode_voice_id(input);
+            assert_eq!(lang, "en", "input={input:?}");
+            assert_eq!(locale, "en_US", "input={input:?}");
+            assert_eq!(name, "lessac", "input={input:?}");
+            assert_eq!(quality, "medium", "input={input:?}");
+        }
+    }
+
+    #[test]
+    fn voice_download_urls_anchor_on_hf_bucket() {
+        let (onnx, json) = voice_download_urls("en_US-lessac-medium");
+        assert!(onnx.starts_with("https://huggingface.co/rhasspy/piper-voices/resolve/main/"));
+        assert!(onnx.ends_with("en_US-lessac-medium.onnx"));
+        assert!(json.ends_with("en_US-lessac-medium.onnx.json"));
+    }
+
+    #[test]
+    fn binary_download_asset_picks_an_os_specific_url() {
+        let asset = binary_download_asset();
+        // On supported platforms we expect an asset; the test only runs
+        // on the host so this is informative.
+        if cfg!(any(
+            target_os = "windows",
+            target_os = "macos",
+            target_os = "linux"
+        )) {
+            let asset = asset.expect("supported platform should return an asset");
+            assert!(asset.url.contains("piper"));
+            assert!(asset
+                .url
+                .starts_with("https://github.com/rhasspy/piper/releases"));
+            if cfg!(windows) {
+                assert_eq!(asset.kind, ArchiveKind::Zip);
+            } else {
+                assert_eq!(asset.kind, ArchiveKind::TarGz);
+            }
+        } else {
+            assert!(asset.is_none());
+        }
+    }
+
+    /// Serialise tests that write into the shared `~/.openhuman/bin/piper/`
+    /// directory; reuses the module-wide `local_ai_test_guard` so paths +
+    /// install_whisper tests are serialised through the same lock.
+    fn shared_install_lock() -> std::sync::MutexGuard<'static, ()> {
+        crate::openhuman::local_ai::local_ai_test_guard()
+    }
+
+    fn wipe_shared_install_dir(config: &Config) {
+        let dir = paths::workspace_piper_dir(config);
+        let _ = std::fs::remove_dir_all(&dir);
+    }
+
+    #[test]
+    fn status_reports_missing_for_fresh_workspace() {
+        let _g = shared_install_lock();
+        reset_status(ENGINE_PIPER);
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        let snapshot = status(&config);
+        assert_eq!(snapshot.state, VoiceInstallState::Missing);
+    }
+
+    /// Build a `.onnx.json` payload big enough to pass the size floor.
+    /// Real Piper sidecars are a few KB; the floor exists to reject 404
+    /// HTML pages, so as long as we write past 256 bytes we mirror the
+    /// production validator's accept set.
+    fn synthetic_voice_json() -> Vec<u8> {
+        let mut body = br#"{"audio":{"sample_rate":22050},"phoneme_id_map":{},"#.to_vec();
+        // Pad to comfortably exceed the size floor without altering shape.
+        body.extend_from_slice(br#""filler":""#);
+        body.extend(std::iter::repeat_n(b'x', 512));
+        body.extend_from_slice(br#""}"#);
+        body
+    }
+
+    #[test]
+    fn status_promotes_to_installed_when_voice_and_binary_present() {
+        let _g = shared_install_lock();
+        reset_status(ENGINE_PIPER);
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        // Voice files.
+        let (onnx, json) =
+            paths::workspace_piper_voice_paths(&config, DEFAULT_PIPER_VOICE).expect("voice paths");
+        std::fs::create_dir_all(onnx.parent().unwrap()).unwrap();
+        std::fs::write(&onnx, vec![0u8; (MIN_VOICE_BYTES + 1024) as usize]).unwrap();
+        std::fs::write(&json, synthetic_voice_json()).unwrap();
+        // Binary.
+        let bin_candidate = paths::workspace_piper_binary_candidates(&config)[0].clone();
+        std::fs::create_dir_all(bin_candidate.parent().unwrap()).unwrap();
+        std::fs::write(&bin_candidate, b"stub").unwrap();
+
+        let snapshot = status(&config);
+        assert_eq!(snapshot.state, VoiceInstallState::Installed);
+        wipe_shared_install_dir(&config);
+    }
+
+    // Same rationale as install_whisper.rs: holding the sync mutex over
+    // the install await is safe because the install path doesn't acquire
+    // any other locks, and the guard's job is to keep filesystem writes
+    // from racing with sibling tests.
+    #[allow(clippy::await_holding_lock)]
+    #[tokio::test]
+    async fn install_short_circuits_when_already_installed() {
+        let _g = shared_install_lock();
+        reset_status(ENGINE_PIPER);
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        let (onnx, json) =
+            paths::workspace_piper_voice_paths(&config, DEFAULT_PIPER_VOICE).expect("voice paths");
+        std::fs::create_dir_all(onnx.parent().unwrap()).unwrap();
+        std::fs::write(&onnx, vec![0u8; (MIN_VOICE_BYTES + 1024) as usize]).unwrap();
+        std::fs::write(&json, synthetic_voice_json()).unwrap();
+        let bin_candidate = paths::workspace_piper_binary_candidates(&config)[0].clone();
+        std::fs::create_dir_all(bin_candidate.parent().unwrap()).unwrap();
+        std::fs::write(&bin_candidate, b"stub").unwrap();
+
+        let result = install_piper(&config, None, false).await;
+        assert!(result.is_ok(), "short-circuit must succeed: {result:?}");
+        let snap = result.unwrap();
+        assert_eq!(snap.state, VoiceInstallState::Installed);
+        wipe_shared_install_dir(&config);
+    }
+
+    #[test]
+    fn find_workspace_piper_binary_returns_path_when_present() {
+        let _g = shared_install_lock();
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        let target = paths::workspace_piper_binary_candidates(&config)[0].clone();
+        std::fs::create_dir_all(target.parent().unwrap()).unwrap();
+        std::fs::write(&target, b"stub").unwrap();
+        let found = find_workspace_piper_binary(&config).expect("should find binary");
+        assert_eq!(found, target);
+        wipe_shared_install_dir(&config);
+    }
+
+    #[test]
+    fn find_workspace_piper_binary_returns_none_without_install() {
+        let _g = shared_install_lock();
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        assert!(find_workspace_piper_binary(&config).is_none());
+    }
+}
diff --git a/src/openhuman/local_ai/install_whisper.rs b/src/openhuman/local_ai/install_whisper.rs
new file mode 100644
index 0000000000..378fd16d33
--- /dev/null
+++ b/src/openhuman/local_ai/install_whisper.rs
@@ -0,0 +1,497 @@
+//! Whisper installer — downloads the GGML model file (and best-effort
+//! `whisper-cli` binary when an upstream release asset exists for the
+//! target OS) into the workspace.
+//!
+//! ## Scope notes
+//!
+//! The whisper.cpp project doesn't ship pre-built binaries with a
+//! perfectly consistent naming scheme across OSes — Linux distros are
+//! typically built from source, macOS arrives via Homebrew (`brew install
+//! whisper-cpp`), and only Windows has a stable `.zip` asset on the
+//! GitHub release page (`whisper-bin-x64.zip`). Our strategy:
+//!
+//! 1. **Always** download the GGML model file. This is the heavy artifact
+//!    (1.6 GB for `large-v3-turbo`) and the one the local STT factory
+//!    cannot run without.
+//! 2. **Windows**: download the `whisper-bin-x64.zip` Windows release
+//!    asset, unzip into the workspace, and surface the binary path.
+//! 3. **macOS / Linux**: skip the binary fetch and leave a clear
+//!    diagnostic note telling the user to install `whisper-cli` via
+//!    their package manager. The model file is still ready for use the
+//!    moment a binary lands on PATH.
+//!
+//! Per-engine progress is reported via the shared
+//! [`crate::openhuman::local_ai::voice_install_common`] status table so
+//! the renderer can poll one RPC for state across both Whisper and Piper.
+
+use std::path::PathBuf;
+
+use crate::openhuman::config::Config;
+
+use super::paths;
+use super::voice_install_common::{
+    download_to_file, read_status, write_status, VoiceInstallState, VoiceInstallStatus,
+    ENGINE_WHISPER,
+};
+
+const LOG_PREFIX: &str = "[voice-install:whisper]";
+
+/// Default model size when the caller omits one. Matches
+/// [`crate::openhuman::voice::factory::DEFAULT_WHISPER_MODEL`].
+pub const DEFAULT_WHISPER_MODEL_SIZE: &str = "medium";
+
+/// Minimum bytes for the smallest model (tiny is ~39 MB on disk; allow
+/// some slack for HF mirror compression differences). Anything below this
+/// is almost certainly an HTML error page from the CDN — refuse to
+/// finalize the file.
+const MIN_MODEL_BYTES: u64 = 30 * 1024 * 1024;
+
+/// Minimum bytes for the Windows whisper-cli release zip. The smallest
+/// historical build is ~5 MB; anything tinier is an error page.
+const MIN_BINARY_ZIP_BYTES: u64 = 1024 * 1024;
+
+/// Map a caller-supplied model designator onto the GGML filename used by
+/// whisper.cpp's HuggingFace bucket (`ggml-<size>.bin`).
+///
+/// Accepted spellings, all normalised to the same filename:
+///   - short token: `tiny`, `large-v3-turbo`
+///   - factory id:  `whisper-large-v3-turbo`
+///   - full ggml:   `ggml-base-q5_1.bin`
+///
+/// Surrounding whitespace is ignored; an empty or whitespace-only input
+/// resolves to the [`DEFAULT_WHISPER_MODEL_SIZE`] filename.
+fn ggml_filename(size: &str) -> String {
+    let token = size.trim();
+    let stem = if token.is_empty() {
+        DEFAULT_WHISPER_MODEL_SIZE
+    } else {
+        // Peel each optional decoration at most once, in a fixed order, so
+        // a stale config value such as `ggml-base-q5_1.bin` cannot
+        // double-prefix into `ggml-ggml-base-q5_1.bin.bin`.
+        let no_factory = token.strip_prefix("whisper-").unwrap_or(token);
+        let no_ggml = no_factory.strip_prefix("ggml-").unwrap_or(no_factory);
+        no_ggml.strip_suffix(".bin").unwrap_or(no_ggml)
+    };
+    format!("ggml-{stem}.bin")
+}
+
+/// Build the HuggingFace download URL for the requested model size. The
+/// path is pinned to the `main` revision of the `ggerganov/whisper.cpp`
+/// repository and ends in the name produced by [`ggml_filename`].
+pub fn model_download_url(size: &str) -> String {
+    const BUCKET: &str = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
+    format!("{BUCKET}/{}", ggml_filename(size))
+}
+
+/// URL of the pre-built whisper-cli archive for the current host, if any.
+///
+/// Only Windows builds get `Some(..)`: the asset (`whisper-bin-x64.zip`)
+/// is fetched through the GitHub Releases `latest/download` alias rather
+/// than a pinned tag. Every other target yields `None`, which makes the
+/// installer skip the binary fetch.
+pub fn binary_download_url() -> Option<String> {
+    const WINDOWS_ASSET_URL: &str =
+        "https://github.com/ggerganov/whisper.cpp/releases/latest/download/whisper-bin-x64.zip";
+
+    if !cfg!(windows) {
+        return None;
+    }
+    Some(WINDOWS_ASSET_URL.to_string())
+}
+
+/// Current Whisper installer snapshot. A recorded `Missing` state is
+/// upgraded to `Installed` when the configured model already passes the
+/// on-disk check, so installs from before a restart are still reported.
+pub fn status(config: &Config) -> VoiceInstallStatus {
+    let mut snapshot = read_status(ENGINE_WHISPER);
+    if !matches!(snapshot.state, VoiceInstallState::Missing) {
+        return snapshot;
+    }
+    let configured = crate::openhuman::local_ai::model_ids::effective_stt_model_id(config);
+    if installed_artifacts_ok(config, &configured) {
+        snapshot.state = VoiceInstallState::Installed;
+        snapshot.stage = Some(format!("{configured} present"));
+    }
+    snapshot
+}
+
+/// Whether the model file for `size` is present in the workspace whisper
+/// dir and at least `MIN_MODEL_BYTES` long. Only the model is inspected;
+/// the binary is optional because whisper-cli may come from PATH.
+fn installed_artifacts_ok(config: &Config, size: &str) -> bool {
+    // Look up the requested size specifically (never the default), so
+    // switching e.g. `medium` -> `small` does not report "already
+    // installed" and skip the download of the new size.
+    let model_path = paths::workspace_whisper_model_path(config, size);
+    let model_ok = match std::fs::metadata(&model_path) {
+        Ok(meta) => meta.is_file() && meta.len() >= MIN_MODEL_BYTES,
+        Err(_) => false,
+    };
+    log::debug!(
+        "{LOG_PREFIX} install check size={size} model={} model_ok={}",
+        model_path.display(),
+        model_ok
+    );
+    model_ok
+}
+
+/// Kick off (or re-kick) a Whisper install for `model_size`, falling back
+/// to [`DEFAULT_WHISPER_MODEL_SIZE`] when it is absent or blank.
+///
+/// * `force_reinstall = false`: when the model for that size already
+///   passes the on-disk check, an `Installed` snapshot is recorded and
+///   returned without downloading anything.
+/// * `force_reinstall = true`: that check is bypassed and the download
+///   runs again. Nothing is deleted up front by this function; the
+///   download simply targets the same model path.
+///
+/// The outcome is always written to the shared status table before
+/// returning: `Installed` on success, or `Error` with `error_detail` set
+/// to the message that is also returned as `Err`.
+pub async fn install_whisper(
+    config: &Config,
+    model_size: Option<String>,
+    force_reinstall: bool,
+) -> Result<VoiceInstallStatus, String> {
+    let size = match model_size.as_deref().map(str::trim) {
+        Some(requested) if !requested.is_empty() => requested.to_string(),
+        _ => DEFAULT_WHISPER_MODEL_SIZE.to_string(),
+    };
+    log::debug!("{LOG_PREFIX} install requested size={size} force_reinstall={force_reinstall}");
+
+    // Both terminal "installed" snapshots (short-circuit and fresh
+    // install) are identical apart from their stage label.
+    let installed = |stage: &str| VoiceInstallStatus {
+        engine: ENGINE_WHISPER.to_string(),
+        state: VoiceInstallState::Installed,
+        progress: Some(100),
+        downloaded_bytes: None,
+        total_bytes: None,
+        stage: Some(stage.to_string()),
+        error_detail: None,
+    };
+
+    let already_present = !force_reinstall && installed_artifacts_ok(config, &size);
+    if already_present {
+        log::debug!("{LOG_PREFIX} short-circuit: artifacts already present");
+        let snapshot = installed("already installed");
+        write_status(snapshot.clone());
+        return Ok(snapshot);
+    }
+
+    write_status(VoiceInstallStatus {
+        engine: ENGINE_WHISPER.to_string(),
+        state: VoiceInstallState::Installing,
+        progress: Some(0),
+        downloaded_bytes: Some(0),
+        total_bytes: None,
+        stage: Some(format!("starting whisper install ({size})")),
+        error_detail: None,
+    });
+
+    if let Err(msg) = run_install(config, &size).await {
+        write_status(VoiceInstallStatus {
+            engine: ENGINE_WHISPER.to_string(),
+            state: VoiceInstallState::Error,
+            progress: None,
+            downloaded_bytes: None,
+            total_bytes: None,
+            stage: None,
+            error_detail: Some(msg.clone()),
+        });
+        return Err(msg);
+    }
+
+    let snapshot = installed("install complete");
+    write_status(snapshot.clone());
+    Ok(snapshot)
+}
+
+async fn run_install(config: &Config, size: &str) -> Result<(), String> {
+    // 1) Download the GGML model file (the must-have artifact).
+    let model_path = paths::workspace_whisper_model_path(config, size);
+    let model_url = model_download_url(size);
+    log::debug!("{LOG_PREFIX} downloading model url={model_url}");
+    update_stage(format!("downloading {}", ggml_filename(size)));
+    download_to_file(
+        &model_url,
+        &model_path,
+        None,
+        MIN_MODEL_BYTES,
+        LOG_PREFIX,
+        |downloaded, total| {
+            let progress = total
+                .filter(|t| *t > 0)
+                .map(|t| ((downloaded * 100) / t).min(100) as u8);
+            write_status(VoiceInstallStatus {
+                engine: ENGINE_WHISPER.to_string(),
+                state: VoiceInstallState::Installing,
+                progress,
+                downloaded_bytes: Some(downloaded),
+                total_bytes: total,
+                stage: Some("downloading model".to_string()),
+                error_detail: None,
+            });
+        },
+    )
+    .await?;
+    log::debug!("{LOG_PREFIX} model staged at {}", model_path.display());
+
+    // 2) Windows only: fetch the whisper-cli binary archive.
+    if let Some(url) = binary_download_url() {
+        let zip_path = paths::workspace_whisper_dir(config).join("whisper-bin-x64.zip");
+        log::debug!("{LOG_PREFIX} downloading binary url={url}");
+        update_stage("downloading whisper-cli binary".to_string());
+        download_to_file(
+            &url,
+            &zip_path,
+            None,
+            MIN_BINARY_ZIP_BYTES,
+            LOG_PREFIX,
+            |downloaded, total| {
+                let progress = total
+                    .filter(|t| *t > 0)
+                    .map(|t| ((downloaded * 100) / t).min(100) as u8);
+                write_status(VoiceInstallStatus {
+                    engine: ENGINE_WHISPER.to_string(),
+                    state: VoiceInstallState::Installing,
+                    progress,
+                    downloaded_bytes: Some(downloaded),
+                    total_bytes: total,
+                    stage: Some("downloading binary".to_string()),
+                    error_detail: None,
+                });
+            },
+        )
+        .await?;
+        update_stage("extracting whisper-cli binary".to_string());
+        extract_zip(&zip_path, &paths::workspace_whisper_dir(config))?;
+        // Best-effort cleanup of the staged archive.
+        let _ = std::fs::remove_file(&zip_path);
+    }
+
+    Ok(())
+}
+
+fn update_stage(stage: String) {
+    let mut current = read_status(ENGINE_WHISPER);
+    current.stage = Some(stage);
+    write_status(current);
+}
+
+/// Synchronously unpack `zip_path` into `dest_dir` (created if missing).
+/// Entries whose name is not safely enclosed are logged and skipped; any
+/// I/O or archive error aborts with a message naming the failing step.
+fn extract_zip(zip_path: &std::path::Path, dest_dir: &std::path::Path) -> Result<(), String> {
+    log::debug!(
+        "{LOG_PREFIX} extract_zip {} -> {}",
+        zip_path.display(),
+        dest_dir.display()
+    );
+    let file = std::fs::File::open(zip_path).map_err(|e| format!("{LOG_PREFIX} open zip: {e}"))?;
+    let mut archive =
+        zip::ZipArchive::new(file).map_err(|e| format!("{LOG_PREFIX} parse zip: {e}"))?;
+    std::fs::create_dir_all(dest_dir).map_err(|e| format!("{LOG_PREFIX} mkdir dest: {e}"))?;
+    for i in 0..archive.len() {
+        let mut entry = archive
+            .by_index(i)
+            .map_err(|e| format!("{LOG_PREFIX} zip entry {i}: {e}"))?;
+        let Some(rel) = entry.enclosed_name() else {
+            // Zip-slip protection: never write outside `dest_dir`, but say so.
+            log::warn!("{LOG_PREFIX} skipping unsafe zip entry {:?}", entry.name());
+            continue;
+        };
+        let out_path = dest_dir.join(rel);
+        if entry.is_dir() {
+            std::fs::create_dir_all(&out_path)
+                .map_err(|e| format!("{LOG_PREFIX} mkdir {}: {e}", out_path.display()))?;
+        } else {
+            if let Some(parent) = out_path.parent() {
+                std::fs::create_dir_all(parent)
+                    .map_err(|e| format!("{LOG_PREFIX} mkdir {}: {e}", parent.display()))?;
+            }
+            let mut out = std::fs::File::create(&out_path)
+                .map_err(|e| format!("{LOG_PREFIX} create {}: {e}", out_path.display()))?;
+            std::io::copy(&mut entry, &mut out)
+                .map_err(|e| format!("{LOG_PREFIX} copy {}: {e}", out_path.display()))?;
+        }
+    }
+    Ok(())
+}
+
+/// First existing workspace-installed whisper-cli binary, or `None`.
+///
+/// Candidates come from `paths::workspace_whisper_binary_candidates` and
+/// are probed in the order that function lists them; the first one that
+/// is a file wins. Called by `paths::resolve_whisper_binary_with_config`,
+/// which tries this before falling back to the config-less resolver.
+pub(crate) fn find_workspace_whisper_binary(config: &Config) -> Option<PathBuf> {
+    let hit = paths::workspace_whisper_binary_candidates(config)
+        .into_iter()
+        .find(|candidate| candidate.is_file())?;
+    log::debug!(
+        "{LOG_PREFIX} found workspace whisper binary at {}",
+        hit.display()
+    );
+    Some(hit)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::openhuman::local_ai::voice_install_common::reset_status;
+
+    fn temp_config() -> (tempfile::TempDir, Config) {
+        let dir = tempfile::tempdir().expect("tempdir");
+        let config = Config {
+            workspace_dir: dir.path().join("workspace"),
+            config_path: dir.path().join("config.toml"),
+            ..Config::default()
+        };
+        (dir, config)
+    }
+
+    #[test]
+    fn ggml_filename_strips_whisper_prefix() {
+        assert_eq!(
+            ggml_filename("whisper-large-v3-turbo"),
+            "ggml-large-v3-turbo.bin"
+        );
+        assert_eq!(ggml_filename("large-v3-turbo"), "ggml-large-v3-turbo.bin");
+        assert_eq!(ggml_filename("tiny"), "ggml-tiny.bin");
+        assert_eq!(ggml_filename("  base  "), "ggml-base.bin");
+        // Tolerate full ggml filename (regression: stale legacy config like
+        // `ggml-base-q5_1.bin` used to produce `ggml-ggml-base-q5_1.bin.bin`).
+        assert_eq!(ggml_filename("ggml-base-q5_1.bin"), "ggml-base-q5_1.bin");
+        assert_eq!(ggml_filename("ggml-tiny.bin"), "ggml-tiny.bin");
+    }
+
+    #[test]
+    fn ggml_filename_empty_falls_back_to_default() {
+        assert_eq!(
+            ggml_filename(""),
+            format!("ggml-{DEFAULT_WHISPER_MODEL_SIZE}.bin")
+        );
+        assert_eq!(
+            ggml_filename("   "),
+            format!("ggml-{DEFAULT_WHISPER_MODEL_SIZE}.bin")
+        );
+    }
+
+    #[test]
+    fn model_download_url_anchors_on_hf_bucket() {
+        let url = model_download_url("tiny");
+        assert!(
+            url.starts_with("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"),
+            "url should anchor on the canonical HF bucket: {url}"
+        );
+        assert!(url.ends_with("ggml-tiny.bin"));
+    }
+
+    #[test]
+    fn binary_download_url_only_for_windows() {
+        if cfg!(windows) {
+            let url = binary_download_url().expect("windows must offer a binary url");
+            assert!(url.contains("whisper-bin-x64.zip"));
+            assert!(url.contains("github.com/ggerganov/whisper.cpp"));
+        } else {
+            assert!(
+                binary_download_url().is_none(),
+                "non-Windows hosts should not advertise a binary download URL"
+            );
+        }
+    }
+
+    /// Serialise tests that write into the whisper install dir. Unless
+    /// `OPENHUMAN_WORKSPACE` is set, `shared_root_dir` bypasses the temp
+    /// config and resolves to the shared `~/.openhuman/` root, so parallel
+    /// tests can collide. Reuses the module-wide `local_ai_test_guard` so
+    /// paths + install_piper tests are serialised through the same lock.
+    fn shared_install_lock() -> std::sync::MutexGuard<'static, ()> {
+        crate::openhuman::local_ai::local_ai_test_guard()
+    }
+
+    /// Wipe the shared-root install dir for whisper so the absence
+    /// assertions below are deterministic across parallel test runs.
+    fn wipe_shared_install_dir(config: &Config) {
+        let dir = paths::workspace_whisper_dir(config);
+        let _ = std::fs::remove_dir_all(&dir);
+    }
+
+    #[test]
+    fn status_reports_missing_for_fresh_workspace() {
+        let _g = shared_install_lock();
+        reset_status(ENGINE_WHISPER);
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        let snapshot = status(&config);
+        assert_eq!(snapshot.state, VoiceInstallState::Missing);
+    }
+
+    #[test]
+    fn status_promotes_to_installed_when_model_present() {
+        let _g = shared_install_lock();
+        reset_status(ENGINE_WHISPER);
+        let (_tmp, mut config) = temp_config();
+        // The status helper derives installed state from effective_stt_model_id,
+        // so config must agree with the file we create. Pin it to the default
+        // model size so the on-disk lookup matches.
+        config.local_ai.stt_model_id = DEFAULT_WHISPER_MODEL_SIZE.to_string();
+        wipe_shared_install_dir(&config);
+        let path = paths::workspace_whisper_model_path(&config, DEFAULT_WHISPER_MODEL_SIZE);
+        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
+        // Write a file just over the size floor so the validator accepts it.
+        let buf = vec![0u8; (MIN_MODEL_BYTES + 1024) as usize];
+        std::fs::write(&path, &buf).unwrap();
+        let snapshot = status(&config);
+        assert_eq!(snapshot.state, VoiceInstallState::Installed);
+        wipe_shared_install_dir(&config);
+    }
+
+    // We deliberately hold the sync mutex across the install await — the
+    // install path doesn't acquire any other locks so there is no risk of
+    // deadlock, and the guard's only job is to serialise filesystem
+    // writes against parallel tests. Same pattern used elsewhere in
+    // local_ai test modules.
+    #[allow(clippy::await_holding_lock)]
+    #[tokio::test]
+    async fn install_short_circuits_when_already_installed() {
+        let _g = shared_install_lock();
+        reset_status(ENGINE_WHISPER);
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        let path = paths::workspace_whisper_model_path(&config, DEFAULT_WHISPER_MODEL_SIZE);
+        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
+        let buf = vec![0u8; (MIN_MODEL_BYTES + 1024) as usize];
+        std::fs::write(&path, &buf).unwrap();
+
+        let result = install_whisper(&config, None, false).await;
+        assert!(result.is_ok(), "short-circuit must succeed: {result:?}");
+        let snap = result.unwrap();
+        assert_eq!(snap.state, VoiceInstallState::Installed);
+        assert_eq!(snap.stage.as_deref(), Some("already installed"));
+        wipe_shared_install_dir(&config);
+    }
+
+    #[test]
+    fn find_workspace_whisper_binary_returns_none_without_install() {
+        let _g = shared_install_lock();
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        assert!(find_workspace_whisper_binary(&config).is_none());
+    }
+
+    #[test]
+    fn find_workspace_whisper_binary_returns_path_when_present() {
+        let _g = shared_install_lock();
+        let (_tmp, config) = temp_config();
+        wipe_shared_install_dir(&config);
+        let candidates = paths::workspace_whisper_binary_candidates(&config);
+        let target = candidates.first().expect("at least one candidate").clone();
+        std::fs::create_dir_all(target.parent().unwrap()).unwrap();
+        std::fs::write(&target, b"stub").unwrap();
+        let found = find_workspace_whisper_binary(&config).expect("should find binary");
+        assert_eq!(found, target);
+        wipe_shared_install_dir(&config);
+    }
+}
diff --git a/src/openhuman/local_ai/mod.rs b/src/openhuman/local_ai/mod.rs
index a7b5f57ab1..629aeaff53 100644
--- a/src/openhuman/local_ai/mod.rs
+++ b/src/openhuman/local_ai/mod.rs
@@ -20,6 +20,8 @@ mod schemas;
 pub mod sentiment;
 
 mod install;
+pub(crate) mod install_piper;
+pub(crate) mod install_whisper;
 pub(crate) mod model_ids;
 mod ollama_api;
 mod process_util;
@@ -28,6 +30,7 @@ mod parse;
 pub(crate) mod paths;
 mod service;
 mod types;
+pub(crate) mod voice_install_common;
 
 pub use core::*;
 pub use device::DeviceProfile;
diff --git a/src/openhuman/local_ai/paths.rs b/src/openhuman/local_ai/paths.rs
index 50639a10d7..2ce7a50576 100644
--- a/src/openhuman/local_ai/paths.rs
+++ b/src/openhuman/local_ai/paths.rs
@@ -15,10 +15,21 @@ pub(crate) fn config_root_dir(config: &Config) -> PathBuf {
         .unwrap_or_else(|| config.workspace_dir.clone())
 }
 
-/// Returns the shared root openhuman directory (`~/.openhuman/`), which is
-/// used for resources that should NOT be duplicated per user (model downloads,
-/// binaries, etc.).
+/// Returns the root directory under which local-AI artifacts (binaries,
+/// model files) are written and resolved.
+///
+/// Default callers see the shared `~/.openhuman/` root, which avoids
+/// duplicating multi-GB model files across users on a single machine.
+///
+/// When `OPENHUMAN_WORKSPACE` is **explicitly** set (test/dev parallel
+/// sessions, multi-workspace deployments, isolated CI runs), the
+/// shared-root contract no longer applies — those callers want full
+/// isolation, including their own copy of any installed binaries. Honor
+/// the override by returning the workspace dir directly.
 fn shared_root_dir(config: &Config) -> PathBuf {
+    if std::env::var_os("OPENHUMAN_WORKSPACE").is_some() {
+        return config_root_dir(config);
+    }
     crate::openhuman::config::default_root_openhuman_dir()
         .unwrap_or_else(|_| config_root_dir(config))
 }
@@ -84,6 +95,34 @@ pub(crate) fn ollama_spawn_marker_path(config: &Config) -> PathBuf {
 }
 
 pub(crate) fn resolve_whisper_binary() -> Option<PathBuf> {
+    // Precedence: workspace install > env override > PATH lookup. The
+    // workspace install path is the canonical drop-zone for the binary
+    // populated by `install_whisper::install_whisper`; checking it first
+    // means a user who just clicked Install in the VoicePanel doesn't
+    // have to also export WHISPER_BIN. Falls back to the env+PATH form
+    // for advanced users who pin a custom binary.
+    if let Ok(shared) = crate::openhuman::config::default_root_openhuman_dir() {
+        let root = shared.join("bin").join("whisper");
+        let bin_name = if cfg!(windows) {
+            "whisper-cli.exe"
+        } else {
+            "whisper-cli"
+        };
+        for candidate in [
+            root.join(bin_name),
+            root.join("whisper-bin-x64").join(bin_name),
+            root.join("bin").join(bin_name),
+        ] {
+            if candidate.is_file() {
+                log::debug!(
+                    "[voice-install:whisper] resolved workspace binary {}",
+                    candidate.display()
+                );
+                return Some(candidate);
+            }
+        }
+    }
+
     if let Some(from_env) = std::env::var("WHISPER_BIN")
         .ok()
         .filter(|v| !v.trim().is_empty())
@@ -106,7 +145,42 @@ pub(crate) fn resolve_whisper_binary() -> Option<PathBuf> {
     })
 }
 
+/// Config-aware whisper resolution. Candidates are tried in this order:
+///   1. Binary under the config's whisper install dir (as located by
+///      `install_whisper::find_workspace_whisper_binary`)
+///   2. Whatever `resolve_whisper_binary` finds: the default shared-root
+///      install, then the `WHISPER_BIN` env override, then `whisper-cli`
+///      on PATH
+///
+/// `resolve_whisper_binary` stays available on its own so callers
+/// without a `Config` reference (e.g. the bare-process voice STT
+/// subprocess code) keep compiling without rewiring.
+pub(crate) fn resolve_whisper_binary_with_config(config: &Config) -> Option<PathBuf> {
+    super::install_whisper::find_workspace_whisper_binary(config)
+        .or_else(resolve_whisper_binary)
+}
+
 pub(crate) fn resolve_piper_binary() -> Option<PathBuf> {
+    // Precedence: workspace install > env override > PATH lookup. See
+    // the `resolve_whisper_binary` comment above for the rationale.
+    if let Ok(shared) = crate::openhuman::config::default_root_openhuman_dir() {
+        let root = shared.join("bin").join("piper");
+        let bin_name = if cfg!(windows) { "piper.exe" } else { "piper" };
+        for candidate in [
+            root.join(bin_name),
+            root.join("piper").join(bin_name),
+            root.join("bin").join(bin_name),
+        ] {
+            if candidate.is_file() {
+                log::debug!(
+                    "[voice-install:piper] resolved workspace binary {}",
+                    candidate.display()
+                );
+                return Some(candidate);
+            }
+        }
+    }
+
     if let Some(from_env) = std::env::var("PIPER_BIN")
         .ok()
         .filter(|v| !v.trim().is_empty())
@@ -125,22 +199,157 @@ pub(crate) fn resolve_piper_binary() -> Option<PathBuf> {
     })
 }
 
+/// Config-aware piper resolution, same precedence shape as
+/// `resolve_whisper_binary_with_config`: a binary found under the
+/// config's piper install dir wins; otherwise the result of
+/// `resolve_piper_binary` (default shared-root install, `PIPER_BIN`, then
+/// PATH) is returned.
+pub(crate) fn resolve_piper_binary_with_config(config: &Config) -> Option<PathBuf> {
+    super::install_piper::find_workspace_piper_binary(config)
+        .or_else(resolve_piper_binary)
+}
+
+// ---------------------------------------------------------------------------
+// Workspace install paths — used by install_whisper / install_piper.
+// ---------------------------------------------------------------------------
+
+/// Workspace dir for Whisper artifacts. Lives next to the Ollama dir so
+/// users with a single shared root see all local-AI binaries together.
+pub(crate) fn workspace_whisper_dir(config: &Config) -> PathBuf {
+    shared_root_dir(config).join("bin").join("whisper")
+}
+
+/// On-disk path of the GGML model file for `size` inside the workspace
+/// whisper dir. Blank input maps to `ggml-medium.bin`.
+///
+/// `size` may use any of these spellings; all resolve to one filename,
+/// so a stale config value (e.g. legacy `ggml-base-q5_1.bin`) cannot
+/// turn into the broken `ggml-ggml-base-q5_1.bin.bin`:
+///   - short token: `tiny`, `large-v3-turbo`
+///   - factory id:  `whisper-large-v3-turbo`
+///   - full ggml:   `ggml-base-q5_1.bin`
+pub(crate) fn workspace_whisper_model_path(config: &Config, size: &str) -> PathBuf {
+    let token = size.trim();
+    let filename = if token.is_empty() {
+        "ggml-medium.bin".to_string()
+    } else {
+        let stem = token.strip_prefix("whisper-").unwrap_or(token);
+        let stem = stem.strip_prefix("ggml-").unwrap_or(stem);
+        let stem = stem.strip_suffix(".bin").unwrap_or(stem);
+        format!("ggml-{stem}.bin")
+    };
+    workspace_whisper_dir(config).join(filename)
+}
+
+/// Every location where a workspace-installed whisper-cli binary may sit
+/// after extraction, in probe order: the install root, `Release/` (cmake
+/// artifact dir of the Windows zip), `whisper-bin-x64/` with and without
+/// a nested `Release/`, and `bin/`. Several layouts are probed so a change
+/// in archive shape does not silently break resolution. The file name is
+/// `whisper-cli.exe` on Windows and `whisper-cli` elsewhere.
+pub(crate) fn workspace_whisper_binary_candidates(config: &Config) -> Vec<PathBuf> {
+    const EXE: &str = if cfg!(windows) {
+        "whisper-cli.exe"
+    } else {
+        "whisper-cli"
+    };
+    // Directory components below the install root, one entry per layout.
+    const LAYOUTS: [&[&str]; 5] = [
+        &[],
+        &["Release"],
+        &["whisper-bin-x64"],
+        &["whisper-bin-x64", "Release"],
+        &["bin"],
+    ];
+    let root = workspace_whisper_dir(config);
+    LAYOUTS
+        .iter()
+        .map(|dirs| dirs.iter().fold(root.clone(), |acc, dir| acc.join(dir)).join(EXE))
+        .collect()
+}
+
+/// Workspace dir for Piper artifacts.
+pub(crate) fn workspace_piper_dir(config: &Config) -> PathBuf {
+    shared_root_dir(config).join("bin").join("piper")
+}
+
+/// On-disk paths for a Piper voice — returns the `.onnx` and
+/// `.onnx.json` sidecar in that order. Returns `None` if the voice id
+/// is empty (no fallback — the caller must validate up front).
+pub(crate) fn workspace_piper_voice_paths(
+    config: &Config,
+    voice_id: &str,
+) -> Option<(PathBuf, PathBuf)> {
+    let trimmed = voice_id.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+    // Append suffixes by hand: `with_extension` would clip a dotted id.
+    let voices = workspace_piper_dir(config).join("voices");
+    let onnx = voices.join(format!("{trimmed}.onnx"));
+    let json = voices.join(format!("{trimmed}.onnx.json"));
+    Some((onnx, json))
+}
+
+/// All candidate paths where the workspace-installed Piper binary might
+/// land, in probe order: the install root, then its `piper/` and `bin/`
+/// sub-directories. Windows zips drop `piper.exe` in a `piper/` subdir;
+/// tar.gz archives on Linux/macOS sometimes flatten to the install
+/// root. The file name is `piper.exe` on Windows and `piper` on every
+/// other target.
+pub(crate) fn workspace_piper_binary_candidates(config: &Config) -> Vec<PathBuf> {
+    let exe = if cfg!(windows) { "piper.exe" } else { "piper" };
+    let root = workspace_piper_dir(config);
+    let nested = |dir: &str| root.join(dir).join(exe);
+    vec![root.join(exe), nested("piper"), nested("bin")]
+}
+
 pub(crate) fn resolve_stt_model_path(config: &Config) -> Result<String, String> {
     let id = model_ids::effective_stt_model_id(config);
-    let path = PathBuf::from(&id);
+    resolve_stt_model_path_by_id(&id, config)
+}
+
+/// Resolve the on-disk GGML model path for an explicit `model_id`.
+///
+/// Used when the caller has already computed the effective model id (e.g.
+/// from a per-request override) and needs the path without re-reading the
+/// config default. Probes the same candidate set as `resolve_stt_model_path`.
+pub(crate) fn resolve_stt_model_path_by_id(id: &str, config: &Config) -> Result<String, String> {
+    let path = PathBuf::from(id);
     if path.is_file() {
         return Ok(path.display().to_string());
     }
-    let candidate = workspace_local_models_dir(config).join("stt").join(&id);
-    if candidate.is_file() {
-        Ok(candidate.display().to_string())
+    // The voice installer places the GGML model file under
+    // `workspace_whisper_dir(config)/ggml-<size>.bin`, but the legacy
+    // local-AI flow stages STT models under `workspace_local_models_dir`.
+    // Probe both so a user who installed via the new Install button
+    // doesn't need to redo anything.
+    let legacy = workspace_local_models_dir(config).join("stt").join(id);
+    if legacy.is_file() {
+        return Ok(legacy.display().to_string());
+    }
+    let installer = workspace_whisper_dir(config).join(id);
+    if installer.is_file() {
+        return Ok(installer.display().to_string());
+    }
+    // Also probe the ggml-prefixed form for short ids like `tiny`.
+    let bare = id.trim().strip_prefix("whisper-").unwrap_or(id.trim());
+    let normalized = if bare.starts_with("ggml-") {
+        bare.to_string()
     } else {
-        Err(format!(
-            "STT model not found. Expected '{}' or '{}'",
-            path.display(),
-            candidate.display()
-        ))
+        format!("ggml-{bare}.bin")
+    };
+    let normalized_path = workspace_whisper_dir(config).join(&normalized);
+    if normalized_path.is_file() {
+        return Ok(normalized_path.display().to_string());
     }
+    Err(format!(
+        "STT model not found. Expected one of '{}', '{}', '{}', '{}'",
+        path.display(),
+        legacy.display(),
+        installer.display(),
+        normalized_path.display()
+    ))
 }
 
 pub(crate) fn resolve_tts_voice_path(config: &Config) -> Result<String, String> {
@@ -150,22 +359,40 @@ pub(crate) fn resolve_tts_voice_path(config: &Config) -> Result<String, String>
         return Ok(path.display().to_string());
     }
     let filename = if voice_id.ends_with(".onnx") {
-        voice_id
+        voice_id.clone()
     } else {
         format!("{voice_id}.onnx")
     };
-    let candidate = workspace_local_models_dir(config)
+    // Installer drop-zone — `install_piper` writes
+    // `bin/piper/voices/<id>.onnx`. Probed FIRST because legacy paths
+    // may contain stale stubs from earlier workspaces (a 4-byte legacy
+    // stub used to win over a 63 MB installer copy and crash Piper with
+    // STATUS_STACK_BUFFER_OVERRUN).
+    let installer_onnx_path =
+        workspace_piper_voice_paths(config, voice_id.trim_end_matches(".onnx"))
+            .map(|(onnx, _)| onnx);
+    if let Some(p) = &installer_onnx_path {
+        if p.is_file() {
+            return Ok(p.display().to_string());
+        }
+    }
+    // Legacy path used by the original voice pipeline. Still checked so
+    // pre-installer setups keep working.
+    let legacy = workspace_local_models_dir(config)
         .join("tts")
-        .join(filename);
-    if candidate.is_file() {
-        Ok(candidate.display().to_string())
-    } else {
-        Err(format!(
-            "TTS voice model not found. Expected '{}' or '{}'",
-            path.display(),
-            candidate.display()
-        ))
+        .join(&filename);
+    if legacy.is_file() {
+        return Ok(legacy.display().to_string());
     }
+    let installer_display = installer_onnx_path
+        .as_ref()
+        .map(|p| p.display().to_string())
+        .unwrap_or_else(|| "(no installer path resolvable)".to_string());
+    Err(format!(
+        "TTS voice model not found. Expected '{}' (installer) or '{}' (legacy)",
+        installer_display,
+        legacy.display()
+    ))
 }
 
 pub(crate) fn stt_model_target_path(config: &Config) -> PathBuf {
@@ -222,8 +449,21 @@ mod tests {
 
     #[test]
     fn resolve_tts_voice_path_appends_onnx_for_voice_ids() {
+        // The installer drop-zone (`bin/piper/voices/<id>.onnx`) is probed
+        // FIRST by `resolve_tts_voice_path`, and lives under the shared
+        // root (`~/.openhuman/`) — not the temp config. If a sibling
+        // install_piper test runs in parallel with the default voice id
+        // and leaves a stub there, this test sees that file and the
+        // assertion fails. Serialise via the shared install guard and
+        // wipe the installer path so the legacy `models/local-ai/tts/`
+        // candidate is the only match.
+        let _g = shared_install_lock();
         let (_tmp, mut config) = temp_config();
         config.local_ai.tts_voice_id = "en_US-lessac-medium".to_string();
+        let installer_onnx = workspace_piper_voice_paths(&config, "en_US-lessac-medium")
+            .map(|(onnx, _)| onnx)
+            .expect("installer onnx path");
+        let _ = std::fs::remove_file(&installer_onnx);
         let model_path = workspace_local_models_dir(&config)
             .join("tts")
             .join("en_US-lessac-medium.onnx");
@@ -287,4 +527,137 @@ mod tests {
         let found = find_workspace_ollama_binary(&config).expect("find workspace binary");
         assert_eq!(found, legacy);
     }
+
+    #[test]
+    fn workspace_whisper_model_path_uses_ggml_naming() {
+        let (_tmp, config) = temp_config();
+        let path = workspace_whisper_model_path(&config, "large-v3-turbo");
+        assert!(
+            path.to_string_lossy().ends_with("ggml-large-v3-turbo.bin"),
+            "expected ggml-<size>.bin suffix: {}",
+            path.display()
+        );
+        // Stripping the `whisper-` prefix keeps the filename uniform with
+        // bare-size callers.
+        let alt = workspace_whisper_model_path(&config, "whisper-tiny");
+        assert!(alt.to_string_lossy().ends_with("ggml-tiny.bin"));
+        // Regression: stale legacy config (`ggml-base-q5_1.bin`) used to
+        // produce the broken path `ggml-ggml-base-q5_1.bin.bin`.
+        let legacy = workspace_whisper_model_path(&config, "ggml-base-q5_1.bin");
+        assert!(
+            legacy.to_string_lossy().ends_with("ggml-base-q5_1.bin"),
+            "stale legacy id must collapse to canonical ggml-<size>.bin: {}",
+            legacy.display()
+        );
+        let legacy_short = workspace_whisper_model_path(&config, "ggml-tiny.bin");
+        assert!(legacy_short.to_string_lossy().ends_with("ggml-tiny.bin"));
+        // Empty size falls back to the default model size (medium).
+        let default = workspace_whisper_model_path(&config, "");
+        assert!(
+            default.to_string_lossy().ends_with("ggml-medium.bin"),
+            "empty size should fall back to ggml-medium.bin: {}",
+            default.display()
+        );
+    }
+
+    #[test]
+    fn workspace_whisper_binary_candidates_cover_known_archive_layouts() {
+        let (_tmp, config) = temp_config();
+        let candidates = workspace_whisper_binary_candidates(&config);
+        let suffix = if cfg!(windows) {
+            "whisper-cli.exe"
+        } else {
+            "whisper-cli"
+        };
+        assert!(
+            candidates.iter().any(|p| p.ends_with(suffix)),
+            "flat-layout candidate must contain whisper-cli"
+        );
+        assert!(
+            candidates
+                .iter()
+                .any(|p| p.to_string_lossy().contains("whisper-bin-x64")),
+            "legacy Windows-zip nested layout must be a candidate"
+        );
+        // Regression: upstream Windows zip extracts to `Release/`. Without
+        // this candidate, the resolver reports "binary not found" even
+        // though the install succeeded.
+        assert!(
+            candidates
+                .iter()
+                .any(|p| p.to_string_lossy().contains("Release")),
+            "Release/ cmake-build layout must be a candidate"
+        );
+    }
+
+    #[test]
+    fn workspace_piper_voice_paths_returns_onnx_pair() {
+        let (_tmp, config) = temp_config();
+        let (onnx, json) =
+            workspace_piper_voice_paths(&config, "en_US-lessac-medium").expect("voice paths");
+        assert!(onnx.to_string_lossy().ends_with("en_US-lessac-medium.onnx"));
+        assert!(json
+            .to_string_lossy()
+            .ends_with("en_US-lessac-medium.onnx.json"));
+        // Empty voice id is rejected so the caller can fail fast.
+        assert!(workspace_piper_voice_paths(&config, "").is_none());
+        assert!(workspace_piper_voice_paths(&config, "   ").is_none());
+    }
+
+    #[test]
+    fn workspace_piper_binary_candidates_include_flat_layout() {
+        let (_tmp, config) = temp_config();
+        let candidates = workspace_piper_binary_candidates(&config);
+        let suffix = if cfg!(windows) { "piper.exe" } else { "piper" };
+        assert!(
+            candidates.iter().any(|p| p.ends_with(suffix)),
+            "flat-layout piper binary must be a candidate"
+        );
+    }
+
+    /// Serialise with sibling install_whisper / install_piper tests that
+    /// write into the same shared `~/.openhuman/bin/...` directory. Uses
+    /// the existing module-wide guard so all readers/writers go through
+    /// one critical section.
+    fn shared_install_lock() -> std::sync::MutexGuard<'static, ()> {
+        crate::openhuman::local_ai::local_ai_test_guard()
+    }
+
+    #[test]
+    fn resolve_whisper_binary_with_config_prefers_workspace_install() {
+        // The workspace candidate takes precedence over PATH lookup. We
+        // can't trivially clear PATH on every host, but writing a stub
+        // into the workspace dir is enough to verify the function
+        // returns the workspace path first.
+        let _g = shared_install_lock();
+        let (_tmp, config) = temp_config();
+        let target = workspace_whisper_binary_candidates(&config)
+            .into_iter()
+            .next()
+            .expect("at least one candidate");
+        // Wipe + recreate so a leftover stub from a parallel test cannot
+        // race the mkdir/write pair below.
+        let _ = std::fs::remove_dir_all(workspace_whisper_dir(&config));
+        std::fs::create_dir_all(target.parent().expect("parent")).expect("mkdir");
+        std::fs::write(&target, b"stub").expect("write stub");
+        let resolved = resolve_whisper_binary_with_config(&config).expect("workspace resolve");
+        assert_eq!(resolved, target);
+        let _ = std::fs::remove_dir_all(workspace_whisper_dir(&config));
+    }
+
+    #[test]
+    fn resolve_piper_binary_with_config_prefers_workspace_install() {
+        let _g = shared_install_lock();
+        let (_tmp, config) = temp_config();
+        let target = workspace_piper_binary_candidates(&config)
+            .into_iter()
+            .next()
+            .expect("at least one candidate");
+        let _ = std::fs::remove_dir_all(workspace_piper_dir(&config));
+        std::fs::create_dir_all(target.parent().expect("parent")).expect("mkdir");
+        std::fs::write(&target, b"stub").expect("write stub");
+        let resolved = resolve_piper_binary_with_config(&config).expect("workspace resolve");
+        assert_eq!(resolved, target);
+        let _ = std::fs::remove_dir_all(workspace_piper_dir(&config));
+    }
 }
diff --git a/src/openhuman/local_ai/schemas.rs b/src/openhuman/local_ai/schemas.rs
index 19db1e65ba..1ca677a637 100644
--- a/src/openhuman/local_ai/schemas.rs
+++ b/src/openhuman/local_ai/schemas.rs
@@ -111,6 +111,28 @@ struct LocalAiTenorSearchParams {
     limit: Option<u32>,
 }
 
+#[derive(Debug, Deserialize)]
+struct LocalAiInstallWhisperParams {
+    /// Optional model size (`tiny`, `base`, `small`, `medium`,
+    /// `large-v3-turbo`). Defaults to `large-v3-turbo`.
+    #[serde(default)]
+    model_size: Option<String>,
+    /// When true, blow away any existing model file and re-download.
+    #[serde(default)]
+    force: Option<bool>,
+}
+
+#[derive(Debug, Deserialize)]
+struct LocalAiInstallPiperParams {
+    /// Optional Piper voice id (e.g. `en_US-lessac-medium`). Defaults to
+    /// the bundled US-English Lessac voice.
+    #[serde(default)]
+    voice_id: Option<String>,
+    /// When true, blow away any existing voice file and re-download.
+    #[serde(default)]
+    force: Option<bool>,
+}
+
 pub fn all_controller_schemas() -> Vec<ControllerSchema> {
     vec![
         schemas("agent_chat"),
@@ -138,6 +160,10 @@ pub fn all_controller_schemas() -> Vec<ControllerSchema> {
         schemas("local_ai_analyze_sentiment"),
         schemas("local_ai_should_send_gif"),
         schemas("local_ai_tenor_search"),
+        schemas("local_ai_install_whisper"),
+        schemas("local_ai_install_piper"),
+        schemas("local_ai_whisper_install_status"),
+        schemas("local_ai_piper_install_status"),
     ]
 }
 
@@ -243,6 +269,22 @@ pub fn all_registered_controllers() -> Vec<RegisteredController> {
             schema: schemas("local_ai_tenor_search"),
             handler: handle_local_ai_tenor_search,
         },
+        RegisteredController {
+            schema: schemas("local_ai_install_whisper"),
+            handler: handle_local_ai_install_whisper,
+        },
+        RegisteredController {
+            schema: schemas("local_ai_install_piper"),
+            handler: handle_local_ai_install_piper,
+        },
+        RegisteredController {
+            schema: schemas("local_ai_whisper_install_status"),
+            handler: handle_local_ai_whisper_install_status,
+        },
+        RegisteredController {
+            schema: schemas("local_ai_piper_install_status"),
+            handler: handle_local_ai_piper_install_status,
+        },
     ]
 }
 
@@ -492,6 +534,52 @@ pub fn schemas(function: &str) -> ControllerSchema {
             ],
             outputs: vec![json_output("result", "Tenor search result: {results, next}.")],
         },
+        "local_ai_install_whisper" => ControllerSchema {
+            namespace: "local_ai",
+            function: "install_whisper",
+            description: "Download whisper.cpp's GGML model (and on Windows the whisper-cli binary) into the workspace so the local STT factory has everything it needs to run.",
+            inputs: vec![
+                optional_string(
+                    "model_size",
+                    "Whisper model size (tiny, base, small, medium, large-v3-turbo). Defaults to large-v3-turbo.",
+                ),
+                optional_bool(
+                    "force",
+                    "When true, re-download even if the workspace already has a matching model.",
+                ),
+            ],
+            outputs: vec![json_output("status", "Whisper install status payload.")],
+        },
+        "local_ai_install_piper" => ControllerSchema {
+            namespace: "local_ai",
+            function: "install_piper",
+            description: "Download the Piper binary archive and the bundled en_US-lessac-medium voice files into the workspace.",
+            inputs: vec![
+                optional_string(
+                    "voice_id",
+                    "Piper voice id (e.g. en_US-lessac-medium). Defaults to en_US-lessac-medium.",
+                ),
+                optional_bool(
+                    "force",
+                    "When true, re-download even if the workspace already has the voice files.",
+                ),
+            ],
+            outputs: vec![json_output("status", "Piper install status payload.")],
+        },
+        "local_ai_whisper_install_status" => ControllerSchema {
+            namespace: "local_ai",
+            function: "whisper_install_status",
+            description: "Query the Whisper install state (missing / installing / installed / broken / error) plus per-stage download progress.",
+            inputs: vec![],
+            outputs: vec![json_output("status", "Whisper install status payload.")],
+        },
+        "local_ai_piper_install_status" => ControllerSchema {
+            namespace: "local_ai",
+            function: "piper_install_status",
+            description: "Query the Piper install state (missing / installing / installed / broken / error) plus per-stage download progress.",
+            inputs: vec![],
+            outputs: vec![json_output("status", "Piper install status payload.")],
+        },
         _ => ControllerSchema {
             namespace: "local_ai",
             function: "unknown",
@@ -927,6 +1015,149 @@ fn handle_local_ai_chat(params: Map<String, Value>) -> ControllerFuture {
     })
 }
 
+// The install RPCs are intentionally fire-and-forget: a binary+model
+// download can take minutes (1.6 GB GGML model, ~5 MB Piper binary
+// archive) but the core JSON-RPC client times out at
+// VITE_CORE_RPC_TIMEOUT_MS (default 30s). Blocking the handler on the
+// full download would force the UI into a retry loop that deletes the
+// in-flight .part on each retry, looping forever.
+//
+// Shape: mark the engine as `installing(0%)` in the shared status table,
+// spawn the real install on a background tokio task, return the
+// just-written status immediately. The UI's status-polling RPC
+// (handle_local_ai_*_install_status) reads from the same table and
+// renders real-time progress. The eventual `installed` / `error`
+// transition lands on the table when the background task finishes;
+// no caller awaits it.
+
+fn handle_local_ai_install_whisper(params: Map<String, Value>) -> ControllerFuture {
+    Box::pin(async move {
+        let p = deserialize_params::<LocalAiInstallWhisperParams>(params)?;
+        let config = config_rpc::load_config_with_timeout().await?;
+        let force = p.force.unwrap_or(false);
+
+        // Idempotency: a duplicate click while an install is already in
+        // flight should be a no-op, not a second concurrent download.
+        let current = crate::openhuman::local_ai::voice_install_common::read_status(
+            crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER,
+        );
+        if current.state
+            == crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing
+        {
+            tracing::debug!(
+                "[voice-install:whisper] already installing — returning current status"
+            );
+            return serde_json::to_value(current)
+                .map_err(|e| format!("serialize whisper status: {e}"));
+        }
+
+        // Mark "installing" before the spawn so the very next status poll
+        // (≤ 2s away) reflects the new state without a stale read.
+        crate::openhuman::local_ai::voice_install_common::write_status(
+            crate::openhuman::local_ai::voice_install_common::VoiceInstallStatus {
+                engine: crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER
+                    .to_string(),
+                state:
+                    crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing,
+                progress: Some(0),
+                downloaded_bytes: None,
+                total_bytes: None,
+                stage: Some("queued".to_string()),
+                error_detail: None,
+            },
+        );
+
+        tracing::debug!(
+            model_size = ?p.model_size,
+            force,
+            "[voice-install:whisper] spawning background install"
+        );
+        let model_size = p.model_size.clone();
+        tokio::spawn(async move {
+            if let Err(e) = crate::openhuman::local_ai::install_whisper::install_whisper(
+                &config, model_size, force,
+            )
+            .await
+            {
+                log::warn!("[voice-install:whisper] background install failed: {e}");
+            }
+        });
+
+        let status = crate::openhuman::local_ai::voice_install_common::read_status(
+            crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER,
+        );
+        serde_json::to_value(status).map_err(|e| format!("serialize whisper status: {e}"))
+    })
+}
+
+fn handle_local_ai_install_piper(params: Map<String, Value>) -> ControllerFuture {
+    Box::pin(async move {
+        let p = deserialize_params::<LocalAiInstallPiperParams>(params)?;
+        let config = config_rpc::load_config_with_timeout().await?;
+        let force = p.force.unwrap_or(false);
+
+        let current = crate::openhuman::local_ai::voice_install_common::read_status(
+            crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER,
+        );
+        if current.state
+            == crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing
+        {
+            tracing::debug!("[voice-install:piper] already installing — returning current status");
+            return serde_json::to_value(current)
+                .map_err(|e| format!("serialize piper status: {e}"));
+        }
+
+        crate::openhuman::local_ai::voice_install_common::write_status(
+            crate::openhuman::local_ai::voice_install_common::VoiceInstallStatus {
+                engine: crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER.to_string(),
+                state:
+                    crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing,
+                progress: Some(0),
+                downloaded_bytes: None,
+                total_bytes: None,
+                stage: Some("queued".to_string()),
+                error_detail: None,
+            },
+        );
+
+        tracing::debug!(
+            voice_id = ?p.voice_id,
+            force,
+            "[voice-install:piper] spawning background install"
+        );
+        let voice_id = p.voice_id.clone();
+        tokio::spawn(async move {
+            if let Err(e) =
+                crate::openhuman::local_ai::install_piper::install_piper(&config, voice_id, force)
+                    .await
+            {
+                log::warn!("[voice-install:piper] background install failed: {e}");
+            }
+        });
+
+        let status = crate::openhuman::local_ai::voice_install_common::read_status(
+            crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER,
+        );
+        serde_json::to_value(status).map_err(|e| format!("serialize piper status: {e}"))
+    })
+}
+
+fn handle_local_ai_whisper_install_status(_params: Map<String, Value>) -> ControllerFuture {
+    Box::pin(async move {
+        let config = config_rpc::load_config_with_timeout().await?;
+        let status = crate::openhuman::local_ai::install_whisper::status(&config);
+        serde_json::to_value(status).map_err(|e| format!("serialize whisper status: {e}"))
+    })
+}
+
+fn handle_local_ai_piper_install_status(_params: Map<String, Value>) -> ControllerFuture {
+    Box::pin(async move {
+        let config = config_rpc::load_config_with_timeout().await?;
+        let status = crate::openhuman::local_ai::install_piper::status(&config);
+        serde_json::to_value(status).map_err(|e| format!("serialize piper status: {e}"))
+    })
+}
+
 fn deserialize_params<T: DeserializeOwned>(params: Map<String, Value>) -> Result<T, String> {
     serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}"))
 }
diff --git a/src/openhuman/local_ai/voice_install_common.rs b/src/openhuman/local_ai/voice_install_common.rs
new file mode 100644
index 0000000000..86dbb85884
--- /dev/null
+++ b/src/openhuman/local_ai/voice_install_common.rs
@@ -0,0 +1,485 @@
+//! Shared installer plumbing for the local voice stack (Whisper + Piper).
+//!
+//! Both installers need the same primitives:
+//!
+//! - Stream a URL to disk via `.part` suffix + atomic rename so a crash
+//!   never leaves a corrupt artifact that downstream code (the STT/TTS
+//!   factory) tries to load.
+//! - Validate either a known SHA256 (when upstream publishes one) or a
+//!   minimum size threshold so a truncated download doesn't masquerade as
+//!   a finished install.
+//! - Surface per-engine state (missing, installing, installed, broken,
+//!   error) on a polled status RPC — matches the existing
+//!   `local_ai_downloads_progress` UX so the VoicePanel can reuse the
+//!   same progress UI primitives without inventing a new event-bus channel.
+//!
+//! The Ollama installer fires-and-forgets a single PowerShell / sh block
+//! and lets the OS own that process. For Whisper and Piper we need
+//! finer-grained progress reporting (the GGML model file alone is up to
+//! 1.6 GB and users absolutely will need a percentage indicator) so the
+//! shared harness here streams the body chunks itself and updates a
+//! singleton state map keyed by engine id.
+
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::Mutex;
+use std::time::{Duration, Instant};
+
+use futures_util::StreamExt;
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+
+/// Overall request timeout for a single install download. 30 minutes covers
+/// the 1.6 GB `ggml-large-v3-turbo` model at a sustained ~7 Mbps (≈0.9 MB/s);
+/// slower links will hit this limit before the download can finish.
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(1800);
+
+/// Per-chunk idle timeout. If the body stream produces no bytes for this
+/// long, treat the connection as dead and abort so the caller can retry
+/// from a clean state. Without this guard, a half-open TCP connection (the
+/// failure mode behind the "progress stuck at 18%" symptom) holds the
+/// install task forever, defeating the polled-status UX.
+const CHUNK_IDLE_TIMEOUT: Duration = Duration::from_secs(45);
+use tokio::io::AsyncWriteExt;
+
+/// Stable engine id for status tracking. The two installers register their
+/// progress under these keys; the status RPC reads them back.
+pub const ENGINE_WHISPER: &str = "whisper";
+pub const ENGINE_PIPER: &str = "piper";
+
+/// Lifecycle state for a voice-engine install. Mirrors the state machine
+/// the Ollama installer exposes via `LocalAiStatus.state`.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum VoiceInstallState {
+    /// Nothing has happened — no binaries, no models. The default state
+    /// when the workspace has never been touched.
+    Missing,
+    /// An install is in flight. `progress` and `downloaded_bytes` will be
+    /// updated as chunks land.
+    Installing,
+    /// All required artifacts (binary + at least one default model) are
+    /// present and pass validation.
+    Installed,
+    /// The expected install dir contains artifacts but they fail
+    /// validation (e.g. size below threshold, hash mismatch, missing
+    /// `.onnx.json` sidecar). The user should re-run install.
+    Broken,
+    /// The last install attempt errored. `error_detail` carries the
+    /// human-readable reason.
+    Error,
+}
+
+impl VoiceInstallState {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            VoiceInstallState::Missing => "missing",
+            VoiceInstallState::Installing => "installing",
+            VoiceInstallState::Installed => "installed",
+            VoiceInstallState::Broken => "broken",
+            VoiceInstallState::Error => "error",
+        }
+    }
+}
+
+/// Snapshot returned over JSON-RPC for one engine's installer.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VoiceInstallStatus {
+    /// Stable engine id (`"whisper"` / `"piper"`).
+    pub engine: String,
+    /// Current state — see [`VoiceInstallState`].
+    pub state: VoiceInstallState,
+    /// 0-100 percent for the in-flight download (`None` when state is not
+    /// `Installing`).
+    pub progress: Option<u8>,
+    /// Bytes received so far.
+    pub downloaded_bytes: Option<u64>,
+    /// Total bytes expected (from `Content-Length` — may be `None` for
+    /// chunked transfer encoding).
+    pub total_bytes: Option<u64>,
+    /// Free-text status line — what file we're downloading, what stage
+    /// we're at. Useful for the UI to show "Downloading whisper-cli…" vs
+    /// "Downloading ggml-large-v3-turbo.bin…".
+    pub stage: Option<String>,
+    /// Populated when `state == Error` — the user-facing failure reason.
+    pub error_detail: Option<String>,
+}
+
+impl VoiceInstallStatus {
+    fn missing(engine: &str) -> Self {
+        Self {
+            engine: engine.to_string(),
+            state: VoiceInstallState::Missing,
+            progress: None,
+            downloaded_bytes: None,
+            total_bytes: None,
+            stage: None,
+            error_detail: None,
+        }
+    }
+}
+
+/// In-memory status table — keyed by engine id. Both installers share
+/// this so the status RPC can answer for either engine without a separate
+/// store.
+static STATUS_TABLE: once_cell::sync::Lazy<Mutex<HashMap<String, VoiceInstallStatus>>> =
+    once_cell::sync::Lazy::new(|| Mutex::new(HashMap::new()));
+
+/// Fetch the current status snapshot for `engine`. Returns `Missing`
+/// when the engine has never been touched.
+pub fn read_status(engine: &str) -> VoiceInstallStatus {
+    STATUS_TABLE
+        .lock()
+        .expect("voice install status lock poisoned")
+        .get(engine)
+        .cloned()
+        .unwrap_or_else(|| VoiceInstallStatus::missing(engine))
+}
+
+/// Replace the snapshot for `engine`. Internal helper for the installer
+/// flow — exposed at module scope so install_whisper / install_piper can
+/// update progress without going through a public setter API.
+pub(crate) fn write_status(status: VoiceInstallStatus) {
+    log::debug!(
+        "[voice-install] status update engine={} state={} progress={:?} stage={:?}",
+        status.engine,
+        status.state.as_str(),
+        status.progress,
+        status.stage,
+    );
+    let mut table = STATUS_TABLE
+        .lock()
+        .expect("voice install status lock poisoned");
+    table.insert(status.engine.clone(), status);
+}
+
+/// Force a fresh missing state for `engine`. Test-only helper: it is
+/// compiled under `#[cfg(test)]`, so non-test builds cannot call it.
+#[cfg(test)]
+pub(crate) fn reset_status(engine: &str) {
+    let mut table = STATUS_TABLE
+        .lock()
+        .expect("voice install status lock poisoned");
+    table.remove(engine);
+}
+
+/// Download `url` to `dest` with atomic rename. Always validates that the
+/// final size is at least `min_bytes`, and additionally verifies the
+/// streamed SHA256 when `expected_sha256` is provided.
+///
+/// The on-disk write goes to `<dest>.part` first and is `rename`d into
+/// place only after all checks pass. If the function is interrupted
+/// mid-stream (process killed, network drop) the `.part` file is the
+/// only thing left behind; the next call detects and overwrites it so
+/// we never read a half-written model.
+///
+/// Progress callbacks fire every chunk with `(downloaded_bytes,
+/// total_bytes)`. Total may be `None` for chunked responses.
+pub async fn download_to_file(
+    url: &str,
+    dest: &Path,
+    expected_sha256: Option<&str>,
+    min_bytes: u64,
+    log_prefix: &str,
+    mut on_progress: impl FnMut(u64, Option<u64>),
+) -> Result<(), String> {
+    if let Some(parent) = dest.parent() {
+        tokio::fs::create_dir_all(parent)
+            .await
+            .map_err(|e| format!("{log_prefix} mkdir {}: {e}", parent.display()))?;
+    }
+
+    let part_path = part_path(dest);
+    // Always start from scratch — resumable HTTP Range support is
+    // useful but not free (servers must return 206, hash state has to
+    // restart on hash mismatch). For the MVP we restart cleanly and
+    // ensure `.part` is removed first so we never accidentally append
+    // to leftover bytes from an earlier failed attempt.
+    if part_path.exists() {
+        let _ = tokio::fs::remove_file(&part_path).await;
+    }
+
+    log::debug!("{log_prefix} GET {url} -> {}", part_path.display());
+    let client = reqwest::Client::builder()
+        // 15s connect handshake; 30min overall request budget (covers 1.6 GB
+        // GGML model at ~7 Mbps sustained). Per-chunk idle timeout is enforced
+        // separately on each stream read below so half-open connections
+        // fail fast instead of hanging the install task forever.
+        .connect_timeout(Duration::from_secs(15))
+        .timeout(REQUEST_TIMEOUT)
+        .build()
+        .map_err(|e| format!("{log_prefix} build http client: {e}"))?;
+    let started = Instant::now();
+    let resp = client
+        .get(url)
+        .send()
+        .await
+        .map_err(|e| format!("{log_prefix} request {url}: {e}"))?;
+    if !resp.status().is_success() {
+        return Err(format!(
+            "{log_prefix} non-2xx response from {url}: {}",
+            resp.status()
+        ));
+    }
+    let total = resp.content_length();
+    log::debug!(
+        "{log_prefix} response status={} content_length={:?}",
+        resp.status(),
+        total
+    );
+
+    let mut file = tokio::fs::File::create(&part_path)
+        .await
+        .map_err(|e| format!("{log_prefix} create {}: {e}", part_path.display()))?;
+    let mut hasher = expected_sha256.is_some().then(Sha256::new);
+    let mut downloaded: u64 = 0;
+    let mut stream = resp.bytes_stream();
+    loop {
+        // Per-chunk idle timeout — if no bytes arrive within CHUNK_IDLE_TIMEOUT,
+        // bail out so a stalled half-open TCP connection doesn't hold the install
+        // task forever. Clean up the .part on the way out so a retry starts fresh.
+        let next = tokio::time::timeout(CHUNK_IDLE_TIMEOUT, stream.next()).await;
+        let chunk = match next {
+            Ok(Some(chunk)) => chunk,
+            Ok(None) => break,
+            Err(_) => {
+                drop(file);
+                let _ = tokio::fs::remove_file(&part_path).await;
+                return Err(format!(
+                    "{log_prefix} body stream idle for >{}s after {downloaded} bytes; aborting",
+                    CHUNK_IDLE_TIMEOUT.as_secs()
+                ));
+            }
+        };
+        let bytes = match chunk {
+            Ok(bytes) => bytes,
+            Err(e) => {
+                drop(file);
+                let _ = tokio::fs::remove_file(&part_path).await;
+                return Err(format!("{log_prefix} body stream: {e}"));
+            }
+        };
+        if let Some(h) = hasher.as_mut() {
+            h.update(&bytes);
+        }
+        if let Err(e) = file.write_all(&bytes).await {
+            drop(file);
+            let _ = tokio::fs::remove_file(&part_path).await;
+            return Err(format!("{log_prefix} write {}: {e}", part_path.display()));
+        }
+        downloaded = downloaded.saturating_add(bytes.len() as u64);
+        on_progress(downloaded, total);
+    }
+    file.flush()
+        .await
+        .map_err(|e| format!("{log_prefix} flush {}: {e}", part_path.display()))?;
+    drop(file);
+
+    if downloaded < min_bytes {
+        let _ = tokio::fs::remove_file(&part_path).await;
+        return Err(format!(
+            "{log_prefix} downloaded payload too small: {downloaded} bytes < min {min_bytes}"
+        ));
+    }
+    if let (Some(expected), Some(hasher)) = (expected_sha256, hasher) {
+        let got = hex::encode(hasher.finalize());
+        let expected_norm = expected.trim().to_ascii_lowercase();
+        if got != expected_norm {
+            // Never log the full file contents on mismatch — just the hashes.
+            log::warn!(
+                "{log_prefix} sha256 mismatch expected={} got={}",
+                expected_norm,
+                got
+            );
+            let _ = tokio::fs::remove_file(&part_path).await;
+            return Err(format!(
+                "{log_prefix} sha256 mismatch (expected {expected_norm}, got {got})"
+            ));
+        }
+    }
+
+    // Atomic rename — only after all checks pass. On Windows
+    // `tokio::fs::rename` maps to `MoveFileExW` which fails if the dest
+    // already exists, so remove it first.
+    if dest.exists() {
+        tokio::fs::remove_file(dest)
+            .await
+            .map_err(|e| format!("{log_prefix} remove existing {}: {e}", dest.display()))?;
+    }
+    tokio::fs::rename(&part_path, dest).await.map_err(|e| {
+        format!(
+            "{log_prefix} rename {} -> {}: {e}",
+            part_path.display(),
+            dest.display()
+        )
+    })?;
+    log::debug!(
+        "{log_prefix} downloaded {} bytes -> {} elapsed_ms={}",
+        downloaded,
+        dest.display(),
+        started.elapsed().as_millis()
+    );
+    Ok(())
+}
+
+/// Returns `dest` with `.part` appended to the whole path (extension kept).
+pub fn part_path(dest: &Path) -> PathBuf {
+    let mut staged = dest.to_path_buf().into_os_string();
+    staged.push(".part");
+    staged.into()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn part_path_appends_part_suffix() {
+        let p = part_path(Path::new("/tmp/foo.bin"));
+        assert_eq!(
+            p.file_name().unwrap().to_string_lossy(),
+            "foo.bin.part",
+            "should append .part"
+        );
+    }
+
+    #[test]
+    fn part_path_handles_no_extension() {
+        let p = part_path(Path::new("/tmp/binaryname"));
+        assert_eq!(p.file_name().unwrap().to_string_lossy(), "binaryname.part");
+    }
+
+    #[test]
+    fn voice_install_state_as_str_is_stable() {
+        // The UI relies on the lowercase string form — guard against an
+        // accidental rename breaking the wire contract.
+        assert_eq!(VoiceInstallState::Missing.as_str(), "missing");
+        assert_eq!(VoiceInstallState::Installing.as_str(), "installing");
+        assert_eq!(VoiceInstallState::Installed.as_str(), "installed");
+        assert_eq!(VoiceInstallState::Broken.as_str(), "broken");
+        assert_eq!(VoiceInstallState::Error.as_str(), "error");
+    }
+
+    #[test]
+    fn read_status_defaults_to_missing_for_unseen_engine() {
+        let unique = format!("test-engine-{}", uuid::Uuid::new_v4());
+        let snapshot = read_status(&unique);
+        assert_eq!(snapshot.state, VoiceInstallState::Missing);
+        assert_eq!(snapshot.engine, unique);
+        assert!(snapshot.progress.is_none());
+    }
+
+    #[test]
+    fn write_and_read_status_roundtrip() {
+        let engine = format!("rt-{}", uuid::Uuid::new_v4());
+        let status = VoiceInstallStatus {
+            engine: engine.clone(),
+            state: VoiceInstallState::Installing,
+            progress: Some(42),
+            downloaded_bytes: Some(1024),
+            total_bytes: Some(2048),
+            stage: Some("downloading model".to_string()),
+            error_detail: None,
+        };
+        write_status(status);
+        let got = read_status(&engine);
+        assert_eq!(got.state, VoiceInstallState::Installing);
+        assert_eq!(got.progress, Some(42));
+        assert_eq!(got.stage.as_deref(), Some("downloading model"));
+        // Clean up so the suite stays deterministic for parallel runs.
+        reset_status(&engine);
+    }
+
+    #[test]
+    fn reset_status_returns_engine_to_missing() {
+        let engine = format!("rs-{}", uuid::Uuid::new_v4());
+        write_status(VoiceInstallStatus {
+            engine: engine.clone(),
+            state: VoiceInstallState::Installed,
+            progress: None,
+            downloaded_bytes: None,
+            total_bytes: None,
+            stage: None,
+            error_detail: None,
+        });
+        reset_status(&engine);
+        assert_eq!(read_status(&engine).state, VoiceInstallState::Missing);
+    }
+
+    #[tokio::test]
+    async fn download_to_file_cleans_up_on_connect_failure() {
+        // Connection-failure guard: the request errors out before any body
+        // bytes are written. Target a localhost port that is expected to be
+        // closed (port 1) so the test never leaves the machine.
+        let dir = tempfile::tempdir().unwrap();
+        let dest = dir.path().join("never.bin");
+        let result = download_to_file(
+            "http://127.0.0.1:1/never",
+            &dest,
+            None,
+            10,
+            "[voice-install:test]",
+            |_, _| {},
+        )
+        .await;
+        assert!(result.is_err(), "expected network error on unused port");
+        // No `.part` should be left behind on a connection failure.
+        let part = part_path(&dest);
+        assert!(
+            !part.exists(),
+            "no part file should remain after pre-stream failure"
+        );
+    }
+
+    #[tokio::test]
+    async fn download_to_file_streams_and_renames_atomically() {
+        // reqwest's test infrastructure (a hyper-based in-process server)
+        // isn't available here, so stand up a tiny one-shot TCP listener
+        // that serves a fixed body. Keep the body small so the test stays
+        // fast.
+        use std::io::Write as _;
+        use std::net::TcpListener;
+        let listener = TcpListener::bind("127.0.0.1:0").unwrap();
+        let addr = listener.local_addr().unwrap();
+        let body = b"hello voice-install body";
+        let server = tokio::task::spawn_blocking(move || {
+            let (mut sock, _) = listener.accept().unwrap();
+            // Drain request bytes — we only need headers.
+            let mut buf = [0u8; 1024];
+            use std::io::Read as _;
+            let _ = sock.read(&mut buf);
+            let response = format!(
+                "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: application/octet-stream\r\nConnection: close\r\n\r\n",
+                body.len()
+            );
+            sock.write_all(response.as_bytes()).unwrap();
+            sock.write_all(body).unwrap();
+            sock.flush().unwrap();
+        });
+
+        let dir = tempfile::tempdir().unwrap();
+        let dest = dir.path().join("hello.bin");
+        let url = format!("http://{addr}/hello");
+        let mut last_progress = (0u64, None);
+        let result = download_to_file(
+            &url,
+            &dest,
+            None,
+            5,
+            "[voice-install:test]",
+            |downloaded, total| {
+                last_progress = (downloaded, total);
+            },
+        )
+        .await;
+        server.await.unwrap();
+        assert!(result.is_ok(), "download failed: {result:?}");
+        let on_disk = tokio::fs::read(&dest).await.unwrap();
+        assert_eq!(on_disk.as_slice(), body, "wrong bytes landed on disk");
+        assert!(last_progress.0 > 0, "progress callback should fire");
+        assert!(
+            !part_path(&dest).exists(),
+            "part file should be renamed away"
+        );
+    }
+}
diff --git a/src/openhuman/voice/factory.rs b/src/openhuman/voice/factory.rs
new file mode 100644
index 0000000000..db24805185
--- /dev/null
+++ b/src/openhuman/voice/factory.rs
@@ -0,0 +1,552 @@
+//! Factory functions for creating voice (STT / TTS) providers.
+//!
+//! Mirrors the shape of [`crate::openhuman::embeddings::factory`]: a single
+//! entry point that takes a provider name + parameters and returns a boxed
+//! trait object. Production paths pick the provider based on the user's
+//! config (`stt_provider`, `tts_provider`); unit tests use the factory
+//! directly to verify dispatch branches.
+//!
+//! ## STT providers
+//!
+//! - `"cloud"` → backend Whisper proxy (POST `/openai/v1/audio/transcriptions`).
+//!   Same path the renamed `MicComposer` used to call directly. Keeps the API key
+//!   off the desktop, costs network round-trip latency.
+//! - `"whisper"` → local Whisper via the `WHISPER_BIN` env var (or in-process
+//!   `whisper-rs` engine when `local_ai.whisper_in_process` is on). Zero
+//!   network, but the user has to download the model. Default model:
+//!   `whisper-large-v3-turbo` (recommended) or smaller variants
+//!   (`tiny / base / small / medium`) for lower-end hardware.
+//!
+//! ## TTS providers
+//!
+//! - `"cloud"` → backend ElevenLabs proxy (POST `/openai/v1/audio/speech`)
+//!   which also returns Oculus-15 visemes for the mascot lip-sync.
+//! - `"piper"` → local Piper subprocess via `PIPER_BIN`. Lower latency than
+//!   ElevenLabs and runs offline; default voice `en_US-lessac-medium`.
+//!   **Note**: Kokoro (higher quality, 82M params) is intentionally out of
+//!   scope for this ship — `PIPER_BIN` is already reserved in `.env.example`
+//!   and Piper is the simpler integration. Kokoro is tracked as future work.
+//!
+//! ## Logging prefixes
+//!
+//! All factory branches log against `[voice-factory]`; the wrapped provider
+//! implementations log under `[voice-stt]` / `[voice-tts]` so end-to-end
+//! traces grep cleanly.
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use log::debug;
+use serde::{Deserialize, Serialize};
+
+use super::cloud_transcribe::{transcribe_cloud, CloudTranscribeOptions, CloudTranscribeResult};
+use super::local_speech::{synthesize_piper, PiperOptions};
+use super::local_transcribe::{transcribe_whisper, WhisperTranscribeOptions};
+use super::reply_speech::{synthesize_reply, ReplySpeechOptions, ReplySpeechResult};
+use crate::openhuman::config::Config;
+use crate::rpc::RpcOutcome;
+
+const LOG_PREFIX: &str = "[voice-factory]";
+
+// ---------------------------------------------------------------------------
+// Provider traits
+// ---------------------------------------------------------------------------
+
+/// Result shape shared by every STT provider, so the wire contract does not
+/// depend on which one ran: the transcript `text` plus the `provider` id.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SttResult {
+    pub text: String,
+    /// Lowercase provider id (`"cloud"`, `"whisper"`) — exposed on the wire
+    /// so the renderer can show the user which path actually ran.
+    pub provider: String,
+}
+
+/// Speech-to-text provider abstraction. Cloud (backend proxy) and Whisper
+/// (local subprocess / in-process) both implement this; the factory hands
+/// the caller a boxed trait object.
+#[async_trait]
+pub trait SttProvider: Send + Sync {
+    /// Stable identifier used in logs and config (`"cloud"`, `"whisper"`).
+    fn name(&self) -> &'static str;
+
+    /// Transcribe a single base64-encoded audio blob into an [`SttResult`].
+    ///
+    /// `mime_type` and `file_name` are hints; providers that don't care
+    /// may ignore them. `language` is BCP-47 (`"en"`, `"es"`); pass `None`
+    /// to auto-detect. Failures are reported as a plain `String` message.
+    async fn transcribe(
+        &self,
+        config: &Config,
+        audio_base64: &str,
+        mime_type: Option<&str>,
+        file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<RpcOutcome<SttResult>, String>;
+}
+
+/// Text-to-speech provider abstraction. Cloud returns rich viseme alignment
+/// (used by the mascot lip-sync); Piper returns audio only and the caller
+/// derives a flat viseme timeline downstream.
+#[async_trait]
+pub trait TtsProvider: Send + Sync {
+    /// Stable identifier used in logs and config (`"cloud"`, `"piper"`).
+    fn name(&self) -> &'static str;
+
+    /// Synthesize speech for `text`, optionally overriding the voice. Returns
+    /// the `voice.reply_synthesize` envelope shape so providers are swappable.
+    async fn synthesize(
+        &self,
+        config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String>;
+}
+
+// ---------------------------------------------------------------------------
+// Cloud STT
+// ---------------------------------------------------------------------------
+
+/// Speech-to-text provider backed by the cloud proxy ([`transcribe_cloud`]).
+pub struct CloudSttProvider {
+    model: String,
+}
+
+impl CloudSttProvider {
+    /// Stores `model`; it is sent as the requested model on every call.
+    pub fn new(model: impl Into<String>) -> Self {
+        let model = model.into();
+        Self { model }
+    }
+}
+
+#[async_trait]
+impl SttProvider for CloudSttProvider {
+    fn name(&self) -> &'static str {
+        "cloud"
+    }
+
+    /// Forwards the audio to [`transcribe_cloud`] with this provider's model
+    /// and re-wraps the returned text as an [`SttResult`] tagged `"cloud"`.
+    async fn transcribe(
+        &self,
+        config: &Config,
+        audio_base64: &str,
+        mime_type: Option<&str>,
+        file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<RpcOutcome<SttResult>, String> {
+        debug!(
+            "{LOG_PREFIX} cloud STT dispatch model={} bytes_b64={}",
+            self.model,
+            audio_base64.len()
+        );
+        let request = CloudTranscribeOptions {
+            model: Some(self.model.clone()),
+            language: language.map(String::from),
+            mime_type: mime_type.map(String::from),
+            file_name: file_name.map(String::from),
+        };
+        let reply = transcribe_cloud(config, audio_base64, &request).await?;
+        let CloudTranscribeResult { text } = reply.value;
+        let result = SttResult {
+            text,
+            provider: self.name().to_string(),
+        };
+        Ok(RpcOutcome::single_log(result, "voice-factory: cloud STT completed"))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Local Whisper STT
+// ---------------------------------------------------------------------------
+
+/// Speech-to-text provider backed by local Whisper ([`transcribe_whisper`]).
+/// Construction only records the model; all engine work happens per call.
+pub struct WhisperSttProvider {
+    model: String,
+}
+
+impl WhisperSttProvider {
+    /// Stores `model`; it is sent as the requested model on every call.
+    pub fn new(model: impl Into<String>) -> Self {
+        let model = model.into();
+        Self { model }
+    }
+}
+
+#[async_trait]
+impl SttProvider for WhisperSttProvider {
+    fn name(&self) -> &'static str {
+        "whisper"
+    }
+
+    /// Forwards the audio to [`transcribe_whisper`]; `_file_name` is unused.
+    /// The returned text is re-wrapped as an [`SttResult`] tagged `"whisper"`.
+    async fn transcribe(
+        &self,
+        config: &Config,
+        audio_base64: &str,
+        mime_type: Option<&str>,
+        _file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<RpcOutcome<SttResult>, String> {
+        debug!(
+            "{LOG_PREFIX} whisper STT dispatch model={} mime={:?} lang={:?}",
+            self.model, mime_type, language
+        );
+        let request = WhisperTranscribeOptions {
+            model: Some(self.model.clone()),
+            mime_type: mime_type.map(String::from),
+            language: language.map(String::from),
+        };
+        let reply = transcribe_whisper(config, audio_base64, &request).await?;
+        let result = SttResult {
+            text: reply.value.text,
+            provider: self.name().to_string(),
+        };
+        Ok(RpcOutcome::single_log(result, "voice-factory: whisper STT completed"))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Cloud TTS
+// ---------------------------------------------------------------------------
+
+/// Cloud TTS — wraps [`synthesize_reply`] (backend ElevenLabs proxy).
+pub struct CloudTtsProvider {
+    voice: Option<String>,
+}
+
+impl CloudTtsProvider {
+    pub fn new(voice: Option<String>) -> Self {
+        Self { voice }
+    }
+}
+
+#[async_trait]
+impl TtsProvider for CloudTtsProvider {
+    fn name(&self) -> &'static str {
+        "cloud"
+    }
+
+    /// Synthesizes `text` through [`synthesize_reply`]. A non-blank per-call
+    /// `voice` wins, then the configured voice; otherwise no voice id is sent.
+    async fn synthesize(
+        &self,
+        config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String> {
+        // Drop blank values *before* falling back so `Some("")` can't mask it.
+        let resolved_voice = voice
+            .filter(|s| !s.trim().is_empty())
+            .map(str::to_string)
+            .or_else(|| self.voice.clone().filter(|s| !s.trim().is_empty()));
+        let shown = resolved_voice.as_deref().unwrap_or("<default>");
+        debug!("{LOG_PREFIX} cloud TTS dispatch voice={} chars={}", shown, text.len());
+        let opts = ReplySpeechOptions {
+            voice_id: resolved_voice,
+            model_id: None,
+            output_format: None,
+            voice_settings: None,
+        };
+        synthesize_reply(config, text, &opts).await
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Local Piper TTS
+// ---------------------------------------------------------------------------
+
+/// Text-to-speech provider backed by local Piper ([`synthesize_piper`]).
+pub struct PiperTtsProvider {
+    voice: String,
+}
+
+impl PiperTtsProvider {
+    /// Stores `voice` as the fallback used when a call supplies none.
+    pub fn new(voice: impl Into<String>) -> Self {
+        let voice = voice.into();
+        Self { voice }
+    }
+}
+
+#[async_trait]
+impl TtsProvider for PiperTtsProvider {
+    fn name(&self) -> &'static str {
+        "piper"
+    }
+
+    /// Synthesizes `text` through [`synthesize_piper`]. A non-blank per-call
+    /// `voice` wins; otherwise the voice given at construction is used.
+    async fn synthesize(
+        &self,
+        config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String> {
+        // `None`, empty and whitespace-only overrides all take the fallback;
+        // an accepted override is passed through exactly as given (untrimmed).
+        let chosen = match voice {
+            Some(v) if !v.trim().is_empty() => v.to_string(),
+            _ => self.voice.clone(),
+        };
+        debug!("{LOG_PREFIX} piper TTS dispatch voice={} chars={}", chosen, text.len());
+        let piper_opts = PiperOptions {
+            voice: Some(chosen),
+        };
+        synthesize_piper(config, text, &piper_opts).await
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Factory entry points (mirrors embeddings/factory.rs)
+// ---------------------------------------------------------------------------
+
+/// Builds the speech-to-text provider selected by `provider`.
+///
+/// Recognised names (surrounding whitespace is ignored):
+/// - `"cloud"` → [`CloudSttProvider`] using the cloud default model; the
+///   `model` argument is not consulted on this branch
+/// - `"whisper"` → [`WhisperSttProvider`] using `model`, or
+///   [`DEFAULT_WHISPER_MODEL`] when `model` is blank
+///
+/// Any other name yields an error that echoes the name and lists the
+/// supported ones, so configuration mistakes surface immediately.
+///
+/// Nothing is resolved eagerly here: constructing a provider only stores
+/// the model id, so a misconfigured local install fails at use-time inside
+/// `transcribe()` rather than at startup. `_config` is accepted but not
+/// currently read.
+pub fn create_stt_provider(
+    provider: &str,
+    model: &str,
+    _config: &Config,
+) -> anyhow::Result<Box<dyn SttProvider>> {
+    debug!("{LOG_PREFIX} create_stt_provider provider={provider} model={model}");
+    let built: Box<dyn SttProvider> = match provider.trim() {
+        "cloud" => Box::new(CloudSttProvider::new(
+            super::cloud_transcribe_default_model(),
+        )),
+        "whisper" => {
+            let blank = model.trim().is_empty();
+            let chosen = if blank { DEFAULT_WHISPER_MODEL } else { model };
+            Box::new(WhisperSttProvider::new(chosen))
+        }
+        unknown => anyhow::bail!(
+            "unknown STT provider: \"{unknown}\". Supported: \"cloud\", \"whisper\""
+        ),
+    };
+    Ok(built)
+}
+
+/// Creates a text-to-speech provider based on the specified name and voice.
+///
+/// Supported provider names:
+/// - `"cloud"` → backend ElevenLabs proxy with viseme alignment. A blank
+///   `voice` is stored as `None`, i.e. no provider-level voice is configured.
+/// - `"piper"` → local Piper subprocess via `PIPER_BIN`. A blank `voice`
+///   falls back to [`DEFAULT_PIPER_VOICE`].
+///
+/// Returns an error for unrecognised provider names. Kokoro is **not**
+/// implemented in this cut (Piper has the simpler runtime contract); adding
+/// it later means a new branch here plus a `local_speech_kokoro` sibling
+/// module.
+pub fn create_tts_provider(
+    provider: &str,
+    voice: &str,
+    _config: &Config,
+) -> anyhow::Result<Box<dyn TtsProvider>> {
+    debug!("{LOG_PREFIX} create_tts_provider provider={provider} voice={voice}");
+    // Apply the Piper default only in the Piper branch. Defaulting up front
+    // made the cloud branch's blank check dead code and handed the Piper
+    // voice name to the cloud provider as if it were a cloud voice id.
+    let requested = Some(voice).filter(|v| !v.trim().is_empty());
+    match provider.trim() {
+        "cloud" => Ok(Box::new(CloudTtsProvider::new(
+            requested.map(str::to_string),
+        ))),
+        "piper" => Ok(Box::new(PiperTtsProvider::new(
+            requested.unwrap_or(DEFAULT_PIPER_VOICE),
+        ))),
+        unknown => Err(anyhow::anyhow!(
+            "unknown TTS provider: \"{unknown}\". Supported: \"cloud\", \"piper\""
+        )),
+    }
+}
+
+/// Model id used by [`create_stt_provider`] when the caller's model is blank.
+/// `whisper-large-v3-turbo` is the recommended ship default (best accuracy-
+/// to-latency tradeoff in the family); lower-spec hardware can pick a smaller
+/// tier from [`WHISPER_MODEL_PRESETS`]. NOTE(review): the preset id there is
+/// `large-v3-turbo`, without the `whisper-` prefix — confirm the mapping.
+pub const DEFAULT_WHISPER_MODEL: &str = "whisper-large-v3-turbo";
+
+/// Default Piper voice — `en_US-lessac-medium`, matches
+/// [`super::super::local_ai::model_ids::effective_tts_voice_id`].
+pub const DEFAULT_PIPER_VOICE: &str = "en_US-lessac-medium";
+
+/// Whisper size tiers as `(id, label)` pairs, smallest first, for the installer
+/// UI. NOTE(review): `medium` is labelled "recommended" but the default is turbo.
+pub const WHISPER_MODEL_PRESETS: &[(&str, &str)] = &[
+    ("tiny", "Tiny (39 MB, fastest)"),
+    ("base", "Base (74 MB)"),
+    ("small", "Small (244 MB)"),
+    ("medium", "Medium (769 MB, recommended)"),
+    ("large-v3-turbo", "Large v3 Turbo (1.5 GB, best accuracy)"),
+];
+
+/// Shared cloud-backed STT provider for callers with no `Config` to consult.
+/// Uses the model id reported by `cloud_transcribe_default_model`.
+pub fn default_stt_provider() -> Arc<dyn SttProvider> {
+    let model = super::cloud_transcribe_default_model();
+    let provider = CloudSttProvider::new(model);
+    Arc::new(provider)
+}
+
+/// Shared cloud-backed TTS provider with no provider-level voice configured.
+pub fn default_tts_provider() -> Arc<dyn TtsProvider> {
+    Arc::new(CloudTtsProvider { voice: None })
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn cfg() -> Config {
+        Config::default()
+    }
+
+    #[test]
+    fn stt_factory_cloud_branch() {
+        let p = create_stt_provider("cloud", "ignored", &cfg()).unwrap();
+        assert_eq!(p.name(), "cloud");
+    }
+
+    #[test]
+    fn stt_factory_whisper_branch() {
+        let p = create_stt_provider("whisper", "whisper-large-v3-turbo", &cfg()).unwrap();
+        assert_eq!(p.name(), "whisper");
+    }
+
+    #[test]
+    fn stt_factory_whisper_empty_model_uses_default() {
+        // Empty model → default whisper-large-v3-turbo; constructor must not
+        // reject an empty string with an opaque error.
+        let p = create_stt_provider("whisper", "", &cfg()).unwrap();
+        assert_eq!(p.name(), "whisper");
+    }
+
+    #[test]
+    fn stt_factory_unknown_provider_errors() {
+        let err = create_stt_provider("deepgram", "nova-2", &cfg())
+            .err()
+            .expect("deepgram is not implemented");
+        let msg = err.to_string();
+        assert!(msg.contains("deepgram"), "should name the provider: {msg}");
+        assert!(msg.contains("unknown"), "should say unknown: {msg}");
+    }
+
+    #[test]
+    fn stt_factory_empty_string_errors() {
+        let err = create_stt_provider("", "model", &cfg())
+            .err()
+            .expect("empty provider must error");
+        assert!(err.to_string().contains("unknown"));
+    }
+
+    #[test]
+    fn tts_factory_cloud_branch() {
+        let p = create_tts_provider("cloud", "Rachel", &cfg()).unwrap();
+        assert_eq!(p.name(), "cloud");
+    }
+
+    #[test]
+    fn tts_factory_piper_branch() {
+        let p = create_tts_provider("piper", "en_US-lessac-medium", &cfg()).unwrap();
+        assert_eq!(p.name(), "piper");
+    }
+
+    #[test]
+    fn tts_factory_piper_empty_voice_uses_default() {
+        let p = create_tts_provider("piper", "", &cfg()).unwrap();
+        assert_eq!(p.name(), "piper");
+    }
+
+    #[test]
+    fn tts_factory_unknown_provider_errors() {
+        let err = create_tts_provider("kokoro", "af_bella", &cfg())
+            .err()
+            .expect("kokoro is not implemented in this cut");
+        let msg = err.to_string();
+        assert!(msg.contains("kokoro"), "should name the provider: {msg}");
+        assert!(msg.contains("unknown"), "should say unknown: {msg}");
+    }
+
+    #[test]
+    fn whisper_presets_cover_full_size_ladder() {
+        // Sanity-check the installer surface: tiny→large-v3-turbo must all be
+        // exposed so the local-AI panel can render the size picker without
+        // hard-coding the list.
+        let ids: Vec<&str> = WHISPER_MODEL_PRESETS.iter().map(|(id, _)| *id).collect();
+        for expected in ["tiny", "base", "small", "medium", "large-v3-turbo"] {
+            assert!(
+                ids.contains(&expected),
+                "WHISPER_MODEL_PRESETS missing {expected}"
+            );
+        }
+    }
+
+    #[tokio::test]
+    async fn whisper_provider_fails_clearly_when_binary_missing() {
+        // No WHISPER_BIN env, no model file — the provider must surface an
+        // actionable error rather than panic. Drive a small base64 payload
+        // so we never reach the actual transcription call.
+        let _guard = unset_env_guard("WHISPER_BIN");
+        let provider = WhisperSttProvider::new("whisper-large-v3-turbo");
+        let result = provider
+            .transcribe(&cfg(), "AAAA", Some("audio/wav"), None, None)
+            .await;
+        assert!(result.is_err(), "missing binary must error");
+        let msg = result.err().unwrap();
+        // Only non-emptiness is asserted: the exact wording belongs to the
+        // transcribe layer, so this does not pin which failure branch ran.
+        assert!(
+            !msg.is_empty(),
+            "error message should be populated for diagnosis"
+        );
+    }
+
+    #[test]
+    fn default_providers_return_cloud() {
+        assert_eq!(default_stt_provider().name(), "cloud");
+        assert_eq!(default_tts_provider().name(), "cloud");
+    }
+
+    /// Unsets `key` immediately and returns a guard that restores the previous
+    /// value (or re-removes the var) on drop, so the change does not outlive the
+    /// test. It does not serialize tests that touch the same variable concurrently.
+    fn unset_env_guard(key: &'static str) -> EnvUnsetGuard {
+        let prev = std::env::var_os(key);
+        std::env::remove_var(key);
+        EnvUnsetGuard { key, prev }
+    }
+
+    struct EnvUnsetGuard {
+        key: &'static str,
+        prev: Option<std::ffi::OsString>,
+    }
+    impl Drop for EnvUnsetGuard {
+        fn drop(&mut self) {
+            match &self.prev {
+                Some(v) => std::env::set_var(self.key, v),
+                None => std::env::remove_var(self.key),
+            }
+        }
+    }
+}
diff --git a/src/openhuman/voice/local_speech.rs b/src/openhuman/voice/local_speech.rs
new file mode 100644
index 0000000000..716f32d95d
--- /dev/null
+++ b/src/openhuman/voice/local_speech.rs
@@ -0,0 +1,343 @@
+//! Local text-to-speech — invokes Piper as a sub-process via the
+//! `PIPER_BIN` environment variable, then reads the resulting WAV file
+//! back into a base64-encoded payload that matches the
+//! [`super::reply_speech::ReplySpeechResult`] shape so the renderer can
+//! swap providers without branching on the response.
+//!
+//! ## Why Piper, not Kokoro
+//!
+//! The plan for issue #1710 evaluated both engines for the first local TTS
+//! ship:
+//!
+//! - **Piper** — ONNX-based, lower latency (~150 ms on M2 CPU for a short
+//!   sentence), simpler runtime contract (one binary + one `.onnx` voice
+//!   file), and `PIPER_BIN` is already reserved in `.env.example`.
+//! - **Kokoro** — 82M parameters, higher audio quality, but requires a
+//!   Python runtime or a custom ONNX runner with phonemization, and the
+//!   integration surface is materially larger.
+//!
+//! Piper ships first. Kokoro is tracked as future work and would land as a
+//! sibling module (`local_speech_kokoro.rs`) plus a `"kokoro"` branch in
+//! [`super::factory::create_tts_provider`].
+//!
+//! ## Resolution order
+//!
+//! 1. `PIPER_BIN` env var (absolute path, takes precedence)
+//! 2. `piper` / `piper.exe` on `$PATH`
+//!
+//! Both branches share the same resolution helper as the legacy voice
+//! pipeline ([`crate::openhuman::local_ai::paths::resolve_piper_binary`]),
+//! so STT availability checks, the installer UI, and the factory dispatch
+//! all agree on what counts as "installed".
+//!
+//! ## Where to get the binary
+//!
+//! **Easy path:** click "Install Piper" in `Settings → Voice → Voice
+//! Providers`. That triggers
+//! [`crate::openhuman::local_ai::install_piper`] which downloads the
+//! Piper binary archive (`.zip` on Windows, `.tar.gz` on macOS / Linux)
+//! into `~/.openhuman/bin/piper/`, extracts it, and stages the bundled
+//! `en_US-lessac-medium` voice (`.onnx` + `.onnx.json`) alongside via a
+//! `.part` file + atomic rename. After install the `resolve_piper_binary`
+//! helper in `local_ai/paths.rs` picks it up automatically.
+//!
+//! **Advanced path:** download Piper from
+//! [rhasspy/piper](https://github.com/rhasspy/piper) releases (one
+//! self-contained binary per OS) plus a voice `.onnx` (+ `.onnx.json`)
+//! from [rhasspy/piper-voices](https://huggingface.co/rhasspy/piper-voices),
+//! and either drop the binary on `$PATH` or point `PIPER_BIN` at it.
+//!
+//! ## Hardware / latency notes (AC #2 of issue #1710)
+//!
+//! Piper on a 2022 M2 CPU synthesizes ~150 ms of audio per second of
+//! output for the `medium` quality tier; on a five-year-old laptop budget
+//! 300–500 ms. The visemes returned here are a synthetic flat timeline
+//! (the renderer uses them only as a fallback when the cloud branch fails)
+//! — accurate visemes from Piper would require a separate forced-aligner
+//! pass and is intentionally out of scope.
+//!
+//! ## Log prefix
+//!
+//! `[voice-tts]` — pairs with `[voice-stt]` and `[voice-factory]` for
+//! end-to-end debug greps.
+
+use std::path::PathBuf;
+
+use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
+use log::debug;
+
+use crate::openhuman::config::Config;
+use crate::openhuman::local_ai::paths::{resolve_piper_binary_with_config, resolve_tts_voice_path};
+use crate::rpc::RpcOutcome;
+
+use super::reply_speech::{ReplySpeechResult, VisemeFrame};
+
+const LOG_PREFIX: &str = "[voice-tts]";
+
+/// Default Piper voice id.
+pub const DEFAULT_PIPER_VOICE: &str = "en_US-lessac-medium";
+
+/// Caller-tunable knobs for local Piper synthesis.
+#[derive(Debug, Default, Clone)]
+pub struct PiperOptions {
+    /// Requested voice id (e.g. `en_US-lessac-medium`). NOTE(review): today
+    /// `synthesize_piper` only logs this value — the `.onnx` it loads always
+    /// comes from [`resolve_tts_voice_path`]`(config)`; confirm intent.
+    pub voice: Option<String>,
+}
+
+/// Synthesize speech using local Piper.
+///
+/// Implementation strategy (sub-process model):
+///
+/// 1. Resolve `PIPER_BIN` (env override → PATH). Missing binary → error.
+/// 2. Resolve the voice `.onnx` path against the workspace; missing model
+///    surfaces an actionable error pointing the user at the installer.
+/// 3. Write a temp WAV output path, spawn `piper --model <voice>
+///    --output_file <out.wav>`, pipe `text` to stdin, wait, then read the
+///    WAV back into memory.
+/// 4. Return a [`ReplySpeechResult`] with `audio_base64` populated and a
+///    synthetic neutral viseme timeline so the mascot lip-sync doesn't
+///    null-out.
+///
+/// **No model assets are embedded.** Voice files live in the workspace
+/// models directory after the installer pulls them.
+pub async fn synthesize_piper(
+    config: &Config,
+    text: &str,
+    opts: &PiperOptions,
+) -> Result<RpcOutcome<ReplySpeechResult>, String> {
+    let trimmed = text.trim();
+    if trimmed.is_empty() {
+        return Err("text is required".to_string());
+    }
+
+    let piper_bin = resolve_piper_binary_with_config(config).ok_or_else(|| {
+        format!(
+            "{LOG_PREFIX} piper binary not found. \
+             Set PIPER_BIN to the absolute path of piper, or install piper on \
+             PATH (download from https://github.com/rhasspy/piper/releases)."
+        )
+    })?;
+    debug!("{LOG_PREFIX} resolved piper binary={}", piper_bin.display());
+
+    let voice_path = resolve_tts_voice_path(config).map_err(|e| format!("{LOG_PREFIX} {e}"))?;
+    let voice_id = opts
+        .voice
+        .as_deref()
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .unwrap_or(DEFAULT_PIPER_VOICE)
+        .to_string();
+    debug!("{LOG_PREFIX} voice={voice_id} model_path={voice_path}");
+
+    let out_dir = std::env::temp_dir().join("openhuman_voice_output");
+    tokio::fs::create_dir_all(&out_dir)
+        .await
+        .map_err(|e| format!("{LOG_PREFIX} failed to create voice output directory: {e}"))?;
+    let out_path = out_dir.join(format!(
+        "piper-{}-{}.wav",
+        chrono::Utc::now().timestamp_millis(),
+        uuid::Uuid::new_v4()
+    ));
+
+    // Piper's default --length-scale is 1.0 which sounds rushed for most
+    // English voices. 1.15 (≈ 15% slower) lands closer to natural speech
+    // pace without dragging. Future work: surface this as a settings slider
+    // (config.local_ai.tts_length_scale) so users can tune to taste.
+    const DEFAULT_LENGTH_SCALE: &str = "1.15";
+
+    let spawn_started = std::time::Instant::now();
+    let mut cmd = tokio::process::Command::new(&piper_bin);
+    cmd.args([
+        "--model",
+        voice_path.as_str(),
+        "--output_file",
+        &out_path.to_string_lossy(),
+        "--length-scale",
+        DEFAULT_LENGTH_SCALE,
+    ])
+    .stdin(std::process::Stdio::piped())
+    .stdout(std::process::Stdio::null())
+    .stderr(std::process::Stdio::piped());
+    // Don't orphan piper if we return early or the caller drops this future.
+    cmd.kill_on_drop(true);
+    // CREATE_NO_WINDOW: piper.exe is a console binary, so hide its window.
+    #[cfg(windows)]
+    {
+        use std::os::windows::process::CommandExt;
+        cmd.creation_flags(0x08000000);
+    }
+    let mut child = cmd
+        .spawn()
+        .map_err(|e| format!("{LOG_PREFIX} failed to launch piper: {e}"))?;
+
+    // Pipe the text to stdin (UTF-8 lines); dropping the handle below signals EOF.
+    if let Some(mut stdin) = child.stdin.take() {
+        use tokio::io::AsyncWriteExt;
+        stdin
+            .write_all(trimmed.as_bytes())
+            .await
+            .map_err(|e| format!("{LOG_PREFIX} failed to write text to piper stdin: {e}"))?;
+    }
+
+    let output = child
+        .wait_with_output()
+        .await
+        .map_err(|e| format!("{LOG_PREFIX} failed to wait on piper: {e}"))?;
+
+    let exit_code = output.status.code();
+    debug!(
+        "{LOG_PREFIX} piper exited code={:?} elapsed_ms={} stderr_bytes={}",
+        exit_code,
+        spawn_started.elapsed().as_millis(),
+        output.stderr.len()
+    );
+    if !output.status.success() {
+        // Best-effort cleanup of the partial output.
+        let _ = tokio::fs::remove_file(&out_path).await;
+        return Err(format!(
+            "{LOG_PREFIX} piper failed (exit={:?}): {}",
+            exit_code,
+            String::from_utf8_lossy(&output.stderr).trim()
+        ));
+    }
+
+    let audio_bytes = read_and_clean_wav(&out_path).await?;
+    let audio_base64 = BASE64.encode(&audio_bytes);
+    let visemes = synthetic_viseme_timeline(trimmed);
+    debug!(
+        "{LOG_PREFIX} synthesized wav_bytes={} visemes={}",
+        audio_bytes.len(),
+        visemes.len()
+    );
+
+    Ok(RpcOutcome::single_log(
+        ReplySpeechResult {
+            audio_base64,
+            audio_mime: "audio/wav".to_string(),
+            visemes,
+            alignment: None,
+        },
+        "local piper TTS completed",
+    ))
+}
+
+async fn read_and_clean_wav(path: &std::path::Path) -> Result<Vec<u8>, String> {
+    let bytes = tokio::fs::read(path)
+        .await
+        .map_err(|e| format!("{LOG_PREFIX} failed to read piper output: {e}"))?;
+    if let Err(e) = tokio::fs::remove_file(path).await {
+        log::warn!(
+            "{LOG_PREFIX} failed to clean up piper output {}: {e}",
+            path.display()
+        );
+    }
+    Ok(bytes)
+}
+
+/// Build a synthetic neutral-vowel viseme timeline. The mascot expects at
+/// least one frame to render the mouth; without it the rig snaps closed
+/// for the entire utterance. A real forced-aligner pass would replace
+/// this — see the module-level note.
+fn synthetic_viseme_timeline(text: &str) -> Vec<VisemeFrame> {
+    // Budget ~80 ms per non-whitespace character — a plausible average for
+    // conversational English. The count is floored at one so empty or
+    // whitespace-only input still yields a visible mouth movement.
+    const PER_CHAR_MS: u64 = 80;
+    let spoken_chars = text.chars().filter(|c| !c.is_whitespace()).count().max(1);
+    let speech_end_ms = ((spoken_chars as u64) * PER_CHAR_MS).max(80);
+    // Short leading silence, then one open-vowel frame spanning the rest.
+    let lead_in = VisemeFrame {
+        viseme: "sil".to_string(),
+        start_ms: 0,
+        end_ms: 40,
+    };
+    let open_vowel = VisemeFrame {
+        viseme: "aa".to_string(),
+        start_ms: 40,
+        end_ms: speech_end_ms,
+    };
+    vec![lead_in, open_vowel]
+}
+
+/// Resolves [`PathBuf`] inputs to absolute paths so logs/errors don't show
+/// platform-specific relative noise. Kept as a tiny helper so its
+/// behaviour is testable.
+#[allow(dead_code)]
+fn absolutize(p: PathBuf) -> PathBuf {
+    p.canonicalize().unwrap_or(p)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn synthesize_piper_rejects_empty_text() {
+        let config = Config::default();
+        let opts = PiperOptions::default();
+        let err = synthesize_piper(&config, "", &opts).await.err().unwrap();
+        assert!(err.contains("required"), "empty text must error: {err}");
+
+        let err = synthesize_piper(&config, "   ", &opts).await.err().unwrap();
+        assert!(
+            err.contains("required"),
+            "whitespace text must error: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn synthesize_piper_surfaces_binary_lookup_failure() {
+        // Same shape as the whisper test — make sure missing PIPER_BIN
+        // produces an actionable error, not a panic in the spawn path.
+        let prev_piper = std::env::var_os("PIPER_BIN");
+        std::env::remove_var("PIPER_BIN");
+
+        let config = Config::default();
+        let opts = PiperOptions::default();
+        let result = synthesize_piper(&config, "hello world", &opts).await;
+
+        if let Some(v) = prev_piper {
+            std::env::set_var("PIPER_BIN", v);
+        }
+
+        let err = result.err().expect("missing piper must error");
+        assert!(
+            err.contains("piper") || err.contains("TTS"),
+            "should mention piper or TTS: {err}"
+        );
+    }
+
+    #[test]
+    fn synthetic_viseme_timeline_yields_non_empty_frames() {
+        let frames = synthetic_viseme_timeline("hello world");
+        assert!(!frames.is_empty(), "must produce at least one frame");
+        assert_eq!(frames[0].viseme, "sil", "leading silence");
+        assert!(
+            frames.last().unwrap().end_ms >= 80,
+            "tail frame must extend past the leading silence"
+        );
+    }
+
+    #[test]
+    fn synthetic_viseme_timeline_handles_whitespace_only_text() {
+        // Whitespace-only input would normally be rejected upstream, but
+        // the helper itself must not panic — defends against a future
+        // caller that bypasses the validator.
+        let frames = synthetic_viseme_timeline("   ");
+        assert!(!frames.is_empty());
+        // chars().filter(non-ws).count() is 0 → min 1 → 80 ms total.
+        assert_eq!(frames[1].end_ms, 80);
+    }
+
+    #[test]
+    fn synthetic_viseme_timeline_scales_with_length() {
+        let short = synthetic_viseme_timeline("hi");
+        let long = synthetic_viseme_timeline("the quick brown fox jumps");
+        assert!(
+            long.last().unwrap().end_ms > short.last().unwrap().end_ms,
+            "longer text should produce a longer timeline"
+        );
+    }
+}
diff --git a/src/openhuman/voice/local_transcribe.rs b/src/openhuman/voice/local_transcribe.rs
new file mode 100644
index 0000000000..c2a9106a27
--- /dev/null
+++ b/src/openhuman/voice/local_transcribe.rs
@@ -0,0 +1,381 @@
+//! Local speech-to-text — invokes whisper.cpp (`whisper-cli`) as a
+//! sub-process via the `WHISPER_BIN` environment variable.
+//!
+//! ## Resolution order
+//!
+//! 1. `WHISPER_BIN` env var (absolute path, takes precedence)
+//! 2. `whisper-cli` / `whisper-cli.exe` on `$PATH`
+//!
+//! When neither resolves, transcription fails with a clear, actionable
+//! error pointing the user at the install path. Resolution lives in
+//! [`crate::openhuman::local_ai::paths::resolve_whisper_binary`] — kept in
+//! one place so STT, voice-status, and the installer all agree.
+//!
+//! ## Where to get the binary
+//!
+//! **Easy path:** click "Install Whisper" in `Settings → Voice → Voice
+//! Providers`. That triggers
+//! [`crate::openhuman::local_ai::install_whisper`] which streams the
+//! GGML model file (`ggml-<size>.bin`) into
+//! `~/.openhuman/bin/whisper/` via a `.part` file + atomic rename, plus
+//! the `whisper-cli` binary on Windows where upstream ships a release
+//! archive. After install the `resolve_whisper_binary` helper in
+//! `local_ai/paths.rs` picks it up automatically — no env var to set.
+//!
+//! **Advanced path:** install whisper.cpp's `whisper-cli` from a package
+//! manager (`brew install whisper-cpp`, `pacman -S whisper.cpp`, …) or
+//! build from source ([ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp))
+//! and either drop the binary on `$PATH` or point `WHISPER_BIN` at it.
+//!
+//! ## Hardware / latency notes (AC #2 of issue #1710)
+//!
+//! When the caller passes no model we fall back to [`DEFAULT_WHISPER_MODEL`]
+//! (`medium`); `whisper-large-v3-turbo` is the recommended tier (≈ 4× faster
+//! than `large-v3` with the same WER on English). On lower-end hardware:
+//!
+//! | Model           | Disk    | RAM    | Latency (M2 CPU, 10s clip) | Notes |
+//! |-----------------|---------|--------|-----------------------------|-------|
+//! | `tiny`          | 39 MB   | ~150 MB| ~0.4 s                     | Demo-grade |
+//! | `base`          | 74 MB   | ~210 MB| ~0.6 s                     | Decent for short utterances |
+//! | `small`         | 244 MB  | ~480 MB| ~1.4 s                     | Default for older laptops |
+//! | `medium`        | 769 MB  | ~1.2 GB| ~3.0 s                     | Fallback default; good accuracy, heavier |
+//! | `large-v3-turbo`| 1.5 GB  | ~2.2 GB| ~1.8 s                     | Recommended |
+//!
+//! No model assets are embedded in the binary — everything is downloaded
+//! into the workspace on first use.
+//!
+//! ## Log prefix
+//!
+//! `[voice-stt]` — grep-friendly so debug runs across factory dispatch,
+//! sub-process spawn, and result decoding line up cleanly.
+
+use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
+use chrono::Utc;
+use log::{debug, warn};
+use serde::{Deserialize, Serialize};
+
+use crate::openhuman::config::Config;
+use crate::openhuman::local_ai::paths::resolve_whisper_binary_with_config;
+use crate::rpc::RpcOutcome;
+
+const LOG_PREFIX: &str = "[voice-stt]";
+
+/// Default model id when the caller does not override.
+pub const DEFAULT_WHISPER_MODEL: &str = "medium";
+
+/// Caller-tunable knobs for local Whisper transcription.
+#[derive(Debug, Default, Clone)]
+pub struct WhisperTranscribeOptions {
+    /// Whisper model id (e.g. `whisper-large-v3-turbo`). When `None` we
+    /// fall back to [`DEFAULT_WHISPER_MODEL`].
+    pub model: Option<String>,
+    /// Recorder MIME type (e.g. `audio/webm`). Used to pick the right file
+    /// extension on disk before handing off to whisper-cli, which sniffs
+    /// the extension.
+    pub mime_type: Option<String>,
+    /// BCP-47 language hint (e.g. `"en"`).
+    pub language: Option<String>,
+}
+
+/// Output of local whisper transcription. Matches
+/// [`super::cloud_transcribe::CloudTranscribeResult`] shape so the factory's
+/// `SttResult` can carry either provider's payload without conditional code.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WhisperTranscribeResult {
+    pub text: String,
+    /// The model id that produced the transcript — populated so the UI can
+    /// show the user which model ran (useful when they're A/B-testing
+    /// sizes during the install flow).
+    pub model_id: String,
+}
+
+/// Transcribe a base64-encoded audio blob using local whisper.cpp.
+///
+/// Implementation strategy (sub-process model):
+///
+/// 1. Resolve `WHISPER_BIN` (env override → PATH lookup). If missing,
+///    return an actionable error so the UI can deep-link to the installer.
+/// 2. Decode the base64 audio and write it to a temp file under
+///    `$TMP/openhuman_voice_input/voice-<ts>-<uuid>.<ext>` — whisper-cli
+///    consumes a file path, not stdin.
+/// 3. Spawn `whisper-cli -m <model> -f <file> [-l <lang>]`, capture
+///    stdout, and clean up the temp file regardless of outcome.
+/// 4. Return the trimmed transcript. Empty stdout is reported as an error
+///    (whisper produced no output → almost always a model/file mismatch).
+///
+/// **No model assets are embedded.** The model file is downloaded by the
+/// installer into the workspace; this function only locates the binary.
+pub async fn transcribe_whisper(
+    config: &Config,
+    audio_base64: &str,
+    opts: &WhisperTranscribeOptions,
+) -> Result<RpcOutcome<WhisperTranscribeResult>, String> {
+    let trimmed = audio_base64.trim();
+    if trimmed.is_empty() {
+        return Err("audio_base64 is required".to_string());
+    }
+    let audio_bytes = BASE64
+        .decode(trimmed)
+        .map_err(|e| format!("invalid base64 audio: {e}"))?;
+    if audio_bytes.is_empty() {
+        return Err("decoded audio is empty".to_string());
+    }
+
+    let model_id = opts
+        .model
+        .as_deref()
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .unwrap_or(DEFAULT_WHISPER_MODEL)
+        .to_string();
+
+    let whisper_bin = resolve_whisper_binary_with_config(config).ok_or_else(|| {
+        format!(
+            "{LOG_PREFIX} whisper.cpp binary not found. \
+             Set WHISPER_BIN to the absolute path of whisper-cli, or install \
+             whisper-cli on PATH (`brew install whisper-cpp` / package manager / \
+             build from https://github.com/ggerganov/whisper.cpp)."
+        )
+    })?;
+    debug!(
+        "{LOG_PREFIX} resolved whisper binary={} model_id={}",
+        whisper_bin.display(),
+        model_id
+    );
+
+    // Resolve the on-disk model path from the effective model_id (which may
+    // have been overridden by the request options) so the returned model_id
+    // always matches the model actually used. This runs BEFORE the audio is
+    // staged: a missing model is the common failure, and bailing out here
+    // means no temp file is left behind in the voice input directory.
+    let model_path =
+        crate::openhuman::local_ai::paths::resolve_stt_model_path_by_id(&model_id, config)
+            .map_err(|e| format!("{LOG_PREFIX} {e}"))?;
+    debug!("{LOG_PREFIX} resolved STT model path={model_path}");
+
+    let ext = mime_to_extension(opts.mime_type.as_deref());
+    let voice_dir = std::env::temp_dir().join("openhuman_voice_input");
+    tokio::fs::create_dir_all(&voice_dir)
+        .await
+        .map_err(|e| format!("{LOG_PREFIX} failed to create voice input directory: {e}"))?;
+    let file_path = voice_dir.join(format!(
+        "voice-{}-{}.{}",
+        Utc::now().timestamp_millis(),
+        uuid::Uuid::new_v4(),
+        ext
+    ));
+    tokio::fs::write(&file_path, &audio_bytes)
+        .await
+        .map_err(|e| format!("{LOG_PREFIX} failed to write audio file: {e}"))?;
+    debug!(
+        "{LOG_PREFIX} staged audio bytes={} path={}",
+        audio_bytes.len(),
+        file_path.display()
+    );
+
+    let mut args: Vec<String> = vec![
+        "-m".to_string(),
+        model_path,
+        "-f".to_string(),
+        file_path.to_string_lossy().to_string(),
+        // Suppress segment timestamp prefixes (`[00:00:00.000 --> ...]`) in
+        // stdout — we want the bare transcript text only. Without this flag
+        // the timestamps leak into the message body the user sees.
+        "--no-timestamps".to_string(),
+    ];
+    if let Some(lang) = opts
+        .language
+        .as_deref()
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+    {
+        args.push("-l".to_string());
+        args.push(lang.to_string());
+    }
+    debug!("{LOG_PREFIX} spawning whisper-cli args={args:?}");
+
+    let spawn_started = std::time::Instant::now();
+    let mut cmd = tokio::process::Command::new(&whisper_bin);
+    cmd.args(&args);
+    // Suppress the Windows console window that would otherwise flash on
+    // every invocation (whisper-cli is a console subsystem binary).
+    // 0x08000000 is CREATE_NO_WINDOW from winbase.h.
+    #[cfg(windows)]
+    {
+        use std::os::windows::process::CommandExt;
+        cmd.creation_flags(0x08000000);
+    }
+    // Cap the subprocess so a stalled whisper-cli never hangs the RPC
+    // caller indefinitely (120 s is generous for any reasonable clip).
+    // `kill_on_drop` makes the timeout actually terminate the process.
+    const WHISPER_TIMEOUT_SECS: u64 = 120;
+    cmd.kill_on_drop(true);
+    let timed = tokio::time::timeout(
+        std::time::Duration::from_secs(WHISPER_TIMEOUT_SECS),
+        cmd.output(),
+    )
+    .await;
+
+    // Always clean up the staged audio file, timeout included; warn but don't fail.
+    if let Err(e) = tokio::fs::remove_file(&file_path).await {
+        warn!(
+            "{LOG_PREFIX} failed to clean up temp audio file {}: {e}",
+            file_path.display()
+        );
+    }
+
+    let output = timed
+        .map_err(|_| format!("{LOG_PREFIX} whisper-cli timed out after {WHISPER_TIMEOUT_SECS}s"))?
+        .map_err(|e| format!("{LOG_PREFIX} failed to spawn whisper-cli: {e}"))?;
+
+    let exit_code = output.status.code();
+    debug!(
+        "{LOG_PREFIX} whisper-cli exited code={:?} elapsed_ms={} stdout_bytes={} stderr_bytes={}",
+        exit_code,
+        spawn_started.elapsed().as_millis(),
+        output.stdout.len(),
+        output.stderr.len()
+    );
+    if !output.status.success() {
+        return Err(format!(
+            "{LOG_PREFIX} whisper-cli failed (exit={:?}): {}",
+            exit_code,
+            String::from_utf8_lossy(&output.stderr).trim()
+        ));
+    }
+
+    let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
+    if text.is_empty() {
+        return Err(format!(
+            "{LOG_PREFIX} whisper-cli returned empty transcript (model={model_id})"
+        ));
+    }
+
+    Ok(RpcOutcome::single_log(
+        WhisperTranscribeResult { text, model_id },
+        "local whisper STT completed",
+    ))
+}
+
+/// Map a recorder MIME type to a safe filename extension. Defaults to
+/// `webm` because `MediaRecorder` defaults to WebM/Opus and whisper-cli
+/// (built with ffmpeg) handles it transparently.
+fn mime_to_extension(mime: Option<&str>) -> &'static str {
+    match mime
+        // Keep only the media type (before any `;params`) and trim stray whitespace.
+        .map(|m| m.split(';').next().unwrap_or(m).trim().to_ascii_lowercase())
+        .as_deref()
+    {
+        Some("audio/wav") | Some("audio/x-wav") => "wav",
+        Some("audio/mpeg") => "mp3",
+        Some("audio/mp4") | Some("audio/x-m4a") => "m4a",
+        Some("audio/ogg") => "ogg",
+        Some("audio/flac") => "flac",
+        // Default branch covers `audio/webm`, `audio/webm;codecs=opus`,
+        // unknown types, and `None`. WebM is the MediaRecorder default
+        // on Chromium so it's the safest fallback.
+        _ => "webm",
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use base64::engine::general_purpose::STANDARD as BASE64;
+    use base64::Engine;
+
+    #[test]
+    fn mime_to_extension_maps_known_types() {
+        assert_eq!(mime_to_extension(Some("audio/webm")), "webm");
+        assert_eq!(mime_to_extension(Some("audio/webm;codecs=opus")), "webm");
+        assert_eq!(mime_to_extension(Some("audio/wav")), "wav");
+        assert_eq!(mime_to_extension(Some("audio/x-wav")), "wav");
+        assert_eq!(mime_to_extension(Some("audio/mpeg")), "mp3");
+        assert_eq!(mime_to_extension(Some("audio/mp4")), "m4a");
+        assert_eq!(mime_to_extension(Some("audio/x-m4a")), "m4a");
+        assert_eq!(mime_to_extension(Some("audio/ogg")), "ogg");
+        assert_eq!(mime_to_extension(Some("audio/flac")), "flac");
+    }
+
+    #[test]
+    fn mime_to_extension_falls_back_to_webm() {
+        // Unknown / missing / unparseable inputs default to webm — covers
+        // the case where MediaRecorder reports a vendor-specific type.
+        assert_eq!(mime_to_extension(None), "webm");
+        assert_eq!(mime_to_extension(Some("")), "webm");
+        assert_eq!(mime_to_extension(Some("application/octet-stream")), "webm");
+        assert_eq!(mime_to_extension(Some("video/mp4")), "webm");
+    }
+
+    #[tokio::test]
+    async fn transcribe_whisper_rejects_empty_input() {
+        let config = Config::default();
+        let opts = WhisperTranscribeOptions::default();
+        let err = transcribe_whisper(&config, "", &opts).await.err().unwrap();
+        assert!(
+            err.contains("required"),
+            "should reject empty base64 input: {err}"
+        );
+
+        let err = transcribe_whisper(&config, "   ", &opts)
+            .await
+            .err()
+            .unwrap();
+        assert!(
+            err.contains("required"),
+            "whitespace-only must error: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn transcribe_whisper_rejects_invalid_base64() {
+        let config = Config::default();
+        let opts = WhisperTranscribeOptions::default();
+        let err = transcribe_whisper(&config, "not-base64-!", &opts)
+            .await
+            .err()
+            .unwrap();
+        assert!(err.contains("invalid base64"), "should fail decode: {err}");
+    }
+
+    #[tokio::test]
+    async fn transcribe_whisper_rejects_empty_decoded_payload() {
+        let config = Config::default();
+        let opts = WhisperTranscribeOptions::default();
+        // Empty input stands in for zero-byte audio; the `required` guard fires before decoding.
+        let err = transcribe_whisper(&config, "", &opts).await.err().unwrap();
+        assert!(
+            err.contains("required") || err.contains("empty"),
+            "should reject zero-byte audio: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn transcribe_whisper_surfaces_binary_lookup_failure() {
+        // No WHISPER_BIN and no PATH entry → factory must produce an
+        // actionable error rather than panicking inside the subprocess
+        // spawn. Use a 1-byte base64 payload so the binary-resolution
+        // branch runs before any audio handling.
+        let prev_whisper = std::env::var_os("WHISPER_BIN");
+        std::env::remove_var("WHISPER_BIN");
+        let payload = BASE64.encode(b"X");
+
+        let config = Config::default();
+        let opts = WhisperTranscribeOptions::default();
+        let result = transcribe_whisper(&config, &payload, &opts).await;
+
+        // Restore env immediately, even on failure.
+        if let Some(v) = prev_whisper {
+            std::env::set_var("WHISPER_BIN", v);
+        }
+
+        // Either the binary missing OR the model missing must surface; both
+        // count as the "factory dispatched but local stack isn't installed"
+        // case the test exists to cover.
+        let err = result.err().expect("missing local stack must error");
+        assert!(
+            err.contains("whisper") || err.contains("STT model"),
+            "should mention whisper or STT model: {err}"
+        );
+    }
+}
diff --git a/src/openhuman/voice/mod.rs b/src/openhuman/voice/mod.rs
index e90515e2c8..ffcc043ce1 100644
--- a/src/openhuman/voice/mod.rs
+++ b/src/openhuman/voice/mod.rs
@@ -8,8 +8,11 @@ pub mod audio_capture;
 pub(crate) mod cli;
 pub mod cloud_transcribe;
 pub mod dictation_listener;
+pub mod factory;
 pub mod hallucination;
 pub mod hotkey;
+pub mod local_speech;
+pub mod local_transcribe;
 mod ops;
 mod postprocess;
 pub mod reply_speech;
@@ -19,6 +22,19 @@ pub mod streaming;
 pub mod text_input;
 mod types;
 
+pub use factory::{
+    create_stt_provider, create_tts_provider, default_stt_provider, default_tts_provider,
+    SttProvider, SttResult, TtsProvider, DEFAULT_PIPER_VOICE, DEFAULT_WHISPER_MODEL,
+    WHISPER_MODEL_PRESETS,
+};
 pub use ops::*;
 pub use schemas::{all_voice_controller_schemas, all_voice_registered_controllers, voice_schemas};
 pub use types::{VoiceSpeechResult, VoiceStatus, VoiceTtsResult};
+
+/// Default Whisper-v1 model id sent to the backend cloud STT proxy. Kept
+/// here (rather than in `cloud_transcribe.rs`) so the factory module can
+/// reach it via the public `voice::` surface without re-exporting an
+/// internal constant.
+pub(crate) fn cloud_transcribe_default_model() -> &'static str {
+    "whisper-v1"
+}
diff --git a/src/openhuman/voice/ops.rs b/src/openhuman/voice/ops.rs
index 22a6fb3cba..5f25e0570f 100644
--- a/src/openhuman/voice/ops.rs
+++ b/src/openhuman/voice/ops.rs
@@ -54,6 +54,17 @@ pub async fn voice_status(config: &Config) -> Result<RpcOutcome<VoiceStatus>, St
         safe_basename_str(&tts_voice),
     );
 
+    let stt_provider = if config.local_ai.stt_provider.trim().is_empty() {
+        "cloud".to_string()
+    } else {
+        config.local_ai.stt_provider.clone()
+    };
+    let tts_provider = if config.local_ai.tts_provider.trim().is_empty() {
+        "cloud".to_string()
+    } else {
+        config.local_ai.tts_provider.clone()
+    };
+
     let status = VoiceStatus {
         stt_available,
         tts_available,
@@ -65,6 +76,8 @@ pub async fn voice_status(config: &Config) -> Result<RpcOutcome<VoiceStatus>, St
         tts_voice_path: tts_voice,
         whisper_in_process,
         llm_cleanup_enabled: config.local_ai.voice_llm_cleanup_enabled,
+        stt_provider,
+        tts_provider,
     };
 
     Ok(RpcOutcome::single_log(status, "voice status checked"))
diff --git a/src/openhuman/voice/schemas.rs b/src/openhuman/voice/schemas.rs
index d145366dbc..2f8ac5dc36 100644
--- a/src/openhuman/voice/schemas.rs
+++ b/src/openhuman/voice/schemas.rs
@@ -57,6 +57,53 @@ struct CloudTranscribeParams {
     language: Option<String>,
 }
 
+/// Parameters for the `voice_stt_dispatch` RPC (factory-dispatched STT).
+/// The caller can pin a provider explicitly (`"cloud"` / `"whisper"`) or
+/// omit it and let the handler fall back to `config.local_ai.stt_provider`.
+/// The existing `voice_cloud_transcribe` RPC stays intact for back-compat —
+/// older renderers still pin the cloud path directly.
+#[derive(Debug, Deserialize)]
+struct SttDispatchParams {
+    audio_base64: String, // Required: base64-encoded audio bytes.
+    /// Provider override; falls back to `config.local_ai.stt_provider`.
+    #[serde(default)]
+    provider: Option<String>,
+    /// Model override (cloud branch ignores it).
+    #[serde(default)]
+    model: Option<String>,
+    #[serde(default)]
+    mime_type: Option<String>, // Audio MIME type hint, forwarded to the provider.
+    #[serde(default)]
+    file_name: Option<String>, // Filename hint, forwarded to the provider.
+    #[serde(default)]
+    language: Option<String>, // Language hint, forwarded to the provider.
+}
+
+/// Parameters for the `voice_tts_dispatch` RPC (factory-dispatched TTS).
+/// Same provider-resolution rule as [`SttDispatchParams`].
+#[derive(Debug, Deserialize)]
+struct TtsDispatchParams {
+    text: String, // Required: text to synthesize.
+    #[serde(default)]
+    provider: Option<String>, // Blank/omitted: falls back to the configured provider.
+    #[serde(default)]
+    voice: Option<String>, // Provider-specific voice id; blank counts as omitted.
+}
+
+/// Parameters for the `voice_set_providers` RPC (settings-panel update of the
+/// STT/TTS selectors). Every field is optional; omitted fields are left as is.
+#[derive(Debug, Deserialize)]
+struct SetProvidersParams {
+    #[serde(default)]
+    stt_provider: Option<String>, // Must be "cloud" or "whisper" when present.
+    #[serde(default)]
+    tts_provider: Option<String>, // Must be "cloud" or "piper" when present.
+    #[serde(default)]
+    stt_model: Option<String>, // Stored in `config.local_ai.stt_model_id`.
+    #[serde(default)]
+    tts_voice: Option<String>, // Stored in `config.local_ai.tts_voice_id`.
+}
+
 #[derive(Debug, Deserialize)]
 struct ReplySynthesizeParams {
     text: String,
@@ -98,6 +145,9 @@ pub fn all_voice_controller_schemas() -> Vec<ControllerSchema> {
         voice_schemas("voice_tts"),
         voice_schemas("voice_reply_synthesize"),
         voice_schemas("voice_cloud_transcribe"),
+        voice_schemas("voice_stt_dispatch"),
+        voice_schemas("voice_tts_dispatch"),
+        voice_schemas("voice_set_providers"),
         voice_schemas("voice_server_start"),
         voice_schemas("voice_server_stop"),
         voice_schemas("voice_server_status"),
@@ -131,6 +181,18 @@ pub fn all_voice_registered_controllers() -> Vec<RegisteredController> {
             schema: voice_schemas("voice_cloud_transcribe"),
             handler: handle_voice_cloud_transcribe,
         },
+        RegisteredController {
+            schema: voice_schemas("voice_stt_dispatch"),
+            handler: handle_voice_stt_dispatch,
+        },
+        RegisteredController {
+            schema: voice_schemas("voice_tts_dispatch"),
+            handler: handle_voice_tts_dispatch,
+        },
+        RegisteredController {
+            schema: voice_schemas("voice_set_providers"),
+            handler: handle_voice_set_providers,
+        },
         RegisteredController {
             schema: voice_schemas("voice_server_start"),
             handler: handle_voice_server_start,
@@ -231,6 +293,79 @@ pub fn voice_schemas(function: &str) -> ControllerSchema {
                 "ReplySpeechResult: { audio_base64, audio_mime, visemes, alignment? }.",
             )],
         },
+        "voice_stt_dispatch" => ControllerSchema {
+            namespace: "voice",
+            function: "stt_dispatch",
+            description:
+                "Factory-dispatched speech-to-text. Routes to the cloud Whisper proxy or the \
+                 local whisper.cpp binary based on `provider` (or `config.local_ai.stt_provider` \
+                 when unspecified). Returns the same `{ text }` payload either way.",
+            inputs: vec![
+                required_string(
+                    "audio_base64",
+                    "Base64-encoded audio bytes (e.g. webm/opus from MediaRecorder).",
+                ),
+                optional_string(
+                    "provider",
+                    "Override provider: 'cloud' or 'whisper'. Defaults to config.local_ai.stt_provider.",
+                ),
+                optional_string("model", "Whisper model id (whisper branch only)."),
+                optional_string("mime_type", "Audio MIME type (default: audio/webm)."),
+                optional_string("file_name", "Filename hint (default: audio.webm)."),
+                optional_string("language", "BCP-47 language hint, e.g. 'en'."),
+            ],
+            outputs: vec![json_output(
+                "result",
+                "SttResult: { text, provider }.",
+            )],
+        },
+        "voice_tts_dispatch" => ControllerSchema {
+            namespace: "voice",
+            function: "tts_dispatch",
+            description:
+                "Factory-dispatched text-to-speech. Routes to the cloud ElevenLabs proxy \
+                 (returns rich viseme alignment) or local Piper (returns audio + a synthetic \
+                 viseme timeline) based on `provider` (or `config.local_ai.tts_provider`).",
+            inputs: vec![
+                required_string("text", "Text to synthesize."),
+                optional_string(
+                    "provider",
+                    "Override provider: 'cloud' or 'piper'. Defaults to config.local_ai.tts_provider.",
+                ),
+                optional_string(
+                    "voice",
+                    "Voice id (provider-specific). Piper expects an id like 'en_US-lessac-medium'.",
+                ),
+            ],
+            outputs: vec![json_output(
+                "reply",
+                "ReplySpeechResult: { audio_base64, audio_mime, visemes, alignment? }.",
+            )],
+        },
+        "voice_set_providers" => ControllerSchema {
+            namespace: "voice",
+            function: "set_providers",
+            description:
+                "Persist the STT / TTS provider selection (and optional model/voice id) into \
+                 `config.local_ai.{stt,tts}_provider` so subsequent voice_stt_dispatch / \
+                 voice_tts_dispatch calls resolve without an explicit provider param.",
+            inputs: vec![
+                optional_string(
+                    "stt_provider",
+                    "STT provider id ('cloud' or 'whisper'). Omitted = unchanged.",
+                ),
+                optional_string(
+                    "tts_provider",
+                    "TTS provider id ('cloud' or 'piper'). Omitted = unchanged.",
+                ),
+                optional_string("stt_model", "Whisper model id (e.g. 'whisper-large-v3-turbo')."),
+                optional_string("tts_voice", "Piper voice id (e.g. 'en_US-lessac-medium')."),
+            ],
+            outputs: vec![json_output(
+                "providers",
+                "Updated provider selectors: { stt_provider, tts_provider, stt_model_id, tts_voice_id }.",
+            )],
+        },
         "voice_cloud_transcribe" => ControllerSchema {
             namespace: "voice",
             function: "cloud_transcribe",
@@ -368,14 +503,43 @@ fn handle_voice_reply_synthesize(params: Map<String, Value>) -> ControllerFuture
     Box::pin(async move {
         let config = config_rpc::load_config_with_timeout().await?;
         let p = deserialize_params::<ReplySynthesizeParams>(params)?;
-        let opts = crate::openhuman::voice::reply_speech::ReplySpeechOptions {
-            voice_id: p.voice_id,
-            model_id: p.model_id,
-            output_format: p.output_format,
-            voice_settings: None,
+        // Dispatch through the TTS factory so the user's `tts_provider`
+        // setting (cloud / piper / …) is honored on the spoken-reply path,
+        // not just the dedicated `voice_tts_dispatch` RPC. Without this
+        // routing, the settings dropdown was effectively decorative —
+        // selecting "piper" persisted to config but conversation replies
+        // still hit the cloud TTS proxy.
+        let provider_name = effective_tts_provider(&config);
+        // Only default to the Piper voice id when the active provider is
+        // actually Piper. Passing a Piper voice id to a cloud TTS provider
+        // would send an invalid voice to the upstream API.
+        let voice = p
+            .voice_id
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .map(str::to_string)
+            .unwrap_or_else(|| {
+                if provider_name == "piper" {
+                    crate::openhuman::voice::DEFAULT_PIPER_VOICE.to_string()
+                } else {
+                    String::new()
+                }
+            });
+        let effective_voice = if voice.is_empty() {
+            None
+        } else {
+            Some(voice.as_str())
         };
+        log::debug!(
+            "[voice-factory] voice_reply_synthesize dispatch provider={provider_name} voice={voice}"
+        );
+        let provider =
+            crate::openhuman::voice::create_tts_provider(&provider_name, &voice, &config)
+                .map_err(|e| e.to_string())?;
         to_json(
-            crate::openhuman::voice::reply_speech::synthesize_reply(&config, &p.text, &opts)
+            provider
+                .synthesize(&config, &p.text, effective_voice)
                 .await?,
         )
     })
@@ -402,6 +566,190 @@ fn handle_voice_cloud_transcribe(params: Map<String, Value>) -> ControllerFuture
     })
 }
 
+/// RPC handler for `voice_stt_dispatch`.
+///
+/// Resolves the provider (explicit `provider` param, else the configured
+/// one) and the model id, builds the STT provider through the voice factory,
+/// transcribes the base64 audio, and returns `{ text, provider }` as JSON.
+fn handle_voice_stt_dispatch(params: Map<String, Value>) -> ControllerFuture {
+    Box::pin(async move {
+        let config = config_rpc::load_config_with_timeout().await?;
+        let request = deserialize_params::<SttDispatchParams>(params)?;
+
+        // A blank or whitespace-only override counts as "not supplied".
+        let provider_name = match request.provider.as_deref().map(str::trim) {
+            Some(name) if !name.is_empty() => name.to_string(),
+            _ => effective_stt_provider(&config),
+        };
+        // NOTE(review): the fallback here is the built-in default model; this
+        // handler never reads `config.local_ai.stt_model_id` — confirm intended.
+        let model = match request.model.as_deref().map(str::trim) {
+            Some(id) if !id.is_empty() => id.to_string(),
+            _ => crate::openhuman::voice::DEFAULT_WHISPER_MODEL.to_string(),
+        };
+        log::debug!(
+            "[voice-factory] RPC voice_stt_dispatch provider={provider_name} model={model}"
+        );
+
+        let stt = crate::openhuman::voice::create_stt_provider(&provider_name, &model, &config)
+            .map_err(|e| e.to_string())?;
+        let mime_type = request.mime_type.as_deref();
+        let file_name = request.file_name.as_deref();
+        let language = request.language.as_deref();
+        let outcome = stt
+            .transcribe(&config, &request.audio_base64, mime_type, file_name, language)
+            .await?;
+
+        // Only the transcript and the provider label are returned to the caller.
+        Ok(serde_json::json!({
+            "text": outcome.value.text,
+            "provider": outcome.value.provider,
+        }))
+    })
+}
+
+/// RPC handler for `voice_tts_dispatch`.
+///
+/// Resolves the provider (explicit `provider` param, else the configured
+/// one) and the voice id, builds the TTS provider through the voice factory,
+/// and returns the synthesis outcome serialized with `to_json`.
+fn handle_voice_tts_dispatch(params: Map<String, Value>) -> ControllerFuture {
+    Box::pin(async move {
+        let config = config_rpc::load_config_with_timeout().await?;
+        let request = deserialize_params::<TtsDispatchParams>(params)?;
+
+        // A blank or whitespace-only override counts as "not supplied".
+        let provider_name = match request.provider.as_deref().map(str::trim) {
+            Some(name) if !name.is_empty() => name.to_string(),
+            _ => effective_tts_provider(&config),
+        };
+        // The Piper default voice is only a valid fallback for Piper itself;
+        // sending a Piper voice id to a cloud TTS endpoint is invalid, so any
+        // other provider gets an empty voice (no override).
+        // NOTE(review): this handler never reads `config.local_ai.tts_voice_id`
+        // — confirm the persisted voice is picked up elsewhere or not needed.
+        let voice = match request.voice.as_deref().map(str::trim) {
+            Some(id) if !id.is_empty() => id.to_string(),
+            _ if provider_name == "piper" => {
+                crate::openhuman::voice::DEFAULT_PIPER_VOICE.to_string()
+            }
+            _ => String::new(),
+        };
+        // The factory receives the raw string; `synthesize` gets `None` when empty.
+        let effective_voice = match voice.as_str() {
+            "" => None,
+            id => Some(id),
+        };
+
+        log::debug!(
+            "[voice-factory] RPC voice_tts_dispatch provider={provider_name} voice={voice}"
+        );
+        let tts = crate::openhuman::voice::create_tts_provider(&provider_name, &voice, &config)
+            .map_err(|e| e.to_string())?;
+        let outcome = tts
+            .synthesize(&config, &request.text, effective_voice)
+            .await?;
+        to_json(outcome)
+    })
+}
+
+/// RPC handler for `voice_set_providers`.
+///
+/// Applies each supplied, non-blank field to `config.local_ai`, rejecting
+/// unknown provider ids before anything is saved, then persists the config
+/// and echoes back the four resulting selector values
+/// (`stt_provider`, `tts_provider`, `stt_model_id`, `tts_voice_id`).
+fn handle_voice_set_providers(params: Map<String, Value>) -> ControllerFuture {
+    /// Trimmed view of an optional param; blank values count as omitted.
+    fn supplied(raw: &Option<String>) -> Option<&str> {
+        raw.as_deref().map(str::trim).filter(|s| !s.is_empty())
+    }
+
+    Box::pin(async move {
+        let request = deserialize_params::<SetProvidersParams>(params)?;
+        let mut config = config_rpc::load_config_with_timeout().await?;
+
+        // Provider ids are validated first; an invalid id returns early, so
+        // the in-memory changes made so far are dropped without being saved.
+        if let Some(stt) = supplied(&request.stt_provider) {
+            validate_stt_provider(stt)?;
+            config.local_ai.stt_provider = stt.to_string();
+        }
+        if let Some(tts) = supplied(&request.tts_provider) {
+            validate_tts_provider(tts)?;
+            config.local_ai.tts_provider = tts.to_string();
+        }
+
+        // Model and voice ids are stored trimmed but otherwise unvalidated.
+        if let Some(model) = supplied(&request.stt_model) {
+            config.local_ai.stt_model_id = model.to_string();
+        }
+        if let Some(voice) = supplied(&request.tts_voice) {
+            config.local_ai.tts_voice_id = voice.to_string();
+        }
+
+        // Persist unconditionally — the save also runs when no field was
+        // supplied.
+        config.save().await.map_err(|e| e.to_string())?;
+
+        let local_ai = &config.local_ai;
+        log::debug!(
+            "[voice-factory] persisted providers stt={} tts={} stt_model={} tts_voice={}",
+            local_ai.stt_provider,
+            local_ai.tts_provider,
+            local_ai.stt_model_id,
+            local_ai.tts_voice_id
+        );
+
+        // Echo the stored values back to the caller.
+        Ok(serde_json::json!({
+            "stt_provider": local_ai.stt_provider,
+            "tts_provider": local_ai.tts_provider,
+            "stt_model_id": local_ai.stt_model_id,
+            "tts_voice_id": local_ai.tts_voice_id,
+        }))
+    })
+}
+
+/// Accepts the STT provider ids `"cloud"` / `"whisper"`; errors on anything else.
+fn validate_stt_provider(provider: &str) -> Result<(), String> {
+    if matches!(provider, "cloud" | "whisper") {
+        return Ok(());
+    }
+    let other = provider;
+    Err(format!("invalid stt_provider '{other}' (valid: 'cloud', 'whisper')"))
+}
+
+/// Accepts the TTS provider ids `"cloud"` / `"piper"`; errors on anything else.
+fn validate_tts_provider(provider: &str) -> Result<(), String> {
+    if matches!(provider, "cloud" | "piper") {
+        return Ok(());
+    }
+    let other = provider;
+    Err(format!("invalid tts_provider '{other}' (valid: 'cloud', 'piper')"))
+}
+
+/// Resolve the STT provider selected in config.
+///
+/// The stored value is trimmed; a blank setting resolves to `"cloud"`, so
+/// fresh installs keep the existing renderer behaviour until the user opts
+/// into the local stack.
+fn effective_stt_provider(config: &crate::openhuman::config::Config) -> String {
+    match config.local_ai.stt_provider.trim() {
+        "" => "cloud".to_string(),
+        chosen => chosen.to_string(),
+    }
+}
+
+/// Resolve the TTS provider selected in config: the trimmed value of
+/// `config.local_ai.tts_provider`, or `"cloud"` when that setting is blank.
+fn effective_tts_provider(config: &crate::openhuman::config::Config) -> String {
+    match config.local_ai.tts_provider.trim() {
+        "" => "cloud".to_string(),
+        chosen => chosen.to_string(),
+    }
+}
+
 fn handle_voice_server_start(params: Map<String, Value>) -> ControllerFuture {
     Box::pin(async move {
         use crate::openhuman::voice::hotkey::ActivationMode;
diff --git a/src/openhuman/voice/schemas_tests.rs b/src/openhuman/voice/schemas_tests.rs
index f2a3e34d0d..cf7b4a871a 100644
--- a/src/openhuman/voice/schemas_tests.rs
+++ b/src/openhuman/voice/schemas_tests.rs
@@ -22,6 +22,39 @@ fn schema_names_are_stable() {
     let s = voice_schemas("overlay_stt_notify");
     assert_eq!(s.namespace, "voice");
     assert_eq!(s.function, "overlay_stt_notify");
+
+    let s = voice_schemas("voice_stt_dispatch");
+    assert_eq!(s.namespace, "voice");
+    assert_eq!(s.function, "stt_dispatch");
+
+    let s = voice_schemas("voice_tts_dispatch");
+    assert_eq!(s.namespace, "voice");
+    assert_eq!(s.function, "tts_dispatch");
+
+    let s = voice_schemas("voice_set_providers");
+    assert_eq!(s.namespace, "voice");
+    assert_eq!(s.function, "set_providers");
+}
+
+#[test]
+fn factory_dispatch_schemas_are_wired_into_registry() {
+    // All three new endpoints must appear in the registered-controller list;
+    // otherwise the JSON-RPC router rejects them with "unknown method".
+    let controllers = all_voice_registered_controllers();
+    let functions: Vec<&'static str> = controllers.iter().map(|c| c.schema.function).collect();
+    let is_registered = |name: &str| functions.iter().any(|f| *f == name);
+    assert!(
+        is_registered("stt_dispatch"),
+        "voice.stt_dispatch must be registered (got {functions:?})"
+    );
+    assert!(
+        is_registered("tts_dispatch"),
+        "voice.tts_dispatch must be registered"
+    );
+    assert!(
+        is_registered("set_providers"),
+        "voice.set_providers must be registered"
+    );
 }
 
 #[test]
diff --git a/src/openhuman/voice/types.rs b/src/openhuman/voice/types.rs
index 59760459dc..1103090633 100644
--- a/src/openhuman/voice/types.rs
+++ b/src/openhuman/voice/types.rs
@@ -37,6 +37,13 @@ pub struct VoiceStatus {
     pub whisper_in_process: bool,
     /// Whether LLM post-processing is enabled for transcription cleanup.
     pub llm_cleanup_enabled: bool,
+    /// Currently selected STT provider ("cloud" or "whisper"). Echoed so
+    /// the settings panel can render the picker without an extra RPC.
+    #[serde(default)]
+    pub stt_provider: String,
+    /// Currently selected TTS provider ("cloud" or "piper").
+    #[serde(default)]
+    pub tts_provider: String,
 }
 
 impl From<LocalAiSpeechResult> for VoiceSpeechResult {
@@ -99,6 +106,8 @@ mod tests {
             tts_voice_path: None,
             whisper_in_process: true,
             llm_cleanup_enabled: true,
+            stt_provider: "whisper".into(),
+            tts_provider: "cloud".into(),
         };
         let v = serde_json::to_value(&s).unwrap();
         assert_eq!(v["stt_available"], true);
@@ -106,6 +115,8 @@ mod tests {
         assert!(v["piper_binary"].is_null());
         assert_eq!(v["whisper_in_process"], true);
         assert_eq!(v["llm_cleanup_enabled"], true);
+        assert_eq!(v["stt_provider"], "whisper");
+        assert_eq!(v["tts_provider"], "cloud");
     }
 
     #[test]