/**
 * Break the current text selection into sentence-sized TTS segments, each
 * paired with a CFI that addresses just that sentence.
 *
 * Steps: collect the text nodes covered by `selection.range`, concatenate the
 * selected slices into one string (remembering where each node's slice sits in
 * it), split that string with `splitTextIntoTTSSegmentRanges`, then map every
 * sentence's offsets back to DOM positions and ask the view for a CFI.
 *
 * Whenever that is not possible (no view, no or collapsed range, no owning
 * document, nothing readable selected, the splitter fails, or no CFI could be
 * resolved) the result falls back to one segment built from `selection.text`
 * and `selection.cfi`, or to an empty array when either of those is missing.
 *
 * @param selection - Selection to read; `range`, `text`, `cfi` and `chapterIndex` are used.
 * @returns Sentence segments in document order, de-duplicated via `getTTSSegmentIdentity`.
 */
const getSelectionTTSSegments = useCallback(
  async (selection: BookSelection): Promise<TTSSegmentDetail[]> => {
    const view = viewRef.current;
    const range = selection.range;
    const fallbackText = normalizeTTSSegmentText(selection.text);
    const fallbackCfi = selection.cfi || "";
    const fallback = () =>
      fallbackText && fallbackCfi ? [{ text: fallbackText, cfi: fallbackCfi }] : [];
    if (!view || !range || range.collapsed) return fallback();

    await ensureDesktopTTS();

    const doc =
      range.startContainer.nodeType === Node.DOCUMENT_NODE
        ? (range.startContainer as Document)
        : range.startContainer.ownerDocument;
    if (!doc) return fallback();

    // Prefer the chapter index recorded on the selection; otherwise use the
    // index of the rendered content whose document owns the range.
    const contents = view.renderer?.getContents?.() ?? [];
    const content = contents.find(
      (item: { doc?: Document; index?: number }) => item.doc === doc,
    );
    const sectionIndex = selection.chapterIndex ?? content?.index ?? 0;
    const lang =
      doc.documentElement.lang ||
      doc.documentElement.getAttribute("xml:lang") ||
      doc.body.lang ||
      navigator.language ||
      "en";
    const root =
      range.commonAncestorContainer.nodeType === Node.TEXT_NODE
        ? range.commonAncestorContainer.parentElement
        : range.commonAncestorContainer;
    if (!root) return fallback();

    // For each contributing text node: where its selected slice sits inside
    // `selectionText` (`start`/`end`) and the in-node offset the slice begins at.
    const positionedNodes: Array<{
      node: Text;
      start: number;
      end: number;
      nodeStart: number;
    }> = [];
    let selectionText = "";
    const walker = doc.createTreeWalker(root, NodeFilter.SHOW_TEXT, {
      acceptNode: (node) => {
        if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP;
        const parent = (node as Text).parentElement;
        if (!parent) return NodeFilter.FILTER_ACCEPT;
        const tag = parent.tagName.toLowerCase();
        if (tag === "script" || tag === "style") return NodeFilter.FILTER_REJECT;
        if (parent.closest(".readany-translation")) return NodeFilter.FILTER_REJECT;

        const nodeRange = doc.createRange();
        try {
          nodeRange.selectNodeContents(node);
          // END_TO_START compares the selection's START with the node's END:
          // a result >= 0 means the node ends at or before the selection begins.
          if (range.compareBoundaryPoints(Range.END_TO_START, nodeRange) >= 0) {
            return NodeFilter.FILTER_REJECT;
          }
          // START_TO_END compares the selection's END with the node's START:
          // a result <= 0 means the node begins at or after the selection ends.
          if (range.compareBoundaryPoints(Range.START_TO_END, nodeRange) <= 0) {
            return NodeFilter.FILTER_REJECT;
          }
          return NodeFilter.FILTER_ACCEPT;
        } catch {
          return NodeFilter.FILTER_REJECT;
        } finally {
          nodeRange.detach?.();
        }
      },
    });

    for (
      let textNode = walker.nextNode() as Text | null;
      textNode;
      textNode = walker.nextNode() as Text | null
    ) {
      const sourceText = textNode.nodeValue || "";
      // Only the first and last nodes are partially selected; clamp their
      // offsets to the node's length, every other node contributes fully.
      const startOffset =
        textNode === range.startContainer
          ? Math.max(0, Math.min(sourceText.length, range.startOffset))
          : 0;
      const endOffset =
        textNode === range.endContainer
          ? Math.max(0, Math.min(sourceText.length, range.endOffset))
          : sourceText.length;
      if (endOffset <= startOffset) continue;

      const text = sourceText.slice(startOffset, endOffset);
      const start = selectionText.length;
      selectionText += text;
      positionedNodes.push({
        node: textNode,
        start,
        end: selectionText.length,
        nodeStart: startOffset,
      });
    }

    if (!selectionText.trim() || positionedNodes.length === 0) return fallback();

    // Map an offset in `selectionText` back to a (text node, offset) pair.
    // An end offset that lands exactly on a slice boundary stays in the earlier
    // node (`isEnd`); a start offset there moves on to the next node.
    const resolvePosition = (absoluteOffset: number, isEnd: boolean) => {
      for (const item of positionedNodes) {
        if (absoluteOffset < item.end || (isEnd && absoluteOffset <= item.end)) {
          return {
            node: item.node,
            offset: Math.max(
              0,
              Math.min(
                item.node.nodeValue?.length ?? 0,
                item.nodeStart + absoluteOffset - item.start,
              ),
            ),
          };
        }
      }
      const last = positionedNodes[positionedNodes.length - 1];
      return { node: last.node, offset: last.node.nodeValue?.length ?? 0 };
    };

    // `lang` is taken verbatim from book markup. Treat any failure of the
    // splitter (for example a language tag the platform rejects) as "cannot
    // segment" rather than letting the returned promise reject.
    let sentenceRanges: ReturnType<typeof splitTextIntoTTSSegmentRanges>;
    try {
      sentenceRanges = splitTextIntoTTSSegmentRanges(selectionText, lang);
    } catch {
      return fallback();
    }

    const seen = new Set();
    const segments: TTSSegmentDetail[] = [];
    for (const segment of sentenceRanges) {
      const startPos = resolvePosition(segment.start, false);
      const endPos = resolvePosition(segment.end, true);

      const segmentRange = doc.createRange();
      try {
        segmentRange.setStart(startPos.node, startPos.offset);
        segmentRange.setEnd(endPos.node, endPos.offset);
        const cfi = view.getCFI(sectionIndex, segmentRange);
        const identity = getTTSSegmentIdentity(cfi, segment.text);
        if (cfi && !seen.has(identity)) {
          seen.add(identity);
          segments.push({ text: segment.text, cfi });
        }
      } catch {
        // skip segment if CFI resolution fails
      } finally {
        segmentRange.detach?.();
      }
    }

    if (segments.length > 0) {
      console.log("[FoliateViewer][TTS] selectionTTSSegments", {
        count: segments.length,
        firstText: segments[0]?.text || null,
      });
      return segments;
    }

    return fallback();
  },
  [ensureDesktopTTS],
);
getTTSSegmentContext = useCallback( async ( cfi: string, @@ -979,6 +1134,7 @@ export const FoliateViewer = forwardRef } }, getVisibleTTSSegments, + getSelectionTTSSegments, getTTSSegmentContext, setTTSHighlight: async (cfi: string | null, color?: string) => { ttsHighlightStateRef.current = { @@ -1198,7 +1354,7 @@ export const FoliateViewer = forwardRef : undefined, })); }, - [clearTTSHighlight, ensureDesktopTTS, getVisibleTTSSegments], + [clearTTSHighlight, ensureDesktopTTS, getSelectionTTSSegments, getVisibleTTSSegments], ); // --- Section load handler --- @@ -1760,12 +1916,23 @@ export const FoliateViewer = forwardRef if (!view) return null; const contents = view.renderer?.getContents?.(); - if (!contents?.[0]?.doc) return null; - - const doc = contents[0].doc as Document; - const sel = doc.getSelection(); - const range = getSelectionRange(sel); - if (!range) return null; + if (!contents?.length) return null; + + let doc: Document | null = null; + let sel: Selection | null = null; + let range: Range | null = null; + for (const content of contents) { + const contentDoc = content?.doc as Document | undefined; + const contentSelection = contentDoc?.getSelection(); + const contentRange = getSelectionRange(contentSelection); + if (contentDoc && contentSelection && contentRange) { + doc = contentDoc; + sel = contentSelection; + range = contentRange; + break; + } + } + if (!doc || !sel || !range) return null; const text = (sel?.toString() || "").trim(); if (!text) return null; @@ -1773,7 +1940,10 @@ export const FoliateViewer = forwardRef let cfi: string | undefined; let chapterIndex: number | undefined; try { - const index = contents[0].index; + const rangeDoc = range.startContainer.ownerDocument; + const content = + contents.find((item: { doc?: Document }) => item.doc === rangeDoc) ?? 
contents[0]; + const index = content.index; if (index !== undefined) { cfi = view.getCFI(index, range); chapterIndex = index; diff --git a/packages/app/src/components/reader/ReaderView.tsx b/packages/app/src/components/reader/ReaderView.tsx index b384d17e..be871e09 100644 --- a/packages/app/src/components/reader/ReaderView.tsx +++ b/packages/app/src/components/reader/ReaderView.tsx @@ -1243,7 +1243,7 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { (sel: BookSelection | null) => { setSelection(sel); if (sel) { - setSelectedText(tabId, sel.text, null); + setSelectedText(tabId, sel.text, sel.cfi ?? null); if (sel.rects.length > 0) { // SelectionPopover uses absolute positioning relative to containerRef const containerRect = containerRef.current?.getBoundingClientRect(); @@ -1561,20 +1561,27 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { void foliateRef.current?.setTTSHighlight(null); return; } - if (ttsSourceKind !== "page") { + if (ttsSourceKind !== "page" && ttsSourceKind !== "selection") { void foliateRef.current?.setTTSHighlight(null); return; } void foliateRef.current?.setTTSHighlight( - currentTTSSegment?.cfi || null, + currentTTSSegment?.cfi || (ttsSourceKind === "selection" ? ttsCurrentLocationCfi : null), "rgba(96, 165, 250, 0.35)", ); - }, [bookId, currentTTSSegment?.cfi, ttsCurrentBookId, ttsPlayState, ttsSourceKind]); + }, [ + bookId, + currentTTSSegment?.cfi, + ttsCurrentBookId, + ttsCurrentLocationCfi, + ttsPlayState, + ttsSourceKind, + ]); useEffect(() => { if (ttsCurrentBookId !== bookId) return; const targetCfi = - ttsSourceKind === "page" && + (ttsSourceKind === "page" || ttsSourceKind === "selection") && (ttsPlayState === "playing" || ttsPlayState === "paused" || ttsPlayState === "loading") ? 
currentTTSSegment?.cfi || ttsCurrentLocationCfi || null : readerTab?.currentCfi; @@ -1862,27 +1869,47 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { ]); const startSelectionTTS = useCallback( - (text: string) => { + async (text: string, selectionForCfi?: BookSelection | null) => { const normalized = text.trim(); if (!normalized) return; - const segments = splitNarrationText(normalized) - .filter(Boolean) - .map((segmentText) => ({ text: segmentText, cfi: null })); + const selectionSegments = + selectionForCfi && foliateRef.current + ? await foliateRef.current.getSelectionTTSSegments(selectionForCfi) + : []; + const fallbackCfi = + selectionForCfi?.cfi || + readerTab?.selectionCfi || + ttsCurrentLocationCfi || + readerTab?.currentCfi || + null; + const segments = selectionSegments.length + ? selectionSegments.map((segment) => ({ + text: segment.text.trim(), + cfi: segment.cfi || fallbackCfi, + })) + : splitNarrationText(normalized) + .filter(Boolean) + .map((segmentText) => ({ text: segmentText, cfi: fallbackCfi })); + const playableSegments = segments.filter((segment) => segment.text.length > 0); setTtsSourceKind("selection"); setTtsContinuousEnabled(false); setTtsLastText(normalized); - setTtsSegments(segments); + setTtsSegments(playableSegments); setTtsPrevPageSegments([]); setTtsFutureSegments([]); ttsLastTextRef.current = normalized; - ttsSegmentsRef.current = segments; + ttsSegmentsRef.current = playableSegments; ttsFutureSegmentsRef.current = []; ttsContinuousRef.current = false; ttsSetOnEnd(null); ttsSetCurrentBook(book?.meta.title ?? "", readerTab?.chapterTitle ?? "", bookId); - ttsSetCurrentLocation(readerTab?.selectionCfi || readerTab?.currentCfi || ""); + ttsSetCurrentLocation(playableSegments[0]?.cfi || fallbackCfi || ""); setShowTTS(true); - ttsPlay(segments.length > 0 ? segments.map((segment) => segment.text) : normalized); + ttsPlay( + playableSegments.length > 0 + ? 
playableSegments.map((segment) => segment.text) + : normalized, + ); }, [ ttsPlay, @@ -1893,6 +1920,7 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { readerTab?.chapterTitle, readerTab?.selectionCfi, readerTab?.currentCfi, + ttsCurrentLocationCfi, bookId, ], ); @@ -2049,17 +2077,31 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { // TTS: speak selected text (no auto page-turn) const handleSpeakSelection = useCallback(() => { - if (selection?.text) { - startSelectionTTS(selection.text); + const currentSelection = selection; + if (currentSelection?.text) { + void startSelectionTTS(currentSelection.text, currentSelection); } setSelection(null); }, [selection, startSelectionTTS]); const handleTTSReplay = useCallback(async () => { if (ttsSourceKind === "selection") { - const text = (ttsCurrentText || ttsLastText).trim(); + const replaySegments = ttsSegmentsRef.current.filter((segment) => segment.text.trim()); + if (replaySegments.length > 0) { + const text = replaySegments + .map((segment) => segment.text) + .join(" ") + .trim(); + setTtsSegments(replaySegments); + setTtsLastText(text); + ttsLastTextRef.current = text; + ttsSetCurrentLocation(replaySegments[0]?.cfi || ttsCurrentLocationCfi || ""); + ttsPlay(replaySegments.map((segment) => segment.text)); + return; + } + const text = ttsLastText.trim(); if (text) { - startSelectionTTS(text); + await startSelectionTTS(text); } return; } @@ -2069,8 +2111,10 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { startPageTTS, startSelectionTTS, ttsContinuousEnabled, - ttsCurrentText, + ttsCurrentLocationCfi, ttsLastText, + ttsPlay, + ttsSetCurrentLocation, ttsSourceKind, ]); @@ -2096,9 +2140,22 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { } if (ttsSourceKind === "selection") { - const text = (ttsCurrentText || ttsLastText).trim(); + const replaySegments = ttsSegmentsRef.current.filter((segment) => segment.text.trim()); + if (replaySegments.length > 
0) { + const text = replaySegments + .map((segment) => segment.text) + .join(" ") + .trim(); + setTtsSegments(replaySegments); + setTtsLastText(text); + ttsLastTextRef.current = text; + ttsSetCurrentLocation(replaySegments[0]?.cfi || ttsCurrentLocationCfi || ""); + ttsPlay(replaySegments.map((segment) => segment.text)); + return; + } + const text = ttsLastText.trim(); if (text) { - startSelectionTTS(text); + await startSelectionTTS(text); } return; } @@ -2108,11 +2165,13 @@ export function ReaderView({ bookId, tabId }: ReaderViewProps) { startPageTTS, startSelectionTTS, ttsContinuousEnabled, - ttsCurrentText, + ttsCurrentLocationCfi, ttsLastText, ttsPause, + ttsPlay, ttsPlayState, ttsResume, + ttsSetCurrentLocation, ttsSourceKind, ]); diff --git a/packages/core/src/reader/index.ts b/packages/core/src/reader/index.ts index 6c7bd09f..bec746bc 100644 --- a/packages/core/src/reader/index.ts +++ b/packages/core/src/reader/index.ts @@ -25,3 +25,7 @@ export type { SessionEvent, SessionDetector } from "./session-detector"; // Annotation mutations export { createSelectionNoteMutation } from "./selection-note"; export type { SelectionNoteMutation, SelectionNoteMutationInput } from "./selection-note"; + +// TTS +export { splitTextIntoTTSSegmentRanges } from "./tts-segments"; +export type { TTSTextSegmentRange } from "./tts-segments"; diff --git a/packages/core/src/reader/tts-segments.test.ts b/packages/core/src/reader/tts-segments.test.ts new file mode 100644 index 00000000..f007b544 --- /dev/null +++ b/packages/core/src/reader/tts-segments.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from "vitest"; + +import { splitTextIntoTTSSegmentRanges } from "./tts-segments"; + +describe("splitTextIntoTTSSegmentRanges", () => { + it("keeps source offsets while trimming sentence whitespace", () => { + const segments = splitTextIntoTTSSegmentRanges(" First sentence. Second sentence! 
/** A sentence-sized slice of a source string, addressed by string offsets. */
export interface TTSTextSegmentRange {
  /** Sentence text, trimmed, with every inner whitespace run collapsed to one space. */
  text: string;
  /** Offset of the first non-whitespace character of the sentence in the source (inclusive). */
  start: number;
  /** Offset just past the last non-whitespace character of the sentence in the source (exclusive). */
  end: number;
}

/** The part of an `Intl.Segmenter` instance this module relies on. */
type SentenceSegmenterInstance = {
  segment(input: string): Iterable<{ index: number; segment: string }>;
};

/** Constructor shape of `Intl.Segmenter`, declared locally so older `lib` targets still compile. */
type SentenceSegmenter = new (
  locales?: string | string[],
  options?: { granularity?: "grapheme" | "word" | "sentence" },
) => SentenceSegmenterInstance;

/** Regex splitter used when no `Intl.Segmenter` can be constructed. */
const FALLBACK_SENTENCE_RE =
  /[^\r\n.!?;:\u3002\uff01\uff1f\uff1b\uff1a]+[.!?;:\u3002\uff01\uff1f\uff1b\uff1a\u2026]*/gu;

/** Single-character whitespace probe, hoisted so the trim loops do not rebuild it. */
const WHITESPACE_RE = /\s/u;

/** Rewrite `_` separators (e.g. `en_US`) to the `-` form that `Intl` accepts. */
function normalizeLocaleSeparators(
  locale: string | string[] | undefined,
): string | string[] | undefined {
  if (locale === undefined) return undefined;
  return typeof locale === "string"
    ? locale.replace(/_/g, "-")
    : locale.map((tag) => tag.replace(/_/g, "-"));
}

/**
 * Build a sentence segmenter, tolerating language tags the runtime rejects.
 *
 * Tries the locale as given, then with `_` normalised to `-`, then the runtime
 * default locale. Returns `null` when `Intl.Segmenter` is unavailable or every
 * attempt is rejected, in which case the caller uses the regex fallback.
 */
function createSentenceSegmenter(locale?: string | string[]): SentenceSegmenterInstance | null {
  const SegmenterCtor = (
    Intl as typeof Intl & {
      Segmenter?: SentenceSegmenter;
    }
  ).Segmenter;
  if (!SegmenterCtor) return null;

  const candidates: Array<string | string[] | undefined> = [locale];
  if (locale !== undefined) {
    candidates.push(normalizeLocaleSeparators(locale), undefined);
  }

  for (const candidate of candidates) {
    try {
      return new SegmenterCtor(candidate, { granularity: "sentence" });
    } catch (error: unknown) {
      // A structurally invalid tag raises RangeError; move on to the next
      // candidate. Anything else is unexpected and must not be hidden.
      if (!(error instanceof RangeError)) throw error;
    }
  }
  return null;
}

/**
 * Split `text` into sentences and report where each one sits in `text`.
 *
 * Sentence boundaries come from `Intl.Segmenter` when it can be constructed,
 * otherwise from a punctuation-based regex. Each range is shrunk so that it
 * excludes leading and trailing whitespace; ranges that end up empty are dropped.
 *
 * @param text - Source string to split.
 * @param locale - Language tag(s) hinting the segmentation rules. Tags the
 *   runtime rejects (such as `en_US`) are tolerated rather than thrown on.
 * @returns Sentence ranges in source order; empty when `text` is blank.
 */
export function splitTextIntoTTSSegmentRanges(
  text: string,
  locale?: string | string[],
): TTSTextSegmentRange[] {
  if (!text.trim()) return [];

  const segmenter = createSentenceSegmenter(locale);
  const rawRanges = segmenter
    ? Array.from(segmenter.segment(text), (item) => ({
        start: item.index,
        end: item.index + item.segment.length,
      }))
    : Array.from(text.matchAll(FALLBACK_SENTENCE_RE), (match) => ({
        start: match.index ?? 0,
        end: (match.index ?? 0) + match[0].length,
      }));

  // If the splitter produced nothing, treat the whole input as one sentence.
  const ranges = rawRanges.length ? rawRanges : [{ start: 0, end: text.length }];
  const result: TTSTextSegmentRange[] = [];

  for (const range of ranges) {
    let start = range.start;
    let end = range.end;
    // Shrink the range to its non-whitespace core so the offsets address
    // exactly the spoken characters.
    while (start < end && WHITESPACE_RE.test(text[start] ?? "")) start++;
    while (end > start && WHITESPACE_RE.test(text[end - 1] ?? "")) end--;
    const segmentText = text.slice(start, end).replace(/\s+/g, " ").trim();
    if (segmentText.length < 1) continue;
    result.push({ text: segmentText, start, end });
  }

  return result;
}