diff --git a/.env.evo-x2.example b/.env.evo-x2.example index 8a0942a..80aaf92 100644 --- a/.env.evo-x2.example +++ b/.env.evo-x2.example @@ -15,6 +15,7 @@ STYLE_LLM_MODEL=gemma4:e2b BRIEF_LLM_MODEL=qwen3.6:27b ARTICLE_LLM_MODEL=gemma4:e2b DRAFT_LLM_MODEL=gemma4:31b +VERIFY_LLM_MODEL=gemma4:latest # Ordered fallback chain: # 1. Evo X2 llama.cpp over Tailnet/Caddy. @@ -24,3 +25,4 @@ STYLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b BRIEF_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b ARTICLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b DRAFT_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b +VERIFY_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b diff --git a/.env.example b/.env.example index 3a7d37e..26f236c 100644 --- a/.env.example +++ b/.env.example @@ -9,6 +9,7 @@ LLM_TIMEOUT_SECONDS=180 # BRIEF_LLM_MODEL=qwen3.6:27b # ARTICLE_LLM_MODEL=gemma4:e2b # DRAFT_LLM_MODEL=gemma4:31b +# VERIFY_LLM_MODEL=gemma4:latest # Ordered fallback chain. Use comma-separated OpenAI-compatible base URLs. # LLM_FALLBACK_BASE_URLS=http://remote-llama/v1,http://127.0.0.1:8081/v1 # Optional comma-separated per-phase fallback model names: @@ -16,6 +17,7 @@ LLM_TIMEOUT_SECONDS=180 # BRIEF_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b # ARTICLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b # DRAFT_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b +# VERIFY_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b # Legacy single fallback remains supported: # FALLBACK_LLM_BASE_URL=http://127.0.0.1:8081/v1 LLAMACPP_HOST=127.0.0.1 diff --git a/Makefile b/Makefile index c0e2eb2..d088c79 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,7 @@ EVO_X2_BRIEF_LLM_MODEL ?= qwen3.6:27b EVO_X2_STYLE_LLM_MODEL ?= gemma4:e2b EVO_X2_ARTICLE_LLM_MODEL ?= gemma4:e2b EVO_X2_DRAFT_LLM_MODEL ?= gemma4:31b +EVO_X2_VERIFY_LLM_MODEL ?= gemma4:latest FALLBACK_LLM_BASE_URL ?= http://127.0.0.1:8081/v1 LLM_FALLBACK_BASE_URLS ?= $(EVO_X2_LLAMA_CPP_LLM_BASE_URL),$(FALLBACK_LLM_BASE_URL) EVO_X2_LLAMA_CPP_MODEL ?= gemma-4-E2B-it-Q8_0.gguf @@ -28,6 +29,7 @@ STYLE_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b BRIEF_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),qwen3:30b-a3b ARTICLE_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b DRAFT_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),qwen3:30b-a3b +VERIFY_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b LLAMACPP_HOST ?= 127.0.0.1 LLAMACPP_PORT ?= 8081 LLAMACPP_BASE_URL ?= $(LLM_BASE_URL) @@ -46,7 +48,7 @@ dev: evo-x2: remote remote: evo-x2-preflight - NOTE_MAKER_SKIP_ENV=1 LLM_RUNTIME=remote LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" ./scripts/dev.sh + NOTE_MAKER_SKIP_ENV=1 LLM_RUNTIME=remote LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" VERIFY_LLM_MODEL="$(EVO_X2_VERIFY_LLM_MODEL)" LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" VERIFY_LLM_FALLBACK_MODELS="$(VERIFY_LLM_FALLBACK_MODELS)" ./scripts/dev.sh evo-x2-preflight: EVO_X2_TAILNET_HOST="$(EVO_X2_TAILNET_HOST)" EVO_X2_LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" ./scripts/evo-x2-tailnet-preflight.sh @@ -59,7 +61,7 @@ evo-x2-ssh-models: curl -s "$(EVO_X2_SSH_LLM_BASE_URL)/models" scenario-evo-x2: evo-x2-preflight - RUN_NOTE_SCENARIO=1 RUN_LOCAL_LLM_SCENARIO=1 SCENARIO_STREAM_DRAFT=1 LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_TIMEOUT_SECONDS=900 LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" SCENARIO_MIN_STYLE_SCORE=80 SCENARIO_MIN_DRAFT_RUNES=2800 DRAFT_MAX_ATTEMPTS=2 go run ./cmd/scenario/full_workflow + RUN_NOTE_SCENARIO=1 RUN_LOCAL_LLM_SCENARIO=1 SCENARIO_STREAM_DRAFT=1 LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" VERIFY_LLM_MODEL="$(EVO_X2_VERIFY_LLM_MODEL)" LLM_TIMEOUT_SECONDS=900 LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" VERIFY_LLM_FALLBACK_MODELS="$(VERIFY_LLM_FALLBACK_MODELS)" SCENARIO_MIN_STYLE_SCORE=80 SCENARIO_MIN_DRAFT_RUNES=2800 DRAFT_MAX_ATTEMPTS=2 go run ./cmd/scenario/full_workflow server: go run ./cmd/server diff --git a/README.md b/README.md index 18b010d..0e7e4f0 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ mise trust mise run evo-x2 ``` -既定では Tailnet 上の `http://evo-x2.tailb30e58.ts.net/v1` に接続します。モデルを変える場合は `.env.evo-x2.example` を参考に `LLM_MODEL`、`STYLE_LLM_MODEL`、`BRIEF_LLM_MODEL`、`ARTICLE_LLM_MODEL`、`DRAFT_LLM_MODEL` を設定してください。120B級のモデルを使う場合は `LLM_TIMEOUT_SECONDS` を長めに設定します。 +既定では Tailnet 上の `http://evo-x2.tailb30e58.ts.net/v1` に接続します。モデルを変える場合は `.env.evo-x2.example` を参考に `LLM_MODEL`、`STYLE_LLM_MODEL`、`BRIEF_LLM_MODEL`、`ARTICLE_LLM_MODEL`、`DRAFT_LLM_MODEL`、`VERIFY_LLM_MODEL` を設定してください。120B級のモデルを使う場合は `LLM_TIMEOUT_SECONDS` を長めに設定します。 画面上部の「設定」から、フェーズ別に使うモデルと一問一答の質問を変更できます。質問は初期テンプレートを編集でき、追加質問も下書き生成のブリーフに含まれます。 @@ -93,6 +93,7 @@ mise run evo-x2 - `BRIEF_LLM_MODEL`: 深掘り質問生成用。既定は推論力重視の `qwen3.6:27b`。 - `ARTICLE_LLM_MODEL`: 旧 `/api/generate` 用。既定は軽量な `gemma4:e2b`。 - `DRAFT_LLM_MODEL`: 一問一答後の最終下書き生成用。既定は日本語下書き品質重視の `gemma4:31b`。 +- `VERIFY_LLM_MODEL`: 下書き後の最終一貫性チェック用。既定は軽量な `gemma4:latest`。 - `EVO_X2_TAILNET_HOST`: Tailnet/MagicDNS 上の Evo X2 ホスト名です。既定値は `evo-x2.tailb30e58.ts.net`。 - `EVO_X2_LLM_BASE_URL`: Evo X2 Ollama primary の OpenAI互換APIです。既定値は `http://evo-x2.tailb30e58.ts.net/v1`。 - `LLM_FALLBACK_BASE_URLS`: カンマ区切りの fallback chain です。既定は Evo X2 llama.cpp、最後にローカル llama.cpp。 diff --git a/cmd/scenario/draft_generation/main.go b/cmd/scenario/draft_generation/main.go index 2baf797..3cb51b3 100644 --- a/cmd/scenario/draft_generation/main.go +++ b/cmd/scenario/draft_generation/main.go @@ -41,6 +41,7 @@ func main() { baseURL := envFirst("http://127.0.0.1:8081/v1", "LLM_BASE_URL", "LLAMACPP_BASE_URL") model := envFirst("gemma4:31b", "DRAFT_LLM_MODEL", "LLM_MODEL", "LLAMACPP_MODEL") + verifyModel := envFirst("gemma4:latest", "VERIFY_LLM_MODEL", "LLM_MODEL", "LLAMACPP_MODEL") minStyleScore := envFloat("SCENARIO_MIN_STYLE_SCORE", 80) minDraftRunes := envInt("SCENARIO_MIN_DRAFT_RUNES", 2400) maxAttempts := envInt("DRAFT_MAX_ATTEMPTS", 2) @@ -49,7 +50,11 @@ func main() { if err != nil { fatalf("create local llm client: %v", err) } - service := draftapp.NewService(client) + verifyClient, err := llamacpp.NewClientFromEnvForPurpose("VERIFY") + if err != nil { + fatalf("create verification llm client: %v", err) + } + service := draftapp.NewServiceWithVerifier(client, draftapp.NewLightweightVerifier(verifyClient)) var result draftapp.GenerateResult var finalElapsed time.Duration @@ -88,6 +93,7 @@ func main() { finalChunks = chunkCount writeFile(filepath.Join(outputDir, fmt.Sprintf("draft_attempt_%d.md", attempt)), result.Draft.Markdown()+"\n") writeJSON(filepath.Join(outputDir, fmt.Sprintf("evaluation_attempt_%d.json", attempt)), result.Evaluation) + writeJSON(filepath.Join(outputDir, fmt.Sprintf("verification_attempt_%d.json", attempt)), result.Verification) if result.Evaluation.Comparison.Score >= minStyleScore && len([]rune(result.Draft.Markdown())) >= minDraftRunes { break } @@ -95,6 +101,7 @@ func main() { writeFile(filepath.Join(outputDir, "draft.md"), result.Draft.Markdown()+"\n") writeJSON(filepath.Join(outputDir, "evaluation.json"), result.Evaluation) + writeJSON(filepath.Join(outputDir, "verification.json"), result.Verification) if result.Evaluation.Comparison.Score < minStyleScore { fatalf("style score %.1f below scenario minimum %.1f", result.Evaluation.Comparison.Score, minStyleScore) } @@ -106,6 +113,9 @@ func main() { fmt.Printf("passed=%v\n", result.Evaluation.Passed) fmt.Printf("score=%.1f\n", result.Evaluation.Comparison.Score) fmt.Printf("runes=%d\n", len([]rune(result.Draft.Markdown()))) + fmt.Printf("verification_performed=%v\n", result.Verification.Performed) + fmt.Printf("verification_passed=%v\n", result.Verification.Passed) + fmt.Printf("verification_summary=%s\n", result.Verification.Summary) fmt.Printf("elapsed_seconds=%.2f\n", finalElapsed.Seconds()) fmt.Printf("streaming=%v\n", streamDraft) if streamDraft { @@ -114,8 +124,10 @@ func main() { } fmt.Printf("llm_base_url=%s\n", baseURL) fmt.Printf("llm_model=%s\n", model) + fmt.Printf("verify_model=%s\n", verifyModel) fmt.Printf("draft=%s\n", filepath.Join(outputDir, "draft.md")) fmt.Printf("evaluation=%s\n", filepath.Join(outputDir, "evaluation.json")) + fmt.Printf("verification=%s\n", filepath.Join(outputDir, "verification.json")) } func readJSON(path string, out any) { diff --git a/docs/adrs/0001-three-phase-local-article-generation.md b/docs/adrs/0001-three-phase-local-article-generation.md index 3fff1db..9c3a59b 100644 --- a/docs/adrs/0001-three-phase-local-article-generation.md +++ b/docs/adrs/0001-three-phase-local-article-generation.md @@ -44,6 +44,7 @@ Note Maker will move from a single `POST /api/generate` flow to a three-phase wo - Generate a draft from `WritingStyleGuide + ArticleBrief`. - Do not fetch Note articles during draft generation. - Validate the draft as paste-ready Markdown and compare it against the author style profile. + - Run a final lightweight-model consistency check against the draft, brief, style guide, and target output format before returning the result. These phases are orchestrated by application services, not autonomous background agents. The word "agent" may be used in product language, but the implementation should use deterministic workflow boundaries first. @@ -83,7 +84,7 @@ Planned services: - `GenerateDraftService` - input: `WritingStyleGuide`, `ArticleBrief`. - - output: validated `Draft`, comparison report. + - output: validated `Draft`, comparison report, and lightweight final verification report. - `ArticleWorkflowService` - optional facade for UI/API flows that need to coordinate the three services. diff --git a/docs/adrs/0002-multi-persona-multi-format-extension.md b/docs/adrs/0002-multi-persona-multi-format-extension.md index b9f7fbd..4a035df 100644 --- a/docs/adrs/0002-multi-persona-multi-format-extension.md +++ b/docs/adrs/0002-multi-persona-multi-format-extension.md @@ -141,7 +141,7 @@ New domain types under `internal/domain`: - `AnalyzeAuthorStyleService` accepts a `persona_id` and persists the resulting guide as a new version under that persona; previous versions are preserved. - `InterviewService` consults the active persona and format to assemble the question list before the first question. -- `GenerateDraftService` resolves the prompt template fragment from the format's strategy, injects the active format's embedded Markdown guide, and merges persona-specific tone hints. +- `GenerateDraftService` resolves the prompt template fragment from the format's strategy, injects the active format's embedded Markdown guide, merges persona-specific tone hints, and runs a lightweight final verification step after the 31B draft is validated. - New `RegenerateSectionService` accepts a draft id, a section selector (heading anchor or character range), the brief, and the persona+format; returns a candidate replacement for human review. - New `StreamingDraftService` produces SSE chunks for the draft phase. @@ -202,6 +202,7 @@ Current implementation status as of 2026-05-02: - Phase B1 is complete ahead of the original order: `Persona` and `OutputFormat` concepts, prompt dispatch, and format validators are in place ([#21](https://github.com/terisuke/note_maker/issues/21)). The remaining Phase B work stays deferred until after Phase A/C foundations. - Evo X2 Ollama is the primary heavy-inference runtime through the Tailnet OpenAI-compatible API (`http://evo-x2.tailb30e58.ts.net/v1`). The runtime fallback chain is Evo X2 Ollama → Evo X2 llama.cpp (`/llama/v1`) → workstation-local llama.cpp. SSH tunnel access is an explicit developer diagnostic only. - Phase model defaults are intentionally split: lightweight `gemma4:e2b` for source/style summarization, `qwen3.6:27b` for deeper interview questions, and `gemma4:31b` for final Japanese draft generation. This is an operational default, not a hard domain rule; users can override it per phase. +- Final verification uses lightweight Gemma by default (`gemma4:latest`, currently the Evo X2 E4B-class Ollama model) to check brief coverage, style consistency, output-format notation, and unsupported factual assertions before the UI presents the final draft ([#47](https://github.com/terisuke/note_maker/issues/47)). - Runtime validation showed that Tailnet inference can take 20+ minutes and still miss quality gates because of generation variance. Therefore, Phase A started with streaming and cancellation ([#18](https://github.com/terisuke/note_maker/issues/18)) before the broader transcript rewrite ([#17](https://github.com/terisuke/note_maker/issues/17)). Primary-runtime quality stabilization is tracked separately in [#40](https://github.com/terisuke/note_maker/issues/40). - Phase A2 is implemented in code: `llamacpp.Client.GenerateStream`, streaming follow-up/draft service paths, `Accept: text/event-stream` handlers, browser Cancel controls, heartbeat events, and final runtime metrics. It still requires real Tailnet Evo X2 validation before closing the issue. diff --git a/docs/implementation-plans/issue-adr-guardrails.md b/docs/implementation-plans/issue-adr-guardrails.md index 2cdc570..6bf9d39 100644 --- a/docs/implementation-plans/issue-adr-guardrails.md +++ b/docs/implementation-plans/issue-adr-guardrails.md @@ -63,6 +63,7 @@ The phases in [ADR 0002](../adrs/0002-multi-persona-multi-format-extension.md) ( - SSH tunnels are allowed only as explicit developer diagnostics, not as the product default, because they depend on per-device SSH setup. - Local llama.cpp (`http://127.0.0.1:8081/v1`) is fallback only. Do not set `LLM_BASE_URL` to local Ollama or local llama.cpp for Evo X2 validation unless the test is explicitly measuring fallback behavior. - Runtime validation must report base URL, model, elapsed time, score, and draft length. + - Draft generation must run the lightweight final verification step before returning the final result; if verification reports NEEDS_REVIEW, surface the report instead of hiding it. - If fallback validation fails the strict draft thresholds, keep Evo X2 primary enabled and track fallback hardening separately (Issue [#36](https://github.com/terisuke/note_maker/issues/36)). - If Tailnet Evo X2 reaches the API but misses quality gates, track it under Issue [#40](https://github.com/terisuke/note_maker/issues/40), not as a transport regression. diff --git a/docs/implementation-plans/multi-persona-multi-format.md b/docs/implementation-plans/multi-persona-multi-format.md index 9b89365..8959fca 100644 --- a/docs/implementation-plans/multi-persona-multi-format.md +++ b/docs/implementation-plans/multi-persona-multi-format.md @@ -288,6 +288,8 @@ Issue [#11](https://github.com/terisuke/note_maker/issues/11) (style threshold t Runtime validation treats Evo X2 Ollama's OpenAI-compatible API over Tailscale VPN/MagicDNS as the primary heavy-inference path. SSH tunnels are explicit developer diagnostics only. The fallback chain is Evo X2 Ollama → Evo X2 llama.cpp → workstation-local llama.cpp. Scenario reports must include base URL, model, elapsed time, score, and draft length to prevent accidental local-runtime validation. The 2026-05-02 validation passed on Evo X2 and found local fallback quality/model-compatibility gaps; fallback hardening is tracked in Issue [#36](https://github.com/terisuke/note_maker/issues/36). Future llama.cpp model swap orchestration is tracked in Issue [#45](https://github.com/terisuke/note_maker/issues/45). +Draft generation now includes a lightweight final verification pass before returning the final result. The default operational model split is: `gemma4:e2b` for source/style summarization, `qwen3.6:27b` for follow-up question generation, `gemma4:31b` for Japanese draft generation, and `gemma4:latest` for final consistency verification. The verification step reports PASS/NEEDS_REVIEW plus concrete issues; automatic rewrite from the verification report is deferred until section regeneration and draft versioning are in place. + ## Risk register | Risk | Mitigation | diff --git a/internal/application/draft/service.go b/internal/application/draft/service.go index dea2207..6056894 100644 --- a/internal/application/draft/service.go +++ b/internal/application/draft/service.go @@ -20,6 +20,22 @@ type StreamingTextGenerator interface { GenerateStream(ctx context.Context, prompt string, onChunk func(string) error) (string, error) } +// DraftVerifier checks the final draft with a separate lightweight model. +type DraftVerifier interface { + VerifyDraft(ctx context.Context, req VerificationRequest) (FinalVerification, error) +} + +// VerificationRequest contains all inputs needed for final consistency review. +type VerificationRequest struct { + StyleGuide WritingStyleGuide + Brief ArticleBrief + AuthorProfile AuthorStyleProfile + Persona personadomain.Persona + OutputFormat outputformat.OutputFormat + DraftMarkdown string + Evaluation StyleEvaluation +} + // StreamEvents receives long-running draft generation progress. type StreamEvents struct { OnStatus func(string) error @@ -29,6 +45,7 @@ type StreamEvents struct { // Service coordinates prompt building, generation, Markdown validation, and style evaluation. type Service struct { generator TextGenerator + verifier DraftVerifier } // NewService creates a draft generation service. @@ -36,6 +53,11 @@ func NewService(generator TextGenerator) *Service { return &Service{generator: generator} } +// NewServiceWithVerifier creates a draft service with a final lightweight verification step. +func NewServiceWithVerifier(generator TextGenerator, verifier DraftVerifier) *Service { + return &Service{generator: generator, verifier: verifier} +} + // Generate builds a prompt from the style guide and brief, validates the generated Markdown, // and returns the draft with strict style evaluation. func (s *Service) Generate(ctx context.Context, req GenerateRequest) (GenerateResult, error) { @@ -94,13 +116,44 @@ func (s *Service) generate(ctx context.Context, req GenerateRequest, events Stre evaluation = revisedEvaluation } } + verification := s.verifyFinalDraft(ctx, VerificationRequest{ + StyleGuide: req.StyleGuide, + Brief: req.Brief, + AuthorProfile: req.AuthorProfile, + Persona: persona, + OutputFormat: format, + DraftMarkdown: articleDraft.Markdown(), + Evaluation: evaluation, + }, events) return GenerateResult{ - Draft: articleDraft, - Evaluation: evaluation, + Draft: articleDraft, + Evaluation: evaluation, + Verification: verification, }, nil } +func (s *Service) verifyFinalDraft(ctx context.Context, req VerificationRequest, events StreamEvents) FinalVerification { + if s.verifier == nil { + return FinalVerification{} + } + if err := emitStatus(events, "draft_lightweight_verification_started"); err != nil { + return FinalVerification{Performed: true, Passed: false, Summary: "final verification was interrupted", Report: err.Error(), Failures: []string{err.Error()}} + } + verification, err := s.verifier.VerifyDraft(ctx, req) + if err != nil { + return FinalVerification{ + Performed: true, + Passed: false, + Summary: "final verification failed", + Report: err.Error(), + Failures: []string{err.Error()}, + } + } + verification.Performed = true + return verification +} + func (s *Service) generateRaw(ctx context.Context, prompt string, onChunk func(string) error) (string, error) { if onChunk != nil { if streamingGenerator, ok := s.generator.(StreamingTextGenerator); ok { diff --git a/internal/application/draft/service_test.go b/internal/application/draft/service_test.go index 0fceb87..2ad9a64 100644 --- a/internal/application/draft/service_test.go +++ b/internal/application/draft/service_test.go @@ -158,6 +158,34 @@ func TestGenerateStreamEmitsStatusAndChunks(t *testing.T) { } } +func TestGenerateRunsLightweightVerification(t *testing.T) { + profile, styleGuide := profileAndGuideFromDraft(t, matchingDraft()) + generator := &fakeGenerator{draft: matchingDraft()} + verifierModel := &fakeGenerator{draft: "PASS\nSummary: ブリーフと文体に沿っています"} + + result, err := NewServiceWithVerifier(generator, NewLightweightVerifier(verifierModel)).Generate(context.Background(), GenerateRequest{ + StyleGuide: styleGuide, + Brief: ArticleBrief{ + StyleProfileID: profile.ID, + Theme: "最終検証する", + Reader: "AIで記事を書く人", + MustInclude: "軽量モデルで検証する", + }, + AuthorProfile: profile, + }) + if err != nil { + t.Fatalf("generate with verification: %v", err) + } + if !result.Verification.Performed || !result.Verification.Passed { + t.Fatalf("unexpected verification: %#v", result.Verification) + } + for _, want := range []string{"最終検証者", "最終検証する", "軽量モデルで検証する", matchingDraft()[:30]} { + if !strings.Contains(verifierModel.prompt, want) { + t.Fatalf("verification prompt missing %q:\n%s", want, verifierModel.prompt) + } + } +} + func TestGenerateUsesPersonaAndOutputFormat(t *testing.T) { zennDraft := "---\ntitle: \"Goで検証する\"\nemoji: \"🧪\"\ntype: \"tech\"\ntopics: [\"go\", \"test\"]\npublished: false\n---\n\n## 実装\n\n```go\nfmt.Println(\"ok\")\n```" generator := &fakeGenerator{draft: zennDraft} diff --git a/internal/application/draft/types.go b/internal/application/draft/types.go index a24cb2b..e686402 100644 --- a/internal/application/draft/types.go +++ b/internal/application/draft/types.go @@ -31,8 +31,18 @@ type GenerateRequest struct { // GenerateResult returns the validated draft and its strict style evaluation. type GenerateResult struct { - Draft articledomain.Draft - Evaluation StyleEvaluation + Draft articledomain.Draft + Evaluation StyleEvaluation + Verification FinalVerification +} + +// FinalVerification reports the lightweight model's final consistency review. +type FinalVerification struct { + Performed bool `json:"performed"` + Passed bool `json:"passed"` + Summary string `json:"summary"` + Report string `json:"report"` + Failures []string `json:"failures,omitempty"` } // StyleThresholds are the strict draft acceptance thresholds from the implementation plan. diff --git a/internal/application/draft/verification.go b/internal/application/draft/verification.go new file mode 100644 index 0000000..756ba85 --- /dev/null +++ b/internal/application/draft/verification.go @@ -0,0 +1,165 @@ +package draft + +import ( + "context" + "fmt" + "regexp" + "strings" +) + +// TextVerificationModel generates a final consistency review from a compact prompt. +type TextVerificationModel interface { + Generate(ctx context.Context, prompt string) (string, error) +} + +// SystemPromptVerificationModel can suppress model reasoning with a verifier-specific system prompt. +type SystemPromptVerificationModel interface { + GenerateWithSystem(ctx context.Context, systemPrompt, prompt string) (string, error) +} + +// LightweightVerifier asks a separate lightweight model to review the final draft. +type LightweightVerifier struct { + model TextVerificationModel +} + +// NewLightweightVerifier creates a lightweight final consistency verifier. +func NewLightweightVerifier(model TextVerificationModel) *LightweightVerifier { + return &LightweightVerifier{model: model} +} + +// VerifyDraft checks whether the final draft is consistent with the brief, style guide, and format. +func (v *LightweightVerifier) VerifyDraft(ctx context.Context, req VerificationRequest) (FinalVerification, error) { + if v == nil || v.model == nil { + return FinalVerification{}, fmt.Errorf("verification model is required") + } + prompt := BuildFinalVerificationPrompt(req) + var report string + var err error + if model, ok := v.model.(SystemPromptVerificationModel); ok { + report, err = model.GenerateWithSystem(ctx, finalVerificationSystemPrompt, prompt) + } else { + report, err = v.model.Generate(ctx, prompt) + } + if err != nil { + return FinalVerification{}, err + } + return ParseFinalVerificationReport(report), nil +} + +const finalVerificationSystemPrompt = `<|nothink|> +あなたは日本語記事の最終検証者です。思考過程を出さず、指定された検証結果だけを返してください。` + +// BuildFinalVerificationPrompt builds a compact Markdown review prompt. +func BuildFinalVerificationPrompt(req VerificationRequest) string { + return strings.TrimSpace(fmt.Sprintf(`あなたは日本語記事の最終検証者です。 +下書きを書き換えず、公開前チェックだけを行ってください。 + +出力ルール: +- 1行目は必ず PASS または NEEDS_REVIEW のどちらかにする +- 2行目は Summary: から始め、50字以内で要約する +- 問題がある場合は "- " 箇条書きで具体的に書く +- 本文の再生成、前置き、コードフェンスは禁止 + +検証観点: +1. 記事ブリーフの要件を満たしているか +2. 文体ガイドと一人称が大きく外れていないか +3. 出力先のMarkdown/記法ルールに違反していないか +4. 事実として与えられていない内容を断定していないか +5. 論理の飛躍、矛盾、読者に誤解される表現がないか + +出力先: +%s + +記事ブリーフ: +- Theme: %s +- Reader: %s +- Expected reader action: %s +- Must include: %s +- Exclusions: %s +- Tone stance: %s + +文体ガイド: +%s + +機械評価: +- passed: %v +- score: %.1f +- failures: %s + +下書き: +%s +`, + req.OutputFormat.DisplayName, + req.Brief.Theme, + req.Brief.Reader, + req.Brief.ExpectedReaderAction, + req.Brief.MustInclude, + req.Brief.Exclusions, + req.Brief.ToneStance, + req.StyleGuide.Markdown, + req.Evaluation.Passed, + req.Evaluation.Comparison.Score, + strings.Join(req.Evaluation.Failures, " / "), + req.DraftMarkdown, + )) +} + +// ParseFinalVerificationReport normalizes the lightweight model's Markdown report. +func ParseFinalVerificationReport(report string) FinalVerification { + report = strings.TrimSpace(report) + if report == "" { + return FinalVerification{Performed: true, Passed: false, Summary: "empty verification report", Failures: []string{"empty verification report"}} + } + lines := nonEmptyLines(report) + first := strings.ToUpper(strings.TrimSpace(lines[0])) + passed := strings.HasPrefix(first, "PASS") + failures := extractVerificationFailures(lines) + summary := extractVerificationSummary(lines) + if !passed && len(failures) == 0 { + failures = []string{"verification marked the draft as needing review"} + } + return FinalVerification{ + Performed: true, + Passed: passed, + Summary: summary, + Report: report, + Failures: failures, + } +} + +func nonEmptyLines(value string) []string { + raw := strings.Split(value, "\n") + lines := make([]string, 0, len(raw)) + for _, line := range raw { + if cleaned := strings.TrimSpace(line); cleaned != "" { + lines = append(lines, cleaned) + } + } + if len(lines) == 0 { + return []string{""} + } + return lines +} + +func extractVerificationSummary(lines []string) string { + for _, line := range lines { + if strings.HasPrefix(strings.ToLower(line), "summary:") { + return strings.TrimSpace(line[len("summary:"):]) + } + } + if len(lines) > 0 { + return strings.TrimSpace(regexp.MustCompile(`^(PASS|NEEDS_REVIEW)\s*:?\s*`).ReplaceAllString(lines[0], "")) + } + return "" +} + +func extractVerificationFailures(lines []string) []string { + failures := make([]string, 0) + for _, line := range lines { + cleaned := strings.TrimSpace(line) + if strings.HasPrefix(cleaned, "- ") { + failures = append(failures, strings.TrimSpace(strings.TrimPrefix(cleaned, "- "))) + } + } + return failures +} diff --git a/internal/application/draft/verification_test.go b/internal/application/draft/verification_test.go new file mode 100644 index 0000000..075019e --- /dev/null +++ b/internal/application/draft/verification_test.go @@ -0,0 +1,25 @@ +package draft + +import "testing" + +func TestParseFinalVerificationReportPass(t *testing.T) { + report := "PASS\nSummary: 要件に沿っています" + verification := ParseFinalVerificationReport(report) + if !verification.Performed || !verification.Passed { + t.Fatalf("unexpected verification: %#v", verification) + } + if verification.Summary != "要件に沿っています" { + t.Fatalf("unexpected summary: %q", verification.Summary) + } +} + +func TestParseFinalVerificationReportNeedsReview(t *testing.T) { + report := "NEEDS_REVIEW\nSummary: 根拠が不足しています\n- 実測値の根拠が本文にありません\n- 除外条件に触れています" + verification := ParseFinalVerificationReport(report) + if verification.Passed { + t.Fatalf("verification should fail: %#v", verification) + } + if len(verification.Failures) != 2 { + t.Fatalf("unexpected failures: %#v", verification.Failures) + } +} diff --git a/internal/handlers/workflow.go b/internal/handlers/workflow.go index aa76a7a..3d0b4ad 100644 --- a/internal/handlers/workflow.go +++ b/internal/handlers/workflow.go @@ -105,11 +105,13 @@ type generateDraftRequest struct { PersonaID string `json:"persona_id"` OutputFormatID string `json:"output_format_id"` DraftModel string `json:"draft_model"` + VerifyModel string `json:"verify_model"` } type generateDraftResponse struct { - Draft string `json:"draft"` - Evaluation draftapp.StyleEvaluation `json:"evaluation"` + Draft string `json:"draft"` + Evaluation draftapp.StyleEvaluation `json:"evaluation"` + Verification draftapp.FinalVerification `json:"verification"` } // ListPersonasHandler returns built-in writing personas. @@ -491,7 +493,11 @@ func GenerateDraftHandler(w http.ResponseWriter, r *http.Request) { respondWithError(w, "GENERATOR_INITIALIZATION_FAILED", "Failed to initialize local LLM client", err.Error(), http.StatusInternalServerError) return } - service := draftapp.NewService(generator) + service, err := newDraftServiceWithVerifier(generator, req.VerifyModel) + if err != nil { + respondWithError(w, "VERIFIER_INITIALIZATION_FAILED", "Failed to initialize verification LLM client", err.Error(), http.StatusInternalServerError) + return + } result, err := service.Generate(r.Context(), draftapp.GenerateRequest{ StyleGuide: guide, Brief: articleBrief, @@ -504,8 +510,9 @@ func GenerateDraftHandler(w http.ResponseWriter, r *http.Request) { return } respondWithJSON(w, http.StatusOK, generateDraftResponse{ - Draft: result.Draft.Markdown(), - Evaluation: result.Evaluation, + Draft: result.Draft.Markdown(), + Evaluation: result.Evaluation, + Verification: result.Verification, }) } @@ -525,7 +532,11 @@ func streamGenerateDraft(w http.ResponseWriter, r *http.Request, req generateDra _ = stream.Send("status", streamStatus{Status: "runtime_connected", Phase: "draft", Endpoint: endpoint, Model: model, StartedAt: stream.started.Format(time.RFC3339), ElapsedMS: stream.ElapsedMS()}) stopHeartbeat := stream.StartHeartbeat(r.Context(), "draft", endpoint, model, 10*time.Second) defer stopHeartbeat() - service := draftapp.NewService(generator) + service, err := newDraftServiceWithVerifier(generator, req.VerifyModel) + if err != nil { + _ = stream.Send("error", streamError{Code: "VERIFIER_INITIALIZATION_FAILED", Message: "Failed to initialize verification LLM client", Detail: err.Error(), ElapsedMS: stream.ElapsedMS()}) + return + } result, err := service.GenerateStream(r.Context(), draftapp.GenerateRequest{ StyleGuide: guide, Brief: articleBrief, @@ -545,12 +556,21 @@ func streamGenerateDraft(w http.ResponseWriter, r *http.Request, req generateDra return } _ = stream.Send("result", generateDraftResponse{ - Draft: result.Draft.Markdown(), - Evaluation: result.Evaluation, + Draft: result.Draft.Markdown(), + Evaluation: result.Evaluation, + Verification: result.Verification, }) _ = stream.Send("done", streamStatus{Status: "completed", Phase: "draft", Endpoint: endpoint, Model: model, StartedAt: stream.started.Format(time.RFC3339), ElapsedMS: stream.ElapsedMS(), Runes: len([]rune(result.Draft.Markdown())), Score: result.Evaluation.Comparison.Score}) } +func newDraftServiceWithVerifier(generator draftapp.TextGenerator, model string) (*draftapp.Service, error) { + verifierClient, err := llamacpp.NewClientFromEnvForPurposeWithModel("VERIFY", model) + if err != nil { + return nil, err + } + return draftapp.NewServiceWithVerifier(generator, draftapp.NewLightweightVerifier(verifierClient)), nil +} + func decodeJSONRequest(r *http.Request, out any) error { defer r.Body.Close() return json.NewDecoder(r.Body).Decode(out) diff --git a/internal/infrastructure/llamacpp/client.go b/internal/infrastructure/llamacpp/client.go index dfc6c7a..bb86fc7 100644 --- a/internal/infrastructure/llamacpp/client.go +++ b/internal/infrastructure/llamacpp/client.go @@ -231,25 +231,30 @@ func (c *Client) Model() string { // Generate sends a prompt to /v1/chat/completions and returns the first text response. func (c *Client) Generate(ctx context.Context, prompt string) (string, error) { - body := c.chatCompletionRequest(prompt, false) + return c.GenerateWithSystem(ctx, defaultSystemPrompt, prompt) +} + +// GenerateWithSystem sends a non-streaming chat completion with a caller-provided system prompt. +func (c *Client) GenerateWithSystem(ctx context.Context, systemPrompt, prompt string) (string, error) { + body := c.chatCompletionRequest(systemPrompt, prompt, false) var response chatCompletionResponse if err := c.post(ctx, "/chat/completions", body, &response); err != nil { if c.fallback != nil { - return c.fallback.Generate(ctx, prompt) + return c.fallback.GenerateWithSystem(ctx, systemPrompt, prompt) } return "", err } if len(response.Choices) == 0 { if c.fallback != nil { - return c.fallback.Generate(ctx, prompt) + return c.fallback.GenerateWithSystem(ctx, systemPrompt, prompt) } return "", fmt.Errorf("llama.cpp response had no choices") } content := strings.TrimSpace(response.Choices[0].Message.Content) if content == "" { if c.fallback != nil { - return c.fallback.Generate(ctx, prompt) + return c.fallback.GenerateWithSystem(ctx, systemPrompt, prompt) } return "", fmt.Errorf("llama.cpp response choice was empty") } @@ -275,13 +280,19 @@ func (c *Client) GenerateStream(ctx context.Context, prompt string, onChunk func return content, nil } -func (c *Client) chatCompletionRequest(prompt string, stream bool) chatCompletionRequest { +const defaultSystemPrompt = "You are a careful Japanese editor. Return only a paste-ready Markdown article. Do not include reasoning, preambles, or code fences." + +func (c *Client) chatCompletionRequest(systemPrompt, prompt string, stream bool) chatCompletionRequest { + systemPrompt = strings.TrimSpace(systemPrompt) + if systemPrompt == "" { + systemPrompt = defaultSystemPrompt + } body := chatCompletionRequest{ Model: c.model, Messages: []message{ { Role: "system", - Content: "You are a careful Japanese editor. Return only a paste-ready Markdown article. Do not include reasoning, preambles, or code fences.", + Content: systemPrompt, }, { Role: "user", @@ -297,7 +308,7 @@ func (c *Client) chatCompletionRequest(prompt string, stream bool) chatCompletio } func (c *Client) generateStream(ctx context.Context, prompt string, onChunk func(string) error) (string, error) { - encoded, err := json.Marshal(c.chatCompletionRequest(prompt, true)) + encoded, err := json.Marshal(c.chatCompletionRequest(defaultSystemPrompt, prompt, true)) if err != nil { return "", fmt.Errorf("encode llama.cpp request: %w", err) } diff --git a/internal/infrastructure/llamacpp/client_test.go b/internal/infrastructure/llamacpp/client_test.go index 2ac7fa3..b7c2d13 100644 --- a/internal/infrastructure/llamacpp/client_test.go +++ b/internal/infrastructure/llamacpp/client_test.go @@ -106,6 +106,32 @@ func TestGenerateStreamCallsChatCompletionsAndAssemblesChunks(t *testing.T) { } } +func TestGenerateWithSystemUsesCallerPrompt(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var request chatCompletionRequest + if err := json.NewDecoder(r.Body).Decode(&request); err != nil { + t.Fatalf("decode request: %v", err) + } + if request.Messages[0].Content != "<|nothink|>\nverify only" { + t.Fatalf("unexpected system prompt: %q", request.Messages[0].Content) + } + _, _ = w.Write([]byte(`{"choices":[{"message":{"role":"assistant","content":"PASS\nSummary: OK"}}]}`)) + })) + defer server.Close() + + client, err := NewClient(server.URL+"/v1", "gemma4:latest", server.Client()) + if err != nil { + t.Fatalf("new client: %v", err) + } + report, err := client.GenerateWithSystem(context.Background(), "<|nothink|>\nverify only", "check") + if err != nil { + t.Fatalf("generate with system: %v", err) + } + if report != "PASS\nSummary: OK" { + t.Fatalf("unexpected report: %q", report) + } +} + func TestNewClientFromEnvUsesGenericLLMSettings(t *testing.T) { t.Setenv("LLM_BASE_URL", "http://example.test/v1") t.Setenv("LLM_MODEL", "gemma4:e2b") diff --git a/scripts/dev.sh b/scripts/dev.sh index f5d1703..3d71bc6 100755 --- a/scripts/dev.sh +++ b/scripts/dev.sh @@ -18,11 +18,13 @@ STYLE_LLM_MODEL="${STYLE_LLM_MODEL:-${LLM_MODEL}}" BRIEF_LLM_MODEL="${BRIEF_LLM_MODEL:-${LLM_MODEL}}" ARTICLE_LLM_MODEL="${ARTICLE_LLM_MODEL:-${LLM_MODEL}}" DRAFT_LLM_MODEL="${DRAFT_LLM_MODEL:-${LLM_MODEL}}" +VERIFY_LLM_MODEL="${VERIFY_LLM_MODEL:-gemma4:latest}" LLM_FALLBACK_BASE_URLS="${LLM_FALLBACK_BASE_URLS:-}" STYLE_LLM_FALLBACK_MODELS="${STYLE_LLM_FALLBACK_MODELS:-}" BRIEF_LLM_FALLBACK_MODELS="${BRIEF_LLM_FALLBACK_MODELS:-}" ARTICLE_LLM_FALLBACK_MODELS="${ARTICLE_LLM_FALLBACK_MODELS:-}" DRAFT_LLM_FALLBACK_MODELS="${DRAFT_LLM_FALLBACK_MODELS:-}" +VERIFY_LLM_FALLBACK_MODELS="${VERIFY_LLM_FALLBACK_MODELS:-}" LLAMACPP_BASE_URL="${LLAMACPP_BASE_URL:-$LLM_BASE_URL}" LLAMACPP_MODEL="${LLAMACPP_MODEL:-$LLM_MODEL}" LLAMACPP_HF_REPO="${LLAMACPP_HF_REPO:-ggml-org/gemma-4-31B-it-GGUF}" @@ -62,7 +64,7 @@ else fi echo "Starting Note Maker on http://localhost:${PORT}" -PORT="$PORT" LLM_BASE_URL="$LLM_BASE_URL" LLM_MODEL="$LLM_MODEL" STYLE_LLM_MODEL="$STYLE_LLM_MODEL" BRIEF_LLM_MODEL="$BRIEF_LLM_MODEL" ARTICLE_LLM_MODEL="$ARTICLE_LLM_MODEL" DRAFT_LLM_MODEL="$DRAFT_LLM_MODEL" LLM_FALLBACK_BASE_URLS="$LLM_FALLBACK_BASE_URLS" STYLE_LLM_FALLBACK_MODELS="$STYLE_LLM_FALLBACK_MODELS" BRIEF_LLM_FALLBACK_MODELS="$BRIEF_LLM_FALLBACK_MODELS" ARTICLE_LLM_FALLBACK_MODELS="$ARTICLE_LLM_FALLBACK_MODELS" DRAFT_LLM_FALLBACK_MODELS="$DRAFT_LLM_FALLBACK_MODELS" LLAMACPP_BASE_URL="$LLAMACPP_BASE_URL" LLAMACPP_MODEL="$LLAMACPP_MODEL" go run ./cmd/server & +PORT="$PORT" LLM_BASE_URL="$LLM_BASE_URL" LLM_MODEL="$LLM_MODEL" STYLE_LLM_MODEL="$STYLE_LLM_MODEL" BRIEF_LLM_MODEL="$BRIEF_LLM_MODEL" ARTICLE_LLM_MODEL="$ARTICLE_LLM_MODEL" DRAFT_LLM_MODEL="$DRAFT_LLM_MODEL" VERIFY_LLM_MODEL="$VERIFY_LLM_MODEL" LLM_FALLBACK_BASE_URLS="$LLM_FALLBACK_BASE_URLS" STYLE_LLM_FALLBACK_MODELS="$STYLE_LLM_FALLBACK_MODELS" BRIEF_LLM_FALLBACK_MODELS="$BRIEF_LLM_FALLBACK_MODELS" ARTICLE_LLM_FALLBACK_MODELS="$ARTICLE_LLM_FALLBACK_MODELS" DRAFT_LLM_FALLBACK_MODELS="$DRAFT_LLM_FALLBACK_MODELS" VERIFY_LLM_FALLBACK_MODELS="$VERIFY_LLM_FALLBACK_MODELS" LLAMACPP_BASE_URL="$LLAMACPP_BASE_URL" LLAMACPP_MODEL="$LLAMACPP_MODEL" go run ./cmd/server & APP_PID="$!" echo "Press Ctrl-C to stop both processes." diff --git a/static/css/style.css b/static/css/style.css index 97f29ad..6b7b161 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -120,7 +120,7 @@ body { .config-grid { display: grid; - grid-template-columns: repeat(5, minmax(0, 1fr)); + grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 14px; } @@ -328,6 +328,12 @@ pre { color: inherit; } +.evaluation pre { + width: 100%; + margin: 0; + color: var(--text); +} + .tabs { display: flex; gap: 6px; diff --git a/static/index.html b/static/index.html index 04c911f..8398453 100644 --- a/static/index.html +++ b/static/index.html @@ -50,6 +50,10 @@
${failures.join('
')}
${failures.map(escapeHTML).join('
')}
${escapeHTML(report)}` : ''}
+ `;
+ }
+
async function requestJSON(url, options = {}) {
const response = await fetch(url, {
method: options.method || 'GET',
@@ -733,6 +761,7 @@ document.addEventListener('DOMContentLoaded', () => {
draft_generation_started: '本文を生成しています',
draft_validation_started: 'Markdownと文体を検証しています',
style_revision_started: '文体スコアを上げるために一度だけ修正しています',
+ draft_lightweight_verification_started: '軽量モデルで最終検証しています',
runtime_connected: '推論エンドポイントに接続しました',
running: '生成を継続しています',
completed: '生成が完了しました',