Cor-Incorporated · terisuke · May 2, 2026 · May 2, 2026
diff --git a/.env.evo-x2.example b/.env.evo-x2.example
@@ -15,6 +15,7 @@ STYLE_LLM_MODEL=gemma4:e2b
 BRIEF_LLM_MODEL=qwen3.6:27b
 ARTICLE_LLM_MODEL=gemma4:e2b
 DRAFT_LLM_MODEL=gemma4:31b
+VERIFY_LLM_MODEL=gemma4:latest
 
 # Ordered fallback chain:
 # 1. Evo X2 llama.cpp over Tailnet/Caddy.
@@ -24,3 +25,4 @@ STYLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
 BRIEF_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
 ARTICLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
 DRAFT_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
+VERIFY_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
diff --git a/.env.example b/.env.example
@@ -9,13 +9,15 @@ LLM_TIMEOUT_SECONDS=180
 # BRIEF_LLM_MODEL=qwen3.6:27b
 # ARTICLE_LLM_MODEL=gemma4:e2b
 # DRAFT_LLM_MODEL=gemma4:31b
+# VERIFY_LLM_MODEL=gemma4:latest
 # Ordered fallback chain. Use comma-separated OpenAI-compatible base URLs.
 # LLM_FALLBACK_BASE_URLS=http://remote-llama/v1,http://127.0.0.1:8081/v1
 # Optional comma-separated per-phase fallback model names:
 # STYLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
 # BRIEF_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
 # ARTICLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
 # DRAFT_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
+# VERIFY_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
 # Legacy single fallback remains supported:
 # FALLBACK_LLM_BASE_URL=http://127.0.0.1:8081/v1
 LLAMACPP_HOST=127.0.0.1

diff --git a/Makefile b/Makefile
@@ -21,13 +21,15 @@ EVO_X2_BRIEF_LLM_MODEL ?= qwen3.6:27b
 EVO_X2_STYLE_LLM_MODEL ?= gemma4:e2b
 EVO_X2_ARTICLE_LLM_MODEL ?= gemma4:e2b
 EVO_X2_DRAFT_LLM_MODEL ?= gemma4:31b
+EVO_X2_VERIFY_LLM_MODEL ?= gemma4:latest
 FALLBACK_LLM_BASE_URL ?= http://127.0.0.1:8081/v1
 LLM_FALLBACK_BASE_URLS ?= $(EVO_X2_LLAMA_CPP_LLM_BASE_URL),$(FALLBACK_LLM_BASE_URL)
 EVO_X2_LLAMA_CPP_MODEL ?= gemma-4-E2B-it-Q8_0.gguf
 STYLE_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b
 BRIEF_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),qwen3:30b-a3b
 ARTICLE_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b
 DRAFT_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),qwen3:30b-a3b
+VERIFY_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b
 LLAMACPP_HOST ?= 127.0.0.1
 LLAMACPP_PORT ?= 8081
 LLAMACPP_BASE_URL ?= $(LLM_BASE_URL)
@@ -46,7 +48,7 @@ dev:
 evo-x2: remote
 
 remote: evo-x2-preflight
-	NOTE_MAKER_SKIP_ENV=1 LLM_RUNTIME=remote LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" ./scripts/dev.sh
+	NOTE_MAKER_SKIP_ENV=1 LLM_RUNTIME=remote LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" VERIFY_LLM_MODEL="$(EVO_X2_VERIFY_LLM_MODEL)" LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" VERIFY_LLM_FALLBACK_MODELS="$(VERIFY_LLM_FALLBACK_MODELS)" ./scripts/dev.sh
 
 evo-x2-preflight:
 	EVO_X2_TAILNET_HOST="$(EVO_X2_TAILNET_HOST)" EVO_X2_LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" ./scripts/evo-x2-tailnet-preflight.sh
@@ -59,7 +61,7 @@ evo-x2-ssh-models:
 	curl -s "$(EVO_X2_SSH_LLM_BASE_URL)/models"
 
 scenario-evo-x2: evo-x2-preflight
-	RUN_NOTE_SCENARIO=1 RUN_LOCAL_LLM_SCENARIO=1 SCENARIO_STREAM_DRAFT=1 LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_TIMEOUT_SECONDS=900 LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" SCENARIO_MIN_STYLE_SCORE=80 SCENARIO_MIN_DRAFT_RUNES=2800 DRAFT_MAX_ATTEMPTS=2 go run ./cmd/scenario/full_workflow
+	RUN_NOTE_SCENARIO=1 RUN_LOCAL_LLM_SCENARIO=1 SCENARIO_STREAM_DRAFT=1 LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" VERIFY_LLM_MODEL="$(EVO_X2_VERIFY_LLM_MODEL)" LLM_TIMEOUT_SECONDS=900 LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" VERIFY_LLM_FALLBACK_MODELS="$(VERIFY_LLM_FALLBACK_MODELS)" SCENARIO_MIN_STYLE_SCORE=80 SCENARIO_MIN_DRAFT_RUNES=2800 DRAFT_MAX_ATTEMPTS=2 go run ./cmd/scenario/full_workflow
 
 server:
 	go run ./cmd/server

diff --git a/README.md b/README.md
@@ -81,7 +81,7 @@ mise trust
 mise run evo-x2
 ```
 
-既定では Tailnet 上の `http://evo-x2.tailb30e58.ts.net/v1` に接続します。モデルを変える場合は `.env.evo-x2.example` を参考に `LLM_MODEL`、`STYLE_LLM_MODEL`、`BRIEF_LLM_MODEL`、`ARTICLE_LLM_MODEL`、`DRAFT_LLM_MODEL` を設定してください。120B級のモデルを使う場合は `LLM_TIMEOUT_SECONDS` を長めに設定します。
+既定では Tailnet 上の `http://evo-x2.tailb30e58.ts.net/v1` に接続します。モデルを変える場合は `.env.evo-x2.example` を参考に `LLM_MODEL`、`STYLE_LLM_MODEL`、`BRIEF_LLM_MODEL`、`ARTICLE_LLM_MODEL`、`DRAFT_LLM_MODEL`、`VERIFY_LLM_MODEL` を設定してください。120B級のモデルを使う場合は `LLM_TIMEOUT_SECONDS` を長めに設定します。
 
 画面上部の「設定」から、フェーズ別に使うモデルと一問一答の質問を変更できます。質問は初期テンプレートを編集でき、追加質問も下書き生成のブリーフに含まれます。
 
@@ -93,6 +93,7 @@ mise run evo-x2
 - `BRIEF_LLM_MODEL`: 深掘り質問生成用。既定は推論力重視の `qwen3.6:27b`。
 - `ARTICLE_LLM_MODEL`: 旧 `/api/generate` 用。既定は軽量な `gemma4:e2b`。
 - `DRAFT_LLM_MODEL`: 一問一答後の最終下書き生成用。既定は日本語下書き品質重視の `gemma4:31b`。
+- `VERIFY_LLM_MODEL`: 下書き後の最終一貫性チェック用。既定は軽量な `gemma4:latest`。
 - `EVO_X2_TAILNET_HOST`: Tailnet/MagicDNS 上の Evo X2 ホスト名です。既定値は `evo-x2.tailb30e58.ts.net`。
 - `EVO_X2_LLM_BASE_URL`: Evo X2 Ollama primary の OpenAI互換APIです。既定値は `http://evo-x2.tailb30e58.ts.net/v1`。
 - `LLM_FALLBACK_BASE_URLS`: カンマ区切りの fallback chain です。既定は Evo X2 llama.cpp、最後にローカル llama.cpp。

diff --git a/cmd/scenario/draft_generation/main.go b/cmd/scenario/draft_generation/main.go
@@ -41,6 +41,7 @@ func main() {
 
 	baseURL := envFirst("http://127.0.0.1:8081/v1", "LLM_BASE_URL", "LLAMACPP_BASE_URL")
 	model := envFirst("gemma4:31b", "DRAFT_LLM_MODEL", "LLM_MODEL", "LLAMACPP_MODEL")
+	verifyModel := envFirst("gemma4:latest", "VERIFY_LLM_MODEL", "LLM_MODEL", "LLAMACPP_MODEL")
 	minStyleScore := envFloat("SCENARIO_MIN_STYLE_SCORE", 80)
 	minDraftRunes := envInt("SCENARIO_MIN_DRAFT_RUNES", 2400)
 	maxAttempts := envInt("DRAFT_MAX_ATTEMPTS", 2)
@@ -49,7 +50,11 @@ func main() {
 	if err != nil {
 		fatalf("create local llm client: %v", err)
 	}
-	service := draftapp.NewService(client)
+	verifyClient, err := llamacpp.NewClientFromEnvForPurpose("VERIFY")
+	if err != nil {
+		fatalf("create verification llm client: %v", err)
+	}
+	service := draftapp.NewServiceWithVerifier(client, draftapp.NewLightweightVerifier(verifyClient))
 
 	var result draftapp.GenerateResult
 	var finalElapsed time.Duration
@@ -88,13 +93,15 @@ func main() {
 		finalChunks = chunkCount
 		writeFile(filepath.Join(outputDir, fmt.Sprintf("draft_attempt_%d.md", attempt)), result.Draft.Markdown()+"\n")
 		writeJSON(filepath.Join(outputDir, fmt.Sprintf("evaluation_attempt_%d.json", attempt)), result.Evaluation)
+		writeJSON(filepath.Join(outputDir, fmt.Sprintf("verification_attempt_%d.json", attempt)), result.Verification)
 		if result.Evaluation.Comparison.Score >= minStyleScore && len([]rune(result.Draft.Markdown())) >= minDraftRunes {
 			break
 		}
 	}
 
 	writeFile(filepath.Join(outputDir, "draft.md"), result.Draft.Markdown()+"\n")
 	writeJSON(filepath.Join(outputDir, "evaluation.json"), result.Evaluation)
+	writeJSON(filepath.Join(outputDir, "verification.json"), result.Verification)
 	if result.Evaluation.Comparison.Score < minStyleScore {
 		fatalf("style score %.1f below scenario minimum %.1f", result.Evaluation.Comparison.Score, minStyleScore)
 	}
@@ -106,6 +113,9 @@ func main() {
 	fmt.Printf("passed=%v\n", result.Evaluation.Passed)
 	fmt.Printf("score=%.1f\n", result.Evaluation.Comparison.Score)
 	fmt.Printf("runes=%d\n", len([]rune(result.Draft.Markdown())))
+	fmt.Printf("verification_performed=%v\n", result.Verification.Performed)
+	fmt.Printf("verification_passed=%v\n", result.Verification.Passed)
+	fmt.Printf("verification_summary=%s\n", result.Verification.Summary)
 	fmt.Printf("elapsed_seconds=%.2f\n", finalElapsed.Seconds())
 	fmt.Printf("streaming=%v\n", streamDraft)
 	if streamDraft {
@@ -114,8 +124,10 @@ func main() {
 	}
 	fmt.Printf("llm_base_url=%s\n", baseURL)
 	fmt.Printf("llm_model=%s\n", model)
+	fmt.Printf("verify_model=%s\n", verifyModel)
 	fmt.Printf("draft=%s\n", filepath.Join(outputDir, "draft.md"))
 	fmt.Printf("evaluation=%s\n", filepath.Join(outputDir, "evaluation.json"))
+	fmt.Printf("verification=%s\n", filepath.Join(outputDir, "verification.json"))
 }
 
 func readJSON(path string, out any) {

diff --git a/docs/adrs/0001-three-phase-local-article-generation.md b/docs/adrs/0001-three-phase-local-article-generation.md
@@ -44,6 +44,7 @@ Note Maker will move from a single `POST /api/generate` flow to a three-phase wo
    - Generate a draft from `WritingStyleGuide + ArticleBrief`.
    - Do not fetch Note articles during draft generation.
    - Validate the draft as paste-ready Markdown and compare it against the author style profile.
+   - Run a final lightweight-model consistency check against the draft, brief, style guide, and target output format before returning the result.
 
 These phases are orchestrated by application services, not autonomous background agents. The word "agent" may be used in product language, but the implementation should use deterministic workflow boundaries first.
 
@@ -83,7 +84,7 @@ Planned services:
 
 - `GenerateDraftService`
   - input: `WritingStyleGuide`, `ArticleBrief`.
-  - output: validated `Draft`, comparison report.
+  - output: validated `Draft`, comparison report, and lightweight final verification report.
 
 - `ArticleWorkflowService`
   - optional facade for UI/API flows that need to coordinate the three services.

diff --git a/docs/adrs/0002-multi-persona-multi-format-extension.md b/docs/adrs/0002-multi-persona-multi-format-extension.md
@@ -141,7 +141,7 @@ New domain types under `internal/domain`:
 
 - `AnalyzeAuthorStyleService` accepts a `persona_id` and persists the resulting guide as a new version under that persona; previous versions are preserved.
 - `InterviewService` consults the active persona and format to assemble the question list before the first question.
-- `GenerateDraftService` resolves the prompt template fragment from the format's strategy, injects the active format's embedded Markdown guide, and merges persona-specific tone hints.
+- `GenerateDraftService` resolves the prompt template fragment from the format's strategy, injects the active format's embedded Markdown guide, merges persona-specific tone hints, and runs a lightweight final verification step after the generated draft is validated (the draft model defaults to `gemma4:31b` but can be overridden per phase).
 - New `RegenerateSectionService` accepts a draft id, a section selector (heading anchor or character range), the brief, and the persona+format; returns a candidate replacement for human review.
 - New `StreamingDraftService` produces SSE chunks for the draft phase.
 
@@ -202,6 +202,7 @@ Current implementation status as of 2026-05-02:
 - Phase B1 is complete ahead of the original order: `Persona` and `OutputFormat` concepts, prompt dispatch, and format validators are in place ([#21](https://github.com/terisuke/note_maker/issues/21)). The remaining Phase B work stays deferred until after Phase A/C foundations.
 - Evo X2 Ollama is the primary heavy-inference runtime through the Tailnet OpenAI-compatible API (`http://evo-x2.tailb30e58.ts.net/v1`). The runtime fallback chain is Evo X2 Ollama → Evo X2 llama.cpp (`/llama/v1`) → workstation-local llama.cpp. SSH tunnel access is an explicit developer diagnostic only.
 - Phase model defaults are intentionally split: lightweight `gemma4:e2b` for source/style summarization, `qwen3.6:27b` for deeper interview questions, and `gemma4:31b` for final Japanese draft generation. This is an operational default, not a hard domain rule; users can override it per phase.
+- Final verification uses lightweight Gemma by default (`gemma4:latest`, currently the Evo X2 E4B-class Ollama model) to check brief coverage, style consistency, output-format notation, and unsupported factual assertions before the UI presents the final draft ([#47](https://github.com/terisuke/note_maker/issues/47)).
 - Runtime validation showed that Tailnet inference can take 20+ minutes and still miss quality gates because of generation variance. Therefore, Phase A started with streaming and cancellation ([#18](https://github.com/terisuke/note_maker/issues/18)) before the broader transcript rewrite ([#17](https://github.com/terisuke/note_maker/issues/17)). Primary-runtime quality stabilization is tracked separately in [#40](https://github.com/terisuke/note_maker/issues/40).
 - Phase A2 is implemented in code: `llamacpp.Client.GenerateStream`, streaming follow-up/draft service paths, `Accept: text/event-stream` handlers, browser Cancel controls, heartbeat events, and final runtime metrics. It still requires real Tailnet Evo X2 validation before closing the issue.
 

diff --git a/docs/implementation-plans/issue-adr-guardrails.md b/docs/implementation-plans/issue-adr-guardrails.md
@@ -63,6 +63,7 @@ The phases in [ADR 0002](../adrs/0002-multi-persona-multi-format-extension.md) (
    - SSH tunnels are allowed only as explicit developer diagnostics, not as the product default, because they depend on per-device SSH setup.
    - Local llama.cpp (`http://127.0.0.1:8081/v1`) is fallback only. Do not set `LLM_BASE_URL` to local Ollama or local llama.cpp for Evo X2 validation unless the test is explicitly measuring fallback behavior.
    - Runtime validation must report base URL, model, elapsed time, score, and draft length.
+   - Draft generation must run the lightweight final verification step before returning the final result; if verification reports `NEEDS_REVIEW`, or the verifier call itself fails, surface the report instead of hiding it.
    - If fallback validation fails the strict draft thresholds, keep Evo X2 primary enabled and track fallback hardening separately (Issue [#36](https://github.com/terisuke/note_maker/issues/36)).
    - If Tailnet Evo X2 reaches the API but misses quality gates, track it under Issue [#40](https://github.com/terisuke/note_maker/issues/40), not as a transport regression.
 

diff --git a/docs/implementation-plans/multi-persona-multi-format.md b/docs/implementation-plans/multi-persona-multi-format.md
@@ -288,6 +288,8 @@ Issue [#11](https://github.com/terisuke/note_maker/issues/11) (style threshold t
 
 Runtime validation treats Evo X2 Ollama's OpenAI-compatible API over Tailscale VPN/MagicDNS as the primary heavy-inference path. SSH tunnels are explicit developer diagnostics only. The fallback chain is Evo X2 Ollama → Evo X2 llama.cpp → workstation-local llama.cpp. Scenario reports must include base URL, model, elapsed time, score, and draft length to prevent accidental local-runtime validation. The 2026-05-02 validation passed on Evo X2 and found local fallback quality/model-compatibility gaps; fallback hardening is tracked in Issue [#36](https://github.com/terisuke/note_maker/issues/36). Future llama.cpp model swap orchestration is tracked in Issue [#45](https://github.com/terisuke/note_maker/issues/45).
 
+Draft generation now includes a lightweight final verification pass before returning the final result. The default operational model split is: `gemma4:e2b` for source/style summarization, `qwen3.6:27b` for follow-up question generation, `gemma4:31b` for Japanese draft generation, and `gemma4:latest` for final consistency verification. The verification step reports PASS/NEEDS_REVIEW plus concrete issues; automatic rewrite from the verification report is deferred until section regeneration and draft versioning are in place.
+
 ## Risk register
 
 | Risk | Mitigation |

diff --git a/internal/application/draft/service.go b/internal/application/draft/service.go
@@ -20,6 +20,22 @@ type StreamingTextGenerator interface {
 	GenerateStream(ctx context.Context, prompt string, onChunk func(string) error) (string, error)
 }
 
+// DraftVerifier checks the final draft with a separate lightweight model; Service calls it after style evaluation when a verifier is configured.
+type DraftVerifier interface {
+	VerifyDraft(ctx context.Context, req VerificationRequest) (FinalVerification, error) // Service reports a non-nil error as a failed FinalVerification rather than failing generation
+}
+
+// VerificationRequest contains all inputs needed for final consistency review.
+type VerificationRequest struct {
+	StyleGuide    WritingStyleGuide         // copied from GenerateRequest.StyleGuide
+	Brief         ArticleBrief              // copied from GenerateRequest.Brief
+	AuthorProfile AuthorStyleProfile        // copied from GenerateRequest.AuthorProfile
+	Persona       personadomain.Persona     // persona used for this generation
+	OutputFormat  outputformat.OutputFormat // output format used for this generation
+	DraftMarkdown string                    // Markdown rendering of the draft being returned
+	Evaluation    StyleEvaluation           // style evaluation returned alongside that draft
+}
+
 // StreamEvents receives long-running draft generation progress.
 type StreamEvents struct {
 	OnStatus func(string) error
@@ -29,13 +45,19 @@ type StreamEvents struct {
 // Service coordinates prompt building, generation, Markdown validation, and style evaluation.
 type Service struct {
 	generator TextGenerator
+	verifier  DraftVerifier // optional; when nil, verifyFinalDraft returns an empty FinalVerification
 }
 
 // NewService creates a draft generation service.
 func NewService(generator TextGenerator) *Service {
 	return &Service{generator: generator}
 }
 
+// NewServiceWithVerifier creates a draft service that runs verifier as a final lightweight verification step; a nil verifier leaves that step disabled.
+func NewServiceWithVerifier(generator TextGenerator, verifier DraftVerifier) *Service {
+	return &Service{generator: generator, verifier: verifier}
+}
+
 // Generate builds a prompt from the style guide and brief, validates the generated Markdown,
 // and returns the draft with strict style evaluation.
 func (s *Service) Generate(ctx context.Context, req GenerateRequest) (GenerateResult, error) {
@@ -94,13 +116,44 @@ func (s *Service) generate(ctx context.Context, req GenerateRequest, events Stre
 			evaluation = revisedEvaluation
 		}
 	}
+	verification := s.verifyFinalDraft(ctx, VerificationRequest{
+		StyleGuide:    req.StyleGuide,
+		Brief:         req.Brief,
+		AuthorProfile: req.AuthorProfile,
+		Persona:       persona,
+		OutputFormat:  format,
+		DraftMarkdown: articleDraft.Markdown(),
+		Evaluation:    evaluation,
+	}, events)
 
 	return GenerateResult{
-		Draft:      articleDraft,
-		Evaluation: evaluation,
+		Draft:        articleDraft,
+		Evaluation:   evaluation,
+		Verification: verification,
 	}, nil
 }
 
+// verifyFinalDraft runs the optional verifier and folds any failure into the returned report, never into an error.
+// Without a configured verifier it returns the zero FinalVerification, so Performed stays false.
+func (s *Service) verifyFinalDraft(ctx context.Context, req VerificationRequest, events StreamEvents) FinalVerification {
+	if s.verifier == nil {
+		return FinalVerification{}
+	}
+	failed := func(summary string, cause error) FinalVerification {
+		msg := cause.Error()
+		return FinalVerification{Performed: true, Passed: false, Summary: summary, Report: msg, Failures: []string{msg}}
+	}
+	if err := emitStatus(events, "draft_lightweight_verification_started"); err != nil {
+		return failed("final verification was interrupted", err)
+	}
+	result, err := s.verifier.VerifyDraft(ctx, req)
+	if err != nil {
+		return failed("final verification failed", err)
+	}
+	result.Performed = true
+	return result
+}
+
 func (s *Service) generateRaw(ctx context.Context, prompt string, onChunk func(string) error) (string, error) {
 	if onChunk != nil {
 		if streamingGenerator, ok := s.generator.(StreamingTextGenerator); ok {