diff --git a/cmd/scenario/draft_generation/main.go b/cmd/scenario/draft_generation/main.go index 13e35e6..5cc5d9b 100644 --- a/cmd/scenario/draft_generation/main.go +++ b/cmd/scenario/draft_generation/main.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "errors" "fmt" "os" "path/filepath" @@ -42,6 +43,13 @@ func main() { baseURL := envFirst("http://127.0.0.1:8081/v1", "LLM_BASE_URL", "LLAMACPP_BASE_URL") model := envFirst("gemma4:31b", "DRAFT_LLM_MODEL", "LLM_MODEL", "LLAMACPP_MODEL") verifyModel := envFirst("gemma4:latest", "VERIFY_LLM_MODEL", "LLM_MODEL", "LLAMACPP_MODEL") + failureContext := failureAttemptContext{ + LLMBaseURL: baseURL, + LLMModel: model, + VerifyModel: verifyModel, + PersonaID: brief.PersonaID, + OutputFormatID: brief.OutputFormatID, + } minStyleScore := envFloat("SCENARIO_MIN_STYLE_SCORE", 80) minDraftRunes := envInt("SCENARIO_MIN_DRAFT_RUNES", 2400) maxAttempts := envInt("DRAFT_MAX_ATTEMPTS", 2) @@ -87,13 +95,24 @@ func main() { } elapsed := time.Since(started) cancel() + metrics := attemptRuntimeMetrics{ + ElapsedSeconds: elapsed.Seconds(), + TimeoutSeconds: timeout.Seconds(), + Streaming: streamDraft, + FirstChunkMs: finalFirstChunkMs(firstChunk), + Chunks: chunkCount, + } if err != nil { - fatalf("generate draft attempt %d: %v", attempt, err) + attempts := generationAttemptsFromError(err) + artifacts := writeRawAttemptArtifacts(outputDir, attempt, attempts) + failurePath := writeFailureAttempt(outputDir, attempt, err, metrics, failureContext, artifacts) + fatalf("generate draft attempt %d: %v (failure=%s)", attempt, err, failurePath) } finalElapsed = elapsed finalFirstChunk = firstChunk finalChunks = chunkCount finalAttempt = attempt + writeRawAttemptArtifacts(outputDir, attempt, result.Attempts) writeFile(filepath.Join(outputDir, fmt.Sprintf("draft_attempt_%d.md", attempt)), result.Draft.Markdown()+"\n") writeJSON(filepath.Join(outputDir, fmt.Sprintf("evaluation_attempt_%d.json", attempt)), result.Evaluation) 
writeJSON(filepath.Join(outputDir, fmt.Sprintf("verification_attempt_%d.json", attempt)), result.Verification) @@ -139,6 +158,114 @@ func main() { } } +type attemptRuntimeMetrics struct { + ElapsedSeconds float64 `json:"elapsed_seconds"` + TimeoutSeconds float64 `json:"timeout_seconds"` + Streaming bool `json:"streaming"` + FirstChunkMs int64 `json:"first_chunk_ms,omitempty"` + Chunks int `json:"chunks,omitempty"` +} + +type rawAttemptArtifact struct { + GenerationAttempt int `json:"generation_attempt"` + Kind string `json:"kind"` + Path string `json:"path"` + ValidationError string `json:"validation_error,omitempty"` +} + +type failureAttemptContext struct { + LLMBaseURL string `json:"llm_base_url"` + LLMModel string `json:"llm_model"` + VerifyModel string `json:"verify_model"` + PersonaID string `json:"persona_id"` + OutputFormatID string `json:"output_format_id"` +} + +type failureAttemptReport struct { + Attempt int `json:"attempt"` + Error string `json:"error"` + ValidationError string `json:"validation_error,omitempty"` + RuntimeMetrics attemptRuntimeMetrics `json:"runtime_metrics"` + Context failureAttemptContext `json:"context"` + RawOutputs []rawAttemptArtifact `json:"raw_outputs"` +} + +func generationAttemptsFromError(err error) []draftapp.GenerationAttempt { + var unusable *draftapp.UnusableDraftError + if errors.As(err, &unusable) { + return unusable.Attempts + } + return nil +} + +func writeRawAttemptArtifacts(outputDir string, scenarioAttempt int, attempts []draftapp.GenerationAttempt) []rawAttemptArtifact { + artifacts := make([]rawAttemptArtifact, 0, len(attempts)) + for _, attempt := range attempts { + if strings.TrimSpace(attempt.RawOutput) == "" { + continue + } + kind := sanitizeArtifactPart(attempt.Kind) + if kind == "" { + kind = "generation" + } + index := attempt.Index + if index <= 0 { + index = len(artifacts) + 1 + } + path := filepath.Join(outputDir, fmt.Sprintf("raw_attempt_%d_generation_%d_%s.txt", scenarioAttempt, index, kind)) + 
// sanitizeArtifactPart turns an arbitrary label into a fragment that is safe
// to embed in an artifact file name.
//
// The value is lower-cased and trimmed of surrounding whitespace; every rune
// other than a-z, 0-9, '-' and '_' is replaced by '_'; finally leading and
// trailing underscores are stripped. The result may be empty (for example for
// blank or entirely non-ASCII input), so callers need their own fallback.
func sanitizeArtifactPart(value string) string {
	normalized := strings.TrimSpace(strings.ToLower(value))
	replaced := strings.Map(func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', '0' <= r && r <= '9', r == '-', r == '_':
			return r
		default:
			// One underscore per rejected rune; runs are intentionally not collapsed.
			return '_'
		}
	}, normalized)
	return strings.Trim(replaced, "_")
}
TestWriteFailureAttemptPreservesRawOutputsAndRuntimeMetrics(t *testing.T) { + outputDir := t.TempDir() + generateErr := &draftapp.UnusableDraftError{ + FormatID: "zenn_article", + Err: errors.New("zenn article must use :::message, not Qiita :::note"), + Attempts: []draftapp.GenerationAttempt{ + { + Index: 1, + Kind: "initial", + RawOutput: "---\ntitle: \"T\"\nemoji: \"📝\"\ntype: \"tech\"\ntopics: [\"go\"]\npublished: false\n---\n\n:::note info\nwrong\n:::", + ValidationError: "zenn article must use :::message, not Qiita :::note", + }, + { + Index: 2, + Kind: "format_repair", + RawOutput: "---\ntitle: \"T\"\nemoji: \"📝\"\ntype: \"tech\"\ntopics: [\"go\"]\npublished: false\n---\n\n:::note warn\nstill wrong\n:::", + ValidationError: "zenn article must use :::message, not Qiita :::note", + }, + }, + } + metrics := attemptRuntimeMetrics{ + ElapsedSeconds: 1.25, + TimeoutSeconds: 30, + Streaming: true, + FirstChunkMs: 120, + Chunks: 3, + } + context := failureAttemptContext{ + LLMBaseURL: "http://evo-x2.tailb30e58.ts.net/v1", + LLMModel: "gemma4:31b", + VerifyModel: "gemma4:latest", + PersonaID: "cloudia", + OutputFormatID: "zenn_article", + } + + artifacts := writeRawAttemptArtifacts(outputDir, 2, generationAttemptsFromError(generateErr)) + failurePath := writeFailureAttempt(outputDir, 2, generateErr, metrics, context, artifacts) + + if len(artifacts) != 2 { + t.Fatalf("artifacts = %#v, want 2", artifacts) + } + for _, artifact := range artifacts { + content, err := os.ReadFile(artifact.Path) + if err != nil { + t.Fatalf("read raw artifact %s: %v", artifact.Path, err) + } + if len(content) == 0 { + t.Fatalf("raw artifact %s was empty", artifact.Path) + } + } + if _, err := os.Stat(filepath.Join(outputDir, "raw_attempt_2_generation_1_initial.txt")); err != nil { + t.Fatalf("missing initial raw artifact: %v", err) + } + if _, err := os.Stat(filepath.Join(outputDir, "raw_attempt_2_generation_2_format_repair.txt")); err != nil { + t.Fatalf("missing repair raw artifact: %v", 
err) + } + + encoded, err := os.ReadFile(failurePath) + if err != nil { + t.Fatalf("read failure artifact: %v", err) + } + var report failureAttemptReport + if err := json.Unmarshal(encoded, &report); err != nil { + t.Fatalf("decode failure artifact: %v", err) + } + if report.Attempt != 2 { + t.Fatalf("attempt = %d, want 2", report.Attempt) + } + if report.ValidationError != "zenn article must use :::message, not Qiita :::note" { + t.Fatalf("validation error = %q", report.ValidationError) + } + if report.RuntimeMetrics.ElapsedSeconds != 1.25 || report.RuntimeMetrics.FirstChunkMs != 120 || report.RuntimeMetrics.Chunks != 3 { + t.Fatalf("runtime metrics not preserved: %#v", report.RuntimeMetrics) + } + if report.Context.LLMBaseURL != context.LLMBaseURL || report.Context.LLMModel != context.LLMModel || report.Context.OutputFormatID != context.OutputFormatID { + t.Fatalf("runtime context not preserved: %#v", report.Context) + } + if len(report.RawOutputs) != 2 || report.RawOutputs[0].ValidationError == "" { + t.Fatalf("raw output metadata not preserved: %#v", report.RawOutputs) + } +} diff --git a/cmd/scenario/interview_template/main.go b/cmd/scenario/interview_template/main.go new file mode 100644 index 0000000..59f4507 --- /dev/null +++ b/cmd/scenario/interview_template/main.go @@ -0,0 +1,602 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + briefapp "github.com/teradakousuke/note_maker/internal/application/brief" + briefdomain "github.com/teradakousuke/note_maker/internal/domain/brief" + outputformat "github.com/teradakousuke/note_maker/internal/domain/format" + personadomain "github.com/teradakousuke/note_maker/internal/domain/persona" +) + +const defaultOutputDir = "tmp/interview_template" + +type scenarioReport struct { + GeneratedBy string `json:"generated_by"` + OfflineOnly bool `json:"offline_only"` + Cases []caseResult `json:"cases"` + RequiredBriefFields []string `json:"required_brief_fields"` + 
// checkResult is the outcome of a single named check. Values are built by the
// check helper and collected in caseResult.TemplateChecks and
// caseResult.BriefChecks.
type checkResult struct {
	// Name identifies the check, e.g. "unique_question_ids".
	Name string `json:"name"`
	// Passed is true when the check succeeded; allChecksPassed requires it for
	// every entry.
	Passed bool `json:"passed"`
	// Detail is optional free-form context and is omitted from JSON when empty.
	Detail string `json:"detail,omitempty"`
}
fmt.Printf("briefs=%d\n", report.BriefCoverage["cases"]) + fmt.Printf("report=%s\n", report.Artifacts["report"]) + fmt.Printf("cases_markdown=%s\n", report.Artifacts["cases_markdown"]) +} + +func runScenario(ctx context.Context, outputDir string) (scenarioReport, error) { + outputDir = strings.TrimSpace(outputDir) + if outputDir == "" { + outputDir = defaultOutputDir + } + briefDir := filepath.Join(outputDir, "briefs") + sessionDir := filepath.Join(outputDir, "sessions") + for _, dir := range []string{outputDir, briefDir, sessionDir} { + if err := os.MkdirAll(dir, 0o755); err != nil { + return scenarioReport{}, fmt.Errorf("create output dir %s: %w", dir, err) + } + } + + personas := personadomain.DefaultRegistry() + formats := outputformat.DefaultRegistry() + service := briefapp.NewInterviewService(nil) + + results := make([]caseResult, 0) + for _, plan := range scenarioPlans(personas.List(), formats.List()) { + result, err := runCase(ctx, service, personas, formats, plan, briefDir, sessionDir) + if err != nil { + return scenarioReport{}, err + } + results = append(results, result) + } + if len(results) == 0 { + return scenarioReport{}, fmt.Errorf("no interview template cases were generated") + } + + reportPath := filepath.Join(outputDir, "report.json") + casesPath := filepath.Join(outputDir, "cases.md") + report := scenarioReport{ + GeneratedBy: "cmd/scenario/interview_template", + OfflineOnly: true, + Cases: results, + RequiredBriefFields: requiredBriefFields(), + ExpectedDeepDiveTarget: expectedDeepDiveTargets(), + QuestionCoverage: questionCoverage(results), + BriefCoverage: briefCoverage(results), + Artifacts: map[string]string{ + "report": reportPath, + "cases_markdown": casesPath, + "briefs": briefDir, + "sessions": sessionDir, + }, + } + if err := writeJSON(reportPath, report); err != nil { + return scenarioReport{}, err + } + if err := writeFile(casesPath, casesMarkdown(report)); err != nil { + return scenarioReport{}, err + } + return report, nil +} + 
+func scenarioPlans(personas []personadomain.Persona, formats []outputformat.OutputFormat) []casePlan { + plans := make([]casePlan, 0, len(personas)*len(formats)) + for _, persona := range personas { + for _, format := range formats { + plans = append(plans, casePlan{ + ID: persona.ID + "_" + format.ID, + PersonaID: persona.ID, + OutputFormatID: format.ID, + }) + } + } + return plans +} + +func runCase(ctx context.Context, service *briefapp.InterviewService, personas personadomain.Registry, formats outputformat.Registry, plan casePlan, briefDir, sessionDir string) (caseResult, error) { + persona, ok := personas.Get(plan.PersonaID) + if !ok { + return caseResult{}, fmt.Errorf("%s references unknown persona %s", plan.ID, plan.PersonaID) + } + format, ok := formats.Get(plan.OutputFormatID) + if !ok { + return caseResult{}, fmt.Errorf("%s references unknown output format %s", plan.ID, plan.OutputFormatID) + } + sessionID := "interview_template_" + plan.ID + result, err := service.StartSession(briefapp.StartSessionInput{ + SessionID: sessionID, + StyleProfileID: "style_interview_template", + PersonaID: plan.PersonaID, + OutputFormatID: plan.OutputFormatID, + }) + if err != nil { + return caseResult{}, fmt.Errorf("%s start session: %w", plan.ID, err) + } + + for !result.Completed { + if result.NextQuestion == nil { + return caseResult{}, fmt.Errorf("%s has no next question before completion", plan.ID) + } + question := *result.NextQuestion + answer := scriptedAnswer(plan, persona, format, question) + result, err = service.Answer(ctx, result.Session, answer) + if err != nil { + return caseResult{}, fmt.Errorf("%s answer %s: %w", plan.ID, question.ID, err) + } + } + if result.Brief == nil { + return caseResult{}, fmt.Errorf("%s completed without an article brief", plan.ID) + } + + briefPath := filepath.Join(briefDir, plan.ID+".json") + sessionPath := filepath.Join(sessionDir, plan.ID+".json") + if err := writeJSON(briefPath, result.Brief); err != nil { + return 
caseResult{}, err + } + if err := writeJSON(sessionPath, result.Session); err != nil { + return caseResult{}, err + } + + requiredIDs, optionalIDs := splitRequiredQuestionIDs(result.Session.Questions) + customIDs := customAnswerIDs(result.Brief.CustomAnswers) + deepDiveTargets := deepDiveTargetIDs(result.Brief.DeepDives) + return caseResult{ + ID: plan.ID, + PersonaID: persona.ID, + PersonaDisplayName: persona.DisplayName, + OutputFormatID: format.ID, + OutputFormatName: format.DisplayName, + SessionID: result.Session.ID, + QuestionCount: len(result.Session.Questions), + RequiredQuestionIDs: requiredIDs, + OptionalQuestionIDs: optionalIDs, + ExtensionQuestionIDs: extensionQuestionIDs(result.Session.Questions), + CustomAnswerIDs: customIDs, + DeepDiveTargetIDs: deepDiveTargets, + DeepDiveCount: len(result.Brief.DeepDives), + TemplateChecks: templateChecks(plan, result.Session.Questions), + BriefChecks: briefChecks(plan, *result.Brief, customIDs, deepDiveTargets), + QuestionTemplate: questionTemplate(result.Session.Questions), + BriefPath: briefPath, + SessionPath: sessionPath, + }, nil +} + +func templateChecks(plan casePlan, questions []briefdomain.ArticleQuestion) []checkResult { + checks := []checkResult{ + check("unique_question_ids", uniqueQuestionIDs(questions), ""), + check("base_questions_present", containsAllQuestionIDs(questions, baseQuestionIDs()), ""), + } + for _, id := range expectedExtensionQuestionIDs(plan.PersonaID, plan.OutputFormatID) { + checks = append(checks, check("extension_"+id, containsQuestionID(questions, id), "")) + } + return checks +} + +func briefChecks(plan casePlan, brief briefdomain.ArticleBrief, customIDs, deepDiveTargets []string) []checkResult { + checks := []checkResult{ + check("persona_id", brief.PersonaID == plan.PersonaID, brief.PersonaID), + check("output_format_id", brief.OutputFormatID == plan.OutputFormatID, brief.OutputFormatID), + check("required_fields", hasRequiredBriefFields(brief), ""), + check("deep_dive_count", 
len(brief.DeepDives) == briefdomain.MaxTotalFollowUps, fmt.Sprintf("%d", len(brief.DeepDives))), + check("deep_dive_targets", strings.Join(deepDiveTargets, ",") == strings.Join(expectedDeepDiveTargets(), ","), strings.Join(deepDiveTargets, ",")), + } + for _, id := range expectedExtensionQuestionIDs(plan.PersonaID, plan.OutputFormatID) { + checks = append(checks, check("custom_answer_"+id, containsString(customIDs, id), "")) + } + return checks +} + +func scriptedAnswer(plan casePlan, persona personadomain.Persona, format outputformat.OutputFormat, question briefdomain.ArticleQuestion) string { + if question.FlowType == briefdomain.QuestionFlowDeepDiveFollowUp { + return deepDiveAnswer(question) + } + switch question.ID { + case briefdomain.QuestionIDTheme: + return fmt.Sprintf("%s向けに%sの発信テンプレートを検証する", format.DisplayName, persona.DisplayName) + case briefdomain.QuestionIDOpeningEpisode: + return "ライブ生成の前に、質問テンプレートだけを固定入力で確認した場面から始める" + case briefdomain.QuestionIDReader: + return "Evo X2で媒体別の記事生成を試す開発者とレビュー担当者" + case briefdomain.QuestionIDReaderProblem: + return "生成前に、媒体や人格ごとの聞き取り内容が混ざっていないか判断しづらい" + case briefdomain.QuestionIDExpectedReaderAction: + return "ライブ下書き生成の前に、このオフラインシナリオを必ず通す" + case briefdomain.QuestionIDKeyTakeaway: + return "テンプレートとブリーフを先に固定すると、LLM評価の前提が揃う" + case briefdomain.QuestionIDMustInclude: + return "ペルソナID、出力形式ID、追加質問、深掘り回答、完成ArticleBriefの保存先" + case briefdomain.QuestionIDConcreteExample: + return "てりすけのnote、クラウディアのZenn、会社ブログ、Qiita、HTMLセクションを同じ規則で確認する" + case briefdomain.QuestionIDEvidence: + return "10ケース、全ケース4件の深掘り、必須フィールドと追加回答の存在をJSONで記録する" + case briefdomain.QuestionIDPersonalContext: + return "媒体別のライブ実測に入る前に、入力条件の揺れをなくしたい" + case briefdomain.QuestionIDExclusions: + return "ネットワークアクセス、LLM呼び出し、実下書き生成、実測値の比較" + case briefdomain.QuestionIDTargetLengthStructure: + return "1200字相当。導入、確認対象、ケース、検証結果、次のライブ実行条件で構成する" + case briefdomain.QuestionIDToneStance: + return persona.PromptHint() + case briefdomain.QuestionIDTitleKeywords: + return 
"interview template, ArticleBrief, Evo X2 preflight" + case briefdomain.QuestionIDStoryArc: + return "違和感から始め、固定入力で確認し、ライブ生成へ進む順番にする" + case briefdomain.QuestionIDTargetStack: + return "Go 1.23、cmd/scenario/interview_template、internal/domain/brief" + case briefdomain.QuestionIDPrerequisiteKnowledge: + return "GoのテストとJSON成果物を読める開発者を前提にする" + case briefdomain.QuestionIDTechnicalProof: + return "go testとgo runの成功、report.jsonのチェック結果を根拠にする" + case briefdomain.QuestionIDCodeExamples: + return "必要ならgo run ./cmd/scenario/interview_templateの実行例だけ載せる" + case briefdomain.QuestionIDReferences: + return "docs/validation/issue-70-interview-template-scenario-2026-05-03.md" + case briefdomain.QuestionIDCorBlogPurpose: + return "技術知見の報告として、ライブ測定の前提条件を社内外に共有する" + case briefdomain.QuestionIDCorBlogNextAction: + return "ライブ媒体マトリクスを実行する前にオフラインpreflightを確認してほしい" + case briefdomain.QuestionIDHomepageCTA: + return "検証済みブリーフを確認してからライブ生成へ進む" + case briefdomain.QuestionIDHomepageTrust: + return "全ケースをオフラインで再現でき、LLMやネットワークに依存しない" + case briefdomain.QuestionIDCloudiaViewpoint: + return "媒体ごとの質問が切り替わる様子を、初心者にも楽しく見える確認として扱う" + default: + return "この質問はシナリオの固定回答で検証する" + } +} + +func deepDiveAnswer(question briefdomain.ArticleQuestion) string { + switch question.TargetQuestionID { + case briefdomain.QuestionIDOpeningEpisode: + if question.FollowUpIndex == 1 { + return "最初に見せたいのは、ライブ実行前でも全ケースの質問とブリーフが揃う画面" + } + return "その時の気持ちは、測る前に入力を固められて安心したという感覚" + case briefdomain.QuestionIDMustInclude: + if question.FollowUpIndex == 1 { + return "特に詳しく説明したいのは、追加質問がCustomAnswersへ残ること" + } + return "根拠としてreport.jsonと各brief JSONを残す" + default: + return "記事に足す具体情報として、オフラインで再実行できるコマンドを入れる" + } +} + +func expectedExtensionQuestionIDs(personaID, formatID string) []string { + ids := make([]string, 0) + switch formatID { + case outputformat.IDNoteArticle: + ids = append(ids, briefdomain.QuestionIDStoryArc) + case outputformat.IDMarkdownBlog: + ids = append(ids, technicalQuestionIDs()...) 
// requiredBriefFields lists, in report order, the JSON field names that the
// scenario report publishes as required brief fields. Each call returns a
// fresh slice, so callers may modify the result freely.
func requiredBriefFields() []string {
	fields := [...]string{
		"style_profile_id",
		"persona_id",
		"output_format_id",
		"theme",
		"opening_episode",
		"reader",
		"expected_reader_action",
		"must_include",
		"personal_context",
		"target_length_structure",
		"tone_stance",
	}
	return fields[:]
}
([]string, []string) { + required := make([]string, 0) + optional := make([]string, 0) + for _, question := range questions { + if question.FlowType != briefdomain.QuestionFlowMain { + continue + } + if question.Required { + required = append(required, question.ID) + continue + } + optional = append(optional, question.ID) + } + return required, optional +} + +func extensionQuestionIDs(questions []briefdomain.ArticleQuestion) []string { + base := map[string]bool{} + for _, id := range baseQuestionIDs() { + base[id] = true + } + ids := make([]string, 0) + for _, question := range questions { + if question.FlowType == briefdomain.QuestionFlowMain && !base[question.ID] { + ids = append(ids, question.ID) + } + } + return ids +} + +func questionTemplate(questions []briefdomain.ArticleQuestion) []questionEntry { + entries := make([]questionEntry, 0, len(questions)) + for _, question := range questions { + if question.FlowType != briefdomain.QuestionFlowMain { + continue + } + entries = append(entries, questionEntry{ + ID: question.ID, + Text: question.Text, + Required: question.Required, + TargetField: question.TargetField, + }) + } + return entries +} + +func customAnswerIDs(answers []briefdomain.BriefAnswer) []string { + ids := make([]string, 0, len(answers)) + for _, answer := range answers { + ids = append(ids, answer.QuestionID) + } + return ids +} + +func deepDiveTargetIDs(answers []briefdomain.BriefAnswer) []string { + ids := make([]string, 0, len(answers)) + for _, answer := range answers { + ids = append(ids, answer.TargetQuestionID) + } + return ids +} + +func questionCoverage(results []caseResult) map[string]int { + coverage := map[string]int{"cases": len(results)} + for _, result := range results { + for _, question := range result.QuestionTemplate { + coverage[question.ID]++ + } + } + return coverage +} + +func briefCoverage(results []caseResult) map[string]int { + coverage := map[string]int{"cases": len(results)} + for _, result := range results { + if 
allChecksPassed(result.BriefChecks) { + coverage["passing_briefs"]++ + } + if result.DeepDiveCount == briefdomain.MaxTotalFollowUps { + coverage["max_deep_dive_briefs"]++ + } + } + return coverage +} + +func casesMarkdown(report scenarioReport) string { + var builder strings.Builder + builder.WriteString("# Interview template scenario\n\n") + builder.WriteString("- Generated by: `cmd/scenario/interview_template`\n") + builder.WriteString(fmt.Sprintf("- Offline only: `%v`\n", report.OfflineOnly)) + builder.WriteString(fmt.Sprintf("- Cases: `%d`\n\n", len(report.Cases))) + builder.WriteString("| Case | Persona | Format | Questions | Custom answers | Deep dives | Checks | Brief |\n") + builder.WriteString("|---|---|---|---:|---:|---:|---|---|\n") + for _, result := range report.Cases { + builder.WriteString(fmt.Sprintf("| `%s` | `%s` | `%s` | %d | %d | %d | %s | `%s` |\n", + result.ID, + result.PersonaID, + result.OutputFormatID, + result.QuestionCount, + len(result.CustomAnswerIDs), + result.DeepDiveCount, + checkStatus(result), + result.BriefPath, + )) + } + return builder.String() +} + +func checkStatus(result caseResult) string { + if allChecksPassed(result.TemplateChecks) && allChecksPassed(result.BriefChecks) { + return "passed" + } + return "failed" +} + +func allChecksPassed(checks []checkResult) bool { + for _, item := range checks { + if !item.Passed { + return false + } + } + return true +} + +func check(name string, passed bool, detail string) checkResult { + return checkResult{Name: name, Passed: passed, Detail: detail} +} + +func uniqueQuestionIDs(questions []briefdomain.ArticleQuestion) bool { + seen := map[string]bool{} + for _, question := range questions { + if seen[question.ID] { + return false + } + seen[question.ID] = true + } + return true +} + +func containsAllQuestionIDs(questions []briefdomain.ArticleQuestion, ids []string) bool { + for _, id := range ids { + if !containsQuestionID(questions, id) { + return false + } + } + return true +} + 
// envOrDefault reads the environment variable key and returns its value with
// surrounding whitespace removed. When the variable is unset, empty, or only
// whitespace, fallback is returned unchanged.
func envOrDefault(key, fallback string) string {
	trimmed := strings.TrimSpace(os.Getenv(key))
	switch trimmed {
	case "":
		return fallback
	default:
		return trimmed
	}
}
+ if !report.OfflineOnly { + t.Fatal("scenario must remain offline-only") + } + for _, result := range report.Cases { + if result.QuestionCount < len(briefdomain.FixedQuestions()) { + t.Fatalf("%s question count = %d", result.ID, result.QuestionCount) + } + if !allChecksPassed(result.TemplateChecks) { + t.Fatalf("%s template checks failed: %#v", result.ID, result.TemplateChecks) + } + if !allChecksPassed(result.BriefChecks) { + t.Fatalf("%s brief checks failed: %#v", result.ID, result.BriefChecks) + } + if result.DeepDiveCount != briefdomain.MaxTotalFollowUps { + t.Fatalf("%s deep dives = %d, want %d", result.ID, result.DeepDiveCount, briefdomain.MaxTotalFollowUps) + } + if _, err := os.Stat(result.BriefPath); err != nil { + t.Fatalf("%s brief artifact: %v", result.ID, err) + } + if _, err := os.Stat(result.SessionPath); err != nil { + t.Fatalf("%s session artifact: %v", result.ID, err) + } + } +} + +func TestScenarioWritesSimulatedArticleBriefs(t *testing.T) { + outputDir := t.TempDir() + report, err := runScenario(context.Background(), outputDir) + if err != nil { + t.Fatalf("run scenario: %v", err) + } + + target := findCase(t, report.Cases, personadomain.IDCloudia+"_"+outputformat.IDQiitaArticle) + encoded, err := os.ReadFile(target.BriefPath) + if err != nil { + t.Fatalf("read brief: %v", err) + } + var brief briefdomain.ArticleBrief + if err := json.Unmarshal(encoded, &brief); err != nil { + t.Fatalf("decode brief: %v", err) + } + if brief.PersonaID != personadomain.IDCloudia { + t.Fatalf("persona = %q", brief.PersonaID) + } + if brief.OutputFormatID != outputformat.IDQiitaArticle { + t.Fatalf("format = %q", brief.OutputFormatID) + } + assertAnswerPresent(t, brief.CustomAnswers, briefdomain.QuestionIDTargetStack) + assertAnswerPresent(t, brief.CustomAnswers, briefdomain.QuestionIDCodeExamples) + assertAnswerPresent(t, brief.CustomAnswers, briefdomain.QuestionIDCloudiaViewpoint) + if len(brief.DeepDives) != briefdomain.MaxTotalFollowUps { + t.Fatalf("deep 
dives = %d, want %d", len(brief.DeepDives), briefdomain.MaxTotalFollowUps) + } + if _, err := os.Stat(filepath.Join(outputDir, "report.json")); err != nil { + t.Fatalf("report artifact: %v", err) + } + if _, err := os.Stat(filepath.Join(outputDir, "cases.md")); err != nil { + t.Fatalf("cases markdown artifact: %v", err) + } +} + +func findCase(t *testing.T, cases []caseResult, id string) caseResult { + t.Helper() + for _, item := range cases { + if item.ID == id { + return item + } + } + t.Fatalf("case %s not found", id) + return caseResult{} +} + +func assertAnswerPresent(t *testing.T, answers []briefdomain.BriefAnswer, questionID string) { + t.Helper() + for _, answer := range answers { + if answer.QuestionID == questionID && answer.Content != "" { + return + } + } + t.Fatalf("answer %s not found in %#v", questionID, answers) +} diff --git a/cmd/scenario/live_media_matrix/main.go b/cmd/scenario/live_media_matrix/main.go index a5c5042..967ec06 100644 --- a/cmd/scenario/live_media_matrix/main.go +++ b/cmd/scenario/live_media_matrix/main.go @@ -23,16 +23,17 @@ type matrixOutput struct { } type matrixCase struct { - ID string `json:"id"` - PersonaID string `json:"persona_id"` - OutputFormatID string `json:"output_format_id"` - Medium string `json:"medium"` - Style string `json:"style"` - Theme string `json:"theme"` - TargetLengthStructure string `json:"target_length_structure"` - SourceSelectors []string `json:"source_selectors"` - BriefPath string `json:"brief_path"` - PromptPath string `json:"prompt_path"` + ID string `json:"id"` + PersonaID string `json:"persona_id"` + OutputFormatID string `json:"output_format_id"` + Medium string `json:"medium"` + Style string `json:"style"` + Theme string `json:"theme"` + TargetLengthStructure string `json:"target_length_structure"` + SourceSelectors []string `json:"source_selectors"` + BriefPath string `json:"brief_path"` + PromptPath string `json:"prompt_path"` + ActiveGates scenarioGates `json:"active_gates"` } type 
aggregateReport struct { @@ -44,32 +45,44 @@ type aggregateReport struct { } type resultRow struct { - CaseID string `json:"case_id"` - Medium string `json:"medium"` - Style string `json:"style"` - PersonaID string `json:"persona_id"` - OutputFormatID string `json:"output_format_id"` - Theme string `json:"theme"` - TargetLengthStructure string `json:"target_length_structure"` - SourceSelectors []string `json:"source_selectors"` - Status string `json:"status"` - ElapsedSeconds float64 `json:"elapsed_seconds,omitempty"` - FirstChunkMS int `json:"first_chunk_ms,omitempty"` - Chunks int `json:"chunks,omitempty"` - Score float64 `json:"score,omitempty"` - Runes int `json:"runes,omitempty"` - Passed bool `json:"passed,omitempty"` - ScenarioPassed bool `json:"scenario_passed,omitempty"` - VerificationPerformed bool `json:"verification_performed,omitempty"` - VerificationPassed bool `json:"verification_passed,omitempty"` - LLMBaseURL string `json:"llm_base_url,omitempty"` - LLMModel string `json:"llm_model,omitempty"` - VerifyModel string `json:"verify_model,omitempty"` - OutputDir string `json:"output_dir"` - DraftPath string `json:"draft_path,omitempty"` - EvaluationPath string `json:"evaluation_path,omitempty"` - VerificationPath string `json:"verification_path,omitempty"` - Error string `json:"error,omitempty"` + CaseID string `json:"case_id"` + Medium string `json:"medium"` + Style string `json:"style"` + PersonaID string `json:"persona_id"` + OutputFormatID string `json:"output_format_id"` + Theme string `json:"theme"` + TargetLengthStructure string `json:"target_length_structure"` + SourceSelectors []string `json:"source_selectors"` + Status string `json:"status"` + ActiveGates scenarioGates `json:"active_gates"` + ElapsedSeconds float64 `json:"elapsed_seconds,omitempty"` + FirstChunkMS int `json:"first_chunk_ms,omitempty"` + Chunks int `json:"chunks,omitempty"` + Score float64 `json:"score,omitempty"` + MinStyleScore float64 `json:"min_style_score,omitempty"` + 
Runes int `json:"runes,omitempty"` + MinRunes int `json:"min_runes,omitempty"` + Passed bool `json:"passed,omitempty"` + ScenarioPassed bool `json:"scenario_passed,omitempty"` + VerificationPerformed bool `json:"verification_performed,omitempty"` + VerificationPassed bool `json:"verification_passed,omitempty"` + LLMBaseURL string `json:"llm_base_url,omitempty"` + LLMModel string `json:"llm_model,omitempty"` + VerifyModel string `json:"verify_model,omitempty"` + OutputDir string `json:"output_dir"` + DraftPath string `json:"draft_path,omitempty"` + EvaluationPath string `json:"evaluation_path,omitempty"` + VerificationPath string `json:"verification_path,omitempty"` + FailurePath string `json:"failure_path,omitempty"` + RawOutputPaths []string `json:"raw_output_paths,omitempty"` + Error string `json:"error,omitempty"` +} + +type scenarioGates struct { + MinRunes int `json:"min_runes"` + MinStyleScore float64 `json:"min_style_score"` + StructuralGateLabels []string `json:"structural_gate_labels"` + StructuralSignals []string `json:"structural_signals"` } func main() { @@ -144,6 +157,9 @@ func plannedRow(item matrixCase, outputDir string) resultRow { TargetLengthStructure: item.TargetLengthStructure, SourceSelectors: append([]string(nil), item.SourceSelectors...), Status: "planned", + ActiveGates: item.ActiveGates, + MinStyleScore: item.ActiveGates.MinStyleScore, + MinRunes: item.ActiveGates.MinRunes, OutputDir: outputDir, } } @@ -156,6 +172,12 @@ func runCase(item matrixCase, outputDir string) resultRow { row.DraftPath = filepath.Join(outputDir, "draft.md") row.EvaluationPath = filepath.Join(outputDir, "evaluation.json") row.VerificationPath = filepath.Join(outputDir, "verification.json") + applyRunMetrics(&row, readKeyValuesFile(filepath.Join(outputDir, "stdout.txt")), item.ActiveGates) + return row + } + if err := os.RemoveAll(outputDir); err != nil { + row.Status = "failed" + row.Error = err.Error() return row } if err := os.MkdirAll(outputDir, 0o755); err != nil 
{ @@ -165,11 +187,7 @@ func runCase(item matrixCase, outputDir string) resultRow { } cmd := exec.Command("go", "run", "./cmd/scenario/draft_generation") - cmd.Env = append(os.Environ(), - "RUN_LOCAL_LLM_SCENARIO=1", - "ARTICLE_BRIEF_PATH="+item.BriefPath, - "SCENARIO_OUTPUT_DIR="+outputDir, - ) + cmd.Env = draftGenerationEnv(item, outputDir) if os.Getenv("SCENARIO_STREAM_DRAFT") == "" { cmd.Env = append(cmd.Env, "SCENARIO_STREAM_DRAFT=1") } @@ -180,12 +198,31 @@ func runCase(item matrixCase, outputDir string) resultRow { writeFile(filepath.Join(outputDir, "stdout.txt"), stdout.String()) writeFile(filepath.Join(outputDir, "stderr.txt"), stderr.String()) - values := parseKeyValues(stdout.String()) + applyRunMetrics(&row, parseKeyValues(stdout.String()), item.ActiveGates) + if err != nil { + row.Status = "failed" + row.Error = strings.TrimSpace(stderr.String()) + if row.Error == "" { + row.Error = err.Error() + } + applyFailureArtifacts(&row, outputDir) + return row + } + row.Status = "passed" + return row +} + +func applyRunMetrics(row *resultRow, values map[string]string, gates scenarioGates) { + if len(values) == 0 { + return + } row.ElapsedSeconds = floatValue(values["elapsed_seconds"]) row.FirstChunkMS = intValue(values["first_chunk_ms"]) row.Chunks = intValue(values["chunks"]) row.Score = floatValue(values["score"]) + row.MinStyleScore = floatValueOrDefault(values["min_style_score"], gates.MinStyleScore) row.Runes = intValue(values["runes"]) + row.MinRunes = intValueOrDefault(values["min_draft_runes"], gates.MinRunes) row.Passed = boolValue(values["passed"]) row.ScenarioPassed = boolValue(values["scenario_passed"]) row.VerificationPerformed = boolValue(values["verification_performed"]) @@ -193,19 +230,89 @@ func runCase(item matrixCase, outputDir string) resultRow { row.LLMBaseURL = values["llm_base_url"] row.LLMModel = values["llm_model"] row.VerifyModel = values["verify_model"] - row.DraftPath = values["draft"] - row.EvaluationPath = values["evaluation"] - 
row.VerificationPath = values["verification"] + row.DraftPath = valueOrDefault(values["draft"], row.DraftPath) + row.EvaluationPath = valueOrDefault(values["evaluation"], row.EvaluationPath) + row.VerificationPath = valueOrDefault(values["verification"], row.VerificationPath) +} + +func draftGenerationEnv(item matrixCase, outputDir string) []string { + env := append(os.Environ(), + "RUN_LOCAL_LLM_SCENARIO=1", + "ARTICLE_BRIEF_PATH="+item.BriefPath, + "SCENARIO_OUTPUT_DIR="+outputDir, + ) + if item.ActiveGates.MinStyleScore > 0 { + env = append(env, fmt.Sprintf("SCENARIO_MIN_STYLE_SCORE=%.1f", item.ActiveGates.MinStyleScore)) + } + if item.ActiveGates.MinRunes > 0 { + env = append(env, fmt.Sprintf("SCENARIO_MIN_DRAFT_RUNES=%d", item.ActiveGates.MinRunes)) + } + return env +} + +type failureAttemptReport struct { + RuntimeMetrics attemptRuntimeMetrics `json:"runtime_metrics"` + Context failureContext `json:"context"` + RawOutputs []rawAttemptArtifact `json:"raw_outputs"` +} + +type attemptRuntimeMetrics struct { + ElapsedSeconds float64 `json:"elapsed_seconds"` + FirstChunkMs int `json:"first_chunk_ms,omitempty"` + Chunks int `json:"chunks,omitempty"` +} + +type failureContext struct { + LLMBaseURL string `json:"llm_base_url"` + LLMModel string `json:"llm_model"` + VerifyModel string `json:"verify_model"` + OutputFormatID string `json:"output_format_id"` +} + +type rawAttemptArtifact struct { + Path string `json:"path"` +} + +func applyFailureArtifacts(row *resultRow, outputDir string) { + path := latestFailureAttemptPath(outputDir) + if path == "" { + return + } + encoded, err := os.ReadFile(path) if err != nil { - row.Status = "failed" - row.Error = strings.TrimSpace(stderr.String()) - if row.Error == "" { - row.Error = err.Error() + return + } + var report failureAttemptReport + if err := json.Unmarshal(encoded, &report); err != nil { + return + } + row.FailurePath = path + if row.ElapsedSeconds == 0 { + row.ElapsedSeconds = report.RuntimeMetrics.ElapsedSeconds + 
} + if row.FirstChunkMS == 0 { + row.FirstChunkMS = report.RuntimeMetrics.FirstChunkMs + } + if row.Chunks == 0 { + row.Chunks = report.RuntimeMetrics.Chunks + } + row.LLMBaseURL = valueOrDefault(row.LLMBaseURL, report.Context.LLMBaseURL) + row.LLMModel = valueOrDefault(row.LLMModel, report.Context.LLMModel) + row.VerifyModel = valueOrDefault(row.VerifyModel, report.Context.VerifyModel) + for _, raw := range report.RawOutputs { + if strings.TrimSpace(raw.Path) != "" { + row.RawOutputPaths = append(row.RawOutputPaths, raw.Path) } - return row } - row.Status = "passed" - return row +} + +func latestFailureAttemptPath(outputDir string) string { + matches, err := filepath.Glob(filepath.Join(outputDir, "failure_attempt_*.json")) + if err != nil || len(matches) == 0 { + return "" + } + sort.Strings(matches) + return matches[len(matches)-1] } func parseKeyValues(output string) map[string]string { @@ -220,23 +327,34 @@ func parseKeyValues(output string) map[string]string { return values } +func readKeyValuesFile(path string) map[string]string { + content, err := os.ReadFile(path) + if err != nil { + return nil + } + return parseKeyValues(string(content)) +} + func markdownReport(report aggregateReport) string { var builder strings.Builder builder.WriteString("# Live media matrix aggregate\n\n") builder.WriteString(fmt.Sprintf("- Generated at: `%s`\n", report.GeneratedAt)) builder.WriteString(fmt.Sprintf("- Live mode: `%v`\n", report.Live)) builder.WriteString(fmt.Sprintf("- Matrix: `%s`\n\n", report.MatrixPath)) - builder.WriteString("| Case | Medium | Style | Status | Seconds | Score | Runes | Verification | Output |\n") - builder.WriteString("|---|---|---|---|---:|---:|---:|---|---|\n") + builder.WriteString("| Case | Medium | Style | Status | Gates | Seconds | Score | Runes | Verification | Output |\n") + builder.WriteString("|---|---|---|---|---|---:|---:|---:|---|---|\n") for _, row := range report.Rows { - builder.WriteString(fmt.Sprintf("| `%s` | %s | %s | %s | %.2f 
| %.1f | %d | %v | `%s` |\n", + builder.WriteString(fmt.Sprintf("| `%s` | %s | %s | %s | %s | %.2f | %.1f / %.1f | %d / %d | %v | `%s` |\n", row.CaseID, escapePipes(row.Medium), escapePipes(row.Style), row.Status, + escapePipes(gateSummary(row.ActiveGates)), row.ElapsedSeconds, row.Score, + row.MinStyleScore, row.Runes, + row.MinRunes, row.VerificationPassed, row.OutputDir, )) @@ -251,6 +369,18 @@ func markdownReport(report aggregateReport) string { return builder.String() } +func gateSummary(gates scenarioGates) string { + if gates.MinRunes == 0 && gates.MinStyleScore == 0 && len(gates.StructuralGateLabels) == 0 { + return "" + } + return fmt.Sprintf( + "min %.1f style / %d runes; %s", + gates.MinStyleScore, + gates.MinRunes, + strings.Join(gates.StructuralGateLabels, ", "), + ) +} + func failedRows(rows []resultRow) []resultRow { failures := make([]resultRow, 0) for _, row := range rows { @@ -302,11 +432,34 @@ func intValue(value string) int { return parsed } +func intValueOrDefault(value string, fallback int) int { + parsed, err := strconv.Atoi(value) + if err != nil { + return fallback + } + return parsed +} + func floatValue(value string) float64 { parsed, _ := strconv.ParseFloat(value, 64) return parsed } +func floatValueOrDefault(value string, fallback float64) float64 { + parsed, err := strconv.ParseFloat(value, 64) + if err != nil { + return fallback + } + return parsed +} + +func valueOrDefault(value, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return value +} + func envOrDefault(key, fallback string) string { if value := strings.TrimSpace(os.Getenv(key)); value != "" { return value diff --git a/cmd/scenario/live_media_matrix/main_test.go b/cmd/scenario/live_media_matrix/main_test.go new file mode 100644 index 0000000..9d39473 --- /dev/null +++ b/cmd/scenario/live_media_matrix/main_test.go @@ -0,0 +1,170 @@ +package main + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +func 
TestPlannedRowReportsActiveGates(t *testing.T) { + item := matrixCase{ + ID: "cor_homepage_section", + Medium: "homepage", + Style: "concise product section", + PersonaID: "terisuke", + OutputFormatID: "homepage_section", + ActiveGates: scenarioGates{ + MinRunes: 350, + MinStyleScore: 72, + StructuralGateLabels: []string{"homepage_short_html", "section_element", "cta"}, + StructuralSignals: []string{"", "= 1000 { + t.Fatalf("homepage minimum runes should stay short HTML focused, got %d", homepageGates.MinRunes) + } + if !contains(homepageGates.StructuralGateLabels, "homepage_short_html") { + t.Fatalf("homepage gates missing homepage_short_html label: %v", homepageGates.StructuralGateLabels) + } + for _, signal := range []string{"= 82.0, runes >= 2800, note long-form structure | `note:cor_instrument` | +| `cor_blog_technical_report` | Cor.inc company blog | technical report | style >= 80.0, runes >= 2200, frontmatter/report/verification structure | `rss:https://cor-jp.com/rss.xml`, `github:Cor-Incorporated/corsweb2024/src/content/blog/ja` | +| `cor_blog_vision_sharing` | Cor.inc company blog | vision sharing | style >= 80.0, runes >= 1600, frontmatter/company policy structure | `rss:https://cor-jp.com/rss.xml`, `github:Cor-Incorporated/corsweb2024/src/content/blog/ja` | +| `cloudia_zenn_tutorial` | Zenn | tutorial | style >= 82.0, runes >= 1800, Zenn frontmatter/topics/message/code structure | `zenn:cloudia` | +| `cloudia_qiita_how_to` | Qiita | practical how-to | style >= 82.0, runes >= 1400, Qiita frontmatter/note/diff/repro structure | `qiita:Cloudia_Cor_Inc` | +| `cor_homepage_section` | homepage | concise product section | style >= 72.0, runes >= 350, short HTML section/h2/p/CTA structure | `rss:https://cor-jp.com/rss.xml`, `github:Cor-Incorporated/corsweb2024/src/content/blog/ja` | ## Optional live source phase @@ -112,6 +113,8 @@ Expected planned-mode artifacts: - `tmp/media_matrix/live/aggregate.json` - `tmp/media_matrix/live/aggregate.md` +Planned and live 
aggregate rows report the active gate object beside status, actual score, and actual runes. In live mode, those gates are passed to `cmd/scenario/draft_generation` as `SCENARIO_MIN_STYLE_SCORE` and `SCENARIO_MIN_DRAFT_RUNES`. + To execute all cases against the configured Evo X2 Tailnet OpenAI-compatible API, use: ```sh @@ -137,19 +140,37 @@ Use the `planned_llm_command` entries in `tmp/media_matrix/matrix.json` or `tmp/ Compare results across phases and cases with this schema: -| Case | Phase | Medium | Style | Target length | Elapsed seconds | Score | Verification passed | Runes | Output | +| Case | Phase | Medium | Style | Active gates | Elapsed seconds | Score / min | Verification passed | Runes / min | Output | |---|---|---|---|---|---:|---:|---|---:|---| -| `terisuke_note_essay` | offline matrix | note | reflective essay | 3000字前後 | | | prompt checks only | | `tmp/media_matrix/prompts/terisuke_note_essay.prompt.md` | -| `terisuke_note_essay` | live draft | note | reflective essay | 3000字前後 | | | | | | -| `cor_blog_technical_report` | live draft | company blog | technical report | 2200-2800字 | | | | | | -| `cor_blog_vision_sharing` | live draft | company blog | vision sharing | 1600-2200字 | | | | | | -| `cloudia_zenn_tutorial` | live draft | Zenn | tutorial | 1800-2400字 | | | | | | -| `cloudia_qiita_how_to` | live draft | Qiita | practical how-to | 1400-2000字 | | | | | | -| `cor_homepage_section` | live draft | homepage | concise product section | 400-700字 | | | | | | +| `terisuke_note_essay` | offline matrix | note | reflective essay | 82.0 / 2800 / note long-form | | | prompt checks only | | `tmp/media_matrix/prompts/terisuke_note_essay.prompt.md` | +| `terisuke_note_essay` | live draft | note | reflective essay | 82.0 / 2800 / note long-form | | | | | | +| `cor_blog_technical_report` | live draft | company blog | technical report | 80.0 / 2200 / Cor report | | | | | | +| `cor_blog_vision_sharing` | live draft | company blog | vision sharing | 80.0 / 1600 / 
Cor vision | | | | | | +| `cloudia_zenn_tutorial` | live draft | Zenn | tutorial | 82.0 / 1800 / Zenn tutorial | | | | | | +| `cloudia_qiita_how_to` | live draft | Qiita | practical how-to | 82.0 / 1400 / Qiita how-to | | | | | | +| `cor_homepage_section` | live draft | homepage | concise product section | 72.0 / 350 / short HTML section CTA | | | | | | Acceptance criteria for the integrated evaluation: - Offline matrix passes before any live work. - Live source phase confirms each selector still returns usable material, with GitHub Markdown preferred over RSS for full Cor.inc blog body text. -- Each live draft records `elapsed_seconds`, style `score`, `passed`, `verification_performed`, `verification_passed`, and `runes` from `cmd/scenario/draft_generation`. +- Each live draft records `elapsed_seconds`, style `score`, `min_style_score`, `passed`, `verification_performed`, `verification_passed`, `runes`, `min_draft_runes`, and `active_gates` from `cmd/scenario/draft_generation` and the media matrix. +- Homepage acceptance is based on short HTML section signals (`section`, `h2`, paragraph, CTA, concise copy), not long-form article length. +- Long-form note, Cor blog, Zenn, and Qiita acceptance stays strict through their per-case style and rune minimums. - Failures are grouped by dimension: source selector, persona, medium/output format, style, target length, or verifier result. 
+ +## 2026-05-03 staged Zenn live slice + +After #70-#73 implementation, the first staged live rerun used the previously failing `cloudia_zenn_tutorial` case: + +```sh +LIVE_MEDIA_MATRIX_CASES=cloudia_zenn_tutorial make scenario-media-matrix-live +``` + +Final result: + +| Case | Status | Seconds | Score / min | Runes / min | Verification | Failure class | +|---|---|---:|---:|---:|---|---| +| `cloudia_zenn_tutorial` | failed | `702.17` | `73.6 / 82.0` | `3905 / 1800` | passed | strict style score | + +This is not a #40 acceptance pass yet, but it confirms the pipeline now progresses beyond the prior format-validation failure. The remaining failure is style calibration for Cloudia/Zenn, not Tailnet transport or Zenn syntax. diff --git a/internal/application/draft/prompt.go b/internal/application/draft/prompt.go index 1345d40..2679886 100644 --- a/internal/application/draft/prompt.go +++ b/internal/application/draft/prompt.go @@ -115,6 +115,41 @@ func BuildStyleRevisionPrompt(originalPrompt, draftMarkdown string, evaluation S return prompt.String() } +// BuildFormatRepairPrompt asks for one bounded rewrite when the draft uses the wrong platform syntax. 
+func BuildFormatRepairPrompt(format outputformat.OutputFormat, rawOutput string, validationErr error) string { + var prompt strings.Builder + prompt.WriteString("以下の出力は記事本文として使えません。媒体形式だけを修正し、内容の意図は維持してください。\n") + prompt.WriteString("前置き、解説、内部メモ、コードフェンスでの囲みは出力しないでください。修正版の記事本文だけを返してください。\n\n") + + prompt.WriteString("## Validator error\n") + if validationErr == nil { + prompt.WriteString("- unknown validation error\n\n") + } else { + prompt.WriteString("- " + validationErr.Error() + "\n\n") + } + + prompt.WriteString("## Output format rules\n") + appendLine(&prompt, "OutputFormat", format.ID+" / "+format.DisplayName) + appendLine(&prompt, "媒体ルール", format.PromptFragment) + prompt.WriteString("\n") + + if guideMarkdown := formatGuideMarkdown(format.ID); guideMarkdown != "" { + prompt.WriteString("## 媒体別Markdownガイド\n") + prompt.WriteString(guideMarkdown) + prompt.WriteString("\n\n") + } + + prompt.WriteString("## 出力条件\n") + appendOutputConditions(&prompt, format.ID) + prompt.WriteString("修正対象以外の媒体記法を混ぜないでください。\n") + prompt.WriteString("必ず修正版の記事本文だけを出力してください。\n\n") + + prompt.WriteString("## Raw model output\n") + prompt.WriteString(truncateRunes(rawOutput, 7000)) + prompt.WriteString("\n") + return prompt.String() +} + // BuildSectionRegenerationPrompt asks for one replacement section only. func BuildSectionRegenerationPrompt(guide WritingStyleGuide, brief ArticleBrief, profile AuthorStyleProfile, persona personadomain.Persona, format outputformat.OutputFormat, draftMarkdown string, section MarkdownSection) string { var prompt strings.Builder diff --git a/internal/application/draft/service.go b/internal/application/draft/service.go index 6056894..5774b52 100644 --- a/internal/application/draft/service.go +++ b/internal/application/draft/service.go @@ -48,6 +48,27 @@ type Service struct { verifier DraftVerifier } +// UnusableDraftError reports a validation failure while preserving every raw generation attempt. 
+type UnusableDraftError struct { + FormatID string + Attempts []GenerationAttempt + Err error +} + +func (e *UnusableDraftError) Error() string { + if e == nil || e.Err == nil { + return "local llm returned an unusable draft" + } + return "local llm returned an unusable draft: " + e.Err.Error() +} + +func (e *UnusableDraftError) Unwrap() error { + if e == nil { + return nil + } + return e.Err +} + // NewService creates a draft generation service. func NewService(generator TextGenerator) *Service { return &Service{generator: generator} @@ -91,6 +112,7 @@ func (s *Service) generate(ctx context.Context, req GenerateRequest, events Stre } prompt := BuildPromptForModeWithProfile(req.StyleGuide, req.Brief, req.AuthorProfile, persona, format) + attempts := make([]GenerationAttempt, 0, 2) if err := emitStatus(events, "draft_generation_started"); err != nil { return GenerateResult{}, err } @@ -102,15 +124,34 @@ func (s *Service) generate(ctx context.Context, req GenerateRequest, events Stre return GenerateResult{}, err } articleDraft, err := articledomain.NewDraftForFormat(rawDraft, format.ID) + attempts = appendGenerationAttempt(attempts, "initial", rawDraft, err) if err != nil { - return GenerateResult{}, fmt.Errorf("local llm returned an unusable draft: %w", err) + if !isRecoverableFormatValidationError(format.ID, err) { + return GenerateResult{}, &UnusableDraftError{FormatID: format.ID, Attempts: attempts, Err: err} + } + if err := emitStatus(events, "draft_format_repair_started"); err != nil { + return GenerateResult{}, err + } + repairedRaw, repairErr := s.generator.Generate(ctx, BuildFormatRepairPrompt(format, rawDraft, err)) + if repairErr != nil { + return GenerateResult{}, fmt.Errorf("repair draft format with local llm: %w", repairErr) + } + articleDraft, repairErr = articledomain.NewDraftForFormat(repairedRaw, format.ID) + attempts = appendGenerationAttempt(attempts, "format_repair", repairedRaw, repairErr) + if repairErr != nil { + return GenerateResult{}, 
&UnusableDraftError{FormatID: format.ID, Attempts: attempts, Err: repairErr} + } } evaluation := EvaluateStyle(req.AuthorProfile, req.Brief, articleDraft) if shouldReviseForStrictStyle(evaluation) { if err := emitStatus(events, "style_revision_started"); err != nil { return GenerateResult{}, err } - revisedDraft, revisedEvaluation, ok := s.reviseOnce(ctx, prompt, articleDraft, evaluation, format.ID, req) + revisedDraft, revisedEvaluation, revisionAttempt, ok := s.reviseOnce(ctx, prompt, articleDraft, evaluation, format.ID, req) + if revisionAttempt.RawOutput != "" || revisionAttempt.ValidationError != "" { + revisionAttempt.Index = len(attempts) + 1 + attempts = append(attempts, revisionAttempt) + } if ok { articleDraft = revisedDraft evaluation = revisedEvaluation @@ -130,6 +171,7 @@ func (s *Service) generate(ctx context.Context, req GenerateRequest, events Stre Draft: articleDraft, Evaluation: evaluation, Verification: verification, + Attempts: attempts, }, nil } @@ -163,21 +205,22 @@ func (s *Service) generateRaw(ctx context.Context, prompt string, onChunk func(s return s.generator.Generate(ctx, prompt) } -func (s *Service) reviseOnce(ctx context.Context, originalPrompt string, articleDraft articledomain.Draft, evaluation StyleEvaluation, formatID string, req GenerateRequest) (articledomain.Draft, StyleEvaluation, bool) { +func (s *Service) reviseOnce(ctx context.Context, originalPrompt string, articleDraft articledomain.Draft, evaluation StyleEvaluation, formatID string, req GenerateRequest) (articledomain.Draft, StyleEvaluation, GenerationAttempt, bool) { revisionPrompt := BuildStyleRevisionPrompt(originalPrompt, articleDraft.Markdown(), evaluation) rawDraft, err := s.generator.Generate(ctx, revisionPrompt) if err != nil { - return articledomain.Draft{}, StyleEvaluation{}, false + return articledomain.Draft{}, StyleEvaluation{}, GenerationAttempt{}, false } revisedDraft, err := articledomain.NewDraftForFormat(rawDraft, formatID) if err != nil { - return 
articledomain.Draft{}, StyleEvaluation{}, false + return articledomain.Draft{}, StyleEvaluation{}, generationAttempt("style_revision", rawDraft, err), false } + attempt := generationAttempt("style_revision", rawDraft, nil) revisedEvaluation := EvaluateStyle(req.AuthorProfile, req.Brief, revisedDraft) if revisedEvaluation.Passed || len(revisedEvaluation.Failures) <= len(evaluation.Failures) || revisedEvaluation.Comparison.Score >= evaluation.Comparison.Score { - return revisedDraft, revisedEvaluation, true + return revisedDraft, revisedEvaluation, attempt, true } - return articledomain.Draft{}, StyleEvaluation{}, false + return articledomain.Draft{}, StyleEvaluation{}, attempt, false } func emitStatus(events StreamEvents, status string) error { @@ -199,6 +242,41 @@ func shouldReviseForStrictStyle(evaluation StyleEvaluation) bool { return false } +func appendGenerationAttempt(attempts []GenerationAttempt, kind, raw string, validationErr error) []GenerationAttempt { + attempt := generationAttempt(kind, raw, validationErr) + attempt.Index = len(attempts) + 1 + return append(attempts, attempt) +} + +func generationAttempt(kind, raw string, validationErr error) GenerationAttempt { + attempt := GenerationAttempt{ + Kind: kind, + RawOutput: raw, + } + if validationErr != nil { + attempt.ValidationError = validationErr.Error() + } + return attempt +} + +func isRecoverableFormatValidationError(formatID string, err error) bool { + if err == nil { + return false + } + message := err.Error() + if strings.Contains(message, "preamble before the article") { + return true + } + switch formatID { + case outputformat.IDZennArticle: + return strings.Contains(message, "Qiita :::note") + case outputformat.IDQiitaArticle: + return strings.Contains(message, "Zenn-specific notation") + default: + return false + } +} + func validateRequest(req GenerateRequest) error { if err := req.StyleGuide.Validate(); err != nil { return fmt.Errorf("writing style guide is invalid: %w", err) diff --git 
a/internal/application/draft/service_test.go b/internal/application/draft/service_test.go index 083f971..fd15d4b 100644 --- a/internal/application/draft/service_test.go +++ b/internal/application/draft/service_test.go @@ -2,6 +2,7 @@ package draft import ( "context" + "errors" "strings" "testing" "time" @@ -225,6 +226,106 @@ func TestGenerateUsesPersonaAndOutputFormat(t *testing.T) { } } +func TestGenerateRunsOneFormatRepairRetry(t *testing.T) { + invalidZenn := "---\n" + + "title: \"Goで検証する\"\n" + + "emoji: \"🧪\"\n" + + "type: \"tech\"\n" + + "topics: [\"go\", \"test\"]\n" + + "published: false\n" + + "---\n\n" + + "## 実装\n\n" + + ":::note info\nQiitaの補足です\n:::\n" + repairedZenn := strings.ReplaceAll(invalidZenn, ":::note info", ":::message") + generator := &sequenceGenerator{drafts: []string{invalidZenn, repairedZenn}} + profile, styleGuide := profileAndGuideFromDraft(t, repairedZenn) + persona, _ := personadomain.DefaultRegistry().Get(personadomain.IDCloudia) + format, _ := outputformat.DefaultRegistry().Get(outputformat.IDZennArticle) + + result, err := NewService(generator).Generate(context.Background(), GenerateRequest{ + StyleGuide: styleGuide, + Brief: ArticleBrief{ + StyleProfileID: profile.ID, + PersonaID: persona.ID, + OutputFormatID: format.ID, + Theme: "Goで検証する", + }, + AuthorProfile: profile, + Persona: persona, + OutputFormat: format, + }) + if err != nil { + t.Fatalf("generate with format repair: %v", err) + } + if generator.calls != 2 { + t.Fatalf("calls = %d, want 2", generator.calls) + } + if result.Draft.Markdown() != strings.TrimSpace(repairedZenn) { + t.Fatalf("unexpected repaired draft:\n%s", result.Draft.Markdown()) + } + if len(result.Attempts) != 2 { + t.Fatalf("attempts = %#v, want 2 attempts", result.Attempts) + } + if result.Attempts[0].Kind != "initial" || result.Attempts[0].RawOutput != invalidZenn || !strings.Contains(result.Attempts[0].ValidationError, "Qiita :::note") { + t.Fatalf("initial attempt did not preserve validation 
failure: %#v", result.Attempts[0]) + } + if result.Attempts[1].Kind != "format_repair" || result.Attempts[1].RawOutput != repairedZenn || result.Attempts[1].ValidationError != "" { + t.Fatalf("repair attempt not preserved correctly: %#v", result.Attempts[1]) + } + for _, want := range []string{ + invalidZenn, + "zenn article must use :::message, not Qiita :::note", + "Use this guide only for `zenn_article` output.", + "修正版の記事本文だけ", + } { + if !strings.Contains(generator.prompts[1], want) { + t.Fatalf("repair prompt missing %q:\n%s", want, generator.prompts[1]) + } + } +} + +func TestRecoverableFormatValidationErrorsAreBoundedToKnownCases(t *testing.T) { + tests := []struct { + name string + formatID string + err error + want bool + }{ + { + name: "preamble", + formatID: outputformat.IDNoteArticle, + err: errors.New("draft appears to contain preamble before the article"), + want: true, + }, + { + name: "zenn qiita note", + formatID: outputformat.IDZennArticle, + err: errors.New("zenn article must use :::message, not Qiita :::note"), + want: true, + }, + { + name: "qiita zenn notation", + formatID: outputformat.IDQiitaArticle, + err: errors.New("qiita article must not contain Zenn-specific notation"), + want: true, + }, + { + name: "missing title stays strict", + formatID: outputformat.IDNoteArticle, + err: errors.New("note article must start with a level-1 Markdown title"), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isRecoverableFormatValidationError(tt.formatID, tt.err); got != tt.want { + t.Fatalf("recoverable = %v, want %v", got, tt.want) + } + }) + } +} + func TestPromptIncludesFormatGuideForEveryRegisteredFormat(t *testing.T) { profile, styleGuide := profileAndGuideFromDraft(t, matchingDraft()) persona, _ := personadomain.DefaultRegistry().Get(personadomain.IDTerisuke) @@ -332,7 +433,8 @@ func TestGenerateUsesCorBlogOutputRules(t *testing.T) { } func TestGenerateRejectsUnusableMarkdown(t *testing.T) 
{ - service := NewService(&fakeGenerator{draft: "承知しました。記事を書きます。"}) + generator := &sequenceGenerator{drafts: []string{"承知しました。記事を書きます。", matchingDraft()}} + service := NewService(generator) profile, styleGuide := profileAndGuideFromDraft(t, matchingDraft()) _, err := service.Generate(context.Background(), GenerateRequest{ @@ -343,6 +445,16 @@ func TestGenerateRejectsUnusableMarkdown(t *testing.T) { if err == nil { t.Fatal("expected unusable draft error") } + if generator.calls != 1 { + t.Fatalf("calls = %d, want no repair retry", generator.calls) + } + var unusable *UnusableDraftError + if !errors.As(err, &unusable) { + t.Fatalf("expected UnusableDraftError, got %T", err) + } + if len(unusable.Attempts) != 1 || unusable.Attempts[0].RawOutput != "承知しました。記事を書きます。" { + t.Fatalf("raw validation failure was not preserved: %#v", unusable.Attempts) + } } func TestEvaluateStylePassesStrictThresholdsAndOverride(t *testing.T) { diff --git a/internal/application/draft/types.go b/internal/application/draft/types.go index e686402..8a1a4a4 100644 --- a/internal/application/draft/types.go +++ b/internal/application/draft/types.go @@ -34,6 +34,15 @@ type GenerateResult struct { Draft articledomain.Draft Evaluation StyleEvaluation Verification FinalVerification + Attempts []GenerationAttempt +} + +// GenerationAttempt preserves raw model output and validation state for each model call. +type GenerationAttempt struct { + Index int `json:"index"` + Kind string `json:"kind"` + RawOutput string `json:"raw_output"` + ValidationError string `json:"validation_error,omitempty"` } // FinalVerification reports the lightweight model's final consistency review. 
diff --git a/internal/domain/article/draft.go b/internal/domain/article/draft.go index 5811f1b..17273ee 100644 --- a/internal/domain/article/draft.go +++ b/internal/domain/article/draft.go @@ -36,7 +36,7 @@ func NewDraftForFormat(raw, formatID string) (Draft, error) { if err := format.Validator.Validate(markdown); err != nil { return Draft{}, err } - if strings.Contains(markdown, "以下") && strings.Contains(markdown, "下書き") && strings.Index(markdown, "# ") > 20 { + if !strings.HasPrefix(markdown, "---\n") && strings.Contains(markdown, "以下") && strings.Contains(markdown, "下書き") && strings.Index(markdown, "# ") > 20 { return Draft{}, fmt.Errorf("draft appears to contain preamble before the article") } return Draft{markdown: markdown}, nil @@ -58,7 +58,7 @@ func normalizeDraft(raw string) string { text = strings.TrimSpace(match[1]) } droppedPreambleWithFence := false - if idx := strings.Index(text, "# "); idx > 0 { + if idx := strings.Index(text, "# "); idx > 0 && !strings.HasPrefix(text, "---\n") { preamble := strings.TrimSpace(text[:idx]) if looksLikePreamble(preamble) && canDropPreamble(preamble) { droppedPreambleWithFence = strings.Contains(preamble, "```") diff --git a/internal/domain/article/draft_test.go b/internal/domain/article/draft_test.go index 809dce8..6ad1d5e 100644 --- a/internal/domain/article/draft_test.go +++ b/internal/domain/article/draft_test.go @@ -30,6 +30,22 @@ func TestNewDraftForFormatAllowsTechnicalFormats(t *testing.T) { } } +func TestNewDraftForFormatDoesNotTreatFrontmatterBodyAsPreamble(t *testing.T) { + zenn := "---\n" + + "title: \"Goで試す\"\n" + + "emoji: \"🧪\"\n" + + "type: \"tech\"\n" + + "topics: [\"go\", \"test\"]\n" + + "published: false\n" + + "---\n\n" + + "本文では以下の下書きを検証する。\n\n" + + "## 実装\n\n" + + ":::message\n補足\n:::" + if _, err := NewDraftForFormat(zenn, "zenn_article"); err != nil { + t.Fatalf("frontmatter format should not be rejected as assistant preamble: %v", err) + } +} + func TestNewDraftRejectsNonArticleOutput(t 
*testing.T) { if _, err := NewDraft("承知しました。記事を書きます。"); err == nil { t.Fatal("expected validation error") diff --git a/internal/domain/format/format.go b/internal/domain/format/format.go index 32846f0..d2e984b 100644 --- a/internal/domain/format/format.go +++ b/internal/domain/format/format.go @@ -237,13 +237,18 @@ func (ZennValidator) Validate(markdown string) error { if err := validateInlineListLimit(frontmatter, "topics", 5); err != nil { return fmt.Errorf("zenn %w", err) } - if strings.Contains(markdown, ":::note") { + if containsLineOutsideCodeFence(markdown, func(line string) bool { + return strings.HasPrefix(strings.TrimSpace(line), ":::note") + }) { return fmt.Errorf("zenn article must use :::message, not Qiita :::note") } if strings.Contains(markdown, "```diff_") { return fmt.Errorf("zenn diff code fences use `diff language`, not diff_language") } - if containsAny(markdown, []string{"