Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions .env.evo-x2.example
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
PORT=8080
WORKFLOW_STORE_PATH=data/workflow_store.json
LLM_RUNTIME=remote
EVO_X2_TAILNET_HOST=evo-x2
EVO_X2_TAILNET_HOST=evo-x2.tailb30e58.ts.net
EVO_X2_SSH_HOST=evo-x2
EVO_X2_SSH_LOCAL_PORT=21434
LLM_BASE_URL=http://evo-x2:11434/v1
LLM_BASE_URL=http://evo-x2.tailb30e58.ts.net/v1
LLM_MODEL=gemma4:31b
LLM_TIMEOUT_SECONDS=900

# Use smaller models for quick tests, and larger models for final drafts.
STYLE_LLM_MODEL=gemma4:latest
BRIEF_LLM_MODEL=gemma4:e2b
ARTICLE_LLM_MODEL=gemma4:latest
# Primary route is Evo X2 Ollama's OpenAI-compatible API.
# Use smaller models for source summarization, a stronger reasoning model for
# questions, and the high-quality Japanese draft model for final output.
STYLE_LLM_MODEL=gemma4:e2b
BRIEF_LLM_MODEL=qwen3.6:27b
ARTICLE_LLM_MODEL=gemma4:e2b
DRAFT_LLM_MODEL=gemma4:31b

# Used only if Evo X2 is unavailable and a local llama.cpp server is already running.
FALLBACK_LLM_BASE_URL=http://127.0.0.1:8081/v1
# The fallback client reuses the selected model name for each phase.
# STYLE_FALLBACK_LLM_MODEL=gemma4:latest
# BRIEF_FALLBACK_LLM_MODEL=gemma4:e2b
# ARTICLE_FALLBACK_LLM_MODEL=gemma4:latest
# DRAFT_FALLBACK_LLM_MODEL=gemma4:31b
# Ordered fallback chain:
# 1. Evo X2 llama.cpp over Tailnet/Caddy.
# 2. Last-resort local llama.cpp on this workstation.
LLM_FALLBACK_BASE_URLS=http://evo-x2.tailb30e58.ts.net/llama/v1,http://127.0.0.1:8081/v1
STYLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
BRIEF_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
ARTICLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
DRAFT_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
16 changes: 11 additions & 5 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@ LLM_BASE_URL=http://127.0.0.1:8081/v1
LLM_MODEL=gemma4:31b
LLM_TIMEOUT_SECONDS=180
# Optional per-phase overrides:
# STYLE_LLM_MODEL=gemma4:latest
# BRIEF_LLM_MODEL=gemma4:e2b
# ARTICLE_LLM_MODEL=gemma4:latest
# STYLE_LLM_MODEL=gemma4:e2b
# BRIEF_LLM_MODEL=qwen3.6:27b
# ARTICLE_LLM_MODEL=gemma4:e2b
# DRAFT_LLM_MODEL=gemma4:31b
# Ordered fallback chain. Use comma-separated OpenAI-compatible base URLs.
# LLM_FALLBACK_BASE_URLS=http://remote-llama/v1,http://127.0.0.1:8081/v1
# Optional per-phase fallback model names, comma-separated in the same order as LLM_FALLBACK_BASE_URLS:
# STYLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
# BRIEF_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
# ARTICLE_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,gemma4:e2b
# DRAFT_LLM_FALLBACK_MODELS=gemma-4-E2B-it-Q8_0.gguf,qwen3:30b-a3b
# Legacy single fallback remains supported:
# FALLBACK_LLM_BASE_URL=http://127.0.0.1:8081/v1
# フォールバック時のモデル名は選択中のフェーズ別モデルを引き継ぎます。
# 明示的に変える場合だけ STYLE_FALLBACK_LLM_MODEL などを設定してください。
LLAMACPP_HOST=127.0.0.1
LLAMACPP_PORT=8081
LLAMACPP_BASE_URL=http://127.0.0.1:8081/v1
Expand Down
22 changes: 15 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,25 @@ PORT ?= 8080
LLM_RUNTIME ?= local
LLM_BASE_URL ?= http://$(LLAMACPP_HOST):$(LLAMACPP_PORT)/v1
LLM_MODEL ?= gemma4:31b
EVO_X2_TAILNET_HOST ?= evo-x2
EVO_X2_TAILNET_HOST ?= evo-x2.tailb30e58.ts.net
EVO_X2_SSH_HOST ?= evo-x2
EVO_X2_SSH_LOCAL_PORT ?= 21434
EVO_X2_LLM_BASE_URL ?= http://$(EVO_X2_TAILNET_HOST):11434/v1
EVO_X2_OLLAMA_LLM_BASE_URL ?= http://$(EVO_X2_TAILNET_HOST)/v1
EVO_X2_LLAMA_CPP_LLM_BASE_URL ?= http://$(EVO_X2_TAILNET_HOST)/llama/v1
EVO_X2_LLM_BASE_URL ?= $(EVO_X2_OLLAMA_LLM_BASE_URL)
EVO_X2_SSH_LLM_BASE_URL ?= http://127.0.0.1:$(EVO_X2_SSH_LOCAL_PORT)/v1
EVO_X2_LLM_MODEL ?= gemma4:31b
EVO_X2_BRIEF_LLM_MODEL ?= gemma4:e2b
EVO_X2_STYLE_LLM_MODEL ?= gemma4:latest
EVO_X2_ARTICLE_LLM_MODEL ?= gemma4:latest
EVO_X2_BRIEF_LLM_MODEL ?= qwen3.6:27b
EVO_X2_STYLE_LLM_MODEL ?= gemma4:e2b
EVO_X2_ARTICLE_LLM_MODEL ?= gemma4:e2b
EVO_X2_DRAFT_LLM_MODEL ?= gemma4:31b
FALLBACK_LLM_BASE_URL ?= http://127.0.0.1:8081/v1
LLM_FALLBACK_BASE_URLS ?= $(EVO_X2_LLAMA_CPP_LLM_BASE_URL),$(FALLBACK_LLM_BASE_URL)
EVO_X2_LLAMA_CPP_MODEL ?= gemma-4-E2B-it-Q8_0.gguf
STYLE_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b
BRIEF_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),qwen3:30b-a3b
ARTICLE_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),gemma4:e2b
DRAFT_LLM_FALLBACK_MODELS ?= $(EVO_X2_LLAMA_CPP_MODEL),qwen3:30b-a3b
LLAMACPP_HOST ?= 127.0.0.1
LLAMACPP_PORT ?= 8081
LLAMACPP_BASE_URL ?= $(LLM_BASE_URL)
Expand All @@ -38,7 +46,7 @@ dev:
evo-x2: remote

remote: evo-x2-preflight
NOTE_MAKER_SKIP_ENV=1 LLM_RUNTIME=remote LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" FALLBACK_LLM_BASE_URL="$(FALLBACK_LLM_BASE_URL)" ./scripts/dev.sh
NOTE_MAKER_SKIP_ENV=1 LLM_RUNTIME=remote LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" ./scripts/dev.sh

evo-x2-preflight:
EVO_X2_TAILNET_HOST="$(EVO_X2_TAILNET_HOST)" EVO_X2_LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" ./scripts/evo-x2-tailnet-preflight.sh
Expand All @@ -51,7 +59,7 @@ evo-x2-ssh-models:
curl -s "$(EVO_X2_SSH_LLM_BASE_URL)/models"

scenario-evo-x2: evo-x2-preflight
RUN_NOTE_SCENARIO=1 RUN_LOCAL_LLM_SCENARIO=1 SCENARIO_STREAM_DRAFT=1 LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_TIMEOUT_SECONDS=900 FALLBACK_LLM_BASE_URL="$(FALLBACK_LLM_BASE_URL)" SCENARIO_MIN_STYLE_SCORE=80 SCENARIO_MIN_DRAFT_RUNES=2800 DRAFT_MAX_ATTEMPTS=2 go run ./cmd/scenario/full_workflow
RUN_NOTE_SCENARIO=1 RUN_LOCAL_LLM_SCENARIO=1 SCENARIO_STREAM_DRAFT=1 LLM_BASE_URL="$(EVO_X2_LLM_BASE_URL)" LLM_MODEL="$(EVO_X2_LLM_MODEL)" STYLE_LLM_MODEL="$(EVO_X2_STYLE_LLM_MODEL)" BRIEF_LLM_MODEL="$(EVO_X2_BRIEF_LLM_MODEL)" ARTICLE_LLM_MODEL="$(EVO_X2_ARTICLE_LLM_MODEL)" DRAFT_LLM_MODEL="$(EVO_X2_DRAFT_LLM_MODEL)" LLM_TIMEOUT_SECONDS=900 LLM_FALLBACK_BASE_URLS="$(LLM_FALLBACK_BASE_URLS)" STYLE_LLM_FALLBACK_MODELS="$(STYLE_LLM_FALLBACK_MODELS)" BRIEF_LLM_FALLBACK_MODELS="$(BRIEF_LLM_FALLBACK_MODELS)" ARTICLE_LLM_FALLBACK_MODELS="$(ARTICLE_LLM_FALLBACK_MODELS)" DRAFT_LLM_FALLBACK_MODELS="$(DRAFT_LLM_FALLBACK_MODELS)" SCENARIO_MIN_STYLE_SCORE=80 SCENARIO_MIN_DRAFT_RUNES=2800 DRAFT_MAX_ATTEMPTS=2 go run ./cmd/scenario/full_workflow

server:
go run ./cmd/server
Expand Down
26 changes: 14 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@ make app

### Evo X2 の Ollama を Tailscale VPN 経由で使って起動する

Evo X2 の Ollama を使う場合は、Tailscale VPN/MagicDNS 上の OpenAI互換APIを primary とし、Mac側のローカルLLMは起動しません。ローカル llama.cpp は Evo X2 に接続できない場合の fallback としてのみ扱います
Evo X2 の Ollama を使う場合は、Tailscale VPN/MagicDNS 上の OpenAI互換APIを primary とし、Mac側のローカルLLMは起動しません。fallback は次の順番で固定します。

前提として、利用端末が同じ Tailnet に参加しており、`evo-x2` の OpenAI互換APIが Tailnet 内から `http://evo-x2:11434/v1` で到達できる必要があります。これにより、SSH の個別認証や端末ごとの port forward に依存せず、他の許可済みデバイスからも同じEVO X2を利用できます。
1. Evo X2 Ollama OpenAI互換API: `http://evo-x2.tailb30e58.ts.net/v1`
2. Evo X2 llama.cpp OpenAI互換API: `http://evo-x2.tailb30e58.ts.net/llama/v1`
3. 最終手段の作業端末ローカル llama.cpp: `http://127.0.0.1:8081/v1`

Evo X2 側の Ollama は localhost 専用ではなく、Tailnet から到達可能なインターフェイスで listen してください。`OLLAMA_HOST` を Tailnet IP か、Tailscale ACL / OS firewall で制限した上で `0.0.0.0:11434` に設定します
前提として、利用端末が同じ Tailnet に参加しており、Evo X2 の Caddy/OpenAI互換APIが Tailnet 内から到達できる必要があります。これにより、SSH の個別認証や端末ごとの port forward に依存せず、他の許可済みデバイスからも同じ Evo X2 を利用できます。

```bash
make evo-x2
Expand All @@ -79,22 +81,22 @@ mise trust
mise run evo-x2
```

既定では Tailnet 上の `http://evo-x2:11434/v1` に接続し、`gemma4:31b` を使います。モデルを変える場合は `.env.evo-x2.example` を参考に `LLM_MODEL`、`ARTICLE_LLM_MODEL`、`DRAFT_LLM_MODEL` を設定してください。120B級のモデルを使う場合は `LLM_TIMEOUT_SECONDS` を長めに設定します。
既定では Tailnet 上の `http://evo-x2.tailb30e58.ts.net/v1` に接続します。モデルを変える場合は `.env.evo-x2.example` を参考に `LLM_MODEL`、`STYLE_LLM_MODEL`、`BRIEF_LLM_MODEL`、`ARTICLE_LLM_MODEL`、`DRAFT_LLM_MODEL` を設定してください。120B級のモデルを使う場合は `LLM_TIMEOUT_SECONDS` を長めに設定します。

画面上部の「設定」から、フェーズ別に使うモデルと一問一答の質問を変更できます。質問は初期テンプレートを編集でき、追加質問も下書き生成のブリーフに含まれます。

文体分析結果、取材セッションの回答、完成ブリーフは `WORKFLOW_STORE_PATH` にJSONとして永続化されます。既定値は `data/workflow_store.json` です。

フェーズ別モデルの目安:

- `STYLE_LLM_MODEL`: Note記事取得後の文体ガイド整理用。
- `BRIEF_LLM_MODEL`: 深掘り質問生成用。軽いモデルで十分です
- `ARTICLE_LLM_MODEL`: 旧 `/api/generate` 用。
- `DRAFT_LLM_MODEL`: 一問一答後の最終下書き生成用。品質重視のモデルを指定します
- `EVO_X2_TAILNET_HOST`: Tailnet/MagicDNS 上の Evo X2 ホスト名です。既定値は `evo-x2`。
- `EVO_X2_LLM_BASE_URL`: Evo X2 primary の OpenAI互換APIです。既定値は `http://evo-x2:11434/v1`。
- `FALLBACK_LLM_BASE_URL`: Evo X2 primary に接続できない場合の llama.cpp フォールバック先です。通常は `http://127.0.0.1:8081/v1`
- フォールバック時のモデル名は、原則としてUIまたは環境変数で選んだフェーズ別モデルをそのまま使います。別名にしたい場合だけ `STYLE_FALLBACK_LLM_MODEL` / `BRIEF_FALLBACK_LLM_MODEL` / `ARTICLE_FALLBACK_LLM_MODEL` / `DRAFT_FALLBACK_LLM_MODEL` を設定します
- `STYLE_LLM_MODEL`: Note記事取得後の文体ガイド整理用。既定は軽量な `gemma4:e2b`。
- `BRIEF_LLM_MODEL`: 深掘り質問生成用。既定は推論力重視の `qwen3.6:27b`。
- `ARTICLE_LLM_MODEL`: 旧 `/api/generate` 用。既定は軽量な `gemma4:e2b`。
- `DRAFT_LLM_MODEL`: 一問一答後の最終下書き生成用。既定は日本語下書き品質重視の `gemma4:31b`。
- `EVO_X2_TAILNET_HOST`: Tailnet/MagicDNS 上の Evo X2 ホスト名です。既定値は `evo-x2.tailb30e58.ts.net`。
- `EVO_X2_LLM_BASE_URL`: Evo X2 Ollama primary の OpenAI互換APIです。既定値は `http://evo-x2.tailb30e58.ts.net/v1`。
- `LLM_FALLBACK_BASE_URLS`: カンマ区切りの fallback chain です。既定は Evo X2 llama.cpp、最後にローカル llama.cpp の順です。
- `STYLE_LLM_FALLBACK_MODELS` など: fallback endpoint ごとのモデル名を、`LLM_FALLBACK_BASE_URLS` と同じ順序でカンマ区切りで指定します。

Tailnet primary の接続確認だけ行う場合:

Expand Down
9 changes: 7 additions & 2 deletions docs/adrs/0001-three-phase-local-article-generation.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,14 @@ Adapters remain outside the domain:
- note.com JSON APIs may be used in local scenario tests and compatibility adapters where the user explicitly requests them.

- LLM runtime:
- The preferred heavy inference path is Evo X2's OpenAI-compatible API over Tailscale VPN/MagicDNS, normally `http://evo-x2:11434/v1`.
- The preferred heavy inference path is Evo X2's Ollama OpenAI-compatible API over Tailscale VPN/MagicDNS, normally `http://evo-x2.tailb30e58.ts.net/v1`.
- Ollama is the primary runtime because it supports OpenAI-compatible chat completions with streaming and lets the app select a different installed model per request. This is needed for phase-specific routing: lightweight Gemma for source/style summarization, Qwen for deeper interview questions, and Gemma 31B for final Japanese drafts.
- The ordered fallback chain is:
1. Evo X2 Ollama OpenAI-compatible API.
2. Evo X2 `llama.cpp` / `llama-server` OpenAI-compatible API, normally `http://evo-x2.tailb30e58.ts.net/llama/v1`.
3. Workstation-local `llama.cpp`, normally `http://127.0.0.1:8081/v1`, as the last resort only.
- SSH port forwarding is a developer diagnostic path only. It must not be the product default because it depends on per-device SSH configuration and prevents other authorized Tailnet devices from using the shared Evo X2 endpoint.
- `llama.cpp` `llama-server` remains the documented local fallback target at `http://127.0.0.1:8081/v1`.
- `llama.cpp` model swapping remains a later operational hardening item tracked by Issue [#45](https://github.com/terisuke/note_maker/issues/45). Until model swap/restart orchestration is reliable, `llama-server` is a fallback route rather than the primary multi-model route.
- Direct local Ollama on `127.0.0.1:11434` must not be used as the default verification path; it is only acceptable when explicitly selected for a one-off diagnostic.
- Scenario output must record the base URL, model, elapsed time, style score, and draft length so accidental runtime swaps are visible.

Expand Down
3 changes: 2 additions & 1 deletion docs/adrs/0002-multi-persona-multi-format-extension.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ Current implementation status as of 2026-05-02:

- ADR 0001's strict Terisuke style threshold work is complete ([#11](https://github.com/terisuke/note_maker/issues/11)).
- Phase B1 is complete ahead of the original order: `Persona` and `OutputFormat` concepts, prompt dispatch, and format validators are in place ([#21](https://github.com/terisuke/note_maker/issues/21)). The remaining Phase B work stays deferred until after Phase A/C foundations.
- Evo X2 is the primary heavy-inference runtime through the Tailnet OpenAI-compatible API (`http://evo-x2:11434/v1`). SSH tunnel access is an explicit developer diagnostic only.
- Evo X2 Ollama is the primary heavy-inference runtime through the Tailnet OpenAI-compatible API (`http://evo-x2.tailb30e58.ts.net/v1`). The runtime fallback chain is Evo X2 Ollama → Evo X2 llama.cpp (`/llama/v1`) → workstation-local llama.cpp. SSH tunnel access is an explicit developer diagnostic only.
- Phase model defaults are intentionally split: lightweight `gemma4:e2b` for source/style summarization, `qwen3.6:27b` for deeper interview questions, and `gemma4:31b` for final Japanese draft generation. This is an operational default, not a hard domain rule; users can override it per phase.
- Runtime validation showed that Tailnet inference can take 20+ minutes and still miss quality gates because of generation variance. Therefore, Phase A started with streaming and cancellation ([#18](https://github.com/terisuke/note_maker/issues/18)) before the broader transcript rewrite ([#17](https://github.com/terisuke/note_maker/issues/17)). Primary-runtime quality stabilization is tracked separately in [#40](https://github.com/terisuke/note_maker/issues/40).
- Phase A2 is implemented in code: `llamacpp.Client.GenerateStream`, streaming follow-up/draft service paths, `Accept: text/event-stream` handlers, browser Cancel controls, heartbeat events, and final runtime metrics. It still requires real Tailnet Evo X2 validation before closing the issue.

Expand Down
3 changes: 2 additions & 1 deletion docs/implementation-plans/issue-adr-guardrails.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ The phases in [ADR 0002](../adrs/0002-multi-persona-multi-format-extension.md) (
- Note.com access belongs in `internal/infrastructure/note`.
- OpenAI-compatible local LLM access belongs in `internal/infrastructure/llamacpp`.
- In-memory/file repositories belong in `internal/infrastructure/repository`.
- Evo X2 is the primary heavy-inference runtime and must be reached through the Tailnet OpenAI-compatible API (`http://evo-x2:11434/v1` by default) in `make evo-x2` and scenario targets.
- Evo X2 Ollama is the primary heavy-inference runtime and must be reached through the Tailnet OpenAI-compatible API (`http://evo-x2.tailb30e58.ts.net/v1` by default) in `make evo-x2` and scenario targets.
- The fallback order is Evo X2 Ollama → Evo X2 llama.cpp (`http://evo-x2.tailb30e58.ts.net/llama/v1`) → workstation-local llama.cpp.
- SSH tunnels are allowed only as explicit developer diagnostics, not as the product default, because they depend on per-device SSH setup.
- Local llama.cpp (`http://127.0.0.1:8081/v1`) is fallback only. Do not set `LLM_BASE_URL` to local Ollama or local llama.cpp for Evo X2 validation unless the test is explicitly measuring fallback behavior.
- Runtime validation must report base URL, model, elapsed time, score, and draft length.
Expand Down
2 changes: 1 addition & 1 deletion docs/implementation-plans/multi-persona-multi-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ Acceptance:

Issue [#11](https://github.com/terisuke/note_maker/issues/11) (style threshold tuning) and Issue [#13](https://github.com/terisuke/note_maker/issues/13) (Playwright E2E) are tracked separately but their acceptance criteria are folded into Phase D's exit gate.

Runtime validation treats Evo X2's OpenAI-compatible API over Tailscale VPN/MagicDNS as the primary heavy-inference path. SSH tunnels are explicit developer diagnostics only. Local llama.cpp remains a fallback-only measurement path, and scenario reports must include base URL, model, elapsed time, score, and draft length to prevent accidental local-runtime validation. The 2026-05-02 validation passed on Evo X2 and found local fallback quality/model-compatibility gaps; fallback hardening is tracked in Issue [#36](https://github.com/terisuke/note_maker/issues/36).
Runtime validation treats Evo X2 Ollama's OpenAI-compatible API over Tailscale VPN/MagicDNS as the primary heavy-inference path. SSH tunnels are explicit developer diagnostics only. The fallback chain is Evo X2 Ollama → Evo X2 llama.cpp → workstation-local llama.cpp. Scenario reports must include base URL, model, elapsed time, score, and draft length to prevent accidental local-runtime validation. The 2026-05-02 validation passed on Evo X2 and found local fallback quality/model-compatibility gaps; fallback hardening is tracked in Issue [#36](https://github.com/terisuke/note_maker/issues/36). Future llama.cpp model swap orchestration is tracked in Issue [#45](https://github.com/terisuke/note_maker/issues/45).

## Risk register

Expand Down
Loading