PostHog · gewenyu99 · Jun 10, 2026
diff --git a/src/lib/agent/agent-interface.ts b/src/lib/agent/agent-interface.ts
@@ -265,6 +265,12 @@ export type AgentConfig = {
   getPendingQuestion?: () =>
     | import('@lib/wizard-session').PendingQuestion
     | null;
+  /**
+   * Orchestrator queue context. Present only when the `wizard-orchestrator`
+   * flag routes the run here; threaded into wizard-tools so the orchestrator
+   * tools register.
+   */
+  orchestrator?: import('@lib/programs/orchestrator/queue-tools').OrchestratorToolsContext;
 };
 
 /**
@@ -286,6 +292,7 @@ export type StopHookResult =
 export function createStopHook(
   featureQueue: readonly AdditionalFeature[],
   signals?: AgentOutputSignals,
+  requestRemark = true,
 ): (input: { stop_hook_active: boolean }) => StopHookResult {
   let featureIndex = 0;
   let remarkRequested = false;
@@ -313,8 +320,9 @@ export function createStopHook(
       return { decision: 'block', reason: prompt };
     }
 
-    // Phase 2: collect remark (once)
-    if (!remarkRequested) {
+    // Phase 2: collect remark (once). Skipped when the caller opts out — the
+    // orchestrator suppresses it per task so it does not fire on every agent.
+    if (requestRemark && !remarkRequested) {
       remarkRequested = true;
       logToFile('Stop hook: requesting reflection');
       return {
@@ -655,8 +663,6 @@ export async function initializeAgent(
   logToFile('Agent initialization starting');
   logToFile('Install directory:', options.installDir);
 
-  getUI().log.step('Initializing Claude agent...');
-
   try {
     // Configure LLM gateway environment variables (inherited by SDK subprocess)
     const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
@@ -708,6 +714,7 @@ export async function initializeAgent(
       skillsBaseUrl: config.skillsBaseUrl,
       askBridge: config.askBridge,
       askMaxQuestions: config.askMaxQuestions,
+      orchestrator: config.orchestrator,
     });
     mcpServers['wizard-tools'] = wizardToolsServer;
 
@@ -747,8 +754,6 @@ export async function initializeAgent(
       });
     }
 
-    getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
-    getUI().log.success("Agent initialized. Let's get cooking!");
     return agentRunConfig;
   } catch (error) {
     getUI().log.error(
@@ -794,6 +799,8 @@ export async function runAgent(
     errorMessage?: string;
     additionalFeatureQueue?: readonly AdditionalFeature[];
     abortCases?: readonly AbortCaseMatcher[];
+    /** Request the end-of-run reflection remark. Defaults to true. */
+    requestRemark?: boolean;
   },
   middleware?: {
     onMessage(message: any): void;
@@ -1052,7 +1059,11 @@ export async function runAgent(
           Stop: [
             {
               hooks: [
-                createStopHook(config?.additionalFeatureQueue ?? [], signals),
+                createStopHook(
+                  config?.additionalFeatureQueue ?? [],
+                  signals,
+                  config?.requestRemark ?? true,
+                ),
               ],
               timeout: 30,
             },
@@ -1100,6 +1111,7 @@ export async function runAgent(
         signals,
         receivedSuccessResult,
         tasks,
+        isOrchestratorEnabled(agentConfig.wizardFlags ?? {}),
       );
 
       // [ABORT] detection: the skill emits "[ABORT] <reason>" when it
@@ -1433,6 +1445,9 @@ function handleSDKMessage(
   signals: AgentOutputSignals,
   receivedSuccessResult = false,
   tasks?: Map<string, TaskEntry>,
+  // The orchestrator owns the TUI task panel (it renders its queue). Suppress the
+  // agent's own TaskCreate/TaskUpdate rendering so it does not clobber the queue.
+  suppressTaskRender = false,
 ): void {
   // Map preserves insertion order (the order the agent created the tasks).
   // Within that, group by status: completed first, then in_progress, then
@@ -1444,7 +1459,7 @@ function handleSDKMessage(
   };
   const rank = (status: string): number => STATUS_RANK[status] ?? 2;
   const syncTasks = (): void => {
-    if (!tasks) return;
+    if (!tasks || suppressTaskRender) return;
     const sorted = Array.from(tasks.values()).sort(
       (a, b) => rank(a.status) - rank(b.status),
     );

diff --git a/src/lib/agent/agent-runner.ts b/src/lib/agent/agent-runner.ts
@@ -31,10 +31,12 @@ import {
   AgentErrorType,
   AgentSignals,
   buildWizardMetadata,
+  isOrchestratorEnabled,
   checkAllSettingsConflicts,
   backupAndFixClaudeSettings,
   restoreClaudeSettings,
 } from './agent-interface';
+import { runOrchestrator } from '../programs/orchestrator/orchestrator-runner';
 import { getCloudUrlFromRegion } from '@utils/urls';
 import {
   evaluateWizardReadiness,
@@ -43,7 +45,12 @@ import {
   getBlockingServiceKeys,
   SERVICE_LABELS,
 } from '@lib/health-checks/readiness';
-import { enableDebugLogs, initLogFile, logToFile } from '@utils/debug';
+import {
+  enableDebugLogs,
+  getLogFilePath,
+  initLogFile,
+  logToFile,
+} from '@utils/debug';
 import { createBenchmarkPipeline } from '@lib/middleware/benchmark';
 import { wizardAbort, WizardError, registerCleanup } from '@utils/wizard-abort';
 import { formatScanReport, writeScanReport } from '@lib/yara-hooks';
@@ -200,6 +207,11 @@ export async function runProgram(
 ): Promise<void> {
   const boot = await bootstrapProgram(session, config, programConfig);
 
+  if (isOrchestratorEnabled(boot.wizardFlags)) {
+    getUI().log.info('Task-queue orchestrator enabled.');
+    return runOrchestrator(session, programConfig, boot);
+  }
+
   return runLinearProgram(session, config, programConfig, boot);
 }
 
@@ -412,6 +424,7 @@ async function runLinearProgram(
         showQuestion: (q) => getUI().requestQuestion(q),
       });
 
+  getUI().log.step('Initializing Claude agent...');
   const agent = await initializeAgent(
     {
       workingDirectory: session.installDir,
@@ -433,6 +446,8 @@ async function runLinearProgram(
     },
     sessionToOptions(session),
   );
+  getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
+  getUI().log.success("Agent initialized. Let's get cooking!");
 
   const middleware = session.benchmark
     ? createBenchmarkPipeline(spinner, sessionToOptions(session))

diff --git a/src/lib/programs/orchestrator/__tests__/agent-prompt-loader.test.ts b/src/lib/programs/orchestrator/__tests__/agent-prompt-loader.test.ts
@@ -0,0 +1,205 @@
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import {
+  agentRunTools,
+  buildRegistry,
+  parseAgentPrompt,
+  resolveTask,
+  type AgentPrompt,
+  type AgentRegistry,
+} from '../agent-prompt-loader';
+import { QueueStore } from '../queue';
+
+function tmpDir(): string {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'agent-loader-test-'));
+}
+
+function registryOf(prompts: AgentPrompt[]): AgentRegistry {
+  return buildRegistry(
+    prompts.map((p) => ({ ...p, flow: 'test-flow' })),
+    'test-flow',
+  );
+}
+
+describe('parseAgentPrompt', () => {
+  const sample = `---
+type: instrument-events
+model: claude-sonnet-4-6     # cheapest model that succeeds
+skills: [instrument-events]
+allowedTools: [Read, Edit, Grep, Glob, Bash]
+disallowedTools: [enqueue_task]
+dependsOn: [init]
+---
+
+## Goal
+Add at least one capture call.
+`;
+
+  it('parses frontmatter scalars and inline arrays', () => {
+    const p = parseAgentPrompt(sample, 'fallback');
+    expect(p.type).toBe('instrument-events');
+    expect(p.model).toBe('claude-sonnet-4-6');
+    expect(p.skills).toEqual(['instrument-events']);
+    expect(p.allowedTools).toEqual(['Read', 'Edit', 'Grep', 'Glob', 'Bash']);
+    expect(p.disallowedTools).toEqual(['enqueue_task']);
+    expect(p.dependsOn).toEqual(['init']);
+  });
+
+  it('strips inline comments and keeps the body', () => {
+    const p = parseAgentPrompt(sample, 'fallback');
+    expect(p.model).not.toContain('#');
+    expect(p.body).toContain('## Goal');
+    expect(p.body).not.toContain('---');
+  });
+
+  it('falls back to the menu id when type is omitted', () => {
+    const p = parseAgentPrompt('---\nmodel: x\n---\nbody', 'install');
+    expect(p.type).toBe('install');
+  });
+
+  it('parses the flow from frontmatter', () => {
+    const p = parseAgentPrompt('---\nflow: audit\n---\nx', 'fix-events');
+    expect(p.flow).toBe('audit');
+  });
+
+  it('marks the seed from frontmatter; everything else is a task', () => {
+    expect(parseAgentPrompt('---\nseed: true\n---\nplan', 'planner').seed).toBe(
+      true,
+    );
+    expect(parseAgentPrompt('---\nmodel: x\n---\nbody', 'install').seed).toBe(
+      false,
+    );
+  });
+
+  it('defaults missing array fields to empty and model to undefined', () => {
+    const p = parseAgentPrompt('no frontmatter at all', 'stub');
+    expect(p.model).toBeUndefined();
+    expect(p.skills).toEqual([]);
+    expect(p.dependsOn).toEqual([]);
+    expect(p.body).toBe('no frontmatter at all');
+  });
+});
+
+describe('agentRunTools', () => {
+  it('MCP-qualifies orchestrator tools and passes native tools through', () => {
+    const p = parseAgentPrompt(
+      '---\nallowedTools: [Read, read_handoffs]\ndisallowedTools: [enqueue_task, complete_task, Bash]\n---\nx',
+      't',
+    );
+    const { allowedTools, disallowedTools } = agentRunTools(p);
+    expect(allowedTools).toEqual([
+      'Read',
+      'mcp__posthog-wizard__read_handoffs',
+    ]);
+    expect(disallowedTools).toEqual([
+      'mcp__posthog-wizard__enqueue_task',
+      'mcp__posthog-wizard__complete_task',
+      'Bash',
+    ]);
+  });
+});
+
+describe('buildRegistry', () => {
+  const prompt = (over: Partial<AgentPrompt>): AgentPrompt => ({
+    type: 'x',
+    seed: false,
+    skills: [],
+    allowedTools: [],
+    disallowedTools: [],
+    dependsOn: [],
+    body: 'b',
+    ...over,
+  });
+
+  it('scopes to one flow and keeps the seed out of the task types', () => {
+    const registry = buildRegistry(
+      [
+        prompt({ type: 'plan-audit', flow: 'audit', seed: true }),
+        prompt({ type: 'fix-events', flow: 'audit' }),
+        prompt({ type: 'install', flow: 'posthog-integration' }),
+        prompt({ type: 'example' }),
+      ],
+      'audit',
+    );
+    expect(registry.types).toEqual(['fix-events']);
+    expect(registry.seed?.type).toBe('plan-audit');
+    expect(registry.get('install')).toBeUndefined();
+    // A flowless prompt (e.g. the documentation example) joins no registry.
+    expect(registry.get('example')).toBeUndefined();
+  });
+});
+
+describe('resolveTask', () => {
+  let dir: string;
+  let store: QueueStore;
+
+  beforeEach(() => {
+    dir = tmpDir();
+    store = new QueueStore(dir, 'run-1');
+  });
+
+  afterEach(() => {
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  const prompt: AgentPrompt = {
+    type: 'capture',
+    seed: false,
+    model: 'claude-haiku-4-5-20251001',
+    skills: ['instrument-events'],
+    allowedTools: ['Read', 'Edit'],
+    disallowedTools: ['enqueue_task'],
+    dependsOn: ['plan-capture'],
+    body: '## Goal\nInstrument the planned events.',
+  };
+
+  it('throws when no prompt is registered for the type', () => {
+    const registry = registryOf([]);
+    const task = { type: 'capture', dependsOn: [] } as never;
+    expect(() => resolveTask(registry, task, store)).toThrow(/capture/);
+  });
+
+  it('resolves model, tools, and skills from the prompt', () => {
+    const registry = registryOf([prompt]);
+    const task = store.enqueue({ type: 'capture' });
+    const resolved = resolveTask(registry, task, store);
+    expect(resolved.model).toBe('claude-haiku-4-5-20251001');
+    expect(resolved.skills).toEqual(['instrument-events']);
+    expect(resolved.disallowedTools).toEqual([
+      'mcp__posthog-wizard__enqueue_task',
+    ]);
+  });
+
+  it('prefers the enqueue model override over the prompt model', () => {
+    const registry = registryOf([prompt]);
+    const task = store.enqueue({ type: 'capture', model: 'override-x' });
+    expect(resolveTask(registry, task, store).model).toBe('override-x');
+  });
+
+  it("appends upstream dependencies' handoffs as context", () => {
+    const registry = registryOf([prompt]);
+    const dep = store.enqueue({ type: 'plan-capture' });
+    store.complete(dep.id, {
+      goals: 'decide events',
+      did: 'picked signup and purchase',
+      forNextAgent: 'instrument those two',
+    });
+    const task = store.enqueue({
+      type: 'capture',
+      dependsOn: [dep.id],
+    });
+    const resolved = resolveTask(registry, task, store);
+    expect(resolved.prompt).toContain('Context from previous steps');
+    expect(resolved.prompt).toContain('picked signup and purchase');
+    expect(resolved.prompt).toContain('instrument those two');
+  });
+
+  it('omits the context section when there are no handoffs', () => {
+    const registry = registryOf([prompt]);
+    const task = store.enqueue({ type: 'capture' });
+    expect(resolveTask(registry, task, store).prompt).not.toContain(
+      'Context from previous steps',
+    );
+  });
+});