Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions src/lib/agent/agent-interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,12 @@ export type AgentConfig = {
getPendingQuestion?: () =>
| import('@lib/wizard-session').PendingQuestion
| null;
/**
* Orchestrator queue context. Present only when the `wizard-orchestrator`
* flag routes the run here; threaded into wizard-tools so the orchestrator
* tools register.
*/
orchestrator?: import('@lib/programs/orchestrator/queue-tools').OrchestratorToolsContext;
};

/**
Expand All @@ -286,6 +292,7 @@ export type StopHookResult =
export function createStopHook(
featureQueue: readonly AdditionalFeature[],
signals?: AgentOutputSignals,
requestRemark = true,
): (input: { stop_hook_active: boolean }) => StopHookResult {
let featureIndex = 0;
let remarkRequested = false;
Expand Down Expand Up @@ -313,8 +320,9 @@ export function createStopHook(
return { decision: 'block', reason: prompt };
}

// Phase 2: collect remark (once)
if (!remarkRequested) {
// Phase 2: collect remark (once). Skipped when the caller opts out — the
// orchestrator suppresses it per task so it does not fire on every agent.
if (requestRemark && !remarkRequested) {
remarkRequested = true;
logToFile('Stop hook: requesting reflection');
return {
Expand Down Expand Up @@ -655,8 +663,6 @@ export async function initializeAgent(
logToFile('Agent initialization starting');
logToFile('Install directory:', options.installDir);

getUI().log.step('Initializing Claude agent...');

try {
// Configure LLM gateway environment variables (inherited by SDK subprocess)
const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
Expand Down Expand Up @@ -708,6 +714,7 @@ export async function initializeAgent(
skillsBaseUrl: config.skillsBaseUrl,
askBridge: config.askBridge,
askMaxQuestions: config.askMaxQuestions,
orchestrator: config.orchestrator,
});
mcpServers['wizard-tools'] = wizardToolsServer;

Expand Down Expand Up @@ -747,8 +754,6 @@ export async function initializeAgent(
});
}

getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
getUI().log.success("Agent initialized. Let's get cooking!");
return agentRunConfig;
} catch (error) {
getUI().log.error(
Expand Down Expand Up @@ -794,6 +799,8 @@ export async function runAgent(
errorMessage?: string;
additionalFeatureQueue?: readonly AdditionalFeature[];
abortCases?: readonly AbortCaseMatcher[];
/** Request the end-of-run reflection remark. Defaults to true. */
requestRemark?: boolean;
},
middleware?: {
onMessage(message: any): void;
Expand Down Expand Up @@ -1052,7 +1059,11 @@ export async function runAgent(
Stop: [
{
hooks: [
createStopHook(config?.additionalFeatureQueue ?? [], signals),
createStopHook(
config?.additionalFeatureQueue ?? [],
signals,
config?.requestRemark ?? true,
),
],
timeout: 30,
},
Expand Down Expand Up @@ -1100,6 +1111,7 @@ export async function runAgent(
signals,
receivedSuccessResult,
tasks,
isOrchestratorEnabled(agentConfig.wizardFlags ?? {}),
);

// [ABORT] detection: the skill emits "[ABORT] <reason>" when it
Expand Down Expand Up @@ -1433,6 +1445,9 @@ function handleSDKMessage(
signals: AgentOutputSignals,
receivedSuccessResult = false,
tasks?: Map<string, TaskEntry>,
// The orchestrator owns the TUI task panel (it renders its queue). Suppress the
// agent's own TaskCreate/TaskUpdate rendering so it does not clobber the queue.
suppressTaskRender = false,
): void {
// Map preserves insertion order (the order the agent created the tasks).
// Within that, group by status: completed first, then in_progress, then
Expand All @@ -1444,7 +1459,7 @@ function handleSDKMessage(
};
const rank = (status: string): number => STATUS_RANK[status] ?? 2;
const syncTasks = (): void => {
if (!tasks) return;
if (!tasks || suppressTaskRender) return;
const sorted = Array.from(tasks.values()).sort(
(a, b) => rank(a.status) - rank(b.status),
);
Expand Down
17 changes: 16 additions & 1 deletion src/lib/agent/agent-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ import {
AgentErrorType,
AgentSignals,
buildWizardMetadata,
isOrchestratorEnabled,
checkAllSettingsConflicts,
backupAndFixClaudeSettings,
restoreClaudeSettings,
} from './agent-interface';
import { runOrchestrator } from '../programs/orchestrator/orchestrator-runner';
import { getCloudUrlFromRegion } from '@utils/urls';
import {
evaluateWizardReadiness,
Expand All @@ -43,7 +45,12 @@ import {
getBlockingServiceKeys,
SERVICE_LABELS,
} from '@lib/health-checks/readiness';
import { enableDebugLogs, initLogFile, logToFile } from '@utils/debug';
import {
enableDebugLogs,
getLogFilePath,
initLogFile,
logToFile,
} from '@utils/debug';
import { createBenchmarkPipeline } from '@lib/middleware/benchmark';
import { wizardAbort, WizardError, registerCleanup } from '@utils/wizard-abort';
import { formatScanReport, writeScanReport } from '@lib/yara-hooks';
Expand Down Expand Up @@ -200,6 +207,11 @@ export async function runProgram(
): Promise<void> {
const boot = await bootstrapProgram(session, config, programConfig);

if (isOrchestratorEnabled(boot.wizardFlags)) {
getUI().log.info('Task-queue orchestrator enabled.');
return runOrchestrator(session, programConfig, boot);
}

return runLinearProgram(session, config, programConfig, boot);
}

Expand Down Expand Up @@ -412,6 +424,7 @@ async function runLinearProgram(
showQuestion: (q) => getUI().requestQuestion(q),
});

getUI().log.step('Initializing Claude agent...');
const agent = await initializeAgent(
{
workingDirectory: session.installDir,
Expand All @@ -433,6 +446,8 @@ async function runLinearProgram(
},
sessionToOptions(session),
);
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
getUI().log.success("Agent initialized. Let's get cooking!");

const middleware = session.benchmark
? createBenchmarkPipeline(spinner, sessionToOptions(session))
Expand Down
205 changes: 205 additions & 0 deletions src/lib/programs/orchestrator/__tests__/agent-prompt-loader.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import {
agentRunTools,
buildRegistry,
parseAgentPrompt,
resolveTask,
type AgentPrompt,
type AgentRegistry,
} from '../agent-prompt-loader';
import { QueueStore } from '../queue';

function tmpDir(): string {
return fs.mkdtempSync(path.join(os.tmpdir(), 'agent-loader-test-'));
}

function registryOf(prompts: AgentPrompt[]): AgentRegistry {
return buildRegistry(
prompts.map((p) => ({ ...p, flow: 'test-flow' })),
'test-flow',
);
}

describe('parseAgentPrompt', () => {
const sample = `---
type: instrument-events
model: claude-sonnet-4-6 # cheapest model that succeeds
skills: [instrument-events]
allowedTools: [Read, Edit, Grep, Glob, Bash]
disallowedTools: [enqueue_task]
dependsOn: [init]
---

## Goal
Add at least one capture call.
`;

it('parses frontmatter scalars and inline arrays', () => {
const p = parseAgentPrompt(sample, 'fallback');
expect(p.type).toBe('instrument-events');
expect(p.model).toBe('claude-sonnet-4-6');
expect(p.skills).toEqual(['instrument-events']);
expect(p.allowedTools).toEqual(['Read', 'Edit', 'Grep', 'Glob', 'Bash']);
expect(p.disallowedTools).toEqual(['enqueue_task']);
expect(p.dependsOn).toEqual(['init']);
});

it('strips inline comments and keeps the body', () => {
const p = parseAgentPrompt(sample, 'fallback');
expect(p.model).not.toContain('#');
expect(p.body).toContain('## Goal');
expect(p.body).not.toContain('---');
});

it('falls back to the menu id when type is omitted', () => {
const p = parseAgentPrompt('---\nmodel: x\n---\nbody', 'install');
expect(p.type).toBe('install');
});

it('parses the flow from frontmatter', () => {
const p = parseAgentPrompt('---\nflow: audit\n---\nx', 'fix-events');
expect(p.flow).toBe('audit');
});

it('marks the seed from frontmatter; everything else is a task', () => {
expect(parseAgentPrompt('---\nseed: true\n---\nplan', 'planner').seed).toBe(
true,
);
expect(parseAgentPrompt('---\nmodel: x\n---\nbody', 'install').seed).toBe(
false,
);
});

it('defaults missing array fields to empty and model to undefined', () => {
const p = parseAgentPrompt('no frontmatter at all', 'stub');
expect(p.model).toBeUndefined();
expect(p.skills).toEqual([]);
expect(p.dependsOn).toEqual([]);
expect(p.body).toBe('no frontmatter at all');
});
});

describe('agentRunTools', () => {
it('MCP-qualifies orchestrator tools and passes native tools through', () => {
const p = parseAgentPrompt(
'---\nallowedTools: [Read, read_handoffs]\ndisallowedTools: [enqueue_task, complete_task, Bash]\n---\nx',
't',
);
const { allowedTools, disallowedTools } = agentRunTools(p);
expect(allowedTools).toEqual([
'Read',
'mcp__posthog-wizard__read_handoffs',
]);
expect(disallowedTools).toEqual([
'mcp__posthog-wizard__enqueue_task',
'mcp__posthog-wizard__complete_task',
'Bash',
]);
});
});

describe('buildRegistry', () => {
const prompt = (over: Partial<AgentPrompt>): AgentPrompt => ({
type: 'x',
seed: false,
skills: [],
allowedTools: [],
disallowedTools: [],
dependsOn: [],
body: 'b',
...over,
});

it('scopes to one flow and keeps the seed out of the task types', () => {
const registry = buildRegistry(
[
prompt({ type: 'plan-audit', flow: 'audit', seed: true }),
prompt({ type: 'fix-events', flow: 'audit' }),
prompt({ type: 'install', flow: 'posthog-integration' }),
prompt({ type: 'example' }),
],
'audit',
);
expect(registry.types).toEqual(['fix-events']);
expect(registry.seed?.type).toBe('plan-audit');
expect(registry.get('install')).toBeUndefined();
// A flowless prompt (e.g. the documentation example) joins no registry.
expect(registry.get('example')).toBeUndefined();
});
});

describe('resolveTask', () => {
let dir: string;
let store: QueueStore;

beforeEach(() => {
dir = tmpDir();
store = new QueueStore(dir, 'run-1');
});

afterEach(() => {
fs.rmSync(dir, { recursive: true, force: true });
});

const prompt: AgentPrompt = {
type: 'capture',
seed: false,
model: 'claude-haiku-4-5-20251001',
skills: ['instrument-events'],
allowedTools: ['Read', 'Edit'],
disallowedTools: ['enqueue_task'],
dependsOn: ['plan-capture'],
body: '## Goal\nInstrument the planned events.',
};

it('throws when no prompt is registered for the type', () => {
const registry = registryOf([]);
const task = { type: 'capture', dependsOn: [] } as never;
expect(() => resolveTask(registry, task, store)).toThrow(/capture/);
});

it('resolves model, tools, and skills from the prompt', () => {
const registry = registryOf([prompt]);
const task = store.enqueue({ type: 'capture' });
const resolved = resolveTask(registry, task, store);
expect(resolved.model).toBe('claude-haiku-4-5-20251001');
expect(resolved.skills).toEqual(['instrument-events']);
expect(resolved.disallowedTools).toEqual([
'mcp__posthog-wizard__enqueue_task',
]);
});

it('prefers the enqueue model override over the prompt model', () => {
const registry = registryOf([prompt]);
const task = store.enqueue({ type: 'capture', model: 'override-x' });
expect(resolveTask(registry, task, store).model).toBe('override-x');
});

it("appends upstream dependencies' handoffs as context", () => {
const registry = registryOf([prompt]);
const dep = store.enqueue({ type: 'plan-capture' });
store.complete(dep.id, {
goals: 'decide events',
did: 'picked signup and purchase',
forNextAgent: 'instrument those two',
});
const task = store.enqueue({
type: 'capture',
dependsOn: [dep.id],
});
const resolved = resolveTask(registry, task, store);
expect(resolved.prompt).toContain('Context from previous steps');
expect(resolved.prompt).toContain('picked signup and purchase');
expect(resolved.prompt).toContain('instrument those two');
});

it('omits the context section when there are no handoffs', () => {
const registry = registryOf([prompt]);
const task = store.enqueue({ type: 'capture' });
expect(resolveTask(registry, task, store).prompt).not.toContain(
'Context from previous steps',
);
});
});
Loading
Loading