diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 18dfaa291..f3aeb9c3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -161,9 +161,22 @@ jobs: with: node-version: 22 cache: npm + - name: Install uv + uses: astral-sh/setup-uv@v8.0.0 + with: + python-version: '3.12' - run: npm ci + - working-directory: cockpit/langgraph/streaming/python + run: uv sync - run: npx playwright install --with-deps chromium - run: npx nx e2e cockpit --skip-nx-cache + - name: Upload Playwright trace on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cockpit-e2e-trace + path: apps/cockpit/e2e/test-results/ + retention-days: 7 website-e2e: name: Website — e2e diff --git a/apps/cockpit/e2e/.gitignore b/apps/cockpit/e2e/.gitignore new file mode 100644 index 000000000..059a55910 --- /dev/null +++ b/apps/cockpit/e2e/.gitignore @@ -0,0 +1,3 @@ +test-results/ +playwright-report/ +*.tmp diff --git a/apps/cockpit/e2e/README.md b/apps/cockpit/e2e/README.md new file mode 100644 index 000000000..708953da5 --- /dev/null +++ b/apps/cockpit/e2e/README.md @@ -0,0 +1,33 @@ +# cockpit e2e + +Cross-stack E2E harness for cockpit example apps. Uses [`@copilotkit/aimock`](https://github.com/CopilotKit/aimock) as a deterministic mock for LLM API calls; the per-product Python LangGraph dev server is launched with `OPENAI_BASE_URL` pointed at it; Playwright drives the example Angular app in real Chromium. + +Phase 1 covers `c-messages` only. Future phases each add one example (one fixture + one spec file per PR). + +## Run the suite + +``` +npx nx e2e cockpit +``` + +Replay-only. No `OPENAI_API_KEY` needed. Reads committed fixtures from `fixtures/`. + +## Refresh a fixture + +Each captured fixture has a recipe script under `scripts/`. Example for the c-messages fixture: + +``` +OPENAI_API_KEY=sk-... 
uv run --project cockpit/langgraph/streaming/python \ + python apps/cockpit/e2e/scripts/record-c-messages.py +``` + +Commit the updated `fixtures/c-messages.json`. Scripts are dev-only; CI never runs them. + +## Layout + +- `aimock-runner.ts` — programmatic boot of the mock server (mirrors `examples/chat/aimock-e2e/aimock-runner.ts`). +- `test-helpers.ts` — `sendPromptAndWait` helper that waits on `chat-message[data-streaming="false"]`. +- `fixtures/` — committed JSON fixtures keyed by example. +- `scripts/` — fixture-capture recipes (one per fixture). +- `playwright.config.ts` — Playwright config with globalSetup that boots aimock + LangGraph + Angular dev server. +- `c-messages.spec.ts` — Phase 1 pilot. diff --git a/apps/cockpit/e2e/aimock-runner.spec.ts b/apps/cockpit/e2e/aimock-runner.spec.ts new file mode 100644 index 000000000..7c096476d --- /dev/null +++ b/apps/cockpit/e2e/aimock-runner.spec.ts @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +import { describe, it, expect, afterEach } from 'vitest'; +import { writeFileSync, mkdtempSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { startAimock, type AimockHandle } from './aimock-runner'; + +describe('startAimock', () => { + let handle: AimockHandle | null = null; + let workDir = ''; + + afterEach(async () => { + if (handle) await handle.stop(); + handle = null; + if (workDir) rmSync(workDir, { recursive: true, force: true }); + workDir = ''; + }); + + it('boots a replay server backed by a fixture file', async () => { + workDir = mkdtempSync(join(tmpdir(), 'aimock-test-')); + const fixturePath = join(workDir, 'hi.json'); + writeFileSync( + fixturePath, + JSON.stringify({ + fixtures: [ + { match: { userMessage: 'say hi briefly' }, response: { content: 'Hi!' 
} }, + ], + }), + ); + + handle = await startAimock({ mode: 'replay', fixturePath }); + expect(handle.port).toBeGreaterThan(0); + expect(handle.baseUrl).toMatch(/^http:\/\/.+\/v1$/); + + // The OpenAI SDK call path is exercised in Task 0's de-risk; this + // unit test stops at "the harness started cleanly and exposes the + // documented shape." + }); + + it('stop() is idempotent', async () => { + workDir = mkdtempSync(join(tmpdir(), 'aimock-test-')); + const fixturePath = join(workDir, 'hi.json'); + writeFileSync(fixturePath, JSON.stringify({ fixtures: [] })); + handle = await startAimock({ mode: 'replay', fixturePath }); + await handle.stop(); + await handle.stop(); + expect(true).toBe(true); + }); + + it('loads and merges all .json files in a directory', async () => { + workDir = mkdtempSync(join(tmpdir(), 'aimock-test-')); + writeFileSync( + join(workDir, 'a.json'), + JSON.stringify({ + fixtures: [{ match: { userMessage: 'one' }, response: { content: 'A' } }], + }), + ); + writeFileSync( + join(workDir, 'b.json'), + JSON.stringify({ + fixtures: [{ match: { userMessage: 'two' }, response: { content: 'B' } }], + }), + ); + // Non-JSON file in the dir should be ignored. + writeFileSync(join(workDir, 'README.md'), '# not a fixture'); + + handle = await startAimock({ mode: 'replay', fixturePath: workDir }); + expect(handle.port).toBeGreaterThan(0); + expect(handle.baseUrl).toMatch(/^http:\/\/.+\/v1$/); + }); +}); diff --git a/apps/cockpit/e2e/aimock-runner.ts b/apps/cockpit/e2e/aimock-runner.ts new file mode 100644 index 000000000..5392cb777 --- /dev/null +++ b/apps/cockpit/e2e/aimock-runner.ts @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MIT +import { LLMock } from '@copilotkit/aimock'; +import { readFileSync, readdirSync, statSync } from 'node:fs'; +import { join } from 'node:path'; + +export interface AimockHandle { + /** Port the mock server is listening on. */ + readonly port: number; + /** Full base URL the OpenAI SDK should target (includes /v1 suffix). 
*/ + readonly baseUrl: string; + /** Tear down the server. Safe to call multiple times. */ + stop(): Promise; +} + +export interface AimockStartOptions { + mode: 'replay'; + /** Path to a single fixture file OR a directory of fixture files. */ + fixturePath: string; +} + +// Raw JSON entry shape passes through to aimock's FixtureFileEntry — the +// `match` block can carry richer discriminators (toolName, hasToolResult, +// turnIndex, etc.) that are needed to distinguish a parent LLM's first call +// from its continuation after a tool round. We don't narrow the shape here; +// aimock's `addFixturesFromJSON` validates structure at load time. +type FixtureFileEntry = Record; + +function loadFixtureEntries(fixturePath: string): FixtureFileEntry[] { + const stats = statSync(fixturePath); + const out: FixtureFileEntry[] = []; + const readFile = (full: string): void => { + const raw = readFileSync(full, 'utf-8'); + const parsed = JSON.parse(raw) as { fixtures: FixtureFileEntry[] }; + for (const fx of parsed.fixtures) out.push(fx); + }; + if (stats.isDirectory()) { + const files = readdirSync(fixturePath) + .filter((f) => f.endsWith('.json')) + .sort(); + for (const file of files) readFile(join(fixturePath, file)); + return out; + } + readFile(fixturePath); + return out; +} + +export async function startAimock(opts: AimockStartOptions): Promise { + const entries = loadFixtureEntries(opts.fixturePath); + + // Use a large chunkSize so each response arrives in 1-2 SSE deltas. This + // intentionally turns off the partial-markdown streaming path for harness + // tests: structural assertions (code fence, list) measure the FINAL rendered + // DOM, not the progressive render. With aggressive default chunking, the + // partial-markdown parser sometimes can't recover a triple-backtick fence + // that gets split mid-token, and the final state ends up as inline + // instead of
. Streaming-progressive behavior is covered by the
+  // Phase 1 unit-variance tables; the e2e harness is for final-state
+  // invariants and cross-stack integration.
+  const mock = new LLMock({ port: 0, chunkSize: 4096 });
+  if (entries.length > 0) {
+    mock.addFixturesFromJSON(entries as never);
+  }
+  await mock.start();
+
+  const port = mock.port;
+  const baseUrl = `${mock.url}/v1`;
+  let stopped = false;
+
+  return {
+    port,
+    baseUrl,
+    async stop() {
+      if (stopped) return;
+      stopped = true;
+      await mock.stop();
+    },
+  };
+}
diff --git a/apps/cockpit/e2e/all-examples-smoke.spec.ts b/apps/cockpit/e2e/all-examples-smoke.spec.ts
deleted file mode 100644
index 823e36638..000000000
--- a/apps/cockpit/e2e/all-examples-smoke.spec.ts
+++ /dev/null
@@ -1,66 +0,0 @@
-import { expect, test } from '@playwright/test';
-
-/**
- * Smoke test that verifies every capability example's Angular app is running
- * and can render the chat interface. Requires all 15 Angular apps to be served.
- *
- * Run with: npx playwright test apps/cockpit/e2e/all-examples-smoke.spec.ts
- *
- * Prerequisites:
- *   npx tsx apps/cockpit/scripts/serve-example.ts --all
- *   OR: nx run cockpit:serve-all
- */
-
-const EXAMPLES = [
-  { name: 'streaming', port: 4300, selector: 'app-streaming' },
-  { name: 'persistence', port: 4301, selector: 'app-persistence' },
-  { name: 'interrupts', port: 4302, selector: 'app-interrupts' },
-  { name: 'memory', port: 4303, selector: 'app-memory' },
-  { name: 'durable-execution', port: 4304, selector: 'app-durable-execution' },
-  { name: 'subgraphs', port: 4305, selector: 'app-subgraphs' },
-  { name: 'time-travel', port: 4306, selector: 'app-time-travel' },
-  { name: 'deployment-runtime', port: 4307, selector: 'app-deployment-runtime' },
-  { name: 'planning', port: 4310, selector: 'app-planning' },
-  { name: 'filesystem', port: 4311, selector: 'app-filesystem' },
-  { name: 'da-subagents', port: 4312, selector: 'app-subagents' },
-  { name: 'da-memory', port: 4313, selector: 'app-da-memory' },
-  { name: 'skills', port: 4314, selector: 'app-skills' },
-  { name: 'sandboxes', port: 4315, selector: 'app-sandboxes' },
-  { name: 'c-a2ui', port: 4511, selector: 'app-a2ui' },
-] as const;
-
-test.describe('All Examples Smoke Test', () => {
-  for (const example of EXAMPLES) {
-    test(`${example.name} (port ${example.port}) renders chat UI`, async ({ page }) => {
-      await page.goto(`http://localhost:${example.port}`, { timeout: 15000 });
-      await page.waitForSelector(example.selector, { state: 'attached', timeout: 10000 });
-
-      // Verify the chat component renders
-      await expect(page.locator('chat')).toBeVisible({ timeout: 5000 });
-
-      // Verify input and send button exist
-      await expect(page.locator('textarea[name="messageText"]')).toBeVisible({ timeout: 5000 });
-      await expect(page.locator('button[type="submit"]')).toBeVisible({ timeout: 5000 });
-    });
-  }
-});
-
-test.describe('All Examples Send Message Test', () => {
-  // This test requires a running LangGraph backend with OPENAI_API_KEY
-  test.skip(({ }, testInfo) => !process.env['OPENAI_API_KEY'], 'Requires OPENAI_API_KEY');
-
-  for (const example of EXAMPLES) {
-    test(`${example.name} (port ${example.port}) sends and receives a message`, async ({ page }) => {
-      await page.goto(`http://localhost:${example.port}`, { timeout: 15000 });
-      await page.waitForSelector(example.selector, { state: 'attached', timeout: 10000 });
-
-      // Type and send a message
-      await page.fill('textarea[name="messageText"]', 'hello');
-      await page.click('button[type="submit"]');
-
-      // Wait for AI response
-      await expect(page.locator('.chat-md').first()).toBeVisible({ timeout: 30000 });
-      await expect(page.locator('.chat-md').first()).not.toBeEmpty({ timeout: 30000 });
-    });
-  }
-});
diff --git a/apps/cockpit/e2e/cockpit.spec.ts b/apps/cockpit/e2e/cockpit.spec.ts
deleted file mode 100644
index b28ed605e..000000000
--- a/apps/cockpit/e2e/cockpit.spec.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-import { expect, test } from '@playwright/test';
-
-test('renders navigation and shell on the home page', async ({ page }) => {
-  await page.goto('/');
-  await expect(page.getByRole('main', { name: 'Cockpit shell' })).toHaveAttribute(
-    'data-hydrated',
-    'true'
-  );
-
-  await expect(page.getByRole('navigation', { name: 'Cockpit navigation' })).toBeVisible();
-  await expect(page.getByRole('button', { name: 'Run', exact: true })).toBeVisible();
-  await expect(page.getByRole('button', { name: 'Code', exact: true })).toBeVisible();
-  await expect(page.getByRole('button', { name: 'Docs', exact: true })).toBeVisible();
-});
-
-test('navigates from the sidebar to a capability route', async ({ page }) => {
-  await page.goto('/');
-  await expect(page.getByRole('main', { name: 'Cockpit shell' })).toHaveAttribute(
-    'data-hydrated',
-    'true'
-  );
-
-  // Sidebar strips "LangGraph " prefix, so the link text is just "Persistence"
-  await page.getByRole('link', { name: 'Persistence', exact: true }).click();
-
-  await expect(page).toHaveURL(/\/langgraph\/core-capabilities\/persistence\/overview\/python$/);
-  await expect(page.getByRole('main', { name: 'Cockpit shell' })).toHaveAttribute(
-    'data-hydrated',
-    'true'
-  );
-
-  // Mode switcher should still be present
-  await expect(page.getByRole('button', { name: 'Code', exact: true })).toBeVisible();
-});
-
-test('falls back to the product overview when a missing typescript route is requested', async ({ page }) => {
-  await page.goto('/langgraph/core-capabilities/streaming/overview/typescript');
-
-  await expect(page).toHaveURL(/\/langgraph\/getting-started\/overview\/overview\/python$/);
-  await expect(page.getByRole('main', { name: 'Cockpit shell' })).toHaveAttribute(
-    'data-hydrated',
-    'true'
-  );
-});
diff --git a/apps/cockpit/e2e/dark-mode.spec.ts b/apps/cockpit/e2e/dark-mode.spec.ts
deleted file mode 100644
index b60b7df1e..000000000
--- a/apps/cockpit/e2e/dark-mode.spec.ts
+++ /dev/null
@@ -1,49 +0,0 @@
-import { expect, test } from '@playwright/test';
-
-const COOKIE_URL = 'http://127.0.0.1:4201';
-
-test.describe('dark mode', () => {
-  test('defaults to dark when no cookie is set', async ({ page, context }) => {
-    await context.clearCookies();
-    await page.goto('/');
-    await expect(page.locator('html')).toHaveAttribute('data-theme', 'dark');
-    const canvas = await page
-      .locator('html')
-      .evaluate((el) => getComputedStyle(el).getPropertyValue('--ds-canvas').trim());
-    expect(canvas).toBe('rgb(17, 17, 17)');
-  });
-
-  test('honors theme=light cookie on server render', async ({ page, context }) => {
-    await context.addCookies([
-      { name: 'theme', value: 'light', url: COOKIE_URL },
-    ]);
-    await page.goto('/');
-    await expect(page.locator('html')).toHaveAttribute('data-theme', 'light');
-    const canvas = await page
-      .locator('html')
-      .evaluate((el) => getComputedStyle(el).getPropertyValue('--ds-canvas').trim());
-    expect(canvas).toBe('rgb(255, 255, 255)');
-  });
-
-  test('toggle flips data-theme optimistically and persists across reload', async ({
-    page,
-    context,
-  }) => {
-    await context.clearCookies();
-    await page.goto('/');
-    await expect(page.locator('html')).toHaveAttribute('data-theme', 'dark');
-
-    // Wait for the POST that persists the cookie so the reload below sees it.
-    const themePost = page.waitForResponse(
-      (resp) => resp.url().endsWith('/api/theme') && resp.request().method() === 'POST',
-    );
-    await page.getByRole('button', { name: /switch to light/i }).click();
-    // Optimistic: data-theme flips synchronously
-    await expect(page.locator('html')).toHaveAttribute('data-theme', 'light');
-
-    // Persistence: wait for the cookie write, then reload and confirm
-    await themePost;
-    await page.reload();
-    await expect(page.locator('html')).toHaveAttribute('data-theme', 'light');
-  });
-});
diff --git a/apps/cockpit/e2e/fixtures/streaming.json b/apps/cockpit/e2e/fixtures/streaming.json
new file mode 100644
index 000000000..d54869ff9
--- /dev/null
+++ b/apps/cockpit/e2e/fixtures/streaming.json
@@ -0,0 +1,12 @@
+{
+  "fixtures": [
+    {
+      "match": {
+        "userMessage": "Tell me one quick fact about Angular signals in two sentences."
+      },
+      "response": {
+        "content": "Angular signals are a reactive primitive (signal, computed, effect) that track dependencies to provide fine-grained reactivity and more efficient change detection. They let you update state synchronously via set()/update() and ensure only consumers that read an affected signal are re\u2011evaluated."
+      }
+    }
+  ]
+}
diff --git a/apps/cockpit/e2e/global-setup.ts b/apps/cockpit/e2e/global-setup.ts
new file mode 100644
index 000000000..ac5c7a157
--- /dev/null
+++ b/apps/cockpit/e2e/global-setup.ts
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: MIT
+import { spawn, type ChildProcess, type SpawnOptions } from 'node:child_process';
+import { setTimeout as delay } from 'node:timers/promises';
+import { resolve } from 'node:path';
+import { startAimock, type AimockHandle } from './aimock-runner';
+
+/** Handles to everything globalSetup boots; global-teardown.ts reads them back. */
+interface SharedState {
+  aimock: AimockHandle;
+  langgraph: ChildProcess;
+  angular: ChildProcess;
+}
+
+declare global {
+  // eslint-disable-next-line no-var
+  var __COCKPIT_AIMOCK_E2E_STATE__: SharedState | undefined;
+}
+
+const REPO_ROOT = resolve(__dirname, '../../..');
+// AIMOCK_FIXTURE, when set, is resolved relative to this directory and replaces
+// the default fixtures/ directory.
+const FIXTURE_PATH = process.env.AIMOCK_FIXTURE
+  ? resolve(__dirname, process.env.AIMOCK_FIXTURE)
+  : resolve(__dirname, 'fixtures');
+
+/**
+ * Poll `url` every 500ms until it responds.
+ *
+ * Any 2xx status or a 404 counts as ready; a failed fetch means "not up yet".
+ *
+ * @param url - Address probed with `fetch`.
+ * @param timeoutMs - Throw once this much time has elapsed without a ready response.
+ * @param signal - Optional. Once aborted, polling stops and the promise resolves
+ *   quietly; the caller that aborted already has its outcome.
+ * @throws Error when the server is still not ready after `timeoutMs`.
+ */
+async function waitForPort(url: string, timeoutMs: number, signal?: AbortSignal): Promise<void> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    if (signal?.aborted) return;
+    try {
+      const res = await fetch(url);
+      if (res.ok || res.status === 404) return;
+    } catch {
+      // server not up yet
+    }
+    await delay(500);
+  }
+  throw new Error(`Server at ${url} did not become ready within ${timeoutMs}ms`);
+}
+
+/** Spawn a child with piped stdio and mirror its output behind a `[label]` prefix. */
+function startService(
+  label: string,
+  command: string,
+  args: string[],
+  options: SpawnOptions,
+): ChildProcess {
+  const child = spawn(command, args, { ...options, stdio: 'pipe' });
+  child.stdout?.on('data', (b) => process.stdout.write(`[${label}] ${b}`));
+  child.stderr?.on('data', (b) => process.stderr.write(`[${label}] ${b}`));
+  return child;
+}
+
+/**
+ * Wait until `child` serves `url`, rejecting early if the process fails to
+ * spawn or exits first, instead of sitting out the whole readiness timeout.
+ */
+async function waitForService(
+  child: ChildProcess,
+  label: string,
+  url: string,
+  timeoutMs: number,
+): Promise<void> {
+  const polling = new AbortController();
+  let fail: (reason: Error) => void = () => undefined;
+  const died = new Promise<never>((_resolve, reject) => {
+    fail = reject;
+  });
+  const onError = (err: Error): void =>
+    fail(new Error(`${label} failed to start: ${err.message}`));
+  const onExit = (code: number | null, sig: NodeJS.Signals | null): void =>
+    fail(new Error(`${label} exited before becoming ready (code=${code}, signal=${sig})`));
+  child.once('error', onError);
+  child.once('exit', onExit);
+  try {
+    await Promise.race([waitForPort(url, timeoutMs, polling.signal), died]);
+  } finally {
+    // Stop the poll loop and detach the listeners once the outcome is known.
+    polling.abort();
+    child.off('error', onError);
+    child.off('exit', onExit);
+  }
+}
+
+/**
+ * Playwright global setup: boot aimock, then the LangGraph dev server (pointed
+ * at aimock through OPENAI_BASE_URL), then the Angular dev server.
+ *
+ * The handles are published on globalThis only after all three are ready. If a
+ * step fails, whatever already started is stopped here before rethrowing,
+ * because nothing has been published for the teardown hook to find.
+ */
+export default async function globalSetup(): Promise<void> {
+  const started: Partial<SharedState> = {};
+  try {
+    const aimock = await startAimock({ mode: 'replay', fixturePath: FIXTURE_PATH });
+    started.aimock = aimock;
+    // eslint-disable-next-line no-console
+    console.log(`[cockpit] aimock listening at ${aimock.baseUrl}`);
+
+    const langgraph = startService(
+      'langgraph',
+      'uv',
+      ['run', 'langgraph', 'dev', '--port', '8123', '--no-browser'],
+      {
+        cwd: resolve(REPO_ROOT, 'cockpit/langgraph/streaming/python'),
+        env: {
+          ...process.env,
+          OPENAI_BASE_URL: aimock.baseUrl,
+          OPENAI_API_KEY: 'test-not-used',
+        },
+      },
+    );
+    started.langgraph = langgraph;
+    await waitForService(langgraph, 'langgraph', 'http://localhost:8123/ok', 90_000);
+    // eslint-disable-next-line no-console
+    console.log('[cockpit] langgraph ready on :8123');
+
+    const angular = startService(
+      'angular',
+      'npx',
+      ['nx', 'serve', 'cockpit-langgraph-streaming-angular', '--port', '4300'],
+      { cwd: REPO_ROOT, env: { ...process.env } },
+    );
+    started.angular = angular;
+    await waitForService(angular, 'angular', 'http://localhost:4300/', 120_000);
+    // eslint-disable-next-line no-console
+    console.log('[cockpit] angular ready on :4300');
+
+    globalThis.__COCKPIT_AIMOCK_E2E_STATE__ = { aimock, langgraph, angular };
+  } catch (err: unknown) {
+    // Best-effort cleanup; the startup failure is the error worth reporting.
+    started.angular?.kill('SIGTERM');
+    started.langgraph?.kill('SIGTERM');
+    try {
+      await started.aimock?.stop();
+    } catch {
+      // Ignore: never mask the original startup error.
+    }
+    throw err;
+  }
+}
diff --git a/apps/cockpit/e2e/global-teardown.ts b/apps/cockpit/e2e/global-teardown.ts
new file mode 100644
index 000000000..6bdbe43d1
--- /dev/null
+++ b/apps/cockpit/e2e/global-teardown.ts
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: MIT
+/**
+ * Playwright global teardown: stop whatever global-setup.ts published on
+ * `globalThis.__COCKPIT_AIMOCK_E2E_STATE__`. A no-op when nothing was published.
+ */
+export default async function globalTeardown(): Promise<void> {
+  const state = globalThis.__COCKPIT_AIMOCK_E2E_STATE__;
+  if (!state) return;
+  // Clear the slot first so a second invocation cannot signal the same children again.
+  globalThis.__COCKPIT_AIMOCK_E2E_STATE__ = undefined;
+  try {
+    state.angular.kill('SIGTERM');
+    state.langgraph.kill('SIGTERM');
+  } finally {
+    // Stop the mock even if signalling a child threw.
+    await state.aimock.stop();
+  }
+}
diff --git a/apps/cockpit/e2e/playwright.config.ts b/apps/cockpit/e2e/playwright.config.ts
new file mode 100644
index 000000000..de3ffaa70
--- /dev/null
+++ b/apps/cockpit/e2e/playwright.config.ts
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MIT
+import { defineConfig, devices } from '@playwright/test';
+
+// Playwright config for the cockpit aimock e2e harness. global-setup.ts boots
+// aimock, LangGraph and the Angular dev server once for the whole run.
+export default defineConfig({
+  testDir: '.',
+  testMatch: '**/*.spec.ts',
+  // aimock-runner.spec.ts imports from vitest, so Playwright must not collect it.
+  testIgnore: ['aimock-runner.spec.ts'],
+  // Specs run serially: they all share the single stack started by global-setup.ts.
+  fullyParallel: false,
+  workers: 1,
+  retries: process.env.CI ? 2 : 0,
+  reporter: process.env.CI ? [['list'], ['html', { open: 'never' }]] : 'list',
+  // Pin artifacts next to this config (relative paths resolve against the config
+  // file) so traces land in apps/cockpit/e2e/test-results/, the directory the CI
+  // job uploads on failure and that .gitignore excludes.
+  outputDir: './test-results',
+  use: {
+    baseURL: 'http://localhost:4300',
+    trace: 'retain-on-failure',
+  },
+  projects: [
+    {
+      name: 'chromium',
+      use: { ...devices['Desktop Chrome'] },
+    },
+  ],
+  globalSetup: './global-setup.ts',
+  globalTeardown: './global-teardown.ts',
+});
diff --git a/apps/cockpit/e2e/production-smoke.spec.ts b/apps/cockpit/e2e/production-smoke.spec.ts
deleted file mode 100644
index 4653b43dc..000000000
--- a/apps/cockpit/e2e/production-smoke.spec.ts
+++ /dev/null
@@ -1,119 +0,0 @@
-import { expect, test } from '@playwright/test';
-
-/**
- * Production smoke test — verifies the deployed stack works end-to-end.
- *
- * Requires:
- *   EXAMPLES_URL - e.g., https://examples.cacheplane.ai
- *   OPENAI_API_KEY - for send/receive tests (optional)
- *
- * Run:
- *   BASE_URL=https://cockpit.cacheplane.ai \
- *   EXAMPLES_URL=https://examples.cacheplane.ai \
- *   npx playwright test apps/cockpit/e2e/production-smoke.spec.ts
- */
-
-const COCKPIT_URL = process.env['BASE_URL'] ?? 'https://cockpit.cacheplane.ai';
-const EXAMPLES_URL = process.env['EXAMPLES_URL'] ?? 'https://examples.cacheplane.ai';
-
-const CAPABILITIES = [
-  'langgraph/streaming',
-  'langgraph/persistence',
-  'langgraph/interrupts',
-  'langgraph/memory',
-  'langgraph/durable-execution',
-  'langgraph/subgraphs',
-  'langgraph/time-travel',
-  'langgraph/deployment-runtime',
-  'deep-agents/planning',
-  'deep-agents/filesystem',
-  'deep-agents/subagents',
-  'deep-agents/memory',
-  'deep-agents/skills',
-  'deep-agents/sandboxes',
-  'chat/messages',
-  'chat/input',
-  'chat/interrupts',
-  'chat/tool-calls',
-  'chat/subagents',
-  'chat/threads',
-  'chat/timeline',
-  'chat/generative-ui',
-  'chat/debug',
-  'chat/theming',
-  'chat/a2ui',
-] as const;
-
-const RENDER_CAPABILITIES = [
-  'render/spec-rendering',
-  'render/element-rendering',
-  'render/state-management',
-  'render/registry',
-  'render/repeat-loops',
-  'render/computed-functions',
-] as const;
-
-const CHAT_PRIMITIVE_CAPABILITIES = new Set([
-  'chat/messages',
-  'chat/input',
-  'chat/debug',
-]);
-
-const A2UI_CAPABILITIES = new Set([
-  'chat/a2ui',
-]);
-
-test.describe('Production: Angular example apps load', () => {
-  for (const cap of CAPABILITIES) {
-    test(`${cap} loads at examples URL`, async ({ page }) => {
-      const url = `${EXAMPLES_URL}/${cap}/`;
-      const res = await page.goto(url, { timeout: 15000 });
-      expect(res?.status()).toBe(200);
-      if (CHAT_PRIMITIVE_CAPABILITIES.has(cap)) {
-        await expect(page.getByRole('search', { name: 'Message input' })).toBeVisible({ timeout: 10000 });
-        return;
-      }
-
-      await expect(page.locator('chat')).toBeVisible({ timeout: 10000 });
-    });
-  }
-});
-
-test.describe('Production: Render example apps load', () => {
-  for (const cap of RENDER_CAPABILITIES) {
-    test(`${cap} loads at examples URL`, async ({ page }) => {
-      const url = `${EXAMPLES_URL}/${cap}/`;
-      const res = await page.goto(url, { timeout: 15000 });
-      expect(res?.status()).toBe(200);
-    });
-  }
-});
-
-test.describe('Production: cockpit loads correctly', () => {
-  test('cockpit loads with sidebar navigation', async ({ page }) => {
-    await page.goto(COCKPIT_URL, { timeout: 15000 });
-    await expect(page.getByRole('navigation', { name: 'Cockpit navigation' })).toBeVisible();
-    const links = await page.locator('nav a').allTextContents();
-    const overviewLinks = links.filter((t) => t.toLowerCase().includes('overview'));
-    expect(overviewLinks).toHaveLength(0);
-  });
-});
-
-test.describe('Production: send/receive smoke', () => {
-  test.skip(() => !process.env['OPENAI_API_KEY'], 'Requires OPENAI_API_KEY');
-
-  for (const cap of ['langgraph/streaming', 'deep-agents/planning', 'chat/a2ui'] as const) {
-    test(`${cap} sends and receives a message`, async ({ page }) => {
-      await page.goto(`${EXAMPLES_URL}/${cap}/`, { timeout: 15000 });
-      await expect(page.locator('chat')).toBeVisible({ timeout: 10000 });
-      await page.fill('textarea[name="messageText"]', 'hello');
-      await page.click('button[type="submit"]');
-      if (A2UI_CAPABILITIES.has(cap)) {
-        await expect(page.getByRole('heading', { name: 'Contact Us' })).toBeVisible({ timeout: 30000 });
-        return;
-      }
-
-      await expect(page.locator('.chat-md').first()).toBeVisible({ timeout: 30000 });
-    });
-  }
-});
diff --git a/apps/cockpit/e2e/scripts/record-streaming.py b/apps/cockpit/e2e/scripts/record-streaming.py
new file mode 100644
index 000000000..3a9228085
--- /dev/null
+++ b/apps/cockpit/e2e/scripts/record-streaming.py
@@ -0,0 +1,70 @@
+"""Capture a real text response from the streaming graph's LLM.
+
+Mirrors cockpit/langgraph/streaming/python/src/graph.py's
+build_streaming_graph() setup: ChatOpenAI(gpt-5-mini, streaming=True)
++ system prompt from prompts/streaming.md.
+
+Run from repo root:
+  OPENAI_API_KEY=sk-... uv run --project cockpit/langgraph/streaming/python \
+    python apps/cockpit/e2e/scripts/record-streaming.py
+
+File paths are resolved from this script's location, so only the `--project`
+argument above depends on the current directory. The fixture is written to
+apps/cockpit/e2e/fixtures/streaming.json.
+"""
+import json
+import os
+import sys
+from pathlib import Path
+
+# This file lives in apps/cockpit/e2e/scripts/, so the repo root is four
+# directories above the one containing it.
+REPO_ROOT = Path(__file__).resolve().parents[4]
+PYTHON_PROJECT = REPO_ROOT / "cockpit/langgraph/streaming/python"
+FIXTURE_REL = Path("apps/cockpit/e2e/fixtures/streaming.json")
+
+# Minimal .env loader: KEY=VALUE lines, '#' comments skipped, surrounding quotes
+# stripped. setdefault() means variables already in the environment win.
+env_path = PYTHON_PROJECT / ".env"
+if env_path.exists():
+    for line in env_path.read_text(encoding="utf-8").splitlines():
+        line = line.strip()
+        if line and not line.startswith("#") and "=" in line:
+            k, _, v = line.partition("=")
+            os.environ.setdefault(k.strip(), v.strip().strip('"').strip("'"))
+
+if not os.environ.get("OPENAI_API_KEY"):
+    print("OPENAI_API_KEY not set (in env or .env)", file=sys.stderr)
+    sys.exit(1)
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_openai import ChatOpenAI
+
+# The fixture matches on this exact user message; streaming.spec.ts sends the same text.
+PROMPT = "Tell me one quick fact about Angular signals in two sentences."
+SYSTEM_PROMPT = (PYTHON_PROJECT / "prompts/streaming.md").read_text(encoding="utf-8")
+
+llm = ChatOpenAI(model="gpt-5-mini", temperature=0)
+response = llm.invoke(
+    [SystemMessage(content=SYSTEM_PROMPT), HumanMessage(content=PROMPT)],
+)
+# Only plain-string content is usable; any other content shape is treated as empty.
+text = response.content if isinstance(response.content, str) else ""
+if not text.strip():
+    print("LLM returned empty content; cannot build fixture", file=sys.stderr)
+    sys.exit(2)
+print(f"captured {len(text)} chars; first 80: {text[:80]!r}")
+
+fixture = {
+    "fixtures": [
+        {
+            "match": {"userMessage": PROMPT},
+            "response": {"content": text},
+        }
+    ]
+}
+
+out_path = REPO_ROOT / FIXTURE_REL
+out_path.parent.mkdir(parents=True, exist_ok=True)
+out_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8")
+print(f"\nWrote fixture to {FIXTURE_REL}")
diff --git a/apps/cockpit/e2e/streaming.spec.ts b/apps/cockpit/e2e/streaming.spec.ts
new file mode 100644
index 000000000..ca0074474
--- /dev/null
+++ b/apps/cockpit/e2e/streaming.spec.ts
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: MIT
+import { test, expect } from '@playwright/test';
+import { sendPromptAndWait } from './test-helpers';
+
+const PROMPT = 'Tell me one quick fact about Angular signals in two sentences.';
+
+// End-to-end proof of the whole chain: aimock answers the streaming graph's
+// LLM call with the captured fixture, langgraph relays the AI message, the
+// cockpit-langgraph-streaming-angular app renders it through the chat
+// composition, and the bubble settles with data-streaming="false".
+test('streaming: assistant text from the mocked LLM renders in the cockpit chat composition', async ({ page }) => {
+  const assistantBubble = await sendPromptAndWait(page, PROMPT);
+
+  // The fixture content is an Angular signals fact, so the rendered text must
+  // mention "signal" regardless of casing.
+  const renderedText = (await assistantBubble.innerText()).toLowerCase();
+  expect(renderedText).toContain('signal');
+});
diff --git a/apps/cockpit/e2e/test-helpers.ts b/apps/cockpit/e2e/test-helpers.ts
new file mode 100644
index 000000000..0bbe9a252
--- /dev/null
+++ b/apps/cockpit/e2e/test-helpers.ts
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MIT
+import { expect, type Locator, type Page } from '@playwright/test';
+
+/**
+ * Send a user prompt and wait for the assistant bubble to finalize.
+ *
+ * "Finalized" means `chat-message[data-role="assistant"][data-streaming="false"]`:
+ * the chat composition wires `[streaming]` to `agent.isLoading() && i === lastIndex`
+ * on the latest assistant `<chat-message>`, so the attribute flips to `"false"`
+ * once the agent stops loading and the markdown render has settled.
+ *
+ * Asserting on intermediate streaming-state DOM (partial `
    `, in-flight + * code fences, etc.) is the source of e2e flake — always wait on this + * attribute before counting or text-matching downstream of the assistant turn. + */ +export async function sendPromptAndWait(page: Page, prompt: string): Promise { + await page.goto('/'); + const input = page.getByRole('textbox', { name: /message|prompt/i }); + await input.fill(prompt); + await page.getByRole('button', { name: /send/i }).click(); + + const finalizedAssistant = page + .locator('chat-message[data-role="assistant"][data-streaming="false"]') + .last(); + await expect(finalizedAssistant).toBeAttached({ timeout: 45_000 }); + await expect + .poll(async () => ((await finalizedAssistant.innerText()) ?? '').trim().length, { + timeout: 30_000, + }) + .toBeGreaterThan(0); + return finalizedAssistant; +} diff --git a/apps/cockpit/e2e/tsconfig.json b/apps/cockpit/e2e/tsconfig.json new file mode 100644 index 000000000..234dd6a8b --- /dev/null +++ b/apps/cockpit/e2e/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Bundler", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "allowImportingTsExtensions": false, + "noEmit": true, + "types": ["node"] + }, + "include": ["**/*.ts"], + "exclude": ["node_modules", "test-results", "playwright-report"] +} diff --git a/apps/cockpit/playwright.config.ts b/apps/cockpit/playwright.config.ts deleted file mode 100644 index 5f1f45101..000000000 --- a/apps/cockpit/playwright.config.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { defineConfig, devices } from '@playwright/test'; - -const baseURL = process.env['BASE_URL'] ?? 'http://127.0.0.1:4201'; -const shouldStartLocalServer = !process.env['BASE_URL']; - -export default defineConfig({ - testDir: './e2e', - testIgnore: ['**/all-examples-smoke*', '**/production-smoke*'], - fullyParallel: true, - use: { - baseURL, - }, - // Declare chromium as the only browser project. 
Without this, Playwright - // validates ALL default browsers (chromium + webkit + firefox) on test - // start and prints a long "missing system dependencies" warning for the - // browsers we don't run, even though tests pass cleanly on chromium. - projects: [ - { - name: 'chromium', - use: { ...devices['Desktop Chrome'] }, - }, - ], - webServer: shouldStartLocalServer - ? { - command: 'npx next dev . --port 4201', - url: 'http://127.0.0.1:4201', - reuseExistingServer: false, - } - : undefined, -}); diff --git a/apps/cockpit/project.json b/apps/cockpit/project.json index 3830f04ea..66343afef 100644 --- a/apps/cockpit/project.json +++ b/apps/cockpit/project.json @@ -49,7 +49,7 @@ "e2e": { "executor": "@nx/playwright:playwright", "options": { - "config": "apps/cockpit/playwright.config.ts" + "config": "apps/cockpit/e2e/playwright.config.ts" } }, "serve-streaming": { diff --git a/docs/superpowers/plans/2026-05-15-cockpit-aimock-e2e.md b/docs/superpowers/plans/2026-05-15-cockpit-aimock-e2e.md new file mode 100644 index 000000000..543fda61e --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-cockpit-aimock-e2e.md @@ -0,0 +1,903 @@ +# Cockpit aimock E2E — Phase 1 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax. + +**Goal:** Replace the existing cockpit e2e surface at `apps/cockpit/e2e/` with an aimock-driven harness, with one pilot spec exercising the `streaming` example through aimock end-to-end. Existing `cockpit` Nx project's `e2e` target is repointed at the new playwright config; no new Nx project is created. + +**Architecture:** Harness mirroring `examples/chat/aimock-e2e/`, living at `apps/cockpit/e2e/`. Playwright globalSetup boots aimock + `cockpit/langgraph/streaming/python` (multi-graph langgraph) + the `cockpit-langgraph-streaming-angular` dev server. 
Pilot spec captures a real LLM text response, asserts the finalized assistant bubble carries a distinctive phrase. + +**Tech Stack:** `@copilotkit/aimock`, Playwright, Nx, GitHub Actions. Python LangGraph dev server via `uv`. + +**Spec:** [docs/superpowers/specs/2026-05-15-cockpit-aimock-e2e-design.md](../specs/2026-05-15-cockpit-aimock-e2e-design.md) + +--- + +## Working environment + +- Worktree: `/tmp/cockpit-aimock-spec` (branch `claude/cockpit-aimock-e2e-design`). +- `node_modules` is symlinked from main checkout; `npx` and `nx` work directly. (If the worktree was recreated, run `ln -sf /Users/blove/repos/angular-agent-framework/node_modules /tmp/cockpit-aimock-spec/node_modules`.) +- Copy `.env` for capture: `cp /Users/blove/repos/angular-agent-framework/examples/chat/python/.env cockpit/langgraph/streaming/python/.env` (the cockpit langgraph project doesn't keep its own .env — `OPENAI_API_KEY` is reused). +- Generate the licensing public key if missing: `node libs/licensing/scripts/generate-public-key.mjs`. +- License header `// SPDX-License-Identifier: MIT` on line 1 of every new TS file. +- One commit per task. DO NOT push, amend, or `git add -A`. +- Two commits (spec + plan) already exist on the branch. + +## Coordination with open PR #339 + +PR #339 modifies `apps/cockpit/playwright.config.ts` (which this plan deletes outright). Merge order: +1. #339 lands first. +2. Pull main into this branch (`git fetch origin main && git merge origin/main`). +3. Task 6 of this plan deletes the file #339 modified — if the deletion is already committed when main is merged, Git reports a modify/delete conflict; resolve it by keeping the deletion (`git rm apps/cockpit/playwright.config.ts`). + +If #339 hasn't merged when this work starts, proceed anyway — the merge conflict is mechanical. + +--- + +## Task 0: De-risk cockpit-langgraph + aimock integration + +**Files:** None (investigation only). + +The chat harness verified `examples/chat/python` honors `OPENAI_BASE_URL`. The cockpit `streaming/python` agent has a different code path. Verify before any code lands. 
+ +- [ ] **Step 1: Verify no hardcoded base_url in cockpit streaming agent code** + +Run: +```bash +grep -rn "base_url\|ChatOpenAI\|OpenAI(" cockpit/langgraph/streaming/python/src/ | head -30 +``` + +Expected: zero `base_url=` arguments. ChatOpenAI / OpenAI constructors should accept the env var by default. + +If any hardcoded `base_url=` is found that overrides `OPENAI_BASE_URL`: STOP, report. Spec may need a workaround. + +- [ ] **Step 2: Inspect the streaming graph setup** + +Read `cockpit/langgraph/streaming/python/src/graph.py` — find `build_streaming_graph()`. The compiled graph is registered as `"streaming"` in `langgraph.json`. The system prompt is loaded from `cockpit/langgraph/streaming/python/prompts/streaming.md`. Note the system prompt's first paragraph in your report. + +Expected: single-node graph that calls `ChatOpenAI(model="gpt-5-mini", streaming=True).ainvoke(messages)`. No tool bindings. + +- [ ] **Step 3: Smoke-test the aimock + streaming-python flow** + +Create scratch fixture at `/tmp/cockpit-tc-fixture.json` (text response — `streaming` doesn't bind tools, so the mock returns plain content): + +```json +{ + "fixtures": [ + { + "match": { "userMessage": "say hi briefly" }, + "response": { "content": "Hello from cockpit-streaming!" } + } + ] +} +``` + +In one terminal, start aimock + langgraph: +```bash +cd /tmp/cockpit-aimock-spec +npm install --no-save --no-package-lock @copilotkit/aimock openai + +# Inline node script that starts aimock and keeps it alive +node -e " +const { LLMock } = require('@copilotkit/aimock'); +const fs = require('fs'); +const mock = new LLMock({ port: 0, chunkSize: 4096 }); +const fx = JSON.parse(fs.readFileSync('/tmp/cockpit-tc-fixture.json', 'utf-8')); +mock.addFixturesFromJSON(fx.fixtures); +mock.start().then(() => console.log('AIMOCK_BASE_URL=' + mock.url + '/v1')); +" & +NODE_PID=$! +sleep 3 +# Capture the URL printed; pass it to langgraph below. 
+# NOTE: keep this node process alive; kill with `kill $NODE_PID` after step 4. +``` + +Verify aimock printed an `AIMOCK_BASE_URL=...` line. + +If the inline node script fails: STOP. Report whether `@copilotkit/aimock` is importable, what error occurred. + +- [ ] **Step 4: Hit langgraph backed by the mock, confirm the text-response flow** + +In another terminal (substitute the `AIMOCK_BASE_URL` value printed in Step 3; it already ends in `/v1`): +```bash +cd /tmp/cockpit-aimock-spec/cockpit/langgraph/streaming/python +cp /Users/blove/repos/angular-agent-framework/examples/chat/python/.env .env +uv sync +OPENAI_BASE_URL=<AIMOCK_BASE_URL-from-step-3> OPENAI_API_KEY=test-not-used \ + uv run langgraph dev --port 8123 --no-browser & +LG_PID=$! +sleep 15 +curl -sf http://localhost:8123/ok +``` + +Expected: `{"ok":true}`. If langgraph fails to start (port conflict, missing deps): STOP. + +Then dispatch a single run against the streaming graph: +```bash +THREAD=$(curl -s -X POST http://localhost:8123/threads -H 'content-type: application/json' -d '{}' | python3 -c 'import sys,json; print(json.load(sys.stdin)["thread_id"])') +echo "thread: $THREAD" +curl -s -X POST http://localhost:8123/threads/$THREAD/runs -H 'content-type: application/json' -d "{\"assistant_id\":\"streaming\",\"input\":{\"messages\":[{\"role\":\"user\",\"content\":\"say hi briefly\"}]}}" > /tmp/run.json +sleep 5 +curl -s http://localhost:8123/threads/$THREAD/state | python3 -c 'import sys,json; s=json.load(sys.stdin); print("message_count:", len(s["values"].get("messages",[]))); print("last_message_content:", str(s["values"]["messages"][-1].get("content",""))[:200])' +``` + +Expected: at least 2 messages (user + AI), and the AI message `content` contains the mock's response text (`"Hello from cockpit-streaming!"`). Confirms aimock served the streaming graph's LLM call. Report `last_message_content` verbatim so Task 5's assertion can use the actual phrase. 
+ +- [ ] **Step 5: Tear down** + +```bash +kill $NODE_PID $LG_PID 2>/dev/null || true +rm -f /tmp/cockpit-tc-fixture.json /tmp/run.json +rm -f cockpit/langgraph/streaming/python/.env +# remove the test install +rm -rf node_modules/@copilotkit/aimock node_modules/openai 2>/dev/null || true +``` + +Confirm: `git status` clean (the worktree node_modules is a symlink to the main checkout — the rm above only removes from the symlinked target's `node_modules`, which is fine because Task 1 reinstalls properly). + +- [ ] **Step 6: Report** + +DE-RISK COMPLETE or DE-RISK FAILED. Include: +- Hardcoded `base_url=` findings (should be none). +- `streaming` graph confirmation: built from `build_streaming_graph()` in `graph.py`, no tool bindings. +- The system prompt content from `prompts/streaming.md` (just the first paragraph or so — informs the capture script). +- Whether the curl-driven run produced an AI message with the mock's exact content text. +- Any deviations from the spec's assumed shape. + +If de-risk passes, proceed to Task 1. If it fails, STOP and escalate. + +--- + +## Task 1: Add per-directory configs at `apps/cockpit/e2e/` + +**Files:** +- Create: `apps/cockpit/e2e/tsconfig.json` +- Create: `apps/cockpit/e2e/.gitignore` +- Create: `apps/cockpit/e2e/README.md` + +No new Nx `project.json` — the existing `cockpit` project's `e2e` target is reused (its `config` path is updated in Task 6 once the new harness's playwright config exists). 
+ +- [ ] **Step 1: Create tsconfig.json** + +Write `apps/cockpit/e2e/tsconfig.json`: + +```json +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Bundler", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "allowImportingTsExtensions": false, + "noEmit": true, + "types": ["node"] + }, + "include": ["**/*.ts"], + "exclude": ["node_modules", "test-results", "playwright-report"] +} +``` + +- [ ] **Step 2: Create .gitignore** + +Write `apps/cockpit/e2e/.gitignore`: + +``` +test-results/ +playwright-report/ +*.tmp +``` + +- [ ] **Step 3: Create README.md** + +Write `apps/cockpit/e2e/README.md`: + +```markdown +# cockpit e2e + +Cross-stack E2E harness for cockpit example apps. Uses [`@copilotkit/aimock`](https://github.com/CopilotKit/aimock) as a deterministic mock for LLM API calls; the per-product Python LangGraph dev server is launched with `OPENAI_BASE_URL` pointed at it; Playwright drives the example Angular app in real Chromium. + +Phase 1 covers `streaming` only. Future phases each add one example (one fixture + one spec file per PR). + +## Run the suite + +``` +npx nx e2e cockpit +``` + +Replay-only. No `OPENAI_API_KEY` needed. Reads committed fixtures from `fixtures/`. + +## Refresh a fixture + +Each captured fixture has a recipe script under `scripts/`. Example for the streaming fixture: + +``` +OPENAI_API_KEY=sk-... uv run --project cockpit/langgraph/streaming/python \ + python apps/cockpit/e2e/scripts/record-streaming.py +``` + +Commit the updated `fixtures/streaming.json`. Scripts are dev-only; CI never runs them. + +## Layout + +- `aimock-runner.ts` — programmatic boot of the mock server (mirrors `examples/chat/aimock-e2e/aimock-runner.ts`). +- `test-helpers.ts` — `sendPromptAndWait` helper that waits on `chat-message[data-streaming="false"]`. +- `fixtures/` — committed JSON fixtures keyed by example. +- `scripts/` — fixture-capture recipes (one per fixture). 
+- `playwright.config.ts` — Playwright config with globalSetup that boots aimock + LangGraph + Angular dev server. +- `streaming.spec.ts` — Phase 1 pilot. +``` + +- [ ] **Step 4: Commit Task 1** + +```bash +cd /tmp/cockpit-aimock-spec +git add apps/cockpit/e2e/tsconfig.json \ + apps/cockpit/e2e/.gitignore \ + apps/cockpit/e2e/README.md +git commit -m "feat(cockpit): scaffold e2e dir tsconfig + .gitignore + README" +``` + +NOTE: this task may fail to apply cleanly if any of the four legacy specs in `apps/cockpit/e2e/` still exist (the `.gitignore` is fine; the tsconfig and README will live alongside them temporarily). That's expected — Task 6 deletes the legacy files. Until then, the new harness modules and the legacy specs coexist in the same directory. + +--- + +## Task 2: Copy harness modules from the chat harness + +**Files:** +- Create: `apps/cockpit/e2e/aimock-runner.ts` +- Create: `apps/cockpit/e2e/aimock-runner.spec.ts` +- Create: `apps/cockpit/e2e/test-helpers.ts` + +These are byte-for-byte copies of the chat harness modules (acknowledged duplication per the spec). The runner is already battle-tested through Phase 2a–2e + the regenerate scenario. + +- [ ] **Step 1: Copy aimock-runner.ts** + +```bash +cd /tmp/cockpit-aimock-spec +cp examples/chat/aimock-e2e/aimock-runner.ts apps/cockpit/e2e/aimock-runner.ts +``` + +- [ ] **Step 2: Copy aimock-runner.spec.ts** + +```bash +cp examples/chat/aimock-e2e/aimock-runner.spec.ts apps/cockpit/e2e/aimock-runner.spec.ts +``` + +- [ ] **Step 3: Copy test-helpers.ts** + +```bash +cp examples/chat/aimock-e2e/test-helpers.ts apps/cockpit/e2e/test-helpers.ts +``` + +- [ ] **Step 4: Run the runner unit tests** + +```bash +cd /tmp/cockpit-aimock-spec/apps/cockpit/e2e +npx vitest run aimock-runner.spec.ts +``` + +Expected: 3 passed (boots a replay server, stop is idempotent, loads directory of fixtures). 
+ +If `@copilotkit/aimock` import fails: `cd /tmp/cockpit-aimock-spec && npm install` should fix it (the package is already in the root `package.json` from Phase 2a). + +If any test fails, STOP and report — the modules should be byte-identical to the chat harness which passes today. + +- [ ] **Step 5: Commit Task 2** + +```bash +cd /tmp/cockpit-aimock-spec +git add apps/cockpit/e2e/aimock-runner.ts \ + apps/cockpit/e2e/aimock-runner.spec.ts \ + apps/cockpit/e2e/test-helpers.ts +git commit -m "feat(cockpit): copy aimock-runner and test-helpers from chat harness" +``` + +--- + +## Task 3: Capture the streaming fixture + +**Files:** +- Create: `apps/cockpit/e2e/scripts/record-streaming.py` +- Create: `apps/cockpit/e2e/fixtures/streaming.json` (generated by script) + +- [ ] **Step 1: Write the capture script** + +Write `apps/cockpit/e2e/scripts/record-streaming.py`. The script mirrors `cockpit/langgraph/streaming/python/src/graph.py`'s `build_streaming_graph()` LLM setup (same model, same system prompt source `prompts/streaming.md`). Captures a text response — `streaming` doesn't bind tools. + +```python +"""Capture a real text response from the streaming graph's LLM. + +Mirrors cockpit/langgraph/streaming/python/src/graph.py's +build_streaming_graph() setup: ChatOpenAI(gpt-5-mini, streaming=True) ++ system prompt from prompts/streaming.md. + +Run from repo root: + OPENAI_API_KEY=sk-... 
uv run --project cockpit/langgraph/streaming/python \\ + python apps/cockpit/e2e/scripts/record-streaming.py +""" +import json +import os +import sys +from pathlib import Path + +env_path = Path("cockpit/langgraph/streaming/python/.env") +if env_path.exists(): + for line in env_path.read_text().splitlines(): + line = line.strip() + if line and not line.startswith("#") and "=" in line: + k, _, v = line.partition("=") + os.environ.setdefault(k.strip(), v.strip().strip('"').strip("'")) + +if not os.environ.get("OPENAI_API_KEY"): + print("OPENAI_API_KEY not set (in env or .env)", file=sys.stderr) + sys.exit(1) + +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_openai import ChatOpenAI + +PROMPT = "Tell me one quick fact about Angular signals in two sentences." +SYSTEM_PROMPT = ( + Path("cockpit/langgraph/streaming/python/prompts/streaming.md").read_text() +) + +llm = ChatOpenAI(model="gpt-5-mini", temperature=0) +response = llm.invoke( + [SystemMessage(content=SYSTEM_PROMPT), HumanMessage(content=PROMPT)], +) +text = response.content if isinstance(response.content, str) else "" +if not text.strip(): + print("LLM returned empty content; cannot build fixture", file=sys.stderr) + sys.exit(2) +print(f"captured {len(text)} chars; first 80: {text[:80]!r}") + +fixture = { + "fixtures": [ + { + "match": {"userMessage": PROMPT}, + "response": {"content": text}, + } + ] +} + +out_path = Path("apps/cockpit/e2e/fixtures/streaming.json") +out_path.parent.mkdir(parents=True, exist_ok=True) +out_path.write_text(json.dumps(fixture, indent=2) + "\n") +print(f"\nWrote fixture to {out_path}") +``` + +The PROMPT is intentionally specific ("Angular signals in two sentences") so the captured response contains a distinctive phrase Task 5's assertion can match. 
+ +- [ ] **Step 2: Run the script** + +```bash +cd /tmp/cockpit-aimock-spec +# .env should be present from Task 0; recreate if removed: +cp /Users/blove/repos/angular-agent-framework/examples/chat/python/.env cockpit/langgraph/streaming/python/.env +uv run --project cockpit/langgraph/streaming/python python apps/cockpit/e2e/scripts/record-streaming.py +``` + +Expected: prints `captured <N> chars; first 80: '<start of the response>'` and writes `apps/cockpit/e2e/fixtures/streaming.json`. + +If `text.strip()` is empty: STOP. The LLM didn't respond — check `OPENAI_API_KEY` validity, check that `cockpit/langgraph/streaming/python/prompts/streaming.md` isn't empty. + +- [ ] **Step 3: Inspect the captured fixture** + +```bash +cd /tmp/cockpit-aimock-spec +head -10 apps/cockpit/e2e/fixtures/streaming.json +``` + +Verify the file starts with `{"fixtures": [` and contains a `response.content` string mentioning "signal" (or close variant). Note a distinctive 1-2 word phrase from the response — Task 5's spec uses it as the assertion target. Common phrases: "signal", "Angular", "reactive". + +- [ ] **Step 4: Commit Task 3** + +```bash +cd /tmp/cockpit-aimock-spec +git add apps/cockpit/e2e/scripts/record-streaming.py \ + apps/cockpit/e2e/fixtures/streaming.json +git commit -m "feat(cockpit): add streaming fixture and capture script" +``` + +DO NOT commit the `.env` file at `cockpit/langgraph/streaming/python/.env` — it's gitignored, but verify with `git status`. 
+ +--- + +## Task 4: Playwright config + globalSetup + globalTeardown + +**Files:** +- Create: `apps/cockpit/e2e/playwright.config.ts` +- Create: `apps/cockpit/e2e/global-setup.ts` +- Create: `apps/cockpit/e2e/global-teardown.ts` + +- [ ] **Step 1: Write playwright.config.ts** + +Write `apps/cockpit/e2e/playwright.config.ts`: + +```typescript +// SPDX-License-Identifier: MIT +import { defineConfig, devices } from '@playwright/test'; + +export default defineConfig({ + testDir: '.', + testMatch: '**/*.spec.ts', + testIgnore: ['aimock-runner.spec.ts'], + fullyParallel: false, + workers: 1, + retries: process.env.CI ? 2 : 0, + reporter: process.env.CI ? [['list'], ['html', { open: 'never' }]] : 'list', + use: { + baseURL: 'http://localhost:4300', + trace: 'retain-on-failure', + }, + projects: [ + { + name: 'chromium', + use: { ...devices['Desktop Chrome'] }, + }, + ], + globalSetup: './global-setup.ts', + globalTeardown: './global-teardown.ts', +}); +``` + +- [ ] **Step 2: Write global-setup.ts** + +Write `apps/cockpit/e2e/global-setup.ts`: + +```typescript +// SPDX-License-Identifier: MIT +import { spawn, type ChildProcess } from 'node:child_process'; +import { setTimeout as delay } from 'node:timers/promises'; +import { resolve } from 'node:path'; +import { startAimock, type AimockHandle } from './aimock-runner'; + +interface SharedState { + aimock: AimockHandle; + langgraph: ChildProcess; + angular: ChildProcess; +} + +declare global { + // eslint-disable-next-line no-var + var __COCKPIT_AIMOCK_E2E_STATE__: SharedState | undefined; +} + +const REPO_ROOT = resolve(__dirname, '../../..'); +const FIXTURE_PATH = process.env.AIMOCK_FIXTURE + ? 
resolve(__dirname, process.env.AIMOCK_FIXTURE) + : resolve(__dirname, 'fixtures'); + +async function waitForPort(url: string, timeoutMs: number): Promise { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + try { + const res = await fetch(url); + if (res.ok || res.status === 404) return; + } catch { + // server not up yet + } + await delay(500); + } + throw new Error(`Server at ${url} did not become ready within ${timeoutMs}ms`); +} + +export default async function globalSetup(): Promise { + const aimock = await startAimock({ mode: 'replay', fixturePath: FIXTURE_PATH }); + // eslint-disable-next-line no-console + console.log(`[cockpit] aimock listening at ${aimock.baseUrl}`); + + const langgraph = spawn( + 'uv', + ['run', 'langgraph', 'dev', '--port', '8123', '--no-browser'], + { + cwd: resolve(REPO_ROOT, 'cockpit/langgraph/streaming/python'), + env: { + ...process.env, + OPENAI_BASE_URL: aimock.baseUrl, + OPENAI_API_KEY: 'test-not-used', + }, + stdio: 'pipe', + }, + ); + langgraph.stdout?.on('data', (b) => process.stdout.write(`[langgraph] ${b}`)); + langgraph.stderr?.on('data', (b) => process.stderr.write(`[langgraph] ${b}`)); + + await waitForPort('http://localhost:8123/ok', 90_000); + // eslint-disable-next-line no-console + console.log('[cockpit] langgraph ready on :8123'); + + const angular = spawn( + 'npx', + ['nx', 'serve', 'cockpit-langgraph-streaming-angular', '--port', '4300'], + { + cwd: REPO_ROOT, + env: { ...process.env }, + stdio: 'pipe', + }, + ); + angular.stdout?.on('data', (b) => process.stdout.write(`[angular] ${b}`)); + angular.stderr?.on('data', (b) => process.stderr.write(`[angular] ${b}`)); + + await waitForPort('http://localhost:4300/', 120_000); + // eslint-disable-next-line no-console + console.log('[cockpit] angular ready on :4300'); + + globalThis.__COCKPIT_AIMOCK_E2E_STATE__ = { aimock, langgraph, angular }; +} +``` + +- [ ] **Step 3: Write global-teardown.ts** + +Write `apps/cockpit/e2e/global-teardown.ts`: + 
+```typescript +// SPDX-License-Identifier: MIT +export default async function globalTeardown(): Promise { + const state = globalThis.__COCKPIT_AIMOCK_E2E_STATE__; + if (!state) return; + state.angular.kill('SIGTERM'); + state.langgraph.kill('SIGTERM'); + await state.aimock.stop(); + globalThis.__COCKPIT_AIMOCK_E2E_STATE__ = undefined; +} +``` + +- [ ] **Step 4: Type-check the config** + +```bash +cd /tmp/cockpit-aimock-spec/apps/cockpit/e2e +npx tsc --noEmit +``` + +Expected: no errors. + +- [ ] **Step 5: Commit Task 4** + +```bash +cd /tmp/cockpit-aimock-spec +git add apps/cockpit/e2e/playwright.config.ts \ + apps/cockpit/e2e/global-setup.ts \ + apps/cockpit/e2e/global-teardown.ts +git commit -m "feat(cockpit): add Playwright config with cockpit-streaming globalSetup" +``` + +--- + +## Task 5: Write the streaming pilot spec + +**Files:** +- Create: `apps/cockpit/e2e/streaming.spec.ts` + +- [ ] **Step 1: Identify a phrase to assert on** + +Open `apps/cockpit/e2e/fixtures/streaming.json` and look at the `response.content` string. Pick a distinctive 1-2 word phrase that's likely to appear verbatim — the captured response was about Angular signals, so `signal`, `Angular`, or `reactive` are good candidates. Note it; Step 2 uses it. + +- [ ] **Step 2: Write the spec** + +Write `apps/cockpit/e2e/streaming.spec.ts` (replace `` with the phrase from Step 1): + +```typescript +// SPDX-License-Identifier: MIT +import { test, expect } from '@playwright/test'; +import { sendPromptAndWait } from './test-helpers'; + +test('streaming: assistant text from the mocked LLM renders in the cockpit chat composition', async ({ page }) => { + const bubble = await sendPromptAndWait( + page, + 'Tell me one quick fact about Angular signals in two sentences.', + ); + + // The captured fixture's content (Angular signals fact) must reach the + // rendered bubble. 
Proves: aimock served the streaming graph's LLM call, + // langgraph routed back the AI message, the cockpit-langgraph-streaming-angular + // app rendered it via the chat composition, and the streaming-finalized + // signal (data-streaming="false") settled. + const finalText = await bubble.innerText(); + expect(finalText.toLowerCase()).toContain(''.toLowerCase()); +}); +``` + +- [ ] **Step 3: Run the spec** + +```bash +cd /tmp/cockpit-aimock-spec +npx playwright install --with-deps chromium +cd apps/cockpit/e2e +rm -rf test-results playwright-report +npx playwright test streaming.spec.ts +``` + +Expected: 1 test passes within ~60–120s wall-clock (includes Angular dev-server cold-start). + +If the spec fails: capture Playwright trace from `test-results/`, STOP, report. Likely causes: +- The `cockpit-langgraph-streaming-angular` app's `streamingAssistantId` doesn't match the `streaming` graph_id — verify with `grep -n "AssistantId\|streaming" cockpit/langgraph/streaming/angular/src/environments/environment.ts`. +- The Angular proxy.conf.json points elsewhere than 8123 — check `cockpit/langgraph/streaming/angular/proxy.conf.json`. +- The fixture's prompt doesn't exact-match the spec's `sendPromptAndWait` argument — both must be byte-identical. + +- [ ] **Step 4: Run the suite three times for stability** + +```bash +cd /tmp/cockpit-aimock-spec/apps/cockpit/e2e +for i in 1 2 3; do + echo "=== Run $i ===" + rm -rf test-results playwright-report ../../../../test-results + sleep 8 + npx playwright test +done +``` + +Expected: 3 consecutive clean runs (1 passed each). If any run fails, STOP and investigate — flakes here would compound across the future per-example specs. 
+ +- [ ] **Step 5: Commit Task 5** + +```bash +cd /tmp/cockpit-aimock-spec +git add apps/cockpit/e2e/streaming.spec.ts +git commit -m "test(cockpit): add streaming aimock pilot spec" +``` + +--- + +## Task 6: Delete legacy specs and old playwright config, repoint cockpit e2e target + +**Files:** +- Delete: `apps/cockpit/e2e/cockpit.spec.ts` +- Delete: `apps/cockpit/e2e/dark-mode.spec.ts` +- Delete: `apps/cockpit/e2e/all-examples-smoke.spec.ts` +- Delete: `apps/cockpit/e2e/production-smoke.spec.ts` +- Delete: `apps/cockpit/playwright.config.ts` +- Modify: `apps/cockpit/project.json` (point `e2e` target's `config` at the new playwright config) + +- [ ] **Step 1: Delete the legacy specs and old top-level playwright config** + +```bash +cd /tmp/cockpit-aimock-spec +git rm apps/cockpit/e2e/cockpit.spec.ts \ + apps/cockpit/e2e/dark-mode.spec.ts \ + apps/cockpit/e2e/all-examples-smoke.spec.ts \ + apps/cockpit/e2e/production-smoke.spec.ts \ + apps/cockpit/playwright.config.ts +``` + +If `apps/cockpit/e2e/` contains other unexpected files (helpers, fixtures from older work), list them with `ls apps/cockpit/e2e/` and report — Task 1–5 only added expected files. + +- [ ] **Step 2: Repoint the cockpit project's e2e target** + +Open `apps/cockpit/project.json` and find the `"e2e"` target block. The `config` option currently reads `"apps/cockpit/playwright.config.ts"`: + +```json + "e2e": { + "executor": "@nx/playwright:playwright", + "options": { + "config": "apps/cockpit/playwright.config.ts" + } + }, +``` + +Update the `config` value to point at the new harness's config: + +```json + "e2e": { + "executor": "@nx/playwright:playwright", + "options": { + "config": "apps/cockpit/e2e/playwright.config.ts" + } + }, +``` + +Verify the file is still valid JSON: +```bash +cd /tmp/cockpit-aimock-spec +python3 -c "import json; json.load(open('apps/cockpit/project.json'))" && echo "OK" +``` + +Expected: `OK`. 
+ +- [ ] **Step 3: Verify nothing else references the old config path** + +```bash +cd /tmp/cockpit-aimock-spec +grep -rn "apps/cockpit/playwright.config" \ + --include='*.ts' --include='*.json' --include='*.yml' --include='*.md' \ + | grep -v 'node_modules\|test-results\|playwright-report\|docs/superpowers/' +``` + +Expected: zero matches. If any reference to the old top-level `apps/cockpit/playwright.config.ts` remains, STOP and report. + +- [ ] **Step 4: Commit Task 6** + +```bash +cd /tmp/cockpit-aimock-spec +git add apps/cockpit/project.json +git commit -m "chore(cockpit): drop legacy e2e specs; repoint e2e target to new harness config" +``` + +The `git rm` from Step 1 staged the deletions; the `git add` here stages the project.json modification. + +--- + +## Task 7: Update existing CI cockpit-e2e job + +**Files:** +- Modify: `.github/workflows/ci.yml` + +The existing `cockpit-e2e` job already invokes `npx nx e2e cockpit`, which after Task 6 drives the new harness. It just needs additional setup steps (uv install, python sync, trace upload). + +- [ ] **Step 1: Locate and update the cockpit-e2e job** + +Open `.github/workflows/ci.yml` and find the `cockpit-e2e` job. 
Current shape: + +```yaml + cockpit-e2e: + name: Cockpit — e2e + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6.0.2 + - uses: actions/setup-node@v6.3.0 + with: + node-version: 22 + cache: npm + - run: npm ci + - run: npx playwright install --with-deps chromium + - run: npx nx e2e cockpit --skip-nx-cache +``` + +Replace with: + +```yaml + cockpit-e2e: + name: Cockpit — e2e + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6.0.2 + - uses: actions/setup-node@v6.3.0 + with: + node-version: 22 + cache: npm + - name: Install uv + uses: astral-sh/setup-uv@v8.0.0 + with: + python-version: '3.12' + - run: npm ci + - working-directory: cockpit/langgraph/streaming/python + run: uv sync + - run: npx playwright install --with-deps chromium + - run: npx nx e2e cockpit --skip-nx-cache + - name: Upload Playwright trace on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cockpit-e2e-trace + path: apps/cockpit/e2e/test-results/ + retention-days: 7 +``` + +- [ ] **Step 2: Confirm deploy.needs is unchanged** + +```bash +grep -A20 "deploy:" /tmp/cockpit-aimock-spec/.github/workflows/ci.yml | grep cockpit +``` + +Expected: includes `cockpit-e2e` (job name unchanged). No edit needed. + +- [ ] **Step 3: Verify the workflow YAML parses** + +```bash +cd /tmp/cockpit-aimock-spec +npx -y js-yaml .github/workflows/ci.yml > /dev/null && echo "OK" +``` + +Expected: `OK`. + +- [ ] **Step 4: Commit Task 7** + +```bash +cd /tmp/cockpit-aimock-spec +git add .github/workflows/ci.yml +git commit -m "ci(cockpit): wire cockpit-e2e job for aimock harness (uv + python + trace)" +``` + +--- + +## Task 8: Verify, push, open PR + +- [ ] **Step 1: Final local verification** + +Run the new project end-to-end one more time: + +```bash +cd /tmp/cockpit-aimock-spec +npx nx e2e cockpit +``` + +Expected: 1 test passes. 
+ +Then run the existing chat aimock harness to confirm nothing collateral broke: + +```bash +cd examples/chat/aimock-e2e && npx playwright test && cd - +``` + +Expected: 9 tests pass (smoke + 3 markdown + a2ui-single-bubble + research-subagent + interrupt-approval + regenerate). + +- [ ] **Step 2: Confirm working tree is clean** + +```bash +cd /tmp/cockpit-aimock-spec +git status --short +``` + +Expected: empty (only `node_modules` symlink as untracked). + +Remove any stray `.env` or `test-results/` directories from the worktree. + +- [ ] **Step 3: Push branch** + +```bash +cd /tmp/cockpit-aimock-spec +git push -u origin claude/cockpit-aimock-e2e-design +``` + +- [ ] **Step 4: Open PR** + +```bash +gh pr create --title "feat(cockpit): aimock E2E harness — Phase 1 (streaming pilot)" --body "$(cat <<'EOF' +## Summary + +Replaces the legacy cockpit e2e surface with an aimock-driven harness living at the existing `apps/cockpit/e2e/` location. Phase 1 lands the harness modules + one pilot spec for the `streaming` example end-to-end. No new Nx project — the existing `cockpit` project's `e2e` target is repointed at the new playwright config. + +Sits on the chat aimock harness pattern ([#309](https://github.com/cacheplane/angular-agent-framework/pull/309) and onward). Cockpit-shell coverage is dropped — to be rebuilt separately if/when needed. + +### What changed +- Added harness modules at `apps/cockpit/e2e/` (aimock-runner, test-helpers, globalSetup/teardown, playwright config), copied byte-for-byte from `examples/chat/aimock-e2e/` where applicable. +- Captured streaming fixture + reusable capture script under `apps/cockpit/e2e/scripts/`. +- Playwright globalSetup boots aimock + `cockpit/langgraph/streaming/python` (multi-graph langgraph serving 12 graphs including `streaming`) + `cockpit-langgraph-streaming-angular` dev server on :4300. +- Deleted: 4 legacy specs in `apps/cockpit/e2e/` and the old `apps/cockpit/playwright.config.ts`. 
+- `apps/cockpit/project.json`'s `e2e` target's `config` path updated to the new harness's playwright config. +- CI: existing `Cockpit — e2e` job augmented with uv install + python sync + trace upload. Job name + position in `deploy.needs` unchanged. + +### Test plan +- [x] Local: pilot spec passes 3/3 consecutive runs +- [x] Chat aimock harness still green (no shared-state regressions) +- [x] No production code touched (only harness, fixtures, CI workflow, deletions, one project.json config-path edit) +- [ ] CI green on this PR + +### Notes for reviewers +- Module duplication (`aimock-runner.ts`, `test-helpers.ts`) is intentional per the design — promoting to a shared library is deferred until a third harness wants the same code. +- Pilot assertion uses strictness "B" from brainstorming: surface attached + `data-streaming="false"` wait + content-phrase match. No per-component structural assertions. +- Future per-example PRs each add one fixture JSON + one spec file. If they hit a graph not registered in `streaming/python/langgraph.json`, globalSetup grows to spawn an additional langgraph process on a different port. + +Spec: `docs/superpowers/specs/2026-05-15-cockpit-aimock-e2e-design.md` +Plan: `docs/superpowers/plans/2026-05-15-cockpit-aimock-e2e.md` +EOF +)" +``` + +- [ ] **Step 5: Watch CI** + +```bash +gh pr checks --watch --interval 30 +``` + +Report when CI completes. + +--- + +## Self-review checklist + +- [x] Spec coverage: + - Goal → Tasks 1–5 + - File layout (8 files) → Tasks 1, 2, 4, 5 + - Components (runner, helpers, fixture, globalSetup, spec) → Tasks 2, 3, 4, 5 + - CI integration → Task 7 + - "Replace existing cockpit e2e" → Task 6 + - Risks/unknowns → Task 0 de-risk + - Phase 1 acceptance criteria → Task 8 verification +- [x] Placeholder scan: no TBD/TODO. Two adapt-if-Task-0-revealed notes are intentional guidance for the implementer to incorporate de-risk findings. 
+- [x] Type consistency: `AimockHandle`, `AimockStartOptions`, `startAimock`, `sendPromptAndWait` names match across tasks and align with the chat harness (since the modules are copied). +- [x] Constraints: `@copilotkit/aimock` referenced only in plan/spec/README/imports; commit messages and PR body avoid the library name. + +## Execution handoff + +Plan complete. Recommended: **subagent-driven-development**, with Task 0 dispatched first as a blocking gate (proven valuable in Phase 2a and 2c). If Task 0 reports unexpected agent-code shape, the spec needs updating before Tasks 1+ proceed. diff --git a/docs/superpowers/specs/2026-05-15-cockpit-aimock-e2e-design.md b/docs/superpowers/specs/2026-05-15-cockpit-aimock-e2e-design.md new file mode 100644 index 000000000..ae1ad382d --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-cockpit-aimock-e2e-design.md @@ -0,0 +1,237 @@ +# Cockpit aimock E2E — design + +> **Place in the larger plan.** Cockpit examples (15+ Angular apps demonstrating different agent capabilities) have no automated agent-flow coverage today: the existing `apps/cockpit/e2e/all-examples-smoke.spec.ts` either checks UI shell only or skips when `OPENAI_API_KEY` is absent (so the send-message tests are dead in CI). This design proposes a new harness, hosted in the existing `cockpit` Nx project, that replaces the existing cockpit e2e surface entirely with aimock-driven per-example coverage. + +## Goal + +Build cross-stack E2E test coverage for cockpit example apps, modeled after the chat aimock harness ([`examples/chat/aimock-e2e/`](../../../examples/chat/aimock-e2e/)). The new harness lives at the existing `apps/cockpit/e2e/` location — it IS the cockpit e2e from here on. The cockpit Nx project's existing `e2e` target is rewired to drive the new harness. Phase 1 lands the harness scaffolding + one pilot example (`streaming`). Phases 2+ add one example per PR. + +## Library + +Same as the chat harness: [`@copilotkit/aimock`](https://github.com/CopilotKit/aimock).
The runner uses the `addFixturesFromJSON` API (proven in Phase 2d) so fixture entries can carry richer match shapes (`toolName`, `hasToolResult`, `turnIndex`). + +## Non-goals + +- Cockpit web-app shell coverage (the `cockpit.spec.ts` and `dark-mode.spec.ts` flows in the existing `apps/cockpit/e2e/` suite). The existing specs are deleted. A separate effort can rebuild cockpit-shell coverage if needed. +- Replacing the chat aimock harness at `examples/chat/aimock-e2e/`. Fully independent. +- Multi-product coverage in Phase 1 (e.g., deep-agents, ag-ui, render). The pilot is langgraph-product-only; other products can be added later. +- Production code changes outside of CI workflow files and the new harness under `apps/cockpit/e2e/` (no proxy.conf changes, no app code changes). + +## What "replace the existing cockpit e2e" means concretely + +Phase 1 deletes: +- `apps/cockpit/e2e/cockpit.spec.ts` +- `apps/cockpit/e2e/dark-mode.spec.ts` +- `apps/cockpit/e2e/all-examples-smoke.spec.ts` +- `apps/cockpit/e2e/production-smoke.spec.ts` (orphaned — was opt-in for production checks) +- `apps/cockpit/playwright.config.ts` (moves into `apps/cockpit/e2e/playwright.config.ts`) + +Phase 1 modifies: +- `apps/cockpit/project.json`'s `e2e` target — points at the new `apps/cockpit/e2e/playwright.config.ts`, no other changes. + +Phase 1 adds (all under `apps/cockpit/e2e/`): +- aimock harness modules (runner, helpers, globalSetup/teardown, playwright config) +- One pilot fixture + spec for streaming +- One capture script + +The `Cockpit — e2e` CI job in `.github/workflows/ci.yml` keeps its name but is rewired (the existing `npx nx e2e cockpit` invocation now drives the new harness because the target's config path changed) and gains the setup steps the harness needs. + +The shell tests catch real regressions on Next.js routing + hydration, but cockpit-shell coverage isn't this phase's value. Per the user direction: "we can always have a cockpit web app e2e test in the future."
+ +## Architecture + +``` +[Playwright test on CI/local] + ↓ drives real Chromium +[Angular dev server :4300 (cockpit-langgraph-streaming-angular)] + ↓ /api proxy → :8123 +[LangGraph dev server :8123 (cockpit/langgraph/streaming/python)] + ↓ OPENAI_BASE_URL=http://localhost:AIMOCK_PORT/v1 +[aimock node process] + ↑ reads fixtures/*.json +``` + +The langgraph deployment at `cockpit/langgraph/streaming/python/langgraph.json` already registers 12 graphs (`streaming`, `c-messages`, `c-input`, `c-debug`, `c-interrupts`, `c-theming`, `c-threads`, `c-timeline`, `c-tool-calls`, `c-subagents`, `c-a2ui`, `c-generative-ui`) from one process. The pilot uses graph `streaming`. Future per-example specs that hit graphs in `streaming/python` reuse the same langgraph process; specs that need a graph from a different python project (e.g., `memory`, `interrupts`) get a second langgraph spawned by globalSetup on a different port + a per-example proxy override. + +Phase 1 only covers `streaming`, so only `streaming/python` is launched. + +## Why `streaming` is the pilot (not `c-a2ui`, `c-messages`, or `c-tool-calls`) + +The Phase 1 pilot targets the foundational invariant — "an LLM-driven response routes through the cockpit chat composition into rendered DOM." `streaming` is the cleanest fit because: + +- It calls a real LLM (`ChatOpenAI.ainvoke()` from `build_streaming_graph()` in `cockpit/langgraph/streaming/python/src/graph.py`, with a system prompt from `prompts/streaming.md`), so aimock is meaningfully exercised. +- The angular app `cockpit-langgraph-streaming-angular` uses the **full chat composition** from `@ngaf/chat`, which renders `chat-message` elements with the `data-streaming` attribute the harness's `sendPromptAndWait` helper waits on. +- No tool bindings, no interrupts, no subagents — the assertion is just "assistant text rendered." Matches Phase 2a/2b's `hi.json` smoke pattern.
+ +Other candidates were rejected during brainstorming + implementation: + +- **`c-a2ui`** — graph is fully hardcoded (returns a `CONTACT_FORM_JSONL` constant). No LLM call, so aimock would never be exercised. +- **`c-messages`** — graph IS LLM-driven (via `_build_prompt_graph("messages.md")`), but the angular app `cockpit-chat-messages-angular` uses a chat primitive without providing message-template slots, so it never renders any messages. The pilot's DOM assertions can't ever match. +- **`c-tool-calls`** — same shape as `c-messages` (LLM-driven graph, but the angular app uses primitives without templates). The system prompt also SAYS the LLM has tools, but the graph doesn't actually `bind_tools()`. +- **`c-generative-ui`** — uses `dashboard_graph.py` which IS a real multi-node LLM-driven flow with tool binding. Too complex for the pilot; Phase 2+ candidate after the c-* refactor sub-phase lands. + +> **Sub-phase memo (out of scope here):** Most `c-*` graphs need to be refactored to actually exercise their named capability (real tools bound for `c-tool-calls`, real `interrupt({...})` for `c-interrupts`, real subagent dispatch for `c-subagents`, real `render_a2ui_surface` for `c-a2ui`). That refactor unblocks per-capability aimock test coverage in subsequent phases. Captured in the user's project memory under `project_cockpit_chat_examples_llm_driven_followup.md`. + +## File layout + +``` +apps/cockpit/e2e/ +├── aimock-runner.ts # Copy of examples/chat/aimock-e2e/aimock-runner.ts. +├── fixtures/ +│ └── streaming.json # Captured assistant text response for the streaming example. +├── global-setup.ts # Boots aimock + streaming/python langgraph + streaming Angular dev server. +├── global-teardown.ts # Reverse order shutdown. +├── playwright.config.ts # Cockpit aimock e2e Playwright config. +├── README.md +├── scripts/ +│ └── record-streaming.py # Fixture-capture recipe (dev-only). +├── test-helpers.ts # sendPromptAndWait helper (waiting on data-streaming="false").
+├── tsconfig.json +└── streaming.spec.ts # Phase 1 pilot. +``` + +No new Nx project. The existing `cockpit` Nx project's `e2e` target is reused — only its `config` path changes from `apps/cockpit/playwright.config.ts` to `apps/cockpit/e2e/playwright.config.ts`. Build/serve/test targets are untouched. + +Module duplication from `examples/chat/aimock-e2e/`: `aimock-runner.ts` (~85 lines) and `test-helpers.ts` (~30 lines) are byte-for-byte copies. Acceptable cost for keeping the two harnesses fully independent (per user direction). Promotion to a shared library lands as a separate spec when a third harness wants the same code. + +## Components + +### `aimock-runner.ts` + +Identical to `examples/chat/aimock-e2e/aimock-runner.ts` as of [PR #330](https://github.com/cacheplane/angular-agent-framework/pull/330). Uses `LLMock({ port: 0, chunkSize: 4096 })` and `mock.addFixturesFromJSON(entries)` so fixture entries can carry the full match-discriminator surface aimock supports. + +### `global-setup.ts` + +Boots in order: +1. **aimock** via the runner module, fixtures dir = `apps/cockpit/e2e/fixtures`. +2. **streaming/python langgraph** as a child process: `uv run langgraph dev --port 8123 --no-browser`, env `OPENAI_BASE_URL=http://localhost:AIMOCK_PORT/v1` + `OPENAI_API_KEY=test-not-used`. cwd = `cockpit/langgraph/streaming/python`. +3. **streaming Angular dev server** as a child process: `npx nx serve cockpit-langgraph-streaming-angular --port 4300`. cwd = repo root. + +Waits for each to be ready (HTTP GET `/ok` or `/`) with a 60–120s timeout before proceeding. + +When future phases add examples hitting a different python project (e.g., `cockpit/langgraph/memory/python`), the globalSetup grows to spawn the additional langgraph processes on different ports. The Angular env per-example already knows its langgraph URL via `environment.langGraphApiUrl`; for cross-port deployments, we either override that env at build time or use a thin per-example proxy config update. Deferred until needed.
+ +### `test-helpers.ts` + +`sendPromptAndWait(page, prompt)` — exact copy of the helper from [PR #327](https://github.com/cacheplane/angular-agent-framework/pull/327). Waits on `chat-message[data-role="assistant"][data-streaming="false"]` before returning the finalized bubble locator. + +### `playwright.config.ts` + +Standard Playwright config: +- `testDir: '.'` +- `testMatch: '**/*.spec.ts'` +- `testIgnore: ['aimock-runner.spec.ts']` if a runner spec is added later (Phase 1 doesn't include one — runner is copy-pasted and already exercised by the chat harness). +- `projects: [{ name: 'chromium', use: devices['Desktop Chrome'] }]` to suppress the webkit-deps warning being addressed in [PR #339](https://github.com/cacheplane/angular-agent-framework/pull/339). +- `globalSetup`, `globalTeardown` wired. +- `workers: 1`, `fullyParallel: false` for Phase 1 (single langgraph, single Angular dev server can't safely run parallel tests yet). +- `retries: 2` in CI, `0` locally. + +### `streaming.spec.ts` (Phase 1 pilot) + +Replays a real text response captured from `gpt-5-mini` for a fixed prompt (same capture-script pattern as Phase 2a's `hi.json`). Asserts: + +1. The finalized assistant bubble (`chat-message[data-role="assistant"][data-streaming="false"]`) is in the DOM. +2. The bubble's text contains a distinctive phrase from the captured fixture — proves the LLM response routed through aimock + langgraph + the cockpit chat composition into rendered DOM. +Matches the strictness level chosen during brainstorming ("B" — finalized streaming wait + content-phrase match, no per-component structural assertions). + +### `fixtures/streaming.json` + +Captured from a real `gpt-5-mini` run via a python script that mirrors the streaming graph's LLM setup (`build_streaming_graph()` with the `prompts/streaming.md` system prompt). Same capture pattern as Phase 2a's `hi.json` — the python script is committed under `scripts/` for fixture refresh, and the JSON fixture is committed under `fixtures/`.
+ +The fixture's `match.userMessage` exact-matches the prompt the spec sends. The `response` carries `content` (the captured assistant text). No continuation entry needed for Phase 1 — the assertion is on the rendered text, not on a multi-turn flow. + +### `README.md` + +Short doc covering: how to run locally, how to capture a new fixture (referencing the throwaway python script pattern), what each file is for, links to the chat harness for the analogous infrastructure. + +## CI integration + +### Update the existing `Cockpit — e2e` job + +Edit `.github/workflows/ci.yml`: + +- **Keep** the existing `cockpit-e2e` job (named "Cockpit — e2e"). It already invokes `npx nx e2e cockpit`, which after this phase drives the new harness because `apps/cockpit/project.json`'s `e2e` target points at the new playwright config. +- **Add** the steps the new harness needs (uv install, python sync, fail-trace upload). The chromium install is already there. + +The updated job body: + +```yaml +cockpit-e2e: + name: Cockpit — e2e + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6.0.2 + - uses: actions/setup-node@v6.3.0 + with: + node-version: 22 + cache: npm + - name: Install uv + uses: astral-sh/setup-uv@v8.0.0 + with: + python-version: '3.12' + - run: npm ci + - working-directory: cockpit/langgraph/streaming/python + run: uv sync + - run: npx playwright install --with-deps chromium + - run: npx nx e2e cockpit --skip-nx-cache + - name: Upload Playwright trace on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cockpit-e2e-trace + path: apps/cockpit/e2e/test-results/ + retention-days: 7 +``` + +- **No `deploy.needs` change** — the job name stays `cockpit-e2e` so the existing entry in the `deploy` job's `needs:` list keeps working. + +## Local dev workflow + +``` +# Run the suite (replay only — no OPENAI_API_KEY needed) +npx nx e2e cockpit + +# Capture or refresh a fixture (needs OPENAI_API_KEY) +OPENAI_API_KEY=sk-... 
uv run --project cockpit/langgraph/streaming/python \ + python apps/cockpit/e2e/scripts/record-<example>.py +``` + +Each captured fixture's recipe script is committed to `apps/cockpit/e2e/scripts/` (different from the chat harness — these scripts are useful enough to keep around for refresh, unlike the truly-throwaway Phase 2c script). The script is dev-only; CI never runs it. + +## Coordination with open PR #339 + +[PR #339](https://github.com/cacheplane/angular-agent-framework/pull/339) modifies `apps/cockpit/playwright.config.ts` and `apps/website/playwright.config.ts` to scope to chromium and drops two orphaned worktree gitlinks. This phase deletes `apps/cockpit/playwright.config.ts` (moves into `apps/cockpit/e2e/playwright.config.ts`), making the cockpit half of #339 moot. + +Coordination plan: +- Merge #339 first (it's a clean small fix; reviewers expect it). +- Then this phase deletes the old `apps/cockpit/playwright.config.ts` and replaces it with `apps/cockpit/e2e/playwright.config.ts`, superseding the cockpit half of #339. +- The website half of #339 (`apps/website/playwright.config.ts` and the gitlink removal) is kept and continues to provide value. + +## Risks and unknowns + +- **streaming/python boot time.** The streaming graph is registered alongside 11 other graphs in `streaming/python/langgraph.json`. Cold start may be slower than the chat harness's `examples/chat/python` startup. Mitigation: `waitForPort` timeout = 90s (vs. 60s for chat). Real measurement happens during Task 0 de-risk. +- **OPENAI_BASE_URL handoff for cockpit.** Phase 2a verified this works for `examples/chat/python`. The cockpit `streaming/python` agent code might construct OpenAI clients differently. Task 0 de-risk reads `cockpit/langgraph/streaming/python/src/` and confirms no hardcoded `base_url=` overrides. +- **Angular nx serve startup time.** Each cockpit example uses `@angular/build:dev-server`. First serve may be ~30s including a cold build.
Spec timeouts (`toBeAttached({ timeout: 45_000 })`) need to be generous enough. +- **Future per-example proxy concerns.** When phases 2+ add examples hitting a python project other than `streaming/python`, the per-example proxy target (currently always `:8123`) needs a per-example mapping. This is a future-phase concern — Phase 1 doesn't need it. + +## Acceptance criteria + +Phase 1 merges when: +- The four existing specs at `apps/cockpit/e2e/` (cockpit, dark-mode, all-examples-smoke, production-smoke) are deleted. +- `apps/cockpit/playwright.config.ts` is deleted (moved to `apps/cockpit/e2e/playwright.config.ts`). +- `apps/cockpit/project.json`'s `e2e` target's `config` path points to the new `apps/cockpit/e2e/playwright.config.ts`. +- New harness modules + fixture + pilot spec live under `apps/cockpit/e2e/`. +- `nx e2e cockpit` passes locally + in CI against the new harness. +- One pilot spec (`streaming.spec.ts`) passes 3/3 consecutive local runs with retry-free CI. +- One committed fixture at `apps/cockpit/e2e/fixtures/streaming.json`, captured from a real `gpt-5-mini` run (text response — NOT envelopes/toolCalls). +- One capture script at `apps/cockpit/e2e/scripts/record-streaming.py` for fixture refresh. +- The existing `Cockpit — e2e` CI job is updated with the steps the new harness needs (uv install, python sync, fail-trace upload); job name + position in `deploy.needs` unchanged. + +## What lands next (Phases 2+, NOT this phase) + +For sizing, the likely follow-up shape (one PR per phase): + +- **Phase 2** — second `c-*` example from the streaming/python langgraph (e.g., `c-tool-calls`, `c-interrupts`, `c-subagents`, `c-generative-ui`). Reuses the existing langgraph process; just adds a fixture + spec file. +- **Phase 3** — first example from a different python project (e.g., `memory` from `cockpit/langgraph/memory/python`). Tests the multi-langgraph globalSetup pattern. 
+- **Phase 4+** — one PR per remaining cockpit example, prioritized by which capabilities have shipped product regressions historically. +- **Eventually** — promote the duplicated `aimock-runner.ts` + `test-helpers.ts` to a shared `libs/internal/aimock-harness/` library when a third harness is on the horizon.