From 58685e3cec3b51ad700db2b5c7ca3ea9f6e20a08 Mon Sep 17 00:00:00 2001
From: Sweets Sweetman <sweetmantech@gmail.com>
Date: Wed, 1 Jul 2026 11:13:46 -0500
Subject: [PATCH] feat(agent): data-grounding rule in the agent system prompt
 (never fabricate)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The single biggest lever against hallucinated task-email data (recoupable/chat#1833):
the root cause behind every fabricated report is the agent's implicit fallback of
"no data ⇒ invent plausible data" (Apache→OneRPM run, verbatim: "the API doesn't have
direct CPM metrics, I'll generate … sample data"). buildAgentSystemPrompt now always
emits a DATA_GROUNDING_SECTION: state only figures retrieved from a successful tool
call in this run; when data is missing, failed, or empty, say so and either omit the
metric or stop — never estimate, cite "industry averages", or fill in sample data.
This is intended to drive hallucinated data toward ~0 for all tasks, even ones with
no data source.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lib/chat/__tests__/buildAgentSystemPrompt.test.ts |  8 +++++---
 lib/chat/buildAgentSystemPrompt.ts                | 12 +++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/lib/chat/__tests__/buildAgentSystemPrompt.test.ts b/lib/chat/__tests__/buildAgentSystemPrompt.test.ts
index 81cb9268d..0e99bbfc2 100644
--- a/lib/chat/__tests__/buildAgentSystemPrompt.test.ts
+++ b/lib/chat/__tests__/buildAgentSystemPrompt.test.ts
@@ -4,7 +4,7 @@ import { buildAgentSystemPrompt } from "@/lib/chat/buildAgentSystemPrompt";
 describe("buildAgentSystemPrompt", () => {
   it("emits only customInstructions when no cwd is provided", () => {
     const prompt = buildAgentSystemPrompt({ customInstructions: "hello" });
-    expect(prompt).toBe("hello");
+    expect(prompt).toContain("hello");
     expect(prompt).not.toMatch(/Working directory/);
   });
 
@@ -26,7 +26,9 @@ describe("buildAgentSystemPrompt", () => {
     expect(customIdx).toBeGreaterThan(envIdx);
   });
 
-  it("returns empty string when all options are empty", () => {
-    expect(buildAgentSystemPrompt({})).toBe("");
+  it("always includes the data-grounding no-fabrication rule, even with empty options", () => {
+    const prompt = buildAgentSystemPrompt({});
+    expect(prompt).toMatch(/never fabricate/i);
+    expect(prompt).toMatch(/^(?=[\s\S]*sample)(?=[\s\S]*estimate)(?=[\s\S]*industry average)/i);
   });
 });
diff --git a/lib/chat/buildAgentSystemPrompt.ts b/lib/chat/buildAgentSystemPrompt.ts
index 922273ae7..c458b301b 100644
--- a/lib/chat/buildAgentSystemPrompt.ts
+++ b/lib/chat/buildAgentSystemPrompt.ts
@@ -3,6 +3,15 @@ const ENVIRONMENT_SECTION = `# Environment
 Working directory: . (workspace root)
 Use workspace-relative paths for all file operations.`;
 
+const DATA_GROUNDING_SECTION = `# Data grounding — never fabricate
+
+State only figures, statistics, or facts you retrieved from a **successful tool
+call in this run**. If a data call fails, returns empty, or the source isn't
+connected, say so plainly (e.g. "no YouTube data connected for this artist") and
+omit the metric — send a shorter, honest report or stop. **Never** estimate, use
+"industry averages", or fill gaps with sample/placeholder/illustrative numbers. A
+short accurate report always beats a padded one built on invented data.`;
+
 export type BuildAgentSystemPromptOptions = {
   /**
    * Sandbox working directory. Triggers inclusion of the Environment
@@ -39,7 +48,8 @@ export type BuildAgentSystemPromptOptions = {
  * generated branches).
  */
 export function buildAgentSystemPrompt(options: BuildAgentSystemPromptOptions): string {
-  const parts: string[] = [];
+  // Always first: the no-fabrication rule applies to every agent run.
+  const parts: string[] = [DATA_GROUNDING_SECTION];
 
   if (options.cwd) {
     parts.push(ENVIRONMENT_SECTION);