Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/intelligence/effort.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,52 @@ export function isIntelligenceOff(settings: EffortSettings): boolean {
settings.intelligenceBudgetUsd === 0
)
}

/**
 * Plain-data result of compiling an `EffortSettings`: the run-config knobs an
 * orchestration caller (`runPersonified` / the improvement cycle) reads once to decide
 * WHAT to pass. Holding one of these never constructs an analyst and never runs a loop.
 *
 * NOTE(review): which effort tiers yield `withAnalyst: false` is decided where the
 * settings are resolved, not by this shape. Earlier wording named both `off` and `eco`;
 * confirm against the tier table before relying on that.
 */
export interface EffortOverridesCompiled {
  /**
   * Should the caller construct and pass a `ScopeAnalyst` to `runPersonified`?
   * `false` ⇒ leave it out: the dormant empty-findings path runs and the base agent
   * still works. This is the PRODUCT fail-closed ("don't construct the analyst"), kept
   * separate from the EXPERIMENT fail-closed inside `createScopeAnalyst` ("hard abort").
   * Degrade, never throw.
   */
  withAnalyst: boolean
  /**
   * The `ShapeBudget.fanout` width handed to `runPersonified`; takes precedence over
   * the personify default fanout. `1` means no breadth.
   */
  fanout: number
  /**
   * Whether the multi-step improvement cycle runs. `false` ⇒ it is a no-op for this
   * run (no refine / fanout-vote loop spawns).
   */
  withLoops: boolean
  /**
   * Intelligence-class spend ceiling, carried through verbatim for the billing clamp.
   * `0` refuses every intelligence spawn; `null` means uncapped.
   */
  intelligenceBudgetUsd: number | null
}

/**
 * Translate resolved `EffortSettings` into the orchestration overrides the caller reads.
 *
 * Pure field mapping — no I/O, no execution, no construction; equal inputs give equal
 * output objects:
 *
 * - `analysts`              → `withAnalyst`
 * - `fanout`                → `fanout`
 * - `loops`                 → `withLoops`
 * - `intelligenceBudgetUsd` → `intelligenceBudgetUsd` (passed through untouched)
 *
 * Keeping the mapping in one place means no `if (effort)` leaks into the supervise
 * kernel: the kernel stays effort-blind and the caller reads these flags once.
 *
 * NOTE(review): tier semantics (which tiers switch the analyst or loops off) are not
 * decided here; this function only forwards what the settings already say.
 *
 * @param settings Fully resolved effort settings.
 * @returns A fresh overrides object built from `settings`.
 */
export function compileEffort(settings: EffortSettings): EffortOverridesCompiled {
  const { analysts, fanout, loops, intelligenceBudgetUsd } = settings
  const compiled: EffortOverridesCompiled = {
    withAnalyst: analysts,
    fanout,
    withLoops: loops,
    intelligenceBudgetUsd,
  }
  return compiled
}
53 changes: 52 additions & 1 deletion src/intelligence/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@
* telemetry export.
*/

import { createOtelExporter, loopEventToOtelSpan, type OtelExporter } from '../otel-export'
import {
buildLoopOtelSpans,
createOtelExporter,
loopEventToOtelSpan,
type OtelExporter,
} from '../otel-export'
import type { LoopTraceEvent } from '../runtime/types'
import {
defaultEffortTier,
type EffortOverrides,
Expand All @@ -47,10 +53,12 @@ export { composeCertifiedPrompt, pullCertified, withCertifiedDelivery } from './
export type {
CorpusAccess,
EffortOverrides,
EffortOverridesCompiled,
EffortSettings,
EffortTier,
} from './effort'
export {
compileEffort,
defaultEffortTier,
isIntelligenceOff,
resolveEffort,
Expand Down Expand Up @@ -154,6 +162,15 @@ export interface TraceHandle {
}): void
}

/**
 * Optional anchoring metadata accepted by {@link IntelligenceClient.recordTrace}.
 * Both fields may be left out.
 */
export interface RecordTraceMeta {
  /**
   * The 32-hex trace id every exported span is anchored to.
   * When absent, a fresh id is used instead.
   */
  traceId?: string
  /**
   * Span id of an enclosing span (for example a `traceRun` span) that the loop root
   * should be parented under. When absent, the loop root is the trace root.
   */
  rootParentSpanId?: string
}

/** The resolved outcome of one traced run, surfaced on the export span and
* available to the caller for downstream billing assertions. */
export interface TraceOutcome {
Expand Down Expand Up @@ -182,6 +199,16 @@ export interface IntelligenceClient {
* `fn` propagates to the caller (the agent's own failures are not masked).
*/
traceRun<T>(meta: TraceMeta, fn: (trace: TraceHandle) => Promise<T>): Promise<T>
/**
 * Export a run's full loop topology — the ordered `LoopTraceEvent` stream a
 * `runLoop`/`Supervisor` run emits — as a nested OTLP span tree (loop → round →
 * iteration) into ONE trace. Reuses the shipped `buildLoopOtelSpans` builder
 * (NO second span builder), so the topology a viewer renders matches the
 * kernel's.
 *
 * Best-effort: export failures are swallowed and never reach the caller. When no
 * exporter is available, or `events` is empty, nothing is exported and the resolved
 * trace id is still returned.
 *
 * @param events Ordered loop events of one run.
 * @param meta Optional anchoring: `traceId` defaults to a fresh id; `rootParentSpanId`
 *   parents the loop root under an enclosing span (e.g. a `traceRun` span) when given.
 * @returns The resolved `traceId` (the supplied one, or the freshly minted one).
 */
recordTrace(events: ReadonlyArray<LoopTraceEvent>, meta?: RecordTraceMeta): string
/**
* Network-free readiness report: which adoption modes are reachable given
* this config. Observe is always reachable; Recommend needs outcomes; PR
Expand Down Expand Up @@ -388,6 +415,30 @@ export function createIntelligenceClient(config: IntelligenceConfig): Intelligen
return result
},

/**
 * Export one run's loop events as a span tree under a single trace id.
 *
 * The id is resolved first (caller-supplied, else freshly minted) so every exit path
 * returns the same value. Export only happens when an exporter is available and there
 * is at least one event; any failure while building or exporting spans is swallowed.
 */
recordTrace(events: ReadonlyArray<LoopTraceEvent>, meta?: RecordTraceMeta): string {
  const traceId = meta?.traceId ?? freshTraceId()
  const exporter = getExporter()

  if (exporter && events.length > 0) {
    // Structural view of an event, as handed to the shared topology builder.
    type LoopEventShape = {
      kind: string
      runId: string
      timestamp: number
      payload: object
    }
    try {
      // Delegate to the shipped builder (loop → round → iteration) so the exported
      // structure matches the kernel's instead of growing a parallel builder here.
      const loopEvents = events as ReadonlyArray<LoopEventShape>
      const spanTree = buildLoopOtelSpans(loopEvents, traceId, meta?.rootParentSpanId)
      for (const otelSpan of spanTree) exporter.exportSpan(otelSpan)
    } catch {
      // Deliberately ignored: a trace export must never fail the caller's run.
    }
  }

  return traceId
},

doctor(): DoctorReport {
const hasRepo = Boolean(config.repo?.owner && config.repo?.name && config.repo?.baseBranch)
const hasChecks = Boolean(config.checks && config.checks.length > 0)
Expand Down
162 changes: 162 additions & 0 deletions src/intelligence/intelligence.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import type { LoopTraceEvent } from '../runtime/types'
import {
compileEffort,
createIntelligenceClient,
defaultRedactor,
isIntelligenceOff,
Expand Down Expand Up @@ -310,3 +312,163 @@ describe('doctor()', () => {
expect(client.doctor().exportConfigured).toBe(false)
})
})

/**
 * Flatten an OTLP export body (`resourceSpans` → `scopeSpans` → `spans`) into a list of
 * `{ name, traceId }` pairs, in document order.
 *
 * A missing body or a missing level contributes nothing. `name` and `traceId` are
 * passed through `String(...)`, so an absent field shows up as the text `'undefined'`.
 */
function spansOf(body: unknown): Array<{ name: string; traceId: string }> {
  type Bag = Record<string, Iterable<unknown> | undefined>
  // Read one child collection off a node; an absent key counts as empty.
  const childrenOf = (node: unknown, key: string): unknown[] => [
    ...((node as Bag)[key] ?? []),
  ]

  const root = (body as { resourceSpans?: unknown[] } | null | undefined)?.resourceSpans ?? []
  return [...root]
    .flatMap((resource) => childrenOf(resource, 'scopeSpans'))
    .flatMap((scope) => childrenOf(scope, 'spans'))
    .map((entry) => {
      const { name, traceId } = entry as { name?: string; traceId?: string }
      return { name: String(name), traceId: String(traceId) }
    })
}

/**
 * Fixture: a small, hand-written loop event stream for one run — start, a single plan
 * round (`roundIndex: 0`) fanning out to two iterations, both iterations starting and
 * ending, then the loop end naming iteration 1 as the winner.
 *
 * @param runId Run id stamped on every event (defaults to `'loop-run'`).
 * @returns The seven events in emission order.
 */
function loopStream(runId = 'loop-run'): LoopTraceEvent[] {
  const started: LoopTraceEvent = {
    kind: 'loop.started',
    runId,
    timestamp: 1000,
    payload: { driver: 'fanout', agentRunNames: ['a'], maxIterations: 4, maxConcurrency: 2 },
  }
  const planned: LoopTraceEvent = {
    kind: 'loop.plan',
    runId,
    timestamp: 1001,
    payload: { roundIndex: 0, plannedCount: 2, moveKind: 'fanout', childIndices: [0, 1] },
  }
  const firstStarted: LoopTraceEvent = {
    kind: 'loop.iteration.started',
    runId,
    timestamp: 1002,
    payload: { iterationIndex: 0, agentRunName: 'a', taskHash: 'h0', groupId: 0 },
  }
  const secondStarted: LoopTraceEvent = {
    kind: 'loop.iteration.started',
    runId,
    timestamp: 1003,
    payload: { iterationIndex: 1, agentRunName: 'a', taskHash: 'h1', groupId: 0 },
  }
  // Iteration 0 ends invalid with a low score ...
  const firstEnded: LoopTraceEvent = {
    kind: 'loop.iteration.ended',
    runId,
    timestamp: 1010,
    payload: {
      iterationIndex: 0,
      agentRunName: 'a',
      costUsd: 0.001,
      durationMs: 8,
      verdict: { valid: false, score: 0.3 },
      groupId: 0,
    },
  }
  // ... iteration 1 ends valid, and is the one `loop.ended` reports as the winner.
  const secondEnded: LoopTraceEvent = {
    kind: 'loop.iteration.ended',
    runId,
    timestamp: 1012,
    payload: {
      iterationIndex: 1,
      agentRunName: 'a',
      costUsd: 0.002,
      durationMs: 9,
      verdict: { valid: true, score: 0.8 },
      groupId: 0,
    },
  }
  const ended: LoopTraceEvent = {
    kind: 'loop.ended',
    runId,
    timestamp: 1020,
    payload: { winnerIterationIndex: 1, totalCostUsd: 0.003, durationMs: 20, iterations: 2 },
  }

  return [started, planned, firstStarted, secondStarted, firstEnded, secondEnded, ended]
}

// Suite: `recordTrace` exports the loop topology through the shared span-tree builder,
// mints ids when needed, tolerates a failing endpoint, and stays silent when idle.
describe('recordTrace — loop topology via buildLoopOtelSpans (gap 2)', () => {
  it('exports a nested loop→round→iteration span tree under ONE traceId', async () => {
    const suppliedTraceId = 'a'.repeat(32)
    const { calls } = installFetchSpy('ok')
    const client = createIntelligenceClient({ project: 'support-agent', apiKey, endpoint })

    const returnedTraceId = client.recordTrace(loopStream(), { traceId: suppliedTraceId })
    await client.flush()

    expect(returnedTraceId).toBe(suppliedTraceId)

    const exported = calls.flatMap((call) => spansOf(call.body))
    const spanNames = exported.map((span) => span.name)
    // Topology-level names only come out of the TREE builder; a flat per-event builder
    // would surface raw event kinds (loop.started / loop.iteration.ended) instead. So
    // these names demonstrate that buildLoopOtelSpans was reused, not re-implemented.
    expect(spanNames).toContain('loop')
    expect(spanNames).toContain('loop.round')
    const iterationSpanCount = spanNames.filter((name) => name === 'loop.iteration').length
    expect(iterationSpanCount).toBe(2)

    // One trace, not N: every span carries the id the caller supplied.
    const distinctTraceIds = new Set(exported.map((span) => span.traceId))
    expect(distinctTraceIds.size).toBe(1)
    expect([...distinctTraceIds][0]).toBe(suppliedTraceId)
  })

  it('mints a fresh traceId when none is supplied and survives a dead endpoint', async () => {
    installFetchSpy('throw')
    const client = createIntelligenceClient({ project: 'p', apiKey, endpoint })

    const mintedTraceId = client.recordTrace(loopStream())

    expect(mintedTraceId).toMatch(/^[0-9a-f]{32}$/)
    // The export failure stays inside the client: flushing still resolves cleanly.
    await expect(client.flush()).resolves.toBeUndefined()
  })

  it('is a no-op (no fetch) on an empty event stream or with no endpoint', async () => {
    const fetchSpy = vi.fn()
    vi.stubGlobal('fetch', fetchSpy)

    // Case 1: endpoint configured, but nothing to export.
    const configuredClient = createIntelligenceClient({ project: 'p', apiKey, endpoint })
    configuredClient.recordTrace([])
    await configuredClient.flush()

    // Case 2: events present, but no endpoint to export to.
    const endpointlessClient = createIntelligenceClient({ project: 'p', apiKey })
    endpointlessClient.recordTrace(loopStream())
    await endpointlessClient.flush()

    expect(fetchSpy).not.toHaveBeenCalled()
  })
})

// Suite: pins what each effort tier (and a per-field override) compiles to.
describe('compileEffort — EffortSettings → run-config overrides (gap 3/4)', () => {
  it('off compiles to no-analyst, fanout 1, no loops, zero intelligence budget', () => {
    const expectedAtOff = {
      withAnalyst: false,
      fanout: 1,
      withLoops: false,
      intelligenceBudgetUsd: 0,
    }
    const compiled = compileEffort(resolveEffort('off'))
    expect(compiled).toEqual(expectedAtOff)
    // Called out separately: this flag is the product fail-closed — at off the caller
    // omits the analyst, so the run degrades instead of throwing.
    expect(compiled.withAnalyst).toBe(false)
  })

  it('eco keeps the analyst but no breadth and no improvement loops', () => {
    const { withAnalyst, fanout, withLoops } = compileEffort(resolveEffort('eco'))
    expect(withAnalyst).toBe(true)
    expect(fanout).toBe(1)
    expect(withLoops).toBe(false)
  })

  it('standard turns the analyst on, opens breadth, and enables loops', () => {
    const { withAnalyst, fanout, withLoops } = compileEffort(resolveEffort('standard'))
    expect(withAnalyst).toBe(true)
    expect(fanout).toBeGreaterThan(1)
    expect(withLoops).toBe(true)
  })

  it('carries a per-field override through to the compiled overrides', () => {
    // Re-enabling analysts on top of off lifts the analyst-construction gate.
    const overridden = resolveEffort('off', { analysts: true, fanout: 4 })
    const { withAnalyst, fanout } = compileEffort(overridden)
    expect(withAnalyst).toBe(true)
    expect(fanout).toBe(4)
  })

  it('max compiles to an uncapped intelligence budget', () => {
    const { intelligenceBudgetUsd } = compileEffort(resolveEffort('max'))
    expect(intelligenceBudgetUsd).toBeNull()
  })
})
1 change: 1 addition & 0 deletions src/runtime/personify/persona.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ export async function runPersonified<Task, D>(
...(options.withinMs !== undefined ? { withinMs: options.withinMs } : {}),
...(options.now ? { now: options.now } : {}),
...(options.signal ? { signal: options.signal } : {}),
...(options.hooks ? { hooks: options.hooks } : {}),
}
return supervisor.run(rootAgent, options.task, supervisorOpts)
}
Expand Down
7 changes: 7 additions & 0 deletions src/runtime/personify/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
*/

import type { AgentProfile, BackendType } from '@tangle-network/sandbox'
import type { RuntimeHooks } from '../../runtime-hooks'
import type {
Agent,
AgentSpec,
Expand Down Expand Up @@ -241,6 +242,12 @@ export interface RunPersonifiedOptions<Task, D> {
/** Optional scope analyst threaded into the shape's ShapeContext so loopUntil/widen steer
* on trace-derived findings instead of the dormant empty default. */
readonly analyst?: ScopeAnalyst<D>
/**
* Lifecycle stream sink, forwarded to `SupervisorOpts.hooks` so the root `Scope`'s
* `agent.spawn`/`agent.child` events flow to an observer (e.g. the Intelligence SDK's
* trace export). Absent ⇒ no stream (the run is silent, as today).
*/
readonly hooks?: RuntimeHooks
}

/** The composed run signature. */
Expand Down
Loading
Loading