Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/large-session-tool-pruning.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"kilo-code": patch
---

Reduce delays in long sessions by compacting older tool outputs when the model request becomes too large.
29 changes: 21 additions & 8 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ type CompletedCompaction = {
summary: string | undefined
}

// kilocode_change start - allow safe pruning at cache-invalidating boundaries
/**
 * Why a prune pass was requested.
 *
 * - "normal": used when no reason is supplied; pruning only runs when
 *   `compaction.prune` is explicitly `true` in config.
 * - "post-compaction": passed by the compaction process step when its result
 *   is "continue".
 * - "payload-limit": passed by the prompt loop when the JSON-serialized model
 *   messages exceed its request-size threshold.
 *
 * For the last two reasons pruning runs unless `compaction.prune` is
 * explicitly `false`.
 */
export type PruneReason = "normal" | "post-compaction" | "payload-limit"
// kilocode_change end

function summaryText(message: MessageV2.WithParts) {
const text = message.parts
.filter((part): part is MessageV2.TextPart => part.type === "text")
Expand Down Expand Up @@ -188,7 +192,7 @@ export interface Interface {
tokens: MessageV2.Assistant["tokens"]
model: Provider.Model
}) => Effect.Effect<boolean>
readonly prune: (input: { sessionID: SessionID }) => Effect.Effect<void>
readonly prune: (input: { sessionID: SessionID; reason?: PruneReason }) => Effect.Effect<void> // kilocode_change
readonly process: (input: {
parentID: MessageID
messages: MessageV2.WithParts[]
Expand Down Expand Up @@ -296,10 +300,13 @@ export const layer: Layer.Layer<

// Walks backwards through parts until PRUNE_PROTECT tokens' worth of tool calls
// have been seen, then erases the output of older tool calls to free context space.
const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID }) {
// kilocode_change start - preserve normal opt-in pruning, but allow payload/compaction cleanup by default
const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID; reason?: PruneReason }) {
const cfg = yield* config.get()
if (!cfg.compaction?.prune) return
log.info("pruning")
const reason = input.reason ?? "normal"
if (cfg.compaction?.prune === false) return
if (reason === "normal" && cfg.compaction?.prune !== true) return
log.info("pruning", { reason })

const msgs = yield* session
.messages({ sessionID: input.sessionID })
Expand Down Expand Up @@ -338,9 +345,10 @@ export const layer: Layer.Layer<
yield* session.updatePart(part)
}
}
log.info("pruned", { count: toPrune.length })
log.info("pruned", { reason, count: toPrune.length })
}
})
// kilocode_change end

const processCompaction = Effect.fn("SessionCompaction.process")(function* (input: {
parentID: MessageID
Expand Down Expand Up @@ -556,8 +564,13 @@ export const layer: Layer.Layer<
}
}

// kilocode_change start - compaction already invalidates cache, so collapse stale tool outputs too
if (processor.message.error) return "stop"
if (result === "continue") yield* bus.publish(Event.Compacted, { sessionID: input.sessionID })
if (result === "continue") {
yield* prune({ sessionID: input.sessionID, reason: "post-compaction" })
yield* bus.publish(Event.Compacted, { sessionID: input.sessionID })
}
// kilocode_change end
return result
})

Expand Down Expand Up @@ -612,11 +625,11 @@ export const defaultLayer = Layer.suspend(() =>

const { runPromise } = makeRuntime(Service, defaultLayer)

export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
/**
 * Promise-based entry point for the service's `isOverflow` check.
 *
 * Runs the call through `runPromise` and resolves with whatever the service
 * returns for the given token usage and model.
 */
export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { // kilocode_change
return runPromise((svc) => svc.isOverflow(input))
}

export async function prune(input: { sessionID: SessionID }) {
/**
 * Promise-based entry point for the service's `prune`.
 *
 * `input` is forwarded unchanged through `runPromise`; `reason` is optional
 * and, when omitted, the service implementation treats it as "normal".
 */
export async function prune(input: { sessionID: SessionID; reason?: PruneReason }) { // kilocode_change
return runPromise((svc) => svc.prune(input))
}

Expand Down
26 changes: 22 additions & 4 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ const STRUCTURED_OUTPUT_SYSTEM_PROMPT = `IMPORTANT: The user has requested struc
// kilocode_change
export const shouldAskPlanFollowup = KiloSessionPrompt.shouldAskPlanFollowup

// kilocode_change start - persistent tool-output pruning when payload is already large
// Threshold in bytes, measured with Buffer.byteLength over the JSON-serialized
// model messages. Above it the prompt loop prunes tool outputs with reason
// "payload-limit" and rebuilds the messages before continuing.
const REQUEST_PRUNE_BYTES = 1_250_000
// kilocode_change end

const log = Log.create({ service: "session.prompt" })
const elog = EffectLogger.create({ service: "session.prompt" })

Expand Down Expand Up @@ -1333,7 +1337,6 @@ NOTE: At any point in time through this workflow you should feel free to ask the
// kilocode_change end
},
)
// kilocode_change end

const lastAssistant = Effect.fnUntraced(function* (sessionID: SessionID) {
// kilocode_change start - retry when cancel races before shellImpl writes messages
Expand Down Expand Up @@ -1587,12 +1590,27 @@ NOTE: At any point in time through this workflow you should feel free to ask the
msgs = KiloSessionPrompt.maybeStripHistoricalMedia(msgs)
// kilocode_change end

const [skills, env, instructions, modelMsgs] = yield* Effect.all([
// kilocode_change start - persistently prune stale tool outputs when payload is already large
const [skills, env, instructions] = yield* Effect.all([
sys.skills(agent),
sys.environment(model, lastUser.editorContext), // kilocode_change
instruction.system().pipe(Effect.orDie),
MessageV2.toModelMessagesEffect(msgs, model),
])
let modelMsgs = yield* MessageV2.toModelMessagesEffect(msgs, model)
const size = Buffer.byteLength(JSON.stringify(modelMsgs))
if (size > REQUEST_PRUNE_BYTES) {
Comment thread
marius-kilocode marked this conversation as resolved.
yield* compaction.prune({ sessionID, reason: "payload-limit" })
msgs = yield* MessageV2.filterCompactedEffect(sessionID)
Comment thread
marius-kilocode marked this conversation as resolved.
msgs = KiloSessionPromptQueue.scope(sessionID, msgs)
msgs = KiloSessionPrompt.trimBeforeLastSummary(msgs)
yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })
KiloSessionPrompt.injectEditorContext({ msgs, lastUser, sessionID, cache: envCache })
msgs = KiloSessionPrompt.maybeStripHistoricalMedia(msgs)
modelMsgs = yield* MessageV2.toModelMessagesEffect(msgs, model)
const nextSize = Buffer.byteLength(JSON.stringify(modelMsgs))
if (nextSize > REQUEST_PRUNE_BYTES) log.warn("payload still large after pruning", { size: nextSize })
}
// kilocode_change end
const system = [...env, ...instructions, ...(skills ? [skills] : [])]
const format = lastUser.format ?? { type: "text" as const }
if (format.type === "json_schema") system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT) // kilocode_change
Expand Down Expand Up @@ -1698,7 +1716,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
continue
}

yield* compaction.prune({ sessionID }).pipe(Effect.ignore, Effect.forkIn(scope))
yield* compaction.prune({ sessionID, reason: "normal" }).pipe(Effect.ignore, Effect.forkIn(scope))
return yield* lastAssistant(sessionID)
},
)
Expand Down
Loading