diff --git a/.changeset/large-session-tool-pruning.md b/.changeset/large-session-tool-pruning.md
new file mode 100644
index 00000000000..187f985c2ea
--- /dev/null
+++ b/.changeset/large-session-tool-pruning.md
@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Reduce delays in long sessions by pruning older tool outputs when the model request becomes too large or after the session is compacted.
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 91b0ef4b863..e96332bcbfb 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -92,6 +92,10 @@ type CompletedCompaction = {
   summary: string | undefined
 }
 
+// kilocode_change start - why a prune runs: the regular opt-in pass ("normal"), after a compaction, or because the request payload is too large
+export type PruneReason = "normal" | "post-compaction" | "payload-limit"
+// kilocode_change end
+
 function summaryText(message: MessageV2.WithParts) {
   const text = message.parts
     .filter((part): part is MessageV2.TextPart => part.type === "text")
@@ -188,7 +192,7 @@ export interface Interface {
     tokens: MessageV2.Assistant["tokens"]
     model: Provider.Model
   }) => Effect.Effect<boolean>
-  readonly prune: (input: { sessionID: SessionID }) => Effect.Effect<void>
+  readonly prune: (input: { sessionID: SessionID; reason?: PruneReason }) => Effect.Effect<void> // kilocode_change
   readonly process: (input: {
     parentID: MessageID
     messages: MessageV2.WithParts[]
@@ -296,10 +300,13 @@ export const layer: Layer.Layer<
 
     // goes backwards through parts until there are PRUNE_PROTECT tokens worth of tool
     // calls, then erases output of older tool calls to free context space
-    const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID }) {
+    // kilocode_change start - "normal" pruning stays opt-in (compaction.prune === true); other reasons run unless compaction.prune === false
+    const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID; reason?: PruneReason }) {
       const cfg = yield* config.get()
-      if (!cfg.compaction?.prune) return
-      log.info("pruning")
+      const reason = input.reason ?? "normal"
+      if (cfg.compaction?.prune === false) return
+      if (reason === "normal" && cfg.compaction?.prune !== true) return
+      log.info("pruning", { reason })
 
       const msgs = yield* session
         .messages({ sessionID: input.sessionID })
@@ -338,9 +345,10 @@ export const layer: Layer.Layer<
             yield* session.updatePart(part)
           }
         }
-        log.info("pruned", { count: toPrune.length })
+        log.info("pruned", { reason, count: toPrune.length })
       }
     })
+    // kilocode_change end
 
     const processCompaction = Effect.fn("SessionCompaction.process")(function* (input: {
       parentID: MessageID
@@ -556,8 +564,13 @@ export const layer: Layer.Layer<
         }
       }
 
+      // kilocode_change start - compaction already invalidates cache, so collapse stale tool outputs too
       if (processor.message.error) return "stop"
-      if (result === "continue") yield* bus.publish(Event.Compacted, { sessionID: input.sessionID })
+      if (result === "continue") {
+        yield* prune({ sessionID: input.sessionID, reason: "post-compaction" })
+        yield* bus.publish(Event.Compacted, { sessionID: input.sessionID })
+      }
+      // kilocode_change end
       return result
     })
 
@@ -612,11 +625,11 @@ export const defaultLayer = Layer.suspend(() =>
 
 const { runPromise } = makeRuntime(Service, defaultLayer)
 
-export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
+export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { // kilocode_change
   return runPromise((svc) => svc.isOverflow(input))
 }
 
-export async function prune(input: { sessionID: SessionID }) {
+export async function prune(input: { sessionID: SessionID; reason?: PruneReason }) { // kilocode_change
   return runPromise((svc) => svc.prune(input))
 }
 
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 69fa84bcac7..1f679142639 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -78,6 +78,10 @@ const STRUCTURED_OUTPUT_SYSTEM_PROMPT = `IMPORTANT: The user has requested struc
 // kilocode_change
 export const shouldAskPlanFollowup = KiloSessionPrompt.shouldAskPlanFollowup
 
+// kilocode_change start - byte length of the JSON-serialized model messages above which stale tool outputs are persistently pruned
+const REQUEST_PRUNE_BYTES = 1_250_000
+// kilocode_change end
+
 const log = Log.create({ service: "session.prompt" })
 const elog = EffectLogger.create({ service: "session.prompt" })
 
@@ -1333,7 +1337,6 @@ NOTE: At any point in time through this workflow you should feel free to ask the
         // kilocode_change end
       },
     )
-    // kilocode_change end
 
     const lastAssistant = Effect.fnUntraced(function* (sessionID: SessionID) {
       // kilocode_change start - retry when cancel races before shellImpl writes messages
@@ -1587,12 +1590,27 @@ NOTE: At any point in time through this workflow you should feel free to ask the
             msgs = KiloSessionPrompt.maybeStripHistoricalMedia(msgs)
             // kilocode_change end
 
-            const [skills, env, instructions, modelMsgs] = yield* Effect.all([
+            // kilocode_change start - persistently prune stale tool outputs when payload is already large
+            const [skills, env, instructions] = yield* Effect.all([
               sys.skills(agent),
               sys.environment(model, lastUser.editorContext), // kilocode_change
               instruction.system().pipe(Effect.orDie),
-              MessageV2.toModelMessagesEffect(msgs, model),
             ])
+            let modelMsgs = yield* MessageV2.toModelMessagesEffect(msgs, model)
+            const size = Buffer.byteLength(JSON.stringify(modelMsgs))
+            if (size > REQUEST_PRUNE_BYTES) {
+              yield* compaction.prune({ sessionID, reason: "payload-limit" })
+              msgs = yield* MessageV2.filterCompactedEffect(sessionID)
+              msgs = KiloSessionPromptQueue.scope(sessionID, msgs)
+              msgs = KiloSessionPrompt.trimBeforeLastSummary(msgs)
+              yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })
+              KiloSessionPrompt.injectEditorContext({ msgs, lastUser, sessionID, cache: envCache })
+              msgs = KiloSessionPrompt.maybeStripHistoricalMedia(msgs)
+              modelMsgs = yield* MessageV2.toModelMessagesEffect(msgs, model)
+              const nextSize = Buffer.byteLength(JSON.stringify(modelMsgs))
+              if (nextSize > REQUEST_PRUNE_BYTES) log.warn("payload still large after pruning", { size: nextSize })
+            }
+            // kilocode_change end
             const system = [...env, ...instructions, ...(skills ? [skills] : [])]
             const format = lastUser.format ?? { type: "text" as const }
             if (format.type === "json_schema") system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT) // kilocode_change
@@ -1698,7 +1716,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
           continue
         }
 
-        yield* compaction.prune({ sessionID }).pipe(Effect.ignore, Effect.forkIn(scope))
+        yield* compaction.prune({ sessionID, reason: "normal" }).pipe(Effect.ignore, Effect.forkIn(scope))
         return yield* lastAssistant(sessionID)
       },
     )