From 97a2a20f5b820c2710a88c07e0503ee4eb3c55b2 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 20 Feb 2026 15:29:04 -0800 Subject: [PATCH 1/2] fix: reap ghost sessions and deduplicate pending-* entries When a session's real ID can't be resolved during launch, use `pending-` format so unresolved entries are identifiable. In the session tracker's poll cycle, reap stale entries: - Remove pending-* entries when a resolved session shares the same PID - Mark "running" sessions as stopped when their PID is dead Add PID-based dedup in listSessions() as a safety net so pending-* entries never appear alongside their resolved counterpart. Fixes #22 Co-Authored-By: Claude Opus 4.6 --- src/adapters/claude-code.ts | 3 +- src/daemon/session-tracker.test.ts | 144 ++++++++++++++++++++++++++++- src/daemon/session-tracker.ts | 88 ++++++++++++++++++ 3 files changed, 233 insertions(+), 2 deletions(-) diff --git a/src/adapters/claude-code.ts b/src/adapters/claude-code.ts index 6761c85..d37d89b 100644 --- a/src/adapters/claude-code.ts +++ b/src/adapters/claude-code.ts @@ -259,7 +259,8 @@ export class ClaudeCodeAdapter implements AgentAdapter { resolvedSessionId = await this.pollForSessionId(logPath, pid, 5000); } - const sessionId = resolvedSessionId || crypto.randomUUID(); + const sessionId = + resolvedSessionId || (pid ? `pending-${pid}` : crypto.randomUUID()); // Persist session metadata so status checks work after wrapper exits if (pid) { diff --git a/src/daemon/session-tracker.test.ts b/src/daemon/session-tracker.test.ts index faeb2eb..c8b7156 100644 --- a/src/daemon/session-tracker.test.ts +++ b/src/daemon/session-tracker.test.ts @@ -2,7 +2,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import type { AgentSession } from "../core/types.js"; +import type { AgentAdapter, AgentSession } from "../core/types.js"; import { SessionTracker } from "./session-tracker.js"; import { StateManager } from "./state.js"; @@ -176,4 +176,146 @@ describe("SessionTracker", () => { expect(tracker.onSessionExit("unknown")).toBeUndefined(); }); }); + + describe("ghost session reaping (issue #22)", () => { + /** Create a mock adapter that returns the given sessions from list() */ + function mockAdapter(sessions: AgentSession[]): AgentAdapter { + return { + id: "mock", + list: async () => sessions, + peek: async () => "", + status: async () => sessions[0], + launch: async () => sessions[0], + stop: async () => {}, + resume: async () => {}, + async *events() {}, + }; + } + + it("marks dead PID sessions as stopped during poll", async () => { + // Pre-seed state with a "running" session whose PID is dead + tracker.track( + makeSession({ id: "pending-12345", status: "running", pid: 12345 }), + "claude-code", + ); + + // Create tracker with dead-PID checker and an adapter that returns nothing + const reapTracker = new SessionTracker(state, { + adapters: { "claude-code": mockAdapter([]) }, + isProcessAlive: () => false, + }); + + // Trigger a poll cycle + reapTracker.startPolling(); + // Wait for poll to complete + await new Promise((r) => setTimeout(r, 100)); + reapTracker.stopPolling(); + + const session = state.getSession("pending-12345"); + expect(session?.status).toBe("stopped"); + expect(session?.stoppedAt).toBeDefined(); + }); + + it("removes pending-* entry when resolved session exists with same PID", async () => { + // Pre-seed state with a pending entry + tracker.track( + makeSession({ id: "pending-99999", status: "running", pid: 99999 }), + "claude-code", + ); + + // Adapter returns a resolved session with the same PID + const resolvedSession = makeSession({ + id: "abc123-real-session-id", + status: "running", + pid: 99999, + }); + + const reapTracker = new SessionTracker(state, { + adapters: { "claude-code": mockAdapter([resolvedSession]) }, + isProcessAlive: (pid) => pid === 99999, + }); + + reapTracker.startPolling(); + await new Promise((r) => setTimeout(r, 100)); + reapTracker.stopPolling(); + + // pending-* entry should be removed + expect(state.getSession("pending-99999")).toBeUndefined(); + // Real session should exist + expect(state.getSession("abc123-real-session-id")).toBeDefined(); + expect(state.getSession("abc123-real-session-id")?.status).toBe( + "running", + ); + }); + + it("live PID sessions still show as running after poll", async () => { + tracker.track( + makeSession({ id: "live-session", status: "running", pid: 55555 }), + "claude-code", + ); + + // Adapter returns this session as running + const liveSession = makeSession({ + id: "live-session", + status: "running", + pid: 55555, + }); + + const reapTracker = new SessionTracker(state, { + adapters: { "claude-code": mockAdapter([liveSession]) }, + isProcessAlive: (pid) => pid === 55555, + }); + + reapTracker.startPolling(); + await new Promise((r) => setTimeout(r, 100)); + reapTracker.stopPolling(); + + const session = state.getSession("live-session"); + expect(session?.status).toBe("running"); + expect(session?.pid).toBe(55555); + }); + + it("listSessions deduplicates pending-* vs resolved entries by PID", () => { + // Both entries exist in state + tracker.track( + makeSession({ id: "pending-77777", status: "running", pid: 77777 }), + "claude-code", + ); + tracker.track( + makeSession({ + id: "real-session-uuid", + status: "running", + pid: 77777, + }), + "claude-code", + ); + + const list = tracker.listSessions({ all: true }); + // Only the resolved session should appear + const ids = list.map((s) => s.id); + expect(ids).toContain("real-session-uuid"); + expect(ids).not.toContain("pending-77777"); + }); + + it("keeps pending-* entry if no resolved session shares its PID", () => { + // Only a pending entry, no resolved session with same PID + tracker.track( + makeSession({ id: "pending-44444", status: "running", pid: 44444 }), + "claude-code", + ); + tracker.track( + makeSession({ + id: "different-session", + status: "running", + pid: 88888, + }), + "claude-code", + ); + + const list = tracker.listSessions({ all: true }); + const ids = list.map((s) => s.id); + expect(ids).toContain("pending-44444"); + expect(ids).toContain("different-session"); + }); + }); }); diff --git a/src/daemon/session-tracker.ts b/src/daemon/session-tracker.ts index 5681447..0a61fb4 100644 --- a/src/daemon/session-tracker.ts +++ b/src/daemon/session-tracker.ts @@ -4,6 +4,8 @@ import type { SessionRecord, StateManager } from "./state.js"; export interface SessionTrackerOpts { adapters: Record; pollIntervalMs?: number; + /** Override PID liveness check for testing (default: process.kill(pid, 0)) */ + isProcessAlive?: (pid: number) => boolean; } export class SessionTracker { @@ -11,11 +13,13 @@ export class SessionTracker { private adapters: Record; private pollIntervalMs: number; private pollHandle: ReturnType | null = null; + private readonly isProcessAlive: (pid: number) => boolean; constructor(state: StateManager, opts: SessionTrackerOpts) { this.state = state; this.adapters = opts.adapters; this.pollIntervalMs = opts.pollIntervalMs ?? 5000; + this.isProcessAlive = opts.isProcessAlive ?? defaultIsProcessAlive; } startPolling(): void { @@ -35,10 +39,17 @@ export class SessionTracker { } private async poll(): Promise { + // Collect PIDs from all adapter-returned sessions (the source of truth) + const adapterPidToId = new Map(); + for (const [adapterName, adapter] of Object.entries(this.adapters)) { try { const sessions = await adapter.list({ all: true }); for (const session of sessions) { + if (session.pid) { + adapterPidToId.set(session.pid, session.id); + } + const existing = this.state.getSession(session.id); const record = sessionToRecord(session, adapterName); @@ -59,6 +70,49 @@ export class SessionTracker { // Adapter unavailable — skip } } + + // Reap stale entries from daemon state + this.reapStaleEntries(adapterPidToId); + } + + /** + * Clean up ghost sessions in the daemon state: + * - pending-* entries whose PID matches a resolved session → remove pending + * - Any "running"/"idle" session in state whose PID is dead → mark stopped + */ + private reapStaleEntries(adapterPidToId: Map): void { + const sessions = this.state.getSessions(); + + for (const [id, record] of Object.entries(sessions)) { + // Bug 2: If this is a pending-* entry and a real session has the same PID, + // the pending entry is stale — remove it + if (id.startsWith("pending-") && record.pid) { + const resolvedId = adapterPidToId.get(record.pid); + if (resolvedId && resolvedId !== id) { + this.state.removeSession(id); + continue; + } + } + + // Bug 1: If session is "running"/"idle" but PID is dead, mark stopped + if ( + (record.status === "running" || record.status === "idle") && + record.pid + ) { + // Only reap if the adapter didn't return this session as running + // (adapter is the source of truth for sessions it knows about) + const adapterId = adapterPidToId.get(record.pid); + if (adapterId === id) continue; // Adapter confirmed this PID is active + + if (!this.isProcessAlive(record.pid)) { + this.state.setSession(id, { + ...record, + status: "stopped", + stoppedAt: new Date().toISOString(), + }); + } + } + } } /** Track a newly launched session */ @@ -96,6 +150,9 @@ export class SessionTracker { ); } + // Dedup: if a pending-* entry shares a PID with a resolved entry, show only the resolved one + filtered = deduplicatePendingSessions(filtered); + return filtered.sort((a, b) => { // Running first, then by recency if (a.status === "running" && b.status !== "running") return -1; @@ -122,6 +179,37 @@ export class SessionTracker { } } +/** Check if a process is alive via kill(pid, 0) signal check */ +function defaultIsProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +/** + * Remove pending-* entries that share a PID with a resolved (non-pending) session. + * This is a safety net for list output — the poll() reaper handles cleanup in state. + */ +function deduplicatePendingSessions( + sessions: SessionRecord[], +): SessionRecord[] { + const realPids = new Set(); + for (const s of sessions) { + if (!s.id.startsWith("pending-") && s.pid) { + realPids.add(s.pid); + } + } + return sessions.filter((s) => { + if (s.id.startsWith("pending-") && s.pid && realPids.has(s.pid)) { + return false; + } + return true; + }); +} + function sessionToRecord( session: AgentSession, adapterName: string, From b3be1f71ada6039fbe4b1a4e3c49203708a21915 Mon Sep 17 00:00:00 2001 From: "Doink (OpenClaw)" Date: Thu, 5 Mar 2026 08:27:29 -0800 Subject: [PATCH 2/2] fix: ghost session cleanup --- package-lock.json | 4 +- src/cli.ts | 46 ++++++++ src/daemon/server.ts | 33 ++++++ src/daemon/session-tracker.test.ts | 162 +++++++++++++++++++++++++++++ src/daemon/session-tracker.ts | 83 ++++++++++++++- 5 files changed, 325 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 50e8f5d..26108f7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "agentctl", + "name": "@orgloop/agentctl", "version": "1.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "agentctl", + "name": "@orgloop/agentctl", "version": "1.0.1", "license": "MIT", "dependencies": { diff --git a/src/cli.ts b/src/cli.ts index cbc5ff8..8cddbbd 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -363,6 +363,52 @@ program } }); +// kill +program + .command("kill ") + .description("Remove a session record from the store") + .option("--force", "Force remove regardless of state (SIGKILL if running)") + .action(async (id: string, opts) => { + const daemonRunning = await ensureDaemon(); + if (!daemonRunning) { + console.error("Daemon not running"); + process.exit(1); + } + try { + await client.call("session.kill", { id, force: opts.force }); + console.log(`Removed session ${id.slice(0, 8)}`); + } catch (err) { + console.error((err as Error).message); + process.exit(1); + } + }); + +// prune +program + .command("prune") + .description("Remove stale stopped sessions") + .option( + "--max-age ", + "Remove sessions stopped more than N hours ago", + "24", + ) + .action(async (opts) => { + const daemonRunning = await ensureDaemon(); + if (!daemonRunning) { + console.error("Daemon not running"); + process.exit(1); + } + try { + const result = await client.call<{ pruned: number }>("session.prune", { + maxAgeHours: Number(opts.maxAge), + }); + console.log(`Pruned ${result.pruned} stale session(s)`); + } catch (err) { + console.error((err as Error).message); + process.exit(1); + } + }); + // resume program .command("resume ") diff --git a/src/daemon/server.ts b/src/daemon/server.ts index ee530d9..be513c5 100644 --- a/src/daemon/server.ts +++ b/src/daemon/server.ts @@ -308,6 +308,39 @@ function createRequestHandler(ctx: HandlerContext) { return null; } + case "session.kill": { + const session = ctx.sessionTracker.getSession(params.id as string); + if (!session) throw new Error(`Session not found: ${params.id}`); + const force = params.force as boolean | undefined; + + // If session is still running, try to stop the process first + if (session.status === "running" || session.status === "idle") { + if (session.pid) { + try { + process.kill(session.pid, force ? "SIGKILL" : "SIGTERM"); + } catch { + // Already dead + } + } + } + + // Remove auto-lock + ctx.lockManager.autoUnlock(session.id); + + // Remove session record from store + ctx.sessionTracker.removeSession(session.id); + ctx.metrics.recordSessionStopped(); + + return null; + } + + case "session.prune": { + const maxAgeMs = + ((params.maxAgeHours as number) || 24) * 60 * 60 * 1000; + const pruned = ctx.sessionTracker.pruneSessions(maxAgeMs); + return { pruned }; + } + case "session.resume": { const session = ctx.sessionTracker.getSession(params.id as string); if (!session) throw new Error(`Session not found: ${params.id}`); diff --git a/src/daemon/session-tracker.test.ts b/src/daemon/session-tracker.test.ts index c8b7156..08e67e8 100644 --- a/src/daemon/session-tracker.test.ts +++ b/src/daemon/session-tracker.test.ts @@ -317,5 +317,167 @@ describe("SessionTracker", () => { expect(ids).toContain("pending-44444"); expect(ids).toContain("different-session"); }); + + it("detects PID reuse and marks session stopped when command doesn't match", async () => { + // Session with PID 11111 was a claude process, but PID got reused by trustd + tracker.track( + makeSession({ id: "ghost-session", status: "running", pid: 11111 }), + "claude-code", + ); + + const reapTracker = new SessionTracker(state, { + adapters: { "claude-code": mockAdapter([]) }, + isProcessAlive: () => true, // PID exists (reused) + getProcessCommand: () => "/usr/libexec/trustd", // Not claude! + }); + + reapTracker.startPolling(); + await new Promise((r) => setTimeout(r, 100)); + reapTracker.stopPolling(); + + const session = state.getSession("ghost-session"); + expect(session?.status).toBe("stopped"); + expect(session?.stoppedAt).toBeDefined(); + }); + + it("keeps session running when PID is alive and command matches", async () => { + tracker.track( + makeSession({ id: "valid-session", status: "running", pid: 22222 }), + "claude-code", + ); + + const reapTracker = new SessionTracker(state, { + adapters: { "claude-code": mockAdapter([]) }, + isProcessAlive: () => true, + getProcessCommand: () => "claude --session abc123", + }); + + reapTracker.startPolling(); + await new Promise((r) => setTimeout(r, 100)); + reapTracker.stopPolling(); + + const session = state.getSession("valid-session"); + expect(session?.status).toBe("running"); + }); + }); + + describe("removeSession", () => { + it("removes a session by exact ID", () => { + tracker.track( + makeSession({ id: "session-to-remove", status: "stopped" }), + "claude-code", + ); + + const removed = tracker.removeSession("session-to-remove"); + expect(removed).toBe(true); + expect(state.getSession("session-to-remove")).toBeUndefined(); + }); + + it("removes a session by prefix", () => { + tracker.track( + makeSession({ id: "session-abcdef123", status: "stopped" }), + "claude-code", + ); + + const removed = tracker.removeSession("session-abc"); + expect(removed).toBe(true); + expect(state.getSession("session-abcdef123")).toBeUndefined(); + }); + + it("returns false for unknown session", () => { + expect(tracker.removeSession("nonexistent")).toBe(false); + }); + }); + + describe("pruneSessions", () => { + it("removes stopped sessions older than the threshold", () => { + const oldDate = new Date(Date.now() - 48 * 60 * 60 * 1000); // 48h ago + tracker.track( + makeSession({ + id: "old-session", + status: "stopped", + startedAt: oldDate, + }), + "claude-code", + ); + // Manually set stoppedAt to old date + const record = state.getSession("old-session"); + expect(record).toBeDefined(); + state.setSession("old-session", { + ...record!, + status: "stopped", + stoppedAt: oldDate.toISOString(), + }); + + const pruned = tracker.pruneSessions(24 * 60 * 60 * 1000); // 24h + expect(pruned).toBe(1); + expect(state.getSession("old-session")).toBeUndefined(); + }); + + it("keeps stopped sessions newer than the threshold", () => { + const recentDate = new Date(Date.now() - 2 * 60 * 60 * 1000); // 2h ago + tracker.track( + makeSession({ + id: "recent-session", + status: "stopped", + startedAt: recentDate, + }), + "claude-code", + ); + const record = state.getSession("recent-session"); + expect(record).toBeDefined(); + state.setSession("recent-session", { + ...record!, + status: "stopped", + stoppedAt: recentDate.toISOString(), + }); + + const pruned = tracker.pruneSessions(24 * 60 * 60 * 1000); + expect(pruned).toBe(0); + expect(state.getSession("recent-session")).toBeDefined(); + }); + + it("never prunes running or idle sessions", () => { + const oldDate = new Date(Date.now() - 48 * 60 * 60 * 1000); + tracker.track( + makeSession({ + id: "running-old", + status: "running", + startedAt: oldDate, + }), + "claude-code", + ); + tracker.track( + makeSession({ + id: "idle-old", + status: "idle", + startedAt: oldDate, + }), + "claude-code", + ); + + const pruned = tracker.pruneSessions(24 * 60 * 60 * 1000); + expect(pruned).toBe(0); + expect(state.getSession("running-old")).toBeDefined(); + expect(state.getSession("idle-old")).toBeDefined(); + }); + + it("prunes error/completed/failed sessions", () => { + const oldDate = new Date(Date.now() - 48 * 60 * 60 * 1000); + + for (const status of ["error", "completed", "failed"] as const) { + tracker.track( + makeSession({ + id: `${status}-session`, + status, + startedAt: oldDate, + }), + "claude-code", + ); + } + + const pruned = tracker.pruneSessions(24 * 60 * 60 * 1000); + expect(pruned).toBe(3); + }); }); }); diff --git a/src/daemon/session-tracker.ts b/src/daemon/session-tracker.ts index 0a61fb4..515340e 100644 --- a/src/daemon/session-tracker.ts +++ b/src/daemon/session-tracker.ts @@ -1,3 +1,4 @@ +import { execFileSync } from "node:child_process"; import type { AgentAdapter, AgentSession } from "../core/types.js"; import type { SessionRecord, StateManager } from "./state.js"; @@ -6,6 +7,8 @@ export interface SessionTrackerOpts { pollIntervalMs?: number; /** Override PID liveness check for testing (default: process.kill(pid, 0)) */ isProcessAlive?: (pid: number) => boolean; + /** Override process command lookup for testing (default: ps -p -o command=) */ + getProcessCommand?: (pid: number) => string | null; } export class SessionTracker { @@ -14,12 +17,14 @@ export class SessionTracker { private pollIntervalMs: number; private pollHandle: ReturnType | null = null; private readonly isProcessAlive: (pid: number) => boolean; + private readonly getProcessCommand: (pid: number) => string | null; constructor(state: StateManager, opts: SessionTrackerOpts) { this.state = state; this.adapters = opts.adapters; this.pollIntervalMs = opts.pollIntervalMs ?? 5000; this.isProcessAlive = opts.isProcessAlive ?? defaultIsProcessAlive; + this.getProcessCommand = opts.getProcessCommand ?? defaultGetProcessCommand; } startPolling(): void { @@ -94,7 +99,7 @@ export class SessionTracker { } } - // Bug 1: If session is "running"/"idle" but PID is dead, mark stopped + // Bug 1: If session is "running"/"idle" but PID is dead or reused, mark stopped if ( (record.status === "running" || record.status === "idle") && record.pid @@ -104,7 +109,20 @@ export class SessionTracker { const adapterId = adapterPidToId.get(record.pid); if (adapterId === id) continue; // Adapter confirmed this PID is active + let shouldReap = false; + if (!this.isProcessAlive(record.pid)) { + // PID is dead + shouldReap = true; + } else { + // PID is alive — check if it was reused by a different process + const cmd = this.getProcessCommand(record.pid); + if (cmd !== null && !isExpectedCommand(cmd, record.adapter)) { + shouldReap = true; + } + } + + if (shouldReap) { this.state.setSession(id, { ...record, status: "stopped", @@ -167,6 +185,44 @@ export class SessionTracker { ).length; } + /** Resolve a session ID (exact or prefix) to the full ID */ + resolveSessionId(id: string): string | undefined { + if (this.state.getSession(id)) return id; + const sessions = this.state.getSessions(); + const matches = Object.entries(sessions).filter(([key]) => + key.startsWith(id), + ); + if (matches.length === 1) return matches[0][0]; + return undefined; + } + + /** Remove a session from the store. Returns true if removed. */ + removeSession(id: string): boolean { + const fullId = this.resolveSessionId(id); + if (!fullId) return false; + this.state.removeSession(fullId); + return true; + } + + /** Remove stopped/completed/failed sessions older than maxAgeMs. Returns count pruned. */ + pruneSessions(maxAgeMs: number = 24 * 60 * 60 * 1000): number { + const sessions = this.state.getSessions(); + const now = Date.now(); + let pruned = 0; + + for (const [id, record] of Object.entries(sessions)) { + if (record.status === "running" || record.status === "idle") continue; + const timestamp = record.stoppedAt || record.startedAt; + const age = now - new Date(timestamp).getTime(); + if (age > maxAgeMs) { + this.state.removeSession(id); + pruned++; + } + } + + return pruned; + } + /** Called when a session stops — returns the cwd for fuse/lock processing */ onSessionExit(sessionId: string): SessionRecord | undefined { const session = this.state.getSession(sessionId); @@ -189,6 +245,31 @@ function defaultIsProcessAlive(pid: number): boolean { } } +/** Get the command string for a PID via ps */ +function defaultGetProcessCommand(pid: number): string | null { + try { + const stdout = execFileSync("ps", ["-p", String(pid), "-o", "command="], { + encoding: "utf-8", + timeout: 2000, + }); + return stdout.trim() || null; + } catch { + return null; + } +} + +/** Known command patterns per adapter — used to detect PID reuse */ +const ADAPTER_COMMAND_PATTERNS: Record = { + "claude-code": /\bclaude\b/, +}; + +/** Check if the process command matches what we expect for this adapter */ +function isExpectedCommand(command: string, adapter: string): boolean { + const pattern = ADAPTER_COMMAND_PATTERNS[adapter]; + if (!pattern) return true; // Unknown adapter — can't validate, assume OK + return pattern.test(command); +} + /** * Remove pending-* entries that share a PID with a resolved (non-pending) session. * This is a safety net for list output — the poll() reaper handles cleanup in state.