stackrox · dependabot · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
@@ -0,0 +1,163 @@
+# mcpchecker v0.0.14 Bug Analysis
+
+## Summary
+
+E2E tests are failing after upgrading from mcpchecker v0.0.12 to v0.0.14 due to an issue where the OpenAI mock agent makes tool calls but doesn't send a final AgentMessageChunk update. This causes llmJudge verification to fail with:
+
+```
+cannot run llmJudge step before agent (must be in verification)
+```
+
+## Affected Tests
+
+- `cve-cluster-does-exist` - ❌ Makes 1 tool call, no final message
+- `cve-cluster-does-not-exist` - ❌ Makes 1 tool call, no final message
+- `cve-log4shell` - ❌ Makes 3 tool calls (including failing node call), no final message
+- `cve-nonexistent` - ⚠️ Makes 4 tool calls, has final message but judge fails for other reasons
+
+Working tests like `cve-detected-clusters`, `cve-multiple`, etc. all have a final message step.
+
+## Root Cause
+
+### Normal Flow (Working)
+1. Agent calls tool via ACP ToolCall update
+2. Tool executes and returns result via ToolCallUpdate
+3. **OpenAI mock sends follow-up chat completion with "Evaluation complete."**
+4. llmagent converts response to AgentMessageChunk via OnStepFinish callback
+5. ExtractOutputSteps produces steps including final "message" type
+6. llmJudge can evaluate the final message
+
+### Broken Flow (Failing Tests)
+1. Agent calls tool via ACP ToolCall update
+2. Tool executes and returns result via ToolCallUpdate
+3. **No follow-up message is sent** (or fantasy doesn't call OnStepFinish)
+4. ExtractOutputSteps produces only "tool_call" type steps
+5. FinalMessageFromSteps returns empty string
+6. llmJudge fails because `input.Agent.Output == ""`
+
+## Technical Details
+
+### OpenAI Mock Server Behavior
+
+The mock in `functional/servers/openai/server.go` is supposed to send a follow-up message:
+
+```go
+// If request contains tool result messages, this is a follow-up after a tool call.
+// Return a simple text response to end the agentic loop.
+for _, msg := range req.Messages {
+    if msg.Role == "tool" {
+        followUp := &ChatCompletionResponse{
+            // ...
+            Message: Message{
+                Role:    "assistant",
+                Content: "Evaluation complete.",
+            },
+            FinishReason: "stop",
+        }
+        // ...
+    }
+}
+```
+
+### llmagent ACP Agent
+
+The `acp_agent.go` processes OpenAI responses via fantasy's OnStepFinish:
+
+```go
+OnStepFinish: func(step fantasy.StepResult) error {
+    text := step.Response.Content.Text()
+    if text == "" {
+        return nil  // ← Early return if no text!
+    }
+
+    return a.conn.SessionUpdate(promptCtx, acp.SessionNotification{
+        SessionId: params.SessionId,
+        Update:    acp.UpdateAgentMessageText(text),
+    })
+},
+```
+
+If `step.Response.Content.Text()` is empty, no AgentMessageText update is sent.
+
+### llmJudge Validation
+
+The llmJudge step validates agent output in `pkg/steps/llm_judge.go:88-90`:
+
+```go
+if input.Agent == nil || input.Agent.Prompt == "" || input.Agent.Output == "" {
+    return nil, fmt.Errorf("cannot run llmJudge step before agent (must be in verification)")
+}
+```
+
+## Reproduction Test
+
+Added test in mcpchecker repo:
+
+```go
+// TestAgentWithOnlyToolCallsNoFinalMessage reproduces issue #268
+func TestAgentWithOnlyToolCallsNoFinalMessage(t *testing.T) {
+    updates := []acp.SessionUpdate{
+        {
+            ToolCall: &acp.SessionUpdateToolCall{
+                ToolCallId: "call-1",
+                Title:      "get_clusters_with_orchestrator_cve",
+                // ...
+            },
+        },
+        {
+            ToolCallUpdate: &acp.SessionToolCallUpdate{
+                ToolCallId: "call-1",
+                Status:     ptr(acp.ToolCallStatusCompleted),
+                // ...
+            },
+        },
+        // BUG: No AgentMessageChunk update here!
+    }
+
+    steps := agent.ExtractOutputSteps(updates)
+    assert.Len(t, steps, 1, "Only has tool_call, no message")
+
+    finalMessage := agent.FinalMessageFromSteps(steps)
+    assert.Empty(t, finalMessage)  // ← Fails llmJudge validation
+}
+```
+
+To run: `cd /tmp/mcpchecker && go test -v -run TestAgentWithOnlyToolCallsNoFinalMessage ./pkg/agent/`
+
+## Hypothesis
+
+The issue may be related to:
+
+1. **fantasy library behavior change** - The charm.land/fantasy package was updated in v0.0.13 (bump from 0.16.0 to 0.17.1). The OnStepFinish callback might not be called in all scenarios.
+
+2. **OpenAI streaming response handling** - The mock server's streaming implementation might not be properly triggering OnStepFinish for the follow-up message after tool results.
+
+3. **ACP protocol handling** - The conversion from OpenAI chat completion responses to ACP SessionUpdate messages might have edge cases.
+
+## Investigation Steps
+
+1. ✅ Reproduced issue with unit test
+2. ✅ Identified that FinalMessageFromSteps returns empty for failing tests
+3. ✅ Traced to missing AgentMessageChunk updates in SessionUpdate stream
+4. ⏭️ **TODO**: Check if fantasy v0.17.1 has breaking changes in OnStepFinish behavior
+5. ⏭️ **TODO**: Add debug logging to llmagent acp_agent.go to see if OnStepFinish is called
+6. ⏭️ **TODO**: Test with real OpenAI API instead of mock to confirm it's a mock issue
+7. ⏭️ **TODO**: Review PR #268 discussion on mcpchecker repo for context
+
+## Workaround
+
+For now, we've:
+1. Migrated all tasks to v1alpha2 format (required by v0.0.14)
+2. Fixed wiremock ExportNodeResponse fixture format
+3. Waiting to see if these fixes resolve the remaining failures
+
+If failures persist, we may need to:
+- Downgrade to mcpchecker v0.0.12 temporarily
+- Report bug upstream to mcpchecker with reproduction test
+- Investigate fantasy library update as potential cause
+
+## Related Links
+
+- mcpchecker PR #268: https://github.com/mcpchecker/mcpchecker/pull/268
+- Our PR #102: https://github.com/stackrox/stackrox-mcp/pull/102
+- Test run: https://github.com/stackrox/stackrox-mcp/actions/runs/23899405760
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-cluster-does-exist"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response indicating whether CVE-2016-1000031 is detected in staging-central-cluster"
   prompt:
     inline: "Is this CVE-2016-1000031 detected in cluster name staging-central-cluster?"
-  verify:
-    contains: "A response indicating whether CVE-2016-1000031 is detected in staging-central-cluster"
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-cluster-does-not-exist"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response indicating that CVE-2024-52577 is not detected in prod-hal-9000 cluster, or that the cluster does not exist"
   prompt:
     inline: "Is this CVE-2024-52577 detected in cluster name prod-hal-9000?"
-  verify:
-    contains: "A response indicating that CVE-2024-52577 is not detected in prod-hal-9000 cluster, or that the cluster does not exist"
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-cluster-list"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response indicating whether CVE-2024-52577 is detected in clusters"
   prompt:
     inline: "Is this CVE-2024-52577 detected in any of my clusters?"
-  verify:
-    contains: "A response indicating whether CVE-2024-52577 is detected in clusters"
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-clusters-general"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response indicating whether CVE-2021-31805 is detected in any clusters, with specific cluster names if detected, or a statement that no clusters have this CVE."
   prompt:
     inline: "Is this CVE-2021-31805 detected in my clusters?"
-  verify:
-    contains: "A response indicating whether CVE-2021-31805 is detected in any clusters, with specific cluster names if detected, or a statement that no clusters have this CVE."
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-detected-clusters"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response indicating whether CVE-2016-1000031 is detected in clusters, nodes, or deployments"
   prompt:
     inline: "Is this CVE-2016-1000031 detected in my clusters?"
-  verify:
-    contains: "A response indicating whether CVE-2016-1000031 is detected in clusters, nodes, or deployments"
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-detected-workloads"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "CVE-2021-31805 is detected in deployments: struts-app (staging-cluster/applications) and legacy-portal (development-cluster/production)."
   prompt:
     inline: "Is this CVE-2021-31805 detected in my workloads?"
-  verify:
-    contains: "CVE-2021-31805 is detected in deployments: struts-app (staging-cluster/applications) and legacy-portal (development-cluster/production)."
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-log4shell"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: The commonly used name "log4shell" is resolved into the exact CVE number (CVE-2021-44228). And it's detected in a few deployments.
   prompt:
     inline: "Is log4shell CVE detected in my cluster fleet?"
-  verify:
-    contains: The commonly used name "log4shell" is resolved into the exact CVE number (CVE-2021-44228). And it's detected in a few deployments.
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-multiple"
   difficulty: medium
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "Both CVEs (CVE-2021-31805 and CVE-2016-1000031), along with the cluster names, deployments, and namespaces where they are detected."
   prompt:
     inline: "Are CVE-2021-31805 and CVE-2016-1000031 detected in my cluster fleet?"
-  verify:
-    contains: "Both CVEs (CVE-2021-31805 and CVE-2016-1000031), along with the cluster names, deployments, and namespaces where they are detected."
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "cve-nonexistent"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response indicating that CVE-2099-00001 is not found or not detected in any clusters"
   prompt:
     inline: "Is CVE-2099-00001 detected in my clusters?"
-  verify:
-    contains: "A response indicating that CVE-2099-00001 is not found or not detected in any clusters"
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "list-clusters"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "A response containing a list of cluster names"
   prompt:
     inline: "List my clusters"
-  verify:
-    contains: "A response containing a list of cluster names"
@@ -1,9 +1,11 @@
 kind: Task
+apiVersion: mcpchecker/v1alpha2
 metadata:
   name: "rhsa-not-supported"
   difficulty: easy
-steps:
+spec:
+  verify:
+    - llmJudge:
+        contains: "Information that RHSA-2024:5137 has been detected, along with the cluster where it is detected."
   prompt:
     inline: "Is RHSA-2024:5137 affecting my clusters?"
-  verify:
-    contains: "Information that RHSA-2024:5137 has been detected, along with the cluster where it is detected."
@@ -4,20 +4,20 @@ go 1.26.1
 
 require (
 	github.com/fullstorydev/grpcurl v1.9.3
-	github.com/mcpchecker/mcpchecker v0.0.12
+	github.com/mcpchecker/mcpchecker v0.0.14
 	github.com/rhysd/actionlint v1.7.12
 )
 
 require (
 	cel.dev/expr v0.25.1 // indirect
-	charm.land/fantasy v0.16.0 // indirect
+	charm.land/fantasy v0.17.1 // indirect
 	cloud.google.com/go v0.123.0 // indirect
 	cloud.google.com/go/auth v0.18.2 // indirect
 	cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
 	cloud.google.com/go/compute/metadata v0.9.0 // indirect
 	github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
 	github.com/aws/aws-sdk-go-v2 v1.41.4 // indirect
-	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 // indirect
+	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 // indirect
 	github.com/aws/aws-sdk-go-v2/config v1.32.12 // indirect
 	github.com/aws/aws-sdk-go-v2/credentials v1.19.12 // indirect
 	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 // indirect
@@ -89,7 +89,7 @@ require (
 	github.com/google/s2a-go v0.1.9 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect
-	github.com/googleapis/gax-go/v2 v2.17.0 // indirect
+	github.com/googleapis/gax-go/v2 v2.18.0 // indirect
 	github.com/gorilla/websocket v1.5.3 // indirect
 	github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
 	github.com/in-toto/attestation v1.1.2 // indirect
@@ -99,7 +99,7 @@ require (
 	github.com/jhump/protoreflect v1.17.0 // indirect
 	github.com/kaptinlin/go-i18n v0.2.12 // indirect
 	github.com/kaptinlin/jsonpointer v0.4.17 // indirect
-	github.com/kaptinlin/jsonschema v0.7.5 // indirect
+	github.com/kaptinlin/jsonschema v0.7.6 // indirect
 	github.com/kaptinlin/messageformat-go v0.4.18 // indirect
 	github.com/mailru/easyjson v0.9.1 // indirect
 	github.com/mattn/go-colorable v0.1.14 // indirect
@@ -157,10 +157,10 @@ require (
 	golang.org/x/text v0.35.0 // indirect
 	golang.org/x/time v0.15.0 // indirect
 	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
-	google.golang.org/api v0.270.0 // indirect
-	google.golang.org/genai v1.50.0 // indirect
-	google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 // indirect
+	google.golang.org/api v0.271.0 // indirect
+	google.golang.org/genai v1.51.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20260311181403-84a4fc48630c // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20260311181403-84a4fc48630c // indirect
 	google.golang.org/grpc v1.79.2 // indirect
 	google.golang.org/protobuf v1.36.11 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect