Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions codex-rs/app-server/tests/suite/v2/web_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(60);
const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10);

#[tokio::test]
async fn standalone_web_search_round_trips_encrypted_output() -> Result<()> {
async fn standalone_web_search_round_trips_output() -> Result<()> {
let call_id = "web-run-1";
let server = responses::start_mock_server().await;
mount_search_response(&server).await;
Expand Down Expand Up @@ -170,8 +170,8 @@ async fn standalone_web_search_round_trips_encrypted_output() -> Result<()> {
"type": "function_call_output",
"call_id": call_id,
"output": [{
"type": "encrypted_content",
"encrypted_content": "ciphertext",
"type": "input_text",
"text": "Search result",
}],
})
);
Expand Down Expand Up @@ -259,6 +259,7 @@ async fn mount_search_response(server: &MockServer) {
.and(path("/api/codex/alpha/search"))
.respond_with(ResponseTemplate::new(200).set_body_json(json!({
"encrypted_output": "ciphertext",
"output": "Search result",
})))
.expect(1)
.mount(server)
Expand Down
12 changes: 8 additions & 4 deletions codex-rs/codex-api/src/endpoint/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,13 @@ mod tests {
}

#[tokio::test]
async fn search_posts_typed_request_and_parses_encrypted_output() {
async fn search_posts_typed_request_and_parses_output() {
let transport = CapturingTransport::new(
serde_json::to_vec(&json!({"encrypted_output": "ciphertext"}))
.expect("serialize response"),
serde_json::to_vec(&json!({
"encrypted_output": "ciphertext",
"output": "search result",
}))
.expect("serialize response"),
);
let client = SearchClient::new(transport.clone(), provider(), Arc::new(DummyAuth));

Expand Down Expand Up @@ -203,7 +206,8 @@ mod tests {
assert_eq!(
response,
SearchResponse {
encrypted_output: "ciphertext".to_string(),
encrypted_output: Some("ciphertext".to_string()),
output: "search result".to_string(),
}
);

Expand Down
3 changes: 2 additions & 1 deletion codex-rs/codex-api/src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,5 +280,6 @@ pub enum AllowedCaller {

#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct SearchResponse {
pub encrypted_output: String,
pub encrypted_output: Option<String>,
pub output: String,
}
110 changes: 110 additions & 0 deletions codex-rs/core/tests/suite/code_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_config::types::McpServerConfig;
use codex_config::types::McpServerTransportConfig;
use codex_core::config::Config;
use codex_extension_api::ExtensionRegistryBuilder;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_models_manager::bundled_models_response;
use codex_protocol::config_types::WebSearchMode;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
use codex_protocol::dynamic_tools::DynamicToolResponse;
use codex_protocol::dynamic_tools::DynamicToolSpec;
Expand All @@ -17,6 +19,7 @@ use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use codex_protocol::user_input::UserInput;
use codex_web_search_extension::install as install_web_search_extension;
use core_test_support::apps_test_server::AppsTestServer;
use core_test_support::apps_test_server::AppsTestToolLoading;
use core_test_support::apps_test_server::DIRECT_CALENDAR_APP_ONLY_TOOL;
Expand Down Expand Up @@ -45,9 +48,14 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;

fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
match req.custom_tool_call_output(call_id).get("output") {
Expand Down Expand Up @@ -191,6 +199,108 @@ async fn run_code_mode_turn_with_config(
Ok((test, second_mock))
}

/// Checks that a code-mode `exec` script can invoke the standalone web search
/// tool via `tools.web__run`, that the resulting search request carries the
/// expected model, commands, and settings, and that the plaintext search
/// output is reported back as the custom tool call output for `call-1`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_call_standalone_web_search() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when no network is available — confirm against the macro.
skip_if_no_network!(Ok(()));

let server = responses::start_mock_server().await;
// Stub the search endpoint with a plaintext-only body (no `encrypted_output`
// key) and require that it is hit exactly once.
Mock::given(method("POST"))
.and(path("/v1/alpha/search"))
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
"output": "Search result",
})))
.expect(1)
.mount(&server)
.await;

// First model response: an `exec` custom tool call whose script runs a web
// search through `tools.web__run` and passes the result to `text(...)`.
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call(
"call-1",
"exec",
r#"
const result = await tools.web__run({
search_query: [{ q: "standalone web search" }],
});
text(result);
"#,
),
ev_completed("resp-1"),
]),
)
.await;
// Second model response: a plain assistant message. The returned mock handle
// is kept so the follow-up request can be inspected below.
let follow_up_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;

// Install the web search extension into a fresh registry, sharing the same
// API-key auth that the test Codex instance is built with.
let auth = CodexAuth::from_api_key("dummy");
let auth_manager = codex_core::test_support::auth_manager_from_auth(auth.clone());
let mut extension_builder = ExtensionRegistryBuilder::<Config>::new();
install_web_search_extension(&mut extension_builder, auth_manager);
// Enable both features under test and set the web search mode to `Live`.
let mut builder = test_codex()
.with_auth(auth)
.with_extensions(Arc::new(extension_builder.build()))
.with_model("test-gpt-5.1-codex")
.with_config(|config| {
config
.features
.enable(Feature::CodeMode)
.expect("code mode should be enabled");
config
.features
.enable(Feature::StandaloneWebSearch)
.expect("standalone web search should be enabled");
config
.web_search_mode
.set(WebSearchMode::Live)
.expect("web search mode should be accepted");
});
let test = builder.build(&server).await?;

test.submit_turn("Search the web from code mode").await?;

// Pull the recorded search request out of everything the mock server received.
let search_request = server
.received_requests()
.await
.expect("received requests should be available")
.into_iter()
.find(|request| request.url.path() == "/v1/alpha/search")
.expect("standalone search request should be sent");
let search_body = search_request
.body_json::<Value>()
.expect("search request body should be JSON");
// The request must name the configured model ...
assert_eq!(
search_body["model"],
serde_json::json!("test-gpt-5.1-codex")
);
// ... forward the script's search query unchanged as `commands` ...
assert_eq!(
search_body["commands"],
serde_json::json!({
"search_query": [{"q": "standalone web search"}],
})
);
// ... and carry exactly these settings.
assert_eq!(
search_body["settings"],
serde_json::json!({
"allowed_callers": ["direct"],
"external_web_access": true,
})
);
// The follow-up request must carry the stubbed plaintext search output as
// the last non-empty text of the `call-1` tool output.
assert_eq!(
custom_tool_output_last_non_empty_text(&follow_up_mock.single_request(), "call-1"),
Some("Search result".to_string())
);

Ok(())
}

async fn run_code_mode_turn_with_rmcp(
server: &MockServer,
prompt: &str,
Expand Down
28 changes: 14 additions & 14 deletions codex-rs/ext/web-search/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;

pub(crate) struct EncryptedSearchOutput {
encrypted_output: String,
pub(crate) struct SearchOutput {
output: String,
}

impl EncryptedSearchOutput {
pub(crate) fn new(encrypted_output: String) -> Self {
Self { encrypted_output }
impl SearchOutput {
pub(crate) fn new(output: String) -> Self {
Self { output }
}
}

impl ToolOutput for EncryptedSearchOutput {
impl ToolOutput for SearchOutput {
fn log_preview(&self) -> String {
"[encrypted standalone web search output]".to_string()
"[standalone web search output]".to_string()
}

fn success_for_logging(&self) -> bool {
Expand All @@ -29,8 +29,8 @@ impl ToolOutput for EncryptedSearchOutput {
ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::EncryptedContent {
encrypted_content: self.encrypted_output.clone(),
FunctionCallOutputContentItem::InputText {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

codex:

Should this PR also make standalone search honor memories.disable_on_external_context? This output is recorded as a normal FunctionCallOutput, which the external-context detector does not recognize, so plaintext web results remain eligible for memory generation even when that setting is enabled.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a function on tool output to allow us to do this cleanly. It is a bit out of the scope of this PR (since it's creating something reusable for future external tool calls), so I've stacked a follow-up PR to resolve this issue: #26821

text: self.output.clone(),
},
]),
}
Expand All @@ -45,12 +45,12 @@ mod tests {
use codex_protocol::models::ResponseInputItem;
use pretty_assertions::assert_eq;

use super::EncryptedSearchOutput;
use super::SearchOutput;
use super::ToolOutput;

#[test]
fn emits_encrypted_function_call_output() {
let output = EncryptedSearchOutput::new("encrypted-search-output".to_string());
fn emits_plaintext_function_call_output() {
let output = SearchOutput::new("search output".to_string());

assert_eq!(
output.to_response_item(
Expand All @@ -62,8 +62,8 @@ mod tests {
ResponseInputItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::EncryptedContent {
encrypted_content: "encrypted-search-output".to_string(),
FunctionCallOutputContentItem::InputText {
text: "search output".to_string(),
},
]),
}
Expand Down
8 changes: 3 additions & 5 deletions codex-rs/ext/web-search/src/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use http::HeaderMap;
use url::Url;

use crate::history::recent_input;
use crate::output::EncryptedSearchOutput;
use crate::output::SearchOutput;
use crate::schema::commands_schema;

pub(crate) const WEB_NAMESPACE: &str = "web";
Expand Down Expand Up @@ -67,7 +67,7 @@ impl ToolExecutor<ToolCall> for WebSearchTool {
}

fn exposure(&self) -> ToolExposure {
ToolExposure::DirectModelOnly
ToolExposure::Direct
}

fn supports_parallel_tool_calls(&self) -> bool {
Expand Down Expand Up @@ -114,9 +114,7 @@ impl ToolExecutor<ToolCall> for WebSearchTool {
.emit_completed(web_search_item(&call.call_id, command_action))
.await;

Ok(Box::new(EncryptedSearchOutput::new(
response.encrypted_output,
)))
Ok(Box::new(SearchOutput::new(response.output)))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0 Badge Preserve encrypted search output for model-visible replies

When web.run is called as a normal model-visible tool (the app-server test still exercises that path), this line discards the returned encrypted_output and later sends output as input_text in the next Responses request. That injects the full standalone search result into model history instead of the encrypted content, and the root AGENTS.md “Model visible context” rule requires new model-context fragments that can exceed 1k tokens to be highlighted and bounded. Keep the encrypted payload for to_response_item while using plaintext only for code-mode results.

Useful? React with 👍 / 👎.

@sayan-oai sayan-oai Jun 6, 2026

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somewhat related to this: there is some token accounting for tool call output. Previously this was encrypted, so responses added a max_output_tokens field (because we couldn’t safely truncate encrypted content in the harness), but now that it’s not encrypted, can/does this output go through the normal tool call accounting that tracks and truncates large results?

EDIT: nvm, we get this for free

}
}

Expand Down
Loading