diff --git a/codex-rs/tools/src/json_schema.rs b/codex-rs/tools/src/json_schema.rs index 5352eaa91d2..528bb3adf80 100644 --- a/codex-rs/tools/src/json_schema.rs +++ b/codex-rs/tools/src/json_schema.rs @@ -6,13 +6,15 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; const DEFINITION_TABLE_KEYS: [&str; 2] = ["$defs", "definitions"]; -const SCHEMA_CHILD_KEYS: [&str; 2] = ["items", "anyOf"]; +const SCHEMA_CHILD_KEYS: [&str; 4] = ["items", "anyOf", "oneOf", "allOf"]; +const COMPOSITION_SCHEMA_KEYS: [&str; 3] = ["anyOf", "oneOf", "allOf"]; /// Primitive JSON Schema type names we support in tool definitions. /// /// This mirrors the OpenAI Structured Outputs subset for JSON Schema `type`: /// string, number, boolean, integer, object, array, and null. -/// Keywords such as `enum`, `const`, and `anyOf` are modeled separately. +/// Keywords such as `enum`, `const`, `anyOf`, `oneOf`, and `allOf` are modeled +/// separately. /// See . #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "lowercase")] @@ -61,6 +63,10 @@ pub struct JsonSchema { pub additional_properties: Option, #[serde(rename = "anyOf", skip_serializing_if = "Option::is_none")] pub any_of: Option>, + #[serde(rename = "oneOf", skip_serializing_if = "Option::is_none")] + pub one_of: Option>, + #[serde(rename = "allOf", skip_serializing_if = "Option::is_none")] + pub all_of: Option>, #[serde(rename = "$defs", skip_serializing_if = "Option::is_none")] pub defs: Option>, #[serde(skip_serializing_if = "Option::is_none")] @@ -85,6 +91,22 @@ impl JsonSchema { } } + pub fn one_of(variants: Vec, description: Option) -> Self { + Self { + description, + one_of: Some(variants), + ..Default::default() + } + } + + pub fn all_of(variants: Vec, description: Option) -> Self { + Self { + description, + all_of: Some(variants), + ..Default::default() + } + } + pub fn boolean(description: Option) -> Self { Self::typed(JsonSchemaPrimitiveType::Boolean, description) } @@ -219,6 +241,7 @@ const LARGE_SCHEMA_COMPACTION_PASSES: &[LargeSchemaCompactionPass] = &[ strip_schema_descriptions, drop_schema_definitions, collapse_deep_schema_objects_from_root, + prune_schema_compositions, ]; fn collapse_deep_schema_objects_from_root(value: &mut JsonValue) { @@ -397,6 +420,27 @@ fn collapse_deep_schema_objects(value: &mut JsonValue, depth: usize) { } } +fn prune_schema_compositions(value: &mut JsonValue) { + match value { + JsonValue::Array(values) => { + for value in values { + prune_schema_compositions(value); + } + } + JsonValue::Object(map) => { + if has_composition_keyword(map) { + *value = json!({}); + return; + } + + for_each_schema_child_mut(map, DefinitionTraversal::Skip, &mut |value| { + prune_schema_compositions(value); + }); + } + _ => {} + } +} + fn is_complex_schema_object(map: &serde_json::Map) -> bool { SCHEMA_CHILD_KEYS.iter().any(|key| map.contains_key(*key)) || map.contains_key("properties") @@ -404,10 +448,16 @@ fn is_complex_schema_object(map: &serde_json::Map) -> bool { || map.contains_key("$ref") } +fn has_composition_keyword(map: &serde_json::Map) -> bool { + COMPOSITION_SCHEMA_KEYS + .into_iter() + .any(|key| map.contains_key(key)) +} + /// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited /// schema representation. This function: /// - Ensures every typed schema object has a `"type"` when required. -/// - Preserves explicit `anyOf`. +/// - Preserves explicit `anyOf`, `oneOf`, and `allOf`. /// - Preserves `$ref` and reachable local `$defs` / `definitions`. /// - Collapses `const` into single-value `enum`. /// - Fills required child fields for object/array schema types, including @@ -443,8 +493,10 @@ fn sanitize_json_schema(value: &mut JsonValue) { if let Some(value) = map.get_mut("prefixItems") { sanitize_json_schema(value); } - if let Some(value) = map.get_mut("anyOf") { - sanitize_json_schema(value); + for key in COMPOSITION_SCHEMA_KEYS { + if let Some(value) = map.get_mut(key) { + sanitize_json_schema(value); + } } for table in DEFINITION_TABLE_KEYS { sanitize_schema_table(map, table); @@ -456,7 +508,8 @@ fn sanitize_json_schema(value: &mut JsonValue) { let mut schema_types = normalized_schema_types(map); - if schema_types.is_empty() && (map.contains_key("$ref") || map.contains_key("anyOf")) { + if schema_types.is_empty() && (map.contains_key("$ref") || has_composition_keyword(map)) + { return; } diff --git a/codex-rs/tools/src/json_schema_tests.rs b/codex-rs/tools/src/json_schema_tests.rs index 6973d06c005..1f245c6a6bb 100644 --- a/codex-rs/tools/src/json_schema_tests.rs +++ b/codex-rs/tools/src/json_schema_tests.rs @@ -728,6 +728,109 @@ fn parse_tool_input_schema_preserves_nested_any_of_property() { ); } +#[test] +fn parse_tool_input_schema_preserves_nested_one_of_property() { + // Example schema shape: + // { + // "type": "object", + // "properties": { + // "query": { + // "oneOf": [ + // { "const": "exact" }, + // { "type": "number" } + // ] + // } + // } + // } + // + // Expected normalization behavior: + // - The nested `oneOf` is preserved. + // - Child variants are recursively sanitized, including `const` to `enum`. + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "properties": { + "query": { + "oneOf": [ + { "const": "exact" }, + { "type": "number" } + ] + } + } + })) + .expect("parse schema"); + + assert_eq!( + schema, + JsonSchema::object( + BTreeMap::from([( + "query".to_string(), + JsonSchema::one_of( + vec![ + JsonSchema::string_enum( + vec![serde_json::json!("exact")], + /*description*/ None, + ), + JsonSchema::number(/*description*/ None), + ], + /*description*/ None, + ), + )]), + /*required*/ None, + /*additional_properties*/ None + ) + ); +} + +#[test] +fn parse_tool_input_schema_preserves_nested_all_of_property() { + // Example schema shape: + // { + // "type": "object", + // "properties": { + // "query": { + // "allOf": [ + // { "type": "string" }, + // { "description": "unrecognized by itself" } + // ] + // } + // } + // } + // + // Expected normalization behavior: + // - The nested `allOf` is preserved structurally rather than flattened. + // - Child variants are recursively sanitized. + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "properties": { + "query": { + "allOf": [ + { "type": "string" }, + { "description": "unrecognized by itself" } + ] + } + } + })) + .expect("parse schema"); + + assert_eq!( + schema, + JsonSchema::object( + BTreeMap::from([( + "query".to_string(), + JsonSchema::all_of( + vec![ + JsonSchema::string(/*description*/ None), + JsonSchema::default(), + ], + /*description*/ None, + ), + )]), + /*required*/ None, + /*additional_properties*/ None + ) + ); +} + #[test] fn parse_tool_input_schema_preserves_type_unions_without_rewriting_to_any_of() { // Example schema shape: @@ -1070,6 +1173,77 @@ fn parse_large_tool_input_schema_strips_descriptions_without_removing_descriptio ); } +#[test] +fn parse_large_tool_input_schema_prunes_compositions_as_last_resort() { + for composition_key in super::COMPOSITION_SCHEMA_KEYS { + let variants = vec![ + serde_json::json!({ + "type": "string", + "enum": ["first ".repeat(400)] + }), + serde_json::json!({ + "type": "string", + "enum": ["second ".repeat(400)] + }), + serde_json::json!({ + "type": "string", + "enum": ["third ".repeat(400)] + }), + ]; + + let mut choice = serde_json::Map::new(); + choice.insert( + composition_key.to_string(), + serde_json::Value::Array(variants), + ); + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "properties": { + "choice": choice + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "choice": {} + } + }) + ); + } +} + +#[test] +fn parse_large_tool_input_schema_prunes_single_composition_variant_if_still_over_budget() { + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "properties": { + "choice": { + "anyOf": [ + { + "type": "string", + "enum": ["x".repeat(4_500)] + } + ] + } + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "choice": {} + } + }) + ); +} + #[test] fn parse_large_tool_input_schema_preserves_object_enum_literal_descriptions() { let schema = parse_tool_input_schema(&serde_json::json!({ @@ -1397,10 +1571,24 @@ fn parse_tool_input_schema_collects_refs_from_schema_child_keywords() { {"$ref": "#/$defs/Choice"}, {"type": "string"} ] + }, + "exclusive_choice": { + "oneOf": [ + {"$ref": "#/$defs/ExclusiveChoice"}, + {"type": "integer"} + ] + }, + "combined": { + "allOf": [ + {"$ref": "#/$defs/Combined"}, + {"type": "object"} + ] } }, "$defs": { + "Combined": {"type": "object"}, "Choice": {"type": "boolean"}, + "ExclusiveChoice": {"type": "null"}, "Extra": {"type": "number"}, "Item": {"type": "string"}, "Unused": {"type": "null"} @@ -1419,6 +1607,18 @@ fn parse_tool_input_schema_collects_refs_from_schema_child_keywords() { {"type": "string"} ] }, + "combined": { + "allOf": [ + {"$ref": "#/$defs/Combined"}, + {"type": "object", "properties": {}} + ] + }, + "exclusive_choice": { + "oneOf": [ + {"$ref": "#/$defs/ExclusiveChoice"}, + {"type": "integer"} + ] + }, "items_holder": { "type": "array", "items": {"$ref": "#/$defs/Item"} @@ -1430,7 +1630,9 @@ fn parse_tool_input_schema_collects_refs_from_schema_child_keywords() { } }, "$defs": { + "Combined": {"type": "object", "properties": {}}, "Choice": {"type": "boolean"}, + "ExclusiveChoice": {"type": "null"}, "Extra": {"type": "number"}, "Item": {"type": "string"} } diff --git a/codex-rs/tools/tests/fixtures/json_schema_policy/google_drive.json b/codex-rs/tools/tests/fixtures/json_schema_policy/google_drive.json index bf37296cc02..518038aee50 100644 --- a/codex-rs/tools/tests/fixtures/json_schema_policy/google_drive.json +++ b/codex-rs/tools/tests/fixtures/json_schema_policy/google_drive.json @@ -53,13 +53,18 @@ { "pointer": "/properties/title/description", "value": "Copied file title." + }, + { + "pointer": "/properties/file/oneOf/0/type", + "value": "string" + }, + { + "pointer": "/properties/metadata/allOf/0/properties/source/type", + "value": "string" } ], "expected_pruned": [], - "expected_dropped_fields": [ - "/properties/file/oneOf", - "/properties/metadata/allOf" - ] + "expected_dropped_fields": [] } ] }