Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions codex-rs/tools/src/json_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, s
let mut input_schema = input_schema.clone();
sanitize_json_schema(&mut input_schema);
prune_unreachable_definitions(&mut input_schema);
compact_large_tool_schema(&mut input_schema);
Comment thread
celia-oai marked this conversation as resolved.
Comment thread
celia-oai marked this conversation as resolved.
let schema: JsonSchema = serde_json::from_value(input_schema)?;
if matches!(
schema.schema_type,
Expand All @@ -170,6 +171,47 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, s
Ok(schema)
}

// Use compact normalized JSON bytes as a cheap local proxy for the 1k-token
// schema budget.
const MAX_COMPACT_TOOL_SCHEMA_BYTES: usize = 4_000;
const MAX_COMPACT_TOOL_SCHEMA_DEPTH: usize = 2;
Comment thread
celia-oai marked this conversation as resolved.

/// Shrink unusually large tool schemas while preserving the top-level argument
/// surface. Compaction is best-effort rather than a hard cap: it runs only
/// after schema sanitization/pruning and applies increasingly lossy passes
/// while the schema remains over budget.
fn compact_large_tool_schema(value: &mut JsonValue) {
for pass in LARGE_SCHEMA_COMPACTION_PASSES {
if compact_schema_fits_budget(value) {
break;
}
pass(value);
Comment thread
celia-oai marked this conversation as resolved.
}
Comment thread
celia-oai marked this conversation as resolved.
Comment thread
celia-oai marked this conversation as resolved.
}

type LargeSchemaCompactionPass = fn(&mut JsonValue);

const LARGE_SCHEMA_COMPACTION_PASSES: &[LargeSchemaCompactionPass] = &[
strip_schema_descriptions,
drop_schema_definitions,
collapse_deep_schema_objects_from_root,
Comment thread
celia-oai marked this conversation as resolved.
];

fn collapse_deep_schema_objects_from_root(value: &mut JsonValue) {
collapse_deep_schema_objects(value, /*depth*/ 0);
}

fn compact_schema_fits_budget(value: &JsonValue) -> bool {
compact_normalized_schema_len(value) <= MAX_COMPACT_TOOL_SCHEMA_BYTES
}

fn compact_normalized_schema_len(value: &JsonValue) -> usize {
serde_json::from_value::<JsonSchema>(value.clone())
.and_then(|schema| serde_json::to_vec(&schema))
.map(|json| json.len())
.unwrap_or(0)
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DefinitionTraversal {
Include,
Expand Down Expand Up @@ -214,6 +256,130 @@ fn for_each_schema_child(
}
}

fn strip_schema_descriptions(value: &mut JsonValue) {
match value {
JsonValue::Array(values) => {
for value in values {
strip_schema_descriptions(value);
}
}
JsonValue::Object(map) => {
map.remove("description");
for_each_schema_child_mut(map, DefinitionTraversal::Include, &mut |value| {
strip_schema_descriptions(value);
});
}
_ => {}
}
}

fn for_each_schema_child_mut(
map: &mut serde_json::Map<String, JsonValue>,
definition_traversal: DefinitionTraversal,
visitor: &mut impl FnMut(&mut JsonValue),
) {
if let Some(properties) = map.get_mut("properties")
&& let Some(properties_map) = properties.as_object_mut()
{
for value in properties_map.values_mut() {
visitor(value);
}
}

for key in SCHEMA_CHILD_KEYS {
if let Some(value) = map.get_mut(key) {
visitor(value);
}
}

if let Some(additional_properties) = map.get_mut("additionalProperties")
&& !matches!(additional_properties, JsonValue::Bool(_))
{
visitor(additional_properties);
}

if definition_traversal == DefinitionTraversal::Include {
for key in DEFINITION_TABLE_KEYS {
if let Some(definitions) = map.get_mut(key)
&& let Some(definitions_map) = definitions.as_object_mut()
{
for value in definitions_map.values_mut() {
visitor(value);
}
}
}
}
}

/// Replace local definition refs with empty schemas before dropping root
/// definition tables, so downstream behavior does not depend on how a schema
/// parser handles refs to missing definitions.
fn drop_schema_definitions(value: &mut JsonValue) {
rewrite_definition_refs_to_empty_schemas(value);

let JsonValue::Object(map) = value else {
return;
};

for key in DEFINITION_TABLE_KEYS {
map.remove(key);
}
}

fn rewrite_definition_refs_to_empty_schemas(value: &mut JsonValue) {
match value {
JsonValue::Array(values) => {
for value in values {
rewrite_definition_refs_to_empty_schemas(value);
}
}
JsonValue::Object(map) => {
if map
.get("$ref")
.and_then(JsonValue::as_str)
.and_then(parse_local_definition_ref)
.is_some()
{
*value = json!({});
Comment thread
celia-oai marked this conversation as resolved.
return;
}

for_each_schema_child_mut(map, DefinitionTraversal::Skip, &mut |value| {
rewrite_definition_refs_to_empty_schemas(value);
});
}
_ => {}
}
}

fn collapse_deep_schema_objects(value: &mut JsonValue, depth: usize) {
match value {
JsonValue::Array(values) => {
for value in values {
collapse_deep_schema_objects(value, depth);
}
}
JsonValue::Object(map) => {
if depth >= MAX_COMPACT_TOOL_SCHEMA_DEPTH && is_complex_schema_object(map) {
*value = json!({});
return;
}

for_each_schema_child_mut(map, DefinitionTraversal::Skip, &mut |value| {
collapse_deep_schema_objects(value, depth + 1);
});
}
_ => {}
}
}

fn is_complex_schema_object(map: &serde_json::Map<String, JsonValue>) -> bool {
SCHEMA_CHILD_KEYS.iter().any(|key| map.contains_key(*key))
|| map.contains_key("properties")
|| map.contains_key("additionalProperties")
|| map.contains_key("$ref")
}

/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
/// schema representation. This function:
/// - Ensures every typed schema object has a `"type"` when required.
Expand Down
Loading
Loading