diff --git a/crates/runner-shared/src/debug_info.rs b/crates/runner-shared/src/debug_info.rs index 2fcbf696..0b7f7ae7 100644 --- a/crates/runner-shared/src/debug_info.rs +++ b/crates/runner-shared/src/debug_info.rs @@ -21,6 +21,13 @@ impl std::fmt::Debug for DebugInfo { } } +/// Per-pid mounting info referencing a deduplicated debug info entry. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct MappedProcessDebugInfo { + pub debug_info_key: String, + pub load_bias: u64, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ModuleDebugInfo { /// The path to the object file on disk (e.g. `/usr/lib/libc.so.6`) diff --git a/crates/runner-shared/src/lib.rs b/crates/runner-shared/src/lib.rs index 381a8a04..61e804de 100644 --- a/crates/runner-shared/src/lib.rs +++ b/crates/runner-shared/src/lib.rs @@ -2,6 +2,7 @@ pub mod artifacts; pub mod debug_info; pub mod fifo; pub mod metadata; +pub mod module_symbols; pub mod perf_event; pub mod unwind_data; pub mod walltime_results; diff --git a/crates/runner-shared/src/metadata.rs b/crates/runner-shared/src/metadata.rs index 13beae77..b5c7d46c 100644 --- a/crates/runner-shared/src/metadata.rs +++ b/crates/runner-shared/src/metadata.rs @@ -1,12 +1,17 @@ use anyhow::Context; +use libc::pid_t; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::io::BufWriter; use std::path::Path; +use std::path::PathBuf; -use crate::debug_info::ModuleDebugInfo; +use crate::debug_info::{MappedProcessDebugInfo, ModuleDebugInfo}; use crate::fifo::MarkerType; +use crate::module_symbols::MappedProcessModuleSymbols; +use crate::unwind_data::MappedProcessUnwindData; -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Default)] pub struct PerfMetadata { /// The version of this metadata format. pub version: u64, @@ -14,20 +19,56 @@ pub struct PerfMetadata { /// Name and version of the integration pub integration: (String, String), + /// Per-pid modules that should be ignored, with runtime address ranges derived from symbol bounds + load bias + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub ignored_modules_by_pid: HashMap>, + + /// Deduplicated debug info entries, keyed by semantic key + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub debug_info: HashMap, + + /// Per-pid debug info references, mapping PID to mounted modules' debug info + /// Referenced by `path_keys` that point to the deduplicated `debug_info` entries. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub mapped_process_debug_info_by_pid: HashMap>, + + /// Per-pid unwind data references, mapping PID to mounted modules' unwind data + /// Referenced by `path_keys` that point to the deduplicated `unwind_data` files on disk. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub mapped_process_unwind_data_by_pid: HashMap>, + + /// Per-pid symbol references, mapping PID to its mounted modules' symbols + /// Referenced by `path_keys` that point to the deduplicated `symbols.map` files on disk. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub mapped_process_module_symbols: HashMap>, + + /// Mapping from semantic `path_key` to original binary path on host disk + /// Used by `mapped_process_debug_info_by_pid`, `mapped_process_unwind_data_by_pid` and + /// `mapped_process_module_symbols` the deduplicated entries + /// + /// Until now, only kept for traceability, if we ever need to reconstruct the original paths from the keys + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub path_key_to_path: HashMap, + + // Deprecated fields below are kept for backward compatibility, since this struct is used in + // the parser and older versions of the runner still generate them + // /// The URIs of the benchmarks with the timestamps they were executed at. #[deprecated(note = "Use ExecutionTimestamps in the 'artifacts' module instead")] pub uri_by_ts: Vec<(u64, String)>, /// Modules that should be ignored and removed from the folded trace and callgraph (e.g. python interpreter) + #[deprecated(note = "Use 'ignored_modules_by_pid' instead")] pub ignored_modules: Vec<(String, u64, u64)>, /// Marker for certain regions in the profiling data #[deprecated(note = "Use ExecutionTimestamps in the 'artifacts' module instead")] pub markers: Vec, - /// Debug info for all modules across all processes, mapping PID to module debug info - #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")] - pub debug_info_by_pid: std::collections::HashMap>, + /// Kept for backward compatibility, was used before deduplication of debug info entries. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + #[deprecated(note = "Use 'debug_info' + 'mapped_process_debug_info_by_pid' instead")] + pub debug_info_by_pid: HashMap>, } impl PerfMetadata { diff --git a/crates/runner-shared/src/module_symbols.rs b/crates/runner-shared/src/module_symbols.rs new file mode 100644 index 00000000..73742fb5 --- /dev/null +++ b/crates/runner-shared/src/module_symbols.rs @@ -0,0 +1,11 @@ +use serde::{Deserialize, Serialize}; + +/// File suffix used when registering module symbols in a PID agnostic way. +pub const SYMBOLS_MAP_SUFFIX: &str = "symbols.map"; + +/// Per-pid mounting info referencing a deduplicated perf map entry. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct MappedProcessModuleSymbols { + pub perf_map_key: String, + pub load_bias: u64, +} diff --git a/crates/runner-shared/src/unwind_data.rs b/crates/runner-shared/src/unwind_data.rs index ed4cb029..a7d8ac0e 100644 --- a/crates/runner-shared/src/unwind_data.rs +++ b/crates/runner-shared/src/unwind_data.rs @@ -10,16 +10,91 @@ pub const UNWIND_FILE_EXT: &str = "unwind_data"; pub type UnwindData = UnwindDataV2; -impl UnwindData { +impl UnwindDataV3 { pub fn parse(reader: &[u8]) -> anyhow::Result { let compat: UnwindDataCompat = bincode::deserialize(reader)?; + match compat { + UnwindDataCompat::V1(_) => { + anyhow::bail!("Cannot parse V1 unwind data as V3 (breaking changes)") + } + UnwindDataCompat::V2(_) => { + anyhow::bail!("Cannot parse V2 unwind data as V3 (breaking changes)") + } + UnwindDataCompat::V3(v3) => Ok(v3), + } + } + + pub fn save_to>(&self, folder: P, key: &str) -> anyhow::Result<()> { + let path = folder.as_ref().join(format!("{key}.{UNWIND_FILE_EXT}")); + let compat = UnwindDataCompat::V3(self.clone()); + let file = std::fs::File::create(&path)?; + const BUFFER_SIZE: usize = 256 * 1024; + let writer = BufWriter::with_capacity(BUFFER_SIZE, file); + bincode::serialize_into(writer, &compat)?; + Ok(()) + } +} + +/// A versioned enum for `UnwindData` to allow for future extensions while maintaining backward compatibility. +#[derive(Serialize, Deserialize)] +enum UnwindDataCompat { + V1(UnwindDataV1), + V2(UnwindDataV2), + V3(UnwindDataV3), +} + +#[doc(hidden)] +#[derive(Serialize, Deserialize, Clone)] +struct UnwindDataV1 { + pub path: String, + + pub avma_range: Range, + pub base_avma: u64, + pub base_svma: u64, + + pub eh_frame_hdr: Vec, + pub eh_frame_hdr_svma: Range, + + pub eh_frame: Vec, + pub eh_frame_svma: Range, +} + +#[doc(hidden)] +#[derive(Serialize, Deserialize, Clone, PartialEq)] +pub struct UnwindDataV2 { + pub path: String, + + /// The monotonic timestamp when the unwind data was captured. + /// Is `None` if unwind data is valid for the whole program execution + pub timestamp: Option, + + pub avma_range: Range, + pub base_avma: u64, + pub base_svma: u64, + + pub eh_frame_hdr: Vec, + pub eh_frame_hdr_svma: Range, + + pub eh_frame: Vec, + pub eh_frame_svma: Range, +} + +impl UnwindDataV2 { + /// Parse unwind data bytes, converting V1 to V2 but erroring on V3 + /// (since V3 doesn't have the per-pid fields needed for V2). + pub fn parse(reader: &[u8]) -> anyhow::Result { + let compat: UnwindDataCompat = bincode::deserialize(reader)?; match compat { UnwindDataCompat::V1(v1) => Ok(v1.into()), UnwindDataCompat::V2(v2) => Ok(v2), + UnwindDataCompat::V3(_) => { + anyhow::bail!("Cannot parse V3 unwind data as V2 (missing per-pid fields)") + } } } + /// Will be removed once the backend has been deployed and we can merge the changes in the runner pub fn save_to>(&self, folder: P, pid: i32) -> anyhow::Result<()> { let unwind_data_path = folder.as_ref().join(format!( "{}_{:x}_{:x}_{}.{UNWIND_FILE_EXT}", @@ -59,6 +134,47 @@ impl UnwindData { } } +impl From for UnwindDataV2 { + fn from(v1: UnwindDataV1) -> Self { + Self { + path: v1.path, + timestamp: None, + avma_range: v1.avma_range, + base_avma: v1.base_avma, + base_svma: v1.base_svma, + eh_frame_hdr: v1.eh_frame_hdr, + eh_frame_hdr_svma: v1.eh_frame_hdr_svma, + eh_frame: v1.eh_frame, + eh_frame_svma: v1.eh_frame_svma, + } + } +} + +/// Pid-agnostic unwind data. +/// Contains only the data that is common across all PIDs loading the same shared library. +#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] +pub struct UnwindDataV3 { + pub path: String, + pub base_svma: u64, + pub eh_frame_hdr: Vec, + pub eh_frame_hdr_svma: Range, + pub eh_frame: Vec, + pub eh_frame_svma: Range, +} + +impl From for UnwindDataV3 { + fn from(v2: UnwindDataV2) -> Self { + Self { + path: v2.path, + base_svma: v2.base_svma, + eh_frame_hdr: v2.eh_frame_hdr, + eh_frame_hdr_svma: v2.eh_frame_hdr_svma, + eh_frame: v2.eh_frame, + eh_frame_svma: v2.eh_frame_svma, + } + } +} + impl Debug for UnwindData { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let eh_frame_hdr_hash = { @@ -89,61 +205,145 @@ impl Debug for UnwindData { } } -/// A versioned enum for `UnwindData` to allow for future extensions while maintaining backward compatibility. -#[derive(Serialize, Deserialize)] -enum UnwindDataCompat { - V1(UnwindDataV1), - V2(UnwindDataV2), +impl Debug for UnwindDataV3 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let eh_frame_hdr_hash = { + let mut hasher = DefaultHasher::new(); + self.eh_frame_hdr.hash(&mut hasher); + hasher.finish() + }; + let eh_frame_hash = { + let mut hasher = DefaultHasher::new(); + self.eh_frame.hash(&mut hasher); + hasher.finish() + }; + + f.debug_struct("UnwindData") + .field("path", &self.path) + .field("base_svma", &format_args!("{:x}", self.base_svma)) + .field( + "eh_frame_hdr_svma", + &format_args!("{:x?}", self.eh_frame_hdr_svma), + ) + .field("eh_frame_hdr_hash", &format_args!("{eh_frame_hdr_hash:x}")) + .field("eh_frame_hash", &format_args!("{eh_frame_hash:x}")) + .field("eh_frame_svma", &format_args!("{:x?}", self.eh_frame_svma)) + .finish() + } } -#[doc(hidden)] -#[derive(Serialize, Deserialize, Clone)] -struct UnwindDataV1 { - pub path: String, +/// Per-pid mounting info referencing a deduplicated unwind data entry. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct MappedProcessUnwindData { + pub unwind_data_key: String, + #[serde(flatten)] + pub inner: ProcessUnwindData, +} +#[derive(Serialize, Deserialize, Clone)] +pub struct ProcessUnwindData { + pub timestamp: Option, pub avma_range: Range, pub base_avma: u64, - pub base_svma: u64, - - pub eh_frame_hdr: Vec, - pub eh_frame_hdr_svma: Range, +} - pub eh_frame: Vec, - pub eh_frame_svma: Range, +impl Debug for ProcessUnwindData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ProcessUnwindData") + .field("timestamp", &self.timestamp) + .field("avma_range", &format_args!("{:x?}", self.avma_range)) + .field("base_avma", &format_args!("{:x}", self.base_avma)) + .finish() + } } -#[doc(hidden)] -#[derive(Serialize, Deserialize, Clone)] -pub struct UnwindDataV2 { - pub path: String, +#[cfg(test)] +mod tests { + use super::*; - /// The monotonic timestamp when the unwind data was captured. - /// Is `None` if unwind data is valid for the whole program execution - pub timestamp: Option, + const V2_BINARY: &[u8] = include_bytes!("../testdata/unwind_data_v2.bin"); + const V3_BINARY: &[u8] = include_bytes!("../testdata/unwind_data_v3.bin"); - pub avma_range: Range, - pub base_avma: u64, - pub base_svma: u64, + fn create_sample_v2() -> UnwindDataV2 { + UnwindDataV2 { + path: "/lib/test.so".to_string(), + timestamp: Some(12345), + avma_range: 0x1000..0x2000, + base_avma: 0x1000, + base_svma: 0x0, + eh_frame_hdr: vec![1, 2, 3, 4], + eh_frame_hdr_svma: 0x100..0x200, + eh_frame: vec![5, 6, 7, 8], + eh_frame_svma: 0x200..0x300, + } + } - pub eh_frame_hdr: Vec, - pub eh_frame_hdr_svma: Range, + fn create_sample_v3() -> UnwindDataV3 { + UnwindDataV3 { + path: "/lib/test.so".to_string(), + base_svma: 0x0, + eh_frame_hdr: vec![1, 2, 3, 4], + eh_frame_hdr_svma: 0x100..0x200, + eh_frame: vec![5, 6, 7, 8], + eh_frame_svma: 0x200..0x300, + } + } - pub eh_frame: Vec, - pub eh_frame_svma: Range, -} + #[test] + fn test_parse_v2_as_v3_should_error() { + // Try to parse V2 binary artifact as V3 using UnwindData::parse + let result = UnwindDataV3::parse(V2_BINARY); -impl From for UnwindDataV2 { - fn from(v1: UnwindDataV1) -> Self { - Self { - path: v1.path, - timestamp: None, - avma_range: v1.avma_range, - base_avma: v1.base_avma, - base_svma: v1.base_svma, - eh_frame_hdr: v1.eh_frame_hdr, - eh_frame_hdr_svma: v1.eh_frame_hdr_svma, - eh_frame: v1.eh_frame, - eh_frame_svma: v1.eh_frame_svma, - } + // Should error due to breaking changes between V2 and V3 + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string() + .contains("Cannot parse V2 unwind data as V3"), + "Expected error message about V2->V3 incompatibility, got: {err}" + ); + } + + #[test] + fn test_parse_v3_as_v2_should_error() { + // Try to parse V3 binary artifact as V2 using UnwindDataV2::parse + let result = UnwindDataV2::parse(V3_BINARY); + + // Should error with specific message about missing per-pid fields + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string() + .contains("Cannot parse V3 unwind data as V2"), + "Expected error message about V3->V2 incompatibility, got: {err}" + ); + } + + #[test] + fn test_parse_v3_as_v3() { + // Parse V3 binary artifact as V3 using UnwindData::parse + let parsed_v3 = UnwindDataV3::parse(V3_BINARY).expect("Failed to parse V3 data as V3"); + + // Should match expected V3 data + let expected_v3 = create_sample_v3(); + assert_eq!(parsed_v3, expected_v3); + } + + #[test] + fn test_parse_v2_as_v2() { + // Parse V2 binary artifact as V2 using UnwindDataV2::parse + let parsed_v2 = UnwindDataV2::parse(V2_BINARY).expect("Failed to parse V2 data as V2"); + + // Should match expected V2 data + let expected_v2 = create_sample_v2(); + assert_eq!(parsed_v2.path, expected_v2.path); + assert_eq!(parsed_v2.timestamp, expected_v2.timestamp); + assert_eq!(parsed_v2.avma_range, expected_v2.avma_range); + assert_eq!(parsed_v2.base_avma, expected_v2.base_avma); + assert_eq!(parsed_v2.base_svma, expected_v2.base_svma); + assert_eq!(parsed_v2.eh_frame_hdr, expected_v2.eh_frame_hdr); + assert_eq!(parsed_v2.eh_frame_hdr_svma, expected_v2.eh_frame_hdr_svma); + assert_eq!(parsed_v2.eh_frame, expected_v2.eh_frame); + assert_eq!(parsed_v2.eh_frame_svma, expected_v2.eh_frame_svma); } } diff --git a/crates/runner-shared/testdata/unwind_data_v2.bin b/crates/runner-shared/testdata/unwind_data_v2.bin new file mode 100644 index 00000000..f26e8277 Binary files /dev/null and b/crates/runner-shared/testdata/unwind_data_v2.bin differ diff --git a/crates/runner-shared/testdata/unwind_data_v3.bin b/crates/runner-shared/testdata/unwind_data_v3.bin new file mode 100644 index 00000000..74b42b68 Binary files /dev/null and b/crates/runner-shared/testdata/unwind_data_v3.bin differ diff --git a/src/executor/wall_time/perf/mod.rs b/src/executor/wall_time/perf/mod.rs index 6792ba99..8783d20e 100644 --- a/src/executor/wall_time/perf/mod.rs +++ b/src/executor/wall_time/perf/mod.rs @@ -385,6 +385,8 @@ impl BenchmarkData { }, markers: self.marker_result.markers.clone(), debug_info_by_pid, + // Fill new fields to default for now + ..Default::default() }; metadata.save_to(&path).unwrap();