From 9bdd9edc1f813f95c0543c6c88f12823456b358d Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 28 May 2026 11:08:28 +0200 Subject: [PATCH 1/4] feat(native-span): add dedup convenience to VecMap --- libdd-trace-utils/src/span/vec_map.rs | 209 ++++++++++++++++++++++---- 1 file changed, 183 insertions(+), 26 deletions(-) diff --git a/libdd-trace-utils/src/span/vec_map.rs b/libdd-trace-utils/src/span/vec_map.rs index 9d2b5391ae..7adc363e14 100644 --- a/libdd-trace-utils/src/span/vec_map.rs +++ b/libdd-trace-utils/src/span/vec_map.rs @@ -10,8 +10,10 @@ use serde::ser::{Serialize, Serializer}; use std::borrow::Borrow; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::hash::Hash; +use std::slice; +use std::sync::atomic::{AtomicBool, Ordering}; /// A Vec-backed map that provides HashMap-like lookup by key. /// @@ -39,7 +41,7 @@ use std::hash::Hash; /// /// As this is a map, iteration order is not defined nor guaranteed. In practice, iteration follows /// insertion order, but [Self::dedup] will reverse the underlying vector. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] pub struct VecMap { data: Vec<(K, V)>, /// Deduped is a flag that is set after entry deduplication. It is dirtied (set to `false`) @@ -59,6 +61,21 @@ impl Default for VecMap { } } +// This implementation allocates, which isn't expected for equality testing (not allocating would be +// rather tedious though). It's fine for tests (where `PartialEq` is currently needed), so we +// cfg-gate it to avoid unintended usage in prod. +#[cfg(any(test, feature = "test-utils"))] +impl PartialEq for VecMap { + fn eq(&self, other: &Self) -> bool { + let lhs: HashMap<&K, &V> = self.data.iter().map(|(k, v)| (k, v)).collect(); + let rhs: HashMap<&K, &V> = other.data.iter().map(|(k, v)| (k, v)).collect(); + lhs == rhs + } +} + +#[cfg(any(test, feature = "test-utils"))] +impl Eq for VecMap {} + impl VecMap { #[must_use] #[inline] @@ -136,13 +153,13 @@ impl VecMap { /// Iterate over the element, including duplicate entries. #[inline] - pub fn iter(&self) -> std::slice::Iter<'_, (K, V)> { + pub fn iter(&self) -> slice::Iter<'_, (K, V)> { self.data.iter() } /// Iterate mutably over the elements, including duplicate entries. #[inline] - pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, (K, V)> { + pub fn iter_mut(&mut self) -> slice::IterMut<'_, (K, V)> { self.dirty(); self.data.iter_mut() } @@ -168,6 +185,51 @@ impl VecMap { } } +impl VecMap { + /// Returns a deduped map, that either borrows from `self` without performing any work if the + /// map is already deduped, or dedup the entries in a new separate vec otherwise. As opposed to + /// [Self::dedup], `as_deduped_map` takes an immutable reference to `self` but might allocate. + /// Prefer [Self::dedup] when applicable. + pub fn as_deduped_map(&self) -> DedupedVecMap<'_, K, V> { + if self.deduped { + DedupedVecMap::Borrowed(self) + } else { + static WARNED: AtomicBool = AtomicBool::new(false); + if !WARNED.swap(true, Ordering::Relaxed) { + tracing::warn!( + "VecMap not deduped before encoding. Performing defensive on-the-fly dedup" + ); + } + + DedupedVecMap::Owned( + self.data + .iter() + .map(|(k, v)| (k, v)) + .collect::>() + .into_iter() + .collect(), + ) + } + } + + /// This is a convenience wrapper around [Self::as_deduped_map] used in the msgpack encoder, + /// where we expect the map to be deduped, but call `as_deduped_map` as a defensive measure. If + /// the latter had to deduplicate and allocate a new vec, we log a warning (at most once). + #[allow(unused)] + pub(crate) fn defensive_dedup(&self) -> DedupedVecMap<'_, K, V> { + if !self.is_deduped() { + static WARNED: AtomicBool = AtomicBool::new(false); + if !WARNED.swap(true, Ordering::Relaxed) { + tracing::warn!( + "VecMap not deduped before encoding. Performing defensive on-the-fly dedup" + ); + } + } + + self.as_deduped_map() + } +} + impl VecMap { /// Remove entries with a duplicate key, only keeping the last one. After this, a flag is set /// internally, such that as long as the map isn't extended or mutably iterated, the next @@ -222,7 +284,7 @@ impl IntoIterator for VecMap { impl<'a, K, V> IntoIterator for &'a VecMap { type Item = &'a (K, V); - type IntoIter = std::slice::Iter<'a, (K, V)>; + type IntoIter = slice::Iter<'a, (K, V)>; fn into_iter(self) -> Self::IntoIter { self.data.iter() @@ -231,7 +293,7 @@ impl<'a, K, V> IntoIterator for &'a VecMap { impl<'a, K, V> IntoIterator for &'a mut VecMap { type Item = &'a mut (K, V); - type IntoIter = std::slice::IterMut<'a, (K, V)>; + type IntoIter = slice::IterMut<'a, (K, V)>; fn into_iter(self) -> Self::IntoIter { // Since we iterate on keys as well, they can modified, and introduce duplicates. @@ -250,33 +312,74 @@ impl Extend<(K, V)> for VecMap { impl Serialize for VecMap { fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeMap; - use std::collections::HashMap; - // We pre-compute the deduped map. If deduplication were done on the fly during - // serialization, we couldn't provide a length up front to the serializer, and the current - // one (rmp) will allocate an intermediate buffer defensively. - if self.deduped { - let mut map_ser = serializer.serialize_map(Some(self.len()))?; + let deduped = self.as_deduped_map(); + let mut map_ser = serializer.serialize_map(Some(deduped.len()))?; - for (k, v) in self { - map_ser.serialize_entry(k, v)?; - } + for (k, v) in deduped.iter() { + map_ser.serialize_entry(k, v)?; + } - map_ser.end() - } else { - // Note: using `dedup` would need an additional `clone()` of the whole map here. We can - // use references instead. - self.data - .iter() - .map(|(k, v)| (k, v)) - // Since the iterator is sized, `collect()` should pre-allocate with the right - // capacity in one shot. - .collect::>() - .serialize(serializer) + map_ser.end() + } +} + +pub enum DedupedVecMap<'a, K, V> { + Borrowed(&'a VecMap), + Owned(Vec<(&'a K, &'a V)>), +} + +impl<'a, K, V> DedupedVecMap<'a, K, V> { + #[inline] + pub fn iter(&self) -> DedupedVecMapIter<'_, 'a, K, V> { + match self { + DedupedVecMap::Borrowed(vec_map) => DedupedVecMapIter::Borrowed(vec_map.iter()), + DedupedVecMap::Owned(vec) => DedupedVecMapIter::Owned(vec.iter()), } } + + #[inline] + pub fn len(&self) -> usize { + match self { + DedupedVecMap::Borrowed(vec_map) => vec_map.len(), + DedupedVecMap::Owned(vec) => vec.len(), + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + match self { + DedupedVecMap::Borrowed(vec_map) => vec_map.is_empty(), + DedupedVecMap::Owned(vec) => vec.is_empty(), + } + } +} + +pub enum DedupedVecMapIter<'b, 'a: 'b, K, V> { + Borrowed(slice::Iter<'a, (K, V)>), + Owned(slice::Iter<'b, (&'a K, &'a V)>), } +impl<'b, 'a: 'b, K, V> Iterator for DedupedVecMapIter<'b, 'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + match self { + DedupedVecMapIter::Borrowed(iter) => iter.next().map(|(k, v)| (k, v)), + DedupedVecMapIter::Owned(iter) => iter.next().copied(), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + DedupedVecMapIter::Borrowed(iter) => iter.size_hint(), + DedupedVecMapIter::Owned(iter) => iter.size_hint(), + } + } +} + +impl<'b, 'a: 'b, K, V> ExactSizeIterator for DedupedVecMapIter<'b, 'a, K, V> {} + #[cfg(test)] mod tests { use super::*; @@ -448,6 +551,60 @@ mod tests { assert!(!m.is_deduped()); } + #[test] + fn deduped_vec_map_borrowed_iter_and_len() { + let mut m = VecMap::new(); + m.insert("a", 1); + m.insert("b", 2); + m.dedup(); + + let d = m.defensive_dedup(); + assert!(matches!(d, DedupedVecMap::Borrowed(_))); + assert_eq!(d.len(), 2); + + let items: Vec<_> = d.iter().collect(); + assert_eq!(items, vec![(&"a", &1), (&"b", &2)]); + } + + #[test] + fn deduped_vec_map_copy_iter_and_len() { + let mut m = VecMap::new(); + m.insert("a", 1); + m.insert("a", 2); + m.insert("b", 3); + + let d = m.defensive_dedup(); + assert!(matches!(d, DedupedVecMap::Owned(_))); + assert_eq!(d.len(), 2); + + let items: HashMap<&&str, &i32> = d.iter().collect(); + assert_eq!(items[&"a"], &2); + assert_eq!(items[&"b"], &3); + } + + #[test] + fn deduped_vec_map_iter_exact_size() { + let mut m = VecMap::new(); + m.insert("a", 1); + m.insert("b", 2); + m.insert("c", 3); + m.dedup(); + + let d = m.defensive_dedup(); + let mut iter = d.iter(); + assert_eq!(iter.len(), 3); + iter.next(); + assert_eq!(iter.len(), 2); + } + + #[test] + fn deduped_vec_map_empty() { + let m: VecMap = VecMap::new(); + let d = DedupedVecMap::Borrowed(&m); + assert_eq!(d.len(), 0); + assert_eq!(d.iter().count(), 0); + } + #[test] fn serialize_deduplicates_keeping_last() { let mut m = VecMap::new(); From 5e118fe86172d17a25c622c9b7beb8af9368b8ae Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 28 May 2026 15:09:01 +0200 Subject: [PATCH 2/4] fix: double log when deduping vec_map --- libdd-trace-utils/src/span/vec_map.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/libdd-trace-utils/src/span/vec_map.rs b/libdd-trace-utils/src/span/vec_map.rs index 7adc363e14..0e40583996 100644 --- a/libdd-trace-utils/src/span/vec_map.rs +++ b/libdd-trace-utils/src/span/vec_map.rs @@ -194,13 +194,6 @@ impl VecMap { if self.deduped { DedupedVecMap::Borrowed(self) } else { - static WARNED: AtomicBool = AtomicBool::new(false); - if !WARNED.swap(true, Ordering::Relaxed) { - tracing::warn!( - "VecMap not deduped before encoding. Performing defensive on-the-fly dedup" - ); - } - DedupedVecMap::Owned( self.data .iter() From 15e2e0e77807ac59bde66d8f3e8b9845746eccca Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 28 May 2026 15:12:21 +0200 Subject: [PATCH 3/4] test: fix failing test (vecmap order not guaranteed) --- libdd-trace-utils/src/span/vec_map.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libdd-trace-utils/src/span/vec_map.rs b/libdd-trace-utils/src/span/vec_map.rs index 0e40583996..58a5af11dc 100644 --- a/libdd-trace-utils/src/span/vec_map.rs +++ b/libdd-trace-utils/src/span/vec_map.rs @@ -555,7 +555,8 @@ mod tests { assert!(matches!(d, DedupedVecMap::Borrowed(_))); assert_eq!(d.len(), 2); - let items: Vec<_> = d.iter().collect(); + let mut items: Vec<_> = d.iter().collect(); + items.sort_by_key(|(k, _)| **k); assert_eq!(items, vec![(&"a", &1), (&"b", &2)]); } From 0996e9a5271b4a3dfa0c76a91c03ebcbbfa58ce4 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 1 Jun 2026 19:13:27 +0200 Subject: [PATCH 4/4] refactor(trace-utils): store HashMap directly in DedupedVecMap::Owned Avoid the redundant collect from HashMap back into Vec when deduping. The Owned variant now holds a HashMap<&K, &V> directly, removing one allocation and copy. Co-Authored-By: Claude Opus 4.6 --- libdd-trace-utils/src/span/vec_map.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/libdd-trace-utils/src/span/vec_map.rs b/libdd-trace-utils/src/span/vec_map.rs index 58a5af11dc..8825cfbbdf 100644 --- a/libdd-trace-utils/src/span/vec_map.rs +++ b/libdd-trace-utils/src/span/vec_map.rs @@ -194,14 +194,7 @@ impl VecMap { if self.deduped { DedupedVecMap::Borrowed(self) } else { - DedupedVecMap::Owned( - self.data - .iter() - .map(|(k, v)| (k, v)) - .collect::>() - .into_iter() - .collect(), - ) + DedupedVecMap::Owned(self.data.iter().map(|(k, v)| (k, v)).collect()) } } @@ -319,7 +312,7 @@ impl Serialize for VecMap { pub enum DedupedVecMap<'a, K, V> { Borrowed(&'a VecMap), - Owned(Vec<(&'a K, &'a V)>), + Owned(HashMap<&'a K, &'a V>), } impl<'a, K, V> DedupedVecMap<'a, K, V> { @@ -327,7 +320,7 @@ impl<'a, K, V> DedupedVecMap<'a, K, V> { pub fn iter(&self) -> DedupedVecMapIter<'_, 'a, K, V> { match self { DedupedVecMap::Borrowed(vec_map) => DedupedVecMapIter::Borrowed(vec_map.iter()), - DedupedVecMap::Owned(vec) => DedupedVecMapIter::Owned(vec.iter()), + DedupedVecMap::Owned(map) => DedupedVecMapIter::Owned(map.iter()), } } @@ -335,7 +328,7 @@ impl<'a, K, V> DedupedVecMap<'a, K, V> { pub fn len(&self) -> usize { match self { DedupedVecMap::Borrowed(vec_map) => vec_map.len(), - DedupedVecMap::Owned(vec) => vec.len(), + DedupedVecMap::Owned(map) => map.len(), } } @@ -343,14 +336,14 @@ impl<'a, K, V> DedupedVecMap<'a, K, V> { pub fn is_empty(&self) -> bool { match self { DedupedVecMap::Borrowed(vec_map) => vec_map.is_empty(), - DedupedVecMap::Owned(vec) => vec.is_empty(), + DedupedVecMap::Owned(map) => map.is_empty(), } } } pub enum DedupedVecMapIter<'b, 'a: 'b, K, V> { Borrowed(slice::Iter<'a, (K, V)>), - Owned(slice::Iter<'b, (&'a K, &'a V)>), + Owned(std::collections::hash_map::Iter<'b, &'a K, &'a V>), } impl<'b, 'a: 'b, K, V> Iterator for DedupedVecMapIter<'b, 'a, K, V> { @@ -359,7 +352,7 @@ impl<'b, 'a: 'b, K, V> Iterator for DedupedVecMapIter<'b, 'a, K, V> { fn next(&mut self) -> Option { match self { DedupedVecMapIter::Borrowed(iter) => iter.next().map(|(k, v)| (k, v)), - DedupedVecMapIter::Owned(iter) => iter.next().copied(), + DedupedVecMapIter::Owned(iter) => iter.next().map(|(&k, &v)| (k, v)), } }