Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 170 additions & 26 deletions libdd-trace-utils/src/span/vec_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

use serde::ser::{Serialize, Serializer};
use std::borrow::Borrow;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
use std::slice;
use std::sync::atomic::{AtomicBool, Ordering};

/// A Vec-backed map that provides HashMap-like lookup by key.
///
Expand Down Expand Up @@ -39,7 +41,7 @@ use std::hash::Hash;
///
/// As this is a map, iteration order is not defined nor guaranteed. In practice, iteration follows
/// insertion order, but [Self::dedup] will reverse the underlying vector.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug)]
pub struct VecMap<K, V> {
data: Vec<(K, V)>,
/// Deduped is a flag that is set after entry deduplication. It is dirtied (set to `false`)
Expand All @@ -59,6 +61,21 @@ impl<K, V> Default for VecMap<K, V> {
}
}

// This implementation allocates, which isn't expected for equality testing (not allocating would be
// rather tedious though). It's fine for tests (where `PartialEq` is currently needed), so we
// cfg-gate it to avoid unintended usage in prod.
#[cfg(any(test, feature = "test-utils"))]
impl<K: Eq + Hash, V: PartialEq> PartialEq for VecMap<K, V> {
fn eq(&self, other: &Self) -> bool {
let lhs: HashMap<&K, &V> = self.data.iter().map(|(k, v)| (k, v)).collect();
let rhs: HashMap<&K, &V> = other.data.iter().map(|(k, v)| (k, v)).collect();
lhs == rhs
}
}

#[cfg(any(test, feature = "test-utils"))]
impl<K: Eq + Hash, V: Eq> Eq for VecMap<K, V> {}

impl<K, V> VecMap<K, V> {
#[must_use]
#[inline]
Expand Down Expand Up @@ -136,13 +153,13 @@ impl<K, V> VecMap<K, V> {

/// Iterate over the element, including duplicate entries.
#[inline]
pub fn iter(&self) -> std::slice::Iter<'_, (K, V)> {
pub fn iter(&self) -> slice::Iter<'_, (K, V)> {
self.data.iter()
}

/// Iterate mutably over the elements, including duplicate entries.
#[inline]
pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, (K, V)> {
pub fn iter_mut(&mut self) -> slice::IterMut<'_, (K, V)> {
self.dirty();
self.data.iter_mut()
}
Expand All @@ -168,6 +185,37 @@ impl<K, V> VecMap<K, V> {
}
}

impl<K: Eq + Hash, V> VecMap<K, V> {
/// Returns a deduped map, that either borrows from `self` without performing any work if the
/// map is already deduped, or dedup the entries in a new separate vec otherwise. As opposed to
/// [Self::dedup], `as_deduped_map` takes an immutable reference to `self` but might allocate.
/// Prefer [Self::dedup] when applicable.
pub fn as_deduped_map(&self) -> DedupedVecMap<'_, K, V> {
if self.deduped {
DedupedVecMap::Borrowed(self)
} else {
DedupedVecMap::Owned(self.data.iter().map(|(k, v)| (k, v)).collect())
}
}

/// This is a convenience wrapper around [Self::as_deduped_map] used in the msgpack encoder,
/// where we expect the map to be deduped, but call `as_deduped_map` as a defensive measure. If
/// the latter had to deduplicate and allocate a new vec, we log a warning (at most once).
#[allow(unused)]
pub(crate) fn defensive_dedup(&self) -> DedupedVecMap<'_, K, V> {
if !self.is_deduped() {
static WARNED: AtomicBool = AtomicBool::new(false);
if !WARNED.swap(true, Ordering::Relaxed) {
Comment thread
yannham marked this conversation as resolved.
tracing::warn!(
"VecMap not deduped before encoding. Performing defensive on-the-fly dedup"
);
}
}

self.as_deduped_map()
}
}

impl<K: Hash + Eq + Clone, V> VecMap<K, V> {
/// Remove entries with a duplicate key, only keeping the last one. After this, a flag is set
/// internally, such that as long as the map isn't extended or mutably iterated, the next
Expand Down Expand Up @@ -222,7 +270,7 @@ impl<K, V> IntoIterator for VecMap<K, V> {

impl<'a, K, V> IntoIterator for &'a VecMap<K, V> {
type Item = &'a (K, V);
type IntoIter = std::slice::Iter<'a, (K, V)>;
type IntoIter = slice::Iter<'a, (K, V)>;

fn into_iter(self) -> Self::IntoIter {
self.data.iter()
Expand All @@ -231,7 +279,7 @@ impl<'a, K, V> IntoIterator for &'a VecMap<K, V> {

impl<'a, K, V> IntoIterator for &'a mut VecMap<K, V> {
type Item = &'a mut (K, V);
type IntoIter = std::slice::IterMut<'a, (K, V)>;
type IntoIter = slice::IterMut<'a, (K, V)>;

fn into_iter(self) -> Self::IntoIter {
// Since we iterate on keys as well, they can modified, and introduce duplicates.
Expand All @@ -250,33 +298,74 @@ impl<K, V> Extend<(K, V)> for VecMap<K, V> {
impl<K: Serialize + Eq + Hash, V: Serialize> Serialize for VecMap<K, V> {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
use serde::ser::SerializeMap;
use std::collections::HashMap;

// We pre-compute the deduped map. If deduplication were done on the fly during
// serialization, we couldn't provide a length up front to the serializer, and the current
// one (rmp) will allocate an intermediate buffer defensively.
if self.deduped {
let mut map_ser = serializer.serialize_map(Some(self.len()))?;
let deduped = self.as_deduped_map();
let mut map_ser = serializer.serialize_map(Some(deduped.len()))?;

for (k, v) in self {
map_ser.serialize_entry(k, v)?;
}
for (k, v) in deduped.iter() {
map_ser.serialize_entry(k, v)?;
}

map_ser.end()
} else {
// Note: using `dedup` would need an additional `clone()` of the whole map here. We can
// use references instead.
self.data
.iter()
.map(|(k, v)| (k, v))
// Since the iterator is sized, `collect()` should pre-allocate with the right
// capacity in one shot.
.collect::<HashMap<&K, &V>>()
.serialize(serializer)
map_ser.end()
}
}

pub enum DedupedVecMap<'a, K, V> {
Borrowed(&'a VecMap<K, V>),
Owned(HashMap<&'a K, &'a V>),
}

impl<'a, K, V> DedupedVecMap<'a, K, V> {
#[inline]
pub fn iter(&self) -> DedupedVecMapIter<'_, 'a, K, V> {
match self {
DedupedVecMap::Borrowed(vec_map) => DedupedVecMapIter::Borrowed(vec_map.iter()),
DedupedVecMap::Owned(map) => DedupedVecMapIter::Owned(map.iter()),
}
}

#[inline]
pub fn len(&self) -> usize {
match self {
DedupedVecMap::Borrowed(vec_map) => vec_map.len(),
DedupedVecMap::Owned(map) => map.len(),
}
}

#[inline]
pub fn is_empty(&self) -> bool {
match self {
DedupedVecMap::Borrowed(vec_map) => vec_map.is_empty(),
DedupedVecMap::Owned(map) => map.is_empty(),
}
}
}

pub enum DedupedVecMapIter<'b, 'a: 'b, K, V> {
Borrowed(slice::Iter<'a, (K, V)>),
Owned(std::collections::hash_map::Iter<'b, &'a K, &'a V>),
}

impl<'b, 'a: 'b, K, V> Iterator for DedupedVecMapIter<'b, 'a, K, V> {
type Item = (&'a K, &'a V);

fn next(&mut self) -> Option<Self::Item> {
match self {
DedupedVecMapIter::Borrowed(iter) => iter.next().map(|(k, v)| (k, v)),
DedupedVecMapIter::Owned(iter) => iter.next().map(|(&k, &v)| (k, v)),
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
match self {
DedupedVecMapIter::Borrowed(iter) => iter.size_hint(),
DedupedVecMapIter::Owned(iter) => iter.size_hint(),
}
}
}

impl<'b, 'a: 'b, K, V> ExactSizeIterator for DedupedVecMapIter<'b, 'a, K, V> {}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -448,6 +537,61 @@ mod tests {
assert!(!m.is_deduped());
}

#[test]
fn deduped_vec_map_borrowed_iter_and_len() {
let mut m = VecMap::new();
m.insert("a", 1);
m.insert("b", 2);
m.dedup();

let d = m.defensive_dedup();
assert!(matches!(d, DedupedVecMap::Borrowed(_)));
assert_eq!(d.len(), 2);

let mut items: Vec<_> = d.iter().collect();
items.sort_by_key(|(k, _)| **k);
assert_eq!(items, vec![(&"a", &1), (&"b", &2)]);
}

#[test]
fn deduped_vec_map_copy_iter_and_len() {
let mut m = VecMap::new();
m.insert("a", 1);
m.insert("a", 2);
m.insert("b", 3);

let d = m.defensive_dedup();
assert!(matches!(d, DedupedVecMap::Owned(_)));
assert_eq!(d.len(), 2);

let items: HashMap<&&str, &i32> = d.iter().collect();
assert_eq!(items[&"a"], &2);
assert_eq!(items[&"b"], &3);
}

#[test]
fn deduped_vec_map_iter_exact_size() {
let mut m = VecMap::new();
m.insert("a", 1);
m.insert("b", 2);
m.insert("c", 3);
m.dedup();

let d = m.defensive_dedup();
let mut iter = d.iter();
assert_eq!(iter.len(), 3);
iter.next();
assert_eq!(iter.len(), 2);
}

#[test]
fn deduped_vec_map_empty() {
let m: VecMap<String, i32> = VecMap::new();
let d = DedupedVecMap::Borrowed(&m);
assert_eq!(d.len(), 0);
assert_eq!(d.iter().count(), 0);
}

#[test]
fn serialize_deduplicates_keeping_last() {
let mut m = VecMap::new();
Expand Down
Loading