From 7028201d0bd7d1318277a00e8ca97ec46ca1674d Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Wed, 23 Jul 2025 19:48:40 +0800 Subject: [PATCH 1/4] fix: remove write_cache from ForestCar and PlainCar --- src/cid_collections/hash_map.rs | 2 + src/db/car/any.rs | 14 ------ src/db/car/forest.rs | 34 ++----------- src/db/car/plain.rs | 72 ++++------------------------ src/tool/subcommands/snapshot_cmd.rs | 5 +- 5 files changed, 17 insertions(+), 110 deletions(-) diff --git a/src/cid_collections/hash_map.rs b/src/cid_collections/hash_map.rs index 463ae17bf087..128a455f81d5 100644 --- a/src/cid_collections/hash_map.rs +++ b/src/cid_collections/hash_map.rs @@ -1,6 +1,8 @@ // Copyright 2019-2025 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT +#![allow(dead_code)] + use super::{CidV1DagCborBlake2b256, MaybeCompactedCid, Uncompactable}; use cid::Cid; #[cfg(doc)] diff --git a/src/db/car/any.rs b/src/db/car/any.rs index 36e544fd40dd..298b3f866773 100644 --- a/src/db/car/any.rs +++ b/src/db/car/any.rs @@ -10,7 +10,6 @@ use super::{CacheKey, RandomAccessFileReader, ZstdFrameCache}; use crate::blocks::{Tipset, TipsetKey}; -use crate::db::PersistentStore; use crate::utils::io::EitherMmapOrRandomAccessFile; use cid::Cid; use fvm_ipld_blockstore::Blockstore; @@ -142,19 +141,6 @@ where } } -impl PersistentStore for AnyCar -where - ReaderT: ReadAt, -{ - fn put_keyed_persistent(&self, k: &Cid, block: &[u8]) -> anyhow::Result<()> { - match self { - AnyCar::Forest(forest) => forest.put_keyed_persistent(k, block), - AnyCar::Plain(plain) => plain.put_keyed_persistent(k, block), - AnyCar::Memory(mem) => mem.put_keyed_persistent(k, block), - } - } -} - impl From> for AnyCar { fn from(car: super::ForestCar) -> Self { Self::Forest(car) diff --git a/src/db/car/forest.rs b/src/db/car/forest.rs index df4d16b81fcc..f194a7136230 100644 --- a/src/db/car/forest.rs +++ b/src/db/car/forest.rs @@ -48,7 +48,6 @@ use super::{CacheKey, ZstdFrameCache}; use crate::blocks::{Tipset, TipsetKey}; -use crate::db::PersistentStore; use crate::db::car::RandomAccessFileReader; use crate::db::car::plain::write_skip_frame_header_async; use crate::utils::db::car_stream::{CarBlock, CarV1Header}; @@ -62,7 +61,7 @@ use futures::{Stream, TryStream, TryStreamExt as _}; use fvm_ipld_blockstore::Blockstore; use fvm_ipld_encoding::to_vec; use nunny::Vec as NonEmpty; -use parking_lot::{Mutex, RwLock}; +use parking_lot::Mutex; use positioned_io::{Cursor, ReadAt, ReadBytesAtExt, SizeCursor}; use std::io::{Seek, SeekFrom}; use std::path::Path; @@ -96,7 +95,6 @@ pub struct ForestCar { indexed: index::Reader>, index_size_bytes: u32, frame_cache: Arc>, - write_cache: Arc>>>, roots: NonEmpty, } @@ -116,7 +114,6 @@ impl ForestCar { indexed, index_size_bytes, frame_cache: Arc::new(Mutex::new(ZstdFrameCache::default())), - write_cache: Arc::new(RwLock::new(ahash::HashMap::default())), roots: header.roots, }) } @@ -178,7 +175,6 @@ impl ForestCar { }), index_size_bytes: self.index_size_bytes, frame_cache: self.frame_cache, - write_cache: self.write_cache, roots: self.roots, } } @@ -205,11 +201,6 @@ where { #[tracing::instrument(level = "trace", skip(self))] fn get(&self, k: &Cid) -> anyhow::Result>> { - // Return immediately if the value is cached. - if let Some(value) = self.write_cache.read().get(k) { - return Ok(Some(value.clone())); - } - let indexed = &self.indexed; for position in indexed.get(*k)?.into_iter() { let cache_query = self.frame_cache.lock().get(position, self.cache_key, *k); @@ -246,26 +237,9 @@ where Ok(None) } - #[tracing::instrument(level = "trace", skip(self, block))] - fn put_keyed(&self, k: &Cid, block: &[u8]) -> anyhow::Result<()> { - debug_assert!( - CarBlock { - cid: *k, - data: block.to_vec() - } - .valid() - ); - self.write_cache.write().insert(*k, Vec::from(block)); - Ok(()) - } -} - -impl PersistentStore for ForestCar -where - ReaderT: ReadAt, -{ - fn put_keyed_persistent(&self, k: &Cid, block: &[u8]) -> anyhow::Result<()> { - self.put_keyed(k, block) + /// Not supported, use [`super::ManyCar`] instead. + fn put_keyed(&self, _: &Cid, _: &[u8]) -> anyhow::Result<()> { + anyhow::bail!("put is not supported in ForestCar, use ManyCar instead"); } } diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index ac4c541b819b..802475f0a72e 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -60,23 +60,20 @@ //! - CARv2 support //! - A wrapper that abstracts over car formats for reading. -use crate::cid_collections::{CidHashMap, hash_map::Entry as CidHashMapEntry}; +use crate::cid_collections::CidHashMap; use crate::db::PersistentStore; use crate::utils::db::car_stream::{CarV1Header, CarV2Header}; use crate::{ blocks::{Tipset, TipsetKey}, utils::encoding::from_slice_with_fallback, }; -use CidHashMapEntry::{Occupied, Vacant}; use cid::Cid; use fvm_ipld_blockstore::Blockstore; use integer_encoding::{FixedIntReader, VarIntReader}; use nunny::Vec as NonEmpty; use parking_lot::RwLock; use positioned_io::ReadAt; -use std::ops::DerefMut; use std::{ - any::Any, io::{ self, BufReader, ErrorKind::{InvalidData, Unsupported}, @@ -101,7 +98,7 @@ use tracing::{debug, trace}; /// /// When a block is requested, [`PlainCar`] scrolls to that offset, and reads the block, on-demand. /// -/// Writes for new blocks (which don't exist in the CAR already) are currently cached in-memory. +/// Writes for new blocks (which don't exist in the CAR already) are not supported. /// /// Random-access performance is expected to be poor, as the OS will have to load separate parts of /// the file from disk, and flush it for each read. However, (near) linear access should be pretty @@ -110,7 +107,6 @@ use tracing::{debug, trace}; /// See [module documentation](mod@self) for more. pub struct PlainCar { reader: ReaderT, - write_cache: RwLock>>, index: RwLock>, version: u64, header_v1: CarV1Header, @@ -162,7 +158,6 @@ impl PlainCar { debug!(num_blocks, "indexed CAR"); Ok(Self { reader, - write_cache: RwLock::new(CidHashMap::new()), index: RwLock::new(index), version, header_v1, @@ -197,7 +192,6 @@ impl PlainCar { pub fn into_dyn(self) -> PlainCar> { PlainCar { reader: Box::new(self.reader), - write_cache: self.write_cache, index: self.index, version: self.version, header_v1: self.header_v1, @@ -227,39 +221,23 @@ where { #[tracing::instrument(level = "trace", skip(self))] fn get(&self, k: &Cid) -> anyhow::Result>> { - match (self.index.read().get(k), self.write_cache.read().get(k)) { - (Some(_location), Some(_cached)) => { - trace!("evicting from write cache"); - Ok(self.write_cache.write().remove(k)) - } - (Some(UncompressedBlockDataLocation { offset, length }), None) => { + match self.index.read().get(k) { + Some(UncompressedBlockDataLocation { offset, length }) => { trace!("fetching from disk"); let mut data = vec![0; usize::try_from(*length).unwrap()]; self.reader.read_exact_at(*offset, &mut data)?; Ok(Some(data)) } - (None, Some(cached)) => { - trace!("getting from write cache"); - Ok(Some(cached.clone())) - } - (None, None) => { + None => { trace!("not found"); Ok(None) } } } - /// # Panics - /// - If the write cache already contains different data with this CID - /// - See also [`Self::new`]. - /// - /// Note: Locks have to be acquired in exactly the same order as in `get`, otherwise a - /// deadlock is imminent in a multi-threaded context. - #[tracing::instrument(level = "trace", skip(self, block))] - fn put_keyed(&self, k: &Cid, block: &[u8]) -> anyhow::Result<()> { - let mut index = self.index.write(); - let mut cache = self.write_cache.write(); - handle_write_cache(cache.deref_mut(), index.deref_mut(), k, block) + /// Not supported, use [`super::ManyCar`] instead. + fn put_keyed(&self, _: &Cid, _: &[u8]) -> anyhow::Result<()> { + anyhow::bail!("put is not supported in PlainCar, use ManyCar instead"); } } @@ -289,40 +267,6 @@ pub struct CompressedBlockDataLocation { pub location_in_frame: UncompressedBlockDataLocation, } -/// # Panics -/// - If the write cache already contains different data with this CID -/// -/// Note: This could potentially be enhanced with fine-grained read/write -/// locking, however the performance is acceptable for now. -fn handle_write_cache( - write_cache: &mut CidHashMap>, - index: &mut CidHashMap, - k: &Cid, - block: &[u8], -) -> anyhow::Result<()> { - match (index.get(k), write_cache.entry(*k)) { - (None, Occupied(already)) => match already.get() == block { - true => { - trace!("already in cache"); - Ok(()) - } - false => panic!("mismatched content on second write for CID {k}"), - }, - (None, Vacant(vacant)) => { - trace!(bytes = block.len(), "insert into cache"); - vacant.insert(block.to_owned()); - Ok(()) - } - (Some(_), Vacant(_)) => { - trace!("already on disk"); - Ok(()) - } - (Some(_), Occupied(_)) => { - unreachable!("we don't insert a CID in the write cache if it exists on disk") - } - } -} - fn cid_error_to_io_error(cid_error: cid::Error) -> io::Error { match cid_error { cid::Error::Io(io_error) => io_error, diff --git a/src/tool/subcommands/snapshot_cmd.rs b/src/tool/subcommands/snapshot_cmd.rs index 24d8e4658079..fd0e02fe8d78 100644 --- a/src/tool/subcommands/snapshot_cmd.rs +++ b/src/tool/subcommands/snapshot_cmd.rs @@ -6,9 +6,9 @@ use crate::blocks::Tipset; use crate::chain::index::{ChainIndex, ResolveNullTipset}; use crate::cli_shared::snapshot; use crate::daemon::bundle::load_actor_bundles; -use crate::db::PersistentStore; use crate::db::car::forest::DEFAULT_FOREST_CAR_FRAME_SIZE; use crate::db::car::{AnyCar, ManyCar}; +use crate::db::{MemoryDB, PersistentStore}; use crate::interpreter::{MessageCallbackCtx, VMTrace}; use crate::ipld::stream_chain; use crate::networks::{ChainConfig, NetworkChain, butterflynet, calibnet, mainnet}; @@ -172,7 +172,8 @@ impl SnapshotCommands { for file in snapshot_files { println!("Validating {}", file.display()); let result = async { - let store = AnyCar::try_from(file.as_path())?; + let store = ManyCar::new(MemoryDB::default()) + .with_read_only(AnyCar::try_from(file.as_path())?)?; validate_with_blockstore( store.heaviest_tipset()?, Arc::new(store), From 05ed1554ca9547b8e5d9a3acbd2f5e4e5093b5e7 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Wed, 23 Jul 2025 21:58:29 +0800 Subject: [PATCH 2/4] fix unit tests --- src/db/car/forest.rs | 2 +- src/db/car/plain.rs | 2 +- src/rpc/methods/chain.rs | 11 ++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/db/car/forest.rs b/src/db/car/forest.rs index f194a7136230..8ed887b159a1 100644 --- a/src/db/car/forest.rs +++ b/src/db/car/forest.rs @@ -239,7 +239,7 @@ where /// Not supported, use [`super::ManyCar`] instead. fn put_keyed(&self, _: &Cid, _: &[u8]) -> anyhow::Result<()> { - anyhow::bail!("put is not supported in ForestCar, use ManyCar instead"); + anyhow::bail!("ForestCar is read-only, use ManyCar instead"); } } diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index 802475f0a72e..75d3b54ee59e 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -237,7 +237,7 @@ where /// Not supported, use [`super::ManyCar`] instead. fn put_keyed(&self, _: &Cid, _: &[u8]) -> anyhow::Result<()> { - anyhow::bail!("put is not supported in PlainCar, use ManyCar instead"); + anyhow::bail!("PlainCar is read-only, use ManyCar instead"); } } diff --git a/src/rpc/methods/chain.rs b/src/rpc/methods/chain.rs index b0704c041995..eb71fe26d674 100644 --- a/src/rpc/methods/chain.rs +++ b/src/rpc/methods/chain.rs @@ -955,7 +955,10 @@ mod tests { use crate::{ blocks::{Chain4U, RawBlockHeader, chain4u}, - db::{MemoryDB, car::PlainCar}, + db::{ + MemoryDB, + car::{AnyCar, ManyCar}, + }, networks::{self, ChainConfig}, }; @@ -1067,10 +1070,12 @@ mod tests { let _ = (a, c1); } - impl ChainStore>> { + impl ChainStore> { fn _load(genesis_car: &'static [u8], genesis_cid: Cid) -> Self { let db = Arc::new(Chain4U::with_blockstore( - PlainCar::new(genesis_car).unwrap(), + ManyCar::new(MemoryDB::default()) + .with_read_only(AnyCar::new(genesis_car).unwrap()) + .unwrap(), )); let genesis_block_header = db.get_cbor(&genesis_cid).unwrap().unwrap(); ChainStore::new( From c14b494a1ac33898bc40120bd0a568a76ff32358 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Wed, 23 Jul 2025 22:43:25 +0800 Subject: [PATCH 3/4] panic in put_keyed --- src/db/car/forest.rs | 2 +- src/db/car/plain.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/db/car/forest.rs b/src/db/car/forest.rs index 8ed887b159a1..655f5a36b953 100644 --- a/src/db/car/forest.rs +++ b/src/db/car/forest.rs @@ -239,7 +239,7 @@ where /// Not supported, use [`super::ManyCar`] instead. fn put_keyed(&self, _: &Cid, _: &[u8]) -> anyhow::Result<()> { - anyhow::bail!("ForestCar is read-only, use ManyCar instead"); + unreachable!("ForestCar is read-only, use ManyCar instead"); } } diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index 75d3b54ee59e..fc508f3a3d9e 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -237,7 +237,7 @@ where /// Not supported, use [`super::ManyCar`] instead. fn put_keyed(&self, _: &Cid, _: &[u8]) -> anyhow::Result<()> { - anyhow::bail!("PlainCar is read-only, use ManyCar instead"); + unreachable!("PlainCar is read-only, use ManyCar instead"); } } From 86d76aadef70db736cec0cd2701f4ea0d68a014b Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 24 Jul 2025 16:04:34 +0800 Subject: [PATCH 4/4] allow dead_code for functions --- src/cid_collections/hash_map.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/cid_collections/hash_map.rs b/src/cid_collections/hash_map.rs index 128a455f81d5..3ae7060f6abf 100644 --- a/src/cid_collections/hash_map.rs +++ b/src/cid_collections/hash_map.rs @@ -1,8 +1,6 @@ // Copyright 2019-2025 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT -#![allow(dead_code)] - use super::{CidV1DagCborBlake2b256, MaybeCompactedCid, Uncompactable}; use cid::Cid; #[cfg(doc)] @@ -110,6 +108,7 @@ impl CidHashMap { /// Gets the given key's corresponding entry in the map for in-place manipulation. /// /// See also [`HashMap::entry`]. + #[allow(dead_code)] pub fn entry(&mut self, key: Cid) -> Entry { match MaybeCompactedCid::from(key) { MaybeCompactedCid::Compact(c) => match self.compact.entry(c) { @@ -135,6 +134,7 @@ impl CidHashMap { /// A view into a single entry in a map, which may either be vacant or occupied. /// /// This `enum` is constructed using [`CidHashMap::entry`]. +#[allow(dead_code)] #[derive(Debug)] pub enum Entry<'a, V: 'a> { /// An occupied entry. @@ -147,6 +147,7 @@ pub enum Entry<'a, V: 'a> { /// It is part of the [`Entry`] enum. /// /// See also [`std::collections::hash_map::OccupiedEntry`]. +#[allow(dead_code)] #[derive(Debug)] pub struct OccupiedEntry<'a, V> { inner: OccupiedEntryInner<'a, V>, @@ -156,6 +157,7 @@ impl OccupiedEntry<'_, V> { /// Gets a reference to the value in the entry. /// /// See also [`std::collections::hash_map::OccupiedEntry::get`]. + #[allow(dead_code)] pub fn get(&self) -> &V { match &self.inner { OccupiedEntryInner::Compact(c) => c.get(), @@ -165,6 +167,7 @@ impl OccupiedEntry<'_, V> { } /// Hides compaction from users. +#[allow(dead_code)] #[derive(Debug)] enum OccupiedEntryInner<'a, V> { Compact(StdOccupiedEntry<'a, CidV1DagCborBlake2b256, V>), @@ -175,6 +178,7 @@ enum OccupiedEntryInner<'a, V> { /// It is part of the [`Entry`] enum. /// /// See also [`std::collections::hash_map::VacantEntry`]. +#[allow(dead_code)] #[derive(Debug)] pub struct VacantEntry<'a, V> { inner: VacantEntryInner<'a, V>, @@ -185,6 +189,7 @@ impl<'a, V> VacantEntry<'a, V> { /// and returns a mutable reference to it. /// /// See also [`std::collections::hash_map::VacantEntry::insert`]. + #[allow(dead_code)] pub fn insert(self, value: V) -> &'a mut V { match self.inner { VacantEntryInner::Compact(c) => c.insert(value), @@ -194,6 +199,7 @@ impl<'a, V> VacantEntry<'a, V> { } /// Hides compaction from users. +#[allow(dead_code)] #[derive(Debug)] enum VacantEntryInner<'a, V> { Compact(StdVacantEntry<'a, CidV1DagCborBlake2b256, V>),