From 80b47ac1832ac7c44c6d4f7cb38ec847e54e41ef Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Thu, 4 Dec 2025 20:39:05 +0100 Subject: [PATCH] clean: Optimize clean with multiple -p specifiers This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now walk each directory at most once and add to the list of files to be cleaned, step by step. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino Co-authored-by: Ed Page --- .../compiler/build_context/target_info.rs | 7 + src/cargo/ops/cargo_clean.rs | 219 ++++++++---------- 2 files changed, 105 insertions(+), 121 deletions(-) diff --git a/src/cargo/core/compiler/build_context/target_info.rs b/src/cargo/core/compiler/build_context/target_info.rs index 10671f7d50f..170a676588e 100644 --- a/src/cargo/core/compiler/build_context/target_info.rs +++ b/src/cargo/core/compiler/build_context/target_info.rs @@ -147,6 +147,13 @@ impl FileType { should_replace_hyphens: true, } } + + pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) { + ( + format!("{}{}-", self.prefix, target.crate_name()), + self.suffix.clone(), + ) + } } impl TargetInfo { diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index dd4c3993fc1..7f445259dcc 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -9,8 +9,9 @@ use crate::util::interning::InternedString; use crate::util::{GlobalContext, Progress, ProgressStyle}; use anyhow::bail; use cargo_util::paths; -use indexmap::IndexSet; -use std::collections::{HashMap, HashSet}; +use indexmap::{IndexMap, IndexSet}; + +use std::ffi::OsString; use std::fs; use std::path::{Path, PathBuf}; use std::rc::Rc; @@ -193,6 +194,7 @@ fn clean_specs( let packages = pkg_set.get_many(pkg_ids)?; clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len())); + let mut dirs_to_clean = DirectoriesToClean::default(); if clean_ctx.gctx.cli_unstable().build_dir_new_layout { for pkg in packages { @@ -233,48 +235,54 @@ fn clean_specs( }; if let Some(uplift_dir) = uplift_dir { for file_type in file_types { - let uplifted_path = - uplift_dir.join(file_type.uplift_filename(target)); - clean_ctx.rm_rf(&uplifted_path)?; + let uplifted_filename = file_type.uplift_filename(target); + // Dep-info generated by Cargo itself. - let dep_info = uplifted_path.with_extension("d"); - clean_ctx.rm_rf(&dep_info)?; + let dep_info = Path::new(&uplifted_filename) + .with_extension("d") + .to_string_lossy() + .into_owned(); + + dirs_to_clean.mark_utf(uplift_dir, |filename| { + filename == uplifted_filename || filename == dep_info + }); } } + let path_dash = format!("{}-", crate_name); - let dir = escape_glob_path(layout.build_dir().incremental())?; - let incremental = Path::new(&dir).join(format!("{}-*", crate_name)); - clean_ctx.rm_rf_glob(&incremental)?; + dirs_to_clean.mark_utf(layout.build_dir().incremental(), |filename| { + filename.starts_with(&path_dash) + }); } } } } } else { - // Try to reduce the amount of times we iterate over the same target directory by storing away - // the directories we've iterated over (and cleaned for a given package). - let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default(); for pkg in packages { - let pkg_dir = format!("{}-*", pkg.name()); clean_ctx.progress.on_cleaning_package(&pkg.name())?; // Clean fingerprints. for (_, layout) in &layouts_with_host { - let dir = escape_glob_path(layout.build_dir().legacy_fingerprint())?; - clean_ctx.rm_rf_package_glob_containing_hash( - &pkg.name(), - &Path::new(&dir).join(&pkg_dir), - )?; + dirs_to_clean.mark_utf(layout.build_dir().legacy_fingerprint(), |filename| { + let Some((pkg_name, _)) = filename.rsplit_once('-') else { + return false; + }; + + pkg_name == pkg.name().as_str() + }); } for target in pkg.targets() { if target.is_custom_build() { // Get both the build_script_build and the output directory. for (_, layout) in &layouts_with_host { - let dir = escape_glob_path(layout.build_dir().build())?; - clean_ctx.rm_rf_package_glob_containing_hash( - &pkg.name(), - &Path::new(&dir).join(&pkg_dir), - )?; + dirs_to_clean.mark_utf(layout.build_dir().build(), |filename| { + let Some((current_name, _)) = filename.rsplit_once('-') else { + return false; + }; + + current_name == pkg.name().as_str() + }); } continue; } @@ -304,15 +312,15 @@ fn clean_specs( } _ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())), }; - let mut dir_glob_str = escape_glob_path(dir)?; - let dir_glob = Path::new(&dir_glob_str); + for file_type in file_types { // Some files include a hash in the filename, some don't. - let hashed_name = file_type.output_filename(target, Some("*")); + let (prefix, suffix) = file_type.output_prefix_suffix(target); let unhashed_name = file_type.output_filename(target, None); - - clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?; - clean_ctx.rm_rf(&dir.join(&unhashed_name))?; + dirs_to_clean.mark_utf(&dir, |filename| { + (filename.starts_with(&prefix) && filename.ends_with(&suffix)) + || unhashed_name == filename + }); // Remove the uplifted copy. if let Some(uplift_dir) = uplift_dir { @@ -324,49 +332,79 @@ fn clean_specs( clean_ctx.rm_rf(&dep_info)?; } } - let unhashed_dep_info = dir.join(format!("{}.d", crate_name)); - clean_ctx.rm_rf(&unhashed_dep_info)?; + let unhashed_dep_info = format!("{}.d", crate_name); + dirs_to_clean.mark_utf(dir, |filename| filename == unhashed_dep_info); - if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { - dir_glob_str.push(std::path::MAIN_SEPARATOR); - } - dir_glob_str.push('*'); - let dir_glob_str: Rc = dir_glob_str.into(); - if cleaned_packages - .entry(dir_glob_str.clone()) - .or_default() - .insert(crate_name.clone()) - { - let paths = [ + dirs_to_clean.mark_utf(dir, |filename| { + if filename.starts_with(&path_dash) { // Remove dep-info file generated by rustc. It is not tracked in // file_types. It does not have a prefix. - (path_dash, ".d"), + filename.ends_with(".d") + } else if filename.starts_with(&path_dot) { // Remove split-debuginfo files generated by rustc. - (path_dot, ".o"), - (path_dot, ".dwo"), - (path_dot, ".dwp"), - ]; - clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?; - } + [".o", ".dwo", ".dwp"] + .iter() + .any(|suffix| filename.ends_with(suffix)) + } else { + false + } + }); // TODO: what to do about build_script_build? - let dir = escape_glob_path(layout.build_dir().incremental())?; - let incremental = Path::new(&dir).join(format!("{}-*", crate_name)); - clean_ctx.rm_rf_glob(&incremental)?; + dirs_to_clean.mark_utf(layout.build_dir().incremental(), |filename| { + filename.starts_with(path_dash) + }); } } } } } + clean_ctx.rm_rf_all(dirs_to_clean)?; Ok(()) } -fn escape_glob_path(pattern: &Path) -> CargoResult { - let pattern = pattern - .to_str() - .ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?; - Ok(glob::Pattern::escape(pattern)) +#[derive(Default)] +struct DirectoriesToClean { + dir_contents: IndexMap>, + to_remove: IndexSet, +} + +impl DirectoriesToClean { + fn mark(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&OsString) -> bool) { + let entry = match self.dir_contents.entry(directory.to_owned()) { + indexmap::map::Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), + indexmap::map::Entry::Vacant(vacant_entry) => { + let Ok(dir_entries) = std::fs::read_dir(directory.to_owned()) else { + return; + }; + vacant_entry.insert( + dir_entries + .into_iter() + .flatten() + .map(|entry| entry.file_name()) + .collect::>(), + ) + } + }; + + entry.retain(|path| { + let should_remove = should_remove_entry(path); + if should_remove { + self.to_remove.insert(directory.join(path)); + } + !should_remove + }); + } + + fn mark_utf(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&str) -> bool) { + self.mark(directory, move |filename| { + let Some(as_utf) = filename.to_str() else { + return false; + }; + should_remove_entry(as_utf) + }); + } } impl<'gctx> CleanContext<'gctx> { @@ -384,74 +422,13 @@ impl<'gctx> CleanContext<'gctx> { } } - /// Glob remove artifacts for the provided `package` - /// - /// Make sure the artifact is for `package` and not another crate that is prefixed by - /// `package` by getting the original name stripped of the trailing hash and possible - /// extension - fn rm_rf_package_glob_containing_hash( - &mut self, - package: &str, - pattern: &Path, - ) -> CargoResult<()> { - // TODO: Display utf8 warning to user? Or switch to globset? - let pattern = pattern - .to_str() - .ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?; - for path in glob::glob(pattern)? { - let path = path?; - - let pkg_name = path - .file_name() - .and_then(std::ffi::OsStr::to_str) - .and_then(|artifact| artifact.rsplit_once('-')) - .ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))? - .0; - - if pkg_name != package { - continue; - } - + fn rm_rf_all(&mut self, dirs: DirectoriesToClean) -> CargoResult<()> { + for path in dirs.to_remove { self.rm_rf(&path)?; } Ok(()) } - fn rm_rf_glob(&mut self, pattern: &Path) -> CargoResult<()> { - // TODO: Display utf8 warning to user? Or switch to globset? - let pattern = pattern - .to_str() - .ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?; - for path in glob::glob(pattern)? { - self.rm_rf(&path?)?; - } - Ok(()) - } - - /// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs). - /// - /// This function iterates over files matching a glob (`pattern`) and removes those whose - /// filenames start and end with specific prefix/suffix pairs. It should be more efficient for - /// operations involving multiple prefix/suffix pairs, as it iterates over the directory - /// only once, unlike making multiple calls to [`Self::rm_rf_glob`]. - fn rm_rf_prefix_list( - &mut self, - pattern: &str, - path_matchers: &[(&str, &str)], - ) -> CargoResult<()> { - for path in glob::glob(pattern)? { - let path = path?; - let filename = path.file_name().and_then(|name| name.to_str()).unwrap(); - if path_matchers - .iter() - .any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix)) - { - self.rm_rf(&path)?; - } - } - Ok(()) - } - pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> { let meta = match fs::symlink_metadata(path) { Ok(meta) => meta,