@@ -213,54 +213,56 @@ def _collect_single_test_coverage( # pylint: disable=too-many-locals
213213 """
214214 Collect file-level coverage for a single test, fully self-contained.
215215
216- Creates a temp directory with copies of .gcda files and their matching
217- .gcno files, then runs a single batched gcov call. This avoids touching
218- the shared build tree, making it safe to call concurrently.
216+ Copies .gcno files from the real build tree into the test's isolated
217+ .gcda directory (alongside the .gcda files), runs a batched gcov call,
218+ then removes the .gcno copies. Each test has its own directory, so
219+ this is safe to call concurrently without touching the shared build tree.
219220 """
220221 build_subdir = os .path .join (test_gcda , "build" )
221222 if not os .path .isdir (build_subdir ):
222223 return uuid , []
223224
224- with tempfile .TemporaryDirectory () as tmpdir :
225- matching_gcno = []
226-
227- for dirpath , _ , filenames in os .walk (build_subdir ):
228- for fname in filenames :
229- if not fname .endswith (".gcda" ):
230- continue
231- gcda_src = os .path .join (dirpath , fname )
232- rel = os .path .relpath (gcda_src , test_gcda )
233-
234- # Copy .gcda into temp dir
235- gcda_dst = os .path .join (tmpdir , rel )
236- os .makedirs (os .path .dirname (gcda_dst ), exist_ok = True )
237- shutil .copy2 (gcda_src , gcda_dst )
238-
239- # Copy matching .gcno from real build tree
240- gcno_rel = rel [:- 5 ] + ".gcno"
241- gcno_src = os .path .join (root_dir , gcno_rel )
242- if os .path .isfile (gcno_src ):
243- gcno_dst = os .path .join (tmpdir , gcno_rel )
244- shutil .copy2 (gcno_src , gcno_dst )
245- matching_gcno .append (gcno_dst )
246-
247- if not matching_gcno :
248- return uuid , []
249-
250- # Batch: single gcov call for all .gcno files in this test.
251- cmd = [gcov_bin , "--json-format" , "--stdout" ] + matching_gcno
252- try :
253- proc = subprocess .run (
254- cmd , capture_output = True , cwd = tmpdir , timeout = 120 , check = False
255- )
256- except (subprocess .TimeoutExpired , subprocess .SubprocessError , OSError ):
257- return uuid , []
225+ gcno_copies = []
258226
259- if proc .returncode != 0 or not proc .stdout :
260- return uuid , []
227+ for dirpath , _ , filenames in os .walk (build_subdir ):
228+ for fname in filenames :
229+ if not fname .endswith (".gcda" ):
230+ continue
231+ # Derive matching .gcno path in the real build tree
232+ gcda_path = os .path .join (dirpath , fname )
233+ rel = os .path .relpath (gcda_path , test_gcda )
234+ gcno_rel = rel [:- 5 ] + ".gcno"
235+ gcno_src = os .path .join (root_dir , gcno_rel )
236+ if os .path .isfile (gcno_src ):
237+ # Copy .gcno alongside .gcda in the test's isolated dir
238+ gcno_dst = os .path .join (dirpath , fname [:- 5 ] + ".gcno" )
239+ shutil .copy2 (gcno_src , gcno_dst )
240+ gcno_copies .append (gcno_dst )
241+
242+ if not gcno_copies :
243+ return uuid , []
261244
262- coverage = _parse_gcov_json_output (proc .stdout , root_dir )
263- return uuid , sorted (coverage )
245+ # Batch: single gcov call for all .gcno files in this test.
246+ # Run from root_dir so source path resolution works correctly.
247+ cmd = [gcov_bin , "--json-format" , "--stdout" ] + gcno_copies
248+ try :
249+ proc = subprocess .run (
250+ cmd , capture_output = True , cwd = root_dir , timeout = 120 , check = False
251+ )
252+ except (subprocess .TimeoutExpired , subprocess .SubprocessError , OSError ):
253+ return uuid , []
254+ finally :
255+ for g in gcno_copies :
256+ try :
257+ os .remove (g )
258+ except OSError :
259+ pass
260+
261+ if proc .returncode != 0 or not proc .stdout :
262+ return uuid , []
263+
264+ coverage = _parse_gcov_json_output (proc .stdout , root_dir )
265+ return uuid , sorted (coverage )
264266
265267
266268def _run_single_test_direct (test_info : dict , gcda_dir : str , strip : str ) -> tuple : # pylint: disable=too-many-locals
@@ -390,8 +392,11 @@ def build_coverage_cache( # pylint: disable=unused-argument,too-many-locals,too
390392
391393 if n_jobs is None :
392394 n_jobs = max (os .cpu_count () or 1 , 1 )
395+ # Cap Phase 1 parallelism: each test spawns MPI processes (~500MB each),
396+ # so too many concurrent tests cause OOM on large nodes.
397+ phase1_jobs = min (n_jobs , 32 )
393398 cons .print (f"[bold]Building coverage cache for { len (cases )} tests "
394- f"({ n_jobs } parallel )...[/bold]" )
399+ f"({ phase1_jobs } test workers, { n_jobs } gcov workers )...[/bold]" )
395400 cons .print (f"[dim]Using gcov binary: { gcov_bin } [/dim]" )
396401 cons .print (f"[dim]Found { len (gcno_files )} .gcno files[/dim]" )
397402 cons .print (f"[dim]GCOV_PREFIX_STRIP={ strip } [/dim]" )
@@ -412,7 +417,7 @@ def build_coverage_cache( # pylint: disable=unused-argument,too-many-locals,too
412417 cons .print ("[bold]Phase 1/2: Running tests...[/bold]" )
413418 test_results : dict = {}
414419 all_failures : dict = {}
415- with ThreadPoolExecutor (max_workers = n_jobs ) as pool :
420+ with ThreadPoolExecutor (max_workers = phase1_jobs ) as pool :
416421 futures = {
417422 pool .submit (_run_single_test_direct , info , gcda_dir , strip ): info
418423 for info in test_infos
@@ -432,9 +437,24 @@ def build_coverage_cache( # pylint: disable=unused-argument,too-many-locals,too
432437 fail_str = ", " .join (f"{ t } ={ rc } " for t , rc in fails )
433438 cons .print (f" [yellow]{ uuid } [/yellow]: { fail_str } " )
434439
440+ # Diagnostic: verify .gcda files exist for at least one test.
441+ sample_uuid = next (iter (test_results ), None )
442+ if sample_uuid :
443+ sample_gcda = test_results [sample_uuid ]
444+ sample_build = os .path .join (sample_gcda , "build" )
445+ if os .path .isdir (sample_build ):
446+ gcda_count = sum (
447+ 1 for _ , _ , fns in os .walk (sample_build )
448+ for f in fns if f .endswith (".gcda" )
449+ )
450+ cons .print (f"[dim]Sample test { sample_uuid } : "
451+ f"{ gcda_count } .gcda files in { sample_build } [/dim]" )
452+ else :
453+ cons .print (f"[yellow]Sample test { sample_uuid } : "
454+ f"no build/ dir in { sample_gcda } [/yellow]" )
455+
435456 # Phase 2: Collect gcov coverage from each test's isolated .gcda directory.
436- # Each test is processed in its own temp dir (copied .gcda + .gcno files)
437- # with a single batched gcov call, so tests can run in parallel.
457+ # .gcno files are temporarily copied alongside .gcda files, then removed.
438458 cons .print ()
439459 cons .print ("[bold]Phase 2/2: Collecting coverage...[/bold]" )
440460 cache : dict = {}
0 commit comments