From 449c1c3da84224cf0e5f0772d347ca1c72d51ed5 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier
Date: Thu, 4 Jun 2026 21:39:18 +0200
Subject: [PATCH] =?UTF-8?q?test(synthesis):=20pin=20the=20real=20codegen?=
 =?UTF-8?q?=20waste=20=E2=80=94=20redundant=20const,=20not=20dead=20stores?=
 =?UTF-8?q?=20(VCR-RA-001)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Evidence-driven: measured on REAL selector output,
(p & 0x7e) + (p & 0x7e) lowers to
`movw r1,#126; and r2,r0,r1; movw r3,#126; and r4,r0,r3; ...` — the
selector RE-MATERIALIZES 0x7e into a fresh register while the first copy
is still live.

So on the shape gale measures (flat_flight's repeated clamps), the
dominant waste is REDUNDANT MATERIALIZATION (const-CSE territory), NOT
dead stores: analyze_function reports dead_defs=0, redundant_consts=1.

This redirects the transform priority — the dead-store pass (#246) is
correct but a no-op on this waste; the delta-producing transform is
const-CSE (drop the redundant movw, rewrite the consumer to the resident
reg). And 0x7e is a valid Thumb-2 AND immediate, so the materialization
is itself avoidable via immediate folding — an even larger latent win.

The test pins current suboptimal codegen and is EXPECTED TO FLIP when
const-CSE / immediate folding lands (redundant → 0) — the flip is the
signal the optimization works. Measure-before-transform, per the
methodology. Part of #242.

Co-Authored-By: Claude Opus 4.8
---
 .../src/instruction_selector.rs | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/crates/synth-synthesis/src/instruction_selector.rs b/crates/synth-synthesis/src/instruction_selector.rs
index 6fe1bc8..ff50dc6 100644
--- a/crates/synth-synthesis/src/instruction_selector.rs
+++ b/crates/synth-synthesis/src/instruction_selector.rs
@@ -14381,6 +14381,53 @@ mod tests {
         assert_eq!(out.len(), instrs.len() - removed);
     }
 
+    /// VCR-RA-001 evidence: which transform actually targets the real waste?
+ /// + /// Measured on real selector output, `(p & 0x7e) + (p & 0x7e)` lowers to: + /// movw r1,#126 ; and r2,r0,r1 ; movw r3,#126 ; and r4,r0,r3 ; add ... + /// i.e. the selector RE-MATERIALIZES `0x7e` into a fresh register while the + /// first copy is still live. So the dominant waste is **redundant + /// materialization** (const-CSE territory), NOT dead stores — dead-store + /// elimination is a no-op here. (And `0x7e` is a valid Thumb-2 `AND` + /// immediate, so the materialization is itself avoidable via immediate + /// folding — an even larger latent win.) + /// + /// This test documents the current suboptimal codegen and pins the + /// transform priority. It is EXPECTED TO FLIP when const-CSE / immediate + /// folding lands (redundant count → 0) — at which point update it to assert + /// the improved shape; the flip is the signal the optimization works. + #[test] + fn vcr_redundant_const_is_the_real_waste_not_dead_stores() { + use crate::liveness; + let mut selector = fresh_selector(); + let ops = vec![ + WasmOp::LocalGet(0), + WasmOp::I32Const(0x7e), + WasmOp::I32And, + WasmOp::LocalGet(0), + WasmOp::I32Const(0x7e), + WasmOp::I32And, + WasmOp::I32Add, + WasmOp::End, + ]; + let instrs = selector.select_with_stack(&ops, 1).unwrap(); + let report = liveness::analyze_function(&instrs); + + // Dead-store elimination finds nothing here — DCE is not the lever. + assert_eq!( + report.dead_defs.len(), + 0, + "no dead stores in this pattern: {instrs:#?}" + ); + // const-CSE finds the real waste: the second `0x7e` is redundant. + assert_eq!( + report.redundant_consts.len(), + 1, + "expected one redundant 0x7e materialization: {instrs:#?}" + ); + assert_eq!(report.redundant_consts[0].value, 0x7e); + } + #[test] fn test_select_with_stack_i32_local_uses_str_ldr() { // An i32 non-param local should produce Str/Ldr to the SP-based slot.