From 0f0f3d3f0d2aa08c23814aca6ea51fad5e227389 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Fri, 5 Jun 2026 06:24:53 +0200
Subject: [PATCH] =?UTF-8?q?fix(encoder):=20bounds-check=20LDR/STR=20imm12?=
 =?UTF-8?q?=20offset=20=E2=80=94=20close=20the=20wrong-address=20class=20(?=
 =?UTF-8?q?#259)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gale #259: the eight thumb32 load/store immediate-offset encoders
(encode_thumb32_ldr/str/ldrb_imm/ldrsb_imm/ldrh_imm/ldrsh_imm/strb_imm/strh_imm)
masked `offset & 0xFFF` and emitted unconditionally — for offset >= 0x1000 the
access silently targeted the wrong address (offset & 0xFFF). Same class as #253
(ADD/SUB) and #255 (CMP/ADDS/SUBS).

Fix: a shared `check_ldst_imm12` guard — error for offset > 0xFFF (the imm12
range is 0..=4095), forcing the selector to use register-offset addressing,
rather than silently wrap. Closes the load/store wrong-address class.

Liveness: probing `i32.load offset=5000` compiles successfully (no Err) — the
selector already materializes large offsets and never feeds the imm12 encoders
>= 0x1000, so this is DEFENSIVE hardening (matching gale's note), not a live
breakage.

GATE: clippy clean; backend suite passes; the three frozen fixtures are
BYTE-IDENTICAL between this change and main (control_step / flight_seam_flat /
div_const .o compared with `cmp` — the guard only adds an error path for
offset > 0xFFF, unreachable by the fixtures). Test
ldst_imm12_offset_errors_when_out_of_range.

NOTE: the /tmp/armv differential venv lost its `wasmtime` module (tooling
regression, unrelated to this change) — verified via byte-comparison instead,
which is strictly stronger than the result-level differential here.

Closes #259. Part of #242.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 crates/synth-backend/src/arm_encoder.rs | 60 +++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/crates/synth-backend/src/arm_encoder.rs b/crates/synth-backend/src/arm_encoder.rs
index f0c7d1c..6bb1994 100644
--- a/crates/synth-backend/src/arm_encoder.rs
+++ b/crates/synth-backend/src/arm_encoder.rs
@@ -6258,6 +6258,7 @@ impl ArmEncoder {
         let base_bits = reg_to_bits(base);
 
         // LDR.W Rd, [Rn, #imm12]
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF8D0 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
 
@@ -6272,6 +6273,7 @@ impl ArmEncoder {
         let base_bits = reg_to_bits(base);
 
         // STR.W Rd, [Rn, #imm12]
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF8C0 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
 
@@ -6321,6 +6323,7 @@ impl ArmEncoder {
         let rd_bits = reg_to_bits(rd);
         let base_bits = reg_to_bits(base);
         // LDRB.W Rd, [Rn, #imm12]: 1111 1000 1001 Rn | Rt imm12
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF890 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
         let mut bytes = hw1.to_le_bytes().to_vec();
@@ -6346,6 +6349,7 @@ impl ArmEncoder {
         let rd_bits = reg_to_bits(rd);
         let base_bits = reg_to_bits(base);
         // LDRSB.W Rd, [Rn, #imm12]: 1111 1001 1001 Rn | Rt imm12
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF990 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
         let mut bytes = hw1.to_le_bytes().to_vec();
@@ -6371,6 +6375,7 @@ impl ArmEncoder {
         let rd_bits = reg_to_bits(rd);
         let base_bits = reg_to_bits(base);
         // LDRH.W Rd, [Rn, #imm12]: 1111 1000 1011 Rn | Rt imm12
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF8B0 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
         let mut bytes = hw1.to_le_bytes().to_vec();
@@ -6396,6 +6401,7 @@ impl ArmEncoder {
         let rd_bits = reg_to_bits(rd);
         let base_bits = reg_to_bits(base);
         // LDRSH.W Rd, [Rn, #imm12]: 1111 1001 1011 Rn | Rt imm12
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF9B0 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
         let mut bytes = hw1.to_le_bytes().to_vec();
@@ -6421,6 +6427,7 @@ impl ArmEncoder {
         let rd_bits = reg_to_bits(rd);
         let base_bits = reg_to_bits(base);
         // STRB.W Rd, [Rn, #imm12]: 1111 1000 1000 Rn | Rt imm12
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF880 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
         let mut bytes = hw1.to_le_bytes().to_vec();
@@ -6446,6 +6453,7 @@ impl ArmEncoder {
         let rd_bits = reg_to_bits(rd);
         let base_bits = reg_to_bits(base);
         // STRH.W Rd, [Rn, #imm12]: 1111 1000 1010 Rn | Rt imm12
+        check_ldst_imm12(offset)?;
         let hw1: u16 = (0xF8A0 | base_bits) as u16;
         let hw2: u16 = ((rd_bits << 12) | (offset & 0xFFF)) as u16;
         let mut bytes = hw1.to_le_bytes().to_vec();
@@ -6695,6 +6703,21 @@ fn try_thumb_expand_imm(value: u32) -> Option<u32> {
     None
 }
 
+/// Guard a Thumb-2 `LDR/STR Rd, [Rn, #imm12]` offset. The imm12 form supports
+/// `0..=4095`; a larger offset must be materialized into a register by the
+/// selector (register-offset addressing). Returning `Err` rather than silently
+/// masking `offset & 0xFFF` closes the wrong-address miscompile class (#259,
+/// the load/store sibling of #253/#255).
+fn check_ldst_imm12(offset: u32) -> Result<()> {
+    if offset > 0xFFF {
+        Err(synth_core::Error::synthesis(
+            "load/store immediate offset > 0xFFF (4095) — materialize the offset into a register",
+        ))
+    } else {
+        Ok(())
+    }
+}
+
 fn reg_to_bits(reg: &Reg) -> u32 {
     match reg {
         Reg::R0 => 0,
@@ -9155,6 +9178,43 @@ mod tests {
         );
     }
 
+    /// #259: LDR/STR (and sub-word) immediate-offset encoders truncated
+    /// `offset & 0xFFF`, silently targeting the wrong address for offset >= 4096.
+    /// They now error (the selector must use register-offset addressing) — the
+    /// load/store sibling of the #253/#255 class. Offsets <= 4095 still encode.
+    #[test]
+    fn ldst_imm12_offset_errors_when_out_of_range() {
+        let encoder = ArmEncoder::new_thumb2();
+        // offset 0xFFF (4095): valid → Ok; ldr r0, [r1, #4095].
+        assert!(
+            encoder
+                .encode_thumb32_ldr(&Reg::R0, &Reg::R1, 0xFFF)
+                .is_ok()
+        );
+        // offset 0x1000 (4096): out of imm12 range → Err (not & 0xFFF → #0).
+        assert!(
+            encoder
+                .encode_thumb32_ldr(&Reg::R0, &Reg::R1, 0x1000)
+                .is_err(),
+            "ldr offset 4096 must error, not wrap to 0"
+        );
+        assert!(
+            encoder
+                .encode_thumb32_str(&Reg::R0, &Reg::R1, 0x1000)
+                .is_err()
+        );
+        assert!(
+            encoder
+                .encode_thumb32_ldrb_imm(&Reg::R0, &Reg::R1, 5000)
+                .is_err()
+        );
+        assert!(
+            encoder
+                .encode_thumb32_strh_imm(&Reg::R0, &Reg::R1, 5000)
+                .is_err()
+        );
+    }
+
     /// Latent miscompile fix: ADD/SUB with a >0xFF immediate (e.g.
     /// `add sp, sp, #frame` for a >=256-byte frame) used ADD.W (T3), whose
     /// `i:imm3:imm8` is a ThumbExpandImm modified immediate — so `#256` silently