Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 88 additions & 36 deletions crates/compiler-core/src/bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ use crate::{
};
use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeSet, fmt, string::String, vec::Vec};
use bitflags::bitflags;
use core::{cell::UnsafeCell, hash, mem, ops::Deref};
use core::{
cell::UnsafeCell,
hash, mem,
ops::Deref,
sync::atomic::{AtomicU8, AtomicU16, Ordering},
};
use itertools::Itertools;
use malachite_bigint::BigInt;
use num_complex::Complex64;
Expand Down Expand Up @@ -367,8 +372,13 @@ impl TryFrom<&[u8]> for CodeUnit {

pub struct CodeUnits(UnsafeCell<Box<[CodeUnit]>>);

// SAFETY: All mutation of the inner buffer is serialized by `monitoring_data: PyMutex`
// in `PyCode`. The `UnsafeCell` is required because `replace_op` mutates through `&self`.
// SAFETY: All cache operations use atomic read/write instructions.
// - replace_op / compare_exchange_op: AtomicU8 store/CAS (Release)
// - cache read/write: AtomicU16 load/store (Relaxed)
// - adaptive counter: AtomicU8 load/store (Relaxed)
// Ordering is established by:
// - replace_op (Release) ↔ dispatch loop read_op (Acquire) for cache data visibility
// - tp_version_tag (Acquire) for descriptor pointer validity
unsafe impl Sync for CodeUnits {}

impl Clone for CodeUnits {
Expand Down Expand Up @@ -435,45 +445,81 @@ impl Deref for CodeUnits {

impl CodeUnits {
/// Replace the opcode at `index` in-place without changing the arg byte.
/// Uses atomic Release store to ensure prior cache writes are visible
/// to threads that subsequently read the new opcode with Acquire.
///
/// # Safety
/// - `index` must be in bounds.
/// - `new_op` must have the same arg semantics as the original opcode.
/// - The caller must ensure exclusive access to the instruction buffer
/// (no concurrent reads or writes to the same `CodeUnits`).
pub unsafe fn replace_op(&self, index: usize, new_op: Instruction) {
unsafe {
let units = &mut *self.0.get();
let unit_ptr = units.as_mut_ptr().add(index);
// Write only the opcode byte (first byte of CodeUnit due to #[repr(C)])
let op_ptr = unit_ptr as *mut u8;
core::ptr::write(op_ptr, new_op.into());
}
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
unsafe { &*ptr }.store(new_op.into(), Ordering::Release);
}

/// Atomically replace opcode only if it still matches `expected`.
/// Returns true on success. Uses Release ordering on success.
///
/// # Safety
/// - `index` must be in bounds.
pub unsafe fn compare_exchange_op(
&self,
index: usize,
expected: Instruction,
new_op: Instruction,
) -> bool {
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
unsafe { &*ptr }
.compare_exchange(
expected.into(),
new_op.into(),
Ordering::Release,
Ordering::Relaxed,
)
.is_ok()
}

/// Atomically read the opcode at `index` with Acquire ordering.
/// Pairs with `replace_op` (Release) to ensure cache data visibility.
pub fn read_op(&self, index: usize) -> Instruction {
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
let byte = unsafe { &*ptr }.load(Ordering::Acquire);
// SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op.
unsafe { mem::transmute::<u8, Instruction>(byte) }
}

/// Atomically read the arg byte at `index` with Relaxed ordering.
pub fn read_arg(&self, index: usize) -> OpArgByte {
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed))
}
Comment on lines +485 to 499
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

cd crates/compiler-core && wc -l src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 85


🏁 Script executed:

cd crates/compiler-core && sed -n '480,510p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1585


🏁 Script executed:

cd crates/compiler-core && sed -n '564,580p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 712


🏁 Script executed:

cd crates/compiler-core && sed -n '440,460p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 956


🏁 Script executed:

cd crates/compiler-core && sed -n '1,30p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1183


🏁 Script executed:

cd crates/compiler-core && rg -A 3 -B 3 "read_op\(" src/bytecode.rs | head -40

Repository: RustPython/RustPython

Length of output: 985


🏁 Script executed:

cd crates/compiler-core && rg "read_arg\(" src/bytecode.rs | head -20

Repository: RustPython/RustPython

Length of output: 160


🏁 Script executed:

cd crates/compiler-core && rg "read_adaptive_counter\(" src/bytecode.rs | head -20

Repository: RustPython/RustPython

Length of output: 126


🏁 Script executed:

cd crates/compiler-core && sed -n '320,380p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1985


🏁 Script executed:

cd crates/compiler-core && grep -n "struct CodeUnit\|struct CodeUnits\|#\[repr" src/bytecode.rs | head -20

Repository: RustPython/RustPython

Length of output: 268


🏁 Script executed:

cd crates/compiler-core && grep -rn "read_op\|read_arg\|read_adaptive_counter" src/ --include="*.rs" | grep -v "^\s*//" | head -40

Repository: RustPython/RustPython

Length of output: 691


🏁 Script executed:

cd crates/compiler-core && sed -n '630,680p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1808


🏁 Script executed:

cd crates/compiler-core && sed -n '380,440p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1989


🏁 Script executed:

cd crates/compiler-core && sed -n '590,640p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1911


🏁 Script executed:

cd crates/compiler-core && grep -n "pub fn len\|pub fn is_empty\|impl.*CodeUnits" src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 415


🏁 Script executed:

cd crates/compiler-core && sed -n '373,430p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1990


🏁 Script executed:

cd crates/compiler-core && grep -rn "\.read_op\|\.read_arg\|\.read_adaptive_counter" . --include="*.rs" | grep -v "src/bytecode.rs"

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

cd crates/compiler-core && sed -n '446,480p' src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 1406


🏁 Script executed:

rg -r crates/ "\.read_op\(|\.read_arg\(|\.read_adaptive_counter\(" --include="*.rs" | head -50

Repository: RustPython/RustPython

Length of output: 506


🏁 Script executed:

rg "CodeUnits" --include="*.rs" -l | head -20

Repository: RustPython/RustPython

Length of output: 506


🏁 Script executed:

rg "\.read_op\(|\.read_arg\(|\.read_adaptive_counter\(" -t rs

Repository: RustPython/RustPython

Length of output: 91


🏁 Script executed:

rg "CodeUnits" -t rs -l

Repository: RustPython/RustPython

Length of output: 91


🏁 Script executed:

rg "\.read_op\(|\.read_arg\(|\.read_adaptive_counter\("

Repository: RustPython/RustPython

Length of output: 1856


🏁 Script executed:

rg "CodeUnits"

Repository: RustPython/RustPython

Length of output: 1699


🏁 Script executed:

sed -n '1,50p' crates/vm/src/frame.rs

Repository: RustPython/RustPython

Length of output: 1953


🏁 Script executed:

rg -B 5 -A 5 "\.read_op\(" crates/vm/src/frame.rs | head -80

Repository: RustPython/RustPython

Length of output: 2743


🏁 Script executed:

sed -n '700,800p' crates/vm/src/frame.rs | grep -A 10 "fn eval"

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

rg "idx.*read_op|instr_idx.*read_op" crates/vm/src/frame.rs -B 3 -A 3 | head -100

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

sed -n '485,500p' crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 846


🏁 Script executed:

sed -n '569,580p' crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 567


🏁 Script executed:

rg "while.*<.*len\(\)|for.*in.*0.*len" crates/vm/src/frame.rs -B 2 -A 5 | head -60

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

rg "pub fn" crates/compiler-core/src/bytecode.rs | head -20

Repository: RustPython/RustPython

Length of output: 1228


🏁 Script executed:

sed -n '435,445p' crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 419


🏁 Script executed:

sed -n '435,500p' crates/compiler-core/src/bytecode.rs | grep -E "fn len|fn is_empty"

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

rg "impl.*Deref.*CodeUnits" crates/compiler-core/src/bytecode.rs -A 15

Repository: RustPython/RustPython

Length of output: 668


🏁 Script executed:

grep -n "pub fn len\|pub fn is_empty" crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

sed -n '435,450p' crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 668


🏁 Script executed:

rg "\.len\(\)" crates/compiler-core/src/bytecode.rs | head -10

Repository: RustPython/RustPython

Length of output: 489


🏁 Script executed:

sed -n '515,570p' crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 2142


Safe readers allow potential UB on invalid index.

read_op, read_arg, and read_adaptive_counter are public safe functions that dereference raw pointers derived from wrapping_add(index) without bounds validation. Out-of-range input will cause undefined behavior instead of panicking. This contradicts the pattern already established in the same file for read_cache_u16, which includes an assertion.

Add bounds checks to all three functions:

Proposed fix
 pub fn read_op(&self, index: usize) -> Instruction {
     let units = unsafe { &*self.0.get() };
+    assert!(index < units.len(), "read_op: index out of bounds");
     let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
     let byte = unsafe { &*ptr }.load(Ordering::Acquire);
     // SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op.
     unsafe { mem::transmute::<u8, Instruction>(byte) }
 }

 pub fn read_arg(&self, index: usize) -> OpArgByte {
     let units = unsafe { &*self.0.get() };
+    assert!(index < units.len(), "read_arg: index out of bounds");
     let ptr = units.as_ptr().wrapping_add(index) as *const u8;
     let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
     OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed))
 }

 pub fn read_adaptive_counter(&self, index: usize) -> u8 {
     let units = unsafe { &*self.0.get() };
+    assert!(index < units.len(), "read_adaptive_counter: index out of bounds");
     let ptr = units.as_ptr().wrapping_add(index) as *const u8;
     let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
     unsafe { &*arg_ptr }.load(Ordering::Relaxed)
 }

Per coding guidelines: "Follow Rust best practices for error handling and memory management".

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/compiler-core/src/bytecode.rs` around lines 485 - 499, The public safe
readers (read_op, read_arg, read_adaptive_counter) currently compute raw
pointers with wrapping_add(index) and dereference without bounds checks, which
can cause UB on out-of-range indices; update each function to load units first
(as in read_cache_u16) and assert the index (and any additional byte offsets,
e.g., index+1 for arg) is within units.len() before computing pointers and
dereferencing so out-of-bounds inputs panic instead of producing UB, keeping the
existing pointer/dereference logic otherwise.


/// Write a u16 value into a CACHE code unit at `index`.
/// Each CodeUnit is 2 bytes (#[repr(C)]: op u8 + arg u8), so one u16 fits exactly.
/// Uses Relaxed atomic store; ordering is provided by replace_op (Release).
///
/// # Safety
/// - `index` must be in bounds and point to a CACHE entry.
/// - The caller must ensure no concurrent reads/writes to the same slot.
pub unsafe fn write_cache_u16(&self, index: usize, value: u16) {
unsafe {
let units = &mut *self.0.get();
let ptr = units.as_mut_ptr().add(index) as *mut u8;
core::ptr::write_unaligned(ptr as *mut u16, value);
}
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16;
unsafe { &*ptr }.store(value, Ordering::Relaxed);
Comment on lines +509 to +510
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, locate and read the relevant section of the bytecode file
fd bytecode.rs | head -1

Repository: RustPython/RustPython

Length of output: 101


🏁 Script executed:

# Read the file to check the CodeUnit struct and the atomic operations around the mentioned lines
cat -n crates/compiler-core/src/bytecode.rs | sed -n '1,50p'

Repository: RustPython/RustPython

Length of output: 2067


🏁 Script executed:

# Get the CodeUnit struct definition and surrounding code
cat -n crates/compiler-core/src/bytecode.rs | sed -n '30,100p'

Repository: RustPython/RustPython

Length of output: 2781


🏁 Script executed:

# Check lines 509-525 to see the actual code
cat -n crates/compiler-core/src/bytecode.rs | sed -n '500,530p'

Repository: RustPython/RustPython

Length of output: 1618


🏁 Script executed:

# Check the size and alignment assertions
cat -n crates/compiler-core/src/bytecode.rs | grep -A 3 "const _:"

Repository: RustPython/RustPython

Length of output: 275


🏁 Script executed:

# Find the CodeUnit struct definition
cat -n crates/compiler-core/src/bytecode.rs | sed -n '340,360p'

Repository: RustPython/RustPython

Length of output: 738


🏁 Script executed:

# Search for the CodeUnit struct definition more broadly
rg "struct CodeUnit" -A 5 crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 492


🏁 Script executed:

# Check if there's any alignment attribute or repr clause on CodeUnit
rg "CodeUnit" crates/compiler-core/src/bytecode.rs | head -20

Repository: RustPython/RustPython

Length of output: 857


🏁 Script executed:

# Check what Instruction and OpArgByte are
rg "type Instruction|struct Instruction|enum Instruction" crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Check OpArgByte definition
rg "type OpArgByte|struct OpArgByte|enum OpArgByte" crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Look at the instruction module for Instruction definition
cat -n crates/compiler-core/src/bytecode/instruction.rs | head -50

Repository: RustPython/RustPython

Length of output: 1871


🏁 Script executed:

# Look at the oparg module for OpArgByte
cat -n crates/compiler-core/src/bytecode/oparg.rs | head -30

Repository: RustPython/RustPython

Length of output: 857


🏁 Script executed:

# Check what the alignment of AtomicU16 is and the current alignment of CodeUnit
cat > /tmp/check_alignment.rs << 'EOF'
use core::{mem, sync::atomic::AtomicU16};

#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CodeUnit {
    pub op: u8,
    pub arg: u8,
}

fn main() {
    println!("CodeUnit size: {}", mem::size_of::<CodeUnit>());
    println!("CodeUnit align: {}", mem::align_of::<CodeUnit>());
    println!("AtomicU16 size: {}", mem::size_of::<AtomicU16>());
    println!("AtomicU16 align: {}", mem::align_of::<AtomicU16>());
    println!("u16 align: {}", mem::align_of::<u16>());
}
EOF
rustc /tmp/check_alignment.rs -o /tmp/check_alignment && /tmp/check_alignment

Repository: RustPython/RustPython

Length of output: 111


🏁 Script executed:

# Verify the actual lines 509-510 and 521-523 in context
cat -n crates/compiler-core/src/bytecode.rs | sed -n '507,524p'

Repository: RustPython/RustPython

Length of output: 961


🏁 Script executed:

# Check if there are any other alignment-related comments or safety notes
rg "align|Alignment" crates/compiler-core/src/bytecode.rs -B 2 -A 2

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Verify the actual Instruction and OpArgByte alignment requirements by checking their definitions
rg "pub struct Instruction|pub enum Instruction" crates/compiler-core/src/bytecode/instruction.rs -A 1

Repository: RustPython/RustPython

Length of output: 147


🏁 Script executed:

# Look for any repr attributes on Instruction
cat -n crates/compiler-core/src/bytecode/instruction.rs | sed -n '16,30p'

Repository: RustPython/RustPython

Length of output: 636


🏁 Script executed:

# Check if there are any tests or documentation about alignment
rg "align|unsafe.*AtomicU16" crates/compiler-core/src/ -B 3 -A 3

Repository: RustPython/RustPython

Length of output: 1165


🏁 Script executed:

# Check what the actual byte layout is for CodeUnit
python3 << 'EOF'
# Instruction is #[repr(u8)] - 1 byte
# OpArgByte is #[repr(transparent)] wrapping u8 - 1 byte
# So CodeUnit = 1 + 1 = 2 bytes, but with #[repr(C)], alignment is 1 byte

print("With #[repr(C)] and two u8 fields:")
print("  Size: 2 bytes")
print("  Alignment: 1 byte (maximum of the field alignments, which are all 1)")
print()
print("AtomicU16 requires:")
print("  Size: 2 bytes")
print("  Alignment: 2 bytes (natural alignment for atomic operations)")
print()
print("Issue: dereferencing a 1-byte-aligned pointer as a 2-byte-aligned AtomicU16 is undefined behavior")
EOF

Repository: RustPython/RustPython

Length of output: 328


🏁 Script executed:

# Check if there are any other unsafe pointer casts in the file that might have similar issues
rg "as \*const AtomicU" crates/compiler-core/src/bytecode.rs

Repository: RustPython/RustPython

Length of output: 623


🏁 Script executed:

# Check the SAFETY comment around these operations
cat -n crates/compiler-core/src/bytecode.rs | sed -n '495,535p'

Repository: RustPython/RustPython

Length of output: 2081


Add explicit 2-byte alignment guarantee to CodeUnit struct.

The casts to *const AtomicU16 at lines 509 and 521 require 2-byte alignment, but CodeUnit with #[repr(C)] and two u8 fields has only 1-byte alignment. This violates atomic type requirements and is undefined behavior on any target. The current size assertion only checks that CodeUnit is 2 bytes, not that it's 2-byte aligned.

Add align(2) to the repr attribute and assert alignment:

Proposed fix
-#[repr(C)]
+#[repr(C, align(2))]
 #[derive(Copy, Clone, Debug)]
 pub struct CodeUnit {
     pub op: Instruction,
     pub arg: OpArgByte,
 }

-const _: () = assert!(mem::size_of::<CodeUnit>() == 2);
+const _: () = {
+    assert!(mem::size_of::<CodeUnit>() == 2);
+    assert!(mem::align_of::<CodeUnit>() >= mem::align_of::<u16>());
+};
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16;
unsafe { &*ptr }.store(value, Ordering::Relaxed);
#[repr(C, align(2))]
#[derive(Copy, Clone, Debug)]
pub struct CodeUnit {
pub op: Instruction,
pub arg: OpArgByte,
}
const _: () = {
assert!(mem::size_of::<CodeUnit>() == 2);
assert!(mem::align_of::<CodeUnit>() >= mem::align_of::<u16>());
};
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/compiler-core/src/bytecode.rs` around lines 509 - 510, The code casts
CodeUnit pointers to *const AtomicU16 (see usages like
units.as_ptr().wrapping_add(index) as *const AtomicU16 and the subsequent .store
calls), but CodeUnit is currently #[repr(C)] with two u8 fields and only 1-byte
alignment, which violates atomic alignment requirements; fix by changing the
struct declaration to #[repr(C, align(2))] for CodeUnit and add a compile-time
assertion to ensure alignment (e.g. assert_eq!(std::mem::align_of::<CodeUnit>(),
2) or a const_assert) so the AtomicU16 casts are safe.

}

/// Read a u16 value from a CACHE code unit at `index`.
/// Uses Relaxed atomic load; ordering is provided by read_op (Acquire).
///
/// # Panics
/// Panics if `index` is out of bounds.
pub fn read_cache_u16(&self, index: usize) -> u16 {
let units = unsafe { &*self.0.get() };
assert!(index < units.len(), "read_cache_u16: index out of bounds");
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
unsafe { core::ptr::read_unaligned(ptr as *const u16) }
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16;
unsafe { &*ptr }.load(Ordering::Relaxed)
}

/// Write a u32 value across two consecutive CACHE code units starting at `index`.
Expand Down Expand Up @@ -518,36 +564,40 @@ impl CodeUnits {
lo | (hi << 32)
}

/// Read the adaptive counter from the first CACHE entry's `arg` byte.
/// This preserves `op = Instruction::Cache`, unlike `read_cache_u16`.
/// Read the adaptive counter from the CACHE entry's `arg` byte at `index`.
/// Uses Relaxed atomic load.
pub fn read_adaptive_counter(&self, index: usize) -> u8 {
let units = unsafe { &*self.0.get() };
u8::from(units[index].arg)
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
unsafe { &*arg_ptr }.load(Ordering::Relaxed)
}

/// Write the adaptive counter to the first CACHE entry's `arg` byte.
/// This preserves `op = Instruction::Cache`, unlike `write_cache_u16`.
/// Write the adaptive counter to the CACHE entry's `arg` byte at `index`.
/// Uses Relaxed atomic store.
///
/// # Safety
/// - `index` must be in bounds and point to a CACHE entry.
pub unsafe fn write_adaptive_counter(&self, index: usize, value: u8) {
let units = unsafe { &mut *self.0.get() };
units[index].arg = OpArgByte::from(value);
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
unsafe { &*arg_ptr }.store(value, Ordering::Relaxed);
}

/// Produce a clean copy of the bytecode suitable for serialization
/// (marshal) and `co_code`. Specialized opcodes are mapped back to their
/// base variants via `deoptimize()` and all CACHE entries are zeroed.
pub fn original_bytes(&self) -> Vec<u8> {
let units = unsafe { &*self.0.get() };
let mut out = Vec::with_capacity(units.len() * 2);
let len = units.len();
let len = self.len();
let mut out = Vec::with_capacity(len * 2);
let mut i = 0;
while i < len {
let op = units[i].op.deoptimize();
let op = self.read_op(i).deoptimize();
let arg = self.read_arg(i);
let caches = op.cache_entries();
out.push(u8::from(op));
out.push(u8::from(units[i].arg));
out.push(u8::from(arg));
// Zero-fill all CACHE entries (counter + cached data)
for _ in 0..caches {
i += 1;
Expand All @@ -562,20 +612,22 @@ impl CodeUnits {
/// Initialize adaptive warmup counters for all cacheable instructions.
/// Called lazily at RESUME (first execution of a code object).
/// Uses the `arg` byte of the first CACHE entry, preserving `op = Instruction::Cache`.
/// All writes are atomic (Relaxed) to avoid data races with concurrent readers.
pub fn quicken(&self) {
let units = unsafe { &mut *self.0.get() };
let len = units.len();
let len = self.len();
let mut i = 0;
while i < len {
let op = units[i].op;
let op = self.read_op(i);
let caches = op.cache_entries();
if caches > 0 {
// Don't write adaptive counter for instrumented opcodes;
// specialization is skipped while monitoring is active.
if !op.is_instrumented() {
let cache_base = i + 1;
if cache_base < len {
units[cache_base].arg = OpArgByte::from(ADAPTIVE_WARMUP_VALUE);
unsafe {
self.write_adaptive_counter(cache_base, ADAPTIVE_WARMUP_VALUE);
}
}
}
i += 1 + caches;
Expand Down
4 changes: 2 additions & 2 deletions crates/vm/src/builtins/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ pub(crate) mod stack_analysis {
}
oparg = (oparg << 8) | u32::from(u8::from(instructions[i].arg));

// De-instrument: get the underlying real instruction
let opcode = opcode.to_base().unwrap_or(opcode);
// De-instrument and de-specialize: get the underlying base instruction
let opcode = opcode.to_base().unwrap_or(opcode).deoptimize();

let caches = opcode.cache_entries();
let next_i = i + 1 + caches;
Expand Down
16 changes: 9 additions & 7 deletions crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ use super::{
PyAsyncGen, PyCode, PyCoroutine, PyDictRef, PyGenerator, PyModule, PyStr, PyStrRef, PyTuple,
PyTupleRef, PyType,
};
#[cfg(feature = "jit")]
use crate::common::lock::OnceCell;
use crate::common::lock::PyMutex;
use crate::function::ArgMapping;
use crate::object::{PyAtomicRef, Traverse, TraverseFn};
Expand Down Expand Up @@ -75,7 +73,7 @@ pub struct PyFunction {
doc: PyMutex<PyObjectRef>,
func_version: AtomicU32,
#[cfg(feature = "jit")]
jitted_code: OnceCell<CompiledCode>,
jitted_code: PyMutex<Option<CompiledCode>>,
}

static FUNC_VERSION_COUNTER: AtomicU32 = AtomicU32::new(1);
Expand Down Expand Up @@ -214,7 +212,7 @@ impl PyFunction {
doc: PyMutex::new(doc),
func_version: AtomicU32::new(next_func_version()),
#[cfg(feature = "jit")]
jitted_code: OnceCell::new(),
jitted_code: PyMutex::new(None),
};
Ok(func)
}
Expand Down Expand Up @@ -538,7 +536,7 @@ impl Py<PyFunction> {
vm: &VirtualMachine,
) -> PyResult {
#[cfg(feature = "jit")]
if let Some(jitted_code) = self.jitted_code.get() {
if let Some(jitted_code) = self.jitted_code.lock().as_ref() {
use crate::convert::ToPyObject;
match jit::get_jit_args(self, &func_args, jitted_code, vm) {
Ok(args) => {
Expand Down Expand Up @@ -712,6 +710,10 @@ impl PyFunction {
#[pygetset(setter)]
fn set___code__(&self, code: PyRef<PyCode>, vm: &VirtualMachine) {
self.code.swap_to_temporary_refs(code, vm);
#[cfg(feature = "jit")]
{
*self.jitted_code.lock() = None;
}
Comment on lines 711 to +716
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Make __code__ update atomic with JIT cache invalidation.

self.code is swapped before jitted_code is cleared. A concurrent call can run stale compiled code against the new __code__ in that window.

🔧 Suggested fix
     #[pygetset(setter)]
     fn set___code__(&self, code: PyRef<PyCode>, vm: &VirtualMachine) {
-        self.code.swap_to_temporary_refs(code, vm);
         #[cfg(feature = "jit")]
         {
-            *self.jitted_code.lock() = None;
+            let mut jitted = self.jitted_code.lock();
+            *jitted = None;
+            self.code.swap_to_temporary_refs(code, vm);
+        }
+        #[cfg(not(feature = "jit"))]
+        {
+            self.code.swap_to_temporary_refs(code, vm);
         }
         self.func_version.store(0, Relaxed);
     }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/builtins/function.rs` around lines 711 - 716, The update to
__code__ must be atomic with JIT-cache invalidation to avoid running stale
jitted code; modify set___code__ so it acquires the jitted_code mutex and clears
the cached entry before performing self.code.swap_to_temporary_refs, or hold the
same jitted_code lock while doing both operations so no other thread can observe
cleared/uncleared state in between. Specifically, in set___code__, lock
jitted_code (the Mutex used at *self.jitted_code.lock()), set it to None, then
call self.code.swap_to_temporary_refs(code, vm) while still holding the lock,
and finally release the lock so the swap and cache invalidation occur
atomically.

self.func_version.store(0, Relaxed);
}

Expand Down Expand Up @@ -948,15 +950,15 @@ impl PyFunction {
#[cfg(feature = "jit")]
#[pymethod]
fn __jit__(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<()> {
if zelf.jitted_code.get().is_some() {
if zelf.jitted_code.lock().is_some() {
return Ok(());
}
let arg_types = jit::get_jit_arg_types(&zelf, vm)?;
let ret_type = jit::jit_ret_type(&zelf, vm)?;
let code: &Py<PyCode> = &zelf.code;
let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type)
.map_err(|err| jit::new_jit_error(err.to_string(), vm))?;
let _ = zelf.jitted_code.set(compiled);
*zelf.jitted_code.lock() = Some(compiled);
Ok(())
Comment on lines +953 to 962
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Revalidate code identity before publishing compiled JIT code.

__jit__ does check -> compile -> store with no consistency check. If __code__ changes during compile, stale machine code can be cached and reused.

🔧 Suggested fix
     #[cfg(feature = "jit")]
     #[pymethod]
     fn __jit__(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<()> {
         if zelf.jitted_code.lock().is_some() {
             return Ok(());
         }
+        let code_snapshot: PyRef<PyCode> = (*zelf.code).to_owned();
+        let code_id = code_snapshot.get_id();

         let arg_types = jit::get_jit_arg_types(&zelf, vm)?;
         let ret_type = jit::jit_ret_type(&zelf, vm)?;
-        let code: &Py<PyCode> = &zelf.code;
-        let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type)
+        let compiled = rustpython_jit::compile(&code_snapshot.code, &arg_types, ret_type)
             .map_err(|err| jit::new_jit_error(err.to_string(), vm))?;
-        *zelf.jitted_code.lock() = Some(compiled);
+        let mut jitted = zelf.jitted_code.lock();
+        if jitted.is_none() && (*zelf.code).get_id() == code_id {
+            *jitted = Some(compiled);
+        }
         Ok(())
     }

}
}
Expand Down
9 changes: 6 additions & 3 deletions crates/vm/src/builtins/type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ impl PyType {
if old_version == 0 {
return;
}
self.tp_version_tag.store(0, Ordering::Release);
self.tp_version_tag.store(0, Ordering::SeqCst);
// Release strong references held by cache entries for this version.
// We hold owned refs that would prevent GC of class attributes after
// type deletion.
Expand Down Expand Up @@ -2168,6 +2168,11 @@ impl SetAttr for PyType {
}
let assign = value.is_assign();

// Invalidate inline caches before modifying attributes.
// This ensures other threads see the version invalidation before
// any attribute changes, preventing use-after-free of cached descriptors.
zelf.modified();

Comment on lines +2171 to +2175
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Close the version-tag race introduced by pre-invalidation.

Calling zelf.modified() only before mutation allows a concurrent find_name_in_mro to assign/cache under a fresh tag during the mutation window (see Line 761-Line 777), so stale results can remain valid after the write completes.

🔧 Suggested fix
         // Invalidate inline caches before modifying attributes.
         // This ensures other threads see the version invalidation before
         // any attribute changes, preventing use-after-free of cached descriptors.
         zelf.modified();

         if let PySetterValue::Assign(value) = value {
             zelf.attributes.write().insert(attr_name, value);
         } else {
             let prev_value = zelf.attributes.write().shift_remove(attr_name); // TODO: swap_remove applicable?
             if prev_value.is_none() {
                 return Err(vm.new_attribute_error(format!(
                     "type object '{}' has no attribute '{}'",
                     zelf.name(),
                     attr_name,
                 )));
             }
         }

         if attr_name.as_wtf8().starts_with("__") && attr_name.as_wtf8().ends_with("__") {
             if assign {
                 zelf.update_slot::<true>(attr_name, &vm.ctx);
             } else {
                 zelf.update_slot::<false>(attr_name, &vm.ctx);
             }
         }
+        // Close the race: another thread may have assigned/cached a fresh
+        // version while mutation was in-flight.
+        zelf.modified();
         Ok(())
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/vm/src/builtins/type.rs` around lines 2171 - 2175, The current
pre-invalidation (calling zelf.modified() once before attribute mutation) leaves
a window where a concurrent find_name_in_mro may cache results under the new tag
during the mutation; fix this by invoking zelf.modified() both before starting
the mutation and once more immediately after finishing attribute modifications
so any caches created during the mutation are replay-invalidated; update the
code path where attributes are changed (the block containing the single
zelf.modified() call) to perform a second modified() post-mutation and ensure
this behavior aligns with the cache-checking logic in find_name_in_mro.

if let PySetterValue::Assign(value) = value {
zelf.attributes.write().insert(attr_name, value);
} else {
Expand All @@ -2180,8 +2185,6 @@ impl SetAttr for PyType {
)));
}
}
// Invalidate inline caches that depend on this type's attributes
zelf.modified();

if attr_name.as_wtf8().starts_with("__") && attr_name.as_wtf8().ends_with("__") {
if assign {
Expand Down
9 changes: 6 additions & 3 deletions crates/vm/src/dict_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ use crate::{
use alloc::fmt;
use core::mem::size_of;
use core::ops::ControlFlow;
use core::sync::atomic::{AtomicU64, Ordering::Relaxed};
use core::sync::atomic::{
AtomicU64,
Ordering::{Acquire, Release},
};
use num_traits::ToPrimitive;

// HashIndex is intended to be same size with hash::PyHash
Expand Down Expand Up @@ -261,12 +264,12 @@ type PopInnerResult<T> = ControlFlow<Option<DictEntry<T>>>;
impl<T: Clone> Dict<T> {
/// Monotonically increasing version counter for mutation tracking.
pub fn version(&self) -> u64 {
self.version.load(Relaxed)
self.version.load(Acquire)
}

/// Bump the version counter after any mutation.
fn bump_version(&self) {
self.version.fetch_add(1, Relaxed);
self.version.fetch_add(1, Release);
}

fn read(&self) -> PyRwLockReadGuard<'_, DictInner<T>> {
Expand Down
Loading
Loading