-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Specialized ops #7322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Specialized ops #7322
Changes from all commits
e77bb5a
4ab342d
5e9ec26
c9a40ce
2bd0d10
7bd5300
5c81306
5a1b29b
f99e95f
8821456
dd89151
21c29ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -8,7 +8,12 @@ use crate::{ | |||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||
| use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeSet, fmt, string::String, vec::Vec}; | ||||||||||||||||||||||||||||
| use bitflags::bitflags; | ||||||||||||||||||||||||||||
| use core::{cell::UnsafeCell, hash, mem, ops::Deref}; | ||||||||||||||||||||||||||||
| use core::{ | ||||||||||||||||||||||||||||
| cell::UnsafeCell, | ||||||||||||||||||||||||||||
| hash, mem, | ||||||||||||||||||||||||||||
| ops::Deref, | ||||||||||||||||||||||||||||
| sync::atomic::{AtomicU8, AtomicU16, Ordering}, | ||||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||
| use itertools::Itertools; | ||||||||||||||||||||||||||||
| use malachite_bigint::BigInt; | ||||||||||||||||||||||||||||
| use num_complex::Complex64; | ||||||||||||||||||||||||||||
|
|
@@ -367,8 +372,13 @@ impl TryFrom<&[u8]> for CodeUnit { | |||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| pub struct CodeUnits(UnsafeCell<Box<[CodeUnit]>>); | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // SAFETY: All mutation of the inner buffer is serialized by `monitoring_data: PyMutex` | ||||||||||||||||||||||||||||
| // in `PyCode`. The `UnsafeCell` is required because `replace_op` mutates through `&self`. | ||||||||||||||||||||||||||||
| // SAFETY: All cache operations use atomic read/write instructions. | ||||||||||||||||||||||||||||
| // - replace_op / compare_exchange_op: AtomicU8 store/CAS (Release) | ||||||||||||||||||||||||||||
| // - cache read/write: AtomicU16 load/store (Relaxed) | ||||||||||||||||||||||||||||
| // - adaptive counter: AtomicU8 load/store (Relaxed) | ||||||||||||||||||||||||||||
| // Ordering is established by: | ||||||||||||||||||||||||||||
| // - replace_op (Release) ↔ dispatch loop read_op (Acquire) for cache data visibility | ||||||||||||||||||||||||||||
| // - tp_version_tag (Acquire) for descriptor pointer validity | ||||||||||||||||||||||||||||
| unsafe impl Sync for CodeUnits {} | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| impl Clone for CodeUnits { | ||||||||||||||||||||||||||||
|
|
@@ -435,45 +445,81 @@ impl Deref for CodeUnits { | |||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| impl CodeUnits { | ||||||||||||||||||||||||||||
| /// Replace the opcode at `index` in-place without changing the arg byte. | ||||||||||||||||||||||||||||
| /// Uses atomic Release store to ensure prior cache writes are visible | ||||||||||||||||||||||||||||
| /// to threads that subsequently read the new opcode with Acquire. | ||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||
| /// # Safety | ||||||||||||||||||||||||||||
| /// - `index` must be in bounds. | ||||||||||||||||||||||||||||
| /// - `new_op` must have the same arg semantics as the original opcode. | ||||||||||||||||||||||||||||
| /// - The caller must ensure exclusive access to the instruction buffer | ||||||||||||||||||||||||||||
| /// (no concurrent reads or writes to the same `CodeUnits`). | ||||||||||||||||||||||||||||
| pub unsafe fn replace_op(&self, index: usize, new_op: Instruction) { | ||||||||||||||||||||||||||||
| unsafe { | ||||||||||||||||||||||||||||
| let units = &mut *self.0.get(); | ||||||||||||||||||||||||||||
| let unit_ptr = units.as_mut_ptr().add(index); | ||||||||||||||||||||||||||||
| // Write only the opcode byte (first byte of CodeUnit due to #[repr(C)]) | ||||||||||||||||||||||||||||
| let op_ptr = unit_ptr as *mut u8; | ||||||||||||||||||||||||||||
| core::ptr::write(op_ptr, new_op.into()); | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; | ||||||||||||||||||||||||||||
| unsafe { &*ptr }.store(new_op.into(), Ordering::Release); | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Atomically replace opcode only if it still matches `expected`. | ||||||||||||||||||||||||||||
| /// Returns true on success. Uses Release ordering on success. | ||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||
| /// # Safety | ||||||||||||||||||||||||||||
| /// - `index` must be in bounds. | ||||||||||||||||||||||||||||
| pub unsafe fn compare_exchange_op( | ||||||||||||||||||||||||||||
| &self, | ||||||||||||||||||||||||||||
| index: usize, | ||||||||||||||||||||||||||||
| expected: Instruction, | ||||||||||||||||||||||||||||
| new_op: Instruction, | ||||||||||||||||||||||||||||
| ) -> bool { | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; | ||||||||||||||||||||||||||||
| unsafe { &*ptr } | ||||||||||||||||||||||||||||
| .compare_exchange( | ||||||||||||||||||||||||||||
| expected.into(), | ||||||||||||||||||||||||||||
| new_op.into(), | ||||||||||||||||||||||||||||
| Ordering::Release, | ||||||||||||||||||||||||||||
| Ordering::Relaxed, | ||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||
| .is_ok() | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Atomically read the opcode at `index` with Acquire ordering. | ||||||||||||||||||||||||||||
| /// Pairs with `replace_op` (Release) to ensure cache data visibility. | ||||||||||||||||||||||||||||
| pub fn read_op(&self, index: usize) -> Instruction { | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; | ||||||||||||||||||||||||||||
| let byte = unsafe { &*ptr }.load(Ordering::Acquire); | ||||||||||||||||||||||||||||
| // SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op. | ||||||||||||||||||||||||||||
| unsafe { mem::transmute::<u8, Instruction>(byte) } | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Atomically read the arg byte at `index` with Relaxed ordering. | ||||||||||||||||||||||||||||
| pub fn read_arg(&self, index: usize) -> OpArgByte { | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const u8; | ||||||||||||||||||||||||||||
| let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; | ||||||||||||||||||||||||||||
| OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed)) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Write a u16 value into a CACHE code unit at `index`. | ||||||||||||||||||||||||||||
| /// Each CodeUnit is 2 bytes (#[repr(C)]: op u8 + arg u8), so one u16 fits exactly. | ||||||||||||||||||||||||||||
| /// Uses Relaxed atomic store; ordering is provided by replace_op (Release). | ||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||
| /// # Safety | ||||||||||||||||||||||||||||
| /// - `index` must be in bounds and point to a CACHE entry. | ||||||||||||||||||||||||||||
| /// - The caller must ensure no concurrent reads/writes to the same slot. | ||||||||||||||||||||||||||||
| pub unsafe fn write_cache_u16(&self, index: usize, value: u16) { | ||||||||||||||||||||||||||||
| unsafe { | ||||||||||||||||||||||||||||
| let units = &mut *self.0.get(); | ||||||||||||||||||||||||||||
| let ptr = units.as_mut_ptr().add(index) as *mut u8; | ||||||||||||||||||||||||||||
| core::ptr::write_unaligned(ptr as *mut u16, value); | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16; | ||||||||||||||||||||||||||||
| unsafe { &*ptr }.store(value, Ordering::Relaxed); | ||||||||||||||||||||||||||||
|
Comment on lines
+509
to
+510
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, locate and read the relevant section of the bytecode file
fd bytecode.rs | head -1Repository: RustPython/RustPython Length of output: 101 🏁 Script executed: # Read the file to check the CodeUnit struct and the atomic operations around the mentioned lines
cat -n crates/compiler-core/src/bytecode.rs | sed -n '1,50p'Repository: RustPython/RustPython Length of output: 2067 🏁 Script executed: # Get the CodeUnit struct definition and surrounding code
cat -n crates/compiler-core/src/bytecode.rs | sed -n '30,100p'Repository: RustPython/RustPython Length of output: 2781 🏁 Script executed: # Check lines 509-525 to see the actual code
cat -n crates/compiler-core/src/bytecode.rs | sed -n '500,530p'Repository: RustPython/RustPython Length of output: 1618 🏁 Script executed: # Check the size and alignment assertions
cat -n crates/compiler-core/src/bytecode.rs | grep -A 3 "const _:"Repository: RustPython/RustPython Length of output: 275 🏁 Script executed: # Find the CodeUnit struct definition
cat -n crates/compiler-core/src/bytecode.rs | sed -n '340,360p'Repository: RustPython/RustPython Length of output: 738 🏁 Script executed: # Search for the CodeUnit struct definition more broadly
rg "struct CodeUnit" -A 5 crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython Length of output: 492 🏁 Script executed: # Check if there's any alignment attribute or repr clause on CodeUnit
rg "CodeUnit" crates/compiler-core/src/bytecode.rs | head -20Repository: RustPython/RustPython Length of output: 857 🏁 Script executed: # Check what Instruction and OpArgByte are
rg "type Instruction|struct Instruction|enum Instruction" crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython Length of output: 47 🏁 Script executed: # Check OpArgByte definition
rg "type OpArgByte|struct OpArgByte|enum OpArgByte" crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython Length of output: 47 🏁 Script executed: # Look at the instruction module for Instruction definition
cat -n crates/compiler-core/src/bytecode/instruction.rs | head -50Repository: RustPython/RustPython Length of output: 1871 🏁 Script executed: # Look at the oparg module for OpArgByte
cat -n crates/compiler-core/src/bytecode/oparg.rs | head -30Repository: RustPython/RustPython Length of output: 857 🏁 Script executed: # Check what the alignment of AtomicU16 is and the current alignment of CodeUnit
cat > /tmp/check_alignment.rs << 'EOF'
use core::{mem, sync::atomic::AtomicU16};
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CodeUnit {
pub op: u8,
pub arg: u8,
}
fn main() {
println!("CodeUnit size: {}", mem::size_of::<CodeUnit>());
println!("CodeUnit align: {}", mem::align_of::<CodeUnit>());
println!("AtomicU16 size: {}", mem::size_of::<AtomicU16>());
println!("AtomicU16 align: {}", mem::align_of::<AtomicU16>());
println!("u16 align: {}", mem::align_of::<u16>());
}
EOF
rustc /tmp/check_alignment.rs -o /tmp/check_alignment && /tmp/check_alignmentRepository: RustPython/RustPython Length of output: 111 🏁 Script executed: # Verify the actual lines 509-510 and 521-523 in context
cat -n crates/compiler-core/src/bytecode.rs | sed -n '507,524p'Repository: RustPython/RustPython Length of output: 961 🏁 Script executed: # Check if there are any other alignment-related comments or safety notes
rg "align|Alignment" crates/compiler-core/src/bytecode.rs -B 2 -A 2Repository: RustPython/RustPython Length of output: 47 🏁 Script executed: # Verify the actual Instruction and OpArgByte alignment requirements by checking their definitions
rg "pub struct Instruction|pub enum Instruction" crates/compiler-core/src/bytecode/instruction.rs -A 1Repository: RustPython/RustPython Length of output: 147 🏁 Script executed: # Look for any repr attributes on Instruction
cat -n crates/compiler-core/src/bytecode/instruction.rs | sed -n '16,30p'Repository: RustPython/RustPython Length of output: 636 🏁 Script executed: # Check if there are any tests or documentation about alignment
rg "align|unsafe.*AtomicU16" crates/compiler-core/src/ -B 3 -A 3Repository: RustPython/RustPython Length of output: 1165 🏁 Script executed: # Check what the actual byte layout is for CodeUnit
python3 << 'EOF'
# Instruction is #[repr(u8)] - 1 byte
# OpArgByte is #[repr(transparent)] wrapping u8 - 1 byte
# So CodeUnit = 1 + 1 = 2 bytes, but with #[repr(C)], alignment is 1 byte
print("With #[repr(C)] and two u8 fields:")
print(" Size: 2 bytes")
print(" Alignment: 1 byte (minimum of field alignments)")
print()
print("AtomicU16 requires:")
print(" Size: 2 bytes")
print(" Alignment: 2 bytes (natural alignment for atomic operations)")
print()
print("Issue: Casting 1-byte aligned pointer to 2-byte aligned is UB")
EOFRepository: RustPython/RustPython Length of output: 328 🏁 Script executed: # Check if there are any other unsafe pointer casts in the file that might have similar issues
rg "as \*const AtomicU" crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython Length of output: 623 🏁 Script executed: # Check the SAFETY comment around these operations
cat -n crates/compiler-core/src/bytecode.rs | sed -n '495,535p'Repository: RustPython/RustPython Length of output: 2081 Add explicit 2-byte alignment guarantee to The casts to Add Proposed fix-#[repr(C)]
+#[repr(C, align(2))]
#[derive(Copy, Clone, Debug)]
pub struct CodeUnit {
pub op: Instruction,
pub arg: OpArgByte,
}
-const _: () = assert!(mem::size_of::<CodeUnit>() == 2);
+const _: () = {
+ assert!(mem::size_of::<CodeUnit>() == 2);
+ assert!(mem::align_of::<CodeUnit>() >= mem::align_of::<u16>());
+};📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Read a u16 value from a CACHE code unit at `index`. | ||||||||||||||||||||||||||||
| /// Uses Relaxed atomic load; ordering is provided by read_op (Acquire). | ||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||
| /// # Panics | ||||||||||||||||||||||||||||
| /// Panics if `index` is out of bounds. | ||||||||||||||||||||||||||||
| pub fn read_cache_u16(&self, index: usize) -> u16 { | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| assert!(index < units.len(), "read_cache_u16: index out of bounds"); | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const u8; | ||||||||||||||||||||||||||||
| unsafe { core::ptr::read_unaligned(ptr as *const u16) } | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16; | ||||||||||||||||||||||||||||
| unsafe { &*ptr }.load(Ordering::Relaxed) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Write a u32 value across two consecutive CACHE code units starting at `index`. | ||||||||||||||||||||||||||||
|
|
@@ -518,36 +564,40 @@ impl CodeUnits { | |||||||||||||||||||||||||||
| lo | (hi << 32) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Read the adaptive counter from the first CACHE entry's `arg` byte. | ||||||||||||||||||||||||||||
| /// This preserves `op = Instruction::Cache`, unlike `read_cache_u16`. | ||||||||||||||||||||||||||||
| /// Read the adaptive counter from the CACHE entry's `arg` byte at `index`. | ||||||||||||||||||||||||||||
| /// Uses Relaxed atomic load. | ||||||||||||||||||||||||||||
| pub fn read_adaptive_counter(&self, index: usize) -> u8 { | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| u8::from(units[index].arg) | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const u8; | ||||||||||||||||||||||||||||
| let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; | ||||||||||||||||||||||||||||
| unsafe { &*arg_ptr }.load(Ordering::Relaxed) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Write the adaptive counter to the first CACHE entry's `arg` byte. | ||||||||||||||||||||||||||||
| /// This preserves `op = Instruction::Cache`, unlike `write_cache_u16`. | ||||||||||||||||||||||||||||
| /// Write the adaptive counter to the CACHE entry's `arg` byte at `index`. | ||||||||||||||||||||||||||||
| /// Uses Relaxed atomic store. | ||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||
| /// # Safety | ||||||||||||||||||||||||||||
| /// - `index` must be in bounds and point to a CACHE entry. | ||||||||||||||||||||||||||||
| pub unsafe fn write_adaptive_counter(&self, index: usize, value: u8) { | ||||||||||||||||||||||||||||
| let units = unsafe { &mut *self.0.get() }; | ||||||||||||||||||||||||||||
| units[index].arg = OpArgByte::from(value); | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let ptr = units.as_ptr().wrapping_add(index) as *const u8; | ||||||||||||||||||||||||||||
| let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; | ||||||||||||||||||||||||||||
| unsafe { &*arg_ptr }.store(value, Ordering::Relaxed); | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /// Produce a clean copy of the bytecode suitable for serialization | ||||||||||||||||||||||||||||
| /// (marshal) and `co_code`. Specialized opcodes are mapped back to their | ||||||||||||||||||||||||||||
| /// base variants via `deoptimize()` and all CACHE entries are zeroed. | ||||||||||||||||||||||||||||
| pub fn original_bytes(&self) -> Vec<u8> { | ||||||||||||||||||||||||||||
| let units = unsafe { &*self.0.get() }; | ||||||||||||||||||||||||||||
| let mut out = Vec::with_capacity(units.len() * 2); | ||||||||||||||||||||||||||||
| let len = units.len(); | ||||||||||||||||||||||||||||
| let len = self.len(); | ||||||||||||||||||||||||||||
| let mut out = Vec::with_capacity(len * 2); | ||||||||||||||||||||||||||||
| let mut i = 0; | ||||||||||||||||||||||||||||
| while i < len { | ||||||||||||||||||||||||||||
| let op = units[i].op.deoptimize(); | ||||||||||||||||||||||||||||
| let op = self.read_op(i).deoptimize(); | ||||||||||||||||||||||||||||
| let arg = self.read_arg(i); | ||||||||||||||||||||||||||||
| let caches = op.cache_entries(); | ||||||||||||||||||||||||||||
| out.push(u8::from(op)); | ||||||||||||||||||||||||||||
| out.push(u8::from(units[i].arg)); | ||||||||||||||||||||||||||||
| out.push(u8::from(arg)); | ||||||||||||||||||||||||||||
| // Zero-fill all CACHE entries (counter + cached data) | ||||||||||||||||||||||||||||
| for _ in 0..caches { | ||||||||||||||||||||||||||||
| i += 1; | ||||||||||||||||||||||||||||
|
|
@@ -562,20 +612,22 @@ impl CodeUnits { | |||||||||||||||||||||||||||
| /// Initialize adaptive warmup counters for all cacheable instructions. | ||||||||||||||||||||||||||||
| /// Called lazily at RESUME (first execution of a code object). | ||||||||||||||||||||||||||||
| /// Uses the `arg` byte of the first CACHE entry, preserving `op = Instruction::Cache`. | ||||||||||||||||||||||||||||
| /// All writes are atomic (Relaxed) to avoid data races with concurrent readers. | ||||||||||||||||||||||||||||
| pub fn quicken(&self) { | ||||||||||||||||||||||||||||
| let units = unsafe { &mut *self.0.get() }; | ||||||||||||||||||||||||||||
| let len = units.len(); | ||||||||||||||||||||||||||||
| let len = self.len(); | ||||||||||||||||||||||||||||
| let mut i = 0; | ||||||||||||||||||||||||||||
| while i < len { | ||||||||||||||||||||||||||||
| let op = units[i].op; | ||||||||||||||||||||||||||||
| let op = self.read_op(i); | ||||||||||||||||||||||||||||
| let caches = op.cache_entries(); | ||||||||||||||||||||||||||||
| if caches > 0 { | ||||||||||||||||||||||||||||
| // Don't write adaptive counter for instrumented opcodes; | ||||||||||||||||||||||||||||
| // specialization is skipped while monitoring is active. | ||||||||||||||||||||||||||||
| if !op.is_instrumented() { | ||||||||||||||||||||||||||||
| let cache_base = i + 1; | ||||||||||||||||||||||||||||
| if cache_base < len { | ||||||||||||||||||||||||||||
| units[cache_base].arg = OpArgByte::from(ADAPTIVE_WARMUP_VALUE); | ||||||||||||||||||||||||||||
| unsafe { | ||||||||||||||||||||||||||||
| self.write_adaptive_counter(cache_base, ADAPTIVE_WARMUP_VALUE); | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| i += 1 + caches; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,8 +5,6 @@ use super::{ | |
| PyAsyncGen, PyCode, PyCoroutine, PyDictRef, PyGenerator, PyModule, PyStr, PyStrRef, PyTuple, | ||
| PyTupleRef, PyType, | ||
| }; | ||
| #[cfg(feature = "jit")] | ||
| use crate::common::lock::OnceCell; | ||
| use crate::common::lock::PyMutex; | ||
| use crate::function::ArgMapping; | ||
| use crate::object::{PyAtomicRef, Traverse, TraverseFn}; | ||
|
|
@@ -75,7 +73,7 @@ pub struct PyFunction { | |
| doc: PyMutex<PyObjectRef>, | ||
| func_version: AtomicU32, | ||
| #[cfg(feature = "jit")] | ||
| jitted_code: OnceCell<CompiledCode>, | ||
| jitted_code: PyMutex<Option<CompiledCode>>, | ||
| } | ||
|
|
||
| static FUNC_VERSION_COUNTER: AtomicU32 = AtomicU32::new(1); | ||
|
|
@@ -214,7 +212,7 @@ impl PyFunction { | |
| doc: PyMutex::new(doc), | ||
| func_version: AtomicU32::new(next_func_version()), | ||
| #[cfg(feature = "jit")] | ||
| jitted_code: OnceCell::new(), | ||
| jitted_code: PyMutex::new(None), | ||
| }; | ||
| Ok(func) | ||
| } | ||
|
|
@@ -538,7 +536,7 @@ impl Py<PyFunction> { | |
| vm: &VirtualMachine, | ||
| ) -> PyResult { | ||
| #[cfg(feature = "jit")] | ||
| if let Some(jitted_code) = self.jitted_code.get() { | ||
| if let Some(jitted_code) = self.jitted_code.lock().as_ref() { | ||
| use crate::convert::ToPyObject; | ||
| match jit::get_jit_args(self, &func_args, jitted_code, vm) { | ||
| Ok(args) => { | ||
|
|
@@ -712,6 +710,10 @@ impl PyFunction { | |
| #[pygetset(setter)] | ||
| fn set___code__(&self, code: PyRef<PyCode>, vm: &VirtualMachine) { | ||
| self.code.swap_to_temporary_refs(code, vm); | ||
| #[cfg(feature = "jit")] | ||
| { | ||
| *self.jitted_code.lock() = None; | ||
| } | ||
|
Comment on lines
711
to
+716
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make
🔧 Suggested fix #[pygetset(setter)]
fn set___code__(&self, code: PyRef<PyCode>, vm: &VirtualMachine) {
- self.code.swap_to_temporary_refs(code, vm);
#[cfg(feature = "jit")]
{
- *self.jitted_code.lock() = None;
+ let mut jitted = self.jitted_code.lock();
+ *jitted = None;
+ self.code.swap_to_temporary_refs(code, vm);
+ }
+ #[cfg(not(feature = "jit"))]
+ {
+ self.code.swap_to_temporary_refs(code, vm);
}
self.func_version.store(0, Relaxed);
}🤖 Prompt for AI Agents |
||
| self.func_version.store(0, Relaxed); | ||
| } | ||
|
|
||
|
|
@@ -948,15 +950,15 @@ impl PyFunction { | |
| #[cfg(feature = "jit")] | ||
| #[pymethod] | ||
| fn __jit__(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<()> { | ||
| if zelf.jitted_code.get().is_some() { | ||
| if zelf.jitted_code.lock().is_some() { | ||
| return Ok(()); | ||
| } | ||
| let arg_types = jit::get_jit_arg_types(&zelf, vm)?; | ||
| let ret_type = jit::jit_ret_type(&zelf, vm)?; | ||
| let code: &Py<PyCode> = &zelf.code; | ||
| let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type) | ||
| .map_err(|err| jit::new_jit_error(err.to_string(), vm))?; | ||
| let _ = zelf.jitted_code.set(compiled); | ||
| *zelf.jitted_code.lock() = Some(compiled); | ||
| Ok(()) | ||
|
Comment on lines
+953
to
962
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Revalidate code identity before publishing compiled JIT code.
🔧 Suggested fix #[cfg(feature = "jit")]
#[pymethod]
fn __jit__(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<()> {
if zelf.jitted_code.lock().is_some() {
return Ok(());
}
+ let code_snapshot: PyRef<PyCode> = (*zelf.code).to_owned();
+ let code_id = code_snapshot.get_id();
let arg_types = jit::get_jit_arg_types(&zelf, vm)?;
let ret_type = jit::jit_ret_type(&zelf, vm)?;
- let code: &Py<PyCode> = &zelf.code;
- let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type)
+ let compiled = rustpython_jit::compile(&code_snapshot.code, &arg_types, ret_type)
.map_err(|err| jit::new_jit_error(err.to_string(), vm))?;
- *zelf.jitted_code.lock() = Some(compiled);
+ let mut jitted = zelf.jitted_code.lock();
+ if jitted.is_none() && (*zelf.code).get_id() == code_id {
+ *jitted = Some(compiled);
+ }
Ok(())
} |
||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -347,7 +347,7 @@ impl PyType { | |
| if old_version == 0 { | ||
| return; | ||
| } | ||
| self.tp_version_tag.store(0, Ordering::Release); | ||
| self.tp_version_tag.store(0, Ordering::SeqCst); | ||
| // Release strong references held by cache entries for this version. | ||
| // We hold owned refs that would prevent GC of class attributes after | ||
| // type deletion. | ||
|
|
@@ -2168,6 +2168,11 @@ impl SetAttr for PyType { | |
| } | ||
| let assign = value.is_assign(); | ||
|
|
||
| // Invalidate inline caches before modifying attributes. | ||
| // This ensures other threads see the version invalidation before | ||
| // any attribute changes, preventing use-after-free of cached descriptors. | ||
| zelf.modified(); | ||
|
|
||
|
Comment on lines
+2171
to
+2175
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Close the version-tag race introduced by pre-invalidation. Calling 🔧 Suggested fix // Invalidate inline caches before modifying attributes.
// This ensures other threads see the version invalidation before
// any attribute changes, preventing use-after-free of cached descriptors.
zelf.modified();
if let PySetterValue::Assign(value) = value {
zelf.attributes.write().insert(attr_name, value);
} else {
let prev_value = zelf.attributes.write().shift_remove(attr_name); // TODO: swap_remove applicable?
if prev_value.is_none() {
return Err(vm.new_attribute_error(format!(
"type object '{}' has no attribute '{}'",
zelf.name(),
attr_name,
)));
}
}
if attr_name.as_wtf8().starts_with("__") && attr_name.as_wtf8().ends_with("__") {
if assign {
zelf.update_slot::<true>(attr_name, &vm.ctx);
} else {
zelf.update_slot::<false>(attr_name, &vm.ctx);
}
}
+ // Close the race: another thread may have assigned/cached a fresh
+ // version while mutation was in-flight.
+ zelf.modified();
Ok(())🤖 Prompt for AI Agents |
||
| if let PySetterValue::Assign(value) = value { | ||
| zelf.attributes.write().insert(attr_name, value); | ||
| } else { | ||
|
|
@@ -2180,8 +2185,6 @@ impl SetAttr for PyType { | |
| ))); | ||
| } | ||
| } | ||
| // Invalidate inline caches that depend on this type's attributes | ||
| zelf.modified(); | ||
|
|
||
| if attr_name.as_wtf8().starts_with("__") && attr_name.as_wtf8().ends_with("__") { | ||
| if assign { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 85
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1585
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 712
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 956
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1183
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 985
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 160
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 126
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1985
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 268
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 691
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1808
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1989
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1911
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 415
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1990
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 47
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1406
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 506
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 506
🏁 Script executed:
rg "\.read_op\(|\.read_arg\(|\.read_adaptive_counter\(" -t rsRepository: RustPython/RustPython
Length of output: 91
🏁 Script executed:
rg "CodeUnits" -t rs -lRepository: RustPython/RustPython
Length of output: 91
🏁 Script executed:
rg "\.read_op\(|\.read_arg\(|\.read_adaptive_counter\("Repository: RustPython/RustPython
Length of output: 1856
🏁 Script executed:
rg "CodeUnits"Repository: RustPython/RustPython
Length of output: 1699
🏁 Script executed:
sed -n '1,50p' crates/vm/src/frame.rsRepository: RustPython/RustPython
Length of output: 1953
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 2743
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 47
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 47
🏁 Script executed:
sed -n '485,500p' crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython
Length of output: 846
🏁 Script executed:
sed -n '569,580p' crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython
Length of output: 567
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 47
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 1228
🏁 Script executed:
sed -n '435,445p' crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython
Length of output: 419
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 47
🏁 Script executed:
rg "impl.*Deref.*CodeUnits" crates/compiler-core/src/bytecode.rs -A 15Repository: RustPython/RustPython
Length of output: 668
🏁 Script executed:
grep -n "pub fn len\|pub fn is_empty" crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython
Length of output: 47
🏁 Script executed:
sed -n '435,450p' crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython
Length of output: 668
🏁 Script executed:
Repository: RustPython/RustPython
Length of output: 489
🏁 Script executed:
sed -n '515,570p' crates/compiler-core/src/bytecode.rsRepository: RustPython/RustPython
Length of output: 2142
Safe readers allow potential UB on invalid
index.read_op,read_arg, andread_adaptive_counterare public safe functions that dereference raw pointers derived fromwrapping_add(index)without bounds validation. Out-of-range input will cause undefined behavior instead of panicking. This contradicts the pattern already established in the same file forread_cache_u16, which includes an assertion.Add bounds checks to all three functions:
Proposed fix
pub fn read_op(&self, index: usize) -> Instruction { let units = unsafe { &*self.0.get() }; + assert!(index < units.len(), "read_op: index out of bounds"); let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; let byte = unsafe { &*ptr }.load(Ordering::Acquire); // SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op. unsafe { mem::transmute::<u8, Instruction>(byte) } } pub fn read_arg(&self, index: usize) -> OpArgByte { let units = unsafe { &*self.0.get() }; + assert!(index < units.len(), "read_arg: index out of bounds"); let ptr = units.as_ptr().wrapping_add(index) as *const u8; let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed)) } pub fn read_adaptive_counter(&self, index: usize) -> u8 { let units = unsafe { &*self.0.get() }; + assert!(index < units.len(), "read_adaptive_counter: index out of bounds"); let ptr = units.as_ptr().wrapping_add(index) as *const u8; let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; unsafe { &*arg_ptr }.load(Ordering::Relaxed) }Per coding guidelines: "Follow Rust best practices for error handling and memory management".
🤖 Prompt for AI Agents