Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c6b6842
fix jit
youknowone Mar 4, 2026
b379239
vm: complete specialized opcode dispatch paths
youknowone Mar 4, 2026
bdeca2b
vm: cache LOAD_GLOBAL with dict entry hints
youknowone Mar 4, 2026
ca3814a
vm: align adaptive specialization counters with CPython backoff
youknowone Mar 4, 2026
f04bf22
vm: apply cooldown counter on specialization success paths
youknowone Mar 4, 2026
02a1495
vm: retain LOAD_GLOBAL specializations on misses
youknowone Mar 4, 2026
13125d8
vm: keep attr and call specializations on guard misses
youknowone Mar 4, 2026
c7007b5
vm: retain store-attr and store-subscr specializations on misses
youknowone Mar 4, 2026
453294d
vm: retain specialization opcodes on generic fallback paths
youknowone Mar 4, 2026
b8f6cc2
vm: align jump-backward specialization defaults with CPython
youknowone Mar 4, 2026
7bfa961
vm: retain exact-args call specializations on misses
youknowone Mar 4, 2026
c852994
vm: retain SEND_GEN specialization on non-coroutine sends
youknowone Mar 4, 2026
1a1bbf5
vm: specialize list.append calls like CPython CALL_LIST_APPEND
youknowone Mar 4, 2026
bdd87f4
vm: set cooldown on LOAD_ATTR_CLASS specialization
youknowone Mar 4, 2026
696e57a
vm: specialize bound method object CALL paths
youknowone Mar 4, 2026
3dc64cc
vm: specialize CALL_KW for bound method objects
youknowone Mar 4, 2026
f220a24
vm: use current-state function version for CALL_KW specialization
youknowone Mar 4, 2026
a66c706
vm: align CALL/CALL_KW pyfunction specialization with CPython
youknowone Mar 4, 2026
59545f5
vm: drop call-site identity caches in generic CALL specializations
youknowone Mar 4, 2026
f8eebec
vm: align builtin type call specializations with CPython guards
youknowone Mar 4, 2026
58bb1ea
vm: align builtin CALL guards with CPython self_or_null semantics
youknowone Mar 4, 2026
157a2c3
vm: require exact list in CALL_LIST_APPEND fast path
youknowone Mar 4, 2026
cae26d4
vm: align CALL builtin/class specialization flow with CPython
youknowone Mar 4, 2026
0d01f7e
vm: tighten len/isinstance CALL specializations to builtin guards
youknowone Mar 4, 2026
22cccae
vm: gate CALL_BUILTIN_CLASS on type vectorcall like CPython
youknowone Mar 4, 2026
30e8a75
vm: run non-py CALL specializations via direct vectorcall
youknowone Mar 4, 2026
da36859
vm: align class-call specialization branching with CPython
youknowone Mar 4, 2026
42d81ff
Fix CI: disable ForIterGen, tighten CALL guards
youknowone Mar 4, 2026
d464119
vm: restore FOR_ITER_GEN specialization and tuple index parity
youknowone Mar 4, 2026
0f044d6
Add datastack-backed FastLocals for non-generator frames
youknowone Mar 4, 2026
cc3fd09
Drop read lock before key_eq in dict get_hint
youknowone Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions Lib/test/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,6 @@ def dumps(self, arg, proto=None, **kwargs):
f.seek(0)
return bytes(f.read())

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_bad_newobj_args(self):
return super().test_bad_newobj_args()

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_buffer_callback_error(self):
return super().test_buffer_callback_error()
Expand Down
128 changes: 92 additions & 36 deletions crates/compiler-core/src/bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,47 @@ pub struct CodeUnit {
const _: () = assert!(mem::size_of::<CodeUnit>() == 2);

/// Adaptive specialization: number of executions before attempting specialization.
pub const ADAPTIVE_WARMUP_VALUE: u8 = 50;
/// Adaptive specialization: backoff counter after de-optimization.
pub const ADAPTIVE_BACKOFF_VALUE: u8 = 250;
///
/// Matches CPython's `_Py_BackoffCounter` encoding.
pub const ADAPTIVE_WARMUP_VALUE: u16 = adaptive_counter_bits(1, 1);
/// Adaptive specialization: cooldown counter after a successful specialization.
///
/// Value/backoff = (52, 0), matching CPython's ADAPTIVE_COOLDOWN bits.
pub const ADAPTIVE_COOLDOWN_VALUE: u16 = adaptive_counter_bits(52, 0);
/// Initial JUMP_BACKWARD counter bits (value/backoff = 4095/12).
pub const JUMP_BACKWARD_INITIAL_VALUE: u16 = adaptive_counter_bits(4095, 12);

const BACKOFF_BITS: u16 = 4;
const MAX_BACKOFF: u16 = 12;
const UNREACHABLE_BACKOFF: u16 = 15;
/// Mask selecting the 4-bit backoff field in the low bits of a counter.
const BACKOFF_MASK: u16 = (1 << BACKOFF_BITS) - 1;
/// Largest value representable in the 12-bit value field.
const MAX_COUNTER_VALUE: u16 = (1 << (16 - BACKOFF_BITS)) - 1;

/// Encode an adaptive counter as `(value << 4) | backoff`.
///
/// Both fields are masked to their widths (12-bit value, 4-bit backoff) so an
/// out-of-range argument cannot silently bleed into the neighboring field.
/// In-range inputs produce exactly the same bits as the unmasked encoding.
pub const fn adaptive_counter_bits(value: u16, backoff: u16) -> u16 {
    ((value & MAX_COUNTER_VALUE) << BACKOFF_BITS) | (backoff & BACKOFF_MASK)
}

/// True when the adaptive counter should trigger specialization.
///
/// The whole encoded counter must be below `UNREACHABLE_BACKOFF` (15), i.e.
/// the value field is zero and the backoff field is not the unreachable
/// sentinel — matching CPython's `backoff_counter_triggers`.
#[inline]
pub const fn adaptive_counter_triggers(counter: u16) -> bool {
    counter < UNREACHABLE_BACKOFF
}

/// Decrement the value field by one countdown step; backoff bits are kept.
///
/// Wrapping subtraction mirrors C unsigned-arithmetic semantics.
/// NOTE(review): advancing a counter whose value field is already zero wraps
/// into the backoff bits — presumably callers test `adaptive_counter_triggers`
/// first; confirm at the dispatch sites.
#[inline]
pub const fn advance_adaptive_counter(counter: u16) -> u16 {
    counter.wrapping_sub(1 << BACKOFF_BITS)
}

/// Reset an adaptive counter with exponential backoff after de-optimization.
///
/// The backoff exponent grows by one, saturating at `MAX_BACKOFF` (12), and
/// the value field restarts at `2^backoff - 1`, doubling the wait before the
/// next specialization attempt — CPython's `restart_backoff_counter`.
#[inline]
pub const fn adaptive_counter_backoff(counter: u16) -> u16 {
    let mut backoff = (counter & BACKOFF_MASK) + 1;
    if backoff > MAX_BACKOFF {
        backoff = MAX_BACKOFF;
    }
    adaptive_counter_bits((1 << backoff) - 1, backoff)
}

impl CodeUnit {
pub const fn new(op: Instruction, arg: OpArgByte) -> Self {
Expand All @@ -370,12 +408,15 @@ impl TryFrom<&[u8]> for CodeUnit {
}
}

pub struct CodeUnits(UnsafeCell<Box<[CodeUnit]>>);
/// Executable bytecode with support for in-place adaptive specialization.
pub struct CodeUnits {
// Raw instruction stream. `UnsafeCell` permits atomic in-place rewrites of
// opcode/arg/cache bytes while shared references exist (see the SAFETY
// discussion and the replace_op / read_op accessors on this type).
units: UnsafeCell<Box<[CodeUnit]>>,
// Adaptive specialization counters stored out-of-line from the bytecode,
// one `AtomicU16` per code unit (allocated with the same length as `units`).
adaptive_counters: Box<[AtomicU16]>,
}

// SAFETY: All cache operations use atomic read/write instructions.
// - replace_op / compare_exchange_op: AtomicU8 store/CAS (Release)
// - cache read/write: AtomicU16 load/store (Relaxed)
// - adaptive counter: AtomicU8 load/store (Relaxed)
// - adaptive counter: AtomicU16 load/store (Relaxed)
// Ordering is established by:
// - replace_op (Release) ↔ dispatch loop read_op (Acquire) for cache data visibility
// - tp_version_tag (Acquire) for descriptor pointer validity
Expand All @@ -385,15 +426,23 @@ impl Clone for CodeUnits {
fn clone(&self) -> Self {
// SAFETY: No concurrent mutation during clone — cloning is only done
// during code object construction or marshaling, not while instrumented.
let inner = unsafe { &*self.0.get() };
Self(UnsafeCell::new(inner.clone()))
let units = unsafe { &*self.units.get() }.clone();
let adaptive_counters = self
.adaptive_counters
.iter()
.map(|c| AtomicU16::new(c.load(Ordering::Relaxed)))
.collect();
Self {
units: UnsafeCell::new(units),
adaptive_counters,
}
}
}

impl fmt::Debug for CodeUnits {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// SAFETY: Debug formatting doesn't race with replace_op
let inner = unsafe { &*self.0.get() };
let inner = unsafe { &*self.units.get() };
f.debug_tuple("CodeUnits").field(inner).finish()
}
}
Expand All @@ -406,29 +455,37 @@ impl TryFrom<&[u8]> for CodeUnits {
return Err(Self::Error::InvalidBytecode);
}

let units: Self = value
let units = value
.chunks_exact(2)
.map(CodeUnit::try_from)
.collect::<Result<_, _>>()?;
Ok(units)
.collect::<Result<Vec<_>, _>>()?;
Ok(units.into())
}
}

impl<const N: usize> From<[CodeUnit; N]> for CodeUnits {
fn from(value: [CodeUnit; N]) -> Self {
Self(UnsafeCell::new(Box::from(value)))
Self::from(Vec::from(value))
}
}

impl From<Vec<CodeUnit>> for CodeUnits {
fn from(value: Vec<CodeUnit>) -> Self {
Self(UnsafeCell::new(value.into_boxed_slice()))
let units = value.into_boxed_slice();
let adaptive_counters = (0..units.len())
.map(|_| AtomicU16::new(0))
.collect::<Vec<_>>()
.into_boxed_slice();
Self {
units: UnsafeCell::new(units),
adaptive_counters,
}
}
}

impl FromIterator<CodeUnit> for CodeUnits {
fn from_iter<T: IntoIterator<Item = CodeUnit>>(iter: T) -> Self {
Self(UnsafeCell::new(iter.into_iter().collect()))
Self::from(iter.into_iter().collect::<Vec<_>>())
}
}

Expand All @@ -439,7 +496,7 @@ impl Deref for CodeUnits {
// SAFETY: Shared references to the slice are valid even while replace_op
// may update individual opcode bytes — readers tolerate stale opcodes
// (they will re-read on the next iteration).
unsafe { &*self.0.get() }
unsafe { &*self.units.get() }
}
}

Expand All @@ -452,7 +509,7 @@ impl CodeUnits {
/// - `index` must be in bounds.
/// - `new_op` must have the same arg semantics as the original opcode.
pub unsafe fn replace_op(&self, index: usize, new_op: Instruction) {
let units = unsafe { &*self.0.get() };
let units = unsafe { &*self.units.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
unsafe { &*ptr }.store(new_op.into(), Ordering::Release);
}
Expand All @@ -468,7 +525,7 @@ impl CodeUnits {
expected: Instruction,
new_op: Instruction,
) -> bool {
let units = unsafe { &*self.0.get() };
let units = unsafe { &*self.units.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
unsafe { &*ptr }
.compare_exchange(
Expand All @@ -483,7 +540,7 @@ impl CodeUnits {
/// Atomically read the opcode at `index` with Acquire ordering.
/// Pairs with `replace_op` (Release) to ensure cache data visibility.
pub fn read_op(&self, index: usize) -> Instruction {
let units = unsafe { &*self.0.get() };
let units = unsafe { &*self.units.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8;
let byte = unsafe { &*ptr }.load(Ordering::Acquire);
// SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op.
Expand All @@ -492,7 +549,7 @@ impl CodeUnits {

/// Atomically read the arg byte at `index` with Relaxed ordering.
pub fn read_arg(&self, index: usize) -> OpArgByte {
let units = unsafe { &*self.0.get() };
let units = unsafe { &*self.units.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed))
Expand All @@ -505,7 +562,7 @@ impl CodeUnits {
/// # Safety
/// - `index` must be in bounds and point to a CACHE entry.
pub unsafe fn write_cache_u16(&self, index: usize, value: u16) {
let units = unsafe { &*self.0.get() };
let units = unsafe { &*self.units.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16;
unsafe { &*ptr }.store(value, Ordering::Relaxed);
}
Expand All @@ -516,7 +573,7 @@ impl CodeUnits {
/// # Panics
/// Panics if `index` is out of bounds.
pub fn read_cache_u16(&self, index: usize) -> u16 {
let units = unsafe { &*self.0.get() };
let units = unsafe { &*self.units.get() };
assert!(index < units.len(), "read_cache_u16: index out of bounds");
let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16;
unsafe { &*ptr }.load(Ordering::Relaxed)
Expand Down Expand Up @@ -564,25 +621,19 @@ impl CodeUnits {
lo | (hi << 32)
}

/// Read the adaptive counter from the CACHE entry's `arg` byte at `index`.
/// Read adaptive counter bits for instruction at `index`.
/// Uses Relaxed atomic load.
pub fn read_adaptive_counter(&self, index: usize) -> u8 {
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
unsafe { &*arg_ptr }.load(Ordering::Relaxed)
pub fn read_adaptive_counter(&self, index: usize) -> u16 {
self.adaptive_counters[index].load(Ordering::Relaxed)
}

/// Write the adaptive counter to the CACHE entry's `arg` byte at `index`.
/// Write adaptive counter bits for instruction at `index`.
/// Uses Relaxed atomic store.
///
/// # Safety
/// - `index` must be in bounds and point to a CACHE entry.
pub unsafe fn write_adaptive_counter(&self, index: usize, value: u8) {
let units = unsafe { &*self.0.get() };
let ptr = units.as_ptr().wrapping_add(index) as *const u8;
let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8;
unsafe { &*arg_ptr }.store(value, Ordering::Relaxed);
/// - `index` must be in bounds.
pub unsafe fn write_adaptive_counter(&self, index: usize, value: u16) {
self.adaptive_counters[index].store(value, Ordering::Relaxed);
}

/// Produce a clean copy of the bytecode suitable for serialization
Expand Down Expand Up @@ -611,7 +662,7 @@ impl CodeUnits {

/// Initialize adaptive warmup counters for all cacheable instructions.
/// Called lazily at RESUME (first execution of a code object).
/// Uses the `arg` byte of the first CACHE entry, preserving `op = Instruction::Cache`.
/// Counters are stored out-of-line to preserve `op = Instruction::Cache`.
/// All writes are atomic (Relaxed) to avoid data races with concurrent readers.
pub fn quicken(&self) {
let len = self.len();
Expand All @@ -625,8 +676,13 @@ impl CodeUnits {
if !op.is_instrumented() {
let cache_base = i + 1;
if cache_base < len {
let initial_counter = if matches!(op, Instruction::JumpBackward { .. }) {
JUMP_BACKWARD_INITIAL_VALUE
} else {
ADAPTIVE_WARMUP_VALUE
};
unsafe {
self.write_adaptive_counter(cache_base, ADAPTIVE_WARMUP_VALUE);
self.write_adaptive_counter(cache_base, initial_counter);
}
}
}
Expand Down
13 changes: 11 additions & 2 deletions crates/jit/src/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,18 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
func_ref: FuncRef,
bytecode: &CodeObject<C>,
) -> Result<(), JitCompileError> {
// JIT should consume a stable instruction stream: de-specialized opcodes
// with zeroed CACHE entries, not runtime-mutated quickened code.
let clean_instructions: bytecode::CodeUnits = bytecode
.instructions
.original_bytes()
.as_slice()
.try_into()
.map_err(|_| JitCompileError::BadBytecode)?;

let mut label_targets = BTreeSet::new();
let mut target_arg_state = OpArgState::default();
for (offset, &raw_instr) in bytecode.instructions.iter().enumerate() {
for (offset, &raw_instr) in clean_instructions.iter().enumerate() {
let (instruction, arg) = target_arg_state.get(raw_instr);
if let Some(target) = Self::instruction_target(offset as u32, instruction, arg)? {
label_targets.insert(target);
Expand All @@ -223,7 +232,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> {
// Track whether we have "returned" in the current block
let mut in_unreachable_code = false;

for (offset, &raw_instr) in bytecode.instructions.iter().enumerate() {
for (offset, &raw_instr) in clean_instructions.iter().enumerate() {
let label = Label(offset as u32);
let (instruction, arg) = arg_state.get(raw_instr);

Expand Down
27 changes: 27 additions & 0 deletions crates/vm/src/builtins/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,33 @@ impl Py<PyDict> {
}
}

/// Return a cached-entry hint for exact dict fast paths.
///
/// Only exact dicts produce a hint; for any other receiver this returns
/// `Ok(None)` so callers cannot cache a slot index for a subclass instance.
pub(crate) fn hint_for_key<K: DictKey + ?Sized>(
    &self,
    key: &K,
    vm: &VirtualMachine,
) -> PyResult<Option<u16>> {
    if !self.exact_dict(vm) {
        return Ok(None);
    }
    self.entries.hint_for_key(vm, key)
}

/// Fast lookup using a cached entry index hint.
///
/// For an exact dict the hinted slot index is forwarded to the entry table's
/// `get_hint`; any other receiver ignores the hint and takes the generic
/// `get_item_opt` path.
pub(crate) fn get_item_opt_hint<K: DictKey + ?Sized>(
    &self,
    key: &K,
    hint: u16,
    vm: &VirtualMachine,
) -> PyResult<Option<PyObjectRef>> {
    if !self.exact_dict(vm) {
        return self.get_item_opt(key, vm);
    }
    self.entries.get_hint(vm, key, usize::from(hint))
}

pub fn get_item<K: DictKey + ?Sized>(&self, key: &K, vm: &VirtualMachine) -> PyResult {
if self.exact_dict(vm) {
self.inner_getitem(key, vm)
Expand Down
19 changes: 16 additions & 3 deletions crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,10 +727,12 @@ impl PyFunction {

#[pygetset(setter)]
fn set___code__(&self, code: PyRef<PyCode>, vm: &VirtualMachine) {
#[cfg(feature = "jit")]
let mut jit_guard = self.jitted_code.lock();
self.code.swap_to_temporary_refs(code, vm);
#[cfg(feature = "jit")]
{
*self.jitted_code.lock() = None;
*jit_guard = None;
}
self.func_version.store(0, Relaxed);
}
Expand Down Expand Up @@ -968,15 +970,16 @@ impl PyFunction {
#[cfg(feature = "jit")]
#[pymethod]
fn __jit__(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<()> {
if zelf.jitted_code.lock().is_some() {
let mut jit_guard = zelf.jitted_code.lock();
if jit_guard.is_some() {
return Ok(());
}
let arg_types = jit::get_jit_arg_types(&zelf, vm)?;
let ret_type = jit::jit_ret_type(&zelf, vm)?;
let code: &Py<PyCode> = &zelf.code;
let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type)
.map_err(|err| jit::new_jit_error(err.to_string(), vm))?;
*zelf.jitted_code.lock() = Some(compiled);
*jit_guard = Some(compiled);
Ok(())
}
}
Expand Down Expand Up @@ -1149,6 +1152,16 @@ impl PyBoundMethod {
Self { object, function }
}

/// Borrow the underlying function object this method wraps.
#[inline]
pub(crate) fn function_obj(&self) -> &PyObjectRef {
    &self.function
}

/// Borrow the receiver object this method is bound to.
#[inline]
pub(crate) fn self_obj(&self) -> &PyObjectRef {
    &self.object
}

#[deprecated(note = "Use `Self::new(object, function).into_ref(ctx)` instead")]
pub fn new_ref(object: PyObjectRef, function: PyObjectRef, ctx: &Context) -> PyRef<Self> {
Self::new(object, function).into_ref(ctx)
Expand Down
8 changes: 7 additions & 1 deletion crates/vm/src/builtins/tuple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,13 @@ impl PyTuple {

fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult {
match SequenceIndex::try_from_borrowed_object(vm, needle, "tuple")? {
SequenceIndex::Int(i) => self.elements.getitem_by_index(vm, i),
SequenceIndex::Int(i) => {
let index = self
.elements
.wrap_index(i)
.ok_or_else(|| vm.new_index_error("tuple index out of range"))?;
Ok(self.elements[index].clone())
}
SequenceIndex::Slice(slice) => self
.elements
.getitem_by_slice(vm, slice)
Expand Down
Loading
Loading