diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index a09037034ad..c9d4a348448 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -85,10 +85,6 @@ def dumps(self, arg, proto=None, **kwargs): f.seek(0) return bytes(f.read()) - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bad_newobj_args(self): - return super().test_bad_newobj_args() - @unittest.expectedFailure # TODO: RUSTPYTHON def test_buffer_callback_error(self): return super().test_buffer_callback_error() diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index d3dda5090ab..90b58116164 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -349,9 +349,47 @@ pub struct CodeUnit { const _: () = assert!(mem::size_of::() == 2); /// Adaptive specialization: number of executions before attempting specialization. -pub const ADAPTIVE_WARMUP_VALUE: u8 = 50; -/// Adaptive specialization: backoff counter after de-optimization. -pub const ADAPTIVE_BACKOFF_VALUE: u8 = 250; +/// +/// Matches CPython's `_Py_BackoffCounter` encoding. +pub const ADAPTIVE_WARMUP_VALUE: u16 = adaptive_counter_bits(1, 1); +/// Adaptive specialization: cooldown counter after a successful specialization. +/// +/// Value/backoff = (52, 0), matching CPython's ADAPTIVE_COOLDOWN bits. +pub const ADAPTIVE_COOLDOWN_VALUE: u16 = adaptive_counter_bits(52, 0); +/// Initial JUMP_BACKWARD counter bits (value/backoff = 4095/12). +pub const JUMP_BACKWARD_INITIAL_VALUE: u16 = adaptive_counter_bits(4095, 12); + +const BACKOFF_BITS: u16 = 4; +const MAX_BACKOFF: u16 = 12; +const UNREACHABLE_BACKOFF: u16 = 15; + +/// Encode an adaptive counter as `(value << 4) | backoff`. +pub const fn adaptive_counter_bits(value: u16, backoff: u16) -> u16 { + (value << BACKOFF_BITS) | backoff +} + +/// True when the adaptive counter should trigger specialization. +#[inline] +pub const fn adaptive_counter_triggers(counter: u16) -> bool { + counter < UNREACHABLE_BACKOFF +} + +/// Decrement adaptive counter by one countdown step. +#[inline] +pub const fn advance_adaptive_counter(counter: u16) -> u16 { + counter.wrapping_sub(1 << BACKOFF_BITS) +} + +/// Reset adaptive counter with exponential backoff. +#[inline] +pub const fn adaptive_counter_backoff(counter: u16) -> u16 { + let backoff = counter & ((1 << BACKOFF_BITS) - 1); + if backoff < MAX_BACKOFF { + adaptive_counter_bits((1 << (backoff + 1)) - 1, backoff + 1) + } else { + adaptive_counter_bits((1 << MAX_BACKOFF) - 1, MAX_BACKOFF) + } +} impl CodeUnit { pub const fn new(op: Instruction, arg: OpArgByte) -> Self { @@ -370,12 +408,15 @@ impl TryFrom<&[u8]> for CodeUnit { } } -pub struct CodeUnits(UnsafeCell>); +pub struct CodeUnits { + units: UnsafeCell>, + adaptive_counters: Box<[AtomicU16]>, +} // SAFETY: All cache operations use atomic read/write instructions. // - replace_op / compare_exchange_op: AtomicU8 store/CAS (Release) // - cache read/write: AtomicU16 load/store (Relaxed) -// - adaptive counter: AtomicU8 load/store (Relaxed) +// - adaptive counter: AtomicU16 load/store (Relaxed) // Ordering is established by: // - replace_op (Release) ↔ dispatch loop read_op (Acquire) for cache data visibility // - tp_version_tag (Acquire) for descriptor pointer validity @@ -385,15 +426,23 @@ impl Clone for CodeUnits { fn clone(&self) -> Self { // SAFETY: No concurrent mutation during clone — cloning is only done // during code object construction or marshaling, not while instrumented. - let inner = unsafe { &*self.0.get() }; - Self(UnsafeCell::new(inner.clone())) + let units = unsafe { &*self.units.get() }.clone(); + let adaptive_counters = self + .adaptive_counters + .iter() + .map(|c| AtomicU16::new(c.load(Ordering::Relaxed))) + .collect(); + Self { + units: UnsafeCell::new(units), + adaptive_counters, + } } } impl fmt::Debug for CodeUnits { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // SAFETY: Debug formatting doesn't race with replace_op - let inner = unsafe { &*self.0.get() }; + let inner = unsafe { &*self.units.get() }; f.debug_tuple("CodeUnits").field(inner).finish() } } @@ -406,29 +455,37 @@ impl TryFrom<&[u8]> for CodeUnits { return Err(Self::Error::InvalidBytecode); } - let units: Self = value + let units = value .chunks_exact(2) .map(CodeUnit::try_from) - .collect::>()?; - Ok(units) + .collect::, _>>()?; + Ok(units.into()) } } impl From<[CodeUnit; N]> for CodeUnits { fn from(value: [CodeUnit; N]) -> Self { - Self(UnsafeCell::new(Box::from(value))) + Self::from(Vec::from(value)) } } impl From> for CodeUnits { fn from(value: Vec) -> Self { - Self(UnsafeCell::new(value.into_boxed_slice())) + let units = value.into_boxed_slice(); + let adaptive_counters = (0..units.len()) + .map(|_| AtomicU16::new(0)) + .collect::>() + .into_boxed_slice(); + Self { + units: UnsafeCell::new(units), + adaptive_counters, + } } } impl FromIterator for CodeUnits { fn from_iter>(iter: T) -> Self { - Self(UnsafeCell::new(iter.into_iter().collect())) + Self::from(iter.into_iter().collect::>()) } } @@ -439,7 +496,7 @@ impl Deref for CodeUnits { // SAFETY: Shared references to the slice are valid even while replace_op // may update individual opcode bytes — readers tolerate stale opcodes // (they will re-read on the next iteration). - unsafe { &*self.0.get() } + unsafe { &*self.units.get() } } } @@ -452,7 +509,7 @@ impl CodeUnits { /// - `index` must be in bounds. /// - `new_op` must have the same arg semantics as the original opcode. pub unsafe fn replace_op(&self, index: usize, new_op: Instruction) { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; unsafe { &*ptr }.store(new_op.into(), Ordering::Release); } @@ -468,7 +525,7 @@ impl CodeUnits { expected: Instruction, new_op: Instruction, ) -> bool { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; unsafe { &*ptr } .compare_exchange( @@ -483,7 +540,7 @@ impl CodeUnits { /// Atomically read the opcode at `index` with Acquire ordering. /// Pairs with `replace_op` (Release) to ensure cache data visibility. pub fn read_op(&self, index: usize) -> Instruction { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU8; let byte = unsafe { &*ptr }.load(Ordering::Acquire); // SAFETY: Only valid Instruction values are stored via replace_op/compare_exchange_op. @@ -492,7 +549,7 @@ impl CodeUnits { /// Atomically read the arg byte at `index` with Relaxed ordering. pub fn read_arg(&self, index: usize) -> OpArgByte { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const u8; let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; OpArgByte::from(unsafe { &*arg_ptr }.load(Ordering::Relaxed)) @@ -505,7 +562,7 @@ impl CodeUnits { /// # Safety /// - `index` must be in bounds and point to a CACHE entry. pub unsafe fn write_cache_u16(&self, index: usize, value: u16) { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16; unsafe { &*ptr }.store(value, Ordering::Relaxed); } @@ -516,7 +573,7 @@ impl CodeUnits { /// # Panics /// Panics if `index` is out of bounds. pub fn read_cache_u16(&self, index: usize) -> u16 { - let units = unsafe { &*self.0.get() }; + let units = unsafe { &*self.units.get() }; assert!(index < units.len(), "read_cache_u16: index out of bounds"); let ptr = units.as_ptr().wrapping_add(index) as *const AtomicU16; unsafe { &*ptr }.load(Ordering::Relaxed) @@ -564,25 +621,19 @@ impl CodeUnits { lo | (hi << 32) } - /// Read the adaptive counter from the CACHE entry's `arg` byte at `index`. + /// Read adaptive counter bits for instruction at `index`. /// Uses Relaxed atomic load. - pub fn read_adaptive_counter(&self, index: usize) -> u8 { - let units = unsafe { &*self.0.get() }; - let ptr = units.as_ptr().wrapping_add(index) as *const u8; - let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; - unsafe { &*arg_ptr }.load(Ordering::Relaxed) + pub fn read_adaptive_counter(&self, index: usize) -> u16 { + self.adaptive_counters[index].load(Ordering::Relaxed) } - /// Write the adaptive counter to the CACHE entry's `arg` byte at `index`. + /// Write adaptive counter bits for instruction at `index`. /// Uses Relaxed atomic store. /// /// # Safety - /// - `index` must be in bounds and point to a CACHE entry. - pub unsafe fn write_adaptive_counter(&self, index: usize, value: u8) { - let units = unsafe { &*self.0.get() }; - let ptr = units.as_ptr().wrapping_add(index) as *const u8; - let arg_ptr = unsafe { ptr.add(1) } as *const AtomicU8; - unsafe { &*arg_ptr }.store(value, Ordering::Relaxed); + /// - `index` must be in bounds. + pub unsafe fn write_adaptive_counter(&self, index: usize, value: u16) { + self.adaptive_counters[index].store(value, Ordering::Relaxed); } /// Produce a clean copy of the bytecode suitable for serialization @@ -611,7 +662,7 @@ impl CodeUnits { /// Initialize adaptive warmup counters for all cacheable instructions. /// Called lazily at RESUME (first execution of a code object). - /// Uses the `arg` byte of the first CACHE entry, preserving `op = Instruction::Cache`. + /// Counters are stored out-of-line to preserve `op = Instruction::Cache`. /// All writes are atomic (Relaxed) to avoid data races with concurrent readers. pub fn quicken(&self) { let len = self.len(); @@ -625,8 +676,13 @@ impl CodeUnits { if !op.is_instrumented() { let cache_base = i + 1; if cache_base < len { + let initial_counter = if matches!(op, Instruction::JumpBackward { .. }) { + JUMP_BACKWARD_INITIAL_VALUE + } else { + ADAPTIVE_WARMUP_VALUE + }; unsafe { - self.write_adaptive_counter(cache_base, ADAPTIVE_WARMUP_VALUE); + self.write_adaptive_counter(cache_base, initial_counter); } } } diff --git a/crates/jit/src/instructions.rs b/crates/jit/src/instructions.rs index 5bf4057b340..9d8be5bc6e3 100644 --- a/crates/jit/src/instructions.rs +++ b/crates/jit/src/instructions.rs @@ -210,9 +210,18 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { func_ref: FuncRef, bytecode: &CodeObject, ) -> Result<(), JitCompileError> { + // JIT should consume a stable instruction stream: de-specialized opcodes + // with zeroed CACHE entries, not runtime-mutated quickened code. + let clean_instructions: bytecode::CodeUnits = bytecode + .instructions + .original_bytes() + .as_slice() + .try_into() + .map_err(|_| JitCompileError::BadBytecode)?; + let mut label_targets = BTreeSet::new(); let mut target_arg_state = OpArgState::default(); - for (offset, &raw_instr) in bytecode.instructions.iter().enumerate() { + for (offset, &raw_instr) in clean_instructions.iter().enumerate() { let (instruction, arg) = target_arg_state.get(raw_instr); if let Some(target) = Self::instruction_target(offset as u32, instruction, arg)? { label_targets.insert(target); @@ -223,7 +232,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { // Track whether we have "returned" in the current block let mut in_unreachable_code = false; - for (offset, &raw_instr) in bytecode.instructions.iter().enumerate() { + for (offset, &raw_instr) in clean_instructions.iter().enumerate() { let label = Label(offset as u32); let (instruction, arg) = arg_state.get(raw_instr); diff --git a/crates/vm/src/builtins/dict.rs b/crates/vm/src/builtins/dict.rs index c630fc25dff..7ba173fe7e4 100644 --- a/crates/vm/src/builtins/dict.rs +++ b/crates/vm/src/builtins/dict.rs @@ -668,6 +668,33 @@ impl Py { } } + /// Return a cached-entry hint for exact dict fast paths. + pub(crate) fn hint_for_key( + &self, + key: &K, + vm: &VirtualMachine, + ) -> PyResult> { + if self.exact_dict(vm) { + self.entries.hint_for_key(vm, key) + } else { + Ok(None) + } + } + + /// Fast lookup using a cached entry index hint. + pub(crate) fn get_item_opt_hint( + &self, + key: &K, + hint: u16, + vm: &VirtualMachine, + ) -> PyResult> { + if self.exact_dict(vm) { + self.entries.get_hint(vm, key, usize::from(hint)) + } else { + self.get_item_opt(key, vm) + } + } + pub fn get_item(&self, key: &K, vm: &VirtualMachine) -> PyResult { if self.exact_dict(vm) { self.inner_getitem(key, vm) diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 1316dd7b725..03663d22e5d 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -727,10 +727,12 @@ impl PyFunction { #[pygetset(setter)] fn set___code__(&self, code: PyRef, vm: &VirtualMachine) { + #[cfg(feature = "jit")] + let mut jit_guard = self.jitted_code.lock(); self.code.swap_to_temporary_refs(code, vm); #[cfg(feature = "jit")] { - *self.jitted_code.lock() = None; + *jit_guard = None; } self.func_version.store(0, Relaxed); } @@ -968,7 +970,8 @@ impl PyFunction { #[cfg(feature = "jit")] #[pymethod] fn __jit__(zelf: PyRef, vm: &VirtualMachine) -> PyResult<()> { - if zelf.jitted_code.lock().is_some() { + let mut jit_guard = zelf.jitted_code.lock(); + if jit_guard.is_some() { return Ok(()); } let arg_types = jit::get_jit_arg_types(&zelf, vm)?; @@ -976,7 +979,7 @@ impl PyFunction { let code: &Py = &zelf.code; let compiled = rustpython_jit::compile(&code.code, &arg_types, ret_type) .map_err(|err| jit::new_jit_error(err.to_string(), vm))?; - *zelf.jitted_code.lock() = Some(compiled); + *jit_guard = Some(compiled); Ok(()) } } @@ -1149,6 +1152,16 @@ impl PyBoundMethod { Self { object, function } } + #[inline] + pub(crate) fn function_obj(&self) -> &PyObjectRef { + &self.function + } + + #[inline] + pub(crate) fn self_obj(&self) -> &PyObjectRef { + &self.object + } + #[deprecated(note = "Use `Self::new(object, function).into_ref(ctx)` instead")] pub fn new_ref(object: PyObjectRef, function: PyObjectRef, ctx: &Context) -> PyRef { Self::new(object, function).into_ref(ctx) diff --git a/crates/vm/src/builtins/tuple.rs b/crates/vm/src/builtins/tuple.rs index b7ed066f1d1..03f88f1b5fe 100644 --- a/crates/vm/src/builtins/tuple.rs +++ b/crates/vm/src/builtins/tuple.rs @@ -327,7 +327,13 @@ impl PyTuple { fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult { match SequenceIndex::try_from_borrowed_object(vm, needle, "tuple")? { - SequenceIndex::Int(i) => self.elements.getitem_by_index(vm, i), + SequenceIndex::Int(i) => { + let index = self + .elements + .wrap_index(i) + .ok_or_else(|| vm.new_index_error("tuple index out of range"))?; + Ok(self.elements[index].clone()) + } SequenceIndex::Slice(slice) => self .elements .getitem_by_slice(vm, slice) diff --git a/crates/vm/src/dict_inner.rs b/crates/vm/src/dict_inner.rs index 2a77ea7d991..763fa856319 100644 --- a/crates/vm/src/dict_inner.rs +++ b/crates/vm/src/dict_inner.rs @@ -337,6 +337,50 @@ impl Dict { self._get_inner(vm, key, hash) } + /// Return a stable entry hint for `key` if present. + /// + /// The hint is the internal entry index and can be used with + /// [`Self::get_hint`]. It is invalidated by dict mutations. + pub fn hint_for_key( + &self, + vm: &VirtualMachine, + key: &K, + ) -> PyResult> { + let hash = key.key_hash(vm)?; + let (entry, _) = self.lookup(vm, key, hash, None)?; + let Some(index) = entry.index() else { + return Ok(None); + }; + Ok(u16::try_from(index).ok()) + } + + /// Fast path lookup using a cached entry index (`hint`). + /// + /// Returns `None` if the hint is stale or the key no longer matches. + pub fn get_hint( + &self, + vm: &VirtualMachine, + key: &K, + hint: usize, + ) -> PyResult> { + let (entry_key, entry_value) = { + let inner = self.read(); + let Some(Some(entry)) = inner.entries.get(hint) else { + return Ok(None); + }; + if key.key_is(&entry.key) { + return Ok(Some(entry.value.clone())); + } + (entry.key.clone(), entry.value.clone()) + }; + // key_eq may run Python __eq__, so must be outside the lock. + if key.key_eq(vm, &entry_key)? { + Ok(Some(entry_value)) + } else { + Ok(None) + } + } + fn _get_inner( &self, vm: &VirtualMachine, diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 29626d104da..8032cf2802d 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -12,13 +12,13 @@ use crate::{ builtin_func::PyNativeFunction, descriptor::{MemberGetter, PyMemberDescriptor, PyMethodDescriptor}, frame::stack_analysis, - function::{PyCell, PyCellRef, PyFunction, vectorcall_function}, + function::{PyBoundMethod, PyCell, PyCellRef, PyFunction, vectorcall_function}, list::PyListIterator, range::PyRangeIterator, tuple::{PyTuple, PyTupleIterator, PyTupleRef}, }, bytecode::{ - self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, + self, ADAPTIVE_COOLDOWN_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod, }, convert::{ToPyObject, ToPyResult}, coroutine::Coro, @@ -1641,7 +1641,6 @@ impl ExecutingFrame<'_> { self.push_value(result.to_pyobject(vm)); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) } } @@ -2169,6 +2168,18 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::JumpBackward { .. } => { + // CPython rewrites JUMP_BACKWARD to JUMP_BACKWARD_NO_JIT + // when JIT is unavailable. + let instr_idx = self.lasti() as usize - 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::JumpBackwardNoJit); + } + self.jump_relative_backward(u32::from(arg), 1); + Ok(None) + } + Instruction::JumpBackwardJit | Instruction::JumpBackwardNoJit => { self.jump_relative_backward(u32::from(arg), 1); Ok(None) } @@ -2303,6 +2314,18 @@ impl ExecutingFrame<'_> { } Instruction::LoadConst { consti: idx } => { self.push_value(self.code.constants[idx.get(arg) as usize].clone().into()); + // Mirror CPython's LOAD_CONST family transition. RustPython does + // not currently distinguish immortal constants at runtime. + let instr_idx = self.lasti() as usize - 1; + unsafe { + self.code + .instructions + .replace_op(instr_idx, Instruction::LoadConstMortal); + } + Ok(None) + } + Instruction::LoadConstMortal | Instruction::LoadConstImmortal => { + self.push_value(self.code.constants[u32::from(arg) as usize].clone().into()); Ok(None) } Instruction::LoadCommonConstant { idx } => { @@ -2805,7 +2828,7 @@ impl ExecutingFrame<'_> { Ok(None) } Instruction::RaiseVarargs { argc: kind } => self.execute_raise(vm, kind.get(arg)), - Instruction::Resume { .. } => { + Instruction::Resume { .. } | Instruction::ResumeCheck => { // Lazy quickening: initialize adaptive counters on first execution if !self.code.quickened.swap(true, atomic::Ordering::Relaxed) { self.code.instructions.quicken(); @@ -3133,21 +3156,6 @@ impl ExecutingFrame<'_> { } } } - { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Send { - delta: Arg::marker(), - }, - ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } - } match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { self.push_value(value); @@ -3268,6 +3276,35 @@ impl ExecutingFrame<'_> { self.load_attr_slow(vm, oparg) } } + Instruction::LoadAttrMethodLazyDict => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && owner.dict().is_none() + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let func = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + let owner = self.pop_value(); + self.push_value(func); + self.push_value(owner); + Ok(None) + } else { + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.load_attr_slow(vm, oparg) + } + } Instruction::LoadAttrMethodWithValues => { let oparg = LoadAttr::new(u32::from(arg)); let instr_idx = self.lasti() as usize - 1; @@ -3292,9 +3329,14 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code + .instructions + .read_adaptive_counter(cache_base), + ), + ); } return self.load_attr_slow(vm, oparg); } @@ -3352,6 +3394,39 @@ impl ExecutingFrame<'_> { ); self.load_attr_slow(vm, oparg) } + Instruction::LoadAttrWithHint => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[oparg.name_idx() as usize]; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(dict) = owner.dict() + && let Some(value) = dict.get_item_opt(attr_name, vm)? + { + self.pop_value(); + if oparg.is_method() { + self.push_value(value); + self.push_value_opt(None); + } else { + self.push_value(value); + } + return Ok(None); + } + + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.load_attr_slow(vm, oparg) + } Instruction::LoadAttrModule => { let oparg = LoadAttr::new(u32::from(arg)); let instr_idx = self.lasti() as usize - 1; @@ -3383,9 +3458,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3417,9 +3495,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3465,9 +3546,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3501,10 +3585,55 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + self.load_attr_slow(vm, oparg) + } + Instruction::LoadAttrClassWithMetaclassCheck => { + let oparg = LoadAttr::new(u32::from(arg)); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + let metaclass_version = self.code.instructions.read_cache_u32(cache_base + 3); + + if type_version != 0 + && metaclass_version != 0 + && let Some(owner_type) = owner.downcast_ref::() + && owner_type.tp_version_tag.load(Acquire) == type_version + && owner.class().tp_version_tag.load(Acquire) == metaclass_version + { + let descr_ptr = self.code.instructions.read_cache_u64(cache_base + 5); + let attr = unsafe { &*(descr_ptr as *const PyObject) }.to_owned(); + self.pop_value(); + if oparg.is_method() { + self.push_value(attr); + self.push_value_opt(None); + } else { + self.push_value(attr); + } + return Ok(None); } + self.deoptimize_at( + Instruction::LoadAttr { + namei: Arg::marker(), + }, + instr_idx, + cache_base, + ); + self.load_attr_slow(vm, oparg) + } + Instruction::LoadAttrGetattributeOverridden => { + let oparg = LoadAttr::new(u32::from(arg)); + self.deoptimize(Instruction::LoadAttr { + namei: Arg::marker(), + }); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrSlot => { @@ -3537,9 +3666,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3572,9 +3704,12 @@ impl ExecutingFrame<'_> { namei: Arg::marker(), }, ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } self.load_attr_slow(vm, oparg) } @@ -3595,13 +3730,25 @@ impl ExecutingFrame<'_> { dict.set_item(attr_name, value, vm)?; return Ok(None); } - self.deoptimize_at( - Instruction::StoreAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); + self.store_attr(vm, attr_idx) + } + Instruction::StoreAttrWithHint => { + let attr_idx = u32::from(arg); + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let attr_name = self.code.names[attr_idx as usize]; + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + + if type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(dict) = owner.dict() + { + self.pop_value(); // owner + let value = self.pop_value(); + dict.set_item(attr_name, value, vm)?; + return Ok(None); + } self.store_attr(vm, attr_idx) } Instruction::StoreAttrSlot => { @@ -3621,19 +3768,7 @@ impl ExecutingFrame<'_> { owner.set_slot(slot_offset, Some(value)); return Ok(None); } - // Deoptimize let attr_idx = u32::from(arg); - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::StoreAttr { - namei: Arg::marker(), - }, - ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } self.store_attr(vm, attr_idx) } Instruction::StoreSubscrListInt => { @@ -3651,10 +3786,8 @@ impl ExecutingFrame<'_> { return Ok(None); } drop(vec); - self.deoptimize(Instruction::StoreSubscr); return Err(vm.new_index_error("list assignment index out of range")); } - self.deoptimize(Instruction::StoreSubscr); obj.set_item(&*idx, value, vm)?; Ok(None) } @@ -3667,7 +3800,6 @@ impl ExecutingFrame<'_> { dict.set_item(&*idx, value, vm)?; Ok(None) } else { - self.deoptimize(Instruction::StoreSubscr); obj.set_item(&*idx, value, vm)?; Ok(None) } @@ -3704,10 +3836,14 @@ impl ExecutingFrame<'_> { self.push_value(result.to_pyobject(vm)); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Add) } } + Instruction::BinaryOpSubscrGetitem | Instruction::BinaryOpExtend => { + let op = bytecode::BinaryOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::BinaryOperator::Subscr); + self.execute_bin_op(vm, op) + } Instruction::BinaryOpSubscrListInt => { let b = self.top_value(); let a = self.nth_value(1); @@ -3726,10 +3862,8 @@ impl ExecutingFrame<'_> { return Ok(None); } drop(vec); - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(vm.new_index_error("list index out of range")); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrTupleInt => { @@ -3748,10 +3882,8 @@ impl ExecutingFrame<'_> { self.push_value(value); return Ok(None); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(vm.new_index_error("tuple index out of range")); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrDict => { @@ -3766,18 +3898,15 @@ impl ExecutingFrame<'_> { return Ok(None); } Ok(None) => { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); let key = self.pop_value(); self.pop_value(); return Err(vm.new_key_error(key)); } Err(e) => { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(e); } } } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrStrInt => { @@ -3796,12 +3925,10 @@ impl ExecutingFrame<'_> { return Ok(None); } Err(e) => { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); return Err(e); } } } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpSubscrListSlice => { @@ -3816,7 +3943,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::CallPyExactArgs => { @@ -3830,26 +3956,22 @@ impl ExecutingFrame<'_> { && func.func_version() == cached_version && cached_version != 0 { - let args: Vec = self.pop_multiple(nargs as usize).collect(); - let _null = self.pop_value_opt(); // self_or_null (NULL) + let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); + let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); let func = callable.downcast_ref::().unwrap(); + let args = if let Some(self_val) = self_or_null { + let mut all_args = Vec::with_capacity(pos_args.len() + 1); + all_args.push(self_val); + all_args.extend(pos_args); + all_args + } else { + pos_args + }; let result = func.invoke_exact_args(args, vm)?; self.push_value(result); Ok(None) } else { - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Call { - argc: Arg::marker(), - }, - ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); - } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) } @@ -3859,37 +3981,38 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - // Stack: [callable, self_val, arg1, ..., argN] + // Stack: [callable, self_or_null(NULL), arg1, ..., argN] + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() { - let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); - let self_val = self.pop_value(); - let callable = self.pop_value(); - let func = callable.downcast_ref::().unwrap(); - let mut all_args = Vec::with_capacity(pos_args.len() + 1); - all_args.push(self_val); - all_args.extend(pos_args); - let result = func.invoke_exact_args(all_args, vm)?; - self.push_value(result); - Ok(None) - } else { - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::Call { - argc: Arg::marker(), - }, - ); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + let bound_function = bound_method.function_obj().clone(); + let bound_self = bound_method.self_obj().clone(); + if let Some(func) = bound_function.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let pos_args: Vec = + self.pop_multiple(nargs as usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable (bound method) + let mut all_args = Vec::with_capacity(pos_args.len() + 1); + all_args.push(bound_self); + all_args.extend(pos_args); + let result = func.invoke_exact_args(all_args, vm)?; + self.push_value(result); + return Ok(None); } let args = self.collect_positional_args(nargs); self.execute_call(args, vm) + } else { + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } } Instruction::CallLen => { @@ -3900,17 +4023,20 @@ impl ExecutingFrame<'_> { if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); // arg - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + let is_len_callable = callable + .downcast_ref_if_exact::(vm) + .is_some_and(|native| native.zelf.is_none() && native.value.name == "len"); + if null.is_none() && cached_tag == callable_tag && is_len_callable { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); } // Guard failed — re-push and fallback self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } self.deoptimize(Instruction::Call { @@ -3924,23 +4050,34 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - if nargs == 2 { - // Stack: [callable, null, obj, class_info] - let class_info = self.pop_value(); - let obj = self.pop_value(); - let _null = self.pop_value_opt(); - let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { - let result = obj.is_instance(&class_info, vm)?; + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); + if effective_nargs == 2 { + let callable = self.nth_value(nargs + 1); + let callable_tag = callable as *const PyObject as u32; + let is_isinstance_callable = callable + .downcast_ref_if_exact::(vm) + .is_some_and(|native| { + native.zelf.is_none() && native.value.name == "isinstance" + }); + if cached_tag == callable_tag && is_isinstance_callable { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + self.pop_value(); // callable + let mut all_args = Vec::with_capacity(2); + if let Some(self_val) = self_or_null { + all_args.push(self_val); + } + all_args.extend(pos_args); + let result = all_args[0].is_instance(&all_args[1], vm)?; self.push_value(vm.ctx.new_bool(result).into()); return Ok(None); } - // Guard failed — re-push and fallback - self.push_value(callable); - self.push_value_opt(_null); - self.push_value(obj); - self.push_value(class_info); } self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -3949,24 +4086,20 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallType1 => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if null.is_none() && callable.is(vm.ctx.types.type_type.as_object()) { let tp = obj.class().to_owned().into(); self.push_value(tp); return Ok(None); } // Guard failed — re-push and fallback self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } self.deoptimize(Instruction::Call { @@ -3976,22 +4109,18 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallStr1 => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if null.is_none() && callable.is(vm.ctx.types.str_type.as_object()) { let result = obj.str(vm)?; self.push_value(result.into()); return Ok(None); } self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } self.deoptimize(Instruction::Call { @@ -4001,16 +4130,12 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallTuple1 => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag { + if null.is_none() && callable.is(vm.ctx.types.tuple_type.as_object()) { // tuple(x) returns x as-is when x is already an exact tuple if let Ok(tuple) = obj.clone().downcast_exact::(vm) { self.push_value(tuple.into_pyref().into()); @@ -4021,7 +4146,7 @@ impl ExecutingFrame<'_> { return Ok(None); } self.push_value(callable); - self.push_value_opt(_null); + self.push_value_opt(null); self.push_value(obj); } self.deoptimize(Instruction::Call { @@ -4031,29 +4156,32 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallBuiltinO => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - if nargs == 1 { - let obj = self.pop_value(); - let _null = self.pop_value_opt(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); + let callable = self.nth_value(nargs + 1); + if callable + .downcast_ref_if_exact::(vm) + .is_some() + && effective_nargs == 1 + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_tag = &*callable as *const PyObject as u32; - if cached_tag == callable_tag - && let Some(native) = callable.downcast_ref::() - { - let args = FuncArgs { - args: vec![obj], - kwargs: Default::default(), - }; - let result = (native.value.func)(vm, args)?; - self.push_value(result); - return Ok(None); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); } - self.push_value(callable); - self.push_value_opt(_null); - self.push_value(obj); + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; + self.push_value(result); + return Ok(None); } self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -4062,29 +4190,29 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallBuiltinFast => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag { - callable - .downcast_ref::() - .map(|n| n.value.func) - } else { - None - }; - if let Some(func) = func { - let positional_args: Vec = - self.pop_multiple(nargs as usize).collect(); - self.pop_value_opt(); // null (self_or_null) - self.pop_value(); // callable - let args = FuncArgs { - args: positional_args, - kwargs: Default::default(), - }; - let result = func(vm, args)?; + if callable + .downcast_ref_if_exact::(vm) + .is_some() + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; self.push_value(result); return Ok(None); } @@ -4133,22 +4261,40 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); - if let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() { - let nargs_usize = nargs as usize; - let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_val = self.pop_value(); - let callable = self.pop_value(); - let mut args_vec = Vec::with_capacity(nargs_usize + 1); - args_vec.push(self_val); - args_vec.extend(pos_args); - let result = - vectorcall_function(&callable, args_vec, nargs_usize + 1, None, vm)?; - self.push_value(result); - Ok(None) + let bound_function = bound_method.function_obj().clone(); + let bound_self = bound_method.self_obj().clone(); + if let Some(func) = bound_function.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable (bound method) + let mut args_vec = Vec::with_capacity(nargs_usize + 1); + args_vec.push(bound_self); + args_vec.extend(pos_args); + let result = vectorcall_function( + &bound_function, + args_vec, + nargs_usize + 1, + None, + vm, + )?; + self.push_value(result); + return Ok(None); + } + let args = self.collect_positional_args(nargs); + self.execute_call(args, vm) } else { self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -4160,20 +4306,41 @@ impl ExecutingFrame<'_> { Instruction::CallListAppend => { let nargs: u32 = arg.into(); if nargs == 1 { - // Stack: [list.append (bound method), self_or_null (list), item] - let item = self.pop_value(); - let self_or_null = self.pop_value_opt(); - let callable = self.pop_value(); - if let Some(list_obj) = self_or_null.as_ref() - && let Some(list) = list_obj.downcast_ref_if_exact::(vm) - { - list.append(item); - self.push_value(vm.ctx.none()); - return Ok(None); + // Stack: [callable, self_or_null, item] + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); + let callable = self.nth_value(2); + let self_is_exact_list = self + .localsplus + .stack_index(stack_len - 2) + .as_ref() + .is_some_and(|obj| obj.class().is(vm.ctx.types.list_type)); + let is_list_append = + callable + .downcast_ref::() + .is_some_and(|descr| { + descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + }); + if is_list_append && self_or_null_is_some && self_is_exact_list { + let item = self.pop_value(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + if let Some(list_obj) = self_or_null.as_ref() + && let Some(list) = list_obj.downcast_ref_if_exact::(vm) + { + list.append(item); + // CALL_LIST_APPEND fuses the following POP_TOP. + self.jump_relative_forward( + 1, + Instruction::CallListAppend.cache_entries() as u32, + ); + return Ok(None); + } + self.push_value(callable); + self.push_value_opt(self_or_null); + self.push_value(item); } - self.push_value(callable); - self.push_value_opt(self_or_null); - self.push_value(item); } self.deoptimize(Instruction::Call { argc: Arg::marker(), @@ -4182,17 +4349,13 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallMethodDescriptorNoargs => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 0 { // Stack: [callable, self_or_null] — peek to get func ptr let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); let callable = self.nth_value(1); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -4217,17 +4380,13 @@ impl ExecutingFrame<'_> { self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorO => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, arg1] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -4253,18 +4412,14 @@ impl ExecutingFrame<'_> { self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorFast => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -4293,16 +4448,29 @@ impl ExecutingFrame<'_> { self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinClass => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - if !(cached_tag == callable_tag && callable.downcast_ref::().is_some()) { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); + if let Some(cls) = callable.downcast_ref::() + && cls.slots.vectorcall.load().is_some() + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let self_is_some = self_or_null.is_some(); + let mut args_vec = Vec::with_capacity(nargs_usize + usize::from(self_is_some)); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = callable.vectorcall( + args_vec, + nargs_usize + usize::from(self_is_some), + None, + vm, + )?; + self.push_value(result); + return Ok(None); } self.execute_call_vectorcall(nargs, vm) } @@ -4325,6 +4493,7 @@ impl ExecutingFrame<'_> { // Look up __init__ (guarded by type_version) if let Some(init) = cls.get_attr(identifier!(vm, __init__)) && let Some(init_func) = init.downcast_ref::() + && init_func.can_specialize_call(nargs + 1) { // Allocate object directly (tp_new == object.__new__) let dict = if cls @@ -4370,18 +4539,14 @@ impl ExecutingFrame<'_> { } Instruction::CallMethodDescriptorFastWithKeywords => { // Native function interface is uniform regardless of keyword support - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let func = if cached_tag == callable_tag && self_or_null_is_some { + let func = if self_or_null_is_some { callable .downcast_ref::() .map(|d| d.method.func) @@ -4411,29 +4576,29 @@ impl ExecutingFrame<'_> { } Instruction::CallBuiltinFastWithKeywords => { // Native function interface is uniform regardless of keyword support - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); + let effective_nargs = nargs + u32::from(self_or_null_is_some); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - let func = if cached_tag == callable_tag { - callable - .downcast_ref::() - .map(|n| n.value.func) - } else { - None - }; - if let Some(func) = func { - let positional_args: Vec = - self.pop_multiple(nargs as usize).collect(); - self.pop_value_opt(); // null (self_or_null) - self.pop_value(); // callable - let args = FuncArgs { - args: positional_args, - kwargs: Default::default(), - }; - let result = func(vm, args)?; + if callable + .downcast_ref_if_exact::(vm) + .is_some() + { + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = Vec::with_capacity(effective_nargs as usize); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); + } + args_vec.extend(pos_args); + let result = + callable.vectorcall(args_vec, effective_nargs as usize, None, vm)?; self.push_value(result); return Ok(None); } @@ -4443,18 +4608,37 @@ impl ExecutingFrame<'_> { self.execute_call_vectorcall(nargs, vm) } Instruction::CallNonPyGeneral => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); - let callable_tag = callable as *const PyObject as u32; - if cached_tag != callable_tag { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); + if callable.downcast_ref::().is_some() + || callable.downcast_ref::().is_some() + { + let args = self.collect_positional_args(nargs); + return self.execute_call(args, vm); + } + let nargs_usize = nargs as usize; + let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let mut args_vec = + Vec::with_capacity(nargs_usize + usize::from(self_or_null_is_some)); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); } - self.execute_call_vectorcall(nargs, vm) + args_vec.extend(pos_args); + let result = callable.vectorcall( + args_vec, + nargs_usize + usize::from(self_or_null_is_some), + None, + vm, + )?; + self.push_value(result); + Ok(None) } Instruction::CallKwPy => { let instr_idx = self.lasti() as usize - 1; @@ -4507,30 +4691,46 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); - // Stack: [callable, self_or_null(=self), arg1, ..., argN, kwarg_names] + // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 2) + .is_some(); let callable = self.nth_value(nargs + 2); - if let Some(func) = callable.downcast_ref::() - && func.func_version() == cached_version - && cached_version != 0 + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() { - let nargs_usize = nargs as usize; - let kwarg_names_obj = self.pop_value(); - let kwarg_names_tuple = kwarg_names_obj - .downcast_ref::() - .expect("kwarg names should be tuple"); - let kw_count = kwarg_names_tuple.len(); - let all_args: Vec = self.pop_multiple(nargs_usize).collect(); - let self_val = self.pop_value(); - let callable = self.pop_value(); - let pos_count = nargs_usize - kw_count; - let mut args_vec = Vec::with_capacity(nargs_usize + 1); - args_vec.push(self_val); - args_vec.extend(all_args); - let kwnames = kwarg_names_tuple.as_slice(); - let result = - vectorcall_function(&callable, args_vec, pos_count + 1, Some(kwnames), vm)?; - self.push_value(result); - return Ok(None); + let bound_function = bound_method.function_obj().clone(); + let bound_self = bound_method.self_obj().clone(); + if let Some(func) = bound_function.downcast_ref::() + && func.func_version() == cached_version + && cached_version != 0 + { + let nargs_usize = nargs as usize; + let kwarg_names_obj = self.pop_value(); + let kwarg_names_tuple = kwarg_names_obj + .downcast_ref::() + .expect("kwarg names should be tuple"); + let kw_count = kwarg_names_tuple.len(); + let all_args: Vec = self.pop_multiple(nargs_usize).collect(); + self.pop_value_opt(); // null (self_or_null) + self.pop_value(); // callable (bound method) + let pos_count = nargs_usize - kw_count; + let mut args_vec = Vec::with_capacity(nargs_usize + 1); + args_vec.push(bound_self); + args_vec.extend(all_args); + let kwnames = kwarg_names_tuple.as_slice(); + let result = vectorcall_function( + &bound_function, + args_vec, + pos_count + 1, + Some(kwnames), + vm, + )?; + self.push_value(result); + return Ok(None); + } } self.deoptimize(Instruction::CallKw { argc: Arg::marker(), @@ -4539,18 +4739,43 @@ impl ExecutingFrame<'_> { self.execute_call(args, vm) } Instruction::CallKwNonPy => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_tag = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 2) + .is_some(); let callable = self.nth_value(nargs + 2); - let callable_tag = callable as *const PyObject as u32; - if cached_tag != callable_tag { - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); + if callable.downcast_ref::().is_some() + || callable.downcast_ref::().is_some() + { + let args = self.collect_keyword_args(nargs); + return self.execute_call(args, vm); + } + let nargs_usize = nargs as usize; + let kwarg_names_obj = self.pop_value(); + let kwarg_names_tuple = kwarg_names_obj + .downcast_ref::() + .expect("kwarg names should be tuple"); + let kw_count = kwarg_names_tuple.len(); + let all_args: Vec = self.pop_multiple(nargs_usize).collect(); + let self_or_null = self.pop_value_opt(); + let callable = self.pop_value(); + let pos_count = nargs_usize - kw_count; + let mut args_vec = + Vec::with_capacity(nargs_usize + usize::from(self_or_null_is_some)); + if let Some(self_val) = self_or_null { + args_vec.push(self_val); } - self.execute_call_kw_vectorcall(nargs, vm) + args_vec.extend(all_args); + let result = callable.vectorcall( + args_vec, + pos_count + usize::from(self_or_null_is_some), + Some(kwarg_names_tuple.as_slice()), + vm, + )?; + self.push_value(result); + Ok(None) } Instruction::LoadSuperAttrAttr => { let oparg = u32::from(arg); @@ -4612,9 +4837,12 @@ impl ExecutingFrame<'_> { }, ); let cache_base = self.lasti() as usize; - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) @@ -4692,9 +4920,12 @@ impl ExecutingFrame<'_> { }, ); let cache_base = self.lasti() as usize; - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) @@ -4713,9 +4944,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::CompareOp { - opname: Arg::marker(), - }); let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::ComparisonOperator::Equal); self.execute_compare(vm, op) @@ -4740,9 +4968,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::CompareOp { - opname: Arg::marker(), - }); let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::ComparisonOperator::Equal); self.execute_compare(vm, op) @@ -4762,9 +4987,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::CompareOp { - opname: Arg::marker(), - }); let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::ComparisonOperator::Equal); self.execute_compare(vm, op) @@ -4776,7 +4998,6 @@ impl ExecutingFrame<'_> { // Already a bool, no-op Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4791,7 +5012,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4805,7 +5025,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(false).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4820,7 +5039,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4835,7 +5053,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4855,7 +5072,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(true).into()); Ok(None) } else { - self.deoptimize(Instruction::ToBool); let obj = self.pop_value(); let result = obj.try_to_bool(vm)?; self.push_value(vm.ctx.new_bool(result).into()); @@ -4878,9 +5094,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(value).into()); Ok(None) } else { - self.deoptimize(Instruction::ContainsOp { - invert: Arg::marker(), - }); let b = self.pop_value(); let a = self.pop_value(); let invert = bytecode::Invert::try_from(u32::from(arg) as u8) @@ -4911,9 +5124,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(value).into()); Ok(None) } else { - self.deoptimize(Instruction::ContainsOp { - invert: Arg::marker(), - }); let b = self.pop_value(); let a = self.pop_value(); let invert = bytecode::Invert::try_from(u32::from(arg) as u8) @@ -4939,9 +5149,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::UnpackSequence { - count: Arg::marker(), - }); let size = u32::from(arg); self.unpack_sequence(size, vm) } @@ -4959,9 +5166,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::UnpackSequence { - count: Arg::marker(), - }); self.unpack_sequence(size as u32, vm) } Instruction::UnpackSequenceList => { @@ -4979,9 +5183,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::UnpackSequence { - count: Arg::marker(), - }); self.unpack_sequence(size as u32, vm) } Instruction::ForIterRange => { @@ -4995,9 +5196,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5013,9 +5211,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5031,9 +5226,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5046,92 +5238,69 @@ impl ExecutingFrame<'_> { Ok(PyIterReturn::Return(value)) => { self.push_value(value); } - Ok(PyIterReturn::StopIteration(_)) => { + Ok(PyIterReturn::StopIteration(value)) => { + if vm.use_tracing.get() && !vm.is_none(&self.object.trace.lock()) { + let stop_exc = vm.new_stop_iteration(value); + self.fire_exception_trace(&stop_exc, vm)?; + } self.for_iter_jump_on_exhausted(target); } Err(e) => return Err(e), } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } } Instruction::LoadGlobalModule => { let oparg = u32::from(arg); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); - let current_version = self.globals.version() as u32; - if cached_version == current_version { - // globals unchanged — name is in globals, look up only there + let cache_base = self.lasti() as usize; + // Keep specialized opcode on guard miss, matching CPython's + // JUMP_TO_PREDICTED(LOAD_GLOBAL) behavior. + let cached_version = self.code.instructions.read_cache_u16(cache_base + 1); + let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); + if let Ok(current_version) = u16::try_from(self.globals.version()) + && cached_version == current_version + { let name = self.code.names[(oparg >> 1) as usize]; - if let Some(x) = self.globals.get_item_opt(name, vm)? { - self.push_value(x); - if (oparg & 1) != 0 { - self.push_value_opt(None); - } - Ok(None) - } else { - // Name was removed from globals - self.deoptimize(Instruction::LoadGlobal { - namei: Arg::marker(), - }); - let x = self.load_global_or_builtin(name, vm)?; + if let Some(x) = self.globals.get_item_opt_hint(name, cached_index, vm)? { self.push_value(x); if (oparg & 1) != 0 { self.push_value_opt(None); } - Ok(None) + return Ok(None); } - } else { - self.deoptimize(Instruction::LoadGlobal { - namei: Arg::marker(), - }); - let name = self.code.names[(oparg >> 1) as usize]; - let x = self.load_global_or_builtin(name, vm)?; - self.push_value(x); - if (oparg & 1) != 0 { - self.push_value_opt(None); - } - Ok(None) } + let name = self.code.names[(oparg >> 1) as usize]; + let x = self.load_global_or_builtin(name, vm)?; + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); + } + Ok(None) } Instruction::LoadGlobalBuiltin => { let oparg = u32::from(arg); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_globals_ver = self.code.instructions.read_cache_u32(cache_base + 1); - let cached_builtins_ver = self.code.instructions.read_cache_u32(cache_base + 2); - let current_globals_ver = self.globals.version() as u32; - if cached_globals_ver == current_globals_ver { - // globals unchanged — name is NOT in globals, check builtins - if let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) { - let current_builtins_ver = builtins_dict.version() as u32; - if cached_builtins_ver == current_builtins_ver { - // Both versions match — safe to look up in builtins - let name = self.code.names[(oparg >> 1) as usize]; - if let Some(x) = builtins_dict.get_item_opt(name, vm)? { - self.push_value(x); - if (oparg & 1) != 0 { - self.push_value_opt(None); - } - return Ok(None); - } + let cache_base = self.lasti() as usize; + let cached_globals_ver = self.code.instructions.read_cache_u16(cache_base + 1); + let cached_builtins_ver = self.code.instructions.read_cache_u16(cache_base + 2); + let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); + if let Ok(current_globals_ver) = u16::try_from(self.globals.version()) + && cached_globals_ver == current_globals_ver + && let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) + && let Ok(current_builtins_ver) = u16::try_from(builtins_dict.version()) + && cached_builtins_ver == current_builtins_ver + { + let name = self.code.names[(oparg >> 1) as usize]; + if let Some(x) = builtins_dict.get_item_opt_hint(name, cached_index, vm)? { + self.push_value(x); + if (oparg & 1) != 0 { + self.push_value_opt(None); } + return Ok(None); } } - // Version mismatch or lookup failed — deoptimize - self.deoptimize_at( - Instruction::LoadGlobal { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); let name = self.code.names[(oparg >> 1) as usize]; let x = self.load_global_or_builtin(name, vm)?; self.push_value(x); @@ -6750,9 +6919,12 @@ impl ExecutingFrame<'_> { .is_some_and(|f| f as usize == PyBaseObject::getattro as *const () as usize); if !is_default_getattro { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -6765,9 +6937,12 @@ impl ExecutingFrame<'_> { if type_version == 0 { // Version counter overflow — backoff to avoid re-attempting every execution unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -6778,10 +6953,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrModule); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrModule); return; } @@ -6812,19 +6985,22 @@ impl ExecutingFrame<'_> { let new_op = if !class_has_dict { Instruction::LoadAttrMethodNoDict + } else if obj.dict().is_none() { + Instruction::LoadAttrMethodLazyDict } else { Instruction::LoadAttrMethodWithValues }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - } + self.specialize_at(instr_idx, cache_base, new_op); return; } // Can't specialize this method call unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } else { // Regular attribute access @@ -6850,10 +7026,8 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 3, offset as u32); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrSlot); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrSlot); } else if let Some(ref descr) = cls_attr && descr.downcast_ref::().is_some() { @@ -6866,23 +7040,27 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrProperty); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrProperty); } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } else if has_descr_get { // Non-data descriptor with __get__ — can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } else if class_has_dict { if let Some(ref descr) = cls_attr { @@ -6895,20 +7073,49 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrNondescriptorWithValues); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::LoadAttrNondescriptorWithValues, + ); } else { // No class attr, must be in instance dict + let use_hint = if let Some(dict) = obj.dict() { + match dict.get_item_opt(attr_name, _vm) { + Ok(Some(_)) => true, + Ok(None) => false, + Err(_) => { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code + .instructions + .read_adaptive_counter(cache_base), + ), + ); + } + return; + } + } + } else { + false + }; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrInstanceValue); } + self.specialize_at( + instr_idx, + cache_base, + if use_hint { + Instruction::LoadAttrWithHint + } else { + Instruction::LoadAttrInstanceValue + }, + ); } } else if let Some(ref descr) = cls_attr { // No dict support, plain class attr — cache directly @@ -6920,16 +7127,21 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u64(cache_base + 5, descr_ptr); - self.code - .instructions - .replace_op(instr_idx, Instruction::LoadAttrNondescriptorNoDict); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::LoadAttrNondescriptorNoDict, + ); } else { // No dict, no class attr — can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } @@ -6952,9 +7164,12 @@ impl ExecutingFrame<'_> { } if type_version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -6969,9 +7184,30 @@ impl ExecutingFrame<'_> { if attr_class.slots.descr_set.load().is_some() { // Data descriptor on metaclass — can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + } + let mut metaclass_version = 0; + if !mcl.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) { + metaclass_version = mcl.tp_version_tag.load(Acquire); + if metaclass_version == 0 { + metaclass_version = mcl.assign_version_tag(); + } + if metaclass_version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -6985,26 +7221,35 @@ impl ExecutingFrame<'_> { if !has_descr_get { // METHOD or NON_DESCRIPTOR — can cache directly let descr_ptr = &**descr as *const PyObject as u64; + let new_op = if metaclass_version == 0 { + Instruction::LoadAttrClass + } else { + Instruction::LoadAttrClassWithMetaclassCheck + }; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); self.code .instructions - .write_cache_u64(cache_base + 5, descr_ptr); + .write_cache_u32(cache_base + 3, metaclass_version); self.code .instructions - .replace_op(instr_idx, Instruction::LoadAttrClass); + .write_cache_u64(cache_base + 5, descr_ptr); } + self.specialize_at(instr_idx, cache_base, new_op); return; } } // Can't specialize unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } @@ -7131,21 +7376,32 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } - /// Adaptive counter: decrement the warmup counter, or call the specialize - /// function when it reaches zero. + /// Adaptive counter: trigger specialization at zero, otherwise advance countdown. #[inline] fn adaptive(&mut self, specialize: impl FnOnce(&mut Self, usize, usize)) { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let counter = self.code.instructions.read_adaptive_counter(cache_base); - if counter > 0 { + if bytecode::adaptive_counter_triggers(counter) { + specialize(self, instr_idx, cache_base); + } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, counter - 1); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::advance_adaptive_counter(counter), + ); } - } else { - specialize(self, instr_idx, cache_base); + } + } + + /// Install a specialized opcode and set adaptive cooldown bits. + #[inline] + fn specialize_at(&mut self, instr_idx: usize, cache_base: usize, new_op: Instruction) { + unsafe { + self.code + .instructions + .write_adaptive_counter(cache_base, ADAPTIVE_COOLDOWN_VALUE); + self.code.instructions.replace_op(instr_idx, new_op); } } @@ -7158,14 +7414,15 @@ impl ExecutingFrame<'_> { new_op: Option, ) { if let Some(new_op) = new_op { - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - } + self.specialize_at(instr_idx, cache_base, new_op); } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } @@ -7185,14 +7442,17 @@ impl ExecutingFrame<'_> { fn deoptimize_at(&mut self, base_op: Instruction, instr_idx: usize, cache_base: usize) { unsafe { self.code.instructions.replace_op(instr_idx, base_op); - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } /// Execute a specialized binary op on two int operands. - /// Deoptimize if either operand is not an exact int. + /// Fallback to generic binary op if either operand is not an exact int. #[inline] fn execute_binary_op_int( &mut self, @@ -7212,13 +7472,12 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bigint(&result).into()); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, deopt_op) } } /// Execute a specialized binary op on two float operands. - /// Deoptimize if either operand is not an exact float. + /// Fallback to generic binary op if either operand is not an exact float. #[inline] fn execute_binary_op_float( &mut self, @@ -7238,7 +7497,6 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_float(result).into()); Ok(None) } else { - self.deoptimize(Instruction::BinaryOp { op: Arg::marker() }); self.execute_bin_op(vm, deopt_op) } } @@ -7270,9 +7528,12 @@ impl ExecutingFrame<'_> { let version = func.get_version_for_current_state(); if version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7284,138 +7545,167 @@ impl ExecutingFrame<'_> { }; let new_op = if func.can_specialize_call(effective_nargs) { - if self_or_null_is_some { - Instruction::CallBoundMethodExactArgs - } else { - Instruction::CallPyExactArgs - } - } else if self_or_null_is_some { - Instruction::CallBoundMethodGeneral + Instruction::CallPyExactArgs } else { Instruction::CallPyGeneral }; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, version); } + self.specialize_at(instr_idx, cache_base, new_op); return; } - // Try to specialize method descriptor calls - if self_or_null_is_some && callable.downcast_ref::().is_some() { - let callable_tag = callable as *const PyObject as u32; - let new_op = match nargs { - 0 => Instruction::CallMethodDescriptorNoargs, - 1 => Instruction::CallMethodDescriptorO, - _ => Instruction::CallMethodDescriptorFast, + // Bound Python method object (`method`) specialization. + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + && let Some(func) = bound_method.function_obj().downcast_ref::() + { + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + + let new_op = if func.can_specialize_call(nargs + 1) { + Instruction::CallBoundMethodExactArgs + } else { + Instruction::CallBoundMethodGeneral }; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions - .write_cache_u32(cache_base + 1, callable_tag); + .write_cache_u32(cache_base + 1, version); } + self.specialize_at(instr_idx, cache_base, new_op); + return; + } + + // Try to specialize method descriptor calls + if self_or_null_is_some && let Some(descr) = callable.downcast_ref::() { + let call_cache_entries = Instruction::CallListAppend.cache_entries(); + let next_idx = cache_base + call_cache_entries; + let next_is_pop_top = if next_idx < self.code.instructions.len() { + let next_op = self.code.instructions.read_op(next_idx); + matches!(next_op.to_base().unwrap_or(next_op), Instruction::PopTop) + } else { + false + }; + + let new_op = if nargs == 1 + && descr.method.name == "append" + && descr.objclass.is(vm.ctx.types.list_type) + && next_is_pop_top + { + Instruction::CallListAppend + } else { + match nargs { + 0 => Instruction::CallMethodDescriptorNoargs, + 1 => Instruction::CallMethodDescriptorO, + _ => Instruction::CallMethodDescriptorFast, + } + }; + self.specialize_at(instr_idx, cache_base, new_op); return; } // Try to specialize builtin calls - if !self_or_null_is_some { - if let Some(native) = callable.downcast_ref::() - && native.zelf.is_none() + if let Some(native) = callable.downcast_ref_if_exact::(vm) { + let effective_nargs = nargs + u32::from(self_or_null_is_some); + let callable_tag = callable as *const PyObject as u32; + let new_op = if native.zelf.is_none() + && native.value.name == "len" + && nargs == 1 + && effective_nargs == 1 { - let callable_tag = callable as *const PyObject as u32; - let new_op = match (native.value.name, nargs) { - ("len", 1) => Instruction::CallLen, - ("isinstance", 2) => Instruction::CallIsinstance, - (_, 1) => Instruction::CallBuiltinO, - _ => Instruction::CallBuiltinFast, + Instruction::CallLen + } else if native.zelf.is_none() + && native.value.name == "isinstance" + && effective_nargs == 2 + { + Instruction::CallIsinstance + } else if effective_nargs == 1 { + Instruction::CallBuiltinO + } else { + Instruction::CallBuiltinFast + }; + if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, callable_tag); + } + } + self.specialize_at(instr_idx, cache_base, new_op); + return; + } + + // type/str/tuple(x) and class-call specializations + if callable.class().is(vm.ctx.types.type_type) + && let Some(cls) = callable.downcast_ref::() + { + if !self_or_null_is_some && nargs == 1 { + let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { + Some(Instruction::CallType1) + } else if callable.is(&vm.ctx.types.str_type.as_object()) { + Some(Instruction::CallStr1) + } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { + Some(Instruction::CallTuple1) + } else { + None }; - let new_op = Some(new_op); if let Some(new_op) = new_op { - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } + self.specialize_at(instr_idx, cache_base, new_op); return; } } - // type/str/tuple(x) specialization - if callable.class().is(vm.ctx.types.type_type) { - if nargs == 1 { - let new_op = if callable.is(&vm.ctx.types.type_type.as_object()) { - Some(Instruction::CallType1) - } else if callable.is(&vm.ctx.types.str_type.as_object()) { - Some(Instruction::CallStr1) - } else if callable.is(&vm.ctx.types.tuple_type.as_object()) { - Some(Instruction::CallTuple1) - } else { - None - }; - if let Some(new_op) = new_op { - let callable_tag = callable as *const PyObject as u32; + if cls.slots.flags.has_feature(PyTypeFlags::IMMUTABLETYPE) + && cls.slots.vectorcall.load().is_some() + { + self.specialize_at(instr_idx, cache_base, Instruction::CallBuiltinClass); + return; + } + // CallAllocAndEnterInit: heap type with default __new__ + if !self_or_null_is_some && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { + let object_new = vm.ctx.types.object_type.slots.new.load(); + let cls_new = cls.slots.new.load(); + if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) + && cls_new_fn as usize == obj_new_fn as usize + && let Some(init) = cls.get_attr(identifier!(vm, __init__)) + && let Some(init_func) = init.downcast_ref::() + && init_func.can_specialize_call(nargs + 1) + { + let version = cls.tp_version_tag.load(Acquire); + if version != 0 { unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions - .write_cache_u32(cache_base + 1, callable_tag); + .write_cache_u32(cache_base + 1, version); } + self.specialize_at( + instr_idx, + cache_base, + Instruction::CallAllocAndEnterInit, + ); return; } } - // CallAllocAndEnterInit: heap type with default __new__ - if let Some(cls) = callable.downcast_ref::() - && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) - { - let object_new = vm.ctx.types.object_type.slots.new.load(); - let cls_new = cls.slots.new.load(); - if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) - && cls_new_fn as usize == obj_new_fn as usize - && let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref::() - && init_func.can_specialize_call(nargs + 1) - { - let version = cls.tp_version_tag.load(Acquire); - if version != 0 { - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallAllocAndEnterInit); - self.code - .instructions - .write_cache_u32(cache_base + 1, version); - } - return; - } - } - } - // General builtin class call (any type with Callable) - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallBuiltinClass); - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } - return; } + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); + return; } - // General fallback: cache callable identity to skip re-specialization - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallNonPyGeneral); - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); - } + // General fallback: specialized non-Python callable path + self.specialize_at(instr_idx, cache_base, Instruction::CallNonPyGeneral); } fn specialize_call_kw( @@ -7441,40 +7731,55 @@ impl ExecutingFrame<'_> { let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref::() { - let version = func.func_version(); + let version = func.get_version_for_current_state(); if version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } - let new_op = if self_or_null_is_some { - Instruction::CallKwBoundMethod - } else { - Instruction::CallKwPy - }; unsafe { - self.code.instructions.replace_op(instr_idx, new_op); self.code .instructions .write_cache_u32(cache_base + 1, version); } + self.specialize_at(instr_idx, cache_base, Instruction::CallKwPy); return; } - // General fallback - let callable_tag = callable as *const PyObject as u32; - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::CallKwNonPy); - self.code - .instructions - .write_cache_u32(cache_base + 1, callable_tag); + if !self_or_null_is_some + && let Some(bound_method) = callable.downcast_ref::() + && let Some(func) = bound_method.function_obj().downcast_ref::() + { + let version = func.get_version_for_current_state(); + if version == 0 { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, version); + } + self.specialize_at(instr_idx, cache_base, Instruction::CallKwBoundMethod); + return; } + + // General fallback: specialized non-Python callable path + self.specialize_at(instr_idx, cache_base, Instruction::CallKwNonPy); } fn specialize_send(&mut self, instr_idx: usize, cache_base: usize) { @@ -7487,16 +7792,15 @@ impl ExecutingFrame<'_> { // Stack: [receiver, val] — receiver is at position 1 let receiver = self.nth_value(1); if self.builtin_coro(receiver).is_some() { - unsafe { - self.code - .instructions - .replace_op(instr_idx, Instruction::SendGen); - } + self.specialize_at(instr_idx, cache_base, Instruction::SendGen); } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } @@ -7522,9 +7826,12 @@ impl ExecutingFrame<'_> { || class.downcast_ref::().is_none() { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7534,9 +7841,7 @@ impl ExecutingFrame<'_> { } else { Instruction::LoadSuperAttrAttr }; - unsafe { - self.code.instructions.replace_op(instr_idx, new_op); - } + self.specialize_at(instr_idx, cache_base, new_op); } fn specialize_compare_op( @@ -7616,15 +7921,16 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::ToBoolAlwaysTrue); } + self.specialize_at(instr_idx, cache_base, Instruction::ToBoolAlwaysTrue); } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } return; @@ -7692,45 +7998,58 @@ impl ExecutingFrame<'_> { return; } let name = self.code.names[(oparg >> 1) as usize]; - // Check if name exists in globals - let in_globals = self.globals.get_item_opt(name, vm).ok().flatten().is_some(); - - let globals_version = self.globals.version() as u32; + let Ok(globals_version) = u16::try_from(self.globals.version()) else { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + }; - if in_globals { + if let Ok(Some(globals_hint)) = self.globals.hint_for_key(name, vm) { unsafe { self.code .instructions - .replace_op(instr_idx, Instruction::LoadGlobalModule); + .write_cache_u16(cache_base + 1, globals_version); + self.code.instructions.write_cache_u16(cache_base + 2, 0); self.code .instructions - .write_cache_u32(cache_base + 1, globals_version); + .write_cache_u16(cache_base + 3, globals_hint); } - } else if let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) - && builtins_dict - .get_item_opt(name, vm) - .ok() - .flatten() - .is_some() + self.specialize_at(instr_idx, cache_base, Instruction::LoadGlobalModule); + return; + } + + if let Some(builtins_dict) = self.builtins.downcast_ref_if_exact::(vm) + && let Ok(Some(builtins_hint)) = builtins_dict.hint_for_key(name, vm) + && let Ok(builtins_version) = u16::try_from(builtins_dict.version()) { - let builtins_version = builtins_dict.version() as u32; unsafe { self.code .instructions - .replace_op(instr_idx, Instruction::LoadGlobalBuiltin); + .write_cache_u16(cache_base + 1, globals_version); self.code .instructions - .write_cache_u32(cache_base + 1, globals_version); + .write_cache_u16(cache_base + 2, builtins_version); self.code .instructions - .write_cache_u32(cache_base + 2, builtins_version); - } - } else { - unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + .write_cache_u16(cache_base + 3, builtins_hint); } + self.specialize_at(instr_idx, cache_base, Instruction::LoadGlobalBuiltin); + return; + } + + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } @@ -7814,7 +8133,7 @@ impl ExecutingFrame<'_> { fn specialize_store_attr( &mut self, - _vm: &VirtualMachine, + vm: &VirtualMachine, attr_idx: bytecode::NameIdx, instr_idx: usize, cache_base: usize, @@ -7837,9 +8156,12 @@ impl ExecutingFrame<'_> { .is_some_and(|f| f as usize == PyBaseObject::slot_setattro as *const () as usize); if !is_default_setattr { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7851,9 +8173,12 @@ impl ExecutingFrame<'_> { } if type_version == 0 { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } return; } @@ -7879,31 +8204,56 @@ impl ExecutingFrame<'_> { self.code .instructions .write_cache_u16(cache_base + 3, offset as u16); - self.code - .instructions - .replace_op(instr_idx, Instruction::StoreAttrSlot); } + self.specialize_at(instr_idx, cache_base, Instruction::StoreAttrSlot); } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } - } else if owner.dict().is_some() { + } else if let Some(dict) = owner.dict() { + let use_hint = match dict.get_item_opt(attr_name, vm) { + Ok(Some(_)) => true, + Ok(None) => false, + Err(_) => { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } + }; unsafe { self.code .instructions .write_cache_u32(cache_base + 1, type_version); - self.code - .instructions - .replace_op(instr_idx, Instruction::StoreAttrInstanceValue); } + self.specialize_at( + instr_idx, + cache_base, + if use_hint { + Instruction::StoreAttrWithHint + } else { + Instruction::StoreAttrInstanceValue + }, + ); } else { unsafe { - self.code - .instructions - .write_adaptive_counter(cache_base, ADAPTIVE_BACKOFF_VALUE); + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); } } } diff --git a/crates/vm/src/stdlib/sys/monitoring.rs b/crates/vm/src/stdlib/sys/monitoring.rs index 858ea83b8a7..6d1aeb9c8f3 100644 --- a/crates/vm/src/stdlib/sys/monitoring.rs +++ b/crates/vm/src/stdlib/sys/monitoring.rs @@ -322,7 +322,7 @@ pub fn instrument_code(code: &PyCode, events: u32) { .code .instructions .iter() - .position(|u| matches!(u.op, Instruction::Resume { .. })) + .position(|u| matches!(u.op, Instruction::Resume { .. } | Instruction::ResumeCheck)) .unwrap_or(0); // Phase 4: Place regular INSTRUMENTED_* opcodes