From f584c47f5da7d94b5e8b5a098fb012aab51bd763 Mon Sep 17 00:00:00 2001
From: "Jeong, YunWon"
Date: Tue, 3 Mar 2026 22:24:26 +0900
Subject: [PATCH 1/6] Add PyType vectorcall and use vectorcall in all
 specialized call fallbacks

---
 crates/vm/src/builtins/type.rs | 29 +++++++++++++++++++++
 crates/vm/src/frame.rs         | 47 +++++++++-------------------------
 2 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs
index 276a81f58b6..1b50e7caa12 100644
--- a/crates/vm/src/builtins/type.rs
+++ b/crates/vm/src/builtins/type.rs
@@ -2339,8 +2339,37 @@ fn subtype_set_dict(obj: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) -
  * The magical type type
  */
 
+/// Vectorcall for PyType (PEP 590).
+/// Fast path: type(x) returns x.__class__ without constructing FuncArgs.
+fn vectorcall_type(
+    zelf_obj: &PyObject,
+    args: Vec<PyObjectRef>,
+    nargs: usize,
+    kwnames: Option<&[PyObjectRef]>,
+    vm: &VirtualMachine,
+) -> PyResult {
+    let zelf: &Py<PyType> = zelf_obj.downcast_ref().unwrap();
+
+    // type(x) fast path: single positional arg, no kwargs
+    if zelf.is(vm.ctx.types.type_type) {
+        let no_kwargs = kwnames.is_none_or(|kw| kw.is_empty());
+        if nargs == 1 && no_kwargs {
+            return Ok(args[0].obj_type());
+        }
+    }
+
+    // Fallback: construct FuncArgs and use standard call
+    let func_args = FuncArgs::from_vectorcall(&args, nargs, kwnames);
+    PyType::call(zelf, func_args, vm)
+}
+
 pub(crate) fn init(ctx: &'static Context) {
     PyType::extend_class(ctx, ctx.types.type_type);
+    ctx.types
+        .type_type
+        .slots
+        .vectorcall
+        .store(Some(vectorcall_type));
 }
 
 pub(crate) fn call_slot_new(
diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs
index 28a04318379..e4ba8b9db21 100644
--- a/crates/vm/src/frame.rs
+++ b/crates/vm/src/frame.rs
@@ -3847,8 +3847,7 @@ impl ExecutingFrame<'_> {
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallMethodDescriptorO => {
                 let instr_idx = self.lasti() as usize - 1;
@@ -3885,8 +3884,7 @@ impl ExecutingFrame<'_> {
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallMethodDescriptorFast => {
                 let instr_idx = self.lasti() as usize - 1;
@@ -3924,8 +3922,7 @@ impl ExecutingFrame<'_> {
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallBuiltinClass => {
                 let instr_idx = self.lasti() as usize - 1;
@@ -3935,25 +3932,12 @@ impl ExecutingFrame<'_> {
                 let callable = self.nth_value(nargs + 1);
                 let callable_tag = callable as *const PyObject as u32;
                 if cached_tag == callable_tag && callable.downcast_ref::<PyType>().is_some() {
-                    let args = self.collect_positional_args(nargs);
-                    let self_or_null = self.pop_value_opt();
-                    let callable = self.pop_value();
-                    let final_args = if let Some(self_val) = self_or_null {
-                        let mut args = args;
-                        args.prepend_arg(self_val);
-                        args
-                    } else {
-                        args
-                    };
-                    let result = callable.call(final_args, vm)?;
-                    self.push_value(result);
-                    return Ok(None);
+                    return self.execute_call_vectorcall(nargs, vm);
                 }
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallAllocAndEnterInit => {
                 let instr_idx = self.lasti() as usize - 1;
@@ -4013,8 +3997,7 @@ impl ExecutingFrame<'_> {
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallMethodDescriptorFastWithKeywords => {
                 // Native function interface is uniform regardless of keyword support
@@ -4053,8 +4036,7 @@ impl ExecutingFrame<'_> {
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallBuiltinFastWithKeywords => {
                 // Native function interface is uniform regardless of keyword support
@@ -4087,8 +4069,7 @@ impl ExecutingFrame<'_> {
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallNonPyGeneral => {
                 let instr_idx = self.lasti() as usize - 1;
@@ -4098,14 +4079,12 @@ impl ExecutingFrame<'_> {
                 let callable = self.nth_value(nargs + 1);
                 let callable_tag = callable as *const PyObject as u32;
                 if cached_tag == callable_tag {
-                    let args = self.collect_positional_args(nargs);
-                    return self.execute_call(args, vm);
+                    return self.execute_call_vectorcall(nargs, vm);
                 }
                 self.deoptimize(Instruction::Call {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_positional_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallKwPy => {
                 let instr_idx = self.lasti() as usize - 1;
@@ -4197,14 +4176,12 @@ impl ExecutingFrame<'_> {
                 let callable = self.nth_value(nargs + 2);
                 let callable_tag = callable as *const PyObject as u32;
                 if cached_tag == callable_tag {
-                    let args = self.collect_keyword_args(nargs);
-                    return self.execute_call(args, vm);
+                    return self.execute_call_kw_vectorcall(nargs, vm);
                 }
                 self.deoptimize(Instruction::CallKw {
                     argc: Arg::marker(),
                 });
-                let args = self.collect_keyword_args(nargs);
-                self.execute_call(args, vm)
+                self.execute_call_kw_vectorcall(nargs, vm)
             }
             Instruction::LoadSuperAttrAttr => {
                 let oparg = u32::from(arg);

From de9cbc4776a81e07dca0bdaeb7a98ae113e374cd Mon Sep 17 00:00:00 2001
From: "Jeong, YunWon"
Date: Tue, 3 Mar 2026 22:28:14 +0900
Subject: [PATCH 2/6] Add vectorcall slot for PyMethodDescriptor and PyWrapper

---
 crates/vm/src/builtins/descriptor.rs | 53 ++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/crates/vm/src/builtins/descriptor.rs b/crates/vm/src/builtins/descriptor.rs
index a7a3e89e42a..709e0de0a03 100644
--- a/crates/vm/src/builtins/descriptor.rs
+++ b/crates/vm/src/builtins/descriptor.rs
@@ -426,10 +426,63 @@ impl GetDescriptor for PyMemberDescriptor {
     }
 }
 
+/// Vectorcall for method_descriptor: calls native method directly
+fn vectorcall_method_descriptor(
+    zelf_obj: &PyObject,
+    args: Vec<PyObjectRef>,
+    nargs: usize,
+    kwnames: Option<&[PyObjectRef]>,
+    vm: &VirtualMachine,
+) -> PyResult {
+    let zelf: &Py<PyMethodDescriptor> = zelf_obj.downcast_ref().unwrap();
+    let func_args = FuncArgs::from_vectorcall(&args, nargs, kwnames);
+    (zelf.method.func)(vm, func_args)
+}
+
+/// Vectorcall for wrapper_descriptor: calls wrapped slot function
+fn vectorcall_wrapper(
+    zelf_obj: &PyObject,
+    args: Vec<PyObjectRef>,
+    nargs: usize,
+    kwnames: Option<&[PyObjectRef]>,
+    vm: &VirtualMachine,
+) -> PyResult {
+    let zelf: &Py<PyWrapper> = zelf_obj.downcast_ref().unwrap();
+    // First positional arg is self
+    if nargs == 0 {
+        return Err(vm.new_type_error(format!(
+            "descriptor '{}' of '{}' object needs an argument",
+            zelf.name.as_str(),
+            zelf.typ.name()
+        )));
+    }
+    let obj = args[0].clone();
+    if !obj.fast_isinstance(zelf.typ) {
+        return Err(vm.new_type_error(format!(
+            "descriptor '{}' requires a '{}' object but received a '{}'",
+            zelf.name.as_str(),
+            zelf.typ.name(),
+            obj.class().name()
+        )));
+    }
+    let rest = FuncArgs::from_vectorcall(&args[1..], nargs - 1, kwnames);
+    zelf.wrapped.call(obj, rest, vm)
+}
+
 pub fn init(ctx: &'static Context) {
     PyMemberDescriptor::extend_class(ctx, ctx.types.member_descriptor_type);
     PyMethodDescriptor::extend_class(ctx, ctx.types.method_descriptor_type);
+    ctx.types
+        .method_descriptor_type
+        .slots
+        .vectorcall
+        .store(Some(vectorcall_method_descriptor));
     PyWrapper::extend_class(ctx, ctx.types.wrapper_descriptor_type);
+    ctx.types
+        .wrapper_descriptor_type
+        .slots
+        .vectorcall
+        .store(Some(vectorcall_wrapper));
     PyMethodWrapper::extend_class(ctx, ctx.types.method_wrapper_type);
 }

From 02a6998a09e3aa73e3adfc33d46e7a99775b59d6 Mon Sep 17 00:00:00 2001
From: "Jeong, YunWon"
Date: Tue, 3 Mar 2026 22:38:50 +0900
Subject: [PATCH 3/6] Add FuncArgs::from_vectorcall_owned and simplify
 vectorcall fallback paths

Remove has_vectorcall checks from execute_call_vectorcall and
execute_call_kw_vectorcall. The invoke_vectorcall fallback now uses
from_vectorcall_owned to move args instead of cloning.
---
 crates/vm/src/frame.rs             | 46 ++----------------------------
 crates/vm/src/function/argument.rs | 28 ++++++++++++++++++
 crates/vm/src/protocol/callable.rs | 11 ++-----
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs
index e4ba8b9db21..393b8ff1029 100644
--- a/crates/vm/src/frame.rs
+++ b/crates/vm/src/frame.rs
@@ -5607,18 +5607,7 @@ impl ExecutingFrame<'_> {
         let self_or_null_idx = stack_len - nargs_usize - 1;
         let args_start = stack_len - nargs_usize;
 
-        // Check if callable has vectorcall slot
-        let has_vectorcall = self.state.stack[callable_idx]
-            .as_ref()
-            .is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some());
-
-        if !has_vectorcall {
-            // Fallback to existing FuncArgs path
-            let args = self.collect_positional_args(nargs);
-            return self.execute_call(args, vm);
-        }
-
-        // Build args slice: [self_or_null?, arg1, ..., argN]
+        // Build args: [self?, arg1, ..., argN]
         let self_or_null = self.state.stack[self_or_null_idx]
             .take()
             .map(|sr| sr.to_pyobj());
@@ -5641,6 +5630,7 @@ impl ExecutingFrame<'_> {
         let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj();
         self.state.stack.truncate(callable_idx);
 
+        // invoke_vectorcall falls back to FuncArgs if no vectorcall slot
         let result = callable_obj.vectorcall(args_vec, effective_nargs, None, vm)?;
         self.push_value(result);
         Ok(None)
@@ -5663,37 +5653,6 @@ impl ExecutingFrame<'_> {
         let self_or_null_idx = stack_len - nargs_usize - 1;
         let args_start = stack_len - nargs_usize;
 
-        // Check if callable has vectorcall slot
-        let has_vectorcall = self.state.stack[callable_idx]
-            .as_ref()
-            .is_some_and(|sr| sr.as_object().class().slots.vectorcall.load().is_some());
-
-        if !has_vectorcall {
-            // Fallback: reconstruct kwarg_names iterator and use existing path
-            let kwarg_names_iter = kwarg_names_tuple.as_slice().iter().map(|pyobj| {
-                pyobj
-                    .downcast_ref::<PyStr>()
-                    .unwrap()
-                    .as_str()
-                    .to_owned()
-            });
-            let args = self.pop_multiple(nargs_usize);
-            let func_args = FuncArgs::with_kwargs_names(args, kwarg_names_iter);
-            // pop self_or_null and callable
-            let self_or_null = self.pop_value_opt();
-            let callable = self.pop_value();
-            let final_args = if let Some(self_val) = self_or_null {
-                let mut args = func_args;
-                args.prepend_arg(self_val);
-                args
-            } else {
-                func_args
-            };
-            let value = callable.call(final_args, vm)?;
-            self.push_value(value);
-            return Ok(None);
-        }
-
         // Build args: [self?, pos_arg1, ..., pos_argM, kw_val1, ..., kw_valK]
         let self_or_null = self.state.stack[self_or_null_idx]
             .take()
@@ -5717,6 +5676,7 @@ impl ExecutingFrame<'_> {
         let callable_obj = self.state.stack[callable_idx].take().unwrap().to_pyobj();
         self.state.stack.truncate(callable_idx);
 
+        // invoke_vectorcall falls back to FuncArgs if no vectorcall slot
         let kwnames = kwarg_names_tuple.as_slice();
         let result = callable_obj.vectorcall(args_vec, effective_nargs, Some(kwnames), vm)?;
         self.push_value(result);
diff --git a/crates/vm/src/function/argument.rs b/crates/vm/src/function/argument.rs
index c52c2d55d18..c9c7554a429 100644
--- a/crates/vm/src/function/argument.rs
+++ b/crates/vm/src/function/argument.rs
@@ -138,6 +138,7 @@ impl FuncArgs {
     /// Create FuncArgs from a vectorcall-style argument slice (PEP 590).
     /// `args[..nargs]` are positional, and if `kwnames` is provided,
     /// the last `kwnames.len()` entries in `args[nargs..]` are keyword values.
+    /// Convert borrowed vectorcall args to FuncArgs (clones all values).
     pub fn from_vectorcall(
         args: &[PyObjectRef],
         nargs: usize,
         kwnames: Option<&[PyObjectRef]>,
@@ -166,6 +167,33 @@ impl FuncArgs {
         }
     }
 
+    /// Convert owned vectorcall args to FuncArgs (moves values, no clone).
+    pub fn from_vectorcall_owned(
+        mut args: Vec<PyObjectRef>,
+        nargs: usize,
+        kwnames: Option<&[PyObjectRef]>,
+    ) -> Self {
+        let kwargs = if let Some(names) = kwnames {
+            let kw_count = names.len();
+            names
+                .iter()
+                .zip(args.drain(nargs..nargs + kw_count))
+                .map(|(name, val)| {
+                    let key = name
+                        .downcast_ref::<PyStr>()
+                        .expect("kwnames must be strings")
+                        .as_str()
+                        .to_owned();
+                    (key, val)
+                })
+                .collect()
+        } else {
+            IndexMap::new()
+        };
+        args.truncate(nargs);
+        Self { args, kwargs }
+    }
+
     pub fn is_empty(&self) -> bool {
         self.args.is_empty() && self.kwargs.is_empty()
     }
diff --git a/crates/vm/src/protocol/callable.rs b/crates/vm/src/protocol/callable.rs
index ab3e7d815ab..cecb9431fbb 100644
--- a/crates/vm/src/protocol/callable.rs
+++ b/crates/vm/src/protocol/callable.rs
@@ -126,15 +126,8 @@ impl<'a> PyCallable<'a> {
                 result
             }
         } else {
-            // Fallback: convert owned Vec to FuncArgs
-            let func_args = FuncArgs {
-                args: args[..nargs].to_vec(),
-                kwargs: if let Some(kwn) = kwnames {
-                    FuncArgs::from_vectorcall(&args, nargs, Some(kwn)).kwargs
-                } else {
-                    indexmap::IndexMap::new()
-                },
-            };
+            // Fallback: convert owned Vec to FuncArgs (move, no clone)
+            let func_args = FuncArgs::from_vectorcall_owned(args, nargs, kwnames);
             self.invoke(func_args, vm)
         }
     }

From 07f129b04a15cae7870e6a5cbc3195e2a2f5358d Mon Sep 17 00:00:00 2001
From: "Jeong, YunWon"
Date: Tue, 3 Mar 2026 23:52:37 +0900
Subject: [PATCH 4/6] Use lazy locals allocation in vectorcall fast path, add
 debug_assert bounds

---
 crates/vm/src/builtins/function.rs | 6 +++---
 crates/vm/src/function/argument.rs | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs
index a41b119a6f1..316f739c9c9 100644
--- a/crates/vm/src/builtins/function.rs
+++ b/crates/vm/src/builtins/function.rs
@@ -1280,14 +1280,14 @@ pub(crate) fn vectorcall_function(
     // FAST PATH: simple positional-only call, exact arg count.
     // Move owned args directly into fastlocals — no clone needed.
     let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
-        ArgMapping::from_dict_exact(vm.ctx.new_dict())
+        None // lazy allocation — most frames never access locals dict
     } else {
-        ArgMapping::from_dict_exact(zelf.globals.clone())
+        Some(ArgMapping::from_dict_exact(zelf.globals.clone()))
     };
 
     let frame = Frame::new(
         code.to_owned(),
-        Scope::new(Some(locals), zelf.globals.clone()),
+        Scope::new(locals, zelf.globals.clone()),
         zelf.builtins.clone(),
         zelf.closure.as_ref().map_or(&[], |c| c.as_slice()),
         Some(zelf.to_owned().into()),
diff --git a/crates/vm/src/function/argument.rs b/crates/vm/src/function/argument.rs
index c9c7554a429..b39ee6f6bca 100644
--- a/crates/vm/src/function/argument.rs
+++ b/crates/vm/src/function/argument.rs
@@ -144,6 +144,8 @@ impl FuncArgs {
         nargs: usize,
         kwnames: Option<&[PyObjectRef]>,
     ) -> Self {
+        debug_assert!(nargs <= args.len());
+        debug_assert!(kwnames.is_none_or(|kw| nargs + kw.len() <= args.len()));
         let pos_args = args[..nargs].to_vec();
         let kwargs = if let Some(names) = kwnames {
             names
@@ -173,6 +175,8 @@ impl FuncArgs {
         nargs: usize,
         kwnames: Option<&[PyObjectRef]>,
     ) -> Self {
+        debug_assert!(nargs <= args.len());
+        debug_assert!(kwnames.is_none_or(|kw| nargs + kw.len() <= args.len()));
         let kwargs = if let Some(names) = kwnames {
             let kw_count = names.len();
             names

From c47d1f2d9a82b368588f7e1e8b9665c30a7927cb Mon Sep 17 00:00:00 2001
From: "Jeong, YunWon"
Date: Wed, 4 Mar 2026 01:56:27 +0900
Subject: [PATCH 5/6] Use from_vectorcall_owned in descriptor/type vectorcall,
 add stack debug_asserts

---
 crates/vm/src/builtins/descriptor.rs | 8 ++++----
 crates/vm/src/builtins/type.rs       | 2 +-
 crates/vm/src/frame.rs               | 9 +++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/crates/vm/src/builtins/descriptor.rs b/crates/vm/src/builtins/descriptor.rs
index 709e0de0a03..05e819a56e9 100644
--- a/crates/vm/src/builtins/descriptor.rs
+++ b/crates/vm/src/builtins/descriptor.rs
@@ -435,14 +435,14 @@ fn vectorcall_method_descriptor(
     vm: &VirtualMachine,
 ) -> PyResult {
     let zelf: &Py<PyMethodDescriptor> = zelf_obj.downcast_ref().unwrap();
-    let func_args = FuncArgs::from_vectorcall(&args, nargs, kwnames);
+    let func_args = FuncArgs::from_vectorcall_owned(args, nargs, kwnames);
     (zelf.method.func)(vm, func_args)
 }
 
 /// Vectorcall for wrapper_descriptor: calls wrapped slot function
 fn vectorcall_wrapper(
     zelf_obj: &PyObject,
-    args: Vec<PyObjectRef>,
+    mut args: Vec<PyObjectRef>,
     nargs: usize,
     kwnames: Option<&[PyObjectRef]>,
     vm: &VirtualMachine,
@@ -456,7 +456,7 @@ fn vectorcall_wrapper(
             zelf.typ.name()
         )));
     }
-    let obj = args[0].clone();
+    let obj = args.remove(0);
     if !obj.fast_isinstance(zelf.typ) {
         return Err(vm.new_type_error(format!(
             "descriptor '{}' requires a '{}' object but received a '{}'",
             zelf.name.as_str(),
             zelf.typ.name(),
             obj.class().name()
         )));
     }
-    let rest = FuncArgs::from_vectorcall(&args[1..], nargs - 1, kwnames);
+    let rest = FuncArgs::from_vectorcall_owned(args, nargs - 1, kwnames);
     zelf.wrapped.call(obj, rest, vm)
 }
diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs
index 1b50e7caa12..5bc68d38c0f 100644
--- a/crates/vm/src/builtins/type.rs
+++ b/crates/vm/src/builtins/type.rs
@@ -2359,7 +2359,7 @@ fn vectorcall_type(
     }
 
     // Fallback: construct FuncArgs and use standard call
-    let func_args = FuncArgs::from_vectorcall(&args, nargs, kwnames);
+    let func_args = FuncArgs::from_vectorcall_owned(args, nargs, kwnames);
     PyType::call(zelf, func_args, vm)
 }
diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs
index 393b8ff1029..1f73cc619dc 100644
--- a/crates/vm/src/frame.rs
+++ b/crates/vm/src/frame.rs
@@ -5603,6 +5603,10 @@ impl ExecutingFrame<'_> {
     fn execute_call_vectorcall(&mut self, nargs: u32, vm: &VirtualMachine) -> FrameResult {
         let nargs_usize = nargs as usize;
         let stack_len = self.state.stack.len();
+        debug_assert!(
+            stack_len >= nargs_usize + 2,
+            "CALL stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}"
+        );
         let callable_idx = stack_len - nargs_usize - 2;
         let self_or_null_idx = stack_len - nargs_usize - 1;
         let args_start = stack_len - nargs_usize;
@@ -5647,8 +5651,13 @@ impl ExecutingFrame<'_> {
             .downcast_ref::<PyTuple>()
             .expect("kwarg names should be tuple");
         let kw_count = kwarg_names_tuple.len();
+        debug_assert!(kw_count <= nargs_usize, "CALL_KW kw_count exceeds nargs");
 
         let stack_len = self.state.stack.len();
+        debug_assert!(
+            stack_len >= nargs_usize + 2,
+            "CALL_KW stack underflow: need callable + self_or_null + {nargs_usize} args, have {stack_len}"
+        );
         let callable_idx = stack_len - nargs_usize - 2;
         let self_or_null_idx = stack_len - nargs_usize - 1;
         let args_start = stack_len - nargs_usize;

From 2885f80785ac9f7ff3303393fdc54ee2996f33d4 Mon Sep 17 00:00:00 2001
From: "Jeong, YunWon"
Date: Wed, 4 Mar 2026 05:45:36 +0900
Subject: [PATCH 6/6] Collapse duplicated branch tails and use checked_sub in
 CALL_KW

- Collapse conditional deopt + unconditional vectorcall pattern in
  CallBuiltinClass, CallNonPyGeneral, CallKwNonPy
- Use checked_sub for nargs_usize - kw_count in execute_call_kw_vectorcall
  to prevent silent underflow in release builds
---
 crates/vm/src/frame.rs | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs
index 1f73cc619dc..c1a45fc10ce 100644
--- a/crates/vm/src/frame.rs
+++ b/crates/vm/src/frame.rs
@@ -3931,12 +3931,11 @@ impl ExecutingFrame<'_> {
                 let nargs: u32 = arg.into();
                 let callable = self.nth_value(nargs + 1);
                 let callable_tag = callable as *const PyObject as u32;
-                if cached_tag == callable_tag && callable.downcast_ref::<PyType>().is_some() {
-                    return self.execute_call_vectorcall(nargs, vm);
+                if !(cached_tag == callable_tag && callable.downcast_ref::<PyType>().is_some()) {
+                    self.deoptimize(Instruction::Call {
+                        argc: Arg::marker(),
+                    });
                 }
-                self.deoptimize(Instruction::Call {
-                    argc: Arg::marker(),
-                });
                 self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallAllocAndEnterInit => {
@@ -4078,12 +4077,11 @@ impl ExecutingFrame<'_> {
                 let nargs: u32 = arg.into();
                 let callable = self.nth_value(nargs + 1);
                 let callable_tag = callable as *const PyObject as u32;
-                if cached_tag == callable_tag {
-                    return self.execute_call_vectorcall(nargs, vm);
+                if cached_tag != callable_tag {
+                    self.deoptimize(Instruction::Call {
+                        argc: Arg::marker(),
+                    });
                 }
-                self.deoptimize(Instruction::Call {
-                    argc: Arg::marker(),
-                });
                 self.execute_call_vectorcall(nargs, vm)
             }
             Instruction::CallKwPy => {
@@ -4175,12 +4173,11 @@ impl ExecutingFrame<'_> {
                 let nargs: u32 = arg.into();
                 let callable = self.nth_value(nargs + 2);
                 let callable_tag = callable as *const PyObject as u32;
-                if cached_tag == callable_tag {
-                    return self.execute_call_kw_vectorcall(nargs, vm);
+                if cached_tag != callable_tag {
+                    self.deoptimize(Instruction::CallKw {
+                        argc: Arg::marker(),
+                    });
                 }
-                self.deoptimize(Instruction::CallKw {
-                    argc: Arg::marker(),
-                });
                 self.execute_call_kw_vectorcall(nargs, vm)
             }
             Instruction::LoadSuperAttrAttr => {
                 let oparg = u32::from(arg);
@@ -5668,7 +5665,9 @@ impl ExecutingFrame<'_> {
             .map(|sr| sr.to_pyobj());
         let has_self = self_or_null.is_some();
 
-        let pos_count = nargs_usize - kw_count;
+        let pos_count = nargs_usize
+            .checked_sub(kw_count)
+            .expect("CALL_KW: kw_count exceeds nargs");
         let effective_nargs = if has_self { pos_count + 1 } else { pos_count };
 
         // Build the full args slice: positional (including self) + kwarg values
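
Note: every vectorcall entry point in this series follows the same PEP 590 layout: one flat buffer in which the first nargs entries are positional arguments and the trailing entries, named by kwnames, are keyword values. The sketch below is a minimal standalone model of that split, not the crate's API: it assumes plain String values in place of PyObjectRef and a std HashMap instead of the IndexMap used by FuncArgs, and the function name split_vectorcall is hypothetical.

// Standalone model of the PEP 590 argument split used by the vectorcall
// paths above. `Value` stands in for PyObjectRef; illustration only.
use std::collections::HashMap;

type Value = String;

/// Split a vectorcall buffer: args[..nargs] are positional, and the next
/// kwnames.len() entries are the values of the named keyword arguments.
fn split_vectorcall(
    mut args: Vec<Value>,
    nargs: usize,
    kwnames: Option<&[String]>,
) -> (Vec<Value>, HashMap<String, Value>) {
    assert!(nargs <= args.len(), "nargs exceeds argument buffer");
    let kwargs = match kwnames {
        Some(names) => {
            assert!(nargs + names.len() <= args.len(), "kwnames exceed buffer");
            // Move the trailing keyword values out of the buffer, pairing
            // each with its name, so nothing is cloned.
            names
                .iter()
                .cloned()
                .zip(args.drain(nargs..nargs + names.len()))
                .collect()
        }
        None => HashMap::new(),
    };
    args.truncate(nargs); // keep only the positional part
    (args, kwargs)
}

fn main() {
    // Layout for a call like f(1, 2, x=3): nargs = 2, kwnames = ["x"],
    // buffer = [1, 2, 3].
    let kwnames = vec!["x".to_string()];
    let buffer: Vec<Value> = vec!["1".into(), "2".into(), "3".into()];
    let (pos, kw) = split_vectorcall(buffer, 2, Some(kwnames.as_slice()));
    assert_eq!(pos, vec!["1".to_string(), "2".to_string()]);
    assert_eq!(kw["x"], "3");
    println!("positional: {pos:?}, keyword: {kw:?}");
}

This mirrors why from_vectorcall_owned can drop the clones that from_vectorcall needs: once the caller hands over the owned buffer, the keyword values can be drained out and the positional tail truncated in place.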