From a7327b6d5d2f5a4d402dc1d82a768bbd680ecc0a Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Mon, 16 Mar 2026 13:47:43 +0100 Subject: [PATCH 1/7] Align `_opcode_metadata.py` to 3.14.3 --- Lib/_opcode_metadata.py | 305 ++++++++++++------ .../compiler-core/src/bytecode/instruction.rs | 180 +++++------ scripts/generate_opcode_metadata.py | 122 ++++++- 3 files changed, 410 insertions(+), 197 deletions(-) diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index bb55ee423cf..ac3897848ff 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -2,15 +2,222 @@ # for RustPython bytecode format (CPython 3.13 compatible opcode numbers). # Do not edit! -_specializations = {} +_specializations = { + "RESUME": [ + "RESUME_CHECK", + ], + "LOAD_CONST": [ + "LOAD_CONST_MORTAL", + "LOAD_CONST_IMMORTAL", + ], + "TO_BOOL": [ + "TO_BOOL_ALWAYS_TRUE", + "TO_BOOL_BOOL", + "TO_BOOL_INT", + "TO_BOOL_LIST", + "TO_BOOL_NONE", + "TO_BOOL_STR", + ], + "BINARY_OP": [ + "BINARY_OP_MULTIPLY_INT", + "BINARY_OP_ADD_INT", + "BINARY_OP_SUBTRACT_INT", + "BINARY_OP_MULTIPLY_FLOAT", + "BINARY_OP_ADD_FLOAT", + "BINARY_OP_SUBTRACT_FLOAT", + "BINARY_OP_ADD_UNICODE", + "BINARY_OP_SUBSCR_LIST_INT", + "BINARY_OP_SUBSCR_LIST_SLICE", + "BINARY_OP_SUBSCR_TUPLE_INT", + "BINARY_OP_SUBSCR_STR_INT", + "BINARY_OP_SUBSCR_DICT", + "BINARY_OP_SUBSCR_GETITEM", + "BINARY_OP_EXTEND", + "BINARY_OP_INPLACE_ADD_UNICODE", + ], + "STORE_SUBSCR": [ + "STORE_SUBSCR_DICT", + "STORE_SUBSCR_LIST_INT", + ], + "SEND": [ + "SEND_GEN", + ], + "UNPACK_SEQUENCE": [ + "UNPACK_SEQUENCE_TWO_TUPLE", + "UNPACK_SEQUENCE_TUPLE", + "UNPACK_SEQUENCE_LIST", + ], + "STORE_ATTR": [ + "STORE_ATTR_INSTANCE_VALUE", + "STORE_ATTR_SLOT", + "STORE_ATTR_WITH_HINT", + ], + "LOAD_GLOBAL": [ + "LOAD_GLOBAL_MODULE", + "LOAD_GLOBAL_BUILTIN", + ], + "LOAD_SUPER_ATTR": [ + "LOAD_SUPER_ATTR_ATTR", + "LOAD_SUPER_ATTR_METHOD", + ], + "LOAD_ATTR": [ + "LOAD_ATTR_INSTANCE_VALUE", + "LOAD_ATTR_MODULE", + "LOAD_ATTR_WITH_HINT", + "LOAD_ATTR_SLOT", + "LOAD_ATTR_CLASS", + "LOAD_ATTR_CLASS_WITH_METACLASS_CHECK", + "LOAD_ATTR_PROPERTY", + "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", + "LOAD_ATTR_METHOD_WITH_VALUES", + "LOAD_ATTR_METHOD_NO_DICT", + "LOAD_ATTR_METHOD_LAZY_DICT", + "LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", + "LOAD_ATTR_NONDESCRIPTOR_NO_DICT", + ], + "COMPARE_OP": [ + "COMPARE_OP_FLOAT", + "COMPARE_OP_INT", + "COMPARE_OP_STR", + ], + "CONTAINS_OP": [ + "CONTAINS_OP_SET", + "CONTAINS_OP_DICT", + ], + "JUMP_BACKWARD": [ + "JUMP_BACKWARD_NO_JIT", + "JUMP_BACKWARD_JIT", + ], + "FOR_ITER": [ + "FOR_ITER_LIST", + "FOR_ITER_TUPLE", + "FOR_ITER_RANGE", + "FOR_ITER_GEN", + ], + "CALL": [ + "CALL_BOUND_METHOD_EXACT_ARGS", + "CALL_PY_EXACT_ARGS", + "CALL_TYPE_1", + "CALL_STR_1", + "CALL_TUPLE_1", + "CALL_BUILTIN_CLASS", + "CALL_BUILTIN_O", + "CALL_BUILTIN_FAST", + "CALL_BUILTIN_FAST_WITH_KEYWORDS", + "CALL_LEN", + "CALL_ISINSTANCE", + "CALL_LIST_APPEND", + "CALL_METHOD_DESCRIPTOR_O", + "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + "CALL_METHOD_DESCRIPTOR_NOARGS", + "CALL_METHOD_DESCRIPTOR_FAST", + "CALL_ALLOC_AND_ENTER_INIT", + "CALL_PY_GENERAL", + "CALL_BOUND_METHOD_GENERAL", + "CALL_NON_PY_GENERAL", + ], + "CALL_KW": [ + "CALL_KW_BOUND_METHOD", + "CALL_KW_PY", + "CALL_KW_NON_PY", + ], +} -_specialized_opmap = {} +_specialized_opmap = { + 'BINARY_OP_ADD_FLOAT': 129, + 'BINARY_OP_ADD_INT': 130, + 'BINARY_OP_ADD_UNICODE': 131, + 'BINARY_OP_EXTEND': 132, + 'BINARY_OP_INPLACE_ADD_UNICODE': 3, + 'BINARY_OP_MULTIPLY_FLOAT': 133, + 'BINARY_OP_MULTIPLY_INT': 134, + 'BINARY_OP_SUBSCR_DICT': 135, + 'BINARY_OP_SUBSCR_GETITEM': 136, + 'BINARY_OP_SUBSCR_LIST_INT': 137, + 'BINARY_OP_SUBSCR_LIST_SLICE': 138, + 'BINARY_OP_SUBSCR_STR_INT': 139, + 'BINARY_OP_SUBSCR_TUPLE_INT': 140, + 'BINARY_OP_SUBTRACT_FLOAT': 141, + 'BINARY_OP_SUBTRACT_INT': 142, + 'CALL_ALLOC_AND_ENTER_INIT': 143, + 'CALL_BOUND_METHOD_EXACT_ARGS': 144, + 'CALL_BOUND_METHOD_GENERAL': 145, + 'CALL_BUILTIN_CLASS': 146, + 'CALL_BUILTIN_FAST': 147, + 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 148, + 'CALL_BUILTIN_O': 149, + 'CALL_ISINSTANCE': 150, + 'CALL_KW_BOUND_METHOD': 151, + 'CALL_KW_NON_PY': 152, + 'CALL_KW_PY': 153, + 'CALL_LEN': 154, + 'CALL_LIST_APPEND': 155, + 'CALL_METHOD_DESCRIPTOR_FAST': 156, + 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 157, + 'CALL_METHOD_DESCRIPTOR_NOARGS': 158, + 'CALL_METHOD_DESCRIPTOR_O': 159, + 'CALL_NON_PY_GENERAL': 160, + 'CALL_PY_EXACT_ARGS': 161, + 'CALL_PY_GENERAL': 162, + 'CALL_STR_1': 163, + 'CALL_TUPLE_1': 164, + 'CALL_TYPE_1': 165, + 'COMPARE_OP_FLOAT': 166, + 'COMPARE_OP_INT': 167, + 'COMPARE_OP_STR': 168, + 'CONTAINS_OP_DICT': 169, + 'CONTAINS_OP_SET': 170, + 'FOR_ITER_GEN': 171, + 'FOR_ITER_LIST': 172, + 'FOR_ITER_RANGE': 173, + 'FOR_ITER_TUPLE': 174, + 'JUMP_BACKWARD_JIT': 175, + 'JUMP_BACKWARD_NO_JIT': 176, + 'LOAD_ATTR_CLASS': 177, + 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 178, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 179, + 'LOAD_ATTR_INSTANCE_VALUE': 180, + 'LOAD_ATTR_METHOD_LAZY_DICT': 181, + 'LOAD_ATTR_METHOD_NO_DICT': 182, + 'LOAD_ATTR_METHOD_WITH_VALUES': 183, + 'LOAD_ATTR_MODULE': 184, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 185, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 186, + 'LOAD_ATTR_PROPERTY': 187, + 'LOAD_ATTR_SLOT': 188, + 'LOAD_ATTR_WITH_HINT': 189, + 'LOAD_CONST_IMMORTAL': 190, + 'LOAD_CONST_MORTAL': 191, + 'LOAD_GLOBAL_BUILTIN': 192, + 'LOAD_GLOBAL_MODULE': 193, + 'LOAD_SUPER_ATTR_ATTR': 194, + 'LOAD_SUPER_ATTR_METHOD': 195, + 'RESUME_CHECK': 196, + 'SEND_GEN': 197, + 'STORE_ATTR_INSTANCE_VALUE': 198, + 'STORE_ATTR_SLOT': 199, + 'STORE_ATTR_WITH_HINT': 200, + 'STORE_SUBSCR_DICT': 201, + 'STORE_SUBSCR_LIST_INT': 202, + 'TO_BOOL_ALWAYS_TRUE': 203, + 'TO_BOOL_BOOL': 204, + 'TO_BOOL_INT': 205, + 'TO_BOOL_LIST': 206, + 'TO_BOOL_NONE': 207, + 'TO_BOOL_STR': 208, + 'UNPACK_SEQUENCE_LIST': 209, + 'UNPACK_SEQUENCE_TUPLE': 210, + 'UNPACK_SEQUENCE_TWO_TUPLE': 211, +} opmap = { 'CACHE': 0, + 'RESERVED': 17, + 'RESUME': 128, + 'INSTRUMENTED_LINE': 254, + 'ENTER_EXECUTOR': 255, 'BINARY_SLICE': 1, 'BUILD_TEMPLATE': 2, - 'BINARY_OP_INPLACE_ADD_UNICODE': 3, 'CALL_FUNCTION_EX': 4, 'CHECK_EG_MATCH': 5, 'CHECK_EXC_MATCH': 6, @@ -24,7 +231,6 @@ 'GET_AITER': 14, 'GET_ANEXT': 15, 'GET_ITER': 16, - 'RESERVED': 17, 'GET_LEN': 18, 'GET_YIELD_FROM_ITER': 19, 'INTERPRETER_EXIT': 20, @@ -128,90 +334,6 @@ 'UNPACK_EX': 118, 'UNPACK_SEQUENCE': 119, 'YIELD_VALUE': 120, - 'RESUME': 128, - 'BINARY_OP_ADD_FLOAT': 129, - 'BINARY_OP_ADD_INT': 130, - 'BINARY_OP_ADD_UNICODE': 131, - 'BINARY_OP_EXTEND': 132, - 'BINARY_OP_MULTIPLY_FLOAT': 133, - 'BINARY_OP_MULTIPLY_INT': 134, - 'BINARY_OP_SUBSCR_DICT': 135, - 'BINARY_OP_SUBSCR_GETITEM': 136, - 'BINARY_OP_SUBSCR_LIST_INT': 137, - 'BINARY_OP_SUBSCR_LIST_SLICE': 138, - 'BINARY_OP_SUBSCR_STR_INT': 139, - 'BINARY_OP_SUBSCR_TUPLE_INT': 140, - 'BINARY_OP_SUBTRACT_FLOAT': 141, - 'BINARY_OP_SUBTRACT_INT': 142, - 'CALL_ALLOC_AND_ENTER_INIT': 143, - 'CALL_BOUND_METHOD_EXACT_ARGS': 144, - 'CALL_BOUND_METHOD_GENERAL': 145, - 'CALL_BUILTIN_CLASS': 146, - 'CALL_BUILTIN_FAST': 147, - 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 148, - 'CALL_BUILTIN_O': 149, - 'CALL_ISINSTANCE': 150, - 'CALL_KW_BOUND_METHOD': 151, - 'CALL_KW_NON_PY': 152, - 'CALL_KW_PY': 153, - 'CALL_LEN': 154, - 'CALL_LIST_APPEND': 155, - 'CALL_METHOD_DESCRIPTOR_FAST': 156, - 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 157, - 'CALL_METHOD_DESCRIPTOR_NOARGS': 158, - 'CALL_METHOD_DESCRIPTOR_O': 159, - 'CALL_NON_PY_GENERAL': 160, - 'CALL_PY_EXACT_ARGS': 161, - 'CALL_PY_GENERAL': 162, - 'CALL_STR_1': 163, - 'CALL_TUPLE_1': 164, - 'CALL_TYPE_1': 165, - 'COMPARE_OP_FLOAT': 166, - 'COMPARE_OP_INT': 167, - 'COMPARE_OP_STR': 168, - 'CONTAINS_OP_DICT': 169, - 'CONTAINS_OP_SET': 170, - 'FOR_ITER_GEN': 171, - 'FOR_ITER_LIST': 172, - 'FOR_ITER_RANGE': 173, - 'FOR_ITER_TUPLE': 174, - 'JUMP_BACKWARD_JIT': 175, - 'JUMP_BACKWARD_NO_JIT': 176, - 'LOAD_ATTR_CLASS': 177, - 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 178, - 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 179, - 'LOAD_ATTR_INSTANCE_VALUE': 180, - 'LOAD_ATTR_METHOD_LAZY_DICT': 181, - 'LOAD_ATTR_METHOD_NO_DICT': 182, - 'LOAD_ATTR_METHOD_WITH_VALUES': 183, - 'LOAD_ATTR_MODULE': 184, - 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 185, - 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 186, - 'LOAD_ATTR_PROPERTY': 187, - 'LOAD_ATTR_SLOT': 188, - 'LOAD_ATTR_WITH_HINT': 189, - 'LOAD_CONST_IMMORTAL': 190, - 'LOAD_CONST_MORTAL': 191, - 'LOAD_GLOBAL_BUILTIN': 192, - 'LOAD_GLOBAL_MODULE': 193, - 'LOAD_SUPER_ATTR_ATTR': 194, - 'LOAD_SUPER_ATTR_METHOD': 195, - 'RESUME_CHECK': 196, - 'SEND_GEN': 197, - 'STORE_ATTR_INSTANCE_VALUE': 198, - 'STORE_ATTR_SLOT': 199, - 'STORE_ATTR_WITH_HINT': 200, - 'STORE_SUBSCR_DICT': 201, - 'STORE_SUBSCR_LIST_INT': 202, - 'TO_BOOL_ALWAYS_TRUE': 203, - 'TO_BOOL_BOOL': 204, - 'TO_BOOL_INT': 205, - 'TO_BOOL_LIST': 206, - 'TO_BOOL_NONE': 207, - 'TO_BOOL_STR': 208, - 'UNPACK_SEQUENCE_LIST': 209, - 'UNPACK_SEQUENCE_TUPLE': 210, - 'UNPACK_SEQUENCE_TWO_TUPLE': 211, 'INSTRUMENTED_END_FOR': 234, 'INSTRUMENTED_POP_ITER': 235, 'INSTRUMENTED_END_SEND': 236, @@ -232,8 +354,6 @@ 'INSTRUMENTED_CALL_KW': 251, 'INSTRUMENTED_CALL_FUNCTION_EX': 252, 'INSTRUMENTED_JUMP_BACKWARD': 253, - 'INSTRUMENTED_LINE': 254, - 'ENTER_EXECUTOR': 255, 'ANNOTATIONS_PLACEHOLDER': 256, 'JUMP': 257, 'JUMP_IF_FALSE': 258, @@ -247,6 +367,5 @@ 'STORE_FAST_MAYBE_NULL': 266, } -# CPython 3.13 compatible: opcodes < 44 have no argument -HAVE_ARGUMENT = 44 -MIN_INSTRUMENTED_OPCODE = 236 +HAVE_ARGUMENT = 43 +MIN_INSTRUMENTED_OPCODE = 234 diff --git a/crates/compiler-core/src/bytecode/instruction.rs b/crates/compiler-core/src/bytecode/instruction.rs index ea5fe181861..3ee4d2baed0 100644 --- a/crates/compiler-core/src/bytecode/instruction.rs +++ b/crates/compiler-core/src/bytecode/instruction.rs @@ -564,64 +564,13 @@ impl Instruction { /// `_PyOpcode_Deopt` pub fn deoptimize(self) -> Self { match self { - // LOAD_ATTR specializations - Self::LoadAttrClass - | Self::LoadAttrClassWithMetaclassCheck - | Self::LoadAttrGetattributeOverridden - | Self::LoadAttrInstanceValue - | Self::LoadAttrMethodLazyDict - | Self::LoadAttrMethodNoDict - | Self::LoadAttrMethodWithValues - | Self::LoadAttrModule - | Self::LoadAttrNondescriptorNoDict - | Self::LoadAttrNondescriptorWithValues - | Self::LoadAttrProperty - | Self::LoadAttrSlot - | Self::LoadAttrWithHint => Self::LoadAttr { - namei: Arg::marker(), - }, - // BINARY_OP specializations - Self::BinaryOpAddFloat - | Self::BinaryOpAddInt - | Self::BinaryOpAddUnicode - | Self::BinaryOpExtend - | Self::BinaryOpInplaceAddUnicode - | Self::BinaryOpMultiplyFloat - | Self::BinaryOpMultiplyInt - | Self::BinaryOpSubscrDict - | Self::BinaryOpSubscrGetitem - | Self::BinaryOpSubscrListInt - | Self::BinaryOpSubscrListSlice - | Self::BinaryOpSubscrStrInt - | Self::BinaryOpSubscrTupleInt - | Self::BinaryOpSubtractFloat - | Self::BinaryOpSubtractInt => Self::BinaryOp { op: Arg::marker() }, - // CALL specializations - Self::CallAllocAndEnterInit - | Self::CallBoundMethodExactArgs - | Self::CallBoundMethodGeneral - | Self::CallBuiltinClass - | Self::CallBuiltinFast - | Self::CallBuiltinFastWithKeywords - | Self::CallBuiltinO - | Self::CallIsinstance - | Self::CallLen - | Self::CallListAppend - | Self::CallMethodDescriptorFast - | Self::CallMethodDescriptorFastWithKeywords - | Self::CallMethodDescriptorNoargs - | Self::CallMethodDescriptorO - | Self::CallNonPyGeneral - | Self::CallPyExactArgs - | Self::CallPyGeneral - | Self::CallStr1 - | Self::CallTuple1 - | Self::CallType1 => Self::Call { - argc: Arg::marker(), + // RESUME specializations + Self::ResumeCheck => Self::Resume { + context: Arg::marker(), }, - // CALL_KW specializations - Self::CallKwBoundMethod | Self::CallKwNonPy | Self::CallKwPy => Self::CallKw { - argc: Arg::marker(), + // LOAD_CONST specializations + Self::LoadConstMortal | Self::LoadConstImmortal => Self::LoadConst { + consti: Arg::marker(), }, // TO_BOOL specializations Self::ToBoolAlwaysTrue @@ -630,58 +579,109 @@ impl Instruction { | Self::ToBoolList | Self::ToBoolNone | Self::ToBoolStr => Self::ToBool, - // COMPARE_OP specializations - Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => Self::CompareOp { - opname: Arg::marker(), - }, - // CONTAINS_OP specializations - Self::ContainsOpDict | Self::ContainsOpSet => Self::ContainsOp { - invert: Arg::marker(), + // BINARY_OP specializations + Self::BinaryOpMultiplyInt + | Self::BinaryOpAddInt + | Self::BinaryOpSubtractInt + | Self::BinaryOpMultiplyFloat + | Self::BinaryOpAddFloat + | Self::BinaryOpSubtractFloat + | Self::BinaryOpAddUnicode + | Self::BinaryOpSubscrListInt + | Self::BinaryOpSubscrListSlice + | Self::BinaryOpSubscrTupleInt + | Self::BinaryOpSubscrStrInt + | Self::BinaryOpSubscrDict + | Self::BinaryOpSubscrGetitem + | Self::BinaryOpExtend + | Self::BinaryOpInplaceAddUnicode => Self::BinaryOp { op: Arg::marker() }, + // STORE_SUBSCR specializations + Self::StoreSubscrDict | Self::StoreSubscrListInt => Self::StoreSubscr, + // SEND specializations + Self::SendGen => Self::Send { + delta: Arg::marker(), }, - // FOR_ITER specializations - Self::ForIterGen | Self::ForIterList | Self::ForIterRange | Self::ForIterTuple => { - Self::ForIter { - delta: Arg::marker(), + // UNPACK_SEQUENCE specializations + Self::UnpackSequenceTwoTuple | Self::UnpackSequenceTuple | Self::UnpackSequenceList => { + Self::UnpackSequence { + count: Arg::marker(), } } - // LOAD_GLOBAL specializations - Self::LoadGlobalBuiltin | Self::LoadGlobalModule => Self::LoadGlobal { - namei: Arg::marker(), - }, // STORE_ATTR specializations Self::StoreAttrInstanceValue | Self::StoreAttrSlot | Self::StoreAttrWithHint => { Self::StoreAttr { namei: Arg::marker(), } } + // LOAD_GLOBAL specializations + Self::LoadGlobalModule | Self::LoadGlobalBuiltin => Self::LoadGlobal { + namei: Arg::marker(), + }, // LOAD_SUPER_ATTR specializations Self::LoadSuperAttrAttr | Self::LoadSuperAttrMethod => Self::LoadSuperAttr { namei: Arg::marker(), }, - // STORE_SUBSCR specializations - Self::StoreSubscrDict | Self::StoreSubscrListInt => Self::StoreSubscr, - // UNPACK_SEQUENCE specializations - Self::UnpackSequenceList | Self::UnpackSequenceTuple | Self::UnpackSequenceTwoTuple => { - Self::UnpackSequence { - count: Arg::marker(), - } - } - // SEND specializations - Self::SendGen => Self::Send { - delta: Arg::marker(), + // LOAD_ATTR specializations + Self::LoadAttrInstanceValue + | Self::LoadAttrModule + | Self::LoadAttrWithHint + | Self::LoadAttrSlot + | Self::LoadAttrClass + | Self::LoadAttrClassWithMetaclassCheck + | Self::LoadAttrProperty + | Self::LoadAttrGetattributeOverridden + | Self::LoadAttrMethodWithValues + | Self::LoadAttrMethodNoDict + | Self::LoadAttrMethodLazyDict + | Self::LoadAttrNondescriptorWithValues + | Self::LoadAttrNondescriptorNoDict => Self::LoadAttr { + namei: Arg::marker(), }, - // LOAD_CONST specializations - Self::LoadConstImmortal | Self::LoadConstMortal => Self::LoadConst { - consti: Arg::marker(), + // COMPARE_OP specializations + Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => Self::CompareOp { + opname: Arg::marker(), }, - // RESUME specializations - Self::ResumeCheck => Self::Resume { - context: Arg::marker(), + // CONTAINS_OP specializations + Self::ContainsOpSet | Self::ContainsOpDict => Self::ContainsOp { + invert: Arg::marker(), }, // JUMP_BACKWARD specializations - Self::JumpBackwardJit | Self::JumpBackwardNoJit => Self::JumpBackward { + Self::JumpBackwardNoJit | Self::JumpBackwardJit => Self::JumpBackward { delta: Arg::marker(), }, + // FOR_ITER specializations + Self::ForIterList | Self::ForIterTuple | Self::ForIterRange | Self::ForIterGen => { + Self::ForIter { + delta: Arg::marker(), + } + } + // CALL specializations + Self::CallBoundMethodExactArgs + | Self::CallPyExactArgs + | Self::CallType1 + | Self::CallStr1 + | Self::CallTuple1 + | Self::CallBuiltinClass + | Self::CallBuiltinO + | Self::CallBuiltinFast + | Self::CallBuiltinFastWithKeywords + | Self::CallLen + | Self::CallIsinstance + | Self::CallListAppend + | Self::CallMethodDescriptorO + | Self::CallMethodDescriptorFastWithKeywords + | Self::CallMethodDescriptorNoargs + | Self::CallMethodDescriptorFast + | Self::CallAllocAndEnterInit + | Self::CallPyGeneral + | Self::CallBoundMethodGeneral + | Self::CallNonPyGeneral => Self::Call { + argc: Arg::marker(), + }, + // CALL_KW specializations + Self::CallKwBoundMethod | Self::CallKwPy | Self::CallKwNonPy => Self::CallKw { + argc: Arg::marker(), + }, // Instrumented opcodes map back to their base _ => match self.to_base() { Some(base) => base, diff --git a/scripts/generate_opcode_metadata.py b/scripts/generate_opcode_metadata.py index 42fb55a7c01..5a3188ca6e4 100644 --- a/scripts/generate_opcode_metadata.py +++ b/scripts/generate_opcode_metadata.py @@ -16,25 +16,48 @@ OPCODE_METADATA_FILE = ROOT / "Lib" / "_opcode_metadata.py" +# Opcodes that needs to be first, regardless of their opcode ID. +PRIORITY_OPMAP = { + "CACHE", + "RESERVED", + "RESUME", + "INSTRUMENTED_LINE", + "ENTER_EXECUTOR", +} + + +def to_pascal_case(s: str) -> str: + res = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s) + return re.sub(r"(\D)(\d+)$", r"\1_\2", res).upper() + + class Opcode(typing.NamedTuple): rust_name: str id: int + have_oparg: bool @property def cpython_name(self) -> str: - name = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", self.rust_name) - return re.sub(r"(\D)(\d+)$", r"\1_\2", name).upper() + return to_pascal_case(self.rust_name) + + @property + def is_instrumented(self): + return self.cpython_name.startswith("INSTRUMENTED_") @classmethod def from_str(cls, body: str): raw_variants = re.split(r"(\d+),", body.strip()) raw_variants.remove("") - for raw_name, raw_id in itertools.batched(raw_variants, 2): + for raw_name, raw_id in itertools.batched(raw_variants, 2, strict=True): + have_oparg = "Arg<" in raw_name # Hacky but works name = re.findall(r"\b[A-Z][A-Za-z]*\d*\b(?=\s*[\({=])", raw_name)[0] - yield cls(rust_name=name.strip(), id=int(raw_id)) + yield cls(rust_name=name.strip(), id=int(raw_id), have_oparg=have_oparg) def __lt__(self, other: typing.Self) -> bool: - return self.id < other.id + sprio, oprio = ( + opcode.cpython_name not in PRIORITY_OPMAP for opcode in (self, other) + ) + return (sprio, self.id) < (oprio, other.id) def extract_enum_body(contents: str, enum_name: str) -> str: @@ -49,33 +72,104 @@ def extract_enum_body(contents: str, enum_name: str) -> str: ) +def build_deopts(contents: str) -> dict[str, list[str]]: + raw_body = re.search( + r"fn deoptimize\(self\) -> Self(.*)", contents, re.DOTALL + ).group(1) + body = "\n".join( + itertools.takewhile( + lambda l: not l.startswith("_ =>"), # Take until reaching fallback + filter( + lambda l: ( + not l.startswith(("//", "match")) # Skip comments or start of match + ), + map(str.strip, raw_body.splitlines()), + ), + ) + ).removeprefix("{") + + depth = 0 + arms = [] + buf = [] + for char in body: + if char == "{": + depth += 1 + elif char == "}": + depth -= 1 + + if depth == 0 and (char in ("}", ",")): + arm = "".join(buf).strip() + arms.append(arm) + buf = [] + else: + buf.append(char) + + # last arm + arms.append("".join(buf)) + arms = [arm for arm in arms if arm] + + deopts = {} + for arm in arms: + *specialized, deopt = map(to_pascal_case, re.findall(r"Self::(\w*)\b", arm)) + deopts[deopt] = specialized + + return deopts + + contents = BYTECODE_FILE.read_text(encoding="utf-8") + +deopts = build_deopts(contents) + enum_body = "\n".join( extract_enum_body(contents, enum_name) for enum_name in ("Instruction", "PseudoInstruction") ) opcodes = list(Opcode.from_str(enum_body)) +have_oparg = min(opcode.id for opcode in opcodes if opcode.have_oparg) - 1 +min_instrumented = min(opcode.id for opcode in opcodes if opcode.is_instrumented) + # Generate the output file output = """# This file is generated by scripts/generate_opcode_metadata.py # for RustPython bytecode format (CPython 3.13 compatible opcode numbers). # Do not edit! +""" -_specializations = {} +output += "\n_specializations = {\n" -_specialized_opmap = {} +for key, lst in deopts.items(): + output += f' "{key}": [\n' + for item in lst: + output += f' "{item}",\n' + output += " ],\n" -opmap = { -""" +output += "}\n" + +specialized = set(itertools.chain.from_iterable(deopts.values())) +output += "\n_specialized_opmap = {\n" +for opcode in sorted(opcodes, key=lambda op: op.cpython_name): + cpython_name = opcode.cpython_name + if cpython_name not in specialized: + continue + + output += f" '{cpython_name}': {opcode.id},\n" + +output += "}\n" + +output += "\nopmap = {\n" for opcode in sorted(opcodes): - output += f" '{opcode.cpython_name}': {opcode.id},\n" + cpython_name = opcode.cpython_name + if cpython_name in specialized: + continue + + output += f" '{cpython_name}': {opcode.id},\n" -output += """} +output += "}\n" -# CPython 3.13 compatible: opcodes < 44 have no argument -HAVE_ARGUMENT = 44 -MIN_INSTRUMENTED_OPCODE = 236 +output += f""" +HAVE_ARGUMENT = {have_oparg} +MIN_INSTRUMENTED_OPCODE = {min_instrumented} """ OPCODE_METADATA_FILE.write_text(output, encoding="utf-8") From 6393ac09d118c4e29609d0b732bb2ca1b5eef5d7 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:50:38 +0100 Subject: [PATCH 2/7] Unmark passing test --- Lib/test/test_dis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 8ba783bf141..36c63aeb1e4 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1005,7 +1005,6 @@ def test_opname(self): def test_boundaries(self): self.assertEqual(dis.opmap["EXTENDED_ARG"], dis.EXTENDED_ARG) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 29 not less than or equal to 20 def test_widths(self): long_opcodes = set(['JUMP_BACKWARD_NO_INTERRUPT', 'LOAD_FAST_BORROW_LOAD_FAST_BORROW', From e59d6cdb8189946196abc7b3fa4c99335639f7c3 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:05:22 +0100 Subject: [PATCH 3/7] Ensure python 3.14 runs on CI --- .github/workflows/pr-format.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/pr-format.yaml b/.github/workflows/pr-format.yaml index 6b11a758668..36a6487fb07 100644 --- a/.github/workflows/pr-format.yaml +++ b/.github/workflows/pr-format.yaml @@ -13,6 +13,9 @@ concurrency: group: format-check-${{ github.event.pull_request.number }} cancel-in-progress: true +env: + PYTHON_VERSION: "3.14.3" + jobs: format_check: permissions: @@ -45,6 +48,10 @@ jobs: - name: Run ruff check import sorting run: ruff check --select I --fix + - uses: actions/setup-python@v6.2.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Run generate_opcode_metadata.py run: python scripts/generate_opcode_metadata.py From cf536b7a086fe2ee2e66ac23c19281ece9e5123b Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:46:51 +0100 Subject: [PATCH 4/7] Update banner --- Lib/_opcode_metadata.py | 2 +- scripts/generate_opcode_metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index ac3897848ff..4da6e507736 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -1,5 +1,5 @@ # This file is generated by scripts/generate_opcode_metadata.py -# for RustPython bytecode format (CPython 3.13 compatible opcode numbers). +# for RustPython bytecode format (CPython 3.14 compatible opcode numbers). # Do not edit! _specializations = { diff --git a/scripts/generate_opcode_metadata.py b/scripts/generate_opcode_metadata.py index 5a3188ca6e4..3167b234ce9 100644 --- a/scripts/generate_opcode_metadata.py +++ b/scripts/generate_opcode_metadata.py @@ -131,7 +131,7 @@ def build_deopts(contents: str) -> dict[str, list[str]]: # Generate the output file output = """# This file is generated by scripts/generate_opcode_metadata.py -# for RustPython bytecode format (CPython 3.13 compatible opcode numbers). +# for RustPython bytecode format (CPython 3.14 compatible opcode numbers). # Do not edit! """ From 5f8f38d2bdccb15ea240fb1126cd355eefa5fb9d Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:23:38 +0100 Subject: [PATCH 5/7] Fix `test__opcode.py` --- Lib/test/test__opcode.py | 2 -- .../compiler-core/src/bytecode/instruction.rs | 33 ++++++++++++------- crates/stdlib/src/_opcode.rs | 5 +++ 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/Lib/test/test__opcode.py b/Lib/test/test__opcode.py index 43d475baa5d..c253bc2be02 100644 --- a/Lib/test/test__opcode.py +++ b/Lib/test/test__opcode.py @@ -38,7 +38,6 @@ def test_is_valid(self): opcodes = [dis.opmap[opname] for opname in names] self.check_bool_function_result(_opcode.is_valid, opcodes, True) - @unittest.expectedFailure # TODO: RUSTPYTHON; KeyError: 'BINARY_OP_ADD_INT' def test_opmaps(self): def check_roundtrip(name, map): return self.assertEqual(opcode.opname[map[name]], name) @@ -116,7 +115,6 @@ def test_stack_effect_jump(self): class SpecializationStatsTests(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 'load_attr' not found in [] def test_specialization_stats(self): stat_names = ["success", "failure", "hit", "deferred", "miss", "deopt"] specialized_opcodes = [ diff --git a/crates/compiler-core/src/bytecode/instruction.rs b/crates/compiler-core/src/bytecode/instruction.rs index 0f817e72cd4..1eae30cdc6e 100644 --- a/crates/compiler-core/src/bytecode/instruction.rs +++ b/crates/compiler-core/src/bytecode/instruction.rs @@ -462,7 +462,7 @@ impl TryFrom for Instruction { impl Instruction { /// Returns `true` if this is any instrumented opcode /// (regular INSTRUMENTED_*, INSTRUMENTED_LINE, or INSTRUMENTED_INSTRUCTION). - pub fn is_instrumented(self) -> bool { + pub const fn is_instrumented(self) -> bool { self.to_base().is_some() || matches!(self, Self::InstrumentedLine | Self::InstrumentedInstruction) } @@ -509,7 +509,7 @@ impl Instruction { /// /// The returned base opcode uses `Arg::marker()` for typed fields — /// only the opcode byte matters since `replace_op` preserves the arg byte. - pub fn to_base(self) -> Option { + pub const fn to_base(self) -> Option { Some(match self { Self::InstrumentedResume => Self::Resume { context: Arg::marker(), @@ -555,11 +555,10 @@ impl Instruction { _ => return None, }) } - - /// Map a specialized opcode back to its adaptive (base) variant. + /// Map a specialized or instrumented opcode back to its adaptive (base) variant. /// `_PyOpcode_Deopt` - pub fn deoptimize(self) -> Self { - match self { + pub const fn deopt(self) -> Option { + Some(match self { // RESUME specializations Self::ResumeCheck => Self::Resume { context: Arg::marker(), @@ -678,17 +677,27 @@ impl Instruction { Self::CallKwBoundMethod | Self::CallKwPy | Self::CallKwNonPy => Self::CallKw { argc: Arg::marker(), }, - // Instrumented opcodes map back to their base - _ => match self.to_base() { - Some(base) => base, - None => self, - }, + _ => return None, + }) + } + + /// Map a specialized opcode back to its adaptive (base) variant. + pub const fn deoptimize(self) -> Self { + match self.deopt() { + Some(v) => v, + None => { + // Instrumented opcodes map back to their base + match self.to_base() { + Some(v) => v, + None => self, + } + } } } /// Number of CACHE code units that follow this instruction. /// _PyOpcode_Caches - pub fn cache_entries(self) -> usize { + pub const fn cache_entries(self) -> usize { match self { // LOAD_ATTR: 9 cache entries Self::LoadAttr { .. } diff --git a/crates/stdlib/src/_opcode.rs b/crates/stdlib/src/_opcode.rs index ba1e6120fc0..4b3b30520bd 100644 --- a/crates/stdlib/src/_opcode.rs +++ b/crates/stdlib/src/_opcode.rs @@ -197,6 +197,11 @@ mod _opcode { let opcode = Opcode::try_from_pyint(args.opcode, vm)?; + // Raise ValueError if specialized. + if opcode.inner().real().is_some_and(|op| op.deopt().is_some()) { + return Err(vm.new_value_error("invalid opcode or oparg")); + } + let _ = jump; // Python API accepts jump but it's not used Ok(opcode.stack_effect(oparg)) } From 4c9193c4398f3bc7b54912b861f7f443327c8a4f Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:25:28 +0100 Subject: [PATCH 6/7] Adjust generate script --- scripts/generate_opcode_metadata.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/generate_opcode_metadata.py b/scripts/generate_opcode_metadata.py index 3167b234ce9..7e1c9222ffe 100644 --- a/scripts/generate_opcode_metadata.py +++ b/scripts/generate_opcode_metadata.py @@ -74,14 +74,16 @@ def extract_enum_body(contents: str, enum_name: str) -> str: def build_deopts(contents: str) -> dict[str, list[str]]: raw_body = re.search( - r"fn deoptimize\(self\) -> Self(.*)", contents, re.DOTALL + r"fn deopt\(self\) -> Option(.*)", contents, re.DOTALL ).group(1) body = "\n".join( itertools.takewhile( lambda l: not l.startswith("_ =>"), # Take until reaching fallback filter( lambda l: ( - not l.startswith(("//", "match")) # Skip comments or start of match + not l.startswith( + ("//", "Some(match") + ) # Skip comments or start of match ), map(str.strip, raw_body.splitlines()), ), From 2aef7fca1a65721eea5608a9883520780c5ba4bc Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:34:14 +0100 Subject: [PATCH 7/7] Fix docs --- crates/compiler-core/src/bytecode/instruction.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/compiler-core/src/bytecode/instruction.rs b/crates/compiler-core/src/bytecode/instruction.rs index 1eae30cdc6e..9d4856851e2 100644 --- a/crates/compiler-core/src/bytecode/instruction.rs +++ b/crates/compiler-core/src/bytecode/instruction.rs @@ -555,7 +555,8 @@ impl Instruction { _ => return None, }) } - /// Map a specialized or instrumented opcode back to its adaptive (base) variant. + + /// Map a specialized opcode back to its adaptive (base) variant. /// `_PyOpcode_Deopt` pub const fn deopt(self) -> Option { Some(match self { @@ -681,7 +682,7 @@ impl Instruction { }) } - /// Map a specialized opcode back to its adaptive (base) variant. + /// Map a specialized or instrumented opcode back to its adaptive (base) variant. pub const fn deoptimize(self) -> Self { match self.deopt() { Some(v) => v,