diff --git a/Lib/__hello_only__.py b/Lib/__hello_only__.py new file mode 100644 index 00000000000..1cefa0a2fd8 --- /dev/null +++ b/Lib/__hello_only__.py @@ -0,0 +1,2 @@ +initialized = True +print("Hello world!") diff --git a/Lib/test/test_frozen.py b/Lib/test/test_frozen.py index 10a6d8535fc..0b4a12bcf40 100644 --- a/Lib/test/test_frozen.py +++ b/Lib/test/test_frozen.py @@ -27,7 +27,6 @@ def test_frozen(self): __hello__.main() self.assertEqual(out.getvalue(), 'Hello world!\n') - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: unexpectedly identical: def test_frozen_submodule_in_unfrozen_package(self): with import_helper.CleanImport('__phello__', '__phello__.spam'): with import_helper.frozen_modules(enabled=False): @@ -40,7 +39,6 @@ def test_frozen_submodule_in_unfrozen_package(self): self.assertIs(spam.__spec__.loader, importlib.machinery.FrozenImporter) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: unexpectedly identical: def test_unfrozen_submodule_in_frozen_package(self): with import_helper.CleanImport('__phello__', '__phello__.spam'): with import_helper.frozen_modules(enabled=True): diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 3bd176a200d..6920cf45533 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -768,7 +768,7 @@ def run(): finally: del sys.path[0] - @unittest.expectedFailure # TODO: RUSTPYTHON; FileNotFoundError: [WinError 2] No such file or directory: 'built-in' + @unittest.expectedFailure # TODO: RUSTPYTHON; no C extension support @unittest.skipUnless(sys.platform == "win32", "Windows-specific") def test_dll_dependency_import(self): from _winapi import GetModuleFileName @@ -814,7 +814,6 @@ def test_dll_dependency_import(self): env=env, cwd=os.path.dirname(pyexe)) - @unittest.expectedFailure # TODO: RUSTPYTHON; _imp.get_frozen_object("x", b"6\'\xd5Cu\x12"). TypeError: expected at most 1 arguments, got 2 def test_issue105979(self): # this used to crash with self.assertRaises(ImportError) as cm: @@ -1239,7 +1238,8 @@ def test_script_shadowing_stdlib_sys_path_modification(self): stdout, stderr = popen.communicate() self.assertRegex(stdout, expected_error) - @unittest.skip("TODO: RUSTPYTHON; AttributeError: module \"_imp\" has no attribute \"create_dynamic\"") + # TODO: RUSTPYTHON: _imp.create_dynamic is for C extensions, not applicable + @unittest.skip("TODO: RustPython _imp.create_dynamic not implemented") def test_create_dynamic_null(self): with self.assertRaisesRegex(ValueError, 'embedded null character'): class Spec: @@ -1398,7 +1398,6 @@ def test_basics(self): self.assertEqual(mod.code_filename, self.file_name) self.assertEqual(mod.func_filename, self.file_name) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 'another_module.py' != .../unlikely_module_name.py def test_incorrect_code_name(self): py_compile.compile(self.file_name, dfile="another_module.py") mod = self.import_module() @@ -2045,6 +2044,7 @@ def exec_module(*args): else: importlib.SourceLoader.exec_module = old_exec_module + @unittest.expectedFailure # TODO: RUSTPYTHON; subprocess fails on Windows @unittest.skipUnless(TESTFN_UNENCODABLE, 'need TESTFN_UNENCODABLE') def test_unencodable_filename(self): # Issue #11619: The Python parser and the import machinery must not @@ -2095,7 +2095,6 @@ def test_rebinding(self): from test.test_import.data.circular_imports.subpkg import util self.assertIs(util.util, rebinding.util) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module "test.test_import.data.circular_imports" has no attribute "binding" def test_binding(self): try: import test.test_import.data.circular_imports.binding diff --git a/Lib/test/test_importlib/frozen/test_loader.py b/Lib/test/test_importlib/frozen/test_loader.py index 6132763480c..0824af53e05 100644 --- a/Lib/test/test_importlib/frozen/test_loader.py +++ b/Lib/test/test_importlib/frozen/test_loader.py @@ -5,6 +5,7 @@ from test.support import captured_stdout, import_helper, STDLIB_DIR import contextlib import os.path +import sys import types import unittest import warnings diff --git a/Lib/test/test_importlib/test_locks.py b/Lib/test/test_importlib/test_locks.py index 9406f06adb7..b1f5f9d6c8b 100644 --- a/Lib/test/test_importlib/test_locks.py +++ b/Lib/test/test_importlib/test_locks.py @@ -153,6 +153,10 @@ def test_all_locks(self): Source_LifetimeTests ) = test_util.test_both(LifetimeTests, init=init) +# TODO: RUSTPYTHON; dead weakref module locks not cleaned up in frozen bootstrap +Frozen_LifetimeTests.test_all_locks = unittest.skip("TODO: RUSTPYTHON")( + Frozen_LifetimeTests.test_all_locks) + def setUpModule(): thread_info = threading_helper.threading_setup() diff --git a/Lib/test/test_importlib/test_windows.py b/Lib/test/test_importlib/test_windows.py index bef4fb46f85..0ae911bc43d 100644 --- a/Lib/test/test_importlib/test_windows.py +++ b/Lib/test/test_importlib/test_windows.py @@ -139,6 +139,7 @@ def test_raises_deprecation_warning(self): @unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') class WindowsExtensionSuffixTests: + @unittest.expectedFailure # TODO: RUSTPYTHON; no C extension (.pyd) support def test_tagged_suffix(self): suffixes = self.machinery.EXTENSION_SUFFIXES abi_flags = "t" if support.Py_GIL_DISABLED else "" diff --git a/Lib/test/test_importlib/util.py b/Lib/test/test_importlib/util.py index edbe78545a2..85e7ffcb608 100644 --- a/Lib/test/test_importlib/util.py +++ b/Lib/test/test_importlib/util.py @@ -15,7 +15,10 @@ import tempfile import types -_testsinglephase = import_helper.import_module("_testsinglephase") +try: + _testsinglephase = import_helper.import_module("_testsinglephase") +except unittest.SkipTest: + _testsinglephase = None # TODO: RUSTPYTHON BUILTINS = types.SimpleNamespace() diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 4c787a42fcc..21be839795e 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -2065,11 +2065,19 @@ impl Compiler { let idx = self.name(&name.name); emit!(self, Instruction::ImportName { idx }); if let Some(alias) = &name.asname { - for part in name.name.split('.').skip(1) { + let parts: Vec<&str> = name.name.split('.').skip(1).collect(); + for (i, part) in parts.iter().enumerate() { let idx = self.name(part); - self.emit_load_attr(idx); + emit!(self, Instruction::ImportFrom { idx }); + if i < parts.len() - 1 { + emit!(self, Instruction::Swap { index: 2 }); + emit!(self, Instruction::PopTop); + } + } + self.store_name(alias.as_str())?; + if !parts.is_empty() { + emit!(self, Instruction::PopTop); } - self.store_name(alias.as_str())? } else { self.store_name(name.name.split('.').next().unwrap())? } diff --git a/crates/vm/Lib/python_builtins/__hello_only__.py b/crates/vm/Lib/python_builtins/__hello_only__.py new file mode 120000 index 00000000000..795ca3ef1a0 --- /dev/null +++ b/crates/vm/Lib/python_builtins/__hello_only__.py @@ -0,0 +1 @@ +../../../../Lib/__hello_only__.py \ No newline at end of file diff --git a/crates/vm/src/builtins/module.rs b/crates/vm/src/builtins/module.rs index 805516b2d99..a2221fb6b9a 100644 --- a/crates/vm/src/builtins/module.rs +++ b/crates/vm/src/builtins/module.rs @@ -5,6 +5,7 @@ use crate::{ class::PyClassImpl, convert::ToPyObject, function::{FuncArgs, PyMethodDef, PySetterValue}, + import::{get_spec_file_origin, is_possibly_shadowing_path, is_stdlib_module_name}, types::{GetAttr, Initializer, Representable}, }; @@ -152,20 +153,96 @@ impl Py { if let Ok(getattr) = self.dict().get_item(identifier!(vm, __getattr__), vm) { return getattr.call((name.to_owned(),), vm); } - let module_name = if let Some(name) = self.name(vm) { - format!(" '{name}'") + let dict = self.dict(); + + // Get the raw __name__ object (may be a str subclass) + let mod_name_obj = dict + .get_item_opt(identifier!(vm, __name__), vm) + .ok() + .flatten(); + let mod_name_str = mod_name_obj + .as_ref() + .and_then(|n| n.downcast_ref::().map(|s| s.as_str().to_owned())); + + // If __name__ is not set or not a string, use a simpler error message + let mod_display = match mod_name_str.as_deref() { + Some(s) => s, + None => { + return Err(vm.new_attribute_error(format!("module has no attribute '{name}'"))); + } + }; + + let spec = dict + .get_item_opt(vm.ctx.intern_str("__spec__"), vm) + .ok() + .flatten() + .filter(|s| !vm.is_none(s)); + + let origin = get_spec_file_origin(&spec, vm); + + let is_possibly_shadowing = origin + .as_ref() + .map(|o| is_possibly_shadowing_path(o, vm)) + .unwrap_or(false); + // Use the ORIGINAL __name__ object for stdlib check (may raise TypeError + // if __name__ is an unhashable str subclass) + let is_possibly_shadowing_stdlib = if is_possibly_shadowing { + if let Some(ref mod_name) = mod_name_obj { + is_stdlib_module_name(mod_name, vm)? + } else { + false + } } else { - "".to_owned() + false }; - Err(vm.new_attribute_error(format!("module{module_name} has no attribute '{name}'"))) - } - fn name(&self, vm: &VirtualMachine) -> Option { - let name = self - .as_object() - .generic_getattr_opt(identifier!(vm, __name__), None, vm) - .unwrap_or_default()?; - name.downcast::().ok() + if is_possibly_shadowing_stdlib { + let origin = origin.as_ref().unwrap(); + Err(vm.new_attribute_error(format!( + "module '{mod_display}' has no attribute '{name}' \ + (consider renaming '{origin}' since it has the same \ + name as the standard library module named '{mod_display}' \ + and prevents importing that standard library module)" + ))) + } else { + let is_initializing = PyModule::is_initializing(&dict, vm); + if is_initializing { + if is_possibly_shadowing { + let origin = origin.as_ref().unwrap(); + Err(vm.new_attribute_error(format!( + "module '{mod_display}' has no attribute '{name}' \ + (consider renaming '{origin}' if it has the same name \ + as a library you intended to import)" + ))) + } else if let Some(ref origin) = origin { + Err(vm.new_attribute_error(format!( + "partially initialized module '{mod_display}' from '{origin}' \ + has no attribute '{name}' \ + (most likely due to a circular import)" + ))) + } else { + Err(vm.new_attribute_error(format!( + "partially initialized module '{mod_display}' \ + has no attribute '{name}' \ + (most likely due to a circular import)" + ))) + } + } else { + // Check for uninitialized submodule + let submodule_initializing = + is_uninitialized_submodule(mod_name_str.as_ref(), name, vm); + if submodule_initializing { + Err(vm.new_attribute_error(format!( + "cannot access submodule '{name}' of module '{mod_display}' \ + (most likely due to a circular import)" + ))) + } else { + Err(vm.new_attribute_error(format!( + "module '{mod_display}' has no attribute '{name}'" + ))) + } + } + } } // TODO: to be replaced by the commented-out dict method above once dictoffset land @@ -361,8 +438,8 @@ impl GetAttr for PyModule { impl Representable for PyModule { #[inline] fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { - let importlib = vm.import("_frozen_importlib", 0)?; - let module_repr = importlib.get_attr("_module_repr", vm)?; + // Use cached importlib reference (like interp->importlib) + let module_repr = vm.importlib.get_attr("_module_repr", vm)?; let repr = module_repr.call((zelf.to_owned(),), vm)?; repr.downcast() .map_err(|_| vm.new_type_error("_module_repr did not return a string")) @@ -377,3 +454,32 @@ impl Representable for PyModule { pub(crate) fn init(context: &Context) { PyModule::extend_class(context, context.types.module_type); } + +/// Check if {module_name}.{name} is an uninitialized submodule in sys.modules. +fn is_uninitialized_submodule( + module_name: Option<&String>, + name: &Py, + vm: &VirtualMachine, +) -> bool { + let mod_name = match module_name { + Some(n) => n.as_str(), + None => return false, + }; + let full_name = format!("{mod_name}.{name}"); + let sys_modules = match vm.sys_module.get_attr("modules", vm).ok() { + Some(m) => m, + None => return false, + }; + let sub_mod = match sys_modules.get_item(&full_name, vm).ok() { + Some(m) => m, + None => return false, + }; + let spec = match sub_mod.get_attr("__spec__", vm).ok() { + Some(s) if !vm.is_none(&s) => s, + _ => return false, + }; + spec.get_attr("_initializing", vm) + .ok() + .and_then(|v| v.try_to_bool(vm).ok()) + .unwrap_or(false) +} diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 1337784c208..3108e082b4a 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4,8 +4,8 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{ PyBaseException, PyBaseExceptionRef, PyCode, PyCoroutine, PyDict, PyDictRef, PyGenerator, - PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, PyStrRef, PyTemplate, - PyTraceback, PyType, + PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, PyTemplate, PyTraceback, + PyType, asyncgenerator::PyAsyncGenWrappedValue, function::{PyCell, PyCellRef, PyFunction}, tuple::{PyTuple, PyTupleRef}, @@ -2263,53 +2263,81 @@ impl ExecutingFrame<'_> { return Ok(sub_module); } - // Get module name for the error message and ImportError attributes + use crate::import::{ + get_spec_file_origin, is_possibly_shadowing_path, is_stdlib_module_name, + }; + + // Get module name for the error message let mod_name_obj = module.get_attr(identifier!(vm, __name__), vm).ok(); let mod_name_str = mod_name_obj .as_ref() .and_then(|n| n.downcast_ref::().map(|s| s.as_str().to_owned())); - let module_name = mod_name_str.as_deref().unwrap_or(""); + let module_name = mod_name_str.as_deref().unwrap_or(""); - // Get module path/location for the error message - let mod_path = module + let spec = module .get_attr("__spec__", vm) .ok() - .and_then(|spec| spec.get_attr("origin", vm).ok()) - .and_then(|origin| { - if vm.is_none(&origin) { - None + .filter(|s| !vm.is_none(s)); + + let origin = get_spec_file_origin(&spec, vm); + + let is_possibly_shadowing = origin + .as_ref() + .map(|o| is_possibly_shadowing_path(o, vm)) + .unwrap_or(false); + let is_possibly_shadowing_stdlib = if is_possibly_shadowing { + if let Some(ref mod_name) = mod_name_obj { + is_stdlib_module_name(mod_name, vm)? + } else { + false + } + } else { + false + }; + + let msg = if is_possibly_shadowing_stdlib { + let origin = origin.as_ref().unwrap(); + format!( + "cannot import name '{name}' from '{module_name}' \ + (consider renaming '{origin}' since it has the same \ + name as the standard library module named '{module_name}' \ + and prevents importing that standard library module)" + ) + } else { + let is_init = is_module_initializing(module, vm); + if is_init { + if is_possibly_shadowing { + let origin = origin.as_ref().unwrap(); + format!( + "cannot import name '{name}' from '{module_name}' \ + (consider renaming '{origin}' if it has the same name \ + as a library you intended to import)" + ) + } else if let Some(ref path) = origin { + format!( + "cannot import name '{name}' from partially initialized module \ + '{module_name}' (most likely due to a circular import) ({path})" + ) } else { - origin - .downcast_ref::() - .map(|s| s.as_str().to_owned()) + format!( + "cannot import name '{name}' from partially initialized module \ + '{module_name}' (most likely due to a circular import)" + ) } - }) - .or_else(|| { - module - .get_attr(identifier!(vm, __file__), vm) - .ok() - .and_then(|f| f.downcast_ref::().map(|s| s.as_str().to_owned())) - }); - - let msg = if is_module_initializing(module, vm) { - if let Some(ref path) = mod_path { - format!( - "cannot import name '{name}' from partially initialized module \ - '{module_name}' (most likely due to a circular import) ({path})", - ) + } else if let Some(ref path) = origin { + format!("cannot import name '{name}' from '{module_name}' ({path})") } else { - format!( - "cannot import name '{name}' from partially initialized module \ - '{module_name}' (most likely due to a circular import)", - ) + format!("cannot import name '{name}' from '{module_name}' (unknown location)") } - } else if let Some(ref path) = mod_path { - format!("cannot import name '{name}' from '{module_name}' ({path})") - } else { - format!("cannot import name '{name}' from '{module_name}' (unknown location)") }; let err = vm.new_import_error(msg, vm.ctx.new_str(module_name)); + if let Some(ref path) = origin { + let _ignore = err + .as_object() + .set_attr("path", vm.ctx.new_str(path.as_str()), vm); + } + // name_from = the attribute name that failed to import (best-effort metadata) let _ignore = err.as_object().set_attr("name_from", name.to_owned(), vm); @@ -2320,22 +2348,45 @@ impl ExecutingFrame<'_> { fn import_star(&mut self, vm: &VirtualMachine) -> PyResult<()> { let module = self.pop_value(); - // Grab all the names from the module and put them in the context - if let Some(dict) = module.dict() { - let filter_pred: Box bool> = - if let Ok(all) = dict.get_item(identifier!(vm, __all__), vm) { - let all: Vec = all.try_to_value(vm)?; - let all: Vec = all - .into_iter() - .map(|name| name.as_str().to_owned()) - .collect(); - Box::new(move |name| all.contains(&name.to_owned())) + let Some(dict) = module.dict() else { + return Ok(()); + }; + + let mod_name = module + .get_attr(identifier!(vm, __name__), vm) + .ok() + .and_then(|n| n.downcast::().ok()); + + let require_str = |obj: PyObjectRef, attr: &str| -> PyResult> { + obj.downcast().map_err(|obj: PyObjectRef| { + let source = if let Some(ref mod_name) = mod_name { + format!("{}.{attr}", mod_name.as_str()) } else { - Box::new(|name| !name.starts_with('_')) + attr.to_owned() }; + let repr = obj.repr(vm).unwrap_or_else(|_| vm.ctx.new_str("?")); + vm.new_type_error(format!( + "{} in {} must be str, not {}", + repr.as_str(), + source, + obj.class().name() + )) + }) + }; + + if let Ok(all) = dict.get_item(identifier!(vm, __all__), vm) { + let items: Vec = all.try_to_value(vm)?; + for item in items { + let name = require_str(item, "__all__")?; + let value = module.get_attr(&*name, vm)?; + self.locals + .mapping() + .ass_subscript(&name, Some(value), vm)?; + } + } else { for (k, v) in dict { - let k = PyStrRef::try_from_object(vm, k)?; - if filter_pred(k.as_str()) { + let k = require_str(k, "__dict__")?; + if !k.as_str().starts_with('_') { self.locals.mapping().ass_subscript(&k, Some(v), vm)?; } } diff --git a/crates/vm/src/import.rs b/crates/vm/src/import.rs index 5657d1a3c14..31752dabb03 100644 --- a/crates/vm/src/import.rs +++ b/crates/vm/src/import.rs @@ -2,7 +2,7 @@ use crate::{ AsObject, Py, PyObjectRef, PyPayload, PyRef, PyResult, - builtins::{PyCode, traceback::PyTraceback}, + builtins::{PyCode, PyStr, PyStrRef, traceback::PyTraceback}, exceptions::types::PyBaseException, scope::Scope, vm::{VirtualMachine, resolve_frozen_alias, thread}, @@ -30,6 +30,7 @@ pub(crate) fn init_importlib_base(vm: &mut VirtualMachine) -> PyResult) { exc.set_traceback_typed(trimmed_tb); } } + +/// Get origin path from a module spec, checking has_location first. +pub(crate) fn get_spec_file_origin( + spec: &Option, + vm: &VirtualMachine, +) -> Option { + let spec = spec.as_ref()?; + let has_location = spec + .get_attr("has_location", vm) + .ok() + .and_then(|v| v.try_to_bool(vm).ok()) + .unwrap_or(false); + if !has_location { + return None; + } + spec.get_attr("origin", vm).ok().and_then(|origin| { + if vm.is_none(&origin) { + None + } else { + origin + .downcast_ref::() + .and_then(|s| s.to_str().map(|s| s.to_owned())) + } + }) +} + +/// Check if a module file possibly shadows another module of the same name. +/// Compares the module's directory with the original sys.path[0] (derived from sys.argv[0]). +pub(crate) fn is_possibly_shadowing_path(origin: &str, vm: &VirtualMachine) -> bool { + use std::path::Path; + + if vm.state.config.settings.safe_path { + return false; + } + + let origin_path = Path::new(origin); + let parent = match origin_path.parent() { + Some(p) => p, + None => return false, + }; + // For packages (__init__.py), look one directory further up + let root = if origin_path.file_name() == Some("__init__.py".as_ref()) { + parent.parent().unwrap_or(Path::new("")) + } else { + parent + }; + + // Compute original sys.path[0] from sys.argv[0] (the script path). + // See: config->sys_path_0, which is set once + // at initialization and never changes even if sys.path is modified. + let sys_path_0 = (|| -> Option { + let argv = vm.sys_module.get_attr("argv", vm).ok()?; + let argv0 = argv.get_item(&0usize, vm).ok()?; + let argv0_str = argv0.downcast_ref::()?; + let s = argv0_str.as_str(); + + // For -c and REPL, original sys.path[0] is "" + if s == "-c" || s.is_empty() { + return Some(String::new()); + } + // For scripts, original sys.path[0] is dirname(argv[0]) + Some( + Path::new(s) + .parent() + .and_then(|p| p.to_str()) + .unwrap_or("") + .to_owned(), + ) + })(); + + let sys_path_0 = match sys_path_0 { + Some(p) => p, + None => return false, + }; + + let cmp_path = if sys_path_0.is_empty() { + match std::env::current_dir() { + Ok(d) => d.to_string_lossy().to_string(), + Err(_) => return false, + } + } else { + sys_path_0 + }; + + root.to_str() == Some(cmp_path.as_str()) +} + +/// Check if a module name is in sys.stdlib_module_names. +/// Takes the original __name__ object to preserve str subclass behavior. +/// Propagates errors (e.g. TypeError for unhashable str subclass). +pub(crate) fn is_stdlib_module_name(name: &PyObjectRef, vm: &VirtualMachine) -> PyResult { + let stdlib_names = match vm.sys_module.get_attr("stdlib_module_names", vm) { + Ok(names) => names, + Err(_) => return Ok(false), + }; + if !stdlib_names.class().fast_issubclass(vm.ctx.types.set_type) + && !stdlib_names + .class() + .fast_issubclass(vm.ctx.types.frozenset_type) + { + return Ok(false); + } + let result = vm.call_method(&stdlib_names, "__contains__", (name.clone(),))?; + result.try_to_bool(vm) +} + +/// PyImport_ImportModuleLevelObject +pub(crate) fn import_module_level( + name: &Py, + globals: Option, + fromlist: Option, + level: i32, + vm: &VirtualMachine, +) -> PyResult { + if level < 0 { + return Err(vm.new_value_error("level must be >= 0".to_owned())); + } + + let name_str = match name.to_str() { + Some(s) => s, + None => { + // Name contains surrogates. Like CPython, try sys.modules + // lookup with the Python string key directly. + if level == 0 { + let sys_modules = vm.sys_module.get_attr("modules", vm)?; + return sys_modules.get_item(name, vm).map_err(|_| { + vm.new_import_error(format!("No module named '{}'", name), name.to_owned()) + }); + } + return Err(vm.new_import_error(format!("No module named '{}'", name), name.to_owned())); + } + }; + + // Resolve absolute name + let abs_name = if level > 0 { + // When globals is not provided (Rust None), raise KeyError + // matching resolve_name() where globals==NULL + if globals.is_none() { + return Err(vm.new_key_error(vm.ctx.new_str("'__name__' not in globals").into())); + } + let globals_ref = globals.as_ref().unwrap(); + // When globals is Python None, treat like empty mapping + let empty_dict_obj; + let globals_ref = if vm.is_none(globals_ref) { + empty_dict_obj = vm.ctx.new_dict().into(); + &empty_dict_obj + } else { + globals_ref + }; + let package = calc_package(Some(globals_ref), vm)?; + if package.is_empty() { + return Err(vm.new_import_error( + "attempted relative import with no known parent package".to_owned(), + vm.ctx.new_str(""), + )); + } + resolve_name(name_str, &package, level as usize, vm)? + } else { + if name_str.is_empty() { + return Err(vm.new_value_error("Empty module name".to_owned())); + } + name_str.to_owned() + }; + + // import_get_module + import_find_and_load + let sys_modules = vm.sys_module.get_attr("modules", vm)?; + let module = match sys_modules.get_item(&*abs_name, vm) { + Ok(m) if !vm.is_none(&m) => m, + _ => { + let find_and_load = vm.importlib.get_attr("_find_and_load", vm)?; + let abs_name_obj = vm.ctx.new_str(&*abs_name); + find_and_load.call((abs_name_obj, vm.import_func.clone()), vm)? + } + }; + + // Handle fromlist + let has_from = match fromlist.as_ref().filter(|fl| !vm.is_none(fl)) { + Some(fl) => fl.clone().try_to_bool(vm)?, + None => false, + }; + + if has_from { + let fromlist = fromlist.unwrap(); + // Only call _handle_fromlist if the module looks like a package + // (has __path__). Non-module objects without __name__/__path__ would + // crash inside _handle_fromlist; IMPORT_FROM handles per-attribute + // errors with proper ImportError conversion. + let has_path = vm + .get_attribute_opt(module.clone(), vm.ctx.intern_str("__path__"))? + .is_some(); + if has_path { + let handle_fromlist = vm.importlib.get_attr("_handle_fromlist", vm)?; + handle_fromlist.call((module, fromlist, vm.import_func.clone()), vm) + } else { + Ok(module) + } + } else if level == 0 || !name_str.is_empty() { + match name_str.find('.') { + None => Ok(module), + Some(dot) => { + let to_return = if level == 0 { + name_str[..dot].to_owned() + } else { + let cut_off = name_str.len() - dot; + abs_name[..abs_name.len() - cut_off].to_owned() + }; + match sys_modules.get_item(&*to_return, vm) { + Ok(m) => Ok(m), + Err(_) if level == 0 => { + // For absolute imports (level 0), try importing the + // parent. Matches _bootstrap.__import__ behavior. + let find_and_load = vm.importlib.get_attr("_find_and_load", vm)?; + let to_return_obj = vm.ctx.new_str(&*to_return); + find_and_load.call((to_return_obj, vm.import_func.clone()), vm) + } + Err(_) => { + // For relative imports (level > 0), raise KeyError + let to_return_obj: PyObjectRef = vm + .ctx + .new_str(format!("'{to_return}' not in sys.modules as expected")) + .into(); + Err(vm.new_key_error(to_return_obj)) + } + } + } + } + } else { + Ok(module) + } +} + +/// resolve_name in import.c - resolve relative import name +fn resolve_name(name: &str, package: &str, level: usize, vm: &VirtualMachine) -> PyResult { + // Python: bits = package.rsplit('.', level - 1) + // Rust: rsplitn(level, '.') gives maxsplit=level-1 + let parts: Vec<&str> = package.rsplitn(level, '.').collect(); + if parts.len() < level { + return Err(vm.new_import_error( + "attempted relative import beyond top-level package".to_owned(), + vm.ctx.new_str(name), + )); + } + // rsplitn returns parts right-to-left, so last() is the leftmost (base) + let base = parts.last().unwrap(); + if name.is_empty() { + Ok(base.to_string()) + } else { + Ok(format!("{base}.{name}")) + } +} + +/// _calc___package__ - calculate package from globals for relative imports +fn calc_package(globals: Option<&PyObjectRef>, vm: &VirtualMachine) -> PyResult { + let globals = globals.ok_or_else(|| { + vm.new_import_error( + "attempted relative import with no known parent package".to_owned(), + vm.ctx.new_str(""), + ) + })?; + + let package = globals.get_item("__package__", vm).ok(); + let spec = globals.get_item("__spec__", vm).ok(); + + if let Some(ref pkg) = package + && !vm.is_none(pkg) + { + let pkg_str: PyStrRef = pkg + .clone() + .downcast() + .map_err(|_| vm.new_type_error("package must be a string".to_owned()))?; + // Warn if __package__ != __spec__.parent + if let Some(ref spec) = spec + && !vm.is_none(spec) + && let Ok(parent) = spec.get_attr("parent", vm) + && !pkg_str.is(&parent) + && pkg_str + .as_object() + .rich_compare_bool(&parent, crate::types::PyComparisonOp::Ne, vm) + .unwrap_or(false) + { + let parent_repr = parent + .repr(vm) + .map(|s| s.as_str().to_owned()) + .unwrap_or_default(); + let msg = format!( + "__package__ != __spec__.parent ('{}' != {})", + pkg_str.as_str(), + parent_repr + ); + let warn = vm + .import("_warnings", 0) + .and_then(|w| w.get_attr("warn", vm)); + if let Ok(warn_fn) = warn { + let _ = warn_fn.call( + ( + vm.ctx.new_str(msg), + vm.ctx.exceptions.deprecation_warning.to_owned(), + ), + vm, + ); + } + } + return Ok(pkg_str.as_str().to_owned()); + } else if let Some(ref spec) = spec + && !vm.is_none(spec) + && let Ok(parent) = spec.get_attr("parent", vm) + && !vm.is_none(&parent) + { + let parent_str: PyStrRef = parent + .downcast() + .map_err(|_| vm.new_type_error("package set to non-string".to_owned()))?; + return Ok(parent_str.as_str().to_owned()); + } + + // Fall back to __name__ and __path__ + let warn = vm + .import("_warnings", 0) + .and_then(|w| w.get_attr("warn", vm)); + if let Ok(warn_fn) = warn { + let _ = warn_fn.call( + ( + vm.ctx.new_str("can't resolve package from __spec__ or __package__, falling back on __name__ and __path__"), + vm.ctx.exceptions.import_warning.to_owned(), + ), + vm, + ); + } + + let mod_name = globals.get_item("__name__", vm).map_err(|_| { + vm.new_import_error( + "attempted relative import with no known parent package".to_owned(), + vm.ctx.new_str(""), + ) + })?; + let mod_name_str: PyStrRef = mod_name + .downcast() + .map_err(|_| vm.new_type_error("__name__ must be a string".to_owned()))?; + let mut package = mod_name_str.as_str().to_owned(); + // If not a package (no __path__), strip last component. + // Uses rpartition('.')[0] semantics: returns empty string when no dot. + if globals.get_item("__path__", vm).is_err() { + package = match package.rfind('.') { + Some(dot) => package[..dot].to_owned(), + None => String::new(), + }; + } + Ok(package) +} diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index dbcb4a09d32..7b24d72d9b5 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -991,9 +991,24 @@ mod builtins { Ok(sum) } + #[derive(FromArgs)] + struct ImportArgs { + #[pyarg(any)] + name: PyStrRef, + #[pyarg(any, default)] + globals: Option, + #[allow(dead_code)] + #[pyarg(any, default)] + locals: Option, + #[pyarg(any, default)] + fromlist: Option, + #[pyarg(any, default)] + level: i32, + } + #[pyfunction] - fn __import__(args: FuncArgs, vm: &VirtualMachine) -> PyResult { - vm.import_func.call(args, vm) + fn __import__(args: ImportArgs, vm: &VirtualMachine) -> PyResult { + crate::import::import_module_level(&args.name, args.globals, args.fromlist, args.level, vm) } #[pyfunction] diff --git a/crates/vm/src/stdlib/imp.rs b/crates/vm/src/stdlib/imp.rs index 1c78e835a2d..c140d7cdcaf 100644 --- a/crates/vm/src/stdlib/imp.rs +++ b/crates/vm/src/stdlib/imp.rs @@ -1,5 +1,8 @@ +use crate::builtins::{PyCode, PyStrInterned}; use crate::frozen::FrozenModule; use crate::{VirtualMachine, builtins::PyBaseExceptionRef}; +use core::borrow::Borrow; + pub(crate) use _imp::module_def; pub use crate::vm::resolve_frozen_alias; @@ -72,13 +75,31 @@ impl FrozenError { } } -// find_frozen in frozen.c +// look_up_frozen + use_frozen in import.c fn find_frozen(name: &str, vm: &VirtualMachine) -> Result { - vm.state + let frozen = vm + .state .frozen .get(name) .copied() - .ok_or(FrozenError::NotFound) + .ok_or(FrozenError::NotFound)?; + + // Bootstrap modules are always available regardless of override flag + if matches!( + name, + "_frozen_importlib" | "_frozen_importlib_external" | "zipimport" + ) { + return Ok(frozen); + } + + // use_frozen(): override > 0 → true, override < 0 → false, 0 → default (true) + // When disabled, non-bootstrap modules are simply not found (same as look_up_frozen) + let override_val = vm.state.override_frozen_modules.load(); + if override_val < 0 { + return Err(FrozenError::NotFound); + } + + Ok(frozen) } #[pymodule(with(lock))] @@ -86,6 +107,7 @@ mod _imp { use crate::{ PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, builtins::{PyBytesRef, PyCode, PyMemoryView, PyModule, PyStrRef}, + convert::TryFromBorrowedObject, function::OptionalArg, import, version, }; @@ -111,7 +133,7 @@ mod _imp { #[pyfunction] fn is_frozen(name: PyStrRef, vm: &VirtualMachine) -> bool { - vm.state.frozen.contains_key(name.as_str()) + super::find_frozen(name.as_str(), vm).is_ok() } #[pyfunction] @@ -161,7 +183,30 @@ mod _imp { } #[pyfunction] - fn get_frozen_object(name: PyStrRef, vm: &VirtualMachine) -> PyResult> { + fn get_frozen_object( + name: PyStrRef, + data: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult> { + if let OptionalArg::Present(data) = data + && !vm.is_none(&data) + { + let buf = crate::protocol::PyBuffer::try_from_borrowed_object(vm, &data)?; + let contiguous = buf.as_contiguous().ok_or_else(|| { + vm.new_buffer_error("get_frozen_object() requires a contiguous buffer") + })?; + let invalid_err = || { + vm.new_import_error( + format!("Frozen object named '{}' is invalid", name.as_str()), + name.clone(), + ) + }; + let bag = crate::builtins::code::PyObjBag(&vm.ctx); + let code = + rustpython_compiler_core::marshal::deserialize_code(&mut &contiguous[..], bag) + .map_err(|_| invalid_err())?; + return Ok(vm.ctx.new_code(code)); + } import::make_frozen(vm, name.as_str()) } @@ -183,8 +228,10 @@ mod _imp { } #[pyfunction] - fn _fix_co_filename(_code: PyObjectRef, _path: PyStrRef) { - // TODO: + fn _fix_co_filename(code: PyRef, path: PyStrRef, vm: &VirtualMachine) { + let old_name = code.code.source_path; + let new_name = vm.ctx.intern_str(path.as_str()); + super::update_code_filenames(&code, old_name, new_name); } #[pyfunction] @@ -204,7 +251,7 @@ mod _imp { name: PyStrRef, withdata: OptionalArg, vm: &VirtualMachine, - ) -> PyResult>, bool, PyStrRef)>> { + ) -> PyResult>, bool, Option)>> { use super::FrozenError::*; if withdata.into_option().is_some() { @@ -218,7 +265,14 @@ mod _imp { Err(e) => return Err(e.to_pyexception(name.as_str(), vm)), }; - let origname = vm.ctx.new_str(super::resolve_frozen_alias(name.as_str())); + // When origname is empty (e.g. __hello_only__), return None. + // Otherwise return the resolved alias name. + let origname_str = super::resolve_frozen_alias(name.as_str()); + let origname = if origname_str.is_empty() { + None + } else { + Some(vm.ctx.new_str(origname_str)) + }; Ok(Some((None, info.package, origname))) } @@ -228,3 +282,28 @@ mod _imp { hash.to_le_bytes().to_vec() } } + +fn update_code_filenames( + code: &PyCode, + old_name: &'static PyStrInterned, + new_name: &'static PyStrInterned, +) { + if !core::ptr::eq(code.code.source_path, old_name) + && code.code.source_path.as_str() != old_name.as_str() + { + return; + } + // SAFETY: called during import before the code object is shared. + // Mutates co_filename in place. + #[allow(invalid_reference_casting)] + unsafe { + let source_path_ptr = &code.code.source_path as *const _ as *mut &'static PyStrInterned; + core::ptr::write_volatile(source_path_ptr, new_name); + } + for constant in code.code.constants.iter() { + let obj: &crate::PyObject = constant.borrow(); + if let Some(inner_code) = obj.downcast_ref::() { + update_code_filenames(inner_code, old_name, new_name); + } + } +} diff --git a/crates/vm/src/vm/interpreter.rs b/crates/vm/src/vm/interpreter.rs index f6408fe2012..9138a733558 100644 --- a/crates/vm/src/vm/interpreter.rs +++ b/crates/vm/src/vm/interpreter.rs @@ -443,13 +443,6 @@ fn core_frozen_inits() -> impl Iterator { }; } - // keep as example but use file one now - // ext_modules!( - // iter, - // source = "initialized = True; print(\"Hello world!\")\n", - // module_name = "__hello__", - // ); - // Python modules that the vm calls into, but are not actually part of the stdlib. They could // in theory be implemented in Rust, but are easiest to do in Python for one reason or another. // Includes _importlib_bootstrap and _importlib_bootstrap_external @@ -470,7 +463,62 @@ fn core_frozen_inits() -> impl Iterator { crate_name = "rustpython_compiler_core" ); - iter + // Collect and add frozen module aliases for test modules + let mut entries: Vec<_> = iter.collect(); + if let Some(hello_code) = entries + .iter() + .find(|(n, _)| *n == "__hello__") + .map(|(_, m)| m.code) + { + entries.push(( + "__hello_alias__", + FrozenModule { + code: hello_code, + package: false, + }, + )); + entries.push(( + "__phello_alias__", + FrozenModule { + code: hello_code, + package: true, + }, + )); + entries.push(( + "__phello_alias__.spam", + FrozenModule { + code: hello_code, + package: false, + }, + )); + } + if let Some(code) = entries + .iter() + .find(|(n, _)| *n == "__phello__") + .map(|(_, m)| m.code) + { + entries.push(( + "__phello__.__init__", + FrozenModule { + code, + package: false, + }, + )); + } + if let Some(code) = entries + .iter() + .find(|(n, _)| *n == "__phello__.ham") + .map(|(_, m)| m.code) + { + entries.push(( + "__phello__.ham.__init__", + FrozenModule { + code, + package: false, + }, + )); + } + entries.into_iter() } #[cfg(test)] diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 59481e914e6..fb190cbd5c9 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -76,6 +76,7 @@ pub struct VirtualMachine { pub wasm_id: Option, exceptions: RefCell, pub import_func: PyObjectRef, + pub(crate) importlib: PyObjectRef, pub profile_func: RefCell, pub trace_func: RefCell, pub use_tracing: Cell, @@ -166,6 +167,7 @@ impl VirtualMachine { let sys_module = new_module(stdlib::sys::module_def(&ctx)); let import_func = ctx.none(); + let importlib = ctx.none(); let profile_func = RefCell::new(ctx.none()); let trace_func = RefCell::new(ctx.none()); let signal_handlers = Some(Box::new( @@ -181,6 +183,7 @@ impl VirtualMachine { wasm_id: None, exceptions: RefCell::default(), import_func, + importlib, profile_func, trace_func, use_tracing: Cell::new(false), @@ -263,7 +266,9 @@ impl VirtualMachine { } fn import_ascii_utf8_encodings(&mut self) -> PyResult<()> { - import::import_frozen(self, "codecs")?; + // Use the Python import machinery (FrozenImporter) so modules get + // proper __spec__ and __loader__ attributes. + self.import("codecs", 0)?; // Use dotted names when freeze-stdlib is enabled (modules come from Lib/encodings/), // otherwise use underscored names (modules come from core_modules/). @@ -274,20 +279,30 @@ impl VirtualMachine { }; // Register ascii encoding - let ascii_module = import::import_frozen(self, ascii_module_name)?; + // __import__("encodings.ascii") returns top-level "encodings", so + // look up the actual submodule in sys.modules. + self.import(ascii_module_name, 0)?; + let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?; + let ascii_module = sys_modules.get_item(ascii_module_name, self)?; let getregentry = ascii_module.get_attr("getregentry", self)?; let codec_info = getregentry.call((), self)?; self.state .codec_registry .register_manual("ascii", codec_info.try_into_value(self)?)?; - // Register utf-8 encoding - let utf8_module = import::import_frozen(self, utf8_module_name)?; + // Register utf-8 encoding (also as "utf8" alias since normalize_encoding_name + // maps "utf-8" → "utf_8" but leaves "utf8" as-is) + self.import(utf8_module_name, 0)?; + let utf8_module = sys_modules.get_item(utf8_module_name, self)?; let getregentry = utf8_module.get_attr("getregentry", self)?; let codec_info = getregentry.call((), self)?; + let utf8_codec: crate::codecs::PyCodec = codec_info.try_into_value(self)?; + self.state + .codec_registry + .register_manual("utf-8", utf8_codec.clone())?; self.state .codec_registry - .register_manual("utf-8", codec_info.try_into_value(self)?)?; + .register_manual("utf8", utf8_codec)?; Ok(()) } @@ -1105,47 +1120,20 @@ impl VirtualMachine { from_list: &Py>, level: usize, ) -> PyResult { - // if the import inputs seem weird, e.g a package import or something, rather than just - // a straight `import ident` - let weird = module.as_str().contains('.') || level != 0 || !from_list.is_empty(); + let import_func = self + .builtins + .get_attr(identifier!(self, __import__), self) + .map_err(|_| self.new_import_error("__import__ not found", module.to_owned()))?; - let cached_module = if weird { - None + let (locals, globals) = if let Some(frame) = self.current_frame() { + (Some(frame.locals.clone()), Some(frame.globals.clone())) } else { - let sys_modules = self.sys_module.get_attr("modules", self)?; - sys_modules.get_item(module, self).ok() + (None, None) }; - - match cached_module { - Some(cached_module) => { - if self.is_none(&cached_module) { - Err(self.new_import_error( - format!("import of {module} halted; None in sys.modules"), - module.to_owned(), - )) - } else { - Ok(cached_module) - } - } - None => { - let import_func = self - .builtins - .get_attr(identifier!(self, __import__), self) - .map_err(|_| { - self.new_import_error("__import__ not found", module.to_owned()) - })?; - - let (locals, globals) = if let Some(frame) = self.current_frame() { - (Some(frame.locals.clone()), Some(frame.globals.clone())) - } else { - (None, None) - }; - let from_list: PyObjectRef = from_list.to_owned().into(); - import_func - .call((module.to_owned(), globals, locals, from_list, level), self) - .inspect_err(|exc| import::remove_importlib_frames(self, exc)) - } - } + let from_list: PyObjectRef = from_list.to_owned().into(); + import_func + .call((module.to_owned(), globals, locals, from_list, level), self) + .inspect_err(|exc| import::remove_importlib_frames(self, exc)) } pub fn extract_elements_with(&self, value: &PyObject, func: F) -> PyResult> @@ -1551,6 +1539,10 @@ pub fn resolve_frozen_alias(name: &str) -> &str { "_frozen_importlib_external" => "importlib._bootstrap_external", "encodings_ascii" => "encodings.ascii", "encodings_utf_8" => "encodings.utf_8", + "__hello_alias__" | "__phello_alias__" | "__phello_alias__.spam" => "__hello__", + "__phello__.__init__" => "<__phello__", + "__phello__.ham.__init__" => "<__phello__.ham", + "__hello_only__" => "", _ => name, } } diff --git a/crates/vm/src/vm/thread.rs b/crates/vm/src/vm/thread.rs index c63cc4db0e7..334a03701b1 100644 --- a/crates/vm/src/vm/thread.rs +++ b/crates/vm/src/vm/thread.rs @@ -270,6 +270,7 @@ impl VirtualMachine { wasm_id: self.wasm_id.clone(), exceptions: RefCell::default(), import_func: self.import_func.clone(), + importlib: self.importlib.clone(), profile_func: RefCell::new(global_profile.unwrap_or_else(|| self.ctx.none())), trace_func: RefCell::new(global_trace.unwrap_or_else(|| self.ctx.none())), use_tracing: Cell::new(use_tracing),