From 544ff283658aab68714bf9eefce7be70e0c1055f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 24 Jan 2026 12:27:15 +0900 Subject: [PATCH 1/8] show todo --- scripts/update_lib/show_todo.py | 192 +++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 3 deletions(-) diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index 1bf9def7df0..e8c130899a9 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -108,8 +108,139 @@ def compute_todo_list( return result +def get_all_tests(cpython_prefix: str = "cpython") -> list[str]: + """Get all test module names from cpython/Lib/test/. + + Returns: + Sorted list of test names (e.g., ["test_abc", "test_dis", ...]) + """ + test_dir = pathlib.Path(cpython_prefix) / "Lib" / "test" + if not test_dir.exists(): + return [] + + tests = set() + for entry in test_dir.iterdir(): + # Skip private/internal and special directories + if entry.name.startswith(("_", ".")): + continue + # Skip non-test items + if not entry.name.startswith("test_"): + continue + + if entry.is_file() and entry.suffix == ".py": + tests.add(entry.stem) + elif entry.is_dir() and (entry / "__init__.py").exists(): + tests.add(entry.name) + + return sorted(tests) + + +def is_test_up_to_date( + test_name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" +) -> bool: + """Check if a test is up-to-date by comparing files.""" + import filecmp + + from update_lib.deps import _dircmp_is_same + + # Try directory first, then file + cpython_dir = pathlib.Path(cpython_prefix) / "Lib" / "test" / test_name + cpython_file = pathlib.Path(cpython_prefix) / "Lib" / "test" / f"{test_name}.py" + + if cpython_dir.exists(): + cpython_path = cpython_dir + elif cpython_file.exists(): + cpython_path = cpython_file + else: + return True # No cpython test, consider up-to-date + + local_path = pathlib.Path(lib_prefix) / "test" / cpython_path.name + + if not local_path.exists(): + return False + + if 
cpython_path.is_file(): + return filecmp.cmp(cpython_path, local_path, shallow=False) + else: + dcmp = filecmp.dircmp(cpython_path, local_path) + return _dircmp_is_same(dcmp) + + +def compute_test_todo_list( + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", + include_done: bool = False, + lib_status: dict[str, bool] | None = None, +) -> list[dict]: + """Compute prioritized list of tests to update. + + Scoring: + - If corresponding lib is up-to-date: score = 0 (ready) + - If corresponding lib is NOT up-to-date: score = 2 (wait for lib) + - If no corresponding lib: score = 1 (independent) + + Returns: + List of dicts with test info, sorted by priority + """ + all_tests = get_all_tests(cpython_prefix) + + result = [] + for test_name in all_tests: + up_to_date = is_test_up_to_date(test_name, cpython_prefix, lib_prefix) + + if up_to_date and not include_done: + continue + + # Extract lib name from test name (test_foo -> foo) + lib_name = test_name[5:] if test_name.startswith("test_") else test_name + + # Check if corresponding lib is up-to-date + # Scoring: 0 = lib ready (highest priority), 1 = no lib, 2 = lib pending + if lib_status and lib_name in lib_status: + lib_up_to_date = lib_status[lib_name] + if lib_up_to_date: + score = 0 # Lib is ready, can update test + else: + score = 2 # Wait for lib first + else: + score = 1 # No corresponding lib (independent test) + + result.append( + { + "name": test_name, + "lib_name": lib_name, + "score": score, + "up_to_date": up_to_date, + } + ) + + # Sort by score (ascending) + result.sort(key=lambda x: x["score"]) + + return result + + +def format_test_todo_list( + todo_list: list[dict], + limit: int | None = None, +) -> list[str]: + """Format test todo list for display.""" + lines = [] + + if limit: + todo_list = todo_list[:limit] + + for item in todo_list: + name = item["name"] + done_mark = "[x]" if item["up_to_date"] else "[ ]" + lines.append(f"- {done_mark} {name}") + + return lines + + +def format_todo_list( 
todo_list: list[dict], + test_by_lib: dict[str, dict] | None = None, limit: int | None = None, verbose: bool = False, ) -> list[str]: @@ -117,6 +248,7 @@ def format_todo_list( Args: todo_list: List from compute_todo_list() + test_by_lib: Dict mapping lib_name -> test info (optional) limit: Maximum number of items to show verbose: Show detailed dependency information @@ -149,6 +281,12 @@ def format_todo_list( lines.append(" ".join(parts)) + # Show corresponding test if exists + if test_by_lib and name in test_by_lib: + test_info = test_by_lib[name] + test_done_mark = "[x]" if test_info["up_to_date"] else "[ ]" + lines.append(f" - {test_done_mark} {test_info['name']}") + # Verbose mode: show detailed dependency info if verbose: if item["reverse_deps"]: @@ -161,6 +299,55 @@ def format_todo_list( return lines +def format_all_todo( + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", + limit: int | None = None, + include_done: bool = False, + verbose: bool = False, +) -> list[str]: + """Format prioritized list of modules and tests to update. 
+ + Returns: + List of formatted lines + """ + from update_lib.deps import is_up_to_date + from update_lib.show_deps import get_all_modules + + lines = [] + + # Compute lib todo + lib_todo = compute_todo_list(cpython_prefix, lib_prefix, include_done) + + # Build lib status map for test scoring + lib_status = {} + for name in get_all_modules(cpython_prefix): + lib_status[name] = is_up_to_date(name, cpython_prefix, lib_prefix) + + # Compute test todo + test_todo = compute_test_todo_list(cpython_prefix, lib_prefix, include_done, lib_status) + + # Build test_by_lib map (only for tests with corresponding lib) + test_by_lib = {} + no_lib_tests = [] + for test in test_todo: + if test["score"] == 1: # no lib + no_lib_tests.append(test) + else: + test_by_lib[test["lib_name"]] = test + + # Format lib todo with embedded tests + lines.extend(format_todo_list(lib_todo, test_by_lib, limit, verbose)) + + # Format "no lib" tests separately if any + if no_lib_tests: + lines.append("") + lines.append("## Standalone Tests") + lines.extend(format_test_todo_list(no_lib_tests, limit)) + + return lines + + def show_todo( cpython_prefix: str = "cpython", lib_prefix: str = "Lib", @@ -168,9 +355,8 @@ def show_todo( include_done: bool = False, verbose: bool = False, ) -> None: - """Show prioritized list of modules to update.""" - todo_list = compute_todo_list(cpython_prefix, lib_prefix, include_done) - for line in format_todo_list(todo_list, limit, verbose): + """Show prioritized list of modules and tests to update.""" + for line in format_all_todo(cpython_prefix, lib_prefix, limit, include_done, verbose): print(line) From b842907c7d6e4ed68ccef565217ce272f408168e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 24 Jan 2026 12:33:44 +0900 Subject: [PATCH 2/8] `update_lib todo` also shows test todo --- scripts/update_lib/show_todo.py | 101 +++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 9 deletions(-) diff --git a/scripts/update_lib/show_todo.py 
b/scripts/update_lib/show_todo.py index e8c130899a9..2ffd4d036b3 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -135,14 +135,87 @@ def get_all_tests(cpython_prefix: str = "cpython") -> list[str]: return sorted(tests) +def _filter_rustpython_todo(content: str) -> str: + """Remove lines containing 'TODO: RUSTPYTHON' from content.""" + lines = content.splitlines(keepends=True) + filtered = [line for line in lines if "TODO: RUSTPYTHON" not in line] + return "".join(filtered) + + +def _count_rustpython_todo(content: str) -> int: + """Count lines containing 'TODO: RUSTPYTHON' in content.""" + return sum(1 for line in content.splitlines() if "TODO: RUSTPYTHON" in line) + + +def _compare_file_ignoring_todo(cpython_path: pathlib.Path, local_path: pathlib.Path) -> bool: + """Compare two files, ignoring TODO: RUSTPYTHON lines in local file.""" + try: + cpython_content = cpython_path.read_text(encoding="utf-8") + local_content = local_path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError): + return False + + local_filtered = _filter_rustpython_todo(local_content) + return cpython_content == local_filtered + + +def _compare_dir_ignoring_todo(cpython_path: pathlib.Path, local_path: pathlib.Path) -> bool: + """Compare two directories, ignoring TODO: RUSTPYTHON lines in local files.""" + # Get all .py files in both directories + cpython_files = {f.relative_to(cpython_path) for f in cpython_path.rglob("*.py")} + local_files = {f.relative_to(local_path) for f in local_path.rglob("*.py")} + + # Check for missing or extra files + if cpython_files != local_files: + return False + + # Compare each file + for rel_path in cpython_files: + if not _compare_file_ignoring_todo(cpython_path / rel_path, local_path / rel_path): + return False + + return True + + +def count_test_todos( + test_name: str, lib_prefix: str = "Lib" +) -> int: + """Count TODO: RUSTPYTHON lines in a test file/directory.""" + local_dir = pathlib.Path(lib_prefix) / 
"test" / test_name + local_file = pathlib.Path(lib_prefix) / "test" / f"{test_name}.py" + + if local_dir.exists(): + local_path = local_dir + elif local_file.exists(): + local_path = local_file + else: + return 0 + + total = 0 + if local_path.is_file(): + try: + content = local_path.read_text(encoding="utf-8") + total = _count_rustpython_todo(content) + except (OSError, UnicodeDecodeError): + pass + else: + for py_file in local_path.rglob("*.py"): + try: + content = py_file.read_text(encoding="utf-8") + total += _count_rustpython_todo(content) + except (OSError, UnicodeDecodeError): + pass + + return total + + def is_test_up_to_date( test_name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" ) -> bool: - """Check if a test is up-to-date by comparing files.""" - import filecmp - - from update_lib.deps import _dircmp_is_same + """Check if a test is up-to-date by comparing files. + Ignores lines containing 'TODO: RUSTPYTHON' in local files. + """ # Try directory first, then file cpython_dir = pathlib.Path(cpython_prefix) / "Lib" / "test" / test_name cpython_file = pathlib.Path(cpython_prefix) / "Lib" / "test" / f"{test_name}.py" @@ -160,10 +233,9 @@ def is_test_up_to_date( return False if cpython_path.is_file(): - return filecmp.cmp(cpython_path, local_path, shallow=False) + return _compare_file_ignoring_todo(cpython_path, local_path) else: - dcmp = filecmp.dircmp(cpython_path, local_path) - return _dircmp_is_same(dcmp) + return _compare_dir_ignoring_todo(cpython_path, local_path) def compute_test_todo_list( @@ -205,12 +277,15 @@ def compute_test_todo_list( else: score = 1 # No corresponding lib (independent test) + todo_count = count_test_todos(test_name, lib_prefix) + result.append( { "name": test_name, "lib_name": lib_name, "score": score, "up_to_date": up_to_date, + "todo_count": todo_count, } ) @@ -233,7 +308,11 @@ def format_test_todo_list( for item in todo_list: name = item["name"] done_mark = "[x]" if item["up_to_date"] else "[ ]" - 
lines.append(f"- {done_mark} {name}") + todo_count = item.get("todo_count", 0) + if todo_count > 0: + lines.append(f"- {done_mark} {name} ({todo_count} TODO)") + else: + lines.append(f"- {done_mark} {name}") return lines @@ -285,7 +364,11 @@ def format_todo_list( if test_by_lib and name in test_by_lib: test_info = test_by_lib[name] test_done_mark = "[x]" if test_info["up_to_date"] else "[ ]" - lines.append(f" - {test_done_mark} {test_info['name']}") + todo_count = test_info.get("todo_count", 0) + if todo_count > 0: + lines.append(f" - {test_done_mark} {test_info['name']} ({todo_count} TODO)") + else: + lines.append(f" - {test_done_mark} {test_info['name']}") # Verbose mode: show detailed dependency info if verbose: From 29607ef283b8fd4f5ce135b4b47075939ca709ca Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 24 Jan 2026 03:36:47 +0000 Subject: [PATCH 3/8] Auto-format: ruff format --- scripts/update_lib/show_todo.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index 2ffd4d036b3..d7b8049fae0 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -147,7 +147,9 @@ def _count_rustpython_todo(content: str) -> int: return sum(1 for line in content.splitlines() if "TODO: RUSTPYTHON" in line) -def _compare_file_ignoring_todo(cpython_path: pathlib.Path, local_path: pathlib.Path) -> bool: +def _compare_file_ignoring_todo( + cpython_path: pathlib.Path, local_path: pathlib.Path +) -> bool: """Compare two files, ignoring TODO: RUSTPYTHON lines in local file.""" try: cpython_content = cpython_path.read_text(encoding="utf-8") @@ -159,7 +161,9 @@ def _compare_file_ignoring_todo(cpython_path: pathlib.Path, local_path: pathlib. 
return cpython_content == local_filtered -def _compare_dir_ignoring_todo(cpython_path: pathlib.Path, local_path: pathlib.Path) -> bool: +def _compare_dir_ignoring_todo( + cpython_path: pathlib.Path, local_path: pathlib.Path +) -> bool: """Compare two directories, ignoring TODO: RUSTPYTHON lines in local files.""" # Get all .py files in both directories cpython_files = {f.relative_to(cpython_path) for f in cpython_path.rglob("*.py")} @@ -171,15 +175,15 @@ def _compare_dir_ignoring_todo(cpython_path: pathlib.Path, local_path: pathlib.P # Compare each file for rel_path in cpython_files: - if not _compare_file_ignoring_todo(cpython_path / rel_path, local_path / rel_path): + if not _compare_file_ignoring_todo( + cpython_path / rel_path, local_path / rel_path + ): return False return True -def count_test_todos( - test_name: str, lib_prefix: str = "Lib" -) -> int: +def count_test_todos(test_name: str, lib_prefix: str = "Lib") -> int: """Count TODO: RUSTPYTHON lines in a test file/directory.""" local_dir = pathlib.Path(lib_prefix) / "test" / test_name local_file = pathlib.Path(lib_prefix) / "test" / f"{test_name}.py" @@ -366,7 +370,9 @@ def format_todo_list( test_done_mark = "[x]" if test_info["up_to_date"] else "[ ]" todo_count = test_info.get("todo_count", 0) if todo_count > 0: - lines.append(f" - {test_done_mark} {test_info['name']} ({todo_count} TODO)") + lines.append( + f" - {test_done_mark} {test_info['name']} ({todo_count} TODO)" + ) else: lines.append(f" - {test_done_mark} {test_info['name']}") @@ -408,7 +414,9 @@ def format_all_todo( lib_status[name] = is_up_to_date(name, cpython_prefix, lib_prefix) # Compute test todo - test_todo = compute_test_todo_list(cpython_prefix, lib_prefix, include_done, lib_status) + test_todo = compute_test_todo_list( + cpython_prefix, lib_prefix, include_done, lib_status + ) # Build test_by_lib map (only for tests with corresponding lib) test_by_lib = {} @@ -439,7 +447,9 @@ def show_todo( verbose: bool = False, ) -> None: """Show 
prioritized list of modules and tests to update.""" - for line in format_all_todo(cpython_prefix, lib_prefix, limit, include_done, verbose): + for line in format_all_todo( + cpython_prefix, lib_prefix, limit, include_done, verbose + ): print(line) From 6daabf81ca19f430ab1c563035dc31fac0278275 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 24 Jan 2026 12:43:03 +0900 Subject: [PATCH 4/8] show lib when test is not done --- scripts/update_lib/show_todo.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index d7b8049fae0..33eeb0acc1c 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -405,27 +405,41 @@ def format_all_todo( lines = [] - # Compute lib todo - lib_todo = compute_todo_list(cpython_prefix, lib_prefix, include_done) - # Build lib status map for test scoring lib_status = {} for name in get_all_modules(cpython_prefix): lib_status[name] = is_up_to_date(name, cpython_prefix, lib_prefix) - # Compute test todo + # Compute test todo (always include all to find libs with pending tests) test_todo = compute_test_todo_list( - cpython_prefix, lib_prefix, include_done, lib_status + cpython_prefix, lib_prefix, include_done=True, lib_status=lib_status ) # Build test_by_lib map (only for tests with corresponding lib) test_by_lib = {} no_lib_tests = [] + # Set of libs that have pending tests + libs_with_pending_tests = set() for test in test_todo: if test["score"] == 1: # no lib - no_lib_tests.append(test) + if not test["up_to_date"] or include_done: + no_lib_tests.append(test) else: test_by_lib[test["lib_name"]] = test + if not test["up_to_date"]: + libs_with_pending_tests.add(test["lib_name"]) + + # Compute lib todo - include libs with pending tests even if lib is done + lib_todo_base = compute_todo_list(cpython_prefix, lib_prefix, include_done=True) + + # Filter lib todo: include if lib is not done OR has pending test 
+ lib_todo = [] + for item in lib_todo_base: + lib_not_done = not item["up_to_date"] + has_pending_test = item["name"] in libs_with_pending_tests + + if include_done or lib_not_done or has_pending_test: + lib_todo.append(item) # Format lib todo with embedded tests lines.extend(format_todo_list(lib_todo, test_by_lib, limit, verbose)) From fe61dd1283fcc9b3c2db5ee9a733067e1f9dcbf4 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 24 Jan 2026 12:46:35 +0900 Subject: [PATCH 5/8] show untracked files --- scripts/update_lib/show_todo.py | 68 +++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index 33eeb0acc1c..622d5baa347 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -135,6 +135,63 @@ def get_all_tests(cpython_prefix: str = "cpython") -> list[str]: return sorted(tests) +def get_untracked_files( + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", +) -> list[str]: + """Get files that exist in cpython/Lib but not in our Lib. + + Excludes files that belong to tracked modules (shown in library todo). + Includes all file types (.py, .txt, .pem, .json, etc.) 
+ + Returns: + Sorted list of relative paths (e.g., ["foo.py", "data/file.txt"]) + """ + from update_lib.show_deps import get_all_modules + + cpython_lib = pathlib.Path(cpython_prefix) / "Lib" + local_lib = pathlib.Path(lib_prefix) + + if not cpython_lib.exists(): + return [] + + # Get tracked modules (shown in library todo) + tracked_modules = set(get_all_modules(cpython_prefix)) + + untracked = [] + + for cpython_file in cpython_lib.rglob("*"): + # Skip directories + if cpython_file.is_dir(): + continue + + # Get relative path from Lib/ + rel_path = cpython_file.relative_to(cpython_lib) + + # Skip test/ directory (handled separately by test todo) + if rel_path.parts and rel_path.parts[0] == "test": + continue + + # Check if file belongs to a tracked module + # e.g., idlelib/Icons/idle.gif -> module "idlelib" + # e.g., foo.py -> module "foo" + first_part = rel_path.parts[0] + if first_part.endswith(".py"): + module_name = first_part[:-3] # Remove .py + else: + module_name = first_part + + if module_name in tracked_modules: + continue + + # Check if exists in local lib + local_file = local_lib / rel_path + if not local_file.exists(): + untracked.append(str(rel_path)) + + return sorted(untracked) + + def _filter_rustpython_todo(content: str) -> str: """Remove lines containing 'TODO: RUSTPYTHON' from content.""" lines = content.splitlines(keepends=True) @@ -450,6 +507,17 @@ def format_all_todo( lines.append("## Standalone Tests") lines.extend(format_test_todo_list(no_lib_tests, limit)) + # Format untracked files (in cpython but not in our Lib) + untracked = get_untracked_files(cpython_prefix, lib_prefix) + if untracked: + lines.append("") + lines.append("## Untracked Files") + display_untracked = untracked[:limit] if limit else untracked + for path in display_untracked: + lines.append(f"- {path}") + if limit and len(untracked) > limit: + lines.append(f" ... 
and {len(untracked) - limit} more") + return lines From 968771dcbbefb279713d56a516efef5a61b4d63f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 24 Jan 2026 12:52:19 +0900 Subject: [PATCH 6/8] _ast_unparse --- scripts/update_lib/deps.py | 3 +++ scripts/update_lib/show_todo.py | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index 791f9d3aa23..ea61b9c4b66 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -72,6 +72,9 @@ def clear_import_graph_caches() -> None: "abc": { "hard_deps": ["_py_abc.py"], }, + "ast": { + "hard_deps": ["_ast_unparse.py"], + }, "codecs": { "hard_deps": ["_pycodecs.py"], }, diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index 622d5baa347..21f6576ac13 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -141,12 +141,14 @@ def get_untracked_files( ) -> list[str]: """Get files that exist in cpython/Lib but not in our Lib. - Excludes files that belong to tracked modules (shown in library todo). + Excludes files that belong to tracked modules (shown in library todo) + and hard_deps of those modules. Includes all file types (.py, .txt, .pem, .json, etc.) 
Returns: Sorted list of relative paths (e.g., ["foo.py", "data/file.txt"]) """ + from update_lib.deps import resolve_hard_dep_parent from update_lib.show_deps import get_all_modules cpython_lib = pathlib.Path(cpython_prefix) / "Lib" @@ -184,6 +186,10 @@ def get_untracked_files( if module_name in tracked_modules: continue + # Check if this is a hard_dep of a tracked module + if resolve_hard_dep_parent(module_name) is not None: + continue + # Check if exists in local lib local_file = local_lib / rel_path if not local_file.exists(): From 7af6bbddb45ea6242cf63438f2cf480c7b6bbce8 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 24 Jan 2026 12:57:03 +0900 Subject: [PATCH 7/8] better auto detection --- scripts/update_lib/deps.py | 57 +++++++++++++++++++-------------- scripts/update_lib/show_deps.py | 21 +++++++++--- scripts/update_lib/show_todo.py | 2 +- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index ea61b9c4b66..2ef9f9cbfd8 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -68,28 +68,10 @@ def clear_import_graph_caches() -> None: "lib": [], # No Python lib (Rust implementation) "hard_deps": ["_pylong.py"], }, - # Pure Python implementations - "abc": { - "hard_deps": ["_py_abc.py"], - }, + # Non-pattern hard_deps (can't be auto-detected) "ast": { "hard_deps": ["_ast_unparse.py"], }, - "codecs": { - "hard_deps": ["_pycodecs.py"], - }, - "datetime": { - "hard_deps": ["_pydatetime.py"], - }, - "decimal": { - "hard_deps": ["_pydecimal.py"], - }, - "io": { - "hard_deps": ["_pyio.py"], - }, - "warnings": { - "hard_deps": ["_py_warnings.py"], - }, # Data directories "pydoc": { "hard_deps": ["pydoc_data"], @@ -105,22 +87,25 @@ def clear_import_graph_caches() -> None: } -def resolve_hard_dep_parent(name: str) -> str | None: +def resolve_hard_dep_parent(name: str, cpython_prefix: str = "cpython") -> str | None: """Resolve a hard_dep name to its parent module. 
- If 'name' is listed as a hard_dep of another module, return that module's name. - E.g., 'pydoc_data' -> 'pydoc', '_pydatetime' -> 'datetime' + Only returns a parent if the file is actually tracked: + - Explicitly listed in DEPENDENCIES as a hard_dep + - Or auto-detected _py{module}.py pattern where the parent module exists Args: name: Module or file name (with or without .py extension) + cpython_prefix: CPython directory prefix Returns: - Parent module name if found, None otherwise + Parent module name if found and tracked, None otherwise """ # Normalize: remove .py extension if present if name.endswith(".py"): name = name[:-3] + # Check DEPENDENCIES table first (explicit hard_deps) for module_name, dep_info in DEPENDENCIES.items(): hard_deps = dep_info.get("hard_deps", []) for dep in hard_deps: @@ -128,6 +113,24 @@ def resolve_hard_dep_parent(name: str) -> str | None: dep_normalized = dep[:-3] if dep.endswith(".py") else dep if dep_normalized == name: return module_name + + # Auto-detect _py{module} or _py_{module} patterns + # Only if the parent module actually exists + if name.startswith("_py"): + if name.startswith("_py_"): + # _py_abc -> abc + parent = name[4:] + else: + # _pydatetime -> datetime + parent = name[3:] + + # Verify the parent module exists + lib_dir = pathlib.Path(cpython_prefix) / "Lib" + parent_file = lib_dir / f"{parent}.py" + parent_dir = lib_dir / parent + if parent_file.exists() or (parent_dir.exists() and (parent_dir / "__init__.py").exists()): + return parent + return None @@ -237,10 +240,16 @@ def get_lib_paths( # Default: try file first, then directory paths = [resolve_module_path(name, cpython_prefix, prefer="file")] - # Add hard_deps + # Add hard_deps from DEPENDENCIES for dep in dep_info.get("hard_deps", []): paths.append(construct_lib_path(cpython_prefix, dep)) + # Auto-detect _py{module}.py or _py_{module}.py patterns + for pattern in [f"_py{name}.py", f"_py_{name}.py"]: + auto_path = construct_lib_path(cpython_prefix, pattern) + 
if auto_path.exists() and auto_path not in paths: + paths.append(auto_path) + return tuple(paths) diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py index 50d58e4592d..32be6559726 100644 --- a/scripts/update_lib/show_deps.py +++ b/scripts/update_lib/show_deps.py @@ -19,26 +19,39 @@ def get_all_modules(cpython_prefix: str = "cpython") -> list[str]: """Get all top-level module names from cpython/Lib/. + Includes private modules (_*) that are not hard_deps of other modules. + Returns: Sorted list of module names (without .py extension) """ + from update_lib.deps import resolve_hard_dep_parent + lib_dir = pathlib.Path(cpython_prefix) / "Lib" if not lib_dir.exists(): return [] modules = set() for entry in lib_dir.iterdir(): - # Skip private/internal modules and special directories - if entry.name.startswith(("_", ".")): + # Skip hidden files + if entry.name.startswith("."): continue # Skip test directory if entry.name == "test": continue if entry.is_file() and entry.suffix == ".py": - modules.add(entry.stem) + name = entry.stem elif entry.is_dir() and (entry / "__init__.py").exists(): - modules.add(entry.name) + name = entry.name + else: + continue + + # Skip private modules that are hard_deps of other modules + # e.g., _pydatetime is a hard_dep of datetime, so skip it + if name.startswith("_") and resolve_hard_dep_parent(name, cpython_prefix) is not None: + continue + + modules.add(name) return sorted(modules) diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index 21f6576ac13..20682ce61ad 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -187,7 +187,7 @@ def get_untracked_files( continue # Check if this is a hard_dep of a tracked module - if resolve_hard_dep_parent(module_name) is not None: + if resolve_hard_dep_parent(module_name, cpython_prefix) is not None: continue # Check if exists in local lib From 7096b76d947e075d4b76f26bccc32951f30f79b2 Mon Sep 17 00:00:00 2001 From: 
"github-actions[bot]" Date: Sat, 24 Jan 2026 04:09:31 +0000 Subject: [PATCH 8/8] Auto-format: ruff format --- scripts/update_lib/deps.py | 4 +++- scripts/update_lib/show_deps.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index 2ef9f9cbfd8..3e7d43b9b69 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -128,7 +128,9 @@ def resolve_hard_dep_parent(name: str, cpython_prefix: str = "cpython") -> str | lib_dir = pathlib.Path(cpython_prefix) / "Lib" parent_file = lib_dir / f"{parent}.py" parent_dir = lib_dir / parent - if parent_file.exists() or (parent_dir.exists() and (parent_dir / "__init__.py").exists()): + if parent_file.exists() or ( + parent_dir.exists() and (parent_dir / "__init__.py").exists() + ): return parent return None diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py index 32be6559726..ef0b1111b70 100644 --- a/scripts/update_lib/show_deps.py +++ b/scripts/update_lib/show_deps.py @@ -48,7 +48,10 @@ def get_all_modules(cpython_prefix: str = "cpython") -> list[str]: # Skip private modules that are hard_deps of other modules # e.g., _pydatetime is a hard_dep of datetime, so skip it - if name.startswith("_") and resolve_hard_dep_parent(name, cpython_prefix) is not None: + if ( + name.startswith("_") + and resolve_hard_dep_parent(name, cpython_prefix) is not None + ): continue modules.add(name)