From d0f64045ea745bc8f04a7546fe5ca54c9d9c373e Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 1 Aug 2025 09:50:34 +0100 Subject: [PATCH 1/6] Don't use ports and adapters for ImportScanner We don't have swappable ImportScanners anyway, so this complicates the design. --- .importlinter | 4 +-- rust/src/import_scanning.rs | 11 ++++++ src/grimp/application/ports/importscanner.py | 35 ------------------- .../scanning.py} | 0 src/grimp/application/usecases.py | 4 +-- src/grimp/main.py | 2 -- .../test_scanning.py} | 25 ++++++------- tests/unit/conftest.py | 2 -- 8 files changed, 28 insertions(+), 55 deletions(-) delete mode 100644 src/grimp/application/ports/importscanner.py rename src/grimp/{adaptors/importscanner.py => application/scanning.py} (100%) rename tests/unit/{adaptors/test_importscanner.py => application/test_scanning.py} (98%) diff --git a/.importlinter b/.importlinter index f94e4429..9ec2eaff 100644 --- a/.importlinter +++ b/.importlinter @@ -18,5 +18,5 @@ ignore_imports = ; is an object within grimp, rather than a module in its own right, ; so we ignore these imports here. grimp.adaptors.graph -> grimp - grimp.adaptors.importscanner -> grimp - grimp.adaptors.filesystem -> grimp \ No newline at end of file + grimp.adaptors.filesystem -> grimp + grimp.application.scanning -> grimp diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs index 66f07a62..b7db9dcc 100644 --- a/rust/src/import_scanning.rs +++ b/rust/src/import_scanning.rs @@ -8,6 +8,7 @@ use pyo3::prelude::*; use pyo3::types::{PyDict, PySet}; use std::collections::HashSet; +/// Statically analyses some Python modules for import statements within their shared package. #[pyclass] pub struct ImportScanner { file_system: Box, @@ -26,6 +27,14 @@ struct DirectImport { #[pymethods] impl ImportScanner { + /// Python args: + /// + /// - file_system: The file system interface to use. (A BasicFileSystem.) + /// - found_packages: Set of FoundPackages containing all the modules + /// for analysis. + /// - include_external_packages: Whether to include imports of external modules (i.e. + /// modules not contained in modules_by_package_directory) + /// in the results. #[allow(unused_variables)] #[new] #[pyo3(signature = (file_system, found_packages, include_external_packages=false))] @@ -57,6 +66,8 @@ impl ImportScanner { }) } + /// Statically analyses the given module and returns a set of Modules that + /// it imports. #[pyo3(signature = (module, exclude_type_checking_imports=false))] fn scan_for_imports<'a>( &self, diff --git a/src/grimp/application/ports/importscanner.py b/src/grimp/application/ports/importscanner.py deleted file mode 100644 index 82cb21bc..00000000 --- a/src/grimp/application/ports/importscanner.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Set, Protocol - -from grimp.application.ports.filesystem import BasicFileSystem -from grimp.application.ports.modulefinder import FoundPackage -from grimp.domain.valueobjects import DirectImport, Module - - -class AbstractImportScanner(Protocol): - """ - Statically analyses some Python modules for import statements within their shared package. - """ - - def __init__( - self, - file_system: BasicFileSystem, - found_packages: Set[FoundPackage], - include_external_packages: bool = False, - ) -> None: - """ - Args: - - found_packages: Set of FoundPackages containing all the modules - for analysis. - - file_system: The file system interface to use. - - include_external_packages: Whether to include imports of external modules (i.e. - modules not contained in modules_by_package_directory) - in the results. - """ - - def scan_for_imports( - self, module: Module, *, exclude_type_checking_imports: bool = False - ) -> Set[DirectImport]: - """ - Statically analyses the given module and returns an iterable of Modules that - it imports. - """ diff --git a/src/grimp/adaptors/importscanner.py b/src/grimp/application/scanning.py similarity index 100% rename from src/grimp/adaptors/importscanner.py rename to src/grimp/application/scanning.py diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index 35e830f3..abf6197b 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -10,9 +10,9 @@ from ..application.ports import caching from ..application.ports.filesystem import AbstractFileSystem from ..application.ports.graph import ImportGraph -from ..application.ports.importscanner import AbstractImportScanner from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile from ..application.ports.packagefinder import AbstractPackageFinder +from ..application.scanning import ImportScanner from ..domain.valueobjects import DirectImport, Module from .config import settings import os @@ -260,7 +260,7 @@ def _scan_chunk( ) -> Dict[ModuleFile, Set[DirectImport]]: file_system: AbstractFileSystem = settings.FILE_SYSTEM basic_file_system = file_system.convert_to_basic() - import_scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS( + import_scanner = ImportScanner( file_system=basic_file_system, found_packages=found_packages, # Ensure that the passed exclude_type_checking_imports is definitely a boolean, diff --git a/src/grimp/main.py b/src/grimp/main.py index d60533a8..eac21113 100644 --- a/src/grimp/main.py +++ b/src/grimp/main.py @@ -3,7 +3,6 @@ from .adaptors.caching import Cache from .adaptors.filesystem import FileSystem from .adaptors.graph import ImportGraph -from .adaptors.importscanner import ImportScanner from .adaptors.modulefinder import ModuleFinder from .adaptors.packagefinder import ImportLibPackageFinder from .adaptors.timing import SystemClockTimer @@ -13,7 +12,6 @@ settings.configure( MODULE_FINDER=ModuleFinder(), FILE_SYSTEM=FileSystem(), - IMPORT_SCANNER_CLASS=ImportScanner, # type: ignore[has-type] IMPORT_GRAPH_CLASS=ImportGraph, PACKAGE_FINDER=ImportLibPackageFinder(), CACHE_CLASS=Cache, diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/application/test_scanning.py similarity index 98% rename from tests/unit/adaptors/test_importscanner.py rename to tests/unit/application/test_scanning.py index fe55a704..d1b8240b 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/application/test_scanning.py @@ -4,6 +4,7 @@ import pytest # type: ignore from grimp.application.ports.modulefinder import FoundPackage, ModuleFile +from grimp.application import scanning from grimp.domain.valueobjects import DirectImport, Module from grimp import _rustgrimp as rust # type: ignore[attr-defined] @@ -61,7 +62,7 @@ def test_absolute_imports(include_external_packages, expected_result): } ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -93,7 +94,7 @@ def test_non_ascii(): }, ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="mypackage", @@ -166,7 +167,7 @@ def test_single_namespace_package_portion(): } ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="namespace.foo", @@ -253,7 +254,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): } ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="namespace.foo", @@ -442,7 +443,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): }, ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -501,7 +502,7 @@ def test_relative_from_imports(module_to_scan_is_package): }, ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -559,7 +560,7 @@ def test_trims_to_known_modules(import_source): content_map={"/path/to/foo/one.py": import_source}, ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -607,7 +608,7 @@ def test_trims_to_known_modules_within_init_file(): }, ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -658,7 +659,7 @@ def my_function(): }, ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -722,7 +723,7 @@ def test_external_package_imports_for_namespace_packages(statement, expected_mod } ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="namespace.foo.blue", @@ -773,7 +774,7 @@ def test_scans_multiple_packages(statement): } ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", @@ -842,7 +843,7 @@ def test_exclude_type_checking_imports( } ) - import_scanner = rust.ImportScanner( + import_scanner = scanning.ImportScanner( found_packages={ FoundPackage( name="foo", diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 7840ed3d..b98cc077 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -3,7 +3,6 @@ from grimp.application.config import settings from grimp.adaptors.graph import ImportGraph from grimp.adaptors.modulefinder import ModuleFinder -from grimp.adaptors.importscanner import ImportScanner @pytest.fixture(scope="module", autouse=True) @@ -11,6 +10,5 @@ def configure_unit_tests(): settings.configure( IMPORT_GRAPH_CLASS=ImportGraph, MODULE_FINDER=ModuleFinder(), - IMPORT_SCANNER_CLASS=ImportScanner, FILE_SYSTEM=None, ) From 9e626d0f967ee49d0ca2ed055b90b0cb9fdb5bca Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 1 Aug 2025 10:18:53 +0100 Subject: [PATCH 2/6] Move scanning functions into scanning.py --- src/grimp/application/scanning.py | 105 ++++++++++++++++++- src/grimp/application/usecases.py | 105 +------------------ tests/functional/test_build_and_use_graph.py | 4 +- tests/unit/application/test_scanning.py | 83 ++++++++++++++- tests/unit/application/test_usecases.py | 80 +------------- 5 files changed, 190 insertions(+), 187 deletions(-) diff --git a/src/grimp/application/scanning.py b/src/grimp/application/scanning.py index 655782de..a5f89731 100644 --- a/src/grimp/application/scanning.py +++ b/src/grimp/application/scanning.py @@ -1,3 +1,106 @@ -from grimp import _rustgrimp as rust # type: ignore[attr-defined] +import math +import os +from typing import Collection, Set, Dict, Iterable + +import joblib # type: ignore + +from grimp import _rustgrimp as rust, DirectImport # type: ignore[attr-defined] +from grimp.application.config import settings +from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.modulefinder import ModuleFile, FoundPackage ImportScanner = rust.ImportScanner + +# Calling code can set this environment variable if it wants to tune when to switch to +# multiprocessing, or set it to a large number to disable it altogether. +MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES" +# This is an arbitrary number, but setting it too low slows down our functional tests considerably. +# If you change this, update docs/usage.rst too! +DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50 + + +def scan_imports( + module_files: Collection[ModuleFile], + *, + found_packages: Set[FoundPackage], + include_external_packages: bool, + exclude_type_checking_imports: bool, +) -> Dict[ModuleFile, Set[DirectImport]]: + chunks = _create_chunks(module_files) + return _scan_chunks( + chunks, + found_packages, + include_external_packages, + exclude_type_checking_imports, + ) + + +def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFile, ...], ...]: + """ + Split the module files into chunks, each to be worked on by a separate OS process. + """ + module_files_tuple = tuple(module_files) + + number_of_module_files = len(module_files_tuple) + n_chunks = _decide_number_of_processes(number_of_module_files) + chunk_size = math.ceil(number_of_module_files / n_chunks) + + return tuple( + module_files_tuple[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks) + ) + + +def _decide_number_of_processes(number_of_module_files: int) -> int: + min_number_of_modules = int( + os.environ.get( + MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME, + DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING, + ) + ) + if number_of_module_files < min_number_of_modules: + # Don't incur the overhead of multiple processes. + return 1 + return min(joblib.cpu_count(), number_of_module_files) + + +def _scan_chunk( + found_packages: Set[FoundPackage], + include_external_packages: bool, + exclude_type_checking_imports: bool, + chunk: Iterable[ModuleFile], +) -> Dict[ModuleFile, Set[DirectImport]]: + file_system: AbstractFileSystem = settings.FILE_SYSTEM + basic_file_system = file_system.convert_to_basic() + import_scanner = ImportScanner( + file_system=basic_file_system, + found_packages=found_packages, + # Ensure that the passed exclude_type_checking_imports is definitely a boolean, + # otherwise the Rust class will error. + include_external_packages=bool(include_external_packages), + ) + return { + module_file: import_scanner.scan_for_imports( + module_file.module, exclude_type_checking_imports=exclude_type_checking_imports + ) + for module_file in chunk + } + + +def _scan_chunks( + chunks: Collection[Collection[ModuleFile]], + found_packages: Set[FoundPackage], + include_external_packages: bool, + exclude_type_checking_imports: bool, +) -> Dict[ModuleFile, Set[DirectImport]]: + number_of_processes = len(chunks) + import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)( + joblib.delayed(_scan_chunk)( + found_packages, include_external_packages, exclude_type_checking_imports, chunk + ) + for chunk in chunks + ) + + imports_by_module_file = {} + for chunk_imports_by_module_file in import_scanning_jobs: + imports_by_module_file.update(chunk_imports_by_module_file) + return imports_by_module_file diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index abf6197b..b8841f6e 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -2,34 +2,22 @@ Use cases handle application logic. """ -from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection -import math - -import joblib # type: ignore +from typing import Dict, Sequence, Set, Type, Union, cast, Iterable +from .scanning import scan_imports from ..application.ports import caching from ..application.ports.filesystem import AbstractFileSystem from ..application.ports.graph import ImportGraph from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile from ..application.ports.packagefinder import AbstractPackageFinder -from ..application.scanning import ImportScanner from ..domain.valueobjects import DirectImport, Module from .config import settings -import os class NotSupplied: pass -# Calling code can set this environment variable if it wants to tune when to switch to -# multiprocessing, or set it to a large number to disable it altogether. -MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES" -# This is an arbitrary number, but setting it too low slows down our functional tests considerably. -# If you change this, update docs/usage.rst too! -DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50 - - def build_graph( package_name, *additional_package_names, @@ -142,7 +130,7 @@ def _scan_packages( remaining_module_files_to_scan = module_files_to_scan.difference(imports_by_module_file) if remaining_module_files_to_scan: imports_by_module_file.update( - _scan_imports( + scan_imports( remaining_module_files_to_scan, found_packages=found_packages, include_external_packages=include_external_packages, @@ -206,90 +194,3 @@ def _read_imports_from_cache( else: imports_by_module_file[module_file] = direct_imports return imports_by_module_file - - -def _scan_imports( - module_files: Collection[ModuleFile], - *, - found_packages: Set[FoundPackage], - include_external_packages: bool, - exclude_type_checking_imports: bool, -) -> Dict[ModuleFile, Set[DirectImport]]: - chunks = _create_chunks(module_files) - return _scan_chunks( - chunks, - found_packages, - include_external_packages, - exclude_type_checking_imports, - ) - - -def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFile, ...], ...]: - """ - Split the module files into chunks, each to be worked on by a separate OS process. - """ - module_files_tuple = tuple(module_files) - - number_of_module_files = len(module_files_tuple) - n_chunks = _decide_number_of_processes(number_of_module_files) - chunk_size = math.ceil(number_of_module_files / n_chunks) - - return tuple( - module_files_tuple[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks) - ) - - -def _decide_number_of_processes(number_of_module_files: int) -> int: - min_number_of_modules = int( - os.environ.get( - MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME, - DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING, - ) - ) - if number_of_module_files < min_number_of_modules: - # Don't incur the overhead of multiple processes. - return 1 - return min(joblib.cpu_count(), number_of_module_files) - - -def _scan_chunk( - found_packages: Set[FoundPackage], - include_external_packages: bool, - exclude_type_checking_imports: bool, - chunk: Iterable[ModuleFile], -) -> Dict[ModuleFile, Set[DirectImport]]: - file_system: AbstractFileSystem = settings.FILE_SYSTEM - basic_file_system = file_system.convert_to_basic() - import_scanner = ImportScanner( - file_system=basic_file_system, - found_packages=found_packages, - # Ensure that the passed exclude_type_checking_imports is definitely a boolean, - # otherwise the Rust class will error. - include_external_packages=bool(include_external_packages), - ) - return { - module_file: import_scanner.scan_for_imports( - module_file.module, exclude_type_checking_imports=exclude_type_checking_imports - ) - for module_file in chunk - } - - -def _scan_chunks( - chunks: Collection[Collection[ModuleFile]], - found_packages: Set[FoundPackage], - include_external_packages: bool, - exclude_type_checking_imports: bool, -) -> Dict[ModuleFile, Set[DirectImport]]: - number_of_processes = len(chunks) - import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)( - joblib.delayed(_scan_chunk)( - found_packages, include_external_packages, exclude_type_checking_imports, chunk - ) - for chunk in chunks - ) - - imports_by_module_file = {} - for chunk_imports_by_module_file in import_scanning_jobs: - imports_by_module_file.update(chunk_imports_by_module_file) - return imports_by_module_file diff --git a/tests/functional/test_build_and_use_graph.py b/tests/functional/test_build_and_use_graph.py index e061a82e..20b55d05 100644 --- a/tests/functional/test_build_and_use_graph.py +++ b/tests/functional/test_build_and_use_graph.py @@ -2,7 +2,7 @@ from typing import Set, Tuple, Optional import pytest from unittest.mock import patch -from grimp.application import usecases +from grimp.application import scanning """ @@ -56,7 +56,7 @@ def test_modules(): } -@patch.object(usecases, "DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0) +@patch.object(scanning, "DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0) def test_modules_multiprocessing(): """ This test runs relatively slowly, but it's important we cover the multiprocessing code. diff --git a/tests/unit/application/test_scanning.py b/tests/unit/application/test_scanning.py index d1b8240b..e2287714 100644 --- a/tests/unit/application/test_scanning.py +++ b/tests/unit/application/test_scanning.py @@ -1,13 +1,18 @@ from typing import Set - +import os +from unittest.mock import patch import pytest # type: ignore - +import joblib # type: ignore from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.application import scanning from grimp.domain.valueobjects import DirectImport, Module - +from tests.config import override_settings from grimp import _rustgrimp as rust # type: ignore[attr-defined] +from tests.adaptors.filesystem import FakeFileSystem + + +SOME_CPU_COUNT = 8 @pytest.mark.parametrize( @@ -904,6 +909,78 @@ def test_exclude_type_checking_imports( assert expected_result == result +@patch.object(scanning, "_scan_chunks", return_value={}) +@patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT) +@pytest.mark.parametrize( + "number_of_modules, fake_environ, expected_number_of_chunks", + [ + ( + scanning.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1, + {}, + 1, + ), + ( + scanning.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING, + {}, + SOME_CPU_COUNT, + ), + ( + scanning.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1, + {}, + SOME_CPU_COUNT, + ), + ( + 149, + {scanning.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, + 1, + ), + ( + 150, + {scanning.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, + SOME_CPU_COUNT, + ), + ( + 151, + {scanning.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, + SOME_CPU_COUNT, + ), + ], +) +def test_scanning_multiprocessing_respects_min_number_of_modules( + mock_cpu_count, + mock_scan_chunks, + number_of_modules, + fake_environ, + expected_number_of_chunks, +): + module_files = frozenset( + { + ModuleFile( + module=Module(f"mypackage.mod_{i}"), + mtime=999, + ) + for i in range(number_of_modules) + } + ) + found_packages = { + FoundPackage(name="mypackage", directory="/path/to/mypackage", module_files=module_files) + } + + with override_settings( + FILE_SYSTEM=FakeFileSystem(), + ), patch.object(os, "environ", fake_environ): + scanning.scan_imports( + module_files, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=False, + ) + + [call] = mock_scan_chunks.call_args_list + chunks = call.args[0] + assert len(chunks) == expected_number_of_chunks + + def _modules_to_module_files(modules: Set[Module]) -> Set[ModuleFile]: some_mtime = 100933.4 return {ModuleFile(module=module, mtime=some_mtime) for module in modules} diff --git a/tests/unit/application/test_usecases.py b/tests/unit/application/test_usecases.py index 0b76e98e..531cfe56 100644 --- a/tests/unit/application/test_usecases.py +++ b/tests/unit/application/test_usecases.py @@ -1,8 +1,5 @@ -import os from typing import Dict, Optional, Set -from unittest.mock import sentinel, patch - -import joblib # type: ignore +from unittest.mock import sentinel import pytest # type: ignore from grimp.application import usecases @@ -11,11 +8,8 @@ from grimp.domain.valueobjects import DirectImport, Module from tests.adaptors.filesystem import FakeFileSystem from tests.adaptors.packagefinder import BaseFakePackageFinder -from tests.adaptors.modulefinder import BaseFakeModuleFinder from tests.config import override_settings -SOME_CPU_COUNT = 8 - class TestBuildGraph: @pytest.mark.parametrize("include_external_packages", (True, False)) @@ -137,78 +131,6 @@ def write( kwargs["cache_dir"] = supplied_cache_dir usecases.build_graph("mypackage", **kwargs) - @patch.object(usecases, "_scan_chunks", return_value={}) - @patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT) - @pytest.mark.parametrize( - "number_of_modules, fake_environ, expected_number_of_chunks", - [ - ( - usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1, - {}, - 1, - ), - ( - usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING, - {}, - SOME_CPU_COUNT, - ), - ( - usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1, - {}, - SOME_CPU_COUNT, - ), - ( - 149, - {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, - 1, - ), - ( - 150, - {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, - SOME_CPU_COUNT, - ), - ( - 151, - {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150}, - SOME_CPU_COUNT, - ), - ], - ) - def test_scanning_multiprocessing_respects_min_number_of_modules( - self, - mock_cpu_count, - mock_scan_chunks, - number_of_modules, - fake_environ, - expected_number_of_chunks, - ): - class FakePackageFinder(BaseFakePackageFinder): - directory_map = {"mypackage": "/path/to/mypackage"} - - class FakeModuleFinder(BaseFakeModuleFinder): - module_files_by_package_name = { - "mypackage": frozenset( - { - ModuleFile( - module=Module(f"mypackage.mod_{i}"), - mtime=999, - ) - for i in range(number_of_modules) - } - ) - } - - with override_settings( - FILE_SYSTEM=FakeFileSystem(), - PACKAGE_FINDER=FakePackageFinder(), - MODULE_FINDER=FakeModuleFinder(), - ), patch.object(os, "environ", fake_environ): - usecases.build_graph("mypackage", cache_dir=None) - - [call] = mock_scan_chunks.call_args_list - chunks = call.args[0] - assert len(chunks) == expected_number_of_chunks - def test_forgives_wrong_type_being_passed_to_include_external_packages(self): file_system = FakeFileSystem( contents=""" From 07a7ad5f026267864a1a43da75a62d5c80e37d5b Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 1 Aug 2025 11:10:03 +0100 Subject: [PATCH 3/6] Adjust unit tests so they don't interact directly with scanner --- rust/src/filesystem.rs | 7 + src/grimp/application/scanning.py | 3 +- tests/unit/application/test_scanning.py | 607 +++++++++++++----------- 3 files changed, 332 insertions(+), 285 deletions(-) diff --git a/rust/src/filesystem.rs b/rust/src/filesystem.rs index a55e4c52..a73725b5 100644 --- a/rust/src/filesystem.rs +++ b/rust/src/filesystem.rs @@ -262,6 +262,13 @@ impl PyFakeBasicFileSystem { fn read(&self, file_name: &str) -> PyResult { self.inner.read(file_name) } + + // Temporary workaround method for Python tests. + fn convert_to_basic(&self) -> PyResult { + Ok(PyFakeBasicFileSystem { + inner: self.inner.clone() + }) + } } /// Parses an indented string representing a file system structure diff --git a/src/grimp/application/scanning.py b/src/grimp/application/scanning.py index a5f89731..70a81413 100644 --- a/src/grimp/application/scanning.py +++ b/src/grimp/application/scanning.py @@ -4,7 +4,8 @@ import joblib # type: ignore -from grimp import _rustgrimp as rust, DirectImport # type: ignore[attr-defined] +from grimp import _rustgrimp as rust # type: ignore[attr-defined] +from grimp.domain.valueobjects import DirectImport from grimp.application.config import settings from grimp.application.ports.filesystem import AbstractFileSystem from grimp.application.ports.modulefinder import ModuleFile, FoundPackage diff --git a/tests/unit/application/test_scanning.py b/tests/unit/application/test_scanning.py index e2287714..4fd1012c 100644 --- a/tests/unit/application/test_scanning.py +++ b/tests/unit/application/test_scanning.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest # type: ignore import joblib # type: ignore + from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.application import scanning from grimp.domain.valueobjects import DirectImport, Module @@ -55,7 +56,8 @@ ), ) def test_absolute_imports(include_external_packages, expected_result): - all_modules = {Module("foo.one"), Module("foo.two")} + module_file_to_scan = _module_to_module_file(Module("foo.one")) + all_module_files = frozenset({module_file_to_scan, _module_to_module_file(Module("foo.two"))}) file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": """ @@ -67,25 +69,26 @@ def test_absolute_imports(include_external_packages, expected_result): } ) - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - include_external_packages=include_external_packages, - ) - - result = import_scanner.scan_for_imports(Module("foo.one")) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_file_to_scan}, + found_packages={ + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=all_module_files, + ) + }, + include_external_packages=include_external_packages, + exclude_type_checking_imports=False, + ) - assert expected_result == result + assert {module_file_to_scan: expected_result} == result def test_non_ascii(): blue_module = Module("mypackage.blue") + blue_module_file = _module_to_module_file(blue_module) non_ascii_modules = {Module("mypackage.ñon_ascii_变"), Module("mypackage.ñon_ascii_变.ラーメン")} file_system = rust.FakeBasicFileSystem( content_map={ @@ -98,42 +101,44 @@ def test_non_ascii(): "/path/to/mypackage/ñon_ascii_变/ラーメン.py": "", }, ) + found_packages = { + FoundPackage( + name="mypackage", + directory="/path/to/mypackage", + module_files=frozenset({blue_module_file}) + | _modules_to_module_files(non_ascii_modules), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="mypackage", - directory="/path/to/mypackage", - module_files=frozenset( - _modules_to_module_files({blue_module} | non_ascii_modules) - ), - ) - }, - file_system=file_system, - include_external_packages=True, - ) - - result = import_scanner.scan_for_imports(blue_module) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {blue_module_file}, + found_packages=found_packages, + include_external_packages=True, + exclude_type_checking_imports=False, + ) assert result == { - DirectImport( - importer=Module("mypackage.blue"), - imported=Module("ñon_ascii_变"), - line_number=1, - line_contents="from ñon_ascii_变 import *", - ), - DirectImport( - importer=Module("mypackage.blue"), - imported=Module("mypackage.ñon_ascii_变"), - line_number=2, - line_contents="from . import ñon_ascii_变", - ), - DirectImport( - importer=Module("mypackage.blue"), - imported=Module("mypackage.ñon_ascii_变.ラーメン"), - line_number=3, - line_contents="import mypackage.ñon_ascii_变.ラーメン", - ), + blue_module_file: { + DirectImport( + importer=Module("mypackage.blue"), + imported=Module("ñon_ascii_变"), + line_number=1, + line_contents="from ñon_ascii_变 import *", + ), + DirectImport( + importer=Module("mypackage.blue"), + imported=Module("mypackage.ñon_ascii_变"), + line_number=2, + line_contents="from . import ñon_ascii_变", + ), + DirectImport( + importer=Module("mypackage.blue"), + imported=Module("mypackage.ñon_ascii_变.ラーメン"), + line_number=3, + line_contents="import mypackage.ñon_ascii_变.ラーメン", + ), + } } @@ -157,7 +162,8 @@ def test_single_namespace_package_portion(): MODULE_BLUE, MODULE_ALPHA, } - + module_one_file = _module_to_module_file(MODULE_ONE) + module_alpha_file = _module_to_module_file(MODULE_ALPHA) file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/one.py": """ @@ -171,25 +177,24 @@ def test_single_namespace_package_portion(): """, } ) + found_packages = { + FoundPackage( + name="namespace.foo", + directory="/path/to/namespace/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="namespace.foo", - directory="/path/to/namespace/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - ) - - results = ( - import_scanner.scan_for_imports(MODULE_ONE), - import_scanner.scan_for_imports(MODULE_ALPHA), - ) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_one_file, module_alpha_file}, + found_packages=found_packages, + include_external_packages=True, + exclude_type_checking_imports=False, + ) - assert results == ( - { + assert result == { + module_one_file: { DirectImport( importer=MODULE_ONE, imported=MODULE_TWO, @@ -209,7 +214,7 @@ def test_single_namespace_package_portion(): line_contents="from . import four", ), }, - { + module_alpha_file: { DirectImport( importer=MODULE_ALPHA, imported=MODULE_BLUE, @@ -223,7 +228,7 @@ def test_single_namespace_package_portion(): line_contents="from ... import three", ), }, - ) + } @pytest.mark.parametrize("include_external_packages", (True, False)) @@ -235,7 +240,8 @@ def test_import_of_portion_not_in_graph(include_external_packages): MODULE_FOO = Module("namespace.foo") MODULE_FOO_ONE = Module("namespace.foo.one") MODULE_FOO_BLUE = Module("namespace.foo.one.blue") - + module_foo_one_file = _module_to_module_file(MODULE_FOO_ONE) + module_foo_blue_file = _module_to_module_file(MODULE_FOO_BLUE) all_modules = { MODULE_FOO, MODULE_FOO_ONE, @@ -259,38 +265,37 @@ def test_import_of_portion_not_in_graph(include_external_packages): } ) - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="namespace.foo", - directory="/path/to/namespace/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ), - FoundPackage( - name="namespace.bar.one.green", - directory="/path/to/namespace/bar/one/green", - module_files=frozenset( - _modules_to_module_files( - { - MODULE_BAR_ONE_GREEN, - MODULE_BAR_ONE_GREEN_ALPHA, - } - ) - ), + found_packages = { + FoundPackage( + name="namespace.foo", + directory="/path/to/namespace/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ), + FoundPackage( + name="namespace.bar.one.green", + directory="/path/to/namespace/bar/one/green", + module_files=frozenset( + _modules_to_module_files( + { + MODULE_BAR_ONE_GREEN, + MODULE_BAR_ONE_GREEN_ALPHA, + } + ) ), - }, - file_system=file_system, - include_external_packages=include_external_packages, - ) + ), + } - results = ( - import_scanner.scan_for_imports(MODULE_FOO_ONE), - import_scanner.scan_for_imports(MODULE_FOO_BLUE), - ) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_foo_one_file, module_foo_blue_file}, + found_packages=found_packages, + include_external_packages=include_external_packages, + exclude_type_checking_imports=False, + ) if include_external_packages: - assert results == ( - { + assert result == { + module_foo_one_file: { DirectImport( importer=MODULE_FOO_ONE, imported=Module("namespace.bar.one.orange"), @@ -322,7 +327,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): line_contents="from ..magenta.one import alpha", ), }, - { + module_foo_blue_file: { DirectImport( importer=MODULE_FOO_BLUE, imported=Module("namespace.teal"), @@ -342,9 +347,12 @@ def test_import_of_portion_not_in_graph(include_external_packages): line_contents="from ...scarlet.one import alpha", ), }, - ) + } else: - assert results == (set(), set()) + assert result == { + module_foo_one_file: set(), + module_foo_blue_file: set(), + } @pytest.mark.parametrize( @@ -421,6 +429,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): Module("foo.two.yellow"), Module("foo.three"), } + module_foo_one_blue_file = _module_to_module_file(Module("foo.one.blue")) file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ @@ -447,22 +456,23 @@ def test_absolute_from_imports(include_external_packages, expected_result): """ }, ) + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - include_external_packages=include_external_packages, - ) - - result = import_scanner.scan_for_imports(Module("foo.one.blue")) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_foo_one_blue_file}, + found_packages=found_packages, + include_external_packages=include_external_packages, + exclude_type_checking_imports=False, + ) - assert expected_result == result + assert {module_foo_one_blue_file: expected_result} == result @pytest.mark.parametrize("module_to_scan_is_package", (True, False)) @@ -482,7 +492,7 @@ def test_relative_from_imports(module_to_scan_is_package): else: module_to_scan = Module("foo.one.blue") module_filename = "/path/to/foo/one/blue.py" - + module_file_to_scan = _module_to_module_file(module_to_scan) file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ @@ -506,39 +516,43 @@ def test_relative_from_imports(module_to_scan_is_package): """ }, ) + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - ) - - result = import_scanner.scan_for_imports(module_to_scan) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_file_to_scan}, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=True, + ) assert result == { - DirectImport( - importer=module_to_scan, - imported=Module("foo.one.green"), - line_number=1, - line_contents="from . import green", - ), - DirectImport( - importer=module_to_scan, - imported=Module("foo.two.yellow"), - line_number=2, - line_contents="from ..two import yellow", - ), - DirectImport( - importer=module_to_scan, - imported=Module("foo.three"), - line_number=3, - line_contents="from .. import three", - ), + module_file_to_scan: { + DirectImport( + importer=module_to_scan, + imported=Module("foo.one.green"), + line_number=1, + line_contents="from . import green", + ), + DirectImport( + importer=module_to_scan, + imported=Module("foo.two.yellow"), + line_number=2, + line_contents="from ..two import yellow", + ), + DirectImport( + importer=module_to_scan, + imported=Module("foo.three"), + line_number=3, + line_contents="from .. import three", + ), + } } @@ -553,6 +567,7 @@ def test_trims_to_known_modules(import_source): Module("foo.two"), Module("foo.two.yellow"), } + module_file_to_scan = _module_to_module_file(Module("foo.one")) file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ @@ -564,27 +579,31 @@ def test_trims_to_known_modules(import_source): """, content_map={"/path/to/foo/one.py": import_source}, ) + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - ) - - result = import_scanner.scan_for_imports(Module("foo.one")) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_file_to_scan}, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=False, + ) assert result == { - DirectImport( - importer=Module("foo.one"), - imported=Module("foo.two.yellow"), - line_number=1, - line_contents=import_source, - ) + module_file_to_scan: { + DirectImport( + importer=Module("foo.one"), + imported=Module("foo.two.yellow"), + line_number=1, + line_contents=import_source, + ) + } } @@ -596,6 +615,8 @@ def test_trims_to_known_modules_within_init_file(): Module("foo.one.blue"), Module("foo.one.blue.alpha"), } + module_file_foo_one = _module_to_module_file(Module("foo.one")) + module_file_foo_one_blue = _module_to_module_file(Module("foo.one.blue")) file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ @@ -612,43 +633,45 @@ def test_trims_to_known_modules_within_init_file(): "/path/to/foo/one/blue/__init__.py": "from .alpha import my_function", }, ) - - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - ) - - result = import_scanner.scan_for_imports(Module("foo.one")) - - assert result == { - DirectImport( - importer=Module("foo.one"), - imported=Module("foo.one.yellow"), - line_number=1, - line_contents="from .yellow import my_function", + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), ) } - result = import_scanner.scan_for_imports(Module("foo.one.blue")) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_file_foo_one, module_file_foo_one_blue}, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=False, + ) assert result == { - DirectImport( - importer=Module("foo.one.blue"), - imported=Module("foo.one.blue.alpha"), - line_number=1, - line_contents="from .alpha import my_function", - ) + module_file_foo_one: { + DirectImport( + importer=Module("foo.one"), + imported=Module("foo.one.yellow"), + line_number=1, + line_contents="from .yellow import my_function", + ) + }, + module_file_foo_one_blue: { + DirectImport( + importer=Module("foo.one.blue"), + imported=Module("foo.one.blue.alpha"), + line_number=1, + line_contents="from .alpha import my_function", + ) + }, } def test_trims_whitespace_from_start_of_line_contents(): all_modules = {Module("foo"), Module("foo.one"), Module("foo.two")} + module_file_to_scan = _module_to_module_file(Module("foo.one")) file_system = rust.FakeBasicFileSystem( contents=""" /path/to/foo/ @@ -663,27 +686,31 @@ def my_function(): """ }, ) + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - ) - - result = import_scanner.scan_for_imports(Module("foo.one")) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_file_to_scan}, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=False, + ) assert result == { - DirectImport( - importer=Module("foo.one"), - imported=Module("foo.two"), - line_number=2, - line_contents="from . import two", - ) + module_file_to_scan: { + DirectImport( + importer=Module("foo.one"), + imported=Module("foo.two"), + line_number=2, + line_contents="from . import two", + ), + } } @@ -721,52 +748,56 @@ def my_function(): ) def test_external_package_imports_for_namespace_packages(statement, expected_module_name): module_to_scan = Module("namespace.foo.blue.alpha") - + module_file_to_scan = _module_to_module_file(module_to_scan) file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/namespace/foo/blue/alpha.py": statement, } ) + found_packages = { + FoundPackage( + name="namespace.foo.blue", + directory="/path/to/namespace/foo/blue", + module_files=frozenset( + _modules_to_module_files( + { + Module("namespace.foo.blue"), + module_to_scan, + Module("namespace.foo.blue.beta"), + } + ) + ), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="namespace.foo.blue", - directory="/path/to/namespace/foo/blue", - module_files=frozenset( - _modules_to_module_files( - { - Module("namespace.foo.blue"), - module_to_scan, - Module("namespace.foo.blue.beta"), - } - ) - ), - ) - }, - file_system=file_system, - include_external_packages=True, - ) - - result = import_scanner.scan_for_imports(module_to_scan) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_file_to_scan}, + found_packages=found_packages, + include_external_packages=True, + exclude_type_checking_imports=False, + ) if expected_module_name: assert { - DirectImport( - importer=module_to_scan, - imported=Module(expected_module_name), - line_number=1, - line_contents=statement, - ), + module_file_to_scan: { + DirectImport( + importer=module_to_scan, + imported=Module(expected_module_name), + line_number=1, + line_contents=statement, + ), + } } == result else: - assert result == set() + assert {module_file_to_scan: set()} == result @pytest.mark.parametrize("statement", ("import bar.blue", "from bar import blue")) def test_scans_multiple_packages(statement): foo_modules = {Module("foo"), Module("foo.one"), Module("foo.two")} bar_modules = {Module("bar"), Module("bar.green"), Module("bar.blue")} + foo_one_module_file = _module_to_module_file(Module("foo.one")) file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": f""" @@ -778,38 +809,42 @@ def test_scans_multiple_packages(statement): """ } ) + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(foo_modules)), + ), + FoundPackage( + name="bar", + directory="/path/to/bar", + module_files=frozenset(_modules_to_module_files(bar_modules)), + ), + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(foo_modules)), - ), - FoundPackage( - name="bar", - directory="/path/to/bar", - module_files=frozenset(_modules_to_module_files(bar_modules)), - ), - }, - file_system=file_system, - ) - - result = import_scanner.scan_for_imports(Module("foo.one")) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {foo_one_module_file}, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=False, + ) assert { - DirectImport( - importer=Module("foo.one"), - imported=Module("foo.two"), - line_number=1, - line_contents="import foo.two", - ), - DirectImport( - importer=Module("foo.one"), - imported=Module("bar.blue"), - line_number=2, - line_contents=statement, - ), + foo_one_module_file: { + DirectImport( + importer=Module("foo.one"), + imported=Module("foo.two"), + line_number=1, + line_contents="import foo.two", + ), + DirectImport( + importer=Module("foo.one"), + imported=Module("bar.blue"), + line_number=2, + line_contents=statement, + ), + } } == result @@ -835,6 +870,7 @@ def test_exclude_type_checking_imports( Module("foo.four"), Module("foo.five"), } + module_foo_one_file = _module_to_module_file(Module("foo.one")) file_system = rust.FakeBasicFileSystem( content_map={ "/path/to/foo/one.py": f""" @@ -847,17 +883,21 @@ def test_exclude_type_checking_imports( """ } ) + found_packages = { + FoundPackage( + name="foo", + directory="/path/to/foo", + module_files=frozenset(_modules_to_module_files(all_modules)), + ) + } - import_scanner = scanning.ImportScanner( - found_packages={ - FoundPackage( - name="foo", - directory="/path/to/foo", - module_files=frozenset(_modules_to_module_files(all_modules)), - ) - }, - file_system=file_system, - ) + with override_settings(FILE_SYSTEM=file_system): + result = scanning.scan_imports( + {module_foo_one_file}, + found_packages=found_packages, + include_external_packages=False, + exclude_type_checking_imports=exclude_type_checking_imports, + ) if exclude_type_checking_imports and is_statement_valid: expected_result = { @@ -901,12 +941,7 @@ def test_exclude_type_checking_imports( line_contents="import foo.five", ), } - - result = import_scanner.scan_for_imports( - Module("foo.one"), exclude_type_checking_imports=exclude_type_checking_imports - ) - - assert expected_result == result + assert {module_foo_one_file: expected_result} == result @patch.object(scanning, "_scan_chunks", return_value={}) @@ -981,6 +1016,10 @@ def test_scanning_multiprocessing_respects_min_number_of_modules( assert len(chunks) == expected_number_of_chunks -def _modules_to_module_files(modules: Set[Module]) -> Set[ModuleFile]: +def _module_to_module_file(module: Module) -> ModuleFile: some_mtime = 100933.4 - return {ModuleFile(module=module, mtime=some_mtime) for module in modules} + return ModuleFile(module=module, mtime=some_mtime) + + +def _modules_to_module_files(modules: Set[Module]) -> Set[ModuleFile]: + return {_module_to_module_file(module) for module in modules} From 8f3db6389ba2e6c536f0c2c5e1240695ce7f9c0e Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 1 Aug 2025 13:42:57 +0100 Subject: [PATCH 4/6] Extract function get_file_system_boxed --- rust/src/import_scanning.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs index b7db9dcc..aafae5fe 100644 --- a/rust/src/import_scanning.rs +++ b/rust/src/import_scanning.rs @@ -44,17 +44,7 @@ impl ImportScanner { found_packages: Bound<'_, PyAny>, include_external_packages: bool, ) -> PyResult { - let file_system_boxed: Box; - - if let Ok(py_real) = file_system.extract::>() { - file_system_boxed = Box::new(py_real.inner.clone()); - } else if let Ok(py_fake) = file_system.extract::>() { - file_system_boxed = Box::new(py_fake.inner.clone()); - } else { - return Err(PyTypeError::new_err( - "file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem", - )); - } + let file_system_boxed = get_file_system_boxed(&file_system)?; let found_packages_rust = _py_found_packages_to_rust(found_packages); let modules = _get_modules_from_found_packages(&found_packages_rust); @@ -373,3 +363,18 @@ fn count_leading_dots(s: &str) -> usize { fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool { module_name.starts_with(&format!("{potential_ancestor}.")) } + +fn get_file_system_boxed<'py>(file_system: &Bound<'_, PyAny>) -> PyResult>{ + let file_system_boxed: Box; + + if let Ok(py_real) = file_system.extract::>() { + file_system_boxed = Box::new(py_real.inner.clone()); + } else if let Ok(py_fake) = file_system.extract::>() { + file_system_boxed = Box::new(py_fake.inner.clone()); + } else { + return Err(PyTypeError::new_err( + "file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem", + )); + } + Ok(file_system_boxed) +} \ No newline at end of file From b11612e1eb0720b0640e9a5bf0ba64370e961de2 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 1 Aug 2025 14:07:08 +0100 Subject: [PATCH 5/6] Call scan_for_imports in Rust This removes all mention of ImportScanner from Python. --- rust/src/import_scanning.rs | 4 +-- rust/src/lib.rs | 30 +++++++++++++++++++++++ src/grimp/application/ports/filesystem.py | 6 ++--- src/grimp/application/scanning.py | 13 +++------- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs index aafae5fe..e74f7230 100644 --- a/rust/src/import_scanning.rs +++ b/rust/src/import_scanning.rs @@ -38,7 +38,7 @@ impl ImportScanner { #[allow(unused_variables)] #[new] #[pyo3(signature = (file_system, found_packages, include_external_packages=false))] - fn new( + pub fn new( py: Python, file_system: Bound<'_, PyAny>, found_packages: Bound<'_, PyAny>, @@ -59,7 +59,7 @@ impl ImportScanner { /// Statically analyses the given module and returns a set of Modules that /// it imports. #[pyo3(signature = (module, exclude_type_checking_imports=false))] - fn scan_for_imports<'a>( + pub fn scan_for_imports<'a>( &self, py: Python<'a>, module: Bound<'_, PyAny>, diff --git a/rust/src/lib.rs b/rust/src/lib.rs index a6d0357b..67fa5ab3 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -27,6 +27,7 @@ use std::collections::HashSet; #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?; + m.add_wrapped(wrap_pyfunction!(scan_for_imports))?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -41,6 +42,35 @@ fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { Ok(()) } +#[pyfunction] +fn scan_for_imports<'py,>( + py: Python<'py>, + module_files: Vec>, + found_packages: Bound<'py, PyAny>, + include_external_packages: bool, + exclude_type_checking_imports: bool, + file_system: Bound<'py, PyAny>, +) -> PyResult>{ + let scanner = ImportScanner::new( + py, + file_system, + found_packages, + include_external_packages, + )?; + + let dict = PyDict::new(py); + for module_file in module_files { + let py_module_instance = module_file.getattr("module").unwrap(); + let imports = scanner.scan_for_imports( + py, + py_module_instance, + exclude_type_checking_imports, + )?; + dict.set_item(module_file, imports).unwrap(); + } + Ok(dict) +} + #[pyfunction] fn parse_imported_objects_from_code<'py>( py: Python<'py>, diff --git a/src/grimp/application/ports/filesystem.py b/src/grimp/application/ports/filesystem.py index bfe129a2..6c1bfd9f 100644 --- a/src/grimp/application/ports/filesystem.py +++ b/src/grimp/application/ports/filesystem.py @@ -93,11 +93,11 @@ def convert_to_basic(self) -> BasicFileSystem: class BasicFileSystem(Protocol): """ - A more limited file system, used by the ImportScanner. + A more limited file system, used by the Rust-based scan_for_imports function. Having two different file system APIs is an interim approach, allowing us to - implement BasicFileSystem in Rust, for use with the ImportScanner, without needing - to implement the full range of methods used by other parts of Grimp. + implement BasicFileSystem in Rust without needing to implement the full range + of methods used by other parts of Grimp. Eventually we'll move the full implementation to Rust, and have a single FileSystem interface. diff --git a/src/grimp/application/scanning.py b/src/grimp/application/scanning.py index 70a81413..f7b4ba96 100644 --- a/src/grimp/application/scanning.py +++ b/src/grimp/application/scanning.py @@ -10,7 +10,6 @@ from grimp.application.ports.filesystem import AbstractFileSystem from grimp.application.ports.modulefinder import ModuleFile, FoundPackage -ImportScanner = rust.ImportScanner # Calling code can set this environment variable if it wants to tune when to switch to # multiprocessing, or set it to a large number to disable it altogether. @@ -72,19 +71,15 @@ def _scan_chunk( ) -> Dict[ModuleFile, Set[DirectImport]]: file_system: AbstractFileSystem = settings.FILE_SYSTEM basic_file_system = file_system.convert_to_basic() - import_scanner = ImportScanner( - file_system=basic_file_system, + return rust.scan_for_imports( + module_files=chunk, found_packages=found_packages, # Ensure that the passed exclude_type_checking_imports is definitely a boolean, # otherwise the Rust class will error. include_external_packages=bool(include_external_packages), + exclude_type_checking_imports=exclude_type_checking_imports, + file_system=basic_file_system, ) - return { - module_file: import_scanner.scan_for_imports( - module_file.module, exclude_type_checking_imports=exclude_type_checking_imports - ) - for module_file in chunk - } def _scan_chunks( From a3f6e0c87ac17311357ee27680266f9bb98cfcd0 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 1 Aug 2025 16:20:05 +0100 Subject: [PATCH 6/6] Remove ImportScanner from Rust --- rust/src/errors.rs | 1 + rust/src/import_parsing.rs | 21 +- rust/src/import_scanning.rs | 614 ++++++++++++++---------------- rust/src/lib.rs | 110 ++++-- src/grimp/application/scanning.py | 5 +- 5 files changed, 376 insertions(+), 375 deletions(-) diff --git a/rust/src/errors.rs b/rust/src/errors.rs index a4849481..46c0dd1e 100644 --- a/rust/src/errors.rs +++ b/rust/src/errors.rs @@ -20,6 +20,7 @@ pub enum GrimpError { #[error("Error parsing python code (line {line_number}, text {text}).")] ParseError { + module_filename: String, line_number: usize, text: String, #[source] diff --git a/rust/src/import_parsing.rs b/rust/src/import_parsing.rs index 0dd3e1a1..a43f7422 100644 --- a/rust/src/import_parsing.rs +++ b/rust/src/import_parsing.rs @@ -3,8 +3,6 @@ use ruff_python_ast::statement_visitor::{StatementVisitor, walk_body, walk_stmt} use ruff_python_ast::{Expr, Stmt}; use ruff_python_parser::parse_module; use ruff_source_file::{LineIndex, SourceCode}; -use std::fs; -use std::path::Path; #[derive(Debug, PartialEq, Eq, Clone)] pub struct ImportedObject { @@ -30,12 +28,10 @@ impl ImportedObject { } } -pub fn parse_imports(path: &Path) -> GrimpResult> { - let code = fs::read_to_string(path).expect("failed to read file"); - parse_imports_from_code(&code) -} - -pub fn parse_imports_from_code(code: &str) -> GrimpResult> { +pub fn parse_imports_from_code( + code: &str, + module_filename: &str, +) -> GrimpResult> { let line_index = LineIndex::from_source_text(code); let source_code = SourceCode::new(code, &line_index); @@ -46,6 +42,7 @@ pub fn parse_imports_from_code(code: &str) -> GrimpResult> { let line_number = location_index.get(); let text = source_code.line_text(location_index).trim(); Err(GrimpError::ParseError { + module_filename: module_filename.to_string(), line_number, text: text.to_owned(), parse_error: e, @@ -157,13 +154,13 @@ mod tests { #[test] fn test_parse_empty_string() { - let imports = parse_imports_from_code("").unwrap(); + let imports = parse_imports_from_code("", "some_filename.py").unwrap(); assert!(imports.is_empty()); } fn parse_and_check(case: (&str, &[&str])) { let (code, expected_imports) = case; - let imports = parse_imports_from_code(code).unwrap(); + let imports = parse_imports_from_code(code, "some_filename.py").unwrap(); assert_eq!( expected_imports, imports.into_iter().map(|i| i.name).collect::>() @@ -172,7 +169,7 @@ mod tests { fn parse_and_check_with_typechecking_only(case: (&str, &[(&str, bool)])) { let (code, expected_imports) = case; - let imports = parse_imports_from_code(code).unwrap(); + let imports = parse_imports_from_code(code, "some_filename.py").unwrap(); assert_eq!( expected_imports .iter() @@ -526,6 +523,7 @@ import a from b import c from d import (e) from f import *", + "some_filename.py", ) .unwrap(); assert_eq!( @@ -552,6 +550,7 @@ if TYPE_CHECKING: from d import (e) if TYPE_CHECKING: from f import *", + "some_filename.py", ) .unwrap(); assert_eq!( diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs index e74f7230..c2d3d141 100644 --- a/rust/src/import_scanning.rs +++ b/rust/src/import_scanning.rs @@ -1,334 +1,24 @@ -use crate::errors::GrimpError; +/// Statically analyses some Python modules for import statements within their shared package. +use crate::errors::GrimpResult; use crate::filesystem::{FileSystem, PyFakeBasicFileSystem, PyRealBasicFileSystem}; use crate::import_parsing; use crate::module_finding::{FoundPackage, Module}; use itertools::Itertools; -use pyo3::exceptions::{PyFileNotFoundError, PyTypeError}; +use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3::types::{PyDict, PySet}; -use std::collections::HashSet; - -/// Statically analyses some Python modules for import statements within their shared package. -#[pyclass] -pub struct ImportScanner { - file_system: Box, - found_packages: HashSet, - include_external_packages: bool, - modules: HashSet, -} +use std::collections::{HashMap, HashSet}; +use std::io::{self, ErrorKind}; #[derive(Debug, Hash, Eq, PartialEq)] -struct DirectImport { +pub struct DirectImport { importer: String, imported: String, line_number: usize, line_contents: String, } -#[pymethods] -impl ImportScanner { - /// Python args: - /// - /// - file_system: The file system interface to use. (A BasicFileSystem.) - /// - found_packages: Set of FoundPackages containing all the modules - /// for analysis. - /// - include_external_packages: Whether to include imports of external modules (i.e. - /// modules not contained in modules_by_package_directory) - /// in the results. - #[allow(unused_variables)] - #[new] - #[pyo3(signature = (file_system, found_packages, include_external_packages=false))] - pub fn new( - py: Python, - file_system: Bound<'_, PyAny>, - found_packages: Bound<'_, PyAny>, - include_external_packages: bool, - ) -> PyResult { - let file_system_boxed = get_file_system_boxed(&file_system)?; - let found_packages_rust = _py_found_packages_to_rust(found_packages); - let modules = _get_modules_from_found_packages(&found_packages_rust); - - Ok(ImportScanner { - file_system: file_system_boxed, - found_packages: found_packages_rust, - modules, - include_external_packages, - }) - } - - /// Statically analyses the given module and returns a set of Modules that - /// it imports. - #[pyo3(signature = (module, exclude_type_checking_imports=false))] - pub fn scan_for_imports<'a>( - &self, - py: Python<'a>, - module: Bound<'_, PyAny>, - exclude_type_checking_imports: bool, - ) -> PyResult> { - let module_rust = module.extract().unwrap(); - let found_package_for_module = self._lookup_found_package_for_module(&module_rust); - let module_filename = - self._determine_module_filename(&module_rust, found_package_for_module)?; - let module_contents = self.file_system.read(&module_filename).unwrap(); - - let parse_result = import_parsing::parse_imports_from_code(&module_contents); - match parse_result { - Err(GrimpError::ParseError { - line_number, text, .. - }) => { - // TODO: define SourceSyntaxError using pyo3. - let exceptions_pymodule = PyModule::import(py, "grimp.exceptions").unwrap(); - let py_exception_class = exceptions_pymodule.getattr("SourceSyntaxError").unwrap(); - let exception = py_exception_class - .call1((module_filename, line_number, text)) - .unwrap(); - return Err(PyErr::from_value(exception)); - } - Err(e) => { - return Err(e.into()); - } - _ => (), - } - let imported_objects = parse_result.unwrap(); - let is_package = self._module_is_package(&module_filename); - - let mut imports: HashSet = HashSet::new(); - for imported_object in imported_objects { - // Don't include type checking imports, if specified. - if exclude_type_checking_imports && imported_object.typechecking_only { - continue; - } - - // Resolve relative imports. - let imported_object_name = self._get_absolute_imported_object_name( - &module_rust, - is_package, - &imported_object.name, - ); - - // Resolve imported module. - match self._get_internal_module(&imported_object_name) { - Some(imported_module) => { - // It's an internal import. - imports.insert(DirectImport { - importer: module_rust.name.to_string(), - imported: imported_module.name.to_string(), - line_number: imported_object.line_number, - line_contents: imported_object.line_contents, - }); - } - None => { - // It's an external import. - if self.include_external_packages { - if let Some(imported_module) = - self._distill_external_module(&imported_object_name) - { - imports.insert(DirectImport { - importer: module_rust.name.to_string(), - imported: imported_module, - line_number: imported_object.line_number, - line_contents: imported_object.line_contents, - }); - } - } - } - } - } - - Ok(self._to_py_direct_imports(py, &imports)) - } -} - -impl ImportScanner { - fn _lookup_found_package_for_module(&self, module: &Module) -> &FoundPackage { - // TODO: it's probably inefficient to do this every time we look up a module. - for found_package in &self.found_packages { - for module_file in &found_package.module_files { - if module_file.module == *module { - return found_package; - } - } - } - panic!("Could not lookup found package for module {module}"); - } - - fn _determine_module_filename( - &self, - module: &Module, - found_package: &FoundPackage, - ) -> PyResult { - // TODO: could we do all this with &str instead of String? - let top_level_components: Vec = - found_package.name.split(".").map(str::to_string).collect(); - let module_components: Vec = module.name.split(".").map(str::to_string).collect(); - let leaf_components = module_components[top_level_components.len()..].to_vec(); - - let mut filename_root_components: Vec = vec![found_package.directory.clone()]; - filename_root_components.extend(leaf_components); - let filename_root = self.file_system.join(filename_root_components); - let normal_module_path = format!("{filename_root}.py"); - let init_module_path = self - .file_system - .join(vec![filename_root, "__init__.py".to_string()]); - let candidate_filenames = [normal_module_path, init_module_path]; - for candidate_filename in candidate_filenames { - if self.file_system.exists(&candidate_filename) { - return Ok(candidate_filename); - } - } - Err(PyFileNotFoundError::new_err(format!( - "Could not find module {module}." - ))) - } - - fn _module_is_package(&self, module_filename: &str) -> bool { - self.file_system.split(module_filename).1 == "__init__.py" - } - - #[allow(unused_variables)] - fn _get_absolute_imported_object_name( - &self, - module: &Module, - is_package: bool, - imported_object_name: &str, - ) -> String { - let leading_dots_count = count_leading_dots(imported_object_name); - if leading_dots_count == 0 { - return imported_object_name.to_string(); - } - let imported_object_name_base: String; - if is_package { - if leading_dots_count == 1 { - imported_object_name_base = module.name.clone(); - } else { - let parts: Vec<&str> = module.name.split('.').collect(); - imported_object_name_base = - parts[0..parts.len() - leading_dots_count + 1].join("."); - } - } else { - let parts: Vec<&str> = module.name.split('.').collect(); - imported_object_name_base = parts[0..parts.len() - leading_dots_count].join("."); - } - - format!( - "{}.{}", - imported_object_name_base, - &imported_object_name[leading_dots_count..] - ) - } - - #[allow(unused_variables)] - fn _get_internal_module(&self, imported_object_name: &str) -> Option { - let candidate_module = Module { - name: imported_object_name.to_string(), - }; - if self.modules.contains(&candidate_module) { - return Some(candidate_module); - } - if let Some((parent_name, _)) = imported_object_name.rsplit_once('.') { - let parent = Module { - name: parent_name.to_string(), - }; - if self.modules.contains(&parent) { - return Some(parent); - } - } - None - } - - /// Given a module that we already know is external, turn it into a module to add to the graph. - // - // The 'distillation' process involves removing any unwanted subpackages. For example, - // django.models.db should be turned into simply django. - // The process is more complex for potential namespace packages, as it's not possible to - // determine the portion package simply from name. Rather than adding the overhead of a - // filesystem read, we just get the shallowest component that does not clash with an internal - // module namespace. Take, for example, foo.blue.alpha.one. If one of the found - // packages is foo.blue.beta, the module will be distilled to foo.blue.alpha. - // Alternatively, if the found package is foo.green, the distilled module will - // be foo.blue. - #[allow(unused_variables)] - fn _distill_external_module(&self, module_name: &str) -> Option { - let module_root = module_name.split(".").next().unwrap(); - // If it's a module that is a parent of one of the internal packages, return None - // as it doesn't make sense and is probably an import of a namespace package. - for package in &self.found_packages { - if module_is_descendant(&package.name, module_name) { - return None; - } - } - - // If it shares a namespace with an internal module, get the shallowest component that does - // not clash with an internal module namespace. - let mut candidate_portions: HashSet = HashSet::new(); - let mut sorted_found_packages: Vec<&FoundPackage> = self.found_packages.iter().collect(); - sorted_found_packages.sort_by_key(|package| &package.name); - sorted_found_packages.reverse(); - - for found_package in sorted_found_packages { - let root_module = &found_package.name; - if module_is_descendant(root_module, module_root) { - let mut internal_path_components: Vec<&str> = root_module.split(".").collect(); - let mut external_path_components: Vec<&str> = module_name.split(".").collect(); - let mut external_namespace_components: Vec<&str> = vec![]; - while external_path_components[0] == internal_path_components[0] { - external_namespace_components.push(external_path_components.remove(0)); - internal_path_components.remove(0); - } - external_namespace_components.push(external_path_components[0]); - - candidate_portions.insert(Module { - name: external_namespace_components.join("."), - }); - } - } - - if !candidate_portions.is_empty() { - // If multiple found packages share a namespace with this module, use the deepest one - // as we know that that will be a namespace too. - let deepest_candidate_portion = candidate_portions - .iter() - .sorted_by_key(|portion| portion.name.split(".").collect::>().len()) - .next_back() - .unwrap(); - Some(deepest_candidate_portion.name.clone()) - } else { - Some(module_name.split('.').next().unwrap().to_string()) - } - } - - fn _to_py_direct_imports<'a>( - &self, - py: Python<'a>, - rust_imports: &HashSet, - ) -> Bound<'a, PySet> { - // TODO: do this in the constructor. - let valueobjects_pymodule = PyModule::import(py, "grimp.domain.valueobjects").unwrap(); - let py_module_class = valueobjects_pymodule.getattr("Module").unwrap(); - let py_direct_import_class = valueobjects_pymodule.getattr("DirectImport").unwrap(); - - let pyset = PySet::empty(py).unwrap(); - - for rust_import in rust_imports { - let importer = py_module_class.call1((&rust_import.importer,)).unwrap(); - let imported = py_module_class.call1((&rust_import.imported,)).unwrap(); - let kwargs = PyDict::new(py); - kwargs.set_item("importer", &importer).unwrap(); - kwargs.set_item("imported", &imported).unwrap(); - kwargs - .set_item("line_number", rust_import.line_number) - .unwrap(); - kwargs - .set_item("line_contents", &rust_import.line_contents) - .unwrap(); - let py_direct_import = py_direct_import_class.call((), Some(&kwargs)).unwrap(); - pyset.add(&py_direct_import).unwrap(); - } - - pyset - } -} -fn _py_found_packages_to_rust(py_found_packages: Bound<'_, PyAny>) -> HashSet { +pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet { let py_set = py_found_packages .downcast::() .expect("Expected py_found_packages to be a Python set."); @@ -346,7 +36,7 @@ fn _py_found_packages_to_rust(py_found_packages: Bound<'_, PyAny>) -> HashSet) -> HashSet { +pub fn get_modules_from_found_packages(found_packages: &HashSet) -> HashSet { let mut modules = HashSet::new(); for package in found_packages { for module_file in &package.module_files { @@ -364,7 +54,10 @@ fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool { module_name.starts_with(&format!("{potential_ancestor}.")) } -fn get_file_system_boxed<'py>(file_system: &Bound<'_, PyAny>) -> PyResult>{ +#[allow(clippy::borrowed_box)] +pub fn get_file_system_boxed<'py>( + file_system: &Bound<'py, PyAny>, +) -> PyResult> { let file_system_boxed: Box; if let Ok(py_real) = file_system.extract::>() { @@ -377,4 +70,285 @@ fn get_file_system_boxed<'py>(file_system: &Bound<'_, PyAny>) -> PyResult, + found_packages: &HashSet, + include_external_packages: bool, + modules: &HashSet, + exclude_type_checking_imports: bool, +) -> GrimpResult>> { + let mut imports_by_module = HashMap::new(); + + for module in modules { + let imports_for_module = scan_for_imports_no_py_single_module( + module, + file_system, + found_packages, + &get_modules_from_found_packages(found_packages), + include_external_packages, + exclude_type_checking_imports, + )?; + imports_by_module.insert(module.clone(), imports_for_module); + } + Ok(imports_by_module) +} + +#[allow(clippy::borrowed_box)] +fn scan_for_imports_no_py_single_module( + module: &Module, + file_system: &Box, + found_packages: &HashSet, + all_modules: &HashSet, + include_external_packages: bool, + exclude_type_checking_imports: bool, +) -> GrimpResult> { + let mut imports: HashSet = HashSet::new(); + let found_package_for_module = _lookup_found_package_for_module(module, found_packages); + let module_filename = + _determine_module_filename(module, found_package_for_module, file_system).unwrap(); + let module_contents = file_system.read(&module_filename).unwrap(); + let imported_objects = + import_parsing::parse_imports_from_code(&module_contents, &module_filename)?; + + let is_package = _module_is_package(&module_filename, file_system); + + for imported_object in imported_objects { + // Don't include type checking imports, if specified. + if exclude_type_checking_imports && imported_object.typechecking_only { + continue; + } + + // Resolve relative imports. + let imported_object_name = + _get_absolute_imported_object_name(module, is_package, &imported_object.name); + + // Resolve imported module. + match _get_internal_module(&imported_object_name, all_modules) { + Some(imported_module) => { + // It's an internal import. + imports.insert(DirectImport { + importer: module.name.to_string(), + imported: imported_module.name.to_string(), + line_number: imported_object.line_number, + line_contents: imported_object.line_contents, + }); + } + None => { + // It's an external import. + if include_external_packages { + if let Some(imported_module) = + _distill_external_module(&imported_object_name, found_packages) + { + imports.insert(DirectImport { + importer: module.name.to_string(), + imported: imported_module, + line_number: imported_object.line_number, + line_contents: imported_object.line_contents, + }); + } + } + } + } + } + + Ok(imports) +} + +pub fn to_py_direct_imports<'a>( + py: Python<'a>, + rust_imports: &HashSet, +) -> Bound<'a, PySet> { + let valueobjects_pymodule = PyModule::import(py, "grimp.domain.valueobjects").unwrap(); + let py_module_class = valueobjects_pymodule.getattr("Module").unwrap(); + let py_direct_import_class = valueobjects_pymodule.getattr("DirectImport").unwrap(); + + let pyset = PySet::empty(py).unwrap(); + + for rust_import in rust_imports { + let importer = py_module_class.call1((&rust_import.importer,)).unwrap(); + let imported = py_module_class.call1((&rust_import.imported,)).unwrap(); + let kwargs = PyDict::new(py); + kwargs.set_item("importer", &importer).unwrap(); + kwargs.set_item("imported", &imported).unwrap(); + kwargs + .set_item("line_number", rust_import.line_number) + .unwrap(); + kwargs + .set_item("line_contents", &rust_import.line_contents) + .unwrap(); + let py_direct_import = py_direct_import_class.call((), Some(&kwargs)).unwrap(); + pyset.add(&py_direct_import).unwrap(); + } + + pyset +} + +fn _lookup_found_package_for_module<'b>( + module: &Module, + found_packages: &'b HashSet, +) -> &'b FoundPackage { + // TODO: it's probably inefficient to do this every time we look up a module. + for found_package in found_packages { + for module_file in &found_package.module_files { + if module_file.module == *module { + return found_package; + } + } + } + panic!("Could not lookup found package for module {module}"); +} + +#[allow(clippy::borrowed_box)] +fn _determine_module_filename( + module: &Module, + found_package: &FoundPackage, + file_system: &Box, +) -> io::Result { + // TODO: could we do all this with &str instead of String? + let top_level_components: Vec = + found_package.name.split(".").map(str::to_string).collect(); + let module_components: Vec = module.name.split(".").map(str::to_string).collect(); + let leaf_components = module_components[top_level_components.len()..].to_vec(); + + let mut filename_root_components: Vec = vec![found_package.directory.clone()]; + filename_root_components.extend(leaf_components); + let filename_root = file_system.join(filename_root_components); + let normal_module_path = format!("{filename_root}.py"); + let init_module_path = file_system.join(vec![filename_root, "__init__.py".to_string()]); + let candidate_filenames = [normal_module_path, init_module_path]; + for candidate_filename in candidate_filenames { + if file_system.exists(&candidate_filename) { + return Ok(candidate_filename); + } + } + Err(io::Error::new( + ErrorKind::NotFound, + "Could not find module {module}.", + )) +} + +#[allow(clippy::borrowed_box)] +fn _module_is_package( + module_filename: &str, + file_system: &Box, +) -> bool { + file_system.split(module_filename).1 == "__init__.py" +} + +fn _get_absolute_imported_object_name( + module: &Module, + is_package: bool, + imported_object_name: &str, +) -> String { + let leading_dots_count = count_leading_dots(imported_object_name); + if leading_dots_count == 0 { + return imported_object_name.to_string(); + } + let imported_object_name_base: String; + if is_package { + if leading_dots_count == 1 { + imported_object_name_base = module.name.clone(); + } else { + let parts: Vec<&str> = module.name.split('.').collect(); + imported_object_name_base = parts[0..parts.len() - leading_dots_count + 1].join("."); + } + } else { + let parts: Vec<&str> = module.name.split('.').collect(); + imported_object_name_base = parts[0..parts.len() - leading_dots_count].join("."); + } + + format!( + "{}.{}", + imported_object_name_base, + &imported_object_name[leading_dots_count..] + ) +} + +fn _get_internal_module( + imported_object_name: &str, + all_modules: &HashSet, +) -> Option { + let candidate_module = Module { + name: imported_object_name.to_string(), + }; + if all_modules.contains(&candidate_module) { + return Some(candidate_module); + } + if let Some((parent_name, _)) = imported_object_name.rsplit_once('.') { + let parent = Module { + name: parent_name.to_string(), + }; + if all_modules.contains(&parent) { + return Some(parent); + } + } + None +} + +/// Given a module that we already know is external, turn it into a module to add to the graph. +// +// The 'distillation' process involves removing any unwanted subpackages. For example, +// django.models.db should be turned into simply django. +// The process is more complex for potential namespace packages, as it's not possible to +// determine the portion package simply from name. Rather than adding the overhead of a +// filesystem read, we just get the shallowest component that does not clash with an internal +// module namespace. Take, for example, foo.blue.alpha.one. If one of the found +// packages is foo.blue.beta, the module will be distilled to foo.blue.alpha. +// Alternatively, if the found package is foo.green, the distilled module will +// be foo.blue. +fn _distill_external_module( + module_name: &str, + found_packages: &HashSet, +) -> Option { + let module_root = module_name.split(".").next().unwrap(); + // If it's a module that is a parent of one of the internal packages, return None + // as it doesn't make sense and is probably an import of a namespace package. + for package in found_packages { + if module_is_descendant(&package.name, module_name) { + return None; + } + } + + // If it shares a namespace with an internal module, get the shallowest component that does + // not clash with an internal module namespace. + let mut candidate_portions: HashSet = HashSet::new(); + let mut sorted_found_packages: Vec<&FoundPackage> = found_packages.iter().collect(); + sorted_found_packages.sort_by_key(|package| &package.name); + sorted_found_packages.reverse(); + + for found_package in sorted_found_packages { + let root_module = &found_package.name; + if module_is_descendant(root_module, module_root) { + let mut internal_path_components: Vec<&str> = root_module.split(".").collect(); + let mut external_path_components: Vec<&str> = module_name.split(".").collect(); + let mut external_namespace_components: Vec<&str> = vec![]; + while external_path_components[0] == internal_path_components[0] { + external_namespace_components.push(external_path_components.remove(0)); + internal_path_components.remove(0); + } + external_namespace_components.push(external_path_components[0]); + + candidate_portions.insert(Module { + name: external_namespace_components.join("."), + }); + } + } + + if !candidate_portions.is_empty() { + // If multiple found packages share a namespace with this module, use the deepest one + // as we know that that will be a namespace too. + let deepest_candidate_portion = candidate_portions + .iter() + .sorted_by_key(|portion| portion.name.split(".").collect::>().len()) + .next_back() + .unwrap(); + Some(deepest_candidate_portion.name.clone()) + } else { + Some(module_name.split('.').next().unwrap().to_string()) + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 67fa5ab3..3fd3cd92 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -12,7 +12,10 @@ use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContain use crate::filesystem::{PyFakeBasicFileSystem, PyRealBasicFileSystem}; use crate::graph::higher_order_queries::Level; use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator}; -use crate::import_scanning::ImportScanner; +use crate::import_scanning::{ + get_file_system_boxed, py_found_packages_to_rust, scan_for_imports_no_py, + to_py_direct_imports, +}; use crate::module_expressions::ModuleExpression; use derive_new::new; use itertools::Itertools; @@ -26,12 +29,10 @@ use std::collections::HashSet; #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?; m.add_wrapped(wrap_pyfunction!(scan_for_imports))?; m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add("ModuleNotPresent", py.get_type::())?; m.add("NoSuchContainer", py.get_type::())?; m.add( @@ -42,57 +43,82 @@ fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { Ok(()) } +/// Statically analyses the given module and returns a set of Modules that +/// it imports. +/// Python args: +/// +/// - module_files The modules to scan. +/// - found_packages: Set of FoundPackages containing all the modules +/// for analysis. +/// - include_external_packages: Whether to include imports of external modules (i.e. +/// modules not contained in modules_by_package_directory) +/// in the results. +/// - exclude_type_checking_imports: If True, don't include imports behind TYPE_CHECKING guards. +/// - file_system: The file system interface to use. (A BasicFileSystem.) +/// +/// Returns dict[Module, set[DirectImport]]. #[pyfunction] -fn scan_for_imports<'py,>( +fn scan_for_imports<'py>( py: Python<'py>, module_files: Vec>, found_packages: Bound<'py, PyAny>, include_external_packages: bool, exclude_type_checking_imports: bool, file_system: Bound<'py, PyAny>, -) -> PyResult>{ - let scanner = ImportScanner::new( - py, - file_system, - found_packages, +) -> PyResult> { + let valueobjects_pymodule = PyModule::import(py, "grimp.domain.valueobjects").unwrap(); + let py_module_class = valueobjects_pymodule.getattr("Module").unwrap(); + let file_system_boxed = get_file_system_boxed(&file_system)?; + let found_packages_rust = py_found_packages_to_rust(&found_packages); + let modules_rust: HashSet = module_files + .iter() + .map(|module_file| { + module_file + .getattr("module") + .unwrap() + .extract::() + .unwrap() + }) + .collect(); + + let imports_by_module_result = scan_for_imports_no_py( + &file_system_boxed, + &found_packages_rust, include_external_packages, - )?; - - let dict = PyDict::new(py); - for module_file in module_files { - let py_module_instance = module_file.getattr("module").unwrap(); - let imports = scanner.scan_for_imports( - py, - py_module_instance, - exclude_type_checking_imports, - )?; - dict.set_item(module_file, imports).unwrap(); + &modules_rust, + exclude_type_checking_imports, + ); + + match imports_by_module_result { + Err(GrimpError::ParseError { + module_filename, line_number, text, .. + }) => { + // TODO: define SourceSyntaxError using pyo3. + let exceptions_pymodule = PyModule::import(py, "grimp.exceptions").unwrap(); + let py_exception_class = exceptions_pymodule.getattr("SourceSyntaxError").unwrap(); + let exception = py_exception_class + .call1((module_filename, line_number, text)) + .unwrap(); + return Err(PyErr::from_value(exception)); + } + Err(e) => { + return Err(e.into()); + } + _ => (), } - Ok(dict) -} + let imports_by_module = imports_by_module_result.unwrap(); -#[pyfunction] -fn parse_imported_objects_from_code<'py>( - py: Python<'py>, - module_code: &str, -) -> PyResult>> { - let imports = import_parsing::parse_imports_from_code(module_code)?; - - Ok(imports - .into_iter() - .map(|import| { - let dict = PyDict::new(py); - dict.set_item("name", import.name).unwrap(); - dict.set_item("line_number", import.line_number).unwrap(); - dict.set_item("line_contents", import.line_contents) - .unwrap(); - dict.set_item("typechecking_only", import.typechecking_only) - .unwrap(); - dict - }) - .collect()) + let imports_by_module_py = PyDict::new(py); + for (module, imports) in imports_by_module.iter() { + let py_module_instance = py_module_class.call1((module.name.clone(),)).unwrap(); + let py_imports = to_py_direct_imports(py, imports); + imports_by_module_py.set_item(py_module_instance, py_imports).unwrap(); + } + + Ok(imports_by_module_py) } + #[pyclass(name = "Graph")] struct GraphWrapper { _graph: Graph, diff --git a/src/grimp/application/scanning.py b/src/grimp/application/scanning.py index f7b4ba96..ed6fdca1 100644 --- a/src/grimp/application/scanning.py +++ b/src/grimp/application/scanning.py @@ -5,7 +5,7 @@ import joblib # type: ignore from grimp import _rustgrimp as rust # type: ignore[attr-defined] -from grimp.domain.valueobjects import DirectImport +from grimp.domain.valueobjects import DirectImport, Module from grimp.application.config import settings from grimp.application.ports.filesystem import AbstractFileSystem from grimp.application.ports.modulefinder import ModuleFile, FoundPackage @@ -71,7 +71,7 @@ def _scan_chunk( ) -> Dict[ModuleFile, Set[DirectImport]]: file_system: AbstractFileSystem = settings.FILE_SYSTEM basic_file_system = file_system.convert_to_basic() - return rust.scan_for_imports( + imports_by_module: dict[Module, set[DirectImport]] = rust.scan_for_imports( module_files=chunk, found_packages=found_packages, # Ensure that the passed exclude_type_checking_imports is definitely a boolean, @@ -80,6 +80,7 @@ def _scan_chunk( exclude_type_checking_imports=exclude_type_checking_imports, file_system=basic_file_system, ) + return {module_file: imports_by_module[module_file.module] for module_file in chunk} def _scan_chunks(