diff --git a/rust/src/caching.rs b/rust/src/caching.rs index cb00b417..84a64bad 100644 --- a/rust/src/caching.rs +++ b/rust/src/caching.rs @@ -2,10 +2,34 @@ use crate::errors::{GrimpError, GrimpResult}; use crate::filesystem::get_file_system_boxed; use crate::import_scanning::{DirectImport, imports_by_module_to_py}; use crate::module_finding::Module; -use pyo3::types::PyDict; -use pyo3::{Bound, PyAny, PyResult, Python, pyfunction}; +use pyo3::types::PyAnyMethods; +use pyo3::types::{PyDict, PySet}; +use pyo3::types::{PyDictMethods, PySetMethods}; +use pyo3::{Bound, FromPyObject, PyAny, PyResult, Python, pyfunction}; use std::collections::{HashMap, HashSet}; +/// Writes the cache file containing all the imports for a given package. +/// Args: +/// - filename: str +/// - imports_by_module: dict[Module, Set[DirectImport]] +/// - file_system: The file system interface to use. (A BasicFileSystem.) +#[pyfunction] +pub fn write_cache_data_map_file<'py>( + filename: &str, + imports_by_module: Bound<'py, PyDict>, + file_system: Bound<'py, PyAny>, +) -> PyResult<()> { + let mut file_system_boxed = get_file_system_boxed(&file_system)?; + + let ImportsByModule(imports_by_module_rust) = imports_by_module.extract()?; + + let file_contents = serialize_imports_by_module(&imports_by_module_rust); + + file_system_boxed.write(filename, &file_contents)?; + + Ok(()) +} + /// Reads the cache file containing all the imports for a given package. /// Args: /// - filename: str @@ -26,6 +50,52 @@ pub fn read_cache_data_map_file<'py>( Ok(imports_by_module_to_py(py, imports_by_module)) } +/// A newtype wrapper for HashMap> that implements FromPyObject. +pub struct ImportsByModule(pub HashMap>); + +impl<'py> FromPyObject<'py> for ImportsByModule { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + let py_dict = ob.downcast::()?; + let mut imports_by_module_rust = HashMap::new(); + + for (py_key, py_value) in py_dict.iter() { + let module: Module = py_key.extract()?; + let py_set = py_value.downcast::()?; + let mut hashset: HashSet = HashSet::new(); + for element in py_set.iter() { + let direct_import: DirectImport = element.extract()?; + hashset.insert(direct_import); + } + imports_by_module_rust.insert(module, hashset); + } + + Ok(ImportsByModule(imports_by_module_rust)) + } +} + +fn serialize_imports_by_module( + imports_by_module: &HashMap>, +) -> String { + let raw_map: HashMap<&str, Vec<(&str, usize, &str)>> = imports_by_module + .iter() + .map(|(module, imports)| { + let imports_vec: Vec<(&str, usize, &str)> = imports + .iter() + .map(|import| { + ( + import.imported.as_str(), + import.line_number, + import.line_contents.as_str(), + ) + }) + .collect(); + (module.name.as_str(), imports_vec) + }) + .collect(); + + serde_json::to_string(&raw_map).expect("Failed to serialize to JSON") +} + pub fn parse_json_to_map( json_str: &str, filename: &str, diff --git a/rust/src/filesystem.rs b/rust/src/filesystem.rs index 4b54781c..c234aa64 100644 --- a/rust/src/filesystem.rs +++ b/rust/src/filesystem.rs @@ -5,8 +5,10 @@ use regex::Regex; use std::collections::HashMap; use std::ffi::OsStr; use std::fs; +use std::fs::File; +use std::io::prelude::*; use std::path::{Path, PathBuf}; -use std::sync::LazyLock; +use std::sync::{Arc, LazyLock, Mutex}; use unindent::unindent; static ENCODING_RE: LazyLock = @@ -22,17 +24,19 @@ pub trait FileSystem: Send + Sync { fn exists(&self, file_name: &str) -> bool; fn read(&self, file_name: &str) -> PyResult; + + fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()>; } #[derive(Clone)] #[pyclass] -pub struct RealBasicFileSystem {} +struct RealBasicFileSystem {} // Implements a BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem) // that actually reads files. #[pyclass(name = "RealBasicFileSystem")] pub struct PyRealBasicFileSystem { - pub inner: RealBasicFileSystem, + inner: RealBasicFileSystem, } impl FileSystem for RealBasicFileSystem { @@ -129,6 +133,16 @@ impl FileSystem for RealBasicFileSystem { }) } } + + fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> { + let file_path: PathBuf = file_name.into(); + if let Some(patent_dir) = file_path.parent() { + fs::create_dir_all(patent_dir)?; + } + File::create(file_path)? + .write_all(contents.as_bytes()) + .map_err(Into::into) + } } #[pymethods] @@ -161,19 +175,23 @@ impl PyRealBasicFileSystem { fn read(&self, file_name: &str) -> PyResult { self.inner.read(file_name) } + + fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> { + self.inner.write(file_name, contents) + } } type FileSystemContents = HashMap; #[derive(Clone)] -pub struct FakeBasicFileSystem { - contents: Box, +struct FakeBasicFileSystem { + contents: Arc>, } // Implements BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem). #[pyclass(name = "FakeBasicFileSystem")] pub struct PyFakeBasicFileSystem { - pub inner: FakeBasicFileSystem, + inner: FakeBasicFileSystem, } impl FakeBasicFileSystem { @@ -190,7 +208,7 @@ impl FakeBasicFileSystem { parsed_contents.extend(unindented_map); }; Ok(FakeBasicFileSystem { - contents: Box::new(parsed_contents), + contents: Arc::new(Mutex::new(parsed_contents)), }) } } @@ -232,17 +250,25 @@ impl FileSystem for FakeBasicFileSystem { /// Checks if a file or directory exists within the file system. fn exists(&self, file_name: &str) -> bool { - self.contents.contains_key(file_name) + self.contents.lock().unwrap().contains_key(file_name) } fn read(&self, file_name: &str) -> PyResult { - match self.contents.get(file_name) { - Some(file_name) => Ok(file_name.clone()), + let contents = self.contents.lock().unwrap(); + match contents.get(file_name) { + Some(file_contents) => Ok(file_contents.clone()), None => Err(PyFileNotFoundError::new_err(format!( "No such file: {file_name}" ))), } } + + #[allow(unused_variables)] + fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> { + let mut contents_mut = self.contents.lock().unwrap(); + contents_mut.insert(file_name.to_string(), contents.to_string()); + Ok(()) + } } #[pymethods] @@ -278,6 +304,10 @@ impl PyFakeBasicFileSystem { self.inner.read(file_name) } + fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> { + self.inner.write(file_name, contents) + } + // Temporary workaround method for Python tests. fn convert_to_basic(&self) -> PyResult { Ok(PyFakeBasicFileSystem { @@ -289,7 +319,7 @@ impl PyFakeBasicFileSystem { /// Parses an indented string representing a file system structure /// into a HashMap where keys are full file paths. /// See tests.adaptors.filesystem.FakeFileSystem for the API. -pub fn parse_indented_file_system_string(file_system_string: &str) -> HashMap { +fn parse_indented_file_system_string(file_system_string: &str) -> HashMap { let mut file_paths_map: HashMap = HashMap::new(); let mut path_stack: Vec = Vec::new(); // Stores current directory path components let mut first_line = true; // Flag to handle the very first path component @@ -381,7 +411,6 @@ pub fn get_file_system_boxed<'py>( file_system: &Bound<'py, PyAny>, ) -> PyResult> { let file_system_boxed: Box; - if let Ok(py_real) = file_system.extract::>() { file_system_boxed = Box::new(py_real.inner.clone()); } else if let Ok(py_fake) = file_system.extract::>() { @@ -391,5 +420,6 @@ pub fn get_file_system_boxed<'py>( "file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem", )); } + Ok(file_system_boxed) } diff --git a/rust/src/import_scanning.rs b/rust/src/import_scanning.rs index 0719c035..dee2a420 100644 --- a/rust/src/import_scanning.rs +++ b/rust/src/import_scanning.rs @@ -18,7 +18,23 @@ pub struct DirectImport { pub line_contents: String, } -pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet { +impl<'py> FromPyObject<'py> for DirectImport { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + let importer: String = ob.getattr("importer")?.getattr("name")?.extract()?; + let imported: String = ob.getattr("imported")?.getattr("name")?.extract()?; + let line_number: usize = ob.getattr("line_number")?.extract()?; + let line_contents: String = ob.getattr("line_contents")?.extract()?; + + Ok(DirectImport { + importer, + imported, + line_number, + line_contents, + }) + } +} + +fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet { let py_set = py_found_packages .downcast::() .expect("Expected py_found_packages to be a Python set."); @@ -36,7 +52,7 @@ pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSe rust_found_packages } -pub fn get_modules_from_found_packages(found_packages: &HashSet) -> HashSet { +fn get_modules_from_found_packages(found_packages: &HashSet) -> HashSet { let mut modules = HashSet::new(); for package in found_packages { for module_file in &package.module_files { @@ -57,7 +73,7 @@ fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool { /// Statically analyses the given module and returns a set of Modules that /// it imports. #[allow(clippy::borrowed_box)] -pub fn scan_for_imports_no_py( +fn scan_for_imports_no_py( file_system: &Box, found_packages: &HashSet, include_external_packages: bool, @@ -153,7 +169,7 @@ fn scan_for_imports_no_py_single_module( Ok(imports) } -pub fn to_py_direct_imports<'a>( +fn to_py_direct_imports<'a>( py: Python<'a>, rust_imports: &HashSet, ) -> Bound<'a, PySet> { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index e0453f26..e63b501b 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -18,6 +18,9 @@ mod _rustgrimp { #[pymodule_export] use crate::caching::read_cache_data_map_file; + #[pymodule_export] + use crate::caching::write_cache_data_map_file; + #[pymodule_export] use crate::graph::GraphWrapper; diff --git a/src/grimp/adaptors/caching.py b/src/grimp/adaptors/caching.py index 0782923c..36310272 100644 --- a/src/grimp/adaptors/caching.py +++ b/src/grimp/adaptors/caching.py @@ -4,7 +4,7 @@ import logging from typing import Optional -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import BasicFileSystem from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module @@ -77,7 +77,7 @@ def __init__(self, *args, namer: type[CacheFileNamer], **kwargs) -> None: @classmethod def setup( cls, - file_system: AbstractFileSystem, + file_system: BasicFileSystem, found_packages: set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool = False, @@ -122,22 +122,6 @@ def write( ) -> None: self._write_marker_files_if_not_already_there() # Write data file. - primitives_map: PrimitiveFormat = {} - for found_package in self.found_packages: - primitives_map_for_found_package: PrimitiveFormat = { - module_file.module.name: [ - ( - direct_import.imported.name, - direct_import.line_number, - direct_import.line_contents, - ) - for direct_import in imports_by_module[module_file.module] - ] - for module_file in found_package.module_files - } - primitives_map.update(primitives_map_for_found_package) - - serialized = json.dumps(primitives_map) data_cache_filename = self.file_system.join( self.cache_dir, self._namer.make_data_file_name( @@ -146,7 +130,12 @@ def write( exclude_type_checking_imports=self.exclude_type_checking_imports, ), ) - self.file_system.write(data_cache_filename, serialized) + rust.write_cache_data_map_file( + filename=data_cache_filename, + imports_by_module=imports_by_module, + file_system=self.file_system, + ) + logger.info(f"Wrote data cache file {data_cache_filename}.") # Write meta files. @@ -202,7 +191,7 @@ def _read_data_map_file(self) -> dict[Module, set[DirectImport]]: ) try: imports_by_module = rust.read_cache_data_map_file( - data_cache_filename, self.file_system.convert_to_basic() + data_cache_filename, self.file_system ) except FileNotFoundError: logger.info(f"No cache file: {data_cache_filename}.") diff --git a/src/grimp/application/ports/caching.py b/src/grimp/application/ports/caching.py index e04a1362..c271ddd4 100644 --- a/src/grimp/application/ports/caching.py +++ b/src/grimp/application/ports/caching.py @@ -1,7 +1,7 @@ from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module -from .filesystem import AbstractFileSystem +from .filesystem import BasicFileSystem class CacheMiss(Exception): @@ -11,7 +11,7 @@ class CacheMiss(Exception): class Cache: def __init__( self, - file_system: AbstractFileSystem, + file_system: BasicFileSystem, include_external_packages: bool, exclude_type_checking_imports: bool, found_packages: set[FoundPackage], @@ -29,7 +29,7 @@ def __init__( @classmethod def setup( cls, - file_system: AbstractFileSystem, + file_system: BasicFileSystem, found_packages: set[FoundPackage], *, include_external_packages: bool, diff --git a/src/grimp/application/ports/filesystem.py b/src/grimp/application/ports/filesystem.py index 6597f8ac..268b1efe 100644 --- a/src/grimp/application/ports/filesystem.py +++ b/src/grimp/application/ports/filesystem.py @@ -93,7 +93,7 @@ def convert_to_basic(self) -> BasicFileSystem: class BasicFileSystem(Protocol): """ - A more limited file system, used by the Rust-based scan_for_imports function. + A more limited file system. Having two different file system APIs is an interim approach, allowing us to implement BasicFileSystem in Rust without needing to implement the full range @@ -109,4 +109,6 @@ def split(self, file_name: str) -> tuple[str, str]: ... def read(self, file_name: str) -> str: ... + def write(self, file_name: str, contents: str) -> None: ... + def exists(self, file_name: str) -> bool: ... diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index ef5b858a..e7503d29 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -7,7 +7,7 @@ from .scanning import scan_imports from ..application.ports import caching -from ..application.ports.filesystem import AbstractFileSystem +from ..application.ports.filesystem import AbstractFileSystem, BasicFileSystem from ..application.graph import ImportGraph from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile from ..application.ports.packagefinder import AbstractPackageFinder @@ -57,7 +57,7 @@ def build_graph( imports_by_module = _scan_packages( found_packages=found_packages, - file_system=file_system, + file_system=file_system.convert_to_basic(), include_external_packages=include_external_packages, exclude_type_checking_imports=exclude_type_checking_imports, cache_dir=cache_dir, @@ -102,7 +102,7 @@ def _validate_package_names_are_strings( def _scan_packages( found_packages: set[FoundPackage], - file_system: AbstractFileSystem, + file_system: BasicFileSystem, include_external_packages: bool, exclude_type_checking_imports: bool, cache_dir: str | type[NotSupplied] | None, diff --git a/tests/unit/adaptors/test_caching.py b/tests/unit/adaptors/test_caching.py index 77673520..3f6576db 100644 --- a/tests/unit/adaptors/test_caching.py +++ b/tests/unit/adaptors/test_caching.py @@ -7,7 +7,7 @@ from grimp.application.ports.caching import CacheMiss from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module -from tests.adaptors.filesystem import FakeFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] class SimplisticFileNamer(CacheFileNamer): @@ -124,7 +124,7 @@ def test_make_data_file_unique_string( class TestCache: SOME_MTIME = 1676645081.4935088 - FILE_SYSTEM = FakeFileSystem( + FILE_SYSTEM = rust.FakeBasicFileSystem( contents=""" .grimp_cache/ mypackage.meta.json @@ -249,7 +249,7 @@ def test_logs_missing_cache_files(self, caplog): caplog.set_level(logging.INFO, logger=Cache.__module__) Cache.setup( - file_system=FakeFileSystem(), # No cache files. + file_system=rust.FakeBasicFileSystem(), # No cache files. found_packages=self.FOUND_PACKAGES, namer=SimplisticFileNamer, include_external_packages=False, @@ -264,7 +264,7 @@ def test_logs_missing_cache_files(self, caplog): def test_logs_corrupt_cache_meta_file_reading(self, serialized_mtime: str, caplog): caplog.set_level(logging.WARNING, logger=Cache.__module__) - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" .grimp_cache/ mypackage.meta.json @@ -292,7 +292,7 @@ def test_logs_corrupt_cache_meta_file_reading(self, serialized_mtime: str, caplo def test_logs_corrupt_cache_data_file_reading(self, caplog): caplog.set_level(logging.WARNING, logger=Cache.__module__) - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" .grimp_cache/ mypackage.meta.json @@ -392,7 +392,7 @@ def test_uses_cache_for_module_with_same_mtime( ) def test_raises_cache_miss_for_missing_module_from_data(self): - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" .grimp_cache/ mypackage.meta.json @@ -427,7 +427,7 @@ def test_raises_cache_miss_for_missing_module_from_data(self): @pytest.mark.parametrize("serialized_mtime", ("INVALID_JSON", '["wrong", "type"]')) def test_raises_cache_miss_for_corrupt_meta_file(self, serialized_mtime): - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" .grimp_cache/ mypackage.meta.json @@ -463,7 +463,7 @@ def test_raises_cache_miss_for_corrupt_meta_file(self, serialized_mtime): @pytest.mark.parametrize("serialized_import", ("INVALID_JSON", '["wrong", "type"]')) def test_raises_cache_miss_for_corrupt_data_file(self, serialized_import): - file_system = FakeFileSystem( + file_system = rust.FakeBasicFileSystem( contents=""" .grimp_cache/ mypackage.meta.json @@ -553,7 +553,14 @@ def test_uses_cache_multiple_packages( | expected_additional_imports ) - @pytest.mark.parametrize("cache_dir", ("/tmp/some-cache-dir", "/tmp/some-cache-dir/", None)) + @pytest.mark.parametrize( + "cache_dir", + ( + "/tmp/some-cache-dir", + "/tmp/some-cache-dir/", + None, + ), + ) @pytest.mark.parametrize( "include_external_packages, expected_data_file_name", ( @@ -565,7 +572,7 @@ def test_write_to_cache( self, include_external_packages, expected_data_file_name, cache_dir, caplog ): caplog.set_level(logging.INFO, logger=Cache.__module__) - file_system = FakeFileSystem() + file_system = rust.FakeBasicFileSystem() blue_one = Module(name="blue.one") blue_two = Module(name="blue.two") green_one = Module(name="green.one") @@ -672,7 +679,7 @@ def test_write_to_cache( def test_write_to_cache_adds_marker_files(self): some_cache_dir = "/tmp/some-cache-dir" - file_system = FakeFileSystem() + file_system = rust.FakeBasicFileSystem() cache = Cache.setup( file_system=file_system, cache_dir=some_cache_dir, diff --git a/tests/unit/adaptors/test_filesystem.py b/tests/unit/adaptors/test_filesystem.py index 6644b7bc..e769902b 100644 --- a/tests/unit/adaptors/test_filesystem.py +++ b/tests/unit/adaptors/test_filesystem.py @@ -147,6 +147,14 @@ def test_read(self, file_name, expected_contents): else: assert file_system.read(file_name) == expected_contents + def test_write(self): + some_filename, some_contents = "path/to/some-file.txt", "Some contents." + file_system = self.file_system_cls() + + file_system.write(some_filename, some_contents) + + assert file_system.read(some_filename) == some_contents + class TestFakeFileSystem(_Base): file_system_cls = FakeFileSystem