Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 77 additions & 1 deletion rust/src/caching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,36 @@ use crate::errors::{GrimpError, GrimpResult};
use crate::filesystem::get_file_system_boxed;
use crate::import_scanning::{DirectImport, imports_by_module_to_py};
use crate::module_finding::Module;
use pyo3::types::PyDict;
use pyo3::types::PyAnyMethods;
use pyo3::types::{PyDict, PySet};
use pyo3::types::{PyDictMethods, PySetMethods};
use pyo3::{Bound, PyAny, PyResult, Python, pyfunction};
use std::collections::{HashMap, HashSet};
use serde_json;

/// Writes the cache file containing all the imports for a given package.
///
/// The Python mapping is converted to Rust values, serialized to a JSON string
/// and handed to the file system's `write` under `filename`.
///
/// Args:
/// - filename: str
/// - imports_by_module: dict[Module, Set[DirectImport]]
/// - file_system: The file system interface to use. (A BasicFileSystem.)
///
/// Errors:
/// - Propagates the error from `get_file_system_boxed` when `file_system`
///   cannot be turned into a boxed file system.
/// - Propagates any error returned by the file system's `write`.
///
/// Panics:
/// - The conversion helper panics if `imports_by_module` does not have the
///   documented shape (see `imports_by_module_to_rust`).
#[pyfunction]
pub fn write_cache_data_map_file<'py>(
    py: Python<'py>,
    filename: &str,
    imports_by_module: Bound<'py, PyDict>,
    file_system: Bound<'py, PyAny>,
) -> PyResult<()> {
    // NOTE(review): `get_file_system_boxed` boxes a clone of the wrapped file
    // system, so this write goes through that clone. Presumably clones share
    // their backing storage where that matters (e.g. the fake) - confirm.
    let mut file_system_boxed = get_file_system_boxed(&file_system)?;

    let imports_by_module_rust = imports_by_module_to_rust(imports_by_module);
    let file_contents = serialize_imports_by_module(&imports_by_module_rust);

    file_system_boxed.write(filename, &file_contents)?;

    Ok(())
}

/// Reads the cache file containing all the imports for a given package.
/// Args:
Expand All @@ -17,6 +44,7 @@ pub fn read_cache_data_map_file<'py>(
filename: &str,
file_system: Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyDict>> {
eprintln!("About to clone for read.");
let file_system_boxed = get_file_system_boxed(&file_system)?;

let file_contents = file_system_boxed.read(filename)?;
Expand All @@ -26,6 +54,54 @@ pub fn read_cache_data_map_file<'py>(
Ok(imports_by_module_to_py(py, imports_by_module))
}

/// Converts a Python `dict[Module, Set[DirectImport]]` into the equivalent
/// Rust `HashMap<Module, HashSet<DirectImport>>`.
///
/// # Panics
/// Panics if a key cannot be extracted as a `Module`, if a value is not a
/// Python set, or if a set element cannot be extracted as a `DirectImport`.
#[allow(unused_variables)]
fn imports_by_module_to_rust(
    imports_by_module_py: Bound<PyDict>,
) -> HashMap<Module, HashSet<DirectImport>> {
    imports_by_module_py
        .iter()
        .map(|(key, value)| {
            let module = key.extract::<Module>().unwrap();
            // Each dict value must be a Python set of DirectImport objects.
            let direct_imports = value
                .downcast::<PySet>()
                .expect("Expected value to be a Python set.")
                .iter()
                .map(|item| {
                    item.extract::<DirectImport>()
                        .expect("Expected value to be DirectImport.")
                })
                .collect::<HashSet<DirectImport>>();
            (module, direct_imports)
        })
        .collect()
}

/// Serializes the imports map to a JSON string.
///
/// The JSON object maps each module name to a list of
/// `(imported, line_number, line_contents)` entries, one per direct import.
///
/// # Panics
/// Panics with "Failed to serialize to JSON" if `serde_json` reports an error.
#[allow(unused_variables)]
fn serialize_imports_by_module(
    imports_by_module: &HashMap<Module, HashSet<DirectImport>>,
) -> String {
    let mut primitives: HashMap<&str, Vec<(&str, usize, &str)>> =
        HashMap::with_capacity(imports_by_module.len());

    for (module, direct_imports) in imports_by_module {
        // Flatten every DirectImport into a borrowed primitive tuple.
        let mut rows: Vec<(&str, usize, &str)> = Vec::with_capacity(direct_imports.len());
        for direct_import in direct_imports {
            rows.push((
                direct_import.imported.as_str(),
                direct_import.line_number,
                direct_import.line_contents.as_str(),
            ));
        }
        primitives.insert(module.name.as_str(), rows);
    }

    serde_json::to_string(&primitives).expect("Failed to serialize to JSON")
}

pub fn parse_json_to_map(
json_str: &str,
filename: &str,
Expand Down
31 changes: 30 additions & 1 deletion rust/src/filesystem.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::io::prelude::*;
use itertools::Itertools;
use std::fs::File;
use pyo3::exceptions::{PyFileNotFoundError, PyTypeError, PyUnicodeDecodeError};
use pyo3::prelude::*;
use regex::Regex;
Expand All @@ -22,6 +24,8 @@ pub trait FileSystem: Send + Sync {
fn exists(&self, file_name: &str) -> bool;

fn read(&self, file_name: &str) -> PyResult<String>;

fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()>;
}

#[derive(Clone)]
Expand Down Expand Up @@ -129,6 +133,12 @@ impl FileSystem for RealBasicFileSystem {
})
}
}

fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
let mut file = File::create(file_name)?;
file.write_all(contents.as_bytes())?;
Ok(())
}
}

#[pymethods]
Expand Down Expand Up @@ -161,6 +171,10 @@ impl PyRealBasicFileSystem {
/// Reads `file_name` by delegating to the wrapped file system (`self.inner`).
fn read(&self, file_name: &str) -> PyResult<String> {
self.inner.read(file_name)
}

/// Writes `contents` to `file_name` by delegating to the wrapped file system
/// (`self.inner`).
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
self.inner.write(file_name, contents)
}
}

type FileSystemContents = HashMap<String, String>;
Expand Down Expand Up @@ -236,13 +250,23 @@ impl FileSystem for FakeBasicFileSystem {
}

/// Returns a copy of the contents stored under `file_name`.
///
/// # Errors
/// Returns `PyFileNotFoundError` with the message `No such file: <file_name>`
/// when `self.contents` has no entry for `file_name`.
fn read(&self, file_name: &str) -> PyResult<String> {
    match self.contents.get(file_name) {
        // Clone so the caller gets an owned string and the stored entry is untouched.
        Some(stored_contents) => Ok(stored_contents.clone()),
        None => Err(PyFileNotFoundError::new_err(format!(
            "No such file: {file_name}"
        ))),
    }
}

/// Stores an owned copy of `contents` under `file_name` in `self.contents`.
///
/// Always returns `Ok(())`.
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
    self.contents
        .insert(file_name.to_string(), contents.to_string());
    Ok(())
}
}

#[pymethods]
Expand Down Expand Up @@ -278,6 +302,10 @@ impl PyFakeBasicFileSystem {
self.inner.read(file_name)
}

/// Writes `contents` to `file_name` by delegating to the wrapped file system
/// (`self.inner`).
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
self.inner.write(file_name, contents)
}

// Temporary workaround method for Python tests.
fn convert_to_basic(&self) -> PyResult<Self> {
Ok(PyFakeBasicFileSystem {
Expand Down Expand Up @@ -381,7 +409,7 @@ pub fn get_file_system_boxed<'py>(
file_system: &Bound<'py, PyAny>,
) -> PyResult<Box<dyn FileSystem + Send + Sync>> {
let file_system_boxed: Box<dyn FileSystem + Send + Sync>;

eprintln!("Cloning file system.");
if let Ok(py_real) = file_system.extract::<PyRef<PyRealBasicFileSystem>>() {
file_system_boxed = Box::new(py_real.inner.clone());
} else if let Ok(py_fake) = file_system.extract::<PyRef<PyFakeBasicFileSystem>>() {
Expand All @@ -391,5 +419,6 @@ pub fn get_file_system_boxed<'py>(
"file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem",
));
}

Ok(file_system_boxed)
}
23 changes: 20 additions & 3 deletions rust/src/import_scanning.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use crate::errors::GrimpResult;
use crate::filesystem::FileSystem;
use crate::module_finding::{FoundPackage, Module};
use crate::module_finding::{FoundPackage, Module, ModuleFile};
use crate::{import_parsing, module_finding};
use itertools::Itertools;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PySet};
use pyo3::types::{PyDict, PyFrozenSet, PySet};
/// Statically analyses some Python modules for import statements within their shared package.
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::io::{self, ErrorKind};

#[derive(Debug, Hash, Eq, PartialEq)]
Expand All @@ -18,6 +18,23 @@ pub struct DirectImport {
pub line_contents: String,
}

/// Builds a [`DirectImport`] from a Python object.
///
/// The object must expose `importer` and `imported` attributes (each with a
/// string `name` attribute), an integer `line_number` and a string
/// `line_contents`. Any failed attribute lookup or extraction is returned as
/// the error.
impl<'py> FromPyObject<'py> for DirectImport {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        // Reads `ob.<attr>.name` as a Rust string.
        let module_name =
            |attr: &str| -> PyResult<String> { ob.getattr(attr)?.getattr("name")?.extract() };

        // Struct fields are evaluated top to bottom, so attributes are read in
        // the order: importer, imported, line_number, line_contents.
        Ok(DirectImport {
            importer: module_name("importer")?,
            imported: module_name("imported")?,
            line_number: ob.getattr("line_number")?.extract()?,
            line_contents: ob.getattr("line_contents")?.extract()?,
        })
    }
}


pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet<FoundPackage> {
let py_set = py_found_packages
.downcast::<PySet>()
Expand Down
3 changes: 2 additions & 1 deletion rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mod import_scanning;
pub mod module_expressions;
mod module_finding;

use crate::caching::read_cache_data_map_file;
use crate::caching::{read_cache_data_map_file,write_cache_data_map_file};
use crate::errors::{GrimpError, GrimpResult};
use crate::exceptions::{
CorruptCache, InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError,
Expand All @@ -32,6 +32,7 @@ use std::collections::HashSet;
#[pymodule]
fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(scan_for_imports))?;
m.add_wrapped(wrap_pyfunction!(write_cache_data_map_file))?;
m.add_wrapped(wrap_pyfunction!(read_cache_data_map_file))?;
m.add_class::<GraphWrapper>()?;
m.add_class::<PyRealBasicFileSystem>()?;
Expand Down
1 change: 1 addition & 0 deletions rust/src/module_finding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ impl fmt::Display for Module {
}
}


#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Set of modules found under a single package, together with metadata.
pub struct FoundPackage {
Expand Down
35 changes: 22 additions & 13 deletions src/grimp/adaptors/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
from typing import Dict, List, Optional, Set, Tuple, Type

from grimp.application.ports.filesystem import AbstractFileSystem
from grimp.application.ports.filesystem import BasicFileSystem
from grimp.application.ports.modulefinder import FoundPackage, ModuleFile
from grimp.domain.valueobjects import DirectImport, Module

Expand Down Expand Up @@ -77,7 +77,7 @@ def __init__(self, *args, namer: Type[CacheFileNamer], **kwargs) -> None:
@classmethod
def setup(
cls,
file_system: AbstractFileSystem,
file_system: BasicFileSystem,
found_packages: Set[FoundPackage],
include_external_packages: bool,
exclude_type_checking_imports: bool = False,
Expand Down Expand Up @@ -120,8 +120,24 @@ def write(
self,
imports_by_module: Dict[Module, Set[DirectImport]],
) -> None:
self._write_marker_files_if_not_already_there()

# Write data file.
data_cache_filename = self.file_system.join(
self.cache_dir,
self._namer.make_data_file_name(
found_packages=self.found_packages,
include_external_packages=self.include_external_packages,
exclude_type_checking_imports=self.exclude_type_checking_imports,
),
)
# Rust version
rust.write_cache_data_map_file(
filename=data_cache_filename,
imports_by_module=imports_by_module,
file_system=self.file_system,
)
self._write_marker_files_if_not_already_there()
# Python version
primitives_map: PrimitiveFormat = {}
for found_package in self.found_packages:
primitives_map_for_found_package: PrimitiveFormat = {
Expand All @@ -138,15 +154,8 @@ def write(
primitives_map.update(primitives_map_for_found_package)

serialized = json.dumps(primitives_map)
data_cache_filename = self.file_system.join(
self.cache_dir,
self._namer.make_data_file_name(
found_packages=self.found_packages,
include_external_packages=self.include_external_packages,
exclude_type_checking_imports=self.exclude_type_checking_imports,
),
)
self.file_system.write(data_cache_filename, serialized)
# self.file_system.write(data_cache_filename, serialized)

logger.info(f"Wrote data cache file {data_cache_filename}.")

# Write meta files.
Expand Down Expand Up @@ -202,7 +211,7 @@ def _read_data_map_file(self) -> Dict[Module, Set[DirectImport]]:
)
try:
imports_by_module = rust.read_cache_data_map_file(
data_cache_filename, self.file_system.convert_to_basic()
data_cache_filename, self.file_system
)
except FileNotFoundError:
logger.info(f"No cache file: {data_cache_filename}.")
Expand Down
6 changes: 3 additions & 3 deletions src/grimp/application/ports/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from grimp.application.ports.modulefinder import FoundPackage, ModuleFile
from grimp.domain.valueobjects import DirectImport, Module

from .filesystem import AbstractFileSystem
from .filesystem import BasicFileSystem


class CacheMiss(Exception):
Expand All @@ -13,7 +13,7 @@ class CacheMiss(Exception):
class Cache:
def __init__(
self,
file_system: AbstractFileSystem,
file_system: BasicFileSystem,
include_external_packages: bool,
exclude_type_checking_imports: bool,
found_packages: Set[FoundPackage],
Expand All @@ -31,7 +31,7 @@ def __init__(
@classmethod
def setup(
cls,
file_system: AbstractFileSystem,
file_system: BasicFileSystem,
found_packages: Set[FoundPackage],
*,
include_external_packages: bool,
Expand Down
5 changes: 4 additions & 1 deletion src/grimp/application/ports/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def convert_to_basic(self) -> BasicFileSystem:

class BasicFileSystem(Protocol):
"""
A more limited file system, used by the Rust-based scan_for_imports function.
A more limited file system.

Having two different file system APIs is an interim approach, allowing us to
implement BasicFileSystem in Rust without needing to implement the full range
Expand All @@ -112,5 +112,8 @@ def split(self, file_name: str) -> Tuple[str, str]:
def read(self, file_name: str) -> str:
...

def write(self, file_name: str, contents: str) -> None:
...

def exists(self, file_name: str) -> bool:
...
2 changes: 1 addition & 1 deletion src/grimp/application/usecases.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _scan_packages(
if cache_dir is not None:
cache_dir_if_supplied = cache_dir if cache_dir != NotSupplied else None
cache: caching.Cache = settings.CACHE_CLASS.setup(
file_system=file_system,
file_system=file_system.convert_to_basic(),
found_packages=found_packages,
include_external_packages=include_external_packages,
exclude_type_checking_imports=exclude_type_checking_imports,
Expand Down
Loading
Loading