From 56a2cb24145a8eed9a7da80126aa15ecede0adb0 Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Wed, 26 Nov 2025 19:43:07 +0200 Subject: [PATCH 1/8] Validate tests_root directory exists --- codeflash/lsp/beta.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index 14d43a1a5..56dbe8f7b 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -184,11 +184,35 @@ def write_config(params: WriteConfigParams) -> dict[str, any]: # the client provided a config path but it doesn't exist create_empty_pyproject_toml(cfg_file) + # Handle both dict and object access for config + def get_config_value(key: str, default: str = "") -> str: + if isinstance(cfg, dict): + return cfg.get(key, default) + return getattr(cfg, key, default) + + tests_root = get_config_value("tests_root", "") + # Validate tests_root directory exists if provided + if tests_root: + # Resolve path relative to config file directory or current working directory + if cfg_file: + base_dir = cfg_file.parent + else: + base_dir = Path.cwd() + tests_root_path = (base_dir / tests_root).resolve() + if not tests_root_path.exists() or not tests_root_path.is_dir(): + return { + "status": "error", + "message": f"Invalid 'tests_root': directory does not exist at {tests_root_path}", + "field_errors": { + "tests_root": f"Directory does not exist at {tests_root_path}", + }, + } + setup_info = VsCodeSetupInfo( - module_root=getattr(cfg, "module_root", ""), - tests_root=getattr(cfg, "tests_root", ""), - test_framework=getattr(cfg, "test_framework", "pytest"), - formatter=get_formatter_cmds(getattr(cfg, "formatter_cmds", "disabled")), + module_root=get_config_value("module_root", ""), + tests_root=tests_root, + test_framework=get_config_value("test_framework", "pytest"), + formatter=get_formatter_cmds(get_config_value("formatter_cmds", "disabled")), ) devnull_writer = open(os.devnull, "w") # noqa From 2340728385ce7c99ae3fa59989e8e26b71ece1f0 Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 27 Nov 2025 08:45:30 +0200 Subject: [PATCH 2/8] fix formatting --- codeflash/lsp/beta.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index 56dbe8f7b..1af7aa66a 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -203,9 +203,7 @@ def get_config_value(key: str, default: str = "") -> str: return { "status": "error", "message": f"Invalid 'tests_root': directory does not exist at {tests_root_path}", - "field_errors": { - "tests_root": f"Directory does not exist at {tests_root_path}", - }, + "field_errors": {"tests_root": f"Directory does not exist at {tests_root_path}"}, } setup_info = VsCodeSetupInfo( From 14f5b904bf6d8efbef7f909d035bdcfca6498905 Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Mon, 1 Dec 2025 19:02:02 +0200 Subject: [PATCH 3/8] fix formmatting --- codeflash/lsp/beta.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index 1af7aa66a..e90b8269e 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -194,10 +194,7 @@ def get_config_value(key: str, default: str = "") -> str: # Validate tests_root directory exists if provided if tests_root: # Resolve path relative to config file directory or current working directory - if cfg_file: - base_dir = cfg_file.parent - else: - base_dir = Path.cwd() + base_dir = cfg_file.parent if cfg_file else Path.cwd() tests_root_path = (base_dir / tests_root).resolve() if not tests_root_path.exists() or not tests_root_path.is_dir(): return { From 01b4b6e208e196ae7e8f60e96da4e352dcab1c6c Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 4 Dec 2025 15:35:55 +0200 Subject: [PATCH 4/8] strict directories validation --- codeflash/cli_cmds/cmd_init.py | 77 ++++++++++++++++++---------- codeflash/code_utils/code_utils.py | 80 ++++++++++++++++++++++++++++++ codeflash/lsp/beta.py | 25 ++++++++-- 3 files changed, 153 insertions(+), 29 deletions(-) diff --git a/codeflash/cli_cmds/cmd_init.py b/codeflash/cli_cmds/cmd_init.py index 9cc3c8674..b3549e5d9 100644 --- a/codeflash/cli_cmds/cmd_init.py +++ b/codeflash/cli_cmds/cmd_init.py @@ -33,6 +33,7 @@ from codeflash.code_utils.github_utils import get_github_secrets_page_url from codeflash.code_utils.oauth_handler import perform_oauth_signin from codeflash.code_utils.shell_utils import get_shell_rc_path, is_powershell, save_api_key_to_rc +from codeflash.code_utils.code_utils import validate_relative_directory_path from codeflash.either import is_successful from codeflash.lsp.helpers import is_LSP_enabled from codeflash.telemetry.posthog_cf import ph @@ -356,20 +357,32 @@ def collect_setup_info() -> CLISetupInfo: console.print(custom_panel) console.print() - custom_questions = [ - inquirer.Path( - "custom_path", - message="Enter the path to your module directory", - path_type=inquirer.Path.DIRECTORY, - exists=True, - ) - ] + # Retry loop for custom module root path + module_root = None + while module_root is None: + custom_questions = [ + inquirer.Path( + "custom_path", + message="Enter the path to your module directory", + path_type=inquirer.Path.DIRECTORY, + exists=True, + ) + ] - custom_answers = inquirer.prompt(custom_questions, theme=CodeflashTheme()) - if custom_answers: - module_root = Path(custom_answers["custom_path"]) - else: - apologize_and_exit() + custom_answers = inquirer.prompt(custom_questions, theme=CodeflashTheme()) + if not custom_answers: + apologize_and_exit() + return # unreachable but satisfies type checker + + custom_path_str = str(custom_answers["custom_path"]) + # Validate the path is safe + is_valid, error_msg = validate_relative_directory_path(custom_path_str) + if not is_valid: + click.echo(f"❌ Invalid path: {error_msg}") + click.echo("Please enter a valid relative directory path.") + console.print() # Add spacing before retry + continue # Retry the prompt + module_root = Path(custom_path_str) else: module_root = module_root_answer ph("cli-project-root-provided") @@ -427,20 +440,32 @@ def collect_setup_info() -> CLISetupInfo: console.print(custom_tests_panel) console.print() - custom_tests_questions = [ - inquirer.Path( - "custom_tests_path", - message="Enter the path to your tests directory", - path_type=inquirer.Path.DIRECTORY, - exists=True, - ) - ] + # Retry loop for custom tests root path + tests_root = None + while tests_root is None: + custom_tests_questions = [ + inquirer.Path( + "custom_tests_path", + message="Enter the path to your tests directory", + path_type=inquirer.Path.DIRECTORY, + exists=True, + ) + ] - custom_tests_answers = inquirer.prompt(custom_tests_questions, theme=CodeflashTheme()) - if custom_tests_answers: - tests_root = Path(curdir) / Path(custom_tests_answers["custom_tests_path"]) - else: - apologize_and_exit() + custom_tests_answers = inquirer.prompt(custom_tests_questions, theme=CodeflashTheme()) + if not custom_tests_answers: + apologize_and_exit() + return # unreachable but satisfies type checker + + custom_tests_path_str = str(custom_tests_answers["custom_tests_path"]) + # Validate the path is safe + is_valid, error_msg = validate_relative_directory_path(custom_tests_path_str) + if not is_valid: + click.echo(f"❌ Invalid path: {error_msg}") + click.echo("Please enter a valid relative directory path.") + console.print() # Add spacing before retry + continue # Retry the prompt + tests_root = Path(curdir) / Path(custom_tests_path_str) else: tests_root = Path(curdir) / Path(cast("str", tests_root_answer)) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 37e0dd94e..ca646e38d 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -376,3 +376,83 @@ def extract_unique_errors(pytest_output: str) -> set[str]: unique_errors.add(error_message) return unique_errors + + +def validate_relative_directory_path(path: str) -> tuple[bool, str]: + """Validate that a path is a safe relative directory path. + + Prevents path traversal attacks and invalid paths. + Works cross-platform (Windows, Linux, macOS). + + Args: + path: The path string to validate + + Returns: + tuple[bool, str]: (is_valid, error_message) + - is_valid: True if path is valid, False otherwise + - error_message: Empty string if valid, error description if invalid + """ + if not path or not path.strip(): + return False, "Path cannot be empty" + + # Normalize whitespace + path = path.strip() + + # Check for shell commands or dangerous patterns + dangerous_patterns = [ + "cd ", + "ls ", + "rm ", + "mkdir ", + "rmdir ", + "del ", + "dir ", + "type ", + "cat ", + "echo ", + "&&", + "||", + ";", + "|", + ">", + "<", + "$", + "`", + ] + path_lower = path.lower() + for pattern in dangerous_patterns: + if pattern in path_lower: + return False, f"Path contains invalid characters or commands: {pattern.strip()}" + + # Check for path traversal attempts (cross-platform) + # Normalize path separators for checking + normalized = path.replace("\\", "/") + if ".." in normalized: + return False, "Path cannot contain '..' (parent directory traversal)" + + # Check for absolute paths (Windows and Unix) + if os.path.isabs(path): + return False, "Path must be relative, not absolute" + + # Check for invalid characters (OS-specific) + invalid_chars = set() + if os.name == "nt": # Windows + invalid_chars = {'<', '>', ':', '"', '|', '?', '*'} + else: # Unix-like + invalid_chars = {'\0'} + + if any(char in path for char in invalid_chars): + return False, f"Path contains invalid characters for this operating system" + + # Validate using pathlib to ensure it's a valid path structure + try: + path_obj = Path(path) + # Check if path would resolve outside the current directory + # This is a safety check for edge cases + parts = path_obj.parts + if any(part == ".." for part in parts): + return False, "Path cannot contain '..' (parent directory traversal)" + except (ValueError, OSError) as e: + return False, f"Invalid path format: {str(e)}" + + return True, "" \ No newline at end of file diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index e90b8269e..79ce43817 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -23,6 +23,7 @@ get_valid_subdirs, is_valid_pyproject_toml, ) +from codeflash.code_utils.code_utils import validate_relative_directory_path from codeflash.code_utils.git_utils import git_root_dir from codeflash.code_utils.git_worktree_utils import create_worktree_snapshot_commit from codeflash.code_utils.shell_utils import save_api_key_to_rc @@ -191,9 +192,16 @@ def get_config_value(key: str, default: str = "") -> str: return getattr(cfg, key, default) tests_root = get_config_value("tests_root", "") - # Validate tests_root directory exists if provided + # Validate tests_root path format and safety if tests_root: - # Resolve path relative to config file directory or current working directory + is_valid, error_msg = validate_relative_directory_path(tests_root) + if not is_valid: + return { + "status": "error", + "message": f"Invalid 'tests_root': {error_msg}", + "field_errors": {"tests_root": error_msg}, + } + # Validate tests_root directory exists if provided base_dir = cfg_file.parent if cfg_file else Path.cwd() tests_root_path = (base_dir / tests_root).resolve() if not tests_root_path.exists() or not tests_root_path.is_dir(): @@ -203,8 +211,19 @@ def get_config_value(key: str, default: str = "") -> str: "field_errors": {"tests_root": f"Directory does not exist at {tests_root_path}"}, } + # Validate module_root path format and safety + module_root = get_config_value("module_root", "") + if module_root: + is_valid, error_msg = validate_relative_directory_path(module_root) + if not is_valid: + return { + "status": "error", + "message": f"Invalid 'module_root': {error_msg}", + "field_errors": {"module_root": error_msg}, + } + setup_info = VsCodeSetupInfo( - module_root=get_config_value("module_root", ""), + module_root=module_root, tests_root=tests_root, test_framework=get_config_value("test_framework", "pytest"), formatter=get_formatter_cmds(get_config_value("formatter_cmds", "disabled")), From c1460cf98572a3fe84202ae8a5af9f5a3518dff5 Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 4 Dec 2025 16:09:27 +0200 Subject: [PATCH 5/8] fix linting --- codeflash/cli_cmds/cmd_init.py | 6 +-- codeflash/code_utils/code_utils.py | 60 +++++++++++++----------------- 2 files changed, 29 insertions(+), 37 deletions(-) diff --git a/codeflash/cli_cmds/cmd_init.py b/codeflash/cli_cmds/cmd_init.py index b3549e5d9..b4035d550 100644 --- a/codeflash/cli_cmds/cmd_init.py +++ b/codeflash/cli_cmds/cmd_init.py @@ -26,6 +26,7 @@ from codeflash.cli_cmds.cli_common import apologize_and_exit from codeflash.cli_cmds.console import console, logger from codeflash.cli_cmds.extension import install_vscode_extension +from codeflash.code_utils.code_utils import validate_relative_directory_path from codeflash.code_utils.compat import LF from codeflash.code_utils.config_parser import parse_config_file from codeflash.code_utils.env_utils import check_formatter_installed, get_codeflash_api_key @@ -33,7 +34,6 @@ from codeflash.code_utils.github_utils import get_github_secrets_page_url from codeflash.code_utils.oauth_handler import perform_oauth_signin from codeflash.code_utils.shell_utils import get_shell_rc_path, is_powershell, save_api_key_to_rc -from codeflash.code_utils.code_utils import validate_relative_directory_path from codeflash.either import is_successful from codeflash.lsp.helpers import is_LSP_enabled from codeflash.telemetry.posthog_cf import ph @@ -372,7 +372,7 @@ def collect_setup_info() -> CLISetupInfo: custom_answers = inquirer.prompt(custom_questions, theme=CodeflashTheme()) if not custom_answers: apologize_and_exit() - return # unreachable but satisfies type checker + return None # unreachable but satisfies type checker custom_path_str = str(custom_answers["custom_path"]) # Validate the path is safe @@ -455,7 +455,7 @@ def collect_setup_info() -> CLISetupInfo: custom_tests_answers = inquirer.prompt(custom_tests_questions, theme=CodeflashTheme()) if not custom_tests_answers: apologize_and_exit() - return # unreachable but satisfies type checker + return None # unreachable but satisfies type checker custom_tests_path_str = str(custom_tests_answers["custom_tests_path"]) # Validate the path is safe diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index ca646e38d..a91a49cb0 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -380,24 +380,26 @@ def extract_unique_errors(pytest_output: str) -> set[str]: def validate_relative_directory_path(path: str) -> tuple[bool, str]: """Validate that a path is a safe relative directory path. - + Prevents path traversal attacks and invalid paths. Works cross-platform (Windows, Linux, macOS). - + Args: path: The path string to validate - + Returns: tuple[bool, str]: (is_valid, error_message) - is_valid: True if path is valid, False otherwise - error_message: Empty string if valid, error description if invalid + """ + # Check for empty path if not path or not path.strip(): return False, "Path cannot be empty" - + # Normalize whitespace path = path.strip() - + # Check for shell commands or dangerous patterns dangerous_patterns = [ "cd ", @@ -423,36 +425,26 @@ def validate_relative_directory_path(path: str) -> tuple[bool, str]: for pattern in dangerous_patterns: if pattern in path_lower: return False, f"Path contains invalid characters or commands: {pattern.strip()}" - + # Check for path traversal attempts (cross-platform) - # Normalize path separators for checking normalized = path.replace("\\", "/") if ".." in normalized: return False, "Path cannot contain '..' (parent directory traversal)" - - # Check for absolute paths (Windows and Unix) - if os.path.isabs(path): - return False, "Path must be relative, not absolute" - - # Check for invalid characters (OS-specific) - invalid_chars = set() - if os.name == "nt": # Windows - invalid_chars = {'<', '>', ':', '"', '|', '?', '*'} - else: # Unix-like - invalid_chars = {'\0'} - - if any(char in path for char in invalid_chars): - return False, f"Path contains invalid characters for this operating system" - - # Validate using pathlib to ensure it's a valid path structure - try: - path_obj = Path(path) - # Check if path would resolve outside the current directory - # This is a safety check for edge cases - parts = path_obj.parts - if any(part == ".." for part in parts): - return False, "Path cannot contain '..' (parent directory traversal)" - except (ValueError, OSError) as e: - return False, f"Invalid path format: {str(e)}" - - return True, "" \ No newline at end of file + + # Check for absolute paths and invalid characters + invalid_chars = {"<", ">", ":", '"', "|", "?", "*"} if os.name == "nt" else {"\0"} + error_msg = "" + if Path(path).is_absolute(): + error_msg = "Path must be relative, not absolute" + elif any(char in path for char in invalid_chars): + error_msg = "Path contains invalid characters for this operating system" + else: + # Validate using pathlib to ensure it's a valid path structure + try: + Path(path) + except (ValueError, OSError) as e: + error_msg = f"Invalid path format: {e!s}" + + if error_msg: + return False, error_msg + return True, "" From d52dea9a373635f343e67e6fc37a1325a9d4b358 Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 4 Dec 2025 17:53:11 +0200 Subject: [PATCH 6/8] add optimized version of the function --- codeflash/code_utils/code_utils.py | 68 +++++++++++++++++------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index a91a49cb0..7717d5fa3 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -19,6 +19,35 @@ from codeflash.code_utils.config_parser import find_pyproject_toml, get_all_closest_config_files from codeflash.lsp.helpers import is_LSP_enabled +_DANGEROUS_PATTERNS = [ + "cd ", + "ls ", + "rm ", + "mkdir ", + "rmdir ", + "del ", + "dir ", + "type ", + "cat ", + "echo ", + "&&", + "||", + ";", + "|", + ">", + "<", + "$", + "`", +] + +_DANGEROUS_PATTERNS_SET = set(_DANGEROUS_PATTERNS) + +_DANGEROUS_PATTERNS_LOWER = tuple(pat.lower() for pat in _DANGEROUS_PATTERNS) + +_INVALID_CHARS_NT = {"<", ">", ":", '"', "|", "?", "*"} + +_INVALID_CHARS_UNIX = {"\0"} + ImportErrorPattern = re.compile(r"ModuleNotFoundError.*$", re.MULTILINE) BLACKLIST_ADDOPTS = ("--benchmark", "--sugar", "--codespeed", "--cov", "--profile", "--junitxml", "-n") @@ -393,50 +422,31 @@ def validate_relative_directory_path(path: str) -> tuple[bool, str]: - error_message: Empty string if valid, error description if invalid """ - # Check for empty path if not path or not path.strip(): return False, "Path cannot be empty" # Normalize whitespace path = path.strip() - - # Check for shell commands or dangerous patterns - dangerous_patterns = [ - "cd ", - "ls ", - "rm ", - "mkdir ", - "rmdir ", - "del ", - "dir ", - "type ", - "cat ", - "echo ", - "&&", - "||", - ";", - "|", - ">", - "<", - "$", - "`", - ] path_lower = path.lower() - for pattern in dangerous_patterns: - if pattern in path_lower: - return False, f"Path contains invalid characters or commands: {pattern.strip()}" + # Instead of for-loop, use generator with next() for early exit + found_pattern = next((pattern for pattern in _DANGEROUS_PATTERNS_LOWER if pattern in path_lower), None) + if found_pattern is not None: + return False, f"Path contains invalid characters or commands: {found_pattern.strip()}" # Check for path traversal attempts (cross-platform) + # Normalize path separators for checking normalized = path.replace("\\", "/") if ".." in normalized: return False, "Path cannot contain '..' (parent directory traversal)" - # Check for absolute paths and invalid characters - invalid_chars = {"<", ">", ":", '"', "|", "?", "*"} if os.name == "nt" else {"\0"} + # Check for absolute paths, invalid characters, and validate path format error_msg = "" if Path(path).is_absolute(): error_msg = "Path must be relative, not absolute" - elif any(char in path for char in invalid_chars): + elif os.name == "nt": # Windows + if any(char in _INVALID_CHARS_NT for char in path): + error_msg = "Path contains invalid characters for this operating system" + elif "\0" in path: # Unix-like error_msg = "Path contains invalid characters for this operating system" else: # Validate using pathlib to ensure it's a valid path structure From 525a5d48273a89f4e59ed3f5bb119f6ba6f1cd1c Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 4 Dec 2025 20:57:09 +0200 Subject: [PATCH 7/8] update invalid path error message --- codeflash/code_utils/code_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 7717d5fa3..31e1a167a 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -437,7 +437,7 @@ def validate_relative_directory_path(path: str) -> tuple[bool, str]: # Normalize path separators for checking normalized = path.replace("\\", "/") if ".." in normalized: - return False, "Path cannot contain '..' (parent directory traversal)" + return False, "Path cannot contain '..'. Use a relative path like 'tests' or 'src/app' instead" # Check for absolute paths, invalid characters, and validate path format error_msg = "" From 2204ab97e569abaeec2e5a276a7686e00d2543be Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Fri, 12 Dec 2025 15:37:19 +0200 Subject: [PATCH 8/8] remove the complexity malicious commands validations --- codeflash/code_utils/code_utils.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 31e1a167a..5682bdf42 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -19,31 +19,6 @@ from codeflash.code_utils.config_parser import find_pyproject_toml, get_all_closest_config_files from codeflash.lsp.helpers import is_LSP_enabled -_DANGEROUS_PATTERNS = [ - "cd ", - "ls ", - "rm ", - "mkdir ", - "rmdir ", - "del ", - "dir ", - "type ", - "cat ", - "echo ", - "&&", - "||", - ";", - "|", - ">", - "<", - "$", - "`", -] - -_DANGEROUS_PATTERNS_SET = set(_DANGEROUS_PATTERNS) - -_DANGEROUS_PATTERNS_LOWER = tuple(pat.lower() for pat in _DANGEROUS_PATTERNS) - _INVALID_CHARS_NT = {"<", ">", ":", '"', "|", "?", "*"} _INVALID_CHARS_UNIX = {"\0"} @@ -427,11 +402,6 @@ def validate_relative_directory_path(path: str) -> tuple[bool, str]: # Normalize whitespace path = path.strip() - path_lower = path.lower() - # Instead of for-loop, use generator with next() for early exit - found_pattern = next((pattern for pattern in _DANGEROUS_PATTERNS_LOWER if pattern in path_lower), None) - if found_pattern is not None: - return False, f"Path contains invalid characters or commands: {found_pattern.strip()}" # Check for path traversal attempts (cross-platform) # Normalize path separators for checking