diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 6b2805fbf..638655632 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -8,6 +8,7 @@
 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6  # 100ms
 N_TESTS_TO_GENERATE = 2
 TOTAL_LOOPING_TIME = 10.0  # 10 second candidate benchmarking budget
+CONSISTENT_LOOP_COUNT = 3  # successive loops whose total runtimes must agree before exiting early
 COVERAGE_THRESHOLD = 60.0
 MIN_TESTCASE_PASSED_THRESHOLD = 6
 REPEAT_OPTIMIZATION_PROBABILITY = 0.1
diff --git a/codeflash/code_utils/env_utils.py b/codeflash/code_utils/env_utils.py
index 4987e6d8d..32cbdfa2b 100644
--- a/codeflash/code_utils/env_utils.py
+++ b/codeflash/code_utils/env_utils.py
@@ -19,7 +19,6 @@
 def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool:  # noqa
     if not formatter_cmds or formatter_cmds[0] == "disabled":
         return True
-
     first_cmd = formatter_cmds[0]
     cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 860c2eaf1..8209308d9 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1806,7 +1806,6 @@ def establish_original_code_baseline(
                 benchmarking_results, self.function_to_optimize.function_name
             )
             logger.debug(f"Original async function throughput: {async_throughput} calls/second")
-            console.rule()

             if self.args.benchmark:
                 replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(
diff --git a/codeflash/verification/pytest_plugin.py b/codeflash/verification/pytest_plugin.py
index 20ef8624a..791a39ec4 100644
--- a/codeflash/verification/pytest_plugin.py
+++ b/codeflash/verification/pytest_plugin.py
@@ -2,23 +2,27 @@

 import contextlib
 import inspect
-
-# System Imports
 import logging
 import os
 import platform
 import re
+import statistics
 import sys
 import time as _time_module
 import warnings
+from collections import deque
+
+# System Imports
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable
+from typing import TYPE_CHECKING, Any, Callable, Optional
 from unittest import TestCase

 # PyTest Imports
 import pytest
 from pluggy import HookspecMarker

+from codeflash.code_utils.config_consts import CONSISTENT_LOOP_COUNT
+
 if TYPE_CHECKING:
     from _pytest.config import Config, Parser
     from _pytest.main import Session
@@ -77,6 +81,7 @@ class UnexpectedError(Exception):

 # Store references to original functions before any patching
 _ORIGINAL_TIME_TIME = _time_module.time
 _ORIGINAL_PERF_COUNTER = _time_module.perf_counter
+_ORIGINAL_PERF_COUNTER_NS = _time_module.perf_counter_ns
 _ORIGINAL_TIME_SLEEP = _time_module.sleep

@@ -260,6 +265,29 @@ def pytest_configure(config: Config) -> None:
         _apply_deterministic_patches()


+def get_runtime_from_stdout(stdout: str) -> Optional[int]:
+    marker_start = "!######"  # instrumented tests print "!######<payload>:<ns>######!"
+    marker_end = "######!"
+
+    if not stdout:
+        return None
+
+    end = stdout.rfind(marker_end)
+    if end == -1:
+        return None
+
+    start = stdout.rfind(marker_start, 0, end)
+    if start == -1:
+        return None
+
+    payload = stdout[start + len(marker_start) : end]
+    last_colon = payload.rfind(":")
+    if last_colon == -1:
+        return None
+
+    return int(payload[last_colon + 1 :])
+
+
 class PytestLoops:
     name: str = "pytest-loops"

@@ -268,9 +296,30 @@ def __init__(self, config: Config) -> None:
         level = logging.DEBUG if config.option.verbose > 1 else logging.INFO
         logging.basicConfig(level=level)
         self.logger = logging.getLogger(self.name)
+        self.current_loop_durations_in_nano: list[int] = []  # per-test runtimes captured this loop
+
+    def dynamic_tolerance(self, median_ns: float) -> float:  # thresholds in ns; shorter runtimes tolerate more jitter
+        if median_ns < 100_000:  # < 100 µs
+            return 0.7
+        if median_ns < 500_000:  # < 500 µs
+            return 0.5
+        if median_ns < 1_000_000:  # < 1 ms
+            return 0.4
+        if median_ns < 10_000_000:  # < 10 ms
+            return 0.2
+        if median_ns < 100_000_000:  # < 100 ms
+            return 0.1
+        return 0.03  # >= 100 ms
+
+    @pytest.hookimpl
+    def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
+        if report.when == "call" and (duration_ns := get_runtime_from_stdout(report.capstdout)):
+            self.current_loop_durations_in_nano.append(duration_ns)

     @hookspec(firstresult=True)
     def pytest_runtestloop(self, session: Session) -> bool:
         """Reimplement the test loop but loop for the user defined amount of time."""
+        durations = deque(maxlen=CONSISTENT_LOOP_COUNT)  # rolling window of loop totals (ns)
+
         if session.testsfailed and not session.config.option.continue_on_collection_errors:
             msg = "{} error{} during collection".format(session.testsfailed, "s" if session.testsfailed != 1 else "")
@@ -283,10 +332,10 @@ def pytest_runtestloop(self, session: Session) -> bool:
         total_time: float = self._get_total_time(session)
         count: int = 0
-
-        while total_time >= SHORTEST_AMOUNT_OF_TIME:  # need to run at least one for normal tests
+        runtimes: list[int] = []
+        while total_time >= SHORTEST_AMOUNT_OF_TIME:  # exits via the consistency check or _timed_out below
             count += 1
-            total_time = self._get_total_time(session)
+            self.current_loop_durations_in_nano.clear()  # fresh timings for this loop

             for index, item in enumerate(session.items):
                 item: pytest.Item = item  # noqa: PLW0127, PLW2901
@@ -304,8 +353,31 @@ def pytest_runtestloop(self, session: Session) -> bool:
                     raise session.Failed(session.shouldfail)
                 if session.shouldstop:
                     raise session.Interrupted(session.shouldstop)
+
+            runtimes.extend(self.current_loop_durations_in_nano)
+
+            total_duration_in_nano = sum(self.current_loop_durations_in_nano)
+            if total_duration_in_nano > 0:
+                durations.append(total_duration_in_nano)
+            else:
+                durations.clear()  # nothing captured this loop; restart the streak
+
+            # Consistency check: stop early once CONSISTENT_LOOP_COUNT successive loop totals agree
+            if len(durations) == CONSISTENT_LOOP_COUNT and count >= session.config.option.codeflash_min_loops:
+                median_ns = statistics.median(durations)
+                if median_ns == 0:
+                    consistent = all(d == 0 for d in durations)
+                else:
+                    consistent = all(abs(d - median_ns) / median_ns <= self.dynamic_tolerance(median_ns) for d in durations)
+                if consistent:
+                    self.logger.debug(
+                        "runtimes consistent after %d loops; per-test runtimes (ns): %s", count, runtimes
+                    )
+                    break
+
             if self._timed_out(session, start_time, count):
-                break  # exit loop
+                break
+
             _ORIGINAL_TIME_SLEEP(self._get_delay_time(session))
         return True
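
Two illustrative sketches follow; neither is part of the patch. First, the stdout contract that get_runtime_from_stdout assumes: an instrumented test prints its runtime between "!######" / "######!" markers, with the nanosecond value after the payload's last colon. The test-id prefix in the payload below is hypothetical; only the trailing integer matters to the parser.

    from codeflash.verification.pytest_plugin import get_runtime_from_stdout

    stdout = "collected 1 item\n!######tests/test_foo.py::test_foo:1834210######!\n"
    assert get_runtime_from_stdout(stdout) == 1834210          # runtime in nanoseconds
    assert get_runtime_from_stdout("no markers here") is None  # missing marker -> None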
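
Second, a self-contained sketch of the early-exit criterion, assuming the tolerance thresholds are meant in nanoseconds (the medians fed to dynamic_tolerance are nanosecond sums; the per-loop totals below are hypothetical). A run counts as stable once every value in the rolling window sits within the dynamic tolerance of the window's median.

    import statistics
    from collections import deque

    CONSISTENT_LOOP_COUNT = 3  # mirrors codeflash.code_utils.config_consts

    def dynamic_tolerance(median_ns: float) -> float:
        # Same tiers as the plugin: shorter runtimes tolerate more relative jitter.
        for limit_ns, tol in [(100_000, 0.7), (500_000, 0.5), (1_000_000, 0.4),
                              (10_000_000, 0.2), (100_000_000, 0.1)]:
            if median_ns < limit_ns:
                return tol
        return 0.03

    durations = deque(maxlen=CONSISTENT_LOOP_COUNT)  # rolling window of loop totals
    for count, loop_total_ns in enumerate([5_000_000, 1_050_000, 980_000, 1_010_000], 1):
        durations.append(loop_total_ns)
        if len(durations) == CONSISTENT_LOOP_COUNT:
            median_ns = statistics.median(durations)
            if all(abs(d - median_ns) / median_ns <= dynamic_tolerance(median_ns) for d in durations):
                print(f"stable after {count} loops (median {median_ns:,.0f} ns)")
                break

Here the first loop's 5 ms outlier keeps the window inconsistent; it drops out of the window on loop 4, the three remaining totals agree within the 20% tier, and the benchmark stops early instead of exhausting the full time budget.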