Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions codeflash/code_utils/config_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
N_TESTS_TO_GENERATE = 2
TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget
# Window size of recent per-loop total durations compared by the pytest plugin's test loop;
# the loop stops early once this many consecutive loops agree within tolerance.
CONSISTENT_LOOP_COUNT = 3
COVERAGE_THRESHOLD = 60.0
MIN_TESTCASE_PASSED_THRESHOLD = 6
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
Expand Down
1 change: 0 additions & 1 deletion codeflash/code_utils/env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool: # noqa
if not formatter_cmds or formatter_cmds[0] == "disabled":
return True

first_cmd = formatter_cmds[0]
cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]

Expand Down
54 changes: 49 additions & 5 deletions codeflash/verification/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@

import contextlib
import inspect

# System Imports
import logging
import os
import platform
import re
import statistics
import sys
import time as _time_module
import warnings
from collections import deque

# System Imports
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
from unittest import TestCase
Expand All @@ -19,6 +21,8 @@
import pytest
from pluggy import HookspecMarker

from codeflash.code_utils.config_consts import CONSISTENT_LOOP_COUNT

if TYPE_CHECKING:
from _pytest.config import Config, Parser
from _pytest.main import Session
Expand Down Expand Up @@ -268,9 +272,30 @@ def __init__(self, config: Config) -> None:
level = logging.DEBUG if config.option.verbose > 1 else logging.INFO
logging.basicConfig(level=level)
self.logger = logging.getLogger(self.name)
self.current_loop_durations_in_seconds: list[float] = []

def dynamic_tolerance(self, avg: float) -> float:
    """Return the allowed relative deviation for a loop duration of ``avg`` seconds.

    Shorter durations get a looser tolerance. Bands are checked in ascending
    order with a strict ``<`` against each upper bound, so a value equal to a
    bound falls into the next (tighter) band.
    """
    # (exclusive upper bound in seconds, tolerance as a fraction of avg)
    tolerance_bands = (
        (0.0001, 0.7),  # under 100 µs
        (0.0005, 0.5),  # under 500 µs
        (0.001, 0.4),  # under 1 ms
        (0.01, 0.2),  # under 10 ms
        (0.1, 0.1),  # under 100 ms
    )
    for upper_bound, tolerance in tolerance_bands:
        if avg < upper_bound:
            return tolerance
    # 100 ms and above
    return 0.03

@pytest.hookimpl
def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
    """Record the duration of each report whose ``call`` phase passed.

    Reports for any other phase, or with any outcome other than ``"passed"``,
    are ignored. Recorded durations are appended to
    ``self.current_loop_durations_in_seconds``.
    """
    is_passing_call = report.when == "call" and report.outcome == "passed"
    if not is_passing_call:
        return
    self.current_loop_durations_in_seconds.append(report.duration)

@hookspec(firstresult=True)
def pytest_runtestloop(self, session: Session) -> bool:
durations = deque(maxlen=CONSISTENT_LOOP_COUNT)

"""Reimplement the test loop but loop for the user defined amount of time."""
if session.testsfailed and not session.config.option.continue_on_collection_errors:
msg = "{} error{} during collection".format(session.testsfailed, "s" if session.testsfailed != 1 else "")
Expand All @@ -284,9 +309,9 @@ def pytest_runtestloop(self, session: Session) -> bool:

count: int = 0

while total_time >= SHORTEST_AMOUNT_OF_TIME: # need to run at least one for normal tests
while total_time >= SHORTEST_AMOUNT_OF_TIME:
count += 1
total_time = self._get_total_time(session)
self.current_loop_durations_in_seconds.clear()

for index, item in enumerate(session.items):
item: pytest.Item = item # noqa: PLW0127, PLW2901
Expand All @@ -304,8 +329,27 @@ def pytest_runtestloop(self, session: Session) -> bool:
raise session.Failed(session.shouldfail)
if session.shouldstop:
raise session.Interrupted(session.shouldstop)

total_duration_in_seconds = sum(self.current_loop_durations_in_seconds)

if total_duration_in_seconds > 0:
durations.append(total_duration_in_seconds)
else:
durations.clear()

# Consistency check
if len(durations) == CONSISTENT_LOOP_COUNT:
avg = statistics.median(durations)
if avg == 0:
consistent = all(d == 0 for d in durations)
else:
consistent = all(abs(d - avg) / avg <= self.dynamic_tolerance(avg) for d in durations)
if consistent:
break

if self._timed_out(session, start_time, count):
break # exit loop
break

_ORIGINAL_TIME_SLEEP(self._get_delay_time(session))
return True

Expand Down
Loading