8 changes: 2 additions & 6 deletions codeflash/api/aiservice.py
@@ -248,20 +248,18 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
"original_source_code": opt.original_source_code,
"read_only_dependency_code": opt.read_only_dependency_code,
"original_line_profiler_results": opt.original_line_profiler_results,
"original_code_runtime": opt.original_code_runtime,
"original_code_runtime": humanize_runtime(opt.original_code_runtime),
"optimized_source_code": opt.optimized_source_code,
"optimized_explanation": opt.optimized_explanation,
"optimized_line_profiler_results": opt.optimized_line_profiler_results,
"optimized_code_runtime": opt.optimized_code_runtime,
"optimized_code_runtime": humanize_runtime(opt.optimized_code_runtime),
"speedup": opt.speedup,
"trace_id": opt.trace_id,
"function_references": opt.function_references,
"python_version": platform.python_version(),
}
for opt in request
]
logger.debug(f"Refining {len(request)} optimizations…")
console.rule()
try:
response = self.make_ai_service_request("/refinement", payload=payload, timeout=120)
except requests.exceptions.RequestException as e:
@@ -271,8 +269,6 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]

if response.status_code == 200:
refined_optimizations = response.json()["refinements"]
logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
console.rule()

refinements = self._get_valid_candidates(refined_optimizations)
return [
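These payload fields pair with the type change in `models.py` below: runtimes now flow through the pipeline as raw nanosecond integers and are humanized only at this serialization boundary. A minimal sketch of that conversion, using a hypothetical stand-in since `humanize_runtime`'s actual formatting is not shown in this diff:

```python
def humanize_runtime_sketch(runtime_ns: int) -> str:
    """Hypothetical stand-in for codeflash's humanize_runtime helper."""
    for unit, factor in (("s", 1e9), ("ms", 1e6), ("us", 1e3)):
        if runtime_ns >= factor:
            return f"{runtime_ns / factor:.2f}{unit}"
    return f"{runtime_ns}ns"

print(humanize_runtime_sketch(1_250_000))  # -> "1.25ms"
print(humanize_runtime_sketch(980))        # -> "980ns"
```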
57 changes: 57 additions & 0 deletions codeflash/code_utils/code_utils.py
@@ -41,6 +41,63 @@ def unified_diff_strings(code1: str, code2: str, fromfile: str = "original", tof
return "".join(diff)


def choose_weights(**importance: float) -> list[float]:
"""Choose normalized weights from relative importance values.

Example:
choose_weights(runtime=3, diff=1)
-> [0.75, 0.25]

Args:
**importance: keyword args of metric=importance (relative numbers).

Returns:
A list of weights in the same order as the arguments.

"""
total = sum(importance.values())
if total == 0:
raise ValueError("At least one importance value must be > 0")

return [v / total for v in importance.values()]


def normalize(values: list[float]) -> list[float]:
mn, mx = min(values), max(values)
if mx == mn:
return [0.0] * len(values)
return [(v - mn) / (mx - mn) for v in values]


def create_score_dictionary_from_metrics(weights: list[float], *metrics: list[float]) -> dict[int, float]:
"""Combine multiple metrics into a single weighted score dictionary.

Each metric is a list of values (smaller = better).
The total score for each index is the weighted sum of its values
across all metrics:

score[index] = Σ (value * weight)

Args:
weights: A list of weights, one per metric. Larger weight = more influence.
*metrics: Lists of values (one list per metric, aligned by index).

Returns:
A dictionary mapping each index to its combined weighted score.

"""
if len(weights) != len(metrics):
raise ValueError("Number of weights must match number of metrics")

combined: dict[int, float] = {}

for weight, metric in zip(weights, metrics):
for idx, value in enumerate(metric):
combined[idx] = combined.get(idx, 0) + value * weight

return combined


def diff_length(a: str, b: str) -> int:
"""Compute the length (in characters) of the unified diff between two strings.

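A worked example of the three ranking helpers added above; the numbers are invented for illustration, and the import path assumes the module this diff adds them to (`codeflash/code_utils/code_utils.py`):

```python
from codeflash.code_utils.code_utils import (
    choose_weights,
    create_score_dictionary_from_metrics,
    normalize,
)

# Three candidates; for both metrics, lower is better.
runtimes = [1200.0, 800.0, 2000.0]  # optimized runtimes in nanoseconds
diff_lens = [40.0, 300.0, 10.0]     # unified-diff lengths in characters

weights = choose_weights(runtime=2, diff=1)  # -> [0.666..., 0.333...]
scores = create_score_dictionary_from_metrics(
    weights, normalize(runtimes), normalize(diff_lens)
)
# Lower score = better candidate; ascending sort ranks them for refinement.
print(sorted(scores, key=scores.get))  # -> [0, 1, 2]
```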
5 changes: 5 additions & 0 deletions codeflash/code_utils/config_consts.py
@@ -14,6 +14,11 @@
DEFAULT_IMPORTANCE_THRESHOLD = 0.001
N_CANDIDATES_LP = 6

# Refinement
REFINE_ALL_THRESHOLD = 2  # refine all optimizations when there are 2 or fewer valid ones
REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1)  # (runtime, diff): runtime is weighted twice as heavily as diff
TOP_N_REFINEMENTS = 0.45  # refine the top 45% of valid optimizations, ranked by weighted score

# LSP-specific
N_CANDIDATES_LSP = 3
N_TESTS_TO_GENERATE_LSP = 2
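How the three constants interact, mirroring the selection logic in `_process_refinement_results` further down this diff (pool sizes are illustrative):

```python
REFINE_ALL_THRESHOLD = 2
TOP_N_REFINEMENTS = 0.45

for n_valid in (1, 2, 3, 5, 10):
    if n_valid <= REFINE_ALL_THRESHOLD:
        n_refined = n_valid  # small pools: refine everything
    else:
        # round-half-up of 45% of the pool
        n_refined = int(TOP_N_REFINEMENTS * n_valid + 0.5)
    print(f"{n_valid} valid -> {n_refined} refined")
# 1 -> 1, 2 -> 2, 3 -> 1, 5 -> 2, 10 -> 5
```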
4 changes: 2 additions & 2 deletions codeflash/models/models.py
@@ -36,10 +36,10 @@ class AIServiceRefinerRequest:
optimization_id: str
original_source_code: str
read_only_dependency_code: str
original_code_runtime: str
original_code_runtime: int
optimized_source_code: str
optimized_explanation: str
optimized_code_runtime: str
optimized_code_runtime: int
speedup: str
trace_id: str
original_line_profiler_results: str
116 changes: 66 additions & 50 deletions codeflash/optimization/function_optimizer.py
@@ -29,13 +29,16 @@
replace_function_definitions_in_module,
)
from codeflash.code_utils.code_utils import (
choose_weights,
cleanup_paths,
create_rank_dictionary_compact,
create_score_dictionary_from_metrics,
diff_length,
extract_unique_errors,
file_name_from_test_module_name,
get_run_tmp_file,
module_name_from_file_path,
normalize,
restore_conftest,
unified_diff_strings,
)
@@ -45,7 +48,10 @@
N_CANDIDATES_EFFECTIVE,
N_CANDIDATES_LP_EFFECTIVE,
N_TESTS_TO_GENERATE_EFFECTIVE,
REFINE_ALL_THRESHOLD,
REFINED_CANDIDATE_RANKING_WEIGHTS,
REPEAT_OPTIMIZATION_PROBABILITY,
TOP_N_REFINEMENTS,
TOTAL_LOOPING_TIME_EFFECTIVE,
)
from codeflash.code_utils.deduplicate_code import normalize_code
@@ -124,19 +130,23 @@ def __init__(
self,
initial_candidates: list,
future_line_profile_results: concurrent.futures.Future,
future_all_refinements: list,
all_refinements_data: list[AIServiceRefinerRequest],
ai_service_client: AiServiceClient,
executor: concurrent.futures.ThreadPoolExecutor,
) -> None:
self.candidate_queue = queue.Queue()
self.line_profiler_done = False
self.refinement_done = False
self.candidate_len = len(initial_candidates)
self.ai_service_client = ai_service_client
self.executor = executor

# Initialize queue with initial candidates
for candidate in initial_candidates:
self.candidate_queue.put(candidate)

self.future_line_profile_results = future_line_profile_results
self.future_all_refinements = future_all_refinements
self.all_refinements_data = all_refinements_data

def get_next_candidate(self) -> OptimizedCandidate | None:
"""Get the next candidate from the queue, handling async results as needed."""
@@ -168,15 +178,45 @@ def _process_line_profiler_results(self) -> OptimizedCandidate | None:

return self.get_next_candidate()

def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concurrent.futures.Future:
return self.executor.submit(self.ai_service_client.optimize_python_code_refinement, request=request)

def _process_refinement_results(self) -> OptimizedCandidate | None:
"""Process refinement results and add to queue."""
if self.future_all_refinements:
"""Process refinement results and add to queue. We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined."""
future_refinements: list[concurrent.futures.Future] = []

if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD:
for data in self.all_refinements_data:
future_refinements.append(self.refine_optimizations([data])) # noqa: PERF401
else:
diff_lens_list = []
runtimes_list = []
for c in self.all_refinements_data:
diff_lens_list.append(diff_length(c.original_source_code, c.optimized_source_code))
runtimes_list.append(c.optimized_code_runtime)

runtime_w, diff_w = REFINED_CANDIDATE_RANKING_WEIGHTS
weights = choose_weights(runtime=runtime_w, diff=diff_w)

runtime_norm = normalize(runtimes_list)
diffs_norm = normalize(diff_lens_list)
# the lower the better
score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
top_n_candidates = int((TOP_N_REFINEMENTS * len(runtimes_list)) + 0.5)
top_indices = sorted(score_dict, key=score_dict.get)[:top_n_candidates]

for idx in top_indices:
data = self.all_refinements_data[idx]
future_refinements.append(self.refine_optimizations([data]))

if future_refinements:
logger.info("loading|Refining generated code for improved quality and performance...")
concurrent.futures.wait(self.future_all_refinements)

concurrent.futures.wait(future_refinements)
refinement_response = []

for future_refinement in self.future_all_refinements:
possible_refinement = future_refinement.result()
for f in future_refinements:
possible_refinement = f.result()
if len(possible_refinement) > 0:
refinement_response.append(possible_refinement[0])

@@ -684,15 +724,14 @@ def process_single_candidate(
original_helper_code: dict[Path, str],
file_path_to_helper_classes: dict[Path, set[str]],
eval_ctx: CandidateEvaluationContext,
future_all_refinements: list[concurrent.futures.Future],
ai_service_client: AiServiceClient,
all_refinements_data: list[AIServiceRefinerRequest],
exp_type: str,
function_references: str,
) -> BestOptimization | None:
"""Process a single optimization candidate.

Returns the BestOptimization if the candidate is successful, None otherwise.
Updates eval_ctx with results and may append to future_all_refinements.
Updates eval_ctx with results and may append to all_refinements_data.
"""
# Cleanup temp files
get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True)
@@ -787,14 +826,19 @@

# Queue refinement for non-refined candidates
if not candidate.optimization_id.endswith("refi"):
future_all_refinements.append(
self.refine_optimizations(
valid_optimizations=[best_optimization],
original_code_baseline=original_code_baseline,
code_context=code_context,
all_refinements_data.append(
AIServiceRefinerRequest(
optimization_id=best_optimization.candidate.optimization_id,
original_source_code=code_context.read_writable_code.markdown,
read_only_dependency_code=code_context.read_only_context_code,
original_code_runtime=original_code_baseline.runtime,
optimized_source_code=best_optimization.candidate.source_code.markdown,
optimized_explanation=best_optimization.candidate.explanation,
optimized_code_runtime=best_optimization.runtime,
speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=best_optimization.runtime) * 100)}%",
trace_id=self.get_trace_id(exp_type),
ai_service_client=ai_service_client,
executor=self.executor,
original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
function_references=function_references,
)
)
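The speedup string above depends on `performance_gain`, which this diff does not show; a hedged sketch of the assumed definition:

```python
def performance_gain(original_runtime_ns: int, optimized_runtime_ns: int) -> float:
    """Assumed definition: fractional speedup relative to the optimized runtime."""
    return (original_runtime_ns - optimized_runtime_ns) / optimized_runtime_ns

# e.g. 300 ms -> 200 ms yields the string "50%":
print(f"{int(performance_gain(300_000_000, 200_000_000) * 100)}%")
```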
@@ -830,7 +874,7 @@ def determine_best_candidate(

# Initialize evaluation context and async tasks
eval_ctx = CandidateEvaluationContext()
future_all_refinements: list[concurrent.futures.Future] = []
all_refinements_data: list[AIServiceRefinerRequest] = []
ai_service_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client
assert ai_service_client is not None, "AI service client must be set for optimization"

@@ -848,7 +892,9 @@
else None,
)

processor = CandidateProcessor(candidates, future_line_profile_results, future_all_refinements)
processor = CandidateProcessor(
candidates, future_line_profile_results, all_refinements_data, ai_service_client, self.executor
)
candidate_index = 0

# Process candidates using queue-based approach
Expand All @@ -869,8 +915,7 @@ def determine_best_candidate(
original_helper_code=original_helper_code,
file_path_to_helper_classes=file_path_to_helper_classes,
eval_ctx=eval_ctx,
future_all_refinements=future_all_refinements,
ai_service_client=ai_service_client,
all_refinements_data=all_refinements_data,
exp_type=exp_type,
function_references=function_references,
)
@@ -903,35 +948,6 @@

return best_optimization

def refine_optimizations(
self,
valid_optimizations: list[BestOptimization],
original_code_baseline: OriginalCodeBaseline,
code_context: CodeOptimizationContext,
trace_id: str,
ai_service_client: AiServiceClient,
executor: concurrent.futures.ThreadPoolExecutor,
function_references: str | None = None,
) -> concurrent.futures.Future:
request = [
AIServiceRefinerRequest(
optimization_id=opt.candidate.optimization_id,
original_source_code=code_context.read_writable_code.markdown,
read_only_dependency_code=code_context.read_only_context_code,
original_code_runtime=humanize_runtime(original_code_baseline.runtime),
optimized_source_code=opt.candidate.source_code.markdown,
optimized_explanation=opt.candidate.explanation,
optimized_code_runtime=humanize_runtime(opt.runtime),
speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=opt.runtime) * 100)}%",
trace_id=trace_id,
original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
optimized_line_profiler_results=opt.line_profiler_test_results["str_out"],
function_references=function_references,
)
for opt in valid_optimizations
]
return executor.submit(ai_service_client.optimize_python_code_refinement, request=request)

def log_successful_optimization(
self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str
) -> None: