diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index 8743ab2ac..fac3c87c1 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -248,11 +248,11 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
                 "original_source_code": opt.original_source_code,
                 "read_only_dependency_code": opt.read_only_dependency_code,
                 "original_line_profiler_results": opt.original_line_profiler_results,
-                "original_code_runtime": opt.original_code_runtime,
+                "original_code_runtime": humanize_runtime(opt.original_code_runtime),
                 "optimized_source_code": opt.optimized_source_code,
                 "optimized_explanation": opt.optimized_explanation,
                 "optimized_line_profiler_results": opt.optimized_line_profiler_results,
-                "optimized_code_runtime": opt.optimized_code_runtime,
+                "optimized_code_runtime": humanize_runtime(opt.optimized_code_runtime),
                 "speedup": opt.speedup,
                 "trace_id": opt.trace_id,
                 "function_references": opt.function_references,
@@ -260,8 +260,6 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
             }
             for opt in request
         ]
-        logger.debug(f"Refining {len(request)} optimizations…")
-        console.rule()
         try:
             response = self.make_ai_service_request("/refinement", payload=payload, timeout=120)
         except requests.exceptions.RequestException as e:
@@ -271,8 +269,6 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
 
         if response.status_code == 200:
             refined_optimizations = response.json()["refinements"]
-            logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
-            console.rule()
             refinements = self._get_valid_candidates(refined_optimizations)
             return [
diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py
index 37e0dd94e..6fd4917a9 100644
--- a/codeflash/code_utils/code_utils.py
+++ b/codeflash/code_utils/code_utils.py
@@ -41,6 +41,63 @@ def unified_diff_strings(code1: str, code2: str, fromfile: str = "original", tof
     return "".join(diff)
 
 
+def choose_weights(**importance: float) -> list[float]:
+    """Choose normalized weights from relative importance values.
+
+    Example:
+        choose_weights(runtime=3, diff=1)
+        -> [0.75, 0.25]
+
+    Args:
+        **importance: Keyword args of metric=importance (relative numbers).
+
+    Returns:
+        A list of weights in the same order as the arguments.
+
+    """
+    total = sum(importance.values())
+    if total == 0:
+        raise ValueError("At least one importance value must be > 0")
+
+    return [v / total for v in importance.values()]
+
+
+def normalize(values: list[float]) -> list[float]:
+    mn, mx = min(values), max(values)
+    if mx == mn:
+        return [0.0] * len(values)
+    return [(v - mn) / (mx - mn) for v in values]
+
+
+def create_score_dictionary_from_metrics(weights: list[float], *metrics: list[float]) -> dict[int, float]:
+    """Combine multiple metrics into a single weighted score dictionary.
+
+    Each metric is a list of values (smaller = better).
+    The total score for each index is the weighted sum of its values
+    across all metrics:
+
+        score[index] = Σ (value * weight)
+
+    Args:
+        weights: A list of weights, one per metric. Larger weight = more influence.
+        *metrics: Lists of values (one list per metric, aligned by index).
+
+    Returns:
+        A dictionary mapping each index to its combined weighted score.
+
+    """
+    if len(weights) != len(metrics):
+        raise ValueError("Number of weights must match number of metrics")
+
+    combined: dict[int, float] = {}
+
+    for weight, metric in zip(weights, metrics):
+        for idx, value in enumerate(metric):
+            combined[idx] = combined.get(idx, 0.0) + value * weight
+
+    return combined
+
+
 def diff_length(a: str, b: str) -> int:
     """Compute the length (in characters) of the unified diff between two strings.
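The three helpers above compose into a small ranking pipeline. A minimal sketch with hypothetical metric values (lower is better for both metrics), assuming the helpers are importable as added here:

```python
from codeflash.code_utils.code_utils import (
    choose_weights,
    create_score_dictionary_from_metrics,
    normalize,
)

runtimes = [120, 95, 300, 150]  # optimized runtimes in ns (lower is better)
diff_lens = [40, 400, 10, 80]   # diff lengths in characters (lower is better)

weights = choose_weights(runtime=2, diff=1)  # -> [2/3, 1/3]
scores = create_score_dictionary_from_metrics(
    weights, normalize(runtimes), normalize(diff_lens)
)

# Lower combined score is better; index 1 is penalized for its large diff.
print(sorted(scores, key=scores.get))  # [0, 3, 1, 2]
```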
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 6b2805fbf..4b538c17c 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -14,6 +14,11 @@
 DEFAULT_IMPORTANCE_THRESHOLD = 0.001
 N_CANDIDATES_LP = 6
 
+# Refinement
+REFINE_ALL_THRESHOLD = 2  # refine every optimization when there are 2 or fewer valid candidates
+REFINED_CANDIDATE_RANKING_WEIGHTS = (2, 1)  # (runtime, diff): runtime counts twice as much as diff size
+TOP_N_REFINEMENTS = 0.45  # refine the top 45% of valid optimizations, ranked by weighted score
+
 # LSP-specific
 N_CANDIDATES_LSP = 3
 N_TESTS_TO_GENERATE_LSP = 2
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
index 0ea380059..bd66d3551 100644
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@@ -36,10 +36,10 @@ class AIServiceRefinerRequest:
     optimization_id: str
     original_source_code: str
     read_only_dependency_code: str
-    original_code_runtime: str
+    original_code_runtime: int
     optimized_source_code: str
     optimized_explanation: str
-    optimized_code_runtime: str
+    optimized_code_runtime: int
     speedup: str
     trace_id: str
     original_line_profiler_results: str
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 3fda86489..a60b29b8b 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -29,13 +29,16 @@
     replace_function_definitions_in_module,
 )
 from codeflash.code_utils.code_utils import (
+    choose_weights,
     cleanup_paths,
     create_rank_dictionary_compact,
+    create_score_dictionary_from_metrics,
     diff_length,
     extract_unique_errors,
     file_name_from_test_module_name,
     get_run_tmp_file,
     module_name_from_file_path,
+    normalize,
     restore_conftest,
     unified_diff_strings,
 )
@@ -45,7 +48,10 @@
     N_CANDIDATES_EFFECTIVE,
     N_CANDIDATES_LP_EFFECTIVE,
     N_TESTS_TO_GENERATE_EFFECTIVE,
+    REFINE_ALL_THRESHOLD,
+    REFINED_CANDIDATE_RANKING_WEIGHTS,
     REPEAT_OPTIMIZATION_PROBABILITY,
+    TOP_N_REFINEMENTS,
     TOTAL_LOOPING_TIME_EFFECTIVE,
 )
 from codeflash.code_utils.deduplicate_code import normalize_code
@@ -124,19 +130,23 @@ def __init__(
         self,
         initial_candidates: list,
         future_line_profile_results: concurrent.futures.Future,
-        future_all_refinements: list,
+        all_refinements_data: list[AIServiceRefinerRequest],
+        ai_service_client: AiServiceClient,
+        executor: concurrent.futures.ThreadPoolExecutor,
     ) -> None:
         self.candidate_queue = queue.Queue()
         self.line_profiler_done = False
         self.refinement_done = False
         self.candidate_len = len(initial_candidates)
+        self.ai_service_client = ai_service_client
+        self.executor = executor
         # Initialize queue with initial candidates
         for candidate in initial_candidates:
             self.candidate_queue.put(candidate)
         self.future_line_profile_results = future_line_profile_results
-        self.future_all_refinements = future_all_refinements
+        self.all_refinements_data = all_refinements_data
 
     def get_next_candidate(self) -> OptimizedCandidate | None:
         """Get the next candidate from the queue, handling async results as needed."""
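`CandidateProcessor` now owns the AI service client and the executor, so it can submit refinement jobs itself instead of receiving pre-built futures. A self-contained sketch of the submit-then-wait pattern it uses, with a stub standing in for the real AI service call:

```python
import concurrent.futures

def refine_one(request_id: str) -> list[str]:
    # Stand-in for ai_service_client.optimize_python_code_refinement.
    return [f"refined-{request_id}"]

with concurrent.futures.ThreadPoolExecutor() as executor:
    # One future per candidate, mirroring refine_optimizations([data]) below.
    futures = [executor.submit(refine_one, rid) for rid in ("a", "b", "c")]
    concurrent.futures.wait(futures)
    results = [f.result()[0] for f in futures if f.result()]

print(results)  # ['refined-a', 'refined-b', 'refined-c']
```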
@@ -168,15 +178,45 @@ def _process_line_profiler_results(self) -> OptimizedCandidate | None:
 
         return self.get_next_candidate()
 
+    def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concurrent.futures.Future:
+        return self.executor.submit(self.ai_service_client.optimize_python_code_refinement, request=request)
+
     def _process_refinement_results(self) -> OptimizedCandidate | None:
-        """Process refinement results and add to queue."""
-        if self.future_all_refinements:
+        """Process refinement results and add them to the queue. Candidates are ranked by a weighted score of normalized runtime and diff length, and the top TOP_N_REFINEMENTS fraction (rounded half-up) of valid optimizations is refined."""
+        future_refinements: list[concurrent.futures.Future] = []
+
+        if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD:
+            for data in self.all_refinements_data:
+                future_refinements.append(self.refine_optimizations([data]))  # noqa: PERF401
+        else:
+            diff_lens_list = []
+            runtimes_list = []
+            for c in self.all_refinements_data:
+                diff_lens_list.append(diff_length(c.original_source_code, c.optimized_source_code))
+                runtimes_list.append(c.optimized_code_runtime)
+
+            runtime_w, diff_w = REFINED_CANDIDATE_RANKING_WEIGHTS
+            weights = choose_weights(runtime=runtime_w, diff=diff_w)
+
+            runtime_norm = normalize(runtimes_list)
+            diffs_norm = normalize(diff_lens_list)
+            # lower scores are better
+            score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
+            top_n_candidates = int((TOP_N_REFINEMENTS * len(runtimes_list)) + 0.5)  # round half-up
+            top_indices = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
+
+            for idx in top_indices:
+                data = self.all_refinements_data[idx]
+                future_refinements.append(self.refine_optimizations([data]))
+
+        if future_refinements:
             logger.info("loading|Refining generated code for improved quality and performance...")
-            concurrent.futures.wait(self.future_all_refinements)
+
+            concurrent.futures.wait(future_refinements)
             refinement_response = []
-            for future_refinement in self.future_all_refinements:
-                possible_refinement = future_refinement.result()
+            for f in future_refinements:
+                possible_refinement = f.result()
                 if len(possible_refinement) > 0:
                     refinement_response.append(possible_refinement[0])
@@ -684,15 +724,14 @@ def process_single_candidate(
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
         eval_ctx: CandidateEvaluationContext,
-        future_all_refinements: list[concurrent.futures.Future],
-        ai_service_client: AiServiceClient,
+        all_refinements_data: list[AIServiceRefinerRequest],
         exp_type: str,
         function_references: str,
     ) -> BestOptimization | None:
         """Process a single optimization candidate.
 
         Returns the BestOptimization if the candidate is successful, None otherwise.
-        Updates eval_ctx with results and may append to future_all_refinements.
+        Updates eval_ctx with results and may append to all_refinements_data.
         """
         # Cleanup temp files
         get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True)
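The selection step above keeps roughly the top 45% of ranked candidates. A worked sketch of the round-half-up cut, assuming `TOP_N_REFINEMENTS = 0.45` from config_consts and hypothetical combined scores:

```python
TOP_N_REFINEMENTS = 0.45  # fraction of valid optimizations to refine

scores = {0: 0.11, 1: 0.33, 2: 0.67, 3: 0.24}  # hypothetical weighted scores
top_n = int((TOP_N_REFINEMENTS * len(scores)) + 0.5)  # 0.45 * 4 = 1.8 -> 2
top_indices = sorted(scores, key=scores.get)[:top_n]  # lowest scores win

print(top_indices)  # [0, 3]
```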
""" # Cleanup temp files get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True) @@ -787,14 +826,19 @@ def process_single_candidate( # Queue refinement for non-refined candidates if not candidate.optimization_id.endswith("refi"): - future_all_refinements.append( - self.refine_optimizations( - valid_optimizations=[best_optimization], - original_code_baseline=original_code_baseline, - code_context=code_context, + all_refinements_data.append( + AIServiceRefinerRequest( + optimization_id=best_optimization.candidate.optimization_id, + original_source_code=code_context.read_writable_code.markdown, + read_only_dependency_code=code_context.read_only_context_code, + original_code_runtime=original_code_baseline.runtime, + optimized_source_code=best_optimization.candidate.source_code.markdown, + optimized_explanation=best_optimization.candidate.explanation, + optimized_code_runtime=best_optimization.runtime, + speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=best_optimization.runtime) * 100)}%", trace_id=self.get_trace_id(exp_type), - ai_service_client=ai_service_client, - executor=self.executor, + original_line_profiler_results=original_code_baseline.line_profile_results["str_out"], + optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"], function_references=function_references, ) ) @@ -830,7 +874,7 @@ def determine_best_candidate( # Initialize evaluation context and async tasks eval_ctx = CandidateEvaluationContext() - future_all_refinements: list[concurrent.futures.Future] = [] + all_refinements_data: list[AIServiceRefinerRequest] = [] ai_service_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client assert ai_service_client is not None, "AI service client must be set for optimization" @@ -848,7 +892,9 @@ def determine_best_candidate( else None, ) - processor = CandidateProcessor(candidates, future_line_profile_results, future_all_refinements) + processor = CandidateProcessor( + candidates, future_line_profile_results, all_refinements_data, self.aiservice_client, self.executor + ) candidate_index = 0 # Process candidates using queue-based approach @@ -869,8 +915,7 @@ def determine_best_candidate( original_helper_code=original_helper_code, file_path_to_helper_classes=file_path_to_helper_classes, eval_ctx=eval_ctx, - future_all_refinements=future_all_refinements, - ai_service_client=ai_service_client, + all_refinements_data=all_refinements_data, exp_type=exp_type, function_references=function_references, ) @@ -903,35 +948,6 @@ def determine_best_candidate( return best_optimization - def refine_optimizations( - self, - valid_optimizations: list[BestOptimization], - original_code_baseline: OriginalCodeBaseline, - code_context: CodeOptimizationContext, - trace_id: str, - ai_service_client: AiServiceClient, - executor: concurrent.futures.ThreadPoolExecutor, - function_references: str | None = None, - ) -> concurrent.futures.Future: - request = [ - AIServiceRefinerRequest( - optimization_id=opt.candidate.optimization_id, - original_source_code=code_context.read_writable_code.markdown, - read_only_dependency_code=code_context.read_only_context_code, - original_code_runtime=humanize_runtime(original_code_baseline.runtime), - optimized_source_code=opt.candidate.source_code.markdown, - optimized_explanation=opt.candidate.explanation, - optimized_code_runtime=humanize_runtime(opt.runtime), - 
speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=opt.runtime) * 100)}%", - trace_id=trace_id, - original_line_profiler_results=original_code_baseline.line_profile_results["str_out"], - optimized_line_profiler_results=opt.line_profiler_test_results["str_out"], - function_references=function_references, - ) - for opt in valid_optimizations - ] - return executor.submit(ai_service_client.optimize_python_code_refinement, request=request) - def log_successful_optimization( self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str ) -> None: