Merged
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yaml
@@ -28,8 +28,8 @@ jobs:
- name: install dependencies
run: uv sync

- - name: Install test-only dependencies (Python 3.13)
- if: matrix.python-version == '3.13'
+ - name: Install test-only dependencies (Python 3.9 and 3.13)
+ if: matrix.python-version == '3.9' || matrix.python-version == '3.13'
run: uv sync --group tests

- name: Unit tests
15 changes: 14 additions & 1 deletion codeflash/api/aiservice.py
@@ -255,6 +255,8 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
"optimized_code_runtime": opt.optimized_code_runtime,
"speedup": opt.speedup,
"trace_id": opt.trace_id,
"function_references": opt.function_references,
"python_version": platform.python_version(),
}
for opt in request
]
@@ -308,6 +310,7 @@ def get_new_explanation( # noqa: D417
original_throughput: str | None = None,
optimized_throughput: str | None = None,
throughput_improvement: str | None = None,
+ function_references: str | None = None,
) -> str:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -327,6 +330,7 @@ def get_new_explanation( # noqa: D417
- original_throughput: str | None - throughput for the baseline code (operations per second)
- optimized_throughput: str | None - throughput for the optimized code (operations per second)
- throughput_improvement: str | None - throughput improvement percentage
+ - function_references: str | None - where the function is called in the codebase

Returns
-------
@@ -349,6 +353,7 @@ def get_new_explanation( # noqa: D417
"original_throughput": original_throughput,
"optimized_throughput": optimized_throughput,
"throughput_improvement": throughput_improvement,
"function_references": function_references,
}
logger.info("loading|Generating explanation")
console.rule()
@@ -373,7 +378,12 @@ def get_new_explanation( # noqa: D417
return ""

def generate_ranking( # noqa: D417
- self, trace_id: str, diffs: list[str], optimization_ids: list[str], speedups: list[float]
+ self,
+ trace_id: str,
+ diffs: list[str],
+ optimization_ids: list[str],
+ speedups: list[float],
+ function_references: str | None = None,
) -> list[int] | None:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -382,6 +392,7 @@ def generate_ranking( # noqa: D417
- trace_id : unique uuid of function
- diffs : list of unified diff strings of opt candidates
- speedups : list of speedups of opt candidates
+ - function_references : where the function is called in the codebase

Returns
-------
@@ -394,6 +405,7 @@ def generate_ranking( # noqa: D417
"speedups": speedups,
"optimization_ids": optimization_ids,
"python_version": platform.python_version(),
"function_references": function_references,
}
logger.info("loading|Generating ranking")
console.rule()
@@ -594,6 +606,7 @@ def get_optimization_review(
"optimized_runtime": humanize_runtime(explanation.best_runtime_ns),
"original_runtime": humanize_runtime(explanation.original_runtime_ns),
"calling_fn_details": calling_fn_details,
"python_version": platform.python_version(),
}
console.rule()
try:
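Note: the aiservice.py hunks above all extend request payloads the same way. A minimal sketch of the shared pattern follows; every value except the stdlib platform.python_version() call is a placeholder, not real data:

```python
# Sketch of the payload fields this PR threads through the AI service calls.
# The trace_id and function_references values are placeholders.
import platform

payload = {
    "trace_id": "00000000-0000-0000-0000-000000000000",  # placeholder
    "function_references": "called from cli.py and utils.py",  # placeholder summary
    "python_version": platform.python_version(),  # e.g. "3.11.9"
}
```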
7 changes: 7 additions & 0 deletions codeflash/code_utils/code_extractor.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import ast
+ import time
from dataclasses import dataclass
from itertools import chain
from pathlib import Path
@@ -1138,6 +1139,7 @@ def find_specific_function_in_file(
def get_fn_references_jedi(
source_code: str, file_path: Path, project_root: Path, target_function: str, target_class: str | None
) -> list[Path]:
+ start_time = time.perf_counter()
function_position: CodePosition = find_specific_function_in_file(
source_code, file_path, target_function, target_class
)
@@ -1146,6 +1148,8 @@ def get_fn_references_jedi(
# Get references to the function
references = script.get_references(line=function_position.line_no, column=function_position.col_no)
# Collect unique file paths where references are found
+ end_time = time.perf_counter()
+ logger.debug(f"Jedi for function references ran in {end_time - start_time:.2f} seconds")
reference_files = set()
for ref in references:
if ref.module_path:
@@ -1163,6 +1167,7 @@ def get_fn_references_jedi(
def get_opt_review_metrics(
source_code: str, file_path: Path, qualified_name: str, project_root: Path, tests_root: Path
) -> str:
+ start_time = time.perf_counter()
try:
qualified_name_split = qualified_name.rsplit(".", maxsplit=1)
if len(qualified_name_split) == 1:
@@ -1176,4 +1181,6 @@ def get_opt_review_metrics(
except Exception as e:
calling_fns_details = ""
logger.debug(f"Investigate {e}")
+ end_time = time.perf_counter()
+ logger.debug(f"Got function references in {end_time - start_time:.2f} seconds")
return calling_fns_details
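Note: for readers unfamiliar with the jedi call being timed here, a self-contained sketch of the reference-lookup pattern follows. It assumes a jedi release with Script.get_references (0.16+); the function name and print are illustrative, not project code:

```python
# Illustrative sketch of a timed jedi reference lookup; not the project's code.
import time
from pathlib import Path

import jedi

def reference_files(source: str, path: Path, line: int, column: int) -> set[Path]:
    start_time = time.perf_counter()
    script = jedi.Script(code=source, path=path)
    refs = script.get_references(line=line, column=column)  # can be slow on large projects
    print(f"jedi ran in {time.perf_counter() - start_time:.2f} seconds")
    # Keep only the unique files that contain a reference to the target position.
    return {Path(ref.module_path) for ref in refs if ref.module_path}
```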
1 change: 1 addition & 0 deletions codeflash/models/models.py
@@ -44,6 +44,7 @@ class AIServiceRefinerRequest:
trace_id: str
original_line_profiler_results: str
optimized_line_profiler_results: str
+ function_references: str | None = None


# If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
53 changes: 37 additions & 16 deletions codeflash/optimization/function_optimizer.py
@@ -244,7 +244,7 @@ def __init__(
) = None
n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
self.executor = concurrent.futures.ThreadPoolExecutor(
- max_workers=n_tests + 2 if self.experiment_id is None else n_tests + 3
+ max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4
)

def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
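Note: the worker bump above tracks the extra future_references future submitted in generate_tests_and_optimizations later in this diff; the pool needs one worker per concurrently awaited future, otherwise the last submission just queues behind the others. A toy illustration under that assumption, unrelated to the project's real tasks:

```python
# Toy illustration: with max_workers equal to the number of in-flight futures,
# the wait finishes in about one task duration; a smaller pool serializes them.
import concurrent.futures
import time

in_flight = 5  # e.g. n_tests=2 plus candidates, concolic tests, and references

with concurrent.futures.ThreadPoolExecutor(max_workers=in_flight) as pool:
    start = time.perf_counter()
    futures = [pool.submit(time.sleep, 0.2) for _ in range(in_flight)]
    concurrent.futures.wait(futures)
    print(f"waited {time.perf_counter() - start:.2f}s")  # ~0.2s, not ~1.0s
```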
@@ -286,6 +286,7 @@ def generate_and_instrument_tests(
list[Path],
set[Path],
dict | None,
+ str,
]
]:
"""Generate and instrument tests, returning all necessary data for optimization."""
@@ -323,9 +324,14 @@

generated_tests: GeneratedTestsList
optimizations_set: OptimizationSet
- count_tests, generated_tests, function_to_concolic_tests, concolic_test_str, optimizations_set = (
- generated_results.unwrap()
- )
+ (
+ count_tests,
+ generated_tests,
+ function_to_concolic_tests,
+ concolic_test_str,
+ optimizations_set,
+ function_references,
+ ) = generated_results.unwrap()

for i, generated_test in enumerate(generated_tests.generated_tests):
with generated_test.behavior_file_path.open("w", encoding="utf8") as f:
@@ -371,6 +377,7 @@ def generate_and_instrument_tests(
generated_perf_test_paths,
instrumented_unittests_created_for_function,
original_conftest_content,
+ function_references,
)
)

@@ -403,6 +410,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
generated_perf_test_paths,
instrumented_unittests_created_for_function,
original_conftest_content,
+ function_references,
) = test_setup_result.unwrap()

baseline_setup_result = self.setup_and_establish_baseline(
@@ -437,6 +445,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
generated_tests=generated_tests,
test_functions_to_remove=test_functions_to_remove,
concolic_test_str=concolic_test_str,
+ function_references=function_references,
)

# Add function to code context hash if in gh actions
@@ -458,6 +467,7 @@ def determine_best_candidate(
original_helper_code: dict[Path, str],
file_path_to_helper_classes: dict[Path, set[str]],
exp_type: str,
+ function_references: str,
) -> BestOptimization | None:
best_optimization: BestOptimization | None = None
_best_runtime_until_now = original_code_baseline.runtime
@@ -667,6 +677,7 @@ def determine_best_candidate(
else self.function_trace_id,
ai_service_client=ai_service_client,
executor=self.executor,
+ function_references=function_references,
)
)
else:
@@ -753,6 +764,7 @@ def determine_best_candidate(
optimization_ids=optimization_ids,
speedups=speedups_list,
trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
+ function_references=function_references,
)
concurrent.futures.wait([future_ranking])
ranking = future_ranking.result()
@@ -766,7 +778,7 @@ def determine_best_candidate(
min_key = min(overall_ranking, key=overall_ranking.get)
elif len(optimization_ids) == 1:
min_key = 0 # only one candidate in valid _opts, already returns if there are no valid candidates
- else: # 0? shouldn't happen but it's there to escape potential bugs
+ else: # 0? shouldn't happen, but it's there to escape potential bugs
return None
best_optimization = valid_candidates_with_shorter_code[min_key]
# reassign code string which is the shortest
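Note: the selection above is a plain min-by-value over the ranking dict; a toy example of the same pattern:

```python
# Toy example of the min-by-rank selection; keys are candidate indices,
# values are combined ranks where lower is better.
overall_ranking = {0: 2.5, 1: 1.0, 2: 4.0}
min_key = min(overall_ranking, key=overall_ranking.get)
assert min_key == 1  # the candidate with the best (lowest) combined rank
```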
@@ -790,6 +802,7 @@ def refine_optimizations(
trace_id: str,
ai_service_client: AiServiceClient,
executor: concurrent.futures.ThreadPoolExecutor,
+ function_references: str | None = None,
) -> concurrent.futures.Future:
request = [
AIServiceRefinerRequest(
@@ -804,6 +817,7 @@ def refine_optimizations(
trace_id=trace_id,
original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
optimized_line_profiler_results=opt.line_profiler_test_results["str_out"],
+ function_references=function_references,
)
for opt in valid_optimizations
]
@@ -1089,7 +1103,7 @@ def generate_tests_and_optimizations(
generated_test_paths: list[Path],
generated_perf_test_paths: list[Path],
run_experiment: bool = False, # noqa: FBT001, FBT002
- ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str]:
+ ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str, str]:
n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
assert len(generated_test_paths) == n_tests
console.rule()
@@ -1116,7 +1130,15 @@
future_concolic_tests = self.executor.submit(
generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
)
- futures = [*future_tests, future_optimization_candidates, future_concolic_tests]
+ future_references = self.executor.submit(
+ get_opt_review_metrics,
+ self.function_to_optimize_source_code,
+ self.function_to_optimize.file_path,
+ self.function_to_optimize.qualified_name,
+ self.project_root,
+ self.test_cfg.tests_root,
+ )
+ futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_references]
if run_experiment:
future_candidates_exp = self.executor.submit(
self.local_aiservice_client.optimize_python_code,
@@ -1168,7 +1190,7 @@ def generate_tests_and_optimizations(
logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()

+ function_references = future_references.result()
count_tests = len(tests)
if concolic_test_str:
count_tests += 1
@@ -1182,6 +1204,7 @@
function_to_concolic_tests,
concolic_test_str,
OptimizationSet(control=candidates, experiment=candidates_experiment),
+ function_references,
)
self.generate_and_instrument_tests_results = result
return Success(result)
@@ -1263,6 +1286,7 @@ def find_and_process_best_optimization(
generated_tests: GeneratedTestsList,
test_functions_to_remove: list[str],
concolic_test_str: str | None,
+ function_references: str,
) -> BestOptimization | None:
"""Find the best optimization candidate and process it with all required steps."""
best_optimization = None
@@ -1279,6 +1303,7 @@
original_helper_code=original_helper_code,
file_path_to_helper_classes=file_path_to_helper_classes,
exp_type=exp_type,
+ function_references=function_references,
)
ph(
"cli-optimize-function-finished",
@@ -1347,6 +1372,7 @@ def find_and_process_best_optimization(
exp_type,
original_helper_code,
code_context,
+ function_references,
)
return best_optimization

@@ -1364,6 +1390,7 @@ def process_review(
exp_type: str,
original_helper_code: dict[Path, str],
code_context: CodeOptimizationContext,
+ function_references: str,
) -> None:
coverage_message = (
original_code_baseline.coverage_results.build_message()
@@ -1430,6 +1457,7 @@ def process_review(
original_throughput=original_throughput_str,
optimized_throughput=optimized_throughput_str,
throughput_improvement=throughput_improvement_str,
+ function_references=function_references,
)
new_explanation = Explanation(
raw_explanation_message=new_explanation_raw_str or explanation.raw_explanation_message,
@@ -1466,16 +1494,9 @@ def process_review(
opt_review_response = ""
if raise_pr or staging_review:
data["root_dir"] = git_root_dir()
- calling_fn_details = get_opt_review_metrics(
- self.function_to_optimize_source_code,
- self.function_to_optimize.file_path,
- self.function_to_optimize.qualified_name,
- self.project_root,
- self.test_cfg.tests_root,
- )
try:
opt_review_response = self.aiservice_client.get_optimization_review(
- **data, calling_fn_details=calling_fn_details
+ **data, calling_fn_details=function_references
)
except Exception as e:
logger.debug(f"optimization review response failed, investigate {e}")
1 change: 1 addition & 0 deletions pyproject.toml
@@ -94,6 +94,7 @@ tests = [
"scipy>=1.13.1",
"torch>=2.8.0",
"xarray>=2024.7.0",
"eval_type_backport"
Contributor: what's this needed for?

Contributor Author: @KRRT7 for the 3.9 unit tests, we're using '|' for type hints, which is not supported in 3.9. From the logs:

TypeError: Unable to evaluate type annotation 'str | None'. If you are making use of the new typing syntax (unions using | since Python 3.10 or builtins subscripting since Python 3.9), you should either replace the use of new syntax with the existing typing constructs or install the eval_type_backport package.
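A minimal repro of that failure mode, assuming Python 3.9 with postponed annotations resolved at runtime; the class and field names are illustrative, not the real model:

```python
# Illustrative repro: on Python 3.9, resolving a postponed `str | None`
# annotation at runtime raises TypeError, because builtin types only support
# `|` from 3.10. eval_type_backport lets libraries like pydantic rewrite the
# annotation to typing.Optional[str] instead of failing.
from __future__ import annotations

import typing
from dataclasses import dataclass

@dataclass
class RefinerRequest:
    function_references: str | None = None  # stored as a string under PEP 563

# Python 3.9: TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'
# Python 3.10+: prints {'function_references': str | None}
print(typing.get_type_hints(RefinerRequest))
```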
Contributor: we don't use these types of type hints in our CLI code for the same reason. Use Union.
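For reference, a 3.9-compatible spelling of the new field, sketching the suggestion rather than the committed code:

```python
# 3.9-safe equivalent of `function_references: str | None = None`
from typing import Optional

function_references: Optional[str] = None
```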

]

[tool.hatch.build.targets.sdist]