AI-Hypercomputer
diff --git a/src/benchmark_attention.py b/src/benchmark_attention.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/benchmark_convolution.py b/src/benchmark_convolution.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/benchmark_utils.py b/src/benchmark_utils.py
Lines changed: 2 additions & 4 deletions
diff --git a/src/run_benchmark.py b/src/run_benchmark.py
Lines changed: 11 additions & 1 deletion
@@ -156,20 +156,20 @@ def pallas_flash_attention_benchmark(
  """Benchmarks the Pallas flash attention kernel."""
 
  @partial(jax.jit, static_argnames=["causal"])
- def pallas_attention(q, k, v, causal):
+ def f(q, k, v, causal):
  return pallas_flash_attention.mha_reference(
  q, k, v, ab=None, segment_ids=None, causal=causal
  )
 
  # Generate QKV.
  q, k, v = generate_qkv(batch, seq_len, d_model, num_heads)
  # Run once
- output = pallas_attention(q, k, v, causal)
+ output = f(q, k, v, causal)
  jax.block_until_ready(output)
 
  # Run benchmark
  time_ms_list = simple_timeit(
- pallas_attention,
+ f,
  q,
  k,
  v,
 
@@ -60,7 +60,7 @@ def f(x, kernel, mode):
 
  # Time the operation
  time_ms_list = simple_timeit(
- jitted_f,
+ f,
  x,
  kernel,
  padding_mode,
 
@@ -246,12 +246,10 @@ def rename_xla_dump(
  matching_anchor_files.sort(key=os.path.getmtime, reverse=True)
  latest_anchor_file = matching_anchor_files[0]
 
- # Extract the common 'jit_f.[unique_id]' part from the anchor file.
- # This regex captures from 'jit_f.' up to the next '.' (before the specific suffix like '.before_optimizations')
  # Example: 'module_0080.jit_f.cl_747713181.before_optimizations.txt'
- # This will extract 'jit_f.cl_747713181'
+ # This will extract 'module_0080.jit_f.cl_747713181'
  filename_base = os.path.basename(latest_anchor_file)
- jit_id_match = re.search(r"(jit_f\.[^.]+)", filename_base)
+ jit_id_match = re.search(r"(module.*jit_f\.[^.]+)", filename_base)
 
  if not jit_id_match:
  print(
 
@@ -19,6 +19,8 @@
 import ray
 from concurrent.futures import ThreadPoolExecutor
 import os
+import copy
+
 
 COLLECTIVE_BENCHMARK_MAP = {
  "all_gather": "benchmark_collectives.all_gather_benchmark",
@@ -240,6 +242,7 @@ def run_single_benchmark(benchmark_config: Dict[str, Any]):
  # Run the benchmark
  calculate_metrics_results = []
  for benchmark_param in benchmark_params:
+ original_benchmark_param = copy.deepcopy(benchmark_param)
  benchmark_param = preprocess_benchmark_param(
  benchmark_param, trace_dir=trace_dir
  )
@@ -286,7 +289,7 @@ def run_single_benchmark(benchmark_config: Dict[str, Any]):
  tmp_xla_dump_dir=TMP_XLA_DUMP_DIR,
  dest_xla_dump_dir=xla_dump_dir,
  benchmark_name=benchmark_name,
- benchmark_param=filtered_benchmark_param,
+ benchmark_param=original_benchmark_param,
  )
 
  # Dump metrics to file.
@@ -305,6 +308,13 @@ def main(config_path: str, multithreaded: bool):
  if not benchmarks or not isinstance(benchmarks, list):
  raise ValueError("Configuration must contain a 'benchmarks' list.")
 
+ # Clear the tmp dirs.
+ if os.path.exists(TMP_XLA_DUMP_DIR):
+ for filename in os.listdir(TMP_XLA_DUMP_DIR):
+ file_path = os.path.join(TMP_XLA_DUMP_DIR, filename)
+ if os.path.isfile(file_path):
+ os.remove(file_path)
+
  if multithreaded:
  ray.init(
  runtime_env=ray.runtime_env.RuntimeEnv(