@@ -272,7 +272,7 @@ def cli_run(
272272 exists = True , dir_okay = True , file_okay = False , resolve_path = True
273273 ),
274274)
275- @click .argument ("eval_set_file_path " , nargs = - 1 )
275+ @click .argument ("eval_set_file_path_or_id " , nargs = - 1 )
276276@click .option ("--config_file_path" , help = "Optional. The path to config file." )
277277@click .option (
278278 "--print_detailed_results" ,
@@ -292,7 +292,7 @@ def cli_run(
292292)
293293def cli_eval (
294294 agent_module_file_path : str ,
295- eval_set_file_path : list [str ],
295+ eval_set_file_path_or_id : list [str ],
296296 config_file_path : str ,
297297 print_detailed_results : bool ,
298298 eval_storage_uri : Optional [str ] = None ,
@@ -302,40 +302,75 @@ def cli_eval(
302302 AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
303303 module by the name "agent". "agent" module contains a root_agent.
304304
305- EVAL_SET_FILE_PATH: You can specify one or more eval set file paths.
305+ EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
306+ eval set ids.
306307
308+ Mixing of eval set file paths with eval set ids is not allowed.
309+
310+ *Eval Set File Path*
307311 For each file, all evals will be run by default.
308312
309313 If you want to run only specific evals from an eval set, first create a comma
310314 separated list of eval names and then add that as a suffix to the eval set
311315 file name, demarcated by a `:`.
312316
313- For example,
317+ For example, we have `sample_eval_set_file.json` file that has the
318+ following eval cases:
319+ sample_eval_set_file.json:
320+ |....... eval_1
321+ |....... eval_2
322+ |....... eval_3
323+ |....... eval_4
324+ |....... eval_5
314325
315326 sample_eval_set_file.json:eval_1,eval_2,eval_3
316327
317328 This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
318329
330+ *Eval Set Id*
331+ For each eval set, all evals will be run by default.
332+
333+ If you want to run only specific evals from an eval set, first create a comma
334+ separated list of eval names and then add that as a suffix to the eval set
335+ id, demarcated by a `:`.
336+
337+ For example, we have `sample_eval_set_id` that has the following eval cases:
338+ sample_eval_set_id:
339+ |....... eval_1
340+ |....... eval_2
341+ |....... eval_3
342+ |....... eval_4
343+ |....... eval_5
344+
345+ If we did:
346+ sample_eval_set_id:eval_1,eval_2,eval_3
347+
348+ This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
349+
319350 CONFIG_FILE_PATH: The path to config file.
320351
321352 PRINT_DETAILED_RESULTS: Prints detailed results on the console.
322353 """
323354 envs .load_dotenv_for_agent (agent_module_file_path , "." )
324355
325356 try :
357+ from ..evaluation .base_eval_service import InferenceConfig
358+ from ..evaluation .base_eval_service import InferenceRequest
359+ from ..evaluation .eval_metrics import EvalMetric
360+ from ..evaluation .eval_result import EvalCaseResult
361+ from ..evaluation .evaluator import EvalStatus
362+ from ..evaluation .in_memory_eval_sets_manager import InMemoryEvalSetsManager
363+ from ..evaluation .local_eval_service import LocalEvalService
326364 from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
327365 from ..evaluation .local_eval_sets_manager import load_eval_set_from_file
328- from ..sessions .in_memory_session_service import InMemorySessionService
329- from .cli_eval import EvalCaseResult
330- from .cli_eval import EvalMetric
331- from .cli_eval import EvalStatus
366+ from ..evaluation .local_eval_sets_manager import LocalEvalSetsManager
367+ from .cli_eval import _collect_eval_results
368+ from .cli_eval import _collect_inferences
332369 from .cli_eval import get_evaluation_criteria_or_default
333370 from .cli_eval import get_root_agent
334371 from .cli_eval import parse_and_get_evals_to_run
335- from .cli_eval import run_evals
336- from .cli_eval import try_get_reset_func
337- except ModuleNotFoundError :
338- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
372+ except ModuleNotFoundError as mnf :
373+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
339374
340375 evaluation_criteria = get_evaluation_criteria_or_default (config_file_path )
341376 eval_metrics = []
@@ -347,80 +382,103 @@ def cli_eval(
347382 print (f"Using evaluation criteria: { evaluation_criteria } " )
348383
349384 root_agent = get_root_agent (agent_module_file_path )
350- reset_func = try_get_reset_func (agent_module_file_path )
351-
352- gcs_eval_sets_manager = None
385+ app_name = os . path . basename (agent_module_file_path )
386+ agents_dir = os . path . dirname ( agent_module_file_path )
387+ eval_sets_manager = None
353388 eval_set_results_manager = None
389+
354390 if eval_storage_uri :
355391 gcs_eval_managers = evals .create_gcs_eval_managers_from_uri (
356392 eval_storage_uri
357393 )
358- gcs_eval_sets_manager = gcs_eval_managers .eval_sets_manager
394+ eval_sets_manager = gcs_eval_managers .eval_sets_manager
359395 eval_set_results_manager = gcs_eval_managers .eval_set_results_manager
360396 else :
361- eval_set_results_manager = LocalEvalSetResultsManager (
362- agents_dir = os .path .dirname (agent_module_file_path )
363- )
364- eval_set_file_path_to_evals = parse_and_get_evals_to_run (eval_set_file_path )
365- eval_set_id_to_eval_cases = {}
366-
367- # Read the eval_set files and get the cases.
368- for eval_set_file_path , eval_case_ids in eval_set_file_path_to_evals .items ():
369- if gcs_eval_sets_manager :
370- eval_set = gcs_eval_sets_manager ._load_eval_set_from_blob (
371- eval_set_file_path
372- )
373- if not eval_set :
397+ eval_set_results_manager = LocalEvalSetResultsManager (agents_dir = agents_dir )
398+
399+ inference_requests = []
400+ eval_set_file_or_id_to_evals = parse_and_get_evals_to_run (
401+ eval_set_file_path_or_id
402+ )
403+
404+ # Check if the first entry is a file that exists, if it does then we assume
405+ # rest of the entries are also files. We enforce this assumption in the if
406+ # block.
407+ if eval_set_file_or_id_to_evals and os .path .exists (
408+ list (eval_set_file_or_id_to_evals .keys ())[0 ]
409+ ):
410+ eval_sets_manager = InMemoryEvalSetsManager ()
411+
412+ # Read the eval_set files and get the cases.
413+ for (
414+ eval_set_file_path ,
415+ eval_case_ids ,
416+ ) in eval_set_file_or_id_to_evals .items ():
417+ try :
418+ eval_set = load_eval_set_from_file (
419+ eval_set_file_path , eval_set_file_path
420+ )
421+ except FileNotFoundError as fne :
374422 raise click .ClickException (
375- f"Eval set { eval_set_file_path } not found in GCS."
423+ f"`{ eval_set_file_path } ` should be a valid eval set file."
424+ ) from fne
425+
426+ eval_sets_manager .create_eval_set (
427+ app_name = app_name , eval_set_id = eval_set .eval_set_id
428+ )
429+ for eval_case in eval_set .eval_cases :
430+ eval_sets_manager .add_eval_case (
431+ app_name = app_name ,
432+ eval_set_id = eval_set .eval_set_id ,
433+ eval_case = eval_case ,
376434 )
377- else :
378- eval_set = load_eval_set_from_file (eval_set_file_path , eval_set_file_path )
379- eval_cases = eval_set .eval_cases
380-
381- if eval_case_ids :
382- # There are eval_ids that we should select.
383- eval_cases = [
384- e for e in eval_set .eval_cases if e .eval_id in eval_case_ids
385- ]
386-
387- eval_set_id_to_eval_cases [eval_set .eval_set_id ] = eval_cases
388-
389- async def _collect_eval_results () -> list [EvalCaseResult ]:
390- session_service = InMemorySessionService ()
391- eval_case_results = []
392- async for eval_case_result in run_evals (
393- eval_set_id_to_eval_cases ,
394- root_agent ,
395- reset_func ,
396- eval_metrics ,
397- session_service = session_service ,
398- ):
399- eval_case_result .session_details = await session_service .get_session (
400- app_name = os .path .basename (agent_module_file_path ),
401- user_id = eval_case_result .user_id ,
402- session_id = eval_case_result .session_id ,
435+ inference_requests .append (
436+ InferenceRequest (
437+ app_name = app_name ,
438+ eval_set_id = eval_set .eval_set_id ,
439+ eval_case_ids = eval_case_ids ,
440+ inference_config = InferenceConfig (),
441+ )
442+ )
443+ else :
444+ # We assume that what we have are eval set ids instead.
445+ eval_sets_manager = (
446+ eval_sets_manager
447+ if eval_storage_uri
448+ else LocalEvalSetsManager (agents_dir = agents_dir )
449+ )
450+
451+ for eval_set_id_key , eval_case_ids in eval_set_file_or_id_to_evals .items ():
452+ inference_requests .append (
453+ InferenceRequest (
454+ app_name = app_name ,
455+ eval_set_id = eval_set_id_key ,
456+ eval_case_ids = eval_case_ids ,
457+ inference_config = InferenceConfig (),
458+ )
403459 )
404- eval_case_results .append (eval_case_result )
405- return eval_case_results
406460
407461 try :
408- eval_results = asyncio .run (_collect_eval_results ())
409- except ModuleNotFoundError :
410- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
411-
412- # Write eval set results.
413- eval_set_id_to_eval_results = collections .defaultdict (list )
414- for eval_case_result in eval_results :
415- eval_set_id = eval_case_result .eval_set_id
416- eval_set_id_to_eval_results [eval_set_id ].append (eval_case_result )
417-
418- for eval_set_id , eval_case_results in eval_set_id_to_eval_results .items ():
419- eval_set_results_manager .save_eval_set_result (
420- app_name = os .path .basename (agent_module_file_path ),
421- eval_set_id = eval_set_id ,
422- eval_case_results = eval_case_results ,
462+ eval_service = LocalEvalService (
463+ root_agent = root_agent ,
464+ eval_sets_manager = eval_sets_manager ,
465+ eval_set_results_manager = eval_set_results_manager ,
466+ )
467+
468+ inference_results = asyncio .run (
469+ _collect_inferences (
470+ inference_requests = inference_requests , eval_service = eval_service
471+ )
472+ )
473+ eval_results = asyncio .run (
474+ _collect_eval_results (
475+ inference_results = inference_results ,
476+ eval_service = eval_service ,
477+ eval_metrics = eval_metrics ,
478+ )
423479 )
480+ except ModuleNotFoundError as mnf :
481+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
424482
425483 print ("*********************************************************************" )
426484 eval_run_summary = {}
@@ -1023,7 +1081,8 @@ def cli_deploy_agent_engine(
10231081 Example:
10241082
10251083 adk deploy agent_engine --project=[project] --region=[region]
1026- --staging_bucket=[staging_bucket] --display_name=[app_name] path/to/my_agent
1084+ --staging_bucket=[staging_bucket] --display_name=[app_name]
1085+ path/to/my_agent
10271086 """
10281087 try :
10291088 cli_deploy .to_agent_engine (
0 commit comments