Configuration is the foundation of flexible testing. mcp-eval provides multiple ways to configure your tests, from simple YAML files to sophisticated programmatic control.

Configuration hierarchy

mcp-eval resolves settings through a layered system, listed here from highest to lowest priority (a short example follows the list):
  1. Programmatic overrides - Set in code
  2. Environment variables - Set in shell or CI
  3. Config files - mcpeval.yaml and mcpeval.secrets.yaml
  4. Defaults - Built-in sensible defaults
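
For example, the same setting can appear at several layers, and the highest-priority source wins. A minimal sketch of that precedence, assuming an mcpeval.yaml that sets provider: anthropic, the MCPEVAL_PROVIDER variable from the Environment variables section below, and the load_config / update_config / get_settings helpers shown later on this page:

from mcp_eval.config import load_config, update_config, get_settings

# Layer 3: mcpeval.yaml sets `provider: anthropic`
# Layer 2: exporting MCPEVAL_PROVIDER in the shell would override the file
settings = load_config()

# Layer 1: a programmatic override takes precedence over both layers above
update_config({"provider": "openai", "model": "gpt-4-turbo-preview"})

print(get_settings().provider)  # "openai" - the highest-priority layer wins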

The MCPEvalSettings model

The complete configuration structure:
from mcp_eval.config import MCPEvalSettings

# Full settings structure
settings = MCPEvalSettings(
    # Judge configuration
    judge={
        "provider": "anthropic",
        "model": "claude-3-5-sonnet-20241022",
        "min_score": 0.8,
        "system_prompt": "You are a helpful test judge",
        "max_tokens": 2000,
        "temperature": 0.0,
    },

    # Metrics collection
    metrics={
        "collect_tool_calls": True,
        "collect_tokens": True,
        "collect_costs": True,
        "collect_timings": True,
        "include_thinking": False,
    },

    # Reporting configuration
    reporting={
        "formats": ["json", "markdown", "html"],
        "output_dir": "test-reports",
        "include_traces": True,
        "include_conversation": True,
        "timestamp_format": "%Y%m%d_%H%M%S",
    },

    # Execution control
    execution={
        "max_concurrency": 5,
        "timeout_seconds": 300,
        "max_retries": 3,
        "retry_delay_seconds": 5,
        "fail_fast": False,
        "verbose": True,
    },

    # Default provider settings
    provider="anthropic",
    model="claude-3-5-sonnet-20241022",

    # Default servers
    default_servers=["fetch", "filesystem"],

    # Default agent
    default_agent="default",
)

Loading configuration

Automatic discovery

from mcp_eval.config import load_config

# Discovers config files from the current directory upward
settings = load_config()

# Or specify a path
settings = load_config("/path/to/project")

# Or pass a dict
settings = load_config({
    "provider": "openai",
    "model": "gpt-4-turbo-preview",
})

Manual loading

from mcp_eval.config import MCPEvalSettings
import yaml

# Load from a YAML file
with open("custom_config.yaml") as f:
    config_dict = yaml.safe_load(f)
settings = MCPEvalSettings(**config_dict)

# Load and merge multiple sources
base_config = yaml.safe_load(open("base.yaml"))
secrets = yaml.safe_load(open("secrets.yaml"))
overrides = {"execution": {"verbose": True}}

# Merge configurations (note: ** unpacking is a shallow merge; nested
# sections such as "execution" are replaced wholesale, not deep-merged)
full_config = {**base_config, **secrets, **overrides}
settings = MCPEvalSettings(**full_config)

Updating configuration

Global updates

from mcp_eval.config import update_config, get_settings

# Update specific fields
update_config({
    "execution": {
        "max_concurrency": 10,
        "timeout_seconds": 600,
    },
    "reporting": {
        "output_dir": "custom-reports",
    },
})

# Get current settings
current = get_settings()
print(f"Timeout: {current.execution.timeout_seconds}s")

Scoped configuration

from mcp_eval.config import use_config, get_settings, update_config
import contextlib

# Temporarily use a different config
with use_config(custom_settings):
    # Tests here use custom_settings
    await run_tests()
# Original config restored

# Or build your own context manager
@contextlib.contextmanager
def production_config():
    original = get_settings()
    try:
        update_config({
            "provider": "anthropic",
            "model": "claude-3-opus-20240229",
            "execution": {"max_retries": 5},
        })
        yield
    finally:
        use_config(original)

with production_config():
    await run_critical_tests()

Agent configuration

Using named agents

from mcp_eval.config import use_agent

# Use an agent defined in mcpeval.yaml
use_agent("specialized_agent")

# Agents are defined in config like:
# agents:
#   specialized_agent:
#     model: claude-3-opus-20240229
#     provider: anthropic
#     instruction: "You are a specialized test agent"
#     server_names: ["custom_server"]

Agent factory pattern

import os

from mcp_eval.config import use_agent_factory
from mcp_eval.agent import Agent

def create_dynamic_agent():
    """Create an agent based on runtime conditions."""
    if os.getenv("TEST_ENV") == "production":
        return Agent(
            model="claude-3-opus-20240229",
            instruction="Be extremely thorough",
        )
    else:
        return Agent(
            model="claude-3-5-sonnet-20241022",
            instruction="Standard testing",
        )

# Register the factory
use_agent_factory(create_dynamic_agent)

Direct agent objects

from mcp_eval.config import use_agent_object
from mcp_eval.agent import Agent

# Create and configure the agent
my_agent = Agent(
    model="claude-3-5-sonnet-20241022",
    provider="anthropic",
    instruction="""You are a security-focused test agent.
    Always check for vulnerabilities and edge cases.""",
    server_names=["security_scanner", "filesystem"],
    temperature=0.0,  # Deterministic
    max_tokens=4000,
)

# Use this specific agent
use_agent_object(my_agent)

Agent configuration in tests

from mcp_eval.core import task, with_agent
from mcp_eval.agent import AgentConfig

# Use different agents for different tests
@with_agent("fast_agent")
@task("Quick test")
async def test_fast(agent):
    # Uses the fast_agent configuration
    pass

@with_agent(AgentConfig(
    model="claude-3-opus-20240229",
    instruction="Be extremely thorough",
    max_iterations=10,
))
@task("Thorough test")
async def test_thorough(agent):
    # Uses the inline configuration
    pass

Programmatic defaults

Set global defaults programmatically:
import os

from mcp_eval.config import ProgrammaticDefaults

# Set the default agent for all tests
ProgrammaticDefaults.set_default_agent(my_agent)

# Set default servers
ProgrammaticDefaults.set_default_servers(["fetch", "calculator"])

# Set default provider configuration
ProgrammaticDefaults.set_provider_config({
    "provider": "openai",
    "model": "gpt-4-turbo-preview",
    "api_key": os.getenv("OPENAI_API_KEY"),
})

# Clear all programmatic defaults
ProgrammaticDefaults.clear()

Environment variables

Provider configuration

# API keys
export ANTHROPIC_API_KEY="sk-ant-..."
export OPENAI_API_KEY="sk-..."
export GOOGLE_API_KEY="..."

# Provider selection
export MCPEVAL_PROVIDER="anthropic"
export MCPEVAL_MODEL="claude-3-5-sonnet-20241022"

# Provider-specific settings
export ANTHROPIC_BASE_URL="https://api.anthropic.com"
export OPENAI_ORG_ID="org-..."

Execution control

# Timeouts and retries
export MCPEVAL_TIMEOUT_SECONDS="600"
export MCPEVAL_MAX_RETRIES="5"
export MCPEVAL_RETRY_DELAY="10"

# Concurrency
export MCPEVAL_MAX_CONCURRENCY="10"

# Verbosity
export MCPEVAL_VERBOSE="true"
export MCPEVAL_DEBUG="true"

Reporting

# Output configuration
export MCPEVAL_OUTPUT_DIR="/tmp/test-reports"
export MCPEVAL_REPORT_FORMATS="json,html,markdown"
export MCPEVAL_INCLUDE_TRACES="true"

Configuration validation

Validate on load

from mcp_eval.config import load_config, validate_config

try:
    settings = load_config()
    validate_config(settings)
except ValueError as e:
    print(f"Invalid configuration: {e}")
    # Handle invalid config

Custom validation

import os
import sys

from mcp_eval.config import MCPEvalSettings, load_config

def validate_custom_settings(settings: MCPEvalSettings):
    """Add custom validation rules."""

    # Ensure the API key is set
    if settings.provider == "anthropic":
        if not os.getenv("ANTHROPIC_API_KEY"):
            raise ValueError("Anthropic API key required")

    # Validate model compatibility
    if settings.judge.provider == "openai":
        valid_models = ["gpt-4", "gpt-4-turbo-preview"]
        if settings.judge.model not in valid_models:
            raise ValueError(f"Judge model must be one of {valid_models}")

    # Ensure the timeout is reasonable
    if settings.execution.timeout_seconds > 3600:
        raise ValueError("Timeout cannot exceed 1 hour")

    return True

# Use in your test setup (the checks raise ValueError on failure,
# so catch the exception rather than testing the return value)
settings = load_config()
try:
    validate_custom_settings(settings)
except ValueError as e:
    print(f"Invalid configuration: {e}")
    sys.exit(1)

Advanced patterns

Dynamic configuration based on environment

import os
from mcp_eval.config import load_config, update_config

def configure_for_environment():
    """Adjust config based on environment."""
    base_config = load_config()

    env = os.getenv("TEST_ENV", "development")

    if env == "production":
        update_config({
            "provider": "anthropic",
            "model": "claude-3-opus-20240229",
            "execution": {
                "max_retries": 5,
                "timeout_seconds": 600,
                "fail_fast": True,
            },
            "judge": {
                "min_score": 0.9,  # Stricter in production
            },
        })
    elif env == "ci":
        update_config({
            "execution": {
                "max_concurrency": 2,  # Limited resources in CI
                "verbose": True,
            },
            "reporting": {
                "formats": ["json"],  # Machine-readable only
                "output_dir": "/tmp/ci-reports",
            },
        })
    else:  # development
        update_config({
            "execution": {
                "verbose": True,
                "max_retries": 1,
            },
            "reporting": {
                "formats": ["html"],  # Interactive reports
            },
        })

configure_for_environment()

Configuration inheritance

from mcp_eval.config import use_config

class BaseTestConfig:
    """Base configuration for all tests."""
    BASE_SETTINGS = {
        "provider": "anthropic",
        "model": "claude-3-5-sonnet-20241022",
        "execution": {
            "timeout_seconds": 300,
            "max_retries": 3,
        },
    }

class IntegrationTestConfig(BaseTestConfig):
    """Config for integration tests."""
    SETTINGS = {
        **BaseTestConfig.BASE_SETTINGS,
        "execution": {
            **BaseTestConfig.BASE_SETTINGS["execution"],
            "timeout_seconds": 600,  # Longer timeout
        },
        "default_servers": ["fetch", "database", "cache"],
    }

class UnitTestConfig(BaseTestConfig):
    """Config for unit tests."""
    SETTINGS = {
        **BaseTestConfig.BASE_SETTINGS,
        "execution": {
            **BaseTestConfig.BASE_SETTINGS["execution"],
            "timeout_seconds": 60,  # Quick tests
        },
        "default_servers": ["mock_server"],
    }

# Use in tests
if test_type == "integration":
    use_config(IntegrationTestConfig.SETTINGS)
else:
    use_config(UnitTestConfig.SETTINGS)

Config hot-reloading

import watchdog.observers
import watchdog.events

from mcp_eval.config import load_config, use_config

class ConfigReloader(watchdog.events.FileSystemEventHandler):
    """Reload config when files change."""

    def on_modified(self, event):
        if event.src_path.endswith("mcpeval.yaml"):
            print("Config changed, reloading...")
            try:
                new_config = load_config()
                use_config(new_config)
                print("✅ Config reloaded successfully")
            except Exception as e:
                print(f"❌ Failed to reload: {e}")

# Watch for changes
observer = watchdog.observers.Observer()
observer.schedule(ConfigReloader(), ".", recursive=False)
observer.start()

Best practices

Separate secrets: Always keep API keys and other sensitive data in mcpeval.secrets.yaml or environment variables, never in your main config file (see the sketch after these tips).
Validate early: Validate your configuration at the start of each run so problems surface before any tests execute.
Use environment-specific configs: Different environments (dev, staging, prod) should have different configuration profiles for appropriate testing rigor.
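
As a concrete illustration of the first two practices, here is a minimal pre-flight check; the list of required variables is an assumption for this example, not part of mcp-eval's API:

import os
import sys

# Fail before any test runs if a secret that should come from the
# environment (or mcpeval.secrets.yaml) is missing. The variable list
# below is illustrative - adjust it to the providers you actually use.
REQUIRED_SECRETS = ["ANTHROPIC_API_KEY"]

missing = [name for name in REQUIRED_SECRETS if not os.getenv(name)]
if missing:
    print(f"Missing secrets: {', '.join(missing)}; set them in your shell or CI, not in mcpeval.yaml")
    sys.exit(1)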

Debugging configuration

import json

from mcp_eval.config import get_settings, print_config

# Print the current configuration
print_config()

# Or get it as a dict for inspection
settings = get_settings()
config_dict = settings.model_dump()
print(json.dumps(config_dict, indent=2))

# Check specific values
print(f"Provider: {settings.provider}")
print(f"Model: {settings.model}")
print(f"Timeout: {settings.execution.timeout_seconds}s")
print(f"Output dir: {settings.reporting.output_dir}")

See also