Skip to content

Commit 3ab4ad1

Browse files
authored
Add Claude 4 Sonnet to evals (#68)
* Add Claude 4 Sonnet to evals
* Avoid the Braintrust proxy
1 parent 939ec4b commit 3ab4ad1

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

.github/workflows/daily_anthropic_evals.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ jobs:
46 | 46 |
47 | 47 |   - name: Set Anthropic models
48 | 48 |   run: |
49 |    | - echo "MODELS=claude-3-5-sonnet-latest,claude-3-7-sonnet-latest" >> $GITHUB_ENV
   | 49 | + echo "MODELS=claude-3-5-sonnet-latest,claude-3-7-sonnet-latest,claude-sonnet-4-0" >> $GITHUB_ENV
50 | 50 |
51 | 51 |   - name: Run evaluations
52 | 52 |   env:

runner/eval_convex_coding.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,7 @@ def convex_coding_task(model: ModelTemplate, input: str):
207 | 207 |   model_names = [
208 | 208 |   "claude-3-5-sonnet-latest",
209 | 209 |   "claude-3-7-sonnet-latest",
    | 210 | + "claude-sonnet-4-0",
210 | 211 |   "gpt-4o",
211 | 212 |   "o3-mini",
212 | 213 |   "gemini-2.0-flash-lite",

runner/models/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,15 @@ class ModelTemplate(BaseModel):
38 | 38 |   uses_system_prompt=True,
39 | 39 |   provider=ModelProvider.ANTHROPIC,
40 | 40 |   ),
   | 41 | + ModelTemplate(
   | 42 | + name="claude-sonnet-4-0",
   | 43 | + formatted_name="Claude 4 Sonnet",
   | 44 | + max_concurrency=int(os.getenv("ANTHROPIC_CONCURRENCY", "2")),
   | 45 | + requires_chain_of_thought=True,
   | 46 | + uses_system_prompt=True,
   | 47 | + provider=ModelProvider.ANTHROPIC,
   | 48 | + override_proxy="https://api.anthropic.com/v1",
   | 49 | + ),
41 | 50 |   ModelTemplate(
42 | 51 |   name="gpt-4o",
43 | 52 |   formatted_name="GPT-4o",

0 commit comments

Comments
 (0)