Skip to content

Commit 8930906

Browse files
committed
feat: support anthropic extended thinking
1 parent 9059fb8 commit 8930906

File tree

4 files changed: +33 additions, -4 deletions

bigcodebench/gen/util/anthropic_request.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,18 @@ def make_auto_request(client: anthropic.Client, *args, **kwargs) -> Message:
1616
try:
1717
signal.signal(signal.SIGALRM, handler)
1818
signal.alarm(100)
19-
ret = client.messages.create(*args, **kwargs)
19+
if "reasoning_budget" in kwargs and "reasoning_beta" in kwargs:
20+
ret = client.beta.messages.create(
21+
*args,
22+
**kwargs,
23+
thinking = {
24+
"type": "enabled",
25+
"budget": kwargs["reasoning_budget"],
26+
},
27+
betas=[kwargs["reasoning_beta"]]
28+
)
29+
else:
30+
ret = client.messages.create(*args, **kwargs)
2031
signal.alarm(0)
2132
except anthropic.RateLimitError:
2233
print("Rate limit exceeded. Waiting...")

bigcodebench/generate.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,11 @@ def run_codegen(
132132
temperature: float = 0.0,
133133
max_new_tokens: int = 1280,
134134
greedy: bool = False,
135+
# openai
135136
reasoning_effort: str = "medium",
137+
# anthropic
138+
reasoning_budget: int = 0,
139+
reasoning_beta: str = "output-128k-2025-02-19",
136140
strip_newlines: bool = False,
137141
direct_completion: bool = False,
138142
resume: bool = True,
@@ -173,6 +177,8 @@ def run_codegen(
173177
temperature=temperature,
174178
max_new_tokens=max_new_tokens,
175179
reasoning_effort=reasoning_effort,
180+
reasoning_budget=reasoning_budget,
181+
reasoning_beta=reasoning_beta,
176182
instruction_prefix=instruction_prefix,
177183
response_prefix=response_prefix,
178184
prefill=not skip_prefill,
@@ -186,8 +192,11 @@ def run_codegen(
186192
)
187193

188194
extra = "-" + subset if subset != "full" else ""
189-
if reasoning_effort and model.startswith("o1-") or model.startswith("o3-") or model.endswith("-reasoner"):
195+
if backend == "openai" and reasoning_effort and model.startswith("o1-") or model.startswith("o3-") or model.endswith("-reasoner"):
190196
model = model + f"--{reasoning_effort}"
197+
198+
if backend == "anthropic" and reasoning_budget and reasoning_beta:
199+
model = model + f"--{reasoning_budget}-{reasoning_beta}"
191200

192201
if skip_prefill:
193202
identifier = model.replace("/", "--") + "--skip_prefill" + f"--{revision}--bigcodebench{extra}-{split}--{backend}-{temperature}-{n_samples}-sanitized_calibrated.jsonl"

bigcodebench/provider/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ def make_model(
99
dataset: str = "bigcodebench",
1010
temperature: float = 0.0,
1111
max_new_tokens: int = 1280,
12-
# o1 and o3 only
12+
# openai only
1313
reasoning_effort: str = "medium",
14+
# anthropic only
15+
reasoning_budget: int = 0,
16+
reasoning_beta: str = "output-128k-2025-02-19",
1417
# instruction model only
1518
instruction_prefix: str = None,
1619
response_prefix: str = None,
@@ -118,6 +121,8 @@ def make_model(
118121
split=split,
119122
temperature=temperature,
120123
max_new_tokens=max_new_tokens,
124+
reasoning_budget=reasoning_budget,
125+
reasoning_beta=reasoning_beta,
121126
instruction_prefix=instruction_prefix,
122127
response_prefix=response_prefix,
123128
)

bigcodebench/provider/anthropic.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
from bigcodebench.provider.utility import make_raw_chat_prompt
1010

1111
class AnthropicDecoder(DecoderBase):
12-
def __init__(self, name: str, **kwargs) -> None:
12+
def __init__(self, name: str, reasoning_budget: int = 0, reasoning_beta: str = "output-128k-2025-02-19", **kwargs) -> None:
1313
super().__init__(name, **kwargs)
1414
self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_KEY"))
15+
self.reasoning_budget = reasoning_budget
16+
self.reasoning_beta = reasoning_beta
1517

1618
def codegen(
1719
self, prompts: List[str], do_sample: bool = True, num_samples: int = 200
@@ -43,6 +45,8 @@ def codegen(
4345
max_tokens=self.max_new_tokens,
4446
temperature=self.temperature,
4547
stop_sequences=self.eos,
48+
reasoning_budget=self.reasoning_budget,
49+
reasoning_beta=self.reasoning_beta,
4650
)
4751
outputs.append(ret.content[0].text)
4852
all_outputs.append(outputs)

Comments (0)