Skip to content

Commit 8b67bce

Browse files
committed
working self-improving agents
1 parent 8fa1d40 commit 8b67bce

File tree

9 files changed

+78
-64
lines changed

9 files changed

+78
-64
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ wheels/
1111
.venv
1212
*.svg
1313
scratch/
14+
.self-improving-agent/

.python-version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.12
1+
3.13

human-seeded-evals/app/agent.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@
66
from datetime import datetime
77
from typing import AsyncIterator
88

9-
from cloudkv import AsyncCloudKV
109
from pydantic_ai import Agent, RunContext
1110
from pydantic_ai.models import Model
1211

1312
from .models import TimeRangeInputs, TimeRangeResponse
1413
from .self_improving_agent import SelfImprovingAgentModel
15-
from .self_improving_agent_storage import CloudKVStorage
14+
from .self_improving_agent_storage import LocalStorage
1615

1716

1817
@dataclass
@@ -32,13 +31,14 @@ class TimeRangeDeps:
3231

3332
@asynccontextmanager
async def self_improving_model() -> AsyncIterator[SelfImprovingAgentModel]:
    """Yield a `SelfImprovingAgentModel` for the `time_range_agent`, backed by `LocalStorage`.

    The logfire read token is taken from the `LOGFIRE_READ_TOKEN` environment
    variable (a `KeyError` is raised if it is unset). Once the caller's
    `async with` body finishes without raising, `wait_for_coach()` is awaited
    on the model before the context manager exits.
    """
    # Read the token first so a missing variable fails before any storage is set up.
    read_token = os.environ['LOGFIRE_READ_TOKEN']

    # NOTE: the CloudKV-backed storage (AsyncCloudKV + CloudKVStorage, configured via
    # CLOUDKV_TOKEN) that used to be created here is disabled in favour of local files.
    local_storage = LocalStorage()

    model = SelfImprovingAgentModel(
        'anthropic:claude-sonnet-4-0',
        local_storage,
        read_token,
        'time_range_agent',
    )
    yield model
    await model.wait_for_coach()
4242

4343

4444
@time_range_agent.instructions

human-seeded-evals/app/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
logfire.configure(environment='dev')
1212

1313
logfire.instrument_pydantic_ai()
14+
logfire.instrument_httpx()
1415

1516

1617
@asynccontextmanager

human-seeded-evals/app/self_improving_agent.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,9 @@ def get_instrunctions(messages: list[ModelMessage]) -> str | None:
429429

430430
def get_tools_fields(tools: list[ToolDefinition], prefix: str, description: str) -> Iterable[FieldDetails]:
    """Yield the patchable fields of every tool in `tools`.

    For each tool this yields one `FieldDetails` for the tool's description,
    keyed `<prefix>.<escaped tool name>.description`, followed by everything
    `json_schema_fields` yields for the tool's parameter schema under
    `<prefix>.<escaped tool name>.parameters`.
    """
    for tool in tools:
        # Build a per-tool key; `prefix` itself must stay untouched so that
        # one tool's name does not leak into the keys of the next tool.
        tool_key = f'{prefix}.{escape_key(tool.name)}'
        description_key = f'{tool_key}.description'
        parameters_key = f'{tool_key}.parameters'
        yield FieldDetails(description_key, description, tool.description)
        yield from json_schema_fields(tool.parameters_json_schema, parameters_key)
435435

436436

437437
JsonSchema = dict[str, Any]

human-seeded-evals/app/self_improving_agent_storage.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import asyncio
12
from contextlib import asynccontextmanager
23
from dataclasses import dataclass
34
from datetime import timedelta
4-
from typing import AsyncIterator
5+
from functools import partial
6+
from pathlib import Path
7+
from typing import AsyncIterator, Callable, ParamSpec, TypeVar
58

69
from cloudkv import AsyncCloudKV
710

@@ -30,3 +33,43 @@ async def lock(self, agent_name: str) -> AsyncIterator[bool]:
3033
await self.cloud_kv.delete(key)
3134
else:
3235
yield False
36+
37+
38+
@dataclass
class LocalStorage(SelfImprovingAgentStorage):
    """Keep agent patches and lock markers as plain files in a local directory.

    Patches are stored as `<agent_name>.json` and locks as empty
    `lock:<agent_name>` marker files, both directly inside `directory`.
    All blocking filesystem calls are pushed to a worker thread via `asyncify`.
    """

    # Directory holding the patch files and lock markers; created on construction.
    directory: Path = Path('.self-improving-agent')

    def __post_init__(self):
        # parents=True so a nested directory can be configured without pre-creating it.
        self.directory.mkdir(parents=True, exist_ok=True)

    async def get_patch(self, agent_name: str) -> ModelContextPatch | None:
        """Return the stored patch for `agent_name`, or `None` if no patch file exists."""
        file = self.directory / f'{agent_name}.json'
        # Attempt the read directly instead of checking `exists()` first: this avoids
        # a blocking stat call on the event loop and the check-then-read race.
        try:
            content = await asyncify(file.read_bytes)
        except FileNotFoundError:
            return None
        return ModelContextPatch.model_validate_json(content)

    async def set_patch(self, agent_name: str, patch: ModelContextPatch, expires: timedelta) -> None:
        """Write `patch` as indented JSON to the patch file for `agent_name`.

        `expires` is accepted for interface compatibility but ignored: files
        written here never expire.
        """
        # note we're ignoring expiry here
        file = self.directory / f'{agent_name}.json'
        content = patch.model_dump_json(indent=2)
        # The file is read back as raw bytes and parsed as JSON, so write UTF-8
        # explicitly rather than relying on the platform's locale encoding.
        await asyncify(file.write_text, content, encoding='utf-8')

    @asynccontextmanager
    async def lock(self, agent_name: str) -> AsyncIterator[bool]:
        """Try to take the file lock for `agent_name`.

        Yields `True` if this caller created the lock file, in which case the
        file is removed again when the context exits. Yields `False` if the
        lock file already existed; nothing is removed in that case.
        """
        file = self.directory / f'lock:{agent_name}'
        try:
            # exist_ok=False makes creation fail if the file is already there, so the
            # existence test and the creation are a single step instead of two.
            await asyncify(file.touch, exist_ok=False)
        except FileExistsError:
            yield False
            return

        try:
            yield True
        finally:
            # missing_ok: releasing must not fail if the marker was removed externally.
            await asyncify(file.unlink, missing_ok=True)
68+
69+
70+
P = ParamSpec('P')
71+
R = TypeVar('R')
72+
73+
74+
async def asyncify(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
75+
return await asyncio.get_event_loop().run_in_executor(None, partial(func, *args, **kwargs))

human-seeded-evals/update_sia.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import asyncio
2+
3+
import logfire
4+
from app.agent import infer_time_range, self_improving_model
5+
from app.models import TimeRangeInputs
6+
7+
logfire.configure(environment='evals')
8+
9+
logfire.instrument_pydantic_ai()
10+
11+
12+
async def main():
    """Run `infer_time_range` once for the prompt 'yesterday' using the self-improving model.

    The call is made inside the model's `blocking_context()` and wrapped in a
    logfire span so the run shows up as a single traced operation.
    """
    async with self_improving_model() as model:
        # Contexts are entered left to right and exited in reverse, exactly as if nested.
        with model.blocking_context(), logfire.span('running infer_time_range with blocking coach'):
            inputs = TimeRangeInputs(prompt='yesterday')
            await infer_time_range(inputs, model=model)
17+
18+
19+
if __name__ == '__main__':
20+
asyncio.run(main())

pyproject.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ dependencies = [
1818
[dependency-groups]
1919
dev = ["pyright>=1.1.402", "ruff>=0.12.1", "watchfiles>=1.1.0"]
2020

21-
22-
[tool.uv.workspace]
23-
members = ["human-seeded-evals"]
24-
2521
[tool.ruff]
2622
line-length = 120
2723
target-version = "py39"

uv.lock

Lines changed: 0 additions & 47 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)