Skip to content

Commit d016523

Browse files
committed
Switch to more recent Claude XML format
Refs simonw#15 Refs simonw#16 (comment)
1 parent db4a164 commit d016523

File tree

3 files changed, with 38 additions and 45 deletions.

README.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,22 +104,26 @@ Contents of file3.txt
104104
---
105105
```
106106

107-
### XML Output
107+
### Claude XML Output
108108

109109
Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
110110

111111
To structure the output in this way, use the optional `--cxml` flag, which will produce output like this:
112112

113113
```xml
114114
<documents>
115-
<document path="my_directory/file1.txt">
115+
<document index="1">
116+
<source>my_directory/file1.txt</source>
117+
<document_content>
116118
Contents of file1.txt
119+
</document_content>
117120
</document>
118-
119-
<document path="my_directory/file2.txt">
121+
<document index="2">
122+
<source>my_directory/file2.txt</source>
123+
<document_content>
120124
Contents of file2.txt
125+
</document_content>
121126
</document>
122-
...
123127
</documents>
124128
```
125129

files_to_prompt/cli.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import click
55

6+
global_index = 1
7+
68

79
def should_ignore(path, gitignore_rules):
810
for rule in gitignore_rules:
@@ -39,9 +41,14 @@ def print_default(path, content):
3941

4042

4143
def print_as_xml(path, content):
42-
click.echo(f'<document path="{path}">')
44+
global global_index
45+
click.echo(f'<document index="{global_index}">')
46+
click.echo(f"<source>{path}</source>")
47+
click.echo("<document_content>")
4348
click.echo(content)
49+
click.echo("</document_content>")
4450
click.echo("</document>")
51+
global_index += 1
4552

4653

4754
def process_path(
@@ -151,6 +158,9 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
151158
...
152159
</documents>
153160
"""
161+
# Reset global_index for pytest
162+
global global_index
163+
global_index = 1
154164
gitignore_rules = []
155165
for path in paths:
156166
if not os.path.exists(path):
@@ -159,7 +169,6 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
159169
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
160170
if claude_xml and path == paths[0]:
161171
click.echo("<documents>")
162-
163172
process_path(
164173
path,
165174
include_hidden,
@@ -168,6 +177,5 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
168177
ignore_patterns,
169178
claude_xml,
170179
)
171-
172180
if claude_xml:
173181
click.echo("</documents>")

tests/test_files_to_prompt.py

Lines changed: 18 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import pytest
23

34
from click.testing import CliRunner
45

@@ -190,53 +191,33 @@ def test_binary_file_warning(tmpdir):
190191
)
191192

192193

193-
def test_xml_format_dir(tmpdir):
194+
@pytest.mark.parametrize(
195+
"args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
196+
)
197+
def test_xml_format_dir(tmpdir, args):
194198
runner = CliRunner()
195199
with tmpdir.as_cwd():
196200
os.makedirs("test_dir")
197201
with open("test_dir/file1.txt", "w") as f:
198-
f.write("Contents of file1")
202+
f.write("Contents of file1.txt")
199203
with open("test_dir/file2.txt", "w") as f:
200-
f.write("Contents of file2")
201-
202-
result = runner.invoke(cli, ["test_dir", "--cxml"])
203-
assert result.exit_code == 0
204-
actual = result.output
205-
expected = """
206-
<documents>
207-
<document path="test_dir/file1.txt">
208-
Contents of file1
209-
</document>
210-
<document path="test_dir/file2.txt">
211-
Contents of file2
212-
</document>
213-
</documents>
214-
"""
215-
assert expected.strip() == actual.strip()
216-
217-
218-
def test_cxml_format_multiple_paths(tmpdir):
219-
runner = CliRunner()
220-
with tmpdir.as_cwd():
221-
os.makedirs("test_dir")
222-
with open("test_dir/file1.txt", "w") as f:
223-
f.write("Contents of file1")
224-
with open("test_dir/file2.txt", "w") as f:
225-
f.write("Contents of file2")
226-
227-
result = runner.invoke(
228-
cli, ["test_dir/file1.txt", "test_dir/file2.txt", "--cxml"]
229-
)
230-
204+
f.write("Contents of file2.txt")
205+
result = runner.invoke(cli, args + ["--cxml"])
231206
assert result.exit_code == 0
232207
actual = result.output
233208
expected = """
234209
<documents>
235-
<document path="test_dir/file1.txt">
236-
Contents of file1
210+
<document index="1">
211+
<source>test_dir/file1.txt</source>
212+
<document_content>
213+
Contents of file1.txt
214+
</document_content>
237215
</document>
238-
<document path="test_dir/file2.txt">
239-
Contents of file2
216+
<document index="2">
217+
<source>test_dir/file2.txt</source>
218+
<document_content>
219+
Contents of file2.txt
220+
</document_content>
240221
</document>
241222
</documents>
242223
"""

0 commit comments

Comments
 (0)