Skip to content

Commit 996a341

Browse files
tawsifkamal and jayhack authored
CG-10739: Open Hands Semantic Edit Implementation (#476)
- Can now specify line start and end point for edit - For large files, it will replace the portion of the file from start and end using gpt-4o-mini (the draft editor) - For smaller files (< 300 lines), it will resort to modifying the entire file - added better system prompts with more information about codegen --------- Co-authored-by: jayhack <jayhack.0@gmail.com> Co-authored-by: Jay Hack <jayhack@users.noreply.github.com>
1 parent 38b6045 commit 996a341

File tree

6 files changed

+605
-125
lines changed

6 files changed

+605
-125
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Langchain tools for workspace operations."""
2+
3+
from langchain_core.tools.base import BaseTool
4+
5+
from codegen import Codebase
6+
7+
from .tools import (
8+
CommitTool,
9+
CreateFileTool,
10+
DeleteFileTool,
11+
EditFileTool,
12+
ListDirectoryTool,
13+
RevealSymbolTool,
14+
SearchTool,
15+
SemanticEditTool,
16+
ViewFileTool,
17+
)
18+
19+
__all__ = [
20+
# Tool classes
21+
"CommitTool",
22+
"CreateFileTool",
23+
"DeleteFileTool",
24+
"EditFileTool",
25+
"ListDirectoryTool",
26+
"RevealSymbolTool",
27+
"SearchTool",
28+
"SemanticEditTool",
29+
"ViewFileTool",
30+
# Helper functions
31+
"get_workspace_tools",
32+
]
33+
34+
35+
def get_workspace_tools(codebase: Codebase) -> list[BaseTool]:
36+
"""Get all workspace tools initialized with a codebase.
37+
38+
Args:
39+
codebase: The codebase to operate on
40+
41+
Returns:
42+
List of initialized Langchain tools
43+
"""
44+
return [
45+
ViewFileTool(codebase),
46+
ListDirectoryTool(codebase),
47+
SearchTool(codebase),
48+
EditFileTool(codebase),
49+
CreateFileTool(codebase),
50+
DeleteFileTool(codebase),
51+
CommitTool(codebase),
52+
RevealSymbolTool(codebase),
53+
SemanticEditTool(codebase),
54+
]

src/codegen/extensions/langchain/agent.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from langchain.tools import BaseTool
77
from langchain_core.chat_history import InMemoryChatMessageHistory
88
from langchain_core.messages import BaseMessage
9+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
910
from langchain_core.runnables.history import RunnableWithMessageHistory
1011
from langchain_openai import ChatOpenAI
1112

@@ -75,8 +76,62 @@ def create_codebase_agent(
7576
GithubCreatePRReviewCommentTool(codebase),
7677
]
7778

78-
# Get the prompt to use
79-
prompt = pull("hwchase17/openai-functions-agent")
79+
prompt = ChatPromptTemplate.from_messages(
80+
[
81+
(
82+
"system",
83+
"""
84+
You are an expert software engineer with deep knowledge of code analysis, refactoring, and development best practices.
85+
You have access to a powerful set of tools from codegen that allow you to analyze and modify codebases:
86+
87+
Core Capabilities:
88+
1. Code Analysis & Navigation:
89+
- Search codebases using text or regex patterns
90+
- View file contents and metadata (functions, classes, imports)
91+
- Analyze code structure and dependencies
92+
- Reveal symbol definitions and usages
93+
94+
2. File Operations:
95+
- View, create, edit, and delete files
96+
- Rename files while updating all imports
97+
- Move symbols between files
98+
- Commit changes to disk
99+
100+
3. Semantic Editing:
101+
- Make precise, context-aware code edits
102+
- Analyze affected code structures
103+
- Preview changes before applying
104+
- Ensure code quality with linting
105+
106+
107+
4. Code Search:
108+
- Text-based and semantic search
109+
- Search within specific directories
110+
- Filter by file extensions
111+
- Get paginated results
112+
113+
Best Practices:
114+
- Always analyze code structure before making changes
115+
- Preview edits to understand their impact
116+
- Update imports and dependencies when moving code
117+
- Use semantic edits for complex changes
118+
- Commit changes after significant modifications
119+
- Maintain code quality and consistency
120+
121+
Remember: You can combine these tools to perform complex refactoring
122+
and development tasks. Always explain your approach before making changes.
123+
Important rules: If you are asked to make any edits to a file, always
124+
first view the file to understand its context and make sure you understand
125+
the impact of the changes. Only then make the changes.
126+
Ensure if specifiying line numbers, it's chosen with room (around 20
127+
lines before and 20 lines after the edit range)
128+
""",
129+
),
130+
MessagesPlaceholder("chat_history", optional=True),
131+
("human", "{input}"),
132+
MessagesPlaceholder("agent_scratchpad"),
133+
]
134+
)
80135

81136
# Create the agent
82137
agent = OpenAIFunctionsAgent(

src/codegen/extensions/langchain/tools.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
from typing import Callable, ClassVar, Literal, Optional
55

6-
from langchain.tools import BaseTool
6+
from langchain_core.tools.base import BaseTool
77
from pydantic import BaseModel, Field
88

99
from codegen import Codebase
@@ -18,6 +18,10 @@
1818
linear_search_issues_tool,
1919
)
2020
from codegen.extensions.tools.link_annotation import add_links_to_message
21+
from codegen.extensions.tools.reveal_symbol import reveal_symbol
22+
from codegen.extensions.tools.search import search
23+
from codegen.extensions.tools.semantic_edit import semantic_edit
24+
from codegen.extensions.tools.semantic_search import semantic_search
2125

2226
from ..tools import (
2327
commit,
@@ -30,13 +34,10 @@
3034
list_directory,
3135
move_symbol,
3236
rename_file,
33-
reveal_symbol,
34-
search,
35-
semantic_edit,
36-
semantic_search,
3737
view_file,
3838
view_pr,
3939
)
40+
from ..tools.tool_prompts import _FILE_EDIT_DESCRIPTION
4041

4142

4243
class ViewFileInput(BaseModel):
@@ -118,7 +119,7 @@ class EditFileTool(BaseTool):
118119
"""Tool for editing files."""
119120

120121
name: ClassVar[str] = "edit_file"
121-
description: ClassVar[str] = "Edit a file by replacing its entire content"
122+
description: ClassVar[str] = "Edit a file by replacing its entire content. This tool should only be used for replacing entire file contents."
122123
args_schema: ClassVar[type[BaseModel]] = EditFileInput
123124
codebase: Codebase = Field(exclude=True)
124125

@@ -233,36 +234,35 @@ def _run(
233234
return json.dumps(result, indent=2)
234235

235236

237+
_SEMANTIC_EDIT_BRIEF = """Tool for semantic editing of files.
238+
* Allows editing files by providing a draft of the new content
239+
* For large files, specify line ranges to edit
240+
* Will intelligently handle unchanged sections of code. Also supports appending to the end of a file."""
241+
242+
236243
class SemanticEditInput(BaseModel):
237244
"""Input for semantic editing."""
238245

239-
filepath: str = Field(..., description="Path to the file to edit")
240-
edit_spec: str = Field(
241-
...,
242-
description="""The edit specification showing desired changes.
243-
Must contain code blocks between '# ... existing code ...' markers.
244-
Example:
245-
# ... existing code ...
246-
def new_function():
247-
print("Hello")
248-
# ... existing code ...
249-
""",
250-
)
246+
filepath: str = Field(..., description="Path of the file relative to workspace root")
247+
edit_content: str = Field(..., description=_FILE_EDIT_DESCRIPTION)
248+
start: int = Field(default=1, description="Starting line number (1-indexed, inclusive). Default is 1.")
249+
end: int = Field(default=-1, description="Ending line number (1-indexed, inclusive). Default is -1 (end of file).")
251250

252251

253252
class SemanticEditTool(BaseTool):
254253
"""Tool for semantic editing of files."""
255254

256255
name: ClassVar[str] = "semantic_edit"
257-
description: ClassVar[str] = "Edit a file using a semantic edit specification with code blocks"
256+
description: ClassVar[str] = _SEMANTIC_EDIT_BRIEF # Short description
258257
args_schema: ClassVar[type[BaseModel]] = SemanticEditInput
259258
codebase: Codebase = Field(exclude=True)
260259

261260
def __init__(self, codebase: Codebase) -> None:
262261
super().__init__(codebase=codebase)
263262

264-
def _run(self, filepath: str, edit_spec: str) -> str:
265-
result = semantic_edit(self.codebase, filepath, edit_spec)
263+
def _run(self, filepath: str, edit_content: str, start: int = 1, end: int = -1) -> str:
264+
# Create the draft editor mini llm
265+
result = semantic_edit(self.codebase, filepath, edit_content, start=start, end=end)
266266
return json.dumps(result, indent=2)
267267

268268

src/codegen/extensions/tools/file_operations.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,36 @@
66
from codegen.sdk.core.directory import Directory
77

88

9+
def _add_line_numbers(content: str) -> str:
10+
"""Add line numbers to source code.
11+
12+
Args:
13+
content: The source code as a string. Every line is numbered;
14+
line numbers are 1-indexed and right-aligned to the width of
15+
the largest line number.
16+
17+
Returns:
18+
Source code with line numbers prefixed
19+
"""
20+
lines = content.splitlines()
21+
total_lines = len(lines)
22+
23+
# Calculate padding for line numbers based on max line number
24+
max_line_num = total_lines
25+
padding = len(str(max_line_num))
26+
27+
# Get the line range to display
28+
start = 0
29+
end = total_lines
30+
31+
# Add line numbers
32+
numbered_lines = []
33+
for i, line in enumerate(lines[start:end], start=start + 1):
34+
numbered_lines.append(f"{i:>{padding}}|{line}")
35+
36+
return "\n".join(numbered_lines)
37+
38+
939
def view_file(codebase: Codebase, filepath: str) -> dict[str, Any]:
1040
"""View the contents and metadata of a file.
1141
@@ -28,7 +58,7 @@ def view_file(codebase: Codebase, filepath: str) -> dict[str, Any]:
2858

2959
return {
3060
"filepath": file.filepath,
31-
"content": file.content,
61+
"content": _add_line_numbers(file.content),
3262
}
3363

3464

0 commit comments

Comments
 (0)