@@ -269,6 +269,45 @@ def generate_token_string(context_string: str) -> str:
269
269
formatted_tokens = f"{ total_gpt_tokens } "
270
270
return formatted_tokens
271
271
272
+
273
def ingest_single_file(path: str, query: dict) -> tuple:
    """Ingest one text file and build the (summary, tree, files_content) triple.

    Args:
        path: Filesystem path of the file to ingest.
        query: Parsed query dict; this function reads 'max_file_size',
            'local_path', 'user_name' and 'repo_name' from it.

    Returns:
        A 3-tuple of strings: (summary, tree, files_content).
        NOTE: the original annotation said ``Dict`` but the code has always
        returned a tuple; the annotation is corrected here — callers that
        unpack the tuple are unaffected.

    Raises:
        ValueError: If ``path`` is not a regular file, or is not a text file.
    """
    if not os.path.isfile(path):
        raise ValueError(f"Path {path} is not a file")

    file_size = os.path.getsize(path)
    is_text = is_text_file(path)
    if not is_text:
        raise ValueError(f"File {path} is not a text file")

    content = read_file_content(path)
    # Keep the file entry but drop its body when it exceeds the size cap.
    if file_size > query['max_file_size']:
        content = "[Content ignored: file too large]"

    file_info = {
        # Strip the local clone prefix so the path is repo-relative.
        "path": path.replace(query['local_path'], ""),
        "content": content,
        "size": file_size
    }

    summary = (
        f"Repository: {query['user_name']}/{query['repo_name']}\n"
        f"File: {os.path.basename(path)}\n"
        f"Size: {file_size:,} bytes\n"
        f"Lines: {len(content.splitlines()):,}\n"
    )

    files_content = create_file_content_string([file_info])
    # Single-file "tree": just the root connector plus the file name.
    tree = "Directory structure:\n└── " + os.path.basename(path)

    formatted_tokens = generate_token_string(files_content)
    if formatted_tokens:
        summary += f"\nEstimated tokens: {formatted_tokens}"
    return (summary, tree, files_content)
309
+
310
+
272
311
def ingest_from_query (query : dict , ignore_patterns : List [str ] = DEFAULT_IGNORE_PATTERNS ) -> Dict :
273
312
"""Main entry point for analyzing a codebase directory or single file."""
274
313
@@ -277,43 +316,7 @@ def ingest_from_query(query: dict, ignore_patterns: List[str] = DEFAULT_IGNORE_P
277
316
raise ValueError (f"{ query ['slug' ]} cannot be found, make sure the repository is public" )
278
317
279
318
if query .get ('type' ) == 'blob' :
280
- if not os .path .isfile (path ):
281
- raise ValueError (f"Path { path } is not a file" )
282
-
283
- file_size = os .path .getsize (path )
284
- is_text = is_text_file (path )
285
- if not is_text :
286
- raise ValueError (f"File { path } is not a text file" )
287
-
288
- content = read_file_content (path )
289
- if file_size > query ['max_file_size' ]:
290
- content = "[Content ignored: file too large]"
291
-
292
- file_info = {
293
- "path" : path .replace (query ['local_path' ], "" ),
294
- "content" : content ,
295
- "size" : file_size
296
- }
297
-
298
- summary = (
299
- f"Repository: { query ['user_name' ]} /{ query ['repo_name' ]} \n "
300
- f"File: { os .path .basename (path )} \n "
301
- f"Size: { file_size :,} bytes\n "
302
- f"Lines: { len (content .splitlines ()):,} \n "
303
- )
304
-
305
-
306
-
307
- files_content = create_file_content_string ([file_info ])
308
- tree = "Directory structure:\n └── " + os .path .basename (path )
309
-
310
-
311
- print (files_content )
312
- formatted_tokens = generate_token_string (files_content )
313
- if formatted_tokens :
314
- summary += f"\n Estimated tokens: { formatted_tokens } "
315
- return (summary , tree , files_content )
316
-
319
+ return ingest_single_file (path , query )
317
320
else :
318
321
nodes = scan_directory (path , ignore_patterns , query ['local_path' ])
319
322
files = extract_files_content (query , nodes , query ['max_file_size' ])
0 commit comments