Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Add Python 3.9 support by using ParamSpec from typing_extensions and removing match statements
  • Loading branch information
filipchristiansen committed Feb 17, 2025
commit fe81de1a58b44b741905ed04a12e7d6beeaa4e0c
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ repos:
hooks:
- id: pyupgrade
description: "Automatically upgrade syntax for newer versions."
args: [--py3-plus, --py36-plus, --py38-plus, --py39-plus, --py310-plus]
args: [--py3-plus, --py36-plus]

- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
Expand Down
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ You can also replace `hub` with `ingest` in any GitHub URL to access the corespo
- **CLI tool**: Run it as a shell command
- **Python package**: Import it in your code

## 📚 Requirements

- Python 3.9+

## 📦 Installation

``` bash
Expand Down Expand Up @@ -61,7 +65,7 @@ gitingest --help

This will write the digest in a text file (default `digest.txt`) in your current working directory.

## 🐛 Python package usage
## 🐍 Python package usage

```python
# Synchronous usage
Expand All @@ -81,7 +85,7 @@ result = asyncio.run(ingest_async("path/to/directory"))

By default, this won't write a file but can be enabled with the `output` argument.

## 🌐 Self-host
## 🐳 Self-host

1. Build the image:

Expand All @@ -104,7 +108,7 @@ If you are hosting it on a domain, you can specify the allowed hostnames via env
ALLOWED_HOSTS="example.com, localhost, 127.0.0.1"
```

## ✔️ Contributing to Gitingest
## 🤝 Contributing

### Non-technical ways to contribute

Expand All @@ -128,6 +132,6 @@ Gitingest aims to be friendly for first time contributors, with a simple python

Check out the NPM alternative 📦 Repomix: <https://github.com/yamadashy/repomix>

## Project Growth
## 🚀 Project Growth

[![Star History Chart](https://api.star-history.com/svg?repos=cyclotruc/gitingest&type=Date)](https://star-history.com/#cyclotruc/gitingest&Date)
10 changes: 4 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@ name = "gitingest"
version = "0.1.3"
description="CLI tool to analyze and create text dumps of codebases for LLMs"
readme = {file = "README.md", content-type = "text/markdown" }
requires-python = ">= 3.10"
requires-python = ">= 3.9"
dependencies = [
"click>=8.0.0",
"fastapi[standard]",
"python-dotenv",
"slowapi",
"starlette",
"tiktoken",
"uvicorn",
"typing_extensions; python_version < '3.10'",
]

license = {file = "LICENSE"}
authors = [{name = "Romain Courtois", email = "romain@coderamp.io"}]
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
install_requires=[
"click>=8.0.0",
"tiktoken",
"typing_extensions; python_version < '3.10'",
],
entry_points={
"console_scripts": [
"gitingest=gitingest.cli:main",
],
},
python_requires=">=3.6",
python_requires=">=3.9",
author="Romain Courtois",
author_email="romain@coderamp.io",
description="CLI tool to analyze and create text dumps of codebases for LLMs",
Expand Down
21 changes: 11 additions & 10 deletions src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# pylint: disable=no-value-for-parameter

import asyncio
from typing import Optional

import click

Expand All @@ -19,31 +20,31 @@
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
def main(
source: str,
output: str | None,
output: Optional[str],
max_size: int,
exclude_pattern: tuple[str, ...],
include_pattern: tuple[str, ...],
branch: str | None,
branch: Optional[str],
):
"""
Main entry point for the CLI. This function is called when the CLI is run as a script.
Main entry point for the CLI. This function is called when the CLI is run as a script.

It calls the async main function to run the command.

Parameters
----------
source : str
The source directory or repository to analyze.
output : str | None
output : Optional[str]
The path where the output file will be written. If not specified, the output will be written
to a file named `<repo_name>.txt` in the current directory.
max_size : int
The maximum file size to process, in bytes. Files larger than this size will be ignored.
exclude_pattern : tuple[str, ...]
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
include_pattern : tuple[str, ...]
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str | None
branch : Optional[str]
The branch to clone (optional).
"""
# Main entry point for the CLI. This function is called when the CLI is run as a script.
Expand All @@ -52,11 +53,11 @@ def main(

async def _async_main(
source: str,
output: str | None,
output: Optional[str],
max_size: int,
exclude_pattern: tuple[str, ...],
include_pattern: tuple[str, ...],
branch: str | None,
branch: Optional[str],
) -> None:
"""
Analyze a directory or repository and create a text dump of its contents.
Expand All @@ -68,7 +69,7 @@ async def _async_main(
----------
source : str
The source directory or repository to analyze.
output : str | None
output : Optional[str]
The path where the output file will be written. If not specified, the output will be written
to a file named `<repo_name>.txt` in the current directory.
max_size : int
Expand All @@ -77,7 +78,7 @@ async def _async_main(
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
include_pattern : tuple[str, ...]
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str | None
branch : Optional[str]
The branch to clone (optional).

Raises
Expand Down
22 changes: 10 additions & 12 deletions src/gitingest/notebook_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import warnings
from itertools import chain
from pathlib import Path
from typing import Any
from typing import Any, Optional

from gitingest.exceptions import InvalidNotebookError

Expand Down Expand Up @@ -63,7 +63,7 @@ def process_notebook(file: Path, include_output: bool = True) -> str:
return "\n\n".join(result) + "\n"


def _process_cell(cell: dict[str, Any], include_output: bool) -> str | None:
def _process_cell(cell: dict[str, Any], include_output: bool) -> Optional[str]:
"""
Process a Jupyter notebook cell and return the cell content as a string.

Expand All @@ -76,7 +76,7 @@ def _process_cell(cell: dict[str, Any], include_output: bool) -> str | None:

Returns
-------
str | None
Optional[str]
The cell content as a string, or None if the cell is empty.

Raises
Expand Down Expand Up @@ -139,15 +139,13 @@ def _extract_output(output: dict[str, Any]) -> list[str]:
"""
output_type = output["output_type"]

match output_type:
case "stream":
return output["text"]
if output_type == "stream":
return output["text"]

case "execute_result" | "display_data":
return output["data"]["text/plain"]
if output_type in ("execute_result", "display_data"):
return output["data"]["text/plain"]

case "error":
return [f"Error: {output['ename']}: {output['evalue']}"]
if output_type == "error":
return [f"Error: {output['ename']}: {output['evalue']}"]

case _:
raise ValueError(f"Unknown output type: {output_type}")
raise ValueError(f"Unknown output type: {output_type}")
16 changes: 8 additions & 8 deletions src/gitingest/query_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import platform
from fnmatch import fnmatch
from pathlib import Path
from typing import Any
from typing import Any, Optional, Union

import tiktoken

Expand Down Expand Up @@ -42,7 +42,7 @@ def _normalize_path(path: Path) -> Path:
return Path(os.path.normpath(str(path)))


def _normalize_path_str(path: str | Path) -> str:
def _normalize_path_str(path: Union[Path, str]) -> str:
"""
Convert path to string with forward slashes for consistent output.

Expand Down Expand Up @@ -293,10 +293,10 @@ def _sort_children(children: list[dict[str, Any]]) -> list[dict[str, Any]]:
def _scan_directory(
path: Path,
query: ParsedQuery,
seen_paths: set[Path] | None = None,
seen_paths: Optional[set[Path]] = None,
depth: int = 0,
stats: dict[str, int] | None = None,
) -> dict[str, Any] | None:
stats: Optional[dict[str, int]] = None,
) -> Optional[dict[str, Any]]:
"""
Recursively analyze a directory and its contents with safety limits.

Expand Down Expand Up @@ -573,7 +573,7 @@ def _process_item(
def _extract_files_content(
query: ParsedQuery,
node: dict[str, Any],
files: list[dict[str, Any]] | None = None,
files: Optional[list[dict[str, Any]]] = None,
) -> list[dict[str, Any]]:
"""
Recursively collect all text files with their contents.
Expand Down Expand Up @@ -733,7 +733,7 @@ def _create_tree_structure(query: ParsedQuery, node: dict[str, Any], prefix: str
return tree


def _generate_token_string(context_string: str) -> str | None:
def _generate_token_string(context_string: str) -> Optional[str]:
"""
Return the number of tokens in a text string.

Expand All @@ -747,7 +747,7 @@ def _generate_token_string(context_string: str) -> str | None:

Returns
-------
str | None
Optional[str]
The formatted number of tokens as a string (e.g., '1.2k', '1.2M'), or `None` if an error occurs.
"""
try:
Expand Down
33 changes: 17 additions & 16 deletions src/gitingest/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import warnings
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Union
from urllib.parse import unquote, urlparse

from gitingest.config import MAX_FILE_SIZE, TMP_BASE_PATH
Expand All @@ -32,28 +33,28 @@ class ParsedQuery: # pylint: disable=too-many-instance-attributes
Dataclass to store the parsed details of the repository or file path.
"""

user_name: str | None
repo_name: str | None
user_name: Optional[str]
repo_name: Optional[str]
subpath: str
local_path: Path
url: str | None
url: Optional[str]
slug: str
id: str
type: str | None = None
branch: str | None = None
commit: str | None = None
type: Optional[str] = None
branch: Optional[str] = None
commit: Optional[str] = None
max_file_size: int = MAX_FILE_SIZE
ignore_patterns: set[str] | None = None
include_patterns: set[str] | None = None
pattern_type: str | None = None
ignore_patterns: Optional[set[str]] = None
include_patterns: Optional[set[str]] = None
pattern_type: Optional[str] = None


async def parse_query(
source: str,
max_file_size: int,
from_web: bool,
include_patterns: set[str] | str | None = None,
ignore_patterns: set[str] | str | None = None,
include_patterns: Optional[Union[str, set[str]]] = None,
ignore_patterns: Optional[Union[str, set[str]]] = None,
) -> ParsedQuery:
"""
Parse the input source (URL or path) to extract relevant details for the query.
Expand All @@ -70,9 +71,9 @@ async def parse_query(
The maximum file size in bytes to include.
from_web : bool
Flag indicating whether the source is a web URL.
include_patterns : set[str] | str | None, optional
include_patterns : Union[str, set[str]], optional
Patterns to include, by default None. Can be a set of strings or a single string.
ignore_patterns : set[str] | str | None, optional
ignore_patterns : Union[str, set[str]], optional
Patterns to ignore, by default None. Can be a set of strings or a single string.

Returns
Expand Down Expand Up @@ -208,7 +209,7 @@ async def _parse_repo_source(source: str) -> ParsedQuery:
return parsed


async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) -> str | None:
async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) -> Optional[str]:
"""
Configure the branch and subpath based on the remaining parts of the URL.
Parameters
Expand All @@ -219,7 +220,7 @@ async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) ->
The URL of the repository.
Returns
-------
str | None
Optional[str]
The branch name if found, otherwise None.

"""
Expand Down Expand Up @@ -283,7 +284,7 @@ def _normalize_pattern(pattern: str) -> str:
return pattern


def _parse_patterns(pattern: set[str] | str) -> set[str]:
def _parse_patterns(pattern: Union[str, set[str]]) -> set[str]:
"""
Parse and validate file/directory patterns for inclusion or exclusion.

Expand Down
Loading