Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
refactor: centralize PAT validation, streamline repo checks & housekeeping
* `.venv*` to `.gitignore` * `# type: ignore[attr-defined]` hints in `compat_typing.py` for IDE-agnostic imports * Helpful PAT string in `InvalidGitHubTokenError` for easier debugging * Bump **ruff-pre-commit** hook → `v0.12.1` * CONTRIBUTING: * Require **Python 3.9+** * Recommend signed (`-S`) commits * PAT validation now happens **only** in entry points (`utils.auth.resolve_token` for CLI/lib, `server.process_query` for Web UI) * Unified `_check_github_repo_exists` into `check_repo_exists`, replacing `curl -I` with `curl --silent --location --write-out %{http_code} -o /dev/null` * Broaden `_GITHUB_PAT_PATTERN` * `create_git_auth_header` raises `ValueError` when hostname is missing * Tests updated to expect raw HTTP-code output * Superfluous “token can be set via `GITHUB_TOKEN`” notes in docstrings * `.gitingestignore` & `.terraform` from `DEFAULT_IGNORE_PATTERNS` * Token validation inside `create_git_command` * Obsolete `test_create_git_command_invalid_token` * Adjust `test_clone.py` and `test_git_utils.py` for new status-code handling * Consolidate mocks after token-validation relocation BREAKING CHANGE: `create_git_command` no longer validates GitHub tokens; callers must ensure tokens are valid (via `validate_github_token`) before invoking lower-level git helpers.
  • Loading branch information
filipchristiansen committed Jul 1, 2025
commit ef4b4b8f74138fe7942ecbb1f5ff1fb7f6b71842
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ celerybeat.pid
# Environments
.env
.venv
.venv*
env/
venv/
ENV/
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ repos:
args: ["--disable=line-length"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.0
rev: v0.12.1
hooks:
- id: ruff-check
- id: ruff-format
Expand Down
16 changes: 13 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be
cd gitingest
```

**Note**: To contribute, ensure you have **Python 3.9 or newer** installed, as some of the `pre-commit` hooks (e.g. `pyupgrade`) require Python 3.9+.

3. Set up the development environment and install dependencies:

```bash
Expand All @@ -31,7 +33,7 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be
4. Create a new branch for your changes:

```bash
git checkout -b your-branch
git checkout -b your-branch
```

5. Make your changes. Make sure to add corresponding tests for your changes.
Expand Down Expand Up @@ -66,10 +68,18 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be

9. Confirm that everything is working as expected. If you encounter any issues, fix them and repeat steps 6 to 8.

10. Commit your changes:
10. Commit your changes (signed):

All commits to Gitingest must be [GPG-signed](https://docs.github.com/en/authentication/managing-commit-signature-verification) so that the project can verify the authorship of every contribution. You can either configure Git globally with:

```bash
git config --global commit.gpgSign true
```

or pass the `-S` flag as shown below.

```bash
git commit -m "Your commit message"
git commit -S -m "Your commit message"
```

If `pre-commit` raises any issues, fix them and repeat steps 6 to 9.
Expand Down
1 change: 0 additions & 1 deletion src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ async def _async_main(
If ``True``, also ingest files matched by ``.gitignore`` or ``.gitingestignore`` (default: ``False``).
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.
output : str | None
The path where the output file will be written (default: ``digest.txt`` in current directory).
Use ``"-"`` to write to ``stdout``.
Expand Down
8 changes: 1 addition & 7 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
ensure_git_installed,
is_github_host,
run_command,
validate_github_token,
)
from gitingest.utils.os_utils import ensure_directory
from gitingest.utils.timeout_wrapper import async_timeout
Expand All @@ -23,7 +22,7 @@


@async_timeout(DEFAULT_TIMEOUT)
async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
"""Clone a repository to a local path based on the provided configuration.

This function handles the process of cloning a Git repository to the local file system.
Expand All @@ -36,7 +35,6 @@ async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
The configuration for cloning the repository.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Raises
------
Expand All @@ -51,10 +49,6 @@ async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
branch: str | None = config.branch
partial_clone: bool = config.subpath != "/"

# Validate token if provided
if token and is_github_host(url):
validate_github_token(token)

# Create parent directory if it doesn't exist
await ensure_directory(Path(local_path).parent)

Expand Down
3 changes: 0 additions & 3 deletions src/gitingest/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ async def parse_query(
Patterns to ignore. Can be a set of strings or a single string.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Returns
-------
Expand Down Expand Up @@ -109,7 +108,6 @@ async def _parse_remote_repo(source: str, token: str | None = None) -> Ingestion
The URL or domain-less slug to parse.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Returns
-------
Expand Down Expand Up @@ -301,7 +299,6 @@ async def try_domains_for_user_and_repo(user_name: str, repo_name: str, token: s
The name of the repository.
token : str | None
GitHub personal access token (PAT) for accessing private repositories.
Can also be set via the ``GITHUB_TOKEN`` environment variable.

Returns
-------
Expand Down
7 changes: 6 additions & 1 deletion src/gitingest/utils/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import os

from gitingest.utils.git_utils import validate_github_token


def resolve_token(token: str | None) -> str | None:
"""Resolve the token to use for the query.
Expand All @@ -19,4 +21,7 @@ def resolve_token(token: str | None) -> str | None:
The resolved token.

"""
return token or os.getenv("GITHUB_TOKEN")
token = token or os.getenv("GITHUB_TOKEN")
if token:
validate_github_token(token)
return token
8 changes: 4 additions & 4 deletions src/gitingest/utils/compat_typing.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Compatibility layer for typing."""

try:
from typing import ParamSpec, TypeAlias # Py ≥ 3.10
from typing import ParamSpec, TypeAlias # type: ignore[attr-defined] # Py ≥ 3.10
except ImportError:
from typing_extensions import ParamSpec, TypeAlias # Py 3.8 / 3.9
from typing_extensions import ParamSpec, TypeAlias # type: ignore[attr-defined] # Py 3.8 / 3.9

try:
from typing import Annotated # Py ≥ 3.9
from typing import Annotated # type: ignore[attr-defined] # Py ≥ 3.9
except ImportError:
from typing_extensions import Annotated # Py 3.8
from typing_extensions import Annotated # type: ignore[attr-defined] # Py 3.8

__all__ = ["Annotated", "ParamSpec", "TypeAlias"]
8 changes: 3 additions & 5 deletions src/gitingest/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ def __init__(self, message: str) -> None:
class InvalidGitHubTokenError(ValueError):
"""Exception raised when a GitHub Personal Access Token is malformed."""

def __init__(self) -> None:
super().__init__(
"Invalid GitHub token format. Token should start with 'github_pat_' or 'ghp_' "
"followed by at least 36 characters of letters, numbers, and underscores.",
)
def __init__(self, token: str) -> None:
msg = f"Invalid GitHub token format: {token!r}. To generate a token, see https://github.com/settings/tokens."
super().__init__(msg)
Loading
Loading