Skip to content

Git Operations

git

Generic git operations for CI pipelines.

Host-side git operations: clone, push, branch creation, diff inspection. All operations use subprocess calls to git.

GitDiffError

Bases: Exception

Raised when git diff fails (missing ref, not a repo, etc.).

extract_repo_url(text)

Extract a repo URL from text, validating against forge APIs.

Filters out subpaths, file extensions, and placeholder URLs. Returns the first URL that validates as a real project; otherwise falls back to the first candidate (for example, when no API tokens are available or when no candidate passes validation).

Source code in src/agentic_ci/git.py
def extract_repo_url(text: str) -> str | None:
    """Pick the most plausible repository URL mentioned in *text*.

    GitLab-style candidates are considered first. They are checked with
    ``_validate_gitlab_url`` only when ``BOT_PAT`` or ``GITLAB_TOKEN`` is
    set in the environment; the first candidate that validates wins,
    otherwise the first GitLab candidate is returned as-is.

    GitHub-style candidates are only looked at when no GitLab candidate
    was found. Each is checked with ``_validate_github_url``; the first
    that validates wins, otherwise the first GitHub candidate is returned.

    Returns:
        The chosen URL, or ``None`` when *text* yields no candidates.
    """
    gitlab_urls = _collect_candidates(text, _GITLAB_URL_RE)
    if gitlab_urls:
        token_present = bool(os.environ.get("BOT_PAT") or os.environ.get("GITLAB_TOKEN"))
        if token_present:
            # Lazily validate so we stop at the first URL that resolves.
            confirmed = next((u for u in gitlab_urls if _validate_gitlab_url(u)), None)
            if confirmed is not None:
                return confirmed
        # No token, or nothing validated: fall back to the first match.
        return gitlab_urls[0]

    github_urls = _collect_candidates(text, _GITHUB_URL_RE)
    if not github_urls:
        return None

    confirmed = next((u for u in github_urls if _validate_github_url(u)), None)
    return github_urls[0] if confirmed is None else confirmed

validate_repo_url(url)

Check that a repo URL points to an allowed host with no path traversal.

Source code in src/agentic_ci/git.py
def validate_repo_url(url: str) -> bool:
    """Check that a repo URL points to an allowed host with no path traversal.

    A URL is accepted only when all of the following hold:

    * it is non-empty and parses as a URL,
    * the scheme is exactly ``https``,
    * the hostname is present and listed in ``ALLOWED_HOSTS``,
    * it carries no embedded username or password,
    * its path does not contain ``..``.

    Args:
        url: The repository URL to check.

    Returns:
        True when every check passes, False otherwise. Malformed URLs
        return False instead of raising.
    """
    if not url:
        return False
    try:
        # urlparse() and the netloc-derived attributes raise ValueError for
        # malformed authorities such as an unbalanced "[" (invalid IPv6).
        parsed = urlparse(url)
        hostname = parsed.hostname
        has_credentials = bool(parsed.username or parsed.password)
    except ValueError:
        return False
    if parsed.scheme != "https":
        return False
    if not hostname or hostname not in ALLOWED_HOSTS:
        return False
    if has_credentials:
        return False
    if ".." in (parsed.path or ""):
        return False
    return True

validate_branch_exists(repo_url, branch)

Check if a branch exists on the remote repository.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| repo_url | str | HTTPS URL of the git repository | required |
| branch | str | Branch name to validate | required |

Returns:

| Type | Description |
| --- | --- |
| bool | True if the branch exists on the remote, False otherwise |

Note

Returns False for any error condition (network issues, invalid refs, etc.) to allow graceful fallback in the resolution chain.

Source code in src/agentic_ci/git.py
def validate_branch_exists(repo_url: str, branch: str) -> bool:
    """Check if a branch exists on the remote repository.

    Args:
        repo_url: HTTPS URL of the git repository
        branch: Branch name to validate

    Returns:
        True if the branch exists on the remote, False otherwise

    Note:
        Returns False for any error condition (network issues, invalid refs, etc.)
        to allow graceful fallback in the resolution chain.
    """
    if not _validate_ref(branch):
        log.warning("Invalid branch name rejected: %s", branch)
        return False

    # ls-remote patterns are matched against the *tail* of each ref, so a bare
    # "main" would also match "refs/heads/feature/main". Ask for the fully
    # qualified ref and confirm it appears verbatim in the output.
    full_ref = branch if branch.startswith("refs/heads/") else f"refs/heads/{branch}"

    try:
        result = subprocess.run(
            # "--" stops option parsing so repo_url can never be read as a flag.
            ["git", "ls-remote", "--heads", "--", repo_url, full_ref],
            capture_output=True,
            text=True,
            timeout=30,
            stdin=_DEVNULL,
        )
    except subprocess.TimeoutExpired:
        log.warning("Branch validation timed out for %s branch %s", repo_url, branch)
        return False
    except OSError as exc:
        # Covers FileNotFoundError (git binary missing) and similar launch errors.
        log.debug("Branch validation failed for %s branch %s: %s", repo_url, branch, exc)
        return False

    if result.returncode != 0:
        log.debug(
            "git ls-remote failed for %s branch %s: %s", repo_url, branch, result.stderr.strip()
        )
        return False

    # Each output line is "<sha>\t<refname>".
    remote_refs = {
        line.split("\t", 1)[1].strip()
        for line in result.stdout.splitlines()
        if "\t" in line
    }
    if full_ref not in remote_refs:
        log.debug("Branch %s does not exist on remote %s", branch, repo_url)
        return False

    log.debug("Branch %s exists on remote %s", branch, repo_url)
    return True

clone_repo(url, dest, branch=None, depth=None)

Clone a repository. Returns True on success.

Source code in src/agentic_ci/git.py
def clone_repo(url: str, dest: Path, branch: str | None = None, depth: int | None = None) -> bool:
    """Clone a repository. Returns True on success.

    Args:
        url: Repository URL; must pass ``validate_repo_url``.
        dest: Directory to clone into.
        branch: Optional branch to check out (``--branch``); must pass
            ``_validate_ref`` when given.
        depth: Optional shallow-clone depth (``--depth``). Falsy values
            (``None`` or ``0``) request a full clone.

    Returns:
        True when ``git clone`` succeeds. False when the URL or branch is
        rejected, the clone fails or times out (``GIT_CLONE_TIMEOUT``), or
        the git binary cannot be found.
    """
    if not validate_repo_url(url):
        log.error("clone_repo: invalid or disallowed URL: %s", url)
        return False
    if branch and not _validate_ref(branch):
        log.error("clone_repo: invalid branch name: %s", branch)
        return False
    cmd = [
        "git",
        # Disable the ext:: and file:: transports for this invocation.
        "-c",
        "protocol.ext.allow=never",
        "-c",
        "protocol.file.allow=never",
        "clone",
    ]
    if depth:
        cmd += ["--depth", str(depth)]
    if branch:
        cmd += ["--branch", branch]
    # "--" ends option parsing so url/dest are always treated as positionals.
    cmd += ["--", url, str(dest)]
    try:
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            timeout=GIT_CLONE_TIMEOUT,
            stdin=_DEVNULL,
        )
    except subprocess.TimeoutExpired:
        log.error("git clone timed out after %ds for %s", GIT_CLONE_TIMEOUT, url)
        return False
    except subprocess.CalledProcessError as exc:
        log.error("git clone failed: %s", exc.stderr)
        return False
    except FileNotFoundError:
        # Same handling as checkout_branch(): report instead of raising.
        log.error("git binary not found")
        return False

    # Best effort: register the clone as a safe.directory in the global git
    # config. A failure here does not invalidate the clone itself.
    try:
        subprocess.run(
            ["git", "config", "--global", "--add", "safe.directory", str(dest.resolve())],
            capture_output=True,
            text=True,
            stdin=_DEVNULL,
        )
    except OSError as exc:
        log.warning("clone_repo: could not register safe.directory for %s: %s", dest, exc)
    return True

create_branch(repo_dir, branch_name)

Create and checkout a new branch.

Source code in src/agentic_ci/git.py
def create_branch(repo_dir: Path, branch_name: str) -> bool:
    """Create and checkout a new branch.

    Runs ``git switch -c <branch_name>`` inside *repo_dir*.

    Args:
        repo_dir: Working tree in which to create the branch.
        branch_name: Name of the new branch; must pass ``_validate_ref``.

    Returns:
        True on success. False when the name is rejected, git exits
        non-zero (for example the branch already exists), or git cannot
        be launched.
    """
    if not _validate_ref(branch_name):
        log.error("create_branch: invalid branch name: %s", branch_name)
        return False
    try:
        subprocess.run(
            ["git", "switch", "-c", branch_name],
            cwd=str(repo_dir),
            check=True,
            capture_output=True,
            text=True,
            # Match checkout_branch(): never let git wait on the caller's stdin.
            stdin=_DEVNULL,
        )
        return True
    except subprocess.CalledProcessError as exc:
        log.error("git switch -c failed: %s", exc.stderr)
        return False
    except FileNotFoundError:
        # Same handling as checkout_branch(): report instead of raising.
        log.error("git binary not found")
        return False

checkout_branch(repo_dir, branch)

Checkout an existing branch. Returns True on success.

Source code in src/agentic_ci/git.py
def checkout_branch(repo_dir: Path, branch: str) -> bool:
    """Switch the working tree in *repo_dir* to an already-existing branch.

    The branch name is vetted with ``_validate_ref`` before
    ``git checkout <branch>`` is run with stdin detached.

    Returns:
        True when the checkout succeeds. False when the name is rejected,
        git exits non-zero, or ``FileNotFoundError`` is raised while
        launching git (logged as a missing git binary).
    """
    if _validate_ref(branch):
        try:
            subprocess.run(
                ["git", "checkout", branch],
                cwd=str(repo_dir),
                check=True,
                capture_output=True,
                text=True,
                stdin=_DEVNULL,
            )
        except FileNotFoundError:
            log.error("git binary not found")
            return False
        except subprocess.CalledProcessError as failure:
            log.error("git checkout failed: %s", failure.stderr)
            return False
        else:
            return True

    log.error("checkout_branch: invalid branch name: %s", branch)
    return False

get_default_branch(repo_dir)

Detect the default branch of the remote origin.

Runs `git rev-parse --abbrev-ref origin/HEAD` and strips the `origin/` prefix. Falls back to `"main"` when the remote HEAD cannot be determined.

Source code in src/agentic_ci/git.py
def get_default_branch(repo_dir: Path) -> str:
    """Return the name of the default branch advertised by ``origin``.

    Asks git for ``rev-parse --abbrev-ref origin/HEAD`` inside *repo_dir*
    and drops the leading ``origin/``. ``"main"`` is returned whenever the
    command fails, git cannot be launched, or the answer is empty or the
    unresolved literal ``origin/HEAD``.
    """
    fallback = "main"
    try:
        probe = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
            cwd=str(repo_dir),
            check=True,
            capture_output=True,
            text=True,
            stdin=_DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return fallback

    symbolic = probe.stdout.strip()
    if not symbolic or symbolic == "origin/HEAD":
        # Nothing useful came back; the remote HEAD is not resolvable.
        return fallback
    return symbolic.removeprefix("origin/")

git_output(repo_dir, *args)

Run a git command and return its stripped stdout, or None on error.

This is a thin wrapper around subprocess.run for cases where the caller only needs the text output of a git command.

Source code in src/agentic_ci/git.py
def git_output(repo_dir: Path, *args: str) -> str | None:
    """Execute ``git <args...>`` in *repo_dir* and hand back its stdout.

    Convenience helper for callers that only care about the textual
    result of a git command.

    Returns:
        The command's stdout with surrounding whitespace removed, or
        ``None`` when git exits non-zero or cannot be launched.
    """
    command = ["git"]
    command.extend(args)
    try:
        completed = subprocess.run(
            command,
            cwd=str(repo_dir),
            check=True,
            capture_output=True,
            text=True,
            stdin=_DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    return completed.stdout.strip()

push_branch(repo_dir, remote='origin', branch=None)

Push a branch to remote (the current branch when branch is omitted) using `--force-with-lease` and `--set-upstream`. Returns True on success.

Source code in src/agentic_ci/git.py
def push_branch(repo_dir: Path, remote: str = "origin", branch: str | None = None) -> bool:
    """Push the current branch to remote. Returns True on success.

    Runs ``git push --force-with-lease --set-upstream <remote> <branch>``
    inside *repo_dir*.

    Args:
        repo_dir: Working tree to push from.
        remote: Remote name. Rejected when empty, starting with ``-``,
            containing ``..`` or ``@{``, or not matching ``_SAFE_REMOTE_RE``.
        branch: Branch to push. When omitted, the current branch is
            detected with ``git rev-parse --abbrev-ref HEAD``. Explicit
            and detected names must both pass ``_validate_ref``.

    Returns:
        True when the push succeeds. False when validation fails, the
        current branch cannot be detected, the push fails or times out
        (``GIT_PUSH_TIMEOUT``), or the git binary cannot be found.
    """
    remote_is_unsafe = (
        not remote
        or remote.startswith("-")
        or ".." in remote
        or "@{" in remote
        or not _SAFE_REMOTE_RE.match(remote)
    )
    if remote_is_unsafe:
        log.error("push_branch: invalid remote name: %s", remote)
        return False
    if branch and not _validate_ref(branch):
        log.error("push_branch: invalid branch name: %s", branch)
        return False
    if not branch:
        try:
            result = subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                cwd=str(repo_dir),
                check=True,
                capture_output=True,
                text=True,
                stdin=_DEVNULL,
            )
        except subprocess.CalledProcessError:
            log.error("push_branch: could not detect current branch")
            return False
        except FileNotFoundError:
            # Same handling as checkout_branch(): report instead of raising.
            log.error("git binary not found")
            return False
        branch = result.stdout.strip()
        # The detected name comes from repository state, so vet it as well.
        if not _validate_ref(branch):
            log.error("push_branch: detected invalid branch name: %s", branch)
            return False
    cmd = ["git", "push", "--force-with-lease", "--set-upstream", remote, branch]
    try:
        subprocess.run(
            cmd,
            cwd=str(repo_dir),
            check=True,
            capture_output=True,
            text=True,
            timeout=GIT_PUSH_TIMEOUT,
            stdin=_DEVNULL,
        )
        return True
    except subprocess.TimeoutExpired:
        log.error("git push timed out after %ds", GIT_PUSH_TIMEOUT)
        return False
    except subprocess.CalledProcessError as exc:
        log.error("git push failed: %s", exc.stderr)
        return False
    except FileNotFoundError:
        log.error("git binary not found")
        return False

setup_git_config(repo_dir, name, email)

Set local git user config.

Source code in src/agentic_ci/git.py
def setup_git_config(repo_dir: Path, name: str, email: str) -> None:
    """Write ``user.name`` and ``user.email`` via ``git config`` in *repo_dir*.

    The commands are run without a scope flag from inside *repo_dir*,
    ``user.name`` first and ``user.email`` second.

    Raises:
        subprocess.CalledProcessError: if either ``git config`` call
            exits non-zero.
    """
    workdir = str(repo_dir)
    identity = (("user.name", name), ("user.email", email))
    for setting, value in identity:
        subprocess.run(
            ["git", "config", setting, value],
            cwd=workdir,
            check=True,
            capture_output=True,
            text=True,
        )

harden_git_config(repo_dir)

Apply security hardening to git config (disable hooks, fsmonitor).

Source code in src/agentic_ci/git.py
def harden_git_config(repo_dir: Path) -> None:
    """Lock down risky git settings for the repository at *repo_dir*.

    Points ``core.hooksPath`` at ``/dev/null`` and sets ``core.fsmonitor``
    to ``false``, in that order, via ``git config`` run inside *repo_dir*.

    Raises:
        subprocess.CalledProcessError: if a ``git config`` call exits
            non-zero.
    """
    hardened_settings = {
        "core.hooksPath": "/dev/null",
        "core.fsmonitor": "false",
    }
    workdir = str(repo_dir)
    for setting, locked_value in hardened_settings.items():
        subprocess.run(
            ["git", "config", setting, locked_value],
            cwd=workdir,
            check=True,
            capture_output=True,
            text=True,
        )

get_commit_info(repo_dir)

Get the latest commit info (committer, email, message, sha).

Uses committer identity (not author) so that rebased or cherry-picked commits always reflect the current git config.

Source code in src/agentic_ci/git.py
def get_commit_info(repo_dir: Path) -> dict:
    """Get the latest commit info (committer, email, message, sha).

    Uses committer identity (not author) so that rebased or
    cherry-picked commits always reflect the current git config.

    Args:
        repo_dir: Working tree whose HEAD commit is inspected.

    Returns:
        A dict with keys ``sha``, ``email``, ``name`` and ``subject``, or
        an empty dict when git prints fewer than four fields. A commit
        with an empty subject yields ``subject == ""``.

    Raises:
        subprocess.CalledProcessError: if ``git log`` exits non-zero.
    """
    # One field per line: sha, committer email, committer name, subject.
    fmt = "%H%n%ce%n%cn%n%s"
    result = subprocess.run(
        ["git", "log", "-1", f"--format={fmt}"],
        cwd=str(repo_dir),
        capture_output=True,
        text=True,
        check=True,
    )
    # Split *before* stripping: stripping the whole output first would drop a
    # trailing empty subject line and make a valid commit look malformed.
    fields = result.stdout.split("\n")
    if len(fields) < 4 or not fields[0].strip():
        return {}
    sha, email, name, subject = (field.strip() for field in fields[:4])
    return {"sha": sha, "email": email, "name": name, "subject": subject}

get_changed_files(repo_dir, base_ref='HEAD~1')

Get list of files changed relative to base_ref.

Raises GitDiffError if the git command fails.

Source code in src/agentic_ci/git.py
def get_changed_files(repo_dir: Path, base_ref: str = "HEAD~1") -> list[str]:
    """Get list of files changed relative to base_ref.

    Runs ``git diff --name-only -z <base_ref> --`` inside *repo_dir*.

    Args:
        repo_dir: Working tree to inspect.
        base_ref: Revision to diff against; must pass ``_validate_ref``.

    Returns:
        The changed paths exactly as git stores them (no quoting or
        escaping applied), in git's output order.

    Raises:
        GitDiffError: if *base_ref* is rejected or the git command fails.
    """
    if not _validate_ref(base_ref):
        raise GitDiffError(f"Invalid ref name: {base_ref}")
    try:
        result = subprocess.run(
            # -z emits NUL-terminated raw paths; without it git C-quotes names
            # containing non-ASCII or special characters. The trailing "--"
            # makes git treat base_ref as a revision, never as a path.
            ["git", "diff", "--name-only", "-z", base_ref, "--"],
            cwd=str(repo_dir),
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        raise GitDiffError(
            f"git diff failed for base_ref={base_ref}: {exc.stderr.strip()}"
        ) from exc
    return [path for path in result.stdout.split("\0") if path]

strip_committed_files(repo_dir, patterns, base_ref='origin/HEAD')

Remove files matching patterns from the latest commit.

Agents can bypass .git/info/exclude by explicitly naming files in git add. This function detects any committed files that match the given fnmatch patterns and amends the commit to remove them, keeping the working-tree copies intact.

Returns the list of file paths that were stripped (empty if none matched).

Source code in src/agentic_ci/git.py
def strip_committed_files(
    repo_dir: Path,
    patterns: list[str],
    base_ref: str = "origin/HEAD",
) -> list[str]:
    """Remove files matching *patterns* from the latest commit.

    Agents can bypass ``.git/info/exclude`` by explicitly naming files in
    ``git add``.  This function detects any committed files that match the
    given fnmatch *patterns* and amends the commit to remove them, keeping
    the working-tree copies intact.

    Args:
        repo_dir: Working tree containing the commit to clean up.
        patterns: fnmatch patterns, tested against both the file's base
            name and its full path as reported by ``get_changed_files``.
        base_ref: Revision the changed-file list is computed against.

    Returns:
        The list of file paths that were actually stripped. Empty when
        nothing matched, the changed-file list could not be computed, no
        matching file could be removed from the index, or the amend failed.
    """
    try:
        changed = get_changed_files(repo_dir, base_ref=base_ref)
    except GitDiffError:
        return []

    to_remove = [
        filepath
        for filepath in changed
        if any(
            fnmatch.fnmatch(Path(filepath).name, pattern) or fnmatch.fnmatch(filepath, pattern)
            for pattern in patterns
        )
    ]
    if not to_remove:
        return []

    log.warning(
        "Stripping %d artifact file(s) from commit: %s",
        len(to_remove),
        ", ".join(to_remove),
    )

    stripped = []
    for filepath in to_remove:
        removal = subprocess.run(
            # --literal-pathspecs: treat the path verbatim (no glob or ":" magic).
            # "--": a path starting with "-" must not be parsed as an option.
            ["git", "--literal-pathspecs", "rm", "--cached", "--quiet", "--", filepath],
            cwd=str(repo_dir),
            capture_output=True,
            text=True,
        )
        if removal.returncode == 0:
            stripped.append(filepath)
        else:
            # E.g. the path is in the diff because it was deleted, so it is
            # not in the index any more.
            log.warning("git rm --cached failed for %s: %s", filepath, removal.stderr.strip())

    if not stripped:
        # Nothing left the index, so there is nothing to amend.
        return []

    amend = subprocess.run(
        ["git", "commit", "--amend", "--no-edit", "--allow-empty"],
        cwd=str(repo_dir),
        capture_output=True,
        text=True,
    )
    if amend.returncode != 0:
        log.error("git commit --amend failed: %s", amend.stderr.strip())
        return []
    return stripped

setup_git_credentials(repo_url, *, github_token_resolver=None)

Configure git url.insteadOf for the forge hosting repo_url.

Sets up transparent credential injection so that clone_repo() and push_branch() (which use bare HTTPS URLs) can authenticate without modification.

For GitLab, reads BOT_PAT from the environment. For GitHub, calls github_token_resolver(repo_url) to obtain a short-lived token. If no resolver is provided for GitHub URLs, returns False.

Idempotent and safe to call multiple times. Returns True on success, False if credentials are unavailable.

Source code in src/agentic_ci/git.py
def setup_git_credentials(
    repo_url: str,
    *,
    github_token_resolver: Callable[[str], str | None] | None = None,
) -> bool:
    """Configure ``git url.insteadOf`` for the forge hosting *repo_url*.

    Sets up transparent credential injection so that ``clone_repo()`` and
    ``push_branch()`` (which use bare HTTPS URLs) can authenticate
    without modification.

    For **GitLab**, reads ``BOT_PAT`` from the environment.
    For **GitHub**, calls *github_token_resolver(repo_url)* to obtain a
    short-lived token. If no resolver is provided for GitHub URLs,
    returns False.

    Idempotent and safe to call multiple times. Returns True on success,
    False if credentials are unavailable.
    """
    if not repo_url:
        return False

    parsed = urlparse(repo_url)
    hostname = (parsed.hostname or "").lower()

    if hostname == "gitlab.com":
        return _setup_gitlab_credentials()
    elif hostname == "github.com":
        if github_token_resolver is None:
            log.error("No github_token_resolver provided for GitHub URL: %s", repo_url)
            return False
        return _setup_github_credentials(repo_url, github_token_resolver)

    log.info("No credential setup needed for URL: %s", repo_url)
    return True