This commit is contained in:
2026-02-02 19:54:04 +00:00
commit d55002e218
23 changed files with 672 additions and 0 deletions

156
src/ai_reviewer/diff.py Normal file
View File

@@ -0,0 +1,156 @@
from __future__ import annotations
import subprocess
from collections.abc import Iterable
from dataclasses import dataclass
@dataclass(frozen=True)
class Hunk:
    """A single hunk of a diff: one header line plus the lines under it."""

    # The line that opens the hunk.
    header: str
    # The lines that follow the header, in their original order.
    lines: list[str]

    def line_count(self) -> int:
        """Return how many text lines the hunk occupies, header included."""
        body_size = len(self.lines)
        # The extra line accounts for the header itself.
        return body_size + 1
@dataclass(frozen=True)
class FileDiff:
    """The diff of one file: its header lines followed by its hunks."""

    # Path of the file this diff belongs to.
    path: str
    # Lines that come before the first hunk.
    header_lines: list[str]
    # The hunks of the file, in order of appearance.
    hunks: list[Hunk]

    def line_count(self) -> int:
        """Return the total number of lines: header lines plus every hunk."""
        total = len(self.header_lines)
        for hunk in self.hunks:
            total += hunk.line_count()
        return total
@dataclass(frozen=True)
class DiffChunk:
    """A slice of one file's diff: the file header plus some of its hunks."""

    # Path of the file the chunk was taken from.
    path: str
    # Header lines emitted before the hunks when rendering the chunk.
    header_lines: list[str]
    # The hunks contained in this chunk, in order.
    hunks: list[Hunk]

    def to_text(self) -> str:
        """Render the chunk as diff text terminated by exactly one newline.

        Header lines come first, then each hunk's header followed by its
        lines. Trailing empty lines are dropped, but whitespace that is part
        of the last diff line (a blank context line ``" "`` or trailing
        spaces in an added/removed line) is preserved.
        """
        lines: list[str] = list(self.header_lines)
        for hunk in self.hunks:
            lines.append(hunk.header)
            lines.extend(hunk.lines)
        # Strip newlines only: a bare rstrip() would also eat meaningful
        # trailing whitespace from the final diff line.
        return "\n".join(lines).rstrip("\n") + "\n"

    def line_count(self) -> int:
        """Return the number of lines: header lines plus every hunk."""
        return len(self.header_lines) + sum(h.line_count() for h in self.hunks)
def run_git_diff(repo: str, base: str, head: str) -> str:
    """Run ``git diff base...head`` in *repo* and return its output.

    The diff is requested with three lines of context and without colour.

    Args:
        repo: Path of the repository, passed to ``git -C``.
        base: Left-hand revision of the ``base...head`` range.
        head: Right-hand revision of the ``base...head`` range.

    Returns:
        The standard output of ``git diff`` as text.

    Raises:
        RuntimeError: If git exits with a status other than 0 or 1. The
            message is git's stderr, or ``"git diff failed"`` when stderr
            is empty.
    """
    # NOTE(review): base/head are interpolated unvalidated; a value starting
    # with "-" would be parsed by git as an option. Confirm callers only pass
    # trusted revision names.
    cmd = [
        "git",
        "-C",
        repo,
        "diff",
        f"{base}...{head}",
        "--unified=3",
        "--no-color",
    ]
    # Decode explicitly as UTF-8 instead of the locale encoding, and replace
    # undecodable bytes: diffs can contain arbitrary file content, and strict
    # decoding would raise UnicodeDecodeError on a single non-UTF-8 byte.
    result = subprocess.run(
        cmd,
        check=False,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    # NOTE(review): status 1 is tolerated alongside 0, presumably mirroring
    # diff's "differences found" convention — confirm this is intended.
    if result.returncode not in (0, 1):
        raise RuntimeError(result.stderr.strip() or "git diff failed")
    return result.stdout
def parse_diff(diff_text: str) -> list[FileDiff]:
    """Parse git-style unified diff text into one ``FileDiff`` per file.

    A ``diff --git`` line starts a new file, and an ``@@`` line starts a new
    hunk. Once a hunk is open, every line up to the next ``@@`` or
    ``diff --git`` line is stored verbatim as hunk content, so the leading
    ``+``/``-``/space markers and the indentation of the changed code are
    preserved. Lines outside a hunk are collected as header lines (blank
    ones are skipped).

    The path is taken from the ``b/`` side of the ``diff --git`` line and is
    overridden by a ``+++ b/<path>`` header when present. Files for which no
    path could be determined are dropped.

    If the whole diff is uniformly indented (e.g. embedded in an indented
    string literal), the indentation of the first non-blank line is removed
    from every line.

    Args:
        diff_text: The diff text to parse.

    Returns:
        The parsed files, in order of appearance.
    """
    raw_lines = diff_text.splitlines()

    # Indentation shared by the whole diff, taken from its first non-blank
    # line. Real git output starts at column 0, so this is normally empty.
    indent = ""
    for raw_line in raw_lines:
        stripped = raw_line.lstrip()
        if stripped:
            indent = raw_line[: len(raw_line) - len(stripped)]
            break

    files: list[FileDiff] = []
    current_path: str | None = None
    header_lines: list[str] = []
    hunks: list[Hunk] = []
    current_hunk_header: str | None = None
    current_hunk_lines: list[str] = []

    def flush_hunk() -> None:
        """Close the hunk being collected, if any, and reset hunk state."""
        nonlocal current_hunk_header, current_hunk_lines
        if current_hunk_header is not None:
            hunks.append(Hunk(header=current_hunk_header, lines=current_hunk_lines))
        current_hunk_header = None
        current_hunk_lines = []

    def flush_file() -> None:
        """Close the file being collected and reset per-file state."""
        nonlocal current_path, header_lines, hunks
        flush_hunk()
        if current_path is not None:
            files.append(FileDiff(path=current_path, header_lines=header_lines, hunks=hunks))
        current_path = None
        header_lines = []
        hunks = []

    for raw_line in raw_lines:
        # Remove only the shared indentation. A blanket lstrip() would also
        # remove the leading space that marks a context line, together with
        # the indentation of the code on that line.
        if raw_line.startswith(indent):
            line = raw_line[len(indent) :]
        else:
            line = raw_line.lstrip()

        if line.startswith("diff --git "):
            flush_file()
            header_lines = [line]
            parts = line.split()
            if len(parts) >= 4 and parts[3].startswith("b/"):
                current_path = parts[3][len("b/") :]
            else:
                # E.g. a path containing spaces; a later "+++ b/" header
                # may still supply the path.
                current_path = None
            continue

        if line.startswith("@@ "):
            flush_hunk()
            current_hunk_header = line
            continue

        if current_hunk_header is not None:
            # Everything inside a hunk is content. In particular a removed
            # line starting with "-- " shows up as "--- ..." and an added
            # line starting with "++ " as "+++ ...", and neither must be
            # mistaken for a file header.
            current_hunk_lines.append(line)
            continue

        if line.startswith("+++ "):
            header_lines.append(line)
            if line.startswith("+++ b/"):
                # git appends a tab after paths that contain spaces; cut
                # the path at the first tab so it does not leak into it.
                current_path = line[len("+++ b/") :].split("\t", 1)[0]
            continue

        if line.strip():
            # "--- ", "index ", mode lines, etc.: keep as header lines.
            header_lines.append(line)

    flush_file()
    return files
def chunk_files(files: Iterable[FileDiff], max_lines: int = 350) -> list[DiffChunk]:
    """Split file diffs into chunks of roughly at most *max_lines* lines.

    A file that fits within *max_lines*, or that has no hunks to split on,
    becomes a single chunk. Larger files are split at hunk boundaries: hunks
    are packed greedily, and every resulting chunk repeats the file's header
    lines. Hunks are never split, so a single hunk larger than the budget
    still yields one oversized chunk.

    Args:
        files: The parsed file diffs to chunk.
        max_lines: Line budget per chunk, counting the repeated header lines.

    Returns:
        The chunks, in file order and, within a file, in hunk order.
    """
    chunks: list[DiffChunk] = []
    for file in files:
        # A hunk-less file (header only) cannot be split, so emit it whole
        # regardless of size instead of silently dropping it.
        if not file.hunks or file.line_count() <= max_lines:
            chunks.append(
                DiffChunk(path=file.path, header_lines=file.header_lines, hunks=file.hunks)
            )
            continue

        header_size = len(file.header_lines)
        current_hunks: list[Hunk] = []
        current_lines = header_size
        for hunk in file.hunks:
            hunk_lines = hunk.line_count()
            # Close the running chunk before it would overflow, but never
            # emit an empty one: an oversized hunk gets a chunk of its own.
            if current_hunks and current_lines + hunk_lines > max_lines:
                chunks.append(
                    DiffChunk(path=file.path, header_lines=file.header_lines, hunks=current_hunks)
                )
                current_hunks = []
                current_lines = header_size
            current_hunks.append(hunk)
            current_lines += hunk_lines
        if current_hunks:
            chunks.append(
                DiffChunk(path=file.path, header_lines=file.header_lines, hunks=current_hunks)
            )
    return chunks