Init
This commit is contained in:
4
src/ai_reviewer/__init__.py
Normal file
4
src/ai_reviewer/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
"""ai-reviewer package."""
|
||||
|
||||
# Version string of the package; it is the only name exported via __all__.
__version__ = "0.1.0"

__all__ = ["__version__"]
|
||||
BIN
src/ai_reviewer/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
src/ai_reviewer/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
src/ai_reviewer/__pycache__/diff.cpython-311.pyc
Normal file
BIN
src/ai_reviewer/__pycache__/diff.cpython-311.pyc
Normal file
Binary file not shown.
BIN
src/ai_reviewer/__pycache__/prompt.cpython-311.pyc
Normal file
BIN
src/ai_reviewer/__pycache__/prompt.cpython-311.pyc
Normal file
Binary file not shown.
101
src/ai_reviewer/cli.py
Normal file
101
src/ai_reviewer/cli.py
Normal file
@@ -0,0 +1,101 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ai_reviewer.diff import chunk_files, parse_diff, run_git_diff
|
||||
from ai_reviewer.ollama import OllamaClient
|
||||
from ai_reviewer.prompt import build_prompt
|
||||
from ai_reviewer.render import Finding, dedupe_findings, render_json, render_markdown
|
||||
|
||||
# Typer application object; the functions decorated with @app.command() in
# this module are registered on it as CLI sub-commands.
app = typer.Typer(add_completion=False)
|
||||
|
||||
|
||||
class Settings(BaseModel):
    """Configuration for one review run.

    Every field has a default, so ``Settings()`` yields a usable configuration.
    """

    # Name of the model handed to the Ollama client.
    model: str = "qwen2.5-coder:7b"

    # Base URL of the Ollama server. The OLLAMA_HOST environment variable is
    # read each time an instance is created without an explicit value.
    ollama_host: str = Field(
        default_factory=lambda: os.environ.get("OLLAMA_HOST", "http://localhost:11434"),
    )

    # Line budget passed to the diff chunker for each request.
    max_lines: int = 350
|
||||
|
||||
|
||||
def _parse_findings(response_text: str) -> list[Finding]:
    """Convert the model's JSON reply into ``Finding`` objects.

    The reply must be a JSON object.  Its optional ``findings`` member must be
    a list of objects that each provide ``file``, ``hunk``, ``snippet``,
    ``message`` and ``label``; every value is coerced to ``str``.  A reply
    without a ``findings`` member yields an empty list.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The findings in the order the model listed them.

    Raises:
        typer.BadParameter: If the text is not valid JSON, is not a JSON
            object, ``findings`` is not a list, an entry is not an object, or
            an entry lacks one of the required fields.
    """
    try:
        data = json.loads(response_text)
    except json.JSONDecodeError as exc:
        raise typer.BadParameter(f"Model returned invalid JSON: {exc}") from exc

    # Valid JSON may still be a bare list, string or number.  Report that the
    # same way as any other malformed reply instead of crashing on ``.get``.
    if not isinstance(data, dict):
        raise typer.BadParameter("Model response must be a JSON object")

    raw = data.get("findings", [])
    if not isinstance(raw, list):
        raise typer.BadParameter("Model response field 'findings' must be a list")

    required = ("file", "hunk", "snippet", "message", "label")
    findings: list[Finding] = []
    for item in raw:
        if not isinstance(item, dict):
            raise typer.BadParameter("Each finding must be a JSON object")
        for name in required:
            if name not in item:
                raise typer.BadParameter(f"Model response missing field: {name!r}")
        findings.append(Finding(**{name: str(item[name]) for name in required}))
    return findings
|
||||
|
||||
|
||||
@app.command()
def diff(
    repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True),
    base: str = typer.Option(...),
    head: str = typer.Option(...),
) -> None:
    """Print the git diff for the given range."""
    # The diff text is echoed verbatim; nl=False avoids appending an extra
    # newline to what git produced.
    typer.echo(run_git_diff(str(repo), base, head), nl=False)
|
||||
|
||||
|
||||
@app.command()
def review(
    repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True),
    base: str = typer.Option(...),
    head: str = typer.Option(...),
    format: str = typer.Option("markdown", help="markdown|json"),
    out: Path | None = typer.Option(None, help="Output file path"),
    model: str | None = typer.Option(None, help="Ollama model"),
    ollama_host: str | None = typer.Option(None, help="Ollama host URL"),
    max_lines: int = typer.Option(350, help="Max lines per request"),
) -> None:
    """Review a git diff using a local Ollama model."""
    # Validate the output format before doing any work, so a mistyped --format
    # fails immediately instead of after every chunk has been sent to the model.
    if format not in {"markdown", "json"}:
        raise typer.BadParameter("format must be markdown or json")

    # Explicit CLI values win; unset options fall back to the Settings
    # defaults (for the host that includes the OLLAMA_HOST variable).
    defaults = Settings()
    settings = Settings(
        model=model or defaults.model,
        ollama_host=ollama_host or defaults.ollama_host,
        max_lines=max_lines,
    )

    # Collect the diff and split it into pieces of at most max_lines lines
    # where hunk boundaries allow.
    diff_text = run_git_diff(str(repo), base, head)
    files = parse_diff(diff_text)
    chunks = chunk_files(files, max_lines=settings.max_lines)

    # One model request per chunk; findings are accumulated across chunks.
    client = OllamaClient(settings.ollama_host)
    findings: list[Finding] = []
    for chunk in chunks:
        prompt = build_prompt(chunk.to_text())
        response_text = client.generate(settings.model, prompt)
        findings.extend(_parse_findings(response_text))

    findings = dedupe_findings(findings)

    output = render_markdown(findings) if format == "markdown" else render_json(findings)

    # Write to the requested file, or to stdout when no path was given.
    if out:
        out.write_text(output, encoding="utf-8")
    else:
        typer.echo(output, nl=False)
|
||||
156
src/ai_reviewer/diff.py
Normal file
156
src/ai_reviewer/diff.py
Normal file
@@ -0,0 +1,156 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Hunk:
    """One hunk of a file diff: a header line followed by its body lines."""

    header: str  # the hunk's header line
    lines: list[str]  # body lines that follow the header

    def line_count(self) -> int:
        """Return the size of the hunk in lines, counting the header as one."""
        body_size = len(self.lines)
        return body_size + 1
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FileDiff:
    """The diff of a single file: its path, header lines and hunks."""

    path: str  # path of the file the diff applies to
    header_lines: list[str]  # lines that precede the first hunk
    hunks: list[Hunk]  # hunks in the order they were parsed

    def line_count(self) -> int:
        """Return the header line count plus the line count of every hunk."""
        total = len(self.header_lines)
        for hunk in self.hunks:
            total += hunk.line_count()
        return total
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DiffChunk:
    """A slice of one file's diff: the file header plus some of its hunks."""

    path: str  # path of the file the chunk belongs to
    header_lines: list[str]  # file header lines emitted before the hunks
    hunks: list[Hunk]  # hunks included in this chunk

    def to_text(self) -> str:
        """Render the chunk as diff text ending in exactly one newline.

        Header lines come first, then each hunk's header and body.  Trailing
        whitespace of the joined text is removed before the final newline.
        """
        parts = list(self.header_lines)
        for hunk in self.hunks:
            parts += [hunk.header, *hunk.lines]
        return "\n".join(parts).rstrip() + "\n"

    def line_count(self) -> int:
        """Return the header line count plus the line count of every hunk."""
        total = len(self.header_lines)
        for hunk in self.hunks:
            total += hunk.line_count()
        return total
|
||||
|
||||
|
||||
def run_git_diff(repo: str, base: str, head: str) -> str:
    """Return the diff text for ``base...head`` in the repository at *repo*.

    Runs ``git -C <repo> diff --unified=3 --no-color <base>...<head> --`` and
    returns its standard output.

    Args:
        repo: Path of the git working tree, passed to ``git -C``.
        base: Left-hand revision of the three-dot range.
        head: Right-hand revision of the three-dot range.

    Returns:
        Whatever git printed on stdout, decoded as UTF-8.  Undecodable bytes
        are replaced rather than raising.

    Raises:
        ValueError: If the revision range would start with ``-`` and could
            therefore be parsed by git as a command-line option.
        RuntimeError: If git exits with a status other than 0 or 1.  The
            message is git's stderr, or ``"git diff failed"`` when stderr is
            empty.
    """
    rev_range = f"{base}...{head}"
    # A range beginning with "-" (e.g. base="--output=file") would be taken
    # by git as an option rather than a revision, so refuse it outright.
    if rev_range.startswith("-"):
        raise ValueError(f"revision range must not start with '-': {rev_range!r}")

    cmd = [
        "git",
        "-C",
        repo,
        "diff",
        "--unified=3",
        "--no-color",
        rev_range,
        # Explicit end of revisions: a ref that shares its name with a file
        # is never reinterpreted as a path.
        "--",
    ]
    # Decode explicitly as UTF-8 instead of the locale codec so the result
    # does not depend on the environment, and never fails on odd bytes.
    result = subprocess.run(
        cmd,
        check=False,
        capture_output=True,
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode not in (0, 1):
        raise RuntimeError(result.stderr.strip() or "git diff failed")
    return result.stdout
|
||||
|
||||
|
||||
def parse_diff(diff_text: str) -> list[FileDiff]:
    """Parse ``git diff`` output into one ``FileDiff`` per file.

    A ``diff --git`` line starts a new file and an ``@@`` line starts a new
    hunk.  Everything between a hunk header and the next ``@@`` or
    ``diff --git`` line is hunk body and is stored unmodified, so the leading
    space of context lines and the indentation of the changed code are
    preserved.  Non-blank lines outside a hunk are collected as file header
    lines.

    The path is taken from the ``b/`` side of the ``diff --git`` line and is
    overridden by a ``+++ b/<path>`` header line when one is present.  Files
    for which no path could be determined are dropped.

    If the diff as a whole is indented (for example because it was written
    inside an indented string literal), the indentation of the first
    structural line is treated as a margin and removed from every line.

    Args:
        diff_text: Text in the format produced by ``git diff``.

    Returns:
        The parsed files in the order they appear in *diff_text*.
    """
    structural_prefixes = ("diff --git ", "--- ", "+++ ", "@@ ")
    raw_lines = diff_text.splitlines()

    # Determine the uniform margin, if any, from the first structural line.
    # Real git output has none, in which case lines are used verbatim.
    margin = ""
    for raw_line in raw_lines:
        stripped = raw_line.lstrip()
        if stripped.startswith(structural_prefixes):
            margin = raw_line[: len(raw_line) - len(stripped)]
            break

    files: list[FileDiff] = []
    current_path: str | None = None
    header_lines: list[str] = []
    hunks: list[Hunk] = []
    current_hunk_header: str | None = None
    current_hunk_lines: list[str] = []

    def flush_hunk() -> None:
        """Close the hunk being collected, if any, and add it to the file."""
        nonlocal current_hunk_header, current_hunk_lines
        if current_hunk_header is not None:
            hunks.append(Hunk(header=current_hunk_header, lines=current_hunk_lines))
        current_hunk_header = None
        current_hunk_lines = []

    def flush_file() -> None:
        """Close the file being collected and record it if it has a path."""
        nonlocal current_path, header_lines, hunks
        flush_hunk()
        if current_path is not None:
            files.append(FileDiff(path=current_path, header_lines=header_lines, hunks=hunks))
        current_path = None
        header_lines = []
        hunks = []

    for raw_line in raw_lines:
        if not margin:
            line = raw_line
        elif raw_line.startswith(margin):
            line = raw_line[len(margin) :]
        else:
            # Less indented than the margin, typically a blank line whose
            # trailing whitespace was trimmed.
            line = raw_line.lstrip()

        # Hunk body lines begin with " ", "+", "-" or "\", so these two
        # markers are unambiguous even while a hunk is open.
        if line.startswith("diff --git "):
            flush_file()
            header_lines = [line]
            parts = line.split()
            if len(parts) >= 4 and parts[3].startswith("b/"):
                current_path = parts[3][len("b/") :]
            else:
                current_path = None
            continue

        if line.startswith("@@ "):
            flush_hunk()
            current_hunk_header = line
            continue

        # Inside a hunk every line is body.  This must be checked before the
        # "+++ " header test: an added line whose text starts with "++ " also
        # begins with "+++ " and would otherwise be mistaken for a header.
        if current_hunk_header is not None:
            current_hunk_lines.append(line)
            continue

        if line.startswith("+++ "):
            header_lines.append(line)
            if line.startswith("+++ b/"):
                current_path = line[len("+++ b/") :]
            continue

        # Remaining header material ("--- ", "index ", mode lines, ...);
        # blank lines between files are skipped.
        if line.strip():
            header_lines.append(line)

    flush_file()
    return files
|
||||
|
||||
|
||||
def chunk_files(files: Iterable[FileDiff], max_lines: int = 350) -> list[DiffChunk]:
    """Split file diffs into chunks of at most *max_lines* lines where possible.

    A file that fits within *max_lines* becomes a single chunk.  A larger file
    is split on hunk boundaries: hunks are packed greedily, each chunk repeats
    the file's header lines, and a chunk is closed as soon as the next hunk
    would push it past the limit.  Hunks are never split, so a single hunk
    larger than the limit still produces one oversized chunk.

    Args:
        files: Parsed file diffs to split.
        max_lines: Line budget per chunk, header lines included.

    Returns:
        The chunks, in file order and then hunk order.
    """
    result: list[DiffChunk] = []

    def emit(source: FileDiff, selected: list[Hunk]) -> None:
        # Every chunk carries the path and header lines of its file.
        result.append(
            DiffChunk(path=source.path, header_lines=source.header_lines, hunks=selected)
        )

    for file in files:
        if file.line_count() <= max_lines:
            emit(file, file.hunks)
            continue

        header_size = len(file.header_lines)
        pending: list[Hunk] = []
        used = header_size
        for hunk in file.hunks:
            size = hunk.line_count()
            # Close the chunk only if it already holds a hunk, so an
            # oversized hunk is emitted alone instead of being dropped.
            if pending and used + size > max_lines:
                emit(file, pending)
                pending, used = [], header_size
            pending.append(hunk)
            used += size

        if pending:
            emit(file, pending)

    return result
|
||||
24
src/ai_reviewer/ollama.py
Normal file
24
src/ai_reviewer/ollama.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
class OllamaClient:
    """Small client for the ``/api/generate`` endpoint of an Ollama server."""

    def __init__(self, host: str, timeout: float = 60.0) -> None:
        """Store the connection settings.

        Args:
            host: Base URL of the server; trailing slashes are removed.
            timeout: Timeout in seconds handed to ``httpx.Client``.  The
                default matches the previously hard-coded 60 seconds.
        """
        self._host = host.rstrip("/")
        self._timeout = timeout

    def generate(self, model: str, prompt: str) -> str:
        """Send *prompt* to *model* and return the text of the reply.

        The request is non-streaming and uses temperature 0.

        Args:
            model: Name of the model to run.
            prompt: Full prompt text.

        Returns:
            The ``response`` member of the JSON reply converted to ``str``,
            or an empty string when that member is absent.

        Raises:
            httpx.HTTPError: Propagated from the POST request or from
                ``raise_for_status()``.
        """
        url = f"{self._host}/api/generate"
        payload: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0},
        }
        with httpx.Client(timeout=self._timeout) as client:
            response = client.post(url, json=payload)
            response.raise_for_status()
            data = response.json()
        return str(data.get("response", ""))
|
||||
37
src/ai_reviewer/prompt.py
Normal file
37
src/ai_reviewer/prompt.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from textwrap import dedent
|
||||
|
||||
|
||||
def build_prompt(diff_chunk: str) -> str:
    """Build the review prompt for one diff chunk.

    The fixed instructions are dedented on their own and the diff is appended
    afterwards.  Interpolating the diff before dedenting would defeat the
    dedent, because diff lines start at column 0 and leave no common margin,
    and would also blank out whitespace-only context lines of the diff.

    Args:
        diff_chunk: Diff text to review; it is inserted verbatim.

    Returns:
        The instructions followed by the diff, ending in exactly one newline.
    """
    instructions = dedent(
        """\
        You are a strict code reviewer. Analyze the git diff chunk and output JSON only.

        Rules:
        - Evidence-based only. Each finding must reference a file path, a hunk header, and include a
          quoted snippet.
        - If uncertain or speculative, set label to VERIFY.
        - Do not invent context outside the diff.
        - If there are no issues, return an empty findings list.

        Output JSON schema:
        {
          "findings": [
            {
              "file": "path/to/file",
              "hunk": "@@ -1,2 +1,2 @@",
              "snippet": "-old\\n+new",
              "message": "concise issue description",
              "label": "ISSUE|VERIFY"
            }
          ]
        }

        Diff chunk:
        """
    )
    # "\\n" above keeps the schema example valid JSON: the model sees the
    # two-character escape, not a raw line break inside a string literal.
    return (instructions + diff_chunk).rstrip() + "\n"
|
||||
55
src/ai_reviewer/render.py
Normal file
55
src/ai_reviewer/render.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Finding:
    """A single review finding."""

    file: str  # file the finding refers to
    hunk: str  # hunk header the finding refers to
    snippet: str  # quoted diff snippet supporting the finding
    message: str  # description of the issue
    label: str  # classification label of the finding

    def key(self) -> tuple[str, str, str, str, str]:
        """Return all five fields as a tuple, for use as an identity key."""
        identity = (
            self.file,
            self.hunk,
            self.snippet,
            self.message,
            self.label,
        )
        return identity
|
||||
|
||||
|
||||
def dedupe_findings(findings: Iterable[Finding]) -> list[Finding]:
    """Return *findings* without duplicates, keeping first occurrences.

    Two findings are duplicates when their ``key()`` values are equal.  The
    relative order of the retained findings is unchanged.
    """
    # A dict keeps insertion order, and setdefault never overwrites an
    # existing entry, so the first finding seen for each key is retained.
    first_by_key: dict[tuple[str, str, str, str, str], Finding] = {}
    for finding in findings:
        first_by_key.setdefault(finding.key(), finding)
    return list(first_by_key.values())
|
||||
|
||||
|
||||
def render_json(findings: Iterable[Finding]) -> str:
    """Serialize *findings* as a JSON document ending in a newline.

    The document is an object with a single ``findings`` list.  Output is
    indented by two spaces and object keys are sorted.
    """
    records = [vars(finding) for finding in findings]
    document = json.dumps({"findings": records}, indent=2, sort_keys=True)
    return document + "\n"
|
||||
|
||||
|
||||
def render_markdown(findings: Iterable[Finding]) -> str:
    """Render *findings* as a Markdown report ending in a single newline.

    Each finding gets a numbered section with its file, hunk and label, the
    snippet inside a fenced block, and the message as a closing paragraph.
    When there are no findings the report states so.
    """
    entries = list(findings)
    if not entries:
        return "# Review Report\n\nNo findings.\n"

    out: list[str] = ["# Review Report", ""]
    for number, finding in enumerate(entries, start=1):
        out.extend(
            (
                f"## Finding {number}",
                f"- File: {finding.file}",
                f"- Hunk: {finding.hunk}",
                f"- Label: {finding.label}",
                "- Snippet:",
                "```",
                finding.snippet.rstrip(),
                "```",
                "",
                finding.message,
                "",
            )
        )
    # Drop the blank line left after the last section before terminating.
    report = "\n".join(out).rstrip()
    return report + "\n"
|
||||
Reference in New Issue
Block a user