feat(worktree): git worktree per task to isolate shared /repos (ORCH-2 / S-4)

- add src/git_worktree.py: ensure/remove/get_worktree_path
- config: worktrees_dir=/repos/_wt
- launcher: agent runs in per-branch worktree; task-file + commit/push in worktree; no shared checkout
- qg/checks: read artifacts + run make test from worktree (branch arg, backward-compatible)
- webhooks/plane: pass branch into QG dispatch; review fallback from worktree
- webhooks/gitea: keep read-only branch --contains in main clone (documented)
- tests: test_git_worktree.py (isolation) + update test_launcher write-task-file
- docs: ARCHITECTURE worktree section + BUGFIXES_2026-06-02_ORCH2

Preserves B-1/B-2/S-1/S-5 fixes (paths now point at worktree).
This commit is contained in:
Dev Agent
2026-06-02 21:12:06 +03:00
parent 66a37612fd
commit 1ebe8afc23
10 changed files with 474 additions and 89 deletions

View File

@@ -6,6 +6,7 @@ import signal
from ..config import settings
from ..db import get_db, get_task_by_repo_branch, update_task_stage
from ..stages import get_next_stage, get_qg_for_stage, get_agent_for_stage
from ..git_worktree import ensure_worktree, get_worktree_path
from ..qg.checks import QG_CHECKS
from ..notifications import notify_stage_change, notify_qg_failure, notify_agent_started, notify_agent_finished, notify_approve_requested
from ..plane_sync import notify_stage_change as plane_notify_stage, add_comment as plane_add_comment
@@ -71,15 +72,22 @@ class AgentLauncher:
if not config:
raise ValueError(f"Unknown agent: {agent}")
# Container-local path (repos mounted at /repos)
# Main clone lives at /repos/<repo>; the agent works in an isolated worktree
# (ORCH-2 / S-4) so concurrent tasks never fight over a shared checkout.
local_repo_path = os.path.join(settings.repos_dir, repo)
if not os.path.isdir(local_repo_path):
raise FileNotFoundError(f"Repo not found: {local_repo_path}")
# Write task file if content provided (B-1: direct write to mounted /repos, no docker)
# Determine branch (needed before we touch the worktree / task file).
_br_row = get_db().execute("SELECT branch FROM tasks WHERE id=?", (task_id,)).fetchone() if task_id else None
agent_branch = _br_row[0] if _br_row else "main"
# Ensure the per-branch worktree exists and is on the right branch.
work_path = ensure_worktree(repo, agent_branch)
# Write task file if content provided (B-1: direct write; now into the worktree).
if task_content:
self._write_task_file(repo, config["task_file"], task_content)
self._write_task_file(repo, agent_branch, config["task_file"], task_content)
# Record run in DB
conn = get_db()
@@ -99,15 +107,13 @@ class AgentLauncher:
system_prompt = config["system_prompt"]
allowed_tools = config["allowed_tools"]
# Determine branch for checkout
_br_row = get_db().execute("SELECT branch FROM tasks WHERE id=?", (task_id,)).fetchone() if task_id else None
agent_branch = _br_row[0] if _br_row else "main"
model = config.get("model", "")
model_flag = f"--model {model} " if model else ""
# No git fetch/checkout here: ensure_worktree() already put the worktree on
# the right branch. The agent simply runs inside its isolated work_path.
cmd = (
f'cd {local_repo_path} && git fetch origin 2>/dev/null; git checkout {agent_branch} 2>/dev/null || git checkout -b {agent_branch} origin/{agent_branch} 2>/dev/null; '
f'cd {work_path} && '
f'{self.CLAUDE_BIN} --print '
f'{model_flag}'
f'"$(cat {task_file})" '
@@ -219,8 +225,10 @@ class AgentLauncher:
notify_agent_finished(run_id, agent, exit_code, task_id=_task_id, duration_s=_duration_s)
# Commit and push any changes
repo_path = os.path.join(settings.repos_dir, repo)
# Commit and push any changes — in the per-branch worktree (ORCH-2 / S-4),
# NOT in the shared /repos/<repo>. The worktree is already on `branch`
# (ensure_worktree did the checkout), so no checkout is needed here.
repo_path = get_worktree_path(repo, branch)
try:
git_env = {
**os.environ,
@@ -230,20 +238,6 @@ class AgentLauncher:
"GIT_COMMITTER_NAME": "claude-bot",
"GIT_COMMITTER_EMAIL": "claude-bot@mva154.local",
}
# Checkout feature branch before committing
subprocess.run(
["git", "-C", repo_path, "fetch", "origin"],
capture_output=True, text=True, timeout=30, env=git_env
)
checkout_result = subprocess.run(
["git", "-C", repo_path, "checkout", branch],
capture_output=True, text=True, timeout=30, env=git_env
)
if checkout_result.returncode != 0:
subprocess.run(
["git", "-C", repo_path, "checkout", "-b", branch, f"origin/{branch}"],
capture_output=True, text=True, timeout=30, env=git_env
)
result = subprocess.run(
["git", "-C", repo_path, "status", "--porcelain"],
capture_output=True, text=True, timeout=10, env=git_env
@@ -351,7 +345,7 @@ class AgentLauncher:
if agent == "analyst" and qg_name == "check_analysis_approved" and work_item_id:
files_check = QG_CHECKS.get("check_analysis_complete")
if files_check:
files_ok, _ = files_check(repo, work_item_id)
files_ok, _ = files_check(repo, work_item_id, branch)
if files_ok:
# Full artifacts ready -> In Review
from ..plane_sync import set_issue_in_review
@@ -364,10 +358,10 @@ class AgentLauncher:
notify_approve_requested(task_id)
logger.info(f"Task {task_id}: analyst finished, requested :approved: in Plane")
else:
# Check if questions file exists
# Check if questions file exists (in the task worktree)
import os as _os
questions_path = _os.path.join(
settings.repos_dir, repo,
get_worktree_path(repo, branch),
f"docs/work-items/{work_item_id}/01-questions.md"
)
if _os.path.isfile(questions_path):
@@ -392,11 +386,14 @@ class AgentLauncher:
)
return
elif qg_name in ("check_ci_green", "check_tests_local"):
# (repo, branch) signature — already worktree-aware.
passed, reason = check_fn(repo, branch)
elif qg_name == "check_tests_passed":
passed, reason = check_fn(repo, work_item_id or "")
# Artifact check — pass branch so it reads from the worktree.
passed, reason = check_fn(repo, work_item_id or "", branch)
else:
passed, reason = check_fn(repo, work_item_id or "")
# Other artifact checks (check_architecture_done, etc.) — worktree-aware.
passed, reason = check_fn(repo, work_item_id or "", branch)
if not passed:
logger.info(f"Task {task_id}: QG '{qg_name}' not passed after {agent}: {reason}")
@@ -461,7 +458,7 @@ class AgentLauncher:
if agent == "architect" and qg_name == "check_architecture_done" and not passed:
import os as _os
conflict_path = _os.path.join(
settings.repos_dir, repo,
get_worktree_path(repo, branch),
f"docs/work-items/{work_item_id}/10-conflict.md"
)
if _os.path.isfile(conflict_path):
@@ -578,15 +575,16 @@ class AgentLauncher:
logger.error(f"Auto-merge failed for {branch}: {e}")
return False
def _write_task_file(self, repo: str, task_file: str, content: str):
"""Write task file directly to the mounted repo volume (/repos).
def _write_task_file(self, repo: str, branch: str, task_file: str, content: str):
"""Write task file directly into the task's worktree.
B-1 fix: no docker. The repos directory is mounted RW at settings.repos_dir
(/repos inside the container), so write straight to /repos/<repo>/<task_file>.
B-1 fix: no docker (direct open()). ORCH-2/S-4: the target is the per-branch
worktree (/repos/_wt/<repo>/<branch>), not the shared /repos/<repo>, so the
agent reads the task ZADANIE from its own isolated working copy.
Raise on failure instead of silently swallowing errors.
"""
container_repo_path = os.path.join(settings.repos_dir, repo) # /repos/<repo>
full_path = os.path.join(container_repo_path, task_file)
work_path = get_worktree_path(repo, branch) # /repos/_wt/<repo>/<branch>
full_path = os.path.join(work_path, task_file)
try:
with open(full_path, "w", encoding="utf-8") as f:
f.write(content)

View File

@@ -20,6 +20,7 @@ class Settings(BaseSettings):
claude_bin: str = "/opt/claude-code/bin/claude.exe"
repos_dir: str = "/repos"
host_repos_dir: str = "/home/slin/repos"
worktrees_dir: str = "/repos/_wt" # ORCH-2 / S-4: isolated worktree per task/branch
# DB
db_path: str = "/app/data/orchestrator.db"

107
src/git_worktree.py Normal file
View File

@@ -0,0 +1,107 @@
"""Git worktree management — isolated working copy per task/branch (ORCH-2 / S-4).
Background
----------
Previously every git operation (checkout/commit/push/test) ran in the single shared
clone ``/repos/<repo>``. With two active tasks a ``git checkout`` of one branch would
overwrite the working copy of the other -> races (see AUDIT S-4 / ET-009 "two collectors").
Solution
--------
Each task (branch) gets an isolated git worktree::
/repos/<repo> <- main clone (fetch / worktree management)
/repos/_wt/<repo>/<safe-branch> <- worktree for one task/branch (agent works here)
A branch can only be checked out in ONE worktree at a time, which is exactly the
property we want: one task = one branch = one worktree.
"""
import os
import re
import subprocess
import logging
from .config import settings
logger = logging.getLogger("orchestrator.git_worktree")
def _safe(branch: str) -> str:
"""Filesystem-safe branch name for use in a path component."""
return re.sub(r"[^A-Za-z0-9._-]", "_", branch)
def get_worktree_path(repo: str, branch: str) -> str:
    """Compute where the worktree for (repo, branch) lives. Nothing is created.

    Layout: ``<settings.worktrees_dir>/<repo>/<sanitized branch>``.
    """
    branch_component = _safe(branch)
    return os.path.join(settings.worktrees_dir, repo, branch_component)
def _main_repo(repo: str) -> str:
    """Return the path of the main clone of ``repo`` under ``settings.repos_dir``."""
    repos_root = settings.repos_dir
    return os.path.join(repos_root, repo)
def ensure_worktree(repo: str, branch: str) -> str:
    """Create (or reuse) an isolated worktree for ``branch``. Returns its path.

    The main clone stays at ``<repos_dir>/<repo>``; the worktree lives at the
    path given by ``get_worktree_path(repo, branch)``.

    - If the worktree already exists, ``branch`` is checked out in it and, when
      ``origin/<branch>`` exists, the worktree is hard-reset to it. The hard
      reset discards uncommitted changes to tracked files and local commits
      that were never pushed; a branch that exists only locally is left as is.
    - Otherwise the branch (local or remote-tracking) is attached to a fresh
      worktree; if that fails, a new branch is created from ``origin/main``.

    Args:
        repo: repository directory name under ``settings.repos_dir``.
        branch: branch the worktree must be on.

    Returns:
        Path of the ready worktree.

    Raises:
        FileNotFoundError: the main clone directory does not exist.
        RuntimeError: an existing worktree could not be switched to ``branch``,
            or both ``git worktree add`` attempts failed.
        subprocess.TimeoutExpired: a git command exceeded its timeout.

    NOTE(review): git refuses to add a worktree for a branch that is already
    checked out elsewhere (e.g. in the main clone), which surfaces here as
    RuntimeError — confirm the main clone never sits on a task branch.
    """
    main_repo = _main_repo(repo)
    wt = get_worktree_path(repo, branch)
    if not os.path.isdir(main_repo):
        raise FileNotFoundError(f"Main repo not found: {main_repo}")

    # Always refresh refs in the main clone first. Best-effort: a failed fetch
    # must not block local work, but it should be visible in the logs.
    fetched = subprocess.run(["git", "-C", main_repo, "fetch", "origin"],
                             capture_output=True, text=True, timeout=60)
    if fetched.returncode != 0:
        logger.warning(f"git fetch failed in {main_repo}: {fetched.stderr.strip()[-200:]}")

    # Reuse existing worktree (.git may be a dir or a file pointer for worktrees).
    git_marker = os.path.join(wt, ".git")
    if os.path.isdir(git_marker) or os.path.isfile(git_marker):
        subprocess.run(["git", "-C", wt, "fetch", "origin"], capture_output=True, timeout=60)
        switched = subprocess.run(["git", "-C", wt, "checkout", branch],
                                  capture_output=True, text=True, timeout=30)
        if switched.returncode != 0:
            # Never continue to the hard reset below: it would move whatever
            # HEAD the worktree is actually on to origin/<branch>.
            raise RuntimeError(
                f"git checkout failed for {repo}:{branch} in {wt}: "
                f"{switched.stderr.strip()}"
            )
        # Align to remote only if the remote branch exists, so a branch that
        # has never been pushed keeps its local commits.
        remote_ref = subprocess.run(
            ["git", "-C", wt, "rev-parse", "--verify", "--quiet", f"origin/{branch}"],
            capture_output=True, timeout=30,
        )
        if remote_ref.returncode == 0:
            aligned = subprocess.run(
                ["git", "-C", wt, "reset", "--hard", f"origin/{branch}"],
                capture_output=True, text=True, timeout=30,
            )
            if aligned.returncode != 0:
                logger.warning(
                    f"git reset --hard origin/{branch} failed in {wt}: "
                    f"{aligned.stderr.strip()[-200:]}"
                )
        logger.info(f"Worktree reused: {wt} (branch {branch})")
        return wt

    os.makedirs(os.path.dirname(wt), exist_ok=True)
    # Drop registrations whose directory has vanished (e.g. deleted by hand);
    # otherwise `git worktree add` rejects the path as already registered.
    subprocess.run(["git", "-C", main_repo, "worktree", "prune"],
                   capture_output=True, timeout=30)

    # Try to attach an existing branch (local or remote-tracking) to the new worktree.
    r = subprocess.run(["git", "-C", main_repo, "worktree", "add", wt, branch],
                       capture_output=True, text=True, timeout=60)
    if r.returncode != 0:
        # Branch doesn't exist yet — create it from origin/main.
        r2 = subprocess.run(
            ["git", "-C", main_repo, "worktree", "add", "-b", branch, wt, "origin/main"],
            capture_output=True, text=True, timeout=60,
        )
        if r2.returncode != 0:
            raise RuntimeError(
                f"git worktree add failed for {repo}:{branch}: "
                f"{r.stderr.strip()} | {r2.stderr.strip()}"
            )
    logger.info(f"Worktree ready: {wt} (branch {branch})")
    return wt
def remove_worktree(repo: str, branch: str):
    """Remove the worktree for (repo, branch) — optional cleanup when a task is done.

    Best-effort: a failed removal is logged as a warning and never raised, so
    cleanup cannot break the caller. ``--force`` is passed to git, so a
    worktree with uncommitted changes is removed as well.

    Args:
        repo: repository directory name under ``settings.repos_dir``.
        branch: branch whose worktree should be removed.

    Raises:
        subprocess.TimeoutExpired: a git command exceeded its 30 s timeout.
    """
    main_repo = _main_repo(repo)
    wt = get_worktree_path(repo, branch)
    removal = subprocess.run(
        ["git", "-C", main_repo, "worktree", "remove", "--force", wt],
        capture_output=True, text=True, timeout=30,
    )
    # Prune dangling administrative entries.
    subprocess.run(["git", "-C", main_repo, "worktree", "prune"],
                   capture_output=True, timeout=30)
    if removal.returncode == 0:
        logger.info(f"Worktree removed: {wt}")
    else:
        # Do not claim success when git refused (e.g. path is not a worktree).
        logger.warning(
            f"Worktree removal failed for {wt}: {removal.stderr.strip()[-200:]}"
        )

View File

@@ -7,12 +7,28 @@ from ..config import settings
logger = logging.getLogger("orchestrator.qg")
from ..git_worktree import get_worktree_path, ensure_worktree
def _repo_path(repo: str, branch: str | None = None) -> str:
    """Pick the directory from which agent artifacts should be read.

    ORCH-2 / S-4: artifacts live in the per-branch worktree. If ``branch`` is
    given and its worktree directory exists on disk, that directory is
    returned. In every other case the shared ``<repos_dir>/<repo>`` clone is
    returned, which keeps callers that pass no branch working as before.
    """
    if not branch:
        return os.path.join(settings.repos_dir, repo)
    candidate = get_worktree_path(repo, branch)
    if not os.path.isdir(candidate):
        # Worktree was never created (or was removed): fall back to the clone.
        return os.path.join(settings.repos_dir, repo)
    return candidate
# Shared httpx client config
GITEA_HEADERS = {"Authorization": f"token {settings.gitea_token}"}
GITEA_BASE = f"{settings.gitea_url}/api/v1"
def check_analysis_complete(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_analysis_complete(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
"""
Check if analysis artifacts exist in the repo branch.
Required files:
@@ -28,7 +44,7 @@ def check_analysis_complete(repo: str, work_item_id: str) -> tuple[bool, str]:
f"docs/work-items/{work_item_id}/04-test-plan.yaml",
]
repo_path = os.path.join(settings.repos_dir, repo)
repo_path = _repo_path(repo, branch)
missing = []
for f in required_files:
@@ -41,13 +57,13 @@ def check_analysis_complete(repo: str, work_item_id: str) -> tuple[bool, str]:
return True, "All analysis artifacts present"
def check_architecture_done(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_architecture_done(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
"""
Check if architecture artifacts exist.
Required: docs/work-items/<work_item_id>/06-adr/ (at least 1 file)
OR: docs/work-items/<work_item_id>/07-infra-requirements.md
"""
repo_path = os.path.join(settings.repos_dir, repo)
repo_path = _repo_path(repo, branch)
adr_dir = os.path.join(repo_path, f"docs/work-items/{work_item_id}/06-adr")
infra_file = os.path.join(repo_path, f"docs/work-items/{work_item_id}/07-infra-requirements.md")
@@ -119,12 +135,12 @@ def check_review_approved(repo: str, pr_number: int) -> tuple[bool, str]:
return False, f"API error: {e}"
def check_tests_passed(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_tests_passed(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
"""
Check if test report exists and contains PASS indicator.
File: docs/work-items/<work_item_id>/13-test-report.md
"""
repo_path = os.path.join(settings.repos_dir, repo)
repo_path = _repo_path(repo, branch)
report_path = os.path.join(repo_path, f"docs/work-items/{work_item_id}/13-test-report.md")
if not os.path.isfile(report_path):
@@ -141,7 +157,7 @@ def check_tests_passed(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_analysis_approved(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_analysis_approved(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
"""
Check if analysis is complete AND approved by stakeholder.
Requirements:
@@ -152,7 +168,7 @@ def check_analysis_approved(repo: str, work_item_id: str) -> tuple[bool, str]:
so the approval check verifies file completeness as a safety gate.
"""
# First check files
files_ok, files_reason = check_analysis_complete(repo, work_item_id)
files_ok, files_reason = check_analysis_complete(repo, work_item_id, branch)
if not files_ok:
return False, files_reason
@@ -187,7 +203,7 @@ def check_analysis_approved(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_reviewer_verdict(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_reviewer_verdict(repo: str, work_item_id: str, branch: str | None = None) -> tuple[bool, str]:
"""
Check reviewer agent verdict from 12-review.md (S-5 fix).
@@ -198,7 +214,7 @@ def check_reviewer_verdict(repo: str, work_item_id: str) -> tuple[bool, str]:
(False, ...) -> verdict: REQUEST_CHANGES, missing verdict, or no frontmatter
"""
import yaml
repo_path = os.path.join(settings.repos_dir, repo)
repo_path = _repo_path(repo, branch)
review_path = os.path.join(repo_path, f"docs/work-items/{work_item_id}/12-review.md")
if not os.path.isfile(review_path):
@@ -229,26 +245,15 @@ def check_reviewer_verdict(repo: str, work_item_id: str) -> tuple[bool, str]:
def check_tests_local(repo: str, branch: str) -> tuple[bool, str]:
"""
S-1 fix: run the project test suite locally in /repos/<repo> and judge by exit
code, instead of depending on Gitea CI (which is not configured -> always false).
S-1 fix: run the project test suite locally and judge by exit code, instead of
depending on Gitea CI (which is not configured -> always false).
Checks out `branch` in the shared /repos checkout and runs `make test`.
NOTE (known limitation): the shared /repos checkout means this is not safe for
concurrent active tasks. git-worktree-per-task is a separate task (S-4).
ORCH-2 / S-4: tests run inside the per-branch worktree (ensure_worktree), so this
is safe for concurrent active tasks — no shared /repos checkout race.
"""
import subprocess
repo_path = os.path.join(settings.repos_dir, repo)
try:
subprocess.run(
["git", "-C", repo_path, "fetch", "origin"],
capture_output=True, timeout=30,
)
co = subprocess.run(
["git", "-C", repo_path, "checkout", branch],
capture_output=True, text=True, timeout=30,
)
if co.returncode != 0:
return False, f"Cannot checkout branch '{branch}': {co.stderr.strip()[-200:]}"
repo_path = ensure_worktree(repo, branch)
r = subprocess.run(
["make", "test"], cwd=repo_path,
capture_output=True, text=True, timeout=600,

View File

@@ -146,7 +146,9 @@ async def handle_ci_status(payload: dict):
if not branch:
sha = payload.get("sha", "")
repo_name = payload.get("repository", {}).get("name", settings.default_repo)
# Try to find task by checking git branch containing this SHA
# Try to find task by checking git branch containing this SHA.
# ORCH-2 / S-4: this is a READ-ONLY query of remote-tracking refs in the main
# clone (no checkout / no mutation), so it is safe to keep on /repos/<repo>.
try:
result = subprocess.run(
["git", "-C", os.path.join(settings.repos_dir, repo_name),

View File

@@ -304,7 +304,8 @@ async def _try_advance_stage(
# Determine args based on QG function
if qg_name in ("check_analysis_approved", "check_analysis_complete", "check_architecture_done", "check_tests_passed", "check_reviewer_verdict"):
passed, reason = qg_func(repo, work_item_id)
# ORCH-2 / S-4: pass branch so artifacts are read from the task worktree.
passed, reason = qg_func(repo, work_item_id, branch)
elif qg_name in ("check_ci_green", "check_tests_local"):
passed, reason = qg_func(repo, branch)
elif qg_name == "check_review_approved":
@@ -327,8 +328,10 @@ async def _try_advance_stage(
else:
# No open PR but review file exists — check file-based
import os
_review_path = os.path.join(_s.repos_dir, repo, f"docs/work-items/{work_item_id}/12-review.md")
_review_path2 = os.path.join(_s.repos_dir, repo, f"docs/work-items/{work_item_id}/09-review.md")
from ..git_worktree import get_worktree_path as _gwp
_wt = _gwp(repo, branch) if os.path.isdir(_gwp(repo, branch)) else os.path.join(_s.repos_dir, repo)
_review_path = os.path.join(_wt, f"docs/work-items/{work_item_id}/12-review.md")
_review_path2 = os.path.join(_wt, f"docs/work-items/{work_item_id}/09-review.md")
if os.path.isfile(_review_path) or os.path.isfile(_review_path2):
passed, reason = True, "Review file exists (file-based approval)"
else: