import subprocess import os import logging import threading import signal from ..config import settings from ..db import get_db, get_task_by_repo_branch, update_task_stage, enqueue_job from ..stages import get_next_stage, get_qg_for_stage, get_agent_for_stage from ..git_worktree import ensure_worktree, get_worktree_path from ..qg.checks import QG_CHECKS from ..notifications import notify_stage_change, notify_qg_failure, notify_agent_started, notify_agent_finished, notify_approve_requested from ..plane_sync import notify_stage_change as plane_notify_stage, add_comment as plane_add_comment logger = logging.getLogger("orchestrator.launcher") class AgentLauncher: """Launch Claude CLI agents directly (binary mounted into container).""" AGENT_CONFIGS = { "analyst": { "system_prompt": ".openclaw/agents/analyst.md", "task_file": ".task.md", "allowed_tools": "Read,Write,Edit,Bash", }, "architect": { "system_prompt": ".openclaw/agents/architect.md", "task_file": ".task-arch.md", "allowed_tools": "Read,Write,Edit,Bash", "model": "opus", }, "developer": { "system_prompt": ".openclaw/agents/developer.md", "task_file": ".task-dev.md", "allowed_tools": "Read,Write,Edit,Bash", }, "reviewer": { "system_prompt": ".openclaw/agents/reviewer.md", "task_file": ".task-review.md", "allowed_tools": "Read,Write,Edit,Bash", "model": "opus", }, "tester": { "system_prompt": ".openclaw/agents/tester.md", "task_file": ".task-test.md", "allowed_tools": "Read,Write,Edit,Bash", }, "deployer": { "task_file": ".task-deploy.md", "system_prompt": ".openclaw/agents/deployer.md", "allowed_tools": "Read,Write,Edit,Bash", }, } CLAUDE_BIN = "/opt/claude-code/bin/claude.exe" AGENT_TIMEOUT = 1800 # 30 minutes def launch(self, agent: str, repo: str, task_content: str = None, task_id: int = None) -> int: """ Launch a Claude CLI agent directly (legacy synchronous path). Kept for backward compatibility (direct callers / existing tests). The ORCH-1 job queue uses launch_job() instead, but both share _spawn(). Args: agent: Agent role (analyst, architect, developer, reviewer, tester) repo: Repository name task_content: Optional task content to write to task file task_id: Optional task ID to associate with this run Returns: agent_run_id from DB """ return self._spawn(agent, repo, task_content, task_id, job_id=None) def launch_job(self, job: dict) -> int: """ORCH-1: launch an agent for a claimed queue job. Same spawn path as launch(), but threads job['id'] through so the monitor can update the job's status (done / requeue / failed) and link jobs.run_id to the agent_runs row. Returns the agent_run_id. """ return self._spawn( job["agent"], job["repo"], job.get("task_content"), job.get("task_id"), job_id=job["id"], ) def _spawn(self, agent: str, repo: str, task_content: str = None, task_id: int = None, job_id: int = None) -> int: """Shared spawn implementation for launch() and launch_job(). When job_id is set, the monitor/watchdog drive the jobs table status (ORCH-1). The claude-CLI Popen logic (B-2) and worktree/task-file logic (B-1 / ORCH-2) are unchanged. """ config = self.AGENT_CONFIGS.get(agent) if not config: raise ValueError(f"Unknown agent: {agent}") # Main clone lives at /repos/; the agent works in an isolated worktree # (ORCH-2 / S-4) so concurrent tasks never fight over a shared checkout. local_repo_path = os.path.join(settings.repos_dir, repo) if not os.path.isdir(local_repo_path): raise FileNotFoundError(f"Repo not found: {local_repo_path}") # Determine branch (needed before we touch the worktree / task file). _br_row = get_db().execute("SELECT branch FROM tasks WHERE id=?", (task_id,)).fetchone() if task_id else None agent_branch = _br_row[0] if _br_row else "main" # Ensure the per-branch worktree exists and is on the right branch. work_path = ensure_worktree(repo, agent_branch) # Write task file if content provided (B-1: direct write; now into the worktree). if task_content: self._write_task_file(repo, agent_branch, config["task_file"], task_content) # Record run in DB conn = get_db() cursor = conn.execute( "INSERT INTO agent_runs (task_id, agent) VALUES (?, ?)", (task_id, agent), ) run_id = cursor.lastrowid conn.commit() # ORCH-1: link this job to the agent_runs row and stamp started_at. if job_id is not None: conn.execute( "UPDATE jobs SET run_id = ?, started_at = datetime('now') WHERE id = ?", (run_id, job_id), ) conn.commit() # Prepare output log path output_path = f"/app/data/runs/{run_id}.log" os.makedirs(os.path.dirname(output_path), exist_ok=True) # Build the claude command task_file = config["task_file"] system_prompt = config["system_prompt"] allowed_tools = config["allowed_tools"] model = config.get("model", "") model_flag = f"--model {model} " if model else "" # No git fetch/checkout here: ensure_worktree() already put the worktree on # the right branch. The agent simply runs inside its isolated work_path. cmd = ( f'cd {work_path} && ' f'{self.CLAUDE_BIN} --print ' f'{model_flag}' f'"$(cat {task_file})" ' f'--system-prompt "$(cat {system_prompt})" ' f'--allowedTools {allowed_tools}' ) logger.info(f"Launching agent '{agent}' for repo '{repo}', run_id={run_id}") # Launch as background process. # B-2 fix: redirect stdout/stderr straight to the log file at the OS level. # No PIPE in the orchestrator process -> no PIPE deadlock, no reader thread, # no zombies. log_fh is closed by _monitor_agent after proc.wait(). log_fh = open(output_path, "w") proc = subprocess.Popen( ["bash", "-c", cmd], stdout=log_fh, stderr=subprocess.STDOUT, env={ **os.environ, "HOME": "/home/slin", "GIT_AUTHOR_NAME": "claude-bot", "GIT_AUTHOR_EMAIL": "claude-bot@mva154.local", "GIT_COMMITTER_NAME": "claude-bot", "GIT_COMMITTER_EMAIL": "claude-bot@mva154.local", }, ) # Update DB with output path conn.execute( "UPDATE agent_runs SET output_path = ? WHERE id = ?", (output_path, run_id), ) conn.commit() conn.close() # Start timeout watchdog t = threading.Thread( target=self._watchdog, args=(proc.pid, run_id), kwargs={"job_id": job_id}, daemon=True, ) t.start() # Start monitor thread (waits for completion, commits, pushes) # agent_branch already computed above m = threading.Thread( target=self._monitor_agent, args=(proc, run_id, agent, repo, agent_branch, output_path, log_fh), kwargs={"job_id": job_id}, daemon=True, ) m.start() logger.info(f"Agent '{agent}' launched, pid={proc.pid}, run_id={run_id}") notify_agent_started(run_id, agent, task_id) return run_id def _watchdog(self, pid: int, run_id: int, timeout: int = None, job_id: int = None): """Kill agent if it exceeds timeout. ORCH-1: on a timeout-kill the monitor's proc.wait() returns the kill exit code and drives the job retry/fail logic, so the watchdog itself only needs to SIGKILL and record the agent_runs exit. job_id is accepted for symmetry. """ import time if timeout is None: timeout = self.AGENT_TIMEOUT time.sleep(timeout) try: os.kill(pid, signal.SIGKILL) logger.warning(f"Agent run_id={run_id} killed after {timeout}s timeout") conn = get_db() conn.execute( "UPDATE agent_runs SET finished_at=datetime('now'), exit_code=-9 WHERE id=?", (run_id,), ) conn.commit() conn.close() except ProcessLookupError: pass # Already finished def _monitor_agent(self, proc, run_id, agent, repo, branch, output_path=None, log_fh=None, job_id=None): """Wait for agent to finish, commit+push results, update DB. B-2 fix: stdout already goes straight to the log file via Popen, so we just block on proc.wait() (guaranteed reap -> no zombie, real exit_code) and then close the log file handle. No PIPE, no select loop, no startup timeout here (the watchdog still enforces the overall AGENT_TIMEOUT by pid). """ import time as _time _start_ts = _time.time() exit_code = proc.wait() if log_fh is not None: try: log_fh.close() except Exception: pass _duration_s = int(_time.time() - _start_ts) logger.info(f"Agent run_id={run_id} ({agent}) finished with exit_code={exit_code}") # Update DB conn = get_db() conn.execute( "UPDATE agent_runs SET finished_at=datetime('now'), exit_code=? WHERE id=?", (exit_code, run_id), ) conn.commit() # Get task_id for notification _row = conn.execute("SELECT task_id FROM agent_runs WHERE id=?", (run_id,)).fetchone() _task_id = _row[0] if _row else None conn.close() notify_agent_finished(run_id, agent, exit_code, task_id=_task_id, duration_s=_duration_s) # Commit and push any changes — in the per-branch worktree (ORCH-2 / S-4), # NOT in the shared /repos/. The worktree is already on `branch` # (ensure_worktree did the checkout), so no checkout is needed here. repo_path = get_worktree_path(repo, branch) try: git_env = { **os.environ, "HOME": "/home/slin", "GIT_AUTHOR_NAME": "claude-bot", "GIT_AUTHOR_EMAIL": "claude-bot@mva154.local", "GIT_COMMITTER_NAME": "claude-bot", "GIT_COMMITTER_EMAIL": "claude-bot@mva154.local", } result = subprocess.run( ["git", "-C", repo_path, "status", "--porcelain"], capture_output=True, text=True, timeout=10, env=git_env ) if result.stdout.strip(): # Add docs/ always subprocess.run( ["git", "-C", repo_path, "add", "docs/"], capture_output=True, text=True, timeout=10, env=git_env ) # Add src/ and tests/ for developer if agent == "developer": subprocess.run( ["git", "-C", repo_path, "add", "src/", "tests/"], capture_output=True, text=True, timeout=10, env=git_env ) # Commit commit_result = subprocess.run( ["git", "-C", repo_path, "commit", "-m", f"{agent}(ET): auto-commit from {agent} run_id={run_id}"], capture_output=True, text=True, timeout=30, env=git_env ) if commit_result.returncode == 0: push_result = subprocess.run( ["git", "-C", repo_path, "push", "origin", branch], capture_output=True, text=True, timeout=60, env=git_env ) if push_result.returncode == 0: logger.info(f"Agent run_id={run_id}: committed and pushed to {branch}") # Auto-create PR after developer pushes if agent == "developer": self._ensure_pr(repo, branch, run_id) else: logger.error(f"Agent run_id={run_id}: push failed: {push_result.stderr}") else: logger.warning(f"Agent run_id={run_id}: commit failed: {commit_result.stderr}") else: logger.info(f"Agent run_id={run_id}: no changes to commit") except Exception as e: logger.error(f"Agent run_id={run_id}: post-run git failed: {e}") # Handle deployer failure (smoke/healthcheck failed) — Task 7 if exit_code != 0 and agent == "deployer": conn = get_db() task_row = conn.execute( "SELECT id, work_item_id FROM tasks WHERE repo=? AND branch=?", (repo, branch), ).fetchone() conn.close() if task_row: _tid, _wid = task_row update_task_stage(_tid, "development") notify_stage_change(_tid, "deploy", "development") plane_notify_stage(_wid, "deploy", "development") from ..plane_sync import set_issue_blocked set_issue_blocked(_wid) plane_add_comment( _wid, "\u274c Deploy FAILED (smoke/healthcheck). Rolled back. Developer \u043d\u0443\u0436\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430." ) from ..notifications import send_telegram send_telegram(f"\U0001f6a8 {_wid}: Deploy failed! Rolled back. Needs fix.") # Notify on startup timeout (exit_code from kill = -9 or 137) if exit_code != 0 and exit_code not in (None,): conn = get_db() task_row = conn.execute( "SELECT id, work_item_id FROM tasks WHERE repo=? AND branch=?", (repo, branch), ).fetchone() conn.close() if task_row and agent != "deployer": # deployer handled above _tid, _wid = task_row from ..notifications import send_telegram send_telegram(f"\u26a0\ufe0f {_wid}: Agent {agent} failed (exit_code={exit_code}). Check logs: /app/data/runs/{run_id}.log") # Auto-advance stage if agent finished successfully and QG passes if exit_code == 0: self._try_advance_stage(run_id, agent, repo, branch) # ORCH-1: drive the job-queue status for queue-launched jobs only. # (Legacy direct launch() has job_id=None and is unaffected.) if job_id is not None: self._finalize_job(job_id, agent, run_id, exit_code, output_path=output_path) def _backoff_seconds(self, transient_attempts: int, retry_after: int = None) -> int: """Exponential backoff for transient failures, honouring Retry-After. backoff = min(2^transient_attempts * base, max). If the server sent a Retry-After, use the larger of the two (never poll sooner than asked). """ base = settings.backoff_base_seconds cap = settings.backoff_max_seconds backoff = min((2 ** max(transient_attempts, 0)) * base, cap) if retry_after is not None and retry_after > 0: backoff = max(backoff, min(retry_after, cap)) return int(backoff) def _finalize_job(self, job_id: int, agent: str, run_id: int, exit_code, output_path=None): """ORCH-1: update the jobs row after the agent process finished. exit_code == 0 -> done (and resets the breaker streak via on_outcome). exit_code != 0 -> classify the failure from the run log tail (token-free): - TRANSIENT (429/overload/network): backoff-requeue with available_at in the future + a SEPARATE transient_attempts budget (settings.transient_max_attempts), honouring Retry-After. Reported to the breaker so it opens after N consecutive transient failures. - PERMANENT (code fault): ordinary attempts < max_attempts requeue, otherwise 'failed' + Telegram. """ from ..db import get_job, mark_job from ..error_classifier import classify_log_file try: job = get_job(job_id) if not job: return if exit_code == 0: mark_job(job_id, "done", run_id=run_id) logger.info(f"Job {job_id} ({agent}) done (run_id={run_id})") self._record_outcome(transient=False, recovered=True) return # Classify the failure from the agent log tail (no token cost). kind, retry_after = "permanent", None log_path = output_path or f"/app/data/runs/{run_id}.log" try: kind, retry_after = classify_log_file(log_path) except Exception: pass if kind == "transient": self._finalize_transient(job_id, agent, run_id, exit_code, job, retry_after) else: self._finalize_permanent(job_id, agent, run_id, exit_code, job) except Exception as e: logger.error(f"Job {job_id}: _finalize_job error: {e}") def _finalize_transient(self, job_id, agent, run_id, exit_code, job, retry_after): """Transient (429/overload/net) failure -> backoff requeue or fail when budget out.""" from ..db import mark_job, mark_job_transient tattempts = job.get("transient_attempts", 0) tmax = settings.transient_max_attempts err = (f"transient (429/overload) agent {agent} exit={exit_code} " f"(run_id={run_id}); retry_after={retry_after}") self._record_outcome(transient=True, recovered=False) if tattempts < tmax: backoff = self._backoff_seconds(tattempts + 1, retry_after) mark_job_transient(job_id, backoff, error=err) logger.warning( f"Job {job_id} ({agent}) TRANSIENT fail (exit={exit_code}), " f"backoff {backoff}s, transient_attempt {tattempts + 1}/{tmax}" ) else: mark_job(job_id, "failed", run_id=run_id, error=err) logger.error( f"Job {job_id} ({agent}) failed after {tattempts} transient attempts" ) self._notify_failed(job_id, agent, job, run_id, f"transient (rate-limit) after {tattempts} attempts") def _finalize_permanent(self, job_id, agent, run_id, exit_code, job): """Permanent (code-fault) failure -> normal attempts In Review from ..plane_sync import set_issue_in_review set_issue_in_review(work_item_id) plane_add_comment( work_item_id, "\U0001f4cb BRD/\u0422\u0417/AC/TestPlan \u0433\u043e\u0442\u043e\u0432\u044b. " "\u041f\u0440\u043e\u0448\u0443 review \u0438 \u0440\u0435\u0430\u043a\u0446\u0438\u044e :approved: \u0434\u043b\u044f \u043f\u0440\u043e\u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f \u0432 Architecture." ) notify_approve_requested(task_id) logger.info(f"Task {task_id}: analyst finished, requested :approved: in Plane") else: # Check if questions file exists (in the task worktree) import os as _os questions_path = _os.path.join( get_worktree_path(repo, branch), f"docs/work-items/{work_item_id}/01-questions.md" ) if _os.path.isfile(questions_path): # Analyst has questions -> Needs Input from ..plane_sync import set_issue_needs_input set_issue_needs_input(work_item_id) with open(questions_path, "r") as qf: questions_text = qf.read() plane_add_comment( work_item_id, f"\u2753 Analyst \u043d\u0443\u0436\u0434\u0430\u0435\u0442\u0441\u044f \u0432 \u0443\u0442\u043e\u0447\u043d\u0435\u043d\u0438\u0438:\n\n{questions_text}" ) from ..notifications import send_telegram send_telegram( f"\u2753 {work_item_id}: Analyst \u0437\u0430\u0434\u0430\u0451\u0442 \u0432\u043e\u043f\u0440\u043e\u0441\u044b. \u041e\u0442\u0432\u0435\u0442\u044c \u0432 Plane." ) else: # No artifacts and no questions plane_add_comment( work_item_id, "\u26a0\ufe0f Analyst \u0437\u0430\u0432\u0435\u0440\u0448\u0438\u043b\u0441\u044f \u0431\u0435\u0437 \u0430\u0440\u0442\u0435\u0444\u0430\u043a\u0442\u043e\u0432 \u0438 \u0431\u0435\u0437 \u0432\u043e\u043f\u0440\u043e\u0441\u043e\u0432. \u041f\u0440\u043e\u0432\u0435\u0440\u044c\u0442\u0435 \u043b\u043e\u0433." ) return elif qg_name in ("check_ci_green", "check_tests_local"): # (repo, branch) signature — already worktree-aware. passed, reason = check_fn(repo, branch) elif qg_name == "check_tests_passed": # Artifact check — pass branch so it reads from the worktree. passed, reason = check_fn(repo, work_item_id or "", branch) else: # Other artifact checks (check_architecture_done, etc.) — worktree-aware. passed, reason = check_fn(repo, work_item_id or "", branch) if not passed: logger.info(f"Task {task_id}: QG '{qg_name}' not passed after {agent}: {reason}") # If reviewer says REQUEST_CHANGES, rollback to development if agent == "reviewer" and "REQUEST_CHANGES" in reason: update_task_stage(task_id, "development") notify_stage_change(task_id, current_stage, "development") plane_notify_stage(work_item_id, current_stage, "development") # Count retries conn2 = get_db() retry_count = conn2.execute( "SELECT COUNT(*) FROM agent_runs WHERE task_id=? AND agent='developer'", (task_id,) ).fetchone()[0] conn2.close() if retry_count < 3: task_desc = ( f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n" f"Stage: development\nNote: REQUEST_CHANGES from reviewer " f"(attempt {retry_count+1}/3). Fix findings in " f"docs/work-items/{work_item_id}/12-review.md" ) new_job = enqueue_job("developer", repo, task_desc, task_id=task_id) logger.info(f"Task {task_id}: reviewer REQUEST_CHANGES, enqueued developer (job_id={new_job})") else: from ..notifications import send_telegram send_telegram(f"\u26a0\ufe0f {work_item_id}: Max developer retries (3) reached. Manual intervention needed.") logger.error(f"Task {task_id}: max retries reached") # Task 6: Tester FAIL -> rollback to development if agent == "tester" and qg_name == "check_tests_passed" and not passed: update_task_stage(task_id, "development") notify_stage_change(task_id, current_stage, "development") plane_notify_stage(work_item_id, current_stage, "development") from ..plane_sync import set_issue_in_progress set_issue_in_progress(work_item_id) plane_add_comment( work_item_id, f"\u274c \u0422\u0435\u0441\u0442\u044b \u043d\u0435 \u043f\u0440\u043e\u0448\u043b\u0438: {reason}. Developer \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0449\u0435\u043d \u0434\u043b\u044f \u0444\u0438\u043a\u0441\u0430." ) conn2 = get_db() retry_count = conn2.execute( "SELECT COUNT(*) FROM agent_runs WHERE task_id=? AND agent='developer'", (task_id,) ).fetchone()[0] conn2.close() if retry_count < 3: task_desc = ( f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n" f"Stage: development\nNote: Tests FAILED. " f"Fix failures described in docs/work-items/{work_item_id}/13-test-report.md" ) new_job = enqueue_job("developer", repo, task_desc, task_id=task_id) logger.info(f"Task {task_id}: tester FAIL, enqueued developer (job_id={new_job})") else: from ..notifications import send_telegram from ..plane_sync import set_issue_blocked set_issue_blocked(work_item_id) send_telegram(f"\U0001f6a8 {work_item_id}: Tests still failing after 3 developer retries. Manual intervention needed.") # Task 8: Architect conflict -> rollback to analysis if agent == "architect" and qg_name == "check_architecture_done" and not passed: import os as _os conflict_path = _os.path.join( get_worktree_path(repo, branch), f"docs/work-items/{work_item_id}/10-conflict.md" ) if _os.path.isfile(conflict_path): update_task_stage(task_id, "analysis") notify_stage_change(task_id, current_stage, "analysis") plane_notify_stage(work_item_id, current_stage, "analysis") from ..plane_sync import set_issue_in_progress set_issue_in_progress(work_item_id) with open(conflict_path, "r") as cf: conflict_text = cf.read()[:500] plane_add_comment( work_item_id, f"\u26a0\ufe0f Architect \u043d\u0430\u0448\u0451\u043b \u043a\u043e\u043d\u0444\u043b\u0438\u043a\u0442 \u0441 \u0422\u0417. \u0412\u043e\u0437\u0432\u0440\u0430\u0442 \u0432 Analysis.\n\n{conflict_text}" ) task_desc = ( f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\n" f"Stage: analysis\nNote: Architect conflict. Revise TRZ. " f"See docs/work-items/{work_item_id}/10-conflict.md" ) new_job = enqueue_job("analyst", repo, task_desc, task_id=task_id) logger.info(f"Task {task_id}: architect conflict, enqueued analyst (job_id={new_job})") return return elif qg_name: return # Advance stage update_task_stage(task_id, next_stage) notify_stage_change(task_id, current_stage, next_stage) plane_notify_stage(work_item_id, current_stage, next_stage) logger.info(f"Task {task_id}: {current_stage} -> {next_stage} (auto-advance after {agent})") # Launch next agent if defined next_agent = get_agent_for_stage(next_stage) if next_agent: task_desc = f"Work item: {work_item_id}\nRepo: {repo}\nBranch: {branch}\nStage: {next_stage}" new_job_id = enqueue_job(next_agent, repo, task_desc, task_id=task_id) logger.info(f"Task {task_id}: enqueued '{next_agent}' (job_id={new_job_id})") except Exception as e: logger.error(f"Auto-advance failed for run_id={run_id}: {e}") def _ensure_pr(self, repo: str, branch: str, run_id: int): import httpx owner = settings.gitea_owner headers = {"Authorization": f"token {settings.gitea_token}"} base_url = f"{settings.gitea_url}/api/v1" try: resp = httpx.get( f"{base_url}/repos/{owner}/{repo}/pulls", params={"state": "open", "head": branch}, headers=headers, timeout=10 ) resp.raise_for_status() prs = resp.json() if prs: return prs[0]["number"] parts = branch.split("/") title = parts[-1] if parts else branch resp = httpx.post( f"{base_url}/repos/{owner}/{repo}/pulls", json={"title": f"feat: {title}", "head": branch, "base": "main", "body": f"Auto-created by orchestrator after developer run_id={run_id}"}, headers=headers, timeout=10 ) resp.raise_for_status() pr_number = resp.json()["number"] logger.info(f"Created PR #{pr_number} for {branch}") return pr_number except Exception as e: logger.error(f"Failed to create PR for {branch}: {e}") return None def _auto_merge_pr(self, repo: str, branch: str, task_id: int, work_item_id: str): import httpx owner = settings.gitea_owner headers = {"Authorization": f"token {settings.gitea_token}"} base_url = f"{settings.gitea_url}/api/v1" try: resp = httpx.get( f"{base_url}/repos/{owner}/{repo}/pulls", params={"state": "open", "head": branch}, headers=headers, timeout=10 ) resp.raise_for_status() prs = resp.json() if not prs: pr_number = self._ensure_pr(repo, branch, 0) if not pr_number: return False else: pr_number = prs[0]["number"] resp = httpx.post( f"{base_url}/repos/{owner}/{repo}/pulls/{pr_number}/merge", json={"Do": "merge"}, headers=headers, timeout=30 ) if resp.status_code in (200, 204): logger.info(f"PR #{pr_number} merged for {branch}") update_task_stage(task_id, "done") notify_stage_change(task_id, "deploy", "done") plane_notify_stage(work_item_id, "deploy", "done") from ..notifications import send_telegram send_telegram(f"\u2705 {work_item_id}: PR #{pr_number} merged! deploy -> done. Task complete.") return True else: logger.error(f"Merge failed for PR #{pr_number}: {resp.status_code} {resp.text}") from ..notifications import send_telegram send_telegram(f"\u26a0\ufe0f {work_item_id}: Auto-merge failed (HTTP {resp.status_code}). Manual merge needed.") return False except Exception as e: logger.error(f"Auto-merge failed for {branch}: {e}") return False def _write_task_file(self, repo: str, branch: str, task_file: str, content: str): """Write task file directly into the task's worktree. B-1 fix: no docker (direct open()). ORCH-2/S-4: the target is the per-branch worktree (/repos/_wt//), not the shared /repos/, so the agent reads the task ZADANIE from its own isolated working copy. Raise on failure instead of silently swallowing errors. """ work_path = get_worktree_path(repo, branch) # /repos/_wt// full_path = os.path.join(work_path, task_file) try: with open(full_path, "w", encoding="utf-8") as f: f.write(content) logger.info(f"Task file written: {full_path} ({len(content)} bytes)") except OSError as e: logger.error(f"Failed to write task file {full_path}: {e}") raise RuntimeError(f"Failed to write task file: {e}") launcher = AgentLauncher()