diff --git a/src/db.py b/src/db.py index f8bfa9d..51ae5a1 100644 --- a/src/db.py +++ b/src/db.py @@ -159,6 +159,44 @@ def get_next_work_item_id(repo: str, prefix: str = "ET") -> str: return f"{prefix}-{next_num:03d}" +def ensure_unique_work_item_id(work_item_id: str, repo: str) -> str: + """BUG 2a: guarantee work_item_id uniqueness within (repo) over M-6 derive. + + M-6 derives the work_item_id from the Plane sequence_id. That number can + collide (e.g. an issue was deleted and the sequence reused, or two issues + map to the same number) -> the SAME ET-NNN gets handed to two different + tasks, which then physically share a branch/worktree slug prefix and step on + each other (see ET-006: task 8 and task 25). + + This is a guard LAYERED ON TOP of the M-6 derive (it does NOT replace it): + given the derived id, if that exact -NNN already exists in the tasks + table for this repo, walk forward (ET-007, ET-008, ...) until a free number + is found and return that instead. If the derived id is free, it is returned + unchanged. + """ + if not work_item_id or "-" not in work_item_id: + return work_item_id + prefix, num_str = work_item_id.rsplit("-", 1) + try: + num = int(num_str) + except ValueError: + return work_item_id + width = len(num_str) + + conn = get_db() + try: + candidate = work_item_id + while conn.execute( + "SELECT 1 FROM tasks WHERE repo = ? AND work_item_id = ? LIMIT 1", + (repo, candidate), + ).fetchone() is not None: + num += 1 + candidate = f"{prefix}-{num:0{width}d}" + return candidate + finally: + conn.close() + + # --------------------------------------------------------------------------- # ORCH-5 (M-7): idempotent webhook event logging # --------------------------------------------------------------------------- diff --git a/src/webhooks/plane.py b/src/webhooks/plane.py index 63b9879..9efe6be 100644 --- a/src/webhooks/plane.py +++ b/src/webhooks/plane.py @@ -13,6 +13,7 @@ from ..db import ( get_db, get_task_by_plane_id, get_next_work_item_id, + ensure_unique_work_item_id, update_task_stage, enqueue_job, insert_event_dedup, @@ -317,10 +318,41 @@ async def start_pipeline(data: dict, project_id: str = ""): f"fell back to DB increment: {work_item_id}" ) + # BUG 2a: uniqueness-guard LAYERED ON TOP of the M-6 derive above (the derive + # itself is untouched). If the derived ET-NNN is already taken by another + # task in this repo (collision -> two tasks would share branch/worktree, see + # ET-006), bump to the next free number. + _derived = work_item_id + work_item_id = ensure_unique_work_item_id(work_item_id, repo) + if work_item_id != _derived: + logger.warning( + f"work_item_id collision: derived {_derived} already in use for " + f"{repo}; reassigned {plane_id} -> {work_item_id}" + ) + # Create slug from name slug = re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")[:30] branch = f"feature/{work_item_id}-{slug}" + # BUG 2b (defense-in-depth): the worktree/path is keyed by BRANCH + # (git_worktree.get_worktree_path) and tasks are reverse-resolved by + # (repo, branch). With 2a the work_item_id is unique, so the branch prefix is + # too; but the slug could still collide (e.g. two issues with the same title + # under different ids -> fine) or, worse, an identical branch already exist. + # Guard physically: if this exact branch is already owned by another task in + # this repo, disambiguate with the (now unique) work_item_id so two tasks can + # never share a worktree. + _conn_b = get_db() + _branch_taken = _conn_b.execute( + "SELECT 1 FROM tasks WHERE repo = ? AND branch = ? LIMIT 1", (repo, branch) + ).fetchone() + _conn_b.close() + if _branch_taken is not None: + branch = f"feature/{work_item_id}-{plane_id[:8]}" + logger.warning( + f"branch collision for {repo}; disambiguated to unique branch {branch}" + ) + # Insert task into DB conn = get_db() conn.execute(