diff --git a/.agents/skills/changelog-draft/SKILL.md b/.agents/skills/changelog-draft/SKILL.md index 6c41d9bd57..081b0803f5 100644 --- a/.agents/skills/changelog-draft/SKILL.md +++ b/.agents/skills/changelog-draft/SKILL.md @@ -36,11 +36,11 @@ Record the range as `previous_cut_tag..release_tag`. ### Step 2 — Fetch PR data -Run the `fetch_prs.py` script to collect all PRs merged in the release range and extract explicit changelog markers: +Run the `fetch_prs.py` script to collect all public-release PRs merged in the release range and extract explicit changelog markers. Pass the repository that the workflow checked out, not necessarily the public repository. Release workflows run from `warpdotdev/warp-internal`, and the script deterministically resolves `warp-repo-sync[bot]` PRs back to their original public `warpdotdev/warp` PR metadata before emitting JSON. When running from `warpdotdev/warp-internal`, the script intentionally omits PRs that were not authored by the repo-sync bot, because those are private internal changes that must not be exposed to the changelog agent or generated artifacts. 
```bash python3 .agents/skills/changelog-draft/scripts/fetch_prs.py \ - --repo warpdotdev/warp \ + --repo "${GITHUB_REPOSITORY:-warpdotdev/warp}" \ --base-ref <previous_cut_tag> \ --head-ref <release_tag> ``` @@ -52,6 +52,7 @@ The script outputs JSON to stdout with this structure: "prs": [ { "number": 1234, + "url": "https://github.com/warpdotdev/warp/pull/1234", "title": "...", "author": "username", "body": "...", @@ -61,12 +62,22 @@ The script outputs JSON to stdout with this structure: { "category": "NEW-FEATURE", "text": "Added dark mode" } ], "linked_issues": [5678], - "changed_files": ["app/src/ai/agent.rs", "crates/warp_features/src/lib.rs"] + "changed_files": ["app/src/ai/agent.rs", "crates/warp_features/src/lib.rs"], + "source_repo": "warpdotdev/warp", + "internal_pr": { + "number": 25712, + "url": "https://github.com/warpdotdev/warp-internal/pull/25712", + "author": "warp-repo-sync[bot]", + "title": "...", + "repo": "warpdotdev/warp-internal" + } } ] } ``` +Use the top-level `number`, `url`, `author`, `body`, `labels`, `changed_files`, and `source_repo` fields as the source of truth. `internal_pr` is audit-only and must never be used for contributor attribution or user-facing changelog links. If `url` is empty, omit the PR link from user-facing markdown rather than synthesizing one. + ### Step 3 — Classify contributors Run the `classify_contributors.py` script with the unique author logins from Step 2: @@ -174,6 +185,8 @@ Combine explicit entries (Step 2) and inferred entries (Step 6) into the final r PRs marked with `CHANGELOG-NONE` are explicitly opted out and must never appear in the changelog markdown. +When creating entries, set `pr_number` from the normalized PR record's `number` field, and copy `url`, `author`, `source_repo`, and `internal_pr` (use `null` when the record has no `internal_pr`) from that same record. The release JSON converter uses `url` directly; do not invent public PR URLs from PR numbers. 
+ ### Step 8 — Write output files Write two files to `output_dir`: @@ -219,6 +232,7 @@ The markdown draft must **not** include "Needs Review" or "Skipped PRs" sections "entries": [ { "pr_number": 1234, + "url": "https://github.com/warpdotdev/warp/pull/1234", "category": "NEW-FEATURE", "text": "Added dark mode", "source": "explicit", @@ -226,7 +240,9 @@ The markdown draft must **not** include "Needs Review" or "Skipped PRs" sections "is_external": true, "confidence": "high", "rationale": null, - "feature_flag": null + "feature_flag": null, + "source_repo": "warpdotdev/warp", + "internal_pr": null } ], "skipped": [...], diff --git a/.agents/skills/changelog-draft/scripts/convert_to_release_json.py b/.agents/skills/changelog-draft/scripts/convert_to_release_json.py index b53eabb67c..115f3f58c9 100644 --- a/.agents/skills/changelog-draft/scripts/convert_to_release_json.py +++ b/.agents/skills/changelog-draft/scripts/convert_to_release_json.py @@ -31,8 +31,6 @@ "IMAGE": "images", } -REPO_URL = "https://github.com/warpdotdev/warp" - def format_entry(entry: dict) -> str: """Format a single changelog entry as a text line with a PR link. @@ -40,14 +38,17 @@ def format_entry(entry: dict) -> str: Includes external contributor attribution when applicable. 
""" text = entry["text"] - pr_number = entry["pr_number"] - link = f"([#{pr_number}]({REPO_URL}/pull/{pr_number}))" + pr_number = entry.get("pr_number") or entry.get("number") + url = entry.get("url") or entry.get("pr_url") + + link = "" + if url and pr_number: + link = f" ([#{pr_number}]({url}))" attribution = "" if entry.get("is_external") and entry.get("author"): attribution = f" — @{entry['author']} ✨" - - return f"{text} {link}{attribution}" + return f"{text}{link}{attribution}" def convert(draft: dict) -> dict: diff --git a/.agents/skills/changelog-draft/scripts/fetch_prs.py b/.agents/skills/changelog-draft/scripts/fetch_prs.py index c7af67f74d..5db8f9ac50 100644 --- a/.agents/skills/changelog-draft/scripts/fetch_prs.py +++ b/.agents/skills/changelog-draft/scripts/fetch_prs.py @@ -26,6 +26,16 @@ r"(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+#(\d+)", re.IGNORECASE, ) +PUBLIC_REPO = "warpdotdev/warp" +INTERNAL_REPO = "warpdotdev/warp-internal" +REPO_SYNC_AUTHORS = frozenset( + { + "app/warp-repo-sync", + "warp-repo-sync", + "warp-repo-sync[bot]", + } +) +PUBLIC_PR_URL_RE = re.compile(r"https://github\.com/warpdotdev/warp/pull/(\d+)") def run(cmd: list[str], *, check: bool = True) -> str: @@ -90,7 +100,7 @@ def get_merged_commits(sha: str) -> list[str]: def fetch_pr_data(repo: str, pr_number: int) -> dict | None: """Fetch PR metadata and changed file paths via gh CLI.""" - fields = "number,title,author,body,labels,mergedAt,files" + fields = "number,title,author,body,labels,mergedAt,files,url" raw = run( ["gh", "pr", "view", str(pr_number), "--repo", repo, "--json", fields], check=False, @@ -103,6 +113,137 @@ def fetch_pr_data(repo: str, pr_number: int) -> dict | None: return None +def fetch_pr_commit_messages(repo: str, pr_number: int) -> list[str]: + """Fetch commit messages for a PR via the GitHub API.""" + raw = run( + ["gh", "api", f"repos/{repo}/pulls/{pr_number}/commits"], + check=False, + ) + if not raw: + return [] + try: + commits = json.loads(raw) + 
except json.JSONDecodeError: + return [] + + messages = [] + for commit in commits: + if not isinstance(commit, dict): + continue + commit_data = commit.get("commit") + if isinstance(commit_data, dict): + message = commit_data.get("message") + if message: + messages.append(message) + return messages + + +def get_author_login(data: dict) -> str: + """Extract a GitHub login from a gh PR JSON object.""" + if isinstance(data.get("author"), dict): + return data["author"].get("login", "") + if isinstance(data.get("author"), str): + return data["author"] + return "" + + +def get_label_names(data: dict) -> list[str]: + """Extract label names from a gh PR JSON object.""" + label_names = [] + for lbl in data.get("labels", []) or []: + if isinstance(lbl, dict): + label_names.append(lbl.get("name", "")) + else: + label_names.append(str(lbl)) + return label_names + + +def get_file_paths(data: dict) -> list[str]: + """Extract changed file paths from a gh PR JSON object.""" + file_paths = [] + for f in data.get("files", []) or []: + if isinstance(f, dict): + file_paths.append(f.get("path", "")) + return file_paths + + +def is_repo_sync_pr(data: dict) -> bool: + """Return whether this PR was created by the public-to-internal repo sync bot.""" + return get_author_login(data) in REPO_SYNC_AUTHORS + + +def should_include_pr(repo: str, data: dict) -> bool: + """Return whether a PR should be exposed to changelog generation. + + Releases are cut from warp-internal, but non-sync-bot PRs merged there are + private/internal changes. Do not expose them to the Oz changelog agent or to + generated artifacts. 
+ """ + return repo != INTERNAL_REPO or is_repo_sync_pr(data) + + +def extract_public_pr_number(text: str) -> int | None: + """Extract a public warpdotdev/warp PR number from text.""" + if not text: + return None + m = PUBLIC_PR_URL_RE.search(text) + if m: + return int(m.group(1)) + # Repo-sync commits commonly preserve the original public squash-merge + # subject, such as "feat: add thing (#1234)". + m = re.search(r"\(#(\d+)\)\s*$", text.splitlines()[0] if text else "") + if m: + return int(m.group(1)) + return None + + +def resolve_public_pr_number(repo: str, pr_number: int, data: dict) -> int | None: + """Resolve a repo-sync PR back to its original public warpdotdev/warp PR.""" + public_pr_number = extract_public_pr_number(data.get("body", "") or "") + if public_pr_number is not None: + return public_pr_number + + for message in fetch_pr_commit_messages(repo, pr_number): + public_pr_number = extract_public_pr_number(message) + if public_pr_number is not None: + return public_pr_number + return None + + +def pr_reference(repo: str, pr_number: int, data: dict) -> dict: + """Build a compact audit reference to a PR.""" + return { + "number": data.get("number", pr_number), + "url": data.get("url", ""), + "author": get_author_login(data), + "title": data.get("title", ""), + "repo": repo, + } + + +def normalize_pr_data(repo: str, pr_number: int, data: dict) -> tuple[str, dict, dict | None]: + """Resolve repo-sync PRs to public PR metadata. + + The release workflow runs from warp-internal, where public PRs are mirrored + as warp-repo-sync[bot] PRs with different PR numbers. For changelog output + and contributor attribution, use the original public PR metadata when it can + be resolved, and keep the internal PR under `internal_pr` for audit only. 
+ """ + internal_pr = pr_reference(repo, pr_number, data) if repo != PUBLIC_REPO else None + if repo == PUBLIC_REPO or not is_repo_sync_pr(data): + return repo, data, internal_pr + + public_pr_number = resolve_public_pr_number(repo, pr_number, data) + if public_pr_number is None: + return repo, data, internal_pr + + public_data = fetch_pr_data(PUBLIC_REPO, public_pr_number) + if public_data is None: + return repo, data, internal_pr + + return PUBLIC_REPO, public_data, internal_pr + + def extract_linked_issues(body: str) -> list[int]: """Extract issue numbers from closing keywords in a PR body.""" if not body: @@ -161,42 +302,33 @@ def process_pr(pr_num: int) -> None: data = fetch_pr_data(args.repo, pr_num) if data is None: return - - author_login = "" - if isinstance(data.get("author"), dict): - author_login = data["author"].get("login", "") - elif isinstance(data.get("author"), str): - author_login = data["author"] - - label_names = [] - for lbl in data.get("labels", []) or []: - if isinstance(lbl, dict): - label_names.append(lbl.get("name", "")) - else: - label_names.append(str(lbl)) + if not should_include_pr(args.repo, data): + return + source_repo, data, internal_pr = normalize_pr_data(args.repo, pr_num, data) + author_login = get_author_login(data) + label_names = get_label_names(data) body = data.get("body", "") or "" explicit_entries = extract_markers(body) linked_issues = extract_linked_issues(body) - - file_paths = [] - for f in data.get("files", []) or []: - if isinstance(f, dict): - file_paths.append(f.get("path", "")) - - prs.append( - { - "number": data.get("number", pr_num), - "title": data.get("title", ""), - "author": author_login, - "body": body, - "labels": label_names, - "merged_at": data.get("mergedAt", ""), - "explicit_entries": explicit_entries, - "linked_issues": linked_issues, - "changed_files": file_paths, - } - ) + file_paths = get_file_paths(data) + + pr = { + "number": data.get("number", pr_num), + "url": data.get("url", "") if 
source_repo == PUBLIC_REPO else "", + "title": data.get("title", ""), + "author": author_login, + "body": body, + "labels": label_names, + "merged_at": data.get("mergedAt", ""), + "explicit_entries": explicit_entries, + "linked_issues": linked_issues, + "changed_files": file_paths, + "source_repo": source_repo, + } + if internal_pr is not None: + pr["internal_pr"] = internal_pr + prs.append(pr) for sha in commit_shas: pr_num = extract_pr_number(sha) diff --git a/.github/workflows/changelog_draft.yml b/.github/workflows/changelog_draft.yml index 24a2ace133..ea219c7f84 100644 --- a/.github/workflows/changelog_draft.yml +++ b/.github/workflows/changelog_draft.yml @@ -55,6 +55,7 @@ jobs: Output directory: ${{ runner.temp }}/changelog-draft Follow the workflow in .agents/skills/changelog-draft/SKILL.md exactly. + When fetching PR data, pass the checked-out repository ("${{ github.repository }}") to fetch_prs.py and rely on the script's repo-sync normalization to resolve public warpdotdev/warp PR numbers, URLs, and authors. The script intentionally omits non-repo-sync PRs from warp-internal because they are private internal changes. Do not infer or synthesize public PR links manually. After writing the output files, print the full contents of changelog-draft.md to stdout so it appears in the workflow log. diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 4bc1cc2b2e..72be69a998 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -1661,6 +1661,7 @@ jobs: Follow the workflow in .agents/skills/changelog-draft/SKILL.md exactly. Make sure to produce both output files: changelog-draft.md and changelog-draft.json. + The release workflow may run from warpdotdev/warp-internal. When fetching PR data, pass the checked-out repository ("${{ github.repository }}") to fetch_prs.py and rely on the script's repo-sync normalization to resolve public warpdotdev/warp PR numbers, URLs, and authors. 
The script intentionally omits non-repo-sync PRs from warp-internal because they are private internal changes. Do not infer or synthesize public PR links manually. After writing the output files, print the full contents of changelog-draft.md to stdout so it appears in the workflow log.