-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Normalize changelog PR metadata from repo sync #10971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,16 @@ | |
| r"(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+#(\d+)", | ||
| re.IGNORECASE, | ||
| ) | ||
# Repository slugs compared against the `repo` argument throughout this module.
PUBLIC_REPO = "warpdotdev/warp"
INTERNAL_REPO = "warpdotdev/warp-internal"

# Author logins that identify a PR as opened by the repo-sync bot.
REPO_SYNC_AUTHORS = frozenset(
    (
        "app/warp-repo-sync",
        "warp-repo-sync",
        "warp-repo-sync[bot]",
    )
)

# Matches a link to a public PR; group 1 captures the PR number.
PUBLIC_PR_URL_RE = re.compile(r"https://github\.com/warpdotdev/warp/pull/(\d+)")
|
|
||
|
|
||
| def run(cmd: list[str], *, check: bool = True) -> str: | ||
|
|
@@ -90,7 +100,7 @@ def get_merged_commits(sha: str) -> list[str]: | |
|
|
||
| def fetch_pr_data(repo: str, pr_number: int) -> dict | None: | ||
| """Fetch PR metadata and changed file paths via gh CLI.""" | ||
| fields = "number,title,author,body,labels,mergedAt,files" | ||
| fields = "number,title,author,body,labels,mergedAt,files,url" | ||
| raw = run( | ||
| ["gh", "pr", "view", str(pr_number), "--repo", repo, "--json", fields], | ||
| check=False, | ||
|
|
@@ -103,6 +113,137 @@ def fetch_pr_data(repo: str, pr_number: int) -> dict | None: | |
| return None | ||
|
|
||
|
|
||
def fetch_pr_commit_messages(repo: str, pr_number: int) -> list[str]:
    """Fetch commit messages for a PR via the GitHub API.

    Args:
        repo: Repository slug in ``owner/name`` form.
        pr_number: Number of the pull request within ``repo``.

    Returns:
        The non-empty string commit messages found in the response, in
        response order. An empty list is returned when the command yields no
        output, the output is not valid JSON, or the decoded payload is not a
        list.
    """
    # NOTE(review): no pagination arguments are passed, so presumably only the
    # API's first page of commits is returned -- confirm that is acceptable
    # for PRs with many commits.
    raw = run(
        ["gh", "api", f"repos/{repo}/pulls/{pr_number}/commits"],
        check=False,
    )
    if not raw:
        return []
    try:
        commits = json.loads(raw)
    except json.JSONDecodeError:
        return []
    # The command runs with check=False, so the payload may be something other
    # than the expected array (e.g. `null`); iterating that would raise.
    if not isinstance(commits, list):
        return []

    messages = []
    for commit in commits:
        if not isinstance(commit, dict):
            continue
        commit_data = commit.get("commit")
        if not isinstance(commit_data, dict):
            continue
        message = commit_data.get("message")
        # Keep the declared list[str] contract: skip null/empty/non-string.
        if isinstance(message, str) and message:
            messages.append(message)
    return messages
|
|
||
|
|
||
def get_author_login(data: dict) -> str:
    """Extract a GitHub login from a gh PR JSON object.

    Args:
        data: PR JSON object; its ``"author"`` field may be a dict carrying a
            ``"login"`` key, a plain string, or absent/null.

    Returns:
        The author login, or ``""`` when it is missing, null, or not a string.
    """
    author = data.get("author")
    if isinstance(author, dict):
        login = author.get("login")
        # A present-but-null login must not leak out as None: callers expect str.
        return login if isinstance(login, str) else ""
    if isinstance(author, str):
        return author
    return ""
|
|
||
|
|
||
def get_label_names(data: dict) -> list[str]:
    """Return the label names carried by a gh PR JSON object.

    Dict entries contribute their ``"name"`` value (``""`` when the key is
    absent); any other entry is converted with ``str``. A missing or null
    ``"labels"`` field yields an empty list.
    """
    raw_labels = data.get("labels") or []
    return [
        entry.get("name", "") if isinstance(entry, dict) else str(entry)
        for entry in raw_labels
    ]
|
|
||
|
|
||
def get_file_paths(data: dict) -> list[str]:
    """Return the changed file paths listed in a gh PR JSON object.

    Only dict entries of ``"files"`` are considered; each contributes its
    ``"path"`` value (``""`` when the key is absent). A missing or null
    ``"files"`` field yields an empty list.
    """
    changed = data.get("files") or []
    return [entry.get("path", "") for entry in changed if isinstance(entry, dict)]
|
|
||
|
|
||
def is_repo_sync_pr(data: dict) -> bool:
    """Tell whether the PR's author login is one of the repo-sync bot logins."""
    login = get_author_login(data)
    return login in REPO_SYNC_AUTHORS
|
|
||
|
|
||
def should_include_pr(repo: str, data: dict) -> bool:
    """Decide whether a PR may be passed on to changelog generation.

    PRs from any repo other than warp-internal are always included. Releases
    are cut from warp-internal, but PRs merged there by anyone other than the
    sync bot are private/internal changes, so they are kept away from the Oz
    changelog agent and from generated artifacts.
    """
    if repo != INTERNAL_REPO:
        return True
    # Inside the internal repo only mirrored (sync-bot) PRs are exposed.
    return is_repo_sync_pr(data)
|
|
||
|
|
||
def extract_public_pr_number(text: str) -> int | None:
    """Find a public warpdotdev/warp PR number inside ``text``.

    A public PR URL anywhere in the text takes precedence; otherwise a
    trailing ``(#N)`` on the first line is used. Returns ``None`` when the
    text is empty or neither form is present.
    """
    if not text:
        return None

    url_match = PUBLIC_PR_URL_RE.search(text)
    if url_match is not None:
        return int(url_match.group(1))

    # Repo-sync commits commonly preserve the original public squash-merge
    # subject, such as "feat: add thing (#1234)".
    subject = text.splitlines()[0]
    suffix_match = re.search(r"\(#(\d+)\)\s*$", subject)
    return int(suffix_match.group(1)) if suffix_match else None
|
|
||
|
|
||
def resolve_public_pr_number(repo: str, pr_number: int, data: dict) -> int | None:
    """Map a repo-sync PR back to its original public warpdotdev/warp PR number.

    The PR body is inspected first. Only when it yields nothing are the PR's
    commit messages fetched and scanned in order, stopping at the first
    match. Returns ``None`` when no source yields a number.
    """
    from_body = extract_public_pr_number(data.get("body") or "")
    if from_body is not None:
        return from_body

    # `map` is lazy, so scanning stops at the first message that resolves.
    candidates = map(
        extract_public_pr_number, fetch_pr_commit_messages(repo, pr_number)
    )
    return next((number for number in candidates if number is not None), None)
|
|
||
|
|
||
def pr_reference(repo: str, pr_number: int, data: dict) -> dict:
    """Build a compact audit reference to a PR.

    The result holds the keys ``number``, ``url``, ``author``, ``title`` and
    ``repo``. ``number`` falls back to ``pr_number`` and ``url``/``title``
    fall back to ``""`` when absent from ``data``.
    """
    number = data.get("number", pr_number)
    url = data.get("url", "")
    author = get_author_login(data)
    title = data.get("title", "")
    return {
        "number": number,
        "url": url,
        "author": author,
        "title": title,
        "repo": repo,
    }
|
|
||
|
|
||
def normalize_pr_data(repo: str, pr_number: int, data: dict) -> tuple[str, dict, dict | None]:
    """Swap a repo-sync PR's metadata for that of its original public PR.

    The release workflow runs from warp-internal, where public PRs are mirrored
    as warp-repo-sync[bot] PRs with different PR numbers. For changelog output
    and contributor attribution, use the original public PR metadata when it can
    be resolved, and keep the internal PR under `internal_pr` for audit only.

    Returns:
        A ``(source_repo, data, internal_pr)`` tuple. ``internal_pr`` is
        ``None`` when ``repo`` is the public repo, otherwise an audit
        reference to the PR that was passed in. ``source_repo`` and ``data``
        describe the public PR when resolution succeeds; in every other case
        they are the inputs unchanged.
    """
    if repo == PUBLIC_REPO:
        # Already public: nothing to resolve and no internal PR to record.
        return repo, data, None

    internal_pr = pr_reference(repo, pr_number, data)
    # Result used whenever the PR cannot be mapped to a public one.
    unresolved = (repo, data, internal_pr)

    if not is_repo_sync_pr(data):
        return unresolved

    public_pr_number = resolve_public_pr_number(repo, pr_number, data)
    if public_pr_number is None:
        return unresolved

    public_data = fetch_pr_data(PUBLIC_REPO, public_pr_number)
    if public_data is None:
        return unresolved

    return PUBLIC_REPO, public_data, internal_pr
|
|
||
|
|
||
| def extract_linked_issues(body: str) -> list[int]: | ||
| """Extract issue numbers from closing keywords in a PR body.""" | ||
| if not body: | ||
|
|
@@ -161,42 +302,33 @@ def process_pr(pr_num: int) -> None: | |
| data = fetch_pr_data(args.repo, pr_num) | ||
| if data is None: | ||
| return | ||
|
|
||
| author_login = "" | ||
| if isinstance(data.get("author"), dict): | ||
| author_login = data["author"].get("login", "") | ||
| elif isinstance(data.get("author"), str): | ||
| author_login = data["author"] | ||
|
|
||
| label_names = [] | ||
| for lbl in data.get("labels", []) or []: | ||
| if isinstance(lbl, dict): | ||
| label_names.append(lbl.get("name", "")) | ||
| else: | ||
| label_names.append(str(lbl)) | ||
| if not should_include_pr(args.repo, data): | ||
| return | ||
| source_repo, data, internal_pr = normalize_pr_data(args.repo, pr_num, data) | ||
| author_login = get_author_login(data) | ||
| label_names = get_label_names(data) | ||
|
|
||
| body = data.get("body", "") or "" | ||
| explicit_entries = extract_markers(body) | ||
| linked_issues = extract_linked_issues(body) | ||
|
|
||
| file_paths = [] | ||
| for f in data.get("files", []) or []: | ||
| if isinstance(f, dict): | ||
| file_paths.append(f.get("path", "")) | ||
|
|
||
| prs.append( | ||
| { | ||
| "number": data.get("number", pr_num), | ||
| "title": data.get("title", ""), | ||
| "author": author_login, | ||
| "body": body, | ||
| "labels": label_names, | ||
| "merged_at": data.get("mergedAt", ""), | ||
| "explicit_entries": explicit_entries, | ||
| "linked_issues": linked_issues, | ||
| "changed_files": file_paths, | ||
| } | ||
| ) | ||
| file_paths = get_file_paths(data) | ||
|
|
||
| pr = { | ||
| "number": data.get("number", pr_num), | ||
| "url": data.get("url", "") if source_repo == PUBLIC_REPO else "", | ||
| "title": data.get("title", ""), | ||
| "author": author_login, | ||
| "body": body, | ||
| "labels": label_names, | ||
| "merged_at": data.get("mergedAt", ""), | ||
| "explicit_entries": explicit_entries, | ||
| "linked_issues": linked_issues, | ||
| "changed_files": file_paths, | ||
| "source_repo": source_repo, | ||
| } | ||
| if internal_pr is not None: | ||
| pr["internal_pr"] = internal_pr | ||
| prs.append(pr) | ||
|
|
||
| for sha in commit_shas: | ||
| pr_num = extract_pr_number(sha) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warpdotdev/warp-internal cannot leak non-sync internal PRs into changelog artifacts.