-
Notifications
You must be signed in to change notification settings - Fork 395
Consolidate diff --git header parsing across safe-output PR handlers
#32578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2851773
6440bd8
8b95966
f5db498
e2367e2
402beea
41e075b
1838f5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| // @ts-check | ||
| const A_PREFIX_LENGTH = 2; | ||
| const B_PREFIX_LENGTH = 2; | ||
| const QUOTED_PREFIX_LENGTH = 3; | ||
|
|
||
| /** | ||
| * Parses a single `diff --git` header line and extracts both old/new paths. | ||
| * Handles unquoted and C-style quoted pathspecs. | ||
| * | ||
| * @param {string} headerLine | ||
| * @returns {{ oldPath: string|null, newPath: string|null, parseable: boolean }} | ||
| */ | ||
| function parseDiffGitHeader(headerLine) { | ||
| const sanitizedHeaderLine = headerLine.endsWith("\r") ? headerLine.slice(0, -1) : headerLine; | ||
| const rest = sanitizedHeaderLine.replace(/^diff --git /, ""); | ||
| if (rest === sanitizedHeaderLine) { | ||
| return { oldPath: null, newPath: null, parseable: false }; | ||
| } | ||
|
|
||
| // Git may emit unquoted paths that still contain spaces in `diff --git` | ||
| // headers. In that case, split using the required ` b/` token boundary | ||
| // instead of generic whitespace tokenization. | ||
| if (rest.startsWith("a/")) { | ||
| const quotedSep = rest.indexOf(' "b/'); | ||
| const unquotedSep = rest.indexOf(" b/"); | ||
| const foundSeparatorIndices = [quotedSep, unquotedSep].filter(idx => idx >= 0); | ||
| if (foundSeparatorIndices.length > 0) { | ||
| const sep = Math.min(...foundSeparatorIndices); | ||
| const oldPath = rest.slice(A_PREFIX_LENGTH, sep) || null; | ||
| const newToken = rest.slice(sep + 1).trimEnd(); | ||
| let newPath = null; | ||
| if (newToken.startsWith('"b/')) { | ||
| if (newToken.endsWith('"')) { | ||
| newPath = newToken.slice(QUOTED_PREFIX_LENGTH, -1) || null; | ||
| } else { | ||
| newPath = newToken.slice(QUOTED_PREFIX_LENGTH) || null; | ||
| } | ||
| } else if (newToken.startsWith("b/")) { | ||
| newPath = newToken.slice(B_PREFIX_LENGTH) || null; | ||
| } | ||
| if (oldPath || newPath) { | ||
| return { oldPath, newPath, parseable: true }; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** @type {string[]} */ | ||
| const tokens = []; | ||
| const isWhitespace = ch => ch === " " || ch === "\t" || ch === "\r" || ch === "\n"; | ||
| let i = 0; | ||
| while (i < rest.length && tokens.length < 2) { | ||
| while (i < rest.length && isWhitespace(rest[i])) { | ||
| i++; | ||
| } | ||
|
Comment on lines
+51
to
+54
|
||
| if (i >= rest.length) { | ||
| break; | ||
| } | ||
|
|
||
| let token = ""; | ||
| if (rest[i] === '"') { | ||
| token += rest[i++]; | ||
| let closedQuote = false; | ||
| while (i < rest.length) { | ||
| const ch = rest[i++]; | ||
| token += ch; | ||
| if (ch === "\\" && i < rest.length) { | ||
| token += rest[i++]; | ||
| } else if (ch === '"') { | ||
| closedQuote = true; | ||
| break; | ||
| } | ||
| } | ||
| if (!closedQuote) { | ||
| return { oldPath: null, newPath: null, parseable: false }; | ||
| } | ||
| } else { | ||
| while (i < rest.length && !isWhitespace(rest[i])) { | ||
| token += rest[i++]; | ||
| } | ||
| } | ||
| tokens.push(token); | ||
| } | ||
|
|
||
| if (tokens.length < 2) { | ||
| return { oldPath: null, newPath: null, parseable: false }; | ||
| } | ||
|
|
||
| const stripPrefix = tok => { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/zoom-out] The original implementation had the same gap. Now that this is the shared module, it would be worth adding a guard — e.g. returning |
||
| if (tok.startsWith('"a/') || tok.startsWith('"b/')) { | ||
| return tok.slice(QUOTED_PREFIX_LENGTH, tok.endsWith('"') ? -1 : undefined); | ||
| } | ||
| if (tok.startsWith("a/") || tok.startsWith("b/")) { | ||
| return tok.slice(B_PREFIX_LENGTH); | ||
| } | ||
| return tok; | ||
| }; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/improve-codebase-architecture] Consider adding a |
||
|
|
||
| const oldPath = stripPrefix(tokens[0]) || null; | ||
| const newPath = stripPrefix(tokens[1]) || null; | ||
| if (!oldPath && !newPath) { | ||
| return { oldPath: null, newPath: null, parseable: false }; | ||
| } | ||
|
|
||
| return { oldPath, newPath, parseable: true }; | ||
| } | ||
|
|
||
| /** | ||
| * Extracts parsed entries for all `diff --git` headers in a patch. | ||
| * | ||
| * @param {string} patchContent | ||
| * @returns {{ oldPath: string|null, newPath: string|null, parseable: boolean, headerIndex: number, headerLine: string }[]} | ||
| */ | ||
| function extractDiffGitHeaderEntries(patchContent) { | ||
| if (!patchContent || !patchContent.trim()) { | ||
| return []; | ||
| } | ||
|
|
||
| /** @type {{ oldPath: string|null, newPath: string|null, parseable: boolean, headerIndex: number, headerLine: string }[]} */ | ||
| const entries = []; | ||
| const headerRe = /^diff --git .*$/gm; | ||
| let match; | ||
| while ((match = headerRe.exec(patchContent)) !== null) { | ||
| entries.push({ | ||
| ...parseDiffGitHeader(match[0]), | ||
| headerIndex: match.index, | ||
| headerLine: match[0], | ||
| }); | ||
| } | ||
| return entries; | ||
| } | ||
|
|
||
| module.exports = { parseDiffGitHeader, extractDiffGitHeaderEntries }; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/zoom-out] A dedicated |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[/zoom-out] The test name
"should preserve full escaped paths from quoted headers"and its assertionexpect(result).toContain("foo\\\\bar/config.json")(which resolves to the 4-char sequencefoo\\bar) documents that paths with backslashes are returned in their raw (un-unescaped) form. This is good for regression protection, but it also serves as implicit confirmation that the paths returned are not filesystem-ready for consumers that do string comparison (see the related comment onstripPrefix). Consider extracting this fact into a// NOTE: paths are returned rawcomment inextractDiffGitHeaderEntriesso consumers don't have to infer it from a test.