From 66df994bf42345d7fba650c3cca534c77f739d31 Mon Sep 17 00:00:00 2001 From: Rachel Lee Nabors Date: Fri, 23 Jan 2026 00:13:51 +0000 Subject: [PATCH 1/6] Prevent vale style review from removing code blocks The LLM was making overly aggressive suggestions that removed entire code blocks from documentation. This adds multiple safeguards: - Add getLinesInCodeBlocks() to detect lines inside fenced code blocks - Skip any suggestion targeting lines inside code blocks - Reject suggestions where suggested text is <70% of original length - Strengthen LLM prompt with explicit "NEVER MODIFY CODE" instructions - Extract validation helpers to reduce cognitive complexity Fixes the issue where PR #700 removed 173 lines of code examples. Co-Authored-By: Claude Opus 4.5 --- scripts/vale-style-review.ts | 95 +++++++++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 11 deletions(-) diff --git a/scripts/vale-style-review.ts b/scripts/vale-style-review.ts index c0323aa93..189c8cc18 100644 --- a/scripts/vale-style-review.ts +++ b/scripts/vale-style-review.ts @@ -45,6 +45,8 @@ const DIFF_HUNK_REGEX = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/; const OWNER = "ArcadeAI"; const REPO = "docs"; const MAX_LENGTH_CHANGE_RATIO = 0.5; +const MIN_SUGGESTED_LENGTH_RATIO = 0.7; // Suggested must be at least 70% of original length +const CODE_BLOCK_MARKER = "```"; // Load style guide const STYLE_GUIDE_PATH = join(__dirname, "..", "STYLEGUIDE.md"); @@ -256,6 +258,11 @@ ${STYLE_GUIDE} TASK: Fix the Vale style issues listed below for this file. Return JSON with your suggestions. +CRITICAL - NEVER MODIFY CODE: +- NEVER suggest changes to lines inside code blocks (lines between \`\`\` markers) +- NEVER suggest changes to code examples, import statements, function calls, or variable names +- SKIP any Vale issue that appears inside a code block or affects code + RULES: 1. ONLY fix the specific issues listed - do not make any other changes 2. Make MINIMAL changes - only change the specific word or phrase mentioned in the issue @@ -263,9 +270,10 @@ RULES: 4. If a message says "Use 'X' instead of 'Y'", find ONLY Y and replace with X - nothing else 5. Preserve technical accuracy - never change code or technical details 6. For passive voice - only fix if active voice is clearer -7. If an issue should NOT be fixed (e.g., passive voice is appropriate), omit it +7. If an issue should NOT be fixed (e.g., passive voice is appropriate, or it's inside code), OMIT IT 8. The "original" field must contain the EXACT full line from the file 9. The "suggested" field must be identical to "original" EXCEPT for the specific fix +10. The "suggested" field must NEVER be shorter than the "original" - you are fixing style, not removing content FILE: ${filename} @@ -364,6 +372,62 @@ async function getSuggestions( } } +// Check which lines are inside code blocks (fenced with ```) +// Returns a Set of line numbers (1-indexed) that are inside code blocks +function getLinesInCodeBlocks(content: string): Set { + const linesInCodeBlocks = new Set(); + const lines = content.split("\n"); + let inCodeBlock = false; + + for (let i = 0; i < lines.length; i += 1) { + const line = lines[i]; + const lineNum = i + 1; // 1-indexed + + // Check if this line starts or ends a code block + if (line.trim().startsWith(CODE_BLOCK_MARKER)) { + if (inCodeBlock) { + // Closing a code block - this line is still part of the block + linesInCodeBlocks.add(lineNum); + inCodeBlock = false; + } else { + // Opening a code block - this line is part of the block + linesInCodeBlocks.add(lineNum); + inCodeBlock = true; + } + } else if (inCodeBlock) { + // Inside a code block + linesInCodeBlocks.add(lineNum); + } + } + + return linesInCodeBlocks; +} + +// Validate that a suggestion has the required fields with correct types +function hasValidFields(s: Suggestion): boolean { + return ( + typeof s.line === "number" && + typeof s.original === "string" && + typeof s.suggested === "string" && + typeof s.rule === "string" + ); +} + +// Check if a suggestion would remove too much content +function wouldRemoveContent(s: Suggestion): boolean { + const originalLen = s.original.trim().length; + const suggestedLen = s.suggested.trim().length; + return ( + originalLen > 0 && suggestedLen < originalLen * MIN_SUGGESTED_LENGTH_RATIO + ); +} + +// Check if a suggestion makes destructive length changes +function isDestructiveChange(s: Suggestion): boolean { + const lengthDiff = Math.abs(s.suggested.length - s.original.length); + return lengthDiff > s.original.length * MAX_LENGTH_CHANGE_RATIO; +} + // Format suggestions as GitHub review comments // Only includes suggestions for lines that are in the PR diff and don't already have Vale comments function formatReviewComments(options: { @@ -382,6 +446,9 @@ function formatReviewComments(options: { } = options; const lines = fileContent.split("\n"); + // Get lines that are inside code blocks - NEVER modify these + const linesInCodeBlocks = getLinesInCodeBlocks(fileContent); + return suggestions .filter((s) => { // Skip if there's already a Vale comment on this line @@ -391,30 +458,36 @@ function formatReviewComments(options: { return false; } // Validate required fields exist and have correct types - if ( - typeof s.line !== "number" || - typeof s.original !== "string" || - typeof s.suggested !== "string" || - typeof s.rule !== "string" - ) { + if (!hasValidFields(s)) { return false; } // Validate line number is in range if (s.line < 1 || s.line > lines.length) { return false; } + // CRITICAL: Never modify lines inside code blocks + if (linesInCodeBlocks.has(s.line)) { + console.log( + ` Skipping line ${s.line} (inside code block - code must not be modified)` + ); + return false; + } // Validate line is in the PR diff (GitHub API requirement) if (!commentableLines.has(s.line)) { return false; } - // Reject destructive suggestions (length change > 50% of original) - const lengthDiff = Math.abs(s.suggested.length - s.original.length); - if (lengthDiff > s.original.length * MAX_LENGTH_CHANGE_RATIO) { + // Reject suggestions that remove content (suggested is too short) + if (wouldRemoveContent(s)) { console.log( - ` Skipping destructive suggestion on line ${s.line} (length change: ${lengthDiff} chars)` + ` Skipping line ${s.line} (suggested text too short - would remove content)` ); return false; } + // Reject destructive suggestions (length change > 50% of original) + if (isDestructiveChange(s)) { + console.log(` Skipping destructive suggestion on line ${s.line}`); + return false; + } // Validate original content matches (loosely) const actualLine = lines[s.line - 1]; return actualLine.includes( From 8c44a35a67e513f921e9a9ef2cf557797689ed8f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 23 Jan 2026 00:22:08 +0000 Subject: [PATCH 2/6] Fix inconsistent length validation and prompt contradiction in vale-style-review - Make isDestructiveChange use trimmed lengths to match wouldRemoveContent - Update prompt to reflect actual validation logic (allows 30% shorter text) This fixes false positives where whitespace-only changes were incorrectly rejected, and aligns the LLM prompt with the actual 70% length threshold in the code. --- scripts/vale-style-review.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/vale-style-review.ts b/scripts/vale-style-review.ts index 189c8cc18..f6b4a2b43 100644 --- a/scripts/vale-style-review.ts +++ b/scripts/vale-style-review.ts @@ -273,7 +273,7 @@ RULES: 7. If an issue should NOT be fixed (e.g., passive voice is appropriate, or it's inside code), OMIT IT 8. The "original" field must contain the EXACT full line from the file 9. The "suggested" field must be identical to "original" EXCEPT for the specific fix -10. The "suggested" field must NEVER be shorter than the "original" - you are fixing style, not removing content +10. The "suggested" field should not be significantly shorter than the "original" - you are fixing style, not removing content FILE: ${filename} @@ -424,8 +424,8 @@ function wouldRemoveContent(s: Suggestion): boolean { // Check if a suggestion makes destructive length changes function isDestructiveChange(s: Suggestion): boolean { - const lengthDiff = Math.abs(s.suggested.length - s.original.length); - return lengthDiff > s.original.length * MAX_LENGTH_CHANGE_RATIO; + const lengthDiff = Math.abs(s.suggested.trim().length - s.original.trim().length); + return lengthDiff > s.original.trim().length * MAX_LENGTH_CHANGE_RATIO; } // Format suggestions as GitHub review comments From e805c9977e96eecdd604772a9c025e625e0da2fb Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 23 Jan 2026 00:30:29 +0000 Subject: [PATCH 3/6] Fix nested code block detection in getLinesInCodeBlocks The function now tracks the opening backtick count and only closes a code block when encountering a line with at least as many backticks. This prevents nested code blocks (e.g., 4+ backticks wrapping 3-backtick blocks) from being incorrectly detected. --- scripts/vale-style-review.ts | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/scripts/vale-style-review.ts b/scripts/vale-style-review.ts index f6b4a2b43..64d8135de 100644 --- a/scripts/vale-style-review.ts +++ b/scripts/vale-style-review.ts @@ -378,6 +378,7 @@ function getLinesInCodeBlocks(content: string): Set { const linesInCodeBlocks = new Set(); const lines = content.split("\n"); let inCodeBlock = false; + let openingBacktickCount = 0; for (let i = 0; i < lines.length; i += 1) { const line = lines[i]; @@ -385,14 +386,33 @@ function getLinesInCodeBlocks(content: string): Set { // Check if this line starts or ends a code block if (line.trim().startsWith(CODE_BLOCK_MARKER)) { + // Count consecutive backticks at the start + const trimmedLine = line.trim(); + let backtickCount = 0; + for (const char of trimmedLine) { + if (char === "`") { + backtickCount += 1; + } else { + break; + } + } + if (inCodeBlock) { - // Closing a code block - this line is still part of the block - linesInCodeBlocks.add(lineNum); - inCodeBlock = false; + // Only close if we have at least as many backticks as the opening + if (backtickCount >= openingBacktickCount) { + // Closing a code block - this line is still part of the block + linesInCodeBlocks.add(lineNum); + inCodeBlock = false; + openingBacktickCount = 0; + } else { + // Not enough backticks to close, treat as content inside the code block + linesInCodeBlocks.add(lineNum); + } } else { // Opening a code block - this line is part of the block linesInCodeBlocks.add(lineNum); inCodeBlock = true; + openingBacktickCount = backtickCount; } } else if (inCodeBlock) { // Inside a code block From 827e2563bfdce8c3f2f5490c508dace0c05e22a3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 23 Jan 2026 00:38:54 +0000 Subject: [PATCH 4/6] Fix CommonMark code fence parsing bugs in vale-style-review - Enforce 3-space indentation limit for code fences per CommonMark spec - Validate closing fences only accept whitespace after backticks - Prevent misidentification of indented backticks as fence markers - Ensure non-whitespace content after backticks keeps fence open --- scripts/vale-style-review.ts | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/scripts/vale-style-review.ts b/scripts/vale-style-review.ts index 64d8135de..08e8413e7 100644 --- a/scripts/vale-style-review.ts +++ b/scripts/vale-style-review.ts @@ -384,10 +384,22 @@ function getLinesInCodeBlocks(content: string): Set { const line = lines[i]; const lineNum = i + 1; // 1-indexed + // Count leading spaces (CommonMark allows at most 3 spaces for fence markers) + let leadingSpaces = 0; + for (const char of line) { + if (char === " ") { + leadingSpaces += 1; + } else { + break; + } + } + + // Only consider fence markers if indentation is 0-3 spaces + const trimmedLine = line.trim(); + // Check if this line starts or ends a code block - if (line.trim().startsWith(CODE_BLOCK_MARKER)) { + if (leadingSpaces <= 3 && trimmedLine.startsWith(CODE_BLOCK_MARKER)) { // Count consecutive backticks at the start - const trimmedLine = line.trim(); let backtickCount = 0; for (const char of trimmedLine) { if (char === "`") { @@ -398,14 +410,19 @@ function getLinesInCodeBlocks(content: string): Set { } if (inCodeBlock) { - // Only close if we have at least as many backticks as the opening - if (backtickCount >= openingBacktickCount) { + // For closing fence: must have at least as many backticks as opening + // AND only whitespace after the backticks (per CommonMark spec) + const afterBackticks = trimmedLine.slice(backtickCount); + if ( + backtickCount >= openingBacktickCount && + afterBackticks.trim() === "" + ) { // Closing a code block - this line is still part of the block linesInCodeBlocks.add(lineNum); inCodeBlock = false; openingBacktickCount = 0; } else { - // Not enough backticks to close, treat as content inside the code block + // Not a valid closing fence, treat as content inside the code block linesInCodeBlocks.add(lineNum); } } else { From b7c17ed9099086e0ac6914fc847a23c6734ce118 Mon Sep 17 00:00:00 2001 From: Rachel Lee Nabors Date: Fri, 23 Jan 2026 16:04:48 +0000 Subject: [PATCH 5/6] Fix lint errors in vale-style-review.ts - Extract magic numbers to constants (MAX_FENCE_INDENT, TAB_STOP_WIDTH) - Reduce cognitive complexity in getLinesInCodeBlocks by extracting helper functions - Fix line formatting in isDestructiveChange Co-Authored-By: Claude Opus 4.5 --- scripts/vale-style-review.ts | 100 ++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 36 deletions(-) diff --git a/scripts/vale-style-review.ts b/scripts/vale-style-review.ts index 08e8413e7..3ef040c0a 100644 --- a/scripts/vale-style-review.ts +++ b/scripts/vale-style-review.ts @@ -47,6 +47,8 @@ const REPO = "docs"; const MAX_LENGTH_CHANGE_RATIO = 0.5; const MIN_SUGGESTED_LENGTH_RATIO = 0.7; // Suggested must be at least 70% of original length const CODE_BLOCK_MARKER = "```"; +const MAX_FENCE_INDENT = 3; // CommonMark allows 0-3 spaces before fence markers +const TAB_STOP_WIDTH = 4; // CommonMark tab stops are at multiples of 4 // Load style guide const STYLE_GUIDE_PATH = join(__dirname, "..", "STYLEGUIDE.md"); @@ -372,6 +374,60 @@ async function getSuggestions( } } +// Convert leading whitespace (including tabs) to equivalent space count +// Per CommonMark spec: tabs behave as if replaced by spaces with tab stop of 4 +// Tab stops are at positions 0, 4, 8, 12, etc. (multiples of 4) +function countLeadingWhitespace(line: string): number { + let position = 0; + for (const char of line) { + if (char === " ") { + position += 1; + } else if (char === "\t") { + // Advance to next tab stop (multiple of TAB_STOP_WIDTH) + position = Math.ceil((position + 1) / TAB_STOP_WIDTH) * TAB_STOP_WIDTH; + } else { + // Non-whitespace character - stop counting + break; + } + } + return position; +} + +// Count consecutive backticks at the start of a trimmed line +function countLeadingBackticks(trimmedLine: string): number { + let count = 0; + for (const char of trimmedLine) { + if (char === "`") { + count += 1; + } else { + break; + } + } + return count; +} + +// Check if a line could be a fence marker (has valid indentation and starts with ```) +function isFenceCandidate(line: string): boolean { + const leadingWhitespace = countLeadingWhitespace(line); + const trimmedLine = line.trim(); + return ( + leadingWhitespace <= MAX_FENCE_INDENT && + trimmedLine.startsWith(CODE_BLOCK_MARKER) + ); +} + +// Check if a line is a valid closing fence +function isValidClosingFence( + trimmedLine: string, + backtickCount: number, + openingBacktickCount: number +): boolean { + // For closing fence: must have at least as many backticks as opening + // AND only whitespace after the backticks (per CommonMark spec) + const afterBackticks = trimmedLine.slice(backtickCount); + return backtickCount >= openingBacktickCount && afterBackticks.trim() === ""; +} + // Check which lines are inside code blocks (fenced with ```) // Returns a Set of line numbers (1-indexed) that are inside code blocks function getLinesInCodeBlocks(content: string): Set { @@ -383,56 +439,26 @@ function getLinesInCodeBlocks(content: string): Set { for (let i = 0; i < lines.length; i += 1) { const line = lines[i]; const lineNum = i + 1; // 1-indexed - - // Count leading spaces (CommonMark allows at most 3 spaces for fence markers) - let leadingSpaces = 0; - for (const char of line) { - if (char === " ") { - leadingSpaces += 1; - } else { - break; - } - } - - // Only consider fence markers if indentation is 0-3 spaces const trimmedLine = line.trim(); - // Check if this line starts or ends a code block - if (leadingSpaces <= 3 && trimmedLine.startsWith(CODE_BLOCK_MARKER)) { - // Count consecutive backticks at the start - let backtickCount = 0; - for (const char of trimmedLine) { - if (char === "`") { - backtickCount += 1; - } else { - break; - } - } + if (isFenceCandidate(line)) { + const backtickCount = countLeadingBackticks(trimmedLine); if (inCodeBlock) { - // For closing fence: must have at least as many backticks as opening - // AND only whitespace after the backticks (per CommonMark spec) - const afterBackticks = trimmedLine.slice(backtickCount); + linesInCodeBlocks.add(lineNum); if ( - backtickCount >= openingBacktickCount && - afterBackticks.trim() === "" + isValidClosingFence(trimmedLine, backtickCount, openingBacktickCount) ) { - // Closing a code block - this line is still part of the block - linesInCodeBlocks.add(lineNum); inCodeBlock = false; openingBacktickCount = 0; - } else { - // Not a valid closing fence, treat as content inside the code block - linesInCodeBlocks.add(lineNum); } } else { - // Opening a code block - this line is part of the block + // Opening a code block linesInCodeBlocks.add(lineNum); inCodeBlock = true; openingBacktickCount = backtickCount; } } else if (inCodeBlock) { - // Inside a code block linesInCodeBlocks.add(lineNum); } } @@ -461,7 +487,9 @@ function wouldRemoveContent(s: Suggestion): boolean { // Check if a suggestion makes destructive length changes function isDestructiveChange(s: Suggestion): boolean { - const lengthDiff = Math.abs(s.suggested.trim().length - s.original.trim().length); + const lengthDiff = Math.abs( + s.suggested.trim().length - s.original.trim().length + ); return lengthDiff > s.original.trim().length * MAX_LENGTH_CHANGE_RATIO; } From 606e9b426e826ca960af2d617ee3b5cbf1184cd8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 23 Jan 2026 16:19:29 +0000 Subject: [PATCH 6/6] Fix: Support tilde-fenced code blocks in vale-style-review The getLinesInCodeBlocks function now detects both backtick ( and ~~~ - Enhanced isValidClosingFence to match opening fence character - Updated getLinesInCodeBlocks to track fence type --- scripts/vale-style-review.ts | 49 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/scripts/vale-style-review.ts b/scripts/vale-style-review.ts index 3ef040c0a..bb322e004 100644 --- a/scripts/vale-style-review.ts +++ b/scripts/vale-style-review.ts @@ -46,7 +46,6 @@ const OWNER = "ArcadeAI"; const REPO = "docs"; const MAX_LENGTH_CHANGE_RATIO = 0.5; const MIN_SUGGESTED_LENGTH_RATIO = 0.7; // Suggested must be at least 70% of original length -const CODE_BLOCK_MARKER = "```"; const MAX_FENCE_INDENT = 3; // CommonMark allows 0-3 spaces before fence markers const TAB_STOP_WIDTH = 4; // CommonMark tab stops are at multiples of 4 @@ -261,7 +260,7 @@ ${STYLE_GUIDE} TASK: Fix the Vale style issues listed below for this file. Return JSON with your suggestions. CRITICAL - NEVER MODIFY CODE: -- NEVER suggest changes to lines inside code blocks (lines between \`\`\` markers) +- NEVER suggest changes to lines inside code blocks (lines between \`\`\` or ~~~ markers) - NEVER suggest changes to code examples, import statements, function calls, or variable names - SKIP any Vale issue that appears inside a code block or affects code @@ -393,48 +392,58 @@ function countLeadingWhitespace(line: string): number { return position; } -// Count consecutive backticks at the start of a trimmed line -function countLeadingBackticks(trimmedLine: string): number { +// Count consecutive fence characters (backticks or tildes) at the start of a trimmed line +// Returns both the count and the character type +function countLeadingFenceChars(trimmedLine: string): { count: number; char: string } { + const firstChar = trimmedLine[0]; + if (firstChar !== "`" && firstChar !== "~") { + return { count: 0, char: "" }; + } + let count = 0; for (const char of trimmedLine) { - if (char === "`") { + if (char === firstChar) { count += 1; } else { break; } } - return count; + return { count, char: firstChar }; } -// Check if a line could be a fence marker (has valid indentation and starts with ```) +// Check if a line could be a fence marker (has valid indentation and starts with ``` or ~~~) function isFenceCandidate(line: string): boolean { const leadingWhitespace = countLeadingWhitespace(line); const trimmedLine = line.trim(); return ( leadingWhitespace <= MAX_FENCE_INDENT && - trimmedLine.startsWith(CODE_BLOCK_MARKER) + (trimmedLine.startsWith("```") || trimmedLine.startsWith("~~~")) ); } // Check if a line is a valid closing fence function isValidClosingFence( trimmedLine: string, - backtickCount: number, - openingBacktickCount: number + fenceInfo: { count: number; char: string }, + openingFenceInfo: { count: number; char: string } ): boolean { - // For closing fence: must have at least as many backticks as opening - // AND only whitespace after the backticks (per CommonMark spec) - const afterBackticks = trimmedLine.slice(backtickCount); - return backtickCount >= openingBacktickCount && afterBackticks.trim() === ""; + // For closing fence: must use same character, have at least as many chars as opening, + // AND only whitespace after the fence chars (per CommonMark spec) + const afterFenceChars = trimmedLine.slice(fenceInfo.count); + return ( + fenceInfo.char === openingFenceInfo.char && + fenceInfo.count >= openingFenceInfo.count && + afterFenceChars.trim() === "" + ); } -// Check which lines are inside code blocks (fenced with ```) +// Check which lines are inside code blocks (fenced with ``` or ~~~) // Returns a Set of line numbers (1-indexed) that are inside code blocks function getLinesInCodeBlocks(content: string): Set { const linesInCodeBlocks = new Set(); const lines = content.split("\n"); let inCodeBlock = false; - let openingBacktickCount = 0; + let openingFenceInfo = { count: 0, char: "" }; for (let i = 0; i < lines.length; i += 1) { const line = lines[i]; @@ -442,21 +451,21 @@ function getLinesInCodeBlocks(content: string): Set { const trimmedLine = line.trim(); if (isFenceCandidate(line)) { - const backtickCount = countLeadingBackticks(trimmedLine); + const fenceInfo = countLeadingFenceChars(trimmedLine); if (inCodeBlock) { linesInCodeBlocks.add(lineNum); if ( - isValidClosingFence(trimmedLine, backtickCount, openingBacktickCount) + isValidClosingFence(trimmedLine, fenceInfo, openingFenceInfo) ) { inCodeBlock = false; - openingBacktickCount = 0; + openingFenceInfo = { count: 0, char: "" }; } } else { // Opening a code block linesInCodeBlocks.add(lineNum); inCodeBlock = true; - openingBacktickCount = backtickCount; + openingFenceInfo = fenceInfo; } } else if (inCodeBlock) { linesInCodeBlocks.add(lineNum);