Skip to content

Commit dee5188

Browse files
TravisEz13Copilotxtqqczze
authored
[release/v7.6] Add markdown link verification for PRs (#26445)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: xtqqczze <45661989+xtqqczze@users.noreply.github.com>
1 parent ae7f849 commit dee5188

File tree

6 files changed

+916
-0
lines changed

6 files changed

+916
-0
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
#requires -version 7
5+
# Markdig is always available in PowerShell 7
6+
<#
7+
.SYNOPSIS
8+
Parse CHANGELOG files using Markdig to extract links.
9+
10+
.DESCRIPTION
11+
This script uses Markdig.Markdown.Parse to parse all markdown files in the CHANGELOG directory
12+
and extract different types of links (inline links, reference links, etc.).
13+
14+
.PARAMETER ChangelogPath
15+
Path to the CHANGELOG directory. Defaults to ./CHANGELOG
16+
17+
.PARAMETER LinkType
18+
Filter by link type: All, Inline, Reference, AutoLink. Defaults to All.
19+
20+
.EXAMPLE
21+
.\Parse-MarkdownLink.ps1
22+
23+
.EXAMPLE
24+
.\Parse-MarkdownLink.ps1 -LinkType Reference
25+
#>
26+
27+
param(
28+
# Directory whose top-level *.md files are parsed (no recursion into subdirectories).
[string]$ChangelogPath = "./CHANGELOG",
29+
# Restricts the output to one link category; "All" disables the filter.
[ValidateSet("All", "Inline", "Reference", "AutoLink")]
30+
[string]$LinkType = "All"
31+
)
32+
33+
# Diagnostic message only; it is written to the verbose stream.
Write-Verbose "Using built-in Markdig functionality to parse markdown files"
34+
35+
function Get-LinksFromMarkdownAst {
    <#
    .SYNOPSIS
    Recursively collects link records from a Markdig syntax tree.

    .DESCRIPTION
    Examines $Node and, when it is an inline link, an autolink or a link
    reference definition, appends one PSCustomObject to $Links with the
    properties Path, Line, Column, Url, Text, Type and IsImage.
    Line and Column are converted from Markdig's 0-based values to 1-based.
    The function then descends into the node's children (blocks of a
    container, the inline content of a leaf block, or the children of a
    container inline) and repeats the process.

    .PARAMETER Node
    The Markdig syntax node (document, block or inline) to inspect.

    .PARAMETER FileName
    Value stored in the Path property of every record that is produced.

    .PARAMETER Links
    ArrayList that receives the records. When it is $null the function
    returns immediately without doing anything.

    .OUTPUTS
    None. Results are appended to $Links.
    #>
    param(
        [Parameter(Mandatory)]
        [object]$Node,
        [Parameter(Mandatory)]
        [string]$FileName,
        [System.Collections.ArrayList]$Links
    )

    if ($null -eq $Links) {
        return
    }

    # --- Step 1: record the current node if it is a link -------------------
    if ($Node -is [Markdig.Syntax.Inlines.LinkInline]) {
        $linkInfo = [PSCustomObject]@{
            Path = $FileName
            Line = $Node.Line + 1  # Convert to 1-based line numbering
            Column = $Node.Column + 1  # Convert to 1-based column numbering
            Url = $Node.Url ?? ""
            Text = $Node.FirstChild?.ToString() ?? ""
            Type = "Inline"
            IsImage = $Node.IsImage
        }
        [void]$Links.Add($linkInfo)
    }
    elseif ($Node -is [Markdig.Syntax.Inlines.AutolinkInline]) {
        $linkInfo = [PSCustomObject]@{
            Path = $FileName
            Line = $Node.Line + 1
            Column = $Node.Column + 1
            Url = $Node.Url ?? ""
            Text = $Node.Url ?? ""
            Type = "AutoLink"
            IsImage = $false
        }
        [void]$Links.Add($linkInfo)
    }
    elseif ($Node -is [Markdig.Syntax.LinkReferenceDefinition]) {
        # Definitions are recorded only here. A LinkReferenceDefinitionGroup is
        # itself a ContainerBlock, so the traversal below reaches each of its
        # definitions exactly once; recording them a second time at the group
        # level would emit every reference definition twice.
        $linkInfo = [PSCustomObject]@{
            Path = $FileName
            Line = $Node.Line + 1
            Column = $Node.Column + 1
            Url = $Node.Url ?? ""
            Text = $Node.Label ?? ""
            Type = "Reference"
            IsImage = $false
        }
        [void]$Links.Add($linkInfo)
    }

    # --- Step 2: descend into children -------------------------------------
    # Block containers (this includes the MarkdownDocument root and
    # LinkReferenceDefinitionGroup): visit every child block.
    if ($Node -is [Markdig.Syntax.ContainerBlock]) {
        foreach ($child in $Node) {
            Get-LinksFromMarkdownAst -Node $child -FileName $FileName -Links $Links
        }
    }
    # Leaf blocks with inlines: process the inline content.
    elseif ($Node -is [Markdig.Syntax.LeafBlock] -and $Node.Inline) {
        Get-LinksFromMarkdownAst -Node $Node.Inline -FileName $FileName -Links $Links
    }
    # Inline containers (a LinkInline is one too, so nested links/images are
    # still visited): walk the sibling chain.
    elseif ($Node -is [Markdig.Syntax.Inlines.ContainerInline]) {
        $child = $Node.FirstChild
        while ($child) {
            Get-LinksFromMarkdownAst -Node $child -FileName $FileName -Links $Links
            $child = $child.NextSibling
        }
    }
    # Any other node type that happens to expose a FirstChild property.
    elseif ($Node.PSObject.Properties.Name -contains "FirstChild" -and $Node.FirstChild) {
        $child = $Node.FirstChild
        while ($child) {
            Get-LinksFromMarkdownAst -Node $child -FileName $FileName -Links $Links
            $child = $child.NextSibling
        }
    }
}
133+
134+
function Parse-ChangelogFiles {
    <#
    .SYNOPSIS
    Extracts links from every *.md file located directly in a directory.

    .DESCRIPTION
    Reads each markdown file as UTF-8, parses it with Markdig and collects the
    links through Get-LinksFromMarkdownAst. A file that cannot be processed
    produces a warning and is skipped; the remaining files are still handled.
    When the script-level $LinkType is not "All", only records whose Type
    equals $LinkType are returned.

    .PARAMETER Path
    Directory to scan. Subdirectories are not searched.

    .OUTPUTS
    The collected link records. Nothing is returned when the path does not
    exist (an error is written) or contains no markdown files (a warning is
    written).
    #>
    param(
        [string]$Path
    )

    if (-not (Test-Path $Path)) {
        Write-Error "CHANGELOG directory not found: $Path"
        return
    }

    $markdownFiles = Get-ChildItem -Path $Path -Filter "*.md" -File

    if ($markdownFiles.Count -eq 0) {
        Write-Warning "No markdown files found in $Path"
        return
    }

    $allLinks = [System.Collections.ArrayList]::new()

    foreach ($file in $markdownFiles) {
        Write-Verbose "Processing file: $($file.Name)"

        try {
            # -LiteralPath: the name comes from Get-ChildItem and must be used
            # verbatim. With -Path, characters such as '[' and ']' are treated
            # as wildcard syntax and the file would not be read.
            $content = Get-Content -LiteralPath $file.FullName -Raw -Encoding UTF8

            # Parse the markdown content using Markdig.
            # NOTE(review): a MarkdownPipelineBuilder (not a built MarkdownPipeline)
            # is passed here; kept unchanged to preserve current behaviour -
            # confirm which Parse overload PowerShell binds to.
            $document = [Markdig.Markdown]::Parse($content, [Markdig.MarkdownPipelineBuilder]::new())

            # Extract links from the AST into the shared collection.
            Get-LinksFromMarkdownAst -Node $document -FileName $file.FullName -Links $allLinks

        } catch {
            Write-Warning "Error processing file $($file.Name): $($_.Exception.Message)"
        }
    }

    # Filter by link type if specified ($LinkType is the script parameter).
    if ($LinkType -ne "All") {
        $allLinks = $allLinks | Where-Object { $_.Type -eq $LinkType }
    }

    return $allLinks
}
177+
178+
# Main execution: collect the link records for the directory given by -ChangelogPath.
179+
$links = Parse-ChangelogFiles -Path $ChangelogPath
180+
181+
# Output PowerShell objects: write the collected records to the pipeline.
182+
$links
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Verify Markdown Links Action
2+
3+
A GitHub composite action that verifies all links in markdown files using PowerShell and Markdig.
4+
5+
## Features
6+
7+
- ✅ Parses markdown files using Markdig (built into PowerShell 7)
8+
- ✅ Extracts all link types: inline links, reference links, and autolinks
9+
- ✅ Verifies HTTP/HTTPS links with configurable timeouts and retries
10+
- ✅ Validates local file references
11+
- ✅ Supports excluding specific URL patterns
12+
- ✅ Provides detailed error reporting with file locations
13+
- ✅ Outputs metrics for CI/CD integration
14+
15+
## Usage
16+
17+
### Basic Usage
18+
19+
```yaml
20+
- name: Verify Markdown Links
21+
uses: ./.github/actions/infrastructure/markdownlinks
22+
with:
23+
path: './CHANGELOG'
24+
```
25+
26+
### Advanced Usage
27+
28+
```yaml
29+
- name: Verify Markdown Links
30+
uses: ./.github/actions/infrastructure/markdownlinks
31+
with:
32+
path: './docs'
33+
fail-on-error: 'true'
34+
timeout: 30
35+
max-retries: 2
36+
exclude-patterns: '*.example.com/*,*://localhost/*'
37+
```
38+
39+
### With Outputs
40+
41+
```yaml
42+
- name: Verify Markdown Links
43+
id: verify-links
44+
uses: ./.github/actions/infrastructure/markdownlinks
45+
with:
46+
path: './CHANGELOG'
47+
fail-on-error: 'false'
48+
49+
- name: Display Results
50+
run: |
51+
echo "Total links: ${{ steps.verify-links.outputs.total-links }}"
52+
echo "Passed: ${{ steps.verify-links.outputs.passed-links }}"
53+
echo "Failed: ${{ steps.verify-links.outputs.failed-links }}"
54+
echo "Skipped: ${{ steps.verify-links.outputs.skipped-links }}"
55+
```
56+
57+
## Inputs
58+
59+
| Input | Description | Required | Default |
60+
|-------|-------------|----------|---------|
61+
| `path` | Path to the directory containing markdown files to verify | No | `./CHANGELOG` |
62+
| `exclude-patterns` | Comma-separated list of URL patterns to exclude from verification | No | `''` |
63+
| `fail-on-error` | Whether to fail the action if any links are broken | No | `true` |
64+
| `timeout` | Timeout in seconds for HTTP requests | No | `30` |
65+
| `max-retries` | Maximum number of retries for failed requests | No | `2` |
66+
67+
## Outputs
68+
69+
| Output | Description |
70+
|--------|-------------|
71+
| `total-links` | Total number of unique links checked |
72+
| `passed-links` | Number of links that passed verification |
73+
| `failed-links` | Number of links that failed verification |
74+
| `skipped-links` | Number of links that were skipped |
75+
76+
## Excluded Link Types
77+
78+
The action automatically skips the following link types:
79+
80+
- **Anchor links** (`#section-name`) - Would require full markdown parsing
81+
- **Email links** (`mailto:user@example.com`) - Cannot be verified without sending email
82+
83+
## GitHub Workflow Test
84+
85+
This section provides a workflow example and instructions for testing the link verification action.
86+
87+
### Testing the Workflow
88+
89+
To test that the workflow properly detects broken links:
90+
91+
1. Make a change to a markdown file that contains a broken link and push it (this README.md file already contains one in the [Broken Link Test](#broken-link-test) section)
92+
1. The workflow will run and should fail, reporting the broken link(s)
93+
1. Revert your change to this file
94+
1. Push again to verify the workflow passes
95+
96+
### Example Workflow Configuration
97+
98+
```yaml
99+
name: Verify Links
100+
101+
on:
102+
push:
103+
branches: [ main ]
104+
paths:
105+
- '**/*.md'
106+
pull_request:
107+
branches: [ main ]
108+
paths:
109+
- '**/*.md'
110+
schedule:
111+
# Run weekly to catch external link rot
112+
- cron: '0 0 * * 0'
113+
114+
jobs:
115+
verify-links:
116+
runs-on: ubuntu-latest
117+
steps:
118+
- name: Checkout
119+
uses: actions/checkout@v4
120+
121+
- name: Verify CHANGELOG Links
122+
uses: ./.github/actions/infrastructure/markdownlinks
123+
with:
124+
path: './CHANGELOG'
125+
fail-on-error: 'true'
126+
127+
- name: Verify Documentation Links
128+
uses: ./.github/actions/infrastructure/markdownlinks
129+
with:
130+
path: './docs'
131+
fail-on-error: 'false'
132+
exclude-patterns: '*.internal.example.com/*'
133+
```
134+
135+
## How It Works
136+
137+
1. **Parse Markdown**: Uses `Parse-MarkdownLink.ps1` to extract all links from markdown files using Markdig
138+
2. **Deduplicate**: Groups links by URL to avoid checking the same link multiple times
139+
3. **Verify Links**:
140+
- HTTP/HTTPS links: Makes HEAD/GET requests with configurable timeout and retries
141+
- Local file references: Checks if the file exists relative to the markdown file
142+
- Excluded patterns: Skips links matching the exclude patterns
143+
4. **Report Results**: Displays detailed results with file locations for failed links
144+
5. **Set Outputs**: Provides metrics for downstream steps
145+
146+
## Error Output Example
147+
148+
```
149+
✗ FAILED: https://example.com/broken-link - HTTP 404
150+
Found in: /path/to/file.md:42:15
151+
Found in: /path/to/other.md:100:20
152+
153+
Link Verification Summary
154+
============================================================
155+
Total URLs checked: 150
156+
Passed: 145
157+
Failed: 2
158+
Skipped: 3
159+
160+
Failed Links:
161+
https://example.com/broken-link
162+
Error: HTTP 404
163+
Occurrences: 2
164+
```
165+
166+
## Requirements
167+
168+
- PowerShell 7+ (includes Markdig)
169+
- Runs on: `ubuntu-latest`, `windows-latest`, `macos-latest`
170+
171+
## Broken Link Test
172+
173+
- [Broken Link](https://github.com/PowerShell/PowerShell/wiki/NonExistentPage404)
174+
175+
## License
176+
177+
Same as the PowerShell repository.

0 commit comments

Comments
 (0)