-
Notifications
You must be signed in to change notification settings - Fork 1.1k
150 lines (133 loc) · 6.11 KB
/
validate-files.yml
File metadata and controls
150 lines (133 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Validate Files
#
# Checks that all files in the repository comply with three rules:
#   1. Windows filename compatibility: no reserved characters (< > : " | ? * \),
#      so the repo can be cloned on Windows systems.
#   2. File size limits: no files larger than 10 MB. Many enterprise users mirror
#      graphql-java into internal repositories that enforce file size limits.
#   3. No dangerous Unicode characters: prevents Trojan Source (BiDi override),
#      glassworm, and similar attacks using invisible or control characters.
name: Validate Files

on:
  # '**' matches every branch name; master is also listed explicitly.
  push:
    branches: ['master', '**']
  # Pull requests are validated only when they target one of these branches.
  pull_request:
    branches: ['master', '23.x', '22.x', '21.x', '20.x', '19.x']

# The job only reads repository contents, so the token is limited to read access.
permissions:
  contents: read
jobs:
  # Single job that checks out the repository and then runs the filename,
  # size, and Unicode validation steps in order.
  validate-filenames-and-size:
    name: Validate Files (Windows names, size, Unicode safety)
    runs-on: ubuntu-latest
    steps:
      # The validation steps only inspect the files of the checked-out commit
      # (via `git ls-files` and the work tree); none of them read git history,
      # so the action's default shallow checkout is sufficient and much faster
      # than fetching every commit with `fetch-depth: 0`.
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          # No later step pushes or fetches, so do not leave the token in
          # the local git config.
          persist-credentials: false
# Fails the job when any tracked path contains a character that Windows
# forbids in file names.
- name: Check for Windows-incompatible filenames
  run: |
    echo "Checking for Windows-incompatible filenames..."

    # Windows reserved characters: < > : " | ? * \
    INVALID_CHARS='[<>:"|?*\\]'

    # List tracked paths NUL-delimited (-z) and match them with grep -z.
    # Without -z, git C-quotes any path that contains non-ASCII bytes
    # (e.g. "caf\303\251.txt"); the quotes and backslashes it adds would
    # then be reported as reserved characters even though the real name
    # has none. `|| true` keeps "no match" from counting as a failure.
    INVALID_FILES=$(git ls-files -z | grep -zE "$INVALID_CHARS" | tr '\0' '\n' || true)

    if [ -z "$INVALID_FILES" ]; then
      echo "✓ All filenames are Windows-compatible"
      exit 0
    fi

    echo "::error::The following files have Windows-incompatible characters in their names:"
    # IFS= keeps leading/trailing whitespace in a name intact.
    while IFS= read -r file; do
      echo "::error file=${file}::File contains Windows-incompatible characters"
      echo " - $file"
    done <<< "$INVALID_FILES"
    echo ""
    echo "Please rename these files to remove characters: < > : \" | ? * \\"
    echo "For ISO timestamps, replace colons with hyphens (e.g., 08:40:24 -> 08-40-24)"
    exit 1
# Fails the job when any tracked regular file is larger than 10 MB.
- name: Check for files larger than 10MB
  run: |
    echo "Checking for files larger than 10MB..."

    MAX_SIZE=$((10 * 1024 * 1024))  # 10 MB in bytes
    LARGE_FILES=""

    # Read tracked paths NUL-delimited. Plain `git ls-files` output C-quotes
    # paths with non-ASCII bytes, so the quoted string would not name an
    # existing file and such files would silently escape the size check.
    while IFS= read -r -d '' file; do
      # Only regular files in the work tree have a meaningful size here.
      [ -f "$file" ] || continue

      # GNU stat first, BSD stat as a fallback. `|| true` keeps a double
      # failure from aborting the step under errexit, so the warning below
      # is actually reachable.
      size=$(stat -c%s -- "$file" 2>/dev/null || stat -f%z -- "$file" 2>/dev/null || true)
      if [ -z "$size" ]; then
        echo "::warning file=${file}::Could not determine size of file"
        continue
      fi

      if [ "$size" -gt "$MAX_SIZE" ]; then
        size_mb=$(awk -v bytes="$size" 'BEGIN { printf "%.2f", bytes / 1024 / 1024 }')
        echo "::error file=${file}::File size (${size_mb} MB) exceeds 10MB limit"
        # Append with a real newline so the list can be printed verbatim;
        # `echo -e` would interpret backslashes that occur in file names.
        LARGE_FILES+="${file} (${size_mb} MB)"$'\n'
      fi
    done < <(git ls-files -z)

    if [ -z "$LARGE_FILES" ]; then
      echo "✓ All files are within the 10MB size limit"
      exit 0
    fi

    echo ""
    echo "The following files exceed 10MB:"
    printf '%s\n' "$LARGE_FILES"
    echo ""
    echo "Please consider one of these options:"
    echo " 1. Split the file into smaller parts with suffixes .part1, .part2, etc."
    echo " 2. Remove unnecessary content from the file"
    exit 1
# Fails the job when a tracked text file contains invisible, control, or
# BiDi characters that can hide or visually reorder source code.
- name: Check for dangerous Unicode characters
  run: |
    echo "Checking for dangerous Unicode characters (Trojan Source / glassworm)..."

    # Dangerous character ranges:
    #   U+0000-0008, U+000B-000C, U+000E-001F  C0 control chars (except TAB, LF, CR)
    #   U+007F-009F                            DELETE + C1 control chars
    #   U+200B-200D                            Zero-width space/non-joiner/joiner
    #   U+FEFF                                 Zero-width no-break space (BOM)
    #   U+202A-202E                            BiDi embedding/override (Trojan Source)
    #   U+2066-2069                            BiDi isolate chars (Trojan Source)
    FOUND_FILES=""

    # Read tracked paths NUL-delimited: plain `git ls-files` output C-quotes
    # paths with non-ASCII bytes, which would make the -f test fail and let
    # those files skip the scan unnoticed.
    while IFS= read -r -d '' file; do
      [ -f "$file" ] || continue

      # Skip binary files. -b drops the "path:" prefix from file's output so
      # a path that happens to contain "text/" cannot defeat the filter, and
      # `--` protects against names that start with a dash.
      # NOTE(review): types that file reports outside text/* (for example
      # application/json) are skipped too - confirm that this is intended.
      case "$(file -b --mime-type -- "$file" 2>/dev/null)" in
        text/*) ;;
        *) continue ;;
      esac

      # Feed the file on stdin rather than as an argument: perl -n opens its
      # arguments with the two-argument open, which gives special meaning to
      # names such as "-" or names ending in "|". -CSD still decodes the
      # input as UTF-8. Errors are deliberately ignored (best effort).
      MATCHES=$(perl -CSD -ne '
        if (/[\x{0000}-\x{0008}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}\x{200B}-\x{200D}\x{FEFF}\x{202A}-\x{202E}\x{2066}-\x{2069}]/) {
          print " line $.: $_";
        }
      ' < "$file" 2>/dev/null || true)

      if [ -n "$MATCHES" ]; then
        echo "::error file=${file}::File contains dangerous Unicode characters"
        # Join with real newlines; the matched lines are source text and may
        # contain backslash sequences that `echo -e` would interpret
        # (for example \c, which cuts the output short).
        FOUND_FILES+="${file}:"$'\n'"${MATCHES}"$'\n'
      fi
    done < <(git ls-files -z)

    if [ -z "$FOUND_FILES" ]; then
      echo "✓ No dangerous Unicode characters found"
      exit 0
    fi

    echo ""
    echo "The following files contain dangerous Unicode characters:"
    printf '%s\n' "$FOUND_FILES"
    echo ""
    echo "These invisible or rendering-altering characters can be used for"
    echo "Trojan Source or glassworm-style attacks. Detected categories:"
    echo " - C0/C1 control characters (U+0000-001F, U+007F-009F, except TAB/LF/CR)"
    echo " - Zero-width characters (U+200B-200D, U+FEFF)"
    echo " - BiDi override/isolate (U+202A-202E, U+2066-2069)"
    echo ""
    echo "Please remove these characters from the affected files."
    exit 1