-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Expand file tree
/
Copy pathpre-commit
More file actions
executable file
·127 lines (109 loc) · 4.92 KB
/
pre-commit
File metadata and controls
executable file
·127 lines (109 loc) · 4.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/bin/bash
# Pre-commit hook to enforce Windows compatibility, file size limits,
# and dangerous Unicode character detection.
#
# 1. Windows filenames: prevents characters that are reserved on Windows (< > : " | ? * \)
# so the repo can be cloned on Windows systems.
# 2. File size: rejects files larger than 10 MB. Many enterprise users mirror graphql-java
# into internal repositories that enforce file size limits.
# 3. Dangerous Unicode: detects invisible/control characters that can be used for
# "Trojan Source" (BiDi override), homoglyph, or glassworm-style attacks.
# Color escape sequences for readable output. They are kept as literal
# backslash text and only turn into real escapes when a message is printed
# with `echo -e`.
NC='\033[0m'        # No Color (reset)
RED='\033[0;31m'    # errors
YELLOW='\033[1;33m' # hints and warnings

# Becomes 1 as soon as any check fails; the end of the script turns it into
# the hook's exit status.
ERRORS_FOUND=0

printf '%s\n' "Running pre-commit checks..."
# Check 1: Windows-incompatible filenames
# Rejects staged paths that contain a character Windows reserves, so the
# repository can still be cloned there. Also fills STAGED_FILES, the
# newline-separated path list that the later checks iterate over.
echo " Checking for Windows-incompatible filenames..."

# Windows reserved characters: < > : " | ? * \
# The pattern is a bracket expression, in which a backslash is an ordinary
# character; writing it twice merely lists it twice.
INVALID_CHARS='[<>:"|?*\\]'

# Print the staged paths that were added, copied or renamed, one per line.
# core.quotePath=false keeps git from rewriting non-ASCII names in the
# C-quoted form "h\303\251llo.txt". That form contains both '"' and '\', so
# perfectly valid names were reported as Windows-incompatible, and the quoted
# string is not a real path for the file tests done by the later checks.
# Names containing '"', '\' or control characters are still C-quoted by git;
# every such quoted name matches INVALID_CHARS and is therefore rejected.
list_staged_files() {
  git -c core.quotePath=false diff --cached --name-only --diff-filter=ACR
}

# Read paths on stdin (one per line) and print those that contain a reserved
# character. Always returns 0, including when nothing matches.
list_windows_invalid_names() {
  grep -E -- "$INVALID_CHARS" || true
}

# Print the error report for the newline-separated offending paths in $1.
report_windows_invalid_names() {
  local name
  echo -e "${RED}Error: The following files have Windows-incompatible characters in their names:${NC}"
  while IFS= read -r name; do
    printf ' - %s\n' "$name"
  done <<< "$1"
  # The colors go through %b and the text through %s. Gluing the trailing
  # backslash of the character list directly onto ${NC} inside `echo -e`
  # turned "\" + "\033[0m" into an escaped backslash followed by the literal
  # text "033[0m", so the color was never reset.
  printf '%b%s%b\n' "$YELLOW" 'Please rename these files to remove characters: < > : " | ? * \' "$NC"
  echo -e "${YELLOW}For ISO timestamps, replace colons with hyphens (e.g., 08:40:24 -> 08-40-24)${NC}"
}

# Get list of staged files
STAGED_FILES=$(list_staged_files)
if [ -n "$STAGED_FILES" ]; then
  INVALID_FILES=$(printf '%s\n' "$STAGED_FILES" | list_windows_invalid_names)
  if [ -n "$INVALID_FILES" ]; then
    report_windows_invalid_names "$INVALID_FILES"
    ERRORS_FOUND=1
  fi
fi
# Check 2: Files larger than 10MB
# Rejects staged files whose content is larger than MAX_SIZE bytes.
echo " Checking for files larger than 10MB..."
MAX_SIZE=$((10 * 1024 * 1024)) # 10 MB in bytes

# Print the byte count $1 as megabytes with two decimals.
# The number is handed to awk with -v rather than pasted into the program
# text (awk is used because it is more portable than bc).
bytes_to_mb() {
  awk -v bytes="$1" 'BEGIN { printf "%.2f", bytes / 1024 / 1024 }'
}

# Read paths on stdin (one per line) and print " - <path> (<size> MB)" for
# every path whose size is greater than $1 bytes.
# The size comes from the staged blob (`git cat-file -s :0:<path>`), because
# that is the content the commit will record; the working-tree copy may have
# been edited or deleted after `git add`. If git reports no size, the
# working-tree file is measured with stat instead (GNU syntax first, then
# BSD/macOS syntax). Warnings go to stderr so they do not end up in the
# captured list.
find_large_staged_files() {
  local limit=$1 path bytes
  while IFS= read -r path; do
    bytes=$(git cat-file -s ":0:${path}" 2>/dev/null) || bytes=""
    if [ -z "$bytes" ]; then
      # No blob size available: only regular working-tree files are measured.
      [ -f "$path" ] || continue
      bytes=$(stat -c%s "$path" 2>/dev/null || stat -f%z "$path" 2>/dev/null)
      if [ -z "$bytes" ]; then
        printf '%b%s%b\n' "$YELLOW" "Warning: Could not determine size of ${path}, skipping size check" "$NC" >&2
        continue
      fi
    fi
    if [ "$bytes" -gt "$limit" ]; then
      printf ' - %s (%s MB)\n' "$path" "$(bytes_to_mb "$bytes")"
    fi
  done
}

LARGE_FILES=""
if [ -n "$STAGED_FILES" ]; then
  LARGE_FILES=$(find_large_staged_files "$MAX_SIZE" <<< "$STAGED_FILES")
fi
if [ -n "$LARGE_FILES" ]; then
  echo -e "${RED}Error: The following files exceed 10MB:${NC}"
  # Printed with %s so that backslashes in file names are shown verbatim
  # instead of being interpreted as escape sequences.
  printf '%s\n\n' "$LARGE_FILES"
  echo -e "${YELLOW}Please consider one of these options:${NC}"
  echo -e "${YELLOW} 1. Split the file into smaller parts with suffixes .part1, .part2, etc.${NC}"
  echo -e "${YELLOW} 2. Remove unnecessary content from the file${NC}"
  ERRORS_FOUND=1
fi
# Check 3: Dangerous Unicode characters (Trojan Source / glassworm attacks)
# Detects: C0/C1 control chars (except TAB, LF, CR), zero-width characters,
# BiDi override/embedding/isolate chars.
# Uses perl for macOS compatibility (grep -P is not available on macOS).
# NOTE(review): the working-tree copy of each staged path is scanned, not the
# staged blob.
echo " Checking for dangerous Unicode characters..."

# Succeed when file(1) reports the content of $1 as binary.
# -b drops the "<path>: " prefix from the output, so a directory called
# "text/" can no longer make a binary file look like text. Asking for the
# encoding instead of the MIME type means textual files that file(1) labels
# application/* or image/* (JSON, SVG, ...) are scanned rather than skipped.
# When file(1) is unavailable nothing is treated as binary.
is_binary_file() {
  [ "$(file -b --mime-encoding -- "$1" 2>/dev/null)" = "binary" ]
}

# Print " line <n>: <text>" for every line of $1 that contains a dangerous
# character. Each such character is replaced by a visible <U+XXXX> marker, so
# the report shows where it sits and never sends the raw control character to
# the terminal. perl errors are suppressed and the function always returns 0.
scan_dangerous_unicode() {
  perl -CSD -ne '
    if (s/([\x{0000}-\x{0008}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}\x{200B}-\x{200D}\x{FEFF}\x{202A}-\x{202E}\x{2066}-\x{2069}])/sprintf("<U+%04X>", ord($1))/ge) {
      print " line $.: $_";
    }
  ' "$1" 2>/dev/null || true
}

UNICODE_FILES=""
if [ -n "$STAGED_FILES" ]; then
  while IFS= read -r path; do
    [ -f "$path" ] || continue
    # Skip binary files
    is_binary_file "$path" && continue
    MATCHES=$(scan_dangerous_unicode "$path")
    if [ -n "$MATCHES" ]; then
      # Real newlines are used as separators so the report can be printed
      # without `echo -e`.
      UNICODE_FILES="${UNICODE_FILES} - ${path}"$'\n'"${MATCHES}"$'\n'
    fi
  done <<< "$STAGED_FILES"
fi
if [ -n "$UNICODE_FILES" ]; then
  echo -e "${RED}Error: The following files contain dangerous Unicode characters:${NC}"
  # %s prints the matched source lines verbatim; `echo -e` would interpret
  # backslash sequences that happen to occur in the scanned files.
  printf '%s\n' "$UNICODE_FILES"
  echo -e "${YELLOW}These characters are invisible or alter text rendering and can be used for${NC}"
  echo -e "${YELLOW}Trojan Source or glassworm-style attacks. Detected character categories:${NC}"
  echo -e "${YELLOW} - C0/C1 control characters (U+0000-001F, U+007F-009F, except TAB/LF/CR)${NC}"
  echo -e "${YELLOW} - Zero-width characters (U+200B-200D, U+FEFF)${NC}"
  echo -e "${YELLOW} - BiDi override/isolate (U+202A-202E, U+2066-2069)${NC}"
  echo -e "${YELLOW}Please remove these characters from the affected files.${NC}"
  ERRORS_FOUND=1
fi
# Final verdict: finish with status 0 unless one of the checks set
# ERRORS_FOUND to 1, in which case report the failure and finish with
# status 1.
[ "$ERRORS_FOUND" -eq 1 ] || {
  echo " All pre-commit checks passed!"
  exit 0
}
echo -e "${RED}Pre-commit checks failed. Please fix the issues above and try again.${NC}"
exit 1