Skip to content

Commit ac47e8d

Browse files
committed
Add script that checks web code snippets vs source
The initial use case is to check the code snippets at: http://fjii.sc/ImgLib2_Examples against their linked counterparts in the imglib2/examples subdirectory of imglib.git on GitHub (https://github.com/imagej/imglib). However, the script could be used to check any similar raw code snippets against a source repository or other linked source. We also plan to use it on the ImageJ web site; e.g.: http://developer.imagej.net/extensibility
1 parent f00de6a commit ac47e8d

1 file changed

Lines changed: 138 additions & 0 deletions

File tree

verify-code-snippets.pl

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#!/usr/bin/perl
2+
3+
#
4+
# verify-code-snippets.pl
5+
#
6+
7+
# Script to check that code snippets on a given web page match the linked code.
8+
# For the script to do anything, the page must declare a link to a source file
9+
# on GitHub, of the form
10+
11+
# Usage: verify-code-snippets.pl http://domain.name/path/to/page-to-check ...
12+
13+
use HTML::Entities;
14+
15+
use strict;
16+
17+
# constants
18+
my $urlChar = '[\w\-\@?^=%&/~\+#\.]';
19+
my $codeLink = "https://github.com/$urlChar*/blob/$urlChar*";
20+
my $lineRange = 10;
21+
22+
# parse environment variables
23+
my $debug = $ENV{'DEBUG'};
24+
25+
# parse command line arguments
26+
my @urls = @ARGV;
27+
28+
# process all URLs given
29+
my $returnValue = 0;
30+
for my $url (@urls) {
31+
my $result = processURL($url);
32+
if ($result) {
33+
$returnValue = $result;
34+
}
35+
}
36+
exit $returnValue;
37+
38+
# Processes the given URL for code snippets, checking any it finds.
39+
sub processURL($) {
40+
my ($pageURL) = @_;
41+
42+
debug("Downloading $pageURL\n");
43+
my @page = `curl -s "$pageURL"`;
44+
45+
# decode encoded HTML characters (e.g., '&' -> '&')
46+
for (my $pageIndex = 0; $pageIndex < @page; $pageIndex++) {
47+
$page[$pageIndex] = decode_entities($page[$pageIndex]);
48+
}
49+
50+
# scan document for code links
51+
my $failCount = 0;
52+
for (my $pageIndex = 0; $pageIndex < @page; $pageIndex++) {
53+
my $pageLine = $page[$pageIndex];
54+
if ($pageLine =~ /$codeLink/) {
55+
my $codeURL = makeRaw($&);
56+
debug("Downloading $codeURL");
57+
my @code = `curl -s $codeURL`;
58+
59+
# search for matching first line of code within a few lines
60+
my $codeOffset = findMatch(\@page, $code[0], $pageIndex);
61+
62+
if ($codeOffset < 0) {
63+
debug("NO CODE SNIPPET FOUND\n");
64+
next;
65+
}
66+
67+
my $lineNo = $codeOffset + 1;
68+
debug("Checking code snippet at line $lineNo");
69+
my $match = valuesEqual(\@page, $codeOffset, \@code);
70+
71+
if ($match) {
72+
debug("CODE SNIPPET MATCHES\n");
73+
}
74+
else {
75+
$failCount++;
76+
}
77+
}
78+
}
79+
80+
# return number of non-matching code snippets
81+
if ($failCount > 0) {
82+
error("Found $failCount non-matching code snippets.");
83+
}
84+
return $failCount;
85+
}
86+
87+
# Converts a cooked GitHub link to a raw code link
88+
sub makeRaw($) {
89+
my ($url) = @_;
90+
$url =~ s/github\.com/raw.github.com/;
91+
$url =~ s/blob\///;
92+
return $url;
93+
}
94+
95+
# Searches the given list for a matching string from the specified index.
96+
sub findMatch($$$) {
97+
my ($list, $string, $index) = @_;
98+
for (my $i = $index; $i < $index + $lineRange; $i++) {
99+
if ($$list[$i] eq $string) {
100+
return $i;
101+
}
102+
}
103+
return -1;
104+
}
105+
106+
# Checks whether the two arrays match (with a1 offset by the given index).
107+
sub valuesEqual($$$) {
108+
my ($a1, $offset, $a2) = @_;
109+
my $index = $offset;
110+
for my $v2 (@$a2) {
111+
my $v1 = sterilize($$a1[$index++]);
112+
if ($v1 ne $v2) {
113+
my $lineNo = $index - $offset + 1;
114+
debug("LINE $lineNo DOES NOT MATCH:\n\t$v1\t$v2");
115+
return 0;
116+
}
117+
}
118+
return 1;
119+
}
120+
121+
# Hacky routine to fix discrepancies in HTML code...
122+
sub sterilize($) {
123+
my ($value) = @_;
124+
$value =~ s/<\/pre>$//;
125+
return $value;
126+
}
127+
128+
# Emits the given message on stderr when the debug flag is set.
129+
sub debug($) {
130+
$debug || return;
131+
error(@_);
132+
}
133+
134+
# Emits the given message on stderr.
135+
sub error($) {
136+
my ($message) = @_;
137+
print STDERR "$message\n";
138+
}

0 commit comments

Comments
 (0)