forked from EFForg/https-everywhere
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrewriter.js
More file actions
133 lines (118 loc) · 4.06 KB
/
rewriter.js
File metadata and controls
133 lines (118 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// HTTPS Rewriter.
//
// Uses the rulesets from HTTPS Everywhere to recursively rewrite URL references in a
// given directory to HTTPS. Uses protocol-relative URLs wherever possible.
// Makes a copy of each file at filename.bak.
//
// Usage:
// cd https-everywhere
// ./makecrx.sh # to build default.rulesets
// cd rewriter
// (install node and npm)
// npm install
// node rewriter.js ~/path/to/my/webapp
// cd ~/path/to/my/webapp
// git diff
var path = require("path"),
fs = require("fs"),
DOMParser = require('xmldom').DOMParser,
readdirp = require('readdirp'),
es = require('event-stream'),
lrucache = require("../chromium/lru"),
rules = require("../chromium/rules"),
URI = require("URIjs");
var ruleSets = null;
/**
 * Recursively walk `dir` and hand every file whose name matches one of the
 * source-code patterns below to processFile, which rewrites the URLs in it.
 * A failure on one file is logged and does not stop the walk.
 */
function processDir(dir) {
  var sourcePatterns = [
    '*.html', '*.js', '*.rb', '*.erb', '*.mustache',
    '*.scala', '*.c', '*.cc', '*.cpp', '*.cxx',
    '*.java', '*.go', '*.php', '*.css', '*.pl', '*.py',
    '*.rhtml', '*.sh', '*.yaml'
  ];

  // Invoked once per matching entry emitted by the directory walker.
  var rewriteEntry = function (entry) {
    var filename = path.join(dir, entry.path);
    console.log("Rewriting " + filename);
    try {
      processFile(filename);
    } catch (e) {
      // Report the failure and carry on with the remaining files.
      console.log(filename, e);
    }
  };

  var walker = readdirp({ root: dir, fileFilter: sourcePatterns });

  walker.on('warn', function (err) {
    console.error('non-fatal error', err);
    // Optionally call walker.destroy() here in order to abort and cause 'close' to be emitted
  });
  walker.on('error', function (err) {
    console.error('fatal error', err);
  });
  walker.pipe(es.mapSync(rewriteEntry));
}
/**
 * Overwrite the default URI find_uri_expression with a modified one that
 * mitigates a catastrophic backtracking issue common in CSS.
 *
 * The workaround is to restrict the scheme alternative of the pattern to
 * `http:`, since plain-HTTP URLs are the only ones we want to rewrite anyhow.
 * The `www.` and bare `domain.tld/` alternatives are still accepted by the
 * pattern; processFile returns any match whose protocol is not `http`
 * unchanged.
 *
 * Note that this may still go exponential on certain inputs; see
 * http://www.regular-expressions.info/catastrophic.html
 *
 * Example string that blows up URI.withinString:
 * image:url(http://img.youtube.com/vi/x7f
 */
URI.find_uri_expression = /\b((?:http:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+)+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/ig;
/**
 * Rewrite the http URLs found in one file according to the loaded rulesets.
 *
 * The rewritten text is first written to `<filename>.new`; the original is
 * then renamed to `<filename>.bak` and the new file is moved into its place.
 *
 * @param {string} filename Path of the file to rewrite.
 */
function processFile(filename) {
  // Maps one URL found in the text to its replacement (or to itself).
  var rewriteUrl = function (url) {
    var uri = new URI(url);
    if (uri.protocol() != 'http') {
      return url;
    }

    uri.normalize();
    var rewritten = ruleSets.rewriteURI(uri.toString(), uri.host());
    if (!rewritten) {
      // No rule produced a rewrite: leave the URL as it was.
      return url;
    }

    // If the rewrite was just a protocol change, output a protocol-relative
    // URI instead of the rewritten one.
    var rewrittenUri = new URI(rewritten).protocol('http');
    if (rewrittenUri.toString() === uri.toString()) {
      return rewrittenUri.protocol('').toString();
    }
    return rewritten;
  };

  var originalText = fs.readFileSync(filename, 'utf8');
  var rewrittenFile = URI.withinString(originalText, rewriteUrl);

  var stagingName = filename + ".new";
  var backupName = filename + ".bak";
  fs.writeFileSync(stagingName, rewrittenFile);
  fs.renameSync(filename, backupName);
  fs.renameSync(stagingName, filename);
}
/**
 * Read the default.rulesets file, parse it as XML and populate the
 * module-level `ruleSets` object that processFile uses for rewriting.
 */
function loadRuleSets() {
  console.log("Loading rules...");

  var rulesetsPath = path.join(__dirname, '../pkg/crx/rules/default.rulesets');
  var rulesetsText = fs.readFileSync(rulesetsPath, 'utf8');
  var parser = new DOMParser();
  var rulesetsXml = parser.parseFromString(rulesetsText, 'text/xml');

  // Assign the module-level variable before filling it from the XML.
  ruleSets = new rules.RuleSets("fake user agent", lrucache.LRUCache, {});
  ruleSets.addFromXml(rulesetsXml);
}
/**
 * Print the command-line synopsis and terminate the process with status 1.
 */
function usage() {
  var synopsis = "Usage: \n nodejs rewriter.js /path/to/my/webapp \n";
  console.log(synopsis);
  process.exit(1);
}
// Command-line entry point. Every argument after the script name is a
// directory to rewrite. All arguments are validated before any rules are
// loaded or any file is touched.
var targets = process.argv.slice(2);

if (targets.length === 0) {
  usage();
}

targets.forEach(function (target) {
  // Anything starting with '-' looks like an option; none are supported.
  if (target.charAt(0) === '-') {
    usage();
  }
  if (!fs.existsSync(target)) {
    console.log("Path doesn't exist: " + target);
    process.exit(1);
  }
});

loadRuleSets();

targets.forEach(function (target) {
  processDir(target);
});