This repository was archived by the owner on Feb 15, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathatjsparse.py
More file actions
231 lines (172 loc) · 5.86 KB
/
atjsparse.py
File metadata and controls
231 lines (172 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
"""Parsing and executing JavaScript code"""
import abc
import json
import base64
import subprocess
from pathlib import Path
from typing import Optional, Union
from typing import Type, Any
import regex
import js2py
import requests
from .atlog import log
js: Optional['Interpreter'] = None
class Interpreter(abc.ABC):
"""Base JS interpreter class"""
def __init__(self) -> None:
"""Base JS interpreter class"""
def __getitem__(self, name: str) -> Any:
"""Support for `js[name]` syntax
instead of `js.get_var(name)`
Args:
name (str): Variable name
Returns:
Variable value
"""
return self.get_var(name)
@abc.abstractmethod
def exec_js(self, func: str) -> None:
"""Executes JavaScript code
Args:
func (str): JS function
"""
@abc.abstractmethod
def get_var(self, name: str) -> Any:
"""Returns JS variable value
from the interpreter
Args:
name (str): Variable name
Returns:
Variable value
"""
class NodeInterpreter(Interpreter):
"""Node.JS interpreter wrapper,
starts a simple web server in background"""
def __init__(
self,
node: Union[str, Path] = 'node',
host: str = 'localhost',
port: int = 8001) -> None:
"""Node.JS interpreter wrapper,
starts a simple web server in background
Args:
node (Union[str, Path], optional): Path to `node` executable
host (str, optional): Hostname for the web server
port (int, optional): Port for the web server
"""
super().__init__()
file_dir = Path(__file__).absolute().parent
server_js = file_dir / 'data' / 'server.js'
self.url = f'http://{host}:{port}'
self.timeout = 2
# pylint: disable=consider-using-with
self.proc = subprocess.Popen(
args=[
node, server_js,
f'{port}', host,
],
stdout=subprocess.PIPE,
)
# pylint: enable=consider-using-with
assert self.proc.stdout is not None
ok_msg = self.proc.stdout.readline()
log.debug('Received from server.js: %s', ok_msg)
def exec_js(self, func: str) -> None:
resp = requests.post(self.url, data=func, timeout=self.timeout)
resp.raise_for_status()
def get_var(self, name: str) -> Any:
resp = requests.post(self.url, data=name, timeout=self.timeout)
resp.raise_for_status()
log.debug('NodeJS response: %s', resp.content)
return json.loads(resp.content)
def __del__(self) -> None:
try:
self.proc.terminate()
self.proc.communicate()
except AttributeError:
log.warning(
'NodeJS process was not initialized, '
'but __del__ was called'
)
class Js2PyInterpreter(Interpreter):
"""Js2Py interpreter,
uses js2py library to execute code"""
# Thanks to http://regex.inginf.units.it
arrowexp = regex.compile(r'\w[^\}]*+')
def __init__(self) -> None:
"""Js2Py interpreter,
uses js2py library to execute code"""
super().__init__()
ctx = js2py.EvalJs({'atob': atob})
ctx.execute('''
window.Map = function(_i){ };
window.setTimeout = function(_f,_t){ };
window.setInterval = function(_f,_t){ };
window.encodeURIComponent = window.Map;
window.document = { };
document.doctype = { };
document.currentScript = { };
document.getElementById = window.Map;
document.prepend = window.Map;
document.append = window.Map;
document.appendChild = window.Map;
''')
self.ctx = ctx
def exec_js(self, func: str) -> None:
self.ctx.execute(self.to_ecma5(func))
def get_var(self, name: str) -> Any:
return self.ctx[name]
def to_ecma5(self, func: str) -> str:
"""Converts from ECMA6 format to ECMA5
(replacing arrow expressions)
and removes comment blocks
Args:
func (str): ECMA6 function
Returns:
ECMA5 function
"""
# Delete anything between /* and */
func = regex.sub(r'/\*.+?\*/', '', func)
# Search for arrow expressions
match = self.arrowexp.search(func)
if match is None:
return func
# Convert the function
conv = '(function(){' + match[0] + '})()'
# Convert 1 more expression.
# It doesn't change,
# so it was hardcoded
# as a regexp
return regex.sub(
r'(?:s|\(s\)) => s.split\([\'"]{2}\).reverse\(\).join\([\'"]{2}\)',
'function(s){return s.split(\'\').reverse().join(\'\')}',
conv
)
def atob(s: str) -> str:
"""Wrapper for the built-in library function.
Decodes a base64 string
Args:
s (str): Encoded data
Returns:
Decoded string
"""
return base64.standard_b64decode(str(s)).decode('utf-8')
def get_interpreter(
*args,
create: Type[Interpreter] = Js2PyInterpreter,
**kwargs) -> 'Interpreter':
"""Get or create a JS interpreter.
`*args` and `**kwargs` will be passed
directly to JS interpreter `__init__`
(when creating it)
Args:
create (Type[Interpreter], optional): Preferred interpreter
Returns:
JS interpreter instance
"""
global js # pylint: disable=global-statement
# create if none
if js is None:
js = create(*args, **kwargs)
# and return
return js