Skip to content

Commit cd0c6f5

Browse files
committed
wip
1 parent 53ab302 commit cd0c6f5

File tree

10 files changed

+578
-11
lines changed

10 files changed

+578
-11
lines changed

notebook/bundler/__init__.py

Whitespace-only changes.

notebook/bundler/handlers.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
# Copyright (c) Jupyter Development Team.
2+
# Distributed under the terms of the Modified BSD License.
3+
import os
4+
import shutil
5+
import errno
6+
import nbformat
7+
import fnmatch
8+
import glob
9+
from notebook.utils import url_path_join, url2path
10+
from notebook.base.handlers import IPythonHandler, path_regex
11+
from notebook.services.config import ConfigManager
12+
from ipython_genutils.importstring import import_item
13+
from tornado import web, gen
14+
15+
class BundlerTools(object):
    '''Set of common tools to aid bundler implementations.'''

    def get_file_references(self, abs_nb_path, version):
        '''
        Gets the files referenced either in Markdown fenced code blocks
        or in HTML comments from the notebook. Expands patterns expressed in
        gitignore syntax (https://git-scm.com/docs/gitignore). Returns the
        fully expanded collection of filenames relative to the notebook
        dirname.

        NOTE: Temporarily changes the current working directory when called.

        :param abs_nb_path: Absolute path of the notebook on disk
        :param version: Version of the notebook document format to use
        :returns: Set of filename strings relative to the notebook path
        '''
        ref_patterns = self.get_reference_patterns(abs_nb_path, version)
        return self.expand_references(os.path.dirname(abs_nb_path), ref_patterns)

    def get_reference_patterns(self, abs_nb_path, version):
        '''
        Gets a list of reference patterns either in Markdown fenced code blocks
        or in HTML comments from the notebook.

        :param abs_nb_path: Absolute path of the notebook on disk
        :param version: Version of the notebook document format to use
        :returns: List of pattern strings from the notebook, in cell order
        '''
        notebook = nbformat.read(abs_nb_path, version)
        referenced_list = []
        for cell in notebook.cells:
            # Always a list (possibly empty), so extend in place
            referenced_list.extend(self.get_cell_reference_patterns(cell))
        return referenced_list

    def get_cell_reference_patterns(self, cell):
        '''
        Retrieves the list of references from a single notebook cell. Looks for
        fenced code blocks or HTML comments in Markdown cells, e.g.,

        ```
        some.csv
        foo/
        !foo/bar
        ```

        or

        <!--associate:
        some.csv
        foo/
        !foo/bar
        -->

        Only the first such block in the cell is read. Lines containing
        '../', lines starting with '#' and blank lines are dropped.

        :param cell: Notebook cell object (anything with a dict-like ``get``)
        :returns: List of strings
        '''
        if not (cell.get('cell_type') or '').startswith('markdown'):
            return []
        source = cell.get('source') or ''

        if source.startswith('<!--associate:'):
            # invisible after execution: unrendered HTML comment
            body = source[len('<!--associate:'):]
            terminator = '-->'
        elif '```' in source:
            # visible after execution: rendered as a code element within a
            # pre element
            body = source[source.find('```') + len('```'):]
            terminator = '```'
        else:
            return []

        referenced = []
        for line in body.splitlines():
            if line.startswith(terminator):
                break
            # Trying to go out of the current directory leads to
            # trouble when deploying
            if '../' not in line and not line.startswith('#'):
                referenced.append(line)

        # Clean out blank references
        return [ref for ref in referenced if ref.strip()]

    @staticmethod
    def _matches_walk_pattern(relative_path, testpattern):
        '''
        Tests one root-relative file path against one slash-containing
        pattern (with any leading '!' already removed).

        :param relative_path: File path relative to the walked root
        :param testpattern: Pattern using '/' as its separator
        :returns: True if the path is selected by the pattern
        '''
        # Patterns are written with '/', so compare against a '/'-separated
        # form of the path regardless of the platform separator
        candidate = relative_path.replace(os.sep, '/')
        if testpattern.endswith('/'):
            # directory pattern: everything underneath it
            return candidate.startswith(testpattern)
        if '**' in testpattern:
            # path wildcard: only a single '**' is supported
            ends = testpattern.split('**')
            return (len(ends) == 2 and candidate.startswith(ends[0])
                    and candidate.endswith(ends[1]))
        # segments should be respected
        return fnmatch.fnmatch(candidate, testpattern)

    def expand_references(self, root_path, references):
        '''
        Expands a set of reference patterns by evaluating them against the
        given root directory. Expansions are performed against patterns
        expressed in the same manner as in gitignore
        (https://git-scm.com/docs/gitignore).

        Patterns starting with '!' are negations: every path they match is
        removed from the result, no matter how many other patterns matched it.

        NOTE: Temporarily changes the current working directory while simple
        (slash-free) patterns are globbed; it is always restored.

        :param root_path: Assumed root directory for the patterns
        :param references: List of reference patterns
        :returns: Set of filename strings relative to the root path
        '''
        globbed = set()
        negations = set()
        simple_globs = []
        must_walk = []
        for pattern in references:
            if not pattern:
                continue
            if '/' in pattern:
                must_walk.append(pattern)
            else:
                simple_globs.append(pattern)

        if simple_globs:
            # simple shell globs, evaluated relative to the root
            cwd = os.getcwd()
            os.chdir(root_path)
            try:
                for pattern in simple_globs:
                    if pattern.startswith('!'):
                        negations.update(glob.glob(pattern[1:]))
                    else:
                        globbed.update(glob.glob(pattern))
            finally:
                # Never leave the process in a different directory
                os.chdir(cwd)

        if must_walk:
            # Walk the tree once and reuse the listing for every pattern
            walked = []
            for root, _, filenames in os.walk(root_path):
                # relpath is correct even when root_path ends in a separator
                relative_dir = os.path.relpath(root, root_path)
                if relative_dir == os.curdir:
                    relative_dir = ''
                for filename in filenames:
                    walked.append(os.path.join(relative_dir, filename))

            for pattern in must_walk:
                pattern_is_negation = pattern.startswith('!')
                testpattern = pattern[1:] if pattern_is_negation else pattern
                target = negations if pattern_is_negation else globbed
                target.update(
                    joined for joined in walked
                    if self._matches_walk_pattern(joined, testpattern)
                )

        return globbed - negations

    def copy_filelist(self, src, dst, src_relative_filenames):
        '''
        Copies the given list of files, relative to src, into dst, creating
        directories along the way as needed and ignore existence errors.
        Skips any files that do not exist. Does not create empty directories
        from src in dst.

        :param src: Root of the source directory
        :param dst: Root of the destination directory
        :param src_relative_filenames: List of filename relative to src
        :raises OSError: If a parent directory cannot be created for a reason
            other than already existing
        '''
        for filename in src_relative_filenames:
            src_file = os.path.join(src, filename)
            # Only consider the file if it exists in src
            if not os.path.isfile(src_file):
                continue
            parent_relative = os.path.dirname(filename)
            if parent_relative:
                # Make sure the parent directory exists
                try:
                    os.makedirs(os.path.join(dst, parent_relative))
                except OSError as exc:
                    if exc.errno != errno.EEXIST:
                        raise
            shutil.copy2(src_file, os.path.join(dst, filename))
192+
193+
class BundlerHandler(IPythonHandler):
    '''Handler that passes a notebook model to a registered bundler module.'''

    def initialize(self):
        # Shared helper object exposed to bundler plugins as handler.tools
        self.tools = BundlerTools()

    def get_bundler(self, bundler_id):
        '''
        Looks up a bundler's metadata under the 'bundlers' key of the
        'notebook' config section.

        :param bundler_id: Unique ID of the bundler within that config entry
        :returns: Dict of bundler metadata with keys label, group, module_name
        :raises KeyError: If the bundler is not registered
        '''
        notebook_section = ConfigManager().get('notebook')
        registered = notebook_section.get('bundlers', {})
        return registered[bundler_id]

    @web.authenticated
    @gen.coroutine
    def get(self, path):
        """Bundle the given notebook.

        Reads the bundler ID from the 'bundler' query argument, imports the
        module named in its metadata and calls that module's bundle function
        with this handler and the notebook model.
        """
        requested_id = self.get_query_argument('bundler')
        notebook_model = self.contents_manager.get(path=url2path(path))

        try:
            metadata = self.get_bundler(requested_id)
        except KeyError:
            raise web.HTTPError(404, 'Bundler %s not found' % requested_id)

        raw_name = metadata['module_name']
        try:
            # str() changes nothing on python3; on python2 it can fail for
            # unicode names that are not plain ASCII
            import_name = str(raw_name)
        except UnicodeEncodeError:
            # import_item needs a byte string on python2, so fall back to utf-8
            import_name = raw_name.encode('utf-8')

        try:
            bundler_module = import_item(import_name)
        except ImportError:
            raise web.HTTPError(500, 'Could not import bundler %s ' % requested_id)

        # The bundler owns the response from here on and is expected to
        # finish the request itself
        outcome = bundler_module.bundle(self, notebook_model)
        yield gen.maybe_future(outcome)
237+
238+
# Named-group pattern matching one or more letters, digits or underscores as
# `bundler_id`. NOTE(review): not referenced by the route below in this view.
_bundler_id_regex = r'(?P<bundler_id>[A-Za-z0-9_]+)'

# (URL pattern, handler class) pairs. The single capture group takes
# everything after "/bundle/" -- presumably delivered to BundlerHandler.get
# as `path`; confirm against how the application registers these routes.
default_handlers = [
    (r"/bundle/(.*)", BundlerHandler)
]

notebook/bundler/tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)