-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
IPEP 15: autosave the notebook #3158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
076f226
b68ea6d
c8b6955
80b5d18
bc87e4c
92341f3
609c814
5ce631b
757bf96
7a392f6
511bb2f
a188c64
65bc9e8
eb50507
27b173a
47e762b
156594a
4c11197
ba358e3
7cababc
a38e0d4
d4fedf6
c801089
0c16365
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,4 +12,5 @@ build | |
| *.egg-info | ||
| *~ | ||
| *.bak | ||
| .ipynb_checkpoints | ||
| .tox | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import io | ||
| import os | ||
| import glob | ||
| import shutil | ||
|
|
||
| from tornado import web | ||
|
|
||
|
|
@@ -43,11 +44,36 @@ class FileNotebookManager(NotebookManager): | |
| """ | ||
| ) | ||
|
|
||
| checkpoint_dir = Unicode(config=True, | ||
| help="""The location in which to keep notebook checkpoints | ||
|
|
||
| By default, it is notebook-dir/.ipynb_checkpoints | ||
| """ | ||
| ) | ||
| def _checkpoint_dir_default(self): | ||
| return os.path.join(self.notebook_dir, '.ipynb_checkpoints') | ||
|
|
||
| def _checkpoint_dir_changed(self, name, old, new): | ||
| """do a bit of validation of the checkpoint dir""" | ||
| if not os.path.isabs(new): | ||
| # If we receive a non-absolute path, make it absolute. | ||
| abs_new = os.path.abspath(new) | ||
| self.checkpoint_dir = abs_new | ||
| return | ||
| if os.path.exists(new) and not os.path.isdir(new): | ||
| raise TraitError("checkpoint dir %r is not a directory" % new) | ||
| if not os.path.exists(new): | ||
| self.log.info("Creating checkpoint dir %s", new) | ||
| try: | ||
| os.mkdir(new) | ||
| except: | ||
| raise TraitError("Couldn't create checkpoint dir %r" % new) | ||
|
|
||
| filename_ext = Unicode(u'.ipynb') | ||
|
|
||
| # Map notebook names to notebook_ids | ||
| rev_mapping = Dict() | ||
|
|
||
| def get_notebook_names(self): | ||
| """List all notebook names in the notebook dir.""" | ||
| names = glob.glob(os.path.join(self.notebook_dir, | ||
|
|
@@ -89,39 +115,49 @@ def notebook_exists(self, notebook_id): | |
| return False | ||
| path = self.get_path_by_name(self.mapping[notebook_id]) | ||
| return os.path.isfile(path) | ||
|
|
||
| def find_path(self, notebook_id): | ||
| """Return a full path to a notebook given its notebook_id.""" | ||
| def get_name(self, notebook_id): | ||
| """get a notebook name, raising 404 if not found""" | ||
| try: | ||
| name = self.mapping[notebook_id] | ||
| except KeyError: | ||
| raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id) | ||
| return name | ||
|
|
||
| def get_path(self, notebook_id): | ||
| """Return a full path to a notebook given its notebook_id.""" | ||
| name = self.get_name(notebook_id) | ||
| return self.get_path_by_name(name) | ||
|
|
||
| def get_path_by_name(self, name): | ||
| """Return a full path to a notebook given its name.""" | ||
| filename = name + self.filename_ext | ||
| path = os.path.join(self.notebook_dir, filename) | ||
| return path | ||
| return path | ||
|
|
||
| def read_notebook_object(self, notebook_id): | ||
| """Get the NotebookNode representation of a notebook by notebook_id.""" | ||
| path = self.find_path(notebook_id) | ||
| if not os.path.isfile(path): | ||
| raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id) | ||
| def read_notebook_object_from_path(self, path): | ||
| """read a notebook object from a path""" | ||
| info = os.stat(path) | ||
| last_modified = datetime.datetime.utcfromtimestamp(info.st_mtime) | ||
| with open(path,'r') as f: | ||
| s = f.read() | ||
| try: | ||
| # v1 and v2 and json in the .ipynb files. | ||
| nb = current.reads(s, u'json') | ||
| except: | ||
| raise web.HTTPError(500, u'Unreadable JSON notebook.') | ||
| except Exception as e: | ||
| raise web.HTTPError(500, u'Unreadable JSON notebook: %s' % e) | ||
| return last_modified, nb | ||
|
|
||
| def read_notebook_object(self, notebook_id): | ||
| """Get the Notebook representation of a notebook by notebook_id.""" | ||
| path = self.get_path(notebook_id) | ||
| if not os.path.isfile(path): | ||
| raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id) | ||
| last_modified, nb = self.read_notebook_object_from_path(path) | ||
| # Always use the filename as the notebook name. | ||
| nb.metadata.name = os.path.splitext(os.path.basename(path))[0] | ||
| return last_modified, nb | ||
|
|
||
| def write_notebook_object(self, nb, notebook_id=None): | ||
| """Save an existing notebook object by notebook_id.""" | ||
| try: | ||
|
|
@@ -136,16 +172,20 @@ def write_notebook_object(self, nb, notebook_id=None): | |
| raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id) | ||
|
|
||
| old_name = self.mapping[notebook_id] | ||
| old_checkpoints = self.list_checkpoints(notebook_id) | ||
|
|
||
| path = self.get_path_by_name(new_name) | ||
| try: | ||
| self.log.debug("Autosaving notebook %s", path) | ||
| with open(path,'w') as f: | ||
| current.write(nb, f, u'json') | ||
| except Exception as e: | ||
| raise web.HTTPError(400, u'Unexpected error while saving notebook: %s' % e) | ||
| raise web.HTTPError(400, u'Unexpected error while autosaving notebook: %s' % e) | ||
|
|
||
| # save .py script as well | ||
| if self.save_script: | ||
| pypath = os.path.splitext(path)[0] + '.py' | ||
| self.log.debug("Writing script %s", pypath) | ||
| try: | ||
| with io.open(pypath,'w', encoding='utf-8') as f: | ||
| current.write(nb, f, u'py') | ||
|
|
@@ -154,25 +194,52 @@ def write_notebook_object(self, nb, notebook_id=None): | |
|
|
||
| # remove old files if the name changed | ||
| if old_name != new_name: | ||
| # update mapping | ||
| self.mapping[notebook_id] = new_name | ||
| self.rev_mapping[new_name] = notebook_id | ||
| del self.rev_mapping[old_name] | ||
|
|
||
| # remove renamed original, if it exists | ||
| old_path = self.get_path_by_name(old_name) | ||
| if os.path.isfile(old_path): | ||
| self.log.debug("unlinking notebook %s", old_path) | ||
| os.unlink(old_path) | ||
|
|
||
| # cleanup old script, if it exists | ||
| if self.save_script: | ||
| old_pypath = os.path.splitext(old_path)[0] + '.py' | ||
| if os.path.isfile(old_pypath): | ||
| self.log.debug("unlinking script %s", old_pypath) | ||
| os.unlink(old_pypath) | ||
| self.mapping[notebook_id] = new_name | ||
| self.rev_mapping[new_name] = notebook_id | ||
| del self.rev_mapping[old_name] | ||
|
|
||
|
|
||
| # rename checkpoints to follow file | ||
| for cp in old_checkpoints: | ||
| checkpoint_id = cp['checkpoint_id'] | ||
| old_cp_path = self.get_checkpoint_path_by_name(old_name, checkpoint_id) | ||
| new_cp_path = self.get_checkpoint_path_by_name(new_name, checkpoint_id) | ||
| if os.path.isfile(old_cp_path): | ||
| self.log.debug("renaming checkpoint %s -> %s", old_cp_path, new_cp_path) | ||
| os.rename(old_cp_path, new_cp_path) | ||
|
|
||
| return notebook_id | ||
|
|
||
| def delete_notebook(self, notebook_id): | ||
| """Delete notebook by notebook_id.""" | ||
| path = self.find_path(notebook_id) | ||
| if not os.path.isfile(path): | ||
| nb_path = self.get_path(notebook_id) | ||
| if not os.path.isfile(nb_path): | ||
| raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id) | ||
| os.unlink(path) | ||
|
|
||
| # clear checkpoints | ||
| for checkpoint in self.list_checkpoints(notebook_id): | ||
| checkpoint_id = checkpoint['checkpoint_id'] | ||
| path = self.get_checkpoint_path(notebook_id, checkpoint_id) | ||
| self.log.debug(path) | ||
| if os.path.isfile(path): | ||
| self.log.debug("unlinking checkpoint %s", path) | ||
| os.unlink(path) | ||
|
|
||
| self.log.debug("unlinking notebook %s", nb_path) | ||
| os.unlink(nb_path) | ||
| self.delete_notebook_id(notebook_id) | ||
|
|
||
| def increment_filename(self, basename): | ||
|
|
@@ -191,6 +258,89 @@ def increment_filename(self, basename): | |
| else: | ||
| i = i+1 | ||
| return name | ||
|
|
||
| # Checkpoint-related utilities | ||
|
|
||
| def get_checkpoint_path_by_name(self, name, checkpoint_id): | ||
| """Return a full path to a notebook checkpoint, given its name and checkpoint id.""" | ||
| filename = "{name}-{checkpoint_id}{ext}".format( | ||
| name=name, | ||
| checkpoint_id=checkpoint_id, | ||
| ext=self.filename_ext, | ||
| ) | ||
| path = os.path.join(self.checkpoint_dir, filename) | ||
| return path | ||
|
|
||
| def get_checkpoint_path(self, notebook_id, checkpoint_id): | ||
| """find the path to a checkpoint""" | ||
| name = self.get_name(notebook_id) | ||
| return self.get_checkpoint_path_by_name(name, checkpoint_id) | ||
|
|
||
| def get_checkpoint_info(self, notebook_id, checkpoint_id): | ||
| """construct the info dict for a given checkpoint""" | ||
| path = self.get_checkpoint_path(notebook_id, checkpoint_id) | ||
| stats = os.stat(path) | ||
| last_modified = datetime.datetime.utcfromtimestamp(stats.st_mtime) | ||
| info = dict( | ||
| checkpoint_id = checkpoint_id, | ||
| last_modified = last_modified, | ||
| ) | ||
|
|
||
| return info | ||
|
|
||
| # public checkpoint API | ||
|
|
||
| def create_checkpoint(self, notebook_id): | ||
| """Create a checkpoint from the current state of a notebook""" | ||
| nb_path = self.get_path(notebook_id) | ||
| # only the one checkpoint ID: | ||
| checkpoint_id = "checkpoint" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I actually think we should fully implement multiple checkpoints on the server side with real checkpoint uuids. For now, I think it is reasonable if the frontend can only restore to the previous checkpoint, but there is no reason not to do the full thing on the server. I think this will allow us to better test out the ideas that will lead up to a git-backed checkpoint. |
||
| cp_path = self.get_checkpoint_path(notebook_id, checkpoint_id) | ||
| self.log.debug("creating checkpoint for notebook %s", notebook_id) | ||
| if not os.path.exists(self.checkpoint_dir): | ||
| os.mkdir(self.checkpoint_dir) | ||
| shutil.copy2(nb_path, cp_path) | ||
|
|
||
| # return the checkpoint info | ||
| return self.get_checkpoint_info(notebook_id, checkpoint_id) | ||
|
|
||
| def list_checkpoints(self, notebook_id): | ||
| """list the checkpoints for a given notebook | ||
|
|
||
| This notebook manager currently only supports one checkpoint per notebook. | ||
| """ | ||
| checkpoint_id = "checkpoint" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's implement multiple checkpoints here as well. Part of the reason I want us to do this now is that it will help us think through how we will track the ordering of the checkpoints. Raw uuids are not ordered. We need to figure out a good way of overlaying an ordering on the checkpoints that is preserved across server restarts (IOW, we can't just use an in-memory ordered list of uuids).
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, this ordering question is another area where we have to think about how this relates to git integration. |
||
| path = self.get_checkpoint_path(notebook_id, checkpoint_id) | ||
| if not os.path.exists(path): | ||
| return [] | ||
| else: | ||
| return [self.get_checkpoint_info(notebook_id, checkpoint_id)] | ||
|
|
||
|
|
||
| def restore_checkpoint(self, notebook_id, checkpoint_id): | ||
| """restore a notebook to a checkpointed state""" | ||
| self.log.info("restoring Notebook %s from checkpoint %s", notebook_id, checkpoint_id) | ||
| nb_path = self.get_path(notebook_id) | ||
| cp_path = self.get_checkpoint_path(notebook_id, checkpoint_id) | ||
| if not os.path.isfile(cp_path): | ||
| self.log.debug("checkpoint file does not exist: %s", cp_path) | ||
| raise web.HTTPError(404, | ||
| u'Notebook checkpoint does not exist: %s-%s' % (notebook_id, checkpoint_id) | ||
| ) | ||
| # ensure notebook is readable (never restore from an unreadable notebook) | ||
| last_modified, nb = self.read_notebook_object_from_path(cp_path) | ||
| shutil.copy2(cp_path, nb_path) | ||
| self.log.debug("copying %s -> %s", cp_path, nb_path) | ||
|
|
||
| def delete_checkpoint(self, notebook_id, checkpoint_id): | ||
| """delete a notebook's checkpoint""" | ||
| path = self.get_checkpoint_path(notebook_id, checkpoint_id) | ||
| if not os.path.isfile(path): | ||
| raise web.HTTPError(404, | ||
| u'Notebook checkpoint does not exist: %s-%s' % (notebook_id, checkpoint_id) | ||
| ) | ||
| self.log.debug("unlinking %s", path) | ||
| os.unlink(path) | ||
|
|
||
| def info_string(self): | ||
| return "Serving notebooks from local directory: %s" % self.notebook_dir | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is `TraitError` the right thing to raise for errors in this method? Do we do this elsewhere rather than just letting it raise the underlying exception?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is copied verbatim from notebook-dir; we raise TraitError when trait values are invalid.