diff scripts/dump_dbm_sessions_db.py @ 6799:1188bb423f92

Script dumps dbm files without marshalled data like _ids. Allow the _ids database to be dumped. The _ids db (when using the anydbm backend) records the number last used for each object type. E.g. if there were 8 statuses defined, it would have the entry "status: 8". Allow other dbm databases (like the node or journal dbs) to be dumped. Used argparse for command line options and added support for: dump all keys in sorted order; dump specific key(s); pretty print output.
author John Rouillard <rouilj@ieee.org>
date Sun, 24 Jul 2022 01:31:45 -0400
parents 61481d7bbb07
children 9ba04f37896f
line wrap: on
line diff
--- a/scripts/dump_dbm_sessions_db.py	Thu Jul 21 01:24:24 2022 -0400
+++ b/scripts/dump_dbm_sessions_db.py	Sun Jul 24 01:31:45 2022 -0400
@@ -9,27 +9,97 @@
    key: <timestamp> data
 
 where <timestamp> is the human readable __timestamp decoded from the
-data object.
+data object. With pretty print the data object is dumped in json format:
 
+   key:
+     <timestamp>
+       {
+          key: val,
+          ...
+       }
+
+If data cannot be unmarshalled into a python object, output is key: data or
+   key:
+     data
+
+if pretty printed.
 """
 
-import dbm, marshal, sys
+import argparse, dbm, json, marshal, os, sys
 from datetime import datetime
 
-try:
-  file = sys.argv[1]
-except IndexError:
+def indent(text, amount, ch=" "):
+  """ Found at: https://stackoverflow.com/a/8348914
+  """
+  padding = amount * ch
+  return ''.join(padding+line for line in text.splitlines(True))
+
+def print_marshal(k):
+  d = marshal.loads(db[k])
+  try:
+    t = datetime.fromtimestamp(d['__timestamp'])
+  except (KeyError, TypeError):
+    # TypeError raised if marshalled data is not a dict (list, tuple etc)
+    t = "no_timestamp"
+  if args.pretty:
+    print("%s:\n  %s\n%s"%(k, t, indent(json.dumps(
+      d, sort_keys=True, indent=4), 4)))
+  else:
+    print("%s: %s %s"%(k, t, d))
+
+def print_raw(k):
+  if args.pretty:
+    print("%s:\n  %s"%(k, db[k]))
+  else:
+    print("%s: %s"%(k, db[k]))
+
+parser = argparse.ArgumentParser(
+  description='Dump DBM files used by Roundup in storage order.')
+parser.add_argument('-k', '--key', action="append",
+    help='dump the entry for a key, can be used multiple times.')
+parser.add_argument('-K', '--keysonly', action='store_true',
+    help='print the database keys, sorted in byte order.')
+parser.add_argument('-p', '--pretty', action='store_true',
+    help='pretty print the output rather than printing on one line.')
+parser.add_argument('file', nargs='?',
+                    help='file to be dumped ("sessions" if not provided)')
+args = parser.parse_args()
+
+if args.file:
+  file = args.file
+else:
   file="sessions"
 
 try:
    db = dbm.open(file)
-except Exception:
-   print("Unable to open database: %s"%file)
-   exit(1)
+except Exception as e:
+  print("Unable to open database for %s: %s"%(file, e))
+  try:
+    os.stat(file)
+    print("  perhaps file is invalid or was created with a different version of Python?")
+  except OSError:
+    # os.stat failed, so the file is missing or inaccessible; skip the hint.
+    pass
+  exit(1)
+
+if args.keysonly:
+  for k in sorted(db.keys()):
+    print("%s"%k)
+  exit(0)
+
+if args.key:
+  for k in args.key:
+    try:
+      print_marshal(k)
+    except (ValueError):
+      print_raw(k)
+  exit(0)
 
 k = db.firstkey()
 while k is not None:
-    d = marshal.loads(db[k])
-    t = datetime.fromtimestamp(d['__timestamp'])
-    print("%s: %s %s"%(k, t, d))
-    k = db.nextkey(k)
+  try:
+    print_marshal(k)
+  except (ValueError):  # ValueError marshal.loads failed
+    print_raw(k)
+
+  k = db.nextkey(k)

Roundup Issue Tracker: http://roundup-tracker.org/