-
Notifications
You must be signed in to change notification settings - Fork 90
Expand file tree
/
Copy pathutils.py
More file actions
122 lines (93 loc) · 3.24 KB
/
utils.py
File metadata and controls
122 lines (93 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Define utility functions for the pshtt library."""
# Standard Python Libraries
import contextlib
import csv
import datetime
import errno
import json
import logging
import os
import re
import sys
import traceback
# Display exception without re-throwing it.
def format_last_exception():
    """Return the exception currently being handled as printable text.

    The exception is only formatted, never re-raised.

    Returns:
        str: The pieces produced by ``traceback.format_exception`` for the
        current ``sys.exc_info()`` triple, joined with ``os.linesep``.
    """
    # sys.exc_info() yields (type, value, traceback) in exactly the order
    # traceback.format_exception expects, so it can be unpacked directly.
    formatted_chunks = traceback.format_exception(*sys.exc_info())
    return os.linesep.join(formatted_chunks)
# mkdir -p in python, from:
# http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
def mkdir_p(path):
    """Create ``path`` and any missing parent directories (like ``mkdir -p``).

    Calling this for a directory that already exists is a no-op.

    Args:
        path: The directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
            (This is an ``OSError`` subclass, so callers catching ``OSError``
            keep working.)
        OSError: For any other failure reported by ``os.makedirs``.
    """
    # exist_ok=True tolerates an existing *directory* only. Swallowing every
    # EEXIST error would also hide the case where ``path`` is a regular file,
    # leaving the caller without the directory it asked for.
    os.makedirs(path, exist_ok=True)
def json_for(data):
    """Serialize ``data`` to a human-readable JSON string.

    Keys are sorted and nested levels are indented by two spaces. Values the
    JSON encoder cannot handle natively are passed through
    ``format_datetime``.

    Args:
        data: The object to serialize.

    Returns:
        str: The JSON text.
    """
    dump_options = {
        "sort_keys": True,
        "indent": 2,
        "default": format_datetime,
    }
    return json.dumps(data, **dump_options)
def write(content, destination, binary=False):
    """Write ``content`` to ``destination``, creating parent directories first.

    Args:
        content: The data to write; bytes when ``binary`` is true, text
            otherwise.
        destination: Path of the file to (over)write.
        binary: Open the file in binary mode instead of UTF-8 text mode.
    """
    parent = os.path.dirname(destination)
    # A bare filename has no directory component, so there is nothing to make.
    if parent != "":
        mkdir_p(parent)

    # Binary mode must not be given an encoding; text mode is always UTF-8.
    if binary:
        mode, encoding = "wb", None
    else:
        mode, encoding = "w", "utf-8"

    with open(destination, mode, encoding=encoding) as handle:
        handle.write(content)
def format_datetime(obj):
    """Convert a date-like value to its string form.

    Args:
        obj: The value to convert.

    Returns:
        The ISO 8601 text for ``datetime.date`` instances (which includes
        ``datetime.datetime``), the value itself when it is already a string,
        and ``None`` for anything else.
    """
    if isinstance(obj, str):
        formatted = obj
    elif isinstance(obj, datetime.date):
        formatted = obj.isoformat()
    else:
        formatted = None
    return formatted
# Load domains from a CSV, skip a header row
def load_domains(domain_csv):
    """Load a list of domains from the first column of a CSV file.

    Each value is stripped of surrounding whitespace and lower-cased. Rows
    that are empty, or whose first cell is blank, are skipped. Until the first
    domain has been collected, any row whose first cell starts with
    ``"domain"`` is treated as a header and skipped as well.

    Args:
        domain_csv: Path to the CSV file to read.

    Returns:
        list[str]: The normalized domains, in file order.
    """
    domains = []
    # newline="" is what the csv module requires so that it can handle line
    # endings (including newlines inside quoted cells) itself. "utf-8-sig"
    # reads plain UTF-8 unchanged but drops a leading byte-order mark, which
    # would otherwise stop the header row from being recognized.
    with open(domain_csv, encoding="utf-8-sig", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # Skip empty rows.
            if not row:
                continue
            # Normalize once so the stored value matches the value tested.
            domain = row[0].strip().lower()
            if not domain:
                continue
            # Skip any header row.
            if not domains and domain.startswith("domain"):
                continue
            domains.append(domain)
    return domains
# Configure logging level, so logging.debug can hinge on --debug.
def configure_logging(debug_logging=False):
    """Set up the root logger through ``logging.basicConfig``.

    Messages are emitted bare (message text only).

    Args:
        debug_logging: When true the level is ``logging.DEBUG``; otherwise it
            is ``logging.WARNING``.
    """
    level = logging.WARNING
    if debug_logging:
        level = logging.DEBUG
    logging.basicConfig(level=level, format="%(message)s")
def format_domains(domains):
    """Strip a leading scheme and ``www.`` prefix from each domain.

    Args:
        domains: An iterable of domain strings.

    Returns:
        list[str]: The domains, in order, with one leading ``http://`` or
        ``https://`` and one following ``www.`` removed when present.
    """
    # Anchored at the start, so only a single leading prefix is ever removed.
    leading_prefix = re.compile(r"^(https?://)?(www\.)?")
    return [leading_prefix.sub("", domain) for domain in domains]
def debug(*args, divider=False):
    """Log a message at debug level, optionally preceded by a divider line.

    Args:
        *args: Forwarded unchanged to ``logging.debug`` (a message followed by
            its %-style arguments). Nothing is logged when omitted.
        divider: When true, first log a row of dashes surrounded by line
            separators.
    """
    if divider:
        # Joining with empty ends puts one line separator on each side.
        logging.debug(os.linesep.join(("", "-------------------------", "")))

    if not args:
        return
    logging.debug(*args)
@contextlib.contextmanager
def smart_open(filename=None):
    """Context manager that can handle writing to a file or stdout.

    Adapted from: https://stackoverflow.com/a/17603000

    Args:
        filename: Path of the file to open for writing as UTF-8 text, or
            ``None`` to use the current ``sys.stdout``.

    Yields:
        The writable text stream. A file opened here is closed on exit; a
        stream taken from ``sys.stdout`` is never closed.
    """
    # Decide ownership up front. Comparing the handle against sys.stdout at
    # exit time is unreliable: if sys.stdout is rebound while the block runs,
    # the stream borrowed at entry would look like "ours" and get closed.
    owns_handle = filename is not None
    handle = open(filename, "w", encoding="utf-8") if owns_handle else sys.stdout
    try:
        yield handle
    finally:
        if owns_handle:
            handle.close()