forked from apify/apify-client-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathformat_docs.py
More file actions
executable file
·208 lines (181 loc) · 9.96 KB
/
format_docs.py
File metadata and controls
executable file
·208 lines (181 loc) · 9.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/env python3
import re
from typing import Dict, List
# Our custom types that need special handling which can't be done reasonably generic
CUSTOM_TYPES = ['ActorJobStatus', 'ActorSourceType', 'WebhookEventType']

# The markdown generated by Sphinx has a few issues, we have to fix those manually.
# Every entry is a (pattern, replacement) pair for re.sub(); the order of the entries matters,
# because later rules see the output of the earlier ones (e.g. the re-indentation rule must stay last
# among the whitespace rules, after everything that still looks at the Sphinx indentation).
#
# NOTE(review): rules that look at the indentation of a line capture it with ` +` and re-emit it
# unchanged, so they do not depend on the exact indentation width used by Sphinx.
subs = [
    (r'class (\w+)\([^)]+\)', r'class \1'),  # remove class signature, it is duplicated in __init__ method
    (r'\n\n###', r'\n***\n\n###'),  # add horizontal line before each class and method
    (r'\\__init__', r'__init__'),  # remove bad escape in __init__
    (r'(\*\*Return type\*\*\n\n +)([\w.]+)', r'\1`\2`'),  # put return types in backticks if not already
    (r'(\*\*Return type\*\*\n\n +)`((\w+)Client|ListPage)`', r'\1[`\2`](#\2)'),  # add fragment links to return types if they are of our type
    (r' None([, ])', r' `None`\1'),  # replace None with `None`, if it's in a normal sentence (surrounded by whitespace or comma)
    (r'(\*\*Parameters\*\*\n\n +)\*\*', r'\1* **'),  # workaround for sphinx bug with single parameter not being rendered as list
    (r'\(`(.*), optional`\)', r'(`\1`, *optional*)'),  # workaround for bug formatting parameter types
    # (r'\*\*\[\*\*(\w+)\*\*\]', r'[\1]'),  # workaround for bug formatting parameter types
    (r' \\\*', r' *'),  # workaround for bug with nested lists in argument descriptions
    (r'`(\w+)`\[`(\w+)`\]', r'`\1[\2]`'),  # workaround for complex parameter types with backticks
    (
        r'`Union`\[`str`, `int`, `float`, `bool`, `None`, `Dict`\[`str`, `Any`\], `List\[Any\]`\]',
        '`Union[str, int, float, bool, None, Dict[str, Any], List[Any]]`',
    ),  # workaround for the JSONSerializable type being rendered badly (I know this would be better generic, but it would be really hard to write)
    (r'(###[^\n]+)\n', r'\1\n\n'),  # add empty line after every heading
    (r'\n +\n', '\n\n'),  # remove whitespace from empty lines
    (r'\n\n+', '\n\n'),  # reduce 3+ newlines to 2
    (r'    ', r'  '),  # indent with 2 spaces instead of 4
]

# Add fragment link to parameters of our custom types (I know this would be better generic, but it would be really hard to write)
for custom_type in CUSTOM_TYPES:
    # `CustomType` -> [`CustomType`](#customtype)
    subs.append((fr'`{custom_type}`', f'[`{custom_type}`](#{custom_type.lower()})'))
    # `list of CustomType` -> [`list of CustomType`](#customtype)
    subs.append((fr'`list of {custom_type}`', f'[`list of {custom_type}`](#{custom_type.lower()})'))
    # `CustomType.ITEM` -> [`CustomType.ITEM`](#customtype-item); a callable is needed to lowercase the captured groups
    subs.append((
        fr'`({custom_type})\.([A-Z_]+)`',
        lambda match: f'[{match.group(0)}](#{match.group(1).lower()}-{match.group(2).lower()})',
    ))
# Matches a markdown link which points to a fragment on the same page: "[text](#fragment)"
_FRAGMENT_LINK = re.compile(r'(\[[^\]]*\])(\(#[^)]+\))')


def _collect_toc(lines: List[str]):
    """Collect the table of contents entries for each class.

    Returns a pair of dicts keyed by class name: the first one lists the methods of the class
    (headings like `#### name(args)`), the second one its enum items (headings like `#### NAME( = ...)`).
    Underscores in the collected names are escaped with a backslash.
    """
    toc_methods: Dict[str, List] = {}
    toc_enum_items: Dict[str, List] = {}
    current_class = ''
    for line in lines:
        match = re.match(r'### class (\w+)', line)
        if match is not None:
            current_class = match.group(1)

        match = re.match(r'#### (\w+)\([^: ]', line)
        if match is not None:
            toc_methods.setdefault(current_class, []).append(match.group(1).replace('_', '\\_'))

        match = re.match(r'#### (\w+)\( =', line)
        if match is not None:
            toc_enum_items.setdefault(current_class, []).append(match.group(1).replace('_', '\\_'))

    return toc_methods, toc_enum_items


def _collect_class_variables(lines: List[str]) -> Dict[str, List]:
    """Collect name, type and description of the class variables (headings like `#### name(: ...)`) of each class.

    Returns a dict keyed by class name; each value is a list of dicts with the keys 'name', 'type' and 'description'.
    """
    class_variables: Dict[str, List] = {}
    current_class = ''
    in_class_variable = False
    in_class_variable_description = False
    in_class_variable_type = False
    for line in lines:
        match = re.match(r'### class (\w+)', line)
        if match is not None:
            current_class = match.group(1)

        # A class variable heading starts a new record, the lines after it are its description
        match = re.match(r'#### (\w+)\(:', line)
        if match is not None:
            in_class_variable = True
            in_class_variable_description = True
            class_variables.setdefault(current_class, []).append({
                'name': match.group(1).replace('_', '\\_'),
                'type': '',
                'description': '',
            })
            continue

        # A horizontal line ends the current class variable section
        if in_class_variable and re.match(r'^\*\*\*', line) is not None:
            in_class_variable = False
            in_class_variable_description = False
            in_class_variable_type = False

        if in_class_variable_description:
            if re.match(r'^\* \*\*Type', line) is not None:
                # The "Type" list item ends the description, the type itself follows on an indented line
                in_class_variable_description = False
                in_class_variable_type = True
            elif line != '':
                # Description lines are concatenated without any separator
                class_variables[current_class][-1]['description'] += line

        if in_class_variable_type:
            # Accept any indentation width, it depends on the indentation produced by the substitutions.
            # Note that the `A-z` range also covers the characters [ \ ] ^ _ and the backtick.
            match = re.match(r'^ +([A-z]+)', line)
            if match is not None:
                class_variables[current_class][-1]['type'] = match.group(1)

    return class_variables


def _add_tocs_and_fragment_links(
    lines: List[str],
    toc_methods: Dict[str, List],
    toc_enum_items: Dict[str, List],
    class_variables: Dict[str, List],
) -> List[str]:
    """Insert the per-class tables of contents, drop class variable sections and add fragment link markers to headings.

    NOTE(review): the nesting of the branches below was reconstructed from a copy of this script
    which had lost its indentation - confirm against the canonical file.
    """
    transformed_lines = []
    current_class = ''
    in_class_description = False
    in_class_variable_description = False
    for line in lines:
        # Add table of contents to the beginning of each class (after the class description)
        if in_class_description:
            if re.match(r'^\*\*\*', line) is not None:
                if current_class in class_variables:
                    transformed_lines.append('#### Instance attributes')
                    transformed_lines.append('')
                    transformed_lines.append('Name | Type | Description')
                    transformed_lines.append('---- | ---- | -----------')
                    for variable in class_variables[current_class]:
                        transformed_lines.append(f'`{variable["name"]}` | `{variable["type"]}` | {variable["description"]}')
                    transformed_lines.append('')

                if current_class in toc_methods:
                    for method in toc_methods[current_class]:
                        transformed_lines.append(f'* [{method}()](#{current_class.lower()}-{method.lower()})')
                    transformed_lines.append('')

                if current_class in toc_enum_items:
                    for enum_item in toc_enum_items[current_class]:
                        transformed_lines.append(f'* [{enum_item}](#{current_class.lower()}-{enum_item.lower()})')
                    transformed_lines.append('')

                in_class_description = False

            transformed_lines.append(line)

        # Remove class variable descriptions (we add them in another way)
        elif in_class_variable_description:
            if re.match(r'^\*\*\*', line) is not None:
                in_class_variable_description = False

        else:
            # Mark start of parsing class variable description:
            if re.match(r'#### (\w+)\(:', line) is not None:
                in_class_variable_description = True
                continue

            # Add special fragment link marker to each class header (will get used in Apify docs to display "Copy link" link)
            match = re.match(r'### class (\w+)', line)
            if match is not None:
                current_class = match.group(1)
                in_class_description = True
                line = re.sub(r'### class', f'### [](#{current_class.lower()})', line)

            # Add special fragment link marker to each function header (will get used in Apify docs to display "Copy link" link)
            match = re.match(r'#### (\w+)\([^: ]', line)
            if match is not None:
                method = match.group(1)
                line = re.sub(r'(#### .*)\\\*(.*)', r'\1*\2', line)
                line = re.sub(r'#### (\w+)(\([^)]*\))', f'#### [](#{current_class.lower()}-{method.lower()}) `{current_class}.\\1\\2`', line)

            # Add special fragment link marker to each enum item header (will get used in Apify docs to display "Copy link" link)
            match = re.match(r'#### (\w+)\( =', line)
            if match is not None:
                method = match.group(1)
                line = re.sub(r'(#### .*)\\\*(.*)', r'\1*\2', line)
                line = re.sub(r'#### (\w+)(\([^)]*\))', f'#### [](#{current_class.lower()}-{method.lower()}) `{current_class}.\\1`', line)

            transformed_lines.append(line)

    return transformed_lines


def _lowercase_fragment_links(line: str) -> str:
    """Lowercase the target of every `[text](#fragment)` link on the line, keeping the link text untouched.

    A callable replacement is used so that each link is rewritten from its own match (a line may contain
    several links) and so that backslashes in the link text are not interpreted as replacement escapes.
    """
    return _FRAGMENT_LINK.sub(lambda match: match.group(1) + match.group(2).lower(), line)


# Load the api_reference.md generated by Sphinx
with open('api_reference.md', 'r+') as api_reference:
    api_reference_content = api_reference.read()

    # Do the above defined replacements
    for (pattern, repl) in subs:
        api_reference_content = re.sub(pattern, repl, api_reference_content, flags=re.M)

    lines = api_reference_content.splitlines()

    # Generate the table of contents for each class
    toc_methods, toc_enum_items = _collect_toc(lines)

    # Generate class variable members for each class
    class_variables = _collect_class_variables(lines)

    # Parse the whole file again and add fragment links
    transformed_lines = _add_tocs_and_fragment_links(lines, toc_methods, toc_enum_items, class_variables)

    # Lowercase all the links
    transformed_lines = [_lowercase_fragment_links(line) for line in transformed_lines]

    # Add a short header
    api_reference_content = (
        '\n## API Reference\n\n'
        + 'All public classes, methods and their parameters can be inspected in this API reference.\n\n'
        + '\n'.join(transformed_lines) + '\n'
    )

    # Remove successive empty lines again
    api_reference_content = '\n' + re.sub(r'\n\n+', '\n\n', api_reference_content, flags=re.M).strip('\n') + '\n'

    # Rewrite the api_reference.md file with the transformed content
    # (the file is opened in 'r+' mode, so rewind, overwrite and cut off whatever is left of the old content)
    api_reference.seek(0)
    api_reference.write(api_reference_content)
    api_reference.truncate()