apify-client-python/docs/res/format_docs.py at master · pythonif/apify-client-python

History

executable file

208 lines (181 loc) · 9.96 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

#!/usr/bin/env python3

import re

from typing import Dict, List

# Our custom types that need special handling which can't be done reasonably generic
CUSTOM_TYPES = ['ActorJobStatus', 'ActorSourceType', 'WebhookEventType']

# The markdown generated by Sphinx has a few issues, we have to fix those manually.
# Every entry is a (pattern, replacement) pair; the entries are order-sensitive
# (e.g. the return-type link rule expects the backticks added by the rule before it).
# NOTE(review): this listing looks like it went through a tool that collapsed runs of
# spaces. The single spaces after '\n\n' in the "Return type" / "Parameters" rules and
# in the nested-list rule may originally have been wider indentation - confirm against
# the real Sphinx output before relying on them.
subs = [
    (r'class (\w+)\([^)]+\)', r'class \1'),  # remove class signature, it is duplicated in __init__ method
    (r'\n\n###', r'\n***\n\n###'),  # add horizontal line before each class and method
    (r'\\__init__', r'__init__'),  # remove bad escape in __init__
    (r'(?<=\*\*Return type\*\*\n\n )([\w.]+)', r'`\1`'),  # put return types in backticks if not already
    (r'(?<=\*\*Return type\*\*\n\n )`((\w+)Client|ListPage)`', r'[`\1`](#\1)'),  # add fragment links to return types if they are of our type
    (r' None([, ])', r' `None`\1'),  # replace None with `None`, if it's in a normal sentence (surrounded by whitespace or comma)
    (r'\*\*Parameters\*\*\n\n \*\*', '**Parameters**\n\n * **'),  # workaround for sphinx bug with single parameter not being rendered as list
    (r'\(`(.*), optional`\)', r'(`\1`, *optional*)'),  # workaround for bug formatting parameter types
    # (r'\*\*\[\*\*(\w+)\*\*\]', r'[\1]'), # workaround for bug formatting parameter types
    (r' \\\*', r' *'),  # workaround for bug with nested lists in argument descriptions
    (r'`(\w+)`\[`(\w+)`\]', r'`\1[\2]`'),  # workaround for complex parameter types with backticks
    (
        # The opening parenthesis and the separating comma of this pair were missing,
        # which made the whole list literal a syntax error.
        r'`Union`\[`str`, `int`, `float`, `bool`, `None`, `Dict`\[`str`, `Any`\], `List\[Any\]`\]',
        '`Union[str, int, float, bool, None, Dict[str, Any], List[Any]]`',
    ),  # workaround for the JSONSerializable type being rendered badly (I know this would be better generic, but it would be really hard to write)
    (r'(###[^\n]+)\n', r'\1\n\n'),  # add empty line after every heading
    (r'\n +\n', '\n\n'),  # remove whitespace from empty lines
    (r'\n\n+', '\n\n'),  # reduce 3+ newlines to 2
    # Four spaces -> two spaces; a one-space -> one-space rule would be a no-op
    # and would not do what the description says.
    (r'    ', r'  '),  # indent with 2 spaces instead of 4
]

# Add fragment link to parameters of our custom types (I know this would be better generic, but it would be really hard to write)
def _link_custom_type_member(match):
    """Wrap a matched `Type.MEMBER` literal in a link to the `#type-member` fragment."""
    literal, type_name, member_name = match.group(0, 1, 2)
    return f'[{literal}](#{type_name.lower()}-{member_name.lower()})'


for custom_type in CUSTOM_TYPES:
    fragment = custom_type.lower()
    subs.extend([
        # the bare type name
        (fr'`{custom_type}`', f'[`{custom_type}`](#{fragment})'),
        # a "list of <type>" annotation, linked to the same fragment as the type itself
        (fr'`list of {custom_type}`', f'[`list of {custom_type}`](#{fragment})'),
        # a single member of the type, e.g. `Type.SOME_MEMBER`
        (fr'`({custom_type})\.([A-Z_]+)`', _link_custom_type_member),
    ])

# Load the api_reference.md generated by Sphinx, transform it in memory and write it back.
# The file is opened in 'r+' mode so the same handle serves the initial read and the
# final in-place overwrite. The work happens in these steps:
#   1. apply the regex substitutions from `subs`,
#   2. collect method / enum item names per class (for the tables of contents),
#   3. collect class variables per class (for the "Instance attributes" tables),
#   4. re-emit the document with tables, tables of contents and fragment link markers,
#   5. lowercase all fragment links, prepend a short header and save.
with open('api_reference.md', 'r+', encoding='utf-8') as api_reference:
    api_reference_content = api_reference.read()

    # Do the above defined replacements (in list order, each one sees the previous one's output)
    for (pattern, repl) in subs:
        api_reference_content = re.sub(pattern, repl, api_reference_content, flags=re.M)

    # Generate the table of contents for each class
    toc_methods: Dict[str, List] = {}
    toc_enum_items: Dict[str, List] = {}
    current_class = ''
    for line in api_reference_content.splitlines():
        match = re.match(r'### class (\w+)', line)
        if match is not None:
            current_class = match.group(1)

        # "#### name(" followed by anything but ':' or ' ' is a method header
        match = re.match(r'#### (\w+)\([^: ]', line)
        if match is not None:
            # underscores are backslash-escaped so markdown does not treat them as emphasis
            method = match.group(1).replace('_', '\\_')
            toc_methods.setdefault(current_class, []).append(method)

        # "#### NAME( =" is an enum item header
        match = re.match(r'#### (\w+)\( =', line)
        if match is not None:
            enum_item = match.group(1).replace('_', '\\_')
            toc_enum_items.setdefault(current_class, []).append(enum_item)

    # Generate class variable members for each class
    class_variables: Dict[str, List] = {}
    current_class = ''
    in_class_variable = False
    in_class_variable_description = False
    in_class_variable_type = False
    for line in api_reference_content.splitlines():
        match = re.match(r'### class (\w+)', line)
        if match is not None:
            current_class = match.group(1)

        # "#### name(:" starts a new class variable entry; its text follows on later lines
        match = re.match(r'#### (\w+)\(:', line)
        if match is not None:
            in_class_variable = True
            in_class_variable_description = True
            class_variables.setdefault(current_class, []).append({
                'name': match.group(1).replace('_', '\\_'),
                'type': '',
                'description': '',
            })
            continue

        # The horizontal rule ends the current class variable entry
        if in_class_variable and re.match(r'^\*\*\*', line) is not None:
            in_class_variable = False
            in_class_variable_description = False
            in_class_variable_type = False

        if in_class_variable_description:
            if re.match(r'^\* \*\*Type', line) is not None:
                # the "Type" bullet ends the description, the type itself follows
                in_class_variable_description = False
                in_class_variable_type = True
            elif line != '':
                class_variables[current_class][-1]['description'] += line

        if in_class_variable_type:
            # NOTE(review): '[A-z]' also matches '[', '\', ']', '^', '_' and '`'; '[A-Za-z]' was
            # probably intended. The single leading space may also have been wider - confirm both.
            match = re.match(r'^ ([A-z]+)', line)
            if match is not None:
                class_variables[current_class][-1]['type'] = match.group(1)

    # Parse the whole file again and add fragment links
    lines = api_reference_content.splitlines()
    transformed_lines = []
    current_class = ''
    in_class_description = False
    in_class_variable_description = False
    for line in lines:
        # Add table of contents to the beginning of each class (after the class description)
        if in_class_description:
            if re.match(r'^\*\*\*', line) is not None:
                if current_class in class_variables:
                    transformed_lines.append('#### Instance attributes')
                    transformed_lines.append('')
                    transformed_lines.append('Name | Type | Description')
                    transformed_lines.append('---- | ---- | -----------')
                    for variable in class_variables[current_class]:
                        transformed_lines.append(f'`{variable["name"]}` | `{variable["type"]}` | {variable["description"]}')
                    transformed_lines.append('')

                if current_class in toc_methods:
                    for method in toc_methods[current_class]:
                        transformed_lines.append(f'* [{method}()](#{current_class.lower()}-{method.lower()})')
                    transformed_lines.append('')

                if current_class in toc_enum_items:
                    for enum_item in toc_enum_items[current_class]:
                        transformed_lines.append(f'* [{enum_item}](#{current_class.lower()}-{enum_item.lower()})')
                    transformed_lines.append('')

                in_class_description = False
            # description lines (and the closing rule) are passed through unchanged
            transformed_lines.append(line)

        # Remove class variable descriptions (we add them in another way)
        elif in_class_variable_description:
            # nothing is emitted in this state; the horizontal rule only ends it
            if re.match(r'^\*\*\*', line) is not None:
                in_class_variable_description = False

        else:
            # Mark start of parsing class variable description:
            match = re.match(r'#### (\w+)\(:', line)
            if match is not None:
                in_class_variable_description = True
                continue

            # Add special fragment link marker to each class header (will get used in Apify docs to display "Copy link" link)
            match = re.match(r'### class (\w+)', line)
            if match is not None:
                current_class = match.group(1)
                in_class_description = True
                line = re.sub(r'### class', f'### [](#{current_class.lower()})', line)

            # Add special fragment link marker to each function header (will get used in Apify docs to display "Copy link" link)
            match = re.match(r'#### (\w+)\([^: ]', line)
            if match is not None:
                method = match.group(1)
                # un-escape an escaped asterisk in the header (the pattern is greedy, so the last one)
                line = re.sub(r'(#### .*)\\\*(.*)', r'\1*\2', line)
                line = re.sub(r'#### (\w+)(\([^)]*\))', f'#### [](#{current_class.lower()}-{method.lower()}) `{current_class}.\\1\\2`', line)

            # Add special fragment link marker to each enum item header (will get used in Apify docs to display "Copy link" link)
            match = re.match(r'#### (\w+)\( =', line)
            if match is not None:
                method = match.group(1)
                line = re.sub(r'(#### .*)\\\*(.*)', r'\1*\2', line)
                line = re.sub(r'#### (\w+)(\([^)]*\))', f'#### [](#{current_class.lower()}-{method.lower()}) `{current_class}.\\1`', line)

            transformed_lines.append(line)

    # Lowercase all the links.
    # The replacement is computed per match: a single replacement string built from the
    # first match would overwrite every other link on the same line with the first one
    # (and would additionally be parsed as a regex replacement template).
    fragment_link = re.compile(r'(\[[^\]]*\])(\(#[^)]+\))')
    transformed_lines = [
        fragment_link.sub(lambda link: link.group(1) + link.group(2).lower(), line)
        for line in transformed_lines
    ]

    # Add a short header
    api_reference_content = (
        '\n## API Reference\n\n'
        'All public classes, methods and their parameters can be inspected in this API reference.\n\n'
        + '\n'.join(transformed_lines) + '\n'
    )

    # Remove successive empty lines again
    api_reference_content = '\n' + re.sub(r'\n\n+', '\n\n', api_reference_content, flags=re.M).strip('\n') + '\n'

    # Rewrite the api_reference.md file with the transformed content
    api_reference.seek(0)
    api_reference.write(api_reference_content)
    # drop whatever is left of the old content past the newly written text
    api_reference.truncate()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

format_docs.py

Latest commit

History

format_docs.py

File metadata and controls