11#!/usr/bin/env python
2- import argparse
3- import multiprocessing as mp
42import os
5- import json
6- import uuid
7- import sys
3+ from argparse import ArgumentParser
84from concurrent import futures
95from collections import defaultdict
106from functools import partial
7+ from json import dumps
8+ from multiprocessing import cpu_count
9+ from sys import argv
10+ from uuid import uuid4
1111
12- from bs4 import BeautifulSoup
13- from markdown import markdown
1412import requests
1513import urllib3
14+ from bs4 import BeautifulSoup
15+ from markdown import markdown
1616
1717
1818# Suppress urllib3's InsecureRequestWarning: certs SHOULD be trusted (https)
1919urllib3 .disable_warnings (urllib3 .exceptions .InsecureRequestWarning )
2020
2121# Random per-run bot ID, to avoid rate limiting (tcp)
22- URL_BOT_ID = f'Bot { str (uuid . uuid4 ())} '
22+ URL_BOT_ID = f'Bot { str (uuid4 ())} '
2323
2424
2525def extract_urls_from_html (content ):
@@ -104,7 +104,7 @@ def bad_url(url_status):
104104
105105
106106def parse_args (argv ):
107- parser = argparse . ArgumentParser (
107+ parser = ArgumentParser (
108108 description = 'Check correctness of url links.' ,
109109 add_help = True )
110110 parser .add_argument (
@@ -119,14 +119,14 @@ def parse_args(argv):
119119 help = 'Number of url retries' )
120120 parser .add_argument (
121121 '--num-threads' ,
122- default = mp . cpu_count ()* 4 ,
122+ default = cpu_count ()* 4 ,
123123 dest = 'threads' ,
124124 help = 'Number of threads to run with' )
125125 return parser .parse_args (argv )
126126
127127
128128def main ():
129- args = parse_args (sys . argv [1 :])
129+ args = parse_args (argv [1 :])
130130 print ('Extract urls...' )
131131 all_urls = extract_urls (os .getcwd () + os .path .sep + 'content' )
132132 print ('\n Check urls...' )
@@ -147,8 +147,8 @@ def main():
147147 bad_url : all_urls [bad_url ]
148148 for bad_url in bad_url_status
149149 }
150- status_content = json . dumps (bad_url_status , indent = 4 )
151- location_content = json . dumps (bad_url_location , indent = 4 )
150+ status_content = dumps (bad_url_status , indent = 4 )
151+ location_content = dumps (bad_url_location , indent = 4 )
152152 print (f'\n Bad url status: { status_content } ' )
153153 print (f'\n Bad url locations: { location_content } ' )
154154
0 commit comments