Skip to content

Commit 001ca3a

Browse files
author
Arvid Paeglit
committed
added progress bar;
related optimization
1 parent fbee573 commit 001ca3a

File tree

9 files changed

+162
-137
lines changed

9 files changed

+162
-137
lines changed

deepcode/__init__.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,49 @@
11
import asyncio
22
import os
3+
from tqdm import tqdm
34

4-
from .files import prepare_bundle_files
5-
from .bundle import get_filters, generate_bundles, create_git_bundle, fulfill_bundle, check_bundle
5+
from .files import collect_bundle_files, prepare_bundle_hashes
6+
from .bundle import get_filters, generate_bundle, create_git_bundle
67
from .analysis import get_analysis
78
from .utils import logger, profile_speed
89

910

1011
@profile_speed
async def analize_folders(paths, linters_enabled=False):
    """ Entire flow of analyzing local folders.

    Scans the given folders, hashes matching files, uploads them as a
    bundle and waits for the analysis results, reporting progress via tqdm.

    :param paths: iterable of local folder paths to analyze
    :param linters_enabled: also request linter findings from the API
    :return: analysis result dict as produced by get_analysis()
    """
    # Top-level bar: one step per stage of the flow (5 stages total).
    # NOTE: fixed the user-visible typo 'Analizing' -> 'Analyzing'.
    with tqdm(total=5, desc='Analyzing folders', unit='step', leave=False) as pbar:

        pbar.set_description('Fetching supported extensions')
        file_filter = await get_filters()
        pbar.update(1)

        pbar.set_description('Scanning for files')
        bundle_files = collect_bundle_files(paths, file_filter)
        # Materialize the generator so later stages know the file count.
        bundle_files = tuple(
            tqdm(bundle_files, desc='Found files', unit='f', leave=False)  # progress bar
        )
        pbar.update(1)

        pbar.set_description('Computing file hashes')
        file_hashes = prepare_bundle_hashes(
            tqdm(bundle_files, desc='Calculated hashes', unit='files', leave=False)  # progress bar
        )
        pbar.update(1)

        pbar.set_description('Sending data')
        bundle_id = await generate_bundle(file_hashes)
        pbar.update(1)

        pbar.set_description('Requesting audit results')
        res = await get_analysis(bundle_id, linters_enabled=linters_enabled)
        pbar.update(1)
        pbar.set_description('Finished analysis')

    return res
2645

2746

28-
@profile_speed
2947
async def analize_git(platform, owner, repo, oid=None, linters_enabled=False):
3048
""" Entire flow of analyzing remote git repositories. """
3149
bundle_id = await create_git_bundle(platform, owner, repo, oid)

deepcode/analysis.py

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,53 @@
11
import asyncio
22
import aiohttp
3+
from tqdm import tqdm
34

45
from .connection import api_call
5-
from .utils import profile_speed, logger
6+
from .utils import logger
67

78
ANALYSIS_PROGRESS_INTERVAL = 2
89
ANALYSIS_RETRY_DELAY = 5
910
ANALYSIS_RETRIES = 3
1011

11-
async def get_analysis(bundle_id, linters_enabled):
    """ Initiate analysis via API and wait for results.

    Polls the analysis endpoint until it reports a success status,
    retrying up to ANALYSIS_RETRIES times on FAILED responses and
    showing a percentage progress bar in between.

    :param bundle_id: id of the uploaded bundle to analyze
    :param linters_enabled: also request linter findings
    :return: dict with 'id', 'url' and 'results' keys
    :raises RuntimeError: after ANALYSIS_RETRIES consecutive FAILED responses
    """

    success_statuses = ['DONE'] if linters_enabled else ['DONE', 'DC_DONE']
    attempt = 0

    with tqdm(total=100, unit='%', leave=False) as pbar:

        current_percentage = 0
        while True:
            path = ('analysis/{}?linters' if linters_enabled else 'analysis/{}').format(bundle_id)
            data = await api_call(path)

            # Read the status once, defensively: some responses may omit it.
            # (Previously the FAILED branch used data['status'] and could
            # raise KeyError instead of falling through to the wait branch.)
            status = data.get('status', '')
            pbar.set_description(status.lower().capitalize())

            if status in success_statuses and data.get('analysisResults'):
                # Fill the bar to 100% before it closes.
                pbar.update(100 - current_percentage)
                return {
                    'id': bundle_id,
                    'url': data['analysisURL'],
                    'results': data['analysisResults']
                }

            elif status == 'FAILED':
                if attempt >= ANALYSIS_RETRIES:
                    raise RuntimeError("Analysis failed for {} times. It seems, Deepcode has some issues. Please contact Deepcode. Response --> {}".format(ANALYSIS_RETRIES, data))

                logger.info('Analysis failed. Retrying in {} sec'.format(ANALYSIS_RETRY_DELAY))
                attempt += 1
                await asyncio.sleep(ANALYSIS_RETRY_DELAY)

            elif data.get('progress'):
                progress = int(data['progress'] * 100)
                pbar.update(progress - current_percentage)
                current_percentage = progress

                await asyncio.sleep(ANALYSIS_PROGRESS_INTERVAL)

            else:
                logger.info('initialising...')
                await asyncio.sleep(ANALYSIS_PROGRESS_INTERVAL)

deepcode/bundle.py

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import asyncio
44
import time
55
from funcy import chunks
6+
from tqdm import tqdm
67

78
from .connection import api_call
89
from .files import get_file_content, compose_file_buckets, MAX_BUCKET_SIZE
9-
from .utils import profile_speed, logger
10+
from .utils import logger
11+
1012

11-
@profile_speed
1213
async def get_filters():
1314
""" Fetch supported file extensions """
1415
filters = await api_call('filters')
@@ -18,6 +19,7 @@ async def get_filters():
1819

1920

2021
async def _request_file_bundle(path, method, file_hashes):
22+
2123
res = await api_call(
2224
path='bundle', method='POST',
2325
data={'files': dict(file_hashes), 'removedFiles': []},
@@ -27,33 +29,44 @@ async def _request_file_bundle(path, method, file_hashes):
2729
logger.debug('bundle id: {} | missing_files: {}'.format(bundle_id, len(missing_files)))
2830
return bundle_id, missing_files
2931

30-
def create_file_bundle(file_hashes):
    """ Create a brand-new bundle through the shared API helper. """
    # Not a coroutine itself: it hands back the helper's awaitable so the
    # caller decides when to await it.
    endpoint, method = 'bundle', 'POST'
    return _request_file_bundle(endpoint, method, file_hashes)
3436

35-
def extend_file_bundle(bundle_id, file_hashes):
    """ Extend an existing bundle through the shared API helper. """
    # Not a coroutine itself: it hands back the helper's awaitable so the
    # caller decides when to await it.
    endpoint = 'bundle/{}'.format(bundle_id)
    return _request_file_bundle(endpoint, 'PUT', file_hashes)
3941

4042

41-
async def generate_bundle(file_hashes):
    """ Generate bundles via API. Encapsulates all logic of our bundle protocol.

    Splits file_hashes into size-limited chunks, creates a bundle from the
    first chunk, extends it with the remaining ones, and ensures every
    server-reported missing file is uploaded before moving on.

    :param file_hashes: sequence of (path, hash) pairs
    :return: id of the fully uploaded bundle (None when file_hashes is empty)
    """

    async def _complete_bundle(bundle_task):
        # Run one create/extend request, then keep uploading until the
        # server reports that no files are missing anymore.
        bundle_id, missing_files = await bundle_task
        while missing_files:
            await fulfill_bundle(bundle_id, missing_files)  # Send all missing files
            missing_files = await check_bundle(bundle_id)  # Check that all files are uploaded

        return bundle_id

    bundle_id = None

    # Progress is counted in files (total and per-chunk updates are file
    # counts), so the unit label says 'files' rather than 'bundle'.
    with tqdm(total=len(file_hashes), desc='Generated bundles', unit='files', leave=False) as pbar:

        for chunked_files in chunks(int(MAX_BUCKET_SIZE // 200), file_hashes):

            if not bundle_id:
                bundle_id = await _complete_bundle(create_file_bundle(chunked_files))
            else:
                bundle_id = await _complete_bundle(extend_file_bundle(bundle_id, chunked_files))

            pbar.update(len(chunked_files))

    return bundle_id
68+
5569

56-
@profile_speed
5770
async def create_git_bundle(platform, owner, repo, oid):
5871
""" Create a git bundle via API """
5972
data = {
@@ -68,7 +81,7 @@ async def create_git_bundle(platform, owner, repo, oid):
6881
res = await api_call('bundle', method='POST', data=data, compression_level=9)
6982
return res['bundleId']
7083

71-
@profile_speed
84+
7285
async def check_bundle(bundle_id):
7386
""" Check missing files in bundle via API """
7487
data = await api_call('bundle/{}'.format(bundle_id), method='GET')
@@ -79,7 +92,6 @@ async def upload_bundle_files(bundle_id, entries):
7992
"""
8093
Each entry should contain of: (path, hash)
8194
"""
82-
start_time = time.time()
8395

8496
data = []
8597
for file_path, file_hash in entries:
@@ -96,23 +108,23 @@ async def upload_bundle_files(bundle_id, entries):
96108
callback=lambda resp: resp.text()
97109
)
98110

99-
logger.debug('{:10.2f} sec | sent {} files'.format(
100-
time.time() - start_time,
101-
len(entries)
102-
))
103111

104-
105-
async def fulfill_bundle(bundle_id, missing_files):
    """ Upload missing files to bundle via API.

    Buckets the missing files and uploads the buckets concurrently,
    updating a progress bar as each bucket completes.

    :param bundle_id: id of the bundle to complete
    :param missing_files: file paths reported missing by the server
    """
    if not missing_files:
        return
    logger.debug('Uploading {} missing files'.format(len(missing_files)))
    with tqdm(total=len(missing_files), desc='Uploading missing files', unit='files', leave=False) as pbar:

        async def _wrap(chunk):
            await upload_bundle_files(bundle_id, chunk)
            pbar.update(len(chunk))

        tasks = [
            _wrap(chunk)
            for chunk in compose_file_buckets(missing_files)
        ]
        if tasks:
            # gather() instead of wait(): it propagates upload exceptions
            # (wait() only stores them on the done tasks) and accepts bare
            # coroutines, which wait() deprecated in 3.8 and rejects on 3.11+.
            await asyncio.gather(*tasks)
        else:
            logger.info('No new files sent, as all files have been uploaded earlier')

deepcode/files.py

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
import os
22
import fnmatch
33
import aiofiles
4-
import asyncio
54
from funcy import lcat, project
65
from itertools import chain
76
import hashlib
87

9-
from .utils import profile_speed, logger
8+
from .utils import logger
109

1110
IGNORES_DEFAULT = {
1211
'**/.git',
@@ -36,10 +35,9 @@ def is_ignored(path, file_ignores):
3635

3736

3837
def collect_bundle_files(paths, file_filter, file_ignores=IGNORES_DEFAULT):
39-
local_files = []
40-
4138
for path in paths:
4239
with os.scandir(path) as it:
40+
local_files = []
4341
sub_dirs = []
4442
local_ignore_file = False
4543
for entry in it:
@@ -68,26 +66,16 @@ def collect_bundle_files(paths, file_filter, file_ignores=IGNORES_DEFAULT):
6866
if local_ignore_file:
6967
local_files = [f for f in local_files if not is_ignored(f.path, file_ignores)]
7068

69+
yield from local_files
70+
7171
sub_dirs = [
7272
subdir for subdir in sub_dirs
7373
if not is_ignored(subdir, file_ignores)
7474
]
75-
results = collect_bundle_files(sub_dirs, file_filter, file_ignores)
76-
local_files.extend(results)
77-
78-
return local_files
75+
yield from collect_bundle_files(sub_dirs, file_filter, file_ignores)
7976

8077

8178
def get_file_meta(file_path):
82-
83-
# stat = os.stat(file_path)
84-
# sg = lambda f: getattr(stat, f, '')
85-
# hasher.update('{}{}{}{}'.format(
86-
# stat.st_size, sg('st_rsize'), # file sizes
87-
# sg('st_mtime'), # modified
88-
# sg('st_type') # file type
89-
# ).encode('utf-8') )
90-
9179
content = get_file_content(file_path)
9280
hasher = hashlib.sha256()
9381
hasher.update(content.encode('utf-8'))
@@ -105,17 +93,10 @@ def prepare_bundle_hashes(bundle_files, bucket_size=MAX_BUCKET_SIZE):
10593
return items
10694

10795

108-
@profile_speed
109-
def prepare_bundle_files(paths, file_filter):
110-
""" Prepare files for bundle. """
111-
bundle_files = collect_bundle_files(paths, file_filter)
112-
return prepare_bundle_hashes(bundle_files)
113-
114-
11596
def compose_file_buckets(file_paths, bucket_size=MAX_BUCKET_SIZE):
11697
"""
117-
Split files into buckets with limiting max size
118-
Return list of items: (path, hash, size)
98+
Splits files into buckets with limiting max size
99+
Returns list of items: (path, hash)
119100
"""
120101
buckets = [{
121102
'size': bucket_size,
@@ -128,6 +109,7 @@ def route_file_to_bucket(file_path):
128109

129110
# Check that file does not exceed max bucket size
130111
if file_size > bucket_size:
112+
logger.debug('ecxluded big file --> {} ({} bytes)'.format(file_path, file_size))
131113
return
132114

133115
# Try to find existing bucket

0 commit comments

Comments
 (0)