1. Use pandas and glob to read every file of a given type in a specified directory
import pandas as pd
import glob
data_dir = "/public/data/"
# Read every tab-separated .txt file found in data_dir.
# The separator has to be the tab escape "\t"; a bare "t" would split each
# line on the letter t instead.
frames = [pd.read_csv(path, sep="\t") for path in glob.glob(data_dir + "*.txt")]
# pd.concat raises ValueError when given an empty list, so fall back to an
# empty frame when no file matches the pattern.
df_all = pd.concat(frames) if frames else pd.DataFrame()
print(df_all)
2. Use the enumerate function to obtain indexes and values
# Letters A through K.
letter = [chr(ord("A") + i) for i in range(11)]
# Print each index and its letter, separated by a tab character.
for idx, value in enumerate(letter):
    print(f"{idx}\t{value}")
3. Use the zip function to traverse multiple lists simultaneously
# Digits 0 through 10.
number = list(range(11))
# Letters A through K.
letter = [chr(ord("A") + i) for i in range(11)]
# Walk both lists in lockstep. The loop variables have their own names so that
# the lists `number` and `letter` are not rebound to single items by the loop.
for num, char in zip(number, letter):
    print(f"{num}: {char}")
# 0: A
# 1: B
# 2: C
# 3: D
# 4: E
# 5: F
# 6: G
# 7: H
# 8: I
# 9: J
# 10: K
4. Use the built-in functions map and filter to transform and filter data
number = list(range(11))
# Square every number with map.
squared_numbers = list(map(lambda x: x**2, number))
print(squared_numbers)
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# Keep only the even numbers with filter.
even_numbers = list(filter(lambda x: x % 2 == 0, number))
print(even_numbers)
# [0, 2, 4, 6, 8, 10]
5. Use the concurrent.futures module to process the items of a loop concurrently
import concurrent.futures
def square(num):
    """Return ``num`` raised to the power of two."""
    return num ** 2
# Apply square to every item of `number` through a thread pool and collect
# the results into a list; the pool is shut down when the with-block exits.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res = list(executor.map(square, number))
print(res)
6. Use the asyncio module to implement asynchronous processing and improve concurrency performance
import asyncio
import math
async def sqrt(num):
    """Coroutine that returns ``math.sqrt(num)``."""
    return math.sqrt(num)
async def calculate():
    """Await ``sqrt`` for every item of the module-level ``number`` and print the results."""
    # Build one coroutine per value, then await them all together with gather.
    run_tasks = [sqrt(num) for num in number]
    results = await asyncio.gather(*run_tasks)
    print(results)


asyncio.run(calculate())
7. A decorator that measures and prints a function's run time
import time
def analysis_time(func):
    """Decorate ``func`` so that every call prints how long it took.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that calls ``func`` with the same positional and keyword
        arguments, prints the elapsed time in seconds, and returns the
        result of ``func``.
    """
    import functools  # local import: functools is not imported at file level

    @functools.wraps(func)  # keep func's __name__ and docstring on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring short intervals.
        start_time = time.perf_counter()
        # Keyword arguments must be forwarded with **; a single * would pass
        # the dict's keys as extra positional arguments.
        res = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f"{func.__name__} program run time: {end_time - start_time}s")
        return res

    return wrapper
#parallel computing
import concurrent.futures
def square(num):
    """Return ``num`` raised to the power of two."""
    return num ** 2
@analysis_time
def calulate(number):
    """Apply ``square`` to every item of ``number`` through a thread pool.

    Args:
        number: Iterable of values to square.

    Returns:
        A list with the squared values.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        res = list(executor.map(square, number))
    return res


print(calulate(number))
# calulate program run time: 0.002947568893432617s
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]