Efficient Python code for bioinformatics data analysis

1. Use pandas together with glob to find and read every file in a specified directory

import pandas as pd
import glob
data_dir = "/public/data/"
# Collect every file under data_dir whose name ends in .txt. glob returns
# matches in arbitrary order, so sort them to make the row order of the
# combined table reproducible.
txt_files = sorted(glob.glob(data_dir + "*.txt"))
# Parse each file as tab-separated text (sep must be the tab escape "\t";
# a bare "t" would split every line on the letter t).
frames = [pd.read_csv(path, sep="\t") for path in txt_files]
# pd.concat() raises ValueError when given no objects, so fall back to an
# empty DataFrame when no file matched the pattern.
df_all = pd.concat(frames) if frames else pd.DataFrame()
print(df_all)

2. Use the enumerate function to obtain indexes and values

# Build the alphabet list A-K from consecutive code points starting at 'A'
letter = [chr(ord('A') + i) for i in range(0, 11)]
# Print each index and its value, separated by a tab
for idx, value in enumerate(letter):
    print(f"{idx}\t{value}")

3. Use the zip function to traverse multiple lists simultaneously

# 0-10 digit list
number = list(range(11))
# A-K alphabet list
letter = [chr(ord('A') + i) for i in range(11)]
# Walk both lists in lockstep. The loop variables get their own names so
# that the `number` and `letter` lists are not overwritten by their own
# elements and stay usable after the loop.
for num, char in zip(number, letter):
    print(f"{num}: {char}")

# 0: A
# 1: B
# 2: C
# 3: D
# 4: E
# 5: F
# 6: G
# 7: H
# 8: I
# 9: J
# 10: K

4. Use the built-in map and filter functions to transform and filter data

number = list(range(11))
# map() applies the lambda to every element; list() materializes the result
squared_numbers = list(map(lambda x: x**2, number))
print(squared_numbers)
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# filter() keeps only the elements for which the lambda is true (even numbers)
even_numbers = list(filter(lambda x: x % 2 == 0, number))
print(even_numbers)
# [0, 2, 4, 6, 8, 10]

5. Use the concurrent.futures module to process loop iterations concurrently and improve computational efficiency

import concurrent.futures
def square(num):
    """Return ``num`` raised to the power of two."""
    return num ** 2
# Apply square() to every element of `number` on a pool of worker threads.
# executor.map() hands back the results in the same order as the inputs, and
# leaving the `with` block shuts the pool down once the work is finished.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res = list(executor.map(square, number))

print(res)

6. Use the asyncio module to implement asynchronous processing and improve concurrency performance

import asyncio
import math
async def sqrt(num):
    """Coroutine that returns the square root of ``num`` using math.sqrt."""
    return math.sqrt(num)
async def calculate():
    """Compute the square root of every value in ``number`` and print them."""
    # Calling sqrt() here only creates coroutine objects; nothing runs yet.
    run_tasks = [sqrt(num) for num in number]

    # gather() runs the coroutines and returns their results as a list in
    # the same order in which they were passed in.
    results = await asyncio.gather(*run_tasks)
    print(results)


# Start an event loop, run calculate() to completion, then close the loop.
asyncio.run(calculate())

7. A decorator that measures a function's run time

import time
def analysis_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function is called with exactly the positional and keyword
    arguments it was given, and its return value is passed through
    unchanged. After every call a line of the form
    ``<name> program run time: <seconds>s`` is printed.
    """
    import functools  # local import: only needed for wraps() below

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short intervals; wall-clock time can jump.
        start_time = time.perf_counter()
        # Keyword arguments must be forwarded with ** so they remain keyword
        # arguments instead of being passed as positional key names.
        res = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f"{func.__name__} program run time: {end_time - start_time}s")
        return res

    return wrapper
#parallel computing 
import concurrent.futures
def square(num):
    """Return the square of ``num``."""
    return num ** 2

@analysis_time
def calulate(number):
    """Square every element of ``number`` on a thread pool; return the list.

    Because of the ``analysis_time`` decorator, each call also prints the
    time the computation took.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map() yields results in input order; list() collects them
        # before the pool is shut down at the end of the `with` block.
        res = list(executor.map(square, number))
    return res


print(calulate(number))
# calulate program run time: 0.002947568893432617s
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]