|
# Test script for capturing Douyu danmaku (bullet-chat) messages.
| 2 | + |
| 3 | +import multiprocessing |
| 4 | +import os |
| 5 | +import socket |
| 6 | +import sqlite3 |
| 7 | +import time |
| 8 | +from time import localtime |
| 9 | + |
| 10 | +import requests |
| 11 | +from bs4 import BeautifulSoup |
| 12 | + |
import re

# One TCP connection to the barrage server, opened as soon as this module is
# executed and shared by the functions below through the module-level
# ``client`` name.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

host = socket.gethostbyname("openbarrage.douyutv.com")
port = 8601
client.connect((host, port))

# Byte patterns that pull individual fields out of the raw data received from
# the server: the chat text, the sender's nickname and the sender's level.
path = re.compile(rb'txt@=(.+?)/cid@')
uid_path = re.compile(rb'nn@=(.+?)/txt@')
# Accept any run of digits. The previous ``[1-9][0-9]?`` silently failed on
# levels outside 1-99, and because the receive loop pairs the three match
# lists by position, a single unmatched level shifted every later entry.
level_path = re.compile(rb'level@=([0-9]+)/egtt@')
| 23 | + |
| 24 | + |
def sendmsg(msgstr):
    """Frame *msgstr* with the 12-byte packet header and send it on ``client``.

    The header is three little-endian 32-bit words: the data length, the same
    length repeated, and the message code 689.

    Args:
        msgstr: The already-serialised message text (callers in this file
            terminate it with a NUL character themselves).

    Raises:
        OSError: If the underlying socket fails while sending.
    """
    msg = msgstr.encode('utf-8')
    # The length counts the 8 header bytes that follow the first length word
    # (repeated length + code word) plus the payload.
    data_length = len(msg) + 8
    code = 689  # NOTE(review): presumably the client-to-server message type; confirm against the protocol spec
    length_field = data_length.to_bytes(4, 'little')
    packet = length_field + length_field + code.to_bytes(4, 'little') + msg
    # sendall() retries until every byte is written. The previous code used
    # send() for the header and ignored its return value, so a short write
    # would corrupt the stream. Sending header and body as one buffer also
    # narrows the window in which two concurrent senders could interleave.
    client.sendall(packet)
| 36 | + |
| 37 | + |
def get_name(roomid):
    """Return the text of the ``zb-name`` anchor on the room's web page.

    Args:
        roomid: Room identifier appended to ``http://www.douyu.com/``. Any
            value convertible with ``str()`` is accepted.

    Returns:
        The ``.string`` of the first ``<a class="zb-name">`` element.

    Raises:
        LookupError: If the page contains no such anchor (previously this
            surfaced as an ``AttributeError`` on ``None``).
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Without a timeout requests can block forever on an unresponsive server.
    r = requests.get("http://www.douyu.com/" + str(roomid), timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')
    # The attribute filter must be a dict; the original ``{'class', 'zb-name'}``
    # was a set literal.
    anchor = soup.find('a', {'class': 'zb-name'})
    if anchor is None:
        raise LookupError(
            "no <a class='zb-name'> element found on the page for room {}".format(roomid))
    return anchor.string
| 42 | + |
| 43 | + |
def keeplive():
    """Send a heartbeat message every 15 seconds, forever.

    Each heartbeat carries the current Unix time (whole seconds) as its
    ``tick`` field and is sent through ``sendmsg``. The function never
    returns on its own.
    """
    while True:
        tick = int(time.time())
        heartbeat = 'type@=keeplive/tick@={}/\x00'.format(tick)
        print('init live')
        sendmsg(heartbeat)
        time.sleep(15)
| 50 | + |
| 51 | + |
def _danmu_db_name(anchor_name):
    """Return today's database file name for the given streamer name."""
    # Read the clock once so the three date parts cannot straddle midnight.
    today = localtime()
    return 'danmudata_{}_{}-{}-{}.db'.format(
        anchor_name, today.tm_year, today.tm_mon, today.tm_mday)


def _open_danmu_db(db_name):
    """Open the SQLite file *db_name* and make sure the DANMU table exists."""
    if os.path.exists(db_name):
        print("检测到表已经创建成功!")
    conn = sqlite3.connect(db_name)
    # IF NOT EXISTS also covers a file that exists but never got its table.
    conn.execute('''CREATE TABLE IF NOT EXISTS DANMU
        (level INT NOT NULL,
        NAME CHAR(20) NOT NULL,
        danmu CHAR(200) NOT NULL
        );''')
    return conn


def _extract_danmu(buffer):
    """Split *buffer* into NUL-terminated pieces and extract chat records.

    Returns a ``(records, remainder)`` pair: ``records`` is a list of
    ``(level, nickname, text)`` tuples and ``remainder`` is the trailing,
    not-yet-terminated bytes to prepend to the next received chunk.
    """
    # Outgoing messages in this file end with NUL; incoming ones are assumed
    # to as well -- TODO confirm against the protocol spec.
    *complete, remainder = buffer.split(b'\x00')
    records = []
    for packet in complete:
        text = path.search(packet)
        nick = uid_path.search(packet)
        level = level_path.search(packet)
        # Match all three fields inside the same piece so a message that lacks
        # one of them cannot shift the fields of its neighbours.
        if not (text and nick and level):
            continue
        try:
            records.append(
                (int(level.group(1)), nick.group(1).decode(), text.group(1).decode()))
        except UnicodeDecodeError:
            # Best effort: skip a message whose bytes are not valid UTF-8.
            continue
    return records, remainder


def start(roomid):
    """Log in to room *roomid*, then print and store every chat message.

    Sends the login and join-group requests, opens (or creates) today's
    SQLite file for the room's streamer, and then reads from ``client`` until
    the peer closes the connection. Each chat message is printed and inserted
    into the ``DANMU`` table.

    Args:
        roomid: The room identifier, interpolated into the protocol messages
            and passed to ``get_name``.

    Raises:
        KeyboardInterrupt: Propagated after the database connection has been
            closed (previously the loop kept running on a closed connection).
    """
    sendmsg('type@=loginreq/username@=/password@=/roomid@={}/\x00'.format(roomid))
    print(client.recv(1024))
    sendmsg('type@=joingroup/rid@={}/gid@=-9999/\x00'.format(roomid))

    # Look the streamer name up once; every call is a full HTTP request.
    anchor_name = get_name(roomid)
    conn = _open_danmu_db(_danmu_db_name(anchor_name))
    print('连接到{}的直播间'.format(anchor_name))

    pending = b''  # bytes of a message whose end has not arrived yet
    try:
        while True:
            data = client.recv(1024)
            if not data:
                break
            records, pending = _extract_danmu(pending + data)
            for level, nickname, text in records:
                print("lv:" + str(level) + ">>>>>>" + nickname + ":" + text)
                try:
                    # Placeholders instead of string formatting: chat text is
                    # untrusted and a single quote used to break the statement.
                    conn.execute(
                        "INSERT INTO DANMU(level,NAME,danmu) VALUES (?,?,?)",
                        (level, nickname, text))
                    conn.commit()
                except sqlite3.Error as exc:
                    # Keep receiving, but say why this row was dropped.
                    print('failed to store danmu: {}'.format(exc))
    finally:
        conn.close()
| 97 | + |
| 98 | + |
if __name__ == '__main__':
    import threading

    room_id = input("plz enter the room id")
    # Run both workers as threads of this process so they share the single
    # module-level socket. Separate processes only share it when children are
    # forked; with the "spawn" start method each child re-imports this module
    # and opens its own connection, so heartbeats never reach the logged-in
    # connection.
    receiver = threading.Thread(target=start, args=(room_id,), daemon=True)
    heartbeat = threading.Thread(target=keeplive, daemon=True)
    receiver.start()
    heartbeat.start()
    # Wait in short slices so Ctrl-C is noticed promptly; the program ends
    # when the receiver finishes (daemon threads do not keep it alive).
    while receiver.is_alive():
        receiver.join(timeout=1)
0 commit comments