|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +插入一百万条guest用户的活跃度热力图数据 |
| 4 | +用于性能测试 |
| 5 | +""" |
| 6 | + |
| 7 | +import sys |
| 8 | +import os |
| 9 | +from datetime import datetime, date, timedelta |
| 10 | +import random |
| 11 | +import time |
| 12 | + |
| 13 | +# 添加项目根目录到Python路径 |
| 14 | +current_dir = os.path.dirname(os.path.abspath(__file__)) |
| 15 | +project_root = os.path.dirname(current_dir) |
| 16 | +api_server_dir = os.path.join(project_root, 'todo-for-ai-api-server') |
| 17 | +sys.path.insert(0, api_server_dir) |
| 18 | + |
| 19 | +from sqlalchemy import create_engine, text |
| 20 | +from sqlalchemy.orm import sessionmaker |
| 21 | +import pymysql |
| 22 | + |
| 23 | +# 数据库配置 - 优先使用环境变量,否则从.env文件加载 |
| 24 | +from dotenv import load_dotenv |
| 25 | + |
| 26 | +# 加载环境变量 |
| 27 | +env_path = os.path.join(project_root, 'private-deploy', '.env') |
| 28 | +if os.path.exists(env_path): |
| 29 | + load_dotenv(env_path) |
| 30 | + print(f"📄 加载环境变量: {env_path}") |
| 31 | + |
| 32 | +DATABASE_URL = os.environ.get('DATABASE_URL') or 'mysql+pymysql://root:password@localhost:3306/todo_for_ai' |
| 33 | +print(f"📊 数据库URL: {DATABASE_URL}") |
| 34 | + |
| 35 | +# Guest用户信息 |
| 36 | +GUEST_EMAIL = 'guest@todo4ai.org' |
| 37 | + |
| 38 | +def get_guest_user_id(session): |
| 39 | + """获取guest用户的ID""" |
| 40 | + result = session.execute(text("SELECT id FROM users WHERE email = :email"), {"email": GUEST_EMAIL}) |
| 41 | + row = result.fetchone() |
| 42 | + if row: |
| 43 | + return row[0] |
| 44 | + else: |
| 45 | + print("❌ Guest用户不存在,请先登录一次创建guest用户") |
| 46 | + print(" 可以访问前端点击「游客体验」按钮创建guest用户") |
| 47 | + sys.exit(1) |
| 48 | + |
| 49 | +def generate_activity_data(user_id, total_records=1000000): |
| 50 | + """ |
| 51 | + 生成活跃度数据 |
| 52 | + |
| 53 | + 参数: |
| 54 | + - total_records: 目标总活跃次数(不是记录数) |
| 55 | + |
| 56 | + 策略: |
| 57 | + user_activities表是每天每用户一条汇总记录 |
| 58 | + 生成最近730天(2年)的数据,每天一条,但活跃度数值很高 |
| 59 | + 实际生成730条记录,模拟100万次活跃 |
| 60 | + """ |
| 61 | + print(f"📊 开始生成活跃度热力图测试数据...") |
| 62 | + |
| 63 | + # 生成最近730天(2年)的数据,确保包含过去365天 |
| 64 | + days_needed = 730 |
| 65 | + total_activity_per_day = total_records // days_needed # 平均每天的总活跃次数 |
| 66 | + |
| 67 | + end_date = date.today() |
| 68 | + start_date = end_date - timedelta(days=days_needed - 1) |
| 69 | + |
| 70 | + print(f" 日期范围: {start_date} ~ {end_date} ({days_needed}天)") |
| 71 | + print(f" 每天总活跃次数: {total_activity_per_day:,}") |
| 72 | + print(f" 总记录数: {days_needed:,} 条(每天1条)") |
| 73 | + |
| 74 | + batch_data = [] |
| 75 | + |
| 76 | + # 遍历每一天,生成一条汇总记录 |
| 77 | + for day_offset in range(days_needed): |
| 78 | + current_date = start_date + timedelta(days=day_offset) |
| 79 | + |
| 80 | + # 将总活跃次数按比例分配到各个活动类型 |
| 81 | + # 保持合理的比例:创建 < 完成 < 状态变更 < 更新 |
| 82 | + task_created = random.randint(total_activity_per_day // 10, total_activity_per_day // 8) |
| 83 | + task_completed = random.randint(total_activity_per_day // 8, total_activity_per_day // 6) |
| 84 | + task_status_changed = random.randint(total_activity_per_day // 6, total_activity_per_day // 4) |
| 85 | + task_updated = total_activity_per_day - task_created - task_completed - task_status_changed |
| 86 | + |
| 87 | + # 确保更新数是正数 |
| 88 | + if task_updated < 0: |
| 89 | + task_updated = total_activity_per_day // 4 |
| 90 | + |
| 91 | + total_activity = task_created + task_updated + task_status_changed + task_completed |
| 92 | + |
| 93 | + # 生成时间戳 |
| 94 | + first_activity = datetime.combine(current_date, datetime.min.time()) + timedelta(hours=random.randint(8, 12)) |
| 95 | + last_activity = first_activity + timedelta(hours=random.randint(1, 10)) |
| 96 | + |
| 97 | + batch_data.append({ |
| 98 | + 'user_id': user_id, |
| 99 | + 'activity_date': current_date, |
| 100 | + 'task_created_count': task_created, |
| 101 | + 'task_updated_count': task_updated, |
| 102 | + 'task_status_changed_count': task_status_changed, |
| 103 | + 'task_completed_count': task_completed, |
| 104 | + 'total_activity_count': total_activity, |
| 105 | + 'first_activity_at': first_activity, |
| 106 | + 'last_activity_at': last_activity, |
| 107 | + 'created_at': datetime.now(), |
| 108 | + 'updated_at': datetime.now() |
| 109 | + }) |
| 110 | + |
| 111 | + # 进度显示 |
| 112 | + if (day_offset + 1) % 100 == 0: |
| 113 | + print(f" 生成进度: {day_offset + 1} / {days_needed} 天 ({(day_offset + 1)*100/days_needed:.1f}%)") |
| 114 | + |
| 115 | + print(f"✅ 数据生成完成,共 {len(batch_data):,} 条记录") |
| 116 | + print(f" 模拟总活跃次数: {sum(d['total_activity_count'] for d in batch_data):,}") |
| 117 | + return batch_data |
| 118 | + |
| 119 | +def batch_insert_data(session, data, batch_size=10000): |
| 120 | + """ |
| 121 | + 批量插入数据 |
| 122 | + """ |
| 123 | + print(f"\n📥 开始批量插入数据(批次大小: {batch_size:,})...") |
| 124 | + |
| 125 | + total = len(data) |
| 126 | + inserted = 0 |
| 127 | + start_time = time.time() |
| 128 | + |
| 129 | + # 准备批量插入的SQL语句 |
| 130 | + insert_sql = text(""" |
| 131 | + INSERT INTO user_activities ( |
| 132 | + user_id, activity_date, task_created_count, task_updated_count, |
| 133 | + task_status_changed_count, task_completed_count, total_activity_count, |
| 134 | + first_activity_at, last_activity_at, created_at, updated_at |
| 135 | + ) VALUES ( |
| 136 | + :user_id, :activity_date, :task_created_count, :task_updated_count, |
| 137 | + :task_status_changed_count, :task_completed_count, :total_activity_count, |
| 138 | + :first_activity_at, :last_activity_at, :created_at, :updated_at |
| 139 | + ) ON DUPLICATE KEY UPDATE |
| 140 | + task_created_count = VALUES(task_created_count), |
| 141 | + task_updated_count = VALUES(task_updated_count), |
| 142 | + task_status_changed_count = VALUES(task_status_changed_count), |
| 143 | + task_completed_count = VALUES(task_completed_count), |
| 144 | + total_activity_count = VALUES(total_activity_count), |
| 145 | + first_activity_at = VALUES(first_activity_at), |
| 146 | + last_activity_at = VALUES(last_activity_at), |
| 147 | + updated_at = VALUES(updated_at) |
| 148 | + """) |
| 149 | + |
| 150 | + try: |
| 151 | + for i in range(0, total, batch_size): |
| 152 | + batch = data[i:i + batch_size] |
| 153 | + |
| 154 | + # 批量插入 |
| 155 | + session.execute(insert_sql, batch) |
| 156 | + session.commit() |
| 157 | + |
| 158 | + inserted += len(batch) |
| 159 | + elapsed = time.time() - start_time |
| 160 | + speed = inserted / elapsed if elapsed > 0 else 0 |
| 161 | + remaining = (total - inserted) / speed if speed > 0 else 0 |
| 162 | + |
| 163 | + print(f" 插入进度: {inserted:,} / {total:,} ({inserted*100/total:.1f}%) " |
| 164 | + f"- 速度: {speed:.0f} 条/秒 - 剩余时间: {remaining:.0f}秒") |
| 165 | + |
| 166 | + elapsed = time.time() - start_time |
| 167 | + print(f"\n✅ 数据插入完成!") |
| 168 | + print(f" 总记录数: {total:,} 条") |
| 169 | + print(f" 总耗时: {elapsed:.2f} 秒") |
| 170 | + print(f" 平均速度: {total/elapsed:.0f} 条/秒") |
| 171 | + |
| 172 | + except Exception as e: |
| 173 | + session.rollback() |
| 174 | + print(f"\n❌ 插入失败: {str(e)}") |
| 175 | + raise |
| 176 | + |
| 177 | +def verify_data(session, user_id): |
| 178 | + """验证数据插入情况""" |
| 179 | + print("\n🔍 验证数据插入情况...") |
| 180 | + |
| 181 | + # 查询总记录数 |
| 182 | + result = session.execute( |
| 183 | + text("SELECT COUNT(*) FROM user_activities WHERE user_id = :user_id"), |
| 184 | + {"user_id": user_id} |
| 185 | + ) |
| 186 | + total = result.fetchone()[0] |
| 187 | + print(f" Guest用户总活跃记录: {total:,} 条") |
| 188 | + |
| 189 | + # 查询日期范围 |
| 190 | + result = session.execute( |
| 191 | + text("SELECT MIN(activity_date), MAX(activity_date) FROM user_activities WHERE user_id = :user_id"), |
| 192 | + {"user_id": user_id} |
| 193 | + ) |
| 194 | + min_date, max_date = result.fetchone() |
| 195 | + print(f" 日期范围: {min_date} ~ {max_date}") |
| 196 | + |
| 197 | + # 查询总活跃度 |
| 198 | + result = session.execute( |
| 199 | + text("SELECT SUM(total_activity_count) FROM user_activities WHERE user_id = :user_id"), |
| 200 | + {"user_id": user_id} |
| 201 | + ) |
| 202 | + total_activities = result.fetchone()[0] |
| 203 | + print(f" 总活跃次数: {total_activities:,} 次") |
| 204 | + |
| 205 | +def main(): |
| 206 | + """主函数""" |
| 207 | + # 检查命令行参数 |
| 208 | + auto_confirm = '--yes' in sys.argv or '-y' in sys.argv |
| 209 | + |
| 210 | + print("=" * 70) |
| 211 | + print("🚀 Guest用户活跃度热力图测试数据生成工具") |
| 212 | + print("=" * 70) |
| 213 | + print() |
| 214 | + |
| 215 | + # 创建数据库连接 |
| 216 | + print("📡 连接数据库...") |
| 217 | + engine = create_engine(DATABASE_URL, echo=False) |
| 218 | + Session = sessionmaker(bind=engine) |
| 219 | + session = Session() |
| 220 | + |
| 221 | + try: |
| 222 | + # 获取guest用户ID |
| 223 | + user_id = get_guest_user_id(session) |
| 224 | + print(f"✅ 找到Guest用户 (ID: {user_id})") |
| 225 | + |
| 226 | + # 检查现有数据 |
| 227 | + result = session.execute( |
| 228 | + text("SELECT COUNT(*) FROM user_activities WHERE user_id = :user_id"), |
| 229 | + {"user_id": user_id} |
| 230 | + ) |
| 231 | + existing_count = result.fetchone()[0] |
| 232 | + |
| 233 | + if existing_count > 0: |
| 234 | + print(f"\n⚠️ 警告:Guest用户已有 {existing_count:,} 条活跃记录") |
| 235 | + if auto_confirm: |
| 236 | + print("自动确认:将覆盖现有数据") |
| 237 | + else: |
| 238 | + response = input("是否继续?这将覆盖现有数据 (y/N): ") |
| 239 | + if response.lower() != 'y': |
| 240 | + print("已取消") |
| 241 | + return |
| 242 | + |
| 243 | + # 删除现有数据 |
| 244 | + print("🗑️ 删除现有数据...") |
| 245 | + session.execute( |
| 246 | + text("DELETE FROM user_activities WHERE user_id = :user_id"), |
| 247 | + {"user_id": user_id} |
| 248 | + ) |
| 249 | + session.commit() |
| 250 | + print("✅ 已删除现有数据") |
| 251 | + |
| 252 | + # 生成数据(参数是目标总活跃次数,实际生成730条记录) |
| 253 | + data = generate_activity_data(user_id, total_records=1000000) |
| 254 | + |
| 255 | + # 批量插入 |
| 256 | + batch_insert_data(session, data, batch_size=10000) |
| 257 | + |
| 258 | + # 验证数据 |
| 259 | + verify_data(session, user_id) |
| 260 | + |
| 261 | + print("\n" + "=" * 70) |
| 262 | + print("✅ 完成!现在可以使用Playwright测试热力图性能了") |
| 263 | + print(" API端点: http://localhost:50110/api/v1/dashboard/activity-heatmap") |
| 264 | + print("=" * 70) |
| 265 | + |
| 266 | + except Exception as e: |
| 267 | + print(f"\n❌ 错误: {str(e)}") |
| 268 | + import traceback |
| 269 | + traceback.print_exc() |
| 270 | + finally: |
| 271 | + session.close() |
| 272 | + |
| 273 | +if __name__ == '__main__': |
| 274 | + main() |
0 commit comments