From 4c4d168471a7ceb38005a74db0b703705585db1f Mon Sep 17 00:00:00 2001 From: YoVinchen Date: Mon, 4 Aug 2025 09:14:27 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0Redis=E6=9F=A5=E8=AF=A2?= =?UTF-8?q?=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CLAUDE.md | 113 +- app.py | 2256 +--------------------------------- app_original_backup.py | 2229 +++++++++++++++++++++++++++++++++ modules/__init__.py | 17 + modules/api_routes.py | 1020 +++++++++++++++ modules/cassandra_client.py | 114 ++ modules/config_manager.py | 671 ++++++++++ modules/data_comparison.py | 363 ++++++ modules/database.py | 228 ++++ modules/query_engine.py | 234 ++++ modules/query_logger.py | 272 ++++ modules/redis_client.py | 249 ++++ modules/redis_query.py | 355 ++++++ modules/sharding.py | 115 ++ requirements.txt | 3 +- static/js/redis_compare.js | 1292 +++++++++++++++++++ templates/index.html | 30 +- templates/redis_compare.html | 696 +++++++++++ 18 files changed, 8007 insertions(+), 2250 deletions(-) create mode 100644 app_original_backup.py create mode 100644 modules/__init__.py create mode 100644 modules/api_routes.py create mode 100644 modules/cassandra_client.py create mode 100644 modules/config_manager.py create mode 100644 modules/data_comparison.py create mode 100644 modules/database.py create mode 100644 modules/query_engine.py create mode 100644 modules/query_logger.py create mode 100644 modules/redis_client.py create mode 100644 modules/redis_query.py create mode 100644 modules/sharding.py create mode 100644 static/js/redis_compare.js create mode 100644 templates/redis_compare.html diff --git a/CLAUDE.md b/CLAUDE.md index 4ff0a9d..0bd4e41 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## 项目架构 -这是一个基于 Flask 的数据库查询比对工具,用于比较 Cassandra 数据库中生产环境和测试环境的数据差异。现已支持单表查询和TWCS分表查询两种模式。 +这是一个基于 Flask 的数据库查询比对工具,用于比较 Cassandra 数据库中生产环境和测试环境的数据差异。现已支持单表查询、TWCS分表查询和**多主键查询**三种核心功能。 ### 核心组件架构 @@ -15,29 +15,38 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - 配置组管理(CRUD操作) - JSON字段特殊处理和数组比较逻辑 - 查询历史记录管理 - - **分表查询功能(新增)**: + - **分表查询功能**: - `ShardingCalculator`类:TWCS时间分表计算器 - `execute_sharding_query()`:分表查询执行 - `execute_mixed_query()`:混合查询支持(生产分表+测试单表组合) - `/api/sharding-query`:分表查询API端点 + - **多主键查询功能(新增)**: + - `execute_query()`函数支持复合主键SQL构建 + - `compare_results()`函数支持复合主键匹配 + - `match_composite_key()`辅助函数处理复合主键比较 - `config_groups.db`: SQLite数据库,存储用户保存的配置组、查询历史和分表配置 **前端 (原生JavaScript + Bootstrap)** -- `templates/db_compare.html`: 主界面模板,**现已支持单表和分表双模式** +- `templates/db_compare.html`: 主界面模板,**现已支持单表、分表和多主键三种模式** - 分表模式切换开关 - 生产/测试环境独立分表配置 - 分表参数配置(时间间隔、分表数量) - 分表查询信息展示 + - **多主键查询支持**:UI提示和占位符文本更新 - `templates/index.html`: 工具集合首页 - `static/js/app.js`: 核心前端逻辑 - 配置管理和表单处理 - 差异结果的分页展示系统 - 原生数据展示(多种视图模式:格式化、原始、差异对比、树形) - 高级错误处理和用户反馈 - - **分表查询支持(新增)**: + - **分表查询支持**: - `toggleShardingMode()`:分表模式切换 - `getShardingConfig()`:分表配置获取 - `displayShardingInfo()`:分表查询结果展示 + - **多主键查询支持(新增)**: + - `getCurrentConfig()`函数解析复合主键配置 + - `formatCompositeKey()`:复合主键显示格式化 + - UI占位符和提示文本支持复合主键格式 **分表查询功能模块(重要新增)** - **时间戳提取算法(已更新)**: @@ -55,6 +64,21 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - 生产环境单表 + 测试环境分表 - 生产环境单表 + 测试环境单表 +**多主键查询功能模块(最新功能)** +- **复合主键格式**: + - 主键字段:逗号分隔,如 `docid,id` + - 查询值:逗号分隔,如 `8825C293B3609175B2224236E984FEDB,8825C293B3609175B2224236E984FED` + - 一行一组复合主键值 +- **SQL构建逻辑**: + - 单主键:`key IN 
(val1, val2, val3)` + - 复合主键:`(key1='val1' AND key2='val2') OR (key1='val3' AND key2='val4')` +- **数据匹配算法**: + - `match_composite_key()`函数处理单主键和复合主键的统一匹配 + - 支持字段数量验证和类型转换 +- **向后兼容**: + - 完全兼容现有单主键查询 + - 自动识别主键类型并采用相应处理逻辑 + **核心文件** - `app.py`: 唯一的主应用文件,包含所有功能实现 - `config_groups.db`: SQLite数据库文件 @@ -67,6 +91,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - 字段级别的差异统计和分析 - 数据质量评估和建议生成 - 支持包含和排除特定字段的比较 +- **多主键数据比对**:支持复合主键的精确匹配和差异检测 **用户界面特性** - 分页系统(差异记录和相同记录) @@ -115,6 +140,12 @@ python app.py # 单表查询测试:http://localhost:5000/db-compare # 分表查询测试:在Web界面中开启分表模式 +# 多主键查询测试示例: +# 1. 在主键字段中输入:docid,id +# 2. 在查询Key值中输入(每行一组): +# 8825C293B3609175B2224236E984FEDB,8825C293B3609175B2224236E984FED +# 9925C293B3609175B2224236E984FEDB,9925C293B3609175B2224236E984FED + # 数据库初始化(如果config_groups.db不存在) # 通过访问Web界面会自动创建数据库表结构 ``` @@ -129,17 +160,18 @@ python app.py - cassandra-driver==3.29.1 ### 项目特点 -- **单文件架构**:所有后端逻辑都在 `app.py` 中实现(1400+行代码) -- **内存日志系统**:使用 `QueryLogCollector` 类在内存中收集查询日志 -- **SQLite本地存储**:配置组和查询历史存储在本地 `config_groups.db` 文件中 +- **单文件架构**:所有后端逻辑都在 `app.py` 中实现(2230+行代码) +- **内存+数据库日志系统**:使用 `QueryLogCollector` 类在内存和SQLite中收集查询日志 +- **SQLite本地存储**:配置组、查询历史和日志存储在本地 `config_groups.db` 文件中 - **前端原生实现**:使用原生JavaScript + Bootstrap,无现代前端框架 +- **多模式支持**:单表查询、分表查询、多主键查询的统一架构 ## API架构说明 ### 核心API端点 - `GET /api/default-config`: 获取默认数据库配置 -- `POST /api/query`: 执行单表数据库查询比对(原有功能) -- `POST /api/sharding-query`: 执行分表查询比对(新增功能) +- `POST /api/query`: 执行单表数据库查询比对(**支持多主键查询**) +- `POST /api/sharding-query`: 执行分表查询比对(**支持多主键查询**) - `GET /api/config-groups`: 获取所有配置组 - `POST /api/config-groups`: 创建新配置组 - `GET /api/config-groups/`: 获取特定配置组 @@ -148,8 +180,10 @@ python app.py - `GET /api/query-history`: 获取查询历史 - `POST /api/query-history`: 保存查询历史 - `GET /api/query-history/`: 获取特定历史记录 +- `GET /api/query-history//results`: 获取历史记录的完整结果数据 - `DELETE /api/query-history/`: 删除历史记录 - `GET /api/query-logs`: 获取查询日志(支持limit参数) +- `GET /api/query-logs/history/`: 获取特定历史记录的相关日志 - `DELETE /api/query-logs`: 清空查询日志 ### 查询比对流程 @@ -181,10 +215,10 @@ python app.py username, password, keyspace, table }, test_config: { /* 同上 */ }, - keys: ["主键字段名"], + keys: ["主键字段名"], // 支持多个字段,如 ["docid", "id"] fields_to_compare: ["字段1", "字段2"], // 空数组=全部字段 exclude_fields: ["排除字段"], - values: ["key1", "key2", "key3"] // 要查询的Key值 + values: ["key1", "key2", "key3"] // 单主键或复合主键值 } ``` @@ -193,10 +227,10 @@ python app.py { pro_config: { /* 基础配置同上 */ }, test_config: { /* 基础配置同上 */ }, - keys: ["主键字段名"], + keys: ["主键字段名"], // 支持复合主键 fields_to_compare: ["字段1", "字段2"], exclude_fields: ["排除字段"], - values: ["key1", "key2", "key3"], + values: ["key1", "key2", "key3"], // 支持复合主键值 sharding_config: { use_sharding_for_pro: true, // 生产环境是否使用分表 use_sharding_for_test: false, // 测试环境是否使用分表 @@ -206,12 +240,30 @@ python app.py } ``` +**多主键查询格式示例**: +```javascript +// 复合主键配置 +keys: ["docid", "id"] + +// 复合主键查询值(逗号分隔) +values: [ + "8825C293B3609175B2224236E984FEDB,8825C293B3609175B2224236E984FED", + "9925C293B3609175B2224236E984FEDB,9925C293B3609175B2224236E984FED" +] +``` + ### 查询结果结构 ```javascript { total_keys, pro_count, test_count, - differences: [{ key, field, pro_value, test_value, message }], - identical_results: [{ key, pro_fields, test_fields }], + differences: [{ + key: {docid: "val1", id: "val2"}, // 支持复合主键对象 + field, pro_value, test_value, message + }], + identical_results: [{ + key: {docid: "val1", id: "val2"}, // 支持复合主键对象 + pro_fields, test_fields + }], field_diff_count: { "field_name": count }, raw_pro_data: [], 
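  // raw_pro_data / raw_test_data hold the full row objects (built via row._asdict()) returned
  // from the production and test environments; static/js/app.js renders them in the raw-data
  // view modes (formatted / raw / diff / tree)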
raw_test_data: [], summary: { overview, percentages, field_analysis, recommendations }, @@ -239,10 +291,11 @@ python app.py - **HTML模板**:使用Jinja2模板引擎,主要文件在 `templates/` 目录 ### 核心类和函数位置(app.py) -- `QueryLogCollector`类:日志收集系统(第20-46行) -- `ShardingCalculator`类:分表计算器(第64行开始) -- 数据库连接:`create_cassandra_connection()` -- 查询比对:`execute_query()` 和 `execute_sharding_query()` +- `QueryLogCollector`类:日志收集系统(第23-276行) +- `ShardingCalculator`类:分表计算器(第291行开始) +- 数据库连接:`create_connection()` (第1072行) +- 查询比对:`execute_query()` (第1177行) 和 `execute_sharding_query()` (第1250行) +- **多主键支持**:`match_composite_key()` (第1407行) - API路由:使用Flask装饰器定义 ### 分表功能开发指导 @@ -255,6 +308,13 @@ python app.py - **混合查询**:支持生产环境分表+测试环境单表的组合场景 - **前端状态**:分表模式通过 `toggleShardingMode()` 切换,影响UI和提示文本 +### 多主键功能开发指导 +- **主键解析**:前端通过逗号分隔解析主键字段和值 +- **SQL构建**:后端 `execute_query()` 根据主键数量选择不同的WHERE条件构建策略 +- **数据匹配**:`match_composite_key()` 函数统一处理单主键和复合主键匹配逻辑 +- **UI适配**:占位符和提示文本根据模式动态更新 +- **结果展示**:支持复合主键对象格式的显示和格式化 + ### Cassandra连接处理 - 连接包含详细的错误诊断和重试机制 - 使用DCAwareRoundRobinPolicy避免负载均衡警告 @@ -308,4 +368,17 @@ python app.py - results_summary: 结果摘要(JSON) - execution_time: 执行时间 - total_keys/differences_count/identical_count: 统计数据 -- created_at: 时间戳 \ No newline at end of file +- **sharding_config: 分表配置(JSON,新增字段)** +- **query_type: 查询类型('single'/'sharding',新增字段)** +- **raw_results/differences_data/identical_data: 查询结果数据(新增字段)** +- created_at: 时间戳 + +**query_logs表(新增表)** +- id: 主键 +- batch_id: 批次ID +- **history_id: 关联历史记录ID(外键)** +- timestamp: 时间戳 +- level: 日志级别(INFO/WARNING/ERROR) +- message: 日志消息 +- query_type: 查询类型 +- created_at: 创建时间 \ No newline at end of file diff --git a/app.py b/app.py index 714e56b..af52ed2 100644 --- a/app.py +++ b/app.py @@ -1,2229 +1,57 @@ -from flask import Flask, render_template, request, jsonify, send_from_directory -from cassandra.cluster import Cluster -from cassandra.auth import PlainTextAuthProvider -import json -import os -import logging -import sqlite3 -from datetime import datetime, timedelta -import re -import concurrent.futures -import time +""" +BigDataTool - 主应用文件 +模块化重构后的主应用,使用分离的模块组织代码 +""" +import logging +from flask import Flask + +# 导入模块 +from modules.database import ensure_database +from modules.query_logger import QueryLogCollector, CollectorHandler +from modules.api_routes import setup_routes + +# 创建Flask应用 app = Flask(__name__) # 配置日志 -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) logger = logging.getLogger(__name__) -# 数据库配置 -DATABASE_PATH = 'config_groups.db' +# 确保数据库存在 +if not ensure_database(): + logger.error("数据库初始化失败,应用可能无法正常工作") +else: + logger.info("数据库初始化成功") -# 查询日志收集器 -class QueryLogCollector: - def __init__(self, max_logs=1000, db_path=None): - self.logs = [] # 内存中的日志缓存 - self.max_logs = max_logs - self.current_batch_id = None - self.batch_counter = 0 - self.current_query_type = 'single' - self.current_history_id = None # 当前关联的历史记录ID - self.db_path = db_path or DATABASE_PATH - - def start_new_batch(self, query_type='single'): - """开始新的查询批次""" - self.batch_counter += 1 - self.current_batch_id = f"batch_{self.batch_counter}_{datetime.now().strftime('%H%M%S')}" - self.current_query_type = query_type - self.current_history_id = None # 重置历史记录ID - - # 添加批次开始标记 - self.add_log('INFO', f"=== 开始{query_type}查询批次 (ID: {self.current_batch_id}) ===", force_batch_id=self.current_batch_id) - return self.current_batch_id - - def 
set_history_id(self, history_id): - """设置当前批次关联的历史记录ID""" - self.current_history_id = history_id - if self.current_batch_id and history_id: - self.add_log('INFO', f"关联历史记录ID: {history_id}", force_batch_id=self.current_batch_id) - # 更新当前批次的所有日志记录的history_id - self._update_batch_history_id(self.current_batch_id, history_id) - - def _update_batch_history_id(self, batch_id, history_id): - """更新批次中所有日志的history_id""" - try: - conn = sqlite3.connect(self.db_path, timeout=30) - cursor = conn.cursor() - - cursor.execute(''' - UPDATE query_logs - SET history_id = ? - WHERE batch_id = ? - ''', (history_id, batch_id)) - - conn.commit() - conn.close() - logger.info(f"已更新批次 {batch_id} 的历史记录关联到 {history_id}") - except Exception as e: - print(f"Warning: Failed to update batch history_id: {e}") - - def end_current_batch(self): - """结束当前查询批次""" - if self.current_batch_id: - self.add_log('INFO', f"=== 查询批次完成 (ID: {self.current_batch_id}) ===", force_batch_id=self.current_batch_id) - self.current_batch_id = None - self.current_history_id = None - - def add_log(self, level, message, force_batch_id=None, force_query_type=None, force_history_id=None): - """添加日志到内存和数据库""" - timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] - batch_id = force_batch_id or self.current_batch_id - query_type = force_query_type or self.current_query_type - history_id = force_history_id or self.current_history_id - - log_entry = { - 'timestamp': timestamp, - 'level': level, - 'message': message, - 'batch_id': batch_id, - 'query_type': query_type, - 'history_id': history_id - } - - # 添加到内存缓存 - self.logs.append(log_entry) - if len(self.logs) > self.max_logs: - self.logs.pop(0) - - # 保存到数据库 - self._save_log_to_db(log_entry) - - def _save_log_to_db(self, log_entry): - """将日志保存到数据库""" - try: - conn = sqlite3.connect(self.db_path, timeout=30) - cursor = conn.cursor() - - cursor.execute(''' - INSERT INTO query_logs (batch_id, history_id, timestamp, level, message, query_type) - VALUES (?, ?, ?, ?, ?, ?) 
- ''', ( - log_entry['batch_id'], - log_entry['history_id'], - log_entry['timestamp'], - log_entry['level'], - log_entry['message'], - log_entry['query_type'] - )) - - conn.commit() - conn.close() - except Exception as e: - # 数据库写入失败时记录到控制台,但不影响程序运行 - print(f"Warning: Failed to save log to database: {e}") - - def get_logs(self, limit=None, from_db=True): - """获取日志,支持从数据库或内存获取""" - if from_db: - return self._get_logs_from_db(limit) - else: - # 从内存获取 - if limit: - return self.logs[-limit:] - return self.logs - - def _get_logs_from_db(self, limit=None): - """从数据库获取日志""" - try: - conn = sqlite3.connect(self.db_path, timeout=30) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - - query = ''' - SELECT batch_id, history_id, timestamp, level, message, query_type - FROM query_logs - ORDER BY id DESC - ''' - - if limit: - query += f' LIMIT {limit}' - - cursor.execute(query) - rows = cursor.fetchall() - - # 转换为字典格式并反转顺序(最新的在前) - logs = [] - for row in reversed(rows): - logs.append({ - 'batch_id': row['batch_id'], - 'history_id': row['history_id'], - 'timestamp': row['timestamp'], - 'level': row['level'], - 'message': row['message'], - 'query_type': row['query_type'] - }) - - conn.close() - return logs - except Exception as e: - print(f"Warning: Failed to get logs from database: {e}") - # 如果数据库读取失败,返回内存中的日志 - return self.get_logs(limit, from_db=False) - - def _get_total_logs_count(self): - """获取数据库中的日志总数""" - try: - conn = sqlite3.connect(self.db_path, timeout=30) - cursor = conn.cursor() - cursor.execute('SELECT COUNT(*) FROM query_logs') - count = cursor.fetchone()[0] - conn.close() - return count - except Exception as e: - print(f"Warning: Failed to get logs count from database: {e}") - return len(self.logs) - - def get_logs_by_history_id(self, history_id): - """根据历史记录ID获取相关日志""" - try: - conn = sqlite3.connect(self.db_path, timeout=30) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - - cursor.execute(''' - SELECT batch_id, history_id, timestamp, level, message, query_type - FROM query_logs - WHERE history_id = ? 
- ORDER BY id ASC - ''', (history_id,)) - - rows = cursor.fetchall() - logs = [] - for row in rows: - logs.append({ - 'batch_id': row['batch_id'], - 'history_id': row['history_id'], - 'timestamp': row['timestamp'], - 'level': row['level'], - 'message': row['message'], - 'query_type': row['query_type'] - }) - - conn.close() - return logs - except Exception as e: - print(f"Warning: Failed to get logs by history_id: {e}") - return [] - - def get_logs_grouped_by_batch(self, limit=None, from_db=True): - """按批次分组获取日志""" - logs = self.get_logs(limit, from_db) - grouped_logs = {} - batch_order = [] - - for log in logs: - batch_id = log.get('batch_id', 'unknown') - if batch_id not in grouped_logs: - grouped_logs[batch_id] = [] - batch_order.append(batch_id) - grouped_logs[batch_id].append(log) - - # 返回按时间顺序排列的批次 - return [(batch_id, grouped_logs[batch_id]) for batch_id in batch_order] - - def clear_logs(self, clear_db=True): - """清空日志""" - # 清空内存 - self.logs.clear() - self.current_batch_id = None - self.batch_counter = 0 - - # 清空数据库 - if clear_db: - try: - conn = sqlite3.connect(self.db_path, timeout=30) - cursor = conn.cursor() - cursor.execute('DELETE FROM query_logs') - conn.commit() - conn.close() - except Exception as e: - print(f"Warning: Failed to clear logs from database: {e}") - - def cleanup_old_logs(self, days_to_keep=30): - """清理旧日志,保留指定天数的日志""" - try: - conn = sqlite3.connect(self.db_path, timeout=30) - cursor = conn.cursor() - - # 删除超过指定天数的日志 - cutoff_date = datetime.now() - timedelta(days=days_to_keep) - cursor.execute(''' - DELETE FROM query_logs - WHERE created_at < ? - ''', (cutoff_date.strftime('%Y-%m-%d %H:%M:%S'),)) - - deleted_count = cursor.rowcount - conn.commit() - conn.close() - - logger.info(f"清理了 {deleted_count} 条超过 {days_to_keep} 天的旧日志") - return deleted_count - except Exception as e: - logger.error(f"清理旧日志失败: {e}") - return 0 - -# 全局日志收集器实例 +# 创建查询日志收集器 query_log_collector = QueryLogCollector() -# 自定义日志处理器 -class CollectorHandler(logging.Handler): - def __init__(self, collector): - super().__init__() - self.collector = collector - - def emit(self, record): - self.collector.add_log(record.levelname, record.getMessage()) - -# 添加收集器处理器到logger +# 设置日志处理器,将应用日志记录到查询日志中 collector_handler = CollectorHandler(query_log_collector) -logger.addHandler(collector_handler) +collector_handler.setLevel(logging.INFO) -class ShardingCalculator: - """分表计算器,基于TWCS策略""" - - def __init__(self, interval_seconds=604800, table_count=14): - """ - 初始化分表计算器 - :param interval_seconds: 时间间隔(秒),默认604800(7天) - :param table_count: 分表数量,默认14 - """ - self.interval_seconds = interval_seconds - self.table_count = table_count - - def extract_timestamp_from_key(self, key): - """ - 从Key中提取时间戳 - 新规则:优先提取最后一个下划线后的数字,如果没有下划线则提取最后连续的数字部分 - """ - if not key: - return None - - key_str = str(key) - - # 方法1:如果包含下划线,尝试提取最后一个下划线后的部分 - if '_' in key_str: - parts = key_str.split('_') - last_part = parts[-1] - # 检查最后一部分是否为纯数字 - if last_part.isdigit(): - timestamp = int(last_part) - logger.info(f"Key '{key}' 通过下划线分割提取到时间戳: {timestamp}") - return timestamp - - # 方法2:使用正则表达式找到所有数字序列,取最后一个较长的 - number_sequences = re.findall(r'\d+', key_str) - - if not number_sequences: - logger.warning(f"Key '{key}' 中没有找到数字字符") - return None - - # 如果有多个数字序列,优先选择最长的,如果长度相同则选择最后一个 - longest_sequence = max(number_sequences, key=len) - - # 如果最长的有多个,选择最后一个最长的 - max_length = len(longest_sequence) - last_longest = None - for seq in number_sequences: - if len(seq) == max_length: - last_longest = seq - - try: - timestamp = int(last_longest) - 
logger.info(f"Key '{key}' 通过数字序列提取到时间戳: {timestamp} (从序列 {number_sequences} 中选择)") - return timestamp - except ValueError: - logger.error(f"Key '{key}' 时间戳转换失败: {last_longest}") - return None - - def calculate_shard_index(self, timestamp): - """ - 计算分表索引 - 公式:timestamp // interval_seconds % table_count - """ - if timestamp is None: - return None - return int(timestamp) // self.interval_seconds % self.table_count - - def get_shard_table_name(self, base_table_name, key): - """ - 根据Key获取对应的分表名称 - """ - timestamp = self.extract_timestamp_from_key(key) - if timestamp is None: - return None - - shard_index = self.calculate_shard_index(timestamp) - return f"{base_table_name}_{shard_index}" - - def get_all_shard_tables_for_keys(self, base_table_name, keys): - """ - 为一批Keys计算所有需要查询的分表 - 返回: {shard_table_name: [keys_for_this_shard], ...} - """ - shard_mapping = {} - failed_keys = [] - calculation_stats = { - 'total_keys': len(keys), - 'successful_extractions': 0, - 'failed_extractions': 0, - 'unique_shards': 0 - } - - for key in keys: - shard_table = self.get_shard_table_name(base_table_name, key) - if shard_table: - if shard_table not in shard_mapping: - shard_mapping[shard_table] = [] - shard_mapping[shard_table].append(key) - calculation_stats['successful_extractions'] += 1 - else: - failed_keys.append(key) - calculation_stats['failed_extractions'] += 1 - - calculation_stats['unique_shards'] = len(shard_mapping) - - return shard_mapping, failed_keys, calculation_stats +# 为特定的logger添加收集器 +cassandra_logger = logging.getLogger('modules.cassandra_client') +query_logger = logging.getLogger('modules.query_engine') +comparison_logger = logging.getLogger('modules.data_comparison') +sharding_logger = logging.getLogger('modules.sharding') -def init_database(): - """初始化数据库""" - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - # 创建配置组表 - cursor.execute(''' - CREATE TABLE IF NOT EXISTS config_groups ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL UNIQUE, - description TEXT, - pro_config TEXT NOT NULL, - test_config TEXT NOT NULL, - query_config TEXT NOT NULL, - sharding_config TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - # 创建查询历史表,包含分表配置字段 - cursor.execute(''' - CREATE TABLE IF NOT EXISTS query_history ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - description TEXT, - pro_config TEXT NOT NULL, - test_config TEXT NOT NULL, - query_config TEXT NOT NULL, - query_keys TEXT NOT NULL, - results_summary TEXT NOT NULL, - execution_time REAL NOT NULL, - total_keys INTEGER NOT NULL, - differences_count INTEGER NOT NULL, - identical_count INTEGER NOT NULL, - sharding_config TEXT, - query_type TEXT DEFAULT 'single', - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - # 创建分表配置组表 - cursor.execute(''' - CREATE TABLE IF NOT EXISTS sharding_config_groups ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL UNIQUE, - description TEXT, - pro_config TEXT NOT NULL, - test_config TEXT NOT NULL, - query_config TEXT NOT NULL, - sharding_config TEXT NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - # 创建查询日志表 - cursor.execute(''' - CREATE TABLE IF NOT EXISTS query_logs ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - batch_id TEXT NOT NULL, - history_id INTEGER, - timestamp TEXT NOT NULL, - level TEXT NOT NULL, - message TEXT NOT NULL, - query_type TEXT DEFAULT 'single', - created_at TIMESTAMP DEFAULT 
CURRENT_TIMESTAMP, - FOREIGN KEY (history_id) REFERENCES query_history (id) ON DELETE CASCADE - ) - ''') - - # 创建索引 - cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_batch_id ON query_logs(batch_id)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_history_id ON query_logs(history_id)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_timestamp ON query_logs(timestamp)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_level ON query_logs(level)') - - conn.commit() - conn.close() - logger.info("数据库初始化完成") - return True - except Exception as e: - logger.error(f"数据库初始化失败: {e}") - return False +cassandra_logger.addHandler(collector_handler) +query_logger.addHandler(collector_handler) +comparison_logger.addHandler(collector_handler) +sharding_logger.addHandler(collector_handler) -def ensure_database(): - """确保数据库和表存在""" - if not os.path.exists(DATABASE_PATH): - logger.info("数据库文件不存在,正在创建...") - return init_database() - - # 检查表是否存在 - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name IN ('config_groups', 'query_history', 'sharding_config_groups', 'query_logs')") - results = cursor.fetchall() - existing_tables = [row[0] for row in results] - - required_tables = ['config_groups', 'query_history', 'sharding_config_groups', 'query_logs'] - missing_tables = [table for table in required_tables if table not in existing_tables] - - if missing_tables: - logger.info(f"数据库表不完整,缺少表:{missing_tables},正在重新创建...") - return init_database() - - # 检查config_groups表是否有sharding_config字段 - cursor.execute("PRAGMA table_info(config_groups)") - columns = cursor.fetchall() - column_names = [column[1] for column in columns] - - if 'sharding_config' not in column_names: - logger.info("添加sharding_config字段到config_groups表...") - cursor.execute("ALTER TABLE config_groups ADD COLUMN sharding_config TEXT") - conn.commit() - logger.info("sharding_config字段添加成功") - - # 检查query_history表是否有分表相关字段 - cursor.execute("PRAGMA table_info(query_history)") - history_columns = cursor.fetchall() - history_column_names = [column[1] for column in history_columns] - - if 'sharding_config' not in history_column_names: - logger.info("添加sharding_config字段到query_history表...") - cursor.execute("ALTER TABLE query_history ADD COLUMN sharding_config TEXT") - conn.commit() - logger.info("query_history表sharding_config字段添加成功") - - if 'query_type' not in history_column_names: - logger.info("添加query_type字段到query_history表...") - cursor.execute("ALTER TABLE query_history ADD COLUMN query_type TEXT DEFAULT 'single'") - conn.commit() - logger.info("query_history表query_type字段添加成功") - - # 添加查询结果数据存储字段 - if 'raw_results' not in history_column_names: - logger.info("添加raw_results字段到query_history表...") - cursor.execute("ALTER TABLE query_history ADD COLUMN raw_results TEXT") - conn.commit() - logger.info("query_history表raw_results字段添加成功") - - if 'differences_data' not in history_column_names: - logger.info("添加differences_data字段到query_history表...") - cursor.execute("ALTER TABLE query_history ADD COLUMN differences_data TEXT") - conn.commit() - logger.info("query_history表differences_data字段添加成功") - - if 'identical_data' not in history_column_names: - logger.info("添加identical_data字段到query_history表...") - cursor.execute("ALTER TABLE query_history ADD COLUMN identical_data TEXT") - conn.commit() - logger.info("query_history表identical_data字段添加成功") - - # 检查query_logs表是否存在history_id字段 - cursor.execute("PRAGMA table_info(query_logs)") - 
logs_columns = cursor.fetchall() - logs_column_names = [column[1] for column in logs_columns] - - if 'history_id' not in logs_column_names: - logger.info("添加history_id字段到query_logs表...") - cursor.execute("ALTER TABLE query_logs ADD COLUMN history_id INTEGER") - # 创建外键索引 - cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_history_id ON query_logs(history_id)') - conn.commit() - logger.info("query_logs表history_id字段添加成功") - - conn.close() - return True - except Exception as e: - logger.error(f"检查数据库表失败: {e}") - return init_database() - -def get_db_connection(): - """获取数据库连接""" - conn = sqlite3.connect(DATABASE_PATH) - conn.row_factory = sqlite3.Row - return conn - -def normalize_json_string(value): - """标准化JSON字符串,用于比较""" - if not isinstance(value, str): - return value - - try: - # 尝试解析JSON - json_obj = json.loads(value) - - # 如果是数组,需要进行特殊处理 - if isinstance(json_obj, list): - # 尝试对数组元素进行标准化排序 - normalized_array = normalize_json_array(json_obj) - return json.dumps(normalized_array, sort_keys=True, separators=(',', ':')) - else: - # 普通对象,直接序列化 - return json.dumps(json_obj, sort_keys=True, separators=(',', ':')) - except (json.JSONDecodeError, TypeError): - # 如果不是JSON,返回原值 - return value - -def normalize_json_array(json_array): - """标准化JSON数组,处理元素顺序问题""" - try: - normalized_elements = [] - - for element in json_array: - if isinstance(element, dict): - # 对字典元素进行标准化 - normalized_elements.append(json.dumps(element, sort_keys=True, separators=(',', ':'))) - elif isinstance(element, str): - # 如果是字符串,尝试解析为JSON - try: - parsed_element = json.loads(element) - normalized_elements.append(json.dumps(parsed_element, sort_keys=True, separators=(',', ':'))) - except: - normalized_elements.append(element) - else: - normalized_elements.append(element) - - # 对标准化后的元素进行排序,确保顺序一致 - normalized_elements.sort() - - # 重新解析为对象数组 - result_array = [] - for element in normalized_elements: - if isinstance(element, str): - try: - result_array.append(json.loads(element)) - except: - result_array.append(element) - else: - result_array.append(element) - - return result_array - - except Exception as e: - logger.warning(f"数组标准化失败: {e}") - return json_array - -def is_json_array_field(value): - """检查字段是否为JSON数组格式""" - if not isinstance(value, (str, list)): - return False - - try: - if isinstance(value, str): - parsed = json.loads(value) - return isinstance(parsed, list) - elif isinstance(value, list): - # 检查是否为JSON字符串数组 - if len(value) > 0 and isinstance(value[0], str): - try: - json.loads(value[0]) - return True - except: - return False - return True - except: - return False - -def compare_array_values(value1, value2): - """专门用于比较数组类型的值""" - try: - # 处理字符串表示的数组 - if isinstance(value1, str) and isinstance(value2, str): - try: - array1 = json.loads(value1) - array2 = json.loads(value2) - if isinstance(array1, list) and isinstance(array2, list): - return compare_json_arrays(array1, array2) - except: - pass - - # 处理Python列表类型 - elif isinstance(value1, list) and isinstance(value2, list): - return compare_json_arrays(value1, value2) - - # 处理混合情况:一个是字符串数组,一个是列表 - elif isinstance(value1, list) and isinstance(value2, str): - try: - array2 = json.loads(value2) - if isinstance(array2, list): - return compare_json_arrays(value1, array2) - except: - pass - elif isinstance(value1, str) and isinstance(value2, list): - try: - array1 = json.loads(value1) - if isinstance(array1, list): - return compare_json_arrays(array1, value2) - except: - pass - - return False - except Exception as e: - logger.warning(f"数组比较失败: {e}") - return False - 
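# A minimal, self-contained sketch (editor's illustration; `normalized` is a hypothetical
# helper, not part of this file) of the comparison idea implemented by the JSON helpers in
# this section: two field values count as equal when their JSON forms match after
# normalization, i.e. object key order and array element order are ignored.
import json

def normalized(value):
    """Canonical JSON string if value parses as JSON, otherwise the value unchanged."""
    try:
        obj = json.loads(value) if isinstance(value, str) else value
    except (json.JSONDecodeError, TypeError):
        return value
    if isinstance(obj, list):
        # Sort the serialized elements so ["a", "b"] compares equal to ["b", "a"].
        obj = sorted(json.dumps(item, sort_keys=True) for item in obj)
    return json.dumps(obj, sort_keys=True)

# Example: both sides normalize to '{"a": 2, "b": 1}', so they compare equal.
assert normalized('{"b": 1, "a": 2}') == normalized('{"a": 2, "b": 1}')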
-def compare_json_arrays(array1, array2): - """比较两个JSON数组,忽略元素顺序""" - try: - if len(array1) != len(array2): - return False - - # 标准化两个数组 - normalized_array1 = normalize_json_array(array1.copy()) - normalized_array2 = normalize_json_array(array2.copy()) - - # 将标准化后的数组转换为可比较的格式 - comparable1 = json.dumps(normalized_array1, sort_keys=True) - comparable2 = json.dumps(normalized_array2, sort_keys=True) - - return comparable1 == comparable2 - - except Exception as e: - logger.warning(f"JSON数组比较失败: {e}") - return False - -def format_json_for_display(value): - """格式化JSON用于显示""" - if not isinstance(value, str): - return str(value) - - try: - # 尝试解析JSON - json_obj = json.loads(value) - # 格式化显示(带缩进) - return json.dumps(json_obj, sort_keys=True, indent=2, ensure_ascii=False) - except (json.JSONDecodeError, TypeError): - # 如果不是JSON,返回原值 - return str(value) - -def is_json_field(value): - """检查字段是否为JSON格式""" - if not isinstance(value, str): - return False - - try: - json.loads(value) - return True - except (json.JSONDecodeError, TypeError): - return False - -def compare_values(value1, value2): - """智能比较两个值,支持JSON标准化和数组比较""" - # 首先检查是否为数组类型 - if is_json_array_field(value1) or is_json_array_field(value2): - return compare_array_values(value1, value2) - - # 如果两个值都是字符串,尝试JSON标准化比较 - if isinstance(value1, str) and isinstance(value2, str): - normalized_value1 = normalize_json_string(value1) - normalized_value2 = normalize_json_string(value2) - return normalized_value1 == normalized_value2 - - # 其他情况直接比较 - return value1 == value2 - -# 默认配置(不显示敏感信息) -DEFAULT_CONFIG = { - 'pro_config': { - 'cluster_name': '', - 'hosts': [], - 'port': 9042, - 'datacenter': '', - 'username': '', - 'password': '', - 'keyspace': '', - 'table': '' - }, - 'test_config': { - 'cluster_name': '', - 'hosts': [], - 'port': 9042, - 'datacenter': '', - 'username': '', - 'password': '', - 'keyspace': '', - 'table': '' - }, - 'keys': [], - 'fields_to_compare': [], - 'exclude_fields': [] -} - -def save_config_group(name, description, pro_config, test_config, query_config, sharding_config=None): - """保存配置组""" - if not ensure_database(): - logger.error("数据库初始化失败") - return False - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute(''' - INSERT OR REPLACE INTO config_groups - (name, description, pro_config, test_config, query_config, sharding_config, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?) 
- ''', ( - name, description, - json.dumps(pro_config), - json.dumps(test_config), - json.dumps(query_config), - json.dumps(sharding_config) if sharding_config else None, - datetime.now().isoformat() - )) - conn.commit() - logger.info(f"配置组 '{name}' 保存成功,包含分表配置: {sharding_config is not None}") - return True - except Exception as e: - logger.error(f"保存配置组失败: {e}") - return False - finally: - conn.close() - -def get_config_groups(): - """获取所有配置组""" - if not ensure_database(): - logger.error("数据库初始化失败") - return [] - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute(''' - SELECT id, name, description, created_at, updated_at - FROM config_groups - ORDER BY updated_at DESC - ''') - rows = cursor.fetchall() - - config_groups = [] - for row in rows: - config_groups.append({ - 'id': row['id'], - 'name': row['name'], - 'description': row['description'], - 'created_at': row['created_at'], - 'updated_at': row['updated_at'] - }) - - return config_groups - except Exception as e: - logger.error(f"获取配置组失败: {e}") - return [] - finally: - conn.close() - -def get_config_group_by_id(group_id): - """根据ID获取配置组详情""" - if not ensure_database(): - logger.error("数据库初始化失败") - return None - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute(''' - SELECT id, name, description, pro_config, test_config, query_config, - sharding_config, created_at, updated_at - FROM config_groups WHERE id = ? - ''', (group_id,)) - row = cursor.fetchone() - - if row: - config = { - 'id': row['id'], - 'name': row['name'], - 'description': row['description'], - 'pro_config': json.loads(row['pro_config']), - 'test_config': json.loads(row['test_config']), - 'query_config': json.loads(row['query_config']), - 'created_at': row['created_at'], - 'updated_at': row['updated_at'] - } - - # 添加分表配置 - if row['sharding_config']: - try: - config['sharding_config'] = json.loads(row['sharding_config']) - except (json.JSONDecodeError, TypeError): - config['sharding_config'] = None - else: - config['sharding_config'] = None - - return config - return None - except Exception as e: - logger.error(f"获取配置组详情失败: {e}") - return None - finally: - conn.close() - -def delete_config_group(group_id): - """删除配置组""" - if not ensure_database(): - logger.error("数据库初始化失败") - return False - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute('DELETE FROM config_groups WHERE id = ?', (group_id,)) - conn.commit() - success = cursor.rowcount > 0 - if success: - logger.info(f"配置组ID {group_id} 删除成功") - return success - except Exception as e: - logger.error(f"删除配置组失败: {e}") - return False - finally: - conn.close() - -def save_query_history(name, description, pro_config, test_config, query_config, query_keys, - results_summary, execution_time, total_keys, differences_count, identical_count, - sharding_config=None, query_type='single', raw_results=None, differences_data=None, identical_data=None): - """保存查询历史记录,支持分表查询和查询结果数据,返回历史记录ID""" - if not ensure_database(): - logger.error("数据库初始化失败") - return None - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute(''' - INSERT INTO query_history - (name, description, pro_config, test_config, query_config, query_keys, - results_summary, execution_time, total_keys, differences_count, identical_count, - sharding_config, query_type, raw_results, differences_data, identical_data) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
- ''', ( - name, description, - json.dumps(pro_config), - json.dumps(test_config), - json.dumps(query_config), - json.dumps(query_keys), - json.dumps(results_summary), - execution_time, - total_keys, - differences_count, - identical_count, - json.dumps(sharding_config) if sharding_config else None, - query_type, - json.dumps(raw_results) if raw_results else None, - json.dumps(differences_data) if differences_data else None, - json.dumps(identical_data) if identical_data else None - )) - - # 获取插入记录的ID - history_id = cursor.lastrowid - conn.commit() - logger.info(f"查询历史记录 '{name}' 保存成功,查询类型:{query_type},ID:{history_id}") - return history_id - except Exception as e: - logger.error(f"保存查询历史记录失败: {e}") - return None - finally: - conn.close() - -def get_query_history(): - """获取所有查询历史记录""" - if not ensure_database(): - logger.error("数据库初始化失败") - return [] - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute(''' - SELECT id, name, description, execution_time, total_keys, - differences_count, identical_count, created_at, query_type - FROM query_history - ORDER BY created_at DESC - ''') - rows = cursor.fetchall() - - history_list = [] - for row in rows: - # 获取列名列表以检查字段是否存在 - column_names = [desc[0] for desc in cursor.description] - history_list.append({ - 'id': row['id'], - 'name': row['name'], - 'description': row['description'], - 'execution_time': row['execution_time'], - 'total_keys': row['total_keys'], - 'differences_count': row['differences_count'], - 'identical_count': row['identical_count'], - 'created_at': row['created_at'], - 'query_type': row['query_type'] if 'query_type' in column_names else 'single' - }) - - return history_list - except Exception as e: - logger.error(f"获取查询历史记录失败: {e}") - return [] - finally: - conn.close() - -def get_query_history_by_id(history_id): - """根据ID获取查询历史记录详情""" - if not ensure_database(): - logger.error("数据库初始化失败") - return None - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute(''' - SELECT * FROM query_history WHERE id = ? 
- ''', (history_id,)) - row = cursor.fetchone() - - if row: - # 获取列名列表以检查字段是否存在 - column_names = [desc[0] for desc in cursor.description] - return { - 'id': row['id'], - 'name': row['name'], - 'description': row['description'], - 'pro_config': json.loads(row['pro_config']), - 'test_config': json.loads(row['test_config']), - 'query_config': json.loads(row['query_config']), - 'query_keys': json.loads(row['query_keys']), - 'results_summary': json.loads(row['results_summary']), - 'execution_time': row['execution_time'], - 'total_keys': row['total_keys'], - 'differences_count': row['differences_count'], - 'identical_count': row['identical_count'], - 'created_at': row['created_at'], - # 处理新字段,保持向后兼容 - 'sharding_config': json.loads(row['sharding_config']) if 'sharding_config' in column_names and row['sharding_config'] else None, - 'query_type': row['query_type'] if 'query_type' in column_names else 'single', - # 添加查询结果数据支持 - 'raw_results': json.loads(row['raw_results']) if 'raw_results' in column_names and row['raw_results'] else None, - 'differences_data': json.loads(row['differences_data']) if 'differences_data' in column_names and row['differences_data'] else None, - 'identical_data': json.loads(row['identical_data']) if 'identical_data' in column_names and row['identical_data'] else None - } - return None - except Exception as e: - logger.error(f"获取查询历史记录详情失败: {e}") - return None - finally: - conn.close() - -def delete_query_history(history_id): - """删除查询历史记录""" - if not ensure_database(): - logger.error("数据库初始化失败") - return False - - conn = get_db_connection() - cursor = conn.cursor() - - try: - cursor.execute('DELETE FROM query_history WHERE id = ?', (history_id,)) - conn.commit() - success = cursor.rowcount > 0 - if success: - logger.info(f"查询历史记录ID {history_id} 删除成功") - return success - except Exception as e: - logger.error(f"删除查询历史记录失败: {e}") - return False - finally: - conn.close() - -def create_connection(config): - """创建Cassandra连接,带有增强的错误诊断和容错机制""" - start_time = time.time() - - logger.info(f"=== 开始创建Cassandra连接 ===") - logger.info(f"主机列表: {config.get('hosts', [])}") - logger.info(f"端口: {config.get('port', 9042)}") - logger.info(f"用户名: {config.get('username', 'N/A')}") - logger.info(f"Keyspace: {config.get('keyspace', 'N/A')}") - - try: - logger.info("正在创建认证提供者...") - auth_provider = PlainTextAuthProvider(username=config['username'], password=config['password']) - - logger.info("正在创建集群连接...") - # 设置连接池配置,提高容错性 - from cassandra.policies import DCAwareRoundRobinPolicy - - # 设置负载均衡策略,避免单点故障 - load_balancing_policy = DCAwareRoundRobinPolicy(local_dc=config.get('datacenter', 'dc1')) - - # 创建连接配置,增加容错参数 - cluster = Cluster( - config['hosts'], - port=config['port'], - auth_provider=auth_provider, - load_balancing_policy=load_balancing_policy, - # 增加容错配置 - protocol_version=4, # 使用稳定的协议版本 - connect_timeout=15, # 连接超时 - control_connection_timeout=15, # 控制连接超时 - max_schema_agreement_wait=30 # schema同步等待时间 - ) - - logger.info("正在连接到Keyspace...") - session = cluster.connect(config['keyspace']) - - # 设置session级别的容错参数 - session.default_timeout = 30 # 查询超时时间 - - connection_time = time.time() - start_time - logger.info(f"✅ Cassandra连接成功: 连接时间={connection_time:.3f}秒") - - # 记录集群状态 - try: - cluster_name = cluster.metadata.cluster_name or "Unknown" - logger.info(f" 集群名称: {cluster_name}") - - # 记录可用主机状态 - live_hosts = [str(host.address) for host in cluster.metadata.all_hosts() if host.is_up] - down_hosts = [str(host.address) for host in cluster.metadata.all_hosts() if not host.is_up] - - logger.info(f" 
可用节点: {live_hosts} ({len(live_hosts)}个)") - if down_hosts: - logger.warning(f" 故障节点: {down_hosts} ({len(down_hosts)}个)") - - except Exception as meta_error: - logger.warning(f"无法获取集群元数据: {meta_error}") - - return cluster, session - - except Exception as e: - connection_time = time.time() - start_time - error_msg = str(e) - - logger.error(f"❌ Cassandra连接失败: 连接时间={connection_time:.3f}秒") - logger.error(f"错误类型: {type(e).__name__}") - logger.error(f"错误详情: {error_msg}") - - # 提供详细的诊断信息 - if "connection refused" in error_msg.lower() or "unable to connect" in error_msg.lower(): - logger.error("❌ 诊断:无法连接到Cassandra服务器") - logger.error("🔧 建议检查:") - logger.error(" 1. Cassandra服务是否启动") - logger.error(" 2. 主机地址和端口是否正确") - logger.error(" 3. 网络防火墙是否阻挡连接") - - elif "timeout" in error_msg.lower(): - logger.error("❌ 诊断:连接超时") - logger.error("🔧 建议检查:") - logger.error(" 1. 网络延迟是否过高") - logger.error(" 2. Cassandra服务器负载是否过高") - logger.error(" 3. 增加连接超时时间") - - elif "authentication" in error_msg.lower() or "unauthorized" in error_msg.lower(): - logger.error("❌ 诊断:认证失败") - logger.error("🔧 建议检查:") - logger.error(" 1. 用户名和密码是否正确") - logger.error(" 2. 用户是否有访问该keyspace的权限") - - elif "keyspace" in error_msg.lower(): - logger.error("❌ 诊断:Keyspace不存在") - logger.error("🔧 建议检查:") - logger.error(" 1. Keyspace名称是否正确") - logger.error(" 2. Keyspace是否已创建") - - else: - logger.error("❌ 诊断:未知连接错误") - logger.error("🔧 建议:") - logger.error(" 1. 检查所有连接参数") - logger.error(" 2. 查看Cassandra服务器日志") - logger.error(" 3. 测试网络连通性") - - return None, None - -def execute_query(session, table, keys, fields, values, exclude_fields=None): - """执行查询,支持单主键和复合主键""" - try: - # 参数验证 - if not keys or len(keys) == 0: - logger.error("Keys参数为空,无法构建查询") - return [] - - if not values or len(values) == 0: - logger.error("Values参数为空,无法构建查询") - return [] - - # 构建查询条件 - if len(keys) == 1: - # 单主键查询(保持原有逻辑) - quoted_values = [f"'{value}'" for value in values] - query_conditions = f"{keys[0]} IN ({', '.join(quoted_values)})" - else: - # 复合主键查询 - conditions = [] - for value in values: - # 检查value是否包含复合主键分隔符 - if isinstance(value, str) and ',' in value: - # 解析复合主键值 - key_values = [v.strip() for v in value.split(',')] - if len(key_values) == len(keys): - # 构建单个复合主键条件: (key1='val1' AND key2='val2') - key_conditions = [] - for i, (key, val) in enumerate(zip(keys, key_values)): - key_conditions.append(f"{key} = '{val}'") - conditions.append(f"({' AND '.join(key_conditions)})") - else: - logger.warning(f"复合主键值 '{value}' 的字段数量({len(key_values)})与主键字段数量({len(keys)})不匹配") - # 将其作为第一个主键的值处理 - conditions.append(f"{keys[0]} = '{value}'") - else: - # 单值,作为第一个主键的值处理 - conditions.append(f"{keys[0]} = '{value}'") - - if conditions: - query_conditions = ' OR '.join(conditions) - else: - logger.error("无法构建有效的查询条件") - return [] - - # 确定要查询的字段 - if fields: - fields_str = ", ".join(fields) - else: - fields_str = "*" - - query_sql = f"SELECT {fields_str} FROM {table} WHERE {query_conditions};" - - # 记录查询SQL日志 - logger.info(f"执行查询SQL: {query_sql}") - if len(keys) > 1: - logger.info(f"复合主键查询参数: 表={table}, 主键字段={keys}, 字段={fields_str}, Key数量={len(values)}") - else: - logger.info(f"单主键查询参数: 表={table}, 主键字段={keys[0]}, 字段={fields_str}, Key数量={len(values)}") - - # 执行查询 - start_time = time.time() - result = session.execute(query_sql) - execution_time = time.time() - start_time - - result_list = list(result) if result else [] - logger.info(f"查询完成: 执行时间={execution_time:.3f}秒, 返回记录数={len(result_list)}") - - return result_list - except Exception as e: - logger.error(f"查询执行失败: SQL={query_sql if 
'query_sql' in locals() else 'N/A'}, 错误={str(e)}") - return [] - -def execute_sharding_query(session, shard_mapping, keys, fields, exclude_fields=None): - """ - 执行分表查询 - :param session: Cassandra会话 - :param shard_mapping: 分表映射 {table_name: [keys]} - :param keys: 主键字段名列表 - :param fields: 要查询的字段列表 - :param exclude_fields: 要排除的字段列表 - :return: (查询结果列表, 查询到的表列表, 查询失败的表列表) - """ - all_results = [] - queried_tables = [] - error_tables = [] - - logger.info(f"开始执行分表查询,涉及 {len(shard_mapping)} 张分表") - total_start_time = time.time() - - for table_name, table_keys in shard_mapping.items(): - try: - logger.info(f"查询分表 {table_name},包含 {len(table_keys)} 个key: {table_keys}") - # 为每个分表执行查询 - table_results = execute_query(session, table_name, keys, fields, table_keys, exclude_fields) - all_results.extend(table_results) - queried_tables.append(table_name) - logger.info(f"分表 {table_name} 查询成功,返回 {len(table_results)} 条记录") - except Exception as e: - logger.error(f"分表 {table_name} 查询失败: {e}") - error_tables.append(table_name) - - total_execution_time = time.time() - total_start_time - logger.info(f"分表查询总计完成: 执行时间={total_execution_time:.3f}秒, 成功表数={len(queried_tables)}, 失败表数={len(error_tables)}, 总记录数={len(all_results)}") - - return all_results, queried_tables, error_tables - -def execute_mixed_query(pro_session, test_session, pro_config, test_config, keys, fields_to_compare, values, exclude_fields, sharding_config): - """ - 执行混合查询(生产环境分表,测试环境可能单表或分表) - """ - results = { - 'pro_data': [], - 'test_data': [], - 'sharding_info': { - 'calculation_stats': {} - } - } - - # 处理生产环境查询 - if sharding_config.get('use_sharding_for_pro', False): - # 获取生产环境分表配置参数,优先使用专用参数,否则使用通用参数 - pro_interval = sharding_config.get('pro_interval_seconds') or sharding_config.get('interval_seconds', 604800) - pro_table_count = sharding_config.get('pro_table_count') or sharding_config.get('table_count', 14) - - # 记录生产环境分表配置信息 - logger.info(f"=== 生产环境分表配置 ===") - logger.info(f"启用分表查询: True") - logger.info(f"时间间隔: {pro_interval}秒 ({pro_interval//86400}天)") - logger.info(f"分表数量: {pro_table_count}张") - logger.info(f"基础表名: {pro_config['table']}") - - pro_calculator = ShardingCalculator( - interval_seconds=pro_interval, - table_count=pro_table_count - ) - pro_shard_mapping, pro_failed_keys, pro_calc_stats = pro_calculator.get_all_shard_tables_for_keys( - pro_config['table'], values - ) - - logger.info(f"生产环境分表映射结果: 涉及{len(pro_shard_mapping)}张分表, 失败Key数量: {len(pro_failed_keys)}") - - pro_data, pro_queried_tables, pro_error_tables = execute_sharding_query( - pro_session, pro_shard_mapping, keys, fields_to_compare, exclude_fields - ) - - results['pro_data'] = pro_data - results['sharding_info']['pro_shards'] = { - 'enabled': True, - 'interval_seconds': sharding_config.get('pro_interval_seconds', 604800), - 'table_count': sharding_config.get('pro_table_count', 14), - 'queried_tables': pro_queried_tables, - 'error_tables': pro_error_tables, - 'failed_keys': pro_failed_keys - } - results['sharding_info']['calculation_stats'].update(pro_calc_stats) - else: - # 生产环境单表查询 - logger.info(f"=== 生产环境单表配置 ===") - logger.info(f"启用分表查询: False") - logger.info(f"表名: {pro_config['table']}") - - pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields) - results['pro_data'] = pro_data - results['sharding_info']['pro_shards'] = { - 'enabled': False, - 'queried_tables': [pro_config['table']] - } - - # 处理测试环境查询 - if sharding_config.get('use_sharding_for_test', False): - # 获取测试环境分表配置参数,优先使用专用参数,否则使用通用参数 - test_interval = 
sharding_config.get('test_interval_seconds') or sharding_config.get('interval_seconds', 604800) - test_table_count = sharding_config.get('test_table_count') or sharding_config.get('table_count', 14) - - # 记录测试环境分表配置信息 - logger.info(f"=== 测试环境分表配置 ===") - logger.info(f"启用分表查询: True") - logger.info(f"时间间隔: {test_interval}秒 ({test_interval//86400}天)") - logger.info(f"分表数量: {test_table_count}张") - logger.info(f"基础表名: {test_config['table']}") - - test_calculator = ShardingCalculator( - interval_seconds=test_interval, - table_count=test_table_count - ) - test_shard_mapping, test_failed_keys, test_calc_stats = test_calculator.get_all_shard_tables_for_keys( - test_config['table'], values - ) - - logger.info(f"测试环境分表映射结果: 涉及{len(test_shard_mapping)}张分表, 失败Key数量: {len(test_failed_keys)}") - - test_data, test_queried_tables, test_error_tables = execute_sharding_query( - test_session, test_shard_mapping, keys, fields_to_compare, exclude_fields - ) - - results['test_data'] = test_data - results['sharding_info']['test_shards'] = { - 'enabled': True, - 'interval_seconds': test_interval, - 'table_count': test_table_count, - 'queried_tables': test_queried_tables, - 'error_tables': test_error_tables, - 'failed_keys': test_failed_keys - } - - # 合并计算统计信息 - if not results['sharding_info']['calculation_stats']: - results['sharding_info']['calculation_stats'] = test_calc_stats - else: - # 测试环境单表查询 - logger.info(f"=== 测试环境单表配置 ===") - logger.info(f"启用分表查询: False") - logger.info(f"表名: {test_config['table']}") - - test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields) - results['test_data'] = test_data - results['sharding_info']['test_shards'] = { - 'enabled': False, - 'queried_tables': [test_config['table']] - } - - return results - -def compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values): - """比较查询结果,支持复合主键""" - differences = [] - field_diff_count = {} - identical_results = [] # 存储相同的结果 - - def match_composite_key(row, composite_value, keys): - """检查数据行是否匹配复合主键值""" - if len(keys) == 1: - # 单主键匹配 - return getattr(row, keys[0]) == composite_value - else: - # 复合主键匹配 - if isinstance(composite_value, str) and ',' in composite_value: - key_values = [v.strip() for v in composite_value.split(',')] - if len(key_values) == len(keys): - return all(str(getattr(row, key)) == key_val for key, key_val in zip(keys, key_values)) - # 如果不是复合值,只匹配第一个主键 - return getattr(row, keys[0]) == composite_value - - for value in values: - # 查找生产表和测试表中该主键值的相关数据 - rows_pro = [row for row in pro_data if match_composite_key(row, value, keys)] - rows_test = [row for row in test_data if match_composite_key(row, value, keys)] - - for row_pro in rows_pro: - # 在测试表中查找相同主键的行 - row_test = next( - (row for row in rows_test if all(getattr(row, key) == getattr(row_pro, key) for key in keys)), - None - ) - - if row_test: - # 确定要比较的列 - columns = fields_to_compare if fields_to_compare else row_pro._fields - columns = [col for col in columns if col not in exclude_fields] - - has_difference = False - row_differences = [] - identical_fields = {} - - for column in columns: - value_pro = getattr(row_pro, column) - value_test = getattr(row_test, column) - - # 使用智能比较函数 - if not compare_values(value_pro, value_test): - has_difference = True - # 格式化显示值 - formatted_pro_value = format_json_for_display(value_pro) - formatted_test_value = format_json_for_display(value_test) - - row_differences.append({ - 'key': {key: getattr(row_pro, key) for key in keys}, - 'field': column, - 
'pro_value': formatted_pro_value, - 'test_value': formatted_test_value, - 'is_json': is_json_field(value_pro) or is_json_field(value_test), - 'is_array': is_json_array_field(value_pro) or is_json_array_field(value_test) - }) - - # 统计字段差异次数 - field_diff_count[column] = field_diff_count.get(column, 0) + 1 - else: - # 存储相同的字段值 - identical_fields[column] = format_json_for_display(value_pro) - - if has_difference: - differences.extend(row_differences) - else: - # 如果没有差异,存储到相同结果中 - identical_results.append({ - 'key': {key: getattr(row_pro, key) for key in keys}, - 'pro_fields': identical_fields, - 'test_fields': {col: format_json_for_display(getattr(row_test, col)) for col in columns} - }) - else: - # 在测试表中未找到对应行 - differences.append({ - 'key': {key: getattr(row_pro, key) for key in keys}, - 'message': '在测试表中未找到该行' - }) - - # 检查测试表中是否有生产表中不存在的行 - for row_test in rows_test: - row_pro = next( - (row for row in rows_pro if all(getattr(row, key) == getattr(row_test, key) for key in keys)), - None - ) - if not row_pro: - differences.append({ - 'key': {key: getattr(row_test, key) for key in keys}, - 'message': '在生产表中未找到该行' - }) - - return differences, field_diff_count, identical_results - -def generate_comparison_summary(total_keys, pro_count, test_count, differences, identical_results, field_diff_count): - """生成比较总结报告""" - # 计算基本统计 - different_records = len(set([list(diff['key'].values())[0] for diff in differences if 'field' in diff])) - identical_records = len(identical_results) - missing_in_test = len([diff for diff in differences if diff.get('message') == '在测试表中未找到该行']) - missing_in_pro = len([diff for diff in differences if diff.get('message') == '在生产表中未找到该行']) - - # 计算百分比 - def safe_percentage(part, total): - return round((part / total * 100), 2) if total > 0 else 0 - - identical_percentage = safe_percentage(identical_records, total_keys) - different_percentage = safe_percentage(different_records, total_keys) - - # 生成总结 - summary = { - 'overview': { - 'total_keys_queried': total_keys, - 'pro_records_found': pro_count, - 'test_records_found': test_count, - 'identical_records': identical_records, - 'different_records': different_records, - 'missing_in_test': missing_in_test, - 'missing_in_pro': missing_in_pro - }, - 'percentages': { - 'data_consistency': identical_percentage, - 'data_differences': different_percentage, - 'missing_rate': safe_percentage(missing_in_test + missing_in_pro, total_keys) - }, - 'field_analysis': { - 'total_fields_compared': len(field_diff_count) if field_diff_count else 0, - 'most_different_fields': sorted(field_diff_count.items(), key=lambda x: x[1], reverse=True)[:5] if field_diff_count else [] - }, - 'data_quality': { - 'completeness': safe_percentage(pro_count + test_count, total_keys * 2), - 'consistency_score': identical_percentage, - 'quality_level': get_quality_level(identical_percentage) - }, - 'recommendations': generate_recommendations(identical_percentage, missing_in_test, missing_in_pro, field_diff_count) - } - - return summary - -def get_quality_level(consistency_percentage): - """根据一致性百分比获取数据质量等级""" - if consistency_percentage >= 95: - return {'level': '优秀', 'color': 'success', 'description': '数据一致性非常高'} - elif consistency_percentage >= 90: - return {'level': '良好', 'color': 'info', 'description': '数据一致性较高'} - elif consistency_percentage >= 80: - return {'level': '一般', 'color': 'warning', 'description': '数据一致性中等,需要关注'} - else: - return {'level': '较差', 'color': 'danger', 'description': '数据一致性较低,需要重点处理'} - -def 
generate_recommendations(consistency_percentage, missing_in_test, missing_in_pro, field_diff_count): - """生成改进建议""" - recommendations = [] - - if consistency_percentage < 90: - recommendations.append('建议重点关注数据一致性问题,检查数据同步机制') - - if missing_in_test > 0: - recommendations.append(f'测试环境缺失 {missing_in_test} 条记录,建议检查数据迁移过程') - - if missing_in_pro > 0: - recommendations.append(f'生产环境缺失 {missing_in_pro} 条记录,建议检查数据完整性') - - if field_diff_count: - top_diff_field = max(field_diff_count.items(), key=lambda x: x[1]) - recommendations.append(f'字段 "{top_diff_field[0]}" 差异最多({top_diff_field[1]}次),建议优先处理') - - if not recommendations: - recommendations.append('数据质量良好,建议继续保持当前的数据管理流程') - - return recommendations - -@app.route('/') -def index(): - return render_template('index.html') - -@app.route('/test-config-load') -def test_config_load(): - """配置加载测试页面""" - return send_from_directory('.', 'test_config_load.html') - -@app.route('/db-compare') -def db_compare(): - return render_template('db_compare.html') - -@app.route('/api/sharding-query', methods=['POST']) -def sharding_query_compare(): - """分表查询比对API""" - try: - data = request.json - - # 开始新的查询批次 - batch_id = query_log_collector.start_new_batch('分表') - - logger.info("开始执行分表数据库比对查询") - - # 解析配置 - pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config']) - test_config = data.get('test_config', DEFAULT_CONFIG['test_config']) - - # 从query_config中获取keys等参数 - query_config = data.get('query_config', {}) - keys = query_config.get('keys', DEFAULT_CONFIG['keys']) - fields_to_compare = query_config.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare']) - exclude_fields = query_config.get('exclude_fields', DEFAULT_CONFIG['exclude_fields']) - - values = data.get('values', []) - sharding_config = data.get('sharding_config', {}) - - # 参数验证 - if not values: - logger.warning("分表查询失败:未提供查询key值") - return jsonify({'error': '请提供查询key值'}), 400 - - if not keys: - logger.warning("分表查询失败:未提供主键字段") - return jsonify({'error': '请提供主键字段'}), 400 - - # 添加详细的参数日志 - logger.info(f"分表查询参数解析结果:") - logger.info(f" keys: {keys}") - logger.info(f" values数量: {len(values)}") - logger.info(f" fields_to_compare: {fields_to_compare}") - logger.info(f" exclude_fields: {exclude_fields}") - logger.info(f" sharding_config原始数据: {sharding_config}") - logger.info(f" sharding_config具体参数:") - logger.info(f" use_sharding_for_pro: {sharding_config.get('use_sharding_for_pro')}") - logger.info(f" use_sharding_for_test: {sharding_config.get('use_sharding_for_test')}") - logger.info(f" pro_interval_seconds: {sharding_config.get('pro_interval_seconds')}") - logger.info(f" pro_table_count: {sharding_config.get('pro_table_count')}") - logger.info(f" test_interval_seconds: {sharding_config.get('test_interval_seconds')}") - logger.info(f" test_table_count: {sharding_config.get('test_table_count')}") - logger.info(f" interval_seconds: {sharding_config.get('interval_seconds')}") - logger.info(f" table_count: {sharding_config.get('table_count')}") - - logger.info(f"分表查询配置:{len(values)}个key值,生产表:{pro_config['table']},测试表:{test_config['table']}") - - # 创建数据库连接 - pro_cluster, pro_session = create_connection(pro_config) - test_cluster, test_session = create_connection(test_config) - - if not pro_session or not test_session: - logger.error("数据库连接失败") - return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500 - - try: - # 执行混合查询(支持生产环境分表、测试环境单表/分表的组合) - logger.info("执行分表混合查询") - query_results = execute_mixed_query( - pro_session, test_session, pro_config, test_config, - keys, fields_to_compare, values, 
exclude_fields, sharding_config - ) - - pro_data = query_results['pro_data'] - test_data = query_results['test_data'] - sharding_info = query_results['sharding_info'] - - logger.info(f"分表查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录") - - # 比较结果 - differences, field_diff_count, identical_results = compare_results( - pro_data, test_data, keys, fields_to_compare, exclude_fields, values - ) - - # 统计信息 - different_ids = set() - for diff in differences: - if 'field' in diff: - different_ids.add(list(diff['key'].values())[0]) - - non_different_ids = set(values) - different_ids - - # 生成比较总结 - summary = generate_comparison_summary( - len(values), len(pro_data), len(test_data), - differences, identical_results, field_diff_count - ) - - result = { - 'total_keys': len(values), - 'pro_count': len(pro_data), - 'test_count': len(test_data), - 'differences': differences, - 'identical_results': identical_results, - 'field_diff_count': field_diff_count, - 'different_ids': list(different_ids), - 'non_different_ids': list(non_different_ids), - 'summary': summary, - 'sharding_info': sharding_info, # 包含分表查询信息 - 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], - 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] - } - - logger.info(f"分表比对完成:发现 {len(differences)} 处差异") - - # 自动保存分表查询历史记录 - try: - # 生成历史记录名称 - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - history_name = f"分表查询_{timestamp}" - history_description = f"自动保存 - 分表查询{len(values)}个Key,发现{len(differences)}处差异" - - # 保存历史记录 - history_id = save_query_history( - name=history_name, - description=history_description, - pro_config=pro_config, - test_config=test_config, - query_config={ - 'keys': keys, - 'fields_to_compare': fields_to_compare, - 'exclude_fields': exclude_fields - }, - query_keys=values, - results_summary=summary, - execution_time=0.0, # 可以后续优化计算实际执行时间 - total_keys=len(values), - differences_count=len(differences), - identical_count=len(identical_results), - # 新增分表相关参数 - sharding_config=sharding_config, - query_type='sharding', - # 添加查询结果数据 - raw_results={ - 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], - 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [], - 'sharding_info': sharding_info # 包含分表信息 - }, - differences_data=differences, - identical_data=identical_results - ) - - # 关联查询日志与历史记录 - if history_id: - query_log_collector.set_history_id(history_id) - logger.info(f"分表查询历史记录保存成功: {history_name}, ID: {history_id}") - else: - logger.warning("分表查询历史记录保存失败,无法获取history_id") - except Exception as e: - logger.warning(f"保存分表查询历史记录失败: {e}") - - # 结束查询批次 - query_log_collector.end_current_batch() - return jsonify(result) - - except Exception as e: - logger.error(f"分表查询执行失败:{str(e)}") - # 结束查询批次(出错情况) - query_log_collector.end_current_batch() - return jsonify({'error': f'分表查询执行失败:{str(e)}'}), 500 - finally: - # 关闭连接 - if pro_cluster: - pro_cluster.shutdown() - if test_cluster: - test_cluster.shutdown() - - except Exception as e: - logger.error(f"分表查询请求处理失败:{str(e)}") - # 结束查询批次(请求处理出错) - query_log_collector.end_current_batch() - return jsonify({'error': f'分表查询请求处理失败:{str(e)}'}), 500 - -@app.route('/api/query', methods=['POST']) -def query_compare(): - try: - data = request.json - - # 开始新的查询批次 - batch_id = query_log_collector.start_new_batch('单表') - - logger.info("开始执行数据库比对查询") - - # 解析配置 - pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config']) - test_config = data.get('test_config', 
DEFAULT_CONFIG['test_config']) - - # 从query_config中获取keys等参数 - query_config = data.get('query_config', {}) - keys = query_config.get('keys', DEFAULT_CONFIG['keys']) - fields_to_compare = query_config.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare']) - exclude_fields = query_config.get('exclude_fields', DEFAULT_CONFIG['exclude_fields']) - - values = data.get('values', []) - - # 参数验证 - if not values: - logger.warning("查询失败:未提供查询key值") - return jsonify({'error': '请提供查询key值'}), 400 - - if not keys: - logger.warning("查询失败:未提供主键字段") - return jsonify({'error': '请提供主键字段'}), 400 - - # 添加详细的参数日志 - logger.info(f"单表查询参数解析结果:") - logger.info(f" keys: {keys}") - logger.info(f" values数量: {len(values)}") - logger.info(f" fields_to_compare: {fields_to_compare}") - logger.info(f" exclude_fields: {exclude_fields}") - - logger.info(f"查询配置:{len(values)}个key值,生产表:{pro_config['table']},测试表:{test_config['table']}") - - # 创建数据库连接 - pro_cluster, pro_session = create_connection(pro_config) - test_cluster, test_session = create_connection(test_config) - - if not pro_session or not test_session: - logger.error("数据库连接失败") - return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500 - - try: - # 执行查询 - logger.info("执行生产环境查询") - pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields) - logger.info("执行测试环境查询") - test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields) - - logger.info(f"查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录") - - # 比较结果 - differences, field_diff_count, identical_results = compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values) - - # 统计信息 - different_ids = set() - for diff in differences: - if 'field' in diff: - different_ids.add(list(diff['key'].values())[0]) - - non_different_ids = set(values) - different_ids - - # 生成比较总结 - summary = generate_comparison_summary( - len(values), len(pro_data), len(test_data), - differences, identical_results, field_diff_count - ) - - result = { - 'total_keys': len(values), - 'pro_count': len(pro_data), - 'test_count': len(test_data), - 'differences': differences, - 'identical_results': identical_results, - 'field_diff_count': field_diff_count, - 'different_ids': list(different_ids), - 'non_different_ids': list(non_different_ids), - 'summary': summary, - 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], - 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] - } - - logger.info(f"比对完成:发现 {len(differences)} 处差异") - - # 自动保存查询历史记录(可选,基于执行结果) - try: - # 生成历史记录名称 - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - history_name = f"查询_{timestamp}" - history_description = f"自动保存 - 查询{len(values)}个Key,发现{len(differences)}处差异" - - # 保存历史记录 - history_id = save_query_history( - name=history_name, - description=history_description, - pro_config=pro_config, - test_config=test_config, - query_config={ - 'keys': keys, - 'fields_to_compare': fields_to_compare, - 'exclude_fields': exclude_fields - }, - query_keys=values, - results_summary=summary, - execution_time=0.0, # 可以后续优化计算实际执行时间 - total_keys=len(values), - differences_count=len(differences), - identical_count=len(identical_results), - # 添加查询结果数据 - raw_results={ - 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], - 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] - }, - differences_data=differences, - identical_data=identical_results - ) - - # 关联查询日志与历史记录 - if 
history_id: - query_log_collector.set_history_id(history_id) - logger.info(f"查询历史记录保存成功: {history_name}, ID: {history_id}") - else: - logger.warning("查询历史记录保存失败,无法获取history_id") - except Exception as e: - logger.warning(f"保存查询历史记录失败: {e}") - - # 结束查询批次 - query_log_collector.end_current_batch() - return jsonify(result) - - except Exception as e: - logger.error(f"查询执行失败:{str(e)}") - # 结束查询批次(出错情况) - query_log_collector.end_current_batch() - return jsonify({'error': f'查询执行失败:{str(e)}'}), 500 - finally: - # 关闭连接 - if pro_cluster: - pro_cluster.shutdown() - if test_cluster: - test_cluster.shutdown() - - except Exception as e: - logger.error(f"请求处理失败:{str(e)}") - # 结束查询批次(请求处理出错) - query_log_collector.end_current_batch() - return jsonify({'error': f'请求处理失败:{str(e)}'}), 500 - -@app.route('/api/default-config') -def get_default_config(): - return jsonify(DEFAULT_CONFIG) - -# 配置组管理API -@app.route('/api/config-groups', methods=['GET']) -def api_get_config_groups(): - """获取所有配置组""" - config_groups = get_config_groups() - return jsonify({'success': True, 'data': config_groups}) - -@app.route('/api/config-groups', methods=['POST']) -def api_save_config_group(): - """保存配置组""" - try: - data = request.json - name = data.get('name', '').strip() - description = data.get('description', '').strip() - pro_config = data.get('pro_config', {}) - test_config = data.get('test_config', {}) - - # 获取查询配置,支持两种格式 - if 'query_config' in data: - # 嵌套格式 - query_config = data.get('query_config', {}) - else: - # 平铺格式 - query_config = { - 'keys': data.get('keys', []), - 'fields_to_compare': data.get('fields_to_compare', []), - 'exclude_fields': data.get('exclude_fields', []) - } - - # 提取分表配置 - sharding_config = data.get('sharding_config') - - if not name: - return jsonify({'success': False, 'error': '配置组名称不能为空'}), 400 - - success = save_config_group(name, description, pro_config, test_config, query_config, sharding_config) - - if success: - return jsonify({'success': True, 'message': '配置组保存成功'}) - else: - return jsonify({'success': False, 'error': '配置组保存失败'}), 500 - - except Exception as e: - logger.error(f"保存配置组API失败: {e}") - return jsonify({'success': False, 'error': str(e)}), 500 - -@app.route('/api/config-groups/', methods=['GET']) -def api_get_config_group(group_id): - """获取指定配置组详情""" - config_group = get_config_group_by_id(group_id) - - if config_group: - return jsonify({'success': True, 'data': config_group}) - else: - return jsonify({'success': False, 'error': '配置组不存在'}), 404 - -@app.route('/api/config-groups/', methods=['DELETE']) -def api_delete_config_group(group_id): - """删除配置组""" - success = delete_config_group(group_id) - - if success: - return jsonify({'success': True, 'message': '配置组删除成功'}) - else: - return jsonify({'success': False, 'error': '配置组删除失败'}), 500 - -@app.route('/api/init-db', methods=['POST']) -def api_init_database(): - """手动初始化数据库(用于测试)""" - success = init_database() - if success: - return jsonify({'success': True, 'message': '数据库初始化成功'}) - else: - return jsonify({'success': False, 'error': '数据库初始化失败'}), 500 - -# 查询历史管理API -@app.route('/api/query-history', methods=['GET']) -def api_get_query_history(): - """获取所有查询历史记录""" - history_list = get_query_history() - return jsonify({'success': True, 'data': history_list}) - -@app.route('/api/query-history', methods=['POST']) -def api_save_query_history(): - """保存查询历史记录,支持分表查询""" - try: - data = request.json - name = data.get('name', '').strip() - description = data.get('description', '').strip() - pro_config = data.get('pro_config', {}) - test_config = 
data.get('test_config', {}) - query_config = data.get('query_config', {}) - query_keys = data.get('query_keys', []) - results_summary = data.get('results_summary', {}) - execution_time = data.get('execution_time', 0.0) - total_keys = data.get('total_keys', 0) - differences_count = data.get('differences_count', 0) - identical_count = data.get('identical_count', 0) - # 新增分表相关字段 - sharding_config = data.get('sharding_config') - query_type = data.get('query_type', 'single') - - if not name: - return jsonify({'success': False, 'error': '历史记录名称不能为空'}), 400 - - success = save_query_history( - name, description, pro_config, test_config, query_config, - query_keys, results_summary, execution_time, total_keys, - differences_count, identical_count, sharding_config, query_type - ) - - if success: - query_type_desc = '分表查询' if query_type == 'sharding' else '单表查询' - return jsonify({'success': True, 'message': f'{query_type_desc}历史记录保存成功'}) - else: - return jsonify({'success': False, 'error': '查询历史记录保存失败'}), 500 - - except Exception as e: - logger.error(f"保存查询历史记录API失败: {e}") - return jsonify({'success': False, 'error': str(e)}), 500 - -@app.route('/api/query-history/', methods=['GET']) -def api_get_query_history_detail(history_id): - """获取指定查询历史记录详情""" - history_record = get_query_history_by_id(history_id) - - if history_record: - return jsonify({'success': True, 'data': history_record}) - else: - return jsonify({'success': False, 'error': '查询历史记录不存在'}), 404 - -@app.route('/api/query-history//results', methods=['GET']) -def api_get_query_history_results(history_id): - """获取查询历史记录的完整结果数据""" - try: - history_record = get_query_history_by_id(history_id) - if not history_record: - return jsonify({'success': False, 'error': '历史记录不存在'}), 404 - - # 安全获取raw_results数据 - raw_results = history_record.get('raw_results') - if raw_results and isinstance(raw_results, dict): - raw_pro_data = raw_results.get('raw_pro_data', []) or [] - raw_test_data = raw_results.get('raw_test_data', []) or [] - sharding_info = raw_results.get('sharding_info') if history_record.get('query_type') == 'sharding' else None - else: - raw_pro_data = [] - raw_test_data = [] - sharding_info = None - - # 安全获取差异和相同结果数据 - differences_data = history_record.get('differences_data') or [] - identical_data = history_record.get('identical_data') or [] - - # 构建完整的查询结果格式,与API查询结果保持一致 - result = { - 'total_keys': history_record['total_keys'], - 'pro_count': len(raw_pro_data), - 'test_count': len(raw_test_data), - 'differences': differences_data, - 'identical_results': identical_data, - 'field_diff_count': {}, # 可以从differences_data中重新计算 - 'summary': history_record.get('results_summary', {}), - 'raw_pro_data': raw_pro_data, - 'raw_test_data': raw_test_data, - # 如果是分表查询,添加分表信息 - 'sharding_info': sharding_info, - # 添加历史记录元信息 - 'history_info': { - 'id': history_record['id'], - 'name': history_record['name'], - 'description': history_record['description'], - 'created_at': history_record['created_at'], - 'query_type': history_record.get('query_type', 'single') - } - } - - # 重新计算field_diff_count - if differences_data: - field_diff_count = {} - for diff in differences_data: - if isinstance(diff, dict) and 'field' in diff: - field_name = diff['field'] - field_diff_count[field_name] = field_diff_count.get(field_name, 0) + 1 - result['field_diff_count'] = field_diff_count - - return jsonify({ - 'success': True, - 'data': result, - 'message': f'历史记录 "{history_record["name"]}" 结果加载成功' - }) - - except Exception as e: - logger.error(f"获取查询历史记录结果失败: {e}") - return 
jsonify({'success': False, 'error': f'获取历史记录结果失败: {str(e)}'}), 500 - -@app.route('/api/query-history/', methods=['DELETE']) -def api_delete_query_history(history_id): - """删除查询历史记录""" - success = delete_query_history(history_id) - - if success: - return jsonify({'success': True, 'message': '查询历史记录删除成功'}) - else: - return jsonify({'success': False, 'error': '查询历史记录删除失败'}), 500 - -@app.route('/api/query-logs', methods=['GET']) -def api_get_query_logs(): - """获取查询日志,支持分组显示和数据库存储""" - try: - limit = request.args.get('limit', type=int) - grouped = request.args.get('grouped', 'true').lower() == 'true' # 默认分组显示 - from_db = request.args.get('from_db', 'true').lower() == 'true' # 默认从数据库获取 - - if grouped: - # 返回分组日志 - grouped_logs = query_log_collector.get_logs_grouped_by_batch(limit, from_db) - # 获取总数(用于统计) - total_logs = query_log_collector._get_total_logs_count() if from_db else len(query_log_collector.logs) - - return jsonify({ - 'success': True, - 'data': grouped_logs, - 'total': total_logs, - 'grouped': True, - 'from_db': from_db - }) - else: - # 返回原始日志列表 - logs = query_log_collector.get_logs(limit, from_db) - total_logs = query_log_collector._get_total_logs_count() if from_db else len(query_log_collector.logs) - - return jsonify({ - 'success': True, - 'data': logs, - 'total': total_logs, - 'grouped': False, - 'from_db': from_db - }) - except Exception as e: - logger.error(f"获取查询日志失败: {e}") - return jsonify({'success': False, 'error': str(e)}), 500 - -@app.route('/api/query-logs', methods=['DELETE']) -def api_clear_query_logs(): - """清空查询日志,支持清空数据库日志""" - try: - clear_db = request.args.get('clear_db', 'true').lower() == 'true' # 默认清空数据库 - query_log_collector.clear_logs(clear_db) - - message = '查询日志已清空(包括数据库)' if clear_db else '查询日志已清空(仅内存)' - return jsonify({'success': True, 'message': message}) - except Exception as e: - logger.error(f"清空查询日志失败: {e}") - return jsonify({'success': False, 'error': str(e)}), 500 - -@app.route('/api/query-logs/cleanup', methods=['POST']) -def api_cleanup_old_logs(): - """清理旧的查询日志""" - try: - days_to_keep = request.json.get('days_to_keep', 30) if request.json else 30 - deleted_count = query_log_collector.cleanup_old_logs(days_to_keep) - - return jsonify({ - 'success': True, - 'message': f'成功清理 {deleted_count} 条超过 {days_to_keep} 天的旧日志', - 'deleted_count': deleted_count - }) - except Exception as e: - logger.error(f"清理旧日志失败: {e}") - return jsonify({'success': False, 'error': str(e)}), 500 - -@app.route('/api/query-logs/history/', methods=['GET']) -def api_get_query_logs_by_history(history_id): - """根据历史记录ID获取相关查询日志""" - try: - logs = query_log_collector.get_logs_by_history_id(history_id) - - # 按批次分组显示 - grouped_logs = {} - batch_order = [] - - for log in logs: - batch_id = log.get('batch_id', 'unknown') - if batch_id not in grouped_logs: - grouped_logs[batch_id] = [] - batch_order.append(batch_id) - grouped_logs[batch_id].append(log) - - # 返回按时间顺序排列的批次 - grouped_result = [(batch_id, grouped_logs[batch_id]) for batch_id in batch_order] - - return jsonify({ - 'success': True, - 'data': grouped_result, - 'total': len(logs), - 'history_id': history_id, - 'grouped': True - }) - except Exception as e: - logger.error(f"获取历史记录相关查询日志失败: {e}") - return jsonify({'success': False, 'error': str(e)}), 500 +# 设置所有路由 +setup_routes(app, query_log_collector) if __name__ == '__main__': - app.run(debug=True, port=5001) + logger.info("=== BigDataTool 启动 ===") + logger.info("应用架构:模块化") + logger.info("支持功能:单表查询、分表查询、多主键查询、配置管理、查询历史") + logger.info("访问地址:http://localhost:5001") + 
logger.info("API文档:/api/* 路径下的所有端点") + app.run(debug=True, port=5001) \ No newline at end of file diff --git a/app_original_backup.py b/app_original_backup.py new file mode 100644 index 0000000..714e56b --- /dev/null +++ b/app_original_backup.py @@ -0,0 +1,2229 @@ +from flask import Flask, render_template, request, jsonify, send_from_directory +from cassandra.cluster import Cluster +from cassandra.auth import PlainTextAuthProvider +import json +import os +import logging +import sqlite3 +from datetime import datetime, timedelta +import re +import concurrent.futures +import time + +app = Flask(__name__) + +# 配置日志 +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# 数据库配置 +DATABASE_PATH = 'config_groups.db' + +# 查询日志收集器 +class QueryLogCollector: + def __init__(self, max_logs=1000, db_path=None): + self.logs = [] # 内存中的日志缓存 + self.max_logs = max_logs + self.current_batch_id = None + self.batch_counter = 0 + self.current_query_type = 'single' + self.current_history_id = None # 当前关联的历史记录ID + self.db_path = db_path or DATABASE_PATH + + def start_new_batch(self, query_type='single'): + """开始新的查询批次""" + self.batch_counter += 1 + self.current_batch_id = f"batch_{self.batch_counter}_{datetime.now().strftime('%H%M%S')}" + self.current_query_type = query_type + self.current_history_id = None # 重置历史记录ID + + # 添加批次开始标记 + self.add_log('INFO', f"=== 开始{query_type}查询批次 (ID: {self.current_batch_id}) ===", force_batch_id=self.current_batch_id) + return self.current_batch_id + + def set_history_id(self, history_id): + """设置当前批次关联的历史记录ID""" + self.current_history_id = history_id + if self.current_batch_id and history_id: + self.add_log('INFO', f"关联历史记录ID: {history_id}", force_batch_id=self.current_batch_id) + # 更新当前批次的所有日志记录的history_id + self._update_batch_history_id(self.current_batch_id, history_id) + + def _update_batch_history_id(self, batch_id, history_id): + """更新批次中所有日志的history_id""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + + cursor.execute(''' + UPDATE query_logs + SET history_id = ? + WHERE batch_id = ? 
+ ''', (history_id, batch_id)) + + conn.commit() + conn.close() + logger.info(f"已更新批次 {batch_id} 的历史记录关联到 {history_id}") + except Exception as e: + print(f"Warning: Failed to update batch history_id: {e}") + + def end_current_batch(self): + """结束当前查询批次""" + if self.current_batch_id: + self.add_log('INFO', f"=== 查询批次完成 (ID: {self.current_batch_id}) ===", force_batch_id=self.current_batch_id) + self.current_batch_id = None + self.current_history_id = None + + def add_log(self, level, message, force_batch_id=None, force_query_type=None, force_history_id=None): + """添加日志到内存和数据库""" + timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + batch_id = force_batch_id or self.current_batch_id + query_type = force_query_type or self.current_query_type + history_id = force_history_id or self.current_history_id + + log_entry = { + 'timestamp': timestamp, + 'level': level, + 'message': message, + 'batch_id': batch_id, + 'query_type': query_type, + 'history_id': history_id + } + + # 添加到内存缓存 + self.logs.append(log_entry) + if len(self.logs) > self.max_logs: + self.logs.pop(0) + + # 保存到数据库 + self._save_log_to_db(log_entry) + + def _save_log_to_db(self, log_entry): + """将日志保存到数据库""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + + cursor.execute(''' + INSERT INTO query_logs (batch_id, history_id, timestamp, level, message, query_type) + VALUES (?, ?, ?, ?, ?, ?) + ''', ( + log_entry['batch_id'], + log_entry['history_id'], + log_entry['timestamp'], + log_entry['level'], + log_entry['message'], + log_entry['query_type'] + )) + + conn.commit() + conn.close() + except Exception as e: + # 数据库写入失败时记录到控制台,但不影响程序运行 + print(f"Warning: Failed to save log to database: {e}") + + def get_logs(self, limit=None, from_db=True): + """获取日志,支持从数据库或内存获取""" + if from_db: + return self._get_logs_from_db(limit) + else: + # 从内存获取 + if limit: + return self.logs[-limit:] + return self.logs + + def _get_logs_from_db(self, limit=None): + """从数据库获取日志""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + query = ''' + SELECT batch_id, history_id, timestamp, level, message, query_type + FROM query_logs + ORDER BY id DESC + ''' + + if limit: + query += f' LIMIT {limit}' + + cursor.execute(query) + rows = cursor.fetchall() + + # 转换为字典格式并反转顺序(最新的在前) + logs = [] + for row in reversed(rows): + logs.append({ + 'batch_id': row['batch_id'], + 'history_id': row['history_id'], + 'timestamp': row['timestamp'], + 'level': row['level'], + 'message': row['message'], + 'query_type': row['query_type'] + }) + + conn.close() + return logs + except Exception as e: + print(f"Warning: Failed to get logs from database: {e}") + # 如果数据库读取失败,返回内存中的日志 + return self.get_logs(limit, from_db=False) + + def _get_total_logs_count(self): + """获取数据库中的日志总数""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + cursor.execute('SELECT COUNT(*) FROM query_logs') + count = cursor.fetchone()[0] + conn.close() + return count + except Exception as e: + print(f"Warning: Failed to get logs count from database: {e}") + return len(self.logs) + + def get_logs_by_history_id(self, history_id): + """根据历史记录ID获取相关日志""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + cursor.execute(''' + SELECT batch_id, history_id, timestamp, level, message, query_type + FROM query_logs + WHERE history_id = ? 
+ ORDER BY id ASC + ''', (history_id,)) + + rows = cursor.fetchall() + logs = [] + for row in rows: + logs.append({ + 'batch_id': row['batch_id'], + 'history_id': row['history_id'], + 'timestamp': row['timestamp'], + 'level': row['level'], + 'message': row['message'], + 'query_type': row['query_type'] + }) + + conn.close() + return logs + except Exception as e: + print(f"Warning: Failed to get logs by history_id: {e}") + return [] + + def get_logs_grouped_by_batch(self, limit=None, from_db=True): + """按批次分组获取日志""" + logs = self.get_logs(limit, from_db) + grouped_logs = {} + batch_order = [] + + for log in logs: + batch_id = log.get('batch_id', 'unknown') + if batch_id not in grouped_logs: + grouped_logs[batch_id] = [] + batch_order.append(batch_id) + grouped_logs[batch_id].append(log) + + # 返回按时间顺序排列的批次 + return [(batch_id, grouped_logs[batch_id]) for batch_id in batch_order] + + def clear_logs(self, clear_db=True): + """清空日志""" + # 清空内存 + self.logs.clear() + self.current_batch_id = None + self.batch_counter = 0 + + # 清空数据库 + if clear_db: + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + cursor.execute('DELETE FROM query_logs') + conn.commit() + conn.close() + except Exception as e: + print(f"Warning: Failed to clear logs from database: {e}") + + def cleanup_old_logs(self, days_to_keep=30): + """清理旧日志,保留指定天数的日志""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + + # 删除超过指定天数的日志 + cutoff_date = datetime.now() - timedelta(days=days_to_keep) + cursor.execute(''' + DELETE FROM query_logs + WHERE created_at < ? + ''', (cutoff_date.strftime('%Y-%m-%d %H:%M:%S'),)) + + deleted_count = cursor.rowcount + conn.commit() + conn.close() + + logger.info(f"清理了 {deleted_count} 条超过 {days_to_keep} 天的旧日志") + return deleted_count + except Exception as e: + logger.error(f"清理旧日志失败: {e}") + return 0 + +# 全局日志收集器实例 +query_log_collector = QueryLogCollector() + +# 自定义日志处理器 +class CollectorHandler(logging.Handler): + def __init__(self, collector): + super().__init__() + self.collector = collector + + def emit(self, record): + self.collector.add_log(record.levelname, record.getMessage()) + +# 添加收集器处理器到logger +collector_handler = CollectorHandler(query_log_collector) +logger.addHandler(collector_handler) + +class ShardingCalculator: + """分表计算器,基于TWCS策略""" + + def __init__(self, interval_seconds=604800, table_count=14): + """ + 初始化分表计算器 + :param interval_seconds: 时间间隔(秒),默认604800(7天) + :param table_count: 分表数量,默认14 + """ + self.interval_seconds = interval_seconds + self.table_count = table_count + + def extract_timestamp_from_key(self, key): + """ + 从Key中提取时间戳 + 新规则:优先提取最后一个下划线后的数字,如果没有下划线则提取最后连续的数字部分 + """ + if not key: + return None + + key_str = str(key) + + # 方法1:如果包含下划线,尝试提取最后一个下划线后的部分 + if '_' in key_str: + parts = key_str.split('_') + last_part = parts[-1] + # 检查最后一部分是否为纯数字 + if last_part.isdigit(): + timestamp = int(last_part) + logger.info(f"Key '{key}' 通过下划线分割提取到时间戳: {timestamp}") + return timestamp + + # 方法2:使用正则表达式找到所有数字序列,取最后一个较长的 + number_sequences = re.findall(r'\d+', key_str) + + if not number_sequences: + logger.warning(f"Key '{key}' 中没有找到数字字符") + return None + + # 如果有多个数字序列,优先选择最长的,如果长度相同则选择最后一个 + longest_sequence = max(number_sequences, key=len) + + # 如果最长的有多个,选择最后一个最长的 + max_length = len(longest_sequence) + last_longest = None + for seq in number_sequences: + if len(seq) == max_length: + last_longest = seq + + try: + timestamp = int(last_longest) + logger.info(f"Key '{key}' 通过数字序列提取到时间戳: {timestamp} (从序列 {number_sequences} 中选择)") 
+ return timestamp + except ValueError: + logger.error(f"Key '{key}' 时间戳转换失败: {last_longest}") + return None + + def calculate_shard_index(self, timestamp): + """ + 计算分表索引 + 公式:timestamp // interval_seconds % table_count + """ + if timestamp is None: + return None + return int(timestamp) // self.interval_seconds % self.table_count + + def get_shard_table_name(self, base_table_name, key): + """ + 根据Key获取对应的分表名称 + """ + timestamp = self.extract_timestamp_from_key(key) + if timestamp is None: + return None + + shard_index = self.calculate_shard_index(timestamp) + return f"{base_table_name}_{shard_index}" + + def get_all_shard_tables_for_keys(self, base_table_name, keys): + """ + 为一批Keys计算所有需要查询的分表 + 返回: {shard_table_name: [keys_for_this_shard], ...} + """ + shard_mapping = {} + failed_keys = [] + calculation_stats = { + 'total_keys': len(keys), + 'successful_extractions': 0, + 'failed_extractions': 0, + 'unique_shards': 0 + } + + for key in keys: + shard_table = self.get_shard_table_name(base_table_name, key) + if shard_table: + if shard_table not in shard_mapping: + shard_mapping[shard_table] = [] + shard_mapping[shard_table].append(key) + calculation_stats['successful_extractions'] += 1 + else: + failed_keys.append(key) + calculation_stats['failed_extractions'] += 1 + + calculation_stats['unique_shards'] = len(shard_mapping) + + return shard_mapping, failed_keys, calculation_stats + +def init_database(): + """初始化数据库""" + try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + # 创建配置组表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS config_groups ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT, + pro_config TEXT NOT NULL, + test_config TEXT NOT NULL, + query_config TEXT NOT NULL, + sharding_config TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建查询历史表,包含分表配置字段 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS query_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + description TEXT, + pro_config TEXT NOT NULL, + test_config TEXT NOT NULL, + query_config TEXT NOT NULL, + query_keys TEXT NOT NULL, + results_summary TEXT NOT NULL, + execution_time REAL NOT NULL, + total_keys INTEGER NOT NULL, + differences_count INTEGER NOT NULL, + identical_count INTEGER NOT NULL, + sharding_config TEXT, + query_type TEXT DEFAULT 'single', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建分表配置组表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS sharding_config_groups ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT, + pro_config TEXT NOT NULL, + test_config TEXT NOT NULL, + query_config TEXT NOT NULL, + sharding_config TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建查询日志表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS query_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + batch_id TEXT NOT NULL, + history_id INTEGER, + timestamp TEXT NOT NULL, + level TEXT NOT NULL, + message TEXT NOT NULL, + query_type TEXT DEFAULT 'single', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (history_id) REFERENCES query_history (id) ON DELETE CASCADE + ) + ''') + + # 创建索引 + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_batch_id ON query_logs(batch_id)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_history_id ON query_logs(history_id)') + cursor.execute('CREATE INDEX IF NOT EXISTS 
idx_query_logs_timestamp ON query_logs(timestamp)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_level ON query_logs(level)') + + conn.commit() + conn.close() + logger.info("数据库初始化完成") + return True + except Exception as e: + logger.error(f"数据库初始化失败: {e}") + return False + +def ensure_database(): + """确保数据库和表存在""" + if not os.path.exists(DATABASE_PATH): + logger.info("数据库文件不存在,正在创建...") + return init_database() + + # 检查表是否存在 + try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name IN ('config_groups', 'query_history', 'sharding_config_groups', 'query_logs')") + results = cursor.fetchall() + existing_tables = [row[0] for row in results] + + required_tables = ['config_groups', 'query_history', 'sharding_config_groups', 'query_logs'] + missing_tables = [table for table in required_tables if table not in existing_tables] + + if missing_tables: + logger.info(f"数据库表不完整,缺少表:{missing_tables},正在重新创建...") + return init_database() + + # 检查config_groups表是否有sharding_config字段 + cursor.execute("PRAGMA table_info(config_groups)") + columns = cursor.fetchall() + column_names = [column[1] for column in columns] + + if 'sharding_config' not in column_names: + logger.info("添加sharding_config字段到config_groups表...") + cursor.execute("ALTER TABLE config_groups ADD COLUMN sharding_config TEXT") + conn.commit() + logger.info("sharding_config字段添加成功") + + # 检查query_history表是否有分表相关字段 + cursor.execute("PRAGMA table_info(query_history)") + history_columns = cursor.fetchall() + history_column_names = [column[1] for column in history_columns] + + if 'sharding_config' not in history_column_names: + logger.info("添加sharding_config字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN sharding_config TEXT") + conn.commit() + logger.info("query_history表sharding_config字段添加成功") + + if 'query_type' not in history_column_names: + logger.info("添加query_type字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN query_type TEXT DEFAULT 'single'") + conn.commit() + logger.info("query_history表query_type字段添加成功") + + # 添加查询结果数据存储字段 + if 'raw_results' not in history_column_names: + logger.info("添加raw_results字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN raw_results TEXT") + conn.commit() + logger.info("query_history表raw_results字段添加成功") + + if 'differences_data' not in history_column_names: + logger.info("添加differences_data字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN differences_data TEXT") + conn.commit() + logger.info("query_history表differences_data字段添加成功") + + if 'identical_data' not in history_column_names: + logger.info("添加identical_data字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN identical_data TEXT") + conn.commit() + logger.info("query_history表identical_data字段添加成功") + + # 检查query_logs表是否存在history_id字段 + cursor.execute("PRAGMA table_info(query_logs)") + logs_columns = cursor.fetchall() + logs_column_names = [column[1] for column in logs_columns] + + if 'history_id' not in logs_column_names: + logger.info("添加history_id字段到query_logs表...") + cursor.execute("ALTER TABLE query_logs ADD COLUMN history_id INTEGER") + # 创建外键索引 + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_history_id ON query_logs(history_id)') + conn.commit() + logger.info("query_logs表history_id字段添加成功") + + conn.close() + return True + except Exception as e: + logger.error(f"检查数据库表失败: {e}") + return init_database() + 
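# --------------------------------------------------------------------------
# Illustrative sketch of the two new query features added by this patch
# (TWCS shard routing and composite primary keys). This is a minimal,
# standalone example using hypothetical table/key names and assuming keys
# shaped like "<docid>_<unix_timestamp>"; the actual implementations are
# ShardingCalculator above and execute_query further below in this file.

def _demo_shard_table(base_table, key, interval_seconds=604800, table_count=14):
    """TWCS routing rule used by ShardingCalculator: timestamp // interval_seconds % table_count."""
    ts = int(str(key).rsplit('_', 1)[-1])  # digits after the last underscore serve as the timestamp
    return f"{base_table}_{ts // interval_seconds % table_count}"

def _demo_composite_key_condition(keys, value):
    """Composite-key predicate of the shape execute_query builds for keys such as ["docid", "id"]."""
    parts = [v.strip() for v in str(value).split(',')]
    if len(keys) > 1 and len(parts) == len(keys):
        return '(' + ' AND '.join(f"{k} = '{v}'" for k, v in zip(keys, parts)) + ')'
    return f"{keys[0]} = '{value}'"

# Examples:
#   _demo_shard_table("events", "docA_1700000000")               -> "events_10"
#     (1700000000 // 604800 = 2810, and 2810 % 14 = 10)
#   _demo_composite_key_condition(["docid", "id"], "docA,1001")  -> "(docid = 'docA' AND id = '1001')"
# --------------------------------------------------------------------------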
+def get_db_connection(): + """获取数据库连接""" + conn = sqlite3.connect(DATABASE_PATH) + conn.row_factory = sqlite3.Row + return conn + +def normalize_json_string(value): + """标准化JSON字符串,用于比较""" + if not isinstance(value, str): + return value + + try: + # 尝试解析JSON + json_obj = json.loads(value) + + # 如果是数组,需要进行特殊处理 + if isinstance(json_obj, list): + # 尝试对数组元素进行标准化排序 + normalized_array = normalize_json_array(json_obj) + return json.dumps(normalized_array, sort_keys=True, separators=(',', ':')) + else: + # 普通对象,直接序列化 + return json.dumps(json_obj, sort_keys=True, separators=(',', ':')) + except (json.JSONDecodeError, TypeError): + # 如果不是JSON,返回原值 + return value + +def normalize_json_array(json_array): + """标准化JSON数组,处理元素顺序问题""" + try: + normalized_elements = [] + + for element in json_array: + if isinstance(element, dict): + # 对字典元素进行标准化 + normalized_elements.append(json.dumps(element, sort_keys=True, separators=(',', ':'))) + elif isinstance(element, str): + # 如果是字符串,尝试解析为JSON + try: + parsed_element = json.loads(element) + normalized_elements.append(json.dumps(parsed_element, sort_keys=True, separators=(',', ':'))) + except: + normalized_elements.append(element) + else: + normalized_elements.append(element) + + # 对标准化后的元素进行排序,确保顺序一致 + normalized_elements.sort() + + # 重新解析为对象数组 + result_array = [] + for element in normalized_elements: + if isinstance(element, str): + try: + result_array.append(json.loads(element)) + except: + result_array.append(element) + else: + result_array.append(element) + + return result_array + + except Exception as e: + logger.warning(f"数组标准化失败: {e}") + return json_array + +def is_json_array_field(value): + """检查字段是否为JSON数组格式""" + if not isinstance(value, (str, list)): + return False + + try: + if isinstance(value, str): + parsed = json.loads(value) + return isinstance(parsed, list) + elif isinstance(value, list): + # 检查是否为JSON字符串数组 + if len(value) > 0 and isinstance(value[0], str): + try: + json.loads(value[0]) + return True + except: + return False + return True + except: + return False + +def compare_array_values(value1, value2): + """专门用于比较数组类型的值""" + try: + # 处理字符串表示的数组 + if isinstance(value1, str) and isinstance(value2, str): + try: + array1 = json.loads(value1) + array2 = json.loads(value2) + if isinstance(array1, list) and isinstance(array2, list): + return compare_json_arrays(array1, array2) + except: + pass + + # 处理Python列表类型 + elif isinstance(value1, list) and isinstance(value2, list): + return compare_json_arrays(value1, value2) + + # 处理混合情况:一个是字符串数组,一个是列表 + elif isinstance(value1, list) and isinstance(value2, str): + try: + array2 = json.loads(value2) + if isinstance(array2, list): + return compare_json_arrays(value1, array2) + except: + pass + elif isinstance(value1, str) and isinstance(value2, list): + try: + array1 = json.loads(value1) + if isinstance(array1, list): + return compare_json_arrays(array1, value2) + except: + pass + + return False + except Exception as e: + logger.warning(f"数组比较失败: {e}") + return False + +def compare_json_arrays(array1, array2): + """比较两个JSON数组,忽略元素顺序""" + try: + if len(array1) != len(array2): + return False + + # 标准化两个数组 + normalized_array1 = normalize_json_array(array1.copy()) + normalized_array2 = normalize_json_array(array2.copy()) + + # 将标准化后的数组转换为可比较的格式 + comparable1 = json.dumps(normalized_array1, sort_keys=True) + comparable2 = json.dumps(normalized_array2, sort_keys=True) + + return comparable1 == comparable2 + + except Exception as e: + logger.warning(f"JSON数组比较失败: {e}") + return False + +def 
format_json_for_display(value): + """格式化JSON用于显示""" + if not isinstance(value, str): + return str(value) + + try: + # 尝试解析JSON + json_obj = json.loads(value) + # 格式化显示(带缩进) + return json.dumps(json_obj, sort_keys=True, indent=2, ensure_ascii=False) + except (json.JSONDecodeError, TypeError): + # 如果不是JSON,返回原值 + return str(value) + +def is_json_field(value): + """检查字段是否为JSON格式""" + if not isinstance(value, str): + return False + + try: + json.loads(value) + return True + except (json.JSONDecodeError, TypeError): + return False + +def compare_values(value1, value2): + """智能比较两个值,支持JSON标准化和数组比较""" + # 首先检查是否为数组类型 + if is_json_array_field(value1) or is_json_array_field(value2): + return compare_array_values(value1, value2) + + # 如果两个值都是字符串,尝试JSON标准化比较 + if isinstance(value1, str) and isinstance(value2, str): + normalized_value1 = normalize_json_string(value1) + normalized_value2 = normalize_json_string(value2) + return normalized_value1 == normalized_value2 + + # 其他情况直接比较 + return value1 == value2 + +# 默认配置(不显示敏感信息) +DEFAULT_CONFIG = { + 'pro_config': { + 'cluster_name': '', + 'hosts': [], + 'port': 9042, + 'datacenter': '', + 'username': '', + 'password': '', + 'keyspace': '', + 'table': '' + }, + 'test_config': { + 'cluster_name': '', + 'hosts': [], + 'port': 9042, + 'datacenter': '', + 'username': '', + 'password': '', + 'keyspace': '', + 'table': '' + }, + 'keys': [], + 'fields_to_compare': [], + 'exclude_fields': [] +} + +def save_config_group(name, description, pro_config, test_config, query_config, sharding_config=None): + """保存配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT OR REPLACE INTO config_groups + (name, description, pro_config, test_config, query_config, sharding_config, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + ''', ( + name, description, + json.dumps(pro_config), + json.dumps(test_config), + json.dumps(query_config), + json.dumps(sharding_config) if sharding_config else None, + datetime.now().isoformat() + )) + conn.commit() + logger.info(f"配置组 '{name}' 保存成功,包含分表配置: {sharding_config is not None}") + return True + except Exception as e: + logger.error(f"保存配置组失败: {e}") + return False + finally: + conn.close() + +def get_config_groups(): + """获取所有配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return [] + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, created_at, updated_at + FROM config_groups + ORDER BY updated_at DESC + ''') + rows = cursor.fetchall() + + config_groups = [] + for row in rows: + config_groups.append({ + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + }) + + return config_groups + except Exception as e: + logger.error(f"获取配置组失败: {e}") + return [] + finally: + conn.close() + +def get_config_group_by_id(group_id): + """根据ID获取配置组详情""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, pro_config, test_config, query_config, + sharding_config, created_at, updated_at + FROM config_groups WHERE id = ? 
+ ''', (group_id,)) + row = cursor.fetchone() + + if row: + config = { + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'pro_config': json.loads(row['pro_config']), + 'test_config': json.loads(row['test_config']), + 'query_config': json.loads(row['query_config']), + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + } + + # 添加分表配置 + if row['sharding_config']: + try: + config['sharding_config'] = json.loads(row['sharding_config']) + except (json.JSONDecodeError, TypeError): + config['sharding_config'] = None + else: + config['sharding_config'] = None + + return config + return None + except Exception as e: + logger.error(f"获取配置组详情失败: {e}") + return None + finally: + conn.close() + +def delete_config_group(group_id): + """删除配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute('DELETE FROM config_groups WHERE id = ?', (group_id,)) + conn.commit() + success = cursor.rowcount > 0 + if success: + logger.info(f"配置组ID {group_id} 删除成功") + return success + except Exception as e: + logger.error(f"删除配置组失败: {e}") + return False + finally: + conn.close() + +def save_query_history(name, description, pro_config, test_config, query_config, query_keys, + results_summary, execution_time, total_keys, differences_count, identical_count, + sharding_config=None, query_type='single', raw_results=None, differences_data=None, identical_data=None): + """保存查询历史记录,支持分表查询和查询结果数据,返回历史记录ID""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT INTO query_history + (name, description, pro_config, test_config, query_config, query_keys, + results_summary, execution_time, total_keys, differences_count, identical_count, + sharding_config, query_type, raw_results, differences_data, identical_data) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ''', ( + name, description, + json.dumps(pro_config), + json.dumps(test_config), + json.dumps(query_config), + json.dumps(query_keys), + json.dumps(results_summary), + execution_time, + total_keys, + differences_count, + identical_count, + json.dumps(sharding_config) if sharding_config else None, + query_type, + json.dumps(raw_results) if raw_results else None, + json.dumps(differences_data) if differences_data else None, + json.dumps(identical_data) if identical_data else None + )) + + # 获取插入记录的ID + history_id = cursor.lastrowid + conn.commit() + logger.info(f"查询历史记录 '{name}' 保存成功,查询类型:{query_type},ID:{history_id}") + return history_id + except Exception as e: + logger.error(f"保存查询历史记录失败: {e}") + return None + finally: + conn.close() + +def get_query_history(): + """获取所有查询历史记录""" + if not ensure_database(): + logger.error("数据库初始化失败") + return [] + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, execution_time, total_keys, + differences_count, identical_count, created_at, query_type + FROM query_history + ORDER BY created_at DESC + ''') + rows = cursor.fetchall() + + history_list = [] + for row in rows: + # 获取列名列表以检查字段是否存在 + column_names = [desc[0] for desc in cursor.description] + history_list.append({ + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'execution_time': row['execution_time'], + 'total_keys': row['total_keys'], + 'differences_count': row['differences_count'], + 'identical_count': row['identical_count'], + 'created_at': row['created_at'], + 'query_type': row['query_type'] if 'query_type' in column_names else 'single' + }) + + return history_list + except Exception as e: + logger.error(f"获取查询历史记录失败: {e}") + return [] + finally: + conn.close() + +def get_query_history_by_id(history_id): + """根据ID获取查询历史记录详情""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT * FROM query_history WHERE id = ? 
+ ''', (history_id,)) + row = cursor.fetchone() + + if row: + # 获取列名列表以检查字段是否存在 + column_names = [desc[0] for desc in cursor.description] + return { + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'pro_config': json.loads(row['pro_config']), + 'test_config': json.loads(row['test_config']), + 'query_config': json.loads(row['query_config']), + 'query_keys': json.loads(row['query_keys']), + 'results_summary': json.loads(row['results_summary']), + 'execution_time': row['execution_time'], + 'total_keys': row['total_keys'], + 'differences_count': row['differences_count'], + 'identical_count': row['identical_count'], + 'created_at': row['created_at'], + # 处理新字段,保持向后兼容 + 'sharding_config': json.loads(row['sharding_config']) if 'sharding_config' in column_names and row['sharding_config'] else None, + 'query_type': row['query_type'] if 'query_type' in column_names else 'single', + # 添加查询结果数据支持 + 'raw_results': json.loads(row['raw_results']) if 'raw_results' in column_names and row['raw_results'] else None, + 'differences_data': json.loads(row['differences_data']) if 'differences_data' in column_names and row['differences_data'] else None, + 'identical_data': json.loads(row['identical_data']) if 'identical_data' in column_names and row['identical_data'] else None + } + return None + except Exception as e: + logger.error(f"获取查询历史记录详情失败: {e}") + return None + finally: + conn.close() + +def delete_query_history(history_id): + """删除查询历史记录""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute('DELETE FROM query_history WHERE id = ?', (history_id,)) + conn.commit() + success = cursor.rowcount > 0 + if success: + logger.info(f"查询历史记录ID {history_id} 删除成功") + return success + except Exception as e: + logger.error(f"删除查询历史记录失败: {e}") + return False + finally: + conn.close() + +def create_connection(config): + """创建Cassandra连接,带有增强的错误诊断和容错机制""" + start_time = time.time() + + logger.info(f"=== 开始创建Cassandra连接 ===") + logger.info(f"主机列表: {config.get('hosts', [])}") + logger.info(f"端口: {config.get('port', 9042)}") + logger.info(f"用户名: {config.get('username', 'N/A')}") + logger.info(f"Keyspace: {config.get('keyspace', 'N/A')}") + + try: + logger.info("正在创建认证提供者...") + auth_provider = PlainTextAuthProvider(username=config['username'], password=config['password']) + + logger.info("正在创建集群连接...") + # 设置连接池配置,提高容错性 + from cassandra.policies import DCAwareRoundRobinPolicy + + # 设置负载均衡策略,避免单点故障 + load_balancing_policy = DCAwareRoundRobinPolicy(local_dc=config.get('datacenter', 'dc1')) + + # 创建连接配置,增加容错参数 + cluster = Cluster( + config['hosts'], + port=config['port'], + auth_provider=auth_provider, + load_balancing_policy=load_balancing_policy, + # 增加容错配置 + protocol_version=4, # 使用稳定的协议版本 + connect_timeout=15, # 连接超时 + control_connection_timeout=15, # 控制连接超时 + max_schema_agreement_wait=30 # schema同步等待时间 + ) + + logger.info("正在连接到Keyspace...") + session = cluster.connect(config['keyspace']) + + # 设置session级别的容错参数 + session.default_timeout = 30 # 查询超时时间 + + connection_time = time.time() - start_time + logger.info(f"✅ Cassandra连接成功: 连接时间={connection_time:.3f}秒") + + # 记录集群状态 + try: + cluster_name = cluster.metadata.cluster_name or "Unknown" + logger.info(f" 集群名称: {cluster_name}") + + # 记录可用主机状态 + live_hosts = [str(host.address) for host in cluster.metadata.all_hosts() if host.is_up] + down_hosts = [str(host.address) for host in cluster.metadata.all_hosts() if not host.is_up] + + logger.info(f" 
可用节点: {live_hosts} ({len(live_hosts)}个)") + if down_hosts: + logger.warning(f" 故障节点: {down_hosts} ({len(down_hosts)}个)") + + except Exception as meta_error: + logger.warning(f"无法获取集群元数据: {meta_error}") + + return cluster, session + + except Exception as e: + connection_time = time.time() - start_time + error_msg = str(e) + + logger.error(f"❌ Cassandra连接失败: 连接时间={connection_time:.3f}秒") + logger.error(f"错误类型: {type(e).__name__}") + logger.error(f"错误详情: {error_msg}") + + # 提供详细的诊断信息 + if "connection refused" in error_msg.lower() or "unable to connect" in error_msg.lower(): + logger.error("❌ 诊断:无法连接到Cassandra服务器") + logger.error("🔧 建议检查:") + logger.error(" 1. Cassandra服务是否启动") + logger.error(" 2. 主机地址和端口是否正确") + logger.error(" 3. 网络防火墙是否阻挡连接") + + elif "timeout" in error_msg.lower(): + logger.error("❌ 诊断:连接超时") + logger.error("🔧 建议检查:") + logger.error(" 1. 网络延迟是否过高") + logger.error(" 2. Cassandra服务器负载是否过高") + logger.error(" 3. 增加连接超时时间") + + elif "authentication" in error_msg.lower() or "unauthorized" in error_msg.lower(): + logger.error("❌ 诊断:认证失败") + logger.error("🔧 建议检查:") + logger.error(" 1. 用户名和密码是否正确") + logger.error(" 2. 用户是否有访问该keyspace的权限") + + elif "keyspace" in error_msg.lower(): + logger.error("❌ 诊断:Keyspace不存在") + logger.error("🔧 建议检查:") + logger.error(" 1. Keyspace名称是否正确") + logger.error(" 2. Keyspace是否已创建") + + else: + logger.error("❌ 诊断:未知连接错误") + logger.error("🔧 建议:") + logger.error(" 1. 检查所有连接参数") + logger.error(" 2. 查看Cassandra服务器日志") + logger.error(" 3. 测试网络连通性") + + return None, None + +def execute_query(session, table, keys, fields, values, exclude_fields=None): + """执行查询,支持单主键和复合主键""" + try: + # 参数验证 + if not keys or len(keys) == 0: + logger.error("Keys参数为空,无法构建查询") + return [] + + if not values or len(values) == 0: + logger.error("Values参数为空,无法构建查询") + return [] + + # 构建查询条件 + if len(keys) == 1: + # 单主键查询(保持原有逻辑) + quoted_values = [f"'{value}'" for value in values] + query_conditions = f"{keys[0]} IN ({', '.join(quoted_values)})" + else: + # 复合主键查询 + conditions = [] + for value in values: + # 检查value是否包含复合主键分隔符 + if isinstance(value, str) and ',' in value: + # 解析复合主键值 + key_values = [v.strip() for v in value.split(',')] + if len(key_values) == len(keys): + # 构建单个复合主键条件: (key1='val1' AND key2='val2') + key_conditions = [] + for i, (key, val) in enumerate(zip(keys, key_values)): + key_conditions.append(f"{key} = '{val}'") + conditions.append(f"({' AND '.join(key_conditions)})") + else: + logger.warning(f"复合主键值 '{value}' 的字段数量({len(key_values)})与主键字段数量({len(keys)})不匹配") + # 将其作为第一个主键的值处理 + conditions.append(f"{keys[0]} = '{value}'") + else: + # 单值,作为第一个主键的值处理 + conditions.append(f"{keys[0]} = '{value}'") + + if conditions: + query_conditions = ' OR '.join(conditions) + else: + logger.error("无法构建有效的查询条件") + return [] + + # 确定要查询的字段 + if fields: + fields_str = ", ".join(fields) + else: + fields_str = "*" + + query_sql = f"SELECT {fields_str} FROM {table} WHERE {query_conditions};" + + # 记录查询SQL日志 + logger.info(f"执行查询SQL: {query_sql}") + if len(keys) > 1: + logger.info(f"复合主键查询参数: 表={table}, 主键字段={keys}, 字段={fields_str}, Key数量={len(values)}") + else: + logger.info(f"单主键查询参数: 表={table}, 主键字段={keys[0]}, 字段={fields_str}, Key数量={len(values)}") + + # 执行查询 + start_time = time.time() + result = session.execute(query_sql) + execution_time = time.time() - start_time + + result_list = list(result) if result else [] + logger.info(f"查询完成: 执行时间={execution_time:.3f}秒, 返回记录数={len(result_list)}") + + return result_list + except Exception as e: + logger.error(f"查询执行失败: SQL={query_sql if 
'query_sql' in locals() else 'N/A'}, 错误={str(e)}") + return [] + +def execute_sharding_query(session, shard_mapping, keys, fields, exclude_fields=None): + """ + 执行分表查询 + :param session: Cassandra会话 + :param shard_mapping: 分表映射 {table_name: [keys]} + :param keys: 主键字段名列表 + :param fields: 要查询的字段列表 + :param exclude_fields: 要排除的字段列表 + :return: (查询结果列表, 查询到的表列表, 查询失败的表列表) + """ + all_results = [] + queried_tables = [] + error_tables = [] + + logger.info(f"开始执行分表查询,涉及 {len(shard_mapping)} 张分表") + total_start_time = time.time() + + for table_name, table_keys in shard_mapping.items(): + try: + logger.info(f"查询分表 {table_name},包含 {len(table_keys)} 个key: {table_keys}") + # 为每个分表执行查询 + table_results = execute_query(session, table_name, keys, fields, table_keys, exclude_fields) + all_results.extend(table_results) + queried_tables.append(table_name) + logger.info(f"分表 {table_name} 查询成功,返回 {len(table_results)} 条记录") + except Exception as e: + logger.error(f"分表 {table_name} 查询失败: {e}") + error_tables.append(table_name) + + total_execution_time = time.time() - total_start_time + logger.info(f"分表查询总计完成: 执行时间={total_execution_time:.3f}秒, 成功表数={len(queried_tables)}, 失败表数={len(error_tables)}, 总记录数={len(all_results)}") + + return all_results, queried_tables, error_tables + +def execute_mixed_query(pro_session, test_session, pro_config, test_config, keys, fields_to_compare, values, exclude_fields, sharding_config): + """ + 执行混合查询(生产环境分表,测试环境可能单表或分表) + """ + results = { + 'pro_data': [], + 'test_data': [], + 'sharding_info': { + 'calculation_stats': {} + } + } + + # 处理生产环境查询 + if sharding_config.get('use_sharding_for_pro', False): + # 获取生产环境分表配置参数,优先使用专用参数,否则使用通用参数 + pro_interval = sharding_config.get('pro_interval_seconds') or sharding_config.get('interval_seconds', 604800) + pro_table_count = sharding_config.get('pro_table_count') or sharding_config.get('table_count', 14) + + # 记录生产环境分表配置信息 + logger.info(f"=== 生产环境分表配置 ===") + logger.info(f"启用分表查询: True") + logger.info(f"时间间隔: {pro_interval}秒 ({pro_interval//86400}天)") + logger.info(f"分表数量: {pro_table_count}张") + logger.info(f"基础表名: {pro_config['table']}") + + pro_calculator = ShardingCalculator( + interval_seconds=pro_interval, + table_count=pro_table_count + ) + pro_shard_mapping, pro_failed_keys, pro_calc_stats = pro_calculator.get_all_shard_tables_for_keys( + pro_config['table'], values + ) + + logger.info(f"生产环境分表映射结果: 涉及{len(pro_shard_mapping)}张分表, 失败Key数量: {len(pro_failed_keys)}") + + pro_data, pro_queried_tables, pro_error_tables = execute_sharding_query( + pro_session, pro_shard_mapping, keys, fields_to_compare, exclude_fields + ) + + results['pro_data'] = pro_data + results['sharding_info']['pro_shards'] = { + 'enabled': True, + 'interval_seconds': sharding_config.get('pro_interval_seconds', 604800), + 'table_count': sharding_config.get('pro_table_count', 14), + 'queried_tables': pro_queried_tables, + 'error_tables': pro_error_tables, + 'failed_keys': pro_failed_keys + } + results['sharding_info']['calculation_stats'].update(pro_calc_stats) + else: + # 生产环境单表查询 + logger.info(f"=== 生产环境单表配置 ===") + logger.info(f"启用分表查询: False") + logger.info(f"表名: {pro_config['table']}") + + pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields) + results['pro_data'] = pro_data + results['sharding_info']['pro_shards'] = { + 'enabled': False, + 'queried_tables': [pro_config['table']] + } + + # 处理测试环境查询 + if sharding_config.get('use_sharding_for_test', False): + # 获取测试环境分表配置参数,优先使用专用参数,否则使用通用参数 + test_interval = 
sharding_config.get('test_interval_seconds') or sharding_config.get('interval_seconds', 604800) + test_table_count = sharding_config.get('test_table_count') or sharding_config.get('table_count', 14) + + # 记录测试环境分表配置信息 + logger.info(f"=== 测试环境分表配置 ===") + logger.info(f"启用分表查询: True") + logger.info(f"时间间隔: {test_interval}秒 ({test_interval//86400}天)") + logger.info(f"分表数量: {test_table_count}张") + logger.info(f"基础表名: {test_config['table']}") + + test_calculator = ShardingCalculator( + interval_seconds=test_interval, + table_count=test_table_count + ) + test_shard_mapping, test_failed_keys, test_calc_stats = test_calculator.get_all_shard_tables_for_keys( + test_config['table'], values + ) + + logger.info(f"测试环境分表映射结果: 涉及{len(test_shard_mapping)}张分表, 失败Key数量: {len(test_failed_keys)}") + + test_data, test_queried_tables, test_error_tables = execute_sharding_query( + test_session, test_shard_mapping, keys, fields_to_compare, exclude_fields + ) + + results['test_data'] = test_data + results['sharding_info']['test_shards'] = { + 'enabled': True, + 'interval_seconds': test_interval, + 'table_count': test_table_count, + 'queried_tables': test_queried_tables, + 'error_tables': test_error_tables, + 'failed_keys': test_failed_keys + } + + # 合并计算统计信息 + if not results['sharding_info']['calculation_stats']: + results['sharding_info']['calculation_stats'] = test_calc_stats + else: + # 测试环境单表查询 + logger.info(f"=== 测试环境单表配置 ===") + logger.info(f"启用分表查询: False") + logger.info(f"表名: {test_config['table']}") + + test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields) + results['test_data'] = test_data + results['sharding_info']['test_shards'] = { + 'enabled': False, + 'queried_tables': [test_config['table']] + } + + return results + +def compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values): + """比较查询结果,支持复合主键""" + differences = [] + field_diff_count = {} + identical_results = [] # 存储相同的结果 + + def match_composite_key(row, composite_value, keys): + """检查数据行是否匹配复合主键值""" + if len(keys) == 1: + # 单主键匹配 + return getattr(row, keys[0]) == composite_value + else: + # 复合主键匹配 + if isinstance(composite_value, str) and ',' in composite_value: + key_values = [v.strip() for v in composite_value.split(',')] + if len(key_values) == len(keys): + return all(str(getattr(row, key)) == key_val for key, key_val in zip(keys, key_values)) + # 如果不是复合值,只匹配第一个主键 + return getattr(row, keys[0]) == composite_value + + for value in values: + # 查找生产表和测试表中该主键值的相关数据 + rows_pro = [row for row in pro_data if match_composite_key(row, value, keys)] + rows_test = [row for row in test_data if match_composite_key(row, value, keys)] + + for row_pro in rows_pro: + # 在测试表中查找相同主键的行 + row_test = next( + (row for row in rows_test if all(getattr(row, key) == getattr(row_pro, key) for key in keys)), + None + ) + + if row_test: + # 确定要比较的列 + columns = fields_to_compare if fields_to_compare else row_pro._fields + columns = [col for col in columns if col not in exclude_fields] + + has_difference = False + row_differences = [] + identical_fields = {} + + for column in columns: + value_pro = getattr(row_pro, column) + value_test = getattr(row_test, column) + + # 使用智能比较函数 + if not compare_values(value_pro, value_test): + has_difference = True + # 格式化显示值 + formatted_pro_value = format_json_for_display(value_pro) + formatted_test_value = format_json_for_display(value_test) + + row_differences.append({ + 'key': {key: getattr(row_pro, key) for key in keys}, + 'field': column, + 
'pro_value': formatted_pro_value, + 'test_value': formatted_test_value, + 'is_json': is_json_field(value_pro) or is_json_field(value_test), + 'is_array': is_json_array_field(value_pro) or is_json_array_field(value_test) + }) + + # 统计字段差异次数 + field_diff_count[column] = field_diff_count.get(column, 0) + 1 + else: + # 存储相同的字段值 + identical_fields[column] = format_json_for_display(value_pro) + + if has_difference: + differences.extend(row_differences) + else: + # 如果没有差异,存储到相同结果中 + identical_results.append({ + 'key': {key: getattr(row_pro, key) for key in keys}, + 'pro_fields': identical_fields, + 'test_fields': {col: format_json_for_display(getattr(row_test, col)) for col in columns} + }) + else: + # 在测试表中未找到对应行 + differences.append({ + 'key': {key: getattr(row_pro, key) for key in keys}, + 'message': '在测试表中未找到该行' + }) + + # 检查测试表中是否有生产表中不存在的行 + for row_test in rows_test: + row_pro = next( + (row for row in rows_pro if all(getattr(row, key) == getattr(row_test, key) for key in keys)), + None + ) + if not row_pro: + differences.append({ + 'key': {key: getattr(row_test, key) for key in keys}, + 'message': '在生产表中未找到该行' + }) + + return differences, field_diff_count, identical_results + +def generate_comparison_summary(total_keys, pro_count, test_count, differences, identical_results, field_diff_count): + """生成比较总结报告""" + # 计算基本统计 + different_records = len(set([list(diff['key'].values())[0] for diff in differences if 'field' in diff])) + identical_records = len(identical_results) + missing_in_test = len([diff for diff in differences if diff.get('message') == '在测试表中未找到该行']) + missing_in_pro = len([diff for diff in differences if diff.get('message') == '在生产表中未找到该行']) + + # 计算百分比 + def safe_percentage(part, total): + return round((part / total * 100), 2) if total > 0 else 0 + + identical_percentage = safe_percentage(identical_records, total_keys) + different_percentage = safe_percentage(different_records, total_keys) + + # 生成总结 + summary = { + 'overview': { + 'total_keys_queried': total_keys, + 'pro_records_found': pro_count, + 'test_records_found': test_count, + 'identical_records': identical_records, + 'different_records': different_records, + 'missing_in_test': missing_in_test, + 'missing_in_pro': missing_in_pro + }, + 'percentages': { + 'data_consistency': identical_percentage, + 'data_differences': different_percentage, + 'missing_rate': safe_percentage(missing_in_test + missing_in_pro, total_keys) + }, + 'field_analysis': { + 'total_fields_compared': len(field_diff_count) if field_diff_count else 0, + 'most_different_fields': sorted(field_diff_count.items(), key=lambda x: x[1], reverse=True)[:5] if field_diff_count else [] + }, + 'data_quality': { + 'completeness': safe_percentage(pro_count + test_count, total_keys * 2), + 'consistency_score': identical_percentage, + 'quality_level': get_quality_level(identical_percentage) + }, + 'recommendations': generate_recommendations(identical_percentage, missing_in_test, missing_in_pro, field_diff_count) + } + + return summary + +def get_quality_level(consistency_percentage): + """根据一致性百分比获取数据质量等级""" + if consistency_percentage >= 95: + return {'level': '优秀', 'color': 'success', 'description': '数据一致性非常高'} + elif consistency_percentage >= 90: + return {'level': '良好', 'color': 'info', 'description': '数据一致性较高'} + elif consistency_percentage >= 80: + return {'level': '一般', 'color': 'warning', 'description': '数据一致性中等,需要关注'} + else: + return {'level': '较差', 'color': 'danger', 'description': '数据一致性较低,需要重点处理'} + +def 
generate_recommendations(consistency_percentage, missing_in_test, missing_in_pro, field_diff_count): + """生成改进建议""" + recommendations = [] + + if consistency_percentage < 90: + recommendations.append('建议重点关注数据一致性问题,检查数据同步机制') + + if missing_in_test > 0: + recommendations.append(f'测试环境缺失 {missing_in_test} 条记录,建议检查数据迁移过程') + + if missing_in_pro > 0: + recommendations.append(f'生产环境缺失 {missing_in_pro} 条记录,建议检查数据完整性') + + if field_diff_count: + top_diff_field = max(field_diff_count.items(), key=lambda x: x[1]) + recommendations.append(f'字段 "{top_diff_field[0]}" 差异最多({top_diff_field[1]}次),建议优先处理') + + if not recommendations: + recommendations.append('数据质量良好,建议继续保持当前的数据管理流程') + + return recommendations + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/test-config-load') +def test_config_load(): + """配置加载测试页面""" + return send_from_directory('.', 'test_config_load.html') + +@app.route('/db-compare') +def db_compare(): + return render_template('db_compare.html') + +@app.route('/api/sharding-query', methods=['POST']) +def sharding_query_compare(): + """分表查询比对API""" + try: + data = request.json + + # 开始新的查询批次 + batch_id = query_log_collector.start_new_batch('分表') + + logger.info("开始执行分表数据库比对查询") + + # 解析配置 + pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config']) + test_config = data.get('test_config', DEFAULT_CONFIG['test_config']) + + # 从query_config中获取keys等参数 + query_config = data.get('query_config', {}) + keys = query_config.get('keys', DEFAULT_CONFIG['keys']) + fields_to_compare = query_config.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare']) + exclude_fields = query_config.get('exclude_fields', DEFAULT_CONFIG['exclude_fields']) + + values = data.get('values', []) + sharding_config = data.get('sharding_config', {}) + + # 参数验证 + if not values: + logger.warning("分表查询失败:未提供查询key值") + return jsonify({'error': '请提供查询key值'}), 400 + + if not keys: + logger.warning("分表查询失败:未提供主键字段") + return jsonify({'error': '请提供主键字段'}), 400 + + # 添加详细的参数日志 + logger.info(f"分表查询参数解析结果:") + logger.info(f" keys: {keys}") + logger.info(f" values数量: {len(values)}") + logger.info(f" fields_to_compare: {fields_to_compare}") + logger.info(f" exclude_fields: {exclude_fields}") + logger.info(f" sharding_config原始数据: {sharding_config}") + logger.info(f" sharding_config具体参数:") + logger.info(f" use_sharding_for_pro: {sharding_config.get('use_sharding_for_pro')}") + logger.info(f" use_sharding_for_test: {sharding_config.get('use_sharding_for_test')}") + logger.info(f" pro_interval_seconds: {sharding_config.get('pro_interval_seconds')}") + logger.info(f" pro_table_count: {sharding_config.get('pro_table_count')}") + logger.info(f" test_interval_seconds: {sharding_config.get('test_interval_seconds')}") + logger.info(f" test_table_count: {sharding_config.get('test_table_count')}") + logger.info(f" interval_seconds: {sharding_config.get('interval_seconds')}") + logger.info(f" table_count: {sharding_config.get('table_count')}") + + logger.info(f"分表查询配置:{len(values)}个key值,生产表:{pro_config['table']},测试表:{test_config['table']}") + + # 创建数据库连接 + pro_cluster, pro_session = create_connection(pro_config) + test_cluster, test_session = create_connection(test_config) + + if not pro_session or not test_session: + logger.error("数据库连接失败") + return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500 + + try: + # 执行混合查询(支持生产环境分表、测试环境单表/分表的组合) + logger.info("执行分表混合查询") + query_results = execute_mixed_query( + pro_session, test_session, pro_config, test_config, + keys, fields_to_compare, values, 
exclude_fields, sharding_config + ) + + pro_data = query_results['pro_data'] + test_data = query_results['test_data'] + sharding_info = query_results['sharding_info'] + + logger.info(f"分表查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录") + + # 比较结果 + differences, field_diff_count, identical_results = compare_results( + pro_data, test_data, keys, fields_to_compare, exclude_fields, values + ) + + # 统计信息 + different_ids = set() + for diff in differences: + if 'field' in diff: + different_ids.add(list(diff['key'].values())[0]) + + non_different_ids = set(values) - different_ids + + # 生成比较总结 + summary = generate_comparison_summary( + len(values), len(pro_data), len(test_data), + differences, identical_results, field_diff_count + ) + + result = { + 'total_keys': len(values), + 'pro_count': len(pro_data), + 'test_count': len(test_data), + 'differences': differences, + 'identical_results': identical_results, + 'field_diff_count': field_diff_count, + 'different_ids': list(different_ids), + 'non_different_ids': list(non_different_ids), + 'summary': summary, + 'sharding_info': sharding_info, # 包含分表查询信息 + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] + } + + logger.info(f"分表比对完成:发现 {len(differences)} 处差异") + + # 自动保存分表查询历史记录 + try: + # 生成历史记录名称 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + history_name = f"分表查询_{timestamp}" + history_description = f"自动保存 - 分表查询{len(values)}个Key,发现{len(differences)}处差异" + + # 保存历史记录 + history_id = save_query_history( + name=history_name, + description=history_description, + pro_config=pro_config, + test_config=test_config, + query_config={ + 'keys': keys, + 'fields_to_compare': fields_to_compare, + 'exclude_fields': exclude_fields + }, + query_keys=values, + results_summary=summary, + execution_time=0.0, # 可以后续优化计算实际执行时间 + total_keys=len(values), + differences_count=len(differences), + identical_count=len(identical_results), + # 新增分表相关参数 + sharding_config=sharding_config, + query_type='sharding', + # 添加查询结果数据 + raw_results={ + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [], + 'sharding_info': sharding_info # 包含分表信息 + }, + differences_data=differences, + identical_data=identical_results + ) + + # 关联查询日志与历史记录 + if history_id: + query_log_collector.set_history_id(history_id) + logger.info(f"分表查询历史记录保存成功: {history_name}, ID: {history_id}") + else: + logger.warning("分表查询历史记录保存失败,无法获取history_id") + except Exception as e: + logger.warning(f"保存分表查询历史记录失败: {e}") + + # 结束查询批次 + query_log_collector.end_current_batch() + return jsonify(result) + + except Exception as e: + logger.error(f"分表查询执行失败:{str(e)}") + # 结束查询批次(出错情况) + query_log_collector.end_current_batch() + return jsonify({'error': f'分表查询执行失败:{str(e)}'}), 500 + finally: + # 关闭连接 + if pro_cluster: + pro_cluster.shutdown() + if test_cluster: + test_cluster.shutdown() + + except Exception as e: + logger.error(f"分表查询请求处理失败:{str(e)}") + # 结束查询批次(请求处理出错) + query_log_collector.end_current_batch() + return jsonify({'error': f'分表查询请求处理失败:{str(e)}'}), 500 + +@app.route('/api/query', methods=['POST']) +def query_compare(): + try: + data = request.json + + # 开始新的查询批次 + batch_id = query_log_collector.start_new_batch('单表') + + logger.info("开始执行数据库比对查询") + + # 解析配置 + pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config']) + test_config = data.get('test_config', 
DEFAULT_CONFIG['test_config']) + + # 从query_config中获取keys等参数 + query_config = data.get('query_config', {}) + keys = query_config.get('keys', DEFAULT_CONFIG['keys']) + fields_to_compare = query_config.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare']) + exclude_fields = query_config.get('exclude_fields', DEFAULT_CONFIG['exclude_fields']) + + values = data.get('values', []) + + # 参数验证 + if not values: + logger.warning("查询失败:未提供查询key值") + return jsonify({'error': '请提供查询key值'}), 400 + + if not keys: + logger.warning("查询失败:未提供主键字段") + return jsonify({'error': '请提供主键字段'}), 400 + + # 添加详细的参数日志 + logger.info(f"单表查询参数解析结果:") + logger.info(f" keys: {keys}") + logger.info(f" values数量: {len(values)}") + logger.info(f" fields_to_compare: {fields_to_compare}") + logger.info(f" exclude_fields: {exclude_fields}") + + logger.info(f"查询配置:{len(values)}个key值,生产表:{pro_config['table']},测试表:{test_config['table']}") + + # 创建数据库连接 + pro_cluster, pro_session = create_connection(pro_config) + test_cluster, test_session = create_connection(test_config) + + if not pro_session or not test_session: + logger.error("数据库连接失败") + return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500 + + try: + # 执行查询 + logger.info("执行生产环境查询") + pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields) + logger.info("执行测试环境查询") + test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields) + + logger.info(f"查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录") + + # 比较结果 + differences, field_diff_count, identical_results = compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values) + + # 统计信息 + different_ids = set() + for diff in differences: + if 'field' in diff: + different_ids.add(list(diff['key'].values())[0]) + + non_different_ids = set(values) - different_ids + + # 生成比较总结 + summary = generate_comparison_summary( + len(values), len(pro_data), len(test_data), + differences, identical_results, field_diff_count + ) + + result = { + 'total_keys': len(values), + 'pro_count': len(pro_data), + 'test_count': len(test_data), + 'differences': differences, + 'identical_results': identical_results, + 'field_diff_count': field_diff_count, + 'different_ids': list(different_ids), + 'non_different_ids': list(non_different_ids), + 'summary': summary, + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] + } + + logger.info(f"比对完成:发现 {len(differences)} 处差异") + + # 自动保存查询历史记录(可选,基于执行结果) + try: + # 生成历史记录名称 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + history_name = f"查询_{timestamp}" + history_description = f"自动保存 - 查询{len(values)}个Key,发现{len(differences)}处差异" + + # 保存历史记录 + history_id = save_query_history( + name=history_name, + description=history_description, + pro_config=pro_config, + test_config=test_config, + query_config={ + 'keys': keys, + 'fields_to_compare': fields_to_compare, + 'exclude_fields': exclude_fields + }, + query_keys=values, + results_summary=summary, + execution_time=0.0, # 可以后续优化计算实际执行时间 + total_keys=len(values), + differences_count=len(differences), + identical_count=len(identical_results), + # 添加查询结果数据 + raw_results={ + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] + }, + differences_data=differences, + identical_data=identical_results + ) + + # 关联查询日志与历史记录 + if 
history_id: + query_log_collector.set_history_id(history_id) + logger.info(f"查询历史记录保存成功: {history_name}, ID: {history_id}") + else: + logger.warning("查询历史记录保存失败,无法获取history_id") + except Exception as e: + logger.warning(f"保存查询历史记录失败: {e}") + + # 结束查询批次 + query_log_collector.end_current_batch() + return jsonify(result) + + except Exception as e: + logger.error(f"查询执行失败:{str(e)}") + # 结束查询批次(出错情况) + query_log_collector.end_current_batch() + return jsonify({'error': f'查询执行失败:{str(e)}'}), 500 + finally: + # 关闭连接 + if pro_cluster: + pro_cluster.shutdown() + if test_cluster: + test_cluster.shutdown() + + except Exception as e: + logger.error(f"请求处理失败:{str(e)}") + # 结束查询批次(请求处理出错) + query_log_collector.end_current_batch() + return jsonify({'error': f'请求处理失败:{str(e)}'}), 500 + +@app.route('/api/default-config') +def get_default_config(): + return jsonify(DEFAULT_CONFIG) + +# 配置组管理API +@app.route('/api/config-groups', methods=['GET']) +def api_get_config_groups(): + """获取所有配置组""" + config_groups = get_config_groups() + return jsonify({'success': True, 'data': config_groups}) + +@app.route('/api/config-groups', methods=['POST']) +def api_save_config_group(): + """保存配置组""" + try: + data = request.json + name = data.get('name', '').strip() + description = data.get('description', '').strip() + pro_config = data.get('pro_config', {}) + test_config = data.get('test_config', {}) + + # 获取查询配置,支持两种格式 + if 'query_config' in data: + # 嵌套格式 + query_config = data.get('query_config', {}) + else: + # 平铺格式 + query_config = { + 'keys': data.get('keys', []), + 'fields_to_compare': data.get('fields_to_compare', []), + 'exclude_fields': data.get('exclude_fields', []) + } + + # 提取分表配置 + sharding_config = data.get('sharding_config') + + if not name: + return jsonify({'success': False, 'error': '配置组名称不能为空'}), 400 + + success = save_config_group(name, description, pro_config, test_config, query_config, sharding_config) + + if success: + return jsonify({'success': True, 'message': '配置组保存成功'}) + else: + return jsonify({'success': False, 'error': '配置组保存失败'}), 500 + + except Exception as e: + logger.error(f"保存配置组API失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/api/config-groups/', methods=['GET']) +def api_get_config_group(group_id): + """获取指定配置组详情""" + config_group = get_config_group_by_id(group_id) + + if config_group: + return jsonify({'success': True, 'data': config_group}) + else: + return jsonify({'success': False, 'error': '配置组不存在'}), 404 + +@app.route('/api/config-groups/', methods=['DELETE']) +def api_delete_config_group(group_id): + """删除配置组""" + success = delete_config_group(group_id) + + if success: + return jsonify({'success': True, 'message': '配置组删除成功'}) + else: + return jsonify({'success': False, 'error': '配置组删除失败'}), 500 + +@app.route('/api/init-db', methods=['POST']) +def api_init_database(): + """手动初始化数据库(用于测试)""" + success = init_database() + if success: + return jsonify({'success': True, 'message': '数据库初始化成功'}) + else: + return jsonify({'success': False, 'error': '数据库初始化失败'}), 500 + +# 查询历史管理API +@app.route('/api/query-history', methods=['GET']) +def api_get_query_history(): + """获取所有查询历史记录""" + history_list = get_query_history() + return jsonify({'success': True, 'data': history_list}) + +@app.route('/api/query-history', methods=['POST']) +def api_save_query_history(): + """保存查询历史记录,支持分表查询""" + try: + data = request.json + name = data.get('name', '').strip() + description = data.get('description', '').strip() + pro_config = data.get('pro_config', {}) + test_config = 
data.get('test_config', {}) + query_config = data.get('query_config', {}) + query_keys = data.get('query_keys', []) + results_summary = data.get('results_summary', {}) + execution_time = data.get('execution_time', 0.0) + total_keys = data.get('total_keys', 0) + differences_count = data.get('differences_count', 0) + identical_count = data.get('identical_count', 0) + # 新增分表相关字段 + sharding_config = data.get('sharding_config') + query_type = data.get('query_type', 'single') + + if not name: + return jsonify({'success': False, 'error': '历史记录名称不能为空'}), 400 + + success = save_query_history( + name, description, pro_config, test_config, query_config, + query_keys, results_summary, execution_time, total_keys, + differences_count, identical_count, sharding_config, query_type + ) + + if success: + query_type_desc = '分表查询' if query_type == 'sharding' else '单表查询' + return jsonify({'success': True, 'message': f'{query_type_desc}历史记录保存成功'}) + else: + return jsonify({'success': False, 'error': '查询历史记录保存失败'}), 500 + + except Exception as e: + logger.error(f"保存查询历史记录API失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/api/query-history/', methods=['GET']) +def api_get_query_history_detail(history_id): + """获取指定查询历史记录详情""" + history_record = get_query_history_by_id(history_id) + + if history_record: + return jsonify({'success': True, 'data': history_record}) + else: + return jsonify({'success': False, 'error': '查询历史记录不存在'}), 404 + +@app.route('/api/query-history//results', methods=['GET']) +def api_get_query_history_results(history_id): + """获取查询历史记录的完整结果数据""" + try: + history_record = get_query_history_by_id(history_id) + if not history_record: + return jsonify({'success': False, 'error': '历史记录不存在'}), 404 + + # 安全获取raw_results数据 + raw_results = history_record.get('raw_results') + if raw_results and isinstance(raw_results, dict): + raw_pro_data = raw_results.get('raw_pro_data', []) or [] + raw_test_data = raw_results.get('raw_test_data', []) or [] + sharding_info = raw_results.get('sharding_info') if history_record.get('query_type') == 'sharding' else None + else: + raw_pro_data = [] + raw_test_data = [] + sharding_info = None + + # 安全获取差异和相同结果数据 + differences_data = history_record.get('differences_data') or [] + identical_data = history_record.get('identical_data') or [] + + # 构建完整的查询结果格式,与API查询结果保持一致 + result = { + 'total_keys': history_record['total_keys'], + 'pro_count': len(raw_pro_data), + 'test_count': len(raw_test_data), + 'differences': differences_data, + 'identical_results': identical_data, + 'field_diff_count': {}, # 可以从differences_data中重新计算 + 'summary': history_record.get('results_summary', {}), + 'raw_pro_data': raw_pro_data, + 'raw_test_data': raw_test_data, + # 如果是分表查询,添加分表信息 + 'sharding_info': sharding_info, + # 添加历史记录元信息 + 'history_info': { + 'id': history_record['id'], + 'name': history_record['name'], + 'description': history_record['description'], + 'created_at': history_record['created_at'], + 'query_type': history_record.get('query_type', 'single') + } + } + + # 重新计算field_diff_count + if differences_data: + field_diff_count = {} + for diff in differences_data: + if isinstance(diff, dict) and 'field' in diff: + field_name = diff['field'] + field_diff_count[field_name] = field_diff_count.get(field_name, 0) + 1 + result['field_diff_count'] = field_diff_count + + return jsonify({ + 'success': True, + 'data': result, + 'message': f'历史记录 "{history_record["name"]}" 结果加载成功' + }) + + except Exception as e: + logger.error(f"获取查询历史记录结果失败: {e}") + return 
jsonify({'success': False, 'error': f'获取历史记录结果失败: {str(e)}'}), 500 + +@app.route('/api/query-history/', methods=['DELETE']) +def api_delete_query_history(history_id): + """删除查询历史记录""" + success = delete_query_history(history_id) + + if success: + return jsonify({'success': True, 'message': '查询历史记录删除成功'}) + else: + return jsonify({'success': False, 'error': '查询历史记录删除失败'}), 500 + +@app.route('/api/query-logs', methods=['GET']) +def api_get_query_logs(): + """获取查询日志,支持分组显示和数据库存储""" + try: + limit = request.args.get('limit', type=int) + grouped = request.args.get('grouped', 'true').lower() == 'true' # 默认分组显示 + from_db = request.args.get('from_db', 'true').lower() == 'true' # 默认从数据库获取 + + if grouped: + # 返回分组日志 + grouped_logs = query_log_collector.get_logs_grouped_by_batch(limit, from_db) + # 获取总数(用于统计) + total_logs = query_log_collector._get_total_logs_count() if from_db else len(query_log_collector.logs) + + return jsonify({ + 'success': True, + 'data': grouped_logs, + 'total': total_logs, + 'grouped': True, + 'from_db': from_db + }) + else: + # 返回原始日志列表 + logs = query_log_collector.get_logs(limit, from_db) + total_logs = query_log_collector._get_total_logs_count() if from_db else len(query_log_collector.logs) + + return jsonify({ + 'success': True, + 'data': logs, + 'total': total_logs, + 'grouped': False, + 'from_db': from_db + }) + except Exception as e: + logger.error(f"获取查询日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/api/query-logs', methods=['DELETE']) +def api_clear_query_logs(): + """清空查询日志,支持清空数据库日志""" + try: + clear_db = request.args.get('clear_db', 'true').lower() == 'true' # 默认清空数据库 + query_log_collector.clear_logs(clear_db) + + message = '查询日志已清空(包括数据库)' if clear_db else '查询日志已清空(仅内存)' + return jsonify({'success': True, 'message': message}) + except Exception as e: + logger.error(f"清空查询日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/api/query-logs/cleanup', methods=['POST']) +def api_cleanup_old_logs(): + """清理旧的查询日志""" + try: + days_to_keep = request.json.get('days_to_keep', 30) if request.json else 30 + deleted_count = query_log_collector.cleanup_old_logs(days_to_keep) + + return jsonify({ + 'success': True, + 'message': f'成功清理 {deleted_count} 条超过 {days_to_keep} 天的旧日志', + 'deleted_count': deleted_count + }) + except Exception as e: + logger.error(f"清理旧日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + +@app.route('/api/query-logs/history/', methods=['GET']) +def api_get_query_logs_by_history(history_id): + """根据历史记录ID获取相关查询日志""" + try: + logs = query_log_collector.get_logs_by_history_id(history_id) + + # 按批次分组显示 + grouped_logs = {} + batch_order = [] + + for log in logs: + batch_id = log.get('batch_id', 'unknown') + if batch_id not in grouped_logs: + grouped_logs[batch_id] = [] + batch_order.append(batch_id) + grouped_logs[batch_id].append(log) + + # 返回按时间顺序排列的批次 + grouped_result = [(batch_id, grouped_logs[batch_id]) for batch_id in batch_order] + + return jsonify({ + 'success': True, + 'data': grouped_result, + 'total': len(logs), + 'history_id': history_id, + 'grouped': True + }) + except Exception as e: + logger.error(f"获取历史记录相关查询日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + +if __name__ == '__main__': + app.run(debug=True, port=5001) diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 0000000..3ee3cc3 --- /dev/null +++ b/modules/__init__.py @@ -0,0 +1,17 @@ +""" +BigDataTool Modules + +This directory contains all 
functional modules for the BigDataTool application. + +Module List: +- database.py - Database management +- query_logger.py - Query logging management +- sharding.py - Sharding calculations +- cassandra_client.py - Cassandra connections +- query_engine.py - Data query engine +- data_comparison.py - Data comparison algorithms +- config_manager.py - Configuration management +- api_routes.py - API route definitions + +Each module has clear responsibility boundaries and standardized interfaces. +""" \ No newline at end of file diff --git a/modules/api_routes.py b/modules/api_routes.py new file mode 100644 index 0000000..5830185 --- /dev/null +++ b/modules/api_routes.py @@ -0,0 +1,1020 @@ +""" +API路由模块 +定义所有Flask路由和请求处理逻辑 +""" + +import logging +from datetime import datetime +from flask import jsonify, request, render_template, send_from_directory + +# 导入自定义模块 +from .config_manager import ( + DEFAULT_CONFIG, save_config_group, get_config_groups, + get_config_group_by_id, delete_config_group, + save_query_history, get_query_history, + get_query_history_by_id, delete_query_history, + # Redis配置管理 + REDIS_DEFAULT_CONFIG, save_redis_config_group, get_redis_config_groups, + get_redis_config_group_by_id, delete_redis_config_group, + save_redis_query_history, get_redis_query_history, + get_redis_query_history_by_id, delete_redis_query_history, + parse_redis_config_from_yaml +) +from .cassandra_client import create_connection +from .query_engine import execute_query, execute_mixed_query +from .data_comparison import compare_results, generate_comparison_summary +from .database import init_database +# Redis相关模块 +from .redis_client import create_redis_client, test_redis_connection +from .redis_query import execute_redis_comparison + +logger = logging.getLogger(__name__) + +def setup_routes(app, query_log_collector): + """设置所有路由,需要传入app实例和query_log_collector""" + + # 页面路由 + @app.route('/') + def index(): + return render_template('index.html') + + @app.route('/test-config-load') + def test_config_load(): + """配置加载测试页面""" + return send_from_directory('.', 'test_config_load.html') + + @app.route('/db-compare') + def db_compare(): + return render_template('db_compare.html') + + @app.route('/redis-compare') + def redis_compare(): + return render_template('redis_compare.html') + + # 基础API + @app.route('/api/default-config') + def get_default_config(): + return jsonify(DEFAULT_CONFIG) + + @app.route('/api/init-db', methods=['POST']) + def api_init_database(): + """手动初始化数据库(用于测试)""" + success = init_database() + if success: + return jsonify({'success': True, 'message': '数据库初始化成功'}) + else: + return jsonify({'success': False, 'error': '数据库初始化失败'}), 500 + + # 分表查询API + @app.route('/api/sharding-query', methods=['POST']) + def sharding_query_compare(): + """分表查询比对API""" + try: + data = request.json + + # 开始新的查询批次 + batch_id = query_log_collector.start_new_batch('分表') + + logger.info("开始执行分表数据库比对查询") + + # 解析配置 + pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config']) + test_config = data.get('test_config', DEFAULT_CONFIG['test_config']) + + # 从query_config中获取keys等参数 + query_config = data.get('query_config', {}) + keys = query_config.get('keys', DEFAULT_CONFIG['keys']) + fields_to_compare = query_config.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare']) + exclude_fields = query_config.get('exclude_fields', DEFAULT_CONFIG['exclude_fields']) + + values = data.get('values', []) + sharding_config = data.get('sharding_config', {}) + + # 参数验证 + if not values: + logger.warning("分表查询失败:未提供查询key值") + return 
jsonify({'error': '请提供查询key值'}), 400 + + if not keys: + logger.warning("分表查询失败:未提供主键字段") + return jsonify({'error': '请提供主键字段'}), 400 + + # 添加详细的参数日志 + logger.info(f"分表查询参数解析结果:") + logger.info(f" keys: {keys}") + logger.info(f" values数量: {len(values)}") + logger.info(f" fields_to_compare: {fields_to_compare}") + logger.info(f" exclude_fields: {exclude_fields}") + logger.info(f" sharding_config原始数据: {sharding_config}") + logger.info(f" sharding_config具体参数:") + logger.info(f" use_sharding_for_pro: {sharding_config.get('use_sharding_for_pro')}") + logger.info(f" use_sharding_for_test: {sharding_config.get('use_sharding_for_test')}") + logger.info(f" pro_interval_seconds: {sharding_config.get('pro_interval_seconds')}") + logger.info(f" pro_table_count: {sharding_config.get('pro_table_count')}") + logger.info(f" test_interval_seconds: {sharding_config.get('test_interval_seconds')}") + logger.info(f" test_table_count: {sharding_config.get('test_table_count')}") + logger.info(f" interval_seconds: {sharding_config.get('interval_seconds')}") + logger.info(f" table_count: {sharding_config.get('table_count')}") + + logger.info(f"分表查询配置:{len(values)}个key值,生产表:{pro_config['table']},测试表:{test_config['table']}") + + # 创建数据库连接 + pro_cluster, pro_session = create_connection(pro_config) + test_cluster, test_session = create_connection(test_config) + + if not pro_session or not test_session: + logger.error("数据库连接失败") + return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500 + + try: + # 执行混合查询(支持生产环境分表、测试环境单表/分表的组合) + logger.info("执行分表混合查询") + query_results = execute_mixed_query( + pro_session, test_session, pro_config, test_config, + keys, fields_to_compare, values, exclude_fields, sharding_config + ) + + pro_data = query_results['pro_data'] + test_data = query_results['test_data'] + sharding_info = query_results['sharding_info'] + + logger.info(f"分表查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录") + + # 比较结果 + differences, field_diff_count, identical_results = compare_results( + pro_data, test_data, keys, fields_to_compare, exclude_fields, values + ) + + # 统计信息 + different_ids = set() + for diff in differences: + if 'field' in diff: + different_ids.add(list(diff['key'].values())[0]) + + non_different_ids = set(values) - different_ids + + # 生成比较总结 + summary = generate_comparison_summary( + len(values), len(pro_data), len(test_data), + differences, identical_results, field_diff_count + ) + + result = { + 'total_keys': len(values), + 'pro_count': len(pro_data), + 'test_count': len(test_data), + 'differences': differences, + 'identical_results': identical_results, + 'field_diff_count': field_diff_count, + 'different_ids': list(different_ids), + 'non_different_ids': list(non_different_ids), + 'summary': summary, + 'sharding_info': sharding_info, # 包含分表查询信息 + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] + } + + logger.info(f"分表比对完成:发现 {len(differences)} 处差异") + + # 自动保存分表查询历史记录 + try: + # 生成历史记录名称 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + history_name = f"分表查询_{timestamp}" + history_description = f"自动保存 - 分表查询{len(values)}个Key,发现{len(differences)}处差异" + + # 保存历史记录 + history_id = save_query_history( + name=history_name, + description=history_description, + pro_config=pro_config, + test_config=test_config, + query_config={ + 'keys': keys, + 'fields_to_compare': fields_to_compare, + 'exclude_fields': exclude_fields + }, + query_keys=values, + results_summary=summary, + 
execution_time=0.0, # 可以后续优化计算实际执行时间 + total_keys=len(values), + differences_count=len(differences), + identical_count=len(identical_results), + # 新增分表相关参数 + sharding_config=sharding_config, + query_type='sharding', + # 添加查询结果数据 + raw_results={ + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [], + 'sharding_info': sharding_info # 包含分表信息 + }, + differences_data=differences, + identical_data=identical_results + ) + + # 关联查询日志与历史记录 + if history_id: + query_log_collector.set_history_id(history_id) + logger.info(f"分表查询历史记录保存成功: {history_name}, ID: {history_id}") + else: + logger.warning("分表查询历史记录保存失败,无法获取history_id") + except Exception as e: + logger.warning(f"保存分表查询历史记录失败: {e}") + + # 结束查询批次 + query_log_collector.end_current_batch() + return jsonify(result) + + except Exception as e: + logger.error(f"分表查询执行失败:{str(e)}") + # 结束查询批次(出错情况) + query_log_collector.end_current_batch() + return jsonify({'error': f'分表查询执行失败:{str(e)}'}), 500 + finally: + # 关闭连接 + if pro_cluster: + pro_cluster.shutdown() + if test_cluster: + test_cluster.shutdown() + + except Exception as e: + logger.error(f"分表查询请求处理失败:{str(e)}") + # 结束查询批次(请求处理出错) + query_log_collector.end_current_batch() + return jsonify({'error': f'分表查询请求处理失败:{str(e)}'}), 500 + + # 单表查询API + @app.route('/api/query', methods=['POST']) + def query_compare(): + try: + data = request.json + + # 开始新的查询批次 + batch_id = query_log_collector.start_new_batch('单表') + + logger.info("开始执行数据库比对查询") + + # 解析配置 + pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config']) + test_config = data.get('test_config', DEFAULT_CONFIG['test_config']) + + # 从query_config中获取keys等参数 + query_config = data.get('query_config', {}) + keys = query_config.get('keys', DEFAULT_CONFIG['keys']) + fields_to_compare = query_config.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare']) + exclude_fields = query_config.get('exclude_fields', DEFAULT_CONFIG['exclude_fields']) + + values = data.get('values', []) + + # 参数验证 + if not values: + logger.warning("查询失败:未提供查询key值") + return jsonify({'error': '请提供查询key值'}), 400 + + if not keys: + logger.warning("查询失败:未提供主键字段") + return jsonify({'error': '请提供主键字段'}), 400 + + # 添加详细的参数日志 + logger.info(f"单表查询参数解析结果:") + logger.info(f" keys: {keys}") + logger.info(f" values数量: {len(values)}") + logger.info(f" fields_to_compare: {fields_to_compare}") + logger.info(f" exclude_fields: {exclude_fields}") + + logger.info(f"查询配置:{len(values)}个key值,生产表:{pro_config['table']},测试表:{test_config['table']}") + + # 创建数据库连接 + pro_cluster, pro_session = create_connection(pro_config) + test_cluster, test_session = create_connection(test_config) + + if not pro_session or not test_session: + logger.error("数据库连接失败") + return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500 + + try: + # 执行查询 + logger.info("执行生产环境查询") + pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields) + logger.info("执行测试环境查询") + test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields) + + logger.info(f"查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录") + + # 比较结果 + differences, field_diff_count, identical_results = compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values) + + # 统计信息 + different_ids = set() + for diff in differences: + if 'field' in diff: + different_ids.add(list(diff['key'].values())[0]) + + non_different_ids = set(values) - 
different_ids + + # 生成比较总结 + summary = generate_comparison_summary( + len(values), len(pro_data), len(test_data), + differences, identical_results, field_diff_count + ) + + result = { + 'total_keys': len(values), + 'pro_count': len(pro_data), + 'test_count': len(test_data), + 'differences': differences, + 'identical_results': identical_results, + 'field_diff_count': field_diff_count, + 'different_ids': list(different_ids), + 'non_different_ids': list(non_different_ids), + 'summary': summary, + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] + } + + logger.info(f"比对完成:发现 {len(differences)} 处差异") + + # 自动保存查询历史记录(可选,基于执行结果) + try: + # 生成历史记录名称 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + history_name = f"查询_{timestamp}" + history_description = f"自动保存 - 查询{len(values)}个Key,发现{len(differences)}处差异" + + # 保存历史记录 + history_id = save_query_history( + name=history_name, + description=history_description, + pro_config=pro_config, + test_config=test_config, + query_config={ + 'keys': keys, + 'fields_to_compare': fields_to_compare, + 'exclude_fields': exclude_fields + }, + query_keys=values, + results_summary=summary, + execution_time=0.0, # 可以后续优化计算实际执行时间 + total_keys=len(values), + differences_count=len(differences), + identical_count=len(identical_results), + # 添加查询结果数据 + raw_results={ + 'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [], + 'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else [] + }, + differences_data=differences, + identical_data=identical_results + ) + + # 关联查询日志与历史记录 + if history_id: + query_log_collector.set_history_id(history_id) + logger.info(f"查询历史记录保存成功: {history_name}, ID: {history_id}") + else: + logger.warning("查询历史记录保存失败,无法获取history_id") + except Exception as e: + logger.warning(f"保存查询历史记录失败: {e}") + + # 结束查询批次 + query_log_collector.end_current_batch() + return jsonify(result) + + except Exception as e: + logger.error(f"查询执行失败:{str(e)}") + # 结束查询批次(出错情况) + query_log_collector.end_current_batch() + return jsonify({'error': f'查询执行失败:{str(e)}'}), 500 + finally: + # 关闭连接 + if pro_cluster: + pro_cluster.shutdown() + if test_cluster: + test_cluster.shutdown() + + except Exception as e: + logger.error(f"请求处理失败:{str(e)}") + # 结束查询批次(请求处理出错) + query_log_collector.end_current_batch() + return jsonify({'error': f'请求处理失败:{str(e)}'}), 500 + + # 配置组管理API + @app.route('/api/config-groups', methods=['GET']) + def api_get_config_groups(): + """获取所有配置组""" + config_groups = get_config_groups() + return jsonify({'success': True, 'data': config_groups}) + + @app.route('/api/config-groups', methods=['POST']) + def api_save_config_group(): + """保存配置组""" + try: + data = request.json + name = data.get('name', '').strip() + description = data.get('description', '').strip() + pro_config = data.get('pro_config', {}) + test_config = data.get('test_config', {}) + + # 获取查询配置,支持两种格式 + if 'query_config' in data: + # 嵌套格式 + query_config = data.get('query_config', {}) + else: + # 平铺格式 + query_config = { + 'keys': data.get('keys', []), + 'fields_to_compare': data.get('fields_to_compare', []), + 'exclude_fields': data.get('exclude_fields', []) + } + + # 提取分表配置 + sharding_config = data.get('sharding_config') + + if not name: + return jsonify({'success': False, 'error': '配置组名称不能为空'}), 400 + + success = save_config_group(name, description, pro_config, test_config, query_config, sharding_config) + + if success: + return 
jsonify({'success': True, 'message': '配置组保存成功'}) + else: + return jsonify({'success': False, 'error': '配置组保存失败'}), 500 + + except Exception as e: + logger.error(f"保存配置组API失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/config-groups/', methods=['GET']) + def api_get_config_group(group_id): + """获取指定配置组详情""" + config_group = get_config_group_by_id(group_id) + + if config_group: + return jsonify({'success': True, 'data': config_group}) + else: + return jsonify({'success': False, 'error': '配置组不存在'}), 404 + + @app.route('/api/config-groups/', methods=['DELETE']) + def api_delete_config_group(group_id): + """删除配置组""" + success = delete_config_group(group_id) + + if success: + return jsonify({'success': True, 'message': '配置组删除成功'}) + else: + return jsonify({'success': False, 'error': '配置组删除失败'}), 500 + + # 查询历史管理API + @app.route('/api/query-history', methods=['GET']) + def api_get_query_history(): + """获取所有查询历史记录""" + history_list = get_query_history() + return jsonify({'success': True, 'data': history_list}) + + @app.route('/api/query-history', methods=['POST']) + def api_save_query_history(): + """保存查询历史记录,支持分表查询""" + try: + data = request.json + name = data.get('name', '').strip() + description = data.get('description', '').strip() + pro_config = data.get('pro_config', {}) + test_config = data.get('test_config', {}) + query_config = data.get('query_config', {}) + query_keys = data.get('query_keys', []) + results_summary = data.get('results_summary', {}) + execution_time = data.get('execution_time', 0.0) + total_keys = data.get('total_keys', 0) + differences_count = data.get('differences_count', 0) + identical_count = data.get('identical_count', 0) + # 新增分表相关字段 + sharding_config = data.get('sharding_config') + query_type = data.get('query_type', 'single') + + if not name: + return jsonify({'success': False, 'error': '历史记录名称不能为空'}), 400 + + success = save_query_history( + name, description, pro_config, test_config, query_config, + query_keys, results_summary, execution_time, total_keys, + differences_count, identical_count, sharding_config, query_type + ) + + if success: + query_type_desc = '分表查询' if query_type == 'sharding' else '单表查询' + return jsonify({'success': True, 'message': f'{query_type_desc}历史记录保存成功'}) + else: + return jsonify({'success': False, 'error': '查询历史记录保存失败'}), 500 + + except Exception as e: + logger.error(f"保存查询历史记录API失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/query-history/', methods=['GET']) + def api_get_query_history_detail(history_id): + """获取指定查询历史记录详情""" + history_record = get_query_history_by_id(history_id) + + if history_record: + return jsonify({'success': True, 'data': history_record}) + else: + return jsonify({'success': False, 'error': '查询历史记录不存在'}), 404 + + @app.route('/api/query-history//results', methods=['GET']) + def api_get_query_history_results(history_id): + """获取查询历史记录的完整结果数据""" + try: + history_record = get_query_history_by_id(history_id) + if not history_record: + return jsonify({'success': False, 'error': '历史记录不存在'}), 404 + + # 安全获取raw_results数据 + raw_results = history_record.get('raw_results') + if raw_results and isinstance(raw_results, dict): + raw_pro_data = raw_results.get('raw_pro_data', []) or [] + raw_test_data = raw_results.get('raw_test_data', []) or [] + sharding_info = raw_results.get('sharding_info') if history_record.get('query_type') == 'sharding' else None + else: + raw_pro_data = [] + raw_test_data = [] + sharding_info = None + + # 安全获取差异和相同结果数据 
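+            # Expected element shapes (as produced by compare_results):
+            #   differences_data: {'key': {...}, 'field': ..., 'pro_value': ..., 'test_value': ...}
+            #                     or {'key': {...}, 'message': '在测试表中未找到该行'}
+            #   identical_data:   {'key': {...}, 'pro_fields': {...}, 'test_fields': {...}}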
+ differences_data = history_record.get('differences_data') or [] + identical_data = history_record.get('identical_data') or [] + + # 构建完整的查询结果格式,与API查询结果保持一致 + result = { + 'total_keys': history_record['total_keys'], + 'pro_count': len(raw_pro_data), + 'test_count': len(raw_test_data), + 'differences': differences_data, + 'identical_results': identical_data, + 'field_diff_count': {}, # 可以从differences_data中重新计算 + 'summary': history_record.get('results_summary', {}), + 'raw_pro_data': raw_pro_data, + 'raw_test_data': raw_test_data, + # 如果是分表查询,添加分表信息 + 'sharding_info': sharding_info, + # 添加历史记录元信息 + 'history_info': { + 'id': history_record['id'], + 'name': history_record['name'], + 'description': history_record['description'], + 'created_at': history_record['created_at'], + 'query_type': history_record.get('query_type', 'single') + } + } + + # 重新计算field_diff_count + if differences_data: + field_diff_count = {} + for diff in differences_data: + if isinstance(diff, dict) and 'field' in diff: + field_name = diff['field'] + field_diff_count[field_name] = field_diff_count.get(field_name, 0) + 1 + result['field_diff_count'] = field_diff_count + + return jsonify({ + 'success': True, + 'data': result, + 'message': f'历史记录 "{history_record["name"]}" 结果加载成功' + }) + + except Exception as e: + logger.error(f"获取查询历史记录结果失败: {e}") + return jsonify({'success': False, 'error': f'获取历史记录结果失败: {str(e)}'}), 500 + + @app.route('/api/query-history/', methods=['DELETE']) + def api_delete_query_history(history_id): + """删除查询历史记录""" + success = delete_query_history(history_id) + + if success: + return jsonify({'success': True, 'message': '查询历史记录删除成功'}) + else: + return jsonify({'success': False, 'error': '查询历史记录删除失败'}), 500 + + # 查询日志管理API + @app.route('/api/query-logs', methods=['GET']) + def api_get_query_logs(): + """获取查询日志,支持分组显示和数据库存储""" + try: + limit = request.args.get('limit', type=int) + grouped = request.args.get('grouped', 'true').lower() == 'true' # 默认分组显示 + from_db = request.args.get('from_db', 'true').lower() == 'true' # 默认从数据库获取 + + if grouped: + # 返回分组日志 + grouped_logs = query_log_collector.get_logs_grouped_by_batch(limit, from_db) + # 获取总数(用于统计) + total_logs = query_log_collector._get_total_logs_count() if from_db else len(query_log_collector.logs) + + return jsonify({ + 'success': True, + 'data': grouped_logs, + 'total': total_logs, + 'grouped': True, + 'from_db': from_db + }) + else: + # 返回原始日志列表 + logs = query_log_collector.get_logs(limit, from_db) + total_logs = query_log_collector._get_total_logs_count() if from_db else len(query_log_collector.logs) + + return jsonify({ + 'success': True, + 'data': logs, + 'total': total_logs, + 'grouped': False, + 'from_db': from_db + }) + except Exception as e: + logger.error(f"获取查询日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/query-logs', methods=['DELETE']) + def api_clear_query_logs(): + """清空查询日志,支持清空数据库日志""" + try: + clear_db = request.args.get('clear_db', 'true').lower() == 'true' # 默认清空数据库 + query_log_collector.clear_logs(clear_db) + + message = '查询日志已清空(包括数据库)' if clear_db else '查询日志已清空(仅内存)' + return jsonify({'success': True, 'message': message}) + except Exception as e: + logger.error(f"清空查询日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/query-logs/cleanup', methods=['POST']) + def api_cleanup_old_logs(): + """清理旧的查询日志""" + try: + days_to_keep = request.json.get('days_to_keep', 30) if request.json else 30 + deleted_count = 
query_log_collector.cleanup_old_logs(days_to_keep) + + return jsonify({ + 'success': True, + 'message': f'成功清理 {deleted_count} 条超过 {days_to_keep} 天的旧日志', + 'deleted_count': deleted_count + }) + except Exception as e: + logger.error(f"清理旧日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/query-logs/history/', methods=['GET']) + def api_get_query_logs_by_history(history_id): + """根据历史记录ID获取相关查询日志""" + try: + logs = query_log_collector.get_logs_by_history_id(history_id) + + # 按批次分组显示 + grouped_logs = {} + batch_order = [] + + for log in logs: + batch_id = log.get('batch_id', 'unknown') + if batch_id not in grouped_logs: + grouped_logs[batch_id] = [] + batch_order.append(batch_id) + grouped_logs[batch_id].append(log) + + # 返回按时间顺序排列的批次 + grouped_result = [(batch_id, grouped_logs[batch_id]) for batch_id in batch_order] + + return jsonify({ + 'success': True, + 'data': grouped_result, + 'total': len(logs), + 'history_id': history_id, + 'grouped': True + }) + except Exception as e: + logger.error(f"获取历史记录相关查询日志失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + # Redis相关API + @app.route('/api/redis/test-connection', methods=['POST']) + def api_test_redis_connection(): + """测试Redis连接""" + try: + data = request.json + cluster_config = data.get('cluster_config', {}) + cluster_name = data.get('cluster_name', 'Redis集群') + + # 验证配置 + if not cluster_config.get('nodes'): + return jsonify({'success': False, 'error': '未配置Redis节点'}), 400 + + # 测试连接 + result = test_redis_connection(cluster_config, cluster_name) + + if result['success']: + return jsonify({ + 'success': True, + 'message': f'{cluster_name}连接成功', + 'data': { + 'connection_time': result['connection_time'], + 'cluster_info': result['cluster_info'] + } + }) + else: + return jsonify({ + 'success': False, + 'error': result['error'], + 'connection_time': result['connection_time'] + }), 500 + + except Exception as e: + logger.error(f"Redis连接测试失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/redis/compare', methods=['POST']) + def api_redis_compare(): + """Redis数据比较API""" + try: + data = request.json + + # 开始新的查询批次 + batch_id = query_log_collector.start_new_batch('Redis') + + logger.info("开始执行Redis数据比较") + + # 解析配置 + cluster1_config = data.get('cluster1_config', {}) + cluster2_config = data.get('cluster2_config', {}) + query_options = data.get('query_options', {}) + + # 参数验证 + if not cluster1_config.get('nodes'): + logger.warning("Redis比较失败:未配置第一个Redis集群") + return jsonify({'error': '请配置第一个Redis集群'}), 400 + + if not cluster2_config.get('nodes'): + logger.warning("Redis比较失败:未配置第二个Redis集群") + return jsonify({'error': '请配置第二个Redis集群'}), 400 + + # 添加详细的参数日志 + logger.info(f"Redis比较参数解析结果:") + logger.info(f" 集群1: {cluster1_config.get('name', '集群1')}") + logger.info(f" 集群2: {cluster2_config.get('name', '集群2')}") + logger.info(f" 查询模式: {query_options.get('mode', 'random')}") + + if query_options.get('mode') == 'random': + logger.info(f" 随机查询数量: {query_options.get('count', 100)}") + logger.info(f" Key模式: {query_options.get('pattern', '*')}") + else: + logger.info(f" 指定Key数量: {len(query_options.get('keys', []))}") + + # 执行Redis比较 + logger.info("执行Redis数据比较") + result = execute_redis_comparison(cluster1_config, cluster2_config, query_options) + + if 'error' in result: + logger.error(f"Redis比较失败: {result['error']}") + query_log_collector.end_current_batch() + return jsonify({'error': result['error']}), 500 + + logger.info(f"Redis比较完成") + logger.info(f"比较统计: 
总计{result['stats']['total_keys']}个key,相同{result['stats']['identical_count']}个,不同{result['stats']['different_count']}个") + + # 自动保存Redis查询历史记录 + try: + # 生成历史记录名称 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + history_name = f"Redis比较_{timestamp}" + history_description = f"自动保存 - Redis比较{result['stats']['total_keys']}个Key,发现{result['stats']['different_count']}处差异" + + # 保存历史记录 + history_id = save_query_history( + name=history_name, + description=history_description, + pro_config=cluster1_config, + test_config=cluster2_config, + query_config=query_options, + query_keys=result.get('query_options', {}).get('keys', []), + results_summary=result['stats'], + execution_time=result['performance_report']['total_time'], + total_keys=result['stats']['total_keys'], + differences_count=result['stats']['different_count'], + identical_count=result['stats']['identical_count'], + query_type='redis', + # 添加查询结果数据 + raw_results={ + 'identical_results': result['identical_results'], + 'different_results': result['different_results'], + 'missing_results': result['missing_results'], + 'performance_report': result['performance_report'] + }, + differences_data=result['different_results'], + identical_data=result['identical_results'] + ) + + # 关联查询日志与历史记录 + if history_id: + query_log_collector.set_history_id(history_id) + logger.info(f"Redis查询历史记录保存成功: {history_name}, ID: {history_id}") + else: + logger.warning("Redis查询历史记录保存失败,无法获取history_id") + + except Exception as e: + logger.warning(f"保存Redis查询历史记录失败: {e}") + + # 结束查询批次 + query_log_collector.end_current_batch() + return jsonify(result) + + except Exception as e: + logger.error(f"Redis比较请求处理失败:{str(e)}") + # 结束查询批次(请求处理出错) + query_log_collector.end_current_batch() + return jsonify({'error': f'Redis比较请求处理失败:{str(e)}'}), 500 + + @app.route('/api/redis/default-config') + def get_redis_default_config(): + """获取Redis默认配置""" + default_redis_config = { + 'cluster1_config': { + 'name': '生产集群', + 'nodes': [ + {'host': '127.0.0.1', 'port': 7000} + ], + 'password': '', + 'socket_timeout': 3, + 'socket_connect_timeout': 3, + 'max_connections_per_node': 16 + }, + 'cluster2_config': { + 'name': '测试集群', + 'nodes': [ + {'host': '127.0.0.1', 'port': 7001} + ], + 'password': '', + 'socket_timeout': 3, + 'socket_connect_timeout': 3, + 'max_connections_per_node': 16 + }, + 'query_options': { + 'mode': 'random', + 'count': 100, + 'pattern': '*', + 'source_cluster': 'cluster2', + 'keys': [] + } + } + return jsonify(default_redis_config) + + # Redis配置管理API + @app.route('/api/redis/config-groups', methods=['GET']) + def api_get_redis_config_groups(): + """获取所有Redis配置组""" + config_groups = get_redis_config_groups() + return jsonify({'success': True, 'data': config_groups}) + + @app.route('/api/redis/config-groups', methods=['POST']) + def api_save_redis_config_group(): + """保存Redis配置组""" + try: + data = request.json + name = data.get('name', '').strip() + description = data.get('description', '').strip() + cluster1_config = data.get('cluster1_config', {}) + cluster2_config = data.get('cluster2_config', {}) + query_options = data.get('query_options', {}) + + # 参数验证 + if not name: + return jsonify({'success': False, 'error': '配置组名称不能为空'}), 400 + + if not cluster1_config or not cluster1_config.get('nodes'): + return jsonify({'success': False, 'error': '请配置集群1信息'}), 400 + + if not cluster2_config or not cluster2_config.get('nodes'): + return jsonify({'success': False, 'error': '请配置集群2信息'}), 400 + + success = save_redis_config_group(name, description, cluster1_config, 
cluster2_config, query_options) + + if success: + return jsonify({'success': True, 'message': f'Redis配置组 "{name}" 保存成功'}) + else: + return jsonify({'success': False, 'error': 'Redis配置组保存失败'}), 500 + + except Exception as e: + logger.error(f"保存Redis配置组API失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/redis/config-groups/', methods=['GET']) + def api_get_redis_config_group_detail(group_id): + """获取Redis配置组详情""" + config_group = get_redis_config_group_by_id(group_id) + + if config_group: + return jsonify({'success': True, 'data': config_group}) + else: + return jsonify({'success': False, 'error': 'Redis配置组不存在'}), 404 + + @app.route('/api/redis/config-groups/', methods=['DELETE']) + def api_delete_redis_config_group(group_id): + """删除Redis配置组""" + success = delete_redis_config_group(group_id) + + if success: + return jsonify({'success': True, 'message': 'Redis配置组删除成功'}) + else: + return jsonify({'success': False, 'error': 'Redis配置组删除失败'}), 500 + + @app.route('/api/redis/import-config', methods=['POST']) + def api_import_redis_config(): + """一键导入Redis配置""" + try: + data = request.json + config_text = data.get('config_text', '').strip() + + if not config_text: + return jsonify({'success': False, 'error': '配置内容不能为空'}), 400 + + # 解析配置 + redis_config = parse_redis_config_from_yaml(config_text) + + if not redis_config: + return jsonify({'success': False, 'error': '配置格式解析失败,请检查配置内容'}), 400 + + # 验证必要字段 + if not redis_config.get('nodes'): + return jsonify({'success': False, 'error': '未找到有效的集群地址配置'}), 400 + + return jsonify({ + 'success': True, + 'data': redis_config, + 'message': '配置导入成功' + }) + + except Exception as e: + logger.error(f"导入Redis配置失败: {e}") + return jsonify({'success': False, 'error': f'导入配置失败: {str(e)}'}), 500 + + # Redis查询历史API + @app.route('/api/redis/query-history', methods=['GET']) + def api_get_redis_query_history(): + """获取Redis查询历史记录""" + history_list = get_redis_query_history() + return jsonify({'success': True, 'data': history_list}) + + @app.route('/api/redis/query-history', methods=['POST']) + def api_save_redis_query_history(): + """保存Redis查询历史记录""" + try: + data = request.json + name = data.get('name', '').strip() + description = data.get('description', '').strip() + cluster1_config = data.get('cluster1_config', {}) + cluster2_config = data.get('cluster2_config', {}) + query_options = data.get('query_options', {}) + query_keys = data.get('query_keys', []) + results_summary = data.get('results_summary', {}) + execution_time = data.get('execution_time', 0) + total_keys = data.get('total_keys', 0) + different_count = data.get('different_count', 0) + identical_count = data.get('identical_count', 0) + missing_count = data.get('missing_count', 0) + raw_results = data.get('raw_results') + + # 参数验证 + if not name: + return jsonify({'success': False, 'error': '历史记录名称不能为空'}), 400 + + history_id = save_redis_query_history( + name, description, cluster1_config, cluster2_config, query_options, + query_keys, results_summary, execution_time, total_keys, + different_count, identical_count, missing_count, raw_results + ) + + if history_id: + return jsonify({'success': True, 'message': f'Redis查询历史记录保存成功', 'history_id': history_id}) + else: + return jsonify({'success': False, 'error': 'Redis查询历史记录保存失败'}), 500 + + except Exception as e: + logger.error(f"保存Redis查询历史记录API失败: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/redis/query-history/', methods=['GET']) + def api_get_redis_query_history_detail(history_id): + 
"""获取Redis查询历史记录详情""" + history_record = get_redis_query_history_by_id(history_id) + + if history_record: + return jsonify({'success': True, 'data': history_record}) + else: + return jsonify({'success': False, 'error': 'Redis查询历史记录不存在'}), 404 + + @app.route('/api/redis/query-history/', methods=['DELETE']) + def api_delete_redis_query_history(history_id): + """删除Redis查询历史记录""" + success = delete_redis_query_history(history_id) + + if success: + return jsonify({'success': True, 'message': 'Redis查询历史记录删除成功'}) + else: + return jsonify({'success': False, 'error': 'Redis查询历史记录删除失败'}), 500 \ No newline at end of file diff --git a/modules/cassandra_client.py b/modules/cassandra_client.py new file mode 100644 index 0000000..5ab13c8 --- /dev/null +++ b/modules/cassandra_client.py @@ -0,0 +1,114 @@ +""" +Cassandra连接管理模块 +负责Cassandra数据库的连接和错误诊断 +""" + +import time +import logging +from cassandra.cluster import Cluster +from cassandra.auth import PlainTextAuthProvider +from cassandra.policies import DCAwareRoundRobinPolicy + +logger = logging.getLogger(__name__) + +def create_connection(config): + """创建Cassandra连接,带有增强的错误诊断和容错机制""" + start_time = time.time() + + logger.info(f"=== 开始创建Cassandra连接 ===") + logger.info(f"主机列表: {config.get('hosts', [])}") + logger.info(f"端口: {config.get('port', 9042)}") + logger.info(f"用户名: {config.get('username', 'N/A')}") + logger.info(f"Keyspace: {config.get('keyspace', 'N/A')}") + + try: + logger.info("正在创建认证提供者...") + auth_provider = PlainTextAuthProvider(username=config['username'], password=config['password']) + + logger.info("正在创建集群连接...") + # 设置负载均衡策略,避免单点故障 + load_balancing_policy = DCAwareRoundRobinPolicy(local_dc=config.get('datacenter', 'dc1')) + + # 创建连接配置,增加容错参数 + cluster = Cluster( + config['hosts'], + port=config['port'], + auth_provider=auth_provider, + load_balancing_policy=load_balancing_policy, + # 增加容错配置 + protocol_version=4, # 使用稳定的协议版本 + connect_timeout=15, # 连接超时 + control_connection_timeout=15, # 控制连接超时 + max_schema_agreement_wait=30 # schema同步等待时间 + ) + + logger.info("正在连接到Keyspace...") + session = cluster.connect(config['keyspace']) + + # 设置session级别的容错参数 + session.default_timeout = 30 # 查询超时时间 + + connection_time = time.time() - start_time + logger.info(f"✅ Cassandra连接成功: 连接时间={connection_time:.3f}秒") + + # 记录集群状态 + try: + cluster_name = cluster.metadata.cluster_name or "Unknown" + logger.info(f" 集群名称: {cluster_name}") + + # 记录可用主机状态 + live_hosts = [str(host.address) for host in cluster.metadata.all_hosts() if host.is_up] + down_hosts = [str(host.address) for host in cluster.metadata.all_hosts() if not host.is_up] + + logger.info(f" 可用节点: {live_hosts} ({len(live_hosts)}个)") + if down_hosts: + logger.warning(f" 故障节点: {down_hosts} ({len(down_hosts)}个)") + + except Exception as meta_error: + logger.warning(f"无法获取集群元数据: {meta_error}") + + return cluster, session + + except Exception as e: + connection_time = time.time() - start_time + error_msg = str(e) + + logger.error(f"❌ Cassandra连接失败: 连接时间={connection_time:.3f}秒") + logger.error(f"错误类型: {type(e).__name__}") + logger.error(f"错误详情: {error_msg}") + + # 提供详细的诊断信息 + if "connection refused" in error_msg.lower() or "unable to connect" in error_msg.lower(): + logger.error("❌ 诊断:无法连接到Cassandra服务器") + logger.error("🔧 建议检查:") + logger.error(" 1. Cassandra服务是否启动") + logger.error(" 2. 主机地址和端口是否正确") + logger.error(" 3. 网络防火墙是否阻挡连接") + + elif "timeout" in error_msg.lower(): + logger.error("❌ 诊断:连接超时") + logger.error("🔧 建议检查:") + logger.error(" 1. 网络延迟是否过高") + logger.error(" 2. 
Cassandra服务器负载是否过高") + logger.error(" 3. 增加连接超时时间") + + elif "authentication" in error_msg.lower() or "unauthorized" in error_msg.lower(): + logger.error("❌ 诊断:认证失败") + logger.error("🔧 建议检查:") + logger.error(" 1. 用户名和密码是否正确") + logger.error(" 2. 用户是否有访问该keyspace的权限") + + elif "keyspace" in error_msg.lower(): + logger.error("❌ 诊断:Keyspace不存在") + logger.error("🔧 建议检查:") + logger.error(" 1. Keyspace名称是否正确") + logger.error(" 2. Keyspace是否已创建") + + else: + logger.error("❌ 诊断:未知连接错误") + logger.error("🔧 建议:") + logger.error(" 1. 检查所有连接参数") + logger.error(" 2. 查看Cassandra服务器日志") + logger.error(" 3. 测试网络连通性") + + return None, None \ No newline at end of file diff --git a/modules/config_manager.py b/modules/config_manager.py new file mode 100644 index 0000000..f796699 --- /dev/null +++ b/modules/config_manager.py @@ -0,0 +1,671 @@ +""" +配置管理模块 +负责配置组和查询历史的CRUD操作 +""" + +import json +import logging +from datetime import datetime +from .database import ensure_database, get_db_connection + +logger = logging.getLogger(__name__) + +# 默认配置(不显示敏感信息) +DEFAULT_CONFIG = { + 'pro_config': { + 'cluster_name': '', + 'hosts': [], + 'port': 9042, + 'datacenter': '', + 'username': '', + 'password': '', + 'keyspace': '', + 'table': '' + }, + 'test_config': { + 'cluster_name': '', + 'hosts': [], + 'port': 9042, + 'datacenter': '', + 'username': '', + 'password': '', + 'keyspace': '', + 'table': '' + }, + 'keys': [], + 'fields_to_compare': [], + 'exclude_fields': [] +} + +# Redis默认配置 +REDIS_DEFAULT_CONFIG = { + 'cluster1_config': { + 'name': '生产集群', + 'nodes': [ + {'host': '127.0.0.1', 'port': 7000} + ], + 'password': '', + 'socket_timeout': 3, + 'socket_connect_timeout': 3, + 'max_connections_per_node': 16 + }, + 'cluster2_config': { + 'name': '测试集群', + 'nodes': [ + {'host': '127.0.0.1', 'port': 7001} + ], + 'password': '', + 'socket_timeout': 3, + 'socket_connect_timeout': 3, + 'max_connections_per_node': 16 + }, + 'query_options': { + 'mode': 'random', + 'count': 100, + 'pattern': '*', + 'source_cluster': 'cluster2', + 'keys': [] + } +} + +def save_redis_config_group(name, description, cluster1_config, cluster2_config, query_options): + """保存Redis配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT OR REPLACE INTO redis_config_groups + (name, description, cluster1_config, cluster2_config, query_options, updated_at) + VALUES (?, ?, ?, ?, ?, ?) 
+ ''', ( + name, description, + json.dumps(cluster1_config), + json.dumps(cluster2_config), + json.dumps(query_options), + datetime.now().isoformat() + )) + conn.commit() + logger.info(f"Redis配置组 '{name}' 保存成功") + return True + except Exception as e: + logger.error(f"保存Redis配置组失败: {e}") + return False + finally: + conn.close() + +def get_redis_config_groups(): + """获取所有Redis配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return [] + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, created_at, updated_at + FROM redis_config_groups + ORDER BY updated_at DESC + ''') + rows = cursor.fetchall() + + config_groups = [] + for row in rows: + config_groups.append({ + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + }) + + return config_groups + except Exception as e: + logger.error(f"获取Redis配置组失败: {e}") + return [] + finally: + conn.close() + +def get_redis_config_group_by_id(group_id): + """根据ID获取Redis配置组详情""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, cluster1_config, cluster2_config, query_options, + created_at, updated_at + FROM redis_config_groups WHERE id = ? + ''', (group_id,)) + row = cursor.fetchone() + + if row: + config = { + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'cluster1_config': json.loads(row['cluster1_config']), + 'cluster2_config': json.loads(row['cluster2_config']), + 'query_options': json.loads(row['query_options']), + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + } + return config + return None + except Exception as e: + logger.error(f"获取Redis配置组详情失败: {e}") + return None + finally: + conn.close() + +def delete_redis_config_group(group_id): + """删除Redis配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute('DELETE FROM redis_config_groups WHERE id = ?', (group_id,)) + conn.commit() + success = cursor.rowcount > 0 + if success: + logger.info(f"Redis配置组ID {group_id} 删除成功") + return success + except Exception as e: + logger.error(f"删除Redis配置组失败: {e}") + return False + finally: + conn.close() + +def save_redis_query_history(name, description, cluster1_config, cluster2_config, query_options, + query_keys, results_summary, execution_time, total_keys, + different_count, identical_count, missing_count, raw_results=None): + """保存Redis查询历史记录,返回历史记录ID""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT INTO redis_query_history + (name, description, cluster1_config, cluster2_config, query_options, query_keys, + results_summary, execution_time, total_keys, different_count, identical_count, + missing_count, raw_results) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ''', ( + name, description, + json.dumps(cluster1_config), + json.dumps(cluster2_config), + json.dumps(query_options), + json.dumps(query_keys), + json.dumps(results_summary), + execution_time, + total_keys, + different_count, + identical_count, + missing_count, + json.dumps(raw_results) if raw_results else None + )) + + # 获取插入记录的ID + history_id = cursor.lastrowid + conn.commit() + logger.info(f"Redis查询历史记录 '{name}' 保存成功,ID:{history_id}") + return history_id + except Exception as e: + logger.error(f"保存Redis查询历史记录失败: {e}") + return None + finally: + conn.close() + +def get_redis_query_history(): + """获取Redis查询历史记录""" + if not ensure_database(): + logger.error("数据库初始化失败") + return [] + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, execution_time, total_keys, + different_count, identical_count, missing_count, created_at + FROM redis_query_history + ORDER BY created_at DESC + ''') + rows = cursor.fetchall() + + history_list = [] + for row in rows: + history_list.append({ + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'execution_time': row['execution_time'], + 'total_keys': row['total_keys'], + 'different_count': row['different_count'], + 'identical_count': row['identical_count'], + 'missing_count': row['missing_count'], + 'created_at': row['created_at'] + }) + + return history_list + except Exception as e: + logger.error(f"获取Redis查询历史记录失败: {e}") + return [] + finally: + conn.close() + +def get_redis_query_history_by_id(history_id): + """根据ID获取Redis查询历史记录详情""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT * FROM redis_query_history WHERE id = ? 
+ ''', (history_id,)) + row = cursor.fetchone() + + if row: + return { + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'cluster1_config': json.loads(row['cluster1_config']), + 'cluster2_config': json.loads(row['cluster2_config']), + 'query_options': json.loads(row['query_options']), + 'query_keys': json.loads(row['query_keys']), + 'results_summary': json.loads(row['results_summary']), + 'execution_time': row['execution_time'], + 'total_keys': row['total_keys'], + 'different_count': row['different_count'], + 'identical_count': row['identical_count'], + 'missing_count': row['missing_count'], + 'created_at': row['created_at'], + 'raw_results': json.loads(row['raw_results']) if row['raw_results'] else None + } + return None + except Exception as e: + logger.error(f"获取Redis查询历史记录详情失败: {e}") + return None + finally: + conn.close() + +def delete_redis_query_history(history_id): + """删除Redis查询历史记录""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute('DELETE FROM redis_query_history WHERE id = ?', (history_id,)) + conn.commit() + success = cursor.rowcount > 0 + if success: + logger.info(f"Redis查询历史记录ID {history_id} 删除成功") + return success + except Exception as e: + logger.error(f"删除Redis查询历史记录失败: {e}") + return False + finally: + conn.close() + +def parse_redis_config_from_yaml(yaml_text): + """从YAML格式文本解析Redis配置""" + try: + config = {} + lines = yaml_text.strip().split('\n') + + for line in lines: + line = line.strip() + if ':' in line: + key, value = line.split(':', 1) + key = key.strip() + value = value.strip() + + # 移除引号 + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + elif value.startswith("'") and value.endswith("'"): + value = value[1:-1] + + config[key] = value + + # 转换为Redis集群配置格式 + redis_config = { + 'name': config.get('clusterName', ''), + 'nodes': [], + 'password': config.get('clusterPassword', ''), + 'socket_timeout': 3, + 'socket_connect_timeout': 3, + 'max_connections_per_node': 16 + } + + # 解析地址 + cluster_address = config.get('clusterAddress', '') + if cluster_address: + if ':' in cluster_address: + host, port = cluster_address.split(':', 1) + redis_config['nodes'] = [{'host': host, 'port': int(port)}] + else: + redis_config['nodes'] = [{'host': cluster_address, 'port': 6379}] + + return redis_config + except Exception as e: + logger.error(f"解析Redis配置失败: {e}") + return None + +def save_config_group(name, description, pro_config, test_config, query_config, sharding_config=None): + """保存配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT OR REPLACE INTO config_groups + (name, description, pro_config, test_config, query_config, sharding_config, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ''', ( + name, description, + json.dumps(pro_config), + json.dumps(test_config), + json.dumps(query_config), + json.dumps(sharding_config) if sharding_config else None, + datetime.now().isoformat() + )) + conn.commit() + logger.info(f"配置组 '{name}' 保存成功,包含分表配置: {sharding_config is not None}") + return True + except Exception as e: + logger.error(f"保存配置组失败: {e}") + return False + finally: + conn.close() + +def get_config_groups(): + """获取所有配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return [] + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, created_at, updated_at + FROM config_groups + ORDER BY updated_at DESC + ''') + rows = cursor.fetchall() + + config_groups = [] + for row in rows: + config_groups.append({ + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + }) + + return config_groups + except Exception as e: + logger.error(f"获取配置组失败: {e}") + return [] + finally: + conn.close() + +def get_config_group_by_id(group_id): + """根据ID获取配置组详情""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, pro_config, test_config, query_config, + sharding_config, created_at, updated_at + FROM config_groups WHERE id = ? + ''', (group_id,)) + row = cursor.fetchone() + + if row: + config = { + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'pro_config': json.loads(row['pro_config']), + 'test_config': json.loads(row['test_config']), + 'query_config': json.loads(row['query_config']), + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + } + + # 添加分表配置 + if row['sharding_config']: + try: + config['sharding_config'] = json.loads(row['sharding_config']) + except (json.JSONDecodeError, TypeError): + config['sharding_config'] = None + else: + config['sharding_config'] = None + + return config + return None + except Exception as e: + logger.error(f"获取配置组详情失败: {e}") + return None + finally: + conn.close() + +def delete_config_group(group_id): + """删除配置组""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute('DELETE FROM config_groups WHERE id = ?', (group_id,)) + conn.commit() + success = cursor.rowcount > 0 + if success: + logger.info(f"配置组ID {group_id} 删除成功") + return success + except Exception as e: + logger.error(f"删除配置组失败: {e}") + return False + finally: + conn.close() + +def save_query_history(name, description, pro_config, test_config, query_config, query_keys, + results_summary, execution_time, total_keys, differences_count, identical_count, + sharding_config=None, query_type='single', raw_results=None, differences_data=None, identical_data=None): + """保存查询历史记录,支持分表查询和查询结果数据,返回历史记录ID""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT INTO query_history + (name, description, pro_config, test_config, query_config, query_keys, + results_summary, execution_time, total_keys, differences_count, identical_count, + sharding_config, query_type, raw_results, differences_data, identical_data) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ''', ( + name, description, + json.dumps(pro_config), + json.dumps(test_config), + json.dumps(query_config), + json.dumps(query_keys), + json.dumps(results_summary), + execution_time, + total_keys, + differences_count, + identical_count, + json.dumps(sharding_config) if sharding_config else None, + query_type, + json.dumps(raw_results) if raw_results else None, + json.dumps(differences_data) if differences_data else None, + json.dumps(identical_data) if identical_data else None + )) + + # 获取插入记录的ID + history_id = cursor.lastrowid + conn.commit() + logger.info(f"查询历史记录 '{name}' 保存成功,查询类型:{query_type},ID:{history_id}") + return history_id + except Exception as e: + logger.error(f"保存查询历史记录失败: {e}") + return None + finally: + conn.close() + +def get_query_history(): + """获取所有查询历史记录""" + if not ensure_database(): + logger.error("数据库初始化失败") + return [] + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT id, name, description, execution_time, total_keys, + differences_count, identical_count, created_at, query_type + FROM query_history + ORDER BY created_at DESC + ''') + rows = cursor.fetchall() + + history_list = [] + for row in rows: + # 获取列名列表以检查字段是否存在 + column_names = [desc[0] for desc in cursor.description] + history_list.append({ + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'execution_time': row['execution_time'], + 'total_keys': row['total_keys'], + 'differences_count': row['differences_count'], + 'identical_count': row['identical_count'], + 'created_at': row['created_at'], + 'query_type': row['query_type'] if 'query_type' in column_names else 'single' + }) + + return history_list + except Exception as e: + logger.error(f"获取查询历史记录失败: {e}") + return [] + finally: + conn.close() + +def get_query_history_by_id(history_id): + """根据ID获取查询历史记录详情""" + if not ensure_database(): + logger.error("数据库初始化失败") + return None + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute(''' + SELECT * FROM query_history WHERE id = ? 
+ ''', (history_id,)) + row = cursor.fetchone() + + if row: + # 获取列名列表以检查字段是否存在 + column_names = [desc[0] for desc in cursor.description] + return { + 'id': row['id'], + 'name': row['name'], + 'description': row['description'], + 'pro_config': json.loads(row['pro_config']), + 'test_config': json.loads(row['test_config']), + 'query_config': json.loads(row['query_config']), + 'query_keys': json.loads(row['query_keys']), + 'results_summary': json.loads(row['results_summary']), + 'execution_time': row['execution_time'], + 'total_keys': row['total_keys'], + 'differences_count': row['differences_count'], + 'identical_count': row['identical_count'], + 'created_at': row['created_at'], + # 处理新字段,保持向后兼容 + 'sharding_config': json.loads(row['sharding_config']) if 'sharding_config' in column_names and row['sharding_config'] else None, + 'query_type': row['query_type'] if 'query_type' in column_names else 'single', + # 添加查询结果数据支持 + 'raw_results': json.loads(row['raw_results']) if 'raw_results' in column_names and row['raw_results'] else None, + 'differences_data': json.loads(row['differences_data']) if 'differences_data' in column_names and row['differences_data'] else None, + 'identical_data': json.loads(row['identical_data']) if 'identical_data' in column_names and row['identical_data'] else None + } + return None + except Exception as e: + logger.error(f"获取查询历史记录详情失败: {e}") + return None + finally: + conn.close() + +def delete_query_history(history_id): + """删除查询历史记录""" + if not ensure_database(): + logger.error("数据库初始化失败") + return False + + conn = get_db_connection() + cursor = conn.cursor() + + try: + cursor.execute('DELETE FROM query_history WHERE id = ?', (history_id,)) + conn.commit() + success = cursor.rowcount > 0 + if success: + logger.info(f"查询历史记录ID {history_id} 删除成功") + return success + except Exception as e: + logger.error(f"删除查询历史记录失败: {e}") + return False + finally: + conn.close() \ No newline at end of file diff --git a/modules/data_comparison.py b/modules/data_comparison.py new file mode 100644 index 0000000..fb1816a --- /dev/null +++ b/modules/data_comparison.py @@ -0,0 +1,363 @@ +""" +数据比较模块 +负责两个数据集之间的比较、JSON处理和差异分析 +""" + +import json +import logging + +logger = logging.getLogger(__name__) + +def compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values): + """比较查询结果,支持复合主键""" + differences = [] + field_diff_count = {} + identical_results = [] # 存储相同的结果 + + def match_composite_key(row, composite_value, keys): + """检查数据行是否匹配复合主键值""" + if len(keys) == 1: + # 单主键匹配 + return getattr(row, keys[0]) == composite_value + else: + # 复合主键匹配 + if isinstance(composite_value, str) and ',' in composite_value: + key_values = [v.strip() for v in composite_value.split(',')] + if len(key_values) == len(keys): + return all(str(getattr(row, key)) == key_val for key, key_val in zip(keys, key_values)) + # 如果不是复合值,只匹配第一个主键 + return getattr(row, keys[0]) == composite_value + + for value in values: + # 查找生产表和测试表中该主键值的相关数据 + rows_pro = [row for row in pro_data if match_composite_key(row, value, keys)] + rows_test = [row for row in test_data if match_composite_key(row, value, keys)] + + for row_pro in rows_pro: + # 在测试表中查找相同主键的行 + row_test = next( + (row for row in rows_test if all(getattr(row, key) == getattr(row_pro, key) for key in keys)), + None + ) + + if row_test: + # 确定要比较的列 + columns = fields_to_compare if fields_to_compare else row_pro._fields + columns = [col for col in columns if col not in exclude_fields] + + has_difference = False + row_differences = [] + 
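As a side note on the composite-key handling above: a minimal standalone sketch of the same value-splitting rule, kept separate from the production code path. The field names (`docid`, `id`) and key values are assumptions used only for illustration.

```python
# Simplified illustration of the composite-key rule used by match_composite_key():
# a comma-separated value is split into one part per key field.
def split_composite_value(composite_value, keys):
    """Return {key: value} when the value carries one part per key, else None."""
    parts = [v.strip() for v in str(composite_value).split(',')]
    if len(parts) == len(keys):
        return dict(zip(keys, parts))
    return None

print(split_composite_value("8825...FEDB,8825...FED", ["docid", "id"]))
# -> {'docid': '8825...FEDB', 'id': '8825...FED'}
print(split_composite_value("single-key-value", ["docid"]))
# -> {'docid': 'single-key-value'}
```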
identical_fields = {} + + for column in columns: + value_pro = getattr(row_pro, column) + value_test = getattr(row_test, column) + + # 使用智能比较函数 + if not compare_values(value_pro, value_test): + has_difference = True + # 格式化显示值 + formatted_pro_value = format_json_for_display(value_pro) + formatted_test_value = format_json_for_display(value_test) + + row_differences.append({ + 'key': {key: getattr(row_pro, key) for key in keys}, + 'field': column, + 'pro_value': formatted_pro_value, + 'test_value': formatted_test_value, + 'is_json': is_json_field(value_pro) or is_json_field(value_test), + 'is_array': is_json_array_field(value_pro) or is_json_array_field(value_test) + }) + + # 统计字段差异次数 + field_diff_count[column] = field_diff_count.get(column, 0) + 1 + else: + # 存储相同的字段值 + identical_fields[column] = format_json_for_display(value_pro) + + if has_difference: + differences.extend(row_differences) + else: + # 如果没有差异,存储到相同结果中 + identical_results.append({ + 'key': {key: getattr(row_pro, key) for key in keys}, + 'pro_fields': identical_fields, + 'test_fields': {col: format_json_for_display(getattr(row_test, col)) for col in columns} + }) + else: + # 在测试表中未找到对应行 + differences.append({ + 'key': {key: getattr(row_pro, key) for key in keys}, + 'message': '在测试表中未找到该行' + }) + + # 检查测试表中是否有生产表中不存在的行 + for row_test in rows_test: + row_pro = next( + (row for row in rows_pro if all(getattr(row, key) == getattr(row_test, key) for key in keys)), + None + ) + if not row_pro: + differences.append({ + 'key': {key: getattr(row_test, key) for key in keys}, + 'message': '在生产表中未找到该行' + }) + + return differences, field_diff_count, identical_results + +def normalize_json_string(value): + """标准化JSON字符串,用于比较""" + if not isinstance(value, str): + return value + + try: + # 尝试解析JSON + json_obj = json.loads(value) + + # 如果是数组,需要进行特殊处理 + if isinstance(json_obj, list): + # 尝试对数组元素进行标准化排序 + normalized_array = normalize_json_array(json_obj) + return json.dumps(normalized_array, sort_keys=True, separators=(',', ':')) + else: + # 普通对象,直接序列化 + return json.dumps(json_obj, sort_keys=True, separators=(',', ':')) + except (json.JSONDecodeError, TypeError): + # 如果不是JSON,返回原值 + return value + +def normalize_json_array(json_array): + """标准化JSON数组,处理元素顺序问题""" + try: + normalized_elements = [] + + for element in json_array: + if isinstance(element, dict): + # 对字典元素进行标准化 + normalized_elements.append(json.dumps(element, sort_keys=True, separators=(',', ':'))) + elif isinstance(element, str): + # 如果是字符串,尝试解析为JSON + try: + parsed_element = json.loads(element) + normalized_elements.append(json.dumps(parsed_element, sort_keys=True, separators=(',', ':'))) + except: + normalized_elements.append(element) + else: + normalized_elements.append(element) + + # 对标准化后的元素进行排序,确保顺序一致 + normalized_elements.sort() + + # 重新解析为对象数组 + result_array = [] + for element in normalized_elements: + if isinstance(element, str): + try: + result_array.append(json.loads(element)) + except: + result_array.append(element) + else: + result_array.append(element) + + return result_array + + except Exception as e: + logger.warning(f"数组标准化失败: {e}") + return json_array + +def is_json_array_field(value): + """检查字段是否为JSON数组格式""" + if not isinstance(value, (str, list)): + return False + + try: + if isinstance(value, str): + parsed = json.loads(value) + return isinstance(parsed, list) + elif isinstance(value, list): + # 检查是否为JSON字符串数组 + if len(value) > 0 and isinstance(value[0], str): + try: + json.loads(value[0]) + return True + except: + return False + return True + except: + return 
False + +def compare_array_values(value1, value2): + """专门用于比较数组类型的值""" + try: + # 处理字符串表示的数组 + if isinstance(value1, str) and isinstance(value2, str): + try: + array1 = json.loads(value1) + array2 = json.loads(value2) + if isinstance(array1, list) and isinstance(array2, list): + return compare_json_arrays(array1, array2) + except: + pass + + # 处理Python列表类型 + elif isinstance(value1, list) and isinstance(value2, list): + return compare_json_arrays(value1, value2) + + # 处理混合情况:一个是字符串数组,一个是列表 + elif isinstance(value1, list) and isinstance(value2, str): + try: + array2 = json.loads(value2) + if isinstance(array2, list): + return compare_json_arrays(value1, array2) + except: + pass + elif isinstance(value1, str) and isinstance(value2, list): + try: + array1 = json.loads(value1) + if isinstance(array1, list): + return compare_json_arrays(array1, value2) + except: + pass + + return False + except Exception as e: + logger.warning(f"数组比较失败: {e}") + return False + +def compare_json_arrays(array1, array2): + """比较两个JSON数组,忽略元素顺序""" + try: + if len(array1) != len(array2): + return False + + # 标准化两个数组 + normalized_array1 = normalize_json_array(array1.copy()) + normalized_array2 = normalize_json_array(array2.copy()) + + # 将标准化后的数组转换为可比较的格式 + comparable1 = json.dumps(normalized_array1, sort_keys=True) + comparable2 = json.dumps(normalized_array2, sort_keys=True) + + return comparable1 == comparable2 + + except Exception as e: + logger.warning(f"JSON数组比较失败: {e}") + return False + +def format_json_for_display(value): + """格式化JSON用于显示""" + if not isinstance(value, str): + return str(value) + + try: + # 尝试解析JSON + json_obj = json.loads(value) + # 格式化显示(带缩进) + return json.dumps(json_obj, sort_keys=True, indent=2, ensure_ascii=False) + except (json.JSONDecodeError, TypeError): + # 如果不是JSON,返回原值 + return str(value) + +def is_json_field(value): + """检查字段是否为JSON格式""" + if not isinstance(value, str): + return False + + try: + json.loads(value) + return True + except (json.JSONDecodeError, TypeError): + return False + +def compare_values(value1, value2): + """智能比较两个值,支持JSON标准化和数组比较""" + # 首先检查是否为数组类型 + if is_json_array_field(value1) or is_json_array_field(value2): + return compare_array_values(value1, value2) + + # 如果两个值都是字符串,尝试JSON标准化比较 + if isinstance(value1, str) and isinstance(value2, str): + normalized_value1 = normalize_json_string(value1) + normalized_value2 = normalize_json_string(value2) + return normalized_value1 == normalized_value2 + + # 其他情况直接比较 + return value1 == value2 + +def generate_comparison_summary(total_keys, pro_count, test_count, differences, identical_results, field_diff_count): + """生成比较总结报告""" + # 计算基本统计 + different_records = len(set([list(diff['key'].values())[0] for diff in differences if 'field' in diff])) + identical_records = len(identical_results) + missing_in_test = len([diff for diff in differences if diff.get('message') == '在测试表中未找到该行']) + missing_in_pro = len([diff for diff in differences if diff.get('message') == '在生产表中未找到该行']) + + # 计算百分比 + def safe_percentage(part, total): + return round((part / total * 100), 2) if total > 0 else 0 + + identical_percentage = safe_percentage(identical_records, total_keys) + different_percentage = safe_percentage(different_records, total_keys) + + # 生成总结 + summary = { + 'overview': { + 'total_keys_queried': total_keys, + 'pro_records_found': pro_count, + 'test_records_found': test_count, + 'identical_records': identical_records, + 'different_records': different_records, + 'missing_in_test': missing_in_test, + 'missing_in_pro': missing_in_pro + 
}, + 'percentages': { + 'data_consistency': identical_percentage, + 'data_differences': different_percentage, + 'missing_rate': safe_percentage(missing_in_test + missing_in_pro, total_keys) + }, + 'field_analysis': { + 'total_fields_compared': len(field_diff_count) if field_diff_count else 0, + 'most_different_fields': sorted(field_diff_count.items(), key=lambda x: x[1], reverse=True)[:5] if field_diff_count else [] + }, + 'data_quality': { + 'completeness': safe_percentage(pro_count + test_count, total_keys * 2), + 'consistency_score': identical_percentage, + 'quality_level': get_quality_level(identical_percentage) + }, + 'recommendations': generate_recommendations(identical_percentage, missing_in_test, missing_in_pro, field_diff_count) + } + + return summary + +def get_quality_level(consistency_percentage): + """根据一致性百分比获取数据质量等级""" + if consistency_percentage >= 95: + return {'level': '优秀', 'color': 'success', 'description': '数据一致性非常高'} + elif consistency_percentage >= 90: + return {'level': '良好', 'color': 'info', 'description': '数据一致性较高'} + elif consistency_percentage >= 80: + return {'level': '一般', 'color': 'warning', 'description': '数据一致性中等,需要关注'} + else: + return {'level': '较差', 'color': 'danger', 'description': '数据一致性较低,需要重点处理'} + +def generate_recommendations(consistency_percentage, missing_in_test, missing_in_pro, field_diff_count): + """生成改进建议""" + recommendations = [] + + if consistency_percentage < 90: + recommendations.append('建议重点关注数据一致性问题,检查数据同步机制') + + if missing_in_test > 0: + recommendations.append(f'测试环境缺失 {missing_in_test} 条记录,建议检查数据迁移过程') + + if missing_in_pro > 0: + recommendations.append(f'生产环境缺失 {missing_in_pro} 条记录,建议检查数据完整性') + + if field_diff_count: + top_diff_field = max(field_diff_count.items(), key=lambda x: x[1]) + recommendations.append(f'字段 "{top_diff_field[0]}" 差异最多({top_diff_field[1]}次),建议优先处理') + + if not recommendations: + recommendations.append('数据质量良好,建议继续保持当前的数据管理流程') + + return recommendations \ No newline at end of file diff --git a/modules/database.py b/modules/database.py new file mode 100644 index 0000000..58fe8de --- /dev/null +++ b/modules/database.py @@ -0,0 +1,228 @@ +""" +数据库管理模块 +负责SQLite数据库的初始化、连接和表结构管理 +""" + +import sqlite3 +import json +import os +import logging +from datetime import datetime + +logger = logging.getLogger(__name__) + +DATABASE_PATH = 'config_groups.db' + +def init_database(): + """初始化数据库""" + try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + # 创建配置组表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS config_groups ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT, + pro_config TEXT NOT NULL, + test_config TEXT NOT NULL, + query_config TEXT NOT NULL, + sharding_config TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建查询历史表,包含分表配置字段 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS query_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + description TEXT, + pro_config TEXT NOT NULL, + test_config TEXT NOT NULL, + query_config TEXT NOT NULL, + query_keys TEXT NOT NULL, + results_summary TEXT NOT NULL, + execution_time REAL NOT NULL, + total_keys INTEGER NOT NULL, + differences_count INTEGER NOT NULL, + identical_count INTEGER NOT NULL, + sharding_config TEXT, + query_type TEXT DEFAULT 'single', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建分表配置组表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS sharding_config_groups ( + id INTEGER PRIMARY 
KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT, + pro_config TEXT NOT NULL, + test_config TEXT NOT NULL, + query_config TEXT NOT NULL, + sharding_config TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建查询日志表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS query_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + batch_id TEXT NOT NULL, + history_id INTEGER, + timestamp TEXT NOT NULL, + level TEXT NOT NULL, + message TEXT NOT NULL, + query_type TEXT DEFAULT 'single', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (history_id) REFERENCES query_history (id) ON DELETE CASCADE + ) + ''') + + # 创建Redis配置组表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS redis_config_groups ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + description TEXT, + cluster1_config TEXT NOT NULL, + cluster2_config TEXT NOT NULL, + query_options TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建Redis查询历史表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS redis_query_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + description TEXT, + cluster1_config TEXT NOT NULL, + cluster2_config TEXT NOT NULL, + query_options TEXT NOT NULL, + query_keys TEXT NOT NULL, + results_summary TEXT NOT NULL, + execution_time REAL NOT NULL, + total_keys INTEGER NOT NULL, + different_count INTEGER NOT NULL, + identical_count INTEGER NOT NULL, + missing_count INTEGER NOT NULL, + raw_results TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 创建索引 + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_batch_id ON query_logs(batch_id)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_history_id ON query_logs(history_id)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_timestamp ON query_logs(timestamp)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_level ON query_logs(level)') + + conn.commit() + conn.close() + logger.info("数据库初始化完成") + return True + except Exception as e: + logger.error(f"数据库初始化失败: {e}") + return False + +def ensure_database(): + """确保数据库和表存在""" + if not os.path.exists(DATABASE_PATH): + logger.info("数据库文件不存在,正在创建...") + return init_database() + + # 检查表是否存在 + try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name IN ('config_groups', 'query_history', 'sharding_config_groups', 'query_logs', 'redis_config_groups', 'redis_query_history')") + results = cursor.fetchall() + existing_tables = [row[0] for row in results] + + required_tables = ['config_groups', 'query_history', 'sharding_config_groups', 'query_logs', 'redis_config_groups', 'redis_query_history'] + missing_tables = [table for table in required_tables if table not in existing_tables] + + if missing_tables: + logger.info(f"数据库表不完整,缺少表:{missing_tables},正在重新创建...") + return init_database() + + # 检查config_groups表是否有sharding_config字段 + cursor.execute("PRAGMA table_info(config_groups)") + columns = cursor.fetchall() + column_names = [column[1] for column in columns] + + if 'sharding_config' not in column_names: + logger.info("添加sharding_config字段到config_groups表...") + cursor.execute("ALTER TABLE config_groups ADD COLUMN sharding_config TEXT") + conn.commit() + logger.info("sharding_config字段添加成功") + + # 检查query_history表是否有分表相关字段 + cursor.execute("PRAGMA table_info(query_history)") 
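The column checks that follow all repeat the same additive-migration pattern: inspect `PRAGMA table_info`, then `ALTER TABLE ... ADD COLUMN` when the column is missing. A generic sketch of that pattern against an in-memory database is shown below; the table and column names are hypothetical examples, not part of the module.

```python
# Generic sketch of the PRAGMA + ALTER TABLE pattern used in ensure_database();
# table/column names here are hypothetical.
import sqlite3

def add_column_if_missing(conn, table, column, ddl_type="TEXT"):
    cols = [row[1] for row in conn.execute(f"PRAGMA table_info({table})")]
    if column not in cols:
        conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {ddl_type}")
        conn.commit()
        return True
    return False

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE query_history (id INTEGER PRIMARY KEY)")
print(add_column_if_missing(conn, "query_history", "raw_results"))  # True  (column added)
print(add_column_if_missing(conn, "query_history", "raw_results"))  # False (already present)
```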
+ history_columns = cursor.fetchall() + history_column_names = [column[1] for column in history_columns] + + if 'sharding_config' not in history_column_names: + logger.info("添加sharding_config字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN sharding_config TEXT") + conn.commit() + logger.info("query_history表sharding_config字段添加成功") + + if 'query_type' not in history_column_names: + logger.info("添加query_type字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN query_type TEXT DEFAULT 'single'") + conn.commit() + logger.info("query_history表query_type字段添加成功") + + # 添加查询结果数据存储字段 + if 'raw_results' not in history_column_names: + logger.info("添加raw_results字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN raw_results TEXT") + conn.commit() + logger.info("query_history表raw_results字段添加成功") + + if 'differences_data' not in history_column_names: + logger.info("添加differences_data字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN differences_data TEXT") + conn.commit() + logger.info("query_history表differences_data字段添加成功") + + if 'identical_data' not in history_column_names: + logger.info("添加identical_data字段到query_history表...") + cursor.execute("ALTER TABLE query_history ADD COLUMN identical_data TEXT") + conn.commit() + logger.info("query_history表identical_data字段添加成功") + + # 检查query_logs表是否存在history_id字段 + cursor.execute("PRAGMA table_info(query_logs)") + logs_columns = cursor.fetchall() + logs_column_names = [column[1] for column in logs_columns] + + if 'history_id' not in logs_column_names: + logger.info("添加history_id字段到query_logs表...") + cursor.execute("ALTER TABLE query_logs ADD COLUMN history_id INTEGER") + # 创建外键索引 + cursor.execute('CREATE INDEX IF NOT EXISTS idx_query_logs_history_id ON query_logs(history_id)') + conn.commit() + logger.info("query_logs表history_id字段添加成功") + + conn.close() + return True + except Exception as e: + logger.error(f"检查数据库表失败: {e}") + return init_database() + +def get_db_connection(): + """获取数据库连接""" + conn = sqlite3.connect(DATABASE_PATH) + conn.row_factory = sqlite3.Row + return conn \ No newline at end of file diff --git a/modules/query_engine.py b/modules/query_engine.py new file mode 100644 index 0000000..53eff0d --- /dev/null +++ b/modules/query_engine.py @@ -0,0 +1,234 @@ +""" +数据查询模块 +负责Cassandra数据的查询执行,支持单表、分表和多主键查询 +""" + +import time +import logging +from .sharding import ShardingCalculator + +logger = logging.getLogger(__name__) + +def execute_query(session, table, keys, fields, values, exclude_fields=None): + """执行查询,支持单主键和复合主键""" + try: + # 参数验证 + if not keys or len(keys) == 0: + logger.error("Keys参数为空,无法构建查询") + return [] + + if not values or len(values) == 0: + logger.error("Values参数为空,无法构建查询") + return [] + + # 构建查询条件 + if len(keys) == 1: + # 单主键查询(保持原有逻辑) + quoted_values = [f"'{value}'" for value in values] + query_conditions = f"{keys[0]} IN ({', '.join(quoted_values)})" + else: + # 复合主键查询 + conditions = [] + for value in values: + # 检查value是否包含复合主键分隔符 + if isinstance(value, str) and ',' in value: + # 解析复合主键值 + key_values = [v.strip() for v in value.split(',')] + if len(key_values) == len(keys): + # 构建单个复合主键条件: (key1='val1' AND key2='val2') + key_conditions = [] + for i, (key, val) in enumerate(zip(keys, key_values)): + key_conditions.append(f"{key} = '{val}'") + conditions.append(f"({' AND '.join(key_conditions)})") + else: + logger.warning(f"复合主键值 '{value}' 的字段数量({len(key_values)})与主键字段数量({len(keys)})不匹配") + # 将其作为第一个主键的值处理 + 
conditions.append(f"{keys[0]} = '{value}'") + else: + # 单值,作为第一个主键的值处理 + conditions.append(f"{keys[0]} = '{value}'") + + if conditions: + query_conditions = ' OR '.join(conditions) + else: + logger.error("无法构建有效的查询条件") + return [] + + # 确定要查询的字段 + if fields: + fields_str = ", ".join(fields) + else: + fields_str = "*" + + query_sql = f"SELECT {fields_str} FROM {table} WHERE {query_conditions};" + + # 记录查询SQL日志 + logger.info(f"执行查询SQL: {query_sql}") + if len(keys) > 1: + logger.info(f"复合主键查询参数: 表={table}, 主键字段={keys}, 字段={fields_str}, Key数量={len(values)}") + else: + logger.info(f"单主键查询参数: 表={table}, 主键字段={keys[0]}, 字段={fields_str}, Key数量={len(values)}") + + # 执行查询 + start_time = time.time() + result = session.execute(query_sql) + execution_time = time.time() - start_time + + result_list = list(result) if result else [] + logger.info(f"查询完成: 执行时间={execution_time:.3f}秒, 返回记录数={len(result_list)}") + + return result_list + except Exception as e: + logger.error(f"查询执行失败: SQL={query_sql if 'query_sql' in locals() else 'N/A'}, 错误={str(e)}") + return [] + +def execute_sharding_query(session, shard_mapping, keys, fields, exclude_fields=None): + """ + 执行分表查询 + :param session: Cassandra会话 + :param shard_mapping: 分表映射 {table_name: [keys]} + :param keys: 主键字段名列表 + :param fields: 要查询的字段列表 + :param exclude_fields: 要排除的字段列表 + :return: (查询结果列表, 查询到的表列表, 查询失败的表列表) + """ + all_results = [] + queried_tables = [] + error_tables = [] + + logger.info(f"开始执行分表查询,涉及 {len(shard_mapping)} 张分表") + total_start_time = time.time() + + for table_name, table_keys in shard_mapping.items(): + try: + logger.info(f"查询分表 {table_name},包含 {len(table_keys)} 个key: {table_keys}") + # 为每个分表执行查询 + table_results = execute_query(session, table_name, keys, fields, table_keys, exclude_fields) + all_results.extend(table_results) + queried_tables.append(table_name) + logger.info(f"分表 {table_name} 查询成功,返回 {len(table_results)} 条记录") + except Exception as e: + logger.error(f"分表 {table_name} 查询失败: {e}") + error_tables.append(table_name) + + total_execution_time = time.time() - total_start_time + logger.info(f"分表查询总计完成: 执行时间={total_execution_time:.3f}秒, 成功表数={len(queried_tables)}, 失败表数={len(error_tables)}, 总记录数={len(all_results)}") + + return all_results, queried_tables, error_tables + +def execute_mixed_query(pro_session, test_session, pro_config, test_config, keys, fields_to_compare, values, exclude_fields, sharding_config): + """ + 执行混合查询(生产环境分表,测试环境可能单表或分表) + """ + results = { + 'pro_data': [], + 'test_data': [], + 'sharding_info': { + 'calculation_stats': {} + } + } + + # 处理生产环境查询 + if sharding_config.get('use_sharding_for_pro', False): + # 获取生产环境分表配置参数,优先使用专用参数,否则使用通用参数 + pro_interval = sharding_config.get('pro_interval_seconds') or sharding_config.get('interval_seconds', 604800) + pro_table_count = sharding_config.get('pro_table_count') or sharding_config.get('table_count', 14) + + # 记录生产环境分表配置信息 + logger.info(f"=== 生产环境分表配置 ===") + logger.info(f"启用分表查询: True") + logger.info(f"时间间隔: {pro_interval}秒 ({pro_interval//86400}天)") + logger.info(f"分表数量: {pro_table_count}张") + logger.info(f"基础表名: {pro_config['table']}") + + pro_calculator = ShardingCalculator( + interval_seconds=pro_interval, + table_count=pro_table_count + ) + pro_shard_mapping, pro_failed_keys, pro_calc_stats = pro_calculator.get_all_shard_tables_for_keys( + pro_config['table'], values + ) + + logger.info(f"生产环境分表映射结果: 涉及{len(pro_shard_mapping)}张分表, 失败Key数量: {len(pro_failed_keys)}") + + pro_data, pro_queried_tables, pro_error_tables = execute_sharding_query( + pro_session, 
pro_shard_mapping, keys, fields_to_compare, exclude_fields + ) + + results['pro_data'] = pro_data + results['sharding_info']['pro_shards'] = { + 'enabled': True, + 'interval_seconds': sharding_config.get('pro_interval_seconds', 604800), + 'table_count': sharding_config.get('pro_table_count', 14), + 'queried_tables': pro_queried_tables, + 'error_tables': pro_error_tables, + 'failed_keys': pro_failed_keys + } + results['sharding_info']['calculation_stats'].update(pro_calc_stats) + else: + # 生产环境单表查询 + logger.info(f"=== 生产环境单表配置 ===") + logger.info(f"启用分表查询: False") + logger.info(f"表名: {pro_config['table']}") + + pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields) + results['pro_data'] = pro_data + results['sharding_info']['pro_shards'] = { + 'enabled': False, + 'queried_tables': [pro_config['table']] + } + + # 处理测试环境查询 + if sharding_config.get('use_sharding_for_test', False): + # 获取测试环境分表配置参数,优先使用专用参数,否则使用通用参数 + test_interval = sharding_config.get('test_interval_seconds') or sharding_config.get('interval_seconds', 604800) + test_table_count = sharding_config.get('test_table_count') or sharding_config.get('table_count', 14) + + # 记录测试环境分表配置信息 + logger.info(f"=== 测试环境分表配置 ===") + logger.info(f"启用分表查询: True") + logger.info(f"时间间隔: {test_interval}秒 ({test_interval//86400}天)") + logger.info(f"分表数量: {test_table_count}张") + logger.info(f"基础表名: {test_config['table']}") + + test_calculator = ShardingCalculator( + interval_seconds=test_interval, + table_count=test_table_count + ) + test_shard_mapping, test_failed_keys, test_calc_stats = test_calculator.get_all_shard_tables_for_keys( + test_config['table'], values + ) + + logger.info(f"测试环境分表映射结果: 涉及{len(test_shard_mapping)}张分表, 失败Key数量: {len(test_failed_keys)}") + + test_data, test_queried_tables, test_error_tables = execute_sharding_query( + test_session, test_shard_mapping, keys, fields_to_compare, exclude_fields + ) + + results['test_data'] = test_data + results['sharding_info']['test_shards'] = { + 'enabled': True, + 'interval_seconds': test_interval, + 'table_count': test_table_count, + 'queried_tables': test_queried_tables, + 'error_tables': test_error_tables, + 'failed_keys': test_failed_keys + } + + # 合并计算统计信息 + if not results['sharding_info']['calculation_stats']: + results['sharding_info']['calculation_stats'] = test_calc_stats + else: + # 测试环境单表查询 + logger.info(f"=== 测试环境单表配置 ===") + logger.info(f"启用分表查询: False") + logger.info(f"表名: {test_config['table']}") + + test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields) + results['test_data'] = test_data + results['sharding_info']['test_shards'] = { + 'enabled': False, + 'queried_tables': [test_config['table']] + } + + return results \ No newline at end of file diff --git a/modules/query_logger.py b/modules/query_logger.py new file mode 100644 index 0000000..87684c9 --- /dev/null +++ b/modules/query_logger.py @@ -0,0 +1,272 @@ +""" +查询日志管理模块 +负责查询日志的收集、存储和检索 +""" + +import sqlite3 +import logging +from datetime import datetime, timedelta +from .database import DATABASE_PATH + +logger = logging.getLogger(__name__) + +class QueryLogCollector: + def __init__(self, max_logs=1000, db_path=None): + self.logs = [] # 内存中的日志缓存 + self.max_logs = max_logs + self.current_batch_id = None + self.batch_counter = 0 + self.current_query_type = 'single' + self.current_history_id = None # 当前关联的历史记录ID + self.db_path = db_path or DATABASE_PATH + + def start_new_batch(self, query_type='single'): + 
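For clarity on the `sharding_config` consumed by `execute_mixed_query()` above, a hedged example of the expected dictionary shape follows. The numeric values are simply the fallback defaults the code uses, not a recommended setting.

```python
# Example sharding_config for execute_mixed_query(); values are the fallback
# defaults from the code above (604800s = 7 days, 14 shard tables).
sharding_config = {
    "use_sharding_for_pro": True,    # production side queries TWCS shard tables
    "use_sharding_for_test": False,  # test side stays on a single table
    "pro_interval_seconds": 604800,  # falls back to interval_seconds when absent
    "pro_table_count": 14,           # falls back to table_count when absent
    # "test_interval_seconds" / "test_table_count" apply when the test side
    # also enables sharding.
}
```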
"""开始新的查询批次""" + self.batch_counter += 1 + self.current_batch_id = f"batch_{self.batch_counter}_{datetime.now().strftime('%H%M%S')}" + self.current_query_type = query_type + self.current_history_id = None # 重置历史记录ID + + # 添加批次开始标记 + self.add_log('INFO', f"=== 开始{query_type}查询批次 (ID: {self.current_batch_id}) ===", force_batch_id=self.current_batch_id) + return self.current_batch_id + + def set_history_id(self, history_id): + """设置当前批次关联的历史记录ID""" + self.current_history_id = history_id + if self.current_batch_id and history_id: + self.add_log('INFO', f"关联历史记录ID: {history_id}", force_batch_id=self.current_batch_id) + # 更新当前批次的所有日志记录的history_id + self._update_batch_history_id(self.current_batch_id, history_id) + + def _update_batch_history_id(self, batch_id, history_id): + """更新批次中所有日志的history_id""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + + cursor.execute(''' + UPDATE query_logs + SET history_id = ? + WHERE batch_id = ? + ''', (history_id, batch_id)) + + conn.commit() + conn.close() + logger.info(f"已更新批次 {batch_id} 的历史记录关联到 {history_id}") + except Exception as e: + print(f"Warning: Failed to update batch history_id: {e}") + + def end_current_batch(self): + """结束当前查询批次""" + if self.current_batch_id: + self.add_log('INFO', f"=== 查询批次完成 (ID: {self.current_batch_id}) ===", force_batch_id=self.current_batch_id) + self.current_batch_id = None + self.current_history_id = None + + def add_log(self, level, message, force_batch_id=None, force_query_type=None, force_history_id=None): + """添加日志到内存和数据库""" + timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + batch_id = force_batch_id or self.current_batch_id + query_type = force_query_type or self.current_query_type + history_id = force_history_id or self.current_history_id + + log_entry = { + 'timestamp': timestamp, + 'level': level, + 'message': message, + 'batch_id': batch_id, + 'query_type': query_type, + 'history_id': history_id + } + + # 添加到内存缓存 + self.logs.append(log_entry) + if len(self.logs) > self.max_logs: + self.logs.pop(0) + + # 保存到数据库 + self._save_log_to_db(log_entry) + + def _save_log_to_db(self, log_entry): + """将日志保存到数据库""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + + cursor.execute(''' + INSERT INTO query_logs (batch_id, history_id, timestamp, level, message, query_type) + VALUES (?, ?, ?, ?, ?, ?) 
+ ''', ( + log_entry['batch_id'], + log_entry['history_id'], + log_entry['timestamp'], + log_entry['level'], + log_entry['message'], + log_entry['query_type'] + )) + + conn.commit() + conn.close() + except Exception as e: + # 数据库写入失败时记录到控制台,但不影响程序运行 + print(f"Warning: Failed to save log to database: {e}") + + def get_logs(self, limit=None, from_db=True): + """获取日志,支持从数据库或内存获取""" + if from_db: + return self._get_logs_from_db(limit) + else: + # 从内存获取 + if limit: + return self.logs[-limit:] + return self.logs + + def _get_logs_from_db(self, limit=None): + """从数据库获取日志""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + query = ''' + SELECT batch_id, history_id, timestamp, level, message, query_type + FROM query_logs + ORDER BY id DESC + ''' + + if limit: + query += f' LIMIT {limit}' + + cursor.execute(query) + rows = cursor.fetchall() + + # 转换为字典格式并反转顺序(最新的在前) + logs = [] + for row in reversed(rows): + logs.append({ + 'batch_id': row['batch_id'], + 'history_id': row['history_id'], + 'timestamp': row['timestamp'], + 'level': row['level'], + 'message': row['message'], + 'query_type': row['query_type'] + }) + + conn.close() + return logs + except Exception as e: + print(f"Warning: Failed to get logs from database: {e}") + # 如果数据库读取失败,返回内存中的日志 + return self.get_logs(limit, from_db=False) + + def _get_total_logs_count(self): + """获取数据库中的日志总数""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + cursor.execute('SELECT COUNT(*) FROM query_logs') + count = cursor.fetchone()[0] + conn.close() + return count + except Exception as e: + print(f"Warning: Failed to get logs count from database: {e}") + return len(self.logs) + + def get_logs_by_history_id(self, history_id): + """根据历史记录ID获取相关日志""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + cursor.execute(''' + SELECT batch_id, history_id, timestamp, level, message, query_type + FROM query_logs + WHERE history_id = ? 
+ ORDER BY id ASC + ''', (history_id,)) + + rows = cursor.fetchall() + logs = [] + for row in rows: + logs.append({ + 'batch_id': row['batch_id'], + 'history_id': row['history_id'], + 'timestamp': row['timestamp'], + 'level': row['level'], + 'message': row['message'], + 'query_type': row['query_type'] + }) + + conn.close() + return logs + except Exception as e: + print(f"Warning: Failed to get logs by history_id: {e}") + return [] + + def get_logs_grouped_by_batch(self, limit=None, from_db=True): + """按批次分组获取日志""" + logs = self.get_logs(limit, from_db) + grouped_logs = {} + batch_order = [] + + for log in logs: + batch_id = log.get('batch_id', 'unknown') + if batch_id not in grouped_logs: + grouped_logs[batch_id] = [] + batch_order.append(batch_id) + grouped_logs[batch_id].append(log) + + # 返回按时间顺序排列的批次 + return [(batch_id, grouped_logs[batch_id]) for batch_id in batch_order] + + def clear_logs(self, clear_db=True): + """清空日志""" + # 清空内存 + self.logs.clear() + self.current_batch_id = None + self.batch_counter = 0 + + # 清空数据库 + if clear_db: + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + cursor.execute('DELETE FROM query_logs') + conn.commit() + conn.close() + except Exception as e: + print(f"Warning: Failed to clear logs from database: {e}") + + def cleanup_old_logs(self, days_to_keep=30): + """清理旧日志,保留指定天数的日志""" + try: + conn = sqlite3.connect(self.db_path, timeout=30) + cursor = conn.cursor() + + # 删除超过指定天数的日志 + cutoff_date = datetime.now() - timedelta(days=days_to_keep) + cursor.execute(''' + DELETE FROM query_logs + WHERE created_at < ? + ''', (cutoff_date.strftime('%Y-%m-%d %H:%M:%S'),)) + + deleted_count = cursor.rowcount + conn.commit() + conn.close() + + logger.info(f"清理了 {deleted_count} 条超过 {days_to_keep} 天的旧日志") + return deleted_count + except Exception as e: + logger.error(f"清理旧日志失败: {e}") + return 0 + +# 自定义日志处理器 +class CollectorHandler(logging.Handler): + def __init__(self, collector): + super().__init__() + self.collector = collector + + def emit(self, record): + self.collector.add_log(record.levelname, record.getMessage()) \ No newline at end of file diff --git a/modules/redis_client.py b/modules/redis_client.py new file mode 100644 index 0000000..1fca580 --- /dev/null +++ b/modules/redis_client.py @@ -0,0 +1,249 @@ +""" +Redis连接管理模块 +负责Redis集群的连接、错误处理和性能追踪 +""" + +import time +import logging +import redis +from redis.cluster import RedisCluster, ClusterNode, key_slot +from redis.exceptions import RedisError, ConnectionError + +logger = logging.getLogger(__name__) + +class RedisPerformanceTracker: + """Redis操作性能统计追踪器""" + + def __init__(self): + self.connection_times = {} # 连接耗时 + self.query_times = {} # 查询耗时 + self.comparison_time = 0 # 比对耗时 + self.scan_time = 0 # scan操作耗时 + self.connection_status = {} # 连接状态 + self.start_time = time.time() + + def record_connection(self, cluster_name, start_time, end_time, success, error_msg=None): + """记录连接信息""" + self.connection_times[cluster_name] = end_time - start_time + self.connection_status[cluster_name] = { + 'success': success, + 'error_msg': error_msg, + 'connect_time': end_time - start_time + } + + def record_query(self, operation_name, duration): + """记录查询操作耗时""" + self.query_times[operation_name] = duration + + def record_scan_time(self, duration): + """记录scan操作耗时""" + self.scan_time = duration + + def record_comparison_time(self, duration): + """记录比对耗时""" + self.comparison_time = duration + + def get_total_time(self): + """获取总耗时""" + return time.time() - self.start_time + + def 
generate_report(self): + """生成性能报告""" + total_time = self.get_total_time() + report = { + 'total_time': total_time, + 'connections': self.connection_status, + 'operations': { + 'scan_time': self.scan_time, + 'comparison_time': self.comparison_time, + 'queries': self.query_times + } + } + return report + +def create_redis_client(cluster_config, cluster_name="Redis集群", performance_tracker=None): + """ + 创建Redis客户端,自动检测单节点或集群模式 + + Args: + cluster_config: Redis配置 + cluster_name: 集群名称用于日志 + performance_tracker: 性能追踪器 + + Returns: + Redis客户端实例或None + """ + start_time = time.time() + + try: + # 获取节点配置 + nodes = cluster_config.get('nodes', []) + if not nodes: + raise RedisError("未配置Redis节点") + + # 通用连接参数 + common_params = { + 'password': cluster_config.get('password'), + 'socket_timeout': cluster_config.get('socket_timeout', 3), + 'socket_connect_timeout': cluster_config.get('socket_connect_timeout', 3), + 'decode_responses': False, # 保持原始字节数据 + 'retry_on_timeout': True + } + + logger.info(f"正在连接{cluster_name}...") + logger.info(f"节点配置: {[(node['host'], node['port']) for node in nodes]}") + + # 首先尝试单节点模式连接第一个节点 + first_node = nodes[0] + try: + logger.info(f"尝试单节点模式连接: {first_node['host']}:{first_node['port']}") + + single_client = redis.Redis( + host=first_node['host'], + port=first_node['port'], + **common_params + ) + + # 测试连接 + single_client.ping() + + # 检查是否启用了集群模式 + try: + info = single_client.info() + cluster_enabled = info.get('cluster_enabled', 0) + + if cluster_enabled == 1: + # 这是一个集群节点,关闭单节点连接,使用集群模式 + logger.info("检测到集群模式已启用,切换到集群客户端") + single_client.close() + return _create_cluster_client(cluster_config, cluster_name, performance_tracker, start_time, common_params) + else: + # 单节点模式工作正常 + end_time = time.time() + connection_time = end_time - start_time + + if performance_tracker: + performance_tracker.record_connection(cluster_name, start_time, end_time, True) + + logger.info(f"✅ {cluster_name}连接成功(单节点模式),耗时 {connection_time:.3f} 秒") + return single_client + + except Exception as info_error: + # 如果获取info失败,但ping成功,仍然使用单节点模式 + logger.warning(f"无法获取集群信息,继续使用单节点模式: {info_error}") + end_time = time.time() + connection_time = end_time - start_time + + if performance_tracker: + performance_tracker.record_connection(cluster_name, start_time, end_time, True) + + logger.info(f"✅ {cluster_name}连接成功(单节点模式),耗时 {connection_time:.3f} 秒") + return single_client + + except Exception as single_error: + logger.warning(f"单节点模式连接失败: {single_error}") + logger.info("尝试集群模式连接...") + + # 单节点模式失败,尝试集群模式 + return _create_cluster_client(cluster_config, cluster_name, performance_tracker, start_time, common_params) + + except Exception as e: + end_time = time.time() + connection_time = end_time - start_time + error_msg = f"连接失败: {str(e)}" + + if performance_tracker: + performance_tracker.record_connection(cluster_name, start_time, end_time, False, error_msg) + + logger.error(f"❌ {cluster_name}{error_msg},耗时 {connection_time:.3f} 秒") + return None + +def _create_cluster_client(cluster_config, cluster_name, performance_tracker, start_time, common_params): + """创建集群客户端""" + try: + # 构建集群节点列表 + startup_nodes = [] + for node in cluster_config.get('nodes', []): + startup_nodes.append(ClusterNode(node['host'], node['port'])) + + # 创建Redis集群客户端 + cluster_client = RedisCluster( + startup_nodes=startup_nodes, + max_connections_per_node=cluster_config.get('max_connections_per_node', 16), + skip_full_coverage_check=True, # 跳过全覆盖检查,允许部分节点不可用 + **common_params + ) + + # 测试集群连接 + cluster_client.ping() + + end_time = 
time.time() + connection_time = end_time - start_time + + if performance_tracker: + performance_tracker.record_connection(cluster_name, start_time, end_time, True) + + logger.info(f"✅ {cluster_name}连接成功(集群模式),耗时 {connection_time:.3f} 秒") + return cluster_client + + except Exception as cluster_error: + end_time = time.time() + connection_time = end_time - start_time + error_msg = f"集群模式连接失败: {str(cluster_error)}" + + if performance_tracker: + performance_tracker.record_connection(cluster_name, start_time, end_time, False, error_msg) + + logger.error(f"❌ {cluster_name}{error_msg},耗时 {connection_time:.3f} 秒") + return None + +def test_redis_connection(cluster_config, cluster_name="Redis集群"): + """ + 测试Redis连接 + + Args: + cluster_config: Redis集群配置 + cluster_name: 集群名称 + + Returns: + dict: 连接测试结果 + """ + result = { + 'success': False, + 'error': None, + 'connection_time': 0, + 'cluster_info': None + } + + start_time = time.time() + client = None + + try: + client = create_redis_client(cluster_config, cluster_name) + if client: + # 获取集群信息 + info = client.info() + cluster_info = { + 'redis_version': info.get('redis_version', 'Unknown'), + 'connected_clients': info.get('connected_clients', 0), + 'used_memory_human': info.get('used_memory_human', 'Unknown'), + 'keyspace_hits': info.get('keyspace_hits', 0), + 'keyspace_misses': info.get('keyspace_misses', 0) + } + + result['success'] = True + result['cluster_info'] = cluster_info + else: + result['error'] = "连接创建失败" + + except Exception as e: + result['error'] = str(e) + finally: + result['connection_time'] = time.time() - start_time + if client: + try: + client.close() + except: + pass + + return result \ No newline at end of file diff --git a/modules/redis_query.py b/modules/redis_query.py new file mode 100644 index 0000000..d6138df --- /dev/null +++ b/modules/redis_query.py @@ -0,0 +1,355 @@ +""" +Redis查询和数据比较模块 +负责Redis数据的查询、随机key获取和数据比较 +""" + +import time +import logging +import random +from redis.cluster import key_slot +from redis.exceptions import RedisError +from .redis_client import RedisPerformanceTracker + +logger = logging.getLogger(__name__) + +def get_random_keys_from_redis(redis_client, count=100, pattern="*", performance_tracker=None): + """ + 从Redis集群中获取随机keys + + Args: + redis_client: Redis客户端 + count: 要获取的key数量 + pattern: key匹配模式,默认为 "*" + performance_tracker: 性能追踪器 + + Returns: + list: 随机key列表 + """ + start_time = time.time() + keys = set() + + logger.info(f"开始扫描获取随机keys,目标数量: {count},模式: {pattern}") + + try: + # 使用scan_iter获取keys + scan_count = max(count * 2, 1000) # 扫描更多key以确保随机性 + + for key in redis_client.scan_iter(match=pattern, count=scan_count): + keys.add(key) + if len(keys) >= count * 3: # 获取更多key以便随机选择 + break + + # 如果获取的key数量超过需要的数量,随机选择 + if len(keys) > count: + keys = random.sample(list(keys), count) + else: + keys = list(keys) + + end_time = time.time() + scan_duration = end_time - start_time + + if performance_tracker: + performance_tracker.record_scan_time(scan_duration) + + logger.info(f"扫描获取 {len(keys)} 个随机keys,耗时 {scan_duration:.3f} 秒") + return keys + + except RedisError as e: + end_time = time.time() + scan_duration = end_time - start_time + + if performance_tracker: + performance_tracker.record_scan_time(scan_duration) + + logger.error(f"获取随机keys失败: {e},耗时 {scan_duration:.3f} 秒") + return [] + +def get_redis_values_by_keys(redis_client, keys, cluster_name="Redis集群", performance_tracker=None): + """ + 批量查询Redis中指定keys的值,自动适配单节点和集群模式 + + Args: + redis_client: Redis客户端 + keys: 要查询的key列表 + cluster_name: 
集群名称用于日志 + performance_tracker: 性能追踪器 + + Returns: + list: 对应keys的值列表,如果key不存在则为None + """ + start_time = time.time() + result = [None] * len(keys) + + logger.info(f"开始从{cluster_name}批量查询 {len(keys)} 个keys") + + try: + # 检查是否是集群模式 + is_cluster = hasattr(redis_client, 'cluster_nodes') + + if is_cluster: + # 集群模式:按slot分组keys以优化查询性能 + slot_groups = {} + for idx, key in enumerate(keys): + slot = key_slot(key) + slot_groups.setdefault(slot, []).append((idx, key)) + + logger.info(f"集群模式:keys分布在 {len(slot_groups)} 个slot中") + + # 分组批量查询 + for group in slot_groups.values(): + indices, slot_keys = zip(*group) + values = redis_client.mget(slot_keys) + for i, v in zip(indices, values): + result[i] = v + else: + # 单节点模式:直接批量查询 + logger.info(f"单节点模式:直接批量查询") + result = redis_client.mget(keys) + + end_time = time.time() + query_duration = end_time - start_time + + if performance_tracker: + performance_tracker.record_query(f"{cluster_name}_batch_query", query_duration) + + # 统计成功获取的key数量 + successful_count = sum(1 for v in result if v is not None) + logger.info(f"从{cluster_name}查询完成,成功获取 {successful_count}/{len(keys)} 个值,耗时 {query_duration:.3f} 秒") + + return result + + except Exception as e: + end_time = time.time() + query_duration = end_time - start_time + + if performance_tracker: + performance_tracker.record_query(f"{cluster_name}_batch_query_error", query_duration) + + logger.error(f"从{cluster_name}批量查询失败: {e},耗时 {query_duration:.3f} 秒") + return result + +def compare_redis_data(client1, client2, keys, cluster1_name="生产集群", cluster2_name="测试集群", performance_tracker=None): + """ + 比较两个Redis集群中指定keys的数据 + + Args: + client1: 第一个Redis客户端(生产) + client2: 第二个Redis客户端(测试) + keys: 要比较的key列表 + cluster1_name: 第一个集群名称 + cluster2_name: 第二个集群名称 + performance_tracker: 性能追踪器 + + Returns: + dict: 比较结果,包含统计信息和差异详情 + """ + comparison_start_time = time.time() + + logger.info(f"开始比较 {cluster1_name} 和 {cluster2_name} 的数据") + + # 获取两个集群的数据 + values1 = get_redis_values_by_keys(client1, keys, cluster1_name, performance_tracker) + if values1 is None: + return {'error': f'从{cluster1_name}获取数据失败'} + + values2 = get_redis_values_by_keys(client2, keys, cluster2_name, performance_tracker) + if values2 is None: + return {'error': f'从{cluster2_name}获取数据失败'} + + # 开始数据比对 + compare_start = time.time() + + # 初始化统计数据 + stats = { + 'total_keys': len(keys), + 'identical_count': 0, + 'different_count': 0, + 'missing_in_cluster1': 0, + 'missing_in_cluster2': 0, + 'both_missing': 0 + } + + # 详细结果列表 + identical_results = [] + different_results = [] + missing_results = [] + + # 逐个比较 + for i, key in enumerate(keys): + val1 = values1[i] + val2 = values2[i] + + # 将bytes转换为字符串用于显示(如果是bytes类型) + display_val1 = val1.decode('utf-8') if isinstance(val1, bytes) else val1 + display_val2 = val2.decode('utf-8') if isinstance(val2, bytes) else val2 + + if val1 is None and val2 is None: + # 两个集群都没有这个key + stats['both_missing'] += 1 + missing_results.append({ + 'key': key.decode('utf-8') if isinstance(key, bytes) else key, + 'status': 'both_missing', + 'message': '两个集群都不存在该key' + }) + elif val1 is None: + # 只有第一个集群没有 + stats['missing_in_cluster1'] += 1 + missing_results.append({ + 'key': key.decode('utf-8') if isinstance(key, bytes) else key, + 'status': 'missing_in_cluster1', + 'cluster1_value': None, + 'cluster2_value': display_val2, + 'message': f'在{cluster1_name}中不存在' + }) + elif val2 is None: + # 只有第二个集群没有 + stats['missing_in_cluster2'] += 1 + missing_results.append({ + 'key': key.decode('utf-8') if isinstance(key, bytes) else key, + 'status': 
'missing_in_cluster2', + 'cluster1_value': display_val1, + 'cluster2_value': None, + 'message': f'在{cluster2_name}中不存在' + }) + elif val1 == val2: + # 值相同 + stats['identical_count'] += 1 + identical_results.append({ + 'key': key.decode('utf-8') if isinstance(key, bytes) else key, + 'value': display_val1 + }) + else: + # 值不同 + stats['different_count'] += 1 + different_results.append({ + 'key': key.decode('utf-8') if isinstance(key, bytes) else key, + 'cluster1_value': display_val1, + 'cluster2_value': display_val2, + 'message': '值不同' + }) + + compare_end = time.time() + comparison_duration = compare_end - compare_start + total_duration = compare_end - comparison_start_time + + if performance_tracker: + performance_tracker.record_comparison_time(comparison_duration) + + # 计算百分比 + def safe_percentage(part, total): + return round((part / total * 100), 2) if total > 0 else 0 + + stats['identical_percentage'] = safe_percentage(stats['identical_count'], stats['total_keys']) + stats['different_percentage'] = safe_percentage(stats['different_count'], stats['total_keys']) + stats['missing_percentage'] = safe_percentage( + stats['missing_in_cluster1'] + stats['missing_in_cluster2'] + stats['both_missing'], + stats['total_keys'] + ) + + result = { + 'success': True, + 'stats': stats, + 'identical_results': identical_results, + 'different_results': different_results, + 'missing_results': missing_results, + 'performance': { + 'comparison_time': comparison_duration, + 'total_time': total_duration + }, + 'clusters': { + 'cluster1_name': cluster1_name, + 'cluster2_name': cluster2_name + } + } + + logger.info(f"数据比对完成,耗时 {comparison_duration:.3f} 秒") + logger.info(f"比对统计: 总计{stats['total_keys']}个key,相同{stats['identical_count']}个,不同{stats['different_count']}个,缺失{stats['missing_in_cluster1'] + stats['missing_in_cluster2'] + stats['both_missing']}个") + + return result + +def execute_redis_comparison(config1, config2, query_options): + """ + 执行Redis数据比较的主要函数 + + Args: + config1: 第一个Redis集群配置 + config2: 第二个Redis集群配置 + query_options: 查询选项,包含查询模式和参数 + + Returns: + dict: 完整的比较结果 + """ + from .redis_client import create_redis_client + + # 创建性能追踪器 + performance_tracker = RedisPerformanceTracker() + + cluster1_name = config1.get('name', '生产集群') + cluster2_name = config2.get('name', '测试集群') + + logger.info(f"开始执行Redis数据比较: {cluster1_name} vs {cluster2_name}") + + # 创建连接 + client1 = create_redis_client(config1, cluster1_name, performance_tracker) + client2 = create_redis_client(config2, cluster2_name, performance_tracker) + + if not client1: + return {'error': f'{cluster1_name}连接失败'} + + if not client2: + return {'error': f'{cluster2_name}连接失败'} + + try: + # 获取要比较的keys + keys = [] + query_mode = query_options.get('mode', 'random') + + if query_mode == 'random': + # 随机获取keys + count = query_options.get('count', 100) + pattern = query_options.get('pattern', '*') + source_cluster = query_options.get('source_cluster', 'cluster2') # 默认从第二个集群获取 + + source_client = client2 if source_cluster == 'cluster2' else client1 + source_name = cluster2_name if source_cluster == 'cluster2' else cluster1_name + + logger.info(f"从{source_name}随机获取 {count} 个keys") + keys = get_random_keys_from_redis(source_client, count, pattern, performance_tracker) + + elif query_mode == 'specified': + # 指定keys + keys = query_options.get('keys', []) + # 如果keys是字符串,需要转换为bytes(Redis通常使用bytes) + keys = [k.encode('utf-8') if isinstance(k, str) else k for k in keys] + + if not keys: + return {'error': '未获取到任何keys进行比较'} + + logger.info(f"准备比较 {len(keys)} 个keys") + 
+ # 执行比较 + comparison_result = compare_redis_data( + client1, client2, keys, + cluster1_name, cluster2_name, + performance_tracker + ) + + # 添加性能报告 + comparison_result['performance_report'] = performance_tracker.generate_report() + comparison_result['query_options'] = query_options + + return comparison_result + + except Exception as e: + logger.error(f"Redis数据比较执行失败: {e}") + return {'error': f'执行失败: {str(e)}'} + + finally: + # 关闭连接 + try: + if client1: + client1.close() + if client2: + client2.close() + except: + pass \ No newline at end of file diff --git a/modules/sharding.py b/modules/sharding.py new file mode 100644 index 0000000..bf8f41e --- /dev/null +++ b/modules/sharding.py @@ -0,0 +1,115 @@ +""" +分表计算模块 +负责TWCS时间分表的计算和映射 +""" + +import re +import logging + +logger = logging.getLogger(__name__) + +class ShardingCalculator: + """分表计算器,基于TWCS策略""" + + def __init__(self, interval_seconds=604800, table_count=14): + """ + 初始化分表计算器 + :param interval_seconds: 时间间隔(秒),默认604800(7天) + :param table_count: 分表数量,默认14 + """ + self.interval_seconds = interval_seconds + self.table_count = table_count + + def extract_timestamp_from_key(self, key): + """ + 从Key中提取时间戳 + 新规则:优先提取最后一个下划线后的数字,如果没有下划线则提取最后连续的数字部分 + """ + if not key: + return None + + key_str = str(key) + + # 方法1:如果包含下划线,尝试提取最后一个下划线后的部分 + if '_' in key_str: + parts = key_str.split('_') + last_part = parts[-1] + # 检查最后一部分是否为纯数字 + if last_part.isdigit(): + timestamp = int(last_part) + logger.info(f"Key '{key}' 通过下划线分割提取到时间戳: {timestamp}") + return timestamp + + # 方法2:使用正则表达式找到所有数字序列,取最后一个较长的 + number_sequences = re.findall(r'\d+', key_str) + + if not number_sequences: + logger.warning(f"Key '{key}' 中没有找到数字字符") + return None + + # 如果有多个数字序列,优先选择最长的,如果长度相同则选择最后一个 + longest_sequence = max(number_sequences, key=len) + + # 如果最长的有多个,选择最后一个最长的 + max_length = len(longest_sequence) + last_longest = None + for seq in number_sequences: + if len(seq) == max_length: + last_longest = seq + + try: + timestamp = int(last_longest) + logger.info(f"Key '{key}' 通过数字序列提取到时间戳: {timestamp} (从序列 {number_sequences} 中选择)") + return timestamp + except ValueError: + logger.error(f"Key '{key}' 时间戳转换失败: {last_longest}") + return None + + def calculate_shard_index(self, timestamp): + """ + 计算分表索引 + 公式:timestamp // interval_seconds % table_count + """ + if timestamp is None: + return None + return int(timestamp) // self.interval_seconds % self.table_count + + def get_shard_table_name(self, base_table_name, key): + """ + 根据Key获取对应的分表名称 + """ + timestamp = self.extract_timestamp_from_key(key) + if timestamp is None: + return None + + shard_index = self.calculate_shard_index(timestamp) + return f"{base_table_name}_{shard_index}" + + def get_all_shard_tables_for_keys(self, base_table_name, keys): + """ + 为一批Keys计算所有需要查询的分表 + 返回: {shard_table_name: [keys_for_this_shard], ...} + """ + shard_mapping = {} + failed_keys = [] + calculation_stats = { + 'total_keys': len(keys), + 'successful_extractions': 0, + 'failed_extractions': 0, + 'unique_shards': 0 + } + + for key in keys: + shard_table = self.get_shard_table_name(base_table_name, key) + if shard_table: + if shard_table not in shard_mapping: + shard_mapping[shard_table] = [] + shard_mapping[shard_table].append(key) + calculation_stats['successful_extractions'] += 1 + else: + failed_keys.append(key) + calculation_stats['failed_extractions'] += 1 + + calculation_stats['unique_shards'] = len(shard_mapping) + + return shard_mapping, failed_keys, calculation_stats \ No newline at end of file diff --git a/requirements.txt 
b/requirements.txt index 23696d6..bb7f5fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ Flask==2.3.3 -cassandra-driver==3.29.1 \ No newline at end of file +cassandra-driver==3.29.1 +redis==5.0.1 \ No newline at end of file diff --git a/static/js/redis_compare.js b/static/js/redis_compare.js new file mode 100644 index 0000000..2cebb5d --- /dev/null +++ b/static/js/redis_compare.js @@ -0,0 +1,1292 @@ +/** + * Redis比较工具的JavaScript功能 + * 支持Redis集群配置、连接测试、数据比较等功能 + */ + +// 全局变量 +let currentResults = null; +let isQuerying = false; +let currentImportTarget = null; // 记录当前导入目标集群 + +// 页面加载完成后初始化 +document.addEventListener('DOMContentLoaded', function() { + initializePage(); + bindEvents(); + loadRedisConfigGroups(); + loadRedisQueryHistory(); +}); + +/** + * 初始化页面 + */ +function initializePage() { + // 加载默认配置 + loadDefaultConfig(); + + // 绑定节点删除事件 + bindNodeEvents(); + + // 绑定查询模式切换事件 + bindQueryModeEvents(); +} + +/** + * 绑定事件 + */ +function bindEvents() { + // 查询模式切换 + document.querySelectorAll('input[name="queryMode"]').forEach(radio => { + radio.addEventListener('change', toggleQueryMode); + }); + + // 节点删除按钮事件委托 + document.addEventListener('click', function(e) { + if (e.target.closest('.remove-node')) { + e.target.closest('.node-input').remove(); + } + }); +} + +/** + * 绑定节点相关事件 + */ +function bindNodeEvents() { + // 初始绑定现有的删除按钮(如果只有一个节点则禁用删除) + updateNodeDeleteButtons(); +} + +/** + * 绑定查询模式切换事件 + */ +function bindQueryModeEvents() { + const randomMode = document.getElementById('randomMode'); + const specifiedMode = document.getElementById('specifiedMode'); + + randomMode.addEventListener('change', toggleQueryMode); + specifiedMode.addEventListener('change', toggleQueryMode); +} + +/** + * 切换查询模式 + */ +function toggleQueryMode() { + const randomMode = document.getElementById('randomMode').checked; + const randomOptions = document.getElementById('randomOptions'); + const specifiedOptions = document.getElementById('specifiedOptions'); + + if (randomMode) { + randomOptions.style.display = 'block'; + specifiedOptions.style.display = 'none'; + } else { + randomOptions.style.display = 'none'; + specifiedOptions.style.display = 'block'; + } +} + +/** + * 添加节点 + */ +function addNode(clusterId) { + const container = document.getElementById(`${clusterId}-nodes`); + const nodeInput = document.createElement('div'); + nodeInput.className = 'node-input'; + nodeInput.innerHTML = ` + + + + `; + container.appendChild(nodeInput); + updateNodeDeleteButtons(); +} + +/** + * 更新节点删除按钮状态 + */ +function updateNodeDeleteButtons() { + // 每个集群至少需要一个节点 + ['cluster1', 'cluster2'].forEach(clusterId => { + const container = document.getElementById(`${clusterId}-nodes`); + const nodeInputs = container.querySelectorAll('.node-input'); + const deleteButtons = container.querySelectorAll('.remove-node'); + + deleteButtons.forEach(btn => { + btn.disabled = nodeInputs.length <= 1; + }); + }); +} + +/** + * 获取集群配置 + */ +function getClusterConfig(clusterId) { + const name = document.getElementById(`${clusterId}-name`).value; + const password = document.getElementById(`${clusterId}-password`).value; + const timeout = parseInt(document.getElementById(`${clusterId}-timeout`).value); + const maxConn = parseInt(document.getElementById(`${clusterId}-max-conn`).value); + + // 获取节点列表 + const nodes = []; + const nodeInputs = document.querySelectorAll(`#${clusterId}-nodes .node-input`); + + nodeInputs.forEach(nodeInput => { + const host = nodeInput.querySelector('.node-host').value.trim(); + const port = 
parseInt(nodeInput.querySelector('.node-port').value); + + if (host && port) { + nodes.push({ host, port }); + } + }); + + return { + name, + nodes, + password: password || null, + socket_timeout: timeout, + socket_connect_timeout: timeout, + max_connections_per_node: maxConn + }; +} + +/** + * 获取查询选项 + */ +function getQueryOptions() { + const randomMode = document.getElementById('randomMode').checked; + + if (randomMode) { + return { + mode: 'random', + count: parseInt(document.getElementById('sampleCount').value), + pattern: document.getElementById('keyPattern').value, + source_cluster: document.getElementById('sourceCluster').value + }; + } else { + const keysText = document.getElementById('specifiedKeys').value.trim(); + const keys = keysText ? keysText.split('\n').map(k => k.trim()).filter(k => k) : []; + + return { + mode: 'specified', + keys: keys + }; + } +} + +/** + * 测试连接 + */ +async function testConnection(clusterId) { + const config = getClusterConfig(clusterId); + const clusterName = config.name; + + if (!config.nodes || config.nodes.length === 0) { + showAlert('请至少配置一个Redis节点', 'warning'); + return; + } + + try { + showLoading(`正在测试${clusterName}连接...`); + + const response = await fetch('/api/redis/test-connection', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + cluster_config: config, + cluster_name: clusterName + }) + }); + + const result = await response.json(); + hideLoading(); + + if (result.success) { + showAlert(`${clusterName}连接成功!连接耗时: ${result.data.connection_time.toFixed(3)}秒`, 'success'); + + // 高亮成功的集群配置 + document.getElementById(`${clusterId}-config`).classList.add('active'); + setTimeout(() => { + document.getElementById(`${clusterId}-config`).classList.remove('active'); + }, 2000); + } else { + showAlert(`${clusterName}连接失败: ${result.error}`, 'danger'); + } + } catch (error) { + hideLoading(); + showAlert(`连接测试失败: ${error.message}`, 'danger'); + } +} + +/** + * 执行Redis比较 + */ +async function executeRedisComparison() { + if (isQuerying) { + showAlert('查询正在进行中,请稍候...', 'warning'); + return; + } + + // 获取配置 + const cluster1Config = getClusterConfig('cluster1'); + const cluster2Config = getClusterConfig('cluster2'); + const queryOptions = getQueryOptions(); + + // 验证配置 + if (!cluster1Config.nodes || cluster1Config.nodes.length === 0) { + showAlert('请配置集群1的Redis节点', 'warning'); + return; + } + + if (!cluster2Config.nodes || cluster2Config.nodes.length === 0) { + showAlert('请配置集群2的Redis节点', 'warning'); + return; + } + + if (queryOptions.mode === 'specified' && (!queryOptions.keys || queryOptions.keys.length === 0)) { + showAlert('请输入要查询的Key列表', 'warning'); + return; + } + + try { + isQuerying = true; + showLoading('正在执行Redis数据比较,请稍候...'); + clearResults(); + + const response = await fetch('/api/redis/compare', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + cluster1_config: cluster1Config, + cluster2_config: cluster2Config, + query_options: queryOptions + }) + }); + + const result = await response.json(); + + if (response.ok && result.success !== false) { + currentResults = result; + displayResults(result); + showAlert('Redis数据比较完成!', 'success'); + } else { + showAlert(`比较失败: ${result.error}`, 'danger'); + } + } catch (error) { + showAlert(`请求失败: ${error.message}`, 'danger'); + } finally { + isQuerying = false; + hideLoading(); + } +} + +/** + * 显示结果 + */ +function displayResults(results) { + // 显示统计卡片 + displayStatsCards(results.stats); + + // 显示详细结果 + 
displayDifferenceResults(results.different_results || []); + displayIdenticalResults(results.identical_results || []); + displayMissingResults(results.missing_results || []); + displayPerformanceReport(results.performance_report); + + // 更新标签页计数 + updateTabCounts(results); + + // 显示结果区域 + document.getElementById('results').style.display = 'block'; + + // 滚动到结果区域 + document.getElementById('results').scrollIntoView({ behavior: 'smooth' }); +} + +/** + * 显示统计卡片 + */ +function displayStatsCards(stats) { + const container = document.getElementById('statsCards'); + container.innerHTML = ` +
+
+

${stats.total_keys}

+

总Key数量

+
+
+
+
+

${stats.identical_count}

+

相同数据

+ ${stats.identical_percentage}% +
+
+
+
+

${stats.different_count}

+

差异数据

+ ${stats.different_percentage}% +
+
+
+
+

${stats.missing_in_cluster1 + stats.missing_in_cluster2 + stats.both_missing}

+

缺失数据

+ ${stats.missing_percentage}% +
+
+ `; +} + +/** + * 显示差异结果 + */ +function displayDifferenceResults(differences) { + const container = document.getElementById('differenceResults'); + + if (differences.length === 0) { + container.innerHTML = '
未发现数据差异
'; + return; + } + + let html = ''; + differences.forEach((diff, index) => { + html += ` +
+
Key: ${diff.key}
+
+
+ ${currentResults.clusters.cluster1_name}: +
${formatRedisValue(diff.cluster1_value)}
+
+
+ ${currentResults.clusters.cluster2_name}: +
${formatRedisValue(diff.cluster2_value)}
+
+
+
+ ${diff.message} +
+
+ `; + }); + + container.innerHTML = html; +} + +/** + * 显示相同结果 + */ +function displayIdenticalResults(identical) { + const container = document.getElementById('identicalResults'); + + if (identical.length === 0) { + container.innerHTML = '
没有相同的数据
'; + return; + } + + let html = ''; + identical.forEach((item, index) => { + html += ` +
+
Key: ${item.key}
+
+ 值: +
${formatRedisValue(item.value)}
+
+
+ 数据一致 +
+
+ `; + }); + + container.innerHTML = html; +} + +/** + * 显示缺失结果 + */ +function displayMissingResults(missing) { + const container = document.getElementById('missingResults'); + + if (missing.length === 0) { + container.innerHTML = '
没有缺失的数据
'; + return; + } + + let html = ''; + missing.forEach((item, index) => { + html += ` +
+
Key: ${item.key}
+
+ ${item.message} +
+ ${item.cluster1_value !== undefined ? ` +
+ ${currentResults.clusters.cluster1_name}: +
${formatRedisValue(item.cluster1_value)}
+
+ ` : ''} + ${item.cluster2_value !== undefined ? ` +
+ ${currentResults.clusters.cluster2_name}: +
${formatRedisValue(item.cluster2_value)}
+
+ ` : ''} +
+ `; + }); + + container.innerHTML = html; +} + +/** + * 显示性能报告 + */ +function displayPerformanceReport(performanceReport) { + const container = document.getElementById('performanceReport'); + + const connections = performanceReport.connections || {}; + const operations = performanceReport.operations || {}; + + let html = ` +
性能统计报告
+
+
+
连接统计
+ + + + + + + + + + `; + + Object.entries(connections).forEach(([clusterName, status]) => { + const statusClass = status.success ? 'success' : 'danger'; + const statusText = status.success ? '成功' : '失败'; + + html += ` + + + + + + `; + }); + + html += ` + +
集群状态耗时
${clusterName}${statusText}${status.connect_time.toFixed(3)}s
+
+
+
操作统计
+ + + + + + + + + `; + + if (operations.scan_time > 0) { + html += ``; + } + + Object.entries(operations.queries || {}).forEach(([operation, duration]) => { + html += ``; + }); + + if (operations.comparison_time > 0) { + html += ``; + } + + html += ` + + + + + +
操作耗时
扫描Keys${operations.scan_time.toFixed(3)}s
${operation}${duration.toFixed(3)}s
数据比对${operations.comparison_time.toFixed(3)}s
总耗时${performanceReport.total_time.toFixed(3)}s
+
+
+ `; + + container.innerHTML = html; +} + +/** + * 更新标签页计数 + */ +function updateTabCounts(results) { + document.getElementById('diff-count').textContent = (results.different_results || []).length; + document.getElementById('identical-count').textContent = (results.identical_results || []).length; + document.getElementById('missing-count').textContent = (results.missing_results || []).length; +} + +/** + * 格式化Redis值显示 + */ +function formatRedisValue(value) { + if (value === null) { + return '(null)'; + } + + if (value === undefined) { + return '(undefined)'; + } + + // 如果是字符串且看起来像JSON,尝试格式化 + if (typeof value === 'string') { + try { + const parsed = JSON.parse(value); + return JSON.stringify(parsed, null, 2); + } catch (e) { + // 不是JSON,直接返回 + return value; + } + } + + return String(value); +} + +/** + * 加载默认配置 + */ +async function loadDefaultConfig() { + try { + const response = await fetch('/api/redis/default-config'); + const config = await response.json(); + + // 这里可以根据需要设置默认值,当前HTML已经包含了合理的默认值 + } catch (error) { + console.warn('加载默认配置失败:', error); + } +} + +/** + * 清空结果 + */ +function clearResults() { + document.getElementById('results').style.display = 'none'; + currentResults = null; +} + +/** + * 显示加载状态 + */ +function showLoading(message = '正在处理...') { + const loadingElement = document.querySelector('.loading'); + const messageElement = loadingElement.querySelector('span'); + messageElement.textContent = message; + loadingElement.style.display = 'block'; +} + +/** + * 隐藏加载状态 + */ +function hideLoading() { + document.querySelector('.loading').style.display = 'none'; +} + +/** + * 显示提示消息 + */ +function showAlert(message, type = 'info') { + // 移除现有的alert + const existingAlert = document.querySelector('.alert-custom'); + if (existingAlert) { + existingAlert.remove(); + } + + // 创建新的alert + const alertDiv = document.createElement('div'); + alertDiv.className = `alert alert-${type} alert-dismissible fade show alert-custom`; + alertDiv.innerHTML = ` + ${message} + + `; + + // 插入到页面顶部 + const container = document.querySelector('.container'); + container.insertBefore(alertDiv, container.firstChild); + + // 5秒后自动消失 + setTimeout(() => { + if (alertDiv.parentNode) { + alertDiv.remove(); + } + }, 5000); +} + +/** + * Redis配置管理功能 + */ + +// 加载Redis配置组列表 +async function loadRedisConfigGroups() { + try { + const response = await fetch('/api/redis/config-groups'); + const result = await response.json(); + + const select = document.getElementById('redisConfigGroupSelect'); + select.innerHTML = ''; + + if (result.success && result.data) { + result.data.forEach(group => { + const option = document.createElement('option'); + option.value = group.id; + option.textContent = `${group.name} - ${group.description || '无描述'}`; + select.appendChild(option); + }); + } + } catch (error) { + console.error('加载Redis配置组失败:', error); + } +} + +// 显示保存Redis配置对话框 +function showSaveRedisConfigDialog() { + // 生成默认名称 + const timestamp = new Date().toLocaleString('zh-CN'); + document.getElementById('redisConfigGroupName').value = `Redis配置_${timestamp}`; + document.getElementById('redisConfigGroupDescription').value = ''; + + new bootstrap.Modal(document.getElementById('saveRedisConfigModal')).show(); +} + +// 保存Redis配置组 +async function saveRedisConfigGroup() { + const name = document.getElementById('redisConfigGroupName').value.trim(); + const description = document.getElementById('redisConfigGroupDescription').value.trim(); + + if (!name) { + showAlert('请输入配置组名称', 'warning'); + return; + } + + const cluster1Config = 
getClusterConfig('cluster1'); + const cluster2Config = getClusterConfig('cluster2'); + const queryOptions = getQueryOptions(); + + if (!cluster1Config.nodes || cluster1Config.nodes.length === 0) { + showAlert('请配置集群1信息', 'warning'); + return; + } + + if (!cluster2Config.nodes || cluster2Config.nodes.length === 0) { + showAlert('请配置集群2信息', 'warning'); + return; + } + + try { + const response = await fetch('/api/redis/config-groups', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + name: name, + description: description, + cluster1_config: cluster1Config, + cluster2_config: cluster2Config, + query_options: queryOptions + }) + }); + + const result = await response.json(); + + if (result.success) { + showAlert(result.message, 'success'); + bootstrap.Modal.getInstance(document.getElementById('saveRedisConfigModal')).hide(); + loadRedisConfigGroups(); // 重新加载配置组列表 + } else { + showAlert(result.error, 'danger'); + } + } catch (error) { + showAlert(`保存失败: ${error.message}`, 'danger'); + } +} + +// 加载选定的Redis配置组 +async function loadSelectedRedisConfigGroup() { + const groupId = document.getElementById('redisConfigGroupSelect').value; + + if (!groupId) { + showAlert('请选择配置组', 'warning'); + return; + } + + try { + const response = await fetch(`/api/redis/config-groups/${groupId}`); + const result = await response.json(); + + if (result.success && result.data) { + const config = result.data; + + // 加载集群1配置 + loadClusterConfig('cluster1', config.cluster1_config); + + // 加载集群2配置 + loadClusterConfig('cluster2', config.cluster2_config); + + // 加载查询选项 + loadQueryOptions(config.query_options); + + showAlert(`配置组 "${config.name}" 加载成功`, 'success'); + } else { + showAlert(result.error, 'danger'); + } + } catch (error) { + showAlert(`加载失败: ${error.message}`, 'danger'); + } +} + +// 加载集群配置到界面 +function loadClusterConfig(clusterId, config) { + // 设置集群名称 + document.getElementById(`${clusterId}-name`).value = config.name || ''; + + // 设置密码 + document.getElementById(`${clusterId}-password`).value = config.password || ''; + + // 设置超时和连接数 + document.getElementById(`${clusterId}-timeout`).value = config.socket_timeout || 3; + document.getElementById(`${clusterId}-max-conn`).value = config.max_connections_per_node || 16; + + // 清空现有节点 + const container = document.getElementById(`${clusterId}-nodes`); + container.innerHTML = ''; + + // 添加节点 + if (config.nodes && config.nodes.length > 0) { + config.nodes.forEach(node => { + const nodeInput = document.createElement('div'); + nodeInput.className = 'node-input'; + nodeInput.innerHTML = ` + + + + `; + container.appendChild(nodeInput); + }); + } else { + // 添加默认节点 + addNode(clusterId); + } + + updateNodeDeleteButtons(); +} + +// 加载查询选项 +function loadQueryOptions(queryOptions) { + if (queryOptions.mode === 'random') { + document.getElementById('randomMode').checked = true; + document.getElementById('sampleCount').value = queryOptions.count || 100; + document.getElementById('keyPattern').value = queryOptions.pattern || '*'; + document.getElementById('sourceCluster').value = queryOptions.source_cluster || 'cluster2'; + } else { + document.getElementById('specifiedMode').checked = true; + document.getElementById('specifiedKeys').value = (queryOptions.keys || []).join('\n'); + } + + toggleQueryMode(); +} + +// 显示Redis配置管理对话框 +function showManageRedisConfigDialog() { + loadRedisConfigGroupsForManagement(); + new bootstrap.Modal(document.getElementById('manageRedisConfigModal')).show(); +} + +// 为管理界面加载Redis配置组 +async function 
loadRedisConfigGroupsForManagement() { + try { + const response = await fetch('/api/redis/config-groups'); + const result = await response.json(); + + const container = document.getElementById('redisConfigGroupList'); + + if (result.success && result.data && result.data.length > 0) { + let html = '
'; + html += ''; + + result.data.forEach(group => { + html += ` + + + + + + + `; + }); + + html += '
名称描述创建时间操作
${group.name}${group.description || '无描述'}${new Date(group.created_at).toLocaleString('zh-CN')} + + +
'; + container.innerHTML = html; + } else { + container.innerHTML = '
暂无Redis配置组
'; + } + } catch (error) { + document.getElementById('redisConfigGroupList').innerHTML = '
加载失败: ' + error.message + '
'; + } +} + +// 通过ID加载Redis配置组 +async function loadRedisConfigGroupById(groupId) { + try { + const response = await fetch(`/api/redis/config-groups/${groupId}`); + const result = await response.json(); + + if (result.success && result.data) { + const config = result.data; + + // 加载配置 + loadClusterConfig('cluster1', config.cluster1_config); + loadClusterConfig('cluster2', config.cluster2_config); + loadQueryOptions(config.query_options); + + // 关闭管理对话框 + bootstrap.Modal.getInstance(document.getElementById('manageRedisConfigModal')).hide(); + + showAlert(`配置组 "${config.name}" 加载成功`, 'success'); + } else { + showAlert(result.error, 'danger'); + } + } catch (error) { + showAlert(`加载失败: ${error.message}`, 'danger'); + } +} + +// 删除Redis配置组 +async function deleteRedisConfigGroup(groupId, groupName) { + if (!confirm(`确定要删除配置组 "${groupName}" 吗?此操作不可恢复。`)) { + return; + } + + try { + const response = await fetch(`/api/redis/config-groups/${groupId}`, { + method: 'DELETE' + }); + + const result = await response.json(); + + if (result.success) { + showAlert(result.message, 'success'); + loadRedisConfigGroupsForManagement(); // 重新加载列表 + loadRedisConfigGroups(); // 重新加载下拉框 + } else { + showAlert(result.error, 'danger'); + } + } catch (error) { + showAlert(`删除失败: ${error.message}`, 'danger'); + } +} + +// 显示Redis配置导入对话框 +function showImportRedisConfigDialog(targetCluster) { + currentImportTarget = targetCluster; + document.getElementById('redisConfigImportText').value = ''; + new bootstrap.Modal(document.getElementById('importRedisConfigModal')).show(); +} + +// 导入Redis配置 +async function importRedisConfig() { + const configText = document.getElementById('redisConfigImportText').value.trim(); + + if (!configText) { + showAlert('请输入配置内容', 'warning'); + return; + } + + try { + const response = await fetch('/api/redis/import-config', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + config_text: configText + }) + }); + + const result = await response.json(); + + if (result.success && result.data) { + // 将配置应用到目标集群 + loadClusterConfig(currentImportTarget, result.data); + + bootstrap.Modal.getInstance(document.getElementById('importRedisConfigModal')).hide(); + showAlert(result.message, 'success'); + } else { + showAlert(result.error, 'danger'); + } + } catch (error) { + showAlert(`导入失败: ${error.message}`, 'danger'); + } +} + +/** + * Redis查询历史管理功能 + */ + +// 加载Redis查询历史 +async function loadRedisQueryHistory() { + try { + const response = await fetch('/api/redis/query-history'); + const result = await response.json(); + + const select = document.getElementById('redisHistorySelect'); + select.innerHTML = ''; + + if (result.success && result.data) { + result.data.forEach(history => { + const option = document.createElement('option'); + option.value = history.id; + option.textContent = `${history.name} - ${new Date(history.created_at).toLocaleString('zh-CN')}`; + select.appendChild(option); + }); + } + } catch (error) { + console.error('加载Redis查询历史失败:', error); + } +} + +// 显示保存Redis查询历史对话框 +function showSaveRedisHistoryDialog() { + if (!currentResults) { + showAlert('请先执行Redis比较查询', 'warning'); + return; + } + + // 生成默认名称 + const timestamp = new Date().toLocaleString('zh-CN'); + document.getElementById('redisHistoryName').value = `Redis查询_${timestamp}`; + document.getElementById('redisHistoryDescription').value = `Redis比较结果 - 总计${currentResults.stats.total_keys}个Key,发现${currentResults.stats.different_count}处差异`; + + new 
bootstrap.Modal(document.getElementById('saveRedisHistoryModal')).show();
+}
+
+// 保存Redis查询历史
+async function saveRedisQueryHistory() {
+    if (!currentResults) {
+        showAlert('没有可保存的查询结果', 'warning');
+        return;
+    }
+
+    const name = document.getElementById('redisHistoryName').value.trim();
+    const description = document.getElementById('redisHistoryDescription').value.trim();
+
+    if (!name) {
+        showAlert('请输入历史记录名称', 'warning');
+        return;
+    }
+
+    try {
+        const response = await fetch('/api/redis/query-history', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({
+                name: name,
+                description: description,
+                cluster1_config: getClusterConfig('cluster1'),
+                cluster2_config: getClusterConfig('cluster2'),
+                query_options: getQueryOptions(),
+                query_keys: currentResults.query_options?.keys || [],
+                results_summary: currentResults.stats,
+                execution_time: currentResults.performance_report?.total_time || 0,
+                total_keys: currentResults.stats.total_keys,
+                different_count: currentResults.stats.different_count,
+                identical_count: currentResults.stats.identical_count,
+                missing_count: currentResults.stats.missing_in_cluster1 + currentResults.stats.missing_in_cluster2,
+                raw_results: {
+                    identical_results: currentResults.identical_results,
+                    different_results: currentResults.different_results,
+                    missing_results: currentResults.missing_results,
+                    performance_report: currentResults.performance_report
+                }
+            })
+        });
+
+        const result = await response.json();
+
+        if (result.success) {
+            showAlert(result.message, 'success');
+            bootstrap.Modal.getInstance(document.getElementById('saveRedisHistoryModal')).hide();
+            loadRedisQueryHistory(); // 重新加载历史记录列表
+        } else {
+            showAlert(result.error, 'danger');
+        }
+    } catch (error) {
+        showAlert(`保存失败: ${error.message}`, 'danger');
+    }
+}
+
+// 加载选定的Redis查询历史
+async function loadSelectedRedisHistory() {
+    const historyId = document.getElementById('redisHistorySelect').value;
+
+    if (!historyId) {
+        showAlert('请选择历史记录', 'warning');
+        return;
+    }
+
+    try {
+        const response = await fetch(`/api/redis/query-history/${historyId}`);
+        const result = await response.json();
+
+        if (result.success && result.data) {
+            const history = result.data;
+
+            // 加载配置
+            loadClusterConfig('cluster1', history.cluster1_config);
+            loadClusterConfig('cluster2', history.cluster2_config);
+            loadQueryOptions(history.query_options);
+
+            // 如果有原始结果,直接显示
+            if (history.raw_results) {
+                const resultsToDisplay = {
+                    stats: history.results_summary,
+                    identical_results: history.raw_results.identical_results || [],
+                    different_results: history.raw_results.different_results || [],
+                    missing_results: history.raw_results.missing_results || [],
+                    performance_report: history.raw_results.performance_report || {},
+                    clusters: {
+                        cluster1_name: history.cluster1_config.name,
+                        cluster2_name: history.cluster2_config.name
+                    }
+                };
+
+                currentResults = resultsToDisplay;
+                displayResults(resultsToDisplay);
+                showAlert(`历史记录 "${history.name}" 加载成功`, 'success');
+            } else {
+                showAlert(`历史记录 "${history.name}" 配置加载成功,但没有结果数据`, 'info');
+            }
+        } else {
+            showAlert(result.error, 'danger');
+        }
+    } catch (error) {
+        showAlert(`加载失败: ${error.message}`, 'danger');
+    }
+}
+
+// 显示Redis查询历史管理对话框
+function showManageRedisHistoryDialog() {
+    loadRedisHistoryForManagement();
+    new bootstrap.Modal(document.getElementById('manageRedisHistoryModal')).show();
+}
+
+// 为管理界面加载Redis查询历史
+async function loadRedisHistoryForManagement() {
+    try {
+        const response = await fetch('/api/redis/query-history');
+ const result = await response.json(); + + const container = document.getElementById('redisHistoryList'); + + if (result.success && result.data && result.data.length > 0) { + let html = '
'; + html += ''; + + result.data.forEach(history => { + html += ` + + + + + + + + + + `; + }); + + html += '
名称描述Key数量差异数执行时间创建时间操作
${history.name}${history.description || '无描述'}${history.total_keys}${history.different_count}${history.execution_time.toFixed(3)}s${new Date(history.created_at).toLocaleString('zh-CN')} + + +
'; + container.innerHTML = html; + } else { + container.innerHTML = '
暂无Redis查询历史
'; + } + } catch (error) { + document.getElementById('redisHistoryList').innerHTML = '
加载失败: ' + error.message + '
';
+    }
+}
+
+// 通过ID加载Redis查询历史
+async function loadRedisHistoryById(historyId) {
+    try {
+        const response = await fetch(`/api/redis/query-history/${historyId}`);
+        const result = await response.json();
+
+        if (result.success && result.data) {
+            const history = result.data;
+
+            // 加载配置
+            loadClusterConfig('cluster1', history.cluster1_config);
+            loadClusterConfig('cluster2', history.cluster2_config);
+            loadQueryOptions(history.query_options);
+
+            // 如果有原始结果,直接显示
+            if (history.raw_results) {
+                const resultsToDisplay = {
+                    stats: history.results_summary,
+                    identical_results: history.raw_results.identical_results || [],
+                    different_results: history.raw_results.different_results || [],
+                    missing_results: history.raw_results.missing_results || [],
+                    performance_report: history.raw_results.performance_report || {},
+                    clusters: {
+                        cluster1_name: history.cluster1_config.name,
+                        cluster2_name: history.cluster2_config.name
+                    }
+                };
+
+                currentResults = resultsToDisplay;
+                displayResults(resultsToDisplay);
+            }
+
+            // 关闭管理对话框
+            bootstrap.Modal.getInstance(document.getElementById('manageRedisHistoryModal')).hide();
+
+            showAlert(`历史记录 "${history.name}" 加载成功`, 'success');
+        } else {
+            showAlert(result.error, 'danger');
+        }
+    } catch (error) {
+        showAlert(`加载失败: ${error.message}`, 'danger');
+    }
+}
+
+// 删除Redis查询历史
+async function deleteRedisHistory(historyId, historyName) {
+    if (!confirm(`确定要删除历史记录 "${historyName}" 吗?此操作不可恢复。`)) {
+        return;
+    }
+
+    try {
+        const response = await fetch(`/api/redis/query-history/${historyId}`, {
+            method: 'DELETE'
+        });
+
+        const result = await response.json();
+
+        if (result.success) {
+            showAlert(result.message, 'success');
+            loadRedisHistoryForManagement(); // 重新加载列表
+            loadRedisQueryHistory(); // 重新加载下拉框
+        } else {
+            showAlert(result.error, 'danger');
+        }
+    } catch (error) {
+        showAlert(`删除失败: ${error.message}`, 'danger');
+    }
+}
+
+/**
+ * Redis查询日志功能
+ */
+
+// 显示Redis查询日志对话框
+function showRedisQueryLogsDialog() {
+    loadRedisQueryLogs();
+    new bootstrap.Modal(document.getElementById('redisQueryLogsModal')).show();
+}
+
+// 加载Redis查询日志
+async function loadRedisQueryLogs() {
+    try {
+        const response = await fetch('/api/query-logs?limit=1000');
+        const result = await response.json();
+
+        const container = document.getElementById('redisQueryLogs');
+
+        if (result.success && result.data && result.data.length > 0) {
+            // 过滤Redis相关的日志
+            const redisLogs = result.data.filter(log =>
+                log.message.toLowerCase().includes('redis') ||
+                log.query_type === 'redis'
+            );
+
+            if (redisLogs.length > 0) {
+                let html = '';
+                redisLogs.forEach(log => {
+                    const levelClass = log.level === 'ERROR' ? 'text-danger' :
+                                       log.level === 'WARNING' ? 'text-warning' : 'text-info';
+                    html += `
+
+ [${log.timestamp}] + ${log.level} + ${log.message} +
+ `; + }); + container.innerHTML = html; + } else { + container.innerHTML = '
暂无Redis查询日志
'; + } + } else { + container.innerHTML = '
暂无查询日志
'; + } + } catch (error) { + document.getElementById('redisQueryLogs').innerHTML = '
加载日志失败: ' + error.message + '
'; + } +} + +// 刷新Redis查询日志 +function refreshRedisQueryLogs() { + loadRedisQueryLogs(); + showAlert('查询日志已刷新', 'info'); +} + +// 清空Redis查询日志 +async function clearRedisQueryLogs() { + if (!confirm('确定要清空所有查询日志吗?此操作不可恢复。')) { + return; + } + + try { + const response = await fetch('/api/query-logs', { + method: 'DELETE' + }); + + const result = await response.json(); + + if (result.success) { + showAlert(result.message, 'success'); + loadRedisQueryLogs(); // 重新加载日志 + } else { + showAlert(result.error, 'danger'); + } + } catch (error) { + showAlert(`清空日志失败: ${error.message}`, 'danger'); + } +} \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 270a7f3..87245eb 100644 --- a/templates/index.html +++ b/templates/index.html @@ -256,36 +256,36 @@ - +
-
即将推出
+
可用
- +
-

数据分析工具

+

Redis集群比对工具

- 强大的数据分析和可视化工具,支持多种数据源, - 提供丰富的图表类型和统计分析功能。 + 专业的Redis集群数据比对工具,支持生产环境与测试环境Redis数据差异分析, + 提供随机采样和指定Key查询两种模式。

-
计划功能:
+
核心功能:
    -
  • 多数据源连接支持
  • -
  • 交互式图表生成
  • -
  • 自定义报表制作
  • -
  • 数据趋势分析
  • -
  • 自动化报告生成
  • +
  • 支持Redis集群连接配置
  • +
  • 随机采样和指定Key查询
  • +
  • 智能数据比对和差异分析
  • +
  • 详细的性能统计报告
  • +
  • 历史记录和结果导出
- + + 立即使用 +
diff --git a/templates/redis_compare.html b/templates/redis_compare.html new file mode 100644 index 0000000..b5162ec --- /dev/null +++ b/templates/redis_compare.html @@ -0,0 +1,696 @@ + + + + + + Redis集群比对工具 + + + + + + + + +
+ + + + +
+
+
+ +
+

Redis集群比对工具

+

专业的Redis集群数据比对工具,支持随机采样和指定Key查询

+
+
+
+
+ + +
+ +
+
+

配置管理

+ +
+ +
+
+
+
配置组管理
+
+
+
+
+ +
+
+ +
+
+
+
+ +
+
+ +
+
+
+
+
+ + +
+
+
+
查询历史
+
+
+
+
+ +
+
+ +
+
+
+
+ +
+
+ +
+
+
+
+
+
+ + +
+
+
+
+
配置导入
+
+
+ + + + 支持YAML格式配置导入,如:clusterName、clusterAddress、clusterPassword等 + +
+
+
+
+
+
+
查询日志
+
+
+ + + 查看Redis比较操作的详细执行日志 + +
+
+
+
+
+
+ + +
+
+

Redis集群配置

+ +
+ +
+
+
集群1 (生产环境)
+ +
+ + +
+ +
+ +
+
+ + + +
+
+ +
+ +
+ + +
+ +
+
+ + +
+
+ + +
+
+ +
+ +
+
+
+ + +
+
+
集群2 (测试环境)
+ +
+ + +
+ +
+ +
+
+ + + +
+
+ +
+ +
+ + +
+ +
+
+ + +
+
+ + +
+
+ +
+ +
+
+
+
+
+
+
+ + +
+
+
+

查询选项

+ +
+ +
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+
+ + +
+
+ + +
+
+
+ + + +
+
+
+ + +
+
+
+ + + +
+
+ Loading... +
+ 正在执行Redis数据比较,请稍候... +
+
+
+
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file