初始化项目

This commit is contained in:
2025-07-31 18:05:10 +08:00
commit 6fecd70ca5
7 changed files with 3496 additions and 0 deletions

754
app.py Normal file
View File

@@ -0,0 +1,754 @@
from flask import Flask, render_template, request, jsonify
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
import json
import os
import logging
import sqlite3
from datetime import datetime
app = Flask(__name__)
# Configure logging: INFO level on the root logger, plus a module-level logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Path of the SQLite file that stores saved configuration groups
DATABASE_PATH = 'config_groups.db'
def init_database():
    """Create the ``config_groups`` table in the SQLite file if it is missing.

    Returns:
        bool: True when the schema statement was executed and committed,
        False when any exception was raised (the error is logged).
    """
    try:
        conn = sqlite3.connect(DATABASE_PATH)
        try:
            cursor = conn.cursor()
            # Create the configuration-group table
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS config_groups (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL UNIQUE,
                    description TEXT,
                    pro_config TEXT NOT NULL,
                    test_config TEXT NOT NULL,
                    query_config TEXT NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            conn.commit()
        finally:
            # Close the handle even when execute/commit raises, so a failed
            # initialisation does not leak the connection.
            conn.close()
        logger.info("数据库初始化完成")
        return True
    except Exception as e:
        logger.error(f"数据库初始化失败: {e}")
        return False
def ensure_database():
    """Make sure the SQLite file and the ``config_groups`` table exist.

    Calls ``init_database()`` when the file is missing, when the table is
    missing, or when the existence check itself fails.

    Returns:
        bool: True if the table already exists; otherwise whatever
        ``init_database()`` returns.
    """
    if not os.path.exists(DATABASE_PATH):
        logger.info("数据库文件不存在,正在创建...")
        return init_database()
    # Check whether the table exists
    try:
        conn = sqlite3.connect(DATABASE_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='config_groups'")
            result = cursor.fetchone()
        finally:
            # Always release the connection, including when the lookup raises.
            conn.close()
        if not result:
            logger.info("config_groups表不存在正在创建...")
            return init_database()
        return True
    except Exception as e:
        logger.error(f"检查数据库表失败: {e}")
        return init_database()
def get_db_connection():
    """Open a SQLite connection to ``DATABASE_PATH`` using ``sqlite3.Row`` rows.

    The caller is responsible for closing the returned connection.
    """
    connection = sqlite3.connect(DATABASE_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def normalize_json_string(value):
    """Return a canonical compact JSON rendering of *value* for comparison.

    Non-string input and strings that are not valid JSON are returned
    unchanged. Parsed lists are first passed through
    ``normalize_json_array``; everything is then serialised with sorted keys
    and no whitespace.
    """
    if not isinstance(value, str):
        return value
    try:
        parsed = json.loads(value)
        # Arrays get element-level normalisation before serialisation
        payload = normalize_json_array(parsed) if isinstance(parsed, list) else parsed
        return json.dumps(payload, sort_keys=True, separators=(',', ':'))
    except (json.JSONDecodeError, TypeError):
        # Not JSON: hand the original value back
        return value
def normalize_json_array(json_array):
    """Return the elements of *json_array* in a canonical, order-independent form.

    Dict elements and string elements that parse as JSON are serialised to
    compact sorted-key JSON, the elements are sorted, and every string that
    parses as JSON is decoded again. Two arrays holding the same elements in
    a different order therefore normalise to the same list.

    Args:
        json_array: Iterable of already-decoded JSON elements.

    Returns:
        list: The normalised elements. If normalisation fails, a warning is
        logged and *json_array* is returned unchanged.
    """
    json_errors = (ValueError, TypeError, RecursionError)

    def _mixed_type_key(element):
        # Total ordering for heterogeneous elements: group by type name,
        # then order by a stable textual form.
        return (type(element).__name__, json.dumps(element, sort_keys=True, default=str))

    try:
        normalized_elements = []
        for element in json_array:
            if isinstance(element, dict):
                # Canonical text form of a dict element
                normalized_elements.append(json.dumps(element, sort_keys=True, separators=(',', ':')))
            elif isinstance(element, str):
                # A string may itself hold JSON; canonicalise it if so
                try:
                    parsed_element = json.loads(element)
                    normalized_elements.append(json.dumps(parsed_element, sort_keys=True, separators=(',', ':')))
                except json_errors:
                    normalized_elements.append(element)
            else:
                normalized_elements.append(element)
        # Sort so element order no longer matters. A plain sort raises
        # TypeError on mixed types (e.g. int and str, or None values); fall
        # back to a type-aware key instead of giving up on normalisation.
        try:
            normalized_elements.sort()
        except TypeError:
            normalized_elements.sort(key=_mixed_type_key)
        # Decode the canonical strings back into objects
        result_array = []
        for element in normalized_elements:
            if isinstance(element, str):
                try:
                    result_array.append(json.loads(element))
                except json_errors:
                    result_array.append(element)
            else:
                result_array.append(element)
        return result_array
    except Exception as e:
        logger.warning(f"数组标准化失败: {e}")
        return json_array
def is_json_array_field(value):
    """Report whether *value* should be treated as a JSON array.

    Returns True for:
      * a string that parses as a JSON list;
      * a Python list that is empty, whose first element is not a string,
        or whose first element is a string that parses as JSON.

    A list whose first element is a string that does not parse as JSON,
    and any value that is neither ``str`` nor ``list``, yields False.
    """
    # Only decoding failures are treated as "not JSON"; unrelated errors
    # (e.g. KeyboardInterrupt) are no longer swallowed by a bare except.
    decode_errors = (ValueError, RecursionError)
    if isinstance(value, str):
        try:
            return isinstance(json.loads(value), list)
        except decode_errors:
            return False
    if isinstance(value, list):
        # A list of strings counts only when its first element holds JSON
        if len(value) > 0 and isinstance(value[0], str):
            try:
                json.loads(value[0])
            except decode_errors:
                return False
        return True
    return False
def compare_array_values(value1, value2):
    """Compare two array-like values, ignoring element order.

    Each argument may be a Python list or a string holding a JSON list.
    When both can be read as lists they are handed to
    ``compare_json_arrays``; in every other case the values are considered
    different.

    Returns:
        bool: True only if both values are arrays and ``compare_json_arrays``
        reports them equal. Unexpected errors are logged and yield False.
    """
    def _as_list(candidate):
        # Return the list behind *candidate*, or None if it is not an array.
        if isinstance(candidate, list):
            return candidate
        if isinstance(candidate, str):
            try:
                parsed = json.loads(candidate)
            except (ValueError, RecursionError):
                return None
            return parsed if isinstance(parsed, list) else None
        return None

    try:
        array1 = _as_list(value1)
        array2 = _as_list(value2)
        if array1 is None or array2 is None:
            return False
        return compare_json_arrays(array1, array2)
    except Exception as e:
        # Comparator failures are logged here rather than silently dropped.
        logger.warning(f"数组比较失败: {e}")
        return False
def compare_json_arrays(array1, array2):
    """Compare two decoded JSON arrays while ignoring element order.

    Both arrays are normalised with ``normalize_json_array`` and their
    sorted-key JSON serialisations are compared.

    Args:
        array1: First list.
        array2: Second list.

    Returns:
        bool: True when the arrays have the same length and identical
        normalised serialisations; False otherwise or when an error occurs
        (the error is logged).
    """
    try:
        if len(array1) != len(array2):
            return False
        # Normalise copies so the callers' lists are never reordered
        normalized_array1 = normalize_json_array(array1.copy())
        normalized_array2 = normalize_json_array(array2.copy())
        # default=str lets elements json cannot encode natively (dates,
        # UUIDs, ...) take part in the comparison. Without it json.dumps
        # raised and identical arrays were reported as different.
        comparable1 = json.dumps(normalized_array1, sort_keys=True, default=str)
        comparable2 = json.dumps(normalized_array2, sort_keys=True, default=str)
        return comparable1 == comparable2
    except Exception as e:
        logger.warning(f"JSON数组比较失败: {e}")
        return False
def format_json_for_display(value):
    """Render *value* as text for display.

    Strings holding valid JSON are pretty-printed (sorted keys, two-space
    indent, non-ASCII characters kept as-is). Anything else is returned via
    ``str(value)``.
    """
    if isinstance(value, str):
        try:
            # Pretty-print with indentation when the text is JSON
            return json.dumps(json.loads(value), sort_keys=True, indent=2, ensure_ascii=False)
        except (json.JSONDecodeError, TypeError):
            # Not JSON: fall through to the plain-text form
            pass
    return str(value)
def is_json_field(value):
    """Return True when *value* is a string that parses as JSON, else False."""
    if isinstance(value, str):
        try:
            json.loads(value)
        except (json.JSONDecodeError, TypeError):
            return False
        return True
    return False
def compare_values(value1, value2):
    """Compare two field values with JSON- and array-aware rules.

    * If either value looks like an array (``is_json_array_field``), the
      order-insensitive ``compare_array_values`` decides.
    * If both are strings, their ``normalize_json_string`` forms are compared.
    * Otherwise plain ``==`` is used.
    """
    operands = (value1, value2)
    # Array handling takes precedence over everything else
    if any(is_json_array_field(operand) for operand in operands):
        return compare_array_values(value1, value2)
    # Two strings: compare their normalised JSON forms
    if all(isinstance(operand, str) for operand in operands):
        return normalize_json_string(value1) == normalize_json_string(value2)
    # Anything else: direct comparison
    return value1 == value2
# Default configuration: connection and credential fields are blank
DEFAULT_CONFIG = dict(
    pro_config=dict(
        cluster_name='',
        hosts=[],
        port=9042,
        datacenter='',
        username='',
        password='',
        keyspace='',
        table='',
    ),
    test_config=dict(
        cluster_name='',
        hosts=[],
        port=9042,
        datacenter='',
        username='',
        password='',
        keyspace='',
        table='',
    ),
    keys=['docid'],
    fields_to_compare=[],
    exclude_fields=[],
)
def save_config_group(name, description, pro_config, test_config, query_config):
    """Create or update the configuration group called *name*.

    The three config arguments are stored as JSON text. An existing group
    with the same name is updated in place, so its ``id`` and ``created_at``
    are preserved; ``INSERT OR REPLACE`` used to delete and re-insert the
    row, which assigned a new id on every save.

    Args:
        name: Unique name of the group.
        description: Free-text description.
        pro_config: JSON-serialisable production connection settings.
        test_config: JSON-serialisable test connection settings.
        query_config: JSON-serialisable query settings.

    Returns:
        bool: True on success, False if the database is unavailable or the
        write fails (the error is logged).
    """
    if not ensure_database():
        logger.error("数据库初始化失败")
        return False
    conn = get_db_connection()
    cursor = conn.cursor()
    try:
        payload = (
            description,
            json.dumps(pro_config),
            json.dumps(test_config),
            json.dumps(query_config),
            datetime.now().isoformat(),
        )
        # Update first; insert only when no row carries this name.
        cursor.execute('''
            UPDATE config_groups
            SET description = ?, pro_config = ?, test_config = ?,
                query_config = ?, updated_at = ?
            WHERE name = ?
        ''', payload + (name,))
        if cursor.rowcount == 0:
            cursor.execute('''
                INSERT INTO config_groups
                (name, description, pro_config, test_config, query_config, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (name,) + payload)
        conn.commit()
        logger.info(f"配置组 '{name}' 保存成功")
        return True
    except Exception as e:
        logger.error(f"保存配置组失败: {e}")
        return False
    finally:
        conn.close()
def get_config_groups():
    """List every configuration group, most recently updated first.

    Returns:
        list[dict]: One dict per group with ``id``, ``name``,
        ``description``, ``created_at`` and ``updated_at``. An empty list is
        returned when the database is unavailable or the query fails.
    """
    if not ensure_database():
        logger.error("数据库初始化失败")
        return []
    conn = get_db_connection()
    cursor = conn.cursor()
    try:
        cursor.execute('''
            SELECT id, name, description, created_at, updated_at
            FROM config_groups
            ORDER BY updated_at DESC
        ''')
        summary_columns = ('id', 'name', 'description', 'created_at', 'updated_at')
        return [
            {column: record[column] for column in summary_columns}
            for record in cursor.fetchall()
        ]
    except Exception as e:
        logger.error(f"获取配置组失败: {e}")
        return []
    finally:
        conn.close()
def get_config_group_by_id(group_id):
    """Fetch one configuration group, decoding its stored JSON columns.

    Args:
        group_id: Primary key of the group.

    Returns:
        dict | None: The group with ``pro_config``, ``test_config`` and
        ``query_config`` decoded from JSON, or None when the group does not
        exist, the database is unavailable, or an error occurs.
    """
    if not ensure_database():
        logger.error("数据库初始化失败")
        return None
    conn = get_db_connection()
    cursor = conn.cursor()
    try:
        cursor.execute('''
            SELECT * FROM config_groups WHERE id = ?
        ''', (group_id,))
        record = cursor.fetchone()
        if not record:
            return None
        detail = {column: record[column] for column in ('id', 'name', 'description')}
        # The three config columns hold JSON text
        for column in ('pro_config', 'test_config', 'query_config'):
            detail[column] = json.loads(record[column])
        detail['created_at'] = record['created_at']
        detail['updated_at'] = record['updated_at']
        return detail
    except Exception as e:
        logger.error(f"获取配置组详情失败: {e}")
        return None
    finally:
        conn.close()
def delete_config_group(group_id):
    """Delete the configuration group with the given id.

    Returns:
        bool: True if a row was deleted; False if no row matched, the
        database is unavailable, or the delete failed.
    """
    if not ensure_database():
        logger.error("数据库初始化失败")
        return False
    conn = get_db_connection()
    cursor = conn.cursor()
    try:
        cursor.execute('DELETE FROM config_groups WHERE id = ?', (group_id,))
        conn.commit()
        if not cursor.rowcount > 0:
            # Nothing matched the id
            return False
        logger.info(f"配置组ID {group_id} 删除成功")
        return True
    except Exception as e:
        logger.error(f"删除配置组失败: {e}")
        return False
    finally:
        conn.close()
def create_connection(config):
    """Open a Cassandra cluster connection and a session on its keyspace.

    Args:
        config: Mapping with ``username``, ``password``, ``hosts``, ``port``
            and ``keyspace`` entries.

    Returns:
        tuple: ``(cluster, session)`` on success, ``(None, None)`` on any
        failure. The failure is logged, and a cluster that was created
        before the failure is shut down so it does not leak.
    """
    cluster = None
    try:
        auth_provider = PlainTextAuthProvider(username=config['username'], password=config['password'])
        cluster = Cluster(config['hosts'], port=config['port'], auth_provider=auth_provider)
        session = cluster.connect(config['keyspace'])
        return cluster, session
    except Exception as e:
        logger.error(f"Cassandra连接失败: {e}")
        if cluster is not None:
            # connect() failed after the Cluster object was built; release it.
            try:
                cluster.shutdown()
            except Exception as shutdown_error:
                logger.warning(f"关闭Cassandra集群失败: {shutdown_error}")
        return None, None
def execute_query(session, table, keys, fields, values, exclude_fields=None):
    """Run ``SELECT ... WHERE <keys[0]> IN (...)`` and return the rows as a list.

    Args:
        session: Object exposing ``execute(query_string)``.
        table: Table name, interpolated verbatim into the statement.
        keys: Key column names; only the first one is used in the filter.
        fields: Columns to select; an empty value selects ``*``.
        values: Key values, each rendered as a quoted CQL string literal.
        exclude_fields: Accepted for interface compatibility; not used here.

    Returns:
        list: The result rows, or an empty list when there is nothing to
        query or the query fails (the failure is logged).
    """
    if not keys or not values:
        # Without a key column or any value no valid IN clause can be built.
        return []
    try:
        # Build the filter. Single quotes inside a value are doubled, which
        # is the CQL string-literal escape, so a value cannot terminate the
        # literal and alter the statement.
        quoted_values = ["'{}'".format(str(value).replace("'", "''")) for value in values]
        query_conditions = f"{keys[0]} IN ({', '.join(quoted_values)})"
        # Decide which columns to read
        fields_str = ", ".join(fields) if fields else "*"
        query_sql = f"SELECT {fields_str} FROM {table} WHERE {query_conditions};"
        result = session.execute(query_sql)
        return list(result) if result else []
    except Exception as e:
        # Log the failure so it is distinguishable from "no rows found".
        logger.error(f"查询执行失败: {e}")
        return []
def compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values):
    """Compare production rows with test rows for every requested key value.

    Rows are grouped by the first key column and paired on all key columns.
    Paired rows are compared column by column with ``compare_values``.

    Args:
        pro_data: Rows from the production table (attribute access by column).
        test_data: Rows from the test table.
        keys: Key column names.
        fields_to_compare: Columns to compare; empty means all row ``_fields``.
        exclude_fields: Columns to skip.
        values: Key values that were queried.

    Returns:
        tuple: ``(differences, field_diff_count, identical_results)`` where
        *differences* lists per-field differences and missing-row messages,
        *field_diff_count* maps a column to its number of differences, and
        *identical_results* lists the rows that matched on every column.
    """
    differences = []
    field_diff_count = {}
    identical_results = []  # rows without any difference

    def key_of(row):
        # Key columns of *row* as a dict
        return {key: getattr(row, key) for key in keys}

    def same_key(left, right):
        return all(getattr(left, key) == getattr(right, key) for key in keys)

    for value in values:
        # Rows of both tables that belong to this key value
        rows_pro = [row for row in pro_data if getattr(row, keys[0]) == value]
        rows_test = [row for row in test_data if getattr(row, keys[0]) == value]

        for row_pro in rows_pro:
            # Counterpart with the same full key in the test table
            row_test = next((row for row in rows_test if same_key(row, row_pro)), None)
            if not row_test:
                # No counterpart in the test table
                differences.append({
                    'key': key_of(row_pro),
                    'message': '在测试表中未找到该行'
                })
                continue

            # Columns to compare, minus the excluded ones
            candidates = fields_to_compare if fields_to_compare else row_pro._fields
            columns = [col for col in candidates if col not in exclude_fields]
            row_differences = []
            identical_fields = {}
            for column in columns:
                value_pro = getattr(row_pro, column)
                value_test = getattr(row_test, column)
                if compare_values(value_pro, value_test):
                    # Equal: remember the displayed value
                    identical_fields[column] = format_json_for_display(value_pro)
                    continue
                row_differences.append({
                    'key': key_of(row_pro),
                    'field': column,
                    'pro_value': format_json_for_display(value_pro),
                    'test_value': format_json_for_display(value_test),
                    'is_json': is_json_field(value_pro) or is_json_field(value_test),
                    'is_array': is_json_array_field(value_pro) or is_json_array_field(value_test)
                })
                # Count differences per column
                field_diff_count[column] = field_diff_count.get(column, 0) + 1

            if row_differences:
                differences.extend(row_differences)
            else:
                # Fully identical row
                identical_results.append({
                    'key': key_of(row_pro),
                    'pro_fields': identical_fields,
                    'test_fields': {col: format_json_for_display(getattr(row_test, col)) for col in columns}
                })

        # Test rows that have no production counterpart
        for row_test in rows_test:
            row_pro = next((row for row in rows_pro if same_key(row, row_test)), None)
            if not row_pro:
                differences.append({
                    'key': key_of(row_test),
                    'message': '在生产表中未找到该行'
                })
    return differences, field_diff_count, identical_results
def generate_comparison_summary(total_keys, pro_count, test_count, differences, identical_results, field_diff_count):
    """Build the summary report for one comparison run.

    Args:
        total_keys: Number of key values queried.
        pro_count: Rows returned by the production query.
        test_count: Rows returned by the test query.
        differences: Difference entries produced by ``compare_results``.
        identical_results: Identical-row entries from ``compare_results``.
        field_diff_count: Mapping of column name to difference count.

    Returns:
        dict: Sections ``overview``, ``percentages``, ``field_analysis``,
        ``data_quality`` and ``recommendations``.
    """
    def safe_percentage(part, total):
        # Percentage rounded to two decimals; 0 when there is no total
        if total > 0:
            return round((part / total * 100), 2)
        return 0

    def count_with_message(text):
        return len([diff for diff in differences if diff.get('message') == text])

    # Basic statistics; differing records are counted by first key value
    changed_keys = {list(diff['key'].values())[0] for diff in differences if 'field' in diff}
    different_records = len(changed_keys)
    identical_records = len(identical_results)
    missing_in_test = count_with_message('在测试表中未找到该行')
    missing_in_pro = count_with_message('在生产表中未找到该行')

    identical_percentage = safe_percentage(identical_records, total_keys)
    different_percentage = safe_percentage(different_records, total_keys)

    overview = {
        'total_keys_queried': total_keys,
        'pro_records_found': pro_count,
        'test_records_found': test_count,
        'identical_records': identical_records,
        'different_records': different_records,
        'missing_in_test': missing_in_test,
        'missing_in_pro': missing_in_pro
    }
    percentages = {
        'data_consistency': identical_percentage,
        'data_differences': different_percentage,
        'missing_rate': safe_percentage(missing_in_test + missing_in_pro, total_keys)
    }
    if field_diff_count:
        field_analysis = {
            'total_fields_compared': len(field_diff_count),
            'most_different_fields': sorted(field_diff_count.items(), key=lambda x: x[1], reverse=True)[:5]
        }
    else:
        field_analysis = {'total_fields_compared': 0, 'most_different_fields': []}
    data_quality = {
        'completeness': safe_percentage(pro_count + test_count, total_keys * 2),
        'consistency_score': identical_percentage,
        'quality_level': get_quality_level(identical_percentage)
    }
    return {
        'overview': overview,
        'percentages': percentages,
        'field_analysis': field_analysis,
        'data_quality': data_quality,
        'recommendations': generate_recommendations(identical_percentage, missing_in_test, missing_in_pro, field_diff_count)
    }
def get_quality_level(consistency_percentage):
    """Map a consistency percentage to a quality tier.

    Tiers are ``>= 95``, ``>= 90``, ``>= 80`` and everything below. The
    result is a dict with ``level``, ``color`` and ``description``.
    """
    tiers = (
        (95, {'level': '优秀', 'color': 'success', 'description': '数据一致性非常高'}),
        (90, {'level': '良好', 'color': 'info', 'description': '数据一致性较高'}),
        (80, {'level': '一般', 'color': 'warning', 'description': '数据一致性中等,需要关注'}),
    )
    for threshold, verdict in tiers:
        if consistency_percentage >= threshold:
            return verdict
    return {'level': '较差', 'color': 'danger', 'description': '数据一致性较低,需要重点处理'}
def generate_recommendations(consistency_percentage, missing_in_test, missing_in_pro, field_diff_count):
    """Produce follow-up recommendations for a comparison run.

    One message is added for each condition that holds: consistency below
    90, rows missing in test, rows missing in production, and at least one
    differing field (the field with the most differences is named). When no
    condition holds, a single "all good" message is returned.
    """
    advice = []
    if consistency_percentage < 90:
        advice.append('建议重点关注数据一致性问题,检查数据同步机制')
    if missing_in_test > 0:
        advice.append(f'测试环境缺失 {missing_in_test} 条记录,建议检查数据迁移过程')
    if missing_in_pro > 0:
        advice.append(f'生产环境缺失 {missing_in_pro} 条记录,建议检查数据完整性')
    if field_diff_count:
        worst_field, worst_count = max(field_diff_count.items(), key=lambda item: item[1])
        advice.append(f'字段 "{worst_field}" 差异最多({worst_count}次),建议优先处理')
    if advice:
        return advice
    return ['数据质量良好,建议继续保持当前的数据管理流程']
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    return render_template('index.html')
@app.route('/db-compare')
def db_compare():
    """Render the ``db_compare.html`` template for ``/db-compare``."""
    return render_template('db_compare.html')
@app.route('/api/query', methods=['POST'])
def query_compare():
    """Run the same key lookup on both clusters and return the comparison.

    Reads ``pro_config``, ``test_config``, ``keys``, ``fields_to_compare``,
    ``exclude_fields`` and ``values`` from the JSON body; missing entries
    fall back to ``DEFAULT_CONFIG``.

    Returns:
        A JSON response with the comparison result, or ``{'error': ...}``
        with status 400 (no key values supplied) or 500 (connection or
        processing failure).
    """
    try:
        # A missing, malformed or non-object body is treated as empty so the
        # request is rejected with 400 below instead of failing with 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        logger.info("开始执行数据库比对查询")
        # Parse the configuration
        pro_config = data.get('pro_config', DEFAULT_CONFIG['pro_config'])
        test_config = data.get('test_config', DEFAULT_CONFIG['test_config'])
        keys = data.get('keys', DEFAULT_CONFIG['keys'])
        fields_to_compare = data.get('fields_to_compare', DEFAULT_CONFIG['fields_to_compare'])
        exclude_fields = data.get('exclude_fields', DEFAULT_CONFIG['exclude_fields'])
        values = data.get('values', [])
        if not values:
            logger.warning("查询失败未提供查询key值")
            return jsonify({'error': '请提供查询key值'}), 400
        logger.info(f"查询配置:{len(values)}个key值生产表{pro_config['table']},测试表:{test_config['table']}")
        # Open both connections
        pro_cluster, pro_session = create_connection(pro_config)
        test_cluster, test_session = create_connection(test_config)
        try:
            # Checked inside the try so the finally clause also releases the
            # cluster that did connect when the other one failed.
            if not pro_session or not test_session:
                logger.error("数据库连接失败")
                return jsonify({'error': '数据库连接失败,请检查配置信息'}), 500
            # Run the queries
            logger.info("执行生产环境查询")
            pro_data = execute_query(pro_session, pro_config['table'], keys, fields_to_compare, values, exclude_fields)
            logger.info("执行测试环境查询")
            test_data = execute_query(test_session, test_config['table'], keys, fields_to_compare, values, exclude_fields)
            logger.info(f"查询结果:生产表 {len(pro_data)} 条记录,测试表 {len(test_data)} 条记录")
            # Compare the results
            differences, field_diff_count, identical_results = compare_results(pro_data, test_data, keys, fields_to_compare, exclude_fields, values)
            # IDs (first key value) that have at least one field difference
            different_ids = set()
            for diff in differences:
                if 'field' in diff:
                    different_ids.add(list(diff['key'].values())[0])
            non_different_ids = set(values) - different_ids
            # Build the summary report
            summary = generate_comparison_summary(
                len(values), len(pro_data), len(test_data),
                differences, identical_results, field_diff_count
            )
            result = {
                'total_keys': len(values),
                'pro_count': len(pro_data),
                'test_count': len(test_data),
                'differences': differences,
                'identical_results': identical_results,
                'field_diff_count': field_diff_count,
                'different_ids': list(different_ids),
                'non_different_ids': list(non_different_ids),
                'summary': summary,
                'raw_pro_data': [dict(row._asdict()) for row in pro_data] if pro_data else [],
                'raw_test_data': [dict(row._asdict()) for row in test_data] if test_data else []
            }
            logger.info(f"比对完成:发现 {len(differences)} 处差异")
            return jsonify(result)
        except Exception as e:
            logger.error(f"查询执行失败:{str(e)}")
            return jsonify({'error': f'查询执行失败:{str(e)}'}), 500
        finally:
            # Close whichever clusters were opened
            for opened_cluster in (pro_cluster, test_cluster):
                if opened_cluster:
                    opened_cluster.shutdown()
    except Exception as e:
        logger.error(f"请求处理失败:{str(e)}")
        return jsonify({'error': f'请求处理失败:{str(e)}'}), 500
@app.route('/api/default-config')
def get_default_config():
    """Return ``DEFAULT_CONFIG`` as a JSON response."""
    return jsonify(DEFAULT_CONFIG)
# Configuration-group management API
@app.route('/api/config-groups', methods=['GET'])
def api_get_config_groups():
    """Return every saved configuration group as ``{'success': True, 'data': [...]}``."""
    return jsonify({'success': True, 'data': get_config_groups()})
@app.route('/api/config-groups', methods=['POST'])
def api_save_config_group():
    """Create or update a configuration group from the JSON request body.

    Expects ``name`` plus optional ``description``, ``pro_config``,
    ``test_config``, ``keys``, ``fields_to_compare`` and ``exclude_fields``.

    Returns:
        A JSON response: success, 400 when the name is empty, or 500 when
        saving fails.
    """
    try:
        # Treat a missing, malformed or non-object body as empty so it is
        # answered with the 400 below rather than an AttributeError -> 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        # `or ''` covers explicit JSON nulls; str() covers non-string values.
        name = str(data.get('name') or '').strip()
        description = str(data.get('description') or '').strip()
        pro_config = data.get('pro_config', {})
        test_config = data.get('test_config', {})
        query_config = {
            'keys': data.get('keys', []),
            'fields_to_compare': data.get('fields_to_compare', []),
            'exclude_fields': data.get('exclude_fields', [])
        }
        if not name:
            return jsonify({'success': False, 'error': '配置组名称不能为空'}), 400
        success = save_config_group(name, description, pro_config, test_config, query_config)
        if success:
            return jsonify({'success': True, 'message': '配置组保存成功'})
        else:
            return jsonify({'success': False, 'error': '配置组保存失败'}), 500
    except Exception as e:
        logger.error(f"保存配置组API失败: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/config-groups/<int:group_id>', methods=['GET'])
def api_get_config_group(group_id):
    """Return the details of one configuration group, or 404 if it is not found."""
    config_group = get_config_group_by_id(group_id)
    if not config_group:
        return jsonify({'success': False, 'error': '配置组不存在'}), 404
    return jsonify({'success': True, 'data': config_group})
@app.route('/api/config-groups/<int:group_id>', methods=['DELETE'])
def api_delete_config_group(group_id):
    """Delete one configuration group; respond 500 when ``delete_config_group`` reports failure."""
    if not delete_config_group(group_id):
        return jsonify({'success': False, 'error': '配置组删除失败'}), 500
    return jsonify({'success': True, 'message': '配置组删除成功'})
@app.route('/api/init-db', methods=['POST'])
def api_init_database():
    """Manually run ``init_database()`` (intended for testing) and report the outcome."""
    if not init_database():
        return jsonify({'success': False, 'error': '数据库初始化失败'}), 500
    return jsonify({'success': True, 'message': '数据库初始化成功'})
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless explicitly
    # requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)

276
demo/Query.py Normal file
View File

@@ -0,0 +1,276 @@
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.cqlengine.columns import Boolean
from cassandra.policies import ColDesc
# Connection settings for the "CBase Hot" cluster.
# NOTE(review): credentials are hard-coded in source below; consider loading
# them from the environment or a secrets store.
cluster_nodes_pro = ['10.20.2.22'] # IP addresses of the production Cassandra nodes
port_pro = 9042 # port used by the production cluster
username_pro = 'cbase' # production user name
password_pro = 'antducbaseadmin@2022' # production password
keyspace_pro = 'yuqing_skinny' # keyspace used on the production cluster
cluster_nodes_test = ['10.20.2.22'] # IP addresses of the test Cassandra nodes
port_test = 9042 # port used by the test cluster
username_test = 'cbase' # test user name
password_test = 'antducbaseadmin@2022' # test password
keyspace_test = 'yuqing_skinny' # keyspace used on the test cluster
# Alternative settings ("CBase Cold"), kept commented out
# cluster_nodes_pro = ['10.20.1.108']
# port_pro = 9042
# username_pro = 'cassandra'
# password_pro = 'cassandra'
# keyspace_pro = 'yuqing_skinny'
# cluster_nodes_test = ['10.20.1.108']
# port_test = 9042
# username_test = 'cassandra'
# password_test = 'cassandra'
# keyspace_test = 'yuqing_skinny'
# cluster_nodes_pro = ['10.20.1.119']
# port_pro = 9044
# username_pro = 'cbase'
# password_pro = 'antducbaseadmin@2022'
# keyspace_pro = 'yuqing_skinny'
data_table_pro = "document"
data_table_test = data_table_pro + "_test"
values = [] # key values to look up (filled from the input file)
# data_table_pro = "doc_view_8"
# data_table_test = "doc_view_test"
# Key columns; the first one is used in the IN filter
keys = ["docid"]
# An empty list means every column is compared
fields_to_compare = []
# fields_to_compare = [
# "statusid",
# "taglocation",
# "tagemotion",
# "tagindustry",
# "tagdomain",
# "tagtopic",
# "tagsimilar",
# "tagother",
# "hasprivacy",
# "istaged",
# "createat",
# ] # columns to compare
exclude_fields = [] # columns to leave out of the comparison
# exclude_fields = ['mcrelated','ocrtexts','']
# Lower-case all configured column names
fields_to_compare = [field.lower() for field in fields_to_compare]
exclude_fields = [field.lower() for field in exclude_fields]
keys = [field.lower() for field in keys]
# Number of differences found per column
field_diff_count = {}
# Report file and key-list input file (absolute, machine-specific paths)
output_file = "/Users/yovinchen/project/python/CassandraQueryComparator/QueryCassandra/output.txt"
input_file = "/Users/yovinchen/project/python/CassandraQueryComparator/QueryCassandra/input.txt"
# Truncate the report file
open(output_file, "w").close()
with open(input_file, "r") as file:
    # Every line holds comma-separated keys, optionally double-quoted.
    # Quotes and surrounding whitespace are removed so that `"a", "b"` yields
    # ['a', 'b']; the stray leading space previously kept in ' b' could never
    # match a stored key. Entries that end up empty are dropped.
    values = []
    for line in file:
        for item in line.split(","):
            cleaned = item.replace('"', '').strip()
            if cleaned:
                values.append(cleaned)
# Authentication provider for the production cluster
auth_provider = PlainTextAuthProvider(username=username_pro, password=password_pro)
# Connect to the production Cassandra cluster
cluster = Cluster(cluster_nodes_pro, port=port_pro, auth_provider=auth_provider)
session = cluster.connect(keyspace_pro) # session bound to keyspace_pro
# Authentication provider for the test cluster
auth_provider1 = PlainTextAuthProvider(username=username_test, password=password_test)
# Connect to the test Cassandra cluster
cluster1 = Cluster(cluster_nodes_test, port=port_test, auth_provider=auth_provider1)
session1 = cluster1.connect(keyspace_test) # session bound to keyspace_test
# Build the IN filter on the first key column.
# NOTE(review): values are interpolated into the CQL text without escaping.
query_conditions = f"""{keys[0]} IN ({', '.join([f"'{value}'" for value in values])})"""
# Select the configured columns, or * when none are configured
fields_str = ", ".join(fields_to_compare) if fields_to_compare else "*"
query_sql1 = f"SELECT {fields_str} FROM {data_table_pro} WHERE {query_conditions};"
query_sql2 = f"SELECT {fields_str} FROM {data_table_test} WHERE {query_conditions};"
# Run both queries
result_doc_data = session.execute(query_sql1)
result_doc_data_test = session1.execute(query_sql2)
# Materialise the results as lists (empty list when the result is falsy)
list_doc_data = list(result_doc_data) if result_doc_data else []
list_doc_data_test = list(result_doc_data_test) if result_doc_data_test else []
# list_doc_data = list(result_doc_data) if result_doc_data else []
# list_doc_data_test = list(result_doc_data_test) if result_doc_data_test else []
# Append the raw rows of both tables to the report, then note empty results.
with open(output_file, "a") as report:
    report.write(f"查询 {data_table_pro} 内容和 {data_table_test} 内容:\n")
    # Rows are paired by position in the two result lists
    for pro_row, test_row in zip(list_doc_data, list_doc_data_test):
        report.write(f"{pro_row}\n{test_row}\n\n")
    for table_name, table_rows in ((data_table_pro, list_doc_data), (data_table_test, list_doc_data_test)):
        if not table_rows:
            report.write(f"查询 {table_name} 的结果为空。")
            print(f"查询 {table_name} 的结果为空。")
# Detailed comparison results are collected here
differences = []
# Detailed row-by-row comparison
def compare_data(fields_to_compare=None, exclude_fields=None):
    """Compare the production and test rows for every value in ``values``.

    Results are appended to the module-level ``differences`` list and
    counted per column in ``field_diff_count``.

    Args:
        fields_to_compare: Columns to compare; falsy means all row ``_fields``.
        exclude_fields: Columns to skip; falsy means none.
    """
    exclude_fields = exclude_fields or []  # default to no exclusions

    def primary_key(row):
        return {key: getattr(row, key) for key in keys}

    def same_key(left, right):
        return all(getattr(left, key) == getattr(right, key) for key in keys)

    for value in values:
        # Rows of each table that carry this key value
        rows_data = [row for row in list_doc_data if getattr(row, keys[0]) == value]
        rows_test = [row for row in list_doc_data_test if getattr(row, keys[0]) == value]

        for row_data in rows_data:
            # Counterpart with the same full key in the test table
            row_test = next((row for row in rows_test if same_key(row, row_data)), None)
            if not row_test:
                differences.append({
                    '主键': primary_key(row_data),
                    '消息': f'{data_table_test} 中未找到该行'
                })
                continue
            # Compare column by column, skipping excluded columns
            candidates = fields_to_compare if fields_to_compare else row_data._fields
            for column in [col for col in candidates if col not in exclude_fields]:
                value_data = getattr(row_data, column)
                value_test = getattr(row_test, column)
                if value_data != value_test:
                    differences.append({
                        '主键': primary_key(row_data),
                        '字段': column,
                        '生产表': f"\n{value_data}\n",
                        '测试表': f"\n{value_test}\n"
                    })
                    # Count differences per column
                    field_diff_count[column] = field_diff_count.get(column, 0) + 1

        # Test rows that have no production counterpart
        for row_test in rows_test:
            row_data = next((row for row in rows_data if same_key(row, row_test)), None)
            if not row_data:
                differences.append({
                    '主键': primary_key(row_test),
                    '消息': f'{data_table_pro} 中未找到该行'
                })
compare_data(fields_to_compare, exclude_fields)
# Unique quoted IDs and column names that show differences
id_set = set()
field_set = set()
grouped_id_dict = {}
# Raw (unquoted) IDs with differences, used for the set arithmetic below
different_ids = set()
# Write the comparison report
with open(output_file, "a") as f:
    if differences:
        f.write("\n发现指定字段的差异:\n")
        for diff in differences:
            # One entry per difference
            f.write(f"主键: {diff['主键']}\n")
            f.write(f"字段: {diff.get('字段', 'N/A')}\n")
            f.write(f"生产表: {diff.get('生产表', 'N/A')}\n")
            f.write(f"测试表: {diff.get('测试表', 'N/A')}\n")
            f.write("-" * 50)  # separator
            f.write("\n")
            # Group the differing IDs by column name
            field = diff.get('字段', '未分组')
            group = grouped_id_dict.setdefault(field, set())
            for key in keys:
                # str() keeps non-string key values (int, UUID, ...) from
                # raising TypeError during concatenation.
                key_text = str(diff['主键'][key])
                quoted_id = '"' + key_text + '",'
                different_ids.add(key_text)
                id_set.add(quoted_id)
                group.add(quoted_id)
        # Grouped IDs
        if grouped_id_dict:
            # Trailing newline added so the first group starts on its own line
            f.write("\n差异ID按字段分组如下:\n")
            for field, ids in grouped_id_dict.items():
                field_set.add('"' + field + '",')
                f.write(f"字段: {field}\n")
                f.write("差异ID:")
                for quoted_id in ids:
                    f.write(quoted_id)
                f.write("\n")
                f.write("-" * 50)  # separator
                f.write("\n")
    else:
        f.write("\n指定字段未发现差异。\n")
    f.write("\n")
    # Columns with differences
    if field_set:
        f.write("\n存在差异的 字段 为:\n")
        for field in field_set:
            f.write(field + "\n")
    # IDs with differences
    if id_set:
        f.write("\n存在差异的 ID 为:\n")
        for topicid in id_set:
            f.write(topicid + "\n")
        f.write("\n")
    # IDs that were queried but show no difference
    non_different_ids = set(values) - different_ids
    if non_different_ids:
        f.write("\n不存在差异的 ID 为:\n")
        for topicid in non_different_ids:
            f.write(f'"{topicid}",\n')
        f.write("\n")
    f.write("总计key " + str(len(values)) + "")
    # Per-column difference counts
    if field_diff_count:
        f.write("\n字段差异统计如下:\n")
        for field, count in field_diff_count.items():
            f.write(f"字段 '{field}' 发现 {count} 处差异\n")
# Close both cluster connections (the test cluster used to be left open)
cluster.shutdown()
cluster1.shutdown()

283
demo/twcsQuery.py Normal file
View File

@@ -0,0 +1,283 @@
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.cqlengine.columns import Boolean
from cassandra.policies import ColDesc
# Cassandra cluster settings.
# NOTE(review): credentials are hard-coded in source below; consider loading
# them from the environment or a secrets store.
# Alternative production settings (doc_view), kept commented out
# cluster_nodes_pro = ['10.20.2.43']
# port_pro = 9042
# username_pro = 'cbase'
# password_pro = 'antducbaseadmin@2022'
# keyspace_pro = 'yuqing_twcs'
# Test side: "CBase Hot"
cluster_nodes_test = ['10.20.2.22'] # IP addresses of the test Cassandra nodes
port_test = 9042 # port used by the test cluster
username_test = 'cbase' # test user name
password_test = 'antducbaseadmin@2022' # test password
keyspace_test = 'yuqing_skinny' # keyspace used on the test cluster
# Production side: "CBase Cold"
cluster_nodes_pro = ['10.20.4.152'] # IP addresses of the production Cassandra nodes
port_pro = 9044 # port used by the production cluster
username_pro = 'cbase' # production user name
password_pro = 'antducbaseadmin@2022' # production password
keyspace_pro = 'yuqing_skinny' # keyspace used on the production cluster
# cluster_nodes_test = ['10.20.1.108']
# port_test = 9042
# username_test = 'cassandra'
# password_test = 'cassandra'
# keyspace_test = 'yuqing_skinny'
# cluster_nodes_pro = ['10.20.1.119']
# port_pro = 9044
# username_pro = 'cbase'
# password_pro = 'antducbaseadmin@2022'
# keyspace_pro = 'yuqing_skinny'
# data_table_pro = "doc_view"
# data_table_test = data_table_pro + "_test"
values = [] # key values to look up (filled from the input file)
data_table_pro = "wemedia_1"
data_table_test = "wemedia_test"
# Key columns; the first one is used in the IN filter
keys = ["wmid"]
# An empty list means every column is compared
fields_to_compare = []
# fields_to_compare = [
# "docid",
# "bitset",
# "crawltime",
# "createat",
# "domain",
# "fanslevel",
# "nickname",
# "officialsitetypes",
# "platform",
# "tagemotion",
# "taglocation",
# "tagsimilar",
# "userid",
# "username",
# ] # columns to compare
exclude_fields = [] # columns to leave out of the comparison
# exclude_fields = ['mcrelated','ocrtexts','']
# Number of differences found per column
field_diff_count = {}
# Report file and key-list input file (absolute, machine-specific paths)
output_file = "/Users/yovinchen/project/python/CassandraQueryComparator/QueryCassandra/output.txt"
input_file = "/Users/yovinchen/project/python/CassandraQueryComparator/QueryCassandra/input.txt"
# Truncate the report file
open(output_file, "w").close()
# Single-table variant: one key per line, blank lines skipped
# with open(input_file, "r") as file:
# values = [line.strip() for line in file if line.strip()]
# twcs variant: comma-separated keys (used below)
with open(input_file, "r") as file:
    # Every line holds comma-separated keys, optionally double-quoted.
    # Quotes and surrounding whitespace are removed so that `"a", "b"` yields
    # ['a', 'b']; the stray leading space previously kept in ' b' could never
    # match a stored key. Entries that end up empty are dropped.
    values = []
    for line in file:
        for item in line.split(","):
            cleaned = item.replace('"', '').strip()
            if cleaned:
                values.append(cleaned)
# Authentication provider for the production cluster
auth_provider = PlainTextAuthProvider(username=username_pro, password=password_pro)
# Connect to the production Cassandra cluster
cluster = Cluster(cluster_nodes_pro, port=port_pro, auth_provider=auth_provider)
session = cluster.connect(keyspace_pro) # session bound to keyspace_pro
# Authentication provider for the test cluster
auth_provider1 = PlainTextAuthProvider(username=username_test, password=password_test)
# Connect to the test Cassandra cluster
cluster1 = Cluster(cluster_nodes_test, port=port_test, auth_provider=auth_provider1)
session1 = cluster1.connect(keyspace_test) # session bound to keyspace_test
# Build the IN filter on the first key column.
# NOTE(review): values are interpolated into the CQL text without escaping.
query_conditions = f"""{keys[0]} IN ({', '.join([f"'{value}'" for value in values])})"""
# Select the configured columns, or * when none are configured
fields_str = ", ".join(fields_to_compare) if fields_to_compare else "*"
query_sql1 = f"SELECT {fields_str} FROM {data_table_pro} WHERE {query_conditions};"
query_sql2 = f"SELECT {fields_str} FROM {data_table_test} WHERE {query_conditions};"
# Run both queries
result_doc_data = session.execute(query_sql1)
result_doc_data_test = session1.execute(query_sql2)
# Materialise the results as lists (empty list when the result is falsy)
list_doc_data = list(result_doc_data) if result_doc_data else []
list_doc_data_test = list(result_doc_data_test) if result_doc_data_test else []
# list_doc_data = list(result_doc_data) if result_doc_data else []
# list_doc_data_test = list(result_doc_data_test) if result_doc_data_test else []
# Append the raw rows of both tables to the report, then note empty results.
with open(output_file, "a") as report:
    report.write(f"查询 {data_table_pro} 内容和 {data_table_test} 内容:\n")
    # Rows are paired by position in the two result lists
    for pro_row, test_row in zip(list_doc_data, list_doc_data_test):
        report.write(f"{pro_row}\n{test_row}\n\n")
    for table_name, table_rows in ((data_table_pro, list_doc_data), (data_table_test, list_doc_data_test)):
        if not table_rows:
            report.write(f"查询 {table_name} 的结果为空。")
            print(f"查询 {table_name} 的结果为空。")
# Detailed comparison results are collected here
differences = []
# 进行详细比较
def compare_data(fields_to_compare=None, exclude_fields=None):
    """Compare production rows with test rows and record every mismatch.

    Works on names from the enclosing scope: reads ``values``, ``keys``,
    ``list_doc_data``, ``list_doc_data_test``, ``data_table_pro`` and
    ``data_table_test``; appends result dicts to ``differences`` and bumps the
    per-column counters in ``field_diff_count``.

    For every entry of ``values`` (matched against the first key column):

    * each production row is paired with the test row that has the same value
      in *all* ``keys`` columns; differing columns are recorded one by one, and
      a production row without a partner is recorded as missing from the test
      table;
    * each test row without a production partner is recorded as missing from
      the production table.

    Args:
        fields_to_compare: Column names to compare. When empty or ``None``
            every field of the production row (``row._fields``) is compared.
        exclude_fields: Column names to leave out of the comparison.

    Returns:
        None. Results are delivered through ``differences`` and
        ``field_diff_count``.
    """
    exclude_fields = exclude_fields or []  # default: exclude nothing

    def _group_by_first_key(rows):
        """Bucket rows by their first-key value, keeping the original row order."""
        grouped = {}
        for row in rows:
            try:
                grouped.setdefault(getattr(row, keys[0]), []).append(row)
            except TypeError:
                # Unhashable key value: it cannot equal any of the hashable
                # lookup entries in `values`, so the row would never be selected.
                continue
        return grouped

    def _primary_key(row):
        """Return the {key column: value} mapping identifying ``row``."""
        return {key: getattr(row, key) for key in keys}

    def _same_key(left, right):
        """Tell whether two rows agree on every key column."""
        return all(getattr(left, key) == getattr(right, key) for key in keys)

    # Index both result sets once instead of rescanning them for every value.
    prod_by_key = _group_by_first_key(list_doc_data)
    test_by_key = _group_by_first_key(list_doc_data_test)

    for value in values:
        rows_data = prod_by_key.get(value, [])
        rows_test = test_by_key.get(value, [])

        for row_data in rows_data:
            # Find the test row carrying the same full primary key.
            row_test = next((row for row in rows_test if _same_key(row, row_data)), None)
            if row_test is None:
                differences.append({
                    '主键': _primary_key(row_data),
                    '消息': f'{data_table_test} 中未找到该行'
                })
                continue

            # Column-by-column comparison of the matched pair.
            columns = fields_to_compare if fields_to_compare else row_data._fields
            for column in columns:
                if column in exclude_fields:
                    continue
                value_data = getattr(row_data, column)
                value_test = getattr(row_test, column)
                if value_data != value_test:
                    differences.append({
                        '主键': _primary_key(row_data),
                        '字段': column,
                        '生产表': f"\n{value_data}\n",
                        '测试表': f"\n{value_test}\n"
                    })
                    # Count how many times each column differed.
                    field_diff_count[column] = field_diff_count.get(column, 0) + 1

        # Reverse direction: test rows that have no production counterpart.
        for row_test in rows_test:
            if not any(_same_key(row, row_test) for row in rows_data):
                differences.append({
                    '主键': _primary_key(row_test),
                    '消息': f'{data_table_pro} 中未找到该行'
                })
# Run the comparison; it fills `differences` and `field_diff_count`.
compare_data(fields_to_compare, exclude_fields)
# Sets holding the unique, already formatted ('"<value>",') IDs and field names.
id_set = set()
field_set = set()
# Maps a field name to the set of formatted IDs that differ in that field.
grouped_id_dict = {}
# Append the comparison report to the output file.
with open(output_file, "a") as f:
if differences:
f.write("\n发现指定字段的差异:\n")
for diff in differences:
# Write one entry per difference; "missing row" entries carry no
# field/production/test values, so those lines fall back to 'N/A'.
f.write(f"主键: {diff['主键']}\n")
f.write(f"字段: {diff.get('字段', 'N/A')}\n")
f.write(f"生产表: {diff.get('生产表', 'N/A')}\n")
f.write(f"测试表: {diff.get('测试表', 'N/A')}\n")
f.write("-" * 50) # separator line for readability
f.write("\n")
# Group the differing IDs by field name; entries without a field
# go into the '未分组' bucket.
field = diff.get('字段', '未分组')
if field not in grouped_id_dict:
grouped_id_dict[field] = set()
for key in keys:
# NOTE(review): `id` shadows the builtin, and the string concatenation
# below raises TypeError unless the key value is a str.
id = diff['主键'][key]
id_set.add('"' + id + '",')
grouped_id_dict[field].add('"' + id + '",')
# Write the differing IDs grouped by field.
if grouped_id_dict:
# NOTE(review): this heading has no trailing newline, so the first
# "字段:" line continues on the same line.
f.write("\n差异ID按字段分组如下:")
for field, ids in grouped_id_dict.items():
field_set.add('"' + field + '",')
f.write(f"字段: {field}\n")
f.write("差异ID:")
for id in ids:
f.write(id)
f.write("\n")
f.write("-" * 50) # separator line for readability
f.write("\n")
else:
f.write("\n指定字段未发现差异。\n")
f.write("\n")
# List the fields that differ, only when there are any.
if field_set:
f.write("\n存在差异的 字段 为:\n")
# One formatted field name per line.
for field in field_set:
f.write(field + "\n")
# List the IDs that differ, only when there are any.
if id_set:
f.write("\n存在差异的 ID 为:\n")
# One formatted ID per line.
for topicid in id_set:
f.write(topicid + "\n")
f.write("\n")
# Recover the bare IDs by stripping the '"' / ',' decoration added above.
different_ids = {id.strip('"').strip(',').strip('"') for id in id_set}
# IDs without differences: present in `values` but not in different_ids.
non_different_ids = set(values) - different_ids
# List them only when there are any.
if non_different_ids:
f.write("\n不存在差异的 ID 为:\n")
for topicid in non_different_ids:
f.write(f'"{topicid}",\n')
f.write("\n")
# Total number of lookup keys that were queried.
f.write("总计key " + len(values).__str__() + "")
# Per-field difference counts collected by compare_data.
if field_diff_count:
f.write("\n字段差异统计如下:\n")
for field, count in field_diff_count.items():
f.write(f"字段 '{field}' 发现 {count} 处差异\n")
# Shut down both clusters (production and test) so that neither driver
# connection pool is left open when the script finishes.
cluster.shutdown()
cluster1.shutdown()

2
requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
Flask==2.3.3
cassandra-driver==3.29.1

1408
static/js/app.js Normal file

File diff suppressed because it is too large Load Diff

399
templates/db_compare.html Normal file
View File

@@ -0,0 +1,399 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>数据库查询比对工具</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
<style>
.config-section {
background-color: #f8f9fa;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.result-section {
margin-top: 30px;
}
.difference-item {
border-left: 4px solid #dc3545;
padding-left: 15px;
margin-bottom: 15px;
background-color: #fff5f5;
padding: 15px;
border-radius: 5px;
}
.stat-card {
text-align: center;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
}
.loading {
display: none;
}
.query-keys {
min-height: 120px;
}
.compare-fields {
min-height: 80px;
}
.exclude-fields {
min-height: 80px;
}
.json-field {
font-family: 'Courier New', monospace;
font-size: 0.9em;
background-color: #f8f9fa !important;
}
.badge {
font-size: 0.7em;
}
.field-container {
border: 1px solid #e9ecef;
border-radius: 5px;
padding: 10px;
margin-bottom: 10px;
}
.field-value {
font-size: 0.85em;
max-height: 200px;
overflow-y: auto;
margin: 0;
}
.field-header {
font-weight: 600;
color: #495057;
}
.pagination {
--bs-pagination-padding-x: 0.5rem;
--bs-pagination-padding-y: 0.25rem;
--bs-pagination-font-size: 0.875rem;
}
.copy-btn {
position: absolute;
top: 5px;
right: 5px;
padding: 2px 6px;
font-size: 0.75rem;
border-radius: 3px;
}
.field-container {
position: relative;
}
/* 确保提示消息在最顶层 */
.alert {
position: fixed !important;
top: 20px !important;
left: 50% !important;
transform: translateX(-50%) !important;
z-index: 9999 !important;
min-width: 300px !important;
max-width: 600px !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3) !important;
}
</style>
</head>
<body>
<!-- 导航栏 -->
<nav class="navbar navbar-expand-lg navbar-dark bg-primary">
<div class="container">
<a class="navbar-brand" href="/">
<i class="fas fa-tools"></i> 大数据工具集合
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarNav">
<ul class="navbar-nav ms-auto">
<li class="nav-item">
<a class="nav-link" href="/">首页</a>
</li>
<li class="nav-item">
<a class="nav-link active" href="/db-compare">数据库比对</a>
</li>
</ul>
</div>
</div>
</nav>
<div class="container-fluid py-4">
<div class="row">
<div class="col-12">
<h1 class="text-center mb-4">
<i class="fas fa-database"></i> 数据库查询比对工具
</h1>
</div>
</div>
<div class="row">
<!-- 配置面板 -->
<div class="col-lg-4">
<div class="config-section">
<h4><i class="fas fa-cogs"></i> 配置管理</h4>
<!-- 配置组管理 -->
<div class="card mb-3">
<div class="card-header">
<h6><i class="fas fa-layer-group"></i> 配置组管理</h6>
</div>
<div class="card-body">
<div class="row mb-3">
<div class="col-8">
<select class="form-select form-select-sm" id="configGroupSelect">
<option value="">选择配置组...</option>
</select>
</div>
<div class="col-4">
<button class="btn btn-primary btn-sm w-100" onclick="loadSelectedConfigGroup()">
<i class="fas fa-download"></i> 加载
</button>
</div>
</div>
<div class="row">
<div class="col-6">
<button class="btn btn-success btn-sm w-100" onclick="showSaveConfigDialog()">
<i class="fas fa-save"></i> 保存配置组
</button>
</div>
<div class="col-6">
<button class="btn btn-info btn-sm w-100" onclick="showManageConfigDialog()">
<i class="fas fa-cog"></i> 管理配置组
</button>
</div>
</div>
</div>
</div>
<div class="mb-3">
<button class="btn btn-secondary btn-sm" onclick="loadDefaultConfig()">
<i class="fas fa-refresh"></i> 重置为空配置
</button>
<button class="btn btn-success btn-sm" onclick="exportConfig()">
<i class="fas fa-file-export"></i> 导出配置
</button>
</div>
<!-- 生产环境配置 -->
<div class="card mb-3">
<div class="card-header d-flex justify-content-between align-items-center">
<h6><i class="fas fa-server"></i> 生产环境配置</h6>
<button class="btn btn-sm btn-outline-primary" onclick="showImportDialog('pro')">
<i class="fas fa-download"></i> 一键导入
</button>
</div>
<div class="card-body">
<div class="row">
<div class="col-6">
<label class="form-label">集群名称</label>
<input type="text" class="form-control form-control-sm" id="pro_cluster_name" placeholder="Production Cluster">
</div>
<div class="col-6">
<label class="form-label">数据中心</label>
<input type="text" class="form-control form-control-sm" id="pro_datacenter" placeholder="dc1">
</div>
</div>
<div class="row mt-2">
<div class="col-8">
<label class="form-label">集群节点 (逗号分隔)</label>
<input type="text" class="form-control form-control-sm" id="pro_hosts" placeholder="10.20.2.22,10.20.2.23">
</div>
<div class="col-4">
<label class="form-label">端口</label>
<input type="number" class="form-control form-control-sm" id="pro_port" placeholder="9042">
</div>
</div>
<div class="row mt-2">
<div class="col-6">
<label class="form-label">用户名</label>
<input type="text" class="form-control form-control-sm" id="pro_username" placeholder="cbase">
</div>
<div class="col-6">
<label class="form-label">密码</label>
<input type="password" class="form-control form-control-sm" id="pro_password">
</div>
</div>
<div class="row mt-2">
<div class="col-6">
<label class="form-label">Keyspace</label>
<input type="text" class="form-control form-control-sm" id="pro_keyspace" placeholder="yuqing_skinny">
</div>
<div class="col-6">
<label class="form-label">表名</label>
<input type="text" class="form-control form-control-sm" id="pro_table" placeholder="document">
</div>
</div>
</div>
</div>
<!-- 测试环境配置 -->
<div class="card mb-3">
<div class="card-header d-flex justify-content-between align-items-center">
<h6><i class="fas fa-flask"></i> 测试环境配置</h6>
<button class="btn btn-sm btn-outline-primary" onclick="showImportDialog('test')">
<i class="fas fa-download"></i> 一键导入
</button>
</div>
<div class="card-body">
<div class="row">
<div class="col-6">
<label class="form-label">集群名称</label>
<input type="text" class="form-control form-control-sm" id="test_cluster_name" placeholder="Test Cluster">
</div>
<div class="col-6">
<label class="form-label">数据中心</label>
<input type="text" class="form-control form-control-sm" id="test_datacenter" placeholder="dc1">
</div>
</div>
<div class="row mt-2">
<div class="col-8">
<label class="form-label">集群节点 (逗号分隔)</label>
<input type="text" class="form-control form-control-sm" id="test_hosts" placeholder="10.20.2.22,10.20.2.23">
</div>
<div class="col-4">
<label class="form-label">端口</label>
<input type="number" class="form-control form-control-sm" id="test_port" placeholder="9042">
</div>
</div>
<div class="row mt-2">
<div class="col-6">
<label class="form-label">用户名</label>
<input type="text" class="form-control form-control-sm" id="test_username" placeholder="cbase">
</div>
<div class="col-6">
<label class="form-label">密码</label>
<input type="password" class="form-control form-control-sm" id="test_password">
</div>
</div>
<div class="row mt-2">
<div class="col-6">
<label class="form-label">Keyspace</label>
<input type="text" class="form-control form-control-sm" id="test_keyspace" placeholder="yuqing_skinny">
</div>
<div class="col-6">
<label class="form-label">表名</label>
<input type="text" class="form-control form-control-sm" id="test_table" placeholder="document_test">
</div>
</div>
</div>
</div>
<!-- 查询配置 -->
<div class="card">
<div class="card-header">
<h6><i class="fas fa-search"></i> 查询配置</h6>
</div>
<div class="card-body">
<div class="mb-3">
<label class="form-label">主键字段 (逗号分隔)</label>
<input type="text" class="form-control form-control-sm" id="keys" placeholder="docid" value="docid">
</div>
<div class="mb-3">
<label class="form-label">比较字段 (空则比较全部,逗号分隔)</label>
<textarea class="form-control form-control-sm compare-fields" id="fields_to_compare" placeholder="留空表示比较所有字段&#10;或输入: field1,field2,field3"></textarea>
</div>
<div>
<label class="form-label">排除字段 (逗号分隔)</label>
<textarea class="form-control form-control-sm exclude-fields" id="exclude_fields" placeholder="排除不需要比较的字段&#10;如: field1,field2"></textarea>
</div>
</div>
</div>
</div>
</div>
<!-- 查询面板 -->
<div class="col-lg-8">
<div class="config-section">
<h4><i class="fas fa-key"></i> 查询Key管理</h4>
<div class="mb-3">
<label class="form-label">批量Key输入 (一行一个)</label>
<textarea class="form-control query-keys" id="query_values" placeholder="请输入查询的Key值,一行一个&#10;例如:&#10;key1&#10;key2&#10;key3"></textarea>
</div>
<div class="mb-3">
<button class="btn btn-primary" onclick="executeQuery()">
<i class="fas fa-play"></i> 执行查询比对
</button>
<button class="btn btn-secondary" onclick="clearResults()">
<i class="fas fa-trash"></i> 清空结果
</button>
</div>
<!-- 加载动画 -->
<div class="loading text-center" id="loading">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">查询中...</span>
</div>
<p class="mt-2">正在执行查询比对...</p>
</div>
</div>
<!-- 结果面板 -->
<div class="result-section" id="results" style="display: none;">
<!-- 统计信息 -->
<div class="row" id="stats">
<!-- 统计卡片将在这里动态生成 -->
</div>
<!-- 结果选项卡导航 -->
<div class="card mt-4">
<div class="card-header">
<ul class="nav nav-tabs card-header-tabs" id="resultTabs" role="tablist">
<li class="nav-item" role="presentation">
<button class="nav-link active" id="differences-tab" data-bs-toggle="tab" data-bs-target="#differences-panel" type="button" role="tab">
<i class="fas fa-exclamation-triangle"></i> 差异详情 <span class="badge bg-danger ms-1" id="diff-count">0</span>
</button>
</li>
<li class="nav-item" role="presentation">
<button class="nav-link" id="identical-tab" data-bs-toggle="tab" data-bs-target="#identical-panel" type="button" role="tab">
<i class="fas fa-check-circle"></i> 相同结果 <span class="badge bg-success ms-1" id="identical-count">0</span>
</button>
</li>
<li class="nav-item" role="presentation">
<button class="nav-link" id="summary-tab" data-bs-toggle="tab" data-bs-target="#summary-panel" type="button" role="tab">
<i class="fas fa-chart-pie"></i> 比较总结
</button>
</li>
</ul>
<div class="mt-2">
<button class="btn btn-sm btn-outline-primary" onclick="exportResults()">
<i class="fas fa-download"></i> 导出结果
</button>
</div>
</div>
<div class="card-body">
<div class="tab-content" id="resultTabContent">
<!-- 差异详情面板 -->
<div class="tab-pane fade show active" id="differences-panel" role="tabpanel">
<div id="differences">
<!-- 差异内容将在这里动态生成 -->
</div>
</div>
<!-- 相同结果面板 -->
<div class="tab-pane fade" id="identical-panel" role="tabpanel">
<div id="identical-results">
<!-- 相同结果将在这里动态生成 -->
</div>
</div>
<!-- 比较总结面板 -->
<div class="tab-pane fade" id="summary-panel" role="tabpanel">
<div id="comparison-summary">
<!-- 总结报告将在这里动态生成 -->
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script>
<script src="{{ url_for('static', filename='js/app.js') }}"></script>
</body>
</html>

374
templates/index.html Normal file
View File

@@ -0,0 +1,374 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>大数据工具集合</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
<style>
body {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
font-family: 'Microsoft YaHei', Arial, sans-serif;
}
.hero-section {
padding: 80px 0;
text-align: center;
color: white;
}
.hero-title {
font-size: 3.5rem;
font-weight: bold;
margin-bottom: 1rem;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
.hero-subtitle {
font-size: 1.3rem;
margin-bottom: 3rem;
opacity: 0.9;
}
.tools-section {
padding: 60px 0;
background: rgba(255, 255, 255, 0.95);
backdrop-filter: blur(10px);
}
.tool-card {
background: white;
border-radius: 15px;
padding: 2rem;
margin-bottom: 2rem;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
transition: all 0.3s ease;
border: none;
height: 100%;
}
.tool-card:hover {
transform: translateY(-10px);
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.15);
}
.tool-icon {
font-size: 3rem;
margin-bottom: 1.5rem;
color: #667eea;
}
.tool-title {
font-size: 1.5rem;
font-weight: bold;
margin-bottom: 1rem;
color: #333;
}
.tool-description {
color: #666;
margin-bottom: 2rem;
line-height: 1.6;
}
.tool-features {
text-align: left;
margin-bottom: 2rem;
}
.tool-features li {
margin-bottom: 0.5rem;
color: #555;
}
.tool-btn {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
color: white;
font-weight: bold;
text-decoration: none;
transition: all 0.3s ease;
display: inline-block;
}
.tool-btn:hover {
transform: scale(1.05);
color: white;
text-decoration: none;
}
.stats-section {
padding: 40px 0;
background: rgba(255, 255, 255, 0.1);
color: white;
}
.stat-item {
text-align: center;
margin-bottom: 2rem;
}
.stat-number {
font-size: 3rem;
font-weight: bold;
display: block;
}
.stat-label {
font-size: 1.1rem;
opacity: 0.9;
}
.footer {
background: rgba(0, 0, 0, 0.8);
color: white;
text-align: center;
padding: 2rem 0;
}
.feature-badge {
background: #28a745;
color: white;
padding: 0.25rem 0.75rem;
border-radius: 15px;
font-size: 0.8rem;
font-weight: bold;
display: inline-block;
margin-bottom: 1rem;
}
.coming-soon {
background: #ffc107;
color: #333;
}
</style>
</head>
<body>
<!-- 导航栏 -->
<nav class="navbar navbar-expand-lg navbar-dark" style="background: rgba(0,0,0,0.1);">
<div class="container">
<a class="navbar-brand" href="/">
<i class="fas fa-tools"></i> 大数据工具集合
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarNav">
<ul class="navbar-nav ms-auto">
<li class="nav-item">
<a class="nav-link active" href="/">首页</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/db-compare">数据库比对</a>
</li>
</ul>
</div>
</div>
</nav>
<!-- 主标题区域 -->
<div class="hero-section">
<div class="container">
<h1 class="hero-title">
<i class="fas fa-database"></i> 大数据工具集合
</h1>
<p class="hero-subtitle">
专业的数据处理、分析和比对工具平台<br>
提升数据工作效率,简化复杂操作
</p>
</div>
</div>
<!-- 统计信息 -->
<div class="stats-section">
<div class="container">
<div class="row">
<div class="col-md-4">
<div class="stat-item">
<span class="stat-number">1</span>
<span class="stat-label">可用工具</span>
</div>
</div>
<div class="col-md-4">
<div class="stat-item">
<span class="stat-number">100%</span>
<span class="stat-label">可视化操作</span>
</div>
</div>
<div class="col-md-4">
<div class="stat-item">
<span class="stat-number">0</span>
<span class="stat-label">学习成本</span>
</div>
</div>
</div>
</div>
</div>
<!-- 工具展示区域 -->
<div class="tools-section">
<div class="container">
<div class="row justify-content-center">
<div class="col-lg-8">
<div class="text-center mb-5">
<h2 class="mb-3">可用工具</h2>
<p class="text-muted">选择合适的工具来处理您的数据任务</p>
</div>
</div>
</div>
<div class="row">
<!-- 数据库比对工具 -->
<div class="col-lg-6 col-md-12">
<div class="tool-card">
<div class="text-center">
<div class="feature-badge">可用</div>
<div class="tool-icon">
<i class="fas fa-exchange-alt"></i>
</div>
<h3 class="tool-title">数据库查询比对工具</h3>
<p class="tool-description">
专业的Cassandra数据库比对工具,支持生产环境与测试环境数据差异分析,
提供批量查询、字段级比对和详细统计报告。
</p>
</div>
<div class="tool-features">
<h5><i class="fas fa-star text-warning"></i> 核心功能:</h5>
<ul>
<li><i class="fas fa-check text-success"></i> 支持多环境数据库配置管理</li>
<li><i class="fas fa-check text-success"></i> 批量Key查询和数据比对</li>
<li><i class="fas fa-check text-success"></i> 自定义比较字段和排除字段</li>
<li><i class="fas fa-check text-success"></i> 可视化差异展示和统计</li>
<li><i class="fas fa-check text-success"></i> 配置和结果导出功能</li>
</ul>
</div>
<div class="text-center">
<a href="/db-compare" class="tool-btn">
<i class="fas fa-rocket"></i> 立即使用
</a>
</div>
</div>
</div>
<!-- 占位工具卡片 -->
<div class="col-lg-6 col-md-12">
<div class="tool-card">
<div class="text-center">
<div class="feature-badge coming-soon">即将推出</div>
<div class="tool-icon">
<i class="fas fa-chart-line"></i>
</div>
<h3 class="tool-title">数据分析工具</h3>
<p class="tool-description">
强大的数据分析和可视化工具,支持多种数据源,
提供丰富的图表类型和统计分析功能。
</p>
</div>
<div class="tool-features">
<h5><i class="fas fa-star text-warning"></i> 计划功能:</h5>
<ul>
<li><i class="fas fa-clock text-muted"></i> 多数据源连接支持</li>
<li><i class="fas fa-clock text-muted"></i> 交互式图表生成</li>
<li><i class="fas fa-clock text-muted"></i> 自定义报表制作</li>
<li><i class="fas fa-clock text-muted"></i> 数据趋势分析</li>
<li><i class="fas fa-clock text-muted"></i> 自动化报告生成</li>
</ul>
</div>
<div class="text-center">
<button class="tool-btn" disabled style="opacity: 0.6;">
<i class="fas fa-hourglass-half"></i> 开发中
</button>
</div>
</div>
</div>
</div>
<!-- 第二行工具 -->
<div class="row mt-4">
<div class="col-lg-6 col-md-12">
<div class="tool-card">
<div class="text-center">
<div class="feature-badge coming-soon">即将推出</div>
<div class="tool-icon">
<i class="fas fa-file-import"></i>
</div>
<h3 class="tool-title">数据导入导出工具</h3>
<p class="tool-description">
高效的数据迁移工具,支持多种格式和数据库类型之间的数据传输,
提供批量处理和进度监控功能。
</p>
</div>
<div class="tool-features">
<h5><i class="fas fa-star text-warning"></i> 计划功能:</h5>
<ul>
<li><i class="fas fa-clock text-muted"></i> 多格式数据支持</li>
<li><i class="fas fa-clock text-muted"></i> 批量数据处理</li>
<li><i class="fas fa-clock text-muted"></i> 实时进度监控</li>
<li><i class="fas fa-clock text-muted"></i> 数据映射配置</li>
<li><i class="fas fa-clock text-muted"></i> 错误处理和日志</li>
</ul>
</div>
<div class="text-center">
<button class="tool-btn" disabled style="opacity: 0.6;">
<i class="fas fa-hourglass-half"></i> 开发中
</button>
</div>
</div>
</div>
<div class="col-lg-6 col-md-12">
<div class="tool-card">
<div class="text-center">
<div class="feature-badge coming-soon">即将推出</div>
<div class="tool-icon">
<i class="fas fa-shield-alt"></i>
</div>
<h3 class="tool-title">数据质量检测工具</h3>
<p class="tool-description">
专业的数据质量评估工具,自动检测数据完整性、一致性和准确性问题,
生成详细的质量报告和改进建议。
</p>
</div>
<div class="tool-features">
<h5><i class="fas fa-star text-warning"></i> 计划功能:</h5>
<ul>
<li><i class="fas fa-clock text-muted"></i> 数据完整性检查</li>
<li><i class="fas fa-clock text-muted"></i> 重复数据检测</li>
<li><i class="fas fa-clock text-muted"></i> 数据格式验证</li>
<li><i class="fas fa-clock text-muted"></i> 质量评分系统</li>
<li><i class="fas fa-clock text-muted"></i> 自动化修复建议</li>
</ul>
</div>
<div class="text-center">
<button class="tool-btn" disabled style="opacity: 0.6;">
<i class="fas fa-hourglass-half"></i> 开发中
</button>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- 页脚 -->
<div class="footer">
<div class="container">
<p>&copy; 2024 大数据工具集合. 专注于提供高效的数据处理解决方案.</p>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>