# Source: BigDataTool/modules/redis_query.py — snapshot 2025-08-04 09:14:27 +08:00 (355 lines).
# NOTE: the original page carried a "contains ambiguous Unicode characters" viewer warning;
# the only non-ASCII content is intentional Chinese text in docstrings and log messages.
"""
Redis查询和数据比较模块
负责Redis数据的查询、随机key获取和数据比较
"""
import time
import logging
import random
from redis.cluster import key_slot
from redis.exceptions import RedisError
from .redis_client import RedisPerformanceTracker
logger = logging.getLogger(__name__)
def get_random_keys_from_redis(redis_client, count=100, pattern="*", performance_tracker=None):
    """Fetch a random sample of keys from Redis.

    Iterates the keyspace with SCAN (cursor-based, non-blocking, unlike KEYS)
    and deliberately over-collects before sampling, so the result is closer to
    a uniform random pick than the first keys the cursor happens to yield.

    Args:
        redis_client: Redis client instance (single node or cluster).
        count: Number of keys to return.
        pattern: Key match pattern (default "*").
        performance_tracker: Optional tracker; receives the scan duration.

    Returns:
        list: Up to ``count`` randomly chosen keys; empty list on RedisError.
    """
    started = time.time()
    harvested = set()
    logger.info(f"开始扫描获取随机keys目标数量: {count},模式: {pattern}")
    try:
        # SCAN count hint: larger server-side batches mean fewer round trips
        # and a bigger candidate pool for sampling.
        batch_hint = max(count * 2, 1000)
        for candidate in redis_client.scan_iter(match=pattern, count=batch_hint):
            harvested.add(candidate)
            # Stop once the pool reaches 3x the target — enough headroom
            # for a reasonably random down-sample.
            if len(harvested) >= count * 3:
                break
        # Down-sample only when we over-collected; otherwise return all found.
        if len(harvested) > count:
            selected = random.sample(list(harvested), count)
        else:
            selected = list(harvested)
        elapsed = time.time() - started
        if performance_tracker:
            performance_tracker.record_scan_time(elapsed)
        logger.info(f"扫描获取 {len(selected)} 个随机keys耗时 {elapsed:.3f}")
        return selected
    except RedisError as e:
        elapsed = time.time() - started
        if performance_tracker:
            # Record the time spent even on failure so reports stay complete.
            performance_tracker.record_scan_time(elapsed)
        logger.error(f"获取随机keys失败: {e},耗时 {elapsed:.3f}")
        return []
def get_redis_values_by_keys(redis_client, keys, cluster_name="Redis集群", performance_tracker=None):
    """Batch-fetch values for the given keys, adapting to cluster or single-node mode.

    In cluster mode a single MGET may only address keys that hash to one slot,
    so keys are bucketed by slot first and each bucket is fetched with its own
    MGET. Single-node mode issues one MGET for the whole list.

    Args:
        redis_client: Redis client; cluster clients expose ``cluster_nodes``.
        keys: List of keys to fetch.
        cluster_name: Display name used in log messages.
        performance_tracker: Optional tracker; receives the query duration.

    Returns:
        list: Values aligned with ``keys``; ``None`` where a key is missing.
        On error the partially-filled list is returned (never ``None``).
    """
    start_time = time.time()
    result = [None] * len(keys)
    logger.info(f"开始从{cluster_name}批量查询 {len(keys)} 个keys")
    # Bug fix: MGET with zero arguments is a Redis protocol error
    # ("wrong number of arguments"), so short-circuit instead of failing.
    if not keys:
        return result
    try:
        # Cluster clients expose `cluster_nodes`; plain clients do not.
        is_cluster = hasattr(redis_client, 'cluster_nodes')
        if is_cluster:
            # Bucket keys by hash slot, remembering each key's original index
            # so values can be written back into position.
            slot_groups = {}
            for idx, key in enumerate(keys):
                slot_groups.setdefault(key_slot(key), []).append((idx, key))
            logger.info(f"集群模式keys分布在 {len(slot_groups)} 个slot中")
            # One MGET per slot bucket.
            for group in slot_groups.values():
                indices, slot_keys = zip(*group)
                values = redis_client.mget(slot_keys)
                for i, v in zip(indices, values):
                    result[i] = v
        else:
            # Single-node mode: one MGET covers everything.
            logger.info("单节点模式:直接批量查询")
            result = redis_client.mget(keys)
        query_duration = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_query(f"{cluster_name}_batch_query", query_duration)
        # Count keys that actually resolved to a value.
        successful_count = sum(1 for v in result if v is not None)
        logger.info(f"{cluster_name}查询完成,成功获取 {successful_count}/{len(keys)} 个值,耗时 {query_duration:.3f}")
        return result
    except Exception as e:
        # Best-effort contract: report the failure but hand back whatever
        # was fetched so the caller can still compare partial data.
        query_duration = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_query(f"{cluster_name}_batch_query_error", query_duration)
        logger.error(f"{cluster_name}批量查询失败: {e},耗时 {query_duration:.3f}")
        return result
def compare_redis_data(client1, client2, keys, cluster1_name="生产集群", cluster2_name="测试集群", performance_tracker=None):
    """Compare the values of the given keys across two Redis clusters.

    Fetches both value lists via :func:`get_redis_values_by_keys`, then
    classifies every key as identical / different / missing-on-one-side /
    missing-on-both, and returns counts, percentages and per-key details.

    Args:
        client1: First Redis client (production side).
        client2: Second Redis client (test side).
        keys: Keys to compare (str or bytes).
        cluster1_name: Display name of the first cluster.
        cluster2_name: Display name of the second cluster.
        performance_tracker: Optional tracker; receives the comparison time.

    Returns:
        dict: ``{'success', 'stats', 'identical_results', 'different_results',
        'missing_results', 'performance', 'clusters'}`` on success, or
        ``{'error': ...}`` if fetching failed.
    """
    def _text(value):
        # Redis returns bytes unless decode_responses is enabled; normalize
        # for display. Non-bytes values (incl. None) pass through untouched.
        return value.decode('utf-8') if isinstance(value, bytes) else value

    comparison_start_time = time.time()
    logger.info(f"开始比较 {cluster1_name}{cluster2_name} 的数据")
    # Fetch both sides. Defensive None checks kept although the fetcher
    # currently always returns a list.
    values1 = get_redis_values_by_keys(client1, keys, cluster1_name, performance_tracker)
    if values1 is None:
        return {'error': f'{cluster1_name}获取数据失败'}
    values2 = get_redis_values_by_keys(client2, keys, cluster2_name, performance_tracker)
    if values2 is None:
        return {'error': f'{cluster2_name}获取数据失败'}
    # Time only the in-memory comparison, separately from the fetches.
    compare_start = time.time()
    stats = {
        'total_keys': len(keys),
        'identical_count': 0,
        'different_count': 0,
        'missing_in_cluster1': 0,
        'missing_in_cluster2': 0,
        'both_missing': 0
    }
    identical_results = []
    different_results = []
    missing_results = []
    # Classify each key into exactly one bucket.
    for i, key in enumerate(keys):
        val1 = values1[i]
        val2 = values2[i]
        display_val1 = _text(val1)
        display_val2 = _text(val2)
        key_text = _text(key)
        if val1 is None and val2 is None:
            # Key absent from both clusters.
            stats['both_missing'] += 1
            missing_results.append({
                'key': key_text,
                'status': 'both_missing',
                'message': '两个集群都不存在该key'
            })
        elif val1 is None:
            # Present only in cluster 2.
            stats['missing_in_cluster1'] += 1
            missing_results.append({
                'key': key_text,
                'status': 'missing_in_cluster1',
                'cluster1_value': None,
                'cluster2_value': display_val2,
                'message': f'{cluster1_name}中不存在'
            })
        elif val2 is None:
            # Present only in cluster 1.
            stats['missing_in_cluster2'] += 1
            missing_results.append({
                'key': key_text,
                'status': 'missing_in_cluster2',
                'cluster1_value': display_val1,
                'cluster2_value': None,
                'message': f'{cluster2_name}中不存在'
            })
        elif val1 == val2:
            # Values match (compared raw, before decoding).
            stats['identical_count'] += 1
            identical_results.append({
                'key': key_text,
                'value': display_val1
            })
        else:
            # Both present but values differ.
            stats['different_count'] += 1
            different_results.append({
                'key': key_text,
                'cluster1_value': display_val1,
                'cluster2_value': display_val2,
                'message': '值不同'
            })
    compare_end = time.time()
    comparison_duration = compare_end - compare_start
    total_duration = compare_end - comparison_start_time
    if performance_tracker:
        performance_tracker.record_comparison_time(comparison_duration)

    def safe_percentage(part, total):
        # Avoid ZeroDivisionError when the key list is empty.
        return round((part / total * 100), 2) if total > 0 else 0

    stats['identical_percentage'] = safe_percentage(stats['identical_count'], stats['total_keys'])
    stats['different_percentage'] = safe_percentage(stats['different_count'], stats['total_keys'])
    stats['missing_percentage'] = safe_percentage(
        stats['missing_in_cluster1'] + stats['missing_in_cluster2'] + stats['both_missing'],
        stats['total_keys']
    )
    result = {
        'success': True,
        'stats': stats,
        'identical_results': identical_results,
        'different_results': different_results,
        'missing_results': missing_results,
        'performance': {
            'comparison_time': comparison_duration,
            'total_time': total_duration
        },
        'clusters': {
            'cluster1_name': cluster1_name,
            'cluster2_name': cluster2_name
        }
    }
    logger.info(f"数据比对完成,耗时 {comparison_duration:.3f}")
    logger.info(f"比对统计: 总计{stats['total_keys']}个key相同{stats['identical_count']}个,不同{stats['different_count']}个,缺失{stats['missing_in_cluster1'] + stats['missing_in_cluster2'] + stats['both_missing']}")
    return result
def execute_redis_comparison(config1, config2, query_options):
    """Entry point: connect to both clusters, pick keys, and run the comparison.

    Args:
        config1: Configuration dict for the first Redis cluster.
        config2: Configuration dict for the second Redis cluster.
        query_options: Dict with ``mode`` ('random' or 'specified') and its
            parameters (``count``, ``pattern``, ``source_cluster``, ``keys``).

    Returns:
        dict: Comparison result augmented with a performance report and the
        query options, or ``{'error': ...}`` on connection/execution failure.
    """
    from .redis_client import create_redis_client

    def _close_quietly(client):
        # Cleanup failures are non-actionable here; never mask the real result.
        try:
            client.close()
        except Exception:
            pass

    performance_tracker = RedisPerformanceTracker()
    cluster1_name = config1.get('name', '生产集群')
    cluster2_name = config2.get('name', '测试集群')
    logger.info(f"开始执行Redis数据比较: {cluster1_name} vs {cluster2_name}")
    # Connect sequentially so a first-cluster failure skips the second dial.
    client1 = create_redis_client(config1, cluster1_name, performance_tracker)
    if not client1:
        return {'error': f'{cluster1_name}连接失败'}
    client2 = create_redis_client(config2, cluster2_name, performance_tracker)
    if not client2:
        # Bug fix: the first connection used to leak on this early return.
        _close_quietly(client1)
        return {'error': f'{cluster2_name}连接失败'}
    try:
        keys = []
        query_mode = query_options.get('mode', 'random')
        if query_mode == 'random':
            # Sample keys from one cluster (second by default) via SCAN.
            count = query_options.get('count', 100)
            pattern = query_options.get('pattern', '*')
            source_cluster = query_options.get('source_cluster', 'cluster2')  # 默认从第二个集群获取
            source_client = client2 if source_cluster == 'cluster2' else client1
            source_name = cluster2_name if source_cluster == 'cluster2' else cluster1_name
            logger.info(f"{source_name}随机获取 {count} 个keys")
            keys = get_random_keys_from_redis(source_client, count, pattern, performance_tracker)
        elif query_mode == 'specified':
            # Caller supplied the key list; encode str keys to bytes to match
            # what the Redis client returns for scanned keys.
            keys = query_options.get('keys', [])
            keys = [k.encode('utf-8') if isinstance(k, str) else k for k in keys]
        if not keys:
            return {'error': '未获取到任何keys进行比较'}
        logger.info(f"准备比较 {len(keys)} 个keys")
        comparison_result = compare_redis_data(
            client1, client2, keys,
            cluster1_name, cluster2_name,
            performance_tracker
        )
        # Attach timing report and echo the options for the caller/UI.
        comparison_result['performance_report'] = performance_tracker.generate_report()
        comparison_result['query_options'] = query_options
        return comparison_result
    except Exception as e:
        logger.error(f"Redis数据比较执行失败: {e}")
        return {'error': f'执行失败: {str(e)}'}
    finally:
        # Close each connection independently so one failure can't skip the other.
        _close_quietly(client1)
        _close_quietly(client2)