355 lines
12 KiB
Python
355 lines
12 KiB
Python
"""
|
||
Redis查询和数据比较模块
|
||
负责Redis数据的查询、随机key获取和数据比较
|
||
"""
|
||
|
||
import time
|
||
import logging
|
||
import random
|
||
from redis.cluster import key_slot
|
||
from redis.exceptions import RedisError
|
||
from .redis_client import RedisPerformanceTracker
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
def get_random_keys_from_redis(redis_client, count=100, pattern="*", performance_tracker=None):
    """
    Collect a random sample of keys from Redis using SCAN.

    Keys are gathered with ``scan_iter`` until ``count * 3`` distinct keys have
    been seen (or the scan is exhausted); ``count`` of them are then drawn with
    ``random.sample``. The sample is therefore random within the scanned
    portion of the keyspace, not uniform over the whole keyspace.

    Args:
        redis_client: Redis client exposing ``scan_iter(match=..., count=...)``.
        count: Number of keys wanted. A value <= 0 yields an empty list
            without issuing any Redis command.
        pattern: Key match pattern, defaults to "*".
        performance_tracker: Optional tracker; when given, its
            ``record_scan_time(seconds)`` is called with the elapsed scan
            time on both the success and the failure path.

    Returns:
        list: At most ``count`` keys. An empty list is returned when
        ``count`` <= 0 or when a ``RedisError`` is raised during the scan.
    """
    # random.sample() raises ValueError for a negative sample size, and a
    # zero-sized request needs no scan at all, so short-circuit both cases.
    if count <= 0:
        return []

    start_time = time.time()

    def _finish_timing():
        # Shared by the success and failure paths so both report the same way.
        elapsed = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_scan_time(elapsed)
        return elapsed

    logger.info(f"开始扫描获取随机keys,目标数量: {count},模式: {pattern}")

    candidates = set()
    try:
        # COUNT hint for SCAN: ask for larger batches than strictly needed.
        scan_count = max(count * 2, 1000)

        for key in redis_client.scan_iter(match=pattern, count=scan_count):
            candidates.add(key)
            # Over-collect (3x) so the final sample is not just the first hits.
            if len(candidates) >= count * 3:
                break
    except RedisError as e:
        scan_duration = _finish_timing()
        logger.error(f"获取随机keys失败: {e},耗时 {scan_duration:.3f} 秒")
        return []

    # Down-sample only when more keys were collected than requested.
    if len(candidates) > count:
        keys = random.sample(list(candidates), count)
    else:
        keys = list(candidates)

    scan_duration = _finish_timing()
    logger.info(f"扫描获取 {len(keys)} 个随机keys,耗时 {scan_duration:.3f} 秒")
    return keys
|
||
|
||
def get_redis_values_by_keys(redis_client, keys, cluster_name="Redis集群", performance_tracker=None):
    """
    Fetch the values of the given keys with MGET, for single-node or cluster clients.

    A client is treated as a cluster client when it has a ``cluster_nodes``
    attribute. For cluster clients the keys are grouped by hash slot and one
    MGET is issued per slot; otherwise a single MGET is issued for all keys.

    Args:
        redis_client: Redis client exposing ``mget``.
        keys: Sequence of keys to look up (``bytes`` or ``str``).
        cluster_name: Name used in log messages and tracker labels.
        performance_tracker: Optional tracker; when given, its
            ``record_query(label, seconds)`` is called with
            ``"<cluster_name>_batch_query"`` on success or
            ``"<cluster_name>_batch_query_error"`` on failure.

    Returns:
        list | None: Values in the same order as ``keys`` (``None`` for a key
        that does not exist); an empty list when ``keys`` is empty; ``None``
        when the query failed, so that callers can tell a failed query apart
        from keys that are simply missing.
    """
    # Nothing to look up: avoid sending an argument-less MGET, which the
    # server rejects and which would be logged as a spurious failure.
    if not keys:
        return []

    start_time = time.time()
    result = [None] * len(keys)

    logger.info(f"开始从{cluster_name}批量查询 {len(keys)} 个keys")

    try:
        # Detect cluster mode by the presence of the cluster_nodes attribute.
        is_cluster = hasattr(redis_client, 'cluster_nodes')

        if is_cluster:
            # Cluster mode: group keys by slot so each MGET stays within one slot.
            slot_groups = {}
            for idx, key in enumerate(keys):
                # key_slot() operates on bytes; encode str keys for the slot
                # calculation only and pass the original key to MGET.
                raw_key = key.encode('utf-8') if isinstance(key, str) else key
                slot_groups.setdefault(key_slot(raw_key), []).append((idx, key))

            logger.info(f"集群模式:keys分布在 {len(slot_groups)} 个slot中")

            # One MGET per slot; write values back at their original positions.
            for group in slot_groups.values():
                indices, slot_keys = zip(*group)
                values = redis_client.mget(slot_keys)
                for i, v in zip(indices, values):
                    result[i] = v
        else:
            # Single-node mode: one MGET for everything.
            logger.info("单节点模式:直接批量查询")
            result = redis_client.mget(keys)

        query_duration = time.time() - start_time

        if performance_tracker:
            performance_tracker.record_query(f"{cluster_name}_batch_query", query_duration)

        # Count how many keys actually had a value.
        successful_count = sum(1 for v in result if v is not None)
        logger.info(f"从{cluster_name}查询完成,成功获取 {successful_count}/{len(keys)} 个值,耗时 {query_duration:.3f} 秒")

        return result

    except Exception as e:
        query_duration = time.time() - start_time

        if performance_tracker:
            performance_tracker.record_query(f"{cluster_name}_batch_query_error", query_duration)

        logger.error(f"从{cluster_name}批量查询失败: {e},耗时 {query_duration:.3f} 秒")
        # Signal failure explicitly. Returning the partially filled list would
        # make a failed query indistinguishable from keys that do not exist.
        return None
|
||
|
||
def _decode_for_display(value):
    """Return bytes as text (undecodable bytes replaced); return anything else unchanged."""
    if isinstance(value, bytes):
        # Keys/values may hold arbitrary binary data; a strict decode would
        # raise UnicodeDecodeError and abort the whole comparison.
        return value.decode('utf-8', errors='replace')
    return value


def compare_redis_data(client1, client2, keys, cluster1_name="生产集群", cluster2_name="测试集群", performance_tracker=None):
    """
    Compare the values of the given keys between two Redis clients.

    Values are fetched from both clients with ``get_redis_values_by_keys`` and
    compared key by key on their raw (undecoded) form. Each key is classified
    as identical, different, missing in one cluster, or missing in both.
    Keys and values are decoded to text only for the returned report.

    Args:
        client1: First Redis client.
        client2: Second Redis client.
        keys: List of keys to compare.
        cluster1_name: Display name of the first cluster.
        cluster2_name: Display name of the second cluster.
        performance_tracker: Optional tracker; passed to the fetch calls and,
            when given, ``record_comparison_time(seconds)`` is called with
            the time spent in the comparison loop.

    Returns:
        dict: ``{'error': message}`` when fetching from either cluster
        returned ``None``; otherwise a dict with ``success``, ``stats``
        (counts and percentages), ``identical_results``,
        ``different_results``, ``missing_results``, ``performance`` and
        ``clusters``.
    """
    comparison_start_time = time.time()

    logger.info(f"开始比较 {cluster1_name} 和 {cluster2_name} 的数据")

    # Fetch the data from both clusters.
    values1 = get_redis_values_by_keys(client1, keys, cluster1_name, performance_tracker)
    if values1 is None:
        return {'error': f'从{cluster1_name}获取数据失败'}

    values2 = get_redis_values_by_keys(client2, keys, cluster2_name, performance_tracker)
    if values2 is None:
        return {'error': f'从{cluster2_name}获取数据失败'}

    # Start of the pure comparison phase (timed separately from fetching).
    compare_start = time.time()

    stats = {
        'total_keys': len(keys),
        'identical_count': 0,
        'different_count': 0,
        'missing_in_cluster1': 0,
        'missing_in_cluster2': 0,
        'both_missing': 0
    }

    # Detailed per-key results.
    identical_results = []
    different_results = []
    missing_results = []

    for i, key in enumerate(keys):
        val1 = values1[i]
        val2 = values2[i]

        # Text forms are for the report only; equality uses the raw values.
        display_key = _decode_for_display(key)
        display_val1 = _decode_for_display(val1)
        display_val2 = _decode_for_display(val2)

        if val1 is None and val2 is None:
            # Neither cluster has the key.
            stats['both_missing'] += 1
            missing_results.append({
                'key': display_key,
                'status': 'both_missing',
                'message': '两个集群都不存在该key'
            })
        elif val1 is None:
            # Only the first cluster lacks the key.
            stats['missing_in_cluster1'] += 1
            missing_results.append({
                'key': display_key,
                'status': 'missing_in_cluster1',
                'cluster1_value': None,
                'cluster2_value': display_val2,
                'message': f'在{cluster1_name}中不存在'
            })
        elif val2 is None:
            # Only the second cluster lacks the key.
            stats['missing_in_cluster2'] += 1
            missing_results.append({
                'key': display_key,
                'status': 'missing_in_cluster2',
                'cluster1_value': display_val1,
                'cluster2_value': None,
                'message': f'在{cluster2_name}中不存在'
            })
        elif val1 == val2:
            # Same value in both clusters.
            stats['identical_count'] += 1
            identical_results.append({
                'key': display_key,
                'value': display_val1
            })
        else:
            # Present in both clusters but with different values.
            stats['different_count'] += 1
            different_results.append({
                'key': display_key,
                'cluster1_value': display_val1,
                'cluster2_value': display_val2,
                'message': '值不同'
            })

    compare_end = time.time()
    comparison_duration = compare_end - compare_start
    total_duration = compare_end - comparison_start_time

    if performance_tracker:
        performance_tracker.record_comparison_time(comparison_duration)

    def safe_percentage(part, total):
        # Guard against division by zero when no keys were compared.
        return round((part / total * 100), 2) if total > 0 else 0

    missing_total = (
        stats['missing_in_cluster1'] + stats['missing_in_cluster2'] + stats['both_missing']
    )

    stats['identical_percentage'] = safe_percentage(stats['identical_count'], stats['total_keys'])
    stats['different_percentage'] = safe_percentage(stats['different_count'], stats['total_keys'])
    stats['missing_percentage'] = safe_percentage(missing_total, stats['total_keys'])

    result = {
        'success': True,
        'stats': stats,
        'identical_results': identical_results,
        'different_results': different_results,
        'missing_results': missing_results,
        'performance': {
            'comparison_time': comparison_duration,
            'total_time': total_duration
        },
        'clusters': {
            'cluster1_name': cluster1_name,
            'cluster2_name': cluster2_name
        }
    }

    logger.info(f"数据比对完成,耗时 {comparison_duration:.3f} 秒")
    logger.info(f"比对统计: 总计{stats['total_keys']}个key,相同{stats['identical_count']}个,不同{stats['different_count']}个,缺失{missing_total}个")

    return result
|
||
|
||
def _close_redis_client(client, cluster_name):
    """Close *client* if one was created; log instead of raising when closing fails."""
    if not client:
        return
    try:
        client.close()
    except Exception as e:
        # Best-effort cleanup: a failed close must not mask the real result
        # or prevent the other client from being closed.
        logger.warning(f"关闭{cluster_name}连接失败: {e}")


def execute_redis_comparison(config1, config2, query_options):
    """
    Run a full Redis data comparison between two configured clusters.

    Creates one client per config, determines the keys to compare according
    to ``query_options`` and delegates to ``compare_redis_data``. Both
    clients are closed before returning, on every path.

    Args:
        config1: Config dict of the first cluster; ``name`` is used as its
            display name (default '生产集群').
        config2: Config dict of the second cluster; ``name`` is used as its
            display name (default '测试集群').
        query_options: Dict of options read by this function:
            ``mode``: 'random' (default) or 'specified';
            ``count`` (default 100), ``pattern`` (default '*') and
            ``source_cluster`` (default 'cluster2') for random mode;
            ``keys`` for specified mode (``str`` keys are encoded to UTF-8).

    Returns:
        dict: The result of ``compare_redis_data`` extended with
        ``performance_report`` and ``query_options``, or ``{'error': message}``
        when a connection could not be created, no keys were obtained, or an
        exception was raised while comparing.
    """
    from .redis_client import create_redis_client

    # One tracker shared by connection setup, scanning, querying and comparing.
    performance_tracker = RedisPerformanceTracker()

    cluster1_name = config1.get('name', '生产集群')
    cluster2_name = config2.get('name', '测试集群')

    logger.info(f"开始执行Redis数据比较: {cluster1_name} vs {cluster2_name}")

    client1 = None
    client2 = None
    try:
        # Create the connections inside the try/finally so that a client that
        # did connect is still closed when the other one fails.
        client1 = create_redis_client(config1, cluster1_name, performance_tracker)
        client2 = create_redis_client(config2, cluster2_name, performance_tracker)

        if not client1:
            return {'error': f'{cluster1_name}连接失败'}

        if not client2:
            return {'error': f'{cluster2_name}连接失败'}

        try:
            # Determine which keys to compare.
            keys = []
            query_mode = query_options.get('mode', 'random')

            if query_mode == 'random':
                # Sample keys from one of the two clusters.
                count = query_options.get('count', 100)
                pattern = query_options.get('pattern', '*')
                # Defaults to sampling from the second cluster.
                source_cluster = query_options.get('source_cluster', 'cluster2')

                use_cluster2 = source_cluster == 'cluster2'
                source_client = client2 if use_cluster2 else client1
                source_name = cluster2_name if use_cluster2 else cluster1_name

                logger.info(f"从{source_name}随机获取 {count} 个keys")
                keys = get_random_keys_from_redis(source_client, count, pattern, performance_tracker)

            elif query_mode == 'specified':
                # Caller-supplied keys; str keys are converted to bytes.
                keys = query_options.get('keys', [])
                keys = [k.encode('utf-8') if isinstance(k, str) else k for k in keys]

            if not keys:
                return {'error': '未获取到任何keys进行比较'}

            logger.info(f"准备比较 {len(keys)} 个keys")

            comparison_result = compare_redis_data(
                client1, client2, keys,
                cluster1_name, cluster2_name,
                performance_tracker
            )

            # Attach the performance report and echo the options back.
            comparison_result['performance_report'] = performance_tracker.generate_report()
            comparison_result['query_options'] = query_options

            return comparison_result

        except Exception as e:
            logger.error(f"Redis数据比较执行失败: {e}")
            return {'error': f'执行失败: {str(e)}'}

    finally:
        # Close each client independently so one failure cannot skip the other.
        _close_redis_client(client1, cluster1_name)
        _close_redis_client(client2, cluster2_name)