"""
Redis query and data comparison module.

Responsible for querying Redis data, sampling random keys and comparing the
values stored under the same keys in two Redis deployments.
"""

import time
import logging
import random

from redis.cluster import key_slot
from redis.exceptions import RedisError

from .redis_client import RedisPerformanceTracker

logger = logging.getLogger(__name__)


def get_random_keys_from_redis(redis_client, count=100, pattern="*", performance_tracker=None):
    """
    Fetch a random selection of keys from Redis.

    Keys are collected with ``scan_iter`` until up to ``count * 3`` distinct
    keys have been seen, then ``count`` of them are sampled at random. The
    selection is therefore random only within the scanned prefix of the
    keyspace, not across the whole keyspace.

    Args:
        redis_client: Redis client exposing ``scan_iter``.
        count: Number of keys to return. Values <= 0 yield an empty list.
        pattern: Key match pattern, defaults to "*".
        performance_tracker: Optional tracker; ``record_scan_time`` is called
            with the elapsed scan time.

    Returns:
        list: The sampled keys; an empty list if the scan raises RedisError.
    """
    # Nothing to sample; also keeps random.sample from raising ValueError on
    # a negative sample size.
    if count <= 0:
        return []

    start_time = time.time()
    keys = set()

    logger.info(f"开始扫描获取随机keys,目标数量: {count},模式: {pattern}")

    try:
        # Ask SCAN for larger batches than strictly needed.
        scan_count = max(count * 2, 1000)
        for key in redis_client.scan_iter(match=pattern, count=scan_count):
            keys.add(key)
            # Over-collect so there is a pool to sample from.
            if len(keys) >= count * 3:
                break

        # Down-sample only when more keys were collected than requested.
        if len(keys) > count:
            keys = random.sample(list(keys), count)
        else:
            keys = list(keys)

        scan_duration = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_scan_time(scan_duration)

        logger.info(f"扫描获取 {len(keys)} 个随机keys,耗时 {scan_duration:.3f} 秒")
        return keys

    except RedisError as e:
        scan_duration = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_scan_time(scan_duration)

        logger.error(f"获取随机keys失败: {e},耗时 {scan_duration:.3f} 秒")
        return []


def _slot_for_key(key):
    """Return the cluster hash slot of ``key``, accepting str or bytes keys."""
    # key_slot() operates on bytes; str keys (e.g. returned by a client that
    # decodes responses) must be encoded first.
    if isinstance(key, str):
        key = key.encode('utf-8')
    return key_slot(key)


def _to_display(value):
    """Decode bytes to str for reporting; non-bytes values pass through."""
    if isinstance(value, bytes):
        # Stored values may be arbitrary binary data; never let a decoding
        # error abort the whole comparison.
        return value.decode('utf-8', errors='replace')
    return value


def _query_values(redis_client, keys, cluster_name, performance_tracker):
    """
    Batch-query ``keys`` and report whether the query succeeded.

    Returns:
        tuple: ``(values, succeeded)``. ``values`` always has one entry per
        key (None where no value was obtained); ``succeeded`` is False when
        an exception interrupted the query, in which case ``values`` may be
        only partially filled.
    """
    # An MGET without keys is an invalid command; skip the round trip.
    if not keys:
        return [], True

    start_time = time.time()
    result = [None] * len(keys)

    logger.info(f"开始从{cluster_name}批量查询 {len(keys)} 个keys")

    try:
        # A client exposing ``cluster_nodes`` is treated as a cluster client.
        is_cluster = hasattr(redis_client, 'cluster_nodes')

        if is_cluster:
            # Group keys by hash slot so each MGET only touches one slot,
            # remembering every key's original position.
            slot_groups = {}
            for idx, key in enumerate(keys):
                slot_groups.setdefault(_slot_for_key(key), []).append((idx, key))

            logger.info(f"集群模式:keys分布在 {len(slot_groups)} 个slot中")

            for group in slot_groups.values():
                indices, slot_keys = zip(*group)
                values = redis_client.mget(slot_keys)
                # Write values back at the positions of their keys.
                for i, v in zip(indices, values):
                    result[i] = v
        else:
            logger.info("单节点模式:直接批量查询")
            result = redis_client.mget(keys)

        query_duration = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_query(f"{cluster_name}_batch_query", query_duration)

        successful_count = sum(1 for v in result if v is not None)
        logger.info(f"从{cluster_name}查询完成,成功获取 {successful_count}/{len(keys)} 个值,耗时 {query_duration:.3f} 秒")
        return result, True

    except Exception as e:
        query_duration = time.time() - start_time
        if performance_tracker:
            performance_tracker.record_query(f"{cluster_name}_batch_query_error", query_duration)

        logger.error(f"从{cluster_name}批量查询失败: {e},耗时 {query_duration:.3f} 秒")
        return result, False


def get_redis_values_by_keys(redis_client, keys, cluster_name="Redis集群", performance_tracker=None):
    """
    Batch-query the values of the given keys, for single-node or cluster clients.

    Args:
        redis_client: Redis client exposing ``mget``.
        keys: Keys to query (str or bytes).
        cluster_name: Name used in log messages and tracker labels.
        performance_tracker: Optional tracker; ``record_query`` is called
            with the elapsed query time.

    Returns:
        list: Values in the same order as ``keys``; None where a key has no
        value. If the query fails, the error is logged and the (possibly
        partially filled) list is still returned.
    """
    values, _succeeded = _query_values(redis_client, keys, cluster_name, performance_tracker)
    return values


def compare_redis_data(client1, client2, keys, cluster1_name="生产集群", cluster2_name="测试集群", performance_tracker=None):
    """
    Compare the values stored under ``keys`` in two Redis deployments.

    Args:
        client1: First Redis client.
        client2: Second Redis client.
        keys: Keys to compare.
        cluster1_name: Display name of the first deployment.
        cluster2_name: Display name of the second deployment.
        performance_tracker: Optional tracker; ``record_comparison_time`` is
            called with the time spent comparing.

    Returns:
        dict: On success, a dict with 'success', 'stats', 'identical_results',
        'different_results', 'missing_results', 'performance' and 'clusters'.
        If querying either side fails, a dict with a single 'error' entry is
        returned instead, so a failed query is not reported as missing keys.
    """
    comparison_start_time = time.time()

    logger.info(f"开始比较 {cluster1_name} 和 {cluster2_name} 的数据")

    # Fetch both sides; abort on a failed query rather than comparing
    # placeholder None values.
    values1, fetched1 = _query_values(client1, keys, cluster1_name, performance_tracker)
    if not fetched1:
        return {'error': f'从{cluster1_name}获取数据失败'}

    values2, fetched2 = _query_values(client2, keys, cluster2_name, performance_tracker)
    if not fetched2:
        return {'error': f'从{cluster2_name}获取数据失败'}

    compare_start = time.time()

    stats = {
        'total_keys': len(keys),
        'identical_count': 0,
        'different_count': 0,
        'missing_in_cluster1': 0,
        'missing_in_cluster2': 0,
        'both_missing': 0
    }

    identical_results = []
    different_results = []
    missing_results = []

    for key, val1, val2 in zip(keys, values1, values2):
        # Human-readable forms for the report; equality below is still
        # decided on the raw values.
        display_key = _to_display(key)
        display_val1 = _to_display(val1)
        display_val2 = _to_display(val2)

        if val1 is None and val2 is None:
            # Key absent on both sides.
            stats['both_missing'] += 1
            missing_results.append({
                'key': display_key,
                'status': 'both_missing',
                'message': '两个集群都不存在该key'
            })
        elif val1 is None:
            # Key absent only on the first side.
            stats['missing_in_cluster1'] += 1
            missing_results.append({
                'key': display_key,
                'status': 'missing_in_cluster1',
                'cluster1_value': None,
                'cluster2_value': display_val2,
                'message': f'在{cluster1_name}中不存在'
            })
        elif val2 is None:
            # Key absent only on the second side.
            stats['missing_in_cluster2'] += 1
            missing_results.append({
                'key': display_key,
                'status': 'missing_in_cluster2',
                'cluster1_value': display_val1,
                'cluster2_value': None,
                'message': f'在{cluster2_name}中不存在'
            })
        elif val1 == val2:
            stats['identical_count'] += 1
            identical_results.append({
                'key': display_key,
                'value': display_val1
            })
        else:
            stats['different_count'] += 1
            different_results.append({
                'key': display_key,
                'cluster1_value': display_val1,
                'cluster2_value': display_val2,
                'message': '值不同'
            })

    compare_end = time.time()
    comparison_duration = compare_end - compare_start
    total_duration = compare_end - comparison_start_time

    if performance_tracker:
        performance_tracker.record_comparison_time(comparison_duration)

    def safe_percentage(part, total):
        # Guard against division by zero when there are no keys.
        return round((part / total * 100), 2) if total > 0 else 0

    missing_total = (
        stats['missing_in_cluster1'] + stats['missing_in_cluster2'] + stats['both_missing']
    )
    stats['identical_percentage'] = safe_percentage(stats['identical_count'], stats['total_keys'])
    stats['different_percentage'] = safe_percentage(stats['different_count'], stats['total_keys'])
    stats['missing_percentage'] = safe_percentage(missing_total, stats['total_keys'])

    result = {
        'success': True,
        'stats': stats,
        'identical_results': identical_results,
        'different_results': different_results,
        'missing_results': missing_results,
        'performance': {
            'comparison_time': comparison_duration,
            'total_time': total_duration
        },
        'clusters': {
            'cluster1_name': cluster1_name,
            'cluster2_name': cluster2_name
        }
    }

    logger.info(f"数据比对完成,耗时 {comparison_duration:.3f} 秒")
    logger.info(f"比对统计: 总计{stats['total_keys']}个key,相同{stats['identical_count']}个,不同{stats['different_count']}个,缺失{missing_total}个")

    return result


def _close_client(client, cluster_name):
    """Close ``client`` if one was created, logging instead of raising on failure."""
    if not client:
        return
    try:
        client.close()
    except Exception as e:
        # Best-effort cleanup: a failing close must not mask the result or
        # prevent the other client from being closed.
        logger.warning(f"关闭{cluster_name}连接失败: {e}")


def execute_redis_comparison(config1, config2, query_options):
    """
    Run a complete Redis data comparison between two deployments.

    Args:
        config1: Configuration of the first deployment; 'name' is used as its
            display name.
        config2: Configuration of the second deployment; 'name' is used as
            its display name.
        query_options: Query options. 'mode' is 'random' (default; uses
            'count', 'pattern' and 'source_cluster') or 'specified' (uses
            'keys').

    Returns:
        dict: The comparison result extended with 'performance_report' and
        'query_options', or a dict with an 'error' entry when a connection
        fails, no keys are available or the comparison raises.
    """
    from .redis_client import create_redis_client

    performance_tracker = RedisPerformanceTracker()

    cluster1_name = config1.get('name', '生产集群')
    cluster2_name = config2.get('name', '测试集群')

    logger.info(f"开始执行Redis数据比较: {cluster1_name} vs {cluster2_name}")

    client1 = create_redis_client(config1, cluster1_name, performance_tracker)
    client2 = create_redis_client(config2, cluster2_name, performance_tracker)

    try:
        # Checked inside the try so that a client which did connect is still
        # closed by the finally clause when the other one failed.
        if not client1:
            return {'error': f'{cluster1_name}连接失败'}
        if not client2:
            return {'error': f'{cluster2_name}连接失败'}

        keys = []
        query_mode = query_options.get('mode', 'random')

        if query_mode == 'random':
            count = query_options.get('count', 100)
            pattern = query_options.get('pattern', '*')
            # Keys are sampled from the second deployment unless told otherwise.
            source_cluster = query_options.get('source_cluster', 'cluster2')

            use_second = source_cluster == 'cluster2'
            source_client = client2 if use_second else client1
            source_name = cluster2_name if use_second else cluster1_name

            logger.info(f"从{source_name}随机获取 {count} 个keys")
            keys = get_random_keys_from_redis(source_client, count, pattern, performance_tracker)

        elif query_mode == 'specified':
            # Tolerate an explicit None for 'keys'.
            keys = query_options.get('keys') or []
            # Keys given as str are converted to bytes before querying.
            keys = [k.encode('utf-8') if isinstance(k, str) else k for k in keys]

        if not keys:
            return {'error': '未获取到任何keys进行比较'}

        logger.info(f"准备比较 {len(keys)} 个keys")

        comparison_result = compare_redis_data(
            client1, client2, keys,
            cluster1_name, cluster2_name,
            performance_tracker
        )

        comparison_result['performance_report'] = performance_tracker.generate_report()
        comparison_result['query_options'] = query_options

        return comparison_result

    except Exception as e:
        logger.error(f"Redis数据比较执行失败: {e}")
        return {'error': f'执行失败: {str(e)}'}

    finally:
        # Close each client independently so one failure cannot leak the other.
        _close_client(client1, cluster1_name)
        _close_client(client2, cluster2_name)