增加Redis查询比对

This commit is contained in:
2025-08-04 09:14:27 +08:00
parent e1a566012d
commit 4c4d168471
18 changed files with 8007 additions and 2250 deletions

115
modules/sharding.py Normal file
View File

@@ -0,0 +1,115 @@
"""
分表计算模块
负责TWCS时间分表的计算和映射
"""
import re
import logging
logger = logging.getLogger(__name__)
class ShardingCalculator:
    """Shard-table calculator based on the TWCS strategy.

    A timestamp is parsed out of each key, divided into fixed-length time
    buckets of ``interval_seconds``, and the bucket number is wrapped
    around ``table_count`` to pick a shard table named
    ``<base_table_name>_<index>``.
    """

    # Matches every run of decimal digits inside a key; compiled once so
    # batch lookups do not go through the ``re`` module cache per key.
    _NUMBER_RE = re.compile(r'\d+')

    def __init__(self, interval_seconds=604800, table_count=14):
        """Store the sharding parameters.

        :param interval_seconds: length of one time bucket in seconds; the
            default 604800 is 7 days. It is used as a divisor in
            ``calculate_shard_index`` and therefore must be non-zero.
        :param table_count: number of shard tables the bucket number wraps
            around (default 14). It is used as a modulus in
            ``calculate_shard_index`` and therefore must be non-zero.
        """
        self.interval_seconds = interval_seconds
        self.table_count = table_count

    def extract_timestamp_from_key(self, key):
        """Extract the integer timestamp embedded in ``key``.

        Two strategies are tried in order:

        1. If the key contains an underscore and the text after the last
           underscore consists only of decimal digits, that number is used.
        2. Otherwise every run of digits in the key is collected and the
           longest run is used; when several runs share the maximum
           length, the last of them wins.

        :param key: key to parse; it is converted with ``str()`` first.
        :return: the timestamp as ``int``, or ``None`` when ``key`` is
            falsy, contains no digits, or the digits cannot be converted.
        """
        if not key:
            return None

        key_str = str(key)

        # Strategy 1: numeric suffix after the last underscore.
        # str.isdecimal() is used instead of str.isdigit() because
        # isdigit() also accepts characters such as superscript digits,
        # which int() rejects with ValueError.
        _, separator, suffix = key_str.rpartition('_')
        if separator and suffix.isdecimal():
            timestamp = int(suffix)
            logger.info(f"Key '{key}' 通过下划线分割提取到时间戳: {timestamp}")
            return timestamp

        # Strategy 2: fall back to the digit runs found anywhere in the key.
        number_sequences = self._NUMBER_RE.findall(key_str)
        if not number_sequences:
            logger.warning(f"Key '{key}' 中没有找到数字字符")
            return None

        # max() returns the first maximal element it encounters, so
        # scanning in reverse order selects the LAST of the longest runs.
        last_longest = max(reversed(number_sequences), key=len)
        try:
            timestamp = int(last_longest)
        except ValueError:
            logger.error(f"Key '{key}' 时间戳转换失败: {last_longest}")
            return None

        logger.info(f"Key '{key}' 通过数字序列提取到时间戳: {timestamp} (从序列 {number_sequences} 中选择)")
        return timestamp

    def calculate_shard_index(self, timestamp):
        """Compute the shard index for ``timestamp``.

        Formula: ``timestamp // interval_seconds % table_count``.

        :param timestamp: value accepted by ``int()``, or ``None``.
        :return: the shard index, or ``None`` when ``timestamp`` is ``None``.
        :raises ZeroDivisionError: if ``interval_seconds`` or
            ``table_count`` is zero.
        """
        if timestamp is None:
            return None
        return int(timestamp) // self.interval_seconds % self.table_count

    def get_shard_table_name(self, base_table_name, key):
        """Return the shard table name that ``key`` maps to.

        :param base_table_name: prefix of the shard table name.
        :param key: key containing the timestamp.
        :return: ``"<base_table_name>_<shard_index>"``, or ``None`` when
            no timestamp can be extracted from ``key``.
        """
        timestamp = self.extract_timestamp_from_key(key)
        if timestamp is None:
            return None
        shard_index = self.calculate_shard_index(timestamp)
        return f"{base_table_name}_{shard_index}"

    def get_all_shard_tables_for_keys(self, base_table_name, keys):
        """Group a batch of keys by the shard table each one maps to.

        :param base_table_name: prefix of the shard table names.
        :param keys: iterable of keys; it is traversed exactly once, so
            generators are accepted as well as lists.
        :return: a 3-tuple ``(shard_mapping, failed_keys, calculation_stats)``:

            * ``shard_mapping`` -- ``{shard_table_name: [keys, ...]}``
            * ``failed_keys`` -- keys from which no timestamp was extracted
            * ``calculation_stats`` -- dict with the counters
              ``total_keys``, ``successful_extractions``,
              ``failed_extractions`` and ``unique_shards``
        """
        shard_mapping = {}
        failed_keys = []
        # Counted during iteration instead of len(keys) so that unsized
        # iterables work too.
        total_keys = 0

        for key in keys:
            total_keys += 1
            shard_table = self.get_shard_table_name(base_table_name, key)
            if shard_table is None:
                failed_keys.append(key)
            else:
                shard_mapping.setdefault(shard_table, []).append(key)

        calculation_stats = {
            'total_keys': total_keys,
            'successful_extractions': total_keys - len(failed_keys),
            'failed_extractions': len(failed_keys),
            'unique_shards': len(shard_mapping),
        }
        return shard_mapping, failed_keys, calculation_stats