DataTools Pro 部署指南
1. 部署概述
1.1 部署架构
DataTools Pro支持多种部署方式,从简单的单机部署到企业级的容器化集群部署。
部署架构选择:
┌─────────────────────────────────────────────────────────────┐
│ 部署方案对比 │
├─────────────────────────────────────────────────────────────┤
│ 开发环境部署 │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ • 本地开发服务器 (Flask dev server) │ │
│ │ • SQLite数据库 │ │
│ │ • 适用于: 开发测试、功能验证 │ │
│ └─────────────────────────────────────────────────────────┘ │
├─────────────────────────────────────────────────────────────┤
│ 生产环境部署 │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ • WSGI服务器 (Gunicorn/uWSGI) │ │
│ │ • 反向代理 (Nginx/Apache) │ │
│ │ • 进程管理 (systemd/supervisor) │ │
│ │ • 适用于: 小型生产环境 │ │
│ └─────────────────────────────────────────────────────────┘ │
├─────────────────────────────────────────────────────────────┤
│ 容器化部署 │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ • Docker容器化 │ │
│ │ • Docker Compose编排 │ │
│ │ • 负载均衡和高可用 │ │
│ │ • 适用于: 中大型生产环境 │ │
│ └─────────────────────────────────────────────────────────┘ │
├─────────────────────────────────────────────────────────────┤
│ Kubernetes部署 │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ • K8s Deployment和Service │ │
│ │ • 自动扩缩容和故障恢复 │ │
│ │ • ConfigMap和Secret管理 │ │
│ │ • 适用于: 企业级云原生环境 │ │
│ └─────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
1.2 环境要求
1.2.1 基础环境
- 操作系统: Linux (推荐Ubuntu 20.04+, CentOS 8+)
- Python版本: Python 3.8+ (依赖的 Flask 2.3.3 要求 Python ≥ 3.8;生产部署示例使用 3.9)
- 内存: 最低2GB,推荐4GB+
- 磁盘: 最低10GB,推荐50GB+
- 网络: 能够访问Cassandra和Redis集群
1.2.2 软件依赖
# System package dependencies (Debian/Ubuntu)
sudo apt-get update
sudo apt-get install -y \
python3 \
python3-pip \
python3-venv \
git \
nginx \
supervisor \
curl \
wget
1.2.3 Python依赖
# requirements.txt
Flask==2.3.3
cassandra-driver==3.29.1
redis==4.5.4
# NOTE: redis-py-cluster is deprecated and only supports redis-py 3.x;
# redis-py >= 4.1 provides redis.cluster.RedisCluster natively.
# Verify this pairing before deploying.
redis-py-cluster==2.1.3
gunicorn==21.2.0
gevent==23.7.0
python-dotenv==1.0.0
PyYAML==6.0.1
cryptography==41.0.3
2. 开发环境部署
2.1 快速启动
# 1. Clone the project
git clone <repository-url>
cd BigDataTool
# 2. Create a virtual environment
python3 -m venv venv
source venv/bin/activate  # Linux/Mac
# venv\Scripts\activate # Windows
# 3. Install dependencies
pip install -r requirements.txt
# 4. Initialize the configuration
cp config.example.py config.py
# 5. Start the application
python app.py
2.2 开发环境配置
# config.py - Development environment configuration.
DEBUG = True
TESTING = False
# Database: local SQLite file (dev only; production uses /opt/datatools/data)
DATABASE_URL = 'sqlite:///config_groups.db'
# Logging
LOG_LEVEL = 'DEBUG'
LOG_FILE = 'logs/datatools.log'
# Security: placeholder key; MUST be overridden for production
SECRET_KEY = 'dev-secret-key-change-in-production'
# Default Cassandra connection (local single node)
DEFAULT_CASSANDRA_CONFIG = {
'hosts': ['127.0.0.1'],
'port': 9042,
'keyspace': 'test_ks'
}
# Default Redis connection (local single node)
DEFAULT_REDIS_CONFIG = {
'host': '127.0.0.1',
'port': 6379,
'db': 0
}
2.3 开发服务脚本
#!/bin/bash
# dev-server.sh - Start the DataTools Pro development server.
# Must be run from the project root with the virtualenv activated.
set -euo pipefail

# Abort unless a virtual environment is active.
# ${VIRTUAL_ENV:-} keeps this safe under 'set -u' when the var is unset.
if [[ -z "${VIRTUAL_ENV:-}" ]]; then
  echo "请先激活虚拟环境: source venv/bin/activate"
  exit 1
fi

# Create required runtime directories.
mkdir -p logs data

# Create the config file from the example on first run.
if [[ ! -f "config.py" ]]; then
  echo "复制配置文件: cp config.example.py config.py"
  cp config.example.py config.py
fi

# Initialize the SQLite database.
python -c "
from app import init_database
init_database()
print('数据库初始化完成')
"

# Launch the Flask development server.
echo "启动DataTools Pro开发服务器..."
echo "访问地址: http://localhost:5000"
python app.py
3. 生产环境部署
3.1 系统用户创建
# Create a dedicated, non-login system user and home for the service
sudo useradd -r -s /bin/false datatools
sudo mkdir -p /opt/datatools
sudo chown datatools:datatools /opt/datatools
3.2 应用部署
#!/bin/bash
# deploy-production.sh - Production deployment script for DataTools Pro.
# Must be run by a user with sudo privileges.
set -euo pipefail

APP_USER="datatools"
APP_DIR="/opt/datatools"
APP_NAME="datatools-pro"
PYTHON_VERSION="3.9"

echo "开始部署DataTools Pro到生产环境..."

# 1. Create the application directory layout.
sudo mkdir -p "$APP_DIR"/{app,logs,data,backup}
sudo chown -R "$APP_USER:$APP_USER" "$APP_DIR"

# 2. Deploy the application code.
sudo -u "$APP_USER" git clone <repository-url> "$APP_DIR/app"
cd "$APP_DIR/app"

# 3. Create the virtual environment.
sudo -u "$APP_USER" "python$PYTHON_VERSION" -m venv "$APP_DIR/venv"
sudo -u "$APP_USER" "$APP_DIR/venv/bin/pip" install --upgrade pip

# 4. Install dependencies.
sudo -u "$APP_USER" "$APP_DIR/venv/bin/pip" install -r requirements.txt

# 5. Write the production config.
# BUG FIX: 'sudo -u user cmd > file' performs the redirection as the
# *calling* user, not the target user, so the original
# 'sudo -u $APP_USER cat > config.py << EOF' created the file with the
# wrong owner (or failed). Writing through tee under sudo fixes this.
sudo -u "$APP_USER" tee config.py > /dev/null << 'EOF'
import os
from dotenv import load_dotenv

# Load environment variables from /opt/datatools/.env
load_dotenv()

# Base settings
DEBUG = False
TESTING = False
SECRET_KEY = os.getenv('SECRET_KEY', 'change-this-in-production')

# Database
DATABASE_URL = os.getenv('DATABASE_URL', '/opt/datatools/data/config_groups.db')

# Logging
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
LOG_FILE = '/opt/datatools/logs/datatools.log'

# Server
HOST = os.getenv('HOST', '127.0.0.1')
PORT = int(os.getenv('PORT', 5000))
WORKERS = int(os.getenv('WORKERS', 4))

# External services
CASSANDRA_HOSTS = os.getenv('CASSANDRA_HOSTS', '127.0.0.1').split(',')
REDIS_HOSTS = os.getenv('REDIS_HOSTS', '127.0.0.1').split(',')
EOF

# 6. Write the environment file (same tee-under-sudo fix as above).
sudo -u "$APP_USER" tee "$APP_DIR/.env" > /dev/null << 'EOF'
# 生产环境配置
SECRET_KEY=your-production-secret-key-here
DATABASE_URL=/opt/datatools/data/config_groups.db
LOG_LEVEL=INFO
HOST=127.0.0.1
PORT=5000
WORKERS=4
# 外部服务
CASSANDRA_HOSTS=10.0.1.100,10.0.1.101
REDIS_HOSTS=10.0.2.100,10.0.2.101
EOF

# 7. Initialize the database.
sudo -u "$APP_USER" "$APP_DIR/venv/bin/python" -c "
import sys
sys.path.insert(0, '$APP_DIR/app')
from app import init_database
init_database()
print('数据库初始化完成')
"

# 8. Tighten permissions (.env holds secrets -> owner-only).
sudo chown -R "$APP_USER:$APP_USER" "$APP_DIR"
sudo chmod 600 "$APP_DIR/.env"
sudo chmod 755 "$APP_DIR/app/app.py"

echo "应用部署完成: $APP_DIR"
3.3 Gunicorn配置
# gunicorn.conf.py - Gunicorn production configuration for DataTools Pro.
# (Indentation of the hook functions was lost in the original and is
# reconstructed here; the flat form was not valid Python.)
import os
import multiprocessing

# Server socket
bind = f"{os.getenv('HOST', '127.0.0.1')}:{os.getenv('PORT', 5000)}"
backlog = 2048

# Worker processes: default to the common 2*CPU+1 heuristic,
# overridable via the WORKERS environment variable.
workers = int(os.getenv('WORKERS', multiprocessing.cpu_count() * 2 + 1))
worker_class = 'gevent'
worker_connections = 1000
max_requests = 1000          # recycle workers to bound memory growth
max_requests_jitter = 100    # stagger recycling so workers don't restart together
preload_app = True

# Timeouts
timeout = 120
keepalive = 2
graceful_timeout = 30

# Logging
accesslog = '/opt/datatools/logs/access.log'
errorlog = '/opt/datatools/logs/error.log'
loglevel = 'info'
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = 'datatools-pro'

# Run as the dedicated service user
user = 'datatools'
group = 'datatools'

# Temporary upload directory (must exist and be writable by 'datatools')
tmp_upload_dir = '/opt/datatools/tmp'


def on_starting(server):
    """Server hook: called before the master process is initialized."""
    server.log.info("DataTools Pro 正在启动...")


def when_ready(server):
    """Server hook: called after the server starts accepting requests."""
    server.log.info("DataTools Pro 启动完成")


def on_exit(server):
    """Server hook: called just before the master process exits."""
    server.log.info("DataTools Pro 正在关闭...")
3.4 Systemd服务配置
# /etc/systemd/system/datatools-pro.service
[Unit]
Description=DataTools Pro - Enterprise Data Processing Platform
After=network.target
Wants=network.target
[Service]
# Type=notify relies on gunicorn's sd_notify support (gunicorn >= 20.1;
# the pinned 21.2.0 qualifies).
Type=notify
User=datatools
Group=datatools
RuntimeDirectory=datatools-pro
WorkingDirectory=/opt/datatools/app
Environment=PATH=/opt/datatools/venv/bin
EnvironmentFile=/opt/datatools/.env
ExecStart=/opt/datatools/venv/bin/gunicorn --config gunicorn.conf.py app:app
ExecReload=/bin/kill -s HUP $MAINPID
KillMode=mixed
TimeoutStopSec=30
# Sandboxing: private /tmp, read-only system, writes only under /opt/datatools
PrivateTmp=true
ProtectSystem=strict
ReadWritePaths=/opt/datatools
NoNewPrivileges=yes
# Restart policy
Restart=always
RestartSec=10
# NOTE(review): on systemd >= 230 rate limiting belongs in [Unit] as
# StartLimitIntervalSec/StartLimitBurst — confirm the target distro's version.
StartLimitBurst=3
StartLimitInterval=60
[Install]
WantedBy=multi-user.target
# Enable and start the service, then verify its status
sudo systemctl daemon-reload
sudo systemctl enable datatools-pro
sudo systemctl start datatools-pro
sudo systemctl status datatools-pro
3.5 Nginx反向代理配置
# /etc/nginx/sites-available/datatools-pro

upstream datatools_backend {
    server 127.0.0.1:5000 fail_timeout=0;
    # Additional gunicorn instances can be added here:
    # server 127.0.0.1:5001 fail_timeout=0;
    # server 127.0.0.1:5002 fail_timeout=0;
}

# Redirect all plain-HTTP traffic to HTTPS.
server {
    listen 80;
    listen [::]:80;
    server_name datatools.yourdomain.com;
    return 301 https://$server_name$request_uri;
}

server {
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name datatools.yourdomain.com;

    # SSL configuration
    ssl_certificate /etc/ssl/certs/datatools.crt;
    ssl_certificate_key /etc/ssl/private/datatools.key;
    ssl_protocols TLSv1.2 TLSv1.3;
    # BUG FIX: the original list contained cipher names that do not exist in
    # OpenSSL (ECDHE-RSA-AES256-GCM-SHA512, DHE-RSA-AES256-GCM-SHA512 — GCM
    # suites use SHA256/SHA384 MACs only). Use a standard modern AEAD set.
    ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256;
    ssl_prefer_server_ciphers off;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;

    # Security headers
    add_header X-Frame-Options DENY;
    add_header X-Content-Type-Options nosniff;
    add_header X-XSS-Protection "1; mode=block";
    add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload";

    # Basics
    client_max_body_size 100M;
    keepalive_timeout 65;
    gzip on;
    gzip_vary on;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/javascript
        application/xml+rss
        application/json;

    # Static assets: long-lived immutable cache, no access logging.
    location /static/ {
        alias /opt/datatools/app/static/;
        expires 1y;
        add_header Cache-Control "public, immutable";
        access_log off;
    }

    # API endpoints: longer read timeout, buffering off for streamed responses.
    location /api/ {
        proxy_pass http://datatools_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 120s;
        proxy_buffering off;
    }

    # Application UI.
    location / {
        proxy_pass http://datatools_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;
    }

    # Logging
    access_log /var/log/nginx/datatools-access.log;
    error_log /var/log/nginx/datatools-error.log;
}
# Enable the site, validate the config, then reload nginx
sudo ln -s /etc/nginx/sites-available/datatools-pro /etc/nginx/sites-enabled/
sudo nginx -t
sudo systemctl reload nginx
4. 容器化部署
4.1 Dockerfile
# Dockerfile
FROM python:3.9-slim

# Working directory
WORKDIR /app

# Build tools for native wheels (cassandra-driver, gevent) and curl for the
# HEALTHCHECK below. BUG FIX: python:3.9-slim does not ship curl, so the
# original health check always failed and the container was reported
# unhealthy; curl must be installed explicitly.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first to leverage Docker layer caching.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code.
COPY . .

# Run as a non-root user.
RUN useradd -r -s /bin/false appuser && \
    chown -R appuser:appuser /app
USER appuser

# Application port
EXPOSE 5000

# Container health check (curl installed above).
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:5000/api/health || exit 1

# Start the WSGI server.
CMD ["gunicorn", "--config", "gunicorn.conf.py", "app:app"]
4.2 Docker Compose配置
# docker-compose.yml
# (YAML indentation was lost in the original document and is reconstructed.)
version: '3.8'  # NOTE: the top-level 'version' key is ignored by Compose v2+

services:
  datatools-pro:
    build:
      context: .
      dockerfile: Dockerfile
    image: datatools-pro:latest
    container_name: datatools-pro
    restart: unless-stopped
    ports:
      - "5000:5000"
    environment:
      - DEBUG=False
      - SECRET_KEY=${SECRET_KEY}
      - DATABASE_URL=/app/data/config_groups.db
      - LOG_LEVEL=INFO
      - CASSANDRA_HOSTS=${CASSANDRA_HOSTS}
      - REDIS_HOSTS=${REDIS_HOSTS}
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
      - ./backup:/app/backup
    networks:
      - datatools-network
    healthcheck:
      # Requires curl to be available inside the image.
      test: ["CMD", "curl", "-f", "http://localhost:5000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  nginx:
    image: nginx:alpine
    container_name: datatools-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/conf.d:/etc/nginx/conf.d:ro
      - ./ssl:/etc/ssl:ro
      - ./logs/nginx:/var/log/nginx
    depends_on:
      - datatools-pro
    networks:
      - datatools-network

  redis:
    image: redis:7-alpine
    container_name: datatools-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data
    command: redis-server --appendonly yes
    networks:
      - datatools-network

volumes:
  redis-data:
    driver: local

networks:
  datatools-network:
    driver: bridge
4.3 环境变量配置
# .env - Docker Compose环境变量
COMPOSE_PROJECT_NAME=datatools-pro
# 应用配置
SECRET_KEY=your-super-secret-key-for-production
DEBUG=False
LOG_LEVEL=INFO
# 外部服务
CASSANDRA_HOSTS=cassandra-node1,cassandra-node2,cassandra-node3
REDIS_HOSTS=redis-node1,redis-node2,redis-node3
# 数据库
DATABASE_URL=/app/data/config_groups.db
# 监控
ENABLE_MONITORING=true
METRICS_PORT=9090
4.4 容器化部署脚本
#!/bin/bash
# deploy-docker.sh - Docker containerized deployment for DataTools Pro.
set -euo pipefail

PROJECT_NAME="datatools-pro"   # kept for operator reference
DOCKER_COMPOSE_FILE="docker-compose.yml"
ENV_FILE=".env"

echo "开始Docker容器化部署..."

# 1. Docker must be installed.
if ! command -v docker > /dev/null 2>&1; then
  echo "错误: Docker未安装"
  exit 1
fi

# FIX: modern Docker ships Compose as the 'docker compose' plugin; the
# original script hard-failed unless the legacy docker-compose binary
# existed. Detect either form and use it consistently below.
if command -v docker-compose > /dev/null 2>&1; then
  COMPOSE=(docker-compose)
elif docker compose version > /dev/null 2>&1; then
  COMPOSE=(docker compose)
else
  echo "错误: Docker Compose未安装"
  exit 1
fi

# 2. Check the environment variable file.
if [[ ! -f "$ENV_FILE" ]]; then
  echo "创建环境变量文件: $ENV_FILE"
  cp .env.example "$ENV_FILE"
  echo "请编辑 $ENV_FILE 文件并重新运行部署脚本"
  exit 1
fi

# 3. Create the required host directories.
mkdir -p data logs backup ssl nginx/conf.d

# 4. Build images.
echo "构建Docker镜像..."
"${COMPOSE[@]}" -f "$DOCKER_COMPOSE_FILE" build

# 5. Stop old containers.
echo "停止旧容器..."
"${COMPOSE[@]}" -f "$DOCKER_COMPOSE_FILE" down

# 6. Start new containers.
echo "启动新容器..."
"${COMPOSE[@]}" -f "$DOCKER_COMPOSE_FILE" up -d

# 7. Give the stack time to come up.
echo "等待服务启动..."
sleep 30

# 8. Show container status.
echo "检查服务状态..."
"${COMPOSE[@]}" -f "$DOCKER_COMPOSE_FILE" ps

# 9. Health check through the nginx front end.
echo "执行健康检查..."
if curl -f http://localhost/api/health > /dev/null 2>&1; then
  echo "✅ 部署成功! 服务已启动"
  echo "访问地址: http://localhost"
else
  echo "❌ 部署失败! 服务未正常启动"
  echo "查看日志: docker-compose logs"
  exit 1
fi

# 10. Helpful follow-up commands.
echo ""
echo "🚀 DataTools Pro 容器化部署完成!"
echo ""
echo "常用命令:"
echo "  查看日志: docker-compose logs -f"
echo "  重启服务: docker-compose restart"
echo "  停止服务: docker-compose down"
echo "  更新代码: git pull && docker-compose up -d --build"
echo ""
5. Kubernetes部署
5.1 Kubernetes资源清单
5.1.1 ConfigMap配置
# k8s/configmap.yaml - non-secret runtime configuration.
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: v1
kind: ConfigMap
metadata:
  name: datatools-pro-config
  namespace: datatools
data:
  DEBUG: "False"
  LOG_LEVEL: "INFO"
  DATABASE_URL: "/app/data/config_groups.db"
  CASSANDRA_HOSTS: "cassandra-service.database.svc.cluster.local"
  REDIS_HOSTS: "redis-service.cache.svc.cluster.local"
  WORKERS: "4"
  HOST: "0.0.0.0"
  PORT: "5000"
5.1.2 Secret配置
# k8s/secret.yaml - sensitive runtime configuration.
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: v1
kind: Secret
metadata:
  name: datatools-pro-secret
  namespace: datatools
type: Opaque
data:
  # base64 of the production secret; generate with: echo -n '<key>' | base64
  SECRET_KEY: eW91ci1zdXBlci1zZWNyZXQta2V5LWZvci1wcm9kdWN0aW9u
5.1.3 PersistentVolume配置
# k8s/pvc.yaml - persistent storage for the SQLite data directory.
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: datatools-pro-data
  namespace: datatools
spec:
  accessModes:
    # ReadWriteOnce: the volume can be attached to only one node at a time.
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
  storageClassName: fast-ssd
5.1.4 Deployment配置
# k8s/deployment.yaml
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datatools-pro
  namespace: datatools
  labels:
    app: datatools-pro
    version: v2.0.0
spec:
  # NOTE(review): 3 replicas share one ReadWriteOnce PVC holding a SQLite
  # file — pods can only co-schedule on a single node and concurrent SQLite
  # writers are unsafe. Confirm this is intended before scaling past 1.
  replicas: 3
  selector:
    matchLabels:
      app: datatools-pro
  template:
    metadata:
      labels:
        app: datatools-pro
        version: v2.0.0
    spec:
      containers:
        - name: datatools-pro
          # NOTE(review): prefix with your registry in real clusters.
          image: datatools-pro:2.0.0
          ports:
            - containerPort: 5000
              name: http
          env:
            - name: SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: datatools-pro-secret
                  key: SECRET_KEY
          envFrom:
            - configMapRef:
                name: datatools-pro-config
          volumeMounts:
            - name: data-volume
              mountPath: /app/data
            - name: logs-volume
              mountPath: /app/logs
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /api/health
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /api/health
              port: 5000
            initialDelaySeconds: 5
            periodSeconds: 5
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            # NOTE(review): with a read-only root filesystem gunicorn still
            # needs a writable tmp dir; mount an emptyDir at /tmp if workers
            # fail to start — verify in staging.
            readOnlyRootFilesystem: true
      volumes:
        - name: data-volume
          persistentVolumeClaim:
            claimName: datatools-pro-data
        - name: logs-volume
          emptyDir: {}
      securityContext:
        fsGroup: 1000
5.1.5 Service配置
# k8s/service.yaml - cluster-internal service in front of the pods.
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: v1
kind: Service
metadata:
  name: datatools-pro-service
  namespace: datatools
  labels:
    app: datatools-pro
spec:
  selector:
    app: datatools-pro
  ports:
    - port: 80
      targetPort: 5000
      protocol: TCP
      name: http
  type: ClusterIP
5.1.6 Ingress配置
# k8s/ingress.yaml - external HTTPS entry point (nginx ingress + cert-manager).
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: datatools-pro-ingress
  namespace: datatools
  annotations:
    # NOTE(review): this annotation is deprecated; modern clusters prefer
    # spec.ingressClassName — confirm the ingress controller version.
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
spec:
  tls:
    - hosts:
        - datatools.yourdomain.com
      secretName: datatools-pro-tls
  rules:
    - host: datatools.yourdomain.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: datatools-pro-service
                port:
                  number: 80
5.2 Kubernetes部署脚本
#!/bin/bash
# deploy-k8s.sh - Kubernetes deployment script for DataTools Pro.
set -euo pipefail

NAMESPACE="datatools"
KUBECTL_CMD="kubectl"   # now actually used (it was declared but ignored)
KUSTOMIZE_DIR="k8s"

echo "开始Kubernetes部署..."

# 1. kubectl must be installed.
if ! command -v "$KUBECTL_CMD" > /dev/null 2>&1; then
  echo "错误: kubectl未安装"
  exit 1
fi

# 2. The cluster must be reachable.
if ! "$KUBECTL_CMD" cluster-info > /dev/null 2>&1; then
  echo "错误: 无法连接到Kubernetes集群"
  exit 1
fi

# 3. Create the namespace idempotently.
echo "创建命名空间: $NAMESPACE"
"$KUBECTL_CMD" create namespace "$NAMESPACE" --dry-run=client -o yaml | "$KUBECTL_CMD" apply -f -

# 4. Apply ConfigMap and Secret.
echo "应用配置..."
"$KUBECTL_CMD" apply -f "$KUSTOMIZE_DIR/configmap.yaml"
"$KUBECTL_CMD" apply -f "$KUSTOMIZE_DIR/secret.yaml"

# 5. Apply storage.
echo "应用存储配置..."
"$KUBECTL_CMD" apply -f "$KUSTOMIZE_DIR/pvc.yaml"

# 6. Wait for the PVC to bind.
# BUG FIX: PVCs have no 'Bound' condition, so the original
# '--for=condition=Bound' never succeeds; wait on .status.phase instead.
echo "等待存储卷绑定..."
"$KUBECTL_CMD" wait --for=jsonpath='{.status.phase}'=Bound pvc/datatools-pro-data -n "$NAMESPACE" --timeout=300s

# 7. Deploy the application.
echo "部署应用..."
"$KUBECTL_CMD" apply -f "$KUSTOMIZE_DIR/deployment.yaml"

# 8. Wait for the Deployment to become available.
echo "等待应用就绪..."
"$KUBECTL_CMD" wait --for=condition=Available deployment/datatools-pro -n "$NAMESPACE" --timeout=300s

# 9. Create the Service.
echo "创建服务..."
"$KUBECTL_CMD" apply -f "$KUSTOMIZE_DIR/service.yaml"

# 10. Create the Ingress when a manifest is present.
if [[ -f "$KUSTOMIZE_DIR/ingress.yaml" ]]; then
  echo "创建Ingress..."
  "$KUBECTL_CMD" apply -f "$KUSTOMIZE_DIR/ingress.yaml"
fi

# 11. Show deployment status.
echo "检查部署状态..."
"$KUBECTL_CMD" get pods -n "$NAMESPACE" -l app=datatools-pro
"$KUBECTL_CMD" get svc -n "$NAMESPACE" -l app=datatools-pro

# 12. Access information.
echo ""
echo "🚀 Kubernetes部署完成!"
echo ""
echo "查看Pod状态: kubectl get pods -n $NAMESPACE"
echo "查看日志: kubectl logs -f deployment/datatools-pro -n $NAMESPACE"
echo "端口转发测试: kubectl port-forward service/datatools-pro-service 8080:80 -n $NAMESPACE"
echo ""
if [[ -f "$KUSTOMIZE_DIR/ingress.yaml" ]]; then
  INGRESS_HOST=$("$KUBECTL_CMD" get ingress datatools-pro-ingress -n "$NAMESPACE" -o jsonpath='{.spec.rules[0].host}')
  echo "访问地址: https://$INGRESS_HOST"
fi
5.3 HorizontalPodAutoscaler配置
# k8s/hpa.yaml - autoscaling on CPU and memory utilization.
# (YAML indentation was lost in the original document and is reconstructed.)
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: datatools-pro-hpa
  namespace: datatools
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: datatools-pro
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Shrink slowly (max 10% of pods/min after a 5-minute calm window),
    # grow faster (max 50% of pods/min after 1 minute).
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
6. 监控和日志
6.1 应用监控配置
# monitoring.py - Application monitoring module (Prometheus metrics).
# (Indentation was lost in the original and is reconstructed.)
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
# BUG FIX: the original imported only Response but used flask.g and
# flask.request below, raising NameError on every request.
from flask import Response, g, request
import time
import psutil
import sqlite3  # kept from the original; not used in this snippet

# Request metrics
REQUEST_COUNT = Counter('datatools_requests_total', 'Total requests', ['method', 'endpoint', 'status'])
REQUEST_LATENCY = Histogram('datatools_request_duration_seconds', 'Request latency')
ACTIVE_CONNECTIONS = Gauge('datatools_active_connections', 'Active database connections')
QUERY_EXECUTION_TIME = Histogram('datatools_query_duration_seconds', 'Query execution time', ['query_type'])

# System resource metrics
SYSTEM_CPU_USAGE = Gauge('datatools_system_cpu_usage_percent', 'System CPU usage')
SYSTEM_MEMORY_USAGE = Gauge('datatools_system_memory_usage_percent', 'System memory usage')
DATABASE_SIZE = Gauge('datatools_database_size_bytes', 'Database file size')


def init_monitoring(app):
    """Register request-timing hooks and the /metrics endpoint on a Flask app."""

    @app.before_request
    def before_request():
        # Stamp the request start time on the per-request context.
        g.start_time = time.time()

    @app.after_request
    def after_request(response):
        # Observe latency and count the request by method/endpoint/status.
        if hasattr(g, 'start_time'):
            duration = time.time() - g.start_time
            REQUEST_LATENCY.observe(duration)
            REQUEST_COUNT.labels(
                method=request.method,
                endpoint=request.endpoint or 'unknown',
                status=response.status_code
            ).inc()
        return response

    @app.route('/metrics')
    def metrics():
        """Prometheus scrape endpoint."""
        update_system_metrics()
        return Response(generate_latest(), mimetype=CONTENT_TYPE_LATEST)


def update_system_metrics():
    """Refresh system-level gauges (CPU, memory, database file size)."""
    # NOTE(review): interval=1 blocks the scrape for one second; acceptable
    # for a 30s scrape interval, but consider interval=None on busy servers.
    cpu_percent = psutil.cpu_percent(interval=1)
    SYSTEM_CPU_USAGE.set(cpu_percent)

    memory = psutil.virtual_memory()
    SYSTEM_MEMORY_USAGE.set(memory.percent)

    # Database file size (best-effort; path is relative to the working dir).
    try:
        import os
        db_path = 'config_groups.db'
        if os.path.exists(db_path):
            DATABASE_SIZE.set(os.path.getsize(db_path))
    except Exception:
        pass


def record_query_time(query_type, duration):
    """Record the execution time of a query, labeled by query type."""
    QUERY_EXECUTION_TIME.labels(query_type=query_type).observe(duration)
6.2 日志配置
# logging_config.py - Logging configuration.
# (Indentation was lost in the original and is reconstructed.)
import logging
import logging.handlers
import os
from datetime import datetime  # kept from the original; not used in this snippet


def setup_logging(app):
    """Attach rotating file handlers for app, error and access logs.

    Args:
        app: Flask application; reads app.config['LOG_DIR'] (default 'logs').
    """
    # Ensure the log directory exists.
    log_dir = app.config.get('LOG_DIR', 'logs')
    os.makedirs(log_dir, exist_ok=True)

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
    )

    # Application log: 10 MB per file, keep 10 rotations.
    app_handler = logging.handlers.RotatingFileHandler(
        os.path.join(log_dir, 'datatools.log'),
        maxBytes=10 * 1024 * 1024,
        backupCount=10
    )
    app_handler.setFormatter(formatter)
    app_handler.setLevel(logging.INFO)

    # Error log: ERROR and above only.
    error_handler = logging.handlers.RotatingFileHandler(
        os.path.join(log_dir, 'error.log'),
        maxBytes=10 * 1024 * 1024,
        backupCount=5
    )
    error_handler.setFormatter(formatter)
    error_handler.setLevel(logging.ERROR)

    # Access log, fed by werkzeug's request logger.
    access_handler = logging.handlers.RotatingFileHandler(
        os.path.join(log_dir, 'access.log'),
        maxBytes=10 * 1024 * 1024,
        backupCount=10
    )
    # BUG FIX: the original formatter referenced %(remote_addr)s, %(method)s
    # etc., which are not attributes of werkzeug LogRecords — every emit
    # would raise a formatting error. Werkzeug already embeds the client
    # address and request line in the message, so format just that.
    access_formatter = logging.Formatter('%(asctime)s %(message)s')
    access_handler.setFormatter(access_formatter)

    # Wire the handlers into the Flask app logger.
    app.logger.addHandler(app_handler)
    app.logger.addHandler(error_handler)
    app.logger.setLevel(logging.INFO)

    # Route werkzeug's request log into the access log file.
    access_logger = logging.getLogger('werkzeug')
    access_logger.addHandler(access_handler)

    # Quieten chatty third-party libraries.
    logging.getLogger('cassandra').setLevel(logging.WARNING)
    logging.getLogger('redis').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
6.3 Prometheus配置
# prometheus.yml - scrape configuration for DataTools Pro.
# (YAML indentation was lost in the original document and is reconstructed.)
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "datatools_rules.yml"

scrape_configs:
  - job_name: 'datatools-pro'
    static_configs:
      - targets: ['localhost:5000']
    metrics_path: '/metrics'
    scrape_interval: 30s
    scrape_timeout: 10s

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093
6.4 Grafana仪表板配置
{
"dashboard": {
"id": null,
"title": "DataTools Pro Monitoring",
"tags": ["datatools", "monitoring"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Request Rate",
"type": "graph",
"targets": [
{
"expr": "rate(datatools_requests_total[5m])",
"legendFormat": "{{method}} {{endpoint}}"
}
]
},
{
"id": 2,
"title": "Request Latency",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, datatools_request_duration_seconds_bucket)",
"legendFormat": "95th percentile"
},
{
"expr": "histogram_quantile(0.50, datatools_request_duration_seconds_bucket)",
"legendFormat": "50th percentile"
}
]
},
{
"id": 3,
"title": "System Resources",
"type": "graph",
"targets": [
{
"expr": "datatools_system_cpu_usage_percent",
"legendFormat": "CPU Usage %"
},
{
"expr": "datatools_system_memory_usage_percent",
"legendFormat": "Memory Usage %"
}
]
},
{
"id": 4,
"title": "Query Performance",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, datatools_query_duration_seconds_bucket)",
"legendFormat": "{{query_type}} 95th percentile"
}
]
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"refresh": "30s"
}
}
7. 备份和恢复
7.1 数据备份脚本
#!/bin/bash
# backup.sh - Back up the DataTools Pro database, config and recent logs.
set -euo pipefail

# Configuration
BACKUP_DIR="/opt/datatools/backup"
DATA_DIR="/opt/datatools/data"
LOG_DIR="/opt/datatools/logs"
RETENTION_DAYS=30

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="datatools_backup_$DATE"

echo "开始数据备份: $BACKUP_NAME"

# Staging directory for this backup.
mkdir -p "$BACKUP_DIR/$BACKUP_NAME"

# 1. Back up the SQLite database. The online '.backup' command is safe
# against concurrent writers (unlike a plain cp); the .dump gives a
# portable SQL copy as well.
echo "备份SQLite数据库..."
if [[ -f "$DATA_DIR/config_groups.db" ]]; then
  sqlite3 "$DATA_DIR/config_groups.db" ".backup $BACKUP_DIR/$BACKUP_NAME/config_groups.db"
  sqlite3 "$DATA_DIR/config_groups.db" ".dump" > "$BACKUP_DIR/$BACKUP_NAME/config_groups.sql"
fi

# 2. Back up configuration files (best-effort, files may not exist).
echo "备份配置文件..."
cp /opt/datatools/app/config.py "$BACKUP_DIR/$BACKUP_NAME/" 2>/dev/null || true
cp /opt/datatools/.env "$BACKUP_DIR/$BACKUP_NAME/" 2>/dev/null || true

# 3. Back up log files from the last 7 days (best-effort).
echo "备份日志文件..."
find "$LOG_DIR" -name "*.log" -mtime -7 -exec cp {} "$BACKUP_DIR/$BACKUP_NAME/" \; 2>/dev/null || true

# 4. Write a backup manifest.
cat > "$BACKUP_DIR/$BACKUP_NAME/backup_info.txt" << EOF
备份时间: $(date)
备份版本: DataTools Pro 2.0
系统信息: $(uname -a)
Python版本: $(python3 --version)
数据库大小: $(du -h "$DATA_DIR/config_groups.db" 2>/dev/null | cut -f1 || echo "N/A")
备份大小: $(du -sh "$BACKUP_DIR/$BACKUP_NAME" | cut -f1)
EOF

# 5. Compress the backup. Guard the cd so a failure cannot let the
# following tar/rm run in the wrong directory.
echo "压缩备份文件..."
cd "$BACKUP_DIR" || exit 1
tar -czf "$BACKUP_NAME.tar.gz" "$BACKUP_NAME"
rm -rf -- "$BACKUP_NAME"

# 6. Prune backups older than the retention window.
echo "清理旧备份..."
find "$BACKUP_DIR" -name "datatools_backup_*.tar.gz" -mtime +"$RETENTION_DAYS" -delete

# 7. Verify the archive exists.
if [[ -f "$BACKUP_DIR/$BACKUP_NAME.tar.gz" ]]; then
  BACKUP_SIZE=$(du -h "$BACKUP_DIR/$BACKUP_NAME.tar.gz" | cut -f1)
  echo "✅ 备份完成: $BACKUP_NAME.tar.gz ($BACKUP_SIZE)"
else
  echo "❌ 备份失败!"
  exit 1
fi

# 8. Optional: upload to remote storage ('set -u' safe via default).
if [[ -n "${BACKUP_REMOTE_PATH:-}" ]]; then
  echo "上传备份到远程存储..."
  # rsync -avz "$BACKUP_DIR/$BACKUP_NAME.tar.gz" "$BACKUP_REMOTE_PATH/"
  # aws s3 cp "$BACKUP_DIR/$BACKUP_NAME.tar.gz" s3://your-backup-bucket/
fi

echo "备份脚本执行完成"
7.2 数据恢复脚本
#!/bin/bash
# restore.sh - Restore DataTools Pro data from a backup archive.
set -euo pipefail

if [[ $# -eq 0 ]]; then
  echo "用法: $0 <backup_file>"
  echo "示例: $0 /opt/datatools/backup/datatools_backup_20240805_100000.tar.gz"
  exit 1
fi

BACKUP_FILE="$1"
DATA_DIR="/opt/datatools/data"
# SECURITY FIX: mktemp gives an unpredictable path; the original
# /tmp/datatools_restore_$$ was guessable (symlink-attack prone).
RESTORE_DIR=$(mktemp -d /tmp/datatools_restore.XXXXXX)

echo "开始数据恢复: $BACKUP_FILE"

# 1. Validate the backup file.
if [[ ! -f "$BACKUP_FILE" ]]; then
  echo "错误: 备份文件不存在: $BACKUP_FILE"
  exit 1
fi

# 2. Stop the service (ignore failure if it is not running).
echo "停止DataTools Pro服务..."
sudo systemctl stop datatools-pro || true

# 3. Snapshot the current data for rollback.
echo "备份当前数据..."
mkdir -p "$DATA_DIR.bak"
cp -r "$DATA_DIR"/* "$DATA_DIR.bak/" 2>/dev/null || true

# 4. Extract the archive into the temp dir.
echo "解压备份文件..."
cd "$RESTORE_DIR"
tar -xzf "$BACKUP_FILE"

# 5. Restore the database.
echo "恢复数据库..."
BACKUP_DB=$(find "$RESTORE_DIR" -name "config_groups.db" | head -1)
if [[ -f "$BACKUP_DB" ]]; then
  cp "$BACKUP_DB" "$DATA_DIR/config_groups.db"
  chown datatools:datatools "$DATA_DIR/config_groups.db"
  chmod 644 "$DATA_DIR/config_groups.db"
  echo "✅ 数据库恢复完成"
else
  echo "❌ 未找到数据库备份文件"
fi

# 6. Restore configuration files if present in the archive.
echo "恢复配置文件..."
BACKUP_CONFIG=$(find "$RESTORE_DIR" -name "config.py" | head -1)
if [[ -f "$BACKUP_CONFIG" ]]; then
  cp "$BACKUP_CONFIG" /opt/datatools/app/config.py
  chown datatools:datatools /opt/datatools/app/config.py
  echo "✅ 配置文件恢复完成"
fi

BACKUP_ENV=$(find "$RESTORE_DIR" -name ".env" | head -1)
if [[ -f "$BACKUP_ENV" ]]; then
  cp "$BACKUP_ENV" /opt/datatools/.env
  chown datatools:datatools /opt/datatools/.env
  chmod 600 /opt/datatools/.env
  echo "✅ 环境变量文件恢复完成"
fi

# 7. Verify database integrity before restarting.
echo "验证数据库完整性..."
if sqlite3 "$DATA_DIR/config_groups.db" "PRAGMA integrity_check;" | grep -q "ok"; then
  echo "✅ 数据库完整性检查通过"
else
  echo "❌ 数据库完整性检查失败"
  echo "恢复原始数据..."
  cp -r "$DATA_DIR.bak"/* "$DATA_DIR/"
  exit 1
fi

# 8. Start the service.
echo "启动DataTools Pro服务..."
sudo systemctl start datatools-pro

# 9. Give it time to come up.
echo "等待服务启动..."
sleep 10

# 10. Health check; roll back on failure.
if curl -f http://localhost:5000/api/health > /dev/null 2>&1; then
  echo "✅ 恢复完成! 服务正常运行"
  rm -rf -- "$RESTORE_DIR"
  rm -rf -- "$DATA_DIR.bak"
else
  echo "❌ 服务启动失败,回滚到原始数据"
  sudo systemctl stop datatools-pro
  # ${DATA_DIR:?} aborts if the variable is ever empty, preventing 'rm -rf /*'.
  rm -rf -- "${DATA_DIR:?}"/*
  cp -r "$DATA_DIR.bak"/* "$DATA_DIR/"
  sudo systemctl start datatools-pro
  rm -rf -- "$RESTORE_DIR"
  exit 1
fi

echo "数据恢复脚本执行完成"
7.3 自动化备份Cron任务
# 添加定时备份任务
# crontab -e
# 每天凌晨2点执行备份
0 2 * * * /opt/datatools/scripts/backup.sh >> /opt/datatools/logs/backup.log 2>&1
# 每周日凌晨3点清理旧日志
0 3 * * 0 find /opt/datatools/logs -name "*.log.*" -mtime +7 -delete
# 每月1号检查数据库完整性
0 4 1 * * sqlite3 /opt/datatools/data/config_groups.db "PRAGMA integrity_check;" >> /opt/datatools/logs/db_check.log 2>&1
版本: v1.0
更新日期: 2024-08-05
维护者: DataTools Pro Team