DataTools Pro Deployment Guide

1. Deployment Overview

1.1 Deployment Architecture

DataTools Pro supports several deployment models, from a simple single-node setup to enterprise-grade containerized clusters.

Deployment options compared:

Development
  • Local development server (Flask dev server)
  • SQLite database
  • Suited for: development, testing, feature validation

Production (bare metal / VM)
  • WSGI server (Gunicorn/uWSGI)
  • Reverse proxy (Nginx/Apache)
  • Process management (systemd/supervisor)
  • Suited for: small production environments

Containerized
  • Docker images
  • Docker Compose orchestration
  • Load balancing and high availability
  • Suited for: medium to large production environments

Kubernetes
  • K8s Deployments and Services
  • Automatic scaling and self-healing
  • ConfigMap and Secret management
  • Suited for: enterprise cloud-native environments

1.2 Environment Requirements

1.2.1 Base Environment

  • Operating system: Linux (Ubuntu 20.04+ or CentOS 8+ recommended)
  • Python: 3.7+
  • Memory: 2 GB minimum, 4 GB+ recommended
  • Disk: 10 GB minimum, 50 GB+ recommended
  • Network: must be able to reach the Cassandra and Redis clusters (see the connectivity check below)
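
Before installing anything, it is worth confirming that the target host can actually reach those clusters. A minimal sketch (the addresses are placeholders matching the sample .env later in this guide):

# check_connectivity.py - TCP reachability probe for Cassandra/Redis
import socket

TARGETS = [
    ('10.0.1.100', 9042),  # Cassandra native protocol port
    ('10.0.2.100', 6379),  # Redis port
]

def reachable(host, port, timeout=3):
    """Return True if a TCP connection to host:port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

for host, port in TARGETS:
    print(f"{host}:{port} -> {'OK' if reachable(host, port) else 'UNREACHABLE'}")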

1.2.2 Software Dependencies

# System packages
sudo apt-get update
sudo apt-get install -y \
    python3 \
    python3-pip \
    python3-venv \
    git \
    nginx \
    supervisor \
    curl \
    wget

1.2.3 Python Dependencies

# requirements.txt
Flask==2.3.3
cassandra-driver==3.29.1
redis==4.5.4
gunicorn==21.2.0
gevent==23.7.0
python-dotenv==1.0.0
PyYAML==6.0.1
cryptography==41.0.3
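
The legacy redis-py-cluster package is deliberately absent from the list above: it pins redis<4.0 and cannot be installed alongside redis==4.5.4, and redis-py itself has shipped native cluster support since 4.1. A minimal usage sketch (node address is a placeholder):

# Cluster client built into redis-py >= 4.1; replaces redis-py-cluster
from redis.cluster import RedisCluster

rc = RedisCluster(host='10.0.2.100', port=6379)
rc.set('probe:key', 'ok')
print(rc.get('probe:key'))  # b'ok'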

2. Development Environment Deployment

2.1 Quick Start

# 1. Clone the project
git clone <repository-url>
cd BigDataTool

# 2. Create a virtual environment
python3 -m venv venv
source venv/bin/activate  # Linux/Mac
# venv\Scripts\activate     # Windows

# 3. Install dependencies
pip install -r requirements.txt

# 4. Initialize the configuration
cp config.example.py config.py

# 5. Start the application
python app.py

2.2 Development Configuration

# config.py - development settings
DEBUG = True
TESTING = False

# Database
DATABASE_URL = 'sqlite:///config_groups.db'

# Logging
LOG_LEVEL = 'DEBUG'
LOG_FILE = 'logs/datatools.log'

# Security
SECRET_KEY = 'dev-secret-key-change-in-production'

# Default Cassandra settings
DEFAULT_CASSANDRA_CONFIG = {
    'hosts': ['127.0.0.1'],
    'port': 9042,
    'keyspace': 'test_ks'
}

# Default Redis settings
DEFAULT_REDIS_CONFIG = {
    'host': '127.0.0.1',
    'port': 6379,
    'db': 0
}
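
How app.py consumes this module is not shown in this guide; a plausible minimal sketch using Flask's from_object loader:

# app.py (excerpt, illustrative) - load the flat config module above
from flask import Flask

app = Flask(__name__)
app.config.from_object('config')  # copies the UPPERCASE names from config.py

if __name__ == '__main__':
    app.run(port=5000, debug=app.config['DEBUG'])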

2.3 Development Server Script

#!/bin/bash
# dev-server.sh - start the development server

set -e

# Require an active virtual environment
if [[ "$VIRTUAL_ENV" == "" ]]; then
    echo "Activate the virtual environment first: source venv/bin/activate"
    exit 1
fi

# Create required directories
mkdir -p logs
mkdir -p data

# Ensure the configuration file exists
if [ ! -f "config.py" ]; then
    echo "Copying configuration file: cp config.example.py config.py"
    cp config.example.py config.py
fi

# Initialize the database
python -c "
from app import init_database
init_database()
print('Database initialized')
"

# Start the development server
echo "Starting the DataTools Pro development server..."
echo "URL: http://localhost:5000"
python app.py

3. Production Deployment

3.1 Create a System User

# Create a dedicated service user
sudo useradd -r -s /bin/false datatools
sudo mkdir -p /opt/datatools
sudo chown datatools:datatools /opt/datatools

3.2 Application Deployment

#!/bin/bash
# deploy-production.sh - production deployment script

set -e

APP_USER="datatools"
APP_DIR="/opt/datatools"
APP_NAME="datatools-pro"
PYTHON_VERSION="3.9"

echo "Deploying DataTools Pro to production..."

# 1. Create the application directories
sudo mkdir -p $APP_DIR/{app,logs,data,backup}
sudo chown -R $APP_USER:$APP_USER $APP_DIR

# 2. Deploy the application code
sudo -u $APP_USER git clone <repository-url> $APP_DIR/app
cd $APP_DIR/app

# 3. Create the virtual environment
sudo -u $APP_USER python$PYTHON_VERSION -m venv $APP_DIR/venv
sudo -u $APP_USER $APP_DIR/venv/bin/pip install --upgrade pip

# 4. Install dependencies
sudo -u $APP_USER $APP_DIR/venv/bin/pip install -r requirements.txt

# 5. Write the production configuration
# (tee runs as $APP_USER; a plain "sudo -u ... cat > file" would perform the
# redirection in the calling shell and can fail on permissions)
sudo -u $APP_USER tee config.py > /dev/null << 'EOF'
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Base settings
DEBUG = False
TESTING = False
SECRET_KEY = os.getenv('SECRET_KEY', 'change-this-in-production')

# Database
DATABASE_URL = os.getenv('DATABASE_URL', '/opt/datatools/data/config_groups.db')

# Logging
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
LOG_FILE = '/opt/datatools/logs/datatools.log'

# Server
HOST = os.getenv('HOST', '127.0.0.1')
PORT = int(os.getenv('PORT', 5000))
WORKERS = int(os.getenv('WORKERS', 4))

# External services
CASSANDRA_HOSTS = os.getenv('CASSANDRA_HOSTS', '127.0.0.1').split(',')
REDIS_HOSTS = os.getenv('REDIS_HOSTS', '127.0.0.1').split(',')
EOF

# 6. Write the environment file
sudo -u $APP_USER tee $APP_DIR/.env > /dev/null << 'EOF'
# Production settings
SECRET_KEY=your-production-secret-key-here
DATABASE_URL=/opt/datatools/data/config_groups.db
LOG_LEVEL=INFO
HOST=127.0.0.1
PORT=5000
WORKERS=4

# External services
CASSANDRA_HOSTS=10.0.1.100,10.0.1.101
REDIS_HOSTS=10.0.2.100,10.0.2.101
EOF

# 7. Initialize the database
sudo -u $APP_USER $APP_DIR/venv/bin/python -c "
import sys
sys.path.insert(0, '$APP_DIR/app')
from app import init_database
init_database()
print('Database initialized')
"

# 8. Set permissions
sudo chown -R $APP_USER:$APP_USER $APP_DIR
sudo chmod 600 $APP_DIR/.env
sudo chmod 755 $APP_DIR/app/app.py

echo "Application deployed to: $APP_DIR"

3.3 Gunicorn Configuration

# gunicorn.conf.py
import os
import multiprocessing

# Server socket
bind = f"{os.getenv('HOST', '127.0.0.1')}:{os.getenv('PORT', 5000)}"
backlog = 2048

# Worker processes
workers = int(os.getenv('WORKERS', multiprocessing.cpu_count() * 2 + 1))
worker_class = 'gevent'
worker_connections = 1000
max_requests = 1000
max_requests_jitter = 100
preload_app = True

# Timeouts
timeout = 120
keepalive = 2
graceful_timeout = 30

# Logging
accesslog = '/opt/datatools/logs/access.log'
errorlog = '/opt/datatools/logs/error.log'
loglevel = 'info'
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = 'datatools-pro'

# User and group
user = 'datatools'
group = 'datatools'

# Temporary upload directory
tmp_upload_dir = '/opt/datatools/tmp'

# Server hooks
def on_starting(server):
    server.log.info("DataTools Pro is starting...")

def when_ready(server):
    server.log.info("DataTools Pro is ready")

def on_exit(server):
    server.log.info("DataTools Pro is shutting down...")

3.4 Systemd Service

# /etc/systemd/system/datatools-pro.service
[Unit]
Description=DataTools Pro - Enterprise Data Processing Platform
After=network.target
Wants=network.target

[Service]
Type=notify
User=datatools
Group=datatools
RuntimeDirectory=datatools-pro
WorkingDirectory=/opt/datatools/app
Environment=PATH=/opt/datatools/venv/bin
EnvironmentFile=/opt/datatools/.env
ExecStart=/opt/datatools/venv/bin/gunicorn --config gunicorn.conf.py app:app
ExecReload=/bin/kill -s HUP $MAINPID
KillMode=mixed
TimeoutStopSec=30
PrivateTmp=true
ProtectSystem=strict
ReadWritePaths=/opt/datatools
NoNewPrivileges=yes

# Restart policy
Restart=always
RestartSec=10
StartLimitBurst=3
StartLimitInterval=60

[Install]
WantedBy=multi-user.target

# Enable and start the service
sudo systemctl daemon-reload
sudo systemctl enable datatools-pro
sudo systemctl start datatools-pro
sudo systemctl status datatools-pro

3.5 Nginx Reverse Proxy

# /etc/nginx/sites-available/datatools-pro
upstream datatools_backend {
    server 127.0.0.1:5000 fail_timeout=0;
    # Add more upstream servers here when running multiple app instances
    # server 127.0.0.1:5001 fail_timeout=0;
    # server 127.0.0.1:5002 fail_timeout=0;
}

server {
    listen 80;
    listen [::]:80;
    server_name datatools.yourdomain.com;

    # Redirect HTTP to HTTPS
    return 301 https://$server_name$request_uri;
}

server {
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name datatools.yourdomain.com;

    # SSL settings
    ssl_certificate /etc/ssl/certs/datatools.crt;
    ssl_certificate_key /etc/ssl/private/datatools.key;
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-RSA-AES256-GCM-SHA512:DHE-RSA-AES256-GCM-SHA512:ECDHE-RSA-AES256-GCM-SHA384;
    ssl_prefer_server_ciphers off;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;

    # Security headers
    add_header X-Frame-Options DENY;
    add_header X-Content-Type-Options nosniff;
    add_header X-XSS-Protection "1; mode=block";
    add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload";

    # General settings
    client_max_body_size 100M;
    keepalive_timeout 65;
    gzip on;
    gzip_vary on;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/javascript
        application/xml+rss
        application/json;

    # Static file caching
    location /static/ {
        alias /opt/datatools/app/static/;
        expires 1y;
        add_header Cache-Control "public, immutable";
        access_log off;
    }

    # API endpoints
    location /api/ {
        proxy_pass http://datatools_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 120s;
        proxy_buffering off;
    }

    # Main application
    location / {
        proxy_pass http://datatools_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;
    }

    # Logging
    access_log /var/log/nginx/datatools-access.log;
    error_log /var/log/nginx/datatools-error.log;
}

# Enable the site
sudo ln -s /etc/nginx/sites-available/datatools-pro /etc/nginx/sites-enabled/
sudo nginx -t
sudo systemctl reload nginx

4. Containerized Deployment

4.1 Dockerfile

# Dockerfile
FROM python:3.9-slim

# Working directory
WORKDIR /app

# System dependencies (curl is required by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Create a non-root user
RUN useradd -r -s /bin/false appuser && \
    chown -R appuser:appuser /app

# Run as the non-root user
USER appuser

# Expose the application port
EXPOSE 5000

# Health check (assumes an /api/health endpoint; see the sketch after this Dockerfile)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:5000/api/health || exit 1

# Start command
CMD ["gunicorn", "--config", "gunicorn.conf.py", "app:app"]
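
The HEALTHCHECK above, the Docker Compose healthcheck, and the Kubernetes probes in section 5 all hit /api/health. Since app.py is not reproduced in this guide, a minimal sketch of what such an endpoint might look like:

# /api/health sketch; a real endpoint should also verify that SQLite,
# Cassandra, and Redis are reachable before reporting healthy
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/api/health')
def health():
    return jsonify(status='ok'), 200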

4.2 Docker Compose Configuration

# docker-compose.yml
version: '3.8'

services:
  datatools-pro:
    build:
      context: .
      dockerfile: Dockerfile
    image: datatools-pro:latest
    container_name: datatools-pro
    restart: unless-stopped
    ports:
      - "5000:5000"
    environment:
      - DEBUG=False
      - SECRET_KEY=${SECRET_KEY}
      - DATABASE_URL=/app/data/config_groups.db
      - LOG_LEVEL=INFO
      - CASSANDRA_HOSTS=${CASSANDRA_HOSTS}
      - REDIS_HOSTS=${REDIS_HOSTS}
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
      - ./backup:/app/backup
    networks:
      - datatools-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  nginx:
    image: nginx:alpine
    container_name: datatools-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/conf.d:/etc/nginx/conf.d:ro
      - ./ssl:/etc/ssl:ro
      - ./logs/nginx:/var/log/nginx
    depends_on:
      - datatools-pro
    networks:
      - datatools-network

  redis:
    image: redis:7-alpine
    container_name: datatools-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data
    command: redis-server --appendonly yes
    networks:
      - datatools-network

volumes:
  redis-data:
    driver: local

networks:
  datatools-network:
    driver: bridge

4.3 Environment Variables

# .env - Docker Compose environment variables
COMPOSE_PROJECT_NAME=datatools-pro

# Application
SECRET_KEY=your-super-secret-key-for-production
DEBUG=False
LOG_LEVEL=INFO

# External services
CASSANDRA_HOSTS=cassandra-node1,cassandra-node2,cassandra-node3
REDIS_HOSTS=redis-node1,redis-node2,redis-node3

# Database
DATABASE_URL=/app/data/config_groups.db

# Monitoring
ENABLE_MONITORING=true
METRICS_PORT=9090

4.4 Docker Deployment Script

#!/bin/bash
# deploy-docker.sh - Docker deployment script

set -e

PROJECT_NAME="datatools-pro"
DOCKER_COMPOSE_FILE="docker-compose.yml"
ENV_FILE=".env"

echo "Starting Docker deployment..."

# 1. Check the Docker environment
if ! command -v docker &> /dev/null; then
    echo "Error: Docker is not installed"
    exit 1
fi

if ! command -v docker-compose &> /dev/null; then
    echo "Error: Docker Compose is not installed"
    exit 1
fi

# 2. Check the environment file
if [ ! -f "$ENV_FILE" ]; then
    echo "Creating environment file: $ENV_FILE"
    cp .env.example $ENV_FILE
    echo "Edit $ENV_FILE, then re-run this script"
    exit 1
fi

# 3. Create required directories
mkdir -p {data,logs,backup,ssl,nginx/conf.d}

# 4. Build the image
echo "Building the Docker image..."
docker-compose -f $DOCKER_COMPOSE_FILE build

# 5. Stop old containers
echo "Stopping old containers..."
docker-compose -f $DOCKER_COMPOSE_FILE down

# 6. Start new containers
echo "Starting new containers..."
docker-compose -f $DOCKER_COMPOSE_FILE up -d

# 7. Wait for startup
echo "Waiting for services to start..."
sleep 30

# 8. Check service status
echo "Checking service status..."
docker-compose -f $DOCKER_COMPOSE_FILE ps

# 9. Health check
echo "Running health check..."
if curl -f http://localhost/api/health > /dev/null 2>&1; then
    echo "✅ Deployment succeeded! Services are up"
    echo "URL: http://localhost"
else
    echo "❌ Deployment failed! Services did not start correctly"
    echo "Check logs: docker-compose logs"
    exit 1
fi

# 10. Show useful commands
echo ""
echo "🚀 DataTools Pro Docker deployment complete!"
echo ""
echo "Useful commands:"
echo "  View logs:  docker-compose logs -f"
echo "  Restart:    docker-compose restart"
echo "  Stop:       docker-compose down"
echo "  Update:     git pull && docker-compose up -d --build"
echo ""

5. Kubernetes Deployment

5.1 Kubernetes Manifests

5.1.1 ConfigMap

# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: datatools-pro-config
  namespace: datatools
data:
  DEBUG: "False"
  LOG_LEVEL: "INFO"
  DATABASE_URL: "/app/data/config_groups.db"
  CASSANDRA_HOSTS: "cassandra-service.database.svc.cluster.local"
  REDIS_HOSTS: "redis-service.cache.svc.cluster.local"
  WORKERS: "4"
  HOST: "0.0.0.0"
  PORT: "5000"

5.1.2 Secret

# k8s/secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: datatools-pro-secret
  namespace: datatools
type: Opaque
data:
  SECRET_KEY: eW91ci1zdXBlci1zZWNyZXQta2V5LWZvci1wcm9kdWN0aW9u  # base64 encoded
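
Secret values in a manifest must be base64-encoded. A small helper for producing the value above (the plaintext key is the placeholder from the earlier .env example):

# encode_secret.py - base64-encode a secret value for a Kubernetes manifest
import base64

raw = "your-super-secret-key-for-production"  # placeholder; use your real key
print(base64.b64encode(raw.encode()).decode())

Alternatively, kubectl create secret generic datatools-pro-secret --from-literal=SECRET_KEY=... -n datatools performs the encoding for you.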

5.1.3 PersistentVolumeClaim

# k8s/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: datatools-pro-data
  namespace: datatools
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
  storageClassName: fast-ssd

5.1.4 Deployment

# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datatools-pro
  namespace: datatools
  labels:
    app: datatools-pro
    version: v2.0.0
spec:
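  # NOTE: the application keeps its state in a single SQLite file on a
  # ReadWriteOnce volume, so multiple replicas can only co-schedule on one
  # node and may contend for the database; keep replicas: 1 unless state
  # is moved to a shared database.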
  replicas: 3
  selector:
    matchLabels:
      app: datatools-pro
  template:
    metadata:
      labels:
        app: datatools-pro
        version: v2.0.0
    spec:
      containers:
      - name: datatools-pro
        image: datatools-pro:2.0.0
        ports:
        - containerPort: 5000
          name: http
        env:
        - name: SECRET_KEY
          valueFrom:
            secretKeyRef:
              name: datatools-pro-secret
              key: SECRET_KEY
        envFrom:
        - configMapRef:
            name: datatools-pro-config
        volumeMounts:
        - name: data-volume
          mountPath: /app/data
        - name: logs-volume
          mountPath: /app/logs
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
        livenessProbe:
          httpGet:
            path: /api/health
            port: 5000
          initialDelaySeconds: 30
          periodSeconds: 30
        readinessProbe:
          httpGet:
            path: /api/health
            port: 5000
          initialDelaySeconds: 5
          periodSeconds: 5
        securityContext:
          runAsNonRoot: true
          runAsUser: 1000
          readOnlyRootFilesystem: true
      volumes:
      - name: data-volume
        persistentVolumeClaim:
          claimName: datatools-pro-data
      - name: logs-volume
        emptyDir: {}
      securityContext:
        fsGroup: 1000

5.1.5 Service

# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: datatools-pro-service
  namespace: datatools
  labels:
    app: datatools-pro
spec:
  selector:
    app: datatools-pro
  ports:
  - port: 80
    targetPort: 5000
    protocol: TCP
    name: http
  type: ClusterIP

5.1.6 Ingress

# k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: datatools-pro-ingress
  namespace: datatools
  annotations:
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "120"
spec:
  tls:
  - hosts:
    - datatools.yourdomain.com
    secretName: datatools-pro-tls
  rules:
  - host: datatools.yourdomain.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: datatools-pro-service
            port:
              number: 80

5.2 Kubernetes Deployment Script

#!/bin/bash
# deploy-k8s.sh - Kubernetes deployment script

set -e

NAMESPACE="datatools"
MANIFEST_DIR="k8s"

echo "Starting Kubernetes deployment..."

# 1. Check kubectl
if ! command -v kubectl &> /dev/null; then
    echo "Error: kubectl is not installed"
    exit 1
fi

# 2. Check cluster connectivity
if ! kubectl cluster-info &> /dev/null; then
    echo "Error: cannot connect to the Kubernetes cluster"
    exit 1
fi

# 3. Create the namespace
echo "Creating namespace: $NAMESPACE"
kubectl create namespace $NAMESPACE --dry-run=client -o yaml | kubectl apply -f -

# 4. Apply ConfigMap and Secret
echo "Applying configuration..."
kubectl apply -f $MANIFEST_DIR/configmap.yaml
kubectl apply -f $MANIFEST_DIR/secret.yaml

# 5. Apply storage
echo "Applying storage configuration..."
kubectl apply -f $MANIFEST_DIR/pvc.yaml

# 6. Wait for the PVC to bind
# (PVCs report a status phase rather than a condition, so wait on the
# jsonpath; with WaitForFirstConsumer storage classes the PVC binds only
# after the first pod is scheduled, in which case skip this step)
echo "Waiting for the volume to bind..."
kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/datatools-pro-data -n $NAMESPACE --timeout=300s

# 7. Apply the Deployment
echo "Deploying the application..."
kubectl apply -f $MANIFEST_DIR/deployment.yaml

# 8. Wait for the Deployment to become available
echo "Waiting for the application to become ready..."
kubectl wait --for=condition=Available deployment/datatools-pro -n $NAMESPACE --timeout=300s

# 9. Apply the Service
echo "Creating the service..."
kubectl apply -f $MANIFEST_DIR/service.yaml

# 10. Apply the Ingress
if [ -f "$MANIFEST_DIR/ingress.yaml" ]; then
    echo "Creating the ingress..."
    kubectl apply -f $MANIFEST_DIR/ingress.yaml
fi

# 11. Check deployment status
echo "Checking deployment status..."
kubectl get pods -n $NAMESPACE -l app=datatools-pro
kubectl get svc -n $NAMESPACE -l app=datatools-pro

# 12. Show access information
echo ""
echo "🚀 Kubernetes deployment complete!"
echo ""
echo "Pod status:   kubectl get pods -n $NAMESPACE"
echo "Logs:         kubectl logs -f deployment/datatools-pro -n $NAMESPACE"
echo "Port-forward: kubectl port-forward service/datatools-pro-service 8080:80 -n $NAMESPACE"
echo ""

if [ -f "$MANIFEST_DIR/ingress.yaml" ]; then
    INGRESS_HOST=$(kubectl get ingress datatools-pro-ingress -n $NAMESPACE -o jsonpath='{.spec.rules[0].host}')
    echo "URL: https://$INGRESS_HOST"
fi

5.3 HorizontalPodAutoscaler

# k8s/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: datatools-pro-hpa
  namespace: datatools
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: datatools-pro
  minReplicas: 2
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
      - type: Percent
        value: 50
        periodSeconds: 60

6. Monitoring and Logging

6.1 Application Monitoring

# monitoring.py - application monitoring module
import os
import time

import psutil
from flask import Response, g, request
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST

# Request metrics
REQUEST_COUNT = Counter('datatools_requests_total', 'Total requests', ['method', 'endpoint', 'status'])
REQUEST_LATENCY = Histogram('datatools_request_duration_seconds', 'Request latency')
ACTIVE_CONNECTIONS = Gauge('datatools_active_connections', 'Active database connections')
QUERY_EXECUTION_TIME = Histogram('datatools_query_duration_seconds', 'Query execution time', ['query_type'])

# System resource metrics
SYSTEM_CPU_USAGE = Gauge('datatools_system_cpu_usage_percent', 'System CPU usage')
SYSTEM_MEMORY_USAGE = Gauge('datatools_system_memory_usage_percent', 'System memory usage')
DATABASE_SIZE = Gauge('datatools_database_size_bytes', 'Database file size')

def init_monitoring(app):
    """Initialize monitoring."""
    
    @app.before_request
    def before_request():
        """Record the request start time."""
        g.start_time = time.time()
    
    @app.after_request
    def after_request(response):
        """Record latency and count the request."""
        if hasattr(g, 'start_time'):
            duration = time.time() - g.start_time
            REQUEST_LATENCY.observe(duration)
            REQUEST_COUNT.labels(
                method=request.method,
                endpoint=request.endpoint or 'unknown',
                status=response.status_code
            ).inc()
        
        return response
    
    @app.route('/metrics')
    def metrics():
        """Prometheus metrics endpoint."""
        # Refresh system metrics on every scrape
        update_system_metrics()
        
        return Response(generate_latest(), mimetype=CONTENT_TYPE_LATEST)

def update_system_metrics():
    """Update system-level metrics."""
    # CPU usage (interval=None compares against the previous call instead of
    # blocking the scrape for a full second)
    cpu_percent = psutil.cpu_percent(interval=None)
    SYSTEM_CPU_USAGE.set(cpu_percent)
    
    # Memory usage
    memory = psutil.virtual_memory()
    SYSTEM_MEMORY_USAGE.set(memory.percent)
    
    # Database file size
    try:
        db_path = 'config_groups.db'
        if os.path.exists(db_path):
            DATABASE_SIZE.set(os.path.getsize(db_path))
    except OSError:
        pass

def record_query_time(query_type, duration):
    """Record query execution time."""
    QUERY_EXECUTION_TIME.labels(query_type=query_type).observe(duration)
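
record_query_time is most convenient to apply through a decorator; a minimal sketch (the query function below is illustrative, not part of the codebase):

# timing decorator feeding datatools_query_duration_seconds
import time
from functools import wraps

from monitoring import record_query_time

def timed_query(query_type):
    """Report a function's runtime under the given query_type label."""
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            start = time.time()
            try:
                return fn(*args, **kwargs)
            finally:
                record_query_time(query_type, time.time() - start)
        return wrapper
    return decorator

@timed_query('cassandra_select')
def fetch_rows(session, cql):
    # Hypothetical helper: any Cassandra or Redis call can be wrapped this way
    return session.execute(cql)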

6.2 Logging Configuration

# logging_config.py - logging setup
import logging
import logging.handlers
import os

def setup_logging(app):
    """Configure application logging."""
    
    # Create the log directory
    log_dir = app.config.get('LOG_DIR', 'logs')
    os.makedirs(log_dir, exist_ok=True)
    
    # Shared log format
    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
    )
    
    # Application log
    app_handler = logging.handlers.RotatingFileHandler(
        os.path.join(log_dir, 'datatools.log'),
        maxBytes=10*1024*1024,  # 10MB
        backupCount=10
    )
    app_handler.setFormatter(formatter)
    app_handler.setLevel(logging.INFO)
    
    # Error log
    error_handler = logging.handlers.RotatingFileHandler(
        os.path.join(log_dir, 'error.log'),
        maxBytes=10*1024*1024,  # 10MB
        backupCount=5
    )
    error_handler.setFormatter(formatter)
    error_handler.setLevel(logging.ERROR)
    
    # Access log (werkzeug renders the full access line into the message
    # itself, so the shared formatter suffices; custom fields such as
    # %(remote_addr)s do not exist on standard log records and would raise
    # formatting errors)
    access_handler = logging.handlers.RotatingFileHandler(
        os.path.join(log_dir, 'access.log'),
        maxBytes=10*1024*1024,  # 10MB
        backupCount=10
    )
    access_handler.setFormatter(formatter)
    
    # Attach handlers to the Flask app logger
    app.logger.addHandler(app_handler)
    app.logger.addHandler(error_handler)
    app.logger.setLevel(logging.INFO)
    
    # Route werkzeug's access lines to the access log
    access_logger = logging.getLogger('werkzeug')
    access_logger.addHandler(access_handler)
    
    # Quiet noisy third-party loggers
    logging.getLogger('cassandra').setLevel(logging.WARNING)
    logging.getLogger('redis').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
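
Both modules are attached at application startup; a minimal wiring sketch (assumes logging_config.py and monitoring.py sit next to app.py):

# startup wiring (illustrative)
from flask import Flask

from logging_config import setup_logging
from monitoring import init_monitoring

app = Flask(__name__)
app.config['LOG_DIR'] = 'logs'  # directory used by setup_logging
setup_logging(app)
init_monitoring(app)  # also registers the /metrics endpoint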

6.3 Prometheus Configuration

# prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "datatools_rules.yml"

scrape_configs:
  - job_name: 'datatools-pro'
    static_configs:
      - targets: ['localhost:5000']
    metrics_path: '/metrics'
    scrape_interval: 30s
    scrape_timeout: 10s

alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

6.4 Grafana Dashboard

{
  "dashboard": {
    "id": null,
    "title": "DataTools Pro Monitoring",
    "tags": ["datatools", "monitoring"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(datatools_requests_total[5m])",
            "legendFormat": "{{method}} {{endpoint}}"
          }
        ]
      },
      {
        "id": 2,
        "title": "Request Latency",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, datatools_request_duration_seconds_bucket)",
            "legendFormat": "95th percentile"
          },
          {
            "expr": "histogram_quantile(0.50, datatools_request_duration_seconds_bucket)",
            "legendFormat": "50th percentile"
          }
        ]
      },
      {
        "id": 3,
        "title": "System Resources",
        "type": "graph",
        "targets": [
          {
            "expr": "datatools_system_cpu_usage_percent",
            "legendFormat": "CPU Usage %"
          },
          {
            "expr": "datatools_system_memory_usage_percent",
            "legendFormat": "Memory Usage %"
          }
        ]
      },
      {
        "id": 4,
        "title": "Query Performance",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, datatools_query_duration_seconds_bucket)",
            "legendFormat": "{{query_type}} 95th percentile"
          }
        ]
      }
    ],
    "time": {
      "from": "now-1h",
      "to": "now"
    },
    "refresh": "30s"
  }
}

7. Backup and Recovery

7.1 Backup Script

#!/bin/bash
# backup.sh - data backup script

set -e

# Settings
BACKUP_DIR="/opt/datatools/backup"
DATA_DIR="/opt/datatools/data"
LOG_DIR="/opt/datatools/logs"
RETENTION_DAYS=30
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_NAME="datatools_backup_$DATE"

echo "Starting backup: $BACKUP_NAME"

# Create the backup directory
mkdir -p $BACKUP_DIR/$BACKUP_NAME

# 1. Back up the SQLite database
echo "Backing up the SQLite database..."
if [ -f "$DATA_DIR/config_groups.db" ]; then
    sqlite3 $DATA_DIR/config_groups.db ".backup $BACKUP_DIR/$BACKUP_NAME/config_groups.db"
    sqlite3 $DATA_DIR/config_groups.db ".dump" > $BACKUP_DIR/$BACKUP_NAME/config_groups.sql
fi

# 2. Back up configuration files
echo "Backing up configuration files..."
cp -r /opt/datatools/app/config.py $BACKUP_DIR/$BACKUP_NAME/ 2>/dev/null || true
cp -r /opt/datatools/.env $BACKUP_DIR/$BACKUP_NAME/ 2>/dev/null || true

# 3. Back up log files (last 7 days)
echo "Backing up log files..."
find $LOG_DIR -name "*.log" -mtime -7 -exec cp {} $BACKUP_DIR/$BACKUP_NAME/ \; 2>/dev/null || true

# 4. Write a backup manifest
cat > $BACKUP_DIR/$BACKUP_NAME/backup_info.txt << EOF
Backup time: $(date)
Backup version: DataTools Pro 2.0
System: $(uname -a)
Python version: $(python3 --version)
Database size: $(du -h $DATA_DIR/config_groups.db 2>/dev/null | cut -f1 || echo "N/A")
Backup size: $(du -sh $BACKUP_DIR/$BACKUP_NAME | cut -f1)
EOF

# 5. Compress the backup
echo "Compressing backup files..."
cd $BACKUP_DIR
tar -czf $BACKUP_NAME.tar.gz $BACKUP_NAME
rm -rf $BACKUP_NAME

# 6. Prune old backups
echo "Pruning old backups..."
find $BACKUP_DIR -name "datatools_backup_*.tar.gz" -mtime +$RETENTION_DAYS -delete

# 7. Verify the backup
if [ -f "$BACKUP_DIR/$BACKUP_NAME.tar.gz" ]; then
    BACKUP_SIZE=$(du -h $BACKUP_DIR/$BACKUP_NAME.tar.gz | cut -f1)
    echo "✅ Backup complete: $BACKUP_NAME.tar.gz ($BACKUP_SIZE)"
else
    echo "❌ Backup failed!"
    exit 1
fi

# 8. Optional: upload to remote storage
if [ -n "$BACKUP_REMOTE_PATH" ]; then
    echo "Uploading backup to remote storage..."
    # rsync -avz $BACKUP_DIR/$BACKUP_NAME.tar.gz $BACKUP_REMOTE_PATH/
    # aws s3 cp $BACKUP_DIR/$BACKUP_NAME.tar.gz s3://your-backup-bucket/
fi

echo "Backup script finished"

7.2 Restore Script

#!/bin/bash
# restore.sh - data restore script

set -e

if [ $# -eq 0 ]; then
    echo "Usage: $0 <backup_file>"
    echo "Example: $0 /opt/datatools/backup/datatools_backup_20240805_100000.tar.gz"
    exit 1
fi

BACKUP_FILE="$1"
DATA_DIR="/opt/datatools/data"
RESTORE_DIR="/tmp/datatools_restore_$$"

echo "Starting restore: $BACKUP_FILE"

# 1. Verify the backup file exists
if [ ! -f "$BACKUP_FILE" ]; then
    echo "Error: backup file not found: $BACKUP_FILE"
    exit 1
fi

# 2. Stop the service
echo "Stopping the DataTools Pro service..."
sudo systemctl stop datatools-pro || true

# 3. Back up the current data
echo "Backing up current data..."
mkdir -p $DATA_DIR.bak
cp -r $DATA_DIR/* $DATA_DIR.bak/ 2>/dev/null || true

# 4. Unpack the backup
echo "Unpacking the backup..."
mkdir -p $RESTORE_DIR
cd $RESTORE_DIR
tar -xzf $BACKUP_FILE

# 5. Restore the database
echo "Restoring the database..."
BACKUP_DB=$(find $RESTORE_DIR -name "config_groups.db" | head -1)
if [ -f "$BACKUP_DB" ]; then
    cp $BACKUP_DB $DATA_DIR/config_groups.db
    chown datatools:datatools $DATA_DIR/config_groups.db
    chmod 644 $DATA_DIR/config_groups.db
    echo "✅ Database restored"
else
    echo "❌ No database backup found"
fi

# 6. Restore configuration files
echo "Restoring configuration files..."
BACKUP_CONFIG=$(find $RESTORE_DIR -name "config.py" | head -1)
if [ -f "$BACKUP_CONFIG" ]; then
    cp $BACKUP_CONFIG /opt/datatools/app/config.py
    chown datatools:datatools /opt/datatools/app/config.py
    echo "✅ Configuration file restored"
fi

BACKUP_ENV=$(find $RESTORE_DIR -name ".env" | head -1)
if [ -f "$BACKUP_ENV" ]; then
    cp $BACKUP_ENV /opt/datatools/.env
    chown datatools:datatools /opt/datatools/.env
    chmod 600 /opt/datatools/.env
    echo "✅ Environment file restored"
fi

# 7. Verify database integrity
echo "Verifying database integrity..."
if sqlite3 $DATA_DIR/config_groups.db "PRAGMA integrity_check;" | grep -q "ok"; then
    echo "✅ Database integrity check passed"
else
    echo "❌ Database integrity check failed"
    echo "Restoring original data..."
    cp -r $DATA_DIR.bak/* $DATA_DIR/
    exit 1
fi

# 8. Start the service
echo "Starting the DataTools Pro service..."
sudo systemctl start datatools-pro

# 9. Wait for startup
echo "Waiting for the service to start..."
sleep 10

# 10. Health check
if curl -f http://localhost:5000/api/health > /dev/null 2>&1; then
    echo "✅ Restore complete! Service is running"
    rm -rf $RESTORE_DIR
    rm -rf $DATA_DIR.bak
else
    echo "❌ Service failed to start; rolling back to original data"
    sudo systemctl stop datatools-pro
    rm -rf $DATA_DIR/*
    cp -r $DATA_DIR.bak/* $DATA_DIR/
    sudo systemctl start datatools-pro
    rm -rf $RESTORE_DIR
    exit 1
fi

echo "Restore script finished"

7.3 Automated Backups via Cron

# Add scheduled jobs with: crontab -e

# Daily backup at 02:00
0 2 * * * /opt/datatools/scripts/backup.sh >> /opt/datatools/logs/backup.log 2>&1

# Weekly log cleanup, Sundays at 03:00
0 3 * * 0 find /opt/datatools/logs -name "*.log.*" -mtime +7 -delete

# Monthly database integrity check, 1st of the month at 04:00
0 4 1 * * sqlite3 /opt/datatools/data/config_groups.db "PRAGMA integrity_check;" >> /opt/datatools/logs/db_check.log 2>&1

Version: v1.0
Last updated: 2024-08-05
Maintainer: DataTools Pro Team