start_dataops.sh 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. #!/bin/bash
  2. #
  3. # DataOps Platform 启动脚本
  4. # 使用 supervisorctl 启动 gunicorn 服务
  5. #
  # Strict mode: abort on an unhandled command failure (-e) and on any
  # reference to an unset variable (-u).
  # pipefail is left off on purpose: the script parses the text output of
  # pipelines such as `supervisorctl status | awk ...`, whose first stage may
  # exit non-zero without that being an error for this script.
  set -eu

  # Deployment layout. Marked readonly so no function can clobber them.
  readonly APP_NAME="dataops-platform"
  readonly APP_DIR="/opt/dataops-platform"
  readonly VENV_DIR="${APP_DIR}/venv"
  readonly LOG_DIR="${APP_DIR}/logs"

  # ANSI colour escapes for log tags. They are stored as literal backslash
  # sequences; the logging helpers expand them at print time.
  readonly RED='\033[0;31m'
  readonly GREEN='\033[0;32m'
  readonly YELLOW='\033[1;33m'
  readonly NC='\033[0m' # No Color
  # Print an informational message to stdout, prefixed with a green [INFO] tag.
  # Globals:   GREEN, NC (read) - colour escape sequences
  # Arguments: $1 - message text, printed verbatim
  echo_info() {
    # %b expands the backslash escapes held in the colour variables only;
    # %s keeps the message literal, so a "\n" or "\t" inside a path or a
    # command hint is not turned into a control character.
    printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "${1-}"
  }
  # Print a warning to stderr, prefixed with a yellow [WARN] tag.
  # Globals:   YELLOW, NC (read) - colour escape sequences
  # Arguments: $1 - message text, printed verbatim
  echo_warn() {
    # Diagnostics go to stderr so they stay visible when stdout is redirected.
    # %b expands escapes in the colour variables only; %s keeps the message
    # literal.
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "${1-}" >&2
  }
  # Print an error to stderr, prefixed with a red [ERROR] tag.
  # Globals:   RED, NC (read) - colour escape sequences
  # Arguments: $1 - message text, printed verbatim
  echo_error() {
    # Errors belong on stderr so they are not lost when stdout is captured.
    # %b expands escapes in the colour variables only; %s keeps the message
    # literal.
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}" >&2
  }
  # Abort the script unless the virtual environment directory exists.
  # Globals: VENV_DIR (read)
  # Outputs: an error and a follow-up hint through echo_error / echo_info
  #          when the directory is missing
  # Exits:   with status 1 when VENV_DIR is not a directory
  check_venv() {
    # Guard clause: nothing to report when the directory is present.
    if [[ -d "${VENV_DIR}" ]]; then
      return 0
    fi

    echo_error "虚拟环境不存在: ${VENV_DIR}"
    echo_info "请先运行部署脚本创建虚拟环境"
    exit 1
  }
  # Make sure a supervisord process exists, launching one if none is found.
  # Outputs: a warning through echo_warn when supervisord has to be started
  check_supervisor() {
    # A process named exactly "supervisord" is already there: nothing to do.
    if pgrep -x "supervisord" > /dev/null; then
      return 0
    fi

    echo_warn "supervisord 未运行,正在启动..."
    sudo supervisord -c /etc/supervisor/supervisord.conf
    # Fixed pause after launching the daemon, before the caller goes on.
    sleep 2
  }
  # Print the supervisor state of APP_NAME (the second column of
  # `supervisorctl status`, e.g. RUNNING), or UNKNOWN when nothing was read.
  # Globals: APP_NAME (read)
  _app_status() {
    local state
    # The pipeline's exit status is awk's, so a failing supervisorctl shows up
    # as empty output rather than as an error. `|| true` keeps the assignment
    # harmless under `set -e` whatever the pipeline returns.
    state=$(sudo supervisorctl status "${APP_NAME}" 2>/dev/null | awk '{print $2}') || true
    printf '%s\n' "${state:-UNKNOWN}"
  }

  # Start the application through supervisorctl and confirm it reaches RUNNING.
  # Globals: APP_NAME, LOG_DIR (read)
  # Outputs: progress through echo_info / echo_warn / echo_error, plus
  #          supervisorctl's own output
  # Returns: 0 when the application is, or already was, RUNNING
  # Exits:   with status 1 when the application is not RUNNING after the
  #          start attempt
  start_app() {
    local state

    echo_info "正在启动 ${APP_NAME}..."

    # Already running: report it and leave without restarting.
    state=$(_app_status)
    if [[ "${state}" == "RUNNING" ]]; then
      echo_warn "${APP_NAME} 已经在运行中"
      sudo supervisorctl status "${APP_NAME}"
      return 0
    fi

    # A non-zero exit here must not trip `set -e`: the state check below is
    # what decides success, and its failure branch prints the log hint that
    # an immediate abort would skip.
    sudo supervisorctl start "${APP_NAME}" || true

    # Give the process a moment to settle before reading its state.
    sleep 3

    state=$(_app_status)
    if [[ "${state}" == "RUNNING" ]]; then
      echo_info "${APP_NAME} 启动成功!"
      sudo supervisorctl status "${APP_NAME}"
    else
      echo_error "${APP_NAME} 启动失败!"
      echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
      exit 1
    fi
  }
  # Print the HTTP status code curl obtains for a request, or 000 when no
  # response was received.
  # Arguments: passed straight through to curl (URL and request options)
  _http_status() {
    local code
    # On a failed transfer curl still writes "000" through -w and then exits
    # non-zero, so the exit status is discarded instead of appending a second
    # "000". The timeouts bound each probe so a hung server cannot stall the
    # script.
    code=$(curl -s -o /dev/null -w "%{http_code}" \
      --connect-timeout 3 --max-time 10 "$@" 2>/dev/null) || true
    # An empty result (for example curl missing) is reported as 000 too.
    printf '%s\n' "${code:-000}"
  }

  # Probe the application over HTTP until it answers, with a fallback endpoint.
  # Polls GET /api/system/health up to max_retries times, sleeping
  # retry_interval seconds before each attempt. If that never returns 200,
  # one POST to /api/bd/list is tried, and a 200 or 500 from it is accepted
  # as proof that the service is up.
  # Outputs: progress through echo_info / echo_warn
  # Returns: 0 as soon as a probe is accepted. When every probe fails the
  #          function only warns and falls through without a failure status,
  #          so the caller carries on.
  health_check() {
    local -r max_retries=5
    local -r retry_interval=3
    local -r APP_PORT=5500
    local -r base_url="http://127.0.0.1:${APP_PORT}"
    local attempt
    local response=""

    echo_info "正在进行健康检查..."

    for (( attempt = 1; attempt <= max_retries; attempt++ )); do
      sleep "${retry_interval}"

      # Primary probe on the application's port.
      response=$(_http_status "${base_url}/api/system/health")
      if [[ "${response}" == "200" ]]; then
        echo_info "健康检查通过! HTTP 状态码: ${response}"
        return 0
      fi

      echo_info "尝试 ${attempt}/${max_retries}: HTTP 状态码 ${response},等待重试..."
    done

    # The health endpoint never returned 200: report the last code seen.
    echo_warn "健康检查接口返回状态码: ${response}"

    # Fallback probe: POST an empty JSON object to /api/bd/list.
    response=$(_http_status -X POST "${base_url}/api/bd/list" \
      -H "Content-Type: application/json" -d "{}")
    if [[ "${response}" == "200" || "${response}" == "500" ]]; then
      echo_info "备选接口 /api/bd/list 有响应(${response}),服务已启动!"
      return 0
    fi

    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
    echo_info "请手动检查: curl http://127.0.0.1:${APP_PORT}/api/system/health"
  }
  # Entry point: print the banner, run the start-up steps in order, then
  # print where to reach the service and where its error log lives.
  # Globals:   LOG_DIR (read)
  # Arguments: none used
  main() {
    local -r rule="=========================================="

    # Banner: a title line framed by two rules.
    printf '%s\n' "${rule}" " DataOps Platform 启动脚本" "${rule}"

    # Start-up sequence; each step reports its own progress.
    check_venv
    check_supervisor
    start_app
    health_check

    printf '\n'
    echo_info "启动完成!"
    echo_info "访问地址: http://localhost:5500"
    echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
  }

  # Run the script, forwarding all command-line arguments.
  main "$@"