#!/bin/bash
#
# fix_startup.sh - quick-fix script for DataOps Platform startup problems.
#
  5. set -e
  6. # 配置变量
  7. APP_NAME="dataops-platform"
  8. APP_DIR="/opt/dataops-platform"
  9. VENV_DIR="${APP_DIR}/venv"
  10. LOG_DIR="${APP_DIR}/logs"
  11. # 颜色输出
  12. RED='\033[0;31m'
  13. GREEN='\033[0;32m'
  14. YELLOW='\033[1;33m'
  15. NC='\033[0m'
  16. echo_info() {
  17. echo -e "${GREEN}[INFO]${NC} $1"
  18. }
  19. echo_warn() {
  20. echo -e "${YELLOW}[WARN]${NC} $1"
  21. }
  22. echo_error() {
  23. echo -e "${RED}[ERROR]${NC} $1"
  24. }
  25. echo "=========================================="
  26. echo " DataOps Platform 快速修复"
  27. echo "=========================================="
  28. # 1. 创建日志目录
  29. echo_info "1. 检查并创建日志目录..."
  30. if [ ! -d "${LOG_DIR}" ]; then
  31. sudo mkdir -p "${LOG_DIR}"
  32. sudo chown ubuntu:ubuntu "${LOG_DIR}"
  33. echo_info "✓ 日志目录已创建: ${LOG_DIR}"
  34. else
  35. echo_info "✓ 日志目录已存在"
  36. fi
  37. # 2. 安装 tzdata(时区数据)
  38. echo_info "2. 检查并安装时区数据..."
  39. if ! dpkg -l | grep -q tzdata; then
  40. echo_info "正在安装 tzdata..."
  41. sudo DEBIAN_FRONTEND=noninteractive apt-get update
  42. sudo DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
  43. echo_info "✓ tzdata 已安装"
  44. else
  45. echo_info "✓ tzdata 已安装"
  46. fi
  47. # 3. 检查 Python 版本
  48. echo_info "3. 检查 Python 版本..."
  49. if [ -f "${VENV_DIR}/bin/python" ]; then
  50. PYTHON_VERSION=$(${VENV_DIR}/bin/python --version 2>&1 | awk '{print $2}')
  51. echo_info "Python 版本: ${PYTHON_VERSION}"
  52. # 检查是否为 Python 3.9+
  53. MAJOR=$(echo ${PYTHON_VERSION} | cut -d. -f1)
  54. MINOR=$(echo ${PYTHON_VERSION} | cut -d. -f2)
  55. if [ "${MAJOR}" -eq 3 ] && [ "${MINOR}" -ge 9 ]; then
  56. echo_info "✓ Python 版本支持 zoneinfo"
  57. else
  58. echo_warn "Python 版本 < 3.9,zoneinfo 可能不可用"
  59. echo_info "建议使用 Python 3.9 或更高版本"
  60. fi
  61. else
  62. echo_error "✗ Python 虚拟环境不存在"
  63. exit 1
  64. fi
  65. # 4. 测试时区模块
  66. echo_info "4. 测试时区模块..."
  67. cd "${APP_DIR}"
  68. ${VENV_DIR}/bin/python -c "
  69. try:
  70. from zoneinfo import ZoneInfo
  71. except ImportError:
  72. from backports.zoneinfo import ZoneInfo
  73. from datetime import datetime
  74. tz = ZoneInfo('Asia/Shanghai')
  75. now = datetime.now(tz)
  76. print(f'✓ 时区模块正常,当前东八区时间: {now}')
  77. " 2>&1 || {
  78. echo_error "✗ 时区模块测试失败"
  79. echo_info "尝试安装 backports.zoneinfo..."
  80. ${VENV_DIR}/bin/pip install backports.zoneinfo
  81. echo_info "重新测试..."
  82. ${VENV_DIR}/bin/python -c "
  83. try:
  84. from zoneinfo import ZoneInfo
  85. except ImportError:
  86. from backports.zoneinfo import ZoneInfo
  87. from datetime import datetime
  88. tz = ZoneInfo('Asia/Shanghai')
  89. now = datetime.now(tz)
  90. print(f'✓ 时区模块正常,当前东八区时间: {now}')
  91. " 2>&1
  92. }
  93. # 5. 测试应用导入
  94. echo_info "5. 测试应用导入..."
  95. ${VENV_DIR}/bin/python -c "
  96. import sys
  97. sys.path.insert(0, '${APP_DIR}')
  98. from app import create_app
  99. app = create_app()
  100. print('✓ 应用导入成功')
  101. " 2>&1 || {
  102. echo_error "✗ 应用导入失败,查看详细错误:"
  103. ${VENV_DIR}/bin/python -c "
  104. import sys
  105. sys.path.insert(0, '${APP_DIR}')
  106. try:
  107. from app import create_app
  108. app = create_app()
  109. except Exception as e:
  110. import traceback
  111. traceback.print_exc()
  112. " 2>&1
  113. exit 1
  114. }
  115. # 6. 修复文件权限
  116. echo_info "6. 修复文件权限..."
  117. sudo chown -R ubuntu:ubuntu "${APP_DIR}"
  118. sudo chmod -R 755 "${APP_DIR}/scripts"
  119. echo_info "✓ 文件权限已修复"
  120. # 7. 重新加载 supervisor 配置
  121. echo_info "7. 重新加载 Supervisor 配置..."
  122. sudo supervisorctl reread
  123. sudo supervisorctl update
  124. echo_info "✓ Supervisor 配置已重新加载"
  125. # 8. 停止并清理旧进程
  126. echo_info "8. 清理旧进程..."
  127. sudo supervisorctl stop ${APP_NAME} 2>/dev/null || true
  128. sleep 2
  129. # 检查是否有残留进程
  130. if pgrep -f "gunicorn.*dataops" > /dev/null; then
  131. echo_warn "发现残留的 gunicorn 进程,正在清理..."
  132. sudo pkill -f "gunicorn.*dataops" || true
  133. sleep 2
  134. fi
  135. # 9. 启动应用
  136. echo_info "9. 启动应用..."
  137. sudo supervisorctl start ${APP_NAME}
  138. sleep 3
  139. # 10. 检查状态
  140. echo_info "10. 检查应用状态..."
  141. status=$(sudo supervisorctl status ${APP_NAME} | awk '{print $2}')
  142. if [ "$status" = "RUNNING" ]; then
  143. echo_info "✓ ${APP_NAME} 启动成功!"
  144. sudo supervisorctl status ${APP_NAME}
  145. else
  146. echo_error "✗ ${APP_NAME} 启动失败!"
  147. echo_info "查看错误日志:"
  148. echo ""
  149. if [ -f "/var/log/supervisor/${APP_NAME}-stderr.log" ]; then
  150. echo "=== Supervisor stderr 日志 ==="
  151. sudo tail -30 "/var/log/supervisor/${APP_NAME}-stderr.log"
  152. fi
  153. if [ -f "${LOG_DIR}/gunicorn_error.log" ]; then
  154. echo ""
  155. echo "=== Gunicorn 错误日志 ==="
  156. tail -30 "${LOG_DIR}/gunicorn_error.log"
  157. fi
  158. exit 1
  159. fi
  160. # 11. 健康检查
  161. echo_info "11. 进行健康检查..."
  162. sleep 3
  163. response=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5500/api/system/health 2>/dev/null || echo "000")
  164. if [ "$response" = "200" ]; then
  165. echo_info "✓ 健康检查通过! HTTP 状态码: ${response}"
  166. else
  167. echo_warn "健康检查返回: ${response}"
  168. echo_info "服务可能需要更多时间启动"
  169. fi
  170. echo ""
  171. echo "=========================================="
  172. echo_info "修复完成!"
  173. echo "=========================================="
  174. echo_info "访问地址: http://localhost:5500"
  175. echo_info "查看日志: sudo tail -f /var/log/supervisor/${APP_NAME}-stderr.log"