deploy_dataops.sh 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. #!/bin/bash
  2. #
  3. # DataOps Platform 部署脚本
  4. # 用于初始化或重建虚拟环境并配置 supervisor
  5. #
  # Stop at the first command that fails outside a guarded context.
  set -e

  # Application layout: everything lives under APP_DIR.
  APP_NAME="dataops-platform"
  APP_DIR="/opt/${APP_NAME}"
  LOG_DIR="${APP_DIR}/logs"
  VENV_DIR="${APP_DIR}/venv"
  SUPERVISOR_CONF="/etc/supervisor/conf.d/${APP_NAME}.conf"

  # Interpreter command used for the Python check and to build the venv.
  PYTHON_VERSION="python3"

  # Gunicorn settings.
  GUNICORN_BIND="0.0.0.0:80"
  GUNICORN_WORKERS=4
  GUNICORN_TIMEOUT=120

  # ANSI colour codes, stored as literal backslash sequences; they only turn
  # into real escape characters when printed with escape interpretation.
  NC='\033[0m' # reset / no colour
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  BLUE='\033[0;34m'
  # Print an informational message to stdout behind a green "[INFO]" tag.
  # Globals:   GREEN, NC (read)
  # Arguments: $1 - message text; backslash escapes in it are interpreted
  echo_info() {
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
  }
  # Print a warning behind a yellow "[WARN]" tag.
  # The warning is a diagnostic, so it is written to stderr, and the message
  # is printed literally (backslashes in paths or patterns are not expanded);
  # only the colour codes get escape interpretation.
  # Globals:   YELLOW, NC (read)
  # Arguments: $1 - message text
  echo_warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1" >&2
  }
  # Print an error behind a red "[ERROR]" tag.
  # Errors are diagnostics, so they are written to stderr, and the message is
  # printed literally (backslashes are not expanded); only the colour codes
  # get escape interpretation.
  # Globals:   RED, NC (read)
  # Arguments: $1 - message text
  echo_error() {
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
  }
  # Announce a deployment step on stdout behind a blue "[STEP]" tag.
  # Globals:   BLUE, NC (read)
  # Arguments: $1 - step description; backslash escapes in it are interpreted
  echo_step() {
    local tag="${BLUE}[STEP]${NC}"
    printf '%b\n' "${tag} $1"
  }
  # Require root privileges (run directly as root or through sudo).
  # Exits with status 1 when the effective user id is not 0.
  check_permissions() {
    if (( EUID == 0 )); then
      return 0
    fi
    echo_error "请使用 sudo 运行此脚本"
    exit 1
  }
  # Verify that the Python interpreter exists and that python3-venv is installed.
  # Globals:   PYTHON_VERSION (read) - interpreter command name
  # Exits:     1 when the interpreter cannot be found
  # Side effects: installs python3-venv and python3-pip through apt-get when
  #               python3-venv is not fully installed.
  check_python() {
    echo_step "检查 Python 版本..."
    if ! command -v "${PYTHON_VERSION}" > /dev/null 2>&1; then
      echo_error "Python3 未安装"
      exit 1
    fi

    local python_ver
    python_ver=$("${PYTHON_VERSION}" --version 2>&1 | awk '{print $2}')
    echo_info "Python 版本: ${python_ver}"

    # Ask dpkg for the state of exactly this package. Grepping the whole
    # `dpkg -l` listing also matches removed-but-configured ("rc") entries and
    # any other line that merely contains the string "python3-venv".
    local venv_status
    venv_status=$(dpkg-query -W -f='${Status}' python3-venv 2> /dev/null || true)
    if [[ "${venv_status}" != "install ok installed" ]]; then
      echo_warn "python3-venv 未安装,正在安装..."
      apt-get update
      apt-get install -y python3-venv python3-pip
    fi
  }
  # Make sure Supervisor is available.
  # When the supervisord command is missing, the supervisor package is
  # installed through apt-get and its service is enabled and started.
  check_supervisor() {
    echo_step "检查 Supervisor..."
    if command -v supervisord > /dev/null 2>&1; then
      : # supervisord is already on PATH; nothing to install
    else
      echo_warn "Supervisor 未安装,正在安装..."
      apt-get update && apt-get install -y supervisor
      systemctl enable supervisor
      systemctl start supervisor
    fi
    echo_info "Supervisor 已安装"
  }
  # Create the log directory and hand it over to the ubuntu user.
  # Globals: LOG_DIR (read)
  create_directories() {
    local log_dir="${LOG_DIR}"
    echo_step "创建目录结构..."
    mkdir -p "${log_dir}"
    chown -R ubuntu:ubuntu "${log_dir}"
    echo_info "日志目录: ${log_dir}"
  }
  # (Re)create the Python virtual environment.
  # An existing environment is deleted outright (no backup is kept) and a fresh
  # one is built with "${PYTHON_VERSION} -m venv"; pip is then upgraded inside it.
  # Globals: VENV_DIR, PYTHON_VERSION (read)
  # Exits:   non-zero when VENV_DIR is empty or unset
  create_venv() {
    # The steps below delete and recreate whatever VENV_DIR points at, so
    # refuse to continue with an empty path.
    : "${VENV_DIR:?VENV_DIR must not be empty}"

    echo_step "创建虚拟环境..."
    if [ -d "${VENV_DIR}" ]; then
      echo_warn "发现已存在的虚拟环境,正在删除..."
      rm -rf -- "${VENV_DIR}"
    fi

    "${PYTHON_VERSION}" -m venv "${VENV_DIR}"
    "${VENV_DIR}/bin/pip" install --upgrade pip
    echo_info "虚拟环境创建完成: ${VENV_DIR}"
  }
  # Install the application's Python dependencies into the virtual environment.
  # Installs everything from ${APP_DIR}/requirements.txt, then gunicorn, and
  # finally lists the key packages that ended up installed.
  # Globals: APP_DIR, VENV_DIR (read)
  # Exits:   1 when requirements.txt is missing
  install_dependencies() {
    local requirements="${APP_DIR}/requirements.txt"
    local pip="${VENV_DIR}/bin/pip"

    echo_step "安装 Python 依赖..."
    if [ ! -f "${requirements}" ]; then
      echo_error "requirements.txt 不存在"
      exit 1
    fi

    "${pip}" install -r "${requirements}"
    # gunicorn is installed explicitly in case requirements.txt omits it.
    "${pip}" install gunicorn
    echo_info "依赖安装完成"

    echo_info "已安装的关键包:"
    # Purely informational: grep exits 1 when none of the names match, which
    # must not abort the deployment under `set -e`.
    "${pip}" list | grep -E "Flask|gunicorn|neo4j|SQLAlchemy|psycopg2" || true
  }
  # Smoke-test the installation: from inside APP_DIR, import the application
  # factory with the venv interpreter, build the app and print its blueprints.
  # Globals: APP_DIR, VENV_DIR (read)
  # Exits:   1 when the import or app creation fails
  # Note:    changes the script's working directory to APP_DIR.
  verify_installation() {
    # Python snippet passed to the interpreter with -c.
    local probe="
from app import create_app
app = create_app()
print('Flask 应用创建成功')
print(f'已注册的蓝图: {list(app.blueprints.keys())}')
"
    echo_step "验证安装..."
    cd "${APP_DIR}"
    if ! "${VENV_DIR}/bin/python" -c "${probe}"; then
      echo_error "应用验证失败"
      exit 1
    fi
    echo_info "应用验证通过"
  }
  # Generate ${APP_DIR}/gunicorn.conf.py from the GUNICORN_* settings and hand
  # the file to the ubuntu user.
  # Globals: APP_DIR, APP_NAME, LOG_DIR, GUNICORN_BIND, GUNICORN_WORKERS,
  #          GUNICORN_TIMEOUT (read)
  create_gunicorn_config() {
    local conf_file="${APP_DIR}/gunicorn.conf.py"
    echo_step "创建 Gunicorn 配置..."
    # Unquoted delimiter: the ${...} references below are expanded by the
    # shell while the file is written.
    cat << EOF > "${conf_file}"
# Gunicorn 配置文件
import multiprocessing
# 绑定地址
bind = "${GUNICORN_BIND}"
# Worker 进程数
workers = ${GUNICORN_WORKERS}
# Worker 类型
worker_class = "sync"
# 超时时间
timeout = ${GUNICORN_TIMEOUT}
# 优雅重启超时
graceful_timeout = 30
# 保持连接时间
keepalive = 5
# 最大请求数(防止内存泄漏)
max_requests = 1000
max_requests_jitter = 50
# 日志配置
accesslog = "${LOG_DIR}/gunicorn_access.log"
errorlog = "${LOG_DIR}/gunicorn_error.log"
loglevel = "info"
# 进程名
proc_name = "${APP_NAME}"
# 工作目录
chdir = "${APP_DIR}"
# 预加载应用
preload_app = True
# 环境变量
raw_env = [
"FLASK_ENV=production",
]
EOF
    chown ubuntu:ubuntu "${conf_file}"
    echo_info "Gunicorn 配置文件已创建: ${conf_file}"
  }
  # Write ${APP_DIR}/wsgi.py, the module that exposes `application` for
  # Gunicorn, and hand the file to the ubuntu user.
  # Globals: APP_DIR (read)
  create_wsgi() {
    local wsgi_file="${APP_DIR}/wsgi.py"
    echo_step "创建 WSGI 入口文件..."
    # Quoted delimiter: the Python source is written verbatim. The body of the
    # `if __name__` guard must stay indented, otherwise wsgi.py fails to import
    # with an IndentationError.
    cat > "${wsgi_file}" << 'EOF'
"""
WSGI 入口文件
用于 Gunicorn 启动 Flask 应用
"""
from app import create_app
application = create_app()
if __name__ == "__main__":
    application.run()
EOF
    chown ubuntu:ubuntu "${wsgi_file}"
    echo_info "WSGI 入口文件已创建: ${wsgi_file}"
  }
  # Write the Supervisor program definition for the application and make
  # Supervisor pick it up (reread + update).
  # Globals: SUPERVISOR_CONF, APP_NAME, APP_DIR, VENV_DIR, LOG_DIR (read)
  # NOTE(review): the program sets redirect_stderr=true and also a
  # stderr_logfile; Supervisor's documentation describes stderr_logfile as
  # unused when stderr is redirected - confirm whether both are intended.
  configure_supervisor() {
    echo_step "配置 Supervisor..."
    # Unquoted delimiter: shell variables are expanded now, while
    # %(ENV_PATH)s is left in the file as written.
    cat << EOF > "${SUPERVISOR_CONF}"
[program:${APP_NAME}]
command=${VENV_DIR}/bin/gunicorn -c ${APP_DIR}/gunicorn.conf.py wsgi:application
directory=${APP_DIR}
user=ubuntu
autostart=true
autorestart=true
stopasgroup=true
killasgroup=true
redirect_stderr=true
stdout_logfile=${LOG_DIR}/supervisor_stdout.log
stderr_logfile=${LOG_DIR}/supervisor_stderr.log
environment=FLASK_ENV="production",PATH="${VENV_DIR}/bin:%(ENV_PATH)s"
EOF
    echo_info "Supervisor 配置已创建: ${SUPERVISOR_CONF}"

    # Reload the configuration files, then apply the changes.
    local action
    for action in reread update; do
      supervisorctl "${action}"
    done
    echo_info "Supervisor 配置已更新"
  }
  # Give the whole application tree to the ubuntu user and mark the helper
  # scripts under scripts/ as executable.
  # Globals: APP_DIR (read)
  set_permissions() {
    echo_step "设置文件权限..."
    chown -R ubuntu:ubuntu "${APP_DIR}"
    # Best effort on purpose: when scripts/*.sh matches nothing, chmod fails
    # and that failure is silenced and ignored.
    chmod +x "${APP_DIR}"/scripts/*.sh 2> /dev/null || :
    echo_info "文件权限设置完成"
  }
  # Start the application through Supervisor and confirm that it is running.
  # Globals: APP_NAME, LOG_DIR (read)
  # Exits:   1 when the program is not in state RUNNING three seconds after
  #          the start request
  start_application() {
    echo_step "启动应用..."
    # A failing start request must not abort the script here under `set -e`:
    # the status check below decides the outcome and prints the log hint.
    supervisorctl start "${APP_NAME}" || true
    sleep 3

    # The second column of `supervisorctl status <name>` is the process state.
    local state
    state=$(supervisorctl status "${APP_NAME}" | awk '{print $2}')
    if [ "${state}" = "RUNNING" ]; then
      echo_info "应用启动成功!"
      supervisorctl status "${APP_NAME}"
    else
      echo_error "应用启动失败,请检查日志"
      echo_info "查看日志: tail -f ${LOG_DIR}/gunicorn_error.log"
      exit 1
    fi
  }
  # Probe the running service over HTTP.
  # Polls GET /api/system/health up to five times, three seconds apart, and
  # returns as soon as it answers 200. If it never does, POST /api/bd/list is
  # tried once, and a 200 or 500 answer is accepted as proof that the service
  # is up. Failed probes only produce warnings: the function does not exit the
  # script.
  # NOTE(review): the probes target port 5500 while GUNICORN_BIND in this
  # script is 0.0.0.0:80 - confirm which port the service really listens on.
  health_check() {
    echo_step "执行健康检查..."
    local max_retries=5
    local retry_interval=3
    local retry_count=0
    local response=""
    local app_port=5500
    local base_url="http://127.0.0.1:${app_port}"

    while (( retry_count < max_retries )); do
      sleep "${retry_interval}"
      retry_count=$((retry_count + 1))
      # curl itself prints 000 through -w when no HTTP response arrives, so a
      # failure must not append a second "000"; the default below only covers
      # the case where curl printed nothing at all.
      response=$(curl -s -o /dev/null -w "%{http_code}" \
        "${base_url}/api/system/health" 2> /dev/null) || true
      response=${response:-000}
      if [ "${response}" = "200" ]; then
        echo_info "健康检查通过! HTTP 状态码: ${response}"
        return 0
      fi
      echo_info "尝试 ${retry_count}/${max_retries}: HTTP 状态码 ${response},等待重试..."
    done

    echo_warn "健康检查接口返回状态码: ${response}"

    # Fallback probe: any answer from the application, even a 500, shows that
    # the service is listening.
    response=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
      "${base_url}/api/bd/list" -H "Content-Type: application/json" -d "{}" \
      2> /dev/null) || true
    response=${response:-000}
    if [ "${response}" = "200" ] || [ "${response}" = "500" ]; then
      echo_info "备选接口 /api/bd/list 有响应(${response}),服务已启动!"
      return 0
    fi

    echo_warn "服务可能需要更多时间启动,或健康检查接口配置有问题"
    echo_info "请手动检查: curl ${base_url}/api/system/health"
  }
  # Print the post-deployment summary: application paths, the helper scripts
  # and the most useful Supervisor commands.
  # Globals: APP_NAME, APP_DIR, VENV_DIR, LOG_DIR, GUNICORN_BIND, GREEN, NC (read)
  show_summary() {
    local rule="=========================================="
    printf '\n%s\n' "${rule}"
    # Only the headline carries colour codes, so only it gets escape expansion.
    printf '%b\n' "${GREEN} 部署完成!${NC}"
    printf '%s\n' "${rule}"
    cat << EOF

应用信息:
 - 应用名称: ${APP_NAME}
 - 应用目录: ${APP_DIR}
 - 虚拟环境: ${VENV_DIR}
 - 日志目录: ${LOG_DIR}
 - 监听地址: ${GUNICORN_BIND}

常用命令:
 - 启动: sudo ${APP_DIR}/scripts/start_dataops.sh
 - 停止: sudo ${APP_DIR}/scripts/stop_dataops.sh
 - 重启: sudo ${APP_DIR}/scripts/restart_dataops.sh
 - 状态: sudo supervisorctl status ${APP_NAME}
 - 日志: tail -f ${LOG_DIR}/gunicorn_error.log

Supervisor 命令:
 - sudo supervisorctl status
 - sudo supervisorctl restart ${APP_NAME}
 - sudo supervisorctl tail -f ${APP_NAME}

EOF
  }
  # Entry point: print the banner, then run every deployment step in order.
  # Under `set -e` a failing step ends the script.
  # Arguments: accepted but not used.
  main() {
    local banner="=========================================="
    printf '%s\n' "${banner}" " DataOps Platform 部署脚本" "${banner}" ""

    # Deployment steps, in execution order.
    local -a steps=(
      check_permissions
      check_python
      check_supervisor
      create_directories
      create_venv
      install_dependencies
      verify_installation
      create_gunicorn_config
      create_wsgi
      configure_supervisor
      set_permissions
      start_application
      health_check
      show_summary
    )
    local step
    for step in "${steps[@]}"; do
      "${step}"
    done
  }
  299. }
  300. main "$@"