| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354 |
#!/usr/bin/env bash
# shellcheck disable=SC2034
#
# Shared configuration and helpers for the DataOps production scripts.
# Settings written with ":=" keep any non-empty value already present in the
# environment and fall back to the default shown here otherwise.

# Application identity and the account that owns its files.
: "${APP_NAME:=dataops-platform}"
: "${APP_DIR:=/opt/dataops-platform}"
: "${APP_USER:=ubuntu}"
: "${APP_GROUP:=${APP_USER}}"

# Paths derived from APP_DIR (always recomputed, never taken from the caller).
VENV_DIR="${APP_DIR}/venv"
LOG_DIR="${APP_DIR}/logs"
SCRIPTS_DIR="${APP_DIR}/scripts"
RUN_SCRIPT="${SCRIPTS_DIR}/run_dataops.sh"

# Location of the environment file.
: "${ENV_DIR:=/etc/dataops-platform}"
: "${ENV_FILE:=${ENV_DIR}/dataops.env}"

# Listener and Gunicorn settings.
: "${LISTEN_HOST:=0.0.0.0}"
: "${LISTEN_PORT:=5500}"
: "${GUNICORN_WORKERS:=4}"
: "${GUNICORN_TIMEOUT:=120}"

# Supervisor program definition and log, both named after APP_NAME.
SUPERVISOR_CONF="/etc/supervisor/conf.d/${APP_NAME}.conf"
SUPERVISOR_LOG="/var/log/supervisor/${APP_NAME}.log"
# ANSI colour sequences. They are stored as literal backslash escapes and are
# turned into control characters by `echo -e` in the helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Print $1 on stdout, prefixed with a green [INFO] tag.
echo_info() {
  local msg="$1"
  echo -e "${GREEN}[INFO]${NC} ${msg}"
}

# Print $1 on stdout, prefixed with a yellow [WARN] tag.
echo_warn() {
  local msg="$1"
  echo -e "${YELLOW}[WARN]${NC} ${msg}"
}

# Print $1 on stdout, prefixed with a red [ERROR] tag.
echo_error() {
  local msg="$1"
  echo -e "${RED}[ERROR]${NC} ${msg}"
}

# Print $1 on stdout, prefixed with a blue [STEP] tag.
echo_step() {
  local msg="$1"
  echo -e "${BLUE}[STEP]${NC} ${msg}"
}
# Scripts edited on Windows may carry CRLF line endings, which make bash fail
# with "$'\r': command not found". This strips the trailing CR from each line,
# rewriting the files in place.
#
# Arguments: any number of paths. For a directory, every regular *.sh file
#            directly inside it is rewritten; a regular file is rewritten
#            itself; any other path is skipped.
# Returns:   0 when every sed call succeeded (including when there was nothing
#            to do), 1 if at least one sed call failed.
normalize_shell_scripts() {
  local path file
  local status=0
  for path in "$@"; do
    if [[ -d "${path}" ]]; then
      for file in "${path}"/*.sh; do
        # An unmatched glob stays literal and a match may be a directory.
        # Skip those explicitly so a failed test never becomes the function's
        # return value (which would abort callers running under `set -e`).
        [[ -f "${file}" ]] || continue
        sed -i 's/\r$//' "${file}" || status=1
      done
    elif [[ -f "${path}" ]]; then
      sed -i 's/\r$//' "${path}" || status=1
    fi
  done
  return "${status}"
}
# An env file edited on Windows may contain CRLF line endings or a UTF-8 BOM;
# sourcing such a file fails with "line 1: #: command not found". This rewrites
# the file in place without the trailing CRs and without a BOM on line 1.
#
# Arguments: $1 - file to clean (defaults to ENV_FILE)
# Returns:   0; a missing or non-writable file is silently left alone.
normalize_env_file() {
  local target="${1:-${ENV_FILE}}"
  if [[ ! -f "${target}" || ! -w "${target}" ]]; then
    return 0
  fi
  sed -i 's/\r$//' "${target}"
  # BOM removal is best effort: its errors are hidden and ignored.
  if ! sed -i '1s/^\xEF\xBB\xBF//' "${target}" 2>/dev/null; then
    return 0
  fi
  return 0
}
# Source an env file into the current shell, exporting every variable it sets.
# CRLF line endings and a leading UTF-8 BOM are stripped on the fly, so the
# file on disk is not modified.
#
# Arguments: $1 - env file to load (defaults to ENV_FILE)
# Outputs:   diagnostics on stderr when the file exists but is unreadable.
#            They must not go to stdout: callers capture this call chain with
#            $(resolve_listen_port) and would get the messages in the value.
# Returns:   0 when the file is absent or was sourced, 1 when it is unreadable.
source_env_file() {
  local file="${1:-${ENV_FILE}}"
  [[ -f "${file}" ]] || return 0
  if [[ ! -r "${file}" ]]; then
    {
      echo_error "无法读取环境变量文件: ${file} (当前用户: $(id -un))"
      echo_info "请执行: sudo chown root:${APP_GROUP} ${file} && sudo chmod 640 ${file}"
    } >&2
    return 1
  fi
  # allexport turns every assignment made while sourcing into an export.
  set -a
  # shellcheck disable=SC1090
  source <(sed 's/\r$//' "${file}" | sed '1s/^\xEF\xBB\xBF//')
  # Note: allexport is switched off unconditionally afterwards.
  set +a
}
# Best-effort tightening of ENV_FILE ownership and mode.
# Ownership: root:APP_GROUP is tried first, then APP_USER:APP_GROUP.
# Mode: 640 is tried first, then 600.
# All failures are hidden and ignored, so this always returns 0; a missing
# ENV_FILE is a no-op.
ensure_env_file_permissions() {
  if [[ ! -f "${ENV_FILE}" ]]; then
    return 0
  fi
  if ! chown "root:${APP_GROUP}" "${ENV_FILE}" 2>/dev/null; then
    chown "${APP_USER}:${APP_GROUP}" "${ENV_FILE}" 2>/dev/null || true
  fi
  if ! chmod 640 "${ENV_FILE}" 2>/dev/null; then
    chmod 600 "${ENV_FILE}" 2>/dev/null || true
  fi
}
# Abort the calling script (exit 1) unless the effective UID is 0.
require_root() {
  [[ "${EUID}" -eq 0 ]] && return 0
  echo_error "请使用 sudo 运行此脚本"
  exit 1
}
# Clean up and source the default env file, then re-apply the built-in
# defaults to any listener/Gunicorn setting that is still unset or empty
# (for example because the env file assigned it an empty value).
load_env_file() {
  normalize_env_file
  source_env_file
  : "${LISTEN_HOST:=0.0.0.0}" "${LISTEN_PORT:=5500}"
  : "${GUNICORN_WORKERS:=4}" "${GUNICORN_TIMEOUT:=120}"
}
# Load the env file and print the effective LISTEN_PORT on stdout.
#
# Callers capture the result with $(resolve_listen_port), so stdout must carry
# the port and nothing else. Whatever load_env_file prints (diagnostics, or
# output produced by commands inside the env file) is redirected to stderr.
resolve_listen_port() {
  load_env_file >&2
  printf '%s\n' "${LISTEN_PORT}"
}
# Make sure ENV_FILE exists, installing it from a bundled template if needed.
#
# Arguments: $1 - optional directory of the calling script; when given,
#                 "$1/../deployment/dataops.env" is tried first.
# Candidates are tried in order; the first regular file found is installed to
# ENV_FILE with mode 640, chowned to root:APP_GROUP (best effort) and cleaned
# with normalize_env_file.
# Returns:   0 when ENV_FILE already exists or was installed.
# Exits:     1 when no candidate exists or the installation itself fails.
ensure_env_file() {
  if [[ -f "${ENV_FILE}" ]]; then
    echo_info "环境变量文件: ${ENV_FILE}"
    return 0
  fi
  local script_dir="${1:-}"
  local candidates=()
  if [[ -n "${script_dir}" ]]; then
    candidates+=("${script_dir}/../deployment/dataops.env")
  fi
  candidates+=(
    "${APP_DIR}/deployment/dataops.env"
    "${APP_DIR}/dataops.env"
  )
  local candidate
  for candidate in "${candidates[@]}"; do
    [[ -f "${candidate}" ]] || continue
    mkdir -p "${ENV_DIR}"
    # Never announce success when the copy did not happen.
    if ! install -m 640 "${candidate}" "${ENV_FILE}"; then
      echo_error "无法将 ${candidate} 安装到 ${ENV_FILE}"
      exit 1
    fi
    chown root:"${APP_GROUP}" "${ENV_FILE}" 2>/dev/null || true
    normalize_env_file "${ENV_FILE}"
    echo_info "已从 ${candidate} 安装环境变量到 ${ENV_FILE}"
    return 0
  done
  echo_error "未找到 ${ENV_FILE}"
  echo_info "请创建该文件,或将 deployment/dataops.env 复制到 ${ENV_FILE} 后重试"
  exit 1
}
# Verify that ENV_FILE exists and can be used by the current user.
# As root, ownership and mode are repaired via ensure_env_file_permissions;
# as any other user the file must already be readable.
# Exits with status 1 (after printing a hint) when the file is missing or
# unreadable.
check_env_file() {
  [[ -f "${ENV_FILE}" ]] || {
    echo_error "环境变量文件不存在: ${ENV_FILE}"
    echo_info "请先运行: sudo ${APP_DIR}/scripts/deploy_dataops.sh"
    exit 1
  }
  if [[ "${EUID}" -eq 0 ]]; then
    ensure_env_file_permissions
    return
  fi
  if [[ -r "${ENV_FILE}" ]]; then
    return 0
  fi
  echo_error "无法读取环境变量文件: ${ENV_FILE} (当前用户: $(id -un))"
  echo_info "请执行: sudo chown root:${APP_GROUP} ${ENV_FILE} && sudo chmod 640 ${ENV_FILE}"
  exit 1
}
# Exit with status 1 (after printing a hint) unless VENV_DIR is a directory.
check_venv() {
  [[ -d "${VENV_DIR}" ]] && return 0
  echo_error "虚拟环境不存在: ${VENV_DIR}"
  echo_info "请先运行: sudo ${APP_DIR}/scripts/deploy_dataops.sh"
  exit 1
}
# Exit with status 1 (after printing a hint) unless RUN_SCRIPT passes the
# executable test (-x).
check_run_script() {
  [[ -x "${RUN_SCRIPT}" ]] && return 0
  echo_error "启动脚本不存在或不可执行: ${RUN_SCRIPT}"
  echo_info "请先运行: sudo ${SCRIPTS_DIR}/deploy_dataops.sh"
  exit 1
}
# Require supervisorctl and make sure a supervisord process is running.
# Exits with status 1 when supervisorctl is not available. When no process
# named exactly "supervisord" is found, the daemon is started directly with
# the system config, falling back to `systemctl start supervisor`, followed by
# a two-second pause.
check_supervisor() {
  command -v supervisorctl >/dev/null 2>&1 || {
    echo_error "supervisorctl 未安装"
    exit 1
  }
  if pgrep -x supervisord >/dev/null 2>&1; then
    return 0
  fi
  echo_warn "supervisord 未运行,正在启动..."
  supervisord -c /etc/supervisor/supervisord.conf || systemctl start supervisor
  sleep 2
}
# Report whether a TCP listener is shown for the given port.
#
# Arguments: $1 - port number
# Returns:   the status of grepping `<tool> -ltn` output for ":<port> ", using
#            ss when available and netstat otherwise; 1 when neither tool is
#            installed.
is_port_listening() {
  local app_port="$1"
  local tool
  for tool in ss netstat; do
    if command -v "${tool}" >/dev/null 2>&1; then
      "${tool}" -ltn | grep -q ":${app_port} "
      return $?
    fi
  done
  return 1
}
# Print a troubleshooting report for the service on stdout: supervisor status,
# the configured supervisor command line, listeners on the application port,
# running gunicorn processes, and the last 30 lines of the supervisor log and
# of the gunicorn error log. Every probe is best effort; a probe that fails
# prints a placeholder instead of stopping the report.
diagnose_service() {
  local app_port
  app_port="$(resolve_listen_port)"
  # Extended regex ":<port>" followed by a word boundary.
  local port_pattern=":${app_port}\\b"
  local gunicorn_log="${LOG_DIR}/gunicorn_error.log"

  echo_warn "========== 诊断信息 =========="

  echo_info "Supervisor 状态:"
  supervisorctl status "${APP_NAME}" || true

  echo_info "Supervisor 配置 command:"
  grep -E '^command=' "${SUPERVISOR_CONF}" 2>/dev/null || true

  echo_info "端口 ${app_port} 监听情况:"
  if command -v ss >/dev/null 2>&1; then
    ss -ltnp | grep -E "${port_pattern}" || echo " (未监听)"
  elif command -v netstat >/dev/null 2>&1; then
    netstat -ltnp 2>/dev/null | grep -E "${port_pattern}" || echo " (未监听)"
  fi

  echo_info "Gunicorn 进程:"
  pgrep -af gunicorn || echo " (无 gunicorn 进程)"

  echo_info "最近 Supervisor 日志 (${SUPERVISOR_LOG}):"
  tail -n 30 "${SUPERVISOR_LOG}" 2>/dev/null || echo " (无法读取)"

  echo_info "最近 Gunicorn 错误日志 (${gunicorn_log}):"
  tail -n 30 "${gunicorn_log}" 2>/dev/null || echo " (无法读取)"

  echo_warn "=============================="
}
# Poll the application's health endpoint until it answers HTTP 200.
#
# Each attempt first sleeps, then checks that the port is listening, then
# requests http://127.0.0.1:<port>/api/system/health with curl.
#
# Optional environment overrides:
#   HEALTH_CHECK_MAX_RETRIES     number of attempts            (default 8)
#   HEALTH_CHECK_RETRY_INTERVAL  seconds slept before each try (default 3)
#   HEALTH_CHECK_TIMEOUT         per-request curl time limit   (default 10)
#
# Returns: 0 on the first HTTP 200; 1 after all attempts failed, in which case
#          diagnose_service is run first.
health_check() {
  local app_port
  app_port="$(resolve_listen_port)"
  local url="http://127.0.0.1:${app_port}/api/system/health"
  echo_info "健康检查: ${url}"
  local max_retries="${HEALTH_CHECK_MAX_RETRIES:-8}"
  local retry_interval="${HEALTH_CHECK_RETRY_INTERVAL:-3}"
  local request_timeout="${HEALTH_CHECK_TIMEOUT:-10}"
  local retry_count=0
  local response="000"
  while [[ ${retry_count} -lt ${max_retries} ]]; do
    sleep "${retry_interval}"
    retry_count=$((retry_count + 1))
    if ! is_port_listening "${app_port}"; then
      echo_info "尝试 ${retry_count}/${max_retries}: 端口 ${app_port} 尚未监听,等待..."
      continue
    fi
    # Bound every request: without a time limit, a worker that accepts the
    # connection but never answers would block this loop forever.
    response="$(curl -s -o /dev/null -w '%{http_code}' \
      --connect-timeout "${request_timeout}" --max-time "${request_timeout}" \
      "${url}" 2>/dev/null)" || response="000"
    if [[ "${response}" == "200" ]]; then
      echo_info "健康检查通过 (HTTP ${response})"
      return 0
    fi
    echo_info "尝试 ${retry_count}/${max_retries}: HTTP ${response},等待重试..."
  done
  echo_warn "健康检查失败,最近 HTTP 状态码: ${response}"
  diagnose_service
  return 1
}
# Create APP_DIR/wsgi.py (owned by APP_USER:APP_GROUP) when it does not exist
# yet, then make sure the gunicorn config is present as well.
#
# Arguments: $1 - optional script directory, passed on unchanged to
#                 ensure_gunicorn_config.
ensure_wsgi() {
  local wsgi_file="${APP_DIR}/wsgi.py"
  if [[ ! -f "${wsgi_file}" ]]; then
    # Quoted delimiter: the Python source is written literally.
    cat >"${wsgi_file}" <<'EOF'
"""WSGI entry point for production deployment."""
from app import create_app
application = create_app()
app = application
EOF
    chown "${APP_USER}:${APP_GROUP}" "${wsgi_file}"
    echo_info "已创建 ${wsgi_file}"
  fi
  ensure_gunicorn_config "${1:-}"
}
# Make sure APP_DIR/gunicorn_config.py exists, installing it from a known
# location when it is missing.
#
# Arguments: $1 - optional directory of the calling script; when given,
#                 "$1/../gunicorn_config.py" and "$1/gunicorn_config.py" are
#                 tried before "APP_DIR/deployment/gunicorn_config.py".
# The first candidate that is a regular file is installed with mode 644 and
# chowned to APP_USER:APP_GROUP.
# Returns:   0 when the config already exists or was installed.
# Exits:     1 when no candidate exists or the installation itself fails.
ensure_gunicorn_config() {
  local dest="${APP_DIR}/gunicorn_config.py"
  if [[ -f "${dest}" ]]; then
    return 0
  fi
  local script_dir="${1:-}"
  local candidates=()
  if [[ -n "${script_dir}" ]]; then
    candidates+=(
      "${script_dir}/../gunicorn_config.py"
      "${script_dir}/gunicorn_config.py"
    )
  fi
  # The destination itself is not a candidate: it was just found missing.
  candidates+=("${APP_DIR}/deployment/gunicorn_config.py")
  local candidate
  for candidate in "${candidates[@]}"; do
    [[ -f "${candidate}" ]] || continue
    # Never announce success when the copy did not happen.
    if ! install -m 644 "${candidate}" "${dest}"; then
      echo_error "无法将 ${candidate} 安装到 ${dest}"
      exit 1
    fi
    chown "${APP_USER}:${APP_GROUP}" "${dest}"
    echo_info "已安装 ${dest}"
    return 0
  done
  echo_error "缺少 ${dest}"
  echo_info "请将 gunicorn_config.py 放到应用目录后重试"
  exit 1
}
# Delete the old launcher APP_DIR/run_dataops.sh if it is still around;
# RUN_SCRIPT is the only supported launcher. Does nothing when it is absent.
remove_legacy_run_script() {
  local legacy="${APP_DIR}/run_dataops.sh"
  [[ -f "${legacy}" ]] || return 0
  rm -f "${legacy}"
  echo_warn "已移除旧版 ${legacy},统一使用 ${RUN_SCRIPT}"
}
# Prepare RUN_SCRIPT for use by supervisor: it must exist (otherwise exit 1),
# is made mode 755, chowned to APP_USER:APP_GROUP (best effort), stripped of
# CRLF line endings, and the legacy launcher is removed.
ensure_run_script() {
  [[ -f "${RUN_SCRIPT}" ]] || {
    echo_error "启动脚本不存在: ${RUN_SCRIPT}"
    echo_info "请先运行: sudo ${SCRIPTS_DIR}/deploy_dataops.sh"
    exit 1
  }
  chmod 755 "${RUN_SCRIPT}"
  chown "${APP_USER}:${APP_GROUP}" "${RUN_SCRIPT}" 2>/dev/null || true
  normalize_shell_scripts "${RUN_SCRIPT}"
  remove_legacy_run_script
}
# Move an old APP_DIR/gunicorn.conf.py out of the way by renaming it to
# "gunicorn.conf.py.bak.<YYYYmmddHHMMSS>". Per the warning it prints, that
# file could previously cause a wrong bind to port 80. Does nothing when the
# file is absent.
remove_legacy_gunicorn_config() {
  local legacy="${APP_DIR}/gunicorn.conf.py"
  [[ -f "${legacy}" ]] || return 0
  mv "${legacy}" "${legacy}.bak.$(date +%Y%m%d%H%M%S)"
  echo_warn "已备份旧 gunicorn.conf.py(曾可能导致错误端口 80 绑定)"
}
# Write the supervisor program definition for the application and ask
# supervisor to pick it up.
#
# Steps: back up a legacy gunicorn.conf.py, prepare RUN_SCRIPT, create LOG_DIR
# (owned by APP_USER:APP_GROUP) and /var/log/supervisor, write SUPERVISOR_CONF,
# then run `supervisorctl reread` and `supervisorctl update`.
configure_supervisor() {
  remove_legacy_gunicorn_config
  ensure_run_script
  mkdir -p "${LOG_DIR}" /var/log/supervisor
  chown -R "${APP_USER}:${APP_GROUP}" "${LOG_DIR}"

  # PATH handed to the program: the virtualenv's bin directory comes first.
  local program_path="${VENV_DIR}/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

  # One argument per line of the generated config file.
  printf '%s\n' \
    "[program:${APP_NAME}]" \
    "command=/usr/bin/env bash ${RUN_SCRIPT}" \
    "directory=${APP_DIR}" \
    "user=${APP_USER}" \
    "autostart=true" \
    "autorestart=true" \
    "startsecs=8" \
    "startretries=5" \
    "stopasgroup=true" \
    "killasgroup=true" \
    "redirect_stderr=true" \
    "stdout_logfile=${SUPERVISOR_LOG}" \
    "stdout_logfile_maxbytes=50MB" \
    "stdout_logfile_backups=5" \
    "environment=FLASK_ENV=\"production\",APP_ENV_FILE=\"${ENV_FILE}\",APP_DIR=\"${APP_DIR}\",PATH=\"${program_path}\"" \
    >"${SUPERVISOR_CONF}"

  supervisorctl reread
  supervisorctl update
  echo_info "Supervisor 配置已更新: ${SUPERVISOR_CONF}"
}
|