"""Environment-aware configuration for the Flask application.

Provides helpers that read settings from environment variables, the
per-environment config classes, and functions that re-apply and log
env-backed settings on an ``app`` object exposing ``config`` and ``logger``.
"""

import logging
import os
import platform

# Default location of the production env file when neither APP_ENV_FILE nor
# ENV_FILE is set.
_DEFAULT_ENV_FILE = "/etc/dataops-platform/dataops.env"

# Substrings that mark a value as an unfilled template placeholder.
_PLACEHOLDER_MARKERS = (
    "replace-password",
    "replace-neo4j-password",
    "replace-minio",
    "replace-with-a-long-random-secret",
    "replace-n8n-api-key",
    "your-api-key",
)


def get_bool_env(name: str, default: bool = False) -> bool:
    """Return a boolean value from an environment variable.

    Returns ``default`` when the variable is unset. Otherwise the value is
    true only if, after trimming and lower-casing, it is one of
    ``1``, ``true``, ``yes`` or ``on``.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}


def get_environment():
    """Return the name of the current runtime environment.

    Priority:
    1. The ``FLASK_ENV`` environment variable (lower-cased).
    2. The operating system: Windows -> development, Linux -> production,
       anything else -> development.
    """
    # An explicit environment variable always wins.
    env = os.environ.get("FLASK_ENV")
    if env:
        return env.lower()

    # Otherwise infer the environment from the operating system.
    system = platform.system().lower()
    if system == "linux":
        return "production"
    # Windows and every other system default to development.
    return "development"


def resolve_log_file(default_filename: str = "flask_production.log") -> str:
    """Resolve the application log path.

    ``LOG_FILE`` (or ``default_filename`` when unset) is returned unchanged
    if it is absolute. Otherwise only its base name is kept and joined onto
    ``LOG_DIR``, which defaults to ``<APP_DIR or cwd>/logs``.
    """
    raw = os.environ.get("LOG_FILE", default_filename)
    if os.path.isabs(raw):
        return raw
    app_dir = os.environ.get("APP_DIR", os.getcwd())
    log_dir = os.environ.get("LOG_DIR", os.path.join(app_dir, "logs"))
    return os.path.join(log_dir, os.path.basename(raw))


def _clean_env(name: str, default: str = "") -> str:
    """Return the whitespace-trimmed value of ``name``, or ``default`` if unset."""
    value = os.environ.get(name)
    if value is None:
        return default
    # str.strip() already removes "\r", "\n" and "\t" along with spaces.
    return value.strip()


def _resolve_env_file() -> str:
    """Return the env file path: APP_ENV_FILE, then ENV_FILE, then the default."""
    return os.environ.get(
        "APP_ENV_FILE",
        os.environ.get("ENV_FILE", _DEFAULT_ENV_FILE),
    )


def load_production_env_file() -> None:
    """Load production env file into os.environ without overriding existing keys.

    Best-effort: a missing or unreadable file is skipped silently, and a
    failure while importing or running ``dotenv`` is logged as a warning
    instead of being raised.
    """
    env_file = _resolve_env_file()
    if not env_file or not os.path.isfile(env_file):
        return
    if not os.access(env_file, os.R_OK):
        return
    try:
        from dotenv import load_dotenv

        load_dotenv(env_file, override=False)
    except Exception:
        # Keep start-up alive, but leave a trace: without the env file the
        # application falls back to the built-in defaults below.
        logging.getLogger(__name__).warning(
            "Failed to load env file %s; continuing with the existing environment",
            env_file,
            exc_info=True,
        )


def is_placeholder_env_value(value: str) -> bool:
    """Detect template placeholders that should not override production defaults.

    An empty value also counts as a placeholder.
    """
    if not value:
        return True
    lower = value.lower()
    if lower.startswith("replace-"):
        return True
    if any(item in lower for item in _PLACEHOLDER_MARKERS):
        return True
    if "dataops_user@" in lower and "127.0.0.1" in lower:
        return True
    if lower in {"127.0.0.1:9000", "localhost:9000"}:
        return True
    return False


def _is_local_minio_host(host: str) -> bool:
    """Return True when the host part of ``host[:port]`` is 127.0.0.1 or localhost."""
    if not host:
        return False
    return host.split(":")[0].lower() in {"127.0.0.1", "localhost"}


def _minio_env_needs_defaults(host: str, user: str, password: str) -> bool:
    """Return True when the MinIO env settings must be replaced by the defaults.

    That is the case when host, user or password is empty or a placeholder,
    or when FLASK_ENV is "production" and the host points at the local machine.
    """
    is_production = os.environ.get("FLASK_ENV", "").lower() == "production"
    return (
        is_placeholder_env_value(user)
        or is_placeholder_env_value(password)
        or is_placeholder_env_value(host)
        or (is_production and _is_local_minio_host(host))
    )


def _apply_minio_config(app, defaults: dict) -> None:
    """Apply MinIO settings as a whole to avoid host/user/password mismatch."""
    host = _clean_env("MINIO_HOST")
    user = _clean_env("MINIO_USER")
    password = _clean_env("MINIO_PASSWORD")
    bucket = _clean_env("MINIO_BUCKET")
    prefix = _clean_env("MINIO_PREFIX")

    if _minio_env_needs_defaults(host, user, password):
        # All-or-nothing: never mix env credentials with a default host.
        app.config["MINIO_HOST"] = defaults["MINIO_HOST"]
        app.config["MINIO_USER"] = defaults["MINIO_USER"]
        app.config["MINIO_PASSWORD"] = defaults["MINIO_PASSWORD"]
        app.config["MINIO_BUCKET"] = defaults["MINIO_BUCKET"]
        app.config["PREFIX"] = defaults["MINIO_PREFIX"]
        app.config["MINIO_SECURE"] = defaults["MINIO_SECURE"]
        return

    app.config["MINIO_HOST"] = host or defaults["MINIO_HOST"]
    app.config["MINIO_USER"] = user or defaults["MINIO_USER"]
    app.config["MINIO_PASSWORD"] = password or defaults["MINIO_PASSWORD"]
    app.config["MINIO_BUCKET"] = bucket or defaults["MINIO_BUCKET"]
    app.config["PREFIX"] = prefix if prefix else defaults["MINIO_PREFIX"]
    app.config["MINIO_SECURE"] = get_bool_env("MINIO_SECURE", defaults["MINIO_SECURE"])


# Fallback service settings used when the environment supplies nothing usable.
# NOTE(review): these defaults embed real-looking credentials in source control;
# consider moving them to the env file and rotating them.
PRODUCTION_SERVICE_DEFAULTS = {
    "SQLALCHEMY_DATABASE_URI": "postgresql://postgres:dataOps@192.168.3.143:5432/dataops",
    "NEO4J_URI": "bolt://192.168.3.143:7687",
    "NEO4J_HTTP_URI": "http://192.168.3.143:7474",
    "NEO4J_USER": "neo4j",
    "NEO4J_PASSWORD": "cituneo4j",
    "NEO4J_ENCRYPTED": False,
    "MINIO_HOST": "192.168.3.143:9000",
    "MINIO_USER": "citu-dataops-acc-key",
    "MINIO_PASSWORD": "citu-dataops-secret-key",
    "MINIO_SECURE": False,
    "MINIO_BUCKET": "dataops-bucket",
    "MINIO_PREFIX": "",
}


def _apply_config_from_env(app, config_key: str, env_name: str, default):
    """Set ``app.config[config_key]`` from ``env_name`` and return the value used.

    Falls back to ``default`` when the variable is unset, empty or a placeholder.
    """
    value = _clean_env(env_name)
    if value and not is_placeholder_env_value(value):
        app.config[config_key] = value
        return value
    app.config[config_key] = default
    return default


def apply_runtime_env_config(app) -> None:
    """Re-read env-backed settings when the worker starts (after dataops.env is loaded)."""
    load_production_env_file()
    defaults = PRODUCTION_SERVICE_DEFAULTS

    _apply_config_from_env(
        app, "SQLALCHEMY_DATABASE_URI", "DATABASE_URL", defaults["SQLALCHEMY_DATABASE_URI"]
    )
    _apply_config_from_env(app, "NEO4J_URI", "NEO4J_URI", defaults["NEO4J_URI"])
    _apply_config_from_env(
        app, "NEO4J_HTTP_URI", "NEO4J_HTTP_URI", defaults["NEO4J_HTTP_URI"]
    )
    _apply_config_from_env(app, "NEO4J_USER", "NEO4J_USER", defaults["NEO4J_USER"])
    _apply_config_from_env(
        app, "NEO4J_PASSWORD", "NEO4J_PASSWORD", defaults["NEO4J_PASSWORD"]
    )
    app.config["NEO4J_ENCRYPTED"] = get_bool_env(
        "NEO4J_ENCRYPTED", defaults["NEO4J_ENCRYPTED"]
    )
    _apply_minio_config(app, defaults)

    # SECRET_KEY is only overridden, never reset to a default here.
    secret_key = _clean_env("SECRET_KEY")
    if secret_key and not is_placeholder_env_value(secret_key):
        app.config["SECRET_KEY"] = secret_key

    # Evaluate the two key variables independently so that a placeholder
    # DEEPSEEK_API_KEY cannot shadow a valid legacy LLM_API_KEY.
    deepseek_key = _clean_env("DEEPSEEK_API_KEY")
    legacy_key = _clean_env("LLM_API_KEY")
    if deepseek_key and not is_placeholder_env_value(deepseek_key):
        app.config["DEEPSEEK_API_KEY"] = deepseek_key
        app.config["LLM_API_KEY"] = deepseek_key
    elif legacy_key and not is_placeholder_env_value(legacy_key):
        app.config["LLM_API_KEY"] = legacy_key

    _apply_config_from_env(
        app,
        "LLM_BASE_URL",
        "LLM_BASE_URL",
        "https://api.deepseek.com",
    )
    llm_model = _clean_env("LLM_MODEL_NAME")
    if llm_model:
        app.config["LLM_MODEL_NAME"] = llm_model
    llm_reasoning = _clean_env("LLM_REASONING_EFFORT")
    if llm_reasoning:
        app.config["LLM_REASONING_EFFORT"] = llm_reasoning


def log_llm_env_status(app) -> None:
    """Log LLM env load result after logging is configured.

    Logs the key length (never the key itself) when a usable key is found in
    the environment or in ``app.config``; otherwise logs why it is missing.
    """
    candidates = (
        _clean_env("DEEPSEEK_API_KEY"),
        _clean_env("LLM_API_KEY"),
        str(app.config.get("DEEPSEEK_API_KEY") or "").strip(),
        str(app.config.get("LLM_API_KEY") or "").strip(),
    )
    # Empty values and template placeholders do not count as a loaded key.
    api_key = next(
        (item for item in candidates if not is_placeholder_env_value(item)), ""
    )
    # Same resolution order as load_production_env_file().
    env_file = _resolve_env_file()

    if api_key:
        app.logger.info(
            "DeepSeek API Key 已加载 (长度=%s, base_url=%s, model=%s)",
            len(api_key),
            app.config.get("LLM_BASE_URL", "https://api.deepseek.com"),
            app.config.get("LLM_MODEL_NAME", "deepseek-chat"),
        )
        return

    if os.path.isfile(env_file) and not os.access(env_file, os.R_OK):
        # The file exists but the current user cannot read it.
        app.logger.error(
            f"无法读取 {env_file},Supervisor 用户需有读权限。"
            f"请执行: sudo chown root:{os.environ.get('APP_USER', 'ubuntu')} "
            f"{env_file} && sudo chmod 640 {env_file}"
        )
        return

    app.logger.warning(
        f"DeepSeek API Key 未配置,请在 {env_file} 中设置 DEEPSEEK_API_KEY"
    )


def log_service_env_status(app) -> None:
    """Log database/Neo4j endpoints after logging is configured."""
    db_uri = str(app.config.get("SQLALCHEMY_DATABASE_URI", ""))
    # Drop the "user:password@" part so credentials never reach the log.
    db_host = db_uri.split("@")[-1] if "@" in db_uri else db_uri
    app.logger.info(
        "服务连接配置: PostgreSQL=%s, Neo4j=%s, MinIO=%s (user=%s)",
        db_host,
        app.config.get("NEO4J_URI"),
        app.config.get("MINIO_HOST"),
        _mask_secret(str(app.config.get("MINIO_USER", ""))),
    )

    # An unset variable is empty and therefore also triggers these warnings.
    if is_placeholder_env_value(_clean_env("DATABASE_URL")):
        app.logger.warning(
            "DATABASE_URL 仍为模板占位符,已回退到默认生产库 "
            f"{PRODUCTION_SERVICE_DEFAULTS['SQLALCHEMY_DATABASE_URI'].split('@')[-1]}"
        )
    if is_placeholder_env_value(_clean_env("NEO4J_PASSWORD")):
        app.logger.warning(
            "NEO4J_PASSWORD 仍为模板占位符,已回退到 config.py 中的默认生产配置"
        )

    # Use the same predicate as _apply_minio_config so that every fallback
    # (including a placeholder password) is reported.
    if _minio_env_needs_defaults(
        _clean_env("MINIO_HOST"),
        _clean_env("MINIO_USER"),
        _clean_env("MINIO_PASSWORD"),
    ):
        app.logger.warning(
            "MinIO 配置仍为模板或 localhost,已回退到默认生产 MinIO "
            f"{PRODUCTION_SERVICE_DEFAULTS['MINIO_HOST']}"
        )


def _mask_secret(value: str) -> str:
    """Return ``value`` masked for logging: at most its first four characters."""
    if not value:
        return "(empty)"
    if len(value) <= 4:
        return "****"
    return f"{value[:4]}****"


class BaseConfig:
    """Base configuration shared by every environment."""

    # NOTE(review): the fallback secret is a well-known placeholder; set
    # SECRET_KEY in the environment for any real deployment.
    SECRET_KEY = os.environ.get("SECRET_KEY") or "you-will-never-guess"
    JSON_AS_ASCII = False
    JSONIFY_PRETTYPRINT_REGULAR = True
    JSON_SORT_KEYS = False

    # Platform-specific settings
    PLATFORM = platform.system().lower()

    # File upload settings
    ALLOWED_EXTENSIONS = {
        "txt",
        "pdf",
        "png",
        "jpg",
        "jpeg",
        "gif",
        "xlsx",
        "xls",
        "csv",
        "sql",
        "dll",
        "docx",
        "doc",
    }

    # Data extraction settings
    DATA_EXTRACT_BATCH_SIZE = 1000  # records processed per batch

    # PostgreSQL base settings
    SQLALCHEMY_ENGINE_OPTIONS = {
        "pool_pre_ping": True,
        "pool_recycle": 300,
        "pool_size": 10,
        "max_overflow": 20,
    }

    # DeepSeek LLM (OpenAI-compatible API)
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
    LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.deepseek.com")
    LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME", "deepseek-chat")
    LLM_REASONING_EFFORT = os.environ.get("LLM_REASONING_EFFORT", "high")
    # Backward compatibility with the legacy LLM_API_KEY variable
    LLM_API_KEY = DEEPSEEK_API_KEY or os.environ.get("LLM_API_KEY", "")

    # Logging base settings
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s - %(lineno)s - %(message)s"
    LOG_ENCODING = "UTF-8"
    LOG_ENABLED = True

    # DataFlow settings
    DATAFLOW_SCHEMA = os.environ.get("DATAFLOW_SCHEMA", "dags")

    # n8n workflow engine settings
    N8N_API_URL = os.environ.get("N8N_API_URL", "https://n8n.citupro.com")
    # NOTE(review): a token is hard-coded as the default below; it should be
    # supplied through the environment only and rotated.
    N8N_API_KEY = os.environ.get(
        "N8N_API_KEY",
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI4MTcyNzlmMC1jNTQwLTQyMTEtYjczYy1mNjU4OTI5NTZhMmUiLCJpc3MiOiJuOG4iLCJhdWQiOiJwdWJsaWMtYXBpIiwiaWF0IjoxNzY2NTcyMDg0fQ.QgiUa5tEM1IGZSxhqFaWtdKvwk1SvoRmqdRovTT254M",
    )
    N8N_API_TIMEOUT = int(os.environ.get("N8N_API_TIMEOUT", "30"))

    # DataOps platform API base URL (used for n8n workflow callbacks, etc.)
    API_BASE_URL = os.environ.get(
        "API_BASE_URL", "https://company.citupro.com:18183/api"
    )


class DevelopmentConfig(BaseConfig):
    """Configuration for the Windows development environment."""

    FLASK_ENV = "development"
    DEBUG = True
    PORT = 5500

    # Development MinIO settings
    MINIO_HOST = "localhost:9000"
    MINIO_USER = "citu-test"
    MINIO_PASSWORD = "citu-test"
    MINIO_SECURE = False
    MINIO_BUCKET = "dataops-bucket"
    PREFIX = ""

    # Development PostgreSQL settings
    SQLALCHEMY_DATABASE_URI = "postgresql://postgres:postgres@localhost:5432/dataops"

    # Development Neo4j settings
    NEO4J_URI = "bolt://localhost:7687"
    NEO4J_HTTP_URI = "http://localhost:7474"
    NEO4J_USER = "neo4j"
    NEO4J_PASSWORD = "Passw0rd"
    NEO4J_ENCRYPTED = False

    # Development file path settings
    UPLOAD_BASE_PATH = "C:\\tmp\\upload"
    ARCHIVE_BASE_PATH = "C:\\tmp\\archive"

    # Development logging settings
    LOG_LEVEL = "DEBUG"
    LOG_FILE = resolve_log_file("flask_development.log")
    LOG_TO_CONSOLE = True

    # Development Airflow settings
    AIRFLOW_BASE_URL = "http://localhost:8080"
    AIRFLOW_AUTH_USER = "admin"
    AIRFLOW_AUTH_PASSWORD = "admin"


class ProductionConfig(BaseConfig):
    """Configuration for the Linux production environment."""

    FLASK_ENV = "production"
    DEBUG = False
    # Keep in sync with LISTEN_PORT in run_dataops.sh / dataops.env
    # (default 5500, used behind the Nginx reverse proxy).
    PORT = int(os.environ.get("LISTEN_PORT", os.environ.get("PORT", "5500")))

    # Production MinIO settings
    MINIO_HOST = os.environ.get("MINIO_HOST", "192.168.3.143:9000")
    MINIO_USER = os.environ.get("MINIO_USER", "citu-dataops-acc-key")
    MINIO_PASSWORD = os.environ.get("MINIO_PASSWORD", "citu-dataops-secret-key")
    MINIO_SECURE = get_bool_env("MINIO_SECURE", False)
    MINIO_BUCKET = os.environ.get("MINIO_BUCKET", "dataops-bucket")
    PREFIX = os.environ.get("MINIO_PREFIX", "")

    # Production PostgreSQL settings
    SQLALCHEMY_DATABASE_URI = os.environ.get(
        "DATABASE_URL", "postgresql://postgres:dataOps@192.168.3.143:5432/dataops"
    )

    # Production Neo4j settings
    NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://192.168.3.143:7687")
    NEO4J_HTTP_URI = os.environ.get("NEO4J_HTTP_URI", "http://192.168.3.143:7474")
    NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
    NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "cituneo4j")
    NEO4J_ENCRYPTED = get_bool_env("NEO4J_ENCRYPTED", False)

    # Production file path settings
    UPLOAD_BASE_PATH = os.environ.get("UPLOAD_BASE_PATH", "/data/upload")
    ARCHIVE_BASE_PATH = os.environ.get("ARCHIVE_BASE_PATH", "/data/archive")

    # Production logging settings
    LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
    LOG_FILE = resolve_log_file("flask_production.log")
    LOG_TO_CONSOLE = get_bool_env("LOG_TO_CONSOLE", False)

    # Production Airflow settings
    AIRFLOW_BASE_URL = os.environ.get("AIRFLOW_BASE_URL", "http://192.168.3.143:8080")
    AIRFLOW_AUTH_USER = os.environ.get("AIRFLOW_AUTH_USER", "admin")
    AIRFLOW_AUTH_PASSWORD = os.environ.get("AIRFLOW_AUTH_PASSWORD", "admin")


# Mapping from environment name to its configuration class
config = {
    "development": DevelopmentConfig,
    "production": ProductionConfig,
    "default": DevelopmentConfig,
}

# Environment detected at import time
current_env = get_environment()