| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443 |
- import os
- import platform
def get_bool_env(name: str, default: bool = False) -> bool:
    """Interpret the environment variable ``name`` as a boolean flag.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` when the variable is unset; otherwise ``True`` only if
        the trimmed, lower-cased value is one of ``1``, ``true``, ``yes`` or
        ``on``. Any other value, including an empty string, yields ``False``.
    """
    raw = os.environ.get(name)
    if raw is not None:
        return raw.strip().lower() in {"1", "true", "yes", "on"}
    return default
def get_environment():
    """Determine the name of the current runtime environment.

    Resolution order:
        1. The ``FLASK_ENV`` environment variable, lower-cased, when it is
           set to a non-empty value.
        2. The operating system: Linux maps to ``"production"``; Windows and
           every other system map to ``"development"``.

    Returns:
        The environment name as a string.
    """
    explicit = os.environ.get("FLASK_ENV")
    if explicit:
        return explicit.lower()
    # No explicit setting: only Linux hosts are treated as production.
    return "production" if platform.system().lower() == "linux" else "development"
def resolve_log_file(default_filename: str = "flask_production.log") -> str:
    """Resolve the application log file path.

    ``LOG_FILE`` is returned unchanged when it holds an absolute path.
    Otherwise only its base name is kept and placed inside ``LOG_DIR``, which
    defaults to ``<APP_DIR>/logs`` (``APP_DIR`` itself defaults to the current
    working directory).

    Args:
        default_filename: File name used when ``LOG_FILE`` is unset, empty,
            or has no file-name component (for example ``"logs/"``).

    Returns:
        The resolved log file path. It is absolute only when ``LOG_FILE`` or
        the directory it is joined onto is absolute.
    """
    # An empty LOG_FILE must not win over the default, otherwise the result
    # would be the log directory itself rather than a file inside it.
    raw = os.environ.get("LOG_FILE") or default_filename
    if os.path.isabs(raw):
        return raw
    app_dir = os.environ.get("APP_DIR", os.getcwd())
    log_dir = os.environ.get("LOG_DIR", os.path.join(app_dir, "logs"))
    # A value such as "logs/" has an empty base name; fall back to the default.
    filename = os.path.basename(raw) or os.path.basename(default_filename)
    return os.path.join(log_dir, filename)
def _clean_env(name: str, default: str = "") -> str:
    """Read environment variable ``name`` with surrounding whitespace removed.

    Args:
        name: Environment variable to read.
        default: Value returned, untouched, when the variable is not set.

    Returns:
        The trimmed value, or ``default`` when the variable is unset.
    """
    raw = os.environ.get(name)
    return default if raw is None else raw.strip().strip("\r\n\t")
def load_production_env_file() -> None:
    """Load the production env file into ``os.environ``, keeping existing keys.

    The file path comes from ``APP_ENV_FILE``, then ``ENV_FILE``, then the
    built-in default ``/etc/dataops-platform/dataops.env``. The function
    returns silently when the path is empty, is not a regular file, is not
    readable, or when importing or running ``dotenv`` fails.
    """
    fallback_path = os.environ.get("ENV_FILE", "/etc/dataops-platform/dataops.env")
    env_file = os.environ.get("APP_ENV_FILE", fallback_path)
    usable = (
        bool(env_file)
        and os.path.isfile(env_file)
        and os.access(env_file, os.R_OK)
    )
    if not usable:
        return
    try:
        from dotenv import load_dotenv

        load_dotenv(env_file, override=False)
    except Exception:
        # Deliberate best effort: a missing dotenv package or an unparsable
        # file must not prevent the application from starting.
        return
def is_placeholder_env_value(value: str) -> bool:
    """Tell whether ``value`` is empty or an unedited template placeholder.

    Matching is case-insensitive. A value counts as a placeholder when it is
    empty, starts with ``replace-``, contains one of the known template
    fragments, contains both ``dataops_user@`` and ``127.0.0.1``, or is
    exactly ``127.0.0.1:9000`` / ``localhost:9000``.

    Args:
        value: The environment value to inspect.

    Returns:
        ``True`` when the value should not override the production defaults.
    """
    if not value:
        return True
    lowered = value.lower()
    known_fragments = (
        "replace-password",
        "replace-neo4j-password",
        "replace-minio",
        "replace-with-a-long-random-secret",
        "replace-n8n-api-key",
        "your-api-key",
    )
    return (
        lowered.startswith("replace-")
        or any(fragment in lowered for fragment in known_fragments)
        or ("dataops_user@" in lowered and "127.0.0.1" in lowered)
        or lowered in {"127.0.0.1:9000", "localhost:9000"}
    )
def _is_local_minio_host(host: str) -> bool:
    """Return whether ``host`` (``name`` or ``name:port``) is a loopback address.

    Only the text before the first ``:`` is compared, case-insensitively,
    against ``127.0.0.1`` and ``localhost``. An empty host is not local.
    """
    if host:
        hostname, _, _ = host.partition(":")
        return hostname.lower() in {"127.0.0.1", "localhost"}
    return False
def _apply_minio_config(app, defaults: dict) -> None:
    """Write the MinIO settings into ``app.config`` as one consistent set.

    The whole default set is used, so host, user and password never get
    mixed, when any of ``MINIO_HOST``, ``MINIO_USER`` or ``MINIO_PASSWORD``
    is empty or a template placeholder, or when ``FLASK_ENV`` is
    ``production`` and the host is a loopback address. Otherwise each
    setting comes from the environment, falling back per key to ``defaults``.

    Args:
        app: Object exposing a dict-like ``config`` attribute.
        defaults: Mapping providing ``MINIO_HOST``, ``MINIO_USER``,
            ``MINIO_PASSWORD``, ``MINIO_BUCKET``, ``MINIO_PREFIX`` and
            ``MINIO_SECURE``.
    """
    host = _clean_env("MINIO_HOST")
    user = _clean_env("MINIO_USER")
    password = _clean_env("MINIO_PASSWORD")
    bucket = _clean_env("MINIO_BUCKET")
    prefix = _clean_env("MINIO_PREFIX")

    in_production = os.environ.get("FLASK_ENV", "").lower() == "production"
    credentials_unusable = any(
        is_placeholder_env_value(item) for item in (user, password, host)
    )
    local_host_in_production = in_production and _is_local_minio_host(host)

    if credentials_unusable or local_host_in_production:
        # The prefix is stored under "PREFIX", not "MINIO_PREFIX".
        for config_key, default_key in (
            ("MINIO_HOST", "MINIO_HOST"),
            ("MINIO_USER", "MINIO_USER"),
            ("MINIO_PASSWORD", "MINIO_PASSWORD"),
            ("MINIO_BUCKET", "MINIO_BUCKET"),
            ("PREFIX", "MINIO_PREFIX"),
            ("MINIO_SECURE", "MINIO_SECURE"),
        ):
            app.config[config_key] = defaults[default_key]
        return

    app.config["MINIO_HOST"] = host or defaults["MINIO_HOST"]
    app.config["MINIO_USER"] = user or defaults["MINIO_USER"]
    app.config["MINIO_PASSWORD"] = password or defaults["MINIO_PASSWORD"]
    app.config["MINIO_BUCKET"] = bucket or defaults["MINIO_BUCKET"]
    app.config["PREFIX"] = prefix or defaults["MINIO_PREFIX"]
    app.config["MINIO_SECURE"] = get_bool_env(
        "MINIO_SECURE", defaults["MINIO_SECURE"]
    )
# Fallback endpoints and credentials for the production services, used when
# the corresponding environment variables are missing or still placeholders.
# NOTE(review): real-looking credentials are committed here in plain text;
# consider moving them to a secret store and rotating them.
PRODUCTION_SERVICE_DEFAULTS = {
    # PostgreSQL
    "SQLALCHEMY_DATABASE_URI": (
        "postgresql://postgres:dataOps@192.168.3.143:5432/dataops"
    ),
    # Neo4j
    "NEO4J_URI": "bolt://192.168.3.143:7687",
    "NEO4J_HTTP_URI": "http://192.168.3.143:7474",
    "NEO4J_USER": "neo4j",
    "NEO4J_PASSWORD": "cituneo4j",
    "NEO4J_ENCRYPTED": False,
    # MinIO
    "MINIO_HOST": "192.168.3.143:9000",
    "MINIO_USER": "citu-dataops-acc-key",
    "MINIO_PASSWORD": "citu-dataops-secret-key",
    "MINIO_SECURE": False,
    "MINIO_BUCKET": "dataops-bucket",
    "MINIO_PREFIX": "",
}
def _apply_config_from_env(app, config_key: str, env_name: str, default):
    """Store an environment value, or ``default``, in ``app.config``.

    Args:
        app: Object exposing a dict-like ``config`` attribute.
        config_key: Key written in ``app.config``.
        env_name: Environment variable consulted for the value.
        default: Value used when the variable is unset, empty, or a
            template placeholder.

    Returns:
        The value that was stored under ``config_key``.
    """
    candidate = _clean_env(env_name)
    usable = bool(candidate) and not is_placeholder_env_value(candidate)
    chosen = candidate if usable else default
    app.config[config_key] = chosen
    return chosen
def apply_runtime_env_config(app) -> None:
    """Re-read env-backed settings when the worker starts.

    The production env file is loaded first, then the database, Neo4j,
    MinIO, secret-key and LLM settings are written into ``app.config``.
    Values that are empty or template placeholders never replace a default.

    Args:
        app: Object exposing a dict-like ``config`` attribute.
    """
    load_production_env_file()
    defaults = PRODUCTION_SERVICE_DEFAULTS

    # (config key, environment variable); the default is looked up by config key.
    for config_key, env_name in (
        ("SQLALCHEMY_DATABASE_URI", "DATABASE_URL"),
        ("NEO4J_URI", "NEO4J_URI"),
        ("NEO4J_HTTP_URI", "NEO4J_HTTP_URI"),
        ("NEO4J_USER", "NEO4J_USER"),
        ("NEO4J_PASSWORD", "NEO4J_PASSWORD"),
    ):
        _apply_config_from_env(app, config_key, env_name, defaults[config_key])
    app.config["NEO4J_ENCRYPTED"] = get_bool_env(
        "NEO4J_ENCRYPTED", defaults["NEO4J_ENCRYPTED"]
    )

    _apply_minio_config(app, defaults)

    # SECRET_KEY is only overridden by a real value; otherwise the value
    # already present in app.config is kept.
    secret_key = _clean_env("SECRET_KEY")
    if secret_key and not is_placeholder_env_value(secret_key):
        app.config["SECRET_KEY"] = secret_key

    # DEEPSEEK_API_KEY takes precedence over the legacy LLM_API_KEY. The two
    # are validated independently so that a placeholder DEEPSEEK_API_KEY does
    # not hide a valid LLM_API_KEY.
    deepseek_key = _clean_env("DEEPSEEK_API_KEY")
    legacy_key = _clean_env("LLM_API_KEY")
    if deepseek_key and not is_placeholder_env_value(deepseek_key):
        app.config["DEEPSEEK_API_KEY"] = deepseek_key
        app.config["LLM_API_KEY"] = deepseek_key
    elif legacy_key and not is_placeholder_env_value(legacy_key):
        app.config["LLM_API_KEY"] = legacy_key

    _apply_config_from_env(
        app,
        "LLM_BASE_URL",
        "LLM_BASE_URL",
        "https://api.deepseek.com",
    )

    # Optional LLM tuning: only applied when explicitly provided.
    llm_model = _clean_env("LLM_MODEL_NAME")
    if llm_model:
        app.config["LLM_MODEL_NAME"] = llm_model
    llm_reasoning = _clean_env("LLM_REASONING_EFFORT")
    if llm_reasoning:
        app.config["LLM_REASONING_EFFORT"] = llm_reasoning
def log_llm_env_status(app) -> None:
    """Log whether an LLM API key is available, once logging is configured.

    The key is looked up in ``DEEPSEEK_API_KEY`` / ``LLM_API_KEY`` in the
    environment first, then in ``app.config``. When no key is found, an error
    is logged if the env file exists but is unreadable, otherwise a warning
    asks for the key to be configured.

    Args:
        app: Object exposing ``config`` (dict-like) and ``logger``.
    """
    deepseek_key = _clean_env("DEEPSEEK_API_KEY") or _clean_env("LLM_API_KEY")
    if not deepseek_key:
        deepseek_key = str(
            app.config.get("DEEPSEEK_API_KEY") or app.config.get("LLM_API_KEY") or ""
        ).strip().strip("\r\n\t")

    # Resolve the env file the same way load_production_env_file() does
    # (APP_ENV_FILE, then ENV_FILE, then the built-in default) so the messages
    # below name the file that was actually consulted.
    env_file = os.environ.get(
        "APP_ENV_FILE",
        os.environ.get("ENV_FILE", "/etc/dataops-platform/dataops.env"),
    )

    if deepseek_key:
        # Only the key length is logged, never the key itself.
        app.logger.info(
            "DeepSeek API Key 已加载 (长度=%s, base_url=%s, model=%s)",
            len(deepseek_key),
            app.config.get("LLM_BASE_URL", "https://api.deepseek.com"),
            app.config.get("LLM_MODEL_NAME", "deepseek-chat"),
        )
        return

    if os.path.isfile(env_file) and not os.access(env_file, os.R_OK):
        app.logger.error(
            f"无法读取 {env_file},Supervisor 用户需有读权限。"
            f"请执行: sudo chown root:{os.environ.get('APP_USER', 'ubuntu')} "
            f"{env_file} && sudo chmod 640 {env_file}"
        )
        return

    app.logger.warning(
        f"DeepSeek API Key 未配置,请在 {env_file} 中设置 DEEPSEEK_API_KEY"
    )
def log_service_env_status(app) -> None:
    """Log the database, Neo4j and MinIO endpoints, once logging is configured.

    After the summary line, a warning is emitted for each service whose
    environment value is unset, empty, or a template placeholder, i.e.
    whenever the production default is in effect.

    Args:
        app: Object exposing ``config`` (dict-like) and ``logger``.
    """
    db_uri = str(app.config.get("SQLALCHEMY_DATABASE_URI", ""))
    # Keep only the part after the last "@" so credentials never reach the log.
    db_host = db_uri.split("@")[-1] if "@" in db_uri else db_uri
    app.logger.info(
        "服务连接配置: PostgreSQL=%s, Neo4j=%s, MinIO=%s (user=%s)",
        db_host,
        app.config.get("NEO4J_URI"),
        app.config.get("MINIO_HOST"),
        _mask_secret(str(app.config.get("MINIO_USER", ""))),
    )

    if is_placeholder_env_value(_clean_env("DATABASE_URL")):
        app.logger.warning(
            "DATABASE_URL 仍为模板占位符,已回退到默认生产库 "
            f"{PRODUCTION_SERVICE_DEFAULTS['SQLALCHEMY_DATABASE_URI'].split('@')[-1]}"
        )

    if is_placeholder_env_value(_clean_env("NEO4J_PASSWORD")):
        app.logger.warning(
            "NEO4J_PASSWORD 仍为模板占位符,已回退到 config.py 中的默认生产配置"
        )

    # Mirror the fallback conditions of _apply_minio_config(), including the
    # password check, so every fallback to the default MinIO set is reported.
    minio_user_env = _clean_env("MINIO_USER")
    minio_password_env = _clean_env("MINIO_PASSWORD")
    minio_host_env = _clean_env("MINIO_HOST")
    in_production = os.environ.get("FLASK_ENV", "").lower() == "production"
    if (
        is_placeholder_env_value(minio_user_env)
        or is_placeholder_env_value(minio_password_env)
        or is_placeholder_env_value(minio_host_env)
        or (in_production and _is_local_minio_host(minio_host_env))
    ):
        app.logger.warning(
            "MinIO 配置仍为模板或 localhost,已回退到默认生产 MinIO "
            f"{PRODUCTION_SERVICE_DEFAULTS['MINIO_HOST']}"
        )
def _mask_secret(value: str) -> str:
    """Return a log-safe rendering of ``value``.

    An empty value becomes ``"(empty)"``, a value of up to four characters
    becomes ``"****"``, and anything longer keeps its first four characters
    followed by ``"****"``.
    """
    if not value:
        return "(empty)"
    return "****" if len(value) <= 4 else f"{value[:4]}****"
class BaseConfig:
    """Base configuration holding the settings shared by every environment."""

    # NOTE(review): the fallback secret is a well-known constant; deployments
    # should always provide SECRET_KEY through the environment.
    SECRET_KEY = os.getenv("SECRET_KEY") or "you-will-never-guess"
    JSON_AS_ASCII = False
    JSONIFY_PRETTYPRINT_REGULAR = True
    JSON_SORT_KEYS = False

    # Platform-specific settings
    PLATFORM = platform.system().lower()

    # File upload settings
    ALLOWED_EXTENSIONS = {
        "txt", "pdf",
        "png", "jpg", "jpeg", "gif",
        "xlsx", "xls", "csv",
        "sql", "dll",
        "docx", "doc",
    }

    # Data extraction settings
    DATA_EXTRACT_BATCH_SIZE = 1000  # number of records handled per batch

    # PostgreSQL base settings
    SQLALCHEMY_ENGINE_OPTIONS = {
        "pool_pre_ping": True,
        "pool_recycle": 300,
        "pool_size": 10,
        "max_overflow": 20,
    }

    # DeepSeek LLM (OpenAI-compatible interface)
    DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
    LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api.deepseek.com")
    LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "deepseek-chat")
    LLM_REASONING_EFFORT = os.getenv("LLM_REASONING_EFFORT", "high")
    # Backwards compatibility with the older LLM_API_KEY variable name
    LLM_API_KEY = DEEPSEEK_API_KEY or os.getenv("LLM_API_KEY", "")

    # Logging base settings
    LOG_FORMAT = (
        "%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s"
        " - %(lineno)s - %(message)s"
    )
    LOG_ENCODING = "UTF-8"
    LOG_ENABLED = True

    # DataFlow settings
    DATAFLOW_SCHEMA = os.getenv("DATAFLOW_SCHEMA", "dags")

    # n8n workflow engine settings
    N8N_API_URL = os.getenv("N8N_API_URL", "https://n8n.citupro.com")
    # NOTE(review): a token is committed here as the default value; consider
    # removing it from source control and rotating it.
    N8N_API_KEY = os.getenv(
        "N8N_API_KEY",
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI4MTcyNzlmMC1jNTQwLTQyMTEtYjczYy1mNjU4OTI5NTZhMmUiLCJpc3MiOiJuOG4iLCJhdWQiOiJwdWJsaWMtYXBpIiwiaWF0IjoxNzY2NTcyMDg0fQ.QgiUa5tEM1IGZSxhqFaWtdKvwk1SvoRmqdRovTT254M",
    )
    N8N_API_TIMEOUT = int(os.getenv("N8N_API_TIMEOUT", "30"))

    # Base URL of the DataOps platform API (used for n8n workflow callbacks, etc.)
    API_BASE_URL = os.getenv("API_BASE_URL", "https://company.citupro.com:18183/api")
class DevelopmentConfig(BaseConfig):
    """Configuration for the Windows development environment."""

    FLASK_ENV = "development"
    DEBUG = True
    PORT = 5500

    # MinIO (development)
    MINIO_HOST = "localhost:9000"
    MINIO_USER = "citu-test"
    MINIO_PASSWORD = "citu-test"
    MINIO_SECURE = False
    MINIO_BUCKET = "dataops-bucket"
    PREFIX = ""

    # PostgreSQL (development)
    SQLALCHEMY_DATABASE_URI = "postgresql://postgres:postgres@localhost:5432/dataops"

    # Neo4j (development)
    NEO4J_URI = "bolt://localhost:7687"
    NEO4J_HTTP_URI = "http://localhost:7474"
    NEO4J_USER = "neo4j"
    NEO4J_PASSWORD = "Passw0rd"
    NEO4J_ENCRYPTED = False

    # File paths (development)
    UPLOAD_BASE_PATH = r"C:\tmp\upload"
    ARCHIVE_BASE_PATH = r"C:\tmp\archive"

    # Logging (development)
    LOG_LEVEL = "DEBUG"
    LOG_FILE = resolve_log_file("flask_development.log")
    LOG_TO_CONSOLE = True

    # Airflow (development)
    AIRFLOW_BASE_URL = "http://localhost:8080"
    AIRFLOW_AUTH_USER = "admin"
    AIRFLOW_AUTH_PASSWORD = "admin"
class ProductionConfig(BaseConfig):
    """Configuration for the Linux production environment."""

    FLASK_ENV = "production"
    DEBUG = False
    # Keep in sync with LISTEN_PORT in run_dataops.sh / dataops.env
    # (default 5500, served behind the Nginx reverse proxy).
    PORT = int(os.getenv("LISTEN_PORT", os.getenv("PORT", "5500")))

    # MinIO (production)
    MINIO_HOST = os.getenv("MINIO_HOST", "192.168.3.143:9000")
    MINIO_USER = os.getenv("MINIO_USER", "citu-dataops-acc-key")
    MINIO_PASSWORD = os.getenv("MINIO_PASSWORD", "citu-dataops-secret-key")
    MINIO_SECURE = get_bool_env("MINIO_SECURE", False)
    MINIO_BUCKET = os.getenv("MINIO_BUCKET", "dataops-bucket")
    PREFIX = os.getenv("MINIO_PREFIX", "")

    # PostgreSQL (production)
    SQLALCHEMY_DATABASE_URI = os.getenv(
        "DATABASE_URL",
        "postgresql://postgres:dataOps@192.168.3.143:5432/dataops",
    )

    # Neo4j (production)
    NEO4J_URI = os.getenv("NEO4J_URI", "bolt://192.168.3.143:7687")
    NEO4J_HTTP_URI = os.getenv("NEO4J_HTTP_URI", "http://192.168.3.143:7474")
    NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
    NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "cituneo4j")
    NEO4J_ENCRYPTED = get_bool_env("NEO4J_ENCRYPTED", False)

    # File paths (production)
    UPLOAD_BASE_PATH = os.getenv("UPLOAD_BASE_PATH", "/data/upload")
    ARCHIVE_BASE_PATH = os.getenv("ARCHIVE_BASE_PATH", "/data/archive")

    # Logging (production)
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE = resolve_log_file("flask_production.log")
    LOG_TO_CONSOLE = get_bool_env("LOG_TO_CONSOLE", False)

    # Airflow (production)
    AIRFLOW_BASE_URL = os.getenv("AIRFLOW_BASE_URL", "http://192.168.3.143:8080")
    AIRFLOW_AUTH_USER = os.getenv("AIRFLOW_AUTH_USER", "admin")
    AIRFLOW_AUTH_PASSWORD = os.getenv("AIRFLOW_AUTH_PASSWORD", "admin")
# Registry mapping an environment name to its configuration class;
# "default" resolves to the development configuration.
config = {
    "development": DevelopmentConfig,
    "production": ProductionConfig,
    "default": DevelopmentConfig,
}

# Environment name detected once, when this module is imported.
current_env = get_environment()
|