config.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. import os
  2. import platform
  3. def get_bool_env(name: str, default: bool = False) -> bool:
  4. """Return a boolean value from an environment variable."""
  5. value = os.environ.get(name)
  6. if value is None:
  7. return default
  8. return value.strip().lower() in {"1", "true", "yes", "on"}
  9. def get_environment():
  10. """
  11. 获取当前运行环境
  12. 优先级:
  13. 1. 环境变量 FLASK_ENV
  14. 2. 根据操作系统自动判断(Windows -> development, Linux -> production)
  15. """
  16. # 首先检查环境变量
  17. env = os.environ.get("FLASK_ENV")
  18. if env:
  19. return env.lower()
  20. # 根据操作系统判断
  21. system = platform.system().lower()
  22. if system == "windows":
  23. return "development"
  24. elif system == "linux":
  25. return "production"
  26. else:
  27. return "development" # 其他系统默认使用开发环境
  28. def resolve_log_file(default_filename: str = "flask_production.log") -> str:
  29. """Resolve application log path to an absolute file under LOG_DIR."""
  30. raw = os.environ.get("LOG_FILE", default_filename)
  31. if os.path.isabs(raw):
  32. return raw
  33. app_dir = os.environ.get("APP_DIR", os.getcwd())
  34. log_dir = os.environ.get("LOG_DIR", os.path.join(app_dir, "logs"))
  35. return os.path.join(log_dir, os.path.basename(raw))
  36. def _clean_env(name: str, default: str = "") -> str:
  37. value = os.environ.get(name)
  38. if value is None:
  39. return default
  40. return value.strip().strip("\r\n\t")
  41. def load_production_env_file() -> None:
  42. """Load production env file into os.environ without overriding existing keys."""
  43. env_file = os.environ.get(
  44. "APP_ENV_FILE",
  45. os.environ.get("ENV_FILE", "/etc/dataops-platform/dataops.env"),
  46. )
  47. if not env_file or not os.path.isfile(env_file):
  48. return
  49. if not os.access(env_file, os.R_OK):
  50. return
  51. try:
  52. from dotenv import load_dotenv
  53. load_dotenv(env_file, override=False)
  54. except Exception:
  55. return
  56. def is_placeholder_env_value(value: str) -> bool:
  57. """Detect template placeholders that should not override production defaults."""
  58. if not value:
  59. return True
  60. lower = value.lower()
  61. if lower.startswith("replace-"):
  62. return True
  63. placeholders = (
  64. "replace-password",
  65. "replace-neo4j-password",
  66. "replace-minio",
  67. "replace-with-a-long-random-secret",
  68. "replace-n8n-api-key",
  69. "your-api-key",
  70. )
  71. if any(item in lower for item in placeholders):
  72. return True
  73. if "dataops_user@" in lower and "127.0.0.1" in lower:
  74. return True
  75. if lower in {"127.0.0.1:9000", "localhost:9000"}:
  76. return True
  77. return False
  78. def _is_local_minio_host(host: str) -> bool:
  79. if not host:
  80. return False
  81. return host.split(":")[0].lower() in {"127.0.0.1", "localhost"}
  82. def _apply_minio_config(app, defaults: dict) -> None:
  83. """Apply MinIO settings as a whole to avoid host/user/password mismatch."""
  84. host = _clean_env("MINIO_HOST")
  85. user = _clean_env("MINIO_USER")
  86. password = _clean_env("MINIO_PASSWORD")
  87. bucket = _clean_env("MINIO_BUCKET")
  88. prefix = _clean_env("MINIO_PREFIX")
  89. is_production = os.environ.get("FLASK_ENV", "").lower() == "production"
  90. use_defaults = (
  91. is_placeholder_env_value(user)
  92. or is_placeholder_env_value(password)
  93. or is_placeholder_env_value(host)
  94. or (is_production and _is_local_minio_host(host))
  95. )
  96. if use_defaults:
  97. app.config["MINIO_HOST"] = defaults["MINIO_HOST"]
  98. app.config["MINIO_USER"] = defaults["MINIO_USER"]
  99. app.config["MINIO_PASSWORD"] = defaults["MINIO_PASSWORD"]
  100. app.config["MINIO_BUCKET"] = defaults["MINIO_BUCKET"]
  101. app.config["PREFIX"] = defaults["MINIO_PREFIX"]
  102. app.config["MINIO_SECURE"] = defaults["MINIO_SECURE"]
  103. return
  104. app.config["MINIO_HOST"] = host or defaults["MINIO_HOST"]
  105. app.config["MINIO_USER"] = user or defaults["MINIO_USER"]
  106. app.config["MINIO_PASSWORD"] = password or defaults["MINIO_PASSWORD"]
  107. app.config["MINIO_BUCKET"] = bucket or defaults["MINIO_BUCKET"]
  108. app.config["PREFIX"] = prefix if prefix else defaults["MINIO_PREFIX"]
  109. app.config["MINIO_SECURE"] = get_bool_env("MINIO_SECURE", defaults["MINIO_SECURE"])
  110. PRODUCTION_SERVICE_DEFAULTS = {
  111. "SQLALCHEMY_DATABASE_URI": "postgresql://postgres:dataOps@192.168.3.143:5432/dataops",
  112. "NEO4J_URI": "bolt://192.168.3.143:7687",
  113. "NEO4J_HTTP_URI": "http://192.168.3.143:7474",
  114. "NEO4J_USER": "neo4j",
  115. "NEO4J_PASSWORD": "cituneo4j",
  116. "NEO4J_ENCRYPTED": False,
  117. "MINIO_HOST": "192.168.3.143:9000",
  118. "MINIO_USER": "citu-dataops-acc-key",
  119. "MINIO_PASSWORD": "citu-dataops-secret-key",
  120. "MINIO_SECURE": False,
  121. "MINIO_BUCKET": "dataops-bucket",
  122. "MINIO_PREFIX": "",
  123. }
  124. def _apply_config_from_env(app, config_key: str, env_name: str, default):
  125. value = _clean_env(env_name)
  126. if value and not is_placeholder_env_value(value):
  127. app.config[config_key] = value
  128. return value
  129. app.config[config_key] = default
  130. return default
  131. def apply_runtime_env_config(app) -> None:
  132. """Re-read env-backed settings when the worker starts (after dataops.env is loaded)."""
  133. load_production_env_file()
  134. defaults = PRODUCTION_SERVICE_DEFAULTS
  135. _apply_config_from_env(
  136. app, "SQLALCHEMY_DATABASE_URI", "DATABASE_URL", defaults["SQLALCHEMY_DATABASE_URI"]
  137. )
  138. _apply_config_from_env(app, "NEO4J_URI", "NEO4J_URI", defaults["NEO4J_URI"])
  139. _apply_config_from_env(
  140. app, "NEO4J_HTTP_URI", "NEO4J_HTTP_URI", defaults["NEO4J_HTTP_URI"]
  141. )
  142. _apply_config_from_env(app, "NEO4J_USER", "NEO4J_USER", defaults["NEO4J_USER"])
  143. _apply_config_from_env(
  144. app, "NEO4J_PASSWORD", "NEO4J_PASSWORD", defaults["NEO4J_PASSWORD"]
  145. )
  146. app.config["NEO4J_ENCRYPTED"] = get_bool_env(
  147. "NEO4J_ENCRYPTED", defaults["NEO4J_ENCRYPTED"]
  148. )
  149. _apply_minio_config(app, defaults)
  150. secret_key = _clean_env("SECRET_KEY")
  151. if secret_key and not is_placeholder_env_value(secret_key):
  152. app.config["SECRET_KEY"] = secret_key
  153. deepseek_key = _clean_env("DEEPSEEK_API_KEY")
  154. llm_key = deepseek_key or _clean_env("LLM_API_KEY")
  155. if deepseek_key and not is_placeholder_env_value(deepseek_key):
  156. app.config["DEEPSEEK_API_KEY"] = deepseek_key
  157. app.config["LLM_API_KEY"] = deepseek_key
  158. elif llm_key and not is_placeholder_env_value(llm_key):
  159. app.config["LLM_API_KEY"] = llm_key
  160. _apply_config_from_env(
  161. app,
  162. "LLM_BASE_URL",
  163. "LLM_BASE_URL",
  164. "https://api.deepseek.com",
  165. )
  166. llm_model = _clean_env("LLM_MODEL_NAME")
  167. if llm_model:
  168. app.config["LLM_MODEL_NAME"] = llm_model
  169. llm_reasoning = _clean_env("LLM_REASONING_EFFORT")
  170. if llm_reasoning:
  171. app.config["LLM_REASONING_EFFORT"] = llm_reasoning
  172. def log_llm_env_status(app) -> None:
  173. """Log LLM env load result after logging is configured."""
  174. deepseek_key = _clean_env("DEEPSEEK_API_KEY") or _clean_env("LLM_API_KEY")
  175. if not deepseek_key:
  176. deepseek_key = str(
  177. app.config.get("DEEPSEEK_API_KEY") or app.config.get("LLM_API_KEY") or ""
  178. ).strip().strip("\r\n\t")
  179. env_file = os.environ.get("APP_ENV_FILE", "/etc/dataops-platform/dataops.env")
  180. if deepseek_key:
  181. app.logger.info(
  182. "DeepSeek API Key 已加载 (长度=%s, base_url=%s, model=%s)",
  183. len(deepseek_key),
  184. app.config.get("LLM_BASE_URL", "https://api.deepseek.com"),
  185. app.config.get("LLM_MODEL_NAME", "deepseek-chat"),
  186. )
  187. return
  188. if os.path.isfile(env_file) and not os.access(env_file, os.R_OK):
  189. app.logger.error(
  190. f"无法读取 {env_file},Supervisor 用户需有读权限。"
  191. f"请执行: sudo chown root:{os.environ.get('APP_USER', 'ubuntu')} "
  192. f"{env_file} && sudo chmod 640 {env_file}"
  193. )
  194. return
  195. app.logger.warning(
  196. f"DeepSeek API Key 未配置,请在 {env_file} 中设置 DEEPSEEK_API_KEY"
  197. )
  198. def log_service_env_status(app) -> None:
  199. """Log database/Neo4j endpoints after logging is configured."""
  200. db_uri = str(app.config.get("SQLALCHEMY_DATABASE_URI", ""))
  201. db_host = db_uri.split("@")[-1] if "@" in db_uri else db_uri
  202. app.logger.info(
  203. "服务连接配置: PostgreSQL=%s, Neo4j=%s, MinIO=%s (user=%s)",
  204. db_host,
  205. app.config.get("NEO4J_URI"),
  206. app.config.get("MINIO_HOST"),
  207. _mask_secret(str(app.config.get("MINIO_USER", ""))),
  208. )
  209. if is_placeholder_env_value(_clean_env("DATABASE_URL")):
  210. app.logger.warning(
  211. "DATABASE_URL 仍为模板占位符,已回退到默认生产库 "
  212. f"{PRODUCTION_SERVICE_DEFAULTS['SQLALCHEMY_DATABASE_URI'].split('@')[-1]}"
  213. )
  214. if is_placeholder_env_value(_clean_env("NEO4J_PASSWORD")):
  215. app.logger.warning(
  216. "NEO4J_PASSWORD 仍为模板占位符,已回退到 config.py 中的默认生产配置"
  217. )
  218. minio_user_env = _clean_env("MINIO_USER")
  219. minio_host_env = _clean_env("MINIO_HOST")
  220. if (
  221. is_placeholder_env_value(minio_user_env)
  222. or is_placeholder_env_value(minio_host_env)
  223. or (
  224. os.environ.get("FLASK_ENV", "").lower() == "production"
  225. and _is_local_minio_host(minio_host_env)
  226. )
  227. ):
  228. app.logger.warning(
  229. "MinIO 配置仍为模板或 localhost,已回退到默认生产 MinIO "
  230. f"{PRODUCTION_SERVICE_DEFAULTS['MINIO_HOST']}"
  231. )
  232. def _mask_secret(value: str) -> str:
  233. if not value:
  234. return "(empty)"
  235. if len(value) <= 4:
  236. return "****"
  237. return f"{value[:4]}****"
  238. class BaseConfig:
  239. """基础配置类,包含所有环境共享的配置"""
  240. SECRET_KEY = os.environ.get("SECRET_KEY") or "you-will-never-guess"
  241. JSON_AS_ASCII = False
  242. JSONIFY_PRETTYPRINT_REGULAR = True
  243. JSON_SORT_KEYS = False
  244. # 平台特定配置
  245. PLATFORM = platform.system().lower()
  246. # 文件上传配置
  247. ALLOWED_EXTENSIONS = {
  248. "txt",
  249. "pdf",
  250. "png",
  251. "jpg",
  252. "jpeg",
  253. "gif",
  254. "xlsx",
  255. "xls",
  256. "csv",
  257. "sql",
  258. "dll",
  259. "docx",
  260. "doc",
  261. }
  262. # 数据抽取配置
  263. DATA_EXTRACT_BATCH_SIZE = 1000 # 每批处理的记录数
  264. # PostgreSQL 基础配置
  265. SQLALCHEMY_ENGINE_OPTIONS = {
  266. "pool_pre_ping": True,
  267. "pool_recycle": 300,
  268. "pool_size": 10,
  269. "max_overflow": 20,
  270. }
  271. # DeepSeek LLM(OpenAI 兼容接口)
  272. DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
  273. LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.deepseek.com")
  274. LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME", "deepseek-chat")
  275. LLM_REASONING_EFFORT = os.environ.get("LLM_REASONING_EFFORT", "high")
  276. # 兼容旧环境变量名 LLM_API_KEY
  277. LLM_API_KEY = DEEPSEEK_API_KEY or os.environ.get("LLM_API_KEY", "")
  278. # 日志基础配置
  279. LOG_FORMAT = "%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s - %(lineno)s - %(message)s"
  280. LOG_ENCODING = "UTF-8"
  281. LOG_ENABLED = True
  282. # DataFlow 配置
  283. DATAFLOW_SCHEMA = os.environ.get("DATAFLOW_SCHEMA", "dags")
  284. # n8n 工作流引擎配置
  285. N8N_API_URL = os.environ.get("N8N_API_URL", "https://n8n.citupro.com")
  286. N8N_API_KEY = os.environ.get(
  287. "N8N_API_KEY",
  288. "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI4MTcyNzlmMC1jNTQwLTQyMTEtYjczYy1mNjU4OTI5NTZhMmUiLCJpc3MiOiJuOG4iLCJhdWQiOiJwdWJsaWMtYXBpIiwiaWF0IjoxNzY2NTcyMDg0fQ.QgiUa5tEM1IGZSxhqFaWtdKvwk1SvoRmqdRovTT254M",
  289. )
  290. N8N_API_TIMEOUT = int(os.environ.get("N8N_API_TIMEOUT", "30"))
  291. # DataOps 平台 API 基础 URL(用于 n8n 工作流回调等)
  292. API_BASE_URL = os.environ.get(
  293. "API_BASE_URL", "https://company.citupro.com:18183/api"
  294. )
  295. class DevelopmentConfig(BaseConfig):
  296. """Windows 开发环境配置"""
  297. FLASK_ENV = "development"
  298. DEBUG = True
  299. PORT = 5500
  300. # 开发环境 MinIO 配置
  301. MINIO_HOST = "localhost:9000"
  302. MINIO_USER = "citu-test"
  303. MINIO_PASSWORD = "citu-test"
  304. MINIO_SECURE = False
  305. MINIO_BUCKET = "dataops-bucket"
  306. PREFIX = ""
  307. # 开发环境 PostgreSQL 配置
  308. SQLALCHEMY_DATABASE_URI = "postgresql://postgres:postgres@localhost:5432/dataops"
  309. # 开发环境 Neo4j 配置
  310. NEO4J_URI = "bolt://localhost:7687"
  311. NEO4J_HTTP_URI = "http://localhost:7474"
  312. NEO4J_USER = "neo4j"
  313. NEO4J_PASSWORD = "Passw0rd"
  314. NEO4J_ENCRYPTED = False
  315. # 开发环境文件路径配置
  316. UPLOAD_BASE_PATH = "C:\\tmp\\upload"
  317. ARCHIVE_BASE_PATH = "C:\\tmp\\archive"
  318. # 开发环境日志配置
  319. LOG_LEVEL = "DEBUG"
  320. LOG_FILE = resolve_log_file("flask_development.log")
  321. LOG_TO_CONSOLE = True
  322. # 开发环境 Airflow 配置
  323. AIRFLOW_BASE_URL = "http://localhost:8080"
  324. AIRFLOW_AUTH_USER = "admin"
  325. AIRFLOW_AUTH_PASSWORD = "admin"
  326. class ProductionConfig(BaseConfig):
  327. """Linux 生产环境配置"""
  328. FLASK_ENV = "production"
  329. DEBUG = False
  330. # 与 run_dataops.sh / dataops.env 中 LISTEN_PORT 保持一致(默认 5500,供 Nginx 反代)
  331. PORT = int(os.environ.get("LISTEN_PORT", os.environ.get("PORT", "5500")))
  332. # 生产环境 MinIO 配置
  333. MINIO_HOST = os.environ.get("MINIO_HOST", "192.168.3.143:9000")
  334. MINIO_USER = os.environ.get("MINIO_USER", "citu-dataops-acc-key")
  335. MINIO_PASSWORD = os.environ.get("MINIO_PASSWORD", "citu-dataops-secret-key")
  336. MINIO_SECURE = get_bool_env("MINIO_SECURE", False)
  337. MINIO_BUCKET = os.environ.get("MINIO_BUCKET", "dataops-bucket")
  338. PREFIX = os.environ.get("MINIO_PREFIX", "")
  339. # 生产环境 PostgreSQL 配置
  340. SQLALCHEMY_DATABASE_URI = os.environ.get(
  341. "DATABASE_URL", "postgresql://postgres:dataOps@192.168.3.143:5432/dataops"
  342. )
  343. # 生产环境 Neo4j 配置
  344. NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://192.168.3.143:7687")
  345. NEO4J_HTTP_URI = os.environ.get("NEO4J_HTTP_URI", "http://192.168.3.143:7474")
  346. NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
  347. NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "cituneo4j")
  348. NEO4J_ENCRYPTED = get_bool_env("NEO4J_ENCRYPTED", False)
  349. # 生产环境文件路径配置
  350. UPLOAD_BASE_PATH = os.environ.get("UPLOAD_BASE_PATH", "/data/upload")
  351. ARCHIVE_BASE_PATH = os.environ.get("ARCHIVE_BASE_PATH", "/data/archive")
  352. # 生产环境日志配置
  353. LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
  354. LOG_FILE = resolve_log_file("flask_production.log")
  355. LOG_TO_CONSOLE = get_bool_env("LOG_TO_CONSOLE", False)
  356. # 生产环境 Airflow 配置
  357. AIRFLOW_BASE_URL = os.environ.get("AIRFLOW_BASE_URL", "http://192.168.3.143:8080")
  358. AIRFLOW_AUTH_USER = os.environ.get("AIRFLOW_AUTH_USER", "admin")
  359. AIRFLOW_AUTH_PASSWORD = os.environ.get("AIRFLOW_AUTH_PASSWORD", "admin")
  360. # 配置字典
  361. config = {
  362. "development": DevelopmentConfig,
  363. "production": ProductionConfig,
  364. "default": DevelopmentConfig,
  365. }
  366. # 获取当前环境
  367. current_env = get_environment()