health.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. """
  2. 系统健康检查模块
  3. 提供系统各组件健康状态检查和系统信息获取功能
  4. """
  5. import logging
  6. import platform
  7. import psutil
  8. import os
  9. import socket
  10. from datetime import datetime
  11. from app.config.config import Config
  12. from app.services.neo4j_driver import neo4j_driver
  13. logger = logging.getLogger(__name__)
  14. def check_neo4j_connection():
  15. """
  16. 检查Neo4j数据库连接状态
  17. Returns:
  18. bool: 连接成功返回True,失败返回False
  19. """
  20. try:
  21. with neo4j_driver.get_session() as session:
  22. # 执行简单查询确认连接
  23. session.run("RETURN 1")
  24. return True
  25. except Exception as e:
  26. logger.error(f"Neo4j数据库连接失败: {str(e)}")
  27. return False
  28. def check_system_health():
  29. """
  30. 检查系统整体健康状态
  31. 包括关键依赖组件的连接状态
  32. Returns:
  33. dict: 包含各组件健康状态的字典
  34. """
  35. # 检查Neo4j连接
  36. neo4j_status = check_neo4j_connection()
  37. # 可以添加其他组件的健康检查
  38. # 例如MySQL、Redis、MinIO等
  39. # 构造健康状态信息
  40. health_status = {
  41. "service": "DataOps-platform",
  42. "status": "UP" if neo4j_status else "DEGRADED",
  43. "version": "1.0.0", # 可以从配置或版本文件中读取
  44. "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
  45. "dependencies": {
  46. "neo4j": {
  47. "status": "UP" if neo4j_status else "DOWN",
  48. "details": {
  49. "url": Config.NEO4J_URI,
  50. "encrypted": Config.NEO4J_ENCRYPTED
  51. }
  52. }
  53. # 可以添加其他依赖的状态
  54. }
  55. }
  56. return health_status
  57. def get_system_info():
  58. """
  59. 获取系统运行环境信息
  60. 包括操作系统、Python版本、CPU使用率、内存使用情况等
  61. Returns:
  62. dict: 包含系统信息的字典
  63. """
  64. try:
  65. # 获取基本系统信息
  66. sys_info = {
  67. "os": {
  68. "name": platform.system(),
  69. "version": platform.version(),
  70. "platform": platform.platform(),
  71. },
  72. "python": {
  73. "version": platform.python_version(),
  74. "implementation": platform.python_implementation(),
  75. },
  76. "network": {
  77. "hostname": socket.gethostname(),
  78. "ip": socket.gethostbyname(socket.gethostname()),
  79. },
  80. "resources": {
  81. "cpu": {
  82. "cores": psutil.cpu_count(logical=False),
  83. "logical_cores": psutil.cpu_count(logical=True),
  84. "usage_percent": psutil.cpu_percent(interval=0.1),
  85. },
  86. "memory": {
  87. "total": _format_bytes(psutil.virtual_memory().total),
  88. "available": _format_bytes(psutil.virtual_memory().available),
  89. "used": _format_bytes(psutil.virtual_memory().used),
  90. "percent": psutil.virtual_memory().percent,
  91. },
  92. "disk": {
  93. "total": _format_bytes(psutil.disk_usage("/").total),
  94. "used": _format_bytes(psutil.disk_usage("/").used),
  95. "free": _format_bytes(psutil.disk_usage("/").free),
  96. "percent": psutil.disk_usage("/").percent,
  97. },
  98. },
  99. "application": {
  100. "environment": Config.ENVIRONMENT,
  101. "debug_mode": Config.DEBUG,
  102. "port": Config.PORT,
  103. "platform": Config.PLATFORM,
  104. "upload_folder": Config.UPLOAD_FOLDER,
  105. "bucket_name": Config.BUCKET_NAME,
  106. "prefix": Config.PREFIX,
  107. # 不返回敏感信息如密码、密钥等
  108. }
  109. }
  110. return sys_info
  111. except Exception as e:
  112. logger.error(f"获取系统信息失败: {str(e)}")
  113. return {"error": str(e)}
  114. def _format_bytes(bytes_value):
  115. """
  116. 将字节数格式化为易读形式
  117. Args:
  118. bytes_value: 字节数
  119. Returns:
  120. str: 格式化后的字符串,如"1.23 GB"
  121. """
  122. for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
  123. if bytes_value < 1024 or unit == 'TB':
  124. return f"{bytes_value:.2f} {unit}"
  125. bytes_value /= 1024