před 2 měsíci · a128852d15
--- a/dags/common.py
+++ b/dags/common.py
@@ -97,24 +97,62 @@ def execute_script(script_name, table_name, execution_mode):
 
				         return False
			
 
				     
			
 
				     try:
			
 
				-        # 直接使用配置的部署路径
			
 
				+        # 检查脚本路径
			
 
				         script_path = Path(SCRIPTS_BASE_PATH) / script_name
			
 
				-        logger.info(f"使用配置的Airflow部署路径: {script_path}")
			
 
				+        logger.info(f"准备执行脚本，完整路径: {script_path}")
			
 
				+        
			
 
				+        # 检查脚本路径是否存在
			
 
				+        if not os.path.exists(script_path):
			
 
				+            logger.error(f"脚本文件不存在: {script_path}")
			
 
				+            logger.error(f"请确认脚本文件已部署到正确路径: {SCRIPTS_BASE_PATH}")
			
 
				+            
			
 
				+            # 尝试列出脚本目录中的文件
			
 
				+            try:
			
 
				+                script_dir = Path(SCRIPTS_BASE_PATH)
			
 
				+                if os.path.exists(script_dir):
			
 
				+                    files = os.listdir(script_dir)
			
 
				+                    logger.info(f"可用脚本文件: {files}")
			
 
				+                else:
			
 
				+                    logger.error(f"脚本目录不存在: {script_dir}")
			
 
				+            except Exception as le:
			
 
				+                logger.error(f"尝试列出脚本目录内容时出错: {str(le)}")
			
 
				+                
			
 
				+            return False
			
 
				+            
			
 
				+        logger.info(f"脚本文件存在，开始导入: {script_path}")
			
 
				         
			
 
				         # 动态导入模块
			
 
				-        spec = importlib.util.spec_from_file_location("dynamic_module", script_path)
			
 
				-        module = importlib.util.module_from_spec(spec)
			
 
				-        spec.loader.exec_module(module)
			
 
				+        try:
			
 
				+            spec = importlib.util.spec_from_file_location("dynamic_module", script_path)
			
 
				+            if spec is None:
			
 
				+                logger.error(f"无法加载脚本规范: {script_path}")
			
 
				+                return False
			
 
				+                
			
 
				+            module = importlib.util.module_from_spec(spec)
			
 
				+            spec.loader.exec_module(module)
			
 
				+            logger.info(f"成功导入脚本模块: {script_name}")
			
 
				+        except ImportError as ie:
			
 
				+            logger.error(f"导入脚本时出错: {str(ie)}")
			
 
				+            import traceback
			
 
				+            logger.error(traceback.format_exc())
			
 
				+            return False
			
 
				+        except SyntaxError as se:
			
 
				+            logger.error(f"脚本语法错误: {str(se)}")
			
 
				+            logger.error(f"错误位置: {se.filename}, 行 {se.lineno}, 列 {se.offset}")
			
 
				+            return False
			
 
				         
			
 
				-        # 使用标准入口函数run
			
 
				-        if hasattr(module, "run"):
			
 
				-            logger.info(f"执行脚本 {script_name} 的标准入口函数 run()")
			
 
				-            result = module.run(table_name=table_name, execution_mode=execution_mode)
			
 
				-            logger.info(f"脚本 {script_name} 执行结果: {result}")
			
 
				-            return result
			
 
				-        else:
			
 
				-            logger.warning(f"脚本 {script_name} 未定义标准入口函数 run()，无法执行")
			
 
				+        # 验证run函数存在
			
 
				+        if not hasattr(module, "run"):
			
 
				+            available_funcs = [func for func in dir(module) if callable(getattr(module, func)) and not func.startswith("_")]
			
 
				+            logger.error(f"脚本 {script_name} 未定义标准入口函数 run()，无法执行")
			
 
				+            logger.error(f"可用函数: {available_funcs}")
			
 
				             return False
			
 
				+        
			
 
				+        # 执行run函数
			
 
				+        logger.info(f"执行脚本 {script_name} 的run函数，参数: table_name={table_name}, execution_mode={execution_mode}")
			
 
				+        result = module.run(table_name=table_name, execution_mode=execution_mode)
			
 
				+        logger.info(f"脚本 {script_name} 执行结果: {result}")
			
 
				+        return result
			
 
				     except Exception as e:
			
 
				         logger.error(f"执行脚本 {script_name} 时出错: {str(e)}")
			
 
				         import traceback
			
--- a/dags/dag_dataops_unified_data_scheduler.py
+++ b/dags/dag_dataops_unified_data_scheduler.py
@@ -2,7 +2,6 @@
 
				 from airflow import DAG
			
 
				 from airflow.operators.python import PythonOperator
			
 
				 from airflow.operators.empty import EmptyOperator
			
 
				-from airflow.sensors.external_task import ExternalTaskSensor
			
 
				 from datetime import datetime, timedelta, date
			
 
				 import logging
			
 
				 import networkx as nx
			
@@ -14,7 +13,7 @@ from common import (
 
				     execute_with_monitoring,
			
 
				     get_today_date
			
 
				 )
			
 
				-from config import TASK_RETRY_CONFIG
			
 
				+from config import TASK_RETRY_CONFIG, SCRIPTS_BASE_PATH, AIRFLOW_BASE_PATH
			
 
				 
			
 
				 # 创建日志记录器
			
 
				 logger = logging.getLogger(__name__)
			
@@ -183,22 +182,35 @@ def prepare_unified_execution_plan(**kwargs):
 
				     logger.info(f"资源表任务: {resource_names}")
			
 
				     logger.info(f"模型表任务: {model_names}")
			
 
				     
			
 
				-    # 将执行计划保存到XCom，使用自定义序列化器处理日期对象
			
 
				-    try:
			
 
				-        kwargs['ti'].xcom_push(key='execution_plan', value=json.dumps(execution_plan, default=json_serial))
			
 
				-        logger.info(f"准备了执行计划，包含 {len(resource_tasks)} 个资源表任务和 {len(model_tasks)} 个模型表任务")
			
 
				-    except Exception as e:
			
 
				-        logger.error(f"将执行计划保存到XCom时出错: {str(e)}")
			
 
				-        
			
 
				-        # 保存执行计划到文件以备后用
			
 
				+    # 已经不需要推送到XCom，因为我们直接从文件读取
			
 
				+    # 这里仅用于验证执行计划与文件中的是否一致
			
 
				+    plan_path = os.path.join(os.path.dirname(__file__), 'last_execution_plan.json')
			
 
				+    ready_path = f"{plan_path}.ready"
			
 
				+    
			
 
				+    if os.path.exists(plan_path) and os.path.exists(ready_path):
			
 
				         try:
			
 
				-            plan_path = os.path.join(os.path.dirname(__file__), 'last_execution_plan.json')
			
 
				-            with open(plan_path, 'w') as f:
			
 
				-                json.dump(execution_plan, f, default=json_serial)
			
 
				-            logger.info(f"将执行计划保存到文件: {plan_path}")
			
 
				-        except Exception as file_e:
			
 
				-            logger.error(f"保存执行计划到文件时出错: {str(file_e)}")
			
 
				+            with open(plan_path, 'r') as f:
			
 
				+                existing_plan = json.load(f)
			
 
				+            
			
 
				+            # 比较执行计划是否有变化
			
 
				+            existing_resources = sorted([t.get("target_table") for t in existing_plan.get("resource_tasks", [])])
			
 
				+            current_resources = sorted(resource_names)
			
 
				+            
			
 
				+            existing_models = sorted([t.get("target_table") for t in existing_plan.get("model_tasks", [])])
			
 
				+            current_models = sorted(model_names)
			
 
				+            
			
 
				+            if existing_resources == current_resources and existing_models == current_models:
			
 
				+                logger.info("执行计划无变化，继续使用现有任务结构")
			
 
				+            else:
			
 
				+                logger.warning("执行计划与现有文件不一致，但DAG结构已固定，需等待下次解析")
			
 
				+                logger.warning(f"现有资源表: {existing_resources}")
			
 
				+                logger.warning(f"当前资源表: {current_resources}")
			
 
				+                logger.warning(f"现有模型表: {existing_models}")
			
 
				+                logger.warning(f"当前模型表: {current_models}")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"比较执行计划时出错: {str(e)}")
			
 
				     
			
 
				+    logger.info(f"准备了执行计划，包含 {len(resource_tasks)} 个资源表任务和 {len(model_tasks)} 个模型表任务")
			
 
				     return len(resource_tasks) + len(model_tasks)
			
 
				 
			
 
				 def process_resource(target_table, script_name, script_exec_mode, exec_date):
			
@@ -256,14 +268,22 @@ def process_model(target_table, script_name, script_exec_mode, exec_date):
 
				         # 确保即使出错也返回结果，不会阻塞DAG
			
 
				         return False
			
 
				 
			
 
				-# 预先加载数据以创建任务
			
 
				+# 修改预先加载数据以创建任务的逻辑
			
 
				 try:
			
 
				     logger.info("预先加载执行计划数据用于构建DAG")
			
 
				     plan_path = os.path.join(os.path.dirname(__file__), 'last_execution_plan.json')
			
 
				+    ready_path = f"{plan_path}.ready"
			
 
				     execution_plan = {"exec_date": get_today_date(), "resource_tasks": [], "model_tasks": [], "dependencies": {}}
			
 
				     
			
 
				-    if os.path.exists(plan_path):
			
 
				+    # 首先检查ready文件是否存在，确保JSON文件已完整生成
			
 
				+    if os.path.exists(ready_path) and os.path.exists(plan_path):
			
 
				         try:
			
 
				+            # 读取ready文件中的时间戳
			
 
				+            with open(ready_path, 'r') as f:
			
 
				+                ready_timestamp = f.read().strip()
			
 
				+                logger.info(f"执行计划ready标记时间: {ready_timestamp}")
			
 
				+            
			
 
				+            # 读取执行计划文件
			
 
				             with open(plan_path, 'r') as f:
			
 
				                 execution_plan_json = f.read()
			
 
				                 execution_plan = json.loads(execution_plan_json)
			
@@ -271,8 +291,12 @@ try:
 
				         except Exception as e:
			
 
				             logger.warning(f"读取执行计划文件出错: {str(e)}")
			
 
				     else:
			
 
				-        logger.warning(f"执行计划文件不存在: {plan_path}，将创建基础DAG结构")
			
 
				-        
			
 
				+        if not os.path.exists(ready_path):
			
 
				+            logger.warning(f"执行计划ready标记文件不存在: {ready_path}")
			
 
				+        if not os.path.exists(plan_path):
			
 
				+            logger.warning(f"执行计划文件不存在: {plan_path}")
			
 
				+        logger.warning("将创建基础DAG结构")
			
 
				+    
			
 
				     # 提取信息
			
 
				     exec_date = execution_plan.get("exec_date", get_today_date())
			
 
				     resource_tasks = execution_plan.get("resource_tasks", [])
			
@@ -295,7 +319,8 @@ def handle_dag_failure(context):
 
				 with DAG(
			
 
				     "dag_dataops_unified_data_scheduler", 
			
 
				     start_date=datetime(2024, 1, 1), 
			
 
				-    schedule_interval="@daily", 
			
 
				+    # 修改调度间隔为每10分钟检查一次，以便及时响应执行计划变化
			
 
				+    schedule_interval="*/10 * * * *",
			
 
				     catchup=False,
			
 
				     default_args={
			
 
				         'owner': 'airflow',
			
@@ -305,23 +330,14 @@ with DAG(
 
				         'retries': 1,
			
 
				         'retry_delay': timedelta(minutes=5)
			
 
				     },
			
 
				-    on_failure_callback=handle_dag_failure  # 在这里设置回调函数
			
 
				+    on_failure_callback=handle_dag_failure,
			
 
				+    # 添加DAG级别参数，确保任务运行时有正确的环境
			
 
				+    params={
			
 
				+        "scripts_path": SCRIPTS_BASE_PATH,
			
 
				+        "airflow_base_path": AIRFLOW_BASE_PATH
			
 
				+    }
			
 
				 ) as dag:
			
 
				     
			
 
				-    # 等待准备DAG完成
			
 
				-    wait_for_prepare = ExternalTaskSensor(
			
 
				-        task_id="wait_for_prepare",
			
 
				-        external_dag_id="dag_dataops_unified_prepare_scheduler",
			
 
				-        external_task_id="preparation_completed",
			
 
				-        mode="reschedule",  # 改为reschedule模式，减少资源占用
			
 
				-        timeout=3600,
			
 
				-        poke_interval=30,
			
 
				-        execution_timeout=timedelta(hours=1),
			
 
				-        soft_fail=True,
			
 
				-        allowed_states=["success", "skipped"],
			
 
				-        dag=dag
			
 
				-    )
			
 
				-    
			
 
				     # 准备执行计划
			
 
				     prepare_plan = PythonOperator(
			
 
				         task_id="prepare_execution_plan",
			
@@ -337,12 +353,18 @@ with DAG(
 
				         dag=dag
			
 
				     )
			
 
				     
			
 
				-    # 设置初始任务依赖
			
 
				-    wait_for_prepare >> prepare_plan
			
 
				-    
			
 
				     # 任务字典，用于设置依赖关系
			
 
				     task_dict = {}
			
 
				     
			
 
				+    # 添加一个空任务作为下游任务的起始点，确保即使没有资源表和模型表，DAG也能正常执行
			
 
				+    start_processing = EmptyOperator(
			
 
				+        task_id="start_processing",
			
 
				+        dag=dag
			
 
				+    )
			
 
				+    
			
 
				+    # 设置基本依赖
			
 
				+    prepare_plan >> start_processing
			
 
				+    
			
 
				     # 1. 预先创建资源表任务
			
 
				     for task_info in resource_tasks:
			
 
				         table_name = task_info["target_table"]
			
@@ -370,9 +392,9 @@ with DAG(
 
				         # 将任务添加到字典
			
 
				         task_dict[table_name] = resource_task
			
 
				         
			
 
				-        # 设置与prepare_plan的依赖 - 直接依赖，不需要其他条件
			
 
				-        prepare_plan >> resource_task
			
 
				-        logger.info(f"预先设置基本依赖: prepare_plan >> {task_id}")
			
 
				+        # 设置与start_processing的依赖
			
 
				+        start_processing >> resource_task
			
 
				+        logger.info(f"设置基本依赖: start_processing >> {task_id}")
			
 
				     
			
 
				     # 创建有向图，用于检测模型表之间的依赖关系
			
 
				     G = nx.DiGraph()
			
@@ -449,11 +471,11 @@ with DAG(
 
				                 has_dependency = True
			
 
				                 logger.info(f"预先设置依赖: {dep_table} >> {table_name}")
			
 
				         
			
 
				-        # 如果没有依赖，则依赖于准备任务和所有资源表任务
			
 
				+        # 如果没有依赖，则依赖于start_processing和资源表任务
			
 
				         if not has_dependency:
			
 
				-            # 从prepare_plan任务直接连接
			
 
				-            prepare_plan >> model_task
			
 
				-            logger.info(f"预先设置基本依赖: prepare_plan >> {task_id}")
			
 
				+            # 从start_processing任务直接连接
			
 
				+            start_processing >> model_task
			
 
				+            logger.info(f"设置基本依赖: start_processing >> {task_id}")
			
 
				             
			
 
				             # 同时从所有资源表任务连接 - 限制每个模型表最多依赖5个资源表，避免过度复杂的依赖关系
			
 
				             resource_count = 0
			
@@ -492,10 +514,10 @@ with DAG(
 
				     if not model_tasks and resource_tasks:
			
 
				         terminal_tasks = [task["target_table"] for task in resource_tasks]
			
 
				     
			
 
				-    # 如果既没有模型表任务也没有资源表任务，直接连接准备任务到完成标记
			
 
				+    # 如果既没有模型表任务也没有资源表任务，直接连接start_processing到完成标记
			
 
				     if not terminal_tasks:
			
 
				-        prepare_plan >> processing_completed
			
 
				-        logger.warning("未找到任何任务，直接连接准备任务到完成标记")
			
 
				+        start_processing >> processing_completed
			
 
				+        logger.warning("未找到任何任务，直接连接start_processing到完成标记")
			
 
				     else:
			
 
				         # 将所有终端任务连接到完成标记
			
 
				         for table_name in terminal_tasks:
			
--- a/dags/dag_dataops_unified_prepare_scheduler.py
+++ b/dags/dag_dataops_unified_prepare_scheduler.py
@@ -282,10 +282,69 @@ def write_to_airflow_dag_schedule(exec_date, tables_info):
 
				 
			
 
				 def prepare_unified_dag_schedule(**kwargs):
			
 
				     """准备统一DAG调度任务的主函数"""
			
 
				+    import hashlib
			
 
				+    
			
 
				     exec_date = kwargs.get('ds') or get_today_date()
			
 
				     logger.info(f"开始准备执行日期 {exec_date} 的统一调度任务")
			
 
				     
			
 
				-    # 1. 获取启用的表
			
 
				+    # 检查执行计划文件和ready文件是否存在
			
 
				+    plan_path = os.path.join(os.path.dirname(__file__), 'last_execution_plan.json')
			
 
				+    ready_path = f"{plan_path}.ready"
			
 
				+    files_exist = os.path.exists(plan_path) and os.path.exists(ready_path)
			
 
				+    
			
 
				+    if not files_exist:
			
 
				+        logger.info("执行计划文件或ready标记文件不存在，将重新生成执行计划")
			
 
				+    
			
 
				+    # 1. 计算当前订阅表状态的哈希值，用于检测变化
			
 
				+    def get_subscription_state_hash():
			
 
				+        """获取订阅表状态的哈希值"""
			
 
				+        conn = get_pg_conn()
			
 
				+        cursor = conn.cursor()
			
 
				+        try:
			
 
				+            cursor.execute("""
			
 
				+                SELECT table_name, schedule_is_enabled
			
 
				+                FROM schedule_status
			
 
				+                ORDER BY table_name
			
 
				+            """)
			
 
				+            rows = cursor.fetchall()
			
 
				+            # 将所有行拼接成一个字符串，然后计算哈希值
			
 
				+            data_str = '|'.join(f"{row[0]}:{row[1]}" for row in rows)
			
 
				+            return hashlib.md5(data_str.encode()).hexdigest()
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"计算订阅表状态哈希值时出错: {str(e)}")
			
 
				+            return None
			
 
				+        finally:
			
 
				+            cursor.close()
			
 
				+            conn.close()
			
 
				+    
			
 
				+    # 获取当前订阅表状态哈希值
			
 
				+    current_hash = get_subscription_state_hash()
			
 
				+    if not current_hash:
			
 
				+        logger.error("无法获取订阅表状态，将中止处理")
			
 
				+        return 0
			
 
				+    
			
 
				+    # 2. 读取上次记录的哈希值
			
 
				+    hash_file = os.path.join(os.path.dirname(__file__), '.subscription_state')
			
 
				+    last_hash = None
			
 
				+    if os.path.exists(hash_file):
			
 
				+        try:
			
 
				+            with open(hash_file, 'r') as f:
			
 
				+                last_hash = f.read().strip()
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f"读取上次订阅状态哈希值失败: {str(e)}")
			
 
				+    
			
 
				+    # 3. 如果哈希值相同且文件存在，说明订阅表未变化且执行计划存在，可以提前退出
			
 
				+    if last_hash == current_hash and files_exist:
			
 
				+        logger.info("订阅表状态未变化且执行计划文件存在，无需更新执行计划")
			
 
				+        return 0
			
 
				+    
			
 
				+    # 记录重新生成原因
			
 
				+    if not files_exist:
			
 
				+        logger.info("执行计划文件或ready标记文件不存在，需要重新生成")
			
 
				+    else:
			
 
				+        logger.info(f"检测到订阅表状态变化。旧哈希值: {last_hash}, 新哈希值: {current_hash}")
			
 
				+    
			
 
				+    # 4. 获取启用的表
			
 
				     enabled_tables = get_enabled_tables()
			
 
				     logger.info(f"从schedule_status表获取到 {len(enabled_tables)} 个启用的表")
			
 
				     
			
@@ -293,7 +352,7 @@ def prepare_unified_dag_schedule(**kwargs):
 
				         logger.warning("没有找到启用的表，准备工作结束")
			
 
				         return 0
			
 
				     
			
 
				-    # 2. 获取表的详细信息
			
 
				+    # 5. 获取表的详细信息
			
 
				     tables_info = []
			
 
				     for table_name in enabled_tables:
			
 
				         table_info = get_table_info_from_neo4j(table_name)
			
@@ -302,24 +361,24 @@ def prepare_unified_dag_schedule(**kwargs):
 
				     
			
 
				     logger.info(f"成功获取 {len(tables_info)} 个表的详细信息")
			
 
				     
			
 
				-    # 3. 处理依赖关系，添加被动调度的表
			
 
				+    # 6. 处理依赖关系，添加被动调度的表
			
 
				     enriched_tables = process_dependencies(tables_info)
			
 
				     logger.info(f"处理依赖后，总共有 {len(enriched_tables)} 个表")
			
 
				     
			
 
				-    # 4. 过滤无效表及其依赖
			
 
				+    # 7. 过滤无效表及其依赖
			
 
				     valid_tables = filter_invalid_tables(enriched_tables)
			
 
				     logger.info(f"过滤无效表后，最终有 {len(valid_tables)} 个有效表")
			
 
				     
			
 
				-    # 5. 写入airflow_dag_schedule表
			
 
				+    # 8. 写入airflow_dag_schedule表
			
 
				     inserted_count = write_to_airflow_dag_schedule(exec_date, valid_tables)
			
 
				     
			
 
				-    # 6. 检查插入操作是否成功，如果失败则抛出异常
			
 
				+    # 9. 检查插入操作是否成功，如果失败则抛出异常
			
 
				     if inserted_count == 0 and valid_tables:
			
 
				         error_msg = f"插入操作失败，无记录被插入到airflow_dag_schedule表，但有{len(valid_tables)}个有效表需要处理"
			
 
				         logger.error(error_msg)
			
 
				         raise Exception(error_msg)
			
 
				     
			
 
				-    # 7. 保存最新执行计划，供DAG读取使用
			
 
				+    # 10. 保存最新执行计划，供DAG读取使用
			
 
				     try:
			
 
				         # 构建执行计划
			
 
				         resource_tasks = []
			
@@ -381,34 +440,47 @@ def prepare_unified_dag_schedule(**kwargs):
 
				             "dependencies": dependencies
			
 
				         }
			
 
				         
			
 
				-        # 保存执行计划到文件
			
 
				-        plan_path = os.path.join(os.path.dirname(__file__), 'last_execution_plan.json')
			
 
				-        with open(plan_path, 'w') as f:
			
 
				-            json.dump(execution_plan, f, indent=2)
			
 
				-            
			
 
				-        logger.info(f"保存执行计划到文件: {plan_path}")
			
 
				-        
			
 
				-        # 验证文件是否成功生成并可读
			
 
				-        if not os.path.exists(plan_path):
			
 
				-            raise Exception(f"执行计划文件未成功生成: {plan_path}")
			
 
				+        # 先将内容写入临时文件，再原子替换正式文件，确保读取方不会读到只写了一半的执行计划
			
 
				+        temp_plan_path = f"{plan_path}.temp"
			
 
				         
			
 
				-        # 尝试读取文件验证内容
			
 
				         try:
			
 
				-            with open(plan_path, 'r') as f:
			
 
				-                validation_data = json.load(f)
			
 
				-                # 验证执行计划内容
			
 
				-                if not isinstance(validation_data, dict):
			
 
				-                    raise Exception("执行计划格式无效，应为JSON对象")
			
 
				-                if "exec_date" not in validation_data:
			
 
				-                    raise Exception("执行计划缺少exec_date字段")
			
 
				-                if not isinstance(validation_data.get("resource_tasks", []), list):
			
 
				-                    raise Exception("执行计划的resource_tasks字段无效")
			
 
				-                if not isinstance(validation_data.get("model_tasks", []), list):
			
 
				-                    raise Exception("执行计划的model_tasks字段无效")
			
 
				-        except json.JSONDecodeError as je:
			
 
				-            raise Exception(f"执行计划文件内容无效，非法的JSON格式: {str(je)}")
			
 
				-        except Exception as ve:
			
 
				-            raise Exception(f"执行计划文件验证失败: {str(ve)}")
			
 
				+            # 10.1 写入临时文件
			
 
				+            with open(temp_plan_path, 'w') as f:
			
 
				+                json.dump(execution_plan, f, indent=2)
			
 
				+            logger.info(f"已保存执行计划到临时文件: {temp_plan_path}")
			
 
				+            
			
 
				+            # 10.2 原子替换正式文件
			
 
				+            os.replace(temp_plan_path, plan_path)
			
 
				+            logger.info(f"已替换执行计划文件: {plan_path}")
			
 
				+            
			
 
				+            # 10.3 创建ready文件，标记执行计划就绪
			
 
				+            with open(ready_path, 'w') as f:
			
 
				+                f.write(datetime.now().isoformat())
			
 
				+            logger.info(f"已创建ready标记文件: {ready_path}")
			
 
				+            
			
 
				+            # 10.4 更新订阅表状态哈希值
			
 
				+            with open(hash_file, 'w') as f:
			
 
				+                f.write(current_hash)
			
 
				+            logger.info(f"已更新订阅表状态哈希值: {current_hash}")
			
 
				+            
			
 
				+            # 10.5 触发data_scheduler DAG重新解析
			
 
				+            data_scheduler_path = os.path.join(os.path.dirname(__file__), 'dag_dataops_unified_data_scheduler.py')
			
 
				+            if os.path.exists(data_scheduler_path):
			
 
				+                # 更新文件修改时间，触发Airflow重新解析
			
 
				+                os.utime(data_scheduler_path, None)
			
 
				+                logger.info(f"已触发数据调度器DAG重新解析: {data_scheduler_path}")
			
 
				+            else:
			
 
				+                logger.warning(f"数据调度器DAG文件不存在: {data_scheduler_path}")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"保存执行计划文件或触发DAG重新解析时出错: {str(e)}")
			
 
				+            # 出错时清理临时文件
			
 
				+            if os.path.exists(temp_plan_path):
			
 
				+                try:
			
 
				+                    os.remove(temp_plan_path)
			
 
				+                    logger.info(f"已清理临时文件: {temp_plan_path}")
			
 
				+                except Exception as rm_e:
			
 
				+                    logger.error(f"清理临时文件时出错: {str(rm_e)}")
			
 
				+            raise  # 重新抛出异常，确保任务失败
			
 
				             
			
 
				     except Exception as e:
			
 
				         error_msg = f"保存或验证执行计划文件时出错: {str(e)}"
			
@@ -470,7 +542,7 @@ def check_execution_plan_file(**kwargs):
 
				 with DAG(
			
 
				     "dag_dataops_unified_prepare_scheduler",
			
 
				     start_date=datetime(2024, 1, 1),
			
 
				-    schedule_interval="@daily",
			
 
				+    schedule_interval="*/5 * * * *",  # 每5分钟运行一次，而不是每天
			
 
				     catchup=False,
			
 
				     default_args={
			
 
				         'owner': 'airflow',
			
--- a/dags/dag_dataops_unified_summary_scheduler.py
+++ b/dags/dag_dataops_unified_summary_scheduler.py
@@ -8,6 +8,7 @@ import logging
 
				 import json
			
 
				 from decimal import Decimal
			
 
				 from common import get_pg_conn, get_today_date
			
 
				+from airflow.models import Variable
			
 
				 
			
 
				 # 创建日志记录器
			
 
				 logger = logging.getLogger(__name__)
			
@@ -314,7 +315,7 @@ def summarize_unified_execution(**kwargs):
 
				 with DAG(
			
 
				     "dag_dataops_unified_summary_scheduler", 
			
 
				     start_date=datetime(2024, 1, 1), 
			
 
				-    schedule_interval="@daily", 
			
 
				+    schedule_interval="*/10 * * * *",  # 修改为每10分钟执行一次，与data_scheduler保持一致
			
 
				     catchup=False,
			
 
				     default_args={
			
 
				         'owner': 'airflow',
			
@@ -326,16 +327,44 @@ with DAG(
 
				     }
			
 
				 ) as dag:
			
 
				     
			
 
				-    # 等待统一数据处理DAG完成
			
 
				-    wait_for_data_processing = ExternalTaskSensor(
			
 
				-        task_id="wait_for_data_processing",
			
 
				-        external_dag_id="dag_dataops_unified_data_scheduler",
			
 
				-        external_task_id="processing_completed",
			
 
				-        mode="poke",
			
 
				-        timeout=3600,
			
 
				-        poke_interval=30,
			
 
				-        dag=dag
			
 
				-    )
			
 
				+    # 检查是否跳过等待外部任务
			
 
				+    skip_wait = Variable.get("skip_summary_wait", default_var="false").lower() == "true"
			
 
				+    
			
 
				+    if skip_wait:
			
 
				+        # 如果跳过等待，创建一个空操作代替
			
 
				+        wait_for_data_processing = EmptyOperator(
			
 
				+            task_id="wait_for_data_processing",
			
 
				+            dag=dag
			
 
				+        )
			
 
				+        logger.info("跳过等待外部DAG完成，使用EmptyOperator替代")
			
 
				+    else:
			
 
				+        # 等待统一数据处理DAG完成
			
 
				+        # 定义一个函数来打印并返回执行日期
			
 
				+        def print_target_date(dt):
			
 
				+            logger.info(f"===== ExternalTaskSensor等待的目标日期信息 =====")
			
 
				+            logger.info(f"源DAG: dag_dataops_unified_summary_scheduler")
			
 
				+            logger.info(f"目标DAG: dag_dataops_unified_data_scheduler")
			
 
				+            logger.info(f"目标任务: processing_completed")
			
 
				+            logger.info(f"查找的执行日期: {dt}")
			
 
				+            logger.info(f"日期字符串格式: {dt.strftime('%Y-%m-%dT%H:%M:%S')}")
			
 
				+            logger.info(f"日期类型: {type(dt)}")
			
 
				+            logger.info(f"=======================================")
			
 
				+            # 必须返回原始日期，不能修改
			
 
				+            return dt
			
 
				+
			
 
				+        wait_for_data_processing = ExternalTaskSensor(
			
 
				+            task_id="wait_for_data_processing",
			
 
				+            external_dag_id="dag_dataops_unified_data_scheduler",
			
 
				+            external_task_id="processing_completed",
			
 
				+            mode="reschedule",  # 改为reschedule模式，不会占用worker
			
 
				+            timeout=7200,  # 增加超时时间到2小时
			
 
				+            poke_interval=60,  # 增加检查间隔到1分钟
			
 
				+            allowed_states=["success", "skipped"],  # 允许成功或跳过的状态
			
 
				+            failed_states=["failed", "upstream_failed"],  # 当检测到这些状态时立即失败
			
 
				+            dag=dag,
			
 
				+            # 添加自定义方法来打印和返回日期
			
 
				+            execution_date_fn=print_target_date
			
 
				+        )
			
 
				     
			
 
				     # 汇总执行情况
			
 
				     summarize_task = PythonOperator(
			
--- a/dataops/scripts/book_sale_amt_daily_clean.py
+++ b/dataops/scripts/book_sale_amt_daily_clean.py
@@ -5,6 +5,8 @@ import logging
 
				 import sys
			
 
				 import os
			
 
				 from datetime import datetime, timedelta
			
 
				+import time
			
 
				+import random
			
 
				 
			
 
				 # 配置日志记录器
			
 
				 logging.basicConfig(
			
@@ -47,7 +49,27 @@ def clean_daily_book_sales():
 
				         logger.info("数据清洗完成，准备保存结果...")
			
 
				         # 实际应用中这里会将结果保存到数据库
			
 
				         
			
 
				-        return True
			
 
				+        # 模拟处理时间
			
 
				+        processing_time = random.uniform(0.5, 2.0)
			
 
				+        logger.info(f"开始处理数据，预计需要 {processing_time:.2f} 秒")
			
 
				+        time.sleep(processing_time)
			
 
				+        
			
 
				+        # 模拟处理逻辑
			
 
				+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        logger.info(f"数据处理中... 当前时间: {current_time}")
			
 
				+        
			
 
				+        # 模拟数据清洗操作
			
 
				+        logger.info(f"执行数据清洗操作: 移除异常值、填充缺失值、标准化格式")
			
 
				+        time.sleep(processing_time)
			
 
				+        
			
 
				+        # 模拟写入数据库
			
 
				+        success_rate = random.random()
			
 
				+        if success_rate > 0.1:  # 90%的成功率
			
 
				+            logger.info(f"表 {date_str} 数据清洗成功，已处理并写入")
			
 
				+            return True
			
 
				+        else:
			
 
				+            logger.error(f"表 {date_str} 数据清洗或写入过程中出现随机错误")
			
 
				+            return False
			
 
				     except Exception as e:
			
 
				         logger.error(f"清洗日度图书销售额数据时出错: {str(e)}")
			
 
				         return False