1 주 전 · bfc53156ae
--- a/dags/dataops_productline_execute_dag.py
+++ b/dags/dataops_productline_execute_dag.py
@@ -6,12 +6,44 @@
 
				 2. 基于dataops_productline_prepare_dag生成的执行计划执行脚本
			
 
				 3. 支持对脚本执行顺序的优化
			
 
				 4. 提供详细的执行日志和错误处理
			
 
				+
			
 
				+预期的执行计划模式：
			
 
				+{
			
 
				+    "version": "2.0",
			
 
				+    "exec_date": "YYYY-MM-DD",
			
 
				+    "scripts": [
			
 
				+        {
			
 
				+            "task_id": "唯一任务ID",
			
 
				+            "script_id": "唯一脚本ID",
			
 
				+            "script_name": "脚本文件名或标识符",
			
 
				+            "script_type": "python|sql|python_script",
			
 
				+            "target_type": "structure|null",
			
 
				+            "update_mode": "append|full_refresh",
			
 
				+            "target_table": "表名",
			
 
				+            "source_tables": ["表1", "表2"],
			
 
				+            "schedule_status": true,
			
 
				+            "storage_location": "/路径/模式" 或 null,
			
 
				+            "schedule_frequency": "daily|weekly|monthly|quarterly|yearly",
			
 
				+            "target_table_label": "DataModel|DataResource|DataSource"
			
 
				+        },
			
 
				+        ...
			
 
				+    ],
			
 
				+    "model_scripts": ["script_id1", "script_id2", ...],
			
 
				+    "resource_scripts": ["script_id3", "script_id4", ...],
			
 
				+    "execution_order": ["script_id1", "script_id3", "script_id2", "script_id4", ...],
			
 
				+    "script_dependencies": {
			
 
				+        "script_id1": ["script_id3", "script_id4"],
			
 
				+        "script_id2": [],
			
 
				+        ...
			
 
				+    }
			
 
				+}
			
 
				 """
			
 
				 from airflow import DAG
			
 
				 from airflow.operators.python import PythonOperator, ShortCircuitOperator
			
 
				 from airflow.operators.empty import EmptyOperator
			
 
				 from airflow.utils.task_group import TaskGroup
			
 
				 from datetime import datetime, timedelta, date
			
 
				+from airflow.models import Variable
			
 
				 import logging
			
 
				 import networkx as nx
			
 
				 import json
			
@@ -72,7 +104,7 @@ class DecimalEncoder(json.JSONEncoder):
 
				 # 脚本执行函数
			
 
				 #############################################
			
 
				 
			
 
				-def execute_python_script(script_id, script_name, target_table, script_exec_mode, frequency, **kwargs):
			
 
				+def execute_python_script(script_id, script_name, target_table, update_mode, schedule_frequency, **kwargs):
			
 
				     """
			
 
				     执行Python脚本文件并返回执行结果
			
 
				     
			
@@ -80,8 +112,8 @@ def execute_python_script(script_id, script_name, target_table, script_exec_mode
 
				         script_id: 脚本ID
			
 
				         script_name: 脚本文件名（.py文件）
			
 
				         target_table: 目标表名
			
 
				-        script_exec_mode: 执行模式
			
 
				-        frequency: 执行频率
			
 
				+        update_mode: 执行模式
			
 
				+        schedule_frequency: 执行频率
			
 
				         **kwargs: 其他参数，如source_tables、target_type等
			
 
				     
			
 
				     返回:
			
@@ -96,8 +128,8 @@ def execute_python_script(script_id, script_name, target_table, script_exec_mode
 
				     logger.info(f"script_id: {script_id}, 类型: {type(script_id)}")
			
 
				     logger.info(f"script_name: {script_name}, 类型: {type(script_name)}")
			
 
				     logger.info(f"target_table: {target_table}, 类型: {type(target_table)}")
			
 
				-    logger.info(f"script_exec_mode: {script_exec_mode}, 类型: {type(script_exec_mode)}")
			
 
				-    logger.info(f"frequency: {frequency}, 类型: {type(frequency)}")
			
 
				+    logger.info(f"update_mode: {update_mode}, 类型: {type(update_mode)}")
			
 
				+    logger.info(f"schedule_frequency: {schedule_frequency}, 类型: {type(schedule_frequency)}")
			
 
				     logger.info(f"【时间参数】execute_python_script: exec_date={exec_date}, logical_date={logical_date}, local_logical_date={local_logical_date}")
			
 
				 
			
 
				     # 记录额外参数
			
@@ -133,9 +165,9 @@ def execute_python_script(script_id, script_name, target_table, script_exec_mode
 
				             # 构建完整的参数字典
			
 
				             run_params = {
			
 
				                 "table_name": target_table,
			
 
				-                "execution_mode": script_exec_mode,
			
 
				+                "execution_mode": update_mode,
			
 
				                 "exec_date": exec_date,
			
 
				-                "frequency": frequency
			
 
				+                "schedule_frequency": schedule_frequency
			
 
				             }
			
 
				 
			
 
				             ## 添加可能的额外参数
			
@@ -180,7 +212,7 @@ def execute_python_script(script_id, script_name, target_table, script_exec_mode
 
				         return False
			
 
				 
			
 
				 # 使用execute_sql函数代替之前的execute_sql_script
			
 
				-def execute_sql(script_id, script_name, target_table, script_exec_mode, frequency, **kwargs):
			
 
				+def execute_sql(script_id, script_name, target_table, update_mode, schedule_frequency, **kwargs):
			
 
				     """
			
 
				     执行SQL脚本并返回执行结果
			
 
				     
			
@@ -188,8 +220,8 @@ def execute_sql(script_id, script_name, target_table, script_exec_mode, frequenc
 
				         script_id: 脚本ID
			
 
				         script_name: 脚本名称(数据库中的名称)
			
 
				         target_table: 目标表名
			
 
				-        script_exec_mode: 执行模式
			
 
				-        frequency: 执行频率
			
 
				+        update_mode: 执行模式
			
 
				+        schedule_frequency: 执行频率
			
 
				         **kwargs: 其他参数
			
 
				     
			
 
				     返回:
			
@@ -204,8 +236,8 @@ def execute_sql(script_id, script_name, target_table, script_exec_mode, frequenc
 
				     logger.info(f"script_id: {script_id}, 类型: {type(script_id)}")
			
 
				     logger.info(f"script_name: {script_name}, 类型: {type(script_name)}")
			
 
				     logger.info(f"target_table: {target_table}, 类型: {type(target_table)}")
			
 
				-    logger.info(f"script_exec_mode: {script_exec_mode}, 类型: {type(script_exec_mode)}")
			
 
				-    logger.info(f"frequency: {frequency}, 类型: {type(frequency)}")
			
 
				+    logger.info(f"update_mode: {update_mode}, 类型: {type(update_mode)}")
			
 
				+    logger.info(f"schedule_frequency: {schedule_frequency}, 类型: {type(schedule_frequency)}")
			
 
				     logger.info(f"【时间参数】execute_sql: exec_date={exec_date}, logical_date={logical_date}, local_logical_date={local_logical_date}")
			
 
				 
			
 
				     # 记录额外参数
			
@@ -249,9 +281,9 @@ def execute_sql(script_id, script_name, target_table, script_exec_mode, frequenc
 
				                 "target_table": target_table,
			
 
				                 "script_name": script_name,
			
 
				                 "exec_date": exec_date,
			
 
				-                "frequency": frequency,
			
 
				+                "schedule_frequency": schedule_frequency,
			
 
				                 "target_table_label": kwargs.get('target_table_label', ''), # 传递目标表标签，用于ETL幂等性判断
			
 
				-                "execution_mode": script_exec_mode  # 传递执行模式参数
			
 
				+                "update_mode": update_mode  # 传递执行模式参数
			
 
				             }
			
 
				 
			
 
				             # 添加可能的额外参数
			
@@ -297,7 +329,7 @@ def execute_sql(script_id, script_name, target_table, script_exec_mode, frequenc
 
				         return False
			
 
				 
			
 
				 # 使用execute_python函数代替之前的execute_python_script
			
 
				-def execute_python(script_id, script_name, target_table, script_exec_mode, frequency, **kwargs):
			
 
				+def execute_python(script_id, script_name, target_table, update_mode, schedule_frequency, **kwargs):
			
 
				     """
			
 
				     执行Python脚本并返回执行结果
			
 
				     
			
@@ -305,8 +337,8 @@ def execute_python(script_id, script_name, target_table, script_exec_mode, frequ
 
				         script_id: 脚本ID
			
 
				         script_name: 脚本名称(数据库中的名称)
			
 
				         target_table: 目标表名
			
 
				-        script_exec_mode: 执行模式
			
 
				-        frequency: 执行频率
			
 
				+        update_mode: 执行模式
			
 
				+        schedule_frequency: 执行频率
			
 
				         **kwargs: 其他参数
			
 
				     
			
 
				     返回:
			
@@ -321,8 +353,8 @@ def execute_python(script_id, script_name, target_table, script_exec_mode, frequ
 
				     logger.info(f"script_id: {script_id}, 类型: {type(script_id)}")
			
 
				     logger.info(f"script_name: {script_name}, 类型: {type(script_name)}")
			
 
				     logger.info(f"target_table: {target_table}, 类型: {type(target_table)}")
			
 
				-    logger.info(f"script_exec_mode: {script_exec_mode}, 类型: {type(script_exec_mode)}")
			
 
				-    logger.info(f"frequency: {frequency}, 类型: {type(frequency)}")
			
 
				+    logger.info(f"update_mode: {update_mode}, 类型: {type(update_mode)}")
			
 
				+    logger.info(f"schedule_frequency: {schedule_frequency}, 类型: {type(schedule_frequency)}")
			
 
				     logger.info(f"【时间参数】execute_python: exec_date={exec_date}, logical_date={logical_date}, local_logical_date={local_logical_date}")
			
 
				 
			
 
				     # 记录额外参数
			
@@ -366,9 +398,9 @@ def execute_python(script_id, script_name, target_table, script_exec_mode, frequ
 
				                 "target_table": target_table,
			
 
				                 "script_name": script_name,
			
 
				                 "exec_date": exec_date,
			
 
				-                "frequency": frequency,
			
 
				+                "schedule_frequency": schedule_frequency,
			
 
				                 "target_table_label": kwargs.get('target_table_label', ''), # 传递目标表标签
			
 
				-                "execution_mode": script_exec_mode  # 传递执行模式参数
			
 
				+                "update_mode": update_mode  # 传递执行模式参数
			
 
				             }
			
 
				 
			
 
				             # 添加可能的额外参数
			
@@ -538,53 +570,344 @@ def check_execution_plan(**kwargs):
 
				         logger.error("执行计划的scripts字段无效")
			
 
				         return False
			
 
				         
			
 
				-    if not isinstance(execution_plan.get("script_dependencies", {}), dict):
			
 
				-        logger.error("执行计划的script_dependencies字段无效")
			
 
				+    if not isinstance(execution_plan.get("resource_scripts", []), list):
			
 
				+        logger.error("执行计划的resource_scripts字段无效")
			
 
				+        return False
			
 
				+
			
 
				+    if not isinstance(execution_plan.get("model_scripts", []), list):
			
 
				+        logger.error("执行计划的model_scripts字段无效")
			
 
				         return False
			
 
				     
			
 
				     # 检查是否有脚本数据
			
 
				     scripts = execution_plan.get("scripts", [])
			
 
				+    resource_scripts = execution_plan.get("resource_scripts", [])
			
 
				+    model_scripts = execution_plan.get("model_scripts", [])
			
 
				     
			
 
				-    if not scripts:
			
 
				-        logger.warning("执行计划不包含任何脚本")
			
 
				-        # 如果没有脚本，则阻止下游任务执行
			
 
				-        return False
			
 
				-    
			
 
				-    logger.info(f"执行计划验证成功: 包含 {len(scripts)} 个脚本")
			
 
				+    logger.info(f"执行计划验证成功: 包含 {len(scripts)} 个脚本，{len(resource_scripts)} 个资源脚本和 {len(model_scripts)} 个模型脚本")
			
 
				     
			
 
				     # 保存执行计划到XCom以便下游任务使用
			
 
				     kwargs['ti'].xcom_push(key='execution_plan', value=execution_plan)
			
 
				     
			
 
				     return True
			
 
				 
			
 
				-def optimize_execution_order(scripts, script_dependencies):
			
 
				+def save_execution_plan_to_db(execution_plan, dag_id, run_id, logical_date, ds):
			
 
				     """
			
 
				-    使用NetworkX优化脚本执行顺序
			
 
				+    将执行计划保存到airflow_exec_plans表
			
 
				+    
			
 
				+    参数:
			
 
				+        execution_plan (dict): 执行计划字典
			
 
				+        dag_id (str): DAG的ID
			
 
				+        run_id (str): DAG运行的ID
			
 
				+        logical_date (datetime): 逻辑日期
			
 
				+        ds (str): 日期字符串，格式为YYYY-MM-DD
			
 
				+    
			
 
				+    返回:
			
 
				+        bool: 操作是否成功
			
 
				+    """
			
 
				+    try:
			
 
				+        conn = get_pg_conn()
			
 
				+        cursor = conn.cursor()
			
 
				+        
			
 
				+        try:
			
 
				+            # 将执行计划转换为JSON字符串
			
 
				+            plan_json = json.dumps(execution_plan)
			
 
				+            
			
 
				+            # 获取本地时间
			
 
				+            local_logical_date = pendulum.instance(logical_date).in_timezone('Asia/Shanghai')
			
 
				+            
			
 
				+            # 插入记录
			
 
				+            cursor.execute("""
			
 
				+                INSERT INTO airflow_exec_plans
			
 
				+                (dag_id, run_id, logical_date, local_logical_date, exec_date, plan)
			
 
				+                VALUES (%s, %s, %s, %s, %s, %s)
			
 
				+            """, (dag_id, run_id, logical_date, local_logical_date, ds, plan_json))
			
 
				+            
			
 
				+            conn.commit()
			
 
				+            logger.info(f"成功将执行计划保存到airflow_exec_plans表，dag_id={dag_id}, run_id={run_id}, exec_date={ds}")
			
 
				+            return True
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"保存执行计划到数据库时出错: {str(e)}")
			
 
				+            conn.rollback()
			
 
				+            raise Exception(f"PostgreSQL保存执行计划失败: {str(e)}")
			
 
				+        finally:
			
 
				+            cursor.close()
			
 
				+            conn.close()
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"连接PostgreSQL数据库失败: {str(e)}")
			
 
				+        raise Exception(f"无法连接PostgreSQL数据库: {str(e)}")
			
 
				+
			
 
				+def generate_task_id(script_name, source_tables, target_table):
			
 
				+    """
			
 
				+    根据脚本名和表名生成唯一任务ID
			
 
				+    
			
 
				+    参数:
			
 
				+        script_name (str): 脚本文件名
			
 
				+        source_tables (list): 源表列表
			
 
				+        target_table (str): 目标表名
			
 
				+        
			
 
				+    返回:
			
 
				+        str: 唯一的任务ID
			
 
				+    """
			
 
				+    # 移除脚本名的文件扩展名
			
 
				+    script_base = os.path.splitext(script_name)[0]
			
 
				+    
			
 
				+    # 对于特殊脚本如load_file.py，直接使用目标表名
			
 
				+    if script_name.lower() in ['load_file.py']:
			
 
				+        return f"{script_base}_{target_table}"
			
 
				+    
			
 
				+    # 处理源表部分
			
 
				+    if source_tables:
			
 
				+        # 将所有源表按字母顺序排序并连接
			
 
				+        source_part = "_".join(sorted(source_tables))
			
 
				+        # 生成任务ID: 脚本名_源表_to_目标表
			
 
				+        return f"{script_base}_{source_part}_to_{target_table}"
			
 
				+    else:
			
 
				+        # 没有源表时，只使用脚本名和目标表
			
 
				+        return f"{script_base}_{target_table}"
			
 
				+
			
 
				+def prepare_scripts_from_tables(tables_info):
			
 
				+    """
			
 
				+    将表信息转换为脚本信息
			
 
				+    
			
 
				+    参数:
			
 
				+        tables_info (list): 表信息列表
			
 
				+        
			
 
				+    返回:
			
 
				+        list: 脚本信息列表
			
 
				+    """
			
 
				+    scripts = []
			
 
				+    
			
 
				+    for table in tables_info:
			
 
				+        target_table = table['target_table']
			
 
				+        target_table_label = table.get('target_table_label')
			
 
				+        schedule_frequency = table.get('schedule_frequency')
			
 
				+        
			
 
				+        # 处理表的脚本信息
			
 
				+        if 'scripts_info' in table and table['scripts_info']:
			
 
				+            # 表有多个脚本
			
 
				+            for script_name, script_info in table['scripts_info'].items():
			
 
				+                source_tables = script_info.get('sources', [])
			
 
				+                script_type = script_info.get('script_type', 'python')
			
 
				+                update_mode = script_info.get('script_exec_mode', 'append')
			
 
				+                
			
 
				+                # 生成任务ID
			
 
				+                task_id = generate_task_id(script_name, source_tables, target_table)
			
 
				+                
			
 
				+                # 创建脚本信息
			
 
				+                script = {
			
 
				+                    "script_id": task_id,
			
 
				+                    "script_name": script_name,
			
 
				+                    "source_tables": source_tables,
			
 
				+                    "target_table": target_table,
			
 
				+                    "target_table_label": target_table_label,
			
 
				+                    "script_type": script_type,
			
 
				+                    "update_mode": update_mode,
			
 
				+                    "schedule_frequency": schedule_frequency,
			
 
				+                    "task_id": task_id
			
 
				+                }
			
 
				+                
			
 
				+                # 为structure类型添加特殊属性
			
 
				+                if table.get('target_type') == "structure":
			
 
				+                    script["target_type"] = "structure"
			
 
				+                    script["storage_location"] = table.get('storage_location')
			
 
				+                
			
 
				+                scripts.append(script)
			
 
				+                logger.info(f"为表 {target_table} 创建脚本 {script_name}，任务ID: {task_id}")
			
 
				+        else:
			
 
				+            # 表只有单个脚本或没有明确指定脚本信息
			
 
				+            script_name = table.get('script_name')
			
 
				+            
			
 
				+            # 如果没有script_name，使用默认值
			
 
				+            if not script_name:
			
 
				+                script_name = f"{target_table}_script.py"
			
 
				+                logger.warning(f"表 {target_table} 没有指定脚本名，使用默认值: {script_name}")
			
 
				+            
			
 
				+            source_tables = table.get('source_tables', [])
			
 
				+            script_type = table.get('script_type', 'python')
			
 
				+            update_mode = table.get('update_mode', 'append')
			
 
				+            
			
 
				+            # 生成任务ID
			
 
				+            task_id = generate_task_id(script_name, source_tables, target_table)
			
 
				+            
			
 
				+            # 创建脚本信息
			
 
				+            script = {
			
 
				+                "script_id": task_id,
			
 
				+                "script_name": script_name,
			
 
				+                "source_tables": source_tables,
			
 
				+                "target_table": target_table,
			
 
				+                "target_table_label": target_table_label,
			
 
				+                "script_type": script_type,
			
 
				+                "update_mode": update_mode,
			
 
				+                "schedule_frequency": schedule_frequency,
			
 
				+                "task_id": task_id
			
 
				+            }
			
 
				+            
			
 
				+            # 为structure类型添加特殊属性
			
 
				+            if table.get('target_type') == "structure":
			
 
				+                script["target_type"] = "structure"
			
 
				+                script["storage_location"] = table.get('storage_location')
			
 
				+            
			
 
				+            scripts.append(script)
			
 
				+            logger.info(f"为表 {target_table} 创建脚本 {script_name}，任务ID: {task_id}")
			
 
				+    
			
 
				+    return scripts
			
 
				+
			
 
				+def build_script_dependency_graph(scripts):
			
 
				+    """
			
 
				+    处理脚本间的依赖关系
			
 
				     
			
 
				     参数:
			
 
				         scripts (list): 脚本信息列表
			
 
				-        script_dependencies (dict): 脚本依赖关系字典
			
 
				         
			
 
				     返回:
			
 
				-        list: 优化后的脚本执行顺序（脚本ID列表）
			
 
				+        tuple: (依赖关系字典, 图对象)
			
 
				     """
			
 
				-    logger.info("开始使用NetworkX优化脚本执行顺序")
			
 
				+    # 打印所有脚本的源表信息，用于调试
			
 
				+    logger.info("构建脚本依赖图，当前脚本信息:")
			
 
				+    for script in scripts:
			
 
				+        script_id = script['script_id']
			
 
				+        script_name = script['script_name']
			
 
				+        target_table = script['target_table']
			
 
				+        source_tables = script['source_tables']
			
 
				+        logger.info(f"脚本: {script_id} ({script_name}), 目标表: {target_table}, 源表: {source_tables}")
			
 
				+    
			
 
				+    # 创建目标表到脚本ID的映射
			
 
				+    table_to_scripts = {}
			
 
				+    for script in scripts:
			
 
				+        target_table = script['target_table']
			
 
				+        if target_table not in table_to_scripts:
			
 
				+            table_to_scripts[target_table] = []
			
 
				+        table_to_scripts[target_table].append(script['script_id'])
			
 
				+    
			
 
				+    # 记录表到脚本的映射关系
			
 
				+    logger.info("表到脚本的映射关系:")
			
 
				+    for table, script_ids in table_to_scripts.items():
			
 
				+        logger.info(f"表 {table} 由脚本 {script_ids} 生成")
			
 
				+    
			
 
				+    # 创建脚本依赖关系
			
 
				+    script_dependencies = {}
			
 
				+    for script in scripts:
			
 
				+        script_id = script['script_id']
			
 
				+        source_tables = script['source_tables']
			
 
				+        target_table = script['target_table']
			
 
				+        
			
 
				+        # 初始化依赖列表
			
 
				+        script_dependencies[script_id] = []
			
 
				+        
			
 
				+        # 查找源表对应的脚本
			
 
				+        if source_tables:
			
 
				+            logger.info(f"处理脚本 {script_id} 的依赖关系，源表: {source_tables}")
			
 
				+            for source_table in source_tables:
			
 
				+                if source_table in table_to_scripts:
			
 
				+                    # 添加所有生成源表的脚本作为依赖
			
 
				+                    for source_script_id in table_to_scripts[source_table]:
			
 
				+                        if source_script_id != script_id:  # 避免自我依赖
			
 
				+                            script_dependencies[script_id].append(source_script_id)
			
 
				+                            logger.info(f"添加依赖: {script_id} 依赖于 {source_script_id} (表 {target_table} 依赖于表 {source_table})")
			
 
				+                else:
			
 
				+                    logger.warning(f"源表 {source_table} 没有对应的脚本，无法为脚本 {script_id} 创建依赖")
			
 
				+        else:
			
 
				+            logger.info(f"脚本 {script_id} 没有源表依赖")
			
 
				+    
			
 
				+    # 尝试从Neo4j额外查询依赖关系（如果脚本没有显式的source_tables）
			
 
				+    try:
			
 
				+        driver = get_neo4j_driver()
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"连接Neo4j数据库失败: {str(e)}")
			
 
				+        raise Exception(f"无法连接Neo4j数据库: {str(e)}")
			
 
				+    
			
 
				+    try:
			
 
				+        with driver.session() as session:
			
 
				+            # 验证连接
			
 
				+            try:
			
 
				+                test_result = session.run("RETURN 1 as test")
			
 
				+                test_record = test_result.single()
			
 
				+                if not test_record or test_record.get("test") != 1:
			
 
				+                    logger.error("Neo4j连接测试失败")
			
 
				+                    raise Exception("Neo4j连接测试失败")
			
 
				+            except Exception as e:
			
 
				+                logger.error(f"Neo4j连接测试失败: {str(e)}")
			
 
				+                raise Exception(f"Neo4j连接测试失败: {str(e)}")
			
 
				+                
			
 
				+            for script in scripts:
			
 
				+                script_id = script['script_id']
			
 
				+                target_table = script['target_table']
			
 
				+                
			
 
				+                # 只处理没有源表的脚本
			
 
				+                if not script['source_tables'] and not script_dependencies[script_id]:
			
 
				+                    logger.info(f"脚本 {script_id} 没有源表，尝试从Neo4j直接查询表 {target_table} 的依赖")
			
 
				+                    
			
 
				+                    # 查询表的直接依赖
			
 
				+                    query = """
			
 
				+                        MATCH (target {en_name: $table_name})-[rel]->(dep)
			
 
				+                        RETURN dep.en_name AS dep_name
			
 
				+                    """
			
 
				+                    
			
 
				+                    try:
			
 
				+                        result = session.run(query, table_name=target_table)
			
 
				+                        records = list(result)
			
 
				+                        
			
 
				+                        for record in records:
			
 
				+                            dep_name = record.get("dep_name")
			
 
				+                            if dep_name and dep_name in table_to_scripts:
			
 
				+                                for dep_script_id in table_to_scripts[dep_name]:
			
 
				+                                    if dep_script_id != script_id:  # 避免自我依赖
			
 
				+                                        script_dependencies[script_id].append(dep_script_id)
			
 
				+                                        logger.info(f"从Neo4j添加额外依赖: {script_id} 依赖于 {dep_script_id} (表 {target_table} 依赖于表 {dep_name})")
			
 
				+                    except Exception as e:
			
 
				+                        logger.warning(f"从Neo4j查询表 {target_table} 依赖时出错: {str(e)}")
			
 
				+                        raise Exception(f"Neo4j查询表依赖失败: {str(e)}")
			
 
				+    except Exception as e:
			
 
				+        if "Neo4j" in str(e):
			
 
				+            # 已经处理过的错误，直接抛出
			
 
				+            raise
			
 
				+        else:
			
 
				+            logger.error(f"访问Neo4j获取额外依赖时出错: {str(e)}")
			
 
				+            raise Exception(f"Neo4j依赖查询失败: {str(e)}")
			
 
				+    finally:
			
 
				+        driver.close()
			
 
				     
			
 
				     # 构建依赖图
			
 
				     G = nx.DiGraph()
			
 
				     
			
 
				     # 添加所有脚本作为节点
			
 
				     for script in scripts:
			
 
				-        script_id = script['script_id']
			
 
				-        G.add_node(script_id)
			
 
				+        G.add_node(script['script_id'])
			
 
				     
			
 
				     # 添加依赖边
			
 
				     for script_id, dependencies in script_dependencies.items():
			
 
				-        for dep_id in dependencies:
			
 
				-            # 添加从script_id到dep_id的边，表示script_id依赖于dep_id
			
 
				-            G.add_edge(script_id, dep_id)
			
 
				-            logger.debug(f"添加依赖边: {script_id} -> {dep_id}")
			
 
				+        if dependencies:
			
 
				+            for dep_id in dependencies:
			
 
				+                # 添加从script_id到dep_id的边，表示script_id依赖于dep_id
			
 
				+                G.add_edge(script_id, dep_id)
			
 
				+                logger.debug(f"添加依赖边: {script_id} -> {dep_id}")
			
 
				+        else:
			
 
				+            logger.info(f"脚本 {script_id} 没有依赖的上游脚本")
			
 
				+    
			
 
				+    # 确保所有脚本ID都在依赖关系字典中
			
 
				+    for script in scripts:
			
 
				+        script_id = script['script_id']
			
 
				+        if script_id not in script_dependencies:
			
 
				+            script_dependencies[script_id] = []
			
 
				     
			
 
				+    # 记录每个脚本的依赖数量
			
 
				+    for script_id, deps in script_dependencies.items():
			
 
				+        logger.info(f"脚本 {script_id} 有 {len(deps)} 个依赖: {deps}")
			
 
				+    
			
 
				+    return script_dependencies, G
			
 
				+
			
 
				+def optimize_script_execution_order(scripts, script_dependencies, G):
			
 
				+    """
			
 
				+    使用NetworkX优化脚本执行顺序
			
 
				+    
			
 
				+    参数:
			
 
				+        scripts (list): 脚本信息列表
			
 
				+        script_dependencies (dict): 脚本依赖关系字典
			
 
				+        G (nx.DiGraph): 依赖图对象
			
 
				+        
			
 
				+    返回:
			
 
				+        list: 优化后的脚本执行顺序（脚本ID列表）
			
 
				+    """
			
 
				     # 检查是否有循环依赖
			
 
				     try:
			
 
				         cycles = list(nx.simple_cycles(G))
			
@@ -608,12 +931,13 @@ def optimize_execution_order(scripts, script_dependencies):
 
				         # 反转结果，使上游任务先执行
			
 
				         execution_order.reverse()
			
 
				         
			
 
				-        logger.info(f"NetworkX优化后的脚本执行顺序: {execution_order}")
			
 
				+        logger.info(f"生成优化的脚本执行顺序: {execution_order}")
			
 
				         return execution_order
			
 
				     except Exception as e:
			
 
				         logger.error(f"生成脚本执行顺序时出错: {str(e)}")
			
 
				         # 出错时返回原始脚本ID列表，不进行优化
			
 
				-        return [script['script_id'] for script in scripts]
			
 
				+        return [script['script_id'] for script in scripts] 
			
 
				+    
			
 
				 
			
 
				 def create_execution_plan(**kwargs):
			
 
				     """
			
@@ -652,7 +976,7 @@ def create_execution_plan(**kwargs):
 
				         # 如果执行计划中没有execution_order或为空，使用NetworkX优化
			
 
				         if not execution_order:
			
 
				             logger.info("执行计划中没有execution_order，使用NetworkX进行优化")
			
 
				-            execution_order = optimize_execution_order(scripts, script_dependencies)
			
 
				+            execution_order = optimize_script_execution_order(scripts, script_dependencies)
			
 
				             execution_plan["execution_order"] = execution_order
			
 
				         
			
 
				         # 保存完整的执行计划到XCom
			
@@ -740,7 +1064,7 @@ with DAG(
 
				             # 如果执行计划中没有execution_order或为空，使用NetworkX优化
			
 
				             if not execution_order:
			
 
				                 logger.info("执行计划中没有execution_order，使用NetworkX进行优化")
			
 
				-                execution_order = optimize_execution_order(scripts, script_dependencies)
			
 
				+                execution_order = optimize_script_execution_order(scripts, script_dependencies, nx.DiGraph())
			
 
				             
			
 
				             logger.info(f"执行计划: exec_date={exec_date}, scripts数量={len(scripts)}")
			
 
				             
			
@@ -769,7 +1093,7 @@ with DAG(
 
				                 script_name = script.get("script_name")
			
 
				                 target_table = script.get("target_table")
			
 
				                 script_type = script.get("script_type", "python")
			
 
				-                script_exec_mode = script.get("script_exec_mode", "append")
			
 
				+                update_mode = script.get("update_mode", "append")
			
 
				                 source_tables = script.get("source_tables", [])
			
 
				                 target_table_label = script.get("target_table_label", "")
			
 
				                 
			
@@ -792,9 +1116,9 @@ with DAG(
 
				                     "script_id": script_id,
			
 
				                     "script_name": script_name,
			
 
				                     "target_table": target_table,
			
 
				-                    "script_exec_mode": script_exec_mode,
			
 
				+                    "update_mode": update_mode,
			
 
				                     "source_tables": source_tables,
			
 
				-                    "frequency": script.get("frequency", "daily"),  # 显式添加frequency参数
			
 
				+                    "schedule_frequency": script.get("schedule_frequency", "daily"),
			
 
				                     "target_table_label": target_table_label,
			
 
				                     # logical_date会在任务执行时由Airflow自动添加
			
 
				                 }
			
@@ -897,4 +1221,4 @@ with DAG(
 
				     # 设置依赖关系，确保执行阶段完成后触发finalize DAG
			
 
				     execution_group >> trigger_finalize_dag
			
 
				 
			
 
				-logger.info(f"DAG dataops_productline_execute_dag 定义完成") 
			
 
				+logger.info(f"DAG dataops_productline_execute_dag 定义完成")
			
--- a/dags/dataops_productline_manual_trigger_dag.py
+++ b/dags/dataops_productline_manual_trigger_dag.py
@@ -66,7 +66,7 @@ from config import NEO4J_CONFIG, SCRIPTS_BASE_PATH, PG_CONFIG
 
				 import traceback
			
 
				 import pendulum
			
 
				 import pytz
			
 
				-from utils import get_pg_conn, get_cn_exec_date, check_script_exists, get_complete_script_info
			
 
				+from utils import get_pg_conn, get_cn_exec_date, check_script_exists, get_complete_script_info, get_table_label
			
 
				 from airflow.exceptions import AirflowException
			
 
				 
			
 
				 # 设置logger
			
@@ -118,35 +118,6 @@ def get_dag_params(**context):
 
				     logger.info(f"最终使用的参数 - 脚本名称: {script_name}, 目标表: {target_table}, 依赖级别: {dependency_level}, 执行日期: {exec_date}")
			
 
				     return script_name, target_table, dependency_level, exec_date, logical_date
			
 
				 
			
 
				-def get_table_label(table_name):
			
 
				-    """确定表的标签类型（DataModel or DataResource）"""
			
 
				-    driver = GraphDatabase.driver(
			
 
				-        NEO4J_CONFIG['uri'], 
			
 
				-        auth=(NEO4J_CONFIG['user'], NEO4J_CONFIG['password'])
			
 
				-    )
			
 
				-    query = """
			
 
				-        MATCH (n {en_name: $table_name})
			
 
				-        RETURN labels(n) AS labels
			
 
				-    """
			
 
				-    try:
			
 
				-        with driver.session() as session:
			
 
				-            result = session.run(query, table_name=table_name)
			
 
				-            record = result.single()
			
 
				-            if record and record.get("labels"):
			
 
				-                labels = record.get("labels")
			
 
				-                if "DataModel" in labels:
			
 
				-                    return "DataModel"
			
 
				-                elif "DataResource" in labels:
			
 
				-                    return "DataResource"
			
 
				-                elif "DataSource" in labels:
			
 
				-                    return "DataSource"
			
 
				-            return None
			
 
				-    except Exception as e:
			
 
				-        logger.error(f"获取表 {table_name} 的标签时出错: {str(e)}")
			
 
				-        return None
			
 
				-    finally:
			
 
				-        driver.close()
			
 
				-
			
 
				 def find_target_table_for_script(script_name):
			
 
				     """
			
 
				     根据脚本名称查找对应的目标表
			
--- a/dags/dataops_productline_prepare_dag.py
+++ b/dags/dataops_productline_prepare_dag.py
@@ -15,7 +15,8 @@ import hashlib
 
				 import pendulum
			
 
				 from utils import (
			
 
				     get_pg_conn, 
			
 
				-    get_neo4j_driver
			
 
				+    get_neo4j_driver,
			
 
				+    get_cn_exec_date
			
 
				 )
			
 
				 from config import PG_CONFIG, NEO4J_CONFIG, DATAOPS_DAGS_PATH
			
 
				 
			
@@ -23,58 +24,135 @@ from config import PG_CONFIG, NEO4J_CONFIG, DATAOPS_DAGS_PATH
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 def get_enabled_tables():
			
 
				-    """获取所有启用的表"""
			
 
				+    """Return the names of every table whose scheduling is enabled in Neo4j.
			
 
				+
			
 
				+    A table is enabled when a DataModel DERIVED_FROM relationship or a DataResource
			
 
				+    ORIGINATES_FROM relationship has schedule_status = true, or when a DataResource
			
 
				+    node of type 'structure' has schedule_status = true on the node itself.
			
 
				+
			
 
				+    Returns a de-duplicated list of en_name values in first-seen order.
			
 
				+    Raises Exception when the driver cannot be obtained or a query fails.
			
 
				+    """
			
 
				     try:
			
 
				-        conn = get_pg_conn()
			
 
				-        cursor = conn.cursor()
			
 
				-        try:
			
 
				-            cursor.execute("""
			
 
				-                SELECT owner_id, table_name 
			
 
				-                FROM schedule_status 
			
 
				-                WHERE schedule_is_enabled = TRUE
			
 
				-            """)
			
 
				-            result = cursor.fetchall()
			
 
				-            return [row[1] for row in result]  # 只返回表名
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"获取启用表失败: {str(e)}")
			
 
				-            raise Exception(f"PostgreSQL数据库查询失败: {str(e)}")
			
 
				-        finally:
			
 
				-            cursor.close()
			
 
				-            conn.close()
			
 
				+        driver = get_neo4j_driver()
			
 
				+        try:
			
 
				+            # DataModel tables with a scheduled DERIVED_FROM relationship.
			
 
				+            query_datamodel = """
			
 
				+                MATCH (target:DataModel)-[rel:DERIVED_FROM]->()
			
 
				+                WHERE rel.schedule_status = true
			
 
				+                RETURN target.en_name AS table_name
			
 
				+            """
			
 
				+            # DataResource tables with a scheduled ORIGINATES_FROM relationship.
			
 
				+            query_dataresource = """
			
 
				+                MATCH (target:DataResource)-[rel:ORIGINATES_FROM]->()
			
 
				+                WHERE rel.schedule_status = true
			
 
				+                RETURN target.en_name AS table_name
			
 
				+            """
			
 
				+            # 'structure' DataResource nodes carry schedule_status on the node itself.
			
 
				+            query_structure = """
			
 
				+                MATCH (target:DataResource)
			
 
				+                WHERE target.type = 'structure' AND target.schedule_status = true
			
 
				+                RETURN target.en_name AS table_name
			
 
				+            """
			
 
				+            with driver.session() as session:
			
 
				+                try:
			
 
				+                    tables = []
			
 
				+                    for query in (query_datamodel, query_dataresource, query_structure):
			
 
				+                        for record in session.run(query):
			
 
				+                            table_name = record.get("table_name")
			
 
				+                            if table_name and table_name not in tables:
			
 
				+                                tables.append(table_name)
			
 
				+                    logger.info(f"从Neo4j找到 {len(tables)} 个启用的表: {tables}")
			
 
				+                    return tables
			
 
				+                except Exception as e:
			
 
				+                    logger.error(f"Neo4j查询启用的表失败: {str(e)}")
			
 
				+                    raise Exception(f"Neo4j查询启用的表失败: {str(e)}") from e
			
 
				+        finally:
			
 
				+            # Release the driver's connections on every exit path.
			
 
				+            driver.close()
			
 
				     except Exception as e:
			
 
				-        logger.error(f"连接PostgreSQL数据库失败: {str(e)}")
			
 
				-        raise Exception(f"无法连接PostgreSQL数据库: {str(e)}")
			
 
				+        # Query failures re-raised above are caught here too, so they are re-wrapped.
			
 
				+        logger.error(f"连接Neo4j数据库失败: {str(e)}")
			
 
				+        raise Exception(f"无法连接Neo4j数据库: {str(e)}") from e
			
 
				+    
			
 
				 
			
 
				 def check_table_directly_subscribed(table_name):
			
 
				-    """检查表是否在schedule_status表中直接调度"""
			
 
				+    """Report whether a table has scheduling enabled directly on it in Neo4j.
			
 
				+
			
 
				+    DataModel tables are checked through their DERIVED_FROM relationships and
			
 
				+    DataResource tables through ORIGINATES_FROM; a DataResource node whose type
			
 
				+    is 'structure' is judged by the schedule_status property on the node itself.
			
 
				+
			
 
				+    Returns True only when such a flag is true; False when the node is missing
			
 
				+    or is neither a DataModel nor a DataResource. Always returns a real bool.
			
 
				+    Raises Exception when the driver cannot be obtained or a query fails.
			
 
				+    """
			
 
				     try:
			
 
				-        conn = get_pg_conn()
			
 
				-        cursor = conn.cursor()
			
 
				-        try:
			
 
				-            cursor.execute("""
			
 
				-                SELECT schedule_is_enabled
			
 
				-                FROM schedule_status 
			
 
				-                WHERE table_name = %s
			
 
				-            """, (table_name,))
			
 
				-            result = cursor.fetchone()
			
 
				-            return result and result[0] is True
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"检查表订阅状态失败: {str(e)}")
			
 
				-            raise Exception(f"PostgreSQL查询表订阅状态失败: {str(e)}")
			
 
				-        finally:
			
 
				-            cursor.close()
			
 
				-            conn.close()
			
 
				+        driver = get_neo4j_driver()
			
 
				     except Exception as e:
			
 
				-        logger.error(f"连接PostgreSQL数据库失败: {str(e)}")
			
 
				-        raise Exception(f"无法连接PostgreSQL数据库: {str(e)}")
			
 
				+        logger.error(f"连接Neo4j数据库失败: {str(e)}")
			
 
				+        raise Exception(f"无法连接Neo4j数据库: {str(e)}") from e
			
 
				+
			
 
				+    try:
			
 
				+        with driver.session() as session:
			
 
				+            # Relationship-level checks: is any outgoing script relationship scheduled?
			
 
				+            query_datamodel = """
			
 
				+                MATCH (target:DataModel {en_name: $table_name})-[rel:DERIVED_FROM]->(source)
			
 
				+                WHERE rel.schedule_status = true
			
 
				+                RETURN count(rel) > 0 AS directly_subscribed
			
 
				+            """
			
 
				+            query_dataresource = """
			
 
				+                MATCH (target:DataResource {en_name: $table_name})-[rel:ORIGINATES_FROM]->(source)
			
 
				+                WHERE rel.schedule_status = true
			
 
				+                RETURN count(rel) > 0 AS directly_subscribed
			
 
				+            """
			
 
				+            # The node's labels decide which of the checks applies.
			
 
				+            labels_query = """
			
 
				+                MATCH (n {en_name: $table_name})
			
 
				+                RETURN labels(n) AS labels
			
 
				+            """
			
 
				+            result = session.run(labels_query, table_name=table_name)
			
 
				+            record = result.single()
			
 
				+            if not record:
			
 
				+                logger.warning(f"在Neo4j中未找到表 {table_name} 的标签信息")
			
 
				+                return False
			
 
				+            labels = record.get("labels") or []
			
 
				+            # Run the check that matches the node's label.
			
 
				+            if "DataModel" in labels:
			
 
				+                result = session.run(query_datamodel, table_name=table_name)
			
 
				+            elif "DataResource" in labels:
			
 
				+                # A 'structure' resource keeps its flag on the node, not on a relationship.
			
 
				+                structure_query = """
			
 
				+                    MATCH (n:DataResource {en_name: $table_name})
			
 
				+                    RETURN n.type AS type, n.schedule_status AS schedule_status
			
 
				+                """
			
 
				+                result = session.run(structure_query, table_name=table_name)
			
 
				+                record = result.single()
			
 
				+                if record and record.get("type") == "structure":
			
 
				+                    # A missing (null) property counts as not scheduled.
			
 
				+                    return bool(record.get("schedule_status"))
			
 
				+                # Any other resource type is judged by its ORIGINATES_FROM relationships.
			
 
				+                result = session.run(query_dataresource, table_name=table_name)
			
 
				+            else:
			
 
				+                logger.warning(f"表 {table_name} 不是DataModel或DataResource类型")
			
 
				+                return False
			
 
				+            record = result.single()
			
 
				+            # Coerce so callers never receive None or a raw property value.
			
 
				+            return bool(record and record.get("directly_subscribed"))
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"检查表订阅状态失败: {str(e)}")
			
 
				+        raise Exception(f"Neo4j查询表订阅状态失败: {str(e)}") from e
			
 
				+    finally:
			
 
				+        driver.close()
			
 
				+
			
 
				 
			
 
				-def should_execute_today(table_name, frequency, exec_date):
			
 
				+def should_execute_today(table_name, schedule_frequency, exec_date):
			
 
				     """
			
 
				     判断指定频率的表在给定执行日期是否应该执行
			
 
				     
			
 
				     参数:
			
 
				         table_name (str): 表名，用于日志记录
			
 
				-        frequency (str): 调度频率，如'daily'、'weekly'、'monthly'、'yearly'，为None时默认为'daily'
			
 
				+        schedule_frequency (str): 调度频率，如'daily'、'weekly'、'monthly'、'quarterly'、'yearly'，为None时默认为'daily'
			
 
				         exec_date (str): 执行日期，格式为'YYYY-MM-DD'
			
 
				     
			
 
				     返回:
			
@@ -91,40 +169,41 @@ def should_execute_today(table_name, frequency, exec_date):
 
				     next_date = exec_date_obj.add(days=1)
			
 
				     
			
 
				     # 如果频率为None或空字符串，默认为daily
			
 
				-    if not frequency:
			
 
				+    if not schedule_frequency:
			
 
				         logger.info(f"表 {table_name} 未指定调度频率，默认为daily")
			
 
				         return True
			
 
				     
			
 
				-    frequency = frequency.lower() if isinstance(frequency, str) else 'daily'
			
 
				+    schedule_frequency = schedule_frequency.lower() if isinstance(schedule_frequency, str) else 'daily'
			
 
				     
			
 
				-    if frequency == 'daily':
			
 
				+    if schedule_frequency == 'daily':
			
 
				         # 日任务每天都执行
			
 
				         return True
			
 
				-    elif frequency == 'weekly':
			
 
				+    elif schedule_frequency == 'weekly':
			
 
				         # 周任务只在周日执行（因为exec_date+1是周一时才执行）
			
 
				         is_sunday = next_date.day_of_week == 1  # 1表示周一
			
 
				         logger.info(f"表 {table_name} 是weekly任务，exec_date={exec_date}，next_date={next_date.to_date_string()}，是否周日: {is_sunday}")
			
 
				         return is_sunday
			
 
				-    elif frequency == 'monthly':
			
 
				+    elif schedule_frequency == 'monthly':
			
 
				         # 月任务只在每月最后一天执行（因为exec_date+1是月初时才执行）
			
 
				         is_month_end = next_date.day == 1
			
 
				         logger.info(f"表 {table_name} 是monthly任务，exec_date={exec_date}，next_date={next_date.to_date_string()}，是否月末: {is_month_end}")
			
 
				         return is_month_end
			
 
				-    elif frequency == 'quarterly':
			
 
				+    elif schedule_frequency == 'quarterly':
			
 
				         # 季度任务只在每季度最后一天执行（因为exec_date+1是季度初时才执行）
			
 
				         is_quarter_end = next_date.day == 1 and next_date.month in [1, 4, 7, 10]
			
 
				         logger.info(f"表 {table_name} 是quarterly任务，exec_date={exec_date}，next_date={next_date.to_date_string()}，是否季末: {is_quarter_end}")
			
 
				         return is_quarter_end
			
 
				-    elif frequency == 'yearly':
			
 
				+    elif schedule_frequency == 'yearly':
			
 
				         # 年任务只在每年最后一天执行（因为exec_date+1是年初时才执行）
			
 
				         is_year_end = next_date.day == 1 and next_date.month == 1
			
 
				         logger.info(f"表 {table_name} 是yearly任务，exec_date={exec_date}，next_date={next_date.to_date_string()}，是否年末: {is_year_end}")
			
 
				         return is_year_end
			
 
				     else:
			
 
				         # 未知频率，默认执行
			
 
				-        logger.warning(f"表 {table_name} 使用未知的调度频率: {frequency}，默认执行")
			
 
				+        logger.warning(f"表 {table_name} 使用未知的调度频率: {schedule_frequency}，默认执行")
			
 
				         return True
			
 
				 
			
 
				+
			
 
				 def get_table_info_from_neo4j(table_name):
			
 
				     """从Neo4j获取表的详细信息，保留完整的scripts_info并确保正确获取源表依赖"""
			
 
				     try:
			
@@ -157,8 +236,9 @@ def get_table_info_from_neo4j(table_name):
 
				             # 查询表标签和状态
			
 
				             query_table = """
			
 
				                 MATCH (t {en_name: $table_name})
			
 
				-                RETURN labels(t) AS labels, t.status AS status, t.frequency AS frequency,
			
 
				-                       t.type AS type, t.storage_location AS storage_location
			
 
				+                RETURN labels(t) AS labels, t.status AS status,
			
 
				+                       t.type AS type, t.storage_location AS storage_location,
			
 
				+                       t.update_mode as update_mode
			
 
				             """
			
 
				             try:
			
 
				                 result = session.run(query_table, table_name=table_name)
			
@@ -171,7 +251,6 @@ def get_table_info_from_neo4j(table_name):
 
				                 labels = record.get("labels", [])
			
 
				                 table_info['target_table_label'] = [label for label in labels if label in ["DataResource", "DataModel", "DataSource"]][0] if labels else None
			
 
				                 table_info['target_table_status'] = record.get("status", True)  # 默认为True
			
 
				-                table_info['frequency'] = record.get("frequency")
			
 
				                 table_info['target_type'] = record.get("type")  # 获取type属性
			
 
				                 table_info['storage_location'] = record.get("storage_location")  # 获取storage_location属性
			
 
				                 
			
@@ -182,18 +261,23 @@ def get_table_info_from_neo4j(table_name):
 
				                         # 对于structure类型，设置默认值，不查询关系
			
 
				                         table_info['source_tables'] = []  # 使用空数组表示无源表
			
 
				                         table_info['script_name'] = "load_file.py"
			
 
				-                        table_info['script_type'] = "python"
			
 
				+                        table_info['script_type'] = "python_script"
			
 
				                         
			
 
				-                        # csv类型的DataResource没有上游，使用默认的append模式
			
 
				-                        table_info['script_exec_mode'] = "append"
			
 
				-                        logger.info(f"表 {table_name} 为structure类型，使用默认执行模式: append")
			
 
				+                        # 从节点属性中获取update_mode，如果不存在则使用默认值
			
 
				+                        table_info['script_update_mode'] = record.get("update_mode", "append")
			
 
				+                        table_info['schedule_frequency'] = record.get("schedule_frequency", "daily")
			
 
				+                        table_info['schedule_status'] = record.get("schedule_status", True)
			
 
				+                        
			
 
				+                        logger.info(f"表 {table_name} 为structure类型，使用执行模式: {table_info['script_update_mode']}")
			
 
				 
			
 
				                         # 添加脚本信息
			
 
				                         table_info['scripts_info'] = {
			
 
				                             "load_file.py": {
			
 
				                                 "sources": [],
			
 
				-                                "script_type": "python",
			
 
				-                                "script_exec_mode": "append"
			
 
				+                                "script_type": "python_script",
			
 
				+                                "script_update_mode": table_info['script_update_mode'],
			
 
				+                                "schedule_frequency": table_info['schedule_frequency'],
			
 
				+                                "schedule_status": table_info['schedule_status']
			
 
				                             }
			
 
				                         }
			
 
				 
			
@@ -202,32 +286,29 @@ def get_table_info_from_neo4j(table_name):
 
				                         # 查询源表关系和脚本信息
			
 
				                         query_rel = """
			
 
				                             MATCH (target {en_name: $table_name})-[rel:ORIGINATES_FROM]->(source)
			
 
				-                            WITH source, rel, 
			
 
				-                                 CASE WHEN rel.script_name IS NULL THEN target.en_name + '_script.py' ELSE rel.script_name END AS script_name,
			
 
				-                                 CASE WHEN rel.script_type IS NULL THEN 'python' ELSE rel.script_type END AS script_type
			
 
				-                            RETURN source.en_name AS source_table, script_name AS script_name,
			
 
				-                                   script_type AS script_type, 'append' AS script_exec_mode
			
 
				+                            RETURN source.en_name AS source_table, rel.script_name AS script_name,
			
 
				+                                  rel.script_type AS script_type, rel.update_mode AS script_update_mode,
			
 
				+                                  rel.schedule_frequency AS schedule_frequency, 
			
 
				+                                  rel.schedule_status AS schedule_status
			
 
				                         """
			
 
				                 elif "DataModel" in labels:
			
 
				                     # 查询源表关系和脚本信息
			
 
				                     query_rel = """
			
 
				                         MATCH (target {en_name: $table_name})-[rel:DERIVED_FROM]->(source)
			
 
				-                        WITH source, rel, 
			
 
				-                             CASE WHEN rel.script_name IS NULL THEN target.en_name + '_script.py' ELSE rel.script_name END AS script_name,
			
 
				-                             CASE WHEN rel.script_type IS NULL THEN 'python' ELSE rel.script_type END AS script_type
			
 
				-                        RETURN source.en_name AS source_table, script_name AS script_name,
			
 
				-                               script_type AS script_type, 'append' AS script_exec_mode
			
 
				+                        RETURN source.en_name AS source_table, rel.script_name AS script_name,
			
 
				+                              rel.script_type AS script_type, rel.update_mode AS script_update_mode,
			
 
				+                              rel.schedule_frequency AS schedule_frequency, 
			
 
				+                              rel.schedule_status AS schedule_status
			
 
				                     """
			
 
				                 else:
			
 
				                     logger.warning(f"表 {table_name} 不是DataResource或DataModel类型")
			
 
				                     # 即使不是这两种类型，也尝试查询其源表依赖关系
			
 
				                     query_rel = """
			
 
				                         MATCH (target {en_name: $table_name})-[rel]->(source)
			
 
				-                        WITH source, rel, 
			
 
				-                             CASE WHEN rel.script_name IS NULL THEN target.en_name + '_script.py' ELSE rel.script_name END AS script_name,
			
 
				-                             CASE WHEN rel.script_type IS NULL THEN 'python' ELSE rel.script_type END AS script_type
			
 
				-                        RETURN source.en_name AS source_table, script_name AS script_name,
			
 
				-                               script_type AS script_type, 'append' AS script_exec_mode
			
 
				+                        RETURN source.en_name AS source_table, rel.script_name AS script_name,
			
 
				+                              rel.script_type AS script_type, rel.update_mode AS script_update_mode,
			
 
				+                              rel.schedule_frequency AS schedule_frequency, 
			
 
				+                              rel.schedule_status AS schedule_status
			
 
				                     """
			
 
				                 
			
 
				                 # 收集所有关系记录
			
@@ -256,7 +337,8 @@ def get_table_info_from_neo4j(table_name):
 
				                 logger.info(f"表 {table_name} 查询到 {len(records)} 条关系记录")
			
 
				                 for idx, rec in enumerate(records):
			
 
				                     logger.info(f"关系记录[{idx}]: source_table={rec.get('source_table')}, script_name={rec.get('script_name')}, " 
			
 
				-                                f"script_type={rec.get('script_type')}, script_exec_mode={rec.get('script_exec_mode')}")
			
 
				+                                f"script_type={rec.get('script_type')}, script_update_mode={rec.get('script_update_mode')}, "
			
 
				+                                f"schedule_frequency={rec.get('schedule_frequency')}, schedule_status={rec.get('schedule_status')}")
			
 
				                 
			
 
				                 if records:
			
 
				                     # 按脚本名称分组源表
			
@@ -264,8 +346,10 @@ def get_table_info_from_neo4j(table_name):
 
				                     for record in records:
			
 
				                         script_name = record.get("script_name")
			
 
				                         source_table = record.get("source_table")
			
 
				-                        script_type = record.get("script_type", "python")
			
 
				-                        script_exec_mode = record.get("script_exec_mode", "append")
			
 
				+                        script_type = record.get("script_type")
			
 
				+                        script_update_mode = record.get("script_update_mode")
			
 
				+                        schedule_frequency = record.get("schedule_frequency")
			
 
				+                        schedule_status = record.get("schedule_status")
			
 
				                         
			
 
				                         logger.info(f"处理记录: source_table={source_table}, script_name={script_name}")
			
 
				                         
			
@@ -278,7 +362,9 @@ def get_table_info_from_neo4j(table_name):
 
				                             scripts_info[script_name] = {
			
 
				                                 "sources": [],
			
 
				                                 "script_type": script_type,
			
 
				-                                "script_exec_mode": script_exec_mode
			
 
				+                                "script_update_mode": script_update_mode,
			
 
				+                                "schedule_frequency": schedule_frequency,
			
 
				+                                "schedule_status": schedule_status
			
 
				                             }
			
 
				                         
			
 
				                         # 确保source_table有值且不为None才添加到sources列表中
			
@@ -299,7 +385,9 @@ def get_table_info_from_neo4j(table_name):
 
				                             table_info['source_tables'] = script_info["sources"]  # 使用数组
			
 
				                             table_info['script_name'] = script_name
			
 
				                             table_info['script_type'] = script_info["script_type"]
			
 
				-                            table_info['script_exec_mode'] = script_info["script_exec_mode"]
			
 
				+                            table_info['script_update_mode'] = script_info["script_update_mode"]
			
 
				+                            table_info['schedule_frequency'] = script_info["schedule_frequency"]
			
 
				+                            table_info['schedule_status'] = script_info["schedule_status"]
			
 
				                             logger.info(f"表 {table_name} 有单个脚本 {script_name}，源表: {script_info['sources']}")
			
 
				                         else:
			
 
				                             # 如果有多个不同脚本，记录多脚本信息
			
@@ -309,7 +397,9 @@ def get_table_info_from_neo4j(table_name):
 
				                             table_info['source_tables'] = scripts_info[first_script]["sources"]
			
 
				                             table_info['script_name'] = first_script
			
 
				                             table_info['script_type'] = scripts_info[first_script]["script_type"]
			
 
				-                            table_info['script_exec_mode'] = scripts_info[first_script]["script_exec_mode"]
			
 
				+                            table_info['script_update_mode'] = scripts_info[first_script]["script_update_mode"]
			
 
				+                            table_info['schedule_frequency'] = scripts_info[first_script]["schedule_frequency"]
			
 
				+                            table_info['schedule_status'] = scripts_info[first_script]["schedule_status"]
			
 
				                     else:
			
 
				                         logger.warning(f"表 {table_name} 未找到有效的脚本信息")
			
 
				                         table_info['source_tables'] = []  # 使用空数组
			
@@ -338,15 +428,19 @@ def get_table_info_from_neo4j(table_name):
 
				                         script_name = f"{table_name}_script.py"
			
 
				                         table_info['source_tables'] = source_tables
			
 
				                         table_info['script_name'] = script_name
			
 
				-                        table_info['script_type'] = "python"
			
 
				-                        table_info['script_exec_mode'] = "append"
			
 
				+                        table_info['script_type'] = "python_script"
			
 
				+                        table_info['script_update_mode'] = "append"
			
 
				+                        table_info['schedule_frequency'] = "daily"
			
 
				+                        table_info['schedule_status'] = True
			
 
				                         
			
 
				                         # 创建scripts_info
			
 
				                         table_info['scripts_info'] = {
			
 
				                             script_name: {
			
 
				                                 "sources": source_tables,
			
 
				-                                "script_type": "python",
			
 
				-                                "script_exec_mode": "append"
			
 
				+                                "script_type": "python_script",
			
 
				+                                "script_update_mode": "append",
			
 
				+                                "schedule_frequency": "daily",
			
 
				+                                "schedule_status": True
			
 
				                             }
			
 
				                         }
			
 
				                         
			
@@ -355,30 +449,38 @@ def get_table_info_from_neo4j(table_name):
 
				                         logger.warning(f"直接查询表 {table_name} 的依赖关系时出错: {str(e)}")
			
 
				                         table_info['source_tables'] = []  # 使用空数组
			
 
				                         table_info['script_name'] = f"{table_name}_script.py"
			
 
				-                        table_info['script_type'] = "python"
			
 
				-                        table_info['script_exec_mode'] = "append"
			
 
				+                        table_info['script_type'] = "python_script"
			
 
				+                        table_info['script_update_mode'] = "append"
			
 
				+                        table_info['schedule_frequency'] = "daily"
			
 
				+                        table_info['schedule_status'] = True
			
 
				                         
			
 
				                         # 创建空的scripts_info
			
 
				                         table_info['scripts_info'] = {
			
 
				                             table_info['script_name']: {
			
 
				                                 "sources": [],
			
 
				-                                "script_type": "python",
			
 
				-                                "script_exec_mode": "append"
			
 
				+                                "script_type": "python_script",
			
 
				+                                "script_update_mode": "append",
			
 
				+                                "schedule_frequency": "daily",
			
 
				+                                "schedule_status": True
			
 
				                             }
			
 
				                         }
			
 
				             else:
			
 
				                 logger.warning(f"在Neo4j中找不到表 {table_name} 的信息，设置默认值")
			
 
				                 table_info['source_tables'] = []
			
 
				                 table_info['script_name'] = f"{table_name}_script.py"
			
 
				-                table_info['script_type'] = "python"
			
 
				-                table_info['script_exec_mode'] = "append"
			
 
				+                table_info['script_type'] = "python_script"
			
 
				+                table_info['script_update_mode'] = "append"
			
 
				+                table_info['schedule_frequency'] = "daily"
			
 
				+                table_info['schedule_status'] = True
			
 
				                 
			
 
				                 # 创建空的scripts_info
			
 
				                 table_info['scripts_info'] = {
			
 
				                     table_info['script_name']: {
			
 
				                         "sources": [],
			
 
				-                        "script_type": "python",
			
 
				-                        "script_exec_mode": "append"
			
 
				+                        "script_type": "python_script",
			
 
				+                        "script_update_mode": "append",
			
 
				+                        "schedule_frequency": "daily",
			
 
				+                        "schedule_status": True
			
 
				                     }
			
 
				                 }
			
 
				     except Exception as e:
			
@@ -393,6 +495,7 @@ def get_table_info_from_neo4j(table_name):
 
				     
			
 
				     return table_info
			
 
				 
			
 
				+
			
 
				 def process_dependencies(tables_info):
			
 
				     """处理表间依赖关系，添加被动调度的表"""
			
 
				     # 存储所有表信息的字典
			
@@ -420,9 +523,11 @@ def process_dependencies(tables_info):
 
				                 if table_info.get('target_table_label') == 'DataModel':
			
 
				                     # 查询其依赖表
			
 
				                     query = """
			
 
				-                        MATCH (dm {en_name: $table_name})-[:DERIVED_FROM]->(dep)
			
 
				+                        MATCH (dm {en_name: $table_name})-[rel:DERIVED_FROM]->(dep)
			
 
				                         RETURN dep.en_name AS dep_name, labels(dep) AS dep_labels, 
			
 
				-                               dep.status AS dep_status, dep.frequency AS dep_frequency
			
 
				+                               dep.status AS dep_status, rel.schedule_frequency AS schedule_frequency,
			
 
				+                               rel.update_mode AS update_mode, rel.schedule_status AS schedule_status,
			
 
				+                               rel.script_name AS script_name, rel.script_type AS script_type
			
 
				                     """
			
 
				                     try:
			
 
				                         result = session.run(query, table_name=table_name)
			
@@ -434,7 +539,11 @@ def process_dependencies(tables_info):
 
				                         dep_name = record.get("dep_name")
			
 
				                         dep_labels = record.get("dep_labels", [])
			
 
				                         dep_status = record.get("dep_status", True)
			
 
				-                        dep_frequency = record.get("dep_frequency")
			
 
				+                        schedule_frequency = record.get("schedule_frequency")
			
 
				+                        update_mode = record.get("update_mode")
			
 
				+                        schedule_status = record.get("schedule_status", False)
			
 
				+                        script_name = record.get("script_name")
			
 
				+                        script_type = record.get("script_type")
			
 
				                         
			
 
				                         # 处理未被直接调度的依赖表
			
 
				                         if dep_name and dep_name not in all_tables:
			
@@ -444,9 +553,17 @@ def process_dependencies(tables_info):
 
				                             dep_info = get_table_info_from_neo4j(dep_name)
			
 
				                             dep_info['is_directly_schedule'] = False
			
 
				                             
			
 
				-                            # 处理调度频率继承
			
 
				-                            if not dep_info.get('frequency'):
			
 
				-                                dep_info['frequency'] = table_info.get('frequency')
			
 
				+                            # 手动更新一些可能从关系中获取到的属性
			
 
				+                            if schedule_frequency:
			
 
				+                                dep_info['schedule_frequency'] = schedule_frequency
			
 
				+                            if update_mode:
			
 
				+                                dep_info['script_update_mode'] = update_mode
			
 
				+                            if schedule_status is not None:
			
 
				+                                dep_info['schedule_status'] = schedule_status
			
 
				+                            if script_name:
			
 
				+                                dep_info['script_name'] = script_name
			
 
				+                            if script_type:
			
 
				+                                dep_info['script_type'] = script_type
			
 
				                             
			
 
				                             all_tables[dep_name] = dep_info
			
 
				     except Exception as e:
			
@@ -461,6 +578,7 @@ def process_dependencies(tables_info):
 
				     
			
 
				     return list(all_tables.values())
			
 
				 
			
 
				+
			
 
				 def filter_invalid_tables(tables_info):
			
 
				     """过滤无效表及其依赖，使用NetworkX构建依赖图"""
			
 
				     # 构建表名到索引的映射
			
@@ -550,6 +668,7 @@ def filter_invalid_tables(tables_info):
 
				     
			
 
				     return valid_tables
			
 
				 
			
 
				+
			
 
				 def touch_product_scheduler_file():
			
 
				     """
			
 
				     更新产品线调度器DAG文件的修改时间，触发重新解析
			
@@ -574,30 +693,30 @@ def touch_product_scheduler_file():
 
				         logger.error(f"触发DAG重新解析时出错: {str(e)}")
			
 
				         return False
			
 
				 
			
 
				-def get_subscription_state_hash():
			
 
				-    """获取订阅表状态的哈希值"""
			
 
				-    try:
			
 
				-        conn = get_pg_conn()
			
 
				-        cursor = conn.cursor()
			
 
				-        try:
			
 
				-            cursor.execute("""
			
 
				-                SELECT table_name, schedule_is_enabled
			
 
				-                FROM schedule_status
			
 
				-                ORDER BY table_name
			
 
				-            """)
			
 
				-            rows = cursor.fetchall()
			
 
				-            # 将所有行拼接成一个字符串，然后计算哈希值
			
 
				-            data_str = '|'.join(f"{row[0]}:{row[1]}" for row in rows)
			
 
				-            return hashlib.md5(data_str.encode()).hexdigest()
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"计算订阅表状态哈希值时出错: {str(e)}")
			
 
				-            raise Exception(f"PostgreSQL查询订阅表状态失败: {str(e)}")
			
 
				-        finally:
			
 
				-            cursor.close()
			
 
				-            conn.close()
			
 
				-    except Exception as e:
			
 
				-        logger.error(f"连接PostgreSQL数据库失败: {str(e)}")
			
 
				-        raise Exception(f"无法连接PostgreSQL数据库: {str(e)}")
			
 
				+# def get_subscription_state_hash():
			
 
				+#     """获取订阅表状态的哈希值"""
			
 
				+#     try:
			
 
				+#         conn = get_pg_conn()
			
 
				+#         cursor = conn.cursor()
			
 
				+#         try:
			
 
				+#             cursor.execute("""
			
 
				+#                 SELECT table_name, schedule_is_enabled
			
 
				+#                 FROM schedule_status
			
 
				+#                 ORDER BY table_name
			
 
				+#             """)
			
 
				+#             rows = cursor.fetchall()
			
 
				+#             # 将所有行拼接成一个字符串，然后计算哈希值
			
 
				+#             data_str = '|'.join(f"{row[0]}:{row[1]}" for row in rows)
			
 
				+#             return hashlib.md5(data_str.encode()).hexdigest()
			
 
				+#         except Exception as e:
			
 
				+#             logger.error(f"计算订阅表状态哈希值时出错: {str(e)}")
			
 
				+#             raise Exception(f"PostgreSQL查询订阅表状态失败: {str(e)}")
			
 
				+#         finally:
			
 
				+#             cursor.close()
			
 
				+#             conn.close()
			
 
				+#     except Exception as e:
			
 
				+#         logger.error(f"连接PostgreSQL数据库失败: {str(e)}")
			
 
				+#         raise Exception(f"无法连接PostgreSQL数据库: {str(e)}")
			
 
				 
			
 
				 def check_execution_plan_in_db(**kwargs):
			
 
				     """
			
@@ -607,8 +726,7 @@ def check_execution_plan_in_db(**kwargs):
 
				     # 获取执行日期
			
 
				     dag_run = kwargs.get('dag_run')
			
 
				     logical_date = dag_run.logical_date
			
 
				-    local_logical_date = pendulum.instance(logical_date).in_timezone('Asia/Shanghai')
			
 
				-    exec_date = local_logical_date.strftime('%Y-%m-%d')
			
 
				+    exec_date, local_logical_date = get_cn_exec_date(logical_date)
			
 
				     logger.info(f"logical_date： {logical_date} ")
			
 
				     logger.info(f"local_logical_date {local_logical_date} ")
			
 
				     logger.info(f"检查执行日期 exec_date {exec_date} 的执行计划是否存在于数据库中")
			
@@ -749,6 +867,7 @@ def generate_task_id(script_name, source_tables, target_table):
 
				         # 没有源表时，只使用脚本名和目标表
			
 
				         return f"{script_base}_{target_table}"
			
 
				 
			
 
				+
			
 
				 def prepare_scripts_from_tables(tables_info):
			
 
				     """
			
 
				     将表信息转换为脚本信息
			
@@ -764,15 +883,17 @@ def prepare_scripts_from_tables(tables_info):
 
				     for table in tables_info:
			
 
				         target_table = table['target_table']
			
 
				         target_table_label = table.get('target_table_label')
			
 
				-        frequency = table.get('frequency')
			
 
				+        schedule_frequency = table.get('schedule_frequency')
			
 
				         
			
 
				         # 处理表的脚本信息
			
 
				         if 'scripts_info' in table and table['scripts_info']:
			
 
				             # 表有多个脚本
			
 
				             for script_name, script_info in table['scripts_info'].items():
			
 
				                 source_tables = script_info.get('sources', [])
			
 
				-                script_type = script_info.get('script_type', 'python')
			
 
				-                script_exec_mode = script_info.get('script_exec_mode', 'append')
			
 
				+                script_type = script_info.get('script_type', 'python_script')
			
 
				+                script_update_mode = script_info.get('script_update_mode', 'append')
			
 
				+                script_schedule_frequency = script_info.get('schedule_frequency', schedule_frequency)
			
 
				+                script_schedule_status = script_info.get('schedule_status', True)
			
 
				                 
			
 
				                 # 生成任务ID
			
 
				                 task_id = generate_task_id(script_name, source_tables, target_table)
			
@@ -785,8 +906,9 @@ def prepare_scripts_from_tables(tables_info):
 
				                     "target_table": target_table,
			
 
				                     "target_table_label": target_table_label,
			
 
				                     "script_type": script_type,
			
 
				-                    "script_exec_mode": script_exec_mode,
			
 
				-                    "frequency": frequency,
			
 
				+                    "update_mode": script_update_mode,  # 使用update_mode代替script_update_mode
			
 
				+                    "schedule_frequency": script_schedule_frequency,
			
 
				+                    "schedule_status": script_schedule_status,
			
 
				                     "task_id": task_id
			
 
				                 }
			
 
				                 
			
@@ -807,8 +929,10 @@ def prepare_scripts_from_tables(tables_info):
 
				                 logger.warning(f"表 {target_table} 没有指定脚本名，使用默认值: {script_name}")
			
 
				             
			
 
				             source_tables = table.get('source_tables', [])
			
 
				-            script_type = table.get('script_type', 'python')
			
 
				-            script_exec_mode = table.get('script_exec_mode', 'append')
			
 
				+            script_type = table.get('script_type', 'python_script')
			
 
				+            script_update_mode = table.get('script_update_mode', 'append')
			
 
				+            table_schedule_frequency = table.get('schedule_frequency', 'daily')
			
 
				+            table_schedule_status = table.get('schedule_status', True)
			
 
				             
			
 
				             # 生成任务ID
			
 
				             task_id = generate_task_id(script_name, source_tables, target_table)
			
@@ -821,8 +945,9 @@ def prepare_scripts_from_tables(tables_info):
 
				                 "target_table": target_table,
			
 
				                 "target_table_label": target_table_label,
			
 
				                 "script_type": script_type,
			
 
				-                "script_exec_mode": script_exec_mode,
			
 
				-                "frequency": frequency,
			
 
				+                "update_mode": script_update_mode,  # 使用update_mode代替script_update_mode
			
 
				+                "schedule_frequency": table_schedule_frequency,
			
 
				+                "schedule_status": table_schedule_status,
			
 
				                 "task_id": task_id
			
 
				             }
			
 
				             
			
@@ -836,6 +961,7 @@ def prepare_scripts_from_tables(tables_info):
 
				     
			
 
				     return scripts
			
 
				 
			
 
				+
			
 
				 def build_script_dependency_graph(scripts):
			
 
				     """
			
 
				     处理脚本间的依赖关系
			
@@ -980,6 +1106,7 @@ def build_script_dependency_graph(scripts):
 
				     
			
 
				     return script_dependencies, G
			
 
				 
			
 
				+
			
 
				 def optimize_script_execution_order(scripts, script_dependencies, G):
			
 
				     """
			
 
				     使用NetworkX优化脚本执行顺序
			
@@ -1035,6 +1162,7 @@ def set_dataops_dags_path_variable():
 
				         logger.error(f"设置Airflow变量DATAOPS_DAGS_PATH失败: {str(e)}")
			
 
				         return False
			
 
				 
			
 
				+
			
 
				 def prepare_productline_dag_schedule(**kwargs):
			
 
				     """准备产品线DAG调度任务的主函数"""
			
 
				     # 添加更严格的异常处理
			
@@ -1069,23 +1197,23 @@ def prepare_productline_dag_schedule(**kwargs):
 
				             raise Exception(f"检查执行计划失败，可能是数据库连接问题: {str(e)}")
			
 
				         
			
 
				         # 条件2: schedule_status表中的数据发生了变更
			
 
				-        if not need_create_plan:
			
 
				-            # 计算当前哈希值
			
 
				-            current_hash = get_subscription_state_hash()
			
 
				-            # 读取上次记录的哈希值
			
 
				-            hash_file = os.path.join(os.path.dirname(__file__), '.subscription_state')
			
 
				-            last_hash = None
			
 
				-            if os.path.exists(hash_file):
			
 
				-                try:
			
 
				-                    with open(hash_file, 'r') as f:
			
 
				-                        last_hash = f.read().strip()
			
 
				-                except Exception as e:
			
 
				-                    logger.warning(f"读取上次订阅状态哈希值失败: {str(e)}")
			
 
				+        # if not need_create_plan:
			
 
				+        #     # 计算当前哈希值
			
 
				+        #     current_hash = get_subscription_state_hash()
			
 
				+        #     # 读取上次记录的哈希值
			
 
				+        #     hash_file = os.path.join(os.path.dirname(__file__), '.subscription_state')
			
 
				+        #     last_hash = None
			
 
				+        #     if os.path.exists(hash_file):
			
 
				+        #         try:
			
 
				+        #             with open(hash_file, 'r') as f:
			
 
				+        #                 last_hash = f.read().strip()
			
 
				+        #         except Exception as e:
			
 
				+        #             logger.warning(f"读取上次订阅状态哈希值失败: {str(e)}")
			
 
				             
			
 
				-            # 如果哈希值不同，表示数据发生了变更
			
 
				-            if current_hash != last_hash:
			
 
				-                logger.info(f"检测到schedule_status表数据变更。旧哈希值: {last_hash}, 新哈希值: {current_hash}")
			
 
				-                need_create_plan = True
			
 
				+        #     # 如果哈希值不同，表示数据发生了变更
			
 
				+        #     if current_hash != last_hash:
			
 
				+        #         logger.info(f"检测到schedule_status表数据变更。旧哈希值: {last_hash}, 新哈希值: {current_hash}")
			
 
				+        #         need_create_plan = True
			
 
				         
			
 
				         # 手动触发模式覆盖以上判断
			
 
				         if is_manual_trigger:
			
@@ -1100,7 +1228,7 @@ def prepare_productline_dag_schedule(**kwargs):
 
				         # 继续处理，创建新的执行计划
			
 
				         # 1. 获取启用的表
			
 
				         enabled_tables = get_enabled_tables()
			
 
				-        logger.info(f"从schedule_status表获取到 {len(enabled_tables)} 个启用的表")
			
 
				+        logger.info(f"获取到 {len(enabled_tables)} 个启用的表")
			
 
				         
			
 
				         if not enabled_tables:
			
 
				             logger.warning("没有找到启用的表，准备工作结束")
			
@@ -1119,13 +1247,13 @@ def prepare_productline_dag_schedule(**kwargs):
 
				         filtered_tables_info = []
			
 
				         for table_info in tables_info:
			
 
				             table_name = table_info['target_table']
			
 
				-            frequency = table_info.get('frequency')
			
 
				+            schedule_frequency = table_info.get('schedule_frequency')
			
 
				             
			
 
				-            if should_execute_today(table_name, frequency, exec_date):
			
 
				+            if should_execute_today(table_name, schedule_frequency, exec_date):
			
 
				                 filtered_tables_info.append(table_info)
			
 
				-                logger.info(f"表 {table_name} (频率: {frequency}) 将在今天{exec_date}执行")
			
 
				+                logger.info(f"表 {table_name} (频率: {schedule_frequency}) 将在今天{exec_date}执行")
			
 
				             else:
			
 
				-                logger.info(f"表 {table_name} (频率: {frequency}) 今天{exec_date}不执行，已过滤")
			
 
				+                logger.info(f"表 {table_name} (频率: {schedule_frequency}) 今天{exec_date}不执行，已过滤")
			
 
				         
			
 
				         logger.info(f"按调度频率过滤后，今天{exec_date}需要执行的表有 {len(filtered_tables_info)} 个")
			
 
				 
			
@@ -1262,11 +1390,11 @@ def prepare_productline_dag_schedule(**kwargs):
 
				             }
			
 
				             
			
 
				             # 10. 更新订阅表状态哈希值
			
 
				-            current_hash = get_subscription_state_hash()
			
 
				-            hash_file = os.path.join(os.path.dirname(__file__), '.subscription_state')
			
 
				-            with open(hash_file, 'w') as f:
			
 
				-                f.write(current_hash)
			
 
				-            logger.info(f"已更新订阅表状态哈希值: {current_hash}")
			
 
				+            # current_hash = get_subscription_state_hash()
			
 
				+            # hash_file = os.path.join(os.path.dirname(__file__), '.subscription_state')
			
 
				+            # with open(hash_file, 'w') as f:
			
 
				+            #     f.write(current_hash)
			
 
				+            # logger.info(f"已更新订阅表状态哈希值: {current_hash}")
			
 
				             
			
 
				             # 11. 触发产品线执行DAG重新解析
			
 
				             touch_product_scheduler_file()
			
@@ -1325,7 +1453,7 @@ def prepare_productline_dag_schedule(**kwargs):
 
				 # 创建DAG
			
 
				 with DAG(
			
 
				     "dataops_productline_prepare_dag",
			
 
				-    start_date=datetime(2024, 1, 1),
			
 
				+    start_date=datetime(2025, 1, 1),
			
 
				     # 每小时执行一次
			
 
				     schedule_interval="0 * * * *",
			
 
				     catchup=False,