3 月之前 · e2b640ae89
--- a/app/core/data_parse/parse_card.py
+++ b/app/core/data_parse/parse_card.py
@@ -91,6 +91,7 @@ def process_business_card_image(image_file):
 
				                 extracted_data = parse_text_with_qwen25VLplus(image_data)
			
 
				                 logging.info("成功使用 Qwen 2.5 VL Plus 模型解析名片")
			
 
				                 
			
 
				+                
			
 
				                 return {
			
 
				                     'code': 200,
			
 
				                     'success': True,
			
@@ -669,6 +670,8 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
 
				                         # 记录成功解析的人才信息到parsed_talents表
			
 
				                         try:
			
 
				                             from app.core.data_parse.parse_task import record_parsed_talent
			
 
				+                            from app.core.data_parse.parse_system import get_brand_group_by_hotel
			
 
				+                            
			
 
				                             talent_data = process_result.get('data')
			
 
				                             if talent_data and isinstance(talent_data, dict):
			
 
				                                 # 在记录到parsed_talents表之前，设置image_path和origin_source
			
@@ -689,6 +692,39 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
 
				                                         if isinstance(career_entry, dict):
			
 
				                                             career_entry['image_path'] = minio_path
			
 
				                                 
			
 
				+                                # 调用get_brand_group_by_hotel获取品牌和集团信息
			
 
				+                                if talent_data.get('hotel_zh'):
			
 
				+                                    try:
			
 
				+                                        brand_result = get_brand_group_by_hotel(talent_data['hotel_zh'])
			
 
				+                                        if brand_result.get('success') and brand_result.get('data'):
			
 
				+                                            brand_data = brand_result['data']
			
 
				+                                            # 赋值品牌和集团信息
			
 
				+                                            talent_data['brand_zh'] = brand_data.get('brand_name_zh', '')
			
 
				+                                            talent_data['brand_en'] = brand_data.get('brand_name_en', '')
			
 
				+                                            talent_data['affiliation_zh'] = brand_data.get('group_name_zh', '')
			
 
				+                                            talent_data['affiliation_en'] = brand_data.get('group_name_en', '')
			
 
				+                                            logging.info(f"成功获取品牌和集团信息: {brand_data}")
			
 
				+                                        else:
			
 
				+                                            logging.warning(f"获取品牌信息失败: {brand_result.get('message', '')}")
			
 
				+                                            # 设置默认值
			
 
				+                                            talent_data['brand_zh'] = ''
			
 
				+                                            talent_data['brand_en'] = ''
			
 
				+                                            talent_data['affiliation_zh'] = ''
			
 
				+                                            talent_data['affiliation_en'] = ''
			
 
				+                                    except Exception as brand_error:
			
 
				+                                        logging.error(f"调用get_brand_group_by_hotel失败: {str(brand_error)}")
			
 
				+                                        # 设置默认值
			
 
				+                                        talent_data['brand_zh'] = ''
			
 
				+                                        talent_data['brand_en'] = ''
			
 
				+                                        talent_data['affiliation_zh'] = ''
			
 
				+                                        talent_data['affiliation_en'] = ''
			
 
				+                                else:
			
 
				+                                    # 没有酒店信息，设置默认值
			
 
				+                                    talent_data['brand_zh'] = ''
			
 
				+                                    talent_data['brand_en'] = ''
			
 
				+                                    talent_data['affiliation_zh'] = ''
			
 
				+                                    talent_data['affiliation_en'] = ''
			
 
				+                                
			
 
				                                 record_result = record_parsed_talent(talent_data, task_id, task_type)
			
 
				                                 if record_result.get('success'):
			
 
				                                     # 收集成功解析的记录ID
			
--- a/app/core/data_parse/parse_system.py
+++ b/app/core/data_parse/parse_system.py
@@ -1382,10 +1382,10 @@ def get_talent_tag_list():
 
				         # 构建Cypher查询语句，获取分类为talent的标签
			
 
				         query = """
			
 
				         MATCH (n:DataLabel)
			
 
				-        WHERE n.category CONTAINS 'talent' OR n.category CONTAINS '人才'
			
 
				-        RETURN id(n) as id, n.name as name, n.en_name as en_name, 
			
 
				-               n.category as category, n.describe as description, 
			
 
				-               n.status as status, n.time as time
			
 
				+        WHERE n.category CONTAINS 'talentmap' OR n.category CONTAINS '人才地图'
			
 
				+        RETURN id(n) as id, n.name_zh as name_zh, n.name_en as name_en, 
			
 
				+               n.category as category, n.describe as describe, 
			
 
				+               n.status as status, n.time as time, n.node_type as node_type
			
 
				         ORDER BY n.time DESC
			
 
				         """
			
 
				         
			
@@ -1398,12 +1398,13 @@ def get_talent_tag_list():
 
				             for record in result:
			
 
				                 tag = {
			
 
				                     'id': record['id'],
			
 
				-                    'name': record['name'],
			
 
				-                    'en_name': record['en_name'],
			
 
				+                    'name_zh': record['name_zh'],
			
 
				+                    'name_en': record['name_en'],
			
 
				                     'category': record['category'],
			
 
				-                    'description': record['description'],
			
 
				+                    'describe': record['describe'],
			
 
				                     'status': record['status'],
			
 
				-                    'time': record['time']
			
 
				+                    'time': record['time'],
			
 
				+                    'node_type': record['node_type']
			
 
				                 }
			
 
				                 tags.append(tag)
			
 
				         
			
@@ -1679,7 +1680,7 @@ def query_neo4j_graph(query_requirement):
 
				         
			
 
				         # 构建匹配标签的提示语
			
 
				         matching_prompt = f"""
			
 
				-        请从上传的查询需求文本中提取以下结构化信息。其中datalabel字段从可用标签列表里进行匹配，匹配结果填写可用标签列表里的标签名称。需要严格按照JSON格式输出：   
			
 
				+        请从上传的查询需求文本中提取以下结构化信息。其中datalabel字段从可用标签列表里进行匹配，匹配结果填写可用标签列表里的标签名称。hotel字段提取查询需求中提到的酒店名称。需要严格按照JSON格式输出：   
			
 
				         {{
			
 
				          "basic_info": {{
			
 
				             "中文姓名": "",
			
@@ -1694,6 +1695,9 @@ def query_neo4j_graph(query_requirement):
 
				         }},
			
 
				          "datalabel": [
			
 
				             "标签1","标签2","标签3"
			
 
				+        ],
			
 
				+         "hotel": [
			
 
				+            "酒店名称1","酒店名称2","酒店名称3"
			
 
				         ]
			
 
				         }}
			
 
				         ## 查询需求文本
			
@@ -1704,9 +1708,11 @@ def query_neo4j_graph(query_requirement):
 
				         
			
 
				         输出要求：
			
 
				         1. 中文名称优先，有英文名称也要提取保留
			
 
				-        2. 年龄字段只需填写数字。
			
 
				+        2. 年龄字段只需填写数字
			
 
				         3. 标签没有被匹配到，datalabel字段可以为空数组
			
 
				-        4. 只需返回JSON字符串，不要返回其他信息
			
 
				+        4. 酒店名称提取查询需求中明确提到的酒店名称
			
 
				+        5. 如果没有提到酒店信息，hotel字段可以为空数组
			
 
				+        6. 只需返回JSON字符串，不要返回其他信息
			
 
				         """
			
 
				         
			
 
				         # 调用阿里千问API匹配标签
			
@@ -1725,11 +1731,13 @@ def query_neo4j_graph(query_requirement):
 
				         # 解析API响应
			
 
				         matching_content = completion.choices[0].message.content
			
 
				         
			
 
				-        # 直接解析JSON响应，提取datalabel字段
			
 
				+        # 直接解析JSON响应，提取datalabel和hotel字段
			
 
				         parsed_content = json.loads(matching_content)
			
 
				         matched_labels = parsed_content.get('datalabel', [])
			
 
				+        matched_hotels = parsed_content.get('hotel', [])
			
 
				         
			
 
				         logging.info(f"匹配到的标签: {matched_labels}")
			
 
				+        logging.info(f"匹配到的酒店: {matched_hotels}")
			
 
				         
			
 
				         # 步骤3: 构建查询逻辑和Cypher语句
			
 
				         logging.info("第三步：构建查询逻辑和Cypher语句")
			
@@ -1829,26 +1837,121 @@ def query_neo4j_graph(query_requirement):
 
				         
			
 
				         if matched_labels:
			
 
				             condition_params['labels'] = matched_labels
			
 
				-            logging.info(f"构建DataLabel和Hotel条件查询，标签: {matched_labels}")
			
 
				+            logging.info(f"构建DataLabel条件查询，标签: {matched_labels}")
			
 
				+        
			
 
				+        if matched_hotels:
			
 
				+            condition_params['hotels'] = matched_hotels
			
 
				+            logging.info(f"构建Hotel条件查询，酒店: {matched_hotels}")
			
 
				+        
			
 
				+        # 确保参数不为空时才添加到查询中
			
 
				+        if not matched_labels:
			
 
				+            # 如果没有标签，需要修改Cypher查询以避免引用$labels参数
			
 
				+            logging.info("没有标签条件，将调整Cypher查询")
			
 
				         
			
 
				         # 步骤4: 执行查询并返回结果
			
 
				         logging.info("第四步：执行查询并返回结果")
			
 
				         
			
 
				+        # 检查是否有查询条件，如果都没有则直接返回空结果
			
 
				+        if not talent_conditions and not matched_labels and not matched_hotels:
			
 
				+            logging.info("没有查询条件，直接返回空结果")
			
 
				+            return {
			
 
				+                'code': 200,
			
 
				+                'success': True,
			
 
				+                'message': '查询条件没有匹配到任何人才，返回空结果',
			
 
				+                'query': '查询条件没有匹配到任何人才，返回空结果',
			
 
				+                'matched_labels': matched_labels,
			
 
				+                'matched_hotels': matched_hotels,
			
 
				+                'non_empty_fields': non_empty_fields,
			
 
				+                'data': []
			
 
				+            }
			
 
				+        
			
 
				         # 构建完整的Cypher查询语句
			
 
				-        if matched_labels:
			
 
				-            # 有标签条件的情况 - 查找与条件子集（DataLabel和Hotel）有关系的Talent节点
			
 
				-            # 使用OR逻辑：Talent有WORK_FOR关系链路或者有BELONGS_TO关系链路的节点都可以查询出来
			
 
				+        if matched_hotels and matched_labels:
			
 
				+            # 情况1：提供了酒店名称和标签名称
			
 
				+            # 通过酒店名称查到一组Talent节点，通过标签查到另一组Talent节点，两组节点组合去重
			
 
				+            logging.info("情况1：同时有酒店名称和标签名称，使用组合查询方式")
			
 
				+            
			
 
				+                        # 使用UNION合并两个查询结果
			
 
				             cypher_script = f"""
			
 
				+            // 查询通过酒店名称匹配的Talent节点
			
 
				             {talent_subset_query}
			
 
				             WHERE EXISTS {{
			
 
				-              // 条件1：存在WORK_FOR关系链路
			
 
				-              MATCH (t)-[:WORK_FOR]->(:Hotel)-[:HAS_LABEL]->(dl:DataLabel)
			
 
				-              WHERE dl.name_zh IN $labels
			
 
				-            }} OR EXISTS {{
			
 
				-              // 条件2：存在BELONGS_TO关系链路
			
 
				-              MATCH (t)-[:BELONGS_TO]->(dl2:DataLabel)
			
 
				-              WHERE dl2.name_zh  IN $labels
			
 
				+              // 条件：存在WORK_FOR关系链路，且酒店名称匹配
			
 
				+              MATCH (t)-[:WORK_FOR]->(h:Hotel)
			
 
				+              WHERE h.hotel_zh IN $hotels
			
 
				             }}
			
 
				+            RETURN DISTINCT
			
 
				+              t.pg_id AS pg_id,
			
 
				+              t.name_zh AS name_zh,
			
 
				+              t.name_en AS name_en,
			
 
				+              t.mobile AS mobile,
			
 
				+              t.email AS email,
			
 
				+              t.updated_at AS updated_at
			
 
				+            
			
 
				+            UNION
			
 
				+            
			
 
				+            // 查询通过标签扩展遍历匹配的Talent节点
			
 
				+            // 步骤1: 定义标签条件列表
			
 
				+            WITH $labels AS targetLabels
			
 
				+            
			
 
				+            // 步骤2: 匹配标签条件节点
			
 
				+            MATCH (tag:DataLabel)
			
 
				+            WHERE tag.name_zh IN targetLabels
			
 
				+            WITH collect(tag) AS startNodes
			
 
				+            
			
 
				+            // 步骤3: 使用扩展遍历查找相关Talent节点
			
 
				+            UNWIND startNodes AS startTag
			
 
				+            MATCH (startTag)<-[:BELONGS_TO|WORK_AS]-(t:Talent)
			
 
				+            {f"WHERE {' AND '.join(talent_conditions)}" if talent_conditions else ""}
			
 
				+            
			
 
				+            // 步骤4: 返回去重结果
			
 
				+            RETURN DISTINCT
			
 
				+              t.pg_id AS pg_id,
			
 
				+              t.name_zh AS name_zh,
			
 
				+              t.name_en AS name_en,
			
 
				+              t.mobile AS mobile,
			
 
				+              t.email AS email,
			
 
				+              t.updated_at AS updated_at
			
 
				+            """
			
 
				+            
			
 
				+        elif matched_hotels and not matched_labels:
			
 
				+            # 情况2：只提供了酒店名称，没有标签名称
			
 
				+            # 查询Talent的WORK_FOR与指定酒店的关系
			
 
				+            cypher_script = f"""
			
 
				+            {talent_subset_query}
			
 
				+            WHERE EXISTS {{
			
 
				+              // 条件：存在WORK_FOR关系链路，且酒店名称匹配
			
 
				+              MATCH (t)-[:WORK_FOR]->(h:Hotel)
			
 
				+              WHERE h.hotel_zh IN $hotels
			
 
				+            }}
			
 
				+            RETURN DISTINCT 
			
 
				+              t.pg_id AS pg_id, 
			
 
				+              t.name_zh AS name_zh, 
			
 
				+              t.name_en AS name_en,
			
 
				+              t.mobile AS mobile, 
			
 
				+              t.email AS email, 
			
 
				+              t.updated_at AS updated_at
			
 
				+            """
			
 
				+            
			
 
				+        elif not matched_hotels and matched_labels:
			
 
				+            # 情况3：没有提供酒店名称，但是有指定的标签名称
			
 
				+            # 通过标签扩展遍历查询Talent节点
			
 
				+            logging.info("情况3：只有标签名称，使用标签扩展遍历查询方式")
			
 
				+            cypher_script = f"""
			
 
				+            // 步骤1: 定义标签条件列表
			
 
				+            WITH $labels AS targetLabels
			
 
				+            
			
 
				+            // 步骤2: 匹配标签条件节点
			
 
				+            MATCH (tag:DataLabel)
			
 
				+            WHERE tag.name_zh IN targetLabels
			
 
				+            WITH collect(tag) AS startNodes
			
 
				+            
			
 
				+            // 步骤3: 使用扩展遍历查找相关Talent节点
			
 
				+            UNWIND startNodes AS startTag
			
 
				+            MATCH (startTag)<-[:BELONGS_TO|WORK_AS]-(t:Talent)
			
 
				+            {f"WHERE {' AND '.join(talent_conditions)}" if talent_conditions else ""}
			
 
				+            
			
 
				+            // 步骤4: 返回去重结果
			
 
				             RETURN DISTINCT 
			
 
				               t.pg_id AS pg_id, 
			
 
				               t.name_zh AS name_zh, 
			
@@ -1857,12 +1960,19 @@ def query_neo4j_graph(query_requirement):
 
				               t.email AS email, 
			
 
				               t.updated_at AS updated_at
			
 
				             """
			
 
				+            
			
 
				         else:
			
 
				-            # 无标签条件的情况，只根据Talent属性查询
			
 
				+            # 情况4：没有指定标签，也没有指定酒店
			
 
				+            # 只按照Talent属性进行查询
			
 
				             cypher_script = f"""
			
 
				             {talent_subset_query}
			
 
				-            RETURN DISTINCT t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, 
			
 
				-                   t.mobile as mobile, t.email as email, t.updated_at as updated_at
			
 
				+            RETURN DISTINCT 
			
 
				+              t.pg_id AS pg_id, 
			
 
				+              t.name_zh AS name_zh, 
			
 
				+              t.name_en AS name_en,
			
 
				+              t.mobile AS mobile, 
			
 
				+              t.email AS email, 
			
 
				+              t.updated_at AS updated_at
			
 
				             """
			
 
				         
			
 
				         logging.info(f"生成的Cypher脚本: {cypher_script}")
			
@@ -1882,6 +1992,7 @@ def query_neo4j_graph(query_requirement):
 
				             'message': '查询成功执行',
			
 
				             'query': cypher_script,
			
 
				             'matched_labels': matched_labels,
			
 
				+            'matched_hotels': matched_hotels,
			
 
				             'non_empty_fields': non_empty_fields,
			
 
				             'data': records
			
 
				         }
			
@@ -1923,9 +2034,9 @@ def talent_get_tags(talent_id):
 
				         
			
 
				         # 构建Cypher查询语句，获取人才节点关联的标签
			
 
				         cypher_query = """
			
 
				-        MATCH (t:Talent)-[r:BELONGS_TO]->(tag:DataLabel)
			
 
				+        MATCH (t:Talent)-[r:BELONGS_TO|WORK_AS]->(tag:DataLabel)
			
 
				         WHERE t.pg_id = $talent_id
			
 
				-        RETURN t.pg_id as talent_id, tag.name as tag_name
			
 
				+        RETURN t.pg_id as talent_id, tag.name_zh as tag_name_zh, type(r) as relation_type
			
 
				         """
			
 
				         
			
 
				         # 执行查询
			
@@ -1942,7 +2053,8 @@ def talent_get_tags(talent_id):
 
				             for record in records:
			
 
				                 talent_tag = {
			
 
				                     'talent': record['talent_id'],
			
 
				-                    'tag': record['tag_name']
			
 
				+                    'tag_name_zh': record['tag_name_zh'],
			
 
				+                    'relation_type': record['relation_type']
			
 
				                 }
			
 
				                 response_data['data'].append(talent_tag)
			
 
				             
			
@@ -2592,4 +2704,201 @@ def update_origin_source(existing_origin_source, task_type, minio_path):
 
				     except Exception as e:
			
 
				         logging.error(f"更新origin_source失败: {str(e)}")
			
 
				         # 如果处理失败，返回包含新记录的数组
			
 
				-        return [create_origin_source_entry(task_type, minio_path)]
			
 
				+        return [create_origin_source_entry(task_type, minio_path)]
			
 
				+
			
 
				+
			
 
				def _brand_lookup_failure(code, message):
				    """Build the failure payload shared by every error exit of get_brand_group_by_hotel."""
				    return {
				        'code': code,
				        'success': False,
				        'message': message,
				        'data': None
				    }


				def get_brand_group_by_hotel(hotel_zh):
				    """
				    Resolve the brand and hotel group that a hotel belongs to.

				    The active brand names stored in ``HotelGroupBrands`` are offered to the
				    Qwen chat model as candidates, the model picks the one that matches the
				    hotel name, and the chosen brand's row is read back to obtain the
				    Chinese/English brand and group names.

				    Args:
				        hotel_zh (str): Chinese name of the hotel.

				    Returns:
				        dict: A payload with the keys ``code``, ``success``, ``message`` and
				        ``data``. On success ``code`` is 200 and ``data`` contains
				        ``hotel_zh``, ``brand_name_zh``, ``brand_name_en``,
				        ``group_name_zh`` and ``group_name_en``. On failure ``data`` is
				        ``None`` and ``code`` is 400 (missing or non-string hotel name),
				        404 (no brand list, no brand recognised, or brand row not found)
				        or 500 (database, model API or unexpected error). Errors are
				        reported through the payload rather than raised.
				    """
				    try:
				        # Step 1: validate the hotel name.
				        if not hotel_zh:
				            return _brand_lookup_failure(400, '酒店名称不能为空')

				        # A truthy non-string value has no .strip(); reject it as bad input
				        # instead of letting the AttributeError surface as a 500 below.
				        if not isinstance(hotel_zh, str):
				            return _brand_lookup_failure(400, '酒店名称必须为字符串')

				        hotel_name = hotel_zh.strip()
				        if not hotel_name:
				            return _brand_lookup_failure(400, '酒店名称不能为空')

				        logging.info(f"开始查询酒店 '{hotel_name}' 的品牌和集团信息")

				        # Step 2: load every active, non-empty brand name as the reference list.
				        try:
				            # Imported here, as in the original; the name is reused in step 4.
				            from app.core.data_parse.hotel_management import HotelGroupBrands

				            brands = HotelGroupBrands.query.filter(
				                HotelGroupBrands.brand_name_zh.isnot(None),
				                HotelGroupBrands.brand_name_zh != '',
				                HotelGroupBrands.status == 'active'
				            ).distinct(HotelGroupBrands.brand_name_zh).order_by(HotelGroupBrands.brand_name_zh).all()

				            all_brands = [brand.brand_name_zh for brand in brands if brand.brand_name_zh]

				            if not all_brands:
				                return _brand_lookup_failure(404, '未找到任何品牌信息')

				            logging.info(f"获取到 {len(all_brands)} 个品牌作为参照值")

				        except Exception as db_error:
				            logging.error(f"从数据库获取品牌列表失败: {str(db_error)}")
				            return _brand_lookup_failure(500, f'获取品牌列表失败: {str(db_error)}')

				        # Step 3: ask the Qwen model which reference brand matches the hotel.
				        try:
				            brands_json = json.dumps(all_brands, ensure_ascii=False)

				            prompt = f"""
				            请根据提供的酒店名称，从可用品牌列表中选择最匹配的品牌。
				            
				            ## 酒店名称
				            {hotel_name}
				            
				            ## 可用品牌列表
				            {brands_json}
				            
				            ## 输出要求
				            1. 仔细分析酒店名称，选择最匹配的品牌
				            2. 如果酒店名称中包含品牌信息，优先选择该品牌
				            3. 如果无法确定，返回空字符串
				            4. 严格按照JSON格式输出：{{"brand": "品牌名称"}}
				            
				            请只返回JSON字符串，不要包含其他解释文字。
				            """

				            client = OpenAI(
				                api_key=QWEN_TEXT_API_KEY,
				                base_url=QWEN_TEXT_BASE_URL,
				            )

				            completion = client.chat.completions.create(
				                model="qwen-long-latest",
				                messages=[
				                    {"role": "system", "content": "你是一个专业的酒店品牌识别专家。"},
				                    {"role": "user", "content": prompt}
				                ],
				                temperature=0.1,
				                response_format={"type": "json_object"}
				            )

				            response_content = completion.choices[0].message.content
				            logging.info(f"千问API返回结果: {response_content}")

				            try:
				                parsed_response = json.loads(response_content)
				            except json.JSONDecodeError as json_error:
				                logging.error(f"解析千问API返回的JSON失败: {str(json_error)}")
				                return _brand_lookup_failure(500, f'解析品牌识别结果失败: {str(json_error)}')

				            # The brand feeds an exact-match lookup in step 4, so normalise
				            # it: anything that is not a JSON object with a string "brand"
				            # counts as "not recognised", and surrounding whitespace is
				            # dropped so it cannot cause a spurious miss.
				            brand_name = parsed_response.get('brand', '') if isinstance(parsed_response, dict) else ''
				            brand_name = brand_name.strip() if isinstance(brand_name, str) else ''

				            if not brand_name:
				                return _brand_lookup_failure(404, '未能识别出酒店对应的品牌')

				            logging.info(f"识别出的品牌: {brand_name}")

				        except Exception as api_error:
				            logging.error(f"调用千问API失败: {str(api_error)}")
				            return _brand_lookup_failure(500, f'品牌识别失败: {str(api_error)}')

				        # Step 4: read the full brand/group record for the recognised brand.
				        try:
				            brand_record = HotelGroupBrands.query.filter(
				                HotelGroupBrands.brand_name_zh == brand_name,
				                HotelGroupBrands.status == 'active'
				            ).first()

				            if not brand_record:
				                logging.warning(f"未找到品牌 '{brand_name}' 的详细信息")
				                return _brand_lookup_failure(404, f'未找到品牌 "{brand_name}" 的详细信息')

				            brand_info = {
				                'brand_name_zh': brand_record.brand_name_zh,
				                'brand_name_en': brand_record.brand_name_en,
				                'group_name_zh': brand_record.group_name_zh,
				                'group_name_en': brand_record.group_name_en
				            }
				            logging.info(f"找到品牌信息: {brand_info}")

				        except Exception as query_error:
				            logging.error(f"查询品牌详细信息失败: {str(query_error)}")
				            return _brand_lookup_failure(500, f'查询品牌详细信息失败: {str(query_error)}')

				        return {
				            'code': 200,
				            'success': True,
				            'message': f'成功获取酒店 "{hotel_name}" 的品牌和集团信息',
				            'data': {'hotel_zh': hotel_name, **brand_info}
				        }

				    except Exception as e:
				        # Last-resort guard so callers always receive a payload.
				        error_msg = f"获取酒店品牌和集团信息失败: {str(e)}"
				        logging.error(error_msg, exc_info=True)
				        return _brand_lookup_failure(500, error_msg)
			
--- a/app/core/data_parse/parse_task.py
+++ b/app/core/data_parse/parse_task.py
@@ -112,6 +112,42 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
 
				             result['errors'].append(f"清除Talent节点关系失败: {str(clear_error)}")
			
 
				             # 即使清除关系失败，也继续执行后续逻辑
			
 
				         
			
 
				+        # 查询Neo4j图数据库中DataLabel标签中node_type为"brand"的所有节点
			
 
				+        brand_labels = []
			
 
				+        try:
			
 
				+            from app.core.graph.graph_operations import connect_graph
			
 
				+            
			
 
				+            with connect_graph().session() as session:
			
 
				+                # 查询node_type为"brand"的DataLabel节点
			
 
				+                brand_query = """
			
 
				+                MATCH (n:DataLabel)
			
 
				+                WHERE n.node_type = "brand"
			
 
				+                RETURN n.name_zh as name_zh
			
 
				+                ORDER BY n.name_zh
			
 
				+                """
			
 
				+                brand_result = session.run(brand_query)
			
 
				+                
			
 
				+                # 将查询结果的name_zh保存到brand_labels数组
			
 
				+                for record in brand_result:
			
 
				+                    if record['name_zh']:
			
 
				+                        brand_labels.append(record['name_zh'])
			
 
				+                
			
 
				+                logging.info(f"成功查询到 {len(brand_labels)} 个品牌标签")
			
 
				+                logging.info(f"品牌标签列表: {brand_labels}")
			
 
				+                
			
 
				+                # 更新结果统计
			
 
				+                result['brand_labels_count'] = len(brand_labels)
			
 
				+                result['brand_labels'] = brand_labels
			
 
				+                
			
 
				+        except Exception as brand_query_error:
			
 
				+            logging.error(f"查询品牌标签失败: {str(brand_query_error)}")
			
 
				+            result['errors'].append(f"查询品牌标签失败: {str(brand_query_error)}")
			
 
				+            # 即使查询品牌标签失败，也继续执行后续逻辑
			
 
				+        
			
 
				+        # 将brand_labels构建成JSON字符串
			
 
				+        brand_labels_json = json.dumps(brand_labels, ensure_ascii=False)
			
 
				+        logging.info(f"品牌标签JSON字符串: {brand_labels_json}")
			
 
				+        
			
 
				         for i, career_item in enumerate(career_path):
			
 
				             try:
			
 
				                 if not isinstance(career_item, dict):
			
@@ -189,16 +225,17 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
 
				                     # 构建提示词
			
 
				                     prompt = f"""请根据酒店名称'{hotel_zh}'判断该酒店所属的品牌。通常酒店名称前半部分是地名，后半部分是品牌名称。
			
 
				                     例如：扬州瘦西湖夜泊君亭酒店，品牌名称为'夜泊君亭'。
			
 
				+                    
			
 
				+                    请从以下品牌列表中选择最匹配的品牌名称：
			
 
				+                    {brand_labels_json}
			
 
				+                    
			
 
				                     要求：
			
 
				                     1. 必须返回标准的JSON格式
			
 
				                     2. 格式必须为：{{"brand": "品牌名称"}}
			
 
				                     3. 不要包含任何其他文本、说明或markdown格式
			
 
				-                    4. 如果无法确定品牌，返回：{{"brand": ""}}
			
 
				-                    示例正确格式：
			
 
				-                    {{"brand": "万豪"}}
			
 
				-                    {{"brand": "希尔顿"}}
			
 
				-                    {{"brand": "洲际"}}
			
 
				-                    {{"brand": "夜泊君亭"}}
			
 
				+                    4. 如果无法确定品牌或品牌不在列表中，返回：{{"brand": ""}}
			
 
				+                    5. 品牌名称必须完全匹配列表中的某个品牌名称
			
 
				+                    
			
 
				                     请直接返回JSON，不要有其他内容。"""
			
 
				                     
			
 
				                     # 调用千问大模型
			
@@ -247,7 +284,7 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
 
				                                         if result_query:
			
 
				                                             label_node_id = result_query['node_id']
			
 
				                                             
			
 
				-                                            # 在创建BELONGS_TO关系之前，先检查关系是否已经存在
			
 
				+                                            # 创建Hotel节点与品牌标签的BELONGS_TO关系
			
 
				                                             try:
			
 
				                                                 from app.core.graph.graph_operations import connect_graph
			
 
				                                                 
			
@@ -308,40 +345,26 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
 
				                                                         result['brand_relationships_failed'] += 1
			
 
				                                         else:
			
 
				                                             logging.warning(f"未找到品牌标签节点: {brand_name}")
			
 
				+                                            result['brand_relationships_failed'] += 1
			
 
				                                             
			
 
				                                 except Exception as query_error:
			
 
				                                     logging.error(f"查询品牌标签节点失败: {str(query_error)}")
			
 
				+                                    result['brand_relationships_failed'] += 1
			
 
				                                     result['errors'].append(f"查询品牌标签节点失败: {brand_name}, 错误: {str(query_error)}")
			
 
				                             else:
			
 
				-                                logging.warning(f"千问大模型返回的品牌名称为空，酒店: {hotel_zh}, 响应: '{brand_response}'")
			
 
				+                                logging.warning(f"千问大模型返回的品牌名称为空，酒店: {hotel_zh}")
			
 
				+                                result['brand_relationships_failed'] += 1
			
 
				                                 
			
 
				                         except json.JSONDecodeError as json_error:
			
 
				-                            logging.warning(f"解析千问大模型返回的JSON失败，原始响应内容: '{brand_response}', 错误: {json_error}")
			
 
				-                            # 尝试从响应中提取品牌名称（非JSON格式的fallback）
			
 
				-                            brand_name = ''
			
 
				-                            
			
 
				-                            # 首先尝试提取JSON格式的品牌名称
			
 
				-                            import re
			
 
				-                            # 尝试匹配 {"brand": "品牌名称"} 或 {"brand":"品牌名称"} 格式
			
 
				-                            json_brand_match = re.search(r'["\']?brand["\']?\s*:\s*["\']?([^"\']+)["\']?', brand_response, re.IGNORECASE)
			
 
				-                            if json_brand_match:
			
 
				-                                brand_name = json_brand_match.group(1).strip()
			
 
				-                                logging.info(f"从非标准JSON响应中提取到品牌名称: {brand_name}")
			
 
				-                            # 如果上面没匹配到，尝试匹配包含"品牌"的文本
			
 
				-                            elif 'brand' in brand_response.lower() or '品牌' in brand_response:
			
 
				-                                brand_match = re.search(r'["\']?([^"\']*品牌[^"\']*)["\']?', brand_response)
			
 
				-                                if brand_match:
			
 
				-                                    brand_name = brand_match.group(1).strip()
			
 
				-                                    logging.info(f"从非JSON响应中提取到品牌名称: {brand_name}")
			
 
				-                                else:
			
 
				-                                    logging.warning(f"无法从响应中提取品牌名称: {brand_response}")
			
 
				-                            else:
			
 
				-                                logging.warning(f"响应中未包含品牌相关信息: {brand_response}")
			
 
				+                            logging.warning(f"解析千问大模型返回的JSON失败，酒店: {hotel_zh}, 响应: '{brand_response}'")
			
 
				+                            result['brand_relationships_failed'] += 1
			
 
				                     else:
			
 
				-                        logging.warning(f"千问大模型返回结果无效: '{brand_response}'")
			
 
				+                        logging.warning(f"千问大模型返回结果无效，酒店: {hotel_zh}")
			
 
				+                        result['brand_relationships_failed'] += 1
			
 
				                         
			
 
				                 except Exception as brand_error:
			
 
				                     logging.error(f"调用千问大模型判断品牌失败: {str(brand_error)}")
			
 
				+                    result['brand_relationships_failed'] += 1
			
 
				                     result['errors'].append(f"调用千问大模型判断品牌失败: {hotel_zh}, 错误: {str(brand_error)}")
			
 
				                 
			
 
				                 # 创建Talent节点到Hotel节点的WORK_FOR关系
			
--- a/test_batch_process_business_card_images.py
+++ b/test_batch_process_business_card_images.py
@@ -0,0 +1,91 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+测试修改后的batch_process_business_card_images函数
			
 
				+验证在图片解析成功后是否正确调用get_brand_group_by_hotel获取品牌和集团信息
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import os
			
 
				+import logging
			
 
				+
			
 
				+# 添加项目根目录到Python路径
			
 
				+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
			
 
				+
			
 
				+def test_batch_process_business_card_images():
			
 
				+    """测试修改后的batch_process_business_card_images函数"""
			
 
				+    try:
			
 
				+        print("开始测试修改后的 batch_process_business_card_images 函数...")
			
 
				+        print("验证在图片解析成功后是否正确调用get_brand_group_by_hotel")
			
 
				+        print("-" * 70)
			
 
				+        
			
 
				+        # 导入函数
			
 
				+        from app.core.data_parse.parse_card import batch_process_business_card_images
			
 
				+        
			
 
				+        print("✅ 成功导入 batch_process_business_card_images 函数")
			
 
				+        
			
 
				+        # 检查函数是否包含必要的导入
			
 
				+        import inspect
			
 
				+        function_source = inspect.getsource(batch_process_business_card_images)
			
 
				+        
			
 
				+        # 检查是否导入了get_brand_group_by_hotel
			
 
				+        if "from app.core.data_parse.parse_system import get_brand_group_by_hotel" in function_source:
			
 
				+            print("✅ 函数已正确导入 get_brand_group_by_hotel")
			
 
				+        else:
			
 
				+            print("❌ 函数未导入 get_brand_group_by_hotel")
			
 
				+            return False
			
 
				+        
			
 
				+        # 检查是否调用了get_brand_group_by_hotel
			
 
				+        if "get_brand_group_by_hotel(" in function_source:
			
 
				+            print("✅ 函数已正确调用 get_brand_group_by_hotel")
			
 
				+        else:
			
 
				+            print("❌ 函数未调用 get_brand_group_by_hotel")
			
 
				+            return False
			
 
				+        
			
 
				+        # 检查是否正确赋值品牌和集团信息
			
 
				+        brand_assignments = [
			
 
				+            "talent_data['brand_zh']",
			
 
				+            "talent_data['brand_en']", 
			
 
				+            "talent_data['affiliation_zh']",
			
 
				+            "talent_data['affiliation_en']"
			
 
				+        ]
			
 
				+        
			
 
				+        for assignment in brand_assignments:
			
 
				+            if assignment in function_source:
			
 
				+                print(f"✅ 函数已正确赋值 {assignment}")
			
 
				+            else:
			
 
				+                print(f"❌ 函数未赋值 {assignment}")
			
 
				+                return False
			
 
				+        
			
 
				+        print("\n✅ 所有检查项目都通过！")
			
 
				+        print("\n修改总结:")
			
 
				+        print("1. 在图片解析成功后，调用get_brand_group_by_hotel获取品牌和集团信息")
			
 
				+        print("2. 将brand_name_zh赋值给talent_data['brand_zh']")
			
 
				+        print("3. 将brand_name_en赋值给talent_data['brand_en']")
			
 
				+        print("4. 将group_name_zh赋值给talent_data['affiliation_zh']")
			
 
				+        print("5. 将group_name_en赋值给talent_data['affiliation_en']")
			
 
				+        print("6. 包含完善的错误处理和日志记录")
			
 
				+        print("7. 在调用record_parsed_talent之前完成所有数据准备")
			
 
				+        
			
 
				+        return True
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        print(f"\n❌ 测试失败: {str(e)}")
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
 
				+        return False
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # 配置日志
			
 
				+    logging.basicConfig(
			
 
				+        level=logging.INFO,
			
 
				+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
			
 
				+    )
			
 
				+    
			
 
				+    # 运行测试
			
 
				+    success = test_batch_process_business_card_images()
			
 
				+    
			
 
				+    if success:
			
 
				+        print("\n🎉 测试完成！函数修改成功！")
			
 
				+    else:
			
 
				+        print("\n❌ 测试失败！需要检查函数修改！") 
			
--- a/test_query_neo4j_graph_v2.py
+++ b/test_query_neo4j_graph_v2.py
@@ -0,0 +1,64 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+测试优化后的 query_neo4j_graph 函数
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import os
			
 
				+import logging
			
 
				+
			
 
				+# 添加项目根目录到Python路径
			
 
				+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				+
			
 
				+# 配置日志
			
 
				+logging.basicConfig(
			
 
				+    level=logging.INFO,
			
 
				+    format='%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(lineno)d - %(message)s'
			
 
				+)
			
 
				+
			
 
				+def test_query_neo4j_graph():
			
 
				+    """测试优化后的 query_neo4j_graph 函数"""
			
 
				+    try:
			
 
				+        from app.core.data_parse.parse_system import query_neo4j_graph
			
 
				+        
			
 
				+        # 测试查询文本
			
 
				+        query_text = "大连硬石酒店"
			
 
				+        print(f"测试查询文本: {query_text}")
			
 
				+        print("-" * 50)
			
 
				+        
			
 
				+        # 调用函数
			
 
				+        result = query_neo4j_graph(query_text)
			
 
				+        
			
 
				+        # 打印结果
			
 
				+        print("函数执行结果:")
			
 
				+        print(f"状态码: {result.get('code')}")
			
 
				+        print(f"成功: {result.get('success')}")
			
 
				+        print(f"消息: {result.get('message')}")
			
 
				+        print(f"匹配的标签: {result.get('matched_labels')}")
			
 
				+        print(f"匹配的酒店: {result.get('matched_hotels')}")
			
 
				+        print(f"非空字段: {result.get('non_empty_fields')}")
			
 
				+        print(f"查询语句: {result.get('query')}")
			
 
				+        print(f"数据条数: {len(result.get('data', []))}")
			
 
				+        
			
 
				+        if result.get('data'):
			
 
				+            print("\n前5条数据:")
			
 
				+            for i, record in enumerate(result.get('data', [])[:5]):
			
 
				+                print(f"  {i+1}. {record}")
			
 
				+        
			
 
				+        return result
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        print(f"测试失败: {str(e)}")
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
 
				+        return None
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    print("开始测试优化后的 query_neo4j_graph 函数...")
			
 
				+    result = test_query_neo4j_graph()
			
 
				+    
			
 
				+    if result and result.get('success'):
			
 
				+        print("\n✅ 测试成功！函数工作正常。")
			
 
				+    else:
			
 
				+        print("\n❌ 测试失败！函数存在问题。") 
			
--- a/子图遍历代码样例.txt
+++ b/子图遍历代码样例.txt
@@ -0,0 +1,26 @@
 
				+// 步骤1: 定义标签条件列表
			
 
				+WITH ['Label1', 'Label2'] AS targetLabels  // 替换为实际标签条件
			
 
				+
			
 
				+// 步骤2: 匹配标签条件节点
			
 
				+MATCH (tag:LabelCondition)
			
 
				+WHERE tag.name IN targetLabels
			
 
				+WITH collect(tag) AS startNodes
			
 
				+
			
 
				+// 步骤3: 使用APOC扩展遍历（带双终止条件）
			
 
				+CALL apoc.path.expandConfig(
			
 
				+  startNodes,
			
 
				+  {
			
 
				+    minLevel: 1,           // 至少1步关系（排除起始节点）
			
 
				+    maxLevel: 10,          // 最大路径深度10
			
 
				+    relationshipFilter: 'ALL',  // 所有关系类型和方向
			
 
				+    labelFilter: '>',       // 允许所有标签节点
			
 
				+    terminatorNodes: [      // 动态检测终止节点
			
 
				+      node WHERE 'Talent' IN labels(node)
			
 
				+    ],
			
 
				+    uniqueness: 'NODE_GLOBAL'  // 高性能遍历模式
			
 
				+  }
			
 
				+) YIELD path
			
 
				+
			
 
				+// 步骤4: 提取并返回去重节点
			
 
				+WITH last(nodes(path)) AS node  // 获取路径终点
			
 
				+RETURN DISTINCT node