瀏覽代碼

修改搜索人才的条件匹配逻辑。子图遍历,标签扩展遍历。
修改名片解析,获取品牌和集团信息。
标签的字段名称规范化。

maxiaolong 6 天之前
父節點
當前提交
e2b640ae89

+ 36 - 0
app/core/data_parse/parse_card.py

@@ -91,6 +91,7 @@ def process_business_card_image(image_file):
                 extracted_data = parse_text_with_qwen25VLplus(image_data)
                 logging.info("成功使用 Qwen 2.5 VL Plus 模型解析名片")
                 
+                
                 return {
                     'code': 200,
                     'success': True,
@@ -669,6 +670,8 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                         # 记录成功解析的人才信息到parsed_talents表
                         try:
                             from app.core.data_parse.parse_task import record_parsed_talent
+                            from app.core.data_parse.parse_system import get_brand_group_by_hotel
+                            
                             talent_data = process_result.get('data')
                             if talent_data and isinstance(talent_data, dict):
                                 # 在记录到parsed_talents表之前,设置image_path和origin_source
@@ -689,6 +692,39 @@ def batch_process_business_card_images(minio_paths_json, task_id=None, task_type
                                         if isinstance(career_entry, dict):
                                             career_entry['image_path'] = minio_path
                                 
+                                # 调用get_brand_group_by_hotel获取品牌和集团信息
+                                if talent_data.get('hotel_zh'):
+                                    try:
+                                        brand_result = get_brand_group_by_hotel(talent_data['hotel_zh'])
+                                        if brand_result.get('success') and brand_result.get('data'):
+                                            brand_data = brand_result['data']
+                                            # 赋值品牌和集团信息
+                                            talent_data['brand_zh'] = brand_data.get('brand_name_zh', '')
+                                            talent_data['brand_en'] = brand_data.get('brand_name_en', '')
+                                            talent_data['affiliation_zh'] = brand_data.get('group_name_zh', '')
+                                            talent_data['affiliation_en'] = brand_data.get('group_name_en', '')
+                                            logging.info(f"成功获取品牌和集团信息: {brand_data}")
+                                        else:
+                                            logging.warning(f"获取品牌信息失败: {brand_result.get('message', '')}")
+                                            # 设置默认值
+                                            talent_data['brand_zh'] = ''
+                                            talent_data['brand_en'] = ''
+                                            talent_data['affiliation_zh'] = ''
+                                            talent_data['affiliation_en'] = ''
+                                    except Exception as brand_error:
+                                        logging.error(f"调用get_brand_group_by_hotel失败: {str(brand_error)}")
+                                        # 设置默认值
+                                        talent_data['brand_zh'] = ''
+                                        talent_data['brand_en'] = ''
+                                        talent_data['affiliation_zh'] = ''
+                                        talent_data['affiliation_en'] = ''
+                                else:
+                                    # 没有酒店信息,设置默认值
+                                    talent_data['brand_zh'] = ''
+                                    talent_data['brand_en'] = ''
+                                    talent_data['affiliation_zh'] = ''
+                                    talent_data['affiliation_en'] = ''
+                                
                                 record_result = record_parsed_talent(talent_data, task_id, task_type)
                                 if record_result.get('success'):
                                     # 收集成功解析的记录ID

+ 339 - 30
app/core/data_parse/parse_system.py

@@ -1382,10 +1382,10 @@ def get_talent_tag_list():
         # 构建Cypher查询语句,获取分类为talent的标签
         query = """
         MATCH (n:DataLabel)
-        WHERE n.category CONTAINS 'talent' OR n.category CONTAINS '人才'
-        RETURN id(n) as id, n.name as name, n.en_name as en_name, 
-               n.category as category, n.describe as description, 
-               n.status as status, n.time as time
+        WHERE n.category CONTAINS 'talentmap' OR n.category CONTAINS '人才地图'
+        RETURN id(n) as id, n.name_zh as name_zh, n.name_en as name_en, 
+               n.category as category, n.describe as describe, 
+               n.status as status, n.time as time, n.node_type as node_type
         ORDER BY n.time DESC
         """
         
@@ -1398,12 +1398,13 @@ def get_talent_tag_list():
             for record in result:
                 tag = {
                     'id': record['id'],
-                    'name': record['name'],
-                    'en_name': record['en_name'],
+                    'name_zh': record['name_zh'],
+                    'name_en': record['name_en'],
                     'category': record['category'],
-                    'description': record['description'],
+                    'describe': record['describe'],
                     'status': record['status'],
-                    'time': record['time']
+                    'time': record['time'],
+                    'node_type': record['node_type']
                 }
                 tags.append(tag)
         
@@ -1679,7 +1680,7 @@ def query_neo4j_graph(query_requirement):
         
         # 构建匹配标签的提示语
         matching_prompt = f"""
-        请从上传的查询需求文本中提取以下结构化信息。其中datalabel字段从可用标签列表里进行匹配,匹配结果填写可用标签列表里的标签名称。需要严格按照JSON格式输出:   
+        请从上传的查询需求文本中提取以下结构化信息。其中datalabel字段从可用标签列表里进行匹配,匹配结果填写可用标签列表里的标签名称。hotel字段提取查询需求中提到的酒店名称。需要严格按照JSON格式输出:   
         {{
          "basic_info": {{
             "中文姓名": "",
@@ -1694,6 +1695,9 @@ def query_neo4j_graph(query_requirement):
         }},
          "datalabel": [
             "标签1","标签2","标签3"
+        ],
+         "hotel": [
+            "酒店名称1","酒店名称2","酒店名称3"
         ]
         }}
         ## 查询需求文本
@@ -1704,9 +1708,11 @@ def query_neo4j_graph(query_requirement):
         
         输出要求:
         1. 中文名称优先,有英文名称也要提取保留
-        2. 年龄字段只需填写数字
+        2. 年龄字段只需填写数字
         3. 标签没有被匹配到,datalabel字段可以为空数组
-        4. 只需返回JSON字符串,不要返回其他信息
+        4. 酒店名称提取查询需求中明确提到的酒店名称
+        5. 如果没有提到酒店信息,hotel字段可以为空数组
+        6. 只需返回JSON字符串,不要返回其他信息
         """
         
         # 调用阿里千问API匹配标签
@@ -1725,11 +1731,13 @@ def query_neo4j_graph(query_requirement):
         # 解析API响应
         matching_content = completion.choices[0].message.content
         
-        # 直接解析JSON响应,提取datalabel字段
+        # 直接解析JSON响应,提取datalabel和hotel字段
         parsed_content = json.loads(matching_content)
         matched_labels = parsed_content.get('datalabel', [])
+        matched_hotels = parsed_content.get('hotel', [])
         
         logging.info(f"匹配到的标签: {matched_labels}")
+        logging.info(f"匹配到的酒店: {matched_hotels}")
         
         # 步骤3: 构建查询逻辑和Cypher语句
         logging.info("第三步:构建查询逻辑和Cypher语句")
@@ -1829,26 +1837,121 @@ def query_neo4j_graph(query_requirement):
         
         if matched_labels:
             condition_params['labels'] = matched_labels
-            logging.info(f"构建DataLabel和Hotel条件查询,标签: {matched_labels}")
+            logging.info(f"构建DataLabel条件查询,标签: {matched_labels}")
+        
+        if matched_hotels:
+            condition_params['hotels'] = matched_hotels
+            logging.info(f"构建Hotel条件查询,酒店: {matched_hotels}")
+        
+        # 确保参数不为空时才添加到查询中
+        if not matched_labels:
+            # 如果没有标签,需要修改Cypher查询以避免引用$labels参数
+            logging.info("没有标签条件,将调整Cypher查询")
         
         # 步骤4: 执行查询并返回结果
         logging.info("第四步:执行查询并返回结果")
         
+        # 检查是否有查询条件,如果都没有则直接返回空结果
+        if not talent_conditions and not matched_labels and not matched_hotels:
+            logging.info("没有查询条件,直接返回空结果")
+            return {
+                'code': 200,
+                'success': True,
+                'message': '查询条件没有匹配到任何人才,返回空结果',
+                'query': '查询条件没有匹配到任何人才,返回空结果',
+                'matched_labels': matched_labels,
+                'matched_hotels': matched_hotels,
+                'non_empty_fields': non_empty_fields,
+                'data': []
+            }
+        
         # 构建完整的Cypher查询语句
-        if matched_labels:
-            # 有标签条件的情况 - 查找与条件子集(DataLabel和Hotel)有关系的Talent节点
-            # 使用OR逻辑:Talent有WORK_FOR关系链路或者有BELONGS_TO关系链路的节点都可以查询出来
+        if matched_hotels and matched_labels:
+            # 情况1:提供了酒店名称和标签名称
+            # 通过酒店名称查到一组Talent节点,通过标签查到另一组Talent节点,两组节点组合去重
+            logging.info("情况1:同时有酒店名称和标签名称,使用组合查询方式")
+            
+            # 使用UNION合并两个查询结果
             cypher_script = f"""
+            // 查询通过酒店名称匹配的Talent节点
             {talent_subset_query}
             WHERE EXISTS {{
-              // 条件1:存在WORK_FOR关系链路
-              MATCH (t)-[:WORK_FOR]->(:Hotel)-[:HAS_LABEL]->(dl:DataLabel)
-              WHERE dl.name_zh IN $labels
-            }} OR EXISTS {{
-              // 条件2:存在BELONGS_TO关系链路
-              MATCH (t)-[:BELONGS_TO]->(dl2:DataLabel)
-              WHERE dl2.name_zh  IN $labels
+              // 条件:存在WORK_FOR关系链路,且酒店名称匹配
+              MATCH (t)-[:WORK_FOR]->(h:Hotel)
+              WHERE h.hotel_zh IN $hotels
             }}
+            RETURN DISTINCT
+              t.pg_id AS pg_id,
+              t.name_zh AS name_zh,
+              t.name_en AS name_en,
+              t.mobile AS mobile,
+              t.email AS email,
+              t.updated_at AS updated_at
+            
+            UNION
+            
+            // 查询通过标签扩展遍历匹配的Talent节点
+            // 步骤1: 定义标签条件列表
+            WITH $labels AS targetLabels
+            
+            // 步骤2: 匹配标签条件节点
+            MATCH (tag:DataLabel)
+            WHERE tag.name_zh IN targetLabels
+            WITH collect(tag) AS startNodes
+            
+            // 步骤3: 使用扩展遍历查找相关Talent节点
+            UNWIND startNodes AS startTag
+            MATCH (startTag)<-[:BELONGS_TO|WORK_AS]-(t:Talent)
+            {f"WHERE {' AND '.join(talent_conditions)}" if talent_conditions else ""}
+            
+            // 步骤4: 返回去重结果
+            RETURN DISTINCT
+              t.pg_id AS pg_id,
+              t.name_zh AS name_zh,
+              t.name_en AS name_en,
+              t.mobile AS mobile,
+              t.email AS email,
+              t.updated_at AS updated_at
+            """
+            
+        elif matched_hotels and not matched_labels:
+            # 情况2:只提供了酒店名称,没有标签名称
+            # 查询Talent的WORK_FOR与指定酒店的关系
+            cypher_script = f"""
+            {talent_subset_query}
+            WHERE EXISTS {{
+              // 条件:存在WORK_FOR关系链路,且酒店名称匹配
+              MATCH (t)-[:WORK_FOR]->(h:Hotel)
+              WHERE h.hotel_zh IN $hotels
+            }}
+            RETURN DISTINCT 
+              t.pg_id AS pg_id, 
+              t.name_zh AS name_zh, 
+              t.name_en AS name_en,
+              t.mobile AS mobile, 
+              t.email AS email, 
+              t.updated_at AS updated_at
+            """
+            
+        elif not matched_hotels and matched_labels:
+            # 情况3:没有提供酒店名称,但是有指定的标签名称
+            # 通过标签扩展遍历查询Talent节点
+            logging.info("情况3:只有标签名称,使用标签扩展遍历查询方式")
+            cypher_script = f"""
+            // 步骤1: 定义标签条件列表
+            WITH $labels AS targetLabels
+            
+            // 步骤2: 匹配标签条件节点
+            MATCH (tag:DataLabel)
+            WHERE tag.name_zh IN targetLabels
+            WITH collect(tag) AS startNodes
+            
+            // 步骤3: 使用扩展遍历查找相关Talent节点
+            UNWIND startNodes AS startTag
+            MATCH (startTag)<-[:BELONGS_TO|WORK_AS]-(t:Talent)
+            {f"WHERE {' AND '.join(talent_conditions)}" if talent_conditions else ""}
+            
+            // 步骤4: 返回去重结果
             RETURN DISTINCT 
               t.pg_id AS pg_id, 
               t.name_zh AS name_zh, 
@@ -1857,12 +1960,19 @@ def query_neo4j_graph(query_requirement):
               t.email AS email, 
               t.updated_at AS updated_at
             """
+            
         else:
-            # 无标签条件的情况,只根据Talent属性查询
+            # 情况4:没有指定标签,也没有指定酒店
+            # 只按照Talent属性进行查询
             cypher_script = f"""
             {talent_subset_query}
-            RETURN DISTINCT t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, 
-                   t.mobile as mobile, t.email as email, t.updated_at as updated_at
+            RETURN DISTINCT 
+              t.pg_id AS pg_id, 
+              t.name_zh AS name_zh, 
+              t.name_en AS name_en,
+              t.mobile AS mobile, 
+              t.email AS email, 
+              t.updated_at AS updated_at
             """
         
         logging.info(f"生成的Cypher脚本: {cypher_script}")
@@ -1882,6 +1992,7 @@ def query_neo4j_graph(query_requirement):
             'message': '查询成功执行',
             'query': cypher_script,
             'matched_labels': matched_labels,
+            'matched_hotels': matched_hotels,
             'non_empty_fields': non_empty_fields,
             'data': records
         }
@@ -1923,9 +2034,9 @@ def talent_get_tags(talent_id):
         
         # 构建Cypher查询语句,获取人才节点关联的标签
         cypher_query = """
-        MATCH (t:Talent)-[r:BELONGS_TO]->(tag:DataLabel)
+        MATCH (t:Talent)-[r:BELONGS_TO|WORK_AS]->(tag:DataLabel)
         WHERE t.pg_id = $talent_id
-        RETURN t.pg_id as talent_id, tag.name as tag_name
+        RETURN t.pg_id as talent_id, tag.name_zh as tag_name_zh, type(r) as relation_type
         """
         
         # 执行查询
@@ -1942,7 +2053,8 @@ def talent_get_tags(talent_id):
             for record in records:
                 talent_tag = {
                     'talent': record['talent_id'],
-                    'tag': record['tag_name']
+                    'tag_name_zh': record['tag_name_zh'],
+                    'relation_type': record['relation_type']
                 }
                 response_data['data'].append(talent_tag)
             
@@ -2592,4 +2704,201 @@ def update_origin_source(existing_origin_source, task_type, minio_path):
     except Exception as e:
         logging.error(f"更新origin_source失败: {str(e)}")
         # 如果处理失败,返回包含新记录的数组
-        return [create_origin_source_entry(task_type, minio_path)]
+        return [create_origin_source_entry(task_type, minio_path)]
+
+
+def get_brand_group_by_hotel(hotel_zh):
+    """
+    Look up the brand and hotel-group information for a hotel from its Chinese name.
+    
+    Args:
+        hotel_zh (str): Chinese name of the hotel.
+        
+    Returns:
+        dict: Result with 'code', 'success', 'message' and 'data'; 'data' holds the brand/group names on success, otherwise None.
+    """
+    try:
+        # Step 1: take the hotel name from hotel_zh; reject empty or whitespace-only input
+        if not hotel_zh or not hotel_zh.strip():
+            return {
+                'code': 400,
+                'success': False,
+                'message': '酒店名称不能为空',
+                'data': None
+            }
+        
+        hotel_name = hotel_zh.strip()
+        logging.info(f"开始查询酒店 '{hotel_name}' 的品牌和集团信息")
+        
+        # Step 2: load every brand name through the HotelGroupBrands model to use as the reference list
+        try:
+            # Reuse the existing database model (imported here, inside the function)
+            from app.core.data_parse.hotel_management import HotelGroupBrands
+            
+            all_brands = []
+            # Query rows whose Chinese brand name is non-empty and whose status is 'active'
+            brands = HotelGroupBrands.query.filter(
+                HotelGroupBrands.brand_name_zh.isnot(None),
+                HotelGroupBrands.brand_name_zh != '',
+                HotelGroupBrands.status == 'active'
+            ).distinct(HotelGroupBrands.brand_name_zh).order_by(HotelGroupBrands.brand_name_zh).all()
+            
+            for brand in brands:
+                if brand.brand_name_zh:
+                    all_brands.append(brand.brand_name_zh)
+            
+            if not all_brands:
+                return {
+                    'code': 404,
+                    'success': False,
+                    'message': '未找到任何品牌信息',
+                    'data': None
+                }
+            
+            logging.info(f"获取到 {len(all_brands)} 个品牌作为参照值")
+            
+        except Exception as db_error:
+            logging.error(f"从数据库获取品牌列表失败: {str(db_error)}")
+            return {
+                'code': 500,
+                'success': False,
+                'message': f'获取品牌列表失败: {str(db_error)}',
+                'data': None
+            }
+        
+        # Step 3: ask the Qwen model (qwen-long-latest) which listed brand matches the hotel
+        try:
+            # Serialize the brand list to a JSON string for the prompt
+            brands_json = json.dumps(all_brands, ensure_ascii=False)
+            
+            # Build the prompt (hotel name, candidate brands, required JSON output shape)
+            prompt = f"""
+            请根据提供的酒店名称,从可用品牌列表中选择最匹配的品牌。
+            
+            ## 酒店名称
+            {hotel_name}
+            
+            ## 可用品牌列表
+            {brands_json}
+            
+            ## 输出要求
+            1. 仔细分析酒店名称,选择最匹配的品牌
+            2. 如果酒店名称中包含品牌信息,优先选择该品牌
+            3. 如果无法确定,返回空字符串
+            4. 严格按照JSON格式输出:{{"brand": "品牌名称"}}
+            
+            请只返回JSON字符串,不要包含其他解释文字。
+            """
+            
+            # Call the Qwen API through the OpenAI client configured with the Qwen key and base URL
+            client = OpenAI(
+                api_key=QWEN_TEXT_API_KEY,
+                base_url=QWEN_TEXT_BASE_URL,
+            )
+            
+            completion = client.chat.completions.create(
+                model="qwen-long-latest",
+                messages=[
+                    {"role": "system", "content": "你是一个专业的酒店品牌识别专家。"},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.1,
+                response_format={"type": "json_object"}
+            )
+            
+            # Take the text content of the first choice
+            response_content = completion.choices[0].message.content
+            logging.info(f"千问API返回结果: {response_content}")
+            
+            # Parse the JSON payload and read its 'brand' field; an empty value is reported as 404
+            try:
+                parsed_response = json.loads(response_content)
+                brand_name = parsed_response.get('brand', '')
+                
+                if not brand_name:
+                    return {
+                        'code': 404,
+                        'success': False,
+                        'message': '未能识别出酒店对应的品牌',
+                        'data': None
+                    }
+                
+                logging.info(f"识别出的品牌: {brand_name}")
+                
+            except json.JSONDecodeError as json_error:
+                logging.error(f"解析千问API返回的JSON失败: {str(json_error)}")
+                return {
+                    'code': 500,
+                    'success': False,
+                    'message': f'解析品牌识别结果失败: {str(json_error)}',
+                    'data': None
+                }
+                
+        except Exception as api_error:
+            logging.error(f"调用千问API失败: {str(api_error)}")
+            return {
+                'code': 500,
+                'success': False,
+                'message': f'品牌识别失败: {str(api_error)}',
+                'data': None
+            }
+        
+        # Step 4: fetch the full brand and group record for the recognized brand
+        try:
+            # Take the first active row whose Chinese brand name equals the recognized brand
+            brand_info = None
+            brand_record = HotelGroupBrands.query.filter(
+                HotelGroupBrands.brand_name_zh == brand_name,
+                HotelGroupBrands.status == 'active'
+            ).first()
+            
+            if brand_record:
+                brand_info = {
+                    'brand_name_zh': brand_record.brand_name_zh,
+                    'brand_name_en': brand_record.brand_name_en,
+                    'group_name_zh': brand_record.group_name_zh,
+                    'group_name_en': brand_record.group_name_en
+                }
+                logging.info(f"找到品牌信息: {brand_info}")
+            else:
+                logging.warning(f"未找到品牌 '{brand_name}' 的详细信息")
+                return {
+                    'code': 404,
+                    'success': False,
+                    'message': f'未找到品牌 "{brand_name}" 的详细信息',
+                    'data': None
+                }
+            
+        except Exception as query_error:
+            logging.error(f"查询品牌详细信息失败: {str(query_error)}")
+            return {
+                'code': 500,
+                'success': False,
+                'message': f'查询品牌详细信息失败: {str(query_error)}',
+                'data': None
+            }
+        
+        # Return the success result
+        return {
+            'code': 200,
+            'success': True,
+            'message': f'成功获取酒店 "{hotel_name}" 的品牌和集团信息',
+            'data': {
+                'hotel_zh': hotel_name,
+                'brand_name_zh': brand_info['brand_name_zh'],
+                'brand_name_en': brand_info['brand_name_en'],
+                'group_name_zh': brand_info['group_name_zh'],
+                'group_name_en': brand_info['group_name_en']
+            }
+        }
+        
+    except Exception as e:
+        error_msg = f"获取酒店品牌和集团信息失败: {str(e)}"
+        logging.error(error_msg, exc_info=True)
+        
+        return {
+            'code': 500,
+            'success': False,
+            'message': error_msg,
+            'data': None
+        }

+ 53 - 30
app/core/data_parse/parse_task.py

@@ -112,6 +112,42 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
             result['errors'].append(f"清除Talent节点关系失败: {str(clear_error)}")
             # 即使清除关系失败,也继续执行后续逻辑
         
+        # 查询Neo4j图数据库中DataLabel标签中node_type为"brand"的所有节点
+        brand_labels = []
+        try:
+            from app.core.graph.graph_operations import connect_graph
+            
+            with connect_graph().session() as session:
+                # 查询node_type为"brand"的DataLabel节点
+                brand_query = """
+                MATCH (n:DataLabel)
+                WHERE n.node_type = "brand"
+                RETURN n.name_zh as name_zh
+                ORDER BY n.name_zh
+                """
+                brand_result = session.run(brand_query)
+                
+                # 将查询结果的name_zh保存到brand_labels数组
+                for record in brand_result:
+                    if record['name_zh']:
+                        brand_labels.append(record['name_zh'])
+                
+                logging.info(f"成功查询到 {len(brand_labels)} 个品牌标签")
+                logging.info(f"品牌标签列表: {brand_labels}")
+                
+                # 更新结果统计
+                result['brand_labels_count'] = len(brand_labels)
+                result['brand_labels'] = brand_labels
+                
+        except Exception as brand_query_error:
+            logging.error(f"查询品牌标签失败: {str(brand_query_error)}")
+            result['errors'].append(f"查询品牌标签失败: {str(brand_query_error)}")
+            # 即使查询品牌标签失败,也继续执行后续逻辑
+        
+        # 将brand_labels构建成JSON字符串
+        brand_labels_json = json.dumps(brand_labels, ensure_ascii=False)
+        logging.info(f"品牌标签JSON字符串: {brand_labels_json}")
+        
         for i, career_item in enumerate(career_path):
             try:
                 if not isinstance(career_item, dict):
@@ -189,16 +225,17 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                     # 构建提示词
                     prompt = f"""请根据酒店名称'{hotel_zh}'判断该酒店所属的品牌。通常酒店名称前半部分是地名,后半部分是品牌名称。
                     例如:扬州瘦西湖夜泊君亭酒店,品牌名称为'夜泊君亭'。
+                    
+                    请从以下品牌列表中选择最匹配的品牌名称:
+                    {brand_labels_json}
+                    
                     要求:
                     1. 必须返回标准的JSON格式
                     2. 格式必须为:{{"brand": "品牌名称"}}
                     3. 不要包含任何其他文本、说明或markdown格式
-                    4. 如果无法确定品牌,返回:{{"brand": ""}}
-                    示例正确格式:
-                    {{"brand": "万豪"}}
-                    {{"brand": "希尔顿"}}
-                    {{"brand": "洲际"}}
-                    {{"brand": "夜泊君亭"}}
+                    4. 如果无法确定品牌或品牌不在列表中,返回:{{"brand": ""}}
+                    5. 品牌名称必须完全匹配列表中的某个品牌名称
+                    
                     请直接返回JSON,不要有其他内容。"""
                     
                     # 调用千问大模型
@@ -247,7 +284,7 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                                         if result_query:
                                             label_node_id = result_query['node_id']
                                             
-                                            # 创建BELONGS_TO关系之前,先检查关系是否已经存在
+                                            # 创建Hotel节点与品牌标签的BELONGS_TO关系
                                             try:
                                                 from app.core.graph.graph_operations import connect_graph
                                                 
@@ -308,40 +345,26 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                                                         result['brand_relationships_failed'] += 1
                                         else:
                                             logging.warning(f"未找到品牌标签节点: {brand_name}")
+                                            result['brand_relationships_failed'] += 1
                                             
                                 except Exception as query_error:
                                     logging.error(f"查询品牌标签节点失败: {str(query_error)}")
+                                    result['brand_relationships_failed'] += 1
                                     result['errors'].append(f"查询品牌标签节点失败: {brand_name}, 错误: {str(query_error)}")
                             else:
-                                logging.warning(f"千问大模型返回的品牌名称为空,酒店: {hotel_zh}, 响应: '{brand_response}'")
+                                logging.warning(f"千问大模型返回的品牌名称为空,酒店: {hotel_zh}")
+                                result['brand_relationships_failed'] += 1
                                 
                         except json.JSONDecodeError as json_error:
-                            logging.warning(f"解析千问大模型返回的JSON失败,原始响应内容: '{brand_response}', 错误: {json_error}")
-                            # 尝试从响应中提取品牌名称(非JSON格式的fallback)
-                            brand_name = ''
-                            
-                            # 首先尝试提取JSON格式的品牌名称
-                            import re
-                            # 尝试匹配 {"brand": "品牌名称"} 或 {"brand":"品牌名称"} 格式
-                            json_brand_match = re.search(r'["\']?brand["\']?\s*:\s*["\']?([^"\']+)["\']?', brand_response, re.IGNORECASE)
-                            if json_brand_match:
-                                brand_name = json_brand_match.group(1).strip()
-                                logging.info(f"从非标准JSON响应中提取到品牌名称: {brand_name}")
-                            # 如果上面没匹配到,尝试匹配包含"品牌"的文本
-                            elif 'brand' in brand_response.lower() or '品牌' in brand_response:
-                                brand_match = re.search(r'["\']?([^"\']*品牌[^"\']*)["\']?', brand_response)
-                                if brand_match:
-                                    brand_name = brand_match.group(1).strip()
-                                    logging.info(f"从非JSON响应中提取到品牌名称: {brand_name}")
-                                else:
-                                    logging.warning(f"无法从响应中提取品牌名称: {brand_response}")
-                            else:
-                                logging.warning(f"响应中未包含品牌相关信息: {brand_response}")
+                            logging.warning(f"解析千问大模型返回的JSON失败,酒店: {hotel_zh}, 响应: '{brand_response}'")
+                            result['brand_relationships_failed'] += 1
                     else:
-                        logging.warning(f"千问大模型返回结果无效: '{brand_response}'")
+                        logging.warning(f"千问大模型返回结果无效,酒店: {hotel_zh}")
+                        result['brand_relationships_failed'] += 1
                         
                 except Exception as brand_error:
                     logging.error(f"调用千问大模型判断品牌失败: {str(brand_error)}")
+                    result['brand_relationships_failed'] += 1
                     result['errors'].append(f"调用千问大模型判断品牌失败: {hotel_zh}, 错误: {str(brand_error)}")
                 
                 # 创建Talent节点到Hotel节点的WORK_FOR关系

+ 91 - 0
test_batch_process_business_card_images.py

@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+测试修改后的batch_process_business_card_images函数
+验证在图片解析成功后是否正确调用get_brand_group_by_hotel获取品牌和集团信息
+"""
+
+import sys
+import os
+import logging
+
+# Put this script's directory (intended to be the project root) first on the import path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+def test_batch_process_business_card_images():
+    """Check by source inspection that batch_process_business_card_images uses get_brand_group_by_hotel; return True/False."""
+    try:
+        print("开始测试修改后的 batch_process_business_card_images 函数...")
+        print("验证在图片解析成功后是否正确调用get_brand_group_by_hotel")
+        print("-" * 70)
+        
+        # Import the function to inspect
+        from app.core.data_parse.parse_card import batch_process_business_card_images
+        
+        print("✅ 成功导入 batch_process_business_card_images 函数")
+        
+        # Read the function's source text; every check below is a substring match, nothing is executed
+        import inspect
+        function_source = inspect.getsource(batch_process_business_card_images)
+        
+        # Check that get_brand_group_by_hotel is imported inside the function
+        if "from app.core.data_parse.parse_system import get_brand_group_by_hotel" in function_source:
+            print("✅ 函数已正确导入 get_brand_group_by_hotel")
+        else:
+            print("❌ 函数未导入 get_brand_group_by_hotel")
+            return False
+        
+        # Check that get_brand_group_by_hotel is called
+        if "get_brand_group_by_hotel(" in function_source:
+            print("✅ 函数已正确调用 get_brand_group_by_hotel")
+        else:
+            print("❌ 函数未调用 get_brand_group_by_hotel")
+            return False
+        
+        # Check that the brand and group keys of talent_data appear in the source
+        brand_assignments = [
+            "talent_data['brand_zh']",
+            "talent_data['brand_en']", 
+            "talent_data['affiliation_zh']",
+            "talent_data['affiliation_en']"
+        ]
+        
+        for assignment in brand_assignments:
+            if assignment in function_source:
+                print(f"✅ 函数已正确赋值 {assignment}")
+            else:
+                print(f"❌ 函数未赋值 {assignment}")
+                return False
+        
+        print("\n✅ 所有检查项目都通过!")
+        print("\n修改总结:")
+        print("1. 在图片解析成功后,调用get_brand_group_by_hotel获取品牌和集团信息")
+        print("2. 将brand_name_zh赋值给talent_data['brand_zh']")
+        print("3. 将brand_name_en赋值给talent_data['brand_en']")
+        print("4. 将group_name_zh赋值给talent_data['affiliation_zh']")
+        print("5. 将group_name_en赋值给talent_data['affiliation_en']")
+        print("6. 包含完善的错误处理和日志记录")
+        print("7. 在调用record_parsed_talent之前完成所有数据准备")
+        
+        return True
+        
+    except Exception as e:
+        print(f"\n❌ 测试失败: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+if __name__ == "__main__":
+    # Configure logging so messages from the imported project modules are visible
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    
+    # Run the check and report the outcome
+    success = test_batch_process_business_card_images()
+    
+    if success:
+        print("\n🎉 测试完成!函数修改成功!")
+    else:
+        print("\n❌ 测试失败!需要检查函数修改!")

+ 64 - 0
test_query_neo4j_graph_v2.py

@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+测试优化后的 query_neo4j_graph 函数
+"""
+
+import sys
+import os
+import logging
+
+# Add this script's directory (intended to be the project root) to the import path
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+# Configure logging, including function name and line number in each record
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(lineno)d - %(message)s'
+)
+
+def test_query_neo4j_graph():
+    """Call query_neo4j_graph with a sample hotel query and print the result; return it, or None if an exception occurs."""
+    try:
+        from app.core.data_parse.parse_system import query_neo4j_graph
+        
+        # Sample query text
+        query_text = "大连硬石酒店"
+        print(f"测试查询文本: {query_text}")
+        print("-" * 50)
+        
+        # Call the function under test
+        result = query_neo4j_graph(query_text)
+        
+        # Print the fields of the returned result
+        print("函数执行结果:")
+        print(f"状态码: {result.get('code')}")
+        print(f"成功: {result.get('success')}")
+        print(f"消息: {result.get('message')}")
+        print(f"匹配的标签: {result.get('matched_labels')}")
+        print(f"匹配的酒店: {result.get('matched_hotels')}")
+        print(f"非空字段: {result.get('non_empty_fields')}")
+        print(f"查询语句: {result.get('query')}")
+        print(f"数据条数: {len(result.get('data', []))}")
+        
+        if result.get('data'):
+            print("\n前5条数据:")
+            for i, record in enumerate(result.get('data', [])[:5]):
+                print(f"  {i+1}. {record}")
+        
+        return result
+        
+    except Exception as e:
+        print(f"测试失败: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+if __name__ == "__main__":
+    print("开始测试优化后的 query_neo4j_graph 函数...")
+    result = test_query_neo4j_graph()
+    
+    if result and result.get('success'):  # a None result (exception) or a falsy 'success' both count as failure
+        print("\n✅ 测试成功!函数工作正常。")
+    else:
+        print("\n❌ 测试失败!函数存在问题。") 

+ 26 - 0
子图遍历代码样例.txt

@@ -0,0 +1,26 @@
+// Step 1: define the list of label names to search from
+WITH ['Label1', 'Label2'] AS targetLabels  // replace with the real label names
+
+// Step 2: match the label nodes that serve as traversal start points
+MATCH (tag:LabelCondition)
+WHERE tag.name IN targetLabels
+WITH collect(tag) AS startNodes
+
+// Step 3: expand from the start nodes with APOC, stopping at Talent nodes.
+// The stop condition is a label filter: terminatorNodes only accepts a list of
+// concrete nodes (not a predicate), and a relationshipFilter of 'ALL' would be
+// read as a relationship type literally named ALL.
+CALL apoc.path.expandConfig(
+  startNodes,
+  {
+    minLevel: 1,               // at least one hop, so the start nodes themselves are never returned
+    maxLevel: 10,              // maximum path depth of 10
+    // relationshipFilter is left out on purpose: with no filter, every relationship type is followed in both directions
+    labelFilter: '/Talent',    // termination filter: only return paths that end at a Talent node, and do not expand past it
+    uniqueness: 'NODE_GLOBAL'  // each node is visited at most once across the whole traversal
+  }
+) YIELD path
+
+// Step 4: take the end node of each path and return the distinct nodes
+WITH last(nodes(path)) AS node
+RETURN DISTINCT node