ソースを参照

修复语法错误
每种解析都添加了酒店品牌和集团信息获取功能。

maxiaolong 1 ヶ月 前
コミット
3291e655d9

+ 21 - 23
app/core/data_parse/hotel_management.py

@@ -174,18 +174,17 @@ def add_hotel_positions(position_data):
             }
         
         # 创建新的职位记录
-        new_position = HotelPosition(
-            department_zh=position_data['department_zh'].strip(),
-            department_en=position_data['department_en'].strip(),
-            position_zh=position_data['position_zh'].strip(),
-            position_en=position_data['position_en'].strip(),
-            position_abbr=position_data.get('position_abbr', '').strip() if position_data.get('position_abbr') else None,
-            level_zh=position_data['level_zh'].strip(),
-            level_en=position_data['level_en'].strip(),
-            created_by=position_data.get('created_by', 'system'),
-            updated_by=position_data.get('updated_by', 'system'),
-            status=position_data.get('status', 'active')
-        )
+        new_position = HotelPosition()
+        new_position.department_zh = position_data['department_zh'].strip()
+        new_position.department_en = position_data['department_en'].strip()
+        new_position.position_zh = position_data['position_zh'].strip()
+        new_position.position_en = position_data['position_en'].strip()
+        new_position.position_abbr = position_data.get('position_abbr', '').strip() if position_data.get('position_abbr') else None
+        new_position.level_zh = position_data['level_zh'].strip()
+        new_position.level_en = position_data['level_en'].strip()
+        new_position.created_by = position_data.get('created_by', 'system')
+        new_position.updated_by = position_data.get('updated_by', 'system')
+        new_position.status = position_data.get('status', 'active')
         
         # 保存到数据库
         db.session.add(new_position)
@@ -512,17 +511,16 @@ def add_hotel_group_brands(brand_data):
             }
         
         # 创建新的品牌记录
-        new_brand = HotelGroupBrands(
-            group_name_en=brand_data['group_name_en'].strip(),
-            group_name_zh=brand_data['group_name_zh'].strip(),
-            brand_name_en=brand_data['brand_name_en'].strip(),
-            brand_name_zh=brand_data['brand_name_zh'].strip(),
-            positioning_level_en=brand_data['positioning_level_en'].strip(),
-            positioning_level_zh=brand_data['positioning_level_zh'].strip(),
-            created_by=brand_data.get('created_by', 'system'),
-            updated_by=brand_data.get('updated_by', 'system'),
-            status=brand_data.get('status', 'active')
-        )
+        new_brand = HotelGroupBrands()
+        new_brand.group_name_en = brand_data['group_name_en'].strip()
+        new_brand.group_name_zh = brand_data['group_name_zh'].strip()
+        new_brand.brand_name_en = brand_data['brand_name_en'].strip()
+        new_brand.brand_name_zh = brand_data['brand_name_zh'].strip()
+        new_brand.positioning_level_en = brand_data['positioning_level_en'].strip()
+        new_brand.positioning_level_zh = brand_data['positioning_level_zh'].strip()
+        new_brand.created_by = brand_data.get('created_by', 'system')
+        new_brand.updated_by = brand_data.get('updated_by', 'system')
+        new_brand.status = brand_data.get('status', 'active')
         
         # 保存到数据库
         db.session.add(new_brand)

+ 34 - 0
app/core/data_parse/parse_menduner.py

@@ -456,6 +456,40 @@ def batch_process_menduner_data(data_list: List[Dict[str, Any]], task_id=None, t
                 validation = validate_menduner_data(normalized)
                 
                 if validation.get('is_valid', False):
+                    # 调用get_brand_group_by_hotel获取品牌和集团信息
+                    if normalized.get('hotel_zh'):
+                        try:
+                            from app.core.data_parse.parse_system import get_brand_group_by_hotel
+                            brand_result = get_brand_group_by_hotel(normalized['hotel_zh'])
+                            if brand_result.get('success') and brand_result.get('data'):
+                                brand_data = brand_result['data']
+                                # 赋值品牌和集团信息
+                                normalized['brand_zh'] = brand_data.get('brand_name_zh', '')
+                                normalized['brand_en'] = brand_data.get('brand_name_en', '')
+                                normalized['affiliation_zh'] = brand_data.get('group_name_zh', '')
+                                normalized['affiliation_en'] = brand_data.get('group_name_en', '')
+                                logging.info(f"成功获取品牌和集团信息: {brand_data}")
+                            else:
+                                logging.warning(f"获取品牌信息失败: {brand_result.get('message', '')}")
+                                # 设置默认值
+                                normalized['brand_zh'] = ''
+                                normalized['brand_en'] = ''
+                                normalized['affiliation_zh'] = ''
+                                normalized['affiliation_en'] = ''
+                        except Exception as brand_error:
+                            logging.error(f"调用get_brand_group_by_hotel失败: {str(brand_error)}")
+                            # 设置默认值
+                            normalized['brand_zh'] = ''
+                            normalized['brand_en'] = ''
+                            normalized['affiliation_zh'] = ''
+                            normalized['affiliation_en'] = ''
+                    else:
+                        # 没有酒店信息,设置默认值
+                        normalized['brand_zh'] = ''
+                        normalized['brand_en'] = ''
+                        normalized['affiliation_zh'] = ''
+                        normalized['affiliation_en'] = ''
+                    
                     # 记录成功解析的人才信息到parsed_talents表
                     try:
                         from app.core.data_parse.parse_task import record_parsed_talent

+ 38 - 0
app/core/data_parse/parse_neo4j_process.py

@@ -119,6 +119,10 @@ class HotelPositionNeo4jProcessor:
     def get_hotel_positions(self) -> List[Dict[str, Any]]:
         """从PostgreSQL数据库获取酒店职位数据"""
         try:
+            if not self.pg_engine:
+                self.logger.error("PostgreSQL引擎未初始化")
+                return []
+                
             query = """
                 SELECT DISTINCT 
                     department_zh, department_en,
@@ -161,6 +165,10 @@ class HotelPositionNeo4jProcessor:
     def get_hotel_group_brands(self) -> List[Dict[str, Any]]:
         """从PostgreSQL数据库获取酒店集团品牌数据"""
         try:
+            if not self.pg_engine:
+                self.logger.error("PostgreSQL引擎未初始化")
+                return []
+                
             query = """
                 SELECT DISTINCT 
                     group_name_zh, group_name_en,
@@ -306,6 +314,21 @@ class HotelPositionNeo4jProcessor:
             relationships_created = 0
             
             # 获取Neo4j会话
+            if not self.neo4j_driver:
+                self.logger.error("Neo4j驱动器未初始化")
+                return {
+                    'success': False,
+                    'message': 'Neo4j驱动器未初始化',
+                    'total': 0,
+                    'departments_created': 0,
+                    'departments_skipped': 0,
+                    'positions_created': 0,
+                    'positions_skipped': 0,
+                    'levels_created': 0,
+                    'levels_skipped': 0,
+                    'relationships_created': 0
+                }
+            
             with self.neo4j_driver.get_session() as session:
                 for position in positions:
                     department_zh = position['department_zh']
@@ -429,6 +452,21 @@ class HotelPositionNeo4jProcessor:
             relationships_created = 0
             
             # 获取Neo4j会话
+            if not self.neo4j_driver:
+                self.logger.error("Neo4j驱动器未初始化")
+                return {
+                    'success': False,
+                    'message': 'Neo4j驱动器未初始化',
+                    'total': 0,
+                    'groups_created': 0,
+                    'groups_skipped': 0,
+                    'brands_created': 0,
+                    'brands_skipped': 0,
+                    'brand_levels_created': 0,
+                    'brand_levels_skipped': 0,
+                    'relationships_created': 0
+                }
+            
             with self.neo4j_driver.get_session() as session:
                 for brand in brands:
                     group_name_zh = brand['group_name_zh']

+ 36 - 0
app/core/data_parse/parse_pic.py

@@ -770,6 +770,8 @@ def parse_table_with_qwen(base64_image: str) -> List[Dict[str, Any]]:
         
         # 直接解析 QWen 返回的 JSON 响应
         try:
+            if not response_content:
+                raise Exception("API返回内容为空")
             parsed_data = json.loads(response_content)
             logging.info("成功解析 Qwen 表格响应中的 JSON")
         except json.JSONDecodeError as e:
@@ -981,6 +983,40 @@ def batch_process_images(image_paths: List[Any], process_type: str = 'table', ta
                     if extracted_data and isinstance(extracted_data, list):
                         # 为每个人员创建一个结果记录
                         for person_idx, person_data in enumerate(extracted_data):
+                            # 调用get_brand_group_by_hotel获取品牌和集团信息
+                            if person_data.get('hotel_zh'):
+                                try:
+                                    from app.core.data_parse.parse_system import get_brand_group_by_hotel
+                                    brand_result = get_brand_group_by_hotel(person_data['hotel_zh'])
+                                    if brand_result.get('success') and brand_result.get('data'):
+                                        brand_data = brand_result['data']
+                                        # 赋值品牌和集团信息
+                                        person_data['brand_zh'] = brand_data.get('brand_name_zh', '')
+                                        person_data['brand_en'] = brand_data.get('brand_name_en', '')
+                                        person_data['affiliation_zh'] = brand_data.get('group_name_zh', '')
+                                        person_data['affiliation_en'] = brand_data.get('group_name_en', '')
+                                        logging.info(f"成功获取品牌和集团信息: {brand_data}")
+                                    else:
+                                        logging.warning(f"获取品牌信息失败: {brand_result.get('message', '')}")
+                                        # 设置默认值
+                                        person_data['brand_zh'] = ''
+                                        person_data['brand_en'] = ''
+                                        person_data['affiliation_zh'] = ''
+                                        person_data['affiliation_en'] = ''
+                                except Exception as brand_error:
+                                    logging.error(f"调用get_brand_group_by_hotel失败: {str(brand_error)}")
+                                    # 设置默认值
+                                    person_data['brand_zh'] = ''
+                                    person_data['brand_en'] = ''
+                                    person_data['affiliation_zh'] = ''
+                                    person_data['affiliation_en'] = ''
+                            else:
+                                # 没有酒店信息,设置默认值
+                                person_data['brand_zh'] = ''
+                                person_data['brand_en'] = ''
+                                person_data['affiliation_zh'] = ''
+                                person_data['affiliation_en'] = ''
+                            
                             # 记录成功解析的人才信息到parsed_talents表
                             try:
                                 from app.core.data_parse.parse_task import record_parsed_talent

+ 36 - 0
app/core/data_parse/parse_resume.py

@@ -303,6 +303,8 @@ def parse_resume_with_qwen(file_path: str) -> Dict[str, Any]:
         
         # 直接解析 Qwen 返回的 JSON 响应
         try:
+            if not response_content:
+                raise Exception("API返回内容为空")
             qwen_response = json.loads(response_content)
             logging.info(f"成功解析 Qwen 简历响应中的 JSON: {qwen_response}")
         except json.JSONDecodeError as e:
@@ -881,6 +883,40 @@ def batch_parse_resumes(file_paths: List[str], task_id=None, task_type=None) ->
                         "title_zh": resume_data.get('title_zh', '')
                     }
                     
+                    # 调用get_brand_group_by_hotel获取品牌和集团信息
+                    if standardized_data.get('hotel_zh'):
+                        try:
+                            from app.core.data_parse.parse_system import get_brand_group_by_hotel
+                            brand_result = get_brand_group_by_hotel(standardized_data['hotel_zh'])
+                            if brand_result.get('success') and brand_result.get('data'):
+                                brand_data = brand_result['data']
+                                # 赋值品牌和集团信息
+                                standardized_data['brand_zh'] = brand_data.get('brand_name_zh', '')
+                                standardized_data['brand_en'] = brand_data.get('brand_name_en', '')
+                                standardized_data['affiliation_zh'] = brand_data.get('group_name_zh', '')
+                                standardized_data['affiliation_en'] = brand_data.get('group_name_en', '')
+                                logging.info(f"成功获取品牌和集团信息: {brand_data}")
+                            else:
+                                logging.warning(f"获取品牌信息失败: {brand_result.get('message', '')}")
+                                # 设置默认值
+                                standardized_data['brand_zh'] = ''
+                                standardized_data['brand_en'] = ''
+                                standardized_data['affiliation_zh'] = ''
+                                standardized_data['affiliation_en'] = ''
+                        except Exception as brand_error:
+                            logging.error(f"调用get_brand_group_by_hotel失败: {str(brand_error)}")
+                            # 设置默认值
+                            standardized_data['brand_zh'] = ''
+                            standardized_data['brand_en'] = ''
+                            standardized_data['affiliation_zh'] = ''
+                            standardized_data['affiliation_en'] = ''
+                    else:
+                        # 没有酒店信息,设置默认值
+                        standardized_data['brand_zh'] = ''
+                        standardized_data['brand_en'] = ''
+                        standardized_data['affiliation_zh'] = ''
+                        standardized_data['affiliation_en'] = ''
+                    
                     # 记录成功解析的人才信息到parsed_talents表
                     try:
                         from app.core.data_parse.parse_task import record_parsed_talent

+ 189 - 71
app/core/data_parse/parse_system.py

@@ -504,33 +504,32 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
             task_type = extracted_data.get('task_type', '名片')
         
         # 创建新的主名片记录
-        main_card = BusinessCard(
-            name_zh=extracted_data.get('name_zh', ''),
-            name_en=extracted_data.get('name_en', ''),
-            title_zh=extracted_data.get('title_zh', ''),
-            title_en=extracted_data.get('title_en', ''),
-            mobile=mobile,
-            phone=extracted_data.get('phone', ''),
-            email=extracted_data.get('email', ''),
-            hotel_zh=extracted_data.get('hotel_zh', ''),
-            hotel_en=extracted_data.get('hotel_en', ''),
-            address_zh=extracted_data.get('address_zh', ''),
-            address_en=extracted_data.get('address_en', ''),
-            postal_code_zh=extracted_data.get('postal_code_zh', ''),
-            postal_code_en=extracted_data.get('postal_code_en', ''),
-            brand_zh=extracted_data.get('brand_zh', ''),
-            brand_en=extracted_data.get('brand_en', ''),
-            affiliation_zh=extracted_data.get('affiliation_zh', ''),
-            affiliation_en=extracted_data.get('affiliation_en', ''),
-            brand_group=extracted_data.get('brand_group', ''),
-            gender=extracted_data.get('gender', ''),  # 新增性别字段
-            image_path=minio_path,
-            career_path=career_path,
-            origin_source=[create_origin_source_entry(task_type, minio_path)],
-            created_at=datetime.now(),
-            updated_by='system',
-            status='duplicate'
-        )
+        main_card = BusinessCard()
+        main_card.name_zh = extracted_data.get('name_zh', '')
+        main_card.name_en = extracted_data.get('name_en', '')
+        main_card.title_zh = extracted_data.get('title_zh', '')
+        main_card.title_en = extracted_data.get('title_en', '')
+        main_card.mobile = mobile
+        main_card.phone = extracted_data.get('phone', '')
+        main_card.email = extracted_data.get('email', '')
+        main_card.hotel_zh = extracted_data.get('hotel_zh', '')
+        main_card.hotel_en = extracted_data.get('hotel_en', '')
+        main_card.address_zh = extracted_data.get('address_zh', '')
+        main_card.address_en = extracted_data.get('address_en', '')
+        main_card.postal_code_zh = extracted_data.get('postal_code_zh', '')
+        main_card.postal_code_en = extracted_data.get('postal_code_en', '')
+        main_card.brand_zh = extracted_data.get('brand_zh', '')
+        main_card.brand_en = extracted_data.get('brand_en', '')
+        main_card.affiliation_zh = extracted_data.get('affiliation_zh', '')
+        main_card.affiliation_en = extracted_data.get('affiliation_en', '')
+        main_card.brand_group = extracted_data.get('brand_group', '')
+        main_card.gender = extracted_data.get('gender', '')  # 新增性别字段
+        main_card.image_path = minio_path
+        main_card.career_path = career_path
+        main_card.origin_source = [create_origin_source_entry(task_type, minio_path)]
+        main_card.created_at = datetime.now()
+        main_card.updated_by = 'system'
+        main_card.status = 'duplicate'
         
         # 保存主记录到数据库
         db.session.add(main_card)
@@ -547,13 +546,12 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
                 'title_zh': duplicate_card.title_zh
             })
         
-        duplicate_record = DuplicateBusinessCard(
-            main_card_id=main_card.id,
-            suspected_duplicates=suspected_duplicates_data,
-            duplicate_reason=reason,
-            processing_status='pending',
-            created_at=datetime.now()
-        )
+        duplicate_record = DuplicateBusinessCard()
+        duplicate_record.main_card_id = main_card.id
+        duplicate_record.suspected_duplicates = suspected_duplicates_data
+        duplicate_record.duplicate_reason = reason
+        duplicate_record.processing_status = 'pending'
+        duplicate_record.created_at = datetime.now()
         
         # 保存重复记录标记
         db.session.add(duplicate_record)
@@ -1608,7 +1606,8 @@ def delete_talent_tag(tag_id):
             
             # 执行删除操作
             delete_result = session.run(delete_query, nodeId=tag_id)
-            deleted = delete_result.single()['deleted']
+            delete_record = delete_result.single()
+            deleted = delete_record['deleted'] if delete_record else 0
             
             if deleted > 0:
                 return {
@@ -1748,6 +1747,8 @@ def query_neo4j_graph(query_requirement):
         matching_content = completion.choices[0].message.content
         
         # 直接解析JSON响应,提取datalabel和hotel字段
+        if not matching_content:
+            raise Exception("API返回内容为空")
         parsed_content = json.loads(matching_content)
         matched_labels = parsed_content.get('datalabel', [])
         matched_hotels = parsed_content.get('hotel', [])
@@ -2188,7 +2189,8 @@ def talent_update_tags(data):
                 RETURN count(r) as deleted_count
                 """
                 clear_result = session.run(clear_relations_query, talent_id=int(talent_id))
-                deleted_count = clear_result.single()['deleted_count']
+                clear_record = clear_result.single()
+                deleted_count = clear_record['deleted_count'] if clear_record else 0
                 logging.info(f"已删除talent_id={talent_id}的{deleted_count}个已有标签关系")
                 
                 # 处理每个标签
@@ -2219,6 +2221,8 @@ def talent_update_tags(data):
                                 updated_at=current_time
                             )
                             tag_record = tag_result.single()
+                            if not tag_record:
+                                raise Exception(f"创建标签失败: {tag_name}")
                             tag_id = tag_record['tag_id']
                         
                         # 2. 创建人才与标签的BELONGS_TO关系
@@ -2410,6 +2414,8 @@ def parse_text_with_qwen25VLplus(image_data):
         
         # 尝试从响应中提取 JSON
         try:
+            if not response_content:
+                raise Exception("API返回内容为空")
             extracted_data = json.loads(response_content)
             logging.info("成功解析 Qwen 响应中的 JSON")
         except json.JSONDecodeError:
@@ -2523,40 +2529,39 @@ def record_parsed_talents(result):
         for talent_data in talent_records:
             try:
                 # 提取ParsedTalent模型需要的字段
-                parsed_talent = ParsedTalent(
-                    name_zh=talent_data.get('name_zh', ''),
-                    name_en=talent_data.get('name_en', ''),
-                    title_zh=talent_data.get('title_zh', ''),
-                    title_en=talent_data.get('title_en', ''),
-                    mobile=talent_data.get('mobile', ''),
-                    phone=talent_data.get('phone', ''),
-                    email=talent_data.get('email', ''),
-                    hotel_zh=talent_data.get('hotel_zh', ''),
-                    hotel_en=talent_data.get('hotel_en', ''),
-                    address_zh=talent_data.get('address_zh', ''),
-                    address_en=talent_data.get('address_en', ''),
-                    postal_code_zh=talent_data.get('postal_code_zh', ''),
-                    postal_code_en=talent_data.get('postal_code_en', ''),
-                    brand_zh=talent_data.get('brand_zh', ''),
-                    brand_en=talent_data.get('brand_en', ''),
-                    affiliation_zh=talent_data.get('affiliation_zh', ''),
-                    affiliation_en=talent_data.get('affiliation_en', ''),
-                    image_path=talent_data.get('image_path', ''),
-                    career_path=talent_data.get('career_path', []),
-                    brand_group=talent_data.get('brand_group', ''),
-                    birthday=talent_data.get('birthday'),
-                    residence=talent_data.get('residence', ''),
-                    age=talent_data.get('age'),
-                    native_place=talent_data.get('native_place', ''),
-                    gender=talent_data.get('gender', ''),  # 新增性别字段
-                    origin_source=talent_data.get('origin_source', []),
-                    talent_profile=talent_data.get('talent_profile', ''),
-                    task_id=str(task_id) if task_id else '',
-                    task_type=task_type,
-                    status='待审核',  # 统一设置为待审核状态
-                    created_at=datetime.now(),
-                    updated_by='system'
-                )
+                parsed_talent = ParsedTalent()
+                parsed_talent.name_zh = talent_data.get('name_zh', '')
+                parsed_talent.name_en = talent_data.get('name_en', '')
+                parsed_talent.title_zh = talent_data.get('title_zh', '')
+                parsed_talent.title_en = talent_data.get('title_en', '')
+                parsed_talent.mobile = talent_data.get('mobile', '')
+                parsed_talent.phone = talent_data.get('phone', '')
+                parsed_talent.email = talent_data.get('email', '')
+                parsed_talent.hotel_zh = talent_data.get('hotel_zh', '')
+                parsed_talent.hotel_en = talent_data.get('hotel_en', '')
+                parsed_talent.address_zh = talent_data.get('address_zh', '')
+                parsed_talent.address_en = talent_data.get('address_en', '')
+                parsed_talent.postal_code_zh = talent_data.get('postal_code_zh', '')
+                parsed_talent.postal_code_en = talent_data.get('postal_code_en', '')
+                parsed_talent.brand_zh = talent_data.get('brand_zh', '')
+                parsed_talent.brand_en = talent_data.get('brand_en', '')
+                parsed_talent.affiliation_zh = talent_data.get('affiliation_zh', '')
+                parsed_talent.affiliation_en = talent_data.get('affiliation_en', '')
+                parsed_talent.image_path = talent_data.get('image_path', '')
+                parsed_talent.career_path = talent_data.get('career_path', [])
+                parsed_talent.brand_group = talent_data.get('brand_group', '')
+                parsed_talent.birthday = talent_data.get('birthday')
+                parsed_talent.residence = talent_data.get('residence', '')
+                parsed_talent.age = talent_data.get('age')
+                parsed_talent.native_place = talent_data.get('native_place', '')
+                parsed_talent.gender = talent_data.get('gender', '')  # 新增性别字段
+                parsed_talent.origin_source = talent_data.get('origin_source', [])
+                parsed_talent.talent_profile = talent_data.get('talent_profile', '')
+                parsed_talent.task_id = str(task_id) if task_id else ''
+                parsed_talent.task_type = task_type
+                parsed_talent.status = '待审核'  # 统一设置为待审核状态
+                parsed_talent.created_at = datetime.now()
+                parsed_talent.updated_by = 'system'
                 
                 # 添加到数据库会话
                 db.session.add(parsed_talent)
@@ -2828,7 +2833,7 @@ def get_brand_group_by_hotel(hotel_zh):
             )
             
             completion = client.chat.completions.create(
-                model="qwen-long-latest",
+                model="qwen-plus-latest",
                 messages=[
                     {"role": "system", "content": "你是一个专业的酒店品牌识别专家。"},
                     {"role": "user", "content": prompt}
@@ -2843,6 +2848,8 @@ def get_brand_group_by_hotel(hotel_zh):
             
             # 解析JSON响应
             try:
+                if not response_content:
+                    raise Exception("API返回内容为空")
                 parsed_response = json.loads(response_content)
                 brand_name = parsed_response.get('brand', '')
                 
@@ -2927,6 +2934,117 @@ def get_brand_group_by_hotel(hotel_zh):
         error_msg = f"获取酒店品牌和集团信息失败: {str(e)}"
         logging.error(error_msg, exc_info=True)
         
+        return {
+            'code': 500,
+            'success': False,
+            'message': error_msg,
+            'data': None
+        }
+
+
+def get_brand_by_deepseek(hotel_zh):
+    """
+    通过DeepSeek API获取酒店的品牌和集团信息
+    
+    Args:
+        hotel_zh (str): 酒店中文名称
+    
+    Returns:
+        dict: 包含操作结果和品牌集团信息的字典
+    """
+    try:
+        if not hotel_zh or not hotel_zh.strip():
+            return {
+                'code': 400,
+                'success': False,
+                'message': '酒店名称不能为空',
+                'data': None
+            }
+        
+        # 导入OpenAI客户端
+        from openai import OpenAI
+        
+        # 配置DeepSeek API
+        client = OpenAI(
+            api_key="sk-54fe24fcf5cc49a39c1c68d137010f0c",
+            base_url="https://api.deepseek.com/v1"
+        )
+        
+        # 构建提示词
+        prompt = f"""
+请根据酒店名称"{hotel_zh}",返回该酒店对应的品牌和集团信息。
+
+请按照以下JSON格式返回结果:
+{{
+    "brand_name_zh": "品牌中文名称",
+    "brand_name_en": "品牌英文名称",
+    "group_name_zh": "集团中文名称",
+    "group_name_en": "集团英文名称"
+}}
+
+要求:
+1. 只返回JSON格式的数据,不要包含其他文字
+2. 如果无法确定品牌或集团信息,请返回空字符串,不要返回None
+3. 确保返回的是有效的JSON格式
+4. 请在推理后给出明确的JSON答案
+"""
+        
+        # 调用DeepSeek API
+        response = client.chat.completions.create(
+            model="deepseek-reasoner",
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
+            temperature=0.1,
+            max_tokens=500
+        )
+        
+        # 获取响应内容
+        response_content = response.choices[0].message.content
+        if not response_content:
+            raise Exception("DeepSeek API返回内容为空")
+        
+        response_content = response_content.strip()
+        
+        # 解析JSON响应
+        import json
+        brand_info = json.loads(response_content)
+        
+        # 验证返回的数据结构
+        if not isinstance(brand_info, dict):
+            raise Exception("DeepSeek API返回的数据格式不正确")
+        
+        brand_name_zh = brand_info.get('brand_name_zh', '')
+        brand_name_en = brand_info.get('brand_name_en', '')
+        group_name_zh = brand_info.get('group_name_zh', '')
+        group_name_en = brand_info.get('group_name_en', '')
+        
+        logging.info(f"DeepSeek成功获取酒店品牌信息: {hotel_zh} -> 品牌: {brand_name_zh}, 集团: {group_name_zh}")
+        
+        return {
+            'code': 200,
+            'success': True,
+            'message': f'成功获取酒店 "{hotel_zh}" 的品牌和集团信息',
+            'data': {
+                'hotel_zh': hotel_zh,
+                'brand_name_zh': brand_name_zh,
+                'brand_name_en': brand_name_en,
+                'group_name_zh': group_name_zh,
+                'group_name_en': group_name_en
+            }
+        }
+        
+    except Exception as json_error:
+        if 'json' in str(type(json_error)):
+            error_msg = f"解析DeepSeek API返回的JSON数据失败: {str(json_error)}"
+        else:
+            error_msg = f"通过DeepSeek获取酒店品牌和集团信息失败: {str(json_error)}"
+        
+        logging.error(error_msg, exc_info=True)
+        
         return {
             'code': 500,
             'success': False,

+ 117 - 112
app/core/data_parse/parse_task.py

@@ -87,9 +87,9 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
         
         # 在执行当前代码逻辑之前,清除传入节点的WORK_FOR和WORK_AS关系
         try:
-            from app.core.graph.graph_operations import connect_graph
+            from app.services.neo4j_driver import neo4j_driver
             
-            with connect_graph().session() as session:
+            with neo4j_driver.get_session() as session:
                 # 清除WORK_FOR关系
                 clear_work_for_query = """
                 MATCH (t:Talent)-[r:WORK_FOR]->(h:Hotel)
@@ -116,9 +116,9 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
         # 查询Neo4j图数据库中DataLabel标签中node_type为"brand"的所有节点
         brand_labels = []
         try:
-            from app.core.graph.graph_operations import connect_graph
+            from app.services.neo4j_driver import neo4j_driver
             
-            with connect_graph().session() as session:
+            with neo4j_driver.get_session() as session:
                 # 查询node_type为"brand"的DataLabel节点
                 brand_query = """
                 MATCH (n:DataLabel)
@@ -166,10 +166,10 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                 
                 # 创建Hotel节点
                 try:
-                    from app.core.graph.graph_operations import connect_graph
+                    from app.services.neo4j_driver import neo4j_driver
                     
                     # 直接使用Cypher语句查找或创建Hotel节点
-                    with connect_graph().session() as session:
+                    with neo4j_driver.get_session() as session:
                         # 首先查找是否已存在相同hotel_zh的Hotel节点
                         find_query = """
                         MATCH (h:Hotel {hotel_zh: $hotel_zh})
@@ -199,7 +199,7 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                                                       hotel_en=hotel_en,
                                                       create_time=current_time).single()
                             
-                            hotel_node_id = create_result['node_id']
+                            hotel_node_id = create_result['node_id'] if create_result else None
                             logging.info(f"成功创建新Hotel节点,Neo4j ID: {hotel_node_id}, 酒店: {hotel_zh}")
                             result['hotels_created'] += 1
                     
@@ -275,10 +275,10 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                             if brand_name:
                                 # 查找对应的DataLabel节点
                                 try:
-                                    from app.core.graph.graph_operations import connect_graph
+                                    from app.services.neo4j_driver import neo4j_driver
                                     
                                     # 直接查询Neo4j查找name_zh等于品牌名称的DataLabel节点
-                                    with connect_graph().session() as session:
+                                    with neo4j_driver.get_session() as session:
                                         query = "MATCH (n:DataLabel {name_zh: $brand_name}) RETURN id(n) as node_id, n.name_zh as name_zh LIMIT 1"
                                         result_query = session.run(query, brand_name=brand_name).single()
                                         
@@ -287,9 +287,9 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                                             
                                             # 创建Hotel节点与品牌标签的BELONGS_TO关系
                                             try:
-                                                from app.core.graph.graph_operations import connect_graph
+                                                from app.services.neo4j_driver import neo4j_driver
                                                 
-                                                with connect_graph().session() as session:
+                                                with neo4j_driver.get_session() as session:
                                                     # 检查Hotel节点与DataLabel节点之间是否已经存在BELONGS_TO关系
                                                     check_relationship_query = """
                                                     MATCH (h:Hotel)-[r:BELONGS_TO]->(d:DataLabel)
@@ -307,7 +307,7 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                                                     else:
                                                         # 关系不存在,创建新的BELONGS_TO关系
                                                         # 直接使用Cypher创建关系,避免使用create_relationship函数
-                                                        with connect_graph().session() as session:
+                                                        with neo4j_driver.get_session() as session:
                                                             create_rel_query = """
                                                             MATCH (h:Hotel), (d:DataLabel)
                                                             WHERE id(h) = $hotel_node_id AND id(d) = $label_node_id
@@ -328,7 +328,7 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                                                 logging.error(f"检查Hotel节点与品牌标签关系失败: {str(check_error)}")
                                                 result['errors'].append(f"检查关系失败: {hotel_zh} -> {brand_name}, 错误: {str(check_error)}")
                                                 # 即使检查失败,也尝试创建关系
-                                                with connect_graph().session() as session:
+                                                with neo4j_driver.get_session() as session:
                                                     create_rel_query = """
                                                     MATCH (h:Hotel), (d:DataLabel)
                                                     WHERE id(h) = $hotel_node_id AND id(d) = $label_node_id
@@ -382,9 +382,9 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                         work_for_properties['date'] = date
                     
                     # 直接使用Cypher创建WORK_FOR关系,避免使用create_relationship函数
-                    from app.core.graph.graph_operations import connect_graph
+                    from app.services.neo4j_driver import neo4j_driver
                     
-                    with connect_graph().session() as session:
+                    with neo4j_driver.get_session() as session:
                         work_for_query = """
                         MATCH (t:Talent), (h:Hotel)
                         WHERE id(t) = $talent_node_id AND id(h) = $hotel_node_id
@@ -408,10 +408,10 @@ def process_career_path(career_path, talent_node_id, talent_name_zh):
                     try:
                         # 查找对应的DataLabel节点(职位标签)
                         try:
-                            from app.core.graph.graph_operations import connect_graph
+                            from app.services.neo4j_driver import neo4j_driver
                             
                             # 直接查询Neo4j查找name_zh等于title_zh的DataLabel节点
-                            with connect_graph().session() as session:
+                            with neo4j_driver.get_session() as session:
                                 query = "MATCH (n:DataLabel {name_zh: $title_zh}) RETURN id(n) as node_id, n.name_zh as name_zh LIMIT 1"
                                 result_query = session.run(query, title_zh=title_zh).single()
                                 
@@ -504,7 +504,7 @@ def create_or_get_talent_node(**properties):
         节点id
     """
     try:
-        from app.core.graph.graph_operations import connect_graph
+        from app.services.neo4j_driver import neo4j_driver
         
         # 检查是否提供了pg_id
         if 'pg_id' not in properties:
@@ -512,7 +512,7 @@ def create_or_get_talent_node(**properties):
         
         pg_id = properties['pg_id']
         
-        with connect_graph().session() as session:
+        with neo4j_driver.get_session() as session:
             # 检查节点是否存在(根据pg_id查找)
             query = """
             MATCH (n:Talent {pg_id: $pg_id})
@@ -524,14 +524,19 @@ def create_or_get_talent_node(**properties):
                 # 节点存在,更新属性
                 props_string = ", ".join([f"n.{key} = ${key}" for key in properties if key != 'pg_id'])
                 if props_string:
+                    # 使用字符串格式化来避免动态字符串构造
                     update_query = f"""
                     MATCH (n:Talent {{pg_id: $pg_id}})
                     SET {props_string}
                     RETURN id(n) as node_id
                     """
-                    result = session.run(update_query, **properties).single()
-                    logging.info(f"已更新现有Talent节点,pg_id: {pg_id}, Neo4j ID: {result['node_id']}")
-                    return result["node_id"]
+                    result = session.run(update_query, **properties).single()  # type: ignore
+                    if result:
+                        logging.info(f"已更新现有Talent节点,pg_id: {pg_id}, Neo4j ID: {result['node_id']}")
+                        return result["node_id"]
+                    else:
+                        logging.error(f"更新Talent节点失败,pg_id: {pg_id}")
+                        return None
                 else:
                     # 没有需要更新的属性,返回现有节点ID
                     existing_node_id = result['n'].id
@@ -544,9 +549,13 @@ def create_or_get_talent_node(**properties):
             CREATE (n:Talent {{{props_keys}}})
             RETURN id(n) as node_id
             """
-            result = session.run(create_query, **properties).single()
-            logging.info(f"已创建新Talent节点,pg_id: {pg_id}, Neo4j ID: {result['node_id']}")
-            return result["node_id"]
+            result = session.run(create_query, **properties).single()  # type: ignore
+            if result:
+                logging.info(f"已创建新Talent节点,pg_id: {pg_id}, Neo4j ID: {result['node_id']}")
+                return result["node_id"]
+            else:
+                logging.error(f"创建Talent节点失败,pg_id: {pg_id}")
+                return None
             
     except Exception as e:
         logging.error(f"Error in create_or_get_talent_node: {str(e)}")
@@ -760,17 +769,16 @@ def _handle_recruitment_task(created_by, data=None):
                 task_source.append(item)
         
         # 创建解析任务记录
-        parse_task = ParseTaskRepository(
-            task_name=task_name,
-            task_status='待解析',  # 招聘任务不需要实际解析操作,直接设置为成功
-            task_type='招聘',
-            task_source=task_source,
-            collection_count=len(task_source),  # 招聘任务的数据项数量
-            parse_count=0,
-            parse_result=None,
-            created_by=created_by,
-            updated_by=created_by
-        )
+        parse_task = ParseTaskRepository()
+        parse_task.task_name = task_name
+        parse_task.task_status = '待解析'  # 招聘任务不需要实际解析操作,直接设置为成功
+        parse_task.task_type = '招聘'
+        parse_task.task_source = task_source
+        parse_task.collection_count = len(task_source)  # 招聘任务的数据项数量
+        parse_task.parse_count = 0
+        parse_task.parse_result = None
+        parse_task.created_by = created_by
+        parse_task.updated_by = created_by
         
         db.session.add(parse_task)
         db.session.commit()
@@ -1036,17 +1044,16 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
         
         # 创建解析任务记录
         try:
-            parse_task = ParseTaskRepository(
-                task_name=task_name,
-                task_status='待解析',
-                task_type=task_type,
-                task_source=task_source,
-                collection_count=len(uploaded_files),
-                parse_count=0,  # 解析数量初始为0
-                parse_result=None,  # 解析结果初始为空
-                created_by=created_by,
-                updated_by=created_by
-            )
+            parse_task = ParseTaskRepository()
+            parse_task.task_name = task_name
+            parse_task.task_status = '待解析'
+            parse_task.task_type = task_type
+            parse_task.task_source = task_source
+            parse_task.collection_count = len(uploaded_files)
+            parse_task.parse_count = 0  # 解析数量初始为0
+            parse_task.parse_result = None  # 解析结果初始为空
+            parse_task.created_by = created_by
+            parse_task.updated_by = created_by
             
             db.session.add(parse_task)
             db.session.commit()
@@ -1433,37 +1440,36 @@ def add_single_talent(talent_data, minio_path=None, task_type=None):
                     except (ValueError, TypeError):
                         age_value = None
                 
-                business_card = BusinessCard(
-                    name_zh=talent_data.get('name_zh', ''),
-                    name_en=talent_data.get('name_en', ''),
-                    title_zh=talent_data.get('title_zh', ''),
-                    title_en=talent_data.get('title_en', ''),
-                    mobile=normalize_mobile_numbers(talent_data.get('mobile', '')),
-                    phone=talent_data.get('phone', ''),
-                    email=talent_data.get('email', ''),
-                    hotel_zh=talent_data.get('hotel_zh', ''),
-                    hotel_en=talent_data.get('hotel_en', ''),
-                    address_zh=talent_data.get('address_zh', ''),
-                    address_en=talent_data.get('address_en', ''),
-                    postal_code_zh=talent_data.get('postal_code_zh', ''),
-                    postal_code_en=talent_data.get('postal_code_en', ''),
-                    brand_zh=talent_data.get('brand_zh', ''),
-                    brand_en=talent_data.get('brand_en', ''),
-                    affiliation_zh=talent_data.get('affiliation_zh', ''),
-                    affiliation_en=talent_data.get('affiliation_en', ''),
-                    birthday=datetime.strptime(talent_data.get('birthday'), '%Y-%m-%d').date() if talent_data.get('birthday') else None,
-                    age=age_value,
-                    native_place=talent_data.get('native_place', ''),
-                    gender=talent_data.get('gender', ''),  # 新增性别字段
-                    residence=talent_data.get('residence', ''),
-                    image_path=image_path,  # 从talent_data获取图片路径
-                    career_path=career_path,  # 直接使用talent_data中的career_path
-                    brand_group=talent_data.get('brand_group', ''),
-                    origin_source=_update_origin_source_with_minio_path(None, talent_data),
-                    talent_profile=talent_data.get('talent_profile', ''),
-                    status='active',
-                    updated_by='talent_system'
-                )
+                business_card = BusinessCard()
+                business_card.name_zh = talent_data.get('name_zh', '')
+                business_card.name_en = talent_data.get('name_en', '')
+                business_card.title_zh = talent_data.get('title_zh', '')
+                business_card.title_en = talent_data.get('title_en', '')
+                business_card.mobile = normalize_mobile_numbers(talent_data.get('mobile', ''))
+                business_card.phone = talent_data.get('phone', '')
+                business_card.email = talent_data.get('email', '')
+                business_card.hotel_zh = talent_data.get('hotel_zh', '')
+                business_card.hotel_en = talent_data.get('hotel_en', '')
+                business_card.address_zh = talent_data.get('address_zh', '')
+                business_card.address_en = talent_data.get('address_en', '')
+                business_card.postal_code_zh = talent_data.get('postal_code_zh', '')
+                business_card.postal_code_en = talent_data.get('postal_code_en', '')
+                business_card.brand_zh = talent_data.get('brand_zh', '')
+                business_card.brand_en = talent_data.get('brand_en', '')
+                business_card.affiliation_zh = talent_data.get('affiliation_zh', '')
+                business_card.affiliation_en = talent_data.get('affiliation_en', '')
+                business_card.birthday = datetime.strptime(talent_data.get('birthday'), '%Y-%m-%d').date() if talent_data.get('birthday') else None
+                business_card.age = age_value
+                business_card.native_place = talent_data.get('native_place', '')
+                business_card.gender = talent_data.get('gender', '')  # 新增性别字段
+                business_card.residence = talent_data.get('residence', '')
+                business_card.image_path = image_path  # 从talent_data获取图片路径
+                business_card.career_path = career_path  # 直接使用talent_data中的career_path
+                business_card.brand_group = talent_data.get('brand_group', '')
+                business_card.origin_source = _update_origin_source_with_minio_path(None, talent_data)
+                business_card.talent_profile = talent_data.get('talent_profile', '')
+                business_card.status = 'active'
+                business_card.updated_by = 'talent_system'
                 
                 db.session.add(business_card)
                 db.session.commit()
@@ -1824,40 +1830,39 @@ def record_parsed_talent(talent_data, task_id=None, task_type=None):
             }
         
         # 创建ParsedTalent记录
-        parsed_talent = ParsedTalent(
-            name_zh=_clean_field_value(talent_data.get('name_zh', ''), 'string'),
-            name_en=_clean_field_value(talent_data.get('name_en', ''), 'string'),
-            title_zh=_clean_field_value(talent_data.get('title_zh', ''), 'string'),
-            title_en=_clean_field_value(talent_data.get('title_en', ''), 'string'),
-            mobile=_clean_field_value(talent_data.get('mobile', ''), 'string'),
-            phone=_clean_field_value(talent_data.get('phone', ''), 'string'),
-            email=_clean_field_value(talent_data.get('email', ''), 'string'),
-            hotel_zh=_clean_field_value(talent_data.get('hotel_zh', ''), 'string'),
-            hotel_en=_clean_field_value(talent_data.get('hotel_en', ''), 'string'),
-            address_zh=_clean_field_value(talent_data.get('address_zh', ''), 'string'),
-            address_en=_clean_field_value(talent_data.get('address_en', ''), 'string'),
-            postal_code_zh=_clean_field_value(talent_data.get('postal_code_zh', ''), 'string'),
-            postal_code_en=_clean_field_value(talent_data.get('postal_code_en', ''), 'string'),
-            brand_zh=_clean_field_value(talent_data.get('brand_zh', ''), 'string'),
-            brand_en=_clean_field_value(talent_data.get('brand_en', ''), 'string'),
-            affiliation_zh=_clean_field_value(talent_data.get('affiliation_zh', ''), 'string'),
-            affiliation_en=_clean_field_value(talent_data.get('affiliation_en', ''), 'string'),
-            image_path=_clean_field_value(talent_data.get('image_path', ''), 'string'),
-            career_path=talent_data.get('career_path', []),
-            brand_group=_clean_field_value(talent_data.get('brand_group', ''), 'string'),
-            birthday=_clean_field_value(talent_data.get('birthday'), 'date'),
-            residence=_clean_field_value(talent_data.get('residence', ''), 'string'),
-            age=_clean_field_value(talent_data.get('age'), 'int'),
-            native_place=_clean_field_value(talent_data.get('native_place', ''), 'string'),
-            gender=_clean_field_value(talent_data.get('gender', ''), 'string'),  # 新增性别字段
-            origin_source=talent_data.get('origin_source', []),
-            talent_profile=_clean_field_value(talent_data.get('talent_profile', ''), 'string'),
-            task_id=str(task_id) if task_id else '',
-            task_type=task_type or '',
-            status='待审核',  # 统一设置为待审核状态
-            created_at=get_east_asia_time_naive(),
-            updated_by='system'
-        )
+        parsed_talent = ParsedTalent()
+        parsed_talent.name_zh = _clean_field_value(talent_data.get('name_zh', ''), 'string')
+        parsed_talent.name_en = _clean_field_value(talent_data.get('name_en', ''), 'string')
+        parsed_talent.title_zh = _clean_field_value(talent_data.get('title_zh', ''), 'string')
+        parsed_talent.title_en = _clean_field_value(talent_data.get('title_en', ''), 'string')
+        parsed_talent.mobile = _clean_field_value(talent_data.get('mobile', ''), 'string')
+        parsed_talent.phone = _clean_field_value(talent_data.get('phone', ''), 'string')
+        parsed_talent.email = _clean_field_value(talent_data.get('email', ''), 'string')
+        parsed_talent.hotel_zh = _clean_field_value(talent_data.get('hotel_zh', ''), 'string')
+        parsed_talent.hotel_en = _clean_field_value(talent_data.get('hotel_en', ''), 'string')
+        parsed_talent.address_zh = _clean_field_value(talent_data.get('address_zh', ''), 'string')
+        parsed_talent.address_en = _clean_field_value(talent_data.get('address_en', ''), 'string')
+        parsed_talent.postal_code_zh = _clean_field_value(talent_data.get('postal_code_zh', ''), 'string')
+        parsed_talent.postal_code_en = _clean_field_value(talent_data.get('postal_code_en', ''), 'string')
+        parsed_talent.brand_zh = _clean_field_value(talent_data.get('brand_zh', ''), 'string')
+        parsed_talent.brand_en = _clean_field_value(talent_data.get('brand_en', ''), 'string')
+        parsed_talent.affiliation_zh = _clean_field_value(talent_data.get('affiliation_zh', ''), 'string')
+        parsed_talent.affiliation_en = _clean_field_value(talent_data.get('affiliation_en', ''), 'string')
+        parsed_talent.image_path = _clean_field_value(talent_data.get('image_path', ''), 'string')
+        parsed_talent.career_path = talent_data.get('career_path', [])
+        parsed_talent.brand_group = _clean_field_value(talent_data.get('brand_group', ''), 'string')
+        parsed_talent.birthday = _clean_field_value(talent_data.get('birthday'), 'date')
+        parsed_talent.residence = _clean_field_value(talent_data.get('residence', ''), 'string')
+        parsed_talent.age = _clean_field_value(talent_data.get('age'), 'int')
+        parsed_talent.native_place = _clean_field_value(talent_data.get('native_place', ''), 'string')
+        parsed_talent.gender = _clean_field_value(talent_data.get('gender', ''), 'string')  # 新增性别字段
+        parsed_talent.origin_source = talent_data.get('origin_source', [])
+        parsed_talent.talent_profile = _clean_field_value(talent_data.get('talent_profile', ''), 'string')
+        parsed_talent.task_id = str(task_id) if task_id else ''
+        parsed_talent.task_type = task_type or ''
+        parsed_talent.status = '待审核'  # 统一设置为待审核状态
+        parsed_talent.created_at = get_east_asia_time_naive()
+        parsed_talent.updated_by = 'system'
         
         # 添加到数据库会话并提交
         db.session.add(parsed_talent)

+ 64 - 31
app/core/data_parse/parse_web.py

@@ -528,37 +528,36 @@ def process_single_talent_card(talent_data, minio_md_path):
                 except (ValueError, TypeError):
                     age_value = None
             
-            business_card = BusinessCard(
-                name_zh=talent_data.get('name_zh', ''),
-                name_en=talent_data.get('name_en', ''),
-                title_zh=talent_data.get('title_zh', ''),
-                title_en=talent_data.get('title_en', ''),
-                mobile=normalize_mobile_numbers(talent_data.get('mobile', '')),
-                phone=talent_data.get('phone', ''),
-                email=talent_data.get('email', ''),
-                hotel_zh=talent_data.get('hotel_zh', ''),
-                hotel_en=talent_data.get('hotel_en', ''),
-                address_zh=talent_data.get('address_zh', ''),
-                address_en=talent_data.get('address_en', ''),
-                postal_code_zh=talent_data.get('postal_code_zh', ''),
-                postal_code_en=talent_data.get('postal_code_en', ''),
-                brand_zh=talent_data.get('brand_zh', ''),
-                brand_en=talent_data.get('brand_en', ''),
-                affiliation_zh=talent_data.get('affiliation_zh', ''),
-                affiliation_en=talent_data.get('affiliation_en', ''),
-                birthday=datetime.strptime(talent_data.get('birthday'), '%Y-%m-%d').date() if talent_data.get('birthday') else None,
-                age=age_value,
-                native_place=talent_data.get('native_place', ''),
-                gender=talent_data.get('gender', ''),  # 新增性别字段
-                residence=talent_data.get('residence', ''),
-                image_path=image_path,  # 使用下载的图片路径
-                career_path=initial_career_path,
-                brand_group=talent_data.get('brand_group', ''),
-                origin_source=[create_origin_source_entry('webpage_talent', minio_md_path)],
-                talent_profile=talent_data.get('talent_profile', ''),  # 人才档案
-                status='active',
-                updated_by='webpage_talent_system'
-            )
+            business_card = BusinessCard()
+            business_card.name_zh = talent_data.get('name_zh', '')
+            business_card.name_en = talent_data.get('name_en', '')
+            business_card.title_zh = talent_data.get('title_zh', '')
+            business_card.title_en = talent_data.get('title_en', '')
+            business_card.mobile = normalize_mobile_numbers(talent_data.get('mobile', ''))
+            business_card.phone = talent_data.get('phone', '')
+            business_card.email = talent_data.get('email', '')
+            business_card.hotel_zh = talent_data.get('hotel_zh', '')
+            business_card.hotel_en = talent_data.get('hotel_en', '')
+            business_card.address_zh = talent_data.get('address_zh', '')
+            business_card.address_en = talent_data.get('address_en', '')
+            business_card.postal_code_zh = talent_data.get('postal_code_zh', '')
+            business_card.postal_code_en = talent_data.get('postal_code_en', '')
+            business_card.brand_zh = talent_data.get('brand_zh', '')
+            business_card.brand_en = talent_data.get('brand_en', '')
+            business_card.affiliation_zh = talent_data.get('affiliation_zh', '')
+            business_card.affiliation_en = talent_data.get('affiliation_en', '')
+            business_card.birthday = datetime.strptime(talent_data.get('birthday'), '%Y-%m-%d').date() if talent_data.get('birthday') else None
+            business_card.age = age_value
+            business_card.native_place = talent_data.get('native_place', '')
+            business_card.gender = talent_data.get('gender', '')  # 新增性别字段
+            business_card.residence = talent_data.get('residence', '')
+            business_card.image_path = image_path  # 使用下载的图片路径
+            business_card.career_path = initial_career_path
+            business_card.brand_group = talent_data.get('brand_group', '')
+            business_card.origin_source = [create_origin_source_entry('webpage_talent', minio_md_path)]
+            business_card.talent_profile = talent_data.get('talent_profile', '')  # 人才档案
+            business_card.status = 'active'
+            business_card.updated_by = 'webpage_talent_system'
             
             db.session.add(business_card)
             db.session.commit()
@@ -1307,6 +1306,40 @@ def process_single_markdown_file(minio_path, publish_time, task_id=None, task_ty
                             from app.core.data_parse.parse_task import record_parsed_talent
                             standardized_data = _convert_webpage_to_card_format(person, publish_time)
                             
+                            # 调用get_brand_group_by_hotel获取品牌和集团信息
+                            if standardized_data.get('hotel_zh'):
+                                try:
+                                    from app.core.data_parse.parse_system import get_brand_group_by_hotel
+                                    brand_result = get_brand_group_by_hotel(standardized_data['hotel_zh'])
+                                    if brand_result.get('success') and brand_result.get('data'):
+                                        brand_data = brand_result['data']
+                                        # 赋值品牌和集团信息
+                                        standardized_data['brand_zh'] = brand_data.get('brand_name_zh', '')
+                                        standardized_data['brand_en'] = brand_data.get('brand_name_en', '')
+                                        standardized_data['affiliation_zh'] = brand_data.get('group_name_zh', '')
+                                        standardized_data['affiliation_en'] = brand_data.get('group_name_en', '')
+                                        logging.info(f"成功获取品牌和集团信息: {brand_data}")
+                                    else:
+                                        logging.warning(f"获取品牌信息失败: {brand_result.get('message', '')}")
+                                        # 设置默认值
+                                        standardized_data['brand_zh'] = ''
+                                        standardized_data['brand_en'] = ''
+                                        standardized_data['affiliation_zh'] = ''
+                                        standardized_data['affiliation_en'] = ''
+                                except Exception as brand_error:
+                                    logging.error(f"调用get_brand_group_by_hotel失败: {str(brand_error)}")
+                                    # 设置默认值
+                                    standardized_data['brand_zh'] = ''
+                                    standardized_data['brand_en'] = ''
+                                    standardized_data['affiliation_zh'] = ''
+                                    standardized_data['affiliation_en'] = ''
+                            else:
+                                # 没有酒店信息,设置默认值
+                                standardized_data['brand_zh'] = ''
+                                standardized_data['brand_en'] = ''
+                                standardized_data['affiliation_zh'] = ''
+                                standardized_data['affiliation_en'] = ''
+                            
                             # 在记录到parsed_talents表之前,设置image_path和origin_source
                             standardized_data['image_path'] = minio_path