Explorar el Código

修改创建人才记录的错误,记录原始数据来源,记录任务类型

maxiaolong hace 3 semanas
padre
commit
a6542af931

+ 1 - 1
app/core/data_parse/parse_card.py

@@ -484,7 +484,7 @@ def delete_business_card(card_id):
         try:
             # 构建删除talent节点及其所有关系的Cypher查询
             delete_talent_query = """
-            MATCH (t:talent)
+            MATCH (t:Talent)
             WHERE t.pg_id = $pg_id
             OPTIONAL MATCH (t)-[r]-()
             DELETE r, t

+ 5 - 5
app/core/data_parse/parse_system.py

@@ -1563,7 +1563,7 @@ def query_neo4j_graph(query_requirement):
         假设匹配的标签是 ['五星级酒店', '新开酒店经验', '总经理']
         
         生成的Cypher查询语句应该是:
-        MATCH (t:talent)-[:BELONGS_TO]->(dl:DataLabel)  
+        MATCH (t:Talent)-[:BELONGS_TO]->(dl:DataLabel)  
         WHERE dl.name IN ['五星级酒店', '新开酒店经验', '总经理']  
         WITH t, COLLECT(DISTINCT dl.name) AS labels  
         WHERE size(labels) = 3  
@@ -1666,7 +1666,7 @@ def talent_get_tags(talent_id):
         
         # 构建Cypher查询语句,获取人才节点关联的标签
         cypher_query = """
-        MATCH (t:talent)-[r:BELONGS_TO]->(tag:DataLabel)
+        MATCH (t:Talent)-[r:BELONGS_TO]->(tag:DataLabel)
         WHERE t.pg_id = $talent_id
         RETURN t.pg_id as talent_id, tag.name as tag_name
         """
@@ -1771,7 +1771,7 @@ def talent_update_tags(data):
             for talent_id, tags in talent_tags.items():
                 # 首先验证talent节点是否存在
                 check_talent_query = """
-                MATCH (t:talent) 
+                MATCH (t:Talent) 
                 WHERE t.pg_id = $talent_id
                 RETURN t
                 """
@@ -1784,7 +1784,7 @@ def talent_update_tags(data):
                 
                 # 首先清除所有现有的BELONGS_TO关系
                 clear_relations_query = """
-                MATCH (t:talent)-[r:BELONGS_TO]->(:DataLabel)
+                MATCH (t:Talent)-[r:BELONGS_TO]->(:DataLabel)
                 WHERE t.pg_id = $talent_id
                 DELETE r
                 RETURN count(r) as deleted_count
@@ -1825,7 +1825,7 @@ def talent_update_tags(data):
                         
                         # 2. 创建人才与标签的BELONGS_TO关系
                         create_relation_query = """
-                        MATCH (t:talent), (tag:DataLabel)
+                        MATCH (t:Talent), (tag:DataLabel)
                         WHERE t.pg_id = $talent_id AND tag.name = $tag_name
                         CREATE (t)-[r:BELONGS_TO]->(tag)
                         SET r.created_at = $current_time

+ 44 - 21
app/core/data_parse/parse_task.py

@@ -626,13 +626,14 @@ def add_parse_task(files, task_type, created_by='system', data=None, publish_tim
         }
 
 
-def _update_origin_source_with_minio_path(existing_origin_source, minio_path):
+def _update_origin_source_with_minio_path(existing_origin_source, minio_path, task_type=None):
     """
-    更新origin_source字段,将minio_path添加到JSON数组中
+    更新origin_source字段,将minio_path和task_type组成的JSON对象添加到JSON数组中
     
     Args:
         existing_origin_source: 现有的origin_source内容
         minio_path: 要添加的minio_path
+        task_type: 任务类型
         
     Returns:
         str: 更新后的origin_source JSON字符串
@@ -640,10 +641,6 @@ def _update_origin_source_with_minio_path(existing_origin_source, minio_path):
     import json
     
     try:
-        # 如果minio_path为空,直接返回现有的origin_source
-        if not minio_path:
-            return existing_origin_source
-        
         # 解析现有的origin_source
         if existing_origin_source:
             try:
@@ -656,9 +653,23 @@ def _update_origin_source_with_minio_path(existing_origin_source, minio_path):
         else:
             origin_list = []
         
-        # 添加新的minio_path(如果不存在)
-        if minio_path not in origin_list:
-            origin_list.append(minio_path)
+        # 如果minio_path不为空,则创建新的JSON对象并添加到列表中
+        if minio_path:
+            # 创建新的JSON对象,格式为 {"task_type": "任务类型", "minio_path": "路径"}
+            new_entry = {
+                "task_type": task_type if task_type else "",
+                "minio_path": minio_path
+            }
+            
+            # 检查是否已存在相同的条目
+            entry_exists = False
+            for existing_entry in origin_list:
+                if isinstance(existing_entry, dict) and existing_entry.get('minio_path') == minio_path:
+                    entry_exists = True
+                    break
+            
+            if not entry_exists:
+                origin_list.append(new_entry)
         
         # 返回JSON字符串
         return json.dumps(origin_list, ensure_ascii=False)
@@ -669,12 +680,14 @@ def _update_origin_source_with_minio_path(existing_origin_source, minio_path):
         return existing_origin_source
 
 
-def add_single_talent(talent_data):
+def add_single_talent(talent_data, minio_path=None, task_type=None):
     """
     添加单个人才记录(基于add_business_card逻辑,去除MinIO图片上传)
     
     Args:
         talent_data (dict): 人才信息数据
+        minio_path (str, optional): MinIO路径,用于更新origin_source字段
+        task_type (str, optional): 任务类型,用于更新origin_source字段
         
     Returns:
         dict: 处理结果,包含保存的信息和状态
@@ -768,8 +781,9 @@ def add_single_talent(talent_data):
                 # 更新image_path字段,从talent_data中获取
                 existing_card.image_path = talent_data.get('image_path', existing_card.image_path)
                 # 更新origin_source字段,将minio_path添加到JSON数组中
-                minio_path = talent_data.get('minio_path', '')
-                existing_card.origin_source = _update_origin_source_with_minio_path(existing_card.origin_source, minio_path)
+                # 只有当minio_path不为空时才更新origin_source
+                if minio_path:
+                    existing_card.origin_source = _update_origin_source_with_minio_path(existing_card.origin_source, minio_path, task_type)
                 existing_card.talent_profile = talent_data.get('talent_profile', existing_card.talent_profile)
                 existing_card.updated_by = 'talent_system'
                 
@@ -785,7 +799,6 @@ def add_single_talent(talent_data):
                 # 在Neo4j图数据库中更新Talent节点
                 try:
                     from app.core.graph.graph_operations import create_or_get_node
-                    from datetime import datetime
                     
                     # 创建Talent节点属性
                     talent_properties = {
@@ -823,14 +836,14 @@ def add_single_talent(talent_data):
                 )
                 
                 # 更新origin_source字段,将minio_path添加到JSON数组中
-                minio_path = talent_data.get('minio_path', '')
-                main_card.origin_source = _update_origin_source_with_minio_path(main_card.origin_source, minio_path)
+                # 只有当minio_path不为空时才更新origin_source
+                if minio_path:
+                    main_card.origin_source = _update_origin_source_with_minio_path(main_card.origin_source, minio_path, task_type)
                 db.session.commit()  # 提交origin_source的更新
                 
                 # 在Neo4j图数据库中创建Talent节点
                 try:
                     from app.core.graph.graph_operations import create_or_get_node
-                    from datetime import datetime
                     
                     # 创建Talent节点属性
                     talent_properties = {
@@ -917,7 +930,7 @@ def add_single_talent(talent_data):
                     image_path=image_path,  # 从talent_data获取图片路径
                     career_path=initial_career_path,
                     brand_group=talent_data.get('brand_group', ''),
-                    origin_source=json.dumps([talent_data.get('minio_path', '')], ensure_ascii=False) if talent_data.get('minio_path') else None,
+                    origin_source=_update_origin_source_with_minio_path(None, minio_path, task_type) if minio_path else None,
                     talent_profile=talent_data.get('talent_profile', ''),
                     status='active',
                     updated_by='talent_system'
@@ -931,7 +944,6 @@ def add_single_talent(talent_data):
                 # 在Neo4j图数据库中创建Talent节点
                 try:
                     from app.core.graph.graph_operations import create_or_get_node
-                    from datetime import datetime
                     
                     # 创建Talent节点属性
                     talent_properties = {
@@ -1031,7 +1043,10 @@ def add_parsed_talents(api_response_data):
                 'data': None
             }
         
-        logging.info(f"开始处理人才数据,共 {len(results)} 条记录")
+        # 从api_response_data中提取task_type
+        task_type = api_response_data.get('task_type', '')
+        
+        logging.info(f"开始处理人才数据,共 {len(results)} 条记录,任务类型: {task_type}")
         
         processed_results = []
         success_count = 0
@@ -1069,13 +1084,21 @@ def add_parsed_talents(api_response_data):
                     logging.warning(f"第 {i+1} 条记录缺少data字段")
                     continue
                 
-                # 处理人才数据 - 新格式直接使用 item_data
+                # 从结果项中提取人才数据和图片保存地址
                 talent_data = item_data
                 
+                # 根据task_type决定如何提取minio_path
+                if task_type == "招聘":
+                    # 如果task_type为"招聘",从result_item中提取id字段的值
+                    minio_path = str(result_item.get('id', ''))
+                else:
+                    # 其他情况,从result_item中提取minio_path字段
+                    minio_path = result_item.get('minio_path', '')
+                
                 # 处理单个人才数据
                 if talent_data and isinstance(talent_data, dict):
                     try:
-                        talent_result = add_single_talent(talent_data)
+                        talent_result = add_single_talent(talent_data, minio_path, task_type)
                         if talent_result.get('success', False):
                             success_count += 1
                             processed_results.append({

+ 124 - 0
创建parsed_talents.txt

@@ -0,0 +1,124 @@
+create table public.parsed_talents
+(
+    id             serial
+        primary key,
+    name_zh        varchar(100)                          not null,
+    name_en        varchar(100),
+    title_zh       varchar(100),
+    title_en       varchar(100),
+    mobile         varchar(50),
+    phone          varchar(50),
+    email          varchar(100),
+    hotel_zh       varchar(200),
+    hotel_en       varchar(200),
+    address_zh     text,
+    address_en     text,
+    postal_code_zh varchar(20),
+    postal_code_en varchar(20),
+    brand_zh       varchar(100),
+    brand_en       varchar(100),
+    affiliation_zh varchar(200),
+    affiliation_en varchar(200),
+    image_path     varchar(255),
+    career_path    jsonb,
+    brand_group    varchar(200),
+    created_at     timestamp   default CURRENT_TIMESTAMP not null,
+    updated_at     timestamp,
+    updated_by     varchar(50),
+    status         varchar(20) default 'active'::character varying,
+    birthday       date,
+    residence      text,
+    age            integer,
+    native_place   text,
+    origin_source  json,
+    talent_profile text,
+    task_id varchar(50),
+    task_type varchar(20)
+);
+
+comment on table public.parsed_talents is '名片信息表';
+
+comment on column public.parsed_talents.id is '自增主键';
+
+comment on column public.parsed_talents.name_zh is '中文姓名';
+
+comment on column public.parsed_talents.name_en is '英文姓名';
+
+comment on column public.parsed_talents.title_zh is '中文头衔';
+
+comment on column public.parsed_talents.title_en is '英文头衔';
+
+comment on column public.parsed_talents.mobile is '手机号码';
+
+comment on column public.parsed_talents.phone is '固定电话';
+
+comment on column public.parsed_talents.email is '电子邮箱';
+
+comment on column public.parsed_talents.hotel_zh is '中文酒店名称';
+
+comment on column public.parsed_talents.hotel_en is '英文酒店名称';
+
+comment on column public.parsed_talents.address_zh is '中文详细地址';
+
+comment on column public.parsed_talents.address_en is '英文详细地址';
+
+comment on column public.parsed_talents.postal_code_zh is '中文邮政编码';
+
+comment on column public.parsed_talents.postal_code_en is '英文邮政编码';
+
+comment on column public.parsed_talents.brand_zh is '中文品牌名称';
+
+comment on column public.parsed_talents.brand_en is '英文品牌名称';
+
+comment on column public.parsed_talents.affiliation_zh is '中文隶属关系';
+
+comment on column public.parsed_talents.affiliation_en is '英文隶属关系';
+
+comment on column public.parsed_talents.image_path is 'MinIO中的图片路径';
+
+comment on column public.parsed_talents.career_path is '职业轨迹,JSON格式';
+
+comment on column public.parsed_talents.brand_group is '品牌组合';
+
+comment on column public.parsed_talents.created_at is '创建时间';
+
+comment on column public.parsed_talents.updated_at is '修改时间';
+
+comment on column public.parsed_talents.updated_by is '修改人';
+
+comment on column public.parsed_talents.status is '状态';
+
+comment on column public.parsed_talents.birthday is '生日';
+
+comment on column public.parsed_talents.residence is '居住地';
+
+comment on column public.parsed_talents.age is '年龄字段 - 存储人员年龄信息,取值范围1-150';
+
+comment on column public.parsed_talents.native_place is '籍贯字段 - 存储人员籍贯或出生地信息';
+
+comment on column public.parsed_talents.origin_source is '原始资料记录字段 - 采用JSON格式保存原始资料信息,包括数据来源、MinIO路径等';
+
+comment on column public.parsed_talents.talent_profile is '人才的情况简介';
+
+comment on column public.parsed_talents.task_id is '解析任务ID';
+
+comment on column public.parsed_talents.task_type is '解析任务类型';
+
+alter table public.parsed_talents
+    owner to postgres;
+
+create index idx_parsed_talents_name_zh
+    on public.parsed_talents (name_zh);
+
+create index idx_parsed_talents_name_en
+    on public.parsed_talents (name_en);
+
+create index idx_parsed_talents_hotel_zh
+    on public.parsed_talents (hotel_zh);
+
+create index idx_parsed_talents_created_at
+    on public.parsed_talents (created_at);
+
+create index idx_parsed_talents_status
+    on public.parsed_talents (status);
+