|
@@ -19,6 +19,7 @@ import time # 添加导入时间模块
|
|
|
|
|
|
# 导入Neo4j相关函数
|
|
|
from app.core.data_parse.parse_task import create_or_get_talent_node, process_career_path
|
|
|
+from app.core.data_parse.time_utils import get_east_asia_time_naive
|
|
|
|
|
|
# 名片解析数据模型
|
|
|
class BusinessCard(db.Model):
|
|
@@ -45,14 +46,15 @@ class BusinessCard(db.Model):
|
|
|
birthday = db.Column(db.Date) # 生日,存储年月日
|
|
|
age = db.Column(db.Integer) # 年龄字段
|
|
|
native_place = db.Column(db.Text) # 籍贯字段
|
|
|
+ gender = db.Column(db.String(10)) # 新增性别字段
|
|
|
residence = db.Column(db.Text) # 居住地
|
|
|
image_path = db.Column(db.String(255)) # MinIO中存储的路径
|
|
|
career_path = db.Column(db.JSON) # 职业轨迹,JSON格式
|
|
|
brand_group = db.Column(db.String(200)) # 品牌组合
|
|
|
origin_source = db.Column(db.JSON) # 原始资料记录,JSON格式
|
|
|
talent_profile = db.Column(db.Text) # 人才档案,文本格式
|
|
|
- created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
|
|
|
- updated_at = db.Column(db.DateTime, onupdate=datetime.now)
|
|
|
+ created_at = db.Column(db.DateTime, default=get_east_asia_time_naive, nullable=False)
|
|
|
+ updated_at = db.Column(db.DateTime, onupdate=get_east_asia_time_naive)
|
|
|
updated_by = db.Column(db.String(50))
|
|
|
status = db.Column(db.String(20), default='active')
|
|
|
|
|
@@ -79,6 +81,7 @@ class BusinessCard(db.Model):
|
|
|
'birthday': self.birthday.strftime('%Y-%m-%d') if self.birthday else None,
|
|
|
'age': self.age,
|
|
|
'native_place': self.native_place,
|
|
|
+ 'gender': self.gender, # 新增性别字段
|
|
|
'residence': self.residence,
|
|
|
'image_path': self.image_path,
|
|
|
'career_path': self.career_path,
|
|
@@ -117,14 +120,15 @@ class ParsedTalent(db.Model):
|
|
|
image_path = db.Column(db.String(255))
|
|
|
career_path = db.Column(db.JSON)
|
|
|
brand_group = db.Column(db.String(200))
|
|
|
- created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
|
|
|
- updated_at = db.Column(db.DateTime, onupdate=datetime.now)
|
|
|
+ created_at = db.Column(db.DateTime, default=get_east_asia_time_naive, nullable=False)
|
|
|
+ updated_at = db.Column(db.DateTime, onupdate=get_east_asia_time_naive)
|
|
|
updated_by = db.Column(db.String(50))
|
|
|
status = db.Column(db.String(20), default='active')
|
|
|
birthday = db.Column(db.Date)
|
|
|
residence = db.Column(db.Text)
|
|
|
age = db.Column(db.Integer)
|
|
|
native_place = db.Column(db.Text)
|
|
|
+ gender = db.Column(db.String(10)) # 新增性别字段
|
|
|
origin_source = db.Column(db.JSON)
|
|
|
talent_profile = db.Column(db.Text)
|
|
|
task_id = db.Column(db.String(50))
|
|
@@ -161,6 +165,7 @@ class ParsedTalent(db.Model):
|
|
|
'residence': self.residence,
|
|
|
'age': self.age,
|
|
|
'native_place': self.native_place,
|
|
|
+ 'gender': self.gender, # 新增性别字段
|
|
|
'origin_source': self.origin_source,
|
|
|
'talent_profile': self.talent_profile,
|
|
|
'task_id': self.task_id,
|
|
@@ -177,7 +182,7 @@ class DuplicateBusinessCard(db.Model):
|
|
|
suspected_duplicates = db.Column(db.JSON, nullable=False) # 疑似重复记录列表,JSON格式
|
|
|
duplicate_reason = db.Column(db.String(200), nullable=False) # 重复原因
|
|
|
processing_status = db.Column(db.String(20), default='pending') # 处理状态:pending/processed/ignored
|
|
|
- created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
|
|
|
+ created_at = db.Column(db.DateTime, default=get_east_asia_time_naive, nullable=False)
|
|
|
processed_at = db.Column(db.DateTime) # 处理时间
|
|
|
processed_by = db.Column(db.String(50)) # 处理人
|
|
|
processing_notes = db.Column(db.Text) # 处理备注
|
|
@@ -518,6 +523,7 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
|
|
|
affiliation_zh=extracted_data.get('affiliation_zh', ''),
|
|
|
affiliation_en=extracted_data.get('affiliation_en', ''),
|
|
|
brand_group=extracted_data.get('brand_group', ''),
|
|
|
+ gender=extracted_data.get('gender', ''), # 新增性别字段
|
|
|
image_path=minio_path,
|
|
|
career_path=career_path,
|
|
|
origin_source=[create_origin_source_entry(task_type, minio_path)],
|
|
@@ -622,8 +628,9 @@ def get_business_cards():
|
|
|
from app.services.neo4j_driver import neo4j_driver
|
|
|
|
|
|
# 构建批量查询的Cypher语句,获取所有Talent节点的关系数量
|
|
|
+ # 只查询BELONGS_TO和WORK_AS这两种关系
|
|
|
cypher_query = """
|
|
|
- MATCH (t:Talent)-[r]-()
|
|
|
+ MATCH (t:Talent)-[r:BELONGS_TO|WORK_AS]-()
|
|
|
WHERE t.pg_id IS NOT NULL
|
|
|
RETURN t.pg_id as pg_id, count(r) as relation_count
|
|
|
"""
|
|
@@ -738,7 +745,7 @@ def update_business_card(card_id, data):
|
|
|
updatable_fields = ['name_zh', 'name_en', 'title_zh', 'title_en', 'mobile', 'phone', 'email',
|
|
|
'hotel_zh', 'hotel_en', 'address_zh', 'address_en', 'postal_code_zh', 'postal_code_en',
|
|
|
'brand_zh', 'brand_en', 'affiliation_zh', 'affiliation_en', 'career_path', 'brand_group',
|
|
|
- 'birthday', 'residence', 'age', 'native_place', 'talent_profile']
|
|
|
+ 'birthday', 'residence', 'age', 'native_place', 'gender', 'talent_profile']
|
|
|
|
|
|
for field in updatable_fields:
|
|
|
if field in data and data[field] is not None:
|
|
@@ -1297,7 +1304,7 @@ def create_talent_tag(tag_data):
|
|
|
|
|
|
Args:
|
|
|
tag_data: 包含标签信息的字典,包括:
|
|
|
- - name: 标签名称
|
|
|
+ - name_zh: 标签名称
|
|
|
- category: 标签分类
|
|
|
- description: 标签描述
|
|
|
- status: 启用状态
|
|
@@ -1309,7 +1316,7 @@ def create_talent_tag(tag_data):
|
|
|
from app.services.neo4j_driver import neo4j_driver
|
|
|
|
|
|
# 验证必要参数存在
|
|
|
- if not tag_data or 'name' not in tag_data or not tag_data['name']:
|
|
|
+ if not tag_data or 'name_zh' not in tag_data or not tag_data['name_zh']:
|
|
|
return {
|
|
|
'code': 400,
|
|
|
'success': False,
|
|
@@ -1319,7 +1326,7 @@ def create_talent_tag(tag_data):
|
|
|
|
|
|
# 准备节点属性
|
|
|
tag_properties = {
|
|
|
- 'name': tag_data.get('name'),
|
|
|
+ 'name_zh': tag_data.get('name_zh'),
|
|
|
'category': tag_data.get('category', '未分类'),
|
|
|
'describe': tag_data.get('description', ''), # 使用describe与现有系统保持一致
|
|
|
'status': tag_data.get('status', 'active'),
|
|
@@ -1330,14 +1337,14 @@ def create_talent_tag(tag_data):
|
|
|
from app.core.graph.graph_operations import create_or_get_node
|
|
|
|
|
|
# 如果提供了名称,尝试获取英文翻译
|
|
|
- if 'name' in tag_data and tag_data['name']:
|
|
|
+ if 'name_zh' in tag_data and tag_data['name_zh']:
|
|
|
try:
|
|
|
from app.api.data_interface.routes import translate_and_parse
|
|
|
- en_name = translate_and_parse(tag_data['name'])
|
|
|
- tag_properties['en_name'] = en_name[0] if en_name and isinstance(en_name, list) else ''
|
|
|
+ en_name = translate_and_parse(tag_data['name_zh'])
|
|
|
+ tag_properties['name_en'] = en_name[0] if en_name and isinstance(en_name, list) else ''
|
|
|
except Exception as e:
|
|
|
logging.warning(f"获取标签英文名失败: {str(e)}")
|
|
|
- tag_properties['en_name'] = ''
|
|
|
+ tag_properties['name_en'] = ''
|
|
|
|
|
|
# 创建节点
|
|
|
node_id = create_or_get_node('DataLabel', **tag_properties)
|
|
@@ -1433,7 +1440,7 @@ def update_talent_tag(tag_id, tag_data):
|
|
|
Args:
|
|
|
tag_id: 标签节点ID
|
|
|
tag_data: 包含更新信息的字典,可能包括:
|
|
|
- - name: 标签名称
|
|
|
+ - name_zh: 标签名称
|
|
|
- category: 标签分类
|
|
|
- description: 标签描述
|
|
|
- status: 启用状态
|
|
@@ -1448,14 +1455,14 @@ def update_talent_tag(tag_id, tag_data):
|
|
|
update_properties = {}
|
|
|
|
|
|
# 检查并添加需要更新的属性
|
|
|
- if 'name' in tag_data and tag_data['name']:
|
|
|
- update_properties['name'] = tag_data['name']
|
|
|
+ if 'name_zh' in tag_data and tag_data['name_zh']:
|
|
|
+ update_properties['name_zh'] = tag_data['name_zh']
|
|
|
|
|
|
# 如果名称更新了,尝试更新英文名称
|
|
|
try:
|
|
|
from app.api.data_interface.routes import translate_and_parse
|
|
|
- en_name = translate_and_parse(tag_data['name'])
|
|
|
- update_properties['en_name'] = en_name[0] if en_name and isinstance(en_name, list) else ''
|
|
|
+ en_name = translate_and_parse(tag_data['name_zh'])
|
|
|
+ update_properties['name_en'] = en_name[0] if en_name and isinstance(en_name, list) else ''
|
|
|
except Exception as e:
|
|
|
logging.warning(f"更新标签英文名失败: {str(e)}")
|
|
|
|
|
@@ -1495,7 +1502,7 @@ def update_talent_tag(tag_id, tag_data):
|
|
|
MATCH (n:DataLabel)
|
|
|
WHERE id(n) = $nodeId
|
|
|
SET {set_clause}
|
|
|
- RETURN id(n) as id, n.name as name, n.en_name as en_name,
|
|
|
+ RETURN id(n) as id, n.name_zh as name_zh, n.name_en as name_en,
|
|
|
n.category as category, n.describe as description,
|
|
|
n.status as status, n.time as time
|
|
|
"""
|
|
@@ -1516,8 +1523,8 @@ def update_talent_tag(tag_id, tag_data):
|
|
|
# 提取更新后的标签信息
|
|
|
updated_tag = {
|
|
|
'id': record['id'],
|
|
|
- 'name': record['name'],
|
|
|
- 'en_name': record['en_name'],
|
|
|
+ 'name_zh': record['name_zh'],
|
|
|
+ 'name_en': record['name_en'],
|
|
|
'category': record['category'],
|
|
|
'description': record['description'],
|
|
|
'status': record['status'],
|
|
@@ -1559,7 +1566,7 @@ def delete_talent_tag(tag_id):
|
|
|
get_query = """
|
|
|
MATCH (n:DataLabel)
|
|
|
WHERE id(n) = $nodeId
|
|
|
- RETURN id(n) as id, n.name as name, n.en_name as en_name,
|
|
|
+ RETURN id(n) as id, n.name_zh as name_zh, n.name_en as name_en,
|
|
|
n.category as category, n.describe as description,
|
|
|
n.status as status, n.time as time
|
|
|
"""
|
|
@@ -1591,8 +1598,8 @@ def delete_talent_tag(tag_id):
|
|
|
# 保存标签信息用于返回
|
|
|
tag_info = {
|
|
|
'id': record['id'],
|
|
|
- 'name': record['name'],
|
|
|
- 'en_name': record['en_name'],
|
|
|
+ 'name_zh': record['name_zh'],
|
|
|
+ 'name_en': record['name_en'],
|
|
|
'category': record['category'],
|
|
|
'description': record['description'],
|
|
|
'status': record['status'],
|
|
@@ -1633,6 +1640,13 @@ def query_neo4j_graph(query_requirement):
|
|
|
"""
|
|
|
查询Neo4j图数据库,通过阿里千问API生成Cypher脚本
|
|
|
|
|
|
+ 优化特性:
|
|
|
+ - 当有标签名称时,使用递归遍历逻辑
|
|
|
+ - 以标签名称为起点,查找WORK_AS、BELONGS_TO、WORK_FOR关系
|
|
|
+ - 新的节点按照同样的查找逻辑继续找,一直找到没有指向关系的节点或者Talent节点则停止遍历
|
|
|
+ - 检索结果去重后形成最终结果
|
|
|
+ - 使用可变长度路径匹配(*1..10),最大遍历深度为10层,避免无限循环
|
|
|
+
|
|
|
Args:
|
|
|
query_requirement (str): 查询需求描述
|
|
|
|
|
@@ -1690,6 +1704,7 @@ def query_neo4j_graph(query_requirement):
|
|
|
"电子邮箱": "",
|
|
|
"生日": "",
|
|
|
"年龄": "",
|
|
|
+ "性别": "",
|
|
|
"居住地": "",
|
|
|
"籍贯": ""
|
|
|
}},
|
|
@@ -1712,7 +1727,8 @@ def query_neo4j_graph(query_requirement):
|
|
|
3. 标签没有被匹配到,datalabel字段可以为空数组
|
|
|
4. 酒店名称提取查询需求中明确提到的酒店名称
|
|
|
5. 如果没有提到酒店信息,hotel字段可以为空数组
|
|
|
- 6. 只需返回JSON字符串,不要返回其他信息
|
|
|
+ 6. datalabel只能填写可用标签列表中的名称,不能填写查询需求文本里的名称
|
|
|
+ 7. 只需返回JSON字符串,不要返回其他信息
|
|
|
"""
|
|
|
|
|
|
# 调用阿里千问API匹配标签
|
|
@@ -1809,13 +1825,16 @@ def query_neo4j_graph(query_requirement):
|
|
|
talent_params['birthday'] = value
|
|
|
elif field == "年龄":
|
|
|
talent_conditions.append("t.age = $age")
|
|
|
- talent_params['age'] = int(value) if value.isdigit() else 0
|
|
|
+ talent_params['age'] = int(value) if value.isdigit() else ''
|
|
|
elif field == "居住地":
|
|
|
talent_conditions.append("t.residence CONTAINS $residence")
|
|
|
talent_params['residence'] = value
|
|
|
elif field == "籍贯":
|
|
|
talent_conditions.append("t.origin CONTAINS $origin")
|
|
|
talent_params['origin'] = value
|
|
|
+ elif field == "性别":
|
|
|
+ talent_conditions.append("t.gender = $gender")
|
|
|
+ talent_params['gender'] = value
|
|
|
|
|
|
# 构建Talent子集查询
|
|
|
if talent_conditions:
|
|
@@ -1866,12 +1885,16 @@ def query_neo4j_graph(query_requirement):
|
|
|
}
|
|
|
|
|
|
# 构建完整的Cypher查询语句
|
|
|
+ # 优化说明:当有标签名称时,使用递归遍历逻辑
|
|
|
+ # 以标签名称为起点,查找WORK_AS、BELONGS_TO、WORK_FOR关系
|
|
|
+ # 新的节点按照同样的查找逻辑继续找,一直找到没有指向关系的节点或者Talent节点则停止遍历
|
|
|
+ # 检索结果去重后形成最终结果
|
|
|
+
|
|
|
if matched_hotels and matched_labels:
|
|
|
# 情况1:提供了酒店名称和标签名称
|
|
|
# 通过酒店名称查到一组Talent节点,通过标签查到另一组Talent节点,两组节点组合去重
|
|
|
logging.info("情况1:同时有酒店名称和标签名称,使用组合查询方式")
|
|
|
|
|
|
- # 使用UNION合并两个查询结果
|
|
|
cypher_script = f"""
|
|
|
// 查询通过酒店名称匹配的Talent节点
|
|
|
{talent_subset_query}
|
|
@@ -1884,31 +1907,29 @@ def query_neo4j_graph(query_requirement):
|
|
|
t.pg_id AS pg_id,
|
|
|
t.name_zh AS name_zh,
|
|
|
t.name_en AS name_en,
|
|
|
+ t.gender AS gender,
|
|
|
t.mobile AS mobile,
|
|
|
t.email AS email,
|
|
|
t.updated_at AS updated_at
|
|
|
|
|
|
UNION
|
|
|
|
|
|
- // 查询通过标签扩展遍历匹配的Talent节点
|
|
|
- // 步骤1: 定义标签条件列表
|
|
|
+ // 查询通过标签递归遍历匹配的Talent节点
|
|
|
+ // 使用递归遍历:以标签为起点,查找WORK_AS、BELONGS_TO、WORK_FOR关系,递归遍历直到找到Talent节点
|
|
|
WITH $labels AS targetLabels
|
|
|
|
|
|
- // 步骤2: 匹配标签条件节点
|
|
|
- MATCH (tag:DataLabel)
|
|
|
- WHERE tag.name_zh IN targetLabels
|
|
|
- WITH collect(tag) AS startNodes
|
|
|
+ // 递归遍历:从标签节点开始,通过关系网络找到所有相关的Talent节点
|
|
|
+ // 使用可变长度路径匹配,最大遍历深度:10层,避免无限循环
|
|
|
+ MATCH path = (startTag:DataLabel)-[:BELONGS_TO|WORK_AS|WORK_FOR*1..10]-(t:Talent)
|
|
|
+ WHERE startTag.name_zh IN targetLabels
|
|
|
+ {f"AND {' AND '.join(talent_conditions)}" if talent_conditions else ""}
|
|
|
|
|
|
- // 步骤3: 使用扩展遍历查找相关Talent节点
|
|
|
- UNWIND startNodes AS startTag
|
|
|
- MATCH (startTag)<-[:BELONGS_TO|WORK_AS]-(t:Talent)
|
|
|
- {f"WHERE {' AND '.join(talent_conditions)}" if talent_conditions else ""}
|
|
|
-
|
|
|
- // 步骤4: 返回去重结果
|
|
|
+ // 返回去重结果
|
|
|
RETURN DISTINCT
|
|
|
t.pg_id AS pg_id,
|
|
|
t.name_zh AS name_zh,
|
|
|
t.name_en AS name_en,
|
|
|
+ t.gender AS gender,
|
|
|
t.mobile AS mobile,
|
|
|
t.email AS email,
|
|
|
t.updated_at AS updated_at
|
|
@@ -1928,6 +1949,7 @@ def query_neo4j_graph(query_requirement):
|
|
|
t.pg_id AS pg_id,
|
|
|
t.name_zh AS name_zh,
|
|
|
t.name_en AS name_en,
|
|
|
+ t.gender AS gender,
|
|
|
t.mobile AS mobile,
|
|
|
t.email AS email,
|
|
|
t.updated_at AS updated_at
|
|
@@ -1935,30 +1957,36 @@ def query_neo4j_graph(query_requirement):
|
|
|
|
|
|
elif not matched_hotels and matched_labels:
|
|
|
# 情况3:没有提供酒店名称,但是有指定的标签名称
|
|
|
- # 通过标签扩展遍历查询Talent节点
|
|
|
- logging.info("情况3:只有标签名称,使用标签扩展遍历查询方式")
|
|
|
+ # 通过标签递归遍历查询Talent节点
|
|
|
+ logging.info("情况3:只有标签名称,使用标签递归遍历查询方式")
|
|
|
cypher_script = f"""
|
|
|
+ // 递归遍历:以标签为起点,查找WORK_AS、BELONGS_TO、WORK_FOR关系,递归遍历直到找到Talent节点
|
|
|
+
|
|
|
// 步骤1: 定义标签条件列表
|
|
|
WITH $labels AS targetLabels
|
|
|
|
|
|
- // 步骤2: 匹配标签条件节点
|
|
|
- MATCH (tag:DataLabel)
|
|
|
- WHERE tag.name_zh IN targetLabels
|
|
|
- WITH collect(tag) AS startNodes
|
|
|
+ // 步骤2: 递归遍历关系网络
|
|
|
+ // 使用可变长度路径匹配,从标签节点开始,通过关系网络找到所有相关的Talent节点
|
|
|
+ // 关系类型:BELONGS_TO、WORK_AS、WORK_FOR
|
|
|
+ // 最大遍历深度:10层,避免无限循环
|
|
|
|
|
|
- // 步骤3: 使用扩展遍历查找相关Talent节点
|
|
|
- UNWIND startNodes AS startTag
|
|
|
- MATCH (startTag)<-[:BELONGS_TO|WORK_AS]-(t:Talent)
|
|
|
- {f"WHERE {' AND '.join(talent_conditions)}" if talent_conditions else ""}
|
|
|
+ // 方法1: 使用标准Cypher可变长度路径匹配(推荐)
|
|
|
+ MATCH path = (startTag:DataLabel)-[:BELONGS_TO|WORK_AS|WORK_FOR*1..10]-(t:Talent)
|
|
|
+ WHERE startTag.name_zh IN targetLabels
|
|
|
+ {f"AND {' AND '.join(talent_conditions)}" if talent_conditions else ""}
|
|
|
|
|
|
- // 步骤4: 返回去重结果
|
|
|
- RETURN DISTINCT
|
|
|
- t.pg_id AS pg_id,
|
|
|
- t.name_zh AS name_zh,
|
|
|
+ // 步骤3: 返回去重结果
|
|
|
+ RETURN DISTINCT
|
|
|
+ t.pg_id AS pg_id,
|
|
|
+ t.name_zh AS name_zh,
|
|
|
t.name_en AS name_en,
|
|
|
- t.mobile AS mobile,
|
|
|
- t.email AS email,
|
|
|
+ t.gender AS gender,
|
|
|
+ t.mobile AS mobile,
|
|
|
+ t.email AS email,
|
|
|
t.updated_at AS updated_at
|
|
|
+
|
|
|
+ // 注意:如果需要更高级的路径遍历控制,可以使用APOC扩展的apoc.path.expandConfig
|
|
|
+ // 但标准Cypher的可变长度路径匹配已经能够满足大部分递归遍历需求
|
|
|
"""
|
|
|
|
|
|
else:
|
|
@@ -1970,6 +1998,7 @@ def query_neo4j_graph(query_requirement):
|
|
|
t.pg_id AS pg_id,
|
|
|
t.name_zh AS name_zh,
|
|
|
t.name_en AS name_en,
|
|
|
+ t.gender AS gender,
|
|
|
t.mobile AS mobile,
|
|
|
t.email AS email,
|
|
|
t.updated_at AS updated_at
|
|
@@ -2036,7 +2065,7 @@ def talent_get_tags(talent_id):
|
|
|
cypher_query = """
|
|
|
MATCH (t:Talent)-[r:BELONGS_TO|WORK_AS]->(tag:DataLabel)
|
|
|
WHERE t.pg_id = $talent_id
|
|
|
- RETURN t.pg_id as talent_id, tag.name_zh as tag_name_zh, type(r) as relation_type
|
|
|
+ RETURN t.pg_id as talent_pg_id, tag.name_zh as name_zh, type(r) as relation_type
|
|
|
"""
|
|
|
|
|
|
# 执行查询
|
|
@@ -2052,8 +2081,8 @@ def talent_get_tags(talent_id):
|
|
|
# 处理查询结果
|
|
|
for record in records:
|
|
|
talent_tag = {
|
|
|
- 'talent': record['talent_id'],
|
|
|
- 'tag_name_zh': record['tag_name_zh'],
|
|
|
+ 'talent_pg_id': record['talent_pg_id'],
|
|
|
+ 'name_zh': record['name_zh'],
|
|
|
'relation_type': record['relation_type']
|
|
|
}
|
|
|
response_data['data'].append(talent_tag)
|
|
@@ -2169,7 +2198,7 @@ def talent_update_tags(data):
|
|
|
# 先查找是否存在该标签
|
|
|
find_tag_query = """
|
|
|
MATCH (tag:DataLabel)
|
|
|
- WHERE tag.name = $tag_name
|
|
|
+ WHERE tag.name_zh = $tag_name
|
|
|
RETURN id(tag) as tag_id
|
|
|
"""
|
|
|
tag_result = session.run(find_tag_query, tag_name=tag_name)
|
|
@@ -2180,7 +2209,7 @@ def talent_update_tags(data):
|
|
|
else:
|
|
|
# 创建新标签
|
|
|
create_tag_query = """
|
|
|
- CREATE (tag:DataLabel {name: $name, category: $category, updated_at: $updated_at})
|
|
|
+ CREATE (tag:DataLabel {name_zh: $name, category: $category, updated_at: $updated_at})
|
|
|
RETURN id(tag) as tag_id
|
|
|
"""
|
|
|
tag_result = session.run(
|
|
@@ -2195,7 +2224,7 @@ def talent_update_tags(data):
|
|
|
# 2. 创建人才与标签的BELONGS_TO关系
|
|
|
create_relation_query = """
|
|
|
MATCH (t:Talent), (tag:DataLabel)
|
|
|
- WHERE t.pg_id = $talent_id AND tag.name = $tag_name
|
|
|
+ WHERE t.pg_id = $talent_id AND tag.name_zh = $tag_name
|
|
|
CREATE (t)-[r:BELONGS_TO]->(tag)
|
|
|
SET r.created_at = $current_time
|
|
|
RETURN r
|
|
@@ -2307,7 +2336,7 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
|
12. 中文邮政编码 (postal_code_zh)
|
|
|
13. 英文邮政编码 (postal_code_en)
|
|
|
14. 生日 (birthday) - 格式为YYYY-MM-DD,如1990-01-01
|
|
|
-15. 年龄 (age) - 数字格式,如30
|
|
|
+15. 年龄 (age) - 数字格式,如30,如果无法识别,返回空字符串
|
|
|
16. 籍贯 (native_place) - 出生地或户籍所在地信息
|
|
|
17. 居住地 (residence) - 个人居住地址信息
|
|
|
18. 品牌组合 (brand_group) - 如有多个品牌,使用逗号分隔
|
|
@@ -2331,7 +2360,7 @@ def parse_text_with_qwen25VLplus(image_data):
|
|
|
"postal_code_zh": "",
|
|
|
"postal_code_en": "",
|
|
|
"birthday": "",
|
|
|
- "age": 25,
|
|
|
+ "age": "",
|
|
|
"native_place": "",
|
|
|
"residence": "",
|
|
|
"brand_group": "",
|
|
@@ -2519,6 +2548,7 @@ def record_parsed_talents(result):
|
|
|
residence=talent_data.get('residence', ''),
|
|
|
age=talent_data.get('age'),
|
|
|
native_place=talent_data.get('native_place', ''),
|
|
|
+ gender=talent_data.get('gender', ''), # 新增性别字段
|
|
|
origin_source=talent_data.get('origin_source', []),
|
|
|
talent_profile=talent_data.get('talent_profile', ''),
|
|
|
task_id=str(task_id) if task_id else '',
|
|
@@ -2781,15 +2811,16 @@ def get_brand_group_by_hotel(hotel_zh):
|
|
|
## 可用品牌列表
|
|
|
{brands_json}
|
|
|
|
|
|
- ## 输出要求
|
|
|
- 1. 仔细分析酒店名称,选择最匹配的品牌
|
|
|
+ ## 匹配及输出要求
|
|
|
+ 1. 仔细分析酒店名称,选择最匹配的一个品牌,不要返回多个品牌
|
|
|
2. 如果酒店名称中包含品牌信息,优先选择该品牌
|
|
|
- 3. 如果无法确定,返回空字符串
|
|
|
- 4. 严格按照JSON格式输出:{{"brand": "品牌名称"}}
|
|
|
-
|
|
|
- 请只返回JSON字符串,不要包含其他解释文字。
|
|
|
+ 3. 如果酒店名称里有品牌信息,但是品牌信息不在可用品牌列表中,则返回空字符串
|
|
|
+ 4. 如果相似度很低,则可以返回空字符串
|
|
|
+ 5. 严格按照JSON格式输出:{{"brand": "品牌名称"}}
|
|
|
+ 6. 只返回JSON字符串,不要包含其他解释文字。
|
|
|
"""
|
|
|
|
|
|
+ logging.info(f"开始调用千问API: {prompt}")
|
|
|
# 调用阿里千问API
|
|
|
client = OpenAI(
|
|
|
api_key=QWEN_TEXT_API_KEY,
|