yulongyan_citu 1 éve
szülő
commit
c0a9403433
5 módosított fájl, 128 hozzáadás és 81 törlés
  1. 2 1
      app.py
  2. 8 5
      configs/nacos_config.py
  3. 1 1
      functions/operate_graph.py
  4. 117 73
      functions/solve_graph.py
  5. 0 1
      logs/flask.log.2024-10-05

+ 2 - 1
app.py

@@ -43,4 +43,5 @@ def run_app():
         app.run(host='0.0.0.0', port=3333, debug=False, processes=3)
 
 if __name__ == '__main__':
-    run_app()
+    run_app()
+

+ 8 - 5
configs/nacos_config.py

@@ -1,6 +1,6 @@
-
 import json
-from flask import Flask, request
+import platform
+from flask import Flask
 from nacos import NacosClient
 
 app = Flask(__name__)
@@ -15,7 +15,7 @@ config_json = {
         "group": "DEFAULT_GROUP"
     },
     "production": {
-        "address": "175.27.171.220:8848",
+        "address": "127.0.0.1:8848",
         "namespace": "dev",
         "username": "nacos",
         "password": "2099citu##$$**.com",
@@ -51,8 +51,11 @@ class NacosConfig:
         return self.config_data
 
 
-# 使用配置文件中的生产环境配置 (本地是local,提交代码是production)!!
-nacos_config = NacosConfig(environment='production')
+
+# 根据操作系统选择环境
+plat = platform.system().lower()
+environment = 'local' if plat == 'windows' else 'production'
+nacos_config = NacosConfig(environment=environment)
 configs = nacos_config.get_config()
 
 

+ 1 - 1
functions/operate_graph.py

@@ -182,7 +182,7 @@ def operate_seeker(operate, dataList):
     try:
         if operate == 'sync_all':
             create_seeker_dataList(dataList)
-        # 新增 修改基本信息
+        # 新增 修改基本信息 (单个新增)
         elif operate == 'add':
             add_seeker_dataList(dataList)
             # part_seeker_dataList(dataList)

+ 117 - 73
functions/solve_graph.py

@@ -6,15 +6,44 @@ import ast
 import logging
 logger = logging.getLogger(__name__)
 from flask import current_app
+from datetime import datetime
+import  re
 
 
 api_key = "sk-86d4622141d74e9a8d7c38ee873c4d91"
 base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
 model_name = "qwen-turbo"
 
# Age buckets: each five-year half-open range [lower, lower + 5) maps to its label.
age_ranges = {
    range(lower, lower + 5): f"{lower}-{lower + 5}岁"
    for lower in range(20, 60, 5)
}

# Education buckets: compiled pattern -> normalized label. Callers scan the
# patterns in insertion order and take the first one that matches.
edu_categories = {
    re.compile(pattern): label
    for pattern, label in (
        (r'大专|中专|高职', '大专以及下'),
        (r'本科', '本科'),
        (r'硕士', '硕士'),
        (r'博士', '博士'),
    )
}

# Work-experience buckets: half-open range of whole years -> label.
work_ranges = {
    range(first_year, end_year): label
    for first_year, end_year, label in (
        (0, 2, "2年以下工作经验"),
        (2, 5, "2-5年工作经验"),
        (5, 10, "5-10年工作经验"),
        (10, 15, "10-15年工作经验"),
        (15, 20, "15-20年工作经验"),
    )
}
+
 
 # 提取共有标签
-def llm_client(content):
+def llm_client1(content):
     try:
         if content is None or content == []:
             return []
@@ -43,6 +72,32 @@ def llm_client(content):
         current_app.logger.error(f'llm_client error: {e}')
         return []
 
def llm_client(content):
    """Extract age-requirement labels from free text via the chat model.

    Sends ``content`` to the model configured by the module-level ``api_key``,
    ``base_url`` and ``model_name`` with a prompt that asks for the age
    requirement only, then parses the reply as a Python list literal.

    :param content: text to analyse; ``None``, ``[]`` and ``''`` return ``[]``
        without calling the remote service.
    :return: list of labels parsed from the reply, or ``[]`` when there is no
        input, the reply is not a list literal, or any error occurs.
    """
    try:
        # Nothing to extract from empty input, so skip the remote call.
        if not content:
            return []
        client = OpenAI(api_key=api_key, base_url=base_url, )
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "你是一个数据处理工具,根据用户的提示提取标签"},
                {"role": "user",
                 "content": f"例如,如果输入是 '学历大专为主,不接受实习生,年龄要求20-42岁' "
                            f"输出应该是 ['20-42岁']。"
                            f"只提取对应年龄,其他不需要。内容如下:{content}"},
            ],
            max_tokens=1024,
            temperature=0.1,
            stream=False
        )
        raw_reply = response.choices[0].message.content
        try:
            # literal_eval accepts single- and double-quoted strings alike, so
            # parse the reply untouched first; blindly swapping quotes would
            # corrupt a valid reply such as ["it's"].
            parsed = ast.literal_eval(raw_reply)
        except (ValueError, SyntaxError):
            # Fall back to the previous quote normalisation for replies that
            # only parse after it (e.g. mismatched quote characters).
            parsed = ast.literal_eval(raw_reply.replace("'", '"'))
        # Callers concatenate the result with other lists, so only a
        # list-like reply is usable.
        if not isinstance(parsed, (list, tuple)):
            current_app.logger.error(f'llm_client error: unexpected reply {raw_reply!r}')
            return []
        return list(parsed)
    except Exception as e:
        current_app.logger.error(f'llm_client error: {e}')
        return []
+
 def bs_data(data):
     try:
         soup = BeautifulSoup(data,'html.parser')
@@ -61,15 +116,17 @@ def create_job_dataList(dataList):
         :return:
         '''
         for item in dataList:
+            eduType = item['eduType']
+            if eduType:
+                eduType = next((value for pattern, value in edu_categories.items() if pattern.search(eduType)), '未知')
+            else:
+                eduType = None
             value_list = [
-                             item['type'],
-                             item['expType'],
-                             item['eduType'],
-                             f"{item['payFrom']}到{item['payTo']}每{item['payUnit']}",
-                         ] + item["tagList"]+llm_client(bs_data(item['content']+item['requirement']))
-
+                             eduType,
+                             item['name'],
+                         ] + item["tagList"]+llm_client(bs_data(item['requirement']))
             # 招聘职位
-            job = create_or_get_node('job', uniqueId=item['id'], name=item['name'])
+            job = create_or_get_node('job', uniqueId=item['id'])
             # 标签
             for label in value_list:
                 all_label = create_or_get_node('jobLabel', name=label)
@@ -90,20 +147,8 @@ def create_enterprise_dataList(dataList):
         :return:
         '''
         for item in dataList:
-            if item['businessResp'] is None:
-                business_scope = []
-            else:
-                business_scope = item['businessResp']['businessScope'] \
-                    if item['businessResp']['businessScope'] is not None else []
-            value_list = [
-                             item['financingStatus'],
-                             item['scale'],
-                             item['workTime'],
-                         ] + item["welfareList"] if item['welfareList'] is not None else []\
-                         + item["tagList"] if item['tagList'] is not None else [] \
-                         +llm_client(item['introduce'] if item['introduce'] is not None else [])\
-                         +llm_client(business_scope)
-
+            value_list = [] + item["welfareList"] if item['welfareList'] is not None else []\
+                         + item["tagList"] if item['tagList'] is not None else []
             # 招聘职位
             enterprise = create_or_get_node('enterprise', uniqueId=item['id'],
                                             name=item['name'],alias = item['anotherName'])
@@ -118,6 +163,13 @@ def create_enterprise_dataList(dataList):
         current_app.logger.error(f'create_enterprise_dataList error: {e}')
         return str(e)
 
def calculate_work_duration(work):
    """Return the length of one work-history record in seconds.

    ``startTime`` and ``endTime`` are read as epoch timestamps in
    milliseconds (both are divided by 1000). A missing or ``None``
    ``endTime`` means the job is ongoing and the current time is used.

    :param work: mapping with ``startTime`` and optional ``endTime``.
    :return: duration in seconds as a float; ``0.0`` when ``startTime`` is
        missing or ``None``, or when the end precedes the start.
    """
    start_ms = work.get('startTime')
    if start_ms is None:
        # Without a start there is no measurable duration. Returning 0 keeps a
        # single incomplete record from aborting max(workList, key=...).
        return 0.0

    end_ms = work.get('endTime')
    if end_ms is None:
        end_time = datetime.now().timestamp()
    else:
        end_time = end_ms / 1000

    # Clamp so that inverted dates never produce a negative duration.
    return max(end_time - start_ms / 1000, 0.0)
+
+
 
 # 全部新增
 def create_seeker_dataList(dataList):
@@ -127,67 +179,49 @@ def create_seeker_dataList(dataList):
             :param dataList:
             :return:
         '''
-
         for item in dataList:
             if item['person'] is None:
-                person_jobType = ''
-                jobStatus = ''
-                expType = ''
                 eduType = ''
-                advantage = ''
-                sex = ''
             else:
-                person_jobType = item['person'].get('jobType', '')
-                jobStatus = item['person'].get('jobStatus',  '')
-                expType = item['person'].get('expType',  '')
                 eduType = item['person'].get('eduType', '')
-                advantage = item['person'].get('advantage', '')
-                sex = item['person'].get('sex', '')
-            # 确保 advantage 是一个字符串
-            advantage_str = advantage if isinstance(advantage, str) else (', '.join(advantage) if isinstance(advantage, list) else '')
+                birthday = item['person'].get('birthday', '')
+                if birthday:
+                    age = datetime.now().year - datetime.utcfromtimestamp(birthday / 1000).year
+                    age_range = next((value for key, value in age_ranges.items() if age in key), "未知")
+                else:
+                    age_range = None
+
+                if eduType:
+                    eduType = next((value for pattern, value in edu_categories.items() if pattern.search(eduType)),
+                                   '未知')
+                else:
+                    eduType = None
+
 
-            interestedList = item.get('interestedList', [])
             edu_list = item.get('eduList', [])
             workList = item.get('workList', [])
-            projectList = item.get('projectList', [])
-            trainList = item.get('trainList', [])
 
             data = []
-            content_list = []
             for record in edu_list:
-                content_list.append(record.get('content', ''))
-                data.append(record.get('schoolName', ''))
-                data.append(record.get('educationType', ''))
-                data.append(record.get('educationSystemType', ''))
-            for record in interestedList:
-                data.append(record['jobType'])
+                data.append(record.get('major', ''))
 
             for record in workList:
                 data.append(record.get('positionName', ''))
-                content_list.append(record.get('content', ''))
-
-            for record in projectList:
-                data.append(record.get('name', ''))
-                content_list.append(record.get('content', ''))
-
-            for record in trainList:
-                data.append(record.get('orgName', ''))
-                data.append(record.get('course', ''))
-                content_list.append(record.get('content', ''))
-            content_list = [str(content) for content in content_list if content is not None]
-
-            data.extend([str(label) for label in llm_client("\n".join(content_list)) if label is not None])
-
-            advantage_labels = [str(label) for label in llm_client(advantage_str) if
-                                label is not None] if advantage_str else []
-
+            # 选择最长的工作年限
+            if workList:
+                longest_work = max(workList, key=calculate_work_duration)
+                longest_duration = calculate_work_duration(longest_work)
+                # 将秒转换为年
+                longest_duration_years = longest_duration / (365.25 * 24 * 60 * 60)
+                year_range = next((value for key, value in work_ranges.items()
+                                   if longest_duration_years in key), "20年以上工作经验")
+            else:
+                year_range = None
             value_list = [
-                             person_jobType,
-                             jobStatus,
-                             expType,
                              eduType,
-                             sex
-                         ]+ advantage_labels + data
+                             age_range,
+                             year_range
+                         ] + data
             # 剔除value_list为空的值
             value_list = [x for x in value_list if x is not None and x != ""]
 
@@ -213,13 +247,23 @@ def add_seeker_dataList(dataList):
          for record in dataList:
              seeker = create_or_get_node('seeker', uniqueId=record['userId'],
                                          name=record['name'])
+             eduType = record.get('eduType', '')
+             birthday = record.get('birthday', '')
+             if birthday:
+                 age = datetime.now().year - datetime.utcfromtimestamp(birthday / 1000).year
+                 age_range = next((value for key, value in age_ranges.items() if age in key), "未知")
+             else:
+                 age_range = None
+
+             if eduType:
+                 eduType = edu_categories.get(eduType, '未知')
+             else:
+                 eduType = None
+
              value_list = [
-                 record['jobType'],
-                 record['jobStatus'],
-                 record['expType'],
-                 record['eduType'],
-                 record['sex']
-             ] + llm_client( record['advantage'])
+                 age_range,
+                 eduType
+             ]
 
              # 标签
              for label in value_list:

+ 0 - 1
logs/flask.log.2024-10-05

@@ -1 +0,0 @@
-[2024-10-05 21:19:31,433][app.py:34][ERROR][15368] - Failed to rollover log file: [WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'E:\\task1\\mendunr_project\\logs\\flask.log' -> 'E:\\task1\\mendunr_project\\logs\\flask.log.2024-10-05'