1 day ago · 157d8d3575
--- a/add_origin_source_field.sql
+++ b/add_origin_source_field.sql
@@ -0,0 +1,63 @@
 
															+-- ===============================================
														
 
															+-- 修改business_cards表，新增origin_source字段
														
 
															+-- 执行日期: 2024年（请在执行前填写实际日期）
														
 
															+-- 修改说明: 为名片表新增原始资料记录字段，采用JSON格式保存原始资料信息
														
 
															+-- ===============================================
														
 
															+
														
 
															+-- 1. Add the origin_source column (raw source-material record, stored as JSON).
															+--    IF NOT EXISTS (PostgreSQL 9.6+) makes the script safe to re-run: if the
															+--    column is already present the statement is skipped with a NOTICE instead
															+--    of aborting the script with a "column already exists" error.
															+--    No NOT NULL constraint or default is declared, so existing rows are left as-is.
															+ALTER TABLE business_cards
															+    ADD COLUMN IF NOT EXISTS origin_source JSON;
														
 
															+
														
 
															+-- 2. Attach a description to the new column (the text below is stored as the column comment).
														
 
															+COMMENT ON COLUMN business_cards.origin_source IS '原始资料记录字段 - 采用JSON格式保存原始资料信息，包括数据来源、MinIO路径等';
														
 
															+
														
 
															+-- 3. Verify that the column was added.
															+--    information_schema.columns covers every schema in the database, so a
															+--    same-named table elsewhere would produce rows that cannot be told apart.
															+--    The check is therefore limited to schemas on the current search_path,
															+--    and table_schema is returned so each row is unambiguous.
															+SELECT
															+    table_schema,
															+    column_name,
															+    data_type,
															+    is_nullable,
															+    column_default
															+FROM information_schema.columns
															+WHERE table_schema = ANY (current_schemas(false))
															+    AND table_name = 'business_cards'
															+    AND column_name = 'origin_source'
															+ORDER BY table_schema;
														
 
															+
														
 
															+-- 4. Show the comment stored for the new column.
															+--    pg_table_is_visible() restricts the match to the business_cards relation
															+--    that an unqualified name resolves to on the current search_path, i.e. the
															+--    same table the ALTER TABLE above modified. The schema name is returned so
															+--    the pg_namespace join serves a purpose and the result is unambiguous.
															+--    '无注释' is the placeholder shown when the column has no comment.
															+SELECT
															+    n.nspname AS schema_name,
															+    a.attname AS column_name,
															+    pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type,
															+    COALESCE(pg_catalog.col_description(a.attrelid, a.attnum), '无注释') AS description
															+FROM pg_catalog.pg_attribute AS a
															+INNER JOIN pg_catalog.pg_class AS c
															+    ON a.attrelid = c.oid
															+INNER JOIN pg_catalog.pg_namespace AS n
															+    ON c.relnamespace = n.oid
															+WHERE c.relname = 'business_cards'
															+    AND pg_catalog.pg_table_is_visible(c.oid)
															+    AND a.attname = 'origin_source'
															+    -- attnum > 0 skips system columns; attisdropped skips dropped columns.
															+    AND a.attnum > 0
															+    AND NOT a.attisdropped;
														
 
															+
														
 
															+-- ===============================================
														
 
															+-- 执行说明：
														
 
															+-- 1. 请在生产环境执行前先在测试环境验证
														
 
															+-- 2. 建议在业务低峰期执行此脚本（ALTER TABLE 执行期间会短暂持有表级 ACCESS EXCLUSIVE 锁，阻塞对该表的读写）
														
 
															+-- 3. 执行前请备份相关数据
														
 
															+-- 4. 新增字段允许NULL值，不会影响现有数据
														
 
															+-- 5. origin_source字段用于存储原始资料的JSON数据
														
 
															+-- ===============================================
														
 
															+
														
 
															+-- 可选：示例数据格式说明
														
 
															+-- origin_source字段的JSON格式示例：
														
 
															+-- {
														
 
															+--   "type": "webpage_talent",
														
 
															+--   "minio_path": "webpage_talent/webpage_talent_20240101_12345.md",
														
 
															+--   "source_date": "2024-01-01 12:00:00",
														
 
															+--   "talent_data": {...},
														
 
															+--   "web_md_content": "部分网页内容..."
														
 
															+-- }
														
 
															+
														
 
															+-- After the script has run, inspect the resulting table definition.
															+-- NOTE(review): \d is a psql meta-command, not SQL, so this line presumably only works when the script is run through psql — confirm the client used for deployment.
														
 
															+\d business_cards; 
														
--- a/add_webpage_talent_api_docs.md
+++ b/add_webpage_talent_api_docs.md
@@ -0,0 +1,698 @@
 
															+# 网页人才信息添加API使用说明手册
														
 
															+
														
 
															+## API概述
														
 
															+
														
 
															+`/add-webpage-talent` 接口（完整路径为 `/api/data-parse/add-webpage-talent`）用于将从网页中提取的人才信息批量添加到系统中。该接口会把网页内容保存到MinIO存储，并为每个人才创建对应的名片（`business_cards`）记录。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 基本信息
														
 
															+
														
 
															+| 项目 | 值 |
														
 
															+|------|-----|
														
 
															+| **接口路径** | `/api/data-parse/add-webpage-talent` |
														
 
															+| **请求方法** | `POST` |
														
 
															+| **Content-Type** | `application/json` |
														
 
															+| **认证方式** | 根据系统配置 |
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输入参数
														
 
															+
														
 
															+### 请求体结构
														
 
															+
														
 
															+```json
														
 
															+{
														
 
															+  "talent_list": [
														
 
															+    {
														
 
															+      "name_zh": "张三",
														
 
															+      "name_en": "Zhang San",
														
 
															+      "title_zh": "总经理",
														
 
															+      "title_en": "General Manager",
														
 
															+      "hotel_zh": "北京万豪酒店",
														
 
															+      "hotel_en": "Beijing Marriott Hotel",
														
 
															+      "brand_group": "万豪",
														
 
															+      "mobile": "13800138000",
														
 
															+      "email": "zhangsan@example.com",
														
 
															+      "phone": "010-12345678",
														
 
															+      "address_zh": "北京市朝阳区XXX路123号",
														
 
															+      "address_en": "123 XXX Road, Chaoyang District, Beijing",
														
 
															+      "postal_code_zh": "100000",
														
 
															+      "postal_code_en": "100000",
														
 
															+      "brand_zh": "万豪",
														
 
															+      "brand_en": "Marriott",
														
 
															+      "affiliation_zh": "万豪国际集团",
														
 
															+      "affiliation_en": "Marriott International",
														
 
															+      "birthday": "1980-01-01",
														
 
															+      "age": 44,
														
 
															+      "native_place": "北京市",
														
 
															+      "residence": "北京市朝阳区"
														
 
															+    }
														
 
															+  ],
														
 
															+  "web_md": "# 酒店任命公告\n\n## 人事变动\n\n1. **张三** 被任命为北京万豪酒店总经理...\n\n更多内容..."
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 参数详细说明
														
 
															+
														
 
															+#### `talent_list` (必填)
														
 
															+- **类型**: `Array<Object>`
														
 
															+- **描述**: 人才信息列表，每个对象包含一个人才的详细信息
														
 
															+- **限制**: 非空数组
														
 
															+
														
 
															+#### `talent_list[].name_zh` (必填)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 中文姓名
														
 
															+- **示例**: `"张三"`
														
 
															+
														
 
															+#### `talent_list[].name_en` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 英文姓名
														
 
															+- **示例**: `"Zhang San"`
														
 
															+
														
 
															+#### `talent_list[].title_zh` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 中文职位
														
 
															+- **示例**: `"总经理"`
														
 
															+
														
 
															+#### `talent_list[].title_en` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 英文职位
														
 
															+- **示例**: `"General Manager"`
														
 
															+
														
 
															+#### `talent_list[].hotel_zh` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 酒店中文名称
														
 
															+- **示例**: `"北京万豪酒店"`
														
 
															+
														
 
															+#### `talent_list[].hotel_en` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 酒店英文名称
														
 
															+- **示例**: `"Beijing Marriott Hotel"`
														
 
															+
														
 
															+#### `talent_list[].brand_group` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 品牌组合
														
 
															+- **示例**: `"万豪"`
														
 
															+
														
 
															+#### `talent_list[].mobile` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 手机号码；如有多个号码，以英文逗号分隔
														
 
															+- **示例**: `"13800138000,13900139000"`
														
 
															+
														
 
															+#### `talent_list[].email` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 电子邮箱
														
 
															+- **示例**: `"zhangsan@example.com"`
														
 
															+
														
 
															+#### `talent_list[].phone` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 固定电话
														
 
															+- **示例**: `"010-12345678"`
														
 
															+
														
 
															+#### `talent_list[].address_zh` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 中文地址
														
 
															+- **示例**: `"北京市朝阳区XXX路123号"`
														
 
															+
														
 
															+#### `talent_list[].address_en` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 英文地址
														
 
															+- **示例**: `"123 XXX Road, Chaoyang District, Beijing"`
														
 
															+
														
 
															+#### `talent_list[].birthday` (可选)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 生日，格式为YYYY-MM-DD
														
 
															+- **示例**: `"1980-01-01"`
														
 
															+
														
 
															+#### `talent_list[].age` (可选)
														
 
															+- **类型**: `Integer`
														
 
															+- **描述**: 年龄
														
 
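															+- **示例**: `44`
															+
															+> 说明：上方请求体示例中还包含 `postal_code_zh`、`postal_code_en`、`brand_zh`、`brand_en`、`affiliation_zh`、`affiliation_en`、`native_place`、`residence` 等字段，本节未逐一列出；这些字段在示例中的取值均为字符串。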
														
 
															+
														
 
															+#### `web_md` (必填)
														
 
															+- **类型**: `String`
														
 
															+- **描述**: 网页的markdown格式文本内容，将被保存到MinIO存储
														
 
															+- **限制**: 非空字符串
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输出参数
														
 
															+
														
 
															+### 响应结构
														
 
															+
														
 
															+#### 成功响应 (所有记录处理成功)
														
 
															+```json
														
 
															+{
														
 
															+  "code": 200,
														
 
															+  "success": true,
														
 
															+  "message": "所有3条人才记录处理成功",
														
 
															+  "data": {
														
 
															+    "total_count": 3,
														
 
															+    "success_count": 3,
														
 
															+    "failed_count": 0,
														
 
															+    "success_records": [
														
 
															+      {
														
 
															+        "index": 1,
														
 
															+        "data": {
														
 
															+          "id": 123,
														
 
															+          "name_zh": "张三",
														
 
															+          "name_en": "Zhang San",
														
 
															+          "title_zh": "总经理",
														
 
															+          "title_en": "General Manager",
														
 
															+          "hotel_zh": "北京万豪酒店",
														
 
															+          "hotel_en": "Beijing Marriott Hotel",
														
 
															+          "mobile": "13800138000",
														
 
															+          "email": "zhangsan@example.com",
														
 
															+          "brand_group": "万豪",
														
 
															+          "origin_source": {
														
 
															+            "type": "webpage_talent",
														
 
															+            "minio_path": "webpage_talent/webpage_talent_20240101_12345678.md",
														
 
															+            "source_date": "2024-01-01 12:00:00",
														
 
															+            "talent_data": {...},
														
 
															+            "web_md_content": "# 酒店任命公告..."
														
 
															+          },
														
 
															+          "created_at": "2024-01-01 12:00:00",
														
 
															+          "updated_at": "2024-01-01 12:00:00",
														
 
															+          "status": "active"
														
 
															+        },
														
 
															+        "message": "名片信息保存成功。未发现重复记录"
														
 
															+      }
														
 
															+    ],
														
 
															+    "failed_records": [],
														
 
															+    "minio_md_path": "webpage_talent/webpage_talent_20240101_12345678.md"
														
 
															+  }
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+#### 部分成功响应
														
 
															+```json
														
 
															+{
														
 
															+  "code": 206,
														
 
															+  "success": true,
														
 
															+  "message": "部分处理成功：2/3条记录成功",
														
 
															+  "data": {
														
 
															+    "total_count": 3,
														
 
															+    "success_count": 2,
														
 
															+    "failed_count": 1,
														
 
															+    "success_records": [...],
														
 
															+    "failed_records": [
														
 
															+      {
														
 
															+        "index": 3,
														
 
															+        "data": {
														
 
															+          "name_zh": "",
														
 
															+          "title_zh": "经理"
														
 
															+        },
														
 
															+        "error": "第3个记录缺少name_zh字段"
														
 
															+      }
														
 
															+    ],
														
 
															+    "minio_md_path": "webpage_talent/webpage_talent_20240101_12345678.md"
														
 
															+  }
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+#### 错误响应
														
 
															+```json
														
 
															+{
														
 
															+  "code": 400,
														
 
															+  "success": false,
														
 
															+  "message": "talent_list参数必须是非空数组",
														
 
															+  "data": null
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+### 响应字段说明
														
 
															+
														
 
															+#### 通用字段
														
 
															+- **`code`**: HTTP状态码
														
 
															+- **`success`**: 操作是否成功；部分成功（`code` 为 206）时同样为 `true`，需结合 `failed_count` 判断是否存在失败记录
														
 
															+- **`message`**: 响应消息
														
 
															+- **`data`**: 响应数据
														
 
															+
														
 
															+#### 成功响应数据字段
														
 
															+- **`total_count`**: 总记录数
														
 
															+- **`success_count`**: 成功处理的记录数
														
 
															+- **`failed_count`**: 失败记录数
														
 
															+- **`success_records`**: 成功记录详情列表，每项包含 `index`（记录序号，示例中从1开始）、`data`（已保存的名片数据）和 `message`（处理说明）
														
 
															+- **`failed_records`**: 失败记录详情列表，每项包含 `index`（记录序号，示例中从1开始）、`data`（提交的原始数据）和 `error`（失败原因）
														
 
															+- **`minio_md_path`**: 网页内容在MinIO中的存储路径
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 返回状态码
														
 
															+
														
 
															+| 状态码 | 说明 | 场景 |
														
 
															+|--------|------|------|
														
 
															+| **200** | 成功 | 所有记录处理成功 |
														
 
															+| **206** | 部分成功 | 部分记录处理成功 |
														
 
															+| **400** | 请求参数错误 | 参数格式不正确或缺少必填参数 |
														
 
															+| **500** | 服务器内部错误 | 系统异常或数据库错误 |
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 前端代码示例
														
 
															+
														
 
															+### JavaScript (原生)
														
 
															+
														
 
															+```javascript
														
 
															+// 发送请求函数
														
 
															+async function addWebpageTalent(talentList, webMd) {
														
 
															+    const url = '/api/data-parse/add-webpage-talent';
														
 
															+    
														
 
															+    const requestBody = {
														
 
															+        talent_list: talentList,
														
 
															+        web_md: webMd
														
 
															+    };
														
 
															+    
														
 
															+    try {
														
 
															+        const response = await fetch(url, {
														
 
															+            method: 'POST',
														
 
															+            headers: {
														
 
															+                'Content-Type': 'application/json',
														
 
															+                // 如果需要认证，添加相应的头部
														
 
															+                // 'Authorization': 'Bearer your-token'
														
 
															+            },
														
 
															+            body: JSON.stringify(requestBody)
														
 
															+        });
														
 
															+        
														
 
															+        const result = await response.json();
														
 
															+        
														
 
															+        if (result.success) {
														
 
															+            console.log('处理成功:', result.message);
														
 
															+            console.log('处理详情:', result.data);
														
 
															+            
														
 
															+            // 处理成功记录
														
 
															+            if (result.data.success_records.length > 0) {
														
 
															+                console.log(`成功处理 ${result.data.success_count} 条记录`);
														
 
															+                result.data.success_records.forEach((record, index) => {
														
 
															+                    console.log(`记录 ${record.index}: ${record.data.name_zh} - ${record.message}`);
														
 
															+                });
														
 
															+            }
														
 
															+            
														
 
															+            // 处理失败记录
														
 
															+            if (result.data.failed_records.length > 0) {
														
 
															+                console.log(`失败 ${result.data.failed_count} 条记录`);
														
 
															+                result.data.failed_records.forEach((record, index) => {
														
 
															+                    console.error(`记录 ${record.index}: ${record.error}`);
														
 
															+                });
														
 
															+            }
														
 
															+            
														
 
															+            return result;
														
 
															+        } else {
														
 
															+            console.error('处理失败:', result.message);
														
 
															+            throw new Error(result.message);
														
 
															+        }
														
 
															+    } catch (error) {
														
 
															+        console.error('请求失败:', error);
														
 
															+        throw error;
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+// 使用示例
														
 
															+const talentData = [
														
 
															+    {
														
 
															+        name_zh: "张三",
														
 
															+        name_en: "Zhang San",
														
 
															+        title_zh: "总经理",
														
 
															+        title_en: "General Manager",
														
 
															+        hotel_zh: "北京万豪酒店",
														
 
															+        hotel_en: "Beijing Marriott Hotel",
														
 
															+        brand_group: "万豪",
														
 
															+        mobile: "13800138000",
														
 
															+        email: "zhangsan@example.com"
														
 
															+    },
														
 
															+    {
														
 
															+        name_zh: "李四",
														
 
															+        name_en: "Li Si",
														
 
															+        title_zh: "副总经理",
														
 
															+        title_en: "Deputy General Manager",
														
 
															+        hotel_zh: "上海希尔顿酒店",
														
 
															+        hotel_en: "Shanghai Hilton Hotel",
														
 
															+        brand_group: "希尔顿",
														
 
															+        mobile: "13900139000",
														
 
															+        email: "lisi@example.com"
														
 
															+    }
														
 
															+];
														
 
															+
														
 
															+const webContent = `
														
 
															+# 酒店人事任命公告
														
 
															+
														
 
															+## 重要人事变动
														
 
															+
														
 
															+1. **张三**被任命为北京万豪酒店总经理
														
 
															+   - 手机：13800138000
														
 
															+   - 邮箱：zhangsan@example.com
														
 
															+
														
 
															+2. **李四**被任命为上海希尔顿酒店副总经理
														
 
															+   - 手机：13900139000
														
 
															+   - 邮箱：lisi@example.com
														
 
															+`;
														
 
															+
														
 
															+// 调用函数
														
 
															+addWebpageTalent(talentData, webContent)
														
 
															+    .then(result => {
														
 
															+        console.log('操作完成:', result);
														
 
															+    })
														
 
															+    .catch(error => {
														
 
															+        console.error('操作失败:', error);
														
 
															+    });
														
 
															+```
														
 
															+
														
 
															+### jQuery
														
 
															+
														
 
															+```javascript
														
 
															+function addWebpageTalent(talentList, webMd) {
														
 
															+    return $.ajax({
														
 
															+        url: '/api/data-parse/add-webpage-talent',
														
 
															+        type: 'POST',
														
 
															+        contentType: 'application/json',
														
 
															+        data: JSON.stringify({
														
 
															+            talent_list: talentList,
														
 
															+            web_md: webMd
														
 
															+        }),
														
 
															+        success: function(result) {
														
 
															+            if (result.success) {
														
 
															+                console.log('处理成功:', result.message);
														
 
															+                
														
 
															+                // 显示处理结果
														
 
															+                if (result.data.success_count > 0) {
														
 
															+                    alert(`成功处理 ${result.data.success_count} 条记录`);
														
 
															+                }
														
 
															+                
														
 
															+                if (result.data.failed_count > 0) {
														
 
															+                    console.warn(`失败 ${result.data.failed_count} 条记录`);
														
 
															+                    result.data.failed_records.forEach(record => {
														
 
															+                        console.error(`记录 ${record.index}: ${record.error}`);
														
 
															+                    });
														
 
															+                }
														
 
															+            } else {
														
 
															+                alert('处理失败: ' + result.message);
														
 
															+            }
														
 
															+        },
														
 
															+        error: function(xhr, status, error) {
														
 
															+            console.error('请求失败:', error);
														
 
															+            alert('请求失败: ' + error);
														
 
															+        }
														
 
															+    });
														
 
															+}
														
 
															+
														
 
															+// 使用示例
														
 
															+const talentData = [
														
 
															+    {
														
 
															+        name_zh: "张三",
														
 
															+        title_zh: "总经理",
														
 
															+        hotel_zh: "北京万豪酒店",
														
 
															+        mobile: "13800138000"
														
 
															+    }
														
 
															+];
														
 
															+
														
 
															+const webContent = "# 人事任命公告\n\n张三被任命为总经理...";
														
 
															+
														
 
															+addWebpageTalent(talentData, webContent);
														
 
															+```
														
 
															+
														
 
															+### Vue.js
														
 
															+
														
 
															+```vue
														
 
															+<template>
														
 
															+  <div>
														
 
															+    <h3>添加网页人才信息</h3>
														
 
															+    
														
 
															+    <!-- 人才信息表单 -->
														
 
															+    <div v-for="(talent, index) in talentList" :key="index" class="talent-form">
														
 
															+      <h4>人才 {{ index + 1 }}</h4>
														
 
															+      <div class="form-group">
														
 
															+        <label>中文姓名 (必填):</label>
														
 
															+        <input v-model="talent.name_zh" type="text" required>
														
 
															+      </div>
														
 
															+      <div class="form-group">
														
 
															+        <label>英文姓名:</label>
														
 
															+        <input v-model="talent.name_en" type="text">
														
 
															+      </div>
														
 
															+      <div class="form-group">
														
 
															+        <label>中文职位:</label>
														
 
															+        <input v-model="talent.title_zh" type="text">
														
 
															+      </div>
														
 
															+      <div class="form-group">
														
 
															+        <label>酒店名称:</label>
														
 
															+        <input v-model="talent.hotel_zh" type="text">
														
 
															+      </div>
														
 
															+      <div class="form-group">
														
 
															+        <label>手机号码:</label>
														
 
															+        <input v-model="talent.mobile" type="text">
														
 
															+      </div>
														
 
															+      <div class="form-group">
														
 
															+        <label>电子邮箱:</label>
														
 
															+        <input v-model="talent.email" type="email">
														
 
															+      </div>
														
 
															+    </div>
														
 
															+    
														
 
															+    <!-- 网页内容 -->
														
 
															+    <div class="form-group">
														
 
															+      <label>网页内容 (Markdown格式):</label>
														
 
															+      <textarea v-model="webMd" rows="10" cols="50" required></textarea>
														
 
															+    </div>
														
 
															+    
														
 
															+    <!-- 操作按钮 -->
														
 
															+    <div class="actions">
														
 
															+      <button @click="addTalent">添加人才</button>
														
 
															+      <button @click="submitData" :disabled="loading">
														
 
															+        {{ loading ? '处理中...' : '提交数据' }}
														
 
															+      </button>
														
 
															+    </div>
														
 
															+    
														
 
															+    <!-- 结果显示 -->
														
 
															+    <div v-if="result" class="result">
														
 
															+      <h4>处理结果</h4>
														
 
															+      <p :class="result.success ? 'success' : 'error'">{{ result.message }}</p>
														
 
															+      
														
 
															+      <div v-if="result.data">
														
 
															+        <p>总记录数: {{ result.data.total_count }}</p>
														
 
															+        <p>成功: {{ result.data.success_count }}</p>
														
 
															+        <p>失败: {{ result.data.failed_count }}</p>
														
 
															+        
														
 
															+        <div v-if="result.data.failed_records.length > 0">
														
 
															+          <h5>失败记录:</h5>
														
 
															+          <ul>
														
 
															+            <li v-for="record in result.data.failed_records" :key="record.index">
														
 
															+              记录 {{ record.index }}: {{ record.error }}
														
 
															+            </li>
														
 
															+          </ul>
														
 
															+        </div>
														
 
															+      </div>
														
 
															+    </div>
														
 
															+  </div>
														
 
															+</template>
														
 
															+
														
 
															+<script>
														
 
															+// Builds one blank talent entry; shared by the initial form state and addTalent().
															+function createEmptyTalent() {
															+  return {
															+    name_zh: '',
															+    name_en: '',
															+    title_zh: '',
															+    title_en: '',
															+    hotel_zh: '',
															+    hotel_en: '',
															+    brand_group: '',
															+    mobile: '',
															+    email: '',
															+    phone: '',
															+    address_zh: '',
															+    address_en: ''
															+  };
															+}
															+
															+export default {
															+  name: 'AddWebpageTalent',
															+  data() {
															+    return {
															+      // Talent entries being edited; starts with a single blank entry.
															+      talentList: [createEmptyTalent()],
															+      // Markdown text of the source web page.
															+      webMd: '',
															+      // True from the moment a submit starts until it settles.
															+      loading: false,
															+      // Last server response (or a synthesized failure object); null before any submit.
															+      result: null
															+    };
															+  },
															+  methods: {
															+    // Append another blank talent entry to the list.
															+    addTalent() {
															+      this.talentList.push(createEmptyTalent());
															+    },
															+
															+    // Validate the form, POST it to the add-webpage-talent endpoint and
															+    // publish the outcome through `result` and a toast message.
															+    async submitData() {
															+      const formIsValid = this.validateData();
															+      if (!formIsValid) {
															+        return;
															+      }
															+
															+      this.loading = true;
															+      this.result = null;
															+
															+      try {
															+        const requestOptions = {
															+          method: 'POST',
															+          headers: {
															+            'Content-Type': 'application/json'
															+          },
															+          body: JSON.stringify({
															+            talent_list: this.talentList,
															+            web_md: this.webMd
															+          })
															+        };
															+        const response = await fetch('/api/data-parse/add-webpage-talent', requestOptions);
															+
															+        const payload = await response.json();
															+        this.result = payload;
															+
															+        // Pick the toast level from the server's success flag; the call goes
															+        // through this.$message so the method keeps its receiver.
															+        const toastLevel = payload.success ? 'success' : 'error';
															+        this.$message[toastLevel](payload.message);
															+      } catch (error) {
															+        // Network failures and unparsable responses both land here.
															+        const failureText = '请求失败: ' + error.message;
															+        this.result = {
															+          success: false,
															+          message: failureText
															+        };
															+        this.$message.error(failureText);
															+      } finally {
															+        this.loading = false;
															+      }
															+    },
															+
															+    // Return true when every talent has a non-blank name_zh and the page
															+    // markdown is non-blank; otherwise show the first problem and return false.
															+    validateData() {
															+      const firstUnnamed = this.talentList.findIndex(
															+        (talent) => !talent.name_zh.trim()
															+      );
															+      if (firstUnnamed !== -1) {
															+        this.$message.error(`第 ${firstUnnamed + 1} 个人才的中文姓名不能为空`);
															+        return false;
															+      }
															+
															+      if (!this.webMd.trim()) {
															+        this.$message.error('网页内容不能为空');
															+        return false;
															+      }
															+
															+      return true;
															+    }
															+  }
															+};
														
 
															+</script>
														
 
															+
														
 
															+<style scoped>
														
 
															+/* Bordered, padded box with rounded corners */
															+.talent-form {
															+  margin: 10px 0;
															+  padding: 15px;
															+  border: 1px solid #dddddd;
															+  border-radius: 5px;
															+}
															+
															+/* Vertical spacing between form rows */
															+.form-group {
															+  margin: 10px 0;
															+}
															+
															+/* Fixed-width bold labels so the inputs line up in a column */
															+.form-group label {
															+  display: inline-block;
															+  width: 120px;
															+  font-weight: 700;
															+}
															+
															+/* Text inputs and textareas share one size and border */
															+.form-group input,
															+.form-group textarea {
															+  width: 300px;
															+  padding: 5px;
															+  border: 1px solid #cccccc;
															+  border-radius: 3px;
															+}
															+
															+/* Button row */
															+.actions {
															+  margin: 20px 0;
															+}
															+
															+/* Blue, borderless buttons with white text */
															+.actions button {
															+  margin-right: 10px;
															+  padding: 10px 20px;
															+  border: none;
															+  border-radius: 5px;
															+  color: #ffffff;
															+  background: #007bff;
															+  cursor: pointer;
															+}
															+
															+/* Greyed-out look for disabled buttons */
															+.actions button:disabled {
															+  background: #6c757d;
															+  cursor: not-allowed;
															+}
															+
															+/* Bordered box around the result panel */
															+.result {
															+  margin: 20px 0;
															+  padding: 15px;
															+  border: 1px solid #dddddd;
															+  border-radius: 5px;
															+}
															+
															+/* Green text for a successful result message */
															+.success {
															+  color: #28a745;
															+}
															+
															+/* Red text for a failed result message */
															+.error {
															+  color: #dc3545;
															+}
														
 
															+</style>
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 错误处理
														
 
															+
														
 
															+### 常见错误及解决方案
														
 
															+
														
 
															+#### 1. 参数验证错误
														
 
															+**错误**: `talent_list参数必须是非空数组`
														
 
															+**解决**: 确保talent_list是一个非空数组
														
 
															+
														
 
															+#### 2. 缺少必填字段
														
 
															+**错误**: `第X个记录缺少name_zh字段`
														
 
															+**解决**: 确保每个人才记录都包含name_zh字段
														
 
															+
														
 
															+#### 3. MinIO上传失败
														
 
															+**错误**: `上传网页内容到MinIO失败`
														
 
															+**解决**: 检查MinIO服务状态和配置
														
 
															+
														
 
															+#### 4. 数据库错误
														
 
															+**错误**: `数据库操作失败`
														
 
															+**解决**: 检查数据库连接和表结构
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 注意事项
														
 
															+
														
 
															+1. **数据完整性**: 确保talent_list中每个对象都包含name_zh字段
														
 
															+2. **内容大小**: web_md 不能为空，且接口限制最长 100000 个字符，超出时返回 400 错误
														
 
															+3. **批量处理**: 该接口支持批量处理，单次请求的 talent_list 最多 50 条记录，超出时返回 400 错误
														
 
															+4. **重复检查**: 系统会自动检查重复记录并相应处理
														
 
															+5. **存储路径**: 网页内容会自动保存到MinIO的`webpage_talent/`目录下
														
 
															+6. **数据溯源**: origin_source字段保存了完整的数据来源信息，便于后续追溯
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 版本信息
														
 
															+
														
 
															+- **API版本**: v1.0
														
 
															+- **文档版本**: 1.0.0
														
 
															+- **创建日期**: 2024年
														
 
															+- **最后更新**: 2024年
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 联系支持
														
 
															+
														
 
															+如果在使用过程中遇到问题，请联系技术支持团队。 
														
--- a/app/api/data_parse/routes.py
+++ b/app/api/data_parse/routes.py
@@ -4,7 +4,7 @@ from app.core.data_parse.parse import update_business_card, get_business_cards,
 
															 # 导入新的名片图片解析函数和添加名片函数
														
 
															 from app.core.data_parse.parse_card import process_business_card_image, add_business_card, delete_business_card
														
 
															 # 导入网页文本解析函数
														
 
															-from app.core.data_parse.parse_web import process_webpage_with_QWen
														
 
															+from app.core.data_parse.parse_web import process_webpage_with_QWen, add_webpage_talent
														
 
															 from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															 import logging
														
 
															 import boto3
														
@@ -1603,3 +1603,188 @@ def webpage_parse_route():
 
															             'data': None
														
 
															         }), 500
														
 
															+# Endpoint: add talent records extracted from a web page
															+@bp.route('/add-webpage-talent', methods=['POST'])
															+def add_webpage_talent_route():
															+    """
															+    Validate a web-page talent submission and hand it to add_webpage_talent.
															+
															+    Request body (JSON object):
															+        - talent_list: non-empty list (at most 50 items) of objects in the
															+          business-card field format; each item needs a non-blank string
															+          name_zh.
															+        - web_md: non-blank markdown text of the page, at most 100000
															+          characters.
															+
															+    Example request body:
															+        {
															+            "talent_list": [
															+                {
															+                    "name_zh": "张三",
															+                    "name_en": "Zhang San",
															+                    "title_zh": "总经理",
															+                    "title_en": "General Manager",
															+                    "hotel_zh": "北京万豪酒店",
															+                    "hotel_en": "Beijing Marriott Hotel",
															+                    "brand_group": "万豪",
															+                    "mobile": "13800138000",
															+                    "email": "zhangsan@example.com"
															+                },
															+                {
															+                    "name_zh": "李四",
															+                    "name_en": "Li Si",
															+                    "title_zh": "市场总监",
															+                    "title_en": "Marketing Director",
															+                    "hotel_zh": "上海希尔顿酒店",
															+                    "hotel_en": "Shanghai Hilton Hotel",
															+                    "brand_group": "希尔顿",
															+                    "mobile": "13900139000",
															+                    "email": "lisi@example.com"
															+                }
															+            ],
															+            "web_md": "# 人事任命公告\n\n**1** 张三被任命为北京万豪酒店总经理...\n\n**2** 李四被任命为上海希尔顿酒店市场总监..."
															+        }
															+
															+    Returns:
															+        A (response, status) pair. Validation failures are answered here
															+        with {'success': False, 'message': ..., 'data': None}; otherwise
															+        the dict returned by add_webpage_talent is serialized unchanged.
															+
															+    Processing, as described by the original author (it happens inside
															+    add_webpage_talent, which this route only calls):
															+        - the page markdown is saved to MinIO
															+        - each entry of talent_list is processed in turn
															+        - a business_card record is created for each talent
															+        - the same duplicate check as the card-upload flow is applied
															+        - the MinIO path is recorded on the card
															+          NOTE(review): the original note says this goes in updated_by,
															+          while the accompanying API doc mentions origin_source - confirm.
															+
															+    Status codes:
															+        - 200: every record was processed successfully
															+        - 206: only some records were processed successfully
															+        - 400: invalid request parameters
															+        - 500: processing failed
															+    """
															+    # Request limits. Both are checked before add_webpage_talent is called.
															+    max_web_md_chars = 100000  # len() of the str, i.e. characters, not bytes
															+    max_talent_records = 50
															+
															+    def bad_request(message):
															+        """Build the 400 response envelope used for every validation failure."""
															+        return jsonify({
															+            'success': False,
															+            'message': message,
															+            'data': None
															+        }), 400
															+
															+    try:
															+        # The request must declare a JSON content type
															+        if not request.is_json:
															+            return bad_request('请求必须是 JSON 格式')
															+
															+        # silent=True turns a malformed body into None instead of an exception,
															+        # so it is answered with 400 rather than falling into the 500 handler.
															+        data = request.get_json(silent=True)
															+
															+        # Valid JSON that is not an object (null, number, string, array)
															+        # cannot carry the required keys.
															+        if not isinstance(data, dict):
															+            return bad_request('请求体必须是 JSON 对象')
															+
															+        # Both parameters are required
															+        for required_key in ('talent_list', 'web_md'):
															+            if required_key not in data:
															+                return bad_request(f'缺少必填参数: {required_key}')
															+
															+        talent_list = data['talent_list']
															+        web_md = data['web_md']
															+
															+        # Type checks come first so the len()/strip() calls below are safe
															+        if not isinstance(talent_list, list):
															+            return bad_request('talent_list 必须是数组类型')
															+
															+        if not isinstance(web_md, str):
															+            return bad_request('web_md 必须是字符串类型')
															+
															+        # Neither parameter may be empty
															+        if len(talent_list) == 0:
															+            return bad_request('talent_list 不能为空数组')
															+
															+        if not web_md.strip():
															+            return bad_request('web_md 内容不能为空')
															+
															+        # Size limits guard against oversized submissions
															+        if len(web_md) > max_web_md_chars:
															+            return bad_request('web_md 内容过长，最大支持100KB')
															+
															+        if len(talent_list) > max_talent_records:
															+            return bad_request('talent_list 记录过多，最大支持50条记录')
															+
															+        # Per-item format checks; item numbers in messages are 1-based
															+        for index, talent_data in enumerate(talent_list):
															+            if not isinstance(talent_data, dict):
															+                return bad_request(f'talent_list 第{index + 1}项必须是对象类型')
															+
															+            # name_zh must be a string with visible content; a blank or
															+            # non-string value is treated the same as a missing one.
															+            name_zh = talent_data.get('name_zh')
															+            if not isinstance(name_zh, str) or not name_zh.strip():
															+                return bad_request(f'talent_list 第{index + 1}项缺少必填字段: name_zh')
															+
															+        # Log the accepted request
															+        logger.info(f"开始处理网页人才信息，人才数量: {len(talent_list)}, 网页内容长度: {len(web_md)} 字符")
															+
															+        # Delegate storage and card creation to the core function
															+        result = add_webpage_talent(talent_list, web_md)
															+
															+        # Map the result onto an HTTP status. .get() keeps a result that
															+        # lacks 'code' or 'success' on the default branch instead of
															+        # raising KeyError.
															+        result_code = result.get('code')
															+        if result.get('success'):
															+            # 206 marks partial success; anything else counts as full success
															+            status_code = 206 if result_code == 206 else 200
															+        else:
															+            # 400 marks a parameter error; anything else is a server error
															+            status_code = 400 if result_code == 400 else 500
															+
															+        return jsonify(result), status_code
															+
															+    except Exception as e:
															+        # Log the failure with its traceback
															+        error_msg = f"添加网页人才信息失败: {str(e)}"
															+        logger.error(error_msg, exc_info=True)
															+
															+        # NOTE(review): the exception text is returned to the client, as in
															+        # the original; consider a generic message if that leaks internals.
															+        return jsonify({
															+            'success': False,
															+            'message': error_msg,
															+            'data': None
															+        }), 500
														
 
															+
														
--- a/app/core/data_parse/parse.py
+++ b/app/core/data_parse/parse.py
@@ -45,6 +45,7 @@ class BusinessCard(db.Model):
 
															     image_path = db.Column(db.String(255))  # MinIO中存储的路径
														
 
															     career_path = db.Column(db.JSON)  # 职业轨迹，JSON格式
														
 
															     brand_group = db.Column(db.String(200))  # 品牌组合
														
 
															+    origin_source = db.Column(db.JSON)  # 原始资料记录，JSON格式
														
 
															     created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
														
 
															     updated_at = db.Column(db.DateTime, onupdate=datetime.now)
														
 
															     updated_by = db.Column(db.String(50))
														
@@ -77,6 +78,7 @@ class BusinessCard(db.Model):
 
															             'image_path': self.image_path,
														
 
															             'career_path': self.career_path,
														
 
															             'brand_group': self.brand_group,
														
 
															+            'origin_source': self.origin_source,
														
 
															             'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S') if self.created_at else None,
														
 
															             'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S') if self.updated_at else None,
														
 
															             'updated_by': self.updated_by,
														
@@ -421,6 +423,7 @@ def create_main_card_with_duplicates(extracted_data, minio_path, suspected_dupli
 
															             image_path=minio_path,  # 最新的图片路径
														
 
															             career_path=initial_career_path,  # 包含图片路径的职业轨迹
														
 
															             brand_group=extracted_data.get('brand_group', ''),
														
 
															+            origin_source=extracted_data.get('origin_source'),  # 原始资料记录
														
 
															             status='active',
														
 
															             updated_by='system'
														
 
															         )
														
@@ -1056,6 +1059,7 @@ def update_business_card(card_id, data):
 
															         card.residence = data.get('residence', card.residence)
														
 
															         card.career_path = data.get('career_path', card.career_path)  # 更新职业轨迹
														
 
															         card.brand_group = data.get('brand_group', card.brand_group)  # 更新品牌组合
														
 
															+        card.origin_source = data.get('origin_source', card.origin_source)  # 更新原始资料记录
														
 
															         card.updated_by = data.get('updated_by', 'user')  # 可以根据实际情况修改为当前用户
														
 
															         # 保存更新
														
--- a/app/core/data_parse/parse_card.py
+++ b/app/core/data_parse/parse_card.py
@@ -250,6 +250,7 @@ def add_business_card(card_data, image_file=None):
 
															                 existing_card.residence = card_data.get('residence', existing_card.residence)
														
 
															                 existing_card.brand_group = card_data.get('brand_group', existing_card.brand_group)
														
 
															                 existing_card.image_path = minio_path  # 更新为最新的图片路径
														
 
															+                existing_card.origin_source = card_data.get('origin_source', existing_card.origin_source)  # 更新原始资料记录
														
 
															                 existing_card.updated_by = 'system'
														
 
															                 # 更新职业轨迹，传递图片路径
														
@@ -342,6 +343,7 @@ def add_business_card(card_data, image_file=None):
 
															                     image_path=minio_path,  # 最新的图片路径
														
 
															                     career_path=initial_career_path,  # 包含图片路径的职业轨迹
														
 
															                     brand_group=card_data.get('brand_group', ''),
														
 
															+                    origin_source=card_data.get('origin_source'),  # 原始资料记录
														
 
															                     status='active',
														
 
															                     updated_by='system'
														
 
															                 )
														
--- a/app/core/data_parse/parse_web.py
+++ b/app/core/data_parse/parse_web.py
@@ -2,37 +2,480 @@ import os
 
															 import json
														
 
															 import logging
														
 
															 import re
														
 
															+import uuid
														
 
															+import boto3
														
 
															+from botocore.config import Config
														
 
															+from io import BytesIO
														
 
															 from datetime import datetime
														
 
															 from openai import OpenAI
														
 
															+# 导入配置和业务逻辑模块
														
 
															+from app.config.config import DevelopmentConfig, ProductionConfig
														
 
															+from app.core.data_parse.parse import (
														
 
															+    BusinessCard, check_duplicate_business_card, 
														
 
															+    create_main_card_with_duplicates, update_career_path,
														
 
															+    normalize_mobile_numbers
														
 
															+)
														
 
															+from app import db
														
 
															-def extract_json_from_text(text):
														
 
															+# Use the configuration object; by default assume we are running in production
															+config = ProductionConfig()
															+# MinIO connection settings read from the configuration object
															+minio_url = f"{'https' if config.MINIO_SECURE else 'http'}://{config.MINIO_HOST}"
															+minio_access_key = config.MINIO_USER
															+minio_secret_key = config.MINIO_PASSWORD
															+minio_bucket = config.MINIO_BUCKET
															+use_ssl = config.MINIO_SECURE
														
 
															+
														
 
															+
														
 
															+def get_minio_client():
															+    """
															+    Create a boto3 S3 client pointed at the configured MinIO server.
															+
															+    The client is built from the module-level ``minio_url``,
															+    ``minio_access_key`` and ``minio_secret_key`` settings. After the
															+    client is created, the existing buckets are listed and
															+    ``minio_bucket`` is created when it is not among them.
															+
															+    Returns:
															+        The boto3 S3 client, or None when creating the client, listing
															+        the buckets or creating the bucket raises. The error is logged
															+        and not propagated.
															+    """
															+    try:
															+        logging.info(f"尝试连接MinIO服务器: {minio_url}")
															+        
															+        minio_client = boto3.client(
															+            's3',
															+            endpoint_url=minio_url,
															+            aws_access_key_id=minio_access_key,
															+            aws_secret_access_key=minio_secret_key,
															+            config=Config(
															+                signature_version='s3v4',
															+                retries={'max_attempts': 3, 'mode': 'standard'},
															+                connect_timeout=10,
															+                read_timeout=30
															+            )
															+        )
															+        
															+        # Make sure the target bucket exists
															+        buckets = minio_client.list_buckets()
															+        bucket_names = [bucket['Name'] for bucket in buckets.get('Buckets', [])]
															+        logging.info(f"成功连接到MinIO服务器，现有存储桶: {bucket_names}")
															+        
															+        if minio_bucket not in bucket_names:
															+            logging.info(f"创建存储桶: {minio_bucket}")
															+            minio_client.create_bucket(Bucket=minio_bucket)
															+            
															+        return minio_client
															+    except Exception as e:
															+        # Log the traceback too, like the other error logs in this module,
															+        # so a connection failure can be diagnosed from the log alone
															+        logging.error(f"MinIO连接错误: {str(e)}", exc_info=True)
															+        return None
														
 
															+
														
 
															+
														
 
															+def upload_md_to_minio(web_md, filename=None):
															     """
															-    从文本中提取JSON部分
															+    Upload markdown text to MinIO as an object under the "webpage_talent/" prefix.
															     Args:
															-        text (str): 包含JSON的文本
															+        web_md (str): Markdown text content; it is UTF-8 encoded before upload.
															+        filename (str, optional): File name to use. When omitted, a name is built from the current timestamp and a short UUID fragment; ".md" is appended when the given name lacks it.
															     Returns:
															-        str: 提取的JSON字符串
															+        str: The object path in MinIO, or None if the upload failed.
															     """
															-    # 尝试找到最外层的花括号对
															-    start_idx = text.find('{')
															-    if start_idx == -1:
															-        return "{}"
															+    try:
															+        # Build the file name
															+        if not filename:
															+            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
															+            unique_id = uuid.uuid4().hex[:8]
															+            filename = f"webpage_talent_{timestamp}_{unique_id}.md"
															+        elif not filename.endswith('.md'):
															+            filename += '.md'
															+        
															+        # Get the MinIO client; None means the connection could not be set up
															+        minio_client = get_minio_client()
															+        if not minio_client:
															+            logging.error("无法获取MinIO客户端")
															+            return None
															+        
															+        # Convert the text into a byte stream
															+        md_bytes = web_md.encode('utf-8')
															+        md_stream = BytesIO(md_bytes)
															+        
															+        # Upload to MinIO
															+        minio_path = f"webpage_talent/{filename}"
															+        logging.info(f"开始上传MD文件到MinIO: {minio_path}")
															+        
															+        minio_client.put_object(
															+            Bucket=minio_bucket,
															+            Key=minio_path,
															+            Body=md_stream,
															+            ContentType='text/markdown',
															+            Metadata={
															+                'original_filename': filename,
															+                'upload_time': datetime.now().isoformat(),
															+                'content_type': 'webpage_talent_md'
															+            }
															+        )
															+        
															+        logging.info(f"MD文件成功上传到MinIO: {minio_path}")
															+        return minio_path
															+        
															+    except Exception as e:
															+        # Any failure (encoding, client, upload) is logged and reported as None
															+        logging.error(f"上传MD文件到MinIO失败: {str(e)}", exc_info=True)
															+        return None
														
 
															+
														
 
															+
														
 
															+def add_webpage_talent(talent_list, web_md):
															+    """
															+    Add talent records extracted from a web page: upload the page markdown to MinIO, then create or update one business-card record per talent entry.
															-    # 使用简单的括号匹配算法找到对应的闭合括号
															-    count = 0
															-    for i in range(start_idx, len(text)):
															-        if text[i] == '{':
															-            count += 1
															-        elif text[i] == '}':
															-            count -= 1
															-            if count == 0:
															-                return text[start_idx:i+1]
															+    Args:
															+        talent_list (list): Talent entries, each a dict in business-card format. Each entry is modified in place: an 'origin_source' key is added before it is processed.
															+        web_md (str): Markdown text of the web page.
															+        
															+    Returns:
															+        dict: Result with 'code', 'success', 'message' and 'data' keys.
															+            400 when a parameter is invalid, 500 when the upload fails,
															+            when every record fails, or on an unexpected exception,
															+            200 when every record succeeds and 206 when only some do.
															+            When records were processed, 'data' holds the counters,
															+            the per-record success/failure lists and the MinIO path.
															+    """
															+    try:
															+        # Validate parameters
															+        if not talent_list or not isinstance(talent_list, list):
															+            return {
															+                'code': 400,
															+                'success': False,
															+                'message': 'talent_list参数必须是非空数组',
															+                'data': None
															+            }
															+        
															+        if not web_md or not isinstance(web_md, str):
															+            return {
															+                'code': 400,
															+                'success': False,
															+                'message': 'web_md参数必须是非空字符串',
															+                'data': None
															+            }
															+        
															+        # Upload the markdown file to MinIO
															+        logging.info("开始上传网页内容到MinIO")
															+        minio_md_path = upload_md_to_minio(web_md)
															+        
															+        if not minio_md_path:
															+            return {
															+                'code': 500,
															+                'success': False,
															+                'message': '上传网页内容到MinIO失败',
															+                'data': None
															+            }
															+        
															+        # Processing statistics
															+        results = {
															+            'total_count': len(talent_list),
															+            'success_count': 0,
															+            'failed_count': 0,
															+            'success_records': [],
															+            'failed_records': [],
															+            'minio_md_path': minio_md_path
															+        }
															+        
															+        # Process each talent record; one record's failure does not stop the others
															+        for index, talent_data in enumerate(talent_list):
															+            try:
															+                logging.info(f"开始处理第{index + 1}个人才记录: {talent_data.get('name_zh', 'Unknown')}")
															+                
															+                # Validate the required field
															+                if not talent_data.get('name_zh'):
															+                    error_msg = f"第{index + 1}个记录缺少name_zh字段"
															+                    logging.warning(error_msg)
															+                    results['failed_records'].append({
															+                        'index': index + 1,
															+                        'data': talent_data,
															+                        'error': error_msg
															+                    })
															+                    results['failed_count'] += 1
															+                    continue
															+                
															+                # Record where this data came from in origin_source
															+                talent_data['origin_source'] = {
															+                    'type': 'webpage_talent',
															+                    'minio_path': minio_md_path,
															+                    'source_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
															+                    'web_md_content': web_md[:1000]  # keep the first 1000 characters of the page for reference
															+                }
															+                
															+                # Create or update the business_card record
															+                card_result = process_single_talent_card(talent_data, minio_md_path)
															+                
															+                if card_result['success']:
															+                    results['success_records'].append({
															+                        'index': index + 1,
															+                        'data': card_result['data'],
															+                        'message': card_result['message']
															+                    })
															+                    results['success_count'] += 1
															+                    logging.info(f"成功处理第{index + 1}个人才记录")
															+                else:
															+                    results['failed_records'].append({
															+                        'index': index + 1,
															+                        'data': talent_data,
															+                        'error': card_result['message']
															+                    })
															+                    results['failed_count'] += 1
															+                    logging.error(f"处理第{index + 1}个人才记录失败: {card_result['message']}")
															+                    
															+            except Exception as e:
															+                # Also reached when an entry is not a dict and .get() fails
															+                error_msg = f"处理第{index + 1}个人才记录时发生异常: {str(e)}"
															+                logging.error(error_msg, exc_info=True)
															+                results['failed_records'].append({
															+                    'index': index + 1,
															+                    'data': talent_data,
															+                    'error': error_msg
															+                })
															+                results['failed_count'] += 1
															+        
															+        # Build the final result from the success counter
															+        if results['success_count'] == results['total_count']:
															+            return {
															+                'code': 200,
															+                'success': True,
															+                'message': f'所有{results["total_count"]}条人才记录处理成功',
															+                'data': results
															+            }
															+        elif results['success_count'] > 0:
															+            return {
															+                'code': 206,  # Partial Content
															+                'success': True,
															+                'message': f'部分处理成功：{results["success_count"]}/{results["total_count"]}条记录成功',
															+                'data': results
															+            }
															+        else:
															+            return {
															+                'code': 500,
															+                'success': False,
															+                'message': f'所有{results["total_count"]}条人才记录处理失败',
															+                'data': results
															+            }
															+            
															+    except Exception as e:
															+        error_msg = f"add_webpage_talent函数执行失败: {str(e)}"
															+        logging.error(error_msg, exc_info=True)
															+        return {
															+            'code': 500,
															+            'success': False,
															+            'message': error_msg,
															+            'data': None
															+        }
														
 
															+
														
 
															+
														
 
															+def _parse_talent_birthday(raw_birthday):
															+    """
															+    Parse a 'YYYY-MM-DD' birthday value into a date.
															+
															+    Returns None when the value is empty or cannot be parsed, so that one
															+    malformed birthday does not make the whole record fail.
															+    """
															+    if not raw_birthday:
															+        return None
															+    try:
															+        return datetime.strptime(raw_birthday, '%Y-%m-%d').date()
															+    except (ValueError, TypeError):
															+        logging.warning(f"忽略无法解析的生日字段: {raw_birthday!r}")
															+        return None
															+
															+
															+def process_single_talent_card(talent_data, minio_md_path):
															+    """
															+    Create or update the business-card record for a single talent entry.
															-    # 如果没有找到闭合括号，返回从开始位置到文本结尾
															-    return text[start_idx:]
															+
															+    The duplicate check decides the action: 'update' modifies the existing
															+    card, 'create_with_duplicates' creates a main card plus a pending
															+    duplicate record, and anything else creates a new card. If the
															+    duplicate check itself raises, a new card is created.
															+
															+    Args:
															+        talent_data (dict): Talent fields in business-card format.
															+        minio_md_path (str): MinIO path of the uploaded web page markdown.
															+        
															+    Returns:
															+        dict: 'success', 'message' and 'data' keys. 'data' is the card
															+            as a dict (update / create), a summary dict for
															+            'create_with_duplicates', or None on failure, in which case
															+            the database session has been rolled back.
															+    """
															+    try:
															+        # Source metadata stored on whichever card the branches below touch
															+        origin_source = {
															+            'type': 'webpage_talent',
															+            'minio_path': minio_md_path,
															+            'source_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
															+            'talent_data': talent_data
															+        }
															+        
															+        # Check for duplicate records
															+        try:
															+            duplicate_check = check_duplicate_business_card(talent_data)
															+            logging.info(f"重复记录检查结果: {duplicate_check['reason']}")
															+        except Exception as e:
															+            logging.error(f"重复记录检查失败: {str(e)}", exc_info=True)
															+            # If the check fails, fall back to creating a new record
															+            duplicate_check = {
															+                'is_duplicate': False,
															+                'action': 'create_new',
															+                'existing_card': None,
															+                'reason': f'重复检查失败，创建新记录: {str(e)}'
															+            }
															+        
															+        # Act on the duplicate-check result
															+        if duplicate_check['action'] == 'update':
															+            # Update the existing record
															+            existing_card = duplicate_check['existing_card']
															+            
															+            # Basic fields: keep the stored value when the key is absent
															+            existing_card.name_en = talent_data.get('name_en', existing_card.name_en)
															+            existing_card.title_zh = talent_data.get('title_zh', existing_card.title_zh)
															+            existing_card.title_en = talent_data.get('title_en', existing_card.title_en)
															+            
															+            # Mobile numbers: merge new numbers in; an explicit '' clears the field
															+            if 'mobile' in talent_data:
															+                new_mobile = normalize_mobile_numbers(talent_data.get('mobile', ''))
															+                if new_mobile:
															+                    # Merge the mobile numbers
															+                    from app.core.data_parse.parse import merge_mobile_numbers
															+                    existing_card.mobile = merge_mobile_numbers(existing_card.mobile, new_mobile)
															+                elif talent_data.get('mobile') == '':
															+                    existing_card.mobile = ''
															+            
															+            existing_card.phone = talent_data.get('phone', existing_card.phone)
															+            existing_card.email = talent_data.get('email', existing_card.email)
															+            existing_card.hotel_zh = talent_data.get('hotel_zh', existing_card.hotel_zh)
															+            existing_card.hotel_en = talent_data.get('hotel_en', existing_card.hotel_en)
															+            existing_card.address_zh = talent_data.get('address_zh', existing_card.address_zh)
															+            existing_card.address_en = talent_data.get('address_en', existing_card.address_en)
															+            existing_card.postal_code_zh = talent_data.get('postal_code_zh', existing_card.postal_code_zh)
															+            existing_card.postal_code_en = talent_data.get('postal_code_en', existing_card.postal_code_en)
															+            existing_card.brand_zh = talent_data.get('brand_zh', existing_card.brand_zh)
															+            existing_card.brand_en = talent_data.get('brand_en', existing_card.brand_en)
															+            existing_card.affiliation_zh = talent_data.get('affiliation_zh', existing_card.affiliation_zh)
															+            existing_card.affiliation_en = talent_data.get('affiliation_en', existing_card.affiliation_en)
															+            
															+            # Birthday: keep the stored value when the new one is missing or malformed
															+            new_birthday = _parse_talent_birthday(talent_data.get('birthday'))
															+            if new_birthday is not None:
															+                existing_card.birthday = new_birthday
															+            
															+            # Age: empty clears it; invalid or out-of-range keeps the stored value
															+            if 'age' in talent_data:
															+                try:
															+                    if talent_data['age'] is not None and str(talent_data['age']).strip():
															+                        age_value = int(talent_data['age'])
															+                        if 0 < age_value <= 150:  # sanity range for an age
															+                            existing_card.age = age_value
															+                    else:
															+                        existing_card.age = None
															+                except (ValueError, TypeError):
															+                    # Malformed age: keep the stored value
															+                    pass
															+            
															+            existing_card.native_place = talent_data.get('native_place', existing_card.native_place)
															+            existing_card.residence = talent_data.get('residence', existing_card.residence)
															+            existing_card.brand_group = talent_data.get('brand_group', existing_card.brand_group)
															+            existing_card.updated_by = 'webpage_talent_system'
															+            
															+            # Record where this update came from
															+            existing_card.origin_source = origin_source
															+            
															+            # Update the career path; web-extracted data has no image, so pass ''
															+            existing_card.career_path = update_career_path(existing_card, talent_data, image_path='')
															+            
															+            db.session.commit()
															+            
															+            logging.info(f"已更新现有名片记录，ID: {existing_card.id}")
															+            
															+            return {
															+                'success': True,
															+                'message': f'名片信息已更新。{duplicate_check["reason"]}',
															+                'data': existing_card.to_dict()
															+            }
															+            
															+        elif duplicate_check['action'] == 'create_with_duplicates':
															+            # Create a new main record and keep the suspected duplicates for review
															+            main_card, duplicate_record = create_main_card_with_duplicates(
															+                talent_data, 
															+                None,  # web extraction has no image path
															+                duplicate_check['suspected_duplicates'],
															+                duplicate_check['reason']
															+            )
															+            
															+            main_card.updated_by = 'webpage_talent_system'
															+            
															+            # Record where this card came from
															+            main_card.origin_source = origin_source
															+            db.session.commit()
															+            
															+            return {
															+                'success': True,
															+                'message': f'创建新记录成功，发现疑似重复记录待处理。{duplicate_check["reason"]}',
															+                'data': {
															+                    'main_card': main_card.to_dict(),
															+                    'duplicate_record_id': duplicate_record.id,
															+                    'suspected_duplicates_count': len(duplicate_check['suspected_duplicates']),
															+                    'processing_status': 'pending',
															+                    'duplicate_reason': duplicate_record.duplicate_reason,
															+                    'created_at': duplicate_record.created_at.strftime('%Y-%m-%d %H:%M:%S')
															+                }
															+            }
															+            
															+        else:
															+            # Create a new record
															+            # Seed the career path with the current position
															+            initial_entry = {
															+                'date': datetime.now().strftime('%Y-%m-%d'),
															+                'hotel_zh': talent_data.get('hotel_zh', ''),
															+                'hotel_en': talent_data.get('hotel_en', ''),
															+                'title_zh': talent_data.get('title_zh', ''),
															+                'title_en': talent_data.get('title_en', ''),
															+                'image_path': '',  # web extraction has no image path
															+                'source': 'webpage_extraction'
															+            }
															+            initial_career_path = [initial_entry]
															+            
															+            # Age must end up as a valid integer or None
															+            age_value = None
															+            if talent_data.get('age'):
															+                try:
															+                    age_value = int(talent_data.get('age'))
															+                    if age_value <= 0 or age_value > 150:  # sanity range for an age
															+                        age_value = None
															+                except (ValueError, TypeError):
															+                    age_value = None
															+            
															+            business_card = BusinessCard(
															+                name_zh=talent_data.get('name_zh', ''),
															+                name_en=talent_data.get('name_en', ''),
															+                title_zh=talent_data.get('title_zh', ''),
															+                title_en=talent_data.get('title_en', ''),
															+                mobile=normalize_mobile_numbers(talent_data.get('mobile', '')),
															+                phone=talent_data.get('phone', ''),
															+                email=talent_data.get('email', ''),
															+                hotel_zh=talent_data.get('hotel_zh', ''),
															+                hotel_en=talent_data.get('hotel_en', ''),
															+                address_zh=talent_data.get('address_zh', ''),
															+                address_en=talent_data.get('address_en', ''),
															+                postal_code_zh=talent_data.get('postal_code_zh', ''),
															+                postal_code_en=talent_data.get('postal_code_en', ''),
															+                brand_zh=talent_data.get('brand_zh', ''),
															+                brand_en=talent_data.get('brand_en', ''),
															+                affiliation_zh=talent_data.get('affiliation_zh', ''),
															+                affiliation_en=talent_data.get('affiliation_en', ''),
															+                # A malformed birthday is stored as None instead of failing the record
															+                birthday=_parse_talent_birthday(talent_data.get('birthday')),
															+                age=age_value,
															+                native_place=talent_data.get('native_place', ''),
															+                residence=talent_data.get('residence', ''),
															+                image_path=None,  # web extraction has no image path
															+                career_path=initial_career_path,
															+                brand_group=talent_data.get('brand_group', ''),
															+                origin_source=origin_source,
															+                status='active',
															+                updated_by='webpage_talent_system'
															+            )
															+            
															+            db.session.add(business_card)
															+            db.session.commit()
															+            
															+            logging.info(f"名片信息已保存到数据库，ID: {business_card.id}")
															+            
															+            return {
															+                'success': True,
															+                'message': f'名片信息保存成功。{duplicate_check["reason"]}',
															+                'data': business_card.to_dict()
															+            }
															+            
															+    except Exception as e:
															+        # Undo any partial changes before reporting the failure
															+        db.session.rollback()
															+        error_msg = f"处理单个人才名片记录失败: {str(e)}"
															+        logging.error(error_msg, exc_info=True)
															+        
															+        return {
															+            'success': False,
															+            'message': error_msg,
															+            'data': None
															+        }
														
 
															 def process_webpage_with_QWen(markdown_text, publish_time):
														
@@ -208,4 +651,5 @@ def process_webpage_with_QWen(markdown_text, publish_time):
 
															     except Exception as e:
														
 
															         error_msg = f"Qwen VL Max 模型网页文本解析失败: {str(e)}"
														
 
															         logging.error(error_msg, exc_info=True)
														
 
															-        raise Exception(error_msg) 
														
 
															+        raise Exception(error_msg) 
														
 
															+
														
--- a/门墩儿人才数据采集.pdf
+++ b/门墩儿人才数据采集.pdf
@@ -0,0 +1 @@
 
															+error