瀏覽代碼

人才地图-网页解析

Xiao_123 10 小時之前
父節點
當前提交
4357722fae

+ 1 - 0
package.json

@@ -30,6 +30,7 @@
     "@form-create/designer": "^3.1.3",
     "@form-create/element-ui": "^3.1.24",
     "@iconify/iconify": "^3.1.1",
+    "@mendable/firecrawl-js": "1.19.1",
     "@videojs-player/vue": "^1.0.0",
     "@vueuse/core": "^10.9.0",
     "@wangeditor/editor": "^5.1.23",

+ 59 - 0
pnpm-lock.yaml

@@ -20,6 +20,9 @@ importers:
       '@iconify/iconify':
         specifier: ^3.1.1
         version: 3.1.1
+      '@mendable/firecrawl-js':
+        specifier: 1.19.1
+        version: 1.19.1(ws@8.18.2)
       '@videojs-player/vue':
         specifier: ^1.0.0
         version: 1.0.0(@types/video.js@7.3.58)(video.js@7.21.5)(vue@3.4.21(typescript@5.3.3))
@@ -1367,6 +1370,9 @@ packages:
   '@jridgewell/trace-mapping@0.3.25':
     resolution: {integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==}
 
+  '@mendable/firecrawl-js@1.19.1':
+    resolution: {integrity: sha512-rtBnlF6oLJAxhH4YG8P72FernR1TvdI4J7uiSad2hOF7ZtbkzHkuNsa/10KUTPsmeQf8ESxiSQ1p7HMyEXuW9g==}
+
   '@nodelib/fs.scandir@2.1.5':
     resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==}
     engines: {node: '>= 8'}
@@ -3569,6 +3575,11 @@ packages:
     resolution: {integrity: sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==}
     engines: {node: '>=0.10.0'}
 
+  isows@1.0.7:
+    resolution: {integrity: sha512-I1fSfDCZL5P0v33sVqeTDSpcstAg/N+wF5HS033mogOVIp4B+oHC7oOCsA3axAbBSGTJ8QubbNmnIRN/h8U7hg==}
+    peerDependencies:
+      ws: '*'
+
   jackspeak@2.3.6:
     resolution: {integrity: sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ==}
     engines: {node: '>=14'}
@@ -4884,6 +4895,9 @@ packages:
     resolution: {integrity: sha512-8WbVAQAUlENo1q3c3zZYuy5k9VzBQvp8AX9WOtbvyWlLM1v5JaSRmjubLjzHF4JFtptjH/5c/i95yaElvcjC0A==}
     engines: {node: '>= 0.4'}
 
+  typescript-event-target@1.1.1:
+    resolution: {integrity: sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg==}
+
   typescript@5.3.3:
     resolution: {integrity: sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==}
     engines: {node: '>=14.17'}
@@ -5225,6 +5239,18 @@ packages:
     resolution: {integrity: sha512-+QU2zd6OTD8XWIJCbffaiQeH9U73qIqafo1x6V1snCWYGJf6cVE0cDR4D8xRzcEnfI21IFrUPzPGtcPf8AC+Rw==}
     engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
 
+  ws@8.18.2:
+    resolution: {integrity: sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==}
+    engines: {node: '>=10.0.0'}
+    peerDependencies:
+      bufferutil: ^4.0.1
+      utf-8-validate: '>=5.0.2'
+    peerDependenciesMeta:
+      bufferutil:
+        optional: true
+      utf-8-validate:
+        optional: true
+
   xml-js@1.6.11:
     resolution: {integrity: sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==}
     hasBin: true
@@ -5283,6 +5309,14 @@ packages:
     resolution: {integrity: sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g==}
     engines: {node: '>=12.20'}
 
+  zod-to-json-schema@3.24.5:
+    resolution: {integrity: sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g==}
+    peerDependencies:
+      zod: ^3.24.1
+
+  zod@3.24.4:
+    resolution: {integrity: sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg==}
+
   zrender@5.5.0:
     resolution: {integrity: sha512-O3MilSi/9mwoovx77m6ROZM7sXShR/O/JIanvzTwjN3FORfLSr81PsUGd7jlaYOeds9d8tw82oP44+3YucVo+w==}
 
@@ -6493,6 +6527,17 @@ snapshots:
       '@jridgewell/resolve-uri': 3.1.2
       '@jridgewell/sourcemap-codec': 1.4.15
 
+  '@mendable/firecrawl-js@1.19.1(ws@8.18.2)':
+    dependencies:
+      axios: 1.6.8
+      isows: 1.0.7(ws@8.18.2)
+      typescript-event-target: 1.1.1
+      zod: 3.24.4
+      zod-to-json-schema: 3.24.5(zod@3.24.4)
+    transitivePeerDependencies:
+      - debug
+      - ws
+
   '@nodelib/fs.scandir@2.1.5':
     dependencies:
       '@nodelib/fs.stat': 2.0.5
@@ -9035,6 +9080,10 @@ snapshots:
 
   isobject@3.0.1: {}
 
+  isows@1.0.7(ws@8.18.2):
+    dependencies:
+      ws: 8.18.2
+
   jackspeak@2.3.6:
     dependencies:
       '@isaacs/cliui': 8.0.2
@@ -10413,6 +10462,8 @@ snapshots:
       typed-array-buffer: 1.0.2
       typed-array-byte-offset: 1.0.2
 
+  typescript-event-target@1.1.1: {}
+
   typescript@5.3.3: {}
 
   ufo@1.5.3: {}
@@ -10833,6 +10884,8 @@ snapshots:
       imurmurhash: 0.1.4
       signal-exit: 4.1.0
 
+  ws@8.18.2: {}
+
   xml-js@1.6.11:
     dependencies:
       sax: 1.3.0
@@ -10892,6 +10945,12 @@ snapshots:
 
   yocto-queue@1.0.0: {}
 
+  zod-to-json-schema@3.24.5(zod@3.24.4):
+    dependencies:
+      zod: 3.24.4
+
+  zod@3.24.4: {}
+
   zrender@5.5.0:
     dependencies:
       tslib: 2.3.0

+ 122 - 3
src/views/menduner/system/talentMap/components/webPageParsing.vue

@@ -13,7 +13,7 @@
 					class="!w-60vw"
 					type="textarea"
 					:rows="1"
-					placeholder="请输入需要爬取的页面,多个页面请用','隔开"
+					placeholder="请输入需要爬取的页面"
 				/>
 			</el-form-item>
 			<el-form-item>
@@ -22,22 +22,141 @@
 		</el-form>
 	</ContentWrap>
 
+	<ContentWrap v-if="contents.length">
+		<!-- One card per requested URL; each card shows a loading mask until content.data is set -->
+		<el-row :gutter="20">
+			<el-col v-for="(content, index) in contents" :key="index" :span="12">
+				<el-card class="!h-500px" v-loading="!content.data">
+					<template #header>
+						<div class="flex items-center justify-between">
+							<div 
+								class="flex-1 overflow-hidden whitespace-nowrap" 
+								style="text-overflow: ellipsis;"
+							>{{ content.url }}</div>
+							<div class="!w-85px">
+								<Icon icon="ep:view" size="25" class="ml-10px cursor-pointer" color="#409eff" @click="showPage(content)" />
+								<Icon icon="ep:refresh" size="25" class=" ml-18px cursor-pointer" color="#409eff" @click="handleReload(content)" />
+							</div>
+						</div>
+					</template>
+					<div v-if="content.data">
+						<!-- handleData returns a plain string (the error message) when the crawl fails -->
+						<template v-if="typeof content.data === 'string'">{{ content.data }}</template>
+            <el-tabs v-else v-model="content.tab">
+              <!-- One tab per key of the first crawled document; ?. keeps a response without a data array from throwing -->
+              <el-tab-pane v-for="(v, k) in content.data.data?.[0]" :key="k" :label="k" :name="k" class="overflow-y-auto !h-360px">
+								<template v-if="k === 'html'">
+									<!-- NOTE(review): v-html below injects crawled third-party markup unsanitised (XSS risk); consider sanitising before rendering -->
+									<div class="position-sticky float-right">
+										<el-button type="primary" class="cursor-pointer" @click="content.showHtml = !content.showHtml" :icon="SetUp" circle />
+									</div>
+                  <pre v-if="!content.showHtml">{{ v }}</pre>
+                  <div v-else v-html="v"></div>
+                </template>
+                <pre v-else>{{ v }}</pre>
+							</el-tab-pane>
+            </el-tabs>
+          </div>
+				</el-card>
+			</el-col>
+		</el-row>
+	</ContentWrap>
+
+	<el-drawer
+		v-model="drawer"
+		class="!w-50vw"
+		:with-header="false"
+		:modal="true"
+	>
+		<!-- Live preview of the page chosen via showPage(); drawerUrl holds its address -->
+		<iframe class="!w-100%" style="height: calc(100vh - 90px);" :src="drawerUrl" frameborder="0"></iframe>
+		<!-- Footer bar: the close button hides the drawer and clears drawerUrl -->
+		<div class="position-sticky left-20px !h-50px lh-50px" style="border-top: 1px solid #e1e1e1;">
+			<el-button type="primary" class="!w-100px" @click="drawer = false; drawerUrl = ''">关 闭</el-button>
+		</div>
+	</el-drawer>
 </template>
 
 <script setup>
 defineOptions({ name: 'WebPageParsing' })
+import FirecrawlApp from '@mendable/firecrawl-js'
+import { SetUp } from '@element-plus/icons-vue'
 
 const message = useMessage() // 消息弹窗
 const { t } = useI18n() // 国际化
 
 const loading = ref(false)
 const queryParams = reactive({
-	urls: 'https://mp.weixin.qq.com/s/gtCcUeXZUXkQi5CR25vjew'
+	urls: 'https://element.eleme.cn/#/zh-CN/component/installation'
 })
 const queryFormRef = ref()
+// Result cards rendered by the template; handleExecute fills it with { url, tab, showHtml, data } items
+const contents = ref([])
+// Visibility flag bound to the preview drawer's v-model
+const drawer = ref(false)
+// URL loaded by the iframe inside the preview drawer
+const drawerUrl = ref('')
+
+// Open the preview drawer and load the given card's URL in its iframe.
+const showPage = (content) => {
+	drawer.value = true
+	const { url: pageUrl } = content
+	drawerUrl.value = pageUrl
+}
+
+/**
+ * Re-crawl a single result card and refresh it in place.
+ * @param {object} content - card item from `contents` ({ url, tab, showHtml, data })
+ */
+const handleReload = async (content) => {
+	// Clearing data puts the card back into its v-loading state while the crawl runs
+	content.data = null
+	// Crawl this card's own URL, not the whole (possibly comma-separated) input field
+	const res = await handleData(content.url)
+	// Tab-pane names are the response keys, so reset to the same default handleExecute uses
+	content.tab = 'markdown'
+	content.data = res
+}
+
+/**
+ * Crawl one URL through Firecrawl, requesting markdown and html output.
+ * Never rejects for a thrown Error: failures are reported as the error message string.
+ * @param {string} url - page to crawl
+ * @returns {Promise<object|string>} the crawl response on success, otherwise an error message
+ */
+const handleData = async (url) => {
+	try {
+		// NOTE(review): this API key is hard-coded and ships to every browser in the bundle;
+		// it should be rotated and moved to configuration or a server-side proxy.
+		const client = new FirecrawlApp({ apiKey: 'fc-85c1550c6db64ce4ae8f2d2cd2606e6f' })
+		const crawlOptions = {
+			limit: 100,
+			scrapeOptions: { formats: ['markdown', 'html'] }
+		}
+		const result = await client.crawlUrl(url, crawlOptions)
+		if (result.success) return result
+		// An unsuccessful response is routed through the same path as a thrown error
+		throw new Error(`Failed to crawl: ${result.error}`)
+	} catch (err) {
+		return err.message
+	}
+}
+
+// const handleExecute = async () => {
+// 	if (!queryParams.urls) return
+// 	contents.value = []
+// 	const urls = queryParams.urls.split(',')
+
+// 	const run = async (url) => {
+// 	  contents.value.push({ url, tab: 'markdown', showHtml: false, data: null })
+// 		const res = await handleData(url)
+// 		contents.value[contents.value.length - 1] = { url, tab: 'markdown', showHtml: false, data: res }
+
+// 		if (contents.value.length < urls.length) {
+// 			await run(urls[contents.value.length])
+// 		}
+// 	}
+
+// 	await run(urls[contents.value.length])
+// }
 
 // 执行
 const handleExecute = async () => {
-	console.log(queryParams.urls, 'urls')
+	// Crawl every comma-separated URL from the input in parallel, one result card per URL.
+	if (!queryParams.urls) return
+	contents.value = []
+	// Remember which array belongs to this run so a late result from an
+	// earlier run cannot overwrite a card of a newer run.
+	const batch = contents.value
+	const urls = queryParams.urls.split(',').map(url => url.trim()).filter(url => url)
+	if (urls.length === 0) return
+
+	// Placeholder cards first (data: null) so every card shows its loading mask immediately
+	urls.forEach(url => {
+	  batch.push({ url, tab: 'markdown', showHtml: false, data: null })
+	})
+
+	const crawlPromises = urls.map(async (url, index) => {
+		const res = await handleData(url)
+		// A newer run replaced contents.value while this crawl was in flight: drop the stale result
+		if (contents.value !== batch) return
+		// Write in place so the card keeps its identity and any tab/showHtml changes made meanwhile
+		batch[index].data = res
+	})
+
+	try {
+		await Promise.all(crawlPromises)
+		console.log('All crawls completed:', contents.value); // a success callback can be added here
+	} catch (error) {
+		console.error('爬取过程中发生错误:', error);
+	}
 }
 </script>

+ 3 - 3
src/views/menduner/system/talentMap/index.vue

@@ -1,13 +1,13 @@
 <template>
   <div>
 		<el-tabs v-model="activeName" @tab-click="handleClick" type="border-card">
-			<el-tab-pane label="简历" name="resume">
+			<el-tab-pane label="简历解析" name="resume">
 				<resumePage/>
 			</el-tab-pane>
-			<el-tab-pane label="名片" name="card">
+			<el-tab-pane label="名片解析" name="card">
 				<cardPage/>
 			</el-tab-pane>
-			<el-tab-pane label="网页" name="webpage">
+			<el-tab-pane label="网页解析" name="webpage">
 				<webPageParsing/>
 			</el-tab-pane>
 		</el-tabs>