4 tháng trước cách đây · 8011bbed87
--- a/src/views/menduner/system/talentMap/maintenance/gather/components/webAnalysis.vue
+++ b/src/views/menduner/system/talentMap/maintenance/gather/components/webAnalysis.vue
@@ -123,41 +123,41 @@ const wechatHtmlToMarkdown = (html, filename = '新任命.md') => {
 
				 // 	return result
			
 
				 // }
			
 
				 
			
 
				-// function extractPublishTime(doc, html) {
			
 
				-//   // 1. 通过 id
			
 
				-//   let timeEl = doc.getElementById('publish_time')
			
 
				-//   if (timeEl && timeEl.innerText) return timeEl.innerText.trim()
			
 
				-
			
 
				-//   // 2. 通过 class
			
 
				-//   let metaEls = doc.querySelectorAll('.rich_media_meta.rich_media_meta_text')
			
 
				-//   for (let el of metaEls) {
			
 
				-//     if (el.innerText && /\d{4}年\d{1,2}月\d{1,2}日/.test(el.innerText)) {
			
 
				-//       return el.innerText.trim()
			
 
				-//     }
			
 
				-//   }
			
 
				-
			
 
				-//   // 3. 通过 meta 标签
			
 
				-//   let meta = doc.querySelector('meta[property="article:published_time"]')
			
 
				-//   if (meta && meta.content) return meta.content.trim()
			
 
				-
			
 
				-//   // 4. 通过正则从 html 里提取
			
 
				-//   let match = html.match(/(\d{4}年\d{1,2}月\d{1,2}日)/)
			
 
				-//   if (match) return match[1]
			
 
				-
			
 
				-//   return ''
			
 
				-// }
			
 
				/**
				 * Extract an article's publish time, trying several sources in order:
				 *   1. the element with id `publish_time`
				 *   2. any `.rich_media_meta.rich_media_meta_text` element whose text contains a
				 *      `YYYY年M月D日` date
				 *   3. the `<meta property="article:published_time">` tag
				 *   4. the first `YYYY年M月D日` date found in the raw HTML string
				 *
				 * A source that yields only whitespace is treated as empty, so the next source is tried.
				 *
				 * @param {Document} doc - parsed document to inspect
				 * @param {string} html - raw HTML used as the last-resort source; non-strings are ignored
				 * @returns {string} the trimmed publish time, or '' when nothing was found
				 */
				function extractPublishTime(doc, html) {
				  const datePattern = /\d{4}年\d{1,2}月\d{1,2}日/

				  // Trimmed text of an element; falls back to textContent when innerText is unavailable.
				  const textOf = (el) => {
				    if (!el) return ''
				    const raw = el.innerText != null ? el.innerText : el.textContent
				    return raw == null ? '' : String(raw).trim()
				  }

				  // 1. By id
				  const idText = textOf(doc.getElementById('publish_time'))
				  if (idText) return idText

				  // 2. By class: only accept text that actually contains a date
				  const metaEls = doc.querySelectorAll('.rich_media_meta.rich_media_meta_text')
				  for (const el of metaEls) {
				    const text = textOf(el)
				    if (datePattern.test(text)) return text
				  }

				  // 3. By meta tag
				  const meta = doc.querySelector('meta[property="article:published_time"]')
				  const metaContent = meta && meta.content != null ? String(meta.content).trim() : ''
				  if (metaContent) return metaContent

				  // 4. By regex over the raw HTML
				  const match = typeof html === 'string' ? html.match(datePattern) : null
				  return match ? match[0] : ''
				}
			
 
				+
			
 
				/**
				 * Poll `extractPublishTime(doc, html)` on a timer until it yields a non-empty value
				 * or `maxTry` attempts have been made, then invoke `cb` exactly once with the result
				 * ('' when every attempt came back empty).
				 *
				 * The first attempt happens after one `interval`, not synchronously.
				 *
				 * @param {Document} doc - document passed through to extractPublishTime
				 * @param {string} html - raw HTML passed through to extractPublishTime
				 * @param {(publishTime: string) => void} cb - receives the extracted time or ''
				 * @param {number} [maxTry=10] - maximum number of extraction attempts
				 * @param {number} [interval=200] - delay between attempts, in milliseconds
				 * @returns {*} the interval handle, so callers can `clearInterval` it to cancel polling
				 */
				function tryExtractPublishTime(doc, html, cb, maxTry = 10, interval = 200) {
				  let tryCount = 0
				  const timer = setInterval(() => {
				    // Count the attempt before checking the limit so exactly maxTry attempts run.
				    tryCount++
				    let publishTime
				    try {
				      publishTime = extractPublishTime(doc, html)
				    } catch (err) {
				      // Stop polling on failure; otherwise the same error would repeat every tick.
				      clearInterval(timer)
				      throw err
				    }
				    if (publishTime || tryCount >= maxTry) {
				      clearInterval(timer)
				      cb(publishTime)
				    }
				  }, interval)
				  return timer
				}
			
 
				 
			
 
				 // 查看原网页
			
 
				 const showPage = (res) => {
			
@@ -168,7 +168,7 @@ const showPage = (res) => {
 
				   html = html.replace(/data-src/g, 'src')
			
 
				     // 需要获取文章发布时间的话需注释下一行代码
			
 
				     // 移除HTML内容中所有的<script>标签，这样可以避免在iframe中执行潜在的不受信任的脚本。
			
 
				-    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/g, '')
			
 
				+    // .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/g, '')
			
 
				     // 将HTML内容中所有的https替换为http，可能是为了避免在HTTPS环境下加载非HTTPS资源导致浏览器警告
			
 
				     .replace(/https/g, 'http')
			
 
				   
			
@@ -193,18 +193,18 @@ const showPage = (res) => {
 
				     }, 100)
			
 
				 
			
 
				     // 获取发布时间
			
 
				-    // setTimeout(() => {
			
 
				-    //   tryExtractPublishTime(doc, html, (publishTime) => {
			
 
				-    //     if (publishTime) {
			
 
				-    //       res.publish_time = publishTime
			
 
				-    //         .replace("年", "-")
			
 
				-    //         .replace("月", "-")
			
 
				-    //         .replace("日", "")
			
 
				-    //         .split(" ")[0];
			
 
				-    //       console.log(publishTime, '发布时间', res.publish_time)
			
 
				-    //     }
			
 
				-    //   });
			
 
				-    // }, 100); // 先等100ms让iframe初步渲染，再开始轮询
			
 
				+    setTimeout(() => {
			
 
				+      tryExtractPublishTime(doc, html, (publishTime) => {
			
 
				+        if (publishTime) {
			
 
				+          res.publish_time = publishTime
			
 
				+            .replace("年", "-")
			
 
				+            .replace("月", "-")
			
 
				+            .replace("日", "")
			
 
				+            .split(" ")[0];
			
 
				+          console.log(publishTime, '发布时间', res.publish_time)
			
 
				+        }
			
 
				+      });
			
 
				+    }, 100); // 先等100ms让iframe初步渲染，再开始轮询
			
 
				   })
			
 
				 }
			
 
				 
			
@@ -243,7 +243,7 @@ const handleAnalysis = async () => {
 
				 		list.forEach(e => {
			
 
				 			contents.value.push({
			
 
				 				...e,
			
 
				-				// publish_time: null,
			
 
				+				publish_time: null,
			
 
				 				id: generateUUID(),
			
 
				         file: wechatHtmlToMarkdown(e.data)
			
 
				 				// markdown_text: handleConvert(e)