|
@@ -123,41 +123,41 @@ const wechatHtmlToMarkdown = (html, filename = '新任命.md') => {
|
|
|
// return result
|
|
|
// }
|
|
|
|
|
|
-// function extractPublishTime(doc, html) {
|
|
|
-// // 1. 通过 id
|
|
|
-// let timeEl = doc.getElementById('publish_time')
|
|
|
-// if (timeEl && timeEl.innerText) return timeEl.innerText.trim()
|
|
|
-
|
|
|
-// // 2. 通过 class
|
|
|
-// let metaEls = doc.querySelectorAll('.rich_media_meta.rich_media_meta_text')
|
|
|
-// for (let el of metaEls) {
|
|
|
-// if (el.innerText && /\d{4}年\d{1,2}月\d{1,2}日/.test(el.innerText)) {
|
|
|
-// return el.innerText.trim()
|
|
|
-// }
|
|
|
-// }
|
|
|
-
|
|
|
-// // 3. 通过 meta 标签
|
|
|
-// let meta = doc.querySelector('meta[property="article:published_time"]')
|
|
|
-// if (meta && meta.content) return meta.content.trim()
|
|
|
-
|
|
|
-// // 4. 通过正则从 html 里提取
|
|
|
-// let match = html.match(/(\d{4}年\d{1,2}月\d{1,2}日)/)
|
|
|
-// if (match) return match[1]
|
|
|
-
|
|
|
-// return ''
|
|
|
-// }
|
|
|
/**
 * Best-effort lookup of an article's publish time.
 *
 * Strategies are tried in order and the first non-empty result wins:
 *   1. the element with id `publish_time`;
 *   2. any `.rich_media_meta.rich_media_meta_text` element whose text contains
 *      a `YYYY年M月D日` date;
 *   3. the `content` of `<meta property="article:published_time">`;
 *   4. the first `YYYY年M月D日` date found in the raw HTML string.
 *
 * @param {Document} doc - Document (or document-like object) to query.
 * @param {string} html - Raw HTML used by the regex fallback; non-string
 *   values simply skip that fallback.
 * @returns {string} The trimmed publish time text, or '' when nothing matched.
 *   NOTE(review): strategies 1-3 return the text as found, so the format is
 *   not guaranteed to be `YYYY年M月D日` — confirm callers can cope.
 */
function extractPublishTime(doc, html) {
  const datePattern = /\d{4}年\d{1,2}月\d{1,2}日/

  // 1. By id. Trim before testing so a whitespace-only node does not
  //    short-circuit the remaining strategies with an empty result.
  const timeEl = doc.getElementById('publish_time')
  const idText = timeEl && timeEl.innerText ? timeEl.innerText.trim() : ''
  if (idText) return idText

  // 2. By class: first meta-text element that carries a date.
  const metaEls = doc.querySelectorAll('.rich_media_meta.rich_media_meta_text')
  for (const el of metaEls) {
    if (el.innerText && datePattern.test(el.innerText)) {
      return el.innerText.trim()
    }
  }

  // 3. By <meta> tag; blank content falls through to the regex fallback.
  const meta = doc.querySelector('meta[property="article:published_time"]')
  const metaContent = meta && meta.content ? meta.content.trim() : ''
  if (metaContent) return metaContent

  // 4. Regex over the raw HTML. Guarded so a missing/non-string html yields ''
  //    instead of throwing on `.match`.
  if (typeof html === 'string') {
    const match = html.match(datePattern)
    if (match) return match[0]
  }

  return ''
}
|
|
|
+
|
|
|
/**
 * Polls `extractPublishTime(doc, html)` on a timer and reports the outcome once.
 *
 * The callback fires exactly once: with the first non-empty publish time, or
 * with '' after `maxTry` unsuccessful attempts. The timer is always cleared
 * before the callback runs.
 *
 * @param {Document} doc - Document passed through to `extractPublishTime`.
 * @param {string} html - Raw HTML passed through to `extractPublishTime`.
 * @param {(publishTime: string) => void} cb - Receives the result.
 * @param {number} [maxTry=10] - Maximum number of extraction attempts
 *   (at least one attempt is always made).
 * @param {number} [interval=200] - Delay between attempts in milliseconds.
 */
function tryExtractPublishTime(doc, html, cb, maxTry = 10, interval = 200) {
  let attempts = 0
  const timer = setInterval(() => {
    // Count the attempt before the limit check so `maxTry` is the real cap
    // rather than allowing one extra tick.
    attempts++

    let publishTime = ''
    try {
      publishTime = extractPublishTime(doc, html)
    } catch (err) {
      // Treat a failing attempt as "not found yet"; without this the interval
      // would never be cleared and would keep throwing on every tick.
      console.warn('extractPublishTime failed', err)
    }

    if (publishTime || attempts >= maxTry) {
      clearInterval(timer)
      cb(publishTime)
    }
  }, interval)
}
|
|
|
|
|
|
// 查看原网页
|
|
|
const showPage = (res) => {
|
|
@@ -168,7 +168,7 @@ const showPage = (res) => {
|
|
|
html = html.replace(/data-src/g, 'src')
|
|
|
// 需要获取文章发布时间的话需注释下一行代码
|
|
|
// 移除HTML内容中所有的<script>标签,这样可以避免在iframe中执行潜在的不受信任的脚本。
|
|
|
- .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/g, '')
|
|
|
+ // .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/g, '')
|
|
|
// 将HTML内容中所有的https替换为http,可能是为了避免在HTTPS环境下加载非HTTPS资源导致浏览器警告
|
|
|
.replace(/https/g, 'http')
|
|
|
|
|
@@ -193,18 +193,18 @@ const showPage = (res) => {
|
|
|
}, 100)
|
|
|
|
|
|
// 获取发布时间
|
|
|
- // setTimeout(() => {
|
|
|
- // tryExtractPublishTime(doc, html, (publishTime) => {
|
|
|
- // if (publishTime) {
|
|
|
- // res.publish_time = publishTime
|
|
|
- // .replace("年", "-")
|
|
|
- // .replace("月", "-")
|
|
|
- // .replace("日", "")
|
|
|
- // .split(" ")[0];
|
|
|
- // console.log(publishTime, '发布时间', res.publish_time)
|
|
|
- // }
|
|
|
- // });
|
|
|
- // }, 100); // 先等100ms让iframe初步渲染,再开始轮询
|
|
|
+ setTimeout(() => {
|
|
|
+ tryExtractPublishTime(doc, html, (publishTime) => {
|
|
|
+ if (publishTime) {
|
|
|
+ res.publish_time = publishTime
|
|
|
+ .replace("年", "-")
|
|
|
+ .replace("月", "-")
|
|
|
+ .replace("日", "")
|
|
|
+ .split(" ")[0];
|
|
|
+ console.log(publishTime, '发布时间', res.publish_time)
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }, 100); // 先等100ms让iframe初步渲染,再开始轮询
|
|
|
})
|
|
|
}
|
|
|
|
|
@@ -243,7 +243,7 @@ const handleAnalysis = async () => {
|
|
|
list.forEach(e => {
|
|
|
contents.value.push({
|
|
|
...e,
|
|
|
- // publish_time: null,
|
|
|
+ publish_time: null,
|
|
|
id: generateUUID(),
|
|
|
file: wechatHtmlToMarkdown(e.data)
|
|
|
// markdown_text: handleConvert(e)
|