|
@@ -37,6 +37,7 @@
|
|
|
<div class="overflow-y-auto !h-360px" v-if="content.data">
|
|
|
<pre>{{ content.data }}</pre>
|
|
|
</div>
|
|
|
+ <el-button type="primary" plain class="mt-10px" @click="handleConvert(content)">转换为markdown格式</el-button>
|
|
|
</el-card>
|
|
|
</el-col>
|
|
|
</el-row>
|
|
@@ -54,12 +55,26 @@
|
|
|
<el-button type="primary" class="!w-100px" @click="drawer = false">关 闭</el-button>
|
|
|
</div>
|
|
|
</el-drawer>
|
|
|
+
|
|
|
+ <el-drawer
|
|
|
+ v-model="showMarkDown"
|
|
|
+ class="!w-50vw"
|
|
|
+ :with-header="false"
|
|
|
+ :modal="true"
|
|
|
+ >
|
|
|
+ <pre>{{ markdownContent }}</pre>
|
|
|
+ <el-divider class="!ma-0" />
|
|
|
+ <div class="position-sticky left-20px !h-50px lh-50px">
|
|
|
+ <el-button type="primary" class="!w-100px" @click="showMarkDown = false; markdownContent = ''">关 闭</el-button>
|
|
|
+ </div>
|
|
|
+ </el-drawer>
|
|
|
</template>
|
|
|
|
|
|
<script setup>
|
|
|
/** 人才采集 网页解析 */
|
|
|
defineOptions({ name: 'WebPageParsing' })
|
|
|
import axios from 'axios'
|
|
|
+import TurndownService from 'turndown'
|
|
|
|
|
|
const message = useMessage() // 消息弹窗
|
|
|
const { t } = useI18n() // 国际化
|
|
@@ -72,6 +87,58 @@ const queryFormRef = ref()
|
|
|
const contents = ref([])
|
|
|
const drawer = ref(false)
|
|
|
|
|
|
// Shared HTML-to-Markdown converter: ATX ("#") headings, fenced code blocks,
// and "-" as the bullet marker for unordered lists.
const turndownService = new TurndownService({
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
  bulletListMarker: '-'
})

/**
 * Custom image rule for WeChat official-account content.
 *
 * Reads the image URL from `src`, falling back to `data-src` when `src` is
 * missing or empty, and renders the image as Markdown `![alt](url)`.
 *
 * @param {string} content - Converted inner content of the node (unused for <img>).
 * @param {Element} node - The <img> element being converted.
 * @returns {string} Markdown image syntax, or an empty string when the image has no URL.
 */
turndownService.addRule('wechatImages', {
  filter: 'img',
  replacement: (content, node) => {
    const alt = node.getAttribute('alt') || ''
    const src = node.getAttribute('src') || node.getAttribute('data-src') || ''
    // An image without any URL cannot be referenced from Markdown, so drop it.
    if (!src) return ''
    return `![${alt}](${src})`
  }
})
|
|
|
+
|
|
|
/**
 * Extract the main content of an HTML page and convert it to Markdown.
 *
 * The HTML is parsed into a detached document, the `#js_content` element is
 * used as the article root (falling back to `<body>` when it is absent),
 * unwanted elements are stripped from that root, and the remaining markup is
 * handed to the shared `turndownService`.
 *
 * @param {string} html - Raw HTML source of the page.
 * @returns {string} Markdown produced by `turndownService.turndown`.
 */
const wechatHtmlToMarkdown = (html) => {
  // Parse into a temporary document so the live page DOM is never touched.
  const doc = new DOMParser().parseFromString(html, 'text/html')

  // Per the original author's note, WeChat official-account articles usually
  // keep their body inside the div with id="js_content".
  const articleRoot = doc.querySelector('#js_content') || doc.body

  // Selectors for elements that should not appear in the Markdown output.
  const unwantedSelectors = [
    'script', 'style', 'iframe', 'button',
    '.qr_code', '.rich_media_extra', '.copyright'
  ]

  for (const selector of unwantedSelectors) {
    for (const node of articleRoot.querySelectorAll(selector)) {
      node.remove()
    }
  }

  // Convert whatever is left of the article root to Markdown.
  return turndownService.turndown(articleRoot.innerHTML)
}
|
|
|
+
|
|
|
// State for the Markdown preview drawer.
const showMarkDown = ref(false) // whether the drawer is open
const markdownContent = ref(null) // Markdown text rendered inside the drawer

/**
 * Convert the HTML held in `res.data` to Markdown and open the preview drawer.
 *
 * Does nothing when `res.data` is falsy. When the conversion yields an empty
 * result, a warning message is shown and the drawer stays closed.
 *
 * @param {{ data?: string }} res - Item whose `data` field is passed to the converter.
 */
const handleConvert = (res) => {
  if (!res.data) return

  const markdown = wechatHtmlToMarkdown(res.data)
  if (markdown) {
    markdownContent.value = markdown
    showMarkDown.value = true
    return
  }

  return message.warning('转换失败')
}
|
|
|
+
|
|
|
// 查看原网页
|
|
|
const showPage = (res) => {
|
|
|
if (res.data) {
|