|
@@ -13,7 +13,7 @@
|
|
|
class="!w-60vw"
|
|
|
type="textarea"
|
|
|
:rows="1"
|
|
|
- placeholder="请输入需要爬取的页面,多个页面请用','隔开"
|
|
|
+ placeholder="请输入需要爬取的页面"
|
|
|
/>
|
|
|
</el-form-item>
|
|
|
<el-form-item>
|
|
@@ -22,22 +22,141 @@
|
|
|
</el-form>
|
|
|
</ContentWrap>
|
|
|
|
|
|
<!-- Result grid: one card per crawled URL, shown once at least one crawl has been requested. -->
<ContentWrap v-if="contents.length">
  <!-- gutter is bound with ":" so el-row receives the number 20 instead of the string "20" -->
  <el-row :gutter="20">
    <el-col v-for="(content, index) in contents" :key="index" :span="12">
      <!-- The card shows a loading mask until content.data is filled in -->
      <el-card class="!h-500px" v-loading="!content.data">
        <template #header>
          <div class="flex items-center justify-between">
            <div
              class="flex-1 overflow-hidden whitespace-nowrap"
              style="text-overflow: ellipsis;"
            >{{ content.url }}</div>
            <div class="!w-85px">
              <Icon icon="ep:view" size="25" class="ml-10px cursor-pointer" color="#409eff" @click="showPage(content)" />
              <Icon icon="ep:refresh" size="25" class=" ml-18px cursor-pointer" color="#409eff" @click="handleReload(content)" />
            </div>
          </div>
        </template>
        <div v-if="content.data">
          <!-- A string result is an error message; anything else is a crawl response -->
          <template v-if="typeof content.data === 'string'">{{ content.data }}</template>
          <el-tabs v-else v-model="content.tab">
            <!-- One tab per key of the first crawled document -->
            <el-tab-pane v-for="(v, k) in content.data.data[0]" :key="k" :label="k" :name="k" class="overflow-y-auto !h-360px">
              <template v-if="k === 'html'">
                <!-- FIXME(security): the rendered view below injects crawled third-party HTML with v-html, which is an XSS risk; sanitize it or render it in a sandboxed iframe. -->
                <div class="position-sticky float-right">
                  <el-button type="primary" class="cursor-pointer" @click="content.showHtml = !content.showHtml" :icon="SetUp" circle />
                </div>
                <pre v-if="!content.showHtml">{{ v }}</pre>
                <div v-else v-html="v"></div>
              </template>
              <pre v-else>{{ v }}</pre>
            </el-tab-pane>
          </el-tabs>
        </div>
      </el-card>
    </el-col>
  </el-row>
</ContentWrap>
|
|
|
+
|
|
|
<!--
  Preview drawer: shows the page behind a result card in an iframe.
  drawerUrl is also cleared on the drawer's "closed" event so that dismissing it
  via the mask or ESC (not only the button) unloads the previewed page.
-->
<el-drawer
  v-model="drawer"
  class="!w-50vw"
  :with-header="false"
  :modal="true"
  @closed="drawerUrl = ''"
>
  <iframe class="!w-100%" style="height: calc(100vh - 90px);" :src="drawerUrl" frameborder="0"></iframe>
  <div class="position-sticky left-20px !h-50px lh-50px" style="border-top: 1px solid #e1e1e1;">
    <el-button type="primary" class="!w-100px" @click="drawer = false; drawerUrl = ''">关 闭</el-button>
  </div>
</el-drawer>
|
|
|
</template>
|
|
|
|
|
|
<script setup>
|
|
|
defineOptions({ name: 'WebPageParsing' })
|
|
|
+import FirecrawlApp from '@mendable/firecrawl-js'
|
|
|
+import { SetUp } from '@element-plus/icons-vue'
|
|
|
|
|
|
const message = useMessage() // message popup
|
|
|
const { t } = useI18n() // internationalization (i18n)
|
|
|
|
|
|
// NOTE(review): never toggled by the script code visible here — confirm whether the form still binds it.
const loading = ref(false)
|
|
|
// Query form state. `urls` is split on ',' by handleExecute, so it may hold several page URLs.
const queryParams = reactive({
|
|
|
- urls: 'https://mp.weixin.qq.com/s/gtCcUeXZUXkQi5CR25vjew'
|
|
|
+ urls: 'https://element.eleme.cn/#/zh-CN/component/installation'
|
|
|
})
|
|
|
const queryFormRef = ref()
|
|
|
// One entry per requested URL: { url, tab, showHtml, data }; data stays null while the crawl is running.
+const contents = ref([])
|
|
|
// Open/closed state of the preview drawer (its v-model).
+const drawer = ref(false)
|
|
|
// URL loaded by the iframe inside the preview drawer.
+const drawerUrl = ref('')
|
|
|
+
|
|
|
/**
 * Open the preview drawer on the page that belongs to a result card.
 *
 * @param {{ url: string }} content - Result entry whose page should be previewed.
 */
const showPage = (content) => {
  // Open first, then point the iframe at the entry's address
  drawer.value = true
  const { url } = content
  drawerUrl.value = url
}
|
|
|
+
|
|
|
/**
 * Re-crawl the page of a single result card and refresh that card in place.
 *
 * The card's own `content.url` is crawled (not the form field, which may have been
 * edited or may hold several comma-separated URLs), and the active tab is reset to
 * 'markdown', the tab name new entries are created with.
 *
 * @param {{ url: string, tab: string, data: any }} content - Result entry to refresh.
 * @returns {Promise<void>}
 */
const handleReload = async (content) => {
  // Clearing data puts the card back into its loading state while the crawl runs
  content.data = null
  const res = await handleData(content.url)
  content.tab = 'markdown'
  content.data = res
}
|
|
|
+
|
|
|
/**
 * Crawl a single URL through the Firecrawl client.
 *
 * This function does not reject: every failure is turned into a non-empty message
 * string. The template tells the two outcomes apart with `typeof data === 'string'`
 * and keeps a card in its loading state while `data` is falsy, so an `undefined` or
 * empty result would leave the card spinning forever.
 *
 * @param {string} url - Page to crawl.
 * @returns {Promise<object|string>} The crawl response on success, otherwise an error message.
 */
const handleData = async (url) => {
  try {
    // FIXME(security): this API key is hard-coded in a client-side component, so it is
    // committed to the repository and exposed to every visitor. Rotate it and move the
    // crawl call behind a backend endpoint.
    const app = new FirecrawlApp({ apiKey: 'fc-85c1550c6db64ce4ae8f2d2cd2606e6f' })
    const crawlResponse = await app.crawlUrl(url, {
      limit: 100,
      scrapeOptions: {
        formats: ['markdown', 'html']
      }
    })
    if (!crawlResponse.success) {
      throw new Error(`Failed to crawl: ${crawlResponse.error}`)
    }
    return crawlResponse
  } catch (error) {
    // Rejections are not guaranteed to be Error instances (or to carry a message)
    const reason = error instanceof Error ? error.message : String(error ?? '')
    return reason || 'Failed to crawl: unknown error'
  }
}
|
|
|
+
|
|
|
+// const handleExecute = async () => {
|
|
|
+// if (!queryParams.urls) return
|
|
|
+// contents.value = []
|
|
|
+// const urls = queryParams.urls.split(',')
|
|
|
+
|
|
|
+// const run = async (url) => {
|
|
|
+// contents.value.push({ url, tab: 'markdown', showHtml: false, data: null })
|
|
|
+// const res = await handleData(url)
|
|
|
+// contents.value[contents.value.length - 1] = { url, tab: 'markdown', showHtml: false, data: res }
|
|
|
+
|
|
|
+// if (contents.value.length < urls.length) {
|
|
|
+// await run(urls[contents.value.length])
|
|
|
+// }
|
|
|
+// }
|
|
|
+
|
|
|
+// await run(urls[contents.value.length])
|
|
|
+// }
|
|
|
|
|
|
/**
 * Execute: crawl every URL typed into the query form, in parallel.
 *
 * The input is split on ',', trimmed, and empty pieces are dropped. One placeholder
 * entry per URL is published immediately (data: null, so its card shows as loading)
 * and each entry is filled in as soon as its own crawl settles.
 *
 * Results are written into the entry objects of the batch that requested them:
 * - if a newer run has replaced the list in the meantime, the stale result is dropped
 *   instead of landing in an unrelated card of the new list;
 * - entries are mutated in place, so a reference to an entry held elsewhere
 *   (e.g. by a reload in progress) keeps pointing at the object that is displayed.
 *
 * @returns {Promise<void>}
 */
const handleExecute = async () => {
  if (!queryParams.urls) return

  const urls = queryParams.urls
    .split(',')
    .map((url) => url.trim())
    .filter(Boolean)

  // The previous results are cleared even when no usable URL remains
  contents.value = urls.map((url) => ({ url, tab: 'markdown', showHtml: false, data: null }))
  if (urls.length === 0) return

  // Remember which list this run owns so late results can be recognised as stale
  const batch = contents.value

  try {
    await Promise.all(
      urls.map(async (url, index) => {
        const res = await handleData(url)
        if (contents.value !== batch) return
        batch[index].data = res
      })
    )
  } catch (error) {
    console.error('爬取过程中发生错误:', error)
  }
}
|
|
|
</script>
|