123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 测试 process-urls 接口的脚本
- """
- import requests
- import json
def _print_crawled_contents(contents):
    """Print metadata and a short preview for every successfully crawled item."""
    print("\n📄 成功爬取的内容:")
    for index, content in enumerate(contents, start=1):
        print(f" {index}. URL: {content.get('url')}")
        print(f" 状态: {content.get('status')}")
        print(f" 内容长度: {content.get('content_length')}")
        print(f" 原始长度: {content.get('original_length')}")
        print(f" 状态码: {content.get('status_code')}")
        print(f" 编码: {content.get('encoding')}")

        data_content = content.get('data', '')
        if data_content:
            # Cap the preview at 200 characters so a large page does not
            # flood the console.
            if len(data_content) > 200:
                preview = data_content[:200] + "..."
            else:
                preview = data_content
            print(f" 内容预览: {preview}")
        print()


def _print_failed_items(failed_items):
    """Print the URL, status and error message of every item that failed."""
    print("❌ 失败的项目:")
    for index, failed_item in enumerate(failed_items, start=1):
        print(f" {index}. URL: {failed_item.get('url')}")
        print(f" 状态: {failed_item.get('status')}")
        print(f" 错误信息: {failed_item.get('error', '未知错误')}")
        print()


def _print_result_summary(result):
    """Print the statistics and per-item details found in a decoded response body."""
    # A JSON body that is not an object (e.g. a list) carries no 'success'
    # flag; report it through the regular failure message instead of raising.
    payload = result if isinstance(result, dict) else {}
    if not payload.get('success'):
        print(f"❌ 接口返回失败: {payload.get('message', '未知错误')}")
        return

    # `or {}` / `or []` also cover an explicit JSON null, which
    # dict.get(key, default) would hand back as None.
    data = payload.get('data') or {}
    total_urls = data.get('total_urls', 0)
    success_count = data.get('success_count', 0)
    failed_count = data.get('failed_count', 0)

    print("\n📊 处理结果统计:")
    print(f" 总URL数: {total_urls}")
    print(f" 成功数量: {success_count}")
    print(f" 失败数量: {failed_count}")

    contents = data.get('contents') or []
    if contents:
        _print_crawled_contents(contents)

    failed_items = data.get('failed_items') or []
    if failed_items:
        _print_failed_items(failed_items)


def test_process_urls_api():
    """Manually exercise the process-urls endpoint and print a readable report.

    POSTs a fixed JSON payload (one URL under the ``urlArr`` key) to the
    endpoint with a 30 second timeout, then prints the status code, the
    response headers and, for an HTTP 200 answer, the decoded JSON body
    followed by a summary of the crawl statistics, crawled contents and
    failed items.

    Every failure (connection error, timeout, other request error, non-JSON
    body, unexpected exception) is reported on stdout; nothing is raised and
    the function always returns ``None``.
    """
    api_url = "http://menduner.citupro.com:6868/api/parse/process-urls"
    test_data = {
        "urlArr": ["https://mp.weixin.qq.com/s/4yz-kNAWAlF36aeQ_cgQQg"]
    }
    headers = {
        "Content-Type": "application/json"
    }

    try:
        print(f"正在测试接口: {api_url}")
        print(f"请求数据: {json.dumps(test_data, ensure_ascii=False, indent=2)}")
        print("-" * 50)

        response = requests.post(
            api_url,
            json=test_data,
            headers=headers,
            timeout=30
        )

        print(f"响应状态码: {response.status_code}")
        print(f"响应头: {dict(response.headers)}")
        print("-" * 50)

        if response.status_code != 200:
            print(f"❌ 接口调用失败,状态码: {response.status_code}")
            print(f"响应内容: {response.text}")
            return

        try:
            result = response.json()
        except ValueError as e:
            # ValueError is the common base of json.JSONDecodeError,
            # simplejson's decode error and requests' own JSONDecodeError,
            # so this branch is taken whichever JSON backend requests uses.
            print(f"❌ 响应内容不是有效的JSON格式: {e}")
            print(f"原始响应内容: {response.text}")
            return

        print("✅ 接口调用成功!")
        print(f"响应内容: {json.dumps(result, ensure_ascii=False, indent=2)}")
        _print_result_summary(result)

    except requests.exceptions.ConnectionError as e:
        print(f"❌ 连接错误: {e}")
        print("请检查服务器是否正在运行,以及端口是否正确")
    except requests.exceptions.Timeout as e:
        print(f"❌ 请求超时: {e}")
    except requests.exceptions.RequestException as e:
        print(f"❌ 请求异常: {e}")
    except Exception as e:
        # Last-resort boundary of a manual diagnostic script: report the
        # problem instead of ending with a traceback.
        print(f"❌ 未知错误: {e}")
# Script entry point: run the manual API check only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    test_process_urls_api()
|