test_process_urls_api.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
"""
Manual smoke-test script for the process-urls API endpoint.
"""
  6. import requests
  7. import json
  8. def test_process_urls_api():
  9. """测试process-urls接口"""
  10. # 正确的接口URL
  11. api_url = "http://menduner.citupro.com:6868/api/parse/process-urls"
  12. # 测试数据
  13. test_data = {
  14. "urlArr": ["https://mp.weixin.qq.com/s/4yz-kNAWAlF36aeQ_cgQQg"]
  15. }
  16. # 请求头
  17. headers = {
  18. "Content-Type": "application/json"
  19. }
  20. try:
  21. print(f"正在测试接口: {api_url}")
  22. print(f"请求数据: {json.dumps(test_data, ensure_ascii=False, indent=2)}")
  23. print("-" * 50)
  24. # 发送POST请求
  25. response = requests.post(
  26. api_url,
  27. json=test_data,
  28. headers=headers,
  29. timeout=30
  30. )
  31. print(f"响应状态码: {response.status_code}")
  32. print(f"响应头: {dict(response.headers)}")
  33. print("-" * 50)
  34. # 解析响应内容
  35. if response.status_code == 200:
  36. try:
  37. result = response.json()
  38. print("✅ 接口调用成功!")
  39. print(f"响应内容: {json.dumps(result, ensure_ascii=False, indent=2)}")
  40. # 分析结果
  41. if result.get('success'):
  42. data = result.get('data', {})
  43. total_urls = data.get('total_urls', 0)
  44. success_count = data.get('success_count', 0)
  45. failed_count = data.get('failed_count', 0)
  46. print(f"\n📊 处理结果统计:")
  47. print(f" 总URL数: {total_urls}")
  48. print(f" 成功数量: {success_count}")
  49. print(f" 失败数量: {failed_count}")
  50. # 显示成功的内容
  51. contents = data.get('contents', [])
  52. if contents:
  53. print(f"\n📄 成功爬取的内容:")
  54. for i, content in enumerate(contents):
  55. print(f" {i+1}. URL: {content.get('url')}")
  56. print(f" 状态: {content.get('status')}")
  57. print(f" 内容长度: {content.get('content_length')}")
  58. print(f" 原始长度: {content.get('original_length')}")
  59. print(f" 状态码: {content.get('status_code')}")
  60. print(f" 编码: {content.get('encoding')}")
  61. # 显示内容预览
  62. data_content = content.get('data', '')
  63. if data_content:
  64. preview = data_content[:200] + "..." if len(data_content) > 200 else data_content
  65. print(f" 内容预览: {preview}")
  66. print()
  67. # 显示失败的项目
  68. failed_items = data.get('failed_items', [])
  69. if failed_items:
  70. print(f"❌ 失败的项目:")
  71. for i, failed_item in enumerate(failed_items):
  72. print(f" {i+1}. URL: {failed_item.get('url')}")
  73. print(f" 状态: {failed_item.get('status')}")
  74. print(f" 错误信息: {failed_item.get('error', '未知错误')}")
  75. print()
  76. else:
  77. print(f"❌ 接口返回失败: {result.get('message', '未知错误')}")
  78. except json.JSONDecodeError as e:
  79. print(f"❌ 响应内容不是有效的JSON格式: {e}")
  80. print(f"原始响应内容: {response.text}")
  81. else:
  82. print(f"❌ 接口调用失败,状态码: {response.status_code}")
  83. print(f"响应内容: {response.text}")
  84. except requests.exceptions.ConnectionError as e:
  85. print(f"❌ 连接错误: {e}")
  86. print("请检查服务器是否正在运行,以及端口是否正确")
  87. except requests.exceptions.Timeout as e:
  88. print(f"❌ 请求超时: {e}")
  89. except requests.exceptions.RequestException as e:
  90. print(f"❌ 请求异常: {e}")
  91. except Exception as e:
  92. print(f"❌ 未知错误: {e}")
  93. if __name__ == "__main__":
  94. test_process_urls_api()