# parse.py (29 KB)
  1. from typing import Dict, Any
  2. from app import db
  3. from datetime import datetime
  4. import os
  5. import boto3
  6. from botocore.config import Config
  7. import logging
  8. import requests
  9. import json
  10. import re
  11. import uuid
  12. from PIL import Image
  13. from io import BytesIO
  14. import pytesseract
  15. import base64
  16. from openai import OpenAI
  17. # 测试用的解析数据接口。没有实际使用。
  18. def parse_data(data: Dict[str, Any]) -> Dict[str, Any]:
  19. """
  20. 解析数据的主函数
  21. Args:
  22. data: 要解析的数据
  23. Returns:
  24. 解析后的数据
  25. """
  26. # TODO: 实现数据解析逻辑
  27. return {
  28. 'status': 'success',
  29. 'message': 'Data parsed successfully',
  30. 'data': data
  31. }
  32. # 名片解析数据模型
  33. class BusinessCard(db.Model):
  34. __tablename__ = 'business_cards'
  35. id = db.Column(db.Integer, primary_key=True)
  36. name_zh = db.Column(db.String(100), nullable=False)
  37. name_en = db.Column(db.String(100))
  38. title_zh = db.Column(db.String(100))
  39. title_en = db.Column(db.String(100))
  40. mobile = db.Column(db.String(50))
  41. phone = db.Column(db.String(50))
  42. email = db.Column(db.String(100))
  43. hotel_zh = db.Column(db.String(200))
  44. hotel_en = db.Column(db.String(200))
  45. address_zh = db.Column(db.Text)
  46. address_en = db.Column(db.Text)
  47. postal_code_zh = db.Column(db.String(20))
  48. postal_code_en = db.Column(db.String(20))
  49. brand_zh = db.Column(db.String(100))
  50. brand_en = db.Column(db.String(100))
  51. affiliation_zh = db.Column(db.String(200))
  52. affiliation_en = db.Column(db.String(200))
  53. image_path = db.Column(db.String(255)) # MinIO中存储的路径
  54. created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
  55. updated_at = db.Column(db.DateTime, onupdate=datetime.now)
  56. updated_by = db.Column(db.String(50))
  57. status = db.Column(db.String(20), default='active')
  58. def to_dict(self):
  59. return {
  60. 'id': self.id,
  61. 'name_zh': self.name_zh,
  62. 'name_en': self.name_en,
  63. 'title_zh': self.title_zh,
  64. 'title_en': self.title_en,
  65. 'mobile': self.mobile,
  66. 'phone': self.phone,
  67. 'email': self.email,
  68. 'hotel_zh': self.hotel_zh,
  69. 'hotel_en': self.hotel_en,
  70. 'address_zh': self.address_zh,
  71. 'address_en': self.address_en,
  72. 'postal_code_zh': self.postal_code_zh,
  73. 'postal_code_en': self.postal_code_en,
  74. 'brand_zh': self.brand_zh,
  75. 'brand_en': self.brand_en,
  76. 'affiliation_zh': self.affiliation_zh,
  77. 'affiliation_en': self.affiliation_en,
  78. 'image_path': self.image_path,
  79. 'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S') if self.created_at else None,
  80. 'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S') if self.updated_at else None,
  81. 'updated_by': self.updated_by,
  82. 'status': self.status
  83. }
  84. # 名片解析功能模块
  85. # MinIO配置
  86. MINIO_URL = os.environ.get('MINIO_URL', 'localhost:19000')
  87. MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY', 'miniomxl')
  88. MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY', 'minio2357!')
  89. MINIO_BUCKET = os.environ.get('MINIO_BUCKET', 'business-cards')
  90. USE_SSL = os.environ.get('MINIO_USE_SSL', 'false').lower() == 'true'
  91. # DeepSeek API配置
  92. DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-2aea6e8b159b448aa3c1e29acd6f4349')
  93. DEEPSEEK_API_URL = os.environ.get('DEEPSEEK_API_URL', 'https://api.deepseek.com/v1/chat/completions')
  94. # 备用API端点
  95. DEEPSEEK_API_URL_BACKUP = 'https://api.deepseek.com/v1/completions'
  96. # OCR配置
  97. # 设置pytesseract路径(如果需要)
  98. # pytesseract.pytesseract.tesseract_cmd = r'/path/to/tesseract'
  99. # OCR语言设置,支持多语言
  100. OCR_LANG = os.environ.get('OCR_LANG', 'chi_sim+eng')
  101. def get_minio_client():
  102. """获取MinIO客户端连接"""
  103. try:
  104. minio_client = boto3.client(
  105. 's3',
  106. endpoint_url=f'{"https" if USE_SSL else "http"}://{MINIO_URL}',
  107. aws_access_key_id=MINIO_ACCESS_KEY,
  108. aws_secret_access_key=MINIO_SECRET_KEY,
  109. config=Config(signature_version='s3v4'),
  110. region_name='us-east-1' # 可选,但某些S3客户端可能需要
  111. )
  112. # 确保存储桶存在
  113. if MINIO_BUCKET not in [bucket['Name'] for bucket in minio_client.list_buckets()['Buckets']]:
  114. minio_client.create_bucket(Bucket=MINIO_BUCKET)
  115. return minio_client
  116. except Exception as e:
  117. logging.error(f"MinIO连接错误: {str(e)}")
  118. return None
  119. def extract_text_from_image(image_data):
  120. """
  121. 使用OCR从图像中提取文本,然后通过DeepSeek API解析名片信息
  122. Args:
  123. image_data (bytes): 图像的二进制数据
  124. Returns:
  125. dict: 提取的信息(姓名、职位、公司等)
  126. Raises:
  127. Exception: 当OCR或API调用失败或配置错误时抛出异常
  128. """
  129. try:
  130. # 步骤1: 使用OCR从图像中提取文本
  131. ocr_text = ocr_extract_text(image_data)
  132. if not ocr_text or ocr_text.strip() == "":
  133. error_msg = "OCR无法从图像中提取文本"
  134. logging.error(error_msg)
  135. raise Exception(error_msg)
  136. logging.info(f"OCR提取的文本: {ocr_text[:200]}..." if len(ocr_text) > 200 else ocr_text)
  137. # 步骤2: 使用DeepSeek API解析文本中的信息
  138. return parse_text_with_deepseek(ocr_text)
  139. except Exception as e:
  140. error_msg = f"从图像中提取和解析文本失败: {str(e)}"
  141. logging.error(error_msg, exc_info=True)
  142. raise Exception(error_msg)
  143. def ocr_extract_text(image_data):
  144. """
  145. 使用OCR从图像中提取文本
  146. Args:
  147. image_data (bytes): 图像的二进制数据
  148. Returns:
  149. str: 提取的文本
  150. """
  151. try:
  152. # 将二进制数据转换为PIL图像
  153. image = Image.open(BytesIO(image_data))
  154. # 使用pytesseract进行OCR文本提取
  155. text = pytesseract.image_to_string(image, lang=OCR_LANG)
  156. # 清理提取的文本
  157. text = text.strip()
  158. logging.info(f"OCR成功从图像中提取文本,长度: {len(text)}")
  159. print(text)
  160. return text
  161. except Exception as e:
  162. error_msg = f"OCR提取文本失败: {str(e)}"
  163. logging.error(error_msg, exc_info=True)
  164. raise Exception(error_msg)
  165. def parse_text_with_deepseek(text):
  166. """
  167. 使用DeepSeek API解析文本中的名片信息
  168. Args:
  169. text (str): 要解析的文本
  170. Returns:
  171. dict: 解析的名片信息
  172. """
  173. # 准备请求DeepSeek API
  174. if not DEEPSEEK_API_KEY:
  175. error_msg = "未配置DeepSeek API密钥"
  176. logging.error(error_msg)
  177. raise Exception(error_msg)
  178. # 构建API请求的基本信息
  179. headers = {
  180. "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
  181. "Content-Type": "application/json"
  182. }
  183. # 构建提示语,包含OCR提取的文本
  184. prompt = f"""请从以下名片文本中提取详细信息,需分别识别中英文内容。
  185. 以JSON格式返回,包含以下字段:
  186. - name_zh: 中文姓名
  187. - name_en: 英文姓名
  188. - title_zh: 中文职位/头衔
  189. - title_en: 英文职位/头衔
  190. - hotel_zh: 中文酒店/公司名称
  191. - hotel_en: 英文酒店/公司名称
  192. - mobile: 手机号码
  193. - phone: 固定电话
  194. - email: 电子邮箱
  195. - address_zh: 中文地址
  196. - address_en: 英文地址
  197. 名片文本:
  198. {text}
  199. """
  200. # 使用模型名称
  201. model_name = 'deepseek-chat'
  202. try:
  203. # 尝试调用DeepSeek API
  204. logging.info(f"尝试通过DeepSeek API解析文本")
  205. payload = {
  206. "model": model_name,
  207. "messages": [
  208. {"role": "system", "content": "你是一个专业的名片信息提取助手。请用JSON格式返回结果,不要有多余的文字说明。"},
  209. {"role": "user", "content": prompt}
  210. ],
  211. "temperature": 0.1
  212. }
  213. logging.info(f"向DeepSeek API发送请求")
  214. response = requests.post(DEEPSEEK_API_URL, headers=headers, json=payload, timeout=30)
  215. # 检查响应状态
  216. response.raise_for_status()
  217. # 解析API响应
  218. result = response.json()
  219. content = result.get("choices", [{}])[0].get("message", {}).get("content", "{}")
  220. # 尝试解析JSON内容
  221. try:
  222. # 找到内容中的JSON部分(有时模型会在JSON前后添加额外文本)
  223. json_content = extract_json_from_text(content)
  224. extracted_data = json.loads(json_content)
  225. logging.info(f"成功解析DeepSeek API返回的JSON")
  226. except json.JSONDecodeError:
  227. logging.warning(f"无法解析JSON,尝试直接从文本提取信息")
  228. # 如果无法解析JSON,尝试直接从文本中提取关键信息
  229. extracted_data = extract_fields_from_text(content)
  230. # 确保所有必要的字段都存在
  231. required_fields = ['name', 'title', 'company', 'phone', 'email', 'address']
  232. for field in required_fields:
  233. if field not in extracted_data:
  234. extracted_data[field] = ""
  235. logging.info(f"成功从DeepSeek API获取解析结果")
  236. return extracted_data
  237. except requests.exceptions.HTTPError as e:
  238. error_msg = f"DeepSeek API调用失败: {str(e)}"
  239. logging.error(error_msg)
  240. if hasattr(e, 'response') and e.response:
  241. logging.error(f"错误状态码: {e.response.status_code}")
  242. logging.error(f"错误内容: {e.response.text}")
  243. raise Exception(error_msg)
  244. except Exception as e:
  245. error_msg = f"解析文本过程中发生错误: {str(e)}"
  246. logging.error(error_msg, exc_info=True)
  247. raise Exception(error_msg)
  248. def extract_json_from_text(text):
  249. """
  250. 从文本中提取JSON部分
  251. Args:
  252. text (str): 包含JSON的文本
  253. Returns:
  254. str: 提取的JSON字符串
  255. """
  256. # 尝试找到最外层的花括号对
  257. start_idx = text.find('{')
  258. if start_idx == -1:
  259. return "{}"
  260. # 使用简单的括号匹配算法找到对应的闭合括号
  261. count = 0
  262. for i in range(start_idx, len(text)):
  263. if text[i] == '{':
  264. count += 1
  265. elif text[i] == '}':
  266. count -= 1
  267. if count == 0:
  268. return text[start_idx:i+1]
  269. # 如果没有找到闭合括号,返回从开始位置到文本结尾
  270. return text[start_idx:]
  271. def extract_fields_from_text(text):
  272. """
  273. 从文本中直接提取名片字段信息
  274. Args:
  275. text (str): 要分析的文本
  276. Returns:
  277. dict: 提取的字段
  278. """
  279. # 初始化结果字典
  280. result = {
  281. 'name_zh': '',
  282. 'name_en': '',
  283. 'title_zh': '',
  284. 'title_en': '',
  285. 'mobile': '',
  286. 'phone': '',
  287. 'email': '',
  288. 'hotel_zh': '',
  289. 'hotel_en': '',
  290. 'address_zh': '',
  291. 'address_en': '',
  292. 'postal_code_zh': '',
  293. 'postal_code_en': '',
  294. 'brand_zh': '',
  295. 'brand_en': '',
  296. 'affiliation_zh': '',
  297. 'affiliation_en': ''
  298. }
  299. # 提取中文姓名
  300. name_zh_match = re.search(r'["\'](姓名)["\'][\s\{:]*["\']?(中文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  301. if name_zh_match:
  302. result['name_zh'] = name_zh_match.group(3)
  303. # 提取英文姓名
  304. name_en_match = re.search(r'["\'](姓名)["\'][\s\{:]*["\']?(英文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  305. if name_en_match:
  306. result['name_en'] = name_en_match.group(3)
  307. # 提取中文头衔
  308. title_zh_match = re.search(r'["\'](头衔|职位)["\'][\s\{:]*["\']?(中文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  309. if title_zh_match:
  310. result['title_zh'] = title_zh_match.group(3)
  311. # 提取英文头衔
  312. title_en_match = re.search(r'["\'](头衔|职位)["\'][\s\{:]*["\']?(英文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  313. if title_en_match:
  314. result['title_en'] = title_en_match.group(3)
  315. # 提取手机
  316. mobile_match = re.search(r'["\'](手机)["\'][\s:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  317. if mobile_match:
  318. result['mobile'] = mobile_match.group(2)
  319. # 提取电话
  320. phone_match = re.search(r'["\'](电话)["\'][\s:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  321. if phone_match:
  322. result['phone'] = phone_match.group(2)
  323. # 提取邮箱
  324. email_match = re.search(r'["\'](邮箱)["\'][\s:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  325. if email_match:
  326. result['email'] = email_match.group(2)
  327. # 提取中文酒店名称
  328. hotel_zh_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(酒店名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  329. if hotel_zh_match:
  330. result['hotel_zh'] = hotel_zh_match.group(4)
  331. # 提取英文酒店名称
  332. hotel_en_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(酒店名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  333. if hotel_en_match:
  334. result['hotel_en'] = hotel_en_match.group(4)
  335. # 提取中文详细地址
  336. address_zh_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(详细地址)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  337. if address_zh_match:
  338. result['address_zh'] = address_zh_match.group(4)
  339. # 提取英文详细地址
  340. address_en_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(详细地址)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  341. if address_en_match:
  342. result['address_en'] = address_en_match.group(4)
  343. # 提取中文邮政编码
  344. postal_code_zh_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(邮政编码)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  345. if postal_code_zh_match:
  346. result['postal_code_zh'] = postal_code_zh_match.group(4)
  347. # 提取英文邮政编码
  348. postal_code_en_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(邮政编码)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  349. if postal_code_en_match:
  350. result['postal_code_en'] = postal_code_en_match.group(4)
  351. # 提取中文品牌名称
  352. brand_zh_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(品牌名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  353. if brand_zh_match:
  354. result['brand_zh'] = brand_zh_match.group(4)
  355. # 提取英文品牌名称
  356. brand_en_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(品牌名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  357. if brand_en_match:
  358. result['brand_en'] = brand_en_match.group(4)
  359. # 提取中文隶属关系
  360. affiliation_zh_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(隶属关系)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  361. if affiliation_zh_match:
  362. result['affiliation_zh'] = affiliation_zh_match.group(4)
  363. # 提取英文隶属关系
  364. affiliation_en_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(隶属关系)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  365. if affiliation_en_match:
  366. result['affiliation_en'] = affiliation_en_match.group(4)
  367. return result
  368. def parse_text_with_qwen25VLplus(image_data):
  369. """
  370. 使用阿里云的 Qwen 2.5 VL Plus 模型解析图像中的名片信息
  371. Args:
  372. image_data (bytes): 图像的二进制数据
  373. Returns:
  374. dict: 解析的名片信息
  375. """
  376. # 阿里云 Qwen API 配置
  377. QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
  378. try:
  379. # 将图片数据转为 base64 编码
  380. base64_image = base64.b64encode(image_data).decode('utf-8')
  381. # 初始化 OpenAI 客户端,配置为阿里云 API
  382. client = OpenAI(
  383. api_key=QWEN_API_KEY,
  384. base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
  385. )
  386. # 构建提示语
  387. prompt = """请分析这张名片,提取以下信息,需分别识别中英文内容:
  388. 1. 中文姓名 (name_zh)
  389. 2. 英文姓名 (name_en)
  390. 3. 中文职位/头衔 (title_zh)
  391. 4. 英文职位/头衔 (title_en)
  392. 5. 中文酒店/公司名称 (hotel_zh)
  393. 6. 英文酒店/公司名称 (hotel_en)
  394. 7. 手机号码 (mobile)
  395. 8. 固定电话 (phone)
  396. 9. 电子邮箱 (email)
  397. 10. 中文地址 (address_zh)
  398. 11. 英文地址 (address_en)
  399. 12. 中文邮政编码 (postal_code_zh)
  400. 13. 英文邮政编码 (postal_code_en)
  401. 请以JSON格式返回结果,不要有多余的文字说明。每个字段如果名片中没有相应信息,返回空字符串。"""
  402. # 调用 Qwen 2.5 VL Plus API
  403. logging.info("发送请求到 Qwen 2.5 VL Plus 模型")
  404. completion = client.chat.completions.create(
  405. model="qwen-vl-plus",
  406. messages=[
  407. {
  408. "role": "user",
  409. "content": [
  410. {"type": "text", "text": prompt},
  411. {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
  412. ]
  413. }
  414. ]
  415. )
  416. # 解析响应
  417. response_content = completion.choices[0].message.content
  418. logging.info(f"成功从 Qwen 模型获取响应: {response_content[:100]}...")
  419. # 尝试从响应中提取 JSON
  420. try:
  421. json_content = extract_json_from_text(response_content)
  422. extracted_data = json.loads(json_content)
  423. logging.info("成功解析 Qwen 响应中的 JSON")
  424. except json.JSONDecodeError:
  425. logging.warning("无法解析 JSON,尝试从文本中提取信息")
  426. extracted_data = extract_fields_from_text(response_content)
  427. # 确保所有必要字段存在
  428. required_fields = [
  429. 'name_zh', 'name_en', 'title_zh', 'title_en',
  430. 'hotel_zh', 'hotel_en', 'mobile', 'phone',
  431. 'email', 'address_zh', 'address_en',
  432. 'postal_code_zh', 'postal_code_en'
  433. ]
  434. for field in required_fields:
  435. if field not in extracted_data:
  436. extracted_data[field] = ""
  437. return extracted_data
  438. except Exception as e:
  439. error_msg = f"Qwen 2.5 VL Plus 模型解析失败: {str(e)}"
  440. logging.error(error_msg, exc_info=True)
  441. raise Exception(error_msg)
  442. def process_business_card(image_file):
  443. """
  444. 处理名片图片并提取信息
  445. Args:
  446. image_file (FileStorage): 上传的名片图片文件
  447. Returns:
  448. dict: 处理结果,包含提取的信息和状态
  449. """
  450. minio_path = None
  451. try:
  452. # 读取图片数据
  453. image_data = image_file.read()
  454. image_file.seek(0) # 重置文件指针以便后续读取
  455. try:
  456. # 优先使用 Qwen 2.5 VL Plus 模型直接从图像提取信息
  457. try:
  458. logging.info("尝试使用 Qwen 2.5 VL Plus 模型解析名片")
  459. extracted_data = parse_text_with_qwen25VLplus(image_data)
  460. logging.info("成功使用 Qwen 2.5 VL Plus 模型解析名片")
  461. except Exception as qwen_error:
  462. logging.warning(f"Qwen 模型解析失败,错误原因: {str(qwen_error)}")
  463. # extracted_data = extract_text_from_image(image_data)
  464. except Exception as e:
  465. return {
  466. 'success': False,
  467. 'message': f"名片解析失败: {str(e)}",
  468. 'data': None
  469. }
  470. try:
  471. # 生成唯一的文件名
  472. file_ext = os.path.splitext(image_file.filename)[1].lower()
  473. if not file_ext:
  474. file_ext = '.jpg' # 默认扩展名
  475. unique_filename = f"{uuid.uuid4().hex}{file_ext}"
  476. minio_path = f"{unique_filename}"
  477. # 尝试上传到MinIO
  478. minio_client = get_minio_client()
  479. if minio_client:
  480. minio_client.put_object(
  481. Bucket=MINIO_BUCKET,
  482. Key=minio_path,
  483. Body=image_file,
  484. ContentType=image_file.content_type
  485. )
  486. logging.info(f"图片已上传到MinIO: {minio_path}")
  487. # 只存储相对路径,不存储完整URL
  488. # 完整URL会由前端通过API代理访问
  489. logging.info(f"存储图片路径: {minio_path}")
  490. else:
  491. minio_path = None
  492. logging.warning("MinIO客户端未初始化,图片未上传")
  493. except Exception as e:
  494. logging.error(f"上传图片到MinIO失败: {str(e)}", exc_info=True)
  495. minio_path = None
  496. try:
  497. # 保存到数据库
  498. business_card = BusinessCard(
  499. name_zh=extracted_data.get('name_zh', ''),
  500. name_en=extracted_data.get('name_en', ''),
  501. title_zh=extracted_data.get('title_zh', ''),
  502. title_en=extracted_data.get('title_en', ''),
  503. mobile=extracted_data.get('mobile', ''),
  504. phone=extracted_data.get('phone', ''),
  505. email=extracted_data.get('email', ''),
  506. hotel_zh=extracted_data.get('hotel_zh', ''),
  507. hotel_en=extracted_data.get('hotel_en', ''),
  508. address_zh=extracted_data.get('address_zh', ''),
  509. address_en=extracted_data.get('address_en', ''),
  510. postal_code_zh=extracted_data.get('postal_code_zh', ''),
  511. postal_code_en=extracted_data.get('postal_code_en', ''),
  512. brand_zh=extracted_data.get('brand_zh', ''),
  513. brand_en=extracted_data.get('brand_en', ''),
  514. affiliation_zh=extracted_data.get('affiliation_zh', ''),
  515. affiliation_en=extracted_data.get('affiliation_en', ''),
  516. image_path=minio_path, # 存储相对路径
  517. status='active',
  518. updated_by='system'
  519. )
  520. db.session.add(business_card)
  521. db.session.commit()
  522. logging.info(f"名片信息已保存到数据库,ID: {business_card.id}")
  523. return {
  524. 'success': True,
  525. 'message': '名片解析成功',
  526. 'data': business_card.to_dict()
  527. }
  528. except Exception as e:
  529. db.session.rollback()
  530. error_msg = f"保存名片信息到数据库失败: {str(e)}"
  531. logging.error(error_msg, exc_info=True)
  532. # 即使数据库操作失败,仍返回提取的信息
  533. return {
  534. 'success': False,
  535. 'message': error_msg,
  536. 'data': {
  537. 'id': None,
  538. 'name_zh': extracted_data.get('name_zh', ''),
  539. 'name_en': extracted_data.get('name_en', ''),
  540. 'title_zh': extracted_data.get('title_zh', ''),
  541. 'title_en': extracted_data.get('title_en', ''),
  542. 'mobile': extracted_data.get('mobile', ''),
  543. 'phone': extracted_data.get('phone', ''),
  544. 'email': extracted_data.get('email', ''),
  545. 'hotel_zh': extracted_data.get('hotel_zh', ''),
  546. 'hotel_en': extracted_data.get('hotel_en', ''),
  547. 'address_zh': extracted_data.get('address_zh', ''),
  548. 'address_en': extracted_data.get('address_en', ''),
  549. 'postal_code_zh': extracted_data.get('postal_code_zh', ''),
  550. 'postal_code_en': extracted_data.get('postal_code_en', ''),
  551. 'brand_zh': extracted_data.get('brand_zh', ''),
  552. 'brand_en': extracted_data.get('brand_en', ''),
  553. 'affiliation_zh': extracted_data.get('affiliation_zh', ''),
  554. 'affiliation_en': extracted_data.get('affiliation_en', ''),
  555. 'image_path': minio_path, # 返回相对路径
  556. 'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
  557. 'updated_at': None,
  558. 'updated_by': 'system',
  559. 'status': 'active'
  560. }
  561. }
  562. except Exception as e:
  563. db.session.rollback()
  564. error_msg = f"名片处理失败: {str(e)}"
  565. logging.error(error_msg, exc_info=True)
  566. return {
  567. 'success': False,
  568. 'message': error_msg,
  569. 'data': None
  570. }
  571. def update_business_card(card_id, data):
  572. """
  573. 更新名片信息
  574. Args:
  575. card_id (int): 名片记录ID
  576. data (dict): 包含要更新的字段的字典
  577. Returns:
  578. dict: 包含操作结果和更新后的名片信息
  579. """
  580. try:
  581. # 查找要更新的名片记录
  582. card = BusinessCard.query.get(card_id)
  583. if not card:
  584. return {
  585. 'success': False,
  586. 'message': f'未找到ID为{card_id}的名片记录',
  587. 'data': None
  588. }
  589. # 更新名片信息
  590. card.name_zh = data.get('name_zh', card.name_zh)
  591. card.name_en = data.get('name_en', card.name_en)
  592. card.title_zh = data.get('title_zh', card.title_zh)
  593. card.title_en = data.get('title_en', card.title_en)
  594. card.mobile = data.get('mobile', card.mobile)
  595. card.phone = data.get('phone', card.phone)
  596. card.email = data.get('email', card.email)
  597. card.hotel_zh = data.get('hotel_zh', card.hotel_zh)
  598. card.hotel_en = data.get('hotel_en', card.hotel_en)
  599. card.address_zh = data.get('address_zh', card.address_zh)
  600. card.address_en = data.get('address_en', card.address_en)
  601. card.postal_code_zh = data.get('postal_code_zh', card.postal_code_zh)
  602. card.postal_code_en = data.get('postal_code_en', card.postal_code_en)
  603. card.brand_zh = data.get('brand_zh', card.brand_zh)
  604. card.brand_en = data.get('brand_en', card.brand_en)
  605. card.affiliation_zh = data.get('affiliation_zh', card.affiliation_zh)
  606. card.affiliation_en = data.get('affiliation_en', card.affiliation_en)
  607. card.updated_by = data.get('updated_by', 'user') # 可以根据实际情况修改为当前用户
  608. # 保存更新
  609. db.session.commit()
  610. return {
  611. 'success': True,
  612. 'message': '名片信息已更新',
  613. 'data': card.to_dict()
  614. }
  615. except Exception as e:
  616. db.session.rollback()
  617. error_msg = f"更新名片信息失败: {str(e)}"
  618. logging.error(error_msg, exc_info=True)
  619. return {
  620. 'success': False,
  621. 'message': error_msg,
  622. 'data': None
  623. }
  624. def get_business_cards():
  625. """
  626. 获取所有名片记录列表
  627. Returns:
  628. dict: 包含操作结果和名片列表
  629. """
  630. try:
  631. # 查询所有名片记录
  632. cards = BusinessCard.query.all()
  633. # 将所有记录转换为字典格式
  634. cards_data = [card.to_dict() for card in cards]
  635. return {
  636. 'success': True,
  637. 'message': '获取名片列表成功',
  638. 'data': cards_data
  639. }
  640. except Exception as e:
  641. error_msg = f"获取名片列表失败: {str(e)}"
  642. logging.error(error_msg, exc_info=True)
  643. return {
  644. 'success': False,
  645. 'message': error_msg,
  646. 'data': []
  647. }
  648. def update_business_card_status(card_id, status):
  649. """
  650. 更新名片状态(激活/禁用)
  651. Args:
  652. card_id (int): 名片记录ID
  653. status (str): 新状态,'active'或'inactive'
  654. Returns:
  655. dict: 包含操作结果和更新后的名片信息
  656. """
  657. try:
  658. # 查找要更新的名片记录
  659. card = BusinessCard.query.get(card_id)
  660. if not card:
  661. return {
  662. 'success': False,
  663. 'message': f'未找到ID为{card_id}的名片记录',
  664. 'data': None
  665. }
  666. # 验证状态值
  667. if status not in ['active', 'inactive']:
  668. return {
  669. 'success': False,
  670. 'message': f'无效的状态值: {status},必须为 active 或 inactive',
  671. 'data': None
  672. }
  673. # 更新状态
  674. card.status = status
  675. card.updated_at = datetime.now()
  676. card.updated_by = 'system' # 可以根据实际情况修改为当前用户
  677. # 保存更新
  678. db.session.commit()
  679. return {
  680. 'success': True,
  681. 'message': f'名片状态已更新为: {status}',
  682. 'data': card.to_dict()
  683. }
  684. except Exception as e:
  685. db.session.rollback()
  686. error_msg = f"更新名片状态失败: {str(e)}"
  687. logging.error(error_msg, exc_info=True)
  688. return {
  689. 'success': False,
  690. 'message': error_msg,
  691. 'data': None
  692. }