parse.py 113 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080
  1. from typing import Dict, Any
  2. from app import db
  3. from datetime import datetime
  4. import os
  5. import boto3
  6. from botocore.config import Config
  7. import logging
  8. import requests
  9. import json
  10. import re
  11. import uuid
  12. from PIL import Image
  13. from io import BytesIO
  14. import pytesseract
  15. import base64
  16. from openai import OpenAI
  17. from app.config.config import DevelopmentConfig, ProductionConfig
  18. # 名片解析数据模型
  19. class BusinessCard(db.Model):
  20. __tablename__ = 'business_cards'
  21. id = db.Column(db.Integer, primary_key=True, autoincrement=True)
  22. name_zh = db.Column(db.String(100), nullable=False)
  23. name_en = db.Column(db.String(100))
  24. title_zh = db.Column(db.String(100))
  25. title_en = db.Column(db.String(100))
  26. mobile = db.Column(db.String(50))
  27. phone = db.Column(db.String(50))
  28. email = db.Column(db.String(100))
  29. hotel_zh = db.Column(db.String(200))
  30. hotel_en = db.Column(db.String(200))
  31. address_zh = db.Column(db.Text)
  32. address_en = db.Column(db.Text)
  33. postal_code_zh = db.Column(db.String(20))
  34. postal_code_en = db.Column(db.String(20))
  35. brand_zh = db.Column(db.String(100))
  36. brand_en = db.Column(db.String(100))
  37. affiliation_zh = db.Column(db.String(200))
  38. affiliation_en = db.Column(db.String(200))
  39. birthday = db.Column(db.Date) # 生日,存储年月日
  40. residence = db.Column(db.Text) # 居住地
  41. image_path = db.Column(db.String(255)) # MinIO中存储的路径
  42. career_path = db.Column(db.JSON) # 职业轨迹,JSON格式
  43. brand_group = db.Column(db.String(200)) # 品牌组合
  44. created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
  45. updated_at = db.Column(db.DateTime, onupdate=datetime.now)
  46. updated_by = db.Column(db.String(50))
  47. status = db.Column(db.String(20), default='active')
  48. def to_dict(self):
  49. return {
  50. 'id': self.id,
  51. 'name_zh': self.name_zh,
  52. 'name_en': self.name_en,
  53. 'title_zh': self.title_zh,
  54. 'title_en': self.title_en,
  55. 'mobile': self.mobile,
  56. 'phone': self.phone,
  57. 'email': self.email,
  58. 'hotel_zh': self.hotel_zh,
  59. 'hotel_en': self.hotel_en,
  60. 'address_zh': self.address_zh,
  61. 'address_en': self.address_en,
  62. 'postal_code_zh': self.postal_code_zh,
  63. 'postal_code_en': self.postal_code_en,
  64. 'brand_zh': self.brand_zh,
  65. 'brand_en': self.brand_en,
  66. 'affiliation_zh': self.affiliation_zh,
  67. 'affiliation_en': self.affiliation_en,
  68. 'birthday': self.birthday.strftime('%Y-%m-%d') if self.birthday else None,
  69. 'residence': self.residence,
  70. 'image_path': self.image_path,
  71. 'career_path': self.career_path,
  72. 'brand_group': self.brand_group,
  73. 'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S') if self.created_at else None,
  74. 'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S') if self.updated_at else None,
  75. 'updated_by': self.updated_by,
  76. 'status': self.status
  77. }
  78. # 重复名片处理数据模型
  79. class DuplicateBusinessCard(db.Model):
  80. __tablename__ = 'duplicate_business_cards'
  81. id = db.Column(db.Integer, primary_key=True, autoincrement=True)
  82. main_card_id = db.Column(db.Integer, db.ForeignKey('business_cards.id'), nullable=False) # 新创建的主记录ID
  83. suspected_duplicates = db.Column(db.JSON, nullable=False) # 疑似重复记录列表,JSON格式
  84. duplicate_reason = db.Column(db.String(200), nullable=False) # 重复原因
  85. processing_status = db.Column(db.String(20), default='pending') # 处理状态:pending/processed/ignored
  86. created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
  87. processed_at = db.Column(db.DateTime) # 处理时间
  88. processed_by = db.Column(db.String(50)) # 处理人
  89. processing_notes = db.Column(db.Text) # 处理备注
  90. # 关联主记录
  91. main_card = db.relationship('BusinessCard', backref=db.backref('as_main_duplicate_records', lazy=True))
  92. def to_dict(self):
  93. return {
  94. 'id': self.id,
  95. 'main_card_id': self.main_card_id,
  96. 'suspected_duplicates': self.suspected_duplicates,
  97. 'duplicate_reason': self.duplicate_reason,
  98. 'processing_status': self.processing_status,
  99. 'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S') if self.created_at else None,
  100. 'processed_at': self.processed_at.strftime('%Y-%m-%d %H:%M:%S') if self.processed_at else None,
  101. 'processed_by': self.processed_by,
  102. 'processing_notes': self.processing_notes
  103. }
  104. # 名片解析功能模块
  105. def check_duplicate_business_card(extracted_data):
  106. """
  107. 检查是否存在重复的名片记录
  108. Args:
  109. extracted_data (dict): 提取的名片信息
  110. Returns:
  111. dict: 包含检查结果的字典,格式为:
  112. {
  113. 'is_duplicate': bool,
  114. 'action': str, # 'update', 'create_with_duplicates' 或 'create_new'
  115. 'existing_card': BusinessCard 或 None,
  116. 'suspected_duplicates': list, # 疑似重复记录列表
  117. 'reason': str
  118. }
  119. """
  120. try:
  121. # 获取提取的中文姓名和手机号码
  122. name_zh = extracted_data.get('name_zh', '').strip()
  123. mobile = extracted_data.get('mobile', '').strip()
  124. if not name_zh:
  125. return {
  126. 'is_duplicate': False,
  127. 'action': 'create_new',
  128. 'existing_card': None,
  129. 'suspected_duplicates': [],
  130. 'reason': '无中文姓名,创建新记录'
  131. }
  132. # 查找具有相同中文姓名的记录
  133. existing_cards = BusinessCard.query.filter_by(name_zh=name_zh).all()
  134. if not existing_cards:
  135. return {
  136. 'is_duplicate': False,
  137. 'action': 'create_new',
  138. 'existing_card': None,
  139. 'suspected_duplicates': [],
  140. 'reason': '未找到同名记录,创建新记录'
  141. }
  142. # 如果找到同名记录,进一步检查手机号码
  143. if mobile:
  144. # 有手机号码的情况
  145. for existing_card in existing_cards:
  146. existing_mobile = existing_card.mobile.strip() if existing_card.mobile else ''
  147. if existing_mobile == mobile:
  148. # 手机号码相同,更新现有记录
  149. return {
  150. 'is_duplicate': True,
  151. 'action': 'update',
  152. 'existing_card': existing_card,
  153. 'suspected_duplicates': [],
  154. 'reason': f'姓名和手机号码均相同:{name_zh} - {mobile}'
  155. }
  156. # 有手机号码但与现有记录不匹配,创建新记录并标记疑似重复
  157. suspected_list = []
  158. for card in existing_cards:
  159. suspected_list.append({
  160. 'id': card.id,
  161. 'name_zh': card.name_zh,
  162. 'name_en': card.name_en,
  163. 'mobile': card.mobile,
  164. 'hotel_zh': card.hotel_zh,
  165. 'hotel_en': card.hotel_en,
  166. 'title_zh': card.title_zh,
  167. 'title_en': card.title_en,
  168. 'created_at': card.created_at.strftime('%Y-%m-%d %H:%M:%S') if card.created_at else None
  169. })
  170. return {
  171. 'is_duplicate': True,
  172. 'action': 'create_with_duplicates',
  173. 'existing_card': None,
  174. 'suspected_duplicates': suspected_list,
  175. 'reason': f'姓名相同但手机号码不同:{name_zh},新手机号:{mobile},发现{len(suspected_list)}条疑似重复记录'
  176. }
  177. else:
  178. # 无手机号码的情况,创建新记录并标记疑似重复
  179. suspected_list = []
  180. for card in existing_cards:
  181. suspected_list.append({
  182. 'id': card.id,
  183. 'name_zh': card.name_zh,
  184. 'name_en': card.name_en,
  185. 'mobile': card.mobile,
  186. 'hotel_zh': card.hotel_zh,
  187. 'hotel_en': card.hotel_en,
  188. 'title_zh': card.title_zh,
  189. 'title_en': card.title_en,
  190. 'created_at': card.created_at.strftime('%Y-%m-%d %H:%M:%S') if card.created_at else None
  191. })
  192. return {
  193. 'is_duplicate': True,
  194. 'action': 'create_with_duplicates',
  195. 'existing_card': None,
  196. 'suspected_duplicates': suspected_list,
  197. 'reason': f'姓名相同但新记录无手机号码可比较:{name_zh},发现{len(suspected_list)}条疑似重复记录'
  198. }
  199. except Exception as e:
  200. logging.error(f"检查重复记录时发生错误: {str(e)}", exc_info=True)
  201. return {
  202. 'is_duplicate': False,
  203. 'action': 'create_new',
  204. 'existing_card': None,
  205. 'suspected_duplicates': [],
  206. 'reason': f'检查过程出错,创建新记录: {str(e)}'
  207. }
  208. def update_career_path(existing_card, new_data, image_path=None):
  209. """
  210. 更新职业轨迹信息
  211. Args:
  212. existing_card (BusinessCard): 现有名片记录
  213. new_data (dict): 新的名片信息
  214. image_path (str, optional): 对应的图片路径
  215. Returns:
  216. list: 更新后的职业轨迹
  217. """
  218. try:
  219. # 获取现有的职业轨迹
  220. career_path = existing_card.career_path if existing_card.career_path else []
  221. # 准备新的职业轨迹条目
  222. new_entry = {
  223. 'date': datetime.now().strftime('%Y-%m-%d'),
  224. 'hotel_zh': new_data.get('hotel_zh', ''),
  225. 'hotel_en': new_data.get('hotel_en', ''),
  226. 'title_zh': new_data.get('title_zh', ''),
  227. 'title_en': new_data.get('title_en', ''),
  228. 'image_path': image_path or '', # 添加图片路径
  229. 'source': 'business_card_update'
  230. }
  231. # 检查是否已存在相似的条目(避免重复添加)
  232. is_duplicate_entry = False
  233. for entry in career_path:
  234. if (entry.get('hotel_zh') == new_entry['hotel_zh'] and
  235. entry.get('title_zh') == new_entry['title_zh'] and
  236. entry.get('date') == new_entry['date']):
  237. is_duplicate_entry = True
  238. break
  239. if not is_duplicate_entry:
  240. career_path.append(new_entry)
  241. logging.info(f"为名片ID {existing_card.id} 添加了新的职业轨迹条目,包含图片路径: {image_path}")
  242. else:
  243. logging.info(f"名片ID {existing_card.id} 的职业轨迹条目已存在,跳过添加")
  244. return career_path
  245. except Exception as e:
  246. logging.error(f"更新职业轨迹时发生错误: {str(e)}", exc_info=True)
  247. return existing_card.career_path if existing_card.career_path else []
  248. def create_main_card_with_duplicates(extracted_data, minio_path, suspected_duplicates, reason):
  249. """
  250. 创建新的主记录并保存疑似重复记录信息
  251. Args:
  252. extracted_data (dict): 提取的新名片信息
  253. minio_path (str): 新图片的MinIO路径
  254. suspected_duplicates (list): 疑似重复记录列表
  255. reason (str): 重复原因
  256. Returns:
  257. tuple: (main_card, duplicate_record) 主记录和重复记录信息
  258. """
  259. try:
  260. # 1. 先创建主记录
  261. # 准备初始职业轨迹,包含当前名片信息和图片路径
  262. initial_career_path = extracted_data.get('career_path', [])
  263. if extracted_data.get('hotel_zh') or extracted_data.get('hotel_en') or extracted_data.get('title_zh') or extracted_data.get('title_en'):
  264. initial_entry = {
  265. 'date': datetime.now().strftime('%Y-%m-%d'),
  266. 'hotel_zh': extracted_data.get('hotel_zh', ''),
  267. 'hotel_en': extracted_data.get('hotel_en', ''),
  268. 'title_zh': extracted_data.get('title_zh', ''),
  269. 'title_en': extracted_data.get('title_en', ''),
  270. 'image_path': minio_path or '', # 当前名片的图片路径
  271. 'source': 'business_card_creation'
  272. }
  273. initial_career_path.append(initial_entry)
  274. main_card = BusinessCard(
  275. name_zh=extracted_data.get('name_zh', ''),
  276. name_en=extracted_data.get('name_en', ''),
  277. title_zh=extracted_data.get('title_zh', ''),
  278. title_en=extracted_data.get('title_en', ''),
  279. mobile=extracted_data.get('mobile', ''),
  280. phone=extracted_data.get('phone', ''),
  281. email=extracted_data.get('email', ''),
  282. hotel_zh=extracted_data.get('hotel_zh', ''),
  283. hotel_en=extracted_data.get('hotel_en', ''),
  284. address_zh=extracted_data.get('address_zh', ''),
  285. address_en=extracted_data.get('address_en', ''),
  286. postal_code_zh=extracted_data.get('postal_code_zh', ''),
  287. postal_code_en=extracted_data.get('postal_code_en', ''),
  288. brand_zh=extracted_data.get('brand_zh', ''),
  289. brand_en=extracted_data.get('brand_en', ''),
  290. affiliation_zh=extracted_data.get('affiliation_zh', ''),
  291. affiliation_en=extracted_data.get('affiliation_en', ''),
  292. birthday=datetime.strptime(extracted_data.get('birthday'), '%Y-%m-%d').date() if extracted_data.get('birthday') else None,
  293. residence=extracted_data.get('residence', ''),
  294. image_path=minio_path, # 最新的图片路径
  295. career_path=initial_career_path, # 包含图片路径的职业轨迹
  296. brand_group=extracted_data.get('brand_group', ''),
  297. status='active',
  298. updated_by='system'
  299. )
  300. db.session.add(main_card)
  301. db.session.flush() # 获取主记录的ID
  302. # 2. 创建重复记录信息
  303. duplicate_record = DuplicateBusinessCard(
  304. main_card_id=main_card.id,
  305. suspected_duplicates=suspected_duplicates,
  306. duplicate_reason=reason,
  307. processing_status='pending'
  308. )
  309. db.session.add(duplicate_record)
  310. db.session.commit()
  311. logging.info(f"已创建主记录(ID: {main_card.id})并保存{len(suspected_duplicates)}条疑似重复记录信息(重复记录ID: {duplicate_record.id})")
  312. return main_card, duplicate_record
  313. except Exception as e:
  314. db.session.rollback()
  315. logging.error(f"创建主记录和重复记录信息失败: {str(e)}", exc_info=True)
  316. raise e
# DeepSeek API configuration
# NOTE(review): the fallback value below embeds what looks like a real API key in
# source code; consider rotating it and requiring the DEEPSEEK_API_KEY environment
# variable instead — TODO confirm with the owner before changing the default.
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-2aea6e8b159b448aa3c1e29acd6f4349')
DEEPSEEK_API_URL = os.environ.get('DEEPSEEK_API_URL', 'https://api.deepseek.com/v1/chat/completions')
# Backup API endpoint
DEEPSEEK_API_URL_BACKUP = 'https://api.deepseek.com/v1/completions'
# OCR configuration
# Set the pytesseract binary path (if needed)
# pytesseract.pytesseract.tesseract_cmd = r'/path/to/tesseract'
# OCR language setting; multiple languages are supported
OCR_LANG = os.environ.get('OCR_LANG', 'chi_sim+eng')
# Choose the configuration according to the environment
# (the string literal below is disabled code kept as a no-op expression)
"""
if os.environ.get('FLASK_ENV') == 'production':
config = ProductionConfig()
else:
config = DevelopmentConfig()
"""
# Use the configuration object; by default assume a production environment
config = ProductionConfig()
# Derive the MinIO connection settings from the configuration
minio_url = f"{'https' if config.MINIO_SECURE else 'http'}://{config.MINIO_HOST}"
minio_access_key = config.MINIO_USER
minio_secret_key = config.MINIO_PASSWORD
minio_bucket = config.MINIO_BUCKET
use_ssl = config.MINIO_SECURE
  342. def get_minio_client():
  343. """获取MinIO客户端连接"""
  344. try:
  345. # 使用全局配置变量
  346. global minio_url, minio_access_key, minio_secret_key, minio_bucket, use_ssl
  347. logging.info(f"尝试连接MinIO服务器: {minio_url}")
  348. minio_client = boto3.client(
  349. 's3',
  350. endpoint_url=minio_url,
  351. aws_access_key_id=minio_access_key,
  352. aws_secret_access_key=minio_secret_key,
  353. config=Config(
  354. signature_version='s3v4',
  355. retries={'max_attempts': 3, 'mode': 'standard'},
  356. connect_timeout=10,
  357. read_timeout=30
  358. )
  359. )
  360. # 确保存储桶存在
  361. buckets = minio_client.list_buckets()
  362. bucket_names = [bucket['Name'] for bucket in buckets.get('Buckets', [])]
  363. logging.info(f"成功连接到MinIO服务器,现有存储桶: {bucket_names}")
  364. if minio_bucket not in bucket_names:
  365. logging.info(f"创建存储桶: {minio_bucket}")
  366. minio_client.create_bucket(Bucket=minio_bucket)
  367. return minio_client
  368. except Exception as e:
  369. logging.error(f"MinIO连接错误: {str(e)}")
  370. return None
  371. def extract_text_from_image(image_data):
  372. """
  373. 使用OCR从图像中提取文本,然后通过DeepSeek API解析名片信息
  374. Args:
  375. image_data (bytes): 图像的二进制数据
  376. Returns:
  377. dict: 提取的信息(姓名、职位、公司等)
  378. Raises:
  379. Exception: 当OCR或API调用失败或配置错误时抛出异常
  380. """
  381. try:
  382. # 步骤1: 使用OCR从图像中提取文本
  383. ocr_text = ocr_extract_text(image_data)
  384. if not ocr_text or ocr_text.strip() == "":
  385. error_msg = "OCR无法从图像中提取文本"
  386. logging.error(error_msg)
  387. raise Exception(error_msg)
  388. logging.info(f"OCR提取的文本: {ocr_text[:200]}..." if len(ocr_text) > 200 else ocr_text)
  389. # 步骤2: 使用DeepSeek API解析文本中的信息
  390. return parse_text_with_deepseek(ocr_text)
  391. except Exception as e:
  392. error_msg = f"从图像中提取和解析文本失败: {str(e)}"
  393. logging.error(error_msg, exc_info=True)
  394. raise Exception(error_msg)
  395. def ocr_extract_text(image_data):
  396. """
  397. 使用OCR从图像中提取文本
  398. Args:
  399. image_data (bytes): 图像的二进制数据
  400. Returns:
  401. str: 提取的文本
  402. """
  403. try:
  404. # 将二进制数据转换为PIL图像
  405. image = Image.open(BytesIO(image_data))
  406. # 使用pytesseract进行OCR文本提取
  407. text = pytesseract.image_to_string(image, lang=OCR_LANG)
  408. # 清理提取的文本
  409. text = text.strip()
  410. logging.info(f"OCR成功从图像中提取文本,长度: {len(text)}")
  411. print(text)
  412. return text
  413. except Exception as e:
  414. error_msg = f"OCR提取文本失败: {str(e)}"
  415. logging.error(error_msg, exc_info=True)
  416. raise Exception(error_msg)
  417. def parse_text_with_deepseek(text):
  418. """
  419. 使用DeepSeek API解析文本中的名片信息
  420. Args:
  421. text (str): 要解析的文本
  422. Returns:
  423. dict: 解析的名片信息
  424. """
  425. # 准备请求DeepSeek API
  426. if not DEEPSEEK_API_KEY:
  427. error_msg = "未配置DeepSeek API密钥"
  428. logging.error(error_msg)
  429. raise Exception(error_msg)
  430. # 构建API请求的基本信息
  431. headers = {
  432. "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
  433. "Content-Type": "application/json"
  434. }
  435. # 构建提示语,包含OCR提取的文本
  436. prompt = f"""请从以下名片文本中提取详细信息,需分别识别中英文内容。
  437. 以JSON格式返回,包含以下字段:
  438. - name_zh: 中文姓名
  439. - name_en: 英文姓名
  440. - title_zh: 中文职位/头衔
  441. - title_en: 英文职位/头衔
  442. - hotel_zh: 中文酒店/公司名称
  443. - hotel_en: 英文酒店/公司名称
  444. - mobile: 手机号码
  445. - phone: 固定电话
  446. - email: 电子邮箱
  447. - address_zh: 中文地址
  448. - address_en: 英文地址
  449. - brand_group: 品牌组合(如有多个品牌,以逗号分隔)
  450. - career_path: 职业轨迹(如果能从文本中推断出,以JSON数组格式返回,包含公司名称和职位)
  451. 名片文本:
  452. {text}
  453. """
  454. # 使用模型名称
  455. model_name = 'deepseek-chat'
  456. try:
  457. # 尝试调用DeepSeek API
  458. logging.info(f"尝试通过DeepSeek API解析文本")
  459. payload = {
  460. "model": model_name,
  461. "messages": [
  462. {"role": "system", "content": "你是一个专业的名片信息提取助手。请用JSON格式返回结果,不要有多余的文字说明。"},
  463. {"role": "user", "content": prompt}
  464. ],
  465. "temperature": 0.1
  466. }
  467. logging.info(f"向DeepSeek API发送请求")
  468. response = requests.post(DEEPSEEK_API_URL, headers=headers, json=payload, timeout=30)
  469. # 检查响应状态
  470. response.raise_for_status()
  471. # 解析API响应
  472. result = response.json()
  473. content = result.get("choices", [{}])[0].get("message", {}).get("content", "{}")
  474. # 尝试解析JSON内容
  475. try:
  476. # 找到内容中的JSON部分(有时模型会在JSON前后添加额外文本)
  477. json_content = extract_json_from_text(content)
  478. extracted_data = json.loads(json_content)
  479. logging.info(f"成功解析DeepSeek API返回的JSON")
  480. except json.JSONDecodeError:
  481. logging.warning(f"无法解析JSON,尝试直接从文本提取信息")
  482. # 如果无法解析JSON,尝试直接从文本中提取关键信息
  483. extracted_data = extract_fields_from_text(content)
  484. # 确保所有必要的字段都存在
  485. required_fields = ['name', 'title', 'company', 'phone', 'email', 'address', 'brand_group', 'career_path']
  486. for field in required_fields:
  487. if field not in extracted_data:
  488. extracted_data[field] = "" if field != 'career_path' else []
  489. logging.info(f"成功从DeepSeek API获取解析结果")
  490. return extracted_data
  491. except requests.exceptions.HTTPError as e:
  492. error_msg = f"DeepSeek API调用失败: {str(e)}"
  493. logging.error(error_msg)
  494. if hasattr(e, 'response') and e.response:
  495. logging.error(f"错误状态码: {e.response.status_code}")
  496. logging.error(f"错误内容: {e.response.text}")
  497. raise Exception(error_msg)
  498. except Exception as e:
  499. error_msg = f"解析文本过程中发生错误: {str(e)}"
  500. logging.error(error_msg, exc_info=True)
  501. raise Exception(error_msg)
  502. def extract_json_from_text(text):
  503. """
  504. 从文本中提取JSON部分
  505. Args:
  506. text (str): 包含JSON的文本
  507. Returns:
  508. str: 提取的JSON字符串
  509. """
  510. # 尝试找到最外层的花括号对
  511. start_idx = text.find('{')
  512. if start_idx == -1:
  513. return "{}"
  514. # 使用简单的括号匹配算法找到对应的闭合括号
  515. count = 0
  516. for i in range(start_idx, len(text)):
  517. if text[i] == '{':
  518. count += 1
  519. elif text[i] == '}':
  520. count -= 1
  521. if count == 0:
  522. return text[start_idx:i+1]
  523. # 如果没有找到闭合括号,返回从开始位置到文本结尾
  524. return text[start_idx:]
  525. def extract_fields_from_text(text):
  526. """
  527. 从文本中直接提取名片字段信息
  528. Args:
  529. text (str): 要分析的文本
  530. Returns:
  531. dict: 提取的字段
  532. """
  533. # 初始化结果字典
  534. result = {
  535. 'name_zh': '',
  536. 'name_en': '',
  537. 'title_zh': '',
  538. 'title_en': '',
  539. 'mobile': '',
  540. 'phone': '',
  541. 'email': '',
  542. 'hotel_zh': '',
  543. 'hotel_en': '',
  544. 'address_zh': '',
  545. 'address_en': '',
  546. 'postal_code_zh': '',
  547. 'postal_code_en': '',
  548. 'brand_zh': '',
  549. 'brand_en': '',
  550. 'affiliation_zh': '',
  551. 'affiliation_en': '',
  552. 'birthday': '',
  553. 'residence': ''
  554. }
  555. # 提取中文姓名
  556. name_zh_match = re.search(r'["\'](姓名)["\'][\s\{:]*["\']?(中文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  557. if name_zh_match:
  558. result['name_zh'] = name_zh_match.group(3)
  559. # 提取英文姓名
  560. name_en_match = re.search(r'["\'](姓名)["\'][\s\{:]*["\']?(英文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  561. if name_en_match:
  562. result['name_en'] = name_en_match.group(3)
  563. # 提取中文头衔
  564. title_zh_match = re.search(r'["\'](头衔|职位)["\'][\s\{:]*["\']?(中文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  565. if title_zh_match:
  566. result['title_zh'] = title_zh_match.group(3)
  567. # 提取英文头衔
  568. title_en_match = re.search(r'["\'](头衔|职位)["\'][\s\{:]*["\']?(英文)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  569. if title_en_match:
  570. result['title_en'] = title_en_match.group(3)
  571. # 提取手机
  572. mobile_match = re.search(r'["\'](手机)["\'][\s:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  573. if mobile_match:
  574. result['mobile'] = mobile_match.group(2)
  575. # 提取电话
  576. phone_match = re.search(r'["\'](电话)["\'][\s:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  577. if phone_match:
  578. result['phone'] = phone_match.group(2)
  579. # 提取邮箱
  580. email_match = re.search(r'["\'](邮箱)["\'][\s:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  581. if email_match:
  582. result['email'] = email_match.group(2)
  583. # 提取中文酒店名称
  584. hotel_zh_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(酒店名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  585. if hotel_zh_match:
  586. result['hotel_zh'] = hotel_zh_match.group(4)
  587. # 提取英文酒店名称
  588. hotel_en_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(酒店名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  589. if hotel_en_match:
  590. result['hotel_en'] = hotel_en_match.group(4)
  591. # 提取中文详细地址
  592. address_zh_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(详细地址)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  593. if address_zh_match:
  594. result['address_zh'] = address_zh_match.group(4)
  595. # 提取英文详细地址
  596. address_en_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(详细地址)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  597. if address_en_match:
  598. result['address_en'] = address_en_match.group(4)
  599. # 提取中文邮政编码
  600. postal_code_zh_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(邮政编码)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  601. if postal_code_zh_match:
  602. result['postal_code_zh'] = postal_code_zh_match.group(4)
  603. # 提取英文邮政编码
  604. postal_code_en_match = re.search(r'["\'](地址)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(邮政编码)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  605. if postal_code_en_match:
  606. result['postal_code_en'] = postal_code_en_match.group(4)
  607. # 提取中文品牌名称
  608. brand_zh_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(品牌名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  609. if brand_zh_match:
  610. result['brand_zh'] = brand_zh_match.group(4)
  611. # 提取英文品牌名称
  612. brand_en_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(品牌名称)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  613. if brand_en_match:
  614. result['brand_en'] = brand_en_match.group(4)
  615. # 提取中文隶属关系
  616. affiliation_zh_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(中文)["\']?[\s\{:]*["\']?(隶属关系)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  617. if affiliation_zh_match:
  618. result['affiliation_zh'] = affiliation_zh_match.group(4)
  619. # 提取英文隶属关系
  620. affiliation_en_match = re.search(r'["\'](公司)["\'][\s\{:]*["\']?(英文)["\']?[\s\{:]*["\']?(隶属关系)["\']?[\s\}:]*["\']([^"\']+)["\']', text, re.IGNORECASE)
  621. if affiliation_en_match:
  622. result['affiliation_en'] = affiliation_en_match.group(4)
  623. return result
  624. def parse_text_with_qwen25VLplus(image_data):
  625. """
  626. 使用阿里云的 Qwen VL Max 模型解析图像中的名片信息
  627. Args:
  628. image_data (bytes): 图像的二进制数据
  629. Returns:
  630. dict: 解析的名片信息
  631. """
  632. # 阿里云 Qwen API 配置
  633. QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-8f2320dafc9e4076968accdd8eebd8e9')
  634. try:
  635. # 将图片数据转为 base64 编码
  636. base64_image = base64.b64encode(image_data).decode('utf-8')
  637. # 初始化 OpenAI 客户端,配置为阿里云 API
  638. client = OpenAI(
  639. api_key=QWEN_API_KEY,
  640. base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
  641. )
  642. # 构建优化后的提示语
  643. prompt = """你是企业名片的信息提取专家。请仔细分析提供的图片,精确提取名片信息。
  644. ## 提取要求
  645. - 区分中英文内容,分别提取
  646. - 保持提取信息的原始格式(如大小写、标点)
  647. - 对于无法识别或名片中不存在的信息,返回空字符串
  648. - 名片中没有的信息,请不要猜测
  649. ## 需提取的字段
  650. 1. 中文姓名 (name_zh)
  651. 2. 英文姓名 (name_en)
  652. 3. 中文职位/头衔 (title_zh)
  653. 4. 英文职位/头衔 (title_en)
  654. 5. 中文酒店/公司名称 (hotel_zh)
  655. 6. 英文酒店/公司名称 (hotel_en)
  656. 7. 手机号码 (mobile) - 如有多个,使用逗号分隔
  657. 8. 固定电话 (phone) - 如有多个,使用逗号分隔
  658. 9. 电子邮箱 (email)
  659. 10. 中文地址 (address_zh)
  660. 11. 英文地址 (address_en)
  661. 12. 中文邮政编码 (postal_code_zh)
  662. 13. 英文邮政编码 (postal_code_en)
  663. 14. 生日 (birthday) - 格式为YYYY-MM-DD,如1990-01-01
  664. 15. 居住地 (residence) - 个人居住地址信息
  665. 16. 品牌组合 (brand_group) - 如有多个品牌,使用逗号分隔
  666. 17. 职业轨迹 (career_path) - 如能从名片中推断,以JSON数组格式返回,包含当前日期,公司名称和职位。自动生成当前日期。
  667. 18. 隶属关系 (affiliation) - 如能从名片中推断,以JSON数组格式返回,包含公司名称和隶属集团名称
  668. ## 输出格式
  669. 请以严格的JSON格式返回结果,不要添加任何额外解释文字。JSON格式如下:
  670. ```json
  671. {
  672. "name_zh": "",
  673. "name_en": "",
  674. "title_zh": "",
  675. "title_en": "",
  676. "hotel_zh": "",
  677. "hotel_en": "",
  678. "mobile": "",
  679. "phone": "",
  680. "email": "",
  681. "address_zh": "",
  682. "address_en": "",
  683. "postal_code_zh": "",
  684. "postal_code_en": "",
  685. "birthday": "",
  686. "residence": "",
  687. "brand_group": "",
  688. "career_path": [],
  689. "affiliation": []
  690. }
  691. ```"""
  692. # 调用 Qwen VL Max API
  693. logging.info("发送请求到 Qwen VL Max 模型")
  694. completion = client.chat.completions.create(
  695. # model="qwen-vl-plus",
  696. model="qwen-vl-max-latest",
  697. messages=[
  698. {
  699. "role": "user",
  700. "content": [
  701. {"type": "text", "text": prompt},
  702. {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
  703. ]
  704. }
  705. ],
  706. temperature=0.1, # 降低温度增加精确性
  707. response_format={"type": "json_object"} # 要求输出JSON格式
  708. )
  709. # 解析响应
  710. response_content = completion.choices[0].message.content
  711. logging.info(f"成功从 Qwen 模型获取响应: {response_content}")
  712. # 尝试从响应中提取 JSON
  713. try:
  714. json_content = extract_json_from_text(response_content)
  715. extracted_data = json.loads(json_content)
  716. logging.info("成功解析 Qwen 响应中的 JSON")
  717. except json.JSONDecodeError:
  718. logging.warning("无法解析 JSON,尝试从文本中提取信息")
  719. extracted_data = extract_fields_from_text(response_content)
  720. # 确保所有必要字段存在
  721. required_fields = [
  722. 'name_zh', 'name_en', 'title_zh', 'title_en',
  723. 'hotel_zh', 'hotel_en', 'mobile', 'phone',
  724. 'email', 'address_zh', 'address_en',
  725. 'postal_code_zh', 'postal_code_en', 'birthday', 'residence',
  726. 'brand_group', 'career_path'
  727. ]
  728. for field in required_fields:
  729. if field not in extracted_data:
  730. extracted_data[field] = [] if field == 'career_path' else ""
  731. # 为career_path增加一条记录
  732. if extracted_data.get('hotel_zh') or extracted_data.get('hotel_en') or extracted_data.get('title_zh') or extracted_data.get('title_en'):
  733. career_entry = {
  734. 'date': datetime.now().strftime('%Y-%m-%d'),
  735. 'hotel_en': extracted_data.get('hotel_en', ''),
  736. 'hotel_zh': extracted_data.get('hotel_zh', ''),
  737. 'image_path': '',
  738. 'source': 'business_card_creation',
  739. 'title_en': extracted_data.get('title_en', ''),
  740. 'title_zh': extracted_data.get('title_zh', '')
  741. }
  742. # 直接清空原有的career_path内容,用career_entry写入
  743. extracted_data['career_path'] = [career_entry]
  744. logging.info(f"为解析结果设置了career_path记录: {career_entry}")
  745. return extracted_data
  746. except Exception as e:
  747. error_msg = f"Qwen VL Max 模型解析失败: {str(e)}"
  748. logging.error(error_msg, exc_info=True)
  749. raise Exception(error_msg)
  750. def update_business_card(card_id, data):
  751. """
  752. 更新名片信息
  753. Args:
  754. card_id (int): 名片记录ID
  755. data (dict): 包含要更新的字段的字典
  756. Returns:
  757. dict: 包含操作结果和更新后的名片信息
  758. """
  759. try:
  760. # 查找要更新的名片记录
  761. card = BusinessCard.query.get(card_id)
  762. if not card:
  763. return {
  764. 'code': 500,
  765. 'success': False,
  766. 'message': f'未找到ID为{card_id}的名片记录',
  767. 'data': None
  768. }
  769. # 更新名片信息
  770. card.name_zh = data.get('name_zh', card.name_zh)
  771. card.name_en = data.get('name_en', card.name_en)
  772. card.title_zh = data.get('title_zh', card.title_zh)
  773. card.title_en = data.get('title_en', card.title_en)
  774. card.mobile = data.get('mobile', card.mobile)
  775. card.phone = data.get('phone', card.phone)
  776. card.email = data.get('email', card.email)
  777. card.hotel_zh = data.get('hotel_zh', card.hotel_zh)
  778. card.hotel_en = data.get('hotel_en', card.hotel_en)
  779. card.address_zh = data.get('address_zh', card.address_zh)
  780. card.address_en = data.get('address_en', card.address_en)
  781. card.postal_code_zh = data.get('postal_code_zh', card.postal_code_zh)
  782. card.postal_code_en = data.get('postal_code_en', card.postal_code_en)
  783. card.brand_zh = data.get('brand_zh', card.brand_zh)
  784. card.brand_en = data.get('brand_en', card.brand_en)
  785. card.affiliation_zh = data.get('affiliation_zh', card.affiliation_zh)
  786. card.affiliation_en = data.get('affiliation_en', card.affiliation_en)
  787. # 处理生日字段,支持字符串转日期
  788. if 'birthday' in data:
  789. if data['birthday']:
  790. try:
  791. card.birthday = datetime.strptime(data['birthday'], '%Y-%m-%d').date()
  792. except ValueError:
  793. # 如果日期格式不正确,设置为None
  794. card.birthday = None
  795. else:
  796. card.birthday = None
  797. card.residence = data.get('residence', card.residence)
  798. card.career_path = data.get('career_path', card.career_path) # 更新职业轨迹
  799. card.brand_group = data.get('brand_group', card.brand_group) # 更新品牌组合
  800. card.updated_by = data.get('updated_by', 'user') # 可以根据实际情况修改为当前用户
  801. # 保存更新
  802. db.session.commit()
  803. # 更新成功后,更新Neo4j图数据库中的人才-酒店关系
  804. try:
  805. from app.services.neo4j_driver import neo4j_driver
  806. from app.core.graph.graph_operations import create_or_get_node
  807. # 获取当前时间
  808. current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  809. # 创建或更新人才节点
  810. talent_properties = {
  811. 'pg_id': card_id, # PostgreSQL数据库中的ID
  812. 'name_zh': card.name_zh, # 中文姓名
  813. 'name_en': card.name_en, # 英文姓名
  814. 'mobile': card.mobile, # 手机号码
  815. 'email': card.email, # 电子邮箱
  816. 'updated_at': current_time # 更新时间
  817. }
  818. talent_node_id = create_or_get_node('talent', **talent_properties)
  819. # 如果有酒店信息,创建或更新酒店节点
  820. if card.hotel_zh or card.hotel_en:
  821. hotel_properties = {
  822. 'hotel_zh': card.hotel_zh, # 酒店中文名称
  823. 'hotel_en': card.hotel_en, # 酒店英文名称
  824. 'updated_at': current_time # 更新时间
  825. }
  826. hotel_node_id = create_or_get_node('hotel', **hotel_properties)
  827. # 创建或更新人才与酒店之间的WORK_FOR关系
  828. if talent_node_id and hotel_node_id:
  829. # 构建Cypher查询以创建或更新关系
  830. cypher_query = """
  831. MATCH (t:talent), (h:hotel)
  832. WHERE id(t) = $talent_id AND id(h) = $hotel_id
  833. MERGE (t)-[r:WORKS_FOR]->(h)
  834. SET r.title_zh = $title_zh,
  835. r.title_en = $title_en,
  836. r.updated_at = $updated_at
  837. RETURN r
  838. """
  839. with neo4j_driver.get_session() as session:
  840. session.run(
  841. cypher_query,
  842. talent_id=talent_node_id,
  843. hotel_id=hotel_node_id,
  844. title_zh=card.title_zh,
  845. title_en=card.title_en,
  846. updated_at=current_time
  847. )
  848. logging.info(f"已成功更新人才(ID:{talent_node_id})与酒店(ID:{hotel_node_id})的WORK_FOR关系")
  849. logging.info(f"Neo4j图数据库关系更新成功")
  850. except Exception as e:
  851. logging.error(f"更新Neo4j图数据库关系失败: {str(e)}", exc_info=True)
  852. # 不因为图数据库更新失败而影响PostgreSQL数据库的更新结果
  853. return {
  854. 'code': 200,
  855. 'success': True,
  856. 'message': '名片信息已更新',
  857. 'data': card.to_dict()
  858. }
  859. except Exception as e:
  860. db.session.rollback()
  861. error_msg = f"更新名片信息失败: {str(e)}"
  862. logging.error(error_msg, exc_info=True)
  863. return {
  864. 'code': 500,
  865. 'success': False,
  866. 'message': error_msg,
  867. 'data': None
  868. }
  869. def get_business_cards():
  870. """
  871. 获取所有名片记录列表
  872. Returns:
  873. dict: 包含操作结果和名片列表
  874. """
  875. try:
  876. # 查询所有名片记录
  877. cards = BusinessCard.query.all()
  878. # 将所有记录转换为字典格式
  879. cards_data = [card.to_dict() for card in cards]
  880. return {
  881. 'code': 200,
  882. 'success': True,
  883. 'message': '获取名片列表成功',
  884. 'data': cards_data
  885. }
  886. except Exception as e:
  887. error_msg = f"获取名片列表失败: {str(e)}"
  888. logging.error(error_msg, exc_info=True)
  889. return {
  890. 'code': 500,
  891. 'success': False,
  892. 'message': error_msg,
  893. 'data': []
  894. }
  895. def update_business_card_status(card_id, status):
  896. """
  897. 更新名片状态(激活/禁用)
  898. Args:
  899. card_id (int): 名片记录ID
  900. status (str): 新状态,'active'或'inactive'
  901. Returns:
  902. dict: 包含操作结果和更新后的名片信息
  903. """
  904. try:
  905. # 查找要更新的名片记录
  906. card = BusinessCard.query.get(card_id)
  907. if not card:
  908. return {
  909. 'code': 500,
  910. 'success': False,
  911. 'message': f'未找到ID为{card_id}的名片记录',
  912. 'data': None
  913. }
  914. # 验证状态值
  915. if status not in ['active', 'inactive']:
  916. return {
  917. 'code': 500,
  918. 'success': False,
  919. 'message': f'无效的状态值: {status},必须为 active 或 inactive',
  920. 'data': None
  921. }
  922. # 更新状态
  923. card.status = status
  924. card.updated_at = datetime.now()
  925. card.updated_by = 'system' # 可以根据实际情况修改为当前用户
  926. # 保存更新
  927. db.session.commit()
  928. return {
  929. 'code': 200,
  930. 'success': True,
  931. 'message': f'名片状态已更新为: {status}',
  932. 'data': card.to_dict()
  933. }
  934. except Exception as e:
  935. db.session.rollback()
  936. error_msg = f"更新名片状态失败: {str(e)}"
  937. logging.error(error_msg, exc_info=True)
  938. return {
  939. 'code': 500,
  940. 'success': False,
  941. 'message': error_msg,
  942. 'data': None
  943. }
  944. def create_talent_tag(tag_data):
  945. """
  946. 创建人才标签节点
  947. Args:
  948. tag_data: 包含标签信息的字典,包括:
  949. - name: 标签名称
  950. - category: 标签分类
  951. - description: 标签描述
  952. - status: 启用状态
  953. Returns:
  954. dict: 操作结果字典
  955. """
  956. try:
  957. from app.services.neo4j_driver import neo4j_driver
  958. # 验证必要参数存在
  959. if not tag_data or 'name' not in tag_data or not tag_data['name']:
  960. return {
  961. 'code': 400,
  962. 'success': False,
  963. 'message': '标签名称为必填项',
  964. 'data': None
  965. }
  966. # 准备节点属性
  967. tag_properties = {
  968. 'name': tag_data.get('name'),
  969. 'category': tag_data.get('category', '未分类'),
  970. 'describe': tag_data.get('description', ''), # 使用describe与现有系统保持一致
  971. 'status': tag_data.get('status', 'active'),
  972. 'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  973. }
  974. # 生成标签的英文名(可选)
  975. from app.core.graph.graph_operations import create_or_get_node
  976. # 如果提供了名称,尝试获取英文翻译
  977. if 'name' in tag_data and tag_data['name']:
  978. try:
  979. from app.api.data_interface.routes import translate_and_parse
  980. en_name = translate_and_parse(tag_data['name'])
  981. tag_properties['en_name'] = en_name[0] if en_name and isinstance(en_name, list) else ''
  982. except Exception as e:
  983. logging.warning(f"获取标签英文名失败: {str(e)}")
  984. tag_properties['en_name'] = ''
  985. # 创建节点
  986. node_id = create_or_get_node('data_label', **tag_properties)
  987. if node_id:
  988. return {
  989. 'code': 200,
  990. 'success': True,
  991. 'message': '人才标签创建成功',
  992. 'data': {
  993. 'id': node_id,
  994. **tag_properties
  995. }
  996. }
  997. else:
  998. return {
  999. 'code': 500,
  1000. 'success': False,
  1001. 'message': '人才标签创建失败',
  1002. 'data': None
  1003. }
  1004. except Exception as e:
  1005. logging.error(f"创建人才标签失败: {str(e)}", exc_info=True)
  1006. return {
  1007. 'code': 500,
  1008. 'success': False,
  1009. 'message': f'创建人才标签失败: {str(e)}',
  1010. 'data': None
  1011. }
  1012. def get_talent_tag_list():
  1013. """
  1014. 从Neo4j图数据库获取人才标签列表
  1015. Returns:
  1016. dict: 包含操作结果和标签列表的字典
  1017. """
  1018. try:
  1019. from app.services.neo4j_driver import neo4j_driver
  1020. # 构建Cypher查询语句,获取分类为talent的标签
  1021. query = """
  1022. MATCH (n:data_label)
  1023. WHERE n.category CONTAINS 'talent' OR n.category CONTAINS '人才'
  1024. RETURN id(n) as id, n.name as name, n.en_name as en_name,
  1025. n.category as category, n.describe as description,
  1026. n.status as status, n.time as time
  1027. ORDER BY n.time DESC
  1028. """
  1029. # 执行查询
  1030. tags = []
  1031. with neo4j_driver.get_session() as session:
  1032. result = session.run(query)
  1033. # 处理查询结果
  1034. for record in result:
  1035. tag = {
  1036. 'id': record['id'],
  1037. 'name': record['name'],
  1038. 'en_name': record['en_name'],
  1039. 'category': record['category'],
  1040. 'description': record['description'],
  1041. 'status': record['status'],
  1042. 'time': record['time']
  1043. }
  1044. tags.append(tag)
  1045. return {
  1046. 'code': 200,
  1047. 'success': True,
  1048. 'message': '获取人才标签列表成功',
  1049. 'data': tags
  1050. }
  1051. except Exception as e:
  1052. error_msg = f"获取人才标签列表失败: {str(e)}"
  1053. logging.error(error_msg, exc_info=True)
  1054. return {
  1055. 'code': 500,
  1056. 'success': False,
  1057. 'message': error_msg,
  1058. 'data': []
  1059. }
  1060. def update_talent_tag(tag_id, tag_data):
  1061. """
  1062. 更新人才标签节点属性
  1063. Args:
  1064. tag_id: 标签节点ID
  1065. tag_data: 包含更新信息的字典,可能包括:
  1066. - name: 标签名称
  1067. - category: 标签分类
  1068. - description: 标签描述
  1069. - status: 启用状态
  1070. Returns:
  1071. dict: 操作结果字典
  1072. """
  1073. try:
  1074. from app.services.neo4j_driver import neo4j_driver
  1075. # 准备要更新的属性
  1076. update_properties = {}
  1077. # 检查并添加需要更新的属性
  1078. if 'name' in tag_data and tag_data['name']:
  1079. update_properties['name'] = tag_data['name']
  1080. # 如果名称更新了,尝试更新英文名称
  1081. try:
  1082. from app.api.data_interface.routes import translate_and_parse
  1083. en_name = translate_and_parse(tag_data['name'])
  1084. update_properties['en_name'] = en_name[0] if en_name and isinstance(en_name, list) else ''
  1085. except Exception as e:
  1086. logging.warning(f"更新标签英文名失败: {str(e)}")
  1087. if 'category' in tag_data and tag_data['category']:
  1088. update_properties['category'] = tag_data['category']
  1089. if 'description' in tag_data:
  1090. update_properties['describe'] = tag_data['description']
  1091. if 'status' in tag_data:
  1092. update_properties['status'] = tag_data['status']
  1093. # 添加更新时间
  1094. update_properties['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  1095. # 如果没有可更新的属性,返回错误
  1096. if not update_properties:
  1097. return {
  1098. 'code': 400,
  1099. 'success': False,
  1100. 'message': '未提供任何可更新的属性',
  1101. 'data': None
  1102. }
  1103. # 构建更新的Cypher查询
  1104. set_clauses = []
  1105. params = {'nodeId': tag_id}
  1106. for key, value in update_properties.items():
  1107. param_name = f"param_{key}"
  1108. set_clauses.append(f"n.{key} = ${param_name}")
  1109. params[param_name] = value
  1110. set_clause = ", ".join(set_clauses)
  1111. query = f"""
  1112. MATCH (n:data_label)
  1113. WHERE id(n) = $nodeId
  1114. SET {set_clause}
  1115. RETURN id(n) as id, n.name as name, n.en_name as en_name,
  1116. n.category as category, n.describe as description,
  1117. n.status as status, n.time as time
  1118. """
  1119. # 执行更新查询
  1120. with neo4j_driver.get_session() as session:
  1121. result = session.run(query, **params)
  1122. record = result.single()
  1123. if not record:
  1124. return {
  1125. 'code': 404,
  1126. 'success': False,
  1127. 'message': f'未找到ID为{tag_id}的标签',
  1128. 'data': None
  1129. }
  1130. # 提取更新后的标签信息
  1131. updated_tag = {
  1132. 'id': record['id'],
  1133. 'name': record['name'],
  1134. 'en_name': record['en_name'],
  1135. 'category': record['category'],
  1136. 'description': record['description'],
  1137. 'status': record['status'],
  1138. 'time': record['time']
  1139. }
  1140. return {
  1141. 'code': 200,
  1142. 'success': True,
  1143. 'message': '人才标签更新成功',
  1144. 'data': updated_tag
  1145. }
  1146. except Exception as e:
  1147. error_msg = f"更新人才标签失败: {str(e)}"
  1148. logging.error(error_msg, exc_info=True)
  1149. return {
  1150. 'code': 500,
  1151. 'success': False,
  1152. 'message': error_msg,
  1153. 'data': None
  1154. }
  1155. def delete_talent_tag(tag_id):
  1156. """
  1157. 删除人才标签节点及其相关关系
  1158. Args:
  1159. tag_id: 标签节点ID
  1160. Returns:
  1161. dict: 操作结果字典
  1162. """
  1163. try:
  1164. from app.services.neo4j_driver import neo4j_driver
  1165. # 首先获取要删除的标签信息,以便在成功后返回
  1166. get_query = """
  1167. MATCH (n:data_label)
  1168. WHERE id(n) = $nodeId
  1169. RETURN id(n) as id, n.name as name, n.en_name as en_name,
  1170. n.category as category, n.describe as description,
  1171. n.status as status, n.time as time
  1172. """
  1173. # 构建删除节点和关系的Cypher查询
  1174. delete_query = """
  1175. MATCH (n:data_label)
  1176. WHERE id(n) = $nodeId
  1177. OPTIONAL MATCH (n)-[r]-()
  1178. DELETE r, n
  1179. RETURN count(n) AS deleted
  1180. """
  1181. # 执行查询
  1182. tag_info = None
  1183. with neo4j_driver.get_session() as session:
  1184. # 先获取标签信息
  1185. result = session.run(get_query, nodeId=tag_id)
  1186. record = result.single()
  1187. if not record:
  1188. return {
  1189. 'code': 404,
  1190. 'success': False,
  1191. 'message': f'未找到ID为{tag_id}的标签',
  1192. 'data': None
  1193. }
  1194. # 保存标签信息用于返回
  1195. tag_info = {
  1196. 'id': record['id'],
  1197. 'name': record['name'],
  1198. 'en_name': record['en_name'],
  1199. 'category': record['category'],
  1200. 'description': record['description'],
  1201. 'status': record['status'],
  1202. 'time': record['time']
  1203. }
  1204. # 执行删除操作
  1205. delete_result = session.run(delete_query, nodeId=tag_id)
  1206. deleted = delete_result.single()['deleted']
  1207. if deleted > 0:
  1208. return {
  1209. 'code': 200,
  1210. 'success': True,
  1211. 'message': '人才标签删除成功',
  1212. 'data': tag_info
  1213. }
  1214. else:
  1215. return {
  1216. 'code': 404,
  1217. 'success': False,
  1218. 'message': f'未能删除ID为{tag_id}的标签',
  1219. 'data': None
  1220. }
  1221. except Exception as e:
  1222. error_msg = f"删除人才标签失败: {str(e)}"
  1223. logging.error(error_msg, exc_info=True)
  1224. return {
  1225. 'code': 500,
  1226. 'success': False,
  1227. 'message': error_msg,
  1228. 'data': None
  1229. }
  1230. def query_neo4j_graph(query_requirement):
  1231. """
  1232. 查询Neo4j图数据库,通过Deepseek API生成Cypher脚本
  1233. Args:
  1234. query_requirement (str): 查询需求描述
  1235. Returns:
  1236. dict: 包含查询结果的字典,JSON格式
  1237. """
  1238. try:
  1239. # 导入必要的模块
  1240. from app.services.neo4j_driver import neo4j_driver
  1241. import requests
  1242. import json
  1243. # Deepseek API配置
  1244. api_key = DEEPSEEK_API_KEY
  1245. api_url = DEEPSEEK_API_URL
  1246. # 步骤1: 从Neo4j获取所有标签列表
  1247. logging.info("第一步:从Neo4j获取人才类别的标签列表")
  1248. all_labels_query = """
  1249. MATCH (dl:data_label)
  1250. WHERE dl.category CONTAINS '人才' OR dl.category CONTAINS 'talent'
  1251. RETURN dl.name as name
  1252. """
  1253. all_labels = []
  1254. with neo4j_driver.get_session() as session:
  1255. result = session.run(all_labels_query)
  1256. for record in result:
  1257. all_labels.append(record['name'])
  1258. logging.info(f"获取到{len(all_labels)}个人才标签: {all_labels}")
  1259. # 步骤2: 使用Deepseek判断查询需求中的关键信息与标签的对应关系
  1260. logging.info("第二步:调用Deepseek API匹配查询需求与标签")
  1261. # 构建所有标签的JSON字符串
  1262. labels_json = json.dumps(all_labels, ensure_ascii=False)
  1263. # 构建匹配标签的提示语
  1264. matching_prompt = f"""
  1265. 请分析以下查询需求,并从标签列表中找出与查询需求相关的标签。
  1266. ## 查询需求
  1267. {query_requirement}
  1268. ## 可用标签列表
  1269. {labels_json}
  1270. ## 输出要求
  1271. 1. 请以JSON数组格式返回匹配的标签名称列表,格式如: ["标签1", "标签2", "标签3"]
  1272. 2. 只返回标签名称数组,不要包含任何解释或其他文本
  1273. 3. 如果没有找到匹配的标签,请返回空数组 []
  1274. """
  1275. # 调用Deepseek API匹配标签
  1276. headers = {
  1277. "Authorization": f"Bearer {api_key}",
  1278. "Content-Type": "application/json"
  1279. }
  1280. payload = {
  1281. "model": "deepseek-chat",
  1282. "messages": [
  1283. {"role": "system", "content": "你是一个专业的文本分析和匹配专家。"},
  1284. {"role": "user", "content": matching_prompt}
  1285. ],
  1286. "temperature": 0.1,
  1287. "response_format": {"type": "json_object"}
  1288. }
  1289. logging.info("发送请求到Deepseek API匹配标签:"+matching_prompt)
  1290. response = requests.post(api_url, headers=headers, json=payload, timeout=30)
  1291. response.raise_for_status()
  1292. # 解析API响应
  1293. result = response.json()
  1294. matching_content = result.get("choices", [{}])[0].get("message", {}).get("content", "[]")
  1295. # 提取JSON数组
  1296. try:
  1297. # 尝试直接解析返回结果,预期格式为 ["新开酒店经验", "五星级酒店", "总经理"]
  1298. logging.info(f"Deepseek返回的匹配内容: {matching_content}")
  1299. # 如果返回的是JSON字符串,先去除可能的前后缀文本
  1300. if isinstance(matching_content, str):
  1301. # 查找JSON数组的开始和结束位置
  1302. start_idx = matching_content.find('[')
  1303. end_idx = matching_content.rfind(']') + 1
  1304. if start_idx >= 0 and end_idx > start_idx:
  1305. json_str = matching_content[start_idx:end_idx]
  1306. matched_labels = json.loads(json_str)
  1307. else:
  1308. matched_labels = []
  1309. else:
  1310. matched_labels = []
  1311. # 确保结果是字符串列表
  1312. if matched_labels and all(isinstance(item, str) for item in matched_labels):
  1313. logging.info(f"成功解析到标签列表: {matched_labels}")
  1314. else:
  1315. logging.warning("解析结果不是预期的字符串列表格式,将使用空列表")
  1316. matched_labels = []
  1317. except json.JSONDecodeError as e:
  1318. logging.error(f"JSON解析错误: {str(e)}")
  1319. matched_labels = []
  1320. except Exception as e:
  1321. logging.error(f"解析匹配标签时出错: {str(e)}")
  1322. matched_labels = []
  1323. logging.info(f"匹配到的标签: {matched_labels}")
  1324. # 如果没有匹配到标签,返回空结果
  1325. if not matched_labels:
  1326. return {
  1327. 'code': 200,
  1328. 'success': True,
  1329. 'message': '未找到与查询需求匹配的标签',
  1330. 'query': '',
  1331. 'data': []
  1332. }
  1333. # 步骤3: 构建Cypher生成提示文本
  1334. logging.info("第三步:构建提示文本生成Cypher查询语句")
  1335. # 将匹配的标签转换为字符串
  1336. matched_labels_str = ", ".join([f"'{label}'" for label in matched_labels])
  1337. # 构建生成Cypher的提示语
  1338. cypher_prompt = f"""
  1339. 请根据以下Neo4j图数据库结构和已匹配的标签,生成一个Cypher查询脚本。
  1340. ## 图数据库结构
  1341. ### 节点
  1342. 1. talent - 人才节点
  1343. 属性: pg_id(PostgreSQL数据库ID), name_zh(中文姓名), name_en(英文姓名),
  1344. mobile(手机号码), email(电子邮箱), updated_at(更新时间)
  1345. 2. data_label - 人才标签节点
  1346. ### 关系
  1347. BELONGS_TO - 从属关系
  1348. (talent)-[BELONGS_TO]->(data_label) - 人才属于某标签
  1349. ## 匹配的标签列表
  1350. [{matched_labels_str}]
  1351. ## 查询需求
  1352. {query_requirement}
  1353. ## 输出要求
  1354. 1. 只输出有效的Cypher查询语句,不要包含任何解释或注释
  1355. 2. 确保return语句中包含talent节点属性
  1356. 3. 尽量利用图数据库的特性来优化查询效率
  1357. 4. 使用WITH子句和COLLECT函数收集标签,确保查询到同时拥有所有标签的人才
  1358. 注意:请直接返回Cypher查询语句,无需任何其他文本。
  1359. 以下是一个示例:
  1360. 假设匹配的标签是 ['五星级酒店', '新开酒店经验', '总经理']
  1361. 生成的Cypher查询语句应该是:
  1362. MATCH (t:talent)-[:BELONGS_TO]->(dl:data_label)
  1363. WHERE dl.name IN ['五星级酒店', '新开酒店经验', '总经理']
  1364. WITH t, COLLECT(DISTINCT dl.name) AS labels
  1365. WHERE size(labels) = 3
  1366. RETURN t.pg_id as pg_id, t.name_zh as name_zh, t.name_en as name_en, t.mobile as mobile, t.email as email, t.updated_at as updated_at
  1367. """
  1368. # 调用Deepseek API生成Cypher脚本
  1369. payload = {
  1370. "model": "deepseek-chat",
  1371. "messages": [
  1372. {"role": "system", "content": "你是一个专业的Neo4j Cypher查询专家。"},
  1373. {"role": "user", "content": cypher_prompt}
  1374. ],
  1375. "temperature": 0.1
  1376. }
  1377. logging.info("发送请求到Deepseek API生成Cypher脚本")
  1378. response = requests.post(api_url, headers=headers, json=payload, timeout=30)
  1379. response.raise_for_status()
  1380. # 解析API响应
  1381. result = response.json()
  1382. cypher_script = result.get("choices", [{}])[0].get("message", {}).get("content", "")
  1383. # 清理Cypher脚本,移除不必要的markdown格式或注释
  1384. cypher_script = cypher_script.strip()
  1385. if cypher_script.startswith("```cypher"):
  1386. cypher_script = cypher_script[9:]
  1387. elif cypher_script.startswith("```"):
  1388. cypher_script = cypher_script[3:]
  1389. if cypher_script.endswith("```"):
  1390. cypher_script = cypher_script[:-3]
  1391. cypher_script = cypher_script.strip()
  1392. logging.info(f"生成的Cypher脚本: {cypher_script}")
  1393. # 步骤4: 执行Cypher脚本
  1394. logging.info("第四步:执行Cypher脚本并返回结果")
  1395. with neo4j_driver.get_session() as session:
  1396. result = session.run(cypher_script)
  1397. records = [record.data() for record in result]
  1398. # 构建查询结果
  1399. response_data = {
  1400. 'code': 200,
  1401. 'success': True,
  1402. 'message': '查询成功执行',
  1403. 'query': cypher_script,
  1404. 'matched_labels': matched_labels,
  1405. 'data': records
  1406. }
  1407. return response_data
  1408. except requests.exceptions.HTTPError as e:
  1409. error_msg = f"调用Deepseek API失败: {str(e)}"
  1410. logging.error(error_msg)
  1411. if hasattr(e, 'response') and e.response:
  1412. logging.error(f"错误状态码: {e.response.status_code}")
  1413. logging.error(f"错误内容: {e.response.text}")
  1414. return {
  1415. 'code': 500,
  1416. 'success': False,
  1417. 'message': error_msg,
  1418. 'data': []
  1419. }
  1420. except Exception as e:
  1421. error_msg = f"查询Neo4j图数据库失败: {str(e)}"
  1422. logging.error(error_msg, exc_info=True)
  1423. return {
  1424. 'code': 500,
  1425. 'success': False,
  1426. 'message': error_msg,
  1427. 'data': []
  1428. }
  1429. def talent_get_tags(talent_id):
  1430. """
  1431. 根据talent ID获取人才节点关联的标签
  1432. Args:
  1433. talent_id (int): 人才节点pg_id
  1434. Returns:
  1435. dict: 包含人才ID和关联标签的字典,JSON格式
  1436. """
  1437. try:
  1438. # 导入必要的模块
  1439. from app.services.neo4j_driver import neo4j_driver
  1440. # 准备查询返回数据
  1441. response_data = {
  1442. 'code': 200,
  1443. 'success': True,
  1444. 'message': '获取人才标签成功',
  1445. 'data': []
  1446. }
  1447. # 构建Cypher查询语句,获取人才节点关联的标签
  1448. cypher_query = """
  1449. MATCH (t:talent)-[r:BELONGS_TO]->(tag:data_label)
  1450. WHERE t.pg_id = $talent_id
  1451. RETURN t.pg_id as talent_id, tag.name as tag_name
  1452. """
  1453. # 执行查询
  1454. with neo4j_driver.get_session() as session:
  1455. result = session.run(cypher_query, talent_id=int(talent_id))
  1456. records = list(result)
  1457. # 如果没有查询到标签,返回空数组
  1458. if not records:
  1459. response_data['message'] = f'人才pg_id {talent_id} 没有关联的标签'
  1460. return response_data
  1461. # 处理查询结果
  1462. for record in records:
  1463. talent_tag = {
  1464. 'talent': record['talent_id'],
  1465. 'tag': record['tag_name']
  1466. }
  1467. response_data['data'].append(talent_tag)
  1468. return response_data
  1469. except Exception as e:
  1470. error_msg = f"获取人才标签失败: {str(e)}"
  1471. logging.error(error_msg, exc_info=True)
  1472. return {
  1473. 'code': 500,
  1474. 'success': False,
  1475. 'message': error_msg,
  1476. 'data': []
  1477. }
  1478. def talent_update_tags(data):
  1479. """
  1480. 根据传入的JSON数据为人才节点创建与标签的BELONGS_TO关系
  1481. Args:
  1482. data (list): 包含talent和tag字段的对象列表
  1483. 例如: [
  1484. {"talent": 12345, "tag": "市场营销"},
  1485. {"talent": 12345, "tag": "酒店管理"}
  1486. ]
  1487. Returns:
  1488. dict: 操作结果和状态信息
  1489. """
  1490. try:
  1491. # 导入必要的模块
  1492. from app.services.neo4j_driver import neo4j_driver
  1493. # 验证输入参数
  1494. if not isinstance(data, list):
  1495. return {
  1496. 'code': 400,
  1497. 'success': False,
  1498. 'message': '参数格式错误,需要JSON数组',
  1499. 'data': None
  1500. }
  1501. if len(data) == 0:
  1502. return {
  1503. 'code': 400,
  1504. 'success': False,
  1505. 'message': '数据列表为空',
  1506. 'data': None
  1507. }
  1508. # 获取当前时间
  1509. current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  1510. # 成功和失败计数
  1511. success_count = 0
  1512. failed_items = []
  1513. # 按talent分组处理数据
  1514. talent_tags = {}
  1515. for item in data:
  1516. # 验证每个项目的格式
  1517. if not isinstance(item, dict) or 'talent' not in item or 'tag' not in item:
  1518. failed_items.append(item)
  1519. continue
  1520. talent_id = item.get('talent')
  1521. tag_name = item.get('tag')
  1522. # 验证talent_id和tag_name的值
  1523. if not talent_id or not tag_name or not isinstance(tag_name, str):
  1524. failed_items.append(item)
  1525. continue
  1526. # 按talent_id分组
  1527. if talent_id not in talent_tags:
  1528. talent_tags[talent_id] = []
  1529. talent_tags[talent_id].append(tag_name)
  1530. with neo4j_driver.get_session() as session:
  1531. # 处理每个talent及其标签
  1532. for talent_id, tags in talent_tags.items():
  1533. # 首先验证talent节点是否存在
  1534. check_talent_query = """
  1535. MATCH (t:talent)
  1536. WHERE t.pg_id = $talent_id
  1537. RETURN t
  1538. """
  1539. talent_result = session.run(check_talent_query, talent_id=int(talent_id))
  1540. if not talent_result.single():
  1541. # 该talent不存在,记录失败项并继续下一个talent
  1542. for tag in tags:
  1543. failed_items.append({'talent_pg_id': talent_id, 'tag': tag})
  1544. continue
  1545. # 首先清除所有现有的BELONGS_TO关系
  1546. clear_relations_query = """
  1547. MATCH (t:talent)-[r:BELONGS_TO]->(:data_label)
  1548. WHERE t.pg_id = $talent_id
  1549. DELETE r
  1550. RETURN count(r) as deleted_count
  1551. """
  1552. clear_result = session.run(clear_relations_query, talent_id=int(talent_id))
  1553. deleted_count = clear_result.single()['deleted_count']
  1554. logging.info(f"已删除talent_id={talent_id}的{deleted_count}个已有标签关系")
  1555. # 处理每个标签
  1556. for tag_name in tags:
  1557. try:
  1558. # 1. 查找或创建标签节点
  1559. # 先查找是否存在该标签
  1560. find_tag_query = """
  1561. MATCH (tag:data_label)
  1562. WHERE tag.name = $tag_name
  1563. RETURN id(tag) as tag_id
  1564. """
  1565. tag_result = session.run(find_tag_query, tag_name=tag_name)
  1566. tag_record = tag_result.single()
  1567. if tag_record:
  1568. tag_id = tag_record['tag_id']
  1569. else:
  1570. # 创建新标签
  1571. create_tag_query = """
  1572. CREATE (tag:data_label {name: $name, category: $category, updated_at: $updated_at})
  1573. RETURN id(tag) as tag_id
  1574. """
  1575. tag_result = session.run(
  1576. create_tag_query,
  1577. name=tag_name,
  1578. category='talent',
  1579. updated_at=current_time
  1580. )
  1581. tag_record = tag_result.single()
  1582. tag_id = tag_record['tag_id']
  1583. # 2. 创建人才与标签的BELONGS_TO关系
  1584. create_relation_query = """
  1585. MATCH (t:talent), (tag:data_label)
  1586. WHERE t.pg_id = $talent_id AND tag.name = $tag_name
  1587. CREATE (t)-[r:BELONGS_TO]->(tag)
  1588. SET r.created_at = $current_time
  1589. RETURN r
  1590. """
  1591. relation_result = session.run(
  1592. create_relation_query,
  1593. talent_id=int(talent_id),
  1594. tag_name=tag_name,
  1595. current_time=current_time
  1596. )
  1597. if relation_result.single():
  1598. success_count += 1
  1599. else:
  1600. failed_items.append({'talent_pg_id': talent_id, 'tag': tag_name})
  1601. except Exception as tag_error:
  1602. logging.error(f"为标签 {tag_name} 创建关系时出错: {str(tag_error)}")
  1603. failed_items.append({'talent_pg_id': talent_id, 'tag': tag_name})
  1604. # 返回结果
  1605. total_items = len(data)
  1606. if success_count == total_items:
  1607. return {
  1608. 'code': 200,
  1609. 'success': True,
  1610. 'message': f'成功创建或更新了 {success_count} 个标签关系',
  1611. 'data': {
  1612. 'success_count': success_count,
  1613. 'total_count': total_items,
  1614. 'failed_items': []
  1615. }
  1616. }
  1617. elif success_count > 0:
  1618. return {
  1619. 'code': 206, # Partial Content
  1620. 'success': True,
  1621. 'message': f'部分成功: 创建或更新了 {success_count}/{total_items} 个标签关系',
  1622. 'data': {
  1623. 'success_count': success_count,
  1624. 'total_count': total_items,
  1625. 'failed_items': failed_items
  1626. }
  1627. }
  1628. else:
  1629. return {
  1630. 'code': 500,
  1631. 'success': False,
  1632. 'message': '无法创建任何标签关系',
  1633. 'data': {
  1634. 'success_count': 0,
  1635. 'total_count': total_items,
  1636. 'failed_items': failed_items
  1637. }
  1638. }
  1639. except Exception as e:
  1640. error_msg = f"更新人才标签关系失败: {str(e)}"
  1641. logging.error(error_msg, exc_info=True)
  1642. return {
  1643. 'code': 500,
  1644. 'success': False,
  1645. 'message': error_msg,
  1646. 'data': None
  1647. }
  1648. def get_business_card(card_id):
  1649. """
  1650. 根据ID从PostgreSQL数据库中获取名片记录
  1651. Args:
  1652. card_id (int): 名片记录ID
  1653. Returns:
  1654. dict: 包含操作结果和名片信息的字典
  1655. """
  1656. try:
  1657. # 查询指定ID的名片记录
  1658. card = BusinessCard.query.get(card_id)
  1659. if not card:
  1660. return {
  1661. 'code': 404,
  1662. 'success': False,
  1663. 'message': f'未找到ID为{card_id}的名片记录',
  1664. 'data': None
  1665. }
  1666. # 将记录转换为字典格式返回
  1667. return {
  1668. 'code': 200,
  1669. 'success': True,
  1670. 'message': '获取名片记录成功',
  1671. 'data': card.to_dict()
  1672. }
  1673. except Exception as e:
  1674. error_msg = f"获取名片记录失败: {str(e)}"
  1675. logging.error(error_msg, exc_info=True)
  1676. return {
  1677. 'code': 500,
  1678. 'success': False,
  1679. 'message': error_msg,
  1680. 'data': None
  1681. }
  1682. # 酒店职位数据模型
  1683. class HotelPosition(db.Model):
  1684. __tablename__ = 'hotel_positions'
  1685. id = db.Column(db.Integer, primary_key=True, autoincrement=True)
  1686. department_zh = db.Column(db.String(10), nullable=False)
  1687. department_en = db.Column(db.String(50), nullable=False)
  1688. position_zh = db.Column(db.String(20), nullable=False)
  1689. position_en = db.Column(db.String(100), nullable=False)
  1690. position_abbr = db.Column(db.String(20), nullable=True)
  1691. level_zh = db.Column(db.String(10), nullable=False)
  1692. level_en = db.Column(db.String(30), nullable=False)
  1693. created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
  1694. updated_at = db.Column(db.DateTime, default=datetime.now, onupdate=datetime.now)
  1695. created_by = db.Column(db.String(50), default='system')
  1696. updated_by = db.Column(db.String(50), default='system')
  1697. status = db.Column(db.String(20), default='active')
  1698. def to_dict(self):
  1699. return {
  1700. 'id': self.id,
  1701. 'department_zh': self.department_zh,
  1702. 'department_en': self.department_en,
  1703. 'position_zh': self.position_zh,
  1704. 'position_en': self.position_en,
  1705. 'position_abbr': self.position_abbr,
  1706. 'level_zh': self.level_zh,
  1707. 'level_en': self.level_en,
  1708. 'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S') if self.created_at else None,
  1709. 'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S') if self.updated_at else None,
  1710. 'created_by': self.created_by,
  1711. 'updated_by': self.updated_by,
  1712. 'status': self.status
  1713. }
  1714. def get_hotel_positions_list():
  1715. """
  1716. 获取酒店职位数据表的全部记录
  1717. Returns:
  1718. dict: 包含操作结果和酒店职位列表的字典
  1719. """
  1720. try:
  1721. # 查询所有酒店职位记录,按部门和职位排序
  1722. positions = HotelPosition.query.order_by(
  1723. HotelPosition.department_zh,
  1724. HotelPosition.position_zh
  1725. ).all()
  1726. # 将所有记录转换为字典格式
  1727. positions_data = [position.to_dict() for position in positions]
  1728. return {
  1729. 'code': 200,
  1730. 'success': True,
  1731. 'message': '获取酒店职位列表成功',
  1732. 'data': positions_data,
  1733. 'count': len(positions_data)
  1734. }
  1735. except Exception as e:
  1736. error_msg = f"获取酒店职位列表失败: {str(e)}"
  1737. logging.error(error_msg, exc_info=True)
  1738. return {
  1739. 'code': 500,
  1740. 'success': False,
  1741. 'message': error_msg,
  1742. 'data': [],
  1743. 'count': 0
  1744. }
  1745. def add_hotel_positions(position_data):
  1746. """
  1747. 新增酒店职位数据表记录
  1748. Args:
  1749. position_data (dict): 包含职位信息的字典,包括:
  1750. - department_zh: 部门中文名称 (必填)
  1751. - department_en: 部门英文名称 (必填)
  1752. - position_zh: 职位中文名称 (必填)
  1753. - position_en: 职位英文名称 (必填)
  1754. - position_abbr: 职位英文缩写 (可选)
  1755. - level_zh: 职级中文名称 (必填)
  1756. - level_en: 职级英文名称 (必填)
  1757. - created_by: 创建者 (可选,默认为'system')
  1758. - updated_by: 更新者 (可选,默认为'system')
  1759. - status: 状态 (可选,默认为'active')
  1760. Returns:
  1761. dict: 包含操作结果和创建的职位信息的字典
  1762. """
  1763. try:
  1764. # 验证必填字段
  1765. required_fields = ['department_zh', 'department_en', 'position_zh', 'position_en', 'level_zh', 'level_en']
  1766. missing_fields = []
  1767. for field in required_fields:
  1768. if field not in position_data or not position_data[field] or not position_data[field].strip():
  1769. missing_fields.append(field)
  1770. if missing_fields:
  1771. return {
  1772. 'code': 400,
  1773. 'success': False,
  1774. 'message': f'缺少必填字段: {", ".join(missing_fields)}',
  1775. 'data': None
  1776. }
  1777. # 检查是否已存在相同的职位记录(基于部门和职位的中文名称)
  1778. existing_position = HotelPosition.query.filter_by(
  1779. department_zh=position_data['department_zh'].strip(),
  1780. position_zh=position_data['position_zh'].strip()
  1781. ).first()
  1782. if existing_position:
  1783. return {
  1784. 'code': 409,
  1785. 'success': False,
  1786. 'message': f'职位记录已存在:{position_data["department_zh"]} - {position_data["position_zh"]}',
  1787. 'data': existing_position.to_dict()
  1788. }
  1789. # 创建新的职位记录
  1790. new_position = HotelPosition(
  1791. department_zh=position_data['department_zh'].strip(),
  1792. department_en=position_data['department_en'].strip(),
  1793. position_zh=position_data['position_zh'].strip(),
  1794. position_en=position_data['position_en'].strip(),
  1795. position_abbr=position_data.get('position_abbr', '').strip() if position_data.get('position_abbr') else None,
  1796. level_zh=position_data['level_zh'].strip(),
  1797. level_en=position_data['level_en'].strip(),
  1798. created_by=position_data.get('created_by', 'system'),
  1799. updated_by=position_data.get('updated_by', 'system'),
  1800. status=position_data.get('status', 'active')
  1801. )
  1802. # 保存到数据库
  1803. db.session.add(new_position)
  1804. db.session.commit()
  1805. logging.info(f"成功创建酒店职位记录,ID: {new_position.id}")
  1806. return {
  1807. 'code': 200,
  1808. 'success': True,
  1809. 'message': '酒店职位记录创建成功',
  1810. 'data': new_position.to_dict()
  1811. }
  1812. except Exception as e:
  1813. db.session.rollback()
  1814. error_msg = f"创建酒店职位记录失败: {str(e)}"
  1815. logging.error(error_msg, exc_info=True)
  1816. return {
  1817. 'code': 500,
  1818. 'success': False,
  1819. 'message': error_msg,
  1820. 'data': None
  1821. }
  1822. def update_hotel_positions(position_id, position_data):
  1823. """
  1824. 修改酒店职位数据表记录
  1825. Args:
  1826. position_id (int): 职位记录ID
  1827. position_data (dict): 包含要更新的职位信息的字典,可能包括:
  1828. - department_zh: 部门中文名称
  1829. - department_en: 部门英文名称
  1830. - position_zh: 职位中文名称
  1831. - position_en: 职位英文名称
  1832. - position_abbr: 职位英文缩写
  1833. - level_zh: 职级中文名称
  1834. - level_en: 职级英文名称
  1835. - updated_by: 更新者
  1836. - status: 状态
  1837. Returns:
  1838. dict: 包含操作结果和更新后的职位信息的字典
  1839. """
  1840. try:
  1841. # 查找要更新的职位记录
  1842. position = HotelPosition.query.get(position_id)
  1843. if not position:
  1844. return {
  1845. 'code': 404,
  1846. 'success': False,
  1847. 'message': f'未找到ID为{position_id}的职位记录',
  1848. 'data': None
  1849. }
  1850. # 检查是否有数据需要更新
  1851. if not position_data:
  1852. return {
  1853. 'code': 400,
  1854. 'success': False,
  1855. 'message': '请求数据为空',
  1856. 'data': None
  1857. }
  1858. # 如果要更新部门和职位名称,检查是否会与其他记录冲突
  1859. new_department_zh = position_data.get('department_zh', position.department_zh).strip() if position_data.get('department_zh') else position.department_zh
  1860. new_position_zh = position_data.get('position_zh', position.position_zh).strip() if position_data.get('position_zh') else position.position_zh
  1861. # 查找是否存在相同的职位记录(排除当前记录)
  1862. existing_position = HotelPosition.query.filter(
  1863. HotelPosition.id != position_id,
  1864. HotelPosition.department_zh == new_department_zh,
  1865. HotelPosition.position_zh == new_position_zh
  1866. ).first()
  1867. if existing_position:
  1868. return {
  1869. 'code': 409,
  1870. 'success': False,
  1871. 'message': f'职位记录已存在:{new_department_zh} - {new_position_zh}',
  1872. 'data': existing_position.to_dict()
  1873. }
  1874. # 更新职位信息
  1875. if 'department_zh' in position_data and position_data['department_zh']:
  1876. position.department_zh = position_data['department_zh'].strip()
  1877. if 'department_en' in position_data and position_data['department_en']:
  1878. position.department_en = position_data['department_en'].strip()
  1879. if 'position_zh' in position_data and position_data['position_zh']:
  1880. position.position_zh = position_data['position_zh'].strip()
  1881. if 'position_en' in position_data and position_data['position_en']:
  1882. position.position_en = position_data['position_en'].strip()
  1883. if 'position_abbr' in position_data:
  1884. # 处理position_abbr,可能为空字符串或None
  1885. if position_data['position_abbr'] and position_data['position_abbr'].strip():
  1886. position.position_abbr = position_data['position_abbr'].strip()
  1887. else:
  1888. position.position_abbr = None
  1889. if 'level_zh' in position_data and position_data['level_zh']:
  1890. position.level_zh = position_data['level_zh'].strip()
  1891. if 'level_en' in position_data and position_data['level_en']:
  1892. position.level_en = position_data['level_en'].strip()
  1893. if 'updated_by' in position_data:
  1894. position.updated_by = position_data['updated_by'] or 'system'
  1895. if 'status' in position_data:
  1896. position.status = position_data['status'] or 'active'
  1897. # 更新时间会自动设置(onupdate=datetime.now)
  1898. # 保存更新
  1899. db.session.commit()
  1900. logging.info(f"成功更新酒店职位记录,ID: {position.id}")
  1901. return {
  1902. 'code': 200,
  1903. 'success': True,
  1904. 'message': '酒店职位记录更新成功',
  1905. 'data': position.to_dict()
  1906. }
  1907. except Exception as e:
  1908. db.session.rollback()
  1909. error_msg = f"更新酒店职位记录失败: {str(e)}"
  1910. logging.error(error_msg, exc_info=True)
  1911. return {
  1912. 'code': 500,
  1913. 'success': False,
  1914. 'message': error_msg,
  1915. 'data': None
  1916. }
  1917. def query_hotel_positions(position_id):
  1918. """
  1919. 查找指定ID的酒店职位数据表记录
  1920. Args:
  1921. position_id (int): 职位记录ID
  1922. Returns:
  1923. dict: 包含操作结果和职位信息的字典
  1924. """
  1925. try:
  1926. # 根据ID查找职位记录
  1927. position = HotelPosition.query.get(position_id)
  1928. if not position:
  1929. return {
  1930. 'code': 404,
  1931. 'success': False,
  1932. 'message': f'未找到ID为{position_id}的职位记录',
  1933. 'data': None
  1934. }
  1935. # 返回找到的记录
  1936. return {
  1937. 'code': 200,
  1938. 'success': True,
  1939. 'message': '查找职位记录成功',
  1940. 'data': position.to_dict()
  1941. }
  1942. except Exception as e:
  1943. error_msg = f"查找职位记录失败: {str(e)}"
  1944. logging.error(error_msg, exc_info=True)
  1945. return {
  1946. 'code': 500,
  1947. 'success': False,
  1948. 'message': error_msg,
  1949. 'data': None
  1950. }
  1951. def delete_hotel_positions(position_id):
  1952. """
  1953. 删除指定ID的酒店职位数据表记录
  1954. Args:
  1955. position_id (int): 职位记录ID
  1956. Returns:
  1957. dict: 包含操作结果的字典
  1958. """
  1959. try:
  1960. # 根据ID查找要删除的职位记录
  1961. position = HotelPosition.query.get(position_id)
  1962. if not position:
  1963. return {
  1964. 'code': 404,
  1965. 'success': False,
  1966. 'message': f'未找到ID为{position_id}的职位记录',
  1967. 'data': None
  1968. }
  1969. # 保存被删除记录的信息,用于返回
  1970. deleted_position_info = position.to_dict()
  1971. # 执行删除操作
  1972. db.session.delete(position)
  1973. db.session.commit()
  1974. logging.info(f"成功删除酒店职位记录,ID: {position_id}")
  1975. return {
  1976. 'code': 200,
  1977. 'success': True,
  1978. 'message': '职位记录删除成功',
  1979. 'data': deleted_position_info
  1980. }
  1981. except Exception as e:
  1982. db.session.rollback()
  1983. error_msg = f"删除职位记录失败: {str(e)}"
  1984. logging.error(error_msg, exc_info=True)
  1985. return {
  1986. 'code': 500,
  1987. 'success': False,
  1988. 'message': error_msg,
  1989. 'data': None
  1990. }
  1991. # 酒店集团子品牌数据模型
  1992. class HotelGroupBrands(db.Model):
  1993. __tablename__ = 'hotel_group_brands'
  1994. id = db.Column(db.Integer, primary_key=True, autoincrement=True)
  1995. group_name_en = db.Column(db.String(60), nullable=False)
  1996. group_name_zh = db.Column(db.String(20), nullable=False)
  1997. brand_name_en = db.Column(db.String(40), nullable=False)
  1998. brand_name_zh = db.Column(db.String(40), nullable=False)
  1999. positioning_level_en = db.Column(db.String(20), nullable=False)
  2000. positioning_level_zh = db.Column(db.String(5), nullable=False)
  2001. created_at = db.Column(db.DateTime, default=datetime.now, nullable=False)
  2002. updated_at = db.Column(db.DateTime, default=datetime.now, onupdate=datetime.now)
  2003. created_by = db.Column(db.String(50), default='system')
  2004. updated_by = db.Column(db.String(50), default='system')
  2005. status = db.Column(db.String(20), default='active')
  2006. def to_dict(self):
  2007. return {
  2008. 'id': self.id,
  2009. 'group_name_en': self.group_name_en,
  2010. 'group_name_zh': self.group_name_zh,
  2011. 'brand_name_en': self.brand_name_en,
  2012. 'brand_name_zh': self.brand_name_zh,
  2013. 'positioning_level_en': self.positioning_level_en,
  2014. 'positioning_level_zh': self.positioning_level_zh,
  2015. 'created_at': self.created_at.strftime('%Y-%m-%d %H:%M:%S') if self.created_at else None,
  2016. 'updated_at': self.updated_at.strftime('%Y-%m-%d %H:%M:%S') if self.updated_at else None,
  2017. 'created_by': self.created_by,
  2018. 'updated_by': self.updated_by,
  2019. 'status': self.status
  2020. }
  2021. def get_hotel_group_brands_list():
  2022. """
  2023. 获取酒店集团子品牌数据表的全部记录
  2024. Returns:
  2025. dict: 包含操作结果和酒店集团品牌列表的字典
  2026. """
  2027. try:
  2028. # 查询所有酒店集团品牌记录,按集团和品牌排序
  2029. brands = HotelGroupBrands.query.order_by(
  2030. HotelGroupBrands.group_name_zh,
  2031. HotelGroupBrands.brand_name_zh
  2032. ).all()
  2033. # 将所有记录转换为字典格式
  2034. brands_data = [brand.to_dict() for brand in brands]
  2035. return {
  2036. 'code': 200,
  2037. 'success': True,
  2038. 'message': '获取酒店集团品牌列表成功',
  2039. 'data': brands_data,
  2040. 'count': len(brands_data)
  2041. }
  2042. except Exception as e:
  2043. error_msg = f"获取酒店集团品牌列表失败: {str(e)}"
  2044. logging.error(error_msg, exc_info=True)
  2045. return {
  2046. 'code': 500,
  2047. 'success': False,
  2048. 'message': error_msg,
  2049. 'data': [],
  2050. 'count': 0
  2051. }
  2052. def add_hotel_group_brands(brand_data):
  2053. """
  2054. 新增酒店集团子品牌数据表记录
  2055. Args:
  2056. brand_data (dict): 包含品牌信息的字典,包括:
  2057. - group_name_en: 集团英文名称 (必填)
  2058. - group_name_zh: 集团中文名称 (必填)
  2059. - brand_name_en: 品牌英文名称 (必填)
  2060. - brand_name_zh: 品牌中文名称 (必填)
  2061. - positioning_level_en: 定位级别英文名称 (必填)
  2062. - positioning_level_zh: 定位级别中文名称 (必填)
  2063. - created_by: 创建者 (可选,默认为'system')
  2064. - updated_by: 更新者 (可选,默认为'system')
  2065. - status: 状态 (可选,默认为'active')
  2066. Returns:
  2067. dict: 包含操作结果和创建的品牌信息的字典
  2068. """
  2069. try:
  2070. # 验证必填字段
  2071. required_fields = ['group_name_en', 'group_name_zh', 'brand_name_en', 'brand_name_zh', 'positioning_level_en', 'positioning_level_zh']
  2072. missing_fields = []
  2073. for field in required_fields:
  2074. if field not in brand_data or not brand_data[field] or not brand_data[field].strip():
  2075. missing_fields.append(field)
  2076. if missing_fields:
  2077. return {
  2078. 'code': 400,
  2079. 'success': False,
  2080. 'message': f'缺少必填字段: {", ".join(missing_fields)}',
  2081. 'data': None
  2082. }
  2083. # 检查是否已存在相同的品牌记录(基于集团和品牌的中文名称)
  2084. existing_brand = HotelGroupBrands.query.filter_by(
  2085. group_name_zh=brand_data['group_name_zh'].strip(),
  2086. brand_name_zh=brand_data['brand_name_zh'].strip()
  2087. ).first()
  2088. if existing_brand:
  2089. return {
  2090. 'code': 409,
  2091. 'success': False,
  2092. 'message': f'品牌记录已存在:{brand_data["group_name_zh"]} - {brand_data["brand_name_zh"]}',
  2093. 'data': existing_brand.to_dict()
  2094. }
  2095. # 创建新的品牌记录
  2096. new_brand = HotelGroupBrands(
  2097. group_name_en=brand_data['group_name_en'].strip(),
  2098. group_name_zh=brand_data['group_name_zh'].strip(),
  2099. brand_name_en=brand_data['brand_name_en'].strip(),
  2100. brand_name_zh=brand_data['brand_name_zh'].strip(),
  2101. positioning_level_en=brand_data['positioning_level_en'].strip(),
  2102. positioning_level_zh=brand_data['positioning_level_zh'].strip(),
  2103. created_by=brand_data.get('created_by', 'system'),
  2104. updated_by=brand_data.get('updated_by', 'system'),
  2105. status=brand_data.get('status', 'active')
  2106. )
  2107. # 保存到数据库
  2108. db.session.add(new_brand)
  2109. db.session.commit()
  2110. logging.info(f"成功创建酒店集团品牌记录,ID: {new_brand.id}")
  2111. return {
  2112. 'code': 200,
  2113. 'success': True,
  2114. 'message': '酒店集团品牌记录创建成功',
  2115. 'data': new_brand.to_dict()
  2116. }
  2117. except Exception as e:
  2118. db.session.rollback()
  2119. error_msg = f"创建酒店集团品牌记录失败: {str(e)}"
  2120. logging.error(error_msg, exc_info=True)
  2121. return {
  2122. 'code': 500,
  2123. 'success': False,
  2124. 'message': error_msg,
  2125. 'data': None
  2126. }
  2127. def update_hotel_group_brands(brand_id, brand_data):
  2128. """
  2129. 修改酒店集团子品牌数据表记录
  2130. Args:
  2131. brand_id (int): 品牌记录ID
  2132. brand_data (dict): 包含要更新的品牌信息的字典,可能包括:
  2133. - group_name_en: 集团英文名称
  2134. - group_name_zh: 集团中文名称
  2135. - brand_name_en: 品牌英文名称
  2136. - brand_name_zh: 品牌中文名称
  2137. - positioning_level_en: 定位级别英文名称
  2138. - positioning_level_zh: 定位级别中文名称
  2139. - updated_by: 更新者
  2140. - status: 状态
  2141. Returns:
  2142. dict: 包含操作结果和更新后的品牌信息的字典
  2143. """
  2144. try:
  2145. # 查找要更新的品牌记录
  2146. brand = HotelGroupBrands.query.get(brand_id)
  2147. if not brand:
  2148. return {
  2149. 'code': 404,
  2150. 'success': False,
  2151. 'message': f'未找到ID为{brand_id}的品牌记录',
  2152. 'data': None
  2153. }
  2154. # 检查是否有数据需要更新
  2155. if not brand_data:
  2156. return {
  2157. 'code': 400,
  2158. 'success': False,
  2159. 'message': '请求数据为空',
  2160. 'data': None
  2161. }
  2162. # 如果要更新集团和品牌名称,检查是否会与其他记录冲突
  2163. new_group_name_zh = brand_data.get('group_name_zh', brand.group_name_zh).strip() if brand_data.get('group_name_zh') else brand.group_name_zh
  2164. new_brand_name_zh = brand_data.get('brand_name_zh', brand.brand_name_zh).strip() if brand_data.get('brand_name_zh') else brand.brand_name_zh
  2165. # 查找是否存在相同的品牌记录(排除当前记录)
  2166. existing_brand = HotelGroupBrands.query.filter(
  2167. HotelGroupBrands.id != brand_id,
  2168. HotelGroupBrands.group_name_zh == new_group_name_zh,
  2169. HotelGroupBrands.brand_name_zh == new_brand_name_zh
  2170. ).first()
  2171. if existing_brand:
  2172. return {
  2173. 'code': 409,
  2174. 'success': False,
  2175. 'message': f'品牌记录已存在:{new_group_name_zh} - {new_brand_name_zh}',
  2176. 'data': existing_brand.to_dict()
  2177. }
  2178. # 更新品牌信息
  2179. if 'group_name_en' in brand_data and brand_data['group_name_en']:
  2180. brand.group_name_en = brand_data['group_name_en'].strip()
  2181. if 'group_name_zh' in brand_data and brand_data['group_name_zh']:
  2182. brand.group_name_zh = brand_data['group_name_zh'].strip()
  2183. if 'brand_name_en' in brand_data and brand_data['brand_name_en']:
  2184. brand.brand_name_en = brand_data['brand_name_en'].strip()
  2185. if 'brand_name_zh' in brand_data and brand_data['brand_name_zh']:
  2186. brand.brand_name_zh = brand_data['brand_name_zh'].strip()
  2187. if 'positioning_level_en' in brand_data and brand_data['positioning_level_en']:
  2188. brand.positioning_level_en = brand_data['positioning_level_en'].strip()
  2189. if 'positioning_level_zh' in brand_data and brand_data['positioning_level_zh']:
  2190. brand.positioning_level_zh = brand_data['positioning_level_zh'].strip()
  2191. if 'updated_by' in brand_data:
  2192. brand.updated_by = brand_data['updated_by'] or 'system'
  2193. if 'status' in brand_data:
  2194. brand.status = brand_data['status'] or 'active'
  2195. # 更新时间会自动设置(onupdate=datetime.now)
  2196. # 保存更新
  2197. db.session.commit()
  2198. logging.info(f"成功更新酒店集团品牌记录,ID: {brand.id}")
  2199. return {
  2200. 'code': 200,
  2201. 'success': True,
  2202. 'message': '酒店集团品牌记录更新成功',
  2203. 'data': brand.to_dict()
  2204. }
  2205. except Exception as e:
  2206. db.session.rollback()
  2207. error_msg = f"更新酒店集团品牌记录失败: {str(e)}"
  2208. logging.error(error_msg, exc_info=True)
  2209. return {
  2210. 'code': 500,
  2211. 'success': False,
  2212. 'message': error_msg,
  2213. 'data': None
  2214. }
  2215. def query_hotel_group_brands(brand_id):
  2216. """
  2217. 查找指定ID的酒店集团子品牌数据表记录
  2218. Args:
  2219. brand_id (int): 品牌记录ID
  2220. Returns:
  2221. dict: 包含操作结果和品牌信息的字典
  2222. """
  2223. try:
  2224. # 根据ID查找品牌记录
  2225. brand = HotelGroupBrands.query.get(brand_id)
  2226. if not brand:
  2227. return {
  2228. 'code': 404,
  2229. 'success': False,
  2230. 'message': f'未找到ID为{brand_id}的品牌记录',
  2231. 'data': None
  2232. }
  2233. # 返回找到的记录
  2234. return {
  2235. 'code': 200,
  2236. 'success': True,
  2237. 'message': '查找品牌记录成功',
  2238. 'data': brand.to_dict()
  2239. }
  2240. except Exception as e:
  2241. error_msg = f"查找品牌记录失败: {str(e)}"
  2242. logging.error(error_msg, exc_info=True)
  2243. return {
  2244. 'code': 500,
  2245. 'success': False,
  2246. 'message': error_msg,
  2247. 'data': None
  2248. }
  2249. def delete_hotel_group_brands(brand_id):
  2250. """
  2251. 删除指定ID的酒店集团子品牌数据表记录
  2252. Args:
  2253. brand_id (int): 品牌记录ID
  2254. Returns:
  2255. dict: 包含操作结果的字典
  2256. """
  2257. try:
  2258. # 根据ID查找要删除的品牌记录
  2259. brand = HotelGroupBrands.query.get(brand_id)
  2260. if not brand:
  2261. return {
  2262. 'code': 404,
  2263. 'success': False,
  2264. 'message': f'未找到ID为{brand_id}的品牌记录',
  2265. 'data': None
  2266. }
  2267. # 保存被删除记录的信息,用于返回
  2268. deleted_brand_info = brand.to_dict()
  2269. # 执行删除操作
  2270. db.session.delete(brand)
  2271. db.session.commit()
  2272. logging.info(f"成功删除酒店集团品牌记录,ID: {brand_id}")
  2273. return {
  2274. 'code': 200,
  2275. 'success': True,
  2276. 'message': '品牌记录删除成功',
  2277. 'data': deleted_brand_info
  2278. }
  2279. except Exception as e:
  2280. db.session.rollback()
  2281. error_msg = f"删除品牌记录失败: {str(e)}"
  2282. logging.error(error_msg, exc_info=True)
  2283. return {
  2284. 'code': 500,
  2285. 'success': False,
  2286. 'message': error_msg,
  2287. 'data': None
  2288. }
  2289. def get_duplicate_records(status=None):
  2290. """
  2291. 获取重复记录列表
  2292. Args:
  2293. status (str, optional): 筛选特定状态的记录 ('pending', 'processed', 'ignored')
  2294. Returns:
  2295. dict: 包含操作结果和重复记录列表
  2296. """
  2297. try:
  2298. # 构建查询
  2299. query = DuplicateBusinessCard.query
  2300. if status:
  2301. query = query.filter_by(processing_status=status)
  2302. # 按创建时间倒序排列
  2303. duplicate_records = query.order_by(DuplicateBusinessCard.created_at.desc()).all()
  2304. # 获取详细信息,包括主记录
  2305. records_data = []
  2306. for record in duplicate_records:
  2307. record_dict = record.to_dict()
  2308. # 添加主记录信息
  2309. if record.main_card:
  2310. record_dict['main_card'] = record.main_card.to_dict()
  2311. records_data.append(record_dict)
  2312. return {
  2313. 'code': 200,
  2314. 'success': True,
  2315. 'message': '获取重复记录列表成功',
  2316. 'data': records_data,
  2317. 'count': len(records_data)
  2318. }
  2319. except Exception as e:
  2320. error_msg = f"获取重复记录列表失败: {str(e)}"
  2321. logging.error(error_msg, exc_info=True)
  2322. return {
  2323. 'code': 500,
  2324. 'success': False,
  2325. 'message': error_msg,
  2326. 'data': [],
  2327. 'count': 0
  2328. }
  2329. def process_duplicate_record(duplicate_id, action, selected_duplicate_id=None, processed_by=None, notes=None):
  2330. """
  2331. 处理重复记录
  2332. Args:
  2333. duplicate_id (int): 名片记录ID(对应DuplicateBusinessCard表中的main_card_id字段)
  2334. action (str): 处理动作 ('merge_to_suspected', 'keep_main', 'ignore')
  2335. selected_duplicate_id (int, optional): 当action为'merge_to_suspected'时,选择的疑似重复记录ID
  2336. processed_by (str, optional): 处理人
  2337. notes (str, optional): 处理备注
  2338. Returns:
  2339. dict: 包含操作结果
  2340. """
  2341. try:
  2342. # 查找重复记录 - 使用main_card_id字段匹配
  2343. duplicate_record = DuplicateBusinessCard.query.filter_by(main_card_id=duplicate_id).first()
  2344. if not duplicate_record:
  2345. return {
  2346. 'code': 404,
  2347. 'success': False,
  2348. 'message': f'未找到main_card_id为{duplicate_id}的重复记录',
  2349. 'data': None
  2350. }
  2351. if duplicate_record.processing_status != 'pending':
  2352. return {
  2353. 'code': 400,
  2354. 'success': False,
  2355. 'message': f'重复记录状态为{duplicate_record.processing_status},无法处理',
  2356. 'data': None
  2357. }
  2358. main_card = duplicate_record.main_card
  2359. if not main_card:
  2360. return {
  2361. 'code': 404,
  2362. 'success': False,
  2363. 'message': '未找到对应的主记录',
  2364. 'data': None
  2365. }
  2366. result_data = None
  2367. if action == 'merge_to_suspected':
  2368. # 合并到选中的疑似重复记录
  2369. if not selected_duplicate_id:
  2370. return {
  2371. 'code': 400,
  2372. 'success': False,
  2373. 'message': '执行合并操作时必须提供selected_duplicate_id',
  2374. 'data': None
  2375. }
  2376. # 查找选中的疑似重复记录
  2377. target_card = BusinessCard.query.get(selected_duplicate_id)
  2378. if not target_card:
  2379. return {
  2380. 'code': 404,
  2381. 'success': False,
  2382. 'message': f'未找到ID为{selected_duplicate_id}的目标记录',
  2383. 'data': None
  2384. }
  2385. # 将主记录的信息合并到目标记录,并更新职业轨迹
  2386. target_card.name_en = main_card.name_en or target_card.name_en
  2387. target_card.title_zh = main_card.title_zh or target_card.title_zh
  2388. target_card.title_en = main_card.title_en or target_card.title_en
  2389. target_card.mobile = main_card.mobile or target_card.mobile
  2390. target_card.phone = main_card.phone or target_card.phone
  2391. target_card.email = main_card.email or target_card.email
  2392. target_card.hotel_zh = main_card.hotel_zh or target_card.hotel_zh
  2393. target_card.hotel_en = main_card.hotel_en or target_card.hotel_en
  2394. target_card.address_zh = main_card.address_zh or target_card.address_zh
  2395. target_card.address_en = main_card.address_en or target_card.address_en
  2396. target_card.postal_code_zh = main_card.postal_code_zh or target_card.postal_code_zh
  2397. target_card.postal_code_en = main_card.postal_code_en or target_card.postal_code_en
  2398. target_card.brand_zh = main_card.brand_zh or target_card.brand_zh
  2399. target_card.brand_en = main_card.brand_en or target_card.brand_en
  2400. target_card.affiliation_zh = main_card.affiliation_zh or target_card.affiliation_zh
  2401. target_card.affiliation_en = main_card.affiliation_en or target_card.affiliation_en
  2402. target_card.birthday = main_card.birthday or target_card.birthday
  2403. target_card.residence = main_card.residence or target_card.residence
  2404. target_card.brand_group = main_card.brand_group or target_card.brand_group
  2405. target_card.image_path = main_card.image_path # 更新为最新的MinIO图片路径
  2406. target_card.updated_by = processed_by or 'system'
  2407. # 更新职业轨迹,使用主记录的图片路径
  2408. new_data = {
  2409. 'hotel_zh': main_card.hotel_zh,
  2410. 'hotel_en': main_card.hotel_en,
  2411. 'title_zh': main_card.title_zh,
  2412. 'title_en': main_card.title_en
  2413. }
  2414. target_card.career_path = update_career_path(target_card, new_data, main_card.image_path)
  2415. # 先删除重复记录表中的记录,避免外键约束冲突
  2416. db.session.delete(duplicate_record)
  2417. # 然后删除主记录
  2418. db.session.delete(main_card)
  2419. result_data = target_card.to_dict()
  2420. elif action == 'keep_main':
  2421. # 保留主记录,不做任何合并
  2422. result_data = main_card.to_dict()
  2423. elif action == 'ignore':
  2424. # 忽略,不做任何操作
  2425. result_data = main_card.to_dict()
  2426. # 更新重复记录状态(只有在非merge_to_suspected操作时才更新,因为merge_to_suspected已经删除了记录)
  2427. if action != 'merge_to_suspected':
  2428. duplicate_record.processing_status = 'processed'
  2429. duplicate_record.processed_at = datetime.now()
  2430. duplicate_record.processed_by = processed_by or 'system'
  2431. duplicate_record.processing_notes = notes or f'执行操作: {action}'
  2432. db.session.commit()
  2433. logging.info(f"成功处理重复记录,main_card_id: {duplicate_id},操作: {action}")
  2434. return {
  2435. 'code': 200,
  2436. 'success': True,
  2437. 'message': f'重复记录处理成功,操作: {action}',
  2438. 'data': {
  2439. 'duplicate_record': duplicate_record.to_dict(),
  2440. 'result': result_data
  2441. }
  2442. }
  2443. except Exception as e:
  2444. db.session.rollback()
  2445. error_msg = f"处理重复记录失败: {str(e)}"
  2446. logging.error(error_msg, exc_info=True)
  2447. return {
  2448. 'code': 500,
  2449. 'success': False,
  2450. 'message': error_msg,
  2451. 'data': None
  2452. }
  2453. def get_duplicate_record_detail(duplicate_id):
  2454. """
  2455. 获取指定重复记录的详细信息
  2456. Args:
  2457. duplicate_id (int): 名片记录ID(对应DuplicateBusinessCard表中的main_card_id字段)
  2458. Returns:
  2459. dict: 包含重复记录详细信息
  2460. """
  2461. try:
  2462. # 查找重复记录 - 使用main_card_id字段匹配
  2463. duplicate_record = DuplicateBusinessCard.query.filter_by(main_card_id=duplicate_id).first()
  2464. if not duplicate_record:
  2465. return {
  2466. 'code': 404,
  2467. 'success': False,
  2468. 'message': f'未找到main_card_id为{duplicate_id}的重复记录',
  2469. 'data': None
  2470. }
  2471. # 构建详细信息
  2472. record_dict = duplicate_record.to_dict()
  2473. # 添加主记录信息
  2474. if duplicate_record.main_card:
  2475. record_dict['main_card'] = duplicate_record.main_card.to_dict()
  2476. else:
  2477. record_dict['main_card'] = None
  2478. # 解析suspected_duplicates字段中的JSON信息,并获取详细的名片信息
  2479. suspected_duplicates_details = []
  2480. if duplicate_record.suspected_duplicates:
  2481. try:
  2482. # 确保suspected_duplicates是列表格式
  2483. suspected_list = duplicate_record.suspected_duplicates
  2484. if not isinstance(suspected_list, list):
  2485. logging.warning(f"suspected_duplicates不是列表格式: {type(suspected_list)}")
  2486. suspected_list = []
  2487. # 遍历每个疑似重复记录ID
  2488. for suspected_item in suspected_list:
  2489. try:
  2490. # 支持两种格式:直接ID或包含ID的字典
  2491. if isinstance(suspected_item, dict):
  2492. card_id = suspected_item.get('id')
  2493. else:
  2494. card_id = suspected_item
  2495. if card_id:
  2496. # 调用get_business_card函数获取详细信息
  2497. card_result = get_business_card(card_id)
  2498. if card_result['success'] and card_result['data']:
  2499. suspected_duplicates_details.append(card_result['data'])
  2500. logging.info(f"成功获取疑似重复记录详情,ID: {card_id}")
  2501. else:
  2502. logging.warning(f"无法获取疑似重复记录详情,ID: {card_id}, 原因: {card_result['message']}")
  2503. # 添加错误信息记录
  2504. suspected_duplicates_details.append({
  2505. 'id': card_id,
  2506. 'error': card_result['message'],
  2507. 'success': False
  2508. })
  2509. else:
  2510. logging.warning(f"疑似重复记录项缺少ID信息: {suspected_item}")
  2511. except Exception as item_error:
  2512. logging.error(f"处理疑似重复记录项时出错: {suspected_item}, 错误: {str(item_error)}")
  2513. suspected_duplicates_details.append({
  2514. 'original_item': suspected_item,
  2515. 'error': f"处理出错: {str(item_error)}",
  2516. 'success': False
  2517. })
  2518. except Exception as parse_error:
  2519. logging.error(f"解析suspected_duplicates JSON时出错: {str(parse_error)}")
  2520. suspected_duplicates_details = [{
  2521. 'error': f"解析JSON出错: {str(parse_error)}",
  2522. 'original_data': duplicate_record.suspected_duplicates,
  2523. 'success': False
  2524. }]
  2525. # 将详细的疑似重复记录信息添加到返回数据中
  2526. record_dict['suspected_duplicates_details'] = suspected_duplicates_details
  2527. record_dict['suspected_duplicates_count'] = len(suspected_duplicates_details)
  2528. return {
  2529. 'code': 200,
  2530. 'success': True,
  2531. 'message': '获取重复记录详情成功',
  2532. 'data': record_dict
  2533. }
  2534. except Exception as e:
  2535. error_msg = f"获取重复记录详情失败: {str(e)}"
  2536. logging.error(error_msg, exc_info=True)
  2537. return {
  2538. 'code': 500,
  2539. 'success': False,
  2540. 'message': error_msg,
  2541. 'data': None
  2542. }
  2543. def fix_broken_duplicate_records():
  2544. """
  2545. 修复duplicate_business_cards表中main_card_id为null的损坏记录
  2546. Returns:
  2547. dict: 修复操作的结果
  2548. """
  2549. try:
  2550. # 查找所有main_card_id为null的记录
  2551. broken_records = DuplicateBusinessCard.query.filter(
  2552. DuplicateBusinessCard.main_card_id.is_(None)
  2553. ).all()
  2554. if not broken_records:
  2555. return {
  2556. 'code': 200,
  2557. 'success': True,
  2558. 'message': '没有发现需要修复的损坏记录',
  2559. 'data': {
  2560. 'fixed_count': 0,
  2561. 'total_broken': 0
  2562. }
  2563. }
  2564. # 记录要删除的记录信息
  2565. broken_info = []
  2566. for record in broken_records:
  2567. broken_info.append({
  2568. 'id': record.id,
  2569. 'duplicate_reason': record.duplicate_reason,
  2570. 'processing_status': record.processing_status,
  2571. 'created_at': record.created_at.strftime('%Y-%m-%d %H:%M:%S') if record.created_at else None,
  2572. 'processed_at': record.processed_at.strftime('%Y-%m-%d %H:%M:%S') if record.processed_at else None
  2573. })
  2574. # 删除所有损坏的记录
  2575. for record in broken_records:
  2576. db.session.delete(record)
  2577. # 提交事务
  2578. db.session.commit()
  2579. logging.info(f"成功修复并删除了{len(broken_records)}条损坏的重复记录")
  2580. return {
  2581. 'code': 200,
  2582. 'success': True,
  2583. 'message': f'成功修复并删除了{len(broken_records)}条损坏的重复记录',
  2584. 'data': {
  2585. 'fixed_count': len(broken_records),
  2586. 'total_broken': len(broken_records),
  2587. 'deleted_records': broken_info
  2588. }
  2589. }
  2590. except Exception as e:
  2591. db.session.rollback()
  2592. error_msg = f"修复损坏记录失败: {str(e)}"
  2593. logging.error(error_msg, exc_info=True)
  2594. return {
  2595. 'code': 500,
  2596. 'success': False,
  2597. 'message': error_msg,
  2598. 'data': None
  2599. }