llm_service.py

"""
Base LLM service.
Provides the basic functions for communicating with large language models.
"""
import logging

from flask import current_app
from openai import OpenAI

logger = logging.getLogger("app")


def llm_client(content):
    """
    Call the LLM service to generate content.

    Args:
        content: The input prompt content.

    Returns:
        str: The LLM response text.
    """
    try:
        # Prefer the parameters from the configuration file.
        client = OpenAI(
            api_key=current_app.config.get("LLM_API_KEY"),
            base_url=current_app.config.get("LLM_BASE_URL"),
        )
        model = current_app.config.get("LLM_MODEL_NAME")

        # Treat the call as a translation request if the content contains Chinese characters.
        is_translate_request = False
        if any("\u4e00" <= char <= "\u9fff" for char in content):
            is_translate_request = True

        # Make the API call.
        logger.debug(
            f"LLM call started: model={model}, request type: {'translation' if is_translate_request else 'general'}"
        )
        if is_translate_request:
            # Use a very strict prompt for translation requests.
            completion = client.chat.completions.create(
                model=model,  # type: ignore[arg-type]
                messages=[
                    {
                        "role": "system",
                        "content": "你是一个严格遵循指令的翻译工具和数据库专家。你的唯一任务是将中文单词/短语翻译成英文,符合postgresql数据库表和字段的命名规则,"
                        "并且严格按照如下规则:\n"
                        "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
                        "2. 使用小写字母\n"
                        "3. 多个单词用下划线连接,不使用空格\n"
                        "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
                        "5. 最多包含1-5个英文单词,保持简短\n"
                        "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
                        "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
                        "8. 例如:'薪资数据表'应翻译为'salary_data_table','人员管理表'应翻译为'personnel_management_table'",
                    },
                    {
                        "role": "user",
                        "content": f"将以下内容翻译为英文短语(不超过5个单词):{content}",
                    },
                ],
                temperature=0,
                max_tokens=10,  # Cap the token count to keep the answer short.
            )
        else:
            # General request.
            completion = client.chat.completions.create(
                model=model,  # type: ignore[arg-type]
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": content},
                ],
                temperature=0.7,
                max_tokens=1024,
            )
        response_text = completion.choices[0].message.content.strip()  # type: ignore[union-attr]

        # Post-process translation results to guarantee a valid identifier format.
        if is_translate_request:
            # Strip possible quotes, periods and other punctuation.
            response_text = response_text.strip("\"'.,;:!?()[]{}").lower()
            # Replace spaces with underscores.
            response_text = response_text.replace(" ", "_")
            # Collapse consecutive underscores.
            while "__" in response_text:
                response_text = response_text.replace("__", "_")
            # Keep only letters, digits and underscores.
            response_text = "".join(c for c in response_text if c.isalnum() or c == "_")
            # Make sure "表" is rendered as "table", not "sheet".
            if (
                "表" in content
                and "table" not in response_text
                and "sheet" in response_text
            ):
                response_text = response_text.replace("sheet", "table")

        logger.debug(f"LLM response: {response_text}")
        return response_text
    except Exception as e:
        logger.error(f"LLM call failed: {str(e)}")
        try:
            # Fallback: if the input is Chinese, try a simple translation mapping.
            if any("\u4e00" <= char <= "\u9fff" for char in content):
                # Common Chinese terms and their translations.
                common_translations = {
                    "薪资数据表": "salary_data_table",
                    "数据表": "data_table",
                    "用户表": "user_table",
                    "人员表": "personnel_table",
                    "销售表": "sales_table",
                    "年份": "year",
                    "地区": "region",
                    "姓名": "name",
                    "年龄": "age",
                    "薪水": "salary",
                    "数据": "data",
                    "管理": "management",
                    "系统": "system",
                    "分析": "analysis",
                    "报表": "report_table",
                }
                # Exact match first.
                if content in common_translations:
                    return common_translations[content]
                # Otherwise look for a known keyword inside the input.
                for key, value in common_translations.items():
                    if key in content:
                        return value
                # If the input mentions "表", make sure the result contains "table".
                if "表" in content:
                    return "data_table"
                # Default value when nothing matches.
                return "translated_text"
            return content
        except Exception:
            # The fallback itself failed; return the original content unchanged.
            return content
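
# Illustrative expectations for llm_client's translation branch, a sketch based only on
# the prompt rules and post-processing above (actual model output may vary):
#
#     llm_client("薪资数据表")  # expected: "salary_data_table"
#     llm_client("人员管理表")  # expected: "personnel_management_table"
#     llm_client("年份")        # "year" via the fallback mapping if the API call fails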


def llm_sql(request_data):
    """
    Call the Deepseek model to generate a SQL script.

    Args:
        request_data: The prompt content submitted to the LLM.

    Returns:
        str: The SQL script returned by the Deepseek model.
    """
    try:
        # Connect to Deepseek with the parameters from the configuration file.
        client = OpenAI(
            api_key=current_app.config.get("LLM_API_KEY"),
            base_url=current_app.config.get("LLM_BASE_URL"),
        )
        model = current_app.config.get("LLM_MODEL_NAME")
        logger.info(f"Calling the Deepseek model to generate a SQL script: model={model}")
        logger.debug(f"Input prompt: {request_data}")

        # Call the Deepseek API to generate the SQL script.
        completion = client.chat.completions.create(
            model=model,  # type: ignore[arg-type]
            messages=[
                {
                    "role": "system",
                    "content": "你是一名专业的数据库工程师,专门负责编写高质量的PostgreSQL SQL脚本。"
                    "请严格按照用户提供的需求和表结构信息生成SQL脚本。"
                    "确保生成的SQL语法正确、性能优化,并且能够直接执行。",
                },
                {"role": "user", "content": request_data},
            ],
            temperature=0.1,  # A low temperature keeps the result consistent and accurate.
            max_tokens=4096,  # Leave enough token budget for the SQL script.
            top_p=0.9,
        )
        response_text = completion.choices[0].message.content.strip()  # type: ignore[union-attr]
        logger.info(f"Deepseek returned a SQL script of {len(response_text)} characters")
        logger.debug(f"Generated SQL script: {response_text}")
        return response_text
    except Exception as e:
        logger.error(f"Deepseek SQL generation call failed: {str(e)}")
        raise Exception(f"Failed to generate a SQL script with the Deepseek model: {str(e)}") from e
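

# Minimal usage sketch: both helpers read LLM_API_KEY, LLM_BASE_URL and LLM_MODEL_NAME
# from the Flask config, so they must be called inside an application context.
# `create_app` is an assumed application factory and the prompts below are illustrative.
#
#     from app import create_app
#
#     app = create_app()
#     with app.app_context():
#         column_name = llm_client("薪资数据表")  # e.g. "salary_data_table"
#         sql_script = llm_sql("Generate a PostgreSQL CREATE TABLE script for the salary data table")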