llm_service.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
"""
LLM base service.

Provides the basic functionality for communicating with a large language
model (LLM).
"""
import logging
from openai import OpenAI
from flask import current_app

# Module-wide logger; it uses the logger registered under the name "app".
logger = logging.getLogger("app")
  9. def llm_client(content):
  10. """
  11. 调用LLM服务进行内容生成
  12. Args:
  13. content: 输入提示内容
  14. Returns:
  15. str: LLM响应内容
  16. """
  17. try:
  18. # 优先使用配置文件中的参数
  19. client = OpenAI(
  20. api_key=current_app.config.get('LLM_API_KEY'),
  21. base_url=current_app.config.get('LLM_BASE_URL')
  22. )
  23. model = current_app.config.get('LLM_MODEL_NAME')
  24. # 判断是否为翻译请求 - 通过分析内容是否包含中文字符
  25. is_translate_request = False
  26. if any('\u4e00' <= char <= '\u9fff' for char in content):
  27. is_translate_request = True
  28. # 进行API调用
  29. logger.debug(f"LLM调用开始: model={model}, 内容类型: {'翻译' if is_translate_request else '普通'}")
  30. if is_translate_request:
  31. # 为翻译请求使用非常严格的prompt
  32. completion = client.chat.completions.create(
  33. model=model,
  34. messages=[
  35. {
  36. "role": "system",
  37. "content": "你是一个严格遵循指令的翻译工具和数据库专家。你的唯一任务是将中文单词/短语翻译成英文,符合postgresql数据库表和字段的命令规则,"
  38. "并且严格按照如下规则:\n"
  39. "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
  40. "2. 使用小写字母\n"
  41. "3. 多个单词用下划线连接,不使用空格\n"
  42. "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
  43. "5. 最多包含1-5个英文单词,保持简短\n"
  44. "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
  45. "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
  46. "8. 例如:'薪资数据表'应翻译为'salary_data_table','人员管理表'应翻译为'personnel_management_table'"
  47. },
  48. {
  49. "role": "user",
  50. "content": f"将以下内容翻译为英文短语(不超过5个单词):{content}"
  51. }
  52. ],
  53. temperature=0,
  54. max_tokens=10, # 限制token数量确保回答简短
  55. )
  56. else:
  57. # 普通请求
  58. completion = client.chat.completions.create(
  59. model=model,
  60. messages=[
  61. {"role": "system", "content": "You are a helpful assistant."},
  62. {"role": "user", "content": content}
  63. ],
  64. temperature=0.7,
  65. max_tokens=1024
  66. )
  67. response_text = completion.choices[0].message.content.strip()
  68. # 对翻译结果进行后处理,确保格式正确
  69. if is_translate_request:
  70. # 去除可能的引号、句号等标点符号
  71. response_text = response_text.strip('"\'.,;:!?()[]{}').lower()
  72. # 替换空格为下划线
  73. response_text = response_text.replace(' ', '_')
  74. # 确保没有连续的下划线
  75. while '__' in response_text:
  76. response_text = response_text.replace('__', '_')
  77. # 只保留字母、数字和下划线
  78. response_text = ''.join(c for c in response_text if c.isalnum() or c == '_')
  79. # 确保"表"被翻译为"table"
  80. if '表' in content and 'table' not in response_text and 'sheet' in response_text:
  81. response_text = response_text.replace('sheet', 'table')
  82. logger.debug(f"LLM响应: {response_text}")
  83. return response_text
  84. except Exception as e:
  85. logger.error(f"LLM调用失败: {str(e)}")
  86. try:
  87. # 备用方案:如果是中文输入,尝试简单翻译映射
  88. if any('\u4e00' <= char <= '\u9fff' for char in content):
  89. # 常见中文词汇映射
  90. common_translations = {
  91. "薪资数据表": "salary_data_table",
  92. "数据表": "data_table",
  93. "用户表": "user_table",
  94. "人员表": "personnel_table",
  95. "销售表": "sales_table",
  96. "年份": "year",
  97. "地区": "region",
  98. "姓名": "name",
  99. "年龄": "age",
  100. "薪水": "salary",
  101. "数据": "data",
  102. "管理": "management",
  103. "系统": "system",
  104. "分析": "analysis",
  105. "报表": "report_table",
  106. }
  107. # 检查是否有精确匹配
  108. if content in common_translations:
  109. return common_translations[content]
  110. # 检查是否包含某些关键词
  111. for key, value in common_translations.items():
  112. if key in content:
  113. return value
  114. # 如果包含"表"字,确保返回包含"table"
  115. if "表" in content:
  116. return "data_table"
  117. # 无法匹配时返回默认值
  118. return "translated_text"
  119. return content
  120. except:
  121. return content