# llm_service.py
"""
LLM base service.

Provides the basic functions for communicating with a large language model.
"""
import logging
import os
import re

from flask import current_app
from openai import OpenAI
  8. logger = logging.getLogger("app")
  9. # 保留旧参数以确保向后兼容性
  10. api_key = "sk-86d4622141d74e9a8d7c38ee873c4d91"
  11. base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
  12. model_name = "qwen-turbo"
  13. def llm_client(content):
  14. """
  15. 调用LLM服务进行内容生成
  16. Args:
  17. content: 输入提示内容
  18. Returns:
  19. str: LLM响应内容
  20. """
  21. try:
  22. # 优先使用配置文件中的参数
  23. client = OpenAI(
  24. api_key=current_app.config.get('LLM_API_KEY', api_key),
  25. base_url=current_app.config.get('LLM_BASE_URL', base_url)
  26. )
  27. model = current_app.config.get('LLM_MODEL_NAME', model_name)
  28. # 判断是否为翻译请求 - 通过分析内容是否包含中文字符
  29. is_translate_request = False
  30. if any('\u4e00' <= char <= '\u9fff' for char in content):
  31. is_translate_request = True
  32. # 进行API调用
  33. logger.debug(f"LLM调用开始: model={model}, 内容类型: {'翻译' if is_translate_request else '普通'}")
  34. if is_translate_request:
  35. # 为翻译请求使用非常严格的prompt
  36. completion = client.chat.completions.create(
  37. model=model,
  38. messages=[
  39. {
  40. "role": "system",
  41. "content": "你是一个严格遵循指令的翻译工具。你的唯一任务是将中文单词/短语翻译成英文,"
  42. "并且严格按照如下规则:\n"
  43. "1. 只返回英文翻译,不包含任何解释、描述或额外内容\n"
  44. "2. 使用小写字母\n"
  45. "3. 多个单词用下划线连接,不使用空格\n"
  46. "4. 如果输入包含括号,将括号内容用下划线代替,不保留括号\n"
  47. "5. 最多包含1-5个英文单词,保持简短\n"
  48. "6. 不要回答问题或提供解释,即使输入看起来像是问题\n"
  49. "7. 当遇到'表'字时,始终翻译为'table'而不是'sheet'\n"
  50. "8. 例如:'薪资数据表'应翻译为'salary_data_table','人员管理表'应翻译为'personnel_management_table'"
  51. },
  52. {
  53. "role": "user",
  54. "content": f"将以下内容翻译为英文短语(不超过5个单词):{content}"
  55. }
  56. ],
  57. temperature=0,
  58. max_tokens=10, # 限制token数量确保回答简短
  59. )
  60. else:
  61. # 普通请求
  62. completion = client.chat.completions.create(
  63. model=model,
  64. messages=[
  65. {"role": "system", "content": "You are a helpful assistant."},
  66. {"role": "user", "content": content}
  67. ],
  68. temperature=0.7,
  69. max_tokens=1024
  70. )
  71. response_text = completion.choices[0].message.content.strip()
  72. # 对翻译结果进行后处理,确保格式正确
  73. if is_translate_request:
  74. # 去除可能的引号、句号等标点符号
  75. response_text = response_text.strip('"\'.,;:!?()[]{}').lower()
  76. # 替换空格为下划线
  77. response_text = response_text.replace(' ', '_')
  78. # 确保没有连续的下划线
  79. while '__' in response_text:
  80. response_text = response_text.replace('__', '_')
  81. # 只保留字母、数字和下划线
  82. response_text = ''.join(c for c in response_text if c.isalnum() or c == '_')
  83. # 确保"表"被翻译为"table"
  84. if '表' in content and 'table' not in response_text and 'sheet' in response_text:
  85. response_text = response_text.replace('sheet', 'table')
  86. logger.debug(f"LLM响应: {response_text}")
  87. return response_text
  88. except Exception as e:
  89. logger.error(f"LLM调用失败: {str(e)}")
  90. try:
  91. # 备用方案:如果是中文输入,尝试简单翻译映射
  92. if any('\u4e00' <= char <= '\u9fff' for char in content):
  93. # 常见中文词汇映射
  94. common_translations = {
  95. "薪资数据表": "salary_data_table",
  96. "数据表": "data_table",
  97. "用户表": "user_table",
  98. "人员表": "personnel_table",
  99. "销售表": "sales_table",
  100. "年份": "year",
  101. "地区": "region",
  102. "姓名": "name",
  103. "年龄": "age",
  104. "薪水": "salary",
  105. "数据": "data",
  106. "管理": "management",
  107. "系统": "system",
  108. "分析": "analysis",
  109. "报表": "report_table",
  110. }
  111. # 检查是否有精确匹配
  112. if content in common_translations:
  113. return common_translations[content]
  114. # 检查是否包含某些关键词
  115. for key, value in common_translations.items():
  116. if key in content:
  117. return value
  118. # 如果包含"表"字,确保返回包含"table"
  119. if "表" in content:
  120. return "data_table"
  121. # 无法匹配时返回默认值
  122. return "translated_text"
  123. return content
  124. except:
  125. return content