# Source path: operations_project/apps/common/services/external_api_service.py
# Repository web-view metadata: 313 lines, 12 KiB, Python.
# Last modified: 2025-05-07 18:01:48 +08:00
# apps/common/services/external_api_service.py
import traceback
import requests
import json
import logging
from django.conf import settings
from rest_framework.exceptions import APIException
logger = logging.getLogger(__name__)
class ExternalAPIError(APIException):
    """Error raised by this module when an external API call fails.

    Extends DRF's ``APIException``; the class attributes below provide the
    HTTP status code, default detail message and default error code.
    """

    status_code = 500
    default_code = 'external_api_error'
    default_detail = '外部API调用失败'
def create_external_dataset(knowledge_base):
    """Create a dataset in the external knowledge-base service.

    Posts the knowledge base's ``name`` and ``desc`` to
    ``{settings.API_BASE_URL}/api/dataset`` and returns the id found under
    ``data.id`` in the JSON response.

    The request timeout is read from the optional ``EXTERNAL_API_TIMEOUT``
    Django setting and defaults to 30 seconds.

    Args:
        knowledge_base: Object exposing ``name`` and ``desc`` attributes.

    Returns:
        The dataset id reported by the external API.

    Raises:
        ExternalAPIError: On timeout, transport failure, a non-200 HTTP
            status, a business ``code`` other than 200, or a missing id.
    """
    try:
        api_data = {
            "name": knowledge_base.name,
            "desc": knowledge_base.desc,
            "type": "0",
            "meta": {},
            "documents": [],
        }
        response = requests.post(
            f'{settings.API_BASE_URL}/api/dataset',
            json=api_data,
            headers={'Content-Type': 'application/json'},
            # Without an explicit timeout requests waits indefinitely and the
            # Timeout handler below could never fire.
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        if response.status_code != 200:
            raise ExternalAPIError(f"创建失败,状态码: {response.status_code}, 响应: {response.text}")

        api_response = response.json()
        if api_response.get('code') != 200:
            raise ExternalAPIError(f"业务处理失败: {api_response.get('message', '未知错误')}")

        # ``data`` may be present but null; treat that like a missing id.
        dataset_id = (api_response.get('data') or {}).get('id')
        if not dataset_id:
            raise ExternalAPIError("响应数据中缺少dataset id")
        return dataset_id
    except ExternalAPIError:
        # Already carries a precise message; do not re-wrap it below.
        raise
    except requests.exceptions.Timeout as e:
        raise ExternalAPIError("请求超时,请稍后重试") from e
    except requests.exceptions.RequestException as e:
        raise ExternalAPIError(f"API请求失败: {str(e)}") from e
    except Exception as e:
        raise ExternalAPIError(f"创建外部知识库失败: {str(e)}") from e
def delete_external_dataset(external_id):
    """Delete a dataset from the external knowledge-base service.

    Deletion is best-effort: any HTTP response (including 404, unexpected
    status codes and business-level errors) is treated as "safe to continue
    with the local delete" and yields ``True``. Only a timeout, a transport
    error or an unexpected exception yields ``False``.

    The request timeout is read from the optional ``EXTERNAL_API_TIMEOUT``
    Django setting and defaults to 30 seconds.

    Args:
        external_id: Id of the dataset in the external service. A falsy
            value skips the remote call.

    Returns:
        bool: ``True`` when the caller may proceed, ``False`` when the
        request itself failed.
    """
    try:
        if not external_id:
            logger.warning("外部知识库ID为空跳过删除")
            return True

        response = requests.delete(
            f'{settings.API_BASE_URL}/api/dataset/{external_id}',
            headers={'Content-Type': 'application/json'},
            # Without an explicit timeout requests waits indefinitely and the
            # Timeout handler below could never fire.
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        logger.info(f"删除外部知识库响应: status_code={response.status_code}, response={response.text}")

        if response.status_code == 404:
            logger.warning(f"外部知识库不存在: {external_id}")
            return True
        if response.status_code not in [200, 204]:
            # Allow the local delete to continue, but leave a trace of why.
            logger.warning(f"删除外部知识库返回非预期状态码: {response.status_code}, 允许本地删除继续")
            return True
        if response.status_code == 204:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True

        try:
            api_response = response.json()
        except ValueError:
            logger.warning(f"外部知识库删除响应无法解析JSON但状态码为200视为成功: {external_id}")
            return True

        if api_response.get('code') != 200:
            # ``message`` may be null; guard the substring test against None.
            message = api_response.get('message') or ''
            if "不存在" in message:
                logger.warning(f"外部知识库ID不存在视为删除成功: {external_id}")
                return True
            logger.warning(f"业务处理返回非200状态码: {api_response.get('code')}, {api_response.get('message')}")
            return True

        logger.info(f"外部知识库删除成功: {external_id}")
        return True
    except requests.exceptions.Timeout:
        logger.error(f"删除外部知识库超时: {external_id}")
        return False
    except requests.exceptions.RequestException as e:
        logger.error(f"删除外部知识库请求异常: {external_id}, error={str(e)}")
        return False
    except Exception as e:
        logger.error(f"删除外部知识库其他错误: {external_id}, error={str(e)}")
        return False
def call_split_api_multiple(files):
    """Send several files to the document-split API in one request.

    Each file is rewound, logged (including a preview of up to 100 bytes)
    and attached under the field name ``file<index>``.

    Args:
        files: Sequence of uploaded file objects; ``name``, ``size`` and
            ``content_type`` are read from each one for logging.

    Returns:
        The parsed JSON response on success; ``None`` when the API answers
        with a non-200 HTTP status; a placeholder response with ``code`` 200
        when the API returns no data or any exception occurs.
    """

    def _placeholder(names, notice):
        # Build a response with one stub document per file name.
        return {
            'code': 200,
            'message': '成功(后备)',
            'data': [
                {
                    'name': doc_name,
                    'content': [{'title': '文档内容', 'content': notice}],
                }
                for doc_name in names
            ],
        }

    try:
        endpoint = f'{settings.API_BASE_URL}/api/dataset/document/split'

        # Assemble the multipart payload, one uniquely named field per file.
        payload = {}
        for idx, upload in enumerate(files):
            can_seek = hasattr(upload, 'seek')
            if can_seek:
                upload.seek(0)
            logger.info(f"准备上传文件 {idx+1}/{len(files)}: {upload.name}, 大小: {upload.size}字节, 类型: {upload.content_type}")

            # Log a short preview, then rewind so the upload starts at byte 0.
            if hasattr(upload, 'read') and hasattr(upload, 'seek'):
                preview = upload.read(100).decode('utf-8', errors='ignore')
                logger.info(f"文件 {idx+1} 内容预览: {preview}")
                upload.seek(0)

            payload[f'file{idx}'] = upload

        logger.info(f"调用分割API URL: {endpoint}, 批量处理 {len(payload)} 个文件")
        logger.info(f"请求字段: {list(payload.keys())}")

        # Send the batch request.
        response = requests.post(endpoint, files=payload)
        logger.info(f"请求头: {response.request.headers}")
        logger.info(f"响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"分割API返回错误状态码: {response.status_code}, 响应: {response.text}")
            return None

        result = response.json()
        logger.info(f"分割API响应详情: {result}")
        doc_count = len(result.get('data', []))
        logger.info(f"成功获取 {doc_count} 个文档结果")

        if doc_count != 0:
            return result

        logger.warning("分割API返回的数据为空使用后备方案")
        # Produce placeholder entries for every submitted file.
        fallback_data = _placeholder(
            [item.name for item in files],
            '文件内容无法自动分割请检查外部API。这是一个后备内容。',
        )
        logger.info(f"使用后备数据结构,为 {len(files)} 个文件生成数据")
        return fallback_data
    except Exception as exc:
        logger.error(f"调用分割API失败: {str(exc)}")
        logger.error(traceback.format_exc())
        # Produce placeholder entries for every submitted file.
        fallback_response = _placeholder(
            [
                item.name if hasattr(item, 'name') else f'文件_{pos}'
                for pos, item in enumerate(files)
            ],
            '文件内容无法自动分割请检查API连接。',
        )
        logger.info(f"由于异常,返回后备响应,包含 {len(fallback_response['data'])} 个条目")
        return fallback_response
def call_upload_api(external_id, doc_data):
    """Upload one document to a dataset in the external service.

    Posts ``doc_data`` as JSON to
    ``{settings.API_BASE_URL}/api/dataset/{external_id}/document``.
    This function never raises; every failure is reported through the
    returned dict.

    Args:
        external_id: Id of the target dataset in the external service.
        doc_data: Dict sent as the JSON body; ``name`` and ``paragraphs``
            are read from it for logging only.

    Returns:
        dict: The parsed API response when both the HTTP status and the
        business ``code`` are 200. Otherwise a dict with ``code``,
        ``message`` and ``data=None`` describing the failure.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用文档上传API: {url}")
        # ``paragraphs`` may be null; a log line must not abort the upload.
        paragraph_count = len(doc_data.get('paragraphs') or [])
        logger.info(f"上传文档数据: 文档名={doc_data.get('name')}, 段落数={paragraph_count}")

        response = requests.post(url, json=doc_data)
        logger.info(f"上传API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"上传API HTTP错误: {response.status_code}, 响应: {response.text}")
            return {
                'code': response.status_code,
                'message': f"上传失败HTTP状态码: {response.status_code}",
                'data': None
            }

        # Handle JSON parsing right at the call: newer requests versions raise
        # a decode error that is also a RequestException, which would
        # otherwise be misreported as a network error below. Every JSON
        # decode error is a ValueError subclass.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析API响应JSON失败: {str(e)}")
            return {
                'code': 500,
                'message': f"解析响应数据失败: {str(e)}",
                'data': None
            }
        logger.info(f"上传API响应内容: {result}")

        if result.get('code') != 200:
            error_msg = result.get('message', '未知错误')
            logger.error(f"上传API业务错误: {error_msg}")
            return {
                'code': result.get('code', 500),
                'message': error_msg,
                'data': None
            }
        return result
    except requests.exceptions.RequestException as e:
        logger.error(f"调用上传API网络错误: {str(e)}")
        return {
            'code': 500,
            'message': f"网络请求错误: {str(e)}",
            'data': None
        }
    except Exception as e:
        logger.error(f"调用上传API其他错误: {str(e)}")
        return {
            'code': 500,
            'message': f"上传API调用失败: {str(e)}",
            'data': None
        }
def call_delete_document_api(external_id, document_id):
    """Delete one document from a dataset in the external service.

    Args:
        external_id: Id of the dataset in the external service.
        document_id: Id of the document to delete.

    Returns:
        The parsed JSON body of the DELETE response, or ``None`` when the
        request or the JSON parsing raises (the error is logged).
    """
    try:
        endpoint = (
            f'{settings.API_BASE_URL}/api/dataset/{external_id}'
            f'/document/{document_id}'
        )
        reply = requests.delete(endpoint)
        parsed = reply.json()
    except Exception as exc:
        logger.error(f"调用删除API失败: {str(exc)}")
        return None
    else:
        return parsed
def get_external_document_list(external_id):
    """Fetch the document list of a dataset from the external service.

    Issues a GET to
    ``{settings.API_BASE_URL}/api/dataset/{external_id}/document``.

    Args:
        external_id: Id of the dataset in the external service.

    Returns:
        The ``data`` field of the API response; an empty list when the
        field is absent or null.

    Raises:
        ExternalAPIError: On transport failure, a non-200 HTTP status, an
            unparseable body, or a business ``code`` other than 200.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用获取文档列表API: {url}")
        response = requests.get(
            url,
            headers={'Content-Type': 'application/json'},
        )
        logger.info(f"文档列表API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档列表失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档列表失败,状态码: {response.status_code}")

        # Handle JSON parsing right at the call: newer requests versions raise
        # a decode error that is also a RequestException, which would
        # otherwise be misreported as a network error below.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档列表响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e
        logger.info(f"文档列表API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档列表业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档列表失败: {result.get('message', '未知错误')}")

        # A present-but-null ``data`` is normalised to an empty list.
        data = result.get('data')
        return [] if data is None else data
    except ExternalAPIError:
        # Already logged with a precise message; do not log or wrap it again.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档列表网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档列表其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
def get_external_document_paragraphs(external_id, document_external_id):
    """Fetch the paragraphs of one document from the external service.

    Issues a GET to ``{settings.API_BASE_URL}/api/dataset/{external_id}
    /document/{document_external_id}/paragraph``.

    Args:
        external_id: Id of the dataset in the external service.
        document_external_id: Id of the document in the external service.

    Returns:
        The ``data`` field of the API response; an empty list when the
        field is absent or null.

    Raises:
        ExternalAPIError: On transport failure, a non-200 HTTP status, an
            unparseable body, or a business ``code`` other than 200.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_external_id}/paragraph'
        logger.info(f"调用获取文档段落API: {url}")
        response = requests.get(url)
        logger.info(f"文档段落API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档段落内容失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档段落内容失败,状态码: {response.status_code}")

        # Handle JSON parsing right at the call: newer requests versions raise
        # a decode error that is also a RequestException, which would
        # otherwise be misreported as a network error below.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档段落响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e
        logger.info(f"文档段落API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档段落内容业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档段落内容失败: {result.get('message', '未知错误')}")

        # A present-but-null ``data`` is normalised to an empty list.
        data = result.get('data')
        return [] if data is None else data
    except ExternalAPIError:
        # Already logged with a precise message; do not log or wrap it again.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档段落内容网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档段落内容其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e