operations_project/apps/common/services/external_api_service.py

321 lines
12 KiB
Python
Raw Normal View History

2025-05-07 18:01:48 +08:00
# apps/common/services/external_api_service.py
import traceback
import requests
import json
import logging
from django.conf import settings
from rest_framework.exceptions import APIException
logger = logging.getLogger(__name__)
class ExternalAPIError(APIException):
    """Error raised when a call to the external knowledge-base API fails.

    Subclasses DRF's ``APIException`` and only overrides its three
    class-level defaults.
    """

    # HTTP status attached to this error.
    status_code = 500
    # Default human-readable message.
    default_detail = '外部API调用失败'
    # Default machine-readable error code.
    default_code = 'external_api_error'
def create_external_dataset(knowledge_base):
    """Create a dataset on the external knowledge-base service.

    Args:
        knowledge_base: Object exposing ``name`` and ``desc`` attributes;
            both are sent as the dataset's name and description.

    Returns:
        The ``id`` found in the ``data`` object of the API response.

    Raises:
        ExternalAPIError: If the request times out or fails, if the HTTP
            status or the business ``code`` is not 200, if the response has
            no dataset id, or if any other unexpected error occurs.
    """
    try:
        payload = {
            "name": knowledge_base.name,
            "desc": knowledge_base.desc,
            "type": "0",
            "meta": {},
            "documents": [],
        }
        response = requests.post(
            f'{settings.API_BASE_URL}/api/dataset',
            json=payload,
            headers={'Content-Type': 'application/json'},
            # requests never times out by default; without this the Timeout
            # handler below could not fire and the call could hang forever.
            # EXTERNAL_API_TIMEOUT is an optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        if response.status_code != 200:
            raise ExternalAPIError(f"创建失败,状态码: {response.status_code}, 响应: {response.text}")

        api_response = response.json()
        if api_response.get('code') != 200:
            raise ExternalAPIError(f"业务处理失败: {api_response.get('message', '未知错误')}")

        # ``data`` may be present but null; treat that like a missing id
        # instead of failing with an AttributeError.
        dataset_id = (api_response.get('data') or {}).get('id')
        if not dataset_id:
            raise ExternalAPIError("响应数据中缺少dataset id")
        return dataset_id
    except ExternalAPIError:
        # Already a well-formed error raised above; do not let the generic
        # handler at the bottom wrap it a second time.
        raise
    except requests.exceptions.Timeout as e:
        raise ExternalAPIError("请求超时,请稍后重试") from e
    except requests.exceptions.RequestException as e:
        raise ExternalAPIError(f"API请求失败: {str(e)}") from e
    except Exception as e:
        raise ExternalAPIError(f"创建外部知识库失败: {str(e)}") from e
def delete_external_dataset(external_id):
    """Delete a dataset on the external knowledge-base service (best effort).

    Args:
        external_id: Id of the external dataset. A falsy value skips the
            request entirely.

    Returns:
        ``True`` when the id is empty or when the service answered at all,
        whatever the HTTP status or business code, so that the caller's
        local deletion can continue. ``False`` when the request timed out,
        failed at the transport level, or an unexpected error occurred.
    """
    if not external_id:
        logger.warning("外部知识库ID为空跳过删除")
        return True

    try:
        response = requests.delete(
            f'{settings.API_BASE_URL}/api/dataset/{external_id}',
            headers={'Content-Type': 'application/json'},
            # requests never times out by default; without this the Timeout
            # handler below could not fire. EXTERNAL_API_TIMEOUT is an
            # optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        status = response.status_code
        logger.info(f"删除外部知识库响应: status_code={status}, response={response.text}")

        if status == 404:
            logger.warning(f"外部知识库不存在: {external_id}")
            return True
        if status == 204:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True
        if status != 200:
            # Still allow the local deletion to continue, but leave a trace
            # instead of returning silently.
            logger.warning(f"删除外部知识库返回非预期状态码: {status}, 允许本地删除继续: {external_id}")
            return True

        try:
            api_response = response.json()
        except ValueError:
            logger.warning(f"外部知识库删除响应无法解析JSON但状态码为200视为成功: {external_id}")
            return True

        if api_response.get('code') == 200:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True

        # ``message`` may be null; normalise it so the substring test cannot
        # raise and flip the result to False.
        message = str(api_response.get('message') or '')
        if "不存在" in message:
            logger.warning(f"外部知识库ID不存在视为删除成功: {external_id}")
        else:
            logger.warning(f"业务处理返回非200状态码: {api_response.get('code')}, {api_response.get('message')}")
        return True
    except requests.exceptions.Timeout:
        logger.error(f"删除外部知识库超时: {external_id}")
        return False
    except requests.exceptions.RequestException as e:
        logger.error(f"删除外部知识库请求异常: {external_id}, error={str(e)}")
        return False
    except Exception as e:
        logger.error(f"删除外部知识库其他错误: {external_id}, error={str(e)}")
        return False
def _build_split_fallback(files, notice):
    """Build a placeholder split result holding one ``notice`` paragraph per file."""
    return {
        'code': 200,
        'message': '成功',
        'data': [
            {
                'name': file.name,
                'content': [
                    {
                        'title': '文档内容',
                        'content': notice,
                    }
                ],
            }
            for file in files
        ],
    }


def call_split_api_multiple(files):
    """Send one or more files to the document-split API in a single request.

    Args:
        files: Uploaded file objects exposing ``name``, ``size`` and
            ``content_type``; each is sent under the multipart field
            ``file``.

    Returns:
        The decoded JSON response when it contains data; a placeholder
        result (code 200, one notice paragraph per file) when the response
        data is empty or when any exception occurs; ``None`` when the API
        answers with a non-200 HTTP status.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/document/split'

        # Every file goes under the same multipart field name, 'file'.
        files_data = [('file', (file.name, file, file.content_type)) for file in files]

        for file in files:
            logger.info(f"准备上传文件: {file.name}, 大小: {file.size}字节, 类型: {file.content_type}")
            # Log the first 100 characters of the content for diagnostics.
            if hasattr(file, 'read') and hasattr(file, 'seek'):
                try:
                    file.seek(0)
                    preview = file.read(100)
                    # Text-mode files already return str; only bytes need decoding.
                    if isinstance(preview, bytes):
                        preview = preview.decode('utf-8', errors='ignore')
                    logger.info(f"文件内容预览: {preview}")
                finally:
                    # Always rewind so the upload sends the whole file.
                    file.seek(0)

        logger.info(f"调用分割API URL: {url}")
        logger.info(f"上传文件数量: {len(files_data)}")

        response = requests.post(
            url,
            files=files_data,
            # requests never times out by default. Splitting can be slow, so
            # the default is generous; EXTERNAL_API_SPLIT_TIMEOUT is an
            # optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_SPLIT_TIMEOUT', 300),
        )

        logger.info(f"请求头: {response.request.headers}")
        logger.info(f"响应状态码: {response.status_code}")
        if response.status_code != 200:
            logger.error(f"分割API返回错误状态码: {response.status_code}, 响应: {response.text}")
            return None

        result = response.json()
        logger.info(f"分割API响应详情: {result}")

        # Empty (or null) data probably means the API failed to process the
        # files, so fall back to the placeholder result.
        if not result.get('data'):
            logger.warning("分割API返回的数据为空尝试使用后备方案")
            fallback_data = _build_split_fallback(
                files, '文件内容无法自动分割请检查外部API。这是一个后备内容。'
            )
            logger.info("使用后备数据结构")
            return fallback_data

        return result
    except Exception as e:
        logger.error(f"调用分割API失败: {str(e)}")
        logger.error(traceback.format_exc())
        fallback_response = _build_split_fallback(files, '文件内容无法自动分割请检查API连接。')
        logger.info("由于异常,返回后备响应")
        return fallback_response
def _upload_failure(code, message):
    """Build the error-shaped result dict returned by ``call_upload_api``."""
    return {
        'code': code,
        'message': message,
        'data': None,
    }


def call_upload_api(external_id, doc_data):
    """Upload a document to an external dataset.

    Args:
        external_id: Id of the external dataset that receives the document.
        doc_data: Dict sent as the JSON body; its ``name`` and
            ``paragraphs`` entries are read for logging.

    Returns:
        The decoded JSON response when both the HTTP status and the business
        ``code`` are 200. Otherwise a dict with ``code``, ``message`` and
        ``data=None`` describing the failure. This function does not raise.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用文档上传API: {url}")
        # ``paragraphs`` may be present but null; count it as empty.
        paragraph_count = len(doc_data.get('paragraphs') or [])
        logger.info(f"上传文档数据: 文档名={doc_data.get('name')}, 段落数={paragraph_count}")

        response = requests.post(
            url,
            json=doc_data,
            # requests never times out by default; EXTERNAL_API_TIMEOUT is
            # an optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        logger.info(f"上传API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"上传API HTTP错误: {response.status_code}, 响应: {response.text}")
            return _upload_failure(
                response.status_code,
                f"上传失败HTTP状态码: {response.status_code}",
            )

        # Decode in its own try block. In recent requests releases the JSON
        # decode error is also a RequestException, so the network-error
        # handler below would otherwise claim it. Every variant is a ValueError.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析API响应JSON失败: {str(e)}")
            return _upload_failure(500, f"解析响应数据失败: {str(e)}")

        logger.info(f"上传API响应内容: {result}")
        if result.get('code') != 200:
            error_msg = result.get('message', '未知错误')
            logger.error(f"上传API业务错误: {error_msg}")
            return _upload_failure(result.get('code', 500), error_msg)

        return result
    except requests.exceptions.RequestException as e:
        logger.error(f"调用上传API网络错误: {str(e)}")
        return _upload_failure(500, f"网络请求错误: {str(e)}")
    except Exception as e:
        logger.error(f"调用上传API其他错误: {str(e)}")
        return _upload_failure(500, f"上传API调用失败: {str(e)}")
def call_delete_document_api(external_id, document_id):
    """Delete a document from an external dataset.

    Args:
        external_id: Id of the external dataset that owns the document.
        document_id: Id of the document to delete.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, times out, or the body cannot be decoded. The HTTP status
        code is not inspected.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_id}'
        response = requests.delete(
            url,
            # requests never times out by default, so an unresponsive service
            # would block the caller indefinitely. EXTERNAL_API_TIMEOUT is
            # an optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        return response.json()
    except Exception as e:
        logger.error(f"调用删除API失败: {str(e)}")
        return None
def get_external_document_list(external_id):
    """Fetch the document list of an external dataset.

    Args:
        external_id: Id of the external dataset.

    Returns:
        The ``data`` entry of the API response, or ``[]`` when the key is
        absent.

    Raises:
        ExternalAPIError: If the request fails or times out, if the HTTP
            status or the business ``code`` is not 200, or if the response
            body is not valid JSON.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用获取文档列表API: {url}")

        response = requests.get(
            url,
            headers={'Content-Type': 'application/json'},
            # requests never times out by default; EXTERNAL_API_TIMEOUT is
            # an optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        logger.info(f"文档列表API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档列表失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档列表失败,状态码: {response.status_code}")

        # Decode in its own try block. In recent requests releases the JSON
        # decode error is also a RequestException, so it would otherwise be
        # reported as a network error. Every variant is a ValueError.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档列表响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档列表API响应内容: {result}")
        if result.get('code') != 200:
            logger.error(f"获取文档列表业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档列表失败: {result.get('message', '未知错误')}")

        return result.get('data', [])
    except ExternalAPIError:
        # Already logged and well-formed; avoid wrapping it again, which
        # would duplicate both the message prefix and the log entry.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档列表网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档列表其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
def get_external_document_paragraphs(external_id, document_external_id):
    """Fetch the paragraphs of a document stored in an external dataset.

    Args:
        external_id: Id of the external dataset.
        document_external_id: Id of the document inside that dataset.

    Returns:
        The ``data`` entry of the API response, or ``[]`` when the key is
        absent.

    Raises:
        ExternalAPIError: If the request fails or times out, if the HTTP
            status or the business ``code`` is not 200, or if the response
            body is not valid JSON.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_external_id}/paragraph'
        logger.info(f"调用获取文档段落API: {url}")

        response = requests.get(
            url,
            # requests never times out by default; EXTERNAL_API_TIMEOUT is
            # an optional setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        logger.info(f"文档段落API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档段落内容失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档段落内容失败,状态码: {response.status_code}")

        # Decode in its own try block. In recent requests releases the JSON
        # decode error is also a RequestException, so it would otherwise be
        # reported as a network error. Every variant is a ValueError.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档段落响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档段落API响应内容: {result}")
        if result.get('code') != 200:
            logger.error(f"获取文档段落内容业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档段落内容失败: {result.get('message', '未知错误')}")

        return result.get('data', [])
    except ExternalAPIError:
        # Already logged and well-formed; avoid wrapping it again, which
        # would duplicate both the message prefix and the log entry.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档段落内容网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档段落内容其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e