321 lines
12 KiB
Python
321 lines
12 KiB
Python
# apps/common/services/external_api_service.py
|
||
import traceback
|
||
import requests
|
||
import json
|
||
import logging
|
||
from django.conf import settings
|
||
from rest_framework.exceptions import APIException
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
class ExternalAPIError(APIException):
    """Error raised when a call to the external knowledge-base API fails.

    Subclasses DRF's ``APIException`` and overrides its three class-level
    defaults: HTTP status 500, a generic Chinese detail message, and the
    machine-readable code ``external_api_error``.
    """

    # Machine-readable error identifier.
    default_code = 'external_api_error'
    # Message used when the raiser supplies no detail of its own.
    default_detail = '外部API调用失败'
    # HTTP status associated with this exception.
    status_code = 500
|
||
|
||
def create_external_dataset(knowledge_base):
    """Create a dataset in the external knowledge-base service.

    Posts ``knowledge_base.name`` and ``knowledge_base.desc`` (plus fixed
    ``type``/``meta``/``documents`` fields) to
    ``{settings.API_BASE_URL}/api/dataset``.

    The request is bounded by ``settings.EXTERNAL_API_TIMEOUT`` seconds
    (30 when the setting is absent) so a stalled service cannot block the
    caller indefinitely.

    Args:
        knowledge_base: Object exposing ``name`` and ``desc`` attributes.

    Returns:
        The ``data.id`` value from the service response.

    Raises:
        ExternalAPIError: On timeout, transport failure, a non-200 HTTP
            status, a business ``code`` other than 200, a response without
            a dataset id, or any other unexpected error.
    """
    try:
        api_data = {
            "name": knowledge_base.name,
            "desc": knowledge_base.desc,
            "type": "0",
            "meta": {},
            "documents": [],
        }

        response = requests.post(
            f'{settings.API_BASE_URL}/api/dataset',
            json=api_data,
            headers={'Content-Type': 'application/json'},
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        if response.status_code != 200:
            raise ExternalAPIError(f"创建失败,状态码: {response.status_code}, 响应: {response.text}")

        api_response = response.json()
        if api_response.get('code') != 200:
            raise ExternalAPIError(f"业务处理失败: {api_response.get('message', '未知错误')}")

        # ``data`` may be present but null; treat that like a missing id
        # instead of crashing on ``None.get``.
        dataset_id = (api_response.get('data') or {}).get('id')
        if not dataset_id:
            raise ExternalAPIError("响应数据中缺少dataset id")

        return dataset_id

    except ExternalAPIError:
        # Already carries the precise failure message; without this clause
        # the generic handler below would wrap it a second time.
        raise
    except requests.exceptions.Timeout as e:
        raise ExternalAPIError("请求超时,请稍后重试") from e
    except requests.exceptions.RequestException as e:
        raise ExternalAPIError(f"API请求失败: {str(e)}") from e
    except Exception as e:
        raise ExternalAPIError(f"创建外部知识库失败: {str(e)}") from e
|
||
|
||
def delete_external_dataset(external_id):
    """Delete a dataset from the external knowledge-base service (best effort).

    Sends ``DELETE {settings.API_BASE_URL}/api/dataset/{external_id}``,
    bounded by ``settings.EXTERNAL_API_TIMEOUT`` seconds (30 when the
    setting is absent).

    The function is deliberately lenient: once the service has answered,
    the result is ``True`` regardless of HTTP status or business code, so
    the caller can proceed with its local delete. Every such case is logged.

    Args:
        external_id: Id of the dataset in the external service. A falsy
            value skips the remote call.

    Returns:
        bool: ``True`` when ``external_id`` is empty or the service
        responded; ``False`` when the request timed out, failed at the
        transport level, or handling the response raised unexpectedly.
    """
    try:
        if not external_id:
            logger.warning("外部知识库ID为空,跳过删除")
            return True

        response = requests.delete(
            f'{settings.API_BASE_URL}/api/dataset/{external_id}',
            headers={'Content-Type': 'application/json'},
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        logger.info(f"删除外部知识库响应: status_code={response.status_code}, response={response.text}")

        if response.status_code == 404:
            logger.warning(f"外部知识库不存在: {external_id}")
            return True

        if response.status_code not in (200, 204):
            # Still let the local delete continue, but leave a trace so the
            # orphaned remote dataset can be found later.
            logger.warning(
                f"删除外部知识库返回非预期状态码: {response.status_code},继续本地删除: {external_id}"
            )
            return True

        if response.status_code == 204:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True

        # HTTP 200: inspect the business-level result in the JSON body.
        try:
            api_response = response.json()
        except ValueError:
            logger.warning(f"外部知识库删除响应无法解析JSON,但状态码为200,视为成功: {external_id}")
            return True

        if api_response.get('code') != 200:
            # ``message`` may be null or a non-string; normalise before the
            # substring test so it cannot raise and flip the result to False.
            message = str(api_response.get('message') or '')
            if "不存在" in message:
                logger.warning(f"外部知识库ID不存在,视为删除成功: {external_id}")
                return True
            logger.warning(f"业务处理返回非200状态码: {api_response.get('code')}, {api_response.get('message')}")
            return True

        logger.info(f"外部知识库删除成功: {external_id}")
        return True

    except requests.exceptions.Timeout:
        logger.error(f"删除外部知识库超时: {external_id}")
        return False
    except requests.exceptions.RequestException as e:
        logger.error(f"删除外部知识库请求异常: {external_id}, error={str(e)}")
        return False
    except Exception as e:
        logger.error(f"删除外部知识库其他错误: {external_id}, error={str(e)}")
        return False
|
||
|
||
def _build_split_fallback(files, notice):
    """Build a success-shaped placeholder split result, one entry per file."""
    return {
        'code': 200,
        'message': '成功',
        'data': [
            {
                'name': file.name,
                'content': [
                    {
                        'title': '文档内容',
                        'content': notice,
                    }
                ],
            }
            for file in files
        ],
    }


def call_split_api_multiple(files):
    """Send one or more files to the document-split API in a single request.

    Every file is posted as a multipart ``file`` field to
    ``{settings.API_BASE_URL}/api/dataset/document/split``. The request is
    bounded by ``settings.EXTERNAL_API_SPLIT_TIMEOUT`` seconds (300 when the
    setting is absent).

    Args:
        files: Iterable of uploaded-file-like objects exposing ``name``,
            ``size`` and ``content_type``. It is iterated more than once.

    Returns:
        dict | None: The parsed JSON response when it contains data; a
        placeholder payload (``code`` 200, one stub paragraph per file) when
        the response has no data or any exception occurs; ``None`` when the
        service answers with a non-200 HTTP status.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/document/split'

        # All files go under the same multipart field name, 'file'.
        files_data = [('file', (file.name, file, file.content_type)) for file in files]

        for file in files:
            logger.info(f"准备上传文件: {file.name}, 大小: {file.size}字节, 类型: {file.content_type}")
            if hasattr(file, 'read') and hasattr(file, 'seek'):
                # Always rewind so the upload starts at the first byte.
                file.seek(0)
                # Document contents are only previewed at DEBUG level to keep
                # user data out of normal production logs.
                if logger.isEnabledFor(logging.DEBUG):
                    head = file.read(100)
                    if isinstance(head, bytes):
                        head = head.decode('utf-8', errors='ignore')
                    logger.debug(f"文件内容预览: {head}")
                    file.seek(0)  # rewind again after the preview read

        logger.info(f"调用分割API URL: {url}")
        logger.info(f"上传文件数量: {len(files_data)}")

        response = requests.post(
            url,
            files=files_data,
            timeout=getattr(settings, 'EXTERNAL_API_SPLIT_TIMEOUT', 300),
        )

        logger.info(f"请求头: {response.request.headers}")
        logger.info(f"响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"分割API返回错误状态码: {response.status_code}, 响应: {response.text}")
            return None

        result = response.json()
        logger.info(f"分割API响应详情: {result}")

        # Missing, null or empty ``data`` all mean the split produced nothing.
        if not result.get('data'):
            logger.warning("分割API返回的数据为空,尝试使用后备方案")
            logger.info("使用后备数据结构")
            return _build_split_fallback(
                files, '文件内容无法自动分割,请检查外部API。这是一个后备内容。'
            )

        return result

    except Exception as e:
        # Best effort: never propagate, hand back a placeholder instead.
        # ``logger.exception`` records the traceback with the message.
        logger.exception(f"调用分割API失败: {str(e)}")
        logger.info("由于异常,返回后备响应")
        return _build_split_fallback(files, '文件内容无法自动分割,请检查API连接。')
|
||
|
||
def _upload_failure(code, message):
    """Build the error-shaped dict returned by ``call_upload_api``."""
    return {
        'code': code,
        'message': message,
        'data': None,
    }


def call_upload_api(external_id, doc_data):
    """Upload a document to a dataset in the external service.

    Posts ``doc_data`` as JSON to
    ``{settings.API_BASE_URL}/api/dataset/{external_id}/document``, bounded
    by ``settings.EXTERNAL_API_TIMEOUT`` seconds (30 when the setting is
    absent). This function never raises; every failure is logged and
    reported through the returned dict.

    Args:
        external_id: Id of the target dataset in the external service.
        doc_data: Dict payload; ``name`` and ``paragraphs`` are read for
            logging only.

    Returns:
        dict: The parsed service response when both the HTTP status and the
        business ``code`` are 200; otherwise
        ``{'code': <error code>, 'message': <text>, 'data': None}``.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用文档上传API: {url}")
        # ``paragraphs`` may be absent or null; count it as zero either way.
        logger.info(f"上传文档数据: 文档名={doc_data.get('name')}, 段落数={len(doc_data.get('paragraphs') or [])}")

        response = requests.post(
            url,
            json=doc_data,
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        logger.info(f"上传API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"上传API HTTP错误: {response.status_code}, 响应: {response.text}")
            return _upload_failure(
                response.status_code,
                f"上传失败,HTTP状态码: {response.status_code}",
            )

        # Parse in its own try: on recent versions of requests the decode
        # error is also a RequestException, so the outer handler would
        # otherwise misreport it as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析API响应JSON失败: {str(e)}")
            return _upload_failure(500, f"解析响应数据失败: {str(e)}")

        logger.info(f"上传API响应内容: {result}")

        if result.get('code') != 200:
            error_msg = result.get('message', '未知错误')
            logger.error(f"上传API业务错误: {error_msg}")
            return _upload_failure(result.get('code', 500), error_msg)

        return result

    except requests.exceptions.RequestException as e:
        logger.error(f"调用上传API网络错误: {str(e)}")
        return _upload_failure(500, f"网络请求错误: {str(e)}")
    except Exception as e:
        logger.error(f"调用上传API其他错误: {str(e)}")
        return _upload_failure(500, f"上传API调用失败: {str(e)}")
|
||
|
||
def call_delete_document_api(external_id, document_id):
    """Delete one document from a dataset in the external service.

    Sends ``DELETE`` to
    ``{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_id}``,
    bounded by ``settings.EXTERNAL_API_TIMEOUT`` seconds (30 when the
    setting is absent). The HTTP status is not checked; only the body is
    returned.

    Args:
        external_id: Id of the dataset in the external service.
        document_id: Id of the document to delete.

    Returns:
        The parsed JSON body of the response, or ``None`` when the request
        fails or the body is not valid JSON (both cases are logged).
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_id}'
        response = requests.delete(
            url,
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )
        try:
            return response.json()
        except ValueError as e:
            # Include the status so an empty or non-JSON reply can be told
            # apart from a transport failure in the logs.
            logger.error(f"调用删除API失败: 响应无法解析JSON, status_code={response.status_code}, error={str(e)}")
            return None
    except Exception as e:
        logger.error(f"调用删除API失败: {str(e)}")
        return None
|
||
|
||
def get_external_document_list(external_id):
    """Fetch the document list of a dataset from the external service.

    Sends ``GET {settings.API_BASE_URL}/api/dataset/{external_id}/document``,
    bounded by ``settings.EXTERNAL_API_TIMEOUT`` seconds (30 when the
    setting is absent).

    Args:
        external_id: Id of the dataset in the external service.

    Returns:
        The ``data`` field of the service response, or ``[]`` when the
        response has no ``data`` key.

    Raises:
        ExternalAPIError: On timeout or transport failure, a non-200 HTTP
            status, an unparsable body, a business ``code`` other than 200,
            or any other unexpected error.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用获取文档列表API: {url}")

        response = requests.get(
            url,
            headers={'Content-Type': 'application/json'},
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        logger.info(f"文档列表API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档列表失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档列表失败,状态码: {response.status_code}")

        # Parse in its own try: on recent versions of requests the decode
        # error is also a RequestException and would otherwise be reported
        # as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档列表响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档列表API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档列表业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档列表失败: {result.get('message', '未知错误')}")

        return result.get('data', [])

    except ExternalAPIError:
        # Already logged with a precise message above; re-raise untouched so
        # the generic handler does not log and wrap it a second time.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档列表网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档列表其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
|
||
|
||
def get_external_document_paragraphs(external_id, document_external_id):
    """Fetch the paragraphs of one document from the external service.

    Sends ``GET`` to
    ``{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_external_id}/paragraph``,
    bounded by ``settings.EXTERNAL_API_TIMEOUT`` seconds (30 when the
    setting is absent).

    Args:
        external_id: Id of the dataset in the external service.
        document_external_id: Id of the document in the external service.

    Returns:
        The ``data`` field of the service response, or ``[]`` when the
        response has no ``data`` key.

    Raises:
        ExternalAPIError: On timeout or transport failure, a non-200 HTTP
            status, an unparsable body, a business ``code`` other than 200,
            or any other unexpected error.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_external_id}/paragraph'
        logger.info(f"调用获取文档段落API: {url}")

        response = requests.get(
            url,
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        logger.info(f"文档段落API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档段落内容失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档段落内容失败,状态码: {response.status_code}")

        # Parse in its own try: on recent versions of requests the decode
        # error is also a RequestException and would otherwise be reported
        # as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档段落响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档段落API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档段落内容业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档段落内容失败: {result.get('message', '未知错误')}")

        return result.get('data', [])

    except ExternalAPIError:
        # Already logged with a precise message above; re-raise untouched so
        # the generic handler does not log and wrap it a second time.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档段落内容网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档段落内容其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
|
||
|
||
|