2025-05-07 18:01:48 +08:00
|
|
|
|
# apps/common/services/external_api_service.py
|
|
|
|
|
import traceback
|
|
|
|
|
import requests
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
from rest_framework.exceptions import APIException
|
|
|
|
|
|
|
|
|
|
# Module-level logger named after this module; every function below logs through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
class ExternalAPIError(APIException):
    """Error raised by this module when a call to the external API fails.

    Subclasses REST framework's ``APIException`` and declares a 500 status
    together with a default detail message and error code.
    """

    # Machine-readable error code attached to the exception.
    default_code = 'external_api_error'
    # Message used when the exception is raised without an explicit detail.
    default_detail = '外部API调用失败'
    # HTTP status associated with this exception.
    status_code = 500
|
|
|
|
|
|
|
|
|
|
def create_external_dataset(knowledge_base):
    """Create a dataset on the external knowledge-base service.

    Posts the knowledge base's ``name`` and ``desc`` to ``/api/dataset`` under
    ``settings.API_BASE_URL`` and returns the id found at ``data.id`` in the
    JSON response.

    Args:
        knowledge_base: Object exposing ``name`` and ``desc`` attributes.

    Returns:
        The ``id`` value taken from the response payload's ``data`` object.

    Raises:
        ExternalAPIError: On timeout, transport failure, a non-200 HTTP
            status, a payload whose ``code`` is not 200, a missing dataset id,
            or any other unexpected error.
    """
    try:
        api_data = {
            "name": knowledge_base.name,
            "desc": knowledge_base.desc,
            "type": "0",
            "meta": {},
            "documents": [],
        }

        response = requests.post(
            f'{settings.API_BASE_URL}/api/dataset',
            json=api_data,
            headers={'Content-Type': 'application/json'},
            # Without a timeout requests waits indefinitely and the Timeout
            # handler below can never fire. The limit is overridable through
            # an optional EXTERNAL_API_TIMEOUT setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        if response.status_code != 200:
            raise ExternalAPIError(f"创建失败,状态码: {response.status_code}, 响应: {response.text}")

        api_response = response.json()
        if api_response.get('code') != 200:
            raise ExternalAPIError(f"业务处理失败: {api_response.get('message', '未知错误')}")

        # ``data`` may be present but null; treat that like a missing id
        # instead of failing with an AttributeError.
        dataset_id = (api_response.get('data') or {}).get('id')
        if not dataset_id:
            raise ExternalAPIError("响应数据中缺少dataset id")

        return dataset_id

    except ExternalAPIError:
        # Already a well-formed error raised above; re-raise untouched so the
        # catch-all handler below does not wrap it a second time.
        raise
    except requests.exceptions.Timeout as e:
        raise ExternalAPIError("请求超时,请稍后重试") from e
    except requests.exceptions.RequestException as e:
        raise ExternalAPIError(f"API请求失败: {str(e)}") from e
    except Exception as e:
        raise ExternalAPIError(f"创建外部知识库失败: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
def delete_external_dataset(external_id):
    """Delete a dataset on the external knowledge-base service (best effort).

    Any HTTP response is treated as "local deletion may proceed" and yields
    ``True``; only a failure to complete the request yields ``False``.

    Args:
        external_id: Id of the dataset on the external service. A falsy value
            skips the remote call.

    Returns:
        bool: ``True`` when the id is empty or the service answered (whatever
        the status or business code); ``False`` on timeout, transport error,
        or any other unexpected exception.
    """
    try:
        if not external_id:
            logger.warning("外部知识库ID为空,跳过删除")
            return True

        response = requests.delete(
            f'{settings.API_BASE_URL}/api/dataset/{external_id}',
            headers={'Content-Type': 'application/json'},
            # Bound the wait so the Timeout handler below is reachable; the
            # limit is overridable through an optional EXTERNAL_API_TIMEOUT
            # setting (seconds).
            timeout=getattr(settings, 'EXTERNAL_API_TIMEOUT', 30),
        )

        logger.info(f"删除外部知识库响应: status_code={response.status_code}, response={response.text}")

        status = response.status_code
        if status == 404:
            logger.warning(f"外部知识库不存在: {external_id}")
            return True
        if status not in (200, 204):
            # Deliberately tolerated so local deletion can continue, but leave
            # a warning instead of succeeding silently.
            logger.warning(f"删除外部知识库返回异常状态码: {status},允许本地删除继续")
            return True
        if status == 204:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True

        # HTTP 200: inspect the business-level code in the JSON body.
        try:
            api_response = response.json()
        except ValueError:
            logger.warning(f"外部知识库删除响应无法解析JSON,但状态码为200,视为成功: {external_id}")
            return True

        if api_response.get('code') == 200:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True

        # ``message`` may be null or a non-string; normalise it before the
        # substring test so this check cannot raise.
        message = str(api_response.get('message') or '')
        if "不存在" in message:
            logger.warning(f"外部知识库ID不存在,视为删除成功: {external_id}")
            return True

        logger.warning(f"业务处理返回非200状态码: {api_response.get('code')}, {api_response.get('message')}")
        return True

    except requests.exceptions.Timeout:
        logger.error(f"删除外部知识库超时: {external_id}")
        return False
    except requests.exceptions.RequestException as e:
        logger.error(f"删除外部知识库请求异常: {external_id}, error={str(e)}")
        return False
    except Exception as e:
        logger.error(f"删除外部知识库其他错误: {external_id}, error={str(e)}")
        return False
|
|
|
|
|
|
|
|
|
|
def _split_fallback_payload(files, notice):
    """Build a success-shaped placeholder payload with one entry per file."""
    return {
        'code': 200,
        'message': '成功',
        'data': [
            {
                'name': file.name,
                'content': [
                    {
                        'title': '文档内容',
                        'content': notice,
                    }
                ],
            }
            for file in files
        ],
    }


def _log_content_preview(file):
    """Log up to the first 100 bytes/characters of ``file``, then rewind it."""
    if not (hasattr(file, 'read') and hasattr(file, 'seek')):
        return
    file.seek(0)
    try:
        head = file.read(100)
        # read() may return bytes or str depending on how the file was opened;
        # only bytes need decoding.
        if isinstance(head, bytes):
            head = head.decode('utf-8', errors='ignore')
        # NOTE(review): this writes document content into the logs at INFO
        # level; confirm that is acceptable for the data being uploaded.
        logger.info(f"文件内容预览: {head}")
    except Exception as preview_error:
        # The preview is diagnostic only; it must never abort the upload.
        logger.warning(f"文件内容预览失败: {preview_error}")
    finally:
        file.seek(0)  # rewind so the upload sends the file from the start


def call_split_api_multiple(files):
    """Send one or more files to the document-split API in a single request.

    All files are posted as multipart parts under the field name ``file`` to
    ``/api/dataset/document/split`` under ``settings.API_BASE_URL``.

    Args:
        files: Sequence of file objects exposing ``name``, ``size`` and
            ``content_type`` (presumably Django uploaded files — confirm
            against the caller).

    Returns:
        dict | None: The parsed JSON response when it carries non-empty
        ``data``; ``None`` when the HTTP status is not 200; otherwise a
        placeholder payload (``code`` 200, one stub entry per file) when the
        response has no data or any exception occurs.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/document/split'

        # Every file goes under the same multipart field name, 'file'.
        files_data = [('file', (file.name, file, file.content_type)) for file in files]

        # Record what is about to be uploaded.
        for file in files:
            logger.info(f"准备上传文件: {file.name}, 大小: {file.size}字节, 类型: {file.content_type}")
            _log_content_preview(file)

        logger.info(f"调用分割API URL: {url}")
        logger.info(f"上传文件数量: {len(files_data)}")

        # NOTE(review): no timeout is set on this request, so a stalled server
        # blocks the caller; choose a limit suited to large documents.
        response = requests.post(
            url,
            files=files_data,
        )

        # Record request headers and response status.
        logger.info(f"请求头: {response.request.headers}")
        logger.info(f"响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"分割API返回错误状态码: {response.status_code}, 响应: {response.text}")
            return None

        result = response.json()
        logger.info(f"分割API响应详情: {result}")

        # Missing, null or empty ``data`` means the API produced nothing
        # usable; hand back the placeholder payload instead.
        if not result.get('data'):
            logger.warning("分割API返回的数据为空,尝试使用后备方案")
            fallback_data = _split_fallback_payload(
                files,
                '文件内容无法自动分割,请检查外部API。这是一个后备内容。',
            )
            logger.info("使用后备数据结构")
            return fallback_data

        return result

    except Exception as e:
        logger.error(f"调用分割API失败: {str(e)}")
        logger.error(traceback.format_exc())

        # Best-effort contract: callers still get a payload when the call fails.
        fallback_response = _split_fallback_payload(
            files,
            '文件内容无法自动分割,请检查API连接。',
        )
        logger.info("由于异常,返回后备响应")
        return fallback_response
|
|
|
|
|
|
|
|
|
|
def _upload_failure(code, message):
    """Build the error-shaped result dict returned by ``call_upload_api``."""
    return {
        'code': code,
        'message': message,
        'data': None,
    }


def call_upload_api(external_id, doc_data):
    """Upload one document to a dataset on the external service.

    Posts ``doc_data`` as JSON to ``/api/dataset/{external_id}/document``
    under ``settings.API_BASE_URL``. This function does not raise; failures
    are reported through the returned dict.

    Args:
        external_id: Id of the target dataset on the external service.
        doc_data: Dict describing the document; ``name`` and ``paragraphs``
            are read for logging.

    Returns:
        dict: The parsed JSON response when both the HTTP status and the
        payload ``code`` are 200; otherwise a dict with ``code``, ``message``
        and ``data`` set to ``None`` describing the failure.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用文档上传API: {url}")
        logger.info(f"上传文档数据: 文档名={doc_data.get('name')}, 段落数={len(doc_data.get('paragraphs', []))}")

        # NOTE(review): no timeout is set on this request.
        response = requests.post(url, json=doc_data)

        logger.info(f"上传API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"上传API HTTP错误: {response.status_code}, 响应: {response.text}")
            return _upload_failure(
                response.status_code,
                f"上传失败,HTTP状态码: {response.status_code}",
            )

        # Parse in a dedicated try: recent requests versions raise a JSON
        # error that is also a RequestException, so a shared handler chain
        # would report a malformed body as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析API响应JSON失败: {str(e)}")
            return _upload_failure(500, f"解析响应数据失败: {str(e)}")

        logger.info(f"上传API响应内容: {result}")

        if result.get('code') != 200:
            error_msg = result.get('message', '未知错误')
            logger.error(f"上传API业务错误: {error_msg}")
            return _upload_failure(result.get('code', 500), error_msg)

        return result

    except requests.exceptions.RequestException as e:
        logger.error(f"调用上传API网络错误: {str(e)}")
        return _upload_failure(500, f"网络请求错误: {str(e)}")
    except Exception as e:
        logger.error(f"调用上传API其他错误: {str(e)}")
        return _upload_failure(500, f"上传API调用失败: {str(e)}")
|
|
|
|
|
|
|
|
|
|
def call_delete_document_api(external_id, document_id):
    """Delete one document from a dataset on the external service.

    Issues a DELETE to ``/api/dataset/{external_id}/document/{document_id}``
    under ``settings.API_BASE_URL``.

    Returns:
        The parsed JSON body of the response, or ``None`` when building the
        URL, sending the request or decoding the body raises.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_id}'
        parsed_body = requests.delete(url).json()
    except Exception as err:
        logger.error(f"调用删除API失败: {str(err)}")
        return None
    else:
        return parsed_body
|
|
|
|
|
|
|
|
|
|
def get_external_document_list(external_id):
    """Fetch the document list of a dataset on the external service.

    Issues a GET to ``/api/dataset/{external_id}/document`` under
    ``settings.API_BASE_URL``.

    Args:
        external_id: Id of the dataset on the external service.

    Returns:
        The ``data`` value of the response payload; an empty list when the
        key is missing or null.

    Raises:
        ExternalAPIError: On transport failure, a non-200 HTTP status, an
            unparseable body, a payload whose ``code`` is not 200, or any
            other unexpected error.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'
        logger.info(f"调用获取文档列表API: {url}")

        # NOTE(review): no timeout is set on this request.
        response = requests.get(
            url,
            headers={'Content-Type': 'application/json'},
        )

        logger.info(f"文档列表API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档列表失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档列表失败,状态码: {response.status_code}")

        # Parse in a dedicated try: recent requests versions raise a JSON
        # error that is also a RequestException, so a shared handler chain
        # would report a malformed body as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档列表响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档列表API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档列表业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档列表失败: {result.get('message', '未知错误')}")

        # A null ``data`` is normalised to the same empty list used when the
        # key is absent.
        data = result.get('data')
        return [] if data is None else data

    except ExternalAPIError:
        # Raised deliberately above with its final message; do not let the
        # catch-all handler log it as "other error" and wrap it again.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档列表网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档列表其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
def get_external_document_paragraphs(external_id, document_external_id):
    """Fetch the paragraphs of one document on the external service.

    Issues a GET to
    ``/api/dataset/{external_id}/document/{document_external_id}/paragraph``
    under ``settings.API_BASE_URL``.

    Args:
        external_id: Id of the dataset on the external service.
        document_external_id: Id of the document within that dataset.

    Returns:
        The ``data`` value of the response payload; an empty list when the
        key is missing or null.

    Raises:
        ExternalAPIError: On transport failure, a non-200 HTTP status, an
            unparseable body, a payload whose ``code`` is not 200, or any
            other unexpected error.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_external_id}/paragraph'
        logger.info(f"调用获取文档段落API: {url}")

        # NOTE(review): no timeout is set on this request.
        response = requests.get(url)

        logger.info(f"文档段落API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档段落内容失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档段落内容失败,状态码: {response.status_code}")

        # Parse in a dedicated try: recent requests versions raise a JSON
        # error that is also a RequestException, so a shared handler chain
        # would report a malformed body as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档段落响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档段落API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档段落内容业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档段落内容失败: {result.get('message', '未知错误')}")

        # A null ``data`` is normalised to the same empty list used when the
        # key is absent.
        data = result.get('data')
        return [] if data is None else data

    except ExternalAPIError:
        # Raised deliberately above with its final message; do not let the
        # catch-all handler log it as "other error" and wrap it again.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档段落内容网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档段落内容其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|