# apps/common/services/external_api_service.py
"""Thin client for the external dataset/knowledge-base HTTP API.

Every function builds its URL from ``settings.API_BASE_URL`` and talks to the
remote service with ``requests``. All outbound calls carry an explicit
timeout so a stalled remote service cannot block a worker indefinitely.
"""
import json
import logging
import traceback

import requests
from django.conf import settings
from rest_framework.exceptions import APIException

logger = logging.getLogger(__name__)

# Fallback (connect, read) timeout in seconds, used when the Django settings
# do not define EXTERNAL_API_TIMEOUT.
_DEFAULT_TIMEOUT = (10, 120)


def _request_timeout():
    """Return the timeout passed to every outbound ``requests`` call.

    Read at call time so ``settings.EXTERNAL_API_TIMEOUT`` (optional) can
    override the module default.
    """
    return getattr(settings, 'EXTERNAL_API_TIMEOUT', _DEFAULT_TIMEOUT)


def _build_split_fallback(files, content_text):
    """Build a success-shaped split response with one placeholder section per file."""
    return {
        'code': 200,
        'message': '成功',
        'data': [
            {
                'name': file.name,
                'content': [
                    {
                        'title': '文档内容',
                        'content': content_text,
                    }
                ],
            }
            for file in files
        ],
    }


class ExternalAPIError(APIException):
    """Raised when a call to the external API fails (rendered by DRF as HTTP 500)."""

    status_code = 500
    default_detail = '外部API调用失败'
    default_code = 'external_api_error'


def create_external_dataset(knowledge_base):
    """Create a dataset on the external service and return its id.

    Args:
        knowledge_base: object exposing ``name`` and ``desc`` attributes.

    Returns:
        The ``id`` found under ``data`` in the API response.

    Raises:
        ExternalAPIError: on timeout, transport error, non-200 HTTP status,
            non-200 business ``code`` or a response without a dataset id.
    """
    try:
        api_data = {
            "name": knowledge_base.name,
            "desc": knowledge_base.desc,
            "type": "0",
            "meta": {},
            "documents": [],
        }

        response = requests.post(
            f'{settings.API_BASE_URL}/api/dataset',
            json=api_data,
            headers={'Content-Type': 'application/json'},
            timeout=_request_timeout(),
        )

        if response.status_code != 200:
            raise ExternalAPIError(f"创建失败,状态码: {response.status_code}, 响应: {response.text}")

        api_response = response.json()
        if api_response.get('code') != 200:
            raise ExternalAPIError(f"业务处理失败: {api_response.get('message', '未知错误')}")

        # `data` may be present but null; treat that like a missing id.
        dataset_id = (api_response.get('data') or {}).get('id')
        if not dataset_id:
            raise ExternalAPIError("响应数据中缺少dataset id")

        return dataset_id

    except ExternalAPIError:
        # Already a domain error with a precise message; do not re-wrap it.
        raise
    except requests.exceptions.Timeout as e:
        raise ExternalAPIError("请求超时,请稍后重试") from e
    except requests.exceptions.RequestException as e:
        raise ExternalAPIError(f"API请求失败: {str(e)}") from e
    except Exception as e:
        raise ExternalAPIError(f"创建外部知识库失败: {str(e)}") from e


def delete_external_dataset(external_id):
    """Delete a dataset on the external service (best effort).

    Any HTTP response, whatever its status, yields True so that the local
    deletion can proceed; only a failure to obtain a response yields False.

    Args:
        external_id: id of the dataset on the external service; a falsy
            value skips the call.

    Returns:
        True when local deletion may continue, False on timeout, transport
        error or any other unexpected exception.
    """
    try:
        if not external_id:
            logger.warning("外部知识库ID为空,跳过删除")
            return True

        response = requests.delete(
            f'{settings.API_BASE_URL}/api/dataset/{external_id}',
            headers={'Content-Type': 'application/json'},
            timeout=_request_timeout(),
        )

        logger.info(f"删除外部知识库响应: status_code={response.status_code}, response={response.text}")

        if response.status_code == 404:
            logger.warning(f"外部知识库不存在: {external_id}")
            return True

        if response.status_code not in (200, 204):
            # Deliberately non-fatal so the local deletion can continue, but
            # leave a trace instead of ignoring the status silently.
            logger.warning(
                f"Unexpected status {response.status_code} while deleting external dataset "
                f"{external_id}; continuing with local deletion"
            )
            return True

        if response.status_code == 204:
            logger.info(f"外部知识库删除成功: {external_id}")
            return True

        try:
            api_response = response.json()
        except ValueError:
            logger.warning(f"外部知识库删除响应无法解析JSON,但状态码为200,视为成功: {external_id}")
            return True

        if api_response.get('code') != 200:
            # `message` may be null; normalise it before the substring test.
            if "不存在" in (api_response.get('message') or ''):
                logger.warning(f"外部知识库ID不存在,视为删除成功: {external_id}")
                return True
            logger.warning(f"业务处理返回非200状态码: {api_response.get('code')}, {api_response.get('message')}")
            return True

        logger.info(f"外部知识库删除成功: {external_id}")
        return True

    except requests.exceptions.Timeout:
        logger.error(f"删除外部知识库超时: {external_id}")
        return False
    except requests.exceptions.RequestException as e:
        logger.error(f"删除外部知识库请求异常: {external_id}, error={str(e)}")
        return False
    except Exception as e:
        logger.error(f"删除外部知识库其他错误: {external_id}, error={str(e)}")
        return False


def call_split_api_multiple(files):
    """Send several files to the document split API in one multipart request.

    Args:
        files: re-iterable collection of file-like objects exposing ``name``,
            ``size`` and ``content_type``; each is sent under the ``file``
            form field.

    Returns:
        The parsed JSON response; ``None`` when the API answers with a
        non-200 HTTP status; or a success-shaped fallback payload (one
        placeholder section per file) when the API returns no data or any
        exception occurs.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/document/split'

        # All files are sent under the same 'file' form field.
        files_data = [('file', (file.name, file, file.content_type)) for file in files]

        for file in files:
            logger.info(f"准备上传文件: {file.name}, 大小: {file.size}字节, 类型: {file.content_type}")
            # Log a short preview of the content, then rewind for the upload.
            if hasattr(file, 'read') and hasattr(file, 'seek'):
                file.seek(0)
                raw_preview = file.read(100)
                # Text-mode files return str; only bytes need decoding.
                if isinstance(raw_preview, bytes):
                    content_preview = raw_preview.decode('utf-8', errors='ignore')
                else:
                    content_preview = str(raw_preview)
                logger.info(f"文件内容预览: {content_preview}")
                file.seek(0)

        logger.info(f"调用分割API URL: {url}")
        logger.info(f"上传文件数量: {len(files_data)}")

        response = requests.post(
            url,
            files=files_data,
            timeout=_request_timeout(),
        )

        logger.info(f"请求头: {response.request.headers}")
        logger.info(f"响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"分割API返回错误状态码: {response.status_code}, 响应: {response.text}")
            return None

        result = response.json()
        logger.info(f"分割API响应详情: {result}")

        # Missing, null or empty `data` all mean the API produced nothing.
        if not result.get('data'):
            logger.warning("分割API返回的数据为空,尝试使用后备方案")
            fallback_data = _build_split_fallback(
                files, '文件内容无法自动分割,请检查外部API。这是一个后备内容。'
            )
            logger.info("使用后备数据结构")
            return fallback_data

        return result

    except Exception as e:
        logger.error(f"调用分割API失败: {str(e)}")
        logger.error(traceback.format_exc())

        fallback_response = _build_split_fallback(files, '文件内容无法自动分割,请检查API连接。')
        logger.info("由于异常,返回后备响应")
        return fallback_response


def call_upload_api(external_id, doc_data):
    """Upload one document to an external dataset.

    Args:
        external_id: id of the dataset on the external service.
        doc_data: dict sent as the JSON body; ``name`` and ``paragraphs``
            are read for logging only.

    Returns:
        The parsed API response on success, otherwise a dict of the form
        ``{'code': ..., 'message': ..., 'data': None}`` describing the
        failure. This function does not raise.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'

        logger.info(f"调用文档上传API: {url}")
        logger.info(f"上传文档数据: 文档名={doc_data.get('name')}, 段落数={len(doc_data.get('paragraphs', []))}")

        response = requests.post(url, json=doc_data, timeout=_request_timeout())

        logger.info(f"上传API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"上传API HTTP错误: {response.status_code}, 响应: {response.text}")
            return {
                'code': response.status_code,
                'message': f"上传失败,HTTP状态码: {response.status_code}",
                'data': None,
            }

        # Parse separately: recent `requests` raises a JSON error that is also
        # a RequestException, which would otherwise be reported as a network
        # error by the handler below.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析API响应JSON失败: {str(e)}")
            return {
                'code': 500,
                'message': f"解析响应数据失败: {str(e)}",
                'data': None,
            }

        logger.info(f"上传API响应内容: {result}")

        if result.get('code') != 200:
            error_msg = result.get('message', '未知错误')
            logger.error(f"上传API业务错误: {error_msg}")
            return {
                'code': result.get('code', 500),
                'message': error_msg,
                'data': None,
            }

        return result

    except requests.exceptions.RequestException as e:
        logger.error(f"调用上传API网络错误: {str(e)}")
        return {
            'code': 500,
            'message': f"网络请求错误: {str(e)}",
            'data': None,
        }
    except Exception as e:
        logger.error(f"调用上传API其他错误: {str(e)}")
        return {
            'code': 500,
            'message': f"上传API调用失败: {str(e)}",
            'data': None,
        }


def call_delete_document_api(external_id, document_id):
    """Delete one document from an external dataset.

    Returns:
        The parsed JSON response, or ``None`` when the request or the JSON
        parsing fails (the error is logged).
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_id}'
        response = requests.delete(url, timeout=_request_timeout())
        return response.json()
    except Exception as e:
        logger.error(f"调用删除API失败: {str(e)}")
        return None


def get_external_document_list(external_id):
    """Return the document list of an external dataset.

    Returns:
        The ``data`` field of the API response (``[]`` when absent).

    Raises:
        ExternalAPIError: on transport error, non-200 HTTP status, invalid
            JSON or non-200 business ``code``.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document'

        logger.info(f"调用获取文档列表API: {url}")

        response = requests.get(
            url,
            headers={'Content-Type': 'application/json'},
            timeout=_request_timeout(),
        )

        logger.info(f"文档列表API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档列表失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档列表失败,状态码: {response.status_code}")

        # Parse separately so invalid JSON is not reported as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档列表响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档列表API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档列表业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档列表失败: {result.get('message', '未知错误')}")

        return result.get('data', [])

    except ExternalAPIError:
        # Already logged and carrying a precise message; do not re-wrap it.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档列表网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档列表其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档列表失败: {str(e)}") from e


def get_external_document_paragraphs(external_id, document_external_id):
    """Return the paragraphs of one document in an external dataset.

    Returns:
        The ``data`` field of the API response (``[]`` when absent).

    Raises:
        ExternalAPIError: on transport error, non-200 HTTP status, invalid
            JSON or non-200 business ``code``.
    """
    try:
        url = f'{settings.API_BASE_URL}/api/dataset/{external_id}/document/{document_external_id}/paragraph'

        logger.info(f"调用获取文档段落API: {url}")

        response = requests.get(url, timeout=_request_timeout())

        logger.info(f"文档段落API响应状态码: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"获取文档段落内容失败: {response.status_code}, 响应: {response.text}")
            raise ExternalAPIError(f"获取文档段落内容失败,状态码: {response.status_code}")

        # Parse separately so invalid JSON is not reported as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error(f"解析文档段落响应JSON失败: {str(e)}")
            raise ExternalAPIError(f"解析响应数据失败: {str(e)}") from e

        logger.info(f"文档段落API响应内容: {result}")

        if result.get('code') != 200:
            logger.error(f"获取文档段落内容业务错误: {result.get('message', '未知错误')}")
            raise ExternalAPIError(f"获取文档段落内容失败: {result.get('message', '未知错误')}")

        return result.get('data', [])

    except ExternalAPIError:
        # Already logged and carrying a precise message; do not re-wrap it.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"获取文档段落内容网络错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e
    except Exception as e:
        logger.error(f"获取文档段落内容其他错误: {str(e)}")
        raise ExternalAPIError(f"获取文档段落内容失败: {str(e)}") from e