From d13b21e284c2da9b18f87474e3335381a75258f7 Mon Sep 17 00:00:00 2001 From: wanjia Date: Tue, 3 Jun 2025 16:23:57 +0800 Subject: [PATCH] =?UTF-8?q?=E6=97=A0=E6=9D=83=E9=99=90=E7=9A=84=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E7=B3=BB=E7=BB=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/knowledge_base/models.py | 10 +- apps/knowledge_base/views.py | 2153 ++++++++++++++------------------- daren/urls.py | 1 + 3 files changed, 917 insertions(+), 1247 deletions(-) diff --git a/apps/knowledge_base/models.py b/apps/knowledge_base/models.py index 92f0127..f94b3bb 100644 --- a/apps/knowledge_base/models.py +++ b/apps/knowledge_base/models.py @@ -108,14 +108,8 @@ class KnowledgeBase(models.Model): def clean(self): """验证知识库类型与创建者权限是否匹配""" - try: - user = User.objects.get(id=self.user_id) - if user.role == 'member' and self.type != 'private': - raise ValidationError('组员只能创建私人知识库') - if user.role == 'leader' and self.type not in ['member', 'private']: - raise ValidationError('组长只能创建成员级或私人知识库') - except User.DoesNotExist: - raise ValidationError('创建者不存在') + # 移除所有权限验证,允许任何用户创建任何类型知识库 + pass def to_response_dict(self): """转换为API响应格式""" diff --git a/apps/knowledge_base/views.py b/apps/knowledge_base/views.py index 605b195..2a9985e 100644 --- a/apps/knowledge_base/views.py +++ b/apps/knowledge_base/views.py @@ -1,1270 +1,945 @@ -# # apps/knowledge_base/views.py -# import logging -# import json -# import traceback -# from django.db.models import Q -# from django.db import transaction -# from django.utils import timezone -# from django.http import Http404 -# from rest_framework import viewsets, status -# from rest_framework.response import Response -# from rest_framework.permissions import IsAuthenticated -# from rest_framework.decorators import action -# import requests -# from apps.user.models import User -# from apps.knowledge_base.models import KnowledgeBase, KnowledgeBaseDocument -# from apps.permissions.models import KnowledgeBasePermission as KBPermissionModel -# from apps.permissions.services.permission_service import KnowledgeBasePermissionMixin -# from apps.knowledge_base.serializers import KnowledgeBaseSerializer, KnowledgeBaseDocumentSerializer -# from apps.common.services.external_api_service import ( -# create_external_dataset, delete_external_dataset, call_split_api_multiple, -# call_upload_api, call_delete_document_api, ExternalAPIError, -# get_external_document_list, get_external_document_paragraphs, -# call_delete_document_api -# ) -# from daren import settings - -# logger = logging.getLogger(__name__) - -# class KnowledgeBaseViewSet(KnowledgeBasePermissionMixin, viewsets.ModelViewSet): -# serializer_class = KnowledgeBaseSerializer -# permission_classes = [IsAuthenticated] - -# def list(self, request, *args, **kwargs): -# try: -# queryset = self.get_queryset() -# keyword = request.query_params.get('keyword', '') - -# if keyword: -# query = Q(name__icontains=keyword) | Q(desc__icontains=keyword) | \ -# Q(department__icontains=keyword) | Q(group__icontains=keyword) -# queryset = queryset.filter(query) - -# try: -# page = int(request.query_params.get('page', 1)) -# page_size = int(request.query_params.get('page_size', 10)) -# except ValueError: -# page = 1 -# page_size = 10 - -# total = queryset.count() -# start = (page - 1) * page_size -# end = start + page_size -# paginated_queryset = queryset[start:end] - -# serializer = self.get_serializer(paginated_queryset, many=True) -# data = serializer.data -# user = request.user - -# for item in data: -# kb_type = item['type'] -# department = item.get('department') -# group = item.get('group') -# creator_id = item.get('user_id') -# kb_id = item['id'] - -# explicit_permission = KBPermissionModel.objects.filter( -# knowledge_base_id=kb_id, -# user=user, -# status='active' -# ).first() - -# if explicit_permission: -# item['permissions'] = { -# 'can_read': explicit_permission.can_read, -# 'can_edit': explicit_permission.can_edit, -# 'can_delete': explicit_permission.can_delete -# } -# item['expires_at'] = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None -# else: -# item['permissions'] = { -# 'can_read': self._can_read(kb_type, user, department, group, creator_id, kb_id), -# 'can_edit': self._can_edit(kb_type, user, department, group, creator_id, kb_id), -# 'can_delete': self._can_delete(kb_type, user, department, group, creator_id, kb_id) -# } -# item['expires_at'] = None if kb_type == 'admin' else None - -# if keyword: -# if 'name' in item and keyword.lower() in item['name'].lower(): -# item['highlighted_name'] = item['name'].replace( -# keyword, f'{keyword}' -# ) -# if 'desc' in item and item.get('desc') is not None: -# desc_text = str(item['desc']) -# if keyword.lower() in desc_text.lower(): -# item['highlighted_desc'] = desc_text.replace( -# keyword, f'{keyword}' -# ) - -# return Response({ -# "code": 200, -# "message": "获取知识库列表成功", -# "data": { -# "total": total, -# "page": page, -# "page_size": page_size, -# "keyword": keyword if keyword else None, -# "items": data -# } -# }) -# except Exception as e: -# logger.error(f"获取知识库列表失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"获取知识库列表失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# def get_queryset(self): -# user = self.request.user -# queryset = KnowledgeBase.objects.all() -# permission_conditions = Q() - -# permission_conditions |= Q(type='admin') -# permission_conditions |= Q(user_id=user.id) - -# active_permissions = KBPermissionModel.objects.filter( -# user=user, -# can_read=True, -# status='active', -# expires_at__gt=timezone.now() -# ).values_list('knowledge_base_id', flat=True) - -# if active_permissions: -# permission_conditions |= Q(id__in=active_permissions) - -# if user.role == 'admin': -# permission_conditions |= ~Q(type='private') | Q(user_id=user.id) -# elif user.role == 'leader': -# permission_conditions |= Q(type__in=['leader', 'member'], department=user.department) -# elif user.role in ['member', 'user']: -# permission_conditions |= Q(type='leader', department=user.department) -# permission_conditions |= Q(type='member', department=user.department, group=user.group) - -# return queryset.filter(permission_conditions).distinct() - -# def create(self, request, *args, **kwargs): -# try: -# name = request.data.get('name') -# if not name: -# return Response({ -# 'code': 400, -# 'message': '知识库名称不能为空', -# 'data': None -# }, status=status.HTTP_400_BAD_REQUEST) - -# if KnowledgeBase.objects.filter(name=name).exists(): -# return Response({ -# 'code': 400, -# 'message': f'知识库名称 "{name}" 已存在', -# 'data': None -# }, status=status.HTTP_400_BAD_REQUEST) - -# user = request.user -# type = request.data.get('type', 'private') -# department = request.data.get('department') -# group = request.data.get('group') - -# if type == 'admin': -# department = None -# group = None -# elif type == 'secret': -# if user.role != 'admin': -# return Response({ -# 'code': 403, -# 'message': '只有管理员可以创建保密级知识库', -# 'data': None -# }, status=status.HTTP_403_FORBIDDEN) -# department = None -# group = None -# elif type == 'leader': -# if user.role != 'admin': -# return Response({ -# 'code': 403, -# 'message': '只有管理员可以创建组长级知识库', -# 'data': None -# }, status=status.HTTP_403_FORBIDDEN) -# if not department: -# return Response({ -# 'code': 400, -# 'message': '创建组长级知识库时必须指定部门', -# 'data': None -# }, status=status.HTTP_400_BAD_REQUEST) -# elif type == 'member': -# if user.role not in ['admin', 'leader']: -# return Response({ -# 'code': 403, -# 'message': '只有管理员和组长可以创建成员级知识库', -# 'data': None -# }, status=status.HTTP_403_FORBIDDEN) -# if user.role == 'admin' and not department: -# return Response({ -# 'code': 400, -# 'message': '管理员创建成员知识库时必须指定部门', -# 'data': None -# }, status=status.HTTP_400_BAD_REQUEST) -# elif user.role == 'leader': -# department = user.department -# if not group: -# return Response({ -# 'code': 400, -# 'message': '创建成员知识库时必须指定组', -# 'data': None -# }, status=status.HTTP_400_BAD_REQUEST) -# elif type == 'private': -# department = None -# group = None - -# data = request.data.copy() -# data['department'] = department -# data['group'] = group - -# serializer = self.get_serializer(data=data, context={'request': request}) -# if not serializer.is_valid(): -# logger.error(f"数据验证失败: {serializer.errors}") -# return Response({ -# 'code': 400, -# 'message': '数据验证失败', -# 'data': serializer.errors -# }, status=status.HTTP_400_BAD_REQUEST) - -# with transaction.atomic(): -# knowledge_base = serializer.save() -# logger.info(f"知识库创建成功: id={knowledge_base.id}, name={knowledge_base.name}, user_id={knowledge_base.user_id}") - -# external_id = create_external_dataset(knowledge_base) -# logger.info(f"外部知识库创建成功,获取ID: {external_id}") - -# knowledge_base.external_id = external_id -# knowledge_base.save() -# logger.info(f"更新knowledge_base的external_id为: {external_id}") - -# KBPermissionModel.objects.create( -# knowledge_base=knowledge_base, -# user=request.user, -# can_read=True, -# can_edit=True, -# can_delete=True, -# granted_by=request.user, -# status='active' -# ) -# logger.info(f"创建者权限创建成功") - -# permissions = [] -# if type == 'admin': -# users_query = User.objects.exclude(id=request.user.id) -# permissions = [ -# KBPermissionModel( -# knowledge_base=knowledge_base, -# user=user, -# can_read=True, -# can_edit=True, -# can_delete=True, -# granted_by=request.user, -# status='active' -# ) for user in users_query -# ] -# elif type == 'secret': -# users_query = User.objects.filter(role='admin').exclude(id=request.user.id) -# permissions = [ -# KBPermissionModel( -# knowledge_base=knowledge_base, -# user=user, -# can_read=True, -# can_edit=self._can_edit(type, user), -# can_delete=self._can_delete(type, user), -# granted_by=request.user, -# status='active' -# ) for user in users_query -# ] -# elif type == 'leader': -# users_query = User.objects.filter( -# Q(role='admin') | Q(role='leader', department=department) -# ).exclude(id=request.user.id) -# permissions = [ -# KBPermissionModel( -# knowledge_base=knowledge_base, -# user=user, -# can_read=True, -# can_edit=self._can_edit(type, user), -# can_delete=self._can_delete(type, user), -# granted_by=request.user, -# status='active' -# ) for user in users_query -# ] -# elif type == 'member': -# users_query = User.objects.filter( -# Q(role='admin') | Q(department=department, role='leader') | -# Q(department=department, group=group, role='member') -# ).exclude(id=request.user.id) -# permissions = [ -# KBPermissionModel( -# knowledge_base=knowledge_base, -# user=user, -# can_read=True, -# can_edit=self._can_edit(type, user), -# can_delete=self._can_delete(type, user), -# granted_by=request.user, -# status='active' -# ) for user in users_query -# ] - -# if permissions: -# KBPermissionModel.objects.bulk_create(permissions) -# logger.info(f"{type}类型权限创建完成: {len(permissions)}条记录") - -# return Response({ -# 'code': 200, -# 'message': '知识库创建成功', -# 'data': { -# 'knowledge_base': serializer.data, -# 'external_id': knowledge_base.external_id -# } -# }) - -# except ExternalAPIError as e: -# logger.error(f"外部知识库创建失败: {str(e)}") -# return Response({ -# 'code': 500, -# 'message': f'创建知识库失败: {str(e)}', -# 'data': None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -# except Exception as e: -# logger.error(f"创建知识库失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# 'code': 500, -# 'message': f'创建知识库失败: {str(e)}', -# 'data': None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# def update(self, request, *args, **kwargs): -# try: -# instance = self.get_object() -# user = request.user - -# if not self.check_knowledge_base_permission(instance, user, 'edit'): -# return Response({ -# "code": 403, -# "message": "没有编辑权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) - -# with transaction.atomic(): -# serializer = self.get_serializer(instance, data=request.data, partial=True) -# serializer.is_valid(raise_exception=True) -# self.perform_update(serializer) - -# if instance.external_id: -# try: -# api_data = { -# "name": serializer.validated_data.get('name', instance.name), -# "desc": serializer.validated_data.get('desc', instance.desc), -# "type": "0", -# "meta": {}, -# "documents": [] -# } - -# response = requests.put( -# f'{settings.API_BASE_URL}/api/dataset/{instance.external_id}', -# json=api_data, -# headers={'Content-Type': 'application/json'}, -# ) - -# if response.status_code != 200: -# raise ExternalAPIError(f"更新外部知识库失败,状态码: {response.status_code}, 响应: {response.text}") - -# api_response = response.json() -# if not api_response.get('code') == 200: -# raise ExternalAPIError(f"更新外部知识库失败: {api_response.get('message', '未知错误')}") - -# logger.info(f"外部知识库更新成功: {instance.external_id}") - -# except requests.exceptions.Timeout: -# raise ExternalAPIError("请求超时,请稍后重试") -# except requests.exceptions.RequestException as e: -# raise ExternalAPIError(f"API请求失败: {str(e)}") -# except Exception as e: -# raise ExternalAPIError(f"更新外部知识库失败: {str(e)}") - -# return Response({ -# "code": 200, -# "message": "知识库更新成功", -# "data": serializer.data -# }) - -# except Http404: -# return Response({ -# "code": 404, -# "message": "知识库不存在", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) -# except ExternalAPIError as e: -# logger.error(f"更新外部知识库失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": str(e), -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -# except Exception as e: -# logger.error(f"更新知识库失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"更新知识库失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# def destroy(self, request, *args, **kwargs): -# try: -# instance = self.get_object() -# user = request.user - -# if not self.check_knowledge_base_permission(instance, user, 'delete'): -# return Response({ -# "code": 403, -# "message": "没有删除权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) - -# external_delete_success = True -# external_error_message = None -# if instance.external_id: -# external_delete_success = delete_external_dataset(instance.external_id) -# if not external_delete_success: -# external_error_message = "外部知识库删除失败" -# logger.warning(f"外部知识库删除失败,将继续删除本地知识库: {external_error_message}") - -# self.perform_destroy(instance) -# logger.info(f"本地知识库删除成功: id={instance.id}, name={instance.name}") - -# if not external_delete_success: -# return Response({ -# "code": 200, -# "message": f"知识库已删除,但外部知识库删除失败: {external_error_message}", -# "data": None -# }) - -# return Response({ -# "code": 200, -# "message": "知识库删除成功", -# "data": None -# }) - -# except Http404: -# return Response({ -# "code": 404, -# "message": "知识库不存在", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) -# except Exception as e: -# logger.error(f"删除知识库失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"删除知识库失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# @action(detail=True, methods=['get']) -# def permissions(self, request, pk=None): -# try: -# instance = self.get_object() -# user = request.user - -# permissions_data = { -# "can_read": self.check_knowledge_base_permission(instance, user, 'read'), -# "can_edit": self.check_knowledge_base_permission(instance, user, 'edit'), -# "can_delete": self.check_knowledge_base_permission(instance, user, 'delete') -# } - -# return Response({ -# "code": 200, -# "message": "获取权限信息成功", -# "data": { -# "knowledge_base_id": instance.id, -# "knowledge_base_name": instance.name, -# "permissions": permissions_data -# } -# }) - -# except Http404: -# return Response({ -# "code": 404, -# "message": "知识库不存在", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) -# except Exception as e: -# logger.error(f"获取权限信息失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"获取权限信息失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# @action(detail=False, methods=['get']) -# def summary(self, request): -# try: -# user = request.user -# queryset = KnowledgeBase.objects.exclude(type='secret') -# summaries = [] - -# for kb in queryset: -# permissions = { -# 'can_read': self.check_knowledge_base_permission(kb, user, 'read'), -# 'can_edit': self.check_knowledge_base_permission(kb, user, 'edit'), -# 'can_delete': self.check_knowledge_base_permission(kb, user, 'delete') -# } - -# explicit_permission = KBPermissionModel.objects.filter( -# knowledge_base_id=kb.id, -# user=user, -# status='active' -# ).first() - -# expires_at = None -# if explicit_permission: -# expires_at = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None -# elif kb.type == 'admin': -# expires_at = None - -# summary = { -# 'id': str(kb.id), -# 'name': kb.name, -# 'desc': kb.desc, -# 'type': kb.type, -# 'department': kb.department, -# 'permissions': permissions, -# 'expires_at': expires_at -# } -# summaries.append(summary) - -# return Response({ -# 'code': 200, -# 'message': '获取知识库概要信息成功', -# 'data': summaries -# }) - -# except Exception as e: -# logger.error(f"获取知识库概要信息失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# 'code': 500, -# 'message': f'获取知识库概要信息失败: {str(e)}', -# 'data': None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# def retrieve(self, request, *args, **kwargs): -# try: -# instance = self.get_object() -# serializer = self.get_serializer(instance) -# data = serializer.data -# user = request.user - -# data['permissions'] = { -# 'can_read': self.check_knowledge_base_permission(instance, user, 'read'), -# 'can_edit': self.check_knowledge_base_permission(instance, user, 'edit'), -# 'can_delete': self.check_knowledge_base_permission(instance, user, 'delete') -# } - -# explicit_permission = KBPermissionModel.objects.filter( -# knowledge_base_id=instance.id, -# user=user, -# status='active' -# ).first() - -# if explicit_permission: -# data['expires_at'] = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None -# else: -# data['expires_at'] = None if instance.type == 'admin' else None - -# return Response({ -# 'code': 200, -# 'message': '获取知识库详情成功', -# 'data': data -# }) -# except Exception as e: -# logger.error(f"获取知识库详情失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# 'code': 500, -# 'message': f'获取知识库详情失败: {str(e)}', -# 'data': None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# @action(detail=False, methods=['get']) -# def search(self, request): -# try: -# keyword = request.query_params.get('keyword', '') -# if not keyword: -# return Response({ -# "code": 400, -# "message": "搜索关键字不能为空", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) - -# try: -# page = int(request.query_params.get('page', 1)) -# page_size = int(request.query_params.get('page_size', 10)) -# except ValueError: -# page = 1 -# page_size = 10 - -# query = Q(name__icontains=keyword) | Q(desc__icontains=keyword) | \ -# Q(department__icontains=keyword) | Q(group__icontains=keyword) - -# queryset = KnowledgeBase.objects.filter(query).exclude(type='secret') -# user = request.user - -# active_permissions = KBPermissionModel.objects.filter( -# user=user, -# status='active', -# expires_at__gt=timezone.now() -# ).select_related('knowledge_base') - -# permission_map = { -# str(perm.knowledge_base.id): { -# 'can_read': perm.can_read, -# 'can_edit': perm.can_edit, -# 'can_delete': perm.can_delete -# } -# for perm in active_permissions -# } - -# total = queryset.count() -# start = (page - 1) * page_size -# end = start + page_size -# paginated_queryset = queryset[start:end] - -# serializer = self.get_serializer(paginated_queryset, many=True) -# data = serializer.data -# result_items = [] - -# for item in data: -# temp_kb = KnowledgeBase( -# id=item['id'], -# type=item['type'], -# department=item.get('department'), -# group=item.get('group'), -# user_id=item.get('user_id') -# ) - -# explicit_permission = KBPermissionModel.objects.filter( -# knowledge_base_id=item['id'], -# user=user, -# status='active' -# ).first() - -# if explicit_permission: -# kb_permissions = { -# 'can_read': explicit_permission.can_read, -# 'can_edit': explicit_permission.can_edit, -# 'can_delete': explicit_permission.can_delete -# } -# item['expires_at'] = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None -# else: -# kb_permissions = { -# 'can_read': self.check_knowledge_base_permission(temp_kb, user, 'read'), -# 'can_edit': self.check_knowledge_base_permission(temp_kb, user, 'edit'), -# 'can_delete': self.check_knowledge_base_permission(temp_kb, user, 'delete') -# } -# item['expires_at'] = None if item['type'] == 'admin' else None - -# item['permissions'] = kb_permissions - -# if kb_permissions['can_read']: -# result_items.append(item) -# else: -# summary_info = { -# 'id': item['id'], -# 'name': item['name'], -# 'type': item['type'], -# 'department': item.get('department'), -# 'permissions': kb_permissions -# } -# result_items.append(summary_info) - -# if 'name' in item and keyword.lower() in item['name'].lower(): -# item['highlighted_name'] = item['name'].replace( -# keyword, f'{keyword}' -# ) -# if 'desc' in item and item.get('desc') is not None: -# desc_text = str(item['desc']) -# if keyword.lower() in desc_text.lower(): -# item['highlighted_desc'] = desc_text.replace( -# keyword, f'{keyword}' -# ) - -# return Response({ -# "code": 200, -# "message": "搜索知识库成功", -# "data": { -# "total": total, -# "page": page, -# "page_size": page_size, -# "keyword": keyword, -# "items": result_items -# } -# }) -# except Exception as e: -# logger.error(f"搜索知识库失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"搜索知识库失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# @action(detail=True, methods=['post']) -# def change_type(self, request, pk=None): -# try: -# instance = self.get_object() -# user = request.user - -# if not self.check_knowledge_base_permission(instance, user, 'edit'): -# return Response({ -# "code": 403, -# "message": "没有修改权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) - -# new_type = request.data.get('type') -# if not new_type: -# return Response({ -# "code": 400, -# "message": "新类型不能为空", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) - -# valid_types = ['private', 'admin', 'secret', 'leader', 'member'] -# if new_type not in valid_types: -# return Response({ -# "code": 400, -# "message": f"无效的知识库类型,可选值: {', '.join(valid_types)}", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) - -# if new_type == 'leader' and not user.role == 'admin': -# if new_type not in ['private', 'member']: -# return Response({ -# "code": 403, -# "message": "组长只能将知识库设置为private或member类型", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) - -# department = request.data.get('department') -# group = request.data.get('group') - -# if new_type == 'leader' and not user.role == 'admin': -# if department and department != user.department: -# return Response({ -# "code": 403, -# "message": "组长只能为本部门设置知识库", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) -# department = user.department - -# if new_type == 'leader': -# if not department: -# return Response({ -# "code": 400, -# "message": "组长级知识库必须指定部门", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) - -# if new_type == 'member': -# if not department: -# return Response({ -# "code": 400, -# "message": "成员级知识库必须指定部门", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) -# if not group: -# return Response({ -# "code": 400, -# "message": "成员级知识库必须指定组", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) - -# if new_type in ['admin', 'secret']: -# department = None -# group = None - -# if new_type == 'private': -# if department is None: -# department = instance.department -# if group is None: -# group = instance.group - -# instance.type = new_type -# instance.department = department -# instance.group = group -# instance.save() - -# return Response({ -# "code": 200, -# "message": f"知识库类型已更新为{new_type}", -# "data": { -# "id": instance.id, -# "name": instance.name, -# "type": instance.type, -# "department": instance.department, -# "group": instance.group -# } -# }) - -# except Http404: -# return Response({ -# "code": 404, -# "message": "知识库不存在", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) -# except Exception as e: -# logger.error(f"修改知识库类型失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"修改知识库类型失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# @action(detail=True, methods=['post']) -# def upload_document(self, request, pk=None): -# try: -# instance = self.get_object() -# user = request.user - -# if not self.check_knowledge_base_permission(instance, user, 'edit'): -# return Response({ -# "code": 403, -# "message": "没有编辑权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) - -# logger.info(f"请求内容: {request.data}") -# logger.info(f"请求FILES: {request.FILES}") - -# files = [] -# if 'files' in request.FILES: -# files = request.FILES.getlist('files') -# elif 'file' in request.FILES: -# files = request.FILES.getlist('file') -# elif any(key.startswith('files[') for key in request.FILES): -# files = [file for key, file in request.FILES.items() if key.startswith('files[')] -# elif any(key.startswith('file[') for key in request.FILES): -# files = [file for key, file in request.FILES.items() if key.startswith('file[')] -# elif len(request.FILES) > 0: -# files = list(request.FILES.values()) - -# if not files: -# return Response({ -# "code": 400, -# "message": "未找到上传文件,请确保表单字段名为'files'或'file'", -# "data": { -# "available_fields": list(request.FILES.keys()) -# } -# }, status=status.HTTP_400_BAD_REQUEST) - -# logger.info(f"接收到 {len(files)} 个文件上传请求") - -# saved_documents = [] -# failed_documents = [] - -# if not instance.external_id: -# return Response({ -# "code": 400, -# "message": "知识库没有有效的external_id,请先创建知识库", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) - -# try: -# verify_url = f'{settings.API_BASE_URL}/api/dataset/{instance.external_id}' -# verify_response = requests.get(verify_url) -# if verify_response.status_code != 200: -# logger.error(f"外部知识库不存在或无法访问: {instance.external_id}, 状态码: {verify_response.status_code}") -# return Response({ -# "code": 404, -# "message": f"外部知识库不存在或无法访问: {instance.external_id}", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) - -# verify_data = verify_response.json() -# if verify_data.get('code') != 200: -# logger.error(f"验证外部知识库失败: {verify_data.get('message')}") -# return Response({ -# "code": verify_data.get('code', 500), -# "message": f"验证外部知识库失败: {verify_data.get('message', '未知错误')}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# logger.info(f"外部知识库验证成功: {instance.external_id}") -# except Exception as e: -# logger.error(f"验证外部知识库时出错: {str(e)}") -# return Response({ -# "code": 500, -# "message": f"验证外部知识库时出错: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# # 批量处理所有文件 -# split_response = call_split_api_multiple(files) -# if not split_response or split_response.get('code') != 200: -# error_msg = f"文件分割失败: {split_response.get('message', '未知错误') if split_response else '请求失败'}" -# logger.error(error_msg) -# return Response({ -# "code": 400, -# "message": error_msg, -# "data": { -# "uploaded_count": 0, -# "failed_count": len(files), -# "total_files": len(files), -# "documents": [], -# "failed_documents": [{"name": file.name, "error": error_msg} for file in files] -# } -# }, status=status.HTTP_400_BAD_REQUEST) - -# # 处理分割结果 -# documents_data = split_response.get('data', []) -# if not documents_data: -# logger.warning(f"批量分割API未返回文档数据") -# return Response({ -# "code": 400, -# "message": "文件分割未返回有效数据", -# "data": { -# "uploaded_count": 0, -# "failed_count": len(files), -# "total_files": len(files), -# "documents": [], -# "failed_documents": [{"name": file.name, "error": "分割未返回有效数据"} for file in files] -# } -# }, status=status.HTTP_400_BAD_REQUEST) - -# logger.info(f"成功分割出 {len(documents_data)} 个文档,准备上传") - -# # 处理每个文档 -# for doc in documents_data: -# doc_name = doc.get('name', '未命名文档') -# doc_content = doc.get('content', []) - -# logger.info(f"处理文档: {doc_name}, 包含 {len(doc_content)} 个段落") - -# if not doc_content: -# doc_content = [{ -# 'title': '文档内容', -# 'content': '文件内容无法自动分割,请检查文件格式。' -# }] - -# doc_data = { -# "name": doc_name, -# "paragraphs": [] -# } - -# for paragraph in doc_content: -# doc_data["paragraphs"].append({ -# "content": paragraph.get('content', ''), -# "title": paragraph.get('title', ''), -# "is_active": True, -# "problem_list": [] -# }) - -# upload_response = call_upload_api(instance.external_id, doc_data) - -# if upload_response and upload_response.get('code') == 200 and upload_response.get('data'): -# document_id = upload_response['data']['id'] -# doc_record = KnowledgeBaseDocument.objects.create( -# knowledge_base=instance, -# document_id=document_id, -# document_name=doc_name, -# external_id=document_id, -# uploader_name=user.name -# ) - -# saved_documents.append({ -# "id": str(doc_record.id), -# "name": doc_record.document_name, -# "external_id": doc_record.external_id -# }) - -# logger.info(f"文档 '{doc_name}' 上传成功,ID: {document_id}") -# else: -# error_msg = upload_response.get('message', '未知错误') if upload_response else '上传API调用失败' -# logger.error(f"文档 '{doc_name}' 上传失败: {error_msg}") -# failed_documents.append({ -# "name": doc_name, -# "error": error_msg -# }) - -# if saved_documents: -# return Response({ -# "code": 200, -# "message": f"文档上传完成,成功: {len(saved_documents)},失败: {len(failed_documents)}", -# "data": { -# "uploaded_count": len(saved_documents), -# "failed_count": len(failed_documents), -# "total_files": len(files), -# "documents": saved_documents, -# "failed_documents": failed_documents -# } -# }) -# else: -# return Response({ -# "code": 400, -# "message": f"所有文档上传失败", -# "data": { -# "uploaded_count": 0, -# "failed_count": len(failed_documents), -# "total_files": len(files), -# "documents": [], -# "failed_documents": failed_documents -# } -# }, status=status.HTTP_400_BAD_REQUEST) - -# except Exception as e: -# logger.error(f"文档上传失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"文档上传失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - -# @action(detail=True, methods=['get']) -# def documents(self, request, pk=None): -# """获取知识库的文档列表""" -# try: -# instance = self.get_object() -# user = request.user +# apps/knowledge_base/views.py +import logging +import json +import traceback +from django.db.models import Q +from django.db import transaction +from django.utils import timezone +from django.http import Http404 +from rest_framework import viewsets, status +from rest_framework.response import Response +from rest_framework.permissions import IsAuthenticated +from rest_framework.decorators import action +import requests +from apps.user.models import User +from apps.knowledge_base.models import KnowledgeBase, KnowledgeBaseDocument +from apps.common.services.external_api_service import ( + create_external_dataset, delete_external_dataset, call_split_api_multiple, + call_upload_api, call_delete_document_api, ExternalAPIError, + get_external_document_list, get_external_document_paragraphs, + call_delete_document_api +) +from apps.knowledge_base.serializers import KnowledgeBaseSerializer, KnowledgeBaseDocumentSerializer +from daren import settings +from apps.user.authentication import CustomTokenAuthentication + + +logger = logging.getLogger(__name__) + +class KnowledgeBaseViewSet(viewsets.ModelViewSet): + serializer_class = KnowledgeBaseSerializer + permission_classes = [IsAuthenticated] + authentication_classes = [CustomTokenAuthentication] + + + def list(self, request, *args, **kwargs): + try: + queryset = self.get_queryset() + keyword = request.query_params.get('keyword', '') + + if keyword: + query = Q(name__icontains=keyword) | Q(desc__icontains=keyword) | \ + Q(department__icontains=keyword) | Q(group__icontains=keyword) + queryset = queryset.filter(query) + + try: + page = int(request.query_params.get('page', 1)) + page_size = int(request.query_params.get('page_size', 10)) + except ValueError: + page = 1 + page_size = 10 + + total = queryset.count() + start = (page - 1) * page_size + end = start + page_size + paginated_queryset = queryset[start:end] + + serializer = self.get_serializer(paginated_queryset, many=True) + data = serializer.data + user = request.user + + # for item in data: + # kb_id = item['id'] + + # explicit_permission = KBPermissionModel.objects.filter( + # knowledge_base_id=kb_id, + # user=user, + # status='active' + # ).first() + + # if explicit_permission: + # item['permissions'] = { + # 'can_read': explicit_permission.can_read, + # 'can_edit': explicit_permission.can_edit, + # 'can_delete': explicit_permission.can_delete + # } + # item['expires_at'] = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None + # else: + # # 默认所有用户都有权限 + # item['permissions'] = { + # 'can_read': True, + # 'can_edit': True, + # 'can_delete': True + # } + # item['expires_at'] = None + + # if keyword: + # if 'name' in item and keyword.lower() in item['name'].lower(): + # item['highlighted_name'] = item['name'].replace( + # keyword, f'{keyword}' + # ) + # if 'desc' in item and item.get('desc') is not None: + # desc_text = str(item['desc']) + # if keyword.lower() in desc_text.lower(): + # item['highlighted_desc'] = desc_text.replace( + # keyword, f'{keyword}' + # ) + + return Response({ + "code": 200, + "message": "获取知识库列表成功", + "data": { + "total": total, + "page": page, + "page_size": page_size, + "keyword": keyword if keyword else None, + "items": data + } + }) + except Exception as e: + logger.error(f"获取知识库列表失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"获取知识库列表失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + def get_queryset(self): + # 返回所有知识库,不做权限过滤 + return KnowledgeBase.objects.all() + + def create(self, request, *args, **kwargs): + try: + # 简单验证知识库名称 + name = request.data.get('name') + if not name: + return Response({ + 'code': 400, + 'message': '知识库名称不能为空', + 'data': None + }, status=status.HTTP_400_BAD_REQUEST) + + if KnowledgeBase.objects.filter(name=name).exists(): + return Response({ + 'code': 400, + 'message': f'知识库名称 "{name}" 已存在', + 'data': None + }, status=status.HTTP_400_BAD_REQUEST) + + # 准备数据,忽略type、department、group字段 + data = request.data.copy() -# # 权限检查 -# if not self.check_knowledge_base_permission(instance, user, 'read'): -# return Response({ -# "code": 403, -# "message": "没有查看权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) + # 序列化并创建知识库 + serializer = self.get_serializer(data=data, context={'request': request}) + if not serializer.is_valid(): + logger.error(f"数据验证失败: {serializer.errors}") + return Response({ + 'code': 400, + 'message': '数据验证失败', + 'data': serializer.errors + }, status=status.HTTP_400_BAD_REQUEST) + + with transaction.atomic(): + # 保存知识库 + knowledge_base = serializer.save() + logger.info(f"知识库创建成功: id={knowledge_base.id}, name={knowledge_base.name}, user_id={knowledge_base.user_id}") + + # 创建外部知识库 + external_id = create_external_dataset(knowledge_base) + logger.info(f"外部知识库创建成功,获取ID: {external_id}") + + # 更新external_id + knowledge_base.external_id = external_id + knowledge_base.save() + logger.info(f"更新knowledge_base的external_id为: {external_id}") + + # # 创建创建者权限记录 + # KBPermissionModel.objects.create( + # knowledge_base=knowledge_base, + # user=request.user, + # can_read=True, + # can_edit=True, + # can_delete=True, + # granted_by=request.user, + # status='active' + # ) + # logger.info(f"创建者权限创建成功") + + return Response({ + 'code': 200, + 'message': '知识库创建成功', + 'data': { + 'knowledge_base': serializer.data, + 'external_id': knowledge_base.external_id + } + }) + + except ExternalAPIError as e: + logger.error(f"外部知识库创建失败: {str(e)}") + return Response({ + 'code': 500, + 'message': f'创建知识库失败: {str(e)}', + 'data': None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except Exception as e: + logger.error(f"创建知识库失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + 'code': 500, + 'message': f'创建知识库失败: {str(e)}', + 'data': None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + def update(self, request, *args, **kwargs): + try: + instance = self.get_object() + user = request.user + + # 所有用户都有编辑权限 + with transaction.atomic(): + serializer = self.get_serializer(instance, data=request.data, partial=True) + serializer.is_valid(raise_exception=True) + self.perform_update(serializer) + + if instance.external_id: + try: + api_data = { + "name": serializer.validated_data.get('name', instance.name), + "desc": serializer.validated_data.get('desc', instance.desc), + "type": "0", + "meta": {}, + "documents": [] + } + + response = requests.put( + f'{settings.API_BASE_URL}/api/dataset/{instance.external_id}', + json=api_data, + headers={'Content-Type': 'application/json'}, + ) + + if response.status_code != 200: + raise ExternalAPIError(f"更新外部知识库失败,状态码: {response.status_code}, 响应: {response.text}") + + api_response = response.json() + if not api_response.get('code') == 200: + raise ExternalAPIError(f"更新外部知识库失败: {api_response.get('message', '未知错误')}") + + logger.info(f"外部知识库更新成功: {instance.external_id}") + + except requests.exceptions.Timeout: + raise ExternalAPIError("请求超时,请稍后重试") + except requests.exceptions.RequestException as e: + raise ExternalAPIError(f"API请求失败: {str(e)}") + except Exception as e: + raise ExternalAPIError(f"更新外部知识库失败: {str(e)}") + + return Response({ + "code": 200, + "message": "知识库更新成功", + "data": serializer.data + }) + + except Http404: + return Response({ + "code": 404, + "message": "知识库不存在", + "data": None + }, status=status.HTTP_404_NOT_FOUND) + except ExternalAPIError as e: + logger.error(f"更新外部知识库失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": str(e), + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except Exception as e: + logger.error(f"更新知识库失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"更新知识库失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + def destroy(self, request, *args, **kwargs): + try: + instance = self.get_object() -# # 检查external_id是否存在 -# if not instance.external_id: -# return Response({ -# "code": 400, -# "message": "知识库没有有效的external_id", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) + # 所有用户都有删除权限 + external_delete_success = True + external_error_message = None + if instance.external_id: + external_delete_success = delete_external_dataset(instance.external_id) + if not external_delete_success: + external_error_message = "外部知识库删除失败" + logger.warning(f"外部知识库删除失败,将继续删除本地知识库: {external_error_message}") + + self.perform_destroy(instance) + logger.info(f"本地知识库删除成功: id={instance.id}, name={instance.name}") + + if not external_delete_success: + return Response({ + "code": 200, + "message": f"知识库已删除,但外部知识库删除失败: {external_error_message}", + "data": None + }) + + return Response({ + "code": 200, + "message": "知识库删除成功", + "data": None + }) + + except Http404: + return Response({ + "code": 404, + "message": "知识库不存在", + "data": None + }, status=status.HTTP_404_NOT_FOUND) + except Exception as e: + logger.error(f"删除知识库失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"删除知识库失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + @action(detail=True, methods=['get']) + def permissions(self, request, pk=None): + try: + instance = self.get_object() + + # 所有用户都有所有权限 + permissions_data = { + "can_read": True, + "can_edit": True, + "can_delete": True + } + + return Response({ + "code": 200, + "message": "获取权限信息成功", + "data": { + "knowledge_base_id": instance.id, + "knowledge_base_name": instance.name, + "permissions": permissions_data + } + }) + + except Http404: + return Response({ + "code": 404, + "message": "知识库不存在", + "data": None + }, status=status.HTTP_404_NOT_FOUND) + except Exception as e: + logger.error(f"获取权限信息失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"获取权限信息失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + @action(detail=False, methods=['get']) + def summary(self, request): + try: + user = request.user + queryset = KnowledgeBase.objects.all() + summaries = [] + + for kb in queryset: + # 所有用户都有权限 + permissions = { + 'can_read': True, + 'can_edit': True, + 'can_delete': True + } + + # explicit_permission = KBPermissionModel.objects.filter( + # knowledge_base_id=kb.id, + # user=user, + # status='active' + # ).first() + + # expires_at = None + # if explicit_permission: + # expires_at = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None + + summary = { + 'id': str(kb.id), + 'name': kb.name, + 'desc': kb.desc, + 'type': kb.type, + 'department': kb.department, + 'permissions': permissions, + # 'expires_at': expires_at + } + summaries.append(summary) + + return Response({ + 'code': 200, + 'message': '获取知识库概要信息成功', + 'data': summaries + }) + + except Exception as e: + logger.error(f"获取知识库概要信息失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + 'code': 500, + 'message': f'获取知识库概要信息失败: {str(e)}', + 'data': None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + def retrieve(self, request, *args, **kwargs): + try: + instance = self.get_object() + serializer = self.get_serializer(instance) + data = serializer.data + user = request.user + + # 所有用户都有权限 + data['permissions'] = { + 'can_read': True, + 'can_edit': True, + 'can_delete': True + } + + # explicit_permission = KBPermissionModel.objects.filter( + # knowledge_base_id=instance.id, + # user=user, + # status='active' + # ).first() + + # if explicit_permission: + # data['expires_at'] = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None + # else: + # data['expires_at'] = None + + return Response({ + 'code': 200, + 'message': '获取知识库详情成功', + 'data': data + }) + except Exception as e: + logger.error(f"获取知识库详情失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + 'code': 500, + 'message': f'获取知识库详情失败: {str(e)}', + 'data': None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + @action(detail=False, methods=['get']) + def search(self, request): + try: + keyword = request.query_params.get('keyword', '') + if not keyword: + return Response({ + "code": 400, + "message": "搜索关键字不能为空", + "data": None + }, status=status.HTTP_400_BAD_REQUEST) + + try: + page = int(request.query_params.get('page', 1)) + page_size = int(request.query_params.get('page_size', 10)) + except ValueError: + page = 1 + page_size = 10 + + query = Q(name__icontains=keyword) | Q(desc__icontains=keyword) | \ + Q(department__icontains=keyword) | Q(group__icontains=keyword) + + queryset = KnowledgeBase.objects.filter(query) + user = request.user + + total = queryset.count() + start = (page - 1) * page_size + end = start + page_size + paginated_queryset = queryset[start:end] + + serializer = self.get_serializer(paginated_queryset, many=True) + data = serializer.data + result_items = [] + + for item in data: + # 所有用户都有权限 + kb_permissions = { + 'can_read': True, + 'can_edit': True, + 'can_delete': True + } -# # 调用外部API获取文档列表 -# try: -# external_documents = get_external_document_list(instance.external_id) + # explicit_permission = KBPermissionModel.objects.filter( + # knowledge_base_id=item['id'], + # user=user, + # status='active' + # ).first() + + # if explicit_permission: + # item['expires_at'] = explicit_permission.expires_at.strftime("%Y-%m-%d %H:%M:%S") if explicit_permission.expires_at else None + # else: + # item['expires_at'] = None + + item['permissions'] = kb_permissions + result_items.append(item) + + if 'name' in item and keyword.lower() in item['name'].lower(): + item['highlighted_name'] = item['name'].replace( + keyword, f'{keyword}' + ) + if 'desc' in item and item.get('desc') is not None: + desc_text = str(item['desc']) + if keyword.lower() in desc_text.lower(): + item['highlighted_desc'] = desc_text.replace( + keyword, f'{keyword}' + ) + + return Response({ + "code": 200, + "message": "搜索知识库成功", + "data": { + "total": total, + "page": page, + "page_size": page_size, + "keyword": keyword, + "items": result_items + } + }) + except Exception as e: + logger.error(f"搜索知识库失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"搜索知识库失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + @action(detail=True, methods=['post']) + def upload_document(self, request, pk=None): + try: + instance = self.get_object() + user = request.user + + # 所有用户都有编辑权限 + logger.info(f"请求内容: {request.data}") + logger.info(f"请求FILES: {request.FILES}") + + files = [] + if 'files' in request.FILES: + files = request.FILES.getlist('files') + elif 'file' in request.FILES: + files = request.FILES.getlist('file') + elif any(key.startswith('files[') for key in request.FILES): + files = [file for key, file in request.FILES.items() if key.startswith('files[')] + elif any(key.startswith('file[') for key in request.FILES): + files = [file for key, file in request.FILES.items() if key.startswith('file[')] + elif len(request.FILES) > 0: + files = list(request.FILES.values()) + + if not files: + return Response({ + "code": 400, + "message": "未找到上传文件,请确保表单字段名为'files'或'file'", + "data": { + "available_fields": list(request.FILES.keys()) + } + }, status=status.HTTP_400_BAD_REQUEST) + + logger.info(f"接收到 {len(files)} 个文件上传请求") + + saved_documents = [] + failed_documents = [] + + if not instance.external_id: + return Response({ + "code": 400, + "message": "知识库没有有效的external_id,请先创建知识库", + "data": None + }, status=status.HTTP_400_BAD_REQUEST) + + try: + verify_url = f'{settings.API_BASE_URL}/api/dataset/{instance.external_id}' + verify_response = requests.get(verify_url) + if verify_response.status_code != 200: + logger.error(f"外部知识库不存在或无法访问: {instance.external_id}, 状态码: {verify_response.status_code}") + return Response({ + "code": 404, + "message": f"外部知识库不存在或无法访问: {instance.external_id}", + "data": None + }, status=status.HTTP_404_NOT_FOUND) + + verify_data = verify_response.json() + if verify_data.get('code') != 200: + logger.error(f"验证外部知识库失败: {verify_data.get('message')}") + return Response({ + "code": verify_data.get('code', 500), + "message": f"验证外部知识库失败: {verify_data.get('message', '未知错误')}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + logger.info(f"外部知识库验证成功: {instance.external_id}") + except Exception as e: + logger.error(f"验证外部知识库时出错: {str(e)}") + return Response({ + "code": 500, + "message": f"验证外部知识库时出错: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + # 批量处理所有文件 + split_response = call_split_api_multiple(files) + if not split_response or split_response.get('code') != 200: + error_msg = f"文件分割失败: {split_response.get('message', '未知错误') if split_response else '请求失败'}" + logger.error(error_msg) + return Response({ + "code": 400, + "message": error_msg, + "data": { + "uploaded_count": 0, + "failed_count": len(files), + "total_files": len(files), + "documents": [], + "failed_documents": [{"name": file.name, "error": error_msg} for file in files] + } + }, status=status.HTTP_400_BAD_REQUEST) + + # 处理分割结果 + documents_data = split_response.get('data', []) + if not documents_data: + logger.warning(f"批量分割API未返回文档数据") + return Response({ + "code": 400, + "message": "文件分割未返回有效数据", + "data": { + "uploaded_count": 0, + "failed_count": len(files), + "total_files": len(files), + "documents": [], + "failed_documents": [{"name": file.name, "error": "分割未返回有效数据"} for file in files] + } + }, status=status.HTTP_400_BAD_REQUEST) -# # 同步外部文档到本地数据库 -# for doc in external_documents: -# external_id = doc.get('id') -# doc_name = doc.get('name') + logger.info(f"成功分割出 {len(documents_data)} 个文档,准备上传") + + # 处理每个文档 + for doc in documents_data: + doc_name = doc.get('name', '未命名文档') + doc_content = doc.get('content', []) + + logger.info(f"处理文档: {doc_name}, 包含 {len(doc_content)} 个段落") + + if not doc_content: + doc_content = [{ + 'title': '文档内容', + 'content': '文件内容无法自动分割,请检查文件格式。' + }] + + doc_data = { + "name": doc_name, + "paragraphs": [] + } + + for paragraph in doc_content: + doc_data["paragraphs"].append({ + "content": paragraph.get('content', ''), + "title": paragraph.get('title', ''), + "is_active": True, + "problem_list": [] + }) + + upload_response = call_upload_api(instance.external_id, doc_data) + + if upload_response and upload_response.get('code') == 200 and upload_response.get('data'): + document_id = upload_response['data']['id'] + doc_record = KnowledgeBaseDocument.objects.create( + knowledge_base=instance, + document_id=document_id, + document_name=doc_name, + external_id=document_id, + uploader_name=user.name + ) + + saved_documents.append({ + "id": str(doc_record.id), + "name": doc_record.document_name, + "external_id": doc_record.external_id + }) + + logger.info(f"文档 '{doc_name}' 上传成功,ID: {document_id}") + else: + error_msg = upload_response.get('message', '未知错误') if upload_response else '上传API调用失败' + logger.error(f"文档 '{doc_name}' 上传失败: {error_msg}") + failed_documents.append({ + "name": doc_name, + "error": error_msg + }) + + if saved_documents: + return Response({ + "code": 200, + "message": f"文档上传完成,成功: {len(saved_documents)},失败: {len(failed_documents)}", + "data": { + "uploaded_count": len(saved_documents), + "failed_count": len(failed_documents), + "total_files": len(files), + "documents": saved_documents, + "failed_documents": failed_documents + } + }) + else: + return Response({ + "code": 400, + "message": f"所有文档上传失败", + "data": { + "uploaded_count": 0, + "failed_count": len(failed_documents), + "total_files": len(files), + "documents": [], + "failed_documents": failed_documents + } + }, status=status.HTTP_400_BAD_REQUEST) + + except Exception as e: + logger.error(f"文档上传失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"文档上传失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + + @action(detail=True, methods=['get']) + def documents(self, request, pk=None): + """获取知识库的文档列表""" + try: + instance = self.get_object() + user = request.user + + # 所有用户都有查看权限 + + # 检查external_id是否存在 + if not instance.external_id: + return Response({ + "code": 400, + "message": "知识库没有有效的external_id", + "data": None + }, status=status.HTTP_400_BAD_REQUEST) + + # 调用外部API获取文档列表 + try: + external_documents = get_external_document_list(instance.external_id) + + # 同步外部文档到本地数据库 + for doc in external_documents: + external_id = doc.get('id') + doc_name = doc.get('name') -# if external_id and doc_name: -# kb_doc, created = KnowledgeBaseDocument.objects.update_or_create( -# knowledge_base=instance, -# external_id=external_id, -# defaults={ -# 'document_id': external_id, -# 'document_name': doc_name, -# 'status': 'active' if doc.get('is_active', True) else 'deleted' -# } -# ) + if external_id and doc_name: + kb_doc, created = KnowledgeBaseDocument.objects.update_or_create( + knowledge_base=instance, + external_id=external_id, + defaults={ + 'document_id': external_id, + 'document_name': doc_name, + 'status': 'active' if doc.get('is_active', True) else 'deleted' + } + ) -# if created: -# logger.info(f"同步创建文档: {doc_name}, ID: {external_id}") -# else: -# logger.info(f"同步更新文档: {doc_name}, ID: {external_id}") + if created: + logger.info(f"同步创建文档: {doc_name}, ID: {external_id}") + else: + logger.info(f"同步更新文档: {doc_name}, ID: {external_id}") -# # 获取最新的本地文档数据 -# documents = KnowledgeBaseDocument.objects.filter( -# knowledge_base=instance, -# status='active' -# ).order_by('-create_time') + # 获取最新的本地文档数据 + documents = KnowledgeBaseDocument.objects.filter( + knowledge_base=instance, + status='active' + ).order_by('-create_time') -# # 构建响应数据 -# documents_data = [{ -# "id": str(doc.id), -# "document_id": doc.document_id, -# "name": doc.document_name, -# "external_id": doc.external_id, -# "created_at": doc.create_time.strftime('%Y-%m-%d %H:%M:%S'), -# "char_length": next((d.get('char_length', 0) for d in external_documents if d.get('id') == doc.external_id), 0), -# "paragraph_count": next((d.get('paragraph_count', 0) for d in external_documents if d.get('id') == doc.external_id), 0), -# "is_active": next((d.get('is_active', True) for d in external_documents if d.get('id') == doc.external_id), True), -# "uploader_name": doc.uploader_name -# } for doc in documents] + # 构建响应数据 + documents_data = [{ + "id": str(doc.id), + "document_id": doc.document_id, + "name": doc.document_name, + "external_id": doc.external_id, + "created_at": doc.create_time.strftime('%Y-%m-%d %H:%M:%S'), + "char_length": next((d.get('char_length', 0) for d in external_documents if d.get('id') == doc.external_id), 0), + "paragraph_count": next((d.get('paragraph_count', 0) for d in external_documents if d.get('id') == doc.external_id), 0), + "is_active": next((d.get('is_active', True) for d in external_documents if d.get('id') == doc.external_id), True), + "uploader_name": doc.uploader_name + } for doc in documents] -# return Response({ -# "code": 200, -# "message": "获取文档列表成功", -# "data": documents_data -# }) + return Response({ + "code": 200, + "message": "获取文档列表成功", + "data": documents_data + }) -# except ExternalAPIError as e: -# logger.error(f"获取文档列表失败: {str(e)}") -# return Response({ -# "code": 500, -# "message": str(e), -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except ExternalAPIError as e: + logger.error(f"获取文档列表失败: {str(e)}") + return Response({ + "code": 500, + "message": str(e), + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -# except Exception as e: -# logger.error(f"获取文档列表失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"获取文档列表失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except Exception as e: + logger.error(f"获取文档列表失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"获取文档列表失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -# @action(detail=True, methods=['get']) -# def document_content(self, request, pk=None): -# """获取文档内容 - 段落列表""" -# try: -# knowledge_base = self.get_object() -# user = request.user + @action(detail=True, methods=['get']) + def document_content(self, request, pk=None): + """获取文档内容 - 段落列表""" + try: + knowledge_base = self.get_object() + user = request.user -# # 权限检查 -# if not self.check_knowledge_base_permission(knowledge_base, user, 'read'): -# return Response({ -# "code": 403, -# "message": "没有查看权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) + # 所有用户都有查看权限 -# # 获取文档ID -# document_id = request.query_params.get('document_id') -# if not document_id: -# return Response({ -# "code": 400, -# "message": "缺少document_id参数", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) + # 获取文档ID + document_id = request.query_params.get('document_id') + if not document_id: + return Response({ + "code": 400, + "message": "缺少document_id参数", + "data": None + }, status=status.HTTP_400_BAD_REQUEST) -# # 验证文档存在 -# document = KnowledgeBaseDocument.objects.filter( -# knowledge_base=knowledge_base, -# document_id=document_id, -# status='active' -# ).first() + # 验证文档存在 + document = KnowledgeBaseDocument.objects.filter( + knowledge_base=knowledge_base, + document_id=document_id, + status='active' + ).first() -# if not document: -# return Response({ -# "code": 404, -# "message": "文档不存在或已删除", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) + if not document: + return Response({ + "code": 404, + "message": "文档不存在或已删除", + "data": None + }, status=status.HTTP_404_NOT_FOUND) -# # 调用外部API获取文档段落内容 -# try: -# paragraphs = get_external_document_paragraphs(knowledge_base.external_id, document.external_id) + # 调用外部API获取文档段落内容 + try: + paragraphs = get_external_document_paragraphs(knowledge_base.external_id, document.external_id) -# # 直接返回外部API的段落数据 -# return Response({ -# "code": 200, -# "message": "获取文档内容成功", -# "data": { -# "document_id": document_id, -# "name": document.document_name, -# "paragraphs": paragraphs -# } -# }) + # 直接返回外部API的段落数据 + return Response({ + "code": 200, + "message": "获取文档内容成功", + "data": { + "document_id": document_id, + "name": document.document_name, + "paragraphs": paragraphs + } + }) -# except ExternalAPIError as e: -# logger.error(f"获取文档段落内容失败: {str(e)}") -# return Response({ -# "code": 500, -# "message": str(e), -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except ExternalAPIError as e: + logger.error(f"获取文档段落内容失败: {str(e)}") + return Response({ + "code": 500, + "message": str(e), + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -# except Exception as e: -# logger.error(f"获取文档内容失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"获取文档内容失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except Exception as e: + logger.error(f"获取文档内容失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"获取文档内容失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -# @action(detail=True, methods=['delete']) -# def delete_document(self, request, pk=None): -# """删除知识库文档""" -# try: -# knowledge_base = self.get_object() -# user = request.user + @action(detail=True, methods=['delete']) + def delete_document(self, request, pk=None): + """删除知识库文档""" + try: + knowledge_base = self.get_object() + user = request.user -# # 权限检查 -# if not self.check_knowledge_base_permission(knowledge_base, user, 'edit'): -# return Response({ -# "code": 403, -# "message": "没有编辑权限", -# "data": None -# }, status=status.HTTP_403_FORBIDDEN) + # 所有用户都有编辑权限 -# # 获取文档ID -# document_id = request.query_params.get('document_id') -# if not document_id: -# return Response({ -# "code": 400, -# "message": "缺少document_id参数", -# "data": None -# }, status=status.HTTP_400_BAD_REQUEST) + # 获取文档ID + document_id = request.query_params.get('document_id') + if not document_id: + return Response({ + "code": 400, + "message": "缺少document_id参数", + "data": None + }, status=status.HTTP_400_BAD_REQUEST) -# # 验证文档存在 -# document = KnowledgeBaseDocument.objects.filter( -# knowledge_base=knowledge_base, -# document_id=document_id, -# status='active' -# ).first() + # 验证文档存在 + document = KnowledgeBaseDocument.objects.filter( + knowledge_base=knowledge_base, + document_id=document_id, + status='active' + ).first() -# if not document: -# return Response({ -# "code": 404, -# "message": "文档不存在或已删除", -# "data": None -# }, status=status.HTTP_404_NOT_FOUND) + if not document: + return Response({ + "code": 404, + "message": "文档不存在或已删除", + "data": None + }, status=status.HTTP_404_NOT_FOUND) -# # 调用外部API删除文档 -# external_id = document.external_id -# delete_result = call_delete_document_api(knowledge_base.external_id, external_id) + # 调用外部API删除文档 + external_id = document.external_id + delete_result = call_delete_document_api(knowledge_base.external_id, external_id) -# # 无论外部API结果如何,都更新本地状态 -# document.status = 'deleted' -# document.save() + # 无论外部API结果如何,都更新本地状态 + document.status = 'deleted' + document.save() -# # 检查外部API结果 -# if delete_result.get('code') != 200: -# logger.warning(f"外部API删除文档失败,但本地标记已更新: {delete_result.get('message')}") -# return Response({ -# "code": 200, -# "message": "文档在系统中已标记为删除,但外部API调用失败", -# "data": { -# "document_id": document_id, -# "name": document.document_name, -# "error": delete_result.get('message') -# } -# }) + # 检查外部API结果 + if delete_result.get('code') != 200: + logger.warning(f"外部API删除文档失败,但本地标记已更新: {delete_result.get('message')}") + return Response({ + "code": 200, + "message": "文档在系统中已标记为删除,但外部API调用失败", + "data": { + "document_id": document_id, + "name": document.document_name, + "error": delete_result.get('message') + } + }) -# return Response({ -# "code": 200, -# "message": "文档删除成功", -# "data": { -# "document_id": document_id, -# "name": document.document_name -# } -# }) + return Response({ + "code": 200, + "message": "文档删除成功", + "data": { + "document_id": document_id, + "name": document.document_name + } + }) -# except Exception as e: -# logger.error(f"删除文档失败: {str(e)}") -# logger.error(traceback.format_exc()) -# return Response({ -# "code": 500, -# "message": f"删除文档失败: {str(e)}", -# "data": None -# }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + except Exception as e: + logger.error(f"删除文档失败: {str(e)}") + logger.error(traceback.format_exc()) + return Response({ + "code": 500, + "message": f"删除文档失败: {str(e)}", + "data": None + }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) \ No newline at end of file diff --git a/daren/urls.py b/daren/urls.py index 8b8b9f8..aedced5 100644 --- a/daren/urls.py +++ b/daren/urls.py @@ -20,6 +20,7 @@ urlpatterns = [ path('api/chat-history/', include('apps.chat.urls')), path('api/gmail/', include('apps.gmail.urls')), path('api/feishu/', include('apps.feishu.urls')), + path('api/knowledge-bases/', include('apps.knowledge_base.urls')), ] # 在开发环境中提供媒体文件服务