# 2025-03-05 17:59:31 +08:00
|
|
|
|
from django.http import JsonResponse
|
|
|
|
|
from .tasks import monitor_process, get_process_gpu_usage
|
|
|
|
|
import threading
|
|
|
|
|
import psutil
|
|
|
|
|
from .models import HighCPUProcess, HighGPUProcess, HighMemoryProcess, AllResourceProcess, TiktokUserVideos
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
import time
|
|
|
|
|
import nvidia_smi
|
|
|
|
|
from django.utils import timezone
|
|
|
|
|
from django.views.decorators.http import require_http_methods
|
|
|
|
|
from django.views.decorators.csrf import csrf_exempt
|
|
|
|
|
from django.shortcuts import render
|
|
|
|
|
import pytz
|
|
|
|
|
from pathlib import Path # 使用 pathlib 处理跨平台路径
|
|
|
|
|
import json
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
import requests
|
|
|
|
|
import threading
|
# 2025-03-12 15:24:31 +08:00
|
|
|
|
from collections import deque
|
|
|
|
|
from celery import shared_task
|
|
|
|
|
import uuid
|
|
|
|
|
from functools import wraps
|
|
|
|
|
from datetime import timedelta
|
|
|
|
|
import redis
|
|
|
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
|
from yt_dlp import YoutubeDL
|
# 2025-03-05 17:59:31 +08:00
|
|
|
|
|
|
|
|
|
# Per-directory monitoring flags: directory path -> bool
# (True while the background monitor loop for that directory should keep running)
directory_monitoring = {}

# Globals controlling the shared detection thread (see start_monitor/stop_monitor)
monitor_thread = None
is_monitoring = False

# Log directory, resolved at module import time relative to the project root
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LOG_DIR = os.path.join(BASE_DIR, 'logs', 'process_monitor')

# Base path where downloaded videos are stored
TIKTOK_VIDEOS_PATH = os.path.join(BASE_DIR, 'media', 'tiktok_videos')

# Make sure the base video directory exists
os.makedirs(TIKTOK_VIDEOS_PATH, exist_ok=True)

# Ensure the per-resource log directory structure exists, including 'all'
for resource_type in ['cpu', 'memory', 'gpu', 'all']:
    os.makedirs(os.path.join(LOG_DIR, resource_type), exist_ok=True)

# Application-specific logger
logger = logging.getLogger('monitor')

# Set of directories currently registered for monitoring
monitored_directories = set()

# Base URL of the remote API
API_BASE_URL = "http://81.69.223.133:45268"
|
|
|
|
|
|
# 2025-03-12 15:24:31 +08:00
|
|
|
|
# Redis connection shared by the task-status helpers below
# (reuses the Celery broker URL configured in Django settings)
redis_client = redis.Redis.from_url(settings.CELERY_BROKER_URL)
|
|
|
|
|
|
|
|
|
|
# Redis-backed replacement for the former in-memory TASK_STATUS dict
def set_task_status(task_id, status_data):
    """Serialize *status_data* to JSON and store it under the task's Redis key."""
    key = f"task_status:{task_id}"
    redis_client.set(key, json.dumps(status_data))
|
|
|
|
|
|
|
|
|
|
def get_task_status(task_id):
    """Load a task's status dict from Redis; returns None when absent/empty."""
    raw = redis_client.get(f"task_status:{task_id}")
    if not raw:
        return None
    return json.loads(raw)
|
|
|
|
|
|
|
|
|
|
def update_task_status(task_id, updates):
    """Merge *updates* into an existing task status.

    Returns True when the status existed and was rewritten, False otherwise.
    """
    current = get_task_status(task_id)
    if not current:
        return False
    current.update(updates)
    set_task_status(task_id, current)
    return True
|
|
|
|
|
|
# 2025-03-05 17:59:31 +08:00
|
|
|
|
# Background monitoring thread function
def monitor_directory(directory):
    """Continuously monitor Python processes running under *directory*.

    Loops until ``directory_monitoring[directory]`` becomes falsy. On each
    pass it samples every matching process, upserts an AllResourceProcess
    row, and appends a metrics snapshot to the process's log file --
    immediately for newly seen PIDs, then at most once per LOG_INTERVAL.

    NOTE(review): relies on find_python_processes_in_dir (defined elsewhere
    in this module) returning dicts with a 'pid' key -- confirm.
    """
    monitor_interval = settings.MONITOR_INTERVAL  # sampling interval (seconds)
    log_interval = settings.LOG_INTERVAL  # log write interval (seconds)
    processed_pids = set()  # PIDs that have been logged at least once
    last_log_time = {}  # pid -> time of the most recent log write

    while directory_monitoring.get(directory, False):
        try:
            # Target wall-clock time for the start of the next sampling pass
            next_update = timezone.now() + timezone.timedelta(seconds=monitor_interval)

            processes = find_python_processes_in_dir(directory)
            for proc in processes:
                pid = proc['pid']
                try:
                    # Per-process log file under the 'all' resource category
                    log_file = setup_process_logger(pid, 'all')
                    if log_file:
                        process = psutil.Process(pid)
                        data = get_process_data(process)

                        # Upsert the DB record; numeric fields are parsed back
                        # out of the display strings built by get_process_data
                        AllResourceProcess.objects.update_or_create(
                            pid=pid,
                            defaults={
                                'process_name': process.name(),
                                'cpu_usage': float(data['cpu']['usage'].replace('%', '')),
                                'cpu_user_time': float(data['cpu']['user_time'].replace('s', '')),
                                'cpu_system_time': float(data['cpu']['system_time'].replace('s', '')),
                                'memory_usage': float(data['memory']['physical'].split('MB')[0].strip()),
                                'memory_percent': float(data['memory']['physical'].split('(')[1].split('%')[0].strip()),
                                'virtual_memory': float(data['memory']['virtual'].split('MB')[0].strip()),
                                'gpu_usage': float(data['gpu']['usage'].replace('%', '')),
                                'gpu_memory': float(data['gpu']['memory'].replace('MB', '')),
                                # NOTE(review): fields are named net_io_* but are
                                # filled from the disk I/O counters -- confirm
                                'net_io_sent': float(data['io']['write'].split('MB')[0].strip()),
                                'net_io_recv': float(data['io']['read'].split('MB')[0].strip()),
                                'is_active': True,
                                'status': 1,
                                'log_file': log_file
                            }
                        )

                        now = timezone.now()
                        # Newly detected process: write a log entry immediately
                        if pid not in processed_pids:
                            print(f"首次检测到进程,立即写入日志: PID={pid}")  # debug log
                            log_process_metrics(pid, data, log_file)
                            processed_pids.add(pid)
                            last_log_time[pid] = now
                        # Known process: write at most once per log_interval
                        elif (now - last_log_time.get(pid, now)).total_seconds() >= log_interval:
                            print(f"定期写入日志: PID={pid}")  # debug log
                            log_process_metrics(pid, data, log_file)
                            last_log_time[pid] = now

                except Exception as e:
                    logger.error(f"监控进程 {pid} 失败: {str(e)}")

            # Sleep out the remainder of the sampling interval
            now = timezone.now()
            if next_update > now:
                time.sleep((next_update - now).total_seconds())

        except Exception as e:
            logger.error(f"目录监控出错: {str(e)}")
            time.sleep(5)
|
|
|
|
|
|
|
|
|
|
def get_python_processes(directory):
    """Return psutil.Process objects for Python processes whose working
    directory is *directory* or a subdirectory of it.

    Paths are resolved and lowercased before comparing. The original used a
    raw substring / prefix test (``directory in proc_cwd or
    proc_cwd.startswith(directory)``), which also matched unrelated paths --
    e.g. ``/srv/app`` matched ``/srv/app-old`` and any cwd merely containing
    the string. Matching is now anchored at a path-separator boundary.

    Processes that vanish or deny access while being inspected are skipped.
    """
    directory = str(Path(directory).resolve()).lower()
    processes = []

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            if 'python' in proc.info['name'].lower():
                process = psutil.Process(proc.info['pid'])
                try:
                    proc_cwd = str(Path(process.cwd()).resolve()).lower()
                    # Exact directory, or a true subdirectory (separator-anchored)
                    if proc_cwd == directory or proc_cwd.startswith(directory.rstrip(os.sep) + os.sep):
                        processes.append(process)
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    return processes
|
|
|
|
|
|
|
|
|
|
def get_process_info(process):
    """Collect a detail snapshot of *process* as a plain dict.

    Returns None (after logging) when the process has vanished or access is
    denied. GPU stats are always zeros in this snapshot; I/O counters fall
    back to zeros when unavailable.
    """
    try:
        with process.oneshot():  # batch psutil reads for speed/consistency
            info = {
                'pid': process.pid,
                'name': process.name(),
                'cpu_usage': process.cpu_percent(),
                'memory_usage': process.memory_info().rss / (1024 * 1024),  # MB
                'command_line': ' '.join(process.cmdline()),
                'create_time': process.create_time(),
                'status': process.status(),
                'threads': process.num_threads(),
                'working_directory': process.cwd(),
                # Placeholder GPU stats -- always zero here
                'gpu_info': {
                    'usage': 0,
                    'memory': 0
                }
            }

            # I/O counters (may be denied for some processes/platforms)
            try:
                io = process.io_counters()
                info['io'] = {
                    'read_bytes': io.read_bytes / (1024 * 1024),  # MB
                    'write_bytes': io.write_bytes / (1024 * 1024),  # MB
                    'read_count': io.read_count,
                    'write_count': io.write_count
                }
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                info['io'] = {
                    'read_bytes': 0,
                    'write_bytes': 0,
                    'read_count': 0,
                    'write_count': 0
                }

            return info
    except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
        logger.error(f"获取进程 {process.pid} 信息失败: {str(e)}")
        return None
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["POST"])
def scan_directory(request):
    """Register a directory for Python-process monitoring.

    Expects a JSON body with a 'directory' key; the resolved path is added
    to the module-level ``monitored_directories`` set. All outcomes are
    reported as JSON (errors still use HTTP 200).
    """
    try:
        payload = json.loads(request.body)
        directory = payload.get('directory')
        if not directory:
            return JsonResponse({
                'status': 'error',
                'message': '请提供目录路径'
            })

        # Normalize and register the directory
        directory = str(Path(directory).resolve())
        monitored_directories.add(directory)
        logger.info(f"开始监控目录: {directory}")

        body = {
            'status': 'success',
            'message': f'开始监控目录: {directory}',
            'directory': directory,
        }
        return JsonResponse(body)

    except Exception as e:
        logger.error(f"启动目录监控失败: {str(e)}")
        return JsonResponse({
            'status': 'error',
            'message': str(e)
        })
|
|
|
|
|
|
|
|
|
|
@require_http_methods(["GET"])
def get_directory_status(request):
    """Report detailed info for every Python process under a directory.

    Reads 'directory' from the query string, resolves the processes via
    get_python_processes, and returns their get_process_info snapshots
    (skipping any that return None) plus a timestamp.
    """
    try:
        directory = request.GET.get('directory')
        if not directory:
            return JsonResponse({
                'status': 'error',
                'message': '未提供目录路径'
            })

        # Collect details for each Python process found in the directory,
        # dropping entries whose info could not be read
        details = [
            info
            for info in (get_process_info(p) for p in get_python_processes(directory))
            if info
        ]

        return JsonResponse({
            'status': 'success',
            'processes': details,
            'timestamp': timezone.now().strftime('%Y-%m-%d %H:%M:%S')
        })

    except Exception as e:
        logger.error(f"获取目录状态失败: {str(e)}")
        return JsonResponse({
            'status': 'error',
            'message': str(e)
        })
|
|
|
|
|
|
|
|
|
|
@require_http_methods(["POST"])
def stop_directory_monitor(request):
    """Remove a directory from the monitored set.

    Expects a JSON body with 'directory'. Succeeds (with the same message)
    whether or not the directory was actually being monitored.
    """
    try:
        directory = json.loads(request.body).get('directory')
        if not directory:
            return JsonResponse({
                'status': 'error',
                'message': '未提供目录路径'
            })

        # Normalize, then drop from the monitored set if present
        resolved = str(Path(directory).resolve())
        if resolved in monitored_directories:
            monitored_directories.remove(resolved)
            logger.info(f"停止监控目录: {resolved}")

        return JsonResponse({
            'status': 'success',
            'message': '监控已停止'
        })

    except Exception as e:
        logger.error(f"停止监控失败: {str(e)}")
        return JsonResponse({
            'status': 'error',
            'message': str(e)
        })
|
|
|
|
|
|
|
|
|
|
def setup_process_logger(pid, monitor_type):
    """Build (and ensure the directory for) a per-process log file path.

    Returns the path ``LOG_DIR/<type>/<pid>_<YYYYMMDD>_<type>.log`` using the
    Asia/Shanghai date, or None for an unknown monitor type. Re-raises on
    unexpected failures.

    Fix: the 'scan' -> 'all' alias is now applied *before* validation. In the
    original it sat after the membership check, which had already rejected
    'scan' and returned None -- dead code. 'auto' (passed by
    auto_detect_high_resource_processes) is aliased the same way, so those
    callers no longer receive None and crash when logging.
    """
    try:
        # Normalize caller aliases first: directory-scan and auto-detect
        # monitoring both log under the 'all' category.
        if monitor_type in ('scan', 'auto'):
            monitor_type = 'all'

        # Validate the monitor type
        if monitor_type not in ['cpu', 'memory', 'gpu', 'all']:
            print(f"警告:无效的监控类型 {monitor_type}")
            return None

        # Current time in the local (Asia/Shanghai) timezone
        local_tz = pytz.timezone('Asia/Shanghai')
        current_time = timezone.localtime(timezone.now(), local_tz)
        current_date = current_time.strftime("%Y%m%d")

        # Build the log file path
        log_dir = os.path.join(LOG_DIR, monitor_type)
        log_file = os.path.join(log_dir, f'{pid}_{current_date}_{monitor_type}.log')

        # Make sure the directory exists
        os.makedirs(os.path.dirname(log_file), exist_ok=True)

        print(f"设置日志文件: {log_file}")  # debug info
        return log_file

    except Exception as e:
        print(f"设置日志文件失败: {str(e)}")
        raise
|
|
|
|
|
|
|
|
|
|
def log_process_metrics(pid, data, log_file):
    """Append a formatted metrics snapshot for *pid* to *log_file*.

    *data* is the dict produced by get_process_data; each section (CPU,
    memory, GPU, IO, status) is rendered as a tree of pre-formatted strings
    with an Asia/Shanghai timestamp header. Re-raises on write failure.
    """
    try:
        # Make sure the target directory exists
        os.makedirs(os.path.dirname(log_file), exist_ok=True)

        # Timestamp in the local (Asia/Shanghai) timezone
        local_tz = pytz.timezone('Asia/Shanghai')
        current_time = timezone.localtime(timezone.now(), local_tz)
        timestamp = current_time.strftime('%Y-%m-%d %H:%M:%S')

        # Assemble the log entry line by line, then join once
        lines = [
            f"=== {timestamp} ===",
            "CPU信息:",
            f"├─ 使用率: {data['cpu']['usage']}",
            f"├─ 用户态时间: {data['cpu']['user_time']}",
            f"├─ 内核态时间: {data['cpu']['system_time']}",
            f"├─ CPU核心数: {data['cpu']['cores']}",
            f"├─ CPU频率: {data['cpu']['frequency']}",
            f"└─ 上下文切换: {data['cpu']['context_switches']}",
            "内存信息:",
            f"├─ 物理内存: {data['memory']['physical']}",
            f"├─ 虚拟内存: {data['memory']['virtual']}",
            f"├─ 内存映射: {data['memory']['mappings']}",
            f"├─ 系统内存使用: {data['memory']['system_usage']}",
            f"└─ 交换空间使用: {data['memory']['swap_usage']}",
            "GPU信息:",
            f"├─ 使用率: {data['gpu']['usage']}",
            f"└─ 显存使用: {data['gpu']['memory']}",
            "IO信息:",
            f"├─ 读取: {data['io']['read']}",
            f"├─ 写入: {data['io']['write']}",
            f"└─ 其他IO: {data['io']['other']}",
            f"进程状态: {data['status']}",
            "-" * 50,
        ]
        log_content = "\n".join(lines) + "\n"

        # Append the entry to the log file
        with open(log_file, 'a', encoding='utf-8') as f:
            f.write(log_content)

        print(f"已写入日志到: {log_file}")  # debug info

    except Exception as e:
        print(f"写入日志失败: {str(e)}")
        raise
|
|
|
|
|
|
|
|
|
|
def get_process_by_name(process_name):
    """Return the PIDs of all processes whose name contains *process_name*.

    Matching is case-insensitive; processes that disappear or deny access
    during iteration are silently skipped.
    """
    needle = process_name.lower()
    matches = []
    for proc in psutil.process_iter(['pid', 'name']):
        try:
            if needle in proc.info['name'].lower():
                matches.append(proc.info['pid'])
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass
    return matches
|
|
|
|
|
|
|
|
|
|
def get_process_gpu_usage(pid):
    """Return ``(gpu_usage_percent, gpu_memory_mb)`` for *pid*.

    Currently a stub that always reports zeros (no GPU probing is done here);
    the same zeros are returned if anything unexpectedly fails.
    """
    no_gpu = (0.0, 0.0)
    try:
        # No GPU data available -- report zeros
        return no_gpu
    except Exception as e:
        logger.error(f"获取GPU信息失败: {str(e)}")
        return no_gpu
|
|
|
|
|
|
|
|
|
|
def get_high_resource_processes():
    """Return dicts describing processes using over 50% CPU or memory.

    Each entry carries pid, name, cpu_percent and memory_percent. Processes
    that vanish or deny access while scanning are skipped.
    """
    flagged = []
    for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent']):
        try:
            info = proc.info
            # Flag anything exceeding 50% of either resource
            if info['cpu_percent'] > 50 or info['memory_percent'] > 50:
                flagged.append({
                    'pid': info['pid'],
                    'name': info['name'],
                    'cpu_percent': info['cpu_percent'],
                    'memory_percent': info['memory_percent'],
                })
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue
    return flagged
|
|
|
|
|
|
|
|
|
|
def auto_detect_high_resource_processes(request):
    """API endpoint: detect high-resource processes and log one snapshot each.

    Finds processes over the CPU/memory thresholds, writes a metrics entry
    per process under the 'all' log category, and returns the detected list
    as JSON. Per-process failures are logged and do not abort the scan.

    Fix: the original passed 'auto' to setup_process_logger, which rejects
    unknown types and returns None; log_process_metrics then crashed on the
    None path for every process. We now request the valid 'all' category and
    additionally guard against a None log file.
    """
    try:
        # Find currently high-CPU / high-memory processes
        high_resource_procs = get_high_resource_processes()

        # Log one metrics snapshot per detected process
        for proc in high_resource_procs:
            try:
                process = psutil.Process(proc['pid'])
                # Use the valid 'all' category (original passed 'auto')
                log_file = setup_process_logger(proc['pid'], 'all')
                if log_file:
                    data = get_process_data(process)
                    log_process_metrics(proc['pid'], data, log_file)
            except psutil.NoSuchProcess:
                continue
            except Exception as e:
                logger.error(f"监控进程 {proc['pid']} 时出错: {str(e)}")

        return JsonResponse({
            'status': 'success',
            'message': '已开始自动检测高资源进程',
            'processes': high_resource_procs
        }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"自动检测失败: {str(e)}")
        return JsonResponse({
            'status': 'error',
            'message': f'自动检测失败: {str(e)}'
        }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
def setup_logger(pid):
    """Return ``(logger, log_file)`` for a dedicated per-process logger.

    The logger is named ``monitor.process.process_<pid>`` and writes to
    ``logs/process_monitor/process_<pid>_<YYYYMMDD>.log`` next to the app.

    Fixes over the original:
    - the log directory is created if missing (the original relied on it
      already existing and FileHandler would otherwise fail);
    - a FileHandler is attached only once per logger -- the original added a
      new handler on *every* call, so repeated calls duplicated every log
      line and leaked file handles (``logging.getLogger`` returns the same
      object for the same name).
    """
    log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'logs', 'process_monitor', f'process_{pid}_{datetime.now().strftime("%Y%m%d")}.log')
    os.makedirs(os.path.dirname(log_file), exist_ok=True)

    process_logger = logging.getLogger(f'monitor.process.process_{pid}')
    process_logger.setLevel(logging.INFO)

    # Attach the file handler only once; getLogger reuses the same instance
    if not process_logger.handlers:
        handler = logging.FileHandler(log_file)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        process_logger.addHandler(handler)

    return process_logger, log_file
|
|
|
|
|
|
|
|
|
|
def continuous_monitor():
    """Thread body: periodically log metrics for all active monitored processes.

    Runs while the module-global ``is_monitoring`` flag is set. Each pass
    reloads the active High*Process rows per resource type, samples each
    PID, and appends a snapshot to its per-type log file. Processes that
    have exited are marked inactive in the database.
    """
    global is_monitoring
    # Same underlying 'monitor' logger as the module-level one (same name)
    logger = logging.getLogger('monitor')
    interval = 60  # seconds between monitoring passes

    while is_monitoring:
        try:
            # Target wall-clock time for the start of the next pass
            next_update = timezone.now() + timezone.timedelta(seconds=interval)

            # Active monitor records, grouped by resource type
            monitors = {
                'cpu': HighCPUProcess.objects.filter(is_active=True),
                'memory': HighMemoryProcess.objects.filter(is_active=True),
                'gpu': HighGPUProcess.objects.filter(is_active=True)
            }

            for monitor_type, processes in monitors.items():
                for proc in processes:
                    try:
                        process = psutil.Process(proc.pid)
                        data = get_process_data(process)
                        log_file = setup_process_logger(proc.pid, monitor_type)
                        log_process_metrics(proc.pid, data, log_file)

                    except psutil.NoSuchProcess:
                        # Process gone: deactivate its monitor record
                        proc.is_active = False
                        proc.save()
                    except Exception as e:
                        logger.error(f"监控进程 {proc.pid} 时出错: {str(e)}")

            # Sleep out the remainder of the interval
            now = timezone.now()
            if next_update > now:
                time.sleep((next_update - now).total_seconds())

        except Exception as e:
            logger.error(f"持续监控出错: {str(e)}")
            time.sleep(5)
|
|
|
|
|
|
|
|
|
|
def get_process_data(process):
    """Sample *process* and return its metrics as pre-formatted display strings.

    Every numeric value is embedded in a human-readable string (e.g.
    "12.5MB (3.1%)"); callers such as get_process_status and
    monitor_directory parse the numbers back out of these strings.

    NOTE(review): uses psutil's private ``psutil._common`` namedtuples as
    zero-filled fallbacks -- works, but may break across psutil versions.
    """
    with process.oneshot():  # batch psutil reads for speed/consistency
        cpu_times = process.cpu_times()
        # Fall back to a zeroed namedtuple when cpu_freq() returns None
        cpu_freq = psutil.cpu_freq() or psutil._common.scpufreq(current=0, min=0, max=0)
        cpu_ctx = process.num_ctx_switches()
        mem = process.memory_info()

        # Memory-map count may be denied on some platforms
        try:
            mem_maps = len(process.memory_maps())
        except (psutil.AccessDenied, psutil.TimeoutExpired):
            mem_maps = 0

        sys_mem = psutil.virtual_memory()
        swap = psutil.swap_memory()

        # Per-process I/O counters, zeroed when inaccessible
        try:
            io = process.io_counters()
        except (psutil.AccessDenied, psutil.TimeoutExpired):
            io = psutil._common.pio(read_count=0, write_count=0, read_bytes=0, write_bytes=0,
                                    read_chars=0, write_chars=0, other_count=0, other_bytes=0)

        gpu_usage, gpu_memory = get_process_gpu_usage(process.pid)

        return {
            'timestamp': timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
            'cpu': {
                'usage': f"{process.cpu_percent()}%",
                'user_time': f"{cpu_times.user:.1f}s",
                'system_time': f"{cpu_times.system:.1f}s",
                'cores': str(psutil.cpu_count()),
                'frequency': f"{getattr(cpu_freq, 'current', 0):.1f}MHz",
                'context_switches': f"{cpu_ctx.voluntary}/{cpu_ctx.involuntary}"
            },
            'memory': {
                'physical': f"{mem.rss/1024/1024:.1f}MB ({process.memory_percent():.1f}%)",
                'virtual': f"{mem.vms/1024/1024:.1f}MB",
                'mappings': f"{mem_maps}个",
                'system_usage': f"{sys_mem.percent:.1f}%",
                'swap_usage': f"{swap.percent:.1f}%"
            },
            'gpu': {
                'usage': f"{gpu_usage:.1f}%",
                'memory': f"{gpu_memory:.1f}MB"
            },
            'io': {
                # getattr guards cover the zeroed-fallback namedtuple fields
                'read': f"{getattr(io, 'read_bytes', 0)/1024/1024:.1f}MB ({getattr(io, 'read_count', 0)}次)",
                'write': f"{getattr(io, 'write_bytes', 0)/1024/1024:.1f}MB ({getattr(io, 'write_count', 0)}次)",
                'other': f"{getattr(io, 'other_count', 0)}次"
            },
            'status': process.status()
        }
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["GET", "POST"])
def start_monitor(request):
    """API: start the shared monitoring thread for a given process.

    Query params: ``pid`` (required), ``type`` (read but not used below).
    The first successful call starts the continuous_monitor daemon thread;
    later calls only validate the PID. Responses are JSON with Chinese
    messages; errors still use HTTP 200.
    """
    global monitor_thread, is_monitoring
    try:
        pid = request.GET.get('pid')
        monitor_type = request.GET.get('type')  # NOTE(review): never used below

        if not pid:
            return JsonResponse({
                'status': 'error',
                'message': '请输入进程ID'
            }, json_dumps_params={'ensure_ascii': False})

        pid = int(pid)

        try:
            process = psutil.Process(pid)
            process_name = process.name()

            # Start the shared monitoring thread if not already running
            if not is_monitoring:
                is_monitoring = True
                monitor_thread = threading.Thread(target=continuous_monitor)
                monitor_thread.daemon = True  # don't block interpreter exit
                monitor_thread.start()

            return JsonResponse({
                'status': 'success',
                'message': f'开始监控进程 {pid} ({process_name})'
            }, json_dumps_params={'ensure_ascii': False})

        except psutil.NoSuchProcess:
            return JsonResponse({
                'status': 'error',
                'message': f'进程 {pid} 不存在'
            }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"启动监控失败: {str(e)}")
        return JsonResponse({
            'status': 'error',
            'message': f'启动监控失败: {str(e)}'
        }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["GET", "POST"])
def stop_monitor(request):
    """Stop the shared monitoring loop.

    Requires a ``pid`` query param (used only in the response message --
    clearing the global flag stops monitoring for all processes).
    """
    global is_monitoring
    try:
        pid = request.GET.get('pid')
        if not pid:
            body = {'status': 'error', 'message': '请输入进程ID'}
            return JsonResponse(body, json_dumps_params={'ensure_ascii': False})

        pid = int(pid)

        # Clearing the flag makes the continuous_monitor loop exit
        is_monitoring = False

        body = {'status': 'success', 'message': f'停止监控进程 {pid}'}
        return JsonResponse(body, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"停止监控失败: {str(e)}")
        body = {'status': 'error', 'message': f'停止监控失败: {str(e)}'}
        return JsonResponse(body, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
@require_http_methods(["GET"])
def get_process_status(request, pid):
    """API: sample a process's current metrics and sync them to the database.

    Query param ``type`` selects the monitor category (cpu/memory/gpu; other
    accepted categories just skip the per-type model update). Always upserts
    the AllResourceProcess row, then the type-specific High*Process row, and
    returns the raw sampled data as JSON.

    Fix: the ``psutil.NoSuchProcess`` handler referenced an undefined name
    ``monitor`` (NameError) -- it now deactivates the process's database
    records via the models/helper actually available here.
    """
    try:
        monitor_type = request.GET.get('type')
        logger.info(f"获取进程状态: pid={pid}, type={monitor_type}")

        # Resolve the log file path first; fail fast for bad monitor types
        log_file = setup_process_logger(pid, monitor_type)
        if not log_file:
            raise ValueError(f"无法创建日志文件,监控类型: {monitor_type}")

        process = psutil.Process(pid)
        data = get_process_data(process)

        # Sync the sampled data to the database
        try:
            # Upsert the combined 'all resources' record; numeric fields are
            # parsed back out of the display strings built by get_process_data
            AllResourceProcess.objects.update_or_create(
                pid=pid,
                defaults={
                    'process_name': process.name(),
                    'cpu_usage': float(data['cpu']['usage'].replace('%', '')),
                    'cpu_user_time': float(data['cpu']['user_time'].replace('s', '')),
                    'cpu_system_time': float(data['cpu']['system_time'].replace('s', '')),
                    'memory_usage': float(data['memory']['physical'].split('MB')[0].strip()),
                    'memory_percent': float(data['memory']['physical'].split('(')[1].split('%')[0].strip()),
                    'virtual_memory': float(data['memory']['virtual'].split('MB')[0].strip()),
                    'gpu_usage': float(data['gpu']['usage'].replace('%', '')),
                    'gpu_memory': float(data['gpu']['memory'].replace('MB', '')),
                    # NOTE(review): net_io_* fields are filled from disk I/O
                    'net_io_sent': float(data['io']['write'].split('MB')[0].strip()),
                    'net_io_recv': float(data['io']['read'].split('MB')[0].strip()),
                    'is_active': True,
                    'status': 1,
                    'log_file': log_file
                }
            )

            # Per-type model updates
            if monitor_type == 'cpu':
                # Extract the CPU usage number from the display string
                cpu_usage = float(data['cpu']['usage'].replace('%', ''))
                logger.info(f"CPU使用率: {cpu_usage}%")  # debug log

                process_obj, created = HighCPUProcess.objects.update_or_create(
                    pid=pid,
                    defaults={
                        'process_name': process.name(),
                        'cpu_usage': cpu_usage,
                        'is_active': True,
                        'status': 1,
                        'log_file': log_file
                    }
                )
                logger.info(f"{'创建' if created else '更新'}CPU进程记录: {process_obj}")

            elif monitor_type == 'memory':
                # Extract memory usage (MB) and percentage from the display string
                memory_info = data['memory']['physical']
                memory_usage = float(memory_info.split('MB')[0].strip())
                memory_percent = float(memory_info.split('(')[1].split('%')[0].strip())

                HighMemoryProcess.objects.update_or_create(
                    pid=pid,
                    defaults={
                        'process_name': process.name(),
                        'memory_usage': memory_usage,
                        'memory_percent': memory_percent,
                        'is_active': True,
                        'status': 1,
                        'log_file': log_file
                    }
                )

            elif monitor_type == 'gpu':
                # Extract GPU usage and memory from the display strings
                gpu_usage = float(data['gpu']['usage'].replace('%', ''))
                gpu_memory = float(data['gpu']['memory'].replace('MB', ''))

                HighGPUProcess.objects.update_or_create(
                    pid=pid,
                    defaults={
                        'process_name': process.name(),
                        'gpu_usage': gpu_usage,
                        'gpu_memory': gpu_memory,
                        'is_active': True,
                        'status': 1,
                        'log_file': log_file
                    }
                )

            return JsonResponse({
                'status': 'success',
                'data': data
            })

        except psutil.NoSuchProcess:
            # Process terminated while sampling: deactivate its DB records.
            # (The original referenced an undefined name `monitor` here,
            # raising NameError instead of performing this cleanup.)
            AllResourceProcess.objects.filter(pid=pid).update(is_active=False, status=0)
            update_process_status(pid, monitor_type, is_active=False, status=0)

            return JsonResponse({
                'status': 'error',
                'message': f'进程 {pid} 已终止'
            })

    except Exception as e:
        return JsonResponse({
            'status': 'error',
            'message': str(e)
        })
|
|
|
|
|
|
|
|
|
|
def update_process_status(pid, monitor_type, is_active=False, status=0):
    """Set the active flag and status code on a process's monitor record.

    *monitor_type* selects which High*Process model to update; unknown
    types are a no-op. Failures are logged, never raised.
    """
    type_to_model = {
        'cpu': HighCPUProcess,
        'memory': HighMemoryProcess,
        'gpu': HighGPUProcess,
    }
    try:
        model = type_to_model.get(monitor_type)
        if model is not None:
            model.objects.filter(pid=pid).update(
                is_active=is_active,
                status=status
            )
    except Exception as e:
        logger.error(f"更新进程状态失败: {str(e)}")
|
|
|
|
|
|
|
|
|
|
def index(request):
    """Serve the monitoring dashboard page."""
    return render(request, 'index.html')
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["POST"])
def stop_auto_detect(request):
    """API endpoint acknowledging a stop-auto-detection request.

    No state is changed here; the endpoint simply reports success.
    """
    try:
        body = {'status': 'success', 'message': '已停止自动检测'}
        return JsonResponse(body, json_dumps_params={'ensure_ascii': False})
    except Exception as e:
        logger.error(f"停止自动检测失败: {str(e)}")
        body = {'status': 'error', 'message': f'停止自动检测失败: {str(e)}'}
        return JsonResponse(body, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["GET"])
def get_latest_log(request, pid):
    """API: return the latest live metrics for a monitored process.

    Requires a ``type`` query param (cpu/gpu/memory), looks up the matching
    active High*Process record, then samples the live process. When the
    process has exited, the monitor record is deactivated and an error JSON
    is returned. All failures come back as status-200 error JSON.
    """
    try:
        monitor_type = request.GET.get('type')  # monitoring type (cpu/gpu/memory)
        if not monitor_type:
            return JsonResponse({
                'status': 'error',
                'message': '请指定监控类型'
            })

        # Look up the active monitor record for the requested type
        monitor = None
        if monitor_type == 'cpu':
            monitor = HighCPUProcess.objects.filter(pid=pid, is_active=True).first()
        elif monitor_type == 'gpu':
            monitor = HighGPUProcess.objects.filter(pid=pid, is_active=True).first()
        elif monitor_type == 'memory':
            monitor = HighMemoryProcess.objects.filter(pid=pid, is_active=True).first()

        if not monitor:
            return JsonResponse({
                'status': 'error',
                'message': f'未找到进程 {pid} 的{monitor_type}监控记录'
            })

        # Sample the live process
        try:
            process = psutil.Process(pid)
            with process.oneshot():  # batch psutil reads
                data = {
                    'pid': pid,
                    'name': process.name(),
                    'status': 1,  # 1 = process running
                }

                # Add the metrics relevant to the requested type
                if monitor_type == 'cpu':
                    data['cpu_percent'] = process.cpu_percent()
                elif monitor_type == 'memory':
                    memory_info = process.memory_info()
                    data['memory_gb'] = memory_info.rss / (1024 * 1024 * 1024)
                    data['memory_percent'] = process.memory_percent()
                elif monitor_type == 'gpu':
                    gpu_usage, gpu_memory = get_process_gpu_usage(pid)
                    data['gpu_usage'] = gpu_usage
                    data['gpu_memory'] = gpu_memory

                return JsonResponse({
                    'status': 'success',
                    'data': data
                })

        except psutil.NoSuchProcess:
            # Process has exited: deactivate the monitor record
            monitor.is_active = False
            monitor.status = 0
            monitor.save()

            return JsonResponse({
                'status': 'error',
                'message': f'进程 {pid} 已终止'
            })

    except Exception as e:
        return JsonResponse({
            'status': 'error',
            'message': str(e)
        })
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["GET"])
def get_process_list(request):
    """获取当前监控的进程列表.

    Merges the active CPU / GPU / memory monitor records into one entry per
    PID, sorts entries by combined resource usage (descending) and returns
    ``{'status': 'success', 'processes': [...]}``, or an error payload on
    failure.
    """
    def _new_entry(proc):
        # Fresh per-PID record with every metric zeroed; each monitor type
        # below fills in only the metrics it actually tracks.  The name comes
        # from whichever monitor first sees the PID (same as before).
        return {
            'pid': proc.pid,
            'name': proc.process_name,
            'cpu_percent': 0,
            'memory_gb': 0,
            'memory_percent': 0,
            'gpu_usage': 0,
            'gpu_memory': 0,
            'resource_types': [],
        }

    try:
        process_dict = {}

        # CPU monitors.
        for proc in HighCPUProcess.objects.filter(is_active=True):
            info = process_dict.setdefault(proc.pid, _new_entry(proc))
            info['cpu_percent'] = proc.cpu_usage
            info['resource_types'].append('cpu')

        # GPU monitors.
        for proc in HighGPUProcess.objects.filter(is_active=True):
            info = process_dict.setdefault(proc.pid, _new_entry(proc))
            info['gpu_usage'] = proc.gpu_usage
            info['gpu_memory'] = proc.gpu_memory
            info['resource_types'].append('gpu')

        # Memory monitors.
        for proc in HighMemoryProcess.objects.filter(is_active=True):
            info = process_dict.setdefault(proc.pid, _new_entry(proc))
            info['memory_gb'] = proc.memory_usage
            info['memory_percent'] = proc.memory_percent
            info['resource_types'].append('memory')

        processes = list(process_dict.values())

        # 按总资源占用率排序 — combined normalised CPU + memory + GPU usage,
        # highest first.
        processes.sort(
            key=lambda x: (
                x['cpu_percent'] / 100 +
                x['memory_percent'] / 100 +
                x['gpu_usage'] / 100
            ),
            reverse=True,
        )

        return JsonResponse({
            'status': 'success',
            'processes': processes
        }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        return JsonResponse({
            'status': 'error',
            'message': str(e)
        }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
def high_resource_list(request):
    """高资源进程列表视图 — render every recorded high-resource process."""
    ordering = '-updated_at'  # newest records first for each resource type
    context = {
        'cpu_processes': HighCPUProcess.objects.all().order_by(ordering),
        'memory_processes': HighMemoryProcess.objects.all().order_by(ordering),
        'gpu_processes': HighGPUProcess.objects.all().order_by(ordering),
    }
    return render(request, 'high_resource_list.html', context)
|
|
|
|
|
|
|
|
|
|
def find_python_processes_in_dir(directory):
    """查找指定目录下运行的 Python 进程.

    Scans all processes and returns those whose executable name starts with
    "python" and whose current working directory is *directory* itself or a
    true sub-directory of it.

    :param directory: directory to match against each process's cwd
    :return: list of dicts with ``pid``, ``name``, ``cmdline`` and ``cwd``
    """
    python_processes = []
    directory = str(Path(directory).resolve())  # absolute, platform-normalised

    for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cwd']):
        try:
            # name/cwd can be None for some system processes; a bare
            # .lower()/Path() call would raise outside the psutil exceptions
            # caught below, so guard explicitly.
            name = proc.info['name']
            if not name or not name.lower().startswith('python'):
                continue
            if not proc.info['cwd']:
                continue

            proc_cwd = str(Path(proc.info['cwd']).resolve())
            # Match the directory itself or a real sub-directory; a plain
            # startswith() would wrongly match siblings (e.g. /app2 for /app).
            if proc_cwd != directory and not proc_cwd.startswith(directory + os.sep):
                continue

            cmdline = proc.info['cmdline']
            if cmdline:
                python_processes.append({
                    'pid': proc.info['pid'],
                    'name': name,
                    'cmdline': ' '.join(cmdline),
                    'cwd': proc_cwd
                })
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Processes can vanish or deny access mid-scan; skip them.
            pass

    return python_processes
|
|
|
|
|
|
|
|
|
|
def get_gpu_info(pid):
    """获取进程的GPU使用信息.

    :param pid: process id to look up on every visible GPU
    :return: dict with ``usage`` (GPU utilisation %), ``memory`` (MB used by
             *pid*) and ``device_count``; all zeros when NVML is unavailable
             or the process uses no GPU
    """
    try:
        import pynvml
        pynvml.nvmlInit()
        try:
            gpu_info = {
                'usage': 0,
                'memory': 0,
                'device_count': pynvml.nvmlDeviceGetCount()
            }

            for i in range(gpu_info['device_count']):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
                for proc in procs:
                    if proc.pid == pid:
                        # usedGpuMemory can be None when the driver cannot
                        # attribute memory to the process.
                        used = proc.usedGpuMemory or 0
                        gpu_info['memory'] = used / 1024 / 1024  # bytes -> MB
                        gpu_info['usage'] = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
                        break
            return gpu_info
        finally:
            # Balance nvmlInit() so repeated calls do not leak NVML state.
            pynvml.nvmlShutdown()
    except Exception as e:
        logger.error(f"获取GPU信息失败: {str(e)}")
        return {'usage': 0, 'memory': 0, 'device_count': 0}
|
|
|
|
|
|
|
|
|
|
def get_io_counters(proc):
    """获取进程的IO计数器信息.

    :param proc: a ``psutil.Process``-like object exposing ``io_counters()``
    :return: dict with ``read_bytes``/``write_bytes`` (MB) and
             ``read_count``/``write_count``; all zeros on any failure
    """
    try:
        io = proc.io_counters()
        return {
            'read_bytes': io.read_bytes / 1024 / 1024,  # 转换为MB
            'write_bytes': io.write_bytes / 1024 / 1024,
            'read_count': io.read_count,
            'write_count': io.write_count
        }
    except Exception:
        # Narrowed from a bare ``except:`` which would also swallow
        # SystemExit/KeyboardInterrupt; same best-effort fallback otherwise.
        return {'read_bytes': 0, 'write_bytes': 0, 'read_count': 0, 'write_count': 0}
|
|
|
|
|
|
|
|
|
|
def get_network_connections(proc):
    """获取进程的网络连接信息.

    :param proc: a ``psutil.Process``-like object exposing ``connections()``
    :return: list of dicts with ``local_address``/``remote_address``
             (``"ip:port"`` or ``""``) and ``status``; empty list on failure
    """
    try:
        connections = []
        for conn in proc.connections():
            connections.append({
                'local_address': f"{conn.laddr.ip}:{conn.laddr.port}" if conn.laddr else "",
                'remote_address': f"{conn.raddr.ip}:{conn.raddr.port}" if conn.raddr else "",
                'status': conn.status
            })
        return connections
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit still
        # propagate; access-denied and similar errors yield an empty list.
        return []
|
|
|
|
|
|
|
|
|
|
def get_open_files(proc):
    """获取进程打开的文件列表.

    :param proc: a ``psutil.Process``-like object exposing ``open_files()``
    :return: list of file paths; empty list on any failure
    """
    try:
        return [f.path for f in proc.open_files()]
    except Exception:
        # Narrowed from a bare ``except:`` — best-effort, but do not swallow
        # SystemExit/KeyboardInterrupt.
        return []
|
|
|
|
|
|
|
|
|
|
@csrf_exempt
@require_http_methods(["POST"])
def fetch_tiktok_videos(request):
    """获取TikTok用户播放量前10的视频.

    POST body: ``{"unique_id": "<tiktok handle>"}``. Resolves the user's
    secUid via the profile API, finds the 10 most-played videos, downloads
    them under ``TIKTOK_VIDEOS_PATH/<unique_id>/`` and persists the user and
    video metadata to ``TiktokUserVideos``.
    """
    try:
        # Module-level accumulator of every video downloaded so far; kept
        # because other code may read it (lazy-initialised on first use).
        global all_downloaded_videos
        if 'all_downloaded_videos' not in globals():
            all_downloaded_videos = []

        data = json.loads(request.body)
        unique_id = data.get('unique_id')

        if not unique_id:
            return JsonResponse({
                'status': 'error',
                'message': '请提供TikTok用户ID(unique_id)'
            }, json_dumps_params={'ensure_ascii': False})

        # 调用API获取用户资料和secUid
        logger.info(f"正在获取用户 {unique_id} 的资料...")
        user_profile = fetch_user_profile(unique_id)

        if not user_profile or 'data' not in user_profile:
            return JsonResponse({
                'status': 'error',
                'message': f'无法获取用户 {unique_id} 的资料'
            }, json_dumps_params={'ensure_ascii': False})

        # 从API响应中提取secUid和其他用户信息
        try:
            user_info = user_profile['data']['userInfo']['user']
            sec_uid = user_info['secUid']

            nickname = user_info.get('nickname', f'用户_{unique_id}')
            signature = user_info.get('signature', '')
            avatar_url = user_info.get('avatarLarger', '')

            user_stats = user_profile['data']['userInfo']['stats']
            follower_count = user_stats.get('followerCount', 0)
            heart_count = user_stats.get('heartCount', 0)
            video_count = user_stats.get('videoCount', 0)

            logger.info(f"成功获取用户secUid: {sec_uid}, 该用户有 {video_count} 个视频")
        except (KeyError, TypeError) as e:
            logger.error(f"解析用户资料出错: {e}")
            return JsonResponse({
                'status': 'error',
                'message': f'解析用户资料出错: {str(e)}'
            }, json_dumps_params={'ensure_ascii': False})

        # 确保用户目录存在
        user_dir = os.path.join(TIKTOK_VIDEOS_PATH, unique_id)
        os.makedirs(user_dir, exist_ok=True)

        # 使用辅助函数获取播放量前10的视频
        logger.info(f"开始获取用户 {nickname} 的全部视频并查找播放量前10...")
        top_videos = get_top_n_videos_by_play_count(sec_uid, n=10)

        # 下载这些视频。
        # BUG FIX: get_top_n_videos_by_play_count returns a list of dicts
        # ({'id', 'desc', 'play_count'}), not (play_count, id, desc) tuples —
        # the old tuple unpacking would have bound the dict KEYS instead.
        downloaded_videos = []
        for i, video in enumerate(top_videos, 1):
            try:
                video_id = video['id']
                play_count = video['play_count']
                desc = video['desc']
                save_path = os.path.join(user_dir, f"{video_id}.mp4")

                logger.info(f"下载第 {i}/{len(top_videos)} 个视频 (ID: {video_id}),播放量: {play_count}")

                if download_video(video_id, unique_id, save_path):
                    downloaded_videos.append({
                        'id': video_id,
                        'desc': desc,
                        'play_count': play_count,
                        'user_unique_id': unique_id
                    })
                    logger.info(f"视频 {video_id} 下载成功")
                else:
                    logger.error(f"视频 {video_id} 下载失败")

                # 避免过快请求
                time.sleep(2)

            except Exception as e:
                logger.error(f"下载视频时出错: {e}")
                continue

        all_downloaded_videos.extend(downloaded_videos)

        # 保存用户信息和视频信息到数据库
        video_info_json = json.dumps([{
            'id': v['id'],
            'desc': v['desc'],
            'play_count': v['play_count']
        } for v in downloaded_videos], ensure_ascii=False)

        # update_or_create returns (obj, created); neither is needed here, so
        # the previously unused ``user_record`` assignment is dropped.
        TiktokUserVideos.objects.update_or_create(
            sec_user_id=sec_uid,
            defaults={
                'nickname': nickname,
                'signature': signature,
                'follower_count': follower_count,
                'total_favorited': heart_count,
                'avatar_url': avatar_url,
                'videos_folder': user_dir,
                'video_paths': video_info_json,
                'video_count': video_count
            }
        )

        return JsonResponse({
            'status': 'success',
            'message': '处理完成',
            'user_info': {
                'nickname': nickname,
                'unique_id': unique_id,
                'sec_uid': sec_uid,
                'avatar': avatar_url,
                'follower_count': follower_count,
                'total_favorited': heart_count,
                'signature': signature,
                'video_count': video_count
            },
            'total_videos_count': video_count,
            'processed_videos_count': len(top_videos),
            'downloaded_videos': len(downloaded_videos),
            'videos': [{'id': v['id'], 'desc': v['desc'][:50], 'play_count': v['play_count']} for v in downloaded_videos],
            'video_directory': user_dir  # 添加视频目录路径方便查找
        }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"处理TikTok视频失败: {e}")
        import traceback
        logger.error(f"详细错误: {traceback.format_exc()}")
        return JsonResponse({
            'status': 'error',
            'message': f'处理TikTok视频失败: {str(e)}'
        }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
@require_http_methods(["GET"])
def get_tiktok_user_videos(request):
    """获取已下载的TikTok用户视频列表.

    Without ``sec_user_id`` in the query string, returns a summary of every
    stored user; with it, returns that user's stored profile, the files
    currently on disk in their download folder, and the saved video metadata.
    """
    try:
        sec_user_id = request.GET.get('sec_user_id')

        if not sec_user_id:
            # No user specified: list every known user.
            users = TiktokUserVideos.objects.all().values('sec_user_id', 'nickname', 'follower_count', 'videos_folder', 'create_time')
            return JsonResponse({
                'status': 'success',
                'users': list(users)
            }, json_dumps_params={'ensure_ascii': False})

        # Look up the requested user.
        try:
            user = TiktokUserVideos.objects.get(sec_user_id=sec_user_id)
        except TiktokUserVideos.DoesNotExist:
            return JsonResponse({
                'status': 'error',
                'message': f'用户 {sec_user_id} 不存在'
            }, json_dumps_params={'ensure_ascii': False})

        # Stored per-video metadata (JSON string in the DB, may be empty).
        video_info = json.loads(user.video_paths) if user.video_paths else []

        # Files actually present in the user's download folder right now.
        videos_folder = user.videos_folder
        video_files = []
        if os.path.exists(videos_folder):
            video_files = [f for f in os.listdir(videos_folder)
                           if os.path.isfile(os.path.join(videos_folder, f))]

        return JsonResponse({
            'status': 'success',
            'user_info': {
                'sec_user_id': user.sec_user_id,
                'nickname': user.nickname,
                'signature': user.signature,
                'follower_count': user.follower_count,
                'total_favorited': user.total_favorited,
                'avatar_url': user.avatar_url,
                'create_time': user.create_time.strftime('%Y-%m-%d %H:%M:%S'),
                'update_time': user.update_time.strftime('%Y-%m-%d %H:%M:%S')
            },
            'videos_folder': videos_folder,
            'video_files': video_files,
            'video_info': video_info
        }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"获取TikTok视频列表失败: {str(e)}")
        return JsonResponse({
            'status': 'error',
            'message': f'获取TikTok视频列表失败: {str(e)}'
        }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
|
|
|
|
|
|
# 辅助函数
|
|
|
|
|
|
2025-03-12 15:24:31 +08:00
|
|
|
|
def fetch_user_videos(sec_uid, cursor=0, count=10):
    """获取用户视频列表 (one page of a user's posted videos).

    :param sec_uid: the user's secUid
    :param cursor: pagination cursor from the previous response
    :param count: page size
    :return: parsed JSON response, or None on any failure
    """
    url = f"{API_BASE_URL}/api/tiktok/web/fetch_user_post?secUid={sec_uid}&cursor={cursor}&count={count}"

    try:
        response = requests.get(url, timeout=30)

        if response.status_code == 200:
            data = response.json()
            # Guard against responses without a 'data' payload — the previous
            # data['data'] raised KeyError into the generic handler below,
            # producing a misleading "异常" log for a malformed response.
            items = (data.get('data') or {}).get('itemList', [])
            logger.info(f"成功获取用户视频,共 {len(items)} 个视频")
            return data
        else:
            logger.error(f"获取用户视频失败: {response.status_code}")
            return None
    except Exception as e:
        logger.error(f"获取用户视频异常: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
def fetch_user_profile(unique_id):
    """获取用户基本信息 (profile lookup by TikTok handle).

    :param unique_id: the user's TikTok handle
    :return: parsed JSON whose ``data.userInfo.user`` payload is present and
             non-empty, or None on any failure / malformed response
    """
    url = f"{API_BASE_URL}/api/tiktok/web/fetch_user_profile?uniqueId={unique_id}"

    try:
        logger.info(f"正在请求用户资料: {url}")
        response = requests.get(url, timeout=30)

        if response.status_code == 200:
            data = response.json()
            logger.info(f"成功获取用户资料: {unique_id}")

            # Full payloads are only useful while debugging; DEBUG level keeps
            # production logs readable (previously logged at INFO).
            logger.debug(f"API原始响应: {data}")

            # 验证数据完整性 — each missing level is logged and rejected.
            if 'data' not in data or not data['data']:
                logger.error(f"API响应缺少data字段: {data}")
                return None

            if 'userInfo' not in data['data'] or not data['data']['userInfo']:
                logger.error(f"API响应缺少userInfo字段: {data['data']}")
                return None

            if 'user' not in data['data']['userInfo'] or not data['data']['userInfo']['user']:
                logger.error(f"API响应缺少user字段: {data['data']['userInfo']}")
                return None

            logger.debug(f"用户信息: {data['data']['userInfo']['user']}")

            return data
        else:
            logger.error(f"获取用户信息失败: HTTP {response.status_code}, 响应: {response.text[:500]}")
            return None
    except Exception as e:
        logger.error(f"获取用户信息异常: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
def download_video(video_id, unique_id, save_path):
    """使用API的直接下载接口下载TikTok视频.

    :param video_id: numeric TikTok video id
    :param unique_id: owner's handle (used to build the canonical video URL)
    :param save_path: destination file path
    :return: True when the file was fully written, otherwise False
    """
    # 确保视频ID是纯数字
    if not str(video_id).isdigit():
        logger.error(f"无效的视频ID: {video_id},必须是纯数字")
        return False

    # 构建标准TikTok视频URL
    tiktok_url = f"https://www.tiktok.com/@{unique_id}/video/{video_id}"
    logger.info(f"构建的TikTok URL: {tiktok_url}")

    # 构建完整的API请求URL
    api_url = f"http://81.69.223.133:45268/api/download"
    full_url = f"{api_url}?url={tiktok_url}&prefix=true&with_watermark=false"
    logger.info(f"完整的API请求URL: {full_url}")

    try:
        # ``with`` ensures the streamed connection is always released — a
        # bare stream=True request previously leaked the connection.
        with requests.get(full_url, stream=True, timeout=60) as response:
            # 检查响应状态
            if response.status_code != 200:
                logger.error(f"下载视频失败: {response.status_code} - {response.text[:200] if response.text else '无响应内容'}")
                return False

            # 获取内容类型
            content_type = response.headers.get('Content-Type', '')
            logger.info(f"响应内容类型: {content_type}")

            # 保存文件
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        file_size = os.path.getsize(save_path)
        logger.info(f"视频已下载到: {save_path},文件大小: {file_size}字节")

        return True

    except Exception as e:
        logger.error(f"下载视频异常: {e}")
        import traceback
        logger.error(f"详细错误: {traceback.format_exc()}")
        # Remove a partially written file so callers never keep a truncated
        # video on disk after a failed transfer.
        try:
            if os.path.exists(save_path):
                os.remove(save_path)
        except OSError:
            pass
        return False
|
2025-03-05 19:57:43 +08:00
|
|
|
|
|
2025-03-12 15:24:31 +08:00
|
|
|
|
def fetch_user_followings(sec_uid, max_cursor=0, min_cursor=0, page_size=30):
    """
    获取用户的关注列表

    Args:
        sec_uid: 用户的安全ID
        max_cursor: 保持为0
        min_cursor: 分页游标,从上一次响应获取
        page_size: 每页大小,默认30条记录

    Returns:
        用户关注列表数据, or None on any failure
    """
    url = f"{API_BASE_URL}/api/tiktok/web/fetch_user_follow?secUid={sec_uid}&count={page_size}&maxCursor={max_cursor}&minCursor={min_cursor}"

    logger.info(f"请求关注列表URL: {url}")

    try:
        response = requests.get(url, timeout=30)

        # Non-200 responses are logged and reported as failure.
        if response.status_code != 200:
            logger.error(f"获取用户关注列表失败: {response.status_code}")
            return None

        data = response.json()
        logger.info(f"成功获取用户关注列表,共 {len(data['data'].get('userList', []))} 个用户, minCursor={data['data'].get('minCursor')}")
        return data
    except Exception as e:
        logger.error(f"获取用户关注列表异常: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
def filter_users_by_followers(user_list, min_followers=5000, max_followers=50000):
    """筛选粉丝数在指定范围内的用户.

    Keeps users whose ``stats.followerCount`` lies within
    [min_followers, max_followers]; malformed entries are logged and skipped.
    """
    kept = []

    for candidate in user_list:
        try:
            followers = candidate.get('stats', {}).get('followerCount', 0)
            if min_followers <= followers <= max_followers:
                kept.append(candidate)
        except Exception as e:
            logger.error(f"筛选用户时出错: {e}")

    return kept
|
|
|
|
|
|
2025-03-12 15:24:31 +08:00
|
|
|
|
def get_top_n_videos_by_play_count(sec_uid, n=10):
    """
    获取用户播放量前N的视频

    Pages through every video the user has posted, keeping only the n
    highest-played entries in a bounded min-heap.

    Args:
        sec_uid: 用户的安全ID
        n: 需要获取的前n个视频,默认为10

    Returns:
        按播放量降序排列的前n个视频列表 as dicts with ``id``/``desc``/
        ``play_count``; empty list on error
    """
    import heapq
    top_videos_heap = []  # 小顶堆,元组格式: (播放量, 视频ID, 描述)

    # 分页获取所有视频
    cursor = 0
    has_more = True
    page = 1
    total_videos = 0

    try:
        while has_more:
            logger.info(f"获取第{page}页视频,游标: {cursor}")
            videos_data = fetch_user_videos(sec_uid, cursor=cursor)

            if not videos_data or 'data' not in videos_data:
                logger.error("获取视频失败,中止获取")
                break

            videos = videos_data['data'].get('itemList', [])
            if not videos:
                logger.info("当前页没有视频,结束获取")
                break

            # 处理当前页的每个视频
            for video in videos:
                try:
                    video_id = video.get('id', '')
                    if not video_id:
                        continue

                    play_count = int(video.get('stats', {}).get('playCount', 0))
                    desc = video.get('desc', '')

                    # Keep the heap bounded at n entries: push while not
                    # full, otherwise replace the current minimum if beaten.
                    if len(top_videos_heap) < n:
                        heapq.heappush(top_videos_heap, (play_count, video_id, desc))
                    elif play_count > top_videos_heap[0][0]:
                        heapq.heappushpop(top_videos_heap, (play_count, video_id, desc))
                except Exception as e:
                    logger.error(f"处理视频信息出错: {e}")

            total_videos += len(videos)
            logger.info(f"已处理 {total_videos} 个视频,当前保存了 {len(top_videos_heap)} 个候选视频")

            # 检查是否有更多页
            has_more = videos_data['data'].get('hasMore', False)
            if has_more:
                cursor = videos_data['data'].get('cursor', 0)

            page += 1
            time.sleep(1)  # throttle between pages

        # sorted() replaces the previous copy-then-sort of the heap; same
        # descending order by play count (ties broken by id then desc).
        top_videos_list = [
            {'id': video_id, 'desc': desc, 'play_count': play_count}
            for play_count, video_id, desc in sorted(top_videos_heap, reverse=True)
        ]

        logger.info(f"从 {total_videos} 个视频中找到播放量最高的 {len(top_videos_list)} 个")
        return top_videos_list

    except Exception as e:
        logger.error(f"获取热门视频时发生错误: {str(e)}")
        import traceback
        logger.error(traceback.format_exc())
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 辅助函数,用于操作所有任务
|
|
|
|
|
def list_all_tasks():
    """获取所有任务列表 — collect every task-status record stored in Redis."""
    collected = []
    for key in redis_client.keys("task_status:*"):
        # Keys look like b"task_status:<uuid>"; keep only the id part.
        tid = key.decode().split(":", 1)[1]
        status = get_task_status(tid)
        if status:
            status['task_id'] = tid
            collected.append(status)
    return collected
|
|
|
|
|
|
|
|
|
|
@shared_task
def async_fetch_videos_task(task_id, start_unique_id, max_depth=3, target_users=None, skip_user_profile=False, start_sec_uid=None):
    """异步执行视频获取任务 (Celery entry point driving TiktokVideoFetcher)."""
    try:
        # Mark the task as started before any heavy work happens.
        update_task_status(task_id, {'status': 'processing', 'progress': 0})

        # 创建视频获取器实例
        fetcher = TiktokVideoFetcher(
            start_unique_id=start_unique_id,
            max_depth=max_depth,
            target_users=target_users,
            skip_user_profile=skip_user_profile,  # 传递测试模式
            start_sec_uid=start_sec_uid  # 传递临时secUid
        )

        # Forward fetcher progress into the shared task-status store.
        fetcher.progress_callback = lambda progress, message: update_task_progress(task_id, progress, message)

        # 执行处理
        if fetcher.initialize() and fetcher.process():
            update_task_status(task_id, {
                'status': 'completed',
                'result': fetcher.get_result(),
                'progress': 100
            })
            return True

        # Initialization or processing failed; surface the fetcher's error.
        update_task_status(task_id, {
            'status': 'failed',
            'error': fetcher.error_message
        })
        return False
    except Exception as e:
        logger.error(f"异步任务执行失败: {str(e)}")
        import traceback
        logger.error(f"详细错误信息: {traceback.format_exc()}")

        update_task_status(task_id, {
            'status': 'failed',
            'error': str(e)
        })
        return False
|
|
|
|
|
|
|
|
|
|
def update_task_progress(task_id, progress, message=None):
    """更新任务进度 (optionally attaching a human-readable message)."""
    payload = {'progress': progress}
    if message:
        payload['message'] = message
    update_task_status(task_id, payload)
|
|
|
|
|
|
|
|
|
|
class TiktokVideoFetcher:
|
|
|
|
|
"""抖音视频获取器,支持递归获取用户关注的视频"""
|
|
|
|
|
|
|
|
|
|
    def __init__(self, start_unique_id=None, start_sec_uid=None,
                 mode="by_users", target_users=None, max_depth=1,
                 skip_user_profile=False, max_videos_per_user=10):
        """
        初始化抖音视频获取器

        :param start_unique_id: 起始用户的uniqueId
        :param start_sec_uid: 起始用户的secUid
        :param mode: 模式,'by_users'(按用户数)或'by_depth'(按深度)
        :param target_users: 目标用户数量,仅在by_users模式下有效
        :param max_depth: 最大层级深度,仅在by_depth模式下有效
        :param skip_user_profile: 是否跳过获取用户资料
        :param max_videos_per_user: 每个用户最多获取的视频数量
        """
        # Seed identifiers; either may be None — initialize() resolves the
        # missing one via the profile API when possible.
        self.start_unique_id = start_unique_id
        self.start_sec_uid = start_sec_uid  # keep the start_sec_uid name consistent with callers
        self.mode = mode
        self.target_users = target_users
        self.max_depth = max_depth
        self.skip_user_profile = skip_user_profile
        self.max_videos_per_user = max_videos_per_user or 10

        # Lifecycle / progress bookkeeping.
        self.status = "created"  # 状态: created, initializing, ready, processing, completed, failed
        self.user_count = 0  # users processed so far
        self.video_count = 0  # videos downloaded so far
        self.progress = 0  # percentage complete
        self.progress_message = ""  # last progress message
        self.error_message = ""  # last error message

        # BFS work queue of (sec_uid, unique_id, depth) plus a seen-set to
        # avoid processing a user twice.
        self.user_queue = deque()
        self.processed_users = set()

        # Make sure the download root exists up front.
        os.makedirs(TIKTOK_VIDEOS_PATH, exist_ok=True)

        logger.info(f"初始化TiktokVideoFetcher: mode={mode}, max_depth={max_depth}, target_users={target_users}")
|
|
|
|
|
|
|
|
|
|
    def initialize(self):
        """初始化处理,获取起始用户信息.

        Seeds the work queue with the starting user. Accepts either a
        ready-made secUid or a uniqueId (resolved via the profile API).
        Returns True and sets status to "ready" on success; on failure sets
        status to "failed" and records error_message, returning False.
        """
        try:
            self.status = "initializing"
            logger.info(f"开始初始化,起始用户: {self.start_unique_id or '使用secUid'}")

            # Case 1: a secUid was supplied directly — no API lookup needed.
            if self.start_sec_uid:
                logger.info(f"直接使用提供的secUid: {self.start_sec_uid}")

                # Fall back to a synthetic display id when no uniqueId given.
                display_id = self.start_unique_id or f"user_{self.start_sec_uid[:8]}"

                # Queue entries are (sec_uid, unique_id, depth).
                self.user_queue.append((self.start_sec_uid, display_id, 0))
                self.status = "ready"
                return True

            # Case 2: only a uniqueId — resolve the secUid via the API.
            elif self.start_unique_id:
                logger.info(f"通过uniqueId获取用户secUid: {self.start_unique_id}")
                user_profile = fetch_user_profile(self.start_unique_id)

                # Explicit error payload from the fetch helper.
                if isinstance(user_profile, dict) and 'error' in user_profile:
                    error_message = user_profile['error']
                    self.error_message = f'获取用户资料出错: {error_message}'
                    self.status = "failed"
                    return False

                if not user_profile or 'data' not in user_profile or not user_profile['data']:
                    self.error_message = f'无法获取用户资料或用户不存在: {self.start_unique_id}'
                    self.status = "failed"
                    return False

                try:
                    # NOTE(review): this treats the payload as a flat dict
                    # ('data' -> secUid), while fetch_tiktok_videos reads
                    # data.userInfo.user.secUid — confirm which shape the
                    # profile API actually returns.
                    user_data = user_profile['data']
                    sec_uid = user_data.get('secUid', '')
                    if not sec_uid:
                        self.error_message = f'无法获取用户secUid: {self.start_unique_id}'
                        self.status = "failed"
                        return False

                    # Enqueue the start user at depth 0.
                    self.user_queue.append((sec_uid, self.start_unique_id, 0))

                    # Best-effort persistence of the profile; failures here
                    # must not abort initialization.
                    try:
                        TiktokUserVideos.objects.update_or_create(
                            sec_user_id=sec_uid,
                            defaults={
                                'unique_id': self.start_unique_id,
                                'nickname': user_data.get('nickname', ''),
                                'follower_count': user_data.get('followerCount', 0),
                                'following_count': user_data.get('followingCount', 0),
                                'video_count': user_data.get('videoCount', 0),
                                'videos_folder': os.path.join('videos', self.start_unique_id)
                            }
                        )
                    except Exception as e:
                        logger.warning(f"保存用户数据到数据库失败,但不影响继续处理: {e}")

                    self.status = "ready"
                    return True
                except KeyError:
                    self.error_message = f'用户资料结构不完整,无法提取secUid'
                    self.status = "failed"
                    return False

            # Case 3: neither identifier was supplied.
            else:
                self.error_message = "未提供secUid或uniqueId,无法初始化"
                self.status = "failed"
                return False

        except Exception as e:
            logger.error(f"初始化失败: {e}")
            import traceback
            logger.error(f"详细错误信息: {traceback.format_exc()}")
            self.error_message = f"初始化失败: {str(e)}"
            self.status = "failed"
            return False
|
2025-03-05 19:57:43 +08:00
|
|
|
|
|
2025-03-12 15:24:31 +08:00
|
|
|
|
    def process(self):
        """执行主处理逻辑.

        Drains the user queue (breadth-first), processing each user at most
        once, until the queue empties — or, in by_users mode, the target
        user count is reached. Returns True on completion, False on failure
        (error_message is set).
        """
        # Lazily (re-)initialize when called before initialize().
        if self.status != "ready":
            if not self.initialize():
                return False

        self.status = "processing"
        self._report_progress(5, "初始化完成,开始处理")

        try:
            # Rough denominator for progress reporting; defaults to 50 when
            # no explicit user target is set.
            total_users = self.target_users if self.mode == "by_users" and self.target_users else 50

            # Main BFS loop over queued (sec_uid, unique_id, depth) entries.
            while self.user_queue:
                # 按人数抓取模式下,达到目标人数则停止
                if self.mode == "by_users" and self.target_users is not None and self.user_count >= self.target_users:
                    logger.info(f"已达到目标用户数 {self.target_users},停止处理")
                    break

                # Queue entries carry only sec_uid, unique_id and depth.
                sec_uid, unique_id, depth = self.user_queue.popleft()

                # by_depth mode: drop entries beyond the depth limit.
                if self.mode == "by_depth" and depth > self.max_depth:
                    continue

                # Skip users already handled in an earlier iteration.
                if sec_uid in self.processed_users:
                    continue

                # 处理单个用户
                self._process_single_user(sec_uid, unique_id, depth)

                # Progress is capped at 95% until the loop finishes.
                progress = min(95, int(self.user_count / total_users * 90) + 5)
                self._report_progress(progress, f"已处理 {self.user_count} 个用户,已下载 {self.video_count} 个视频")

            self.status = "completed"
            self._report_progress(100, f"处理完成,共处理 {self.user_count} 个用户,下载 {self.video_count} 个视频")
            return True

        except Exception as e:
            logger.error(f"处理过程中出错: {e}")
            import traceback
            logger.error(f"详细错误信息: {traceback.format_exc()}")
            self.error_message = f"处理过程中出错: {str(e)}"
            self.status = "failed"
            self._report_progress(0, f"处理失败: {str(e)}")
            return False
|
|
|
|
|
|
|
|
|
|
def _process_single_user(self, sec_uid, unique_id, depth):
    """Process one user: download their videos, then enqueue their followings.

    The user is marked as processed up front so they are never revisited,
    even if fetching fails partway through. Errors are logged, never raised.
    """
    # Mark as processed first so failures below cannot cause reprocessing.
    self.processed_users.add(sec_uid)
    self.user_count += 1

    logger.info(f"开始处理用户 {unique_id},深度: {depth}")

    try:
        # Ensure a minimal DB row exists for the user (best-effort).
        try:
            TiktokUserVideos.objects.get_or_create(
                sec_user_id=sec_uid,
                defaults={
                    'unique_id': unique_id,
                    'videos_folder': os.path.join('videos', unique_id)
                }
            )
        except Exception as e:
            logger.error(f"创建基本用户记录失败: {e}")

        # Fetch and download the user's top videos.
        # NOTE(review): the returned list is currently unused here.
        downloaded_videos = self._fetch_user_videos(sec_uid, unique_id, depth)

        # Expand the crawl frontier with this user's followings, depending on mode.
        if self.mode == "by_depth" and depth < self.max_depth:
            next_depth = depth + 1
            self._process_user_followings(sec_uid, next_depth)
        elif self.mode == "by_users" and self.user_count < self.target_users:
            next_depth = depth + 1
            self._process_user_followings(sec_uid, next_depth)

        # Progress denominator falls back to 50 when no target is set.
        self._report_progress(
            min(95, int(self.user_count / (self.target_users or 50) * 90) + 5),
            f"已处理 {self.user_count} 个用户,已下载 {self.video_count} 个视频"
        )

    except Exception as e:
        logger.error(f"处理用户 {unique_id} 时出错: {e}")
        import traceback
        logger.error(traceback.format_exc())
|
|
|
|
def _fetch_user_videos(self, sec_uid, unique_id, depth):
    """Fetch a user's top videos (dict records preferred) and download them.

    Creates ``TIKTOK_VIDEOS_PATH/<unique_id>``, downloads each video that is
    not already on disk, and updates the user's DB row with the folder path.

    Returns:
        list[dict]: one ``{'id', 'desc', 'play_count'}`` dict per video that
        exists on disk after this call (pre-existing or freshly downloaded).
    """
    # traceback is used in several error handlers below.
    import traceback

    downloaded_videos = []

    # Per-user output folder, keyed by the human-readable unique_id.
    videos_folder = os.path.join(TIKTOK_VIDEOS_PATH, unique_id)
    os.makedirs(videos_folder, exist_ok=True)

    logger.info(f"开始处理用户 {unique_id} 的视频")

    try:
        # Fetch the top-N videos by play count; expected to be a list of dicts.
        logger.info(f"调用 get_top_n_videos_by_play_count({sec_uid}, n={self.max_videos_per_user})")
        top_videos = get_top_n_videos_by_play_count(sec_uid, n=self.max_videos_per_user)

        logger.info(f"获取到视频列表: 类型={type(top_videos).__name__}, 数量={len(top_videos)}")

        # No videos: still record the folder path, then bail out.
        if not top_videos:
            logger.warning(f"未能获取到用户 {unique_id} 的任何视频")
            try:
                TiktokUserVideos.objects.filter(sec_user_id=sec_uid).update(
                    videos_folder=videos_folder
                )
            except Exception as e:
                logger.error(f"更新用户视频文件夹失败: {e}")
                logger.error(traceback.format_exc())
            return []

        total_videos = len(top_videos)
        logger.info(f"开始处理 {total_videos} 个视频")

        for i, video_data in enumerate(top_videos):
            current_num = i + 1
            # NOTE(review): format is "(total/current)", not "(current/total)"
            # — matches the original comment but reads inverted; confirm intent.
            progress_str = f"({total_videos}/{current_num})"
            logger.info(f"{progress_str} 开始处理第 {current_num}/{total_videos} 个视频")

            try:
                # Preferred path: dict-shaped record with id/play_count/desc.
                if isinstance(video_data, dict):
                    video_id = str(video_data.get('id', ''))
                    play_count = video_data.get('play_count', 0)
                    desc = video_data.get('desc', '')
                    logger.info(f"{progress_str} 字典数据: ID={video_id}, 播放={play_count}")
                else:
                    # Backward compatibility: accept (play_count, video_id[, desc]) tuples.
                    logger.warning(f"{progress_str} 收到非字典数据: {type(video_data).__name__}")
                    if isinstance(video_data, tuple) and len(video_data) >= 2:
                        play_count, video_id, *rest = video_data
                        desc = rest[0] if rest else ""
                        video_id = str(video_id)
                        logger.info(f"{progress_str} 元组数据: ID={video_id}, 播放={play_count}")
                    else:
                        logger.error(f"{progress_str} 无法解析数据: {video_data}")
                        continue

                # A video id is mandatory to build the filename.
                if not video_id:
                    logger.warning(f"{progress_str} 视频ID为空,跳过")
                    continue

                video_filename = f"{video_id}.mp4"
                video_path = os.path.join(videos_folder, video_filename)

                # Skip the download when a non-empty file is already on disk;
                # still report it in the returned list.
                if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
                    logger.info(f"{progress_str} 视频已存在: {video_id}")
                    # Normalize to the dict format regardless of input shape.
                    video_dict = {
                        'id': video_id,
                        'desc': desc,
                        'play_count': play_count
                    }
                    downloaded_videos.append(video_dict)
                    continue

                # Download the video; only successful downloads are counted.
                logger.info(f"{progress_str} 开始下载视频: {video_id}")
                if download_video(video_id, unique_id, video_path):
                    logger.info(f"{progress_str} 视频下载成功: {video_id}")
                    self.video_count += 1

                    video_dict = {
                        'id': video_id,
                        'desc': desc,
                        'play_count': play_count
                    }
                    downloaded_videos.append(video_dict)
                else:
                    logger.error(f"{progress_str} 下载视频失败: {video_id}")

            except Exception as e:
                logger.error(f"{progress_str} 处理单个视频时出错: {e}")
                logger.error(traceback.format_exc())
                continue

            # Throttle between downloads to avoid hammering the upstream API.
            logger.info(f"{progress_str} 处理完成,等待2秒...")
            time.sleep(2)

        # Persist only fields that actually exist on the model.
        try:
            TiktokUserVideos.objects.filter(sec_user_id=sec_uid).update(
                videos_folder=videos_folder
            )

            # Serialized video metadata is logged only, not stored.
            # NOTE(review): video_info_json is a dead store — confirm whether
            # it was meant to be saved to the model.
            video_info_json = json.dumps(downloaded_videos, ensure_ascii=False)
            logger.info(f"已处理 {len(downloaded_videos)}/{total_videos} 个视频")
            logger.info(f"数据库更新成功: 更新了 videos_folder={videos_folder}")

        except Exception as e:
            logger.error(f"更新数据库失败: {e}")
            logger.error(traceback.format_exc())

    except Exception as e:
        logger.error(f"处理用户 {unique_id} 视频时发生错误: {str(e)}")
        logger.error(traceback.format_exc())

    return downloaded_videos
|
|
|
|
|
def _process_user_followings(self, sec_uid, next_depth):
    """Scan a user's followings and queue up to 5 that pass the follower filter.

    Pages through ``fetch_user_followings`` (cursor-based), filters each page
    with ``filter_users_by_followers``, enqueues qualifying users at
    ``next_depth``, and upserts them into ``TiktokUserVideos``. Stops after 5
    queued users, when pagination is exhausted, or after a 60s time budget.
    Errors are logged, never raised.
    """
    try:
        logger.info(f"获取用户关注列表: {sec_uid}, 深度: {next_depth}")

        max_cursor = 0          # maxCursor stays 0; only minCursor advances
        min_cursor = 0          # initial minCursor
        max_time = time.time()  # start of the 60-second time budget
        filtered_users_count = 0   # qualifying users found so far
        max_filtered_users = 5     # only the first 5 qualifying users are taken
        processed_sec_uids = set() # per-call de-duplication
        page = 1                   # page counter (for logging only)

        while True:
            # Stop once enough qualifying users have been queued.
            if filtered_users_count >= max_filtered_users:
                logger.info(f"已找到 {max_filtered_users} 个符合条件的用户,停止获取")
                break

            # Fetch one page of followings with the current cursors.
            logger.info(f"获取关注列表第{page}页: maxCursor={max_cursor}, minCursor={min_cursor}")
            followings_data = fetch_user_followings(sec_uid, max_cursor=max_cursor, min_cursor=min_cursor, page_size=30)

            if not followings_data or 'data' not in followings_data:
                logger.warning(f"无法获取用户关注列表: {sec_uid}")
                break

            user_list = followings_data['data'].get('userList', [])

            if not user_list:
                logger.info("没有更多关注用户")
                break

            logger.info(f"获取到 {len(user_list)} 个关注用户")

            # Keep only users whose follower count passes the project filter.
            filtered_users = filter_users_by_followers(user_list)
            logger.info(f"本页筛选出 {len(filtered_users)} 个粉丝数符合条件的用户")

            for following in filtered_users:
                # Stop mid-page once the quota is reached.
                if filtered_users_count >= max_filtered_users:
                    break

                # Each entry carries a 'user' profile and a 'stats' block.
                user_data = following.get('user', {})
                stats_data = following.get('stats', {})

                follower_sec_uid = user_data.get('secUid')
                follower_unique_id = user_data.get('uniqueId')

                # Both identifiers are required downstream.
                if not follower_sec_uid or not follower_unique_id:
                    logger.warning("用户数据不完整,缺少必要字段")
                    continue

                # Skip users seen in this call or already crawled this run.
                if follower_sec_uid in processed_sec_uids or follower_sec_uid in self.processed_users:
                    continue

                processed_sec_uids.add(follower_sec_uid)
                filtered_users_count += 1

                # Enqueue for crawling at the next depth level.
                self.user_queue.append((follower_sec_uid, follower_unique_id, next_depth))
                logger.info(f"添加符合条件的用户到队列: {follower_unique_id}, 粉丝数: {stats_data.get('followerCount', 0)}")

                # Upsert the user's profile stats (best-effort).
                try:
                    TiktokUserVideos.objects.update_or_create(
                        sec_user_id=follower_sec_uid,
                        defaults={
                            'unique_id': follower_unique_id,
                            'nickname': user_data.get('nickname', ''),
                            'follower_count': stats_data.get('followerCount', 0),
                            'following_count': stats_data.get('followingCount', 0),
                            'video_count': stats_data.get('videoCount', 0)
                        }
                    )
                except Exception as e:
                    logger.error(f"保存关注用户到数据库失败: {follower_unique_id}, {e}")

            # Quota reached during this page: stop paginating.
            if filtered_users_count >= max_filtered_users:
                break

            # Advance the pagination cursor; an unchanged or falsy cursor
            # means the API has nothing further.
            old_min_cursor = min_cursor
            min_cursor = followings_data['data'].get('minCursor', 0)
            has_more = followings_data['data'].get('hasMore', False)

            logger.info(f"游标更新: 旧min_cursor={old_min_cursor} -> 新min_cursor={min_cursor}, has_more={has_more}")

            if not has_more or min_cursor == old_min_cursor or not min_cursor:
                logger.info("没有更多数据或游标无效,结束获取")
                break

            # Throttle between pages.
            logger.info("等待1秒后获取下一页...")
            time.sleep(1)

            page += 1

            # Hard cap: never spend more than 60 seconds on one user's followings.
            if time.time() - max_time > 60:
                logger.warning(f"达到时间限制,停止获取更多关注用户")
                break

        logger.info(f"用户关注处理完成,共筛选出 {filtered_users_count} 个符合条件的用户")

    except Exception as e:
        logger.error(f"处理用户关注列表出错: {e}")
        import traceback
        logger.error(traceback.format_exc())
|
|
|
|
def _report_progress(self, progress, message):
    """Record the current progress percentage and status message.

    Updates ``self.progress`` / ``self.progress_message`` and mirrors the
    update into the log so pollers and log readers stay in sync.
    """
    self.progress, self.progress_message = progress, message
    logger.info(f"进度更新: {progress}%, {message}")
|
|
|
|
def get_progress(self):
    """Return a point-in-time snapshot of this run for status polling.

    Keys: status, progress, message, error, user_count, video_count.
    """
    snapshot_fields = (
        ('status', self.status),
        ('progress', self.progress),
        ('message', self.progress_message),
        ('error', self.error_message),
        ('user_count', self.user_count),
        ('video_count', self.video_count),
    )
    return dict(snapshot_fields)
|
|
|
|
def get_result(self):
    """Summarize the finished run: status, counters, error, completion time."""
    finished_at = datetime.now().isoformat()
    summary = dict(
        status=self.status,
        processed_users=self.user_count,
        downloaded_videos=self.video_count,
        error=self.error_message,
    )
    summary['completed_at'] = finished_at
    return summary
|
|
|
|
@csrf_exempt
@require_http_methods(["POST"])
def start_recursive_fetch_videos(request):
    """POST endpoint: validate params, record status, dispatch the fetch task.

    Accepts a JSON body with a starting user (``start_unique_id`` or
    ``start_sec_uid``/``sec_uid``), optional mode/depth/count limits, writes
    an initial task-status record, and dispatches ``async_fetch_videos_task``.
    Responds immediately with the generated task id.
    """
    try:
        # traceback is imported up front so the except block can use it.
        import traceback

        data = json.loads(request.body)
        start_unique_id = data.get('start_unique_id')
        start_sec_uid = data.get('start_sec_uid')  # keep parameter naming consistent

        # Back-compat: accept 'sec_uid' as an alias for 'start_sec_uid'.
        if not start_sec_uid and data.get('sec_uid'):
            start_sec_uid = data.get('sec_uid')
            logger.info(f"使用sec_uid作为start_sec_uid: {start_sec_uid}")

        mode = data.get('mode', 'by_users')
        max_depth = int(data.get('max_depth', 3))
        skip_user_profile = data.get('skip_user_profile', False)
        max_videos_per_user = int(data.get('max_videos_per_user', 10))

        # At least one starting identifier is required.
        if not start_unique_id and not start_sec_uid:
            return JsonResponse({
                'code': 400,
                'message': '参数错误',
                'error': '请提供起始用户ID(start_unique_id)或用户secUid(start_sec_uid)'
            }, json_dumps_params={'ensure_ascii': False})

        # target_users is optional but must be an integer when supplied.
        target_users = None
        if 'target_users' in data and data['target_users']:
            try:
                target_users = int(data['target_users'])
            except (ValueError, TypeError):
                return JsonResponse({
                    'code': 400,
                    'message': '参数错误',
                    'error': 'target_users必须是整数'
                }, json_dumps_params={'ensure_ascii': False})

        # Fresh opaque id; also used as the task-status store key.
        task_id = str(uuid.uuid4())

        # Log the full launch configuration for traceability.
        logger.info(f"========================= 任务启动 =========================")
        logger.info(f"任务ID: {task_id}")
        logger.info(f"起始用户: {start_unique_id or '使用secUid'}")
        logger.info(f"起始secUid: {start_sec_uid}")
        logger.info(f"模式: {mode}")
        logger.info(f"最大深度: {max_depth}")
        logger.info(f"目标用户数: {target_users if target_users else '不限'}")
        logger.info(f"跳过用户资料: {skip_user_profile}")
        logger.info(f"每用户最大视频数: {max_videos_per_user}")
        logger.info(f"========================================================")

        # Parameters are persisted with the task status for later inspection.
        task_params = {
            'start_unique_id': start_unique_id,
            'start_sec_uid': start_sec_uid,  # keep parameter naming consistent
            'mode': mode,
            'target_users': target_users,
            'max_depth': max_depth,
            'skip_user_profile': skip_user_profile,
            'max_videos_per_user': max_videos_per_user
        }

        # Seed the status record before dispatch so polling never misses it.
        set_task_status(task_id, {
            'status': 'pending',
            'progress': 0,
            'message': '任务已提交,等待处理',
            'result': None,
            'error': None,
            'created_at': datetime.now().isoformat(),
            'params': task_params
        })

        # Dispatch the Celery task — consistently using start_sec_uid.
        # NOTE(review): 'mode' and 'max_videos_per_user' are validated and
        # stored above but NOT passed to the task; confirm the task's
        # signature/defaults match what was validated here.
        async_fetch_videos_task.delay(
            task_id,
            start_unique_id,
            max_depth,
            target_users,
            skip_user_profile,
            start_sec_uid  # keep the parameter name consistent
        )

        # Acknowledge immediately with the task id.
        return JsonResponse({
            'code': 200,
            'message': '任务已提交',
            'data': {
                'task_id': task_id,
                'status': 'pending'
            }
        }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"启动异步任务失败: {str(e)}")
        import traceback
        logger.error(f"详细错误信息: {traceback.format_exc()}")
        return JsonResponse({
            'code': 500,
            'message': '服务器错误',
            'error': f'启动异步任务失败: {str(e)}'
        }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
@csrf_exempt
@require_http_methods(["GET"])
def check_fetch_task_status(request, task_id):
    """Return the lightweight status view of a fetch task.

    Looks the task up in the shared status store and reports its
    status / progress / message / error / creation time. An unknown task id
    yields an app-level error payload rather than a 404.
    """
    task_info = get_task_status(task_id)

    # Guard: unknown task id.
    if not task_info:
        return JsonResponse(
            {'status': 'error', 'message': '任务不存在'},
            json_dumps_params={'ensure_ascii': False},
        )

    # Project only the public fields, with the same defaults as before.
    field_defaults = (
        ('status', 'unknown'),
        ('progress', 0),
        ('message', ''),
        ('error', None),
        ('created_at', ''),
    )
    response_data = {key: task_info.get(key, default) for key, default in field_defaults}

    return JsonResponse(response_data, json_dumps_params={'ensure_ascii': False})
|
|
|
|
@csrf_exempt
@require_http_methods(["GET"])
def get_fetch_task_result(request, task_id):
    """Return the full stored result payload of a *completed* fetch task."""
    task_info = get_task_status(task_id)

    # Guard: unknown task id.
    if not task_info:
        return JsonResponse(
            {'status': 'error', 'message': '任务不存在'},
            json_dumps_params={'ensure_ascii': False},
        )

    # Guard: the task exists but has not reached the completed state yet.
    if task_info['status'] != 'completed':
        return JsonResponse(
            {'status': 'error', 'message': f'任务尚未完成,当前状态: {task_info["status"]}'},
            json_dumps_params={'ensure_ascii': False},
        )

    # Completed: hand back the stored result object verbatim.
    return JsonResponse(task_info['result'], json_dumps_params={'ensure_ascii': False})
|
|
|
|
# 任务管理API
@csrf_exempt
@require_http_methods(["GET"])
def list_fetch_tasks(request):
    """List every known fetch task, newest first.

    Fix: the previous sort key compared raw ``created_at`` values, but each
    entry stores ``task_info.get('created_at')`` which may be ``None`` —
    sorting then mixes ``None`` with ``str`` and raises ``TypeError``.
    Missing/None timestamps now sort as the empty string (i.e. oldest).
    Also guards against ``params`` being present but ``None``.
    """
    tasks = []
    all_tasks = list_all_tasks()

    for task_info in all_tasks:
        # 'params' may be absent or None depending on which endpoint created
        # the task; guard both cases instead of assuming a dict.
        params = task_info.get('params') or {}
        tasks.append({
            'task_id': task_info.get('task_id'),
            'status': task_info.get('status'),
            'progress': task_info.get('progress', 0),
            'message': task_info.get('message', ''),
            'created_at': task_info.get('created_at'),
            # NOTE(review): start_recursive_fetch_videos stores
            # 'start_unique_id' in params, so this is often None for those
            # tasks — confirm which key producers actually use.
            'unique_id': params.get('unique_id'),
        })

    # Newest first; None/missing created_at sorts as '' so the comparison
    # never mixes None with str.
    tasks.sort(key=lambda t: t.get('created_at') or '', reverse=True)

    return JsonResponse({
        'tasks': tasks
    }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
@csrf_exempt
@require_http_methods(["DELETE"])
def clean_completed_tasks(request):
    """Purge finished (completed/failed) tasks older than 24 hours.

    Walks the task store, deletes each qualifying entry's Redis key, and
    reports how many records were removed.
    """
    cutoff = datetime.now() - timedelta(days=1)
    completed_count = 0

    for task_info in list_all_tasks():
        task_id = task_info.get('task_id')
        # Only terminal tasks are eligible for cleanup.
        if task_info.get('status') not in ('completed', 'failed'):
            continue
        # Keep entries without a creation timestamp.
        if 'created_at' not in task_info:
            continue
        # Keep anything newer than the 24-hour cutoff.
        if datetime.fromisoformat(task_info['created_at']) >= cutoff:
            continue
        redis_client.delete(f"task_status:{task_id}")
        completed_count += 1

    return JsonResponse({
        'status': 'success',
        'message': f'已清理 {completed_count} 个完成超过24小时的任务'
    }, json_dumps_params={'ensure_ascii': False})
|
|
|
|
@csrf_exempt
@require_http_methods(["POST"])
def fetch_videos_with_callback(request):
    """Start a fetch task that POSTs its result to a caller-supplied URL.

    Validates ``callback_url``, seeds the Redis-backed task record, and
    dispatches ``fetch_and_callback_task``; the worker performs the callback.
    """
    try:
        data = json.loads(request.body)
        callback_url = data.get('callback_url')

        # The callback is the whole point of this endpoint, so it is mandatory.
        if not callback_url:
            return JsonResponse(
                {'status': 'error', 'message': '必须提供callback_url参数'},
                json_dumps_params={'ensure_ascii': False},
            )

        task_id = str(uuid.uuid4())

        # Persist the initial task record before dispatching the worker.
        initial_state = {
            'status': 'pending',
            'progress': 0,
            'message': '任务已创建,等待处理',
            'error': None,
            'callback_url': callback_url,
            'params': data,
            'created_at': datetime.now().isoformat(),
        }
        set_task_status(task_id, initial_state)

        # Hand off to Celery.
        fetch_and_callback_task.delay(task_id, data)

        return JsonResponse(
            {
                'status': 'accepted',
                'message': '任务已启动,完成后将回调通知',
                'task_id': task_id,
            },
            json_dumps_params={'ensure_ascii': False},
        )
    except Exception as e:
        logger.error(f"启动任务失败: {str(e)}")
        return JsonResponse(
            {'status': 'error', 'message': f'启动任务失败: {str(e)}'},
            json_dumps_params={'ensure_ascii': False},
        )
|
|
|
|
@shared_task
def fetch_and_callback_task(task_id, data):
    """Celery task: run the fetch end-to-end, then POST the result to the callback URL.

    Updates the shared task-status record as it progresses; on any failure it
    still attempts a best-effort error callback. Never raises.
    """
    try:
        # Mark the task as running.
        update_task_status(task_id, {
            'status': 'processing',
            'message': '正在处理中'
        })

        # Close any DB connections inherited from the parent process — stale
        # connections are not safe to reuse inside a Celery worker.
        from django.db import connections
        for conn in connections.all():
            conn.close()

        # NOTE(review): only 'unique_id', 'max_depth' and 'target_users' are
        # forwarded here — other params accepted by the HTTP endpoint (e.g.
        # sec_uid / max_videos_per_user) are ignored; confirm this is intended.
        fetcher = TiktokVideoFetcher(
            start_unique_id=data.get('unique_id'),
            max_depth=int(data.get('max_depth', 3)),
            target_users=int(data.get('target_users')) if 'target_users' in data and data['target_users'] else None
        )

        # Run the pipeline and record the terminal state.
        if fetcher.initialize() and fetcher.process():
            result = fetcher.get_result()
            update_task_status(task_id, {
                'status': 'completed',
                'result': result
            })
        else:
            update_task_status(task_id, {
                'status': 'failed',
                'error': fetcher.error_message
            })
            # The failure payload is still delivered to the callback below.
            result = {
                'status': 'error',
                'message': fetcher.error_message
            }

        # Deliver the result to the caller-supplied callback URL.
        callback_url = get_task_status(task_id)['callback_url']
        try:
            response = requests.post(callback_url, json={
                'task_id': task_id,
                'result': result
            }, timeout=30)

            update_task_status(task_id, {
                'callback_status': response.status_code
            })
        except Exception as e:
            logger.error(f"回调通知失败: {str(e)}")
            update_task_status(task_id, {
                'callback_status': 'failed',
                'callback_error': str(e)
            })

            # Best-effort second attempt: report the callback failure itself.
            try:
                callback_url = get_task_status(task_id)['callback_url']
                requests.post(callback_url, json={
                    'task_id': task_id,
                    'status': 'error',
                    'message': str(e)
                }, timeout=30)
            except:
                # NOTE(review): bare except deliberately swallows everything
                # so a dead callback endpoint cannot fail the task.
                pass

    except Exception as e:
        logger.error(f"任务执行失败: {str(e)}")
        update_task_status(task_id, {
            'status': 'failed',
            'error': str(e)
        })

        # Best-effort error callback for unexpected failures.
        try:
            callback_url = get_task_status(task_id)['callback_url']
            requests.post(callback_url, json={
                'task_id': task_id,
                'status': 'error',
                'message': str(e)
            }, timeout=30)
        except:
            # NOTE(review): bare except — see above.
            pass
|
|
|
|
@csrf_exempt
@require_http_methods(["POST"])
def reset_task_status(request, task_id):
    """Manually overwrite a task's status record (temporary workaround).

    Replaces the stored state with a 'reset' marker while remembering what
    status the task had before the reset.
    """
    try:
        task_info = get_task_status(task_id)

        # Guard: unknown task id.
        if not task_info:
            return JsonResponse(
                {'status': 'error', 'message': '任务不存在'},
                json_dumps_params={'ensure_ascii': False},
            )

        # Overwrite the record, keeping the pre-reset status for reference.
        previous_status = task_info.get('status', 'unknown')
        set_task_status(task_id, {
            'status': 'reset',
            'message': '任务已手动重置',
            'reset_time': datetime.now().isoformat(),
            'original_status': previous_status,
        })

        return JsonResponse(
            {'status': 'success', 'message': f'任务 {task_id} 已重置'},
            json_dumps_params={'ensure_ascii': False},
        )
    except Exception as e:
        return JsonResponse(
            {'status': 'error', 'message': f'重置失败: {str(e)}'},
            json_dumps_params={'ensure_ascii': False},
        )
|
|
|
|
@csrf_exempt
@require_http_methods(["GET"])
def check_task_status_detail(request, task_id):
    """Return a task's stored record augmented with Celery cluster diagnostics.

    Adds a 'debug' section describing worker availability and the tasks
    currently active on each worker; diagnostic failures are reported inside
    'debug' instead of masking the task info.

    Fix: the original called ``inspector.active()`` twice — each call is a
    broadcast round-trip to every worker, and the two results could even
    disagree. A single call is now made and reused for all debug fields.
    (The unused local ``import celery.app.control`` was also dropped.)
    """
    task_info = get_task_status(task_id)

    # Guard: unknown task id.
    if not task_info:
        return JsonResponse({
            'status': 'error',
            'message': '任务不存在'
        }, json_dumps_params={'ensure_ascii': False})

    # Attach best-effort diagnostics; never let them hide the task info.
    try:
        from automated_task_monitor.celery import app  # project Celery instance

        inspector = app.control.inspect()
        # One broadcast; reused for the worker count and the task listing.
        active_tasks = inspector.active()

        task_info['debug'] = {
            'active_workers': bool(active_tasks),
            'worker_count': len(active_tasks) if active_tasks else 0,
            'active_tasks': active_tasks,
            'server_time': datetime.now().isoformat()
        }
    except Exception as e:
        task_info['debug'] = {
            'error': str(e),
            'server_time': datetime.now().isoformat()
        }

    return JsonResponse(task_info, json_dumps_params={'ensure_ascii': False})