递归挖掘TikTok网络中的相关用户和热门内容
This commit is contained in:
parent
4ccfc764f7
commit
ab858bcfa8
BIN
media/tiktok_videos/blancedit/7427503274095332615.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7427503274095332615.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7441411075314126098.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7441411075314126098.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7441971406809746706.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7441971406809746706.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7445005001262468370.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7445005001262468370.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7447186510874430727.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7447186510874430727.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7449215372668046610.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7449215372668046610.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7450224315422756114.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7450224315422756114.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7464708431677525254.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7464708431677525254.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7467876751763410183.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7467876751763410183.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/blancedit/7470548218070256904.mp4
Normal file
BIN
media/tiktok_videos/blancedit/7470548218070256904.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7419711425989414177.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7419711425989414177.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7420445155892710689.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7420445155892710689.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7421700063505698081.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7421700063505698081.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7422837674676899104.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7422837674676899104.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7424413668252716321.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7424413668252716321.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7436301012924747040.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7436301012924747040.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7447734031996718369.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7447734031996718369.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7448951036456258848.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7448951036456258848.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7453157776852815137.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7453157776852815137.mp4
Normal file
Binary file not shown.
BIN
media/tiktok_videos/feelinglikenagi/7471140869517675798.mp4
Normal file
BIN
media/tiktok_videos/feelinglikenagi/7471140869517675798.mp4
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -17,4 +17,5 @@ urlpatterns = [
|
||||
path('directory-status/', views.get_directory_status, name='directory_status'),
|
||||
path('tiktok/user-videos/', views.get_tiktok_user_videos, name='get-tiktok-user-videos'),
|
||||
path('api/tiktok/fetch_videos/', views.fetch_tiktok_videos, name='fetch_tiktok_videos'),
|
||||
path('api/recursive_fetch_videos', views.recursive_fetch_videos, name='recursive_fetch_videos'),
|
||||
]
|
||||
|
250
monitor/views.py
250
monitor/views.py
@ -1359,3 +1359,253 @@ def download_video(video_id, unique_id, save_path):
|
||||
import traceback
|
||||
logger.error(f"详细错误: {traceback.format_exc()}")
|
||||
return False
|
||||
|
||||
def fetch_user_followings(sec_uid):
    """Fetch the following list of a TikTok user from the backend API.

    Args:
        sec_uid: The user's ``secUid``; sent as the ``secUid`` query parameter.

    Returns:
        The decoded JSON response (a dict whose ``data`` entry is a dict and
        whose ``data['userList']``, when present, is a list), or ``None`` when
        the request fails, the status code is not 200, or the payload does not
        have that shape.
    """
    url = f"{API_BASE_URL}/api/tiktok/web/fetch_user_follow"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    }

    try:
        # Let requests build the query string so that reserved characters in
        # sec_uid are percent-encoded instead of corrupting the URL.
        response = requests.get(
            url,
            params={'secUid': sec_uid},
            headers=headers,
            timeout=30,
        )
        if response.status_code != 200:
            logger.error(f"获取用户关注列表失败: {response.status_code}")
            return None
        data = response.json()
    except Exception as e:
        # Boundary with an external service: network errors and invalid JSON
        # are both reported as "no data" to the caller.
        logger.error(f"获取用户关注列表异常: {e}")
        return None

    # Validate the shape explicitly; callers index data['data']['userList']
    # and iterate it without further checks.
    payload = data.get('data') if isinstance(data, dict) else None
    if not isinstance(payload, dict):
        logger.error("获取用户关注列表异常: 响应格式不符合预期")
        return None

    user_list = payload.get('userList', [])
    if not isinstance(user_list, list):
        logger.error("获取用户关注列表异常: userList 格式不符合预期")
        return None

    logger.info(f"成功获取用户关注列表,共 {len(user_list)} 个关注")
    return data
|
||||
|
||||
def filter_users_by_followers(user_list, min_followers=5000, max_followers=50000):
    """Keep only the users whose follower count lies within an inclusive range.

    Args:
        user_list: Iterable of user entries; each entry is read via
            ``entry.get('stats', {}).get('followerCount', 0)``.
        min_followers: Inclusive lower bound of the follower count.
        max_followers: Inclusive upper bound of the follower count.

    Returns:
        A new list with the matching entries, in their original order. An
        entry whose follower count cannot be read or compared is logged and
        left out.
    """
    selected = []

    for entry in user_list:
        try:
            followers = entry.get('stats', {}).get('followerCount', 0)
            in_range = min_followers <= followers <= max_followers
        except Exception as e:
            logger.error(f"筛选用户时出错: {e}")
            continue

        if in_range:
            selected.append(entry)

    return selected
|
||||
|
||||
# Hard ceiling for the caller-supplied ``max_depth``: every level fans out to
# up to five followings with up to ten downloads each, so the amount of work
# grows exponentially with the depth.
_MAX_RECURSION_DEPTH = 5


def _safe_user_dir(unique_id):
    """Return the per-user download directory below ``TIKTOK_VIDEOS_PATH``.

    Raises:
        ValueError: if ``unique_id`` is not a non-empty string forming a
            single path component (it contains a path separator, a drive
            colon, a NUL byte, or is ``.`` / ``..``).
    """
    if (
        not isinstance(unique_id, str)
        or not unique_id
        or unique_id in ('.', '..')
        or any(ch in unique_id for ch in ('/', '\\', ':', '\x00'))
    ):
        raise ValueError(f"非法的用户ID: {unique_id!r}")
    return os.path.join(TIKTOK_VIDEOS_PATH, unique_id)


def _collect_top_videos(videos_data, limit=10):
    """Extract the ``limit`` most played videos from a videos API response.

    Reads ``videos_data['data']['itemList']``; entries without a purely
    numeric ``id`` are skipped, entries that cannot be parsed are logged and
    skipped. Returns a list of ``{'id', 'desc', 'play_count'}`` dicts sorted
    by ``play_count`` in descending order.
    """
    payload = videos_data.get('data') if isinstance(videos_data, dict) else None
    items = payload.get('itemList') if isinstance(payload, dict) else None

    videos = []
    for video in items or []:
        try:
            video_id = video.get('id', '')
            if not video_id or not str(video_id).isdigit():
                continue

            stats = video.get('stats', {})
            if not isinstance(stats, dict):
                stats = {}

            videos.append({
                'id': video_id,
                # A null description would break the slicing done when the
                # final response is built, so normalise it to ''.
                'desc': video.get('desc') or '',
                'play_count': int(stats.get('playCount', 0)),
            })
        except Exception as e:
            logger.error(f"处理视频数据出错: {str(e)}")
            continue

    videos.sort(key=lambda x: x['play_count'], reverse=True)
    return videos[:limit]


@csrf_exempt
@require_http_methods(["POST"])
def recursive_fetch_videos(request):
    """Download top videos of a user and, recursively, of the users they follow.

    Expects a JSON body with:
        unique_id: TikTok unique id of the starting user (required).
        max_depth: recursion depth, default 3, capped at
            ``_MAX_RECURSION_DEPTH``.

    For every processed user the ten most played videos are downloaded into
    ``TIKTOK_VIDEOS_PATH/<unique_id>`` and a ``TiktokUserVideos`` row is
    created or updated. Up to five followings with 5000-50000 followers are
    then processed one level deeper.

    Returns:
        A ``JsonResponse`` with ``status`` ``'success'`` (plus the number of
        processed users, the number of downloaded videos and at most 100
        video summaries) or ``status`` ``'error'`` with a ``message``.

    NOTE(review): all of the work runs synchronously inside the request;
    consider moving it to a background job.
    """
    try:
        data = json.loads(request.body)
        start_unique_id = data.get('unique_id')
        # Default depth is 3; cap it so one request cannot trigger an
        # unbounded crawl.
        max_depth = min(int(data.get('max_depth', 3)), _MAX_RECURSION_DEPTH)

        if not start_unique_id:
            return JsonResponse({
                'status': 'error',
                'message': '请提供起始TikTok用户ID(unique_id)'
            }, json_dumps_params={'ensure_ascii': False})

        # The id comes straight from the request body and is used as a
        # directory name below, so reject anything that could escape
        # TIKTOK_VIDEOS_PATH.
        try:
            start_user_dir = _safe_user_dir(start_unique_id)
        except ValueError as e:
            logger.error(f"解析用户资料出错: {e}")
            return JsonResponse({
                'status': 'error',
                'message': '无效的TikTok用户ID(unique_id)'
            }, json_dumps_params={'ensure_ascii': False})

        # Fetch the starting user's profile; it provides the secUid.
        user_profile = fetch_user_profile(start_unique_id)
        if not user_profile or 'data' not in user_profile:
            return JsonResponse({
                'status': 'error',
                'message': f'无法获取用户 {start_unique_id} 的资料'
            }, json_dumps_params={'ensure_ascii': False})

        # Extract secUid and the remaining profile fields.
        try:
            user_info = user_profile['data']['userInfo']['user']
            start_sec_uid = user_info['secUid']

            nickname = user_info.get('nickname', '')
            signature = user_info.get('signature', '')
            avatar_url = user_info.get('avatarLarger', '')

            stats = user_profile['data']['userInfo'].get('stats', {})
            follower_count = stats.get('followerCount', 0)
            following_count = stats.get('followingCount', 0)
            heart_count = stats.get('heartCount', 0) or stats.get('diggCount', 0)
            video_count = stats.get('videoCount', 0)

            # Create the starting user's download directory.
            os.makedirs(start_user_dir, exist_ok=True)

            # Persist the starting user's profile.
            TiktokUserVideos.objects.update_or_create(
                sec_user_id=start_sec_uid,
                defaults={
                    'nickname': nickname,
                    'signature': signature,
                    'follower_count': follower_count,
                    'following_count': following_count,
                    'total_favorited': heart_count,
                    'video_count': video_count,
                    'avatar_url': avatar_url,
                    'videos_folder': start_user_dir
                }
            )

            logger.info(f"成功获取并保存起始用户信息: {start_unique_id}, secUid: {start_sec_uid}")
        except (KeyError, TypeError) as e:
            logger.error(f"解析用户资料出错: {e}")
            return JsonResponse({
                'status': 'error',
                'message': f'解析用户资料出错: {str(e)}'
            }, json_dumps_params={'ensure_ascii': False})

        all_downloaded_videos = []
        processed_users = set()  # secUids already handled, to avoid repeats

        def process_user(sec_uid, unique_id, depth=0, nickname=None):
            """Download one user's top videos, then recurse into followings."""
            if depth >= max_depth or sec_uid in processed_users:
                return

            processed_users.add(sec_uid)
            logger.info(f"处理用户 {unique_id},递归深度: {depth}")

            # Make sure the user's directory exists.
            user_dir = _safe_user_dir(unique_id)
            os.makedirs(user_dir, exist_ok=True)

            # Pick the ten most played videos and download them.
            top_videos = _collect_top_videos(fetch_user_videos(sec_uid), limit=10)

            downloaded_videos = []
            for i, video in enumerate(top_videos):
                video_id = video['id']
                save_path = os.path.join(user_dir, f"{video_id}.mp4")

                logger.info(f"下载用户 {unique_id} 的第 {i+1} 个热门视频: {video_id}")
                if download_video(video_id, unique_id, save_path):
                    video['download_path'] = save_path
                    video['user_unique_id'] = unique_id
                    downloaded_videos.append(video)
                    all_downloaded_videos.append(video)

                time.sleep(1)  # throttle requests

            video_info_json = json.dumps([{
                'id': v['id'],
                'desc': v['desc'],
                'play_count': v['play_count']
            } for v in downloaded_videos], ensure_ascii=False)

            TiktokUserVideos.objects.update_or_create(
                sec_user_id=sec_uid,
                defaults={
                    # Keep the real nickname stored by the caller instead of
                    # overwriting it with the unique id; fall back to the
                    # unique id only when no nickname is known.
                    'nickname': nickname or unique_id,
                    'videos_folder': user_dir,
                    'video_paths': video_info_json
                }
            )

            # Fetch the following list.
            followings_data = fetch_user_followings(sec_uid)
            following_payload = (
                followings_data.get('data')
                if isinstance(followings_data, dict) else None
            )
            user_list = (
                following_payload.get('userList')
                if isinstance(following_payload, dict) else None
            )
            if not isinstance(user_list, list):
                return

            # Keep users with 5000-50000 followers.
            filtered_users = filter_users_by_followers(user_list, 5000, 50000)
            logger.info(f"用户 {unique_id} 的关注列表中有 {len(filtered_users)} 个粉丝数在5000-50000之间")

            # Only the first five matching users are followed.
            for user_data in filtered_users[:5]:
                try:
                    user_obj = user_data['user']
                    following_sec_uid = user_obj['secUid']
                    following_unique_id = user_obj['uniqueId']

                    follow_nickname = user_obj.get('nickname', '')
                    follow_signature = user_obj.get('signature', '')
                    follow_avatar_url = user_obj.get('avatarLarger', '')

                    follow_stats = user_data.get('stats') or {}

                    # Save the profile even before any video is downloaded.
                    # Raises ValueError (handled below) for an unsafe id.
                    follow_user_dir = _safe_user_dir(following_unique_id)
                    TiktokUserVideos.objects.update_or_create(
                        sec_user_id=following_sec_uid,
                        defaults={
                            'nickname': follow_nickname,
                            'signature': follow_signature,
                            'follower_count': follow_stats.get('followerCount', 0),
                            'following_count': follow_stats.get('followingCount', 0),
                            'total_favorited': follow_stats.get('heartCount', 0),
                            'video_count': follow_stats.get('videoCount', 0),
                            'avatar_url': follow_avatar_url,
                            'videos_folder': follow_user_dir
                        }
                    )

                    # Recurse into the followed user.
                    process_user(
                        following_sec_uid,
                        following_unique_id,
                        depth + 1,
                        follow_nickname,
                    )
                except Exception as e:
                    # Best effort: one broken following must not abort the crawl.
                    logger.error(f"处理关注用户时出错: {e}")
                    continue

        # Kick off the recursion at the starting user.
        process_user(start_sec_uid, start_unique_id, nickname=nickname)

        return JsonResponse({
            'status': 'success',
            'message': '递归获取视频完成',
            'processed_users_count': len(processed_users),
            'downloaded_videos_count': len(all_downloaded_videos),
            # Only the first 100 entries are returned to keep the response small.
            'downloaded_videos': [
                {
                    'id': v['id'],
                    'desc': v['desc'][:50],
                    'play_count': v['play_count'],
                    'user': v['user_unique_id'],
                }
                for v in all_downloaded_videos[:100]
            ]
        }, json_dumps_params={'ensure_ascii': False})

    except Exception as e:
        logger.error(f"递归获取TikTok视频失败: {e}")
        import traceback
        logger.error(f"详细错误: {traceback.format_exc()}")
        return JsonResponse({
            'status': 'error',
            'message': f'递归获取TikTok视频失败: {str(e)}'
        }, json_dumps_params={'ensure_ascii': False})
|
||||
|
Loading…
Reference in New Issue
Block a user