From d956ce667801e8778f164578ee6fb6edffaa2ab7 Mon Sep 17 00:00:00 2001 From: Zixiao Wang Date: Mon, 31 Mar 2025 13:21:15 +0800 Subject: [PATCH] updated optical flow --- __pycache__/tasks.cpython-311.pyc | Bin 11186 -> 8597 bytes celery_debug.py | 47 +- tasks.py | 231 ++++-- .../video_processing.cpython-311.pyc | Bin 6315 -> 7650 bytes utils/video_processing.py | 663 ++++++++++++++++-- 5 files changed, 799 insertions(+), 142 deletions(-) diff --git a/__pycache__/tasks.cpython-311.pyc b/__pycache__/tasks.cpython-311.pyc index 13786d41936c02288779bdaa48949c9cdd3f5830..fdc9f6102289325535f0539eea55fdbd9a6a0ff4 100644 GIT binary patch delta 2222
diff --git a/celery_debug.py b/celery_debug.py index 7eecf8e..6050ce0 100644 --- a/celery_debug.py +++ b/celery_debug.py @@ -39,9 +39,10 @@ print("Backend:", app.conf.result_backend) print("Broker:", app.conf.broker_url) # Define the number of concurrent tasks -NUM_TASKS = 4 +NUM_TASKS = 1 delay_seconds = 0 -file_paths = [f"../video/1.mp4" for _ in range(NUM_TASKS)] +file_paths = [f"../video/film4.mp4" for _ in range(NUM_TASKS)] + # video_folder = "../video" # video_files = [f for f in os.listdir(video_folder) if f.endswith(('.mp4', '.avi', '.mov', '.mkv'))] @@ -70,32 +71,29 @@ file_paths = [f"../video/1.mp4" for _ in range(NUM_TASKS)] # for i in range(NUM_TASKS) # ] +# questions = [ +# f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n" +# f"- Product Name: \n" +# f"- Category: \n" +# f"- Styles or Variants: \n" +# f"- Highlights: \n" +# f"- Promotional Details: \n" +# f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.' Task {i}" +# for i in range(NUM_TASKS) +# ] + questions = [ - f"Based on the given images and audio script, extract detailed information about the products recommended in the video and format the output as JSON with the following fields:\n" - f"- Product Name: \n" - f"- Category: \n" - f"- Styles or Variants: \n" - f"- Highlights: \n" - f"- Promotional Details: \n" - f"Do not include any disclaimers or comments like 'I'm sorry' or 'I can't assist.'
Task {i}" + f"Generate a screenplay based on the given video content and format the output as JSON with the following structured fields:\n" + f"- Scene Descriptions: \n" + f"- Character Introductions: \n" + f"- Dialogue: \n" + f"- Actions & Expressions: \n" + f"- Product Integrations: \n" + f"- Narrative Flow: \n" + f"Follow standard screenplay formatting for headers, character names, dialogue, and actions. Do not include disclaimers or comments like 'I can't assist.' Task {i}" for i in range(NUM_TASKS) ] -# questions = [ -# "Generate a screenplay based on the uploaded video, incorporating detailed elements such as dialogue, scene descriptions, and character actions. The screenplay should follow a structured format with the following components:\n" -# "1. **Scene Descriptions**: Provide a detailed visual setting for each scene, describing the background, lighting, and overall atmosphere.\n" -# "2. **Character Introductions**: Identify key characters, their appearance, and any notable traits or expressions.\n" -# "3. **Dialogue**: Transcribe or adapt spoken lines from the video into screenplay format, ensuring natural flow and emphasis on key moments.\n" -# "4. **Actions & Expressions**: Capture non-verbal cues, body language, and interactions between characters or with objects in the scene.\n" -# "5. **Product Integrations**: If the video features product recommendations, weave them naturally into the script, highlighting their name, category, features, and promotional details as part of the dialogue or narration.\n" -# "6. **Narrative Flow**: Ensure the screenplay has a coherent progression, with clear transitions between scenes, maintaining engagement and pacing similar to the video’s tone and style.\n\n" -# "Format the output as a properly structured screenplay:\n" -# "- Scene headers (INT./EXT. - LOCATION - TIME OF DAY)\n" -# "- Character names in uppercase\n" -# "- Dialogue centered\n" -# "- Actions and descriptions formatted accordingly" -# ] - # def submit_task(question, index): # sends tasks to Celery asynchronously, queues the tasks in Celery broker. If multiple Celery workers, they process tasks in parallel. 
# """ Submits a Celery task with increasing delay """ # countdown_time = index * delay_seconds # Dynamic delay @@ -141,6 +139,7 @@ def submit_task(file_paths, questions, batch_size=4): # Get the current batch of file paths and questions batch_file_paths = file_paths[i:i + batch_size] batch_questions = questions[i:i + batch_size] + print(f"batch file paths: {batch_file_paths}") # Create preprocessing tasks for the current batch preprocessing_tasks = [ diff --git a/tasks.py b/tasks.py index a23c34e..96590dc 100644 --- a/tasks.py +++ b/tasks.py @@ -153,21 +153,104 @@ from pipeline_setup import pipe from utils.image_processing import encode_image_base64 from concurrent.futures import ThreadPoolExecutor, as_completed -def process_segment(segment_data): - segment_path, segment_idx, total_segments = segment_data - print(f"Processing segment {segment_idx+1}/{total_segments}: {segment_path}") +# def process_segment(segment_data): +# segment_path, segment_idx, total_segments = segment_data +# print(f"Processing segment {segment_idx+1}/{total_segments}: {segment_path}") - imgs = extract_motion_key_frames(segment_path, max_frames=20, sigma_multiplier=4) - print(f"length of key frames in segments: {len(imgs)}") - print(f"Segment {segment_idx+1}: extract_motion_key_frames finished.") +# imgs = extract_motion_key_frames(segment_path, max_frames=20, sigma_multiplier=4) +# print(f"length of key frames in segments: {len(imgs)}") +# print(f"Segment {segment_idx+1}: extract_motion_key_frames finished.") - audio_path = extract_audio_from_video(segment_path) - print(f"Segment {segment_idx+1}: extract_audio_from_video finished.") +# audio_path = extract_audio_from_video(segment_path) +# print(f"Segment {segment_idx+1}: extract_audio_from_video finished.") +# transcribed_text = transcribe_audio(audio_path) +# print(f"Segment {segment_idx+1}: transcribe_audio finished.") + +# return { +# "segment_path": segment_path, +# "key_frames": [encode_image_base64(img) for img in imgs], +# "transcribed_text": transcribed_text +# } + +# @app.task(name="tasks.preprocess_video") +# def preprocess_video(video_path, question): +# try: +# # Monitor CPU usage +# # cpu_usage = psutil.cpu_percent(interval=1) +# # print(f"CPU Usage during preprocessing: {cpu_usage}%") + +# print(f"Preprocessing video: {video_path}") + +# if not os.path.exists(video_path): +# return {"query": question, "error": "Video file not found."} + +# # Determine the file type +# file_type, _ = mimetypes.guess_type(video_path) +# if file_type is None or not file_type.startswith("video/"): +# return {"query": question, "error": "Unsupported video file type."} + +# print("Splitting video...") +# segments = split_video_into_segments(video_path, segment_duration=100) +# print(f"segments: {segments}") +# print(f"Video split into {len(segments)} segments.") + +# # Process segments in parallel +# processed_segments = [] +# max_workers = min(len(segments), os.cpu_count() * 2) + +# print(f"Processing segments with {max_workers} workers...") + +# with ThreadPoolExecutor(max_workers=max_workers) as executor: +# future_to_segment = { +# executor.submit(process_segment, (segment_path, idx, len(segments))): idx +# for idx, segment_path in enumerate(segments) +# } + +# # Collect results as they complete +# segment_results = [None] * len(segments) +# for future in as_completed(future_to_segment): +# idx = future_to_segment[future] +# try: +# segment_results[idx] = future.result() +# except Exception as e: +# print(f"Error processing segment {idx}: {str(e)}") +# 
segment_results[idx] = { +# "segment_path": segments[idx], +# "error": str(e) +# } + +# print("multithread done!") + +# processed_segments = [result for result in segment_results if "error" not in result] + +# return { +# "video_path": video_path, +# "question": question, +# "processed_segments": processed_segments +# } + +# except Exception as e: +# return {"query": question, "error": str(e)} + +def process_video(video_path): + print(f"Processing video: {video_path}") + + # Extract key frames from the entire video + imgs = extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2) + print(f"Number of key frames extracted: {len(imgs)}") + print("Key frame extraction finished.") + + # Extract audio from the video + audio_path = extract_audio_from_video(video_path) + print("Audio extraction finished.") + + # Transcribe the extracted audio transcribed_text = transcribe_audio(audio_path) - print(f"Segment {segment_idx+1}: transcribe_audio finished.") - + print(transcribed_text) + print("Audio transcription finished.") + return { - "segment_path": segment_path, + "video_path": video_path, "key_frames": [encode_image_base64(img) for img in imgs], "transcribed_text": transcribed_text } @@ -175,12 +258,9 @@ def process_segment(segment_data): @app.task(name="tasks.preprocess_video") def preprocess_video(video_path, question): try: - # Monitor CPU usage - # cpu_usage = psutil.cpu_percent(interval=1) - # print(f"CPU Usage during preprocessing: {cpu_usage}%") - print(f"Preprocessing video: {video_path}") + # Check if the video file exists if not os.path.exists(video_path): return {"query": question, "error": "Video file not found."} @@ -189,49 +269,18 @@ def preprocess_video(video_path, question): if file_type is None or not file_type.startswith("video/"): return {"query": question, "error": "Unsupported video file type."} - print("Splitting video...") - segments = split_video_into_segments(video_path, segment_duration=100) - print(f"segments: {segments}") - print(f"Video split into {len(segments)} segments.") - - # Process segments in parallel - processed_segments = [] - max_workers = min(len(segments), os.cpu_count() * 2) - - print(f"Processing segments with {max_workers} workers...") - - with ThreadPoolExecutor(max_workers=max_workers) as executor: - future_to_segment = { - executor.submit(process_segment, (segment_path, idx, len(segments))): idx - for idx, segment_path in enumerate(segments) - } - - # Collect results as they complete - segment_results = [None] * len(segments) - for future in as_completed(future_to_segment): - idx = future_to_segment[future] - try: - segment_results[idx] = future.result() - except Exception as e: - print(f"Error processing segment {idx}: {str(e)}") - segment_results[idx] = { - "segment_path": segments[idx], - "error": str(e) - } - - print("multithread done!") - - processed_segments = [result for result in segment_results if "error" not in result] + # Process the entire video without splitting into segments + processed_data = process_video(video_path) return { "video_path": video_path, "question": question, - "processed_segments": processed_segments + "processed_data": processed_data } except Exception as e: return {"query": question, "error": str(e)} - + # @app.task(name="tasks.inference_video") # def inference_video(preprocessed_data): # try: @@ -282,6 +331,53 @@ def preprocess_video(video_path, question): # return {"query": question, "error": str(e)} +# @app.task(name="tasks.inference_video") +# def inference_video(preprocessed_results): +# """ +# 
Processes a batch of preprocessed videos on the GPU. +# """ +# try: +# print("Running inference on a batch of videos...") +# aggregated_results = [] +# for preprocessed_data in preprocessed_results: +# video_path = preprocessed_data["video_path"] +# question = preprocessed_data["question"] +# segments = preprocessed_data["processed_segments"] + +# print(f"Inferencing video: {video_path}") + +# # Run inference on the GPU +# aggregated_responses = [] +# for segment in segments: +# # Prepare input for inference +# question_with_frames = "".join( +# [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(segment["key_frames"]))] +# ) +# question_with_frames += f"Audio Transcript: {segment['transcribed_text']}\n{question}" + +# content = [{"type": "text", "text": question_with_frames}] + [ +# {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}} +# for img in segment["key_frames"] +# ] + +# # Query model +# messages = [dict(role="user", content=content)] +# response = pipe(messages) + +# # Aggregate response +# aggregated_responses.append(response.text) + +# aggregated_results.append({ +# "video_path": video_path, +# "question": question, +# "responses": aggregated_responses +# }) + +# return aggregated_results + +# except Exception as e: +# return {"error": str(e)} + @app.task(name="tasks.inference_video") def inference_video(preprocessed_results): """ @@ -293,35 +389,30 @@ def inference_video(preprocessed_results): for preprocessed_data in preprocessed_results: video_path = preprocessed_data["video_path"] question = preprocessed_data["question"] - segments = preprocessed_data["processed_segments"] + processed_data = preprocessed_data["processed_data"] + # print(f"processed_data: {processed_data}") print(f"Inferencing video: {video_path}") - # Run inference on the GPU - aggregated_responses = [] - for segment in segments: - # Prepare input for inference - question_with_frames = "".join( - [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(segment["key_frames"]))] - ) - question_with_frames += f"Audio Transcript: {segment['transcribed_text']}\n{question}" + # Prepare input for inference + question_with_frames = "".join( + [f"Frame{j+1}: {{IMAGE_TOKEN}}\n" for j in range(len(processed_data["key_frames"]))] + ) + question_with_frames += f"Audio Transcript: {processed_data['transcribed_text']}\n{question}" - content = [{"type": "text", "text": question_with_frames}] + [ - {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}} - for img in segment["key_frames"] - ] + content = [{"type": "text", "text": question_with_frames}] + [ + {"type": "image_url", "image_url": {"max_dynamic_patch": 1, "url": f"data:image/jpeg;base64,{img}"}} + for img in processed_data["key_frames"] + ] - # Query model - messages = [dict(role="user", content=content)] - response = pipe(messages) - - # Aggregate response - aggregated_responses.append(response.text) + # Query model + messages = [dict(role="user", content=content)] + response = pipe(messages) aggregated_results.append({ "video_path": video_path, "question": question, - "responses": aggregated_responses + "response": response.text }) return aggregated_results diff --git a/utils/__pycache__/video_processing.cpython-311.pyc b/utils/__pycache__/video_processing.cpython-311.pyc index 68aca97e2039c4e52fa957ecb690c4b2dc2bf2ab..ef171bd63572a45a602c62f33196b611d458e109 100644 GIT binary patch literal 7650 zcmb_BYituqn$_;M({9^IZ09)y#vvgg5F8j75^{(k1OiMF5+E}Y-kCGT-ASDIrS3Kl 
ztF7~`xFsM?lNo`?-QH-u60jM;N{loo<;R_bPP@`*rIor-OJlVZ32AjdxF4si*pWu_ z>%OY?s}tCplde+!x~jVB>#FZvUw-59a0tq&?*E$Gy@1f4aHm{srN-+!(0G7Yt; zGM`k4B7IxPMxJztfu}8QPdGvj3eTbAbqO}ak~R}}Cb$rnaD`l?Z;!hZo{%R|AF3yP zN4z1?7-~#-Ltf}N^6X72bw3F&qc94cbs?0q!)`IPwv@*(@}x3Kp*fUuEV-7bB{WO(%uV(VC;Ka} zk#+X_=#F#Axx~%dc>7IOZm!Pb9hMW>t>?W>Jq8*-snGf4CcL>ixwVQ*sk5#=i9YSi zv5KSMuFk2p6t>{0_Np!G&aD_Jba#&v@pbrbvt%S)UFZOkm}|D{^d)qSqR=Ho!JoXn zikpYsY+k20KslV*2`&KTVmcm^r9dQb6>CBumXuQg$$;4@-G zj-`@;j1)`G1;)k_X<;r%&}97saw0uIkc7+YXF%gv;d)Gx!>Pq+MOI;Vz;}ds_xcY2 z?$ZyD49{|@YDA%;2jzG8d7I`b1^p7t$d{Y6B}eff8NB_r9k^VPL(qp!+QQe>zP-&@ zY0k5}Q=#TuR-Ia?Xp~~(xu>ozRK+I8ea>J@rw?bDU?P3UaLmqPGG%T1Zon^=rT0k^ zG-UWi4JMjO-@s(DN9Tp;VxPmXr6hw%N91{fSxCi_1}({=!AY4bX)zTQB*~z~OcLgC zk{l7`upCPWhP(X9*;rgK>cCJXB1zuhQW-g&k;8mUG#blO$~-hUlUMjt5;g@O3;BHh z#{&cNse~|)f^QE<@_-y$lv9i0k+TCCITn{n3@+^jgdmu{VKfNWWib+!%Y4tiCt;xq zn;*auRemD{kH%BcNL)G=+_dIA>_8F${E$cMt?l=&Jy5<-3LmQb$Jd4dm{n_?&|4># z+%Mft%Y4z@xV)$6_TE3Zoc;WRdmljmewW(PqxpJuUoU>!v(mrea`?HWy6@|e(|@;q z`C5Tj*}WROS7-M^qu8)bWw*VCuOf?dVsa4aJPM8bwg+H&6!d9Zj=F6tz{u8en^q{R zJbZ4u+kO#!g@CmVU|u37_c4YoxL<6C)mSM^$07!xD`BgQsV}|@jUTzAanKm-ZH{I4 z{RB!@r~fTRHV6QSxPfRtR|PK{-`Sv%r}?v~J5wuDt0B!jq`QYUXv-wA6V{4Y=gwta zWhEXCaD5FTCWpZ$B8vh@69{4S>U)O!V{9@;BWXD!3I+{~Ht0D)Hr%5l)8Xk0Q`6zG z>5E2FwRd4;@>FZ5ZrMx7|cg@`1GSW6AN7)j0vBK87?Ba%)FAiWMzkTP-Eu%|`9 z+~*dtixshHgRe)@20fdW47Z$;Bk}O87)c0{QE&DtB3@QS$*7yn;7v-Pjie#=U}OOd zIsi!6USGZpDuB!JezTW(Y>x|>(_%c_RQJFm& zvqxw4K;wG{a@FhHJ1fH)*Q0Yi`O$UGvlRVo?#|r(s18zF->cX6YTRy}+YRshEz8n_ zYhPUZQqtcY)LMu1)*;P*ME4)bPZXOw3!_?dZ~k17sk=4)^YOo*yg8YleA(d5pV?qI zd&dU)YvB7f#C2%S?YeXO26EW<71}mz79pt0%zy6aDvUgmR*z}D!+P(qdUQhDF{$sE z+_1xfKV$j&3kINGENbm43~DWV^_IOGh;r^DU}>!AZXyCV_N&Q%p8Th&f0@$U{kpqf zC3S$p?{#`$Fa(~_L4pIOuN{^KgyZzVC0f0lQaaTNLbr9fUD**xoWfdp2+0EoA3l`gE zEIggFDO8~b7ks)@z{xOA?N*}#->u!1L$Uo5eYpjzVgpB1m%Tvz@+s3a2O>D`4a|zE zM9I;cE!?x5}t=1;9N&f3>SZ%_WiBICR@@$-r0cM7NS6BX?z zGY40PRc;Wf#tiAqkje}dUCk=fOsuX&6tVxodwzWw*o|)tH^KjGi9{*h2IJJ0ahqa; zIJ6Za_ZnfT##9QeY>Jv7#%zPwnyI~M@N{j2q-+v0t60L<#&UMc*D#MrQEZBxr?VVF z*zuVKk5zk?$gdJ(S)=frLtzS-Dq9Lfw{XhpJqXTnT46u|nCu8qfYC@inu$kbK?=-+ zTTRV@s|fJ1*;zppk`Pb@K=?rDuSDeNJdSrwS7OTh7N%f3@_GV5DJ1B$YIqHiDa)}c z7Q#w62oiQ)L}C{p1nKRAhrvd#%A=_`TsiAUrzWN@gioBl@ZRYQBbUCiiM#LwySYJ( zu_V!AFSN5m5LTH74Oh+ykze_oVRFD~_*D%WwgV~&qsSxGH)Gbd%>9G+juz~XB)#{D z<{#Gm!P*0WCY*`;)`26J!g9iY=2T=Jy0cp8E6I~uNndA zD+EmLvz18%Cp`nagaJYl26%cy`v0Ry08`|UNfB>ICG|Ofzw2Iqfmyk_N)@Ellh59J z*1hIk8(m{H--zxTF=qiJF_8t&R-A@kky8Zu-fFD;lhV32RdAZ#c)}`ZNpV%wPa8+gRAM|5DUy7E~W|W z{wcB|{tTA2VN$F?1EL!a`9GpS48C;%(0ntxZzey!;XuBhQE={W7XfJAknRnE6@+LL zQTCk#EHUe>>#k>+Upc9+N>H4X0QgYvMITc zvs_AfT5-I|f7hlb;G*jQa}B$yrX+H8iv7*$oz3ZpH=E4IaM)^ECC5T41u5(nH^PET zaMn5j-g-9HYIy(mlUOLTX|=*w&j>E%Vvds=V5_B~a0-)!Ht)djui@In5tqWkTGv0= zsxBwzmc3=XkQ{JZPm=Q}&O%Kxt!AdjIuo4DOl@ju0jD)cQJmJ9xq8dIN_uF^yauHn z++#x)qzIog*9ctj%G*k4;jwuIP%7~Wh<47G^DBO(agn%Q@rs2)Uh8fYYLOrmr)pD0 z3l(b1oz?FxR44^>^qJJJQc@1o%=DGpRt{LdooiCO1?)$+6s5^J6R4Y1yd;)YYGYX` zuqfOp`DrtvEhn$8z=e_zKvY^WkF}oO^ywc-;PXvyU(I5WAYe8k$$}WbhB+gXJeRqG z_#p^W>8N}?>l=y3AsU5CXK4q+0ZWvCEeVMdu>UXl!uQ|q3l*w*d*EwAB%aCwPc3h2 z6o*yNQX3w)mjA7t32`AAw2v9I3hGxwE)<3tK4`ociz zlFxZl=x_w^OUD5bgbtp|r8@o5KSu8^tTY#zA8uPc|8U2%16t>yXTmp&&pW?KeoJYG z&#HkBwfb?re!Q}XSm|uX%)C&dZ8DR%0=%fiW5WS*s=g)>n^zHsg5r4$$c=!wRVSn# zO1`o#A^`%(Owoy0lK3$LE(tQuD%g@~gGmSxl33#nrvMWL$Z85Aw*Tg}A|isTCB9o6 z!-Jk`BzEES3DY|_o0knP5xHJ=%8gQNE)fYQAZZbU{1^N~q26@hkiHPatC6_D5xEPS zY%uQ}hsg)Z^Y|DD7_198Vppdq8s!=ZP-TMa zP3#}T&*l6WEZP39%CwVD9R96;_2aL$J>8~qhoNfBpw0}c%;0)!$IAIfbYbk*u7@tj zm680*24d_(1T4AMn_5>~g$1o`KS+1mp?{68^?bef&7#(HLT@^;=4r}b7&JL=iLWaoezgMTW?b8~AdSftu 
zcD=!S_o~{mS8M3k8~XESijD1RNB`<|tz%GY9L%4Ev-p}I-_*WS?HbbBkLc}3AbDf& zBH*?M{!uavH_R313sUT5Bz2pdEZ|J%f{6TKAYahsk#J7H>_ounbmi@k#vqA8KD>bc>IrjpT& z2-h_^SW4rOtf84MY%ulBOpZfBTEw1PBw-%T(UD6%F#$vp$z6+B8zQ(rvPTmsJ`)#? ziNAo!IBJ*v8vvxHD5{9KJozgkXP*2OkykC(BJ!!_T13sNwHDD1)mn?FOMO!_x?T{W)Y<2o8M%98Aj+YxZ)&Fqwu&0h1*C#0+mDa z_M4igNZm8s(4fici3sM zS=Ky)=TrI)jJn&LPa)Ooj6`S=uS1fC?!dge&AAm?4LW@YX>_e+)=s<1?35RMPT9SZ zyH;WT;dT9g*ScQKt1;yeLKPoS8*t{dzVATgZnJ?x8C3DB@n!=O+_d@Jlxx%-kk;L% z#Ll5NuU7)<&UCa$v=KMdfR3jV?1xti{00fW4|nZKB`ElXfDqJ>z^Z$9W@-U94MND( zOYQ3Ags>LOget7Me`k%wh*Q>B(1*xwebD^1HfKcZTMN2Zr_ejA>cO26LYwA)dM(iX z_VNsyfONT-%c@F(Pb_0aPh<b zz9?pf$TH>lu$nIplSPuw{3~EA0r7fPQIpb=jJ4YeH2RWdW+-2L$?~Psc#*Q_=qmiP zW%{V1KtY9n%3@MdaTQxUg|X!5rPnWBvHY@_6M03nyfRA)Pi_Sv^Zz&bT%hi{*;u@ z^94b{>WsIvk-mFjctOgG!xC^Xtf<3kc1e|%k|UGDB{iE<8hh2?2=;TRxN7ZHusS4d zKCmt{_fmciR>!8JoC5b#McV-s>#t62F}@qq>ul|c&OB={&zj7$Q25q|LJ^bQw{g;7 z51Z`a>gX05UQ4~3c{}q#%IrF9MEcE0zrhZe>;Uw|yVjN4EAOv-sF=^ZVsxJ{yH6PL zSIzjV)tP$d!P=NFGZ?|o5M&QS0;Kw?7{>lW`hBr5s$DlPqlbx zguNp6UynbB{H^#~9T+`sAn?gezYo41sd!8y2_Cl>*o`L@@Vm7px;V4&TW$Jn1la2G z`d;^%8twM1?%wZ59=9*A_ypz#(tPKTyZ!CB3UPsx7Mm2^j0>I}H>+YqZKTB}a=3ll z+Z}81wT?7+@EZXaXTDa>ZpPiTyVuxFUBwRw6lg^tH}x;0N>J@|D1)5|{s68pyFIa+ ziV6$p0zWr36e@)J&DT(E_k%KEeO%mVI;cw^yQC zyB1y|)*xSP-7_uXo~cnwgamn9?c1ScG3J!NU!%0BON*LO#owZ_64N3zY_3lwE#{^P zuy;X=0Ja9pV+0h(uB$Si0(4yxR};2}hnJL3IB0tf@J2`gpynlltKf0Pf|6BNZBR|M z7(59mF~N`j3$okb0Eo!`Mp)?zdYZ=XW)%X+mT$BKnv_Nhl;9C)fq}mlZ|WWJ{&$g=mI4>8(YkX?S@)Q8N zl~R7tXR%~clSLjNJy?{*fU!t zt6liwn0ai>=pHw_$Je;6wwT#=WOMSK_Tb7_$Mo5s>X%>FfA({I{ulb2|Egb08Et~u zCaf`A(fF;d4W>r@d(epXn$g}hU%jn!UDMn8^gy3{6h2183*j1_Qm?yI7kg`>x>zM| zLqxBkH(#z$F4b$Qp+;%cVpClOw1WTwRfcv~Z;PL?d($4bg1_f^=Qt=GEkBP5#9MDu zN!$DHlSL(4hL}%T;EN)h_dG+-TVC5#F%6Av`HnrEgD4gPt>T&T(Z6Fuc7~sVRg~8t zt0Fy|ZalNE|6A&v#f{Ed=STZDfBI3+y%&swFWnPAUwUxx^TJn@aePuwOc{}BGcw(r zL`3WoE7oly){?Tad=r&7Q$~5cgm-s zu3)#DNK~st6Xo+z`v-YFi&yD|>NIAh`cifje4c{j_Cr@BX=xjbC z&)_zqjX0!&t=yend(C!~dw(_iV7A(VHWm z_&)XN+_1q7o7`}9tRCNU`|SH?KYn>L`q}6`?-X zu7|`RHva|e!00$_cATzW08ZnbxB54Z8nFR0Hc*|mQ6g+a51Y}$)v0>>e!XYt-aez} z6o{ZY2|UF*;LzC955m82|M-K04|*T;{_&(SFm4Wv8$Ap#ys8z{h>H#orBoG<)tbZ@0qPw_pRZN`#ySZv-CyEJU(Ico;Q2X z+frN`t#e)9a0kBT4s4_huGi#xb*}fx!%(NrbSgMl{B!KkIS>7%hdJl-e;K48f65(U z`+NKVvfHQ?v4masI&f!)M>f(9f<1L-=fb_+CpU=e=?XaSTFY9_nt**Cf%(+yMmYFx z6FcDy=hHmDK_Biq4^l7_P|#nVYmG}2JihC+BU0i88uvQIRiSh(0XGT>g@@3K$G1bO zm@i5)FRwb2;O3!m^Ru5^nI;0|=144*}0j2pI-Op;3xpLiYURRQ=DxW`M@w^;Im zcuie!L4~!)@h0{F>}>5z5V;7`Ux&Z42w4?<8$e!GXZsCiz+?t=W?+lk16ox(X>da( zH&h+lVuG#Rbz%lg@U59&&8+XMja6rwwHfVUsPTI2Z31Ui;g%;3?L44haOo!4X zd}F2%Y$7*3#EBB-3C`MZjU&|W0;ltmP|AsCci8Mq#bsw1{aesvVAl3yJ~ zb?2$0nC?7v)Tz6lI_lBgPaXB?-}ltf3%dK+W`0C threshold and len(key_frames) < max_frames: - img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - key_frames.append(img) - - return key_frames - def extract_audio_from_video(video_path): """ Extract audio from video using pydub and save as a temporary audio file. @@ -93,3 +112,551 @@ def extract_audio_from_video(video_path): audio.export(audio_path, format="wav") print(f"Audio extracted and saved to: {audio_path}") return audio_path + +############################################################################################################ +# optical motion, multithread, calculates motion between consecutive frames using dense optical flow (Farneback) only +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1): +# """ +# Extracts key frames from a video based on motion intensity. 
+# """ +# def calculate_motion(frame_pair): +# """ +# Calculates motion between two consecutive frames using optical flow. +# """ +# prev_gray, current_frame = frame_pair +# current_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY) +# flow = cv2.calcOpticalFlowFarneback(prev_gray, current_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0) +# motion = np.sum(flow ** 2) +# return motion, current_gray + +# # Load video frames using Decord +# video = VideoReader(video_path, ctx=cpu(0)) +# frames_batch = video.get_batch(range(0, len(video), frame_interval)).asnumpy() + +# # Resize frames for faster processing +# frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames_batch] + +# # Initialize the first frame +# prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY) +# frame_pairs = [(prev_gray, frames[i]) for i in range(1, len(frames))] + +# # Calculate motion statistics +# motion_values = [] +# with ThreadPoolExecutor() as executor: +# motion_results = list(executor.map(calculate_motion, frame_pairs)) +# motion_values = [motion for motion, _ in motion_results] + +# # Calculate threshold statistically +# motion_mean = np.mean(motion_values) +# motion_std = np.std(motion_values) +# threshold = motion_mean + sigma_multiplier * motion_std + +# # Extract key frames based on motion threshold +# key_frames = [] +# for i, (motion, frame) in enumerate(zip(motion_values, frames[1:])): +# if motion > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# return key_frames + +############################################################################################################ +# multithreading with bactch +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1): +# """ +# Extracts key frames from a video based on motion intensity. +# Optimized for speed and efficiency. +# """ +# def calculate_motion(frame_pair): +# """ +# Calculates motion between two consecutive frames using optical flow. 
+# """ +# prev_gray, current_frame = frame_pair +# current_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY) +# flow = cv2.calcOpticalFlowFarneback(prev_gray, current_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0) +# motion = np.sum(flow ** 2) +# return motion, current_gray + +# # Load video frames using Decord with reduced resolution +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) +# frame_indices = range(0, total_frames, frame_interval) + +# # Process frames in smaller batches to reduce memory usage +# batch_size = 100 +# motion_values = [] + +# for batch_start in range(0, len(frame_indices), batch_size): +# batch_end = min(batch_start + batch_size, len(frame_indices)) +# batch_indices = frame_indices[batch_start:batch_end] +# frames_batch = video.get_batch(batch_indices).asnumpy() + +# # Resize frames for faster processing +# frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames_batch] + +# # Initialize the first frame in the batch +# prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY) +# frame_pairs = [(prev_gray, frames[i]) for i in range(1, len(frames))] + +# # Calculate motion statistics for the batch +# with ThreadPoolExecutor() as executor: +# motion_results = list(executor.map(calculate_motion, frame_pairs)) +# batch_motion_values = [motion for motion, _ in motion_results] +# motion_values.extend(batch_motion_values) + +# # Update the previous frame for the next batch +# prev_gray = cv2.cvtColor(frames[-1], cv2.COLOR_BGR2GRAY) + +# # Calculate threshold statistically +# motion_mean = np.mean(motion_values) +# motion_std = np.std(motion_values) +# threshold = motion_mean + sigma_multiplier * motion_std + +# # Extract key frames based on motion threshold +# key_frames = [] +# for i, (motion, frame) in enumerate(zip(motion_values, frames[1:])): +# if motion > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# return key_frames + +############################################################################################################ +# multiprocessing +# def calculate_motion(frame_pair): +# """ +# Calculates motion between two consecutive frames using optical flow. +# """ +# prev_gray, current_gray = frame_pair +# flow = cv2.calcOpticalFlowFarneback(prev_gray, current_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0) +# motion = np.sum(flow ** 2) +# return motion + +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1): +# """ +# Extracts key frames from a video based on motion intensity. +# Optimized for speed and efficiency. 
+# """ +# # Load video frames using Decord with reduced resolution +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) +# frame_indices = range(0, total_frames, frame_interval) + +# # Read all frames and resize them for faster processing +# frames = video.get_batch(frame_indices).asnumpy() +# frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames] + +# # Convert all frames to grayscale in one go +# grayscale_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) for frame in frames] + +# # Calculate motion between consecutive frames using multiprocessing +# frame_pairs = list(zip(grayscale_frames[:-1], grayscale_frames[1:])) +# with Pool(cpu_count()) as pool: +# motion_values = pool.map(calculate_motion, frame_pairs) + +# # Calculate threshold statistically +# motion_mean = np.mean(motion_values) +# motion_std = np.std(motion_values) +# threshold = motion_mean + sigma_multiplier * motion_std + +# # Extract key frames based on motion threshold +# key_frames = [] +# for i, motion in enumerate(motion_values): +# if motion > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# return key_frames + +############################################################################################################ +# faster optical flow, more aggressive downscaling and frame skipping, looking for motion peaks, uses both dense optical flow and includes additional peak detection logic +# def calculate_motion(frames): +# """ +# Calculate motion metrics using frame differencing and sparse optical flow +# Returns a list of motion intensity values +# """ +# if len(frames) < 2: +# return [] + +# # Convert all frames to grayscale at once +# gray_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) for frame in frames] + +# # Parameters for ShiTomasi corner detection and optical flow +# feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7) +# lk_params = dict(winSize=(15,15), maxLevel=2, +# criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)) + +# motion_metrics = [] +# prev_frame = gray_frames[0] +# prev_pts = cv2.goodFeaturesToTrack(prev_frame, mask=None, **feature_params) + +# for i in range(1, len(gray_frames)): +# curr_frame = gray_frames[i] + +# # Calculate dense optical flow (Farneback) +# flow = cv2.calcOpticalFlowFarneback(prev_frame, curr_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0) +# magnitude = np.sqrt(flow[...,0]**2 + flow[...,1]**2) +# motion_metrics.append(np.mean(magnitude)) + +# prev_frame = curr_frame + +# return motion_metrics + +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=5): +# # Load video with reduced resolution +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) +# frame_indices = range(0, total_frames, frame_interval) + +# # Read and resize all frames at once +# frames = video.get_batch(frame_indices).asnumpy() +# frames = np.array([cv2.resize(frame, (frame.shape[1]//4, frame.shape[0]//4)) for frame in frames]) + +# # Calculate motion metrics +# motion_values = calculate_motion(frames) + +# if not motion_values: +# return [] + +# # Adaptive thresholding +# mean_motion = np.mean(motion_values) +# std_motion = np.std(motion_values) +# threshold = mean_motion + sigma_multiplier * std_motion + +# # Find peaks in motion values +# key_frame_indices = [] +# for i in range(1, len(motion_values)-1): +# if motion_values[i] > threshold and 
\ +# motion_values[i] > motion_values[i-1] and \ +# motion_values[i] > motion_values[i+1]: +# key_frame_indices.append(i+1) # +1 because motion is between frames + +# # Select top frames by motion intensity +# if len(key_frame_indices) > max_frames: +# sorted_indices = sorted(key_frame_indices, key=lambda x: motion_values[x-1], reverse=True) +# key_frame_indices = sorted_indices[:max_frames] +# key_frame_indices.sort() + +# # Convert to PIL Images +# key_frames = [Image.fromarray(cv2.cvtColor(frames[i], cv2.COLOR_BGR2RGB)) +# for i in key_frame_indices] + +# return key_frames + + +############################################################################################################ +# RAFT Optical Flow +# import torch +# import torchvision.models.optical_flow as of +# from torch.nn.parallel import DataParallel + +# def pad_to_multiple_of_8(frame): +# """ +# Pads the frame dimensions to the nearest multiple of 8. +# """ +# h, w, _ = frame.shape +# pad_h = (8 - h % 8) % 8 +# pad_w = (8 - w % 8) % 8 +# return cv2.copyMakeBorder(frame, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT, value=[0, 0, 0]) + +# def compute_raft_flow_batch(frame_batch, raft_model): +# """ +# Computes optical flow for a batch of frames using the RAFT model. +# """ +# # Pad frames to make dimensions divisible by 8 +# frame_batch = [pad_to_multiple_of_8(frame) for frame in frame_batch] + +# # Convert frames to tensors and normalize +# frame_tensors = torch.stack([torch.from_numpy(frame).permute(2, 0, 1).float().cuda() / 255.0 for frame in frame_batch]) + +# # Compute optical flow for the batch +# with torch.no_grad(): +# flows = raft_model(frame_tensors[:-1], frame_tensors[1:]) + +# # Calculate motion magnitude for each flow +# motions = [np.sum(flow.cpu().numpy() ** 2) for flow in flows] +# return motions + +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1, batch_size=128): +# """ +# Extracts key frames from a video based on motion intensity using RAFT for optical flow. +# Utilizes multiple GPUs and processes frames in batches. 
+# """ +# # Load RAFT model and wrap it with DataParallel for multi-GPU support +# print("Loading RAFT model...") +# raft_model = of.raft_large(pretrained=True).cuda() +# if torch.cuda.device_count() > 1: +# print(f"Using {torch.cuda.device_count()} GPUs!") +# raft_model = DataParallel(raft_model) + +# # Load video frames using Decord with reduced resolution +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) +# frame_indices = range(0, total_frames, frame_interval) + +# # Read all frames and resize them for faster processing +# frames = video.get_batch(frame_indices).asnumpy() +# frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames] + +# # Calculate motion between consecutive frames using RAFT in batches +# motion_values = [] +# print(f"The total number of frames: {len(frames)}") +# for batch_start in range(1, len(frames), batch_size): +# batch_end = min(batch_start + batch_size, len(frames)) +# batch_frames = frames[batch_start - 1:batch_end] +# batch_motions = compute_raft_flow_batch(batch_frames, raft_model) +# motion_values.extend(batch_motions) + +# # Calculate threshold statistically +# motion_mean = np.mean(motion_values) +# motion_std = np.std(motion_values) +# threshold = motion_mean + sigma_multiplier * motion_std + +# # Extract key frames based on motion threshold +# key_frames = [] +# for i, motion in enumerate(motion_values): +# if motion > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# return key_frames + +############################################################################################################ +# Histogram Difference +# def calculate_histogram_difference(frame_pair): +# """ +# Calculates the difference between two consecutive frames using color histograms. +# """ +# frame1, frame2 = frame_pair + +# # Calculate histograms for each frame +# hist1 = cv2.calcHist([frame1], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]) +# hist2 = cv2.calcHist([frame2], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]) + +# # Normalize histograms +# cv2.normalize(hist1, hist1) +# cv2.normalize(hist2, hist2) + +# # Calculate histogram difference using Chi-Squared distan +# difference = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CHISQR) +# return difference + +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=1): +# """ +# Extracts key frames from a video based on histogram differences. +# Optimized for speed and efficiency. 
+# """ +# # Load video frames using Decord with reduced resolution +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) +# frame_indices = range(0, total_frames, frame_interval) + +# # Read all frames and resize them for faster processing +# frames = video.get_batch(frame_indices).asnumpy() +# frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames] + +# # Calculate histogram differences between consecutive frames using multiprocessing +# frame_pairs = list(zip(frames[:-1], frames[1:])) +# with Pool(cpu_count()) as pool: +# histogram_differences = pool.map(calculate_histogram_difference, frame_pairs) + +# # Calculate threshold statistically +# diff_mean = np.mean(histogram_differences) +# diff_std = np.std(histogram_differences) +# threshold = diff_mean + sigma_multiplier * diff_std + +# # Extract key frames based on histogram difference threshold +# key_frames = [] +# for i, difference in enumerate(histogram_differences): +# if difference > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# return key_frames + + +############################################################################################################ +# faster histogram +# def calculate_histogram_difference(frame1, frame2): +# """ +# Calculates the difference between two consecutive frames using grayscale histograms. +# """ +# # Convert frames to grayscale +# gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) +# gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY) + +# # Calculate histograms with fewer bins (e.g., 16 bins) +# hist1 = cv2.calcHist([gray1], [0], None, [16], [0, 256]) +# hist2 = cv2.calcHist([gray2], [0], None, [16], [0, 256]) + +# # Normalize histograms +# cv2.normalize(hist1, hist1) +# cv2.normalize(hist2, hist2) + +# # Calculate histogram difference using Chi-Squared distance +# difference = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CHISQR) +# return difference + +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=10): +# """ +# Extracts key frames from a video based on histogram differences. +# Optimized for speed by reducing histogram complexity and skipping frames. 
+# """ +# # Load video frames using Decord with reduced resolution +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) +# frame_indices = range(0, total_frames, frame_interval) + +# # Read all frames and resize them for faster processing +# frames = video.get_batch(frame_indices).asnumpy() +# frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames] + +# # Calculate histogram differences between consecutive frames +# histogram_differences = [] +# for i in range(1, len(frames)): +# difference = calculate_histogram_difference(frames[i - 1], frames[i]) +# histogram_differences.append(difference) + +# # Calculate threshold statistically +# diff_mean = np.mean(histogram_differences) +# diff_std = np.std(histogram_differences) +# threshold = diff_mean + sigma_multiplier * diff_std + +# # Extract key frames based on histogram difference threshold +# key_frames = [] +# for i, difference in enumerate(histogram_differences): +# if difference > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# return key_frames + +############################################################################################################ +# faster histogram with batch +def calculate_histogram_difference_batch(frame_batch): + """ + Calculates histogram differences for a batch of frames. + """ + # Convert frames to grayscale + gray_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) for frame in frame_batch] + + # Calculate histograms for all frames in the batch + histograms = [cv2.calcHist([gray], [0], None, [16], [0, 256]) for gray in gray_frames] + for hist in histograms: + cv2.normalize(hist, hist) + + # Calculate histogram differences between consecutive frames + differences = [] + for i in range(1, len(histograms)): + difference = cv2.compareHist(histograms[i - 1], histograms[i], cv2.HISTCMP_CHISQR) + differences.append(difference) + + return differences + +def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=10, batch_size=16): + """ + Extracts key frames from a video based on histogram differences. + Uses batch processing for faster computation. 
+ """ + # Load video frames using Decord with reduced resolution + video = VideoReader(video_path, ctx=cpu(0)) + total_frames = len(video) + print(f"All total frames: {total_frames}") + frame_indices = range(0, total_frames, frame_interval) + + # Read all frames and resize them for faster processing + frames = video.get_batch(frame_indices).asnumpy() + frames = [cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2)) for frame in frames] + + # Process frames in batches + histogram_differences = [] + print(f"The total number of frames: {len(frames)}") + for batch_start in range(0, len(frames), batch_size): + batch_end = min(batch_start + batch_size + 1, len(frames)) # +1 to include the next frame for difference + batch_frames = frames[batch_start:batch_end] + batch_differences = calculate_histogram_difference_batch(batch_frames) + histogram_differences.extend(batch_differences) + + # Calculate threshold statistically + diff_mean = np.mean(histogram_differences) + diff_std = np.std(histogram_differences) + threshold = diff_mean + sigma_multiplier * diff_std + + # Extract key frames based on histogram difference threshold + key_frames = [] + for i, difference in enumerate(histogram_differences): + if difference > threshold and len(key_frames) < max_frames: + img = Image.fromarray(cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2RGB)) + key_frames.append(img) + + return key_frames + + +############################################################################################################ +# faster faster histogram +# def calculate_frame_difference(frame1, frame2): +# """ +# Ultra-fast frame difference calculation using downscaled grayscale and absolute pixel differences. +# """ +# # Convert to grayscale and downscale further +# gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) +# gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY) + +# # Downscale to tiny images (e.g., 16x16) for fast comparison +# tiny1 = cv2.resize(gray1, (16, 16)) +# tiny2 = cv2.resize(gray2, (16, 16)) + +# # Calculate normalized absolute difference +# diff = cv2.absdiff(tiny1, tiny2) +# return np.mean(diff) / 255.0 # Normalize to [0,1] + +# def save_key_frames(key_frames, output_dir="key_frames", prefix="frame"): +# """ +# Saves key frames to disk as JPEG images. 
+# """ +# if not os.path.exists(output_dir): +# os.makedirs(output_dir) + +# saved_paths = [] +# for i, frame in enumerate(key_frames): +# frame_path = os.path.join(output_dir, f"{prefix}_{i:04d}.jpg") +# frame.save(frame_path, quality=85) # Good quality with reasonable compression +# saved_paths.append(frame_path) + +# return saved_paths + +# def extract_motion_key_frames(video_path, max_frames=20, sigma_multiplier=2, frame_interval=15): +# # Load video with decord (faster than OpenCV) +# video = VideoReader(video_path, ctx=cpu(0)) +# total_frames = len(video) + +# # Pre-calculate frame indices to process +# frame_indices = range(0, total_frames, frame_interval) +# frames = video.get_batch(frame_indices).asnumpy() + +# # Downscale all frames upfront (much faster than per-frame) +# frames = [cv2.resize(frame, (frame.shape[1]//4, frame.shape[0]//4)) for frame in frames] + +# # Calculate differences (vectorized approach) +# differences = [] +# prev_frame = frames[0] +# for frame in frames[1:]: +# diff = calculate_frame_difference(prev_frame, frame) +# differences.append(diff) +# prev_frame = frame + +# # Adaptive thresholding +# diff_mean = np.mean(differences) +# diff_std = np.std(differences) +# threshold = diff_mean + sigma_multiplier * diff_std + +# # Extract key frames +# key_frames = [] +# for i, diff in enumerate(differences): +# if diff > threshold and len(key_frames) < max_frames: +# img = Image.fromarray(cv2.cvtColor(frames[i+1], cv2.COLOR_BGR2RGB)) +# key_frames.append(img) + +# saved_paths = save_key_frames(key_frames, '../video') + +# return key_frames
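For quick local verification outside Celery, here is a minimal sketch (not part of the patch) that drives the new histogram-difference key-frame extractor and the audio extraction added above in utils/video_processing.py; the clip path is an assumption borrowed from celery_debug.py.

from utils.video_processing import extract_audio_from_video, extract_motion_key_frames

video_path = "../video/film4.mp4"  # sample clip path mirroring celery_debug.py; adjust locally

# tasks.py calls the extractor with max_frames=20 and sigma_multiplier=2; frame_interval=10 and
# batch_size=16 are the function defaults (every 10th frame, 16-frame histogram batches,
# threshold = mean + 2 * std of the chi-squared histogram differences).
key_frames = extract_motion_key_frames(
    video_path, max_frames=20, sigma_multiplier=2, frame_interval=10, batch_size=16
)
print(f"Selected {len(key_frames)} key frames")

# The extractor returns PIL images; tasks.py base64-encodes these before building the prompt.
for i, img in enumerate(key_frames):
    img.save(f"key_frame_{i:04d}.jpg")

audio_path = extract_audio_from_video(video_path)  # writes a temporary .wav and returns its path
print(f"Audio saved to: {audio_path}")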