SpyC0der77 committed on
Commit 969332c · verified · 1 Parent(s): 5516fdb
Files changed (1)
  1. app.py +201 -299
app.py CHANGED
@@ -1,21 +1,18 @@
 import cv2
 import numpy as np
-import csv
-import math
 import torch
 import tempfile
-import os
 import gradio as gr
 import time
 import io
 from contextlib import redirect_stdout
-import concurrent.futures
 
-# Set up device for torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"[INFO] Using device: {device}")
 
-# Try to load the RAFT model from torch.hub.
+if device.type == "cuda":
+    torch.backends.cudnn.benchmark = True
+
 try:
     print("[INFO] Attempting to load RAFT model from torch.hub...")
     raft_model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True, trust_repo=True)
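
Note: one practical caveat with the RAFT path, as an assumption about the upstream model rather than something this commit enforces: the reference RAFT implementation expects spatial dimensions divisible by 8 and pads its inputs otherwise, while the motion-frame sizing below (mw = max(64, int(out_w * motion_scale))) does not guarantee that. A minimal sketch of a padding step; the helper name and use of torch.nn.functional.pad are illustrative, not part of this commit:

    import torch
    import torch.nn.functional as F

    def pad_to_multiple_of_8(t):
        # t: (1, 3, H, W) float tensor; pad right/bottom so H and W % 8 == 0.
        _, _, h, w = t.shape
        pad_h = (8 - h % 8) % 8
        pad_w = (8 - w % 8) % 8
        return F.pad(t, (0, pad_w, 0, pad_h), mode="replicate"), (pad_h, pad_w)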
@@ -28,248 +25,203 @@ except Exception as e:
     raft_model = None
     gr.Warning("Falling back to OpenCV Farneback optical flow.")
 
-def compress_video(video_file, target_width, target_height, progress=gr.Progress(), progress_offset=0.0, progress_scale=0.2, output_file=None):
-    """
-    Compresses the video by resizing each frame to the specified target resolution.
-    The new resolution is exactly (target_width, target_height).
-    Updates progress from progress_offset to progress_offset+progress_scale.
-    """
-    start_time = time.time()
+def _resize(frame, w, h):
+    if frame.shape[1] == w and frame.shape[0] == h:
+        return frame
+    return cv2.resize(frame, (w, h), interpolation=cv2.INTER_AREA if (w < frame.shape[1] or h < frame.shape[0]) else cv2.INTER_LINEAR)
+
+def _frame_to_raft_tensor_bgr(frame_bgr):
+    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
+    t = torch.from_numpy(frame_rgb).permute(2, 0, 1).contiguous().float().unsqueeze(0).div_(255.0)
+    return t.to(device, non_blocking=(device.type == "cuda"))
+
+def compute_offsets(
+    video_file,
+    out_w,
+    out_h,
+    motion_scale=0.5,
+    raft_iters=12,
+    progress=gr.Progress(),
+    progress_offset=0.0,
+    progress_scale=0.55,
+):
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
-        raise gr.Error("Could not open video file for compression.")
-
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    new_width = int(target_width)
-    new_height = int(target_height)
-
-    if output_file is None:
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-        output_file = temp_file.name
-        temp_file.close()
-
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (new_width, new_height))
-
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    frame_idx = 1
-    print(f"[INFO] Starting video compression: {total_frames} frames, target resolution: {new_width}x{new_height}")
+        raise gr.Error("Could not open video file for motion estimation.")
+
+    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
+
+    mw = max(64, int(out_w * float(motion_scale)))
+    mh = max(64, int(out_h * float(motion_scale)))
+    sx = float(out_w) / float(mw)
+    sy = float(out_h) / float(mh)
+
+    ret, prev = cap.read()
+    if not ret:
+        cap.release()
+        raise gr.Error("Cannot read first frame from video.")
+
+    prev_out = _resize(prev, out_w, out_h)
+    prev_small = _resize(prev_out, mw, mh)
+
+    use_raft = raft_model is not None
+    use_amp = device.type == "cuda"
+
+    if use_raft:
+        prev_t = _frame_to_raft_tensor_bgr(prev_small)
+    else:
+        prev_g = cv2.cvtColor(prev_small, cv2.COLOR_BGR2GRAY)
+
+    offsets = [(0.0, 0.0)]
+    cum_dx = 0.0
+    cum_dy = 0.0
+
+    idx = 1
     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        compressed_frame = cv2.resize(frame, (new_width, new_height))
-        out.write(compressed_frame)
-        if frame_idx % 10 == 0 or frame_idx == total_frames:
-            print(f"[INFO] Compressed frame {frame_idx}/{total_frames}")
-        progress(progress_offset + (frame_idx / total_frames) * progress_scale, desc="Compressing Video")
-        frame_idx += 1
-
-    cap.release()
-    out.release()
-    elapsed = time.time() - start_time
-    print(f"[INFO] Compressed video saved to: {output_file} in {elapsed:.2f} seconds")
-    return output_file
-
-def generate_motion_csv(video_file, output_csv=None, progress=gr.Progress(), progress_offset=0.0, progress_scale=0.4):
-    """
-    Generates a CSV file with motion data (columns: frame, mag, ang, zoom) from an input video.
-    Uses RAFT if available, otherwise falls back to OpenCV's Farneback optical flow.
-    Updates progress from progress_offset to progress_offset+progress_scale.
-    """
-    start_time = time.time()
-    if output_csv is None:
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
-        output_csv = temp_file.name
-        temp_file.close()
-
-    cap = cv2.VideoCapture(video_file)
-    if not cap.isOpened():
-        raise gr.Error("Could not open video file for CSV generation.")
-
-    print(f"[INFO] Generating motion CSV for video: {video_file}")
-    with open(output_csv, 'w', newline='') as csvfile:
-        fieldnames = ['frame', 'mag', 'ang', 'zoom']
-        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        writer.writeheader()
-
-        ret, first_frame = cap.read()
-        if not ret:
-            raise gr.Error("Cannot read first frame from video.")
-
-        if raft_model is not None:
-            first_frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
-            prev_tensor = torch.from_numpy(first_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
-            prev_tensor = prev_tensor.to(device)
-            print("[INFO] Using RAFT model for optical flow computation.")
+        frame_out = _resize(frame, out_w, out_h)
+        curr_small = _resize(frame_out, mw, mh)
+
+        if use_raft:
+            curr_t = _frame_to_raft_tensor_bgr(curr_small)
+            with torch.no_grad():
+                if use_amp:
+                    with torch.cuda.amp.autocast(True):
+                        _, flow_up = raft_model(prev_t, curr_t, iters=int(raft_iters), test_mode=True)
+                else:
+                    _, flow_up = raft_model(prev_t, curr_t, iters=int(raft_iters), test_mode=True)
+
+            flow = flow_up[0]
+            dx = float(flow[0].median().item())
+            dy = float(flow[1].median().item())
+            prev_t = curr_t
         else:
-            prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
-            print("[INFO] Using OpenCV Farneback optical flow for computation.")
-
-        frame_idx = 1
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        print(f"[INFO] Total frames to process: {total_frames}")
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-
-            if raft_model is not None:
-                curr_frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                curr_tensor = torch.from_numpy(curr_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
-                curr_tensor = curr_tensor.to(device)
-                with torch.no_grad():
-                    flow_low, flow_up = raft_model(prev_tensor, curr_tensor, iters=20, test_mode=True)
-                flow = flow_up[0].permute(1, 2, 0).cpu().numpy()
-                prev_tensor = curr_tensor.clone()
-            else:
-                curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-                flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
-                                                    pyr_scale=0.5, levels=3, winsize=15,
-                                                    iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
-                prev_gray = curr_gray
-
-            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees=True)
-            median_mag = np.median(mag)
-            median_ang = np.median(ang)
-
-            h, w = flow.shape[:2]
-            center_x, center_y = w / 2, h / 2
-            x_coords, y_coords = np.meshgrid(np.arange(w), np.arange(h))
-            x_offset = x_coords - center_x
-            y_offset = y_coords - center_y
-            dot = flow[..., 0] * x_offset + flow[..., 1] * y_offset
-            zoom_factor = np.count_nonzero(dot > 0) / (w * h)
-
-            writer.writerow({
-                'frame': frame_idx,
-                'mag': median_mag,
-                'ang': median_ang,
-                'zoom': zoom_factor
-            })
-
-            if frame_idx % 10 == 0 or frame_idx == total_frames:
-                print(f"[INFO] Processed frame {frame_idx}/{total_frames}")
-
-            progress(progress_offset + (frame_idx / total_frames) * progress_scale, desc="Generating Motion CSV")
-            frame_idx += 1
+            curr_g = cv2.cvtColor(curr_small, cv2.COLOR_BGR2GRAY)
+            flow = cv2.calcOpticalFlowFarneback(
+                prev_g,
+                curr_g,
+                None,
+                pyr_scale=0.5,
+                levels=3,
+                winsize=15,
+                iterations=3,
+                poly_n=5,
+                poly_sigma=1.2,
+                flags=0,
+            )
+            dx = float(np.median(flow[..., 0]))
+            dy = float(np.median(flow[..., 1]))
+            prev_g = curr_g
+
+        dx *= sx
+        dy *= sy
+
+        cum_dx += dx
+        cum_dy += dy
+        offsets.append((-cum_dx, -cum_dy))
+
+        if total > 0 and (idx % 5 == 0 or idx == total - 1):
+            progress(progress_offset + (idx / max(1, total - 1)) * progress_scale, desc="Estimating Motion")
+
+        idx += 1
 
     cap.release()
-    elapsed = time.time() - start_time
-    print(f"[INFO] Motion CSV generated: {output_csv} in {elapsed:.2f} seconds")
-    return output_csv
-
-def read_motion_csv(csv_filename):
-    """
-    Reads a motion CSV file and computes cumulative offset per frame.
-    Returns a dictionary mapping frame numbers to (dx, dy) offsets.
-    """
-    print(f"[INFO] Reading motion CSV: {csv_filename}")
-    motion_data = {}
-    cumulative_dx = 0.0
-    cumulative_dy = 0.0
-    with open(csv_filename, 'r') as csvfile:
-        reader = csv.DictReader(csvfile)
-        for row in reader:
-            frame_num = int(row['frame'])
-            mag = float(row['mag'])
-            ang = float(row['ang'])
-            rad = math.radians(ang)
-            dx = mag * math.cos(rad)
-            dy = mag * math.sin(rad)
-            cumulative_dx += dx
-            cumulative_dy += dy
-            motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
-    print("[INFO] Completed reading motion CSV.")
-    return motion_data
-
-def process_frame(frame, dx, dy, zoom, vertical_only, width, height):
-    """
-    Processes a single frame for stabilization.
-    Applies translation by dx, dy (if not vertical_only), and scaling by zoom.
-    Uses cv2.BORDER_REPLICATE to avoid black borders.
-    """
-    if vertical_only:
-        dx = 0
-    # Create transformation matrix: translation + scaling
-    center = (width / 2, height / 2)
-    M = cv2.getRotationMatrix2D(center, 0, zoom)
-    M[0, 2] += dx
-    M[1, 2] += dy
-    stabilized_frame = cv2.warpAffine(frame, M, (width, height), borderMode=cv2.BORDER_REPLICATE)
-    return stabilized_frame
-
-def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, vertical_only=False,
-                              progress=gr.Progress(), progress_offset=0.6, progress_scale=0.4,
-                              output_file=None):
-    """
-    Stabilizes the video using motion data from the CSV.
-    If vertical_only is True, only vertical motion is corrected.
-    Updates progress from progress_offset to progress_offset+progress_scale.
-    Uses cv2.BORDER_REPLICATE to fill any gaps, preventing black borders.
-    """
-    start_time = time.time()
-    print(f"[INFO] Starting stabilization using CSV: {csv_file}")
-    motion_data = read_motion_csv(csv_file)
-
+    return offsets
+
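
Note: compute_offsets estimates one global translation per frame pair by taking the median of the dense flow field, which is robust to a moving subject that a mean would be dragged toward, and it measures flow on a downscaled copy, so each displacement must be scaled back by sx = out_w / mw (and sy likewise). A standalone sketch of the idea; the flow array here is synthetic, for illustration only:

    import numpy as np

    # Synthetic dense flow at motion resolution (mh x mw): a 2 px rightward
    # camera shift plus a small fast-moving object.
    mh, mw = 180, 320
    flow = np.zeros((mh, mw, 2), np.float32)
    flow[..., 0] = 2.0                # global dx at motion scale
    flow[40:60, 40:60, 0] = 30.0      # outlier region: moving object

    dx_small = float(np.median(flow[..., 0]))  # median ignores the outlier -> 2.0
    sx = 640 / mw                              # scale back to output width
    dx_out = dx_small * sx                     # 4.0 px at output resolution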
+def compute_auto_zoom(offsets, width, height):
+    dxs = [o[0] for o in offsets] or [0.0]
+    dys = [o[1] for o in offsets] or [0.0]
+
+    left = max(0.0, -min(dxs))
+    right = max(0.0, max(dxs))
+    top = max(0.0, -min(dys))
+    bottom = max(0.0, max(dys))
+
+    safe_w = float(width) - (left + right)
+    safe_h = float(height) - (top + bottom)
+
+    zx = (float(width) / safe_w) if safe_w > 1.0 else 1.0
+    zy = (float(height) / safe_h) if safe_h > 1.0 else 1.0
+    return max(1.0, zx, zy)
+
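
Note: the auto-zoom rule reads directly off the cumulative offsets: the largest corrective shifts in each direction bound a "safe" region covered by every shifted frame, and scaling by width / safe_w or height / safe_h (whichever is larger) keeps the replicated borders out of view. A worked example, assuming a 640x360 output and illustrative displacement values:

    # Offsets range from -20 px (left) to +12 px (right) horizontally
    # and from -8 px (up) to +6 px (down) vertically.
    width, height = 640, 360
    safe_w = width - (20 + 12)    # 608 px always visible horizontally
    safe_h = height - (8 + 6)     # 346 px always visible vertically
    zoom = max(1.0, width / safe_w, height / safe_h)
    print(round(zoom, 3))         # 1.053: zoom in ~5.3% to hide borders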
+def stabilize_stream(
+    video_file,
+    offsets,
+    zoom=1.0,
+    vertical_only=False,
+    out_w=None,
+    out_h=None,
+    progress=gr.Progress(),
+    progress_offset=0.55,
+    progress_scale=0.45,
+    output_file=None,
+):
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
         raise gr.Error("Could not open video file for stabilization.")
-
+
     fps = cap.get(cv2.CAP_PROP_FPS)
-    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    print(f"[INFO] Video properties - FPS: {fps}, Width: {width}, Height: {height}")
-
+    in_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    in_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    if out_w is None:
+        out_w = in_w
+    if out_h is None:
+        out_h = in_h
+
     if output_file is None:
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
         output_file = temp_file.name
         temp_file.close()
-
-    frames = []
-    while True:
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    out = cv2.VideoWriter(output_file, fourcc, fps, (int(out_w), int(out_h)))
+
+    center = (float(out_w) / 2.0, float(out_h) / 2.0)
+    base = cv2.getRotationMatrix2D(center, 0.0, float(zoom))
+
+    total = len(offsets)
+    i = 0
+    while i < total:
         ret, frame = cap.read()
         if not ret:
             break
-        frames.append(frame)
+
+        frame_out = _resize(frame, int(out_w), int(out_h))
+
+        dx, dy = offsets[i]
+        if vertical_only:
+            dx = 0.0
+
+        M = base.copy()
+        M[0, 2] += float(dx)
+        M[1, 2] += float(dy)
+
+        stabilized = cv2.warpAffine(frame_out, M, (int(out_w), int(out_h)), borderMode=cv2.BORDER_REPLICATE)
+        out.write(stabilized)
+
+        if total > 0 and (i % 5 == 0 or i == total - 1):
+            progress(progress_offset + (i / max(1, total - 1)) * progress_scale, desc="Stabilizing Video")
+
+        i += 1
+
     cap.release()
-    total_frames = len(frames)
-    print(f"[INFO] Total frames to stabilize: {total_frames}")
-    # Use threads to avoid multiprocessing pickling issues on Windows/Gradio.
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        dx_list = [motion_data.get(i+1, (0, 0))[0] for i in range(len(frames))]
-        dy_list = [motion_data.get(i+1, (0, 0))[1] for i in range(len(frames))]
-        zoom_list = [zoom] * len(frames)
-        vertical_list = [vertical_only] * len(frames)
-        width_list = [width] * len(frames)
-        height_list = [height] * len(frames)
-        stabilized_frames = list(executor.map(process_frame, frames, dx_list, dy_list, zoom_list, vertical_list, width_list, height_list))
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
-    for idx, stabilized_frame in enumerate(stabilized_frames):
-        out.write(stabilized_frame)
-        frame_idx = idx + 1
-        if frame_idx % 10 == 0 or frame_idx == total_frames:
-            print(f"[INFO] Stabilized frame {frame_idx}/{total_frames}")
-
-        progress(progress_offset + (frame_idx / total_frames) * progress_scale, desc="Stabilizing Video")
     out.release()
-    elapsed = time.time() - start_time
-    print(f"[INFO] Stabilized video saved to: {output_file} in {elapsed:.2f} seconds")
     return output_file
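
Note: both the removed process_frame and the new stabilize_stream build the per-frame warp the same way: cv2.getRotationMatrix2D with angle 0 yields a pure scale about the frame center, and the corrective shift is then added to the matrix's translation column before cv2.warpAffine applies it. A small sketch of the resulting 2x3 matrix, with illustrative values:

    import numpy as np
    import cv2

    center = (320.0, 180.0)                          # 640x360 frame center
    M = cv2.getRotationMatrix2D(center, 0.0, 1.05)   # zoom 1.05, no rotation
    M[0, 2] += -4.0                                  # corrective dx
    M[1, 2] += 2.5                                   # corrective dy
    # M == [[1.05, 0.0, tx], [0.0, 1.05, ty]]: the scale sits on the
    # diagonal, tx/ty recenter the scaled frame and apply the shift.
    print(np.round(M, 2))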
 
-def process_video_ai(video_file, zoom, vertical_only, compress_mode, target_width, target_height, auto_zoom, progress=gr.Progress(track_tqdm=True)):
-    """
-    Gradio interface function:
-    - Optionally compresses the video if compress_mode is True, resizing it to the chosen resolution.
-    - Generates motion data from the (possibly compressed) video.
-    - If auto_zoom is enabled, computes the optimal zoom level based on the maximum cumulative displacements.
-    - Stabilizes the video based on the generated motion data.
-    - If vertical_only is True, only vertical stabilization is applied.
-
-    Returns:
-        Tuple: (original video file path, stabilized video file path, log output)
-    """
+def process_video_ai(
+    video_file,
+    zoom,
+    vertical_only,
+    compress_mode,
+    target_width,
+    target_height,
+    auto_zoom,
+    progress=gr.Progress(track_tqdm=True),
+):
     gr.Info("Starting AI-powered video processing...")
     log_buffer = io.StringIO()
     with redirect_stdout(log_buffer):
@@ -277,82 +229,32 @@ def process_video_ai(video_file, zoom, vertical_only, compress_mode, target_widt
         video_file = video_file.get("name", None)
         if video_file is None:
             raise gr.Error("Please upload a video file.")
-
-        # If compression is enabled, compress the video first.
+
+        cap = cv2.VideoCapture(video_file)
+        if not cap.isOpened():
+            raise gr.Error("Could not open uploaded video.")
+        in_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        in_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        cap.release()
+
         if compress_mode:
-            gr.Info("Compressing video before processing...")
-            video_file = compress_video(video_file, target_width, target_height, progress=progress, progress_offset=0.0, progress_scale=0.2)
-            gr.Info("Video compression complete.")
-            motion_offset = 0.2
-            motion_scale = 0.4
-            stabilization_offset = 0.6
-            stabilization_scale = 0.4
+            out_w = int(target_width)
+            out_h = int(target_height)
         else:
-            motion_offset = 0.0
-            motion_scale = 0.5
-            stabilization_offset = 0.5
-            stabilization_scale = 0.5
-
-        csv_file = generate_motion_csv(video_file, progress=progress, progress_offset=motion_offset, progress_scale=motion_scale)
-        gr.Info("Motion CSV generated successfully.")
-
-        # Auto Zoom Mode: compute the optimal zoom factor to avoid black borders.
+            out_w = in_w
+            out_h = in_h
+
+        offsets = compute_offsets(
+            video_file,
+            out_w,
+            out_h,
+            motion_scale=0.5,
+            raft_iters=12,
+            progress=progress,
+            progress_offset=0.0,
+            progress_scale=0.55,
+        )
+        gr.Info("Motion estimated successfully.")
+
         if auto_zoom:
-            gr.Info("Auto Zoom Mode enabled. Computing optimal zoom factor...")
-            motion_data = read_motion_csv(csv_file)
-            # Compute separate left/right and top/bottom displacements.
-            left_disp = abs(min(v[0] for v in motion_data.values()))
-            right_disp = max(v[0] for v in motion_data.values())
-            top_disp = abs(min(v[1] for v in motion_data.values()))
-            bottom_disp = max(v[1] for v in motion_data.values())
-            cap = cv2.VideoCapture(video_file)
-            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            cap.release()
-            safe_width = width - (left_disp + right_disp)
-            safe_height = height - (top_disp + bottom_disp)
-            zoom_x = width / safe_width if safe_width > 0 else 1.0
-            zoom_y = height / safe_height if safe_height > 0 else 1.0
-            auto_zoom_factor = max(1.0, zoom_x, zoom_y)
-            gr.Info(f"Auto zoom factor computed: {auto_zoom_factor:.2f}")
-            zoom = auto_zoom_factor
-
-        stabilized_path = stabilize_video_using_csv(video_file, csv_file, zoom=zoom, vertical_only=vertical_only,
-                                                    progress=progress, progress_offset=stabilization_offset, progress_scale=stabilization_scale)
-        gr.Info("Video stabilization complete.")
-        print("[INFO] Video processing complete.")
-        logs = log_buffer.getvalue()
-        return video_file, stabilized_path, logs
-
-# Build the Gradio UI.
-with gr.Blocks() as demo:
-    gr.Markdown("# AI-Powered Video Stabilization")
-    gr.Markdown(
-        "Upload a video, select a zoom factor (or use Auto Zoom Mode), choose whether to apply only vertical stabilization, and optionally compress the video before processing. "
-        "If compressing, specify the target resolution (width and height) for the compressed video. "
-        "The system will generate motion data using an AI model (RAFT if available) and then stabilize the video with live progress updates and alerts."
-    )
-
-    with gr.Row():
-        with gr.Column():
-            video_input = gr.Video(label="Input Video")
-            zoom_slider = gr.Slider(minimum=1.0, maximum=3.0, step=0.1, value=1.0, label="Zoom Factor (ignored if Auto Zoom enabled)")
-            auto_zoom_checkbox = gr.Checkbox(label="Auto Zoom Mode", value=False)
-            vertical_checkbox = gr.Checkbox(label="Vertical Stabilization Only", value=False)
-            compress_checkbox = gr.Checkbox(label="Compress Video Before Processing", value=False)
-            target_width = gr.Number(label="Target Width (px)", value=640)
-            target_height = gr.Number(label="Target Height (px)", value=360)
-            process_button = gr.Button("Process Video")
-        with gr.Column():
-            original_video = gr.Video(label="Original Video")
-            stabilized_video = gr.Video(label="Stabilized Video")
-            logs_output = gr.Textbox(label="Logs", lines=10)
-
-    process_button.click(
-        fn=process_video_ai,
-        inputs=[video_input, zoom_slider, vertical_checkbox, compress_checkbox, target_width, target_height, auto_zoom_checkbox],
-        outputs=[original_video, stabilized_video, logs_output]
-    )
-
-if __name__ == "__main__":
-    demo.launch()
+            z = compute_auto_zoom(offsets, out