Skip to content

Commit b3be914

Browse files
committed
fix: change optical flow calculation & fix video examples
1 parent 1822b22 commit b3be914

File tree

4 files changed

+632
-278
lines changed

4 files changed

+632
-278
lines changed

DPF/filters/videos/farneback_filter.py

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import io
2-
from typing import Any
2+
from typing import Any, Optional
33

44
import cv2
55
import imageio.v3 as iio
@@ -17,6 +17,21 @@ def transform_frame(frame: MatLike, target_size: tuple[int, int]) -> MatLike:
1717
return frame
1818

1919

20+
def transform_keep_ar(frame: MatLike, min_side_size: int) -> MatLike:
    """Resize *frame* so its shorter side equals ``min_side_size`` while
    preserving aspect ratio, then convert the result to grayscale.

    Returns a single-channel (grayscale) frame ready for Farneback
    optical-flow computation.
    """
    height, width = frame.shape[:2]
    ratio = width / height
    landscape = height <= width
    # Pin the shorter side to min_side_size; derive the other from the ratio.
    target_h = min_side_size if landscape else int(min_side_size / ratio)
    target_w = int(ratio * min_side_size) if landscape else min_side_size

    gray = cv2.cvtColor(
        cv2.resize(frame, dsize=(target_w, target_h), interpolation=cv2.INTER_LINEAR),
        cv2.COLOR_BGR2GRAY,
    )
    return gray
33+
34+
2035
class GunnarFarnebackFilter(VideoFilter):
2136
"""
2237
Gunnar-Farneback filter inference class to get mean optical flow each video.
@@ -46,7 +61,9 @@ class GunnarFarnebackFilter(VideoFilter):
4661

4762
def __init__(
4863
self,
49-
pass_frames: int = 10,
64+
pass_frames: int = 12,
65+
num_passes: Optional[int] = None,
66+
min_frame_size: int = 512,
5067
pyramid_scale: float = 0.5,
5168
levels: int = 3,
5269
win_size: int = 15,
@@ -55,14 +72,16 @@ def __init__(
5572
poly_sigma: float = 1.2,
5673
workers: int = 16,
5774
flags: int = 0,
58-
batch_size: int = 1,
5975
pbar: bool = True,
6076
_pbar_position: int = 0
6177
):
6278
super().__init__(pbar, _pbar_position)
6379

6480
self.num_workers = workers
65-
self.batch_size = batch_size
81+
82+
self.num_passes = num_passes
83+
self.min_frame_size = min_frame_size
84+
self.pass_frames = pass_frames
6685

6786
self.pyramid_scale = pyramid_scale
6887
self.levels = levels
@@ -72,17 +91,15 @@ def __init__(
7291
self.poly_sigma = poly_sigma
7392
self.flags = flags
7493

75-
self.pass_frames = pass_frames
76-
7794
@property
def result_columns(self) -> list[str]:
    """Names of the columns this filter contributes to the result table."""
    # NOTE(review): the single column holds the per-video optical-flow value
    # appended in process_batch (rounded to 3 decimals) — confirm any
    # downstream consumers were updated from the old
    # "mean_optical_flow_farneback" name.
    return ["optical_flow_farneback"]
8097

8198
@property
def dataloader_kwargs(self) -> dict[str, Any]:
    """Keyword arguments forwarded to the DataLoader."""
    # batch_size is pinned to 1 — NOTE(review): presumably one whole video
    # per sample, so variable-length frame stacks are never collated; confirm.
    return dict(
        num_workers=self.num_workers,
        batch_size=1,
        drop_last=False,
    )
88105

@@ -95,27 +112,18 @@ def preprocess_data(
95112
video_file = modality2data['video']
96113

97114
frames = iio.imread(io.BytesIO(video_file), plugin="pyav")
115+
max_frame_to_process = self.num_passes*self.pass_frames if self.num_passes else len(frames)
116+
frames_transformed = []
98117

99-
if frames.shape[1] > frames.shape[2]:
100-
frames_resized = [
101-
transform_frame(frame=frames[i], target_size=(450, 800))
102-
for i in range(self.pass_frames, len(frames), self.pass_frames)
103-
]
104-
elif frames.shape[2] > frames.shape[1]:
105-
frames_resized = [
106-
transform_frame(frame=frames[i], target_size=(800, 450))
107-
for i in range(self.pass_frames, len(frames), self.pass_frames)
108-
]
109-
else:
110-
frames_resized = [
111-
transform_frame(frame=frames[i], target_size=(450, 450))
112-
for i in range(self.pass_frames, len(frames), self.pass_frames)
113-
]
118+
frames_transformed = [
119+
transform_keep_ar(frames[i], self.min_frame_size)
120+
for i in range(self.pass_frames, min(max_frame_to_process+1, len(frames)), self.pass_frames)
121+
]
114122

115123
mean_magnitudes: list[float] = []
116-
for i in range(self.pass_frames, len(frames_resized), self.pass_frames):
117-
current_frame = frames_resized[i - self.pass_frames]
118-
next_frame = frames_resized[i]
124+
for i in range(len(frames_transformed)-1):
125+
current_frame = frames_transformed[i]
126+
next_frame = frames_transformed[i+1]
119127
flow = cv2.calcOpticalFlowFarneback(
120128
current_frame,
121129
next_frame,
@@ -139,5 +147,5 @@ def process_batch(self, batch: list[Any]) -> dict[str, list[Any]]:
139147
for data in batch:
140148
key, mean_optical_flow = data
141149
df_batch_labels[self.key_column].append(key)
142-
df_batch_labels['mean_optical_flow_farneback'].append(round(mean_optical_flow, 3))
150+
df_batch_labels[self.result_columns[0]].append(round(mean_optical_flow, 3))
143151
return df_batch_labels

DPF/filters/videos/raft_filter.py

Lines changed: 44 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import io
22
import os
3-
from typing import Any
3+
from typing import Any, Optional
44
from urllib.request import urlopen
55
from zipfile import ZipFile
66

@@ -28,6 +28,24 @@ def transform_frame(frame: MatLike, target_size: tuple[int, int]) -> Tensor:
2828
return frame_tensor
2929

3030

31+
def transform_keep_ar(frame: MatLike, min_side_size: int) -> Tensor:
    """Resize *frame* so its shorter side equals ``min_side_size`` (aspect
    ratio preserved), convert it to a 1xCxHxW float tensor, and pad the
    spatial dims via ``InputPadder`` so both are divisible by 8.
    """
    height, width = frame.shape[:2]
    ratio = width / height
    if height <= width:
        target_h, target_w = min_side_size, int(ratio * min_side_size)
    else:
        target_h, target_w = int(min_side_size / ratio), min_side_size

    resized = cv2.resize(frame, dsize=(target_w, target_h), interpolation=cv2.INTER_LINEAR)
    # HWC uint8 -> 1xCxHxW float, as expected by the RAFT model.
    tensor = torch.from_numpy(resized).permute(2, 0, 1).float()[None]

    padder = InputPadder(tensor.shape)  # type: ignore
    return padder.pad(tensor)[0]
47+
48+
3149
class InputPadder:
3250
""" Pads images such that dimensions are divisible by 8 """
3351

@@ -62,20 +80,24 @@ class RAFTOpticalFlowFilter(VideoFilter):
6280
def __init__(
6381
self,
6482
pass_frames: int = 10,
83+
num_passes: Optional[int] = None,
84+
min_frame_size: int = 512,
6585
use_small_model: bool = False,
86+
raft_iters: int = 20,
6687
device: str = "cuda:0",
6788
workers: int = 16,
68-
batch_size: int = 1,
6989
pbar: bool = True,
7090
_pbar_position: int = 0
7191
):
7292
super().__init__(pbar, _pbar_position)
7393
self.num_workers = workers
74-
self.batch_size = batch_size
7594
self.device = device
7695

7796
assert pass_frames >= 1, "Number of pass_frames should be greater or equal to 1."
7897
self.pass_frames = pass_frames
98+
self.num_passes = num_passes
99+
self.min_frame_size = min_frame_size
100+
self.raft_iters = raft_iters
79101

80102
resp = urlopen(WEIGHTS_URL)
81103
zipped_files = ZipFile(io.BytesIO(resp.read()))
@@ -98,13 +120,13 @@ def __init__(
98120

99121
@property
def result_columns(self) -> list[str]:
    """Result column name, parameterized by the RAFT model variant in use."""
    # NOTE(review): renamed from "mean_optical_flow_{model_name}" — confirm
    # downstream consumers were updated accordingly.
    return [f"optical_flow_{self.model_name}"]
102124

103125
@property
def dataloader_kwargs(self) -> dict[str, Any]:
    """Keyword arguments forwarded to the DataLoader."""
    # batch_size is pinned to 1 — NOTE(review): presumably one whole video
    # per sample, so variable-length frame stacks are never collated; confirm.
    return dict(
        num_workers=self.num_workers,
        batch_size=1,
        drop_last=False,
    )
110132

@@ -117,23 +139,13 @@ def preprocess_data(
117139
video_file = modality2data['video']
118140

119141
frames = iio.imread(io.BytesIO(video_file), plugin="pyav")
120-
121-
if frames.shape[1] > frames.shape[2]:
122-
frames_resized = [
123-
transform_frame(frame=frames[i], target_size=(450, 800))
124-
for i in range(self.pass_frames, len(frames), self.pass_frames)
125-
]
126-
elif frames.shape[2] > frames.shape[1]:
127-
frames_resized = [
128-
transform_frame(frame=frames[i], target_size=(800, 450))
129-
for i in range(self.pass_frames, len(frames), self.pass_frames)
130-
]
131-
else:
132-
frames_resized = [
133-
transform_frame(frame=frames[i], target_size=(450, 450))
134-
for i in range(self.pass_frames, len(frames), self.pass_frames)
135-
]
136-
return key, frames_resized
142+
max_frame_to_process = self.num_passes*self.pass_frames if self.num_passes else len(frames)
143+
frames_transformed = []
144+
frames_transformed = [
145+
transform_keep_ar(frames[i], self.min_frame_size)
146+
for i in range(self.pass_frames, min(max_frame_to_process+1, len(frames)), self.pass_frames)
147+
]
148+
return key, frames_transformed
137149

138150
def process_batch(self, batch: list[Any]) -> dict[str, list[Any]]:
139151
df_batch_labels = self._get_dict_from_schema()
@@ -142,28 +154,21 @@ def process_batch(self, batch: list[Any]) -> dict[str, list[Any]]:
142154
for data in batch:
143155
key, frames = data
144156
with torch.no_grad():
145-
for i in range(self.pass_frames, len(frames), self.pass_frames):
146-
current_frame = frames[i - self.pass_frames]
147-
next_frame = frames[i]
157+
for i in range(len(frames)-1):
158+
current_frame = frames[i]
159+
next_frame = frames[i+1]
148160

149-
if (i - self.pass_frames) == 0:
161+
if i == 0:
150162
current_frame_cuda = current_frame.to(self.device)
151-
next_frame_cuda = next_frame.to(self.device)
152-
153-
_, flow = self.model(
154-
current_frame_cuda,
155-
next_frame_cuda,
156-
iters=20, test_mode=True
157-
)
158163
else:
159164
current_frame_cuda = next_frame_cuda
160-
next_frame_cuda = next_frame.to(self.device)
161165

162-
_, flow = self.model(
163-
current_frame_cuda,
164-
next_frame_cuda,
165-
iters=20, test_mode=True
166-
)
166+
next_frame_cuda = next_frame.to(self.device)
167+
_, flow = self.model(
168+
current_frame_cuda,
169+
next_frame_cuda,
170+
iters=self.raft_iters, test_mode=True
171+
)
167172

168173
flow = flow.detach().cpu().numpy()
169174
magnitude, angle = cv2.cartToPolar(flow[0][..., 0], flow[0][..., 1])

0 commit comments

Comments (0)