Spaces:

drift-ai
/

classify-rooms

Runtime error

+import torch
+import gradio as gr
+from transformers import AutoProcessor, AutoModel
+from pathlib import Path
+import numpy as np
+from decord import VideoReader
+import imageio
+FRAME_SAMPLING_RATE = 4
+DEFAULT_MODEL = "microsoft/xclip-base-patch16-zero-shot"
+processor = AutoProcessor.from_pretrained(DEFAULT_MODEL)
+model = AutoModel.from_pretrained(DEFAULT_MODEL)
+ROOMS = (
+    "bathroom,sauna,living room, bedroom,kitchen,toilet,hallway,dressing,attic,basement"
+)
+examples = [
+    [
+        "movies/bathroom.mp4",
+        ROOMS,
+    ],
+]
+def sample_frames_from_video_file(
+    file_path: str, num_frames: int = 16, frame_sampling_rate=1
+):
+    videoreader = VideoReader(file_path)
+    videoreader.seek(0)
+    # sample frames
+    start_idx = 0
+    end_idx = num_frames * frame_sampling_rate - 1
+    indices = np.linspace(start_idx, end_idx, num=num_frames, dtype=np.int64)
+    frames = videoreader.get_batch(indices).asnumpy()
+    return frames
+def get_num_total_frames(file_path: str):
+    videoreader = VideoReader(file_path)
+    videoreader.seek(0)
+    return len(videoreader)
+# def convert_frames_to_gif(frames, save_path: str = "frames.gif"):
+#     converted_frames = frames.astype(np.uint8)
+#     Path(save_path).parent.mkdir(parents=True, exist_ok=True)
+#     imageio.mimsave(save_path, converted_frames, fps=8)
+#     return save_path
+# def create_gif_from_video_file(
+#     file_path: str,
+#     num_frames: int = 16,
+#     frame_sampling_rate: int = 1,
+#     save_path: str = "frames.gif",
+# ):
+#     frames = sample_frames_from_video_file(file_path, num_frames, frame_sampling_rate)
+#     return convert_frames_to_gif(frames, save_path)
+def select_model(model_name):
+    global processor, model
+    processor = AutoProcessor.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+def get_frame_sampling_rate(video_path, num_model_input_frames):
+    # rearrange sampling rate based on video length and model input length
+    num_total_frames = get_num_total_frames(video_path)
+    if num_total_frames < FRAME_SAMPLING_RATE * num_model_input_frames:
+        frame_sampling_rate = num_total_frames // num_model_input_frames
+    else:
+        frame_sampling_rate = FRAME_SAMPLING_RATE
+    return frame_sampling_rate
+def predict(video_path, labels_text):
+    labels = labels_text.split(",")
+    num_model_input_frames = model.config.vision_config.num_frames
+    frame_sampling_rate = get_frame_sampling_rate(video_path, num_model_input_frames)
+    frames = sample_frames_from_video_file(
+        video_path, num_model_input_frames, frame_sampling_rate
+    )
+    # gif_path = convert_frames_to_gif(frames, save_path="video.gif")
+    inputs = processor(
+        text=labels, videos=list(frames), return_tensors="pt", padding=True
+    )
+    # forward pass
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = outputs.logits_per_video[0].softmax(dim=-1).cpu().numpy()
+    label_to_prob = {}
+    for ind, label in enumerate(labels):
+        label_to_prob[label] = float(probs[ind])
+    # return label_to_prob, gif_path
+    return label_to_prob
+app = gr.Blocks()
+with app:
+    gr.Markdown(
+        "# **<p align='center'>Classification of Rooms</p>**"
+    )
+    gr.Markdown(
+        "### **<p align='center'>Upload a video of a room and provide a list of type of rooms the model should select from.</p>**"
+    )
+    with gr.Row():
+        with gr.Column():
+            video_file = gr.Video(label="Video File:", show_label=True)
+            local_video_labels_text = gr.Textbox(
+                label="Labels Text:", show_label=True
+            )
+            submit_button = gr.Button(value="Predict")
+        # with gr.Column():
+        #     video_gif = gr.Image(
+        #         label="Input Clip",
+        #         show_label=True,
+        #     )
+        with gr.Column():
+            predictions = gr.Label(label="Predictions:", show_label=True)
+    gr.Markdown("**Examples:**")
+    # gr.Examples(
+    #     examples,
+    #     [video_file,local_video_labels_text],
+    #     [predictions, video_gif],
+    #     fn=predict,
+    #     cache_examples=True,
+    # )
+    submit_button.click(
+        predict,
+        inputs=[video_file, local_video_labels_text],
+        # outputs=[predictions, video_gif],
+        outputs=predictions,
+    )
+    # gr.Markdown(
+    #     """
+    #     \n Created by: Vincent Claes, <a href=\"https://www.meet-drift.ai/\">Drift</a>.
+    #     \n Inspired by: <a href=\"https://huggingface.co/spaces/fcakyon/zero-shot-video-classification\">fcakyon</a>.
+    #     """
+    # )
+app.launch()

movies/bathroom.mp4 ADDED Viewed

Binary file (615 kB). View file

movies/bedroom.mp4 ADDED Viewed

Binary file (106 kB). View file

movies/dressing.mp4 ADDED Viewed

Binary file (251 kB). View file

movies/home-office.mp4 ADDED Viewed

Binary file (336 kB). View file

movies/kitchen.mp4 ADDED Viewed

Binary file (317 kB). View file

movies/living-room.mp4 ADDED Viewed

Binary file (376 kB). View file

movies/toilet.mp4 ADDED Viewed

Binary file (215 kB). View file

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,27 @@

+[tool.poetry]
+name = "classify-rooms"
+version = "0.1.0"
+description = ""
+authors = ["Vincent Claes <[email protected]>"]
+readme = "README.md"
+[tool.poetry.dependencies]
+python = "^3.8"
+gradio = "^3.12.0"
+decord = "^0.6.0"
+torch = "^1.13.1"
+transformers = "^4.25.1"
+imageio = "^2.24.0"
+[tool.poetry.group.dev.dependencies]
+black = "^22.12.0"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+#gradio
+#torch
+#decord
+#pytube
+#imageio
+#transformers @ git+https://github.com/huggingface/transformers.git@799cea64ac1029d66e9e58f18bc6f47892270723

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+torch
+decord
+imageio
+transformers @ git+https://github.com/huggingface/transformers.git@799cea64ac1029d66e9e58f18bc6f47892270723