File size: 3,705 Bytes
ce05fe5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from transformers import (
    AutoTokenizer,
    AutoModel,
    AutoModelForSequenceClassification
)
import torch

# Initialize pipelines and tokenizers
@st.cache_resource
def load_components():
    """Load the tokenizer/model pair for each of the two pipelines.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    returned objects instead of reloading them on every script rerun.

    Returns:
        dict: Two entries, each a ``(tokenizer, model)`` tuple:
            ``"director"``   - tokenizer plus a sequence-classification model.
            ``"similarity"`` - tokenizer plus a plain encoder model.
    """
    # Checkpoint ids are named once so tokenizer and model always match.
    director_checkpoint = "huggingface-course/distilbert-base-uncased-finetuned-imdb"
    similarity_checkpoint = "sentence-transformers/all-mpnet-base-v2"

    loaded = {}

    # Pipeline 1: director analysis (classification head on top of the encoder).
    loaded["director"] = (
        AutoTokenizer.from_pretrained(director_checkpoint),
        AutoModelForSequenceClassification.from_pretrained(director_checkpoint),
    )

    # Pipeline 2: semantic similarity for movie recommendation (raw encoder).
    loaded["similarity"] = (
        AutoTokenizer.from_pretrained(similarity_checkpoint),
        AutoModel.from_pretrained(similarity_checkpoint),
    )

    return loaded

# Fetch the (cached) tokenizer/model pairs and expose them as module-level
# names, which the pipeline functions below read as globals.
components = load_components()
(director_tokenizer, director_model), (sim_tokenizer, sim_model) = (
    components["director"],
    components["similarity"],
)

# Genre name -> integer id, in display order. In the code shown here only the
# keys are read (they populate the genre select box).
genre_mapping = dict(zip(("Action", "Comedy", "Sci-Fi", "Adventure"), range(4)))

# Sample database: one row per movie, columns named explicitly below.
movie_db = pd.DataFrame(
    [
        ('Avatar', 'Sci-Fi', 2.37, 2.92),
        ('Interstellar', 'Sci-Fi', 1.65, 0.71),
        ('Jurassic Park', 'Adventure', 0.63, 1.10),
        ('Fast & Furious 7', 'Action', 1.9, 1.51),
        ('Hi, Mom', 'Comedy', 0.15, 0.83),
    ],
    columns=['Title', 'Genre', 'Budget (Billion USD)', 'Box Office (Billion USD)'],
)

# Pipeline: Director quality analysis
def analyze_director(director):
    """Score a director name with the director classification model.

    The text is tokenized, passed through ``director_model`` without gradient
    tracking, and the first two output logits are squashed with a sigmoid and
    scaled by 10.

    Args:
        director: Text to score (the director name entered by the user).

    Returns:
        dict: ``"Commercial Value"`` and ``"Artistic Quality"``, each a float
        between 0 and 10.
    """
    # truncation=True caps over-long input at the tokenizer's maximum length
    # instead of letting the model fail on it; find_similar_movies tokenizes
    # the same way.
    inputs = director_tokenizer(director, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = director_model(**inputs).logits

    # Independent sigmoid per logit (not a softmax), so the two scores need
    # not sum to 10. Row 0 is the single input sequence.
    scores = torch.sigmoid(logits)[0]

    # NOTE(review): nothing here shows that logit 0 / logit 1 of this
    # checkpoint actually correspond to these two labels - confirm.
    return {
        "Commercial Value": scores[0].item() * 10,
        "Artistic Quality": scores[1].item() * 10,
    }

# Pipeline: Movie recommendation
def _embed_text(text):
    """Return the embedding of ``text``: last hidden state averaged over tokens."""
    encoded = sim_tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        return sim_model(**encoded).last_hidden_state.mean(dim=1)


def find_similar_movies(title, genre):
    """Rank the sample movies of one genre by title similarity to ``title``.

    Every title in ``movie_db`` is embedded and compared with the query title
    by cosine similarity; the rows of the requested genre are then returned,
    most similar first.

    Args:
        title: Query movie title.
        genre: Genre to keep; compared for equality with the ``Genre`` column.

    Returns:
        pandas.DataFrame: A copy of the matching ``movie_db`` rows with an
        added ``Similarity`` column, sorted by it in descending order. The
        frame is empty when no row has the requested genre.
    """
    title_embed = _embed_text(title)

    # Similarity is computed for every row (not only the requested genre),
    # one value per row in movie_db order.
    similarities = [
        torch.cosine_similarity(title_embed, _embed_text(db_title)).item()
        for db_title in movie_db['Title']
    ]

    # assign() works on a copy, so the module-level movie_db is not mutated
    # as a side effect of asking for recommendations.
    scored = movie_db.assign(Similarity=similarities)
    return scored[scored['Genre'] == genre].sort_values('Similarity', ascending=False)

# Streamlit Interface
st.title("🎬 Movie Intelligence Dashboard")

# Sidebar inputs; the genre choices are the keys of genre_mapping.
with st.sidebar:
    director = st.text_input("Director Name", "Christopher Nolan")
    title = st.text_input("Movie Title", "Inception 2")
    genre = st.selectbox("Genre", list(genre_mapping.keys()))

if st.button("Analyze"):
    if not director.strip() or not title.strip():
        # Blank text would still run through the models but yield
        # meaningless scores, so ask for input instead.
        st.warning("Please enter both a director name and a movie title.")
    else:
        # Director analysis: bar chart of the two scores on a fixed 0-10 axis.
        st.header("🧑‍💼 Director Profile")
        director_scores = analyze_director(director)
        fig = px.bar(
            x=list(director_scores.keys()),
            y=list(director_scores.values()),
            range_y=[0, 10]
        )
        st.plotly_chart(fig)

        # Movie recommendation: table of same-genre movies ranked by similarity.
        st.header("🎞 Recommended Movies")
        similar_movies = find_similar_movies(title, genre)
        if similar_movies.empty:
            st.info("No movies in the sample database match the selected genre.")
        else:
            st.dataframe(
                similar_movies[['Title', 'Genre', 'Budget (Billion USD)', 'Box Office (Billion USD)', 'Similarity']],
                column_config={
                    # Render the similarity as a progress bar spanning 0..1.
                    "Similarity": st.column_config.ProgressColumn(
                        format="%.2f",
                        min_value=0,
                        max_value=1
                    )
                }
            )