Spaces:
Sleeping
Sleeping
File size: 3,705 Bytes
ce05fe5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from transformers import (
AutoTokenizer,
AutoModel,
AutoModelForSequenceClassification
)
import torch
# Initialize pipelines and tokenizers
@st.cache_resource
def load_components():
    """Load every tokenizer/model pair used by the dashboard.

    The function is wrapped in ``st.cache_resource`` so the heavy
    ``from_pretrained`` calls are meant to run once rather than on every
    Streamlit rerun.

    Returns:
        dict: Two entries, ``"director"`` and ``"similarity"``, each holding
        a ``(tokenizer, model)`` tuple.
    """
    # NOTE(review): confirm this checkpoint ships a fine-tuned
    # sequence-classification head; if it is a masked-LM checkpoint the head
    # loaded below would be freshly initialised.
    director_checkpoint = "huggingface-course/distilbert-base-uncased-finetuned-imdb"
    similarity_checkpoint = "sentence-transformers/all-mpnet-base-v2"

    # Pipeline 1: classifier used for the director analysis
    director_pair = (
        AutoTokenizer.from_pretrained(director_checkpoint),
        AutoModelForSequenceClassification.from_pretrained(director_checkpoint),
    )

    # Pipeline 2: encoder used for semantic similarity between movie titles
    similarity_pair = (
        AutoTokenizer.from_pretrained(similarity_checkpoint),
        AutoModel.from_pretrained(similarity_checkpoint),
    )

    return {"director": director_pair, "similarity": similarity_pair}
# Build the tokenizer/model pairs via load_components()
components = load_components()

# Bind each (tokenizer, model) pair to the module-level names the pipeline
# functions below refer to.
(director_tokenizer, director_model), (sim_tokenizer, sim_model) = (
    components[pipeline_key] for pipeline_key in ("director", "similarity")
)
# Genre label -> integer id; the labels also populate the genre selector
genre_mapping = {
    label: index
    for index, label in enumerate(("Action", "Comedy", "Sci-Fi", "Adventure"))
}

# Sample movie catalogue: one record per movie, money columns in billion USD
movie_db = pd.DataFrame(
    [
        ('Avatar', 'Sci-Fi', 2.37, 2.92),
        ('Interstellar', 'Sci-Fi', 1.65, 0.71),
        ('Jurassic Park', 'Adventure', 0.63, 1.10),
        ('Fast & Furious 7', 'Action', 1.9, 1.51),
        ('Hi, Mom', 'Comedy', 0.15, 0.83),
    ],
    columns=['Title', 'Genre', 'Budget (Billion USD)', 'Box Office (Billion USD)'],
)
# Pipeline: Director quality analysis
def analyze_director(director):
    """Score a director name with the sequence-classification model.

    The text is tokenized, passed through ``director_model`` with gradient
    tracking disabled, and the first two output logits are each squashed
    with a sigmoid and scaled by 10.

    Args:
        director: Free-text director name to score.

    Returns:
        dict: ``"Commercial Value"`` (from logit 0) and ``"Artistic Quality"``
        (from logit 1), each a float between 0 and 10.
    """
    # truncation=True keeps over-long free-text input within the model's
    # maximum sequence length, matching how the similarity pipeline tokenizes.
    inputs = director_tokenizer(director, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = director_model(**inputs).logits

    # Each logit gets its own sigmoid (not a softmax across the pair), so the
    # two scores are independent and need not sum to 10.
    scores = torch.sigmoid(logits)
    return {
        "Commercial Value": scores[0][0].item() * 10,
        "Artistic Quality": scores[0][1].item() * 10,
    }
# Pipeline: Movie recommendation
def _embed_texts(texts):
    """Return one mean-pooled embedding row per string in *texts*."""
    encoded = sim_tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        hidden = sim_model(**encoded).last_hidden_state
    # Weight the mean by the attention mask so padding tokens introduced by
    # batching titles of different lengths do not dilute the average.
    mask = encoded["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    return (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)


def find_similar_movies(title, genre):
    """Rank the catalogue movies of one genre by title similarity.

    The query title and every ``movie_db`` title are embedded with the
    similarity model (mean-pooled token states) and compared by cosine
    similarity.

    Args:
        title: Query movie title.
        genre: Genre label; only rows whose ``'Genre'`` equals it are returned.

    Returns:
        pandas.DataFrame: A new frame holding the matching ``movie_db`` rows
        plus a ``'Similarity'`` column, sorted from most to least similar.
        The shared ``movie_db`` frame itself is left unmodified.
    """
    query_embed = _embed_texts([title])
    # One batched forward pass for the whole catalogue instead of one per row.
    catalogue_embed = _embed_texts(movie_db['Title'].tolist())

    # (1, dim) broadcasts against (n_movies, dim), giving one score per movie.
    similarities = torch.cosine_similarity(query_embed, catalogue_embed).tolist()

    # assign() returns a copy, so repeated calls never write into the
    # module-level catalogue.
    scored = movie_db.assign(Similarity=similarities)
    return scored[scored['Genre'] == genre].sort_values('Similarity', ascending=False)
# Streamlit Interface
# NOTE(review): the leading characters of the title/header strings below look
# like mis-decoded emoji; they are kept verbatim -- confirm the file encoding.
st.title("π¬ Movie Intelligence Dashboard")

# Sidebar: the three user inputs that drive both pipelines
with st.sidebar:
    director = st.text_input("Director Name", "Christopher Nolan")
    title = st.text_input("Movie Title", "Inception 2")
    genre = st.selectbox("Genre", list(genre_mapping))

# NOTE(review): button assumed to sit outside the sidebar block; the source
# indentation was not available -- confirm against the deployed layout.
if st.button("Analyze"):
    if not director.strip() or not title.strip():
        # Blank text would still be tokenized and scored, producing charts
        # that mean nothing; ask for input instead of running the models.
        st.warning("Please enter both a director name and a movie title.")
    else:
        # Director analysis: bar chart of the two scores on a fixed 0-10 axis
        st.header("π§βπΌ Director Profile")
        director_scores = analyze_director(director)
        fig = px.bar(
            x=list(director_scores.keys()),
            y=list(director_scores.values()),
            range_y=[0, 10],
        )
        st.plotly_chart(fig)

        # Movie recommendation: same-genre catalogue rows ranked by similarity
        st.header("π Recommended Movies")
        similar_movies = find_similar_movies(title, genre)
        st.dataframe(
            similar_movies[['Title', 'Genre', 'Budget (Billion USD)', 'Box Office (Billion USD)', 'Similarity']],
            column_config={
                # Render the similarity score as a 0-1 progress bar
                "Similarity": st.column_config.ProgressColumn(
                    format="%.2f",
                    min_value=0,
                    max_value=1,
                )
            },
        )
|