import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from transformers import (
    AutoTokenizer,
    AutoModel,
    AutoModelForSequenceClassification,
)
import torch


# Initialize pipelines and tokenizers
@st.cache_resource
def load_components():
    """Load the tokenizer/model pair for each of the two pipelines.

    Decorated with ``st.cache_resource`` so the loaded objects are reused
    instead of being re-created on every script run.

    Returns:
        A dict with keys ``"director"`` and ``"similarity"``, each mapping
        to a ``(tokenizer, model)`` tuple.
    """
    # Pipeline 1: Director analysis
    director_tokenizer = AutoTokenizer.from_pretrained(
        "huggingface-course/distilbert-base-uncased-finetuned-imdb"
    )
    director_model = AutoModelForSequenceClassification.from_pretrained(
        "huggingface-course/distilbert-base-uncased-finetuned-imdb"
    )

    # Pipeline 2: Semantic similarity for movie recommendation
    sim_tokenizer = AutoTokenizer.from_pretrained(
        "sentence-transformers/all-mpnet-base-v2"
    )
    sim_model = AutoModel.from_pretrained(
        "sentence-transformers/all-mpnet-base-v2"
    )

    return {
        "director": (director_tokenizer, director_model),
        "similarity": (sim_tokenizer, sim_model),
    }


components = load_components()

# Unpack components
director_tokenizer, director_model = components["director"]
sim_tokenizer, sim_model = components["similarity"]

# Genre mapping (translated)
genre_mapping = {"Action": 0, "Comedy": 1, "Sci-Fi": 2, "Adventure": 3}

# Sample database
movie_db = pd.DataFrame({
    'Title': ['Avatar', 'Interstellar', 'Jurassic Park', 'Fast & Furious 7', 'Hi, Mom'],
    'Genre': ['Sci-Fi', 'Sci-Fi', 'Adventure', 'Action', 'Comedy'],
    'Budget (Billion USD)': [2.37, 1.65, 0.63, 1.9, 0.15],
    'Box Office (Billion USD)': [2.92, 0.71, 1.10, 1.51, 0.83],
})


# Pipeline: Director quality analysis
def analyze_director(director):
    """Score a director name with the sequence-classification model.

    The name is tokenized, passed through ``director_model`` without
    gradient tracking, and the sigmoid of the first two logits is scaled
    to a 0-10 range.

    Args:
        director: Text to score (the director's name).

    Returns:
        A dict with float values under ``"Commercial Value"`` (logit 0)
        and ``"Artistic Quality"`` (logit 1).
    """
    # NOTE(review): nothing in this file establishes that logits 0 and 1 of
    # this checkpoint correspond to the two labels below -- confirm that the
    # checkpoint ships a classification head trained for this purpose.
    inputs = director_tokenizer(director, return_tensors="pt")
    with torch.no_grad():
        outputs = director_model(**inputs)
    scores = torch.sigmoid(outputs.logits)
    return {
        "Commercial Value": scores[0][0].item() * 10,
        "Artistic Quality": scores[0][1].item() * 10,
    }


def _embed(texts):
    """Return one mean-pooled embedding row per string in ``texts``."""
    inputs = sim_tokenizer(
        texts, padding=True, truncation=True, return_tensors="pt"
    )
    with torch.no_grad():
        hidden = sim_model(**inputs).last_hidden_state
    # Batching pads shorter titles; weight by the attention mask so padding
    # tokens do not dilute the mean and each row matches what encoding that
    # title on its own would produce.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    return (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)


# Pipeline: Movie recommendation
def find_similar_movies(title, genre):
    """Rank catalogue movies of one genre by title similarity.

    Every title in ``movie_db`` is embedded in a single batched forward
    pass and compared with the embedding of ``title`` by cosine
    similarity. ``movie_db`` itself is left unmodified; the scores are
    attached to a copy.

    Args:
        title: Query movie title.
        genre: Value of the ``Genre`` column to keep in the result.

    Returns:
        A DataFrame holding the rows of ``movie_db`` whose genre equals
        ``genre``, with an added ``Similarity`` column, sorted by that
        column in descending order.
    """
    title_embed = _embed([title])
    movie_embeds = _embed(movie_db['Title'].tolist())

    # (1, d) against (n, d) broadcasts to one similarity per catalogue row.
    similarities = torch.cosine_similarity(title_embed, movie_embeds).tolist()

    # assign() returns a new frame, so the shared catalogue is not mutated.
    scored = movie_db.assign(Similarity=similarities)
    return scored[scored['Genre'] == genre].sort_values(
        'Similarity', ascending=False
    )


# Streamlit Interface
st.title("🎬 Movie Intelligence Dashboard")

with st.sidebar:
    director = st.text_input("Director Name", "Christopher Nolan")
    title = st.text_input("Movie Title", "Inception 2")
    genre = st.selectbox("Genre", list(genre_mapping.keys()))

if st.button("Analyze"):
    # Director analysis
    st.header("🧑‍💼 Director Profile")
    director_scores = analyze_director(director)
    fig = px.bar(
        x=list(director_scores.keys()),
        y=list(director_scores.values()),
        range_y=[0, 10],
    )
    st.plotly_chart(fig)

    # Movie recommendation
    st.header("🎞 Recommended Movies")
    similar_movies = find_similar_movies(title, genre)
    st.dataframe(
        similar_movies[['Title', 'Genre', 'Budget (Billion USD)', 'Box Office (Billion USD)', 'Similarity']],
        column_config={
            "Similarity": st.column_config.ProgressColumn(
                format="%.2f",
                min_value=0,
                max_value=1,
            )
        },
    )