Spaces:

Abdelrahmann121
/

ragchatbot

Sleeping

App Files Files Community

Abdelrahmann121 committed on Sep 12

Commit

f684524

verified ·

1 Parent(s): 88fb656

Update app.py

Browse files

Files changed (1) hide show

app.py +350 -118

app.py CHANGED Viewed

@@ -97,65 +97,152 @@ class AudioProcessor:
 # ============================
 def prepare_dataset():
-    """Load and prepare the emotion dataset"""
-    print("📊 Loading emotion dataset...")
-    # Load the dataset
-    ds = load_dataset("cardiffnlp/tweet_eval", "emotion")
-    # Define emotion labels (matching the dataset)
-    emotion_labels = ["anger", "joy", "optimism", "sadness"]
-    def clean_text(text):
-        """Clean and preprocess text"""
-        text = text.lower()
-        text = re.sub(r"http\S+", "", text)  # remove URLs
-        text = re.sub(r"[^\w\s]", "", text)  # remove special characters
-        text = re.sub(r"\d+", "", text)  # remove numbers
-        text = re.sub(r"\s+", " ", text)  # normalize whitespace
-        return text.strip()
-    # Sample and prepare training data
-    train_data = ds['train']
-    train_sample = random.sample(list(train_data), min(1000, len(train_data)))
-    # Convert to RAG format
-    rag_json = []
-    for row in train_sample:
-        cleaned_text = clean_text(row['text'])
-        if len(cleaned_text) > 10:  # Filter out very short texts
-            rag_json.append({
-                "text": cleaned_text,
-                "emotion": emotion_labels[row['label']],
-                "original_text": row['text']
-            })
-    print(f"Dataset prepared with {len(rag_json)} samples")
-    return rag_json
 # ============================
-# EMOTION DETECTION MODEL
 # ============================
 class EmotionDetector:
     def __init__(self):
-        self.model_name = "j-hartmann/emotion-english-distilroberta-base"
-        try:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
-            self.classifier = pipeline(
-                "text-classification",
-                model=self.model,
-                tokenizer=self.tokenizer,
-                return_all_scores=False
-            )
-        except Exception as e:
-            st.error(f"❌ Error loading emotion model: {e}")
-            raise
     def detect_emotion(self, text):
-        """Detect emotion from text"""
         try:
             result = self.classifier(text)
             emotion = result[0]['label'].lower()
@@ -165,14 +252,36 @@ class EmotionDetector:
             emotion_mapping = {
                 'anger': 'anger',
                 'disgust': 'sadness',
-                'neutral': 'neutral',
                 'joy': 'joy',
                 'love': 'joy',
                 'happiness': 'joy',
                 'sadness': 'sadness',
                 'fear': 'sadness',
                 'surprise': 'optimism',
-                'optimism': 'optimism'
             }
             mapped_emotion = emotion_mapping.get(emotion, 'optimism')
@@ -180,7 +289,75 @@ class EmotionDetector:
         except Exception as e:
             logger.error(f"Error in emotion detection: {e}")
             return 'optimism', 0.5
 # ============================
 # RAG SYSTEM WITH FAISS
@@ -190,80 +367,76 @@ class RAGSystem:
     """
     Retrieval-Augmented Generation (RAG) system for selecting text templates
     based on user input and detected emotion.
-    Workflow:
-    1. Initialization (__init__):
-       - Stores the input RAG data (list of entries with 'text' and 'emotion').
-       - Extracts all texts into `self.texts`.
-       - Loads a sentence embedding model (SentenceTransformer).
-       - Computes embeddings for all texts.
-       - Creates a FAISS index (L2 distance) for fast similarity search.
-    2. Template Retrieval (retrieve_templates):
-       - Takes `user_input`, `detected_emotion`, and optional `top_k`.
-       - Filters the RAG data to only include entries matching the detected emotion.
-         If none match, all entries are considered.
-       - Retrieves embeddings and texts for the filtered entries.
-       - Creates a temporary FAISS index for the filtered subset.
-       - Embeds the user input and searches the index for the most similar templates.
-       - Returns the top `top_k` matching templates as a list.
-    Key Points:
-    - Uses semantic similarity via sentence embeddings to find relevant templates.
-    - Prioritizes entries that match the detected emotion for more personalized responses.
-    - FAISS ensures efficient similarity search even with large datasets.
     """
     def __init__(self, rag_data):
         self.rag_data = rag_data
         self.texts = [entry['text'] for entry in rag_data]
-        # Initialize embedding model
-        self.embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-        # Create embeddings
-        self.embeddings = self.embed_model.encode(
-            self.texts,
-            convert_to_numpy=True,
-            show_progress_bar=False
-        )
-        # Create FAISS index
-        dimension = self.embeddings.shape[1]
-        self.index = faiss.IndexFlatL2(dimension)
-        self.index.add(self.embeddings)
     def retrieve_templates(self, user_input, detected_emotion, top_k=3):
         """Retrieve relevant templates based on emotion and similarity"""
-        # Filter by emotion first
-        emotion_filtered_indices = [
-            i for i, entry in enumerate(self.rag_data)
-            if entry['emotion'] == detected_emotion
-        ]
-        if not emotion_filtered_indices:
-            emotion_filtered_indices = list(range(len(self.rag_data)))
-        # Get filtered embeddings
-        filtered_embeddings = self.embeddings[emotion_filtered_indices]
-        filtered_texts = [self.texts[i] for i in emotion_filtered_indices]
-        # Create temporary index for filtered data
-        temp_index = faiss.IndexFlatL2(filtered_embeddings.shape[1])
-        temp_index.add(filtered_embeddings)
-        # Search for similar templates
-        user_embedding = self.embed_model.encode([user_input], convert_to_numpy=True)
-        distances, indices = temp_index.search(
-            user_embedding,
-            min(top_k, len(filtered_texts))
-        )
-        # Top templates
-        top_templates = [filtered_texts[i] for i in indices[0]]
-        return top_templates
 # ============================
 # RESPONSE GENERATOR
@@ -303,20 +476,20 @@ class ResponseGenerator:
             ]
         }
     def generate_response(self, user_input, top_k=3):
         """Generate empathetic response using RAG and few-shot prompting"""
         try:
             # Step 1: Detect emotion
             detected_emotion, confidence = self.emotion_detector.detect_emotion(user_input)
-            # Step 2: Retrieve relevant templates
-            templates = self.rag_system.retrieve_templates(
-                user_input,
-                detected_emotion,
-                top_k=top_k
-            )
             # Step 3: Create response using templates and emotion
             base_responses = self.response_templates.get(
@@ -346,6 +519,65 @@ class ResponseGenerator:
             return error_msg + disclaimer, 'neutral', 0.0
 # ============================
 # STREAMLIT APP
 # ============================

 # ============================
 def prepare_dataset():
+    """Load and prepare the emotion dataset with error handling"""
+    try:
+        print("📊 Loading emotion dataset...")
+        # Load the dataset
+        ds = load_dataset("cardiffnlp/tweet_eval", "emotion")
+        # Define emotion labels (matching the dataset)
+        emotion_labels = ["anger", "joy", "optimism", "sadness"]
+        def clean_text(text):
+            """Clean and preprocess text"""
+            text = text.lower()
+            text = re.sub(r"http\S+", "", text)  # remove URLs
+            text = re.sub(r"[^\w\s]", "", text)  # remove special characters
+            text = re.sub(r"\d+", "", text)  # remove numbers
+            text = re.sub(r"\s+", " ", text)  # normalize whitespace
+            return text.strip()
+        # Sample and prepare training data
+        train_data = ds['train']
+        train_sample = random.sample(list(train_data), min(1000, len(train_data)))
+        # Convert to RAG format
+        rag_json = []
+        for row in train_sample:
+            cleaned_text = clean_text(row['text'])
+            if len(cleaned_text) > 10:  # Filter out very short texts
+                rag_json.append({
+                    "text": cleaned_text,
+                    "emotion": emotion_labels[row['label']],
+                    "original_text": row['text']
+                })
+        print(f"Dataset prepared with {len(rag_json)} samples")
+        return rag_json
+    except Exception as e:
+        print(f"Warning: Could not load dataset: {e}")
+        # Return minimal fallback dataset
+        return [
+            {"text": "feeling happy and excited", "emotion": "joy"},
+            {"text": "really angry and frustrated", "emotion": "anger"},
+            {"text": "sad and lonely today", "emotion": "sadness"},
+            {"text": "optimistic about the future", "emotion": "optimism"}
+        ]
 # ============================
+# FIXED EMOTION DETECTION MODEL
 # ============================
 class EmotionDetector:
     def __init__(self):
+        # Try multiple emotion models in order of preference
+        self.model_options = [
+            "j-hartmann/emotion-english-distilroberta-base",
+            "cardiffnlp/twitter-roberta-base-emotion-latest",
+            "nateraw/bert-base-uncased-emotion",
+            "michellejieli/emotion_text_classifier"
+        ]
+        self.model = None
+        self.tokenizer = None
+        self.classifier = None
+        # Try loading models in order
+        for model_name in self.model_options:
+            try:
+                st.info(f"🔄 Trying to load {model_name}...")
+                # Force download and load with specific parameters
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    model_name,
+                    force_download=False,
+                    resume_download=True
+                )
+                # Load model with specific device mapping to avoid meta tensor issues
+                self.model = AutoModelForSequenceClassification.from_pretrained(
+                    model_name,
+                    force_download=False,
+                    resume_download=True,
+                    device_map=None,  # Don't use device_map
+                    torch_dtype=torch.float32,  # Specify dtype explicitly
+                    low_cpu_mem_usage=False  # Disable low_cpu_mem_usage
+                )
+                # Move to CPU explicitly if needed
+                if torch.cuda.is_available():
+                    self.model = self.model.to('cpu')
+                self.classifier = pipeline(
+                    "text-classification",
+                    model=self.model,
+                    tokenizer=self.tokenizer,
+                    return_all_scores=False,
+                    device=-1  # Force CPU usage
+                )
+                st.success(f"✅ Successfully loaded {model_name}")
+                break
+            except Exception as e:
+                st.warning(f"⚠️ Failed to load {model_name}: {str(e)}")
+                continue
+        # Fallback to simple rule-based detection if all models fail
+        if self.classifier is None:
+            st.warning("⚠️ All emotion models failed. Using rule-based fallback.")
+            self.use_fallback = True
+        else:
+            self.use_fallback = False
+    def detect_emotion_fallback(self, text):
+        """Simple rule-based emotion detection as fallback"""
+        text_lower = text.lower()
+        # Define keyword patterns for emotions
+        emotion_keywords = {
+            'joy': ['happy', 'joy', 'excited', 'thrilled', 'wonderful', 'amazing', 'great', 'fantastic', 'love', 'awesome'],
+            'anger': ['angry', 'mad', 'furious', 'annoyed', 'frustrated', 'irritated', 'hate', 'terrible', 'awful'],
+            'sadness': ['sad', 'depressed', 'upset', 'down', 'lonely', 'miserable', 'disappointed', 'heartbroken'],
+            'optimism': ['hope', 'optimistic', 'positive', 'confident', 'believe', 'future', 'better', 'improve']
+        }
+        # Count keyword matches
+        emotion_scores = {}
+        for emotion, keywords in emotion_keywords.items():
+            score = sum(1 for keyword in keywords if keyword in text_lower)
+            emotion_scores[emotion] = score
+        # Get emotion with highest score
+        if max(emotion_scores.values()) > 0:
+            detected_emotion = max(emotion_scores, key=emotion_scores.get)
+            confidence = min(emotion_scores[detected_emotion] * 0.3 + 0.4, 0.9)  # Scale confidence
+        else:
+            detected_emotion = 'optimism'  # Default
+            confidence = 0.5
+        return detected_emotion, confidence
     def detect_emotion(self, text):
+        """Detect emotion from text with fallback"""
+        if self.use_fallback or not text.strip():
+            return self.detect_emotion_fallback(text)
         try:
             result = self.classifier(text)
             emotion = result[0]['label'].lower()
             emotion_mapping = {
                 'anger': 'anger',
                 'disgust': 'sadness',
+                'neutral': 'optimism',
                 'joy': 'joy',
                 'love': 'joy',
                 'happiness': 'joy',
                 'sadness': 'sadness',
                 'fear': 'sadness',
                 'surprise': 'optimism',
+                'optimism': 'optimism',
+                # Additional mappings for different model outputs
+                'positive': 'joy',
+                'negative': 'sadness',
+                'admiration': 'joy',
+                'amusement': 'joy',
+                'annoyance': 'anger',
+                'approval': 'optimism',
+                'caring': 'joy',
+                'confusion': 'sadness',
+                'curiosity': 'optimism',
+                'desire': 'optimism',
+                'disappointment': 'sadness',
+                'disapproval': 'anger',
+                'embarrassment': 'sadness',
+                'excitement': 'joy',
+                'gratitude': 'joy',
+                'grief': 'sadness',
+                'nervousness': 'sadness',
+                'pride': 'joy',
+                'realization': 'optimism',
+                'relief': 'joy',
+                'remorse': 'sadness'
             }
             mapped_emotion = emotion_mapping.get(emotion, 'optimism')
         except Exception as e:
             logger.error(f"Error in emotion detection: {e}")
+            # Fall back to rule-based detection
+            return self.detect_emotion_fallback(text)
+# ============================
+# LIGHTWEIGHT EMOTION DETECTOR (ALTERNATIVE)
+# ============================
class LightweightEmotionDetector:
    """A simple, reliable emotion detector that doesn't rely on heavy models.

    Scoring is purely lexical: every matching keyword adds 1 point and every
    matching phrase adds 2 points to its emotion. Terms are matched on word
    boundaries, so short terms such as "so", "mad" or "down" are not
    triggered by longer words that merely contain them ("some", "made",
    "download").
    """

    def __init__(self):
        # Enhanced keyword-based emotion detection
        self.emotion_patterns = {
            'joy': {
                'keywords': ['happy', 'joy', 'joyful', 'excited', 'thrilled', 'wonderful', 'amazing', 'great', 'fantastic',
                           'love', 'awesome', 'brilliant', 'perfect', 'delighted', 'cheerful', 'elated', 'glad', 'pleased'],
                'phrases': ['feel good', 'so happy', 'really excited', 'love it', 'makes me happy', 'feeling great']
            },
            'anger': {
                'keywords': ['angry', 'mad', 'furious', 'annoyed', 'frustrated', 'irritated', 'hate', 'terrible', 'awful',
                           'disgusting', 'outraged', 'livid', 'enraged', 'pissed', 'infuriated', 'resentful'],
                'phrases': ['so angry', 'really mad', 'hate it', 'makes me angry', 'fed up', 'sick of']
            },
            'sadness': {
                'keywords': ['sad', 'depressed', 'upset', 'down', 'lonely', 'miserable', 'disappointed', 'heartbroken',
                           'devastated', 'hopeless', 'melancholy', 'sorrowful', 'dejected', 'despondent', 'gloomy'],
                'phrases': ['feel sad', 'so down', 'really upset', 'makes me sad', 'feeling low', 'broken hearted']
            },
            'optimism': {
                'keywords': ['hope', 'hopeful', 'optimistic', 'positive', 'confident', 'believe', 'future', 'better',
                           'improve', 'progress', 'opportunity', 'potential', 'bright', 'promising', 'encouraging'],
                'phrases': ['looking forward', 'things will get better', 'positive about', 'have hope', 'bright future']
            }
        }

    @staticmethod
    def _mentions(term, text, allow_inflection=False):
        """Return True when ``term`` occurs in ``text`` on word boundaries.

        With ``allow_inflection`` a common ending (s, d, ed, ing, ly) may
        follow the term, so "hated" still counts for "hate".
        """
        import re  # local import keeps the class self-contained

        ending = r"(?:s|d|ed|ing|ly)?" if allow_inflection else ""
        pattern = rf"(?<!\w){re.escape(term)}{ending}(?!\w)"
        return re.search(pattern, text) is not None

    def detect_emotion(self, text):
        """Detect emotion using enhanced pattern matching.

        Args:
            text: user message. ``None``, empty or whitespace-only text
                yields the neutral default.

        Returns:
            tuple[str, float]: ``(emotion, confidence)``. Blank input gives
            ``('optimism', 0.5)``; text without any match gives
            ``('optimism', 0.6)``. Otherwise the confidence is
            ``0.5 + 0.2 * score`` capped at 0.95, plus 0.05 per distinct
            intensifier word, capped at 0.98.
        """
        if not text or not text.strip():
            return 'optimism', 0.5

        text_lower = text.lower()

        # Score based on keywords (1 point) and phrases (2 points)
        emotion_scores = {}
        for emotion, patterns in self.emotion_patterns.items():
            keyword_hits = sum(
                1 for keyword in patterns['keywords']
                if self._mentions(keyword, text_lower, allow_inflection=True)
            )
            phrase_hits = sum(
                1 for phrase in patterns['phrases']
                if self._mentions(phrase, text_lower)
            )
            emotion_scores[emotion] = keyword_hits + 2 * phrase_hits

        # Intensity modifiers, counted once each and only as whole words
        intensifiers = ['very', 'really', 'extremely', 'so', 'absolutely', 'totally', 'completely']
        intensity_boost = sum(1 for word in intensifiers if self._mentions(word, text_lower)) * 0.5

        # max() keeps the first emotion on ties, i.e. dict insertion order.
        detected_emotion = max(emotion_scores, key=emotion_scores.get)
        if emotion_scores[detected_emotion] > 0:
            base_confidence = min(emotion_scores[detected_emotion] * 0.2 + 0.5, 0.95)
            confidence = min(base_confidence + intensity_boost * 0.1, 0.98)
        else:
            detected_emotion = 'optimism'  # Default to optimism
            confidence = 0.6
        return detected_emotion, confidence
 # ============================
 # RAG SYSTEM WITH FAISS
     """
     Retrieval-Augmented Generation (RAG) system for selecting text templates
     based on user input and detected emotion.
     """
     def __init__(self, rag_data):
         self.rag_data = rag_data
         self.texts = [entry['text'] for entry in rag_data]
+        if len(self.texts) == 0:
+            st.warning("⚠️ No RAG data available. Using simple responses.")
+            self.embed_model = None
+            self.embeddings = None
+            self.index = None
+            return
+        try:
+            # Initialize embedding model
+            self.embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+            # Create embeddings
+            self.embeddings = self.embed_model.encode(
+                self.texts,
+                convert_to_numpy=True,
+                show_progress_bar=False
+            )
+            # Create FAISS index
+            dimension = self.embeddings.shape[1]
+            self.index = faiss.IndexFlatL2(dimension)
+            self.index.add(self.embeddings)
+        except Exception as e:
+            st.warning(f"⚠️ Could not initialize RAG system: {e}")
+            self.embed_model = None
+            self.embeddings = None
+            self.index = None
     def retrieve_templates(self, user_input, detected_emotion, top_k=3):
         """Retrieve relevant templates based on emotion and similarity"""
+        if not self.embed_model or not self.index:
+            return []
+        try:
+            # Filter by emotion first
+            emotion_filtered_indices = [
+                i for i, entry in enumerate(self.rag_data)
+                if entry['emotion'] == detected_emotion
+            ]
+            if not emotion_filtered_indices:
+                emotion_filtered_indices = list(range(len(self.rag_data)))
+            # Get filtered embeddings
+            filtered_embeddings = self.embeddings[emotion_filtered_indices]
+            filtered_texts = [self.texts[i] for i in emotion_filtered_indices]
+            # Create temporary index for filtered data
+            temp_index = faiss.IndexFlatL2(filtered_embeddings.shape[1])
+            temp_index.add(filtered_embeddings)
+            # Search for similar templates
+            user_embedding = self.embed_model.encode([user_input], convert_to_numpy=True)
+            distances, indices = temp_index.search(
+                user_embedding,
+                min(top_k, len(filtered_texts))
+            )
+            # Top templates
+            top_templates = [filtered_texts[i] for i in indices[0]]
+            return top_templates
+        except Exception as e:
+            logger.error(f"Error in template retrieval: {e}")
+            return []
 # ============================
 # RESPONSE GENERATOR
             ]
         }
     def generate_response(self, user_input, top_k=3):
         """Generate empathetic response using RAG and few-shot prompting"""
         try:
             # Step 1: Detect emotion
             detected_emotion, confidence = self.emotion_detector.detect_emotion(user_input)
+            # Step 2: Retrieve relevant templates (if RAG is available)
+            templates = []
+            if self.rag_system and self.rag_system.embed_model:
+                templates = self.rag_system.retrieve_templates(
+                    user_input,
+                    detected_emotion,
+                    top_k=top_k
+                )
             # Step 3: Create response using templates and emotion
             base_responses = self.response_templates.get(
             return error_msg + disclaimer, 'neutral', 0.0
 # ============================
+# SIMPLE RESPONSE GENERATOR (FALLBACK)
+# ============================
class SimpleResponseGenerator:
    """Template-only response generator used when no RAG system is available.

    The injected ``emotion_detector`` must offer ``detect_emotion(text)``
    returning ``(emotion, confidence)``; the emotion picks the template pool.
    """

    def __init__(self, emotion_detector):
        self.emotion_detector = emotion_detector

        # Three canned replies per supported emotion.
        anger_replies = [
            "I can understand why you're feeling frustrated. It's completely valid to feel this way. Sometimes situations can be really challenging, and it's important to acknowledge these feelings.",
            "Your anger is understandable. When things don't go as expected, it's natural to feel upset. Would you like to talk about what's causing these feelings?",
            "I hear that you're upset, and that's okay. These feelings are important and deserve attention. Take a moment to breathe if you need it."
        ]
        sadness_replies = [
            "I'm sorry you're going through a difficult time. Your feelings are valid, and it's okay to feel sad sometimes. Remember that this feeling will pass.",
            "It sounds like you're dealing with something really tough right now. I want you to know that it's perfectly normal to feel this way, and you're not alone.",
            "I can sense your sadness, and I want you to know that it's okay to feel this way. Sometimes life presents us with challenges that naturally make us feel down."
        ]
        joy_replies = [
            "I'm so happy to hear about your positive experience! That's wonderful, and your joy is really uplifting. It's great when life gives us these beautiful moments.",
            "Your joy is contagious! It's amazing to hear such positive news. These happy moments are precious and worth celebrating.",
            "I love hearing about things that make you happy. That sounds absolutely amazing! Your enthusiasm is really inspiring."
        ]
        optimism_replies = [
            "Your positive outlook is truly inspiring. That's such a great way to look at things, and your hopefulness is really encouraging.",
            "I appreciate your hopeful perspective. That kind of optimism can make such a difference, not just for you but for others around you too.",
            "It's wonderful to hear your optimistic thoughts. Keep that positive energy flowing - it's a powerful force for good!"
        ]
        self.response_templates = {
            'anger': anger_replies,
            'sadness': sadness_replies,
            'joy': joy_replies,
            'optimism': optimism_replies
        }

    def generate_response(self, user_input, top_k=3):
        """Build a templated reply for ``user_input`` without using RAG.

        Args:
            user_input: the user's message.
            top_k: accepted for signature parity with the RAG-based
                generator; not used here.

        Returns:
            tuple[str, str, float]: ``(reply, emotion, confidence)``. If
            anything raises, the reply is an apology containing the error
            text and the tuple ends with ``'optimism', 0.0``.
        """
        try:
            emotion, score = self.emotion_detector.detect_emotion(user_input)

            # Unknown emotions fall back to the optimism pool.
            default_pool = self.response_templates['optimism']
            pool = self.response_templates.get(emotion, default_pool)
            reply = random.choice(pool)

            # Long messages get an acknowledgement; otherwise short messages
            # that ask for help get an invitation to continue.
            if len(user_input) > 100:
                reply += " I can see you've shared quite a bit with me, and I appreciate your openness."
            else:
                lowered = user_input.lower()
                if 'help' in lowered or 'advice' in lowered or 'what should' in lowered:
                    reply += " If you'd like to talk more about this, I'm here to listen."

            notice = "\n\n⚠️ This is an automated response. For serious emotional concerns, please consult a mental health professional."
            return reply + notice, emotion, score
        except Exception as e:
            apology = f"I apologize, but I encountered an error: {str(e)}"
            notice = "\n\n⚠️ This is an automated response. Please consult a professional if needed."
            return apology + notice, 'optimism', 0.0
+# ============================
 # STREAMLIT APP
 # ============================