Spaces:

KNipun
/

Whisper-AI-Psychiatric

Sleeping

App Files Community

KNipun committed on Aug 7

Commit

3cd4556

verified ·

1 Parent(s): d69e783

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +0 -14

streamlit_app.py CHANGED Viewed

@@ -346,7 +346,6 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
             # Method 1: Try librosa
             try:
                 audio_array, sampling_rate = librosa.load(tmp_file_path, sr=16000, dtype=np.float32)
-                st.info("✅ Audio loaded with librosa")
             except Exception as e:
                 st.warning(f"Librosa failed: {e}")
@@ -358,7 +357,6 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
                         sampling_rate = 16000
                     # Ensure float32 dtype
                     audio_array = audio_array.astype(np.float32)
-                    st.info("✅ Audio loaded with soundfile")
                 except Exception as e2:
                     st.warning(f"Soundfile failed: {e2}")
@@ -377,7 +375,6 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
                         if sampling_rate != 16000:
                             audio_array = librosa.resample(audio_array, orig_sr=sampling_rate, target_sr=16000)
                             sampling_rate = 16000
-                        st.info("✅ Audio loaded with scipy.wavfile")
                     except Exception as e3:
                         st.error(f"All audio loading methods failed: {e3}")
                         # Clean up and return empty
@@ -422,20 +419,15 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
             # Ensure float32 dtype
             audio_array = audio_array.astype(np.float32)
-            st.info(f"📊 Audio processed: {len(audio_array)/16000:.2f}s duration, shape: {audio_array.shape}")
         # Process audio with Whisper
         try:
-            st.info("🔄 Processing with Whisper...")
             # Try with language parameter first
             input_features = whisper_processor(
                 audio_array,
                 sampling_rate=16000,
                 return_tensors="pt"
             ).input_features
-            st.info("✅ Audio features extracted successfully")
         except Exception as proc_error:
             st.error(f"❌ Failed to process audio features: {proc_error}")
             return ""
@@ -449,7 +441,6 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
         # Generate transcription with error handling
         try:
-            st.info("🔄 Generating transcription...")
             with torch.no_grad():
                 # Try simple generation first
                 predicted_ids = whisper_model.generate(
@@ -460,13 +451,11 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
                     temperature=0.0,  # Deterministic
                     use_cache=True
                 )
-                st.info("✅ Transcription generated successfully")
         except RuntimeError as e:
             st.warning(f"⚠️ First attempt failed: {e}")
             if "dtype" in str(e).lower() or "float16" in str(e).lower():
                 try:
                     # Try forcing float32 for both input and model
-                    st.info("🔄 Retrying with float32...")
                     input_features = input_features.float()
                     if hasattr(whisper_model, 'float'):
                         whisper_model = whisper_model.float()
@@ -479,7 +468,6 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
                             temperature=0.0,
                             use_cache=True
                         )
-                    st.info("✅ Transcription generated with float32")
                 except Exception as e2:
                     st.error(f"❌ Float32 attempt failed: {e2}")
                     return ""
@@ -499,7 +487,6 @@ def transcribe_audio(audio_data, whisper_model, whisper_processor):
                 st.warning("⚠️ Transcription is empty. The audio might be silent or unclear.")
                 return ""
-            st.success(f"✅ Transcription successful: '{transcription[:50]}{'...' if len(transcription) > 50 else ''}'")
             return transcription
         except Exception as decode_error:
@@ -1407,7 +1394,6 @@ def main():
                                 # Reset states if duplicate message
                                 st.session_state.waiting_for_input = True
                                 st.session_state.processing_complete = True
-                                st.warning("⚠️ This audio was already processed. Please record a new message.")
             except Exception:
                 # Fallback to file uploader

             # Method 1: Try librosa
             try:
                 audio_array, sampling_rate = librosa.load(tmp_file_path, sr=16000, dtype=np.float32)
             except Exception as e:
                 st.warning(f"Librosa failed: {e}")
                         sampling_rate = 16000
                     # Ensure float32 dtype
                     audio_array = audio_array.astype(np.float32)
                 except Exception as e2:
                     st.warning(f"Soundfile failed: {e2}")
                         if sampling_rate != 16000:
                             audio_array = librosa.resample(audio_array, orig_sr=sampling_rate, target_sr=16000)
                             sampling_rate = 16000
                     except Exception as e3:
                         st.error(f"All audio loading methods failed: {e3}")
                         # Clean up and return empty
             # Ensure float32 dtype
             audio_array = audio_array.astype(np.float32)
         # Process audio with Whisper
         try:
             # Try with language parameter first
             input_features = whisper_processor(
                 audio_array,
                 sampling_rate=16000,
                 return_tensors="pt"
             ).input_features
         except Exception as proc_error:
             st.error(f"❌ Failed to process audio features: {proc_error}")
             return ""
         # Generate transcription with error handling
         try:
             with torch.no_grad():
                 # Try simple generation first
                 predicted_ids = whisper_model.generate(
                     temperature=0.0,  # Deterministic
                     use_cache=True
                 )
         except RuntimeError as e:
             st.warning(f"⚠️ First attempt failed: {e}")
             if "dtype" in str(e).lower() or "float16" in str(e).lower():
                 try:
                     # Try forcing float32 for both input and model
                     input_features = input_features.float()
                     if hasattr(whisper_model, 'float'):
                         whisper_model = whisper_model.float()
                             temperature=0.0,
                             use_cache=True
                         )
                 except Exception as e2:
                     st.error(f"❌ Float32 attempt failed: {e2}")
                     return ""
                 st.warning("⚠️ Transcription is empty. The audio might be silent or unclear.")
                 return ""
             return transcription
         except Exception as decode_error:
                                 # Reset states if duplicate message
                                 st.session_state.waiting_for_input = True
                                 st.session_state.processing_complete = True
             except Exception:
                 # Fallback to file uploader