Spaces:

gauravbox
/

TalentLensAI

Sleeping

Johnny

updated resume_format > template, hide sidebar, download Spacy model with spacy_loader.py

102e49d 9 months ago

12.7 kB

	# pages/Format_Resume.py

	import os, sys, streamlit as st
	import json
	from io import BytesIO

	# Add parent directory to path so we can import utils
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	# Force reload environment variables for Streamlit
	from dotenv import load_dotenv
	load_dotenv(override=True)

	from utils.hybrid_extractor import extract_resume_sections
	from utils.builder import build_resume_from_data
	from utils.parser import parse_resume # whatever parse_resume you already have

	# Blank .docx template (header/footer only), resolved relative to this
	# page's directory: <this dir>/../templates/blank_resume.docx
	template_path = os.path.join(os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx')

	# Page chrome: centered layout, sidebar collapsed on first load.
	st.set_page_config(page_title='Resume Formatter', layout='centered', initial_sidebar_state="collapsed")

	# Stylesheet that hides the sidebar and its collapsed-state control.
	# It targets the data-testid attributes plus several hashed class selectors.
	_HIDE_SIDEBAR_CSS = """
	<style>
	.css-1d391kg {display: none}
	.css-1rs6os {display: none}
	.css-17ziqus {display: none}
	[data-testid="stSidebar"] {display: none}
	[data-testid="collapsedControl"] {display: none}
	.css-1lcbmhc {display: none}
	.css-1outpf7 {display: none}
	.sidebar .sidebar-content {display: none}
	</style>
	"""
	st.markdown(_HIDE_SIDEBAR_CSS, unsafe_allow_html=True)

	# Navigation back to the main page, rendered above the title.
	home_clicked = st.button("🏠 Home", help="Return to main TalentLens.AI page")
	if home_clicked:
	    st.switch_page("app.py")

	st.title('📄 Resume Formatter')
	st.markdown("---")

	# Accept a single resume file; only PDF and DOCX uploads are offered.
	accepted_types = ['pdf', 'docx']
	uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=accepted_types)

	# Until a file is provided, render the onboarding help and end this script
	# run so nothing below executes without an upload.
	if not uploaded:
	    st.info("Please upload a resume to get started.")

	    # Usage walkthrough
	    st.markdown("### 💡 How to Use Resume Formatter")
	    usage_steps = """
	1. Upload your resume in PDF or DOCX format
	2. Review extracted data - our AI will parse your resume sections
	3. Edit if needed - make any corrections to the extracted information
	4. Generate formatted resume - download a professionally formatted version
	"""
	    st.markdown(usage_steps)

	    # Feature overview, split across two columns
	    st.markdown("### ✨ Features")
	    extraction_col, formatting_col = st.columns(2)
	    extraction_col.markdown("""
	🤖 AI-Powered Extraction:
	- OpenAI GPT-4o for highest accuracy
	- Hugging Face Cloud as backup
	- Regex fallback for reliability
	""")
	    formatting_col.markdown("""
	📄 Professional Formatting:
	- Clean, modern design
	- Consistent layout
	- ATS-friendly format
	""")

	    st.stop()

	st.success(f'Uploaded: {uploaded.name}')

	# 1) Extract raw text
	# The lower-cased text after the last "." of the file name is handed to the
	# parser as the file extension.
	ext = uploaded.name.rsplit('.', 1)[-1].lower()
	resume_text = parse_resume(uploaded, ext)

	# Show the parsed text so the user can see what the extractor will work from.
	st.subheader('📄 Raw Resume Text')
	st.text_area(label='Raw Resume Text', value=resume_text, height=300, label_visibility='visible')

	# 2) Parse into structured fields using improved hybrid approach
	st.subheader('🔍 Extracting Resume Data...')

	import copy
	from utils.hybrid_extractor import HybridResumeExtractor

	# Streamlit re-executes this whole script on every widget interaction
	# (button click, committed text edit, download). The extraction result is
	# therefore kept in st.session_state and reused for as long as the parsed
	# resume text is unchanged, instead of calling the extractors on every rerun.
	# Reloading the page starts a new session and forces a fresh extraction.
	_EXTRACTION_CACHE_KEY = '_format_resume_extraction'
	_cached = st.session_state.get(_EXTRACTION_CACHE_KEY)

	if _cached is None or _cached['resume_text'] != resume_text:
	    # Show extraction progress (covers both passes below)
	    with st.spinner('Analyzing resume with AI models...'):
	        # Use OpenAI as primary, HF Cloud as backup
	        _extracted = extract_resume_sections(
	            resume_text,
	            prefer_ai=True,
	            use_openai=True,   # Try OpenAI GPT-4o first (best results)
	            use_hf_cloud=True  # Fallback to HF Cloud (good backup)
	        )

	        # Second pass, run only to read which method was used.
	        # NOTE(review): this repeats the extraction work. If
	        # extract_resume_sections is a thin wrapper around
	        # HybridResumeExtractor.extract_sections, use the extractor's return
	        # value as the data and drop the first call - confirm in
	        # utils.hybrid_extractor before changing.
	        extractor = HybridResumeExtractor(prefer_ai=True, use_openai=True, use_hf_cloud=True)
	        extractor.extract_sections(resume_text)
	        _stats = extractor.get_extraction_stats()

	    _cached = {'resume_text': resume_text, 'data': _extracted, 'stats': _stats}
	    st.session_state[_EXTRACTION_CACHE_KEY] = _cached

	# Work on a copy: the edit section further down reassigns entries of `data`,
	# and those edits must not leak into the cached extraction.
	data = copy.deepcopy(_cached['data'])
	stats = _cached['stats']

	# Show extraction success and method used
	method_used = stats.get('method_used', 'unknown')
	if method_used == 'openai_gpt4o':
	    st.success('✅ Extracted using OpenAI GPT-4o (highest accuracy)')
	elif method_used == 'huggingface_cloud':
	    st.info('ℹ️ Extracted using Hugging Face Cloud (good accuracy)')
	else:
	    st.warning('⚠️ Used fallback extraction method')

	# Headline indicators: whether a name was extracted, and how many job
	# entries and skills came back.
	name_found = bool(data.get('Name'))
	experiences_found = len(data.get('StructuredExperiences', []))
	skills_found = len(data.get('Skills', []))

	name_col, jobs_col, skills_col = st.columns(3)
	name_col.metric("Name", "✅" if name_found else "❌", "Found" if name_found else "Missing")
	jobs_col.metric("Job Experiences", experiences_found, f"{experiences_found} positions")
	skills_col.metric("Technical Skills", skills_found, f"{skills_found} skills")

	# 👇 TEMP – remove after test (show raw JSON for debugging)
	with st.expander("🔧 Debug: Raw Extraction Data"):
	    import json
	    import textwrap

	    # Pretty-print the extraction dict; every line is prefixed with one space.
	    raw_json = json.dumps(data, indent=2)
	    st.code(textwrap.indent(raw_json, " "), language="json")

	st.subheader('📋 Parsed Resume Sections')

	# Two-column overview: personal info, summary and education on the left;
	# skills and training on the right.
	left_col, right_col = st.columns(2)

	with left_col:
	    # Name and Summary
	    st.markdown("👤 Personal Information")
	    candidate = data.get('Name')
	    if not candidate:
	        st.error("❌ Name not found")
	    else:
	        st.write(f"Name: {candidate}")

	    summary = data.get('Summary')
	    if not summary:
	        st.warning("⚠️ No professional summary found")
	    else:
	        st.markdown("📝 Professional Summary:")
	        st.write(summary)

	    # Education
	    st.markdown("🎓 Education")
	    education = data.get('Education', [])
	    if not education:
	        st.warning("⚠️ No education information found")
	    else:
	        for edu in education:
	            st.write(f"• {edu}")

	with right_col:
	    # Skills
	    st.markdown("🛠️ Technical Skills")
	    skills = data.get('Skills', [])
	    if not skills:
	        st.error("❌ No technical skills found")
	    else:
	        # All skills on one comma-separated line
	        st.write(", ".join(skills))

	        # Skills quality check: an entry containing any of these substrings
	        # (case-insensitive) is counted as a company name.
	        company_markers = ('abc', 'xyz', 'financial', 'insurance', 'solutions')
	        company_names = []
	        for skill in skills:
	            lowered = skill.lower()
	            if any(marker in lowered for marker in company_markers):
	                company_names.append(skill)
	        if company_names:
	            st.warning(f"⚠️ Found {len(company_names)} company names in skills (will be cleaned)")

	    # Training/Certifications (heading is shown only when there are entries)
	    training = data.get('Training', [])
	    if training:
	        st.markdown("📜 Certifications/Training")
	        for cert in training:
	            st.write(f"• {cert}")

	# Work Experience (full width): one expander per job entry
	st.markdown("💼 Professional Experience")
	experiences = data.get('StructuredExperiences', [])
	if not experiences:
	    st.error("❌ No work experience found")
	else:
	    detail_rows = (("Position", 'title'), ("Company", 'company'), ("Duration", 'date_range'))
	    for job_number, job in enumerate(experiences, start=1):
	        heading = f"Job {job_number}: {job.get('title', 'Unknown Title')} at {job.get('company', 'Unknown Company')}"
	        with st.expander(heading):
	            for row_label, field in detail_rows:
	                st.write(f"{row_label}: {job.get(field, 'N/A')}")

	            duties = job.get('responsibilities', [])
	            if not duties:
	                st.warning("⚠️ No responsibilities found for this position")
	            else:
	                st.write("Key Responsibilities:")
	                for duty in duties:
	                    st.write(f"• {duty}")

	# Show editable sections for user to modify if needed.
	# Each section of `data` is rendered according to its type:
	#   - list of strings -> multi-line text area, one item per line; written back
	#     as a list of stripped, non-empty lines
	#   - string or None  -> text area; written back as the edited string
	#   - anything else   -> read-only JSON view; the stored value is left untouched
	st.subheader('✏️ Edit Extracted Data (Optional)')
	with st.expander("Click to edit extracted data before formatting"):
	    # Iterate over a snapshot so reassigning data[section] below can never
	    # interfere with the iteration.
	    for section, content in list(data.items()):
	        st.markdown(f"{section}:")

	        # pure list of strings (an empty list also lands here)
	        if isinstance(content, list) and all(isinstance(item, str) for item in content):
	            edited_content = st.text_area(
	                label=section,
	                value="\n".join(content),
	                height=100,
	                label_visibility='collapsed',
	                key=f"edit_{section}"
	            )
	            # Update data with edited content
	            data[section] = [line.strip() for line in edited_content.split('\n') if line.strip()]

	        # single string; a missing value (None) is shown as an empty box
	        # rather than the literal text "None", which would otherwise be
	        # written back and treated as real content downstream
	        elif content is None or isinstance(content, str):
	            edited_content = st.text_area(
	                label=section,
	                value=content or "",
	                height=100,
	                label_visibility='collapsed',
	                key=f"edit_{section}_str"
	            )
	            # Update data with edited content
	            data[section] = edited_content

	        # list of dicts and any other structured value → show as JSON
	        # (read-only for now); converting these to str and storing the
	        # result would replace the structure with its text representation
	        else:
	            st.json(content)

	# 3) Build & download
	st.subheader('📄 Generate Formatted Resume')

	# Preview of what will go into the formatted resume:
	#   - number of sections with a truthy value
	#   - total length of the str() form of those values
	#   - completeness score: 25 points for each of the four core fields present
	section_count = sum(1 for value in data.values() if value)
	total_content = sum(len(str(value)) for value in data.values() if value)
	core_fields = ('Name', 'Summary', 'StructuredExperiences', 'Skills')
	quality_score = 25 * sum(1 for field in core_fields if data.get(field))

	sections_col, length_col, quality_col = st.columns(3)
	sections_col.metric("Sections to Include", section_count, "sections")
	length_col.metric("Content Length", f"{total_content:,}", "characters")
	quality_col.metric("Quality Score", f"{quality_score}%", "completeness")

	# Build the .docx from the (possibly edited) data and offer it for download.
	# NOTE: st.button is True only for the script run triggered by its click, so
	# everything in this branch, including the download button, is rendered for
	# that single run.
	if st.button('📄 Generate Formatted Resume', type='primary'):
	    try:
	        with st.spinner('Building formatted resume...'):
	            # Build the resume document
	            doc = build_resume_from_data(template_path, data)

	            # Save to an in-memory buffer and rewind it for reading
	            buf = BytesIO()
	            doc.save(buf)
	            buf.seek(0)

	        st.success('✅ Resume formatted successfully!')

	        # The name may be missing, empty (e.g. cleared in the edit box) or not
	        # a plain string. Normalise it once so every use below falls back
	        # correctly instead of showing a blank or raising.
	        display_name = str(data.get('Name') or '').strip()

	        # Show what was included (`or []` keeps len() safe when a key holds None)
	        st.info(f"""
	Formatted Resume Includes:
	• Name: {display_name or 'Not found'}
	• Professional Summary: {'✅' if data.get('Summary') else '❌'}
	• Technical Skills: {len(data.get('Skills') or [])} items
	• Work Experience: {len(data.get('StructuredExperiences') or [])} positions
	• Education: {len(data.get('Education') or [])} items
	""")

	        # Generate filename with candidate name. Every run of whitespace
	        # (spaces, tabs, newlines from the multi-line edit box) becomes one
	        # underscore; an empty name falls back to "Resume".
	        candidate_name = '_'.join(display_name.split()) or 'Resume'
	        filename = f"{candidate_name}_Formatted_Resume.docx"

	        st.download_button(
	            '📥 Download Formatted Resume',
	            data=buf,
	            file_name=filename,
	            mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
	            help=f"Download the formatted resume for {display_name or 'candidate'}"
	        )

	    except Exception as e:
	        # UI boundary: report any build/save failure to the user rather than
	        # letting the page crash.
	        st.error(f"❌ Error generating formatted resume: {str(e)}")
	        st.info("💡 Try editing the extracted data above to fix any issues, or contact support if the problem persists.")

	# Add helpful tips.
	# The pipe characters are written as "\\|" so the literal contains a
	# backslash followed by "|". A bare "\|" is an invalid escape sequence in a
	# non-raw string and triggers a warning on recent Python versions; the
	# resulting text is the same.
	with st.expander("💡 Tips for Better Results"):
	    st.markdown("""
	For best extraction results:
	- Ensure your resume has clear section headers (e.g., "Professional Summary", "Technical Skills", "Work Experience")
	- Use consistent formatting for job entries (Title \\| Company \\| Dates)
	- List technical skills clearly, separated by commas
	- Include bullet points for job responsibilities

	If extraction isn't perfect:
	- Use the "Edit Extracted Data" section above to make corrections
	- The system will learn from different resume formats over time
	- OpenAI GPT-4o provides the most accurate extraction when available
	""")

	# Show extraction method info: the method reported for this resume plus the
	# 'ai_available' and 'prefer_ai' flags read from the extraction stats.
	with st.expander("🔧 Extraction Method Details"):
	    st.markdown(f"""
	Method Used: {method_used}

	Available Methods:
	- OpenAI GPT-4o: Highest accuracy, best for complex formats
	- Hugging Face Cloud: Good accuracy, reliable backup
	- Regex Fallback: Basic extraction, used when AI methods fail

	Current Status:
	- OpenAI Available: {'✅' if stats.get('ai_available') else '❌'}
	- AI Preferred: {'✅' if stats.get('prefer_ai') else '❌'}
	""")

	# Closing section: navigation shortcuts followed by the branded footer.
	st.markdown("---")
	st.markdown("### 🚀 What's Next?")

	home_col, again_col, help_col = st.columns(3)

	# Back to the main page
	if home_col.button("🏠 Return to Home", use_container_width=True):
	    st.switch_page("app.py")

	# Triggers a rerun of this script; the upload widget has no key here, so
	# nothing is explicitly reset by this button.
	if again_col.button("📄 Format Another Resume", use_container_width=True):
	    st.rerun()

	help_col.markdown("Need Help?")
	help_col.markdown("Check the tips above or contact support")

	# Final footer
	st.markdown("---")
	footer_html = (
	    "<div style='text-align: center; color: #666; padding: 20px;'>"
	    "🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
	    "</div>"
	)
	st.markdown(footer_html, unsafe_allow_html=True)