Spaces:
Sleeping
Sleeping
| # MODIFIED: Removed all runtime installation code. Dependencies are now in requirements.txt | |
| import subprocess | |
| import sys | |
| import os | |
| import re | |
| import time | |
| import json | |
| from datetime import datetime | |
| import tempfile | |
| import pandas as pd | |
| import gradio as gr | |
| from openai import OpenAI | |
| import requests | |
| from urllib.parse import quote_plus | |
| from bs4 import BeautifulSoup | |
| from docx import Document | |
| from docx.shared import Inches, Pt | |
| from docx.enum.style import WD_STYLE_TYPE | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH | |
| from docx.oxml import OxmlElement | |
| from docx.oxml.ns import qn | |
| from itertools import cycle | |
| from serpapi import GoogleSearch | |
| import spacy # MODIFIED: Directly import spacy | |
# The spaCy English model is expected to be installed via requirements.txt,
# so it is loaded eagerly at import time.
nlp = spacy.load("en_core_web_sm")

# ======================== CONFIGURE OPENAI & SERPAPI ========================


def _require_secret(name):
    """Return the value of environment variable *name*.

    Raises:
        ValueError: if the variable is unset or empty, so a misconfigured
            Space fails at startup instead of on the first API call.
    """
    value = os.environ.get(name)
    if not value:
        raise ValueError(f"π΄ ERROR: The {name} secret is not set in your Hugging Face Space settings!")
    return value


# API keys come from Hugging Face Secrets, which are read here as
# environment variables. The OpenAI key is validated first, then SerpApi.
openai_api_key = _require_secret("OPENAI_API_KEY")
serpapi_key = _require_secret("SERPAPI_KEY")

client = OpenAI(api_key=openai_api_key)
SERPAPI_KEY = serpapi_key
| # ======================== BLOG CONTENT TYPES ======================== | |
# Maps each blog-type label (emoji suffix included) to a short description of
# that content style.
# NOTE(review): the keys of BLOG_STRUCTURES carry no emoji suffix, so some
# normalisation presumably happens elsewhere in the file -- confirm.
BLOG_TYPES = {
    "How-To Guide π": "step-by-step tutorials",
    "Listicle π": "list-based articles",
    "Comparison Post βοΈ": "versus comparisons",
    "Case Study π": "success stories",
    "Industry Trends π": "market analysis",
    "Founder Post π‘": "opinion pieces",
    "Templates and Toolkits": "downloadable resources",
}
| # ======================== NEW: LISTICLE ASPECTS FOR DIVERSE CONTENT ======================== | |
# Ten distinct thematic angles for listicle content. Each entry pairs a theme
# name with parenthesised examples (the DEI entry has none).
# NOTE(review): how these are consumed is not visible here; `cycle` is imported
# at the top of the file, so they are presumably rotated across list items.
LISTICLE_ASPECTS = [
    "Technology & Automation (e.g., AI in recruiting, new software)",
    "Company Culture & Employee Well-being (e.g., asynchronous work, burnout prevention)",
    "Financial Strategy & Compensation (e.g., global pay scales, benefits packages)",
    "Global Talent Sourcing & Emerging Markets (e.g., hiring from new regions)",
    "Data Analytics & Performance Metrics (e.g., using data to track remote productivity)",
    "Legal, Compliance & Security (e.g., cross-border regulations, data privacy)",
    "Diversity, Equity, and Inclusion (DEI) in Remote Teams",
    "Skill Development & Virtual Onboarding (e.g., upskilling a remote workforce)",
    "Collaboration Tools & Digital Workspaces (e.g., the new generation of tools beyond Zoom)",
    "Strategic Workforce Planning (e.g., hybrid models, contractor vs. full-time mix)",
]
| # ======================== BLOG STRUCTURES (REFINED PROMPTS) ======================== | |
# Prompt shared by the "FAQs" entries in BLOG_STRUCTURES. It pins a strict
# "Q." / "A." line format for the model's answer.
# The example separates its two Q&A pairs with a blank line so that it agrees
# with rule 4; previously the "perfect format" example contradicted that rule.
faq_prompt = """
You are an expert on the given blog topic. Your task is to generate 3 to 4 highly relevant frequently asked questions and their concise answers.
**CRITICAL INSTRUCTIONS:**
1. Generate **at least 3** complete Q&A pairs.
2. Each question MUST start with "Q." and be on its own line.
3. Each answer MUST start with "A." and be on its own line, immediately following its question.
4. There should be a blank line between each Q&A pair.
5. Do NOT include any other text, introductions, or explanations. Only provide the Q&A pairs.
**EXAMPLE of PERFECT FORMAT:**
Q. What is the main benefit of using this service?
A. The main benefit is a significant reduction in operational costs, often by up to 60%, without sacrificing quality.

Q. Who is this service for?
A. This service is primarily for HR managers at tech startups and global recruitment agencies looking to scale their teams efficiently.
**YOUR TASK:**
Now, generate the FAQs for the current blog topic following these rules and the example format exactly.
"""
# Placeholder prompt shared by every table-of-contents / jump-links row.
_TOC_PLACEHOLDER = "This is a placeholder for the auto-generated Table of Contents."

# Prompt shared verbatim by all ten Listicle "Item N" rows.
_LISTICLE_ITEM_PROMPT = (
    "For this list item, explain the concept clearly. Use a relatable example or a mini-story "
    "to illustrate the point. Provide a tangible, actionable takeaway for the reader."
)

# Outline of every supported blog format, keyed by blog-type name. Each value
# is an ordered list of 5-tuples:
#     (section heading, subsection heading, int, int, prompt text)
# NOTE(review): the two integers look like minimum and maximum word counts for
# the subsection -- confirm against the code that consumes these tuples.
BLOG_STRUCTURES = {
    "How-To Guide": [
        ("Intro Hook", "Problem Hook", 40, 60, "Hook the reader by presenting a common, relatable problem they're struggling with. Frame it as a challenge that many in their position face."),
        ("Intro Hook", "Promise the Outcome", 20, 30, "In one single, powerful sentence, promise the specific, tangible skill the reader will have after finishing this guide. Be extremely concise. Example: 'By the end of this guide, you will be able to build a high-performing global team from scratch.'"),
        ("Table of Contents", "Jump Links", 10, 20, _TOC_PLACEHOLDER),
        ("Author Details", "Author Byline", 15, 25, "Write a brief, authoritative byline for the 'Digiworks Team,' highlighting expertise in the topic."),
        ("Author Details", "Publication Date", 5, 10, "Generate the publication and last updated dates. Format: Published: [Date] | Last updated: [Date]"),
        ("Author Details", "Social Proof Badges", 10, 20, "Provide short social proof, like 'Trusted by 10,000+ businesses' or 'As featured in Forbes.'"),
        ("Context & Why It Matters", "What Is [Topic]?", 120, 180, "Define the main topic in the context of a business audience. Explain what it is and its relevance to them, linking to a pillar post if applicable."),
        ("Context & Why It Matters", "Why [Topic] Matters", 120, 180, "Explain the strategic importance of the topic. Use compelling statistics (e.g., 'boosts productivity by 30%') to show tangible benefits like saving time, reducing costs, or improving efficiency."),
        ("Prerequisites", "What You'll Need Checklist", 80, 120, "Create a bulleted checklist of necessary tools, accounts, or knowledge. Use checkmark icons (β) and briefly explain why each item is needed."),
        ("Step-by-Step Guide", "Step 1: [Actionable Step Title]", 100, 150, "Start with a clear, action-oriented title. Explain why this step is crucial. Provide 1-3 clear actions. Include a 'Pro Tip:' or a reference to a visual aid (e.g., 'See the dashboard screenshot below')."),
        ("Step-by-Step Guide", "Step 2: [Actionable Step Title]", 100, 150, "Write a clear, action-oriented title for the second step. Explain its importance. List 1-3 actions. Include a practical tip or shortcut."),
        ("Step-by-Step Guide", "Step 3: [Actionable Step Title]", 100, 150, "Create an action-oriented title for the third step. Explain why it matters. List 1-3 actions. Add a helpful tip or visual cue description."),
        ("Step-by-Step Guide", "Step 4: [Actionable Step Title]", 100, 150, "Formulate an action-oriented title for the fourth step. Explain its significance. List 1-3 actions. Provide a useful tip."),
        ("Step-by-Step Guide", "Step 5: [Actionable Step Title]", 100, 150, "Develop an action-oriented title for the final step. Explain why this step is important for success. List 1-3 actions. End with an expert tip."),
        ("Pro Tips & Common Pitfalls", "Pro Tips from Digiworks Experts", 80, 120, "List 3-5 'power user' tips in a bulleted format. These should be advanced, actionable insights that help readers achieve maximum efficiency."),
        ("Pro Tips & Common Pitfalls", "Common Mistakes to Avoid", 60, 100, "Create a bulleted list of 3 common mistakes related to the topic. For each mistake, briefly explain how to avoid it."),
        ("Real-World Examples", "Mini Case Study", 100, 150, "Tell a brief story about how a company (e.g., 'a fast-growing tech startup') used this method to achieve a specific, measurable result (e.g., 'increased efficiency by 35%')."),
        ("Real-World Examples", "Reader Example", 60, 100, "Write a paragraph that directly addresses the reader, inviting them to share their own success stories or experiences in the comments to build community engagement."),
        ("Advanced Techniques", "Going Deeper", 100, 150, "Offer an optional advanced strategy for readers who want to learn more. Link to other internal or external guides for further reading."),
        ("Measuring Success", "Key Metrics to Track", 60, 100, "List the key metrics readers should track to measure their success (e.g., time to hire, ROI, engagement rate). Explain why these metrics are important."),
        ("Measuring Success", "Recommended Tools", 60, 100, "Recommend 2-3 specific tools (e.g., Google Analytics, Digiworks' dashboards) for monitoring performance and tracking the metrics mentioned above."),
        ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt),
        ("Conclusion & Next Steps", "Recap", 40, 60, "Summarize the key takeaways in 2-3 concise bullet points, reinforcing what the reader has learned."),
        ("Conclusion & Next Steps", "Primary Call-to-Action", 40, 60, "Write a strong, action-oriented call-to-action that encourages the reader to engage with Digiworks' main service (e.g., 'Hire Vetted Remote Talent')."),
        ("Conclusion & Next Steps", "Secondary Call-to-Action", 30, 50, "Provide a secondary, content-focused CTA to keep readers engaged, such as 'Download our free checklist' or 'Read our advanced guide on [related topic].'"),
    ],
    "Listicle": [
        ("Above-the-Fold Essentials", "Intro Hook (Pain Point)", 40, 60, "Hook the reader with a common frustration, like 'Finding the right [tool/tactic] to scale your business can feel impossible...'"),
        ("Above-the-Fold Essentials", "Intro Hook (Promise)", 20, 40, "Promise a clear solution: 'We've narrowed down the [X] best options so you can focus on growth.'"),
        ("Above-the-Fold Essentials", "Quick Stats", 20, 40, "Provide 1-2 compelling data points that underscore the importance of the list (e.g., '84% of successful businesses use at least 3 of these tools')."),
        ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Build credibility with a trust-building statement like 'From the experts at Digiworks...' and include publication/update dates."),
        ("Above-the-Fold Essentials", "Table of Contents", 10, 20, _TOC_PLACEHOLDER),
        ("How We Chose These", "Criteria Overview", 80, 120, "Explain the selection methodology in 3-5 bullet points (e.g., cost-effectiveness, scalability, ease of use). This builds transparency."),
        ("How We Chose These", "Scoring System", 60, 100, "Briefly explain the scoring rubric if items are scored (e.g., a five-star system for 'Value' or 'Ease of Use')."),
        # Items 1 through 10 differ only in their number.
        *[
            ("The List", f"Item {rank}: [Name]", 80, 150, _LISTICLE_ITEM_PROMPT)
            for rank in range(1, 11)
        ],
        ("Comparison Table", "At-a-Glance Comparison", 30, 50, "Write a brief placeholder text. The final summary table will be automatically generated by AI during the Word document creation phase based on the list items above."),
        ("In-Depth Picks", "Editor's Choice: Top 3 Deep Dive", 100, 150, "Write brief 3-4 sentence mini-reviews of the top 3 items from the list. Include specific pros and cons for each of these top picks."),
        ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt),
        ("Conclusion & Next Steps", "Recap", 40, 60, "Briefly recap the key takeaway of the list, reinforcing the value provided to the reader."),
        ("Conclusion & Next Steps", "Primary Call-to-Action", 40, 60, "Write a primary CTA that connects the listicle's topic to Digiworks' service, e.g., 'Ready to delegate these tasks? Hire an expert from Digiworks.'"),
        ("Conclusion & Next Steps", "Secondary Call-to-Action", 30, 50, "Encourage community interaction, asking readers to comment with their favorite item or share their own recommendations."),
    ],
    "Comparison Post": [
        ("Above-the-Fold Essentials", "Intro Hook (Use Case)", 40, 60, "Start with a common dilemma: 'Choosing between [Option A] and [Option B] for your business? Here's the definitive breakdown.'"),
        ("Above-the-Fold Essentials", "Intro Hook (Preview Verdict)", 20, 40, "Give a sneak peek of the conclusion to keep readers engaged: 'Read on to see which wins on pricing, features, and overall value.'"),
        ("Above-the-Fold Essentials", "Quick Verdict Bar", 20, 40, "Include a one-sentence 'Overall Winner' callout right at the top for skimmers."),
        ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Establish authority with a statement like 'An unbiased breakdown from the experts at Digiworks...' and include publication dates."),
        ("Above-the-Fold Essentials", "Jump Links", 10, 20, _TOC_PLACEHOLDER),
        # Raw string: the backslash in `\|` must reach the model unchanged.
        ("Comparison Summary Table", "At-a-Glance Matrix", 100, 200, r"""
Create a **clean markdown table** comparing [Option A] and [Option B].
**Rules for the table:**
1. Start with a header row: `| Metric | [Option A Title] vs. [Option B Title] |`
2. Follow with a separator: `|---|---|`
3. Each comparison metric gets **its own row** in this format:
`| **Metric Name** | [Option A details] \| [Option B details] |`
4. The `\|` is crucial to separate the details for Option A and B within the second column.
**Example Output:**
```
| Metric | Local Hiring vs. Global Remote Talent |
|---|---|
| **Pricing** | High overheads ($50/hr) \| Up to 60% savings ($20/hr) |
| **Scalability** | Limited by local talent pool \| Access to global talent |
```
"""),
        ("How We Evaluated", "Criteria & Weighting", 80, 120, "List the evaluation criteria (e.g., Pricing, Features, Talent Vetting, Support). Briefly note the methodology (e.g., 'Based on client feedback and public data')."),
        ("How We Evaluated", "Scoring System", 60, 100, "If a scoring system is used, explain it. Clarify any special considerations (e.g., 'annual vs. monthly pricing')."),
        ("Deep-Dive Comparison", "Pricing Models Breakdown", 100, 150, "Provide an in-depth comparison of the pricing structures. Discuss cost per hire, monthly fees, and potential hidden costs. Calculate ROI."),
        ("Deep-Dive Comparison", "Feature Set Comparison", 100, 150, "Detail and compare the core features, unique capabilities, and integrations of each option."),
        ("Deep-Dive Comparison", "Scalability & Growth", 100, 150, "Analyze how each option supports business growth, considering factors like onboarding speed, team expansion, and geographic reach."),
        ("Pros & Cons", "Digiworks Pros & Cons", 60, 100, "List 3 major pros for Digiworks (e.g., β Vetted talent, β Cost savings). Then, list 2 honest cons or limitations (e.g., β Niche roles may take longer)."),
        ("Pros & Cons", "Competitor Pros & Cons", 60, 100, "List 3 major pros and 2 honest cons for the competitor, maintaining an objective and fair tone."),
        ("Use-Case Recommendations", "Which Is Best For You?", 80, 120, "Provide clear, scenario-based recommendations: 'Best for Startups: [Option] because...'; 'Best for Enterprises: [Option] because...'"),
        ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt),
        ("Conclusion & Final Verdict", "Recap Key Differences", 40, 60, "Summarize the most critical differences between the two options in one-liner bullet points for quick reference."),
        ("Conclusion & Final Verdict", "Overall Winner Declaration", 20, 40, "Clearly declare the winner based on the most important factor for businesses (e.g., 'For cost-effective scalability, Digiworks is the clear winner')."),
        ("Conclusion & Final Verdict", "Primary Call-to-Action", 40, 60, "Write a strong CTA that encourages action, like 'Start your risk-free trial with Digiworks' or 'Book a call to build your team today.'"),
        ("Conclusion & Final Verdict", "Secondary Call-to-Action", 30, 50, "Offer a related content piece to keep the reader engaged, such as 'Compare Digiworks with [another competitor].'"),
    ],
    "Case Study": [
        ("Above-the-Fold Essentials", "Hero Section", 20, 40, "Describe a compelling visual, like a client logo plus a dashboard screenshot showing results."),
        ("Above-the-Fold Essentials", "Quick-fire Stats", 20, 40, "List 2-3 big, bold, quantifiable results in a 'quick-fire' format (e.g., '+60% revenue growth,' '3x ROI in 6 months')."),
        ("Above-the-Fold Essentials", "Quick-Take Summary", 40, 60, "Write a 2-3 sentence summary covering the client's challenge, the Digiworks solution, and the brilliant outcome."),
        ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Include a client logo and a powerful snippet from their testimonial (e.g., '\"Digiworks transformed our workflow...\" β Jane Doe, CMO')."),
        ("Above-the-Fold Essentials", "Jump Links", 10, 20, _TOC_PLACEHOLDER),
        ("Client Overview", "About the Client", 30, 50, "Provide 1-2 sentences about the client: their industry, size, and market position."),
        ("Client Overview", "The Context", 60, 100, "Set the scene. What was their situation *before* Digiworks? Why were they seeking a solution?"),
        ("The Challenge", "Key Pain Points", 60, 100, "Create a bulleted list of 3-5 specific, tangible problems the client faced (e.g., slow hiring, high costs, skill gaps)."),
        ("The Challenge", "Impact of the Problem", 60, 100, "Quantify the negative impact of the problems where possible (e.g., 'Monthly hiring cost was $15K,' 'Customer response time was 48 hours')."),
        ("The Solution", "Why They Chose Digiworks", 60, 100, "List 2-3 key reasons the client chose Digiworks, focusing on our core strengths (e.g., pre-vetted talent, speed, cost structure)."),
        ("The Solution", "Implementation Details", 100, 150, "Detail the execution: What roles were filled? What tools were used? What was the timeline from start to full deployment?"),
        ("The Results", "Headline Metrics", 40, 60, "Display the most impressive outcome numbers in a big, bold format (e.g., '+60% revenue increase,' '90% faster hiring')."),
        ("The Results", "Detailed Metrics & Impact", 100, 150, "Provide a more detailed breakdown of the transformation. A 'before/after' comparison table works well here."),
        ("The Results", "Visual Data Representation", 20, 40, "Describe a chart, graph, or annotated screenshot that visually represents the positive results."),
        ("Client Testimonial", "Featured Quote Block", 40, 60, "Extract a powerful, concise quote from the client that encapsulates the success of the project."),
        ("Client Testimonial", "Video Testimonial", 30, 50, "If applicable, describe a 1-2 minute video testimonial with the client discussing their positive experience."),
        ("Lessons Learned", "Key Takeaways for Others", 80, 120, "Provide 3 bulleted, actionable insights that other businesses can learn from this case study."),
        ("Lessons Learned", "Do's and Don'ts", 60, 100, "Offer 2-3 'Do's' and 'Don'ts' based on the client's journey and the lessons learned."),
        ("Next Steps & CTAs", "For Prospective Clients", 40, 60, "Write a direct and compelling CTA for new clients, e.g., 'Ready to see results like these? Start your no-risk trial today.'"),
        ("Next Steps & CTAs", "Related Case Studies", 30, 50, "Link to 2-3 similar case studies, categorized by industry or business outcome."),
        ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt),
    ],
    "Industry Trends": [
        ("Above-the-Fold Essentials", "Intro Hook (Macro Context)", 40, 60, "Start with a big-picture statement about the evolving future of work, positioning the topic as a competitive imperative."),
        ("Above-the-Fold Essentials", "Intro Hook (Promise)", 20, 40, "Promise to deliver a specific number of data-backed trends that the reader cannot afford to ignore."),
        ("Above-the-Fold Essentials", "Key Stats Snapshot", 30, 50, "List 2-3 eye-popping, one-line statistics that immediately grab the reader's attention."),
        ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Establish credibility by citing Digiworks' expertise and years of market data. Include publication dates."),
        ("Above-the-Fold Essentials", "Jump Links", 10, 20, _TOC_PLACEHOLDER),
        ("Methodology", "Data Sources", 60, 100, "List the authoritative sources used for the analysis (e.g., 'Google Trends, Gartner studies, our internal talent marketplace data')."),
        ("Methodology", "Timeframe & Scope", 60, 100, "Define the research period and scope (e.g., 'Analysis period: Q1 2024βQ4 2025; Global sample across 50+ countries')."),
        ("Methodology", "Selection Criteria", 60, 100, "Explain why these specific trends were chosen (e.g., 'Based on search volume growth, social media mentions, and market impact potential')."),
        ("The Top Trends", "Trend 1: [Trend Name]", 100, 150, "Define the trend, explain why it's critical for modern businesses, provide a real-world example of an early adopter, and support it with a statistic."),
        ("The Top Trends", "Trend 2: [Trend Name]", 100, 150, "Define the trend, explain why it matters, give a real-world example, and back it up with a data point."),
        ("The Top Trends", "Trend 3: [Trend Name]", 100, 150, "Define the trend, explain its importance, offer a real-world example, and include a supporting statistic."),
        ("The Top Trends", "Trend 4: [Trend Name]", 100, 150, "Define the trend, explain its business impact, provide a real-world example, and cite relevant data."),
        ("The Top Trends", "Trend 5: [Trend Name]", 100, 150, "Define the trend, explain why it's a must-watch, show a real-world example, and support it with a statistic."),
        ("The Top Trends", "Trend 6: [Trend Name]", 100, 150, "Define the trend, explain its relevance, give a real-world example, and provide a supporting data point."),
        ("The Top Trends", "Trend 7: [Trend Name]", 100, 150, "Define the trend, explain why it matters for growth, offer a real-world example, and include a compelling statistic."),
        ("Expert Voices", "Industry Expert Quotes", 60, 100, "Include 2-3 short, authoritative quotes from respected industry leaders to provide third-party validation."),
        ("Expert Voices", "Micro Case Studies", 80, 120, "Showcase 1-2 bullet points of brands successfully capitalizing on these trends, with quantifiable results (e.g., 'Brand X boosted productivity by 45% using trend Y')."),
        ("Cross-Trend Connections", "How Trends Intersect", 80, 120, "Explain how multiple trends connect and amplify each other (e.g., 'AI-powered hiring + async work = hyper-efficient global teams'). Suggest a visual concept."),
        ("Implications for Your Business", "For Small Teams & Startups", 60, 100, "Provide specific, actionable starting points for resource-constrained businesses. Which trends offer maximum impact for minimal budget?"),
        ("Implications for Your Business", "For Enterprises", 60, 100, "Advise on which trends require significant organizational change or investment. How can they build the internal business case?"),
        ("Implications for Your Business", "For Agencies & Consultants", 60, 100, "Explain how they can package these trend insights into services for their clients, identifying monetization opportunities."),
        ("Tools & Resources", "Data & Analytics Tools", 60, 100, "Recommend specific tools for tracking these trends (e.g., Ahrefs, Google Trends, LinkedIn Analytics)."),
        ("Tools & Resources", "Execution & Implementation Tools", 60, 100, "Recommend tools for acting on the trends (e.g., Zapier for automation, Slack for async communication)."),
        ("Tools & Resources", "Further Reading & Research", 60, 100, "Link to authoritative deep-dive reports, whitepapers, and thought leadership for readers who want to explore further."),
        ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt),
        ("Conclusion & Next Steps", "Recap of Top Trends", 60, 100, "Provide one concise bullet per trend, summarizing the key strategic takeaway for busy executives."),
        ("Conclusion & Next Steps", "Primary Call-to-Action", 40, 60, "Create a lead-generation CTA, such as 'Download the complete 2025 Trends Report PDF' or 'Book a complimentary trends workshop.'"),
        ("Conclusion & Next Steps", "Secondary Call-to-Action", 30, 50, "Offer a content-based CTA to keep readers in the ecosystem, like 'Read our deep-dive on [most popular trend]' or 'Subscribe to our newsletter.'"),
    ],
    "Founder Post": [
        ("Hook", "Gripping Anecdote or Statistic", 100, 150, "Start with a powerful personal story, a shocking industry statistic, or a provocative question that challenges conventional wisdom."),
        ("Thesis", "The Core Argument", 100, 150, "State the main thesis or controversial opinion clearly and boldly in one paragraph. This is the central idea the entire post will defend."),
        ("Supporting Arguments", "Supporting Point 1", 150, 200, "Provide the first major argument supporting the thesis. Back it up with concrete evidence, a personal anecdote, data, or a client example."),
        ("Supporting Arguments", "Supporting Point 2", 150, 200, "Provide the second major argument. It should build on the previous point and be supported by fresh evidence or a different example."),
        ("Supporting Arguments", "Supporting Point 3", 150, 200, "Provide the third major argument to strengthen the overall case. Use a compelling story or data point as evidence."),
        ("Supporting Arguments", "Supporting Point 4 (Optional)", 150, 200, "If essential, add a fourth argument to fully develop the thesis, complete with its own evidence or anecdote."),
        ("Counter-Arguments", "Addressing the Other Side", 150, 200, "Acknowledge 1-2 common counter-arguments to your thesis. Thoughtfully rebut them to show intellectual honesty and strengthen your position."),
        ("Conclusion", "Actionable Insights & Next Steps", 150, 200, "Summarize the core argument with impact. Provide 2-3 specific, actionable steps or pieces of advice that readers can implement immediately."),
        ("Engagement Prompt", "Cross-Post to LinkedIn", 50, 70, "Encourage discussion on a different platform: 'I've also shared my thoughts on this over on LinkedInβI'd love to hear your perspective. Let's discuss in the comments there.'"),
    ],
    "Templates and Toolkits": [
        ("Above-the-Fold Essentials", "Intro Hook (Pain)", 40, 60, "Highlight a common and tedious pain point: 'Struggling to create high-quality [documents] from scratch every week? It's exhausting and time-consuming.'"),
        ("Above-the-Fold Essentials", "Intro Hook (Promise)", 20, 40, "Promise a direct solution: 'Download these [X] battle-tested, ready-to-use templates to cut your creation time in half.'"),
        ("Above-the-Fold Essentials", "Visual Preview", 20, 40, "Describe a compelling visual of the toolkit, like a thumbnail or GIF showing the templates in action."),
        ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Add social proof, like 'Trusted by 5,000+ marketers and founders,' along with the author byline and publication dates."),
        ("Above-the-Fold Essentials", "Jump Links", 10, 20, _TOC_PLACEHOLDER),
        ("What's Included", "Complete Template List", 100, 200, "Create a numbered list of every template in the kit. For each, provide a one-line description and specify the file format (e.g., 'Template 1: How-To Blog Post Outline (DOCX)')."),
        ("What's Included", "Bonus Assets & Resources", 60, 100, "List any additional value-adds included, such as checklists, swipe files, or calendar spreadsheets."),
        ("Why Use Templates", "Speed & Efficiency Gains", 60, 100, "Explain the benefit of speed. Quantify the time savings where possible (e.g., 'Cut drafting time by 60%' or 'Produce content 3x faster')."),
        ("Why Use Templates", "Consistency & Brand Voice", 60, 100, "Explain the benefit of consistency. How do templates ensure uniform quality and tone, even with multiple creators?"),
        ("Why Use Templates", "Proven Frameworks That Convert", 60, 100, "Explain that these aren't blank docs but are built on frameworks proven to drive engagement, conversions, and business results."),
        ("How to Customize", "Customization Guide for Template 1", 100, 150, "Provide a mini step-by-step guide for adapting the first major template. Show a 'before/after' example of a generic vs. customized version."),
        ("How to Customize", "Customization Guide for Template 2", 100, 150, "Provide step-by-step instructions for adapting the second major template, including where to add brand voice and what to personalize."),
        ("How to Customize", "Customization Guide for Template 3", 100, 150, "Provide instructions for adapting the third major template, with specific use cases for different industries or goals."),
        ("Real-World Examples", "Case Study: Company A Success", 80, 120, "Provide a detailed example with metrics: 'How SaaS Startup A used our Listicle Template to increase organic traffic by 40% in 90 days.'"),
        ("Real-World Examples", "Case Study: Freelancer B Success", 80, 120, "Provide another success story: 'How Freelancer B leveraged our Content Calendar Template to double their client roster.' Include a testimonial quote."),
        ("Advanced Toolkit Hacks", "Automation with Zapier", 60, 100, "Show how to connect the templates to automation workflows (e.g., auto-populate a calendar from form submissions)."),
        ("Advanced Toolkit Hacks", "Integration Tips", 60, 100, "Explain how to integrate the templates with popular tools like Google Analytics, Notion, or Airtable."),
        ("Advanced Toolkit Hacks", "Team Collaboration Best Practices", 60, 100, "Give best practices for using the templates with a distributed team, such as version control and comment workflows."),
        ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt),
        ("Download & Next Steps", "Primary Download CTA", 40, 60, "Write a prominent, action-oriented CTA like 'Download Your Free Templates & Toolkit Now.' This is typically for lead capture."),
        ("Download & Next Steps", "Secondary Content CTA", 30, 50, "Keep readers engaged with a link to a related pillar content piece, such as 'Check out our advanced guide to content marketing.'"),
        ("Download & Next Steps", "Social Share Prompt", 20, 40, "Encourage viral sharing with a prompt like 'Found these templates helpful? Share with your network!'"),
    ],
}
| # ======================== RESEARCH FUNCTIONS ======================== | |
# Research helpers: DuckDuckGo fallback search, SerpApi search, and website analysis.
def search_web(query):  # This is now a fallback
    """Query the DuckDuckGo Instant Answer API as a fallback research source.

    Args:
        query: Free-text search query.

    Returns:
        A dict containing only the non-empty entries among ``query``,
        ``abstract``, ``answer`` and ``related_topics`` (at most three topic
        texts, each cut to 100 characters). On any failure the dict is
        ``{"error": "Search failed: <reason>"}`` instead; no exception
        escapes this function.
    """
    try:
        search_query = quote_plus(query)
        url = f"https://api.duckduckgo.com/?q={search_query}&format=json&no_html=1&skip_disambig=1"
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx as failures instead of trying to parse an error body.
        response.raise_for_status()
        data = response.json()
        related_topics = [
            topic.get("Text", "")[:100]
            for topic in data.get("RelatedTopics", [])[:3]
            if topic.get("Text")
        ]
        results = {
            "query": query,
            "abstract": data.get("Abstract", ""),
            "answer": data.get("Answer", ""),
            "related_topics": related_topics,
        }
        # Drop empty fields so callers only see data that is actually present.
        return {key: value for key, value in results.items() if value}
    except Exception as e:
        # Best-effort boundary: callers expect a dict, never an exception.
        return {"error": f"Search failed: {str(e)}"}
def search_with_serpapi(query, api_key):
    """Search Google through SerpApi, falling back to ``search_web`` on failure.

    Args:
        query: Free-text search query.
        api_key: SerpApi key; when falsy the basic search is used directly.

    Returns:
        A dict with ``query`` plus whichever of ``answer_box``,
        ``organic_results`` (first five title/snippet pairs),
        ``related_questions`` and ``related_searches`` the response contained.
        When the key is missing, the request raises, or the response carries
        an ``"error"`` entry, the result of ``search_web(query)`` is returned.
    """
    if not api_key:
        print("β οΈ SerpApi key not found. Falling back to basic search.")
        return search_web(query)
    try:
        print(f"π¬ Performing advanced research on '{query}' with SerpApi...")
        params = {"engine": "google", "q": query, "api_key": api_key}
        search = GoogleSearch(params)
        results = search.get_dict()
        # SerpApi reports failures (bad key, quota, no results) inside the
        # payload rather than by raising; route them to the fallback below.
        if error_message := results.get("error"):
            raise RuntimeError(error_message)
        output = {"query": query}
        if "answer_box" in results:
            answer_box = results["answer_box"]
            output["answer_box"] = answer_box.get("snippet") or answer_box.get("answer")
        if "organic_results" in results:
            output["organic_results"] = [
                {"title": r.get("title"), "snippet": r.get("snippet")}
                for r in results["organic_results"][:5]
            ]
        if "related_questions" in results:
            output["related_questions"] = [q.get("question") for q in results["related_questions"]]
        if "related_searches" in results:
            output["related_searches"] = [s.get("query") for s in results["related_searches"]]
        print("β Advanced research complete.")
        return output
    except Exception as e:
        print(f"β SerpApi search failed: {e}. Falling back to basic search.")
        return search_web(query)
def analyze_website(url):
    """Fetch a web page and extract its title, meta description and text preview.

    Args:
        url: Address of the page to download.

    Returns:
        ``{"title", "meta_description", "content_preview"}`` where the preview
        is the first 1000 characters of the whitespace-normalised page text
        with ``<script>`` and ``<style>`` removed. On any failure (network
        error, HTTP error status, parse error) ``{"error": "Analysis failed: ..."}``.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        response = requests.get(url, headers=headers, timeout=15)
        # Do not describe a 4xx/5xx error page as if it were the real site.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        title_tag = soup.find('title')
        title = title_tag.text.strip() if title_tag else "No title"

        meta_tag = soup.find('meta', attrs={'name': 'description'})
        meta_desc = meta_tag.get('content', '') if meta_tag else ""

        # Strip non-visible content before extracting the readable text.
        for tag in soup(["script", "style"]):
            tag.decompose()
        clean_text = ' '.join(soup.get_text().split())
        return {"title": title, "meta_description": meta_desc, "content_preview": clean_text[:1000]}
    except Exception as e:
        # Best-effort boundary: callers read the dict with .get() defaults.
        return {"error": f"Analysis failed: {str(e)}"}
| # ======================== PARSING FUNCTION ======================== | |
def parse_generated_content(content_text):
    """Parse the model's ``TOPIC n:`` formatted output into a list of topic dicts.

    Field labels (``Keywords:``, ``Audience:``, ``Content Angle:``,
    ``SEO Opportunity:``) are only recognised at the start of a line, so words
    such as "seo", "audience" or "topic" appearing inside a value no longer
    cut that value short. A trailing ``---`` separator is removed from values.

    Args:
        content_text: Raw text returned by the model; may be empty or None.

    Returns:
        One dict per topic with the keys ``Topic Number``, ``Content Type``
        (left empty for the caller to fill), ``Headline``, ``Keywords``,
        ``Audience``, ``Content Angle``, ``SEO Opportunity`` and ``Generated``
        (local timestamp). An empty list when there is nothing to parse.
    """
    if not content_text:
        return []

    field_for_label = {
        'keyword': 'Keywords',
        'keywords': 'Keywords',
        'audience': 'Audience',
        'content angle': 'Content Angle',
        'seo opportunity': 'SEO Opportunity',
    }
    # A label must open its line; optional list dash, markdown bold markers
    # and a leading "Target" are tolerated around it.
    label_pattern = re.compile(
        r'^[ \t]*(?:-[ \t]*)?\**[ \t]*(?:target[ \t]+)?'
        r'(keywords?|audience|content angle|seo opportunity)'
        r'[ \t]*\**[ \t]*:[ \t]*\**',
        re.IGNORECASE | re.MULTILINE,
    )
    trailing_separator = re.compile(r'(?:\s*-{3,}\s*)+$')

    topics = []
    topic_sections = re.split(r'TOPIC \d+:', str(content_text))
    for i, section in enumerate(topic_sections):
        if i == 0:
            continue  # text before the first "TOPIC n:" marker is preamble
        topic_data = {
            'Topic Number': i,
            'Content Type': '',
            'Headline': '',
            'Keywords': '',
            'Audience': '',
            'Content Angle': '',
            'SEO Opportunity': '',
            'Generated': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        }
        lines = section.strip().split('\n')
        if lines:
            topic_data['Headline'] = lines[0].strip().strip('*').strip()

        label_matches = list(label_pattern.finditer(section))
        for idx, match in enumerate(label_matches):
            field = field_for_label[match.group(1).lower()]
            if topic_data[field]:
                continue  # the first occurrence of a label wins
            # A value runs until the next label line or the end of the section.
            value_end = label_matches[idx + 1].start() if idx + 1 < len(label_matches) else len(section)
            value = trailing_separator.sub('', section[match.end():value_end].strip())
            topic_data[field] = ' '.join(value.split())
        topics.append(topic_data)
    return topics
| # ======================== IDEA GENERATION (MODIFIED) ======================== | |
def generate_blog_topics(content_type, num_topics):
    """Research the market and ask the model for ``num_topics`` blog topic ideas.

    Args:
        content_type: A key of ``BLOG_TYPES`` (unknown keys fall back to
            "general content" in the prompt).
        num_topics: Number of topics requested from the model.

    Returns:
        A 3-tuple ``(report_text, topics_dataframe, status_message)``. When
        anything raises, the report and status carry the error text and the
        DataFrame is empty.
    """
    try:
        site = analyze_website("https://www.teamdigiworks.com/")
        # MODIFICATION: Use SerpApi for primary research
        research_query = f"creative blog ideas for {content_type} about remote hiring"
        serp = search_with_serpapi(research_query, SERPAPI_KEY)

        site_title = site.get('title', 'N/A')
        site_description = site.get('meta_description', 'N/A')
        organic_json = json.dumps(serp.get("organic_results", []), indent=2)
        questions_json = json.dumps(serp.get("related_questions", []), indent=2)
        searches_json = json.dumps(serp.get("related_searches", []), indent=2)

        research_data = f"""
**COMPANY CONTEXT:**
- Digiworks Website Title: {site_title}
- Digiworks Description: {site_description}
- Core Service: Connecting businesses with the top 1% of vetted offshore talent.
**ADVANCED MARKET RESEARCH (from SerpApi):**
- Main Query: "{research_query}"
- Top Search Results (Titles & Snippets): {organic_json}
- 'People Also Ask' (Common User Questions): {questions_json}
- Related Searches (Keyword Ideas): {searches_json}
"""
        content_focus = BLOG_TYPES.get(content_type, "general content")
        # MODIFICATION: Updated prompt to leverage SerpApi data
        final_prompt = f"""
Based on the rich market research data provided below, generate {num_topics} highly creative and SEO-optimized {content_focus} blog topics for Digiworks.
**RESEARCH DATA:**
{research_data}
**YOUR TASK:**
For each of the {num_topics} topics, provide the output in this **EXACT format**:
TOPIC 1: [Creative, Engaging Headline]
Keywords: [A comma-separated list of 5-7 primary and long-tail keywords inspired by the 'Related Searches' and 'Organic Results'.]
Audience: [Specific target business audience, e.g., 'HR Managers at SaaS startups', 'Founders of eCommerce brands'.]
Content Angle: [A unique, creative angle for the post. Use the 'People Also Ask' section to address a real user question.]
SEO Opportunity: [Briefly explain the search potential. Why would this rank well? What user intent does it capture?]
---
(Repeat for TOPIC 2, TOPIC 3, etc.)
**INSTRUCTIONS FOR CREATIVITY:**
1. **Headlines:** Do not use generic titles. Make them compelling, benefit-driven, and intriguing. Use the top search results for inspiration on what already ranks, then make yours better.
2. **Keywords:** Go beyond the obvious. Use the 'Related Searches' data to find valuable long-tail keywords.
3. **Content Angle:** This is crucial. Your angle should be unique. Answering a question from the 'People Also Ask' section is a great way to create a highly relevant and valuable post.
"""
        chat_messages = [
            {"role": "system", "content": "You are a world-class SEO and Content Strategist at Digiworks, specializing in creating viral blog topics."},
            {"role": "user", "content": final_prompt},
        ]
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=chat_messages,
            max_tokens=2000,
            temperature=0.8,
        )
        content_text = response.choices[0].message.content

        topics = parse_generated_content(content_text)
        # The parser leaves 'Content Type' blank; stamp every topic with it here.
        for entry in topics:
            entry['Content Type'] = content_type
        topics_frame = pd.DataFrame(topics)

        report = f"π Successfully generated {len(topics)} {content_type} topics!\n\n{content_text}"
        return report, topics_frame, "β Generation complete!"
    except Exception as e:
        return f"β Error: {str(e)}", pd.DataFrame(), f"β Failed: {str(e)}"
def generate_all_types(num_topics):
    """Generate ``num_topics`` topics for every entry of ``BLOG_TYPES``.

    Returns:
        A 3-tuple ``(combined_report, combined_dataframe, status_message)``;
        the DataFrame is empty and the messages report failure when no
        content type produced any topics.
    """
    frames = []
    reports = []
    for content_type in BLOG_TYPES:
        try:
            result, df, status = generate_blog_topics(content_type, num_topics)
            reports.append(f"\n=== {content_type} ===\n{result}")
            if not df.empty:
                frames.append(df)
            time.sleep(3)  # Respect API rate limits
        except Exception as e:
            reports.append(f"\n=== {content_type} ===\nβ Error: {str(e)}")

    if not frames:
        return "β No topics generated", pd.DataFrame(), "β Generation failed"

    combined_df = pd.concat(frames, ignore_index=True)
    summary = f"π Generated {len(combined_df)} total topics!\n" + "\n".join(reports)
    return summary, combined_df, "β All types complete!"
| # ======================== INTELLIGENT TEXT PROCESSING (MODIFIED) ======================== | |
def intelligent_word_count_adjustment(text, min_words, max_words, content_context=""):
    """Bring ``text`` into the ``min_words``-``max_words`` range when it is outside it.

    Falsy text and text already inside the range are returned untouched.
    Over-long text is handed to ``reduce_content_intelligently`` as spaCy
    sentences; too-short text goes to ``expand_content_intelligently``.
    """
    if not text:
        return text

    word_total = len(text.split())
    within_range = min_words <= word_total <= max_words
    if within_range:
        return text

    print(f" π§ Adjusting: {word_total} β {min_words}-{max_words} words")
    parsed = nlp(text)
    if word_total <= max_words:
        # Out of range but not too long, so it must be too short.
        return expand_content_intelligently(text, min_words, content_context)
    return reduce_content_intelligently(list(parsed.sents), max_words)
def reduce_content_intelligently(sentences, target_words):
    """Shrink a text to at most ``target_words`` words by keeping its best sentences.

    Each sentence is scored on position (opening/closing sentences matter
    most), length, brand/business terms, numeric facts and action verbs.
    Sentences are admitted in score order while they fit the budget, then
    emitted in their ORIGINAL order so the result still reads naturally.

    Args:
        sentences: Sequence of objects exposing a ``.text`` string
            (e.g. spaCy sentence spans).
        target_words: Maximum number of whitespace-separated words to return.

    Returns:
        The reduced text. If not even one sentence fits, the original text
        truncated to ``target_words`` words. An empty string for no input.
    """
    priority_terms = ('digiworks', 'remote', 'talent', 'business', 'team', 'scale',
                      'hire', 'cost', 'efficiency', 'productivity')
    action_words = ('implement', 'achieve', 'optimize', 'improve', 'increase',
                    'reduce', 'enhance', 'build', 'create')
    metric_pattern = re.compile(r'\d+%|\d+\+|\$\d+|\d+x|\d+:')

    total = len(sentences)
    scored = []
    for position, sent in enumerate(sentences):
        sentence_text = sent.text.strip()
        lowered = sentence_text.lower()
        words = len(sent.text.split())

        score = 0
        if position == 0 or position == total - 1:
            score += 10
        elif position == 1 or position == total - 2:
            score += 5
        if 8 <= words <= 25:
            score += 5
        elif words < 5:
            score -= 3
        score += 3 * sum(1 for term in priority_terms if term in lowered)
        if metric_pattern.search(sentence_text):
            score += 4
        score += 2 * sum(1 for word in action_words if word in lowered)
        scored.append((score, position, sentence_text, words))

    # sorted() is stable, so equal scores keep document order.
    ranked = sorted(scored, key=lambda item: item[0], reverse=True)

    kept = []
    used_words = 0
    for _score, position, sentence_text, words in ranked:
        # Skip (rather than stop at) a sentence that does not fit, so shorter
        # lower-ranked sentences can still use the remaining budget.
        if used_words + words > target_words:
            continue
        kept.append((position, sentence_text))
        used_words += words

    # Restore reading order before joining.
    kept.sort(key=lambda item: item[0])
    reduced_text = " ".join(text for _, text in kept if text)

    # --- FIX: Add fallback to prevent returning 0 words ---
    if not reduced_text and sentences:
        original_text = " ".join(s.text for s in sentences)
        return ' '.join(original_text.split()[:target_words])
    return reduced_text
def expand_content_intelligently(text, target_words, content_context):
    """Lengthen ``text`` toward ``target_words`` using brand phrase expansions.

    Each expansion replaces the first WHOLE-WORD occurrence of its key
    (case-sensitive), in priority order, while the text is still below the
    target. Text inserted by an earlier expansion is never expanded again,
    so phrases cannot nest inside each other. If the text is still short,
    one context-specific closing sentence is appended.

    Args:
        text: The content to expand.
        target_words: Word count to aim for; expansion stops once reached.
        content_context: Free text describing the content type (e.g.
            "How-To Guide Intro"); may be empty or None.

    Returns:
        The expanded text (possibly still shorter than ``target_words``).
    """
    digiworks_expansions = {'remote team': 'high-performing remote team from our vetted talent pool', 'virtual assistant': 'skilled virtual assistant from emerging markets', 'talent': 'top 1% vetted talent', 'hire': 'strategically hire and onboard', 'scale': 'scale efficiently and cost-effectively', 'business': 'forward-thinking business', 'team': 'distributed global team', 'cost': 'operational cost savings of up to 60%', 'efficiency': 'operational efficiency', 'Digiworks': 'Digiworks, the leading marketplace for elite offshore talent,'}

    # The text is held as (chunk, frozen) pieces; frozen pieces are inserted
    # expansions and are skipped by later searches.
    segments = [(text, False)]

    def assembled():
        return "".join(chunk for chunk, _ in segments)

    for short, long in digiworks_expansions.items():
        if len(assembled().split()) >= target_words:
            break
        whole_word = re.compile(rf'\b{re.escape(short)}\b')
        for idx, (chunk, frozen) in enumerate(segments):
            if frozen:
                continue
            found = whole_word.search(chunk)
            if found:
                segments[idx:idx + 1] = [
                    (chunk[:found.start()], False),
                    (long, True),
                    (chunk[found.end():], False),
                ]
                break  # only the first occurrence of each key is expanded

    expanded_text = assembled()
    if len(expanded_text.split()) < target_words:
        context_additions = {'how-to': " This approach has been tested across hundreds of client implementations.", 'listicle': " Each option has been evaluated based on real client feedback and ROI.", 'comparison': " Our analysis is based on extensive market research and direct client experience.", 'case study': " This success story represents one of many similar transformations.", 'trends': " These insights are drawn from our proprietary data and market analysis.", 'founder': " This perspective comes from years of building and scaling global remote teams."}
        context_key = (content_context or "").lower()
        for context_type, addition in context_additions.items():
            if context_type in context_key:
                expanded_text += addition
                break
    return expanded_text
| # ======================== OPENAI GENERATION (v1.0.0+) ======================== | |
def generate_with_openai_intelligent(prompt, min_words, max_words, content_context="", delay=8, existing_sections=None):
    """Generate one piece of content with OpenAI and fit it to a word range.

    Args:
        prompt: Task-specific instructions, embedded in the brand prompt.
        min_words: Lower bound of the target word count.
        max_words: Upper bound of the target word count.
        content_context: Passed to the word-count adjuster.
        delay: Seconds to sleep after a successful generation.
        existing_sections: Accepted for interface compatibility; not read here.

    Returns:
        The adjusted content string, or None when the API call (or the
        post-processing) raises.
    """
    system_message = "You are a senior content strategist at Digiworks, the world's leading marketplace for top 1% offshore talent."
    enhanced_prompt = (
        "You are a senior content strategist at Digiworks, the world's leading marketplace for top 1% offshore talent. "
        "You have deep expertise in remote work, global talent acquisition, and business scaling strategies.\n"
        "DIGIWORKS BRAND VOICE:\n"
        "- Authoritative and data-driven\n"
        "- Focus on ROI and business outcomes\n"
        "- Emphasize quality, vetting, and elite talent\n"
        "- Professional but approachable tone\n"
        "- Always position Digiworks as the solution\n"
        f"{prompt}\n"
        f"WORD COUNT TARGET: {min_words}-{max_words} words\n"
        "Write from the expert perspective of the Digiworks team. Include specific examples, actionable insights, and maintain our brand voice throughout.\n"
        "Generate the content now:"
    )
    try:
        print(f" π€ Generating with OpenAI... (target: {min_words}-{max_words} words)")
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": enhanced_prompt},
            ],
            max_tokens=1500,
            temperature=0.7,
        )
        raw_content = completion.choices[0].message.content.strip()
        adjusted_content = intelligent_word_count_adjustment(raw_content, min_words, max_words, content_context)
        final_word_count = len(adjusted_content.split())
        print(f" β Generated & adjusted: {final_word_count} words")
        print(f" β³ Waiting {delay} seconds...")
        time.sleep(delay)
    except Exception as e:
        print(f" β OpenAI API error: {str(e)}")
        return None
    return adjusted_content
| # ======================== BLOG GENERATOR CLASS (MODIFIED) ======================== | |
class BlogGenerator:
    """Generates a full blog post, section by section, from one topic record.

    ``generate_blog_from_topic_data`` is a generator that yields progress
    tuples ``('status', message)`` and finishes with a single
    ``('result', sections_data_or_None, message)`` tuple.
    """

    def __init__(self):
        self.delay_between_sections = 8  # seconds slept after each generated section
        self.global_topic_counter = 0  # auto-assigned topic index when none is given

    @staticmethod
    def _resolve_content_type(raw_type, known_types):
        """Map a UI label such as "Listicle <emoji>" to its structure key.

        The label is matched against the known keys by prefix (longest key
        wins), so any decoration after the name, whatever its encoding, is
        ignored. Unmatched labels are returned stripped.
        """
        label = str(raw_type or '').strip()
        if label in known_types:
            return label
        candidates = [key for key in known_types if label.startswith(key)]
        return max(candidates, key=len) if candidates else label

    def generate_blog_from_topic_data(self, topic_data, topic_index=None):
        """Yield status updates while generating every section of one blog post.

        Args:
            topic_data: Mapping with ``Headline``, ``Content Type`` and
                optionally ``Keywords``, ``Audience``, ``Content Angle``,
                ``SEO Opportunity``.
            topic_index: Explicit topic number; when None the internal
                counter is incremented and used.

        Yields:
            ``('status', message)`` for progress, then one final
            ``('result', sections, message)`` where ``sections`` is a list of
            per-section dicts, or None if the content type is unknown.
        """
        if topic_index is None:
            self.global_topic_counter += 1
            topic_index = self.global_topic_counter
        topic = topic_data.get('Headline', '')
        content_type = self._resolve_content_type(topic_data.get('Content Type', ''), BLOG_STRUCTURES)
        if content_type not in BLOG_STRUCTURES:
            yield ('result', None, f"Invalid content type for topic: {topic}")
            return
        structure = BLOG_STRUCTURES[content_type]
        yield ('status', f"π― Starting Topic #{topic_index}: {topic} ({len(structure)} sections)")
        print(f"\nπ Generating Topic #{topic_index} - '{content_type}' blog: {topic}\nπ Structure has {len(structure)} sections")
        blog_context = f"""
TOPIC NUMBER: {topic_index}
BLOG TOPIC: {topic}
CONTENT TYPE: {content_type}
TARGET KEYWORDS: {topic_data.get('Keywords', '')}
TARGET AUDIENCE: {topic_data.get('Audience', '')}
CONTENT ANGLE: {topic_data.get('Content Angle', '')}
SEO OPPORTUNITY: {topic_data.get('SEO Opportunity', '')}
DIGIWORKS CONTEXT:
- Company: Digiworks (global remote talent marketplace)
- Mission: Connect businesses with top 1% offshore talent
- Value: Cost-effective, vetted, scalable remote teams
- Positioning: Premium, reliable, results-driven solution
"""

        def section_record(section_group, section_name, content, word_count, min_words, max_words):
            # Single place that defines the shape of one exported section row.
            return {'Topic Number': topic_index, 'Blog Topic': topic, 'Content Type': content_type, 'Section Group': section_group, 'Section Name': section_name, 'Section Content': content, 'Word Count': word_count, 'Target Range': f"{min_words}-{max_words}", 'Generated At': datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

        sections_data = []
        for i, (section_group, section_name, min_words, max_words, purpose) in enumerate(structure):
            yield ('status', f"π Topic #{topic_index} | Section {i+1}/{len(structure)}: Generating '{section_name}'...")
            section_content = ""
            if section_name.lower() in ["jump links", "table of contents"]:
                # Filled in later by the document builder, not by the model.
                section_content = "[TOC_PLACEHOLDER]"
                print(f"\n π Placeholder for: {section_name}")
            else:
                print(f"\n π Generating: {section_name} ({min_words}-{max_words} words)")
                temp_purpose = purpose.replace("[Date]", datetime.now().strftime('%B %d, %Y'))
                item_match = re.match(r'Item (\d+):', section_name)
                if content_type == "Listicle" and item_match:
                    item_number = int(item_match.group(1))
                    if 1 <= item_number <= len(LISTICLE_ASPECTS):
                        # Pin each list item to its own aspect to avoid overlap.
                        aspect = LISTICLE_ASPECTS[item_number - 1]
                        temp_purpose = (
                            f"The overall blog topic is '{topic}'. For this specific list item (Item #{item_number}), "
                            f"you must focus exclusively on the aspect of: **{aspect}**. "
                            "Write a compelling title for this item related to this specific aspect, and then write the body content. "
                            "Do not repeat content from other aspects."
                        )
                # --- NEW: Context-Aware Content Generation ---
                previous_content_summary = ""
                if sections_data:
                    previous_content_summary += "\n\n--- PREVIOUSLY GENERATED SECTIONS FOR CONTEXT ---\n"
                    for prev_section in sections_data:
                        if not prev_section['Section Content'].startswith("["):  # Don't include placeholders
                            previous_content_summary += f"\n## {prev_section['Section Name']}\n{prev_section['Section Content']}\n"
                section_prompt = f"""{blog_context}
{previous_content_summary}
--- YOUR CURRENT TASK ---
Now, write the "{section_name}" section for this {content_type} blog post.
Section Group: {section_group}
Section Purpose: {temp_purpose}
Requirements:
- Write {min_words}-{max_words} words.
- **Crucially, ensure the content flows logically from the previous sections provided above.**
- **Do not repeat information that has already been covered.**
- Address the target audience: {topic_data.get('Audience', '')}
- Incorporate keywords naturally: {topic_data.get('Keywords', '')}
- Position Digiworks as the premier solution and be data-driven.
Write only the section content, no headers:"""
                section_content = generate_with_openai_intelligent(
                    section_prompt, min_words, max_words,
                    f"{content_type} {section_name}", self.delay_between_sections, sections_data
                )
            if section_content:
                sections_data.append(section_record(section_group, section_name, section_content, len(section_content.split()), min_words, max_words))
                print(f" β Success: {len(section_content.split())} words")
            else:
                # Keep a visible marker so the failed section shows up in exports.
                sections_data.append(section_record(section_group, section_name, f"[FAILED TO GENERATE - {section_name}]", 0, min_words, max_words))
                print(f" β Failed to generate")
        total_words = sum(s['Word Count'] for s in sections_data)
        successful_sections = len([s for s in sections_data if s['Word Count'] > 0])
        yield ('result', sections_data, f"β Topic #{topic_index} complete: Generated {successful_sections}/{len(structure)} sections ({total_words} words)")

    def reset_counter(self):
        """Restart automatic topic numbering from zero."""
        self.global_topic_counter = 0
| # ======================== EXCEL EXPORT ======================== | |
def create_excel_export(all_sections_data):
    """Write the generated sections to an .xlsx file in the temp directory.

    Args:
        all_sections_data: List of per-section dicts (one row each).

    Returns:
        Path of the written workbook, or None when there is nothing to export.
    """
    if not all_sections_data:
        return None
    df = pd.DataFrame(all_sections_data)

    # Show the topic index as the first column when it is present.
    cols = df.columns.tolist()
    if 'Topic Number' in cols:
        cols.insert(0, cols.pop(cols.index('Topic Number')))
    df = df[cols]

    filename = f"digiworks_indexed_blog_sections_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    temp_path = os.path.join(tempfile.gettempdir(), filename)
    with pd.ExcelWriter(temp_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='Indexed Blog Sections', index=False)
        worksheet = writer.sheets['Indexed Blog Sections']
        for column in worksheet.columns:
            # Size each column to its longest cell, capped at 100 characters.
            max_length = max((len(str(cell.value)) for cell in column), default=0)
            worksheet.column_dimensions[column[0].column_letter].width = min(max_length + 2, 100)
    return temp_path
| # ======================== HELPER FUNCTIONS FOR TABLE GENERATION (MODIFIED) ======================== | |
def _parse_refined_items(response_text, item_details):
    """Decode the model's JSON reply and check it still matches the input rows.

    Raises ValueError when the reply is not a list of dicts with one entry
    per input item. Original item names are restored on the result.
    """
    cleaned = response_text.strip()
    # Accept replies wrapped in a ``` or ```json fence.
    cleaned = re.sub(r'^```[A-Za-z]*\s*', '', cleaned)
    cleaned = re.sub(r'\s*```$', '', cleaned)
    refined = json.loads(cleaned)
    if not isinstance(refined, list) or len(refined) != len(item_details):
        raise ValueError("refined data does not match the input rows")
    if not all(isinstance(entry, dict) for entry in refined):
        raise ValueError("refined data contains non-object entries")
    restored = []
    for original, entry in zip(item_details, refined):
        entry = dict(entry)
        # Rule 1 of the prompt: the trend name must never change.
        if isinstance(original, dict) and 'name' in original:
            entry['name'] = original['name']
        restored.append(entry)
    return restored


def refine_table_content_with_openai(item_details):
    """Ask the model to make the table's summary fields less repetitive.

    Args:
        item_details: List of dicts with ``name``, ``ideal_for`` and
            ``business_benefit`` entries.

    Returns:
        A list of the same length with reworded ``ideal_for`` /
        ``business_benefit`` values and unchanged names. Returns ``[]`` for
        empty input and the original ``item_details`` when the API call fails
        or the reply is not valid, shape-compatible JSON.
    """
    if not item_details:
        return []
    try:
        initial_json = json.dumps(item_details, indent=2)
        prompt = f"""
You are an expert marketing copywriter. Below is a JSON array of summarized trends for a comparison table. The summaries in the "ideal_for" and "business_benefit" fields are repetitive. Your task is to revise them to be more unique, vibrant, and distinct from each other, while staying true to the original meaning.
RULES:
1. Do NOT change the "name" of the trend.
2. For "ideal_for", rephrase to be more specific and varied. Avoid using the same phrase (like "Enterprise HR Departments") repeatedly. Think of different user segments (e.g., "High-Growth Startups," "Global Tech Firms," "Boutique Agencies," "Cost-Conscious Founders").
3. For "business_benefit", rephrase each benefit to be unique. If two items have a similar benefit like "Saves Time", rephrase them differently (e.g., "Accelerates Project Timelines" vs. "Reduces Manual Task Hours").
4. Return the output as a single, valid JSON array of objects, with the same structure as the input.
INPUT DATA:
{initial_json}
Return only the revised JSON array and nothing else.
"""
        response = client.chat.completions.create(model="gpt-4o", messages=[{"role": "system", "content": "You are a creative copywriter who refines JSON data to eliminate repetition and add marketing flair."}, {"role": "user", "content": prompt}], temperature=0.75)
        response_text = response.choices[0].message.content.strip()
        refined_details = _parse_refined_items(response_text, item_details)
        print("β Successfully refined table content for creativity.")
        return refined_details
    except Exception as e:
        # Refinement is cosmetic; on any problem keep the unrefined data.
        print(f"β Failed to refine listicle table content via OpenAI: {e}. Using original data.")
        return item_details
def generate_listicle_table_from_items(item_details):
    """Have the model render the item summaries as one markdown table.

    Returns:
        The model's table text, an empty string for empty input, or a fixed
        one-row "Table Generation Failed" table when the API call raises.
    """
    if not item_details:
        return ""
    print("π€ Calling OpenAI to format the final clean listicle table...")

    summary_lines = []
    for item in item_details:
        summary_lines.append(
            f"- Item Name: {item.get('name', 'N/A')}\n - Ideal For: {item.get('ideal_for', 'N/A')}\n - Benefit: {item.get('business_benefit', 'N/A')}"
        )
    item_summary = "\n".join(summary_lines)

    prompt = f"""
Based on the following item summaries, create a single, clean, valid markdown table.
The table must have exactly these columns in this order: | Trend / Tool | Ideal For | Key Business Benefit |
Do not add any text, explanations, or introductions before or after the table. Only output the markdown table itself.
Item Summaries to format into the table:
{item_summary}
"""
    chat_messages = [
        {"role": "system", "content": "You are a data formatting assistant that specializes in creating clean, professional markdown tables."},
        {"role": "user", "content": prompt},
    ]
    try:
        response = client.chat.completions.create(model="gpt-4o", messages=chat_messages, temperature=0.1)
        table_content = response.choices[0].message.content.strip()
    except Exception as e:
        print(f"β Failed to generate listicle table via OpenAI: {e}")
        return "| Trend / Tool | Ideal For | Key Business Benefit |\n|---|---|---|\n| Table Generation Failed | - | - |"
    print("β Successfully generated clean markdown table.")
    return table_content
| # ======================== TABLE PARSERS ======================== | |
def parse_listicle_table(text):
    """Parse a markdown table into ``(headers, rows)``.

    The header/body separator row is skipped in every common spelling
    (``|---|---|``, ``| --- | --- |``, ``|:---|---:|``). Body rows whose cell
    count differs from the header are dropped.

    Args:
        text: Markdown containing the table; may be empty, None or NaN.

    Returns:
        ``(headers, rows)`` as lists of stripped strings, or ``([], [])`` when
        no header plus at least one further table line is found.
    """
    if not text or pd.isna(text):
        return [], []
    # Every cell consists only of dashes with optional alignment colons.
    separator_row = re.compile(r'^\|?[ \t]*:?-{3,}:?[ \t]*(?:\|[ \t]*:?-{3,}:?[ \t]*)*\|?$')

    lines = [line.strip() for line in str(text).strip().split('\n') if line.strip()]
    table_lines = [
        line for line in lines
        if '|' in line and not line.startswith('|---') and not separator_row.match(line)
    ]
    if len(table_lines) < 2:
        return [], []

    headers = [h.strip() for h in table_lines[0].strip('|').split('|')]
    rows = []
    for line in table_lines[1:]:
        cells = [c.strip() for c in line.strip('|').split('|')]
        if len(cells) == len(headers):
            rows.append(cells)
    return headers, rows
def parse_comparison_table(text):
    """Parse a two-option markdown comparison table.

    The first line containing a ``| Metric |`` cell is the header; its second
    cell (e.g. "A vs. B") becomes the returned title. Separator rows are
    skipped in every common spelling (``|---|``, ``| --- | --- |``,
    ``|:---|``). Each remaining row yields ``[metric, option_a, option_b]``;
    the two options may be separated by ``|`` or by an escaped ``\\|``.

    Args:
        text: Markdown containing the table; may be empty, None or NaN.

    Returns:
        ``(header_text, rows)``; ``("Comparison", [])`` for empty input.
    """
    if not text or pd.isna(text):
        return "Comparison", []
    # Every cell consists only of dashes with optional alignment colons.
    separator_row = re.compile(r'^\|?[ \t]*:?-{3,}:?[ \t]*(?:\|[ \t]*:?-{3,}:?[ \t]*)*\|?$')

    lines = [line.strip() for line in str(text).strip().split('\n') if line.strip()]
    header_text, table_lines, found_header = "Comparison", [], False
    for line in lines:
        if '|' not in line or line.startswith('|---') or separator_row.match(line):
            continue
        if not found_header and '| metric |' in line.lower().replace('*', ''):
            cells = [c.strip().replace('*', '') for c in line.strip('|').split('|')]
            if len(cells) >= 2:
                header_text = cells[1]
            found_header = True
            continue
        table_lines.append(line)

    rows = []
    for line in table_lines:
        parts = line.strip('|').split('|')
        if len(parts) < 2:
            continue
        metric = parts[0].strip().replace('**', '')
        comparison_data = '|'.join(parts[1:]).strip()
        if r'\|' in comparison_data:
            # Options were written inside one cell, split by an escaped pipe.
            comp_parts = [p.strip() for p in comparison_data.split(r'\|')]
        else:
            comp_parts = [p.strip() for p in comparison_data.split('|', 1)]
        if len(comp_parts) == 2:
            rows.append([metric, comp_parts[0], comp_parts[1]])
        elif len(comp_parts) == 1:
            rows.append([metric, comp_parts[0], ''])
    return header_text, rows
| # ======================== WORD DOCUMENT FORMATTER (MODIFIED) ======================== | |
def clean_blog_content(text):
    """Normalise generated markdown-ish text for insertion into the Word document.

    Removes backticks, rewrites ``-``/``+`` list markers as bullets, tidies
    numbered-list spacing, collapses runs of three or more newlines to two,
    and applies the brand-name and client-placeholder substitutions.
    Empty, None or NaN input yields an empty string.
    """
    if not text or pd.isna(text):
        return ""

    cleaned = str(text).replace('`', '')
    line_rules = (
        (r'^\s*-\s+', 'β’ '),
        (r'^\s*\+\s+', 'β’ '),
        (r'^\s*(\d+)\.\s+', r'\1. '),
    )
    for pattern, replacement in line_rules:
        cleaned = re.sub(pattern, replacement, cleaned, flags=re.MULTILINE)
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)

    cleaned = cleaned.strip()
    cleaned = cleaned.replace('DigiWorks', 'Digiworks')
    cleaned = cleaned.replace('[Client Company]', 'TechStart Solutions')
    return cleaned
| # --- MODIFIED: More robust FAQ formatting function --- | |
def format_faq_content(text):
    """Normalise raw FAQ text into ``Q. ...\\nA. ...`` pairs separated by blank lines.

    A ``Q.`` / ``A.`` marker is only recognised at the start of a line or
    when it stands alone between whitespace, so words ending in those
    letters ("USA.", "FAQ.") no longer split an answer. Continuation lines
    are folded into the current question or answer instead of being dropped.

    Rules:
        * A new ``Q.`` replaces a pending question that never got an answer.
        * An ``A.`` with no pending question is ignored.
        * A further ``A.`` inside an answer is kept as ordinary answer text.

    Args:
        text: Raw FAQ text; may be empty, None or NaN.

    Returns:
        The formatted pairs joined by blank lines, or an empty string.
    """
    if not text or pd.isna(text):
        return ""
    raw = str(text).strip()
    marker = re.compile(r'^[ \t]*([QA])\.|(?<=\s)([QA])\.(?=\s|$)', re.MULTILINE)

    # Cut the text into (kind, body) pieces, one per marker.
    matches = list(marker.finditer(raw))
    tokens = []
    for idx, match in enumerate(matches):
        body_end = matches[idx + 1].start() if idx + 1 < len(matches) else len(raw)
        kind = match.group(1) or match.group(2)
        tokens.append((kind, ' '.join(raw[match.end():body_end].split())))

    qa_pairs = []
    question = None
    answer = None
    for kind, body in tokens:
        if kind == 'Q':
            if question is not None and answer is not None:
                qa_pairs.append(f"Q. {question}\nA. {answer}")
            # An empty question cannot anchor an answer, so treat it as absent.
            question, answer = (body or None), None
        elif question is not None:
            if not answer:
                answer = body
            else:
                # A second "A." inside an answer is prose (e.g. "Plan A.").
                answer = ' '.join(part for part in (answer, 'A.', body) if part)
    if question is not None and answer is not None:
        qa_pairs.append(f"Q. {question}\nA. {answer}")
    return "\n\n".join(qa_pairs)
| class WordDocumentGenerator: | |
def __init__(self):
    """Create an empty document, register the blog styles and reset counters."""
    # Viewpoints rotated through when extracting item details for tables.
    persona_prompts = [
        "As a CFO, focus on the financial impact. What is the core ROI or cost-saving benefit?",
        "As a COO, focus on operational efficiency. How does this streamline workflows or improve processes?",
        "As a Head of HR, focus on talent and culture. How does this improve candidate quality or team building?",
        "As a Futurist, focus on the long-term competitive advantage. How does this future-proof the business?",
    ]
    self.doc = Document()
    self.setup_styles()
    self.bookmark_id = 0  # next unused w:id for bookmarks
    self.ai_personas = cycle(persona_prompts)
def setup_styles(self):
    """Register the custom paragraph styles used by the blog document.

    Adds ``BlogTitle``, ``SectionHeader`` and ``BlogContent`` when they are
    not already defined. A failure while creating one style is reported and
    does not prevent the remaining styles from being created.
    """
    styles = self.doc.styles
    # (style name, font attributes, paragraph-format attributes)
    style_specs = (
        ('BlogTitle',
         {'name': 'Arial', 'size': Pt(20), 'bold': True},
         {'alignment': WD_ALIGN_PARAGRAPH.CENTER, 'space_after': Pt(18)}),
        ('SectionHeader',
         {'name': 'Arial', 'size': Pt(14), 'bold': True},
         {'space_before': Pt(12), 'space_after': Pt(6)}),
        ('BlogContent',
         {'name': 'Calibri', 'size': Pt(11)},
         {'space_after': Pt(6), 'line_spacing': 1.15}),
    )
    for style_name, font_attrs, paragraph_attrs in style_specs:
        try:
            if style_name in styles:
                continue
            style = styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH)
            for attr, value in font_attrs.items():
                setattr(style.font, attr, value)
            for attr, value in paragraph_attrs.items():
                setattr(style.paragraph_format, attr, value)
        except Exception as e:
            # Styling is best-effort, but make the failure visible: a missing
            # style otherwise only shows up later as an unexplained lookup error.
            print(f"Warning: could not set up style '{style_name}': {e}")
def add_bookmark(self, paragraph, bookmark_name):
    """Wrap the paragraph's first run in a named bookmark.

    A run is added when the paragraph has none. Each call consumes one
    bookmark id from ``self.bookmark_id``.
    """
    existing_runs = paragraph.runs
    first_run = existing_runs[0] if existing_runs else paragraph.add_run()
    run_element = first_run._r
    current_id = str(self.bookmark_id)

    bookmark_start = OxmlElement('w:bookmarkStart')
    bookmark_start.set(qn('w:id'), current_id)
    bookmark_start.set(qn('w:name'), bookmark_name)
    run_element.addprevious(bookmark_start)

    # The end marker carries the same id so it pairs with the start marker.
    bookmark_end = OxmlElement('w:bookmarkEnd')
    bookmark_end.set(qn('w:id'), current_id)
    run_element.addnext(bookmark_end)

    self.bookmark_id += 1
def add_internal_hyperlink(self, paragraph, text, anchor_name):
    """Append a hyperlink to ``paragraph`` that jumps to a bookmark in this document.

    Args:
        paragraph: Paragraph that receives the link.
        text: Visible link text.
        anchor_name: Name of the target bookmark.
    """
    hyperlink = OxmlElement('w:hyperlink')
    hyperlink.set(qn('w:anchor'), anchor_name)

    sub_run = OxmlElement('w:r')

    # Run properties must be the first child of w:r for the character
    # style to be schema-valid, so add them before the text element.
    r_pr = OxmlElement('w:rPr')
    style = OxmlElement('w:rStyle')
    style.set(qn('w:val'), 'Hyperlink')
    r_pr.append(style)
    sub_run.append(r_pr)

    text_el = OxmlElement('w:t')
    text_el.text = text
    if text and text != text.strip():
        # Without this, leading/trailing spaces in the link text are dropped.
        text_el.set(qn('xml:space'), 'preserve')
    sub_run.append(text_el)

    hyperlink.append(sub_run)
    paragraph._p.append(hyperlink)
def add_listicle_table(self, headers, table_data):
    """Append a grid table with a bold, centred header row to the document.

    Args:
        headers: Column titles; their count defines the number of columns.
        table_data: Iterable of rows. Cells are converted to text; cells
            beyond the number of columns are ignored and missing cells are
            left empty.

    Nothing is added when either argument is empty.
    """
    if not table_data or not headers:
        return
    num_cols = len(headers)
    table = self.doc.add_table(rows=1, cols=num_cols, style='TableGrid')

    for cell, header_text in zip(table.rows[0].cells, headers):
        cell.text = "" if header_text is None else str(header_text)
        for para in cell.paragraphs:
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            for run in para.runs:
                run.bold = True

    for row_data in table_data:
        row_cells = table.add_row().cells
        # zip() bounds the write to the real columns, so an over-long row
        # cannot raise IndexError.
        for cell, cell_text in zip(row_cells, row_data):
            cell.text = "" if cell_text is None else str(cell_text)

    # Spread the columns evenly across a 6.5 inch text width.
    col_width = Inches(6.5 / num_cols)
    for col in table.columns:
        col.width = col_width
def add_comparison_table(self, header_text, table_data):
    """Append a three-column "Metric / A / B" comparison table.

    Row 0 is a merged title cell holding *header_text*; row 1 holds the
    column titles; the remaining rows hold *table_data*. When the header
    contains " vs. " the two option titles are taken from either side of it
    (text from the first "(" onward is dropped from the second title);
    otherwise "Option A" / "Option B" are used.

    Args:
        header_text: Title shown in the merged first row.
        table_data: Iterable of rows of up to three cell values. Extra cells
            are ignored instead of raising.
    """
    if not table_data:
        return
    header_text = '' if header_text is None else str(header_text)

    option_a_title, option_b_title = "Option A", "Option B"
    if " vs. " in header_text.lower():
        # maxsplit/flags are passed by keyword: positional use is deprecated
        # as of Python 3.13.
        parts = re.split(r'\s+vs\.\s+', header_text, maxsplit=1, flags=re.IGNORECASE)
        if len(parts) == 2:
            option_a_title = parts[0].strip()
            option_b_title = parts[1].split('(')[0].strip()

    # Look the style up by name rather than by the deprecated style id.
    table = self.doc.add_table(rows=len(table_data) + 2, cols=3, style='Table Grid')
    table.autofit = False
    # NOTE(review): kept from the original; python-docx documents `autofit`
    # only - confirm `allow_autofit` has any effect.
    table.allow_autofit = False
    table.columns[0].width = Inches(1.5)
    table.columns[1].width = Inches(2.5)
    table.columns[2].width = Inches(2.5)

    title_cell = table.cell(0, 0).merge(table.cell(0, 2))
    title_cell.text = header_text
    title_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    table.cell(1, 0).text = "Metric"
    table.cell(1, 1).text = option_a_title
    table.cell(1, 2).text = option_b_title

    for i, row_data in enumerate(table_data):
        cells = table.rows[i + 2].cells
        # zip() guards against rows that carry more than three values.
        for cell, cell_data in zip(cells, row_data):
            cell.text = '' if cell_data is None else str(cell_data)

    for row_idx, row in enumerate(table.rows):
        is_heading_row = row_idx < 2
        for col_idx, cell in enumerate(row.cells):
            if is_heading_row or col_idx == 0:
                alignment = WD_ALIGN_PARAGRAPH.CENTER
            else:
                alignment = WD_ALIGN_PARAGRAPH.LEFT
            try:
                for paragraph in cell.paragraphs:
                    paragraph.alignment = alignment
                    for run in paragraph.runs:
                        run.font.name = 'Calibri'
                        run.font.size = Pt(10)
                        if is_heading_row:
                            run.bold = True
            except Exception as exc:
                # Formatting is best-effort, but report the failure instead
                # of hiding it completely.
                print(f"Comparison table formatting skipped for cell ({row_idx}, {col_idx}): {exc}")
def _extract_item_details(self, content, persona_prompt):
    """Ask the chat model for summary-table details about one listicle item.

    Args:
        content: Text of the item section. When its first line looks like
            "Item <n>: <title>", the title becomes the fallback name.
        persona_prompt: Persona description inserted into the prompt.

    Returns:
        A dict that always contains "name", "ideal_for" and
        "business_benefit". Values come from the model's JSON reply; any
        missing key, and every failure (API error, invalid JSON, non-object
        JSON), falls back to the defaults. Never raises for those failures.
    """
    default_details = {'name': 'Extraction Failed', 'ideal_for': 'N/A', 'business_benefit': 'N/A'}

    # Empty Excel cells arrive as None/NaN; there is nothing to analyse, so
    # skip the API call instead of crashing on .split().
    if not isinstance(content, str) or not content.strip():
        return default_details

    first_line = content.split('\n', 1)[0]
    name_match = re.search(r'Item \d+:\s*(.*)', first_line)
    if name_match:
        default_details['name'] = name_match.group(1).strip()

    try:
        prompt = f"""
As a business strategist, analyze the following paragraph. Your task is to extract key strategic information and return it as a single, valid JSON object with ONLY the keys "name", "ideal_for", and "business_benefit".
Your Persona: {persona_prompt}
- "name": The full, official name of the trend or tool.
- "ideal_for": Based on your persona, who is the primary beneficiary? Be specific (e.g., "Fast-Growing SaaS Startups", "Enterprise HR Departments", "B2C Marketing Teams").
- "business_benefit": From your persona's viewpoint, what is the most impactful, tangible business outcome? Be concise and use action-oriented language (e.g., "Slashes recruitment overhead", "Boosts operational throughput", "Enhances employer branding", "Secures long-term market leadership").
Paragraph to Analyze:
---
{content}
---
Return only the raw JSON object. Be creative and insightful based on your assigned persona.
"""
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a sharp business strategist providing concise, insightful analysis in JSON format."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
        )
        # `content` can be None on an empty completion.
        response_text = (response.choices[0].message.content or "").strip()

        # Strip a Markdown code fence, with or without the "json" tag.
        if response_text.startswith("```"):
            response_text = response_text[3:]
            if response_text[:4].lower() == "json":
                response_text = response_text[4:]
            response_text = response_text.strip()
        if response_text.endswith("```"):
            response_text = response_text[:-3].strip()

        details = json.loads(response_text)
        if not isinstance(details, dict):
            raise ValueError("model reply is not a JSON object")
        # Defaults first so a reply that omits a key still yields a full row.
        return {**default_details, **details}
    except Exception as e:
        print(f" β AI extraction failed for item '{default_details['name']}': {e}.")
        return default_details
def add_blog_topic(self, topic_number, topic_title, sections_df, content_type):
    """Render one blog topic into the Word document; yields progress strings.

    This is a generator. It adds the topic title, optional group headings,
    each section (heading with word count, body paragraphs, lists, tables),
    a table of contents with internal links, and a trailing page break.

    Args:
        topic_number: Identifier shown in the title and used in bookmark names.
        topic_title: Headline of the topic.
        sections_df: DataFrame with one row per section. Reads the columns
            'Section Name' and 'Section Content' and, when present,
            'Section Group' and 'Word Count'.
        content_type: Content-type label; "Listicle" and "Comparison" are
            detected by substring so labels with a decorative suffix match.

    Yields:
        Plain status strings (only produced for listicle topics).
    """
    def as_text(value, default=''):
        """Return *value* as str; None/NaN (empty Excel cells) become *default*."""
        if value is None or (isinstance(value, float) and value != value):
            return default
        return str(value)

    # NOTE(review): this literal looks mis-encoded (probably a bullet
    # character); confirm it matches what clean_blog_content emits.
    bullet_prefix = 'β’ '
    toc_section_names = ("jump links", "table of contents")
    # Word rejects bookmark names longer than 40 characters.
    max_bookmark_len = 40

    topic_title = as_text(topic_title)
    content_type = as_text(content_type)
    # Substring test, consistent with the "Comparison" check further down.
    is_listicle = "Listicle" in content_type

    self.doc.add_paragraph(f"Topic #{topic_number}: {topic_title}", style='BlogTitle')
    self.doc.add_paragraph("=" * 80).alignment = WD_ALIGN_PARAGRAPH.CENTER

    # ---- Pass 1: collect AI summaries for listicle items -----------------
    listicle_items_data = []
    if is_listicle:
        item_sections = sections_df[sections_df['Section Name'].str.startswith('Item', na=False)]
        for _, item_row in item_sections.iterrows():
            persona = next(self.ai_personas)
            content = as_text(item_row['Section Content'])
            yield f" -> Analyzing Item: '{content[:40]}...' (as {persona.split(',')[0]})"
            # The first content line is the generated title; it overrides the
            # model's "name" so the table matches the section headings.
            generated_title = (content.split('\n')[0].strip() if content else '') or 'Untitled Item'
            details = self._extract_item_details(content, persona)
            details['name'] = generated_title
            listicle_items_data.append(details)

    # ---- Pass 2: build TOC entries, keyed by row so that sections sharing
    # a name each get their own bookmark ---------------------------------
    toc_items = []        # (display_title, bookmark_name)
    bookmark_by_row = {}  # DataFrame row label -> bookmark_name
    non_toc_groups = ["Intro Hook", "Above-the-Fold Essentials", "Author Details", "Conclusion & Next Steps", "Table of Contents"]
    topic_token = re.sub('[^A-Za-z0-9]+', '', str(topic_number))
    for row_label, section in sections_df.iterrows():
        group = as_text(section.get('Section Group', 'General'), 'General')
        original_section_name = as_text(section.get('Section Name', ''))
        section_content = as_text(section.get('Section Content', ''))
        if group in non_toc_groups or not original_section_name:
            continue
        # The TOC section itself never gets a heading/bookmark, so linking
        # to it would leave a dangling link.
        if original_section_name.lower() in toc_section_names:
            continue

        display_title = original_section_name
        is_placeholder_name = (
            '[Actionable Step Title]' in original_section_name
            or '[Trend Name]' in original_section_name
            or ('Item' in original_section_name and '[Name]' in original_section_name)
        )
        if is_placeholder_name:
            first_line = section_content.split('\n')[0].strip()
            if first_line:
                display_title = first_line
        elif '[Topic]' in display_title:
            display_title = display_title.replace('[Topic]', topic_title)

        prefix = f"topic{topic_token}_"
        suffix = f"_{len(toc_items)}"
        name_budget = max(0, max_bookmark_len - len(prefix) - len(suffix))
        safe_name = re.sub('[^A-Za-z0-9]+', '', original_section_name)[:name_budget]
        bookmark_name = f"{prefix}{safe_name}{suffix}"

        toc_items.append((display_title, bookmark_name))
        bookmark_by_row[row_label] = bookmark_name

    # ---- Pass 3: group rows, preserving first-seen group order -----------
    grouped_sections = {}
    for row_label, section in sections_df.iterrows():
        group = as_text(section.get('Section Group', 'General'), 'General')
        grouped_sections.setdefault(group, []).append((row_label, section))

    # ---- Pass 4: render --------------------------------------------------
    show_group_headings = len(grouped_sections) > 1
    for group_name, sections in grouped_sections.items():
        if show_group_headings:
            group_para = self.doc.add_paragraph()
            group_run = group_para.add_run(f"\n{group_name}")
            group_run.font.name = 'Arial'
            group_run.font.size = Pt(16)
            group_run.bold = True
            group_run.underline = True

        for row_label, section in sections:
            section_name = as_text(section.get('Section Name', 'Untitled'), 'Untitled')
            section_content = as_text(section.get('Section Content', ''))
            try:
                word_count = int(section.get('Word Count', 0))
            except (TypeError, ValueError):
                word_count = 0  # empty or non-numeric cell

            if section_name.lower() in toc_section_names:
                header_para = self.doc.add_paragraph(style='SectionHeader')
                header_para.add_run("Table of Contents")
                for text, bookmark in toc_items:
                    p = self.doc.add_paragraph(style='List Bullet')
                    self.add_internal_hyperlink(p, text, bookmark)
                continue

            header_para = self.doc.add_paragraph(style='SectionHeader')
            header_para.add_run(f"{section_name}")
            count_run = header_para.add_run(f" ({word_count} words)")
            count_run.font.name = 'Arial'
            count_run.font.size = Pt(10)
            count_run.italic = True

            bookmark = bookmark_by_row.get(row_label)
            if bookmark is not None:
                self.add_bookmark(header_para, bookmark)

            if section_name == "At-a-Glance Comparison" and is_listicle:
                if listicle_items_data:
                    yield " -> π¨ Refining table content for creativity and uniqueness..."
                    refined_data = refine_table_content_with_openai(listicle_items_data)
                    yield " -> π Formatting final summary table..."
                    clean_table_md = generate_listicle_table_from_items(refined_data)
                    headers, data = parse_listicle_table(clean_table_md)
                    if data:
                        self.add_listicle_table(headers, data)
                        self.doc.add_paragraph()
                    else:
                        self.doc.add_paragraph("[Could not generate or parse the listicle table.]", style='BlogContent')
                else:
                    self.doc.add_paragraph("[No listicle items found to build a table.]", style='BlogContent')
                continue

            cleaned_content = clean_blog_content(section_content)
            if section_name == "FAQs":
                cleaned_content = format_faq_content(cleaned_content)

            if section_name == "At-a-Glance Matrix" and "Comparison" in content_type:
                header, data = parse_comparison_table(cleaned_content)
                if data:
                    self.add_comparison_table(header, data)
                    self.doc.add_paragraph()
                    continue
                # No parsable table: fall through and render as plain text.

            for raw_line in cleaned_content.split('\n'):
                para_text = raw_line.strip()
                if not para_text:
                    continue
                if para_text.startswith(bullet_prefix):
                    # Slice by the prefix length rather than a hard-coded 2
                    # so the whole marker is always removed.
                    self.doc.add_paragraph(para_text[len(bullet_prefix):], style='List Bullet')
                elif re.match(r'^\d+\.\s', para_text):
                    self.doc.add_paragraph(re.sub(r'^\d+\.\s', '', para_text), style='List Number')
                elif para_text.startswith("###"):
                    p = self.doc.add_paragraph()
                    p.add_run(para_text.replace("###", "").strip()).bold = True
                elif para_text.startswith("Q.") or para_text.startswith("A."):
                    # Bold the "Q."/"A." marker, keep the rest regular.
                    p = self.doc.add_paragraph()
                    parts = para_text.split('.', 1)
                    p.add_run(f"{parts[0]}.").bold = True
                    if len(parts) > 1:
                        p.add_run(parts[1])
                else:
                    p = self.doc.add_paragraph(style='BlogContent')
                    # Odd-position pieces are **bold** spans.
                    for part in re.split(r'(\*\*.*?\*\*)', para_text):
                        if part.startswith('**') and part.endswith('**'):
                            p.add_run(part[2:-2]).bold = True
                        else:
                            p.add_run(part)

    # Drop a trailing empty paragraph so the page break follows the content.
    paragraphs = self.doc.paragraphs
    if paragraphs and paragraphs[-1].text == "":
        trailing = paragraphs[-1]._element
        trailing.getparent().remove(trailing)
    self.doc.add_page_break()
def save_document(self, filename):
    """Save the generated Word document to *filename*."""
    document = self.doc
    document.save(filename)
| # ======================== WORD PROCESSING (MODIFIED) ======================== | |
def process_excel_to_word(excel_file_path):
    """Convert an Excel sheet of blog sections into a Word file, step by step.

    This is a generator of UI events. Every yielded item is a tuple whose
    first element is the event kind:

    * ``('status', message)`` - progress update
    * ``('error', message)`` - processing stopped
    * ``('complete', docx_path, summary)`` - document saved to the temp dir

    Args:
        excel_file_path: Path of the workbook. It must contain the columns
            'Topic Number', 'Blog Topic', 'Content Type', 'Section Name' and
            'Section Content'; 'Word Count' is optional.
    """
    try:
        yield ('status', "β Excel file loaded. Reading data...")
        df = pd.read_excel(excel_file_path)

        required_columns = ['Topic Number', 'Blog Topic', 'Content Type', 'Section Name', 'Section Content']
        missing_cols = [col for col in required_columns if col not in df.columns]
        if missing_cols:
            yield ('error', f"Missing required columns: {', '.join(missing_cols)}")
            return

        yield ('status', "π Initializing Word document...")
        doc_generator = WordDocumentGenerator()
        document = doc_generator.doc

        header_para = document.add_paragraph()
        hr = header_para.add_run("Digiworks Blog Content Collection")
        hr.font.name = 'Arial'
        hr.font.size = Pt(24)
        hr.bold = True
        header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        date_para = document.add_paragraph()
        dr = date_para.add_run(f"Generated on: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}")
        dr.font.name = 'Arial'
        dr.font.size = Pt(12)
        dr.italic = True
        date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        document.add_paragraph("\n")

        topics = df.groupby('Topic Number')
        processed_topics, total_topics = 0, len(topics)
        for topic_number, topic_sections in topics:
            processed_topics += 1
            first_row = topic_sections.iloc[0]
            topic_title, content_type = first_row['Blog Topic'], first_row['Content Type']
            # str() so an empty title cell (NaN) cannot break the slice.
            yield ('status', f"βοΈ Building Topic {processed_topics}/{total_topics}: '{str(topic_title)[:40]}...'")
            # add_blog_topic yields bare strings; wrap them so every event
            # leaving this generator is a (kind, payload) tuple. Forwarding
            # them unchanged made consumers unpack the string character by
            # character and drop the message.
            for update in doc_generator.add_blog_topic(topic_number, topic_title, topic_sections, content_type):
                yield update if isinstance(update, tuple) else ('status', update)

        yield ('status', "π Saving final Word document...")
        paragraphs = document.paragraphs
        if paragraphs and paragraphs[-1].text == "":
            trailing = paragraphs[-1]._element
            trailing.getparent().remove(trailing)

        filename = f"digiworks_formatted_blogs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
        temp_path = os.path.join(tempfile.gettempdir(), filename)
        doc_generator.save_document(temp_path)

        total_sections = len(df)
        if 'Word Count' in df.columns:
            # Coerce so blank or textual cells cannot break the total.
            total_words = int(pd.to_numeric(df['Word Count'], errors='coerce').sum())
        else:
            total_words = 0
        success_message = f"β Word document created successfully!\nπ Processed {processed_topics} blog topics\nπ Total sections: {total_sections}\nπ¬ Total words: {total_words:,}\nπ File: {filename}"
        yield ('complete', temp_path, success_message)
    except Exception as e:
        import traceback
        yield ('error', f"β Error processing file: {str(e)}\n\nTraceback:\n{traceback.format_exc()}")
| # ======================== GRADIO INTERFACE ======================== | |
def create_interface():
    """Build the three-tab Gradio app and wire up its event handlers.

    Tabs: idea generation, blog post creation from an uploaded ideas
    workbook, and Word document formatting from a sections workbook.

    Returns:
        The ``gr.Blocks`` interface, not yet launched.
    """
    generator = BlogGenerator()
    # NOTE(review): the layout nesting below was reconstructed from a source
    # whose indentation was lost - confirm it against the deployed UI.
    with gr.Blocks(title="π― Digiworks Unified Blog Generator", theme=gr.themes.Soft()) as interface:
        gr.HTML("""<div style='text-align: center; padding: 25px; background: linear-gradient(135deg, #1a365d 0%, #2d3748 100%); color: white; margin-bottom: 25px; border-radius: 15px;'><h1 style='margin: 0; font-size: 2.5em; font-weight: 900;'>π― Digiworks Unified Blog Generator</h1><p style='margin: 15px 0 0 0; font-size: 1.2em;'>Idea Generation β Blog Post Creation β Word Document Formatting</p></div>""")

        with gr.Tab("π‘ Idea Generation"):
            with gr.Row():
                with gr.Column(scale=2):
                    mode_choice = gr.Dropdown(choices=["Single Type", "All Types"], value="Single Type", label="Generation Mode")
                    content_type = gr.Dropdown(choices=list(BLOG_TYPES.keys()), value=list(BLOG_TYPES.keys())[0], label="Select Type", visible=True)
                    num_topics = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Topics per Type")
                    generate_ideas_btn = gr.Button("π Generate Blog Ideas", variant="primary", size="lg")
                with gr.Column(scale=1):
                    gr.HTML("""<div style='background: #1a202c; color: white; padding: 15px; border-radius: 8px; margin-bottom: 15px; border: 1px solid #4a5568;'><h4 style='color: #63b3ed; margin-top: 0;'>π― Idea Generation:</h4><ul style='margin: 5px 0; color: white; line-height: 1.5;'><li>β Research-backed topics</li><li>β SEO-optimized headlines</li><li>β Target audience & keywords</li><li>β Content angle & SEO opportunity</li></ul></div>""")
            with gr.Row():
                with gr.Column():
                    ideas_status = gr.Textbox(label="Status", value="Ready to generate blog ideas! π", interactive=False)
                    ideas_results = gr.Markdown("Click Generate to start!")
                    ideas_df = gr.Dataframe(label="π Generated Blog Ideas", interactive=False, wrap=True)
                    download_ideas_btn = gr.File(label="π₯ Download Ideas as Excel", visible=False)

        with gr.Tab("π Blog Post Creation"):
            loaded_data = gr.State(pd.DataFrame())
            with gr.Row():
                with gr.Column(scale=3):
                    gr.HTML("<h3>π Upload Topic Data (from Idea Generation)</h3>")
                    excel_upload = gr.File(label="Upload Excel with Topic Data", file_types=[".xlsx", ".xls"], type="filepath")
                    gr.HTML("""<div style='background: #1a202c; color: white; padding: 15px; border-radius: 8px; margin: 10px 0; border: 1px solid #4a5568;'><strong style='color: #63b3ed;'>π Required Excel Columns:</strong><br><ul style='margin: 10px 0; color: white; line-height: 1.5;'><li>β’ Headline, Content Type, Keywords, etc.</li></ul></div>""")
                    gr.HTML("<h3>π― Select Topics for Blog Post Creation</h3>")
                    topics_checklist = gr.CheckboxGroup(label="Select Topics to Generate (with automatic indexing)", choices=[], value=[])
                    reset_counter_btn = gr.Button("π Reset Topic Counter", variant="secondary", size="sm")
                    generate_posts_btn = gr.Button("π Generate Blog Posts", variant="primary", size="lg")
                with gr.Column(scale=1):
                    gr.HTML("""<div style='background: #1a202c; color: white; padding: 20px; border-radius: 12px; margin-bottom: 15px; border: 1px solid #4a5568;'><h4 style='color: #63b3ed; margin-top: 0;'>π’ Blog Post Creation:</h4><ul style='margin: 10px 0; color: white; line-height: 1.6;'><li>π Live progress updates</li><li>π― Complete blog structures</li><li>π§ spaCy intelligent text processing</li><li>β¨ Digiworks-optimized content</li><li>π Indexed Excel-ready output</li></ul></div>""")
            with gr.Row():
                with gr.Column():
                    posts_status = gr.Textbox(label="π Generation Status", value="Upload Excel file to begin blog post generation", interactive=False, lines=10)
                    posts_dataframe = gr.Dataframe(label="π Generated Blog Posts (Indexed)", interactive=False, wrap=True)
            with gr.Row():
                download_posts_btn = gr.File(label="π₯ Download Blog Posts as Excel", visible=False)

        with gr.Tab("π Word Document Formatter"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.HTML("<h3>π Upload Blog Sections Excel</h3>")
                    word_excel_input = gr.File(label="Upload Excel File with Blog Sections", file_types=[".xlsx", ".xls"], type="filepath")
                    gr.HTML("""<div style='background: #1a202c; color: white; padding: 15px; border-radius: 8px; margin: 10px 0; border: 1px solid #4a5568;'><strong style='color: #63b3ed;'>π Expected Excel Format:</strong><br><ul style='margin: 10px 0; color: white; line-height: 1.5;'><li>β’ Topic Number, Blog Topic, Section Name</li><li>β’ Section Content, Word Count, Section Group</li></ul></div>""")
                    generate_word_btn = gr.Button("π Generate WordPress-Ready Word Document", variant="primary", size="lg")
                with gr.Column(scale=1):
                    gr.HTML("""<div style='background: #1a202c; color: white; padding: 20px; border-radius: 12px; margin-bottom: 15px; border: 1px solid #4a5568;'><h4 style='color: #63b3ed; margin-top: 0;'>π Word Document Features:</h4><ul style='margin: 10px 0; color: white; line-height: 1.6;'><li>π― Organized by topic number</li><li>π€ Creative AI-powered tables</li><li>β¨ Live 'debug' status updates</li><li>π¨ Consistent formatting</li><li>π Ready for WordPress</li></ul></div>""")
            with gr.Row():
                with gr.Column():
                    word_status = gr.Textbox(label="π Processing Status (Debug Log)", value="Upload Excel file to begin Word document generation", interactive=False, lines=10)
                    word_preview = gr.Textbox(label="π Content Preview (First 500 characters)", interactive=False, lines=6)
            with gr.Row():
                download_word_btn = gr.File(label="π₯ Download Formatted Word Document", visible=False)
                gr.HTML("""<div style='background: #065f46; color: white; padding: 15px; border-radius: 8px; text-align: center; margin-left: 15px; border: 1px solid #10b981;'><strong style='color: #6ee7b7;'>π‘ WordPress Tip:</strong><span style='color: white;'>Copy sections directly from Word to WordPress editor!</span></div>""")

        gr.HTML("""<div style='text-align: center; padding: 20px; margin-top: 25px; border-top: 1px solid #e0e0e0;'><p style='color: #666; margin: 0;'>π― <strong>Digiworks Unified Blog Generator</strong> | Idea Generation β Blog Post Creation β Word Document Formatting<br><small>Perfect for content teams and marketers</small></p></div>""")

        # --- SHARED HELPERS ---
        def _cell_text(value):
            """Return an Excel cell as text; empty cells (None/NaN) become ''."""
            if value is None or (isinstance(value, float) and value != value):
                return ''
            return str(value)

        def _content_preview(df, fallback):
            """Return up to 500 cleaned characters of the first section, else *fallback*."""
            if df.empty or 'Section Content' not in df.columns:
                return fallback
            cleaned_sample = clean_blog_content(df.iloc[0]['Section Content'])
            if len(cleaned_sample) > 500:
                return cleaned_sample[:500] + "..."
            return cleaned_sample

        # --- HANDLER FUNCTIONS ---
        def toggle_content_type(mode):
            """Show the type dropdown only in single-type mode."""
            return gr.update(visible=(mode == "Single Type"))

        def handle_idea_generation(mode, content_type_val, num_topics_val):
            """Generate ideas and, when any were produced, export them to Excel."""
            try:
                if mode == "Single Type":
                    result, df, status = generate_blog_topics(content_type_val, num_topics_val)
                else:
                    result, df, status = generate_all_types(num_topics_val)
                download_file, download_visible = None, False
                if not df.empty:
                    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                    filename = f"digiworks_blog_ideas_{timestamp}.xlsx"
                    temp_path = os.path.join(tempfile.gettempdir(), filename)
                    with pd.ExcelWriter(temp_path, engine='openpyxl') as writer:
                        df.to_excel(writer, sheet_name='Blog Ideas', index=False)
                    download_file, download_visible = temp_path, True
                return status, result, df, gr.update(value=download_file, visible=download_visible)
            except Exception as e:
                return f"β Error: {str(e)}", f"β Generation failed: {str(e)}", pd.DataFrame(), gr.update(visible=False)

        def load_excel_and_create_checklist(file_path):
            """Read the ideas workbook and build one checkbox label per row."""
            if not file_path:
                return gr.update(choices=[], value=[]), pd.DataFrame(), "Please upload an Excel file"
            try:
                df = pd.read_excel(file_path)
                required_cols = ['Content Type', 'Headline']
                missing_cols = [col for col in required_cols if col not in df.columns]
                if missing_cols:
                    return gr.update(choices=[], value=[]), pd.DataFrame(), f"Missing columns: {', '.join(missing_cols)}"
                # Decorative suffixes removed from the type label, in the
                # same order as the original chained .replace() calls.
                type_suffixes = (' π', ' π', ' βοΈ', ' π', ' π', ' π‘')
                choices = []
                # The 1-based position is what generate_selected_posts feeds
                # back into df.iloc, so number by position, not index label.
                for position, (_, row) in enumerate(df.iterrows(), start=1):
                    # _cell_text: an empty cell is NaN (a float) and would
                    # otherwise fail on .replace() / slicing, aborting the load.
                    type_label = _cell_text(row.get('Content Type', ''))
                    for suffix in type_suffixes:
                        type_label = type_label.replace(suffix, '')
                    headline = _cell_text(row.get('Headline', ''))[:60]
                    keywords = _cell_text(row.get('Keywords', ''))[:50]
                    choices.append(f"#{position} [{type_label}] {headline}... | Keywords: {keywords}...")
                return gr.update(choices=choices, value=choices), df, f"β Loaded {len(df)} topics. Select topics to generate blog posts."
            except Exception as e:
                return gr.update(choices=[], value=[]), pd.DataFrame(), f"β Error: {str(e)}"

        def reset_topic_counter():
            """Reset the generator's topic numbering."""
            generator.reset_counter()
            return "π Topic counter reset to 0. Next generation will start from Topic #1."

        def generate_selected_posts(file_path, selected_choices, df):
            """Generate posts for the ticked topics, streaming status updates.

            Yields (status text, sections DataFrame, download-file update)
            and refreshes the Excel export after each finished topic.
            """
            if not file_path or not selected_choices:
                yield "Please upload an Excel file and select at least one topic to generate.", pd.DataFrame(), gr.update(visible=False)
                return
            all_sections_data, status_log = [], ["π Starting blog post generation..."]
            latest_file_path = None
            yield "\n".join(status_log), pd.DataFrame(), gr.update(value=None, visible=False)
            try:
                for topic_counter, choice in enumerate(selected_choices, 1):
                    # Labels look like "#<n> [...]"; recover the 0-based row.
                    choice_index = int(choice.split('#')[1].split(' ')[0]) - 1
                    if choice_index >= len(df):
                        continue
                    topic_data = df.iloc[choice_index].to_dict()
                    generation_iterator = generator.generate_blog_from_topic_data(topic_data, topic_index=topic_counter)
                    topic_sections = []
                    for update_type, update_value, *rest in generation_iterator:
                        if update_type == 'status':
                            status_log.append(update_value)
                            yield "\n".join(status_log), pd.DataFrame(all_sections_data), gr.update(value=latest_file_path, visible=bool(latest_file_path))
                        elif update_type == 'result':
                            sections, msg = update_value, rest[0]
                            if sections:
                                topic_sections.extend(sections)
                            status_log.append(msg)
                    if topic_sections:
                        all_sections_data.extend(topic_sections)
                        intermediate_df = pd.DataFrame(all_sections_data)
                        latest_file_path = create_excel_export(all_sections_data)
                        status_log.append(f"π¦ Intermediate file for Topic #{topic_counter} ready.")
                        yield "\n".join(status_log), intermediate_df, gr.update(value=latest_file_path, visible=True)
                if all_sections_data:
                    final_df = pd.DataFrame(all_sections_data)
                    total_topics, total_sections = len(selected_choices), len(all_sections_data)
                    positive_counts = [s['Word Count'] for s in all_sections_data if s['Word Count'] > 0]
                    successful_sections = len(positive_counts)
                    total_words = sum(positive_counts)
                    final_status = f"""π ALL TOPICS COMPLETE!\n--------------------\nπ Generated {total_sections} sections across {total_topics} topics\nβ Successful sections: {successful_sections}/{total_sections}\nπ Total words generated: {total_words:,}\nπ’ Topics indexed: 1-{total_topics}\n\nFull Log:\n""" + "\n".join(status_log)
                    yield final_status, final_df, gr.update(value=latest_file_path, visible=True)
                else:
                    yield "β No sections were generated.", pd.DataFrame(), gr.update(visible=False)
            except Exception as e:
                import traceback
                tb_str = traceback.format_exc()
                yield f"β An unexpected error occurred: {str(e)}\n\nTraceback:\n{tb_str}", pd.DataFrame(), gr.update(visible=False)

        def process_excel_file(excel_file):
            """Summarise an uploaded sections workbook and preview its first section."""
            if not excel_file:
                return "Please upload an Excel file", "", gr.update(visible=False)
            try:
                df = pd.read_excel(excel_file)
                preview = _content_preview(df, "No content preview available")
                status = f"β Excel loaded: {len(df)} sections found\nπ Topics: {df['Topic Number'].nunique() if 'Topic Number' in df.columns else 'Unknown'}\nπ Ready to generate Word document"
                return status, preview, gr.update(visible=False)
            except Exception as e:
                return f"β Error reading Excel: {str(e)}", "", gr.update(visible=False)

        def generate_word_document(excel_file):
            """Run the Excel-to-Word conversion, streaming its status log."""
            if not excel_file:
                yield "Please upload an Excel file.", "", gr.update(visible=False)
                return
            status_log = ["π Starting Word document generation..."]
            yield "\n".join(status_log), "", gr.update(visible=False)
            try:
                for event in process_excel_to_word(excel_file):
                    # Tolerate bare status strings: unpacking one would split
                    # it into characters and silently drop the message.
                    if isinstance(event, str):
                        event = ('status', event)
                    # `event_kind`, not `type`, to avoid shadowing the builtin.
                    event_kind, payload, *extra = event
                    if event_kind == 'status':
                        status_log.append(payload)
                        yield "\n".join(status_log), "", gr.update(visible=False)
                    elif event_kind == 'complete':
                        word_file_path, result_message = payload, extra[0]
                        df = pd.read_excel(excel_file)
                        preview = _content_preview(df, "Document generated successfully")
                        yield result_message, preview, gr.update(value=word_file_path, visible=True)
                    elif event_kind == 'error':
                        yield payload, "", gr.update(visible=False)
            except Exception as e:
                import traceback
                yield f"β An unexpected error occurred: {str(e)}\n{traceback.format_exc()}", "", gr.update(visible=False)

        # --- EVENT LISTENERS ---
        mode_choice.change(fn=toggle_content_type, inputs=[mode_choice], outputs=[content_type])
        generate_ideas_btn.click(fn=handle_idea_generation, inputs=[mode_choice, content_type, num_topics], outputs=[ideas_status, ideas_results, ideas_df, download_ideas_btn])
        excel_upload.change(fn=load_excel_and_create_checklist, inputs=[excel_upload], outputs=[topics_checklist, loaded_data, posts_status])
        reset_counter_btn.click(fn=reset_topic_counter, inputs=[], outputs=[posts_status])
        generate_posts_btn.click(fn=generate_selected_posts, inputs=[excel_upload, topics_checklist, loaded_data], outputs=[posts_status, posts_dataframe, download_posts_btn])
        word_excel_input.change(fn=process_excel_file, inputs=[word_excel_input], outputs=[word_status, word_preview, download_word_btn])
        generate_word_btn.click(fn=generate_word_document, inputs=[word_excel_input], outputs=[word_status, word_preview, download_word_btn])
    return interface
| # ======================== MAIN EXECUTION ======================== | |
if __name__ == "__main__":
    # Startup banner.
    print("π― DIGIWORKS UNIFIED BLOG GENERATOR")
    print("π‘ Idea Generation β Blog Post Creation β Word Document Formatting")
    print("π Perfect for content teams and marketers")
    print("π§ Complete blog structures + spaCy processing")
    print("β‘ Intelligent word count management")
    print("π¨ Digiworks-optimized content persona")
    print("π Excel Input β AI Processing β Excel/Word Output")
    print("π Optimized comparison tables for 'At-a-Glance Matrix' sections")
    print("="*70)

    interface = create_interface()

    # Login credentials come from the Space's secrets; refuse to start
    # without them so the app is never served unauthenticated.
    gradio_user = os.environ.get("GRADIO_USERNAME")
    gradio_pass = os.environ.get("GRADIO_PASSWORD")
    if not (gradio_user and gradio_pass):
        raise ValueError("π΄ ERROR: GRADIO_USERNAME and GRADIO_PASSWORD secrets must be set in your Hugging Face Space for authentication.")
    print("β Gradio authentication enabled.")

    # Single launch call. share=True is dropped, matching the stated intent
    # of the previous revision: share links are not supported on Spaces.
    interface.launch(auth=(gradio_user, gradio_pass), ssr_mode=False)
    print("β Unified blog generator launched!")