# MODIFIED: Removed all runtime installation code. Dependencies are now in requirements.txt import subprocess import sys import os import re import time import json from datetime import datetime import tempfile import pandas as pd import gradio as gr from openai import OpenAI import requests from urllib.parse import quote_plus from bs4 import BeautifulSoup from docx import Document from docx.shared import Inches, Pt from docx.enum.style import WD_STYLE_TYPE from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml import OxmlElement from docx.oxml.ns import qn from itertools import cycle from serpapi import GoogleSearch import spacy # MODIFIED: Directly import spacy # MODIFIED: Load the spacy model directly, as it's installed via requirements.txt nlp = spacy.load("en_core_web_sm") # ======================== CONFIGURE OPENAI & SERPAPI ======================== # MODIFIED: Get API keys from Hugging Face Secrets and add error handling openai_api_key = os.environ.get("OPENAI_API_KEY") serpapi_key = os.environ.get("SERPAPI_KEY") if not openai_api_key: raise ValueError("π΄ ERROR: The OPENAI_API_KEY secret is not set in your Hugging Face Space settings!") if not serpapi_key: raise ValueError("π΄ ERROR: The SERPAPI_KEY secret is not set in your Hugging Face Space settings!") client = OpenAI(api_key=openai_api_key) SERPAPI_KEY = serpapi_key # ======================== BLOG CONTENT TYPES ======================== BLOG_TYPES = { "How-To Guide π": "step-by-step tutorials", "Listicle π": "list-based articles", "Comparison Post βοΈ": "versus comparisons", "Case Study π": "success stories", "Industry Trends π": "market analysis", "Founder Post π‘": "opinion pieces", "Templates and Toolkits": "downloadable resources", } # ======================== NEW: LISTICLE ASPECTS FOR DIVERSE CONTENT ======================== LISTICLE_ASPECTS = [ "Technology & Automation (e.g., AI in recruiting, new software)", "Company Culture & Employee Well-being (e.g., asynchronous work, burnout 
prevention)", "Financial Strategy & Compensation (e.g., global pay scales, benefits packages)", "Global Talent Sourcing & Emerging Markets (e.g., hiring from new regions)", "Data Analytics & Performance Metrics (e.g., using data to track remote productivity)", "Legal, Compliance & Security (e.g., cross-border regulations, data privacy)", "Diversity, Equity, and Inclusion (DEI) in Remote Teams", "Skill Development & Virtual Onboarding (e.g., upskilling a remote workforce)", "Collaboration Tools & Digital Workspaces (e.g., the new generation of tools beyond Zoom)", "Strategic Workforce Planning (e.g., hybrid models, contractor vs. full-time mix)" ] # ======================== BLOG STRUCTURES (REFINED PROMPTS) ======================== faq_prompt = """ You are an expert on the given blog topic. Your task is to generate 3 to 4 highly relevant frequently asked questions and their concise answers. **CRITICAL INSTRUCTIONS:** 1. Generate **at least 3** complete Q&A pairs. 2. Each question MUST start with "Q." and be on its own line. 3. Each answer MUST start with "A." and be on its own line, immediately following its question. 4. There should be a blank line between each Q&A pair. 5. Do NOT include any other text, introductions, or explanations. Only provide the Q&A pairs. **EXAMPLE of PERFECT FORMAT:** Q. What is the main benefit of using this service? A. The main benefit is a significant reduction in operational costs, often by up to 60%, without sacrificing quality. Q. Who is this service for? A. This service is primarily for HR managers at tech startups and global recruitment agencies looking to scale their teams efficiently. **YOUR TASK:** Now, generate the FAQs for the current blog topic following these rules and the example format exactly. """ BLOG_STRUCTURES = { # ... 
(Your entire BLOG_STRUCTURES dictionary remains here, unchanged) "How-To Guide": [ ("Intro Hook", "Problem Hook", 40, 60, "Hook the reader by presenting a common, relatable problem they're struggling with. Frame it as a challenge that many in their position face."), ("Intro Hook", "Promise the Outcome", 20, 30, "In one single, powerful sentence, promise the specific, tangible skill the reader will have after finishing this guide. Be extremely concise. Example: 'By the end of this guide, you will be able to build a high-performing global team from scratch.'"), ("Table of Contents", "Jump Links", 10, 20, "This is a placeholder for the auto-generated Table of Contents."), ("Author Details", "Author Byline", 15, 25, "Write a brief, authoritative byline for the 'Digiworks Team,' highlighting expertise in the topic."), ("Author Details", "Publication Date", 5, 10, "Generate the publication and last updated dates. Format: Published: [Date] | Last updated: [Date]"), ("Author Details", "Social Proof Badges", 10, 20, "Provide short social proof, like 'Trusted by 10,000+ businesses' or 'As featured in Forbes.'"), ("Context & Why It Matters", "What Is [Topic]?", 120, 180, "Define the main topic in the context of a business audience. Explain what it is and its relevance to them, linking to a pillar post if applicable."), ("Context & Why It Matters", "Why [Topic] Matters", 120, 180, "Explain the strategic importance of the topic. Use compelling statistics (e.g., 'boosts productivity by 30%') to show tangible benefits like saving time, reducing costs, or improving efficiency."), ("Prerequisites", "What You'll Need Checklist", 80, 120, "Create a bulleted checklist of necessary tools, accounts, or knowledge. Use checkmark icons (β) and briefly explain why each item is needed."), ("Step-by-Step Guide", "Step 1: [Actionable Step Title]", 100, 150, "Start with a clear, action-oriented title. Explain why this step is crucial. Provide 1-3 clear actions. 
Include a 'Pro Tip:' or a reference to a visual aid (e.g., 'See the dashboard screenshot below')."), ("Step-by-Step Guide", "Step 2: [Actionable Step Title]", 100, 150, "Write a clear, action-oriented title for the second step. Explain its importance. List 1-3 actions. Include a practical tip or shortcut."), ("Step-by-Step Guide", "Step 3: [Actionable Step Title]", 100, 150, "Create an action-oriented title for the third step. Explain why it matters. List 1-3 actions. Add a helpful tip or visual cue description."), ("Step-by-Step Guide", "Step 4: [Actionable Step Title]", 100, 150, "Formulate an action-oriented title for the fourth step. Explain its significance. List 1-3 actions. Provide a useful tip."), ("Step-by-Step Guide", "Step 5: [Actionable Step Title]", 100, 150, "Develop an action-oriented title for the final step. Explain why this step is important for success. List 1-3 actions. End with an expert tip."), ("Pro Tips & Common Pitfalls", "Pro Tips from Digiworks Experts", 80, 120, "List 3-5 'power user' tips in a bulleted format. These should be advanced, actionable insights that help readers achieve maximum efficiency."), ("Pro Tips & Common Pitfalls", "Common Mistakes to Avoid", 60, 100, "Create a bulleted list of 3 common mistakes related to the topic. For each mistake, briefly explain how to avoid it."), ("Real-World Examples", "Mini Case Study", 100, 150, "Tell a brief story about how a company (e.g., 'a fast-growing tech startup') used this method to achieve a specific, measurable result (e.g., 'increased efficiency by 35%')."), ("Real-World Examples", "Reader Example", 60, 100, "Write a paragraph that directly addresses the reader, inviting them to share their own success stories or experiences in the comments to build community engagement."), ("Advanced Techniques", "Going Deeper", 100, 150, "Offer an optional advanced strategy for readers who want to learn more. 
Link to other internal or external guides for further reading."), ("Measuring Success", "Key Metrics to Track", 60, 100, "List the key metrics readers should track to measure their success (e.g., time to hire, ROI, engagement rate). Explain why these metrics are important."), ("Measuring Success", "Recommended Tools", 60, 100, "Recommend 2-3 specific tools (e.g., Google Analytics, Digiworks' dashboards) for monitoring performance and tracking the metrics mentioned above."), ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt), ("Conclusion & Next Steps", "Recap", 40, 60, "Summarize the key takeaways in 2-3 concise bullet points, reinforcing what the reader has learned."), ("Conclusion & Next Steps", "Primary Call-to-Action", 40, 60, "Write a strong, action-oriented call-to-action that encourages the reader to engage with Digiworks' main service (e.g., 'Hire Vetted Remote Talent')."), ("Conclusion & Next Steps", "Secondary Call-to-Action", 30, 50, "Provide a secondary, content-focused CTA to keep readers engaged, such as 'Download our free checklist' or 'Read our advanced guide on [related topic].'"), ], "Listicle": [ ("Above-the-Fold Essentials", "Intro Hook (Pain Point)", 40, 60, "Hook the reader with a common frustration, like 'Finding the right [tool/tactic] to scale your business can feel impossible...'"), ("Above-the-Fold Essentials", "Intro Hook (Promise)", 20, 40, "Promise a clear solution: 'We've narrowed down the [X] best options so you can focus on growth.'"), ("Above-the-Fold Essentials", "Quick Stats", 20, 40, "Provide 1-2 compelling data points that underscore the importance of the list (e.g., '84% of successful businesses use at least 3 of these tools')."), ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Build credibility with a trust-building statement like 'From the experts at Digiworks...' 
and include publication/update dates."), ("Above-the-Fold Essentials", "Table of Contents", 10, 20, "This is a placeholder for the auto-generated Table of Contents."), ("How We Chose These", "Criteria Overview", 80, 120, "Explain the selection methodology in 3-5 bullet points (e.g., cost-effectiveness, scalability, ease of use). This builds transparency."), ("How We Chose These", "Scoring System", 60, 100, "Briefly explain the scoring rubric if items are scored (e.g., a five-star system for 'Value' or 'Ease of Use')."), ("The List", "Item 1: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 2: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 3: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 4: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 5: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 6: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 7: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. 
Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 8: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 9: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("The List", "Item 10: [Name]", 80, 150, "For this list item, explain the concept clearly. Use a relatable example or a mini-story to illustrate the point. Provide a tangible, actionable takeaway for the reader."), ("Comparison Table", "At-a-Glance Comparison", 30, 50, "Write a brief placeholder text. The final summary table will be automatically generated by AI during the Word document creation phase based on the list items above."), ("In-Depth Picks", "Editor's Choice: Top 3 Deep Dive", 100, 150, "Write brief 3-4 sentence mini-reviews of the top 3 items from the list. Include specific pros and cons for each of these top picks."), ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt), ("Conclusion & Next Steps", "Recap", 40, 60, "Briefly recap the key takeaway of the list, reinforcing the value provided to the reader."), ("Conclusion & Next Steps", "Primary Call-to-Action", 40, 60, "Write a primary CTA that connects the listicle's topic to Digiworks' service, e.g., 'Ready to delegate these tasks? Hire an expert from Digiworks.'"), ("Conclusion & Next Steps", "Secondary Call-to-Action", 30, 50, "Encourage community interaction, asking readers to comment with their favorite item or share their own recommendations."), ], "Comparison Post": [ ("Above-the-Fold Essentials", "Intro Hook (Use Case)", 40, 60, "Start with a common dilemma: 'Choosing between [Option A] and [Option B] for your business? 
Here's the definitive breakdown.'"), ("Above-the-Fold Essentials", "Intro Hook (Preview Verdict)", 20, 40, "Give a sneak peek of the conclusion to keep readers engaged: 'Read on to see which wins on pricing, features, and overall value.'"), ("Above-the-Fold Essentials", "Quick Verdict Bar", 20, 40, "Include a one-sentence 'Overall Winner' callout right at the top for skimmers."), ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Establish authority with a statement like 'An unbiased breakdown from the experts at Digiworks...' and include publication dates."), ("Above-the-Fold Essentials", "Jump Links", 10, 20, "This is a placeholder for the auto-generated Table of Contents."), ("Comparison Summary Table", "At-a-Glance Matrix", 100, 200, r""" Create a **clean markdown table** comparing [Option A] and [Option B]. **Rules for the table:** 1. Start with a header row: `| Metric | [Option A Title] vs. [Option B Title] |` 2. Follow with a separator: `|---|---|` 3. Each comparison metric gets **its own row** in this format: `| **Metric Name** | [Option A details] \| [Option B details] |` 4. The `\|` is crucial to separate the details for Option A and B within the second column. **Example Output:** ``` | Metric | Local Hiring vs. Global Remote Talent | |---|---| | **Pricing** | High overheads ($50/hr) \| Up to 60% savings ($20/hr) | | **Scalability** | Limited by local talent pool \| Access to global talent | ``` """), ("How We Evaluated", "Criteria & Weighting", 80, 120, "List the evaluation criteria (e.g., Pricing, Features, Talent Vetting, Support). Briefly note the methodology (e.g., 'Based on client feedback and public data')."), ("How We Evaluated", "Scoring System", 60, 100, "If a scoring system is used, explain it. Clarify any special considerations (e.g., 'annual vs. monthly pricing')."), ("Deep-Dive Comparison", "Pricing Models Breakdown", 100, 150, "Provide an in-depth comparison of the pricing structures. 
Discuss cost per hire, monthly fees, and potential hidden costs. Calculate ROI."), ("Deep-Dive Comparison", "Feature Set Comparison", 100, 150, "Detail and compare the core features, unique capabilities, and integrations of each option."), ("Deep-Dive Comparison", "Scalability & Growth", 100, 150, "Analyze how each option supports business growth, considering factors like onboarding speed, team expansion, and geographic reach."), ("Pros & Cons", "Digiworks Pros & Cons", 60, 100, "List 3 major pros for Digiworks (e.g., β Vetted talent, β Cost savings). Then, list 2 honest cons or limitations (e.g., β Niche roles may take longer)."), ("Pros & Cons", "Competitor Pros & Cons", 60, 100, "List 3 major pros and 2 honest cons for the competitor, maintaining an objective and fair tone."), ("Use-Case Recommendations", "Which Is Best For You?", 80, 120, "Provide clear, scenario-based recommendations: 'Best for Startups: [Option] because...'; 'Best for Enterprises: [Option] because...'"), ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt), ("Conclusion & Final Verdict", "Recap Key Differences", 40, 60, "Summarize the most critical differences between the two options in one-liner bullet points for quick reference."), ("Conclusion & Final Verdict", "Overall Winner Declaration", 20, 40, "Clearly declare the winner based on the most important factor for businesses (e.g., 'For cost-effective scalability, Digiworks is the clear winner')."), ("Conclusion & Final Verdict", "Primary Call-to-Action", 40, 60, "Write a strong CTA that encourages action, like 'Start your risk-free trial with Digiworks' or 'Book a call to build your team today.'"), ("Conclusion & Final Verdict", "Secondary Call-to-Action", 30, 50, "Offer a related content piece to keep the reader engaged, such as 'Compare Digiworks with [another competitor].'"), ], "Case Study": [ ("Above-the-Fold Essentials", "Hero Section", 20, 40, "Describe a compelling visual, like a client logo plus a dashboard screenshot 
showing results."), ("Above-the-Fold Essentials", "Quick-fire Stats", 20, 40, "List 2-3 big, bold, quantifiable results in a 'quick-fire' format (e.g., '+60% revenue growth,' '3x ROI in 6 months')."), ("Above-the-Fold Essentials", "Quick-Take Summary", 40, 60, "Write a 2-3 sentence summary covering the client's challenge, the Digiworks solution, and the brilliant outcome."), ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Include a client logo and a powerful snippet from their testimonial (e.g., '\"Digiworks transformed our workflow...\" β Jane Doe, CMO')."), ("Above-the-Fold Essentials", "Jump Links", 10, 20, "This is a placeholder for the auto-generated Table of Contents."), ("Client Overview", "About the Client", 30, 50, "Provide 1-2 sentences about the client: their industry, size, and market position."), ("Client Overview", "The Context", 60, 100, "Set the scene. What was their situation *before* Digiworks? Why were they seeking a solution?"), ("The Challenge", "Key Pain Points", 60, 100, "Create a bulleted list of 3-5 specific, tangible problems the client faced (e.g., slow hiring, high costs, skill gaps)."), ("The Challenge", "Impact of the Problem", 60, 100, "Quantify the negative impact of the problems where possible (e.g., 'Monthly hiring cost was $15K,' 'Customer response time was 48 hours')."), ("The Solution", "Why They Chose Digiworks", 60, 100, "List 2-3 key reasons the client chose Digiworks, focusing on our core strengths (e.g., pre-vetted talent, speed, cost structure)."), ("The Solution", "Implementation Details", 100, 150, "Detail the execution: What roles were filled? What tools were used? What was the timeline from start to full deployment?"), ("The Results", "Headline Metrics", 40, 60, "Display the most impressive outcome numbers in a big, bold format (e.g., '+60% revenue increase,' '90% faster hiring')."), ("The Results", "Detailed Metrics & Impact", 100, 150, "Provide a more detailed breakdown of the transformation. 
A 'before/after' comparison table works well here."), ("The Results", "Visual Data Representation", 20, 40, "Describe a chart, graph, or annotated screenshot that visually represents the positive results."), ("Client Testimonial", "Featured Quote Block", 40, 60, "Extract a powerful, concise quote from the client that encapsulates the success of the project."), ("Client Testimonial", "Video Testimonial", 30, 50, "If applicable, describe a 1-2 minute video testimonial with the client discussing their positive experience."), ("Lessons Learned", "Key Takeaways for Others", 80, 120, "Provide 3 bulleted, actionable insights that other businesses can learn from this case study."), ("Lessons Learned", "Do's and Don'ts", 60, 100, "Offer 2-3 'Do's' and 'Don'ts' based on the client's journey and the lessons learned."), ("Next Steps & CTAs", "For Prospective Clients", 40, 60, "Write a direct and compelling CTA for new clients, e.g., 'Ready to see results like these? Start your no-risk trial today.'"), ("Next Steps & CTAs", "Related Case Studies", 30, 50, "Link to 2-3 similar case studies, categorized by industry or business outcome."), ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt), ], "Industry Trends": [ ("Above-the-Fold Essentials", "Intro Hook (Macro Context)", 40, 60, "Start with a big-picture statement about the evolving future of work, positioning the topic as a competitive imperative."), ("Above-the-Fold Essentials", "Intro Hook (Promise)", 20, 40, "Promise to deliver a specific number of data-backed trends that the reader cannot afford to ignore."), ("Above-the-Fold Essentials", "Key Stats Snapshot", 30, 50, "List 2-3 eye-popping, one-line statistics that immediately grab the reader's attention."), ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Establish credibility by citing Digiworks' expertise and years of market data. 
Include publication dates."), ("Above-the-Fold Essentials", "Jump Links", 10, 20, "This is a placeholder for the auto-generated Table of Contents."), ("Methodology", "Data Sources", 60, 100, "List the authoritative sources used for the analysis (e.g., 'Google Trends, Gartner studies, our internal talent marketplace data')."), ("Methodology", "Timeframe & Scope", 60, 100, "Define the research period and scope (e.g., 'Analysis period: Q1 2024βQ4 2025; Global sample across 50+ countries')."), ("Methodology", "Selection Criteria", 60, 100, "Explain why these specific trends were chosen (e.g., 'Based on search volume growth, social media mentions, and market impact potential')."), ("The Top Trends", "Trend 1: [Trend Name]", 100, 150, "Define the trend, explain why it's critical for modern businesses, provide a real-world example of an early adopter, and support it with a statistic."), ("The Top Trends", "Trend 2: [Trend Name]", 100, 150, "Define the trend, explain why it matters, give a real-world example, and back it up with a data point."), ("The Top Trends", "Trend 3: [Trend Name]", 100, 150, "Define the trend, explain its importance, offer a real-world example, and include a supporting statistic."), ("The Top Trends", "Trend 4: [Trend Name]", 100, 150, "Define the trend, explain its business impact, provide a real-world example, and cite relevant data."), ("The Top Trends", "Trend 5: [Trend Name]", 100, 150, "Define the trend, explain why it's a must-watch, show a real-world example, and support it with a statistic."), ("The Top Trends", "Trend 6: [Trend Name]", 100, 150, "Define the trend, explain its relevance, give a real-world example, and provide a supporting data point."), ("The Top Trends", "Trend 7: [Trend Name]", 100, 150, "Define the trend, explain why it matters for growth, offer a real-world example, and include a compelling statistic."), ("Expert Voices", "Industry Expert Quotes", 60, 100, "Include 2-3 short, authoritative quotes from respected industry 
leaders to provide third-party validation."), ("Expert Voices", "Micro Case Studies", 80, 120, "Showcase 1-2 bullet points of brands successfully capitalizing on these trends, with quantifiable results (e.g., 'Brand X boosted productivity by 45% using trend Y')."), ("Cross-Trend Connections", "How Trends Intersect", 80, 120, "Explain how multiple trends connect and amplify each other (e.g., 'AI-powered hiring + async work = hyper-efficient global teams'). Suggest a visual concept."), ("Implications for Your Business", "For Small Teams & Startups", 60, 100, "Provide specific, actionable starting points for resource-constrained businesses. Which trends offer maximum impact for minimal budget?"), ("Implications for Your Business", "For Enterprises", 60, 100, "Advise on which trends require significant organizational change or investment. How can they build the internal business case?"), ("Implications for Your Business", "For Agencies & Consultants", 60, 100, "Explain how they can package these trend insights into services for their clients, identifying monetization opportunities."), ("Tools & Resources", "Data & Analytics Tools", 60, 100, "Recommend specific tools for tracking these trends (e.g., Ahrefs, Google Trends, LinkedIn Analytics)."), ("Tools & Resources", "Execution & Implementation Tools", 60, 100, "Recommend tools for acting on the trends (e.g., Zapier for automation, Slack for async communication)."), ("Tools & Resources", "Further Reading & Research", 60, 100, "Link to authoritative deep-dive reports, whitepapers, and thought leadership for readers who want to explore further."), ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt), ("Conclusion & Next Steps", "Recap of Top Trends", 60, 100, "Provide one concise bullet per trend, summarizing the key strategic takeaway for busy executives."), ("Conclusion & Next Steps", "Primary Call-to-Action", 40, 60, "Create a lead-generation CTA, such as 'Download the complete 2025 Trends Report PDF' or 'Book a 
complimentary trends workshop.'"), ("Conclusion & Next Steps", "Secondary Call-to-Action", 30, 50, "Offer a content-based CTA to keep readers in the ecosystem, like 'Read our deep-dive on [most popular trend]' or 'Subscribe to our newsletter.'"), ], "Founder Post": [ ("Hook", "Gripping Anecdote or Statistic", 100, 150, "Start with a powerful personal story, a shocking industry statistic, or a provocative question that challenges conventional wisdom."), ("Thesis", "The Core Argument", 100, 150, "State the main thesis or controversial opinion clearly and boldly in one paragraph. This is the central idea the entire post will defend."), ("Supporting Arguments", "Supporting Point 1", 150, 200, "Provide the first major argument supporting the thesis. Back it up with concrete evidence, a personal anecdote, data, or a client example."), ("Supporting Arguments", "Supporting Point 2", 150, 200, "Provide the second major argument. It should build on the previous point and be supported by fresh evidence or a different example."), ("Supporting Arguments", "Supporting Point 3", 150, 200, "Provide the third major argument to strengthen the overall case. Use a compelling story or data point as evidence."), ("Supporting Arguments", "Supporting Point 4 (Optional)", 150, 200, "If essential, add a fourth argument to fully develop the thesis, complete with its own evidence or anecdote."), ("Counter-Arguments", "Addressing the Other Side", 150, 200, "Acknowledge 1-2 common counter-arguments to your thesis. Thoughtfully rebut them to show intellectual honesty and strengthen your position."), ("Conclusion", "Actionable Insights & Next Steps", 150, 200, "Summarize the core argument with impact. Provide 2-3 specific, actionable steps or pieces of advice that readers can implement immediately."), ("Engagement Prompt", "Cross-Post to LinkedIn", 50, 70, "Encourage discussion on a different platform: 'I've also shared my thoughts on this over on LinkedInβI'd love to hear your perspective. 
Let's discuss in the comments there.'"), ], "Templates and Toolkits": [ ("Above-the-Fold Essentials", "Intro Hook (Pain)", 40, 60, "Highlight a common and tedious pain point: 'Struggling to create high-quality [documents] from scratch every week? It's exhausting and time-consuming.'"), ("Above-the-Fold Essentials", "Intro Hook (Promise)", 20, 40, "Promise a direct solution: 'Download these [X] battle-tested, ready-to-use templates to cut your creation time in half.'"), ("Above-the-Fold Essentials", "Visual Preview", 20, 40, "Describe a compelling visual of the toolkit, like a thumbnail or GIF showing the templates in action."), ("Above-the-Fold Essentials", "Trust Signals", 20, 40, "Add social proof, like 'Trusted by 5,000+ marketers and founders,' along with the author byline and publication dates."), ("Above-the-Fold Essentials", "Jump Links", 10, 20, "This is a placeholder for the auto-generated Table of Contents."), ("What's Included", "Complete Template List", 100, 200, "Create a numbered list of every template in the kit. For each, provide a one-line description and specify the file format (e.g., 'Template 1: How-To Blog Post Outline (DOCX)')."), ("What's Included", "Bonus Assets & Resources", 60, 100, "List any additional value-adds included, such as checklists, swipe files, or calendar spreadsheets."), ("Why Use Templates", "Speed & Efficiency Gains", 60, 100, "Explain the benefit of speed. Quantify the time savings where possible (e.g., 'Cut drafting time by 60%' or 'Produce content 3x faster')."), ("Why Use Templates", "Consistency & Brand Voice", 60, 100, "Explain the benefit of consistency. 
How do templates ensure uniform quality and tone, even with multiple creators?"), ("Why Use Templates", "Proven Frameworks That Convert", 60, 100, "Explain that these aren't blank docs but are built on frameworks proven to drive engagement, conversions, and business results."), ("How to Customize", "Customization Guide for Template 1", 100, 150, "Provide a mini step-by-step guide for adapting the first major template. Show a 'before/after' example of a generic vs. customized version."), ("How to Customize", "Customization Guide for Template 2", 100, 150, "Provide step-by-step instructions for adapting the second major template, including where to add brand voice and what to personalize."), ("How to Customize", "Customization Guide for Template 3", 100, 150, "Provide instructions for adapting the third major template, with specific use cases for different industries or goals."), ("Real-World Examples", "Case Study: Company A Success", 80, 120, "Provide a detailed example with metrics: 'How SaaS Startup A used our Listicle Template to increase organic traffic by 40% in 90 days.'"), ("Real-World Examples", "Case Study: Freelancer B Success", 80, 120, "Provide another success story: 'How Freelancer B leveraged our Content Calendar Template to double their client roster.' 
Include a testimonial quote."), ("Advanced Toolkit Hacks", "Automation with Zapier", 60, 100, "Show how to connect the templates to automation workflows (e.g., auto-populate a calendar from form submissions)."), ("Advanced Toolkit Hacks", "Integration Tips", 60, 100, "Explain how to integrate the templates with popular tools like Google Analytics, Notion, or Airtable."), ("Advanced Toolkit Hacks", "Team Collaboration Best Practices", 60, 100, "Give best practices for using the templates with a distributed team, such as version control and comment workflows."), ("Frequently Asked Questions", "FAQs", 40, 80, faq_prompt), ("Download & Next Steps", "Primary Download CTA", 40, 60, "Write a prominent, action-oriented CTA like 'Download Your Free Templates & Toolkit Now.' This is typically for lead capture."), ("Download & Next Steps", "Secondary Content CTA", 30, 50, "Keep readers engaged with a link to a related pillar content piece, such as 'Check out our advanced guide to content marketing.'"), ("Download & Next Steps", "Social Share Prompt", 20, 40, "Encourage viral sharing with a prompt like 'Found these templates helpful? Share with your network!'"), ], } # ======================== RESEARCH FUNCTIONS ======================== # ... 
(All your functions from search_web down to the line before WordDocumentGenerator remain here, unchanged) def search_web(query): # This is now a fallback try: search_query = quote_plus(query) url = f"https://api.duckduckgo.com/?q={search_query}&format=json&no_html=1&skip_disambig=1" response = requests.get(url, timeout=10) data = response.json() results = { "query": query, "abstract": data.get("Abstract", ""), "answer": data.get("Answer", ""), "related_topics": [topic.get("Text", "")[:100] for topic in data.get("RelatedTopics", [])[:3] if topic.get("Text")] } return {k: v for k, v in results.items() if v} except Exception as e: return {"error": f"Search failed: {str(e)}"} def search_with_serpapi(query, api_key): """NEW: Search the web using SerpApi for richer, more creative results.""" if not api_key: print("β οΈ SerpApi key not found. Falling back to basic search.") return search_web(query) try: print(f"π¬ Performing advanced research on '{query}' with SerpApi...") params = {"engine": "google", "q": query, "api_key": api_key} search = GoogleSearch(params) results = search.get_dict() output = {"query": query} if "answer_box" in results: output["answer_box"] = results["answer_box"].get("snippet") or results["answer_box"].get("answer") if "organic_results" in results: output["organic_results"] = [ {"title": r.get("title"), "snippet": r.get("snippet")} for r in results["organic_results"][:5] ] if "related_questions" in results: output["related_questions"] = [q.get("question") for q in results["related_questions"]] if "related_searches" in results: output["related_searches"] = [s.get("query") for s in results["related_searches"]] print("β Advanced research complete.") return output except Exception as e: print(f"β SerpApi search failed: {e}. 
def parse_generated_content(content_text):
    """Parse "TOPIC n:" formatted model output into a list of topic dicts.

    Each topic section is expected to look like::

        TOPIC 1: Headline
        Keywords: ...
        Audience: ...
        Content Angle: ...
        SEO Opportunity: ...
        ---

    Field labels are recognised only at the start of a line, so words such
    as "audience", "seo" or "topic" appearing *inside* a value no longer cut
    that value short.  Lines following a label are treated as a continuation
    of that field until the next label or a ``---`` style separator line.

    Args:
        content_text: Raw text returned by the model.

    Returns:
        A list of dicts with the keys 'Topic Number', 'Content Type',
        'Headline', 'Keywords', 'Audience', 'Content Angle',
        'SEO Opportunity' and 'Generated'.  'Topic Number' is the 1-based
        position of the section in the text.  Empty input yields ``[]``.
    """
    if not content_text:
        return []

    field_keys = {
        'keyword': 'Keywords',
        'keywords': 'Keywords',
        'audience': 'Audience',
        'content angle': 'Content Angle',
        'seo opportunity': 'SEO Opportunity',
    }
    # Topic headers must start a line (optionally behind markdown markers).
    topic_header = re.compile(r'^[ \t*#]*TOPIC\s+\d+\s*:', re.IGNORECASE | re.MULTILINE)
    # "Label: value", tolerating markdown decoration such as "- **Label:** value".
    label_line = re.compile(
        r'^[\s*_#>\-]*(keywords?|audience|content\s+angle|seo\s+opportunity)'
        r'\s*\**\s*:\s*\**\s*(.*)$',
        re.IGNORECASE,
    )
    separator_line = re.compile(r'^[-_*=\s]{3,}$')

    topics = []
    for number, section in enumerate(topic_header.split(str(content_text))):
        if number == 0:
            # Anything before the first "TOPIC n:" header is preamble.
            continue

        topic_data = {
            'Topic Number': number,
            'Content Type': '',
            'Headline': '',
            'Keywords': '',
            'Audience': '',
            'Content Angle': '',
            'SEO Opportunity': '',
            'Generated': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        }

        lines = section.strip().split('\n')
        # The headline is whatever follows "TOPIC n:" on the header line;
        # strip stray markdown bold markers left by "**TOPIC 1: ...**".
        topic_data['Headline'] = lines[0].strip().strip('*').strip()

        collected = {}
        current_key = None
        for raw_line in lines[1:]:
            line = raw_line.strip()
            if not line:
                continue
            if separator_line.match(line):
                # A "---" rule ends the current field and is never content.
                current_key = None
                continue
            match = label_line.match(line)
            if match:
                label = ' '.join(match.group(1).lower().split())
                current_key = field_keys[label]
                parts = collected.setdefault(current_key, [])
                value = match.group(2).strip()
                if value:
                    parts.append(value)
            elif current_key is not None:
                # Wrapped text belonging to the most recent label.
                collected[current_key].append(line)

        for key, parts in collected.items():
            topic_data[key] = ' '.join(parts)
        topics.append(topic_data)
    return topics
Use the 'People Also Ask' section to address a real user question.] SEO Opportunity: [Briefly explain the search potential. Why would this rank well? What user intent does it capture?] --- (Repeat for TOPIC 2, TOPIC 3, etc.) **INSTRUCTIONS FOR CREATIVITY:** 1. **Headlines:** Do not use generic titles. Make them compelling, benefit-driven, and intriguing. Use the top search results for inspiration on what already ranks, then make yours better. 2. **Keywords:** Go beyond the obvious. Use the 'Related Searches' data to find valuable long-tail keywords. 3. **Content Angle:** This is crucial. Your angle should be unique. Answering a question from the 'People Also Ask' section is a great way to create a highly relevant and valuable post. """ response = client.chat.completions.create(model="gpt-4o", messages=[{"role": "system", "content": "You are a world-class SEO and Content Strategist at Digiworks, specializing in creating viral blog topics."}, {"role": "user", "content": final_prompt}], max_tokens=2000, temperature=0.8) content_text = response.choices[0].message.content topics = parse_generated_content(content_text) for topic in topics: topic['Content Type'] = content_type df = pd.DataFrame(topics) return f"π Successfully generated {len(topics)} {content_type} topics!\n\n{content_text}", df, "β Generation complete!" 
def reduce_content_intelligently(sentences, target_words):
    """Shrink text to at most *target_words* words by keeping the best sentences.

    Every sentence is scored (position in the text, length, brand/business
    terms, statistics, action verbs).  Sentences are then accepted from the
    highest score downwards until the next one would exceed the budget.  The
    accepted sentences are returned in their ORIGINAL order so the reduced
    text still reads coherently, rather than in score order.

    Args:
        sentences: Sequence of objects exposing a ``.text`` string (the
            caller passes a list of spaCy sentence spans).
        target_words: Maximum number of whitespace-separated words to return.

    Returns:
        The reduced text.  If not even the top-scoring sentence fits, the
        first *target_words* words of the original text are returned so the
        result is never empty for non-empty input.
    """
    brand_terms = ('digiworks', 'remote', 'talent', 'business', 'team',
                   'scale', 'hire', 'cost', 'efficiency', 'productivity')
    action_words = ('implement', 'achieve', 'optimize', 'improve', 'increase',
                    'reduce', 'enhance', 'build', 'create')
    stat_pattern = re.compile(r'\d+%|\d+\+|\$\d+|\d+x|\d+:')

    total = len(sentences)
    scored = []
    for index, sent in enumerate(sentences):
        sentence_text = sent.text.strip()
        lowered = sentence_text.lower()
        word_count = len(sent.text.split())

        score = 0
        # Opening and closing sentences carry the framing of the section.
        if index == 0 or index == total - 1:
            score += 10
        elif index == 1 or index == total - 2:
            score += 5
        # Prefer medium-length sentences; penalise fragments.
        if 8 <= word_count <= 25:
            score += 5
        elif word_count < 5:
            score -= 3
        score += 3 * sum(1 for term in brand_terms if term in lowered)
        if stat_pattern.search(sentence_text):
            score += 4
        score += 2 * sum(1 for word in action_words if word in lowered)

        scored.append((index, sentence_text, score, word_count))

    # Stable sort: equal scores keep their document order, as before.
    ranked = sorted(scored, key=lambda item: item[2], reverse=True)

    kept = []
    used_words = 0
    for index, sentence_text, _score, word_count in ranked:
        if used_words + word_count > target_words:
            break
        kept.append((index, sentence_text))
        used_words += word_count

    # Re-emit the chosen sentences in the order they appeared in the text.
    reduced_text = " ".join(sentence_text for _, sentence_text in sorted(kept)).strip()

    # Fallback to prevent returning 0 words: hard-truncate the original text.
    if not reduced_text and sentences:
        original_text = " ".join(s.text for s in sentences)
        return ' '.join(original_text.split()[:target_words])
    return reduced_text
comes from years of building and scaling global remote teams."} for context_type, addition in context_additions.items(): if context_type in content_context.lower() and len(expanded_text.split()) < target_words: expanded_text += addition; break return expanded_text # ======================== OPENAI GENERATION (v1.0.0+) ======================== def generate_with_openai_intelligent(prompt, min_words, max_words, content_context="", delay=8, existing_sections=None): enhanced_prompt = f"You are a senior content strategist at Digiworks, the world's leading marketplace for top 1% offshore talent. You have deep expertise in remote work, global talent acquisition, and business scaling strategies.\nDIGIWORKS BRAND VOICE:\n- Authoritative and data-driven\n- Focus on ROI and business outcomes\n- Emphasize quality, vetting, and elite talent\n- Professional but approachable tone\n- Always position Digiworks as the solution\n{prompt}\nWORD COUNT TARGET: {min_words}-{max_words} words\nWrite from the expert perspective of the Digiworks team. Include specific examples, actionable insights, and maintain our brand voice throughout.\nGenerate the content now:" try: print(f" π€ Generating with OpenAI... 
(target: {min_words}-{max_words} words)") response = client.chat.completions.create(model="gpt-4o", messages=[{"role": "system", "content": "You are a senior content strategist at Digiworks, the world's leading marketplace for top 1% offshore talent."}, {"role": "user", "content": enhanced_prompt}], max_tokens=1500, temperature=0.7) content = response.choices[0].message.content.strip() adjusted_content = intelligent_word_count_adjustment(content, min_words, max_words, content_context) final_word_count = len(adjusted_content.split()) print(f" β Generated & adjusted: {final_word_count} words") print(f" β³ Waiting {delay} seconds...") time.sleep(delay) return adjusted_content except Exception as e: print(f" β OpenAI API error: {str(e)}") return None # ======================== BLOG GENERATOR CLASS (MODIFIED) ======================== class BlogGenerator: def __init__(self): self.delay_between_sections = 8 self.global_topic_counter = 0 def generate_blog_from_topic_data(self, topic_data, topic_index=None): if topic_index is None: self.global_topic_counter += 1; topic_index = self.global_topic_counter topic = topic_data.get('Headline', '') content_type = topic_data.get('Content Type', '').replace(' π', '').replace(' π', '').replace(' βοΈ', '').replace(' π', '').replace(' π', '').replace(' π‘', '') if content_type not in BLOG_STRUCTURES: yield ('result', None, f"Invalid content type for topic: {topic}"); return structure = BLOG_STRUCTURES[content_type] yield ('status', f"π― Starting Topic #{topic_index}: {topic} ({len(structure)} sections)") print(f"\nπ Generating Topic #{topic_index} - '{content_type}' blog: {topic}\nπ Structure has {len(structure)} sections") blog_context = f""" TOPIC NUMBER: {topic_index} BLOG TOPIC: {topic} CONTENT TYPE: {content_type} TARGET KEYWORDS: {topic_data.get('Keywords', '')} TARGET AUDIENCE: {topic_data.get('Audience', '')} CONTENT ANGLE: {topic_data.get('Content Angle', '')} SEO OPPORTUNITY: {topic_data.get('SEO Opportunity', '')} DIGIWORKS 
CONTEXT: - Company: Digiworks (global remote talent marketplace) - Mission: Connect businesses with top 1% offshore talent - Value: Cost-effective, vetted, scalable remote teams - Positioning: Premium, reliable, results-driven solution """ sections_data = [] for i, (section_group, section_name, min_words, max_words, purpose) in enumerate(structure): yield ('status', f"π Topic #{topic_index} | Section {i+1}/{len(structure)}: Generating '{section_name}'...") section_content = "" if section_name.lower() in ["jump links", "table of contents"]: section_content = "[TOC_PLACEHOLDER]" print(f"\n π Placeholder for: {section_name}") else: print(f"\n π Generating: {section_name} ({min_words}-{max_words} words)") temp_purpose = purpose.replace("[Date]", datetime.now().strftime('%B %d, %Y')) item_match = re.match(r'Item (\d+):', section_name) if content_type == "Listicle" and item_match: item_number = int(item_match.group(1)) if 1 <= item_number <= len(LISTICLE_ASPECTS): aspect = LISTICLE_ASPECTS[item_number - 1] temp_purpose = ( f"The overall blog topic is '{topic}'. For this specific list item (Item #{item_number}), " f"you must focus exclusively on the aspect of: **{aspect}**. " "Write a compelling title for this item related to this specific aspect, and then write the body content. " "Do not repeat content from other aspects." ) # --- NEW: Context-Aware Content Generation --- previous_content_summary = "" if sections_data: previous_content_summary += "\n\n--- PREVIOUSLY GENERATED SECTIONS FOR CONTEXT ---\n" for prev_section in sections_data: if not prev_section['Section Content'].startswith("["): # Don't include placeholders previous_content_summary += f"\n## {prev_section['Section Name']}\n{prev_section['Section Content']}\n" section_prompt = f"""{blog_context} {previous_content_summary} --- YOUR CURRENT TASK --- Now, write the "{section_name}" section for this {content_type} blog post. 
def create_excel_export(all_sections_data):
    """Write generated blog sections to a timestamped .xlsx in the temp dir.

    Args:
        all_sections_data: List of per-section dicts (one row each).

    Returns:
        The path of the written workbook, or ``None`` when there is nothing
        to export.
    """
    if not all_sections_data:
        return None

    df = pd.DataFrame(all_sections_data)

    # Show the topic index first so rows are easy to scan.
    cols = df.columns.tolist()
    if 'Topic Number' in cols:
        cols.insert(0, cols.pop(cols.index('Topic Number')))
        df = df[cols]

    filename = f"digiworks_indexed_blog_sections_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    temp_path = os.path.join(tempfile.gettempdir(), filename)

    with pd.ExcelWriter(temp_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='Indexed Blog Sections', index=False)
        worksheet = writer.sheets['Indexed Blog Sections']
        for column in worksheet.columns:
            # Size each column to its longest rendered value (capped at 100).
            # Empty cells count as zero width rather than as the text "None".
            longest = max(
                (len(str(cell.value)) for cell in column if cell.value is not None),
                default=0,
            )
            worksheet.column_dimensions[column[0].column_letter].width = min(longest + 2, 100)
    return temp_path
INPUT DATA: {initial_json} Return only the revised JSON array and nothing else. """ response = client.chat.completions.create(model="gpt-4o", messages=[{"role": "system", "content": "You are a creative copywriter who refines JSON data to eliminate repetition and add marketing flair."}, {"role": "user", "content": prompt}], temperature=0.75) response_text = response.choices[0].message.content.strip() if response_text.startswith("```json"): response_text = response_text[7:].strip() if response_text.endswith("```"): response_text = response_text[:-3].strip() refined_details = json.loads(response_text) print("β Successfully refined table content for creativity.") return refined_details except Exception as e: print(f"β Failed to refine listicle table content via OpenAI: {e}. Using original data.") return item_details def generate_listicle_table_from_items(item_details): if not item_details: return "" print("π€ Calling OpenAI to format the final clean listicle table...") item_summary = "\n".join([f"- Item Name: {item.get('name', 'N/A')}\n - Ideal For: {item.get('ideal_for', 'N/A')}\n - Benefit: {item.get('business_benefit', 'N/A')}" for item in item_details]) prompt = f""" Based on the following item summaries, create a single, clean, valid markdown table. The table must have exactly these columns in this order: | Trend / Tool | Ideal For | Key Business Benefit | Do not add any text, explanations, or introductions before or after the table. Only output the markdown table itself. 
def parse_listicle_table(text):
    """Parse a markdown table into ``(headers, rows)``.

    The first line containing ``|`` that is not a separator row supplies the
    headers.  Later table lines become rows, but only when their cell count
    matches the header count.  Separator rows are skipped in every common
    spelling (``|---|---|``, ``| --- | --- |``, ``|:---|:---:|---:|``), so a
    row of dashes never ends up as table data.

    Args:
        text: Markdown text that may contain a table.

    Returns:
        ``(headers, rows)`` where ``headers`` is a list of strings and
        ``rows`` a list of lists of strings; ``([], [])`` when no usable
        table is found.
    """
    if not text or pd.isna(text):
        return [], []

    def split_cells(line):
        # Outer pipes are optional in markdown; drop them before splitting.
        return [cell.strip() for cell in line.strip('|').split('|')]

    def is_separator(line):
        if line.startswith('|---'):
            return True
        # Alignment row: every cell is dashes with optional leading/trailing colon.
        return all(re.fullmatch(r':?-{3,}:?', cell) for cell in split_cells(line))

    lines = [line.strip() for line in str(text).strip().split('\n') if line.strip()]
    table_lines = [line for line in lines if '|' in line and not is_separator(line)]
    if len(table_lines) < 2:
        return [], []

    headers = split_cells(table_lines[0])
    rows = []
    for line in table_lines[1:]:
        cells = split_cells(line)
        # Ragged rows cannot be placed in the grid; skip them.
        if len(cells) == len(headers):
            rows.append(cells)
    return headers, rows
def format_faq_content(text):
    """Normalise raw FAQ text into clean 'Q. ...\\nA. ...' pairs.

    "Q." and "A." count as markers only at the start of a line, or inline
    directly after sentence-ending punctuation and whitespace (so
    "Q. Who? A. Me." is still split).  They are no longer matched inside
    words or abbreviations such as "FAQ." or "U.S.A.", which previously cut
    answers short.  Non-marker lines following a question or an answer are
    joined onto it with a space instead of being dropped.

    A question with no answer is discarded when the next question starts; an
    "A." line with no pending question, or a second "A." for the same
    question, is ignored.  Text before the first question is ignored.

    Args:
        text: Raw FAQ text produced by the model.

    Returns:
        The Q&A pairs separated by blank lines, or "" if none were found.
    """
    if not text or pd.isna(text):
        return ""

    # A marker's period must be followed by whitespace, '*', end of line, or
    # the start of a capitalised word ("Q.What"), never by "I." as in "A.I.".
    marker_tail = r'\.(?=[\s*]|$|[A-Z][a-z])'
    inline_marker = re.compile(
        r'(?<=[.?!])[ \t]+(?=\**[QA]' + marker_tail + r')', re.MULTILINE
    )
    line_marker = re.compile(r'^\**\s*([QA])' + marker_tail + r'\**\s*(.*)$')

    # Put every inline marker on its own line before parsing line by line.
    normalized = inline_marker.sub('\n', str(text).strip())

    qa_pairs = []
    question_parts = []
    answer_parts = None  # stays None until the pending question gets its "A."

    def close_pair():
        question = " ".join(question_parts).strip()
        if question and answer_parts is not None:
            answer = " ".join(answer_parts).strip()
            qa_pairs.append(f"Q. {question}\nA. {answer}")

    for raw_line in normalized.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        match = line_marker.match(line)
        if match and match.group(1) == 'Q':
            # A new question finishes the previous pair (or drops an
            # unanswered question).
            close_pair()
            question_parts, answer_parts = [match.group(2).strip()], None
        elif match:
            # "A." line: only meaningful for a pending, non-empty question.
            if answer_parts is None and " ".join(question_parts).strip():
                answer_parts = [match.group(2).strip()]
        elif answer_parts is not None:
            answer_parts.append(line)    # wrapped answer text
        elif question_parts:
            question_parts.append(line)  # wrapped question text
    close_pair()

    return "\n\n".join(qa_pairs)
paragraph, text, anchor_name): hyperlink = OxmlElement('w:hyperlink'); hyperlink.set(qn('w:anchor'), anchor_name) sub_run = OxmlElement('w:r'); text_el = OxmlElement('w:t'); text_el.text = text; sub_run.append(text_el) r_pr = OxmlElement('w:rPr'); style = OxmlElement('w:rStyle'); style.set(qn('w:val'), 'Hyperlink'); r_pr.append(style); sub_run.append(r_pr) hyperlink.append(sub_run); paragraph._p.append(hyperlink) def add_listicle_table(self, headers, table_data): if not table_data or not headers or not (num_cols := len(headers)): return table = self.doc.add_table(rows=1, cols=num_cols, style='TableGrid') hdr_cells = table.rows[0].cells for i, header_text in enumerate(headers): cell = hdr_cells[i]; cell.text = header_text for para in cell.paragraphs: para.alignment = WD_ALIGN_PARAGRAPH.CENTER for run in para.runs: run.bold = True for row_data in table_data: row_cells = table.add_row().cells for i, cell_text in enumerate(row_data): row_cells[i].text = cell_text col_width = Inches(6.5 / num_cols) if num_cols > 0 else Inches(1) for col in table.columns: col.width = col_width def add_comparison_table(self, header_text, table_data): if not table_data: return option_a_title, option_b_title = "Option A", "Option B" if " vs. 
" in header_text.lower(): if len(parts := re.split(r'\s+vs\.\s+', header_text, 1, re.IGNORECASE)) == 2: option_a_title, option_b_title = parts[0].strip(), parts[1].split('(')[0].strip() table = self.doc.add_table(rows=len(table_data) + 2, cols=3, style='TableGrid') table.autofit = False; table.allow_autofit = False table.columns[0].width = Inches(1.5); table.columns[1].width = Inches(2.5); table.columns[2].width = Inches(2.5) title_cell = table.cell(0, 0).merge(table.cell(0, 2)); title_cell.text = header_text; title_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER table.cell(1, 0).text = "Metric"; table.cell(1, 1).text = option_a_title; table.cell(1, 2).text = option_b_title for i, row_data in enumerate(table_data): for j, cell_data in enumerate(row_data): table.rows[i + 2].cells[j].text = cell_data for row_idx, row in enumerate(table.rows): for col_idx, cell in enumerate(row.cells): try: for paragraph in cell.paragraphs: paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if row_idx < 2 else (WD_ALIGN_PARAGRAPH.LEFT if col_idx > 0 else WD_ALIGN_PARAGRAPH.CENTER) for run in paragraph.runs: run.font.name = 'Calibri'; run.font.size = Pt(10) if row_idx < 2: run.bold = True except Exception: pass def _extract_item_details(self, content, persona_prompt): default_details = {'name': 'Extraction Failed', 'ideal_for': 'N/A', 'business_benefit': 'N/A'} if lines := content.split('\n'): if name_match := re.search(r'Item \d+:\s*(.*)', lines[0]): default_details['name'] = name_match.group(1).strip() try: prompt = f""" As a business strategist, analyze the following paragraph. Your task is to extract key strategic information and return it as a single, valid JSON object with ONLY the keys "name", "ideal_for", and "business_benefit". Your Persona: {persona_prompt} - "name": The full, official name of the trend or tool. - "ideal_for": Based on your persona, who is the primary beneficiary? 
def add_blog_topic(self, topic_number, topic_title, sections_df, content_type):
    """Append one blog topic to the Word document, yielding progress messages.

    This is a generator: nothing is written until it is iterated. It adds the
    topic heading, optionally analyses listicle items, renders every section
    (grouped by 'Section Group' in first-appearance order) and finishes with
    a page break.

    Args:
        topic_number: Used in the heading and as a prefix of bookmark names.
        topic_title: Heading text; also substituted for ``[Topic]`` in
            table-of-contents entries.
        sections_df: DataFrame holding this topic's rows. 'Section Name' and
            'Section Content' are read; 'Section Group' and 'Word Count' are
            optional.
        content_type: Content-type label. Listicle/Comparison handling is
            enabled when the label contains that word.

    Yields:
        str: Progress messages (only produced for listicle topics).
    """
    topic_title = self._cell_text(topic_title)
    content_type = self._cell_text(content_type)
    # Both special types are detected the same way (substring), so a label
    # carrying extra decoration after the type name is still recognised.
    is_listicle = "Listicle" in content_type
    is_comparison = "Comparison" in content_type

    self.doc.add_paragraph(f"Topic #{topic_number}: {topic_title}", style='BlogTitle')
    self.doc.add_paragraph("=" * 80).alignment = WD_ALIGN_PARAGRAPH.CENTER

    listicle_items_data = []
    if is_listicle:
        listicle_items_data = yield from self._analyze_listicle_items(sections_df)

    toc_items, bookmark_by_position = self._build_toc(topic_number, topic_title, sections_df)

    # Group rows while remembering each row's position, so the header of a
    # section can later be matched to the bookmark created for that exact row.
    grouped_sections = {}
    for position, (_, section) in enumerate(sections_df.iterrows()):
        group = self._cell_text(section.get('Section Group', 'General'), default='General')
        grouped_sections.setdefault(group, []).append((position, section))

    show_group_headers = len(grouped_sections) > 1
    for group_name, sections in grouped_sections.items():
        if show_group_headers:
            group_run = self.doc.add_paragraph().add_run(f"\n{group_name}")
            group_run.font.name = 'Arial'
            group_run.font.size = Pt(16)
            group_run.bold = True
            group_run.underline = True

        for position, section in sections:
            section_name = self._cell_text(section.get('Section Name', 'Untitled'), default='Untitled')
            section_content = self._cell_text(section.get('Section Content', ''))
            word_count = self._word_count(section.get('Word Count', 0))

            if section_name.lower() in ("jump links", "table of contents"):
                header_para = self.doc.add_paragraph(style='SectionHeader')
                header_para.add_run("Table of Contents")
                for text, bookmark in toc_items:
                    entry = self.doc.add_paragraph(style='List Bullet')
                    self.add_internal_hyperlink(entry, text, bookmark)
                continue

            header_para = self.doc.add_paragraph(style='SectionHeader')
            header_para.add_run(f"{section_name}")
            count_run = header_para.add_run(f" ({word_count} words)")
            count_run.font.name = 'Arial'
            count_run.font.size = Pt(10)
            count_run.italic = True

            # Look the bookmark up by row position, not by section name:
            # a name lookup would give every same-named section the first
            # match's bookmark and leave the other TOC links dangling.
            bookmark = bookmark_by_position.get(position)
            if bookmark:
                self.add_bookmark(header_para, bookmark)

            if section_name == "At-a-Glance Comparison" and is_listicle:
                yield from self._add_listicle_summary_table(listicle_items_data)
                continue

            cleaned_content = clean_blog_content(section_content)
            if section_name == "FAQs":
                cleaned_content = format_faq_content(cleaned_content)

            if section_name == "At-a-Glance Matrix" and is_comparison:
                header, data = parse_comparison_table(cleaned_content)
                if data:
                    self.add_comparison_table(header, data)
                    self.doc.add_paragraph()
                    continue
                # No parsable table: fall through and render the raw text.

            self._add_content_paragraphs(cleaned_content)

    # Drop a trailing empty paragraph (e.g. the spacer added after a table)
    # before closing the topic with a page break.
    paragraphs = self.doc.paragraphs
    if paragraphs and paragraphs[-1].text == "":
        last_element = paragraphs[-1]._element
        last_element.getparent().remove(last_element)
    self.doc.add_page_break()


def _cell_text(self, value, default=""):
    """Return a spreadsheet cell as text, mapping None/NaN to ``default``."""
    try:
        missing = value is None or bool(pd.isna(value))
    except (TypeError, ValueError):
        # pd.isna returned something without a single truth value.
        missing = False
    return default if missing else str(value)


def _word_count(self, value):
    """Return a display value for a 'Word Count' cell; empty cells give 0."""
    if value is None:
        return 0
    try:
        number = float(value)
    except (TypeError, ValueError):
        return value  # non-numeric text is shown unchanged
    if number != number:  # NaN, i.e. an empty cell
        return 0
    # A column containing blanks is read as float; show 250, not 250.0.
    return int(number) if number.is_integer() else number


def _analyze_listicle_items(self, sections_df):
    """Analyse every 'Item…' section; yield progress, return the item dicts.

    The returned list is delivered as the generator's return value, so the
    caller obtains it with ``items = yield from ...``.
    """
    items = []
    # astype(str) keeps the .str accessor usable when the column holds
    # blanks or non-text values.
    is_item = sections_df['Section Name'].astype(str).str.startswith('Item')
    for _, item_row in sections_df[is_item].iterrows():
        persona = next(self.ai_personas)
        content = self._cell_text(item_row['Section Content'])
        yield f" -> Analyzing Item: '{content[:40]}...' (as {persona.split(',')[0]})"

        # The first line of the generated content is used as the item title.
        generated_title = content.split('\n')[0].strip() or 'Untitled Item'
        details = self._extract_item_details(content, persona)
        # Override the extracted name so the table matches the generated title.
        details['name'] = generated_title
        items.append(details)
    return items


def _build_toc(self, topic_number, topic_title, sections_df):
    """Build table-of-contents entries for the topic.

    Returns:
        tuple: ``(toc_items, bookmark_by_position)`` where ``toc_items`` is a
        list of ``(display_title, bookmark_name)`` in row order and
        ``bookmark_by_position`` maps a row's position in ``sections_df`` to
        its bookmark name.
    """
    non_toc_groups = {
        "Intro Hook", "Above-the-Fold Essentials", "Author Details",
        "Conclusion & Next Steps", "Table of Contents",
    }
    toc_items = []
    bookmark_by_position = {}
    for position, (_, section) in enumerate(sections_df.iterrows()):
        group = self._cell_text(section.get('Section Group', 'General'), default='General')
        name = self._cell_text(section.get('Section Name', ''))
        if group in non_toc_groups or not name:
            continue

        display_title = name
        uses_generated_title = (
            '[Actionable Step Title]' in name
            or '[Trend Name]' in name
            or ('Item' in name and '[Name]' in name)
        )
        if uses_generated_title:
            # Placeholder names are replaced by the first line of the content.
            content = self._cell_text(section.get('Section Content', ''))
            first_line = content.split('\n')[0].strip()
            if first_line:
                display_title = first_line
        elif '[Topic]' in name:
            display_title = name.replace('[Topic]', topic_title)

        slug = re.sub('[^A-Za-z0-9]+', '', name)
        # The running index keeps bookmark names unique within the topic.
        bookmark_name = f"topic{topic_number}_{slug}_{len(toc_items)}"
        toc_items.append((display_title, bookmark_name))
        bookmark_by_position[position] = bookmark_name
    return toc_items, bookmark_by_position


def _add_listicle_summary_table(self, listicle_items_data):
    """Render the listicle summary table, yielding progress messages."""
    if not listicle_items_data:
        self.doc.add_paragraph("[No listicle items found to build a table.]", style='BlogContent')
        return

    yield " -> π¨ Refining table content for creativity and uniqueness..."
    refined_data = refine_table_content_with_openai(listicle_items_data)
    yield " -> π Formatting final summary table..."
    clean_table_md = generate_listicle_table_from_items(refined_data)
    headers, data = parse_listicle_table(clean_table_md)
    if data:
        self.add_listicle_table(headers, data)
        self.doc.add_paragraph()
    else:
        self.doc.add_paragraph("[Could not generate or parse the listicle table.]", style='BlogContent')


def _add_content_paragraphs(self, cleaned_content):
    """Write cleaned section text to the document, one paragraph per line.

    Recognises bullet lines, ``1. `` numbered lines, ``###`` sub-headings,
    ``Q.``/``A.`` FAQ lines and inline ``**bold**`` spans.
    """
    # The real bullet character plus the prefix exactly as it appears in this
    # file; the prefix is removed by its own length rather than a fixed 2.
    bullet_prefixes = ('\u2022 ', 'β’ ')
    numbered_item = re.compile(r'^\d+\.\s')
    bold_span = re.compile(r'(\*\*.*?\*\*)')

    for raw_line in cleaned_content.split('\n'):
        para_text = raw_line.strip()
        if not para_text:
            continue

        bullet = next((prefix for prefix in bullet_prefixes if para_text.startswith(prefix)), None)
        if bullet is not None:
            self.doc.add_paragraph(para_text[len(bullet):], style='List Bullet')
        elif numbered_item.match(para_text):
            self.doc.add_paragraph(numbered_item.sub('', para_text), style='List Number')
        elif para_text.startswith("###"):
            p = self.doc.add_paragraph()
            p.add_run(para_text.replace("###", "").strip()).bold = True
        elif para_text.startswith(("Q.", "A.")):
            p = self.doc.add_paragraph()
            label, _, rest = para_text.partition('.')
            p.add_run(f"{label}.").bold = True
            if rest:
                p.add_run(rest)
        else:
            p = self.doc.add_paragraph(style='BlogContent')
            for part in bold_span.split(para_text):
                if not part:
                    continue  # re.split yields '' around adjacent matches
                if part.startswith('**') and part.endswith('**'):
                    p.add_run(part[2:-2]).bold = True
                else:
                    p.add_run(part)


def save_document(self, filename):
    """Save the document by delegating to ``self.doc.save(filename)``."""
    self.doc.save(filename)


# ======================== WORD PROCESSING (MODIFIED) ========================
def process_excel_to_word(excel_file_path):
    """Build a formatted Word document from an Excel sheet of blog sections.

    This is a generator that reports progress as tagged tuples:

    * ``('status', message)`` - progress update,
    * ``('error', message)`` - the run failed; nothing further is yielded,
    * ``('complete', path, message)`` - path of the saved .docx in the
      system temp directory plus a summary.

    Any exception raised while processing is caught and reported as an
    ``'error'`` tuple that includes the traceback.

    Args:
        excel_file_path: Workbook location, passed to ``pd.read_excel``.
    """
    required_columns = ['Topic Number', 'Blog Topic', 'Content Type', 'Section Name', 'Section Content']
    try:
        yield ('status', "β Excel file loaded. Reading data...")
        df = pd.read_excel(excel_file_path)

        missing_cols = [col for col in required_columns if col not in df.columns]
        if missing_cols:
            yield ('error', f"Missing required columns: {', '.join(missing_cols)}")
            return

        yield ('status', "π Initializing Word document...")
        doc_generator = WordDocumentGenerator()
        document = doc_generator.doc

        header_para = document.add_paragraph()
        title_run = header_para.add_run("Digiworks Blog Content Collection")
        title_run.font.name = 'Arial'
        title_run.font.size = Pt(24)
        title_run.bold = True
        header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        date_para = document.add_paragraph()
        date_run = date_para.add_run(f"Generated on: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}")
        date_run.font.name = 'Arial'
        date_run.font.size = Pt(12)
        date_run.italic = True
        date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        document.add_paragraph("\n")

        topics = df.groupby('Topic Number')
        total_topics = len(topics)
        processed_topics = 0
        for processed_topics, (topic_number, topic_sections) in enumerate(topics, start=1):
            first_row = topic_sections.iloc[0]
            topic_title, content_type = first_row['Blog Topic'], first_row['Content Type']
            # str() so a numeric or empty title cell cannot break the slice.
            yield ('status', f"βοΈ Building Topic {processed_topics}/{total_topics}: '{str(topic_title)[:40]}...'")
            # add_blog_topic yields bare strings; tag them so every item this
            # generator emits has the same ('kind', ...) tuple shape.
            for progress in doc_generator.add_blog_topic(topic_number, topic_title, topic_sections, content_type):
                yield progress if isinstance(progress, tuple) else ('status', progress)

        yield ('status', "π Saving final Word document...")
        # Remove a trailing empty paragraph left behind by the last topic.
        paragraphs = document.paragraphs
        if paragraphs and paragraphs[-1].text == "":
            last_element = paragraphs[-1]._element
            last_element.getparent().remove(last_element)

        filename = f"digiworks_formatted_blogs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
        temp_path = os.path.join(tempfile.gettempdir(), filename)
        doc_generator.save_document(temp_path)

        total_sections = len(df)
        if 'Word Count' in df.columns:
            # Blank or non-numeric cells count as 0 so the total stays an
            # integer that the ':,' format below can render.
            total_words = int(pd.to_numeric(df['Word Count'], errors='coerce').fillna(0).sum())
        else:
            total_words = 0

        success_message = (
            f"β Word document created successfully!\n"
            f"π Processed {processed_topics} blog topics\n"
            f"π Total sections: {total_sections}\n"
            f"π¬ Total words: {total_words:,}\n"
            f"π File: {filename}"
        )
        yield ('complete', temp_path, success_message)
    except Exception as e:
        # Top-level boundary: report the failure to the caller instead of
        # raising out of the generator.
        import traceback
        yield ('error', f"β Error processing file: {str(e)}\n\nTraceback:\n{traceback.format_exc()}")


# ======================== GRADIO INTERFACE ========================
Idea Generation β Blog Post Creation β Word Document Formatting
π― Digiworks Unified Blog Generator | Idea Generation β Blog Post Creation β Word Document Formatting
Perfect for content teams and marketers