Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
def parse_markdown_files(file_paths):
    """
    Read each markdown file and pair its path with its full text.

    :param file_paths: Iterable of markdown file paths
    :return: List of dicts, one per path and in the same order, each with
        'filename' (the path exactly as given) and 'content' (the file's
        text decoded as UTF-8)
    """
    def _load(markdown_path):
        # Each file is opened, read in full, and closed before the next one.
        with open(markdown_path, 'r', encoding='utf-8') as handle:
            return {'filename': markdown_path, 'content': handle.read()}

    return [_load(markdown_path) for markdown_path in file_paths]
# Fenced code blocks are never translated; DOTALL lets one fence span lines.
_CODE_FENCE_RE = re.compile(r'```.*?```', re.DOTALL)
# Only genuine markup: HTML comments, or tags whose name starts with a letter.
# A bare '<' or '>' in prose (e.g. "a < b and c > d") is left untouched.
_HTML_MARKUP_RE = re.compile(r'<!--.*?-->|</?[A-Za-z][^<>]*>', re.DOTALL)
# Inline links and images. Group 1 is the visible label / alt text. The label
# may not contain brackets, so the match cannot start at an earlier,
# unrelated '[' on the same line.
_LINK_OR_IMAGE_RE = re.compile(r'!?\[([^\[\]\n]*)\]\([^()\n]*\)')
# One or more blank (or whitespace-only) lines separate paragraphs.
_PARAGRAPH_BREAK_RE = re.compile(r'\n\s*\n')


def extract_translatable_text(content):
    """
    Extracts translatable text from markdown content.

    Fenced code blocks, HTML tags/comments and link or image targets are
    removed. The visible label of a link (or the alt text of an image) is
    kept because it is human-readable text. What remains is split into
    paragraphs on blank lines.

    :param content: Markdown content
    :return: List of non-empty, stripped text segments (paragraphs/headers)
    """
    # Order matters: drop code first so markup inside a fence is not
    # half-processed by the later patterns.
    content = _CODE_FENCE_RE.sub('', content)
    content = _HTML_MARKUP_RE.sub('', content)
    # Keep the label, discard the URL (and the leading '!' of an image).
    content = _LINK_OR_IMAGE_RE.sub(r'\1', content)

    segments = (part.strip() for part in _PARAGRAPH_BREAK_RE.split(content))
    return [segment for segment in segments if segment]
def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.

    Each entry is written as UTF-8 to its 'filename' path; missing parent
    directories are created first. An existing file at that path is
    overwritten.

    :param translated_files: Iterable of dicts with 'filename' (target path)
        and 'content' (text to write)
    """
    for entry in translated_files:
        target = entry['filename']
        directory = os.path.dirname(target)
        # A bare filename has no directory part; os.makedirs('') would raise
        # FileNotFoundError, so only create directories when there is one.
        # exist_ok avoids the check-then-create race.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            f.write(entry['content'])