"""
Sync the BitTransformerLM repository to the HuggingFace Hub for the
open-source (OS) launch.

Uploads the cleaned documentation and code, attaching the launch commit
message to each upload.
"""
|
|
import logging
import os
from pathlib import Path
from typing import Optional, List

from huggingface_hub import HfApi, login
|
|
| |
# Configure root logging at import time so every message emitted by this
# script carries a timestamp and a severity level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
|
|
# Extended description attached to every upload commit. Built once at import
# time instead of being re-created for each uploaded file.
_COMMIT_DESCRIPTION = """
This OS launch commit includes:

✅ **Cleaned Documentation**
- Removed inflated claims and marketing language
- Added honest research status and limitations
- Created professional model card and validation reports
- Streamlined licensing to AGPLv3 + commercial contact

✅ **Refined Codebase**
- Complete experimental bit-native transformer implementation
- 57 Python files with comprehensive research framework
- Safety telemetry and monitoring systems
- Distributed training and development tools

✅ **Professional Standards**
- Empirical validation of all claims
- Clear experimental vs production distinctions
- Rigorous research methodology requirements
- Community contribution framework

Ready for serious research evaluation and academic investigation.
""".strip()


def _is_excluded(relative_path: Path, exclude_patterns: List[str]) -> bool:
    """Return True when *relative_path* is covered by any exclude pattern.

    ``PurePath.match`` compares only the trailing path components and does not
    treat ``**`` as recursive, so a pattern such as ``weights/**`` would miss
    ``weights/a/b.py``. Patterns of the form ``<dir>/**`` are therefore also
    checked against every parent directory name of the path.
    """
    parent_dirs = relative_path.parts[:-1]
    for pattern in exclude_patterns:
        if pattern.endswith("/**") and pattern[:-3] in parent_dirs:
            return True
        if relative_path.match(pattern):
            return True
    return False


def _collect_files(
    repo_root: Path,
    include_patterns: List[str],
    exclude_patterns: List[str],
) -> List[Path]:
    """Return the files under *repo_root* matching an include pattern and no exclude pattern."""
    selected: List[Path] = []
    seen = set()  # guards against double uploads if include patterns ever overlap
    for pattern in include_patterns:
        for candidate in repo_root.glob(pattern):
            if candidate in seen or not candidate.is_file():
                continue
            if _is_excluded(candidate.relative_to(repo_root), exclude_patterns):
                continue
            seen.add(candidate)
            selected.append(candidate)
    return selected


def sync_repository_to_hf(
    repo_id: str = "WCNegentropy/BitTransformerLM",
    token: Optional[str] = None,
    commit_message: str = "🚀 OS Launch: Clean documentation and refined licensing",
) -> bool:
    """
    Sync the entire cleaned BitTransformerLM repository to HuggingFace Hub.

    Files are selected relative to the directory containing this script and
    uploaded one at a time; a failure on one file is logged and the remaining
    files are still attempted.

    Args:
        repo_id: HuggingFace repository ID
        token: HF token (defaults to HF_TOKEN environment variable)
        commit_message: Commit message for the upload

    Returns:
        True when every selected file was uploaded. False when no token is
        available, when at least one upload failed, or when an unexpected
        error aborted the sync.
    """
    # Fall back to the environment when no token was passed explicitly.
    if token is None:
        token = os.environ.get('HF_TOKEN')
    if not token:
        logger.error("HF_TOKEN environment variable not set and no token provided")
        return False

    try:
        login(token=token)
        api = HfApi()
        logger.info("Successfully authenticated with HuggingFace Hub")

        repo_root = Path(__file__).parent
        logger.info(f"Repository root: {repo_root}")

        # NOTE(review): RELEASE_INFO.md, written by create_release_info(), is
        # not matched by any pattern below - confirm whether it should be
        # uploaded as well.
        include_patterns = [
            # Source code
            "bit_transformer/**/*.py",
            "tests/**/*.py",
            "*.py",

            # Documentation
            "README.md",
            "MODEL_CARD.md",
            "RESEARCH_STATUS.md",
            "EMPIRICAL_VALIDATION.md",
            "OPEN_SOURCE_LAUNCH.md",
            "AGENTS.md",

            # Project configuration and container/startup files
            "requirements.txt",
            "pyproject.toml",
            "Dockerfile",
            "start.sh",

            # License texts
            "LICENSE/**/*.txt",
        ]

        exclude_patterns = [
            # Caches, VCS metadata, model artifacts and logs
            "__pycache__/**",
            "*.pyc",
            ".git/**",
            ".pytest_cache/**",
            "weights/**",
            "checkpoints/**",
            "*.log",

            # Assessment / audit documents (presumably internal - verify)
            "BitTransformerLM_full_assessment.md",
            "FORENSIC_*.md",
            "state_of_the_repo_audit.md",

            # Separate upload script that is kept out of the sync
            "upload_to_hf.py",
        ]

        files_to_upload = _collect_files(repo_root, include_patterns, exclude_patterns)
        logger.info(f"Found {len(files_to_upload)} files to upload")
        if not files_to_upload:
            logger.warning(f"No files matched the include patterns under {repo_root}")

        # NOTE(review): every upload_file() call creates its own commit on the
        # Hub; a single batched commit would give a cleaner history.
        uploaded_count = 0
        failed: List[str] = []
        for file_path in files_to_upload:
            # Hub paths always use forward slashes, regardless of the local OS.
            # Computed outside the try so the failure log below can always use it.
            path_in_repo = file_path.relative_to(repo_root).as_posix()
            logger.info(f"Uploading: {path_in_repo}")

            try:
                api.upload_file(
                    path_or_fileobj=str(file_path),
                    path_in_repo=path_in_repo,
                    repo_id=repo_id,
                    repo_type="model",
                    commit_message=commit_message,
                    commit_description=_COMMIT_DESCRIPTION,
                )
            except Exception as e:
                # Best effort: record the failure and keep going.
                logger.warning(f"Failed to upload {path_in_repo}: {e}")
                failed.append(path_in_repo)
                continue

            uploaded_count += 1
            if uploaded_count % 10 == 0:
                logger.info(f"Progress: {uploaded_count}/{len(files_to_upload)} files uploaded")

        if failed:
            # Do not report success when part of the repository is missing.
            logger.error(
                f"❌ Uploaded {uploaded_count}/{len(files_to_upload)} files; "
                f"{len(failed)} failed: {', '.join(failed)}"
            )
            return False

        logger.info(f"✅ Successfully uploaded {uploaded_count}/{len(files_to_upload)} files")
        logger.info(f"🎉 Repository synced to: https://huggingface.co/{repo_id}")

        return True

    except Exception as e:
        # Top-level boundary: authentication or file discovery failed.
        logger.error(f"❌ Failed to sync repository: {e}")
        return False
|
|
def create_release_info() -> Path:
    """Create a release information file for the OS launch.

    Writes ``RELEASE_INFO.md`` into the directory containing this script,
    overwriting any existing file of that name.

    Returns:
        Path of the file that was written.
    """
    release_info = """# BitTransformerLM v0.1.0 - Experimental Research Release

**Release Date:** August 2025
**Status:** Open Source Research Implementation
**License:** AGPLv3 + Commercial Licensing Available

## What's Included

This release provides a complete experimental framework for bit-native language modeling research:

- **Core Architecture:** 57 Python files implementing bit-native transformer with reversible layers
- **Safety Systems:** Real-time K/C/S telemetry and monitoring
- **Research Tools:** Interactive dashboard, distributed training, comprehensive testing
- **Documentation:** Professional model card, research status, and validation reports

## Important Notes

⚠️ **Experimental Status:** This is research code requiring rigorous baseline validation
⚠️ **Not Production Ready:** Needs extensive evaluation vs standard transformers
⚠️ **Research Use Only:** Intended for academic investigation and experimentation

## Licensing

- **Open Source:** AGPLv3 for research and open source use
- **Commercial:** Contact contact@wcnegentropy.com for commercial licensing

## Next Steps

The research community is invited to:
1. Conduct rigorous baseline comparisons vs standard transformers
2. Evaluate on established language modeling benchmarks
3. Validate (or refute) claimed memory efficiency benefits
4. Share findings openly to advance the field

**Research responsibly. Validate rigorously. Share openly.**
"""

    release_file = Path(__file__).parent / "RELEASE_INFO.md"
    # The text contains non-ASCII characters; an explicit encoding keeps the
    # write from failing on platforms whose default encoding is not UTF-8.
    release_file.write_text(release_info, encoding="utf-8")

    logger.info("Created RELEASE_INFO.md")
    return release_file
|
|
if __name__ == "__main__":
    # Write RELEASE_INFO.md before starting the sync.
    create_release_info()

    # Sync with the default repository ID, token lookup and commit message.
    success = sync_repository_to_hf()

    if success:
        print("\n🎉 BitTransformerLM OS Launch Sync Complete!")
        print("📍 Repository: https://huggingface.co/WCNegentropy/BitTransformerLM")
        print("📧 Commercial inquiries: contact@wcnegentropy.com")
        print("\nReady for research community evaluation! 🧪✨")
    else:
        print("\n❌ Sync failed. Please check logs and try again.")
        # Exit non-zero so shells and CI pipelines can detect the failure.
        raise SystemExit(1)