- 010425_SoftCap
- 011325_Fp8LmHead
- 011625_Sub3Min
- 011825_GPT2Medium
- 012625_BatchSize
- 020125_RuleTweak
- 020825_GPT2MediumWeightDecay
- 021425_GPT2MediumOptCoeffs
- 030625_GPT2MediumLongerCooldown
- 032525_GPT2MediumArchOptTweaks
- 041625_GPT2Medium_Record7
- 042225_GPT2Medium_Record8
- 052425_FasterReduce
- 052425_StableTorch
- 052525_EvenFasterReduce
- 052525_MuonWithAuxAdamExample
- 053025_noallreduce
- 060624_AdamW
- 061525_GPT2MediumOptimizationLeaderboard
- 071225_BosAlign
- 071325_UpgradeTorch190
- 100924_SOAP
- 101024_Muon
- 101324_llmc
- 101424_ModernArch
- 101724_DistributedMuon
- 101824_PyTorch25
- 102024_ScaleUp1B
- 102924_Optimizers
- 110324_UntieEmbed
- 110424_50Bruns
- 110624_ShortcutsTweaks
- 110824_CastBf16
- 110924_Replicateleloykun
- 111024_ScaleShortcuts
- 111024_UNetDoubleLr
- 111424_QuantizedFP4
- 111924_FlexAttention
- 112424_WindowWarmup
- 120424_ValueEmbed
- 120824_UNetValueEmbedsTweaks
- 121024_MFUTweaks
- 121724_SparsifyEmbeds
- 123124_Target350M