adamkarvonen commited on
Commit
1b5e3d4
·
verified ·
1 Parent(s): 4dd38b6

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  2. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  3. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  4. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  5. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  6. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  7. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  8. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  9. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  10. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  11. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  12. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  13. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  14. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  15. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  16. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  17. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  18. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  19. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  20. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +53 -0
  21. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
  22. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  23. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +53 -0
  24. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
  25. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  26. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +53 -0
  27. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
  28. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  29. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +53 -0
  30. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
  31. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  32. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +53 -0
  33. MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  34. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  35. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  36. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  37. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  38. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  39. PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  40. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  41. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  42. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  43. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  44. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  45. Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  46. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  47. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  48. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  49. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  50. TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e889ae2100e3a454aef7a21f08602f4abf24bc251dfa7fa6efc6079f217d8b
3
+ size 402920470
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6fa74304d4b331eb1f581b885b9f3df33dd140df553e8e3ce2dd5e213514593
3
+ size 402920470
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fb934629636441e02cf7af7b0d439237e1d42e362da599f3ba005939d04f69c
3
+ size 402920470
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ab3e1887bde228099636fbe868c580c5f131162a9805e1df38ab3f414da93d
3
+ size 402920470
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:300eec2a85fc84fe6a6898c1f68a10ce4c812cde9598729f8f06e43238833b22
3
+ size 402920470
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4aa3157ed8a9b1bb0f22ad802298a3db2c6311aa8763d4c17898fe856b2199b
3
+ size 402920470
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9e29b47344005be867a37e899ef8680767c8d52f74b82cea6b1348ec8bfe31
3
+ size 403444758
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a74112f952844d40b7379aeb5d00a168da62d8fd2ac6b9c95da5067385f1ef
3
+ size 403444758
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09652ea312d5eb1ac79109adf6e161146ede32656294d3cb812b909917e95898
3
+ size 403444758
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02864f5049818d9aaaca08d508566eb4bb0c2ee9822e9d55e20be7a8b45de99c
3
+ size 403444758
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a71414125a875e59acf07114d61390cec0a3919a633fd06f4e3b40a032658395
3
+ size 403444758
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:791df5c3501a51a40f2b8dc136aecc18636d096decb401761a825fca98e09c3a
3
+ size 403444758
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca6b34359dacb91df5944f29cff61978dac1df618b3be527705282afac5e9fcf
3
+ size 403182431
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:446f7f62641dddd439cf884d1eb546ca068897c521805b7cbbe5dad26f72e88b
3
+ size 403182431
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b71c1673165c4f3b17dd96ce760d2e571b9ae363e2e2e7cb8cf6284537b068
3
+ size 403182431
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:059c46bf1aa9c0c9ae0f751d9ec64a82b90cae2f53bf640b7ddddcbb0be961f6
3
+ size 403182431
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c7b8673c9c75afe0c0bce8672b4215921f85cbdbdd1d4e4b41f093760aaf06
3
+ size 403182431
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb0337c0a94a85d4ed3a0e67f2e985960d0fcb872317ce809ac3e4ae1efb862f
3
+ size 403182431
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d7d4f0936be97fff0771c502fbb324a479de4062484c3abad7a76e013b39e2
3
+ size 402920717
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 65536,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 2048,
32
+ 4096,
33
+ 8192,
34
+ 16384,
35
+ 34816
36
+ ],
37
+ "k": 20,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 7.06076750610814, "l1_loss": 55.502148946126304, "l0": 19.923000162298028, "frac_variance_explained": 0.9277354551084114, "cossim": 0.9538878672050707, "l2_ratio": 0.9604759089874498, "relative_reconstruction_bias": 1.0046033064524333, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.945203484910907, "loss_zero": 12.187079458525687, "frac_recovered": 0.9620419469746676, "frac_alive": 0.600677490234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bbc9e48d3bca0cd9dc0b7d02dd482e1885ac4abde23cfc382e419330e048ce5
3
+ size 402920717
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 65536,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 2048,
32
+ 4096,
33
+ 8192,
34
+ 16384,
35
+ 34816
36
+ ],
37
+ "k": 40,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 6.194424441366484, "l1_loss": 85.02606224291253, "l0": 39.807070992209695, "frac_variance_explained": 0.9439049525694414, "cossim": 0.964661079825777, "l2_ratio": 0.9720291910749493, "relative_reconstruction_bias": 1.0047995177182285, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.8184412428826997, "loss_zero": 12.187079458525687, "frac_recovered": 0.9762207269668579, "frac_alive": 0.653564453125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ff82ae2e3d70c46cbdf8775af63dcd7d569d9644fb6ffb24fafa5e240605b46
3
+ size 402920717
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 65536,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 2048,
32
+ 4096,
33
+ 8192,
34
+ 16384,
35
+ 34816
36
+ ],
37
+ "k": 80,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 5.450232867038611, "l1_loss": 107.3414394494259, "l0": 79.58225065289122, "frac_variance_explained": 0.956367785280401, "cossim": 0.972661397673867, "l2_ratio": 0.9780536167549364, "relative_reconstruction_bias": 1.004194028449781, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.745052102840308, "loss_zero": 12.187079458525687, "frac_recovered": 0.9844544566038883, "frac_alive": 0.56951904296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb6aa56ceac048dce5fef95c3876c67dd150b7656da902fc0dff4cc341f68bd1
3
+ size 402920717
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 65536,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 2048,
32
+ 4096,
33
+ 8192,
34
+ 16384,
35
+ 34816
36
+ ],
37
+ "k": 160,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.619117390025746, "l1_loss": 190.5597150398023, "l0": 159.13051304672703, "frac_variance_explained": 0.9687698682149252, "cossim": 0.9805189949093442, "l2_ratio": 0.985734917900779, "relative_reconstruction_bias": 1.0036569039026897, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6877589045148906, "loss_zero": 12.187079458525687, "frac_recovered": 0.9908258788513414, "frac_alive": 0.4089202880859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca6d6ffcf625867dc63240e8bed8d923e60166cc1c335f54cbf93617b310d17
3
+ size 402920717
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 65536,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 2048,
32
+ 4096,
33
+ 8192,
34
+ 16384,
35
+ 34816
36
+ ],
37
+ "k": 320,
38
+ "device": "cuda:0",
39
+ "layer": 8,
40
+ "lm_name": "EleutherAI/pythia-160m-deduped",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
42
+ "submodule_name": "resid_post_layer_8"
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 768,
46
+ "io": "out",
47
+ "n_ctxs": 244,
48
+ "ctx_len": 1024,
49
+ "refresh_batch_size": 32,
50
+ "out_batch_size": 2048,
51
+ "device": "cuda:0"
52
+ }
53
+ }
MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7a4a858562064de91a6225081914e0096b6ad110c29e57b12cad1bc6f14ff68
3
+ size 402920717
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2cf8f9fea7c1f01ea5b9d2f1edc87c3d7f588ec5e4871f41a5c28ec0890f3cf
3
+ size 402920104
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9b42b83893a64fcb8e5eae72e35ac180349873730833aca4bfb19888dad8eb
3
+ size 402920104
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e13bb4423307fcfa3ba1820491236e65882ae795fabc44418ecc4894a2fa5dff
3
+ size 402920104
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2698654939c2890c0b1c27001608a2db5689505a4a3905b1f8491cfe3ff1149e
3
+ size 402920104
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45907780c89994187635465306183d9ea47ab14ae84892436b5de85eb47c3490
3
+ size 402920104
PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8856a08dd880d51d4a83e9deec591f7b2b146f6817ab2805c39a1c4b5b3f4177
3
+ size 402920104
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ae2cd98daffefa25d143e34213cab1f94575b2795040c685e1adeb68a2dc08
3
+ size 402920104
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b802595dc84c9f825edb925906317a7478e5908c8fd07ac27c396d3b53ea1663
3
+ size 402920104
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac53f97bcadf146499cef9567c5483f1d9727df40dde27f6db8a2f16c68ed77
3
+ size 402920104
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e0326a5b0ac5c55c5e9fa2f5944f16cf63b9171c2c226a938a560ea38d1cb5
3
+ size 402920104
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2caf6669bb28aa94aac807d63cb99635498b2cc6a679b9bd1b4886b508266b45
3
+ size 402920104
Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2461da6d96f14205c55aa144795118725624306d21f285688d7abcd7988d4c90
3
+ size 402920104
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb8cd8832679e1a3747bcd5ad613368b6c794761b1f73dfc597fdb04f110dbb
3
+ size 402920470
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b69de5d93bc1239a1738955d03bcf8902638535f476b2a56792a32da64e1d4f7
3
+ size 402920470
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3516579d32eca10d40a3c5ca9568b08174d8204ddfe9e667f3948b83d1970496
3
+ size 402920470
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a86abe75db90dcbf93e011fe7422d3549d85ae2c258031e65c9d901484e363f8
3
+ size 402920470
TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe1d755917f772c2ad2d44de69a78196ca1671f778d64f70e85d576757dd8a14
3
+ size 402920470