CallumMcDougallGDM commited on
Commit
1caad16
·
verified ·
1 Parent(s): 009c25b

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. attn_out/layer_12_width_16k_l0_big/params.safetensors +3 -0
  2. attn_out/layer_12_width_16k_l0_medium/params.safetensors +3 -0
  3. attn_out/layer_12_width_16k_l0_small/params.safetensors +3 -0
  4. attn_out/layer_12_width_262k_l0_big/params.safetensors +3 -0
  5. attn_out/layer_12_width_262k_l0_medium/params.safetensors +3 -0
  6. attn_out/layer_12_width_262k_l0_small/params.safetensors +3 -0
  7. attn_out/layer_12_width_65k_l0_big/params.safetensors +3 -0
  8. attn_out/layer_12_width_65k_l0_medium/params.safetensors +3 -0
  9. attn_out/layer_12_width_65k_l0_small/params.safetensors +3 -0
  10. attn_out/layer_15_width_16k_l0_big/params.safetensors +3 -0
  11. attn_out/layer_15_width_16k_l0_medium/params.safetensors +3 -0
  12. attn_out/layer_15_width_16k_l0_small/params.safetensors +3 -0
  13. attn_out/layer_15_width_262k_l0_big/params.safetensors +3 -0
  14. attn_out/layer_15_width_262k_l0_medium/params.safetensors +3 -0
  15. attn_out/layer_15_width_262k_l0_small/params.safetensors +3 -0
  16. attn_out/layer_15_width_65k_l0_big/params.safetensors +3 -0
  17. attn_out/layer_15_width_65k_l0_medium/params.safetensors +3 -0
  18. attn_out/layer_15_width_65k_l0_small/params.safetensors +3 -0
  19. attn_out/layer_5_width_16k_l0_big/params.safetensors +3 -0
  20. attn_out/layer_5_width_16k_l0_medium/params.safetensors +3 -0
  21. attn_out/layer_5_width_16k_l0_small/params.safetensors +3 -0
  22. attn_out/layer_5_width_262k_l0_big/params.safetensors +3 -0
  23. attn_out/layer_5_width_262k_l0_medium/params.safetensors +3 -0
  24. attn_out/layer_5_width_262k_l0_small/params.safetensors +3 -0
  25. attn_out/layer_5_width_65k_l0_big/params.safetensors +3 -0
  26. attn_out/layer_5_width_65k_l0_medium/params.safetensors +3 -0
  27. attn_out/layer_5_width_65k_l0_small/params.safetensors +3 -0
  28. attn_out/layer_9_width_16k_l0_big/params.safetensors +3 -0
  29. attn_out/layer_9_width_16k_l0_medium/params.safetensors +3 -0
  30. attn_out/layer_9_width_16k_l0_small/params.safetensors +3 -0
  31. attn_out/layer_9_width_262k_l0_big/params.safetensors +3 -0
  32. attn_out/layer_9_width_262k_l0_medium/params.safetensors +3 -0
  33. attn_out/layer_9_width_262k_l0_small/params.safetensors +3 -0
  34. attn_out/layer_9_width_65k_l0_big/params.safetensors +3 -0
  35. attn_out/layer_9_width_65k_l0_medium/params.safetensors +3 -0
  36. attn_out/layer_9_width_65k_l0_small/params.safetensors +3 -0
  37. transcoder/layer_12_width_16k_l0_big/config.json +9 -0
  38. transcoder/layer_12_width_16k_l0_big_affine/config.json +9 -0
  39. transcoder/layer_12_width_16k_l0_small/config.json +9 -0
  40. transcoder/layer_12_width_262k_l0_big/config.json +9 -0
  41. transcoder/layer_12_width_262k_l0_big_affine/config.json +9 -0
  42. transcoder/layer_12_width_262k_l0_medium/config.json +9 -0
  43. transcoder/layer_12_width_262k_l0_medium_affine/config.json +9 -0
  44. transcoder/layer_12_width_262k_l0_small/config.json +9 -0
  45. transcoder/layer_12_width_262k_l0_small_affine/config.json +9 -0
  46. transcoder/layer_12_width_65k_l0_big/config.json +9 -0
  47. transcoder/layer_12_width_65k_l0_big_affine/config.json +9 -0
  48. transcoder/layer_12_width_65k_l0_medium_affine/config.json +9 -0
  49. transcoder/layer_12_width_65k_l0_small_affine/config.json +9 -0
  50. transcoder/layer_15_width_16k_l0_big_affine/config.json +9 -0
attn_out/layer_12_width_16k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a05e5d64d5bf3718e62750ae0732dcba6038ec7d87856c1591a0ff217ac3dc
3
+ size 134353272
attn_out/layer_12_width_16k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b54141e6e22277f4c6f090ee0031c6c3439d9cdc10dc89eccffbeeb57136e1b8
3
+ size 134353272
attn_out/layer_12_width_16k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9c7fc31794dff9b922979fda509cc898ff6997425aadb1b7887dee420365ee0
3
+ size 134353272
attn_out/layer_12_width_262k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e492c29cb9a3123f7c3be4dd88b9797cde30dd35e1980086e2903499749e71f4
3
+ size 2149585288
attn_out/layer_12_width_262k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9485b728825419b12eb5df0da74713b6ce85b6294672f0686855b5ca8c2b16
3
+ size 2149585288
attn_out/layer_12_width_262k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2758989783153845a4e4f424a1cd157d1603d407f47b7ad19cdf07a07bab5bc5
3
+ size 2149585288
attn_out/layer_12_width_65k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd3c0b700391aefd5576f52eaae184d4f67e81860b146850871b5678d109daf0
3
+ size 537399680
attn_out/layer_12_width_65k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8479c6d7e70874470e44333c6b051b0427f2bacf6ddaa0de2cce9d2e3da611b8
3
+ size 537399680
attn_out/layer_12_width_65k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883823554fd7123730efa2b6429ab324e08b937cb8af7362a1bc0c9826b9edba
3
+ size 537399680
attn_out/layer_15_width_16k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca71ef43051d77b66e3de993d2844c243486980a89d51e1bb4abc70c8ee20514
3
+ size 134353272
attn_out/layer_15_width_16k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0677b0ac674ca50ba56c1245238272cde504d4b174471d675a1afaa3d6465a
3
+ size 134353272
attn_out/layer_15_width_16k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c40d7827c852f12085889d48644646af1de2c6027f5843bd40a587e45f9730bd
3
+ size 134353272
attn_out/layer_15_width_262k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1f130c4a9e5ed4ab90cb7d669466f74aca315da16c6b7d073b2902b2603ecbb
3
+ size 2149585288
attn_out/layer_15_width_262k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915997c78ac0b22d4d22c9f48694a65ee5bfc231618365aebf3092255cec44e0
3
+ size 2149585288
attn_out/layer_15_width_262k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073e114e99ca10f1df9de6d4fb0cfda6d5d816744141be6b335f4b563961da01
3
+ size 2149585288
attn_out/layer_15_width_65k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c1d562c2528fa8a208e8cab786c9d83de04393c29adfcd6ac87552beff585f
3
+ size 537399680
attn_out/layer_15_width_65k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f6ced24072da9dd7a0c279c9392d343144115bbc736fcf17b842d49c67e861
3
+ size 537399680
attn_out/layer_15_width_65k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b77f9edf78ae6c1a03c783aed20f7f9d0e5b675630626be269c746851a76afb8
3
+ size 537399680
attn_out/layer_5_width_16k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c27213da1fe982482ab47a5a3682ba21a9444fa53c8d930f35144c1143e81ef
3
+ size 134353272
attn_out/layer_5_width_16k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec8adfd5d2b00940e09d2796609717f47adef04b9d9a3c01b6450b7a10ce8c0
3
+ size 134353272
attn_out/layer_5_width_16k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7fea39c45d1be5e89ee2392a86b964abc55da099049550fd6354a160bab0560
3
+ size 134353272
attn_out/layer_5_width_262k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:084b5543875ff991b53b4d868e33b77393d01c7357a3689c589cb22c9c01c3f4
3
+ size 2149585288
attn_out/layer_5_width_262k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45800001e859a6dcece21558143c460954fa487b2c34bcb265921389332bf08d
3
+ size 2149585288
attn_out/layer_5_width_262k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee94607026f782026ac693bea425aae273b0e14224119e5c43ffec749fe7c77
3
+ size 2149585288
attn_out/layer_5_width_65k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375e53076e45bfb67904dc61bb670a40b25a9d915442447448d7ce79c9d70530
3
+ size 537399680
attn_out/layer_5_width_65k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e209e05e1a0068102924194b34375af38f5e9b2c2e624c1ba8a622a23b6ab8
3
+ size 537399680
attn_out/layer_5_width_65k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005b2b3164b8e366e074e237713ad6b9c29f45a6f38c3ee1aca7bc34efddd3fd
3
+ size 537399680
attn_out/layer_9_width_16k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3221bd9caf5899d47752d36b46e118fb0dd6c74b7f85c273e3d9e897348c7a25
3
+ size 134353272
attn_out/layer_9_width_16k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbce144b7c35c81f660004ce10228a339c206f8de63f004e4535f25e7b65c618
3
+ size 134353272
attn_out/layer_9_width_16k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07467fd66c1ebfb2d6b36f145a2ef499c13976b7d8edbf39c4c940cf5e8daad
3
+ size 134353272
attn_out/layer_9_width_262k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49a5569abdc7259c02b56b0dc22e5a3d6f8a25214855d273228780a05a4f625
3
+ size 2149585288
attn_out/layer_9_width_262k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:744217c58dc45362d03bedf6fdc1437b33a7ba0fae664b5871d5b72de4a9cfb6
3
+ size 2149585288
attn_out/layer_9_width_262k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61aba5e9f8c5fedc28103f5b2f75b702eca65c28f8f10645a89680f3e67394e8
3
+ size 2149585288
attn_out/layer_9_width_65k_l0_big/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305598111284f5f0c5da814b139ed0eb0addba273a6624838953c25a4145f721
3
+ size 537399680
attn_out/layer_9_width_65k_l0_medium/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00f20e35b49142d51994301380dd58f5a022a255386cee7e130e9567ded46c1d
3
+ size 537399680
attn_out/layer_9_width_65k_l0_small/params.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd492e6c80ae05ef6f2a4bee981fb67626e7e5d60a75ec030ab951df8ea730e9
3
+ size 537399680
transcoder/layer_12_width_16k_l0_big/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 16384,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": false
9
+ }
transcoder/layer_12_width_16k_l0_big_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 16384,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": true
9
+ }
transcoder/layer_12_width_16k_l0_small/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 16384,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 20,
8
+ "affine_connection": false
9
+ }
transcoder/layer_12_width_262k_l0_big/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 262144,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": false
9
+ }
transcoder/layer_12_width_262k_l0_big_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 262144,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": true
9
+ }
transcoder/layer_12_width_262k_l0_medium/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 262144,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 60,
8
+ "affine_connection": false
9
+ }
transcoder/layer_12_width_262k_l0_medium_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 262144,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 60,
8
+ "affine_connection": true
9
+ }
transcoder/layer_12_width_262k_l0_small/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 262144,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 20,
8
+ "affine_connection": false
9
+ }
transcoder/layer_12_width_262k_l0_small_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 262144,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 20,
8
+ "affine_connection": true
9
+ }
transcoder/layer_12_width_65k_l0_big/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 65536,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": false
9
+ }
transcoder/layer_12_width_65k_l0_big_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 65536,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": true
9
+ }
transcoder/layer_12_width_65k_l0_medium_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 65536,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 60,
8
+ "affine_connection": true
9
+ }
transcoder/layer_12_width_65k_l0_small_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output",
4
+ "width": 65536,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 20,
8
+ "affine_connection": true
9
+ }
transcoder/layer_15_width_16k_l0_big_affine/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.15.pre_feedforward_layernorm.output",
3
+ "hf_hook_point_out": "model.layers.15.post_feedforward_layernorm.output",
4
+ "width": 16384,
5
+ "model_name": "gemma-v3-270m-pt",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": true
9
+ }