benjamin
/

zett-hypernetwork-Mistral-7B-v0.1

Feature Extraction

Model card · Files and versions

benjamin committed on May 15, 2024

Commit

f8ee404

·

1 Parent(s): 758da5d

update tokenizer

Files changed (1) hide show

tokenizer.json +21 -5

tokenizer.json CHANGED Viewed

@@ -31,12 +31,28 @@
       "special": true
     }
   ],
-  "normalizer": null,
   "pre_tokenizer": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": true,
-    "use_regex": true
   },
   "post_processor": {
     "type": "TemplateProcessing",

       "special": true
     }
   ],
+  "normalizer": {
+    "type": "Prepend",
+    "prepend": " "
+  },
   "pre_tokenizer": {
+    "type": "Sequence",
+    "pretokenizers": [
+      {
+        "type": "Split",
+        "pattern": {
+          "Regex": "'s|'t|'re|'ve|'m|'ll|'d| ?[\\p{L}\\p{M}]+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+"
+        },
+        "behavior": "Removed",
+        "invert": true
+      },
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": false,
+        "trim_offsets": true,
+        "use_regex": false
+      }
+    ]
   },
   "post_processor": {
     "type": "TemplateProcessing",