Spaces: Running on CPU Upgrade

Commit 1edca62
Parent(s): a2aec3f

Restructured codebase

Files changed:
- .idea/.gitignore +8 -0
- app.py +11 -152
- bayes.py → models/bayes.py +46 -44
- models/bert.py +27 -0
- models/cnn.py +2 -0
- lstm.py → models/lstm.py +182 -174
- utils/consts.py +59 -0
- utils/preprocessing.py +41 -0
- {bayes → weights/bayes}/class_0.joblib +0 -0
- {bayes → weights/bayes}/class_1.joblib +0 -0
- {bayes → weights/bayes}/class_10.joblib +0 -0
- {bayes → weights/bayes}/class_11.joblib +0 -0
- {bayes → weights/bayes}/class_12.joblib +0 -0
- {bayes → weights/bayes}/class_13.joblib +0 -0
- {bayes → weights/bayes}/class_14.joblib +0 -0
- {bayes → weights/bayes}/class_15.joblib +0 -0
- {bayes → weights/bayes}/class_16.joblib +0 -0
- {bayes → weights/bayes}/class_17.joblib +0 -0
- {bayes → weights/bayes}/class_18.joblib +0 -0
- {bayes → weights/bayes}/class_19.joblib +0 -0
- {bayes → weights/bayes}/class_2.joblib +0 -0
- {bayes → weights/bayes}/class_20.joblib +0 -0
- {bayes → weights/bayes}/class_21.joblib +0 -0
- {bayes → weights/bayes}/class_22.joblib +0 -0
- {bayes → weights/bayes}/class_23.joblib +0 -0
- {bayes → weights/bayes}/class_3.joblib +0 -0
- {bayes → weights/bayes}/class_4.joblib +0 -0
- {bayes → weights/bayes}/class_5.joblib +0 -0
- {bayes → weights/bayes}/class_6.joblib +0 -0
- {bayes → weights/bayes}/class_7.joblib +0 -0
- {bayes → weights/bayes}/class_8.joblib +0 -0
- {bayes → weights/bayes}/class_9.joblib +0 -0
- {bayes → weights/bayes}/vectorizer.joblib +0 -0
.idea/.gitignore
ADDED

@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
app.py
CHANGED

@@ -1,73 +1,13 @@
 import gradio as gr
-from transformers import pipeline
-from collections import defaultdict
-import re
-import string
-from textblob import TextBlob
-from bayes import NaiveBayesMultiClass
 import fasttext
 from huggingface_hub import hf_hub_download
-from lstm import LSTMPipeline
-
-topics = [
-    '1.1',
-    '1.2',
-    '1.3',
-    '1.4',
-    '1.5',
-    '1.6',
-    '1.7',
-    '1.8',
-    '2.1',
-    '2.2',
-    '2.3',
-    '3.1',
-    '3.2',
-    '3.3',
-    '3.4',
-    '4.1',
-    '4.2',
-    '4.3',
-    '4.4',
-    '4.5',
-    '5.1',
-    '5.2',
-    '6.1',
-    '6.2',
-]
-
-topics_full = {
-    '1': 'Motion, forces and energy',
-    '1.1': 'Physical quantities and measurement techniques',
-    '1.2': 'Motion',
-    '1.3': 'Mass and weight',
-    '1.4': 'Density',
-    '1.5': 'Forces',
-    '1.6': 'Momentum',
-    '1.7': 'Energy, work and power',
-    '1.8': 'Pressure',
-    '2': 'Thermal physics',
-    '2.1': 'Kinetic particle model of matter',
-    '2.2': 'Thermal properties and temperature',
-    '2.3': 'Transfer of thermal energy',
-    '3': 'Waves',
-    '3.1': 'General properties of waves',
-    '3.2': 'Light',
-    '3.3': 'Electromagnetic spectrum',
-    '3.4': 'Sound',
-    '4': 'Electricity and magnetism',
-    '4.1': 'Simple phenomena of magnetism',
-    '4.2': 'Electrical quantities',
-    '4.3': 'Electric circuits',
-    '4.4': 'Electrical safety',
-    '4.5': 'Electromagnetic effects',
-    '5': 'Nuclear physics',
-    '5.1': 'The nuclear model of the atom',
-    '5.2': 'Radioactivity',
-    '6': 'Space physics',
-    '6.1': 'Earth and the Solar System',
-    '6.2': 'Stars and the Universe',
-}
+from collections import defaultdict
+
+from utils.consts import topics, topics_full
+
+from models.bayes import NaiveBayesMultiClass, get_tags_bayes
+from models.lstm import LSTMPipeline, get_tags_lstm
+from models.bert import get_bert_pipeline, get_tags_bert

 embedding_model_path = hf_hub_download(
     repo_id="facebook/fasttext-en-vectors",

@@ -75,54 +15,7 @@ embedding_model_path = hf_hub_download(
 )
 embedder = fasttext.load_model(embedding_model_path)

-stopword = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself",
-            "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they",
-            "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those",
-            "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does",
-            "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at",
-            "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above",
-            "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then",
-            "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most",
-            "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t",
-            "can", "will", "just", "don", "should", "now"]
-punctuations = string.punctuation
-
-
-def to_lower(text: str) -> str:
-    return text.lower()
-
-
-def remove_html_tags(text: str) -> str:
-    pattern = re.compile('<.*?>')
-    return pattern.sub(r'', text)
-
-
-def remove_punctuations(text: str) -> str:
-    return text.translate(str.maketrans('', '', punctuations))
-
-
-def correct_spellings(text: str) -> str:
-    return TextBlob(text).correct().string
-
-
-def remove_stopwords(text: str) -> str:
-    return " ".join([word for word in text.split() if word not in stopword])
-
-
-def clean(text: str) -> str:
-    return remove_stopwords(
-        correct_spellings(remove_punctuations(remove_html_tags(to_lower(text))))
-    )
-
-
-bert = pipeline(
-    "text-classification",
-    model="Lxz20071231/igcse-physics-bert",
-    tokenizer="distilbert-base-uncased",
-    return_all_scores=True,
-    function_to_apply="sigmoid",
-    truncation=True
-)
+bert = get_bert_pipeline()

 id2label = {i: topics[i] for i in range(24)}

@@ -131,42 +24,8 @@ lstm = LSTMPipeline(embedder=embedder, id2label=id2label, device=-1)
 n_topics = len(topics)

 bayes = NaiveBayesMultiClass(topics)
-bayes.load('bayes/')
-
-
-def get_tags_multiple_bert(texts, threshold=0.5):
-    probs = bert(texts)
-    tags = []
-    for line in probs:
-        found = []
-        for p, label in zip(line, topics):
-            if p['score'] >= threshold:
-                found.append(label)
-        tags.append(found)
-    return tags
-
-
-def get_tags_bayes(text):
-    return bayes.predict(clean(text), True)
-
-
-def get_tags_cnn(text, threshold=0.5):
-    return []
-
-
-def get_tags_lstm(text, threshold=0.5):
-    probs = lstm(text)[0]
-    tags = []
-    for p, label in zip(probs, topics):
-        if p >= threshold:
-            tags.append(label)
-    return tags
-
-
-def get_tags_bert(text, threshold=0.5):
-    tags = get_tags_multiple_bert([text], threshold)[0]
-    return tags
+bayes.load('weights/bayes/')

 def expand(tags):
     with_primary = set()

@@ -201,13 +60,13 @@ def format_as_markdown(predictions: dict) -> str:

 def classify_text(classifier, text, threshold, output_format):
     if classifier == 'Transformer':
-        tags = get_tags_bert(text, threshold)
+        tags = get_tags_bert(bert, text, threshold)
     elif classifier == 'CNN':
-        tags = get_tags_cnn(text, threshold)
+        tags = []
     elif classifier == 'LSTM':
-        tags = get_tags_lstm(text, threshold)
+        tags = get_tags_lstm(lstm, text, threshold)
     else:
-        tags = get_tags_bayes(text)
+        tags = get_tags_bayes(bayes, text)

     tags = expand(tags)
     predictions = {tag: topics_full[tag] for tag in tags if tag in topics_full}
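For orientation, here is a minimal smoke-test sketch of the restructured entry point outside Gradio. It only uses names introduced or kept by this commit; the sample question and the chosen threshold are illustrative and not part of the repository.

# Illustrative only: exercises the refactored helpers directly.
from models.bert import get_bert_pipeline, get_tags_bert
from models.bayes import NaiveBayesMultiClass, get_tags_bayes
from utils.consts import topics

bert = get_bert_pipeline()
bayes = NaiveBayesMultiClass(topics)
bayes.load('weights/bayes/')          # new weights location introduced by this commit

question = "A 2 kg trolley accelerates at 3 m/s^2. Calculate the resultant force."  # made-up input
print(get_tags_bert(bert, question, threshold=0.5))
print(get_tags_bayes(bayes, question))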
bayes.py → models/bayes.py
RENAMED

@@ -1,44 +1,46 @@
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.preprocessing import MultiLabelBinarizer
 import typing
 import joblib
+from utils.preprocessing import clean


 class NaiveBayesMultiClass(object):
     def __init__(self, classes: typing.Iterable[str]):
         self.classes = list(classes)
         self.n_classes = len(self.classes)
         self.enc = MultiLabelBinarizer()
         self.enc.fit([classes])
         self.vectorizer = CountVectorizer()
         self.classifiers = []

     def load(self, path: str):
         self.vectorizer = joblib.load(f'{path}/vectorizer.joblib')
         self.classifiers = [
             joblib.load(f'{path}/class_{i}.joblib') for i in range(self.n_classes)
         ]

     def predict(self, X: typing.Iterable[str] | str, get_tags=False):
         if type(X) == str:
             return self.predict([X], get_tags)[0]
         x = self.vectorizer.transform(X)
         by_class = [self.classifiers[i].predict(x) for i in range(self.n_classes)]
         ans = []

         for i in range(len(X)):
             y = []
             for j, cls in enumerate(self.classes):
                 if get_tags:
                     if by_class[j][i]:
                         y.append(cls)
                 else:
                     y.append(by_class[j][i])
             ans.append(y)
         return ans

     def __call__(self, *args, **kwargs):
         return self.predict(*args, **kwargs)


+def get_tags_bayes(model, text):
+    return model.predict(clean(text), True)
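models/bayes.py keeps the original design of one persisted classifier per topic plus a shared CountVectorizer; only the clean() import and the get_tags_bayes helper are new. A minimal usage sketch, assuming the weights shipped under weights/bayes/ in this commit:

from models.bayes import NaiveBayesMultiClass, get_tags_bayes
from utils.consts import topics

model = NaiveBayesMultiClass(topics)
model.load('weights/bayes/')      # loads vectorizer.joblib and class_0.joblib ... class_23.joblib

# predict() accepts a single string or an iterable of strings;
# with get_tags=True it returns topic codes instead of per-class flags.
print(model.predict("Define momentum and state its unit.", get_tags=True))
print(get_tags_bayes(model, "Define momentum and state its unit."))  # cleans the text first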
models/bert.py
ADDED

@@ -0,0 +1,27 @@
+from transformers import pipeline
+from utils.consts import topics
+
+def get_bert_pipeline():
+    return pipeline(
+        "text-classification",
+        model="Lxz20071231/igcse-physics-bert",
+        tokenizer="distilbert-base-uncased",
+        return_all_scores=True,
+        function_to_apply="sigmoid",
+        truncation=True
+    )
+
+def get_tags_multiple_bert(model, texts, threshold=0.5):
+    probs = model(texts)
+    tags = []
+    for line in probs:
+        found = []
+        for p, label in zip(line, topics):
+            if p['score'] >= threshold:
+                found.append(label)
+        tags.append(found)
+    return tags
+
+def get_tags_bert(model, text, threshold=0.5):
+    tags = get_tags_multiple_bert(model, [text], threshold)[0]
+    return tags
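models/bert.py wraps the pipeline construction that previously lived in app.py behind get_bert_pipeline() and threads the pipeline object through the tag helpers. A usage sketch; note that recent transformers releases prefer top_k=None over the deprecated return_all_scores=True, so constructing the pipeline may emit a deprecation warning depending on the installed version.

from models.bert import get_bert_pipeline, get_tags_bert, get_tags_multiple_bert

bert = get_bert_pipeline()   # downloads Lxz20071231/igcse-physics-bert on first use

# Single question -> topic codes whose sigmoid score clears the threshold
print(get_tags_bert(bert, "Describe the motion of molecules in a gas.", threshold=0.5))

# Batched variant: one list of tags per input text
print(get_tags_multiple_bert(bert, ["Define refraction.", "State the unit of pressure."], threshold=0.3))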
models/cnn.py
ADDED

@@ -0,0 +1,2 @@
+def get_tags_cnn(model, text, threshold=0.5):
+    return []
lstm.py → models/lstm.py
RENAMED

@@ -1,174 +1,182 @@
 from torch.nn.utils.rnn import pad_sequence
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.utils.rnn import pack_padded_sequence
 from types import SimpleNamespace
 from huggingface_hub import PyTorchModelHubMixin
 from transformers import Pipeline
+from utils.consts import topics

 def get_words(model, text: str):
     """
     Break text into tokens using FastText's internal tokenizer.
     """
     lines = [model.get_line(line)[0] for line in text.split("\n")]
     words = []
     for line in lines:
         for w in line[:-1]:
             words.append(w)
     return words


 def get_vectors(model, text: str):
     """
     Convert text → list of embedding vectors.
     """
     words = get_words(model, text)
     vectors = [model[w] for w in words]
     return vectors


 def get_tensor(model, text: str):
     """
     Convert text → (seq_len, embedding_dim) tensor
     """
     vectors = get_vectors(model, text)
     if len(vectors) == 0:
         # fallback for empty text
         return torch.zeros(1, model.get_dimension())
     return torch.tensor(vectors, dtype=torch.float)


 def preprocess_batch(embedder, texts):
     """
     Convert a list of text strings into:
     x_padded: (batch, seq_len, emb_dim)
     lengths: (batch,)
     Both sorted by sequence length (DESC) for pack_padded_sequence.
     """

     # Convert each text → tensor
     seq_tensors = [get_tensor(embedder, t) for t in texts]

     # Compute lengths BEFORE padding
     lengths = torch.tensor([seq.size(0) for seq in seq_tensors], dtype=torch.long)

     # Sort by length (DESC)
     lengths_sorted, sort_idx = torch.sort(lengths, descending=True)
     seq_tensors = [seq_tensors[i] for i in sort_idx]

     # Pad to create (batch, max_seq_len, emb_dim)
     x_padded = pad_sequence(seq_tensors, batch_first=True)

     return x_padded, lengths_sorted


 class LSTMMultiClassClassifier(nn.Module, PyTorchModelHubMixin):
     def __init__(self, embedding_dim, hidden_dim, num_classes,
                  num_layers=1, bidirectional=True, dropout=0.5, **kwargs):
         super().__init__()

         # REQUIRED for HuggingFace Pipeline
         self.device = torch.device("cpu")

         # Save config
         self.config = SimpleNamespace(
             embedding_dim=embedding_dim,
             hidden_dim=hidden_dim,
             num_classes=num_classes,
             num_layers=num_layers,
             bidirectional=bidirectional,
             dropout=dropout
         )

         self.embedding_dim = embedding_dim
         self.hidden_dim = hidden_dim
         self.num_layers = num_layers
         self.bidirectional = bidirectional
         self.dropout = dropout
         self.num_classes = num_classes

         self.lstm = nn.LSTM(
             input_size=embedding_dim,
             hidden_size=hidden_dim,
             num_layers=num_layers,
             batch_first=True,
             dropout=dropout if num_layers > 1 else 0,
             bidirectional=bidirectional
         )

         direction = 2 if bidirectional else 1
         self.fc = nn.Sequential(
             nn.Linear(hidden_dim * direction, 128),
             nn.ReLU(),
             nn.Linear(128, 128),
             nn.ReLU(),
             nn.Linear(128, num_classes)
         )

     @classmethod
     def from_config(cls, config):
         return cls(
             embedding_dim=config.embedding_dim,
             hidden_dim=config.hidden_dim,
             num_classes=config.num_classes,
             num_layers=config.num_layers,
             bidirectional=config.bidirectional,
             dropout=config.dropout
         )

     # REQUIRED for Transformers Pipeline (updates internal device)
     def to(self, device):
         super().to(device)
         self.device = device
         return self

     def forward(self, x, lengths):
         x = x.to(self.device)
         lengths = lengths.to(self.device)

         packed = pack_padded_sequence(
             x, lengths.cpu(), batch_first=True, enforce_sorted=True
         )
         _, (h_n, _) = self.lstm(packed)

         if self.bidirectional:
             h = torch.cat((h_n[-2], h_n[-1]), dim=1)
         else:
             h = h_n[-1]

         return self.fc(h)


 class LSTMPipeline(Pipeline):
     def __init__(self, id2label, embedder, **kwargs):
         model = LSTMMultiClassClassifier.from_pretrained(
             "Lxz20071231/igcse-physics-lstm"
         )
         super().__init__(model=model, tokenizer=None, **kwargs)
         self.id2label = id2label
         self.embedder = embedder

     def preprocess(self, inputs):
         if isinstance(inputs, str):
             texts = [inputs]
         else:
             texts = list(inputs)
         x, lengths = preprocess_batch(self.embedder, texts)
         return {"x": x, "lengths": lengths}

     def _forward(self, model_inputs):
         x = model_inputs["x"]
         lengths = model_inputs["lengths"]
         with torch.no_grad():
             logits = self.model(x, lengths)
         return logits

     def postprocess(self, logits):
         probs = F.sigmoid(logits)

         return probs

     def _sanitize_parameters(self, **kwargs):
         return {}, {}, {}
+
+def get_tags_lstm(model, text, threshold=0.5):
+    probs = model(text)[0]
+    tags = []
+    for p, label in zip(probs, topics):
+        if p >= threshold:
+            tags.append(label)
+    return tags
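The LSTM path is unchanged apart from the relocated topics import and the appended get_tags_lstm helper: fastText word vectors are produced outside the model, packed, and fed to a bidirectional LSTM whose final hidden states are concatenated and passed through a small fully connected head. A minimal usage sketch mirroring app.py; the filename argument to hf_hub_download is not visible in this diff, so "model.bin" below is an assumed placeholder.

import fasttext
from huggingface_hub import hf_hub_download
from models.lstm import LSTMPipeline, get_tags_lstm
from utils.consts import topics

# NOTE: "model.bin" is an assumption; the real filename is elided in the hunk above.
embedding_model_path = hf_hub_download(repo_id="facebook/fasttext-en-vectors", filename="model.bin")
embedder = fasttext.load_model(embedding_model_path)

id2label = {i: topics[i] for i in range(24)}
lstm = LSTMPipeline(embedder=embedder, id2label=id2label, device=-1)  # CPU, as in app.py

print(get_tags_lstm(lstm, "Explain how a step-up transformer changes voltage.", threshold=0.5))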
utils/consts.py
ADDED

@@ -0,0 +1,59 @@
+topics = [
+    '1.1',
+    '1.2',
+    '1.3',
+    '1.4',
+    '1.5',
+    '1.6',
+    '1.7',
+    '1.8',
+    '2.1',
+    '2.2',
+    '2.3',
+    '3.1',
+    '3.2',
+    '3.3',
+    '3.4',
+    '4.1',
+    '4.2',
+    '4.3',
+    '4.4',
+    '4.5',
+    '5.1',
+    '5.2',
+    '6.1',
+    '6.2',
+]
+
+topics_full = {
+    '1': 'Motion, forces and energy',
+    '1.1': 'Physical quantities and measurement techniques',
+    '1.2': 'Motion',
+    '1.3': 'Mass and weight',
+    '1.4': 'Density',
+    '1.5': 'Forces',
+    '1.6': 'Momentum',
+    '1.7': 'Energy, work and power',
+    '1.8': 'Pressure',
+    '2': 'Thermal physics',
+    '2.1': 'Kinetic particle model of matter',
+    '2.2': 'Thermal properties and temperature',
+    '2.3': 'Transfer of thermal energy',
+    '3': 'Waves',
+    '3.1': 'General properties of waves',
+    '3.2': 'Light',
+    '3.3': 'Electromagnetic spectrum',
+    '3.4': 'Sound',
+    '4': 'Electricity and magnetism',
+    '4.1': 'Simple phenomena of magnetism',
+    '4.2': 'Electrical quantities',
+    '4.3': 'Electric circuits',
+    '4.4': 'Electrical safety',
+    '4.5': 'Electromagnetic effects',
+    '5': 'Nuclear physics',
+    '5.1': 'The nuclear model of the atom',
+    '5.2': 'Radioactivity',
+    '6': 'Space physics',
+    '6.1': 'Earth and the Solar System',
+    '6.2': 'Stars and the Universe',
+}
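utils/consts.py now owns the syllabus constants that previously lived in app.py: topics holds the 24 sub-topic codes in the order used to build id2label, and topics_full maps both primary codes ('1' to '6') and sub-topic codes to their names. A small lookup sketch:

from utils.consts import topics, topics_full

print(len(topics))             # 24 sub-topic codes, '1.1' ... '6.2'
print(topics_full[topics[0]])  # 'Physical quantities and measurement techniques'
print(topics_full['3'])        # 'Waves' (primary topics are keyed by the integer part of the code)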
utils/preprocessing.py
ADDED

@@ -0,0 +1,41 @@
+import re
+import string
+from textblob import TextBlob
+
+stopword = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself",
+            "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they",
+            "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those",
+            "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does",
+            "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at",
+            "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above",
+            "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then",
+            "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most",
+            "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t",
+            "can", "will", "just", "don", "should", "now"]
+punctuations = string.punctuation
+
+def to_lower(text: str) -> str:
+    return text.lower()
+
+
+def remove_html_tags(text: str) -> str:
+    pattern = re.compile('<.*?>')
+    return pattern.sub(r'', text)
+
+
+def remove_punctuations(text: str) -> str:
+    return text.translate(str.maketrans('', '', punctuations))
+
+
+def correct_spellings(text: str) -> str:
+    return TextBlob(text).correct().string
+
+
+def remove_stopwords(text: str) -> str:
+    return " ".join([word for word in text.split() if word not in stopword])
+
+
+def clean(text: str) -> str:
+    return remove_stopwords(
+        correct_spellings(remove_punctuations(remove_html_tags(to_lower(text))))
+    )
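utils/preprocessing.py collects the text-cleaning helpers that the Naive Bayes path relies on: clean() lower-cases the input, strips HTML tags and punctuation, runs TextBlob spelling correction, and finally drops stop words. A short example; the exact output depends on TextBlob's correction, which can alter words and is comparatively slow.

from utils.preprocessing import clean

raw = "<p>What is the DENSITY of an object with mass 10 kg and volume 2 m3?</p>"
print(clean(raw))
# Result shape: lower-cased, tag- and punctuation-free, stop words removed,
# with the text passed through TextBlob.correct() before the stop-word pass.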
{bayes → weights/bayes}/class_0.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_1.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_10.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_11.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_12.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_13.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_14.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_15.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_16.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_17.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_18.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_19.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_2.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_20.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_21.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_22.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_23.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_3.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_4.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_5.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_6.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_7.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_8.joblib: RENAMED, file without changes
{bayes → weights/bayes}/class_9.joblib: RENAMED, file without changes
{bayes → weights/bayes}/vectorizer.joblib: RENAMED, file without changes