Spaces:

Lxz20071231
/

xingzhi-lu-epq

Running on CPU Upgrade

Lxz20071231 committed about 19 hours ago

Commit

44af154

1 Parent(s): 8ec42ec

Bayes threshold added

Files changed (2) hide show

app.py CHANGED Viewed

@@ -66,7 +66,7 @@ def classify_text(classifier, text, threshold, output_format):
     elif classifier == 'LSTM':
         tags = get_tags_lstm(lstm, text, threshold)
     else:
-        tags = get_tags_bayes(bayes, text)
     tags = expand(tags)
     predictions = {tag: topics_full[tag] for tag in tags if tag in topics_full}
@@ -100,7 +100,7 @@ with gr.Blocks(theme="default") as demo:
                 label="Input Text",
             )
             threshold = gr.Slider(0, 1, value=0.5, step=0.05,
-                                  label="Confidence Threshold (not available for Naïve Bayes)")
             output_format = gr.Radio(
                 ["Markdown", "JSON"],
                 value="Markdown",

     elif classifier == 'LSTM':
         tags = get_tags_lstm(lstm, text, threshold)
     else:
+        tags = get_tags_bayes(bayes, text, threshold)
     tags = expand(tags)
     predictions = {tag: topics_full[tag] for tag in tags if tag in topics_full}
                 label="Input Text",
             )
             threshold = gr.Slider(0, 1, value=0.5, step=0.05,
+                                  label="Confidence Threshold")
             output_format = gr.Radio(
                 ["Markdown", "JSON"],
                 value="Markdown",

models/bayes.py CHANGED Viewed

@@ -15,9 +15,9 @@ class NaiveBayesMultiClass(object):
         self.classifiers = []
     def load(self, path: str):
-        self.vectorizer = joblib.load(f'{path}/vectorizer.joblib')
         self.classifiers = [
-            joblib.load(f'{path}/class_{i}.joblib') for i in range(self.n_classes)
         ]
     def predict(self, X: typing.Iterable[str] | str, get_tags=False):
@@ -27,7 +27,7 @@ class NaiveBayesMultiClass(object):
         by_class = [self.classifiers[i].predict(x) for i in range(self.n_classes)]
         ans = []
-        for i in range(len(X)):
             y = []
             for j, cls in enumerate(self.classes):
                 if get_tags:
@@ -41,6 +41,21 @@ class NaiveBayesMultiClass(object):
     def __call__(self, *args, **kwargs):
         return self.predict(*args, **kwargs)
-def get_tags_bayes(model, text):
-    return model.predict(clean(text), True)

         self.classifiers = []
     def load(self, path: str):
+        self.vectorizer = joblib.load(f"{path}/vectorizer.joblib")
         self.classifiers = [
+            joblib.load(f"{path}/class_{i}.joblib") for i in range(self.n_classes)
         ]
     def predict(self, X: typing.Iterable[str] | str, get_tags=False):
         by_class = [self.classifiers[i].predict(x) for i in range(self.n_classes)]
         ans = []
+        for i in range(len(X)):  # type: ignore
             y = []
             for j, cls in enumerate(self.classes):
                 if get_tags:
     def __call__(self, *args, **kwargs):
         return self.predict(*args, **kwargs)
+    def predict_proba(self, X: typing.Iterable[str] | str):
+        if type(X) == str:
+            return self.predict_proba([X])[0]
+        x = self.vectorizer.transform(X)
+        by_class = [self.classifiers[i].predict_proba(x) for i in range(self.n_classes)]
+        return [[by_class[j][i] for j in range(self.n_classes)] for i in range(len(X))]  # type: ignore
+def get_tags_bayes(model: NaiveBayesMultiClass, text: str, threshold : None | float =None):
+    if threshold is None:
+        return model.predict(text, True)
+    probs = model.predict_proba(text)
+    present = []
+    for i, cls in enumerate(model.classes):
+        if probs[i] >= threshold: # type: ignore
+            present.append(cls)
+    return present