# -*- coding: utf-8 -*-
# Author: Krzysztof Joachimiak 2016
import numpy as np
from bace.base import BaseNB
from bace.utils import inherit_docstring
# TODO: check weight normalization
@inherit_docstring
class ComplementNB(BaseNB):
    '''
    Complement Naive Bayes classifier.

    References
    ----------
    Rennie J. D. M., Shih L., Teevan J., Karger D. R. (2003).
    Tackling the Poor Assumptions of Naive Bayes Text Classifiers
    https://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf

    Parameters
    ----------
    alpha : float
        Smoothing parameter.
    weight_normalized : bool, default False
        Enable Weight-normalized Complement Naive Bayes method.

    Attributes
    ----------
    alpha_sum_ : int
        Sum of alpha params.
    classes_ : array, shape (n_classes,)
        Classes list.
    class_count_ : array, shape (n_classes,)
        Number of training samples observed in each class.

    Examples
    --------
    >>> from sklearn.datasets import fetch_20newsgroups
    >>> from sklearn.feature_extraction.text import CountVectorizer
    >>> from bace import ComplementNB

    Prepare data

    >>> vectorizer = CountVectorizer()
    >>> categories = ['alt.atheism', 'talk.religion.misc',
    ...               'comp.graphics', 'sci.space']

    Train set

    >>> newsgroups_train = fetch_20newsgroups(subset='train',
    ...                                       categories=categories,
    ...                                       shuffle=True)
    >>> train_vectors = vectorizer.fit_transform(newsgroups_train.data)

    Test set

    >>> newsgroups_test = fetch_20newsgroups(subset='test',
    ...                                      categories=categories,
    ...                                      shuffle=True)
    >>> test_vectors = vectorizer.transform(newsgroups_test.data)

    >>> clf = ComplementNB()
    >>> clf.fit(train_vectors, newsgroups_train.target).accuracy_score(
    ...     test_vectors, newsgroups_test.target)
    '''

    def __init__(self, alpha=1.0, weight_normalized=False):
        super(ComplementNB, self).__init__()
        # Params
        self.alpha = alpha
        self._check_alpha_param()
        self.weight_normalized = weight_normalized
        # Computed attributes, filled in during fitting.
        # NOTE(review): the original __init__ initialized
        # ``self.complement_features_`` (trailing underscore) while
        # predict_log_proba reads ``self.complement_features`` — the
        # placeholder was never the attribute actually consumed. Unified
        # here on the name the prediction path uses; confirm against
        # _update_complement_features (defined outside this view).
        self.complement_features = None
        self.alpha_sum_ = None

    def predict(self, X):
        # Pick, per sample, the class index with the highest score.
        return self.classes_[np.argmax(self.predict_log_proba(X), axis=1)]

    def predict_log_proba(self, X):
        self._check_is_fitted()
        # Per-class normalizer: total complement feature counts plus the
        # summed smoothing mass.
        denominator = np.sum(self.complement_features, axis=0) + self.alpha_sum_
        # Smoothed log-frequencies of each feature in the complement classes.
        features_weights = np.log(
            (self.complement_features + self.alpha) / denominator)
        if self.weight_normalized:
            # WCNB: normalize each class's weight vector by its L1 norm
            # (Rennie et al. 2003, weight normalization step).
            features_weights /= np.abs(features_weights).sum(axis=1,
                                                             keepdims=True)
        features_doc_logprob = X @ features_weights
        # Subtract because the weights describe the *complement* classes:
        # high complement mass argues against the class.
        return self.class_log_proba_ - features_doc_logprob

    # Fitting model
    def _partial_fit(self, X, y, classes=None, first_partial_fit=None):
        X, y_one_hot = self._prepare_X_y(X, y, first_partial_fit, classes)
        self._update_complement_features(X, y_one_hot)
        self.is_fitted = True