Naive Bayes

toyml.classification.naive_bayes.BaseNaiveBayes dataclass

BaseNaiveBayes(class_prior_: dict[Class, float] = dict())

Bases: ABC

class_prior_ class-attribute instance-attribute

class_prior_: dict[Class, float] = field(default_factory=dict)

The prior probability of each class in the training dataset

predict

predict(sample: list[FeatureValue]) -> int

Predict the class label for a given sample.

PARAMETER DESCRIPTION
sample

A single sample to predict, represented as a list of feature values.

TYPE: list[FeatureValue]

RETURNS DESCRIPTION
int

Predicted class label.

TYPE: int

Source code in toyml/classification/naive_bayes.py
def predict(self, sample: list[FeatureValue]) -> int:
    """Predict the class label for a given sample.

    Args:
        sample: A single sample to predict, represented as a list of feature values.

    Returns:
        int: Predicted class label.
    """
    label_posteriors = self.predict_proba(sample)
    label = max(label_posteriors, key=lambda k: label_posteriors[k])
    return label
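
In other words, predict is a thin wrapper that returns the argmax of predict_proba. A minimal usage sketch, reusing the training data from the GaussianNaiveBayes example further down this page (the data comes from that example; the intermediate variable names here are illustrative):

>>> label = [0, 0, 0, 0, 1, 1, 1, 1]
>>> dataset = [[6.00, 180, 12], [5.92, 190, 11], [5.58, 170, 12], [5.92, 165, 10], [5.00, 100, 6], [5.50, 150, 8], [5.42, 130, 7], [5.75, 150, 9]]
>>> clf = GaussianNaiveBayes().fit(dataset, label)
>>> posteriors = clf.predict_proba([6.00, 130, 8])
>>> max(posteriors, key=posteriors.get) == clf.predict([6.00, 130, 8])
True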

predict_proba

predict_proba(sample: list[FeatureValue], normalization: bool = True) -> dict[Class, float]

Predict class probabilities for a given sample.

PARAMETER DESCRIPTION
sample

A single sample to predict, represented as a list of feature values.

TYPE: list[FeatureValue]

normalization

Whether to normalize the probabilities. Default is True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
dict[Class, float]

dict[int, float]: Dictionary mapping class labels to their predicted probabilities.

Source code in toyml/classification/naive_bayes.py
def predict_proba(self, sample: list[FeatureValue], normalization: bool = True) -> dict[Class, float]:
    """Predict class probabilities for a given sample.

    Args:
        sample: A single sample to predict, represented as a list of feature values.
        normalization: Whether to normalize the probabilities. Default is True.

    Returns:
        dict[int, float]: Dictionary mapping class labels to their predicted probabilities.
    """
    label_posteriors = self.predict_log_proba(sample, normalization)
    return {label: math.exp(log_prob) for label, log_prob in label_posteriors.items()}
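
Because the log posteriors are normalized with a log-sum-exp (see predict_log_proba below) before being exponentiated, the returned probabilities sum to 1 when normalization=True; with normalization=False they are the unnormalized joint probabilities P(x, y). Continuing the clf fitted in the predict sketch above:

>>> probs = clf.predict_proba([6.00, 130, 8])
>>> round(sum(probs.values()), 6)
1.0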

predict_log_proba

predict_log_proba(sample: list[FeatureValue], normalization: bool = True) -> dict[Class, float]

Predict log probabilities for a given sample.

PARAMETER DESCRIPTION
sample

A single sample to predict, represented as a list of feature values.

TYPE: list[FeatureValue]

normalization

Whether to normalize the log probabilities. Default is True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
dict[Class, float]

dict[int, float]: Dictionary mapping class labels to their predicted log probabilities.

Source code in toyml/classification/naive_bayes.py
def predict_log_proba(self, sample: list[FeatureValue], normalization: bool = True) -> dict[Class, float]:
    """Predict log probabilities for a given sample.

    Args:
        sample: A single sample to predict, represented as a list of feature values.
        normalization: Whether to normalize the log probabilities. Default is True.

    Returns:
        dict[int, float]: Dictionary mapping class labels to their predicted log probabilities.
    """
    label_likelihoods = self._log_likelihood(sample)
    raw_label_posteriors: dict[int, float] = {}
    for label, likelihood in label_likelihoods.items():
        raw_label_posteriors[label] = likelihood + math.log(self.class_prior_[label])
    if normalization is False:
        return raw_label_posteriors
    # ref: https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/naive_bayes.py#L97
    max_log_prob = max(raw_label_posteriors.values())
    logsumexp_prob = max_log_prob + math.log(
        sum(math.exp(log_prob - max_log_prob) for log_prob in raw_label_posteriors.values())
    )
    label_posteriors = {
        label: raw_posterior - logsumexp_prob for label, raw_posterior in raw_label_posteriors.items()
    }
    return label_posteriors
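
The max-shift above is the standard log-sum-exp trick (the same one used in the referenced scikit-learn code): subtracting the largest log posterior before exponentiating keeps every exponent at or below zero, so nothing overflows and the relative weights survive even when the raw log posteriors are hugely negative. A standalone sketch of the idea (the logsumexp function name is illustrative, not part of toyml):

import math

def logsumexp(log_probs: list[float]) -> float:
    # Numerically stable log(sum(exp(x) for x in log_probs)).
    m = max(log_probs)
    return m + math.log(sum(math.exp(x - m) for x in log_probs))

# Naively exponentiating these would underflow to 0.0 and make
# normalization divide by zero; the shifted version recovers the
# correct relative weights (a softmax over the raw scores).
raw = {0: -1000.0, 1: -1001.0}
z = logsumexp(list(raw.values()))
print({label: math.exp(lp - z) for label, lp in raw.items()})
# {0: 0.7310585786300049, 1: 0.2689414213699951}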

toyml.classification.naive_bayes.GaussianNaiveBayes dataclass

GaussianNaiveBayes(class_prior_: dict[Class, float] = dict(), unbiased_variance: bool = True, var_smoothing: float = 1e-09, labels_: list[Class] = list(), class_count_: int = 0, means_: dict[Class, list[float]] = dict(), variances_: dict[Class, list[float]] = dict(), epsilon_: float = 0)

Bases: BaseNaiveBayes

Gaussian Naive Bayes classification algorithm implementation.

Examples:

>>> label = [0, 0, 0, 0, 1, 1, 1, 1]
>>> dataset = [[6.00, 180, 12], [5.92, 190, 11], [5.58, 170, 12], [5.92, 165, 10], [5.00, 100, 6], [5.50, 150, 8], [5.42, 130, 7], [5.75, 150, 9]]
>>> clf = GaussianNaiveBayes().fit(dataset, label)
>>> clf.predict([6.00, 130, 8])
1

unbiased_variance class-attribute instance-attribute

unbiased_variance: bool = True

Whether to use unbiased variance estimation. Default is True.

var_smoothing class-attribute instance-attribute

var_smoothing: float = 1e-09

Portion of the largest variance of all features that is added to variances for calculation stability.

labels_ class-attribute instance-attribute

labels_: list[Class] = field(default_factory=list)

The labels in the training dataset

class_count_ class-attribute instance-attribute

class_count_: int = 0

The number of classes in the training dataset

class_prior_ class-attribute instance-attribute

class_prior_: dict[Class, float] = field(default_factory=dict)

The prior probability of each class in the training dataset

means_ class-attribute instance-attribute

means_: dict[Class, list[float]] = field(default_factory=dict)

The means of each class in the training dataset

variances_ class-attribute instance-attribute

variances_: dict[Class, list[float]] = field(default_factory=dict)

The variances of each class in the training dataset

epsilon_ class-attribute instance-attribute

epsilon_: float = 0

The absolute additive value applied to the variances for calculation stability.

fit

fit(dataset: list[list[FeatureValue]], labels: list[Class]) -> GaussianNaiveBayes

Fit the Gaussian Naive Bayes classifier.

PARAMETER DESCRIPTION
dataset

Training data, where each row is a sample and each column is a feature.

TYPE: list[list[FeatureValue]]

labels

Target labels for training data.

TYPE: list[Class]

RETURNS DESCRIPTION
self

Returns the instance itself.

TYPE: GaussianNaiveBayes

Source code in toyml/classification/naive_bayes.py
def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> GaussianNaiveBayes:
    """Fit the Gaussian Naive Bayes classifier.

    Args:
        dataset: Training data, where each row is a sample and each column is a feature.
        labels: Target labels for training data.

    Returns:
        self: Returns the instance itself.
    """
    self.labels_ = sorted(set(labels))
    self.class_count_ = len(set(labels))
    self.class_prior_ = {label: 1 / self.class_count_ for label in self.labels_}
    self.epsilon_ = self.var_smoothing * max(self._variance(col) for col in zip(*dataset))
    self.means_, self.variances_ = self._get_classes_means_variances(dataset, labels)
    return self
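
Two details worth noting: this fit assigns a uniform prior (1 / class_count_) rather than the empirical class frequencies used by the multinomial and categorical variants, and epsilon_ is derived from the largest per-feature variance over the whole dataset. The _get_classes_means_variances helper is not shown on this page; a minimal sketch of the statistics it plausibly computes, assuming per-class, per-feature means and (by default unbiased, n - 1 denominator) variances with epsilon_ added for stability (the helper's exact smoothing placement is an assumption):

import statistics

def class_means_variances(dataset, labels, epsilon, unbiased=True):
    means, variances = {}, {}
    var = statistics.variance if unbiased else statistics.pvariance
    for label in sorted(set(labels)):
        rows = [x for x, y in zip(dataset, labels) if y == label]
        columns = list(zip(*rows))  # per-feature values within this class
        means[label] = [statistics.fmean(col) for col in columns]
        variances[label] = [var(col) + epsilon for col in columns]
    return means, variances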

toyml.classification.naive_bayes.MultinomialNaiveBayes dataclass

MultinomialNaiveBayes(class_prior_: dict[Class, float] = dict(), alpha: float = 1.0, labels_: list[Class] = list(), class_count_: int = 0, class_feature_count_: dict[Class, list[int]] = dict(), class_feature_log_prob_: dict[Class, list[float]] = dict())

Bases: BaseNaiveBayes

Multinomial Naive Bayes classifier.

Examples:

>>> import random
>>> rng = random.Random(0)
>>> dataset = [[rng.randint(0, 5) for _ in range(100)] for _ in range(6)]
>>> label = [1, 2, 3, 4, 5, 6]
>>> clf = MultinomialNaiveBayes().fit(dataset, label)
>>> clf.predict(dataset[2])
3

alpha class-attribute instance-attribute

alpha: float = 1.0

Additive (Laplace/Lidstone) smoothing parameter

labels_ class-attribute instance-attribute

labels_: list[Class] = field(default_factory=list)

The labels in the training dataset

class_count_ class-attribute instance-attribute

class_count_: int = 0

The number of classes in the training dataset

class_prior_ class-attribute instance-attribute

class_prior_: dict[Class, float] = field(default_factory=dict)

The prior probability of each class in the training dataset

class_feature_count_ class-attribute instance-attribute

class_feature_count_: dict[Class, list[int]] = field(default_factory=dict)

The feature value counts of each class in the training dataset

class_feature_log_prob_ class-attribute instance-attribute

class_feature_log_prob_: dict[Class, list[float]] = field(default_factory=dict)

The feature value log probabilities of each class in the training dataset

fit

fit(dataset: list[list[FeatureValue]], labels: list[Class]) -> MultinomialNaiveBayes

Fit the Multinomial Naive Bayes classifier.

PARAMETER DESCRIPTION
dataset

Training data, where each row is a sample and each column is a feature. Features should be represented as counts (non-negative integers).

TYPE: list[list[FeatureValue]]

labels

Target labels for training data.

TYPE: list[Class]

RETURNS DESCRIPTION
self

Returns the instance itself.

TYPE: MultinomialNaiveBayes

Source code in toyml/classification/naive_bayes.py
def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> MultinomialNaiveBayes:
    """Fit the Multinomial Naive Bayes classifier.

    Args:
        dataset: Training data, where each row is a sample and each column is a feature.
                 Features should be represented as counts (non-negative integers).
        labels: Target labels for training data.

    Returns:
        self: Returns the instance itself.
    """
    self.labels_ = sorted(set(labels))
    self.class_count_ = len(set(labels))
    # get the prior from training dataset labels
    self.class_prior_ = {label: count / len(dataset) for label, count in Counter(labels).items()}
    self.class_feature_count_, self.class_feature_log_prob_ = self._get_classes_feature_count_prob(dataset, labels)
    return self
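
Unlike the Gaussian variant, the prior here is empirical (each class's frequency in the training set). The _get_classes_feature_count_prob helper is private and not shown; a hedged sketch of the standard multinomial estimate it presumably implements, log P(feature i | y) = log(N_yi + alpha) - log(N_y + alpha * n_features), where N_yi is the summed count of feature i over all class-y samples (the function name and return layout here are assumptions):

import math

def class_feature_log_prob(dataset, labels, alpha=1.0):
    log_probs = {}
    for label in sorted(set(labels)):
        rows = [x for x, y in zip(dataset, labels) if y == label]
        # Total count of each feature over all samples of this class.
        feature_counts = [sum(col) for col in zip(*rows)]
        total = sum(feature_counts)
        n_features = len(feature_counts)
        log_probs[label] = [
            math.log(count + alpha) - math.log(total + alpha * n_features)
            for count in feature_counts
        ]
    return log_probs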

toyml.classification.naive_bayes.CategoricalNaiveBayes dataclass

CategoricalNaiveBayes(class_prior_: dict[Class, float] = dict(), alpha: float = 1.0, labels_: list[Class] = list(), class_count_: int = 0, class_feature_count_: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = dict(), class_feature_log_prob_: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = dict())

Bases: BaseNaiveBayes

Categorical Naive Bayes classifier.

Examples:

>>> import random
>>> rng = random.Random(0)
>>> dataset = [[rng.randint(0, 5) for _ in range(100)] for _ in range(6)]
>>> label = [1, 2, 3, 4, 5, 6]
>>> clf = CategoricalNaiveBayes().fit(dataset, label)
>>> clf.predict(dataset[2])
3

alpha class-attribute instance-attribute

alpha: float = 1.0

Additive (Laplace/Lidstone) smoothing parameter

labels_ class-attribute instance-attribute

labels_: list[Class] = field(default_factory=list)

The labels in the training dataset

class_count_ class-attribute instance-attribute

class_count_: int = 0

The number of classes in the training dataset

class_prior_ class-attribute instance-attribute

class_prior_: dict[Class, float] = field(default_factory=dict)

The prior probability of each class in the training dataset

class_feature_count_ class-attribute instance-attribute

class_feature_count_: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = field(default_factory=dict)

The feature value counts of each class in the training dataset

class_feature_log_prob_ class-attribute instance-attribute

class_feature_log_prob_: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = field(default_factory=dict)

The feature value log probabilities of each class in the training dataset

fit

fit(dataset: list[list[FeatureValue]], labels: list[Class]) -> CategoricalNaiveBayes

Fit the Categorical Naive Bayes classifier.

PARAMETER DESCRIPTION
dataset

Training data, where each row is a sample and each column is a feature.

TYPE: list[list[FeatureValue]]

labels

Target labels for training data.

TYPE: list[Class]

RETURNS DESCRIPTION
self

Returns the instance itself.

TYPE: CategoricalNaiveBayes

Source code in toyml/classification/naive_bayes.py
def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> CategoricalNaiveBayes:
    """Fit the Categorical Naive Bayes classifier.

    Args:
        dataset: Training data, where each row is a sample and each column is a feature.
        labels: Target labels for training data.

    Returns:
        self: Returns the instance itself.
    """
    self.labels_ = sorted(set(labels))
    self.class_count_ = len(set(labels))
    # get the prior from training dataset labels
    self.class_prior_ = {label: count / len(dataset) for label, count in Counter(labels).items()}
    self.class_feature_count_, self.class_feature_log_prob_ = self._get_classes_feature_count_prob(dataset, labels)
    return self
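
The counting helper mirrors the multinomial one but works per dimension: each feature value is a category, and the smoothed estimate is P(x_d = v | y) = (count of v among class-y samples + alpha) / (n_y + alpha * n_categories_d). A hedged sketch matching the nested dict[Class, dict[Dimension, dict[FeatureValue, float]]] layout above (the function name is illustrative, and whether categories are pooled per dimension across all classes is an assumption):

import math
from collections import Counter

def class_dim_value_log_prob(dataset, labels, alpha=1.0):
    n_dims = len(dataset[0])
    # Categories observed in each dimension across the whole training set.
    dim_categories = [{row[d] for row in dataset} for d in range(n_dims)]
    log_probs = {}
    for label in sorted(set(labels)):
        rows = [x for x, y in zip(dataset, labels) if y == label]
        log_probs[label] = {}
        for d in range(n_dims):
            counts = Counter(row[d] for row in rows)
            denominator = len(rows) + alpha * len(dim_categories[d])
            log_probs[label][d] = {
                value: math.log(counts[value] + alpha) - math.log(denominator)
                for value in dim_categories[d]
            }
    return log_probs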