128 changes: 128 additions & 0 deletions numpy_ml/naive_bayes/naive_bayes.py
@@ -0,0 +1,128 @@
import numpy as np

class GaussianNB:
    """
    Gaussian Naive Bayes.

    Assumes each class-conditional feature distribution is independent,
    and estimates the per-class feature means and variances from the
    training data.

    Parameters
    ----------
    eps : float
        A small value added to each variance to prevent numerical error.

    Attributes
    ----------
    labels : ndarray of shape (n_classes,)
        The unique class labels in the training sample.

    mean : ndarray of shape (n_classes, n_features)
        Per-class mean of each feature.

    sigma : ndarray of shape (n_classes, n_features)
        Per-class variance of each feature.

    prior : ndarray of shape (n_classes,)
        Prior probability of each class, estimated from the training sample.
    """
    def __init__(self, eps=1e-6):
        self.eps = eps

    def fit(self, X, y):
        """
        Train the model on X and y.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input data
        y : ndarray of shape (n_samples,)
            Target labels

        Returns
        -------
        self : object
        """

        self.n_sample, self.n_features = X.shape
        self.labels = np.unique(y)
        self.n_classes = len(self.labels)

        self.mean = np.zeros((self.n_classes, self.n_features))
        self.sigma = np.zeros((self.n_classes, self.n_features))
        self.prior = np.zeros((self.n_classes,))

        for i, c in enumerate(self.labels):
            # select the training samples belonging to class c (indexing by the
            # actual label rather than the loop counter, so non-contiguous
            # labels are handled correctly)
            X_c = X[y == c, :]

            self.mean[i, :] = np.mean(X_c, axis=0)
            self.sigma[i, :] = np.var(X_c, axis=0) + self.eps
            self.prior[i] = X_c.shape[0] / self.n_sample

        return self

    def predict(self, X):
        """
        Use the trained model to generate predictions.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input data

        Returns
        -------
        probs : ndarray of shape (n_samples, n_classes)
            The unnormalized log class posterior for each item in X; the
            predicted label for row n is self.labels[np.argmax(probs[n])].
        """
        probs = np.zeros((X.shape[0], self.n_classes))
        for i in range(self.n_classes):
            probs[:, i] = self.prob(X, self.mean[i, :], self.sigma[i, :], self.prior[i])

        return probs

    def prob(self, X, mean, sigma, prior):
        """
        Compute the unnormalized log class posterior of the data under a
        Gaussian distribution with the given per-class mean, variance, and
        prior.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input data

        mean : ndarray of shape (n_features,)
            Mean of each feature for the given class

        sigma : ndarray of shape (n_features,)
            Variance of each feature for the given class

        prior : float
            Prior probability of the given class

        Returns
        -------
        log_posterior : ndarray of shape (n_samples,)
            Unnormalized log class posterior of each sample
        """

        # log likelihood of X under an axis-aligned (diagonal) Gaussian
        prob = -self.n_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum(
            np.log(sigma)
        )
        prob -= 0.5 * np.sum((X - mean) ** 2 / sigma, axis=1)

        # adding the log prior gives the unnormalized log class posterior
        return np.log(prior) + prob
@ddbourgin (Owner) commented on May 30, 2021:

Nomenclature: I'm not sure it's right to call this the joint log likelihood? That is, this function computes the unnormalized quantity P(y = c | X, mean_c, sigma_c), which I'd think is the class posterior.

@ddbourgin (Owner) commented:

I think there are a few errors here.

  • The log Gaussian likelihood calc in prob isn't quite right (see later commit for details)
  • In the joint_log_likelihood = prior + prob line, you're adding a log-transformed vector (prob) to raw probabilities (prior), which doesn't make sense. I think what you want is np.log(prior) + prob. See my later commit for details.
  • This isn't a joint likelihood, right? You're computing the joint class likelihood in prob, but when you add in the prior, this will be proportional to the log class posterior.

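For reference, the decomposition behind the review discussion is standard Bayes' rule (this note is not part of the original PR). The quantity prob returns differs from the true log posterior only by the class-independent constant \log P(X):

    \log P(y = c \mid X) = \log P(X \mid y = c) + \log P(y = c) - \log P(X)

and under the naive independence assumption the class-conditional likelihood factorizes over features:

    \log P(X \mid y = c) = \sum_{j=1}^{d} \log \mathcal{N}(x_j \mid \mu_{c,j}, \sigma_{c,j}^2)

Since \log P(X) is the same for every class, the unnormalized sum of the first two terms suffices for an argmax prediction, which is why predict returns it without normalizing.
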
12 changes: 12 additions & 0 deletions numpy_ml/naive_bayes/readme.md
@@ -0,0 +1,12 @@
# Naive Bayes
The `naive_bayes.py` module implements the following models; a short usage sketch follows the list:

1. [Gaussian Naive Bayes]

2. [Multinomial Naive Bayes]

3. [Categorical Naive Bayes]
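
A minimal usage sketch for the Gaussian model, assuming the module path added in this PR (`numpy_ml/naive_bayes/naive_bayes.py`). The data here is synthetic and for illustration only; since `predict` returns unnormalized log posteriors, a log-sum-exp (softmax) step recovers proper class probabilities:

```python
import numpy as np
from numpy_ml.naive_bayes.naive_bayes import GaussianNB

# two synthetic Gaussian blobs, one per class
X = np.vstack([np.random.randn(50, 2) - 2, np.random.randn(50, 2) + 2])
y = np.array([0] * 50 + [1] * 50)

model = GaussianNB().fit(X, y)
log_post = model.predict(X)  # shape (100, 2), unnormalized log posteriors
preds = model.labels[np.argmax(log_post, axis=1)]

# normalize with log-sum-exp to obtain posterior probabilities
log_post -= log_post.max(axis=1, keepdims=True)
probs = np.exp(log_post)
probs /= probs.sum(axis=1, keepdims=True)
```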


Reference:
H. Zhang (2004). The optimality of Naive Bayes. Proc. FLAIRS.