- Notifications
You must be signed in to change notification settings - Fork 3.8k
Naive bayes #68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Naive bayes #68
Changes from 2 commits
2596c2c b92e8f9 d3a1c51 a4eff01 bae8921 f34aefc eaa1267 8589275 0f6a156 126602d File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| import numpy as np | ||
| | ||
class GaussianNB():
    """
    Gaussian Naive Bayes classifier.

    Assumes each class-conditional feature distribution is an
    independent Gaussian; the per-class mean and variance of every
    feature are estimated from the training data.

    Parameters
    ----------
    eps : float, default=1e-6
        Small value added to each variance to prevent numerical
        errors (division by zero / log of zero for constant features).

    Attributes
    ----------
    labels : ndarray of shape (n_classes,)
        Sorted unique class labels seen during ``fit``; column ``i`` of
        ``predict``'s output corresponds to ``labels[i]``.

    mean : ndarray of shape (n_classes, n_features)
        Per-class mean of each feature.

    sigma : ndarray of shape (n_classes, n_features)
        Per-class variance of each feature (with ``eps`` added).

    prior : ndarray of shape (n_classes,)
        Empirical probability of each class.
    """

    def __init__(self, eps=1e-6):
        self.eps = eps

    def fit(self, X, y):
        """
        Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input data.
        y : ndarray of shape (n_samples,)
            Target class labels (need not be 0..n_classes-1).

        Returns
        -------
        self : object
            The fitted estimator.
        """
        self.n_sample, self.n_features = X.shape
        self.labels = np.unique(y)
        self.n_classes = len(self.labels)

        self.mean = np.zeros((self.n_classes, self.n_features))
        self.sigma = np.zeros((self.n_classes, self.n_features))
        self.prior = np.zeros((self.n_classes,))

        for i, label in enumerate(self.labels):
            # Bug fix: select rows by the actual label value, not the loop
            # index -- the original `X[y == i]` broke (empty slices, NaN
            # statistics) whenever labels were not exactly 0..n_classes-1.
            X_c = X[y == label, :]

            self.mean[i, :] = np.mean(X_c, axis=0)
            self.sigma[i, :] = np.var(X_c, axis=0) + self.eps
            self.prior[i] = X_c.shape[0] / self.n_sample

        return self

    def predict(self, X):
        """
        Compute the per-class joint log likelihood for each sample.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        probs : ndarray of shape (n_samples, n_classes)
            Joint log likelihood of each sample under each class
            (column ``i`` corresponds to ``self.labels[i]``); take the
            argmax over axis 1 for a class prediction.
        """
        probs = np.zeros((X.shape[0], self.n_classes))
        for i in range(self.n_classes):
            probs[:, i] = self.prob(X, self.mean[i, :], self.sigma[i, :],
                                    self.prior[i])
        return probs

    def prob(self, X, mean, sigma, prior):
        """
        Joint log likelihood of the data under one class's Gaussian.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input data.
        mean : ndarray of shape (n_features,)
            Feature means for this class.
        sigma : ndarray of shape (n_features,)
            Feature variances for this class.
        prior : float
            Prior probability of this class.

        Returns
        -------
        joint_log_likelihood : ndarray of shape (n_samples,)
            log P(x | class) + log P(class) for each sample.
        """
        # log N(x; mean, sigma) summed over independent features.
        prob = -self.n_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum(
            np.log(sigma)
        )
        prob -= 0.5 * np.sum(np.power(X - mean, 2) / sigma, axis=1)

        # Bug fix: add the *log* prior. The original added the raw
        # probability `prior`, which is not a valid term in log space and
        # skewed the class scores.
        joint_log_likelihood = np.log(prior) + prob
        return joint_log_likelihood
| ||
| | ||
| | ||
| | ||
| | ||
| | ||
| | ||
| | ||
| | ||
| | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| # Naive Bayes | ||
| The `naive_bayes.py` module implements: | ||
| | ||
| 1. Gaussian Naive Bayes | ||
| | ||
| 2. Multinomial Naive Bayes | ||
| | ||
| 3. Categorical Naive Bayes | ||
| | ||
| | ||
| Reference: | ||
| H. Zhang (2004). The optimality of Naive Bayes. Proc. FLAIRS. |
Uh oh!
There was an error while loading. Please reload this page.