http://scikit-learn.org/stable/modules/naive_bayes.html
Gaussian Naive Bayes 【高斯朴素贝叶斯】
Multinomial Naive Bayes 【多项式朴素贝叶斯】
Bernoulli Naive Bayes 【伯努利朴素贝叶斯】
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"
import time
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
# from sklearn import datasets
# import matplotlib.pyplot as plt
# Gaussian Naive Bayes demo: fit on the iris table and print the fitted state.
# The CSV is fetched over HTTP, so this section needs network access.
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Positional columns 2..5 form the feature matrix; the recorded output for
# this section shows them as Sepal.Length, Sepal.Width, Petal.Length and
# Petal.Width.
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))

# Positional column 1 is used as the class label (species names, judging by
# the recorded predictions).
y = mdata.iloc[:, 1]

# Time estimator construction plus fitting only; data loading is excluded.
t0 = time.time()
clf = GaussianNB()
clf.fit(X, y)
t = time.time() - t0
print(t)

# Predictions are made on the training data itself, so they illustrate the
# API rather than measure generalisation.
print(clf.predict(X))
print(type(clf))
print(clf.class_prior_)
print(clf.class_count_)
print(clf.theta_)
# `sigma_` was deprecated in scikit-learn 1.0 and removed in 1.2; the per-class
# feature variances now live in `var_`. Fall back to `sigma_` only on old
# installs that do not have `var_` yet, so the script runs on both.
print(clf.var_ if hasattr(clf, "var_") else clf.sigma_)
######
### 示例:
######
# import numpy as np
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
# Y = np.array([1, 1, 1, 2, 2, 2])
# from sklearn.naive_bayes import GaussianNB
# clf = GaussianNB()
# clf.fit(X, Y)
# print(clf.predict([[-0.8, -1]]))
#
# clf_pf = GaussianNB()
# clf_pf.partial_fit(X, Y, np.unique(Y))
# print(clf_pf.predict([[-0.8, -1]]))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0 5.1 3.5 1.4 0.2
## 1 4.9 3.0 1.4 0.2
## 2 4.7 3.2 1.3 0.2
## 3 4.6 3.1 1.5 0.2
## 4 5.0 3.6 1.4 0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.0010025501251220703
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'versicolor' 'versicolor' 'virginica' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'virginica' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'virginica' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'versicolor' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'versicolor' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'versicolor' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica']
## <class 'sklearn.naive_bayes.GaussianNB'>
## [ 0.33333333 0.33333333 0.33333333]
## [ 50. 50. 50.]
## [[ 5.006 3.428 1.462 0.246]
## [ 5.936 2.77 4.26 1.326]
## [ 6.588 2.974 5.552 2.026]]
## [[ 0.121764 0.140816 0.029556 0.010884]
## [ 0.261104 0.0965 0.2164 0.038324]
## [ 0.396256 0.101924 0.298496 0.073924]]
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"
import time
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
# from sklearn import datasets
# import matplotlib.pyplot as plt
# Multinomial Naive Bayes demo: fit on the iris table and print the fitted
# state. The CSV is fetched over HTTP, so this section needs network access.
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Positional columns 2..5 form the feature matrix; the recorded output for
# this section shows them as Sepal.Length, Sepal.Width, Petal.Length and
# Petal.Width.
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))

# Positional column 1 is used as the class label (species names, judging by
# the recorded predictions).
y = mdata.iloc[:, 1]

# Time estimator construction plus fitting only; data loading is excluded.
t0 = time.time()
clf = MultinomialNB()
clf.fit(X, y)
t = time.time() - t0
print(t)

# Predictions are made on the training data itself, so they illustrate the
# API rather than measure generalisation.
print(clf.predict(X))
print(type(clf))
print(clf.class_log_prior_)
# `intercept_` and `coef_` were deprecated in scikit-learn 0.24 and removed in
# 1.1. With more than two classes (three here, per the recorded class counts)
# they simply mirrored `class_log_prior_` and `feature_log_prob_`, so printing
# those keeps the output identical while working on current releases.
print(clf.class_log_prior_)
print(clf.feature_log_prob_)
print(clf.class_count_)
print(clf.feature_count_)
######
### 示例:
######
# import numpy as np
# X = np.random.randint(5, size=(6, 100))
# print(X)
# y = np.array([1, 2, 3, 4, 5, 6])
# print(y)
# from sklearn.naive_bayes import MultinomialNB
# clf = MultinomialNB()
# clf.fit(X, y)
# print(X[2:3])
# print(clf.predict(X[2:3]))
# print(clf.predict(X[2:4]))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0 5.1 3.5 1.4 0.2
## 1 4.9 3.0 1.4 0.2
## 2 4.7 3.2 1.3 0.2
## 3 4.6 3.1 1.5 0.2
## 4 5.0 3.6 1.4 0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.004511117935180664
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'virginica'
## 'versicolor' 'virginica' 'versicolor' 'virginica' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'virginica'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'versicolor' 'virginica' 'versicolor' 'virginica' 'versicolor' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica']
## <class 'sklearn.naive_bayes.MultinomialNB'>
## [-1.09861229 -1.09861229 -1.09861229]
## [-1.09861229 -1.09861229 -1.09861229]
## [[-0.70991782 -1.08674791 -1.93114973 -3.64880123]
## [-0.88088275 -1.63924027 -1.21132886 -2.36814464]
## [-0.95779046 -1.74946121 -1.12831744 -2.13018483]]
## [ 50. 50. 50.]
## [[ 250.3 171.4 73.1 12.3]
## [ 296.8 138.5 213. 66.3]
## [ 329.4 148.7 277.6 101.3]]
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"
import time
import numpy as np
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
# from sklearn import datasets
# import matplotlib.pyplot as plt
# Bernoulli Naive Bayes demo: fit on the iris table and print the fitted
# state. The CSV is fetched over HTTP, so this section needs network access.
mdata = pd.read_csv(
    'http://data.galaxystatistics.com/blog_data/regression/iris.csv'
)

# Positional column 1 is the label; positional columns 2..5 are the features
# (shown as the four sepal/petal measurements in the recorded output).
y = mdata.iloc[:, 1]
X = mdata.iloc[:, 2:6]
for summary in (X.head(), X.shape, type(X)):
    print(summary)

# Time estimator construction plus fitting only; data loading is excluded.
t0 = time.time()
clf = BernoulliNB().fit(X, y)  # fit() returns the estimator itself
t = time.time() - t0
print(t)

# NOTE(review): the recorded output for this section shows every prediction as
# 'setosa' and every feature_count_ entry equal to its class count. Presumably
# the default binarize threshold maps all of these positive measurements to 1,
# leaving the model nothing to separate classes with -- confirm, and consider
# a data-appropriate threshold if a meaningful classifier is wanted.
print(clf.predict(X))
print(type(clf))
for fitted_attr in (
    'class_log_prior_',
    'feature_log_prob_',
    'class_count_',
    'feature_count_',
):
    print(getattr(clf, fitted_attr))
######
### 示例:
######
# import numpy as np
# X = np.random.randint(2, size=(6, 100))
# Y = np.array([1, 2, 3, 4, 4, 5])
# from sklearn.naive_bayes import BernoulliNB
# clf = BernoulliNB()
# clf.fit(X, Y)
#
# print(clf.predict(X[2:3]))
# print(clf.predict(X[2:5]))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0 5.1 3.5 1.4 0.2
## 1 4.9 3.0 1.4 0.2
## 2 4.7 3.2 1.3 0.2
## 3 4.6 3.1 1.5 0.2
## 4 5.0 3.6 1.4 0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.004010915756225586
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa']
## <class 'sklearn.naive_bayes.BernoulliNB'>
## [-1.09861229 -1.09861229 -1.09861229]
## [[-0.01941809 -0.01941809 -0.01941809 -0.01941809]
## [-0.01941809 -0.01941809 -0.01941809 -0.01941809]
## [-0.01941809 -0.01941809 -0.01941809 -0.01941809]]
## [ 50. 50. 50.]
## [[ 50. 50. 50. 50.]
## [ 50. 50. 50. 50.]
## [ 50. 50. 50. 50.]]