http://scikit-learn.org/stable/modules/naive_bayes.html

高斯朴素贝叶斯-分类-GaussianNB

http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html#sklearn.naive_bayes.GaussianNB

# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"

import time
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
# from sklearn import datasets
# import matplotlib.pyplot as plt

# Load the iris data set. The CSV is fetched over HTTP on every run.
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Positional columns 2..5 are the four measurements used as features;
# positional column 1 is used as the class label.
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))
y = mdata.iloc[:, 1]

# Time only the fit. perf_counter() is monotonic and high-resolution, so the
# measured interval cannot be skewed by system clock adjustments.
t0 = time.perf_counter()
clf = GaussianNB()
clf.fit(X, y)
t = time.perf_counter() - t0

print(t)
# Predictions on the training data itself (demo only, not an evaluation).
print(clf.predict(X))
print(type(clf))

# Fitted parameters: class priors, per-class sample counts, per-class feature
# means, and per-class feature variances.
print(clf.class_prior_)
print(clf.class_count_)
print(clf.theta_)
# ``sigma_`` was renamed to ``var_`` in scikit-learn 1.0 and removed in 1.2.
# Prefer the new name and fall back only on releases that predate it.
print(clf.var_ if hasattr(clf, "var_") else clf.sigma_)

######
### 示例:
######
# import numpy as np
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
# Y = np.array([1, 1, 1, 2, 2, 2])
# from sklearn.naive_bayes import GaussianNB
# clf = GaussianNB()
# clf.fit(X, Y)
# print(clf.predict([[-0.8, -1]]))
# 
# clf_pf = GaussianNB()
# clf_pf.partial_fit(X, Y, np.unique(Y))
# print(clf_pf.predict([[-0.8, -1]]))
## Recorded output of the GaussianNB script above (not of the commented-out example):
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width
## 0           5.1          3.5           1.4          0.2
## 1           4.9          3.0           1.4          0.2
## 2           4.7          3.2           1.3          0.2
## 3           4.6          3.1           1.5          0.2
## 4           5.0          3.6           1.4          0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.0010025501251220703
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'versicolor' 'versicolor' 'virginica' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'virginica' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'virginica' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'versicolor' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'versicolor' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'versicolor' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica']
## <class 'sklearn.naive_bayes.GaussianNB'>
## [ 0.33333333  0.33333333  0.33333333]
## [ 50.  50.  50.]
## [[ 5.006  3.428  1.462  0.246]
##  [ 5.936  2.77   4.26   1.326]
##  [ 6.588  2.974  5.552  2.026]]
## [[ 0.121764  0.140816  0.029556  0.010884]
##  [ 0.261104  0.0965    0.2164    0.038324]
##  [ 0.396256  0.101924  0.298496  0.073924]]

多项式朴素贝叶斯-分类-MultinomialNB

http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html#sklearn.naive_bayes.MultinomialNB

# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"

import time
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
# from sklearn import datasets
# import matplotlib.pyplot as plt

# Load the iris data set. The CSV is fetched over HTTP on every run.
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Positional columns 2..5 are the four measurements used as features;
# positional column 1 is used as the class label.
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))
y = mdata.iloc[:, 1]

# Time only the fit. perf_counter() is monotonic and high-resolution, so the
# measured interval cannot be skewed by system clock adjustments.
t0 = time.perf_counter()
clf = MultinomialNB()
clf.fit(X, y)
t = time.perf_counter() - t0

print(t)
# Predictions on the training data itself (demo only, not an evaluation).
print(clf.predict(X))
print(type(clf))

print(clf.class_log_prior_)
# ``intercept_`` and ``coef_`` were deprecated in scikit-learn 0.24 and removed
# in 1.1. For a problem with more than two classes they simply mirrored
# ``class_log_prior_`` and ``feature_log_prob_``, so printing those attributes
# yields the same values on every release.
print(clf.class_log_prior_)
print(clf.feature_log_prob_)
print(clf.class_count_)
print(clf.feature_count_)


######
### 示例:
######
# import numpy as np
# X = np.random.randint(5, size=(6, 100))
# print(X)
# y = np.array([1, 2, 3, 4, 5, 6])
# print(y)
# from sklearn.naive_bayes import MultinomialNB
# clf = MultinomialNB()
# clf.fit(X, y)
# print(X[2:3])
# print(clf.predict(X[2:3]))
# print(clf.predict(X[2:4]))
## Recorded output of the MultinomialNB script above (not of the commented-out example):
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width
## 0           5.1          3.5           1.4          0.2
## 1           4.9          3.0           1.4          0.2
## 2           4.7          3.2           1.3          0.2
## 3           4.6          3.1           1.5          0.2
## 4           5.0          3.6           1.4          0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.004511117935180664
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'virginica'
##  'versicolor' 'virginica' 'versicolor' 'virginica' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'virginica'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'versicolor' 'virginica' 'versicolor' 'virginica' 'versicolor' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica']
## <class 'sklearn.naive_bayes.MultinomialNB'>
## [-1.09861229 -1.09861229 -1.09861229]
## [-1.09861229 -1.09861229 -1.09861229]
## [[-0.70991782 -1.08674791 -1.93114973 -3.64880123]
##  [-0.88088275 -1.63924027 -1.21132886 -2.36814464]
##  [-0.95779046 -1.74946121 -1.12831744 -2.13018483]]
## [ 50.  50.  50.]
## [[ 250.3  171.4   73.1   12.3]
##  [ 296.8  138.5  213.    66.3]
##  [ 329.4  148.7  277.6  101.3]]

伯努利朴素贝叶斯-分类-BernoulliNB

http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.BernoulliNB.html#sklearn.naive_bayes.BernoulliNB

# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"

import time
import numpy as np
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
# from sklearn import datasets
# import matplotlib.pyplot as plt

mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Positional columns 2..5 are the four measurements used as features;
# positional column 1 is used as the class label.
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))
y = mdata.iloc[:, 1]

# BernoulliNB models binary features. Its default threshold (binarize=0.0)
# maps every iris measurement to 1 because all of them are positive, which
# leaves the model with no information: the output recorded further down in
# this file (feature_count_ of 50 everywhere, every prediction 'setosa') comes
# from that degenerate setting. Binarize each feature at its own median
# instead and tell the estimator the input is already binary.
X_bin = (X > X.median()).astype(int)

# Time only the fit. perf_counter() is monotonic and high-resolution, so the
# measured interval cannot be skewed by system clock adjustments.
t0 = time.perf_counter()
clf = BernoulliNB(binarize=None)
clf.fit(X_bin, y)
t = time.perf_counter() - t0

print(t)
# Predictions on the training data itself (demo only, not an evaluation).
print(clf.predict(X_bin))
print(type(clf))

# Fitted parameters: log class priors, per-class log probability of each
# binary feature, per-class sample counts, and per-class feature counts.
print(clf.class_log_prior_)
print(clf.feature_log_prob_)
print(clf.class_count_)
print(clf.feature_count_)

######
### 示例:
######
# import numpy as np
# X = np.random.randint(2, size=(6, 100))
# Y = np.array([1, 2, 3, 4, 4, 5])
# from sklearn.naive_bayes import BernoulliNB
# clf = BernoulliNB()
# clf.fit(X, Y)
# 
# print(clf.predict(X[2:3]))
# print(clf.predict(X[2:5]))
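## Recorded output of a run with the default BernoulliNB() settings (not of the commented-out example):
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width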
## 0           5.1          3.5           1.4          0.2
## 1           4.9          3.0           1.4          0.2
## 2           4.7          3.2           1.3          0.2
## 3           4.6          3.1           1.5          0.2
## 4           5.0          3.6           1.4          0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.004010915756225586
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa']
## <class 'sklearn.naive_bayes.BernoulliNB'>
## [-1.09861229 -1.09861229 -1.09861229]
## [[-0.01941809 -0.01941809 -0.01941809 -0.01941809]
##  [-0.01941809 -0.01941809 -0.01941809 -0.01941809]
##  [-0.01941809 -0.01941809 -0.01941809 -0.01941809]]
## [ 50.  50.  50.]
## [[ 50.  50.  50.  50.]
##  [ 50.  50.  50.  50.]
##  [ 50.  50.  50.  50.]]