http://scikit-learn.org/stable/modules/tree.html#classification

Classification tree - DecisionTreeClassifier

http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"

import time
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
# from sklearn import datasets
# import matplotlib.pyplot as plt

# Load the iris data set (CSV hosted remotely)
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Features: the four measurement columns (Sepal.Length .. Petal.Width)
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))
# Target: the species labels
y = mdata.iloc[:, 1]

# Fit the classifier and time the call
t0 = time.time()
clf = DecisionTreeClassifier()
clf = clf.fit(X, y)
t = time.time() - t0

print(t)

# Predict on the training data itself
print(clf.predict(X))

# Attributes of the fitted model (n_features_ comes from older scikit-learn;
# newer releases expose n_features_in_ instead)
print(clf.classes_)
print(clf.feature_importances_)
print(clf.max_features_)
print(clf.n_classes_)
print(clf.n_features_)
print(clf.n_outputs_)
print(clf.tree_)

print(type(clf))


# from sklearn import datasets
# iris = datasets.load_iris()
# X = iris.data[:, [0, 2]]
# y = iris.target
# print(X)
# print(y)
# clf = DecisionTreeClassifier()
# clf = clf.fit(X, y)
# 
# print(clf.classes_)
# print(clf.feature_importances_)
# print(clf.max_features_)
# print(clf.n_classes_)
# print(clf.n_features_)
# print(clf.n_outputs_)
# print(clf.tree_)
# 
# # Note: X1 takes columns [2, 3] although the tree above was trained on columns [0, 2]
# X1 = iris.data[:, [2, 3]]
# print(X1)
# print(clf.predict(X1))
##    Sepal.Length  Sepal.Width  Petal.Length  Petal.Width
## 0           5.1          3.5           1.4          0.2
## 1           4.9          3.0           1.4          0.2
## 2           4.7          3.2           1.3          0.2
## 3           4.6          3.1           1.5          0.2
## 4           5.0          3.6           1.4          0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.0010027885437011719
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
##  'setosa' 'setosa' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
##  'versicolor' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
##  'virginica' 'virginica' 'virginica']
## ['setosa' 'versicolor' 'virginica']
## [ 0.02666667  0.          0.05072262  0.92261071]
## 4
## 3
## 4
## 1
## <sklearn.tree._tree.Tree object at 0x0000026EE75B91D0>
## <class 'sklearn.tree.tree.DecisionTreeClassifier'>
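
The script above fits and predicts on the same rows, so the predictions simply reproduce the training labels. Below is a minimal sketch of a held-out evaluation, assuming the same remotely hosted iris.csv and a recent scikit-learn (train_test_split lives in sklearn.model_selection; export_text was added in 0.21):

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text

mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')
X = mdata.iloc[:, 2:6]
y = mdata.iloc[:, 1]

# Hold out 30% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)

# Accuracy on the unseen rows, and a text rendering of the learned splits
print(clf.score(X_test, y_test))
print(export_text(clf, feature_names=list(X.columns)))

A deliberately pruned tree (max_depth, min_samples_leaf) usually generalises better than the default fully grown one.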

Regression tree - DecisionTreeRegressor

http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html#sklearn.tree.DecisionTreeRegressor

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"

import time
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
# from sklearn import datasets
# import matplotlib.pyplot as plt

# Load the iris data set (CSV hosted remotely)
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')

# Features: Sepal.Width, Petal.Length, Petal.Width
X = mdata.iloc[:, 3:6]
print(X.head())
print(X.shape)
print(type(X))
# Target: Sepal.Length (a continuous value, hence a regression tree)
y = mdata.iloc[:, 2]

# Fit the regressor and time the call
t0 = time.time()
clf = DecisionTreeRegressor()
clf = clf.fit(X, y)
t = time.time() - t0

print(t)

# Predict on the training data itself
print(clf.predict(X))

# Attributes of the fitted model (n_classes_ is reported as 1 here;
# newer scikit-learn versions no longer expose it on regressors)
print(clf.feature_importances_)
print(clf.max_features_)
print(clf.n_classes_)
print(clf.n_features_)
print(clf.n_outputs_)
print(clf.tree_)

print(type(clf))
##    Sepal.Width  Petal.Length  Petal.Width
## 0          3.5           1.4          0.2
## 1          3.0           1.4          0.2
## 2          3.2           1.3          0.2
## 3          3.1           1.5          0.2
## 4          3.6           1.4          0.2
## (150, 3)
## <class 'pandas.core.frame.DataFrame'>
## 0.0005009174346923828
## [ 5.1   4.9   4.55  4.75  5.    5.4   4.6   5.05  4.4   4.9   5.35  4.8
##   4.8   4.3   5.8   5.7   5.4   5.1   5.7   5.1   5.4   5.1   4.6   5.1
##   4.8   5.    5.    5.2   5.2   4.7   4.8   5.4   5.2   5.5   4.75  5.    5.5
##   4.9   4.4   5.05  5.    4.5   4.55  5.    5.1   4.8   5.1   4.6   5.35
##   5.    7.    6.4   6.9   5.5   6.5   5.7   6.3   4.9   6.6   5.2   5.    5.9
##   6.    6.1   5.6   6.7   5.5   5.8   6.2   5.6   5.9   6.1   6.3   6.1
##   6.3   6.6   6.8   6.7   6.    5.7   5.5   5.5   5.8   6.    5.5   6.    6.7
##   6.3   5.6   5.5   5.5   6.1   5.8   5.    5.6   5.7   5.7   6.3   5.1
##   5.7   6.3   5.8   7.1   6.3   6.5   7.6   4.9   7.3   6.7   7.2   6.5
##   6.4   6.8   5.7   5.8   6.4   6.5   7.7   7.7   6.    6.9   5.6   7.7
##   6.3   6.7   7.2   6.2   6.1   6.4   7.2   7.4   7.9   6.4   6.3   6.1
##   7.7   6.3   6.4   6.    6.9   6.7   6.9   5.8   6.8   6.7   6.7   6.3
##   6.5   6.2   5.9 ]
## [ 0.08265498  0.86853604  0.04880897]
## 3
## 1
## 3
## 1
## <sklearn.tree._tree.Tree object at 0x000001CAE0A48D30>
## <class 'sklearn.tree.tree.DecisionTreeRegressor'>
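
As with the classifier, predicting on the training rows largely reproduces the targets because an unrestricted tree can memorise them. Below is a minimal sketch of a held-out evaluation for the regressor, assuming the same CSV; the max_depth=3 setting is an illustrative choice, not from the original script:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor

mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')
X = mdata.iloc[:, 3:6]   # Sepal.Width, Petal.Length, Petal.Width
y = mdata.iloc[:, 2]     # Sepal.Length

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# max_depth=3 keeps the tree small so it cannot simply memorise the training set
reg = DecisionTreeRegressor(max_depth=3, random_state=0)
reg.fit(X_train, y_train)

# Error and fit quality on the unseen rows
pred = reg.predict(X_test)
print(mean_squared_error(y_test, pred))
print(r2_score(y_test, pred))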