# Reference: http://scikit-learn.org/stable/modules/tree.html#classification
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"
import time
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
# from sklearn import datasets
# import matplotlib.pyplot as plt
# Demo: fit a decision tree classifier on the iris table and print the fitted
# estimator's attributes.
#
# Per the recorded output kept further down in this file, columns 2-5 are the
# four measurements (Sepal.Length, Sepal.Width, Petal.Length, Petal.Width).
# Column 1 appears to hold the species label, since the recorded predictions
# are species names -- TODO confirm against the CSV header.
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')
X = mdata.iloc[:, 2:6]
print(X.head())
print(X.shape)
print(type(X))
y = mdata.iloc[:, 1]

# Time construction + fit only. perf_counter() is the clock intended for
# measuring short durations; time.time() is wall-clock time, which can be
# coarse and can jump if the system clock is adjusted.
t0 = time.perf_counter()
clf = DecisionTreeClassifier()
clf = clf.fit(X, y)
t = time.perf_counter() - t0
print(t)

# NOTE: predictions are made on the same rows the tree was fitted on, so this
# shows the API rather than measuring generalisation.
print(clf.predict(X))
print(clf.classes_)
print(clf.feature_importances_)
print(clf.max_features_)
print(clf.n_classes_)

# `n_features_` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `n_features_in_`. Prefer the new name and fall back to the old one
# so the script runs on both old and new releases.
n_features = getattr(clf, "n_features_in_", None)
if n_features is None:
    n_features = clf.n_features_
print(n_features)

print(clf.n_outputs_)
print(clf.tree_)
print(type(clf))
# from sklearn import datasets
# iris = datasets.load_iris()
# X = iris.data[:, [0, 2]]
# y = iris.target
# print(X)
# print(y)
# clf = DecisionTreeClassifier()
# clf = clf.fit(X, y)
#
# print(clf.classes_)
# print(clf.feature_importances_)
# print(clf.max_features_)
# print(clf.n_classes_)
# print(clf.n_features_)
# print(clf.n_outputs_)
# print(clf.tree_)
#
# X1 = iris.data[:, [2, 3]]
# print(X1)
# print(clf.predict(X1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0 5.1 3.5 1.4 0.2
## 1 4.9 3.0 1.4 0.2
## 2 4.7 3.2 1.3 0.2
## 3 4.6 3.1 1.5 0.2
## 4 5.0 3.6 1.4 0.2
## (150, 4)
## <class 'pandas.core.frame.DataFrame'>
## 0.0010027885437011719
## ['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
## 'setosa' 'setosa' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
## 'versicolor' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica' 'virginica' 'virginica' 'virginica'
## 'virginica' 'virginica' 'virginica']
## ['setosa' 'versicolor' 'virginica']
## [ 0.02666667 0. 0.05072262 0.92261071]
## 4
## 3
## 4
## 1
## <sklearn.tree._tree.Tree object at 0x0000026EE75B91D0>
## <class 'sklearn.tree.tree.DecisionTreeClassifier'>
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "abdata"
import time
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
# from sklearn import datasets
# import matplotlib.pyplot as plt
# Demo: fit a decision tree regressor on the iris table and print the fitted
# estimator's attributes.
#
# Per the recorded output kept below this script, columns 3-5 are Sepal.Width,
# Petal.Length and Petal.Width. Column 2 is used as the numeric target;
# presumably it is Sepal.Length -- TODO confirm against the CSV header.
mdata = pd.read_csv('http://data.galaxystatistics.com/blog_data/regression/iris.csv')
X = mdata.iloc[:, 3:6]
print(X.head())
print(X.shape)
print(type(X))
y = mdata.iloc[:, 2]

# Time construction + fit only. perf_counter() is the clock intended for
# measuring short durations; time.time() is wall-clock time, which can be
# coarse and can jump if the system clock is adjusted.
t0 = time.perf_counter()
clf = DecisionTreeRegressor()
clf = clf.fit(X, y)
t = time.perf_counter() - t0
print(t)

# NOTE: predictions are made on the same rows the tree was fitted on, so this
# shows the API rather than measuring generalisation.
print(clf.predict(X))
print(clf.feature_importances_)
print(clf.max_features_)

# `n_classes_` is not meaningful for a regressor; scikit-learn deprecated it
# on DecisionTreeRegressor and later removed it. Print it only where it still
# exists, so that newer releases do not raise AttributeError here.
if hasattr(clf, "n_classes_"):
    print(clf.n_classes_)

# `n_features_` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `n_features_in_`. Prefer the new name and fall back to the old one
# so the script runs on both old and new releases.
n_features = getattr(clf, "n_features_in_", None)
if n_features is None:
    n_features = clf.n_features_
print(n_features)

print(clf.n_outputs_)
print(clf.tree_)
print(type(clf))
## Sepal.Width Petal.Length Petal.Width
## 0 3.5 1.4 0.2
## 1 3.0 1.4 0.2
## 2 3.2 1.3 0.2
## 3 3.1 1.5 0.2
## 4 3.6 1.4 0.2
## (150, 3)
## <class 'pandas.core.frame.DataFrame'>
## 0.0005009174346923828
## [ 5.1 4.9 4.55 4.75 5. 5.4 4.6 5.05 4.4 4.9 5.35 4.8
## 4.8 4.3 5.8 5.7 5.4 5.1 5.7 5.1 5.4 5.1 4.6 5.1
## 4.8 5. 5. 5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.75 5. 5.5
## 4.9 4.4 5.05 5. 4.5 4.55 5. 5.1 4.8 5.1 4.6 5.35
## 5. 7. 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5. 5.9
## 6. 6.1 5.6 6.7 5.5 5.8 6.2 5.6 5.9 6.1 6.3 6.1
## 6.3 6.6 6.8 6.7 6. 5.7 5.5 5.5 5.8 6. 5.5 6. 6.7
## 6.3 5.6 5.5 5.5 6.1 5.8 5. 5.6 5.7 5.7 6.3 5.1
## 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5
## 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6. 6.9 5.6 7.7
## 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1
## 7.7 6.3 6.4 6. 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3
## 6.5 6.2 5.9 ]
## [ 0.08265498 0.86853604 0.04880897]
## 3
## 1
## 3
## 1
## <sklearn.tree._tree.Tree object at 0x000001CAE0A48D30>
## <class 'sklearn.tree.tree.DecisionTreeRegressor'>