Pima Indians Diabetes Database

In [1]:
from IPython.display import HTML
HTML('''
<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }

  $( document ).ready(function(){
    code_shown=true;
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Hide Code"></form>''')
Out[1]:
In [2]:
import os
print(os.listdir("../input"))
['diabetes.csv']

Preprocessing

In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.utils import np_utils
Using TensorFlow backend.
In [4]:
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv("../input/diabetes.csv")
#df = df.drop('Unnamed: 0', axis=1)
print(df.head())
print(df.shape)
print(df.columns)
   Pregnancies  Glucose   ...     Age  Outcome
0            6      148   ...      50        1
1            1       85   ...      31        0
2            8      183   ...      32        1
3            1       89   ...      21        0
4            0      137   ...      33        1

[5 rows x 9 columns]
(768, 9)
Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')
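
A quick sanity check on the raw data helps before modelling. A minimal sketch (not run above, reusing the df loaded in the previous cell); note that several columns in this dataset, such as Glucose, BloodPressure and BMI, contain zero entries that most likely encode missing measurements:

# summary statistics, class balance and suspicious zero entries
print(df.describe())
print(df['Outcome'].value_counts())

for col in ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']:
    print(col, (df[col] == 0).sum())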

Data Visualization

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt
In [6]:
import seaborn as sns

corr=df.corr()
sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values)
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f8574669390>

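To read the heatmap numerically, one option is to sort each feature's correlation with the target (a small sketch reusing the corr frame computed above):

# correlation of each feature with Outcome, strongest first
print(corr['Outcome'].drop('Outcome').sort_values(ascending=False))
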
Machine Learning

In [7]:
# Thanks to: https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.decomposition import PCA

h = .02  # step size in the mesh

names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
     "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
     "Naive Bayes", "QDA"]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

X = df.drop(['Outcome'], axis = 1).values
pca = PCA(n_components=2,svd_solver='full')
X = pca.fit_transform(X)
y = df['Outcome']


# X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
#                            random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
#X += 2 * rng.uniform(size=X.shape)
#linearly_separable = (X, y)

datasets = [df]

figure = plt.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds_cnt, ds in enumerate(datasets):
    # preprocess dataset, split into training and test part
    #X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=.3, random_state=42)

    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # just plot the dataset first
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    if ds_cnt == 0:
        ax.set_title("Input data")
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
               edgecolors='k')
    # Plot the testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6,
               edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='k')
        # Plot the testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   edgecolors='k', alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        if ds_cnt == 0:
            ax.set_title(name)
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

plt.tight_layout()
plt.show()
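
The accuracy printed in the corner of each panel is computed on the 2-component PCA projection used for plotting, not on the full feature set. A rough sketch of how the same classifiers could be scored on all standardized features instead (illustrative only, not what the cell above runs):

from sklearn.model_selection import cross_val_score

# 5-fold CV accuracy on the full standardized feature set
X_full = StandardScaler().fit_transform(df.drop(['Outcome'], axis=1).values)
y_full = df['Outcome'].values

for name, clf in zip(names, classifiers):
    scores = cross_val_score(clf, X_full, y_full, cv=5)
    print('%-20s %.3f +/- %.3f' % (name, scores.mean(), scores.std()))
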
In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X = df.drop(['Outcome'], axis = 1).values
Y = df['Outcome']

X = StandardScaler().fit_transform(X)

X_Train, X_Test, Y_Train, Y_Test = train_test_split(X, Y, test_size = 0.30, random_state = 101)
In [9]:
# Preprocessing :
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report,confusion_matrix
from itertools import product

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn import tree
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

Logistic Regression

In [10]:
trainedmodel = LogisticRegression().fit(X_Train,Y_Train)
predictions =trainedmodel.predict(X_Test)
print(confusion_matrix(Y_Test,predictions))
print(classification_report(Y_Test,predictions))
[[133  17]
[ 32  49]]
          precision    recall  f1-score   support

       0       0.81      0.89      0.84       150
       1       0.74      0.60      0.67        81

micro avg       0.79      0.79      0.79       231
macro avg       0.77      0.75      0.76       231
weighted avg       0.78      0.79      0.78       231
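
Because the features were standardized, the fitted coefficients are roughly comparable in magnitude. A small sketch (assuming the column order of df) for inspecting them as odds ratios:

# standardized coefficients and odds ratios of the fitted logistic regression
feature_names = df.drop(['Outcome'], axis=1).columns
for name, coef in zip(feature_names, trainedmodel.coef_[0]):
    print('%-25s coef = %6.3f   odds ratio = %.3f' % (name, coef, np.exp(coef)))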

Random Forest

In [11]:
trainedforest = RandomForestClassifier(n_estimators=700).fit(X_Train,Y_Train)
predictionforest = trainedforest.predict(X_Test)
print(confusion_matrix(Y_Test,predictionforest))
print(classification_report(Y_Test,predictionforest))
[[130  20]
[ 30  51]]
          precision    recall  f1-score   support

       0       0.81      0.87      0.84       150
       1       0.72      0.63      0.67        81

micro avg       0.78      0.78      0.78       231
macro avg       0.77      0.75      0.75       231
weighted avg       0.78      0.78      0.78       231
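
The fitted forest also exposes impurity-based feature importances, which give a rough idea of which measurements drive the predictions (sketch using the model trained above):

# impurity-based importances of the trained random forest
importances = pd.Series(trainedforest.feature_importances_,
                        index=df.drop(['Outcome'], axis=1).columns)
print(importances.sort_values(ascending=False))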

Support Vector Machines

In [12]:
trainedsvm = svm.LinearSVC().fit(X_Train, Y_Train)
predictionsvm = trainedsvm.predict(X_Test)
print(confusion_matrix(Y_Test,predictionsvm))
print(classification_report(Y_Test,predictionsvm))
[[133  17]
[ 33  48]]
          precision    recall  f1-score   support

       0       0.80      0.89      0.84       150
       1       0.74      0.59      0.66        81

micro avg       0.78      0.78      0.78       231
macro avg       0.77      0.74      0.75       231
weighted avg       0.78      0.78      0.78       231

/opt/conda/lib/python3.6/site-packages/sklearn/svm/base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
"the number of iterations.", ConvergenceWarning)

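The ConvergenceWarning above means liblinear stopped before reaching its tolerance on this data. One way to address it, as a sketch with an assumed iteration budget (not re-run here), is to raise max_iter:

# re-fit with a larger iteration budget so liblinear can converge
trainedsvm = svm.LinearSVC(max_iter=10000).fit(X_Train, Y_Train)
predictionsvm = trainedsvm.predict(X_Test)
print(classification_report(Y_Test, predictionsvm))
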
Decision Tree

In [13]:
trainedtree = tree.DecisionTreeClassifier().fit(X_Train, Y_Train)
predictionstree = trainedtree.predict(X_Test)
print(confusion_matrix(Y_Test,predictionstree))
print(classification_report(Y_Test,predictionstree))
[[112  38]
[ 31  50]]
          precision    recall  f1-score   support

       0       0.78      0.75      0.76       150
       1       0.57      0.62      0.59        81

micro avg       0.70      0.70      0.70       231
macro avg       0.68      0.68      0.68       231
weighted avg       0.71      0.70      0.70       231
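
An unpruned decision tree tends to overfit, which is consistent with its lower test scores here. A hedged variant that limits depth (illustrative; the exact numbers will differ from run to run):

# a shallower tree as a simple form of pre-pruning
prunedtree = tree.DecisionTreeClassifier(max_depth=4, random_state=101).fit(X_Train, Y_Train)
print(classification_report(Y_Test, prunedtree.predict(X_Test)))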

In [14]:
import graphviz
from sklearn.tree import DecisionTreeClassifier, export_graphviz

data = export_graphviz(trainedtree,out_file=None,feature_names=df.drop(['Outcome'], axis = 1).columns,
                   class_names=['0', '1'],  
                   filled=True, rounded=True,  
                   max_depth=2,
                   special_characters=True)
graph = graphviz.Source(data)
graph
Out[14]:
[Rendered decision tree (top levels): the root splits on Glucose ≤ 1.052 (gini = 0.454, samples = 537, value = [350, 187]); the low-glucose branch then splits on Age ≤ -0.488 and further on BMI and Glucose; the high-glucose branch splits on Age ≤ 2.234 and further on Insulin and BMI.]

Linear Discriminant Analysis

In [15]:
trainedlda = LinearDiscriminantAnalysis().fit(X_Train, Y_Train)
predictionlda = trainedlda.predict(X_Test)
print(confusion_matrix(Y_Test,predictionlda))
print(classification_report(Y_Test,predictionlda))
[[133  17]
[ 33  48]]
          precision    recall  f1-score   support

       0       0.80      0.89      0.84       150
       1       0.74      0.59      0.66        81

micro avg       0.78      0.78      0.78       231
macro avg       0.77      0.74      0.75       231
weighted avg       0.78      0.78      0.78       231

Naive Bayes

In [16]:
trainednb = GaussianNB().fit(X_Train, Y_Train)
predictionnb = trainednb.predict(X_Test)
print(confusion_matrix(Y_Test,predictionnb))
print(classification_report(Y_Test,predictionnb))
[[124  26]
[ 29  52]]
          precision    recall  f1-score   support

       0       0.81      0.83      0.82       150
       1       0.67      0.64      0.65        81

micro avg       0.76      0.76      0.76       231
macro avg       0.74      0.73      0.74       231
weighted avg       0.76      0.76      0.76       231

XGBoost

In [17]:
from xgboost import XGBClassifier
from xgboost import plot_tree
import matplotlib.pyplot as plt
model = XGBClassifier()

# Train
model.fit(X_Train, Y_Train)

fig, ax = plt.subplots(figsize=(50, 55))  # size the figure the tree is drawn on
plot_tree(model, ax=ax)
plt.show()
In [18]:
from itertools import product
import itertools

predictions =model.predict(X_Test)
print(confusion_matrix(Y_Test,predictions))
print(classification_report(Y_Test,predictions))

# Thanks to: https://www.kaggle.com/tejainece/data-visualization-and-machine-learning-algorithms
def plot_confusion_matrix(cm, classes=["0", "1"], title="",
                          cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title('Confusion matrix ' + title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

cm_plot = confusion_matrix(Y_Test,predictions)

plt.figure()
plot_confusion_matrix(cm_plot, title = 'XGBClassifier')
[[123  27]
[ 26  55]]
          precision    recall  f1-score   support

       0       0.83      0.82      0.82       150
       1       0.67      0.68      0.67        81

micro avg       0.77      0.77      0.77       231
macro avg       0.75      0.75      0.75       231
weighted avg       0.77      0.77      0.77       231
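
Beyond the single tree drawn above, xgboost provides plot_importance for a per-feature view of the fitted booster (sketch; since the model was trained on a bare array, the features appear as f0...f7):

from xgboost import plot_importance

# importance plot for the fitted booster (default: split counts per feature)
plot_importance(model)
plt.show()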

Feature Engineering

Principal Component Analysis

In [19]:
pca = PCA(n_components=2,svd_solver='full')
X_pca = pca.fit_transform(X)
# print(pca.explained_variance_)

X_reduced, X_test_reduced, Y_Train, Y_Test = train_test_split(X_pca, Y, test_size = 0.30, random_state = 101)

# pca = PCA(n_components=2,svd_solver='full')
# X_reduced = pca.fit_transform(X_Train)
#X_reduced = TSNE(n_components=2).fit_transform(X_Train, Y_Train)

trainednb = GaussianNB().fit(X_reduced, Y_Train)
trainedsvm = svm.LinearSVC().fit(X_reduced, Y_Train)
trainedforest = RandomForestClassifier(n_estimators=700).fit(X_reduced,Y_Train)
trainedmodel = LogisticRegression().fit(X_reduced,Y_Train)

# pca = PCA(n_components=2,svd_solver='full')
# X_test_reduced = pca.fit_transform(X_Test)
#X_test_reduced = TSNE(n_components=2).fit_transform(X_Test, Y_Test)

print('Naive Bayes')
predictionnb = trainednb.predict(X_test_reduced)
print(confusion_matrix(Y_Test,predictionnb))
print(classification_report(Y_Test,predictionnb))

print('SVM')
predictionsvm = trainedsvm.predict(X_test_reduced)
print(confusion_matrix(Y_Test,predictionsvm))
print(classification_report(Y_Test,predictionsvm))

print('Random Forest')
predictionforest = trainedforest.predict(X_test_reduced)
print(confusion_matrix(Y_Test,predictionforest))
print(classification_report(Y_Test,predictionforest))

print('Logistic Regression')
predictions =trainedmodel.predict(X_test_reduced)
print(confusion_matrix(Y_Test,predictions))
print(classification_report(Y_Test,predictions))
Naive Bayes
[[129  21]
[ 44  37]]
          precision    recall  f1-score   support

       0       0.75      0.86      0.80       150
       1       0.64      0.46      0.53        81

micro avg       0.72      0.72      0.72       231
macro avg       0.69      0.66      0.67       231
weighted avg       0.71      0.72      0.71       231

SVM
[[128  22]
[ 44  37]]
          precision    recall  f1-score   support

       0       0.74      0.85      0.80       150
       1       0.63      0.46      0.53        81

micro avg       0.71      0.71      0.71       231
macro avg       0.69      0.66      0.66       231
weighted avg       0.70      0.71      0.70       231

Random Forest
[[121  29]
[ 47  34]]
          precision    recall  f1-score   support

       0       0.72      0.81      0.76       150
       1       0.54      0.42      0.47        81

micro avg       0.67      0.67      0.67       231
macro avg       0.63      0.61      0.62       231
weighted avg       0.66      0.67      0.66       231

Logistic Regression
[[128  22]
[ 43  38]]
          precision    recall  f1-score   support

       0       0.75      0.85      0.80       150
       1       0.63      0.47      0.54        81

micro avg       0.72      0.72      0.72       231
macro avg       0.69      0.66      0.67       231
weighted avg       0.71      0.72      0.71       231

In [20]:
reduced_data = X_reduced

trainednb = GaussianNB().fit(reduced_data, Y_Train)
trainedsvm = svm.LinearSVC().fit(reduced_data, Y_Train)
trainedforest = RandomForestClassifier(n_estimators=700).fit(reduced_data,Y_Train)
trainedmodel = LogisticRegression().fit(reduced_data,Y_Train)

# Thanks to: https://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_decision_regions.html

x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                 np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))

for idx, clf, tt in zip(product([0, 1], [0, 1]),
                        [trainednb, trainedsvm, trainedforest, trainedmodel],
                        ['Naive Bayes Classifier', 'SVM',
                         'Random Forest', 'Logistic Regression']):

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx[0], idx[1]].contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.4)
    axarr[idx[0], idx[1]].scatter(reduced_data[:, 0], reduced_data[:, 1], c=Y_Train,
                                  s=20, edgecolor='k')
    axarr[idx[0], idx[1]].set_title(tt)

plt.show()

Linear Discriminant Analysis

In [21]:
# Load libraries
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Create an LDA; with a binary target it can keep at most
# n_classes - 1 = 1 discriminant component, whatever n_components asks for
lda = LinearDiscriminantAnalysis(n_components=2)

# run an LDA and use it to transform the features
X_lda = lda.fit(X, Y).transform(X)

# Print the number of features
print('Original number of features:', X.shape[1])
print('Reduced number of features:', X_lda.shape[1])

## View the ratio of explained variance
print(lda.explained_variance_ratio_)

X_reduced, X_test_reduced, Y_Train, Y_Test = train_test_split(X_lda, Y, test_size = 0.30, random_state = 101)

trainednb = GaussianNB().fit(X_reduced, Y_Train)
trainedsvm = svm.LinearSVC().fit(X_reduced, Y_Train)

print('Naive Bayes')
predictionnb = trainednb.predict(X_test_reduced)
print(confusion_matrix(Y_Test,predictionnb))
print(classification_report(Y_Test,predictionnb))

print('SVM')
predictionsvm = trainedsvm.predict(X_test_reduced)
print(confusion_matrix(Y_Test,predictionsvm))
print(classification_report(Y_Test,predictionsvm))
Original number of features: 8
Reduced number of features: 1
[1.]
Naive Bayes
[[133  17]
[ 33  48]]
          precision    recall  f1-score   support

       0       0.80      0.89      0.84       150
       1       0.74      0.59      0.66        81

micro avg       0.78      0.78      0.78       231
macro avg       0.77      0.74      0.75       231
weighted avg       0.78      0.78      0.78       231

SVM
[[133  17]
[ 33  48]]
          precision    recall  f1-score   support

       0       0.80      0.89      0.84       150
       1       0.74      0.59      0.66        81

micro avg       0.78      0.78      0.78       231
macro avg       0.77      0.74      0.75       231
weighted avg       0.78      0.78      0.78       231

t-SNE

In [22]:
from sklearn.manifold import TSNE
import time

time_start = time.time()
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(X)
print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 768 samples in 0.001s...
[t-SNE] Computed neighbors for 768 samples in 0.030s...
[t-SNE] Computed conditional probabilities for sample 768 / 768
[t-SNE] Mean sigma: 0.894449
[t-SNE] KL divergence after 250 iterations with early exaggeration: 65.440903
[t-SNE] KL divergence after 300 iterations: 1.034935
t-SNE done! Time elapsed: 2.3509469032287598 seconds
In [23]:
plt.figure(figsize=(6,5))
sns.scatterplot(
    x=tsne_results[:, 0], y=tsne_results[:, 1],
    hue=Y,
    palette=sns.color_palette("hls", 2),
    data=df,
    legend="full",
    alpha=0.3
)
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f85687b22b0>

Clustering

In [24]:
pca = PCA(n_components=2,svd_solver='full')
X_pca = pca.fit_transform(X)
# print(pca.explained_variance_)

# print('Original number of features:', X.shape[1])
# print('Reduced number of features:', X_lda.shape[1])
print(pca.explained_variance_ratio_)

X_reduced, X_test_reduced, Y_Train, Y_Test = train_test_split(X_pca, Y, test_size = 0.30, random_state = 101)
[0.26179749 0.21640127]
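
The two retained components explain only about 48% of the variance, so a fair amount of structure is discarded. A small sketch (illustrative) of checking how many components would be needed for a given coverage:

# cumulative explained variance over all principal components
pca_full = PCA(svd_solver='full').fit(X)
cumulative = np.cumsum(pca_full.explained_variance_ratio_)
print(cumulative)
print('components needed for 95% variance:', np.argmax(cumulative >= 0.95) + 1)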

K-Means Clustering

In [25]:
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2, random_state=0).fit(X_reduced)
In [26]:
kpredictions = kmeans.predict(X_test_reduced)
print(confusion_matrix(Y_Test,kpredictions))
print(classification_report(Y_Test,kpredictions))
[[113  37]
[ 37  44]]
          precision    recall  f1-score   support

       0       0.75      0.75      0.75       150
       1       0.54      0.54      0.54        81

micro avg       0.68      0.68      0.68       231
macro avg       0.65      0.65      0.65       231
weighted avg       0.68      0.68      0.68       231
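
One caveat when scoring k-means with classification_report: the cluster indices 0/1 are arbitrary and may be flipped relative to Outcome, so the report can understate the agreement. A hedged sketch that also checks the flipped labelling and a label-free score:

from sklearn.metrics import accuracy_score, adjusted_rand_score

# cluster labels are arbitrary, so try both orientations plus a label-free index
print('accuracy as-is:   %.3f' % accuracy_score(Y_Test, kpredictions))
print('accuracy flipped: %.3f' % accuracy_score(Y_Test, 1 - kpredictions))
print('adjusted Rand index: %.3f' % adjusted_rand_score(Y_Test, kpredictions))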

In [27]:
plt.scatter(X_test_reduced[kpredictions ==0,0], X_test_reduced[kpredictions == 0,1], s=100, c='red')
plt.scatter(X_test_reduced[kpredictions ==1,0], X_test_reduced[kpredictions == 1,1], s=100, c='black')
Out[27]:
<matplotlib.collections.PathCollection at 0x7f85561ff2e8>

Hierarchical Clustering

In [28]:
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering

# create dendrogram
dendrogram = sch.dendrogram(sch.linkage(X_reduced, method='ward'))
# create clusters
hc = AgglomerativeClustering(n_clusters=2, affinity = 'euclidean', linkage = 'ward')
# save clusters for chart
hierarchicalpredictions = hc.fit_predict(X_test_reduced)
In [29]:
plt.scatter(X_test_reduced[hierarchicalpredictions ==0,0], X_test_reduced[hierarchicalpredictions == 0,1], s=100, c='red')
plt.scatter(X_test_reduced[hierarchicalpredictions ==1,0], X_test_reduced[hierarchicalpredictions == 1,1], s=100, c='black')
Out[29]:
<matplotlib.collections.PathCollection at 0x7f8555c51240>

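Note that AgglomerativeClustering has no predict method: fit_predict above clusters the held-out points on their own, independently of the dendrogram built on X_reduced. A label-free comparison against Outcome (sketch, with the same caveat about arbitrary cluster indices):

from sklearn.metrics import adjusted_rand_score

# agreement between the hierarchical clusters and the true labels
print('adjusted Rand index: %.3f' % adjusted_rand_score(Y_Test, hierarchicalpredictions))
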
Deep Learning

In [30]:
from keras.utils.np_utils import to_categorical
Y_Train = to_categorical(Y_Train)
In [31]:
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers.core import Dense, Activation, Dropout
from keras.utils import to_categorical
from keras.layers import Dense, Dropout, BatchNormalization, Activation

#Y_Test = to_categorical(Y_Test)

input_dim = X_Train.shape[1]
nb_classes = Y_Train.shape[1]

# Here's a Deep Dumb MLP (DDMLP)
model = Sequential()
model.add(Dense(512, input_dim=input_dim))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.15))
model.add(Dense(256))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.15))
model.add(Dense(nb_classes))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))

# we'll use categorical xent for the loss, and RMSprop as the optimizer
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

print("Training...")
model.fit(X_Train, Y_Train, epochs=50, batch_size=16, validation_split=0.1, verbose=2)

preds = model.predict_classes(X_Test, verbose=0)
Training...
Train on 483 samples, validate on 54 samples
Epoch 1/50
...
Epoch 50/50
In [32]:
print(confusion_matrix(Y_Test,preds))
print(classification_report(Y_Test,preds))
[[125  25]
[ 25  56]]
          precision    recall  f1-score   support

       0       0.83      0.83      0.83       150
       1       0.69      0.69      0.69        81

micro avg       0.78      0.78      0.78       231
macro avg       0.76      0.76      0.76       231
weighted avg       0.78      0.78      0.78       231
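
To monitor accuracy during training and stop once the validation loss stalls, one option is to compile with an accuracy metric and add an EarlyStopping callback. This is a sketch with assumed hyper-parameters (patience, verbosity), not what was run above:

from keras.callbacks import EarlyStopping

# same architecture, but track accuracy and stop when validation loss stops improving
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(X_Train, Y_Train, epochs=50, batch_size=16,
          validation_split=0.1, callbacks=[early_stop], verbose=2)

# evaluate returns [loss, accuracy]; labels must be one-hot like Y_Train
print(model.evaluate(X_Test, np_utils.to_categorical(Y_Test), verbose=0))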