{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "bbe9a2b8-1f68-46e7-8f7d-24530ec80fb2", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
07.40.700.001.90.07611.034.00.99783.510.569.45
17.80.880.002.60.09825.067.00.99683.200.689.85
27.80.760.042.30.09215.054.00.99703.260.659.85
311.20.280.561.90.07517.060.00.99803.160.589.86
47.40.700.001.90.07611.034.00.99783.510.569.45
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 7.4 0.70 0.00 1.9 0.076 \n", "1 7.8 0.88 0.00 2.6 0.098 \n", "2 7.8 0.76 0.04 2.3 0.092 \n", "3 11.2 0.28 0.56 1.9 0.075 \n", "4 7.4 0.70 0.00 1.9 0.076 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 11.0 34.0 0.9978 3.51 0.56 \n", "1 25.0 67.0 0.9968 3.20 0.68 \n", "2 15.0 54.0 0.9970 3.26 0.65 \n", "3 17.0 60.0 0.9980 3.16 0.58 \n", "4 11.0 34.0 0.9978 3.51 0.56 \n", "\n", " alcohol quality \n", "0 9.4 5 \n", "1 9.8 5 \n", "2 9.8 5 \n", "3 9.8 6 \n", "4 9.4 5 " ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "# Load the red-wine quality table; later cells read and relabel `wine_df`.\n", "wine_df = pd.read_csv('winequality-red.csv')\n", "\n", "# Fail fast if the file did not parse into the expected columns (for example a\n", "# ';'-delimited copy read with the default ',' separator) instead of failing\n", "# later on `wine_df.quality` with an unrelated AttributeError.\n", "assert 'quality' in wine_df.columns, (\n", "    f'no quality column after parsing; got {list(wine_df.columns)} - check the file and its delimiter'\n", ")\n", "\n", "wine_df.head()" ] }, { "cell_type": "code", "execution_count": 2, "id": "aa49c80d-905d-4cfb-a034-422d352ab9a1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
07.40.700.001.90.07611.034.00.99783.510.569.4Bad
17.80.880.002.60.09825.067.00.99683.200.689.8Bad
27.80.760.042.30.09215.054.00.99703.260.659.8Bad
311.20.280.561.90.07517.060.00.99803.160.589.8Good
47.40.700.001.90.07611.034.00.99783.510.569.4Bad
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 7.4 0.70 0.00 1.9 0.076 \n", "1 7.8 0.88 0.00 2.6 0.098 \n", "2 7.8 0.76 0.04 2.3 0.092 \n", "3 11.2 0.28 0.56 1.9 0.075 \n", "4 7.4 0.70 0.00 1.9 0.076 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 11.0 34.0 0.9978 3.51 0.56 \n", "1 25.0 67.0 0.9968 3.20 0.68 \n", "2 15.0 54.0 0.9970 3.26 0.65 \n", "3 17.0 60.0 0.9980 3.16 0.58 \n", "4 11.0 34.0 0.9978 3.51 0.56 \n", "\n", " alcohol quality \n", "0 9.4 Bad \n", "1 9.8 Bad \n", "2 9.8 Bad \n", "3 9.8 Good \n", "4 9.4 Bad " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_df.quality = np.where(wine_df.quality >= 6,'Good', 'Bad')\n", "wine_df.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "b614590c-61f5-4c1e-b763-56cb146d1a8a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Description Value
0session_id123
1Targetquality
2Target TypeBinary
3Label EncodedBad: 0, Good: 1
4Original Data(1599, 12)
5Missing ValuesFalse
6Numeric Features11
7Categorical Features0
8Ordinal FeaturesFalse
9High Cardinality FeaturesFalse
10High Cardinality MethodNone
11Transformed Train Set(1119, 11)
12Transformed Test Set(480, 11)
13Shuffle Train-TestTrue
14Stratify Train-TestFalse
15Fold GeneratorStratifiedKFold
16Fold Number10
17CPU Jobs-1
18Use GPUFalse
19Log ExperimentFalse
20Experiment Nameclf-default-name
21USI9112
22Imputation Typesimple
23Iterative Imputation IterationNone
24Numeric Imputermean
25Iterative Imputation Numeric ModelNone
26Categorical Imputerconstant
27Iterative Imputation Categorical ModelNone
28Unknown Categoricals Handlingleast_frequent
29NormalizeFalse
30Normalize MethodNone
31TransformationFalse
32Transformation MethodNone
33PCAFalse
34PCA MethodNone
35PCA ComponentsNone
36Ignore Low VarianceFalse
37Combine Rare LevelsFalse
38Rare Level ThresholdNone
39Numeric BinningFalse
40Remove OutliersFalse
41Outliers ThresholdNone
42Remove MulticollinearityFalse
43Multicollinearity ThresholdNone
44Remove Perfect CollinearityTrue
45ClusteringFalse
46Clustering IterationNone
47Polynomial FeaturesFalse
48Polynomial DegreeNone
49Trignometry FeaturesFalse
50Polynomial ThresholdNone
51Group FeaturesFalse
52Feature SelectionFalse
53Feature Selection Methodclassic
54Features Selection ThresholdNone
55Feature InteractionFalse
56Feature RatioFalse
57Interaction ThresholdNone
58Fix ImbalanceFalse
59Fix Imbalance MethodSMOTE
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pycaret.classification import *\n", "exp_clf01 = setup(data = wine_df, target = 'quality', session_id = 123)" ] }, { "cell_type": "code", "execution_count": 6, "id": "28f83424-20d9-45a2-b4ae-916e7777e7e3", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b4d45ba3952c426b9648e440019064ac", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Summarize dataset: 0%| | 0/25 [00:00" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "exp_clf01 = setup(data = wine_df, target = 'quality', profile=True, session_id = 123)" ] }, { "cell_type": "code", "execution_count": 7, "id": "592441b9-63fb-4bf9-bf8d-3608f2c1017b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Accuracy AUC Recall Prec. F1 Kappa MCC TT (Sec)
rfRandom Forest Classifier0.82220.89730.83840.83570.83640.64160.64290.3210
etExtra Trees Classifier0.81590.90440.83190.83020.83060.62900.62990.2800
lightgbmLight Gradient Boosting Machine0.81320.88490.82040.83460.82660.62420.62570.0610
gbcGradient Boosting Classifier0.78550.85930.79900.80710.80180.56820.57030.0900
ridgeRidge Classifier0.75690.00000.74970.79100.76880.51310.51510.0100
lrLogistic Regression0.75070.81780.74800.78250.76420.50000.50150.4770
ldaLinear Discriminant Analysis0.74890.81730.75130.77790.76350.49600.49740.0100
dtDecision Tree Classifier0.74440.74110.78090.75680.76840.48350.48410.0180
nbNaive Bayes0.74180.80430.76460.76150.76210.47980.48110.0090
adaAda Boost Classifier0.73630.81260.76450.75480.75780.46840.47110.0590
qdaQuadratic Discriminant Analysis0.73280.80320.80720.72960.76610.45630.46030.0130
knnK Neighbors Classifier0.63980.69510.66730.66880.66670.27480.27610.0800
svmSVM - Linear Kernel0.59960.00000.73020.66960.64640.17160.18880.0090
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "best = compare_models()" ] }, { "cell_type": "code", "execution_count": 9, "id": "9465ee4e-1044-49ee-9d8e-bb371b8d59c8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Description Value
0session_id123
1Targetquality
2Target TypeBinary
3Label EncodedBad: 0, Good: 1
4Original Data(1599, 12)
5Missing ValuesFalse
6Numeric Features11
7Categorical Features0
8Ordinal FeaturesFalse
9High Cardinality FeaturesFalse
10High Cardinality MethodNone
11Transformed Train Set(1119, 11)
12Transformed Test Set(480, 11)
13Shuffle Train-TestTrue
14Stratify Train-TestFalse
15Fold GeneratorStratifiedKFold
16Fold Number10
17CPU Jobs-1
18Use GPUFalse
19Log ExperimentFalse
20Experiment Nameclf-default-name
21USIe207
22Imputation Typesimple
23Iterative Imputation IterationNone
24Numeric Imputermean
25Iterative Imputation Numeric ModelNone
26Categorical Imputerconstant
27Iterative Imputation Categorical ModelNone
28Unknown Categoricals Handlingleast_frequent
29NormalizeTrue
30Normalize Methodzscore
31TransformationTrue
32Transformation Methodyeo-johnson
33PCAFalse
34PCA MethodNone
35PCA ComponentsNone
36Ignore Low VarianceFalse
37Combine Rare LevelsFalse
38Rare Level ThresholdNone
39Numeric BinningFalse
40Remove OutliersFalse
41Outliers ThresholdNone
42Remove MulticollinearityFalse
43Multicollinearity ThresholdNone
44Remove Perfect CollinearityTrue
45ClusteringFalse
46Clustering IterationNone
47Polynomial FeaturesFalse
48Polynomial DegreeNone
49Trignometry FeaturesFalse
50Polynomial ThresholdNone
51Group FeaturesFalse
52Feature SelectionFalse
53Feature Selection Methodclassic
54Features Selection ThresholdNone
55Feature InteractionFalse
56Feature RatioFalse
57Interaction ThresholdNone
58Fix ImbalanceFalse
59Fix Imbalance MethodSMOTE
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "exp_clf102 = setup(data = wine_df, target = 'quality', session_id=123, normalize = True, transformation = True)" ] }, { "cell_type": "code", "execution_count": 10, "id": "6da0b7ee-48f5-4f83-9739-9de9f4c4bf53", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Accuracy AUC Recall Prec. F1 Kappa MCC TT (Sec)
etExtra Trees Classifier0.82310.90320.84340.83380.83790.64310.64430.2860
rfRandom Forest Classifier0.82220.89760.83510.83800.83590.64180.64300.3170
lightgbmLight Gradient Boosting Machine0.81410.88350.82370.83370.82750.62600.62770.0570
gbcGradient Boosting Classifier0.78730.85960.79910.80950.80300.57200.57410.0970
lrLogistic Regression0.75250.82010.77270.77190.77110.50150.50320.0210
qdaQuadratic Discriminant Analysis0.75070.81230.77600.76790.77110.49720.49850.0130
ridgeRidge Classifier0.74980.00000.75950.77500.76590.49720.49900.0090
ldaLinear Discriminant Analysis0.74980.82150.75950.77500.76590.49720.49900.0080
dtDecision Tree Classifier0.74440.74130.77930.75780.76800.48370.48440.0100
nbNaive Bayes0.73730.81280.71190.78540.74610.47530.47870.0130
adaAda Boost Classifier0.73630.81270.76450.75480.75780.46840.47110.0570
knnK Neighbors Classifier0.73280.80230.78410.74070.76050.45890.46220.0710
svmSVM - Linear Kernel0.72120.00000.72460.75470.73690.44020.44360.0100
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "best = compare_models()" ] }, { "cell_type": "code", "execution_count": 11, "id": "e9a729f4-c2b3-4a6a-b7aa-3cc70784bbb8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Accuracy AUC Recall Prec. F1 Kappa MCC
00.83040.90870.78690.88890.83480.66180.6670
10.82140.91820.83610.83610.83610.64000.6400
20.84820.93460.85250.86670.85950.69450.6946
30.83930.90390.86890.84130.85480.67490.6754
40.83930.89410.88520.83080.85710.67390.6757
50.84820.92610.85250.86670.85950.69450.6946
60.82140.89870.86890.81540.84130.63770.6393
70.77680.86990.83330.76920.80000.54840.5506
80.77680.88270.80000.78690.79340.55070.5508
90.82880.89530.85000.83610.84300.65490.6550
Mean0.82310.90320.84340.83380.83790.64310.6443
SD0.02480.01860.02910.03460.02250.05030.0502
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Cross-validate an Extra Trees classifier ('et') inside the active experiment.\n", "et_model = create_model(estimator = 'et')" ] }, { "cell_type": "code", "execution_count": 12, "id": "c6726376-5f56-41bf-acbd-2d7f266ebb08", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "70cdc491a6284efb9d2e1157562f2b6f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Open the interactive plot-type browser for the fitted model.\n", "evaluate_model(estimator = et_model)" ] }, { "cell_type": "code", "execution_count": 13, "id": "7c203e50-7976-46a5-8839-272ff13e57d7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model Accuracy AUC Recall Prec. F1 Kappa MCC
0Extra Trees Classifier0.78750.88580.83870.77040.80310.57320.5757
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholqualityLabelScore
01.020588-0.2623370.8799400.909526-0.811347-1.539989-1.767543-0.0015690.179457-0.0263191.507669GoodGood0.88
10.8039120.3658250.0698090.1824060.1698950.8531811.9900400.562903-0.216540-0.678448-1.071227BadBad0.95
2-0.5480701.883447-0.893328-0.877415-0.2439560.6269790.590835-0.0121340.309834-0.576033-1.403862BadBad0.79
30.107601-1.8622750.790650-0.626787-0.545447-0.666601-1.164981-1.619909-0.619242-0.7838921.558527GoodGood0.85
4-0.094336-0.848967-0.143101-0.877415-0.0790760.8531810.377180-0.235514-0.1500250.344294-1.234888BadBad0.62
.............................................
475-0.548070-0.1402150.322042-0.184964-1.0255741.3309360.5333540.282229-0.0175890.205814-0.205689GoodGood1.00
4761.829642-0.0214611.0979540.0076630.466399-1.351598-1.3577511.774647-0.754683-2.001079-0.760163BadBad1.00
477-0.629806-0.7810260.222889-0.877415-0.3015601.3309360.9344780.0565490.761128-0.287161-0.912956GoodGood1.00
4780.936346-1.3418070.654205-1.148794-1.025574-1.735120-1.697007-1.047378-1.095523-0.4766661.112170GoodGood0.90
4790.1709830.520056-0.0354300.0076631.252116-0.666601-0.045372-0.310424-1.027116-0.892347-0.471378BadBad1.00
\n", "

480 rows × 14 columns

\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 1.020588 -0.262337 0.879940 0.909526 -0.811347 \n", "1 0.803912 0.365825 0.069809 0.182406 0.169895 \n", "2 -0.548070 1.883447 -0.893328 -0.877415 -0.243956 \n", "3 0.107601 -1.862275 0.790650 -0.626787 -0.545447 \n", "4 -0.094336 -0.848967 -0.143101 -0.877415 -0.079076 \n", ".. ... ... ... ... ... \n", "475 -0.548070 -0.140215 0.322042 -0.184964 -1.025574 \n", "476 1.829642 -0.021461 1.097954 0.007663 0.466399 \n", "477 -0.629806 -0.781026 0.222889 -0.877415 -0.301560 \n", "478 0.936346 -1.341807 0.654205 -1.148794 -1.025574 \n", "479 0.170983 0.520056 -0.035430 0.007663 1.252116 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 -1.539989 -1.767543 -0.001569 0.179457 -0.026319 \n", "1 0.853181 1.990040 0.562903 -0.216540 -0.678448 \n", "2 0.626979 0.590835 -0.012134 0.309834 -0.576033 \n", "3 -0.666601 -1.164981 -1.619909 -0.619242 -0.783892 \n", "4 0.853181 0.377180 -0.235514 -0.150025 0.344294 \n", ".. ... ... ... ... ... \n", "475 1.330936 0.533354 0.282229 -0.017589 0.205814 \n", "476 -1.351598 -1.357751 1.774647 -0.754683 -2.001079 \n", "477 1.330936 0.934478 0.056549 0.761128 -0.287161 \n", "478 -1.735120 -1.697007 -1.047378 -1.095523 -0.476666 \n", "479 -0.666601 -0.045372 -0.310424 -1.027116 -0.892347 \n", "\n", " alcohol quality Label Score \n", "0 1.507669 Good Good 0.88 \n", "1 -1.071227 Bad Bad 0.95 \n", "2 -1.403862 Bad Bad 0.79 \n", "3 1.558527 Good Good 0.85 \n", "4 -1.234888 Bad Bad 0.62 \n", ".. ... ... ... ... 
\n", "475 -0.205689 Good Good 1.00 \n", "476 -0.760163 Bad Bad 1.00 \n", "477 -0.912956 Good Good 1.00 \n", "478 1.112170 Good Good 0.90 \n", "479 -0.471378 Bad Bad 1.00 \n", "\n", "[480 rows x 14 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Score the model on the hold-out split created by setup().\n", "predict_model(et_model)" ] }, { "cell_type": "code", "execution_count": 14, "id": "97300de8-21d0-45d2-8f6e-9623703d5605", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Saved\n" ] }, { "data": { "text/plain": [ "(Pipeline(memory=None,\n", " steps=[('dtypes',\n", " DataTypes_Auto_infer(categorical_features=[],\n", " display_types=True, features_todrop=[],\n", " id_columns=[],\n", " ml_usecase='classification',\n", " numerical_features=[], target='quality',\n", " time_features=[])),\n", " ('imputer',\n", " Simple_Imputer(categorical_strategy='not_available',\n", " fill_value_categorical=None,\n", " fill_value_numerical=None,\n", " numeric_stra...\n", " ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0,\n", " class_weight=None, criterion='gini',\n", " max_depth=None, max_features='auto',\n", " max_leaf_nodes=None, max_samples=None,\n", " min_impurity_decrease=0.0,\n", " min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0,\n", " n_estimators=100, n_jobs=-1,\n", " oob_score=False, random_state=123,\n", " verbose=0, warm_start=False)]],\n", " verbose=False),\n", " 'extra_tree_model.pkl')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Refit on the full dataset (train + hold-out) before persisting, so the saved\n", "# pipeline is not limited to the training split; the hold-out metrics in the\n", "# previous cell were measured on the pre-finalize model.\n", "final_et_model = finalize_model(et_model)\n", "save_model(final_et_model, model_name = 'extra_tree_model')" ] } ], "metadata": { "kernelspec": { "display_name": "run pycaret", "language": "python", "name": "run_pycaret" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", "version": "3.6.13" } }, "nbformat": 4, "nbformat_minor": 5 }