{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bbe9a2b8-1f68-46e7-8f7d-24530ec80fb2",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed acidity | \n",
" volatile acidity | \n",
" citric acid | \n",
" residual sugar | \n",
" chlorides | \n",
" free sulfur dioxide | \n",
" total sulfur dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" 5 | \n",
"
\n",
" \n",
" 1 | \n",
" 7.8 | \n",
" 0.88 | \n",
" 0.00 | \n",
" 2.6 | \n",
" 0.098 | \n",
" 25.0 | \n",
" 67.0 | \n",
" 0.9968 | \n",
" 3.20 | \n",
" 0.68 | \n",
" 9.8 | \n",
" 5 | \n",
"
\n",
" \n",
" 2 | \n",
" 7.8 | \n",
" 0.76 | \n",
" 0.04 | \n",
" 2.3 | \n",
" 0.092 | \n",
" 15.0 | \n",
" 54.0 | \n",
" 0.9970 | \n",
" 3.26 | \n",
" 0.65 | \n",
" 9.8 | \n",
" 5 | \n",
"
\n",
" \n",
" 3 | \n",
" 11.2 | \n",
" 0.28 | \n",
" 0.56 | \n",
" 1.9 | \n",
" 0.075 | \n",
" 17.0 | \n",
" 60.0 | \n",
" 0.9980 | \n",
" 3.16 | \n",
" 0.58 | \n",
" 9.8 | \n",
" 6 | \n",
"
\n",
" \n",
" 4 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality \n",
"0 9.4 5 \n",
"1 9.8 5 \n",
"2 9.8 5 \n",
"3 9.8 6 \n",
"4 9.4 5 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Third-party libraries used by the cells in this notebook.\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Read the wine-quality CSV (path is relative to the working directory)\n",
"# and preview the first five rows.\n",
"wine_df = pd.read_csv(filepath_or_buffer='winequality-red.csv')\n",
"wine_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "aa49c80d-905d-4cfb-a034-422d352ab9a1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed acidity | \n",
" volatile acidity | \n",
" citric acid | \n",
" residual sugar | \n",
" chlorides | \n",
" free sulfur dioxide | \n",
" total sulfur dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" Bad | \n",
"
\n",
" \n",
" 1 | \n",
" 7.8 | \n",
" 0.88 | \n",
" 0.00 | \n",
" 2.6 | \n",
" 0.098 | \n",
" 25.0 | \n",
" 67.0 | \n",
" 0.9968 | \n",
" 3.20 | \n",
" 0.68 | \n",
" 9.8 | \n",
" Bad | \n",
"
\n",
" \n",
" 2 | \n",
" 7.8 | \n",
" 0.76 | \n",
" 0.04 | \n",
" 2.3 | \n",
" 0.092 | \n",
" 15.0 | \n",
" 54.0 | \n",
" 0.9970 | \n",
" 3.26 | \n",
" 0.65 | \n",
" 9.8 | \n",
" Bad | \n",
"
\n",
" \n",
" 3 | \n",
" 11.2 | \n",
" 0.28 | \n",
" 0.56 | \n",
" 1.9 | \n",
" 0.075 | \n",
" 17.0 | \n",
" 60.0 | \n",
" 0.9980 | \n",
" 3.16 | \n",
" 0.58 | \n",
" 9.8 | \n",
" Good | \n",
"
\n",
" \n",
" 4 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" Bad | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality \n",
"0 9.4 Bad \n",
"1 9.8 Bad \n",
"2 9.8 Bad \n",
"3 9.8 Good \n",
"4 9.4 Bad "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Binarise the target: scores of 6 and above become 'Good', everything else 'Bad'.\n",
"# The dtype guard keeps this cell idempotent: once the column holds strings,\n",
"# comparing it against 6 again would raise a TypeError on re-run.\n",
"if pd.api.types.is_numeric_dtype(wine_df['quality']):\n",
"    wine_df['quality'] = np.where(wine_df['quality'] >= 6, 'Good', 'Bad')\n",
"wine_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b614590c-61f5-4c1e-b763-56cb146d1a8a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Description | Value |
\n",
" \n",
" 0 | \n",
" session_id | \n",
" 123 | \n",
"
\n",
" \n",
" 1 | \n",
" Target | \n",
" quality | \n",
"
\n",
" \n",
" 2 | \n",
" Target Type | \n",
" Binary | \n",
"
\n",
" \n",
" 3 | \n",
" Label Encoded | \n",
" Bad: 0, Good: 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Original Data | \n",
" (1599, 12) | \n",
"
\n",
" \n",
" 5 | \n",
" Missing Values | \n",
" False | \n",
"
\n",
" \n",
" 6 | \n",
" Numeric Features | \n",
" 11 | \n",
"
\n",
" \n",
" 7 | \n",
" Categorical Features | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" Ordinal Features | \n",
" False | \n",
"
\n",
" \n",
" 9 | \n",
" High Cardinality Features | \n",
" False | \n",
"
\n",
" \n",
" 10 | \n",
" High Cardinality Method | \n",
" None | \n",
"
\n",
" \n",
" 11 | \n",
" Transformed Train Set | \n",
" (1119, 11) | \n",
"
\n",
" \n",
" 12 | \n",
" Transformed Test Set | \n",
" (480, 11) | \n",
"
\n",
" \n",
" 13 | \n",
" Shuffle Train-Test | \n",
" True | \n",
"
\n",
" \n",
" 14 | \n",
" Stratify Train-Test | \n",
" False | \n",
"
\n",
" \n",
" 15 | \n",
" Fold Generator | \n",
" StratifiedKFold | \n",
"
\n",
" \n",
" 16 | \n",
" Fold Number | \n",
" 10 | \n",
"
\n",
" \n",
" 17 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" 18 | \n",
" Use GPU | \n",
" False | \n",
"
\n",
" \n",
" 19 | \n",
" Log Experiment | \n",
" False | \n",
"
\n",
" \n",
" 20 | \n",
" Experiment Name | \n",
" clf-default-name | \n",
"
\n",
" \n",
" 21 | \n",
" USI | \n",
" 9112 | \n",
"
\n",
" \n",
" 22 | \n",
" Imputation Type | \n",
" simple | \n",
"
\n",
" \n",
" 23 | \n",
" Iterative Imputation Iteration | \n",
" None | \n",
"
\n",
" \n",
" 24 | \n",
" Numeric Imputer | \n",
" mean | \n",
"
\n",
" \n",
" 25 | \n",
" Iterative Imputation Numeric Model | \n",
" None | \n",
"
\n",
" \n",
" 26 | \n",
" Categorical Imputer | \n",
" constant | \n",
"
\n",
" \n",
" 27 | \n",
" Iterative Imputation Categorical Model | \n",
" None | \n",
"
\n",
" \n",
" 28 | \n",
" Unknown Categoricals Handling | \n",
" least_frequent | \n",
"
\n",
" \n",
" 29 | \n",
" Normalize | \n",
" False | \n",
"
\n",
" \n",
" 30 | \n",
" Normalize Method | \n",
" None | \n",
"
\n",
" \n",
" 31 | \n",
" Transformation | \n",
" False | \n",
"
\n",
" \n",
" 32 | \n",
" Transformation Method | \n",
" None | \n",
"
\n",
" \n",
" 33 | \n",
" PCA | \n",
" False | \n",
"
\n",
" \n",
" 34 | \n",
" PCA Method | \n",
" None | \n",
"
\n",
" \n",
" 35 | \n",
" PCA Components | \n",
" None | \n",
"
\n",
" \n",
" 36 | \n",
" Ignore Low Variance | \n",
" False | \n",
"
\n",
" \n",
" 37 | \n",
" Combine Rare Levels | \n",
" False | \n",
"
\n",
" \n",
" 38 | \n",
" Rare Level Threshold | \n",
" None | \n",
"
\n",
" \n",
" 39 | \n",
" Numeric Binning | \n",
" False | \n",
"
\n",
" \n",
" 40 | \n",
" Remove Outliers | \n",
" False | \n",
"
\n",
" \n",
" 41 | \n",
" Outliers Threshold | \n",
" None | \n",
"
\n",
" \n",
" 42 | \n",
" Remove Multicollinearity | \n",
" False | \n",
"
\n",
" \n",
" 43 | \n",
" Multicollinearity Threshold | \n",
" None | \n",
"
\n",
" \n",
" 44 | \n",
" Remove Perfect Collinearity | \n",
" True | \n",
"
\n",
" \n",
" 45 | \n",
" Clustering | \n",
" False | \n",
"
\n",
" \n",
" 46 | \n",
" Clustering Iteration | \n",
" None | \n",
"
\n",
" \n",
" 47 | \n",
" Polynomial Features | \n",
" False | \n",
"
\n",
" \n",
" 48 | \n",
" Polynomial Degree | \n",
" None | \n",
"
\n",
" \n",
" 49 | \n",
" Trignometry Features | \n",
" False | \n",
"
\n",
" \n",
" 50 | \n",
" Polynomial Threshold | \n",
" None | \n",
"
\n",
" \n",
" 51 | \n",
" Group Features | \n",
" False | \n",
"
\n",
" \n",
" 52 | \n",
" Feature Selection | \n",
" False | \n",
"
\n",
" \n",
" 53 | \n",
" Feature Selection Method | \n",
" classic | \n",
"
\n",
" \n",
" 54 | \n",
" Features Selection Threshold | \n",
" None | \n",
"
\n",
" \n",
" 55 | \n",
" Feature Interaction | \n",
" False | \n",
"
\n",
" \n",
" 56 | \n",
" Feature Ratio | \n",
" False | \n",
"
\n",
" \n",
" 57 | \n",
" Interaction Threshold | \n",
" None | \n",
"
\n",
" \n",
" 58 | \n",
" Fix Imbalance | \n",
" False | \n",
"
\n",
" \n",
" 59 | \n",
" Fix Imbalance Method | \n",
" SMOTE | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Import only the PyCaret functions this notebook calls (instead of `import *`),\n",
"# so it is clear where each name comes from.\n",
"from pycaret.classification import (\n",
"    compare_models,\n",
"    create_model,\n",
"    evaluate_model,\n",
"    predict_model,\n",
"    save_model,\n",
"    setup,\n",
")\n",
"\n",
"# Baseline experiment: 'quality' is the target and no optional preprocessing is\n",
"# requested; session_id is fixed at 123 so runs use the same seed.\n",
"exp_clf01 = setup(data = wine_df, target = 'quality', session_id = 123)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "28f83424-20d9-45a2-b4ae-916e7777e7e3",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b4d45ba3952c426b9648e440019064ac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Summarize dataset: 0%| | 0/25 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "198d18e43e58439ba7f2afa7d78a6d64",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generate report structure: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "453fdd5cb95f4dc6821fdc3a1f9490f4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Render HTML: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Same baseline configuration as before, now with profile=True; the recorded\n",
"# output shows dataset-summary and report-rendering progress bars.\n",
"exp_clf01 = setup(\n",
"    data=wine_df,\n",
"    target='quality',\n",
"    profile=True,\n",
"    session_id=123,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "592441b9-63fb-4bf9-bf8d-3608f2c1017b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
\n",
" \n",
" rf | \n",
" Random Forest Classifier | \n",
" 0.8222 | \n",
" 0.8973 | \n",
" 0.8384 | \n",
" 0.8357 | \n",
" 0.8364 | \n",
" 0.6416 | \n",
" 0.6429 | \n",
" 0.3210 | \n",
"
\n",
" \n",
" et | \n",
" Extra Trees Classifier | \n",
" 0.8159 | \n",
" 0.9044 | \n",
" 0.8319 | \n",
" 0.8302 | \n",
" 0.8306 | \n",
" 0.6290 | \n",
" 0.6299 | \n",
" 0.2800 | \n",
"
\n",
" \n",
" lightgbm | \n",
" Light Gradient Boosting Machine | \n",
" 0.8132 | \n",
" 0.8849 | \n",
" 0.8204 | \n",
" 0.8346 | \n",
" 0.8266 | \n",
" 0.6242 | \n",
" 0.6257 | \n",
" 0.0610 | \n",
"
\n",
" \n",
" gbc | \n",
" Gradient Boosting Classifier | \n",
" 0.7855 | \n",
" 0.8593 | \n",
" 0.7990 | \n",
" 0.8071 | \n",
" 0.8018 | \n",
" 0.5682 | \n",
" 0.5703 | \n",
" 0.0900 | \n",
"
\n",
" \n",
" ridge | \n",
" Ridge Classifier | \n",
" 0.7569 | \n",
" 0.0000 | \n",
" 0.7497 | \n",
" 0.7910 | \n",
" 0.7688 | \n",
" 0.5131 | \n",
" 0.5151 | \n",
" 0.0100 | \n",
"
\n",
" \n",
" lr | \n",
" Logistic Regression | \n",
" 0.7507 | \n",
" 0.8178 | \n",
" 0.7480 | \n",
" 0.7825 | \n",
" 0.7642 | \n",
" 0.5000 | \n",
" 0.5015 | \n",
" 0.4770 | \n",
"
\n",
" \n",
" lda | \n",
" Linear Discriminant Analysis | \n",
" 0.7489 | \n",
" 0.8173 | \n",
" 0.7513 | \n",
" 0.7779 | \n",
" 0.7635 | \n",
" 0.4960 | \n",
" 0.4974 | \n",
" 0.0100 | \n",
"
\n",
" \n",
" dt | \n",
" Decision Tree Classifier | \n",
" 0.7444 | \n",
" 0.7411 | \n",
" 0.7809 | \n",
" 0.7568 | \n",
" 0.7684 | \n",
" 0.4835 | \n",
" 0.4841 | \n",
" 0.0180 | \n",
"
\n",
" \n",
" nb | \n",
" Naive Bayes | \n",
" 0.7418 | \n",
" 0.8043 | \n",
" 0.7646 | \n",
" 0.7615 | \n",
" 0.7621 | \n",
" 0.4798 | \n",
" 0.4811 | \n",
" 0.0090 | \n",
"
\n",
" \n",
" ada | \n",
" Ada Boost Classifier | \n",
" 0.7363 | \n",
" 0.8126 | \n",
" 0.7645 | \n",
" 0.7548 | \n",
" 0.7578 | \n",
" 0.4684 | \n",
" 0.4711 | \n",
" 0.0590 | \n",
"
\n",
" \n",
" qda | \n",
" Quadratic Discriminant Analysis | \n",
" 0.7328 | \n",
" 0.8032 | \n",
" 0.8072 | \n",
" 0.7296 | \n",
" 0.7661 | \n",
" 0.4563 | \n",
" 0.4603 | \n",
" 0.0130 | \n",
"
\n",
" \n",
" knn | \n",
" K Neighbors Classifier | \n",
" 0.6398 | \n",
" 0.6951 | \n",
" 0.6673 | \n",
" 0.6688 | \n",
" 0.6667 | \n",
" 0.2748 | \n",
" 0.2761 | \n",
" 0.0800 | \n",
"
\n",
" \n",
" svm | \n",
" SVM - Linear Kernel | \n",
" 0.5996 | \n",
" 0.0000 | \n",
" 0.7302 | \n",
" 0.6696 | \n",
" 0.6464 | \n",
" 0.1716 | \n",
" 0.1888 | \n",
" 0.0090 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best = compare_models()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9465ee4e-1044-49ee-9d8e-bb371b8d59c8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Description | Value |
\n",
" \n",
" 0 | \n",
" session_id | \n",
" 123 | \n",
"
\n",
" \n",
" 1 | \n",
" Target | \n",
" quality | \n",
"
\n",
" \n",
" 2 | \n",
" Target Type | \n",
" Binary | \n",
"
\n",
" \n",
" 3 | \n",
" Label Encoded | \n",
" Bad: 0, Good: 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Original Data | \n",
" (1599, 12) | \n",
"
\n",
" \n",
" 5 | \n",
" Missing Values | \n",
" False | \n",
"
\n",
" \n",
" 6 | \n",
" Numeric Features | \n",
" 11 | \n",
"
\n",
" \n",
" 7 | \n",
" Categorical Features | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" Ordinal Features | \n",
" False | \n",
"
\n",
" \n",
" 9 | \n",
" High Cardinality Features | \n",
" False | \n",
"
\n",
" \n",
" 10 | \n",
" High Cardinality Method | \n",
" None | \n",
"
\n",
" \n",
" 11 | \n",
" Transformed Train Set | \n",
" (1119, 11) | \n",
"
\n",
" \n",
" 12 | \n",
" Transformed Test Set | \n",
" (480, 11) | \n",
"
\n",
" \n",
" 13 | \n",
" Shuffle Train-Test | \n",
" True | \n",
"
\n",
" \n",
" 14 | \n",
" Stratify Train-Test | \n",
" False | \n",
"
\n",
" \n",
" 15 | \n",
" Fold Generator | \n",
" StratifiedKFold | \n",
"
\n",
" \n",
" 16 | \n",
" Fold Number | \n",
" 10 | \n",
"
\n",
" \n",
" 17 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" 18 | \n",
" Use GPU | \n",
" False | \n",
"
\n",
" \n",
" 19 | \n",
" Log Experiment | \n",
" False | \n",
"
\n",
" \n",
" 20 | \n",
" Experiment Name | \n",
" clf-default-name | \n",
"
\n",
" \n",
" 21 | \n",
" USI | \n",
" e207 | \n",
"
\n",
" \n",
" 22 | \n",
" Imputation Type | \n",
" simple | \n",
"
\n",
" \n",
" 23 | \n",
" Iterative Imputation Iteration | \n",
" None | \n",
"
\n",
" \n",
" 24 | \n",
" Numeric Imputer | \n",
" mean | \n",
"
\n",
" \n",
" 25 | \n",
" Iterative Imputation Numeric Model | \n",
" None | \n",
"
\n",
" \n",
" 26 | \n",
" Categorical Imputer | \n",
" constant | \n",
"
\n",
" \n",
" 27 | \n",
" Iterative Imputation Categorical Model | \n",
" None | \n",
"
\n",
" \n",
" 28 | \n",
" Unknown Categoricals Handling | \n",
" least_frequent | \n",
"
\n",
" \n",
" 29 | \n",
" Normalize | \n",
" True | \n",
"
\n",
" \n",
" 30 | \n",
" Normalize Method | \n",
" zscore | \n",
"
\n",
" \n",
" 31 | \n",
" Transformation | \n",
" True | \n",
"
\n",
" \n",
" 32 | \n",
" Transformation Method | \n",
" yeo-johnson | \n",
"
\n",
" \n",
" 33 | \n",
" PCA | \n",
" False | \n",
"
\n",
" \n",
" 34 | \n",
" PCA Method | \n",
" None | \n",
"
\n",
" \n",
" 35 | \n",
" PCA Components | \n",
" None | \n",
"
\n",
" \n",
" 36 | \n",
" Ignore Low Variance | \n",
" False | \n",
"
\n",
" \n",
" 37 | \n",
" Combine Rare Levels | \n",
" False | \n",
"
\n",
" \n",
" 38 | \n",
" Rare Level Threshold | \n",
" None | \n",
"
\n",
" \n",
" 39 | \n",
" Numeric Binning | \n",
" False | \n",
"
\n",
" \n",
" 40 | \n",
" Remove Outliers | \n",
" False | \n",
"
\n",
" \n",
" 41 | \n",
" Outliers Threshold | \n",
" None | \n",
"
\n",
" \n",
" 42 | \n",
" Remove Multicollinearity | \n",
" False | \n",
"
\n",
" \n",
" 43 | \n",
" Multicollinearity Threshold | \n",
" None | \n",
"
\n",
" \n",
" 44 | \n",
" Remove Perfect Collinearity | \n",
" True | \n",
"
\n",
" \n",
" 45 | \n",
" Clustering | \n",
" False | \n",
"
\n",
" \n",
" 46 | \n",
" Clustering Iteration | \n",
" None | \n",
"
\n",
" \n",
" 47 | \n",
" Polynomial Features | \n",
" False | \n",
"
\n",
" \n",
" 48 | \n",
" Polynomial Degree | \n",
" None | \n",
"
\n",
" \n",
" 49 | \n",
" Trignometry Features | \n",
" False | \n",
"
\n",
" \n",
" 50 | \n",
" Polynomial Threshold | \n",
" None | \n",
"
\n",
" \n",
" 51 | \n",
" Group Features | \n",
" False | \n",
"
\n",
" \n",
" 52 | \n",
" Feature Selection | \n",
" False | \n",
"
\n",
" \n",
" 53 | \n",
" Feature Selection Method | \n",
" classic | \n",
"
\n",
" \n",
" 54 | \n",
" Features Selection Threshold | \n",
" None | \n",
"
\n",
" \n",
" 55 | \n",
" Feature Interaction | \n",
" False | \n",
"
\n",
" \n",
" 56 | \n",
" Feature Ratio | \n",
" False | \n",
"
\n",
" \n",
" 57 | \n",
" Interaction Threshold | \n",
" None | \n",
"
\n",
" \n",
" 58 | \n",
" Fix Imbalance | \n",
" False | \n",
"
\n",
" \n",
" 59 | \n",
" Fix Imbalance Method | \n",
" SMOTE | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Second experiment: same data, target and seed, with normalisation and feature\n",
"# transformation switched on (the recorded setup summary reports zscore and\n",
"# yeo-johnson as the methods used).\n",
"exp_clf102 = setup(\n",
"    data=wine_df,\n",
"    target='quality',\n",
"    session_id=123,\n",
"    normalize=True,\n",
"    transformation=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "6da0b7ee-48f5-4f83-9739-9de9f4c4bf53",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
\n",
" \n",
" et | \n",
" Extra Trees Classifier | \n",
" 0.8231 | \n",
" 0.9032 | \n",
" 0.8434 | \n",
" 0.8338 | \n",
" 0.8379 | \n",
" 0.6431 | \n",
" 0.6443 | \n",
" 0.2860 | \n",
"
\n",
" \n",
" rf | \n",
" Random Forest Classifier | \n",
" 0.8222 | \n",
" 0.8976 | \n",
" 0.8351 | \n",
" 0.8380 | \n",
" 0.8359 | \n",
" 0.6418 | \n",
" 0.6430 | \n",
" 0.3170 | \n",
"
\n",
" \n",
" lightgbm | \n",
" Light Gradient Boosting Machine | \n",
" 0.8141 | \n",
" 0.8835 | \n",
" 0.8237 | \n",
" 0.8337 | \n",
" 0.8275 | \n",
" 0.6260 | \n",
" 0.6277 | \n",
" 0.0570 | \n",
"
\n",
" \n",
" gbc | \n",
" Gradient Boosting Classifier | \n",
" 0.7873 | \n",
" 0.8596 | \n",
" 0.7991 | \n",
" 0.8095 | \n",
" 0.8030 | \n",
" 0.5720 | \n",
" 0.5741 | \n",
" 0.0970 | \n",
"
\n",
" \n",
" lr | \n",
" Logistic Regression | \n",
" 0.7525 | \n",
" 0.8201 | \n",
" 0.7727 | \n",
" 0.7719 | \n",
" 0.7711 | \n",
" 0.5015 | \n",
" 0.5032 | \n",
" 0.0210 | \n",
"
\n",
" \n",
" qda | \n",
" Quadratic Discriminant Analysis | \n",
" 0.7507 | \n",
" 0.8123 | \n",
" 0.7760 | \n",
" 0.7679 | \n",
" 0.7711 | \n",
" 0.4972 | \n",
" 0.4985 | \n",
" 0.0130 | \n",
"
\n",
" \n",
" ridge | \n",
" Ridge Classifier | \n",
" 0.7498 | \n",
" 0.0000 | \n",
" 0.7595 | \n",
" 0.7750 | \n",
" 0.7659 | \n",
" 0.4972 | \n",
" 0.4990 | \n",
" 0.0090 | \n",
"
\n",
" \n",
" lda | \n",
" Linear Discriminant Analysis | \n",
" 0.7498 | \n",
" 0.8215 | \n",
" 0.7595 | \n",
" 0.7750 | \n",
" 0.7659 | \n",
" 0.4972 | \n",
" 0.4990 | \n",
" 0.0080 | \n",
"
\n",
" \n",
" dt | \n",
" Decision Tree Classifier | \n",
" 0.7444 | \n",
" 0.7413 | \n",
" 0.7793 | \n",
" 0.7578 | \n",
" 0.7680 | \n",
" 0.4837 | \n",
" 0.4844 | \n",
" 0.0100 | \n",
"
\n",
" \n",
" nb | \n",
" Naive Bayes | \n",
" 0.7373 | \n",
" 0.8128 | \n",
" 0.7119 | \n",
" 0.7854 | \n",
" 0.7461 | \n",
" 0.4753 | \n",
" 0.4787 | \n",
" 0.0130 | \n",
"
\n",
" \n",
" ada | \n",
" Ada Boost Classifier | \n",
" 0.7363 | \n",
" 0.8127 | \n",
" 0.7645 | \n",
" 0.7548 | \n",
" 0.7578 | \n",
" 0.4684 | \n",
" 0.4711 | \n",
" 0.0570 | \n",
"
\n",
" \n",
" knn | \n",
" K Neighbors Classifier | \n",
" 0.7328 | \n",
" 0.8023 | \n",
" 0.7841 | \n",
" 0.7407 | \n",
" 0.7605 | \n",
" 0.4589 | \n",
" 0.4622 | \n",
" 0.0710 | \n",
"
\n",
" \n",
" svm | \n",
" SVM - Linear Kernel | \n",
" 0.7212 | \n",
" 0.0000 | \n",
" 0.7246 | \n",
" 0.7547 | \n",
" 0.7369 | \n",
" 0.4402 | \n",
" 0.4436 | \n",
" 0.0100 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best = compare_models()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e9a729f4-c2b3-4a6a-b7aa-3cc70784bbb8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
" \n",
" 0 | \n",
" 0.8304 | \n",
" 0.9087 | \n",
" 0.7869 | \n",
" 0.8889 | \n",
" 0.8348 | \n",
" 0.6618 | \n",
" 0.6670 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.8214 | \n",
" 0.9182 | \n",
" 0.8361 | \n",
" 0.8361 | \n",
" 0.8361 | \n",
" 0.6400 | \n",
" 0.6400 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.8482 | \n",
" 0.9346 | \n",
" 0.8525 | \n",
" 0.8667 | \n",
" 0.8595 | \n",
" 0.6945 | \n",
" 0.6946 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.8393 | \n",
" 0.9039 | \n",
" 0.8689 | \n",
" 0.8413 | \n",
" 0.8548 | \n",
" 0.6749 | \n",
" 0.6754 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.8393 | \n",
" 0.8941 | \n",
" 0.8852 | \n",
" 0.8308 | \n",
" 0.8571 | \n",
" 0.6739 | \n",
" 0.6757 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.8482 | \n",
" 0.9261 | \n",
" 0.8525 | \n",
" 0.8667 | \n",
" 0.8595 | \n",
" 0.6945 | \n",
" 0.6946 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.8214 | \n",
" 0.8987 | \n",
" 0.8689 | \n",
" 0.8154 | \n",
" 0.8413 | \n",
" 0.6377 | \n",
" 0.6393 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.7768 | \n",
" 0.8699 | \n",
" 0.8333 | \n",
" 0.7692 | \n",
" 0.8000 | \n",
" 0.5484 | \n",
" 0.5506 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.7768 | \n",
" 0.8827 | \n",
" 0.8000 | \n",
" 0.7869 | \n",
" 0.7934 | \n",
" 0.5507 | \n",
" 0.5508 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.8288 | \n",
" 0.8953 | \n",
" 0.8500 | \n",
" 0.8361 | \n",
" 0.8430 | \n",
" 0.6549 | \n",
" 0.6550 | \n",
"
\n",
" \n",
" Mean | \n",
" 0.8231 | \n",
" 0.9032 | \n",
" 0.8434 | \n",
" 0.8338 | \n",
" 0.8379 | \n",
" 0.6431 | \n",
" 0.6443 | \n",
"
\n",
" \n",
" SD | \n",
" 0.0248 | \n",
" 0.0186 | \n",
" 0.0291 | \n",
" 0.0346 | \n",
" 0.0225 | \n",
" 0.0503 | \n",
" 0.0502 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"et_model = create_model('et')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c6726376-5f56-41bf-acbd-2d7f266ebb08",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "70cdc491a6284efb9d2e1157562f2b6f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"evaluate_model(et_model)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "7c203e50-7976-46a5-8839-272ff13e57d7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
\n",
" \n",
" 0 | \n",
" Extra Trees Classifier | \n",
" 0.7875 | \n",
" 0.8858 | \n",
" 0.8387 | \n",
" 0.7704 | \n",
" 0.8031 | \n",
" 0.5732 | \n",
" 0.5757 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed acidity | \n",
" volatile acidity | \n",
" citric acid | \n",
" residual sugar | \n",
" chlorides | \n",
" free sulfur dioxide | \n",
" total sulfur dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" Label | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.020588 | \n",
" -0.262337 | \n",
" 0.879940 | \n",
" 0.909526 | \n",
" -0.811347 | \n",
" -1.539989 | \n",
" -1.767543 | \n",
" -0.001569 | \n",
" 0.179457 | \n",
" -0.026319 | \n",
" 1.507669 | \n",
" Good | \n",
" Good | \n",
" 0.88 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.803912 | \n",
" 0.365825 | \n",
" 0.069809 | \n",
" 0.182406 | \n",
" 0.169895 | \n",
" 0.853181 | \n",
" 1.990040 | \n",
" 0.562903 | \n",
" -0.216540 | \n",
" -0.678448 | \n",
" -1.071227 | \n",
" Bad | \n",
" Bad | \n",
" 0.95 | \n",
"
\n",
" \n",
" 2 | \n",
" -0.548070 | \n",
" 1.883447 | \n",
" -0.893328 | \n",
" -0.877415 | \n",
" -0.243956 | \n",
" 0.626979 | \n",
" 0.590835 | \n",
" -0.012134 | \n",
" 0.309834 | \n",
" -0.576033 | \n",
" -1.403862 | \n",
" Bad | \n",
" Bad | \n",
" 0.79 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.107601 | \n",
" -1.862275 | \n",
" 0.790650 | \n",
" -0.626787 | \n",
" -0.545447 | \n",
" -0.666601 | \n",
" -1.164981 | \n",
" -1.619909 | \n",
" -0.619242 | \n",
" -0.783892 | \n",
" 1.558527 | \n",
" Good | \n",
" Good | \n",
" 0.85 | \n",
"
\n",
" \n",
" 4 | \n",
" -0.094336 | \n",
" -0.848967 | \n",
" -0.143101 | \n",
" -0.877415 | \n",
" -0.079076 | \n",
" 0.853181 | \n",
" 0.377180 | \n",
" -0.235514 | \n",
" -0.150025 | \n",
" 0.344294 | \n",
" -1.234888 | \n",
" Bad | \n",
" Bad | \n",
" 0.62 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 475 | \n",
" -0.548070 | \n",
" -0.140215 | \n",
" 0.322042 | \n",
" -0.184964 | \n",
" -1.025574 | \n",
" 1.330936 | \n",
" 0.533354 | \n",
" 0.282229 | \n",
" -0.017589 | \n",
" 0.205814 | \n",
" -0.205689 | \n",
" Good | \n",
" Good | \n",
" 1.00 | \n",
"
\n",
" \n",
" 476 | \n",
" 1.829642 | \n",
" -0.021461 | \n",
" 1.097954 | \n",
" 0.007663 | \n",
" 0.466399 | \n",
" -1.351598 | \n",
" -1.357751 | \n",
" 1.774647 | \n",
" -0.754683 | \n",
" -2.001079 | \n",
" -0.760163 | \n",
" Bad | \n",
" Bad | \n",
" 1.00 | \n",
"
\n",
" \n",
" 477 | \n",
" -0.629806 | \n",
" -0.781026 | \n",
" 0.222889 | \n",
" -0.877415 | \n",
" -0.301560 | \n",
" 1.330936 | \n",
" 0.934478 | \n",
" 0.056549 | \n",
" 0.761128 | \n",
" -0.287161 | \n",
" -0.912956 | \n",
" Good | \n",
" Good | \n",
" 1.00 | \n",
"
\n",
" \n",
" 478 | \n",
" 0.936346 | \n",
" -1.341807 | \n",
" 0.654205 | \n",
" -1.148794 | \n",
" -1.025574 | \n",
" -1.735120 | \n",
" -1.697007 | \n",
" -1.047378 | \n",
" -1.095523 | \n",
" -0.476666 | \n",
" 1.112170 | \n",
" Good | \n",
" Good | \n",
" 0.90 | \n",
"
\n",
" \n",
" 479 | \n",
" 0.170983 | \n",
" 0.520056 | \n",
" -0.035430 | \n",
" 0.007663 | \n",
" 1.252116 | \n",
" -0.666601 | \n",
" -0.045372 | \n",
" -0.310424 | \n",
" -1.027116 | \n",
" -0.892347 | \n",
" -0.471378 | \n",
" Bad | \n",
" Bad | \n",
" 1.00 | \n",
"
\n",
" \n",
"
\n",
"
480 rows × 14 columns
\n",
"
"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 1.020588 -0.262337 0.879940 0.909526 -0.811347 \n",
"1 0.803912 0.365825 0.069809 0.182406 0.169895 \n",
"2 -0.548070 1.883447 -0.893328 -0.877415 -0.243956 \n",
"3 0.107601 -1.862275 0.790650 -0.626787 -0.545447 \n",
"4 -0.094336 -0.848967 -0.143101 -0.877415 -0.079076 \n",
".. ... ... ... ... ... \n",
"475 -0.548070 -0.140215 0.322042 -0.184964 -1.025574 \n",
"476 1.829642 -0.021461 1.097954 0.007663 0.466399 \n",
"477 -0.629806 -0.781026 0.222889 -0.877415 -0.301560 \n",
"478 0.936346 -1.341807 0.654205 -1.148794 -1.025574 \n",
"479 0.170983 0.520056 -0.035430 0.007663 1.252116 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 -1.539989 -1.767543 -0.001569 0.179457 -0.026319 \n",
"1 0.853181 1.990040 0.562903 -0.216540 -0.678448 \n",
"2 0.626979 0.590835 -0.012134 0.309834 -0.576033 \n",
"3 -0.666601 -1.164981 -1.619909 -0.619242 -0.783892 \n",
"4 0.853181 0.377180 -0.235514 -0.150025 0.344294 \n",
".. ... ... ... ... ... \n",
"475 1.330936 0.533354 0.282229 -0.017589 0.205814 \n",
"476 -1.351598 -1.357751 1.774647 -0.754683 -2.001079 \n",
"477 1.330936 0.934478 0.056549 0.761128 -0.287161 \n",
"478 -1.735120 -1.697007 -1.047378 -1.095523 -0.476666 \n",
"479 -0.666601 -0.045372 -0.310424 -1.027116 -0.892347 \n",
"\n",
" alcohol quality Label Score \n",
"0 1.507669 Good Good 0.88 \n",
"1 -1.071227 Bad Bad 0.95 \n",
"2 -1.403862 Bad Bad 0.79 \n",
"3 1.558527 Good Good 0.85 \n",
"4 -1.234888 Bad Bad 0.62 \n",
".. ... ... ... ... \n",
"475 -0.205689 Good Good 1.00 \n",
"476 -0.760163 Bad Bad 1.00 \n",
"477 -0.912956 Good Good 1.00 \n",
"478 1.112170 Good Good 0.90 \n",
"479 -0.471378 Bad Bad 1.00 \n",
"\n",
"[480 rows x 14 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict_model(et_model)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "97300de8-21d0-45d2-8f6e-9623703d5605",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Transformation Pipeline and Model Successfully Saved\n"
]
},
{
"data": {
"text/plain": [
"(Pipeline(memory=None,\n",
" steps=[('dtypes',\n",
" DataTypes_Auto_infer(categorical_features=[],\n",
" display_types=True, features_todrop=[],\n",
" id_columns=[],\n",
" ml_usecase='classification',\n",
" numerical_features=[], target='quality',\n",
" time_features=[])),\n",
" ('imputer',\n",
" Simple_Imputer(categorical_strategy='not_available',\n",
" fill_value_categorical=None,\n",
" fill_value_numerical=None,\n",
" numeric_stra...\n",
" ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0,\n",
" class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto',\n",
" max_leaf_nodes=None, max_samples=None,\n",
" min_impurity_decrease=0.0,\n",
" min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0,\n",
" n_estimators=100, n_jobs=-1,\n",
" oob_score=False, random_state=123,\n",
" verbose=0, warm_start=False)]],\n",
" verbose=False),\n",
" 'extra_tree_model.pkl')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"save_model(et_model, model_name = 'extra_tree_model')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "run pycaret",
"language": "python",
"name": "run_pycaret"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}