{ "cells": [ { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "# Lectures 5 and 6: Class demo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports, Announcements, LOs" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# import the libraries\n", "import os\n", "import sys\n", "sys.path.append(os.path.join(os.path.abspath(\"../\"), \"code\"))\n", "from plotting_functions import *\n", "from utils import *\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "\n", "%matplotlib inline\n", "\n", "pd.set_option(\"display.max_colwidth\", 200)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Do you recall [the restaurants survey](https://ubc.ca1.qualtrics.com/jfe/form/SV_73VuZiuwM1eDVrw) you completed at the start of the course?\n", "\n", "Let's use that data for this demo. You'll find a [wrangled version](https://github.com/UBC-CS/cpsc330-2023W1/blob/main/lectures/data/cleaned_restaurant_data.csv) in the course repository." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../data/cleaned_restaurant_data.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | north_america | \n", "eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "food_type | \n", "noise_level | \n", "good_server | \n", "comments | \n", "restaurant_name | \n", "target | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Yes | \n", "3.0 | \n", "29 | \n", "10.0 | \n", "120.0 | \n", "Italian | \n", "medium | \n", "Yes | \n", "Ambience | \n", "NaN | \n", "dislike | \n", "
1 | \n", "Yes | \n", "2.0 | \n", "23 | \n", "3.0 | \n", "20.0 | \n", "Canadian/American | \n", "no music | \n", "No | \n", "food tastes bad | \n", "NaN | \n", "dislike | \n", "
2 | \n", "Yes | \n", "2.0 | \n", "21 | \n", "20.0 | \n", "15.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "bad food | \n", "NaN | \n", "dislike | \n", "
3 | \n", "No | \n", "2.0 | \n", "24 | \n", "14.0 | \n", "18.0 | \n", "Other | \n", "medium | \n", "No | \n", "Overall vibe on the restaurant | \n", "NaN | \n", "dislike | \n", "
4 | \n", "Yes | \n", "5.0 | \n", "23 | \n", "30.0 | \n", "20.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "A bad day | \n", "NaN | \n", "dislike | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
959 | \n", "No | \n", "10.0 | \n", "22 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "like | \n", "
960 | \n", "Yes | \n", "1.0 | \n", "20 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "like | \n", "
961 | \n", "No | \n", "1.0 | \n", "22 | \n", "40.0 | \n", "50.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "The self service sauce table is very clean and the sauces were always filled up. | \n", "Haidilao | \n", "like | \n", "
962 | \n", "Yes | \n", "3.0 | \n", "21 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "like | \n", "
963 | \n", "Yes | \n", "3.0 | \n", "27 | \n", "20.0 | \n", "22.0 | \n", "Other | \n", "medium | \n", "Yes | \n", "Lots of meat that was very soft and tasty. Hearty and amazing broth. Good noodle thickness and consistency | \n", "Uno Beef Noodle | \n", "like | \n", "
964 rows × 11 columns
\n", "\n", " | eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "
---|---|---|---|---|
count | \n", "964.000000 | \n", "964.000000 | \n", "6.960000e+02 | \n", "696.000000 | \n", "
mean | \n", "2.585187 | \n", "23.975104 | \n", "1.439254e+04 | \n", "1472.179152 | \n", "
std | \n", "2.246486 | \n", "4.556716 | \n", "3.790481e+05 | \n", "37903.575636 | \n", "
min | \n", "0.000000 | \n", "10.000000 | \n", "-2.000000e+00 | \n", "0.000000 | \n", "
25% | \n", "1.000000 | \n", "21.000000 | \n", "1.000000e+01 | \n", "18.000000 | \n", "
50% | \n", "2.000000 | \n", "22.000000 | \n", "2.000000e+01 | \n", "25.000000 | \n", "
75% | \n", "3.000000 | \n", "26.000000 | \n", "3.000000e+01 | \n", "40.000000 | \n", "
max | \n", "15.000000 | \n", "46.000000 | \n", "1.000000e+07 | \n", "1000000.000000 | \n", "
\n", " | eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "
---|---|---|---|---|
count | \n", "942.000000 | \n", "942.000000 | \n", "674.000000 | \n", "674.000000 | \n", "
mean | \n", "2.598057 | \n", "23.992569 | \n", "24.973294 | \n", "34.023279 | \n", "
std | \n", "2.257787 | \n", "4.582570 | \n", "22.016660 | \n", "29.018622 | \n", "
min | \n", "0.000000 | \n", "10.000000 | \n", "1.000000 | \n", "0.000000 | \n", "
25% | \n", "1.000000 | \n", "21.000000 | \n", "10.000000 | \n", "18.000000 | \n", "
50% | \n", "2.000000 | \n", "22.000000 | \n", "20.000000 | \n", "25.000000 | \n", "
75% | \n", "3.000000 | \n", "26.000000 | \n", "30.000000 | \n", "40.000000 | \n", "
max | \n", "15.000000 | \n", "46.000000 | \n", "200.000000 | \n", "200.000000 | \n", "
\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
0 | \n", "0.000870 | \n", "0.000942 | \n", "0.516556 | \n", "0.514950 | \n", "
1 | \n", "0.000575 | \n", "0.000392 | \n", "0.516556 | \n", "0.514950 | \n", "
2 | \n", "0.000965 | \n", "0.000485 | \n", "0.516556 | \n", "0.514950 | \n", "
3 | \n", "0.000537 | \n", "0.000366 | \n", "0.513333 | \n", "0.515755 | \n", "
4 | \n", "0.000498 | \n", "0.000356 | \n", "0.513333 | \n", "0.515755 | \n", "
\n", " | north_america | \n", "eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "food_type | \n", "noise_level | \n", "good_server | \n", "comments | \n", "restaurant_name | \n", "
---|---|---|---|---|---|---|---|---|---|---|
80 | \n", "No | \n", "2.0 | \n", "21 | \n", "30.0 | \n", "2200.0 | \n", "Chinese | \n", "high | \n", "No | \n", "The environment was very not clean. The food tasted awful. | \n", "NaN | \n", "
934 | \n", "Yes | \n", "4.0 | \n", "21 | \n", "30.0 | \n", "3000.0 | \n", "Canadian/American | \n", "low | \n", "Yes | \n", "The building and the room gave a very comfy feeling. Immediately after sitting down it felt like we were right at home. | \n", "NaN | \n", "
911 | \n", "No | \n", "4.0 | \n", "20 | \n", "40.0 | \n", "2500.0 | \n", "Canadian/American | \n", "medium | \n", "Yes | \n", "I was hungry | \n", "Chambar | \n", "
459 | \n", "Yes | \n", "5.0 | \n", "21 | \n", "NaN | \n", "NaN | \n", "Quebecois | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
62 | \n", "Yes | \n", "2.0 | \n", "24 | \n", "20.0 | \n", "3000.0 | \n", "Indian | \n", "high | \n", "Yes | \n", "bad taste | \n", "east is east | \n", "
KNeighborsClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
\n", " | age | \n", "n_people | \n", "price | \n", "
---|---|---|---|
80 | \n", "21 | \n", "30.0 | \n", "2200.0 | \n", "
934 | \n", "21 | \n", "30.0 | \n", "3000.0 | \n", "
911 | \n", "20 | \n", "40.0 | \n", "2500.0 | \n", "
459 | \n", "21 | \n", "NaN | \n", "NaN | \n", "
62 | \n", "24 | \n", "20.0 | \n", "3000.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
106 | \n", "27 | \n", "10.0 | \n", "1500.0 | \n", "
333 | \n", "24 | \n", "12.0 | \n", "800.0 | \n", "
393 | \n", "20 | \n", "5.0 | \n", "1500.0 | \n", "
376 | \n", "20 | \n", "NaN | \n", "NaN | \n", "
525 | \n", "20 | \n", "50.0 | \n", "3000.0 | \n", "
753 rows × 3 columns
\n", "\n", " | north_america | \n", "food_type | \n", "
---|---|---|
80 | \n", "No | \n", "Chinese | \n", "
934 | \n", "Yes | \n", "Canadian/American | \n", "
911 | \n", "No | \n", "Canadian/American | \n", "
459 | \n", "Yes | \n", "Quebecois | \n", "
62 | \n", "Yes | \n", "Indian | \n", "
... | \n", "... | \n", "... | \n", "
106 | \n", "No | \n", "Chinese | \n", "
333 | \n", "No | \n", "Other | \n", "
393 | \n", "Yes | \n", "Canadian/American | \n", "
376 | \n", "Yes | \n", "NaN | \n", "
525 | \n", "Don't want to share | \n", "Chinese | \n", "
753 rows × 2 columns
\n", "\n", " | north_america_Don't want to share | \n", "north_america_No | \n", "north_america_Yes | \n", "food_type_Canadian/American | \n", "food_type_Chinese | \n", "food_type_Fusion | \n", "food_type_Indian | \n", "food_type_Italian | \n", "food_type_Mexican | \n", "food_type_Other | \n", "food_type_Quebecois | \n", "food_type_Thai | \n", "food_type_nan | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
748 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
749 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
750 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
751 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
752 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
753 rows × 13 columns
\n", "ColumnTransformer(transformers=[('pipeline-1',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " ['age', 'n_people', 'price']),\n", " ('pipeline-2',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(drop='if_binary'))]),\n", " ['good_server']),\n", " ('pipeline-3',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore',\n", " sparse_output=False))]),\n", " ['north_america', 'food_type'])])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
ColumnTransformer(transformers=[('pipeline-1',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " ['age', 'n_people', 'price']),\n", " ('pipeline-2',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(drop='if_binary'))]),\n", " ['good_server']),\n", " ('pipeline-3',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore',\n", " sparse_output=False))]),\n", " ['north_america', 'food_type'])])
['age', 'n_people', 'price']
SimpleImputer(strategy='median')
StandardScaler()
['good_server']
SimpleImputer(strategy='most_frequent')
OneHotEncoder(drop='if_binary')
['north_america', 'food_type']
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore', sparse_output=False)
\n", " | north_america | \n", "eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "food_type | \n", "noise_level | \n", "good_server | \n", "comments | \n", "restaurant_name | \n", "
---|---|---|---|---|---|---|---|---|---|---|
80 | \n", "No | \n", "2.0 | \n", "21 | \n", "30.0 | \n", "2200.0 | \n", "Chinese | \n", "high | \n", "No | \n", "The environment was very not clean. The food tasted awful. | \n", "NaN | \n", "
934 | \n", "Yes | \n", "4.0 | \n", "21 | \n", "30.0 | \n", "3000.0 | \n", "Canadian/American | \n", "low | \n", "Yes | \n", "The building and the room gave a very comfy feeling. Immediately after sitting down it felt like we were right at home. | \n", "NaN | \n", "
911 | \n", "No | \n", "4.0 | \n", "20 | \n", "40.0 | \n", "2500.0 | \n", "Canadian/American | \n", "medium | \n", "Yes | \n", "I was hungry | \n", "Chambar | \n", "
459 | \n", "Yes | \n", "5.0 | \n", "21 | \n", "NaN | \n", "NaN | \n", "Quebecois | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
62 | \n", "Yes | \n", "2.0 | \n", "24 | \n", "20.0 | \n", "3000.0 | \n", "Indian | \n", "high | \n", "Yes | \n", "bad taste | \n", "east is east | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
106 | \n", "No | \n", "3.0 | \n", "27 | \n", "10.0 | \n", "1500.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "Food wasn't great. | \n", "NaN | \n", "
333 | \n", "No | \n", "1.0 | \n", "24 | \n", "12.0 | \n", "800.0 | \n", "Other | \n", "medium | \n", "Yes | \n", "NaN | \n", "NaN | \n", "
393 | \n", "Yes | \n", "4.0 | \n", "20 | \n", "5.0 | \n", "1500.0 | \n", "Canadian/American | \n", "low | \n", "No | \n", "NaN | \n", "NaN | \n", "
376 | \n", "Yes | \n", "5.0 | \n", "20 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
525 | \n", "Don't want to share | \n", "4.0 | \n", "20 | \n", "50.0 | \n", "3000.0 | \n", "Chinese | \n", "high | \n", "Yes | \n", "NaN | \n", "Haidilao | \n", "
753 rows × 10 columns
\n", "