Lecture 15: Class demo#

Let’s cluster images!!

For this demo, I’m going to use the following image dataset:

  1. A tiny subset of Food-101 from last lecture (available here).

  2. A small subset of Human Faces dataset (available here).

To run the code below, you need to install pytorch and torchvision in the course conda environment.

conda install pytorch torchvision -c pytorch
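To check that the installation worked, here is a quick sanity check (the exact version numbers will vary):

import torch
import torchvision

print(torch.__version__, torchvision.__version__)
print("CUDA available:", torch.cuda.is_available())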

import os, sys
sys.path.append(os.path.join(os.path.abspath(".."), "code"))
from plotting_functions_unsup import *
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image
from torchvision import datasets, models, transforms, utils

Let’s start with the small subset of the Food-101 dataset. You can experiment with a bigger dataset if you like.

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
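# Note: the code below runs fine on CPU; device is defined in case you want to move the model to a GPU.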
def set_seed(seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
set_seed(seed=42)
import glob
IMAGE_SIZE = 200
def read_img_dataset(data_dir):
    data_transforms = transforms.Compose(
        [
            transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),  # scale each channel to [-1, 1]
        ]
    )

    image_dataset = datasets.ImageFolder(root=data_dir, transform=data_transforms)
    dataloader = torch.utils.data.DataLoader(
        image_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0  # BATCH_SIZE is set globally below
    )
    inputs, classes = next(iter(dataloader))  # grab one (full) batch of images and labels
    return inputs, classes
def plot_sample_imgs(inputs):
    plt.figure(figsize=(10, 70)); plt.axis("off"); plt.title("Sample Training Images")
    plt.imshow(np.transpose(utils.make_grid(inputs, padding=1, normalize=True),(1, 2, 0)));
def get_features(model, inputs):
    """Extract the model's output features for the given inputs."""
    with torch.no_grad():  # no gradients needed; we only do a forward pass
        Z_train = model(inputs)
    return Z_train.detach()
densenet = models.densenet121(weights="DenseNet121_Weights.IMAGENET1K_V1")
densenet.classifier = torch.nn.Identity()  # remove that last "classification" layer
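With the classifier replaced by an identity mapping, the model now outputs the 1024-dimensional vector from its last pooling layer. As a quick sanity check, we can pass a random batch through the model (a sketch; only the output shape matters):

dummy_batch = torch.randn(2, 3, IMAGE_SIZE, IMAGE_SIZE)  # two fake RGB images
with torch.no_grad():
    print(densenet(dummy_batch).shape)  # expected: torch.Size([2, 1024])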
data_dir = "../data/food"
file_names = [image_file for image_file in glob.glob(data_dir + "/*/*.jpg")]
n_images = len(file_names)
BATCH_SIZE = n_images  # because our dataset is quite small
food_inputs, food_classes = read_img_dataset(data_dir)
n_images
350
X_food = food_inputs.numpy()
plot_sample_imgs(food_inputs[0:24,:,:,:])
[Figure: grid of sample food images]
Z_food = get_features(
    densenet, food_inputs, 
)
Z_food = Z_food.numpy()
Z_food.shape
(350, 1024)
from sklearn.cluster import KMeans

k = 7
km = KMeans(n_clusters=k, n_init='auto', random_state=123)
km.fit(Z_food)
KMeans(n_clusters=7, n_init='auto', random_state=123)
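Before visualizing images from each cluster, it’s worth checking how K-Means distributed the images (km.labels_ holds one cluster label per image):

print(np.bincount(km.labels_))  # number of images in each of the 7 clusters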
for cluster in range(k):
    get_cluster_images(km, Z_food, X_food, cluster, n_img=6)
Image indices:  [ 82  58 168 114 122  44]
Image indices:  [ 89 209  67 330 141 166]
Image indices:  [249 327 331  30 236 267]
Image indices:  [ 76  90  29 240 237 238]
Image indices:  [223 136 124 304  13 137]
Image indices:  [190 132  73 140   6 149]
Image indices:  [ 65 238 237 236 235 234]

[Figures: six sample images from each of the seven K-Means clusters]

Let’s try DBSCAN.

from sklearn.cluster import DBSCAN

dbscan = DBSCAN()

labels = dbscan.fit_predict(Z_food)
print("Unique labels: {}".format(np.unique(labels)))
Unique labels: [-1]

It identified all points as noise points. This makes sense: DBSCAN’s default eps is 0.5, and as we’ll see below, even the closest pair of feature vectors is roughly 15 apart, so no point has any neighbours within the default radius. Let’s explore the distances between points.

from sklearn.metrics.pairwise import euclidean_distances

dists = euclidean_distances(Z_food)
np.fill_diagonal(dists, np.inf)
dists_df = pd.DataFrame(dists)
dists_df
0 1 2 3 4 5 6 7 8 9 ... 340 341 342 343 344 345 346 347 348 349
0 inf 27.838209 24.272423 28.557386 25.911512 26.729088 28.535007 27.919943 29.910038 24.831701 ... 25.821581 27.069433 27.836058 27.045502 28.334972 28.312830 30.730888 26.483530 26.518341 27.012949
1 27.838209 inf 25.690664 26.808674 28.380119 26.499969 25.269186 27.080601 24.174231 26.724173 ... 25.517481 22.425566 24.153849 26.348946 28.451920 27.344963 26.383554 24.948429 23.117655 25.012907
2 24.272423 25.690664 inf 26.241850 26.272598 24.983030 25.745886 26.138418 27.259501 23.387449 ... 24.976543 24.755798 26.489601 26.757547 23.980059 25.965456 29.195887 25.397800 23.607014 23.029932
3 28.557386 26.808674 26.241850 inf 28.488819 26.426289 28.817778 29.362293 27.543203 22.214371 ... 24.433807 26.525894 26.772890 28.805965 26.740297 24.505461 29.599627 27.887571 26.735687 26.722355
4 25.911512 28.380119 26.272598 28.488819 inf 25.054052 28.763035 28.249685 29.294979 24.665562 ... 25.088818 26.409763 27.046513 26.512915 28.122980 26.731318 29.328199 26.524778 26.566645 26.233059
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
345 28.312830 27.344963 25.965456 24.505461 26.731318 27.732960 28.961937 28.021688 28.355204 24.388792 ... 23.226387 25.771818 26.816877 29.633665 27.816105 inf 29.698130 26.911955 25.400141 26.690134
346 30.730888 26.383554 29.195887 29.599627 29.328199 27.754042 30.597506 31.449287 29.049765 26.228279 ... 26.203318 26.634016 25.843769 28.824900 31.539536 29.698130 inf 28.891027 29.559868 28.736403
347 26.483530 24.948429 25.397800 27.887571 26.524778 25.069878 26.720396 27.640726 28.087231 24.709068 ... 24.990519 25.950739 26.422796 23.982191 27.307524 26.911955 28.891027 inf 24.929638 22.822123
348 26.518341 23.117655 23.607014 26.735687 26.566645 23.524864 23.816399 25.144579 25.315739 25.252844 ... 25.219345 23.628778 24.586554 24.797318 25.021420 25.400141 29.559868 24.929638 inf 24.156252
349 27.012949 25.012907 23.029932 26.722355 26.233059 25.000036 24.752766 26.942122 27.648315 25.040714 ... 24.330954 25.934887 26.133183 24.376286 26.258141 26.690134 28.736403 22.822123 24.156252 inf

350 rows × 350 columns

dists.min(), np.nanmax(dists[dists != np.inf]), np.mean(dists[dists != np.inf])
(14.918853, 39.791836, 27.870182)
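One common heuristic for choosing eps, shown here only as a sketch, is a k-distance plot: sort each point’s distance to its min_samples-th nearest neighbour and look for an elbow.

from sklearn.neighbors import NearestNeighbors

n_neighbors = 3  # match min_samples used below
nn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(Z_food)  # +1 because each point is its own nearest neighbour
kth_dists = np.sort(nn.kneighbors(Z_food)[0][:, -1])  # distance to the 3rd-nearest other point
plt.plot(kth_dists)
plt.xlabel("Points sorted by k-distance")
plt.ylabel("Distance to 3rd nearest neighbour");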
for eps in range(16, 30):
    print("\neps={}".format(eps))
    dbscan = DBSCAN(eps=eps, min_samples=3)
    labels = dbscan.fit_predict(Z_food)
    print("Number of clusters: {}".format(len(np.unique(labels))))
    print("Cluster sizes: {}".format(np.bincount(labels + 1)))  # +1 shifts the noise label (-1) to 0 so bincount works
eps=16
Number of clusters: 1
Cluster sizes: [350]

eps=17
Number of clusters: 3
Cluster sizes: [341   5   4]

eps=18
Number of clusters: 3
Cluster sizes: [334  13   3]

eps=19
Number of clusters: 3
Cluster sizes: [311  35   4]

eps=20
Number of clusters: 4
Cluster sizes: [274  70   3   3]

eps=21
Number of clusters: 2
Cluster sizes: [234 116]

eps=22
Number of clusters: 2
Cluster sizes: [182 168]

eps=23
Number of clusters: 2
Cluster sizes: [132 218]

eps=24
Number of clusters: 2
Cluster sizes: [ 81 269]

eps=25
Number of clusters: 2
Cluster sizes: [ 52 298]

eps=26
Number of clusters: 2
Cluster sizes: [ 30 320]

eps=27
Number of clusters: 2
Cluster sizes: [ 17 333]

eps=28
Number of clusters: 2
Cluster sizes: [  5 345]

eps=29
Number of clusters: 2
Cluster sizes: [  2 348]
With eps=18, we get two small, tight clusters, with all remaining points labeled as noise. Let’s look at the clusters more closely.

dbscan = DBSCAN(eps=18, min_samples=3)
dbscan_labels = dbscan.fit_predict(Z_food)
print("Number of clusters: {}".format(len(np.unique(dbscan_labels))))
print("Cluster sizes: {}".format(np.bincount(dbscan_labels + 1)))
print("Unique labels: {}".format(np.unique(dbscan_labels)))
Number of clusters: 3
Cluster sizes: [334  13   3]
Unique labels: [-1  0  1]
print_dbscan_clusters(Z_food, food_inputs, dbscan_labels)
[Figures: images in each of the two DBSCAN clusters]

Let’s examine noise points identified by DBSCAN.
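The helper below comes from the course plotting code; under the hood, noise points are simply the ones DBSCAN labeled -1, so we can also grab their indices directly:

noise_indices = np.where(dbscan_labels == -1)[0]  # indices of images labeled as noise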

print_dbscan_noise_images(Z_food, food_inputs, dbscan_labels)
[Figure: noise images identified by DBSCAN]



Now let’s try another dataset with human faces: a small subset of the Human Faces dataset (available here).

data_dir = "../data/test"
file_names = [image_file for image_file in glob.glob(data_dir + "/*/*.jpg")]
n_images = len(file_names)
BATCH_SIZE = n_images  # because our dataset is quite small
faces_inputs, classes = read_img_dataset(data_dir)
X_faces = faces_inputs.numpy()
X_faces.shape
(367, 3, 200, 200)
plot_sample_imgs(faces_inputs[0:24,:,:,:])
[Figure: grid of sample face images]
Z_faces = get_features(
    densenet, faces_inputs, 
).numpy()
Z_faces.shape
(367, 1024)
from sklearn.cluster import KMeans

k = 7
km = KMeans(n_clusters=k, n_init='auto', random_state=123)
km.fit(Z_faces)
KMeans(n_clusters=7, n_init='auto', random_state=123)
km.cluster_centers_.shape
(7, 1024)
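get_cluster_images (from the course plotting code) displays images close to each cluster center. A minimal sketch of the underlying idea, assuming Euclidean distance in the 1024-dimensional feature space:

from sklearn.metrics import pairwise_distances_argmin_min

# For each of the 7 cluster centers, find the index of the single closest image.
closest_idx, _ = pairwise_distances_argmin_min(km.cluster_centers_, Z_faces)
print(closest_idx)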
for cluster in range(k):
    get_cluster_images(km, Z_faces, X_faces, cluster, n_img=6)
Image indices:  [136 175 355 285  56 346]
Image indices:  [117 320 208 216 186  59]
Image indices:  [103 353 359 295  68  60]
Image indices:  [ 39 357 332 256 218   1]
Image indices:  [317 168 267   0 248 247]
Image indices:  [290 124 271  83 139 146]
Image indices:  [312 260 329 127  40 348]

[Figures: six sample images from each of the seven K-Means clusters]



Clustering faces with DBSCAN#

dbscan = DBSCAN()
labels = dbscan.fit_predict(Z_faces)
print("Unique labels: {}".format(np.unique(labels)))
Unique labels: [-1]
dists = euclidean_distances(Z_faces)
np.fill_diagonal(dists, np.inf)

dist_df = pd.DataFrame(
    dists
)

dist_df.iloc[10:20, 10:20]
10 11 12 13 14 15 16 17 18 19
10 inf 22.212444 29.023613 24.863905 22.001663 24.938940 28.098724 30.955692 27.270945 27.389570
11 22.212444 inf 27.826571 22.514793 19.561979 26.348663 26.740641 31.028826 27.770222 27.892704
12 29.023613 27.826571 inf 27.719786 28.463976 27.759411 26.995085 29.543497 29.854399 25.650290
13 24.863905 22.514793 27.719786 inf 22.240793 24.963823 25.705948 29.509878 27.242216 26.694988
14 22.001663 19.561979 28.463976 22.240793 inf 25.906570 27.872011 29.647667 28.404259 27.862476
15 24.938940 26.348663 27.759411 24.963823 25.906570 inf 27.595673 26.784702 28.887432 25.958988
16 28.098724 26.740641 26.995085 25.705948 27.872011 27.595673 inf 29.354176 28.925968 26.882242
17 30.955692 31.028826 29.543497 29.509878 29.647667 26.784702 29.354176 inf 33.242348 29.911688
18 27.270945 27.770222 29.854399 27.242216 28.404259 28.887432 28.925968 33.242348 inf 29.257753
19 27.389570 27.892704 25.650290 26.694988 27.862476 25.958988 26.882242 29.911688 29.257753 inf
dists.min(), np.nanmax(dists[dists != np.inf]), np.mean(dists[dists != np.inf])
(0.0, 36.27959, 25.94334)
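Interestingly, the smallest distance is 0, which suggests the dataset contains duplicate images. Since the diagonal is set to infinity, any zero entry corresponds to two distinct images with identical feature vectors:

dup_pairs = np.argwhere(dists == 0)  # (i, j) pairs of images with identical features
print(dup_pairs[:4])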
for eps in [16, 17, 18, 19, 20, 21, 22, 24]:
    print("\neps={}".format(eps))
    dbscan = DBSCAN(eps=eps, min_samples=3)
    labels = dbscan.fit_predict(Z_faces)
    print("Number of clusters: {}".format(len(np.unique(labels))))
    print("Cluster sizes: {}".format(np.bincount(labels + 1)))
eps=16
Number of clusters: 8
Cluster sizes: [345   3   3   3   3   4   3   3]

eps=17
Number of clusters: 12
Cluster sizes: [325   4   3   3   3   3   6   7   4   3   3   3]

eps=18
Number of clusters: 10
Cluster sizes: [305  33   3   3   3   7   3   4   3   3]

eps=19
Number of clusters: 9
Cluster sizes: [261  84   3   3   3   4   3   3   3]

eps=20
Number of clusters: 7
Cluster sizes: [211 140   3   4   3   3   3]

eps=21
Number of clusters: 5
Cluster sizes: [159 197   3   5   3]

eps=22
Number of clusters: 4
Cluster sizes: [100 261   3   3]

eps=24
Number of clusters: 2
Cluster sizes: [ 25 342]
Let’s pick eps=17, which gives us the largest number of clusters.

dbscan = DBSCAN(eps=17, min_samples=3)
dbscan_labels = dbscan.fit_predict(Z_faces)
print("Number of clusters: {}".format(len(np.unique(dbscan_labels))))
print("Cluster sizes: {}".format(np.bincount(dbscan_labels + 1)))
print("Unique labels: {}".format(np.unique(dbscan_labels)))
Number of clusters: 12
Cluster sizes: [325   4   3   3   3   3   6   7   4   3   3   3]
Unique labels: [-1  0  1  2  3  4  5  6  7  8  9 10]
print_dbscan_clusters(Z_faces, faces_inputs, dbscan_labels)
[Figures: images in each of the 11 DBSCAN clusters]

Let’s examine noise images identified by DBSCAN.

print_dbscan_noise_images(Z_faces, faces_inputs, dbscan_labels)
[Figure: noise images identified by DBSCAN]
  • We can guess why these images were identified as noise points: odd angles, heavy cropping, sunglasses, hands near faces, etc.

Hierarchical clustering#

from scipy.cluster.hierarchy import dendrogram, fcluster, ward

set_seed(seed=42)
plt.figure(figsize=(20, 15))
Z_hrch = ward(Z_faces)
dendrogram(Z_hrch, p=7, truncate_mode="level", no_labels=True)
plt.xlabel("Sample index")
plt.ylabel("Cluster distance");
[Figure: truncated dendrogram of the face features (x-axis: sample index, y-axis: cluster distance)]
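Besides asking for a fixed number of flat clusters with criterion="maxclust" (as we do below), fcluster can also cut the dendrogram at a chosen distance. A sketch, with an arbitrary threshold:

# Cut the dendrogram at cophenetic distance 28 (arbitrary; read a sensible
# threshold off the dendrogram above) instead of fixing the number of clusters.
labels_at_dist = fcluster(Z_hrch, 28, criterion="distance")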
cluster_labels = fcluster(Z_hrch, 30, criterion="maxclust")  # let's get flat clusters
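# fcluster labels start at 1; np.bincount(cluster_labels)[1:] gives the size of each flat cluster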
# hand_picked_clusters = np.arange(2, 30)
hand_picked_clusters = [2, 3, 5, 6, 7, 8, 9, 10, 12, 14, 15, 16, 17, 19, 20, 21, 22, 24, 26, 27, 28]
print_hierarchical_clusters(
    faces_inputs, Z_faces, cluster_labels, hand_picked_clusters
)
[Figures: images from each of the hand-picked hierarchical clusters]
  • Some clusters seem to capture people with distinctive faces, ages, facial expressions, hair colours and styles, lighting, or skin tones.