#!/usr/bin/env python
# coding: utf-8
# This was input cell with execution count: 1
import os
import numpy as np
from helper_functions import *
# Please don't modify this cell
# This was input cell with execution count:
def get_initial_means(array, k):
    """
    Pick k random points from the 2D array (without replacement)
    to use as initial cluster means.

    The input is copied before it is shuffled, so the caller's array is
    left untouched. Randomness comes from numpy's global random state.

    params:
    array = numpy.ndarray[numpy.ndarray[float]] - m x n | datapoints x features
    k = int - number of means to pick, 0 <= k <= m

    returns:
    initial_means = numpy.ndarray[numpy.ndarray[float]] - k x n

    raises:
    ValueError - if k is negative or larger than the number of datapoints
    """
    num_points = len(array)
    # Sampling without replacement cannot produce more rows than exist, and a
    # negative k would silently slice from the end instead of failing.
    if not 0 <= k <= num_points:
        raise ValueError(
            "k must be between 0 and the number of datapoints ({}), got {}"
            .format(num_points, k))

    # Shuffling a copy and taking the first k rows is a random draw of k
    # distinct rows.
    shuffled = array.copy()
    np.random.shuffle(shuffled)
    return shuffled[:k]
# This was input cell with execution count:
def dist(a, b, axis=1):
    """
    Euclidean distance between a and b.

    The element-wise difference a - b is reduced with numpy's default
    norm along `axis` (axis 1 unless overridden).
    """
    difference = a - b
    return np.linalg.norm(difference, axis=axis)
def k_means_step(image_values, k, means):
    """
    A single update/step of the K-means algorithm.

    Based on the input datapoints and the current mean estimates, assign
    every point to its nearest mean (Euclidean distance) and recompute each
    mean as the average of the points assigned to it.

    A cluster that receives no points keeps its previous mean; averaging an
    empty set would produce NaN and poison every later distance computation.

    params:
    image_values = numpy.ndarray[numpy.ndarray[float]] - m x n | datapoints x features
    k = int - number of clusters
    means = numpy.ndarray[numpy.ndarray[float]] - k x n | current cluster means

    returns:
    new_means = numpy.ndarray[numpy.ndarray[float]] - k x n
    clusters = numpy.ndarray[int] - length m, index of the nearest mean per point

    raises:
    ValueError - if the number of rows in means differs from k
    """
    # Work in float so unsigned integer pixel data cannot wrap around when
    # a larger mean is subtracted from a smaller value.
    points = np.asarray(image_values, dtype=float)
    current_means = np.asarray(means, dtype=float)
    if len(current_means) != k:
        raise ValueError(
            "expected {} means, got {}".format(k, len(current_means)))

    # One column of distances per mean. Looping over the (few) means keeps the
    # temporaries at m x n instead of materialising an m x k x n array.
    distances = np.empty((len(points), k), dtype=float)
    for idx, mean in enumerate(current_means):
        offsets = points - mean
        distances[:, idx] = np.sqrt(np.sum(np.square(offsets), axis=1))

    # Each point belongs to the cluster whose mean is closest.
    clusters = np.argmin(distances, axis=1)

    # Start from the old means so empty clusters are carried over unchanged.
    new_means = current_means.copy()
    for idx in range(k):
        members = points[clusters == idx]
        if len(members):
            new_means[idx] = members.mean(axis=0)

    return new_means, clusters
# This was input cell with execution count:
def k_means_segment(image_values, k=3, initial_means=None):
    """
    Separate the provided RGB values into
    k separate clusters using the k-means algorithm,
    then return an updated version of the image
    with the original values replaced with
    the corresponding cluster values.

    NOTE(review): the body visible here only performs the setup (copies,
    flattening the image, choosing starting means). The convergence loop and
    the return statement announced by the final comment are missing, so as
    written nothing is returned - confirm against the complete notebook.

    params:
    image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] - r x c x ch
    k = int
    initial_means = numpy.ndarray[numpy.ndarray[float]] or None

    returns:
    updated_image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]]
    """
    firstIter = True # To make sure the loop executes at least once
    loops = 0
    # Work on a copy so the caller's image is not modified
    updatedValues = image_values.copy()
    # Copy the caller-supplied means (if any) so they are not modified either
    if (initial_means is None):
        startMeans = None
    else:
        startMeans = initial_means.copy()
    # One cluster index per pixel (rows * columns entries)
    prevClusters = np.zeros(shape=len(image_values) * len(image_values[0]),
                            dtype=int) # To hold the previous cluster assignment
    currClusters = np.zeros(shape=len(image_values) * len(image_values[0]),
                            dtype=int) # To hold the current cluster assignment
    # To create a matrix that k_means_step can handle:
    # flatten r x c x ch into (r * c) x ch, one row per pixel
    dataPoints = np.reshape(image_values, (len(image_values) *
                            len(image_values[0]), len(image_values[0][0])))
    # Create some initial means if needed
    if (startMeans is None):
        startMeans = get_initial_means(dataPoints, k)
    # Loop through until convergence
    # NOTE(review): the loop itself is not present in the visible source
# coding: utf-8
# This was input cell with execution count: 1
import os
import numpy as np
from helper_functions import *
# Please don't modify this cell
# This was input cell with execution count:
def get_initial_means(array, k):
    """
    Pick k random points from the 2D array (without replacement)
    to use as initial cluster means.

    The input is copied before it is shuffled, so the caller's array is
    left untouched. Randomness comes from numpy's global random state.

    params:
    array = numpy.ndarray[numpy.ndarray[float]] - m x n | datapoints x features
    k = int - number of means to pick, 0 <= k <= m

    returns:
    initial_means = numpy.ndarray[numpy.ndarray[float]] - k x n

    raises:
    ValueError - if k is negative or larger than the number of datapoints
    """
    num_points = len(array)
    # Sampling without replacement cannot produce more rows than exist, and a
    # negative k would silently slice from the end instead of failing.
    if not 0 <= k <= num_points:
        raise ValueError(
            "k must be between 0 and the number of datapoints ({}), got {}"
            .format(num_points, k))

    # Shuffling a copy and taking the first k rows is a random draw of k
    # distinct rows.
    shuffled = array.copy()
    np.random.shuffle(shuffled)
    return shuffled[:k]
# This was input cell with execution count:
def dist(a, b, axis=1):
    """
    Euclidean distance between a and b.

    The element-wise difference a - b is reduced with numpy's default
    norm along `axis` (axis 1 unless overridden).
    """
    difference = a - b
    return np.linalg.norm(difference, axis=axis)
def k_means_step(image_values, k, means):
    """
    A single update/step of the K-means algorithm.

    Based on the input datapoints and the current mean estimates, assign
    every point to its nearest mean (Euclidean distance) and recompute each
    mean as the average of the points assigned to it.

    A cluster that receives no points keeps its previous mean; averaging an
    empty set would produce NaN and poison every later distance computation.

    params:
    image_values = numpy.ndarray[numpy.ndarray[float]] - m x n | datapoints x features
    k = int - number of clusters
    means = numpy.ndarray[numpy.ndarray[float]] - k x n | current cluster means

    returns:
    new_means = numpy.ndarray[numpy.ndarray[float]] - k x n
    clusters = numpy.ndarray[int] - length m, index of the nearest mean per point

    raises:
    ValueError - if the number of rows in means differs from k
    """
    # Work in float so unsigned integer pixel data cannot wrap around when
    # a larger mean is subtracted from a smaller value.
    points = np.asarray(image_values, dtype=float)
    current_means = np.asarray(means, dtype=float)
    if len(current_means) != k:
        raise ValueError(
            "expected {} means, got {}".format(k, len(current_means)))

    # One column of distances per mean. Looping over the (few) means keeps the
    # temporaries at m x n instead of materialising an m x k x n array.
    distances = np.empty((len(points), k), dtype=float)
    for idx, mean in enumerate(current_means):
        offsets = points - mean
        distances[:, idx] = np.sqrt(np.sum(np.square(offsets), axis=1))

    # Each point belongs to the cluster whose mean is closest.
    clusters = np.argmin(distances, axis=1)

    # Start from the old means so empty clusters are carried over unchanged.
    new_means = current_means.copy()
    for idx in range(k):
        members = points[clusters == idx]
        if len(members):
            new_means[idx] = members.mean(axis=0)

    return new_means, clusters
# This was input cell with execution count:
def k_means_segment(image_values, k=3, initial_means=None):
"""
Separate the provided RGB values into
k separate clusters using the k-means algorithm,
then return an updated version of the image
with the original values replaced with
the corresponding cluster values.
params:
image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] - r x c x
ch
k = int
initial_means = numpy.ndarray[numpy.ndarray[float]] or None
returns:
updated_image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]]
"""
firstIter = True # To make sure the loop executes at least once
loops = 0
updatedValues = image_values.copy()
if (initial_means is None):
startMeans = None
else:
startMeans = initial_means.copy()
prevClusters = np.zeros(shape=len(image_values) * len(image_values[0]),
dtype=int) # To hold the previous cluster
assignment
currClusters = np.zeros(shape=len(image_values) * len(image_values[0]),
dtype=int) # To hold the current cluster assignment
# To create a matrix that k_means_step can handle
dataPoints = np.reshape(image_values, (len(image_values) *
len(image_values[0]), len(image_values[0][0])))
# Create some initial means if needed
if (startMeans is None):
startMeans = get_initial_means(dataPoints, k)
# Loop through until convergence