### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: solution.ipynb
import numpy as np
from helper_functions import *
def get_initial_means(array, k):
    """
    Picks k random points from the array
    (without replacement) to use as initial
    cluster means
    params:
    array = numpy.ndarray[numpy.ndarray[float]] - m x n | datapoints x features
    k = int
    returns:
    initial_means = numpy.ndarray[numpy.ndarray[float]] - k x n
    raises:
    ValueError (from numpy.random.choice) if k is larger than the number
    of datapoints, since sampling is done without replacement
    """
    array = np.asarray(array)
    # Only the number of datapoints (first axis) is needed; reading shape[0]
    # instead of unpacking the whole shape avoids an unused local and does
    # not fail on inputs that are not exactly 2-D.
    num_points = array.shape[0]
    # Draw k distinct row indices, then gather those rows in one indexing op.
    chosen_rows = np.random.choice(num_points, k, replace=False)
    return array[chosen_rows]
########## DON'T WRITE ANY CODE OUTSIDE THE FUNCTION! ################
##### CODE BELOW IS USED FOR RUNNING LOCAL TEST DON'T MODIFY IT ######
################ END OF LOCAL TEST CODE SECTION ######################
def k_means_step(X, k, means):
    """
    A single update/step of the K-means algorithm
    Based on a input X and current mean estimate
    calculate new means and predict clusters for each of the pixel
    params:
    X = numpy.ndarray[numpy.ndarray[float]] - m x n | pixels x features (already
    flattened)
    k = int
    means = numpy.ndarray[numpy.ndarray[float]] - k x n
    returns:
    (new_means, clusters)
    new_means = numpy.ndarray[numpy.ndarray[float]] - k x n
    clusters = numpy.ndarray[int] - m sized vector
    notes:
    A cluster that receives no points keeps its previous mean, so the
    returned means never contain NaN because of an empty cluster.
    """
    X = np.asarray(X)
    means = np.asarray(means)
    # Integer-typed means (e.g. rows sampled from an integer image) would make
    # the subtraction below run in integer arithmetic, which wraps around for
    # unsigned dtypes. Promote them to float; floating inputs are untouched.
    if not np.issubdtype(means.dtype, np.floating):
        means = means.astype(float)

    # 1. distances between each point and each cluster centre.
    # Broadcasting (m, 1, n) against (1, k, n) gives per-feature differences
    # of shape (m, k, n); summing the squares over the feature axis and taking
    # the root yields an (m, k) table of Euclidean distances.
    differences = X[:, None, :] - means[None, :, :]
    distances = np.sqrt(np.sum(differences ** 2, axis=2))

    # 2. assign each point to the closest cluster centre
    clusters = np.argmin(distances, axis=1)

    # 3. calculate the new means
    new_means = []
    for i in range(k):
        members = X[clusters == i]
        if members.shape[0] > 0:
            new_means.append(np.mean(members, axis=0))
        else:
            # The mean of zero points is undefined (numpy returns NaN with a
            # warning); carry the previous centre forward instead.
            new_means.append(means[i])
    return (np.array(new_means), clusters)
########## DON'T WRITE ANY CODE OUTSIDE THE FUNCTION! ################
##### CODE BELOW IS USED FOR RUNNING LOCAL TEST DON'T MODIFY IT ######
################ END OF LOCAL TEST CODE SECTION ######################
def k_means_segment(image_values, k=3, initial_means=None):
"""
Separate the provided RGB values into
k separate clusters using the k-means algorithm,
then return an updated version of the image
with the original values replaced with
the corresponding cluster values.
params:
image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] - r x c x ch
k = int
initial_means = numpy.ndarray[numpy.ndarray[float]] or None
returns:
updated_image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] - r x
c x ch
"""
# TODO: finish this function
# print(image_values.shape)
r,c,ch = image_values.shape
image_values_original = image_values
image_values = image_values_original.reshape(r*c,3)
# print(image_values.shape, image_values_original.shape)
if initial_means is None:
initial_means = get_initial_means(image_values, k)
prev_means = initial_means
# print(image_values, k ,initial_means)
means, prev_clusters = k_means_step(image_values, k, initial_means)
# if means == initial_means:
# return