HiveBrain v1.2.0
Get Started
← Back to all entries
snippet · javascript · Tip

Group data using the K-means clustering algorithm in JavaScript

Submitted by: @import:30-seconds-of-code··
0
Viewed 0 times
clustering, data, javascript, group, means, the, using, algorithm

Problem

The K-means clustering algorithm is a popular unsupervised machine learning algorithm used to group a set of data into clusters. It works by iteratively assigning data points to the nearest cluster centroid and then recalculating the centroids based on the new assignments. This process is repeated until the centroids no longer change significantly or a maximum number of iterations is reached.
This implementation of the K-means clustering algorithm groups the given data into k clusters. No maximum number of iterations is set, so the algorithm will run until convergence is reached.
  1. As no initial centroids are provided, start by using the first k data points as the initial centroids, using Array.prototype.slice().
  2. Initialize the distances array to store the distances between each data point and each centroid, as well as the classes array to store the cluster assignments for each data point.
  3. Use a while loop to repeat the assignment and update steps as long as there are changes in the previous iteration, as indicated by the itr variable.

Solution

/**
 * Groups numeric data points into clusters using the K-means algorithm.
 *
 * The first `k` data points seed the centroids. Assignment and update steps
 * repeat until no point changes cluster; there is no iteration cap.
 * Centroid coordinates are rounded to two decimal places after every update.
 * The input array and its points are never modified.
 *
 * @param {number[][]} data - Points to cluster. The number of dimensions is
 *   taken from the first point.
 * @param {number} [k=1] - Requested number of clusters. Values larger than
 *   `data.length` are capped, because only that many seed points exist.
 * @returns {number[]} The cluster index of each point, in input order. Empty
 *   input yields `[]`; when no cluster is available (`k < 1`) every entry
 *   is `-1`.
 */
const kMeans = (data, k = 1) => {
  if (data.length === 0) return [];

  const dimensions = data[0].length;
  // Clamp k to [0, data.length]; a non-numeric k falls back to 0 clusters.
  const clusterCount = Math.min(
    Math.max(Math.trunc(k) || 0, 0),
    data.length
  );
  // Copy the seed points so the centroids never alias the caller's arrays.
  const centroids = data.slice(0, clusterCount).map(point => [...point]);
  const classes = new Array(data.length).fill(-1);
  let changed = true;

  while (changed) {
    changed = false;

    // Assignment step: attach every point to its nearest centroid.
    data.forEach((point, d) => {
      const distances = centroids.map(centroid =>
        Math.hypot(
          ...Array.from({ length: dimensions }, (_, i) => point[i] - centroid[i])
        )
      );
      // indexOf returns the first match, so ties go to the lowest index.
      const nearest = distances.indexOf(Math.min(...distances));
      if (classes[d] !== nearest) {
        classes[d] = nearest;
        changed = true;
      }
    });

    // Update step: move each centroid to the mean of its members.
    for (let c = 0; c < clusterCount; c++) {
      const sums = new Array(dimensions).fill(0);
      let size = 0;
      data.forEach((point, d) => {
        if (classes[d] !== c) return;
        size++;
        for (let i = 0; i < dimensions; i++) sums[i] += point[i];
      });
      // An empty cluster keeps its previous centroid. Dividing by zero would
      // turn it into NaN, which makes every later distance comparison fail
      // and labels all points -1.
      if (size === 0) continue;
      centroids[c] = sums.map(sum =>
        Number.parseFloat((sum / size).toFixed(2))
      );
    }
  }

  return classes;
};

kMeans([[0, 0], [0, 1], [1, 3], [2, 0]], 2); // [0, 1, 1, 0]


  1. As no initial centroids are provided, start by using the first k data points as the initial centroids, using Array.prototype.slice().
  2. Initialize the distances array to store the distances between each data point and each centroid, as well as the classes array to store the cluster assignments for each data point.
  3. Use a while loop to repeat the assignment and update steps as long as there are changes in the previous iteration, as indicated by the itr variable.
  4. Calculate the Euclidean distance between each data point and centroid using Math.hypot(), Object.keys(), and Array.prototype.map().
  5. Use Array.prototype.indexOf() and Math.min() to find the closest centroid for each data point.
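  6. Update the cluster assignments in the classes array and check if any changes were made.
  7. Recalculate each centroid as the mean of the data points assigned to it, rounded to two decimal places with Number.prototype.toFixed().
  8. Return the classes array once an iteration completes without any assignment changing.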

Code Snippets

/**
 * Clusters numeric data points with the K-means algorithm.
 *
 * Centroids start as the first `k` data points. The function alternates
 * between assigning points to the nearest centroid and recomputing the
 * centroids, stopping when an iteration changes no assignment (no iteration
 * limit is applied). Recomputed centroid coordinates are rounded to two
 * decimal places. Neither `data` nor its points are mutated.
 *
 * @param {number[][]} data - Points to cluster; dimensionality comes from
 *   the first point.
 * @param {number} [k=1] - Requested number of clusters, capped at
 *   `data.length` since only that many seeds are available.
 * @returns {number[]} Cluster index for each point, in input order. Returns
 *   `[]` for empty input and `-1` entries when `k < 1`.
 */
const kMeans = (data, k = 1) => {
  if (data.length === 0) return [];

  const dimensions = data[0].length;
  // Clamp k to [0, data.length]; a non-numeric k falls back to 0 clusters.
  const clusterCount = Math.min(
    Math.max(Math.trunc(k) || 0, 0),
    data.length
  );
  // Copy the seed points so the centroids never alias the caller's arrays.
  const centroids = data.slice(0, clusterCount).map(point => [...point]);
  const classes = new Array(data.length).fill(-1);
  let changed = true;

  while (changed) {
    changed = false;

    // Assignment step: attach every point to its nearest centroid.
    data.forEach((point, d) => {
      const distances = centroids.map(centroid =>
        Math.hypot(
          ...Array.from({ length: dimensions }, (_, i) => point[i] - centroid[i])
        )
      );
      // indexOf returns the first match, so ties go to the lowest index.
      const nearest = distances.indexOf(Math.min(...distances));
      if (classes[d] !== nearest) {
        classes[d] = nearest;
        changed = true;
      }
    });

    // Update step: move each centroid to the mean of its members.
    for (let c = 0; c < clusterCount; c++) {
      const sums = new Array(dimensions).fill(0);
      let size = 0;
      data.forEach((point, d) => {
        if (classes[d] !== c) return;
        size++;
        for (let i = 0; i < dimensions; i++) sums[i] += point[i];
      });
      // An empty cluster keeps its previous centroid. Dividing by zero would
      // turn it into NaN, which makes every later distance comparison fail
      // and labels all points -1.
      if (size === 0) continue;
      centroids[c] = sums.map(sum =>
        Number.parseFloat((sum / size).toFixed(2))
      );
    }
  }

  return classes;
};

kMeans([[0, 0], [0, 1], [1, 3], [2, 0]], 2); // [0, 1, 1, 0]

Context

From 30-seconds-of-code: k-means

Revisions (0)

No revisions yet.