// K Means Clustering
/*
In this code, the kMeansClustering function performs the K-means clustering algorithm using the provided points and initial centroids. The assignPointsToClusters function assigns each point to the nearest centroid, and the updateCentroids function updates the centroids based on the assigned points. The main function generates random points and centroids, and then performs the K-means clustering sequentially and in parallel using OpenMP. The running time for both versions is measured using the std::chrono library.
*/


#include <iostream>
#include <vector>
#include <cmath>
#include <chrono>
#include <random>
#include <omp.h>

struct Point {
    double x, y;
};

// Function to calculate the Euclidean distance between two points
double distance(const Point& p1, const Point& p2) {
    double dx = p1.x - p2.x;
    double dy = p1.y - p2.y;
    return std::sqrt(dx * dx + dy * dy);
}

// Function to assign points to clusters
void assignPointsToClusters(const std::vector<Point>& points,
                            const std::vector<Point>& centroids,
                            std::vector<int>& clusters) {
    int numPoints = points.size();
    int numClusters = centroids.size();

    #pragma omp parallel for
    for (int i = 0; i < numPoints; ++i) {
        double minDist = std::numeric_limits<double>::max();
        int clusterIndex = -1;
        for (int j = 0; j < numClusters; ++j) {
            double dist = distance(points[i], centroids[j]);
            if (dist < minDist) {
                minDist = dist;
                clusterIndex = j;
            }
        }
        clusters[i] = clusterIndex;
    }
}

// Function to update centroids based on the assigned points
void updateCentroids(const std::vector<Point>& points,
                     const std::vector<int>& clusters,
                     std::vector<Point>& centroids) {
    int numPoints = points.size();
    int numClusters = centroids.size();

    std::vector<int> clusterCounts(numClusters, 0);
    std::vector<double> clusterSumsX(numClusters, 0.0);
    std::vector<double> clusterSumsY(numClusters, 0.0);

    #pragma omp parallel for reduction(+:clusterCounts[:numClusters], clusterSumsX[:numClusters], clusterSumsY[:numClusters])
    for (int i = 0; i < numPoints; ++i) {
        int clusterIndex = clusters[i];
        clusterCounts[clusterIndex]++;
        clusterSumsX[clusterIndex] += points[i].x;
        clusterSumsY[clusterIndex] += points[i].y;
    }

    for (int j = 0; j < numClusters; ++j) {
        if (clusterCounts[j] > 0) {
            centroids[j].x = clusterSumsX[j] / clusterCounts[j];
            centroids[j].y = clusterSumsY[j] / clusterCounts[j];
        }
    }
}

// Function to perform K-means clustering
void kMeansClustering(std::vector<Point>& points, std::vector<Point>& centroids,
                      std::vector<int>& clusters, int maxIterations) {
    for (int iteration = 0; iteration < maxIterations; ++iteration) {
        assignPointsToClusters(points, centroids, clusters);
        updateCentroids(points, clusters, centroids);
    }
}

// Function to generate random points
std::vector<Point> generateRandomPoints(int numPoints, double minX, double maxX,
                                        double minY, double maxY) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<double> distX(minX, maxX);
    std::uniform_real_distribution<double> distY(minY, maxY);

    std::vector<Point> points(numPoints);
    for (int i = 0; i < numPoints; ++i) {
        points[i].x = distX(gen);
        points[i].y = distY(gen);
    }

    return points;
}

// Function to generate random centroids
std::vector<Point> generateRandomCentroids(int numClusters, double minX, double maxX,
                                           double minY, double maxY) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<double> distX(minX, maxX);
    std::uniform_real_distribution<double> distY(minY, maxY);

    std::vector<Point> centroids(numClusters);
    for (int i = 0; i < numClusters; ++i) {
        centroids[i].x = distX(gen);
        centroids[i].y = distY(gen);
    }

    return centroids;
}

int main() {
    const int numPoints = 1000000;
    const int numClusters = 5;
    const double minX = 0.0;
    const double maxX = 1000.0;
    const double minY = 0.0;
    const double maxY = 1000.0;
    const int maxIterations = 100;

    // Generate random points and centroids
    std::vector<Point> points = generateRandomPoints(numPoints, minX, maxX, minY, maxY);
    std::vector<Point> centroids = generateRandomCentroids(numClusters, minX, maxX, minY, maxY);

    std::vector<int> clusters(numPoints, 0);

    // Perform sequential K-means clustering and measure time
    auto startSeq = std::chrono::high_resolution_clock::now();
    kMeansClustering(points, centroids, clusters, maxIterations);
    auto endSeq = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> durationSeq = endSeq - startSeq;
    std::cout << "Sequential Time: " << durationSeq.count() << " seconds" << std::endl;

    // Reset clusters
    std::fill(clusters.begin(), clusters.end(), 0);

    // Perform parallel K-means clustering and measure time
    auto startPar = std::chrono::high_resolution_clock::now();
    #pragma omp parallel
    {
        kMeansClustering(points, centroids, clusters, maxIterations);
    }
    auto endPar = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> durationPar = endPar - startPar;
    std::cout << "Parallel Time: " << durationPar.count() << " seconds" << std::endl;

    return 0;
}
