226 lines
4.8 KiB
C
226 lines
4.8 KiB
C
//
|
|
// Created by Tiago Batista Cardoso on 2/26/2026.
|
|
//
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "algorithms.h"
|
|
#include <time.h>
|
|
|
|
// -- helper methods
|
|
|
|
// total number of edges
|
|
static int count_edges(const graph_t *graph)
|
|
{
|
|
int count = 0;
|
|
for (int i = 0; i < graph->n; i++) {
|
|
node_t *n = graph->adj_lists[i];
|
|
while (n) {
|
|
count++;
|
|
n = n->next;
|
|
}
|
|
}
|
|
return count / 2; // undirected
|
|
}
|
|
|
|
// degree of node i
|
|
static int degree(const graph_t *graph, int i)
|
|
{
|
|
int d = 0;
|
|
node_t *n = graph->adj_lists[i];
|
|
while (n) {
|
|
d++;
|
|
n = n->next;
|
|
}
|
|
return d;
|
|
}
|
|
|
|
// sum of weights of edges from node i to community c
|
|
static double k_i_in(const graph_t *graph, int i, int c, int *community)
|
|
{
|
|
double sum = 0.0;
|
|
node_t *n = graph->adj_lists[i];
|
|
while (n) {
|
|
if (community[n->id] == c)
|
|
sum += 1.0;
|
|
n = n->next;
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
// sum of degrees of all nodes in community c
|
|
static double sigma_tot(const graph_t *graph, int c, int *community)
|
|
{
|
|
double sum = 0.0;
|
|
for (int i = 0; i < graph->n; i++)
|
|
if (community[i] == c)
|
|
sum += degree(graph, i);
|
|
return sum;
|
|
}
|
|
|
|
// ΔQ = [ (k_i_in - sigma_tot * k_i / 2m) / m ]
|
|
static double delta_modularity(const graph_t *graph, int i, int c,
|
|
int *community, double m)
|
|
{
|
|
double ki = degree(graph, i);
|
|
double ki_in = k_i_in(graph, i, c, community);
|
|
double sig = sigma_tot(graph, c, community);
|
|
|
|
return (ki_in - (sig * ki) / (2.0 * m)) / m;
|
|
}
|
|
|
|
static int phase1(const graph_t *graph, int *community, double m)
|
|
{
|
|
int n = graph->n;
|
|
int improved = 1;
|
|
int total_moves = 0;
|
|
|
|
while (improved) {
|
|
improved = 0;
|
|
for (int i = 0; i < n; i++) {
|
|
int best_community = community[i];
|
|
double best_gain = 0.0;
|
|
|
|
// Collect neighboring communities
|
|
int *neighbor_communities = calloc(n, sizeof(int));
|
|
int nc_count = 0;
|
|
node_t *nb = graph->adj_lists[i];
|
|
while (nb) {
|
|
int c = community[nb->id];
|
|
// Check if already in list
|
|
int found = 0;
|
|
for (int x = 0; x < nc_count; x++)
|
|
if (neighbor_communities[x] == c) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
if (!found)
|
|
neighbor_communities[nc_count++] = c;
|
|
nb = nb->next;
|
|
}
|
|
|
|
// temporarily remove i from its community
|
|
int old_community = community[i];
|
|
community[i] = -1;
|
|
|
|
// try moving i to each neighboring community
|
|
for (int x = 0; x < nc_count; x++) {
|
|
int c = neighbor_communities[x];
|
|
if (c == old_community)
|
|
continue;
|
|
double gain = delta_modularity(graph, i, c,
|
|
community, m) -
|
|
delta_modularity(graph, i,
|
|
old_community,
|
|
community, m);
|
|
if (gain > best_gain) {
|
|
best_gain = gain;
|
|
best_community = c;
|
|
}
|
|
}
|
|
|
|
community[i] = best_community;
|
|
|
|
if (best_community != old_community) {
|
|
improved = 1;
|
|
total_moves++;
|
|
}
|
|
|
|
free(neighbor_communities);
|
|
}
|
|
}
|
|
|
|
return total_moves;
|
|
}
|
|
|
|
static double compute_modularity(const graph_t *graph, int *community, double m)
|
|
{
|
|
int n = graph->n;
|
|
double Q = 0.0;
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
node_t *nb = graph->adj_lists[i];
|
|
while (nb) {
|
|
int j = nb->id;
|
|
if (community[i] == community[j])
|
|
Q += 1.0 - ((double)degree(graph, i) *
|
|
degree(graph, j)) /
|
|
(2.0 * m);
|
|
nb = nb->next;
|
|
}
|
|
}
|
|
return Q / (2.0 * m);
|
|
}
|
|
|
|
static int compact(int *community, int n)
|
|
{
|
|
int *map = malloc(n * sizeof(int));
|
|
memset(map, -1, n * sizeof(int));
|
|
int next = 0;
|
|
for (int i = 0; i < n; i++) {
|
|
if (community[i] == -1)
|
|
continue;
|
|
if (map[community[i]] == -1)
|
|
map[community[i]] = next++;
|
|
community[i] = map[community[i]];
|
|
}
|
|
free(map);
|
|
return next;
|
|
}
|
|
|
|
louvain_result_t *compute_louvain(const graph_t *graph)
|
|
{
|
|
clock_t start, end;
|
|
double cpu_time_used;
|
|
printf("[compute_louvain()] starting...\n");
|
|
start = clock();
|
|
|
|
int n = graph->n;
|
|
double m = (double)count_edges(graph);
|
|
|
|
if (m == 0) {
|
|
printf("[louvain] no edges found\n");
|
|
louvain_result_t *r = malloc(sizeof(louvain_result_t));
|
|
r->node_community = calloc(n, sizeof(int));
|
|
r->count = n;
|
|
r->modularity = 0.0;
|
|
return r;
|
|
}
|
|
|
|
// each node starts in its own community
|
|
int *community = malloc(n * sizeof(int));
|
|
for (int i = 0; i < n; i++)
|
|
community[i] = i;
|
|
|
|
printf("[louvain] m = %.0f, n = %d\n", m, n);
|
|
|
|
int moves = phase1(graph, community, m);
|
|
printf("[louvain] phase 1 done — %d moves\n", moves);
|
|
|
|
double Q = compute_modularity(graph, community, m);
|
|
printf("[louvain] modularity Q = %.4f\n", Q);
|
|
|
|
int count = compact(community, n);
|
|
printf("[louvain] %d communities detected\n", count);
|
|
|
|
louvain_result_t *result = malloc(sizeof(louvain_result_t));
|
|
result->node_community = community;
|
|
result->count = count;
|
|
result->modularity = Q;
|
|
|
|
end = clock();
|
|
cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
|
|
printf("[compute_louvain()] done (%f s)\n", cpu_time_used);
|
|
|
|
return result;
|
|
}
|
|
|
|
void free_louvain_result(louvain_result_t *result)
|
|
{
|
|
if (!result)
|
|
return;
|
|
free(result->node_community);
|
|
free(result);
|
|
}
|