-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdbscan_algo.py
More file actions
76 lines (64 loc) · 2.46 KB
/
dbscan_algo.py
File metadata and controls
76 lines (64 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import geopandas as gpd
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from shapely.geometry import Point
import folium
# --- Load inputs -------------------------------------------------------------
outbreaks = gpd.read_file("export.geojson")
districts = gpd.read_file("gadm41_IND_2.json")

# Reproject to EPSG:4326 so geometry x/y can be read as longitude/latitude.
outbreaks = outbreaks.to_crs(epsg=4326)

# --- DBSCAN hyper-parameters -------------------------------------------------
# With metric="haversine" the distances are angles in radians, so eps is an
# angle, not degrees or kilometres: 0.002 rad * ~6371 km (mean Earth radius)
# is roughly 12.7 km on the ground.
EPS_RADIANS = 0.002
# Minimum number of samples in a neighbourhood for a point to count as core.
MIN_SAMPLES = 10

# --- Cluster the outbreak locations ------------------------------------------
# Column order is (lat, lon): the haversine metric expects latitude first.
# NOTE(review): .y/.x are only defined for Point geometries, so this assumes
# every feature in export.geojson is a Point -- confirm against the data.
outbreak_coords = list(zip(outbreaks.geometry.y, outbreaks.geometry.x))
coords_df = pd.DataFrame(outbreak_coords, columns=["lat", "lon"])

db = DBSCAN(
    eps=EPS_RADIANS,
    min_samples=MIN_SAMPLES,
    metric='haversine',
).fit(np.radians(coords_df))
outbreaks["cluster"] = db.labels_

# Boolean mask, aligned by position with the rows of `outbreaks`, that is True
# for core samples. It is used later to tell core points from border points.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Label -1 is what DBSCAN assigns to noise (points belonging to no cluster).
n_noise = (outbreaks["cluster"] == -1).sum()
print(f"Number of noise points: {n_noise}")

# --- Base map ----------------------------------------------------------------
# Centre the map on the mean outbreak position and overlay the district layer.
m = folium.Map(
    location=[coords_df["lat"].mean(), coords_df["lon"].mean()],
    zoom_start=5,
)
folium.GeoJson(districts, name="Districts").add_to(m)
# Draw one marker per outbreak, styled by the point's role in the clustering:
#   noise (label -1) -> blue icon marker
#   core sample      -> red circle
#   border sample    -> yellow circle (in a cluster, but not a core sample)
# `core_samples_mask` is positional, so it is zipped with the rows instead of
# being looked up through the DataFrame index.
for point, cluster_id, is_core in zip(
    outbreaks.geometry, outbreaks["cluster"], core_samples_mask
):
    lat, lon = point.y, point.x
    if cluster_id == -1:
        # Noise: blue marker
        marker = folium.Marker(
            location=[lat, lon],
            icon=folium.Icon(color='blue', icon='exclamation', prefix='fa'),
            popup="Noise Point"
        )
    else:
        # Core and border points differ only in size, colour and opacity.
        if is_core:
            radius, colour, opacity, label = 6, 'red', 0.9, "Core Point"
        else:
            radius, colour, opacity, label = 5, 'yellow', 0.7, "Border Point"
        marker = folium.CircleMarker(
            location=[lat, lon],
            radius=radius,
            color=colour,
            fill=True,
            fill_color=colour,
            fill_opacity=opacity,
            popup=f"{label} (Cluster {cluster_id})"
        )
    marker.add_to(m)
def compute_centroid(group: gpd.GeoDataFrame) -> pd.Series:
    """Return the mean position of the geometries in *group*.

    The result is a Series with two entries: ``'lat'`` (mean of the
    geometries' y coordinates) and ``'lon'`` (mean of their x coordinates).

    NOTE(review): this is a plain arithmetic mean of the coordinates and
    assumes Point geometries in a longitude/latitude CRS -- confirm with the
    caller.
    """
    points = group.geometry
    mean_lat = points.y.mean()
    mean_lon = points.x.mean()
    return pd.Series([mean_lat, mean_lon], index=['lat', 'lon'])
# Place one health unit per cluster at the cluster's mean position.
# Noise points (label -1) belong to no cluster and are left out.
is_clustered = outbreaks["cluster"] != -1
valid_clusters = outbreaks.loc[is_clustered].groupby("cluster")
deployment_locations = valid_clusters.apply(compute_centroid, include_groups=False)

# The index of `deployment_locations` is the cluster label.
for cluster_id, centroid in deployment_locations.iterrows():
    unit_icon = folium.Icon(color='darkgreen', icon='plus-square', prefix='fa')
    unit_marker = folium.Marker(
        location=[centroid['lat'], centroid['lon']],
        icon=unit_icon,
        popup=f"Health Unit for Cluster {cluster_id}",
    )
    unit_marker.add_to(m)

# Write the finished map to disk and tell the user where it went.
m.save("dbscan_clusters_map.html")
print("Map saved as dbscan_clusters_map.html")