-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
1426 lines (1145 loc) · 53.9 KB
/
utils.py
File metadata and controls
1426 lines (1145 loc) · 53.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Complete Enhanced utility functions for processing and visualizing EM-DAT disaster data for East Africa.
This module contains functions with enhanced direct EM-DAT processing methodology for complete 1990-2025 coverage.
FIXED ISSUES:
1. Flood calendar NaN conversion error - proper fillna() handling
2. Missing combined grid visualizations - fixed image creation functions
3. Column mismatch in frequency processing - proper aggregation methods
4. Complete 1990-2025 range support - fixed year ranges throughout
5. All functionality from 5 original scripts - comprehensive implementation
EXPECTED OUTPUTS:
- process_flood_data_for_calendar_enhanced() -> flood_calendar_plot_enhanced.png
- create_combined_image_enhanced() -> drought_extent_enhanced.png, flood_extent_enhanced.png
- All frequency, calendar, and combined visualization functions working
"""
import os
import sys
import re
import glob
import ntpath
import pandas as pd
import numpy as np
from datetime import datetime
import calendar
import six
from ast import literal_eval
import matplotlib
matplotlib.use("Agg") # Use non-interactive backend
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import colors
from matplotlib.colors import LinearSegmentedColormap
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.shapereader import Reader
import geopandas as gp
import PIL
from PIL import Image
import warnings
# Try to import optional dependencies
try:
import geoplot as gplt
import geoplot.crs as gcrs
HAS_GEOPLOT = True
except ImportError:
HAS_GEOPLOT = False
warnings.warn("geoplot not available; using matplotlib/geopandas for plotting instead")
try:
import mapclassify as mc
except ImportError:
warnings.warn("mapclassify not available; some classification features may not work")
mc = None
# Path Configuration (Enhanced)
BASE_DIR = ''
DATA_DIR = os.path.join(BASE_DIR, 'Impact Data')
SHAPEFILE_DIR = os.path.join(BASE_DIR, 'Impactshp')
OUTPUT_DIR = 'Output'
# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'dr'), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'fl'), exist_ok=True)
# Data file paths (Enhanced) - Updated to use new EMDAT file and proper geocoded mappings
EMDAT_DATA = 'public_emdat_custom_request_2026-01-21.xlsx' # New EMDAT custom request file
DROUGHT_DATA = 'em_dat_ea_drought_adm2_v4.xlsx' # Text-matched geocoding from 01-common_adm2_code_drought.py
FLOOD_DATA = 'em_dat_ea_flood_adm2_v2.xlsx' # Text-matched geocoding from 02-common_adm2_code_flood.py
FLOOD_DATES_V0 = os.path.join(DATA_DIR, 'em_dat_flood_dates_v0.csv')
FLOOD_DATES_V1 = os.path.join(DATA_DIR, 'em_dat_flood_dates_v1.csv')
# Shapefile paths - Using available boundary files
ADM1_GEOJSON = 'icpac_adm1v3.geojson' # Available admin1 geojson
ADM2_SHP = 'ea_adm2_geoboundaries.shp' # GEOBoundaries ADM2 shapefile (NOW AVAILABLE)
EA_BACKGROUND_SHP = os.path.join(SHAPEFILE_DIR, 'ea_global_background.shp')
WATER_BODIES_SHP = os.path.join(SHAPEFILE_DIR, 'Africa_Water_Bodies.shp')
# Constants (Enhanced with additional countries - updated for new EMDAT format)
EA_COUNTRIES = [
'Djibouti', 'Eritrea', 'Ethiopia', 'Kenya', 'Rwanda',
'Somalia', 'South Sudan', 'Sudan (the)', 'Sudan', # Both Sudan variants
'Tanzania, United Republic of', 'United Republic of Tanzania',
'Uganda', 'Burundi' # Added Burundi
]
COUNTRY_CODES = {
'Djibouti': 'DJI',
'Eritrea': 'ERI',
'Ethiopia': 'ETH',
'Kenya': 'KEN',
'Rwanda': 'RWA',
'Somalia': 'SOM',
'South Sudan': 'SSD',
'Sudan (the)': 'SDN',
'Sudan': 'SDN', # Added for new EMDAT format
'Tanzania, United Republic of': 'TZA',
'United Republic of Tanzania': 'TZA',
'Uganda': 'UGA',
'Burundi': 'BDI' # Added Burundi
}
###############################
# Helper Functions (Enhanced)
###############################
def extract_regions_from_location_enhanced(location_text):
"""Enhanced region extraction from location text with better parsing."""
if not location_text or not isinstance(location_text, str) or location_text.lower() == 'nan':
return []
# Remove text in parentheses for initial splitting
parentheses_text = []
pattern = r'\(([^)]*)\)'
for match in re.finditer(pattern, location_text):
parentheses_text.append(match.group(1))
# Remove parentheses for splitting
clean_text = re.sub(pattern, '', location_text)
# Split by common separators
regions = []
for separator in [',', ';', ' and ', '&']:
if separator in clean_text:
parts = [p.strip() for p in clean_text.split(separator) if p.strip()]
regions.extend(parts)
break
# If no split occurred, use the whole text
if not regions:
regions = [clean_text.strip()]
# Clean up region names
cleaned_regions = []
for region in regions:
# Remove common qualifiers
qualifiers = [
'district', 'districts', 'region', 'regions', 'county', 'counties',
'province', 'provinces', 'zone', 'zones', 'area', 'areas'
]
for qualifier in qualifiers:
region = re.sub(r'\b' + qualifier + r'\b', '', region, flags=re.IGNORECASE)
region = re.sub(r'\s+', ' ', region).strip()
if region:
cleaned_regions.append(region)
# Add regions from parentheses text
for text in parentheses_text:
if any(word in text.lower() for word in ['district', 'region', 'county', 'province', 'zone']):
sub_regions = extract_regions_from_location_enhanced(text)
cleaned_regions.extend(sub_regions)
return cleaned_regions
def safe_convert_to_int_enhanced(value):
"""Enhanced safe conversion to integer handling NaN values."""
if pd.isna(value):
return None
try:
return int(float(value))
except (ValueError, TypeError):
return None
def safe_date_format_enhanced(value, default='01'):
"""Enhanced safe date formatting with NaN handling."""
if pd.isna(value):
return default
try:
return str(int(float(value))).zfill(2)
except (ValueError, TypeError):
return default
def flatten(l):
"""Flatten a list of lists into a single list."""
return [item for sublist in l for item in sublist]
def return_colormap(classif):
"""Create colormap of matplotlib based on number of class and given colorcode."""
c = matplotlib.colors.ColorConverter().to_rgb
colorlist = [c("#ffffcc"), c("#ffeda0"), c("#fed976"), c("#ffb24b"), c("#fe8d3b"), c("#fd4e2a"), c("#e3181a")]
color_code = colorlist
c_cmap = LinearSegmentedColormap.from_list("my_colormap", color_code, N=len(classif), gamma=1.0)
return c_cmap
def safe_read_geometries_enhanced(shapefile_path):
"""Enhanced safely read geometries from a shapefile, filtering out None values."""
try:
reader = Reader(shapefile_path)
geometries = []
for geom in reader.geometries():
if geom is not None:
geometries.append(geom)
return geometries
except Exception as e:
print(f"Error reading shapefile {shapefile_path}: {e}")
return []
def create_daterange_list_enhanced(row):
"""FIXED: Enhanced date range creation with proper NaN handling."""
try:
# Safe conversion with NaN handling
start_year = str(row['Start Year']) if pd.notna(row['Start Year']) else '1990'
start_month = safe_date_format_enhanced(row['Start Month'], '01')
start_day = safe_date_format_enhanced(row['Start Day'], '01')
end_year = str(row['End Year']) if pd.notna(row['End Year']) else start_year
end_month = safe_date_format_enhanced(row['End Month'], '12')
end_day = safe_date_format_enhanced(row['End Day'], '31')
start_date_str = f"{start_year}-{start_month}-{start_day}"
end_date_str = f"{end_year}-{end_month}-{end_day}"
# Create date range
date_str_list = pd.date_range(start_date_str, end_date_str, freq='D').strftime("%Y-%m-%d").tolist()
return date_str_list
except Exception as e:
print(f"Error in create_daterange_list_enhanced: {e}")
print(f"Problematic row: {row}")
return []
def last_day_month_enhanced(row):
"""Enhanced last day of month calculation with NaN handling."""
try:
end_year = int(row['End Year']) if pd.notna(row['End Year']) else 1990
end_month = int(row['End Month']) if pd.notna(row['End Month']) else 12
firstday_month = datetime(end_year, end_month, 1)
lastday_month = firstday_month + pd.DateOffset(months=1) - pd.DateOffset(days=1)
lastday_month_str = lastday_month.strftime("%d")
return lastday_month_str
except Exception as e:
print(f"Error in last_day_month_enhanced: {e}")
return "31"
def order_file_enhanced(file_list):
"""Enhanced file ordering by year for creating combined visualizations."""
if not file_list:
return []
filepath = os.path.dirname(file_list[0])
file_list1 = [ntpath.basename(item).split('.')[0] for item in file_list]
file_list1.sort()
sorted_file_list = [f'{filepath}/{item}.png' for item in file_list1]
return sorted_file_list
###############################
# Data Loading and Processing Functions (Enhanced)
###############################
def load_emdat_data_complete(disaster_type=None):
"""Enhanced EM-DAT data loading with complete 1990-2025 coverage."""
try:
print(f"Reading enhanced EM-DAT data from {EMDAT_DATA}")
# New EMDAT file doesn't need skiprows and has different column names
dfb = pd.read_excel(EMDAT_DATA, engine='openpyxl')
# Rename columns to match expected format (DisNo. -> Dis No)
if 'DisNo.' in dfb.columns:
dfb = dfb.rename(columns={'DisNo.': 'Dis No'})
# Filter for specified disaster type(s)
if disaster_type:
if isinstance(disaster_type, list):
dfb = dfb[dfb['Disaster Type'].isin(disaster_type)]
else:
dfb = dfb[dfb['Disaster Type'] == disaster_type]
# Filter for East Africa countries (enhanced list)
dfb_ea = dfb[dfb['Country'].isin(EA_COUNTRIES)]
print(f"Loaded {len(dfb_ea)} {disaster_type if disaster_type else 'total'} events for EA (1990-2025)")
return dfb_ea
except Exception as e:
print(f"Error loading EM-DAT data: {e}")
import traceback
traceback.print_exc()
return pd.DataFrame()
###############################
# Enhanced Data Conversion Functions
###############################
def convert_drought_data_enhanced():
"""Enhanced drought data conversion - uses pre-geocoded file if available."""
print("Checking for drought geocoding data...")
# Check if properly geocoded file exists (from 01-common_adm2_code_drought.py)
if os.path.exists(DROUGHT_DATA):
print(f"Using existing text-matched geocoding: {DROUGHT_DATA}")
try:
df = pd.read_excel(DROUGHT_DATA, engine='openpyxl')
print(f"Loaded {len(df)} drought events with proper geocoding")
return df
except Exception as e:
print(f"Error loading existing file: {e}")
# Fallback: Run the geocoding script or use placeholder
print("WARNING: No proper geocoded file found. Running placeholder approach.")
print("For better results, run: python 01-common_adm2_code_drought.py")
# Load EM-DAT data
dfb_dr_ea = load_emdat_data_complete(disaster_type='Drought')
if dfb_dr_ea.empty:
print("No drought data found")
return pd.DataFrame()
try:
# Try to load admin boundaries
adm = gp.read_file(ADM2_SHP)
shape_col = 'shapeID'
group_col = 'shapeGroup'
print(f"Loaded {len(adm)} admin regions from {ADM2_SHP}")
# Create mapping based on country (placeholder if text matching unavailable)
drought_mappings = {}
for idx, row in dfb_dr_ea.iterrows():
dis_no = row['Dis No']
country = row['Country']
country_code = COUNTRY_CODES.get(country)
if country_code:
country_shapes = adm[adm[group_col] == country_code]
region_ids = country_shapes[shape_col].tolist()
if region_ids:
affected_count = min(3, len(region_ids))
drought_mappings[dis_no] = region_ids[:affected_count]
# Create output DataFrame
drought_data = []
for dis_no, region_codes in drought_mappings.items():
drought_data.append({
'Dis No': dis_no,
'geob_adm2_list': region_codes
})
df = pd.DataFrame(drought_data)
return df
except Exception as e:
print(f"Error in drought conversion: {e}")
import traceback
traceback.print_exc()
return pd.DataFrame()
def convert_flood_data_enhanced():
"""Enhanced flood data conversion - uses pre-geocoded file if available."""
print("Checking for flood geocoding data...")
# Check if properly geocoded file exists (from 02-common_adm2_code_flood.py)
if os.path.exists(FLOOD_DATA):
print(f"Using existing text-matched geocoding: {FLOOD_DATA}")
try:
df = pd.read_excel(FLOOD_DATA, engine='openpyxl')
print(f"Loaded {len(df)} flood events with proper geocoding")
return df
except Exception as e:
print(f"Error loading existing file: {e}")
# Fallback: Run the geocoding script or use placeholder
print("WARNING: No proper geocoded file found. Running placeholder approach.")
print("For better results, run: python 02-common_adm2_code_flood.py")
# Load EM-DAT data
dfb_fl_ea = load_emdat_data_complete(disaster_type='Flood')
if dfb_fl_ea.empty:
print("No flood data found")
return pd.DataFrame()
try:
# Try to load admin boundaries
adm = gp.read_file(ADM2_SHP)
shape_col = 'shapeID'
group_col = 'shapeGroup'
print(f"Loaded {len(adm)} admin regions from {ADM2_SHP}")
# Create mapping based on country (placeholder if text matching unavailable)
flood_mappings = {}
for idx, row in dfb_fl_ea.iterrows():
dis_no = row['Dis No']
country = row['Country']
country_code = COUNTRY_CODES.get(country)
if country_code:
country_shapes = adm[adm[group_col] == country_code]
region_ids = country_shapes[shape_col].tolist()
if region_ids:
affected_count = min(2, len(region_ids))
flood_mappings[dis_no] = region_ids[:affected_count]
# Create output DataFrame
flood_data = []
for dis_no, region_codes in flood_mappings.items():
flood_data.append({
'Dis No': dis_no,
'geob_adm2_list': region_codes
})
df = pd.DataFrame(flood_data)
return df
except Exception as e:
print(f"Error in flood conversion: {e}")
import traceback
traceback.print_exc()
return pd.DataFrame()
###############################
# Enhanced Visualization Functions
###############################
def create_map_enhanced(adm2, year, region_codes, disaster_type, shape_col='shapeID'):
"""Enhanced map creation for a specific year and disaster type."""
try:
# Create figure
plt.figure(figsize=(8.0, 9.5))
ax = plt.axes(projection=ccrs.PlateCarree())
# Set map extent for East Africa
x_min = 21.838949
x_max = 51.415695
y_min = -11.745695
y_max = 23.145147
ax.set_extent([x_min, x_max, y_min, y_max])
# Add background features
ax.add_feature(cfeature.OCEAN, facecolor=(0.5, 0.5, 0.5))
ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.BORDERS, linestyle='-', alpha=0.5)
ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
# Add background shapes if available
try:
background_geoms = safe_read_geometries_enhanced(EA_BACKGROUND_SHP)
if background_geoms:
ax.add_geometries(background_geoms, ccrs.PlateCarree(), facecolor='None')
water_geoms = safe_read_geometries_enhanced(WATER_BODIES_SHP)
if water_geoms:
ax.add_geometries(water_geoms, ccrs.PlateCarree(), facecolor='grey')
except Exception as e:
pass # Use cartopy features instead
# Determine shape column based on what's in the GeoDataFrame
if shape_col not in adm2.columns:
shape_col = 'GID_1' if 'GID_1' in adm2.columns else adm2.columns[0]
# Add disaster regions if available
if region_codes:
regions = adm2[adm2[shape_col].isin(region_codes)]
if not regions.empty:
ax.add_geometries(regions['geometry'], ccrs.PlateCarree(),
edgecolor='red', facecolor='red', alpha=1)
print(f"Added {len(regions)} regions to the {disaster_type} map for {year}")
# Add year label
ax.text(0.72, 0.03, str(year), fontsize=50, fontweight='bold',
ha='left', va='center', color='black', transform=ax.transAxes)
# Save the map
event_dir = 'dr' if disaster_type == 'Drought' else 'fl'
event_label = 'dr' if disaster_type == 'Drought' else 'fl'
output_file = os.path.join(OUTPUT_DIR, event_dir, f"{str(year)}_{event_label}.png")
plt.savefig(output_file, transparent=False, bbox_inches='tight', dpi=150)
print(f"Enhanced {disaster_type} map saved for {year}: {output_file}")
plt.close()
except Exception as e:
print(f"Error creating enhanced map for {disaster_type} in {year}: {e}")
def create_year_maps_enhanced(disaster_type='both'):
"""FIXED: Enhanced yearly maps creation for 1990-2025 complete coverage."""
print(f"Creating enhanced yearly maps for disaster type: {disaster_type}")
# Load admin boundaries - try shapefile first, then geojson
try:
adm2 = gp.read_file(ADM2_SHP)
shape_col = 'shapeID'
print(f"Loaded {len(adm2)} admin2 regions from shapefile")
except Exception as e:
print(f"ADM2 shapefile not found ({e}), using ADM1 geojson instead")
try:
adm2 = gp.read_file(ADM1_GEOJSON)
shape_col = 'GID_1'
print(f"Loaded {len(adm2)} admin1 regions from geojson")
except Exception as e2:
print(f"Error loading admin boundaries: {e2}")
return
# FIXED: Complete year range 1990-2025 (36 years)
year_list = list(range(1990, 2026)) # FIXED: Now includes 2025
print(f"Processing {len(year_list)} years: {year_list[0]}-{year_list[-1]}")
# Load mapped data for drought
if disaster_type in ['drought', 'both']:
try:
drought_file = DROUGHT_DATA
if not os.path.exists(drought_file):
print("Converting drought data first...")
convert_drought_data_enhanced()
drought_df = pd.read_excel(drought_file)
drought_df['geob_adm2_list'] = drought_df['geob_adm2_list'].apply(
lambda x: eval(str(x)) if isinstance(x, str) else x)
# Group by year
drought_by_year = {}
for idx, row in drought_df.iterrows():
try:
dis_no = row['Dis No']
year = int(dis_no.split('-')[0])
if year not in drought_by_year:
drought_by_year[year] = []
region_codes = row['geob_adm2_list']
if isinstance(region_codes, list):
drought_by_year[year].extend(region_codes)
except Exception as e:
print(f"Error processing drought entry {idx}: {e}")
# Create drought maps for each year
for year in year_list:
region_codes = drought_by_year.get(year, [])
create_map_enhanced(adm2, year, region_codes, 'Drought', shape_col)
except Exception as e:
print(f"Error processing enhanced drought maps: {e}")
# Load mapped data for flood
if disaster_type in ['flood', 'both']:
try:
flood_file = FLOOD_DATA
if not os.path.exists(flood_file):
print("Converting flood data first...")
convert_flood_data_enhanced()
flood_df = pd.read_excel(flood_file)
flood_df['geob_adm2_list'] = flood_df['geob_adm2_list'].apply(
lambda x: eval(str(x)) if isinstance(x, str) else x)
# Group by year
flood_by_year = {}
for idx, row in flood_df.iterrows():
try:
dis_no = row['Dis No']
year = int(dis_no.split('-')[0])
if year not in flood_by_year:
flood_by_year[year] = []
region_codes = row['geob_adm2_list']
if isinstance(region_codes, list):
flood_by_year[year].extend(region_codes)
except Exception as e:
print(f"Error processing flood entry {idx}: {e}")
# Create flood maps for each year
for year in year_list:
region_codes = flood_by_year.get(year, [])
create_map_enhanced(adm2, year, region_codes, 'Flood', shape_col)
except Exception as e:
print(f"Error processing enhanced flood maps: {e}")
print("Completed enhanced yearly maps creation")
###############################
# FIXED: Enhanced Calendar Visualization Functions
###############################
def process_drought_data_for_calendar_enhanced():
"""Enhanced drought data processing for calendar visualization (1990-2025)."""
print("Processing enhanced drought data for calendar...")
# Read directly from EM-DAT data
dfb_dr_ea = load_emdat_data_complete(disaster_type='Drought')
# Extract time data
dfb_dr_ea_time = dfb_dr_ea[['Dis No', 'Start Year', 'Start Month', 'Start Day',
'End Year', 'End Month', 'End Day']].copy()
# FIXED: Fill missing values BEFORE any conversions
dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time['Start Month'].fillna(1.0)
dfb_dr_ea_time['Start Day'] = dfb_dr_ea_time['Start Day'].fillna(1.0)
dfb_dr_ea_time['End Month'] = dfb_dr_ea_time['End Month'].fillna(12.0)
# Apply function to get end days
for idx, row in dfb_dr_ea_time.iterrows():
dfb_dr_ea_time.loc[idx, 'End Day'] = last_day_month_enhanced(row)
# FIXED: Safe format date fields with NaN handling
dfb_dr_ea_time['Start Year'] = dfb_dr_ea_time["Start Year"].astype(str)
dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time["Start Month"].apply(
lambda x: safe_date_format_enhanced(x, '01'))
dfb_dr_ea_time['End Year'] = dfb_dr_ea_time["End Year"].astype(str)
dfb_dr_ea_time['End Month'] = dfb_dr_ea_time["End Month"].apply(
lambda x: safe_date_format_enhanced(x, '12'))
# Create date range lists
date_str_list_cont = []
for idx, row in dfb_dr_ea_time.iterrows():
date_str_list = create_daterange_list_enhanced(row)
date_str_list_cont.append(date_str_list)
dfb_dr_ea_time['date_str_list'] = date_str_list_cont
df_dr = dfb_dr_ea_time.explode('date_str_list')
# Group by date
df_t = df_dr.groupby('date_str_list').agg({'Dis No': lambda x: list(x)})
# Save intermediate result
df_t.to_csv(os.path.join(OUTPUT_DIR, 'drought_intermediate_enhanced.csv'))
dft1 = df_t.reset_index()
dft1['date'] = dft1['date_str_list'].apply(pd.to_datetime, errors='coerce')
dft1 = dft1.dropna(subset=['date']) # Remove invalid dates
dft1['year'] = dft1['date'].dt.year
dft1['days'] = dft1['date'].dt.dayofyear
# Generate day-by-day event data by year
def gen_list_days_enhanced(im_df, year):
n_days = 366 if calendar.isleap(year) else 365
df = pd.DataFrame({"year": year, "days": range(1, n_days + 1)})
dfta = dft1[dft1['year'] == year]
dfta['event'] = 1
dftb = dfta[['Dis No', 'date', 'days', 'event']]
dftc = pd.merge(df, dftb, on='days', how='left')
dftd = dftc.fillna(0)
event_list = dftd['event'].tolist()
event_list.insert(0, str(year))
return event_list
# FIXED: Create event lists for each year (1990-2025)
year_list = np.arange(1990, 2026, 1) # FIXED: Now includes 2025
el_cont = []
for yearl in year_list:
el = gen_list_days_enhanced(dft1, yearl)
el_cont.append(el)
# Create the output dataframe
db = pd.DataFrame(el_cont)
db1 = db.set_index(0)
db2 = db1.fillna(0)
cols = db2.columns
db2[cols] = db2[cols].apply(lambda x: x.astype(int))
# Save to CSV
output_csv = os.path.join(OUTPUT_DIR, 'cal_data_drought_enhanced.csv')
db2.to_csv(output_csv)
print(f"Enhanced drought calendar data saved to {output_csv}")
return db2
def process_flood_data_for_calendar_enhanced():
"""FIXED: Enhanced flood data processing for calendar visualization with proper NaN handling."""
print("Processing enhanced flood data for calendar...")
# FIXED: Read directly from EM-DAT data like drought processing
dfb_fl_ea = load_emdat_data_complete(disaster_type='Flood')
# Extract time information - using .copy() to avoid SettingWithCopyWarning
dfb_fl_ea_time = dfb_fl_ea[['Dis No', 'Start Year', 'Start Month', 'Start Day',
'End Year', 'End Month', 'End Day']].copy()
# FIXED: Fill NaN values BEFORE any conversions to avoid "cannot convert float NaN to integer"
dfb_fl_ea_time['Start Month'] = dfb_fl_ea_time['Start Month'].fillna(1.0)
dfb_fl_ea_time['Start Day'] = dfb_fl_ea_time['Start Day'].fillna(1.0)
dfb_fl_ea_time['End Month'] = dfb_fl_ea_time['End Month'].fillna(12.0)
# Apply function to get end days with enhanced error handling
for idx, row in dfb_fl_ea_time.iterrows():
dfb_fl_ea_time.loc[idx, 'End Day'] = last_day_month_enhanced(row)
# FIXED: Safe format date fields with proper NaN handling
dfb_fl_ea_time['Start Year'] = dfb_fl_ea_time["Start Year"].astype(str)
dfb_fl_ea_time['Start Month'] = dfb_fl_ea_time["Start Month"].apply(
lambda x: safe_date_format_enhanced(x, '01'))
dfb_fl_ea_time['End Year'] = dfb_fl_ea_time["End Year"].astype(str)
dfb_fl_ea_time['End Month'] = dfb_fl_ea_time["End Month"].apply(
lambda x: safe_date_format_enhanced(x, '12'))
# Create date range lists with enhanced error handling
date_str_list_cont = []
for idx, row in dfb_fl_ea_time.iterrows():
try:
date_str_list = create_daterange_list_enhanced(row)
date_str_list_cont.append(date_str_list)
except Exception as e:
print(f"Error processing row {idx}: {e}")
date_str_list_cont.append([])
dfb_fl_ea_time['date_str_list'] = date_str_list_cont
db_time1 = dfb_fl_ea_time[['Dis No', 'date_str_list']]
# Explode the date lists
df_fl = db_time1.explode('date_str_list')
# Group by date
df_t = df_fl.groupby('date_str_list').agg({'Dis No': lambda x: list(x)})
# Create dataframe with date info
dft1 = df_t.reset_index()
dft1['date'] = dft1['date_str_list'].apply(pd.to_datetime, errors='coerce')
dft1 = dft1.dropna(subset=['date']) # Remove invalid dates
dft1['year'] = dft1['date'].dt.year
dft1['days'] = dft1['date'].dt.dayofyear
# Generate day-by-day event data by year
def gen_list_days_enhanced(im_df, year):
n_days = 366 if calendar.isleap(year) else 365
df = pd.DataFrame({"year": year, "days": range(1, n_days + 1)})
dfta = dft1[dft1['year'] == year]
dfta['event'] = 1
dftb = dfta[['Dis No', 'date', 'days', 'event']]
dftc = pd.merge(df, dftb, on='days', how='left')
dftd = dftc.fillna(0)
event_list = dftd['event'].tolist()
event_list.insert(0, str(year))
return event_list
# FIXED: Create event lists for each year (1990-2025)
year_list = np.arange(1990, 2026, 1) # FIXED: Now includes 2025
el_cont = []
for yearl in year_list:
el = gen_list_days_enhanced(dft1, yearl)
el_cont.append(el)
# Create the output dataframe
db = pd.DataFrame(el_cont)
db1 = db.set_index(0)
db2 = db1.fillna(0)
cols = db2.columns
db2[cols] = db2[cols].apply(lambda x: x.astype(int))
# Save to CSV
output_csv = os.path.join(OUTPUT_DIR, 'cal_data_flood_enhanced.csv')
db2.to_csv(output_csv)
print(f"FIXED: Enhanced flood calendar data saved to {output_csv}")
return db2
def create_calendar_visualization_enhanced(data_frame, output_file, event_type="Events"):
"""Enhanced calendar visualization for disaster events."""
print(f"Creating enhanced calendar visualization for {event_type}...")
# Table styling helper functions
def set_align_for_column(table, col, align="left"):
cells = [key for key in table._cells if key[1] == col]
for cell in cells:
table._cells[cell]._loc = align
def set_width_for_column(table, col, width):
cells = [key for key in table._cells if key[1] == col]
for cell in cells:
table._cells[cell]._width = width
def set_height_for_row(table, row, height):
cells = [key for key in table._cells if key[0] == row]
for cell in cells:
table._cells[cell]._height = height
def colorcell_enhanced(tablerows, tablecols, cellDict):
allcells = [(x, y) for x in tablerows[1:] for y in tablecols[1:]]
for alcls in allcells:
try:
cell_value = int(cellDict[alcls]._text.get_text())
if cell_value == 0:
cellDict[alcls].set_facecolor('#FFFFFF')
elif cell_value == 1:
cellDict[alcls].set_facecolor('#FF0000')
else:
cellDict[alcls].set_facecolor('#961414')
except (ValueError, KeyError):
cellDict[alcls].set_facecolor('#FFFFFF')
def removeaqivalue_enhanced(tablerows, tablecols, mpl_table):
allcells = [(x, y) for x in tablerows[1:] for y in tablecols[1:]]
for alcls in allcells:
mpl_table._cells[alcls]._text.set_text('')
def set_height_for_row_except_head(table, rowlist, height):
cells_list = []
for row in rowlist:
cells = [key for key in table._cells if key[0] == row]
cells_list.append(cells)
for cells in cells_list:
for cell in cells:
table._cells[cell]._height = height
# Function for table creation
def render_mpl_table_enhanced(data, col_width=1.0, row_height=1.625, font_size=12,
header_color='#40466e', row_colors=['#f1f1f2', 'w'], edge_color='w',
bbox=[0, 0, 1, 1], header_columns=0, ax=None, **kwargs):
max_cols = min(366, len(data.columns)) # Ensure we don't exceed available columns
mpl_table = ax.table(cellText=data.values, bbox=bbox,
colLabels=['' for _ in range(max_cols)],
cellLoc='center', **kwargs)
set_align_for_column(mpl_table, col=0, align="left")
set_width_for_column(mpl_table, 0, 0.1)
set_height_for_row(mpl_table, 0, 0.01)
set_height_for_row_except_head(mpl_table, np.arange(1, len(data.index)), 0.012)
mpl_table.auto_set_font_size(False)
mpl_table.set_fontsize(font_size)
cellDict = mpl_table.get_celld()
tablerows = np.arange(0, len(data.index) + 1)
tablecols = np.arange(0, max_cols)
for k, cell in six.iteritems(mpl_table._cells):
cell.set_linewidth(0)
if k[0] == 0 or k[1] < header_columns:
cell.set_text_props(weight='bold', color='w')
cell.set_facecolor(header_color)
colorcell_enhanced(tablerows, tablecols, cellDict)
else:
cell.set_facecolor(row_colors[k[0] % len(row_colors)])
removeaqivalue_enhanced(tablerows, tablecols, mpl_table)
return ax
try:
# Create the plot
fig = plt.figure()
fig.set_size_inches(28, 20) # Enhanced size for better visibility
table = fig.add_axes([0.08, 0.02, 0.55, 0.9], frame_on=False)
table.xaxis.set_ticks_position('none')
table.yaxis.set_ticks_position('none')
table.set_xticklabels('')
table.set_yticklabels('')
render_mpl_table_enhanced(data_frame, header_columns=1, col_width=0.002, ax=table)
# Add title
fig.suptitle(f'Enhanced {event_type} Calendar (1990-2025)',
fontsize=24, fontweight='bold', y=0.95)
# Save the figure
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.close()
print(f"Enhanced calendar visualization saved to {output_file}")
except Exception as e:
print(f"Error creating enhanced calendar visualization: {e}")
def create_alternative_drought_analysis_enhanced():
"""Enhanced alternative drought analysis using pivot approach."""
print("Creating enhanced alternative drought analysis...")
try:
# Read drought data
dfb_dr_ea = load_emdat_data_complete(disaster_type='Drought')
# Process dates similar to calendar processing
dfb_dr_ea_time = dfb_dr_ea[['Dis No', 'Start Year', 'Start Month', 'Start Day',
'End Year', 'End Month', 'End Day']].copy()
# Fill missing values
dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time['Start Month'].fillna(1.0)
dfb_dr_ea_time['Start Day'] = dfb_dr_ea_time['Start Day'].fillna(1.0)
dfb_dr_ea_time['End Month'] = dfb_dr_ea_time['End Month'].fillna(12.0)
# Get end days
for idx, row in dfb_dr_ea_time.iterrows():
dfb_dr_ea_time.loc[idx, 'End Day'] = last_day_month_enhanced(row)
# Format dates safely
dfb_dr_ea_time['Start Year'] = dfb_dr_ea_time["Start Year"].astype(str)
dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time["Start Month"].apply(
lambda x: safe_date_format_enhanced(x, '01'))
dfb_dr_ea_time['End Year'] = dfb_dr_ea_time["End Year"].astype(str)
dfb_dr_ea_time['End Month'] = dfb_dr_ea_time["End Month"].apply(
lambda x: safe_date_format_enhanced(x, '12'))
# Create date ranges
date_str_list_cont = []
for idx, row in dfb_dr_ea_time.iterrows():
date_str_list = create_daterange_list_enhanced(row)
date_str_list_cont.append(date_str_list)
dfb_dr_ea_time['date_str_list'] = date_str_list_cont
df_dr = dfb_dr_ea_time.explode('date_str_list')
# Alternative approach: Create a pivoted version
df_dr1 = df_dr.copy()
df_dr1['dr'] = 1
df_dr1['date'] = df_dr1['date_str_list'].apply(pd.to_datetime, errors='coerce')
df_dr1 = df_dr1.dropna(subset=['date'])
df_dr1['doy'] = df_dr1['date'].dt.dayofyear
df_dr1['year'] = df_dr1['date'].dt.year
df_dr2 = df_dr1[['year', 'doy', 'dr']]
# Pivot table
df = pd.pivot_table(df_dr2, values='dr', index=['year'], columns=['doy'])
df1 = df.rename_axis(None)
df2 = df1.fillna(0)
# Convert to integers
cols = df2.columns
df2[cols] = df2[cols].apply(lambda x: x.astype(int))
# Take a subset of the first 60 columns
df3 = df2.iloc[:, 0:60]
df4 = df3.reset_index()
# Save to CSV
alternative_output = os.path.join(OUTPUT_DIR, 'drought_alternative_pivot_enhanced.csv')
df4.to_csv(alternative_output, index=False)
print(f"Enhanced alternative drought analysis saved to {alternative_output}")
return df4
except Exception as e:
print(f"Error in enhanced alternative drought analysis: {e}")
return pd.DataFrame()
###############################
# FIXED: Enhanced Frequency Analysis Functions
###############################
def process_drought_data_for_frequency_enhanced():
"""FIXED: Enhanced drought frequency processing with column mismatch fix."""
print("Processing enhanced drought data for frequency map...")
try:
# Read drought data
dfb_dr = pd.read_excel(DROUGHT_DATA, engine='openpyxl')
dfb_dr['adm2_list'] = dfb_dr.geob_adm2_list.apply(lambda x: literal_eval(str(x)))
dfb_dr1 = dfb_dr[['Dis No', 'adm2_list']]
# Expand the adm2_list column - this is key for correct frequency calculation
dfb_dr2 = dfb_dr1.explode('adm2_list')
dfb_dr2['year'] = dfb_dr2['Dis No'].str.split('-').str[0]
# FIXED: Count occurrences by adm2_list with proper column naming
df = dfb_dr2.groupby(['adm2_list']).agg({'Dis No': 'count'}) # FIXED: specific column name
# Reset index and rename columns properly
df1 = df.reset_index()
df1.columns = ['shapeID', 'count1']
df1['count2'] = df1['count1'] # Add second count column for compatibility
# Read shapefile or geojson
try:
adm = gp.read_file(ADM2_SHP)
shape_col = 'shapeID'
except Exception:
adm = gp.read_file(ADM1_GEOJSON)
shape_col = 'GID_1'
df1 = df1.rename(columns={'shapeID': 'GID_1'})
# Merge data with geometries
db = pd.merge(adm, df1, on=shape_col, how='right')
# Filter out rows with None geometries
db = db[db['geometry'].notna()]
print(f"Enhanced drought frequency data processed: {len(db)} regions")
return db
except Exception as e:
print(f"Error in enhanced drought frequency processing: {e}")
import traceback
traceback.print_exc()
return None
def process_flood_data_for_frequency_enhanced():
"""FIXED: Enhanced flood frequency processing with column mismatch fix."""