ea-impact-events/utils.py at main · icpac-igad/ea-impact-events · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Complete Enhanced utility functions for processing and visualizing EM-DAT disaster data for East Africa.
This module contains functions with enhanced direct EM-DAT processing methodology for complete 1990-2025 coverage.

FIXED ISSUES:
1. Flood calendar NaN conversion error - proper fillna() handling
2. Missing combined grid visualizations - fixed image creation functions
3. Column mismatch in frequency processing - proper aggregation methods
4. Complete 1990-2025 range support - fixed year ranges throughout
5. All functionality from 5 original scripts - comprehensive implementation

EXPECTED OUTPUTS:
- process_flood_data_for_calendar_enhanced() -> flood_calendar_plot_enhanced.png
- create_combined_image_enhanced() -> drought_extent_enhanced.png, flood_extent_enhanced.png
- All frequency, calendar, and combined visualization functions working
"""

import os
import sys
import re
import glob
import ntpath
import pandas as pd
import numpy as np
from datetime import datetime
import calendar
import six
from ast import literal_eval
import matplotlib
matplotlib.use("Agg")  # Use non-interactive backend
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import colors
from matplotlib.colors import LinearSegmentedColormap
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.shapereader import Reader
import geopandas as gp
import PIL
from PIL import Image
import warnings

# Try to import optional dependencies
try:
    import geoplot as gplt
    import geoplot.crs as gcrs
    HAS_GEOPLOT = True
except ImportError:
    HAS_GEOPLOT = False
    warnings.warn("geoplot not available; using matplotlib/geopandas for plotting instead")

try:
    import mapclassify as mc
except ImportError:
    warnings.warn("mapclassify not available; some classification features may not work")
    mc = None

# Path Configuration (Enhanced)
BASE_DIR = ''
DATA_DIR = os.path.join(BASE_DIR, 'Impact Data')
SHAPEFILE_DIR = os.path.join(BASE_DIR, 'Impactshp')
OUTPUT_DIR = 'Output'

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'dr'), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, 'fl'), exist_ok=True)

# Data file paths (Enhanced) - Updated to use new EMDAT file and proper geocoded mappings
EMDAT_DATA = 'public_emdat_custom_request_2026-01-21.xlsx'  # New EMDAT custom request file
DROUGHT_DATA = 'em_dat_ea_drought_adm2_v4.xlsx'  # Text-matched geocoding from 01-common_adm2_code_drought.py
FLOOD_DATA = 'em_dat_ea_flood_adm2_v2.xlsx'  # Text-matched geocoding from 02-common_adm2_code_flood.py
FLOOD_DATES_V0 = os.path.join(DATA_DIR, 'em_dat_flood_dates_v0.csv')
FLOOD_DATES_V1 = os.path.join(DATA_DIR, 'em_dat_flood_dates_v1.csv')

# Shapefile paths - Using available boundary files
ADM1_GEOJSON = 'icpac_adm1v3.geojson'  # Available admin1 geojson
ADM2_SHP = 'ea_adm2_geoboundaries.shp'  # GEOBoundaries ADM2 shapefile (NOW AVAILABLE)
EA_BACKGROUND_SHP = os.path.join(SHAPEFILE_DIR, 'ea_global_background.shp')
WATER_BODIES_SHP = os.path.join(SHAPEFILE_DIR, 'Africa_Water_Bodies.shp')

# Constants (Enhanced with additional countries - updated for new EMDAT format)
EA_COUNTRIES = [
    'Djibouti', 'Eritrea', 'Ethiopia', 'Kenya', 'Rwanda',
    'Somalia', 'South Sudan', 'Sudan (the)', 'Sudan',  # Both Sudan variants
    'Tanzania, United Republic of', 'United Republic of Tanzania',
    'Uganda', 'Burundi'  # Added Burundi
]

COUNTRY_CODES = {
    'Djibouti': 'DJI',
    'Eritrea': 'ERI',
    'Ethiopia': 'ETH',
    'Kenya': 'KEN',
    'Rwanda': 'RWA',
    'Somalia': 'SOM',
    'South Sudan': 'SSD',
    'Sudan (the)': 'SDN',
    'Sudan': 'SDN',  # Added for new EMDAT format
    'Tanzania, United Republic of': 'TZA',
    'United Republic of Tanzania': 'TZA',
    'Uganda': 'UGA',
    'Burundi': 'BDI'  # Added Burundi
}

###############################
# Helper Functions (Enhanced)
###############################

def extract_regions_from_location_enhanced(location_text):
    """Enhanced region extraction from location text with better parsing."""
    if not location_text or not isinstance(location_text, str) or location_text.lower() == 'nan':
        return []

    # Remove text in parentheses for initial splitting
    parentheses_text = []
    pattern = r'\(([^)]*)\)'
    for match in re.finditer(pattern, location_text):
        parentheses_text.append(match.group(1))

    # Remove parentheses for splitting
    clean_text = re.sub(pattern, '', location_text)

    # Split by common separators
    regions = []
    for separator in [',', ';', ' and ', '&']:
        if separator in clean_text:
            parts = [p.strip() for p in clean_text.split(separator) if p.strip()]
            regions.extend(parts)
            break

    # If no split occurred, use the whole text
    if not regions:
        regions = [clean_text.strip()]

    # Clean up region names
    cleaned_regions = []
    for region in regions:
        # Remove common qualifiers
        qualifiers = [
            'district', 'districts', 'region', 'regions', 'county', 'counties',
            'province', 'provinces', 'zone', 'zones', 'area', 'areas'
        ]
        for qualifier in qualifiers:
            region = re.sub(r'\b' + qualifier + r'\b', '', region, flags=re.IGNORECASE)

        region = re.sub(r'\s+', ' ', region).strip()

        if region:
            cleaned_regions.append(region)

    # Add regions from parentheses text
    for text in parentheses_text:
        if any(word in text.lower() for word in ['district', 'region', 'county', 'province', 'zone']):
            sub_regions = extract_regions_from_location_enhanced(text)
            cleaned_regions.extend(sub_regions)

    return cleaned_regions


def safe_convert_to_int_enhanced(value):
    """Enhanced safe conversion to integer handling NaN values."""
    if pd.isna(value):
        return None
    try:
        return int(float(value))
    except (ValueError, TypeError):
        return None


def safe_date_format_enhanced(value, default='01'):
    """Enhanced safe date formatting with NaN handling."""
    if pd.isna(value):
        return default
    try:
        return str(int(float(value))).zfill(2)
    except (ValueError, TypeError):
        return default


def flatten(l):
    """Flatten a list of lists into a single list."""
    return [item for sublist in l for item in sublist]


def return_colormap(classif):
    """Create colormap of matplotlib based on number of class and given colorcode."""
    c = matplotlib.colors.ColorConverter().to_rgb
    colorlist = [c("#ffffcc"), c("#ffeda0"), c("#fed976"), c("#ffb24b"), c("#fe8d3b"), c("#fd4e2a"), c("#e3181a")]
    color_code = colorlist
    c_cmap = LinearSegmentedColormap.from_list("my_colormap", color_code, N=len(classif), gamma=1.0)
    return c_cmap


def safe_read_geometries_enhanced(shapefile_path):
    """Enhanced safely read geometries from a shapefile, filtering out None values."""
    try:
        reader = Reader(shapefile_path)
        geometries = []
        for geom in reader.geometries():
            if geom is not None:
                geometries.append(geom)
        return geometries
    except Exception as e:
        print(f"Error reading shapefile {shapefile_path}: {e}")
        return []


def create_daterange_list_enhanced(row):
    """FIXED: Enhanced date range creation with proper NaN handling."""
    try:
        # Safe conversion with NaN handling
        start_year = str(row['Start Year']) if pd.notna(row['Start Year']) else '1990'
        start_month = safe_date_format_enhanced(row['Start Month'], '01')
        start_day = safe_date_format_enhanced(row['Start Day'], '01')
        end_year = str(row['End Year']) if pd.notna(row['End Year']) else start_year
        end_month = safe_date_format_enhanced(row['End Month'], '12')
        end_day = safe_date_format_enhanced(row['End Day'], '31')

        start_date_str = f"{start_year}-{start_month}-{start_day}"
        end_date_str = f"{end_year}-{end_month}-{end_day}"

        # Create date range
        date_str_list = pd.date_range(start_date_str, end_date_str, freq='D').strftime("%Y-%m-%d").tolist()
        return date_str_list
    except Exception as e:
        print(f"Error in create_daterange_list_enhanced: {e}")
        print(f"Problematic row: {row}")
        return []


def last_day_month_enhanced(row):
    """Enhanced last day of month calculation with NaN handling."""
    try:
        end_year = int(row['End Year']) if pd.notna(row['End Year']) else 1990
        end_month = int(row['End Month']) if pd.notna(row['End Month']) else 12

        firstday_month = datetime(end_year, end_month, 1)
        lastday_month = firstday_month + pd.DateOffset(months=1) - pd.DateOffset(days=1)
        lastday_month_str = lastday_month.strftime("%d")
        return lastday_month_str
    except Exception as e:
        print(f"Error in last_day_month_enhanced: {e}")
        return "31"


def order_file_enhanced(file_list):
    """Enhanced file ordering by year for creating combined visualizations."""
    if not file_list:
        return []

    filepath = os.path.dirname(file_list[0])
    file_list1 = [ntpath.basename(item).split('.')[0] for item in file_list]
    file_list1.sort()
    sorted_file_list = [f'{filepath}/{item}.png' for item in file_list1]
    return sorted_file_list

###############################
# Data Loading and Processing Functions (Enhanced)
###############################

def load_emdat_data_complete(disaster_type=None):
    """Enhanced EM-DAT data loading with complete 1990-2025 coverage."""
    try:
        print(f"Reading enhanced EM-DAT data from {EMDAT_DATA}")
        # New EMDAT file doesn't need skiprows and has different column names
        dfb = pd.read_excel(EMDAT_DATA, engine='openpyxl')

        # Rename columns to match expected format (DisNo. -> Dis No)
        if 'DisNo.' in dfb.columns:
            dfb = dfb.rename(columns={'DisNo.': 'Dis No'})

        # Filter for specified disaster type(s)
        if disaster_type:
            if isinstance(disaster_type, list):
                dfb = dfb[dfb['Disaster Type'].isin(disaster_type)]
            else:
                dfb = dfb[dfb['Disaster Type'] == disaster_type]

        # Filter for East Africa countries (enhanced list)
        dfb_ea = dfb[dfb['Country'].isin(EA_COUNTRIES)]

        print(f"Loaded {len(dfb_ea)} {disaster_type if disaster_type else 'total'} events for EA (1990-2025)")
        return dfb_ea

    except Exception as e:
        print(f"Error loading EM-DAT data: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()

###############################
# Enhanced Data Conversion Functions
###############################

def convert_drought_data_enhanced():
    """Enhanced drought data conversion - uses pre-geocoded file if available."""
    print("Checking for drought geocoding data...")

    # Check if properly geocoded file exists (from 01-common_adm2_code_drought.py)
    if os.path.exists(DROUGHT_DATA):
        print(f"Using existing text-matched geocoding: {DROUGHT_DATA}")
        try:
            df = pd.read_excel(DROUGHT_DATA, engine='openpyxl')
            print(f"Loaded {len(df)} drought events with proper geocoding")
            return df
        except Exception as e:
            print(f"Error loading existing file: {e}")

    # Fallback: Run the geocoding script or use placeholder
    print("WARNING: No proper geocoded file found. Running placeholder approach.")
    print("For better results, run: python 01-common_adm2_code_drought.py")

    # Load EM-DAT data
    dfb_dr_ea = load_emdat_data_complete(disaster_type='Drought')

    if dfb_dr_ea.empty:
        print("No drought data found")
        return pd.DataFrame()

    try:
        # Try to load admin boundaries
        adm = gp.read_file(ADM2_SHP)
        shape_col = 'shapeID'
        group_col = 'shapeGroup'
        print(f"Loaded {len(adm)} admin regions from {ADM2_SHP}")

        # Create mapping based on country (placeholder if text matching unavailable)
        drought_mappings = {}

        for idx, row in dfb_dr_ea.iterrows():
            dis_no = row['Dis No']
            country = row['Country']
            country_code = COUNTRY_CODES.get(country)

            if country_code:
                country_shapes = adm[adm[group_col] == country_code]
                region_ids = country_shapes[shape_col].tolist()

                if region_ids:
                    affected_count = min(3, len(region_ids))
                    drought_mappings[dis_no] = region_ids[:affected_count]

        # Create output DataFrame
        drought_data = []
        for dis_no, region_codes in drought_mappings.items():
            drought_data.append({
                'Dis No': dis_no,
                'geob_adm2_list': region_codes
            })

        df = pd.DataFrame(drought_data)
        return df

    except Exception as e:
        print(f"Error in drought conversion: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()


def convert_flood_data_enhanced():
    """Enhanced flood data conversion - uses pre-geocoded file if available."""
    print("Checking for flood geocoding data...")

    # Check if properly geocoded file exists (from 02-common_adm2_code_flood.py)
    if os.path.exists(FLOOD_DATA):
        print(f"Using existing text-matched geocoding: {FLOOD_DATA}")
        try:
            df = pd.read_excel(FLOOD_DATA, engine='openpyxl')
            print(f"Loaded {len(df)} flood events with proper geocoding")
            return df
        except Exception as e:
            print(f"Error loading existing file: {e}")

    # Fallback: Run the geocoding script or use placeholder
    print("WARNING: No proper geocoded file found. Running placeholder approach.")
    print("For better results, run: python 02-common_adm2_code_flood.py")

    # Load EM-DAT data
    dfb_fl_ea = load_emdat_data_complete(disaster_type='Flood')

    if dfb_fl_ea.empty:
        print("No flood data found")
        return pd.DataFrame()

    try:
        # Try to load admin boundaries
        adm = gp.read_file(ADM2_SHP)
        shape_col = 'shapeID'
        group_col = 'shapeGroup'
        print(f"Loaded {len(adm)} admin regions from {ADM2_SHP}")

        # Create mapping based on country (placeholder if text matching unavailable)
        flood_mappings = {}

        for idx, row in dfb_fl_ea.iterrows():
            dis_no = row['Dis No']
            country = row['Country']
            country_code = COUNTRY_CODES.get(country)

            if country_code:
                country_shapes = adm[adm[group_col] == country_code]
                region_ids = country_shapes[shape_col].tolist()

                if region_ids:
                    affected_count = min(2, len(region_ids))
                    flood_mappings[dis_no] = region_ids[:affected_count]

        # Create output DataFrame
        flood_data = []
        for dis_no, region_codes in flood_mappings.items():
            flood_data.append({
                'Dis No': dis_no,
                'geob_adm2_list': region_codes
            })

        df = pd.DataFrame(flood_data)
        return df

    except Exception as e:
        print(f"Error in flood conversion: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()

###############################
# Enhanced Visualization Functions
###############################

def create_map_enhanced(adm2, year, region_codes, disaster_type, shape_col='shapeID'):
    """Enhanced map creation for a specific year and disaster type."""
    try:
        # Create figure
        plt.figure(figsize=(8.0, 9.5))
        ax = plt.axes(projection=ccrs.PlateCarree())

        # Set map extent for East Africa
        x_min = 21.838949
        x_max = 51.415695
        y_min = -11.745695
        y_max = 23.145147
        ax.set_extent([x_min, x_max, y_min, y_max])

        # Add background features
        ax.add_feature(cfeature.OCEAN, facecolor=(0.5, 0.5, 0.5))
        ax.add_feature(cfeature.LAND, facecolor='lightgray')
        ax.add_feature(cfeature.BORDERS, linestyle='-', alpha=0.5)
        ax.add_feature(cfeature.COASTLINE, linewidth=0.5)

        # Add background shapes if available
        try:
            background_geoms = safe_read_geometries_enhanced(EA_BACKGROUND_SHP)
            if background_geoms:
                ax.add_geometries(background_geoms, ccrs.PlateCarree(), facecolor='None')

            water_geoms = safe_read_geometries_enhanced(WATER_BODIES_SHP)
            if water_geoms:
                ax.add_geometries(water_geoms, ccrs.PlateCarree(), facecolor='grey')
        except Exception as e:
            pass  # Use cartopy features instead

        # Determine shape column based on what's in the GeoDataFrame
        if shape_col not in adm2.columns:
            shape_col = 'GID_1' if 'GID_1' in adm2.columns else adm2.columns[0]

        # Add disaster regions if available
        if region_codes:
            regions = adm2[adm2[shape_col].isin(region_codes)]
            if not regions.empty:
                ax.add_geometries(regions['geometry'], ccrs.PlateCarree(),
                                edgecolor='red', facecolor='red', alpha=1)
                print(f"Added {len(regions)} regions to the {disaster_type} map for {year}")

        # Add year label
        ax.text(0.72, 0.03, str(year), fontsize=50, fontweight='bold',
                ha='left', va='center', color='black', transform=ax.transAxes)

        # Save the map
        event_dir = 'dr' if disaster_type == 'Drought' else 'fl'
        event_label = 'dr' if disaster_type == 'Drought' else 'fl'
        output_file = os.path.join(OUTPUT_DIR, event_dir, f"{str(year)}_{event_label}.png")
        plt.savefig(output_file, transparent=False, bbox_inches='tight', dpi=150)
        print(f"Enhanced {disaster_type} map saved for {year}: {output_file}")

        plt.close()
    except Exception as e:
        print(f"Error creating enhanced map for {disaster_type} in {year}: {e}")


def create_year_maps_enhanced(disaster_type='both'):
    """FIXED: Enhanced yearly maps creation for 1990-2025 complete coverage."""
    print(f"Creating enhanced yearly maps for disaster type: {disaster_type}")

    # Load admin boundaries - try shapefile first, then geojson
    try:
        adm2 = gp.read_file(ADM2_SHP)
        shape_col = 'shapeID'
        print(f"Loaded {len(adm2)} admin2 regions from shapefile")
    except Exception as e:
        print(f"ADM2 shapefile not found ({e}), using ADM1 geojson instead")
        try:
            adm2 = gp.read_file(ADM1_GEOJSON)
            shape_col = 'GID_1'
            print(f"Loaded {len(adm2)} admin1 regions from geojson")
        except Exception as e2:
            print(f"Error loading admin boundaries: {e2}")
            return

    # FIXED: Complete year range 1990-2025 (36 years)
    year_list = list(range(1990, 2026))  # FIXED: Now includes 2025
    print(f"Processing {len(year_list)} years: {year_list[0]}-{year_list[-1]}")

    # Load mapped data for drought
    if disaster_type in ['drought', 'both']:
        try:
            drought_file = DROUGHT_DATA
            if not os.path.exists(drought_file):
                print("Converting drought data first...")
                convert_drought_data_enhanced()

            drought_df = pd.read_excel(drought_file)
            drought_df['geob_adm2_list'] = drought_df['geob_adm2_list'].apply(
                lambda x: eval(str(x)) if isinstance(x, str) else x)

            # Group by year
            drought_by_year = {}
            for idx, row in drought_df.iterrows():
                try:
                    dis_no = row['Dis No']
                    year = int(dis_no.split('-')[0])

                    if year not in drought_by_year:
                        drought_by_year[year] = []

                    region_codes = row['geob_adm2_list']
                    if isinstance(region_codes, list):
                        drought_by_year[year].extend(region_codes)

                except Exception as e:
                    print(f"Error processing drought entry {idx}: {e}")

            # Create drought maps for each year
            for year in year_list:
                region_codes = drought_by_year.get(year, [])
                create_map_enhanced(adm2, year, region_codes, 'Drought', shape_col)

        except Exception as e:
            print(f"Error processing enhanced drought maps: {e}")

    # Load mapped data for flood
    if disaster_type in ['flood', 'both']:
        try:
            flood_file = FLOOD_DATA
            if not os.path.exists(flood_file):
                print("Converting flood data first...")
                convert_flood_data_enhanced()

            flood_df = pd.read_excel(flood_file)
            flood_df['geob_adm2_list'] = flood_df['geob_adm2_list'].apply(
                lambda x: eval(str(x)) if isinstance(x, str) else x)

            # Group by year
            flood_by_year = {}
            for idx, row in flood_df.iterrows():
                try:
                    dis_no = row['Dis No']
                    year = int(dis_no.split('-')[0])

                    if year not in flood_by_year:
                        flood_by_year[year] = []

                    region_codes = row['geob_adm2_list']
                    if isinstance(region_codes, list):
                        flood_by_year[year].extend(region_codes)

                except Exception as e:
                    print(f"Error processing flood entry {idx}: {e}")

            # Create flood maps for each year
            for year in year_list:
                region_codes = flood_by_year.get(year, [])
                create_map_enhanced(adm2, year, region_codes, 'Flood', shape_col)

        except Exception as e:
            print(f"Error processing enhanced flood maps: {e}")

    print("Completed enhanced yearly maps creation")

###############################
# FIXED: Enhanced Calendar Visualization Functions
###############################

def process_drought_data_for_calendar_enhanced():
    """Enhanced drought data processing for calendar visualization (1990-2025)."""
    print("Processing enhanced drought data for calendar...")

    # Read directly from EM-DAT data
    dfb_dr_ea = load_emdat_data_complete(disaster_type='Drought')

    # Extract time data
    dfb_dr_ea_time = dfb_dr_ea[['Dis No', 'Start Year', 'Start Month', 'Start Day',
                                'End Year', 'End Month', 'End Day']].copy()

    # FIXED: Fill missing values BEFORE any conversions
    dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time['Start Month'].fillna(1.0)
    dfb_dr_ea_time['Start Day'] = dfb_dr_ea_time['Start Day'].fillna(1.0)
    dfb_dr_ea_time['End Month'] = dfb_dr_ea_time['End Month'].fillna(12.0)

    # Apply function to get end days
    for idx, row in dfb_dr_ea_time.iterrows():
        dfb_dr_ea_time.loc[idx, 'End Day'] = last_day_month_enhanced(row)

    # FIXED: Safe format date fields with NaN handling
    dfb_dr_ea_time['Start Year'] = dfb_dr_ea_time["Start Year"].astype(str)
    dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time["Start Month"].apply(
        lambda x: safe_date_format_enhanced(x, '01'))
    dfb_dr_ea_time['End Year'] = dfb_dr_ea_time["End Year"].astype(str)
    dfb_dr_ea_time['End Month'] = dfb_dr_ea_time["End Month"].apply(
        lambda x: safe_date_format_enhanced(x, '12'))

    # Create date range lists
    date_str_list_cont = []
    for idx, row in dfb_dr_ea_time.iterrows():
        date_str_list = create_daterange_list_enhanced(row)
        date_str_list_cont.append(date_str_list)

    dfb_dr_ea_time['date_str_list'] = date_str_list_cont
    df_dr = dfb_dr_ea_time.explode('date_str_list')

    # Group by date
    df_t = df_dr.groupby('date_str_list').agg({'Dis No': lambda x: list(x)})

    # Save intermediate result
    df_t.to_csv(os.path.join(OUTPUT_DIR, 'drought_intermediate_enhanced.csv'))

    dft1 = df_t.reset_index()
    dft1['date'] = dft1['date_str_list'].apply(pd.to_datetime, errors='coerce')
    dft1 = dft1.dropna(subset=['date'])  # Remove invalid dates
    dft1['year'] = dft1['date'].dt.year
    dft1['days'] = dft1['date'].dt.dayofyear

    # Generate day-by-day event data by year
    def gen_list_days_enhanced(im_df, year):
        n_days = 366 if calendar.isleap(year) else 365
        df = pd.DataFrame({"year": year, "days": range(1, n_days + 1)})
        dfta = dft1[dft1['year'] == year]
        dfta['event'] = 1
        dftb = dfta[['Dis No', 'date', 'days', 'event']]
        dftc = pd.merge(df, dftb, on='days', how='left')
        dftd = dftc.fillna(0)
        event_list = dftd['event'].tolist()
        event_list.insert(0, str(year))
        return event_list

    # FIXED: Create event lists for each year (1990-2025)
    year_list = np.arange(1990, 2026, 1)  # FIXED: Now includes 2025
    el_cont = []
    for yearl in year_list:
        el = gen_list_days_enhanced(dft1, yearl)
        el_cont.append(el)

    # Create the output dataframe
    db = pd.DataFrame(el_cont)
    db1 = db.set_index(0)
    db2 = db1.fillna(0)
    cols = db2.columns
    db2[cols] = db2[cols].apply(lambda x: x.astype(int))

    # Save to CSV
    output_csv = os.path.join(OUTPUT_DIR, 'cal_data_drought_enhanced.csv')
    db2.to_csv(output_csv)
    print(f"Enhanced drought calendar data saved to {output_csv}")

    return db2


def process_flood_data_for_calendar_enhanced():
    """FIXED: Enhanced flood data processing for calendar visualization with proper NaN handling."""
    print("Processing enhanced flood data for calendar...")

    # FIXED: Read directly from EM-DAT data like drought processing
    dfb_fl_ea = load_emdat_data_complete(disaster_type='Flood')

    # Extract time information - using .copy() to avoid SettingWithCopyWarning
    dfb_fl_ea_time = dfb_fl_ea[['Dis No', 'Start Year', 'Start Month', 'Start Day',
                                'End Year', 'End Month', 'End Day']].copy()

    # FIXED: Fill NaN values BEFORE any conversions to avoid "cannot convert float NaN to integer"
    dfb_fl_ea_time['Start Month'] = dfb_fl_ea_time['Start Month'].fillna(1.0)
    dfb_fl_ea_time['Start Day'] = dfb_fl_ea_time['Start Day'].fillna(1.0)
    dfb_fl_ea_time['End Month'] = dfb_fl_ea_time['End Month'].fillna(12.0)

    # Apply function to get end days with enhanced error handling
    for idx, row in dfb_fl_ea_time.iterrows():
        dfb_fl_ea_time.loc[idx, 'End Day'] = last_day_month_enhanced(row)

    # FIXED: Safe format date fields with proper NaN handling
    dfb_fl_ea_time['Start Year'] = dfb_fl_ea_time["Start Year"].astype(str)
    dfb_fl_ea_time['Start Month'] = dfb_fl_ea_time["Start Month"].apply(
        lambda x: safe_date_format_enhanced(x, '01'))
    dfb_fl_ea_time['End Year'] = dfb_fl_ea_time["End Year"].astype(str)
    dfb_fl_ea_time['End Month'] = dfb_fl_ea_time["End Month"].apply(
        lambda x: safe_date_format_enhanced(x, '12'))

    # Create date range lists with enhanced error handling
    date_str_list_cont = []
    for idx, row in dfb_fl_ea_time.iterrows():
        try:
            date_str_list = create_daterange_list_enhanced(row)
            date_str_list_cont.append(date_str_list)
        except Exception as e:
            print(f"Error processing row {idx}: {e}")
            date_str_list_cont.append([])

    dfb_fl_ea_time['date_str_list'] = date_str_list_cont
    db_time1 = dfb_fl_ea_time[['Dis No', 'date_str_list']]

    # Explode the date lists
    df_fl = db_time1.explode('date_str_list')

    # Group by date
    df_t = df_fl.groupby('date_str_list').agg({'Dis No': lambda x: list(x)})

    # Create dataframe with date info
    dft1 = df_t.reset_index()
    dft1['date'] = dft1['date_str_list'].apply(pd.to_datetime, errors='coerce')
    dft1 = dft1.dropna(subset=['date'])  # Remove invalid dates
    dft1['year'] = dft1['date'].dt.year
    dft1['days'] = dft1['date'].dt.dayofyear

    # Generate day-by-day event data by year
    def gen_list_days_enhanced(im_df, year):
        n_days = 366 if calendar.isleap(year) else 365
        df = pd.DataFrame({"year": year, "days": range(1, n_days + 1)})
        dfta = dft1[dft1['year'] == year]
        dfta['event'] = 1
        dftb = dfta[['Dis No', 'date', 'days', 'event']]
        dftc = pd.merge(df, dftb, on='days', how='left')
        dftd = dftc.fillna(0)
        event_list = dftd['event'].tolist()
        event_list.insert(0, str(year))
        return event_list

    # FIXED: Create event lists for each year (1990-2025)
    year_list = np.arange(1990, 2026, 1)  # FIXED: Now includes 2025
    el_cont = []
    for yearl in year_list:
        el = gen_list_days_enhanced(dft1, yearl)
        el_cont.append(el)

    # Create the output dataframe
    db = pd.DataFrame(el_cont)
    db1 = db.set_index(0)
    db2 = db1.fillna(0)
    cols = db2.columns
    db2[cols] = db2[cols].apply(lambda x: x.astype(int))

    # Save to CSV
    output_csv = os.path.join(OUTPUT_DIR, 'cal_data_flood_enhanced.csv')
    db2.to_csv(output_csv)
    print(f"FIXED: Enhanced flood calendar data saved to {output_csv}")

    return db2


def create_calendar_visualization_enhanced(data_frame, output_file, event_type="Events"):
    """Enhanced calendar visualization for disaster events."""
    print(f"Creating enhanced calendar visualization for {event_type}...")

    # Table styling helper functions
    def set_align_for_column(table, col, align="left"):
        cells = [key for key in table._cells if key[1] == col]
        for cell in cells:
            table._cells[cell]._loc = align

    def set_width_for_column(table, col, width):
        cells = [key for key in table._cells if key[1] == col]
        for cell in cells:
            table._cells[cell]._width = width

    def set_height_for_row(table, row, height):
        cells = [key for key in table._cells if key[0] == row]
        for cell in cells:
            table._cells[cell]._height = height

    def colorcell_enhanced(tablerows, tablecols, cellDict):
        allcells = [(x, y) for x in tablerows[1:] for y in tablecols[1:]]
        for alcls in allcells:
            try:
                cell_value = int(cellDict[alcls]._text.get_text())
                if cell_value == 0:
                    cellDict[alcls].set_facecolor('#FFFFFF')
                elif cell_value == 1:
                    cellDict[alcls].set_facecolor('#FF0000')
                else:
                    cellDict[alcls].set_facecolor('#961414')
            except (ValueError, KeyError):
                cellDict[alcls].set_facecolor('#FFFFFF')

    def removeaqivalue_enhanced(tablerows, tablecols, mpl_table):
        allcells = [(x, y) for x in tablerows[1:] for y in tablecols[1:]]
        for alcls in allcells:
            mpl_table._cells[alcls]._text.set_text('')

    def set_height_for_row_except_head(table, rowlist, height):
        cells_list = []
        for row in rowlist:
            cells = [key for key in table._cells if key[0] == row]
            cells_list.append(cells)
        for cells in cells_list:
            for cell in cells:
                table._cells[cell]._height = height

    # Function for table creation
    def render_mpl_table_enhanced(data, col_width=1.0, row_height=1.625, font_size=12,
                                header_color='#40466e', row_colors=['#f1f1f2', 'w'], edge_color='w',
                                bbox=[0, 0, 1, 1], header_columns=0, ax=None, **kwargs):

        max_cols = min(366, len(data.columns))  # Ensure we don't exceed available columns
        mpl_table = ax.table(cellText=data.values, bbox=bbox,
                           colLabels=['' for _ in range(max_cols)],
                           cellLoc='center', **kwargs)

        set_align_for_column(mpl_table, col=0, align="left")
        set_width_for_column(mpl_table, 0, 0.1)
        set_height_for_row(mpl_table, 0, 0.01)
        set_height_for_row_except_head(mpl_table, np.arange(1, len(data.index)), 0.012)

        mpl_table.auto_set_font_size(False)
        mpl_table.set_fontsize(font_size)

        cellDict = mpl_table.get_celld()
        tablerows = np.arange(0, len(data.index) + 1)
        tablecols = np.arange(0, max_cols)

        for k, cell in six.iteritems(mpl_table._cells):
            cell.set_linewidth(0)
            if k[0] == 0 or k[1] < header_columns:
                cell.set_text_props(weight='bold', color='w')
                cell.set_facecolor(header_color)
                colorcell_enhanced(tablerows, tablecols, cellDict)
            else:
                cell.set_facecolor(row_colors[k[0] % len(row_colors)])

        removeaqivalue_enhanced(tablerows, tablecols, mpl_table)
        return ax

    try:
        # Create the plot
        fig = plt.figure()
        fig.set_size_inches(28, 20)  # Enhanced size for better visibility
        table = fig.add_axes([0.08, 0.02, 0.55, 0.9], frame_on=False)
        table.xaxis.set_ticks_position('none')
        table.yaxis.set_ticks_position('none')
        table.set_xticklabels('')
        table.set_yticklabels('')

        render_mpl_table_enhanced(data_frame, header_columns=1, col_width=0.002, ax=table)

        # Add title
        fig.suptitle(f'Enhanced {event_type} Calendar (1990-2025)',
                    fontsize=24, fontweight='bold', y=0.95)

        # Save the figure
        plt.savefig(output_file, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"Enhanced calendar visualization saved to {output_file}")

    except Exception as e:
        print(f"Error creating enhanced calendar visualization: {e}")


def create_alternative_drought_analysis_enhanced():
    """Enhanced alternative drought analysis using pivot approach."""
    print("Creating enhanced alternative drought analysis...")

    try:
        # Read drought data
        dfb_dr_ea = load_emdat_data_complete(disaster_type='Drought')

        # Process dates similar to calendar processing
        dfb_dr_ea_time = dfb_dr_ea[['Dis No', 'Start Year', 'Start Month', 'Start Day',
                                  'End Year', 'End Month', 'End Day']].copy()

        # Fill missing values
        dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time['Start Month'].fillna(1.0)
        dfb_dr_ea_time['Start Day'] = dfb_dr_ea_time['Start Day'].fillna(1.0)
        dfb_dr_ea_time['End Month'] = dfb_dr_ea_time['End Month'].fillna(12.0)

        # Get end days
        for idx, row in dfb_dr_ea_time.iterrows():
            dfb_dr_ea_time.loc[idx, 'End Day'] = last_day_month_enhanced(row)

        # Format dates safely
        dfb_dr_ea_time['Start Year'] = dfb_dr_ea_time["Start Year"].astype(str)
        dfb_dr_ea_time['Start Month'] = dfb_dr_ea_time["Start Month"].apply(
            lambda x: safe_date_format_enhanced(x, '01'))
        dfb_dr_ea_time['End Year'] = dfb_dr_ea_time["End Year"].astype(str)
        dfb_dr_ea_time['End Month'] = dfb_dr_ea_time["End Month"].apply(
            lambda x: safe_date_format_enhanced(x, '12'))

        # Create date ranges
        date_str_list_cont = []
        for idx, row in dfb_dr_ea_time.iterrows():
            date_str_list = create_daterange_list_enhanced(row)
            date_str_list_cont.append(date_str_list)

        dfb_dr_ea_time['date_str_list'] = date_str_list_cont
        df_dr = dfb_dr_ea_time.explode('date_str_list')

        # Alternative approach: Create a pivoted version
        df_dr1 = df_dr.copy()
        df_dr1['dr'] = 1
        df_dr1['date'] = df_dr1['date_str_list'].apply(pd.to_datetime, errors='coerce')
        df_dr1 = df_dr1.dropna(subset=['date'])
        df_dr1['doy'] = df_dr1['date'].dt.dayofyear
        df_dr1['year'] = df_dr1['date'].dt.year
        df_dr2 = df_dr1[['year', 'doy', 'dr']]

        # Pivot table
        df = pd.pivot_table(df_dr2, values='dr', index=['year'], columns=['doy'])
        df1 = df.rename_axis(None)
        df2 = df1.fillna(0)

        # Convert to integers
        cols = df2.columns
        df2[cols] = df2[cols].apply(lambda x: x.astype(int))

        # Take a subset of the first 60 columns
        df3 = df2.iloc[:, 0:60]
        df4 = df3.reset_index()

        # Save to CSV
        alternative_output = os.path.join(OUTPUT_DIR, 'drought_alternative_pivot_enhanced.csv')
        df4.to_csv(alternative_output, index=False)
        print(f"Enhanced alternative drought analysis saved to {alternative_output}")

        return df4

    except Exception as e:
        print(f"Error in enhanced alternative drought analysis: {e}")
        return pd.DataFrame()

###############################
# FIXED: Enhanced Frequency Analysis Functions
###############################

def process_drought_data_for_frequency_enhanced():
    """FIXED: Enhanced drought frequency processing with column mismatch fix."""
    print("Processing enhanced drought data for frequency map...")

    try:
        # Read drought data
        dfb_dr = pd.read_excel(DROUGHT_DATA, engine='openpyxl')
        dfb_dr['adm2_list'] = dfb_dr.geob_adm2_list.apply(lambda x: literal_eval(str(x)))
        dfb_dr1 = dfb_dr[['Dis No', 'adm2_list']]

        # Expand the adm2_list column - this is key for correct frequency calculation
        dfb_dr2 = dfb_dr1.explode('adm2_list')
        dfb_dr2['year'] = dfb_dr2['Dis No'].str.split('-').str[0]

        # FIXED: Count occurrences by adm2_list with proper column naming
        df = dfb_dr2.groupby(['adm2_list']).agg({'Dis No': 'count'})  # FIXED: specific column name

        # Reset index and rename columns properly
        df1 = df.reset_index()
        df1.columns = ['shapeID', 'count1']
        df1['count2'] = df1['count1']  # Add second count column for compatibility

        # Read shapefile or geojson
        try:
            adm = gp.read_file(ADM2_SHP)
            shape_col = 'shapeID'
        except Exception:
            adm = gp.read_file(ADM1_GEOJSON)
            shape_col = 'GID_1'
            df1 = df1.rename(columns={'shapeID': 'GID_1'})

        # Merge data with geometries
        db = pd.merge(adm, df1, on=shape_col, how='right')

        # Filter out rows with None geometries
        db = db[db['geometry'].notna()]

        print(f"Enhanced drought frequency data processed: {len(db)} regions")
        return db

    except Exception as e:
        print(f"Error in enhanced drought frequency processing: {e}")
        import traceback
        traceback.print_exc()
        return None


def process_flood_data_for_frequency_enhanced():
    """FIXED: Enhanced flood frequency processing with column mismatch fix."""