Source code for aqua_fetch.rr._simbi


import os
from typing import List, Union, Dict

import pandas as pd

from .utils import _RainfallRunoff

from ._map import (
    catchment_area,
    gauge_latitude,
    gauge_longitude,
    slope
    )

from ._map import (
    observed_streamflow_cms,  
    total_precipitation,
    mean_air_temp
)



[docs]
class Simbi(_RainfallRunoff):
    """
    monthly rainfall from 1905 - 2005, daily rainfall from 1920-1940, 70 daily
    streamflow series, and 23 monthly temperature series for 24 catchments of Haiti

    Data is obtained from `Bathelemy et al., 2023 <https://doi.org/10.23708/02POK6>`_
    while related publication is `Bathelemy et al., 2024 <doi: 10.5194/essd-16-2073-2024>`_

    Examples
    ---------
    >>> from aqua_fetch import Simbi
    >>> simbi = Simbi()
    >>> len(simbi.stations())
    24

    """

    url = {
        '00_SIMBI_OBSERVED_DATA.zip': "https://dataverse.ird.fr/api/access/datafile/44141",
        '01_SIMBI_CATCHMENT.zip': "https://dataverse.ird.fr/api/access/datafile/43638",
        '02_SIMBI_SIMULATED_STREAMFLOW.zip': "https://dataverse.ird.fr/api/access/datafile/43639",
        '03_SIMBI_ATTRIBUTE.zip': "https://dataverse.ird.fr/api/access/datafile/43640",
        "04_SIMBI_MAP.zip": "https://dataverse.ird.fr/api/access/datafile/43646",
        "08_SIMBI_METADATA.zip": "https://dataverse.ird.fr/api/access/datafile/43644",
        'SIMBI_README.txt': 'https://dataverse.ird.fr/api/access/datafile/43644'

    }


[docs]
    def __init__(
            self,
            path: str = None,
            overwrite:bool = False,
            verbosity:int = 1,
            **kwargs
    ):
        """
        Arguments:
            path: path where the Simbi dataset has been downloaded. This path
                must contain five zip files and one xlsx file. If None, then the
                data will be downloaded.
            to_netcdf :
        """
        super().__init__(path=path, verbosity=verbosity, **kwargs)    

        self._download(overwrite=overwrite)

        self._static_features = self._static_data().columns.tolist()
        self._dynamic_features = list(self.dyn_map.values())

        self._create_boundary_id_map()


    @property
    def boundary_file(self) -> os.PathLike:
        return os.path.join(self.path, '01_SIMBI_CATCHMENT', 'Haitian_Catchment.shp')

    @property
    def boundary_id_map(self) -> str:
        return "S__URGE"

    @property
    def static_map(self) -> Dict[str, str]:
        return {
                'Area': catchment_area(),
                'Lat_Cent': gauge_latitude(),
                'Slope': slope('degrees'),
                'Lon_Cent': gauge_longitude(),
        }

    @property
    def dyn_map(self) -> Dict[str, str]:
        return {
            'temp': mean_air_temp(),
            'q': observed_streamflow_cms(),
            'pcp': total_precipitation()
        }

    @property
    def static_features(self):
        return self._static_features
    
    @property
    def dynamic_features(self):
        return self._dynamic_features

    @property
    def _coords_name(self)->List[str]:
        return ['Lat_Exu', 'Lon_Exu']

    @property
    def _area_name(self) ->str:
        return 'Area'  

    @property
    def start(self):
        return pd.Timestamp("19200101")

    @property
    def end(self):
        return pd.Timestamp("20051231")

    @property
    def daily_q_path(self):
        return os.path.join(self.path, '00_SIMBI_OBSERVED_DATA', '02_DAILY_STREAMFLOW')

    @property
    def daily_pcp_path(self):
        return os.path.join(self.path, '00_SIMBI_OBSERVED_DATA', '01_DAILY_RAINFALL') 
    
    @property
    def daily_pcp_20_40_path(self):
        return os.path.join(self.daily_pcp_path, '1920_1940')

    @property
    def daily_pcp_48_60_path(self):
        return os.path.join(self.daily_pcp_path, '1948_1966')

    @property
    def attributes_path(self):
        return os.path.join(self.path, '03_SIMBI_ATTRIBUTE')

    @property
    def clim_sig_path(self):
        return os.path.join(self.attributes_path, '01_CLIMATIC_SIGNATURE')   
    
    @property
    def daily_clim_sig_path(self):
        return os.path.join(self.clim_sig_path, '02_DAILY')
    
    @property
    def monthly_clim_sig_path(self):
        return os.path.join(self.clim_sig_path, '01_MONTHLY')
    
    @property
    def other_attrs_path(self):
        return os.path.join(self.attributes_path, '02_OTHERS')

    @property
    def temp_path(self):
        return os.path.join(self.path, '00_SIMBI_OBSERVED_DATA', '05_DAILY_LONG_TERM_AVERAGE_TEMPERATURE')


[docs]
    def stations(self)->List[str]:
        """Returns names/IDs of 24 stations which have all (boundary, streamflow, 
        static features) data. Although there are 70 stations which have daily 
        streamflow data, only 24 of them have static + boundary data.
        """
        return self.boundary_stations()



[docs]
    def all_stations(self)->List[str]:
        """
        Not all stations have all data.
        """
        return [f"0{str(i).zfill(2)}" for i in range(1, 71)]

    

[docs]
    def q_stations(self)->List[str]:
        """
        Returns names/IDs of 70 stations with daily streamflow data.
        """
        return [f"0{str(i).zfill(2)}" for i in range(1, 71)]

    

[docs]
    def pcp_stations(self)->List[str]:
        """
        Returns IDs of 74 stations with daily rainfall data.
        """
        s1 = [stn.split('.')[0].split('_')[1] for stn in os.listdir(self.daily_pcp_20_40_path)]
        s2 = [stn.split('.')[0].split('_')[1] for stn in os.listdir(self.daily_pcp_48_60_path)]
        return list(set(s1 + s2))

    

[docs]
    def temp_stations(self)->List[str]:
        """
        Returns names/IDs of 21 stations with daily temperature data.
        """
        return [stn.split('.')[0].split('_')[1] for stn in os.listdir(self.temp_path)]

    

[docs]
    def boundary_stations(self)->List[str]:
        """
        Returns names/IDs of 24 stations with boundary data.
        """
        return list(self.bndry_id_map_.keys())



[docs]
    def static_data_stations(self)->List[str]:
        """
        Returns names/IDs of 24 stations with static data.
        """
        return self._static_data().index.tolist()



[docs]
    def daily_bsi(self)->pd.DataFrame:
        """
        Read the daily BSI values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'baseflow_index.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d" for i in df.columns]
        return df

    

[docs]
    def daily_high_q_dur(self)->pd.DataFrame:
        """
        Read the daily high flow values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'high_q_dur.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_hq_dur" for i in df.columns]
        return df

    

[docs]
    def daily_high_q_freq(self)->pd.DataFrame:
        """
        Read the daily flow frequency values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'high_q_freq.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_hq_freq" for i in df.columns]
        return df

    

[docs]
    def daily_low_q_dur(self)->pd.DataFrame:
        """
        Read the daily low flow values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'low_q_dur.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_lq_dur" for i in df.columns]
        return df

    

[docs]
    def daily_low_q_freq(self)->pd.DataFrame:
        """
        Read the daily low flow frequency values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'low_q_freq.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_lq_freq" for i in df.columns]
        return df

    

[docs]
    def daily_q_mean(self)->pd.DataFrame:
        """
        Read the daily mean flow values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'q_mean.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_mean" for i in df.columns]
        return df

    

[docs]
    def daily_quantile_5(self)->pd.DataFrame:
        """
        Read the daily 5th quantile flow values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'quantile_5.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_q5" for i in df.columns]
        return df



[docs]
    def daily_quantile_95(self)->pd.DataFrame:
        """
        Read the daily 95th quantile flow values.
        """
        fpath = os.path.join(self.daily_clim_sig_path, 'quantile_95.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_d_q95" for i in df.columns]
        return df

    

[docs]
    def daily_clim_sigs(self)->pd.DataFrame:
        """
        Read the daily climate signatures.
        """
        return pd.concat([
            self.daily_bsi(),
            self.daily_high_q_dur(),
            self.daily_high_q_freq(),
            self.daily_low_q_dur(),
            self.daily_low_q_freq(),
            self.daily_q_mean(),
            self.daily_quantile_5(),
            self.daily_quantile_95()
        ], axis=1)

    

[docs]
    def monthly_aridity_runoff(self)->pd.DataFrame:
        """
        Read the monthly aridity runoff values.
        """
        fpath = os.path.join(self.monthly_clim_sig_path, 'aridity_runoff.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_mon_arid" for i in df.columns]
        return df

    

[docs]
    def monthly_average(self)->pd.DataFrame:
        """
        Read the monthly average flow values.
        """
        fpath = os.path.join(self.monthly_clim_sig_path, 'average.csv')
        df = pd.read_csv(fpath, #parse_dates=True, 
                         index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_mon_avg" for i in df.columns]
        return df

    

[docs]
    def monthly_QMNA5(self)->pd.DataFrame:
        """
        Read the monthly QMNA5 flow values.
        """
        fpath = os.path.join(self.monthly_clim_sig_path, 'QMNA5.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_mon_QMNA5" for i in df.columns]
        return df

    

[docs]
    def monthly_QMXA10(self)->pd.DataFrame:
        """
        Read the monthly QMNA10 flow values.
        """
        fpath = os.path.join(self.monthly_clim_sig_path, 'QMXA10.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_mon_QMXA10" for i in df.columns]
        return df

    

[docs]
    def monthly_quantile_5(self)->pd.DataFrame:
        """
        Read the monthly 5th quantile flow values.
        """
        fpath = os.path.join(self.monthly_clim_sig_path, 'quantile_5.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_mon_q5" for i in df.columns]
        return df



[docs]
    def monthly_quantile_95(self)->pd.DataFrame:
        """
        Read the monthly 95th quantile flow values.
        """
        fpath = os.path.join(self.monthly_clim_sig_path, 'quantile_95.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_mon_q95" for i in df.columns]
        return df

    

[docs]
    def monthly_clim_sigs(self)->pd.DataFrame:
        """
        Read the monthly climate signatures.
        """
        return pd.concat([
            self.monthly_aridity_runoff(),
            self.monthly_average(),
            self.monthly_QMNA5(),
            self.monthly_QMXA10(),
            self.monthly_quantile_5(),
            self.monthly_quantile_95()
        ], axis=1)



[docs]
    def stream_density(self)->pd.DataFrame:
        """
        Read the stream density values.
        """
        fpath = os.path.join(self.other_attrs_path, 'stream_density.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        return df

    

[docs]
    def percent_lc_98(self)->pd.DataFrame:
        """
        Read the land cover percentage values.
        """
        fpath = os.path.join(self.other_attrs_path, 'Percent_land_cover_98.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_lc_98" for i in df.columns]
        return df

    

[docs]
    def percent_lc_95(self)->pd.DataFrame:
        """
        Read the 95th land cover percentage values.
        """
        fpath = os.path.join(self.other_attrs_path, 'Percent_land_cover_95.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_lc_95" for i in df.columns]
        return df

    

[docs]
    def percent_geology(self)->pd.DataFrame:
        """
        Read the geology percentage values.
        """
        fpath = os.path.join(self.other_attrs_path, 'Percent_geologic_class.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        df.columns = [f"{i}_geol" for i in df.columns]
        return df

    

[docs]
    def topography(self)->pd.DataFrame:
        """
        Read the topography values.
        """
        fpath = os.path.join(self.other_attrs_path, 'location_and_topography.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        return df

    

[docs]
    def hypsometric_curve(self)->pd.DataFrame:
        """
        Read the hyposometric curve values.
        """
        fpath = os.path.join(self.other_attrs_path, 'hypsometric_curve.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        return df

    

[docs]
    def aquifer_class(self)->pd.DataFrame:
        """
        Read the aquifer class values.
        """
        fpath = os.path.join(self.other_attrs_path, 'Percent_aquifer_class.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        return df

    

[docs]
    def carb_sed_magma(self)->pd.DataFrame:
        """
        Read the carbonated sedimentary and magmatic values.
        """
        fpath = os.path.join(self.other_attrs_path, 'Percent_carb_sediment_magma.csv')
        df = pd.read_csv(fpath, index_col=0)
        df.index = [i.split('-')[1] for i in df.index]
        return df

    

[docs]
    def other_attributes(self)->pd.DataFrame:
        """
        Read the other attributes.
        """
        return pd.concat([
            self.stream_density(),
            self.percent_lc_98(),
            self.percent_lc_95(),
            self.percent_geology(),
            self.topography(),
            self.hypsometric_curve(),
            self.aquifer_class(),
            self.carb_sed_magma()
        ], axis=1)



[docs]
    def clim_sigs(self)->pd.DataFrame:
        """
        Read the climate signatures.
        """
        return pd.concat([
            self.daily_clim_sigs(),
            self.monthly_clim_sigs()
        ], axis=1)

    
    def _static_data(self)->pd.DataFrame:
        """
        Read the static data.
        """
        df = pd.concat([
            self.other_attributes(),
            self.clim_sigs()
        ], axis=1)

        df.rename(columns=self.static_map, inplace=True)

        return df


[docs]
    def read_stn_q(self, stn:str)->pd.DataFrame:
        """
        Read the daily streamflow data for a station.
        """
        fpath = os.path.join(self.daily_q_path, f'Q_{stn}.csv')
        df = pd.read_csv(fpath, parse_dates=True, index_col=0)
        return df

    

[docs]
    def read_stn_pcp(self, stn:str)->pd.DataFrame:
        """
        Read the daily rainfall data for a station.
        """
        df1, df2 = pd.DataFrame(columns=['P']), pd.DataFrame(columns=['P'])
        fpath = os.path.join(self.daily_pcp_20_40_path, f'P_{stn}.csv')
        if os.path.exists(fpath):
            df1 = pd.read_csv(fpath, parse_dates=True, index_col=0)
            #df1.columns = ['pcp']
        
        fpath = os.path.join(self.daily_pcp_48_60_path, f'P_{stn}.csv')
        if os.path.exists(fpath):
            df2 = pd.read_csv(fpath, parse_dates=True, index_col=0)
            #df2.columns = ['pcp2']
        
        df = pd.concat([df1, df2])

        return df

    

[docs]
    def read_stn_temp(self, stn:str)->pd.DataFrame:
        """
        Read the daily temperature data for a station.
        """
        df = pd.DataFrame(columns=['temp'])
        fpath = os.path.join(self.temp_path, f'P_{stn}.csv')
        if os.path.exists(fpath):
            df = pd.read_csv(fpath, parse_dates=True, index_col=0)
        return df

    
    def _read_stn_dyn(self, stn:str)->pd.DataFrame:
        """
        Read the daily streamflow, rainfall, and temperature data for a station.
        """
        df1 = self.read_stn_q(stn)
        df2 = self.read_stn_pcp(stn)
        df3 = self.read_stn_temp(stn)
        df = pd.concat([df1, df2, df3], axis=1)
        df.columns = ['q', 'pcp', 'temp']

        df.rename(columns=self.dyn_map, inplace=True)

        df.index = pd.to_datetime(df.index)
        df.columns.name = 'dynamic_features'
        df.index.name = 'time'
        return df.sort_index()