Source code for aqua_fetch.rr._simbi


import os
from typing import List, Union, Dict

import pandas as pd

from .utils import _RainfallRunoff

from ._map import (
    catchment_area,
    gauge_latitude,
    gauge_longitude,
    slope
    )

from ._map import (
    observed_streamflow_cms,  
    total_precipitation,
    mean_air_temp
)


[docs] class Simbi(_RainfallRunoff): """ monthly rainfall from 1905 - 2005, daily rainfall from 1920-1940, 70 daily streamflow series, and 23 monthly temperature series for 24 catchments of Haiti Data is obtained from `Bathelemy et al., 2023 <https://doi.org/10.23708/02POK6>`_ while related publication is `Bathelemy et al., 2024 <doi: 10.5194/essd-16-2073-2024>`_ Examples --------- >>> from aqua_fetch import Simbi >>> simbi = Simbi() >>> len(simbi.stations()) 24 """ url = { '00_SIMBI_OBSERVED_DATA.zip': "https://dataverse.ird.fr/api/access/datafile/44141", '01_SIMBI_CATCHMENT.zip': "https://dataverse.ird.fr/api/access/datafile/43638", '02_SIMBI_SIMULATED_STREAMFLOW.zip': "https://dataverse.ird.fr/api/access/datafile/43639", '03_SIMBI_ATTRIBUTE.zip': "https://dataverse.ird.fr/api/access/datafile/43640", "04_SIMBI_MAP.zip": "https://dataverse.ird.fr/api/access/datafile/43646", "08_SIMBI_METADATA.zip": "https://dataverse.ird.fr/api/access/datafile/43644", 'SIMBI_README.txt': 'https://dataverse.ird.fr/api/access/datafile/43644' }
[docs] def __init__( self, path: str = None, overwrite:bool = False, verbosity:int = 1, **kwargs ): """ Arguments: path: path where the Simbi dataset has been downloaded. This path must contain five zip files and one xlsx file. If None, then the data will be downloaded. to_netcdf : """ super().__init__(path=path, verbosity=verbosity, **kwargs) self._download(overwrite=overwrite) self._static_features = self._static_data().columns.tolist() self._dynamic_features = list(self.dyn_map.values()) self._create_boundary_id_map()
@property def boundary_file(self) -> os.PathLike: return os.path.join(self.path, '01_SIMBI_CATCHMENT', 'Haitian_Catchment.shp') @property def boundary_id_map(self) -> str: return "S__URGE" @property def static_map(self) -> Dict[str, str]: return { 'Area': catchment_area(), 'Lat_Cent': gauge_latitude(), 'Slope': slope('degrees'), 'Lon_Cent': gauge_longitude(), } @property def dyn_map(self) -> Dict[str, str]: return { 'temp': mean_air_temp(), 'q': observed_streamflow_cms(), 'pcp': total_precipitation() } @property def static_features(self): return self._static_features @property def dynamic_features(self): return self._dynamic_features @property def _coords_name(self)->List[str]: return ['Lat_Exu', 'Lon_Exu'] @property def _area_name(self) ->str: return 'Area' @property def start(self): return pd.Timestamp("19200101") @property def end(self): return pd.Timestamp("20051231") @property def daily_q_path(self): return os.path.join(self.path, '00_SIMBI_OBSERVED_DATA', '02_DAILY_STREAMFLOW') @property def daily_pcp_path(self): return os.path.join(self.path, '00_SIMBI_OBSERVED_DATA', '01_DAILY_RAINFALL') @property def daily_pcp_20_40_path(self): return os.path.join(self.daily_pcp_path, '1920_1940') @property def daily_pcp_48_60_path(self): return os.path.join(self.daily_pcp_path, '1948_1966') @property def attributes_path(self): return os.path.join(self.path, '03_SIMBI_ATTRIBUTE') @property def clim_sig_path(self): return os.path.join(self.attributes_path, '01_CLIMATIC_SIGNATURE') @property def daily_clim_sig_path(self): return os.path.join(self.clim_sig_path, '02_DAILY') @property def monthly_clim_sig_path(self): return os.path.join(self.clim_sig_path, '01_MONTHLY') @property def other_attrs_path(self): return os.path.join(self.attributes_path, '02_OTHERS') @property def temp_path(self): return os.path.join(self.path, '00_SIMBI_OBSERVED_DATA', '05_DAILY_LONG_TERM_AVERAGE_TEMPERATURE')
[docs] def stations(self)->List[str]: """Returns names/IDs of 24 stations which have all (boundary, streamflow, static features) data. Although there are 70 stations which have daily streamflow data, only 24 of them have static + boundary data. """ return self.boundary_stations()
[docs] def all_stations(self)->List[str]: """ Not all stations have all data. """ return [f"0{str(i).zfill(2)}" for i in range(1, 71)]
[docs] def q_stations(self)->List[str]: """ Returns names/IDs of 70 stations with daily streamflow data. """ return [f"0{str(i).zfill(2)}" for i in range(1, 71)]
[docs] def pcp_stations(self)->List[str]: """ Returns IDs of 74 stations with daily rainfall data. """ s1 = [stn.split('.')[0].split('_')[1] for stn in os.listdir(self.daily_pcp_20_40_path)] s2 = [stn.split('.')[0].split('_')[1] for stn in os.listdir(self.daily_pcp_48_60_path)] return list(set(s1 + s2))
[docs] def temp_stations(self)->List[str]: """ Returns names/IDs of 21 stations with daily temperature data. """ return [stn.split('.')[0].split('_')[1] for stn in os.listdir(self.temp_path)]
[docs] def boundary_stations(self)->List[str]: """ Returns names/IDs of 24 stations with boundary data. """ return list(self.bndry_id_map_.keys())
[docs] def static_data_stations(self)->List[str]: """ Returns names/IDs of 24 stations with static data. """ return self._static_data().index.tolist()
[docs] def daily_bsi(self)->pd.DataFrame: """ Read the daily BSI values. """ fpath = os.path.join(self.daily_clim_sig_path, 'baseflow_index.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d" for i in df.columns] return df
[docs] def daily_high_q_dur(self)->pd.DataFrame: """ Read the daily high flow values. """ fpath = os.path.join(self.daily_clim_sig_path, 'high_q_dur.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_hq_dur" for i in df.columns] return df
[docs] def daily_high_q_freq(self)->pd.DataFrame: """ Read the daily flow frequency values. """ fpath = os.path.join(self.daily_clim_sig_path, 'high_q_freq.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_hq_freq" for i in df.columns] return df
[docs] def daily_low_q_dur(self)->pd.DataFrame: """ Read the daily low flow values. """ fpath = os.path.join(self.daily_clim_sig_path, 'low_q_dur.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_lq_dur" for i in df.columns] return df
[docs] def daily_low_q_freq(self)->pd.DataFrame: """ Read the daily low flow frequency values. """ fpath = os.path.join(self.daily_clim_sig_path, 'low_q_freq.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_lq_freq" for i in df.columns] return df
[docs] def daily_q_mean(self)->pd.DataFrame: """ Read the daily mean flow values. """ fpath = os.path.join(self.daily_clim_sig_path, 'q_mean.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_mean" for i in df.columns] return df
[docs] def daily_quantile_5(self)->pd.DataFrame: """ Read the daily 5th quantile flow values. """ fpath = os.path.join(self.daily_clim_sig_path, 'quantile_5.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_q5" for i in df.columns] return df
[docs] def daily_quantile_95(self)->pd.DataFrame: """ Read the daily 95th quantile flow values. """ fpath = os.path.join(self.daily_clim_sig_path, 'quantile_95.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_d_q95" for i in df.columns] return df
[docs] def daily_clim_sigs(self)->pd.DataFrame: """ Read the daily climate signatures. """ return pd.concat([ self.daily_bsi(), self.daily_high_q_dur(), self.daily_high_q_freq(), self.daily_low_q_dur(), self.daily_low_q_freq(), self.daily_q_mean(), self.daily_quantile_5(), self.daily_quantile_95() ], axis=1)
[docs] def monthly_aridity_runoff(self)->pd.DataFrame: """ Read the monthly aridity runoff values. """ fpath = os.path.join(self.monthly_clim_sig_path, 'aridity_runoff.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_mon_arid" for i in df.columns] return df
[docs] def monthly_average(self)->pd.DataFrame: """ Read the monthly average flow values. """ fpath = os.path.join(self.monthly_clim_sig_path, 'average.csv') df = pd.read_csv(fpath, #parse_dates=True, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_mon_avg" for i in df.columns] return df
[docs] def monthly_QMNA5(self)->pd.DataFrame: """ Read the monthly QMNA5 flow values. """ fpath = os.path.join(self.monthly_clim_sig_path, 'QMNA5.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_mon_QMNA5" for i in df.columns] return df
[docs] def monthly_QMXA10(self)->pd.DataFrame: """ Read the monthly QMNA10 flow values. """ fpath = os.path.join(self.monthly_clim_sig_path, 'QMXA10.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_mon_QMXA10" for i in df.columns] return df
[docs] def monthly_quantile_5(self)->pd.DataFrame: """ Read the monthly 5th quantile flow values. """ fpath = os.path.join(self.monthly_clim_sig_path, 'quantile_5.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_mon_q5" for i in df.columns] return df
[docs] def monthly_quantile_95(self)->pd.DataFrame: """ Read the monthly 95th quantile flow values. """ fpath = os.path.join(self.monthly_clim_sig_path, 'quantile_95.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_mon_q95" for i in df.columns] return df
[docs] def monthly_clim_sigs(self)->pd.DataFrame: """ Read the monthly climate signatures. """ return pd.concat([ self.monthly_aridity_runoff(), self.monthly_average(), self.monthly_QMNA5(), self.monthly_QMXA10(), self.monthly_quantile_5(), self.monthly_quantile_95() ], axis=1)
[docs] def stream_density(self)->pd.DataFrame: """ Read the stream density values. """ fpath = os.path.join(self.other_attrs_path, 'stream_density.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] return df
[docs] def percent_lc_98(self)->pd.DataFrame: """ Read the land cover percentage values. """ fpath = os.path.join(self.other_attrs_path, 'Percent_land_cover_98.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_lc_98" for i in df.columns] return df
[docs] def percent_lc_95(self)->pd.DataFrame: """ Read the 95th land cover percentage values. """ fpath = os.path.join(self.other_attrs_path, 'Percent_land_cover_95.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_lc_95" for i in df.columns] return df
[docs] def percent_geology(self)->pd.DataFrame: """ Read the geology percentage values. """ fpath = os.path.join(self.other_attrs_path, 'Percent_geologic_class.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] df.columns = [f"{i}_geol" for i in df.columns] return df
[docs] def topography(self)->pd.DataFrame: """ Read the topography values. """ fpath = os.path.join(self.other_attrs_path, 'location_and_topography.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] return df
[docs] def hypsometric_curve(self)->pd.DataFrame: """ Read the hyposometric curve values. """ fpath = os.path.join(self.other_attrs_path, 'hypsometric_curve.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] return df
[docs] def aquifer_class(self)->pd.DataFrame: """ Read the aquifer class values. """ fpath = os.path.join(self.other_attrs_path, 'Percent_aquifer_class.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] return df
[docs] def carb_sed_magma(self)->pd.DataFrame: """ Read the carbonated sedimentary and magmatic values. """ fpath = os.path.join(self.other_attrs_path, 'Percent_carb_sediment_magma.csv') df = pd.read_csv(fpath, index_col=0) df.index = [i.split('-')[1] for i in df.index] return df
[docs] def other_attributes(self)->pd.DataFrame: """ Read the other attributes. """ return pd.concat([ self.stream_density(), self.percent_lc_98(), self.percent_lc_95(), self.percent_geology(), self.topography(), self.hypsometric_curve(), self.aquifer_class(), self.carb_sed_magma() ], axis=1)
[docs] def clim_sigs(self)->pd.DataFrame: """ Read the climate signatures. """ return pd.concat([ self.daily_clim_sigs(), self.monthly_clim_sigs() ], axis=1)
def _static_data(self)->pd.DataFrame: """ Read the static data. """ df = pd.concat([ self.other_attributes(), self.clim_sigs() ], axis=1) df.rename(columns=self.static_map, inplace=True) return df
[docs] def read_stn_q(self, stn:str)->pd.DataFrame: """ Read the daily streamflow data for a station. """ fpath = os.path.join(self.daily_q_path, f'Q_{stn}.csv') df = pd.read_csv(fpath, parse_dates=True, index_col=0) return df
[docs] def read_stn_pcp(self, stn:str)->pd.DataFrame: """ Read the daily rainfall data for a station. """ df1, df2 = pd.DataFrame(columns=['P']), pd.DataFrame(columns=['P']) fpath = os.path.join(self.daily_pcp_20_40_path, f'P_{stn}.csv') if os.path.exists(fpath): df1 = pd.read_csv(fpath, parse_dates=True, index_col=0) #df1.columns = ['pcp'] fpath = os.path.join(self.daily_pcp_48_60_path, f'P_{stn}.csv') if os.path.exists(fpath): df2 = pd.read_csv(fpath, parse_dates=True, index_col=0) #df2.columns = ['pcp2'] df = pd.concat([df1, df2]) return df
[docs] def read_stn_temp(self, stn:str)->pd.DataFrame: """ Read the daily temperature data for a station. """ df = pd.DataFrame(columns=['temp']) fpath = os.path.join(self.temp_path, f'P_{stn}.csv') if os.path.exists(fpath): df = pd.read_csv(fpath, parse_dates=True, index_col=0) return df
def _read_stn_dyn(self, stn:str)->pd.DataFrame: """ Read the daily streamflow, rainfall, and temperature data for a station. """ df1 = self.read_stn_q(stn) df2 = self.read_stn_pcp(stn) df3 = self.read_stn_temp(stn) df = pd.concat([df1, df2, df3], axis=1) df.columns = ['q', 'pcp', 'temp'] df.rename(columns=self.dyn_map, inplace=True) df.index = pd.to_datetime(df.index) df.columns.name = 'dynamic_features' df.index.name = 'time' return df.sort_index()