Source code for aqua_fetch.rr._waterbenchiowa


import os
from typing import List, Union, Dict

import pandas as pd

from .utils import _RainfallRunoff
from ..utils import validate_attributes

from ._map import (
    observed_streamflow_cms,
    observed_streamflow_mm,
    # evapotranspiration
    catchment_area,
    gauge_latitude,
    gauge_longitude,
    slope,
    total_precipitation,
    )



[docs]
class WaterBenchIowa(_RainfallRunoff):
    """
    Rainfall run-off dataset for Iowa (US) following the work of
    `Demir et al., 2022 <https://doi.org/10.5194/essd-14-5605-2022>`_.
    This is an hourly dataset of 125 catchments with
    7 static features and 3 dynamic features (pcp, et, discharge) for each catchment.
    The dynamic features are timeseries from 2011-10-01 12:00 to 2018-09-30 11:00.

    **Note:** Currently the coordinates and catchment boundary files are not available
    for this dataset.

    Examples
    --------
    >>> from aqua_fetch import WaterBenchIowa
    >>> ds = WaterBenchIowa()
    ... # fetch static and dynamic features of 5 stations
    >>> static, dynamic = ds.fetch(5, static_features='all', as_dataframe=True)
    >>> len(dynamic)  # it is a dictionary with DataFrame
    5
    ... # keys of dynamic are station names and values are DataFrames
    >>> data = dynamic.popitem()[1]
    >>> data.shape
    (61344, 3)
    >>> static.shape
    (5, 7)
    ...
    ... # using another method
    >>> dynamic = ds.fetch_dynamic_features('644', as_dataframe=True)
    >>> dynamic['644'].shape
    (61344, 3)
    ...
    >>> static, dynamic = ds.fetch(stations='644', static_features="all", as_dataframe=True)
    >>> static.shape, dynamic['644'].shape
    ((1, 7), (61344, 3))
    """
    # Zenodo record of the dataset
    # NOTE(review): presumably consumed by the download logic of the parent class -- confirm
    url = "https://zenodo.org/record/7087806#.Y6rW-BVByUk"


[docs]
    def __init__(self, path=None, **kwargs):
        """
        Parameters
        ----------
            path : optional (default=None)
                forwarded unchanged to the parent class as ``path``.
            **kwargs :
                any further keyword arguments, forwarded to the parent class.
        """
        # the timestep handed to the parent class is always 'H' for this dataset
        super().__init__(path=path, timestep='H', **kwargs)

        # the order matters: ``_download`` runs before ``_maybe_to_netcdf``
        self._download()
        self._maybe_to_netcdf()


    @property
    def static_map(self) -> Dict[str, str]:
        """
        Mapping from the static column names ``area`` and ``slope`` to the
        names returned by ``catchment_area()`` and ``slope('perc')``.
        """
        # NOTE(review): an identical ``static_map`` property is defined again
        # further down in this class; the later definition is the one in effect.
        return dict(area=catchment_area(), slope=slope('perc'))

    @property
    def dyn_map(self):
        """
        Mapping from the dynamic column names ``discharge`` and
        ``precipitation`` to the names returned by ``observed_streamflow_mm()``
        and ``total_precipitation()``.
        """
        return dict(
            discharge=observed_streamflow_mm(),
            precipitation=total_precipitation(),
        )


[docs]
    def stations(self)->List[str]:
        return [fname.split('_')[0] for fname in os.listdir(self.ts_path) if fname.endswith('.csv')]


    @property
    def ts_path(self)->str:
        return os.path.join(self.path, 'data_time_series', 'data_time_series')

    @property
    def static_features(self)->List[str]:
        """Names of the 7 static features, in the order in which they are reported."""
        names = ['travel_time', catchment_area(), slope('perc')]
        names.extend(['loam', 'silt', 'sandy_clay_loam', 'silty_clay_loam'])
        return names

    @property
    def static_map(self)->Dict[str, str]:
        """
        Mapping used to rename the static columns ``area`` and ``slope`` to
        the names returned by ``catchment_area()`` and ``slope('perc')``.
        """
        mapping = {}
        mapping['area'] = catchment_area()
        mapping['slope'] = slope('perc')
        return mapping


[docs]
    def fetch_static_features(
            self,
            stations: Union[str, List[str]] = "all",
            static_features:Union[str, List[str]] = "all"
    )->pd.DataFrame:
        """
        Returns the static features of one or more stations. The values are
        taken from the first data row of the ``<station>_data.csv`` file of
        each station.

        Parameters
        ----------
            stations : str/list, optional (default="all")
                name/id or names/ids of the stations of which to extract the data
            static_features : list/str, optional (default="all")
                The name/names of features to fetch. By default, all available
                static features are returned.

        Returns
        -------
        pd.DataFrame
            a DataFrame with one row per station and one column per
            requested static feature.

        Examples
        ---------
        >>> from aqua_fetch import WaterBenchIowa
        >>> dataset = WaterBenchIowa()
        get the names of stations
        >>> stns = dataset.stations()
        >>> len(stns)
        125
        get all static data of all stations
        >>> static_data = dataset.fetch_static_features(stns)
        >>> static_data.shape
        (125, 7)
        get static data of one station only
        >>> static_data = dataset.fetch_static_features('592')
        >>> static_data.shape
        (1, 7)
        get the names of static features
        >>> dataset.static_features
        get only selected features of all stations
        >>> static_data = dataset.fetch_static_features(stns, ['slope', 'area_km2'])
        >>> static_data.shape
        (125, 2)
        >>> data = dataset.fetch_static_features('592', static_features=['slope', 'area_km2'])
        >>> data.shape
        (1, 2)

        """
        stations = validate_attributes(stations, self.stations())
        static_features = validate_attributes(
            static_features, self.static_features, 'static_features')

        # only the first data row of every station file is read; it becomes
        # a Series which is named after the station
        first_rows = [
            pd.read_csv(
                os.path.join(self.ts_path, f"{station}_data.csv"), nrows=1
            ).iloc[0, :].rename(station)
            for station in stations
        ]

        # one column per station -> transpose so that every station is a row
        table = pd.concat(first_rows, axis=1).T
        table = table.rename(columns=self.static_map)

        return table.loc[stations, static_features]


    def _read_stn_dyn(self, stn)->pd.DataFrame:
        fname = os.path.join(self.ts_path, f"{stn}_data.csv")
        df = pd.read_csv(fname)
        df.index = pd.to_datetime(df.pop('datetime'))
        df = df.loc[:, ['precipitation', 'discharge', 'et']]

        df.rename(columns=self.dyn_map, inplace=True)
        return df

    @property
    def start(self):
        return pd.Timestamp("20111001 12:00:00")

    @property
    def end(self):
        return pd.Timestamp("20180930 11:00:00")