Source code for aqua_fetch.rr._waterbenchiowa


import os
from typing import List, Union, Dict

import pandas as pd

from .utils import _RainfallRunoff
from ..utils import validate_attributes

from ._map import (
    observed_streamflow_cms,
    observed_streamflow_mm,
    # evapotranspiration
    catchment_area,
    gauge_latitude,
    gauge_longitude,
    slope,
    total_precipitation,
    )


class WaterBenchIowa(_RainfallRunoff):
    """
    Rainfall run-off dataset for Iowa (US) following the work of
    `Demir et al., 2022 <https://doi.org/10.5194/essd-14-5605-2022>`_

    This is hourly dataset of 125 catchments with 7 static features and
    3 dynamic features (pcp, et, discharge) for each catchment.
    The dynamic features are timeseries from 2011-10-01 12:00 to
    2018-09-30 11:00.

    **Note:** Currently the coordinates and catchment boundary files are
    not available for this dataset.

    Examples
    --------
    >>> from aqua_fetch import WaterBenchIowa
    >>> ds = WaterBenchIowa()
    >>> # fetch static and dynamic features of 5 stations
    >>> static, dynamic = ds.fetch(5, static_features='all', as_dataframe=True)
    >>> len(dynamic)  # it is a dictionary with DataFrame
    5
    >>> # keys of dynamic are station names and values are DataFrames
    >>> data = dynamic.popitem()[1]
    >>> data.shape
    (61344, 3)
    >>> static.shape
    (5, 7)
    >>> # using another method
    >>> dynamic = ds.fetch_dynamic_features('644', as_dataframe=True)
    >>> dynamic['644'].shape
    (61344, 3)
    >>> static, dynamic = ds.fetch(stations='644', static_features="all", as_dataframe=True)
    >>> static.shape, dynamic['644'].shape
    ((1, 7), (61344, 3))
    """

    url = "https://zenodo.org/record/7087806#.Y6rW-BVByUk"

    def __init__(self, path=None, **kwargs):
        """
        Parameters
        ----------
        path :
            forwarded unchanged to the parent class initializer.
        **kwargs
            any further keyword arguments, forwarded to the parent class
            initializer.

        Notes
        -----
        The parent initializer is always called with ``timestep='H'``.
        Afterwards ``self._download()`` and ``self._maybe_to_netcdf()`` are
        invoked; both are defined outside this class (presumably on the
        base class -- confirm there for their exact side effects).
        """
        super().__init__(path=path, timestep='H', **kwargs)
        self._download()
        self._maybe_to_netcdf()

    @property
    def static_map(self) -> Dict[str, str]:
        """Mapping from raw static column names in the csv files to the
        names produced by the ``._map`` helpers."""
        # NOTE: this property used to be defined twice with identical
        # bodies; only a single definition is kept.
        return {
            'area': catchment_area(),
            'slope': slope('perc'),
        }

    @property
    def dyn_map(self):
        """Mapping from raw dynamic column names in the csv files to the
        names produced by the ``._map`` helpers.

        Only 'discharge' and 'precipitation' are mapped; any other dynamic
        column keeps its raw name when this mapping is applied.
        """
        return {
            'discharge': observed_streamflow_mm(),
            'precipitation': total_precipitation(),
        }

    def stations(self) -> List[str]:
        """
        Returns the station ids found in :attr:`ts_path`.

        The id is the part of the file name before the first underscore of
        every file ending with ``.csv``. The order is whatever
        ``os.listdir`` yields, which is not guaranteed to be sorted.
        """
        return [fname.split('_')[0] for fname in os.listdir(self.ts_path)
                if fname.endswith('.csv')]

    @property
    def ts_path(self) -> str:
        """Directory, below ``self.path``, holding one csv file per station."""
        return os.path.join(self.path, 'data_time_series', 'data_time_series')

    @property
    def static_features(self) -> List[str]:
        """Names of the 7 static features, after :attr:`static_map` has
        been applied to the raw column names."""
        return ['travel_time', catchment_area(), slope('perc'), 'loam', 'silt',
                'sandy_clay_loam', 'silty_clay_loam']

    def fetch_static_features(
            self,
            stations: Union[str, List[str]] = "all",
            static_features: Union[str, List[str]] = "all"
    ) -> pd.DataFrame:
        """
        Reads the static features of one or more stations.

        Parameters
        ----------
        stations : str
            name/id of station of which to extract the data
        static_features : list/str, optional (default="all")
            The name/names of features to fetch. By default, all available
            static features are returned.

        Returns
        -------
        pd.DataFrame
            one row per requested station and one column per requested
            static feature.

        Examples
        ---------
        >>> from aqua_fetch import WaterBenchIowa
        >>> dataset = WaterBenchIowa()

        get the names of stations

        >>> stns = dataset.stations()
        >>> len(stns)
        125

        get all static data of all stations

        >>> static_data = dataset.fetch_static_features(stns)
        >>> static_data.shape
        (125, 7)

        get static data of one station only

        >>> static_data = dataset.fetch_static_features('592')
        >>> static_data.shape
        (1, 7)

        get the names of static features

        >>> dataset.static_features

        get only selected features of all stations

        >>> static_data = dataset.fetch_static_features(stns, ['slope', 'area_km2'])
        >>> static_data.shape
        (125, 2)
        >>> data = dataset.fetch_static_features('592', static_features=['slope', 'area_km2'])
        >>> data.shape
        (1, 2)

        """
        # validate_attributes is a project helper; presumably it expands
        # "all" and normalizes a single name into a list -- confirm in ..utils
        stations = validate_attributes(stations, self.stations())

        static_features = validate_attributes(
            static_features, self.static_features, 'static_features')

        dfs = []
        for stn in stations:
            fname = os.path.join(self.ts_path, f"{stn}_data.csv")
            # only the first data row of the station file is read; the
            # static values are taken from that single row
            df = pd.read_csv(fname, nrows=1)
            dfs.append(df.iloc[0, :].rename(stn))

        # one Series per station -> columns; transpose so stations are rows
        df = pd.concat(dfs, axis=1).T
        df.rename(columns=self.static_map, inplace=True)

        return df.loc[stations, static_features]

    def _read_stn_dyn(self, stn) -> pd.DataFrame:
        """
        Reads the dynamic features of a single station.

        Parameters
        ----------
        stn :
            id of the station; the file ``<stn>_data.csv`` inside
            :attr:`ts_path` is read.

        Returns
        -------
        pd.DataFrame
            indexed by the parsed 'datetime' column, with the
            precipitation, discharge and et columns (in this order).
            Precipitation and discharge are renamed according to
            :attr:`dyn_map` while 'et' keeps its raw name because it has
            no entry in that mapping.
        """
        fname = os.path.join(self.ts_path, f"{stn}_data.csv")
        df = pd.read_csv(fname)
        df.index = pd.to_datetime(df.pop('datetime'))
        # drop everything (e.g. static columns) except the dynamic features
        df = df.loc[:, ['precipitation', 'discharge', 'et']]
        df.rename(columns=self.dyn_map, inplace=True)
        return df

    @property
    def start(self):
        """First time step of the dynamic features (2011-10-01 12:00)."""
        return pd.Timestamp("20111001 12:00:00")

    @property
    def end(self):
        """Last time step of the dynamic features (2018-09-30 11:00)."""
        return pd.Timestamp("20180930 11:00:00")