Source code for aqua_fetch.wq._river_chem_siberia


# Public API of this module: the only name exported by a star-import.
__all__ = ["RiverChemSiberia"]

import os
from typing import List

import pandas as pd

from .._backend import fiona
from .._datasets import Datasets
from .._geom_utils import calc_centroid


class RiverChemSiberia(Datasets):
    """
    A database of water chemistry in eastern Siberian rivers following
    `Liu et al., 2022 <https://doi.org/10.1038/s41597-022-01844-y>`_ .
    The dataset consists of meteorological data, water chemistry data, and
    shapefiles of 7 basins in eastern Siberia. The data is collected from
    1991 to 2012. The dataset is available at
    `figshare <https://doi.org/10.6084/m9.figshare.c.5831975.v1>`_ .

    Following parameters are available in the dataset:

        - ``La``
        - ``Lo``
        - ``Ca2+``
        - ``Mg2+``
        - ``K+``
        - ``Na+``
        - ``Cl-``
        - ``SO42-``
        - ``HCO3-``
        - ``TDS``
        - ``pH``
        - ``River``
        - ``Basin``
        - ``Subbasin``
        - ``Tannual``
        - ``Tmonthly``
        - ``Pannual``
        - ``Pmonthly``
        - ``Lithology``
        - ``Permafrost type``
        - ``IB``
        - ``Discharge``
        - ``Ori_ID``
        - ``Li``
        - ``Sr``
        - ``As``
        - ``Ba``
        - ``Si``
        - ``87Sr/86Sr``
        - ``¦Ä18O-H2O``
        - ``¦Ä2H-H2O``

    .. note::
        The ``¦Ä`` prefix in the last two names is presumably the Greek
        delta symbol as it comes out of the ``latin1`` decoding used in
        :meth:`database` -- TODO confirm the encoding of the source file.

    Examples
    --------
    >>> from aqua_fetch import RiverChemSiberia
    >>> ds = RiverChemSiberia()
    >>> ds.stations()
    ['Selenga-Baikal', 'Angara', 'Lena', 'Eastern-Siberia', 'Kolyma', 'Yana', 'Indigirka']
    >>> len(ds.parameters)
    34
    """

    # Archive name -> download link for the two zip files of the dataset.
    url = {
        "Sample data.zip": "https://springernature.figshare.com/ndownloader/files/37706754",
        "Boundary data.zip": "https://springernature.figshare.com/ndownloader/files/37706622"
    }

    def __init__(self, path=None, **kwargs):
        """
        Parameters
        ----------
        path :
            Location of the dataset on disk; forwarded to the parent class
            and also assigned to ``ds_dir``.
        **kwargs :
            Additional keyword arguments forwarded unchanged to the parent
            class initializer.
        """
        super().__init__(path=path, **kwargs)
        self.ds_dir = path
        # ``_download`` is not defined in this class; presumably the parent
        # class fetches the archives listed in ``url`` -- verify there.
        self._download()

    @property
    def parameters(self) -> List[str]:
        """
        Returns the parameters available in the dataset.

        The names are the column labels of the frame returned by
        :meth:`database`, so the data file is read on every access.
        """
        return self.database().columns.tolist()

    def stn_coords(self) -> pd.DataFrame:
        """
        Returns the coordinates of the stations.

        One centroid is computed for every feature in
        ``Basin_boundary.shp`` (labelled with the feature's ``Basin``
        property) and for every feature in
        ``Eastern_Siberia_boundary.shp`` (labelled ``"Eastern_Siberia"``).

        Returns
        -------
        pd.DataFrame
            Frame with columns ``long`` and ``lat``, indexed by the label
            described above (index name ``index``).
        """
        # Both shapefiles live in the same (doubly nested) folder.
        boundary_dir = os.path.join(self.path, "Boundary data", "Boundary data")

        coords = []

        # Read Basin_boundary.shp: the label comes from each feature's
        # 'Basin' property.
        with fiona.open(os.path.join(boundary_dir, "Basin_boundary.shp")) as src:
            for feature in src:
                # calc_centroid must yield exactly two values, since the
                # rows are later named ('long', 'lat', 'index'); assumes
                # the order is (long, lat) -- TODO confirm against helper.
                centroid = calc_centroid(feature['geometry'])
                coords.append([*centroid, feature.properties['Basin']])

        # Read Eastern_Siberia_boundary.shp: every feature gets the same
        # fixed label.
        # NOTE(review): the label uses an underscore, while the class
        # docstring example of ``stations()`` shows 'Eastern-Siberia' with
        # a hyphen -- confirm whether the two are meant to match.
        with fiona.open(os.path.join(boundary_dir, "Eastern_Siberia_boundary.shp")) as src:
            for feature in src:
                centroid = calc_centroid(feature['geometry'])
                coords.append([*centroid, "Eastern_Siberia"])

        return pd.DataFrame(coords, columns=['long', 'lat', 'index']).set_index('index')

    def stations(self) -> List[str]:
        """
        Returns the names of (7) stations available in the dataset.

        The names are the unique values of the ``Basin`` column of
        :meth:`database`, in order of first appearance.
        """
        return self.database()['Basin'].unique().tolist()

    def database(self) -> pd.DataFrame:
        """
        Returns the database of the water chemistry in eastern Siberian rivers.

        Returns
        -------
        pd.DataFrame
            Contents of ``Samples_database.csv`` without the ``Year`` and
            ``Month`` columns, indexed by the timestamps parsed from the
            ``Data`` column (which is removed from the columns).

        Raises
        ------
        KeyError
            If any of the ``Year``, ``Month`` or ``Data`` columns is
            missing from the file.
        """
        fpath = os.path.join(self.path, "Sample data", "Sample data", "Samples_database.csv")

        # read the data file with encoding which can handle the special characters
        df = pd.read_csv(fpath, encoding='latin1', index_col=0)

        # Drop the two calendar columns in one call.
        df = df.drop(columns=['Year', 'Month'])

        # The timestamp column is spelled 'Data' in the file; it replaces
        # the index that was read from the first CSV column.
        df.index = pd.to_datetime(df.pop('Data'))
        return df

    def boundary(self) -> pd.DataFrame:
        """
        Returns the boundary data of the water chemistry in eastern Siberian rivers.

        Raises
        ------
        NotImplementedError
            Always; the method is a placeholder.
        """
        # TODO: read <path>/Boundary data/Boundary data/Boundary_data.csv
        raise NotImplementedError("The method is not implemented yet.")

    def meteorology(self):
        """
        Placeholder for access to the meteorological data.

        Raises
        ------
        NotImplementedError
            Always; the method is a placeholder.
        """
        # TODO
        raise NotImplementedError("The method is not implemented yet.")