Source code for aqua_fetch.wq._river_chem_siberia


# public API of this module: only the dataset class is exported
__all__ = ["RiverChemSiberia"]

import os
from typing import List

import pandas as pd

from .._backend import fiona
from .._datasets import Datasets
from .._geom_utils import calc_centroid



[docs]
class RiverChemSiberia(Datasets):
    """
    A database of water chemistry in eastern Siberian rivers following
    `Liu et al., 2022 <https://doi.org/10.1038/s41597-022-01844-y>`_ .
    The dataset consists of meteorological data, water chemistry data, and
    shapefiles of 7 basins in eastern Siberia. The data was collected from 1991
    to 2012. The dataset is available at `figshare <https://doi.org/10.6084/m9.figshare.c.5831975.v1>`_ .
    The following parameters are available in the dataset:

        - ``La``
        - ``Lo``
        - ``Ca2+``
        - ``Mg2+``
        - ``K+``
        - ``Na+``
        - ``Cl-``
        - ``SO42-``
        - ``HCO3-``
        - ``TDS``
        - ``pH``
        - ``River``
        - ``Basin``
        - ``Subbasin``
        - ``Tannual``
        - ``Tmonthly``
        - ``Pannual``
        - ``Pmonthly``
        - ``Lithology``
        - ``Permafrost type``
        - ``IB``
        - ``Discharge``
        - ``Ori_ID``
        - ``Li``
        - ``Sr``
        - ``As``
        - ``Ba``
        - ``Si``
        - ``87Sr/86Sr``
        - ``¦Ä18O-H2O``
        - ``¦Ä2H-H2O``

    The last two names are spelled as they come out of :meth:`database`, which
    decodes the data file as latin1. ``¦Ä`` presumably stands for the Greek
    letter delta (i.e. d18O and d2H of water) - to be confirmed against the
    original file.

    Examples
    --------
    >>> from aqua_fetch import RiverChemSiberia
    >>> ds = RiverChemSiberia()
    >>> ds.stations()
    ['Selenga-Baikal', 'Angara', 'Lena', 'Eastern-Siberia', 'Kolyma', 'Yana', 'Indigirka']
    >>> len(ds.parameters)
    34
    """
    # name of each zip archive -> URL on figshare from which it is fetched
    url = {
        "Sample data.zip": "https://springernature.figshare.com/ndownloader/files/37706754",
        "Boundary data.zip": "https://springernature.figshare.com/ndownloader/files/37706622"
    }


[docs]
    def __init__(self, path=None, **kwargs):
        """
        Initializes the dataset.

        Parameters
        ----------
        path :
            passed on to the ``__init__`` of the parent class and also
            assigned to ``ds_dir``. Defaults to None.
        **kwargs :
            any further keyword arguments, passed on unchanged to the
            ``__init__`` of the parent class.
        """
        super().__init__(path=path, **kwargs)
        self.ds_dir = path
        # NOTE(review): ``_download`` is not defined in this file (presumably
        # inherited from Datasets) and is expected to fetch the archives listed
        # in ``url`` - confirm against the parent class.
        self._download()


    @property
    def parameters(self)->List[str]:
        """
        Names of all the columns of the dataframe returned by :meth:`database`,
        in the order in which they appear in it.
        """
        columns = self.database().columns
        return columns.tolist()


[docs]
    def stn_coords(self)->pd.DataFrame:
        """
        Returns the coordinates of the stations.

        The coordinates of a station are the centroid, as computed by
        ``calc_centroid``, of a feature in one of the two boundary shapefiles.
        Every feature of ``Basin_boundary.shp`` is labelled with its ``Basin``
        property, while every feature of ``Eastern_Siberia_boundary.shp`` is
        labelled ``Eastern_Siberia``.

        NOTE(review): the class docstring shows :meth:`stations` returning
        ``Eastern-Siberia`` (with a hyphen) whereas the label used here has an
        underscore - confirm whether the two are meant to match.

        Returns
        -------
        pd.DataFrame
            a dataframe with the columns ``long`` and ``lat`` and one row per
            feature. The index (named ``index``) holds the labels.
        """
        shp_dir = os.path.join(self.path, "Boundary data", "Boundary data")
        coords = []

        # Read Basin_boundary.shp, the name comes from the 'Basin' property
        with fiona.open(os.path.join(shp_dir, "Basin_boundary.shp")) as src:
            for feature in src:
                # the centroid must unpack into exactly two values
                # (assumed to be in long, lat order - TODO confirm)
                centroid = calc_centroid(feature['geometry'])
                coords.append([*centroid, feature.properties['Basin']])

        # Read Eastern_Siberia_boundary.shp, the name is fixed
        with fiona.open(os.path.join(shp_dir, "Eastern_Siberia_boundary.shp")) as src:
            for feature in src:
                centroid = calc_centroid(feature['geometry'])
                coords.append([*centroid, "Eastern_Siberia"])

        coords = pd.DataFrame(coords, columns=['long', 'lat', 'index']).set_index('index')

        return coords



[docs]
    def stations(self)->List[str]:
        """
        Returns the names of the stations available in the dataset.

        The names are the distinct values of the ``Basin`` column of
        :meth:`database`, in the order in which they first appear in it
        (7 according to the documentation of the class).
        """
        basins = self.database()['Basin']
        return basins.unique().tolist()



[docs]
    def database(self)->pd.DataFrame:
        """
        Returns the database of the water chemistry in eastern Siberian rivers.

        The data is read from ``Sample data/Sample data/Samples_database.csv``
        under ``self.path``. The columns ``Year`` and ``Month`` are dropped and
        the column ``Data`` is parsed with :func:`pandas.to_datetime` and used
        as the index, replacing the first column of the file which is read
        as the index initially.

        Returns
        -------
        pd.DataFrame
            a dataframe whose index (named ``Data``) holds the parsed
            timestamps and whose columns are the remaining columns of the file.

        Raises
        ------
        KeyError
            if any of the columns ``Year``, ``Month`` or ``Data`` is missing
        """
        fpath = os.path.join(self.path, "Sample data", "Sample data", "Samples_database.csv")

        # read the data file with encoding which can handle the special characters
        # NOTE(review): with latin1 two isotope columns come out as '¦Ä18O-H2O'
        # and '¦Ä2H-H2O' (see class docstring), which looks like a Greek delta
        # from a differently encoded file. The encoding is left unchanged because
        # changing it would rename these columns for existing callers.
        df = pd.read_csv(fpath, encoding='latin1', index_col=0)

        # redundant with the timestamps which are parsed from 'Data' below
        df = df.drop(columns=['Year', 'Month'])

        # 'Data' is the spelling of the column as used in this code
        df.index = pd.to_datetime(df.pop('Data'))

        return df

    

[docs]
    def boundary(self)->pd.DataFrame:
        """
        Returns the boundary data of the water chemistry in eastern Siberian rivers.

        The method is a placeholder and is not implemented yet.

        Raises
        ------
        NotImplementedError
            always
        """
        # todo : read "Boundary data/Boundary data/Boundary_data.csv" which is
        # located under ``self.path``
        raise NotImplementedError("The method is not implemented yet.")


    def meteorology(self):
        """
        Placeholder, presumably for the meteorological part of the dataset.
        The method is not implemented yet.

        Raises
        ------
        NotImplementedError
            always
        """
        # todo
        message = "The method is not implemented yet."
        raise NotImplementedError(message)