[1]:
%matplotlib inline

Rainfall-runoff dataset of Laos

[2]:
# sphinx_gallery_thumbnail_number = -1

import site
# Register the directory three levels up (relative to the working directory)
# as a site directory so packages located there become importable
# (presumably the repository checkout this notebook lives in -- TODO confirm).
site.addsitedir('../../..')

from easy_mpl import pie
from ai4water.eda import EDA
from aqua_fetch import MtropicsLaos, ecoli_mekong
from aqua_fetch.utils import print_info

# Print version/environment information for this run.
# NOTE(review): the recorded output lists `water_datasets 0.1.0` and the
# warnings reference `water_datasets/mtropics.py`, while this cell imports
# `aqua_fetch`. The stored outputs look like they come from an older package
# name; re-run the notebook to refresh them.
print_info()

**********Tensorflow models could not be imported **********

numpy 1.26.4
pandas 2.1.4
water_datasets 0.1.0
python 3.12.4 | packaged by Anaconda, Inc. | (main, Jun 18 2024, 15:12:24) [GCC 11.2.0]
os posix
matplotlib 3.8.4
xarray 2024.7.0
netCDF4 1.6.2
Script Executed on:  06 August 2024 09:35:03
tot_cpus 112
avail_cpus 112
mem_gib 251.52819442749023
/home/abbaa0a/anaconda3/envs/watds_py312/lib/python3.12/site-packages/sklearn/experimental/enable_hist_gradient_boosting.py:16: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
  warnings.warn(
[3]:
# Create the dataset accessor, pointing it at a local data directory.
# NOTE(review): this is an absolute, machine-specific path; anyone re-running
# the notebook has to replace it with a directory on their own machine. The
# recorded output shows the data was already present there, so no download ran.
laos = MtropicsLaos(path="/mnt/datawaha/hyex/atr/data")

        Not downloading the data since the directory
        /mnt/datawaha/hyex/atr/data/MtropicsLaos already exists.
        Use overwrite=True to remove previously saved files and download again
/home/abbaa0a/water-datasets/water_datasets/mtropics.py:958: UserWarning: preprocessing of shapefiles can not be done because no fiona installation is found.
  warnings.warn("preprocessing of shapefiles can not be done because no fiona installation is found.")

precipitation

[4]:
# Fetch the precipitation record and report its (rows, columns) shape.
pcp = laos.fetch_pcp()
print(pcp.shape)
(1665361, 1)

weather station

[5]:
# Fetch the weather-station data; the recorded run printed 4 columns.
w = laos.fetch_weather_station_data()
print(w.shape)
(166536, 4)
[6]:
# fetch_hydro() returns two objects, unpacked here as `wl` and `spm`
# (presumably water level and suspended particulate matter, going by the
# `water_level` / `susp_pm` columns printed further down -- TODO confirm).
wl, spm = laos.fetch_hydro()
print(wl.shape)
(454692, 1)
[7]:
# Shape of the second object returned by fetch_hydro().
print(spm.shape)
(6428, 1)
[8]:
# Fetch E. coli measurements with the default feature selection, which in the
# recorded run yields a single column (`Ecoli_mpn100`).
ecoli = laos.fetch_ecoli()
print(ecoli.shape)
(409, 1)
[9]:
# First rows of the E. coli series; note that it contains NaN entries.
print(ecoli.head())
                     Ecoli_mpn100
Date_Time
2011-05-25 10:00:00           NaN
2011-05-25 16:40:00        1100.0
2011-05-25 17:06:00        1400.0
2011-05-25 17:10:00           NaN
2011-05-25 17:24:00       14000.0
[10]:
# Last rows of the single-column E. coli series.
print(ecoli.tail())

# Fetch again with every available feature; besides `Ecoli_mpn100` the
# recorded output also has `Ecoli_LL_mpn100` and `Ecoli_UL_mpn100` columns.
ecoli_all = laos.fetch_ecoli(features='all')
print(ecoli_all.shape)
                     Ecoli_mpn100
Date_Time
2021-02-25 14:10:00         250.0
2021-03-07 14:10:00         200.0
2021-03-17 15:11:00         290.0
2021-03-27 15:22:00         720.0
2021-04-06 15:05:00         560.0
(409, 3)
[11]:
# Preview all three E. coli columns side by side.
ecoli_all.head()
[11]:
Ecoli_LL_mpn100 Ecoli_mpn100 Ecoli_UL_mpn100
Date_Time
2011-05-25 10:00:00 NaN NaN NaN
2011-05-25 16:40:00 715.0 1100.0 1550.0
2011-05-25 17:06:00 1000.0 1400.0 1900.0
2011-05-25 17:10:00 NaN NaN NaN
2011-05-25 17:24:00 9000.0 14000.0 22000.0
[12]:
# Request a single physico-chemical feature, 'T_deg' (presumably water
# temperature in degrees -- TODO confirm against the dataset documentation).
phy_chem = laos.fetch_physiochem('T_deg')
print(phy_chem.shape)
(411, 1)

physicochemical attributes

[13]:
# Request every physico-chemical feature; the recorded run returned 8 columns.
phy_chem_all = laos.fetch_physiochem(features='all')
print(phy_chem_all.shape)
(411, 8)

rain gauges

[14]:
# Fetch the rain-gauge data; the recorded run returned 7 columns.
rg = laos.fetch_rain_gauges()
print(rg.shape)
(6939, 7)

regression

[15]:
# Build the regression dataset with default arguments.
# In the recorded run this gives 650741 rows and 9 columns.
df = laos.make_regression()
print(df.shape)
(650741, 9)
[16]:
# Preview the frame; `Ecoli_mpn100` is the last of the 9 columns.
df.head()
[16]:
air_temp rel_hum wind_speed sol_rad water_level pcp susp_pm Ecoli_source Ecoli_mpn100
2011-05-25 14:00:00 22.536906 96.167816 7.0 256.0 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:06:00 22.537042 96.168046 6.7 246.2 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:12:00 22.537179 96.168276 6.4 236.4 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:18:00 22.537315 96.168506 6.1 226.6 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:24:00 22.537452 96.168736 5.8 216.8 133.0 0.0 0.9 2.868160e+15 NaN
[17]:
# Rebuild the regression dataset with lookback_steps=30. This rebinds `df`,
# so the frame built with default arguments is no longer reachable. In the
# recorded run the row count drops from 650741 to 5948.
df = laos.make_regression(lookback_steps=30)
print(df.shape)
(5948, 9)
[18]:
# Preview the lookback_steps=30 regression frame.
df.head()
[18]:
air_temp rel_hum wind_speed sol_rad water_level pcp susp_pm Ecoli_source Ecoli_mpn100
2011-05-25 14:00:00 22.536906 96.167816 7.0 256.0 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:06:00 22.537042 96.168046 6.7 246.2 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:12:00 22.537179 96.168276 6.4 236.4 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:18:00 22.537315 96.168506 6.1 226.6 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:24:00 22.537452 96.168736 5.8 216.8 133.0 0.0 0.9 2.868160e+15 NaN
[19]:
# Count missing values per column. In the recorded run only `Ecoli_mpn100`
# has gaps (5690 of the 5948 rows).
print(df.isna().sum())
air_temp           0
rel_hum            0
wind_speed         0
sol_rad            0
water_level        0
pcp                0
susp_pm            0
Ecoli_source       0
Ecoli_mpn100    5690
dtype: int64
[20]:
# Plot the contents of `df` with ai4water's EDA helper, requesting subplots
# and ignoring the datetime index (per the keyword arguments passed below).
eda = EDA(data=df)
eda.plot_data(subplots=True, figsize=(14, 20),
              ignore_datetime_index=True)
../_images/_notebooks_mtropics_laos_25_0.png

classification

[21]:
# Build the classification dataset with lookback_steps=30, again rebinding `df`.
# The recorded shape matches the regression frame: (5948, 9).
df = laos.make_classification(lookback_steps=30)
print(df.shape)
(5948, 9)
[22]:
# Preview the classification frame.
df.head()
[22]:
air_temp rel_hum wind_speed sol_rad water_level pcp susp_pm Ecoli_source Ecoli_mpn100
2011-05-25 14:00:00 22.536906 96.167816 7.0 256.0 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:06:00 22.537042 96.168046 6.7 246.2 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:12:00 22.537179 96.168276 6.4 236.4 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:18:00 22.537315 96.168506 6.1 226.6 133.0 0.0 0.9 2.868160e+15 NaN
2011-05-25 14:24:00 22.537452 96.168736 5.8 216.8 133.0 0.0 0.9 2.868160e+15 NaN
[23]:
# Missing values per column of the classification frame; the recorded counts
# are identical to the regression case (5690 NaNs in `Ecoli_mpn100`).
print(df.isna().sum())
air_temp           0
rel_hum            0
wind_speed         0
sol_rad            0
water_level        0
pcp                0
susp_pm            0
Ecoli_source       0
Ecoli_mpn100    5690
dtype: int64

ecoli_mekong

[24]:
# Call ecoli_mekong() and report the shape of what it returns.
# NOTE(review): this rebinds `ecoli`, which earlier held the result of
# laos.fetch_ecoli(); the two frames have different columns.
ecoli = ecoli_mekong()
print(ecoli.shape)
(1602, 10)
[25]:
# First rows; the 10 columns wrap onto two printed blocks.
print(ecoli.head())
                    station_name      T     EC  DOpercent    DO    pH   ORP  \
Date_Time
2011-05-25 10:00:00   Houay Pano  24.11  273.0       87.5  7.34  7.67  53.5
2011-05-25 16:40:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN
2011-05-25 17:06:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN
2011-05-25 17:10:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN
2011-05-25 17:24:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN

                     Turbidity       TSS  E-coli_4dilutions
Date_Time
2011-05-25 10:00:00      13.47       NaN                NaN
2011-05-25 16:40:00    1380.00  0.899281             1100.0
2011-05-25 17:06:00    1116.00  0.993190             1400.0
2011-05-25 17:10:00    1392.00  1.107880                NaN
2011-05-25 17:24:00     702.00  1.325030            14000.0
[26]:
# Pie chart built from the last column of `df`, after dropping every row that
# contains a NaN and casting the values to int.
# NOTE(review): `df` is still the classification frame from
# laos.make_classification(lookback_steps=30), not the `ecoli` frame loaded
# just above. explode=(0, 0.05) supplies two offsets, so this presumably
# expects exactly two distinct class values -- TODO confirm.
_ = pie(df.dropna().iloc[:, -1].values.astype(int), explode=(0, 0.05))
../_images/_notebooks_mtropics_laos_33_0.png