Source code for de.generate_errors

# -*- coding: utf-8 -*-

"""
de.generate_errors
~~~~~~~~~~~
Generate three different error types:
- constant errors
- dynamic errors
- timing errors
:copyright: 2021 by Robin Schwemmle.
:license: GNU GPLv3, see LICENSE for more details.
"""

import numpy as np
import random
import pandas as pd

# Suppress NumPy RuntimeWarnings raised for division by zero and for invalid
# floating-point operations (e.g. operations yielding NaN).
np.seterr(divide="ignore", invalid="ignore")


def constant(ts, offset=1.5):
    """
    Generate constant errors.

    Constant errors are generated by multiplying the time series with a
    constant factor: a positive offset (factor greater than 1) or a negative
    offset (factor less than 1).

    Parameters
    ----------
    ts : (N,)array_like
        Observed time series. NumPy arrays and pandas objects are multiplied
        as they are; plain lists and tuples are converted to a float array
        first.

    offset : float, optional
        Offset multiplied to time series. If greater than 1 positive constant
        offset and if less than 1 negative constant offset. The default is 1.5.

    Returns
    -------
    ts_const : array_like
        Time series with constant error. Same container type as ``ts`` for
        array and pandas input; ``numpy.ndarray`` for list or tuple input.
    """
    # Plain Python sequences do not multiply element-wise: ``list * int``
    # repeats the sequence and ``list * float`` raises TypeError.
    if isinstance(ts, (list, tuple)):
        ts = np.asarray(ts, dtype=float)

    ts_const = ts * offset

    return ts_const
def negative_dynamic(ts, prop=0.5):
    """
    Generate negative dynamic error
    (i.e. underestimate high flows - overestimate low flows).

    The values of the first column are ranked from highest to lowest and
    multiplied by factors that increase linearly from ``1 - prop`` (highest
    value) to ``1 + prop`` (lowest value). High to medium flows are thereby
    decreased and medium to low flows are increased, i.e. the flow duration
    curve is tilted at the median.

    Parameters
    ----------
    ts : dataframe
        Observed time series. Only the first column is used.

    prop : float, optional
        Factor by which time series is tilted. The default is 0.5.

    Returns
    -------
    ts_dyn : dataframe
        Time series with negative dynamic error. Single float column named
        ``Qsim``; rows are ordered by index.
    """
    # Work on a plain float array so the result does not end up with object
    # dtype (which happens when values are written into an empty dataframe).
    obs = ts.iloc[:, 0].to_numpy(dtype=float, na_value=np.nan)
    nn = obs.size

    # positions of the observations in descending order (NaN values last)
    rank_desc = np.argsort(-obs, kind="mergesort")

    # factors to decrease/increase runoff
    downup = np.linspace(1.0 - prop, 1.0 + prop, nn)

    # tilting the fdc at median; write each value back to its original row
    sim = np.empty(nn, dtype=float)
    sim[rank_desc] = obs[rank_desc] * downup

    ts_dyn = pd.DataFrame({"Qsim": sim}, index=ts.index)

    # sort by index; a stable sort keeps rows sharing an index label in a
    # deterministic order
    return ts_dyn.sort_index(kind="mergesort")
def positive_dynamic(ts, prop=0.5):
    """
    Generate positive dynamic errors
    (i.e. overestimate high flows - underestimate low flows).

    The values of the first column are ranked from highest to lowest and
    multiplied by factors that decrease linearly from ``1 + prop`` (highest
    value) to ``1 - prop`` (lowest value). High to medium flows are thereby
    increased and medium to low flows are decreased, i.e. the flow duration
    curve is tilted at the median.

    Parameters
    ----------
    ts : dataframe
        Dataframe with time series. Only the first column is used.

    prop : float, optional
        Factor by which time series is tilted. The default is 0.5.

    Returns
    -------
    ts_dyn : dataframe
        Time series with positive dynamic error. Single float column named
        ``Qsim``; rows are ordered by index.
    """
    # Work on a plain float array so the result does not end up with object
    # dtype (which happens when values are written into an empty dataframe).
    obs = ts.iloc[:, 0].to_numpy(dtype=float, na_value=np.nan)
    nn = obs.size

    # positions of the observations in descending order (NaN values last)
    rank_desc = np.argsort(-obs, kind="mergesort")

    # factors to increase/decrease runoff
    updown = np.linspace(1.0 + prop, 1.0 - prop, nn)

    # tilting the fdc at median; write each value back to its original row
    sim = np.empty(nn, dtype=float)
    sim[rank_desc] = obs[rank_desc] * updown

    ts_dyn = pd.DataFrame({"Qsim": sim}, index=ts.index)

    # sort by index; a stable sort keeps rows sharing an index label in a
    # deterministic order
    return ts_dyn.sort_index(kind="mergesort")
def timing(ts, tshift=3, shuffle=True):
    """
    Generate timing errors.

    Timing errors are generated by either shifting or shuffling the first
    column of the time series. The input dataframe is never modified.

    Parameters
    ----------
    ts : dataframe
        Dataframe with time series.

    tshift : int, optional
        Number of rows (days, assuming daily data) by which the time series
        is shifted. Both positive and negative time shifts are possible. The
        shift of the first column is circular: values pushed out at one end
        re-enter at the other end. Any further columns are shifted without
        wrap-around and the vacated rows are filled with 0. Only used if
        ``shuffle`` is false. The default is 3.

    shuffle : boolean, optional
        If true, the first column is shuffled with a fixed seed (42), so the
        permutation is reproducible. If false, the time series is shifted
        instead. The default is shuffling.

    Returns
    -------
    ts_tim : dataframe
        Time series with timing error.
    """
    # Truthiness instead of ``is True``/``is False`` so that values such as
    # ``1`` or ``numpy.True_`` select a branch instead of leaving the result
    # undefined.
    if shuffle:
        # Shuffle a private copy; the caller's dataframe stays untouched.
        ts_tim = ts.copy()
        y = ts_tim.iloc[:, 0].to_numpy(copy=True)
        # A dedicated generator gives the same permutation as seeding the
        # module-level generator with 42, without resetting global state.
        random.Random(42).shuffle(y)
        ts_tim.iloc[:, 0] = y
    else:
        ts_tim = ts.shift(periods=tshift, fill_value=0)
        # Circular shift of the first column; np.roll also copes with a zero
        # shift and with shifts longer than the series.
        ts_tim.iloc[:, 0] = np.roll(ts.iloc[:, 0].to_numpy(), tshift)

    return ts_tim