diff --git a/scoringrules/_brier.py b/scoringrules/_brier.py index 210dcad..7d48766 100644 --- a/scoringrules/_brier.py +++ b/scoringrules/_brier.py @@ -1,7 +1,7 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import brier +from scoringrules.core.utils import binary_array_check, categorical_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -14,30 +14,43 @@ def brier_score( backend: "Backend" = None, ) -> "Array": r""" - Compute the Brier Score (BS). + Brier Score - The BS is formulated as + The Brier Score is defined as .. math:: - BS(f, y) = (f - y)^2, + \text{BS}(F, y) = (F - y)^2, - where :math:`f \in [0, 1]` is the predicted probability of an event and :math:`y \in \{0, 1\}` the actual outcome. + where :math:`F \in [0, 1]` is the predicted probability of an event and :math:`y \in \{0, 1\}` + is the outcome [1]_. Parameters ---------- obs : array_like - Observed outcome, either 0 or 1. + Observed outcomes, either 0 or 1. fct : array_like - Forecasted probabilities between 0 and 1. + Forecast probabilities, between 0 and 1. backend : str - The name of the backend used for computations. Defaults to 'numpy'. + The name of the backend used for computations. Default is 'numpy'. Returns ------- - brier_score : array_like + score : array_like The computed Brier Score. + References + ---------- + .. [1] Brier, G. W. (1950). + Verification of forecasts expressed in terms of probability. + Monthly Weather Review, 78, 1-3. + + Examples + -------- + >>> import scoringrules as sr + >>> sr.brier_score(1, 0.2) + 0.64000 """ + obs, fct = binary_array_check(obs, fct, backend=backend) return brier.brier_score(obs=obs, fct=fct, backend=backend) @@ -46,45 +59,68 @@ def rps_score( fct: "ArrayLike", k_axis: int = -1, *, + onehot: bool = False, backend: "Backend" = None, ) -> "Array": r""" - Compute the (Discrete) Ranked Probability Score (RPS). 
+ (Discrete) Ranked Probability Score (RPS) Suppose the outcome corresponds to one of :math:`K` ordered categories. The RPS is defined as .. math:: - RPS(f, y) = \sum_{k=1}^{K}(\tilde{f}_{k} - \tilde{y}_{k})^2, + \text{RPS}(F, y) = \sum_{k=1}^{K}(\tilde{F}_{k} - \tilde{y}_{k})^2, - where :math:`f \in [0, 1]^{K}` is a vector of length :math:`K` containing forecast probabilities - that each of the :math:`K` categories will occur, and :math:`y \in \{0, 1\}^{K}` is a vector of - length :math:`K`, with the :math:`k`-th element equal to one if the :math:`k`-th category occurs. We - have :math:`\sum_{k=1}^{K} y_{k} = \sum_{k=1}^{K} f_{k} = 1`, and, for :math:`k = 1, \dots, K`, - :math:`\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}` and :math:`\tilde{f}_{k} = \sum_{i=1}^{k} f_{i}`. + where :math:`F \in [0, 1]^{K}` is a vector of length :math:`K`, containing forecast probabilities + that each of the :math:`K` categories will occur, with :math:`\sum_{k=1}^{K} F_{k} = 1` and + :math:`\tilde{F}_{k} = \sum_{i=1}^{k} F_{i}` for all :math:`k = 1, \dots, K`, and where + :math:`y \in \{1, \dots, K\}` is the category that occurs, with :math:`\tilde{y}_{k} = 1\{y \le k\}` + for all :math:`k = 1, \dots, K` [1]_. + + The outcome can alternatively be interpreted as a vector :math:`y \in \{0, 1\}^K` of length :math:`K`, with the + :math:`k`-th element equal to one if the :math:`k`-th category occurs, and zero otherwise. + Using this one-hot encoding, the RPS is defined analogously to the above, but with + :math:`\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}`. Parameters ---------- obs : array_like - Array of 0's and 1's corresponding to unobserved and observed categories - forecasts : + Category that occurs. Or array of 0's and 1's corresponding to unobserved and + observed categories if `onehot=True`. + fct : array_like Array of forecast probabilities for each category. k_axis: int - The axis corresponding to the categories. Default is the last axis. 
+ The axis of `obs` and `fct` corresponding to the categories. Default is the last axis. + onehot: bool + Boolean indicating whether the observation is the category that occurs or a onehot + encoded vector of 0's and 1's. Default is False. backend : str - The name of the backend used for computations. Defaults to 'numpy'. + The name of the backend used for computations. Default is 'numpy'. Returns ------- - score: + score: array_like The computed Ranked Probability Score. + References + ---------- + .. [1] Epstein, E. S. (1969). + A scoring system for probability forecasts of ranked categories. + Journal of Applied Meteorology, 8, 985-987. + https://www.jstor.org/stable/26174707. + + Examples + -------- + >>> import scoringrules as sr + >>> import numpy as np + >>> fct = np.array([0.1, 0.2, 0.3, 0.4]) + >>> obs = 3 + >>> sr.rps_score(obs, fct) + 0.25999999999999995 + >>> obs = np.array([0, 0, 1, 0]) + >>> sr.rps_score(obs, fct, onehot=True) + 0.25999999999999995 """ - B = backends.active if backend is None else backends[backend] - fct = B.asarray(fct) - - if k_axis != -1: - fct = B.moveaxis(fct, k_axis, -1) - + obs, fct = categorical_array_check(obs, fct, k_axis, onehot, backend=backend) return brier.rps_score(obs=obs, fct=fct, backend=backend) @@ -119,6 +155,7 @@ def log_score( The computed Log Score. """ + obs, fct = binary_array_check(obs, fct, backend=backend) return brier.log_score(obs=obs, fct=fct, backend=backend) @@ -127,6 +164,7 @@ def rls_score( fct: "ArrayLike", k_axis: int = -1, *, + onehot: bool = False, backend: "Backend" = None, ) -> "Array": r""" @@ -151,6 +189,9 @@ def rls_score( Forecasted probabilities between 0 and 1. k_axis: int The axis corresponding to the categories. Default is the last axis. + onehot: bool + Boolean indicating whether the observation is the category that occurs or a onehot + encoded vector of 0's and 1's. Default is False. backend : str The name of the backend used for computations. Defaults to 'numpy'. 
@@ -160,12 +201,7 @@ def rls_score( The computed Ranked Logarithmic Score. """ - B = backends.active if backend is None else backends[backend] - fct = B.asarray(fct) - - if k_axis != -1: - fct = B.moveaxis(fct, k_axis, -1) - + obs, fct = categorical_array_check(obs, fct, k_axis, onehot, backend=backend) return brier.rls_score(obs=obs, fct=fct, backend=backend) diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py index e3602d3..0800520 100644 --- a/scoringrules/_crps.py +++ b/scoringrules/_crps.py @@ -2,6 +2,13 @@ from scoringrules.backend import backends from scoringrules.core import crps, stats +from scoringrules.core.utils import ( + univariate_array_check, + estimator_check, + uv_weighted_score_weights, + uv_weighted_score_chain, + univariate_sort_ens, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -95,29 +102,13 @@ def crps_ensemble( >>> sr.crps_ensemble(obs, pred) array([0.69605316, 0.32865417, 0.39048665]) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if not sorted_ensemble and estimator not in [ - "nrg", - "akr", - "akr_circperm", - "fair", - ]: - fct = B.sort(fct, axis=-1) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + fct = univariate_sort_ens(fct, estimator, sorted_ensemble, backend=backend) if backend == "numba": - if estimator not in crps.estimator_gufuncs: - raise ValueError( - f"{estimator} is not a valid estimator. 
" - f"Must be one of {crps.estimator_gufuncs.keys()}" - ) + estimator_check(estimator, crps.estimator_gufuncs) return crps.estimator_gufuncs[estimator](obs, fct) - - return crps.ensemble(obs, fct, estimator, backend=backend) + else: + return crps.ensemble(obs, fct, estimator, backend=backend) def twcrps_ensemble( @@ -202,14 +193,9 @@ def twcrps_ensemble( >>> sr.twcrps_ensemble(obs, fct, v_func=v_func) array([0.69605316, 0.32865417, 0.39048665]) """ - if v_func is None: - B = backends.active if backend is None else backends[backend] - a, b, obs, fct = map(B.asarray, (a, b, obs, fct)) - - def v_func(x): - return B.minimum(B.maximum(x, a), b) - - obs, fct = map(v_func, (obs, fct)) + obs, fct = uv_weighted_score_chain( + obs, fct, a=a, b=b, v_func=v_func, backend=backend + ) return crps_ensemble( obs, fct, @@ -300,25 +286,12 @@ def owcrps_ensemble( >>> sr.owcrps_ensemble(obs, fct, w_func=w_func) array([0.91103733, 0.45212402, 0.35686667]) """ - - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return crps.estimator_gufuncs["ownrg"](obs, fct, obs_weights, fct_weights) - - return crps.ow_ensemble(obs, fct, obs_weights, fct_weights, backend=backend) + return crps.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + else: + return crps.ow_ensemble(obs, fct, obs_w, fct_w, backend=backend) def vrcrps_ensemble( @@ -399,24 +372,12 @@ def vrcrps_ensemble( >>> sr.vrcrps_ensemble(obs, fct, w_func) array([0.90036433, 0.41515255, 0.41653833]) """ - B = backends.active if backend is None else 
backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_weights, fct_weights) - - return crps.vr_ensemble(obs, fct, obs_weights, fct_weights, backend=backend) + return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + else: + return crps.vr_ensemble(obs, fct, obs_w, fct_w, backend=backend) def crps_quantile( @@ -473,10 +434,11 @@ def crps_quantile( # TODO: add example """ B = backends.active if backend is None else backends[backend] - obs, fct, alpha = map(B.asarray, (obs, fct, alpha)) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) + alpha = B.asarray(alpha) + if B.any(alpha <= 0) or B.any(alpha >= 1): + raise ValueError("`alpha` contains entries that are not between 0 and 1.") if not fct.shape[-1] == alpha.shape[-1]: raise ValueError("Expected matching length of `fct` and `alpha` values.") diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py index 299c7a4..4857151 100644 --- a/scoringrules/_dss.py +++ b/scoringrules/_dss.py @@ -1,8 +1,7 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import dss -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import univariate_array_check, multivariate_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, Backend @@ -45,16 +44,11 @@ def dssuv_ensemble( score: Array The computed Dawid-Sebastiani Score. 
""" - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) if backend == "numba": return dss._dss_uv_gufunc(obs, fct, bias) - - return dss.ds_score_uv(obs, fct, bias, backend=backend) + else: + return dss.ds_score_uv(obs, fct, bias, backend=backend) def dssmv_ensemble( @@ -99,8 +93,7 @@ def dssmv_ensemble( The computed Dawid-Sebastiani Score. """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": return dss._dss_mv_gufunc(obs, fct, bias) - - return dss.ds_score_mv(obs, fct, bias, backend=backend) + else: + return dss.ds_score_mv(obs, fct, bias, backend=backend) diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index 4ff2231..3902ee5 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -1,8 +1,12 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import energy -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import ( + multivariate_array_check, + estimator_check, + mv_weighted_score_chain, + mv_weighted_score_weights, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -61,18 +65,12 @@ def es_ensemble( :ref:`theory.multivariate` Some theoretical background on scoring rules for multivariate forecasts. """ - backend = backend if backend is not None else backends._active obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - if estimator not in energy.estimator_gufuncs: - raise ValueError( - f"{estimator} is not a valid estimator. 
" - f"Must be one of {energy.estimator_gufuncs.keys()}" - ) + estimator_check(estimator, energy.estimator_gufuncs) return energy.estimator_gufuncs[estimator](obs, fct) - - return energy.es(obs, fct, estimator=estimator, backend=backend) + else: + return energy.es(obs, fct, estimator=estimator, backend=backend) def twes_ensemble( @@ -122,7 +120,7 @@ def twes_ensemble( twes_ensemble : array_like The computed Threshold-Weighted Energy Score. """ - obs, fct = map(v_func, (obs, fct)) + obs, fct = mv_weighted_score_chain(obs, fct, v_func) return es_ensemble( obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend ) @@ -175,17 +173,12 @@ def owes_ensemble( owes_ensemble : array_like The computed Outcome-Weighted Energy Score. """ - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - - if B.name == "numba": - return energy.estimator_gufuncs["ownrg"](obs, fct, obs_weights, fct_weights) - - return energy.owes(obs, fct, obs_weights, fct_weights, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) + if backend == "numba": + return energy.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + else: + return energy.owes(obs, fct, obs_w, fct_w, backend=backend) def vres_ensemble( @@ -236,14 +229,9 @@ def vres_ensemble( vres_ensemble : array_like The computed Vertically Re-scaled Energy Score. 
""" - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) if backend == "numba": - return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_weights, fct_weights) - - return energy.vres(obs, fct, obs_weights, fct_weights, backend=backend) + return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + else: + return energy.vres(obs, fct, obs_w, fct_w, backend=backend) diff --git a/scoringrules/_error_spread.py b/scoringrules/_error_spread.py index 4d4566d..a975c25 100644 --- a/scoringrules/_error_spread.py +++ b/scoringrules/_error_spread.py @@ -1,15 +1,15 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import error_spread +from scoringrules.core.utils import univariate_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend def error_spread_score( - observations: "ArrayLike", - forecasts: "Array", + obs: "ArrayLike", + fct: "Array", m_axis: int = -1, *, backend: "Backend" = None, @@ -18,9 +18,9 @@ def error_spread_score( Parameters ---------- - observations: ArrayLike + obs: ArrayLike The observed values. - forecasts: Array + fct: Array The predicted forecast ensemble, where the ensemble dimension is by default represented by the last axis. m_axis: int @@ -33,13 +33,8 @@ def error_spread_score( - Array An array of error spread scores for each ensemble forecast, which should be averaged to get meaningful values. 
""" - B = backends.active if backend is None else backends[backend] - observations, forecasts = map(B.asarray, (observations, forecasts)) - - if m_axis != -1: - forecasts = B.moveaxis(forecasts, m_axis, -1) - - if B.name == "numba": - return error_spread._ess_gufunc(observations, forecasts) - - return error_spread.ess(observations, forecasts, backend=backend) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + if backend == "numba": + return error_spread._ess_gufunc(obs, fct) + else: + return error_spread.ess(obs, fct, backend=backend) diff --git a/scoringrules/_interval.py b/scoringrules/_interval.py index d6d7492..893514d 100644 --- a/scoringrules/_interval.py +++ b/scoringrules/_interval.py @@ -177,5 +177,5 @@ def weighted_interval_score( if B.name == "numba": return interval._weighted_interval_score_gufunc(*args) - - return interval.weighted_interval_score(*args, backend=backend) + else: + return interval.weighted_interval_score(*args, backend=backend) diff --git a/scoringrules/_kernels.py b/scoringrules/_kernels.py index bae2dbb..7e9ca38 100644 --- a/scoringrules/_kernels.py +++ b/scoringrules/_kernels.py @@ -1,8 +1,14 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import kernels -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import ( + univariate_array_check, + multivariate_array_check, + estimator_check, + uv_weighted_score_weights, + uv_weighted_score_chain, + mv_weighted_score_weights, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -56,22 +62,12 @@ def gksuv_ensemble( >>> import scoringrules as sr >>> sr.gks_ensemble(obs, pred) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) if backend == "numba": - if estimator not in 
kernels.estimator_gufuncs: - raise ValueError( - f"{estimator} is not a valid estimator. " - f"Must be one of {kernels.estimator_gufuncs.keys()}" - ) - else: - return kernels.estimator_gufuncs[estimator](obs, fct) - - return kernels.ensemble_uv(obs, fct, estimator, backend=backend) + estimator_check(estimator, kernels.estimator_gufuncs_uv) + return kernels.estimator_gufuncs_uv[estimator](obs, fct) + else: + return kernels.ensemble_uv(obs, fct, estimator=estimator, backend=backend) def twgksuv_ensemble( @@ -139,14 +135,7 @@ def twgksuv_ensemble( >>> >>> sr.twgksuv_ensemble(obs, pred, v_func=v_func) """ - if v_func is None: - B = backends.active if backend is None else backends[backend] - a, b, obs, fct = map(B.asarray, (a, b, obs, fct)) - - def v_func(x): - return B.minimum(B.maximum(x, a), b) - - obs, fct = map(v_func, (obs, fct)) + obs, fct = uv_weighted_score_chain(obs, fct, a, b, v_func, backend=backend) return gksuv_ensemble( obs, fct, @@ -219,24 +208,12 @@ def owgksuv_ensemble( >>> >>> sr.owgksuv_ensemble(obs, pred, w_func=w_func) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return kernels.estimator_gufuncs["ow"](obs, fct, obs_weights, fct_weights) - - return kernels.ow_ensemble_uv(obs, fct, obs_weights, fct_weights, backend=backend) + return kernels.estimator_gufuncs_uv["ow"](obs, fct, obs_w, fct_w) + else: + return kernels.ow_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) def vrgksuv_ensemble( @@ -301,24 +278,12 @@ def vrgksuv_ensemble( >>> >>> 
sr.vrgksuv_ensemble(obs, pred, w_func=w_func) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return kernels.estimator_gufuncs["vr"](obs, fct, obs_weights, fct_weights) - - return kernels.vr_ensemble_uv(obs, fct, obs_weights, fct_weights, backend=backend) + return kernels.estimator_gufuncs_uv["vr"](obs, fct, obs_w, fct_w) + else: + return kernels.vr_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) def gksmv_ensemble( @@ -371,19 +336,12 @@ def gksmv_ensemble( score : array_like The GKS between the forecast ensemble and obs. """ - backend = backend if backend is not None else backends._active obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - if estimator not in kernels.estimator_gufuncs_mv: - raise ValueError( - f"{estimator} is not a valid estimator. " - f"Must be one of {kernels.estimator_gufuncs_mv.keys()}" - ) - else: - return kernels.estimator_gufuncs_mv[estimator](obs, fct) - - return kernels.ensemble_mv(obs, fct, estimator, backend=backend) + estimator_check(estimator, kernels.estimator_gufuncs_mv) + return kernels.estimator_gufuncs_mv[estimator](obs, fct) + else: + return kernels.ensemble_mv(obs, fct, estimator=estimator, backend=backend) def twgksmv_ensemble( @@ -393,6 +351,7 @@ def twgksmv_ensemble( m_axis: int = -2, v_axis: int = -1, *, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": r"""Compute the Threshold-Weighted Gaussian Kernel Score (twGKS) for a finite multivariate ensemble. 
@@ -425,6 +384,8 @@ def twgksmv_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of ints The axis corresponding to the variables dimension. Defaults to -1. + estimator : str + Indicates the estimator to be used. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. @@ -434,7 +395,9 @@ def twgksmv_ensemble( The computed Threshold-Weighted Gaussian Kernel Score. """ obs, fct = map(v_func, (obs, fct)) - return gksmv_ensemble(obs, fct, m_axis=m_axis, v_axis=v_axis, backend=backend) + return gksmv_ensemble( + obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend + ) def owgksmv_ensemble( @@ -499,17 +462,12 @@ def owgksmv_ensemble( score : array_like The computed Outcome-Weighted GKS. """ - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - - if B.name == "numba": - return kernels.estimator_gufuncs_mv["ow"](obs, fct, obs_weights, fct_weights) - - return kernels.ow_ensemble_mv(obs, fct, obs_weights, fct_weights, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_mv["ow"](obs, fct, obs_w, fct_w) + else: + return kernels.ow_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) def vrgksmv_ensemble( @@ -560,17 +518,12 @@ def vrgksmv_ensemble( score : array_like The computed Vertically Re-scaled Gaussian Kernel Score. 
""" - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - - if B.name == "numba": - return kernels.estimator_gufuncs_mv["vr"](obs, fct, obs_weights, fct_weights) - - return kernels.vr_ensemble_mv(obs, fct, obs_weights, fct_weights, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_mv["vr"](obs, fct, obs_w, fct_w) + else: + return kernels.vr_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) __all__ = [ diff --git a/scoringrules/_logs.py b/scoringrules/_logs.py index d153d83..85c7694 100644 --- a/scoringrules/_logs.py +++ b/scoringrules/_logs.py @@ -2,6 +2,7 @@ from scoringrules.backend import backends from scoringrules.core import logarithmic +from scoringrules.core.utils import univariate_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -50,12 +51,9 @@ def logs_ensemble( >>> sr.logs_ensemble(obs, pred) """ B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - M = fct.shape[-1] + M = fct.shape[-1] # number of ensemble members # Silverman's rule of thumb for estimating the bandwidth parameter if bw is None: @@ -66,7 +64,7 @@ def logs_ensemble( bw = 1.06 * B.minimum(sigmahat, iqr / 1.34) * (M ** (-1 / 5)) bw = B.stack([bw] * M, axis=-1) - w = B.zeros(fct.shape) + 1 / M + w = B.ones(fct.shape) / M return logarithmic.mixnorm(obs, fct, bw, w, backend=backend) @@ -125,12 +123,9 @@ def clogs_ensemble( >>> sr.clogs_ensemble(obs, pred, -1.0, 1.0) """ B = backends.active if backend is None else backends[backend] - fct = B.asarray(fct) - - if 
m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - M = fct.shape[-1] + M = fct.shape[-1] # number of ensemble members # Silverman's rule of thumb for estimating the bandwidth parameter if bw is None: @@ -707,7 +702,7 @@ def logs_mixnorm( if w is None: M: int = m.shape[mc_axis] - w = B.zeros(m.shape) + 1 / M + w = B.ones(m.shape) / M else: w = B.asarray(w) diff --git a/scoringrules/_variogram.py b/scoringrules/_variogram.py index 4ff690c..6028f65 100644 --- a/scoringrules/_variogram.py +++ b/scoringrules/_variogram.py @@ -2,7 +2,12 @@ from scoringrules.backend import backends from scoringrules.core import variogram -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import ( + multivariate_array_check, + estimator_check, + mv_weighted_score_chain, + mv_weighted_score_weights, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, Backend @@ -11,6 +16,7 @@ def vs_ensemble( obs: "Array", fct: "Array", + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -23,7 +29,7 @@ def vs_ensemble( For a :math:`D`-variate ensemble the Variogram Score [1]_ is defined as: .. math:: - \text{VS}_{p}(F_{ens}, \mathbf{y})= \sum_{i=1}^{d} \sum_{j=1}^{d} + \text{VS}_{p}(F_{ens}, \mathbf{y})= \sum_{i=1}^{d} \sum_{j=1}^{d} w_{i,j} \left( \frac{1}{M} \sum_{m=1}^{M} | x_{m,i} - x_{m,j} |^{p} - | y_{i} - y_{j} |^{p} \right)^{2}, where :math:`\mathbf{X}` and :math:`\mathbf{X'}` are independently sampled ensembles from from :math:`F`. @@ -36,12 +42,15 @@ def vs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + w : array_like + The weights assigned to pairs of dimensions. 
Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. estimator : str The variogram score estimator to be used. backend: str @@ -68,21 +77,27 @@ def vs_ensemble( >>> sr.vs_ensemble(obs, fct) array([ 8.65630139, 6.84693866, 19.52993307]) """ + B = backends.active if backend is None else backends[backend] obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - if estimator == "nrg": - return variogram._variogram_score_nrg_gufunc(obs, fct, p) - elif estimator == "fair": - return variogram._variogram_score_fair_gufunc(obs, fct, p) + if w is None: + D = fct.shape[-1] + w = B.ones(obs.shape + (D,)) + else: + w = B.asarray(w) - return variogram.vs(obs, fct, p, estimator=estimator, backend=backend) + if backend == "numba": + estimator_check(estimator, variogram.estimator_gufuncs) + return variogram.estimator_gufuncs[estimator](obs, fct, w, p) + else: + return variogram.vs(obs, fct, w, p, estimator=estimator, backend=backend) def twvs_ensemble( obs: "Array", fct: "Array", v_func: tp.Callable, + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -108,14 +123,17 @@ def twvs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + w : array_like + The weights assigned to pairs of dimensions. Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. 
v_func : callable, array_like -> array_like Chaining function used to emphasise particular outcomes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. estimator : str The variogram score estimator to be used. backend : str @@ -143,9 +161,9 @@ def twvs_ensemble( >>> sr.twvs_ensemble(obs, fct, lambda x: np.maximum(x, -0.2)) array([5.94996894, 4.72029765, 6.08947229]) """ - obs, fct = map(v_func, (obs, fct)) + obs, fct = mv_weighted_score_chain(obs, fct, v_func) return vs_ensemble( - obs, fct, m_axis, v_axis, p=p, estimator=estimator, backend=backend + obs, fct, w, m_axis, v_axis, p=p, estimator=estimator, backend=backend ) @@ -153,6 +171,7 @@ def owvs_ensemble( obs: "Array", fct: "Array", w_func: tp.Callable, + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -182,14 +201,17 @@ def owvs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. + w : array_like + The weights assigned to pairs of dimensions. Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -209,24 +231,24 @@ def owvs_ensemble( array([ 9.86816636, 6.75532522, 19.59353723]) """ B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - obs_weights = B.apply_along_axis(w_func, obs, -1) - fct_weights = B.apply_along_axis(w_func, fct, -1) + if w is None: + D = fct.shape[-1] + w = B.ones(obs.shape + (D,)) if backend == "numba": - return variogram._owvariogram_score_gufunc( - obs, fct, p, obs_weights, fct_weights - ) - - return variogram.owvs(obs, fct, obs_weights, fct_weights, p=p, backend=backend) + return variogram.estimator_gufuncs["ownrg"](obs, fct, w, obs_w, fct_w, p) + else: + return variogram.owvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) def vrvs_ensemble( obs: "Array", fct: "Array", w_func: tp.Callable, + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -258,14 +280,17 @@ def vrvs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. + w : array_like + The weights assigned to pairs of dimensions. Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -285,15 +310,14 @@ def vrvs_ensemble( array([46.48256493, 57.90759816, 92.37153472]) """ B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - obs_weights = B.apply_along_axis(w_func, obs, -1) - fct_weights = B.apply_along_axis(w_func, fct, -1) + if w is None: + D = fct.shape[-1] + w = B.ones(obs.shape + (D,)) if backend == "numba": - return variogram._vrvariogram_score_gufunc( - obs, fct, p, obs_weights, fct_weights - ) - - return variogram.vrvs(obs, fct, obs_weights, fct_weights, p=p, backend=backend) + return variogram.estimator_gufuncs["vrnrg"](obs, fct, w, obs_w, fct_w, p) + else: + return variogram.vrvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 5e0e710..1f99917 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -160,6 +160,15 @@ def zeros( ) -> "Array": """Return a new array having a specified ``shape`` and filled with zeros.""" + @abc.abstractmethod + def ones( + self, + shape: int | tuple[int, ...], + *, + dtype: Dtype | None = None, + ) -> "Array": + """Return a new array having a specified ``shape`` and filled with ones.""" + @abc.abstractmethod def abs(self, x: "Array", /) -> "Array": """Calculate the absolute value for each element ``x_i`` of the input array ``x``.""" @@ -202,6 +211,17 @@ def any( ) -> "Array": """Test whether any input array element evaluates to ``True`` along a specified axis.""" + @abc.abstractmethod + def argsort( + self, + x: "Array", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "Array": + """Return the indices of a sorted copy of an input array ``x``.""" + @abc.abstractmethod def sort( self, @@ -299,6 +319,10 @@ def size(self, x: "Array") -> int: def indices(self, x: "Array") -> int: """Return an array representing the 
indices of a grid.""" + @abc.abstractmethod + def gather(self, x: "Array", ind: "Array", axis: int) -> "Array": + """Reorder an array ``x`` depending on a template ``ind`` across an axis ``axis``.""" + @abc.abstractmethod def roll(self, x: "Array", shift: int = 1, axis: int = -1) -> int: """Roll elements of an array along a given axis.""" diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 279781a..9cf4074 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -149,6 +149,11 @@ def zeros( ) -> "Array": return jnp.zeros(shape, dtype=dtype) + def ones( + self, shape: int | tuple[int, ...], *, dtype: Dtype | None = None + ) -> "Array": + return jnp.ones(shape, dtype=dtype) + def abs(self, x: "Array") -> "Array": return jnp.abs(x) @@ -184,6 +189,16 @@ def all( ) -> "Array": return jnp.all(x, axis=axis, keepdims=keepdims) + def argsort( + self, + x: "Array", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "Array": + return jnp.argsort(x, axis=axis, descending=descending) + def sort( self, x: "Array", @@ -271,6 +286,9 @@ def size(self, x: "Array") -> int: def indices(self, dimensions: tuple) -> "Array": return jnp.indices(dimensions) + def gather(self, x: "Array", ind: "Array", axis: int) -> "Array": + return jnp.take_along_axis(x, ind, axis=axis) + def roll(self, x: "Array", shift: int = 1, axis: int = -1) -> "Array": return jnp.roll(x, shift=shift, axis=axis) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 03f78d5..6cbfc08 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -153,6 +153,11 @@ def zeros( ) -> "NDArray": return np.zeros(shape, dtype=dtype) + def ones( + self, shape: int | tuple[int, ...], *, dtype: Dtype | None = None + ) -> "NDArray": + return np.ones(shape, dtype=dtype) + def abs(self, x: "NDArray") -> "NDArray": return np.abs(x) @@ -188,6 +193,17 @@ def all( ) -> "NDArray": return np.all(x, axis=axis, keepdims=keepdims) + def 
argsort( + self, + x: "NDArray", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "NDArray": + x = -x if descending else x + return np.argsort(x, axis=axis) + def sort( self, x: "NDArray", @@ -267,6 +283,9 @@ def size(self, x: "NDArray") -> int: def indices(self, dimensions: tuple) -> "NDArray": return np.indices(dimensions) + def gather(self, x: "NDArray", ind: "NDArray", axis: int) -> "NDArray": + return np.take_along_axis(x, ind, axis=axis) + def roll(self, x: "NDArray", shift: int = 1, axis: int = -1) -> "NDArray": return np.roll(x, shift=shift, axis=axis) diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index 70060c3..9d58712 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -168,6 +168,16 @@ def zeros( dtype = DTYPE return tf.zeros(shape, dtype=dtype) + def ones( + self, + shape: int | tuple[int, ...], + *, + dtype: Dtype | None = None, + ) -> "Tensor": + if dtype is None: + dtype = DTYPE + return tf.ones(shape, dtype=dtype) + def abs(self, x: "Tensor") -> "Tensor": return tf.math.abs(x) @@ -216,6 +226,17 @@ def all( dtype = DTYPE return tf.cast(tf.math.reduce_all(x, axis=axis, keepdims=keepdims), dtype=dtype) + def argsort( + self, + x: "Tensor", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "Tensor": + direction = "DESCENDING" if descending else "ASCENDING" + return tf.argsort(x, axis=axis, direction=direction) + def sort( self, x: "Tensor", @@ -306,6 +327,10 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = tf.stack(index_grids) return indices + def gather(self, x: "Tensor", ind: "Tensor", axis: int) -> "Tensor": + d = len(x.shape) + return tf.gather(x, ind, axis=axis, batch_dims=d) + def roll(self, x: "Tensor", shift: int = 1, axis: int = -1) -> "Tensor": return tf.roll(x, shift=shift, axis=axis) diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 1ef9831..1eb141c 100644 --- a/scoringrules/backend/torch.py 
+++ b/scoringrules/backend/torch.py @@ -157,6 +157,14 @@ def zeros( ) -> "Tensor": return torch.zeros(shape, dtype=dtype) + def ones( + self, + shape: int | tuple[int, ...], + *, + dtype: Dtype | None = None, + ) -> "Tensor": + return torch.ones(shape, dtype=dtype) + def abs(self, x: "Tensor") -> "Tensor": return torch.abs(x) @@ -195,6 +203,17 @@ def all( else: return torch.all(x, dim=axis, keepdim=keepdims) + def argsort( + self, + x: "Tensor", + /, + *, + axis: int = -1, + descending: bool = False, + stable: bool = True, + ) -> "Tensor": + return torch.argsort(x, stable=stable, dim=axis, descending=descending) + def sort( self, x: "Tensor", @@ -289,6 +308,9 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = torch.stack(index_grids) return indices + def gather(self, x: "Tensor", ind: "Tensor", axis: int) -> "Tensor": + return torch.gather(x, index=ind, dim=axis) + def roll(self, x: "Tensor", shift: int = 1, axis: int = -1) -> "Tensor": return torch.roll(x, shifts=shift, dims=axis) diff --git a/scoringrules/core/brier.py b/scoringrules/core/brier.py index e66fbae..18f05ed 100644 --- a/scoringrules/core/brier.py +++ b/scoringrules/core/brier.py @@ -12,16 +12,7 @@ def brier_score( obs: "ArrayLike", fct: "ArrayLike", backend: "Backend" = None ) -> "Array": """Compute the Brier Score for predicted probabilities of events.""" - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecasted probabilities must be within 0 and 1.") - - if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: - raise ValueError("Observations must be 0, 1, or NaN.") - - return B.asarray((fct - obs) ** 2) + return (fct - obs) ** 2 def rps_score( @@ -31,30 +22,12 @@ def rps_score( ) -> "Array": """Compute the Ranked Probability Score for ordinal categorical forecasts.""" B = backends.active if backend is None else backends[backend] - obs, fct = 
map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecast probabilities must be between 0 and 1.") - - categories = B.arange(1, fct.shape[-1] + 1) - obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0) - - return B.sum( - (B.cumsum(fct, axis=-1) - B.cumsum(obs_one_hot, axis=-1)) ** 2, axis=-1 - ) + return B.sum((B.cumsum(fct, axis=-1) - B.cumsum(obs, axis=-1)) ** 2, axis=-1) def log_score(obs: "ArrayLike", fct: "ArrayLike", backend: "Backend" = None) -> "Array": """Compute the Log Score for predicted probabilities of binary events.""" B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecasted probabilities must be within 0 and 1.") - - if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: - raise ValueError("Observations must be 0, 1, or NaN.") - return B.asarray(-B.log(B.abs(fct + obs - 1.0))) @@ -65,15 +38,7 @@ def rls_score( ) -> "Array": """Compute the Ranked Logarithmic Score for ordinal categorical forecasts.""" B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecast probabilities must be between 0 and 1.") - - categories = B.arange(1, fct.shape[-1] + 1) - obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0) - return B.sum( - -B.log(B.abs(B.cumsum(fct, axis=-1) + B.cumsum(obs_one_hot, axis=-1) - 1.0)), + -B.log(B.abs(B.cumsum(fct, axis=-1) + B.cumsum(obs, axis=-1) - 1.0)), axis=-1, ) diff --git a/scoringrules/core/crps/__init__.py b/scoringrules/core/crps/__init__.py index c54b179..ddeae70 100644 --- a/scoringrules/core/crps/__init__.py +++ b/scoringrules/core/crps/__init__.py @@ -1,4 +1,4 @@ -from ._approx import ensemble, ow_ensemble, quantile_pinball, vr_ensemble +from ._approx import ensemble, ow_ensemble, 
vr_ensemble, quantile_pinball from ._closed import ( beta, binomial, diff --git a/scoringrules/core/crps/_gufuncs.py b/scoringrules/core/crps/_gufuncs.py index 1e2ed1d..e3ecc2b 100644 --- a/scoringrules/core/crps/_gufuncs.py +++ b/scoringrules/core/crps/_gufuncs.py @@ -1,7 +1,5 @@ -import math - import numpy as np -from numba import guvectorize, njit, vectorize +from numba import guvectorize from scoringrules.core.utils import lazy_gufunc_wrapper_uv @@ -250,57 +248,12 @@ def _vrcrps_ensemble_nrg_gufunc( out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (wabs_x - wabs_y) * (wbar - ow) -@njit(["float32(float32)", "float64(float64)"]) -def _norm_cdf(x: float) -> float: - """Cumulative distribution function for the standard normal distribution.""" - out: float = (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 - return out - - -@njit(["float32(float32)", "float64(float64)"]) -def _norm_pdf(x: float) -> float: - """Probability density function for the standard normal distribution.""" - out: float = (1 / math.sqrt(2 * math.pi)) * math.exp(-(x**2) / 2) - return out - - -@njit(["float32(float32)", "float64(float64)"]) -def _logis_cdf(x: float) -> float: - """Cumulative distribution function for the standard logistic distribution.""" - out: float = 1.0 / (1.0 + math.exp(-x)) - return out - - -@vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) -def _crps_normal_ufunc(obs: float, mu: float, sigma: float) -> float: - ω = (obs - mu) / sigma - out: float = sigma * (ω * (2 * _norm_cdf(ω) - 1) + 2 * _norm_pdf(ω) - INV_SQRT_PI) - return out - - -@vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) -def _crps_lognormal_ufunc(obs: float, mulog: float, sigmalog: float) -> float: - ω = (np.log(obs) - mulog) / sigmalog - ex = 2 * np.exp(mulog + sigmalog**2 / 2) - out: float = obs * (2 * _norm_cdf(ω) - 1) - ex * ( - _norm_cdf(ω - sigmalog) + _norm_cdf(sigmalog / np.sqrt(2)) - 1 - ) - return out - - -@vectorize(["float32(float32, 
float32, float32)", "float64(float64, float64, float64)"]) -def _crps_logistic_ufunc(obs: float, mu: float, sigma: float) -> float: - ω = (obs - mu) / sigma - out: float = sigma * (ω - 2 * np.log(_logis_cdf(ω)) - 1) - return out - - estimator_gufuncs = { "akr_circperm": lazy_gufunc_wrapper_uv(_crps_ensemble_akr_circperm_gufunc), "akr": lazy_gufunc_wrapper_uv(_crps_ensemble_akr_gufunc), "fair": _crps_ensemble_fair_gufunc, "int": lazy_gufunc_wrapper_uv(_crps_ensemble_int_gufunc), - "nrg": lazy_gufunc_wrapper_uv(_crps_ensemble_nrg_gufunc), + "nrg": _crps_ensemble_nrg_gufunc, "pwm": lazy_gufunc_wrapper_uv(_crps_ensemble_pwm_gufunc), "qd": _crps_ensemble_qd_gufunc, "ownrg": lazy_gufunc_wrapper_uv(_owcrps_ensemble_nrg_gufunc), @@ -315,8 +268,5 @@ def _crps_logistic_ufunc(obs: float, mu: float, sigma: float) -> float: "_crps_ensemble_nrg_gufunc", "_crps_ensemble_pwm_gufunc", "_crps_ensemble_qd_gufunc", - "_crps_normal_ufunc", - "_crps_lognormal_ufunc", - "_crps_logistic_ufunc", "quantile_pinball_gufunc", ] diff --git a/scoringrules/core/energy/_gufuncs.py b/scoringrules/core/energy/_gufuncs.py index bd38fba..2359760 100644 --- a/scoringrules/core/energy/_gufuncs.py +++ b/scoringrules/core/energy/_gufuncs.py @@ -19,7 +19,6 @@ def _energy_score_nrg_gufunc( ): """Compute the Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -45,7 +44,6 @@ def _energy_score_fair_gufunc( ): """Compute the fair Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -56,12 +54,10 @@ def _energy_score_fair_gufunc( out[0] = e_1 / M - 0.5 / (M * (M - 1)) * e_2 -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _energy_score_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Compute the Energy Score for a finite ensemble using the approximate kernel representation.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -71,14 +67,12 @@ def _energy_score_akr_gufunc(obs: 
np.ndarray, fct: np.ndarray, out: np.ndarray): out[0] = e_1 / M - 0.5 * 1 / M * e_2 -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _energy_score_akr_circperm_gufunc( obs: np.ndarray, fct: np.ndarray, out: np.ndarray ): """Compute the Energy Score for a finite ensemble using the AKR with cyclic permutation.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -89,7 +83,6 @@ def _energy_score_akr_circperm_gufunc( out[0] = e_1 / M - 0.5 * 1 / M * e_2 -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _owenergy_score_gufunc( obs: np.ndarray, @@ -100,7 +93,6 @@ def _owenergy_score_gufunc( ): """Compute the Outcome-Weighted Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -113,7 +105,6 @@ def _owenergy_score_gufunc( out[0] = e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _vrenergy_score_gufunc( obs: np.ndarray, @@ -124,7 +115,6 @@ def _vrenergy_score_gufunc( ): """Compute the Vertically Re-scaled Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 wabs_x = 0.0 @@ -142,12 +132,12 @@ def _vrenergy_score_gufunc( estimator_gufuncs = { - "akr_circperm": _energy_score_akr_circperm_gufunc, - "akr": _energy_score_akr_gufunc, - "fair": _energy_score_fair_gufunc, - "nrg": _energy_score_nrg_gufunc, - "ownrg": _owenergy_score_gufunc, - "vrnrg": _vrenergy_score_gufunc, + "akr_circperm": lazy_gufunc_wrapper_mv(_energy_score_akr_circperm_gufunc), + "akr": lazy_gufunc_wrapper_mv(_energy_score_akr_gufunc), + "fair": lazy_gufunc_wrapper_mv(_energy_score_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_energy_score_nrg_gufunc), + "ownrg": lazy_gufunc_wrapper_mv(_owenergy_score_gufunc), + "vrnrg": lazy_gufunc_wrapper_mv(_vrenergy_score_gufunc), } __all__ = [ diff --git a/scoringrules/core/energy/_score.py b/scoringrules/core/energy/_score.py index ad9f63c..1dd6ec5 100644 --- a/scoringrules/core/energy/_score.py 
+++ b/scoringrules/core/energy/_score.py @@ -98,18 +98,17 @@ def owes_ensemble( ) -> "Array": """Compute the outcome-weighted energy score based on a finite ensemble.""" B = backends.active if backend is None else backends[backend] - M = fct.shape[-2] - wbar = B.sum(fw, -1) / M + wbar = B.mean(fw, axis=-1) err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) # (... M) - E_1 = B.sum(err_norm * fw * B.expand_dims(ow, -1), -1) / (M * wbar) # (...) + E_1 = B.mean(err_norm * fw * B.expand_dims(ow, -1), axis=-1) / wbar # (...) spread_norm = B.norm( B.expand_dims(fct, -2) - B.expand_dims(fct, -3), -1 ) # (... M M) fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) # (... M M) spread_norm *= fw_prod * B.expand_dims(ow, (-2, -1)) # (... M M) - E_2 = B.sum(spread_norm, (-2, -1)) / (M**2 * wbar**2) # (...) + E_2 = B.mean(spread_norm, axis=(-2, -1)) / (wbar**2) # (...) return E_1 - 0.5 * E_2 @@ -123,19 +122,18 @@ def vres_ensemble( ) -> "Array": """Compute the vertically re-scaled energy score based on a finite ensemble.""" B = backends.active if backend is None else backends[backend] - M, D = fct.shape[-2:] - wbar = B.sum(fw, -1) / M + wbar = B.mean(fw, axis=-1) err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) # (... M) err_norm *= fw * B.expand_dims(ow, -1) # (... M) - E_1 = B.sum(err_norm, -1) / M # (...) + E_1 = B.mean(err_norm, axis=-1) # (...) spread_norm = B.norm( B.expand_dims(fct, -2) - B.expand_dims(fct, -3), -1 ) # (... M M) fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) - E_2 = B.sum(spread_norm * fw_prod, (-2, -1)) / (M**2) # (...) + E_2 = B.mean(spread_norm * fw_prod, axis=(-2, -1)) # (...) - rhobar = B.sum(B.norm(fct, -1) * fw, -1) / M # (...) + rhobar = B.mean(B.norm(fct, -1) * fw, axis=-1) # (...) E_3 = (rhobar - B.norm(obs, -1) * ow) * (wbar - ow) # (...) 
return E_1 - 0.5 * E_2 + E_3 diff --git a/scoringrules/core/kernels/__init__.py b/scoringrules/core/kernels/__init__.py index d0377b7..2d02ba9 100644 --- a/scoringrules/core/kernels/__init__.py +++ b/scoringrules/core/kernels/__init__.py @@ -8,15 +8,12 @@ ) try: - from ._gufuncs import estimator_gufuncs -except ImportError: - estimator_gufuncs = None - -try: - from ._gufuncs import estimator_gufuncs_mv + from ._gufuncs import estimator_gufuncs_uv, estimator_gufuncs_mv except ImportError: + estimator_gufuncs_uv = None estimator_gufuncs_mv = None + __all__ = [ "ensemble_uv", "ow_ensemble_uv", diff --git a/scoringrules/core/kernels/_gufuncs.py b/scoringrules/core/kernels/_gufuncs.py index 90fe8fa..e07fa84 100644 --- a/scoringrules/core/kernels/_gufuncs.py +++ b/scoringrules/core/kernels/_gufuncs.py @@ -20,7 +20,6 @@ def _gauss_kern_mv(x1: float, x2: float) -> float: return out -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Standard version of the kernel score.""" @@ -33,16 +32,15 @@ def _ks_ensemble_uv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray e_1 = 0 e_2 = 0 - for x_i in fct: - e_1 += _gauss_kern_uv(x_i, obs) - for x_j in fct: - e_2 += _gauss_kern_uv(x_i, x_j) + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) + for j in range(M): + e_2 += _gauss_kern_uv(fct[i], fct[j]) e_3 = _gauss_kern_uv(obs, obs) - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Fair version of the kernel score.""" @@ -58,13 +56,12 @@ def _ks_ensemble_uv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra for i in range(M): e_1 += _gauss_kern_uv(fct[i], obs) for j in range(i + 1, M): # important to start from i + 1 and not i - e_2 += 2 * _gauss_kern_uv(fct[j], fct[i]) + e_2 
+= _gauss_kern_uv(fct[j], fct[i]) e_3 = _gauss_kern_uv(obs, obs) - out[0] = -(e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3) + out[0] = -((e_1 / M) - e_2 / (M * (M - 1)) - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Approximate kernel representation estimator of the kernel score.""" @@ -84,7 +81,6 @@ def _ks_ensemble_uv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_akr_circperm_gufunc( obs: np.ndarray, fct: np.ndarray, out: np.ndarray @@ -107,7 +103,6 @@ def _ks_ensemble_uv_akr_circperm_gufunc( out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n),(),(n)->()") def _owks_ensemble_uv_gufunc( obs: np.ndarray, @@ -134,10 +129,9 @@ def _owks_ensemble_uv_gufunc( wbar = np.mean(fw) - out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / ((M * wbar) ** 2) - 0.5 * e_3) + out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n),(),(n)->()") def _vrks_ensemble_uv_gufunc( obs: np.ndarray, @@ -162,10 +156,9 @@ def _vrks_ensemble_uv_gufunc( e_2 += _gauss_kern_uv(x_i, x_j) * fw[i] * fw[j] e_3 = _gauss_kern_uv(obs, obs) * ow * ow - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Standard version of the multivariate kernel score.""" @@ -179,10 +172,9 @@ def _ks_ensemble_mv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray e_2 += float(_gauss_kern_mv(fct[i], fct[j])) e_3 = float(_gauss_kern_mv(obs, obs)) - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv 
@guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Fair version of the multivariate kernel score.""" @@ -199,7 +191,6 @@ def _ks_ensemble_mv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra out[0] = -(e_1 / M - e_2 / (M * (M - 1)) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Approximate kernel representation estimator of the multivariate kernel score.""" @@ -215,7 +206,6 @@ def _ks_ensemble_mv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_akr_circperm_gufunc( obs: np.ndarray, fct: np.ndarray, out: np.ndarray @@ -234,7 +224,6 @@ def _ks_ensemble_mv_akr_circperm_gufunc( out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _owks_ensemble_mv_gufunc( obs: np.ndarray, @@ -259,7 +248,6 @@ def _owks_ensemble_mv_gufunc( out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _vrks_ensemble_mv_gufunc( obs: np.ndarray, @@ -279,25 +267,25 @@ def _vrks_ensemble_mv_gufunc( e_2 += float(_gauss_kern_mv(fct[i], fct[j]) * fw[i] * fw[j]) e_3 = float(_gauss_kern_mv(obs, obs)) * ow * ow - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -estimator_gufuncs = { - "akr": _ks_ensemble_uv_akr_gufunc, - "akr_circperm": _ks_ensemble_uv_akr_circperm_gufunc, - "fair": _ks_ensemble_uv_fair_gufunc, - "nrg": _ks_ensemble_uv_nrg_gufunc, - "ow": _owks_ensemble_uv_gufunc, - "vr": _vrks_ensemble_uv_gufunc, +estimator_gufuncs_uv = { + "akr": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_akr_gufunc), + "akr_circperm": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_akr_circperm_gufunc), + "fair": 
lazy_gufunc_wrapper_uv(_ks_ensemble_uv_fair_gufunc), + "nrg": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_nrg_gufunc), + "ow": lazy_gufunc_wrapper_uv(_owks_ensemble_uv_gufunc), + "vr": lazy_gufunc_wrapper_uv(_vrks_ensemble_uv_gufunc), } estimator_gufuncs_mv = { - "akr": _ks_ensemble_mv_akr_gufunc, - "akr_circperm": _ks_ensemble_mv_akr_circperm_gufunc, - "fair": _ks_ensemble_mv_fair_gufunc, - "nrg": _ks_ensemble_mv_nrg_gufunc, - "ow": _owks_ensemble_mv_gufunc, - "vr": _vrks_ensemble_mv_gufunc, + "akr": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_akr_gufunc), + "akr_circperm": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_akr_circperm_gufunc), + "fair": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_nrg_gufunc), + "ow": lazy_gufunc_wrapper_mv(_owks_ensemble_mv_gufunc), + "vr": lazy_gufunc_wrapper_mv(_vrks_ensemble_mv_gufunc), } __all__ = [ diff --git a/scoringrules/core/utils.py b/scoringrules/core/utils.py index 6aa1c13..47450f9 100644 --- a/scoringrules/core/utils.py +++ b/scoringrules/core/utils.py @@ -2,11 +2,14 @@ from scoringrules.backend import backends -_V_AXIS = -1 -_M_AXIS = -2 +_V_AXIS = -1 # variable index for multivariate forecasts +_M_AXIS = -2 # ensemble index for multivariate forecasts +_M_AXIS_UV = -1 # ensemble index for univariate forecasts +_K_AXIS = -1 # categories index for categorical forecasts +EPSILON = 1e-5 -def _multivariate_shape_compatibility(obs, fct, m_axis) -> None: +def _shape_compatibility_check(obs, fct, m_axis) -> None: f_shape = fct.shape o_shape = obs.shape o_shape_broadcast = o_shape[:m_axis] + (f_shape[m_axis],) + o_shape[m_axis:] @@ -16,6 +19,53 @@ def _multivariate_shape_compatibility(obs, fct, m_axis) -> None: ) +def binary_array_check(obs, fct, backend=None): + """Check and adapt the shapes of binary forecasts and observations arrays.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + if B.any(fct < 0.0) or B.any(fct > 1.0 + 
EPSILON): + raise ValueError("Forecasted probabilities must be within 0 and 1.") + if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: + raise ValueError("Observations must be 0, 1, or NaN.") + return obs, fct + + +def categorical_array_check(obs, fct, k_axis, onehot, backend=None): + """Check and adapt the shapes of categorical forecasts and observations arrays.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): + raise ValueError("Forecasted probabilities must be within 0 and 1.") + if k_axis != -1: + fct = B.moveaxis(fct, k_axis, _K_AXIS) + if onehot: + obs = B.moveaxis(obs, k_axis, _K_AXIS) + if onehot: + if obs.shape != fct.shape: + raise ValueError( + f"Forecasts shape {fct.shape} and observations shape {obs.shape} are not compatible for broadcasting!" + ) + else: + if obs.shape != fct.shape[:-1]: + raise ValueError( + f"Forecasts shape {fct.shape} and observations shape {obs.shape} are not compatible for broadcasting!" 
+ ) + categories = B.arange(1, fct.shape[-1] + 1) + obs = B.where(B.expand_dims(obs, -1) == categories, 1, 0) + return obs, fct + + +def univariate_array_check(obs, fct, m_axis, backend=None): + """Check and adapt the shapes of univariate forecasts and observations arrays.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis + _shape_compatibility_check(obs, fct, m_axis) + if m_axis != _M_AXIS_UV: + fct = B.moveaxis(fct, m_axis, _M_AXIS_UV) + return obs, fct + + def _multivariate_shape_permute(obs, fct, m_axis, v_axis, backend=None): B = backends.active if backend is None else backends[backend] v_axis_obs = v_axis - 1 if m_axis < v_axis else v_axis @@ -30,10 +80,81 @@ def multivariate_array_check(obs, fct, m_axis, v_axis, backend=None): obs, fct = map(B.asarray, (obs, fct)) m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis v_axis = v_axis if v_axis >= 0 else fct.ndim + v_axis - _multivariate_shape_compatibility(obs, fct, m_axis) + _shape_compatibility_check(obs, fct, m_axis) return _multivariate_shape_permute(obs, fct, m_axis, v_axis, backend=backend) +def estimator_check(estimator, gufuncs): + """Check that the estimator is valid for the given score.""" + if estimator not in gufuncs: + raise ValueError( + f"{estimator} is not a valid estimator. 
" f"Must be one of {gufuncs.keys()}" + ) + + +def uv_weighted_score_weights( + obs, fct, a=float("-inf"), b=float("inf"), w_func=None, backend=None +): + """Calculate weights for a weighted scoring rule corresponding to the observations + and ensemble members given a weight function.""" + B = backends.active if backend is None else backends[backend] + if w_func is None: + + def w_func(x): + return ((a <= x) & (x <= b)) * 1.0 + + obs_w, fct_w = map(w_func, (obs, fct)) + obs_w, fct_w = map(B.asarray, (obs_w, fct_w)) + if B.any(obs_w < 0) or B.any(fct_w < 0): + raise ValueError("`w_func` returns negative values") + return obs_w, fct_w + + +def uv_weighted_score_chain( + obs, fct, a=float("-inf"), b=float("inf"), v_func=None, backend=None +): + """Calculate transformed observations and ensemble members for threshold-weighted scoring rules given a chaining function.""" + B = backends.active if backend is None else backends[backend] + if v_func is None: + a, b, obs, fct = map(B.asarray, (a, b, obs, fct)) + + def v_func(x): + return B.minimum(B.maximum(x, a), b) + + obs, fct = map(v_func, (obs, fct)) + return obs, fct + + +def mv_weighted_score_weights(obs, fct, w_func=None, backend=None): + """Calculate weights for a weighted scoring rule corresponding to the observations + and ensemble members given a weight function.""" + B = backends.active if backend is None else backends[backend] + obs_w = B.apply_along_axis(w_func, obs, -1) + fct_w = B.apply_along_axis(w_func, fct, -1) + if backend == "torch": + obs_w = obs_w * 1.0 + fct_w = fct_w * 1.0 + if B.any(obs_w < 0) or B.any(fct_w < 0): + raise ValueError("`w_func` returns negative values") + return obs_w, fct_w + + +def mv_weighted_score_chain(obs, fct, v_func=None): + """Calculate transformed observations and ensemble members for threshold-weighted scoring rules given a chaining function.""" + obs, fct = map(v_func, (obs, fct)) + return obs, fct + + +def univariate_sort_ens(fct, estimator=None, sorted_ensemble=False, 
backend=None): + """Sort ensemble members if not already sorted.""" + B = backends.active if backend is None else backends[backend] + sort_ensemble = not sorted_ensemble and estimator in ["qd", "pwm", "int"] + if sort_ensemble: + ind = B.argsort(fct, axis=-1) + fct = B.gather(fct, ind, axis=-1) + return fct + + def lazy_gufunc_wrapper_uv(func): """ Wrapper for lazy/dynamic generalized universal functions so diff --git a/scoringrules/core/variogram/__init__.py b/scoringrules/core/variogram/__init__.py index 26a37d4..f49f7fe 100644 --- a/scoringrules/core/variogram/__init__.py +++ b/scoringrules/core/variogram/__init__.py @@ -1,15 +1,7 @@ try: - from ._gufuncs import ( - _variogram_score_nrg_gufunc, - _variogram_score_fair_gufunc, - _owvariogram_score_gufunc, - _vrvariogram_score_gufunc, - ) + from ._gufuncs import estimator_gufuncs except ImportError: - _variogram_score_nrg_gufunc = None - _variogram_score_fair_gufunc = None - _owvariogram_score_gufunc = None - _vrvariogram_score_gufunc = None + estimator_gufuncs = None from ._score import owvs_ensemble as owvs from ._score import vs_ensemble as vs @@ -19,8 +11,5 @@ "vs", "owvs", "vrvs", - "_variogram_score_nrg_gufunc", - "_variogram_score_fair_gufunc", - "_owvariogram_score_gufunc", - "_vrvariogram_score_gufunc", + "estimator_gufuncs", ] diff --git a/scoringrules/core/variogram/_gufuncs.py b/scoringrules/core/variogram/_gufuncs.py index 1402d28..5eb0484 100644 --- a/scoringrules/core/variogram/_gufuncs.py +++ b/scoringrules/core/variogram/_gufuncs.py @@ -4,14 +4,14 @@ from scoringrules.core.utils import lazy_gufunc_wrapper_mv -@guvectorize( - [ - "void(float32[:], float32[:,:], float32, float32[:])", - "void(float64[:], float64[:,:], float64, float64[:])", - ], - "(d),(m,d),()->()", -) -def _variogram_score_nrg_gufunc(obs, fct, p, out): +@guvectorize("(d),(m,d),(d,d),()->()") +def _variogram_score_nrg_gufunc( + obs: np.ndarray, + fct: np.ndarray, + w: np.ndarray, + p: np.ndarray, + out: np.ndarray, +): M = 
fct.shape[-2] D = fct.shape[-1] out[0] = 0.0 @@ -22,12 +22,17 @@ def _variogram_score_nrg_gufunc(obs, fct, p, out): vfct += abs(fct[m, i] - fct[m, j]) ** p vfct = vfct / M vobs = abs(obs[i] - obs[j]) ** p - out[0] += (vobs - vfct) ** 2 + out[0] += w[i, j] * (vobs - vfct) ** 2 -@lazy_gufunc_wrapper_mv -@guvectorize("(d),(m,d),()->()") -def _variogram_score_fair_gufunc(obs, fct, p, out): +@guvectorize("(d),(m,d),(d,d),()->()") +def _variogram_score_fair_gufunc( + obs: np.ndarray, + fct: np.ndarray, + w: np.ndarray, + p: np.ndarray, + out: np.ndarray, +): M = fct.shape[-2] D = fct.shape[-1] @@ -38,20 +43,19 @@ def _variogram_score_fair_gufunc(obs, fct, p, out): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(obs[i] - obs[j]) ** p - e_1 += (rho1 - rho2) ** 2 + e_1 += w[i, j] * (rho1 - rho2) ** 2 for m in range(k + 1, M): for i in range(D): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(fct[m, i] - fct[m, j]) ** p - e_2 += 2 * ((rho1 - rho2) ** 2) + e_2 += w[i, j] * 2 * ((rho1 - rho2) ** 2) out[0] = e_1 / M - 0.5 * e_2 / (M * (M - 1)) -@lazy_gufunc_wrapper_mv -@guvectorize("(d),(m,d),(),(),(m)->()") -def _owvariogram_score_gufunc(obs, fct, p, ow, fw, out): +@guvectorize("(d),(m,d),(d,d),(),(m),()->()") +def _owvariogram_score_gufunc(obs, fct, w, ow, fw, p, out): M = fct.shape[-2] D = fct.shape[-1] @@ -62,22 +66,21 @@ def _owvariogram_score_gufunc(obs, fct, p, ow, fw, out): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(obs[i] - obs[j]) ** p - e_1 += (rho1 - rho2) ** 2 * fw[k] * ow + e_1 += w[i, j] * (rho1 - rho2) ** 2 * fw[k] * ow for m in range(k + 1, M): for i in range(D): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(fct[m, i] - fct[m, j]) ** p - e_2 += 2 * ((rho1 - rho2) ** 2) * fw[k] * fw[m] * ow + e_2 += 2 * w[i, j] * ((rho1 - rho2) ** 2) * fw[k] * fw[m] * ow wbar = np.mean(fw) out[0] = e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) -@lazy_gufunc_wrapper_mv 
-@guvectorize("(d),(m,d),(),(),(m)->()") -def _vrvariogram_score_gufunc(obs, fct, p, ow, fw, out): +@guvectorize("(d),(m,d),(d,d),(),(m),()->()") +def _vrvariogram_score_gufunc(obs, fct, w, ow, fw, p, out): M = fct.shape[-2] D = fct.shape[-1] @@ -89,14 +92,14 @@ def _vrvariogram_score_gufunc(obs, fct, p, ow, fw, out): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(obs[i] - obs[j]) ** p - e_1 += (rho1 - rho2) ** 2 * fw[k] * ow - e_3_x += (rho1) ** 2 * fw[k] + e_1 += w[i, j] * (rho1 - rho2) ** 2 * fw[k] * ow + e_3_x += w[i, j] * (rho1) ** 2 * fw[k] for m in range(k + 1, M): for i in range(D): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(fct[m, i] - fct[m, j]) ** p - e_2 += 2 * ((rho1 - rho2) ** 2) * fw[k] * fw[m] + e_2 += 2 * w[i, j] * ((rho1 - rho2) ** 2) * fw[k] * fw[m] e_3_x *= 1 / M wbar = np.mean(fw) @@ -104,6 +107,21 @@ def _vrvariogram_score_gufunc(obs, fct, p, ow, fw, out): for i in range(D): for j in range(D): rho1 = abs(obs[i] - obs[j]) ** p - e_3_y += (rho1) ** 2 * ow + e_3_y += w[i, j] * (rho1) ** 2 * ow out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (e_3_x - e_3_y) * (wbar - ow) + + +estimator_gufuncs = { + "fair": lazy_gufunc_wrapper_mv(_variogram_score_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_variogram_score_nrg_gufunc), + "ownrg": lazy_gufunc_wrapper_mv(_owvariogram_score_gufunc), + "vrnrg": lazy_gufunc_wrapper_mv(_vrvariogram_score_gufunc), +} + +__all__ = [ + "_variogram_score_fair_gufunc", + "_variogram_score_nrg_gufunc", + "_owvariogram_score_gufunc", + "_vrvariogram_score_gufunc", +] diff --git a/scoringrules/core/variogram/_score.py b/scoringrules/core/variogram/_score.py index f6d091b..7ff36f4 100644 --- a/scoringrules/core/variogram/_score.py +++ b/scoringrules/core/variogram/_score.py @@ -9,6 +9,7 @@ def vs_ensemble( obs: "Array", # (... D) fct: "Array", # (... 
M D) + w: "Array", # (..., D D) p: float = 0.5, estimator: str = "nrg", backend: "Backend" = None, @@ -23,21 +24,25 @@ def vs_ensemble( obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) if estimator == "nrg": - vfct = B.sum(fct_diff, axis=-3) / M # (... D D) - out = B.sum((obs_diff - vfct) ** 2, axis=(-2, -1)) # (...) + fct_diff = B.sum(fct_diff, axis=-3) / M # (... D D) + out = B.sum(w * (obs_diff - fct_diff) ** 2, axis=(-2, -1)) # (...) elif estimator == "fair": E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) - E_1 = B.sum(E_1, axis=(-2, -1)) # (... M) + E_1 = B.sum(B.expand_dims(w, axis=-3) * E_1, axis=(-2, -1)) # (... M) E_1 = B.sum(E_1, axis=-1) / M # (...) E_2 = ( B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) ) ** 2 # (... M M D D) - E_2 = B.sum(E_2, axis=(-2, -1)) # (... M M) + E_2 = B.sum(B.expand_dims(w, (-3, -4)) * E_2, axis=(-2, -1)) # (... M M) E_2 = B.sum(E_2, axis=(-2, -1)) / (M * (M - 1)) # (...) out = E_1 - 0.5 * E_2 + else: + raise ValueError( + f"For the variogram score, {estimator} must be one of 'nrg', and 'fair'." + ) return out @@ -45,6 +50,7 @@ def vs_ensemble( def owvs_ensemble( obs: "Array", fct: "Array", + w: "Array", ow: "Array", fw: "Array", p: float = 0.5, @@ -52,25 +58,34 @@ def owvs_ensemble( ) -> "Array": """Compute the Outcome-Weighted Variogram Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - M = fct.shape[-2] - wbar = B.sum(fw, -1) / M + wbar = B.mean(fw, axis=-1) fct_diff = ( B.abs(B.expand_dims(fct, -2) - B.expand_dims(fct, -1)) ** p ) # (... M D D) obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) - vfct = B.sum(fct_diff * B.expand_dims(fw, (-2, -1)), axis=-3) / ( - M * B.expand_dims(wbar, (-2, -1)) - ) # (... D D) - out = B.sum(((obs_diff - vfct) ** 2), axis=(-2, -1)) * ow # (...) + E_1 = (fct_diff - B.expand_dims(obs_diff, -3)) ** 2 # (... 
M D D) + E_1 = B.sum(B.expand_dims(w, -3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.mean(E_1 * fw * B.expand_dims(ow, -1), axis=-1) / wbar # (...) - return out + fct_diff_spread = B.expand_dims(fct_diff, -3) - B.expand_dims( + fct_diff, -4 + ) # (... M M D D) + fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) + E_2 = B.sum( + B.expand_dims(w, (-3, -4)) * fct_diff_spread**2, axis=(-2, -1) + ) # (... M M) + E_2 *= fw_prod * B.expand_dims(ow, (-2, -1)) # (... M M) + E_2 = B.mean(E_2, axis=(-2, -1)) / (wbar**2) # (...) + + return E_1 - 0.5 * E_2 def vrvs_ensemble( obs: "Array", fct: "Array", + w: "Array", ow: "Array", fw: "Array", p: float = 0.5, @@ -78,7 +93,6 @@ def vrvs_ensemble( ) -> "Array": """Compute the Vertically Re-scaled Variogram Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - M: int = fct.shape[-2] wbar = B.mean(fw, axis=-1) fct_diff = ( @@ -87,20 +101,20 @@ def vrvs_ensemble( obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) - E_1 = B.sum(E_1, axis=(-2, -1)) # (... M) - E_1 = B.sum(E_1 * fw * B.expand_dims(ow, axis=-1), axis=-1) / M # (...) + E_1 = B.sum(B.expand_dims(w, -3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.mean(E_1 * fw * B.expand_dims(ow, axis=-1), axis=-1) # (...) E_2 = ( B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) ) ** 2 # (... M M D D) - E_2 = B.sum(E_2, axis=(-2, -1)) # (... M M) + E_2 = B.sum(B.expand_dims(w, (-3, -4)) * E_2, axis=(-2, -1)) # (... M M) fw_prod = B.expand_dims(fw, axis=-2) * B.expand_dims(fw, axis=-1) # (... M M) - E_2 = B.sum(E_2 * fw_prod, axis=(-2, -1)) / (M**2) # (...) + E_2 = B.mean(E_2 * fw_prod, axis=(-2, -1)) # (...) - E_3 = B.sum(fct_diff**2, axis=(-2, -1)) # (... M) - E_3 = B.sum(E_3 * fw, axis=-1) / M # (...) - E_3 -= B.sum(obs_diff**2, axis=(-2, -1)) * ow # (...) 
+ E_3 = B.sum(B.expand_dims(w, -3) * fct_diff**2, axis=(-2, -1)) # (... M) + E_3 = B.mean(E_3 * fw, axis=-1) # (...) + E_3 -= B.sum(w * obs_diff**2, axis=(-2, -1)) * ow # (...) E_3 *= wbar - ow # (...) return E_1 - 0.5 * E_2 + E_3 diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 1fa32e7..efd5b29 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -97,6 +97,11 @@ def test_gksmv(estimator, backend): [[9.8, 8.7, 11.9, 12.1, 13.4], [-24.8, -18.5, -29.9, -18.3, -21.0]] ).transpose() + # test exceptions + with pytest.raises(ValueError): + est = "undefined_estimator" + sr.gksmv_ensemble(obs, fct, estimator=est, backend=backend) + if estimator == "nrg": res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.5868737 diff --git a/tests/test_variogram.py b/tests/test_variogram.py index 5cf3985..d3366b3 100644 --- a/tests/test_variogram.py +++ b/tests/test_variogram.py @@ -39,7 +39,8 @@ def test_variogram_score_permuted_dims(estimator, backend): assert "jax" in res.__module__ -def test_variogram_score_correctness(backend): +@pytest.mark.parametrize("estimator", ESTIMATORS) +def test_variogram_score_correctness(estimator, backend): fct = np.array( [ [0.79546742, 0.4777960, 0.2164079, 0.5409873], @@ -48,14 +49,36 @@ def test_variogram_score_correctness(backend): ).T obs = np.array([0.2743836, 0.8146400]) - res = sr.vs_ensemble(obs, fct, p=0.5, estimator="nrg", backend=backend) - np.testing.assert_allclose(res, 0.04114727, rtol=1e-5) - - res = sr.vs_ensemble(obs, fct, p=0.5, estimator="fair", backend=backend) - np.testing.assert_allclose(res, 0.01407421, rtol=1e-5) - - res = sr.vs_ensemble(obs, fct, p=1.0, estimator="nrg", backend=backend) - np.testing.assert_allclose(res, 0.04480374, rtol=1e-5) - - res = sr.vs_ensemble(obs, fct, p=1.0, estimator="fair", backend=backend) - np.testing.assert_allclose(res, 0.004730382, rtol=1e-5) + res = sr.vs_ensemble(obs, fct, p=0.5, estimator=estimator, backend=backend) + if estimator 
== "nrg": + expected = 0.04114727 + np.testing.assert_allclose(res, expected, rtol=1e-5) + elif estimator == "fair": + expected = 0.01407421 + np.testing.assert_allclose(res, expected, rtol=1e-5) + + res = sr.vs_ensemble(obs, fct, p=1.0, estimator=estimator, backend=backend) + if estimator == "nrg": + expected = 0.04480374 + np.testing.assert_allclose(res, expected, rtol=1e-5) + elif estimator == "fair": + expected = 0.004730382 + np.testing.assert_allclose(res, expected, rtol=1e-5) + + # with and without dimension weights + w = np.ones((2, 2)) + res_w = sr.vs_ensemble(obs, fct, w=w, p=1.0, estimator=estimator, backend=backend) + np.testing.assert_allclose(res_w, res, rtol=1e-5) + + # with dimension weights + w = np.array([[0.1, 0.2], [0.2, 0.4]]) + res = sr.vs_ensemble(obs, fct, w=w, p=0.5, estimator=estimator, backend=backend) + if estimator == "nrg": + expected = 0.008229455 + np.testing.assert_allclose(res, expected, atol=1e-6) + + # invariance to diagonal terms of weight matrix + w = np.array([[1.1, 0.2], [0.2, 100]]) + res = sr.vs_ensemble(obs, fct, w=w, p=0.5, estimator=estimator, backend=backend) + if estimator == "nrg": + np.testing.assert_allclose(res, expected, atol=1e-6) diff --git a/tests/test_wcrps.py b/tests/test_wcrps.py index 2fd2c43..731a277 100644 --- a/tests/test_wcrps.py +++ b/tests/test_wcrps.py @@ -12,21 +12,34 @@ def test_owcrps_ensemble(backend): # test shapes obs = np.random.randn(N) - res = sr.owcrps_ensemble(obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0) - assert res.shape == (N,) res = sr.owcrps_ensemble( - obs, np.random.randn(M, N), w_func=lambda x: x * 0.0 + 1.0, m_axis=0 + obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0, backend=backend ) assert res.shape == (N,) + fct = np.random.randn(M, N) + res = sr.owcrps_ensemble( + obs, + fct, + w_func=lambda x: x * 0.0 + 1.0, + m_axis=0, + backend=backend, + ) + def test_vrcrps_ensemble(backend): # test shapes obs = np.random.randn(N) - res = sr.vrcrps_ensemble(obs, 
np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0) + res = sr.vrcrps_ensemble( + obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0, backend=backend + ) assert res.shape == (N,) res = sr.vrcrps_ensemble( - obs, np.random.randn(M, N), w_func=lambda x: x * 0.0 + 1.0, m_axis=0 + obs, + np.random.randn(M, N), + w_func=lambda x: x * 0.0 + 1.0, + m_axis=0, + backend=backend, ) assert res.shape == (N,) @@ -63,21 +76,21 @@ def test_owcrps_vs_crps(backend): sigma = abs(np.random.randn(N)) * 0.5 fct = np.random.randn(N, M) * sigma[..., None] + mu[..., None] - res = sr.crps_ensemble(obs, fct, backend=backend, estimator="nrg") + res = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend) # no argument given resw = sr.owcrps_ensemble(obs, fct, backend=backend) - np.testing.assert_allclose(res, resw, rtol=1e-5) + np.testing.assert_allclose(res, resw, rtol=1e-4) # a and b resw = sr.owcrps_ensemble( obs, fct, a=float("-inf"), b=float("inf"), backend=backend ) - np.testing.assert_allclose(res, resw, rtol=1e-5) + np.testing.assert_allclose(res, resw, rtol=1e-4) # w_func as identity function resw = sr.owcrps_ensemble(obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend) - np.testing.assert_allclose(res, resw, rtol=1e-5) + np.testing.assert_allclose(res, resw, rtol=1e-4) def test_vrcrps_vs_crps(backend): diff --git a/tests/test_wenergy.py b/tests/test_wenergy.py index 5f2f568..368c4b7 100644 --- a/tests/test_wenergy.py +++ b/tests/test_wenergy.py @@ -20,7 +20,7 @@ def test_owes_vs_es(backend): lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend, ) - np.testing.assert_allclose(res, resw, atol=1e-7) + np.testing.assert_allclose(res, resw, atol=1e-6) def test_twes_vs_es(backend):