diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py index 0800520..6fafb16 100644 --- a/scoringrules/_crps.py +++ b/scoringrules/_crps.py @@ -4,6 +4,7 @@ from scoringrules.core import crps, stats from scoringrules.core.utils import ( univariate_array_check, + univariate_weight_check, estimator_check, uv_weighted_score_weights, uv_weighted_score_chain, @@ -19,8 +20,9 @@ def crps_ensemble( fct: "Array", m_axis: int = -1, *, - sorted_ensemble: bool = False, + ens_w: "Array" = None, estimator: str = "qd", + sorted_ensemble: bool = False, backend: "Backend" = None, ) -> "Array": r"""Estimate the Continuous Ranked Probability Score (CRPS) for a finite ensemble. @@ -56,11 +58,15 @@ def crps_ensemble( represented by the last axis. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array, shape (..., m) + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one + across the ensemble members. + estimator : str + Indicates the CRPS estimator to be used. sorted_ensemble : bool Boolean indicating whether the ensemble members are already in ascending order. Default is False. - estimator : str - Indicates the CRPS estimator to be used. backend : str, optional The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``. 
@@ -103,12 +109,21 @@ def crps_ensemble( array([0.69605316, 0.32865417, 0.39048665]) """ obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - fct = univariate_sort_ens(fct, estimator, sorted_ensemble, backend=backend) - if backend == "numba": - estimator_check(estimator, crps.estimator_gufuncs) - return crps.estimator_gufuncs[estimator](obs, fct) + fct, ens_w = univariate_sort_ens( + fct, ens_w, m_axis, estimator, sorted_ensemble, backend=backend + ) + if ens_w is None: + if backend == "numba": + estimator_check(estimator, crps.estimator_gufuncs) + return crps.estimator_gufuncs[estimator](obs, fct) + else: + return crps.ensemble(obs, fct, estimator, backend=backend) else: - return crps.ensemble(obs, fct, estimator, backend=backend) + if backend == "numba": + estimator_check(estimator, crps.estimator_gufuncs_w) + return crps.estimator_gufuncs_w[estimator](obs, fct, ens_w) + else: + return crps.ensemble_w(obs, fct, ens_w, estimator, backend=backend) def twcrps_ensemble( @@ -118,6 +133,7 @@ def twcrps_ensemble( b: float = float("inf"), m_axis: int = -1, *, + ens_w: "Array" = None, v_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None, estimator: str = "qd", sorted_ensemble: bool = False, @@ -151,10 +167,19 @@ def twcrps_ensemble( to values in the range [a, b]. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array, shape (..., m) + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one + across the ensemble members. v_func : callable, array_like -> array_like Chaining function used to emphasise particular outcomes. For example, a function that only considers values above a certain threshold :math:`t` by projecting forecasts and observations to :math:`[t, \inf)`. + estimator : str + Indicates the CRPS estimator to be used. 
+ sorted_ensemble : bool + Boolean indicating whether the ensemble members are already in ascending order. + Default is False. backend : str, optional The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``. @@ -200,8 +225,9 @@ def twcrps_ensemble( obs, fct, m_axis=m_axis, - sorted_ensemble=sorted_ensemble, + ens_w=ens_w, estimator=estimator, + sorted_ensemble=sorted_ensemble, backend=backend, ) @@ -213,6 +239,7 @@ def owcrps_ensemble( b: float = float("inf"), m_axis: int = -1, *, + ens_w: "Array" = None, w_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None, backend: "Backend" = None, ) -> "Array": @@ -250,6 +277,10 @@ def owcrps_ensemble( to values in the range [a, b]. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array, shape (..., m) + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one + across the ensemble members. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. 
backend : str, optional @@ -288,10 +319,17 @@ def owcrps_ensemble( """ obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) - if backend == "numba": - return crps.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return crps.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + else: + return crps.ow_ensemble(obs, fct, obs_w, fct_w, backend=backend) else: - return crps.ow_ensemble(obs, fct, obs_w, fct_w, backend=backend) + ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return crps.estimator_gufuncs_w["ownrg"](obs, fct, obs_w, fct_w, ens_w) + else: + return crps.ow_ensemble_w(obs, fct, obs_w, fct_w, ens_w, backend=backend) def vrcrps_ensemble( @@ -301,6 +339,7 @@ def vrcrps_ensemble( b: float = float("inf"), m_axis: int = -1, *, + ens_w: "Array" = None, w_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None, backend: "Backend" = None, ) -> "Array": @@ -336,6 +375,10 @@ def vrcrps_ensemble( to values in the range [a, b]. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array, shape (..., m) + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one + across the ensemble members. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. 
backend : str, optional @@ -374,10 +417,17 @@ def vrcrps_ensemble( """ obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) - if backend == "numba": - return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + else: + return crps.vr_ensemble(obs, fct, obs_w, fct_w, backend=backend) else: - return crps.vr_ensemble(obs, fct, obs_w, fct_w, backend=backend) + ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return crps.estimator_gufuncs_w["vrnrg"](obs, fct, obs_w, fct_w, ens_w) + else: + return crps.vr_ensemble_w(obs, fct, obs_w, fct_w, ens_w, backend=backend) def crps_quantile( diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index 3902ee5..fdc101a 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -3,6 +3,7 @@ from scoringrules.core import energy from scoringrules.core.utils import ( multivariate_array_check, + multivariate_weight_check, estimator_check, mv_weighted_score_chain, mv_weighted_score_weights, @@ -18,6 +19,7 @@ def es_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": @@ -43,6 +45,9 @@ def es_ensemble( v_axis : int The axis corresponding to the variables dimension on the forecasts array (or the observations array with an extra dimension on `m_axis`). Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. estimator : str The energy score estimator to be used. backend : str @@ -66,11 +71,19 @@ def es_ensemble( Some theoretical background on scoring rules for multivariate forecasts. 
""" obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - estimator_check(estimator, energy.estimator_gufuncs) - return energy.estimator_gufuncs[estimator](obs, fct) + if ens_w is None: + if backend == "numba": + estimator_check(estimator, energy.estimator_gufuncs) + return energy.estimator_gufuncs[estimator](obs, fct) + else: + return energy.es(obs, fct, estimator=estimator, backend=backend) else: - return energy.es(obs, fct, estimator=estimator, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + estimator_check(estimator, energy.estimator_gufuncs_w) + return energy.estimator_gufuncs_w[estimator](obs, fct, ens_w) + else: + return energy.es_w(obs, fct, ens_w, estimator=estimator, backend=backend) def twes_ensemble( @@ -80,6 +93,7 @@ def twes_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": @@ -110,6 +124,9 @@ def twes_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of int The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. estimator : str The energy score estimator to be used. 
backend : str @@ -122,7 +139,13 @@ def twes_ensemble( """ obs, fct = mv_weighted_score_chain(obs, fct, v_func) return es_ensemble( - obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend + obs, + fct, + m_axis=m_axis, + v_axis=v_axis, + ens_w=ens_w, + estimator=estimator, + backend=backend, ) @@ -133,6 +156,7 @@ def owes_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, backend: "Backend" = None, ) -> "Array": r"""Compute the Outcome-Weighted Energy Score (owES) for a finite multivariate ensemble. @@ -165,6 +189,9 @@ def owes_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of ints The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -175,10 +202,17 @@ def owes_ensemble( """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - if backend == "numba": - return energy.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return energy.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + else: + return energy.owes(obs, fct, obs_w, fct_w, backend=backend) else: - return energy.owes(obs, fct, obs_w, fct_w, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return energy.estimator_gufuncs_w["ownrg"](obs, fct, obs_w, fct_w, ens_w) + else: + return energy.owes_w(obs, fct, obs_w, fct_w, ens_w=ens_w, backend=backend) def vres_ensemble( @@ -186,6 +220,7 @@ def vres_ensemble( fct: "Array", w_func: tp.Callable[["ArrayLike"], "ArrayLike"], *, + ens_w: "Array" = None, m_axis: int = -2, v_axis: int = -1, backend: "Backend" = None, @@ -221,6 +256,9 @@ def vres_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of int The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -231,7 +269,14 @@ def vres_ensemble( """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - if backend == "numba": - return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + else: + return energy.vres(obs, fct, obs_w, fct_w, backend=backend) else: - return energy.vres(obs, fct, obs_w, fct_w, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return energy.estimator_gufuncs_w["vrnrg"](obs, fct, obs_w, fct_w, ens_w) + else: + return energy.vres_w(obs, fct, obs_w, fct_w, ens_w=ens_w, backend=backend) diff --git a/scoringrules/_kernels.py b/scoringrules/_kernels.py index 7e9ca38..55f6487 100644 --- a/scoringrules/_kernels.py +++ b/scoringrules/_kernels.py @@ -3,11 +3,14 @@ from scoringrules.core import kernels from scoringrules.core.utils import ( univariate_array_check, + univariate_weight_check, multivariate_array_check, + multivariate_weight_check, estimator_check, uv_weighted_score_weights, uv_weighted_score_chain, mv_weighted_score_weights, + mv_weighted_score_chain, ) if tp.TYPE_CHECKING: @@ -19,6 +22,7 @@ def gksuv_ensemble( fct: "Array", m_axis: int = -1, *, + ens_w: "Array" = None, estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": @@ -47,6 +51,9 @@ def gksuv_ensemble( represented by the last axis. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. estimator : str Indicates the estimator to be used. 
backend : str @@ -63,11 +70,21 @@ def gksuv_ensemble( >>> sr.gks_ensemble(obs, pred) """ obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - if backend == "numba": - estimator_check(estimator, kernels.estimator_gufuncs_uv) - return kernels.estimator_gufuncs_uv[estimator](obs, fct) + if ens_w is None: + if backend == "numba": + estimator_check(estimator, kernels.estimator_gufuncs_uv) + return kernels.estimator_gufuncs_uv[estimator](obs, fct) + else: + return kernels.ensemble_uv(obs, fct, estimator=estimator, backend=backend) else: - return kernels.ensemble_uv(obs, fct, estimator=estimator, backend=backend) + ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + estimator_check(estimator, kernels.estimator_gufuncs_uv_w) + return kernels.estimator_gufuncs_uv_w[estimator](obs, fct, ens_w) + else: + return kernels.ensemble_uv_w( + obs, fct, ens_w=ens_w, estimator=estimator, backend=backend + ) def twgksuv_ensemble( @@ -77,6 +94,7 @@ def twgksuv_ensemble( b: float = float("inf"), m_axis: int = -1, *, + ens_w: "Array" = None, v_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None, estimator: str = "nrg", backend: "Backend" = None, @@ -113,6 +131,9 @@ def twgksuv_ensemble( to values in the range [a, b]. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. v_func : callable, array_like -> array_like Chaining function used to emphasise particular outcomes. 
For example, a function that only considers values above a certain threshold :math:`t` by projecting forecasts and observations @@ -140,6 +161,7 @@ def twgksuv_ensemble( obs, fct, m_axis=m_axis, + ens_w=ens_w, estimator=estimator, backend=backend, ) @@ -152,6 +174,7 @@ def owgksuv_ensemble( b: float = float("inf"), m_axis: int = -1, *, + ens_w: "Array" = None, w_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None, backend: "Backend" = None, ) -> "Array": @@ -188,6 +211,9 @@ def owgksuv_ensemble( to values in the range [a, b]. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. backend : str @@ -210,10 +236,19 @@ def owgksuv_ensemble( """ obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) - if backend == "numba": - return kernels.estimator_gufuncs_uv["ow"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return kernels.estimator_gufuncs_uv["ow"](obs, fct, obs_w, fct_w) + else: + return kernels.ow_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) else: - return kernels.ow_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) + ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_uv_w["ow"](obs, fct, obs_w, fct_w, ens_w) + else: + return kernels.ow_ensemble_uv_w( + obs, fct, obs_w, fct_w, ens_w, backend=backend + ) def vrgksuv_ensemble( @@ -223,6 +258,7 @@ def vrgksuv_ensemble( b: float = float("inf"), m_axis: int = -1, *, + ens_w: "Array" = None, w_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None, backend: "Backend" = None, ) -> "Array": @@ 
-258,6 +294,9 @@ def vrgksuv_ensemble( to values in the range [a, b]. m_axis : int The axis corresponding to the ensemble. Default is the last axis. + ens_w : array + Weights assigned to the ensemble members. Array with the same shape as fct. + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. backend : str @@ -280,10 +319,19 @@ def vrgksuv_ensemble( """ obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) - if backend == "numba": - return kernels.estimator_gufuncs_uv["vr"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return kernels.estimator_gufuncs_uv["vr"](obs, fct, obs_w, fct_w) + else: + return kernels.vr_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) else: - return kernels.vr_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) + ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_uv_w["vr"](obs, fct, obs_w, fct_w, ens_w) + else: + return kernels.vr_ensemble_uv_w( + obs, fct, obs_w, fct_w, ens_w, backend=backend + ) def gksmv_ensemble( @@ -292,6 +340,7 @@ def gksmv_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": @@ -326,6 +375,9 @@ def gksmv_ensemble( v_axis : int The axis corresponding to the variables dimension on the forecasts array (or the observations array with an extra dimension on `m_axis`). Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. estimator : str Indicates the estimator to be used. 
backend : str @@ -337,11 +389,21 @@ def gksmv_ensemble( The GKS between the forecast ensemble and obs. """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - estimator_check(estimator, kernels.estimator_gufuncs_mv) - return kernels.estimator_gufuncs_mv[estimator](obs, fct) + if ens_w is None: + if backend == "numba": + estimator_check(estimator, kernels.estimator_gufuncs_mv) + return kernels.estimator_gufuncs_mv[estimator](obs, fct) + else: + return kernels.ensemble_mv(obs, fct, estimator=estimator, backend=backend) else: - return kernels.ensemble_mv(obs, fct, estimator=estimator, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + estimator_check(estimator, kernels.estimator_gufuncs_mv_w) + return kernels.estimator_gufuncs_mv_w[estimator](obs, fct, ens_w) + else: + return kernels.ensemble_mv_w( + obs, fct, ens_w, estimator=estimator, backend=backend + ) def twgksmv_ensemble( @@ -351,6 +413,7 @@ def twgksmv_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": @@ -384,6 +447,9 @@ def twgksmv_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of ints The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. estimator : str Indicates the estimator to be used. backend : str @@ -394,9 +460,15 @@ def twgksmv_ensemble( score : array_like The computed Threshold-Weighted Gaussian Kernel Score. 
""" - obs, fct = map(v_func, (obs, fct)) + obs, fct = mv_weighted_score_chain(obs, fct, v_func) return gksmv_ensemble( - obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend + obs, + fct, + m_axis=m_axis, + v_axis=v_axis, + ens_w=ens_w, + estimator=estimator, + backend=backend, ) @@ -407,12 +479,11 @@ def owgksmv_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, backend: "Backend" = None, ) -> "Array": r"""Compute the multivariate Outcome-Weighted Gaussian Kernel Score (owGKS) for a finite ensemble. - - Given an ensemble forecast :math:`F_{ens}` comprised of multivariate members :math:`\mathbf{x}_{1}, \dots, \mathbf{x}_{M}`, the GKS is @@ -454,6 +525,9 @@ def owgksmv_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of ints The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -464,19 +538,29 @@ def owgksmv_ensemble( """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - if backend == "numba": - return kernels.estimator_gufuncs_mv["ow"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return kernels.estimator_gufuncs_mv["ow"](obs, fct, obs_w, fct_w) + else: + return kernels.ow_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) else: - return kernels.ow_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_mv_w["ow"](obs, fct, obs_w, fct_w, ens_w) + else: + return kernels.ow_ensemble_mv_w( + obs, fct, obs_w, fct_w, ens_w, backend=backend + ) def vrgksmv_ensemble( obs: "Array", fct: "Array", w_func: tp.Callable[["ArrayLike"], "ArrayLike"], - *, m_axis: int = -2, v_axis: int = -1, + *, + ens_w: "Array" = None, backend: "Backend" = None, ) -> "Array": r"""Compute the Vertically Re-scaled Gaussian Kernel Score (vrGKS) for a finite multivariate ensemble. @@ -510,6 +594,9 @@ def vrgksmv_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of ints The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. backend: str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -520,10 +607,19 @@ def vrgksmv_ensemble( """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - if backend == "numba": - return kernels.estimator_gufuncs_mv["vr"](obs, fct, obs_w, fct_w) + if ens_w is None: + if backend == "numba": + return kernels.estimator_gufuncs_mv["vr"](obs, fct, obs_w, fct_w) + else: + return kernels.vr_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) else: - return kernels.vr_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_mv_w["vr"](obs, fct, obs_w, fct_w, ens_w) + else: + return kernels.vr_ensemble_mv_w( + obs, fct, obs_w, fct_w, ens_w, backend=backend + ) __all__ = [ diff --git a/scoringrules/_variogram.py b/scoringrules/_variogram.py index 6028f65..8b8fe0a 100644 --- a/scoringrules/_variogram.py +++ b/scoringrules/_variogram.py @@ -4,6 +4,7 @@ from scoringrules.core import variogram from scoringrules.core.utils import ( multivariate_array_check, + multivariate_weight_check, estimator_check, mv_weighted_score_chain, mv_weighted_score_weights, @@ -20,6 +21,7 @@ def vs_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, p: float = 0.5, estimator: str = "nrg", backend: "Backend" = None, @@ -49,6 +51,9 @@ def vs_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. p : float The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. 
estimator : str @@ -86,11 +91,21 @@ def vs_ensemble( else: w = B.asarray(w) - if backend == "numba": - estimator_check(estimator, variogram.estimator_gufuncs) - return variogram.estimator_gufuncs[estimator](obs, fct, w, p) + if ens_w is None: + if backend == "numba": + estimator_check(estimator, variogram.estimator_gufuncs) + return variogram.estimator_gufuncs[estimator](obs, fct, w, p) + else: + return variogram.vs(obs, fct, w, p, estimator=estimator, backend=backend) else: - return variogram.vs(obs, fct, w, p, estimator=estimator, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + estimator_check(estimator, variogram.estimator_gufuncs_w) + return variogram.estimator_gufuncs_w[estimator](obs, fct, w, ens_w, p) + else: + return variogram.vs_w( + obs, fct, w, ens_w, p, estimator=estimator, backend=backend + ) def twvs_ensemble( @@ -101,6 +116,7 @@ def twvs_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, p: float = 0.5, estimator: str = "nrg", backend: "Backend" = None, @@ -132,6 +148,9 @@ def twvs_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. p : float The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. 
estimator : str @@ -163,7 +182,15 @@ def twvs_ensemble( """ obs, fct = mv_weighted_score_chain(obs, fct, v_func) return vs_ensemble( - obs, fct, w, m_axis, v_axis, p=p, estimator=estimator, backend=backend + obs, + fct, + w, + m_axis, + v_axis, + ens_w=ens_w, + p=p, + estimator=estimator, + backend=backend, ) @@ -175,6 +202,7 @@ def owvs_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, p: float = 0.5, backend: "Backend" = None, ) -> "Array": @@ -210,6 +238,9 @@ def owvs_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. p : float The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. backend : str @@ -238,10 +269,21 @@ def owvs_ensemble( D = fct.shape[-1] w = B.ones(obs.shape + (D,)) - if backend == "numba": - return variogram.estimator_gufuncs["ownrg"](obs, fct, w, obs_w, fct_w, p) + if ens_w is None: + if backend == "numba": + return variogram.estimator_gufuncs["ownrg"](obs, fct, w, obs_w, fct_w, p) + else: + return variogram.owvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) else: - return variogram.owvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return variogram.estimator_gufuncs_w["ownrg"]( + obs, fct, w, obs_w, fct_w, ens_w, p + ) + else: + return variogram.owvs_w( + obs, fct, w, obs_w, fct_w, ens_w=ens_w, p=p, backend=backend + ) def vrvs_ensemble( @@ -252,6 +294,7 @@ def vrvs_ensemble( m_axis: int = -2, v_axis: int = -1, *, + ens_w: "Array" = None, p: float = 0.5, backend: "Backend" = None, ) -> "Array": @@ -289,6 +332,9 @@ def vrvs_ensemble( The axis 
corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + ens_w : array_like + Weights assigned to the ensemble members. Array with one less dimension than fct (without the v_axis dimension). + Default is equal weighting. Weights are normalised so that they sum to one across the ensemble members. p : float The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. backend : str @@ -317,7 +363,18 @@ def vrvs_ensemble( D = fct.shape[-1] w = B.ones(obs.shape + (D,)) - if backend == "numba": - return variogram.estimator_gufuncs["vrnrg"](obs, fct, w, obs_w, fct_w, p) + if ens_w is None: + if backend == "numba": + return variogram.estimator_gufuncs["vrnrg"](obs, fct, w, obs_w, fct_w, p) + else: + return variogram.vrvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) else: - return variogram.vrvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) + ens_w = multivariate_weight_check(ens_w, fct, m_axis, backend=backend) + if backend == "numba": + return variogram.estimator_gufuncs_w["vrnrg"]( + obs, fct, w, obs_w, fct_w, ens_w, p + ) + else: + return variogram.vrvs_w( + obs, fct, w, obs_w, fct_w, ens_w=ens_w, p=p, backend=backend + ) diff --git a/scoringrules/core/crps/__init__.py b/scoringrules/core/crps/__init__.py index ddeae70..4702095 100644 --- a/scoringrules/core/crps/__init__.py +++ b/scoringrules/core/crps/__init__.py @@ -1,4 +1,5 @@ from ._approx import ensemble, ow_ensemble, vr_ensemble, quantile_pinball +from ._approx_w import ensemble_w, ow_ensemble_w, vr_ensemble_w from ._closed import ( beta, binomial, @@ -28,14 +29,19 @@ try: from ._gufuncs import estimator_gufuncs, quantile_pinball_gufunc + from ._gufuncs_w import estimator_gufuncs_w except ImportError: estimator_gufuncs = None + estimator_gufuncs_w = None quantile_pinball_gufunc = None __all__ = [ "ensemble", + "ensemble_w", "ow_ensemble", + "ow_ensemble_w", "vr_ensemble", + "vr_ensemble_w", "beta", 
"binomial", "exponential", @@ -61,6 +67,7 @@ "t", "uniform", "estimator_gufuncs", + "estimator_gufuncs_w", "quantile_pinball", "quantile_pinball_gufunc", ] diff --git a/scoringrules/core/crps/_approx_w.py b/scoringrules/core/crps/_approx_w.py new file mode 100644 index 0000000..8625d99 --- /dev/null +++ b/scoringrules/core/crps/_approx_w.py @@ -0,0 +1,178 @@ +import typing as tp + +from scoringrules.backend import backends + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, ArrayLike, Backend + + +def ensemble_w( + obs: "ArrayLike", + fct: "Array", + ens_w: "Array" = None, + estimator: str = "pwm", + backend: "Backend" = None, +) -> "Array": + """Compute the CRPS for a finite weighted ensemble.""" + if estimator == "nrg": + out = _crps_ensemble_nrg_w(obs, fct, ens_w, backend=backend) + elif estimator == "pwm": + out = _crps_ensemble_pwm_w(obs, fct, ens_w, backend=backend) + elif estimator == "fair": + out = _crps_ensemble_fair_w(obs, fct, ens_w, backend=backend) + elif estimator == "qd": + out = _crps_ensemble_qd_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr": + out = _crps_ensemble_akr_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr_circperm": + out = _crps_ensemble_akr_circperm_w(obs, fct, ens_w, backend=backend) + elif estimator == "int": + out = _crps_ensemble_int_w(obs, fct, ens_w, backend=backend) + else: + raise ValueError( + f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'pwm', 'qd', 'akr', 'akr_circperm' and 'int'." 
+ ) + + return out + + +def _crps_ensemble_fair_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """Fair version of the CRPS estimator based on the energy form.""" + B = backends.active if backend is None else backends[backend] + e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1) + e_2 = B.sum( + B.abs(fct[..., None] - fct[..., None, :]) * w[..., None] * w[..., None, :], + axis=(-1, -2), + ) / (1 - B.sum(w * w, axis=-1)) + return e_1 - 0.5 * e_2 + + +def _crps_ensemble_nrg_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the energy form.""" + B = backends.active if backend is None else backends[backend] + e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1) + e_2 = B.sum( + B.abs(fct[..., None] - fct[..., None, :]) * w[..., None] * w[..., None, :], + (-1, -2), + ) + return e_1 - 0.5 * e_2 + + +def _crps_ensemble_pwm_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the probability weighted moment (PWM) form.""" + B = backends.active if backend is None else backends[backend] + w_sum = B.cumsum(w, axis=-1) + expected_diff = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1) + β_0 = B.sum(fct * w, axis=-1) + β_1 = B.sum(fct * w * (w_sum - w), axis=-1) / (1 - B.sum(w * w, axis=-1)) + return expected_diff + β_0 - 2.0 * β_1 + + +def _crps_ensemble_qd_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the quantile score decomposition.""" + B = backends.active if backend is None else backends[backend] + w_sum = B.cumsum(w, axis=-1) + a = w_sum - 0.5 * w + dif = fct - obs[..., None] + c = B.where(dif > 0, 1 - a, -a) + s = B.sum(w * c * dif, axis=-1) + return 2 * s + + +def _crps_ensemble_akr_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the approximate kernel 
representation.""" + B = backends.active if backend is None else backends[backend] + M = fct.shape[-1] + e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1) + ind = [(i + 1) % M for i in range(M)] + e_2 = B.sum(B.abs(fct[..., ind] - fct) * w[..., ind], axis=-1) + return e_1 - 0.5 * e_2 + + +def _crps_ensemble_akr_circperm_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the AKR with cyclic permutation.""" + B = backends.active if backend is None else backends[backend] + M = fct.shape[-1] + e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1) + shift = int((M - 1) / 2) + ind = [(i + shift) % M for i in range(M)] + e_2 = B.sum(B.abs(fct[..., ind] - fct) * w[..., ind], axis=-1) + return e_1 - 0.5 * e_2 + + +def _crps_ensemble_int_w( + obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the integral representation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + y_pos = B.sum((fct <= obs[..., None]) * w, axis=-1, keepdims=True) + fct_cdf = B.cumsum(w, axis=-1) + fct_cdf = B.concat((fct_cdf, y_pos), axis=-1) + fct_cdf = B.sort(fct_cdf, axis=-1) + fct_exp = B.concat((fct, obs[..., None]), axis=-1) + fct_exp = B.sort(fct_exp, axis=-1) + fct_dif = fct_exp[..., 1:] - fct_exp[..., :M] + obs_cdf = (obs[..., None] <= fct_exp) * 1.0 + out = fct_dif * (fct_cdf[..., :M] - obs_cdf[..., :M]) ** 2 + return B.sum(out, axis=-1) + + +def ow_ensemble_w( + obs: "Array", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Outcome-Weighted CRPS for an ensemble forecast.""" + B = backends.active if backend is None else backends[backend] + wbar = B.sum(ens_w * fw, axis=-1) + e_1 = B.sum(ens_w * B.abs(obs[..., None] - fct) * fw, axis=-1) * ow / wbar + e_2 = B.sum( + ens_w[..., None] + * ens_w[..., None, :] + * B.abs(fct[..., None] - fct[..., None, :]) + * fw[..., 
None] + * fw[..., None, :], + axis=(-1, -2), + ) + e_2 *= ow / (wbar**2) + return e_1 - 0.5 * e_2 + + +def vr_ensemble_w( + obs: "Array", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Vertically Re-scaled CRPS for an ensemble forecast.""" + B = backends.active if backend is None else backends[backend] + e_1 = B.sum(ens_w * B.abs(obs[..., None] - fct) * fw, axis=-1) * ow + e_2 = B.sum( + ens_w[..., None] + * ens_w[..., None, :] + * B.abs(fct[..., None] - fct[..., None, :]) + * fw[..., None] + * fw[..., None, :], + axis=(-1, -2), + ) + e_3 = B.sum(ens_w * B.abs(fct) * fw, axis=-1) - B.abs(obs) * ow + e_3 *= B.sum(ens_w * fw, axis=-1) - ow + return e_1 - 0.5 * e_2 + e_3 diff --git a/scoringrules/core/crps/_gufuncs_w.py b/scoringrules/core/crps/_gufuncs_w.py new file mode 100644 index 0000000..24d1c98 --- /dev/null +++ b/scoringrules/core/crps/_gufuncs_w.py @@ -0,0 +1,257 @@ +import numpy as np +from numba import guvectorize + +from scoringrules.core.utils import lazy_gufunc_wrapper_uv + +INV_SQRT_PI = 1 / np.sqrt(np.pi) +EPSILON = 1e-6 + + +@guvectorize("(),(n),(n)->()") +def _crps_ensemble_akr_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """CRPS estimation based on the approximate kernel representation.""" + M = fct.shape[-1] + e_1 = 0 + e_2 = 0 + for i, forecast in enumerate(fct): + e_1 += abs(forecast - obs) * w[i] + # negative index wraps to the last member, + # matching the vectorised akr estimator + e_2 += abs(forecast - fct[i - 1]) * w[i] + out[0] = e_1 - 0.5 * e_2 + + +@guvectorize("(),(n),(n)->()") +def _crps_ensemble_akr_circperm_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """CRPS estimation based on the AKR with cyclic permutation.""" + M = fct.shape[-1] + e_1 = 0.0 + e_2 = 0.0 + for i, forecast in enumerate(fct): + sigma_i = int((i + 1 + ((M - 1) / 2)) % M) + e_1 += abs(forecast - obs) * w[i] + e_2 += abs(forecast - fct[sigma_i]) * w[i] + out[0] = e_1 - 0.5 * e_2 + +
+@guvectorize("(),(n),(n)->()") +def _crps_ensemble_fair_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Fair version of the CRPS estimator based on the energy form.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + + for i in range(M): + e_1 += abs(fct[i] - obs) * w[i] + for j in range(i + 1, M): + e_2 += abs(fct[j] - fct[i]) * w[j] * w[i] + + fair_c = 1 - np.sum(w**2) + + out[0] = e_1 - e_2 / fair_c + + +@guvectorize("(),(n),(n)->()") +def _crps_ensemble_int_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """CRPS estimator based on the integral form.""" + + if np.isnan(obs): + out[0] = np.nan + return + + obs_cdf = 0 + forecast_cdf = 0.0 + prev_forecast = 0.0 + integral = 0.0 + + for n, forecast in enumerate(fct): + if np.isnan(forecast): + if n == 0: + integral = np.nan + forecast = prev_forecast # noqa: PLW2901 + break + + if obs_cdf == 0 and obs < forecast: + # this correctly handles the transition point of the obs CDF + integral += (obs - prev_forecast) * forecast_cdf**2 + integral += (forecast - obs) * (forecast_cdf - 1) ** 2 + obs_cdf = 1 + else: + integral += (forecast_cdf - obs_cdf) ** 2 * (forecast - prev_forecast) + + forecast_cdf += w[n] + prev_forecast = forecast + + if obs_cdf == 0: + integral += obs - forecast + + out[0] = integral + + +@guvectorize("(),(n),(n)->()") +def _crps_ensemble_nrg_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """CRPS estimator based on the energy form.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + + for i in range(M): + e_1 += abs(fct[i] - obs) * w[i] + for j in range(i + 1, M): + e_2 += abs(fct[j] - fct[i]) * w[j] * w[i] + + out[0] = e_1 - e_2 + + +@guvectorize("(),(n),(n)->()") +def _crps_ensemble_pwm_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """CRPS estimator based on the 
probability weighted moment (PWM) form.""" + + if np.isnan(obs): + out[0] = np.nan + return + + expected_diff = 0.0 + β_0 = 0.0 + β_1 = 0.0 + + w_sum = np.cumsum(w) + + for i, forecast in enumerate(fct): + expected_diff += np.abs(forecast - obs) * w[i] + β_0 += forecast * w[i] + β_1 += forecast * w[i] * (w_sum[i] - w[i]) + + fair_c = 1 - np.sum(w**2) + + out[0] = expected_diff + β_0 - 2 * β_1 / fair_c + + +@guvectorize("(),(n),(n)->()") +def _crps_ensemble_qd_w_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """CRPS estimator based on the quantile decomposition form.""" + + if np.isnan(obs): + out[0] = np.nan + return + + obs_cdf = 0.0 + integral = 0.0 + a = np.cumsum(w) - w / 2 + + for i, forecast in enumerate(fct): + if obs < forecast: + obs_cdf = 1.0 + + integral += w[i] * (obs_cdf - a[i]) * (forecast - obs) + + out[0] = 2 * integral + + +@guvectorize("(),(n),(),(n),(n)->()") +def _owcrps_ensemble_nrg_w_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + ens_w: np.ndarray, + out: np.ndarray, +): + """Outcome-weighted CRPS estimator based on the energy form.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i in range(M): + e_1 += ens_w[i] * abs(fct[i] - obs) * fw[i] * ow + for j in range(i + 1, M): + e_2 += 2 * ens_w[i] * ens_w[j] * abs(fct[i] - fct[j]) * fw[i] * fw[j] * ow + + wbar = np.sum(ens_w * fw) + + out[0] = e_1 / wbar - 0.5 * e_2 / (wbar**2) + + +@guvectorize("(),(n),(),(n),(n)->()") +def _vrcrps_ensemble_nrg_w_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + ens_w: np.ndarray, + out: np.ndarray, +): + """Vertically re-scaled CRPS estimator based on the energy form.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i in range(M): + e_1 += ens_w[i] * abs(fct[i] - obs) * fw[i] * ow + for j in range(i + 1, M): + e_2 += 2 * ens_w[i] * ens_w[j] * 
abs(fct[i] - fct[j]) * fw[i] * fw[j] + + wbar = np.sum(ens_w * fw) + wabs_x = np.sum(ens_w * np.abs(fct) * fw) + wabs_y = abs(obs) * ow + + out[0] = e_1 - 0.5 * e_2 + (wabs_x - wabs_y) * (wbar - ow) + + +estimator_gufuncs_w = { + "akr_circperm": lazy_gufunc_wrapper_uv(_crps_ensemble_akr_circperm_w_gufunc), + "akr": lazy_gufunc_wrapper_uv(_crps_ensemble_akr_w_gufunc), + "fair": lazy_gufunc_wrapper_uv(_crps_ensemble_fair_w_gufunc), + "int": lazy_gufunc_wrapper_uv(_crps_ensemble_int_w_gufunc), + "nrg": lazy_gufunc_wrapper_uv(_crps_ensemble_nrg_w_gufunc), + "pwm": lazy_gufunc_wrapper_uv(_crps_ensemble_pwm_w_gufunc), + "qd": lazy_gufunc_wrapper_uv(_crps_ensemble_qd_w_gufunc), + "ownrg": lazy_gufunc_wrapper_uv(_owcrps_ensemble_nrg_w_gufunc), + "vrnrg": lazy_gufunc_wrapper_uv(_vrcrps_ensemble_nrg_w_gufunc), +} + +__all__ = [ + "_crps_ensemble_akr_circperm_w_gufunc", + "_crps_ensemble_akr_w_gufunc", + "_crps_ensemble_fair_w_gufunc", + "_crps_ensemble_int_w_gufunc", + "_crps_ensemble_nrg_w_gufunc", + "_crps_ensemble_pwm_w_gufunc", + "_crps_ensemble_qd_w_gufunc", + "_owcrps_ensemble_nrg_w_gufunc", + "_vrcrps_ensemble_nrg_w_gufunc", +] diff --git a/scoringrules/core/energy/__init__.py b/scoringrules/core/energy/__init__.py index 6144c79..6c09a5e 100644 --- a/scoringrules/core/energy/__init__.py +++ b/scoringrules/core/energy/__init__.py @@ -1,15 +1,25 @@ -from ._score import es_ensemble as es -from ._score import owes_ensemble as owes -from ._score import vres_ensemble as vres - try: from ._gufuncs import estimator_gufuncs + from ._gufuncs_w import estimator_gufuncs_w except ImportError: estimator_gufuncs = None + estimator_gufuncs_w = None + +from ._score import es_ensemble as es +from ._score import owes_ensemble as owes +from ._score import vres_ensemble as vres + +from ._score_w import es_ensemble_w as es_w +from ._score_w import owes_ensemble_w as owes_w +from ._score_w import vres_ensemble_w as vres_w __all__ = [ "es", + "es_w", "owes", + "owes_w", "vres", + "vres_w", 
"estimator_gufuncs", + "estimator_gufuncs_w", ] diff --git a/scoringrules/core/energy/_gufuncs_w.py b/scoringrules/core/energy/_gufuncs_w.py new file mode 100644 index 0000000..7553a66 --- /dev/null +++ b/scoringrules/core/energy/_gufuncs_w.py @@ -0,0 +1,161 @@ +import numpy as np +from numba import guvectorize + +from scoringrules.core.utils import lazy_gufunc_wrapper_mv + + +@guvectorize("(d),(m,d),(m)->()") +def _energy_score_nrg_gufunc_w( + obs: np.ndarray, + fct: np.ndarray, + ens_w: np.ndarray, + out: np.ndarray, +): + """Compute the Energy Score for a finite ensemble.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs)) * ens_w[i] + for j in range(i + 1, M): + e_2 += 2 * float(np.linalg.norm(fct[i] - fct[j])) * ens_w[i] * ens_w[j] + + out[0] = e_1 - 0.5 * e_2 + + +@guvectorize("(d),(m,d),(m)->()") +def _energy_score_fair_gufunc_w( + obs: np.ndarray, + fct: np.ndarray, + ens_w: np.ndarray, + out: np.ndarray, +): + """Compute the fair Energy Score for a finite ensemble.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs)) * ens_w[i] + for j in range(i + 1, M): + e_2 += 2 * float(np.linalg.norm(fct[i] - fct[j])) * ens_w[i] * ens_w[j] + + fair_c = 1 - np.sum(ens_w * ens_w) + + out[0] = e_1 - 0.5 * e_2 / fair_c + + +@guvectorize("(d),(m,d),(m)->()") +def _energy_score_akr_gufunc_w( + obs: np.ndarray, fct: np.ndarray, ens_w: np.ndarray, out: np.ndarray +): + """Compute the Energy Score for a finite ensemble using the approximate kernel representation.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs)) * ens_w[i] + e_2 += float(np.linalg.norm(fct[i] - fct[i - 1])) * ens_w[i] + + out[0] = e_1 - 0.5 * e_2 + + +@guvectorize("(d),(m,d),(m)->()") +def _energy_score_akr_circperm_gufunc_w( + obs: np.ndarray, fct: np.ndarray, ens_w: np.ndarray, out: np.ndarray +): + """Compute the Energy 
Score for a finite ensemble using the AKR with cyclic permutation.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + sigma_i = int((i + (M // 2)) % M) + e_1 += float(np.linalg.norm(fct[i] - obs)) * ens_w[i] + e_2 += float(np.linalg.norm(fct[i] - fct[sigma_i])) * ens_w[i] + + out[0] = e_1 - 0.5 * e_2 + + +@guvectorize("(d),(m,d),(),(m),(m)->()") +def _owenergy_score_gufunc_w( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + ens_w: np.ndarray, + out: np.ndarray, +): + """Compute the Outcome-Weighted Energy Score for a finite ensemble.""" + M = fct.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs) * fw[i] * ow) * ens_w[i] + for j in range(i + 1, M): + e_2 += ( + 2 + * float(np.linalg.norm(fct[i] - fct[j]) * fw[i] * fw[j] * ow) + * ens_w[i] + * ens_w[j] + ) + + wbar = np.sum(fw * ens_w) + + out[0] = e_1 / (wbar) - 0.5 * e_2 / (wbar**2) + + +@guvectorize("(d),(m,d),(),(m),(m)->()") +def _vrenergy_score_gufunc_w( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + ens_w: np.ndarray, + out: np.ndarray, +): + """Compute the Vertically Re-scaled Energy Score for a finite ensemble.""" + M = fct.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + wabs_x = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs) * fw[i] * ow) * ens_w[i] + wabs_x += np.linalg.norm(fct[i]) * fw[i] * ens_w[i] + for j in range(i + 1, M): + e_2 += ( + 2 + * float(np.linalg.norm(fct[i] - fct[j]) * fw[i] * fw[j]) + * ens_w[i] + * ens_w[j] + ) + + wbar = np.sum(fw * ens_w) + wabs_y = np.linalg.norm(obs) * ow + + out[0] = e_1 - 0.5 * e_2 + (wabs_x - wabs_y) * (wbar - ow) + + +estimator_gufuncs_w = { + "akr_circperm": lazy_gufunc_wrapper_mv(_energy_score_akr_circperm_gufunc_w), + "akr": lazy_gufunc_wrapper_mv(_energy_score_akr_gufunc_w), + "fair": lazy_gufunc_wrapper_mv(_energy_score_fair_gufunc_w), + "nrg": lazy_gufunc_wrapper_mv(_energy_score_nrg_gufunc_w), +
"ownrg": lazy_gufunc_wrapper_mv(_owenergy_score_gufunc_w), + "vrnrg": lazy_gufunc_wrapper_mv(_vrenergy_score_gufunc_w), +} + +__all__ = [ + "_energy_score_akr_circperm_gufunc_w", + "_energy_score_akr_gufunc_w", + "_energy_score_fair_gufunc_w", + "_energy_score_nrg_gufunc_w", + "_owenergy_score_gufunc_w", + "_vrenergy_score_gufunc_w", +] diff --git a/scoringrules/core/energy/_score_w.py b/scoringrules/core/energy/_score_w.py new file mode 100644 index 0000000..2d5d9a9 --- /dev/null +++ b/scoringrules/core/energy/_score_w.py @@ -0,0 +1,159 @@ +import typing as tp + +from scoringrules.backend import backends + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, Backend + + +def es_ensemble_w( + obs: "Array", fct: "Array", ens_w: "Array", estimator: str = "nrg", backend=None +) -> "Array": + """ + Compute the energy score based on a finite ensemble. + + The ensemble and variables axes are on the second last and last dimensions respectively. + """ + if estimator == "nrg": + out = _es_ensemble_nrg_w(obs, fct, ens_w, backend=backend) + elif estimator == "fair": + out = _es_ensemble_fair_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr": + out = _es_ensemble_akr_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr_circperm": + out = _es_ensemble_akr_circperm_w(obs, fct, ens_w, backend=backend) + else: + raise ValueError( + f"For the energy score, {estimator} must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'." + ) + + return out + + +def _es_ensemble_nrg_w( + obs: "Array", fct: "Array", ens_w: "Array", backend=None +) -> "Array": + """ + Compute the energy score based on a finite ensemble with the energy estimator. 
+ """ + B = backends.active if backend is None else backends[backend] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm * ens_w, -1) + + spread_norm = B.norm(B.expand_dims(fct, -3) - B.expand_dims(fct, -2), -1) + E_2 = B.sum( + spread_norm * B.expand_dims(ens_w, -1) * B.expand_dims(ens_w, -2), (-2, -1) + ) + + return E_1 - 0.5 * E_2 + + +def _es_ensemble_fair_w( + obs: "Array", fct: "Array", ens_w: "Array", backend=None +) -> "Array": + """ + Compute the energy score based on a finite ensemble with the fair estimator. + """ + B = backends.active if backend is None else backends[backend] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm * ens_w, -1) + + spread_norm = B.norm(B.expand_dims(fct, -3) - B.expand_dims(fct, -2), -1) + E_2 = B.sum( + spread_norm * B.expand_dims(ens_w, -1) * B.expand_dims(ens_w, -2), (-2, -1) + ) + + fair_c = 1 - B.sum(ens_w * ens_w, axis=-1) + + return E_1 - 0.5 * E_2 / fair_c + + +def _es_ensemble_akr_w( + obs: "Array", fct: "Array", ens_w: "Array", backend: "Backend" = None +) -> "Array": + """Compute the Energy Score for a finite ensemble using the approximate kernel representation.""" + B = backends.active if backend is None else backends[backend] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm * ens_w, -1) + + spread_norm = B.norm(fct - B.roll(fct, shift=1, axis=-2), -1) + E_2 = B.sum(spread_norm * ens_w, -1) + + return E_1 - 0.5 * E_2 + + +def _es_ensemble_akr_circperm_w( + obs: "Array", fct: "Array", ens_w: "Array", backend: "Backend" = None +) -> "Array": + """Compute the Energy Score for a finite ensemble using the AKR with cyclic permutation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm * ens_w, -1) + + shift = M // 2 + spread_norm = B.norm(fct - B.roll(fct, shift=shift, axis=-2), -1) + E_2 = B.sum(spread_norm * ens_w, 
-1) + + return E_1 - 0.5 * E_2 + + +def owes_ensemble_w( + obs: "Array", # (... D) + fct: "Array", # (... M D) + ow: "Array", # (...) + fw: "Array", # (... M) + ens_w: "Array", # (... M) + backend: "Backend" = None, +) -> "Array": + """Compute the outcome-weighted energy score based on a finite ensemble.""" + B = backends.active if backend is None else backends[backend] + wbar = B.sum(fw * ens_w, -1) + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) # (... M) + E_1 = B.sum(err_norm * fw * B.expand_dims(ow, -1) * ens_w, -1) / wbar # (...) + + spread_norm = B.norm( + B.expand_dims(fct, -2) - B.expand_dims(fct, -3), -1 + ) # (... M M) + fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) # (... M M) + spread_norm *= fw_prod * B.expand_dims(ow, (-2, -1)) # (... M M) + E_2 = B.sum( + spread_norm * B.expand_dims(ens_w, -1) * B.expand_dims(ens_w, -2), (-2, -1) + ) / (wbar**2) # (...) + + return E_1 - 0.5 * E_2 + + +def vres_ensemble_w( + obs: "Array", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute the vertically re-scaled energy score based on a finite ensemble.""" + B = backends.active if backend is None else backends[backend] + wbar = B.sum(fw * ens_w, -1) + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) # (... M) + err_norm *= fw * B.expand_dims(ow, -1) # (... M) + E_1 = B.sum(err_norm * ens_w, -1) # (...) + + spread_norm = B.norm( + B.expand_dims(fct, -2) - B.expand_dims(fct, -3), -1 + ) # (... M M) + fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) + E_2 = B.sum( + spread_norm * fw_prod * B.expand_dims(ens_w, -1) * B.expand_dims(ens_w, -2), + (-2, -1), + ) # (...) + + rhobar = B.sum(B.norm(fct, -1) * fw * ens_w, -1) # (...) + E_3 = (rhobar - B.norm(obs, -1) * ow) * (wbar - ow) # (...) 
+ return E_1 - 0.5 * E_2 + E_3 diff --git a/scoringrules/core/kernels/__init__.py b/scoringrules/core/kernels/__init__.py index 2d02ba9..66e42f7 100644 --- a/scoringrules/core/kernels/__init__.py +++ b/scoringrules/core/kernels/__init__.py @@ -7,11 +7,23 @@ vr_ensemble_mv, ) +from ._approx_w import ( + ensemble_uv_w, + ow_ensemble_uv_w, + vr_ensemble_uv_w, + ensemble_mv_w, + ow_ensemble_mv_w, + vr_ensemble_mv_w, +) + try: from ._gufuncs import estimator_gufuncs_uv, estimator_gufuncs_mv + from ._gufuncs_w import estimator_gufuncs_uv_w, estimator_gufuncs_mv_w except ImportError: estimator_gufuncs_uv = None estimator_gufuncs_mv = None + estimator_gufuncs_uv_w = None + estimator_gufuncs_mv_w = None __all__ = [ @@ -21,6 +33,14 @@ "ensemble_mv", "ow_ensemble_mv", "vr_ensemble_mv", - "estimator_gufuncs", + "ensemble_uv_w", + "ow_ensemble_uv_w", + "vr_ensemble_uv_w", + "ensemble_mv_w", + "ow_ensemble_mv_w", + "vr_ensemble_mv_w", + "estimator_gufuncs_uv", "estimator_gufuncs_mv", + "estimator_gufuncs_uv_w", + "estimator_gufuncs_mv_w", ] diff --git a/scoringrules/core/kernels/_approx_w.py b/scoringrules/core/kernels/_approx_w.py new file mode 100644 index 0000000..bf71c38 --- /dev/null +++ b/scoringrules/core/kernels/_approx_w.py @@ -0,0 +1,371 @@ +import typing as tp + +from scoringrules.backend import backends + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, ArrayLike, Backend + + +def gauss_kern_uv( + x1: "ArrayLike", x2: "ArrayLike", backend: "Backend" = None +) -> "Array": + """Compute the gaussian kernel evaluated at x1 and x2.""" + B = backends.active if backend is None else backends[backend] + return B.exp(-0.5 * (x1 - x2) ** 2) + + +def gauss_kern_mv( + x1: "ArrayLike", x2: "ArrayLike", backend: "Backend" = None +) -> "Array": + """Compute the gaussian kernel evaluated at vectors x1 and x2.""" + B = backends.active if backend is None else backends[backend] + return B.exp(-0.5 * B.norm(x1 - x2, -1) ** 2) + + +def ensemble_uv_w( + obs: "ArrayLike", 
+ fct: "Array", + ens_w: "Array", + estimator: str = "nrg", + backend: "Backend" = None, +) -> "Array": + """Compute a kernel score for a finite ensemble.""" + if estimator == "nrg": + out = _ks_ensemble_nrg_uv_w(obs, fct, ens_w, backend=backend) + elif estimator == "fair": + out = _ks_ensemble_fair_uv_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr": + out = _ks_ensemble_akr_uv_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr_circperm": + out = _ks_ensemble_akr_circperm_uv_w(obs, fct, ens_w, backend=backend) + else: + raise ValueError( + f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'." + ) + + return out + + +def _ks_ensemble_nrg_uv_w( + obs: "ArrayLike", + fct: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Energy estimator of the kernel score.""" + B = backends.active if backend is None else backends[backend] + + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend) * ens_w, axis=-1) + e_2 = B.sum( + gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend) + * ens_w[..., None] + * ens_w[..., None, :], + axis=(-1, -2), + ) + e_3 = gauss_kern_uv(obs, obs) + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + return -out + + +def _ks_ensemble_fair_uv_w( + obs: "ArrayLike", + fct: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Fair estimator of the kernel score.""" + B = backends.active if backend is None else backends[backend] + + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend) * ens_w, axis=-1) + e_2 = B.sum( + gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend) + * ens_w[..., None] + * ens_w[..., None, :], + axis=(-1, -2), + ) + e_3 = gauss_kern_uv(obs, obs) + + fair_c = 1 - B.sum(ens_w * ens_w, axis=-1) + + out = e_1 - 0.5 * e_2 / fair_c - 0.5 * e_3 + return -out + + +def _ks_ensemble_akr_uv_w( + obs: "ArrayLike", fct: "Array", ens_w: "Array", backend: "Backend" = None +) -> "Array": +
"""Approximate kernel representation estimator of the kernel score.""" + B = backends.active if backend is None else backends[backend] + + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend) * ens_w, axis=-1) + e_2 = B.sum( + gauss_kern_uv(fct, B.roll(fct, shift=1, axis=-1), backend=backend) * ens_w, + axis=-1, + ) + e_3 = gauss_kern_uv(obs, obs, backend=backend) + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + + return -out + + +def _ks_ensemble_akr_circperm_uv_w( + obs: "ArrayLike", fct: "Array", ens_w: "Array", backend: "Backend" = None +) -> "Array": + """AKR estimator of the kernel score with cyclic permutation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend) * ens_w, axis=-1) + shift = M // 2 + e_2 = B.sum( + gauss_kern_uv(fct, B.roll(fct, shift=shift, axis=-1), backend=backend) * ens_w, + axis=-1, + ) + e_3 = gauss_kern_uv(obs, obs, backend=backend) + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + + return -out + + +def ensemble_mv_w( + obs: "ArrayLike", + fct: "Array", + ens_w: "Array", + estimator: str = "nrg", + backend: "Backend" = None, +) -> "Array": + """Compute a kernel score for a finite multivariate ensemble.""" + if estimator == "nrg": + out = _ks_ensemble_nrg_mv_w(obs, fct, ens_w, backend=backend) + elif estimator == "fair": + out = _ks_ensemble_fair_mv_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr": + out = _ks_ensemble_akr_mv_w(obs, fct, ens_w, backend=backend) + elif estimator == "akr_circperm": + out = _ks_ensemble_akr_circperm_mv_w(obs, fct, ens_w, backend=backend) + else: + raise ValueError( + f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'." 
+ ) + + return out + + +def _ks_ensemble_nrg_mv_w( + obs: "ArrayLike", + fct: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Energy estimator of the kernel score.""" + B = backends.active if backend is None else backends[backend] + + e_1 = B.sum( + ens_w * gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1 + ) + e_2 = B.sum( + gauss_kern_mv(B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend) + * B.expand_dims(ens_w, -1) + * B.expand_dims(ens_w, -2), + axis=(-2, -1), + ) + e_3 = gauss_kern_mv(obs, obs) + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + return -out + + +def _ks_ensemble_fair_mv_w( + obs: "ArrayLike", + fct: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Fair estimator of the kernel score.""" + B = backends.active if backend is None else backends[backend] + + e_1 = B.sum( + ens_w * gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1 + ) + e_2 = B.sum( + gauss_kern_mv(B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend) + * B.expand_dims(ens_w, -1) + * B.expand_dims(ens_w, -2), + axis=(-2, -1), + ) + e_3 = gauss_kern_mv(obs, obs) + + fair_c = 1 - B.sum(ens_w * ens_w, axis=-1) + + out = e_1 - 0.5 * e_2 / fair_c - 0.5 * e_3 + return -out + + +def _ks_ensemble_akr_mv_w( + obs: "ArrayLike", fct: "Array", ens_w: "Array", backend: "Backend" = None +) -> "Array": + B = backends.active if backend is None else backends[backend] + + e_1 = B.sum( + gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend) * ens_w, axis=-1 + ) + e_2 = B.sum( + gauss_kern_mv(fct, B.roll(fct, shift=1, axis=-2), backend=backend) * ens_w, + axis=-1, + ) + e_3 = gauss_kern_mv(obs, obs, backend=backend) + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + + return -out + + +def _ks_ensemble_akr_circperm_mv_w( + obs: "ArrayLike", fct: "Array", ens_w: "Array", backend: "Backend" = None +) -> "Array": + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + 
e_1 = B.sum( + gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend) * ens_w, axis=-1 + ) + shift = M // 2 + e_2 = B.sum( + gauss_kern_mv(fct, B.roll(fct, shift=shift, axis=-2), backend=backend) * ens_w, + axis=-1, + ) + e_3 = gauss_kern_mv(obs, obs, backend=backend) + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + + return -out + + +def ow_ensemble_uv_w( + obs: "ArrayLike", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute an outcome-weighted kernel score for a finite univariate ensemble.""" + B = backends.active if backend is None else backends[backend] + wbar = B.sum(ens_w * fw, axis=-1) + e_1 = ( + B.sum(ens_w * gauss_kern_uv(obs[..., None], fct, backend=backend) * fw, axis=-1) + * ow + / wbar + ) + e_2 = B.sum( + ens_w[..., None] + * ens_w[..., None, :] + * gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend) + * fw[..., None] + * fw[..., None, :], + axis=(-1, -2), + ) + e_2 *= ow / (wbar**2) + e_3 = gauss_kern_uv(obs, obs, backend=backend) * ow + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + return -out + + +def ow_ensemble_mv_w( + obs: "ArrayLike", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute an outcome-weighted kernel score for a finite multivariate ensemble. + + The ensemble and variables axes are on the second last and last dimensions respectively. 
+ """ + B = backends.active if backend is None else backends[backend] + wbar = B.sum(fw * ens_w, -1) + + err_kern = gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend) + E_1 = B.sum(err_kern * fw * B.expand_dims(ow, -1) * ens_w, axis=-1) / wbar + + spread_kern = gauss_kern_mv( + B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend + ) + fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) + spread_kern *= fw_prod * B.expand_dims(ow, (-2, -1)) + E_2 = B.sum( + spread_kern * B.expand_dims(ens_w, -1) * B.expand_dims(ens_w, -2), (-2, -1) + ) / (wbar**2) + + E_3 = gauss_kern_mv(obs, obs, backend=backend) * ow + + out = E_1 - 0.5 * E_2 - 0.5 * E_3 + out = -out + return out + + +def vr_ensemble_uv_w( + obs: "ArrayLike", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute a vertically re-scaled kernel score for a finite univariate ensemble.""" + B = backends.active if backend is None else backends[backend] + + e_1 = ( + B.sum(ens_w * gauss_kern_uv(obs[..., None], fct, backend=backend) * fw, axis=-1) + * ow + ) + e_2 = B.sum( + ens_w[..., None] + * ens_w[..., None, :] + * gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend) + * fw[..., None] + * fw[..., None, :], + axis=(-1, -2), + ) + e_3 = gauss_kern_uv(obs, obs, backend=backend) * ow * ow + + out = e_1 - 0.5 * e_2 - 0.5 * e_3 + out = -out + return out + + +def vr_ensemble_mv_w( + obs: "ArrayLike", + fct: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + backend: "Backend" = None, +) -> "Array": + """Compute a vertically re-scaled kernel score for a finite multivariate ensemble. + + The ensemble and variables axes are on the second last and last dimensions respectively. 
+ """ + B = backends.active if backend is None else backends[backend] + + err_kern = gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend) + E_1 = B.sum(err_kern * fw * B.expand_dims(ow, -1) * ens_w, axis=-1) + + spread_kern = gauss_kern_mv( + B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend + ) + fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) + spread_kern *= fw_prod + E_2 = B.sum( + spread_kern * B.expand_dims(ens_w, -1) * B.expand_dims(ens_w, -2), (-2, -1) + ) + + E_3 = gauss_kern_mv(obs, obs, backend=backend) * ow * ow + + out = E_1 - 0.5 * E_2 - 0.5 * E_3 + out = -out + return out diff --git a/scoringrules/core/kernels/_gufuncs_w.py b/scoringrules/core/kernels/_gufuncs_w.py new file mode 100644 index 0000000..25d6bc7 --- /dev/null +++ b/scoringrules/core/kernels/_gufuncs_w.py @@ -0,0 +1,321 @@ +import math + +import numpy as np +from numba import njit, guvectorize + +from scoringrules.core.utils import lazy_gufunc_wrapper_uv, lazy_gufunc_wrapper_mv + + +@njit(["float32(float32, float32)", "float64(float64, float64)"]) +def _gauss_kern_uv(x1: float, x2: float) -> float: + """Gaussian kernel evaluated at x1 and x2.""" + out: float = math.exp(-0.5 * (x1 - x2) ** 2) + return out + + +@njit(["float32(float32[:], float32[:])", "float64(float64[:], float64[:])"]) +def _gauss_kern_mv(x1: float, x2: float) -> float: + """Gaussian kernel evaluated at x1 and x2.""" + out: float = math.exp(-0.5 * np.linalg.norm(x1 - x2) ** 2) + return out + + +@guvectorize("(),(n),(n)->()") +def _ks_ensemble_uv_w_nrg_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Standard version of the kernel score.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) * w[i] + for j in range(M): + e_2 += _gauss_kern_uv(fct[i], fct[j]) * w[i] * w[j] + e_3 = _gauss_kern_uv(obs, obs) + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + 
+@guvectorize("(),(n),(n)->()") +def _ks_ensemble_uv_w_fair_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Fair version of the kernel score.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) * w[i] + for j in range(i + 1, M): + e_2 += _gauss_kern_uv(fct[i], fct[j]) * w[i] * w[j] + e_3 = _gauss_kern_uv(obs, obs) + + out[0] = -(e_1 - e_2 / (1 - np.sum(w * w)) - 0.5 * e_3) + + +@guvectorize("(),(n),(n)->()") +def _ks_ensemble_uv_w_akr_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Approximate kernel representation estimator of the kernel score.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) * w[i] + e_2 += _gauss_kern_uv(fct[i], fct[i - 1]) * w[i] + e_3 = _gauss_kern_uv(obs, obs) + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +@guvectorize("(),(n),(n)->()") +def _ks_ensemble_uv_w_akr_circperm_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """AKR estimator of the kernel score with cyclic permutation.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + for i in range(M): + sigma_i = int((i + 1 + ((M - 1) / 2)) % M) + e_1 += _gauss_kern_uv(fct[i], obs) * w[i] + e_2 += _gauss_kern_uv(fct[i], fct[sigma_i]) * w[i] + e_3 = _gauss_kern_uv(obs, obs) + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +@guvectorize("(),(n),(),(n),(n)->()") +def _owks_ensemble_uv_w_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + w: np.ndarray, + out: np.ndarray, +): + """Outcome-weighted kernel score for univariate ensembles.""" + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i, x_i in enumerate(fct): + e_1 += _gauss_kern_uv(x_i, obs) * fw[i] * ow * w[i] + for j, x_j in 
enumerate(fct): + e_2 += _gauss_kern_uv(x_i, x_j) * fw[i] * fw[j] * ow * w[i] * w[j] + e_3 = _gauss_kern_uv(obs, obs) * ow + + wbar = np.sum(fw * w) + + out[0] = -(e_1 / wbar - 0.5 * e_2 / (wbar**2) - 0.5 * e_3) + + +@guvectorize("(),(n),(),(n),(n)->()") +def _vrks_ensemble_uv_w_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + w: np.ndarray, + out: np.ndarray, +): + """Vertically re-scaled kernel score for univariate ensembles.""" + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0.0 + e_2 = 0.0 + + for i, x_i in enumerate(fct): + e_1 += _gauss_kern_uv(x_i, obs) * fw[i] * ow * w[i] + for j, x_j in enumerate(fct): + e_2 += _gauss_kern_uv(x_i, x_j) * fw[i] * fw[j] * w[i] * w[j] + e_3 = _gauss_kern_uv(obs, obs) * ow * ow + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +@guvectorize("(d),(m,d),(m)->()") +def _ks_ensemble_mv_w_nrg_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Standard version of the multivariate kernel score.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs)) * w[i] + for j in range(M): + e_2 += float(_gauss_kern_mv(fct[i], fct[j])) * w[i] * w[j] + e_3 = float(_gauss_kern_mv(obs, obs)) + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +@guvectorize("(d),(m,d),(m)->()") +def _ks_ensemble_mv_w_fair_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Fair version of the multivariate kernel score.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs) * w[i]) + for j in range(i + 1, M): + e_2 += float(_gauss_kern_mv(fct[i], fct[j]) * w[i] * w[j]) + e_3 = float(_gauss_kern_mv(obs, obs)) + + out[0] = -(e_1 - e_2 / (1 - np.sum(w * w)) - 0.5 * e_3) + + +@guvectorize("(d),(m,d),(m)->()") +def _ks_ensemble_mv_w_akr_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """Approximate kernel representation 
estimator of the multivariate kernel score.""" + M = fct.shape[0] + + e_1 = 0 + e_2 = 0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs) * w[i]) + e_2 += float(_gauss_kern_mv(fct[i], fct[i - 1]) * w[i]) + e_3 = float(_gauss_kern_mv(obs, obs)) + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +@guvectorize("(d),(m,d),(m)->()") +def _ks_ensemble_mv_w_akr_circperm_gufunc( + obs: np.ndarray, fct: np.ndarray, w: np.ndarray, out: np.ndarray +): + """AKR estimator of the multivariate kernel score with cyclic permutation.""" + M = fct.shape[0] + + e_1 = 0 + e_2 = 0 + for i in range(M): + sigma_i = int((i + 1 + ((M - 1) / 2)) % M) + e_1 += float(_gauss_kern_mv(fct[i], obs) * w[i]) + e_2 += float(_gauss_kern_mv(fct[i], fct[sigma_i]) * w[i]) + e_3 = float(_gauss_kern_mv(obs, obs)) + + out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +@guvectorize("(d),(m,d),(),(m),(m)->()") +def _owks_ensemble_mv_w_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + w: np.ndarray, + out: np.ndarray, +): + """Outcome-weighted kernel score for multivariate ensembles.""" + M = fct.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs) * fw[i] * ow * w[i]) + for j in range(M): + e_2 += float( + _gauss_kern_mv(fct[i], fct[j]) * fw[i] * fw[j] * ow * w[i] * w[j] + ) + e_3 = float(_gauss_kern_mv(obs, obs)) * ow + + wbar = np.sum(fw * w) + + out[0] = -(e_1 / wbar - 0.5 * e_2 / (wbar**2) - 0.5 * e_3) + + +def _vrks_ensemble_mv_w_gufunc( + obs: np.ndarray, + fct: np.ndarray, + ow: np.ndarray, + fw: np.ndarray, + w: np.ndarray, + out: np.ndarray, +): + """Vertically re-scaled kernel score for multivariate ensembles.""" + M = fct.shape[0] + ow = ow[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs) * fw[i] * ow * w[i]) + for j in range(M): + e_2 += float(_gauss_kern_mv(fct[i], fct[j]) * fw[i] * fw[j] * w[i] * w[j]) + e_3 = float(_gauss_kern_mv(obs, obs)) * ow * ow + + 
out[0] = -(e_1 - 0.5 * e_2 - 0.5 * e_3) + + +estimator_gufuncs_uv_w = { + "fair": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_w_fair_gufunc), + "nrg": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_w_nrg_gufunc), + "akr": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_w_akr_gufunc), + "akr_circperm": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_w_akr_circperm_gufunc), + "ow": lazy_gufunc_wrapper_uv(_owks_ensemble_uv_w_gufunc), + "vr": lazy_gufunc_wrapper_uv(_vrks_ensemble_uv_w_gufunc), +} + +estimator_gufuncs_mv_w = { + "fair": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_w_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_w_nrg_gufunc), + "akr": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_w_akr_gufunc), + "akr_circperm": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_w_akr_circperm_gufunc), + "ow": lazy_gufunc_wrapper_mv(_owks_ensemble_mv_w_gufunc), + "vr": lazy_gufunc_wrapper_mv(_vrks_ensemble_mv_w_gufunc), +} + +__all__ = [ + "_ks_ensemble_uv_w_fair_gufunc", + "_ks_ensemble_uv_w_nrg_gufunc", + "_ks_ensemble_uv_w_akr_gufunc", + "_ks_ensemble_uv_w_akr_circperm_gufunc", + "_owks_ensemble_uv_w_gufunc", + "_vrks_ensemble_uv_w_gufunc", + "_ks_ensemble_mv_w_fair_gufunc", + "_ks_ensemble_mv_w_nrg_gufunc", + "_ks_ensemble_mv_w_akr_gufunc", + "_ks_ensemble_mv_w_akr_circperm_gufunc", + "_owks_ensemble_mv_w_gufunc", + "_vrks_ensemble_mv_w_gufunc", +] diff --git a/scoringrules/core/utils.py b/scoringrules/core/utils.py index 47450f9..690696f 100644 --- a/scoringrules/core/utils.py +++ b/scoringrules/core/utils.py @@ -84,6 +84,43 @@ def multivariate_array_check(obs, fct, m_axis, v_axis, backend=None): return _multivariate_shape_permute(obs, fct, m_axis, v_axis, backend=backend) +def univariate_weight_check(ens_w, fct, m_axis, backend=None): + """Check that ensemble weights are non-negative and compatible with the (permuted) forecast array.""" + B = backends.active if backend is None else backends[backend] + if ens_w is not None: + ens_w = B.asarray(ens_w) + m_axis = m_axis if m_axis >= 0 else fct.ndim + 
m_axis + ens_w = B.moveaxis(ens_w, m_axis, _M_AXIS_UV) + if ens_w.shape != fct.shape: + raise ValueError( + f"Shape of weights {ens_w.shape} is not compatible with forecast shape {fct.shape}" + ) + if B.any(ens_w < 0): + raise ValueError("`ens_w` contains negative entries") + # renormalise weights so that they sum to one across the ensemble members + ens_w = ens_w / B.sum(ens_w, axis=-1, keepdims=True) + return ens_w + + +def multivariate_weight_check(ens_w, fct, m_axis, backend=None): + """Check that ensemble weights are non-negative and compatible with the (permuted) forecast array.""" + B = backends.active if backend is None else backends[backend] + if ens_w is not None: + ens_w = B.asarray(ens_w) + m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis + v_axis_pos = _V_AXIS if _V_AXIS > 0 else _V_AXIS + fct.ndim + ens_w = B.moveaxis(ens_w, m_axis, -1) + if ens_w.shape != fct.shape[:v_axis_pos] + fct.shape[(v_axis_pos + 1) :]: + raise ValueError( + f"Shape of weights {ens_w.shape} is not compatible with forecast shape {fct.shape}" + ) + if B.any(ens_w < 0): + raise ValueError("`ens_w` contains negative entries") + # renormalise weights so that they sum to one across the ensemble members + ens_w = ens_w / B.sum(ens_w, axis=-1, keepdims=True) + return ens_w + + def estimator_check(estimator, gufuncs): """Check that the estimator is valid for the given score.""" if estimator not in gufuncs: @@ -145,14 +182,20 @@ def mv_weighted_score_chain(obs, fct, v_func=None): return obs, fct -def univariate_sort_ens(fct, estimator=None, sorted_ensemble=False, backend=None): - """Sort ensemble members if not already sorted.""" +def univariate_sort_ens( + fct, ens_w=None, m_axis=-1, estimator=None, sorted_ensemble=False, backend=None +): + """Sort ensemble members and weights if not already sorted.""" B = backends.active if backend is None else backends[backend] sort_ensemble = not sorted_ensemble and estimator in ["qd", "pwm", "int"] if sort_ensemble: ind = B.argsort(fct, 
axis=-1) fct = B.gather(fct, ind, axis=-1) - return fct + if ens_w is not None: + ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend) + if sort_ensemble: + ens_w = B.gather(ens_w, ind, axis=-1) + return fct, ens_w def lazy_gufunc_wrapper_uv(func): diff --git a/scoringrules/core/variogram/__init__.py b/scoringrules/core/variogram/__init__.py index f49f7fe..7f18d80 100644 --- a/scoringrules/core/variogram/__init__.py +++ b/scoringrules/core/variogram/__init__.py @@ -1,15 +1,25 @@ try: from ._gufuncs import estimator_gufuncs + from ._gufuncs_w import estimator_gufuncs_w except ImportError: estimator_gufuncs = None + estimator_gufuncs_w = None from ._score import owvs_ensemble as owvs from ._score import vs_ensemble as vs from ._score import vrvs_ensemble as vrvs +from ._score_w import owvs_ensemble_w as owvs_w +from ._score_w import vs_ensemble_w as vs_w +from ._score_w import vrvs_ensemble_w as vrvs_w + __all__ = [ "vs", + "vs_w", "owvs", + "owvs_w", "vrvs", + "vrvs_w", "estimator_gufuncs", + "estimator_gufuncs_w", ] diff --git a/scoringrules/core/variogram/_gufuncs_w.py b/scoringrules/core/variogram/_gufuncs_w.py new file mode 100644 index 0000000..8dd61eb --- /dev/null +++ b/scoringrules/core/variogram/_gufuncs_w.py @@ -0,0 +1,125 @@ +import numpy as np +from numba import guvectorize + +from scoringrules.core.utils import lazy_gufunc_wrapper_mv + + +@guvectorize("(d),(m,d),(d,d),(m),()->()") +def _variogram_score_nrg_gufunc_w(obs, fct, w, ens_w, p, out): + M = fct.shape[-2] + D = fct.shape[-1] + out[0] = 0.0 + for i in range(D): + for j in range(D): + vfct = 0.0 + for m in range(M): + vfct += ens_w[m] * abs(fct[m, i] - fct[m, j]) ** p + vobs = abs(obs[i] - obs[j]) ** p + out[0] += w[i, j] * (vobs - vfct) ** 2 + + +@guvectorize("(d),(m,d),(d,d),(m),()->()") +def _variogram_score_fair_gufunc_w(obs, fct, w, ens_w, p, out): + M = fct.shape[-2] + D = fct.shape[-1] + + e_1 = 0.0 + e_2 = 0.0 + for k in range(M): + for i in range(D): + for j in range(D): + 
rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(obs[i] - obs[j]) ** p + e_1 += w[i, j] * (rho1 - rho2) ** 2 * ens_w[k] + for m in range(k + 1, M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(fct[m, i] - fct[m, j]) ** p + e_2 += w[i, j] * (2 * ((rho1 - rho2) ** 2) * ens_w[k] * ens_w[m]) + + fair_c = 1 - np.sum(ens_w**2) + + out[0] = e_1 - 0.5 * e_2 / fair_c + + +@guvectorize("(d),(m,d),(d,d),(),(m),(m),()->()") +def _owvariogram_score_gufunc_w(obs, fct, w, ow, fw, ens_w, p, out): + M = fct.shape[-2] + D = fct.shape[-1] + + e_1 = 0.0 + e_2 = 0.0 + for k in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(obs[i] - obs[j]) ** p + e_1 += w[i, j] * (rho1 - rho2) ** 2 * fw[k] * ow * ens_w[k] + for m in range(k + 1, M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(fct[m, i] - fct[m, j]) ** p + e_2 += w[i, j] * ( + 2 + * ((rho1 - rho2) ** 2) + * fw[k] + * fw[m] + * ow + * ens_w[k] + * ens_w[m] + ) + + wbar = np.sum(fw * ens_w) + + out[0] = e_1 / wbar - 0.5 * e_2 / (wbar**2) + + +@guvectorize("(d),(m,d),(d,d),(),(m),(m),()->()") +def _vrvariogram_score_gufunc_w(obs, fct, w, ow, fw, ens_w, p, out): + M = fct.shape[-2] + D = fct.shape[-1] + + e_1 = 0.0 + e_2 = 0.0 + e_3_x = 0.0 + for k in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(obs[i] - obs[j]) ** p + e_1 += w[i, j] * (rho1 - rho2) ** 2 * fw[k] * ow * ens_w[k] + e_3_x += w[i, j] * (rho1) ** 2 * fw[k] * ens_w[k] + for m in range(k + 1, M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(fct[m, i] - fct[m, j]) ** p + e_2 += w[i, j] * ( + 2 * ((rho1 - rho2) ** 2) * fw[k] * fw[m] * ens_w[k] * ens_w[m] + ) + + wbar = np.sum(fw * ens_w) + e_3_y = 0.0 + for i in range(D): + for j in range(D): + rho1 = abs(obs[i] - obs[j]) ** p + e_3_y += w[i, j] * (rho1) ** 2 * 
ow + + out[0] = e_1 - 0.5 * e_2 + (e_3_x - e_3_y) * (wbar - ow) + + +estimator_gufuncs_w = { + "fair": lazy_gufunc_wrapper_mv(_variogram_score_fair_gufunc_w), + "nrg": lazy_gufunc_wrapper_mv(_variogram_score_nrg_gufunc_w), + "ownrg": lazy_gufunc_wrapper_mv(_owvariogram_score_gufunc_w), + "vrnrg": lazy_gufunc_wrapper_mv(_vrvariogram_score_gufunc_w), +} + +__all__ = [ + "_variogram_score_fair_gufunc_w", + "_variogram_score_nrg_gufunc_w", + "_owvariogram_score_gufunc_w", + "_vrvariogram_score_gufunc_w", +] diff --git a/scoringrules/core/variogram/_score_w.py b/scoringrules/core/variogram/_score_w.py new file mode 100644 index 0000000..5fa066d --- /dev/null +++ b/scoringrules/core/variogram/_score_w.py @@ -0,0 +1,133 @@ +import typing as tp + +from scoringrules.backend import backends + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, Backend + + +def vs_ensemble_w( + obs: "Array", # (... D) + fct: "Array", # (... M D) + w: "Array", # (..., D D) + ens_w: "Array", # (... M) + p: float = 1, + estimator: str = "nrg", + backend: "Backend" = None, +) -> "Array": + """Compute the Variogram Score for a multivariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + + fct_diff = ( + B.abs(B.expand_dims(fct, -2) - B.expand_dims(fct, -1)) ** p + ) # (... M D D) + obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) + + if estimator == "nrg": + fct_diff = B.sum( + fct_diff * B.expand_dims(ens_w, (-1, -2)), axis=-3 + ) # (... D D) + out = B.sum(w * (obs_diff - fct_diff) ** 2, axis=(-2, -1)) # (...) + + elif estimator == "fair": + E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) + E_1 = B.sum(B.expand_dims(w, axis=-3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.sum(E_1 * ens_w, axis=-1) # (...) + + E_2 = ( + B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) + ) ** 2 # (... M M D D) + E_2 = B.sum(B.expand_dims(w, (-3, -4)) * E_2, axis=(-2, -1)) # (... 
M M) + E_2 = B.sum( + E_2 * B.expand_dims(ens_w, -2) * B.expand_dims(ens_w, -1), axis=(-2, -1) + ) # (...) + + fair_c = 1 - B.sum(ens_w * ens_w, axis=-1) + + out = E_1 - 0.5 * E_2 / fair_c + + else: + raise ValueError( + f"For the variogram score, {estimator} must be one of 'nrg', and 'fair'." + ) + + return out + + +def owvs_ensemble_w( + obs: "Array", + fct: "Array", + w: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + p: float = 1, + backend: "Backend" = None, +) -> "Array": + """Compute the Outcome-Weighted Variogram Score for a multivariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + wbar = B.sum(fw * ens_w, axis=-1) + + fct_diff = B.expand_dims(fct, -2) - B.expand_dims(fct, -1) # (... M D D) + fct_diff = B.abs(fct_diff) ** p # (... M D D) + + obs_diff = B.expand_dims(obs, -2) - B.expand_dims(obs, -1) # (... D D) + obs_diff = B.abs(obs_diff) ** p # (... D D) + del obs, fct + + E_1 = (fct_diff - B.expand_dims(obs_diff, -3)) ** 2 # (... M D D) + E_1 = B.sum(B.expand_dims(w, -3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.sum(E_1 * fw * B.expand_dims(ow, -1) * ens_w, axis=-1) / wbar # (...) + + fct_diff_spread = B.expand_dims(fct_diff, -3) - B.expand_dims( + fct_diff, -4 + ) # (... M M D D) + fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) + ew_prod = B.expand_dims(ens_w, -2) * B.expand_dims(ens_w, -1) # (... M M) + E_2 = B.sum( + B.expand_dims(w, (-3, -4)) * fct_diff_spread**2, axis=(-2, -1) + ) # (... M M) + E_2 *= fw_prod * B.expand_dims(ow, (-2, -1)) * ew_prod # (... M M) + E_2 = B.sum(E_2, axis=(-2, -1)) / (wbar**2) # (...) 
+ + return E_1 - 0.5 * E_2 + + +def vrvs_ensemble_w( + obs: "Array", + fct: "Array", + w: "Array", + ow: "Array", + fw: "Array", + ens_w: "Array", + p: float = 1, + backend: "Backend" = None, +) -> "Array": + """Compute the Vertically Re-scaled Variogram Score for a multivariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + wbar = B.sum(fw * ens_w, axis=-1) + + fct_diff = ( + B.abs(B.expand_dims(fct, -2) - B.expand_dims(fct, -1)) ** p + ) # (... M D D) + obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) + + E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) + E_1 = B.sum(B.expand_dims(w, -3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.sum(E_1 * fw * B.expand_dims(ow, axis=-1) * ens_w, axis=-1) # (...) + + E_2 = ( + B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) + ) ** 2 # (... M M D D) + E_2 = B.sum(B.expand_dims(w, (-3, -4)) * E_2, axis=(-2, -1)) # (... M M) + + fw_prod = B.expand_dims(fw, axis=-2) * B.expand_dims(fw, axis=-1) # (... M M) + ew_prod = B.expand_dims(ens_w, -2) * B.expand_dims(ens_w, -1) # (... M M) + E_2 = B.sum(E_2 * fw_prod * ew_prod, axis=(-2, -1)) # (...) + + E_3 = B.sum(B.expand_dims(w, -3) * fct_diff**2, axis=(-2, -1)) # (... M) + E_3 = B.sum(E_3 * fw * ens_w, axis=-1) # (...) + E_3 -= B.sum(w * obs_diff**2, axis=(-2, -1)) * ow # (...) + E_3 *= wbar - ow # (...) 
+ + return E_1 - 0.5 * E_2 + E_3 diff --git a/tests/test_crps.py b/tests/test_crps.py index 90c9e0c..439a3cc 100644 --- a/tests/test_crps.py +++ b/tests/test_crps.py @@ -39,6 +39,14 @@ def test_crps_ensemble(estimator, backend): res = np.asarray(res) assert not np.any(res < 0.0) + # test equivalence with and without weights + w = np.ones(fct.shape) + res_nrg_w = sr.crps_ensemble( + obs, fct, ens_w=w, estimator=estimator, backend=backend + ) + res_nrg_now = sr.crps_ensemble(obs, fct, estimator=estimator, backend=backend) + assert np.allclose(res_nrg_w, res_nrg_now, atol=1e-6) + # approx zero when perfect forecast perfect_fct = obs[..., None] + np.random.randn(N, ENSEMBLE_SIZE) * 0.00001 res = sr.crps_ensemble(obs, perfect_fct, estimator=estimator, backend=backend) @@ -46,22 +54,55 @@ def test_crps_ensemble(estimator, backend): assert not np.any(res - 0.0 > 0.0001) -def test_crps_estimators(backend): +def test_crps_ensemble_corr(backend): obs = np.random.randn(N) mu = obs + np.random.randn(N) * 0.3 sigma = abs(np.random.randn(N)) * 0.5 fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] - # equality of estimators + # test equivalence of different estimators res_nrg = sr.crps_ensemble(obs, fct, estimator="nrg", backend=backend) + res_qd = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend) res_fair = sr.crps_ensemble(obs, fct, estimator="fair", backend=backend) res_pwm = sr.crps_ensemble(obs, fct, estimator="pwm", backend=backend) - res_qd = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend) - res_int = sr.crps_ensemble(obs, fct, estimator="int", backend=backend) + if backend in ["torch", "jax"]: + assert np.allclose(res_nrg, res_qd, rtol=1e-03) + assert np.allclose(res_fair, res_pwm, rtol=1e-03) + else: + assert np.allclose(res_nrg, res_qd) + assert np.allclose(res_fair, res_pwm) + + # test equivalence of different estimators with weights + w = np.abs(np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None]) + res_nrg = 
sr.crps_ensemble(obs, fct, ens_w=w, estimator="nrg", backend=backend) + res_qd = sr.crps_ensemble(obs, fct, ens_w=w, estimator="qd", backend=backend) + if backend in ["torch", "jax"]: + assert np.allclose(res_nrg, res_qd, rtol=1e-03) + else: + assert np.allclose(res_nrg, res_qd) + + # test correctness + obs = -0.6042506 + fct = np.array( + [ + 1.7812118, + 0.5863797, + 0.7038174, + -0.7743998, + -0.2751647, + 1.1863249, + 1.2990966, + -0.3242982, + -0.5968781, + 0.9064937, + ] + ) + res = sr.crps_ensemble(obs, fct, estimator="qd") + assert np.isclose(res, 0.6126602) - assert np.allclose(res_nrg, res_qd) - assert np.allclose(res_nrg, res_int) - assert np.allclose(res_fair, res_pwm) + w = np.arange(10) + res = sr.crps_ensemble(obs, fct, ens_w=w, estimator="qd") + assert np.isclose(res, 0.4923673) def test_crps_quantile(backend): diff --git a/tests/test_energy.py b/tests/test_energy.py index 8e9f4cb..6d7ec1e 100644 --- a/tests/test_energy.py +++ b/tests/test_energy.py @@ -34,6 +34,12 @@ def test_energy_score(estimator, backend): res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) assert res.shape == (N,) + # test equivalence with and without weights + w = np.ones(fct.shape[:-1]) + res_nrg_w = sr.es_ensemble(obs, fct, ens_w=w, estimator=estimator, backend=backend) + res_nrg_now = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) + assert np.allclose(res_nrg_w, res_nrg_now) + # correctness fct = np.array( [ @@ -43,19 +49,40 @@ def test_energy_score(estimator, backend): ).T obs = np.array([0.2743836, 0.8146400]) + res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) if estimator == "nrg": - res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.3949793 np.testing.assert_allclose(res, expected, atol=1e-6) elif estimator == "fair": - res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.3274585 np.testing.assert_allclose(res, expected, atol=1e-6) elif estimator == "akr": - 
res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.3522818 np.testing.assert_allclose(res, expected, atol=1e-6) elif estimator == "akr_circperm": - res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.2778118 np.testing.assert_allclose(res, expected, atol=1e-6) + + # correctness with weights + w = np.array([0.1, 0.2, 0.3, 0.4]) + + res = sr.es_ensemble(obs, fct, ens_w=w, estimator=estimator, backend=backend) + if estimator == "nrg": + expected = 0.4074382 + np.testing.assert_allclose(res, expected, atol=1e-6) + elif estimator == "fair": + expected = 0.333500969 + np.testing.assert_allclose(res, expected, atol=1e-6) + elif estimator == "akr": + expected = 0.3345371 + np.testing.assert_allclose(res, expected, atol=1e-6) + elif estimator == "akr_circperm": + expected = 0.2612048 + np.testing.assert_allclose(res, expected, atol=1e-6) + + # check invariance to scaling of the weights + w_scaled = w * 10 + res_scaled = sr.es_ensemble( + obs, fct, ens_w=w_scaled, estimator=estimator, backend=backend + ) + np.testing.assert_allclose(res_scaled, res, atol=1e-6) diff --git a/tests/test_kernels.py b/tests/test_kernels.py index efd5b29..b3421f3 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -34,6 +34,17 @@ def test_gksuv(estimator, backend): res = np.asarray(res) assert not np.any(res < 0.0) + # m_axis keyword + res = sr.gksuv_ensemble( + obs, + np.random.randn(ENSEMBLE_SIZE, N), + m_axis=0, + estimator=estimator, + backend=backend, + ) + res = np.asarray(res) + assert not np.any(res < 0.0) + # approx zero when perfect forecast perfect_fct = obs[..., None] + np.random.randn(N, ENSEMBLE_SIZE) * 0.00001 res = sr.gksuv_ensemble(obs, perfect_fct, estimator=estimator, backend=backend) @@ -102,6 +113,10 @@ def test_gksmv(estimator, backend): est = "undefined_estimator" sr.gksmv_ensemble(obs, fct, estimator=est, backend=backend) + with pytest.raises(ValueError): + est = "undefined_estimator" + 
sr.gksmv_ensemble(obs, fct, estimator=est, backend=backend) + if estimator == "nrg": res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.5868737 diff --git a/tests/test_variogram.py b/tests/test_variogram.py index d3366b3..594aa2d 100644 --- a/tests/test_variogram.py +++ b/tests/test_variogram.py @@ -41,6 +41,7 @@ def test_variogram_score_permuted_dims(estimator, backend): @pytest.mark.parametrize("estimator", ESTIMATORS) def test_variogram_score_correctness(estimator, backend): + # correctness fct = np.array( [ [0.79546742, 0.4777960, 0.2164079, 0.5409873], @@ -82,3 +83,35 @@ def test_variogram_score_correctness(estimator, backend): res = sr.vs_ensemble(obs, fct, w=w, p=0.5, estimator=estimator, backend=backend) if estimator == "nrg": np.testing.assert_allclose(res, expected, atol=1e-6) + + # with ensemble weights + ens_w = np.array([0.1, 0.8, 0.3, 0.4]) + res = sr.vs_ensemble(obs, fct, ens_w=ens_w, p=0.5, estimator="nrg", backend=backend) + if estimator == "nrg": + expected = 0.07648068 + np.testing.assert_allclose(res, expected, atol=1e-6) + + ens_w = np.array([0.9, 0.2, 0.5, 0.5]) + res = sr.vs_ensemble( + obs, fct, ens_w=ens_w, p=0.75, estimator=estimator, backend=backend + ) + if estimator == "nrg": + expected = 0.01160571 + np.testing.assert_allclose(res, expected, atol=1e-6) + + # check invariance to scaling of the weights + ens_w_scaled = ens_w * 10 + res_scaled = sr.vs_ensemble( + obs, fct, ens_w=ens_w_scaled, p=0.75, estimator=estimator, backend=backend + ) + assert np.allclose(res_scaled, res) + + # with dimension and ensemble weights + w = np.array([[0.1, 0.2], [0.2, 0.4]]) + ens_w = np.array([0.1, 0.8, 0.3, 0.4]) + res = sr.vs_ensemble( + obs, fct, w=w, ens_w=ens_w, p=0.5, estimator=estimator, backend=backend + ) + if estimator == "nrg": + expected = 0.01529614 + np.testing.assert_allclose(res, expected, atol=1e-6) diff --git a/tests/test_wcrps.py b/tests/test_wcrps.py index 731a277..32fda0a 100644 --- 
a/tests/test_wcrps.py +++ b/tests/test_wcrps.py @@ -169,6 +169,12 @@ def w_func(x): res = np.mean(np.float64(sr.owcrps_ensemble(obs, fct, b=1.85, backend=backend))) np.testing.assert_allclose(res, 0.09933139, rtol=1e-6) + # test equivalence with and without weights + w = np.ones(fct.shape) + res_nrg_w = sr.owcrps_ensemble(obs, fct, ens_w=w, b=1.85, backend=backend) + res_nrg_now = sr.owcrps_ensemble(obs, fct, b=1.85, backend=backend) + assert np.allclose(res_nrg_w, res_nrg_now) + def test_twcrps_score_correctness(backend): fct = np.array( diff --git a/tests/test_wvariogram.py b/tests/test_wvariogram.py index 51fda41..c7163d7 100644 --- a/tests/test_wvariogram.py +++ b/tests/test_wvariogram.py @@ -21,7 +21,7 @@ def test_owvs_vs_vs(backend): lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend, ) - np.testing.assert_allclose(res, resw, rtol=1e-3) + np.testing.assert_allclose(res, resw, atol=1e-5) def test_twvs_vs_vs(backend): @@ -30,7 +30,7 @@ def test_twvs_vs_vs(backend): res = sr.vs_ensemble(obs, fct, backend=backend) resw = sr.twvs_ensemble(obs, fct, lambda x: x, backend=backend) - np.testing.assert_allclose(res, resw, rtol=5e-4) + np.testing.assert_allclose(res, resw, atol=1e-5) def test_vrvs_vs_vs(backend):