diff --git a/scoringrules/_brier.py b/scoringrules/_brier.py index 210dcad..7d48766 100644 --- a/scoringrules/_brier.py +++ b/scoringrules/_brier.py @@ -1,7 +1,7 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import brier +from scoringrules.core.utils import binary_array_check, categorical_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -14,30 +14,43 @@ def brier_score( backend: "Backend" = None, ) -> "Array": r""" - Compute the Brier Score (BS). + Brier Score - The BS is formulated as + The Brier Score is defined as .. math:: - BS(f, y) = (f - y)^2, + \text{BS}(F, y) = (F - y)^2, - where :math:`f \in [0, 1]` is the predicted probability of an event and :math:`y \in \{0, 1\}` the actual outcome. + where :math:`F \in [0, 1]` is the predicted probability of an event and :math:`y \in \{0, 1\}` + is the outcome [1]_. Parameters ---------- obs : array_like - Observed outcome, either 0 or 1. + Observed outcomes, either 0 or 1. fct : array_like - Forecasted probabilities between 0 and 1. + Forecast probabilities, between 0 and 1. backend : str - The name of the backend used for computations. Defaults to 'numpy'. + The name of the backend used for computations. Default is 'numpy'. Returns ------- - brier_score : array_like + score : array_like The computed Brier Score. + References + ---------- + .. [1] Brier, G. W. (1950). + Verification of forecasts expressed in terms of probability. + Monthly Weather Review, 78, 1-3. + + Examples + -------- + >>> import scoringrules as sr + >>> sr.brier_score(1, 0.2) + 0.64000 """ + obs, fct = binary_array_check(obs, fct, backend=backend) return brier.brier_score(obs=obs, fct=fct, backend=backend) @@ -46,45 +59,68 @@ def rps_score( fct: "ArrayLike", k_axis: int = -1, *, + onehot: bool = False, backend: "Backend" = None, ) -> "Array": r""" - Compute the (Discrete) Ranked Probability Score (RPS). 
+ (Discrete) Ranked Probability Score (RPS) Suppose the outcome corresponds to one of :math:`K` ordered categories. The RPS is defined as .. math:: - RPS(f, y) = \sum_{k=1}^{K}(\tilde{f}_{k} - \tilde{y}_{k})^2, + \text{RPS}(F, y) = \sum_{k=1}^{K}(\tilde{F}_{k} - \tilde{y}_{k})^2, - where :math:`f \in [0, 1]^{K}` is a vector of length :math:`K` containing forecast probabilities - that each of the :math:`K` categories will occur, and :math:`y \in \{0, 1\}^{K}` is a vector of - length :math:`K`, with the :math:`k`-th element equal to one if the :math:`k`-th category occurs. We - have :math:`\sum_{k=1}^{K} y_{k} = \sum_{k=1}^{K} f_{k} = 1`, and, for :math:`k = 1, \dots, K`, - :math:`\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}` and :math:`\tilde{f}_{k} = \sum_{i=1}^{k} f_{i}`. + where :math:`F \in [0, 1]^{K}` is a vector of length :math:`K`, containing forecast probabilities + that each of the :math:`K` categories will occur, with :math:`\sum_{k=1}^{K} F_{k} = 1` and + :math:`\tilde{F}_{k} = \sum_{i=1}^{k} F_{i}` for all :math:`k = 1, \dots, K`, and where + :math:`y \in \{1, \dots, K\}` is the category that occurs, with :math:`\tilde{y}_{k} = 1\{y \le k\}` + for all :math:`k = 1, \dots, K` [1]_. + + The outcome can alternatively be interpreted as a vector :math:`y \in \{0, 1\}^K` of length :math:`K`, with the + :math:`k`-th element equal to one if the :math:`k`-th category occurs, and zero otherwise. + Using this one-hot encoding, the RPS is defined analogously to the above, but with + :math:`\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}`. Parameters ---------- obs : array_like - Array of 0's and 1's corresponding to unobserved and observed categories - forecasts : + Category that occurs. Or array of 0's and 1's corresponding to unobserved and + observed categories if `onehot=True`. + fct : array_like Array of forecast probabilities for each category. k_axis: int - The axis corresponding to the categories. Default is the last axis. 
+ The axis of `obs` and `fct` corresponding to the categories. Default is the last axis. + onehot: bool + Boolean indicating whether the observation is the category that occurs or a onehot + encoded vector of 0's and 1's. Default is False. backend : str - The name of the backend used for computations. Defaults to 'numpy'. + The name of the backend used for computations. Default is 'numpy'. Returns ------- - score: + score: array_like The computed Ranked Probability Score. + References + ---------- + .. [1] Epstein, E. S. (1969). + A scoring system for probability forecasts of ranked categories. + Journal of Applied Meteorology, 8, 985-987. + https://www.jstor.org/stable/26174707. + + Examples + -------- + >>> import scoringrules as sr + >>> import numpy as np + >>> fct = np.array([0.1, 0.2, 0.3, 0.4]) + >>> obs = 3 + >>> sr.rps_score(obs, fct) + 0.25999999999999995 + >>> obs = np.array([0, 0, 1, 0]) + >>> sr.rps_score(obs, fct, onehot=True) + 0.25999999999999995 """ - B = backends.active if backend is None else backends[backend] - fct = B.asarray(fct) - - if k_axis != -1: - fct = B.moveaxis(fct, k_axis, -1) - + obs, fct = categorical_array_check(obs, fct, k_axis, onehot, backend=backend) return brier.rps_score(obs=obs, fct=fct, backend=backend) @@ -119,6 +155,7 @@ def log_score( The computed Log Score. """ + obs, fct = binary_array_check(obs, fct, backend=backend) return brier.log_score(obs=obs, fct=fct, backend=backend) @@ -127,6 +164,7 @@ def rls_score( fct: "ArrayLike", k_axis: int = -1, *, + onehot: bool = False, backend: "Backend" = None, ) -> "Array": r""" @@ -151,6 +189,9 @@ def rls_score( Forecasted probabilities between 0 and 1. k_axis: int The axis corresponding to the categories. Default is the last axis. + onehot: bool + Boolean indicating whether the observation is the category that occurs or a onehot + encoded vector of 0's and 1's. Default is False. backend : str The name of the backend used for computations. Defaults to 'numpy'. 
@@ -160,12 +201,7 @@ def rls_score( The computed Ranked Logarithmic Score. """ - B = backends.active if backend is None else backends[backend] - fct = B.asarray(fct) - - if k_axis != -1: - fct = B.moveaxis(fct, k_axis, -1) - + obs, fct = categorical_array_check(obs, fct, k_axis, onehot, backend=backend) return brier.rls_score(obs=obs, fct=fct, backend=backend) diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py index e3602d3..0800520 100644 --- a/scoringrules/_crps.py +++ b/scoringrules/_crps.py @@ -2,6 +2,13 @@ from scoringrules.backend import backends from scoringrules.core import crps, stats +from scoringrules.core.utils import ( + univariate_array_check, + estimator_check, + uv_weighted_score_weights, + uv_weighted_score_chain, + univariate_sort_ens, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -95,29 +102,13 @@ def crps_ensemble( >>> sr.crps_ensemble(obs, pred) array([0.69605316, 0.32865417, 0.39048665]) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if not sorted_ensemble and estimator not in [ - "nrg", - "akr", - "akr_circperm", - "fair", - ]: - fct = B.sort(fct, axis=-1) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + fct = univariate_sort_ens(fct, estimator, sorted_ensemble, backend=backend) if backend == "numba": - if estimator not in crps.estimator_gufuncs: - raise ValueError( - f"{estimator} is not a valid estimator. 
" - f"Must be one of {crps.estimator_gufuncs.keys()}" - ) + estimator_check(estimator, crps.estimator_gufuncs) return crps.estimator_gufuncs[estimator](obs, fct) - - return crps.ensemble(obs, fct, estimator, backend=backend) + else: + return crps.ensemble(obs, fct, estimator, backend=backend) def twcrps_ensemble( @@ -202,14 +193,9 @@ def twcrps_ensemble( >>> sr.twcrps_ensemble(obs, fct, v_func=v_func) array([0.69605316, 0.32865417, 0.39048665]) """ - if v_func is None: - B = backends.active if backend is None else backends[backend] - a, b, obs, fct = map(B.asarray, (a, b, obs, fct)) - - def v_func(x): - return B.minimum(B.maximum(x, a), b) - - obs, fct = map(v_func, (obs, fct)) + obs, fct = uv_weighted_score_chain( + obs, fct, a=a, b=b, v_func=v_func, backend=backend + ) return crps_ensemble( obs, fct, @@ -300,25 +286,12 @@ def owcrps_ensemble( >>> sr.owcrps_ensemble(obs, fct, w_func=w_func) array([0.91103733, 0.45212402, 0.35686667]) """ - - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return crps.estimator_gufuncs["ownrg"](obs, fct, obs_weights, fct_weights) - - return crps.ow_ensemble(obs, fct, obs_weights, fct_weights, backend=backend) + return crps.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + else: + return crps.ow_ensemble(obs, fct, obs_w, fct_w, backend=backend) def vrcrps_ensemble( @@ -399,24 +372,12 @@ def vrcrps_ensemble( >>> sr.vrcrps_ensemble(obs, fct, w_func) array([0.90036433, 0.41515255, 0.41653833]) """ - B = backends.active if backend is None else 
backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_weights, fct_weights) - - return crps.vr_ensemble(obs, fct, obs_weights, fct_weights, backend=backend) + return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + else: + return crps.vr_ensemble(obs, fct, obs_w, fct_w, backend=backend) def crps_quantile( @@ -473,10 +434,11 @@ def crps_quantile( # TODO: add example """ B = backends.active if backend is None else backends[backend] - obs, fct, alpha = map(B.asarray, (obs, fct, alpha)) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) + alpha = B.asarray(alpha) + if B.any(alpha <= 0) or B.any(alpha >= 1): + raise ValueError("`alpha` contains entries that are not between 0 and 1.") if not fct.shape[-1] == alpha.shape[-1]: raise ValueError("Expected matching length of `fct` and `alpha` values.") diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py index 299c7a4..4857151 100644 --- a/scoringrules/_dss.py +++ b/scoringrules/_dss.py @@ -1,8 +1,7 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import dss -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import univariate_array_check, multivariate_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, Backend @@ -45,16 +44,11 @@ def dssuv_ensemble( score: Array The computed Dawid-Sebastiani Score. 
""" - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) if backend == "numba": return dss._dss_uv_gufunc(obs, fct, bias) - - return dss.ds_score_uv(obs, fct, bias, backend=backend) + else: + return dss.ds_score_uv(obs, fct, bias, backend=backend) def dssmv_ensemble( @@ -99,8 +93,7 @@ def dssmv_ensemble( The computed Dawid-Sebastiani Score. """ obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": return dss._dss_mv_gufunc(obs, fct, bias) - - return dss.ds_score_mv(obs, fct, bias, backend=backend) + else: + return dss.ds_score_mv(obs, fct, bias, backend=backend) diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index 4ff2231..3902ee5 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -1,8 +1,12 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import energy -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import ( + multivariate_array_check, + estimator_check, + mv_weighted_score_chain, + mv_weighted_score_weights, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -61,18 +65,12 @@ def es_ensemble( :ref:`theory.multivariate` Some theoretical background on scoring rules for multivariate forecasts. """ - backend = backend if backend is not None else backends._active obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - if estimator not in energy.estimator_gufuncs: - raise ValueError( - f"{estimator} is not a valid estimator. 
" - f"Must be one of {energy.estimator_gufuncs.keys()}" - ) + estimator_check(estimator, energy.estimator_gufuncs) return energy.estimator_gufuncs[estimator](obs, fct) - - return energy.es(obs, fct, estimator=estimator, backend=backend) + else: + return energy.es(obs, fct, estimator=estimator, backend=backend) def twes_ensemble( @@ -122,7 +120,7 @@ def twes_ensemble( twes_ensemble : array_like The computed Threshold-Weighted Energy Score. """ - obs, fct = map(v_func, (obs, fct)) + obs, fct = mv_weighted_score_chain(obs, fct, v_func) return es_ensemble( obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend ) @@ -175,17 +173,12 @@ def owes_ensemble( owes_ensemble : array_like The computed Outcome-Weighted Energy Score. """ - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - - if B.name == "numba": - return energy.estimator_gufuncs["ownrg"](obs, fct, obs_weights, fct_weights) - - return energy.owes(obs, fct, obs_weights, fct_weights, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) + if backend == "numba": + return energy.estimator_gufuncs["ownrg"](obs, fct, obs_w, fct_w) + else: + return energy.owes(obs, fct, obs_w, fct_w, backend=backend) def vres_ensemble( @@ -236,14 +229,9 @@ def vres_ensemble( vres_ensemble : array_like The computed Vertically Re-scaled Energy Score. 
""" - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) if backend == "numba": - return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_weights, fct_weights) - - return energy.vres(obs, fct, obs_weights, fct_weights, backend=backend) + return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_w, fct_w) + else: + return energy.vres(obs, fct, obs_w, fct_w, backend=backend) diff --git a/scoringrules/_error_spread.py b/scoringrules/_error_spread.py index 4d4566d..a975c25 100644 --- a/scoringrules/_error_spread.py +++ b/scoringrules/_error_spread.py @@ -1,15 +1,15 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import error_spread +from scoringrules.core.utils import univariate_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend def error_spread_score( - observations: "ArrayLike", - forecasts: "Array", + obs: "ArrayLike", + fct: "Array", m_axis: int = -1, *, backend: "Backend" = None, @@ -18,9 +18,9 @@ def error_spread_score( Parameters ---------- - observations: ArrayLike + obs: ArrayLike The observed values. - forecasts: Array + fct: Array The predicted forecast ensemble, where the ensemble dimension is by default represented by the last axis. m_axis: int @@ -33,13 +33,8 @@ def error_spread_score( - Array An array of error spread scores for each ensemble forecast, which should be averaged to get meaningful values. 
""" - B = backends.active if backend is None else backends[backend] - observations, forecasts = map(B.asarray, (observations, forecasts)) - - if m_axis != -1: - forecasts = B.moveaxis(forecasts, m_axis, -1) - - if B.name == "numba": - return error_spread._ess_gufunc(observations, forecasts) - - return error_spread.ess(observations, forecasts, backend=backend) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + if backend == "numba": + return error_spread._ess_gufunc(obs, fct) + else: + return error_spread.ess(obs, fct, backend=backend) diff --git a/scoringrules/_interval.py b/scoringrules/_interval.py index d6d7492..893514d 100644 --- a/scoringrules/_interval.py +++ b/scoringrules/_interval.py @@ -177,5 +177,5 @@ def weighted_interval_score( if B.name == "numba": return interval._weighted_interval_score_gufunc(*args) - - return interval.weighted_interval_score(*args, backend=backend) + else: + return interval.weighted_interval_score(*args, backend=backend) diff --git a/scoringrules/_kernels.py b/scoringrules/_kernels.py index bae2dbb..7e9ca38 100644 --- a/scoringrules/_kernels.py +++ b/scoringrules/_kernels.py @@ -1,8 +1,14 @@ import typing as tp -from scoringrules.backend import backends from scoringrules.core import kernels -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import ( + univariate_array_check, + multivariate_array_check, + estimator_check, + uv_weighted_score_weights, + uv_weighted_score_chain, + mv_weighted_score_weights, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -56,22 +62,12 @@ def gksuv_ensemble( >>> import scoringrules as sr >>> sr.gks_ensemble(obs, pred) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) if backend == "numba": - if estimator not in 
kernels.estimator_gufuncs: - raise ValueError( - f"{estimator} is not a valid estimator. " - f"Must be one of {kernels.estimator_gufuncs.keys()}" - ) - else: - return kernels.estimator_gufuncs[estimator](obs, fct) - - return kernels.ensemble_uv(obs, fct, estimator, backend=backend) + estimator_check(estimator, kernels.estimator_gufuncs_uv) + return kernels.estimator_gufuncs_uv[estimator](obs, fct) + else: + return kernels.ensemble_uv(obs, fct, estimator=estimator, backend=backend) def twgksuv_ensemble( @@ -139,14 +135,7 @@ def twgksuv_ensemble( >>> >>> sr.twgksuv_ensemble(obs, pred, v_func=v_func) """ - if v_func is None: - B = backends.active if backend is None else backends[backend] - a, b, obs, fct = map(B.asarray, (a, b, obs, fct)) - - def v_func(x): - return B.minimum(B.maximum(x, a), b) - - obs, fct = map(v_func, (obs, fct)) + obs, fct = uv_weighted_score_chain(obs, fct, a, b, v_func, backend=backend) return gksuv_ensemble( obs, fct, @@ -219,24 +208,12 @@ def owgksuv_ensemble( >>> >>> sr.owgksuv_ensemble(obs, pred, w_func=w_func) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return kernels.estimator_gufuncs["ow"](obs, fct, obs_weights, fct_weights) - - return kernels.ow_ensemble_uv(obs, fct, obs_weights, fct_weights, backend=backend) + return kernels.estimator_gufuncs_uv["ow"](obs, fct, obs_w, fct_w) + else: + return kernels.ow_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) def vrgksuv_ensemble( @@ -301,24 +278,12 @@ def vrgksuv_ensemble( >>> >>> 
sr.vrgksuv_ensemble(obs, pred, w_func=w_func) """ - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if w_func is None: - - def w_func(x): - return ((a <= x) & (x <= b)) * 1.0 - - obs_weights, fct_weights = map(w_func, (obs, fct)) - obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) - + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) + obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend) if backend == "numba": - return kernels.estimator_gufuncs["vr"](obs, fct, obs_weights, fct_weights) - - return kernels.vr_ensemble_uv(obs, fct, obs_weights, fct_weights, backend=backend) + return kernels.estimator_gufuncs_uv["vr"](obs, fct, obs_w, fct_w) + else: + return kernels.vr_ensemble_uv(obs, fct, obs_w, fct_w, backend=backend) def gksmv_ensemble( @@ -371,19 +336,12 @@ def gksmv_ensemble( score : array_like The GKS between the forecast ensemble and obs. """ - backend = backend if backend is not None else backends._active obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - if estimator not in kernels.estimator_gufuncs_mv: - raise ValueError( - f"{estimator} is not a valid estimator. " - f"Must be one of {kernels.estimator_gufuncs_mv.keys()}" - ) - else: - return kernels.estimator_gufuncs_mv[estimator](obs, fct) - - return kernels.ensemble_mv(obs, fct, estimator, backend=backend) + estimator_check(estimator, kernels.estimator_gufuncs_mv) + return kernels.estimator_gufuncs_mv[estimator](obs, fct) + else: + return kernels.ensemble_mv(obs, fct, estimator=estimator, backend=backend) def twgksmv_ensemble( @@ -393,6 +351,7 @@ def twgksmv_ensemble( m_axis: int = -2, v_axis: int = -1, *, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": r"""Compute the Threshold-Weighted Gaussian Kernel Score (twGKS) for a finite multivariate ensemble. 
@@ -425,6 +384,8 @@ def twgksmv_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of ints The axis corresponding to the variables dimension. Defaults to -1. + estimator : str + Indicates the estimator to be used. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. @@ -434,7 +395,9 @@ def twgksmv_ensemble( The computed Threshold-Weighted Gaussian Kernel Score. """ obs, fct = map(v_func, (obs, fct)) - return gksmv_ensemble(obs, fct, m_axis=m_axis, v_axis=v_axis, backend=backend) + return gksmv_ensemble( + obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend + ) def owgksmv_ensemble( @@ -499,17 +462,12 @@ def owgksmv_ensemble( score : array_like The computed Outcome-Weighted GKS. """ - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - - if B.name == "numba": - return kernels.estimator_gufuncs_mv["ow"](obs, fct, obs_weights, fct_weights) - - return kernels.ow_ensemble_mv(obs, fct, obs_weights, fct_weights, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_mv["ow"](obs, fct, obs_w, fct_w) + else: + return kernels.ow_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) def vrgksmv_ensemble( @@ -560,17 +518,12 @@ def vrgksmv_ensemble( score : array_like The computed Vertically Re-scaled Gaussian Kernel Score. 
""" - B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - - fct_weights = B.apply_along_axis(w_func, fct, -1) - obs_weights = B.apply_along_axis(w_func, obs, -1) - - if B.name == "numba": - return kernels.estimator_gufuncs_mv["vr"](obs, fct, obs_weights, fct_weights) - - return kernels.vr_ensemble_mv(obs, fct, obs_weights, fct_weights, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) + if backend == "numba": + return kernels.estimator_gufuncs_mv["vr"](obs, fct, obs_w, fct_w) + else: + return kernels.vr_ensemble_mv(obs, fct, obs_w, fct_w, backend=backend) __all__ = [ diff --git a/scoringrules/_logs.py b/scoringrules/_logs.py index d153d83..85c7694 100644 --- a/scoringrules/_logs.py +++ b/scoringrules/_logs.py @@ -2,6 +2,7 @@ from scoringrules.backend import backends from scoringrules.core import logarithmic +from scoringrules.core.utils import univariate_array_check if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, ArrayLike, Backend @@ -50,12 +51,9 @@ def logs_ensemble( >>> sr.logs_ensemble(obs, pred) """ B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - M = fct.shape[-1] + M = fct.shape[-1] # number of ensemble members # Silverman's rule of thumb for estimating the bandwidth parameter if bw is None: @@ -66,7 +64,7 @@ def logs_ensemble( bw = 1.06 * B.minimum(sigmahat, iqr / 1.34) * (M ** (-1 / 5)) bw = B.stack([bw] * M, axis=-1) - w = B.zeros(fct.shape) + 1 / M + w = B.ones(fct.shape) / M return logarithmic.mixnorm(obs, fct, bw, w, backend=backend) @@ -125,12 +123,9 @@ def clogs_ensemble( >>> sr.clogs_ensemble(obs, pred, -1.0, 1.0) """ B = backends.active if backend is None else backends[backend] - fct = B.asarray(fct) - - if 
m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) + obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend) - M = fct.shape[-1] + M = fct.shape[-1] # number of ensemble members # Silverman's rule of thumb for estimating the bandwidth parameter if bw is None: @@ -707,7 +702,7 @@ def logs_mixnorm( if w is None: M: int = m.shape[mc_axis] - w = B.zeros(m.shape) + 1 / M + w = B.ones(m.shape) / M else: w = B.asarray(w) diff --git a/scoringrules/_variogram.py b/scoringrules/_variogram.py index 4ff690c..6028f65 100644 --- a/scoringrules/_variogram.py +++ b/scoringrules/_variogram.py @@ -2,7 +2,12 @@ from scoringrules.backend import backends from scoringrules.core import variogram -from scoringrules.core.utils import multivariate_array_check +from scoringrules.core.utils import ( + multivariate_array_check, + estimator_check, + mv_weighted_score_chain, + mv_weighted_score_weights, +) if tp.TYPE_CHECKING: from scoringrules.core.typing import Array, Backend @@ -11,6 +16,7 @@ def vs_ensemble( obs: "Array", fct: "Array", + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -23,7 +29,7 @@ def vs_ensemble( For a :math:`D`-variate ensemble the Variogram Score [1]_ is defined as: .. math:: - \text{VS}_{p}(F_{ens}, \mathbf{y})= \sum_{i=1}^{d} \sum_{j=1}^{d} + \text{VS}_{p}(F_{ens}, \mathbf{y})= \sum_{i=1}^{d} \sum_{j=1}^{d} w_{i,j} \left( \frac{1}{M} \sum_{m=1}^{M} | x_{m,i} - x_{m,j} |^{p} - | y_{i} - y_{j} |^{p} \right)^{2}, where :math:`\mathbf{X}` and :math:`\mathbf{X'}` are independently sampled ensembles from from :math:`F`. @@ -36,12 +42,15 @@ def vs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + w : array_like + The weights assigned to pairs of dimensions. 
Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. estimator : str The variogram score estimator to be used. backend: str @@ -68,21 +77,27 @@ def vs_ensemble( >>> sr.vs_ensemble(obs, fct) array([ 8.65630139, 6.84693866, 19.52993307]) """ + B = backends.active if backend is None else backends[backend] obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if backend == "numba": - if estimator == "nrg": - return variogram._variogram_score_nrg_gufunc(obs, fct, p) - elif estimator == "fair": - return variogram._variogram_score_fair_gufunc(obs, fct, p) + if w is None: + D = fct.shape[-1] + w = B.ones(obs.shape + (D,)) + else: + w = B.asarray(w) - return variogram.vs(obs, fct, p, estimator=estimator, backend=backend) + if backend == "numba": + estimator_check(estimator, variogram.estimator_gufuncs) + return variogram.estimator_gufuncs[estimator](obs, fct, w, p) + else: + return variogram.vs(obs, fct, w, p, estimator=estimator, backend=backend) def twvs_ensemble( obs: "Array", fct: "Array", v_func: tp.Callable, + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -108,14 +123,17 @@ def twvs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. + w : array_like + The weights assigned to pairs of dimensions. Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. 
v_func : callable, array_like -> array_like Chaining function used to emphasise particular outcomes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. estimator : str The variogram score estimator to be used. backend : str @@ -143,9 +161,9 @@ def twvs_ensemble( >>> sr.twvs_ensemble(obs, fct, lambda x: np.maximum(x, -0.2)) array([5.94996894, 4.72029765, 6.08947229]) """ - obs, fct = map(v_func, (obs, fct)) + obs, fct = mv_weighted_score_chain(obs, fct, v_func) return vs_ensemble( - obs, fct, m_axis, v_axis, p=p, estimator=estimator, backend=backend + obs, fct, w, m_axis, v_axis, p=p, estimator=estimator, backend=backend ) @@ -153,6 +171,7 @@ def owvs_ensemble( obs: "Array", fct: "Array", w_func: tp.Callable, + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -182,14 +201,17 @@ def owvs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. + w : array_like + The weights assigned to pairs of dimensions. Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -209,24 +231,24 @@ def owvs_ensemble( array([ 9.86816636, 6.75532522, 19.59353723]) """ B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - obs_weights = B.apply_along_axis(w_func, obs, -1) - fct_weights = B.apply_along_axis(w_func, fct, -1) + if w is None: + D = fct.shape[-1] + w = B.ones(obs.shape + (D,)) if backend == "numba": - return variogram._owvariogram_score_gufunc( - obs, fct, p, obs_weights, fct_weights - ) - - return variogram.owvs(obs, fct, obs_weights, fct_weights, p=p, backend=backend) + return variogram.estimator_gufuncs["ownrg"](obs, fct, w, obs_w, fct_w, p) + else: + return variogram.owvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) def vrvs_ensemble( obs: "Array", fct: "Array", w_func: tp.Callable, + w: "Array" = None, m_axis: int = -2, v_axis: int = -1, *, @@ -258,14 +280,17 @@ def vrvs_ensemble( fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. - p : float - The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 1.0. w_func : callable, array_like -> array_like Weight function used to emphasise particular outcomes. + w : array_like + The weights assigned to pairs of dimensions. Must be of shape (..., D, D), where + D is the dimension, so that the weights are in the last two axes. m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + p : float + The order of the Variogram Score. Typical values are 0.5, 1.0 or 2.0. Defaults to 0.5. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. 
@@ -285,15 +310,14 @@ def vrvs_ensemble( array([46.48256493, 57.90759816, 92.37153472]) """ B = backends.active if backend is None else backends[backend] - obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) + obs_w, fct_w = mv_weighted_score_weights(obs, fct, w_func=w_func, backend=backend) - obs_weights = B.apply_along_axis(w_func, obs, -1) - fct_weights = B.apply_along_axis(w_func, fct, -1) + if w is None: + D = fct.shape[-1] + w = B.ones(obs.shape + (D,)) if backend == "numba": - return variogram._vrvariogram_score_gufunc( - obs, fct, p, obs_weights, fct_weights - ) - - return variogram.vrvs(obs, fct, obs_weights, fct_weights, p=p, backend=backend) + return variogram.estimator_gufuncs["vrnrg"](obs, fct, w, obs_w, fct_w, p) + else: + return variogram.vrvs(obs, fct, w, obs_w, fct_w, p=p, backend=backend) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 5e0e710..1f99917 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -160,6 +160,15 @@ def zeros( ) -> "Array": """Return a new array having a specified ``shape`` and filled with zeros.""" + @abc.abstractmethod + def ones( + self, + shape: int | tuple[int, ...], + *, + dtype: Dtype | None = None, + ) -> "Array": + """Return a new array having a specified ``shape`` and filled with ones.""" + @abc.abstractmethod def abs(self, x: "Array", /) -> "Array": """Calculate the absolute value for each element ``x_i`` of the input array ``x``.""" @@ -202,6 +211,17 @@ def any( ) -> "Array": """Test whether any input array element evaluates to ``True`` along a specified axis.""" + @abc.abstractmethod + def argsort( + self, + x: "Array", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "Array": + """Return the indices of a sorted copy of an input array ``x``.""" + @abc.abstractmethod def sort( self, @@ -299,6 +319,10 @@ def size(self, x: "Array") -> int: def indices(self, x: "Array") -> int: """Return an array representing the 
indices of a grid.""" + @abc.abstractmethod + def gather(self, x: "Array", ind: "Array", axis: int) -> "Array": + """Reorder an array ``x`` depending on a template ``ind`` across an axis ``axis``.""" + @abc.abstractmethod def roll(self, x: "Array", shift: int = 1, axis: int = -1) -> int: """Roll elements of an array along a given axis.""" diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 279781a..9cf4074 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -149,6 +149,11 @@ def zeros( ) -> "Array": return jnp.zeros(shape, dtype=dtype) + def ones( + self, shape: int | tuple[int, ...], *, dtype: Dtype | None = None + ) -> "Array": + return jnp.ones(shape, dtype=dtype) + def abs(self, x: "Array") -> "Array": return jnp.abs(x) @@ -184,6 +189,16 @@ def all( ) -> "Array": return jnp.all(x, axis=axis, keepdims=keepdims) + def argsort( + self, + x: "Array", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "Array": + return jnp.argsort(x, axis=axis, descending=descending) + def sort( self, x: "Array", @@ -271,6 +286,9 @@ def size(self, x: "Array") -> int: def indices(self, dimensions: tuple) -> "Array": return jnp.indices(dimensions) + def gather(self, x: "Array", ind: "Array", axis: int) -> "Array": + return jnp.take_along_axis(x, ind, axis=axis) + def roll(self, x: "Array", shift: int = 1, axis: int = -1) -> "Array": return jnp.roll(x, shift=shift, axis=axis) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 03f78d5..6cbfc08 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -153,6 +153,11 @@ def zeros( ) -> "NDArray": return np.zeros(shape, dtype=dtype) + def ones( + self, shape: int | tuple[int, ...], *, dtype: Dtype | None = None + ) -> "NDArray": + return np.ones(shape, dtype=dtype) + def abs(self, x: "NDArray") -> "NDArray": return np.abs(x) @@ -188,6 +193,17 @@ def all( ) -> "NDArray": return np.all(x, axis=axis, keepdims=keepdims) + def 
argsort( + self, + x: "NDArray", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "NDArray": + x = -x if descending else x + return np.argsort(x, axis=axis) + def sort( self, x: "NDArray", @@ -267,6 +283,9 @@ def size(self, x: "NDArray") -> int: def indices(self, dimensions: tuple) -> "NDArray": return np.indices(dimensions) + def gather(self, x: "NDArray", ind: "NDArray", axis: int) -> "NDArray": + return np.take_along_axis(x, ind, axis=axis) + def roll(self, x: "NDArray", shift: int = 1, axis: int = -1) -> "NDArray": return np.roll(x, shift=shift, axis=axis) diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index 70060c3..9d58712 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -168,6 +168,16 @@ def zeros( dtype = DTYPE return tf.zeros(shape, dtype=dtype) + def ones( + self, + shape: int | tuple[int, ...], + *, + dtype: Dtype | None = None, + ) -> "Tensor": + if dtype is None: + dtype = DTYPE + return tf.ones(shape, dtype=dtype) + def abs(self, x: "Tensor") -> "Tensor": return tf.math.abs(x) @@ -216,6 +226,17 @@ def all( dtype = DTYPE return tf.cast(tf.math.reduce_all(x, axis=axis, keepdims=keepdims), dtype=dtype) + def argsort( + self, + x: "Tensor", + /, + *, + axis: int = -1, + descending: bool = False, + ) -> "Tensor": + direction = "DESCENDING" if descending else "ASCENDING" + return tf.argsort(x, axis=axis, direction=direction) + def sort( self, x: "Tensor", @@ -306,6 +327,10 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = tf.stack(index_grids) return indices + def gather(self, x: "Tensor", ind: "Tensor", axis: int) -> "Tensor": + d = len(x.shape) + return tf.gather(x, ind, axis=axis, batch_dims=d) + def roll(self, x: "Tensor", shift: int = 1, axis: int = -1) -> "Tensor": return tf.roll(x, shift=shift, axis=axis) diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 1ef9831..1eb141c 100644 --- a/scoringrules/backend/torch.py 
+++ b/scoringrules/backend/torch.py @@ -157,6 +157,14 @@ def zeros( ) -> "Tensor": return torch.zeros(shape, dtype=dtype) + def ones( + self, + shape: int | tuple[int, ...], + *, + dtype: Dtype | None = None, + ) -> "Tensor": + return torch.ones(shape, dtype=dtype) + def abs(self, x: "Tensor") -> "Tensor": return torch.abs(x) @@ -195,6 +203,17 @@ def all( else: return torch.all(x, dim=axis, keepdim=keepdims) + def argsort( + self, + x: "Tensor", + /, + *, + axis: int = -1, + descending: bool = False, + stable: bool = True, + ) -> "Tensor": + return torch.argsort(x, stable=stable, dim=axis, descending=descending) + def sort( self, x: "Tensor", @@ -289,6 +308,9 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = torch.stack(index_grids) return indices + def gather(self, x: "Tensor", ind: "Tensor", axis: int) -> "Tensor": + return torch.gather(x, index=ind, dim=axis) + def roll(self, x: "Tensor", shift: int = 1, axis: int = -1) -> "Tensor": return torch.roll(x, shifts=shift, dims=axis) diff --git a/scoringrules/core/brier.py b/scoringrules/core/brier.py index e66fbae..18f05ed 100644 --- a/scoringrules/core/brier.py +++ b/scoringrules/core/brier.py @@ -12,16 +12,7 @@ def brier_score( obs: "ArrayLike", fct: "ArrayLike", backend: "Backend" = None ) -> "Array": """Compute the Brier Score for predicted probabilities of events.""" - B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecasted probabilities must be within 0 and 1.") - - if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: - raise ValueError("Observations must be 0, 1, or NaN.") - - return B.asarray((fct - obs) ** 2) + return (fct - obs) ** 2 def rps_score( @@ -31,30 +22,12 @@ def rps_score( ) -> "Array": """Compute the Ranked Probability Score for ordinal categorical forecasts.""" B = backends.active if backend is None else backends[backend] - obs, fct = 
map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecast probabilities must be between 0 and 1.") - - categories = B.arange(1, fct.shape[-1] + 1) - obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0) - - return B.sum( - (B.cumsum(fct, axis=-1) - B.cumsum(obs_one_hot, axis=-1)) ** 2, axis=-1 - ) + return B.sum((B.cumsum(fct, axis=-1) - B.cumsum(obs, axis=-1)) ** 2, axis=-1) def log_score(obs: "ArrayLike", fct: "ArrayLike", backend: "Backend" = None) -> "Array": """Compute the Log Score for predicted probabilities of binary events.""" B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecasted probabilities must be within 0 and 1.") - - if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: - raise ValueError("Observations must be 0, 1, or NaN.") - return B.asarray(-B.log(B.abs(fct + obs - 1.0))) @@ -65,15 +38,7 @@ def rls_score( ) -> "Array": """Compute the Ranked Logarithmic Score for ordinal categorical forecasts.""" B = backends.active if backend is None else backends[backend] - obs, fct = map(B.asarray, (obs, fct)) - - if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): - raise ValueError("Forecast probabilities must be between 0 and 1.") - - categories = B.arange(1, fct.shape[-1] + 1) - obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0) - return B.sum( - -B.log(B.abs(B.cumsum(fct, axis=-1) + B.cumsum(obs_one_hot, axis=-1) - 1.0)), + -B.log(B.abs(B.cumsum(fct, axis=-1) + B.cumsum(obs, axis=-1) - 1.0)), axis=-1, ) diff --git a/scoringrules/core/crps/__init__.py b/scoringrules/core/crps/__init__.py index c54b179..ddeae70 100644 --- a/scoringrules/core/crps/__init__.py +++ b/scoringrules/core/crps/__init__.py @@ -1,4 +1,4 @@ -from ._approx import ensemble, ow_ensemble, quantile_pinball, vr_ensemble +from ._approx import ensemble, ow_ensemble, 
vr_ensemble, quantile_pinball from ._closed import ( beta, binomial, diff --git a/scoringrules/core/crps/_gufuncs.py b/scoringrules/core/crps/_gufuncs.py index 1e2ed1d..e3ecc2b 100644 --- a/scoringrules/core/crps/_gufuncs.py +++ b/scoringrules/core/crps/_gufuncs.py @@ -1,7 +1,5 @@ -import math - import numpy as np -from numba import guvectorize, njit, vectorize +from numba import guvectorize from scoringrules.core.utils import lazy_gufunc_wrapper_uv @@ -250,57 +248,12 @@ def _vrcrps_ensemble_nrg_gufunc( out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (wabs_x - wabs_y) * (wbar - ow) -@njit(["float32(float32)", "float64(float64)"]) -def _norm_cdf(x: float) -> float: - """Cumulative distribution function for the standard normal distribution.""" - out: float = (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 - return out - - -@njit(["float32(float32)", "float64(float64)"]) -def _norm_pdf(x: float) -> float: - """Probability density function for the standard normal distribution.""" - out: float = (1 / math.sqrt(2 * math.pi)) * math.exp(-(x**2) / 2) - return out - - -@njit(["float32(float32)", "float64(float64)"]) -def _logis_cdf(x: float) -> float: - """Cumulative distribution function for the standard logistic distribution.""" - out: float = 1.0 / (1.0 + math.exp(-x)) - return out - - -@vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) -def _crps_normal_ufunc(obs: float, mu: float, sigma: float) -> float: - ω = (obs - mu) / sigma - out: float = sigma * (ω * (2 * _norm_cdf(ω) - 1) + 2 * _norm_pdf(ω) - INV_SQRT_PI) - return out - - -@vectorize(["float32(float32, float32, float32)", "float64(float64, float64, float64)"]) -def _crps_lognormal_ufunc(obs: float, mulog: float, sigmalog: float) -> float: - ω = (np.log(obs) - mulog) / sigmalog - ex = 2 * np.exp(mulog + sigmalog**2 / 2) - out: float = obs * (2 * _norm_cdf(ω) - 1) - ex * ( - _norm_cdf(ω - sigmalog) + _norm_cdf(sigmalog / np.sqrt(2)) - 1 - ) - return out - - -@vectorize(["float32(float32, 
float32, float32)", "float64(float64, float64, float64)"]) -def _crps_logistic_ufunc(obs: float, mu: float, sigma: float) -> float: - ω = (obs - mu) / sigma - out: float = sigma * (ω - 2 * np.log(_logis_cdf(ω)) - 1) - return out - - estimator_gufuncs = { "akr_circperm": lazy_gufunc_wrapper_uv(_crps_ensemble_akr_circperm_gufunc), "akr": lazy_gufunc_wrapper_uv(_crps_ensemble_akr_gufunc), "fair": _crps_ensemble_fair_gufunc, "int": lazy_gufunc_wrapper_uv(_crps_ensemble_int_gufunc), - "nrg": lazy_gufunc_wrapper_uv(_crps_ensemble_nrg_gufunc), + "nrg": _crps_ensemble_nrg_gufunc, "pwm": lazy_gufunc_wrapper_uv(_crps_ensemble_pwm_gufunc), "qd": _crps_ensemble_qd_gufunc, "ownrg": lazy_gufunc_wrapper_uv(_owcrps_ensemble_nrg_gufunc), @@ -315,8 +268,5 @@ def _crps_logistic_ufunc(obs: float, mu: float, sigma: float) -> float: "_crps_ensemble_nrg_gufunc", "_crps_ensemble_pwm_gufunc", "_crps_ensemble_qd_gufunc", - "_crps_normal_ufunc", - "_crps_lognormal_ufunc", - "_crps_logistic_ufunc", "quantile_pinball_gufunc", ] diff --git a/scoringrules/core/energy/_gufuncs.py b/scoringrules/core/energy/_gufuncs.py index bd38fba..2359760 100644 --- a/scoringrules/core/energy/_gufuncs.py +++ b/scoringrules/core/energy/_gufuncs.py @@ -19,7 +19,6 @@ def _energy_score_nrg_gufunc( ): """Compute the Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -45,7 +44,6 @@ def _energy_score_fair_gufunc( ): """Compute the fair Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -56,12 +54,10 @@ def _energy_score_fair_gufunc( out[0] = e_1 / M - 0.5 / (M * (M - 1)) * e_2 -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _energy_score_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Compute the Energy Score for a finite ensemble using the approximate kernel representation.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -71,14 +67,12 @@ def _energy_score_akr_gufunc(obs: 
np.ndarray, fct: np.ndarray, out: np.ndarray): out[0] = e_1 / M - 0.5 * 1 / M * e_2 -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _energy_score_akr_circperm_gufunc( obs: np.ndarray, fct: np.ndarray, out: np.ndarray ): """Compute the Energy Score for a finite ensemble using the AKR with cyclic permutation.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -89,7 +83,6 @@ def _energy_score_akr_circperm_gufunc( out[0] = e_1 / M - 0.5 * 1 / M * e_2 -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _owenergy_score_gufunc( obs: np.ndarray, @@ -100,7 +93,6 @@ def _owenergy_score_gufunc( ): """Compute the Outcome-Weighted Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 for i in range(M): @@ -113,7 +105,6 @@ def _owenergy_score_gufunc( out[0] = e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _vrenergy_score_gufunc( obs: np.ndarray, @@ -124,7 +115,6 @@ def _vrenergy_score_gufunc( ): """Compute the Vertically Re-scaled Energy Score for a finite ensemble.""" M = fct.shape[0] - e_1 = 0.0 e_2 = 0.0 wabs_x = 0.0 @@ -142,12 +132,12 @@ def _vrenergy_score_gufunc( estimator_gufuncs = { - "akr_circperm": _energy_score_akr_circperm_gufunc, - "akr": _energy_score_akr_gufunc, - "fair": _energy_score_fair_gufunc, - "nrg": _energy_score_nrg_gufunc, - "ownrg": _owenergy_score_gufunc, - "vrnrg": _vrenergy_score_gufunc, + "akr_circperm": lazy_gufunc_wrapper_mv(_energy_score_akr_circperm_gufunc), + "akr": lazy_gufunc_wrapper_mv(_energy_score_akr_gufunc), + "fair": lazy_gufunc_wrapper_mv(_energy_score_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_energy_score_nrg_gufunc), + "ownrg": lazy_gufunc_wrapper_mv(_owenergy_score_gufunc), + "vrnrg": lazy_gufunc_wrapper_mv(_vrenergy_score_gufunc), } __all__ = [ diff --git a/scoringrules/core/energy/_score.py b/scoringrules/core/energy/_score.py index ad9f63c..1dd6ec5 100644 --- a/scoringrules/core/energy/_score.py 
+++ b/scoringrules/core/energy/_score.py @@ -98,18 +98,17 @@ def owes_ensemble( ) -> "Array": """Compute the outcome-weighted energy score based on a finite ensemble.""" B = backends.active if backend is None else backends[backend] - M = fct.shape[-2] - wbar = B.sum(fw, -1) / M + wbar = B.mean(fw, axis=-1) err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) # (... M) - E_1 = B.sum(err_norm * fw * B.expand_dims(ow, -1), -1) / (M * wbar) # (...) + E_1 = B.mean(err_norm * fw * B.expand_dims(ow, -1), axis=-1) / wbar # (...) spread_norm = B.norm( B.expand_dims(fct, -2) - B.expand_dims(fct, -3), -1 ) # (... M M) fw_prod = B.expand_dims(fw, -1) * B.expand_dims(fw, -2) # (... M M) spread_norm *= fw_prod * B.expand_dims(ow, (-2, -1)) # (... M M) - E_2 = B.sum(spread_norm, (-2, -1)) / (M**2 * wbar**2) # (...) + E_2 = B.mean(spread_norm, axis=(-2, -1)) / (wbar**2) # (...) return E_1 - 0.5 * E_2 @@ -123,19 +122,18 @@ def vres_ensemble( ) -> "Array": """Compute the vertically re-scaled energy score based on a finite ensemble.""" B = backends.active if backend is None else backends[backend] - M, D = fct.shape[-2:] - wbar = B.sum(fw, -1) / M + wbar = B.mean(fw, axis=-1) err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) # (... M) err_norm *= fw * B.expand_dims(ow, -1) # (... M) - E_1 = B.sum(err_norm, -1) / M # (...) + E_1 = B.mean(err_norm, axis=-1) # (...) spread_norm = B.norm( B.expand_dims(fct, -2) - B.expand_dims(fct, -3), -1 ) # (... M M) fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) - E_2 = B.sum(spread_norm * fw_prod, (-2, -1)) / (M**2) # (...) + E_2 = B.mean(spread_norm * fw_prod, axis=(-2, -1)) # (...) - rhobar = B.sum(B.norm(fct, -1) * fw, -1) / M # (...) + rhobar = B.mean(B.norm(fct, -1) * fw, axis=-1) # (...) E_3 = (rhobar - B.norm(obs, -1) * ow) * (wbar - ow) # (...) 
return E_1 - 0.5 * E_2 + E_3 diff --git a/scoringrules/core/kernels/__init__.py b/scoringrules/core/kernels/__init__.py index d0377b7..2d02ba9 100644 --- a/scoringrules/core/kernels/__init__.py +++ b/scoringrules/core/kernels/__init__.py @@ -8,15 +8,12 @@ ) try: - from ._gufuncs import estimator_gufuncs -except ImportError: - estimator_gufuncs = None - -try: - from ._gufuncs import estimator_gufuncs_mv + from ._gufuncs import estimator_gufuncs_uv, estimator_gufuncs_mv except ImportError: + estimator_gufuncs_uv = None estimator_gufuncs_mv = None + __all__ = [ "ensemble_uv", "ow_ensemble_uv", diff --git a/scoringrules/core/kernels/_gufuncs.py b/scoringrules/core/kernels/_gufuncs.py index 90fe8fa..e07fa84 100644 --- a/scoringrules/core/kernels/_gufuncs.py +++ b/scoringrules/core/kernels/_gufuncs.py @@ -20,7 +20,6 @@ def _gauss_kern_mv(x1: float, x2: float) -> float: return out -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Standard version of the kernel score.""" @@ -33,16 +32,15 @@ def _ks_ensemble_uv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray e_1 = 0 e_2 = 0 - for x_i in fct: - e_1 += _gauss_kern_uv(x_i, obs) - for x_j in fct: - e_2 += _gauss_kern_uv(x_i, x_j) + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) + for j in range(M): + e_2 += _gauss_kern_uv(fct[i], fct[j]) e_3 = _gauss_kern_uv(obs, obs) - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Fair version of the kernel score.""" @@ -58,13 +56,12 @@ def _ks_ensemble_uv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra for i in range(M): e_1 += _gauss_kern_uv(fct[i], obs) for j in range(i + 1, M): # important to start from i + 1 and not i - e_2 += 2 * _gauss_kern_uv(fct[j], fct[i]) + e_2 
+= _gauss_kern_uv(fct[j], fct[i]) e_3 = _gauss_kern_uv(obs, obs) - out[0] = -(e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3) + out[0] = -((e_1 / M) - e_2 / (M * (M - 1)) - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Approximate kernel representation estimator of the kernel score.""" @@ -84,7 +81,6 @@ def _ks_ensemble_uv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n)->()") def _ks_ensemble_uv_akr_circperm_gufunc( obs: np.ndarray, fct: np.ndarray, out: np.ndarray @@ -107,7 +103,6 @@ def _ks_ensemble_uv_akr_circperm_gufunc( out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n),(),(n)->()") def _owks_ensemble_uv_gufunc( obs: np.ndarray, @@ -134,10 +129,9 @@ def _owks_ensemble_uv_gufunc( wbar = np.mean(fw) - out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / ((M * wbar) ** 2) - 0.5 * e_3) + out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) - 0.5 * e_3) -@lazy_gufunc_wrapper_uv @guvectorize("(),(n),(),(n)->()") def _vrks_ensemble_uv_gufunc( obs: np.ndarray, @@ -162,10 +156,9 @@ def _vrks_ensemble_uv_gufunc( e_2 += _gauss_kern_uv(x_i, x_j) * fw[i] * fw[j] e_3 = _gauss_kern_uv(obs, obs) * ow * ow - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Standard version of the multivariate kernel score.""" @@ -179,10 +172,9 @@ def _ks_ensemble_mv_nrg_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray e_2 += float(_gauss_kern_mv(fct[i], fct[j])) e_3 = float(_gauss_kern_mv(obs, obs)) - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv 
@guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Fair version of the multivariate kernel score.""" @@ -199,7 +191,6 @@ def _ks_ensemble_mv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra out[0] = -(e_1 / M - e_2 / (M * (M - 1)) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): """Approximate kernel representation estimator of the multivariate kernel score.""" @@ -215,7 +206,6 @@ def _ks_ensemble_mv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d)->()") def _ks_ensemble_mv_akr_circperm_gufunc( obs: np.ndarray, fct: np.ndarray, out: np.ndarray @@ -234,7 +224,6 @@ def _ks_ensemble_mv_akr_circperm_gufunc( out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _owks_ensemble_mv_gufunc( obs: np.ndarray, @@ -259,7 +248,6 @@ def _owks_ensemble_mv_gufunc( out[0] = -(e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) - 0.5 * e_3) -@lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _vrks_ensemble_mv_gufunc( obs: np.ndarray, @@ -279,25 +267,25 @@ def _vrks_ensemble_mv_gufunc( e_2 += float(_gauss_kern_mv(fct[i], fct[j]) * fw[i] * fw[j]) e_3 = float(_gauss_kern_mv(obs, obs)) * ow * ow - out[0] = -(e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3) + out[0] = -((e_1 / M) - 0.5 * (e_2 / (M**2)) - 0.5 * e_3) -estimator_gufuncs = { - "akr": _ks_ensemble_uv_akr_gufunc, - "akr_circperm": _ks_ensemble_uv_akr_circperm_gufunc, - "fair": _ks_ensemble_uv_fair_gufunc, - "nrg": _ks_ensemble_uv_nrg_gufunc, - "ow": _owks_ensemble_uv_gufunc, - "vr": _vrks_ensemble_uv_gufunc, +estimator_gufuncs_uv = { + "akr": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_akr_gufunc), + "akr_circperm": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_akr_circperm_gufunc), + "fair": 
lazy_gufunc_wrapper_uv(_ks_ensemble_uv_fair_gufunc), + "nrg": lazy_gufunc_wrapper_uv(_ks_ensemble_uv_nrg_gufunc), + "ow": lazy_gufunc_wrapper_uv(_owks_ensemble_uv_gufunc), + "vr": lazy_gufunc_wrapper_uv(_vrks_ensemble_uv_gufunc), } estimator_gufuncs_mv = { - "akr": _ks_ensemble_mv_akr_gufunc, - "akr_circperm": _ks_ensemble_mv_akr_circperm_gufunc, - "fair": _ks_ensemble_mv_fair_gufunc, - "nrg": _ks_ensemble_mv_nrg_gufunc, - "ow": _owks_ensemble_mv_gufunc, - "vr": _vrks_ensemble_mv_gufunc, + "akr": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_akr_gufunc), + "akr_circperm": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_akr_circperm_gufunc), + "fair": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_ks_ensemble_mv_nrg_gufunc), + "ow": lazy_gufunc_wrapper_mv(_owks_ensemble_mv_gufunc), + "vr": lazy_gufunc_wrapper_mv(_vrks_ensemble_mv_gufunc), } __all__ = [ diff --git a/scoringrules/core/utils.py b/scoringrules/core/utils.py index 6aa1c13..47450f9 100644 --- a/scoringrules/core/utils.py +++ b/scoringrules/core/utils.py @@ -2,11 +2,14 @@ from scoringrules.backend import backends -_V_AXIS = -1 -_M_AXIS = -2 +_V_AXIS = -1 # variable index for multivariate forecasts +_M_AXIS = -2 # ensemble index for multivariate forecasts +_M_AXIS_UV = -1 # ensemble index for univariate forecasts +_K_AXIS = -1 # categories index for categorical forecasts +EPSILON = 1e-5 -def _multivariate_shape_compatibility(obs, fct, m_axis) -> None: +def _shape_compatibility_check(obs, fct, m_axis) -> None: f_shape = fct.shape o_shape = obs.shape o_shape_broadcast = o_shape[:m_axis] + (f_shape[m_axis],) + o_shape[m_axis:] @@ -16,6 +19,53 @@ def _multivariate_shape_compatibility(obs, fct, m_axis) -> None: ) +def binary_array_check(obs, fct, backend=None): + """Check and adapt the shapes of binary forecasts and observations arrays.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + if B.any(fct < 0.0) or B.any(fct > 1.0 + 
EPSILON): + raise ValueError("Forecasted probabilities must be within 0 and 1.") + if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: + raise ValueError("Observations must be 0, 1, or NaN.") + return obs, fct + + +def categorical_array_check(obs, fct, k_axis, onehot, backend=None): + """Check and adapt the shapes of categorical forecasts and observations arrays.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): + raise ValueError("Forecasted probabilities must be within 0 and 1.") + if k_axis != -1: + fct = B.moveaxis(fct, k_axis, _K_AXIS) + if onehot: + obs = B.moveaxis(obs, k_axis, _K_AXIS) + if onehot: + if obs.shape != fct.shape: + raise ValueError( + f"Forecasts shape {fct.shape} and observations shape {obs.shape} are not compatible for broadcasting!" + ) + else: + if obs.shape != fct.shape[:-1]: + raise ValueError( + f"Forecasts shape {fct.shape} and observations shape {obs.shape} are not compatible for broadcasting!" 
+ ) + categories = B.arange(1, fct.shape[-1] + 1) + obs = B.where(B.expand_dims(obs, -1) == categories, 1, 0) + return obs, fct + + +def univariate_array_check(obs, fct, m_axis, backend=None): + """Check and adapt the shapes of univariate forecasts and observations arrays.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis + _shape_compatibility_check(obs, fct, m_axis) + if m_axis != _M_AXIS_UV: + fct = B.moveaxis(fct, m_axis, _M_AXIS_UV) + return obs, fct + + def _multivariate_shape_permute(obs, fct, m_axis, v_axis, backend=None): B = backends.active if backend is None else backends[backend] v_axis_obs = v_axis - 1 if m_axis < v_axis else v_axis @@ -30,10 +80,81 @@ def multivariate_array_check(obs, fct, m_axis, v_axis, backend=None): obs, fct = map(B.asarray, (obs, fct)) m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis v_axis = v_axis if v_axis >= 0 else fct.ndim + v_axis - _multivariate_shape_compatibility(obs, fct, m_axis) + _shape_compatibility_check(obs, fct, m_axis) return _multivariate_shape_permute(obs, fct, m_axis, v_axis, backend=backend) +def estimator_check(estimator, gufuncs): + """Check that the estimator is valid for the given score.""" + if estimator not in gufuncs: + raise ValueError( + f"{estimator} is not a valid estimator. 
" f"Must be one of {gufuncs.keys()}" + ) + + +def uv_weighted_score_weights( + obs, fct, a=float("-inf"), b=float("inf"), w_func=None, backend=None +): + """Calculate weights for a weighted scoring rule corresponding to the observations + and ensemble members given a weight function.""" + B = backends.active if backend is None else backends[backend] + if w_func is None: + + def w_func(x): + return ((a <= x) & (x <= b)) * 1.0 + + obs_w, fct_w = map(w_func, (obs, fct)) + obs_w, fct_w = map(B.asarray, (obs_w, fct_w)) + if B.any(obs_w < 0) or B.any(fct_w < 0): + raise ValueError("`w_func` returns negative values") + return obs_w, fct_w + + +def uv_weighted_score_chain( + obs, fct, a=float("-inf"), b=float("inf"), v_func=None, backend=None +): + """Calculate transformed observations and ensemble members for threshold-weighted scoring rules given a chaining function.""" + B = backends.active if backend is None else backends[backend] + if v_func is None: + a, b, obs, fct = map(B.asarray, (a, b, obs, fct)) + + def v_func(x): + return B.minimum(B.maximum(x, a), b) + + obs, fct = map(v_func, (obs, fct)) + return obs, fct + + +def mv_weighted_score_weights(obs, fct, w_func=None, backend=None): + """Calculate weights for a weighted scoring rule corresponding to the observations + and ensemble members given a weight function.""" + B = backends.active if backend is None else backends[backend] + obs_w = B.apply_along_axis(w_func, obs, -1) + fct_w = B.apply_along_axis(w_func, fct, -1) + if backend == "torch": + obs_w = obs_w * 1.0 + fct_w = fct_w * 1.0 + if B.any(obs_w < 0) or B.any(fct_w < 0): + raise ValueError("`w_func` returns negative values") + return obs_w, fct_w + + +def mv_weighted_score_chain(obs, fct, v_func=None): + """Calculate transformed observations and ensemble members for threshold-weighted scoring rules given a chaining function.""" + obs, fct = map(v_func, (obs, fct)) + return obs, fct + + +def univariate_sort_ens(fct, estimator=None, sorted_ensemble=False, 
backend=None): + """Sort ensemble members if not already sorted.""" + B = backends.active if backend is None else backends[backend] + sort_ensemble = not sorted_ensemble and estimator in ["qd", "pwm", "int"] + if sort_ensemble: + ind = B.argsort(fct, axis=-1) + fct = B.gather(fct, ind, axis=-1) + return fct + + def lazy_gufunc_wrapper_uv(func): """ Wrapper for lazy/dynamic generalized universal functions so diff --git a/scoringrules/core/variogram/__init__.py b/scoringrules/core/variogram/__init__.py index 26a37d4..f49f7fe 100644 --- a/scoringrules/core/variogram/__init__.py +++ b/scoringrules/core/variogram/__init__.py @@ -1,15 +1,7 @@ try: - from ._gufuncs import ( - _variogram_score_nrg_gufunc, - _variogram_score_fair_gufunc, - _owvariogram_score_gufunc, - _vrvariogram_score_gufunc, - ) + from ._gufuncs import estimator_gufuncs except ImportError: - _variogram_score_nrg_gufunc = None - _variogram_score_fair_gufunc = None - _owvariogram_score_gufunc = None - _vrvariogram_score_gufunc = None + estimator_gufuncs = None from ._score import owvs_ensemble as owvs from ._score import vs_ensemble as vs @@ -19,8 +11,5 @@ "vs", "owvs", "vrvs", - "_variogram_score_nrg_gufunc", - "_variogram_score_fair_gufunc", - "_owvariogram_score_gufunc", - "_vrvariogram_score_gufunc", + "estimator_gufuncs", ] diff --git a/scoringrules/core/variogram/_gufuncs.py b/scoringrules/core/variogram/_gufuncs.py index 1402d28..5eb0484 100644 --- a/scoringrules/core/variogram/_gufuncs.py +++ b/scoringrules/core/variogram/_gufuncs.py @@ -4,14 +4,14 @@ from scoringrules.core.utils import lazy_gufunc_wrapper_mv -@guvectorize( - [ - "void(float32[:], float32[:,:], float32, float32[:])", - "void(float64[:], float64[:,:], float64, float64[:])", - ], - "(d),(m,d),()->()", -) -def _variogram_score_nrg_gufunc(obs, fct, p, out): +@guvectorize("(d),(m,d),(d,d),()->()") +def _variogram_score_nrg_gufunc( + obs: np.ndarray, + fct: np.ndarray, + w: np.ndarray, + p: np.ndarray, + out: np.ndarray, +): M = 
fct.shape[-2] D = fct.shape[-1] out[0] = 0.0 @@ -22,12 +22,17 @@ def _variogram_score_nrg_gufunc(obs, fct, p, out): vfct += abs(fct[m, i] - fct[m, j]) ** p vfct = vfct / M vobs = abs(obs[i] - obs[j]) ** p - out[0] += (vobs - vfct) ** 2 + out[0] += w[i, j] * (vobs - vfct) ** 2 -@lazy_gufunc_wrapper_mv -@guvectorize("(d),(m,d),()->()") -def _variogram_score_fair_gufunc(obs, fct, p, out): +@guvectorize("(d),(m,d),(d,d),()->()") +def _variogram_score_fair_gufunc( + obs: np.ndarray, + fct: np.ndarray, + w: np.ndarray, + p: np.ndarray, + out: np.ndarray, +): M = fct.shape[-2] D = fct.shape[-1] @@ -38,20 +43,19 @@ def _variogram_score_fair_gufunc(obs, fct, p, out): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(obs[i] - obs[j]) ** p - e_1 += (rho1 - rho2) ** 2 + e_1 += w[i, j] * (rho1 - rho2) ** 2 for m in range(k + 1, M): for i in range(D): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(fct[m, i] - fct[m, j]) ** p - e_2 += 2 * ((rho1 - rho2) ** 2) + e_2 += w[i, j] * 2 * ((rho1 - rho2) ** 2) out[0] = e_1 / M - 0.5 * e_2 / (M * (M - 1)) -@lazy_gufunc_wrapper_mv -@guvectorize("(d),(m,d),(),(),(m)->()") -def _owvariogram_score_gufunc(obs, fct, p, ow, fw, out): +@guvectorize("(d),(m,d),(d,d),(),(m),()->()") +def _owvariogram_score_gufunc(obs, fct, w, ow, fw, p, out): M = fct.shape[-2] D = fct.shape[-1] @@ -62,22 +66,21 @@ def _owvariogram_score_gufunc(obs, fct, p, ow, fw, out): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(obs[i] - obs[j]) ** p - e_1 += (rho1 - rho2) ** 2 * fw[k] * ow + e_1 += w[i, j] * (rho1 - rho2) ** 2 * fw[k] * ow for m in range(k + 1, M): for i in range(D): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(fct[m, i] - fct[m, j]) ** p - e_2 += 2 * ((rho1 - rho2) ** 2) * fw[k] * fw[m] * ow + e_2 += 2 * w[i, j] * ((rho1 - rho2) ** 2) * fw[k] * fw[m] * ow wbar = np.mean(fw) out[0] = e_1 / (M * wbar) - 0.5 * e_2 / (M**2 * wbar**2) -@lazy_gufunc_wrapper_mv 
-@guvectorize("(d),(m,d),(),(),(m)->()") -def _vrvariogram_score_gufunc(obs, fct, p, ow, fw, out): +@guvectorize("(d),(m,d),(d,d),(),(m),()->()") +def _vrvariogram_score_gufunc(obs, fct, w, ow, fw, p, out): M = fct.shape[-2] D = fct.shape[-1] @@ -89,14 +92,14 @@ def _vrvariogram_score_gufunc(obs, fct, p, ow, fw, out): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(obs[i] - obs[j]) ** p - e_1 += (rho1 - rho2) ** 2 * fw[k] * ow - e_3_x += (rho1) ** 2 * fw[k] + e_1 += w[i, j] * (rho1 - rho2) ** 2 * fw[k] * ow + e_3_x += w[i, j] * (rho1) ** 2 * fw[k] for m in range(k + 1, M): for i in range(D): for j in range(D): rho1 = abs(fct[k, i] - fct[k, j]) ** p rho2 = abs(fct[m, i] - fct[m, j]) ** p - e_2 += 2 * ((rho1 - rho2) ** 2) * fw[k] * fw[m] + e_2 += 2 * w[i, j] * ((rho1 - rho2) ** 2) * fw[k] * fw[m] e_3_x *= 1 / M wbar = np.mean(fw) @@ -104,6 +107,21 @@ def _vrvariogram_score_gufunc(obs, fct, p, ow, fw, out): for i in range(D): for j in range(D): rho1 = abs(obs[i] - obs[j]) ** p - e_3_y += (rho1) ** 2 * ow + e_3_y += w[i, j] * (rho1) ** 2 * ow out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (e_3_x - e_3_y) * (wbar - ow) + + +estimator_gufuncs = { + "fair": lazy_gufunc_wrapper_mv(_variogram_score_fair_gufunc), + "nrg": lazy_gufunc_wrapper_mv(_variogram_score_nrg_gufunc), + "ownrg": lazy_gufunc_wrapper_mv(_owvariogram_score_gufunc), + "vrnrg": lazy_gufunc_wrapper_mv(_vrvariogram_score_gufunc), +} + +__all__ = [ + "_variogram_score_fair_gufunc", + "_variogram_score_nrg_gufunc", + "_owvariogram_score_gufunc", + "_vrvariogram_score_gufunc", +] diff --git a/scoringrules/core/variogram/_score.py b/scoringrules/core/variogram/_score.py index f6d091b..7ff36f4 100644 --- a/scoringrules/core/variogram/_score.py +++ b/scoringrules/core/variogram/_score.py @@ -9,6 +9,7 @@ def vs_ensemble( obs: "Array", # (... D) fct: "Array", # (... 
M D) + w: "Array", # (..., D D) p: float = 0.5, estimator: str = "nrg", backend: "Backend" = None, @@ -23,21 +24,25 @@ def vs_ensemble( obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) if estimator == "nrg": - vfct = B.sum(fct_diff, axis=-3) / M # (... D D) - out = B.sum((obs_diff - vfct) ** 2, axis=(-2, -1)) # (...) + fct_diff = B.sum(fct_diff, axis=-3) / M # (... D D) + out = B.sum(w * (obs_diff - fct_diff) ** 2, axis=(-2, -1)) # (...) elif estimator == "fair": E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) - E_1 = B.sum(E_1, axis=(-2, -1)) # (... M) + E_1 = B.sum(B.expand_dims(w, axis=-3) * E_1, axis=(-2, -1)) # (... M) E_1 = B.sum(E_1, axis=-1) / M # (...) E_2 = ( B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) ) ** 2 # (... M M D D) - E_2 = B.sum(E_2, axis=(-2, -1)) # (... M M) + E_2 = B.sum(B.expand_dims(w, (-3, -4)) * E_2, axis=(-2, -1)) # (... M M) E_2 = B.sum(E_2, axis=(-2, -1)) / (M * (M - 1)) # (...) out = E_1 - 0.5 * E_2 + else: + raise ValueError( + f"For the variogram score, {estimator} must be one of 'nrg', and 'fair'." + ) return out @@ -45,6 +50,7 @@ def vs_ensemble( def owvs_ensemble( obs: "Array", fct: "Array", + w: "Array", ow: "Array", fw: "Array", p: float = 0.5, @@ -52,25 +58,34 @@ def owvs_ensemble( ) -> "Array": """Compute the Outcome-Weighted Variogram Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - M = fct.shape[-2] - wbar = B.sum(fw, -1) / M + wbar = B.mean(fw, axis=-1) fct_diff = ( B.abs(B.expand_dims(fct, -2) - B.expand_dims(fct, -1)) ** p ) # (... M D D) obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) - vfct = B.sum(fct_diff * B.expand_dims(fw, (-2, -1)), axis=-3) / ( - M * B.expand_dims(wbar, (-2, -1)) - ) # (... D D) - out = B.sum(((obs_diff - vfct) ** 2), axis=(-2, -1)) * ow # (...) + E_1 = (fct_diff - B.expand_dims(obs_diff, -3)) ** 2 # (... 
M D D) + E_1 = B.sum(B.expand_dims(w, -3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.mean(E_1 * fw * B.expand_dims(ow, -1), axis=-1) / wbar # (...) - return out + fct_diff_spread = B.expand_dims(fct_diff, -3) - B.expand_dims( + fct_diff, -4 + ) # (... M M D D) + fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) + E_2 = B.sum( + B.expand_dims(w, (-3, -4)) * fct_diff_spread**2, axis=(-2, -1) + ) # (... M M) + E_2 *= fw_prod * B.expand_dims(ow, (-2, -1)) # (... M M) + E_2 = B.mean(E_2, axis=(-2, -1)) / (wbar**2) # (...) + + return E_1 - 0.5 * E_2 def vrvs_ensemble( obs: "Array", fct: "Array", + w: "Array", ow: "Array", fw: "Array", p: float = 0.5, @@ -78,7 +93,6 @@ def vrvs_ensemble( ) -> "Array": """Compute the Vertically Re-scaled Variogram Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - M: int = fct.shape[-2] wbar = B.mean(fw, axis=-1) fct_diff = ( @@ -87,20 +101,20 @@ def vrvs_ensemble( obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) - E_1 = B.sum(E_1, axis=(-2, -1)) # (... M) - E_1 = B.sum(E_1 * fw * B.expand_dims(ow, axis=-1), axis=-1) / M # (...) + E_1 = B.sum(B.expand_dims(w, -3) * E_1, axis=(-2, -1)) # (... M) + E_1 = B.mean(E_1 * fw * B.expand_dims(ow, axis=-1), axis=-1) # (...) E_2 = ( B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) ) ** 2 # (... M M D D) - E_2 = B.sum(E_2, axis=(-2, -1)) # (... M M) + E_2 = B.sum(B.expand_dims(w, (-3, -4)) * E_2, axis=(-2, -1)) # (... M M) fw_prod = B.expand_dims(fw, axis=-2) * B.expand_dims(fw, axis=-1) # (... M M) - E_2 = B.sum(E_2 * fw_prod, axis=(-2, -1)) / (M**2) # (...) + E_2 = B.mean(E_2 * fw_prod, axis=(-2, -1)) # (...) - E_3 = B.sum(fct_diff**2, axis=(-2, -1)) # (... M) - E_3 = B.sum(E_3 * fw, axis=-1) / M # (...) - E_3 -= B.sum(obs_diff**2, axis=(-2, -1)) * ow # (...) 
+ E_3 = B.sum(B.expand_dims(w, -3) * fct_diff**2, axis=(-2, -1)) # (... M) + E_3 = B.mean(E_3 * fw, axis=-1) # (...) + E_3 -= B.sum(w * obs_diff**2, axis=(-2, -1)) * ow # (...) E_3 *= wbar - ow # (...) return E_1 - 0.5 * E_2 + E_3 diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 1fa32e7..efd5b29 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -97,6 +97,11 @@ def test_gksmv(estimator, backend): [[9.8, 8.7, 11.9, 12.1, 13.4], [-24.8, -18.5, -29.9, -18.3, -21.0]] ).transpose() + # test exceptions + with pytest.raises(ValueError): + est = "undefined_estimator" + sr.gksmv_ensemble(obs, fct, estimator=est, backend=backend) + if estimator == "nrg": res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.5868737 diff --git a/tests/test_variogram.py b/tests/test_variogram.py index 5cf3985..d3366b3 100644 --- a/tests/test_variogram.py +++ b/tests/test_variogram.py @@ -39,7 +39,8 @@ def test_variogram_score_permuted_dims(estimator, backend): assert "jax" in res.__module__ -def test_variogram_score_correctness(backend): +@pytest.mark.parametrize("estimator", ESTIMATORS) +def test_variogram_score_correctness(estimator, backend): fct = np.array( [ [0.79546742, 0.4777960, 0.2164079, 0.5409873], @@ -48,14 +49,36 @@ def test_variogram_score_correctness(backend): ).T obs = np.array([0.2743836, 0.8146400]) - res = sr.vs_ensemble(obs, fct, p=0.5, estimator="nrg", backend=backend) - np.testing.assert_allclose(res, 0.04114727, rtol=1e-5) - - res = sr.vs_ensemble(obs, fct, p=0.5, estimator="fair", backend=backend) - np.testing.assert_allclose(res, 0.01407421, rtol=1e-5) - - res = sr.vs_ensemble(obs, fct, p=1.0, estimator="nrg", backend=backend) - np.testing.assert_allclose(res, 0.04480374, rtol=1e-5) - - res = sr.vs_ensemble(obs, fct, p=1.0, estimator="fair", backend=backend) - np.testing.assert_allclose(res, 0.004730382, rtol=1e-5) + res = sr.vs_ensemble(obs, fct, p=0.5, estimator=estimator, backend=backend) + if estimator 
== "nrg": + expected = 0.04114727 + np.testing.assert_allclose(res, expected, rtol=1e-5) + elif estimator == "fair": + expected = 0.01407421 + np.testing.assert_allclose(res, expected, rtol=1e-5) + + res = sr.vs_ensemble(obs, fct, p=1.0, estimator=estimator, backend=backend) + if estimator == "nrg": + expected = 0.04480374 + np.testing.assert_allclose(res, expected, rtol=1e-5) + elif estimator == "fair": + expected = 0.004730382 + np.testing.assert_allclose(res, expected, rtol=1e-5) + + # with and without dimension weights + w = np.ones((2, 2)) + res_w = sr.vs_ensemble(obs, fct, w=w, p=1.0, estimator=estimator, backend=backend) + np.testing.assert_allclose(res_w, res, rtol=1e-5) + + # with dimension weights + w = np.array([[0.1, 0.2], [0.2, 0.4]]) + res = sr.vs_ensemble(obs, fct, w=w, p=0.5, estimator=estimator, backend=backend) + if estimator == "nrg": + expected = 0.008229455 + np.testing.assert_allclose(res, expected, atol=1e-6) + + # invariance to diagonal terms of weight matrix + w = np.array([[1.1, 0.2], [0.2, 100]]) + res = sr.vs_ensemble(obs, fct, w=w, p=0.5, estimator=estimator, backend=backend) + if estimator == "nrg": + np.testing.assert_allclose(res, expected, atol=1e-6) diff --git a/tests/test_wcrps.py b/tests/test_wcrps.py index 2fd2c43..731a277 100644 --- a/tests/test_wcrps.py +++ b/tests/test_wcrps.py @@ -12,21 +12,34 @@ def test_owcrps_ensemble(backend): # test shapes obs = np.random.randn(N) - res = sr.owcrps_ensemble(obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0) - assert res.shape == (N,) res = sr.owcrps_ensemble( - obs, np.random.randn(M, N), w_func=lambda x: x * 0.0 + 1.0, m_axis=0 + obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0, backend=backend ) assert res.shape == (N,) + fct = np.random.randn(M, N) + res = sr.owcrps_ensemble( + obs, + fct, + w_func=lambda x: x * 0.0 + 1.0, + m_axis=0, + backend=backend, + ) + def test_vrcrps_ensemble(backend): # test shapes obs = np.random.randn(N) - res = sr.vrcrps_ensemble(obs, 
np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0) + res = sr.vrcrps_ensemble( + obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0, backend=backend + ) assert res.shape == (N,) res = sr.vrcrps_ensemble( - obs, np.random.randn(M, N), w_func=lambda x: x * 0.0 + 1.0, m_axis=0 + obs, + np.random.randn(M, N), + w_func=lambda x: x * 0.0 + 1.0, + m_axis=0, + backend=backend, ) assert res.shape == (N,) @@ -63,21 +76,21 @@ def test_owcrps_vs_crps(backend): sigma = abs(np.random.randn(N)) * 0.5 fct = np.random.randn(N, M) * sigma[..., None] + mu[..., None] - res = sr.crps_ensemble(obs, fct, backend=backend, estimator="nrg") + res = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend) # no argument given resw = sr.owcrps_ensemble(obs, fct, backend=backend) - np.testing.assert_allclose(res, resw, rtol=1e-5) + np.testing.assert_allclose(res, resw, rtol=1e-4) # a and b resw = sr.owcrps_ensemble( obs, fct, a=float("-inf"), b=float("inf"), backend=backend ) - np.testing.assert_allclose(res, resw, rtol=1e-5) + np.testing.assert_allclose(res, resw, rtol=1e-4) # w_func as identity function resw = sr.owcrps_ensemble(obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend) - np.testing.assert_allclose(res, resw, rtol=1e-5) + np.testing.assert_allclose(res, resw, rtol=1e-4) def test_vrcrps_vs_crps(backend): diff --git a/tests/test_wenergy.py b/tests/test_wenergy.py index 5f2f568..368c4b7 100644 --- a/tests/test_wenergy.py +++ b/tests/test_wenergy.py @@ -20,7 +20,7 @@ def test_owes_vs_es(backend): lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend, ) - np.testing.assert_allclose(res, resw, atol=1e-7) + np.testing.assert_allclose(res, resw, atol=1e-6) def test_twes_vs_es(backend):