epiforecasts · nikosbosse · Feb 13, 2026
diff --git a/R/score.R b/R/score.R
@@ -150,15 +150,31 @@
 #' @returns A data table with the forecasts and the calculated metrics.
 #' @keywords internal
 apply_metrics <- function(forecast, metrics, ...) {
+  failures <- list()
   lapply(names(metrics), function(metric_name) {
     result <- do.call(
       run_safely,
       list(..., fun = metrics[[metric_name]], metric_name = metric_name)
     )
-    if (!is.null(result)) {
+    if (is.list(result) && !is.null(result$error)) {
+      failures[[metric_name]] <<- result$error
+    } else if (!is.null(result)) {
       forecast[, (metric_name) := result]
     }
   })
+  if (length(failures) > 0) {
+    n_fail <- length(failures)
+    detail <- vapply(
+      names(failures),
+      function(nm) paste0("`", nm, "`: ", failures[[nm]]),
+      character(1)
+    )
+    names(detail) <- rep("*", length(detail))
+    cli_warn(c(
+      "!" = "Computation failed for {n_fail} metric{?s}:",
+      detail
+    ))
+  }
   return(forecast)
 }
 
@@ -171,7 +187,8 @@
 #'
 #' All named arguments in `...` that are not accepted by `fun` are removed.
 #' All unnamed arguments are passed on to the function. In case `fun` errors,
-#' the error will be converted to a warning and `run_safely` returns `NULL`.
+#' `run_safely` returns a list with `result = NULL` and `error` containing the
+#' error message, allowing the caller to batch warnings.
 #'
 #' `run_safely` can be useful when constructing functions to be used as
 #' metrics in [score()].
@@ -182,7 +199,8 @@
 #'   provide a more informative warning message in case `fun` errors.
 #' @importFrom cli cli_warn
 #' @importFrom checkmate assert_function
-#' @returns The result of `fun` or `NULL` if `fun` errors
+#' @returns The result of `fun`, or a list with `result = NULL` and `error`
+#'   (a character string) if `fun` errors.
 #' @keywords internal
 #' @examples
 #' f <- function(x) {x}
@@ -209,16 +227,8 @@
   result <- try(do.call(fun, valid_args), silent = TRUE)
 
   if (inherits(result, "try-error")) {
-    #nolint start: object_usage_linter
     msg <- conditionMessage(attr(result, "condition"))
-    cli_warn(
-      c(
-        "!" = "Computation for {.var {metric_name}} failed.
-        Error: {msg}."
-      )
-    )
-    #nolint end
-    return(NULL)
+    return(list(result = NULL, error = msg))
   }
   return(result)
 }

diff --git a/man/run_safely.Rd b/man/run_safely.Rd
diff --git a/tests/testthat/test-class-forecast-quantile.R b/tests/testthat/test-class-forecast-quantile.R
@@ -364,15 +364,14 @@ test_that("score() works even if only some quantiles are missing", {
     scores_temp <- score(asymm, metrics = metrics)
     summarise_scores(scores_temp, by = "model")
   })
-  expect_warning(
-    expect_warning({
-      scores_temp <- score(asymm, metrics = metrics)
-      summarise_scores(scores_temp, by = "model")
-    },
-    "Computation for `interval_coverage_50` failed."
-    ),
-    "Computation for `interval_coverage_90` failed."
+  w <- expect_warning({
+    scores_temp <- score(asymm, metrics = metrics)
+    summarise_scores(scores_temp, by = "model")
+  },
+  "Computation failed for"
   )
+  expect_match(conditionMessage(w), "interval_coverage_50", fixed = TRUE)
+  expect_match(conditionMessage(w), "interval_coverage_90", fixed = TRUE)
 
   # expect a failure with the regular wis wihtout ma.rm=TRUE
   expect_warning(
@@ -399,7 +398,7 @@ test_that("score() works even if only some quantiles are missing", {
   expect_message(
     expect_warning(
       score(test),
-      "Computation for `ae_median` failed."
+      "Computation failed for"
     ),
     "interpolating median from the two innermost quantiles"
   )

diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R
@@ -91,16 +91,35 @@
   # with only one sample, dss returns NaN and log_score fails
   onesample <- na.omit(example_sample_continuous)[sample_id == 20]
   scoreonesample <- suppressWarnings(score(onesample))
-  expect_warning(
+  w <- expect_warning(
     score(onesample),
-    "Computation for `log_score` failed. Error: need at least 2 data points."
+    "log_score"
   )
+  expect_match(conditionMessage(w), "need at least 2 data points")
 
   # verify that all goes well with two samples
   twosample <- na.omit(example_sample_continuous)[sample_id %in% c(20, 21)]
   expect_no_condition(score(twosample))
 })
 
+test_that("score() emits a single batched warning when multiple metrics fail", {
+  onesample <- na.omit(example_sample_continuous)[sample_id == 20]
+  always_fail <- function(observed, predicted) stop("intentional")
+  metrics <- c(
+    get_metrics(onesample),
+    list("always_fail" = always_fail)
+  )
+  w <- expect_warning(
+    score(onesample, metrics = metrics)
+  )
+  msg <- conditionMessage(w)
+  # both failures mentioned in a single warning
+
+  expect_match(msg, "log_score", fixed = TRUE)
+  expect_match(msg, "always_fail", fixed = TRUE)
+  expect_match(msg, "intentional", fixed = TRUE)
+})
+
 
 # test nominal case ------------------------------------------------------------
 test_that("function produces output for a nominal format case", {
@@ -137,6 +156,71 @@
   )
 })
 
+test_that("apply_metrics() emits a single batched warning when multiple metrics fail", {
+  dt <- data.table::data.table(x = 1:10)
+  fail1 <- function(x) stop("error in fail1")
+  fail2 <- function(x) stop("error in fail2")
+  good <- function(x) x + 1
+
+  w <- expect_warning(
+    scoringutils:::apply_metrics( # nolint: undesirable_operator_linter
+      forecast = dt,
+      metrics = list("fail1" = fail1, "good" = good, "fail2" = fail2),
+      dt$x
+    )
+  )
+  # single warning mentions both failed metrics
+  expect_match(conditionMessage(w), "fail1", fixed = TRUE)
+  expect_match(conditionMessage(w), "fail2", fixed = TRUE)
+  expect_match(conditionMessage(w), "error in fail1", fixed = TRUE)
+  expect_match(conditionMessage(w), "error in fail2", fixed = TRUE)
+  # successful metric is computed
+
+  expect_true("good" %in% names(dt))
+  # failed metrics are not present
+  expect_false("fail1" %in% names(dt))
+  expect_false("fail2" %in% names(dt))
+})
+
+test_that("apply_metrics() emits no warning when all metrics succeed", {
+  dt <- data.table::data.table(x = 1:10)
+  m1 <- function(x) x + 1
+  m2 <- function(x) x * 2
+  expect_no_condition(
+    scoringutils:::apply_metrics( # nolint: undesirable_operator_linter
+      forecast = dt,
+      metrics = list("m1" = m1, "m2" = m2),
+      dt$x
+    )
+  )
+  expect_true("m1" %in% names(dt))
+  expect_true("m2" %in% names(dt))
+  expect_equal(dt$m1, 2:11) # nolint: expect_identical_linter
+  expect_equal(dt$m2, (1:10) * 2) # nolint: expect_identical_linter
+})
+
+test_that("batched warning message includes metric name and error details for each failure", {
+  dt <- data.table::data.table(x = 1:5)
+  bad_a <- function(x) stop("missing argument foo")
+  bad_b <- function(x) stop("division by zero")
+  ok <- function(x) x
+
+  w <- expect_warning(
+    scoringutils:::apply_metrics( # nolint: undesirable_operator_linter
+      forecast = dt,
+      metrics = list("bad_a" = bad_a, "ok" = ok, "bad_b" = bad_b),
+      dt$x
+    )
+  )
+  msg <- conditionMessage(w)
+  expect_match(msg, "bad_a", fixed = TRUE)
+  expect_match(msg, "missing argument foo", fixed = TRUE)
+  expect_match(msg, "bad_b", fixed = TRUE)
+  expect_match(msg, "division by zero", fixed = TRUE)
+  # successful metric not mentioned in warning
+  expect_false(grepl("\\bok\\b", msg))
+})
+
 # attributes
 test_that("`[` preserves attributes", {
   test <- data.table::copy(scores_binary)
@@ -189,10 +273,18 @@
   }
   expect_identical(run_safely(2, fun = f), 2)
   expect_identical(run_safely(2, y = 3, fun = f), 2)
-  expect_warning(
-    run_safely(fun = f, metric_name = "f"),
-    'Computation for `f` failed. Error: argument "x" is missing, with no default',
-    fixed = TRUE
-  )
-  expect_null(suppressWarnings(run_safely(y = 3, fun = f, metric_name = "f")))
+  # run_safely() no longer warns directly; it returns error info for batching
+  result <- run_safely(fun = f, metric_name = "f")
+  expect_null(result$result)
+  expect_type(result$error, "character")
+  expect_match(result$error, "missing, with no default")
+
+  result2 <- run_safely(y = 3, fun = f, metric_name = "f")
+  expect_null(result2$result)
+})
+
+test_that("run_safely() returns result directly on success", {
+  f <- function(x) x + 1
+  result <- run_safely(2, fun = f, metric_name = "f")
+  expect_identical(result, 3)
 })