diff --git a/R/score.R b/R/score.R index 7f4eb8f7a..3d17a6ac7 100644 --- a/R/score.R +++ b/R/score.R @@ -150,15 +150,31 @@ score.default <- function(forecast, metrics, ...) { #' @returns A data table with the forecasts and the calculated metrics. #' @keywords internal apply_metrics <- function(forecast, metrics, ...) { + failures <- list() lapply(names(metrics), function(metric_name) { result <- do.call( run_safely, list(..., fun = metrics[[metric_name]], metric_name = metric_name) ) - if (!is.null(result)) { + if (is.list(result) && !is.null(result$error)) { + failures[[metric_name]] <<- result$error + } else if (!is.null(result)) { forecast[, (metric_name) := result] } }) + if (length(failures) > 0) { + n_fail <- length(failures) + detail <- vapply( + names(failures), + function(nm) paste0("`", nm, "`: ", failures[[nm]]), + character(1) + ) + names(detail) <- rep("*", length(detail)) + cli_warn(c( + "!" = "Computation failed for {n_fail} metric{?s}:", + detail + )) + } return(forecast) } @@ -171,7 +187,8 @@ apply_metrics <- function(forecast, metrics, ...) { #' #' All named arguments in `...` that are not accepted by `fun` are removed. #' All unnamed arguments are passed on to the function. In case `fun` errors, -#' the error will be converted to a warning and `run_safely` returns `NULL`. +#' `run_safely` returns a list with `result = NULL` and `error` containing the +#' error message, allowing the caller to batch warnings. #' #' `run_safely` can be useful when constructing functions to be used as #' metrics in [score()]. @@ -182,7 +199,8 @@ apply_metrics <- function(forecast, metrics, ...) { #' provide a more informative warning message in case `fun` errors. #' @importFrom cli cli_warn #' @importFrom checkmate assert_function -#' @returns The result of `fun` or `NULL` if `fun` errors +#' @returns The result of `fun`, or a list with `result = NULL` and `error` +#' (a character string) if `fun` errors. #' @keywords internal #' @examples #' f <- function(x) {x} @@ -209,16 +227,8 @@ run_safely <- function(..., fun, metric_name) { result <- try(do.call(fun, valid_args), silent = TRUE) if (inherits(result, "try-error")) { - #nolint start: object_usage_linter msg <- conditionMessage(attr(result, "condition")) - cli_warn( - c( - "!" = "Computation for {.var {metric_name}} failed. - Error: {msg}." - ) - ) - #nolint end - return(NULL) + return(list(result = NULL, error = msg)) } return(result) } diff --git a/man/run_safely.Rd b/man/run_safely.Rd index 5a2cdabc6..be3941c9c 100644 --- a/man/run_safely.Rd +++ b/man/run_safely.Rd @@ -15,7 +15,8 @@ run_safely(..., fun, metric_name) provide a more informative warning message in case \code{fun} errors.} } \value{ -The result of \code{fun} or \code{NULL} if \code{fun} errors +The result of \code{fun}, or a list with \code{result = NULL} and \code{error} +(a character string) if \code{fun} errors. } \description{ This is a wrapper/helper function designed to run a function safely @@ -24,7 +25,8 @@ function. All named arguments in \code{...} that are not accepted by \code{fun} are removed. All unnamed arguments are passed on to the function. In case \code{fun} errors, -the error will be converted to a warning and \code{run_safely} returns \code{NULL}. +\code{run_safely} returns a list with \code{result = NULL} and \code{error} containing the +error message, allowing the caller to batch warnings. \code{run_safely} can be useful when constructing functions to be used as metrics in \code{\link[=score]{score()}}. diff --git a/tests/testthat/test-class-forecast-quantile.R b/tests/testthat/test-class-forecast-quantile.R index 4f46a5ed3..31b0c9d40 100644 --- a/tests/testthat/test-class-forecast-quantile.R +++ b/tests/testthat/test-class-forecast-quantile.R @@ -364,15 +364,14 @@ test_that("score() works even if only some quantiles are missing", { scores_temp <- score(asymm, metrics = metrics) summarise_scores(scores_temp, by = "model") }) - expect_warning( - expect_warning({ - scores_temp <- score(asymm, metrics = metrics) - summarise_scores(scores_temp, by = "model") - }, - "Computation for `interval_coverage_50` failed." - ), - "Computation for `interval_coverage_90` failed." + w <- expect_warning({ + scores_temp <- score(asymm, metrics = metrics) + summarise_scores(scores_temp, by = "model") + }, + "Computation failed for" ) + expect_match(conditionMessage(w), "interval_coverage_50", fixed = TRUE) + expect_match(conditionMessage(w), "interval_coverage_90", fixed = TRUE) # expect a failure with the regular wis wihtout ma.rm=TRUE expect_warning( @@ -399,7 +398,7 @@ test_that("score() works even if only some quantiles are missing", { expect_message( expect_warning( score(test), - "Computation for `ae_median` failed." + "Computation failed for" ), "interpolating median from the two innermost quantiles" ) diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R index f31a1307d..bada92e48 100644 --- a/tests/testthat/test-score.R +++ b/tests/testthat/test-score.R @@ -91,16 +91,35 @@ test_that("score() works with only one sample", { # with only one sample, dss returns NaN and log_score fails onesample <- na.omit(example_sample_continuous)[sample_id == 20] scoreonesample <- suppressWarnings(score(onesample)) - expect_warning( + w <- expect_warning( score(onesample), - "Computation for `log_score` failed. Error: need at least 2 data points." + "log_score" ) + expect_match(conditionMessage(w), "need at least 2 data points") # verify that all goes well with two samples twosample <- na.omit(example_sample_continuous)[sample_id %in% c(20, 21)] expect_no_condition(score(twosample)) }) +test_that("score() emits a single batched warning when multiple metrics fail", { + onesample <- na.omit(example_sample_continuous)[sample_id == 20] + always_fail <- function(observed, predicted) stop("intentional") + metrics <- c( + get_metrics(onesample), + list("always_fail" = always_fail) + ) + w <- expect_warning( + score(onesample, metrics = metrics) + ) + msg <- conditionMessage(w) + # both failures mentioned in a single warning + + expect_match(msg, "log_score", fixed = TRUE) + expect_match(msg, "always_fail", fixed = TRUE) + expect_match(msg, "intentional", fixed = TRUE) +}) + # test nominal case ------------------------------------------------------------ test_that("function produces output for a nominal format case", { @@ -137,6 +156,71 @@ test_that("apply_metrics() works", { ) }) +test_that("apply_metrics() emits a single batched warning when multiple metrics fail", { + dt <- data.table::data.table(x = 1:10) + fail1 <- function(x) stop("error in fail1") + fail2 <- function(x) stop("error in fail2") + good <- function(x) x + 1 + + w <- expect_warning( + scoringutils:::apply_metrics( # nolint: undesirable_operator_linter + forecast = dt, + metrics = list("fail1" = fail1, "good" = good, "fail2" = fail2), + dt$x + ) + ) + # single warning mentions both failed metrics + expect_match(conditionMessage(w), "fail1", fixed = TRUE) + expect_match(conditionMessage(w), "fail2", fixed = TRUE) + expect_match(conditionMessage(w), "error in fail1", fixed = TRUE) + expect_match(conditionMessage(w), "error in fail2", fixed = TRUE) + # successful metric is computed + + expect_true("good" %in% names(dt)) + # failed metrics are not present + expect_false("fail1" %in% names(dt)) + expect_false("fail2" %in% names(dt)) +}) + +test_that("apply_metrics() emits no warning when all metrics succeed", { + dt <- data.table::data.table(x = 1:10) + m1 <- function(x) x + 1 + m2 <- function(x) x * 2 + expect_no_condition( + scoringutils:::apply_metrics( # nolint: undesirable_operator_linter + forecast = dt, + metrics = list("m1" = m1, "m2" = m2), + dt$x + ) + ) + expect_true("m1" %in% names(dt)) + expect_true("m2" %in% names(dt)) + expect_equal(dt$m1, 2:11) # nolint: expect_identical_linter + expect_equal(dt$m2, (1:10) * 2) # nolint: expect_identical_linter +}) + +test_that("batched warning message includes metric name and error details for each failure", { + dt <- data.table::data.table(x = 1:5) + bad_a <- function(x) stop("missing argument foo") + bad_b <- function(x) stop("division by zero") + ok <- function(x) x + + w <- expect_warning( + scoringutils:::apply_metrics( # nolint: undesirable_operator_linter + forecast = dt, + metrics = list("bad_a" = bad_a, "ok" = ok, "bad_b" = bad_b), + dt$x + ) + ) + msg <- conditionMessage(w) + expect_match(msg, "bad_a", fixed = TRUE) + expect_match(msg, "missing argument foo", fixed = TRUE) + expect_match(msg, "bad_b", fixed = TRUE) + expect_match(msg, "division by zero", fixed = TRUE) + # successful metric not mentioned in warning + expect_false(grepl("\\bok\\b", msg)) +}) + # attributes test_that("`[` preserves attributes", { test <- data.table::copy(scores_binary) @@ -189,10 +273,18 @@ test_that("run_safely() works as expected", { } expect_identical(run_safely(2, fun = f), 2) expect_identical(run_safely(2, y = 3, fun = f), 2) - expect_warning( - run_safely(fun = f, metric_name = "f"), - 'Computation for `f` failed. Error: argument "x" is missing, with no default', - fixed = TRUE - ) - expect_null(suppressWarnings(run_safely(y = 3, fun = f, metric_name = "f"))) + # run_safely() no longer warns directly; it returns error info for batching + result <- run_safely(fun = f, metric_name = "f") + expect_null(result$result) + expect_type(result$error, "character") + expect_match(result$error, "missing, with no default") + + result2 <- run_safely(y = 3, fun = f, metric_name = "f") + expect_null(result2$result) +}) + +test_that("run_safely() returns result directly on success", { + f <- function(x) x + 1 + result <- run_safely(2, fun = f, metric_name = "f") + expect_identical(result, 3) })