Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions R/score.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,31 @@
#' @returns A data table with the forecasts and the calculated metrics.
#' @keywords internal
apply_metrics <- function(forecast, metrics, ...) {
failures <- list()
lapply(names(metrics), function(metric_name) {
result <- do.call(
run_safely,
list(..., fun = metrics[[metric_name]], metric_name = metric_name)
)
if (!is.null(result)) {
if (is.list(result) && !is.null(result$error)) {
failures[[metric_name]] <<- result$error
} else if (!is.null(result)) {
forecast[, (metric_name) := result]
}
})
if (length(failures) > 0) {
n_fail <- length(failures)

Check warning on line 166 in R/score.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/score.R,line=166,col=5,[object_usage_linter] local variable 'n_fail' assigned but may not be used
detail <- vapply(
names(failures),
function(nm) paste0("`", nm, "`: ", failures[[nm]]),
character(1)
)
names(detail) <- rep("*", length(detail))
cli_warn(c(
"!" = "Computation failed for {n_fail} metric{?s}:",
detail
))
}
return(forecast)
}

Expand All @@ -171,7 +187,8 @@
#'
#' All named arguments in `...` that are not accepted by `fun` are removed.
#' All unnamed arguments are passed on to the function. In case `fun` errors,
#' the error will be converted to a warning and `run_safely` returns `NULL`.
#' `run_safely` returns a list with `result = NULL` and `error` containing the
#' error message, allowing the caller to batch warnings.
#'
#' `run_safely` can be useful when constructing functions to be used as
#' metrics in [score()].
Expand All @@ -182,7 +199,8 @@
#' provide a more informative warning message in case `fun` errors.
#' @importFrom cli cli_warn
#' @importFrom checkmate assert_function
#' @returns The result of `fun` or `NULL` if `fun` errors
#' @returns The result of `fun`, or a list with `result = NULL` and `error`
#' (a character string) if `fun` errors.
#' @keywords internal
#' @examples
#' f <- function(x) {x}
Expand All @@ -209,16 +227,8 @@
result <- try(do.call(fun, valid_args), silent = TRUE)

if (inherits(result, "try-error")) {
#nolint start: object_usage_linter
msg <- conditionMessage(attr(result, "condition"))
cli_warn(
c(
"!" = "Computation for {.var {metric_name}} failed.
Error: {msg}."
)
)
#nolint end
return(NULL)
return(list(result = NULL, error = msg))
}
return(result)
}
Expand Down
6 changes: 4 additions & 2 deletions man/run_safely.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 8 additions & 9 deletions tests/testthat/test-class-forecast-quantile.R
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,14 @@ test_that("score() works even if only some quantiles are missing", {
scores_temp <- score(asymm, metrics = metrics)
summarise_scores(scores_temp, by = "model")
})
expect_warning(
expect_warning({
scores_temp <- score(asymm, metrics = metrics)
summarise_scores(scores_temp, by = "model")
},
"Computation for `interval_coverage_50` failed."
),
"Computation for `interval_coverage_90` failed."
w <- expect_warning({
scores_temp <- score(asymm, metrics = metrics)
summarise_scores(scores_temp, by = "model")
},
"Computation failed for"
)
expect_match(conditionMessage(w), "interval_coverage_50", fixed = TRUE)
expect_match(conditionMessage(w), "interval_coverage_90", fixed = TRUE)

# expect a failure with the regular wis wihtout ma.rm=TRUE
expect_warning(
Expand All @@ -399,7 +398,7 @@ test_that("score() works even if only some quantiles are missing", {
expect_message(
expect_warning(
score(test),
"Computation for `ae_median` failed."
"Computation failed for"
),
"interpolating median from the two innermost quantiles"
)
Expand Down
108 changes: 100 additions & 8 deletions tests/testthat/test-score.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,35 @@
# with only one sample, dss returns NaN and log_score fails
onesample <- na.omit(example_sample_continuous)[sample_id == 20]
scoreonesample <- suppressWarnings(score(onesample))
expect_warning(
w <- expect_warning(
score(onesample),
"Computation for `log_score` failed. Error: need at least 2 data points."
"log_score"
)
expect_match(conditionMessage(w), "need at least 2 data points")

# verify that all goes well with two samples
twosample <- na.omit(example_sample_continuous)[sample_id %in% c(20, 21)]
expect_no_condition(score(twosample))
})

test_that("score() emits a single batched warning when multiple metrics fail", {
onesample <- na.omit(example_sample_continuous)[sample_id == 20]
always_fail <- function(observed, predicted) stop("intentional")

Check warning on line 107 in tests/testthat/test-score.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-score.R,line=107,col=48,[condition_call_linter] Use stop(., call. = FALSE) not to display the call in an error message.
metrics <- c(
get_metrics(onesample),
list("always_fail" = always_fail)
)
w <- expect_warning(
score(onesample, metrics = metrics)
)
msg <- conditionMessage(w)
# both failures mentioned in a single warning

expect_match(msg, "log_score", fixed = TRUE)
expect_match(msg, "always_fail", fixed = TRUE)
expect_match(msg, "intentional", fixed = TRUE)
})


# test nominal case ------------------------------------------------------------
test_that("function produces output for a nominal format case", {
Expand Down Expand Up @@ -137,6 +156,71 @@
)
})

test_that("apply_metrics() emits a single batched warning when multiple metrics fail", {
dt <- data.table::data.table(x = 1:10)
fail1 <- function(x) stop("error in fail1")

Check warning on line 161 in tests/testthat/test-score.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-score.R,line=161,col=24,[condition_call_linter] Use stop(., call. = FALSE) not to display the call in an error message.
fail2 <- function(x) stop("error in fail2")

Check warning on line 162 in tests/testthat/test-score.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-score.R,line=162,col=24,[condition_call_linter] Use stop(., call. = FALSE) not to display the call in an error message.
good <- function(x) x + 1

w <- expect_warning(
scoringutils:::apply_metrics( # nolint: undesirable_operator_linter
forecast = dt,
metrics = list("fail1" = fail1, "good" = good, "fail2" = fail2),
dt$x
)
)
# single warning mentions both failed metrics
expect_match(conditionMessage(w), "fail1", fixed = TRUE)
expect_match(conditionMessage(w), "fail2", fixed = TRUE)
expect_match(conditionMessage(w), "error in fail1", fixed = TRUE)
expect_match(conditionMessage(w), "error in fail2", fixed = TRUE)
# successful metric is computed

expect_true("good" %in% names(dt))
# failed metrics are not present
expect_false("fail1" %in% names(dt))
expect_false("fail2" %in% names(dt))
})

test_that("apply_metrics() emits no warning when all metrics succeed", {
dt <- data.table::data.table(x = 1:10)
m1 <- function(x) x + 1
m2 <- function(x) x * 2
expect_no_condition(
scoringutils:::apply_metrics( # nolint: undesirable_operator_linter
forecast = dt,
metrics = list("m1" = m1, "m2" = m2),
dt$x
)
)
expect_true("m1" %in% names(dt))
expect_true("m2" %in% names(dt))
expect_equal(dt$m1, 2:11) # nolint: expect_identical_linter
expect_equal(dt$m2, (1:10) * 2) # nolint: expect_identical_linter
})

test_that("batched warning message includes metric name and error details for each failure", {
dt <- data.table::data.table(x = 1:5)
bad_a <- function(x) stop("missing argument foo")

Check warning on line 204 in tests/testthat/test-score.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-score.R,line=204,col=24,[condition_call_linter] Use stop(., call. = FALSE) not to display the call in an error message.
bad_b <- function(x) stop("division by zero")

Check warning on line 205 in tests/testthat/test-score.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-score.R,line=205,col=24,[condition_call_linter] Use stop(., call. = FALSE) not to display the call in an error message.
ok <- function(x) x

w <- expect_warning(
scoringutils:::apply_metrics( # nolint: undesirable_operator_linter
forecast = dt,
metrics = list("bad_a" = bad_a, "ok" = ok, "bad_b" = bad_b),
dt$x
)
)
msg <- conditionMessage(w)
expect_match(msg, "bad_a", fixed = TRUE)
expect_match(msg, "missing argument foo", fixed = TRUE)
expect_match(msg, "bad_b", fixed = TRUE)
expect_match(msg, "division by zero", fixed = TRUE)
# successful metric not mentioned in warning
expect_false(grepl("\\bok\\b", msg))
})

# attributes
test_that("`[` preserves attributes", {
test <- data.table::copy(scores_binary)
Expand Down Expand Up @@ -189,10 +273,18 @@
}
expect_identical(run_safely(2, fun = f), 2)
expect_identical(run_safely(2, y = 3, fun = f), 2)
expect_warning(
run_safely(fun = f, metric_name = "f"),
'Computation for `f` failed. Error: argument "x" is missing, with no default',
fixed = TRUE
)
expect_null(suppressWarnings(run_safely(y = 3, fun = f, metric_name = "f")))
# run_safely() no longer warns directly; it returns error info for batching
result <- run_safely(fun = f, metric_name = "f")
expect_null(result$result)
expect_type(result$error, "character")
expect_match(result$error, "missing, with no default")

result2 <- run_safely(y = 3, fun = f, metric_name = "f")
expect_null(result2$result)
})

test_that("run_safely() returns result directly on success", {
f <- function(x) x + 1
result <- run_safely(2, fun = f, metric_name = "f")
expect_identical(result, 3)
})
Loading