diff --git a/NEWS.md b/NEWS.md index 36f15bba..75a077be 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # cpp11 (development version) +* Removed non-API usage of `ATTRIB()` (#481). + * Improved hygiene around using C++ specific C compatibility headers (i.e. by using `` rather than `` and `` rather than ``) (#454, @MichaelChirico). * Fixed an rchk issue related to `std::initializer_list` (#457, @pachadotdev). diff --git a/cpp11test/src/test-data_frame.cpp b/cpp11test/src/test-data_frame.cpp index 60b3423a..1533b25e 100644 --- a/cpp11test/src/test-data_frame.cpp +++ b/cpp11test/src/test-data_frame.cpp @@ -26,42 +26,123 @@ context("data_frame-C++") { } test_that("data_frame::nrow works with 0x0 dfs") { + // From bare list SEXP x = PROTECT(Rf_allocVector(VECSXP, 0)); - - cpp11::data_frame df(x); - expect_true(df.nrow() == 0); - + cpp11::data_frame x_df(x); + expect_true(x_df.nrow() == 0); UNPROTECT(1); - } - - test_that("data_frame::nrow works with 10x0 dfs") { - using namespace cpp11::literals; - cpp11::writable::list x(0_xl); - x.attr(R_RowNamesSymbol) = {NA_INTEGER, -10}; - cpp11::data_frame df(x); - expect_true(df.nrow() == 10); + // From bare list with `R_RowNamesSymbol` + SEXP y = PROTECT(Rf_allocVector(VECSXP, 0)); + SEXP y_row_names = PROTECT(Rf_allocVector(INTSXP, 2)); + SET_INTEGER_ELT(y_row_names, 0, NA_INTEGER); + SET_INTEGER_ELT(y_row_names, 1, 0); + Rf_setAttrib(y, R_RowNamesSymbol, y_row_names); + cpp11::data_frame y_df(y); + expect_true(y_df.nrow() == 0); + UNPROTECT(2); + + // From classed data frame with `R_RowNamesSymbol` + SEXP z = PROTECT(Rf_allocVector(VECSXP, 0)); + SEXP z_row_names = PROTECT(Rf_allocVector(INTSXP, 2)); + SET_INTEGER_ELT(z_row_names, 0, NA_INTEGER); + SET_INTEGER_ELT(z_row_names, 1, 0); + Rf_setAttrib(z, R_RowNamesSymbol, z_row_names); + SEXP z_class = PROTECT(Rf_allocVector(STRSXP, 1)); + SET_STRING_ELT(z_class, 0, Rf_mkChar("data.frame")); + Rf_setAttrib(z, R_ClassSymbol, z_class); + cpp11::data_frame z_df(z); + expect_true(z_df.nrow() == 0); + UNPROTECT(3); } test_that("writable::data_frame::nrow works with 0x0 dfs") { + using namespace cpp11::literals; + + // From bare list SEXP x = PROTECT(Rf_allocVector(VECSXP, 0)); + cpp11::writable::data_frame x_df(x); + expect_true(x_df.nrow() == 0); + UNPROTECT(1); - cpp11::writable::data_frame df(x); - expect_true(df.nrow() == 0); + // From bare list with `R_RowNamesSymbol` + cpp11::writable::list y(0_xl); + y.attr(R_RowNamesSymbol) = {NA_INTEGER, 0}; + cpp11::writable::data_frame y_df(y); + expect_true(y_df.nrow() == 0); + + // From classed data frame with `R_RowNamesSymbol` + cpp11::writable::list z(0_xl); + z.attr(R_RowNamesSymbol) = {NA_INTEGER, 0}; + z.attr(R_ClassSymbol) = "data.frame"; + cpp11::writable::data_frame z_df(z); + expect_true(z_df.nrow() == 0); + } - UNPROTECT(1); + test_that("data_frame::nrow works with 10x0 dfs") { + // From bare list with `R_RowNamesSymbol` + SEXP y = PROTECT(Rf_allocVector(VECSXP, 0)); + SEXP y_row_names = PROTECT(Rf_allocVector(INTSXP, 2)); + SET_INTEGER_ELT(y_row_names, 0, NA_INTEGER); + SET_INTEGER_ELT(y_row_names, 1, 10); + Rf_setAttrib(y, R_RowNamesSymbol, y_row_names); + cpp11::data_frame y_df(y); + expect_true(y_df.nrow() == 10); + UNPROTECT(2); + + // From classed data frame with `R_RowNamesSymbol` + SEXP z = PROTECT(Rf_allocVector(VECSXP, 0)); + SEXP z_row_names = PROTECT(Rf_allocVector(INTSXP, 2)); + SET_INTEGER_ELT(z_row_names, 0, NA_INTEGER); + SET_INTEGER_ELT(z_row_names, 1, 10); + Rf_setAttrib(z, R_RowNamesSymbol, z_row_names); + SEXP z_class = PROTECT(Rf_allocVector(STRSXP, 1)); + SET_STRING_ELT(z_class, 0, Rf_mkChar("data.frame")); + Rf_setAttrib(z, R_ClassSymbol, z_class); + cpp11::data_frame z_df(z); + expect_true(z_df.nrow() == 10); + UNPROTECT(3); } test_that("writable::data_frame::nrow works with 10x0 dfs (#272)") { - SEXP x = PROTECT(Rf_allocVector(VECSXP, 0)); + using namespace cpp11::literals; - bool is_altrep = false; - R_xlen_t nrow = 10; + // From bare list with `R_RowNamesSymbol` + cpp11::writable::list y(0_xl); + y.attr(R_RowNamesSymbol) = {NA_INTEGER, 10}; + cpp11::writable::data_frame y_df(y); + expect_true(y_df.nrow() == 10); + + // From classed data frame with `R_RowNamesSymbol` + cpp11::writable::list z(0_xl); + z.attr(R_RowNamesSymbol) = {NA_INTEGER, 10}; + z.attr(R_ClassSymbol) = "data.frame"; + cpp11::writable::data_frame z_df(z); + expect_true(z_df.nrow() == 10); // Manually specify `nrow` using special constructor - cpp11::writable::data_frame df(x, is_altrep, nrow); + bool is_altrep = false; + SEXP x = PROTECT(Rf_allocVector(VECSXP, 0)); + cpp11::writable::data_frame df(x, is_altrep, 10); expect_true(df.nrow() == 10); + UNPROTECT(1); + } + + test_that("data_frame::nrow works with 0x1 dfs") { + // From bare list + SEXP x = PROTECT(Rf_allocVector(VECSXP, 1)); + SET_VECTOR_ELT(x, 0, Rf_allocVector(INTSXP, 0)); + cpp11::data_frame x_df(x); + expect_true(x_df.nrow() == 0); + UNPROTECT(1); + } + test_that("writable::data_frame::nrow works with 0x1 dfs") { + // From bare list + SEXP x = PROTECT(Rf_allocVector(VECSXP, 1)); + SET_VECTOR_ELT(x, 0, Rf_allocVector(INTSXP, 0)); + cpp11::writable::data_frame x_df(x); + expect_true(x_df.nrow() == 0); UNPROTECT(1); } diff --git a/inst/include/cpp11/data_frame.hpp b/inst/include/cpp11/data_frame.hpp index a4748de0..7bb156ab 100644 --- a/inst/include/cpp11/data_frame.hpp +++ b/inst/include/cpp11/data_frame.hpp @@ -1,7 +1,6 @@ #pragma once -#include // for abs -#include +#include // for abs #include // for initializer_list #include // for string, basic_string #include // for move @@ -24,42 +23,38 @@ class data_frame : public list { friend class writable::data_frame; - /* we cannot use Rf_getAttrib because it has a special case for c(NA, -n) and creates - * the full vector */ - static SEXP get_attrib0(SEXP x, SEXP sym) { - for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) { - if (TAG(attr) == sym) { - return CAR(attr); - } - } - - return R_NilValue; - } - - static R_xlen_t calc_nrow(SEXP x) { - auto nms = get_attrib0(x, R_RowNamesSymbol); - bool has_short_rownames = - (Rf_isInteger(nms) && Rf_xlength(nms) == 2 && INTEGER(nms)[0] == NA_INTEGER); - if (has_short_rownames) { - return static_cast(abs(INTEGER(nms)[1])); - } - - if (!Rf_isNull(nms)) { - return Rf_xlength(nms); + static R_xlen_t calculate_nrow(SEXP x) { + // If there is a `R_RowNamesSymbol`, we take the number of rows from there + // (regardless of whether or not there is a `"data.frame"` class yet!). + // + // As of R >=3.5, `Rf_getAttrib(R_RowNamesSymbol)` returns one of the following: + // - A character vector + // - An integer vector + // - An ALTREP integer compact intrange (converted cheaply from `c(NA, -n)`) + // + // We can take the `Rf_xlength()` of all of these cheaply. + // + // We used to worry about `Rf_getAttrib()` fully expanding `c(NA, -n)`, but with + // ALTREP integer compact intranges that is no longer the case. + SEXP row_names = Rf_getAttrib(x, R_RowNamesSymbol); + if (row_names != R_NilValue) { + return Rf_xlength(row_names); } + // Otherwise it's a bare list, and we infer the number of rows from the first element + // (i.e. first column). Calling `Rf_xlength()` on the first column isn't 100% right + // (it doesn't dispatch to `length()`, nor does it correctly handle df-cols or + // matrix-cols), but it is close enough and people can use the data_frame constructor + // that allows you to specify `nrow` directly as needed. if (Rf_xlength(x) == 0) { return 0; + } else { + return Rf_xlength(VECTOR_ELT(x, 0)); } - - return Rf_xlength(VECTOR_ELT(x, 0)); } public: - /* Adapted from - * https://github.com/wch/r-source/blob/f2a0dfab3e26fb42b8b296fcba40cbdbdbec767d/src/main/attrib.c#L198-L207 - */ - R_xlen_t nrow() const { return calc_nrow(*this); } + R_xlen_t nrow() const { return calculate_nrow(*this); } R_xlen_t ncol() const { return size(); } }; @@ -67,10 +62,11 @@ namespace writable { class data_frame : public cpp11::data_frame { private: writable::list set_data_frame_attributes(writable::list&& x) { - return set_data_frame_attributes(std::move(x), calc_nrow(x)); + return set_data_frame_attributes(std::move(x), calculate_nrow(x)); } writable::list set_data_frame_attributes(writable::list&& x, R_xlen_t nrow) { + // `Rf_setAttrib(R_RowNamesSymbol)` will keep `c(NA, -n)` in compact form x.attr(R_RowNamesSymbol) = {NA_INTEGER, -static_cast(nrow)}; x.attr(R_ClassSymbol) = "data.frame"; return std::move(x);