Skip to content

Commit 29cc263

Browse files
authored
ARROW-17085: [R] group_vars() should not return NULL (#13621)
If an ungrouped data.frame or an `arrow_dplyr_query` is given to `dplyr::group_vars()`, `character()` returns. But for an ungrouped Table, `NULL` is returned. ```r mtcars |> dplyr::group_vars() #> character(0) mtcars |> arrow:::as_adq() |> dplyr::group_vars() #> character(0) mtcars |> arrow::arrow_table() |> dplyr::group_vars() #> NULL ``` Therefore, functions that expect `group_vars` to return character, such as the following, will fail. ```r mtcars |> arrow::arrow_table() |> dtplyr::lazy_dt() #> Error in new_step(parent, vars = names(parent), groups = groups, locals = list(), : is.character(groups) is not TRUE ``` This PR modifies `dplyr::group_vars()` and `dplyr::groups()` for Arrow objects to work the same as for data.frame. (Note that `arrow_dplyr_query` already works the same way as data.frame.) Lead-authored-by: SHIMA Tatsuya <ts1s1andn@gmail.com> Co-authored-by: eitsupi <50911393+eitsupi@users.noreply.github.com> Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
1 parent f295da4 commit 29cc263

5 files changed

Lines changed: 21 additions & 9 deletions

File tree

r/R/dplyr-group-by.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,13 @@ group_by.arrow_dplyr_query <- function(.data,
5858
group_by.Dataset <- group_by.ArrowTabular <- group_by.RecordBatchReader <- group_by.arrow_dplyr_query
5959

6060
groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
61-
groups.Dataset <- groups.ArrowTabular <- groups.RecordBatchReader <- function(x) NULL
61+
groups.Dataset <- groups.ArrowTabular <- groups.RecordBatchReader <- groups.arrow_dplyr_query
6262

6363
group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
64-
group_vars.Dataset <- function(x) NULL
65-
group_vars.RecordBatchReader <- function(x) NULL
64+
group_vars.Dataset <- function(x) character()
65+
group_vars.RecordBatchReader <- function(x) character()
6666
group_vars.ArrowTabular <- function(x) {
67-
x$metadata$r$attributes$.group_vars
67+
x$metadata$r$attributes$.group_vars %||% character()
6868
}
6969

7070
# the logical literal in the two functions below controls the default value of

r/R/dplyr.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ arrow_dplyr_query <- function(.data) {
4242
gv <- tryCatch(
4343
# If dplyr is not available, or if the input doesn't have a group_vars
4444
# method, assume no group vars
45-
dplyr::group_vars(.data) %||% character(),
45+
dplyr::group_vars(.data),
4646
error = function(e) character()
4747
)
4848

r/tests/testthat/test-RecordBatch.R

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,7 @@ test_that("Handling string data with embedded nuls", {
654654
})
655655
})
656656

657-
test_that("ARROW-11769/ARROW-13860 - grouping preserved in record batch creation", {
657+
test_that("ARROW-11769/ARROW-13860/ARROW-17085 - grouping preserved in record batch creation", {
658658
skip_if_not_available("dataset")
659659
library(dplyr, warn.conflicts = FALSE)
660660

@@ -670,6 +670,12 @@ test_that("ARROW-11769/ARROW-13860 - grouping preserved in record batch creation
670670
record_batch(),
671671
"RecordBatch"
672672
)
673+
expect_identical(
674+
tbl %>%
675+
record_batch() %>%
676+
group_vars(),
677+
group_vars(tbl)
678+
)
673679
expect_identical(
674680
tbl %>%
675681
group_by(fct, fct2) %>%
@@ -683,7 +689,7 @@ test_that("ARROW-11769/ARROW-13860 - grouping preserved in record batch creation
683689
record_batch() %>%
684690
ungroup() %>%
685691
group_vars(),
686-
NULL
692+
character()
687693
)
688694
expect_identical(
689695
tbl %>%

r/tests/testthat/test-Table.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ test_that("cbind.Table handles record batches and tables", {
592592
)
593593
})
594594

595-
test_that("ARROW-11769 - grouping preserved in table creation", {
595+
test_that("ARROW-11769/ARROW-17085 - grouping preserved in table creation", {
596596
skip_if_not_available("dataset")
597597

598598
tbl <- tibble::tibble(
@@ -601,6 +601,12 @@ test_that("ARROW-11769 - grouping preserved in table creation", {
601601
fct2 = factor(rep(c("C", "D"), each = 5)),
602602
)
603603

604+
expect_identical(
605+
tbl %>%
606+
Table$create() %>%
607+
dplyr::group_vars(),
608+
dplyr::group_vars(tbl)
609+
)
604610
expect_identical(
605611
tbl %>%
606612
dplyr::group_by(fct, fct2) %>%

r/tests/testthat/test-metadata.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ test_that("grouped_df metadata is recorded (efficiently)", {
382382
expect_s3_class(grouped, "grouped_df")
383383
grouped_tab <- Table$create(grouped)
384384
expect_r6_class(grouped_tab, "Table")
385-
expect_equal(grouped_tab$r_metadata$attributes$.group_vars, "a")
385+
expect_equal(grouped_tab$metadata$r$attributes$.group_vars, "a")
386386
})
387387

388388
test_that("grouped_df non-arrow metadata is preserved", {

0 commit comments

Comments
 (0)