Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ summarise.dm_zoomed <- function(.data, ..., .by = NULL, .groups = NULL) {
# #663: user responsibility: if group columns are manipulated, they are still tracked
groups <- set_names(map_chr(groups(tbl), as_string))
summarized_tbl <- summarize(tbl, ..., .by = {{ .by }}, .groups = .groups)

# Set group vars as new PK
attr(summarized_tbl, "new_pk") <- unname(groups)

new_tracked_cols_zoom <- new_tracked_cols(.data, groups)
replace_zoomed_tbl(.data, summarized_tbl, new_tracked_cols_zoom)
}
Expand Down Expand Up @@ -536,6 +540,9 @@ count.dm_zoomed <- function(
out <- dplyr_reconstruct(out, tbl)
}

# Set counted columns as new PK
attr(out, "new_pk") <- unname(groups)

new_tracked_cols_zoom <- new_tracked_cols(x, groups)
replace_zoomed_tbl(x, out, new_tracked_cols_zoom)
}
Expand All @@ -552,9 +559,22 @@ count.dm_keyed_tbl <- function(
) {
keys_info <- keyed_get_info(x)
tbl <- unclass_keyed_tbl(x)

if (!missing(...)) {
grouped <- group_by(tbl, ..., .add = TRUE, .drop = .drop)
} else {
grouped <- tbl
}

new_pk <- group_vars(grouped)
if (length(new_pk) == 0) {
new_pk <- NULL
}

counted_tbl <- count(tbl, ..., wt = {{ wt }}, sort = sort, name = name, .drop = .drop)
new_keyed_tbl(
counted_tbl,
pk = new_pk,
uuid = keys_info$uuid
)
}
Expand All @@ -577,6 +597,9 @@ tally.dm_zoomed <- function(x, wt = NULL, sort = FALSE, name = NULL) {
out <- dplyr_reconstruct(out, tbl)
}

# Set group vars as new PK
attr(out, "new_pk") <- unname(groups)

new_tracked_cols_zoom <- new_tracked_cols(x, groups)
replace_zoomed_tbl(x, out, new_tracked_cols_zoom)
}
Expand All @@ -586,9 +609,16 @@ tally.dm_zoomed <- function(x, wt = NULL, sort = FALSE, name = NULL) {
tally.dm_keyed_tbl <- function(x, wt = NULL, sort = FALSE, name = NULL) {
keys_info <- keyed_get_info(x)
tbl <- unclass_keyed_tbl(x)

new_pk <- group_vars(x)
if (length(new_pk) == 0) {
new_pk <- NULL
}

tallied_tbl <- tally(tbl, wt = {{ wt }}, sort = sort, name = name)
new_keyed_tbl(
tallied_tbl,
pk = new_pk,
uuid = keys_info$uuid
)
}
Expand Down
18 changes: 18 additions & 0 deletions R/zoom.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,14 @@ update_zoomed_pk <- function(dm) {
tracked_cols <- col_tracker_zoomed(dm)
orig_pk <- dm_get_pk_impl(dm, old_tbl_name)

# Check for new PK set by count/tally
new_pk_cols <- attr(tracked_cols, "new_pk")
if (!is.null(new_pk_cols) && length(new_pk_cols) > 0) {
return(new_pk(list(new_pk_cols)))
} else if (!is.null(new_pk_cols)) {
return(new_pk())
}

if (has_length(orig_pk) && all(get_key_cols(orig_pk) %in% tracked_cols)) {
upd_pk <- new_pk(list(recode2(get_key_cols(orig_pk), tracked_cols)))
} else {
Expand Down Expand Up @@ -360,6 +368,16 @@ replace_zoomed_tbl <- function(dm, new_zoomed_tbl, tracked_cols = NULL) {
table <- orig_name_zoomed(dm)
def <- dm_get_def(dm)
where <- which(def$table == table)

# Transfer new_pk attribute from zoomed table to tracked_cols
new_pk_attr <- attr(new_zoomed_tbl, "new_pk")
if (!is.null(new_pk_attr)) {
attr(new_zoomed_tbl, "new_pk") <- NULL
if (!is.null(tracked_cols)) {
attr(tracked_cols, "new_pk") <- new_pk_attr
}
}

def$zoom[[where]] <- new_zoomed_tbl
# the tracked columns are only replaced if they changed, otherwise this function is called with default `NULL`
if (!is_null(tracked_cols)) {
Expand Down
39 changes: 20 additions & 19 deletions tests/testthat/_snaps/dplyr.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,15 @@
Output
$pks
# A tibble: 7 x 3
table pk_col autoincrement
<chr> <keys> <lgl>
1 tf_1 a TRUE
2 tf_2 c FALSE
3 tf_3 f, f1 FALSE
4 tf_4 h FALSE
5 tf_5 k FALSE
6 tf_6 o FALSE
7 new_tbl c FALSE
table pk_col autoincrement
<chr> <keys> <lgl>
1 tf_1 a TRUE
2 tf_2 c FALSE
3 tf_3 f, f1 FALSE
4 tf_4 h FALSE
5 tf_5 k FALSE
6 tf_6 o FALSE
7 new_tbl c, e, e1 FALSE

$fks
# A tibble: 6 x 5
Expand All @@ -168,15 +168,16 @@
"new_tbl") %>% get_all_keys()
Output
$pks
# A tibble: 6 x 3
table pk_col autoincrement
<chr> <keys> <lgl>
1 tf_1 a TRUE
2 tf_2 c FALSE
3 tf_3 f, f1 FALSE
4 tf_4 h FALSE
5 tf_5 k FALSE
6 tf_6 o FALSE
# A tibble: 7 x 3
table pk_col autoincrement
<chr> <keys> <lgl>
1 tf_1 a TRUE
2 tf_2 c FALSE
3 tf_3 f, f1 FALSE
4 tf_4 h FALSE
5 tf_5 k FALSE
6 tf_6 o FALSE
7 new_tbl g FALSE

$fks
# A tibble: 5 x 5
Expand Down Expand Up @@ -428,7 +429,7 @@
-- Metadata --------------------------------------------------------------------
Tables: `airlines`, `airports`, `flights`, `planes`, `weather`
Columns: 41
Primary keys: 3
Primary keys: 4
Foreign keys: 3
Code
grouped_zoomed_comp_dm_2 %>% summarize(count = n()) %>% dm_update_zoomed()
Expand Down
45 changes: 45 additions & 0 deletions tests/testthat/test-deconstruct.R
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,51 @@ test_that("summarize for keyed tables produces same output as zooming", {
expect_equal(z_summary$avg_air_time, k_summary$avg_air_time)
})

test_that("count for keyed tables sets counted columns as pk", {
dm <- dm_nycflights13(cycle = TRUE)

# count on non-PK column should set it as new PK
by_origin <- keyed_tbl_impl(dm, "flights") %>%
count(origin)
expect_s3_class(by_origin, "dm_keyed_tbl")
expect_equal(keyed_get_info(by_origin)$pk, "origin")

# count on existing PK column should keep it as PK
by_faa <- keyed_tbl_impl(dm, "airports") %>%
count(faa)
expect_s3_class(by_faa, "dm_keyed_tbl")
expect_equal(keyed_get_info(by_faa)$pk, "faa")

# count on multiple columns should set them all as PK
by_origin_dest <- keyed_tbl_impl(dm, "flights") %>%
count(origin, dest)
expect_s3_class(by_origin_dest, "dm_keyed_tbl")
expect_equal(keyed_get_info(by_origin_dest)$pk, c("origin", "dest"))

# count with no columns (like tally) should have no PK
counted_all <- keyed_tbl_impl(dm, "airports") %>%
count()
expect_s3_class(counted_all, "dm_keyed_tbl")
expect_null(keyed_get_info(counted_all)$pk)
})

test_that("tally for keyed tables sets group vars as pk", {
dm <- dm_nycflights13(cycle = TRUE)

# tally on grouped keyed table should set group vars as PK
by_origin <- keyed_tbl_impl(dm, "flights") %>%
group_by(origin) %>%
tally()
expect_s3_class(by_origin, "dm_keyed_tbl")
expect_equal(keyed_get_info(by_origin)$pk, "origin")

# tally without groups should have no PK
tallied <- keyed_tbl_impl(dm, "airports") %>%
tally()
expect_s3_class(tallied, "dm_keyed_tbl")
expect_null(keyed_get_info(tallied)$pk)
})

# reconstruction ----------------------------------

test_that("pks_df_from_keys_info()", {
Expand Down
79 changes: 78 additions & 1 deletion tests/testthat/test-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,83 @@ test_that("basic test: 'tally()'-method works", {
)
})

test_that("count on zoomed dm sets counted columns as pk", {
skip_if_remote_src()

dm <- dm_for_filter()

# count by non-PK column should set it as new PK
result <- dm %>%
dm_zoom_to(tf_2) %>%
count(e) %>%
dm_update_zoomed()
expect_identical(dm_get_pk(result, tf_2)[[1]], "e")

# count by PK column should keep it as PK
result2 <- dm %>%
dm_zoom_to(tf_2) %>%
count(c) %>%
dm_update_zoomed()
expect_identical(dm_get_pk(result2, tf_2)[[1]], "c")

# count with no args should drop PK
result3 <- dm %>%
dm_zoom_to(tf_2) %>%
count() %>%
dm_update_zoomed()
expect_identical(length(dm_get_pk(result3, tf_2)), 0L)
})

test_that("tally on zoomed dm sets group vars as pk", {
skip_if_remote_src()

dm <- dm_for_filter()

# tally on grouped zoomed dm should set group vars as PK
result <- dm %>%
dm_zoom_to(tf_2) %>%
group_by(e) %>%
tally() %>%
dm_update_zoomed()
expect_identical(dm_get_pk(result, tf_2)[[1]], "e")

# tally without groups should drop PK
result2 <- dm %>%
dm_zoom_to(tf_2) %>%
tally() %>%
dm_update_zoomed()
expect_identical(length(dm_get_pk(result2, tf_2)), 0L)
})

test_that("summarise on zoomed dm sets group vars as pk", {
skip_if_remote_src()

dm <- dm_for_filter()

# summarise by non-PK column should set it as new PK
result <- dm %>%
dm_zoom_to(tf_2) %>%
group_by(e) %>%
summarise(d_mean = mean(d)) %>%
dm_update_zoomed()
expect_identical(dm_get_pk(result, tf_2)[[1]], "e")

# summarise by PK column should keep it as PK
result2 <- dm %>%
dm_zoom_to(tf_2) %>%
group_by(c) %>%
summarise(d_mean = mean(d)) %>%
dm_update_zoomed()
expect_identical(dm_get_pk(result2, tf_2)[[1]], "c")

# summarise with no groups should drop PK
result3 <- dm %>%
dm_zoom_to(tf_2) %>%
summarise(d_mean = mean(d)) %>%
dm_update_zoomed()
expect_identical(length(dm_get_pk(result3, tf_2)), 0L)
})

test_that("basic test: 'filter()'-methods work", {
skip_if_src("maria")

Expand Down Expand Up @@ -558,7 +635,7 @@ test_that("key tracking works", {
dm_insert_zoomed("new_tbl") %>%
get_all_keys()

# grouped_by non-key col means, that no keys remain
# grouped_by non-key col: group vars become new PK
zoomed_grouped_in_dm %>%
summarize(g_list = list(g)) %>%
dm_insert_zoomed("new_tbl") %>%
Expand Down