diff --git a/NEWS.md b/NEWS.md index fd0ee8bf1..e95190940 100644 --- a/NEWS.md +++ b/NEWS.md @@ -28,6 +28,8 @@ 4. `dcast()` and `melt()` "just work" when passed a data.frame, not just data.tables, with no need for coercion, [#7614](https://github.com/Rdatatable/data.table/issues/7614). Thanks @MichaelChirico for the suggestion and @manmita for the PR. Note that to avoid potential conflicts with {reshape2}'s data.frame methods, we do the dispatch to the data.table method manually. +5. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `depth=1L`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR + ### BUG FIXES 1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix. diff --git a/R/tables.R b/R/tables.R index 960c74343..ed62358ae 100644 --- a/R/tables.R +++ b/R/tables.R @@ -19,7 +19,8 @@ type_size = function(DT) { } tables = function(mb=type_size, order.col="NAME", width=80L, - env=parent.frame(), silent=FALSE, index=FALSE) + env=parent.frame(), silent=FALSE, index=FALSE, + depth=0L) { # Prints name, size and colnames of all data.tables in the calling environment by default mb_name = as.character(substitute(mb)) @@ -27,11 +28,78 @@ tables = function(mb=type_size, order.col="NAME", width=80L, names = ls(envir=env, all.names=TRUE) # include "hidden" objects (starting with .) obj = mget(names, envir=env) # doesn't copy; mget is ok with ... unlike get, #5197 w = which(vapply_1b(obj, is.data.table)) - if (!length(w)) { - if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) - return(invisible(data.table(NULL))) + + info = NULL + # we check if depth=1L is requested and add found tables to w + if (depth == 1L) { + is_list = vapply_1b(obj, is.list) + is_df = vapply_1b(obj, is.data.frame) + is_dt = vapply_1b(obj, is.data.table) + # list_index is a index of list which is not data.frame or data.table + list_index = which(is_list & !is_dt & !is_df) + # obj_list is a list of lists of data.tables found inside lists + obj_list = vector("list", length(list_index)) + # make a list of size list_index and add wl in it + total_dt = 0L + # filling obj_list and counting total_dt + for (i in seq_along(list_index)) { + L = obj[[list_index[i]]] + wl = which(vapply_1b(L, is.data.table)) + total_dt = total_dt + length(wl) + obj_list[[i]] = L[wl] + } + name_count = length(w) + total_dt + # initialize info data.table with total number of data.tables found + if (name_count == 0L) { + # nocov start. Requires long-running test case + if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) + # nocov end + return(invisible(data.table(NULL))) + } + # create info data.table with total rows equal to number of data.tables found + info = data.table(NAME=character(name_count), NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list()) + # fill in the names of data.tables found in w + for (i in seq_along(w)) { # names of w items + set(info, i, "NAME", names[w[i]]) + } + # now fill in the data.tables found inside lists + cnt = 1L + if (total_dt > 0L) { + for (i in seq_along(list_index)) { + if (length(obj_list[[i]]) == 0L) next + # get the parent list name + parent_name = names[list_index[i]] + for (j in seq_along(obj_list[[i]])) { + elem_names = names(obj[[list_index[i]]]) + if (!is.null(elem_names) && nzchar(elem_names[j])) { + new_name = paste0(parent_name, "$", elem_names[j]) + } else { + new_name = paste0(parent_name, "[[", j, "]]") + } + DT = obj_list[[i]][[j]] + k = cnt + length(w) # row number in info data.table + cnt = cnt + 1L + set(info, k, "NAME", new_name) + set(info, k, "NROW", nrow(DT)) + set(info, k, "NCOL", ncol(DT)) + if (is.function(mb)) set(info, k, "MB", as.integer(mb(DT)/1048576L)) # i.e. 1024**2 + if (!is.null(tt<-names(DT))) set(info, k, "COLS", tt) # TODO: don't need these if()s when #5526 is done + if (!is.null(tt<-key(DT))) set(info, k, "KEY", tt) + if (index && !is.null(tt<-indices(DT))) set(info, k, "INDICES", tt) + } + } + } + } else if (depth == 0L) { + # if depth is 0 + if (!length(w)) { + if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) + return(invisible(data.table(NULL))) + } + info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list()) + } else { + # for depth greater than 1L,recursion is not implemented yet + stopf("depth > 1L is not implemented yet") } - info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list()) for (i in seq_along(w)) { # avoid rbindlist(lapply(DT_names)) in case of a large number of tables DT = obj[[w[i]]] set(info, i, "NROW", nrow(DT)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f30467dae..01747a63a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21515,3 +21515,29 @@ test(2365.1, melt(df_melt, id.vars=1:2), melt(dt_melt, id.vars=1:2)) df_dcast = data.frame(a = c("x", "y"), b = 1:2, v = 3:4) dt_dcast = data.table(a = c("x", "y"), b = 1:2, v = 3:4) test(2365.2, dcast(df_dcast, a ~ b, value.var = "v"), dcast(dt_dcast, a ~ b, value.var = "v")) + +#2606 tables() depth=1 finds nested data.tables in lists +# creating env so that the names are within it +xenv2 = new.env() +xenv2$DT = data.table(a = 1L) +xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10)) +xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5) +xenv2$N = list(a = 1:5) +test(2366.1, tables(env = xenv2, depth = 1L)[, .(NAME, NROW, NCOL)], + data.table( + NAME = c("DT", "L[[1]]", "L[[2]]", "M$b"), + NROW = c(1L, 3L, 4L, 3L), + NCOL = c(1L, 2L, 2L, 2L) +)) +setindex(xenv2$M$b, b) +test(2366.2, tables(env = xenv2, depth = 1L, index = TRUE)$INDICES, list(NULL, NULL, NULL, "b")) +setkey(xenv2$M$b, a) +test(2366.3, tables(env = xenv2, depth = 1L, index = TRUE)$KEY, list(NULL, NULL, NULL, "a")) +test(2366.4, tryCatch(tables(env = xenv2, depth = 2L), error = function(e) e$message), "depth > 1L is not implemented yet") +rm(xenv2) + +# no data.table test and depth >1 test +xenv_empty = new.env() +test(2366.5, tables(env = xenv_empty, depth = 1L), invisible(data.table(NULL))) +test(2366.6, tables(env = xenv_empty), invisible(data.table(NULL))) +rm(xenv_empty) diff --git a/man/tables.Rd b/man/tables.Rd index 5898cd730..437d53e1a 100644 --- a/man/tables.Rd +++ b/man/tables.Rd @@ -6,7 +6,7 @@ } \usage{ tables(mb=type_size, order.col="NAME", width=80, - env=parent.frame(), silent=FALSE, index=FALSE) + env=parent.frame(), silent=FALSE, index=FALSE, depth=0L) } \arguments{ \item{mb}{ a function which accepts a \code{data.table} and returns its size in bytes. By default, \code{type_size} (same as \code{TRUE}) provides a fast lower bound by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). A column \code{"MB"} is included in the output unless \code{FALSE} or \code{NULL}. } @@ -15,6 +15,7 @@ tables(mb=type_size, order.col="NAME", width=80, \item{env}{ An \code{environment}, typically the \code{.GlobalEnv} by default, see Details. } \item{silent}{ \code{logical}; should the output be printed? } \item{index}{ \code{logical}; if \code{TRUE}, the column \code{INDICES} is added to indicate the indices assorted with each object, see \code{\link{indices}}. } + \item{depth}{\code{integer}; if \code{1L}, searches for \code{data.table} objects inside top-level lists. If depth = 0L it accepts data.table and Values greater than \code{1L} are not implemented yet.} } \details{ Usually \code{tables()} is executed at the prompt, where \code{parent.frame()} returns \code{.GlobalEnv}. \code{tables()} may also be useful inside functions where \code{parent.frame()} is the local scope of the function; in such a scenario, simply set it to \code{.GlobalEnv} to get the same behaviour as at prompt.