Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

4. `dcast()` and `melt()` "just work" when passed a data.frame, not just data.tables, with no need for coercion, [#7614](https://github.com/Rdatatable/data.table/issues/7614). Thanks @MichaelChirico for the suggestion and @manmita for the PR. Note that to avoid potential conflicts with {reshape2}'s data.frame methods, we do the dispatch to the data.table method manually.

5. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `depth=1L`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR.

### BUG FIXES

1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
Expand Down
78 changes: 73 additions & 5 deletions R/tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,87 @@ type_size = function(DT) {
}

tables = function(mb=type_size, order.col="NAME", width=80L,
env=parent.frame(), silent=FALSE, index=FALSE)
env=parent.frame(), silent=FALSE, index=FALSE,
depth=0L)
{
# Prints name, size and colnames of all data.tables in the calling environment by default
mb_name = as.character(substitute(mb))
if (isTRUE(mb)) { mb=type_size; mb_name="type_size" }
names = ls(envir=env, all.names=TRUE) # include "hidden" objects (starting with .)
obj = mget(names, envir=env) # doesn't copy; mget is ok with ... unlike get, #5197
w = which(vapply_1b(obj, is.data.table))
if (!length(w)) {
if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
return(invisible(data.table(NULL)))

info = NULL
# when depth=1L, also collect data.tables stored as elements of top-level lists; their rows go after the length(w) top-level rows of info
if (depth == 1L) {
is_list = vapply_1b(obj, is.list)
is_df = vapply_1b(obj, is.data.frame)
is_dt = vapply_1b(obj, is.data.table)
# list_index holds the positions in obj of plain lists, i.e. lists that are neither data.frames nor data.tables
list_index = which(is_list & !is_dt & !is_df)
# obj_list is a list of lists of data.tables found inside lists
obj_list = vector("list", length(list_index))
# make a list of size list_index and add wl in it
total_dt = 0L
# filling obj_list and counting total_dt
for (i in seq_along(list_index)) {
L = obj[[list_index[i]]]
wl = which(vapply_1b(L, is.data.table))
total_dt = total_dt + length(wl)
obj_list[[i]] = L[wl]
}
name_count = length(w) + total_dt
# initialize info data.table with total number of data.tables found
if (name_count == 0L) {
# nocov start. Requires long-running test case
if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
# nocov end
return(invisible(data.table(NULL)))
}
# create info data.table with total rows equal to number of data.tables found
info = data.table(NAME=character(name_count), NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
# fill in the names of data.tables found in w
for (i in seq_along(w)) { # names of w items
set(info, i, "NAME", names[w[i]])
}
# now fill in the data.tables found inside lists
cnt = 1L
if (total_dt > 0L) {
for (i in seq_along(list_index)) {
if (length(obj_list[[i]]) == 0L) next
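# get the parent list name
# NOTE(review): in the loop below, j indexes the filtered obj_list[[i]] (data.tables only), while elem_names is taken from the unfiltered parent list, so the reported name/position is only right when the data.tables are the leading elements of the list -- confirm and index by the original positions instead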
parent_name = names[list_index[i]]
for (j in seq_along(obj_list[[i]])) {
elem_names = names(obj[[list_index[i]]])
if (!is.null(elem_names) && nzchar(elem_names[j])) {
new_name = paste0(parent_name, "$", elem_names[j])
} else {
new_name = paste0(parent_name, "[[", j, "]]")
}
DT = obj_list[[i]][[j]]
k = cnt + length(w) # row number in info data.table
cnt = cnt + 1L
set(info, k, "NAME", new_name)
set(info, k, "NROW", nrow(DT))
set(info, k, "NCOL", ncol(DT))
if (is.function(mb)) set(info, k, "MB", as.integer(mb(DT)/1048576L)) # i.e. 1024**2
if (!is.null(tt<-names(DT))) set(info, k, "COLS", tt) # TODO: don't need these if()s when #5526 is done
if (!is.null(tt<-key(DT))) set(info, k, "KEY", tt)
if (index && !is.null(tt<-indices(DT))) set(info, k, "INDICES", tt)
}
}
}
} else if (depth == 0L) {
# if depth is 0
if (!length(w)) {
if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
return(invisible(data.table(NULL)))
}
info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
} else {
# any other depth value (e.g. greater than 1L): recursing into nested lists is not implemented yet
stopf("depth > 1L is not implemented yet")
}
info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
for (i in seq_along(w)) { # avoid rbindlist(lapply(DT_names)) in case of a large number of tables
DT = obj[[w[i]]]
set(info, i, "NROW", nrow(DT))
Expand Down
26 changes: 26 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -21515,3 +21515,29 @@ test(2365.1, melt(df_melt, id.vars=1:2), melt(dt_melt, id.vars=1:2))
df_dcast = data.frame(a = c("x", "y"), b = 1:2, v = 3:4)
dt_dcast = data.table(a = c("x", "y"), b = 1:2, v = 3:4)
test(2365.2, dcast(df_dcast, a ~ b, value.var = "v"), dcast(dt_dcast, a ~ b, value.var = "v"))

# #2606: tables(depth=1L) also reports data.tables stored as elements of top-level lists.
# Use a dedicated environment so only the objects created here are visible to tables().
xenv2 = new.env()
xenv2$DT = data.table(a = 1L)                                            # top-level data.table
xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10))  # unnamed list -> reported as L[[i]]
xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5)                  # named list -> reported as M$b
xenv2$N = list(a = 1:5)                                                  # list without any data.table -> not reported
# NOTE(review): every nested data.table above is a leading element of its list, so these tests
# do not cover a data.table that follows a non-data.table element, e.g. list(a = 1:5, b = data.table(...)).
test(2366.1, tables(env = xenv2, depth = 1L)[, .(NAME, NROW, NCOL)],
     data.table(
       NAME = c("DT", "L[[1]]", "L[[2]]", "M$b"),
       NROW = c(1L, 3L, 4L, 3L),
       NCOL = c(1L, 2L, 2L, 2L)
     ))
# index and key set on a nested data.table must show up in the INDICES and KEY columns
setindex(xenv2$M$b, b)
test(2366.2, tables(env = xenv2, depth = 1L, index = TRUE)$INDICES, list(NULL, NULL, NULL, "b"))
setkey(xenv2$M$b, a)
test(2366.3, tables(env = xenv2, depth = 1L, index = TRUE)$KEY, list(NULL, NULL, NULL, "a"))
# depth > 1L is rejected; use the harness's error= matching rather than a hand-rolled tryCatch
test(2366.4, tables(env = xenv2, depth = 2L), error = "depth > 1L is not implemented yet")
rm(xenv2)

# an environment without any data.table yields an empty data.table for both depth=1L and the default depth
xenv_empty = new.env()
test(2366.5, tables(env = xenv_empty, depth = 1L), data.table(NULL))
test(2366.6, tables(env = xenv_empty), data.table(NULL))
rm(xenv_empty)
3 changes: 2 additions & 1 deletion man/tables.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
}
\usage{
tables(mb=type_size, order.col="NAME", width=80,
env=parent.frame(), silent=FALSE, index=FALSE)
env=parent.frame(), silent=FALSE, index=FALSE, depth=0L)
}
\arguments{
\item{mb}{ a function which accepts a \code{data.table} and returns its size in bytes. By default, \code{type_size} (same as \code{TRUE}) provides a fast lower bound by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). A column \code{"MB"} is included in the output unless \code{FALSE} or \code{NULL}. }
Expand All @@ -15,6 +15,7 @@ tables(mb=type_size, order.col="NAME", width=80,
\item{env}{ An \code{environment}, typically the \code{.GlobalEnv} by default, see Details. }
\item{silent}{ \code{logical}; should the output be printed? }
\item{index}{ \code{logical}; if \code{TRUE}, the column \code{INDICES} is added to indicate the indices associated with each object, see \code{\link{indices}}. }
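\item{depth}{ \code{integer}; how deep to search lists for \code{data.table} objects. With the default \code{0L}, only \code{data.table}s bound directly in \code{env} are reported; with \code{1L}, \code{data.table}s stored as elements of top-level lists are reported as well. Values greater than \code{1L} are not implemented yet. }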
}
\details{
Usually \code{tables()} is executed at the prompt, where \code{parent.frame()} returns \code{.GlobalEnv}. \code{tables()} may also be useful inside functions where \code{parent.frame()} is the local scope of the function; in such a scenario, simply set it to \code{.GlobalEnv} to get the same behaviour as at prompt.
Expand Down
Loading