1 |
#' @title Create TableTree as DataFrame via gentlg |
|
2 |
#' |
|
3 |
#' @param tt TableTree object to convert to a data frame |
|
4 |
#' @param fontspec Font specification object |
|
5 |
#' @param string_map Unicode mapping for special characters |
|
6 |
#' @param markup_df Data frame containing markup information |
|
7 |
#' @return `tt` represented as a `tbl` data.frame suitable for passing |
|
8 |
#' to [tidytlg::gentlg] via the `huxme` argument. |
|
9 |
tt_to_tbldf <- function( |
|
10 |
tt, |
|
11 |
fontspec = font_spec("Times", 9L, 1), |
|
12 |
string_map = default_str_map, |
|
13 |
markup_df = dps_markup_df) { |
|
14 | 44x |
if (!validate_table_struct(tt)) { |
15 | 1x |
stop( |
16 | 1x |
"invalid table structure. summarize_row_groups without ", |
17 | 1x |
"analyze below it in layout structure?" |
18 |
) |
|
19 |
} |
|
20 | 43x |
mpf <- matrix_form( |
21 | 43x |
tt, |
22 | 43x |
indent_rownames = FALSE, |
23 | 43x |
expand_newlines = FALSE, |
24 | 43x |
fontspec = fontspec |
25 |
) |
|
26 | ||
27 | 43x |
strmat <- mf_strings(mpf) |
28 | 43x |
strmat[, 1] <- gsub("^[[:space:]]+", "", strmat[, 1]) |
29 | 43x |
nhl <- mf_nlheader(mpf) |
30 | 43x |
strbody <- strmat[-(1:nhl), , drop = FALSE] |
31 | 43x |
row_type <- apply( |
32 | 43x |
strbody, |
33 | 43x |
1, |
34 | 43x |
function(x) { |
35 | 475x |
nonempty <- nzchar(x) |
36 | 475x |
if (all(!nonempty)) { |
37 |
## shouldn't ever happen |
|
38 | ! |
"EMPTY" |
39 | 475x |
} else if (nonempty[1] && all(!nonempty[-1])) { |
40 |
# only 1st cell nonempty |
|
41 | ! |
"HEADER" |
42 |
} else { |
|
43 | 475x |
if (x[1] == "N") { |
44 | ! |
"N" |
45 |
} else { |
|
46 | 475x |
"VALUE" |
47 |
} |
|
48 |
} |
|
49 |
} |
|
50 |
) |
|
51 | 43x |
rowdf <- mf_rinfo(mpf) |
52 | 43x |
rinds <- mf_lgrouping(mpf)[-(1:nhl)] - |
53 | 43x |
mf_nrheader(mpf) |
54 | 43x |
indentme <- rowdf$indent[rinds] |
55 | 43x |
anbr <- cumsum(!is.na(c(0, utils::head(rowdf$trailing_sep, -1))))[rinds] + 1 ## so it starts at 1 |
56 | 43x |
roworder <- seq_len(NROW(rowdf)) - (anbr - 1) |
57 | 43x |
newrows <- c(0, ifelse(utils::tail(anbr, -1) == utils::head(anbr, -1), 0, 1)) |
58 | ||
59 | 43x |
strbody <- prep_strs_for_rtf(strbody, string_map, markup_df) |
60 | 43x |
tbldf <- as.data.frame(strbody) |
61 | ||
62 | 43x |
names(tbldf) <- c("label", paste("col", seq_len(ncol(tbldf) - 1))) |
63 | 43x |
emptycols <- tbldf[1, ] |
64 | 43x |
emptycols[1, ] <- "" |
65 | ||
66 | 43x |
tbldf <- cbind( |
67 | 43x |
tbldf, |
68 | 43x |
data.frame( |
69 | 43x |
row_type = row_type, |
70 | 43x |
anbr = anbr, |
71 | 43x |
indentme = indentme, |
72 | 43x |
roworder = roworder, |
73 | 43x |
newrows = newrows, |
74 | 43x |
stringsAsFactors = FALSE |
75 |
) |
|
76 |
) |
|
77 | 43x |
tbldf |
78 |
} |
|
79 | ||
80 |
brackets_to_rtf <- function(strs) { |
|
81 | ! |
gsub("\\[\\[([^]]+)\\]\\]", "\\\\{\\1}", strs) |
82 |
} |
|
83 | ||
84 |
gutter_width <- .12 # inches |
|
85 | ||
86 |
## this is for Times New Roman 9 |
|
87 |
mar_plus_gutters <- 2 + gutter_width |
|
88 | ||
89 |
pg_width_by_orient <- function(landscape = FALSE) { |
|
90 | 38x |
fullpg <- ifelse(landscape, 11, 8.5) |
91 | 38x |
fullpg - mar_plus_gutters |
92 |
} |
|
93 | ||
94 |
tlg_type <- function(tt) { |
|
95 | 19x |
if (methods::is(tt, "list") && !methods::is(tt, "listing_df")) { |
96 | ! |
tt <- tt[[1]] |
97 |
} |
|
98 | 19x |
if (methods::is(tt, "ggplot")) { |
99 | ! |
ret <- "Figure" |
100 | 19x |
} else if (methods::is(tt, "VTableTree")) { |
101 | 14x |
ret <- "Table" |
102 | 5x |
} else if (methods::is(tt, "listing_df")) { |
103 | 4x |
ret <- "Listing" |
104 |
} else { |
|
105 | 1x |
stop( |
106 | 1x |
"unable to determine tlg type for object of class: ", |
107 | 1x |
paste(class(tt), collapse = ",") |
108 |
) |
|
109 |
} |
|
110 | 18x |
ret |
111 |
} |
|
112 | ||
113 |
mpf_to_colspan <- function( |
|
114 |
mpf, |
|
115 |
string_map = default_str_map, |
|
116 |
markup_df = dps_markup_df) { |
|
117 | 39x |
if (!methods::is(mpf, "MatrixPrintForm")) { |
118 | ! |
stop("figure out how to make this an mpf (MatrixPrintForm) first.") |
119 |
} |
|
120 | 39x |
strmat <- mf_strings(mpf) |
121 |
## spaces in column header will get underlines, but we dont' want them to |
|
122 | 39x |
nhl <- mf_nlheader(mpf) |
123 | 39x |
strmat[seq_len(nhl - 1), ] <- gsub( |
124 | 39x |
"^[[:space:]]+$", |
125 |
"", |
|
126 | 39x |
strmat[seq_len(nhl - 1), ] |
127 |
) |
|
128 | 39x |
strmat[nhl, ] <- ifelse(nzchar(strmat[nhl, ]), strmat[nhl, ], " ") |
129 | 39x |
strmat <- prep_strs_for_rtf(strmat, string_map, markup_df) |
130 | 39x |
nspancols <- nhl - 1L |
131 | 39x |
if (nspancols > 0) { |
132 | 34x |
csph <- lapply(seq_len(nspancols), function(ii) { |
133 | 68x |
spns <- mf_spans(mpf)[ii, ] |
134 | 68x |
vals <- strmat[ii, ] |
135 | 68x |
jj <- 1 ## could start at 2 cause 1 is row label/topleft space but why complicate thigns |
136 | 68x |
ind <- 1 |
137 | 68x |
myrle <- rle(vals) |
138 | 68x |
while (jj < length(vals)) { |
139 | 154x |
if (myrle$lengths[ind] == 1) { |
140 | 68x |
jj <- jj + 1 |
141 | 68x |
ind <- ind + 1 |
142 |
} else { |
|
143 | 86x |
sq <- seq(jj, jj + myrle$lengths[ind] - 1) |
144 | 86x |
valspns <- spns[sq] |
145 | 86x |
if (valspns[1] != length(sq)) { |
146 | ! |
vals[sq] <- split_val_spans(vals[jj], valspns) |
147 |
} |
|
148 | 86x |
jj <- tail(sq, 1) + 1 |
149 | 86x |
ind <- ind + 1 |
150 |
} |
|
151 |
} |
|
152 | 68x |
vals |
153 |
}) |
|
154 |
} else { |
|
155 | 5x |
csph <- NULL |
156 |
} |
|
157 | ||
158 | 39x |
list(colspan = csph, colheader = strmat[nhl, ]) |
159 |
} |
|
160 | ||
161 |
split_val_spans <- function(val, spns) { |
|
162 | ! |
if (val == "") { |
163 | ! |
return(rep(val, length(spns))) |
164 |
} |
|
165 | ! |
padvec <- rep("", length(spns)) |
166 | ! |
j <- 1 |
167 | ! |
padnum <- 0 |
168 | ! |
while (j < length(spns)) { |
169 | ! |
onespn <- spns[j] |
170 | ! |
if (padnum > 0) { |
171 | ! |
padvec[j:(j + onespn - 1)] <- strrep(" ", padnum) |
172 |
} |
|
173 | ! |
padnum <- padnum + 1 |
174 | ! |
j <- j + onespn |
175 |
} |
|
176 | ! |
paste0(padvec, val, padvec) |
177 |
} |
|
178 | ||
179 |
partmpf_to_colinds <- function(fullmpf, partmpf) { |
|
180 | 30x |
stopifnot( |
181 | 30x |
mf_nlheader(fullmpf) == mf_nlheader(partmpf), |
182 | 30x |
mf_ncol(fullmpf) >= mf_ncol(partmpf) |
183 |
) |
|
184 | 30x |
nlh <- mf_nlheader(fullmpf) |
185 | 30x |
fullhdr <- mf_strings(fullmpf)[1:nlh, , drop = FALSE] |
186 | 30x |
parthdr <- mf_strings(partmpf)[1:nlh, , drop = FALSE] |
187 | 30x |
fullkeys <- apply(fullhdr, 2, paste, collapse = "", simplify = TRUE) |
188 |
## hack to force first key to match uniquely |
|
189 | 30x |
fullkeys[1] <- paste(c("xxx", fullkeys[-1]), collapse = "") |
190 | 30x |
stopifnot(length(fullkeys) == length(unique(fullkeys))) |
191 | 30x |
partkeys <- apply(parthdr, 2, paste, collapse = "", simplify = TRUE) |
192 | 30x |
partkeys[1] <- fullkeys[1] |
193 | 30x |
match(partkeys, fullkeys) |
194 |
} |
|
195 | ||
196 |
subset_border_mat <- function(full_brdr, full_mpf, part_mpf) { |
|
197 | 30x |
if (is.null(full_brdr)) { |
198 | ! |
return(NULL) |
199 |
} |
|
200 | 30x |
inds <- partmpf_to_colinds(full_mpf, part_mpf) |
201 | 30x |
full_brdr[, inds, drop = FALSE] |
202 |
} |
|
203 | ||
204 | ||
205 |
get_ncol <- function(tt) { |
|
206 | 1x |
if (is(tt, "listing_df") || is(tt, "VTableTree")) { |
207 | 1x |
ncol(tt) |
208 | ! |
} else if (is(tt, "MatrixPrintForm")) { |
209 | ! |
mf_ncol(tt) |
210 | ! |
} else if (is.list(tt)) { |
211 | ! |
if (is(tt[[1]], "MatrixPrintForm")) { |
212 | ! |
mf_ncol(tt[[1]]) |
213 |
} else { |
|
214 | ! |
ncol(tt[[1]]) |
215 |
} |
|
216 |
} |
|
217 |
} |
|
218 | ||
219 |
#' @name tt_to_tlgrtf |
|
220 |
#' @title TableTree to .rtf Conversion |
|
221 |
#' @description |
|
222 |
#' A function to convert TableTree to .rtf |
|
223 |
#' @details |
|
224 |
#' This function aids in converting the rtables TableTree into the desired .rtf file. |
|
225 |
#' |
|
226 |
#' @param tt TableTree object to convert to RTF |
|
227 |
#' @param file character(1). File to create, including path, but excluding |
|
228 |
#' .rtf extension. |
|
229 |
#' @param orientation Orientation of the output ("portrait" or "landscape") |
|
230 |
#' @param colwidths Column widths for the table |
|
231 |
#' @param label_width_ins Label width in inches |
|
232 |
#' @param fontspec Font specification object |
|
233 |
#' @param pg_width Page width in inches |
|
234 |
#' @param margins Margins in inches (top, right, bottom, left) |
|
235 |
#' @param paginate Whether to paginate the output |
|
236 |
#' @param col_gap Column gap in spaces |
|
237 |
#' @param verbose Whether to print verbose output |
|
238 |
#' @param tlgtype Type of the output (Table, Listing, or Figure) |
|
239 |
#' @param string_map Unicode mapping for special characters |
|
240 |
#' @param markup_df Data frame containing markup information |
|
241 |
#' @param ... Additional arguments passed to gentlg |
|
242 |
#' @inheritParams tidytlg::gentlg |
|
243 |
#' @param nosplitin list(row=, col=). Path elements whose children should not be paginated within |
|
244 |
#' if it can be avoided. e.g., list(col="TRT01A") means don't split within treatment arms unless |
|
245 |
#' all the associated columns don't fit on a single page. |
|
246 |
#' @param combined_rtf logical(1). In the case where the result is broken up into multiple |
|
247 |
#' parts due to width, should a combined rtf file also be created. Defaults to `FALSE`. |
|
248 |
#' @param one_table logical(1). If `tt` is a (non-`MatrixPrintForm`) list, |
|
249 |
#' should the parts be added to the rtf within a single table (`TRUE`, the |
|
250 |
#' default) or as separate tables. End users will not generally need to set this. |
|
251 |
#' @param border_mat matrix. A `m x k` matrix where m is the number of columns of `tt` |
|
252 |
#' and k is the number of lines the header takes up. See [tidytlg::add_bottom_borders] |
|
253 |
#' for what the matrix should contain. Users should only specify this when the |
|
254 |
#' default behavior does not meet their needs. |
|
255 |
#' @import rlistings |
|
256 |
#' @rdname tt_to_tlgrtf |
|
257 |
#' @export |
|
258 |
#' @seealso Used in all table and listing scripts |
|
259 |
#' @note `file` should always include path. Path will be extracted |
|
260 |
#' and passed separately to `gentlg`. |
|
261 |
#' @note When `one_table` is `FALSE`, only the width of the row label |
|
262 |
#' pseudocolumn can be directly controlled due to a limitation in |
|
263 |
#' `tidytlg::gentlg`. The proportion of the full page that the first value |
|
264 |
#' in colwidths would take up is preserved and all other columns equally |
|
265 |
#' split the remaining available width. This will cause, e.g., the |
|
266 |
#' elements within the allparts rtf generated when `combined_rtf` is `TRUE` |
|
267 |
#' to differ visually from the content of the individual part rtfs. |
|
268 |
#' @return If `file` is non-NULL, this is called for the side-effect of writing |
|
269 |
#' one or more RTF files. Otherwise, returns a list of `huxtable` objects. |
|
270 |
tt_to_tlgrtf <- function( |
|
271 |
tt, |
|
272 |
file = NULL, |
|
273 |
orientation = c("portrait", "landscape"), |
|
274 |
colwidths = def_colwidths( |
|
275 |
tt, |
|
276 |
fontspec, |
|
277 |
col_gap = col_gap, |
|
278 |
label_width_ins = label_width_ins, |
|
279 |
type = tlgtype |
|
280 |
), |
|
281 |
label_width_ins = 2, |
|
282 |
watermark = NULL, |
|
283 |
pagenum = ifelse(tlgtype == "Listing", TRUE, FALSE), |
|
284 |
fontspec = font_spec("Times", 9L, 1.2), |
|
285 |
pg_width = pg_width_by_orient(orientation == "landscape"), |
|
286 |
margins = c(0, 0, 0, 0), |
|
287 |
paginate = tlg_type(tt) == "Table", |
|
288 |
col_gap = ifelse(tlgtype == "Listing", .5, 3), |
|
289 |
nosplitin = list( |
|
290 |
row = character(), |
|
291 |
col = character() |
|
292 |
), |
|
293 |
verbose = FALSE, |
|
294 |
tlgtype = tlg_type(tt), |
|
295 |
string_map = default_str_map, |
|
296 |
markup_df = dps_markup_df, |
|
297 |
combined_rtf = FALSE, |
|
298 |
one_table = TRUE, |
|
299 |
border_mat = make_header_bordmat(obj = tt), |
|
300 |
...) { |
|
301 | 41x |
orientation <- match.arg(orientation) |
302 | 41x |
newdev <- open_font_dev(fontspec) |
303 | 41x |
if (newdev) { |
304 | 10x |
on.exit(close_font_dev()) |
305 |
} |
|
306 | ||
307 | 41x |
if (tlgtype == "Listing" && nrow(tt) == 0) { |
308 | 1x |
dat <- as.list(c("No data to report", rep("", ncol(tt) - 1))) |
309 | 1x |
names(dat) <- names(tt) |
310 | 1x |
df <- as.data.frame(dat) |
311 | 1x |
var_labels(df) <- var_labels(tt) |
312 | ||
313 | 1x |
tt <- as_listing( |
314 | 1x |
df, |
315 | 1x |
key_cols = get_keycols(tt), |
316 | 1x |
disp_cols = listing_dispcols(tt) |
317 |
) |
|
318 |
} |
|
319 | ||
320 | 40x |
if (tlgtype == "Table" && fontspec$size == 8) { |
321 | ! |
opts <- options( |
322 | ! |
tidytlg.fontsize.table.footnote = 8, |
323 | ! |
tidytlg.fontsize.table = 8 |
324 |
) |
|
325 | ! |
on.exit(options(opts), add = TRUE) |
326 |
} |
|
327 | ||
328 | 40x |
if (length(colwidths) == 1) { |
329 | 1x |
nc <- get_ncol(tt) |
330 | 1x |
tot_width <- page_lcpp( |
331 | 1x |
pg_width = pg_width, |
332 | 1x |
pg_height = 20, # don't care about this, |
333 | 1x |
font_family = fontspec$family, |
334 | 1x |
font_size = fontspec$size, |
335 | 1x |
lineheight = fontspec$lineheight, |
336 | 1x |
margins = c(0, 0, 0, 0), |
337 | 1x |
landscape = orientation == "landscape" |
338 | 1x |
)$cpp |
339 | 1x |
wdth_left <- tot_width - colwidths |
340 | 1x |
colwidths <- c(colwidths, rep(floor(wdth_left / nc), nc)) |
341 |
} |
|
342 | ||
343 | 40x |
max_lwidth <- inches_to_spaces(label_width_ins, fontspec) |
344 | 40x |
if (colwidths[1] > max_lwidth) { |
345 | 2x |
colwidths[1] <- max_lwidth |
346 |
} |
|
347 | ||
348 | 40x |
if (!requireNamespace("tidytlg")) { |
349 | ! |
stop("tidytlg not installed, cannot go out to rtf.") |
350 |
} |
|
351 | ||
352 | 40x |
if (paginate) { |
353 |
## implies type Table |
|
354 | 7x |
if (tlgtype != "Table") { |
355 | ! |
stop( |
356 | ! |
"pagination is not currently supported for tlg types other than Table." |
357 |
) |
|
358 |
} |
|
359 | 7x |
if (methods::is(tt, "VTableTree")) { |
360 | 7x |
hdrmpf <- matrix_form(tt[1, ]) |
361 | ! |
} else if (methods::is(tt, "list") && methods::is(tt[[1]], "MatrixPrintForm")) { |
362 | ! |
hdrmpf <- tt[[1]] |
363 |
} else { |
|
364 | ! |
hrdmpf <- tt |
365 |
} |
|
366 | 7x |
pags <- paginate_to_mpfs( |
367 | 7x |
tt, |
368 | 7x |
fontspec = fontspec, |
369 | 7x |
landscape = orientation == "landscape", |
370 | 7x |
colwidths = colwidths, |
371 | 7x |
col_gap = col_gap, |
372 | 7x |
pg_width = pg_width, |
373 | 7x |
pg_height = NULL, |
374 | 7x |
margins = margins, |
375 | 7x |
lpp = NULL, |
376 | 7x |
nosplitin = nosplitin, |
377 | 7x |
verbose = verbose |
378 |
) ## |
|
379 | 7x |
if (has_force_pag(tt)) { |
380 | 1x |
nslices <- which( |
381 | 1x |
cumsum(vapply(pags, mf_ncol, 1L)) == ncol(tt) |
382 |
) |
|
383 | 1x |
oldpags <- pags |
384 | 1x |
pags <- lapply( |
385 | 1x |
seq_len(nslices), |
386 | 1x |
function(ii) { |
387 | 6x |
inds <- seq(ii, by = nslices, length.out = length(oldpags) / nslices) |
388 | 6x |
oldpags[inds] |
389 |
} |
|
390 |
) |
|
391 |
} |
|
392 | 7x |
pag_bord_mats <- lapply( |
393 | 7x |
seq_along(pags), |
394 | 7x |
function(i) { |
395 | 30x |
if (methods::is(pags[[i]], "MatrixPrintForm")) { |
396 | 24x |
partmpf <- pags[[i]] |
397 |
} else { |
|
398 | 6x |
partmpf <- pags[[i]][[1]] |
399 |
} |
|
400 | 30x |
subset_border_mat(border_mat, hdrmpf, partmpf) |
401 |
} |
|
402 |
) |
|
403 | 7x |
ret <- lapply( |
404 | 7x |
seq_along(pags), |
405 | 7x |
function(i) { |
406 | 30x |
if (!is.null(file) && length(pags) > 1) { |
407 | 20x |
fmti <- paste0("%0", ceiling(log(length(pags), base = 10)), "d") |
408 | 20x |
fname <- paste0(file, "part", sprintf(fmti, i), "of", length(pags)) |
409 |
} else { |
|
410 | 10x |
fname <- file |
411 |
} |
|
412 | 30x |
full_pag_i <- pags[[i]] |
413 | 7x |
if ( |
414 | 30x |
is.list(full_pag_i) && |
415 | 30x |
!methods::is(full_pag_i, "MatrixPrintForm") |
416 |
) { |
|
417 | 6x |
pgi_for_cw <- full_pag_i[[1]] |
418 |
} else { |
|
419 | 24x |
pgi_for_cw <- full_pag_i |
420 |
} |
|
421 | 30x |
tt_to_tlgrtf( |
422 | 30x |
full_pag_i, |
423 | 30x |
file = fname, |
424 | 30x |
orientation = orientation, |
425 | 30x |
colwidths = j_mf_col_widths(pgi_for_cw), |
426 | 30x |
fontspec = fontspec, |
427 | 30x |
watermark = watermark, |
428 | 30x |
col_gap = col_gap, |
429 | 30x |
paginate = FALSE, |
430 | 30x |
tlgtype = tlgtype, |
431 | 30x |
string_map = string_map, |
432 | 30x |
markup_df = markup_df, |
433 | 30x |
border_mat = pag_bord_mats[[i]], |
434 |
... |
|
435 |
) |
|
436 |
} |
|
437 |
) |
|
438 | 7x |
if (combined_rtf) { |
439 | 1x |
if (length(pags) > 1) { |
440 | 1x |
tt_to_tlgrtf( |
441 | 1x |
pags, |
442 | 1x |
file = paste0(file, "allparts"), |
443 | 1x |
orientation = orientation, |
444 | 1x |
fontspec = fontspec, |
445 | 1x |
watermark = watermark, |
446 | 1x |
col_gap = col_gap, |
447 | 1x |
paginate = FALSE, |
448 | 1x |
tlgtype = "Table", |
449 | 1x |
string_map = string_map, |
450 | 1x |
markup_df = markup_df, |
451 | 1x |
one_table = FALSE, |
452 |
## gentlg isn't vectorized on column widths so we're SOL here... |
|
453 | 1x |
colwidths = colwidths, ## this is largely ignored see note in docs |
454 |
# colwidths are already on the pags since they are mpfs |
|
455 | 1x |
border_mat = pag_bord_mats, |
456 |
... |
|
457 |
) |
|
458 | ! |
} else if (!is.null(file)) { # only one page after pagination |
459 | ! |
message( |
460 | ! |
"Table ", |
461 | ! |
basename(file), |
462 | ! |
": No combined RTF created, output fit within one part." |
463 |
) |
|
464 |
} |
|
465 |
} |
|
466 | 7x |
if (is.null(file) && length(pags) > 1) { |
467 | 1x |
ret <- unlist(ret, recursive = FALSE) |
468 |
} |
|
469 | 7x |
return(ret) |
470 |
} |
|
471 | ||
472 | 33x |
if (tlgtype == "Table") { |
473 | 31x |
if (is.list(tt) && !methods::is(tt, "MatrixPrintForm")) { |
474 | 7x |
df <- lapply( |
475 | 7x |
tt, |
476 | 7x |
tt_to_tbldf, |
477 | 7x |
fontspec = fontspec, |
478 | 7x |
string_map = string_map, |
479 | 7x |
markup_df = markup_df |
480 |
) |
|
481 | 7x |
if (one_table) { |
482 | 6x |
df <- do.call( |
483 | 6x |
rbind, |
484 | 6x |
lapply( |
485 | 6x |
seq_along(df), |
486 | 6x |
function(ii) { |
487 | 12x |
dfii <- df[[ii]] |
488 | 12x |
dfii$newpage <- 0 |
489 | 12x |
if (ii > 1) { |
490 | 6x |
dfii$newpage[1] <- 1 |
491 |
} |
|
492 | 12x |
dfii$indentme <- ifelse(dfii$indentme <= 1, 0, dfii$indentme - 1) |
493 | 12x |
dfii |
494 |
} |
|
495 |
) |
|
496 |
) |
|
497 |
} |
|
498 |
} else { |
|
499 | 24x |
df <- tt_to_tbldf( |
500 | 24x |
tt, |
501 | 24x |
fontspec = fontspec, |
502 | 24x |
string_map = string_map, |
503 | 24x |
markup_df = markup_df |
504 |
) |
|
505 |
} |
|
506 |
} else { |
|
507 | 2x |
df <- tt[, listing_dispcols(tt)] |
508 |
} |
|
509 | ||
510 |
## we only care about the col labels here... |
|
511 | 33x |
if (tlgtype == "Table" && is.list(tt) && !methods::is(tt, "MatrixPrintForm")) { |
512 | 7x |
mpf <- tt[[1]] |
513 | 7x |
if (!one_table) { |
514 | 1x |
colinfo <- lapply( |
515 | 1x |
tt, |
516 | 1x |
mpf_to_colspan, |
517 | 1x |
markup_df = markup_df, |
518 | 1x |
string_map = string_map |
519 |
) |
|
520 | 1x |
csph <- lapply(colinfo, function(x) x$colspan) |
521 | 1x |
colheader <- lapply(colinfo, function(x) x$colheader) |
522 |
} else { |
|
523 | 6x |
colinfo <- mpf_to_colspan( |
524 | 6x |
mpf, |
525 | 6x |
markup_df = markup_df, |
526 | 6x |
string_map = string_map |
527 |
) |
|
528 | 6x |
csph <- colinfo$colspan |
529 | 6x |
colheader <- colinfo$colheader |
530 |
} |
|
531 | 26x |
} else if (methods::is(tt, "MatrixPrintForm")) { |
532 | 24x |
mpf <- tt |
533 | 24x |
colinfo <- mpf_to_colspan( |
534 | 24x |
mpf, |
535 | 24x |
markup_df = markup_df, |
536 | 24x |
string_map = string_map |
537 |
) |
|
538 | 24x |
csph <- colinfo$colspan |
539 | 24x |
colheader <- colinfo$colheader |
540 |
} else { |
|
541 | 2x |
mpf <- matrix_form( |
542 | 2x |
utils::head(tt, 1), |
543 | 2x |
indent_rownames = FALSE, |
544 | 2x |
expand_newlines = FALSE, |
545 | 2x |
fontspec = fontspec |
546 |
) |
|
547 | 2x |
colinfo <- mpf_to_colspan( |
548 | 2x |
mpf, |
549 | 2x |
markup_df = markup_df, |
550 | 2x |
string_map = string_map |
551 |
) |
|
552 | 2x |
csph <- colinfo$colspan |
553 | 2x |
colheader <- colinfo$colheader |
554 |
} |
|
555 | ||
556 | 33x |
if (is.null(file)) { |
557 | 7x |
fname <- NULL |
558 | 7x |
fpath <- tempdir() |
559 |
} else { |
|
560 | 26x |
fname <- basename(file) |
561 |
## dirname on "table" returns "." so we're good using |
|
562 |
## this unconditionally as opath |
|
563 | 26x |
fpath <- dirname(file) |
564 |
} |
|
565 | ||
566 | 33x |
if (tlgtype == "Table") { |
567 | 31x |
colwidths <- cwidths_final_adj( |
568 | 31x |
labwidth_ins = label_width_ins, |
569 | 31x |
total_width = pg_width, |
570 | 31x |
colwidths = colwidths[-1] |
571 |
) |
|
572 |
} |
|
573 | 33x |
colwidths <- colwidths / sum(colwidths) |
574 |
# finite precision arithmetic is a dreamscape of infinite wonder... |
|
575 |
## sum(rep(1/18, 18)) <= 1 is FALSE... |
|
576 | 33x |
if (sum(colwidths) > 1) { |
577 | ! |
colwidths <- colwidths - 0.00000000001 ## much smaller than a twip = 1/20 printing point |
578 |
} |
|
579 | ||
580 | 33x |
if (!one_table && # nolint start |
581 | 33x |
is.list(tt) && !is(tt, "MatrixPrintForm")) { |
582 |
### gentlg is not vectorized on wcol. x.x x.x x.x |
|
583 |
### but it won't break if we only give it one number... |
|
584 |
### Calling this an ugly hack is an insult to all the hard working hacks |
|
585 |
### out there |
|
586 | 1x |
colwidths <- colwidths[1] |
587 |
} # nolint end |
|
588 | ||
589 | 33x |
footer_val <- prep_strs_for_rtf( |
590 | 33x |
c( |
591 | 33x |
main_footer(mpf), |
592 | 33x |
prov_footer(mpf) |
593 |
), |
|
594 | 33x |
string_map, |
595 | 33x |
markup_df |
596 |
) |
|
597 | 33x |
if (length(footer_val) == 0) { |
598 | 33x |
footer_val <- NULL |
599 |
} |
|
600 | ||
601 | 33x |
if (!is.null(fname) && tlgtype == "Table" && is.data.frame(df)) { |
602 | 23x |
utils::write.csv( |
603 | 23x |
df, |
604 | 23x |
file = file.path(fpath, paste0(tolower(fname), ".csv")), |
605 | 23x |
row.names = FALSE |
606 |
) |
|
607 |
} |
|
608 | ||
609 | 33x |
tidytlg::gentlg( |
610 | 33x |
df, |
611 | 33x |
tlf = tlgtype, |
612 | 33x |
format = "rtf", |
613 | 33x |
idvars = if (tlgtype == "Listing") get_keycols(tt) else NULL, |
614 | 33x |
colspan = csph, |
615 | 33x |
file = fname, |
616 | 33x |
opath = fpath, |
617 | 33x |
colheader = colheader, |
618 | 33x |
title = prep_strs_for_rtf( |
619 | 33x |
main_title(mpf), |
620 | 33x |
string_map, |
621 | 33x |
markup_df |
622 |
), |
|
623 | 33x |
footers = footer_val, |
624 | 33x |
orientation = orientation, |
625 | 33x |
wcol = colwidths, |
626 | 33x |
watermark = watermark, |
627 | 33x |
pagenum = pagenum, |
628 | 33x |
bottom_borders = border_mat, |
629 | 33x |
print.hux = !is.null(fname), |
630 |
... |
|
631 |
) |
|
632 |
} |
|
633 | ||
634 |
## NB x/(x+sum(colwidths)) = labwidth_ins/total_width |
|
635 |
cwidths_final_adj <- function(labwidth_ins, total_width, colwidths) { |
|
636 | 32x |
prop <- labwidth_ins / total_width |
637 | 32x |
lwidth <- floor(prop / (1 - prop) * sum(colwidths)) |
638 | 32x |
c(lwidth, colwidths) |
639 |
} |
|
640 | ||
641 |
make_bordmat_row <- function(rowspns) { |
|
642 | 17x |
havespn <- rowspns > 1 |
643 | 17x |
if (!any(havespn)) { |
644 | 8x |
return(rep(0, times = length(rowspns))) |
645 |
} |
|
646 | ||
647 | 9x |
pos <- 1 |
648 | 9x |
ngrp <- 1 |
649 | 9x |
ret <- numeric(length(rowspns)) |
650 | 9x |
while (pos < length(rowspns)) { |
651 | 58x |
spnval <- rowspns[pos] |
652 | 58x |
if (spnval > 1) { |
653 | 50x |
multipos <- seq(pos, pos + spnval - 1) |
654 | 50x |
val <- ngrp |
655 |
} else { |
|
656 | 8x |
multipos <- pos |
657 | 8x |
val <- 0 |
658 |
} |
|
659 | 58x |
ret[multipos] <- val |
660 | 58x |
pos <- pos + spnval |
661 | 58x |
ngrp <- ngrp + 1 |
662 |
} |
|
663 | 9x |
ret |
664 |
} |
|
665 | ||
666 |
fixup_bord_mat <- function(brdmat, hstrs) { |
|
667 |
## no lines between labels and their counts |
|
668 | 7x |
countcells <- matrix( |
669 | 7x |
grepl("N=", hstrs, fixed = TRUE), |
670 | 7x |
nrow = nrow(hstrs), |
671 | 7x |
ncol = ncol(hstrs) |
672 |
) |
|
673 | ||
674 | 7x |
countcoords <- which(countcells, arr.ind = TRUE) |
675 | 7x |
for (i in seq_len(nrow(countcoords))) { |
676 | ! |
brdmat[countcoords[i, "row"] - 1, countcoords[i, "col"]] <- 0 |
677 |
} |
|
678 | ||
679 | 7x |
brdmat[!nzchar(hstrs) | hstrs == " "] <- 0 |
680 | 7x |
brdmat[nrow(brdmat), ] <- 1 |
681 | 7x |
brdmat[seq_len(nrow(brdmat) - 1), 1] <- 0 |
682 | 7x |
brdmat |
683 |
} |
|
684 | ||
685 |
.make_header_bordmat <- function( |
|
686 |
obj, |
|
687 |
mpf = matrix_form(utils::head(obj, 1), expand_newlines = FALSE)) { |
|
688 | 7x |
spns <- mf_spans(mpf) |
689 | 7x |
nlh <- mf_nlheader(mpf) |
690 | 7x |
nrh <- mf_nrheader(mpf) |
691 | 7x |
stopifnot(nlh == nrh) |
692 | ||
693 | 7x |
hstrs <- mf_strings(mpf)[seq_len(nrh), , drop = FALSE] |
694 | 7x |
spns <- mf_spans(mpf)[seq_len(nrh), , drop = FALSE] |
695 | ||
696 | 7x |
brdmat <- do.call( |
697 | 7x |
rbind, |
698 | 7x |
lapply( |
699 | 7x |
seq_len(nrh), |
700 | 7x |
function(i) make_bordmat_row(spns[i, ]) |
701 |
) |
|
702 |
) |
|
703 | ||
704 | 7x |
brdmat <- fixup_bord_mat(brdmat, hstrs) |
705 | 7x |
brdmat |
706 |
} |
|
707 | ||
708 |
setGeneric( |
|
709 |
"make_header_bordmat", |
|
710 |
function( |
|
711 |
obj, |
|
712 |
mpf = matrix_form(utils::head(obj, 1), expand_newlines = FALSE)) { |
|
713 | 16x |
standardGeneric("make_header_bordmat") |
714 |
} |
|
715 |
) |
|
716 | ||
717 |
setMethod( |
|
718 |
"make_header_bordmat", |
|
719 |
c(obj = "ANY", mpf = "MatrixPrintForm"), |
|
720 |
.make_header_bordmat |
|
721 |
) |
|
722 | ||
723 |
setMethod( |
|
724 |
"make_header_bordmat", |
|
725 |
c(obj = "listing_df"), |
|
726 |
function(obj, mpf) matrix(1, nrow = 1, ncol = length(listing_dispcols(obj))) |
|
727 |
) |
|
728 | ||
729 |
setMethod( |
|
730 |
"make_header_bordmat", |
|
731 |
c(obj = "VTableTree", mpf = "missing"), |
|
732 |
function(obj, mpf) { |
|
733 | 7x |
make_header_bordmat( |
734 | 7x |
mpf = matrix_form( |
735 | 7x |
utils::head(obj, 1), |
736 | 7x |
expand_newlines = FALSE |
737 |
) |
|
738 |
) |
|
739 |
} |
|
740 |
) |
1 |
#' Non-blank Sentinel |
|
2 |
#' |
|
3 |
#' @keywords internal |
|
4 |
non_blank_sentinel <- structure("", class = "non_blank_sentinel") |
|
5 | ||
6 |
#' Get Control Subset |
|
7 |
#' |
|
8 |
#' Retrieves a subset of the DataFrame based on treatment variable and control group. |
|
9 |
#' |
|
10 |
#' @param df Data frame to subset. |
|
11 |
#' @param trt_var Treatment variable name. |
|
12 |
#' @param ctrl_grp Control group value. |
|
13 |
#' @return Subset of the data frame. |
|
14 |
#' @keywords internal |
|
15 |
get_ctrl_subset <- function(df, trt_var, ctrl_grp) { |
|
16 | 303x |
df[df[[trt_var]] == ctrl_grp, ] |
17 |
} |
|
18 | ||
19 | ||
20 |
# sfunction to perform counting of records or subjects on an incoming df and .alt_df |
|
21 | ||
22 |
#' Null Function |
|
23 |
#' |
|
24 |
#' A function that returns NULL. |
|
25 |
#' |
|
26 |
#' @return NULL |
|
27 |
#' @keywords internal |
|
28 |
null_fn <- function(...) { |
|
29 | ! |
NULL |
30 |
} |
|
31 | ||
32 | ||
33 |
#' Create Alternative Data Frame |
|
34 |
#' |
|
35 |
#' Creates an alternative data frame based on the current split context. |
|
36 |
#' |
|
37 |
#' @param .spl_context Current split context. |
|
38 |
#' @param .df_row Current data frame row. |
|
39 |
#' @param denomdf Denominator data frame. |
|
40 |
#' @param denom_by Denominator grouping variable. |
|
41 |
#' @param id Identifier variable. |
|
42 |
#' @param variables Variables to include in the analysis. |
|
43 |
#' @param denom Denominator type. |
|
44 |
#' @return Grand parent dataset. |
|
45 |
#' @noRd |
|
46 |
#' @keywords Internal |
|
47 |
h_create_altdf <- function(.spl_context, .df_row, denomdf, denom_by = NULL, id, variables, denom) { |
|
48 |
### parent df in the current row-split (all col splits are still in) |
|
49 | 483x |
pardf <- .spl_context$full_parent_df[[NROW(.spl_context)]] |
50 | ||
51 |
## if no denomdf defined, use input if you want to use alt_source df, you better pass on the same dataframe as |
|
52 |
## denomdf in the function call |
|
53 | ||
54 | 483x |
colsplit <- .spl_context$cur_col_split[[1]] |
55 | 483x |
if (length(colsplit) == 1 && tolower(colsplit) == "total") { |
56 | 2x |
colsplit <- NULL |
57 |
} |
|
58 | ||
59 | 483x |
if (is.null(denomdf) || denom %in% c("n_df", "n_rowdf")) { |
60 |
#### once we have the rtables version >= 0.6.12 -- is.null(denomdf) only will happen when build_table has been |
|
61 |
#### used without alt_counts_df argument denom options c('N_col', 'n_df', 'n_altdf', 'N_colgroup', 'n_rowdf', |
|
62 |
#### 'n_parentdf') note that n_altdf always has a non-null denomdf N_colgroup and n_parentdf will be handled |
|
63 |
#### separately n_parentdf in h_denom_parentdf N_colgroup with h_colexpr_substr |
|
64 | ||
65 | 151x |
denomdf <- unique(.df_row[, c(id, variables$strata, colsplit, denom_by), drop = FALSE]) |
66 |
} |
|
67 | ||
68 |
# grand parent dataset starts of from denomdf -- not yet rowsplits this is for the risk diff columns to include all |
|
69 |
# subjects to start from |
|
70 | 483x |
gpardf <- denomdf |
71 | ||
72 | 483x |
nm_gpardf <- names(gpardf) |
73 | ||
74 | 483x |
sbgrpvar <- intersect(.spl_context$split, nm_gpardf) |
75 | ||
76 | 483x |
cursbgrp_value <- NULL |
77 | ||
78 | 483x |
if (!is.null(denom_by) && length(sbgrpvar) > 0) { |
79 |
### assumption: the subgroup is the first variable in the row-split if this assumption is not valid --- |
|
80 |
### re-design and let user pass in the subgroup variable |
|
81 | 99x |
sbgrpvar <- denom_by[1] |
82 | 99x |
cur_index <- which(.spl_context$split == sbgrpvar) |
83 | 99x |
if (length(cur_index) > 0) { |
84 | 99x |
cursbgrp_value <- .spl_context$value[[cur_index]] |
85 | ||
86 | 99x |
gpardf <- subset(denomdf, eval(denomdf[[sbgrpvar]] == cursbgrp_value)) |
87 |
} |
|
88 | 99x |
if (length(cur_index) == 0) { |
89 |
# cat(paste('sbgrpvar', sbgrpvar)) cat(paste( unique(.spl_context$value), sep = ', ')) |
|
90 |
} |
|
91 |
} |
|
92 | ||
93 |
## note this is a rowsplit only, not yet column split |
|
94 | 483x |
return(gpardf) |
95 |
} |
|
96 | ||
97 |
#' No Data to Report String |
|
98 |
#' |
|
99 |
#' A constant string used when there is no data to display in a table. |
|
100 |
#' This is used as a placeholder in tables when no data is available for a particular category. |
|
101 |
#' |
|
102 |
#' @return A character string with the value "No data to report". |
|
103 |
#' |
|
104 |
#' @export |
|
105 |
#' @keywords internal |
|
106 |
no_data_to_report_str <- "No data to report" |
|
107 | ||
108 | ||
109 |
#' Update Factor |
|
110 |
#' |
|
111 |
#' Updates a factor variable in a data frame based on specified values. |
|
112 |
#' |
|
113 |
#' @param df Data frame containing the variable to update. |
|
114 |
#' @param .var Variable name to update. |
|
115 |
#' @param val Values to keep. |
|
116 |
#' @param excl_levels Levels to exclude from the factor. |
|
117 |
#' @return Updated data frame. |
|
118 |
#' @noRd |
|
119 |
h_update_factor <- function(df, .var, val = NULL, excl_levels = NULL) { |
|
120 | 250x |
if (!is.factor(df[[.var]]) || (is.null(val) && is.null(excl_levels))) { |
121 | 34x |
return(df) |
122 |
} |
|
123 | ||
124 | 216x |
if ((!is.null(val) && !is.null(excl_levels))) { |
125 | ! |
stop("update_factor cannot be used with both val and excl_levels specified.") |
126 |
} |
|
127 | ||
128 | 216x |
all_levels <- levels(df[[.var]]) |
129 | ||
130 | 216x |
if (!is.null(val)) { |
131 | 215x |
exclude_levels <- all_levels[!(all_levels %in% val)] |
132 | 215x |
remaining_levels <- setdiff(all_levels, exclude_levels) |
133 |
} |
|
134 | 216x |
if (!is.null(excl_levels)) { |
135 | 1x |
exclude_levels <- all_levels[all_levels %in% excl_levels] |
136 | 1x |
remaining_levels <- setdiff(all_levels, exclude_levels) |
137 |
} |
|
138 | ||
139 |
## introduce level No data to report |
|
140 | 216x |
if (length(remaining_levels) == 0) { |
141 | ! |
remaining_levels <- no_data_to_report_str |
142 |
} |
|
143 | 216x |
df[[.var]] <- factor(as.character(df[[.var]]), levels = remaining_levels) |
144 | ||
145 | 216x |
return(df) |
146 |
} |
|
147 | ||
148 | ||
149 |
#' Extract Substring from Column Expression |
|
150 |
#' |
|
151 |
#' Retrieves the substring from a column expression related to a variable component. |
|
152 |
#' |
|
153 |
#' get substring from col_expr related to var component |
|
154 |
#' intended usage is on strings coming from .spl_context$cur_col_expr |
|
155 |
#' these strings are of type '!(is.na(var) & var %in% 'xxx') & !(is.na(var2) & var2 %in% 'xxx')' |
|
156 |
#' |
|
157 |
#' @param var Variable to extract from the expression. |
|
158 |
#' @param col_expr Column expression string. |
|
159 |
#' @return Substring corresponding to the variable. |
|
160 |
#' @noRd |
|
161 |
#' @keywords internal |
|
162 |
h_colexpr_substr <- function(var, col_expr) { |
|
163 |
# reconstructing the strings is not an option as doesn't work for combined columns facets |
|
164 | 87x |
cur_col_expr <- as.character(col_expr) |
165 | ||
166 | 87x |
if (!grepl(var, cur_col_expr, fixed = TRUE)) { |
167 | 1x |
return(NULL) |
168 |
} |
|
169 | ||
170 | 86x |
z2 <- paste0("(!is.na(", var, ") & ", var, " %in%") |
171 | 86x |
start <- regexpr(z2, cur_col_expr, fixed = TRUE) |
172 | 86x |
end <- start + attr(start, "match.length") - 1 |
173 | 86x |
z1 <- cbind(start, end) |
174 | ||
175 |
### start of the string |
|
176 | 86x |
z1_start <- z1[1, 1] |
177 | ||
178 |
### figure out what is the appropriate end expression has several & in string |
|
179 | ||
180 | 86x |
positions <- gregexpr("&", cur_col_expr, fixed = TRUE) |
181 | 86x |
start <- unlist(positions) |
182 | 86x |
end <- start + attr(positions[[1]], "match.length") - 1 |
183 | 86x |
z3 <- cbind(start, end)[, 1] |
184 | ||
185 |
### get the first & after the end of z3 ((!is.na(TRT01A) & TRT01A %in%) |
|
186 | 86x |
h <- z3 > z1[, 2] |
187 | 86x |
if (any(h)) { |
188 | 85x |
z4 <- which.max(h) |
189 | 85x |
z1_end <- z3[z4] - 3 |
190 |
} else { |
|
191 | 1x |
z1_end <- nchar(cur_col_expr) |
192 |
} |
|
193 | ||
194 | 86x |
col_expr_substr <- substr(cur_col_expr, z1_start, z1_end) |
195 | ||
196 | 86x |
return(col_expr_substr) |
197 |
} |
|
198 | ||
199 |
#' Get Denominator Parent Data Frame |
|
200 |
#' |
|
201 |
#' Retrieves the parent data frame based on denominator. |
|
202 |
#' |
|
203 |
#' @param .spl_context Current split context. |
|
204 |
#' @param denom Denominator type. |
|
205 |
#' @param denom_by Denominator grouping variable. |
|
206 |
#' @return Parent data frame. |
|
207 |
#' @noRd |
|
208 |
#' @keywords internal |
|
209 |
h_denom_parentdf <- function(.spl_context, denom, denom_by) { |
|
210 | 463x |
if (denom != "n_parentdf") { |
211 | 463x |
return(NULL) |
212 |
} |
|
213 | ! |
if (is.null(denom_by)) { |
214 | ! |
stop("denom_by must be specified when using denom = 'n_parentdf'.") |
215 |
} |
|
216 | ! |
split <- .spl_context$split |
217 | ! |
if (split[1] == "root" && !is.null(denom_by)) { |
218 | ! |
denom_by <- c("root", denom_by) |
219 |
} |
|
220 | ! |
split <- intersect(denom_by, split) |
221 | ! |
parentdf <- .spl_context$full_parent_df[[length(split)]] |
222 | ! |
return(parentdf) |
223 |
} |
|
224 | ||
225 |
#' Add New Levels to Data Frame |
|
226 |
#' |
|
227 |
#' Adds new factor levels to a specified variable in the data frame. |
|
228 |
#' |
|
229 |
#' @param df Data frame to update. |
|
230 |
#' @param .var Variable to which new levels will be added. |
|
231 |
#' @param new_levels List of new levels to add. |
|
232 |
#' @param addstr2levs String to add to new levels. |
|
233 |
#' @param new_levels_after Boolean, indicating if new levels should be added after existing levels. |
|
234 |
#' @return Updated data frame. |
|
235 |
#' @noRd |
|
236 |
#' @keywords internal |
|
237 |
h_df_add_newlevels <- function(df, .var, new_levels, addstr2levs = NULL, new_levels_after) { |
|
238 | 21x |
varvec <- df[[.var]] |
239 | ||
240 | 21x |
levs <- if (is.factor(varvec)) levels(varvec) else sort(unique(varvec)) |
241 | ||
242 | 21x |
if (!is.null(new_levels)) { |
243 |
## assumption: new_levels[[1]] : names of the new levels new_levels[[2]] : values of the new levels |
|
244 | 21x |
if (length(new_levels[[1]]) != length(new_levels[[2]])) { |
245 | ! |
stop( |
246 | ! |
"new_levels must be a list of length 2, second element must be a ", |
247 | ! |
"list of same length as first element." |
248 |
) |
|
249 |
} |
|
250 | 21x |
if (any(duplicated(unlist(new_levels[[2]])))) { |
251 | ! |
stop( |
252 | ! |
"unlist(new_levels[[2]]) contains duplicates: duplicate assignment ", |
253 | ! |
"of a level to a new level is not allowed." |
254 |
) |
|
255 |
} |
|
256 | ||
257 | 21x |
sortnewlevs <- unlist(lapply(X = new_levels[[2]], FUN = function(x) { |
258 | 21x |
min(which(levs %in% x)) - 0.1 |
259 |
})) |
|
260 | ||
261 | 21x |
if (new_levels_after) { |
262 | 17x |
sortnewlevs <- unlist(lapply(X = new_levels[[2]], FUN = function(x) { |
263 | 17x |
max(which(levs %in% x)) + 0.1 |
264 |
})) |
|
265 |
} |
|
266 | ||
267 | 21x |
newlevs <- c(levs, new_levels[[1]]) |
268 | 21x |
sortx <- c(seq_along(levs), sortnewlevs) |
269 | ||
270 | 21x |
newlevsx <- newlevs[order(sortx)] |
271 | ||
272 | 21x |
levs <- newlevsx |
273 | ||
274 |
### add newlevels to df |
|
275 | 21x |
for (i in seq_along(new_levels[[1]])) { |
276 | 21x |
levii <- unlist(new_levels[[2]][i]) |
277 | ||
278 | 21x |
addi <- df[df[[.var]] %in% levii, ] |
279 | 21x |
addi[[.var]] <- new_levels[[1]][i] |
280 | ||
281 | 21x |
df <- dplyr::bind_rows(df, addi) |
282 |
} |
|
283 |
} |
|
284 | ||
285 | 21x |
levlabels <- levs |
286 | 21x |
if (!is.null(addstr2levs)) { |
287 | 4x |
levlabels <- paste0(levs, addstr2levs) |
288 |
} |
|
289 | ||
290 | 21x |
df[[.var]] <- factor(as.character(df[[.var]]), levels = levs, labels = levlabels) |
291 | ||
292 | 21x |
return(df) |
293 |
} |
|
294 | ||
295 | ||
296 |
#' Get Treatment Variable Reference Path |
|
297 |
#' |
|
298 |
#' Retrieves the treatment variable reference path from the provided context. |
|
299 |
#' |
|
300 |
#' @param ref_path Reference path for treatment variable. |
|
301 |
#' @param .spl_context Current split context. |
|
302 |
#' @param df Data frame. |
|
303 |
#' @return List containing treatment variable details. |
|
304 |
#' @export |
|
305 |
h_get_trtvar_refpath <- function(ref_path, .spl_context, df) { |
|
306 | 49x |
checkmate::check_character(ref_path, min.len = 2L, names = "unnamed") |
307 | 49x |
checkmate::assert_true(length(ref_path) %% 2 == 0) # Even number of elements in ref_path. |
308 | ||
309 | 49x |
trt_var <- utils::tail(.spl_context$cur_col_split[[length(.spl_context$cur_col_split)]], n = 1) |
310 | 49x |
trt_var_refspec <- utils::tail(ref_path, n = 2)[1] |
311 | ||
312 | 49x |
checkmate::assert_true(identical(trt_var, trt_var_refspec)) |
313 | ||
314 |
# current group and ctrl_grp |
|
315 | 49x |
cur_trt_grp <- utils::tail(.spl_context$cur_col_split_val[[length(.spl_context$cur_col_split_val)]], n = 1) |
316 | 49x |
ctrl_grp <- utils::tail(ref_path, n = 1) |
317 | ||
318 |
### check that ctrl_grp is a level of the treatment variable, in case riskdiff is requested |
|
319 | 49x |
if (!ctrl_grp %in% levels(df[[trt_var]])) { |
320 | ! |
stop(paste0( |
321 | ! |
"control group specification in ref_path argument (", |
322 | ! |
ctrl_grp, |
323 | ! |
") is not a level of your treatment group variable (", |
324 | ! |
trt_var, |
325 |
")." |
|
326 |
)) |
|
327 |
} |
|
328 | 49x |
return(list(trt_var = trt_var, trt_var_refspec = trt_var_refspec, cur_trt_grp = cur_trt_grp, ctrl_grp = ctrl_grp)) |
329 |
} |
|
330 | ||
331 | ||
332 |
#' Update Data Frame Row |
|
333 |
#' |
|
334 |
#' Updates a row in the data frame based on various parameters. |
|
335 |
#' |
|
336 |
#' @param df_row Data frame row to update. |
|
337 |
#' @param .var Variable name to update. |
|
338 |
#' @param val Values to keep. |
|
339 |
#' @param excl_levels Levels to exclude from the factor. |
|
340 |
#' @param drop_levels Boolean, indicating if levels should be dropped. |
|
341 |
#' @param new_levels New levels to add. |
|
342 |
#' @param new_levels_after Boolean, indicating if new levels should be added after existing levels. |
|
343 |
#' @param addstr2levs String to add to new levels. |
|
344 |
#' @param label Label string. |
|
345 |
#' @param label_map Mapping for labels. |
|
346 |
#' @param labelstr Label string to replace. |
|
347 |
#' @param label_fstr Format string for labels. |
|
348 |
#' @param .spl_context Current split context. |
|
349 |
#' @return List containing updated data frames and values. |
|
350 |
#' @noRd |
|
351 |
#' @keywords internal |
|
352 |
h_upd_dfrow <- function( |
|
353 |
df_row, |
|
354 |
.var, |
|
355 |
val, |
|
356 |
excl_levels, |
|
357 |
drop_levels, |
|
358 |
new_levels, |
|
359 |
new_levels_after, |
|
360 |
addstr2levs, |
|
361 |
label, |
|
362 |
label_map, |
|
363 |
labelstr, |
|
364 |
label_fstr, |
|
365 |
.spl_context) { |
|
366 | 478x |
if (!is.null(label) && !is.null(label_map)) { |
367 | ! |
stop("a_freq_j: label and label_map cannot be used together.") |
368 |
} |
|
369 | ||
370 | 478x |
if (!is.null(labelstr) && (!is.null(val) || !is.null(new_levels) || !is.null(excl_levels))) { |
371 | ! |
stop("a_freq_j: val/excl_levels/new_levels cannot be used in a summarize_row_group call.") |
372 |
} |
|
373 | ||
374 | 478x |
if (!is.null(new_levels) && (is.null(.var) || is.na(.var))) { |
375 | ! |
stop("When using new_levels, var must be provided in analyze call with a_freq_j.") |
376 |
} |
|
377 | ||
378 | 478x |
if (!is.null(val) && drop_levels == TRUE) { |
379 | ! |
stop("argument val cannot be used together with drop_levels = TRUE.") |
380 |
} |
|
381 | ||
382 | 478x |
if (!is.null(val) && !is.null(excl_levels)) { |
383 | ! |
stop("argument val and excl_levels cannot be used together.") |
384 |
} |
|
385 | ||
386 |
## start making updates to factor information on incoming data |
|
387 | ||
388 | 478x |
if (!is.null(new_levels)) { |
389 | 18x |
df_row <- h_df_add_newlevels( |
390 | 18x |
df = df_row, |
391 | 18x |
.var = .var, |
392 | 18x |
new_levels = new_levels, |
393 | 18x |
new_levels_after = new_levels_after, |
394 | 18x |
addstr2levs = addstr2levs |
395 |
) |
|
396 |
} |
|
397 | ||
398 |
# if character var turn incoming data into factor, with levels from row-based df |
|
399 | 478x |
if (!is.na(.var) && is.character(df_row[[.var]]) && is.null(labelstr)) { |
400 | 20x |
levels <- sort(unique(df_row[[.var]])) |
401 | 20x |
df_row[[.var]] <- factor(df_row[[.var]], levels = levels) |
402 | 20x |
drop_levels <- FALSE |
403 |
} |
|
404 | ||
405 | 478x |
if (!is.null(labelstr)) { |
406 | 177x |
single_level <- labelstr |
407 | ||
408 | 177x |
if (!is.null(label_fstr) && grepl("%s", label_fstr, fixed = TRUE)) { |
409 | 21x |
single_level <- sprintf(label_fstr, single_level) |
410 |
} |
|
411 | ||
412 | 177x |
df_row[[.var]] <- as.character(df_row[[.var]]) |
413 | 177x |
df_row[[.var]][!is.na(df_row[[.var]])] <- single_level |
414 | ||
415 | 177x |
df_row[[.var]] <- factor(as.character(df_row[[.var]]), levels = single_level) |
416 | 177x |
drop_levels <- FALSE |
417 |
} |
|
418 | ||
419 | 478x |
if (drop_levels) { |
420 | 15x |
obs_levs <- unique(df_row[[.var]]) |
421 | 15x |
obs_levs <- intersect(levels(df_row[[.var]]), obs_levs) |
422 | ||
423 | ! |
if (!is.null(excl_levels)) obs_levs <- setdiff(obs_levs, excl_levels) |
424 | ||
425 | 15x |
val <- obs_levs |
426 | 15x |
excl_levels <- NULL |
427 |
} |
|
428 | ||
429 | 478x |
if (!is.null(val)) { |
430 |
# do not yet restrict to val levels, only update factors to the requested levels df_row <- |
|
431 | 35x |
df_row <- h_update_factor(df_row, .var, val) |
432 |
} |
|
433 | 478x |
if (!is.null(excl_levels)) { |
434 |
# do not yet exclude the level specified in excl_levels, only update factors to remove requested levels df_row |
|
435 | ||
436 | ! |
df_row <- h_update_factor(df_row, .var, excl_levels = excl_levels) |
437 | ! |
val <- levels(df_row[[.var]]) |
438 |
} |
|
439 | ||
440 |
## update data with level coming from label -- important that this is done after restriction to val levels!!! |
|
441 | 478x |
if (!is.null(label)) { |
442 |
# similar to processing when labelstr |
|
443 | 17x |
single_level <- label |
444 | 17x |
df_row[[.var]] <- as.character(df_row[[.var]]) |
445 | 17x |
df_row[[.var]][!is.na(df_row[[.var]])] <- single_level |
446 | 17x |
val <- label |
447 | 17x |
df_row[[.var]] <- factor(df_row[[.var]], levels = single_level) |
448 |
} |
|
449 | ||
450 |
# now update labels coming from label_map |
|
451 | 478x |
if (!is.null(label_map)) { |
452 | 15x |
split_info <- .spl_context[c("split", "value")] |
453 | 15x |
new_labels <- h_get_label_map(levels(df_row[[.var]]), label_map, .var, split_info) |
454 | ||
455 | 15x |
df_row[[.var]] <- factor(as.character(df_row[[.var]]), levels = levels(df_row[[.var]]), labels = new_labels) |
456 | ||
457 | 15x |
val <- new_labels |
458 |
} |
|
459 | ||
460 |
# now apply the updated factors from df_row to df as well due to drop_levels = TRUE can have different results when |
|
461 |
# applying all of the above to df |
|
462 | 478x |
col_expr <- .spl_context$cur_col_expr[[1]] |
463 | 478x |
df <- subset(df_row, eval(col_expr)) |
464 | ||
465 | 478x |
return(list(df_row = df_row, df = df, val = val)) |
466 |
} |
|
467 | ||
468 | ||
469 |
#' Get Label Map |
|
470 |
#' |
|
471 |
#' Maps labels based on the provided label map and split context. |
|
472 |
#' |
|
473 |
#' @param .labels Current labels. |
|
474 |
#' @param label_map Mapping for labels. |
|
475 |
#' @param .var Variable name. |
|
476 |
#' @param split_info Current split information. |
|
477 |
#' @return Mapped labels. |
|
478 |
#' @noRd |
|
479 |
#' @keywords internal |
|
480 |
h_get_label_map <- function(.labels, label_map, .var, split_info) { |
|
481 | 17x |
if (!is.null(label_map)) { |
482 | 17x |
if (!all(c("split", "value") %in% names(split_info))) { |
483 | ! |
stop("split_info does not contain required elements.") |
484 |
} |
|
485 | ||
486 |
### if label_map has a variable from row split, apply current splits on label_map tibble as well |
|
487 | 17x |
rowsplits <- split_info$split |
488 | ||
489 | 17x |
label_map_split <- intersect(names(label_map), rowsplits) |
490 | ||
491 | 17x |
if (!(length(label_map_split) == 0)) { |
492 | 1x |
for (i in seq_along(label_map_split)) { |
493 | 1x |
cursplvar <- label_map_split[i] |
494 | 1x |
cid <- match(cursplvar, rowsplits) |
495 | 1x |
cursplval <- split_info$value[cid] |
496 | ||
497 | 1x |
label_map <- label_map[label_map[[cursplvar]] == cursplval, ] |
498 |
} |
|
499 |
} |
|
500 | ||
501 | 17x |
if ("var" %in% names(label_map)) { |
502 | ! |
label_map <- label_map[label_map[["var"]] == .var, ] |
503 |
} |
|
504 | ||
505 | 17x |
.labels <- label_map$label[match(.labels, label_map$value)] |
506 | ||
507 | 17x |
if (anyNA(.labels)) { |
508 | ! |
stop("got a label map that doesn't provide labels for all values.") |
509 |
} |
|
510 |
} |
|
511 | ||
512 | 17x |
return(.labels) |
513 |
} |
|
514 | ||
515 | ||
516 |
#' A Frequency Data Preparation Function |
|
517 |
#' |
|
518 |
#' Prepares frequency data for analysis. |
|
519 |
#' @noRd |
|
520 |
#' @param df Data frame to prepare. |
|
521 |
#' @param labelstr Label string. |
|
522 |
#' @param .var Variable name. |
|
523 |
#' @param val Values for analysis. |
|
524 |
#' @param drop_levels Boolean, indicating if levels should be dropped. |
|
525 |
#' @param excl_levels Levels to exclude. |
|
526 |
#' @param new_levels New levels to add. |
|
527 |
#' @param new_levels_after Boolean for adding new levels after existing ones. |
|
528 |
#' @param addstr2levs String to add to new levels. |
|
529 |
#' @param .df_row Current data frame row. |
|
530 |
#' @param .spl_context Current split context. |
|
531 |
#' @param .N_col Number of columns. |
|
532 |
#' @param id Identifier variable. |
|
533 |
#' @param denom Denominator types. |
|
534 |
#' @param variables Variables to include in the analysis. |
|
535 |
#' @param label Label string. |
|
536 |
#' @param label_fstr Formatted label string. |
|
537 |
#' @param label_map Mapping for labels. |
|
538 |
#' @param .alt_df_full Alternative full data frame. |
|
539 |
#' @param denom_by Denominator grouping variable. |
|
540 |
#' @param .stats Statistics to compute. |
|
541 |
#' @return List containing prepared data frames and values. |
|
542 | ||
543 |
h_a_freq_dataprep <- function( |
|
544 |
df, |
|
545 |
labelstr = NULL, |
|
546 |
.var = NA, |
|
547 |
val = NULL, |
|
548 |
drop_levels = FALSE, |
|
549 |
excl_levels = NULL, |
|
550 |
new_levels = NULL, |
|
551 |
new_levels_after = FALSE, |
|
552 |
addstr2levs = NULL, |
|
553 |
.df_row, |
|
554 |
.spl_context, |
|
555 |
.N_col, |
|
556 |
id = "USUBJID", |
|
557 |
denom = c("N_col", "n_df", "n_altdf", "N_colgroup", "n_rowdf", "n_parentdf"), |
|
558 |
variables, |
|
559 |
label = NULL, |
|
560 |
label_fstr = NULL, |
|
561 |
label_map = NULL, |
|
562 |
.alt_df_full = NULL, |
|
563 |
denom_by = NULL, |
|
564 |
.stats) { |
|
565 | 463x |
denom <- match.arg(denom) |
566 | ||
567 | 463x |
df <- df[!is.na(df[[.var]]), ] |
568 | 463x |
.df_row <- .df_row[!is.na(.df_row[[.var]]), ] |
569 | ||
570 |
# if no stats requested, get all stats |
|
571 | 463x |
.stats <- junco_get_stats("a_freq_j", stats_in = .stats, custom_stats_in = NULL) |
572 | ||
573 |
### combine all preprocessing of incoming df/.df_row in one function do this outside stats derivation functions |
|
574 |
### (s_freq_j/) use all of val/excl_levels/drop_levels//new_levels/ label/label_map/labelstr/label_fstr |
|
575 | 463x |
upd_dfrow <- h_upd_dfrow( |
576 | 463x |
df_row = .df_row, |
577 | 463x |
.var = .var, |
578 | 463x |
val = val, |
579 | 463x |
excl_levels = excl_levels, |
580 | 463x |
drop_levels = drop_levels, |
581 | 463x |
new_levels = new_levels, |
582 | 463x |
new_levels_after = new_levels_after, |
583 | 463x |
addstr2levs = addstr2levs, |
584 | 463x |
label = label, |
585 | 463x |
label_map = label_map, |
586 | 463x |
labelstr = labelstr, |
587 | 463x |
label_fstr = label_fstr, |
588 | 463x |
.spl_context = .spl_context |
589 |
) |
|
590 | ||
591 | 463x |
.df_row <- upd_dfrow$df_row |
592 | 463x |
df <- upd_dfrow$df |
593 | ||
594 | 463x |
val <- upd_dfrow$val |
595 | ||
596 |
# from here onwards proceed with drop_levels = FALSE action has already been done in h_upd_dfrow, and proper |
|
597 |
# observed values will be passed to val for s_freq_j |
|
598 | 463x |
drop_levels <- FALSE |
599 | 463x |
excl_levels <- NULL |
600 | ||
601 |
### derive appropriate alt_df based upon .spl_context and .alt_df_full note that only row-based splits are done for |
|
602 |
### now only for variables from the first split_rows_by |
|
603 | 463x |
alt_df <- h_create_altdf( |
604 | 463x |
.spl_context, |
605 | 463x |
.df_row, |
606 | 463x |
.alt_df_full, |
607 | 463x |
denom_by = denom_by, |
608 | 463x |
id = id, |
609 | 463x |
variables = variables, |
610 | 463x |
denom = denom |
611 |
) |
|
612 | ||
613 | 463x |
new_denomdf <- alt_df |
614 | ||
615 | 463x |
parentdf <- h_denom_parentdf(.spl_context, denom, denom_by) |
616 | 463x |
if (denom == "n_parentdf") { |
617 | ! |
new_denomdf <- parentdf |
618 |
} |
|
619 | ||
620 | 463x |
return(list( |
621 | 463x |
df = df, |
622 | 463x |
.df_row = .df_row, |
623 | 463x |
val = val, |
624 | 463x |
drop_levels = drop_levels, |
625 | 463x |
excl_levels = excl_levels, |
626 | 463x |
alt_df = alt_df, |
627 | 463x |
parentdf = parentdf, |
628 | 463x |
new_denomdf = new_denomdf, |
629 | 463x |
.stats = .stats |
630 |
)) |
|
631 |
} |
|
632 | ||
633 | ||
634 |
#' Frequency Preparation in Rows |
|
635 |
#' |
|
636 |
#' Prepares frequency data in rows based on provided parameters. |
|
637 |
#' @noRd |
|
638 |
#' @param x_stats Statistics data. |
|
639 |
#' @param .stats_adj Adjusted statistics. |
|
640 |
#' @param .formats Format settings. |
|
641 |
#' @param labelstr Label string. |
|
642 |
#' @param label_fstr Formatted label string. |
|
643 |
#' @param label Label string. |
|
644 |
#' @param .indent_mods Indentation settings. |
|
645 |
#' @param .labels_n Labels for statistics. |
|
646 |
#' @param na_str String for NA values. |
|
647 |
#' @return List containing prepared statistics, formats, labels, and indentation. |
|
648 |
#' @noRd |
|
649 |
#' @keywords internal |
|
650 |
h_a_freq_prepinrows <- function( |
|
651 |
x_stats, |
|
652 |
.stats_adj, |
|
653 |
.formats, |
|
654 |
labelstr, |
|
655 |
label_fstr, |
|
656 |
label, |
|
657 |
.indent_mods, |
|
658 |
.labels_n, |
|
659 |
na_str) { |
|
660 |
# Fill in formatting defaults |
|
661 | ||
662 | 463x |
x_stats <- x_stats[.stats_adj] |
663 | ||
664 | 463x |
levels_per_stats <- lapply(x_stats, names) |
665 | ||
666 |
# Fill in formats/indents/labels with custom input and defaults |
|
667 | 463x |
.formats <- junco_get_formats_from_stats(.stats_adj, .formats, levels_per_stats) |
668 | ||
669 |
# lbls |
|
670 | 463x |
all_stats <- junco_get_stats("a_freq_j", stats_in = NULL, custom_stats_in = NULL) |
671 | 463x |
count_rr_stats <- grep("(count|rr_ci)", all_stats, value = TRUE) |
672 | 463x |
n_stats <- setdiff(all_stats, count_rr_stats) |
673 | 463x |
n_stats <- .stats_adj[.stats_adj %in% n_stats] |
674 | ||
675 | 463x |
if (length(.stats_adj) == 1 && length(n_stats) == 1) { |
676 | 45x |
if (!is.null(labelstr) && (!is.null(label))) { |
677 | ! |
lbls <- label |
678 | 45x |
} else if (!is.null(labelstr) && !is.null(.labels_n)) { |
679 | ! |
lbls <- .labels_n |
680 | 45x |
} else if (!is.null(labelstr) && is.null(label_fstr)) { |
681 | 42x |
lbls <- labelstr |
682 | 3x |
} else if (!is.null(labelstr) && !is.null(label_fstr) && grepl("%s", label_fstr, fixed = TRUE)) { |
683 | ! |
lbls <- sprintf(label_fstr, labelstr) |
684 | 3x |
} else if (!is.null(label)) { |
685 | 3x |
lbls <- label |
686 |
} else { |
|
687 | ! |
.labels <- .labels_n |
688 | ! |
lbls <- junco_get_labels_from_stats(.stats_adj, .labels, levels_per_stats) |
689 |
} |
|
690 |
} else { |
|
691 | 418x |
if (length(n_stats) > 1 && is.null(.labels_n)) { |
692 | ! |
msg <- paste0( |
693 | ! |
"recommend to specify non-null .labels_n argument when multiple n_stats selected (", |
694 | ! |
paste0(n_stats, collapse = ", "), |
695 |
")" |
|
696 |
) |
|
697 | ! |
message(msg) |
698 |
} |
|
699 |
### labels from the non n-stat statistics are present on levels_per_stats labels for the n-stat statistics come |
|
700 |
### from .labels_n (user) or from defaults |
|
701 | 418x |
.labels <- .labels_n |
702 | 418x |
lbls <- junco_get_labels_from_stats(.stats_adj, .labels, levels_per_stats) |
703 |
} |
|
704 | 463x |
.labels <- lbls |
705 | 463x |
.labels <- .unlist_keep_nulls(.labels) |
706 | ||
707 |
# indents |
|
708 | 463x |
.indent_mods_orig <- .indent_mods |
709 | 463x |
.indent_mods <- junco_get_indents_from_stats(.stats_adj, .indent_mods, levels_per_stats) |
710 | ||
711 |
# adjust indents when n_stat is included and no indents where passed by user |
|
712 | 463x |
if (length(n_stats) >= 1 && is.null(.indent_mods_orig)) { |
713 | 45x |
.indent_mods_new <- sapply( |
714 | 45x |
names(.indent_mods), |
715 | 45x |
FUN = function(x) { |
716 | 45x |
ret <- 0 |
717 | ! |
if (!(x %in% n_stats)) ret <- 1 |
718 | 45x |
return(ret) |
719 |
}, |
|
720 | 45x |
simplify = FALSE, |
721 | 45x |
USE.NAMES = TRUE |
722 |
) |
|
723 | 45x |
.indent_mods <- .indent_mods_new |
724 |
} |
|
725 | ||
726 | 463x |
.indent_mods <- .unlist_keep_nulls(.indent_mods) |
727 | ||
728 |
# .format_na_strs processing if na_str = c(NA, NA, NA) this will ensure the ci (NA, NA, NA) will be represented as |
|
729 |
# NE (NE, NE) the value NE is defined as the default to replace NA in our jjcs format |
|
730 | ||
731 | 463x |
if (!is.null(na_str)) { |
732 |
# Create a list of na_str values for each format in .formats |
|
733 | 463x |
.format_na_strs <- lapply(names(.formats), FUN = function(x) { |
734 | 2287x |
na_str |
735 |
}) |
|
736 | 463x |
names(.format_na_strs) <- names(.formats) |
737 |
} else { |
|
738 | ! |
.format_na_strs <- NULL |
739 |
} |
|
740 | ||
741 |
# Unlist stats + names |
|
742 | 463x |
x_stats <- .unlist_keep_nulls(x_stats) |
743 | 463x |
names(x_stats) <- names(.formats) |
744 | ||
745 | 463x |
return(list( |
746 | 463x |
x_stats = x_stats, |
747 | 463x |
.formats = .formats, |
748 | 463x |
.labels = .labels, |
749 | 463x |
.indent_mods = .indent_mods, |
750 | 463x |
.format_na_strs = .format_na_strs |
751 |
)) |
|
752 |
} |
|
753 | ||
754 |
#' Subset Combination |
|
755 |
#' |
|
756 |
#' Subsets a data frame based on specified combination criteria. |
|
757 |
#' @noRd |
|
758 |
#' @param df Data frame to subset. |
|
759 |
#' @param combosdf Data frame containing combinations. |
|
760 |
#' @param do_not_filter Variables to not filter. |
|
761 |
#' @param filter_var Variable used for filtering. |
|
762 |
#' @param flag_var Flag variable for filtering. |
|
763 |
#' @param colid Column ID for identification. |
|
764 |
#' @return Subsetted data frame. |
|
765 | ||
766 |
h_subset_combo <- function(df, combosdf, do_not_filter, filter_var, flag_var, colid) { |
|
767 |
### this is the core code for subsetting to appropriate combo level |
|
768 | 15x |
if (!is.null(flag_var)) { |
769 | 14x |
df <- df[df[[flag_var]] == "Y", ] |
770 |
} |
|
771 | ||
772 |
# get the string related to combosdf text from colid it is the last part of the column id after the . eg 'Active |
|
773 |
# Study Agent.Apalutamide.Thru 3 months' colid_str is 'Thru 3 months' colid_str <- stringr::str_split_i(colid, |
|
774 |
# '\\.', i = -1) |
|
775 | 15x |
colid_str <- tail(unlist(strsplit(colid, "\\.")), 1) |
776 | ||
777 | 15x |
filter_val <- combosdf[combosdf$valname == colid_str, ]$label |
778 | ||
779 | 15x |
if (!(colid_str %in% do_not_filter)) { |
780 | 11x |
df <- df |> |
781 | 11x |
dplyr::filter(get(filter_var) == filter_val) |
782 |
} |
|
783 | ||
784 | 15x |
return(df) |
785 |
} |
1 |
#' rm_other_facets_fact |
|
2 |
#' @param nm character. names of facets to keep. all other facets will be |
|
3 |
#' removed |
|
4 |
#' @returns a function suitable for use within the `post` portion make_split_fun |
|
5 | ||
6 |
rm_other_facets_fact <- function(nm) { |
|
7 | 1x |
function(ret, spl, .spl_context, fulldf) { |
8 | 1x |
keep <- which(names(ret$values) %in% nm) |
9 | 1x |
stopifnot(length(keep) > 0) |
10 |
# values already have subsetting expressions on them |
|
11 | 1x |
make_split_result(ret$values[keep], ret$datasplit[keep], ret$labels[keep]) |
12 |
} |
|
13 |
} |
|
14 | ||
15 |
#' @name real_add_overall_facet |
|
16 |
#' |
|
17 |
#' @title Add Overall Facet |
|
18 |
#' |
|
19 |
#' @description |
|
20 |
#' A function to help add an overall facet to your tables |
|
21 |
#' @param name character(1). Name/virtual 'value' for the new facet |
|
22 |
#' @param label character(1). label for the new facet |
|
23 |
#' @note current add_overall_facet is bugged, can use that directly after it's fixed |
|
24 |
#' https://github.com/insightsengineering/rtables/issues/768 |
|
25 |
#' @examples |
|
26 |
#' |
|
27 |
#' splfun <- make_split_fun(post = list(real_add_overall_facet('Total', 'Total'))) |
|
28 |
#' @export |
|
29 |
#' @returns function usable directly as a split function. |
|
30 |
#' |
|
31 |
real_add_overall_facet <- function(name, label) { |
|
32 | 1x |
function(ret, spl, .spl_context, fulldf) { |
33 | 1x |
add_to_split_result( |
34 | 1x |
ret, |
35 | 1x |
values = name, |
36 | 1x |
datasplit = stats::setNames(list(fulldf), name), |
37 | 1x |
labels = stats::setNames(label, name), |
38 | 1x |
subset_exprs = quote(TRUE) |
39 |
) |
|
40 |
} |
|
41 |
} |
|
42 | ||
43 |
#' @name make_combo_splitfun |
|
44 |
#' |
|
45 |
#' @title Split Function Helper |
|
46 |
#' |
|
47 |
#' @description |
|
48 |
#' A function which aids the construction for users to create their own split function for combined columns |
|
49 |
#' @param nm character(1). Name/virtual 'value' for the new facet |
|
50 |
#' @param label character(1). label for the new facet |
|
51 |
#' @param levels character or NULL. The levels to combine into the new facet, |
|
52 |
#' or NULL, indicating the facet should include all incoming data. |
|
53 |
#' @param rm_other_facets logical(1). Should facets other than the newly |
|
54 |
#' created one be removed. Defaults to `TRUE` |
|
55 |
#' @export |
|
56 |
#' @returns function usable directly as a split function. |
|
57 |
#' @examples |
|
58 |
#' aesevall_spf <- make_combo_splitfun(nm = 'AESEV_ALL', label = 'Any AE', levels = NULL) |
|
59 |
#' |
|
60 |
make_combo_splitfun <- function(nm, label = nm, levels = NULL, rm_other_facets = TRUE) { |
|
61 | 1x |
if (is.null(levels)) { |
62 | ! |
fn <- real_add_overall_facet(name = nm, label = label) |
63 |
} else { |
|
64 | 1x |
fn <- add_combo_facet(name = nm, label = label, levels = levels) |
65 |
} |
|
66 | 1x |
if (rm_other_facets) { |
67 | 1x |
rmfun <- rm_other_facets_fact(nm) |
68 |
} else { |
|
69 | ! |
rmfun <- NULL |
70 |
} |
|
71 | 1x |
make_split_fun(post = c(list(fn), if (rm_other_facets) list(rmfun))) |
72 |
} |
|
73 | ||
74 |
blank_regex <- "^[[:space:]]*$" |
|
75 | ||
76 |
combine_nonblank <- function(name, label) { |
|
77 | ! |
function(ret, spl, .spl_context, fulldf) { |
78 | ! |
df <- fulldf[!grepl(blank_regex, fulldf[[spl_variable(spl)]]), ] |
79 | ! |
add_to_split_result( |
80 | ! |
ret, |
81 | ! |
values = name, |
82 | ! |
datasplit = stats::setNames(list(df), name), |
83 | ! |
labels = stats::setNames(label, name) |
84 |
) |
|
85 |
} |
|
86 |
} |
|
87 | ||
88 |
rm_blank_levels <- function(df, spl, ...) { |
|
89 | ! |
var <- spl_variable(spl) |
90 | ! |
varvec <- df[[var]] |
91 | ! |
oldlevs <- levels(varvec) |
92 | ! |
newlevs <- oldlevs[!grepl(blank_regex, oldlevs)] |
93 | ! |
df <- df[!grepl(blank_regex, varvec), ] |
94 | ! |
df[[var]] <- factor(df[[var]], levels = newlevs) |
95 | ! |
df |
96 |
} |
|
97 | ||
98 | ||
99 |
find_torm <- function(spl_ret, torm, torm_regex, keep_matches) { |
|
100 | 2x |
if (!is.null(torm)) { |
101 | 2x |
ans_lgl <- names(spl_ret$datasplit) %in% torm |
102 |
} else { |
|
103 | ! |
ans_lgl <- grepl(torm_regex, names(spl_ret$datasplit)) |
104 |
} |
|
105 | 2x |
if (keep_matches) { |
106 | ! |
ans_lgl <- !ans_lgl |
107 |
} |
|
108 | 2x |
which(ans_lgl) |
109 |
} |
|
110 | ||
111 |
.check_rem_cond <- function(cond_str, cond_regex, spl_ctx, pos, type) { |
|
112 | 4x |
if (is.null(cond_str) && is.null(cond_regex)) { |
113 | ! |
return(TRUE) |
114 |
} |
|
115 | 4x |
if (!is.null(cond_str) && !is.null(cond_regex)) { |
116 | ! |
stop( |
117 | ! |
"Got both ", |
118 | ! |
paste(paste0(type, c("", "_regex")), collapse = " and "), |
119 | ! |
". Please specify at most one of these." |
120 |
) |
|
121 |
} |
|
122 | 4x |
ctx_data <- spl_ctx[[type]][pos] |
123 | 4x |
if (!is.null(cond_str)) { |
124 | 4x |
any(cond_str %in% ctx_data) |
125 |
} else { |
|
126 | ! |
any(grepl(cond_regex, ctx_data)) |
127 |
} |
|
128 |
} |
|
129 | ||
130 |
## handle support for NA and negative positions only called once but code is nicer with it factored out here |
|
131 |
resolve_ancestor_pos <- function(anc_pos, numrows) { |
|
132 | 2x |
if (is.na(anc_pos)) { |
133 | 2x |
anc_pos <- seq_len(numrows) |
134 | ! |
} else if (any(anc_pos < 0)) { |
135 | ! |
if (!all(anc_pos < 0)) { |
136 | ! |
stop("Got mix of negative and non-negative values for ancestor_pos; ", "this is not supported.") |
137 |
} |
|
138 |
## direct parent is actually NROW(.spl_context) so avoid off-by-1 error with the + 1 here |
|
139 | ! |
anc_pos <- numrows - (anc_pos + 1) |
140 |
} |
|
141 | 2x |
anc_pos |
142 |
} |
|
143 | ||
144 |
#' @name cond_rm_facets |
|
145 |
#' @title Conditional Removal of Facets |
|
146 |
#' @param facets character or NULL. Vector of facet names to be removed |
|
147 |
#' if condition(s) are met |
|
148 |
#' @param facets_regex character(1). Regular expression to identify facet |
|
149 |
#' names to be removed if condition(s) are met. |
|
150 |
#' @param ancestor_pos numeric(1). Row in spl_context to check the condition |
|
151 |
#' within. E.g., 1 represents the first split, 2 represents the second split |
|
152 |
#' nested within the first, etc. NA specifies that the conditions |
|
153 |
#' should be checked at all split levels. Negative integers indicate position |
|
154 |
#' counting back from the current one, e.g., -1 indicates the direct parent |
|
155 |
#' (most recent split before this one). Negative and positive/NA positions |
|
156 |
#' cannot be mixed. |
|
157 |
#' @param split character(1) or NULL. If specified, name of the split |
|
158 |
#' at position `ancestor_pos` must be identical to this value for |
|
159 |
#' the removal condition to be met. |
|
160 |
#' @param split_regex character(1) or NULL. If specified, a regular expression |
|
161 |
#' the name of the split at position `ancestor_pos` must match for |
|
162 |
#' the removal condition to be met. Cannot be specified at the same time |
|
163 |
#' as `split`. |
|
164 |
#' @param value character(1) or NULL. If specified, split (facet) value |
|
165 |
#' at position `ancestor_pos` must be identical to this value for |
|
166 |
#' removal condition to be met. |
|
167 |
#' @param value_regex character(1) or NULL. If specified, a regular expression |
|
168 |
#' the value of the split at position `ancestor_pos` must match for |
|
169 |
#' the removal condition to be met. Cannot be specified at the same time |
|
170 |
#' as `value`. |
|
171 |
#' @param keep_matches logical(1). Given the specified condition is met, |
|
172 |
#' should the facets removed be those matching `facets`/`facets_regex` |
|
173 |
#' (`FALSE`, the default), or those *not* matching (`TRUE`). |
|
174 |
#' |
|
175 |
#' @details Facet removal occurs when the specified condition(s) |
|
176 |
#' on the split(s) and or value(s) are met within at least one |
|
177 |
#' of the split_context rows indicated by `ancestor_pos`; otherwise |
|
178 |
#' the set of facets is returned unchanged. |
|
179 |
#' |
|
180 |
#' If facet removal is performed, either *all* facets which match `facets` (or |
|
181 |
#' `facets_regex` will be removed ( the default `keep_matches == FALSE` |
|
182 |
#' case), or all *non-matching* facets will be removed (when |
|
183 |
#' `keep_matches_only == TRUE`). |
|
184 |
#' |
|
185 |
#' @note A degenerate table is likely to be returned if all facets |
|
186 |
#' are removed. |
|
187 |
#' |
|
188 |
#' @returns a function suitable for use in `make_split_fun`'s |
|
189 |
#' `post` argument which encodes the specified condition. |
|
190 |
#' @export |
|
191 |
#' @examples |
|
192 |
#' |
|
193 |
#' rm_a_from_placebo <- cond_rm_facets( |
|
194 |
#' facets = "A", |
|
195 |
#' ancestor_pos = NA, |
|
196 |
#' value_regex = "Placeb", |
|
197 |
#' split = "ARM" |
|
198 |
#' ) |
|
199 |
#' mysplit <- make_split_fun(post = list(rm_a_from_placebo)) |
|
200 |
#' |
|
201 |
#' lyt <- basic_table() |> |
|
202 |
#' split_cols_by("ARM") |> |
|
203 |
#' split_cols_by("STRATA1", split_fun = mysplit) |> |
|
204 |
#' analyze("AGE", mean, format = "xx.x") |
|
205 |
#' build_table(lyt, ex_adsl) |
|
206 |
#' |
|
207 |
#' rm_bc_from_combo <- cond_rm_facets( |
|
208 |
#' facets = c("B", "C"), |
|
209 |
#' ancestor_pos = -1, |
|
210 |
#' value_regex = "Combi" |
|
211 |
#' ) |
|
212 |
#' mysplit2 <- make_split_fun(post = list(rm_bc_from_combo)) |
|
213 |
#' |
|
214 |
#' lyt2 <- basic_table() |> |
|
215 |
#' split_cols_by("ARM") |> |
|
216 |
#' split_cols_by("STRATA1", split_fun = mysplit2) |> |
|
217 |
#' analyze("AGE", mean, format = "xx.x") |
|
218 |
#' tbl2 <- build_table(lyt2, ex_adsl) |
|
219 |
#' tbl2 |
|
220 |
#' |
|
221 |
#' rm_bc_from_combo2 <- cond_rm_facets( |
|
222 |
#' facets_regex = "^A$", |
|
223 |
#' ancestor_pos = -1, |
|
224 |
#' value_regex = "Combi", |
|
225 |
#' keep_matches = TRUE |
|
226 |
#' ) |
|
227 |
#' mysplit3 <- make_split_fun(post = list(rm_bc_from_combo2)) |
|
228 |
#' |
|
229 |
#' lyt3 <- basic_table() |> |
|
230 |
#' split_cols_by("ARM") |> |
|
231 |
#' split_cols_by("STRATA1", split_fun = mysplit3) |> |
|
232 |
#' analyze("AGE", mean, format = "xx.x") |
|
233 |
#' tbl3 <- build_table(lyt3, ex_adsl) |
|
234 |
#' |
|
235 |
#' stopifnot(identical(cell_values(tbl2), cell_values(tbl3))) |
|
236 |
cond_rm_facets <- function( |
|
237 |
facets = NULL, |
|
238 |
facets_regex = NULL, |
|
239 |
ancestor_pos = 1, |
|
240 |
split = NULL, |
|
241 |
split_regex = NULL, |
|
242 |
value = NULL, |
|
243 |
value_regex = NULL, |
|
244 |
keep_matches = FALSE) { |
|
245 |
## detect errors/miscalling which don't even require us to have the facets |
|
246 | 4x |
if (is.null(split) && is.null(split_regex) && is.null(value) && is.null(value_regex)) { |
247 | 1x |
stop( |
248 | 1x |
"Must specify condition in terms of at least one of ", |
249 | 1x |
"split name (split or split_regex) or ", |
250 | 1x |
"facet value (value or value_regex)." |
251 |
) |
|
252 |
} |
|
253 | 3x |
if (is.null(facets) && is.null(facets_regex)) { |
254 | 1x |
stop("Must specify facets either facets or facets_regex, got neither.") |
255 | 2x |
} else if (!is.null(facets) && !is.null(facets_regex)) { |
256 | 1x |
stop("Got both facets and facets_regex, this is not supported, please specify only one.") |
257 |
} |
|
258 | 1x |
function(ret, spl, .spl_context, fulldf) { |
259 | 2x |
torm_ind <- c() |
260 | 2x |
ancestor_pos <- resolve_ancestor_pos(ancestor_pos, NROW(.spl_context)) |
261 | 2x |
torm_ind <- find_torm(ret, facets, facets_regex, keep_matches = keep_matches) |
262 | 2x |
fct_abbrev <- ifelse(is.null(facets_regex), paste(facets, collapse = ", "), facets_regex) |
263 | 2x |
if (length(torm_ind) == 0) { |
264 |
# nocov start |
|
265 | 1x |
warning( |
266 | 1x |
"No facets matched removal criteria [", |
267 | 1x |
fct_abbrev, |
268 |
"] ", |
|
269 | 1x |
"in function created with cond_rm_facets.\n", |
270 | 1x |
"Occured at path: ", |
271 | 1x |
spl_context_to_disp_path(.spl_context), |
272 | 1x |
call. = FALSE |
273 |
) |
|
274 |
# nocov end |
|
275 | 2x |
} else if (length(torm_ind) == length(ret$datasplit)) { |
276 |
# nocov start |
|
277 | 1x |
warning( |
278 | 1x |
"All facets matched removal criteria [", |
279 | 1x |
fct_abbrev, |
280 | 1x |
"] in function created with cond_rm_facets. ", |
281 | 1x |
"This will result in a degenerate table (if the condition ", |
282 | 1x |
"is met) within row splitting and in table-creation failing ", |
283 | 1x |
"entirely in column splitting.\n", |
284 | 1x |
"Occured at path: ", |
285 | 1x |
spl_context_to_disp_path(.spl_context), |
286 | 1x |
call. = FALSE |
287 |
) |
|
288 |
# nocov end |
|
289 |
} |
|
290 | 1x |
if ( |
291 | 2x |
.check_rem_cond(split, split_regex, .spl_context, ancestor_pos, type = "split") && |
292 | 2x |
.check_rem_cond(value, value_regex, .spl_context, ancestor_pos, type = "value") |
293 |
) { |
|
294 |
## find_torm handles the keep matching case, so by this point torm_ind is always the ones to remove |
|
295 | 1x |
ret <- lapply(ret, function(part) part[-torm_ind]) |
296 |
} |
|
297 | ||
298 | 2x |
ret |
299 |
} |
|
300 |
} |
|
301 | ||
302 | ||
303 |
#' @name rm_levels |
|
304 |
#' |
|
305 |
#' @title Removal of Levels |
|
306 |
#' |
|
307 |
#' @description |
|
308 |
#' custom function for removing level inside pre step in make_split_fun. |
|
309 |
#' |
|
310 |
#' @param excl Choose which level(s) to remove |
|
311 |
#' @return a function implementing pre-processing split behavior (for use in |
|
312 |
#' `make_split_fun(pre = )` which removes the levels in `excl` from the data |
|
313 |
#' before facets are generated. |
|
314 |
#' @export |
|
315 |
#' |
|
316 |
rm_levels <- function(excl) { |
|
317 | 2x |
function(df, spl, ...) { |
318 | 2x |
var <- spl_variable(spl) |
319 | 2x |
varvec <- df[[var]] |
320 | 2x |
oldlevs <- levels(varvec) |
321 | ||
322 | 2x |
exclevs <- oldlevs %in% excl |
323 | 2x |
newlevs <- oldlevs[!exclevs] |
324 | 2x |
df[[var]] <- factor(df[[var]], levels = newlevs) |
325 | 2x |
df |
326 |
} |
|
327 |
} |
|
328 | ||
329 |
#' Shortcut for Creating Custom Column Splits |
|
330 |
#' |
|
331 |
#' This is a short cut for a common use of [rtables::make_split_result()] where you need to create |
|
332 |
#' custom column splits with different labels but using the same full dataset for each column. |
|
333 |
#' It automatically sets up the values, datasplit (using the same full dataset for each column), |
|
334 |
#' and subset_exprs (using TRUE for all subsets) parameters for make_split_result(). |
|
335 |
#' |
|
336 |
#' @param ... sequence of named labels for the columns. |
|
337 |
#' @param fulldf (`data.frame`)\cr the `fulldf` which will be used for each column. |
|
338 |
#' @return The result from [rtables::make_split_result()]. |
|
339 |
#' |
|
340 |
#' @keywords internal |
|
341 |
short_split_result <- function(..., fulldf) { |
|
342 | 37x |
labels <- c(...) |
343 | 37x |
values <- stats::setNames(names(labels), names(labels)) |
344 | 37x |
datasplit <- stats::setNames(replicate(n = length(labels), list(fulldf)), names(labels)) |
345 | 37x |
subset_exprs <- replicate(n = length(labels), list(expression(TRUE))) |
346 | 37x |
make_split_result(values = values, labels = labels, datasplit = datasplit, subset_exprs = subset_exprs) |
347 |
} |
1 |
#' @name safe_prune_table |
|
2 |
#' |
|
3 |
#' @title Safely Prune Table With Empty Table Message If Needed |
|
4 |
#' |
|
5 |
#' @inheritParams rtables::prune_table |
|
6 |
#' |
|
7 |
#' @param empty_msg character(1). The message to place in the table |
|
8 |
#' if no rows were left after pruning |
|
9 |
#' |
|
10 |
#' @param spancols logical(1). Should `empty_msg` be spanned |
|
11 |
#' across the table's columns (`TRUE`) or placed in the |
|
12 |
#' rows row label (`FALSE`). Defaults to `FALSE` currently. |
|
13 |
#' |
|
14 |
#' @rdname safe_prune_table |
|
15 |
#' @return `tt` pruned based on the arguments, or, if |
|
16 |
#' pruning would remove all rows, a TableTree with the |
|
17 |
#' same column structure, and one row containing the |
|
18 |
#' empty message spanning all columns |
|
19 |
#' |
|
20 |
#' @export |
|
21 |
#' @examples |
|
22 |
#' prfun <- function(tt) TRUE |
|
23 |
#' |
|
24 |
#' lyt <- basic_table() |> |
|
25 |
#' split_cols_by("ARM") |> |
|
26 |
#' split_cols_by("STRATA1") |> |
|
27 |
#' split_rows_by("SEX") |> |
|
28 |
#' analyze("AGE") |
|
29 |
#' tbl <- build_table(lyt, ex_adsl) |
|
30 |
#' |
|
31 |
#' safe_prune_table(tbl, prfun) |
|
32 |
safe_prune_table <- function( |
|
33 |
tt, |
|
34 |
prune_func = prune_empty_level, |
|
35 |
stop_depth = NA, |
|
36 |
empty_msg = " - No Data To Display - ", |
|
37 |
spancols = FALSE) { |
|
38 | 6x |
ret <- prune_table(tt = tt, prune_func = prune_func, stop_depth = stop_depth, depth = 0) |
39 | 6x |
if (is.null(ret)) { |
40 | 3x |
ret <- tt[integer(), , keep_titles = TRUE, keep_topleft = TRUE, keep_footers = TRUE] |
41 | ||
42 | 3x |
if (spancols) { |
43 |
## this will eventually be the only option... |
|
44 | 1x |
ret <- sanitize_table_struct(ret, empty_msg = empty_msg) |
45 |
} else { |
|
46 | 2x |
tree_children(ret) <- list(rrowl(empty_msg, rep(" ", ncol(tt)))) |
47 |
} |
|
48 |
} |
|
49 | 6x |
ret |
50 |
} |
|
51 | ||
52 |
#' @name count_pruner |
|
53 |
#' |
|
54 |
#' @title Count Pruner |
|
55 |
#' |
|
56 |
#' @description |
|
57 |
#' This is a pruning constructor function which identifies records to be pruned |
|
58 |
#' based on the count (assumed to be the first statistic displayed when a compound |
|
59 |
#' statistic (e.g., ## / ## (XX.X percent) is presented). |
|
60 |
#' |
|
61 |
#' @param count count threshold. Function will keep all records strictly greater |
|
62 |
#' than this threshold. |
|
63 |
#' @param cols column path (character or integer (column indices)) |
|
64 |
#' @param cat_include Category to be considered for pruning |
|
65 |
#' @param cat_exclude logical Category to be excluded from pruning |
|
66 |
#' @export |
|
67 |
#' |
|
68 |
#' |
|
69 |
#' @examples |
|
70 |
#' |
|
71 |
#' ADSL <- data.frame( |
|
72 |
#' USUBJID = c( |
|
73 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
74 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
75 |
#' ), |
|
76 |
#' TRT01P = factor( |
|
77 |
#' c( |
|
78 |
#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", |
|
79 |
#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMB" |
|
80 |
#' ) |
|
81 |
#' ), |
|
82 |
#' FASFL = c("Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y"), |
|
83 |
#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"), |
|
84 |
#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N") |
|
85 |
#' ) |
|
86 |
#' |
|
87 |
#' lyt <- basic_table() |> |
|
88 |
#' split_cols_by("TRT01P") |> |
|
89 |
#' add_overall_col("Total") |> |
|
90 |
#' analyze("FASFL", |
|
91 |
#' var_labels = "Analysis set:", |
|
92 |
#' afun = a_freq_j, |
|
93 |
#' extra_args = list(label = "Full", val = "Y"), |
|
94 |
#' show_labels = "visible" |
|
95 |
#' ) |> |
|
96 |
#' analyze("SAFFL", |
|
97 |
#' var_labels = "Analysis set:", |
|
98 |
#' afun = a_freq_j, |
|
99 |
#' extra_args = list(label = "Safety", val = "Y"), |
|
100 |
#' show_labels = "visible" |
|
101 |
#' ) |> |
|
102 |
#' analyze("PKFL", |
|
103 |
#' var_labels = "Analysis set:", |
|
104 |
#' afun = a_freq_j, |
|
105 |
#' extra_args = list(label = "PK", val = "Y"), |
|
106 |
#' show_labels = "visible" |
|
107 |
#' ) |
|
108 |
#' |
|
109 |
#' result <- build_table(lyt, ADSL) |
|
110 |
#' |
|
111 |
#' result |
|
112 |
#' |
|
113 |
#' result <- prune_table( |
|
114 |
#' result, |
|
115 |
#' prune_func = count_pruner(cat_exclude = c("Safety"), cols = "Total") |
|
116 |
#' ) |
|
117 |
#' |
|
118 |
#' result |
|
119 |
#' |
|
120 |
#' @rdname count_pruner |
|
121 |
#' @returns function that can be utilized as pruning function in prune_table |
|
122 |
#' |
|
123 |
count_pruner <- function(count = 0, cat_include = NULL, cat_exclude = NULL, cols = c("TRT01A")) { |
|
124 | 6x |
function(tt) { |
125 |
# Do not ever prune the following rows. a row that should be kept in the table will get the value of FALSE |
|
126 | ||
127 | 6x |
if ( # nolint start |
128 | 1089x |
!methods::is(tt, "TableRow") || |
129 | 1089x |
methods::is(tt, "LabelRow") || |
130 | 1089x |
obj_label(tt) == " " || |
131 | 1089x |
(!is.null(cat_include) && |
132 | 1089x |
!obj_label(tt) %in% cat_include) || |
133 | 1089x |
(!is.null(cat_exclude) && obj_label(tt) %in% cat_exclude) |
134 | 6x |
) { # nolint end |
135 | 655x |
return(FALSE) |
136 |
} |
|
137 | ||
138 |
# Check the remaining rows to see if any meet the specified threshold. |
|
139 | ||
140 | 434x |
if (!is.null(cols)) { |
141 | 434x |
tt <- subset_cols(tt, cols) |
142 |
} |
|
143 | ||
144 |
# init return value to FALSE (not remove row) |
|
145 | 434x |
remove <- FALSE |
146 | ||
147 | 434x |
colpaths <- col_paths(tt) |
148 |
# identify non relative risk columns as in rrisk columns first element is not a count, but diff percentage with |
|
149 |
# small total columns and count > 0 (eg count = 1, n per group = 10), you can run into count1 = 1, countpbo = |
|
150 |
# 0, diffpct = 10, this row should not be considered though |
|
151 | 434x |
cp_nonrelrisk <- vapply( |
152 | 434x |
colpaths, |
153 | 434x |
function(pth) { |
154 | 1308x |
!any(grepl("rrisk", tolower(pth))) |
155 |
}, |
|
156 | 434x |
FUN.VALUE = TRUE |
157 |
) |
|
158 | ||
159 |
## only continue if at least one non relative risk column selected |
|
160 | 434x |
if (any(cp_nonrelrisk)) { |
161 |
# get cell_values of non rel risk columns |
|
162 | 434x |
cell_vals <- cell_values(tt)[cp_nonrelrisk] |
163 | ||
164 | 434x |
len_cell_vals <- lapply(cell_vals, function(x) { |
165 | 1302x |
length(x) |
166 |
}) |
|
167 | ||
168 | 434x |
if (any(len_cell_vals == 0)) { |
169 | ! |
stop("column cell values has not appropriate structure (a column with NULL value).") |
170 |
} |
|
171 | ||
172 |
# get count (first element) |
|
173 | 434x |
counts <- unlist( |
174 | 434x |
lapply(cell_vals, function(x) { |
175 | 1302x |
x[[1]] |
176 |
}), |
|
177 | 434x |
use.names = FALSE |
178 |
) |
|
179 | ||
180 |
# check that we do have counts only |
|
181 | 434x |
checkmate::assert_integerish(counts) |
182 | ||
183 | 434x |
keep <- counts > count |
184 |
# the row should be kept if at least one column has count above threshold |
|
185 | 434x |
keep <- any(keep) |
186 | ||
187 |
# pruning function should return TRUE if row has to be removed, ie opposite of keep |
|
188 | 434x |
remove <- !keep |
189 |
} |
|
190 | ||
191 | 434x |
return(remove) |
192 |
} |
|
193 |
} |
|
194 | ||
195 | ||
196 |
#' @name bspt_pruner |
|
197 |
#' |
|
198 |
#' @title Pruning Function for pruning based on a fraction and/or a difference from the control arm |
|
199 |
#' |
|
200 |
#' @description |
|
201 |
#' This is a pruning constructor function which identifies records to be pruned |
|
202 |
#' based on the the fraction from the percentages. In addition to just looking at a fraction within an arm |
|
203 |
#' this function also allows further flexibility to also prune based on a comparison versus the control arm. |
|
204 |
#' @param fraction fraction threshold. Function will keep all records strictly greater |
|
205 |
#' than this threshold. |
|
206 |
#' @param cols column path. |
|
207 |
#' @param keeprowtext Row to be excluded from pruning. |
|
208 |
#' @param reg_expr Apply keeprowtext as a regular expression (grepl with fixed = TRUE) |
|
209 |
#' @param control Control Group |
|
210 |
#' @param diff_from_control Difference from control threshold. |
|
211 |
#' @param only_more_often TRUE: Only consider when column pct is more often |
|
212 |
#' than control. FALSE: Also select a row where column pct is less often than |
|
213 |
#' control and abs(diff) above threshold |
|
214 |
#' |
|
215 |
#' @export |
|
216 |
#' |
|
217 |
#' |
|
218 |
#' @examples |
|
219 |
#' ADSL <- data.frame( |
|
220 |
#' USUBJID = c( |
|
221 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
222 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
223 |
#' ), |
|
224 |
#' TRT01P = c( |
|
225 |
#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", |
|
226 |
#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMB" |
|
227 |
#' ), |
|
228 |
#' FASFL = c("Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y"), |
|
229 |
#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"), |
|
230 |
#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N") |
|
231 |
#' ) |
|
232 |
#' |
|
233 |
#' ADSL <- ADSL |> |
|
234 |
#' dplyr::mutate(TRT01P = as.factor(TRT01P)) |> |
|
235 |
#' dplyr::mutate(SAFFL = factor(SAFFL, c("Y", "N"))) |> |
|
236 |
#' dplyr::mutate(PKFL = factor(PKFL, c("Y", "N"))) |
|
237 |
#' |
|
238 |
#' lyt <- basic_table() |> |
|
239 |
#' split_cols_by("TRT01P") |> |
|
240 |
#' add_overall_col("Total") |> |
|
241 |
#' split_rows_by( |
|
242 |
#' "FASFL", |
|
243 |
#' split_fun = drop_and_remove_levels("N"), |
|
244 |
#' child_labels = "hidden" |
|
245 |
#' ) |> |
|
246 |
#' analyze("FASFL", |
|
247 |
#' var_labels = "Analysis set:", |
|
248 |
#' afun = a_freq_j, |
|
249 |
#' show_labels = "visible", |
|
250 |
#' extra_args = list(label = "Full", .stats = "count_unique_fraction") |
|
251 |
#' ) |> |
|
252 |
#' split_rows_by( |
|
253 |
#' "SAFFL", |
|
254 |
#' split_fun = remove_split_levels("N"), |
|
255 |
#' child_labels = "hidden" |
|
256 |
#' ) |> |
|
257 |
#' analyze("SAFFL", |
|
258 |
#' var_labels = "Analysis set:", |
|
259 |
#' afun = a_freq_j, |
|
260 |
#' show_labels = "visible", |
|
261 |
#' extra_args = list(label = "Safety", .stats = "count_unique_fraction") |
|
262 |
#' ) |> |
|
263 |
#' split_rows_by( |
|
264 |
#' "PKFL", |
|
265 |
#' split_fun = remove_split_levels("N"), |
|
266 |
#' child_labels = "hidden" |
|
267 |
#' ) |> |
|
268 |
#' analyze("PKFL", |
|
269 |
#' var_labels = "Analysis set:", |
|
270 |
#' afun = a_freq_j, |
|
271 |
#' show_labels = "visible", |
|
272 |
#' extra_args = list(label = "PK", .stats = "count_unique_fraction") |
|
273 |
#' ) |
|
274 |
#' |
|
275 |
#' result <- build_table(lyt, ADSL) |
|
276 |
#' |
|
277 |
#' result |
|
278 |
#' |
|
279 |
#' result <- prune_table( |
|
280 |
#' result, |
|
281 |
#' prune_func = bspt_pruner( |
|
282 |
#' fraction = 0.05, |
|
283 |
#' keeprowtext = "Safety", |
|
284 |
#' cols = c("Total") |
|
285 |
#' ) |
|
286 |
#' ) |
|
287 |
#' |
|
288 |
#' result |
|
289 |
#' @rdname bspt_pruner |
|
290 |
#' @returns function that can be utilized as pruning function in prune_table |
|
291 |
#' |
|
292 |
bspt_pruner <- function( |
|
293 |
fraction = 0.05, |
|
294 |
keeprowtext = "Analysis set: Safety", |
|
295 |
reg_expr = FALSE, |
|
296 |
control = NULL, |
|
297 |
diff_from_control = NULL, |
|
298 |
only_more_often = TRUE, |
|
299 |
cols = c("TRT01A")) { |
|
300 | 12x |
if (is.null(fraction) && is.null(diff_from_control)) { |
301 | 1x |
stop("At least one of fraction or diff_from_control must be non-NULL.") |
302 |
} |
|
303 | 11x |
if (!is.null(diff_from_control) && is.null(control)) { |
304 | 1x |
stop("control must be specified when diff_from_control is not NULL.") |
305 |
} |
|
306 | ||
307 | 10x |
function(tt) { |
308 |
# Do not ever prune the following rows. |
|
309 | 137x |
if (!methods::is(tt, "TableRow") || methods::is(tt, "LabelRow")) { |
310 | 24x |
return(FALSE) |
311 |
} |
|
312 | ||
313 | 10x |
if ( |
314 | 113x |
reg_expr && |
315 | 113x |
any(sapply(keeprowtext, function(x) { |
316 | ! |
grepl(x, obj_label(tt), fixed = TRUE) |
317 |
})) |
|
318 |
) { |
|
319 | ! |
return(FALSE) |
320 |
} |
|
321 | 113x |
if (!reg_expr && obj_label(tt) %in% keeprowtext) { |
322 | 10x |
return(FALSE) |
323 |
} |
|
324 | ||
325 |
# init return value to FALSE (not remove row) |
|
326 | 103x |
remove <- FALSE |
327 | ||
328 |
# needed later for control column |
|
329 | 103x |
tt_all_cols <- tt |
330 | ||
331 | 103x |
if (!is.null(cols)) { |
332 | 103x |
tt <- subset_cols(tt, cols) |
333 |
} |
|
334 | ||
335 | 103x |
colpaths <- col_paths(tt) |
336 |
# identify non relative risk columns |
|
337 | 103x |
cp_nonrelrisk <- vapply( |
338 | 103x |
colpaths, |
339 | 103x |
function(pth) { |
340 | 329x |
!any(grepl("rrisk", tolower(pth))) |
341 |
}, |
|
342 | 103x |
FUN.VALUE = TRUE |
343 |
) |
|
344 | ||
345 |
## only continue if at least one non relative risk column selected |
|
346 | 103x |
if (any(cp_nonrelrisk)) { |
347 |
# get rid of rel risk columns (if present) |
|
348 | 103x |
cell_vals <- cell_values(tt)[cp_nonrelrisk] |
349 | ||
350 | 103x |
len_cell_vals <- lapply(cell_vals, function(x) { |
351 | 296x |
length(x) |
352 |
}) |
|
353 | ||
354 | 103x |
if (any(len_cell_vals < 2)) { |
355 | ! |
stop("column cell values has not appropriate structure (less than 2 values in cell).") |
356 |
} |
|
357 | ||
358 |
# get count and percentage columns counts in first col pcts in second col |
|
359 | 103x |
counts <- unname(lst_slicer(cell_vals, 1, numeric(1))) |
360 | 103x |
pcts <- unname(lst_slicer(cell_vals, 2, numeric(1))) |
361 | ||
362 | 103x |
checkmate::check_integerish(counts) |
363 |
# similarly check that pcts is indeed a percentage |
|
364 | 103x |
if (!all(dplyr::between(pcts, 0, 1))) { |
365 | ! |
stop("second value column cell is not a percentage.") |
366 |
} |
|
367 | ||
368 | 103x |
if (!is.null(fraction)) { |
369 |
# avoid problems with FALSE for 0.05 >= 0.05 |
|
370 | 103x |
check_pcts <- pcts >= fraction | |
371 | 103x |
vapply( |
372 | 103x |
pcts, |
373 | 103x |
function(x) { |
374 | 296x |
isTRUE(all.equal(x, fraction)) |
375 |
}, |
|
376 | 103x |
TRUE |
377 |
) |
|
378 |
} |
|
379 | ||
380 | 103x |
if (!is.null(diff_from_control)) { |
381 |
# get control group from all columns tt (in case the columns selected would not include the control |
|
382 |
# group) |
|
383 | 78x |
colpaths0 <- col_paths(subset_cols(tt_all_cols, c(control))) |
384 | 78x |
cp0_nonrelrisk <- vapply( |
385 | 78x |
colpaths0, |
386 | 78x |
function(pth) { |
387 | 83x |
!any(grepl("rrisk", tolower(pth))) |
388 |
}, |
|
389 | 78x |
FUN.VALUE = TRUE |
390 |
) |
|
391 | 78x |
colpaths0 <- colpaths0[cp0_nonrelrisk] |
392 | ||
393 | 78x |
if (length(colpaths0) != 1) { |
394 | 1x |
stop("control group spec does not result in single column.") |
395 |
} else { |
|
396 | 77x |
colpaths0 <- colpaths0[[1]] |
397 |
# get percent from control group |
|
398 | 77x |
pct0 <- cell_values(tt_all_cols, colpath = c(control))[[1]][2] |
399 | ||
400 | 77x |
if (!dplyr::between(pct0, 0, 1)) { |
401 | ! |
stop("second value of control column cell is not a percentage.") |
402 |
} |
|
403 |
} |
|
404 | ||
405 |
# check if control group column was also present in main column selector |
|
406 | 77x |
colid0 <- which(sapply(colpaths, function(x) { |
407 | 248x |
all(x == colpaths0) |
408 |
})) |
|
409 | ||
410 | 77x |
pctdiffs <- pcts - pct0 |
411 | ||
412 |
# exclude control column from this (would only be relevant if we want to check criteria on all columns |
|
413 |
# rather than any column) |
|
414 | 66x |
if (!identical(colid0, integer(0))) pctdiffs <- pctdiffs[-colid0] |
415 | ||
416 |
# if not only_more_often be equally strict on worse from control, better as control ie also select rows |
|
417 |
# where column is less often than control pct = 0.07 pct0 = 0.10 (diff = -0.03 ) |
|
418 | 22x |
if (!only_more_often) pctdiffs <- abs(pctdiffs) |
419 | ||
420 |
# avoid problems with FALSE for 0.02 >= 0.02 |
|
421 | 77x |
check_diffpcts <- pctdiffs >= diff_from_control | |
422 | 77x |
vapply( |
423 | 77x |
pctdiffs, |
424 | 77x |
function(x) { |
425 | 161x |
isTRUE(all.equal(x, diff_from_control)) |
426 |
}, |
|
427 | 77x |
TRUE |
428 |
) |
|
429 |
} |
|
430 | ||
431 |
# final step to check if row should be kept in table |
|
432 | 102x |
if (!is.null(fraction) && is.null(diff_from_control)) { |
433 |
# keep row in table if any column pct above threshold |
|
434 | 25x |
keep <- any(check_pcts) |
435 | 77x |
} else if ((!is.null(fraction) && !is.null(diff_from_control))) { |
436 |
# if any column pct above threshold AND any diff pct above threshold note any is used in both |
|
437 |
# expressions separately situation pct = 0.04 pct0 = 0.06 should be kept (fraction = 0.05, abs(diff |
|
438 |
# control) = 0.02) this problem only arises in the non-default situation when only_more_often is FALSE |
|
439 |
# (ie when column pct is less often than control) this would not be the case if we would any(check_pcts |
|
440 |
# & check_diffpcts) as 0.04 is below fraction |
|
441 | 77x |
keep <- any(check_pcts) && any(check_diffpcts) |
442 | ! |
} else if ((is.null(fraction) && !is.null(diff_from_control))) { |
443 | ! |
keep <- any(check_diffpcts) |
444 |
} |
|
445 | ||
446 |
# pruning function should return TRUE if row has to be removed, ie opposite of keep |
|
447 | 102x |
remove <- !keep |
448 |
} |
|
449 | ||
450 | 102x |
return(remove) |
451 |
} |
|
452 |
} |
|
453 | ||
454 |
lst_slicer <- function(lst, ind, type) { |
|
455 | 206x |
vapply(lst, `[[`, i = ind, type) |
456 |
} |
|
457 | ||
458 |
#' @name remove_rows |
|
459 |
#' |
|
460 |
#' @title |
|
461 |
#' Pruning function to remove specific rows of a table regardless of counts |
|
462 |
#' @description |
|
463 |
#' This function will remove all rows of a table based on the row text |
|
464 |
#' provided by the user. |
|
465 |
#' @param removerowtext define a text string for which any row with row text will be removed. |
|
466 |
#' @param reg_expr Apply removerowtext as a regular expression (grepl with fixed = TRUE) |
|
467 |
#' @export |
|
468 |
#' @rdname remove_rows |
|
469 |
#' |
|
470 |
#' |
|
471 |
#' @examples |
|
472 |
#' ADSL <- data.frame( |
|
473 |
#' USUBJID = c( |
|
474 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
475 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
476 |
#' ), |
|
477 |
#' TRT01P = c( |
|
478 |
#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", "Placebo", |
|
479 |
#' "Placebo", "Placebo", "ARMA", "ARMB" |
|
480 |
#' ), |
|
481 |
#' Category = c( |
|
482 |
#' "Cat 1", "Cat 2", "Cat 1", "Unknown", "Cat 2", |
|
483 |
#' "Cat 1", "Unknown", "Cat 1", "Cat 2", "Cat 1" |
|
484 |
#' ), |
|
485 |
#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"), |
|
486 |
#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N") |
|
487 |
#' ) |
|
488 |
#' |
|
489 |
#' ADSL <- ADSL |> |
|
490 |
#' dplyr::mutate(TRT01P = as.factor(TRT01P)) |
|
491 |
#' |
|
492 |
#' lyt <- basic_table() |> |
|
493 |
#' split_cols_by("TRT01P") |> |
|
494 |
#' analyze( |
|
495 |
#' "Category", |
|
496 |
#' afun = a_freq_j, |
|
497 |
#' extra_args = list(.stats = "count_unique_fraction") |
|
498 |
#' ) |
|
499 |
#' |
|
500 |
#' result <- build_table(lyt, ADSL) |
|
501 |
#' |
|
502 |
#' result |
|
503 |
#' |
|
504 |
#' result <- prune_table(result, prune_func = remove_rows(removerowtext = "Unknown")) |
|
505 |
#' |
|
506 |
#' result |
|
507 |
#' @aliases remove_rows |
|
508 |
#' @returns function that can be utilized as pruning function in prune_table |
|
509 |
#' |
|
510 |
remove_rows <- function(removerowtext = NULL, reg_expr = FALSE) { |
|
511 | 5x |
function(tt) { |
512 | 97x |
if (!methods::is(tt, "TableRow") || methods::is(tt, "LabelRow")) { |
513 | 5x |
return(FALSE) |
514 |
} |
|
515 | ||
516 | 92x |
ret <- FALSE |
517 | 92x |
if (!is.null(removerowtext)) { |
518 | 72x |
if (!reg_expr) { |
519 | 37x |
ret <- obj_label(tt) %in% removerowtext |
520 |
} else { |
|
521 | 35x |
ret <- any(sapply(removerowtext, function(x) { |
522 | 52x |
grepl(x, obj_label(tt), fixed = TRUE) |
523 |
})) |
|
524 |
} |
|
525 |
} |
|
526 | ||
527 | 92x |
ret |
528 |
} |
|
529 |
} |
|
530 | ||
531 |
#' @name keep_non_null_rows |
|
532 |
#' |
|
533 |
#' @title Pruning Function to accommodate removal of completely NULL rows within a table |
|
534 |
#' |
|
535 |
#' @description |
|
536 |
#' Condition function on individual analysis rows. Flag as FALSE when all |
|
537 |
#' columns are NULL, as then the row should not be kept. To be utilized as a |
|
538 |
#' row_condition in function tern::keep_rows |
|
539 |
#' |
|
540 |
#' @param tr table tree object |
|
541 |
#' @export |
|
542 |
#' |
|
543 |
#' |
|
544 |
#' @examples |
|
545 |
#' |
|
546 |
#' library(dplyr) |
|
547 |
#' |
|
548 |
#' ADSL <- data.frame( |
|
549 |
#' USUBJID = c( |
|
550 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
551 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
552 |
#' ), |
|
553 |
#' TRT01P = c( |
|
554 |
#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", "Placebo", |
|
555 |
#' "Placebo", "Placebo", "ARMA", "ARMB" |
|
556 |
#' ), |
|
557 |
#' AGE = c(34, 56, 75, 81, 45, 75, 48, 19, 32, 31), |
|
558 |
#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"), |
|
559 |
#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N") |
|
560 |
#' ) |
|
561 |
#' |
|
562 |
#' ADSL <- ADSL |> |
|
563 |
#' mutate(TRT01P = as.factor(TRT01P)) |
|
564 |
#' |
|
565 |
#' create_blank_line <- function(x) { |
|
566 |
#' list( |
|
567 |
#' "Mean" = rcell(mean(x), format = "xx.x"), |
|
568 |
#' " " = rcell(NULL), |
|
569 |
#' "Max" = rcell(max(x)) |
|
570 |
#' ) |
|
571 |
#' } |
|
572 |
#' |
|
573 |
#' lyt <- basic_table() |> |
|
574 |
#' split_cols_by("TRT01P") |> |
|
575 |
#' analyze("AGE", afun = create_blank_line) |
|
576 |
#' |
|
577 |
#' result <- build_table(lyt, ADSL) |
|
578 |
#' |
|
579 |
#' result |
|
580 |
#' result <- prune_table(result, prune_func = tern::keep_rows(keep_non_null_rows)) |
|
581 |
#' |
|
582 |
#' result |
|
583 |
#' @rdname keep_non_null_rows |
|
584 |
#' @returns a function that can be utilized as a row_condition in the tern::keep_rows function |
|
585 |
#' |
|
586 |
keep_non_null_rows <- function(tr) { |
|
587 | 42x |
if (methods::is(tr, "DataRow")) { |
588 | 41x |
r_cellvalue_null <- unlist(lapply(cell_values(tr), is.null)) |
589 | 41x |
check <- all(r_cellvalue_null) |
590 | ||
591 |
### if check TRUE (all cell values NULL -- need to return FALSE as need to remove) |
|
592 | 41x |
ret <- !check |
593 |
} else { |
|
594 | 1x |
ret <- TRUE |
595 |
} |
|
596 | 42x |
ret |
597 |
} |
1 |
#' Patient years exposure |
|
2 |
#' @description Statistical/Analysis Function for presenting Patient years exposure summary data |
|
3 |
#' |
|
4 |
#' @name a_patyrs_j |
|
5 |
#' @order 1 |
|
6 |
NULL |
|
7 | ||
8 |
#' @describeIn a_patyrs_j Statistical Function for Patient years exposure summary data |
|
9 |
#' @param df (`data.frame`)\cr data set containing all analysis variables. |
|
10 |
#' @param .var (`string`)\cr variable name containing the patient years data. |
|
11 |
#' @param id (`string`)\cr subject variable name. |
|
12 |
#' @param .alt_df_full (`dataframe`)\cr alternative dataset for calculations. |
|
13 |
#' @param source (`string`)\cr source of data, either "alt_df" or "df". |
|
14 |
#' @param inriskdiffcol (`logical`)\cr flag indicating if the function is called within a risk difference column. |
|
15 |
#' |
|
16 |
#' @return |
|
17 |
#' * `s_patyrs_j()` returns a list containing the patient years statistics. |
|
18 |
#' The list of available statistics for can be viewed by running `junco_get_stats("a_patyrs_j")`. |
|
19 |
#' Currently, this is just a single statistic `patyrs`, patient years of exposure. |
|
20 |
#' |
|
21 |
#' @keywords internal |
|
22 |
s_patyrs_j <- function( |
|
23 |
df, |
|
24 |
.var, |
|
25 |
id = "USUBJID", |
|
26 |
.alt_df_full, |
|
27 |
source = c("alt_df", "df"), |
|
28 |
inriskdiffcol = FALSE) { |
|
29 | 5x |
source <- match.arg(source) |
30 | ||
31 | 5x |
if (source == "alt_df") { |
32 | 5x |
if (is.null(.alt_df_full)) { |
33 | ! |
stop(paste( |
34 | ! |
"s_patyrs_j with source = alt_df requires a non-null .alt_df_full" |
35 |
)) |
|
36 |
} |
|
37 | 5x |
df <- .alt_df_full |
38 |
} |
|
39 | 5x |
df <- unique(df[, c(id, .var)]) |
40 | ||
41 | 5x |
x <- list() |
42 | ||
43 | 5x |
if (!inriskdiffcol) { |
44 | 3x |
patyrs <- sum(df[[.var]]) |
45 |
} else { |
|
46 | 2x |
patyrs <- list(x = NULL) |
47 |
} |
|
48 | 5x |
x[["patyrs"]] <- stats::setNames(patyrs, nm = "patyrs") |
49 | ||
50 | 5x |
return(x) |
51 |
} |
|
52 | ||
53 |
#' @describeIn a_patyrs_j Formatted analysis function for patient years summary which is used |
|
54 |
#' as `afun` in `analyze` or `cfun` in `summarize_row_groups`. |
|
55 |
#' |
|
56 |
#' |
|
57 |
#' @param df (`data.frame`)\cr data set containing all analysis variables. |
|
58 |
#' @param .var (`string`)\cr variable name containing the patient years data. |
|
59 |
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split. |
|
60 |
#' @param id (`string`)\cr subject variable name. |
|
61 |
#' @param .alt_df_full (`dataframe`)\cr alternative dataset for calculations. |
|
62 |
#' @param .formats (named 'character' or 'list')\cr formats for the statistics. |
|
63 |
#' @param .labels (named 'character')\cr labels for the statistics. |
|
64 |
#' @param source (`string`)\cr source of data, either "alt_df" or "df". |
|
65 |
#' @param .spl_context (`data.frame`)\cr gives information about ancestor split states. |
|
66 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
67 |
#' |
|
68 |
#' @return |
|
69 |
#' * `a_patyrs_j` returns the corresponding list with formatted [rtables::CellValue()]. |
|
70 |
#' |
|
71 |
#' @export |
|
72 |
#' |
|
73 |
#' @examples |
|
74 |
#' library(tern) |
|
75 |
#' library(dplyr) |
|
76 |
#' trtvar <- "ARM" |
|
77 |
#' ctrl_grp <- "B: Placebo" |
|
78 |
#' cutoffd <- as.Date("2023-09-24") |
|
79 |
#' |
|
80 |
#' |
|
81 |
#' adexsum <- ex_adsl |> |
|
82 |
#' create_colspan_var( |
|
83 |
#' non_active_grp = ctrl_grp, |
|
84 |
#' non_active_grp_span_lbl = " ", |
|
85 |
#' active_grp_span_lbl = "Active Study Agent", |
|
86 |
#' colspan_var = "colspan_trt", |
|
87 |
#' trt_var = trtvar |
|
88 |
#' ) |> |
|
89 |
#' mutate( |
|
90 |
#' rrisk_header = "Risk Difference (95% CI)", |
|
91 |
#' rrisk_label = paste(!!rlang::sym(trtvar), "vs", ctrl_grp), |
|
92 |
#' TRTDURY = case_when( |
|
93 |
#' !is.na(EOSDY) ~ EOSDY, |
|
94 |
#' TRUE ~ as.integer(cutoffd - as.Date(TRTSDTM) + 1) |
|
95 |
#' ) |
|
96 |
#' ) |> |
|
97 |
#' select(USUBJID, !!rlang::sym(trtvar), colspan_trt, rrisk_header, rrisk_label, TRTDURY) |
|
98 |
#' |
|
99 |
#' adae <- ex_adae |> |
|
100 |
#' group_by(USUBJID, AEDECOD) |> |
|
101 |
#' select(USUBJID, AEDECOD, ASTDY) |> |
|
102 |
#' mutate(rwnum = row_number()) |> |
|
103 |
#' mutate(AOCCPFL = case_when( |
|
104 |
#' rwnum == 1 ~ "Y", |
|
105 |
#' TRUE ~ NA |
|
106 |
#' )) |> |
|
107 |
#' filter(AOCCPFL == "Y") |
|
108 |
#' |
|
109 |
#' aefup <- left_join(adae, adexsum, by = "USUBJID") |
|
110 |
#' |
|
111 |
#' colspan_trt_map <- create_colspan_map(adexsum, |
|
112 |
#' non_active_grp = ctrl_grp, |
|
113 |
#' non_active_grp_span_lbl = " ", |
|
114 |
#' active_grp_span_lbl = "Active Study Agent", |
|
115 |
#' colspan_var = "colspan_trt", |
|
116 |
#' trt_var = trtvar |
|
117 |
#' ) |
|
118 |
#' |
|
119 |
#' ref_path <- c("colspan_trt", " ", trtvar, ctrl_grp) |
|
120 |
#' |
|
121 |
#' ################################################################################ |
|
122 |
#' # Define layout and build table: |
|
123 |
#' ################################################################################ |
|
124 |
#' |
|
125 |
#' lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx", top_level_section_div = " ") |> |
|
126 |
#' split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) |> |
|
127 |
#' split_cols_by(trtvar) |> |
|
128 |
#' split_cols_by("rrisk_header", nested = FALSE) |> |
|
129 |
#' split_cols_by(trtvar, labels_var = "rrisk_label", split_fun = remove_split_levels(ctrl_grp)) |> |
|
130 |
#' analyze("TRTDURY", |
|
131 |
#' nested = FALSE, |
|
132 |
#' show_labels = "hidden", |
|
133 |
#' afun = a_patyrs_j |
|
134 |
#' ) |
|
135 |
#' result <- build_table(lyt, aefup, alt_counts_df = adexsum) |
|
136 |
#' result |
|
137 |
#' |
|
138 |
a_patyrs_j <- function( |
|
139 |
df, |
|
140 |
.var, |
|
141 |
.df_row, |
|
142 |
id = "USUBJID", |
|
143 |
.alt_df_full = NULL, |
|
144 |
.formats = NULL, |
|
145 |
.labels = NULL, |
|
146 |
source = c("alt_df", "df"), |
|
147 |
.spl_context, |
|
148 |
.stats = "patyrs") { |
|
149 | 5x |
source <- match.arg(source) |
150 | ||
151 | 5x |
if (length(.stats) > 1 || (length(.stats) == 1 && .stats != "patyrs")) { |
152 | ! |
stop("a_patyrs_j: .stats must be 'patyrs'.") |
153 |
} |
|
154 | ||
155 | 5x |
check_alt_df_full(source, "alt_df", .alt_df_full) |
156 | ||
157 | 5x |
col_expr <- .spl_context$cur_col_expr[[1]] |
158 |
## colid can be used to figure out if we're in the relative risk columns or not |
|
159 | 5x |
colid <- .spl_context$cur_col_id[[1]] |
160 | 5x |
inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE) |
161 | ||
162 | 5x |
if (source == "alt_df") { |
163 |
### derive appropriate alt_df based upon .spl_context and .alt_df_full |
|
164 |
### note that this is not yet within the current column |
|
165 | ||
166 | 5x |
alt_df <- h_create_altdf( |
167 | 5x |
.spl_context, |
168 | 5x |
.df_row, |
169 | 5x |
.alt_df_full, |
170 | 5x |
denom_by = NULL, |
171 | 5x |
id = id, |
172 | 5x |
variables = NULL, |
173 | 5x |
denom = "n_altdf" |
174 |
) |
|
175 |
## restrict to current column |
|
176 | 5x |
new_denomdf <- subset(alt_df, eval(col_expr)) |
177 |
} else { |
|
178 | ! |
new_denomdf <- df |
179 |
} |
|
180 | ||
181 | 5x |
x_stats <- s_patyrs_j( |
182 | 5x |
df, |
183 | 5x |
.alt_df_full = new_denomdf, |
184 | 5x |
.var = .var, |
185 | 5x |
id = id, |
186 | 5x |
source = source, |
187 | 5x |
inriskdiffcol = inriskdiffcol |
188 |
) |
|
189 | ||
190 | 5x |
x_stats <- x_stats[.stats] |
191 | ||
192 | 5x |
levels_per_stats <- lapply(x_stats, names) |
193 | ||
194 | 5x |
.formats <- junco_get_formats_from_stats(.stats, .formats, levels_per_stats) |
195 | 5x |
.labels <- junco_get_labels_from_stats(.stats, .labels, levels_per_stats) |
196 | 5x |
.labels <- .unlist_keep_nulls(.labels) |
197 | ||
198 | 5x |
x_stats <- x_stats[.stats] |
199 | ||
200 |
# Unlist stats + names |
|
201 | 5x |
x_stats <- .unlist_keep_nulls(x_stats) |
202 | 5x |
names(x_stats) <- names(.formats) |
203 | ||
204 |
### final step: turn requested stats into rtables rows |
|
205 | 5x |
inrows <- in_rows( |
206 | 5x |
.list = x_stats, |
207 | 5x |
.formats = .formats, |
208 | 5x |
.labels = .labels |
209 |
) |
|
210 | ||
211 | 5x |
return(inrows) |
212 |
} |
|
213 | ||
214 |
#' Exposure-Adjusted Incidence Rate |
|
215 |
#' @description |
|
216 |
#' Statistical/Analysis Function for presenting Exposure-Adjusted Incidence Rate summary data |
|
217 |
#' |
|
218 |
#' |
|
219 |
#' @name a_eair100_j |
|
220 |
NULL |
|
221 | ||
222 |
#' @noRd |
|
223 |
#' @describeIn a_eair100_j |
|
224 |
#' calculates exposure-adjusted incidence rates (EAIR) per 100 person-years for a |
|
225 |
#' specific level of a variable. |
|
226 |
#' |
|
227 |
#' |
|
228 |
#' @param levii (`string`)\cr the specific level of the variable to calculate EAIR for. |
|
229 |
#' @param df (`data.frame`)\cr data set containing all analysis variables. |
|
230 |
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split. |
|
231 |
#' @param .var (`string`)\cr variable name that is passed by `rtables`. |
|
232 |
#' @param .alt_df_full (`dataframe`)\cr alternative dataset for calculations. |
|
233 |
#' @param id (`string`)\cr subject variable name. |
|
234 |
#' @param diff (`logical`)\cr if TRUE, risk difference calculations will be performed. |
|
235 |
#' @param conf_level (`proportion`)\cr confidence level of the interval. |
|
236 |
#' @param trt_var (`string`)\cr treatment variable name. |
|
237 |
#' @param ctrl_grp (`string`)\cr control group value. |
|
238 |
#' @param cur_trt_grp (`string`)\cr current treatment group value. |
|
239 |
#' @param inriskdiffcol (`logical`)\cr flag indicating if the function is called within a risk difference column. |
|
240 |
#' @param fup_var (`string`)\cr follow-up variable name. |
|
241 |
#' @param occ_var (`string`)\cr occurrence variable name. |
|
242 |
#' @param occ_dy (`string`)\cr occurrence day variable name. |
|
243 |
#' |
|
244 |
#' @return |
|
245 |
#' * `s_eair100_levii_j()` returns a list containing the following statistics: |
|
246 |
#' \itemize{ |
|
247 |
#' \item n_event: Number of events |
|
248 |
#' \item person_years: Total person-years of follow-up |
|
249 |
#' \item eair: Exposure-adjusted incidence rate per 100 person-years |
|
250 |
#' \item eair_diff: Risk difference in EAIR (if diff=TRUE and inriskdiffcol=TRUE) |
|
251 |
#' \item eair_diff_ci: Confidence interval for the risk difference (if diff=TRUE and inriskdiffcol=TRUE) |
|
252 |
#' }.\cr |
|
253 |
#' The list of available statistics (core columns) can also be viewed by running `junco_get_stats("a_eair100_j")` |
|
254 |
#' |
|
255 |
#' @keywords internal |
|
256 |
s_eair100_levii_j <- function( |
|
257 |
levii, |
|
258 |
df, |
|
259 |
.df_row, |
|
260 |
.var, |
|
261 |
.alt_df_full = NULL, |
|
262 |
id = "USUBJID", |
|
263 |
diff = FALSE, |
|
264 |
# treatment/ref group related arguments |
|
265 |
conf_level = 0.95, |
|
266 |
trt_var = NULL, |
|
267 |
ctrl_grp = NULL, |
|
268 |
cur_trt_grp = NULL, |
|
269 |
inriskdiffcol = FALSE, |
|
270 |
fup_var, |
|
271 |
occ_var, |
|
272 |
occ_dy) { |
|
273 | 150x |
if (diff && inriskdiffcol) { |
274 | 60x |
.alt_df_full_cur_group <- get_ctrl_subset( |
275 | 60x |
.alt_df_full, |
276 | 60x |
trt_var = trt_var, |
277 | 60x |
ctrl_grp = cur_trt_grp |
278 |
) |
|
279 |
} else { |
|
280 |
### within a_eair100_j we need to ensure proper dataframe will be passed to .alt_df_full |
|
281 | 90x |
.alt_df_full_cur_group <- .alt_df_full |
282 |
} |
|
283 | 150x |
cur_dfs <- h_get_eair_df( |
284 | 150x |
levii, |
285 | 150x |
df, |
286 | 150x |
denom_df = .alt_df_full_cur_group, |
287 | 150x |
.var = .var, |
288 | 150x |
id = id, |
289 | 150x |
fup_var = fup_var, |
290 | 150x |
occ_var = occ_var, |
291 | 150x |
occ_dy = occ_dy |
292 |
) |
|
293 | 150x |
cur_df_num <- cur_dfs$df_num |
294 | 150x |
cur_df_denom <- cur_dfs$df_denom |
295 | ||
296 |
### statistics derivation |
|
297 | 150x |
cur_AECOUNT <- length(unique(cur_df_num[[id]])) |
298 | 150x |
cur_YRSFUP <- sum(cur_df_denom[["mod_fup_var"]]) |
299 | 150x |
cur_eair <- 100 * cur_AECOUNT / cur_YRSFUP |
300 | ||
301 | 150x |
x <- list() |
302 | 150x |
x$n_event <- c("n_event" = cur_AECOUNT) |
303 | 150x |
x$person_years <- c("person_years" = cur_YRSFUP) |
304 | 150x |
x$eair <- c("eair" = cur_eair) |
305 | ||
306 | 150x |
if (diff && inriskdiffcol) { |
307 | 60x |
x$n_event <- c("n_event" = NULL) |
308 | 60x |
x$person_years <- c("person_years" = NULL) |
309 | 60x |
x$eair <- c("eair" = NULL) |
310 | ||
311 | 60x |
alt_df_full_ref_group <- get_ctrl_subset( |
312 | 60x |
.alt_df_full, |
313 | 60x |
trt_var = trt_var, |
314 | 60x |
ctrl_grp = ctrl_grp |
315 |
) |
|
316 | ||
317 | 60x |
ref_group <- get_ctrl_subset( |
318 | 60x |
.df_row, |
319 | 60x |
trt_var = trt_var, |
320 | 60x |
ctrl_grp = ctrl_grp |
321 |
) |
|
322 | ||
323 | 60x |
ref_dfs <- h_get_eair_df( |
324 | 60x |
levii, |
325 | 60x |
df = ref_group, |
326 | 60x |
denom_df = alt_df_full_ref_group, |
327 | 60x |
.var = .var, |
328 | 60x |
id = id, |
329 | 60x |
fup_var = fup_var, |
330 | 60x |
occ_var = occ_var, |
331 | 60x |
occ_dy = occ_dy |
332 |
) |
|
333 | ||
334 | 60x |
ref_df_num <- ref_dfs$df_num |
335 | 60x |
ref_df_denom <- ref_dfs$df_denom |
336 | ||
337 |
### statistics derivation |
|
338 | 60x |
ref_AECOUNT <- length(unique(ref_df_num[[id]])) |
339 | 60x |
ref_YRSFUP <- sum(ref_df_denom[["mod_fup_var"]]) |
340 | 60x |
ref_eair <- 100 * ref_AECOUNT / ref_YRSFUP |
341 | ||
342 | 60x |
rdiff <- cur_eair - ref_eair |
343 | ||
344 | 60x |
sd <- sqrt(cur_AECOUNT / cur_YRSFUP^2 + ref_AECOUNT / ref_YRSFUP^2) * 100 |
345 | ||
346 | 60x |
coeff <- stats::qnorm((1 + conf_level) / 2) |
347 | 60x |
lcl <- rdiff - (coeff * sd) |
348 | 60x |
ucl <- rdiff + (coeff * sd) |
349 | ||
350 | 60x |
eair_diff <- c(rdiff, lcl, ucl) |
351 | ||
352 | 60x |
x$eair_diff <- stats::setNames( |
353 | 60x |
c("eair_diff" = eair_diff), |
354 | 60x |
nm = c("estimate", "lcl", "ucl") |
355 |
) |
|
356 |
} else { |
|
357 | 90x |
x$eair_diff <- c("eair_diff" = NULL) |
358 |
} |
|
359 | ||
360 | 150x |
return(x) |
361 |
} |
|
362 | ||
363 |
#' @describeIn a_eair100_j |
|
364 |
#' Formatted analysis function for exposure adjusted incidence rate summary which is |
|
365 |
#' used as `afun` in `analyze` or `cfun` in `summarize_row_groups`. |
|
366 |
#' |
|
367 |
#' |
|
368 |
#' @param df (`data.frame`)\cr data set containing all analysis variables. |
|
369 |
#' @param labelstr (`string`)\cr label string for the row. |
|
370 |
#' @param .var (`string`)\cr variable name for analysis. |
|
371 |
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split. |
|
372 |
#' @param .spl_context (`data.frame`)\cr gives information about ancestor split states. |
|
373 |
#' @param .alt_df_full (`dataframe`)\cr denominator dataset for calculations. |
|
374 |
#' @param id (`string`)\cr subject variable name. |
|
375 |
#' @param drop_levels (`logical`)\cr if TRUE, non-observed levels will not be included. |
|
376 |
#' @param riskdiff (`logical`)\cr if TRUE, risk difference calculations will be performed. |
|
377 |
#' @param ref_path (`string`)\cr column path specifications for the control group. |
|
378 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
379 |
#' @param .formats (named 'character' or 'list')\cr formats for the statistics. |
|
380 |
#' @param .labels (named 'character')\cr labels for the statistics. |
|
381 |
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. |
|
382 |
#' @param na_str (`string`)\cr string used to replace all NA or empty values in the output. |
|
383 |
#' @param conf_level (`proportion`)\cr confidence level of the interval. |
|
384 |
#' @param fup_var (`string`)\cr variable name for follow-up time. |
|
385 |
#' @param occ_var (`string`)\cr variable name for occurrence. |
|
386 |
#' @param occ_dy (`string`)\cr variable name for occurrence day. |
|
387 |
#' |
|
388 |
#' @return |
|
389 |
#' * `a_eair100_j` returns the corresponding list with formatted [rtables::CellValue()]. |
|
390 |
#' |
|
391 |
#' @export |
|
392 |
#' |
|
393 |
#' @examples |
|
394 |
#' library(tern) |
|
395 |
#' library(dplyr) |
|
396 |
#' trtvar <- "ARM" |
|
397 |
#' ctrl_grp <- "B: Placebo" |
|
398 |
#' cutoffd <- as.Date("2023-09-24") |
|
399 |
#' |
|
400 |
#' |
|
401 |
#' adexsum <- ex_adsl |> |
|
402 |
#' create_colspan_var( |
|
403 |
#' non_active_grp = ctrl_grp, |
|
404 |
#' non_active_grp_span_lbl = " ", |
|
405 |
#' active_grp_span_lbl = "Active Study Agent", |
|
406 |
#' colspan_var = "colspan_trt", |
|
407 |
#' trt_var = trtvar |
|
408 |
#' ) |> |
|
409 |
#' mutate( |
|
410 |
#' rrisk_header = "Risk Difference (95% CI)", |
|
411 |
#' rrisk_label = paste(!!rlang::sym(trtvar), "vs", ctrl_grp), |
|
412 |
#' TRTDURY = case_when( |
|
413 |
#' !is.na(EOSDY) ~ EOSDY, |
|
414 |
#' TRUE ~ as.integer(cutoffd - as.Date(TRTSDTM) + 1) |
|
415 |
#' ) |
|
416 |
#' ) |> |
|
417 |
#' select(USUBJID, !!rlang::sym(trtvar), colspan_trt, rrisk_header, rrisk_label, TRTDURY) |
|
418 |
#' |
|
419 |
#' adexsum$TRTDURY <- as.numeric(adexsum$TRTDURY) |
|
420 |
#' |
|
421 |
#' adae <- ex_adae |> |
|
422 |
#' group_by(USUBJID, AEDECOD) |> |
|
423 |
#' select(USUBJID, AEDECOD, ASTDY) |> |
|
424 |
#' mutate(rwnum = row_number()) |> |
|
425 |
#' mutate(AOCCPFL = case_when( |
|
426 |
#' rwnum == 1 ~ "Y", |
|
427 |
#' TRUE ~ NA |
|
428 |
#' )) |> |
|
429 |
#' filter(AOCCPFL == "Y") |
|
430 |
#' |
|
431 |
#' aefup <- left_join(adae, adexsum, by = "USUBJID") |
|
432 |
#' |
|
433 |
#' colspan_trt_map <- create_colspan_map(adexsum, |
|
434 |
#' non_active_grp = ctrl_grp, |
|
435 |
#' non_active_grp_span_lbl = " ", |
|
436 |
#' active_grp_span_lbl = "Active Study Agent", |
|
437 |
#' colspan_var = "colspan_trt", |
|
438 |
#' trt_var = trtvar |
|
439 |
#' ) |
|
440 |
#' |
|
441 |
#' ref_path <- c("colspan_trt", " ", trtvar, ctrl_grp) |
|
442 |
#' |
|
443 |
#' ################################################################################ |
|
444 |
#' # Define layout and build table: |
|
445 |
#' ################################################################################ |
|
446 |
#' |
|
447 |
#' lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx", top_level_section_div = " ") |> |
|
448 |
#' split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) |> |
|
449 |
#' split_cols_by(trtvar) |> |
|
450 |
#' split_cols_by("rrisk_header", nested = FALSE) |> |
|
451 |
#' split_cols_by(trtvar, labels_var = "rrisk_label", split_fun = remove_split_levels(ctrl_grp)) |> |
|
452 |
#' analyze("TRTDURY", |
|
453 |
#' nested = FALSE, |
|
454 |
#' show_labels = "hidden", |
|
455 |
#' afun = a_patyrs_j |
|
456 |
#' ) |> |
|
457 |
#' analyze( |
|
458 |
#' vars = "AEDECOD", |
|
459 |
#' nested = FALSE, |
|
460 |
#' afun = a_eair100_j, |
|
461 |
#' extra_args = list( |
|
462 |
#' fup_var = "TRTDURY", |
|
463 |
#' occ_var = "AOCCPFL", |
|
464 |
#' occ_dy = "ASTDY", |
|
465 |
#' ref_path = ref_path, |
|
466 |
#' drop_levels = TRUE |
|
467 |
#' ) |
|
468 |
#' ) |
|
469 |
#' |
|
470 |
#' result <- build_table(lyt, aefup, alt_counts_df = adexsum) |
|
471 |
#' head(result, 5) |
|
472 |
#' |
|
473 |
a_eair100_j <- function( |
|
474 |
df, |
|
475 |
labelstr = NULL, |
|
476 |
.var, |
|
477 |
.df_row, |
|
478 |
.spl_context, |
|
479 |
.alt_df_full = NULL, |
|
480 |
id = "USUBJID", |
|
481 |
drop_levels = FALSE, |
|
482 |
riskdiff = TRUE, |
|
483 |
ref_path = NULL, |
|
484 |
.stats = c("eair"), |
|
485 |
.formats = NULL, |
|
486 |
.labels = NULL, |
|
487 |
.indent_mods = NULL, |
|
488 |
na_str = rep("NA", 3), |
|
489 |
# treatment/ref group related arguments |
|
490 |
conf_level = 0.95, |
|
491 |
fup_var, |
|
492 |
occ_var, |
|
493 |
occ_dy) { |
|
494 |
## prepare for column based split |
|
495 | 15x |
col_expr <- .spl_context$cur_col_expr[[1]] |
496 |
## colid can be used to figure out if we're in the relative risk columns or not |
|
497 | 15x |
colid <- .spl_context$cur_col_id[[1]] |
498 | 15x |
inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE) |
499 | ||
500 |
# if no stats requested, get all stats |
|
501 | 15x |
.stats <- junco_get_stats( |
502 | 15x |
"a_eair100_j", |
503 | 15x |
stats_in = .stats, |
504 | 15x |
custom_stats_in = NULL |
505 |
) |
|
506 | ||
507 |
### combine all preprocessing of incoming df/.df_row in one function |
|
508 |
### do this outside stats derivation functions (s_freq_j/) |
|
509 |
### use all of val/excl_levels/drop_levels//new_levels/label/label_map/labelstr/label_fstr |
|
510 | 15x |
upd_dfrow <- h_upd_dfrow( |
511 | 15x |
.df_row, |
512 | 15x |
.var, |
513 | 15x |
val = NULL, |
514 | 15x |
excl_levels = NULL, |
515 | 15x |
drop_levels = drop_levels, |
516 | 15x |
new_levels = NULL, |
517 | 15x |
new_levels_after = FALSE, |
518 | 15x |
label = NULL, |
519 | 15x |
label_map = NULL, |
520 | 15x |
labelstr = labelstr, |
521 | 15x |
label_fstr = NULL, |
522 | 15x |
.spl_context = .spl_context |
523 |
) |
|
524 | ||
525 | 15x |
.df_row <- upd_dfrow$df_row |
526 | 15x |
df <- upd_dfrow$df |
527 | ||
528 | 15x |
if (is.null(.alt_df_full)) { |
529 | ! |
stop(paste("a_eair100_j: .alt_df_full cannot be NULL.")) |
530 |
} |
|
531 | ||
532 |
### derive appropriate alt_df based upon .spl_context and .alt_df_full |
|
533 |
### note that only row-based splits are done |
|
534 |
### for now only for variables from the first split_rows_by |
|
535 | 15x |
alt_df <- h_create_altdf( |
536 | 15x |
.spl_context, |
537 | 15x |
.df_row, |
538 | 15x |
.alt_df_full, |
539 | 15x |
denom_by = NULL, |
540 | 15x |
id = id, |
541 | 15x |
variables = NULL, |
542 | 15x |
denom = "n_altdf" |
543 |
) |
|
544 | 15x |
new_denomdf <- alt_df |
545 | ||
546 | 15x |
fn_Args <- list( |
547 | 15x |
df = df, |
548 | 15x |
.df_row = .df_row, |
549 | 15x |
.var = .var, |
550 | 15x |
id = id, |
551 | 15x |
diff = riskdiff, |
552 | 15x |
inriskdiffcol = inriskdiffcol, |
553 | 15x |
fup_var = fup_var, |
554 | 15x |
occ_var = occ_var, |
555 | 15x |
occ_dy = occ_dy |
556 |
) |
|
557 | ||
558 | 15x |
if (riskdiff && inriskdiffcol) { |
559 | 6x |
trt_var_refpath <- h_get_trtvar_refpath( |
560 | 6x |
ref_path, |
561 | 6x |
.spl_context, |
562 | 6x |
df |
563 |
) |
|
564 |
# trt_var_refpath is list with elements |
|
565 |
# trt_var trt_var_refspec cur_trt_grp ctrl_grp |
|
566 |
# make these elements available in current environment |
|
567 | 6x |
trt_var <- trt_var_refpath$trt_var |
568 | 6x |
trt_var_refspec <- trt_var_refpath$trt_var_refspec |
569 | 6x |
cur_trt_grp <- trt_var_refpath$cur_trt_grp |
570 | 6x |
ctrl_grp <- trt_var_refpath$ctrl_grp |
571 | ||
572 | 6x |
fn_args_x <- list( |
573 | 6x |
.alt_df_full = .alt_df_full, |
574 |
# treatment/ref group related arguments |
|
575 | 6x |
conf_level = conf_level, |
576 | 6x |
trt_var = trt_var, |
577 | 6x |
ctrl_grp = ctrl_grp, |
578 | 6x |
cur_trt_grp = cur_trt_grp |
579 |
) |
|
580 |
} else { |
|
581 | 9x |
new_denomdf <- subset(new_denomdf, eval(col_expr)) |
582 | 9x |
fn_args_x <- list(.alt_df_full = new_denomdf) |
583 |
} |
|
584 | ||
585 | 15x |
fn_Args <- append(fn_Args, fn_args_x) |
586 | ||
587 | 15x |
levs <- levels(.df_row[[.var]]) |
588 | 15x |
y <- mapply( |
589 | 15x |
s_eair100_levii_j, |
590 | 15x |
levii = levs, |
591 | 15x |
MoreArgs = fn_Args, |
592 | 15x |
SIMPLIFY = FALSE |
593 |
) |
|
594 | ||
595 |
### rearrange list y to list to x_stats |
|
596 |
#### this is to ensure the remainder of the code can stay the same as in a_freq_j |
|
597 | 15x |
stnms <- c("eair", "eair_diff", "n_event", "person_years") |
598 | 15x |
x_stats <- extract_x_stats(y, stnms) |
599 | ||
600 | 15x |
if (!inriskdiffcol) { |
601 | 9x |
.stats_adj <- .stats |
602 |
} else { |
|
603 | 6x |
.stats_adj <- replace(.stats, .stats %in% c("eair"), "eair_diff") |
604 |
} |
|
605 | ||
606 | 15x |
.stats <- .stats_adj |
607 | ||
608 |
# Fill in formatting defaults |
|
609 | ||
610 | 15x |
if (length(levs) > 1 && length(.stats) > 1) { |
611 | ! |
message( |
612 | ! |
"a_eair100_j : with multiple stats and multiple levels of analysis |
613 | ! |
variable it is recommended to apply an extra split_rows_by on the analysis variable" |
614 |
) |
|
615 |
} |
|
616 | ||
617 | 15x |
x_stats <- x_stats[.stats] |
618 | ||
619 | 15x |
levels_per_stats <- lapply(x_stats, names) |
620 | ||
621 | 15x |
.formats <- junco_get_formats_from_stats(.stats, .formats, levels_per_stats) |
622 | 15x |
.labels <- junco_get_labels_from_stats(.stats, .labels, levels_per_stats) |
623 | 15x |
.labels <- .unlist_keep_nulls(.labels) |
624 | ||
625 | 15x |
.indent_mods <- junco_get_indents_from_stats( |
626 | 15x |
.stats, |
627 | 15x |
.indent_mods, |
628 | 15x |
levels_per_stats |
629 |
) |
|
630 | 15x |
.indent_mods <- .unlist_keep_nulls(.indent_mods) |
631 | ||
632 |
# .format_na_strs processing |
|
633 |
# if na_str = c(NA, NA, NA) |
|
634 |
# this will ensure the ci (NA, NA, NA) will be represented as NE (NE, NE) |
|
635 |
# the value NE is defined as the default to replace NA in our jjcs format |
|
636 | ||
637 | 15x |
if (!is.null(na_str)) { |
638 | 15x |
.format_na_strs <- lapply(names(.formats), FUN = function(x) { |
639 | 150x |
na_str |
640 |
}) |
|
641 |
} else { |
|
642 | ! |
.format_na_strs <- NULL |
643 |
} |
|
644 | ||
645 |
# Unlist stats + names |
|
646 | 15x |
x_stats <- .unlist_keep_nulls(x_stats) |
647 | 15x |
names(x_stats) <- names(.formats) |
648 | ||
649 |
### final step: turn requested stats into rtables rows |
|
650 | 15x |
inrows <- in_rows( |
651 | 15x |
.list = x_stats, |
652 | 15x |
.formats = .formats, |
653 | 15x |
.labels = .labels, |
654 | 15x |
.indent_mods = .indent_mods, |
655 | 15x |
.format_na_strs = .format_na_strs |
656 |
) |
|
657 | ||
658 | 15x |
return(inrows) |
659 |
} |
1 |
s_summarize_desc_j <- function(df, .var, .ref_group, .in_ref_col, control = control_analyze_vars()) { |
|
2 | 5x |
x <- df[[.var]] |
3 | 5x |
y1 <- s_summary(x) |
4 | 5x |
y2 <- NULL |
5 | ||
6 |
# diff in means versus control group, based upon 2 sample t.test |
|
7 | 5x |
y2$mean_diffci <- numeric() |
8 | 5x |
if (!is.null(.ref_group) && !.in_ref_col) { |
9 | 3x |
x1 <- df[[.var]] |
10 | 3x |
x2 <- .ref_group[[.var]] |
11 | ||
12 | 3x |
x1 <- x1[!is.na(x1)] |
13 | 3x |
x2 <- x2[!is.na(x2)] |
14 | ||
15 | 3x |
if ((length(x1) > 1 && length(x2) > 1)) { |
16 | 3x |
ttest_stat <- stats::t.test(x1, x2, conf.level = control$conf_level) |
17 | ||
18 | 3x |
stat <- ttest_stat[c("estimate", "conf.int")] |
19 | 3x |
stat$diff <- stat$estimate[1] - stat$estimate[2] |
20 | 3x |
stat <- c(stat$diff, stat$conf.int) |
21 | ||
22 | 3x |
y2$mean_diffci <- with_label( |
23 | 3x |
c(mean_diffci = stat), |
24 | 3x |
paste("Difference in Mean + ", f_conf_level(control$conf_level)) |
25 |
) |
|
26 |
} else { |
|
27 | ! |
y2b <- s_summary(.ref_group[[.var]]) |
28 | ! |
diff <- y1[["mean"]] - y2b[["mean"]] |
29 | ! |
stat <- c(diff, NA, NA) |
30 | ||
31 | ! |
y2$mean_diffci <- with_label( |
32 | ! |
c(mean_diffci = stat), |
33 | ! |
paste("Difference in Mean + ", f_conf_level(control$conf_level)) |
34 |
) |
|
35 |
} |
|
36 |
} |
|
37 | 5x |
y <- c(y1, y2) |
38 | ||
39 | 5x |
return(y) |
40 |
} |
|
41 | ||
42 |
s_aval_chg_col1 <- function(df, .var, denom, .N_col, id, indatavar) { |
|
43 |
## First column AVAL - show n/N (%) |
|
44 | 4x |
mystat <- "n" |
45 | ||
46 | 4x |
if (!is.null(indatavar)) { |
47 | ! |
df <- subset(df, !is.na(df[[indatavar]])) |
48 |
} |
|
49 | ||
50 | 4x |
x <- df[[.var]] |
51 | ||
52 | 4x |
x_stats <- s_summary(x) |
53 | 4x |
x_stats <- x_stats[mystat] |
54 | ||
55 |
### Ndenom derivation in case denom = N |
|
56 | 4x |
Ndenom <- .N_col |
57 | 4x |
if (denom == "N") { |
58 |
### as our input dataset has ensured we have unique subjects we can just use the length of x here still safer |
|
59 |
### to use id variable |
|
60 | 4x |
nsub <- length(unique(df[[id]])) |
61 | 4x |
Ndenom <- nsub |
62 |
} |
|
63 | ||
64 | 4x |
count_denom_frac <- c(x_stats$n, Ndenom, x_stats$n / Ndenom) |
65 | 4x |
names(count_denom_frac) <- c("n", "N", "fraction") |
66 | ||
67 | 4x |
count_frac <- count_denom_frac[c("n", "fraction")] |
68 | 4x |
count <- count_denom_frac[c("n")] |
69 | ||
70 | 4x |
y <- list() |
71 | 4x |
y$count_denom_frac <- count_denom_frac |
72 | 4x |
y$count_frac <- count_frac |
73 | 4x |
y$count <- count |
74 | ||
75 | 4x |
return(y) |
76 |
} |
|
77 | ||
78 |
s_aval_chg_col23_diff <- function( |
|
79 |
df, |
|
80 |
.var, |
|
81 |
.df_row, |
|
82 |
.ref_group, |
|
83 |
.in_ref_col, |
|
84 |
ancova, |
|
85 |
interaction_y, |
|
86 |
interaction_item, |
|
87 |
conf_level, |
|
88 |
variables, |
|
89 |
trt_var, |
|
90 |
ctrl_grp, |
|
91 |
cur_param, |
|
92 |
cur_lvl) { |
|
93 | 10x |
.df_row <- subset(.df_row, !is.na(.df_row[[.var]])) |
94 | 10x |
df <- subset(df, !is.na(df[[.var]])) |
95 | 10x |
.ref_group <- subset(.ref_group, !is.na(.ref_group[[.var]])) |
96 | ||
97 | 10x |
if (nrow(.df_row) == 0) { |
98 |
#### this is only when input row no non-missing records in any of the columns expected to occur for baseline |
|
99 |
#### timepoint for analysis variable change only here we want a blank cell, not a cell with all NA's NULL is |
|
100 |
#### generating a blank cell |
|
101 | ! |
x_stats <- NULL |
102 | ! |
mystat1 <- c("mean_ci_3d", "mean_diffci") |
103 | 10x |
} else if (!ancova) { |
104 | 5x |
mystat1 <- c("mean_ci_3d", "mean_diffci") |
105 | ||
106 | 5x |
control <- control_analyze_vars() |
107 | 5x |
control$conf_level <- conf_level |
108 | 5x |
x_stats <- s_summarize_desc_j( |
109 | 5x |
df = df, |
110 | 5x |
.var = .var, |
111 | 5x |
.ref_group = .ref_group, |
112 | 5x |
.in_ref_col = .in_ref_col, |
113 | 5x |
control = control |
114 |
) |
|
115 |
} else { |
|
116 | 5x |
mystat1 <- c("lsmean_ci", "lsmean_diffci") |
117 | ||
118 |
### sparse data problems with underlying ancova function 1/ if nrow(.df_row) = 0 NULL (blank columns) |
|
119 | ||
120 |
### 2/ if nrow(df) = 0 no ancova - lsmean and lsmean diff should be na |
|
121 | ||
122 |
### 3/ if nrow(df) > 0, & nrow(.ref_group) = 0 ancova for ls mean only, NULL .ref_group -- lsmean diff should |
|
123 |
### be na |
|
124 | ||
125 |
### 4/ if nrow(df) > 0, & nrow(.ref_group) > 0 and at least one group with 0 data update levels in |
|
126 |
### .df_row/df/.ref_group to avoid problems for contrast |
|
127 | ||
128 |
#### by making updates to .df_row/df/.ref_group, situation 3 and 4 could be used together with general case |
|
129 | ||
130 | 5x |
.df_row_trtlevels <- unique(.df_row[[trt_var]]) |
131 | ||
132 | 5x |
if (NROW(.df_row_trtlevels) == 0) { |
133 |
### No data at all |
|
134 | ! |
x_stats <- NULL |
135 | 5x |
} else if (NROW(df) == 0 || NROW(.df_row_trtlevels) == 1) { |
136 |
### current column no data/less than 2 treatment levels |
|
137 | ||
138 | ! |
x_stats <- list() |
139 | ! |
x_stats[[mystat1[1]]] <- rep(NA, 3) |
140 | ! |
x_stats[[mystat1[2]]] <- rep(NA, 3) |
141 |
} else { |
|
142 |
### ancova situation, and some updates to prevent problems with sparse data |
|
143 | 5x |
if ((!ctrl_grp %in% .df_row_trtlevels)) { |
144 |
# LS means for current group can be estimated from model, but not difference in LS means set .ref_group |
|
145 |
# to NULL to proceed with s_summarize_ancova_j function update to underlying s_ancova_j to cover NULL |
|
146 |
# ref_group in call |
|
147 | ! |
.ref_group <- NULL |
148 |
} |
|
149 | 5x |
if (NROW(.df_row_trtlevels) < length(levels(.df_row[[trt_var]]))) { |
150 |
# missing levels need to be removed from the factor, in order to have the correct estimates, and avoid |
|
151 |
# errors with underlying tern:::s_ancova function |
|
152 | ! |
.df_row[[trt_var]] <- droplevels(.df_row[[trt_var]]) |
153 | ! |
df[[trt_var]] <- factor(as.character(df[[trt_var]]), levels = levels(.df_row[[trt_var]])) |
154 | ! |
.ref_group[[trt_var]] <- factor(as.character(.ref_group[[trt_var]]), levels = levels(.df_row[[trt_var]])) |
155 |
} |
|
156 | ||
157 | 5x |
x_stats <- s_summarize_ancova_j( |
158 | 5x |
df = df, |
159 | 5x |
.var = .var, |
160 | 5x |
.ref_group = .ref_group, |
161 | 5x |
.in_ref_col = .in_ref_col, |
162 | 5x |
.df_row = .df_row, |
163 | 5x |
conf_level = conf_level, |
164 | 5x |
interaction_y = interaction_y, |
165 | 5x |
interaction_item = interaction_item, |
166 | 5x |
variables = variables |
167 |
) |
|
168 |
} |
|
169 |
} |
|
170 | ||
171 | 10x |
y <- list(mean_ci_3d = x_stats[[mystat1[1]]], meandiff_ci_3d = x_stats[[mystat1[2]]]) |
172 | 10x |
return(y) |
173 |
} |
|
174 | ||
175 | ||
176 |
xxd_to_xx <- function(str, d = 0) { |
|
177 | 10x |
checkmate::assert_integerish(d, null.ok = TRUE) |
178 | 10x |
if (checkmate::test_list(str, null.ok = FALSE)) { |
179 | ! |
checkmate::assert_list(str, null.ok = FALSE) |
180 |
# Or it may be a vector of characters |
|
181 |
} else { |
|
182 | 10x |
checkmate::assert_character(str, null.ok = FALSE) |
183 |
} |
|
184 | ||
185 | 10x |
nmstr <- names(str) |
186 | ||
187 | 10x |
if (any(grepl("xx.d", str, fixed = TRUE))) { |
188 | 10x |
checkmate::assert_integerish(d) |
189 | 10x |
str <- gsub("xx.d", paste0("xx.", strrep("x", times = d)), str, fixed = TRUE) |
190 |
} |
|
191 | 10x |
str <- stats::setNames(str, nmstr) |
192 | 10x |
return(str) |
193 |
} |
|
194 | ||
195 |
format_xxd <- function(str, d = 0, .df_row, formatting_fun = NULL) { |
|
196 |
# Handling of data precision |
|
197 | 10x |
if (!is.numeric(d)) { |
198 | ! |
if (is.character(d) && length(d) == 1) { |
199 |
# check if d is a variable name available in .df_row |
|
200 | ! |
if (d %in% names(.df_row)) { |
201 | ! |
d <- max(.df_row[[d]], na.rm = TRUE) |
202 |
} else { |
|
203 | ! |
message(paste("precision has been reset to d = 0, as variable", d, "not present on input")) |
204 | ! |
d <- 0 |
205 |
} |
|
206 |
} |
|
207 |
} |
|
208 |
# convert xxd type of string to xx |
|
209 | 10x |
fmt <- xxd_to_xx(str = str, d = d) |
210 | ||
211 | 10x |
if (!is.null(formatting_fun)) { |
212 | 10x |
fmt <- formatting_fun(fmt) |
213 |
} |
|
214 | ||
215 | 10x |
return(fmt) |
216 |
} |
|
217 | ||
218 |
#' @name a_summarize_aval_chg_diff_j |
|
219 |
#' |
|
220 |
#' @title Analysis function 3-column presentation |
|
221 |
#' |
|
222 |
#' @description Analysis functions to produce a 1-row summary presented in |
|
223 |
#' a 3-column layout in the columns (column 1 = N, column 2 = Value, column 3 = Change).\cr |
|
224 |
#' In the difference columns, only 1 column will be presented : difference + CI\cr |
|
225 |
#' When ancova = `TRUE`, the presented statistics will be based on ANCOVA method (`s_summarize_ancova_j`).\cr |
|
226 |
#' mean and ci (both for Value (column 2) and CHG (column 3)) using statistic `lsmean_ci`\cr |
|
227 |
#' mean and ci for the difference column are based on same ANCOVA model using statistic `lsmean_diffci`\cr |
|
228 |
#' When ancova = `FALSE`, descriptive statistics will be used instead.\cr |
|
229 |
#' In the difference column, the 2-sample t-test will be used. |
|
230 |
#' |
|
231 |
#' @details See Description |
|
232 |
#' |
|
233 |
#' @inheritParams proposal_argument_convention |
|
234 |
#' @param denom (`string`)\cr choice of denominator for proportions. Options are: |
|
235 |
#' * `N`: number of records in this column/row split. |
|
236 |
#' \cr There is no check in place that the current split only has one record per subject. |
|
237 |
#' Users should be careful with this. |
|
238 |
#' * `.N_col`: number of records in this column intersection (based on alt_counts_df dataset) |
|
239 |
#' \cr (when alt_counts_df is a single record per subjects, this will match number of subjects) |
|
240 |
#' |
|
241 |
#' @param d (default = 1) \cr choice of Decimal precision. |
|
242 |
#' Note that one extra precision will be added, as means are presented. |
|
243 |
#' \cr Options are: |
|
244 |
#' * numerical(1) |
|
245 |
#' * variable name containing information on the precision, this variable |
|
246 |
#' should be available on input dataset. The content of this variable should |
|
247 |
#' then be an integer. |
|
248 |
#' |
|
249 |
#' @param ancova (`logical`)\cr If FALSE, only descriptive methods will be used. \cr |
|
250 |
#' If TRUE, ANCOVA methods will be used for each of the columns : AVAL, CHG, DIFF. \cr |
|
251 |
#' @param comp_btw_group (`logical`)\cr If TRUE, comparison between groups will be performed. |
|
252 |
#' \cr When ancova = FALSE, the estimate of between group difference (on CHG) |
|
253 |
#' will be based upon a two-sample t-test. |
|
254 |
#' \cr When ancova = TRUE, the same ANCOVA model will be used for the estimate of between group difference (on CHG). |
|
255 |
#' |
|
256 |
#' @param interaction_y (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE. |
|
257 |
#' @param interaction_item (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE. |
|
258 |
#' @param conf_level (`proportion`)\cr Confidence level of the interval |
|
259 |
#' @param variables (named list of strings)\cr |
|
260 |
#' list of additional analysis variables, with expected elements: |
|
261 |
#' * arm (string)\cr |
|
262 |
#' group variable, for which the covariate adjusted means of multiple groups will be summarized. |
|
263 |
#' Specifically, the first level of arm variable is taken as the reference group. |
|
264 |
#' * covariates (character)\cr |
|
265 |
#' a vector that can contain single variable names (such as 'X1'), and/or interaction terms indicated by 'X1 * X2'. |
|
266 |
#' |
|
267 |
#' |
|
268 |
#' @param format_na_str (`string`)\cr |
|
269 |
#' |
|
270 |
#' @param indatavar (`string`)\cr If not null, variable name to extra subset |
|
271 |
#' incoming df to non-missing values of this variable. |
|
272 |
#' @param multivars (`string(3)`)\cr Variables names to use in 3-col layout. |
|
273 |
#' |
|
274 |
#' @param .stats (named `list`)\cr column statistics to select for the table. |
|
275 |
#' The following column names are to be used: `col1`, `col23`, `coldiff`.\cr |
|
276 |
#' For `col1`, the following stats can be specified.\cr |
|
277 |
#' For `col23`, only `mean_ci_3d` is available. When ancova = `TRUE` these are LS Means, otherwise, arithmetic means.\cr |
|
278 |
#' For `coldiff`, only `meandiff_ci_3d` is available. When ancova = `TRUE` these |
|
279 |
#' are LS difference in means, otherwise, difference in means based upon 2-sample t-test.\cr |
|
280 |
#' @param .formats (named `list`)\cr formats for the column statistics. `xx.d` style formats can be used. |
|
281 |
#' @param .formats_fun (named `list`)\cr formatting functions for the column |
|
282 |
#' statistics, to be applied after the conversion of `xx.d` style to the |
|
283 |
#' appropriate precision. |
|
284 |
#' |
|
285 |
#' @rdname a_summarize_aval_chg_diff_j |
|
286 |
#' @return A function that can be used in an analyze function call |
|
287 |
#' @export |
|
288 |
#' |
|
289 |
#' |
|
290 |
#' @examples |
|
291 |
#' |
|
292 |
#' library(dplyr) |
|
293 |
#' |
|
294 |
#' ADEG <- data.frame( |
|
295 |
#' STUDYID = c( |
|
296 |
#' "DUMMY", "DUMMY", "DUMMY", "DUMMY", "DUMMY", |
|
297 |
#' "DUMMY", "DUMMY", "DUMMY", "DUMMY", "DUMMY" |
|
298 |
#' ), |
|
299 |
#' USUBJID = c( |
|
300 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
301 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
302 |
#' ), |
|
303 |
#' TRT01A = c( |
|
304 |
#' "ARMA", "ARMA", "ARMA", "ARMA", "ARMA", "Placebo", |
|
305 |
#' "Placebo", "Placebo", "ARMA", "ARMA" |
|
306 |
#' ), |
|
307 |
#' PARAM = c("BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP"), |
|
308 |
#' AVISIT = c( |
|
309 |
#' "Visit 1", "Visit 1", "Visit 1", "Visit 1", "Visit 1", |
|
310 |
#' "Visit 1", "Visit 1", "Visit 1", "Visit 1", "Visit 1" |
|
311 |
#' ), |
|
312 |
#' AVAL = c(56, 78, 67, 87, 88, 93, 39, 87, 65, 55), |
|
313 |
#' CHG = c(2, 3, -1, 9, -2, 0, 6, -2, 5, 2) |
|
314 |
#' ) |
|
315 |
#' |
|
316 |
#' ADEG <- ADEG |> |
|
317 |
#' mutate( |
|
318 |
#' TRT01A = as.factor(TRT01A), |
|
319 |
#' STUDYID = as.factor(STUDYID) |
|
320 |
#' ) |
|
321 |
#' |
|
322 |
#' ADEG$colspan_trt <- factor(ifelse(ADEG$TRT01A == "Placebo", " ", "Active Study Agent"), |
|
323 |
#' levels = c("Active Study Agent", " ") |
|
324 |
#' ) |
|
325 |
#' ADEG$rrisk_header <- "Risk Difference (%) (95% CI)" |
|
326 |
#' ADEG$rrisk_label <- paste(ADEG$TRT01A, paste("vs", "Placebo")) |
|
327 |
#' |
|
328 |
#' colspan_trt_map <- create_colspan_map(ADEG, |
|
329 |
#' non_active_grp = "Placebo", |
|
330 |
#' non_active_grp_span_lbl = " ", |
|
331 |
#' active_grp_span_lbl = "Active Study Agent", |
|
332 |
#' colspan_var = "colspan_trt", |
|
333 |
#' trt_var = "TRT01A" |
|
334 |
#' ) |
|
335 |
#' ref_path <- c("colspan_trt", " ", "TRT01A", "Placebo") |
|
336 |
#' |
|
337 |
#' lyt <- basic_table() |> |
|
338 |
#' split_cols_by( |
|
339 |
#' "colspan_trt", |
|
340 |
#' split_fun = trim_levels_to_map(map = colspan_trt_map) |
|
341 |
#' ) |> |
|
342 |
#' split_cols_by("TRT01A") |> |
|
343 |
#' split_rows_by( |
|
344 |
#' "PARAM", |
|
345 |
#' label_pos = "topleft", |
|
346 |
#' split_label = "Blood Pressure", |
|
347 |
#' section_div = " ", |
|
348 |
#' split_fun = drop_split_levels |
|
349 |
#' ) |> |
|
350 |
#' split_rows_by( |
|
351 |
#' "AVISIT", |
|
352 |
#' label_pos = "topleft", |
|
353 |
#' split_label = "Study Visit", |
|
354 |
#' split_fun = drop_split_levels, |
|
355 |
#' child_labels = "hidden" |
|
356 |
#' ) |> |
|
357 |
#' split_cols_by_multivar( |
|
358 |
#' c("AVAL", "AVAL", "CHG"), |
|
359 |
#' varlabels = c("n/N (%)", "Mean (CI)", "CFB (CI)") |
|
360 |
#' ) |> |
|
361 |
#' split_cols_by("rrisk_header", nested = FALSE) |> |
|
362 |
#' split_cols_by( |
|
363 |
#' "TRT01A", |
|
364 |
#' split_fun = remove_split_levels("Placebo"), |
|
365 |
#' labels_var = "rrisk_label" |
|
366 |
#' ) |> |
|
367 |
#' split_cols_by_multivar(c("CHG"), varlabels = c(" ")) |> |
|
368 |
#' analyze("STUDYID", |
|
369 |
#' afun = a_summarize_aval_chg_diff_j, |
|
370 |
#' extra_args = list( |
|
371 |
#' format_na_str = "-", d = 0, |
|
372 |
#' ref_path = ref_path, variables = list(arm = "TRT01A", covariates = NULL) |
|
373 |
#' ) |
|
374 |
#' ) |
|
375 |
#' |
|
376 |
#' result <- build_table(lyt, ADEG) |
|
377 |
#' |
|
378 |
#' result |
|
379 |
#' @seealso s_summarize_ancova_j |
|
380 |
#' @family Inclusion of ANCOVA Functions |
|
381 |
a_summarize_aval_chg_diff_j <- function( |
|
382 |
df, |
|
383 |
.df_row, |
|
384 |
.spl_context, |
|
385 |
ancova = FALSE, |
|
386 |
comp_btw_group = TRUE, |
|
387 |
ref_path = NULL, |
|
388 |
.N_col, |
|
389 |
denom = c("N", ".N_col"), |
|
390 |
indatavar = NULL, |
|
391 |
d = 0, |
|
392 |
id = "USUBJID", |
|
393 |
interaction_y = FALSE, |
|
394 |
interaction_item = NULL, |
|
395 |
conf_level = 0.95, |
|
396 |
variables = list(arm = "TRT01A", covariates = NULL), |
|
397 |
format_na_str = "", |
|
398 |
.stats = list(col1 = "count_denom_frac", col23 = "mean_ci_3d", coldiff = "meandiff_ci_3d"), |
|
399 |
.formats = list(col1 = NULL, col23 = "xx.dx (xx.dx, xx.dx)", coldiff = "xx.dx (xx.dx, xx.dx)"), |
|
400 |
.formats_fun = list(col1 = jjcsformat_count_denom_fraction, col23 = jjcsformat_xx, coldiff = jjcsformat_xx), |
|
401 |
multivars = c("AVAL", "AVAL", "CHG")) { |
|
402 | 14x |
denom <- match.arg(denom) |
403 | ||
404 | 14x |
if (comp_btw_group && is.null(ref_path)) { |
405 | ! |
stop("ref_path cannot be NULL, please specify it. See ?get_ref_info for details.") |
406 |
} |
|
407 | ||
408 | 14x |
if (!(length(multivars) == 3)) { |
409 | ! |
stop("argument multivars must be of length 3.") |
410 |
} |
|
411 | ||
412 | 14x |
if (!(all(multivars %in% names(df)))) { |
413 | ! |
stop("all variables specified in multivars must be available on input dataset df.") |
414 |
} |
|
415 | ||
416 | 14x |
if (multivars[1] == multivars[2]) { |
417 | 14x |
multivars[2] <- paste0(multivars[2], "._[[2]]_.") |
418 |
} |
|
419 | ||
420 |
if ( |
|
421 | 14x |
!is.list(.stats) || |
422 | 14x |
!all(c("col1", "col23", "coldiff") %in% names(.stats)) || |
423 | 14x |
!is.list(.formats) || |
424 | 14x |
!all(c("col1", "col23", "coldiff") %in% names(.formats)) || |
425 | 14x |
!is.list(.formats_fun) || |
426 | 14x |
!all(c("col1", "col23", "coldiff") %in% names(.formats_fun)) |
427 |
) { |
|
428 | ! |
stop(paste( |
429 | ! |
"Issue a_summarize_aval_chg_diff_j \n.stats/.formats/.formats_fun must", |
430 | ! |
"be a list with names c(\"col1\", \"col23\", \"coldiff\")." |
431 |
)) |
|
432 |
} |
|
433 | ||
434 | 14x |
row_vars <- .spl_context$split |
435 | 14x |
row_val <- .spl_context$cur_col_split_val |
436 | ||
437 | 14x |
col_split <- .spl_context$cur_col_split |
438 | ||
439 |
## treatment group value/variable KEY assumption : treatment variable is the last split_col prior to |
|
440 |
## split_cols_by_multivar |
|
441 | 14x |
colvars <- col_split[[NROW(.spl_context)]] |
442 | 14x |
colvars_multivars <- which(colvars == "multivars") |
443 | 14x |
if (identical(colvars_multivars, integer(0))) { |
444 | ! |
stop("Layout must at least contain a split_cols_by_multivar call.") |
445 |
} |
|
446 | 14x |
if (colvars_multivars == 1) { |
447 | ! |
stop("Layout must at least contain a split_cols_by prior to split_cols_by_multivar call.") |
448 |
} |
|
449 |
## KEY assumption : treatment variable is the last split_col prior to split_cols_by_multivar |
|
450 | 14x |
trt_var <- .spl_context$cur_col_split[[NROW(.spl_context)]][colvars_multivars - 1] |
451 | 14x |
trt_val <- .spl_context$cur_col_split_val[[NROW(.spl_context)]][colvars_multivars - 1] |
452 | ||
453 | 14x |
mysplitlevel <- length(.spl_context$split) |
454 | 14x |
cur_lvl <- .spl_context$value[[mysplitlevel]] |
455 | 14x |
cur_col_id <- .spl_context$cur_col_id[[mysplitlevel]] |
456 | ||
457 | 14x |
cur_param <- .spl_context$value[[mysplitlevel - 1]] |
458 | ||
459 | 14x |
indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE) |
460 | ||
461 |
# Early return if we're not comparing between groups but we're in a difference column |
|
462 | 14x |
if (!comp_btw_group && indiffcol) { |
463 | ! |
return(rcell(NULL, format = NULL, label = cur_lvl, format_na_str = format_na_str)) |
464 |
} |
|
465 | ||
466 |
## variable that is analyzed |
|
467 | 14x |
last_val <- utils::tail(.spl_context$cur_col_split_val[[NROW(.spl_context)]], 1) |
468 | ||
469 |
# as AVAL will be utilized twice, the second call results in last_val == 'AVAL._[[2]]_.' take the first part of it |
|
470 |
# in flast_val (will become AVAL) flast_val <- stringr::str_split_1(last_val, pattern = '._')[1] |
|
471 | 14x |
flast_val <- unlist(strsplit(last_val, "._"))[1] |
472 | ||
473 | 14x |
x_stats <- NULL |
474 | 14x |
fmt_d <- NULL |
475 | ||
476 | 14x |
.in_ref_col <- FALSE |
477 | 14x |
.ref_group <- NULL |
478 | 14x |
if (comp_btw_group) { |
479 | 14x |
trt_var_refspec <- utils::tail(ref_path, n = 2)[1] |
480 | 14x |
checkmate::assert_true(identical(trt_var, trt_var_refspec)) |
481 |
# ctrl_grp |
|
482 | 14x |
ctrl_grp <- utils::tail(ref_path, n = 1) |
483 | ||
484 |
### check that ctrl_grp is a level of the treatment variable, in case riskdiff is requested |
|
485 | 14x |
if (!ctrl_grp %in% levels(df[[trt_var]])) { |
486 | ! |
stop(paste0( |
487 | ! |
"control group specification in ref_path argument (", |
488 | ! |
ctrl_grp, |
489 | ! |
") is not a level of your treatment group variable (", |
490 | ! |
trt_var, |
491 |
")." |
|
492 |
)) |
|
493 |
} |
|
494 | ||
495 | 6x |
if (trt_val == ctrl_grp) .in_ref_col <- TRUE |
496 | ||
497 | 14x |
.ref_group <- .df_row[.df_row[[trt_var]] == ctrl_grp, ] |
498 |
} |
|
499 | ||
500 | 14x |
if (last_val == multivars[1] && !indiffcol) { |
501 | 4x |
mystat <- .stats[["col1"]] |
502 | 4x |
if (is.null(.formats[["col1"]])) { |
503 | 4x |
fmt <- .formats_fun[["col1"]] |
504 |
} else { |
|
505 | ! |
fmt <- .formats[["col1"]] |
506 |
} |
|
507 | ||
508 | 4x |
x_stats <- s_aval_chg_col1( |
509 | 4x |
df = df, |
510 | 4x |
.var = flast_val, |
511 | 4x |
denom = denom, |
512 | 4x |
.N_col = .N_col, |
513 | 4x |
id = id, |
514 | 4x |
indatavar = indatavar |
515 |
) |
|
516 | 10x |
} else if (last_val %in% multivars[2:3]) { |
517 | 10x |
x_stats <- s_aval_chg_col23_diff( |
518 | 10x |
df = df, |
519 | 10x |
.var = flast_val, |
520 | 10x |
.df_row = .df_row, |
521 | 10x |
.ref_group = .ref_group, |
522 | 10x |
.in_ref_col = .in_ref_col, |
523 | 10x |
ancova = ancova, |
524 | 10x |
interaction_y = interaction_y, |
525 | 10x |
interaction_item = interaction_item, |
526 | 10x |
conf_level = conf_level, |
527 | 10x |
variables = variables, |
528 | 10x |
trt_var = trt_var, |
529 | 10x |
ctrl_grp = ctrl_grp, |
530 | 10x |
cur_param = cur_param, |
531 | 10x |
cur_lvl = cur_lvl |
532 |
) |
|
533 | ||
534 | 10x |
if (comp_btw_group && indiffcol) { |
535 | 2x |
mystat1 <- "coldiff" |
536 |
} else { |
|
537 | 8x |
mystat1 <- "col23" |
538 |
} |
|
539 | 10x |
mystat <- .stats[[mystat1]] |
540 | ||
541 | 10x |
fmt_d <- .formats[[mystat1]] |
542 | 10x |
formatting_fun <- .formats_fun[[mystat1]] |
543 | ||
544 | 10x |
fmt <- format_xxd(fmt_d, d = d, .df_row = .df_row, formatting_fun = formatting_fun) |
545 |
} |
|
546 | 14x |
x_stats <- x_stats[[mystat]] |
547 | ||
548 |
## |
|
549 | 14x |
ret <- rcell(x_stats, format = fmt, label = cur_lvl, format_na_str = format_na_str) |
550 | ||
551 | 14x |
return(ret) |
552 |
} |
1 |
#' Helpers for Processing Least Square Means |
|
2 |
#' |
|
3 |
#' @param fit result of model fitting function, e.g. [mmrm::mmrm()] or [stats::lm()]. |
|
4 |
#' @inheritParams fit_mmrm_j |
|
5 |
#' @param averages (`list`)\cr optional named list of visit levels which should be averaged |
|
6 |
#' and reported along side the single visits. |
|
7 |
#' @param weights (`string`)\cr argument from [emmeans::emmeans()], 'counterfactual' by default. |
|
8 |
#' @param specs (`list`)\cr list of least square means specifications, with |
|
9 |
#' elements `coefs` (coefficient list) and `grid` (corresponding `data.frame`). |
|
10 |
#' @param emmeans_res (`list`)\cr initial `emmeans` result from [h_get_emmeans_res()]. |
|
11 |
#' |
|
12 |
#' @name lsmeans_helpers |
|
13 |
#' @keywords internal |
|
14 |
NULL |
|
15 | ||
16 |
#' @describeIn lsmeans_helpers returns a list with |
|
17 |
#' `object` (`emmGrid` object containing `emmeans` results) and `grid` |
|
18 |
#' (`data.frame` containing the potential arm and the visit variables |
|
19 |
#' together with the sample size `n` for each combination). |
|
20 |
h_get_emmeans_res <- function(fit, vars, weights) { |
|
21 | 30x |
data_complete <- stats::model.frame(fit) |
22 | 30x |
checkmate::assert_data_frame(data_complete) |
23 | 30x |
checkmate::assert_list(vars) |
24 | ||
25 | 30x |
emmeans_object <- emmeans::emmeans(fit, data = data_complete, specs = c(vars$visit, vars$arm), weights = weights) |
26 | ||
27 |
# Save grid with renamed number of subjects column. |
|
28 | 30x |
visit_arm_grid <- emmeans_object@grid |
29 | 30x |
wgt_index <- match(".wgt.", names(visit_arm_grid)) |
30 | 30x |
names(visit_arm_grid)[wgt_index] <- "n" |
31 | 30x |
visit_arm_grid$n <- as.integer(visit_arm_grid$n) |
32 | ||
33 | 30x |
list(object = emmeans_object, grid = visit_arm_grid) |
34 |
} |
|
35 | ||
36 |
#' @describeIn lsmeans_helpers constructs average of visits specifications. |
|
37 |
h_get_average_visit_specs <- function(emmeans_res, vars, averages, fit) { |
|
38 | 4x |
visit_grid <- emmeans_res$grid[[vars$visit]] |
39 | 4x |
model_frame <- stats::model.frame(fit) |
40 | 4x |
averages_list <- list() |
41 | 4x |
visit_vec <- n_vec <- c() |
42 | 4x |
if (!is.null(vars$arm)) { |
43 | 4x |
arm_grid <- emmeans_res$grid[[vars$arm]] |
44 | 4x |
arm_vec <- c() |
45 |
} |
|
46 | 4x |
for (i in seq_along(averages)) { |
47 | 5x |
average_label <- names(averages)[i] |
48 | 5x |
visits_average <- averages[[i]] |
49 | 5x |
checkmate::assert_subset(visits_average, choices = levels(visit_grid)) |
50 | 5x |
which_visits_in_average <- visit_grid %in% visits_average |
51 | 5x |
average_coefs <- as.integer(which_visits_in_average) / length(visits_average) |
52 | 5x |
zero_coefs <- numeric(length = length(average_coefs)) |
53 | ||
54 | 5x |
if (is.null(vars$arm)) { |
55 | ! |
averages_list[[average_label]] <- average_coefs |
56 | ! |
visit_vec <- c(visit_vec, average_label) |
57 | ! |
is_in_subset <- (model_frame[[vars$visit]] %in% visits_average) |
58 | ! |
this_n <- length(unique(model_frame[is_in_subset, vars$id])) |
59 | ! |
n_vec <- c(n_vec, this_n) |
60 |
} else { |
|
61 | 5x |
for (this_arm in levels(arm_grid)) { |
62 | 10x |
this_coefs <- zero_coefs |
63 | 10x |
arm_average_label <- paste(this_arm, average_label, sep = ".") |
64 | 10x |
which_arm <- arm_grid == this_arm |
65 | 10x |
this_coefs[which_arm] <- average_coefs[which_arm] |
66 | 10x |
averages_list[[arm_average_label]] <- this_coefs |
67 | 10x |
arm_vec <- c(arm_vec, this_arm) |
68 | 10x |
visit_vec <- c(visit_vec, average_label) |
69 | 10x |
is_in_subset <- (model_frame[[vars$arm]] == this_arm) & (model_frame[[vars$visit]] %in% visits_average) |
70 | 10x |
this_n <- length(unique(model_frame[is_in_subset, vars$id])) |
71 | 10x |
n_vec <- c(n_vec, this_n) |
72 |
} |
|
73 |
} |
|
74 |
} |
|
75 | 4x |
if (is.null(vars$arm)) { |
76 | ! |
averages_grid <- data.frame(visit = visit_vec, n = n_vec) |
77 | ! |
names(averages_grid) <- c(vars$visit, "n") |
78 |
} else { |
|
79 | 4x |
averages_grid <- data.frame(arm = arm_vec, visit = visit_vec, n = n_vec) |
80 | 4x |
names(averages_grid) <- c(vars$arm, vars$visit, "n") |
81 |
} |
|
82 | 4x |
list(coefs = averages_list, grid = averages_grid) |
83 |
} |
|
84 | ||
85 |
#' @describeIn lsmeans_helpers estimates least square means as a `data.frame` |
|
86 |
#' given specifications. |
|
87 |
#' |
|
88 |
#' @note The difference here compared to the original tern.mmrm::h_get_spec_visit_estimates() |
|
89 |
#' function is that additional arguments for [emmeans::contrast()] can be passed via the |
|
90 |
#' dots (`...`) argument. |
|
91 |
#' Once this has been added to the `tern.mmrm` package then its functions can be used instead. |
|
92 |
#' |
|
93 |
#' @param tests (`flag`)\cr whether to add test results to the estimates. |
|
94 |
#' @param ... additional arguments for [emmeans::contrast()]. |
|
95 |
h_get_spec_visit_estimates <- function(emmeans_res, specs, conf_level, tests = FALSE, ...) { |
|
96 | 38x |
checkmate::assert_list(emmeans_res) |
97 | 38x |
checkmate::assert_list(specs) |
98 | 38x |
checkmate::assert_number(conf_level) |
99 | 38x |
checkmate::assert_flag(tests) |
100 | ||
101 | 38x |
conts <- emmeans::contrast(emmeans_res$object, specs$coefs, ...) |
102 | 38x |
cis <- stats::confint(conts, level = conf_level) |
103 | 38x |
res <- cbind( |
104 | 38x |
specs$grid, |
105 | 38x |
data.frame(estimate = cis$estimate, se = cis$SE, df = cis$df, lower_cl = cis$lower.CL, upper_cl = cis$upper.CL) |
106 |
) |
|
107 | 38x |
if (tests) { |
108 | 34x |
conts_df <- as.data.frame(conts) |
109 | 34x |
res$t_stat <- conts_df$t.ratio |
110 | 34x |
res$p_value <- conts_df$p.value |
111 | 34x |
res$p_value_less <- stats::pt(conts_df$t.ratio, df = conts_df$df, lower.tail = TRUE) |
112 | 34x |
res$p_value_greater <- stats::pt(conts_df$t.ratio, df = conts_df$df, lower.tail = FALSE) |
113 |
} |
|
114 | 38x |
res |
115 |
} |
|
116 | ||
117 |
#' @describeIn lsmeans_helpers estimates least square means for single visits. |
|
118 |
h_get_single_visit_estimates <- function(emmeans_res, conf_level) { |
|
119 | 30x |
checkmate::assert_list(emmeans_res) |
120 | 30x |
checkmate::assert_number(conf_level) |
121 | ||
122 | 30x |
cis <- stats::confint(emmeans_res$object, level = conf_level) |
123 | 30x |
cbind( |
124 | 30x |
emmeans_res$grid[, setdiff(names(emmeans_res$grid), "n"), drop = FALSE], |
125 | 30x |
data.frame(estimate = cis$emmean, se = cis$SE, df = cis$df, lower_cl = cis$lower.CL, upper_cl = cis$upper.CL), |
126 | 30x |
emmeans_res$grid[, "n", drop = FALSE] |
127 |
) |
|
128 |
} |
|
129 | ||
130 |
#' @describeIn lsmeans_helpers constructs `data.frame` with |
|
131 |
#' relative reduction vs. reference arm based on single visit estimates. |
|
132 |
#' @param estimates (`data.frame`)\cr single visit least square mean estimates. |
|
133 |
h_get_relative_reduc_df <- function(estimates, vars) { |
|
134 | 30x |
checkmate::assert_data_frame(estimates) |
135 | 30x |
checkmate::assert_list(vars) |
136 | ||
137 | 30x |
ref_arm_level <- estimates[[vars$arm]][1L] |
138 | 30x |
ref_estimates <- estimates[estimates[[vars$arm]] == ref_arm_level, c(vars$visit, "estimate")] |
139 | 30x |
names(ref_estimates)[2L] <- "ref" |
140 | 30x |
result <- merge(estimates[estimates[[vars$arm]] != ref_arm_level, ], ref_estimates, by = vars$visit, sort = FALSE) |
141 | 30x |
result$relative_reduc <- (result$ref - result$estimate) / result$ref |
142 | 30x |
result[, c(vars$visit, vars$arm, "relative_reduc")] |
143 |
} |
|
144 | ||
145 |
#' @describeIn lsmeans_helpers constructs single visit contrast specifications. |
|
146 |
h_single_visit_contrast_specs <- function(emmeans_res, vars) { |
|
147 | 10x |
checkmate::assert_list(emmeans_res) |
148 | 10x |
checkmate::assert_list(vars) |
149 | ||
150 | 10x |
emmeans_res$grid$index <- seq_len(nrow(emmeans_res$grid)) |
151 | ||
152 | 10x |
grid_by_visit <- split(emmeans_res$grid, emmeans_res$grid[[vars$visit]]) |
153 | ||
154 | 10x |
arm_levels <- emmeans_res$object@levels[[vars$arm]] |
155 | 10x |
ref_arm_level <- arm_levels[1L] |
156 | 10x |
zeros_coefs <- numeric(nrow(emmeans_res$grid)) |
157 | 10x |
overall_list <- list() |
158 | 10x |
arm_vec <- visit_vec <- c() |
159 | 10x |
for (j in seq_along(grid_by_visit)) { |
160 | 40x |
this_grid <- grid_by_visit[[j]] |
161 | 40x |
ref_index <- which(this_grid[[vars$arm]] == ref_arm_level) |
162 | 40x |
this_visit <- names(grid_by_visit)[j] |
163 | 40x |
this_ref_coefs <- zeros_coefs |
164 | 40x |
this_ref_coefs[this_grid$index[ref_index]] <- -1 |
165 | 40x |
this_list <- list() |
166 | 40x |
for (i in seq_len(nrow(this_grid))[-ref_index]) { |
167 | 64x |
this_coefs <- this_ref_coefs |
168 | 64x |
this_coefs[this_grid$index[i]] <- 1 |
169 | 64x |
this_arm <- as.character(this_grid[[vars$arm]][i]) |
170 | 64x |
arm_vec <- c(arm_vec, this_arm) |
171 | 64x |
visit_vec <- c(visit_vec, this_visit) |
172 | 64x |
this_label <- paste(this_arm, this_visit, sep = ".") |
173 | 64x |
this_list[[this_label]] <- this_coefs |
174 |
} |
|
175 | 40x |
overall_list <- c(overall_list, this_list) |
176 |
} |
|
177 | ||
178 | 10x |
grid <- data.frame(arm = arm_vec, visit = visit_vec) |
179 | 10x |
names(grid) <- c(vars$arm, vars$visit) |
180 | 10x |
list(coefs = overall_list, grid = grid) |
181 |
} |
|
182 | ||
183 |
#' @describeIn lsmeans_helpers constructs average visits contrast specifications, |
|
184 |
#' given the `specs` for single visit contrasts and the averages required. |
|
185 |
h_average_visit_contrast_specs <- function(specs, averages) { |
|
186 | 4x |
arm_visit_grid <- specs$grid |
187 | 4x |
arm_visit_grid$index <- seq_len(nrow(arm_visit_grid)) |
188 | 4x |
grid_by_arm <- split(arm_visit_grid, arm_visit_grid[, 1L]) |
189 | 4x |
overall_list <- list() |
190 | 4x |
arm_vec <- visit_vec <- c() |
191 | 4x |
for (j in seq_along(grid_by_arm)) { |
192 | 4x |
this_arm <- names(grid_by_arm)[j] |
193 | 4x |
this_grid <- grid_by_arm[[j]] |
194 | 4x |
for (i in seq_along(averages)) { |
195 | 5x |
average_label <- names(averages)[i] |
196 | 5x |
visits_average <- averages[[i]] |
197 | 5x |
which_visits_in_average <- this_grid[, 2L] %in% visits_average |
198 | 5x |
averaged_indices <- this_grid$index[which_visits_in_average] |
199 | 5x |
this_comb <- paste(this_arm, average_label, sep = ".") |
200 | 5x |
averaged_coefs <- colMeans(do.call(rbind, specs$coefs[averaged_indices])) |
201 | 5x |
overall_list[[this_comb]] <- averaged_coefs |
202 | 5x |
arm_vec <- c(arm_vec, this_arm) |
203 | 5x |
visit_vec <- c(visit_vec, average_label) |
204 |
} |
|
205 |
} |
|
206 | ||
207 | 4x |
grid <- data.frame(arm = arm_vec, visit = visit_vec) |
208 | 4x |
names(grid) <- names(arm_visit_grid[, c(1, 2)]) |
209 | ||
210 | 4x |
list(coefs = overall_list, grid = grid) |
211 |
} |
1 |
#' Extract Estimates from Multivariate Cox Regression Model Fit Object |
|
2 |
#' |
|
3 |
#' @param x (`coxreg.multivar`)\cr from [tern::fit_coxreg_multivar()]. |
|
4 |
#' @export |
|
5 |
#' @return A data frame containing Cox regression results with columns for term, |
|
6 |
#' coef_se (coefficient and standard error), p.value, hr (hazard ratio), |
|
7 |
#' hr_ci (confidence interval for hazard ratio), and labels (formatted term labels). |
|
8 |
#' @keywords internal |
|
9 |
#' |
|
10 |
#' @examples |
|
11 |
#' anl <- tern::tern_ex_adtte |> |
|
12 |
#' dplyr::mutate(EVENT = 1 - CNSR) |
|
13 |
#' |
|
14 |
#' variables <- list( |
|
15 |
#' time = "AVAL", |
|
16 |
#' event = "EVENT", |
|
17 |
#' arm = "ARM", |
|
18 |
#' covariates = c("SEX", "AGE") |
|
19 |
#' ) |
|
20 |
#' |
|
21 |
#' control <- tern::control_coxreg( |
|
22 |
#' conf_level = 0.9, |
|
23 |
#' ties = "efron" |
|
24 |
#' ) |
|
25 |
#' |
|
26 |
#' fit <- tern::fit_coxreg_multivar( |
|
27 |
#' data = anl, |
|
28 |
#' variables = variables, |
|
29 |
#' control = control |
|
30 |
#' ) |
|
31 |
#' |
|
32 |
#' h_extract_coxreg_multivar(fit) |
|
33 |
h_extract_coxreg_multivar <- function(x) { |
|
34 | 22x |
checkmate::assert_class(x, "coxreg.multivar") |
35 | 22x |
vars <- c(x$vars$arm, x$vars$covariates) |
36 | ||
37 |
# Extract the table. |
|
38 | 22x |
tab <- broom::tidy(x$mod, exponentiate = FALSE, conf.int = TRUE, conf.level = x$control$conf_level) |
39 | 22x |
tab$hr <- exp(tab$estimate) |
40 | 22x |
tab$hr_ci <- Map(lcl = exp(tab$conf.low), ucl = exp(tab$conf.high), f = function(lcl, ucl) c(lcl, ucl)) |
41 | 22x |
tab$coef_se <- Map(coef = tab$estimate, se = tab$std.error, f = function(coef, se) c(coef, se)) |
42 | 22x |
colnms <- c("term", "coef_se", "p.value", "hr", "hr_ci") |
43 | 22x |
tab <- tab[, colnms, drop = FALSE] |
44 | ||
45 |
# Format nicely the term labels. |
|
46 | 22x |
var_to_term <- sapply(vars, function(v) which(startsWith(tab$term, v)), USE.NAMES = TRUE, simplify = FALSE) |
47 | 22x |
tab$labels <- unlist(lapply(vars, function(v) { |
48 | 67x |
inds <- var_to_term[[v]] |
49 | 67x |
strip_term <- gsub(paste0("^", v), "", tab$term[inds]) |
50 | 67x |
this_df <- x$dat[v] |
51 | 67x |
if (is.character(this_df[[1]])) { |
52 | 1x |
this_df[[1]] <- as.factor(this_df[[1]]) |
53 |
} |
|
54 | 67x |
if (is.factor(this_df[[1]])) { |
55 | 45x |
ref_level <- levels(this_df[[1]])[1] |
56 | 45x |
var_name <- if (v == x$vars$arm) "Treatment" else labels_or_names(this_df) |
57 | 45x |
paste0(var_name, " (", strip_term, " vs. ", ref_level, ")") |
58 |
} else { |
|
59 | 22x |
labels_or_names(this_df) |
60 |
} |
|
61 |
})) |
|
62 | 22x |
tab |
63 |
} |
|
64 | ||
65 |
#' First Level Column Split Function for TEFOS03 (mmy) Table Layout |
|
66 |
#' @seealso [rtables::make_split_fun()] for details. |
|
67 |
#' @keywords internal |
|
68 |
tefos03_first_post_fun <- function(ret, spl, fulldf, .spl_context) { |
|
69 | 5x |
all_expr <- expression(TRUE) |
70 | 5x |
short_split_result(model_fit = "Model Fit", hazard_ratio = "Hazard Ratio", fulldf = fulldf) |
71 |
} |
|
72 |
tefos03_first_split_fun <- make_split_fun(post = list(tefos03_first_post_fun)) |
|
73 | ||
74 |
#' Second Level Column Split Function Factory for TEFOS03 (mmy) Table Layout |
|
75 |
#' |
|
76 |
#' @inheritParams proposal_argument_convention |
|
77 |
#' |
|
78 |
#' @return Split function to use in the TEFOS03 (mmy) and related table layouts. |
|
79 |
#' |
|
80 |
#' @seealso [tefos03_first_post_fun()] for the first level split. |
|
81 |
#' @keywords internal |
|
82 |
tefos03_second_split_fun_fct <- function(conf_level) { |
|
83 | 4x |
post_fun <- function(ret, spl, fulldf, .spl_context) { |
84 | 8x |
all_expr <- expression(TRUE) |
85 | 8x |
colset <- .spl_context[nrow(.spl_context), "value"][[1]] |
86 | 8x |
if (colset == "model_fit") { |
87 | 4x |
short_split_result(coef_se = "Coeff. (SE)", pval = "p-value", fulldf = fulldf) |
88 |
} else { |
|
89 | 4x |
short_split_result(hr_est = "Estimate", hr_ci = f_conf_level(conf_level), fulldf = fulldf) |
90 |
} |
|
91 |
} |
|
92 | 4x |
make_split_fun(post = list(post_fun)) |
93 |
} |
|
94 | ||
95 |
#' @importFrom tern fit_coxreg_multivar |
|
96 |
#' @keywords internal |
|
97 |
memoised_fit_coxreg_multivar <- memoise::memoise(fit_coxreg_multivar) |
|
98 | ||
99 |
#' Analysis Function for TEFOS03 and Related Table Layouts |
|
100 |
#' |
|
101 |
#' @inheritParams proposal_argument_convention |
|
102 |
#' @param control (`list`)\cr from [tern::control_coxreg()]. |
|
103 |
#' @param formats (`list`)\cr including `coef_se`, `hr_est`, `hr_ci` and `pval` formats. |
|
104 |
#' @param variables (`list`)\cr see [tern::fit_coxreg_multivar()] for required variable |
|
105 |
#' specifications. |
|
106 |
#' |
|
107 |
#' @keywords internal |
|
108 |
tefos03_afun <- function(df, .var, .spl_context, variables, control, formats) { |
|
109 | 21x |
this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
110 | 21x |
model_fit <- memoised_fit_coxreg_multivar(data = df, variables = variables, control = control) |
111 | 21x |
tab <- h_extract_coxreg_multivar(model_fit) |
112 | 21x |
if (this_col_split[1] == "model_fit") { |
113 | 11x |
if (this_col_split[2] == "coef_se") { |
114 | 5x |
in_rows(.list = tab$coef_se, .formats = formats$coef_se, .labels = tab$labels) |
115 |
} else { |
|
116 | 6x |
in_rows(.list = tab$p.value, .formats = formats$pval, .labels = tab$labels) |
117 |
} |
|
118 |
} else { |
|
119 | 10x |
if (this_col_split[2] == "hr_est") { |
120 | 5x |
in_rows(.list = tab$hr, .formats = formats$hr_est, .labels = tab$labels) |
121 |
} else { |
|
122 | 5x |
in_rows(.list = tab$hr_ci, .formats = formats$hr_ci, .labels = tab$labels) |
123 |
} |
|
124 |
} |
|
125 |
} |
|
126 | ||
127 |
#' Layout Generating Function for TEFOS03 and Related Cox Regression Layouts |
|
128 |
#' |
|
129 |
#' @inheritParams proposal_argument_convention |
|
130 |
#' @inheritParams tefos03_afun |
|
131 |
#' @param var (`string`)\cr any variable from the data, because this is not used. |
|
132 |
#' |
|
133 |
#' @return `lyt` modified to add the desired cox regression table section. |
|
134 |
#' @export |
|
135 |
#' @examples |
|
136 |
#' anl <- tern::tern_ex_adtte |> |
|
137 |
#' dplyr::mutate(EVENT = 1 - CNSR) |
|
138 |
#' |
|
139 |
#' variables <- list( |
|
140 |
#' time = "AVAL", |
|
141 |
#' event = "EVENT", |
|
142 |
#' arm = "ARM", |
|
143 |
#' covariates = c("SEX", "AGE") |
|
144 |
#' ) |
|
145 |
#' |
|
146 |
#' basic_table() |> |
|
147 |
#' summarize_coxreg_multivar( |
|
148 |
#' var = "STUDYID", |
|
149 |
#' variables = variables |
|
150 |
#' ) |> |
|
151 |
#' build_table(df = anl) |
|
152 |
summarize_coxreg_multivar <- function( |
|
153 |
lyt, |
|
154 |
var, |
|
155 |
variables, |
|
156 |
control = control_coxreg(), |
|
157 |
formats = list( |
|
158 |
coef_se = jjcsformat_xx("xx.xx (xx.xx)"), |
|
159 |
hr_est = jjcsformat_xx("xx.xx"), |
|
160 |
hr_ci = jjcsformat_xx("(xx.xx, xx.xx)"), |
|
161 |
pval = jjcsformat_pval_fct(0) |
|
162 |
)) { |
|
163 | 3x |
second_split_fun <- tefos03_second_split_fun_fct(conf_level = control$conf_level) |
164 | 3x |
lyt |> |
165 | 3x |
split_cols_by(var = var, split_fun = tefos03_first_split_fun) |> |
166 | 3x |
split_cols_by(var = var, split_fun = second_split_fun) |> |
167 | 3x |
analyze( |
168 | 3x |
vars = var, |
169 | 3x |
var_labels = "Model Parameter", |
170 | 3x |
show_labels = "visible", |
171 | 3x |
afun = tefos03_afun, |
172 | 3x |
extra_args = list(variables = variables, control = control, formats = formats) |
173 |
) |
|
174 |
} |
1 |
#' `ANCOVA` Analysis |
|
2 |
#' |
|
3 |
#' Performs the `ANCOVA` analysis, separately for each visit. |
|
4 |
#' |
|
5 |
#' @param vars (named `list` of `string` or `character`)\cr specifying the variables in the `ANCOVA` analysis. |
|
6 |
#' The following elements need to be included as character vectors and match corresponding columns |
|
7 |
#' in `data`: |
|
8 |
#' |
|
9 |
#' - `response`: the response variable. |
|
10 |
#' - `covariates`: the additional covariate terms (might also include interactions). |
|
11 |
#' - `id`: the subject ID variable (not really needed for the computations but for internal logistics). |
|
12 |
#' - `arm`: the treatment group variable (factor). |
|
13 |
#' - `visit`: the visit variable (factor). |
|
14 |
#' |
|
15 |
#' Note that the `arm` variable is by default included in the model, thus should not be part of `covariates`. |
|
16 |
#' @param data (`data.frame`)\cr with all the variables specified in |
|
17 |
#' `vars`. Records with missing values in any independent variables |
|
18 |
#' will be excluded. |
|
19 |
#' @param conf_level (`proportion`)\cr confidence level of the interval. |
|
20 |
#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `'counterfactual'` by default. |
|
21 |
#' |
|
22 |
#' @return A `tern_model` object which is a list with model results: |
|
23 |
#' |
|
24 |
#' - `fit`: A list with a fitted [stats::lm()] result for each visit. |
|
25 |
#' - `mse`: Mean squared error, i.e. variance estimate, for each visit. |
|
26 |
#' - `df`: Degrees of freedom for the variance estimate for each visit. |
|
27 |
#' - `lsmeans`: This is a list with data frames `estimates` and `contrasts`. |
|
28 |
#' The attribute `weights` savse the settings used (`weights_emmeans`). |
|
29 |
#' - `vars`: The variable list. |
|
30 |
#' - `labels`: Corresponding list with variable labels extracted from `data`. |
|
31 |
#' - `ref_level`: The reference level for the arm variable, which is always the first level. |
|
32 |
#' - `treatment_levels`: The treatment levels for the arm variable. |
|
33 |
#' - `conf_level`: The confidence level which was used to construct the `lsmeans` confidence intervals. |
|
34 |
#' |
|
35 |
#' @export |
|
36 |
#' @examples |
|
37 |
#' library(mmrm) |
|
38 |
#' |
|
39 |
#' fit <- fit_ancova( |
|
40 |
#' vars = list( |
|
41 |
#' response = "FEV1", |
|
42 |
#' covariates = c("RACE", "SEX"), |
|
43 |
#' arm = "ARMCD", |
|
44 |
#' id = "USUBJID", |
|
45 |
#' visit = "AVISIT" |
|
46 |
#' ), |
|
47 |
#' data = fev_data, |
|
48 |
#' conf_level = 0.9, |
|
49 |
#' weights_emmeans = "equal" |
|
50 |
#' ) |
|
51 |
#' |
|
52 |
fit_ancova <- function( |
|
53 |
vars = list(response = "AVAL", covariates = c(), arm = "ARM", visit = "AVISIT", id = "USUBJID"), |
|
54 |
data, |
|
55 |
conf_level = 0.95, |
|
56 |
weights_emmeans = "proportional") { |
|
57 | 5x |
labels <- h_labels(vars, data) |
58 | ||
59 | 5x |
arm_levels <- levels(data[[vars$arm]]) |
60 | 5x |
ref_level <- arm_levels[1] |
61 | 5x |
trt_levels <- arm_levels[-1] |
62 | 5x |
visit_levels <- levels(data[[vars$visit]]) |
63 | 5x |
grid <- list( |
64 | 5x |
factor(arm_levels, levels = arm_levels), |
65 | 5x |
factor(rep(visit_levels, each = length(arm_levels)), levels = visit_levels) |
66 |
) |> |
|
67 | 5x |
stats::setNames(c(vars$arm, vars$visit)) |> |
68 | 5x |
as.data.frame() |
69 | 5x |
checkmate::assert_disjunct(vars$arm, vars$covariates) |
70 | 5x |
covariates_part <- paste(vars$covariates, collapse = " + ") |
71 | 5x |
formula <- if (covariates_part != "") { |
72 | 5x |
stats::as.formula(paste0(vars$response, " ~ ", covariates_part, " + ", vars$arm)) |
73 |
} else { |
|
74 | ! |
stats::as.formula(paste0(vars$response, " ~ ", vars$arm)) |
75 |
} |
|
76 | 5x |
results_by_visit <- Map( |
77 | 5x |
data = split(data, data[[vars$visit]]), |
78 | 5x |
grid = split(grid, grid[[vars$visit]]), |
79 | 5x |
f = function(data, grid) { |
80 | 20x |
fit <- stats::lm(formula = formula, data = data) |
81 | 20x |
summary_fit <- summary(fit) |
82 | 20x |
emmeans_res <- h_get_emmeans_res(fit, vars = vars["arm"], weights = weights_emmeans) |
83 | 20x |
estimates <- h_get_single_visit_estimates(emmeans_res, conf_level) |
84 | 20x |
estimates[[vars$visit]] <- grid[[vars$visit]] |
85 | 20x |
estimates <- estimates |> dplyr::relocate(!!vars$arm, !!vars$visit) |
86 | 20x |
contrasts <- h_get_spec_visit_estimates( |
87 | 20x |
emmeans_res, |
88 | 20x |
specs = list( |
89 | 20x |
coefs = "trt.vs.ctrl", |
90 | 20x |
grid = grid[-1, ] # Without the reference level row. |
91 |
), |
|
92 | 20x |
conf_level = conf_level, |
93 | 20x |
tests = TRUE, |
94 | 20x |
ref = ref_level |
95 |
) |
|
96 | 20x |
relative_reduc_df <- h_get_relative_reduc_df(estimates, vars) |
97 | 20x |
contrasts <- merge(contrasts, relative_reduc_df, by = c(vars$arm, vars$visit), sort = FALSE) |
98 | 20x |
list(fit = fit, estimates = estimates, contrasts = contrasts, mse = summary_fit$sigma^2, df = summary_fit$df[2]) |
99 |
} |
|
100 |
) |
|
101 | 5x |
lsmeans <- structure( |
102 | 5x |
list( |
103 | 5x |
estimates = do.call(rbind, c(lapply(results_by_visit, "[[", "estimates"), make.row.names = FALSE)), |
104 | 5x |
contrasts = do.call(rbind, c(lapply(results_by_visit, "[[", "contrasts"), make.row.names = FALSE)) |
105 |
), |
|
106 | 5x |
weights = weights_emmeans |
107 |
) |
|
108 | 5x |
results <- list( |
109 | 5x |
fit = lapply(results_by_visit, "[[", "fit"), |
110 | 5x |
mse = do.call(c, lapply(results_by_visit, "[[", "mse")), |
111 | 5x |
df = do.call(c, lapply(results_by_visit, "[[", "df")), |
112 | 5x |
lsmeans = lsmeans, |
113 | 5x |
vars = vars, |
114 | 5x |
labels = labels, |
115 | 5x |
ref_level = ref_level, |
116 | 5x |
treatment_levels = trt_levels, |
117 | 5x |
conf_level = conf_level |
118 |
) |
|
119 | 5x |
class(results) <- "tern_model" |
120 | 5x |
results |
121 |
} |
1 |
#' Split Function for Compliance Columns |
|
2 |
#' |
|
3 |
#' Here we just split into 3 columns for expected, received and missing visits. |
|
4 |
#' |
|
5 |
#' @inheritParams proposal_argument_convention |
|
6 |
#' @param ret (`list`)\cr result from previous split function steps. |
|
7 |
#' @param spl (`split`)\cr split object. |
|
8 |
#' @param fulldf (`data.frame`)\cr full data frame. |
|
9 |
#' @param vals (`character`)\cr values to use for the split. |
|
10 |
#' @param trim (`logical`)\cr whether to trim the values. |
|
11 |
#' |
|
12 |
#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of |
|
13 |
#' post-processing function. |
|
14 |
cmp_post_fun <- function(ret, spl, fulldf, .spl_context) { |
|
15 | 1x |
short_split_result( |
16 | 1x |
expected = "Expected, N", |
17 | 1x |
received = "Received, n (%)", |
18 | 1x |
missing = "Missing, n (%)", |
19 | 1x |
fulldf = fulldf |
20 |
) |
|
21 |
} |
|
22 |
#' @rdname cmp_post_fun |
|
23 |
#' @return a split function for use with [rtables::split_rows_by] |
|
24 |
#' when creating proportion-based tables with compliance columns. |
|
25 |
#' @export |
|
26 |
cmp_split_fun <- make_split_fun(post = list(cmp_post_fun)) |
|
27 | ||
28 |
#' @title Summary Analysis Function for Compliance Columns |
|
29 |
#' @description A simple statistics function which prepares the numbers with percentages |
|
30 |
#' in the required format, for use in a split content row. The denominator here is |
|
31 |
#' from the expected visits column. |
|
32 |
#' @inheritParams proposal_argument_convention |
|
33 |
#' @param variables (`list`)\cr with variable names of logical columns for |
|
34 |
#' `expected`, `received` and `missing` visits. |
|
35 |
#' @param formats (`list`)\cr with the `count_percent` format to use for the received |
|
36 |
#' and missing visits columns. |
|
37 |
#' @return The [rtables::in_rows()] result with the counts and proportion statistics. |
|
38 |
#' @seealso [cmp_post_fun()] for the corresponding split function. |
|
39 |
#' @export |
|
40 |
cmp_cfun <- function(df, labelstr, .spl_context, variables, formats) { |
|
41 | 3x |
this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
42 | 3x |
this_afun_col <- this_col_split[2] |
43 | ||
44 | 3x |
exp <- df[[variables$expected]] |
45 | 3x |
checkmate::assert_logical(exp) |
46 | 3x |
n_exp <- sum(exp) |
47 | 3x |
rec <- df[[variables$received]] |
48 | 3x |
mis <- df[[variables$missing]] |
49 | ||
50 | 3x |
if (this_afun_col == "expected") { |
51 | 1x |
in_rows(n_exp, .labels = labelstr, .formats = "xx.") |
52 | 2x |
} else if (this_afun_col == "received") { |
53 | 1x |
checkmate::assert_logical(rec) |
54 | 1x |
checkmate::assert_true(all(exp[rec])) |
55 | ||
56 |
# Check if count_percent format is available |
|
57 | 1x |
if (!is.null(formats) && !("count_percent" %in% names(formats))) { |
58 | ! |
stop("'count_percent' format must be provided in the formats argument") |
59 |
} |
|
60 | ||
61 | 1x |
in_rows(sum(rec) * c(1, 1 / n_exp), .formats = formats$count_percent, .labels = labelstr) |
62 | 1x |
} else if (this_afun_col == "missing") { |
63 | 1x |
checkmate::assert_logical(mis) |
64 | 1x |
checkmate::assert_true(all(exp[mis])) |
65 | 1x |
checkmate::assert_true(!any(rec[mis])) |
66 | ||
67 |
# Check if count_percent format is available |
|
68 | 1x |
if (!is.null(formats) && !("count_percent" %in% names(formats))) { |
69 | ! |
stop("'count_percent' format must be provided in the formats argument") |
70 |
} |
|
71 | ||
72 | 1x |
in_rows(sum(mis) * c(1, 1 / n_exp), .formats = formats$count_percent, .labels = labelstr) |
73 |
} |
|
74 |
} |
1 |
#' @name response_by_var |
|
2 |
#' @title Count denom fraction statistic |
|
3 |
#' |
|
4 |
#' @description Derives the count_denom_fraction statistic (i.e., 'xx /xx (xx.x percent)' ) |
|
5 |
#' Summarizes the number of unique subjects with a response = 'Y' for a given variable |
|
6 |
#' (e.g. TRTEMFL) within each category of another variable (e.g., SEX). |
|
7 |
#' Note that the denominator is derived using input df, |
|
8 |
#' in order to have these aligned with alt_source_df, it is expected that df includes all subjects. |
|
9 |
#' |
|
10 |
#' @details This is an analysis function for use within `analyze`. Arguments |
|
11 |
#' `df`, `.var` will be populated automatically by rtables during |
|
12 |
#' the tabulation process. |
|
13 |
#' |
|
14 |
#' @param df Name of dataframe being analyzed. |
|
15 |
#' @param labelstr Custom label for the variable being analyzed. |
|
16 |
#' @param .var Name of the variable being analyzed. Records with non-missing |
|
17 |
#' values will be counted in the denominator. |
|
18 |
#' @param .N_col numeric(1). The total for the current column. |
|
19 |
#' @param resp_var Name of variable, for which, records with a value of 'Y' |
|
20 |
#' will be counted in the numerator. |
|
21 |
#' @param id Name of column in df which will have patient identifiers |
|
22 |
#' @param .format Format for the count/denominator/fraction output. |
|
23 |
#' @param ... Additional arguments passed to the function. |
|
24 |
# @examples #analyze(vars='AGEGR1_DECODE', var_labels = 'Age group (years), n/Ns (%)', afun = response_by_var, |
|
25 |
# extra_args = list(resp_var = 'TRTEMFL')) |
|
26 |
#' @examples |
|
27 |
#' |
|
28 |
#' library(dplyr) |
|
29 |
#' |
|
30 |
#' ADAE <- data.frame( |
|
31 |
#' USUBJID = c( |
|
32 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
33 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
34 |
#' ), |
|
35 |
#' SEX_DECODE = c( |
|
36 |
#' "Female", "Female", "Male", "Female", "Male", |
|
37 |
#' "Female", "Male", "Female", "Male", "Female" |
|
38 |
#' ), |
|
39 |
#' TRT01A = c( |
|
40 |
#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", |
|
41 |
#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMB" |
|
42 |
#' ), |
|
43 |
#' TRTEMFL = c("Y", "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "Y") |
|
44 |
#' ) |
|
45 |
#' |
|
46 |
#' ADAE <- ADAE |> |
|
47 |
#' mutate( |
|
48 |
#' TRT01A = as.factor(TRT01A), |
|
49 |
#' SEX_DECODE = as.factor(SEX_DECODE) |
|
50 |
#' ) |
|
51 |
#' |
|
52 |
#' lyt <- basic_table() |> |
|
53 |
#' split_cols_by("TRT01A") |> |
|
54 |
#' analyze( |
|
55 |
#' vars = "SEX_DECODE", |
|
56 |
#' var_labels = "Sex, n/Ns (%)", |
|
57 |
#' show_labels = "visible", |
|
58 |
#' afun = response_by_var, |
|
59 |
#' extra_args = list(resp_var = "TRTEMFL"), |
|
60 |
#' nested = FALSE |
|
61 |
#' ) |
|
62 |
#' |
|
63 |
#' result <- build_table(lyt, ADAE) |
|
64 |
#' |
|
65 |
#' result |
|
66 |
#' @return a `RowsVerticalSection` for use by the internal tabulation machinery of `rtables` |
|
67 |
#' @export |
|
68 | ||
69 |
response_by_var <- function( |
|
70 |
df, |
|
71 |
labelstr = NULL, |
|
72 |
.var, |
|
73 |
.N_col, |
|
74 |
resp_var = NULL, |
|
75 |
id = "USUBJID", |
|
76 |
.format = jjcsformat_count_denom_fraction, |
|
77 |
...) { |
|
78 |
# Derive statistics: xx / xx (xx.x%) |
|
79 | ||
80 | 15x |
if (is.null(resp_var)) { |
81 | ! |
stop("afun response_by_var: resp_var cannot be NULL.") |
82 |
} |
|
83 | ||
84 | 15x |
resp_var_values <- unique(df[[resp_var]][!is.na(df[[resp_var]])]) |
85 | 15x |
if (is.character(df[[resp_var]]) && any(is.na(df[[resp_var]])) && all(resp_var_values == "Y")) { |
86 | ! |
stop(paste( |
87 | ! |
"afun response_by_var: not clear if missing resp_var should be considered", |
88 | ! |
"non-response. Please make it a factor with appropriate Y(/N) levels." |
89 |
)) |
|
90 |
} |
|
91 | ||
92 | 15x |
if (length(setdiff(resp_var_values, c("Y", "N"))) > 0) { |
93 | ! |
stop("afun response_by_var: resp_var must contain only Y/N values.") |
94 |
} |
|
95 | ||
96 | 15x |
df <- df[!is.na(df[[.var]]), ] |
97 | ||
98 |
if ( # nolint start |
|
99 | 15x |
(is.factor(df[[resp_var]]) && |
100 | 15x |
(identical(levels(df[[resp_var]]), c("Y", "N")) || identical(levels(df[[resp_var]]), c("N", "Y")))) || |
101 | 15x |
is.character(df[[resp_var]]) |
102 |
) { # nolint end |
|
103 |
# missing values in resp_var should be excluded, not considered as not met response subject will then not |
|
104 |
# contribute to denominator |
|
105 | 9x |
df <- df[!is.na(df[[resp_var]]), ] |
106 |
} |
|
107 | ||
108 |
## in other cases, missing values in resp_var will be considered not met response and subject will contribute to |
|
109 |
## denominator |
|
110 | 15x |
varvec <- df[[.var]] |
111 | ||
112 |
# For group summaries, varvec will be a singular value, equal to the current split, whereas for afun summaries, |
|
113 |
# varvec will consist of all possible values of var. |
|
114 | 15x |
if (!is.null(labelstr)) { |
115 | ! |
levs <- labelstr |
116 |
} else { |
|
117 | 15x |
levs <- if (is.factor(varvec)) levels(varvec) else unique(varvec) |
118 |
} |
|
119 | ||
120 | 15x |
fn <- function(levii) { |
121 | 63x |
dfii <- df[df[[.var]] == levii, ] |
122 | ||
123 | 63x |
den <- NROW(unique(dfii[, id])) |
124 | 63x |
num <- NROW(unique(dfii[dfii[[resp_var]] == "Y" & !is.na(dfii[[resp_var]]), id])) |
125 | ||
126 | 63x |
rcell(c(num * 1, den * 1, num * 1 / den), format = .format) |
127 |
} |
|
128 | ||
129 | 15x |
cls <- lapply(levs, fn) |
130 | 15x |
names(cls) <- levs |
131 | ||
132 |
# Hand off results to analyze |
|
133 | ||
134 | 15x |
in_rows(.list = cls) |
135 |
} |
1 |
#' @name a_freq_j |
|
2 |
#' |
|
3 |
#' @title Analysis/statistical function for count and percentage in core columns |
|
4 |
#' and (optional) relative risk columns |
|
5 |
#' |
|
6 |
#' @inheritParams proposal_argument_convention |
|
7 |
#' @param val (`character` or NULL)\cr |
|
8 |
#' When NULL, all levels of the incoming variable (variable used in the `analyze` call) |
|
9 |
#' will be considered.\cr |
|
10 |
#' When a single `string`, only that current level/value of the incoming variable |
|
11 |
#' will be considered.\cr |
|
12 |
#' When multiple levels, only those levels/values of the incoming variable |
|
13 |
#' will be considered.\cr |
|
14 |
#' When no values are observed (eg zero row input df), |
|
15 |
#' a row with row-label `No data reported` will be included in the table. |
|
16 |
#' @param drop_levels (`logical`)\cr If `TRUE` non-observed levels |
|
17 |
#' (based upon .df_row) will not be included.\cr |
|
18 |
#' Cannot be used together with `val`. |
|
19 |
#' @param excl_levels (`character` or NULL)\cr |
|
20 |
#' When NULL, no levels of the incoming variable (variable used in the `analyze` call) |
|
21 |
#' will be excluded.\cr |
|
22 |
#' When multiple levels, those levels/values of the incoming variable |
|
23 |
#' will be excluded.\cr |
|
24 |
#' Cannot be used together with `val`. |
|
25 |
#' @param new_levels (list(2) or NULL)\cr List of length 2.\cr |
|
26 |
#' First element : names of the new levels\cr |
|
27 |
#' Second element: list with values of the new levels.\cr |
|
28 |
#' @param new_levels_after (`logical`)\cr If `TRUE` new levels will be added after last level. |
|
29 |
#' @param denom (`string`)\cr See Details. |
|
30 |
#' @param alt_df (`dataframe`)\cr Will be derived based upon alt_df_full and denom_by within a_freq_j. |
|
31 |
#' @param parent_df (`dataframe`)\cr Will be derived within a_freq_j based |
|
32 |
#' upon the input dataframe that goes into build_table (df) and denom_by.\cr |
|
33 |
#' It is a data frame in the higher row-space than the current input df |
|
34 |
#' (which underwent row-splitting by the rtables splitting machinery). |
|
35 |
#' |
|
36 |
#' @param countsource Either `df` or `alt_df`.\cr |
|
37 |
#' When `alt_df` the counts will be based upon the alternative dataframe `alt_df`.\cr |
|
38 |
#' This is useful for subgroup processing, |
|
39 |
#' to present counts of subjects in a subgroup from the alternative dataframe. |
|
40 |
#' |
|
41 |
#' @details |
|
42 |
#' |
|
43 |
#' `denom` controls the denominator used to calculate proportions/percents. |
|
44 |
#' It must be one of \cr |
|
45 |
#' \itemize{ |
|
46 |
#' \item \strong{N_col} Column count, \cr |
|
47 |
#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr |
|
48 |
#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr |
|
49 |
#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr |
|
50 |
#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr |
|
51 |
#' \item \strong{N_colgroup} Number of patients from the column group variable |
|
52 |
#' (note that this is based upon the input .alt_df_full dataframe).\cr |
|
53 |
#' Note that the argument `colgroup` (column variable) needs to be provided, |
|
54 |
#' as it cannot be retrieved directly from the column layout definition. |
|
55 |
#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe |
|
56 |
#' (`.row_df` from the rtables splitting machinery).\cr |
|
57 |
#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr |
|
58 |
#' This higher row-level split is specified in the argument `denom_by`.\cr |
|
59 |
#' } |
|
60 |
#' |
|
61 |
#' @return |
|
62 |
#' * `s_freq_j`: returns a list of following statistics\cr |
|
63 |
#' \itemize{ |
|
64 |
#' \item n_df |
|
65 |
#' \item n_rowdf |
|
66 |
#' \item n_parentdf |
|
67 |
#' \item n_altdf |
|
68 |
#' \item denom |
|
69 |
#' \item count |
|
70 |
#' \item count_unique |
|
71 |
#' \item count_unique_fraction |
|
72 |
#' \item count_unique_denom_fraction |
|
73 |
#' } |
|
74 |
#' |
|
75 |
#' @export |
|
76 |
#' @importFrom stats setNames |
|
77 |
s_freq_j <- function( |
|
78 |
df, |
|
79 |
.var, |
|
80 |
.df_row, |
|
81 |
val = NULL, |
|
82 |
drop_levels = FALSE, |
|
83 |
excl_levels = NULL, |
|
84 |
alt_df, |
|
85 |
parent_df, |
|
86 |
id = "USUBJID", |
|
87 |
denom = c("n_df", "n_altdf", "N_col", "n_rowdf", "n_parentdf"), |
|
88 |
.N_col, |
|
89 |
countsource = c("df", "altdf")) { |
|
90 | 459x |
if (is.na(.var) || is.null(.var)) { |
91 | ! |
stop("Argument .var cannot be NA or NULL.") |
92 |
} |
|
93 | ||
94 | 459x |
countsource <- match.arg(countsource) |
95 | ||
96 | 459x |
if (countsource == "altdf") { |
97 | ! |
df <- alt_df |
98 |
} |
|
99 | ||
100 | 459x |
.alt_df <- alt_df |
101 | ||
102 | 459x |
n1 <- length(unique(.alt_df[[id]])) |
103 | 459x |
n2 <- length(unique(df[[id]])) |
104 | ||
105 | 459x |
n3 <- length(unique(.df_row[[id]])) |
106 | ||
107 | 459x |
if (is.null(parent_df)) { |
108 | 45x |
parent_df <- df |
109 |
} |
|
110 | 459x |
n4 <- length(unique(parent_df[[id]])) |
111 | ||
112 | ||
113 | 459x |
denom <- match.arg(denom) |> switch( |
114 | 459x |
"n_altdf" = n1, |
115 | 459x |
"n_df" = n2, |
116 | 459x |
"n_rowdf" = n3, |
117 | 459x |
"N_col" = .N_col, |
118 | 459x |
"n_parentdf" = n4 |
119 |
) |
|
120 | ||
121 | 459x |
y <- list() |
122 | ||
123 | 459x |
y$n_altdf <- c("n_altdf" = n1) |
124 | 459x |
y$n_df <- c("n_df" = n2) |
125 | 459x |
y$n_rowdf <- c("n_rowdf" = n3) |
126 | 459x |
y$n_parentdf <- c("n_parentdf" = n4) |
127 | 459x |
y$denom <- c("denom" = denom) |
128 | ||
129 | 459x |
if (drop_levels) { |
130 | ! |
obs_levs <- unique(.df_row[[.var]]) |
131 | ! |
obs_levs <- intersect(levels(.df_row[[.var]]), obs_levs) |
132 | ||
133 | ! |
if (!is.null(excl_levels)) obs_levs <- setdiff(obs_levs, excl_levels) |
134 | ||
135 | ! |
if (!is.null(val)) { |
136 | ! |
stop("argument val cannot be used together with drop_levels = TRUE.") |
137 |
} |
|
138 | ! |
val <- obs_levs |
139 |
} |
|
140 | ||
141 | 459x |
if (!is.null(val)) { |
142 | 103x |
df <- df[df[[.var]] %in% val, ] |
143 | 103x |
.df_row <- .df_row[.df_row[[.var]] %in% val, ] |
144 | ||
145 | 103x |
df <- h_update_factor(df, .var, val) |
146 | 103x |
.df_row <- h_update_factor(.df_row, .var, val) |
147 |
} |
|
148 | ||
149 | 459x |
if (!is.null(excl_levels) && drop_levels == FALSE) { |
150 |
# restrict the levels to the ones specified in val argument |
|
151 | ! |
df <- df[!(df[[.var]] %in% excl_levels), ] |
152 | ! |
.df_row <- .df_row[!(.df_row[[.var]] %in% excl_levels), ] |
153 | ||
154 | ! |
df <- h_update_factor(df, .var, excl_levels = excl_levels) |
155 | ! |
.df_row <- h_update_factor(.df_row, .var, excl_levels = excl_levels) |
156 |
} |
|
157 | ||
158 | 459x |
x <- df[[.var]] |
159 | 459x |
x_unique <- unique(df[, c(.var, id)])[[.var]] |
160 | ||
161 | 459x |
if (identical(levels(df[[.var]]), no_data_to_report_str)) { |
162 | ! |
xy <- list() |
163 | ! |
nms <- c( |
164 | ! |
"count", |
165 | ! |
"count_unique", |
166 | ! |
"count_unique_fraction", |
167 | ! |
"count_unique_denom_fraction" |
168 |
) |
|
169 | ! |
xy <- replicate(length(nms), list(setNames(list(NULL), no_data_to_report_str))) |
170 | ! |
names(xy) <- nms |
171 | ! |
y <- append(y, xy) |
172 |
} else { |
|
173 | 459x |
y$count <- lapply( |
174 | 459x |
as.list(table(x, useNA = "ifany")), |
175 | 459x |
stats::setNames, |
176 | 459x |
nm = "count" |
177 |
) |
|
178 | ||
179 | 459x |
y$count_unique <- lapply( |
180 | 459x |
as.list(table(x_unique, useNA = "ifany")), |
181 | 459x |
stats::setNames, |
182 | 459x |
nm = "count_unique" |
183 |
) |
|
184 | ||
185 | 459x |
y$count_unique_fraction <- lapply( |
186 | 459x |
y$count_unique, |
187 | 459x |
function(x) { |
188 |
## we want to return - when denom = 0 |
|
189 |
## this is built into formatting function, when fraction is NA |
|
190 | 1973x |
c(x, "p" = ifelse(denom > 0, x / denom, NA)) |
191 |
} |
|
192 |
) |
|
193 | ||
194 | 459x |
y$count_unique_denom_fraction <- lapply( |
195 | 459x |
y$count_unique, |
196 | 459x |
function(x) { |
197 |
## we want to return - when denom = 0 |
|
198 |
## this is built into formatting function, when fraction is NA |
|
199 | 1973x |
c(x, "d" = denom, "p" = ifelse(denom > 0, x / denom, NA)) |
200 |
} |
|
201 |
) |
|
202 |
} |
|
203 | ||
204 | 459x |
return(y) |
205 |
} |
|
206 | ||
207 |
s_rel_risk_levii_j <- function( |
|
208 |
levii, |
|
209 |
df, |
|
210 |
.var, |
|
211 |
ref_df, |
|
212 |
ref_denom_df, |
|
213 |
.in_ref_col, |
|
214 |
curgrp_denom_df, |
|
215 |
id, |
|
216 |
variables, |
|
217 |
conf_level, |
|
218 |
method, |
|
219 |
weights_method) { |
|
220 | 335x |
dfii <- df[df[[.var]] == levii & !is.na(df[[.var]]), ] |
221 | 335x |
ref_dfii <- ref_df[ref_df[[.var]] == levii & !is.na(ref_df[[.var]]), ] |
222 | ||
223 |
# construction of df_val, based upon curgrp_denom_df, dfii |
|
224 | 335x |
df_val <- curgrp_denom_df |
225 | 335x |
df_val$rsp <- FALSE |
226 |
# subjects with value levii observed in df TRUE |
|
227 | 335x |
df_val$rsp[df_val[[id]] %in% unique(dfii[[id]])] <- TRUE |
228 | ||
229 |
# repeat for ref group, based upon ref_denom_df, ref_dfii |
|
230 | 335x |
ref_df_val <- ref_denom_df |
231 | 335x |
ref_df_val$rsp <- FALSE |
232 |
# subjects with value levii observed in ref_df TRUE |
|
233 | 335x |
ref_df_val$rsp[ref_df_val[[id]] %in% unique(ref_dfii[[id]])] <- TRUE |
234 | ||
235 |
### once 3-d version of diff_ci is available in tern::s_proportion_diff |
|
236 |
### we should call tern::s_proportion_diff directly |
|
237 | 335x |
res_ci_3d <- s_proportion_diff_j( |
238 | 335x |
df_val, |
239 | 335x |
.var = "rsp", |
240 | 335x |
.ref_group = ref_df_val, |
241 | 335x |
.in_ref_col, |
242 | 335x |
variables = variables, |
243 | 335x |
conf_level = conf_level, |
244 | 335x |
method = method, |
245 | 335x |
weights_method = weights_method |
246 | 335x |
)$diff_est_ci |
247 |
} |
|
248 | ||
249 | ||
250 |
s_rel_risk_val_j <- function( |
|
251 |
df, |
|
252 |
.var, |
|
253 |
.df_row, |
|
254 |
ctrl_grp, |
|
255 |
cur_trt_grp, |
|
256 |
trt_var, |
|
257 |
val = NULL, |
|
258 |
drop_levels = FALSE, |
|
259 |
excl_levels = NULL, |
|
260 |
denom_df, |
|
261 |
id = "USUBJID", |
|
262 |
riskdiff = TRUE, |
|
263 |
variables = list(strata = NULL), |
|
264 |
conf_level = 0.95, |
|
265 |
method = c( |
|
266 |
"waldcc", |
|
267 |
"wald", |
|
268 |
"cmh", |
|
269 |
"ha", |
|
270 |
"newcombe", |
|
271 |
"newcombecc", |
|
272 |
"strat_newcombe", |
|
273 |
"strat_newcombecc" |
|
274 |
), |
|
275 |
weights_method = "cmh") { |
|
276 | 49x |
if (drop_levels) { |
277 | ! |
obs_levs <- unique(.df_row[[.var]]) |
278 | ! |
obs_levs <- intersect(levels(.df_row[[.var]]), obs_levs) |
279 | ||
280 | ! |
if (!is.null(excl_levels)) obs_levs <- setdiff(obs_levs, excl_levels) |
281 | ||
282 | ! |
if (!is.null(val)) { |
283 | ! |
stop("argument val cannot be used together with drop_levels = TRUE, please specify one or the other.") |
284 |
} |
|
285 | ! |
val <- obs_levs |
286 |
} |
|
287 | ||
288 | 49x |
if (!is.null(val)) { |
289 |
# restrict the levels to the ones specified in val argument |
|
290 | 1x |
df <- df[df[[.var]] %in% val, ] |
291 | 1x |
.df_row <- .df_row[.df_row[[.var]] %in% val, ] |
292 | ||
293 | 1x |
df <- h_update_factor(df, .var, val) |
294 | 1x |
.df_row <- h_update_factor(.df_row, .var, val) |
295 |
} |
|
296 | ||
297 | 49x |
if (!is.null(excl_levels) && drop_levels == FALSE) { |
298 |
# restrict the levels to the ones specified in val argument |
|
299 | ! |
df <- df[!(df[[.var]] %in% excl_levels), ] |
300 | ! |
.df_row <- .df_row[!(.df_row[[.var]] %in% excl_levels), ] |
301 | ||
302 | ! |
df <- h_update_factor(df, .var, excl_levels = excl_levels) |
303 | ! |
.df_row <- h_update_factor(.df_row, .var, excl_levels = excl_levels) |
304 |
} |
|
305 | ||
306 | 49x |
levs <- levels(df[[.var]]) |
307 | ||
308 | 49x |
if (identical(levs, no_data_to_report_str)) { |
309 | ! |
riskdiff <- FALSE |
310 |
} |
|
311 | 49x |
if (!riskdiff) { |
312 | 8x |
return(list(rr_ci_3d = setNames(replicate(length(levs), list(NULL)), levs))) |
313 |
} |
|
314 |
### check on denom_df |
|
315 | 41x |
if (NROW(denom_df[[id]]) > length(unique(denom_df[[id]]))) { |
316 | ! |
stop( |
317 | ! |
"\nProblem: a_freq_j \n |
318 | ! |
Denominator has multiple records per id. \n |
319 | ! |
Please specify colgroup and/or denom_by to refine your denominator for proper relative risk derivation." |
320 |
) |
|
321 |
} |
|
322 | ||
323 |
### are we in reference column? |
|
324 | 41x |
.in_ref_col <- (cur_trt_grp == ctrl_grp) |
325 | ||
326 |
### data from reference group - df based |
|
327 | 41x |
ref_df <- get_ctrl_subset(.df_row, trt_var = trt_var, ctrl_grp = ctrl_grp) |
328 | ||
329 |
### denominator data from reference group - denom_df based |
|
330 | 41x |
ref_denom_df <- get_ctrl_subset( |
331 | 41x |
denom_df, |
332 | 41x |
trt_var = trt_var, |
333 | 41x |
ctrl_grp = ctrl_grp |
334 |
) |
|
335 | ||
336 |
# ensure this is unique record per subject |
|
337 | 41x |
ref_denom_df <- unique(ref_denom_df[, c(id, variables$strata), drop = FALSE]) |
338 | ||
339 |
### denominator data from current group - denom_df based --- |
|
340 | 41x |
curgrp_denom_df <- get_ctrl_subset( |
341 | 41x |
denom_df, |
342 | 41x |
trt_var = trt_var, |
343 | 41x |
ctrl_grp = cur_trt_grp |
344 |
) |
|
345 | ||
346 |
# ensure this is unique record per subject |
|
347 | 41x |
curgrp_denom_df <- unique(curgrp_denom_df[, c(id, variables$strata), drop = FALSE]) |
348 | ||
349 |
# calculate the stats for each of the levels in levs |
|
350 | 41x |
rr_ci_3d <- sapply( |
351 | 41x |
levs, |
352 | 41x |
s_rel_risk_levii_j, |
353 | 41x |
df = df, |
354 | 41x |
.var = .var, |
355 | 41x |
ref_df = ref_df, |
356 | 41x |
ref_denom_df = ref_denom_df, |
357 | 41x |
.in_ref_col = .in_ref_col, |
358 | 41x |
curgrp_denom_df = curgrp_denom_df, |
359 | 41x |
id = id, |
360 | 41x |
variables = variables, |
361 | 41x |
conf_level = conf_level, |
362 | 41x |
method = method, |
363 | 41x |
weights_method = weights_method, |
364 | 41x |
USE.NAMES = TRUE, |
365 | 41x |
simplify = FALSE |
366 |
) |
|
367 | 41x |
list(rr_ci_3d = rr_ci_3d) |
368 |
} |
|
369 | ||
370 | ||
371 |
#' @name a_freq_j |
|
372 |
#' |
|
373 |
#' |
|
374 |
#' @inheritParams proposal_argument_convention |
|
375 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
376 |
#' See Value for list of available statistics. |
|
377 |
#' @param riskdiff (`logical`)\cr |
|
378 |
#' When `TRUE`, risk difference calculations will be performed and |
|
379 |
#' presented (if required risk difference column splits are included).\cr |
|
380 |
#' When `FALSE`, risk difference columns will remain blank |
|
381 |
#' (if required risk difference column splits are included). |
|
382 |
#' @param ref_path (`string`)\cr Column path specifications for |
|
383 |
#' the control group for the relative risk derivation. |
|
384 |
#' @param variables Will be passed onto the relative risk function |
|
385 |
#' (internal function s_rel_risk_val_j), which is based upon [tern::s_proportion_diff()].\cr |
|
386 |
#' See `?tern::s_proportion_diff` for details. |
|
387 |
#' @param method Will be passed onto the relative risk function (internal function s_rel_risk_val_j).\cr |
|
388 |
#' @param weights_method Will be passed onto the relative risk function (internal function s_rel_risk_val_j).\cr |
|
389 |
#' @param label (`string`)\cr |
|
390 |
#' When `val` is a single `string`, |
|
391 |
#' the row label to be shown on the output can be specified using this argument.\cr |
|
392 |
#' When `val` is a `character vector`, the `label_map` argument can be specified |
|
393 |
#' to control the row-labels. |
|
394 |
#' @param labelstr An argument to ensure this function can be used |
|
395 |
#' as a `cfun` in a `summarize_row_groups` call.\cr |
|
396 |
#' It is recommended not to utilize this argument for other purposes.\cr |
|
397 |
#' The label argument could be used instead (if `val` is a single string)\cr |
|
398 |
#' An another approach could be to utilize the `label_map` argument |
|
399 |
#' to control the row labels of the incoming analysis variable. |
|
400 |
#' @param label_fstr (`string`)\cr |
|
401 |
#' a sprintf style format string. |
|
402 |
#' It can contain up to one `"%s"`, which takes the current split value and |
|
403 |
#' generates the row/column label.\cr |
|
404 |
#' It will be combined with the `labelstr` argument, |
|
405 |
#' when utilizing this function as |
|
406 |
#' a `cfun` in a `summarize_row_groups` call.\cr |
|
407 |
#' It is recommended not to utilize this argument for other purposes. |
|
408 |
#' The label argument could be used instead (if `val` is a single string)\cr |
|
409 |
#' @param label_map (`tibble`)\cr |
|
410 |
#' A mapping tibble to translate levels from the incoming variable into |
|
411 |
#' a different row label to be presented on the table.\cr |
|
412 |
#' @param .alt_df_full (`dataframe`)\cr Denominator dataset |
|
413 |
#' for fraction and relative risk calculations.\cr |
|
414 |
#' this argument gets populated by the rtables |
|
415 |
#' split machinery (see [rtables::additional_fun_params]). |
|
416 |
#' @param denom_by (`character`)\cr Variables from row-split |
|
417 |
#' to be used in the denominator derivation.\cr |
|
418 |
#' This controls both `denom = "n_parentdf"` and `denom = "n_altdf"`.\cr |
|
419 |
#' When `denom = "n_altdf"`, the denominator is derived from `.alt_df_full` |
|
420 |
#' in combination with `denom_by` argument |
|
421 |
#' @param .labels_n (named `character`)\cr |
|
422 |
#' String to control row labels for the 'n'-statistics.\cr |
|
423 |
#' Only useful when more than one 'n'-statistic is requested (rare situations only). |
|
424 |
#' @param .formats (named 'character' or 'list')\cr |
|
425 |
#' formats for the statistics. |
|
426 |
#' @param extrablankline (`logical`)\cr |
|
427 |
#' When `TRUE`, an extra blank line will be added after the last value.\cr |
|
428 |
#' Avoid using this in template scripts, use section_div = " " instead (once PR for rtables is available)\cr |
|
429 |
#' @param extrablanklineafter (`string`)\cr |
|
430 |
#' When the row-label matches the string, an extra blank line will be added after |
|
431 |
#' that value. |
|
432 |
#' @param restr_columns `character`\cr |
|
433 |
#' If not NULL, columns not defined in `restr_columns` will be blanked out. |
|
434 |
#' @param colgroup The name of the column group variable that is used as source |
|
435 |
#' for denominator calculation.\cr |
|
436 |
#' Required to be specified when `denom = "N_colgroup"`. |
|
437 |
#' @param addstr2levs string, if not NULL will be appended to the rowlabel for that level, |
|
438 |
#' eg to add ",n (percent)" at the end of the rowlabels |
|
439 |
#' |
|
440 |
#' @examples |
|
441 |
#' library(dplyr) |
|
442 |
#' |
|
443 |
#' adsl <- ex_adsl |> select("USUBJID", "SEX", "ARM") |
|
444 |
#' adae <- ex_adae |> select("USUBJID", "AEBODSYS", "AEDECOD") |
|
445 |
#' adae[["TRTEMFL"]] <- "Y" |
|
446 |
#' |
|
447 |
#' trtvar <- "ARM" |
|
448 |
#' ctrl_grp <- "B: Placebo" |
|
449 |
#' adsl$colspan_trt <- factor(ifelse(adsl[[trtvar]] == ctrl_grp, " ", "Active Study Agent"), |
|
450 |
#' levels = c("Active Study Agent", " ") |
|
451 |
#' ) |
|
452 |
#' |
|
453 |
#' adsl$rrisk_header <- "Risk Difference (%) (95% CI)" |
|
454 |
#' adsl$rrisk_label <- paste(adsl[[trtvar]], paste("vs", ctrl_grp)) |
|
455 |
#' |
|
456 |
#' adae <- adae |> left_join(adsl) |
|
457 |
#' |
|
458 |
#' colspan_trt_map <- create_colspan_map(adsl, |
|
459 |
#' non_active_grp = "B: Placebo", |
|
460 |
#' non_active_grp_span_lbl = " ", |
|
461 |
#' active_grp_span_lbl = "Active Study Agent", |
|
462 |
#' colspan_var = "colspan_trt", |
|
463 |
#' trt_var = trtvar |
|
464 |
#' ) |
|
465 |
#' |
|
466 |
#' ref_path <- c("colspan_trt", " ", trtvar, ctrl_grp) |
|
467 |
#' |
|
468 |
#' lyt <- basic_table(show_colcounts = TRUE) |> |
|
469 |
#' split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) |> |
|
470 |
#' split_cols_by(trtvar) |> |
|
471 |
#' split_cols_by("rrisk_header", nested = FALSE) |> |
|
472 |
#' split_cols_by(trtvar, labels_var = "rrisk_label", split_fun = remove_split_levels(ctrl_grp)) |
|
473 |
#' |
|
474 |
#' lyt1 <- lyt |> |
|
475 |
#' analyze("TRTEMFL", |
|
476 |
#' show_labels = "hidden", |
|
477 |
#' afun = a_freq_j, |
|
478 |
#' extra_args = list( |
|
479 |
#' method = "wald", |
|
480 |
#' .stats = c("count_unique_denom_fraction"), |
|
481 |
#' ref_path = ref_path |
|
482 |
#' ) |
|
483 |
#' ) |
|
484 |
#' |
|
485 |
#' result1 <- build_table(lyt1, adae, alt_counts_df = adsl) |
|
486 |
#' |
|
487 |
#' result1 |
|
488 |
#' |
|
489 |
#' x_drug_x <- list(length(unique(subset(adae, adae[[trtvar]] == "A: Drug X")[["USUBJID"]]))) |
|
490 |
#' N_x_drug_x <- length(unique(subset(adsl, adsl[[trtvar]] == "A: Drug X")[["USUBJID"]])) |
|
491 |
#' y_placebo <- list(length(unique(subset(adae, adae[[trtvar]] == ctrl_grp)[["USUBJID"]]))) |
|
492 |
#' N_y_placebo <- length(unique(subset(adsl, adsl[[trtvar]] == ctrl_grp)[["USUBJID"]])) |
|
493 |
#' |
|
494 |
#' tern::stat_propdiff_ci( |
|
495 |
#' x = x_drug_x, |
|
496 |
#' N_x = N_x_drug_x, |
|
497 |
#' y = y_placebo, |
|
498 |
#' N_y = N_y_placebo |
|
499 |
#' ) |
|
500 |
#' |
|
501 |
#' x_combo <- list(length(unique(subset(adae, adae[[trtvar]] == "C: Combination")[["USUBJID"]]))) |
|
502 |
#' N_x_combo <- length(unique(subset(adsl, adsl[[trtvar]] == "C: Combination")[["USUBJID"]])) |
|
503 |
#' |
|
504 |
#' tern::stat_propdiff_ci( |
|
505 |
#' x = x_combo, |
|
506 |
#' N_x = N_x_combo, |
|
507 |
#' y = y_placebo, |
|
508 |
#' N_y = N_y_placebo |
|
509 |
#' ) |
|
510 |
#' |
|
511 |
#' |
|
512 |
#' extra_args_rr <- list( |
|
513 |
#' denom = "n_altdf", |
|
514 |
#' denom_by = "SEX", |
|
515 |
#' riskdiff = FALSE, |
|
516 |
#' .stats = c("count_unique") |
|
517 |
#' ) |
|
518 |
#' |
|
519 |
#' extra_args_rr2 <- list( |
|
520 |
#' denom = "n_altdf", |
|
521 |
#' denom_by = "SEX", |
|
522 |
#' riskdiff = TRUE, |
|
523 |
#' ref_path = ref_path, |
|
524 |
#' method = "wald", |
|
525 |
#' .stats = c("count_unique_denom_fraction"), |
|
526 |
#' na_str = rep("NA", 3) |
|
527 |
#' ) |
|
528 |
#' |
|
529 |
#' lyt2 <- basic_table( |
|
530 |
#' top_level_section_div = " ", |
|
531 |
#' colcount_format = "N=xx" |
|
532 |
#' ) |> |
|
533 |
#' split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) |> |
|
534 |
#' split_cols_by(trtvar, show_colcounts = TRUE) |> |
|
535 |
#' split_cols_by("rrisk_header", nested = FALSE) |> |
|
536 |
#' split_cols_by(trtvar, |
|
537 |
#' labels_var = "rrisk_label", split_fun = remove_split_levels("B: Placebo"), |
|
538 |
#' show_colcounts = FALSE |
|
539 |
#' ) |> |
|
540 |
#' split_rows_by("SEX", split_fun = drop_split_levels) |> |
|
541 |
#' summarize_row_groups("SEX", |
|
542 |
#' cfun = a_freq_j, |
|
543 |
#' extra_args = append(extra_args_rr, list(label_fstr = "Gender: %s")) |
|
544 |
#' ) |> |
|
545 |
#' split_rows_by("TRTEMFL", |
|
546 |
#' split_fun = keep_split_levels("Y"), |
|
547 |
#' indent_mod = -1L, |
|
548 |
#' section_div = c(" ") |
|
549 |
#' ) |> |
|
550 |
#' summarize_row_groups("TRTEMFL", |
|
551 |
#' cfun = a_freq_j, |
|
552 |
#' extra_args = append(extra_args_rr2, list( |
|
553 |
#' label = |
|
554 |
#' "Subjects with >=1 AE", extrablankline = TRUE |
|
555 |
#' )) |
|
556 |
#' ) |> |
|
557 |
#' split_rows_by("AEBODSYS", |
|
558 |
#' split_label = "System Organ Class", |
|
559 |
#' split_fun = trim_levels_in_group("AEDECOD"), |
|
560 |
#' label_pos = "topleft", |
|
561 |
#' section_div = c(" "), |
|
562 |
#' nested = TRUE |
|
563 |
#' ) |> |
|
564 |
#' summarize_row_groups("AEBODSYS", |
|
565 |
#' cfun = a_freq_j, |
|
566 |
#' extra_args = extra_args_rr2 |
|
567 |
#' ) |> |
|
568 |
#' analyze("AEDECOD", |
|
569 |
#' afun = a_freq_j, |
|
570 |
#' extra_args = extra_args_rr2 |
|
571 |
#' ) |
|
572 |
#' |
|
573 |
#' result2 <- build_table(lyt2, adae, alt_counts_df = adsl) |
|
574 |
#' |
|
575 |
#' @return |
|
576 |
#' * `a_freq_j`: returns a list of requested statistics with formatted `rtables::CellValue()`.\cr |
|
577 |
#' Within the relative risk difference columns, the following stats are blanked out: |
|
578 |
#' \itemize{ |
|
579 |
#' \item any of the n-statistics (n_df, n_altdf, n_parentdf, n_rowdf, denom) |
|
580 |
#' \item count |
|
581 |
#' \item count_unique |
|
582 |
#' } |
|
583 |
#' For the others (count_unique_fraction, count_unique_denom_fraction), |
|
584 |
#' the statistic is replaced by the relative risk difference + confidence interval. |
|
585 |
#' @export |
|
586 |
a_freq_j <- function( |
|
587 |
df, |
|
588 |
labelstr = NULL, |
|
589 |
.var = NA, |
|
590 |
val = NULL, |
|
591 |
drop_levels = FALSE, |
|
592 |
excl_levels = NULL, |
|
593 |
new_levels = NULL, |
|
594 |
new_levels_after = FALSE, |
|
595 |
addstr2levs = NULL, |
|
596 |
.df_row, |
|
597 |
.spl_context, |
|
598 |
.N_col, |
|
599 |
id = "USUBJID", |
|
600 |
denom = c("N_col", "n_df", "n_altdf", "N_colgroup", "n_rowdf", "n_parentdf"), |
|
601 |
riskdiff = TRUE, |
|
602 |
ref_path = NULL, |
|
603 |
variables = list(strata = NULL), |
|
604 |
conf_level = 0.95, |
|
605 |
method = c( |
|
606 |
"wald", |
|
607 |
"waldcc", |
|
608 |
"cmh", |
|
609 |
"ha", |
|
610 |
"newcombe", |
|
611 |
"newcombecc", |
|
612 |
"strat_newcombe", |
|
613 |
"strat_newcombecc" |
|
614 |
), |
|
615 |
weights_method = "cmh", |
|
616 |
label = NULL, |
|
617 |
label_fstr = NULL, |
|
618 |
label_map = NULL, |
|
619 |
.alt_df_full = NULL, |
|
620 |
denom_by = NULL, |
|
621 |
.stats = c("count_unique_denom_fraction"), |
|
622 |
.formats = NULL, |
|
623 |
.indent_mods = NULL, |
|
624 |
na_str = rep("NA", 3), |
|
625 |
.labels_n = NULL, |
|
626 |
extrablankline = FALSE, |
|
627 |
extrablanklineafter = NULL, |
|
628 |
restr_columns = NULL, |
|
629 |
colgroup = NULL, |
|
630 |
countsource = c("df", "altdf")) { |
|
631 | 445x |
denom <- match.arg(denom) |
632 | 445x |
method <- match.arg(method) |
633 | ||
634 | 445x |
if (!is.null(labelstr) && is.na(.var)) { |
635 | ! |
stop( |
636 | ! |
"Please specify var call to summarize_row_groups when using cfun = a_freq_j, i.e.,\n", |
637 | ! |
"summarize_row_groups('varname', cfun = a_freq_j)" |
638 |
) |
|
639 |
} |
|
640 | ||
641 | 445x |
if (denom == "N_colgroup") { |
642 | 84x |
if (is.null(colgroup)) { |
643 | ! |
stop("Colgroup must be specified when denom = N_colgroup.") |
644 |
} |
|
645 | ||
646 | 84x |
checkmate::assert_character(colgroup, null.ok = FALSE, max.len = 1) |
647 | ||
648 | 84x |
if (colgroup == tail(.spl_context$cur_col_split[[1]], 1)) { |
649 | ! |
stop( |
650 | ! |
"N_colgroup cannot be used when colgroup is lowest column split." |
651 |
) |
|
652 |
} |
|
653 |
} |
|
654 | ||
655 | 445x |
check_alt_df_full(denom, c("n_altdf", "N_colgroup"), .alt_df_full) |
656 | ||
657 | 445x |
res_dataprep <- h_a_freq_dataprep( |
658 | 445x |
df = df, |
659 | 445x |
labelstr = labelstr, |
660 | 445x |
.var = .var, |
661 | 445x |
val = val, |
662 | 445x |
drop_levels = drop_levels, |
663 | 445x |
excl_levels = excl_levels, |
664 | 445x |
new_levels = new_levels, |
665 | 445x |
new_levels_after = new_levels_after, |
666 | 445x |
addstr2levs = addstr2levs, |
667 | 445x |
.df_row = .df_row, |
668 | 445x |
.spl_context = .spl_context, |
669 | 445x |
.N_col = .N_col, |
670 | 445x |
id = id, |
671 | 445x |
denom = denom, |
672 | 445x |
variables = variables, |
673 | 445x |
label = label, |
674 | 445x |
label_fstr = label_fstr, |
675 | 445x |
label_map = label_map, |
676 | 445x |
.alt_df_full = .alt_df_full, |
677 | 445x |
denom_by = denom_by, |
678 | 445x |
.stats = .stats |
679 |
) |
|
680 |
# res_dataprep is list with elements |
|
681 |
# df .df_row val |
|
682 |
# drop_levels excl_levels |
|
683 |
# alt_df parentdf new_denomdf |
|
684 |
# .stats |
|
685 |
# make these elements available in current environment |
|
686 | 445x |
df <- res_dataprep$df |
687 | 445x |
.df_row <- res_dataprep$.df_row |
688 | 445x |
val <- res_dataprep$val |
689 | 445x |
drop_levels <- res_dataprep$drop_levels |
690 | 445x |
excl_levels <- res_dataprep$excl_levels |
691 | 445x |
alt_df <- res_dataprep$alt_df |
692 | 445x |
parentdf <- res_dataprep$parentdf |
693 | 445x |
new_denomdf <- res_dataprep$new_denomdf |
694 | 445x |
.stats <- .stats |
695 | ||
696 |
## prepare for column based split |
|
697 | 445x |
col_expr <- .spl_context$cur_col_expr[[1]] |
698 |
## colid can be used to figure out if we're in the relative risk columns or not |
|
699 | 445x |
colid <- .spl_context$cur_col_id[[1]] |
700 | 445x |
inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE) |
701 | ||
702 | 445x |
if (!is.null(colgroup)) { |
703 | 84x |
colexpr_substr <- h_colexpr_substr(colgroup, .spl_context$cur_col_expr[[1]]) |
704 | ||
705 | 84x |
if (is.null(colexpr_substr)) { |
706 | ! |
stop("\n Problem a_freq_j: incorrect colgroup specification.") |
707 |
} |
|
708 | ||
709 | 84x |
new_denomdf <- subset(.alt_df_full, eval(parse(text = colexpr_substr))) |
710 | 84x |
.df_row <- subset(.df_row, eval(parse(text = colexpr_substr))) |
711 |
} |
|
712 | ||
713 | 445x |
if (!inriskdiffcol) { |
714 | 396x |
if (denom != "N_colgroup" && !is.null(new_denomdf)) { |
715 |
### for this part : perform column split on denominator dataset |
|
716 | 312x |
new_denomdf <- subset(new_denomdf, eval(col_expr)) |
717 |
} |
|
718 | 396x |
if (denom == "N_colgroup") { |
719 | 84x |
denom <- "n_altdf" |
720 |
} |
|
721 | ||
722 | 396x |
x_stats <- s_freq_j( |
723 | 396x |
df, |
724 | 396x |
.var = .var, |
725 | 396x |
.df_row = .df_row, |
726 | 396x |
val = val, |
727 | 396x |
drop_levels = drop_levels, |
728 | 396x |
excl_levels = excl_levels, |
729 | 396x |
alt_df = new_denomdf, |
730 | 396x |
parent_df = new_denomdf, |
731 | 396x |
id = id, |
732 | 396x |
denom = denom, |
733 | 396x |
.N_col = .N_col, |
734 | 396x |
countsource = countsource |
735 |
) |
|
736 |
## remove relrisk stat from .stats |
|
737 | 396x |
.stats_adj <- .stats[!(.stats %in% "rr_ci_3d")] |
738 |
} else { |
|
739 | 49x |
if (riskdiff && is.null(ref_path)) { |
740 | ! |
stop("argument ref_path cannot be NULL.") |
741 |
} |
|
742 |
### denom N_colgroup should not be used in layout with risk diff columns |
|
743 | 49x |
if (denom == "N_colgroup") { |
744 | ! |
stop( |
745 | ! |
"denom N_colgroup cannot be used in a layout with risk diff columns." |
746 |
) |
|
747 |
} |
|
748 | 49x |
if (!riskdiff) { |
749 | 8x |
trt_var <- NULL |
750 | 8x |
ctrl_grp <- NULL |
751 | 8x |
cur_trt_grp <- NULL |
752 |
} |
|
753 | ||
754 | 49x |
if (riskdiff) { |
755 | 41x |
trt_var_refpath <- h_get_trtvar_refpath( |
756 | 41x |
ref_path, |
757 | 41x |
.spl_context, |
758 | 41x |
df |
759 |
) |
|
760 |
# trt_var_refpath is list with elements |
|
761 |
# trt_var trt_var_refspec cur_trt_grp ctrl_grp |
|
762 |
# make these elements available in current environment |
|
763 | 41x |
trt_var <- trt_var_refpath$trt_var |
764 | 41x |
trt_var_refspec <- trt_var_refpath$trt_var_refspec |
765 | 41x |
cur_trt_grp <- trt_var_refpath$cur_trt_grp |
766 | 41x |
ctrl_grp <- trt_var_refpath$ctrl_grp |
767 | ||
768 | 41x |
if (!is.null(colgroup) && trt_var == colgroup) { |
769 | ! |
stop( |
770 | ! |
"\n Problem: a_freq_j: colgroup and treatment variable from ref_path are the same. |
771 | ! |
This is not intented for usage with relative risk columns. |
772 | ! |
Either remove risk difference columns from layout, set riskdiff = FALSE, or update colgroup." |
773 |
) |
|
774 |
} |
|
775 |
} |
|
776 | ||
777 | 49x |
x_stats <- s_rel_risk_val_j( |
778 | 49x |
df, |
779 | 49x |
.var = .var, |
780 | 49x |
.df_row = .df_row, |
781 | 49x |
val = val, |
782 | 49x |
drop_levels = drop_levels, |
783 | 49x |
excl_levels = excl_levels, |
784 | 49x |
denom_df = new_denomdf, |
785 | 49x |
id = id, |
786 | 49x |
riskdiff = riskdiff, |
787 |
# treatment/ref group related arguments |
|
788 | 49x |
trt_var = trt_var, |
789 | 49x |
ctrl_grp = ctrl_grp, |
790 | 49x |
cur_trt_grp = cur_trt_grp, |
791 |
# relrisk specific arguments |
|
792 | 49x |
variables = variables, |
793 | 49x |
conf_level = conf_level, |
794 | 49x |
method = method, |
795 | 49x |
weights_method = weights_method |
796 |
) |
|
797 | ||
798 |
## this will ensure the following stats will be shown as empty column in relative risk column |
|
799 | 49x |
xy <- sapply( |
800 | 49x |
c( |
801 | 49x |
"count", |
802 | 49x |
"count_unique", |
803 | 49x |
"n_df", |
804 | 49x |
"n_altdf", |
805 | 49x |
"n_rowdf", |
806 | 49x |
"n_parentdf", |
807 | 49x |
"denom" |
808 |
), |
|
809 | 49x |
function(x) { |
810 | 343x |
stats::setNames(list(x = NULL), x) |
811 |
}, |
|
812 | 49x |
USE.NAMES = TRUE, |
813 | 49x |
simplify = FALSE |
814 |
) |
|
815 | 49x |
x_stats <- append(x_stats, xy) |
816 | ||
817 |
## restrict to relrisk stat from .stats |
|
818 |
# when both count_unique_fraction and count_unique_denom_fraction are requested, the rr_ci_3d stat is in here twice |
|
819 |
# this does not seem to introduce a problem, although might not be ideal |
|
820 |
# see further |
|
821 | 49x |
.stats_adj <- replace( |
822 | 49x |
.stats, |
823 | 49x |
.stats %in% |
824 | 49x |
c( |
825 | 49x |
"count_unique_fraction", |
826 | 49x |
"count_unique_denom_fraction", |
827 | 49x |
"fraction_count_unique_denom" |
828 |
), |
|
829 | 49x |
"rr_ci_3d" |
830 |
) |
|
831 |
} |
|
832 | ||
833 | 445x |
res_prepinrows <- h_a_freq_prepinrows( |
834 | 445x |
x_stats, |
835 | 445x |
.stats_adj, |
836 | 445x |
.formats, |
837 | 445x |
labelstr, |
838 | 445x |
label_fstr, |
839 | 445x |
label, |
840 | 445x |
.indent_mods, |
841 | 445x |
.labels_n, |
842 | 445x |
na_str |
843 |
) |
|
844 |
# res_prepinrows is list with elements |
|
845 |
# x_stats .formats .labels .indent_mods .format_na_strs |
|
846 |
# make these elements available in current environment |
|
847 | 445x |
x_stats <- res_prepinrows$x_stats |
848 | 445x |
.formats <- res_prepinrows$.formats |
849 | 445x |
.labels <- res_prepinrows$.labels |
850 | 445x |
.indent_mods <- res_prepinrows$.indent_mods |
851 | 445x |
.format_na_strs <- res_prepinrows$.format_na_strs |
852 | ||
853 |
### blank out columns not in restr_columns |
|
854 |
# get column label |
|
855 | 445x |
colid_lbl <- utils::tail( |
856 | 445x |
.spl_context$cur_col_split_val[[NROW(.spl_context)]], |
857 | 445x |
1 |
858 |
) |
|
859 | 445x |
if (!is.null(restr_columns) && !(tolower(colid_lbl) %in% tolower(restr_columns))) { |
860 | 33x |
x_stats <- lapply(x_stats, FUN = function(x) { |
861 | 96x |
NULL |
862 |
}) |
|
863 |
} |
|
864 | ||
865 |
### final step: turn requested stats into rtables rows |
|
866 | 445x |
inrows <- in_rows( |
867 | 445x |
.list = x_stats, |
868 | 445x |
.formats = .formats, |
869 | 445x |
.labels = .labels, |
870 | 445x |
.indent_mods = .indent_mods, |
871 | 445x |
.format_na_strs = .format_na_strs |
872 |
) |
|
873 | ||
874 |
### add extra blankline to the end of inrows --- as long as section_div is not working as expected |
|
875 |
# nolint start |
|
876 | 445x |
if (!is.null(inrows) && extrablankline || |
877 | 445x |
(!is.null(extrablanklineafter) && length(.labels) == 1 && .labels == extrablanklineafter)) { |
878 | 5x |
inrows <- add_blank_line_rcells(inrows) |
879 |
} # nolint end |
|
880 | ||
881 | 445x |
return(inrows) |
882 |
} |
1 |
#' @name jj_complex_scorefun |
|
2 |
#' @title Complex Scoring Function |
|
3 |
#' @description |
|
4 |
#' A function used for sorting AE tables (and others) as required. |
|
5 |
#' @details |
|
6 |
#' This sort function sorts as follows: |
|
7 |
#' Takes all the columns from a specified spanning column header (default= colspan_trt) and sorts by the last treatment |
|
8 |
#' column within this. |
|
9 |
#' If no spanning column header variable exists (e.g you have only one active treatment arm and have decided to |
|
10 |
#' remove the spanning header from your layout) it will sort by the first treatment column in your table. |
|
11 |
#' This function is not really designed for tables that have sub-columns, however if users wish to override any |
|
12 |
#' default sorting behavior, they can simply specify their own colpath to use for sorting on (default=NULL) |
|
13 |
#' @param spanningheadercolvar name of spanning header variable that defines the active treatment columns. |
|
14 |
#' If you do not have an active treatment spanning header column then user can define this as NA. |
|
15 |
#' @param usefirstcol This allows you to just use the first column of the table to sort on. |
|
16 |
#' @param colpath name of column path that is needed to sort by (default=NULL). |
|
17 |
#' This overrides other arguments if specified |
|
18 |
#' (except firstcat and lastcat which will be applied if requested on this colpath) |
|
19 |
#' @param firstcat If you wish to put any category at the top of the list despite any n's user can specify here. |
|
20 |
#' @param lastcat If you wish to put any category at the bottom of the list despite any n's user can specify here. |
|
21 |
#' @export |
|
22 |
#' @returns a function which can be used as a score function (scorefun in `sort_at_path`). |
|
23 |
# @examples #result <- sort_at_path(result, c('root', 'AEBODSYS'), scorefun = jj_complex_scorefun()) |
|
24 |
#' @examples |
|
25 |
#' ADAE <- data.frame( |
|
26 |
#' USUBJID = c( |
|
27 |
#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05", |
|
28 |
#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10" |
|
29 |
#' ), |
|
30 |
#' AEBODSYS = c( |
|
31 |
#' "SOC 1", "SOC 2", "SOC 1", "SOC 2", "SOC 2", |
|
32 |
#' "SOC 2", "SOC 2", "SOC 1", "SOC 2", "SOC 1" |
|
33 |
#' ), |
|
34 |
#' AEDECOD = c( |
|
35 |
#' "Coded Term 2", "Coded Term 1", "Coded Term 3", "Coded Term 4", |
|
36 |
#' "Coded Term 4", "Coded Term 4", "Coded Term 5", "Coded Term 3", |
|
37 |
#' "Coded Term 1", "Coded Term 2" |
|
38 |
#' ), |
|
39 |
#' TRT01A = c( |
|
40 |
#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", |
|
41 |
#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMB" |
|
42 |
#' ), |
|
43 |
#' TRTEMFL = c("Y", "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "Y") |
|
44 |
#' ) |
|
45 |
#' |
|
46 |
#' ADAE <- ADAE |> |
|
47 |
#' dplyr::mutate(TRT01A = as.factor(TRT01A)) |
|
48 |
#' |
|
49 |
#' ADAE$colspan_trt <- factor(ifelse(ADAE$TRT01A == "Placebo", " ", "Active Study Agent"), |
|
50 |
#' levels = c("Active Study Agent", " ") |
|
51 |
#' ) |
|
52 |
#' |
|
53 |
#' ADAE$rrisk_header <- "Risk Difference (%) (95% CI)" |
|
54 |
#' ADAE$rrisk_label <- paste(ADAE$TRT01A, paste("vs", "Placebo")) |
|
55 |
#' |
|
56 |
#' colspan_trt_map <- create_colspan_map(ADAE, |
|
57 |
#' non_active_grp = "Placebo", |
|
58 |
#' non_active_grp_span_lbl = " ", |
|
59 |
#' active_grp_span_lbl = "Active Study Agent", |
|
60 |
#' colspan_var = "colspan_trt", |
|
61 |
#' trt_var = "TRT01A" |
|
62 |
#' ) |
|
63 |
#' |
|
64 |
#' ref_path <- c("colspan_trt", " ", "TRT01A", "Placebo") |
|
65 |
#' |
|
66 |
#' lyt <- basic_table() |> |
|
67 |
#' split_cols_by( |
|
68 |
#' "colspan_trt", |
|
69 |
#' split_fun = trim_levels_to_map(map = colspan_trt_map) |
|
70 |
#' ) |> |
|
71 |
#' split_cols_by("TRT01A") |> |
|
72 |
#' split_cols_by("rrisk_header", nested = FALSE) |> |
|
73 |
#' split_cols_by( |
|
74 |
#' "TRT01A", |
|
75 |
#' labels_var = "rrisk_label", |
|
76 |
#' split_fun = remove_split_levels("Placebo") |
|
77 |
#' ) |> |
|
78 |
#' analyze( |
|
79 |
#' "TRTEMFL", |
|
80 |
#' a_freq_j, |
|
81 |
#' show_labels = "hidden", |
|
82 |
#' extra_args = list( |
|
83 |
#' method = "wald", |
|
84 |
#' label = "Subjects with >=1 AE", |
|
85 |
#' ref_path = ref_path, |
|
86 |
#' .stats = "count_unique_fraction" |
|
87 |
#' ) |
|
88 |
#' ) |> |
|
89 |
#' split_rows_by("AEBODSYS", |
|
90 |
#' split_label = "System Organ Class", |
|
91 |
#' split_fun = trim_levels_in_group("AEDECOD"), |
|
92 |
#' label_pos = "topleft", |
|
93 |
#' section_div = c(" "), |
|
94 |
#' nested = FALSE |
|
95 |
#' ) |> |
|
96 |
#' summarize_row_groups( |
|
97 |
#' "AEBODSYS", |
|
98 |
#' cfun = a_freq_j, |
|
99 |
#' extra_args = list( |
|
100 |
#' method = "wald", |
|
101 |
#' ref_path = ref_path, |
|
102 |
#' .stats = "count_unique_fraction" |
|
103 |
#' ) |
|
104 |
#' ) |> |
|
105 |
#' analyze( |
|
106 |
#' "AEDECOD", |
|
107 |
#' afun = a_freq_j, |
|
108 |
#' extra_args = list( |
|
109 |
#' method = "wald", |
|
110 |
#' ref_path = ref_path, |
|
111 |
#' .stats = "count_unique_fraction" |
|
112 |
#' ) |
|
113 |
#' ) |
|
114 |
#' |
|
115 |
#' result <- build_table(lyt, ADAE) |
|
116 |
#' |
|
117 |
#' result |
|
118 |
#' |
|
119 |
#' result <- sort_at_path( |
|
120 |
#' result, |
|
121 |
#' c("root", "AEBODSYS"), |
|
122 |
#' scorefun = jj_complex_scorefun() |
|
123 |
#' ) |
|
124 |
#' |
|
125 |
#' result <- sort_at_path( |
|
126 |
#' result, |
|
127 |
#' c("root", "AEBODSYS", "*", "AEDECOD"), |
|
128 |
#' scorefun = jj_complex_scorefun() |
|
129 |
#' ) |
|
130 |
#' |
|
131 |
#' result |
|
132 |
#' @rdname complex_scoring_function |
|
133 |
#' @aliases jj_complex_scorefun |
|
134 |
jj_complex_scorefun <- function( |
|
135 |
spanningheadercolvar = "colspan_trt", |
|
136 |
usefirstcol = FALSE, |
|
137 |
colpath = NULL, |
|
138 |
firstcat = NULL, |
|
139 |
lastcat = NULL) { |
|
140 | 12x |
paths <- NULL |
141 | ||
142 | 12x |
function(tt) { |
143 | 225x |
if (is.null(paths)) { |
144 | 12x |
paths <<- col_paths(tt) |
145 | 12x |
if (is.null(colpath)) { |
146 | 10x |
if (is.na(spanningheadercolvar)) { |
147 | 6x |
atrt_paths <- vapply(paths, function(pth) pth[[1]] != " ", TRUE) |
148 | 6x |
use_paths <- atrt_paths |
149 | 6x |
act_trt_lst <- paths[use_paths] |
150 | 6x |
first_list <- sapply(act_trt_lst, utils::head, 1) |
151 | 6x |
first_at_value <- utils::head(first_list, n = 1) |
152 | 6x |
colpath <<- first_at_value |
153 |
} else { |
|
154 | 4x |
atrt_paths <- vapply(paths, function(pth) pth[[1]] == spanningheadercolvar && pth[[2]] != " ", TRUE) |
155 | 4x |
use_paths <- atrt_paths |
156 | 4x |
act_trt_lst <- paths[use_paths] |
157 | 4x |
last_list <- sapply(act_trt_lst, utils::tail, 1) |
158 | 4x |
last_at_value <- utils::tail(last_list, n = 1) |
159 | 4x |
colpath <<- last_at_value |
160 |
} |
|
161 | ||
162 | 10x |
if (usefirstcol) { |
163 | 4x |
atrt_paths <- vapply(paths, function(pth) pth[[1]] != " ", TRUE) |
164 | 4x |
use_paths <- atrt_paths |
165 | 4x |
act_trt_lst <- paths[use_paths] |
166 | 4x |
first_list <- sapply(act_trt_lst, utils::head, 1) |
167 | 4x |
first_at_value <- utils::head(first_list, n = 1) |
168 | 4x |
colpath <<- first_at_value |
169 |
} |
|
170 |
} |
|
171 |
} |
|
172 | 225x |
score <- unlist(cell_values(tt, colpath = colpath), use.names = FALSE)[1] |
173 | ||
174 | 225x |
if (length(score) == 0) { |
175 | ! |
score <- 1 |
176 |
} |
|
177 | ||
178 | 225x |
if (!is.null(firstcat)) { |
179 | 30x |
if (obj_name(tt) == firstcat) { |
180 | 3x |
score <- Inf |
181 |
} |
|
182 |
} |
|
183 | 225x |
if (!is.null(lastcat)) { |
184 | 30x |
if (obj_name(tt) == lastcat) { |
185 | 3x |
score <- -99999 |
186 |
} |
|
187 |
} |
|
188 | 225x |
if (obj_name(tt) == " ") { |
189 | ! |
score <- -Inf |
190 |
} |
|
191 | ||
192 | 225x |
return(score) |
193 |
} |
|
194 |
} |
1 |
#' @name jjcs_num_formats |
|
2 |
#' |
|
3 |
#' @title Numeric Formatting Function |
|
4 |
#' |
|
5 |
#' @description |
|
6 |
#' Formatting setter for selected numerical statistics |
|
7 |
#' |
|
8 |
#' @param d precision of individual values |
|
9 |
#' @param cap cap to numerical precision (d > cap -- will use precision as if cap was specified as precision) |
|
10 |
#' |
|
11 |
#' @return list: |
|
12 |
#' - fmt : named vector with formatting function (jjcsformat_xx) for numerical stats: range, median, mean_sd, sd |
|
13 |
#' - spec : named vector with formatting specifications for numerical stats: range, median, mean_sd, sd |
|
14 |
#' @export |
|
15 |
#' @examples |
|
16 |
#' P1_precision <- jjcs_num_formats(d=0)$fmt |
|
17 |
#' jjcs_num_formats(2)$fmt |
|
18 |
#' jjcs_num_formats(2)$spec |
|
19 |
jjcs_num_formats <- function(d, cap = 4) { |
|
20 | 6x |
fmt_xx <- function(d, cap) { |
21 | 22x |
prec <- NULL |
22 |
### report as is |
|
23 | 22x |
if (is.na(d)) { |
24 | 5x |
return("xx") |
25 |
} |
|
26 | 17x |
checkmate::assertCount(d) |
27 | ||
28 |
### set a cap to number of decimal precision |
|
29 | 15x |
d <- min(d, cap) |
30 | 14x |
if (d > 0) prec <- paste0(prec, paste0(rep("x", d), collapse = "")) |
31 | 15x |
if (d >= 0) prec <- paste0("xx.", prec) |
32 | ||
33 | 15x |
return(prec) |
34 |
} |
|
35 | ||
36 | 6x |
fmt_rng <- function(d, cap) { |
37 | 6x |
prec <- fmt_xx(d, cap = cap) |
38 | 4x |
fmt <- list() |
39 | 4x |
fmt$spec <- paste0(prec, ", ", prec) |
40 | 4x |
fmt$fmt <- jjcsformat_xx(fmt$spec) |
41 | ||
42 | 4x |
return(fmt) |
43 |
} |
|
44 | ||
45 | 6x |
fmt_median <- function(d, cap) { |
46 | 4x |
prec <- fmt_xx(d, cap = cap) |
47 | 4x |
fmt <- list() |
48 | 4x |
fmt$spec <- prec |
49 | 4x |
fmt$fmt <- jjcsformat_xx(fmt$spec) |
50 | 4x |
return(fmt) |
51 |
} |
|
52 | ||
53 | 6x |
fmt_meansd <- function(d, cap) { |
54 | 4x |
prec1 <- fmt_xx(d, cap = cap) |
55 | 4x |
prec2 <- fmt_xx(d + 1, cap = cap + 1) |
56 | 4x |
fmt <- list() |
57 | 4x |
fmt$spec <- paste0(prec1, " (", prec2, ")") |
58 | 4x |
fmt$fmt <- jjcsformat_xx(fmt$spec) |
59 | 4x |
return(fmt) |
60 |
} |
|
61 | 6x |
fmt_sd <- function(d, cap) { |
62 | 4x |
prec <- fmt_xx(d, cap) |
63 | 4x |
fmt <- list() |
64 | 4x |
fmt$spec <- prec |
65 | 4x |
fmt$fmt <- jjcsformat_xx(fmt$spec) |
66 | 4x |
return(fmt) |
67 |
} |
|
68 | ||
69 |
## apply formats for each of the stats with appropriate d and cap |
|
70 | 6x |
xfmt_rng <- fmt_rng(d, cap = cap) |
71 | 4x |
xfmt_meansd <- fmt_meansd(d + 1, cap = cap + 1) |
72 | 4x |
xfmt_sd <- fmt_sd(d + 2, cap = cap + 2) |
73 | 4x |
xfmt_median <- fmt_median(d + 1, cap = cap + 1) |
74 | ||
75 | 4x |
fmts <- list() |
76 | ||
77 | 4x |
fmts$fmt <- c(range = xfmt_rng$fmt, mean_sd = xfmt_meansd$fmt, sd = xfmt_sd$fmt, median = xfmt_median$fmt) |
78 | ||
79 | 4x |
fmts$spec <- c(range = xfmt_rng$spec, mean_sd = xfmt_meansd$spec, sd = xfmt_sd$spec, median = xfmt_median$spec) |
80 | ||
81 | 4x |
return(fmts) |
82 |
} |
1 |
#' Difference test for two proportions |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge('stable')` |
|
4 |
#' |
|
5 |
#' The analysis function [a_test_proportion_diff()] can be used to create a layout element to test |
|
6 |
#' the difference between two proportions. The primary analysis variable, `vars`, indicates whether a |
|
7 |
#' response has occurred for each record. See the `method` parameter for options of methods to use |
|
8 |
#' to calculate the p-value. Additionally, a stratification variable can be supplied via the `strata` |
|
9 |
#' element of the `variables` argument. The argument `alternative` specifies the direction of the |
|
10 |
#' alternative hypothesis. |
|
11 |
#' |
|
12 |
#' @inheritParams proposal_argument_convention |
|
13 |
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`; specifies the test used |
|
14 |
#' to calculate the p-value. |
|
15 |
#' @param .stats (`character`)\cr statistics to select for the table. |
|
16 |
#' |
|
17 |
#' @seealso [h_prop_diff_test] |
|
18 |
#' |
|
19 |
#' @note These functions have been forked from the `tern` package. Additional features are: |
|
20 |
#' |
|
21 |
#' * Additional `alternative` argument for the sidedness of the test. |
|
22 |
#' * Additional `ref_path` argument for flexible reference column path specification. |
|
23 |
#' |
|
24 |
#' @name prop_diff_test |
|
25 |
#' @order 1 |
|
26 |
NULL |
|
27 | ||
28 |
#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions. |
|
29 |
#' |
|
30 |
#' @return |
|
31 |
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label` |
|
32 |
#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same. |
|
33 |
#' |
|
34 |
#' @keywords internal |
|
35 |
s_test_proportion_diff <- function( |
|
36 |
df, |
|
37 |
.var, |
|
38 |
.ref_group, |
|
39 |
.in_ref_col, |
|
40 |
variables = list(strata = NULL), |
|
41 |
method = c("chisq", "fisher", "cmh"), |
|
42 |
alternative = c("two.sided", "less", "greater")) { |
|
43 | 8x |
method <- match.arg(method) |
44 | 8x |
alternative <- match.arg(alternative) |
45 | 8x |
y <- list(pval = list()) |
46 | ||
47 | 8x |
if (!.in_ref_col) { |
48 | 6x |
assert_df_with_variables(df, list(rsp = .var)) |
49 | 6x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
50 | 6x |
rsp <- factor(c(.ref_group[[.var]], df[[.var]]), levels = c("TRUE", "FALSE")) |
51 | 6x |
grp <- factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref")) |
52 | ||
53 | 6x |
if (!is.null(variables$strata) || method == "cmh") { |
54 | 3x |
strata <- variables$strata |
55 | 3x |
checkmate::assert_false(is.null(strata)) |
56 | 3x |
strata_vars <- stats::setNames(as.list(strata), strata) |
57 | 3x |
assert_df_with_variables(df, strata_vars) |
58 | 3x |
assert_df_with_variables(.ref_group, strata_vars) |
59 | 3x |
strata <- c(interaction(.ref_group[strata]), interaction(df[strata])) |
60 |
} |
|
61 | ||
62 | 6x |
tbl <- switch(method, |
63 | 6x |
cmh = table(grp, rsp, strata), |
64 | 6x |
table(grp, rsp) |
65 |
) |
|
66 | ||
67 | 6x |
y$pval <- switch(method, |
68 | 6x |
chisq = prop_chisq(tbl, alternative), |
69 | 6x |
cmh = prop_cmh(tbl, alternative), |
70 | 6x |
fisher = prop_fisher(tbl, alternative) |
71 |
) |
|
72 |
} |
|
73 | ||
74 | 8x |
y$pval <- with_label(y$pval, d_test_proportion_diff_j(method, alternative)) |
75 | 8x |
y |
76 |
} |
|
77 | ||
78 |
#' Description of the difference test between two proportions |
|
79 |
#' |
|
80 |
#' @description `r lifecycle::badge('stable')` |
|
81 |
#' |
|
82 |
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`. |
|
83 |
#' |
|
84 |
#' @inheritParams s_test_proportion_diff |
|
85 |
#' |
|
86 |
#' @return A `string` describing the test from which the p-value is derived. |
|
87 |
#' |
|
88 |
#' @export |
|
89 |
d_test_proportion_diff_j <- function(method, alternative) { |
|
90 | 8x |
checkmate::assert_string(method) |
91 | 8x |
meth_part <- switch(method, |
92 | 8x |
chisq = "Chi-Squared Test", |
93 | 8x |
cmh = "Cochran-Mantel-Haenszel Test", |
94 | 8x |
fisher = "Fisher's Exact Test", |
95 | 8x |
stop(paste(method, "does not have a description")) |
96 |
) |
|
97 | 8x |
alt_part <- switch(alternative, |
98 | 8x |
two.sided = "", |
99 | 8x |
less = ", 1-sided, direction less", |
100 | 8x |
greater = ", 1-sided, direction greater" |
101 |
) |
|
102 | 8x |
paste0("p-value (", meth_part, alt_part, ")") |
103 |
} |
|
104 | ||
105 |
#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` |
|
106 |
#' |
|
107 |
#' @return |
|
108 |
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
109 |
#' |
|
110 |
#' @examples |
|
111 |
#' dta <- data.frame( |
|
112 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE), |
|
113 |
#' grp = factor(rep(c("A", "B"), each = 50)), |
|
114 |
#' strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20)) |
|
115 |
#' ) |
|
116 |
#' |
|
117 |
#' l <- basic_table() |> |
|
118 |
#' split_cols_by(var = "grp") |> |
|
119 |
#' analyze( |
|
120 |
#' vars = "rsp", |
|
121 |
#' afun = a_test_proportion_diff, |
|
122 |
#' show_labels = "hidden", |
|
123 |
#' extra_args = list( |
|
124 |
#' method = "cmh", |
|
125 |
#' variables = list(strata = "strata"), |
|
126 |
#' ref_path = c("grp", "B") |
|
127 |
#' ) |
|
128 |
#' ) |
|
129 |
#' |
|
130 |
#' build_table(l, df = dta) |
|
131 |
#' |
|
132 |
#' @export |
|
133 |
#' @order 2 |
|
134 |
a_test_proportion_diff <- function( |
|
135 |
df, |
|
136 |
.var, |
|
137 |
ref_path, |
|
138 |
.spl_context, |
|
139 |
..., |
|
140 |
.stats = NULL, |
|
141 |
.formats = NULL, |
|
142 |
.labels = NULL, |
|
143 |
.indent_mods = NULL) { |
|
144 |
# Check for additional parameters to the statistics function |
|
145 | 2x |
dots_extra_args <- list(...) |
146 | ||
147 |
# Only support default stats, not custom stats |
|
148 | 2x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
149 | ||
150 |
# Obtain reference column information |
|
151 | 2x |
ref <- get_ref_info(ref_path, .spl_context) |
152 | ||
153 |
# Apply statistics function |
|
154 | 2x |
x_stats <- .apply_stat_functions( |
155 | 2x |
default_stat_fnc = s_test_proportion_diff, |
156 | 2x |
custom_stat_fnc_list = NULL, |
157 | 2x |
args_list = c( |
158 | 2x |
df = list(df), |
159 | 2x |
.var = .var, |
160 | 2x |
.ref_group = list(ref$ref_group), |
161 | 2x |
.in_ref_col = ref$in_ref_col, |
162 | 2x |
dots_extra_args |
163 |
) |
|
164 |
) |
|
165 | ||
166 |
# Format according to specifications |
|
167 | 2x |
format_stats( |
168 | 2x |
x_stats, |
169 | 2x |
method_groups = "test_proportion_diff", |
170 | 2x |
stats_in = .stats, |
171 | 2x |
formats_in = .formats, |
172 | 2x |
labels_in = .labels, |
173 | 2x |
indents_in = .indent_mods |
174 |
) |
|
175 |
} |
|
176 | ||
177 |
#' Helper functions to test proportion differences |
|
178 |
#' |
|
179 |
#' Helper functions to implement various tests on the difference between two proportions. |
|
180 |
#' |
|
181 |
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns. |
|
182 |
#' |
|
183 |
#' @return A p-value. |
|
184 |
#' |
|
185 |
#' @seealso [prop_diff_test()] for implementation of these helper functions. |
|
186 |
#' |
|
187 |
#' @name h_prop_diff_test |
|
188 |
NULL |
|
189 | ||
190 |
#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()]. |
|
191 |
#' |
|
192 |
#' @keywords internal |
|
193 |
prop_chisq <- function(tbl, alternative) { |
|
194 | 3x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
195 | 3x |
tbl <- tbl[, c("TRUE", "FALSE")] |
196 | 3x |
if (any(colSums(tbl) == 0)) { |
197 | ! |
return(1) |
198 |
} |
|
199 | 3x |
stats::prop.test(tbl, correct = FALSE, alternative = alternative)$p.value |
200 |
} |
|
201 | ||
202 |
#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test. |
|
203 |
#' Internally calls [stats::mantelhaen.test()]. |
|
204 |
#' |
|
205 |
#' @note strata with less than five observations will result in a warning and |
|
206 |
#' possibly incorrect results; strata with less than two observations are |
|
207 |
#' automatically discarded. |
|
208 |
#' |
|
209 |
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response |
|
210 |
#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension. |
|
211 |
#' |
|
212 |
#' @keywords internal |
|
213 |
prop_cmh <- function(ary, alternative) { |
|
214 | 9x |
checkmate::assert_array(ary) |
215 | 9x |
checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2) |
216 | 9x |
checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3) |
217 | 9x |
strata_sizes <- apply(ary, MARGIN = 3, sum) |
218 | 9x |
if (any(strata_sizes < 5)) { |
219 | 3x |
warning("<5 data points in some strata. CMH test may be incorrect.") |
220 | 3x |
ary <- ary[, , strata_sizes > 1] |
221 |
} |
|
222 | 9x |
stats::mantelhaen.test(ary, correct = FALSE, alternative = alternative)$p.value |
223 |
} |
|
224 | ||
225 |
#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()]. |
|
226 |
#' |
|
227 |
#' @keywords internal |
|
228 |
prop_fisher <- function(tbl, alternative) { |
|
229 | 6x |
checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2) |
230 | 6x |
tbl <- tbl[, c("TRUE", "FALSE")] |
231 | 6x |
stats::fisher.test(tbl, alternative = alternative)$p.value |
232 |
} |
1 |
#' Get default statistical methods and their associated formats, labels, and indent modifiers |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge('experimental')` |
|
4 |
#' |
|
5 |
#' Utility functions to get valid statistic methods for different method groups |
|
6 |
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers |
|
7 |
#' (`.indent_mods`). This utility is used across `junco`, but some of its working principles can be |
|
8 |
#' seen in [tern::analyze_vars()]. See notes to understand why this is experimental. |
|
9 |
#' |
|
10 |
#' @param stats (`character`)\cr statistical methods to return defaults for. |
|
11 |
#' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a |
|
12 |
#' statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name), |
|
13 |
#' each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only |
|
14 |
#' calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be |
|
15 |
#' flattened such that the names of the list elements returned by the function have the format `statistic.level` (or |
|
16 |
#' just `statistic` for statistics calculated for a single row). Defaults to `NULL`. |
|
17 |
#' |
|
18 |
#' @details |
|
19 |
#' Current choices for `type` are `counts` and `numeric` for [tern::analyze_vars()] and affect `junco_get_stats()`. |
|
20 |
#' |
|
21 |
#' @note |
|
22 |
#' These defaults are experimental because we use the names of functions to retrieve the default |
|
23 |
#' statistics. This should be generalized in groups of methods according to more reasonable groupings. |
|
24 |
#' |
|
25 |
#' These functions have been modified from the `tern` file `utils_default_stats_formats_labels.R`. |
|
26 |
#' This file contains `junco` specific wrappers of functions called within the `afun` functions, |
|
27 |
#' in order to point to `junco` specific default statistics, formats and labels. |
|
28 |
#' |
|
29 |
#' @name default_stats_formats_labels |
|
30 |
NULL |
|
31 | ||
32 |
#' @describeIn default_stats_formats_labels Get statistics available for a given method |
|
33 |
#' group (analyze function). To check available defaults see `junco_default_stats` list. |
|
34 |
#' |
|
35 |
#' @param method_groups (`character`)\cr indicates the statistical method group (`junco` analyze function) |
|
36 |
#' to retrieve default statistics for. A character vector can be used to specify more than one statistical |
|
37 |
#' method group. |
|
38 |
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical |
|
39 |
#' functions are used, `stats_in` needs to have them in too. |
|
40 |
#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics. |
|
41 |
#' @param add_pval (`flag`)\cr should `'pval'` (or `'pval_counts'` if `method_groups` contains |
|
42 |
#' `'analyze_vars_counts'`) be added to the statistical methods? |
|
43 |
#' |
|
44 |
#' @return |
|
45 |
#' * `junco_get_stats()` returns a `character` vector of statistical methods. |
|
46 |
#' |
|
47 |
#' @export |
|
48 |
junco_get_stats <- function( |
|
49 |
method_groups = "analyze_vars_numeric", |
|
50 |
stats_in = NULL, |
|
51 |
custom_stats_in = NULL, |
|
52 |
add_pval = FALSE) { |
|
53 | 1084x |
tern_get_stats( |
54 | 1084x |
method_groups = method_groups, |
55 | 1084x |
stats_in = stats_in, |
56 | 1084x |
custom_stats_in = custom_stats_in, |
57 | 1084x |
add_pval = add_pval, |
58 | 1084x |
tern_defaults = junco_default_stats |
59 |
) |
|
60 |
} |
|
61 | ||
62 |
#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics. |
|
63 |
#' To check available defaults see list `junco_default_formats`. |
|
64 |
#' |
|
65 |
#' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with |
|
66 |
#' values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows |
|
67 |
#' with no value is provided. |
|
68 |
#' |
|
69 |
#' @return |
|
70 |
#' * `junco_get_formats_from_stats()` returns a named list of formats as strings or functions. |
|
71 |
#' |
|
72 |
#' @note Formats in `tern` or `junco` and `rtables` can be functions that take in the table cell value and |
|
73 |
#' return a string. This is well documented in `vignette('custom_appearance', package = 'rtables')`. |
|
74 |
#' |
|
75 |
#' @export |
|
76 |
junco_get_formats_from_stats <- function(stats, formats_in = NULL, levels_per_stats = NULL) { |
|
77 | 631x |
tern_get_formats_from_stats( |
78 | 631x |
stats = stats, |
79 | 631x |
formats_in = formats_in, |
80 | 631x |
levels_per_stats = levels_per_stats, |
81 | 631x |
tern_defaults = junco_default_formats |
82 |
) |
|
83 |
} |
|
84 | ||
85 |
#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics. |
|
86 |
#' To check for available defaults see list `junco_default_labels`. |
|
87 |
#' |
|
88 |
#' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the |
|
89 |
#' variable level (if rows correspond to levels of a variable) or statistic name will be used as label. |
|
90 |
#' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list |
|
91 |
#' of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `''` |
|
92 |
#' values are provided. |
|
93 |
#' |
|
94 |
#' @return |
|
95 |
#' * `junco_get_labels_from_stats()` returns a named list of labels as strings. |
|
96 |
#' |
|
97 |
#' @export |
|
98 |
junco_get_labels_from_stats <- function( |
|
99 |
stats, |
|
100 |
labels_in = NULL, |
|
101 |
levels_per_stats = NULL, |
|
102 |
label_attr_from_stats = NULL) { |
|
103 | 586x |
tern_get_labels_from_stats( |
104 | 586x |
stats = stats, |
105 | 586x |
labels_in = labels_in, |
106 | 586x |
levels_per_stats = levels_per_stats, |
107 | 586x |
label_attr_from_stats = label_attr_from_stats, |
108 | 586x |
tern_defaults = junco_default_labels |
109 |
) |
|
110 |
} |
|
111 | ||
112 |
#' @describeIn default_stats_formats_labels Get label attributes from statistics list. |
|
113 |
#' @param x_stats (`list`)\cr with the statistics results. |
|
114 |
#' @keywords internal |
|
115 |
get_label_attr_from_stats <- function(x_stats) { |
|
116 | 148x |
sapply(x_stats, obj_label) |
117 |
} |
|
118 | ||
119 |
#' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows. |
|
120 |
#' |
|
121 |
#' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for |
|
122 |
#' all values. |
|
123 |
#' |
|
124 |
#' @return |
|
125 |
#' * `junco_get_indents_from_stats()` returns a named list of indentation modifiers as integers. By default all of the |
|
126 |
#' indentations will be zero. |
|
127 |
#' |
|
128 |
#' @export |
|
129 |
junco_get_indents_from_stats <- function(stats, indents_in = NULL, levels_per_stats = NULL) { |
|
130 |
# For statistics still remaining without default indentation after looking in junco_default_indents, use |
|
131 |
# indentation 0 as default. |
|
132 | 626x |
remaining_stats <- setdiff(stats, names(junco_default_indents)) |
133 | 626x |
default_indents <- c( |
134 | 626x |
junco_default_indents, |
135 | 626x |
as.list(rep(0L, length(remaining_stats))) |> |
136 | 626x |
stats::setNames(remaining_stats) |
137 |
) |
|
138 | ||
139 | 626x |
tern_get_indents_from_stats( |
140 | 626x |
stats = stats, |
141 | 626x |
indents_in = indents_in, |
142 | 626x |
levels_per_stats = levels_per_stats, |
143 | 626x |
tern_defaults = default_indents |
144 |
) |
|
145 |
} |
|
146 | ||
147 |
#' @describeIn default_stats_formats_labels Format statistics results according to format specifications. |
|
148 |
#' |
|
149 |
#' @return |
|
150 |
#' * `format_stats()` returns the correspondingly formatted [rtables::in_rows()] result. |
|
151 |
#' |
|
152 |
#' @export |
|
153 |
format_stats <- function(x_stats, method_groups, stats_in, formats_in, labels_in, indents_in) { |
|
154 | 142x |
.stats <- junco_get_stats(method_groups, stats_in = stats_in) |
155 | ||
156 | 142x |
.formats <- junco_get_formats_from_stats(stats = .stats, formats_in = formats_in) |
157 | ||
158 | 142x |
label_attr <- get_label_attr_from_stats(x_stats) |
159 | 142x |
.labels <- junco_get_labels_from_stats(stats = .stats, labels_in = labels_in, label_attr_from_stats = label_attr) |
160 | 142x |
.labels <- .unlist_keep_nulls(.labels) |
161 | ||
162 | 142x |
.indent_mods <- junco_get_indents_from_stats(stats = .stats, indents_in = indents_in) |
163 | 142x |
.indent_mods <- .unlist_keep_nulls(.indent_mods) |
164 | ||
165 | 142x |
x_stats <- x_stats[.stats] |
166 | ||
167 | 142x |
in_rows( |
168 | 142x |
.list = x_stats, |
169 | 142x |
.formats = .formats, |
170 | 142x |
.names = names(.labels), |
171 | 142x |
.labels = .labels, |
172 | 142x |
.indent_mods = .indent_mods |
173 |
) |
|
174 |
} |
|
175 | ||
176 | ||
177 |
# junco_default_stats ----------------------------------------------------------- |
|
178 | ||
179 |
#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `junco`. |
|
180 |
#' |
|
181 |
#' @format |
|
182 |
#' * `junco_default_stats` is a named list of available statistics, with each element |
|
183 |
#' named for their corresponding statistical method group. |
|
184 |
#' |
|
185 |
#' @export |
|
186 |
junco_default_stats <- list( |
|
187 |
coxph_hr = c("n_tot", "n_tot_events", "hr", "hr_ci", "hr_ci_3d", "pvalue", "lr_stat_df"), |
|
188 |
event_free = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_ci"), |
|
189 |
kaplan_meier = c("quantiles_lower", "median_ci_3d", "quantiles_upper", "range_with_cens_info"), |
|
190 |
odds_ratio = c("n_tot", "or_ci", "pval"), |
|
191 |
proportion_diff = c("diff", "diff_ci", "diff_est_ci"), |
|
192 |
relative_risk = c("rel_risk_ci", "pval"), |
|
193 |
summarize_ancova_j = c( |
|
194 |
"n", |
|
195 |
"mean_sd", |
|
196 |
"median", |
|
197 |
"range", |
|
198 |
"quantiles", |
|
199 |
"lsmean_se", |
|
200 |
"lsmean_ci", |
|
201 |
"lsmean_diffci", |
|
202 |
"pval" |
|
203 |
), |
|
204 |
summarize_mmrm = c( |
|
205 |
"n", |
|
206 |
"adj_mean_se", |
|
207 |
"adj_mean_ci", |
|
208 |
"adj_mean_est_ci", |
|
209 |
"diff_mean_se", |
|
210 |
"diff_mean_ci", |
|
211 |
"diff_mean_est_ci", |
|
212 |
"change", |
|
213 |
"p_value" |
|
214 |
), |
|
215 |
tabulate_lsmeans = c( |
|
216 |
"n", |
|
217 |
"adj_mean_se", |
|
218 |
"adj_mean_ci", |
|
219 |
"adj_mean_est_ci", |
|
220 |
"diff_mean_se", |
|
221 |
"diff_mean_ci", |
|
222 |
"diff_mean_est_ci", |
|
223 |
"change", |
|
224 |
"p_value" |
|
225 |
), |
|
226 |
tabulate_rbmi = c( |
|
227 |
"adj_mean_se", |
|
228 |
"adj_mean_ci", |
|
229 |
"diff_mean_se", |
|
230 |
"diff_mean_ci", |
|
231 |
"change", |
|
232 |
"p_value", |
|
233 |
"additional_title_row" |
|
234 |
), |
|
235 |
test_proportion_diff = c("pval"), |
|
236 |
a_freq_j = c( |
|
237 |
"n_altdf", |
|
238 |
"n_df", |
|
239 |
"n_rowdf", |
|
240 |
"n_parentdf", |
|
241 |
"denom", |
|
242 |
"count", |
|
243 |
"count_unique", |
|
244 |
"count_unique_fraction", |
|
245 |
"count_unique_denom_fraction" |
|
246 |
), |
|
247 |
a_patyrs_j = c("patyrs"), |
|
248 |
a_eair100_j = c("eair", "n_event", "person_years") |
|
249 |
) |
|
250 | ||
251 |
# junco_default_formats --------------------------------------------------------- |
|
252 |
junco_default_formats_start <- c( |
|
253 |
adj_mean_se = jjcsformat_xx("xx.xxx (xx.xxx)"), |
|
254 |
adj_mean_ci = jjcsformat_xx("(xx.xxx, xx.xxx)"), |
|
255 |
adj_mean_est_ci = jjcsformat_xx("xx.xxx (xx.xxx, xx.xxx)"), |
|
256 |
change = "xx.x%", |
|
257 |
diff = jjcsformat_xx("xx.x"), |
|
258 |
diff_ci = jjcsformat_xx("(xx.x, xx.x)"), |
|
259 |
diff_est_ci = jjcsformat_xx("xx.x (xx.x, xx.x)"), |
|
260 |
diff_mean_se = jjcsformat_xx("xx.xxx (xx.xxx)"), |
|
261 |
diff_mean_ci = jjcsformat_xx("(xx.xxx, xx.xxx)"), |
|
262 |
diff_mean_est_ci = jjcsformat_xx("xx.xxx (xx.xxx, xx.xxx)"), |
|
263 |
event_free_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
264 |
event_free_rate = jjcsformat_xx("xx.xx"), |
|
265 |
hr = jjcsformat_xx("xx.xx"), |
|
266 |
hr_ci = jjcsformat_xx("(xx.xx, xx.xx)"), |
|
267 |
hr_ci_3d = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
268 |
quantiles_upper = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
269 |
lsmean = jjcsformat_xx("xx.xx"), |
|
270 |
lsmean_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
271 |
lsmean_diff = jjcsformat_xx("xx.xx"), |
|
272 |
lsmean_diffci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
273 |
lsmean_diff_ci = jjcsformat_xx("(xx.xx, xx.xx)"), |
|
274 |
lsmean_se = jjcsformat_xx("xx.xx (xx.xx)"), |
|
275 |
mean_sd = jjcsformat_xx("xx.xx (xx.xxx)"), |
|
276 |
median = jjcsformat_xx("xx.xx"), |
|
277 |
median_ci = jjcsformat_xx("(xx.xx, xx.xx)"), |
|
278 |
median_ci_3d = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
279 |
n = jjcsformat_xx("xx."), |
|
280 |
n_tot = jjcsformat_xx("xx."), |
|
281 |
n_tot_events = jjcsformat_xx("xx."), |
|
282 |
or_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"), |
|
283 |
pt_at_risk = "xx", |
|
284 |
pval = jjcsformat_pval_fct(0), |
|
285 |
pvalue = jjcsformat_pval_fct(0), |
|
286 |
p_value = jjcsformat_pval_fct(0), |
|
287 |
quantiles = jjcsformat_xx("xx.xx, xx.xx"), |
|
288 |
range = jjcsformat_xx("xx.xx, xx.xx"), |
|
289 |
range_with_cens_info = jjcsformat_range_fct("xx.xx"), |
|
290 |
rate_ci = jjcsformat_xx("(xx.xx, xx.xx)"), |
|
291 |
rate_se = jjcsformat_xx("xx.xx"), |
|
292 |
rel_risk_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"), |
|
293 |
quantiles_upper = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
294 |
n_altdf = "xx", |
|
295 |
n_df = "xx", |
|
296 |
n_rowdf = "xx", |
|
297 |
n_parentdf = "xx", |
|
298 |
denom = "xx", |
|
299 |
count = "xx", |
|
300 |
count_unique = "xx", |
|
301 |
count_unique_fraction = jjcsformat_count_fraction, |
|
302 |
count_unique_denom_fraction = jjcsformat_count_denom_fraction, |
|
303 |
rr_ci_3d = jjcsformat_xx("xx.x (xx.x, xx.x)"), |
|
304 |
patyrs = jjcsformat_xx("xx.x"), |
|
305 |
eair = jjcsformat_xx("xx.x"), |
|
306 |
eair_diff = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"), |
|
307 |
n_event = "xx", |
|
308 |
person_years = jjcsformat_xx("xx.xx"), |
|
309 |
total_subject_years = jjcsformat_xx("xx.x (xx.x)") |
|
310 |
) |
|
311 | ||
312 |
tern_formats_only <- setdiff(names(tern_default_formats), names(junco_default_formats_start)) |
|
313 |
#' @describeIn default_stats_formats_labels Named vector of default formats for `junco`. |
|
314 |
#' |
|
315 |
#' @format |
|
316 |
#' * `junco_default_formats` is a named vector of available default formats, with each element |
|
317 |
#' named for their corresponding statistic. |
|
318 |
#' |
|
319 |
#' @export |
|
320 |
junco_default_formats <- c(junco_default_formats_start, tern_default_formats[tern_formats_only]) |
|
321 | ||
322 |
# junco_default_labels ---------------------------------------------------------- |
|
323 |
junco_default_labels_start <- c( |
|
324 |
additional_title_row = "Additional Title", |
|
325 |
adj_mean_se = "Adjusted Mean (SE)", |
|
326 |
adj_mean_est_ci = "Adjusted Mean (CI)", |
|
327 |
diff = "Difference in Response rate (%)", |
|
328 |
diff_mean_se = "Difference in Adjusted Means (SE)", |
|
329 |
diff_mean_est_ci = "Difference in Adjusted Means (CI)", |
|
330 |
hr = "Hazard Ratio", |
|
331 |
lr_stat_df = "Log-Rank Chi-Squared", |
|
332 |
mean_sd = "Mean (SD)", |
|
333 |
median = "Median", |
|
334 |
n_tot = "Total n", |
|
335 |
n_tot_events = "Total events", |
|
336 |
pval = "p-value", |
|
337 |
pvalue = "p-value", |
|
338 |
p_value = "p-value", |
|
339 |
range = "Min, max", |
|
340 |
range_with_cens_info = "Min, max", |
|
341 |
n_altdf = "N", |
|
342 |
n_df = "N", |
|
343 |
n_rowdf = "N", |
|
344 |
n_parentdf = "N", |
|
345 |
denom = "N", |
|
346 |
patyrs = "Patient years", |
|
347 |
n_event = "Number of events", |
|
348 |
person_years = "Person years", |
|
349 |
total_subject_years = "Total treatment (subject years)" |
|
350 |
) |
|
351 |
tern_labels_only <- setdiff(names(tern_default_labels), names(junco_default_labels_start)) |
|
352 | ||
353 |
#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `junco`. |
|
354 |
#' |
|
355 |
#' @format |
|
356 |
#' * `junco_default_labels` is a named `character` vector of available default labels, with each element |
|
357 |
#' named for their corresponding statistic. |
|
358 |
#' |
|
359 |
#' @export |
|
360 |
junco_default_labels <- c(junco_default_labels_start, tern_default_labels[tern_labels_only]) |
|
361 | ||
362 |
#' @describeIn default_stats_formats_labels Named `integer` vector of default indents for `junco`. |
|
363 |
#' |
|
364 |
#' @format |
|
365 |
#' * `junco_default_indents` is a named `integer` vector of available default indents, with each element |
|
366 |
#' named for their corresponding statistic. Only indentations different from zero need to be |
|
367 |
#' recorded here. |
|
368 |
#' |
|
369 |
#' @export |
|
370 |
junco_default_indents <- c( |
|
371 |
additional_title_row = 1L, |
|
372 |
adj_mean_ci = 1L, |
|
373 |
adj_mean_est_ci = 1L, |
|
374 |
change = 1L, |
|
375 |
diff_ci = 1L, |
|
376 |
diff_mean_ci = 1L, |
|
377 |
diff_mean_est_ci = 1L, |
|
378 |
hr_ci = 1L, |
|
379 |
or_ci = 1L, |
|
380 |
pval = 1L, |
|
381 |
p_value = 1L, |
|
382 |
rate_se = 1L, |
|
383 |
rate_ci = 1L, |
|
384 |
rel_risk_ci = 1L |
|
385 |
) |
1 |
#' Odds ratio estimation |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge('stable')` |
|
4 |
#' |
|
5 |
#' @param method (`string`)\cr whether to use the correct (`'exact'`) calculation in the conditional likelihood or one |
|
6 |
#' of the approximations, or the CMH method. See [survival::clogit()] for details. |
|
7 |
#' |
|
8 |
#' @param df (`data.frame`)\cr input data frame. |
|
9 |
#' @param .var (`string`)\cr name of the response variable. |
|
10 |
#' @param .df_row (`data.frame`)\cr data frame containing all rows. |
|
11 |
#' @param ref_path (`character`)\cr path to the reference group. |
|
12 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
13 |
#' @param ... Additional arguments passed to the statistics function. |
|
14 |
#' @param .stats (`character`)\cr statistics to calculate. |
|
15 |
#' @param .formats (`list`)\cr formats for the statistics. |
|
16 |
#' @param .labels (`list`)\cr labels for the statistics. |
|
17 |
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics. |
|
18 |
#' @param .ref_group (`data.frame`)\cr reference group data frame. |
|
19 |
#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column. |
|
20 |
#' @param variables (`list`)\cr list with arm and strata variable names. |
|
21 |
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval. |
|
22 |
#' @param groups_list (`list`)\cr list of groups for combination. |
|
23 |
#' |
|
24 |
#' @note |
|
25 |
#' The `a_odds_ratio_j()` and `s_odds_ratio_j()` functions have the `_j` suffix to distinguish them |
|
26 |
#' from [tern::a_odds_ratio()] and [tern::s_odds_ratio()], respectively. |
|
27 |
#' These functions differ as follows: |
|
28 |
#' |
|
29 |
#' * Additional `method = 'cmh'` option is provided to calculate the Cochran-Mantel-Haenszel estimate. |
|
30 |
#' * The p-value is returned as an additional statistic. |
|
31 |
#' |
|
32 |
#' Once these updates are contributed back to `tern`, they can later be replaced by the `tern` versions. |
|
33 |
#' |
|
34 |
#' @name odds_ratio |
|
35 |
#' @order 1 |
|
36 |
NULL |
|
37 | ||
38 |
#' @describeIn odds_ratio Statistics function which estimates the odds ratio |
|
39 |
#' between a treatment and a control. A `variables` list with `arm` and `strata` |
|
40 |
#' variable names must be passed if a stratified analysis is required. |
|
41 |
#' |
|
42 |
#' @param na_if_no_events (`flag`)\cr whether the point estimate should be `NA` if there |
|
43 |
#' are no events in one arm. The p-value and confidence interval will still be computed. |
|
44 |
#' |
|
45 |
#' @return |
|
46 |
#' * `s_odds_ratio_j()` returns a named list with the statistics `or_ci` |
|
47 |
#' (containing `est`, `lcl`, and `ucl`), `pval` and `n_tot`. |
|
48 |
#' |
|
49 |
#' @examples |
|
50 |
#' s_odds_ratio_j( |
|
51 |
#' df = subset(dta, grp == "A"), |
|
52 |
#' .var = "rsp", |
|
53 |
#' .ref_group = subset(dta, grp == "B"), |
|
54 |
#' .in_ref_col = FALSE, |
|
55 |
#' .df_row = dta |
|
56 |
#' ) |
|
57 |
#' |
|
58 |
#' s_odds_ratio_j( |
|
59 |
#' df = subset(dta, grp == "A"), |
|
60 |
#' .var = "rsp", |
|
61 |
#' .ref_group = subset(dta, grp == "B"), |
|
62 |
#' .in_ref_col = FALSE, |
|
63 |
#' .df_row = dta, |
|
64 |
#' variables = list(arm = "grp", strata = "strata") |
|
65 |
#' ) |
|
66 |
#' |
|
67 |
#' s_odds_ratio_j( |
|
68 |
#' df = subset(dta, grp == "A"), |
|
69 |
#' method = "cmh", |
|
70 |
#' .var = "rsp", |
|
71 |
#' .ref_group = subset(dta, grp == "B"), |
|
72 |
#' .in_ref_col = FALSE, |
|
73 |
#' .df_row = dta, |
|
74 |
#' variables = list(arm = "grp", strata = c("strata")) |
|
75 |
#' ) |
|
76 |
#' @export |
|
77 |
s_odds_ratio_j <- function( |
|
78 |
df, |
|
79 |
.var, |
|
80 |
.ref_group, |
|
81 |
.in_ref_col, |
|
82 |
.df_row, |
|
83 |
variables = list(arm = NULL, strata = NULL), |
|
84 |
conf_level = 0.95, |
|
85 |
groups_list = NULL, |
|
86 |
na_if_no_events = TRUE, |
|
87 |
method = c("exact", "approximate", "efron", "breslow", "cmh")) { |
|
88 | 17x |
checkmate::assert_flag(na_if_no_events) |
89 |
# New: pval here |
|
90 | 17x |
y <- list(or_ci = list(), n_tot = list(), pval = list()) |
91 | 17x |
method <- match.arg(method) |
92 | 17x |
one_group_empty <- nrow(df) == 0 || nrow(.ref_group) == 0 |
93 | ||
94 | 17x |
if (!.in_ref_col) { |
95 | 15x |
(assert_proportion_value)(conf_level) |
96 | 15x |
assert_df_with_variables(df, list(rsp = .var)) |
97 | 15x |
assert_df_with_variables(.ref_group, list(rsp = .var)) |
98 | ||
99 | 15x |
if (one_group_empty) { |
100 | 1x |
y <- list(or_ci = c(est = NA_real_, lcl = 0, ucl = Inf), n_tot = c(n_tot = nrow(df) + nrow(.ref_group)), pval = 1) |
101 | 14x |
} else if (is.null(variables$strata)) { |
102 | 6x |
data <- data.frame( |
103 | 6x |
rsp = c(.ref_group[[.var]], df[[.var]]), |
104 | 6x |
grp = factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref")) |
105 |
) |
|
106 | 6x |
y <- or_glm_j(data, conf_level = conf_level) |
107 |
} else { |
|
108 | 8x |
assert_df_with_variables(.df_row, c(list(rsp = .var), variables)) |
109 | 8x |
checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow", "cmh"), empty.ok = FALSE) |
110 |
# The group variable prepared for stratified analysis must be synchronized with the combination groups |
|
111 |
# definition. |
|
112 | 8x |
if (is.null(groups_list)) { |
113 | 8x |
ref_grp <- as.character(unique(.ref_group[[variables$arm]])) |
114 | 8x |
trt_grp <- as.character(unique(df[[variables$arm]])) |
115 | 8x |
grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp) |
116 |
} else { |
|
117 |
# If more than one level in reference col. |
|
118 | ! |
reference <- as.character(unique(.ref_group[[variables$arm]])) |
119 | ! |
grp_ref_flag <- vapply(X = groups_list, FUN.VALUE = TRUE, FUN = function(x) all(reference %in% x)) |
120 | ! |
ref_grp <- names(groups_list)[grp_ref_flag] |
121 | ||
122 |
# If more than one level in treatment col. |
|
123 | ! |
treatment <- as.character(unique(df[[variables$arm]])) |
124 | ! |
grp_trt_flag <- vapply(X = groups_list, FUN.VALUE = TRUE, FUN = function(x) all(treatment %in% x)) |
125 | ! |
trt_grp <- names(groups_list)[grp_trt_flag] |
126 | ||
127 | ! |
grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp) |
128 | ! |
grp <- combine_levels(grp, levels = treatment, new_level = trt_grp) |
129 |
} |
|
130 | ||
131 |
# New: CMH method |
|
132 | 8x |
if (method == "cmh") { |
133 | 7x |
data <- data.frame( |
134 | 7x |
rsp = c(.ref_group[[.var]], df[[.var]]), |
135 | 7x |
grp = factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref")), |
136 | 7x |
strata = interaction(rbind(.ref_group[variables$strata], df[variables$strata])) |
137 |
) |
|
138 | ||
139 | 7x |
y <- or_cmh(data, conf_level = conf_level) |
140 |
} else { |
|
141 |
# The reference level in `grp` must be the same as in the `rtables` column split. |
|
142 | 1x |
data <- data.frame(rsp = .df_row[[.var]], grp = grp, strata = interaction(.df_row[variables$strata])) |
143 | ||
144 | 1x |
y_all <- or_clogit_j(data, conf_level = conf_level, method = method) |
145 | 1x |
checkmate::assert_string(trt_grp) |
146 |
# New: pval here |
|
147 | 1x |
checkmate::assert_subset(trt_grp, names(y_all$or_ci_pvals)) |
148 | 1x |
y_or_ci_pval <- y_all$or_ci_pvals[[trt_grp]] |
149 | 1x |
y$or_ci <- y_or_ci_pval[c("est", "lcl", "ucl")] |
150 | 1x |
y$n_tot <- y_all$n_tot |
151 | 1x |
y$pval <- y_or_ci_pval["pval"] |
152 |
} |
|
153 |
} |
|
154 | ||
155 | 15x |
one_group_no_events <- (sum(.ref_group[[.var]]) == 0) || (sum(df[[.var]]) == 0) |
156 | 15x |
if (na_if_no_events && one_group_no_events) { |
157 | 4x |
y$or_ci[["est"]] <- NA_real_ |
158 |
} |
|
159 | ||
160 | 15x |
na_because_sparse <- one_group_empty || (na_if_no_events && one_group_no_events) |
161 | 15x |
if ("est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]]) && !na_because_sparse && method != "approximate") { |
162 | ! |
warning( |
163 | ! |
"Unable to compute the odds ratio estimate. Please try re-running the function with ", |
164 | ! |
"parameter `method` set to \"approximate\"." |
165 |
) |
|
166 |
} |
|
167 |
} |
|
168 | ||
169 | 17x |
y$or_ci <- with_label(x = y$or_ci, label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")) |
170 | ||
171 |
# New: pval here |
|
172 | 17x |
y$pval <- unname(y$pval) |
173 | ||
174 | 17x |
y |
175 |
} |
|
176 | ||
177 |
#' @describeIn odds_ratio Formatted analysis function which is used as `afun`. Note that the |
|
178 |
#' junco specific `ref_path` and `.spl_context` arguments are used for reference column information. |
|
179 |
#' |
|
180 |
#' @return |
|
181 |
#' * `a_odds_ratio_j()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
182 |
#' |
|
183 |
#' @examples |
|
184 |
#' set.seed(12) |
|
185 |
#' dta <- data.frame( |
|
186 |
#' rsp = sample(c(TRUE, FALSE), 100, TRUE), |
|
187 |
#' grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")), |
|
188 |
#' strata = factor(sample(c("C", "D"), 100, TRUE)) |
|
189 |
#' ) |
|
190 |
#' |
|
191 |
#' a_odds_ratio_j( |
|
192 |
#' df = subset(dta, grp == "A"), |
|
193 |
#' .var = "rsp", |
|
194 |
#' ref_path = c("grp", "B"), |
|
195 |
#' .spl_context = data.frame( |
|
196 |
#' cur_col_split = I(list("grp")), |
|
197 |
#' cur_col_split_val = I(list(c(grp = "A"))), |
|
198 |
#' full_parent_df = I(list(dta)) |
|
199 |
#' ), |
|
200 |
#' .df_row = dta |
|
201 |
#' ) |
|
202 |
#' |
|
203 |
#' |
|
204 |
#' l <- basic_table() |> |
|
205 |
#' split_cols_by(var = "grp") |> |
|
206 |
#' analyze( |
|
207 |
#' "rsp", |
|
208 |
#' afun = a_odds_ratio_j, |
|
209 |
#' show_labels = "hidden", |
|
210 |
#' extra_args = list( |
|
211 |
#' ref_path = c("grp", "B"), |
|
212 |
#' .stats = c("or_ci", "pval") |
|
213 |
#' ) |
|
214 |
#' ) |
|
215 |
#' |
|
216 |
#' build_table(l, df = dta) |
|
217 |
#' |
|
218 |
#' l2 <- basic_table() |> |
|
219 |
#' split_cols_by(var = "grp") |> |
|
220 |
#' analyze( |
|
221 |
#' "rsp", |
|
222 |
#' afun = a_odds_ratio_j, |
|
223 |
#' show_labels = "hidden", |
|
224 |
#' extra_args = list( |
|
225 |
#' variables = list(arm = "grp", strata = "strata"), |
|
226 |
#' method = "cmh", |
|
227 |
#' ref_path = c("grp", "A"), |
|
228 |
#' .stats = c("or_ci", "pval") |
|
229 |
#' ) |
|
230 |
#' ) |
|
231 |
#' |
|
232 |
#' build_table(l2, df = dta) |
|
233 |
#' @export |
|
234 |
#' @order 2 |
|
235 |
a_odds_ratio_j <- function( |
|
236 |
df, |
|
237 |
.var, |
|
238 |
.df_row, |
|
239 |
ref_path, |
|
240 |
.spl_context, |
|
241 |
..., |
|
242 |
.stats = NULL, |
|
243 |
.formats = NULL, |
|
244 |
.labels = NULL, |
|
245 |
.indent_mods = NULL) { |
|
246 |
# Check for additional parameters to the statistics function |
|
247 | 4x |
dots_extra_args <- list(...) |
248 | ||
249 |
# Only support default stats, not custom stats |
|
250 | 4x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
251 | ||
252 |
# Obtain reference column information |
|
253 | 4x |
ref <- get_ref_info(ref_path, .spl_context) |
254 | ||
255 |
# Apply statistics function |
|
256 | 4x |
x_stats <- .apply_stat_functions( |
257 | 4x |
default_stat_fnc = s_odds_ratio_j, |
258 | 4x |
custom_stat_fnc_list = NULL, |
259 | 4x |
args_list = c( |
260 | 4x |
df = list(df), |
261 | 4x |
.var = .var, |
262 | 4x |
.df_row = list(.df_row), |
263 | 4x |
.ref_group = list(ref$ref_group), |
264 | 4x |
.in_ref_col = ref$in_ref_col, |
265 | 4x |
dots_extra_args |
266 |
) |
|
267 |
) |
|
268 | ||
269 |
# Format according to specifications |
|
270 | 4x |
format_stats( |
271 | 4x |
x_stats, |
272 | 4x |
method_groups = "odds_ratio", |
273 | 4x |
stats_in = .stats, |
274 | 4x |
formats_in = .formats, |
275 | 4x |
labels_in = .labels, |
276 | 4x |
indents_in = .indent_mods |
277 |
) |
|
278 |
} |
|
279 | ||
280 |
#' Helper functions for odds ratio estimation |
|
281 |
#' |
|
282 |
#' @description `r lifecycle::badge('stable')` |
|
283 |
#' |
|
284 |
#' Functions to calculate odds ratios in [s_odds_ratio_j()]. |
|
285 |
#' |
|
286 |
#' @inheritParams odds_ratio |
|
287 |
#' @inheritParams proposal_argument_convention |
|
288 |
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally |
|
289 |
#' `strata` for [or_clogit_j()]. |
|
290 |
#' @return A named `list` of elements `or_ci`, `n_tot` and `pval`. |
|
291 |
#' |
|
292 |
#' @seealso [odds_ratio] |
|
293 |
#' |
|
294 |
#' @name h_odds_ratio |
|
295 |
NULL |
|
296 | ||
297 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be |
|
298 |
#' exactly 2 groups in `data` as specified by the `grp` variable. |
|
299 |
#' |
|
300 |
#' @examples |
|
301 |
#' data <- data.frame( |
|
302 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)), |
|
303 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)], |
|
304 |
#' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)], |
|
305 |
#' stringsAsFactors = TRUE |
|
306 |
#' ) |
|
307 |
#' |
|
308 |
#' or_glm_j(data, conf_level = 0.95) |
|
309 |
#' |
|
310 |
#' @export |
|
311 |
or_glm_j <- function(data, conf_level) { |
|
312 | 7x |
checkmate::assert_logical(data$rsp) |
313 | 7x |
(assert_proportion_value)(conf_level) |
314 | 7x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp")) |
315 | 7x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
316 | ||
317 | 7x |
data$grp <- as_factor_keep_attributes(data$grp) |
318 | 7x |
assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2) |
319 | 7x |
formula <- stats::as.formula("rsp ~ grp") |
320 | 7x |
model_fit <- stats::glm(formula = formula, data = data, family = stats::binomial(link = "logit")) |
321 | ||
322 |
# Note that here we need to discard the intercept. |
|
323 | 7x |
or <- exp(stats::coef(model_fit)[-1]) |
324 | 7x |
or_ci <- exp(stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE]) |
325 | ||
326 | 7x |
values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl")) |
327 | 7x |
n_tot <- stats::setNames(nrow(model_fit$model), "n_tot") |
328 | ||
329 |
# New: pval here |
|
330 | 7x |
pval <- summary(model_fit)$coef[-1, "Pr(>|z|)"] |
331 | ||
332 | 7x |
list(or_ci = values, n_tot = n_tot, pval = pval) |
333 |
} |
|
334 | ||
335 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for |
|
336 |
#' the whole data set including all groups, since the results are not the same as when doing |
|
337 |
#' pairwise comparisons between the groups. |
|
338 |
#' |
|
339 |
#' @examples |
|
340 |
#' data <- data.frame( |
|
341 |
#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)), |
|
342 |
#' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)], |
|
343 |
#' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)], |
|
344 |
#' stringsAsFactors = TRUE |
|
345 |
#' ) |
|
346 |
#' |
|
347 |
#' or_clogit_j(data, conf_level = 0.95) |
|
348 |
#' |
|
349 |
#' @export |
|
350 |
or_clogit_j <- function(data, conf_level, method = "exact") { |
|
351 | 1x |
checkmate::assert_logical(data$rsp) |
352 | 1x |
(assert_proportion_value)(conf_level) |
353 | 1x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata")) |
354 | 1x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
355 | 1x |
checkmate::assert_multi_class(data$strata, classes = c("factor", "character")) |
356 | 1x |
checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE) |
357 | ||
358 | 1x |
data$grp <- as_factor_keep_attributes(data$grp) |
359 | 1x |
data$strata <- as_factor_keep_attributes(data$strata) |
360 | ||
361 |
# Deviation from convention: `survival::strata` must be simply `strata`. |
|
362 | 1x |
strata <- survival::strata |
363 | 1x |
formula <- stats::as.formula("rsp ~ grp + strata(strata)") |
364 | 1x |
model_fit <- clogit_with_tryCatch( |
365 | 1x |
formula = formula, |
366 | 1x |
data = data, |
367 | 1x |
method = method |
368 |
) |
|
369 | ||
370 |
# New: pval here |
|
371 | ||
372 |
# Create a list with one set of OR estimates, CI and p-value per coefficient, i.e. comparison of one group vs. the |
|
373 |
# reference group. |
|
374 | 1x |
coef_est <- stats::coef(model_fit) |
375 | 1x |
ci_est <- stats::confint(model_fit, level = conf_level) |
376 | 1x |
pvals <- summary(model_fit)$coef[, "Pr(>|z|)", drop = FALSE] |
377 | 1x |
or_ci_pvals <- list() |
378 | 1x |
for (coef_name in names(coef_est)) { |
379 | 1x |
grp_name <- gsub("^grp", "", x = coef_name) |
380 | 1x |
or_ci_pvals[[grp_name]] <- stats::setNames( |
381 | 1x |
object = c(exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), pvals[coef_name, 1]), |
382 | 1x |
nm = c("est", "lcl", "ucl", "pval") |
383 |
) |
|
384 |
} |
|
385 | 1x |
list(or_ci_pvals = or_ci_pvals, n_tot = c(n_tot = model_fit$n)) |
386 |
} |
|
387 | ||
388 |
#' @describeIn h_odds_ratio Estimates the odds ratio based on CMH. Note that there must be |
|
389 |
#' exactly 2 groups in `data` as specified by the `grp` variable. |
|
390 |
#' |
|
391 |
#' @examples |
|
392 |
#' set.seed(123) |
|
393 |
#' data <- data.frame( |
|
394 |
#' rsp = as.logical(rbinom(n = 40, size = 1, prob = 0.5)), |
|
395 |
#' grp = letters[sample(1:2, size = 40, replace = TRUE)], |
|
396 |
#' strata = LETTERS[sample(1:2, size = 40, replace = TRUE)], |
|
397 |
#' stringsAsFactors = TRUE |
|
398 |
#' ) |
|
399 |
#' |
|
400 |
#' or_cmh(data, conf_level = 0.95) |
|
401 |
#' |
|
402 |
#' @export |
|
403 |
or_cmh <- function(data, conf_level) { |
|
404 | 7x |
checkmate::assert_logical(data$rsp) |
405 | 7x |
assert_proportion_value(conf_level) |
406 | 7x |
assert_df_with_variables(data, list(rsp = "rsp", grp = "grp")) |
407 | 7x |
checkmate::assert_multi_class(data$grp, classes = c("factor", "character")) |
408 | ||
409 | 7x |
data$grp <- as_factor_keep_attributes(data$grp) |
410 | 7x |
assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2) |
411 | ||
412 |
# first dimension: treatment, control - therefore we reverse the levels order 2nd dimension: TRUE, FALSE 3rd |
|
413 |
# dimension: levels of strata rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records |
|
414 | 7x |
t_tbl <- table( |
415 | 7x |
factor(data$grp, levels = rev(levels(data$grp))), |
416 | 7x |
factor(data$rsp, levels = c("TRUE", "FALSE")), |
417 | 7x |
data$strata |
418 |
) |
|
419 | ||
420 | 7x |
trt_ind <- 1 |
421 | 7x |
ctrl_ind <- 2 |
422 | ||
423 | 7x |
resp_ind <- 1 |
424 | 7x |
nonresp_ind <- 2 |
425 | ||
426 | 7x |
n1resp <- t_tbl[trt_ind, resp_ind, ] |
427 | 7x |
n2resp <- t_tbl[ctrl_ind, resp_ind, ] |
428 | 7x |
n1non <- t_tbl[trt_ind, nonresp_ind, ] |
429 | 7x |
n2non <- t_tbl[ctrl_ind, nonresp_ind, ] |
430 |
# Example: n2non are the control patient non-responders per stratum. |
|
431 | ||
432 |
# CMH statistic for odds ratio, Treatment over Control |
|
433 | 7x |
use_stratum <- (n1resp + n2resp + n1non + n2non) > 0 |
434 | 7x |
n1resp <- n1resp[use_stratum] |
435 | 7x |
n2resp <- n2resp[use_stratum] |
436 | 7x |
n1non <- n1non[use_stratum] |
437 | 7x |
n2non <- n2non[use_stratum] |
438 | 7x |
n <- n1resp + n2resp + n1non + n2non |
439 | ||
440 | 7x |
or_num <- sum(n1resp * n2non / n) |
441 | 7x |
or_denom <- sum(n1non * n2resp / n) |
442 | 7x |
log_or <- log(or_num) - log(or_denom) |
443 | 7x |
or <- exp(log_or) |
444 | ||
445 | 7x |
term1_num <- sum((n1resp + n2non) * n1resp * n2non / n^2) |
446 | 7x |
term1_denom <- 2 * (sum(n1resp * n2non / n))^2 |
447 | ||
448 | 7x |
term2_num <- sum(((n1resp + n2non) * n1non * n2resp + (n1non + n2resp) * n1resp * n2non) / n^2) |
449 | 7x |
term2_denom <- 2 * (sum(n1resp * n2non / n)) * (sum(n1non * n2resp / n)) |
450 | ||
451 | 7x |
term3_num <- sum((n1non + n2resp) * n1non * n2resp / n^2) |
452 | 7x |
term3_denom <- 2 * (sum(n1non * n2resp / n))^2 |
453 | ||
454 | 7x |
var_log_or <- term1_num / term1_denom + term2_num / term2_denom + term3_num / term3_denom |
455 | 7x |
se_log_or <- sqrt(var_log_or) |
456 | ||
457 | 7x |
z <- stats::qnorm((1 + conf_level) / 2) |
458 | 7x |
log_rel_or <- log_or + c(-1, +1) * z * se_log_or |
459 | 7x |
ci <- exp(log_rel_or) |
460 | 7x |
or_ci <- stats::setNames(c(or, ci), c("est", "lcl", "ucl")) |
461 | ||
462 | 7x |
pval <- stats::mantelhaen.test(t_tbl, correct = FALSE)$p.value |
463 | ||
464 | 7x |
n_tot <- stats::setNames(sum(n), "n_tot") |
465 | ||
466 | 7x |
list(or_ci = or_ci, n_tot = n_tot, pval = pval) |
467 |
} |
1 |
#' Get default statistical methods and their associated formats, labels, and indent modifiers |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge("experimental")` |
|
4 |
#' |
|
5 |
#' @note These functions have been copied from the `tern` package file |
|
6 |
#' `utils_default_stats_formats_labels.R` from GitHub development version 0.9.7.9017. |
|
7 |
#' Slight modifications have been applied to enhance functionality: |
|
8 |
#' |
|
9 |
#' * `tern_get_stats` added the `tern_stats` argument to avoid hardcoding within the function's body. |
|
10 |
#' * `tern_get_labels_from_stats` is more careful when receiving partial `labels_in` |
|
11 |
#' and partial `label_attr_from_stats`. |
|
12 |
#' |
|
13 |
#' Once these features are included in the `tern` package, this file could be removed from |
|
14 |
#' the `junco` package, and the functions could be used from the `tern` namespace directly. |
|
15 |
#' |
|
16 |
#' @name tern_default_stats_formats_labels |
|
17 |
NULL |
|
18 | ||
19 | ||
20 |
# Utility function used to separate custom stats (user-defined functions) from defaults |
|
21 |
.split_std_from_custom_stats <- function(stats_in) { |
|
22 | 144x |
out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL) |
23 | 144x |
if (is.list(stats_in)) { |
24 | 1x |
is_custom_fnc <- sapply(stats_in, is.function) |
25 | 1x |
checkmate::assert_list( |
26 | 1x |
stats_in[is_custom_fnc], |
27 | 1x |
types = "function", |
28 | 1x |
names = "named" |
29 |
) |
|
30 | 1x |
out[["custom_stats"]] <- stats_in[is_custom_fnc] |
31 | 1x |
out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) |
32 | 1x |
all_stats <- names(stats_in) # to keep the order |
33 | 1x |
all_stats[!is_custom_fnc] <- out[["default_stats"]] |
34 | 1x |
out[["all_stats"]] <- all_stats |
35 |
} else { |
|
36 | 143x |
out[["default_stats"]] <- out[["all_stats"]] <- stats_in |
37 |
} |
|
38 | 144x |
out |
39 |
} |
|
40 | ||
41 |
# Utility function to apply statistical functions |
|
42 |
.apply_stat_functions <- function( |
|
43 |
default_stat_fnc, |
|
44 |
custom_stat_fnc_list, |
|
45 |
args_list) { |
|
46 |
# Default checks |
|
47 | 144x |
checkmate::assert_function(default_stat_fnc) |
48 | 144x |
checkmate::assert_list( |
49 | 144x |
custom_stat_fnc_list, |
50 | 144x |
types = "function", |
51 | 144x |
null.ok = TRUE, |
52 | 144x |
names = "named" |
53 |
) |
|
54 | 144x |
checkmate::assert_list(args_list) |
55 | ||
56 |
# Checking custom stats have same formals |
|
57 | 144x |
if (!is.null(custom_stat_fnc_list)) { |
58 | 1x |
fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]] |
59 | 1x |
for (fnc in custom_stat_fnc_list) { |
60 | 1x |
if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) { |
61 | ! |
stop( |
62 | ! |
"The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ", |
63 | ! |
"as the default statistical function. In this case your custom function has ", |
64 | ! |
names(formals(fnc))[[1]], |
65 | ! |
" as first parameter, while the default function has ", |
66 | ! |
fundamental_call_to_data, |
67 |
"." |
|
68 |
) |
|
69 |
} |
|
70 | 1x |
if (!any(names(formals(fnc)) == "...")) { |
71 | ! |
stop( |
72 | ! |
"The custom statistical function needs to have `...` as a parameter to accept additional arguments. ", |
73 | ! |
"In this case your custom function does not have `...`." |
74 |
) |
|
75 |
} |
|
76 |
} |
|
77 |
} |
|
78 | ||
79 |
# Applying |
|
80 | 144x |
out_default <- do.call(default_stat_fnc, args = args_list) |
81 | 144x |
out_custom <- lapply( |
82 | 144x |
custom_stat_fnc_list, |
83 | 144x |
function(fnc) do.call(fnc, args = args_list) |
84 |
) |
|
85 | ||
86 |
# Merging |
|
87 | 144x |
c(out_default, out_custom) |
88 |
} |
|
89 | ||
90 |
#' @describeIn tern_default_stats_formats_labels Get statistics available for a given method |
|
91 |
#' group (analyze function). |
|
92 |
#' @keywords internal |
|
93 |
tern_get_stats <- function( |
|
94 |
method_groups = "analyze_vars_numeric", |
|
95 |
stats_in = NULL, |
|
96 |
custom_stats_in = NULL, |
|
97 |
add_pval = FALSE, |
|
98 |
tern_defaults = tern_default_stats) { |
|
99 | 1111x |
checkmate::assert_character(method_groups) |
100 | 1111x |
checkmate::assert_character(stats_in, null.ok = TRUE) |
101 | 1111x |
checkmate::assert_character(custom_stats_in, null.ok = TRUE) |
102 | 1111x |
checkmate::assert_flag(add_pval) |
103 | ||
104 |
# Default is still numeric |
|
105 | 1111x |
if (any(method_groups == "analyze_vars")) { |
106 | 3x |
method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric" |
107 |
} |
|
108 | ||
109 | 1111x |
type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks |
110 | ||
111 |
# Defaults for loop |
|
112 | 1111x |
out <- NULL |
113 | ||
114 |
# Loop for multiple method groups |
|
115 | 1111x |
for (mgi in method_groups) { |
116 | 1112x |
if (mgi %in% names(tern_defaults)) { |
117 | 1111x |
out_tmp <- tern_defaults[[mgi]] |
118 |
} else { |
|
119 | 1x |
stop( |
120 | 1x |
"The selected method group (", |
121 | 1x |
mgi, |
122 | 1x |
") has no default statistical method." |
123 |
) |
|
124 |
} |
|
125 | 1111x |
out <- unique(c(out, out_tmp)) |
126 |
} |
|
127 | ||
128 |
# Add custom stats |
|
129 | 1110x |
out <- c(out, custom_stats_in) |
130 | ||
131 |
# If you added pval to the stats_in you certainly want it |
|
132 | 1110x |
if (!is.null(stats_in) && any(grepl("^pval", stats_in))) { |
133 | 21x |
stats_in_pval_value <- stats_in[grepl("^pval", stats_in)] |
134 | ||
135 |
# Must be only one value between choices |
|
136 | 21x |
checkmate::assert_choice( |
137 | 21x |
stats_in_pval_value, |
138 | 21x |
c("pval", "pval_counts", "pvalue") |
139 |
) |
|
140 | ||
141 |
# Mismatch with counts and numeric |
|
142 |
if ( |
|
143 | 20x |
any(grepl("counts", method_groups)) && |
144 | 20x |
stats_in_pval_value != "pval_counts" || |
145 | 20x |
any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval" |
146 |
) { |
|
147 |
# nolint |
|
148 | 2x |
stop( |
149 | 2x |
"Inserted p-value (", |
150 | 2x |
stats_in_pval_value, |
151 | 2x |
") is not valid for type ", |
152 | 2x |
type_tmp, |
153 | 2x |
". Use ", |
154 | 2x |
paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")), |
155 | 2x |
" instead." |
156 |
) |
|
157 |
} |
|
158 | ||
159 |
# Lets add it even if present (thanks to unique) |
|
160 | 18x |
add_pval <- TRUE |
161 |
} |
|
162 | ||
163 |
# Mainly used in "analyze_vars" but it could be necessary elsewhere |
|
164 | 1107x |
if (isTRUE(add_pval)) { |
165 | 21x |
if (any(grepl("counts", method_groups))) { |
166 | 1x |
out <- unique(c(out, "pval_counts")) |
167 |
} else { |
|
168 | 20x |
out <- unique(c(out, "pval")) |
169 |
} |
|
170 |
} |
|
171 | ||
172 |
# Filtering for stats_in (character vector) |
|
173 | 1107x |
if (!is.null(stats_in)) { |
174 | 591x |
out <- intersect(stats_in, out) # It orders them too |
175 |
} |
|
176 | ||
177 |
# If intersect did not find matches (and no pval?) -> error |
|
178 | 1107x |
if (length(out) == 0) { |
179 | 2x |
stop( |
180 | 2x |
"The selected method group(s) (", |
181 | 2x |
paste0(method_groups, collapse = ", "), |
182 |
")", |
|
183 | 2x |
" do not have the required default statistical methods:\n", |
184 | 2x |
paste0(stats_in, collapse = " ") |
185 |
) |
|
186 |
} |
|
187 | ||
188 | 1105x |
out |
189 |
} |
|
190 | ||
191 |
#' @describeIn tern_default_stats_formats_labels Get formats corresponding to a list of statistics. |
|
192 |
#' @keywords internal |
|
193 |
tern_get_formats_from_stats <- function( |
|
194 |
stats, |
|
195 |
formats_in = NULL, |
|
196 |
levels_per_stats = NULL, |
|
197 |
tern_defaults = tern_default_formats) { |
|
198 | 636x |
checkmate::assert_character(stats, min.len = 1) |
199 |
# It may be a list if there is a function in the formats |
|
200 | 636x |
if (checkmate::test_list(formats_in, null.ok = TRUE)) { |
201 | 635x |
checkmate::assert_list(formats_in, null.ok = TRUE) |
202 |
# Or it may be a vector of characters |
|
203 |
} else { |
|
204 | 1x |
checkmate::assert_character(formats_in, null.ok = TRUE) |
205 |
} |
|
206 | 636x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
207 | ||
208 |
# If unnamed formats given as formats_in and same number of stats, use one format per stat |
|
209 |
if ( |
|
210 | 636x |
!is.null(formats_in) && |
211 | 636x |
length(formats_in) == length(stats) && |
212 | 636x |
is.null(names(formats_in)) && |
213 | 636x |
is.null(levels_per_stats) |
214 |
) { |
|
215 | ! |
out <- as.list(formats_in) |> stats::setNames(stats) |
216 | ! |
return(out) |
217 |
} |
|
218 | ||
219 |
# If levels_per_stats not given, assume one row per statistic |
|
220 | 153x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) |> stats::setNames(stats) |
221 | ||
222 |
# Apply custom formats |
|
223 | 636x |
out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults) |
224 | ||
225 |
# Default to NULL if no format |
|
226 | 636x |
case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE) |
227 | 636x |
out[names(out) == out | case_input_is_not_stat] <- list(NULL) |
228 | ||
229 | 636x |
out |
230 |
} |
|
231 | ||
232 |
#' @describeIn tern_default_stats_formats_labels Get labels corresponding to a list of statistics. |
|
233 |
#' @keywords internal |
|
234 |
tern_get_labels_from_stats <- function( |
|
235 |
stats, |
|
236 |
labels_in = NULL, |
|
237 |
levels_per_stats = NULL, |
|
238 |
label_attr_from_stats = NULL, |
|
239 |
tern_defaults = tern_default_labels) { |
|
240 | 596x |
checkmate::assert_character(stats, min.len = 1) |
241 | ||
242 |
# Modification: |
|
243 |
# If any label_attr_from_stats is available and valid, save in labels_in |
|
244 |
# if not specified there already (so don't overwrite labels_in). |
|
245 | 596x |
if (!is.null(label_attr_from_stats)) { |
246 | 148x |
valid_label_attr_from_stats <- label_attr_from_stats[ |
247 | 148x |
nzchar(label_attr_from_stats) & |
248 | 148x |
!sapply(label_attr_from_stats, is.null) & |
249 | 148x |
!is.na(label_attr_from_stats) |
250 |
] |
|
251 | 148x |
if (length(valid_label_attr_from_stats)) { |
252 | 136x |
do_save <- setdiff( |
253 | 136x |
names(valid_label_attr_from_stats), |
254 | 136x |
names(labels_in) |
255 |
) |
|
256 | 136x |
labels_in <- c(labels_in, valid_label_attr_from_stats[do_save]) |
257 |
} |
|
258 |
} |
|
259 | ||
260 |
# It may be a list |
|
261 | 596x |
if (checkmate::test_list(labels_in, null.ok = TRUE)) { |
262 | 571x |
checkmate::assert_list(labels_in, null.ok = TRUE) |
263 |
# Or it may be a vector of characters |
|
264 |
} else { |
|
265 | 25x |
checkmate::assert_character(labels_in, null.ok = TRUE) |
266 |
} |
|
267 | 596x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
268 | ||
269 |
# If unnamed labels given as labels_in and same number of stats, use one label per stat |
|
270 |
if ( |
|
271 | 596x |
!is.null(labels_in) && |
272 | 596x |
length(labels_in) == length(stats) && |
273 | 596x |
is.null(names(labels_in)) && |
274 | 596x |
is.null(levels_per_stats) |
275 |
) { |
|
276 | ! |
out <- as.list(labels_in) |> stats::setNames(stats) |
277 | ! |
return(out) |
278 |
} |
|
279 | ||
280 |
# If levels_per_stats not given, assume one row per statistic |
|
281 | 156x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) |> stats::setNames(stats) |
282 | ||
283 |
# Apply custom labels |
|
284 | 596x |
out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults) |
285 | 596x |
out |
286 |
} |
|
287 | ||
288 |
# Function to loop over each stat and levels to set correct values |
|
289 |
.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) { |
|
290 | 1843x |
out <- list() |
291 | ||
292 | 1843x |
for (stat_i in names(levels_per_stats)) { |
293 |
# Get all levels of the statistic |
|
294 | 3230x |
all_lvls <- levels_per_stats[[stat_i]] |
295 | ||
296 | 3230x |
if ((length(all_lvls) == 1 && all_lvls == stat_i) || is.null(all_lvls)) { |
297 |
# One row per statistic |
|
298 | 1946x |
out[[stat_i]] <- if (stat_i %in% names(user_in)) { |
299 |
# 1. Check for stat_i in user input |
|
300 | 433x |
user_in[[stat_i]] |
301 | 1946x |
} else if (stat_i %in% names(tern_defaults)) { |
302 |
# 2. Check for stat_i in tern defaults |
|
303 | 1504x |
tern_defaults[[stat_i]] |
304 |
} else { |
|
305 |
# 3. Otherwise stat_i |
|
306 | 9x |
stat_i |
307 |
} |
|
308 |
} else { |
|
309 |
# One row per combination of variable level and statistic |
|
310 |
# Loop over levels for each statistic |
|
311 | 1284x |
for (lev_i in all_lvls) { |
312 |
# Construct row name (stat_i.lev_i) |
|
313 | 7115x |
row_nm <- paste(stat_i, lev_i, sep = ".") |
314 | ||
315 | 7115x |
out[[row_nm]] <- if (row_nm %in% names(user_in)) { |
316 |
# 1. Check for stat_i.lev_i in user input |
|
317 | 1x |
user_in[[row_nm]] |
318 | 7115x |
} else if (lev_i %in% names(user_in)) { |
319 |
# 2. Check for lev_i in user input |
|
320 | 5x |
user_in[[lev_i]] |
321 | 7115x |
} else if (stat_i %in% names(user_in)) { |
322 |
# 3. Check for stat_i in user input |
|
323 | 1x |
user_in[[stat_i]] |
324 | 7115x |
} else if (lev_i %in% names(tern_defaults)) { |
325 |
# 4. Check for lev_i in tern defaults (only used for labels) |
|
326 | ! |
tern_defaults[[lev_i]] |
327 | 7115x |
} else if (stat_i %in% names(tern_defaults)) { |
328 |
# 5. Check for stat_i in tern defaults |
|
329 | 4712x |
tern_defaults[[stat_i]] |
330 |
} else { |
|
331 |
# 6. Otherwise lev_i |
|
332 | 2396x |
lev_i |
333 |
} |
|
334 |
} |
|
335 |
} |
|
336 |
} |
|
337 | ||
338 | 1843x |
out |
339 |
} |
|
340 | ||
341 |
#' @describeIn tern_default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows. |
|
342 |
#' @keywords internal |
|
343 |
tern_get_indents_from_stats <- function(stats, |
|
344 |
indents_in = NULL, |
|
345 |
levels_per_stats = NULL, |
|
346 |
tern_defaults = as.list(rep(0L, length(stats))) |> stats::setNames(stats)) { |
|
347 | 630x |
checkmate::assert_character(stats, min.len = 1) |
348 |
# It may be a list |
|
349 | 630x |
if (checkmate::test_list(indents_in, null.ok = TRUE)) { |
350 | 598x |
checkmate::assert_list(indents_in, null.ok = TRUE) |
351 |
# Or it may be a vector of integers |
|
352 |
} else { |
|
353 | 32x |
checkmate::assert_integerish(indents_in, null.ok = TRUE) |
354 |
} |
|
355 | 630x |
checkmate::assert_list(levels_per_stats, null.ok = TRUE) |
356 | ||
357 |
# If levels_per_stats not given, assume one row per statistic |
|
358 | 152x |
if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) |> stats::setNames(stats) |
359 | ||
360 |
# Single indentation level for all rows |
|
361 | 630x |
if (is.null(names(indents_in)) && length(indents_in) == 1) { |
362 | 19x |
out <- rep(indents_in, length(levels_per_stats |> unlist())) |
363 | 19x |
return(out) |
364 |
} |
|
365 | ||
366 |
# Apply custom indentation |
|
367 | 611x |
out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults) |
368 | 611x |
out |
369 |
} |
|
370 | ||
371 |
# tern_default_labels ---- |
|
372 | ||
373 |
#' @describeIn tern_default_stats_formats_labels Named `character` vector of default labels for `tern`. |
|
374 |
#' This is only copied here from the latest GitHub version, because otherwise a tern test fails. |
|
375 |
#' |
|
376 |
#' @keywords internal |
|
377 |
tern_default_labels <- c( |
|
378 |
cv = "CV (%)", |
|
379 |
iqr = "IQR", |
|
380 |
geom_cv = "CV % Geometric Mean", |
|
381 |
geom_mean = "Geometric Mean", |
|
382 |
geom_mean_sd = "Geometric Mean (SD)", |
|
383 |
geom_mean_ci = "Geometric Mean 95% CI", |
|
384 |
geom_mean_ci_3d = "Geometric Mean (95% CI)", |
|
385 |
geom_sd = "Geometric SD", |
|
386 |
mad = "Median Absolute Deviation", |
|
387 |
max = "Maximum", |
|
388 |
mean = "Mean", |
|
389 |
mean_ci = "Mean 95% CI", |
|
390 |
mean_ci_3d = "Mean (95% CI)", |
|
391 |
mean_pval = "Mean p-value (H0: mean = 0)", |
|
392 |
mean_sd = "Mean (SD)", |
|
393 |
mean_sdi = "Mean -/+ 1xSD", |
|
394 |
mean_se = "Mean (SE)", |
|
395 |
mean_sei = "Mean -/+ 1xSE", |
|
396 |
median = "Median", |
|
397 |
median_ci = "Median 95% CI", |
|
398 |
median_ci_3d = "Median (95% CI)", |
|
399 |
median_range = "Median (Min - Max)", |
|
400 |
min = "Minimum", |
|
401 |
n = "n", |
|
402 |
n_blq = "n_blq", |
|
403 |
nonunique = "Number of events", |
|
404 |
pval = "p-value (t-test)", # Default for numeric |
|
405 |
pval_counts = "p-value (chi-squared test)", # Default for counts |
|
406 |
quantiles = "25% and 75%-ile", |
|
407 |
quantiles_lower = "25%-ile (95% CI)", |
|
408 |
quantiles_upper = "75%-ile (95% CI)", |
|
409 |
range = "Min - Max", |
|
410 |
range_censor = "Range (censored)", |
|
411 |
range_event = "Range (event)", |
|
412 |
rate = "Adjusted Rate", |
|
413 |
rate_ratio = "Adjusted Rate Ratio", |
|
414 |
sd = "SD", |
|
415 |
se = "SE", |
|
416 |
sum = "Sum", |
|
417 |
unique = "Number of patients with at least one event" |
|
418 |
) |
1 |
#' @title Function factory for xx style formatting |
|
2 |
#' @description A function factory to generate formatting functions for value |
|
3 |
#' formatting that support the xx style format and control the rounding method |
|
4 |
#' |
|
5 |
#' @param roundmethod (`string`)\cr choice of rounding methods. Options are: |
|
6 |
#' * `sas`: the underlying rounding method is `tidytlg::roundSAS`, where \cr |
|
7 |
#' roundSAS comes from this Stack Overflow post https://stackoverflow.com/questions/12688717/round-up-from-5 |
|
8 |
#' * `iec`: the underlying rounding method is `round` |
|
9 |
#' |
|
10 |
#' @param na_str_dflt Character to represent NA value |
|
11 |
#' @param replace_na_dflt logical(1). Should an `na_string` of "NA" within |
|
12 |
#' the formatters framework be overridden by `na_str_default`? Defaults to |
|
13 |
#' `TRUE`, as a way to have a different default na string behavior from the |
|
14 |
#' base `formatters` framework. |
|
15 |
#' @return `format_xx_fct()` format function that can be used in rtables formatting calls |
|
16 |
#' @export |
|
17 |
#' |
|
18 |
#' @family JJCS formats |
|
19 |
#' @examples |
|
20 |
#' jjcsformat_xx_SAS <- format_xx_fct(roundmethod = "sas") |
|
21 |
#' jjcsformat_xx <- jjcsformat_xx_SAS |
|
22 |
#' rcell(c(1.453), jjcsformat_xx("xx.xx")) |
|
23 |
#' rcell(c(), jjcsformat_xx("xx.xx")) |
|
24 |
#' rcell(c(1.453, 2.45638), jjcsformat_xx("xx.xx (xx.xxx)")) |
|
25 |
#' |
|
26 |
format_xx_fct <- function(roundmethod = c("sas", "iec"), na_str_dflt = "NE", |
|
27 |
replace_na_dflt = TRUE) { |
|
28 | ! |
roundmethod <- match.arg(roundmethod) |
29 | ||
30 | ! |
if (roundmethod == "sas") { |
31 | ! |
roundfunc <- tidytlg::roundSAS |
32 |
} else { |
|
33 | ! |
roundfunc <- round |
34 |
} |
|
35 | ||
36 | ! |
fnct <- function(str, na_str = na_str_dflt) { |
37 | 450x |
if (grepl("xxx.", str, fixed = TRUE)) { |
38 | ! |
stop( |
39 | ! |
"Error: jjcs_format_xx: do not use xxx. in input str, replace by xx. instead." |
40 |
) |
|
41 |
} |
|
42 | 450x |
if (!grepl("xx", str, fixed = TRUE)) { |
43 | 1x |
stop("Error: jjcs_format_xx: input str must contain xx.") |
44 |
} |
|
45 | 449x |
positions <- gregexpr( |
46 | 449x |
pattern = "xx\\.?x*", |
47 | 449x |
text = str, |
48 | 449x |
perl = TRUE |
49 |
) |
|
50 | 449x |
x_positions <- regmatches(x = str, m = positions)[[1]] |
51 |
### str is splitted into pieces as xx. xx xx.xxx |
|
52 |
### xx is no rounding |
|
53 |
### xx. rounding to integer |
|
54 |
### xx.x rounding to 1 decimal, etc |
|
55 | ||
56 | 449x |
no_round <- function(x, na_str = na_str_dflt) { |
57 | 23x |
if (is.na(x)) { |
58 | ! |
return(na_str) |
59 |
} else { |
|
60 | 23x |
return(x) |
61 |
} |
|
62 |
} |
|
63 | ||
64 | 449x |
roundings <- lapply(X = x_positions, function(x) { |
65 | 609x |
y <- strsplit(split = "\\.", x = x)[[1]] |
66 |
### "xx.x" will result in c("xx","x") |
|
67 |
### "xx." will result in "xx" |
|
68 |
### "xx" will remain "xx" |
|
69 | ||
70 | 609x |
if (x == "xx") { |
71 | 34x |
rounding <- no_round |
72 |
} else { |
|
73 | 575x |
rounding <- function(x, na_str = na_str_dflt) { |
74 | 1172x |
if (is.na(x)) { |
75 | 25x |
return(na_str) |
76 |
} else { |
|
77 | 1147x |
format( |
78 | 1147x |
roundfunc(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)), |
79 | 1147x |
nsmall = ifelse(length(y) > 1, nchar(y[2]), 0) |
80 |
) |
|
81 |
} |
|
82 |
} |
|
83 |
} |
|
84 | 609x |
return(rounding) |
85 |
}) |
|
86 | 449x |
rtable_format <- function(x, output, na_str = na_str_dflt) { |
87 | 876x |
if (anyNA(na_str) || (replace_na_dflt && any(na_str == "NA"))) { |
88 | 290x |
na_inds <- which(is.na(na_str) | (replace_na_dflt & na_str == "NA")) |
89 | 290x |
na_str[na_inds] <- rep(na_str_dflt, length.out = length(na_str))[na_inds] |
90 |
} |
|
91 | 876x |
if (length(x) == 0 || isTRUE(all(x == ""))) { |
92 | ! |
return(NULL) |
93 | 876x |
} else if (!length(positions[[1]]) == length(x)) { |
94 | 3x |
stop( |
95 | 3x |
"Error: input str in call to jjcs_format_xx must contain same number of xx as the number of stats." |
96 |
) |
|
97 |
} |
|
98 | ||
99 | 873x |
values <- Map(y = x, fun = roundings, na_str = na_str, function(y, fun, na_str) fun(y, na_str = na_str)) |
100 | 873x |
regmatches(x = str, m = positions)[[1]] <- values |
101 | 873x |
return(str) |
102 |
} |
|
103 | 449x |
return(rtable_format) |
104 |
} |
|
105 | ! |
return(fnct) |
106 |
} |
|
107 | ||
108 | ||
109 |
jjcsformat_xx_SAS <- format_xx_fct(roundmethod = "sas") |
|
110 |
jjcsformat_xx_R <- format_xx_fct(roundmethod = "iec") |
|
111 | ||
112 | ||
113 |
### if we ever decide to switch rounding method, we just have to update jjcsformat_xx here |
|
114 | ||
115 |
#' @title Formatting of values |
|
116 |
#' @name jjcsformat_xx |
|
117 |
#' @description jjcs formatting function |
|
118 |
#' @param str The formatting that is required specified as a text string, eg "xx.xx" |
|
119 |
#' @param na_str character. Na string that will be passed from `formatters` into |
|
120 |
#' our formatting functions. |
|
121 |
#' @return a formatting function with `"sas"`-style rounding. |
|
122 |
#' @export |
|
123 |
jjcsformat_xx <- jjcsformat_xx_SAS |
|
124 | ||
125 |
#' @name count_fraction |
|
126 |
#' @title Formatting count and fraction values |
|
127 |
#' |
|
128 |
#' @description |
|
129 |
#' |
|
130 |
#' Formats a count together with fraction (and/or denominator) with special |
|
131 |
#' consideration when count is 0, or fraction is 1. |
|
132 |
#' \cr See also: tern::format_count_fraction_fixed_dp() |
|
133 |
#' |
|
134 |
#' @inheritParams format_xx_fct |
|
135 |
#' @param x `numeric`\cr with elements `num` and `fraction` or `num`, `denom` and `fraction`. |
|
136 |
#' @param d numeric(1). Number of digits to round fraction to (default=1) |
|
137 |
#' @param ... Additional arguments passed to other methods. |
|
138 |
#' @return A string in the format `count / denom (ratio percent)`. If `count` |
|
139 |
#' is 0, the format is `0`. If fraction is >0.99, the format is |
|
140 |
#' `count / denom (>99.9 percent)` |
|
141 |
#' @family JJCS formats |
|
142 |
#' @rdname count_fraction |
|
143 |
#' @export |
|
144 |
#' @examples |
|
145 |
#' jjcsformat_count_fraction(c(7, 0.7)) |
|
146 |
#' jjcsformat_count_fraction(c(70000, 0.9999999)) |
|
147 |
#' jjcsformat_count_fraction(c(70000, 1)) |
|
148 |
#' |
|
149 |
jjcsformat_count_fraction <- function( |
|
150 |
x, |
|
151 |
d = 1, |
|
152 |
roundmethod = c("sas", "iec"), |
|
153 |
...) { |
|
154 | 25x |
roundmethod <- match.arg(roundmethod) |
155 | 25x |
attr(x, "label") <- NULL |
156 | 25x |
if (any(is.na(x))) { |
157 | ! |
return("-") |
158 |
} |
|
159 | ||
160 | 25x |
checkmate::assert_vector(x) |
161 | 25x |
checkmate::assert_integerish(x[1]) |
162 | 25x |
assert_proportion_value( |
163 | 25x |
x[2], |
164 | 25x |
include_boundaries = TRUE |
165 |
) |
|
166 | ||
167 | 25x |
fraction <- x[2] |
168 | ||
169 | ! |
if (isTRUE(all.equal(fraction, 1))) fraction <- 1 |
170 | ||
171 | 25x |
if (roundmethod == "sas") { |
172 | 24x |
fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d) |
173 |
} else { |
|
174 | 1x |
fmtpct <- format(round(fraction * 100, d), nsmall = d) |
175 |
} |
|
176 | ||
177 | 25x |
result <- if (x[1] == 0) { |
178 | ! |
"0" |
179 | 25x |
} else if (fraction == 1) { |
180 |
## per conventions still report as 100.0% |
|
181 | ! |
paste0(x[1], " (100.0%)") |
182 | 25x |
} else if (fmtpct == format(0, nsmall = d)) { |
183 |
# else if (100*x[2] < 10**(-d)) { |
|
184 |
### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, |
|
185 |
# but the actual value of pct <0.1) |
|
186 | ! |
paste0(x[1], " (<", 10**(-d), "%)") |
187 | 25x |
} else if (fmtpct == format(100, nsmall = d)) { |
188 |
# else if (100*x[2] > 100-10**(-d)) { |
|
189 |
### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, |
|
190 |
# but the actual value of pct >99.9) |
|
191 | ! |
paste0(x[1], " (>", 100 - 10**(-d), "%)") |
192 |
} else { |
|
193 | 25x |
paste0(x[1], " (", fmtpct, "%)") |
194 |
} |
|
195 | 25x |
return(result) |
196 |
} |
|
197 | ||
198 |
#' @title Formatting count, denominator and fraction values |
|
199 |
#' |
|
200 |
#' @inheritParams count_fraction |
|
201 |
#' @param ... Additional arguments passed to other methods. |
|
202 |
#' @export |
|
203 |
#' @rdname count_denom_fraction |
|
204 |
#' @return `x`, formatted into a string with the appropriate |
|
205 |
#' format and `d` digits of precision. |
|
206 |
#' @examples |
|
207 |
#' jjcsformat_count_denom_fraction(c(7, 10, 0.7)) |
|
208 |
#' jjcsformat_count_denom_fraction(c(70000, 70001, 70000 / 70001)) |
|
209 |
#' jjcsformat_count_denom_fraction(c(235, 235, 235 / 235)) |
|
210 |
jjcsformat_count_denom_fraction <- function( |
|
211 |
x, |
|
212 |
d = 1, |
|
213 |
roundmethod = c("sas", "iec"), |
|
214 |
...) { |
|
215 | 313x |
roundmethod <- match.arg(roundmethod) |
216 | 313x |
attr(x, "label") <- NULL |
217 | 313x |
if (any(is.na(x))) { |
218 | 55x |
return("-") |
219 |
} |
|
220 | 258x |
checkmate::assert_vector(x) |
221 | 258x |
checkmate::assert_integerish(x[1]) |
222 | 258x |
assert_proportion_value( |
223 | 258x |
x[3], |
224 | 258x |
include_boundaries = TRUE |
225 |
) |
|
226 | ||
227 | 258x |
fraction <- x[3] |
228 | 107x |
if (x[2] == x[1]) fraction <- 1 |
229 | ||
230 | 258x |
fmt_x12 <- paste0(x[1], "/", x[2]) |
231 | ||
232 | 258x |
if (roundmethod == "sas") { |
233 | 257x |
fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d) |
234 |
} else { |
|
235 | 1x |
fmtpct <- format(round(fraction * 100, d), nsmall = d) |
236 |
} |
|
237 | ||
238 | 258x |
result <- if (x[1] == 0) { |
239 |
# "0" |
|
240 |
# same as in general situation |
|
241 | 7x |
paste0(fmt_x12, " (", fmtpct, "%)") |
242 | 258x |
} else if (100 * fraction == 100) { |
243 | 107x |
paste0(fmt_x12, " (100.0%)") |
244 | 258x |
} else if (100 * fraction < 10**(-d)) { |
245 |
### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, but the actual value of pct <0.1) |
|
246 | 1x |
paste0(fmt_x12, " (<", 10**(-d), "%)") |
247 | 258x |
} else if (100 * fraction > 100 - 10**(-d)) { |
248 |
### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, but the actual value of pct >99.9) |
|
249 | 1x |
paste0(fmt_x12, " (>", 100 - 10**(-d), "%)") |
250 |
} else { |
|
251 | 142x |
paste0(fmt_x12, " (", fmtpct, "%)") |
252 |
} |
|
253 | 258x |
return(result) |
254 |
} |
|
255 | ||
256 |
#' @title Formatting fraction, count and denominator values |
|
257 |
#' |
|
258 |
#' @details |
|
259 |
#' Formats a 3-dimensional value such that percent values |
|
260 |
#' near 0 or 100% are formatted as .e.g, `"<0.1%"` and |
|
261 |
#' `">99.9%"`, where the cutoff is controlled by `d`, and |
|
262 |
#' formatted as `"xx.x% (xx/xx)"` otherwise, with the |
|
263 |
#' precision of the percent also controlled by `d`. |
|
264 |
#' |
|
265 |
#' @inheritParams count_fraction |
|
266 |
#' @param ... Additional arguments passed to other methods. |
|
267 |
#' @export |
|
268 |
#' @rdname fraction_count_denom |
|
269 |
#' @return `x` formatted as a string with `d` digits of precision, |
|
270 |
#' with special cased values as described in Details above. |
|
271 |
#' @examples |
|
272 |
#' jjcsformat_fraction_count_denom(c(7, 10, 0.7)) |
|
273 |
#' jjcsformat_fraction_count_denom(c(70000, 70001, 70000 / 70001)) |
|
274 |
#' jjcsformat_fraction_count_denom(c(235, 235, 235 / 235)) |
|
275 |
jjcsformat_fraction_count_denom <- function( |
|
276 |
x, |
|
277 |
d = 1, |
|
278 |
roundmethod = c("sas", "iec"), |
|
279 |
...) { |
|
280 | 2x |
roundmethod <- match.arg(roundmethod) |
281 | 2x |
attr(x, "label") <- NULL |
282 | 2x |
if (any(is.na(x))) { |
283 | ! |
return("-") |
284 |
} |
|
285 | 2x |
checkmate::assert_vector(x) |
286 | 2x |
checkmate::assert_integerish(x[1]) |
287 | 2x |
assert_proportion_value( |
288 | 2x |
x[3], |
289 | 2x |
include_boundaries = TRUE |
290 |
) |
|
291 | ||
292 | 2x |
fraction <- x[3] |
293 | ! |
if (x[2] == x[1]) fraction <- 1 |
294 | ||
295 | 2x |
fmt_x12 <- paste0(x[1], "/", x[2]) |
296 | ||
297 | 2x |
if (roundmethod == "sas") { |
298 | 1x |
fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d) |
299 |
} else { |
|
300 | 1x |
fmtpct <- format(round(fraction * 100, d), nsmall = d) |
301 |
} |
|
302 | ||
303 | 2x |
result <- if (x[1] == 0) { |
304 |
# "0" |
|
305 |
# same as in general situation |
|
306 | ! |
paste0("(", fmt_x12, ")") |
307 | 2x |
} else if (100 * fraction == 100) { |
308 | ! |
paste0("100.0%", " (", fmt_x12, ")") |
309 | 2x |
} else if (100 * fraction < 10**(-d)) { |
310 |
### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, but the actual value of pct <0.1) |
|
311 | ! |
paste0("<", 10**(-d), "%", " (", fmt_x12, ")") |
312 | 2x |
} else if (100 * fraction > 100 - 10**(-d)) { |
313 |
### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, but the actual value of pct >99.9) |
|
314 | ! |
paste0(">", 100 - 10**(-d), "%", " (", fmt_x12, ")") |
315 |
} else { |
|
316 | 2x |
paste0(fmtpct, "%", " (", fmt_x12, ")") |
317 |
} |
|
318 | 2x |
return(result) |
319 |
} |
|
320 | ||
321 |
#' @title Function factory for p-value formatting |
|
322 |
#' |
|
323 |
#' @description A function factory to generate formatting functions for p-value |
|
324 |
#' formatting that support rounding close to the significance level specified |
|
325 |
#' |
|
326 |
#' @param alpha `number`\cr the significance level to account for during rounding. |
|
327 |
#' @return The p-value in the standard format. If `count` is 0, the format is `0`. |
|
328 |
#' If it is smaller than 0.001, then `<0.001`, if it is larger than 0.999, then |
|
329 |
#' `>0.999` is returned. Otherwise, 3 digits are used. In the special case that |
|
330 |
#' rounding from below would make the string equal to the specified `alpha`, |
|
331 |
#' then a higher number of digits is used to be able to still see the difference. |
|
332 |
#' For example, 0.0048 is not rounded to 0.005 but stays at 0.0048 if `alpha = 0.005` |
|
333 |
#' is set. |
|
334 |
#' |
|
335 |
#' @family JJCS formats |
|
336 |
#' @export |
|
337 |
#' |
|
338 |
#' @examples |
|
339 |
#' my_pval_format <- jjcsformat_pval_fct(0.005) |
|
340 |
#' my_pval_format(0.2802359) |
|
341 |
#' my_pval_format(0.0048) |
|
342 |
#' my_pval_format(0.00499) |
|
343 |
#' my_pval_format(0.004999999) |
|
344 |
#' my_pval_format(0.0051) |
|
345 |
#' my_pval_format(0.0009) |
|
346 |
#' my_pval_format(0.9991) |
|
347 |
#' |
|
348 |
jjcsformat_pval_fct <- function(alpha = 0.05) { |
|
349 | 26x |
checkmate::assert_number(alpha, lower = 0, upper = 1) |
350 | ||
351 | 26x |
function(x, ...) { |
352 | 114x |
checkmate::assert_number( |
353 | 114x |
x, |
354 | 114x |
lower = 0, |
355 | 114x |
upper = 1 + .Machine$double.eps, # Be a bit tolerant here. |
356 | 114x |
na.ok = TRUE |
357 |
) |
|
358 | 114x |
if (is.na(x)) { |
359 | ! |
"NE" |
360 | 114x |
} else if (x < 0.001) { |
361 | 34x |
"<0.001" |
362 | 80x |
} else if (x > 0.999) { |
363 | 3x |
">0.999" |
364 |
} else { |
|
365 | 77x |
xx_format <- "xx.xxx" |
366 | 77x |
res <- jjcsformat_xx(xx_format)(x) |
367 | 77x |
while (as.numeric(res) == alpha && x < alpha) { |
368 |
# Increase precision by 1 digit until the result |
|
369 |
# is different from threshold alpha. |
|
370 | 3x |
xx_format <- paste0(xx_format, "x") |
371 | 3x |
res <- jjcsformat_xx(xx_format)(x) |
372 |
} |
|
373 | 77x |
res |
374 |
} |
|
375 |
} |
|
376 |
} |
|
377 | ||
378 |
#' @title Function factory for range with censoring information formatting |
|
379 |
#' @description A function factory to generate formatting functions for range formatting |
|
380 |
#' that includes information about the censoring of survival times. |
|
381 |
#' |
|
382 |
#' @param str `string`\cr the format specifying the number of digits to be used, |
|
383 |
#' for the range values, e.g. `"xx.xx"`. |
|
384 |
#' @return A function that formats a numeric vector with 4 elements: |
|
385 |
#' - minimum |
|
386 |
#' - maximum |
|
387 |
#' - censored minimum? (1 if censored, 0 if event) |
|
388 |
#' - censored maximum? (1 if censored, 0 if event) |
|
389 |
#' The range along with the censoring information is returned as a string |
|
390 |
#' with the specified numeric format as `(min, max)`, and the `+` is appended |
|
391 |
#' to `min` or `max` if these have been censored. |
|
392 |
#' |
|
393 |
#' @family JJCS formats |
|
394 |
#' @export |
|
395 |
#' |
|
396 |
#' @examples |
|
397 |
#' my_range_format <- jjcsformat_range_fct("xx.xx") |
|
398 |
#' my_range_format(c(0.35235, 99.2342, 1, 0)) |
|
399 |
#' my_range_format(c(0.35235, 99.2342, 0, 1)) |
|
400 |
#' my_range_format(c(0.35235, 99.2342, 0, 0)) |
|
401 |
#' my_range_format(c(0.35235, 99.2342, 1, 1)) |
|
402 |
jjcsformat_range_fct <- function(str) { |
|
403 | 1x |
format_xx <- jjcsformat_xx(str) |
404 | ||
405 | 1x |
function(x, ...) { |
406 | 4x |
checkmate::assert_numeric( |
407 | 4x |
x, |
408 | 4x |
len = 4L, |
409 | 4x |
finite = TRUE, |
410 | 4x |
any.missing = FALSE |
411 |
) |
|
412 | 4x |
checkmate::assert_true(all(x[c(3, 4)] %in% c(0, 1))) |
413 | ||
414 | 4x |
res <- vapply(x[c(1, 2)], format_xx, character(1)) |
415 | 2x |
if (x[3] == 1) res[1] <- paste0(res[1], "+") |
416 | 2x |
if (x[4] == 1) res[2] <- paste0(res[2], "+") |
417 | 4x |
paste0("(", res[1], ", ", res[2], ")") |
418 |
} |
|
419 |
} |
1 |
#' Relative risk estimation |
|
2 |
#' |
|
3 |
#' The analysis function [a_relative_risk()] is used to create a layout element |
|
4 |
#' to estimate the relative risk for response within a studied population. Only |
|
5 |
#' the CMH method is available currently. |
|
6 |
#' The primary analysis variable, `vars`, is a logical variable indicating |
|
7 |
#' whether a response has occurred for each record. |
|
8 |
#' A stratification variable must be supplied via the |
|
9 |
#' `strata` element of the `variables` argument. |
|
10 |
#' |
|
11 |
#' @details The variance of the CMH relative risk estimate is calculated using |
|
12 |
#' the Greenland and Robins (1985) variance estimation. |
|
13 |
#' |
|
14 |
#' @param df (`data.frame`)\cr input data frame. |
|
15 |
#' @param .var (`string`)\cr name of the response variable. |
|
16 |
#' @param ref_path (`character`)\cr path to the reference group. |
|
17 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
18 |
#' @param ... Additional arguments passed to the statistics function. |
|
19 |
#' @param .stats (`character`)\cr statistics to calculate. |
|
20 |
#' @param .formats (`list`)\cr formats for the statistics. |
|
21 |
#' @param .labels (`list`)\cr labels for the statistics. |
|
22 |
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics. |
|
23 |
#' @param .ref_group (`data.frame`)\cr reference group data frame. |
|
24 |
#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column. |
|
25 |
#' @param variables (`list`)\cr list with strata variable names. |
|
26 |
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval. |
|
27 |
#' @param method (`string`)\cr method to use for relative risk calculation. |
|
28 |
#' @param weights_method (`string`)\cr method to use for weights calculation in stratified analysis. |
|
29 |
#' |
|
30 |
#' @note This has been adapted from the `odds_ratio` functions in the `tern` package. |
|
31 |
#' |
|
32 |
#' @name relative_risk |
|
33 |
NULL |
|
34 | ||
35 |
#' @describeIn relative_risk Statistics function estimating the relative risk for response. |
|
36 |
#' |
|
37 |
#' @return |
|
38 |
#' * `s_relative_risk()` returns a named list of elements `rel_risk_ci` and `pval`. |
|
39 |
#' |
|
40 |
#' @examples |
|
41 |
#' nex <- 100 |
|
42 |
#' dta <- data.frame( |
|
43 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
44 |
#' "grp" = sample(c("A", "B"), nex, TRUE), |
|
45 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE), |
|
46 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE), |
|
47 |
#' stringsAsFactors = TRUE |
|
48 |
#' ) |
|
49 |
#' |
|
50 |
#' s_relative_risk( |
|
51 |
#' df = subset(dta, grp == "A"), |
|
52 |
#' .var = "rsp", |
|
53 |
#' .ref_group = subset(dta, grp == "B"), |
|
54 |
#' .in_ref_col = FALSE, |
|
55 |
#' variables = list(strata = c("f1", "f2")), |
|
56 |
#' conf_level = 0.90 |
|
57 |
#' ) |
|
58 |
#' @export |
|
59 |
s_relative_risk <- function( |
|
60 |
df, |
|
61 |
.var, |
|
62 |
.ref_group, |
|
63 |
.in_ref_col, |
|
64 |
variables = list(strata = NULL), |
|
65 |
conf_level = 0.95, |
|
66 |
method = "cmh", |
|
67 |
weights_method = "cmh") { |
|
68 | 3x |
method <- match.arg(method) |
69 | 3x |
weights_method <- match.arg(weights_method) |
70 | 3x |
checkmate::assert_character(variables$strata, null.ok = FALSE) |
71 | 3x |
y <- list(rel_risk_ci = list(), pval = list()) |
72 | ||
73 | 3x |
if (!.in_ref_col) { |
74 | 2x |
rsp <- c(.ref_group[[.var]], df[[.var]]) |
75 | 2x |
grp <- factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref")) |
76 | ||
77 | 2x |
strata_colnames <- variables$strata |
78 | 2x |
checkmate::assert_character(strata_colnames, null.ok = FALSE) |
79 | 2x |
strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames) |
80 | ||
81 | 2x |
assert_df_with_variables(df, strata_vars) |
82 | 2x |
assert_df_with_variables(.ref_group, strata_vars) |
83 | ||
84 |
# Merging interaction strata for reference group rows data and remaining |
|
85 | 2x |
strata <- c(interaction(.ref_group[strata_colnames]), interaction(df[strata_colnames])) |
86 | 2x |
strata <- as.factor(strata) |
87 | ||
88 | 2x |
y <- prop_ratio_cmh(rsp, grp, strata, conf_level)[c("rel_risk_ci", "pval")] |
89 | ||
90 | 2x |
one_group_no_events <- (sum(.ref_group[[.var]]) == 0) || (sum(df[[.var]]) == 0) |
91 | 2x |
if (one_group_no_events) { |
92 | 1x |
y$rel_risk_ci <- c(est = NA_real_, lcl = 0, ucl = Inf) |
93 |
} |
|
94 |
} |
|
95 | ||
96 | 3x |
y$rel_risk_ci <- with_label( |
97 | 3x |
x = y$rel_risk_ci, |
98 | 3x |
label = paste0("Relative risk (", f_conf_level(conf_level), ")") |
99 |
) |
|
100 | 3x |
y$pval <- unname(y$pval) |
101 | ||
102 | 3x |
y |
103 |
} |
|
104 | ||
105 |
#' @describeIn relative_risk Formatted analysis function which is used as `afun`. Note that the |
|
106 |
#' junco specific `ref_path` and `.spl_context` arguments are used for reference column information. |
|
107 |
#' |
|
108 |
#' @return |
|
109 |
#' * `a_relative_risk()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
110 |
#' |
|
111 |
#' @examples |
|
112 |
#' nex <- 100 |
|
113 |
#' dta <- data.frame( |
|
114 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
115 |
#' "grp" = sample(c("A", "B"), nex, TRUE), |
|
116 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE), |
|
117 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE), |
|
118 |
#' stringsAsFactors = TRUE |
|
119 |
#' ) |
|
120 |
#' |
|
121 |
#' l <- basic_table() |> |
|
122 |
#' split_cols_by(var = "grp") |> |
|
123 |
#' analyze( |
|
124 |
#' vars = "rsp", |
|
125 |
#' afun = a_relative_risk, |
|
126 |
#' extra_args = list( |
|
127 |
#' conf_level = 0.90, |
|
128 |
#' variables = list(strata = "f1"), |
|
129 |
#' ref_path = c("grp", "B") |
|
130 |
#' ) |
|
131 |
#' ) |
|
132 |
#' |
|
133 |
#' build_table(l, df = dta) |
|
134 |
#' @export |
|
135 |
#' @order 2 |
|
136 |
a_relative_risk <- function( |
|
137 |
df, |
|
138 |
.var, |
|
139 |
ref_path, |
|
140 |
.spl_context, |
|
141 |
..., |
|
142 |
.stats = NULL, |
|
143 |
.formats = NULL, |
|
144 |
.labels = NULL, |
|
145 |
.indent_mods = NULL) { |
|
146 |
# Check for additional parameters to the statistics function |
|
147 | 2x |
dots_extra_args <- list(...) |
148 | ||
149 |
# Only support default stats, not custom stats |
|
150 | 2x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
151 | ||
152 |
# Obtain reference column information |
|
153 | 2x |
ref <- get_ref_info(ref_path, .spl_context) |
154 | ||
155 |
# Apply statistics function |
|
156 | 2x |
x_stats <- .apply_stat_functions( |
157 | 2x |
default_stat_fnc = s_relative_risk, |
158 | 2x |
custom_stat_fnc_list = NULL, |
159 | 2x |
args_list = c( |
160 | 2x |
df = list(df), |
161 | 2x |
.var = .var, |
162 | 2x |
.ref_group = list(ref$ref_group), |
163 | 2x |
.in_ref_col = ref$in_ref_col, |
164 | 2x |
dots_extra_args |
165 |
) |
|
166 |
) |
|
167 | ||
168 |
# Format according to specifications |
|
169 | 2x |
format_stats( |
170 | 2x |
x_stats, |
171 | 2x |
method_groups = "relative_risk", |
172 | 2x |
stats_in = .stats, |
173 | 2x |
formats_in = .formats, |
174 | 2x |
labels_in = .labels, |
175 | 2x |
indents_in = .indent_mods |
176 |
) |
|
177 |
} |
|
178 | ||
179 | ||
180 |
#' @keywords internal |
|
181 |
safe_mh_test <- function(...) { |
|
182 | 4x |
tryCatch( |
183 | 4x |
stats::mantelhaen.test(...), |
184 | 4x |
error = function(e) list(p.value = NA_real_) |
185 |
) |
|
186 |
} |
|
187 | ||
188 |
#' Relative Risk CMH Statistic |
|
189 |
#' |
|
190 |
#' Calculates the relative risk which is defined as the ratio between the |
|
191 |
#' response rates between the experimental treatment group and the control treatment group, adjusted |
|
192 |
#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. |
|
193 |
#' |
|
194 |
#' @inheritParams proposal_argument_convention |
|
195 |
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`. |
|
196 |
#' |
|
197 |
#' @return a list with elements `rel_risk_ci` and `pval`. |
|
198 |
#' @examples |
|
199 |
#' |
|
200 |
#' set.seed(2) |
|
201 |
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE) |
|
202 |
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE) |
|
203 |
#' grp <- factor(grp, levels = c("Placebo", "Treatment")) |
|
204 |
#' strata_data <- data.frame( |
|
205 |
#' "f1" = sample(c("a", "b"), 100, TRUE), |
|
206 |
#' "f2" = sample(c("x", "y", "z"), 100, TRUE), |
|
207 |
#' stringsAsFactors = TRUE |
|
208 |
#' ) |
|
209 |
#' |
|
210 |
#' prop_ratio_cmh( |
|
211 |
#' rsp = rsp, grp = grp, strata = interaction(strata_data), |
|
212 |
#' conf_level = 0.90 |
|
213 |
#' ) |
|
214 |
#' |
|
215 |
#' @export |
|
216 |
prop_ratio_cmh <- function(rsp, grp, strata, conf_level = 0.95) { |
|
217 | 4x |
grp <- as_factor_keep_attributes(grp) |
218 | 4x |
strata <- as_factor_keep_attributes(strata) |
219 | 4x |
has_single_stratum <- nlevels(strata) == 1 |
220 | 4x |
check_diff_prop_ci( |
221 | 4x |
rsp = rsp, |
222 | 4x |
grp = grp, |
223 | 4x |
conf_level = conf_level, |
224 | 4x |
strata = strata |
225 |
) |
|
226 | ||
227 | 4x |
if (any(tapply(rsp, strata, length) < 5)) { |
228 | ! |
warning("Less than 5 observations in some strata.") |
229 |
} |
|
230 | ||
231 |
# first dimension: treatment, control - therefore we reverse the levels order 2nd dimension: TRUE, FALSE 3rd |
|
232 |
# dimension: levels of strata rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records |
|
233 | 4x |
t_tbl <- table(factor(grp, levels = rev(levels(grp))), factor(rsp, levels = c("TRUE", "FALSE")), strata) |
234 | ||
235 |
# Index 1 is for treatment, index 2 is for control, e.g. n1 is number of treatment patients per stratum. |
|
236 | 4x |
n1resp <- t_tbl[1, 1, levels(strata)] |
237 | 4x |
n2resp <- t_tbl[2, 1, levels(strata)] |
238 | ||
239 | 4x |
t1 <- t_tbl[1, 1:2, , drop = TRUE] |
240 | 4x |
t2 <- t_tbl[2, 1:2, , drop = TRUE] |
241 | 4x |
if (has_single_stratum) { |
242 | ! |
n1 <- sum(t1) |
243 | ! |
n2 <- sum(t2) |
244 | ! |
checkmate::assert_count(n1) |
245 | ! |
checkmate::assert_count(n2) |
246 |
} else { |
|
247 | 4x |
n1 <- colSums(t1) |
248 | 4x |
n2 <- colSums(t2) |
249 | 4x |
checkmate::assert_integerish(n1) |
250 | 4x |
checkmate::assert_integerish(n2) |
251 | 4x |
checkmate::assert_true(identical(length(n1), length(n2))) |
252 |
} |
|
253 | ||
254 |
# CMH statistic for relative risk, Treatment over Control |
|
255 | 4x |
use_stratum <- (n1 > 0) & (n2 > 0) |
256 | 4x |
n1 <- n1[use_stratum] |
257 | 4x |
n2 <- n2[use_stratum] |
258 | 4x |
n1resp <- n1resp[use_stratum] |
259 | 4x |
n2resp <- n2resp[use_stratum] |
260 | 4x |
nresp <- n1resp + n2resp |
261 | 4x |
n <- n1 + n2 |
262 | ||
263 | 4x |
rel_risk_num <- sum(n1resp * n2 / n) |
264 | 4x |
rel_risk_denom <- sum(n2resp * n1 / n) |
265 | 4x |
log_rel_risk <- log(rel_risk_num) - log(rel_risk_denom) |
266 | 4x |
rel_risk <- exp(log_rel_risk) |
267 | ||
268 | 4x |
var_num <- sum((n1 * n2 * nresp - n1resp * n2resp * n) / n^2) |
269 | 4x |
var_denom <- rel_risk_num * rel_risk_denom |
270 | 4x |
var_log_rel_risk <- var_num / var_denom |
271 | 4x |
se_log_rel_risk <- sqrt(var_log_rel_risk) |
272 | ||
273 | 4x |
z <- stats::qnorm((1 + conf_level) / 2) |
274 | 4x |
log_rel_risk_ci <- log_rel_risk + c(-1, +1) * z * se_log_rel_risk |
275 | 4x |
ci <- exp(log_rel_risk_ci) |
276 | 4x |
rel_risk_ci <- stats::setNames(c(rel_risk, ci), c("est", "lcl", "ucl")) |
277 | ||
278 | 4x |
pval <- safe_mh_test(t_tbl, correct = FALSE)$p.value |
279 | ||
280 | 4x |
list(rel_risk_ci = rel_risk_ci, pval = pval) |
281 |
} |
1 |
.onLoad <- function(libname, pkgname) { |
|
2 | ! |
settings <- list( |
3 |
# A named list for tracking table names and counts in `insert_blank_line()` |
|
4 | ! |
junco.insert_blank_line = list() |
5 |
) |
|
6 | ||
7 | ! |
to_set <- !names(settings) %in% names(options()) |
8 | ! |
if (any(to_set)) options(settings[to_set]) |
9 | ! |
invisible() |
10 |
} |
|
11 | ||
12 | ||
13 |
.onUnload <- function(libpath) { |
|
14 | ! |
settings <- list( |
15 | ! |
junco.insert_blank_line = NULL |
16 |
) |
|
17 | ||
18 | ! |
options(settings) |
19 | ! |
invisible() |
20 |
} |
1 |
#' @note This has been forked from [tern::h_ancova()], because the new |
|
2 |
#' `weights_emmeans` option was added here. |
|
3 |
h_ancova <- function( |
|
4 |
.var, |
|
5 |
.df_row, |
|
6 |
variables, |
|
7 |
weights_emmeans, |
|
8 |
interaction_item = NULL) { |
|
9 | 17x |
checkmate::assert_string(.var) |
10 | 17x |
checkmate::assert_list(variables) |
11 | 17x |
checkmate::assert_subset(names(variables), c("arm", "covariates")) |
12 | ||
13 | 17x |
assert_df_with_variables( |
14 | 17x |
.df_row, |
15 | 17x |
list(rsp = .var) |
16 |
) |
|
17 | 17x |
arm <- variables$arm |
18 | 17x |
covariates <- variables$covariates |
19 | 17x |
if (!is.null(covariates) && length(covariates) > 0) { |
20 | 7x |
var_list <- get_covariates(covariates) |
21 | 7x |
assert_df_with_variables( |
22 | 7x |
.df_row, |
23 | 7x |
var_list |
24 |
) |
|
25 |
} |
|
26 | 17x |
covariates_part <- paste(covariates, collapse = " + ") |
27 | 17x |
formula_str <- paste0(.var, " ~ ", arm) |
28 | 17x |
if (covariates_part != "") { |
29 | 7x |
formula_str <- paste0(formula_str, "+", covariates_part) |
30 |
} |
|
31 | 17x |
formula <- stats::as.formula(formula_str) |
32 | 17x |
specs <- arm |
33 | 17x |
if (!is.null(interaction_item)) { |
34 | ! |
specs <- c(specs, interaction_item) |
35 |
} |
|
36 | 17x |
lm_fit <- stats::lm(formula = formula, data = .df_row) |
37 | 17x |
emmeans::emmeans( |
38 | 17x |
lm_fit, |
39 | 17x |
specs = specs, |
40 | 17x |
data = .df_row, |
41 | 17x |
weights = weights_emmeans |
42 |
) |
|
43 |
} |
|
44 | ||
45 |
#' @title Junco Extended ANCOVA Function |
|
46 |
#' @name s_ancova_j |
|
47 |
#' @inheritParams tern::s_ancova |
|
48 |
#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `"counterfactual"` by default. |
|
49 |
#' @description Extension to tern:::s_ancova, 3 extra statistics are returned |
|
50 |
#' * `lsmean_se`: Marginal mean and estimated SE in the group. |
|
51 |
#' * `lsmean_ci`: Marginal mean and associated confidence interval in the group. |
|
52 |
#' * `lsmean_diffci`: Difference in mean and associated confidence level in one combined statistic. |
|
53 |
#' In addition, the LS mean weights can be specified. |
|
54 |
#' In addition, also a NULL .ref_group can be specified, the lsmean_diff related estimates will be returned as NA. |
|
55 |
#' @export |
|
56 |
#' @return returns a named list of 8 statistics (3 extra compared to `tern:::s_ancova()`). |
|
57 |
#' @family Inclusion of ANCOVA Functions |
|
58 |
#' @examples |
|
59 |
#' library(dplyr) |
|
60 |
#' library(tern) |
|
61 |
#' |
|
62 |
#' df <- iris |> filter(Species == "virginica") |
|
63 |
#' .df_row <- iris |
|
64 |
#' .var <- "Petal.Length" |
|
65 |
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width") |
|
66 |
#' .ref_group <- iris |> filter(Species == "setosa") |
|
67 |
#' conf_level <- 0.95 |
|
68 |
#' s_ancova_j(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, conf_level) |
|
69 |
s_ancova_j <- function( |
|
70 |
df, |
|
71 |
.var, |
|
72 |
.df_row, |
|
73 |
variables, |
|
74 |
.ref_group, |
|
75 |
.in_ref_col, |
|
76 |
conf_level, |
|
77 |
interaction_y = FALSE, |
|
78 |
interaction_item = NULL, |
|
79 |
weights_emmeans = "counterfactual") { |
|
80 | 16x |
arm <- variables$arm |
81 | ||
82 | 16x |
.df_row <- subset(.df_row, !is.na(.df_row[[.var]])) |
83 | 16x |
df <- subset(df, !is.na(df[[.var]])) |
84 | 16x |
.ref_group <- subset(.ref_group, !is.na(.ref_group[[.var]])) |
85 | ||
86 | 16x |
n_obs_trt_lvls <- length(unique(.df_row[[arm]])) |
87 | ||
88 |
### sparse data problems with underlying ancova function |
|
89 | 16x |
if (NROW(.df_row) == 0) { |
90 | ! |
ret <- NULL |
91 | 16x |
} else if (NROW(df) == 0 || n_obs_trt_lvls < 2) { |
92 |
## all stats are NA |
|
93 | ! |
ret <- list( |
94 | ! |
n = with_label(0, "n"), |
95 | ! |
lsmean = with_label(NA, "Adjusted Mean"), |
96 | ! |
lsmean_se = with_label(rep(NA, 2), "Adjusted Mean (SE)"), |
97 | ! |
lsmean_ci = with_label( |
98 | ! |
rep(NA, 3), |
99 | ! |
paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")") |
100 |
), |
|
101 | ! |
lsmean_diff = with_label(NA, "Difference in Adjusted Means"), |
102 | ! |
lsmean_diff_ci = with_label( |
103 | ! |
rep(NA, 2), |
104 | ! |
paste("Difference in Adjusted Means", f_conf_level(conf_level)) |
105 |
), |
|
106 | ! |
lsmean_diffci = with_label( |
107 | ! |
rep(NA, 3), |
108 | ! |
paste0( |
109 | ! |
"Difference in Adjusted Means", |
110 |
" (", |
|
111 | ! |
f_conf_level(conf_level), |
112 |
")" |
|
113 |
) |
|
114 |
), |
|
115 | ! |
pval = with_label(NA, "p-value") |
116 |
) |
|
117 |
} else { |
|
118 | 16x |
if (NROW(.ref_group) == 0) { |
119 | ! |
.ref_group <- NULL |
120 | 16x |
} else if (length(levels(.df_row[[arm]])) > n_obs_trt_lvls) { |
121 |
# missing arm levels need to be removed from the factor, in order to have the correct estimates, |
|
122 |
# and avoid errors with further code |
|
123 | ! |
.df_row[[arm]] <- droplevels(.df_row[[arm]]) |
124 | ! |
df[[arm]] <- factor( |
125 | ! |
as.character(df[[arm]]), |
126 | ! |
levels = levels(.df_row[[arm]]) |
127 |
) |
|
128 | ! |
.ref_group[[arm]] <- factor( |
129 | ! |
as.character(.ref_group[[arm]]), |
130 | ! |
levels = levels(.df_row[[arm]]) |
131 |
) |
|
132 |
} |
|
133 | ||
134 | 16x |
emmeans_fit <- h_ancova( |
135 | 16x |
.var = .var, |
136 | 16x |
variables = variables, |
137 | 16x |
.df_row = .df_row, |
138 | 16x |
weights_emmeans = weights_emmeans, |
139 | 16x |
interaction_item = interaction_item |
140 |
) |
|
141 | 16x |
sum_fit <- summary( |
142 | 16x |
emmeans_fit, |
143 | 16x |
level = conf_level |
144 |
) |
|
145 | 16x |
arm <- variables$arm |
146 | 16x |
sum_level <- as.character(unique(df[[arm]])) |
147 | ||
148 |
# Ensure that there is only one element in sum_level. |
|
149 | 16x |
checkmate::assert_scalar(sum_level) |
150 | ||
151 | 16x |
sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ] |
152 | ||
153 |
# Get the index of the ref arm |
|
154 | 16x |
if (isTRUE(interaction_y)) { |
155 | ! |
y <- unlist(df[(df[[interaction_item]] == interaction_y), .var]) |
156 |
# convert characters selected in interaction_y into the numeric order |
|
157 | ! |
interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y) |
158 | ! |
sum_fit_level <- sum_fit_level[interaction_y, ] |
159 |
# if interaction is called, reset the index |
|
160 | ! |
ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])]) |
161 | ! |
ref_key <- utils::tail(ref_key, n = 1) |
162 | ! |
ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key |
163 |
} else { |
|
164 | 16x |
y <- df[[.var]] |
165 |
# Get the index of the ref arm when interaction is not called |
|
166 | 16x |
ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])]) |
167 | 16x |
ref_key <- utils::tail(ref_key, n = 1) |
168 |
} |
|
169 | ||
170 | 16x |
if (.in_ref_col) { |
171 | 4x |
ret <- list( |
172 | 4x |
n = with_label(length(y[!is.na(y)]), "n"), |
173 | 4x |
lsmean = with_label(sum_fit_level$emmean, "Adjusted Mean"), |
174 | 4x |
lsmean_se = with_label( |
175 | 4x |
c(sum_fit_level$emmean, sum_fit_level$SE), |
176 | 4x |
"Adjusted Mean (SE)" |
177 |
), |
|
178 | 4x |
lsmean_ci = with_label( |
179 | 4x |
c( |
180 | 4x |
sum_fit_level$emmean, |
181 | 4x |
sum_fit_level$lower.CL, |
182 | 4x |
sum_fit_level$upper.CL |
183 |
), |
|
184 | 4x |
paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")") |
185 |
), |
|
186 | 4x |
lsmean_diff = with_label( |
187 | 4x |
character(), |
188 | 4x |
"Difference in Adjusted Means" |
189 |
), |
|
190 | 4x |
lsmean_diff_ci = with_label( |
191 | 4x |
character(), |
192 | 4x |
paste("Difference in Adjusted Means", f_conf_level(conf_level)) |
193 |
), |
|
194 | 4x |
lsmean_diffci = with_label( |
195 | 4x |
character(), |
196 | 4x |
paste0( |
197 | 4x |
"Difference in Adjusted Means", |
198 |
" (", |
|
199 | 4x |
f_conf_level(conf_level), |
200 |
")" |
|
201 |
) |
|
202 |
), |
|
203 | 4x |
pval = with_label(character(), "p-value") |
204 |
) |
|
205 |
} else { |
|
206 | 12x |
if (!is.null(.ref_group)) { |
207 |
# Estimate the differences between the marginal means. |
|
208 | 12x |
emmeans_contrasts <- emmeans::contrast( |
209 | 12x |
emmeans_fit, |
210 |
# Compare all arms versus the control arm. |
|
211 | 12x |
method = "trt.vs.ctrl", |
212 |
# Take the arm factor from .ref_group as the control arm. |
|
213 | 12x |
ref = ref_key, |
214 | 12x |
level = conf_level |
215 |
) |
|
216 | 12x |
sum_contrasts <- summary( |
217 | 12x |
emmeans_contrasts, |
218 |
# Derive confidence intervals, t-tests and p-values. |
|
219 | 12x |
infer = TRUE, |
220 |
# Do not adjust the p-values for multiplicity. |
|
221 | 12x |
adjust = "none" |
222 |
) |
|
223 | 12x |
contrast_lvls <- gsub( |
224 | 12x |
paste0(" - ", .ref_group[[arm]][1], ".*"), |
225 |
"", |
|
226 | 12x |
sum_contrasts$contrast |
227 |
) |
|
228 | 12x |
if (!is.null(interaction_item)) { |
229 | ! |
sum_contrasts_level <- sum_contrasts[ |
230 | ! |
grepl(sum_level, contrast_lvls, fixed = TRUE), , |
231 | ! |
drop = FALSE |
232 |
] |
|
233 |
} else { |
|
234 | 12x |
sum_contrasts_level <- sum_contrasts[ |
235 | 12x |
(sum_level == contrast_lvls | paste0("(", sum_level, ")") == contrast_lvls), , |
236 | 12x |
drop = FALSE |
237 |
] |
|
238 |
} |
|
239 | 12x |
if (interaction_y != FALSE) { |
240 | ! |
sum_contrasts_level <- sum_contrasts_level[interaction_y, , drop = FALSE] |
241 |
} |
|
242 |
} else { |
|
243 | ! |
sum_contrasts_level <- list() |
244 | ! |
sum_contrasts_level[["estimate"]] <- NA |
245 | ! |
sum_contrasts_level[["lower.CL"]] <- NA |
246 | ! |
sum_contrasts_level[["upper.CL"]] <- NA |
247 | ! |
sum_contrasts_level[["p.value"]] <- NA |
248 |
} |
|
249 | 12x |
ret <- list( |
250 | 12x |
n = with_label(length(y[!is.na(y)]), "n"), |
251 | 12x |
lsmean = with_label(sum_fit_level$emmean, "Adjusted Mean"), |
252 | 12x |
lsmean_se = with_label( |
253 | 12x |
c(sum_fit_level$emmean, sum_fit_level$SE), |
254 | 12x |
"Adjusted Mean (SE)" |
255 |
), |
|
256 | 12x |
lsmean_ci = with_label( |
257 | 12x |
c( |
258 | 12x |
sum_fit_level$emmean, |
259 | 12x |
sum_fit_level$lower.CL, |
260 | 12x |
sum_fit_level$upper.CL |
261 |
), |
|
262 | 12x |
paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")") |
263 |
), |
|
264 | 12x |
lsmean_diff = with_label( |
265 | 12x |
sum_contrasts_level$estimate, |
266 | 12x |
"Difference in Adjusted Means" |
267 |
), |
|
268 | 12x |
lsmean_diff_ci = with_label( |
269 | 12x |
c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL), |
270 | 12x |
paste("Difference in Adjusted Means", f_conf_level(conf_level)) |
271 |
), |
|
272 | 12x |
lsmean_diffci = with_label( |
273 | 12x |
c( |
274 | 12x |
sum_contrasts_level$estimate, |
275 | 12x |
sum_contrasts_level$lower.CL, |
276 | 12x |
sum_contrasts_level$upper.CL |
277 |
), |
|
278 | 12x |
paste0( |
279 | 12x |
"Difference in Adjusted Means", |
280 |
" (", |
|
281 | 12x |
f_conf_level(conf_level), |
282 |
")" |
|
283 |
) |
|
284 |
), |
|
285 | 12x |
pval = with_label(sum_contrasts_level$p.value, "p-value") |
286 |
) |
|
287 |
} |
|
288 |
} |
|
289 | 16x |
return(ret) |
290 |
} |
|
291 | ||
292 |
#' @name s_summarize_ancova_j |
|
293 |
#' @title ANCOVA Summary Function |
|
294 |
#' @description Combination of tern::s_summary, and ANCOVA based estimates for mean and diff between columns, |
|
295 |
#' based on ANCOVA function `s_ancova_j` |
|
296 |
#' @inherit s_ancova_j |
|
297 |
#' @param ... Additional arguments passed to `s_ancova_j`. |
|
298 |
#' @details Combination of tern::s_summary, and ANCOVA based estimates for mean and diff between columns, |
|
299 |
#' based on ANCOVA function `s_ancova_j` |
|
300 |
#' @return returns the statistics from tern::s_summary(x), appended with a new statistics based upon ANCOVA |
|
301 |
#' @export |
|
302 |
#' @family Inclusion of ANCOVA Functions |
|
303 |
# @seealso s_ancova_j |
|
304 |
#' @examples |
|
305 |
#' library(dplyr) |
|
306 |
#' library(tern) |
|
307 |
#' |
|
308 |
#' df <- iris |> filter(Species == "virginica") |
|
309 |
#' .df_row <- iris |
|
310 |
#' .var <- "Petal.Length" |
|
311 |
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width") |
|
312 |
#' .ref_group <- iris |> filter(Species == "setosa") |
|
313 |
#' conf_level <- 0.95 |
|
314 |
#' s_summarize_ancova_j( |
|
315 |
#' df, |
|
316 |
#' .var = .var, |
|
317 |
#' .df_row = .df_row, |
|
318 |
#' variables = variables, |
|
319 |
#' .ref_group = .ref_group, |
|
320 |
#' .in_ref_col = FALSE, |
|
321 |
#' conf_level = conf_level |
|
322 |
#' ) |
|
323 |
s_summarize_ancova_j <- function( |
|
324 |
df, |
|
325 |
.var, |
|
326 |
.df_row, |
|
327 |
.ref_group, |
|
328 |
.in_ref_col, |
|
329 |
...) { |
|
330 | 14x |
x <- df[[.var]] |
331 | 14x |
y1 <- s_summary(x) |
332 | 14x |
y2 <- s_ancova_j( |
333 | 14x |
df = df, |
334 | 14x |
.var = .var, |
335 | 14x |
.ref_group = .ref_group, |
336 | 14x |
.in_ref_col = .in_ref_col, |
337 | 14x |
.df_row = .df_row, |
338 |
... |
|
339 |
) |
|
340 | 14x |
c(y1, y2) |
341 |
} |
|
342 | ||
343 |
#' @describeIn s_summarize_ancova_j Formatted analysis function which is used as `afun`. Note that the |
|
344 |
#' junco specific `ref_path` and `.spl_context` arguments are used for reference column information. |
|
345 |
#' |
|
346 |
#' @param ref_path (`character`)\cr path to the reference group. |
|
347 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
348 |
#' @param ... Additional arguments passed to the statistics function. |
|
349 |
#' @param .stats (`character`)\cr statistics to calculate. |
|
350 |
#' @param .formats (`list`)\cr formats for the statistics. |
|
351 |
#' @param .labels (`list`)\cr labels for the statistics. |
|
352 |
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics. |
|
353 |
#' |
|
354 |
#' @return |
|
355 |
#' * `a_summarize_ancova_j()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
356 |
#' |
|
357 |
#' @examples |
|
358 |
#' |
|
359 |
#' basic_table() |> |
|
360 |
#' split_cols_by("Species") |> |
|
361 |
#' add_colcounts() |> |
|
362 |
#' analyze( |
|
363 |
#' vars = "Petal.Length", |
|
364 |
#' afun = a_summarize_ancova_j, |
|
365 |
#' show_labels = "hidden", |
|
366 |
#' na_str = tern::default_na_str(), |
|
367 |
#' table_names = "unadj", |
|
368 |
#' var_labels = "Unadjusted comparison", |
|
369 |
#' extra_args = list( |
|
370 |
#' variables = list(arm = "Species", covariates = NULL), |
|
371 |
#' conf_level = 0.95, |
|
372 |
#' .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means"), |
|
373 |
#' ref_path = c("Species", "setosa") |
|
374 |
#' ) |
|
375 |
#' ) |> |
|
376 |
#' analyze( |
|
377 |
#' vars = "Petal.Length", |
|
378 |
#' afun = a_summarize_ancova_j, |
|
379 |
#' show_labels = "hidden", |
|
380 |
#' na_str = tern::default_na_str(), |
|
381 |
#' table_names = "adj", |
|
382 |
#' var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)", |
|
383 |
#' extra_args = list( |
|
384 |
#' variables = list( |
|
385 |
#' arm = "Species", |
|
386 |
#' covariates = c("Sepal.Length", "Sepal.Width") |
|
387 |
#' ), |
|
388 |
#' conf_level = 0.95, |
|
389 |
#' ref_path = c("Species", "setosa") |
|
390 |
#' ) |
|
391 |
#' ) |> |
|
392 |
#' build_table(iris) |
|
393 |
#' |
|
394 |
#' @export |
|
395 |
#' @order 2 |
|
396 |
a_summarize_ancova_j <- function( |
|
397 |
df, |
|
398 |
.var, |
|
399 |
.df_row, |
|
400 |
ref_path, |
|
401 |
.spl_context, |
|
402 |
..., |
|
403 |
.stats = NULL, |
|
404 |
.formats = NULL, |
|
405 |
.labels = NULL, |
|
406 |
.indent_mods = NULL) { |
|
407 |
# Check for additional parameters to the statistics function |
|
408 | 6x |
dots_extra_args <- list(...) |
409 | ||
410 |
# Only support default stats, not custom stats |
|
411 | 6x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
412 | ||
413 |
# Obtain reference column information |
|
414 | 6x |
ref <- get_ref_info(ref_path, .spl_context) |
415 | ||
416 |
# Apply statistics function |
|
417 | 6x |
x_stats <- .apply_stat_functions( |
418 | 6x |
default_stat_fnc = s_summarize_ancova_j, |
419 | 6x |
custom_stat_fnc_list = NULL, |
420 | 6x |
args_list = c( |
421 | 6x |
df = list(df), |
422 | 6x |
.var = .var, |
423 | 6x |
.df_row = list(.df_row), |
424 | 6x |
.ref_group = list(ref$ref_group), |
425 | 6x |
.in_ref_col = ref$in_ref_col, |
426 | 6x |
dots_extra_args |
427 |
) |
|
428 |
) |
|
429 | ||
430 |
# Format according to specifications |
|
431 | 6x |
format_stats( |
432 | 6x |
x_stats, |
433 | 6x |
method_groups = "summarize_ancova_j", |
434 | 6x |
stats_in = .stats, |
435 | 6x |
formats_in = .formats, |
436 | 6x |
labels_in = .labels, |
437 | 6x |
indents_in = .indent_mods |
438 |
) |
|
439 |
} |
1 |
make_dps_regex <- function(keyword, type = c("[", "{")) { |
|
2 | 894x |
mustart <- match.arg(type) |
3 | 894x |
muend <- ifelse(mustart == "[", "]", "}") |
4 | ||
5 | 894x |
paste0("[~][", mustart, "]", keyword, " ([^", muend, "]+)[", muend, "]") |
6 |
} |
|
7 | ||
8 |
handle_one_markup <- function(strs, keyword, rtfstart, rtfend) { |
|
9 | 447x |
start <- ifelse(nzchar(rtfstart), paste0("\\", rtfstart, " "), "") |
10 | 447x |
end <- ifelse(nzchar(rtfstart), paste0("\\", rtfend, " "), "") |
11 | 447x |
out <- gsub(make_dps_regex(keyword), paste0(start, "\\1", end), strs) |
12 | 447x |
out <- gsub(make_dps_regex(keyword, "{"), paste0(start, "\\1", end), out) |
13 | 447x |
out |
14 |
} |
|
15 | ||
16 | ||
17 |
dps_markup_df <- tibble::tribble( |
|
18 |
~keyword, |
|
19 |
~rtfstart, |
|
20 |
~rtfend, |
|
21 |
"super", |
|
22 |
"\\super", |
|
23 |
"\\nosupersub", |
|
24 |
"sub", |
|
25 |
"\\sub", |
|
26 |
"\\nosupersub", |
|
27 |
"optional", |
|
28 |
"", |
|
29 |
"" |
|
30 |
) |
|
31 | ||
32 | ||
33 |
#' Default String Mapping for Special Characters |
|
34 |
#' |
|
35 |
#' A tibble that maps special characters to their UTF-8 equivalents for use in RTF output. |
|
36 |
#' Currently it maps ">=" and "<=" to the Unicode characters. |
|
37 |
#' |
|
38 |
#' @return A tibble with columns 'pattern' and 'value', where 'pattern' contains |
|
39 |
#' the string to be replaced and 'value' contains the replacement. |
|
40 |
#' |
|
41 |
#' @export |
|
42 |
#' @keywords internal |
|
43 |
default_str_map <- tibble::tribble( |
|
44 |
~pattern, |
|
45 |
~value, |
|
46 |
">=", |
|
47 |
intToUtf8(strtoi(2265, base = 16)), |
|
48 |
"<=", |
|
49 |
intToUtf8(strtoi(2264, base = 16)) |
|
50 |
) |
|
51 | ||
52 |
convert_dps_markup <- function(strs, markup_df = dps_markup_df) { |
|
53 | 149x |
for (i in seq_len(nrow(markup_df))) { |
54 | 447x |
strs <- handle_one_markup(strs, markup_df$keyword[i], markup_df$rtfstart[i], markup_df$rtfend[i]) |
55 |
} |
|
56 | 149x |
strs |
57 |
} |
|
58 | ||
59 |
strmodify <- function(strs, replacement_str = default_str_map) { |
|
60 | 149x |
map_tbl <- replacement_str |
61 | 149x |
if (!is.null(map_tbl) && nrow(map_tbl) > 0) { |
62 | 149x |
for (i in seq_len(nrow(map_tbl))) { |
63 | 298x |
pattern <- map_tbl$pattern[[i]] |
64 | 298x |
value <- map_tbl$value[i] |
65 | 298x |
strs <- gsub(pattern, value, strs, fixed = TRUE) |
66 |
} |
|
67 |
} |
|
68 | 149x |
return(strs) |
69 |
} |
|
70 | ||
71 |
prep_strs_for_rtf <- function(strs, string_map = default_str_map, markup_df = dps_markup_df) { |
|
72 | 149x |
strs <- convert_dps_markup(strs, markup_df) |
73 | 149x |
strs <- strmodify(strs, string_map) |
74 | 149x |
return(strs) |
75 |
} |
|
76 | ||
77 | ||
78 |
#' Relabel Variables in a Dataset |
|
79 |
#' |
|
80 |
#' This function relabels variables in a dataset based on a provided list of labels. |
|
81 |
#' It can either replace existing labels or only add labels to variables without them. |
|
82 |
#' |
|
83 |
#' @param x (`data.frame`)\cr dataset containing variables to be relabeled. |
|
84 |
#' @param lbl_list (`list`)\cr named list of labels to apply to variables. |
|
85 |
#' @param replace_existing (`logical`)\cr if TRUE, existing labels will be replaced; |
|
86 |
#' if FALSE, only variables without labels will be updated. |
|
87 |
#' |
|
88 |
#' @return The dataset with updated variable labels. |
|
89 |
#' |
|
90 |
#' @export |
|
91 |
#' @keywords internal |
|
92 |
var_relabel_list <- function(x, lbl_list, replace_existing = TRUE) { |
|
93 | ! |
if (replace_existing) { |
94 | ! |
vars_to_relabel <- intersect(names(x), names(lbl_list)) |
95 |
} else { |
|
96 | ! |
get_variables_with_empty_labels <- function(x) { |
97 | ! |
labels <- var_labels(x) |
98 | ! |
mask <- lapply(labels, is.na) |> unlist() |
99 | ! |
return(names(labels[mask])) |
100 |
} |
|
101 | ||
102 | ! |
vars_without_labels <- get_variables_with_empty_labels(x) |
103 | ! |
vars_to_relabel <- intersect(vars_without_labels, names(lbl_list)) |
104 |
} |
|
105 | ! |
do.call(var_relabel, c(list(x = x), lbl_list[vars_to_relabel])) |
106 |
} |
1 |
#' Workaround statistics function to add HR with CI |
|
2 |
#' |
|
3 |
#' This is a workaround for [tern::s_coxph_pairwise()], which adds a statistic |
|
4 |
#' containing the hazard ratio estimate together with the confidence interval. |
|
5 |
#' |
|
6 |
#' @inheritParams proposal_argument_convention |
|
7 |
#' |
|
8 |
#' |
|
9 |
#' @name coxph_hr |
|
10 |
#' @return for `s_coxph_hr` a list containing the same statistics returned by [tern::s_coxph_pairwise] |
|
11 |
#' and the additional `lr_stat_df` statistic. for `a_coxph_hr`, a `VerticalRowsSection` |
|
12 |
#' object. |
|
13 |
#' @order 1 |
|
14 |
NULL |
|
15 | ||
16 |
#' @describeIn coxph_hr Statistics function forked from [tern::s_coxph_pairwise()]. |
|
17 |
#' the difference is that: |
|
18 |
#' 1) It returns the additional statistic `lr_stat_df` (log rank statistic with degrees of freedom). |
|
19 |
#' @export |
|
20 |
#' @order 3 |
|
21 |
#' |
|
22 |
#' @importFrom survival Surv coxph survdiff |
|
23 |
#' |
|
24 |
#' @examples |
|
25 |
#' adtte_f <- tern::tern_ex_adtte |> |
|
26 |
#' dplyr::filter(PARAMCD == "OS") |> |
|
27 |
#' dplyr::mutate(is_event = CNSR == 0) |
|
28 |
#' df <- adtte_f |> dplyr::filter(ARMCD == "ARM A") |
|
29 |
#' df_ref <- adtte_f |> dplyr::filter(ARMCD == "ARM B") |
|
30 |
#' |
|
31 |
#' s_coxph_hr( |
|
32 |
#' df = df, |
|
33 |
#' .ref_group = df_ref, |
|
34 |
#' .in_ref_col = FALSE, |
|
35 |
#' .var = "AVAL", |
|
36 |
#' is_event = "is_event", |
|
37 |
#' strata = NULL |
|
38 |
#' ) |
|
39 |
s_coxph_hr <- function( |
|
40 |
df, |
|
41 |
.ref_group, |
|
42 |
.in_ref_col, |
|
43 |
.var, |
|
44 |
is_event, |
|
45 |
strata = NULL, |
|
46 |
control = control_coxph(), |
|
47 |
alternative = c("two.sided", "less", "greater")) { |
|
48 | 9x |
checkmate::assert_string(.var) |
49 | 9x |
checkmate::assert_numeric(df[[.var]]) |
50 | 9x |
checkmate::assert_logical(df[[is_event]]) |
51 | 9x |
assert_df_with_variables(df, list(tte = .var, is_event = is_event)) |
52 | 9x |
alternative <- match.arg(alternative) |
53 | 9x |
pval_method <- control$pval_method |
54 | 9x |
ties <- control$ties |
55 | 9x |
conf_level <- control$conf_level |
56 | ||
57 | 9x |
if (.in_ref_col) { |
58 | 2x |
return(list( |
59 | 2x |
pvalue = with_label(list(), paste0("p-value (", pval_method, ")")), |
60 | 2x |
lr_stat_df = list(), |
61 | 2x |
hr = list(), |
62 | 2x |
hr_ci = with_label(list(), f_conf_level(conf_level)), |
63 | 2x |
hr_ci_3d = with_label(list(), paste0("Hazard Ratio (", f_conf_level(conf_level), ")")), |
64 | 2x |
n_tot = list(), |
65 | 2x |
n_tot_events = list() |
66 |
)) |
|
67 |
} |
|
68 | 7x |
data <- rbind(.ref_group, df) |
69 | 7x |
group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x")) |
70 | ||
71 | 7x |
df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], arm = group) |
72 | 7x |
if (is.null(strata)) { |
73 | 3x |
formula_cox <- survival::Surv(tte, is_event) ~ arm |
74 |
} else { |
|
75 | 4x |
formula_cox <- stats::as.formula(paste0( |
76 | 4x |
"survival::Surv(tte, is_event) ~ arm + strata(", |
77 | 4x |
paste(strata, collapse = ","), |
78 |
")" |
|
79 |
)) |
|
80 | 4x |
df_cox <- cbind(df_cox, data[strata]) |
81 |
} |
|
82 | 7x |
cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, ties = ties) |
83 | 7x |
sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) |
84 | 7x |
original_survdiff <- survival::survdiff(formula_cox, data = df_cox) |
85 | 7x |
log_rank_stat <- original_survdiff$chisq |
86 | ||
87 |
# See survival::survdiff for the d.f. calculation. |
|
88 | 7x |
etmp <- if (is.matrix(original_survdiff$exp)) { |
89 | 4x |
apply(original_survdiff$exp, 1, sum) |
90 |
} else { |
|
91 | 3x |
original_survdiff$exp |
92 |
} |
|
93 | 7x |
log_rank_df <- (sum(1 * (etmp > 0))) - 1 |
94 |
# Check the consistency of the d.f. with the p-value returned by survival::survdiff. |
|
95 | 7x |
log_rank_pvalue <- stats::pchisq(log_rank_stat, log_rank_df, lower.tail = FALSE) |
96 | 7x |
checkmate::assert_true(all.equal(log_rank_pvalue, original_survdiff$pvalue)) |
97 | ||
98 | 7x |
pval <- switch(pval_method, |
99 | 7x |
wald = sum_cox$waldtest["pvalue"], |
100 | 7x |
`log-rank` = log_rank_pvalue, |
101 | 7x |
likelihood = sum_cox$logtest["pvalue"] |
102 |
) |
|
103 | ||
104 |
# Handle one-sided alternatives. |
|
105 | 7x |
if (alternative != "two.sided") { |
106 | 2x |
right_direction <- if (alternative == "less") { |
107 | 1x |
sum_cox$conf.int[1, 1] < 1 |
108 |
} else { |
|
109 | 1x |
sum_cox$conf.int[1, 1] >= 1 |
110 |
} |
|
111 | 2x |
pval <- if (right_direction) { |
112 | 1x |
pval / 2 |
113 |
} else { |
|
114 | 1x |
1 - pval / 2 |
115 |
} |
|
116 |
} |
|
117 | ||
118 | 7x |
list( |
119 | 7x |
pvalue = with_label(unname(pval), paste0("p-value (", pval_method, ")")), |
120 | 7x |
lr_stat_df = unname(c(log_rank_stat, log_rank_df)), |
121 | 7x |
hr = sum_cox$conf.int[1, 1], |
122 | 7x |
hr_ci = with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)), |
123 | 7x |
hr_ci_3d = with_label( |
124 | 7x |
c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])), |
125 | 7x |
paste0("Hazard Ratio (", f_conf_level(conf_level), ")") |
126 |
), |
|
127 | 7x |
n_tot = sum_cox$n, |
128 | 7x |
n_tot_events = sum_cox$nevent |
129 |
) |
|
130 |
} |
|
131 | ||
132 |
#' @describeIn coxph_hr Formatted analysis function which is used as `afun`. |
|
133 |
#' |
|
134 |
#' @examples |
|
135 |
#' library(dplyr) |
|
136 |
#' |
|
137 |
#' adtte_f <- tern::tern_ex_adtte |> |
|
138 |
#' filter(PARAMCD == "OS") |> |
|
139 |
#' mutate(is_event = CNSR == 0) |
|
140 |
#' |
|
141 |
#' df <- adtte_f |> filter(ARMCD == "ARM A") |
|
142 |
#' df_ref_group <- adtte_f |> filter(ARMCD == "ARM B") |
|
143 |
#' |
|
144 |
#' basic_table() |> |
|
145 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") |> |
|
146 |
#' add_colcounts() |> |
|
147 |
#' analyze("AVAL", |
|
148 |
#' afun = s_coxph_hr, |
|
149 |
#' extra_args = list(is_event = "is_event"), |
|
150 |
#' var_labels = "Unstratified Analysis", |
|
151 |
#' show_labels = "visible" |
|
152 |
#' ) |> |
|
153 |
#' build_table(df = adtte_f) |
|
154 |
#' |
|
155 |
#' basic_table() |> |
|
156 |
#' split_cols_by(var = "ARMCD", ref_group = "ARM A") |> |
|
157 |
#' add_colcounts() |> |
|
158 |
#' analyze("AVAL", |
|
159 |
#' afun = s_coxph_hr, |
|
160 |
#' extra_args = list( |
|
161 |
#' is_event = "is_event", |
|
162 |
#' strata = "SEX", |
|
163 |
#' control = tern::control_coxph(pval_method = "wald") |
|
164 |
#' ), |
|
165 |
#' var_labels = "Unstratified Analysis", |
|
166 |
#' show_labels = "visible" |
|
167 |
#' ) |> |
|
168 |
#' build_table(df = adtte_f) |
|
169 |
#' @export |
|
170 |
#' @order 2 |
|
171 |
a_coxph_hr <- function( |
|
172 |
df, |
|
173 |
.var, |
|
174 |
ref_path, |
|
175 |
.spl_context, |
|
176 |
..., |
|
177 |
.stats = NULL, |
|
178 |
.formats = NULL, |
|
179 |
.labels = NULL, |
|
180 |
.indent_mods = NULL) { |
|
181 |
# Check for additional parameters to the statistics function |
|
182 | 6x |
dots_extra_args <- list(...) |
183 | ||
184 |
# Only support default stats, not custom stats |
|
185 | 6x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
186 | ||
187 |
# Obtain reference column information |
|
188 | 6x |
ref <- get_ref_info(ref_path, .spl_context) |
189 | ||
190 |
# Apply statistics function |
|
191 | 6x |
x_stats <- .apply_stat_functions( |
192 | 6x |
default_stat_fnc = s_coxph_hr, |
193 | 6x |
custom_stat_fnc_list = NULL, |
194 | 6x |
args_list = c( |
195 | 6x |
df = list(df), |
196 | 6x |
.var = .var, |
197 | 6x |
.ref_group = list(ref$ref_group), |
198 | 6x |
.in_ref_col = ref$in_ref_col, |
199 | 6x |
dots_extra_args |
200 |
) |
|
201 |
) |
|
202 | ||
203 |
# Format according to specifications |
|
204 | 6x |
format_stats( |
205 | 6x |
x_stats, |
206 | 6x |
method_groups = "coxph_hr", |
207 | 6x |
stats_in = .stats, |
208 | 6x |
formats_in = .formats, |
209 | 6x |
labels_in = .labels, |
210 | 6x |
indents_in = .indent_mods |
211 |
) |
|
212 |
} |
1 |
## contribute to formatters |
|
2 |
#' @name inches_to_spaces |
|
3 |
#' @title Conversion of inches to spaces |
|
4 |
#' @param ins numeric. Vector of widths in inches |
|
5 |
#' @param fontspec font_spec. The font specification to use |
|
6 |
#' @param raw logical(1). Should the answer be returned unrounded |
|
7 |
#' (`TRUE`), or rounded to the nearest reasonable value (`FALSE`, |
|
8 |
#' the default) |
|
9 |
#' @param tol numeric(1). The numeric tolerance, values |
|
10 |
#' between an integer `n`, and `n+tol` will be returned |
|
11 |
#' as `n`, rather than `n+1`, if `raw == FALSE`. Ignored |
|
12 |
#' when `raw` is `TRUE`. |
|
13 |
#' @return the number of either fractional (`raw = TRUE`) or whole (`raw = FALSE`) |
|
14 |
#' spaces that will fit within `ins` inches in the specified font |
|
15 |
#' @export |
|
16 |
inches_to_spaces <- function(ins, fontspec, raw = FALSE, tol = sqrt(.Machine$double.eps)) { |
|
17 | 48x |
newdev <- open_font_dev(fontspec) |
18 | 48x |
if (newdev) { |
19 | ! |
on.exit(close_font_dev()) |
20 |
} |
|
21 | 48x |
spcw <- grid::convertWidth(grid::unit(1, "strwidth", " "), "inches", valueOnly = TRUE) |
22 | 48x |
ans <- ins / spcw |
23 | 48x |
if (!raw) { |
24 | 48x |
win_tol <- ans - floor(ans) < tol |
25 | 48x |
ans[win_tol] <- floor(ans) |
26 | 48x |
ans[!win_tol] <- ceiling(ans) |
27 |
} |
|
28 | 48x |
ans |
29 |
} |
|
30 | ||
31 |
#' @importFrom formatters wrap_string_ttype |
|
32 |
ttype_wrap_vec <- function(vec, fontspec, width, wordbreak_ok = TRUE, ...) { |
|
33 | ! |
lapply( |
34 | ! |
vec, |
35 | ! |
wrap_string_ttype, |
36 | ! |
width_spc = width, |
37 | ! |
fontspec = fontspec, |
38 | ! |
wordbreak_ok = wordbreak_ok, |
39 |
... |
|
40 |
) |
|
41 |
} |
|
42 | ||
43 |
#' @name check_wrap_nobreak |
|
44 |
#' @title Check Word Wrapping |
|
45 |
#' @description Check a set of column widths for word-breaking wrap behavior |
|
46 |
#' @param tt TableTree |
|
47 |
#' @param colwidths numeric. Column widths (in numbers of spaces under `fontspec`) |
|
48 |
#' @param fontspec font_spec. |
|
49 |
#' |
|
50 |
#' @return `TRUE` if the wrap is able to be done without breaking words, |
|
51 |
#' `FALSE` if wordbreaking is required to apply `colwidths` |
|
52 |
#' @rdname check_wrap_nobreak |
|
53 |
#' @export |
|
54 |
check_wrap_nobreak <- function(tt, colwidths, fontspec) { |
|
55 | ! |
newdev <- open_font_dev(fontspec) |
56 | ! |
if (newdev) { |
57 | ! |
on.exit(close_font_dev()) |
58 |
} |
|
59 | ! |
mpf <- matrix_form(tt, fontspec = fontspec) |
60 | ! |
strs <- mf_strings(mpf) |
61 | ! |
colok <- vapply( |
62 | ! |
seq_len(ncol(strs)), |
63 | ! |
function(i) { |
64 | ! |
res <- tryCatch( |
65 | ! |
ttype_wrap_vec(strs[, i, drop = TRUE], colwidths[i], fontspec = fontspec, wordbreak_ok = FALSE), |
66 | ! |
error = function(e) e |
67 |
) |
|
68 | ! |
!methods::is(res, "error") |
69 |
}, |
|
70 | ! |
TRUE |
71 |
) |
|
72 | ! |
all(colok) |
73 |
} |
|
74 | ||
75 |
#' Colwidths for all columns to be forced on one page |
|
76 |
#' |
|
77 |
#' @param tt TableTree object to calculate column widths for |
|
78 |
#' @param fontspec Font specification object |
|
79 |
#' @param col_gap Column gap in spaces |
|
80 |
#' @param rowlabel_width Width of row labels in spaces |
|
81 |
#' @param print_width_ins Print width in inches |
|
82 |
#' @param landscape Whether the output is in landscape orientation |
|
83 |
#' @param lastcol_gap Whether to include a gap after the last column |
|
84 |
#' @keywords internal |
|
85 |
smart_colwidths_1page <- function( |
|
86 |
tt, |
|
87 |
fontspec, |
|
88 |
col_gap = 6L, |
|
89 |
rowlabel_width = inches_to_spaces(2, fontspec), |
|
90 |
print_width_ins = ifelse(landscape, 11, 8.5) - 2.12, |
|
91 |
landscape = FALSE, |
|
92 |
lastcol_gap = TRUE) { |
|
93 | ! |
total_cpp <- floor(inches_to_spaces(print_width_ins, fontspec = fontspec, raw = TRUE)) |
94 | ||
95 | ! |
nc <- ncol(tt) |
96 | ! |
remain <- total_cpp - rowlabel_width - col_gap * (nc - !lastcol_gap) |
97 | ||
98 | ! |
c(rowlabel_width - col_gap, spread_integer(remain, nc)) |
99 |
} |
|
100 | ||
101 |
spaces_to_inches <- function(spcs, fontspec) { |
|
102 | ! |
nchar_ttype(" ", fontspec, raw = TRUE) * spcs |
103 |
} |
|
104 | ||
105 |
no_cellwrap_colwidths <- function(tt, fontspec, col_gap = 4L, label_width_ins = 2) { |
|
106 | 6x |
if (is.null(col_gap)) { |
107 | ! |
col_gap <- 4L |
108 |
} |
|
109 | 6x |
mpf <- matrix_form(tt, TRUE, FALSE, fontspec = fontspec, col_gap = col_gap) |
110 | 6x |
strmat <- mf_strings(mpf) |
111 | 6x |
label_width_max <- inches_to_spaces(label_width_ins, fontspec) |
112 | ||
113 | 6x |
nchar_mat <- nchar_ttype(strmat[-seq_len(mf_nlheader(mpf)), , drop = FALSE], |
114 | 6x |
fontspec = fontspec |
115 |
) |
|
116 | ||
117 | 6x |
label_width <- min( |
118 | 6x |
label_width_max, |
119 | 6x |
max(nchar_mat[, 1, drop = TRUE]) |
120 |
) |
|
121 | 6x |
col_maxes <- apply(nchar_mat[, -1, drop = FALSE], 2, max) |
122 | 6x |
c(label_width, col_maxes + col_gap) |
123 |
} |
|
124 | ||
125 |
pack_into_lines2 <- function(strs, wrdwidths = nchar_ttype(strs), colwidth, fontspec) { |
|
126 | 3980x |
if (length(wrdwidths) == 0 || (length(wrdwidths) == 1 && wrdwidths <= colwidth)) { |
127 | 1265x |
return(1) |
128 |
} |
|
129 | 2715x |
newdev <- open_font_dev(fontspec) |
130 | 2715x |
if (newdev) { |
131 | ! |
on.exit(close_font_dev()) |
132 |
} |
|
133 | ||
134 | 2715x |
csums <- cumsum(wrdwidths + c(0, rep(1, length(wrdwidths) - 1))) |
135 | 2715x |
oneline_wdth <- sum(wrdwidths) + length(wrdwidths) - 1 |
136 | 2715x |
if (colwidth >= ceiling(oneline_wdth)) { |
137 | 1842x |
return(1) |
138 |
} |
|
139 | 873x |
lines <- 0 |
140 | 873x |
wrdsused <- 0 |
141 | 873x |
widthused <- 0 |
142 | 873x |
index_seq <- seq_along(wrdwidths) |
143 | 873x |
totwrds <- length(wrdwidths) |
144 | 873x |
adj <- 0 |
145 |
## finite precision arithmetic is a dreamscape of infinite wonder |
|
146 | 873x |
tol <- sqrt(.Machine$double.eps) |
147 | 873x |
while (wrdsused < length(wrdwidths)) { |
148 | 2080x |
csums_i <- csums - widthused |
149 | ||
150 | 2080x |
fit <- which(index_seq > wrdsused & csums_i <= colwidth + tol) |
151 | 2080x |
widthused <- sum(widthused, wrdwidths[fit], length(fit)) |
152 | 2080x |
wrdsused <- wrdsused + length(fit) |
153 | 2080x |
lines <- lines + 1 |
154 |
} |
|
155 | 873x |
lines |
156 |
} |
|
157 | ||
158 |
recursive_add_poss <- function( |
|
159 |
wlst, |
|
160 |
cur_lst, |
|
161 |
ubnd_width, |
|
162 |
lbnd_width, |
|
163 |
ubnd_lines = calc_total_lns(wlst, fontspec = fontspec, colwidth = ubnd_width)$lines, |
|
164 |
lbnd_lines = calc_total_lns(wlst, fontspec = fontspec, colwidth = lbnd_width)$lines, |
|
165 |
fontspec) { |
|
166 | 180x |
if (ubnd_width <= lbnd_width + 1 + sqrt(.Machine$double.eps)) { |
167 | 39x |
return(cur_lst) |
168 |
} |
|
169 | 141x |
curw <- floor((ubnd_width + lbnd_width) / 2) |
170 | 141x |
curlnsdf <- calc_total_lns(wlst, fontspec = fontspec, colwidth = curw) |
171 | 141x |
to_add <- list(curlnsdf) |
172 | ||
173 | 141x |
if (curlnsdf$lines != lbnd_lines) { |
174 | 80x |
to_add <- c( |
175 | 80x |
to_add, |
176 | 80x |
recursive_add_poss( |
177 | 80x |
wlst = wlst, |
178 | 80x |
fontspec = fontspec, |
179 | 80x |
ubnd_width = curw, |
180 | 80x |
ubnd_lines = curlnsdf$lines, |
181 | 80x |
lbnd_width = lbnd_width, |
182 | 80x |
lbnd_lines = lbnd_lines, |
183 | 80x |
cur_lst = list() |
184 |
) |
|
185 |
) |
|
186 |
} |
|
187 | 141x |
if (curlnsdf$lines != ubnd_lines) { |
188 | 80x |
to_add <- c( |
189 | 80x |
to_add, |
190 | 80x |
recursive_add_poss( |
191 | 80x |
wlst = wlst, |
192 | 80x |
fontspec = fontspec, |
193 | 80x |
ubnd_width = ubnd_width, |
194 | 80x |
ubnd_lines = ubnd_lines, |
195 | 80x |
lbnd_width = curw, |
196 | 80x |
lbnd_lines = curlnsdf$lines, |
197 | 80x |
cur_lst = list() |
198 |
) |
|
199 |
) |
|
200 |
} |
|
201 | ||
202 | 141x |
c(cur_lst, to_add) |
203 |
} |
|
204 | ||
205 |
calc_total_lns <- function(wlst, colwidth, fontspec, lns_per_pg = 50) { |
|
206 | 221x |
lns <- vapply( |
207 | 221x |
seq_along(wlst), |
208 | 221x |
function(i) { |
209 | 3980x |
pack_into_lines2(wrdwidths = wlst[[i]], colwidth = colwidth, fontspec = fontspec) |
210 |
}, |
|
211 | 221x |
1 |
212 |
) |
|
213 | 221x |
nonblnks <- !vapply(wlst, function(x) length(x) == 0, TRUE) |
214 | 221x |
celllns <- lns[-1] |
215 | 221x |
cllsum <- sum(celllns) |
216 | 221x |
hdr <- lns[1] |
217 | 221x |
data.frame( |
218 | 221x |
colwidth = colwidth, |
219 | 221x |
lines = cllsum + ceiling(cllsum / lns_per_pg) * hdr, |
220 | 221x |
cell_lines = cllsum, |
221 | 221x |
lbl_lines = hdr, |
222 | 221x |
min_cell_lines = min(1, celllns[nonblnks], na.rm = TRUE), |
223 | 221x |
max_cell_lines = max(celllns) |
224 |
) |
|
225 |
} |
|
226 | ||
227 | ||
228 |
calc_poss_lines <- function(wlst, lbound, avail_spc, fontspec) { |
|
229 | 20x |
minposs <- calc_total_lns(wlst, lbound + avail_spc, fontspec) |
230 | 20x |
maxposs <- calc_total_lns(wlst, lbound, fontspec) |
231 | ||
232 | 20x |
retlst <- list(minposs, maxposs) |
233 | ||
234 | 20x |
retlst <- recursive_add_poss( |
235 | 20x |
wlst = wlst, |
236 | 20x |
ubnd_width = lbound + avail_spc, |
237 | 20x |
lbnd_width = lbound, |
238 | 20x |
cur_lst = retlst, |
239 | 20x |
fontspec = fontspec |
240 |
) |
|
241 | 20x |
do.call(rbind, retlst) |
242 |
} |
|
243 | ||
244 | ||
245 |
make_poss_wdf <- function( |
|
246 |
mpf, |
|
247 |
incl_header = FALSE, |
|
248 |
col_gap = 3, |
|
249 |
pg_width_ins = 8.88, |
|
250 |
fontspec = font_spec("Times", 9, 1.2)) { |
|
251 | 2x |
newdev <- open_font_dev(fontspec) |
252 | 2x |
if (newdev) { |
253 | ! |
on.exit(close_font_dev()) |
254 |
} |
|
255 | 2x |
if (!methods::is(mpf, "MatrixPrintForm")) { |
256 | 2x |
mpf <- matrix_form(mpf, fontspec = fontspec, col_gap = col_gap) |
257 |
} |
|
258 | 2x |
strs <- mf_strings(mpf) |
259 | 2x |
if (!incl_header) { |
260 | ! |
strs <- strs[-seq_len(mf_nlheader(mpf)), ] |
261 |
} |
|
262 | 2x |
nc <- ncol(strs) |
263 | 2x |
nr <- nrow(strs) |
264 |
## strip out markup so we are not over countin (by as much) |
|
265 | 2x |
strs <- matrix(gsub("~[{[][[:alpha:]]+ ([^]}]+)[]}]", "\\1", strs), ncol = nc, nrow = nr) |
266 | ||
267 | 2x |
possrows <- lapply(seq_len(ncol(strs)), function(ii) { |
268 | 20x |
res <- one_col_strs(strs[, ii, drop = TRUE], fontspec = fontspec, col_gap = col_gap) |
269 | 20x |
res$col_num <- ii |
270 | 20x |
res |
271 |
}) |
|
272 | ||
273 | 2x |
possdf <- do.call(rbind, possrows) |
274 | 2x |
o <- order(possdf$col_num, possdf$colwidth) |
275 | 2x |
possdf <- possdf[o, ] |
276 | 2x |
possdf |
277 |
} |
|
278 | ||
279 |
#' |
|
280 |
#' @param mpf (`listing_df` or `MatrixPrintForm` derived thereof)\cr The listing |
|
281 |
#' calculate column widths for. |
|
282 |
#' @param incl_header (`logical(1)`)\cr Should the constraint to not break up |
|
283 |
#' individual words be extended to words in the column labels? Defaults to `TRUE` |
|
284 |
#' @param col_gap (`numeric(1)`)\cr Amount of extra space (in spaces) to |
|
285 |
#' assume between columns. Defaults to `0.5` |
|
286 |
#' @param pg_width_ins (`numeric(1)`)\cr Number of inches in width for |
|
287 |
#' *the portion of the page the listing will be printed to*. Defaults to `8.88` |
|
288 |
#' which corresponds to landscape orientation on a standard page after margins. |
|
289 |
#' @param fontspec (`font_spec`)\cr Defaults to Times New Roman 8pt font with 1.2 line |
|
290 |
#' height. |
|
291 |
#' @param verbose (`logical(1)`)\cr Should additional information messages be |
|
292 |
#' displayed during the calculation of the column widths? Defaults to `FALSE`. |
|
293 |
#' @returns A vector of column widths suitable to use in `tt_to_tlgrtf` and |
|
294 |
#' other exporters. |
|
295 |
#' @rdname def_colwidths |
|
296 |
#' @export |
|
297 |
listing_column_widths <- function( |
|
298 |
mpf, |
|
299 |
incl_header = TRUE, |
|
300 |
col_gap = 0.5, |
|
301 |
pg_width_ins = 8.88, |
|
302 |
fontspec = font_spec("Times", 8, 1.2), |
|
303 |
verbose = FALSE) { |
|
304 | 2x |
newdev <- open_font_dev(fontspec) |
305 | 2x |
if (newdev) { |
306 | ! |
on.exit(close_font_dev()) |
307 |
} |
|
308 | 2x |
possdf <- make_poss_wdf( |
309 | 2x |
mpf = mpf, |
310 | 2x |
incl_header = incl_header, |
311 | 2x |
col_gap = col_gap, |
312 | 2x |
fontspec = fontspec, |
313 | 2x |
pg_width_ins = pg_width_ins |
314 |
) |
|
315 | 2x |
optimal <- optimal_widths( |
316 | 2x |
possdf = possdf, |
317 | 2x |
tot_spaces = inches_to_spaces(pg_width_ins, fontspec = fontspec), |
318 | 2x |
verbose = verbose |
319 |
) |
|
320 | 2x |
optimal$colwidth |
321 |
} |
|
322 | ||
323 |
find_free_colspc <- function(curposs, fullposs, thresh = 0.99, skip = integer(), verbose = FALSE) { |
|
324 | ! |
orig_curposs <- curposs |
325 | ! |
maxlns_ind <- which.max(curposs$cell_lines) |
326 | ! |
longestcol <- curposs$col_num[maxlns_ind] |
327 | ! |
maxlns <- curposs$cell_lines[maxlns_ind] |
328 | ! |
adjrow_inds <- setdiff(which(curposs$cell_lines < thresh * maxlns), skip) |
329 | ! |
for (arowi in adjrow_inds) { |
330 | ! |
col <- curposs$col_num[arowi] |
331 | ! |
colwidthii <- curposs$colwidth[arowi] |
332 | ! |
fp_ind <- with(fullposs, min(which(col_num == col & cell_lines <= maxlns & colwidth < colwidthii))) |
333 | ! |
if (is.finite(fp_ind)) { |
334 | ! |
if (verbose) { |
335 | ! |
oldwdth <- curposs$colwidth[arowi] |
336 | ! |
newwdth <- fullposs$colwidth[fp_ind] |
337 | ! |
msg <- sprintf("adjusting column %d width from %d to %d", col, oldwdth, newwdth) |
338 | ! |
message(msg) |
339 |
} |
|
340 | ! |
curposs[arowi, ] <- fullposs[fp_ind, ] |
341 |
} |
|
342 |
} |
|
343 | ! |
curposs |
344 |
} |
|
345 | ||
346 |
constrict_lbl_lns <- function(curdf, possdf, avail_spc, verbose = TRUE) { |
|
347 | 2x |
old_lbl_lns <- max(curdf$lbl_lines) |
348 | 2x |
cols_to_pack <- which(curdf$lbl_lns == old_lbl_lns) |
349 | 2x |
olddf <- curdf |
350 | 2x |
success <- TRUE |
351 | 2x |
for (ii in cols_to_pack) { |
352 | ! |
colii <- curdf$col_num[ii] |
353 | ! |
cwidthii <- curdf$colwidth[ii] |
354 | ! |
possdfii <- possdf[possdf$col_num == colii & possdf$lbl_lines < old_lbl_lns, ] |
355 | ! |
if (nrow(possdfii) == 0) { |
356 | ! |
success <- FALSE |
357 | ! |
break |
358 |
} |
|
359 | ! |
newrow <- possdfii[ii, ] |
360 | ! |
if (newrow$colwidth - cwidthii > avail_spc) { |
361 | ! |
success <- FALSE |
362 | ! |
break |
363 |
} |
|
364 |
## assuming sorted |
|
365 | ! |
curdf[ii, ] <- newrow |
366 | ! |
avail_spc <- avail_spc + cwidthii - newrow$colwidth |
367 |
} |
|
368 | ||
369 | 2x |
if (verbose) { |
370 | ! |
if (success) { |
371 | ! |
msg <- paste( |
372 | ! |
"overall number of label rows successfully reduced. Cols affected: ", |
373 | ! |
paste(curdf$col_num[cols_to_pack], collapse = ", ") |
374 |
) |
|
375 |
} else { |
|
376 | ! |
msg <- paste("Unable to reduce label rows required.") |
377 |
} |
|
378 |
} |
|
379 | ||
380 | 2x |
if (!success) { |
381 | ! |
curdf <- olddf |
382 |
} |
|
383 | 2x |
curdf |
384 |
} |
|
385 | ||
386 |
optimal_widths <- function(possdf, tot_spaces = 320, max_lbl_lines = 3, verbose = FALSE) { |
|
387 | 2x |
odf <- order(possdf$col_num, possdf$colwidth) |
388 | 2x |
possdf <- possdf[odf, ] |
389 | 2x |
badlbl <- which(possdf$lbl_lines > max_lbl_lines) |
390 | 2x |
if (length(badlbl > 0)) { |
391 | 2x |
if (verbose) { |
392 | ! |
message("Excluding ", length(badlbl), " column widths for labels taking over ", max_lbl_lines, " lines.") |
393 |
} |
|
394 | 2x |
possdf <- possdf[-badlbl, ] |
395 | 2x |
possdf <- possdf[possdf$lbl_lines <= max_lbl_lines, , drop = FALSE] |
396 |
} |
|
397 | 2x |
full_possdf <- possdf |
398 |
## already ordered by colnum then width so this the first of each colwidth is the min width for that col |
|
399 | 2x |
dups <- duplicated(possdf$col_num) |
400 | 2x |
curdf <- possdf[!dups, ] |
401 | 2x |
possdf <- possdf[dups, ] ## without rows for ones in curdf |
402 | 2x |
spcleft <- tot_spaces - sum(curdf$colwidth) |
403 | 2x |
if (verbose) { |
404 | ! |
message( |
405 | ! |
"Optimizng Column Widths\n", |
406 | ! |
"Initial lines required: ", |
407 | ! |
max(curdf$lines), |
408 | ! |
"\n", |
409 | ! |
"Available adjustment: ", |
410 | ! |
spcleft, |
411 | ! |
" spaces\n" |
412 |
) |
|
413 |
} |
|
414 | 2x |
done <- FALSE |
415 | 2x |
while (!done) { |
416 | 2x |
oldwdths <- curdf$colwidth |
417 | 2x |
curdf <- constrict_lbl_lns(curdf, possdf, verbose = verbose) |
418 | 2x |
if (all.equal(curdf$colwidth, oldwdths)) { |
419 | 2x |
done <- TRUE |
420 |
} |
|
421 |
} |
|
422 | 2x |
change <- TRUE |
423 | 2x |
while (spcleft > 0 && change && nrow(possdf) > 0) { |
424 | 8x |
change <- FALSE |
425 | 8x |
ii <- which.max(curdf$cell_lines) |
426 | 8x |
spcleft <- tot_spaces - sum(curdf$colwidth) |
427 | 8x |
colii <- curdf$col_num[ii] |
428 | 8x |
bef_lns <- curdf$cell_lines[ii] |
429 | 8x |
bef_width <- curdf$colwidth[ii] |
430 | 8x |
nextlns <- max(curdf$cell_lines[-ii]) |
431 | 8x |
cand_row_cond <- possdf$col_num == colii & possdf$cell_lines < bef_lns & possdf$colwidth - bef_width <= spcleft |
432 | 8x |
canddf <- possdf[cand_row_cond, , drop = FALSE] |
433 | 8x |
if (nrow(canddf) > 0) { |
434 | 6x |
more_than_next <- canddf$cell_lines >= nextlns |
435 | 6x |
if (any(more_than_next)) { |
436 | 3x |
candrow <- canddf[max(which(more_than_next)), ] |
437 |
} else { |
|
438 | 3x |
candrow <- canddf[nrow(canddf), ] |
439 |
} |
|
440 | ||
441 | 6x |
if (verbose) { |
442 | ! |
message( |
443 | ! |
"COL ", |
444 | ! |
colii, |
445 | ! |
" width: ", |
446 | ! |
bef_width, |
447 |
"->", |
|
448 | ! |
candrow$colwidth, |
449 | ! |
" lines req: ", |
450 | ! |
bef_lns, |
451 |
"->", |
|
452 | ! |
candrow$cell_lines |
453 |
) |
|
454 |
} |
|
455 | 6x |
change <- TRUE |
456 | 6x |
curdf[ii, ] <- candrow |
457 |
} |
|
458 |
} |
|
459 | 2x |
curdf |
460 |
} |
|
461 | ||
462 | ||
463 |
one_col_strs <- function(strcol, col_gap = 2, fontspec) { |
|
464 | 20x |
strspls <- strsplit(strcol, split = "(-| (?=[^/]))", perl = TRUE) |
465 | 20x |
strspl_widths <- lapply(strspls, nchar_ttype, fontspec = fontspec, raw = TRUE) |
466 | 20x |
lbound_raw <- max(unlist(strspl_widths)) |
467 | 20x |
lbound <- ceiling(lbound_raw + 2 * col_gap) |
468 | 20x |
ret <- calc_poss_lines(strspl_widths, lbound, 50, fontspec = fontspec) |
469 | 20x |
ret |
470 |
} |
|
471 | ||
472 |
## we have permission from the formatters maintainer to use this |
|
473 |
## unexported function |
|
474 |
j_mf_col_widths <- utils::getFromNamespace("mf_col_widths", "formatters") |
|
475 | ||
476 |
#' @name def_colwidths |
|
477 |
#' |
|
478 |
#' @title Define Column Widths |
|
479 |
#' |
|
480 |
#' @description |
|
481 |
#' `def_colwidths` uses heuristics to determine suitable column widths given a |
|
482 |
#' table or listing, and a font. |
|
483 |
#' |
|
484 |
#' @param tt input Tabletree |
|
485 |
#' @param fontspec Font specification |
|
486 |
#' @param label_width_ins Label Width in Inches. |
|
487 |
#' @param col_gap Column gap in spaces. Defaults to `.5` for listings and `3` |
|
488 |
#' for tables. |
|
489 |
#' @param type Type of the table tree, used to determine column width calculation method. |
|
490 |
#' |
|
491 |
#' @details Listings are assumed to be rendered landscape on standard A1 paper, |
|
492 |
#' such that all columns are rendered on one page. Tables are allowed to |
|
493 |
#' be horizontally paginated, and column widths are determined based only on |
|
494 |
#' required word wrapping. See the `Automatic Column Widths` vignette for |
|
495 |
#' a detailed discussion of the algorithms used. |
|
496 |
#' @return a vector of column widths (including the label row pseudo-column in the table |
|
497 |
#' case) suitable for use rendering `tt` in the specified font. |
|
498 |
#' @export |
|
499 |
#' |
|
500 |
def_colwidths <- function(tt, |
|
501 |
fontspec, |
|
502 |
label_width_ins = 2, |
|
503 |
col_gap = ifelse(type == "Listing", .5, 3), |
|
504 |
type = tlg_type(tt)) { |
|
505 | 8x |
if (type == "Figure") { |
506 | ! |
ret <- NULL |
507 | 8x |
} else if (type == "Table") { |
508 |
if ( |
|
509 | 6x |
is.list(tt) && |
510 | 6x |
!methods::is(tt, "MatrixPrintForm") && |
511 | 6x |
!is.null(j_mf_col_widths(tt[[1]])) |
512 |
) { |
|
513 | ! |
ret <- j_mf_col_widths(tt[[1]]) |
514 |
} else { |
|
515 | 6x |
ret <- no_cellwrap_colwidths(tt, fontspec, col_gap = col_gap, label_width_ins = label_width_ins) |
516 |
} |
|
517 |
} else { |
|
518 | 2x |
ret <- listing_column_widths(tt, fontspec = fontspec, col_gap = col_gap) |
519 |
} |
|
520 | 8x |
ret |
521 |
} |
1 |
#' Survival time analysis |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge('stable')` |
|
4 |
#' |
|
5 |
#' The analyze function [kaplan_meier()] creates a layout element to analyze |
|
6 |
#' survival time by calculating survival time median, 2 quantiles, each with |
|
7 |
#' their confidence intervals, and range (for all, censored, or event patients). |
|
8 |
#' The primary analysis variable `vars` is the time variable and the secondary |
|
9 |
#' analysis variable `is_event` indicates whether or not an event has occurred. |
|
10 |
#' |
|
11 |
#' @inheritParams proposal_argument_convention |
|
12 |
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function |
|
13 |
#' [tern::control_surv_time()]. Some possible parameter options are: |
|
14 |
#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival time. |
|
15 |
#' * `conf_type` (`string`)\cr confidence interval type. Options are 'plain' (default), 'log', or 'log-log', |
|
16 |
#' see more in [survival::survfit()]. Note option 'none' is not supported. |
|
17 |
#' * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time. |
|
18 |
#' |
|
19 |
#' @note These functions have been forked from the `tern` package file `survival_time.R`. |
|
20 |
#' Here we have the additional features: |
|
21 |
#' |
|
22 |
#' * Additional statistics `quantiles_lower`, `quantiles_upper`, `range_with_cens_info` are returned. |
|
23 |
#' |
|
24 |
#' @examples |
|
25 |
#' library(dplyr) |
|
26 |
#' library(tern) |
|
27 |
#' adtte_f <- tern::tern_ex_adtte |> |
|
28 |
#' filter(PARAMCD == "OS") |> |
|
29 |
#' mutate( |
|
30 |
#' AVAL = tern::day2month(AVAL), |
|
31 |
#' is_event = CNSR == 0 |
|
32 |
#' ) |
|
33 |
#' df <- adtte_f |> filter(ARMCD == "ARM A") |
|
34 |
#' @keywords internal |
|
35 |
#' @name kaplan_meier |
|
36 |
#' @order 1 |
|
37 |
NULL |
|
38 | ||
39 |
#' @describeIn kaplan_meier Statistics function which analyzes survival times using Kaplan-Meier. |
|
40 |
#' |
|
41 |
#' @return |
|
42 |
#' * `s_kaplan_meier()` returns the following statistics: |
|
43 |
#' * `quantiles_lower`: Lower quantile estimate and confidence interval for it. |
|
44 |
#' * `median_ci_3d`: Median survival time and confidence interval for it. |
|
45 |
#' * `quantiles_upper`: Upper quantile estimate and confidence interval for it. |
|
46 |
#' * `range_with_cens_info`: Survival time range with censoring information. |
|
47 |
#' |
|
48 |
#' @importFrom survival survfit |
|
49 |
#' |
|
50 |
#' @keywords internal |
|
51 |
s_kaplan_meier <- function(df, .var, is_event, control = control_surv_time()) { |
|
52 | 10x |
checkmate::assert_string(.var) |
53 | 10x |
assert_df_with_variables(df, list(tte = .var, is_event = is_event)) |
54 | 10x |
checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE) |
55 | 10x |
checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE) |
56 | ||
57 | 10x |
conf_type <- control$conf_type |
58 | 10x |
conf_level <- control$conf_level |
59 | ||
60 | 10x |
checkmate::assert_true(control$quantiles[1] < 0.5) |
61 | 10x |
checkmate::assert_true(control$quantiles[2] > 0.5) |
62 | 10x |
quantiles <- c(control$quantiles[1], 0.5, control$quantiles[2]) |
63 | ||
64 | 10x |
formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")) |
65 | 10x |
srv_fit <- survival::survfit(formula = formula, data = df, conf.int = conf_level, conf.type = conf_type) |
66 | 10x |
srv_qt_tab <- stats::quantile(srv_fit, probs = quantiles) |
67 | 10x |
quantiles_lower <- vapply(srv_qt_tab, "[", 1, FUN.VALUE = numeric(1)) |
68 | 10x |
median_ci <- vapply(srv_qt_tab, "[", 2, FUN.VALUE = numeric(1)) |
69 | 10x |
quantiles_upper <- vapply(srv_qt_tab, "[", 3, FUN.VALUE = numeric(1)) |
70 | 10x |
range_censor <- range_noinf(df[[.var]][!df[[is_event]]], na.rm = TRUE) |
71 | 10x |
range_event <- range_noinf(df[[.var]][df[[is_event]]], na.rm = TRUE) |
72 | 10x |
range <- range_noinf(df[[.var]], na.rm = TRUE) |
73 | 10x |
lower_censored <- as.numeric(range_censor[1] < range_event[1]) |
74 | 10x |
upper_censored <- as.numeric(range_censor[2] > range_event[2]) |
75 | 10x |
range_with_cens_info <- c(range, lower_censored, upper_censored) |
76 | 10x |
list( |
77 | 10x |
quantiles_lower = with_label( |
78 | 10x |
unname(quantiles_lower), |
79 | 10x |
paste0(round(quantiles[1] * 100), "th percentile (", f_conf_level(conf_level), ")") |
80 |
), |
|
81 | 10x |
median_ci_3d = with_label(unname(median_ci), paste0("Median (", f_conf_level(conf_level), ")")), |
82 | 10x |
quantiles_upper = with_label( |
83 | 10x |
unname(quantiles_upper), |
84 | 10x |
paste0(round(quantiles[3] * 100), "th percentile (", f_conf_level(conf_level), ")") |
85 |
), |
|
86 | 10x |
range_with_cens_info = range_with_cens_info |
87 |
) |
|
88 |
} |
|
89 | ||
90 |
#' @describeIn kaplan_meier Formatted analysis function which is used as `afun` |
|
91 |
#' |
|
92 |
#' @return |
|
93 |
#' * `a_kaplan_meier()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
94 |
#' |
|
95 |
#' @examples |
|
96 |
#' a_kaplan_meier( |
|
97 |
#' df, |
|
98 |
#' .var = "AVAL", |
|
99 |
#' is_event = "is_event" |
|
100 |
#' ) |
|
101 |
#' |
|
102 |
#' basic_table() |> |
|
103 |
#' split_cols_by(var = "ARMCD") |> |
|
104 |
#' add_colcounts() |> |
|
105 |
#' analyze( |
|
106 |
#' vars = "AVAL", |
|
107 |
#' afun = a_kaplan_meier, |
|
108 |
#' var_labels = "Kaplan-Meier estimate of time to event (months)", |
|
109 |
#' show_labels = "visible", |
|
110 |
#' extra_args = list( |
|
111 |
#' is_event = "is_event", |
|
112 |
#' control = control_surv_time(conf_level = 0.9, conf_type = "log-log") |
|
113 |
#' ) |
|
114 |
#' ) |> |
|
115 |
#' build_table(df = adtte_f) |
|
116 |
#' |
|
117 |
#' @export |
|
118 |
#' @order 2 |
|
119 |
a_kaplan_meier <- function(df, .var, ..., .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL) { |
|
120 |
# Check for additional parameters to the statistics function |
|
121 | 8x |
dots_extra_args <- list(...) |
122 | ||
123 |
# Only support default stats, not custom stats |
|
124 | 8x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
125 | ||
126 |
# Apply statistics function |
|
127 | 8x |
x_stats <- .apply_stat_functions( |
128 | 8x |
default_stat_fnc = s_kaplan_meier, |
129 | 8x |
custom_stat_fnc_list = NULL, |
130 | 8x |
args_list = c(df = list(df), .var = .var, dots_extra_args) |
131 |
) |
|
132 | ||
133 |
# Format according to specifications |
|
134 | 8x |
format_stats( |
135 | 8x |
x_stats, |
136 | 8x |
method_groups = "kaplan_meier", |
137 | 8x |
stats_in = .stats, |
138 | 8x |
formats_in = .formats, |
139 | 8x |
labels_in = .labels, |
140 | 8x |
indents_in = .indent_mods |
141 |
) |
|
142 |
} |
1 |
#' MMRM Analysis for Imputed Datasets |
|
2 |
#' |
|
3 |
#' Performs an MMRM for two or more groups returning the estimated |
|
4 |
#' 'treatment effect' (i.e. the contrast between treatment groups and the control |
|
5 |
#' group) and the least square means estimates in each group. |
|
6 |
#' |
|
7 |
#' @param data (`data.frame`)\cr containing the data to be used in the model. |
|
8 |
#' @param vars (`vars`)\cr list as generated by [rbmi::set_vars()]. Only the `subjid`, `group`, |
|
9 |
#' `visit`, `outcome` and `covariates` elements are required. See details. |
|
10 |
#' @param cov_struct (`string`)\cr the covariance structure to use. Note that the same |
|
11 |
#' covariance structure is assumed for all treatment groups. |
|
12 |
#' @param visits (`NULL` or `character`)\cr An optional character vector specifying |
|
13 |
#' which visits to fit the MMRM at. If `NULL`, the MMRM model will be fit to |
|
14 |
#' the whole dataset. |
|
15 |
#' @param weights (`string`)\cr the weighting strategy to be used when calculating the |
|
16 |
#' least square means, either `'counterfactual'` or `'equal'`. |
|
17 |
#' @param ... additional arguments passed to [mmrm::mmrm()], in particular |
|
18 |
#' `method` and `vcov` to control the degrees of freedom and variance-covariance |
|
19 |
#' adjustment methods as well as `reml` decide between REML and ML estimation. |
|
20 |
#' |
|
21 |
#' @details |
|
22 |
#' The function works as follows: |
|
23 |
#' |
|
24 |
#' 1. Optionally select the subset of the `data` corresponding to `visits. |
|
25 |
#' 2. Fit an MMRM as `vars$outcome ~ vars$group + vars$visit + vars$covariates` |
|
26 |
#' with the specified covariance structure for visits within subjects. |
|
27 |
#' 3. Extract the 'treatment effect' & least square means for each treatment group |
|
28 |
#' vs the control group. |
|
29 |
#' |
|
30 |
#' In order to meet the formatting standards set by [rbmi::analyse()] the results will be collapsed |
|
31 |
#' into a single list suffixed by the visit name, e.g.: |
|
32 |
#' ``` |
|
33 |
#' list( |
|
34 |
#' var_B_visit_1 = list(est = ...), |
|
35 |
#' trt_B_visit_1 = list(est = ...), |
|
36 |
#' lsm_A_visit_1 = list(est = ...), |
|
37 |
#' lsm_B_visit_1 = list(est = ...), |
|
38 |
#' var_B_visit_2 = list(est = ...), |
|
39 |
#' trt_B_visit_2 = list(est = ...), |
|
40 |
#' lsm_A_visit_2 = list(est = ...), |
|
41 |
#' lsm_B_visit_2 = list(est = ...), |
|
42 |
#' ... |
|
43 |
#' ) |
|
44 |
#' ``` |
|
45 |
#' Please note that 'trt' refers to the treatment effects, and 'lsm' refers to the least |
|
46 |
#' square mean results. In the above example `vars$group` has two factor levels A and B. |
|
47 |
#' The new 'var' refers to the model estimated variance of the residuals at the given |
|
48 |
#' visit, together with the degrees of freedom (which is treatment group specific). |
|
49 |
#' |
|
50 |
#' If you want to include additional interaction terms in your model this can be done |
|
51 |
#' by providing them to the `covariates` argument of [rbmi::set_vars()] |
|
52 |
#' e.g. `set_vars(covariates = c('sex*age'))`. |
|
53 |
#' |
|
54 |
#' @note The `group` and `visit` interaction `group:visit` is not included by |
|
55 |
#' default in the model, therefore please add that to `covariates` manually if |
|
56 |
#' you want to include it. This will make sense in most cases. |
|
57 |
#' @inherit rbmi_ancova return |
|
58 |
#' |
|
59 |
#' @seealso [rbmi_analyse()] |
|
60 |
#' @seealso [mmrm::mmrm()] |
|
61 |
#' @seealso [rbmi::set_vars()] |
|
62 |
#' @export |
|
63 |
rbmi_mmrm <- function( |
|
64 |
data, |
|
65 |
vars, |
|
66 |
cov_struct = c("us", "toep", "cs", "ar1"), |
|
67 |
visits = NULL, |
|
68 |
weights = c("counterfactual", "equal"), |
|
69 |
...) { |
|
70 | 1x |
subjid <- vars[["subjid"]] |
71 | 1x |
outcome <- vars[["outcome"]] |
72 | 1x |
group <- vars[["group"]] |
73 | 1x |
visit <- vars[["visit"]] |
74 | 1x |
covariates <- vars[["covariates"]] |
75 | ||
76 | 1x |
checkmate::assert_string(subjid) |
77 | 1x |
checkmate::assert_string(outcome) |
78 | 1x |
checkmate::assert_string(group) |
79 | 1x |
checkmate::assert_string(visit) |
80 | 1x |
checkmate::assert_character(covariates, null.ok = TRUE) |
81 | ||
82 | 1x |
cov_struct <- match.arg(cov_struct) |
83 | 1x |
weights <- match.arg(weights) |
84 | ||
85 | 1x |
expected_vars <- c((utils::getFromNamespace("extract_covariates", "rbmi"))(covariates), outcome, group, subjid, visit) |
86 | 1x |
checkmate::assert_subset(expected_vars, names(data)) |
87 | ||
88 | 1x |
checkmate::assert_factor(data[[visit]]) |
89 | 1x |
if (is.null(visits)) { |
90 | 1x |
visits <- as.character(unique(data[[visit]])) |
91 |
} |
|
92 | 1x |
checkmate::assert_subset(visits, as.character(data[[visit]])) |
93 | 1x |
data <- data[data[[visit]] %in% visits, , drop = FALSE] |
94 | ||
95 | 1x |
covariates_part <- paste(covariates, collapse = " + ") |
96 | 1x |
grp_visit_part <- paste(group, "+", visit) |
97 | 1x |
random_effects_part <- paste0(cov_struct, "(", visit, " | ", subjid, ")") |
98 | 1x |
rhs_formula <- paste(grp_visit_part, "+", random_effects_part) |
99 | 1x |
if (covariates_part != "") rhs_formula <- paste(covariates_part, "+", rhs_formula) |
100 | 1x |
formula <- stats::as.formula(paste(outcome, "~", rhs_formula)) |
101 | ||
102 | 1x |
fit <- mmrm::mmrm(formula, data = data, ...) |
103 | 1x |
res <- lapply(visits, function(x) { |
104 | 3x |
res <- rbmi_mmrm_single_info(fit, visit_level = x, visit = visit, group = group, weights = weights) |
105 | 3x |
names(res) <- paste0(names(res), "_", x) |
106 | 3x |
return(res) |
107 |
}) |
|
108 | 1x |
unlist(res, recursive = FALSE) |
109 |
} |
|
110 | ||
111 |
#' Extract Single Visit Information from a Fitted MMRM for Multiple Imputation Analysis |
|
112 |
#' |
|
113 |
#' @description |
|
114 |
#' Extracts relevant estimates from a given fitted MMRM. See [rbmi_mmrm()] for full details. |
|
115 |
#' |
|
116 |
#' @param fit (`mmrm`)\cr the fitted MMRM. |
|
117 |
#' @param visit_level (`string`)\cr the visit level to extract information for. |
|
118 |
#' @param visit (`string`)\cr the name of the visit variable. |
|
119 |
#' @param group (`string`)\cr the name of the group variable. |
|
120 |
#' |
|
121 |
#' @return a list with `trt_*`, `var_*` and `lsm_*` elements. See [rbmi_mmrm] for |
|
122 |
#' full details. |
|
123 |
#' @inheritParams rbmi_mmrm |
|
124 |
#' @seealso [rbmi_mmrm()] |
|
125 |
rbmi_mmrm_single_info <- function(fit, visit_level, visit, group, weights) { |
|
126 | 3x |
checkmate::assert_class(fit, "mmrm") |
127 | 3x |
checkmate::assert_string(visit_level) |
128 | 3x |
checkmate::assert_string(visit) |
129 | 3x |
checkmate::assert_string(group) |
130 | ||
131 | 3x |
data <- mmrm::component(fit, "full_frame") |
132 | 3x |
checkmate::assert_factor(data[[group]]) |
133 | 3x |
grp_levels <- levels(data[[group]]) |
134 | ||
135 | 3x |
visit_at_level <- stats::setNames(list(visit_level), visit) |
136 | 3x |
em_res <- emmeans::emmeans(fit, group, by = visit, at = visit_at_level, weights = weights) |
137 | 3x |
em_df <- as.data.frame(em_res) |
138 | ||
139 | 3x |
all_lsm <- lapply(grp_levels, function(x) { |
140 | 9x |
this_df <- em_df[em_df[[group]] == x, ] |
141 | 9x |
with(this_df, list(est = emmean, se = SE, df = df)) |
142 |
}) |
|
143 | 3x |
names(all_lsm) <- paste0("lsm_", grp_levels) |
144 | ||
145 | 3x |
var_est <- mmrm::VarCorr(fit)[visit_level, visit_level] |
146 | ||
147 | 3x |
cont_res <- emmeans::contrast(em_res, "trt.vs.ctrl", ref = grp_levels[1]) |
148 | 3x |
cont_df <- as.data.frame(cont_res) |
149 | ||
150 | 3x |
all_var <- lapply(grp_levels[-1], function(x) { |
151 | 6x |
this_df <- cont_df[cont_df[["contrast"]] == paste(x, "-", grp_levels[1]), ] |
152 |
# Note: This should be revisited here for the `se` - currently we take the SE from the LS means contrast, not |
|
153 |
# from the variance estimate! |
|
154 | 6x |
with(this_df, list(est = var_est, se = SE, df = df)) |
155 |
}) |
|
156 | 3x |
names(all_var) <- paste0("var_", grp_levels[-1]) |
157 | ||
158 | 3x |
all_trt <- lapply(grp_levels[-1], function(x) { |
159 | 6x |
this_df <- cont_df[cont_df[["contrast"]] == paste(x, "-", grp_levels[1]), ] |
160 | 6x |
with(this_df, list(est = estimate, se = SE, df = df)) |
161 |
}) |
|
162 | 3x |
names(all_trt) <- paste0("trt_", grp_levels[-1]) |
163 | ||
164 | 3x |
c(all_var, all_trt, all_lsm) |
165 |
} |
1 |
#' @name a_freq_subcol_j |
|
2 |
#' |
|
3 |
#' @title Analysis function count and percentage with extra column-subsetting in |
|
4 |
#' selected columns (controlled by subcol_* arguments) |
|
5 |
#' |
|
6 |
#' @inheritParams proposal_argument_convention |
|
7 |
#' @inheritParams a_freq_j |
|
8 |
#' @param subcol_split (`string`)\cr text to search colid to determine whether further subsetting |
|
9 |
#' should be performed. |
|
10 |
#' @param subcol_var (`string`)\cr name of variable containing to be searched for the text |
|
11 |
#' identified in subcol_val argument. |
|
12 |
#' @param subcol_val (`string`)\cr value to use to perform further data sub-setting. |
|
13 |
#' @param denom (`string`)\cr |
|
14 |
#' One of \cr |
|
15 |
#' \itemize{ |
|
16 |
#' \item \strong{N_col} Column count, \cr |
|
17 |
#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr |
|
18 |
#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr |
|
19 |
#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr |
|
20 |
#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr |
|
21 |
#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe |
|
22 |
#' (`.row_df` from the rtables splitting machinery).\cr |
|
23 |
#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr |
|
24 |
#' This higher row-level split is specified in the argument `denom_by`.\cr |
|
25 |
#' } |
|
26 |
#' @param .formats (named 'character' or 'list')\cr |
|
27 |
#' formats for the statistics. |
|
28 |
#' |
|
29 |
#' @return list of requested statistics with formatted `rtables::CellValue()`.\cr |
|
30 |
#' @export |
|
31 |
#' |
|
32 |
#' @examples |
|
33 |
#' library(dplyr) |
|
34 |
#' |
|
35 |
#' ADSL <- ex_adsl |> |
|
36 |
#' select(USUBJID, ARM) |
|
37 |
#' |
|
38 |
#' ADSL$COLSPAN_REL <- "AEs" |
|
39 |
#' |
|
40 |
#' ADAE <- ex_adae |> |
|
41 |
#' select(USUBJID, ARM, AEDECOD, AREL) |
|
42 |
#' |
|
43 |
#' ADAE <- ADAE |> |
|
44 |
#' mutate( |
|
45 |
#' AEREL = case_when(AREL == "Y" ~ "RELATED", |
|
46 |
#' AREL == "N" ~ "NOT RELATED"), |
|
47 |
#' AEREL = factor(AEREL), |
|
48 |
#' COLSPAN_REL = "AEs" |
|
49 |
#' ) |
|
50 |
#' |
|
51 |
#' combodf <- tribble( |
|
52 |
#' ~valname, ~label, ~levelcombo, ~exargs, |
|
53 |
#' "RELATED", "Related AEs", c("AEs"), list() |
|
54 |
#' ) |
|
55 |
#' |
|
56 |
#' lyt <- basic_table(show_colcounts = TRUE) |> |
|
57 |
#' split_cols_by("COLSPAN_REL", split_fun = add_combo_levels(combodf, trim = TRUE)) |> |
|
58 |
#' split_cols_by("ARM") |> |
|
59 |
#' analyze("AEDECOD", afun = a_freq_subcol_j, |
|
60 |
#' extra_args = list(subcol_split = "RELATED", |
|
61 |
#' subcol_var = "AEREL", |
|
62 |
#' subcol_val = "RELATED")) |
|
63 |
#' |
|
64 |
#' result <- build_table(lyt, ADAE, alt_counts_df = ADSL) |
|
65 |
#' |
|
66 |
#' result |
|
67 |
a_freq_subcol_j <- function( |
|
68 |
df, |
|
69 |
labelstr = NULL, |
|
70 |
.var = NA, |
|
71 |
val = NULL, |
|
72 |
# arguments specific to a_freq_subcol_j |
|
73 |
subcol_split = NULL, |
|
74 |
subcol_var = NULL, |
|
75 |
subcol_val = NULL, |
|
76 |
# arguments specific to a_freq_subcol_j till here |
|
77 |
.df_row, |
|
78 |
.spl_context, |
|
79 |
.N_col, |
|
80 |
id = "USUBJID", |
|
81 |
denom = c("N_col", "n_df", "n_altdf", "n_rowdf", "n_parentdf"), |
|
82 |
label = NULL, |
|
83 |
label_fstr = NULL, |
|
84 |
label_map = NULL, |
|
85 |
.alt_df_full = NULL, |
|
86 |
denom_by = NULL, |
|
87 |
.stats = c("count_unique_denom_fraction"), |
|
88 |
.formats = NULL, |
|
89 |
.labels_n = NULL, |
|
90 |
.indent_mods = NULL, |
|
91 |
na_str = rep("NA", 3)) { |
|
92 | 6x |
denom <- match.arg(denom) |
93 | ||
94 | 6x |
if (!is.null(labelstr) && is.na(.var)) { |
95 | ! |
stop( |
96 | ! |
"Argument var must be specified in call to summarize_row_groups when using cfun = a_freq_subcol_j." |
97 |
) |
|
98 |
} |
|
99 | ||
100 | 6x |
check_alt_df_full(denom, "n_altdf", .alt_df_full) |
101 | ||
102 | 6x |
res_dataprep <- h_a_freq_dataprep( |
103 | 6x |
df = df, |
104 | 6x |
labelstr = labelstr, |
105 | 6x |
.var = .var, |
106 | 6x |
val = val, |
107 | 6x |
drop_levels = FALSE, |
108 | 6x |
excl_levels = NULL, |
109 | 6x |
new_levels = NULL, |
110 | 6x |
new_levels_after = FALSE, |
111 | 6x |
.df_row = .df_row, |
112 | 6x |
.spl_context = .spl_context, |
113 | 6x |
.N_col = .N_col, |
114 | 6x |
id = id, |
115 | 6x |
denom = denom, |
116 | 6x |
variables = NULL, |
117 | 6x |
label = label, |
118 | 6x |
label_fstr = label_fstr, |
119 | 6x |
label_map = label_map, |
120 | 6x |
.alt_df_full = .alt_df_full, |
121 | 6x |
denom_by = denom_by, |
122 | 6x |
.stats = .stats |
123 |
) |
|
124 |
# res_dataprep is list with elements |
|
125 |
# df .df_row val |
|
126 |
# drop_levels excl_levels |
|
127 |
# alt_df parentdf new_denomdf |
|
128 |
# .stats |
|
129 |
# make these elements available in current environment |
|
130 | 6x |
df <- res_dataprep$df |
131 | 6x |
.df_row <- res_dataprep$.df_row |
132 | 6x |
val <- res_dataprep$val |
133 | 6x |
alt_df <- res_dataprep$alt_df |
134 | 6x |
parentdf <- res_dataprep$parentdf |
135 | 6x |
new_denomdf <- res_dataprep$new_denomdf |
136 | 6x |
.stats <- .stats |
137 | ||
138 |
## colid can be used to figure out if we're in subcolum |
|
139 | 6x |
colid <- .spl_context$cur_col_id[[1]] |
140 | ||
141 |
### this is the core code for subsetting to appropriate subcol_val |
|
142 | 6x |
insubcol <- grepl(subcol_split, colid, fixed = TRUE) |
143 | 6x |
if (insubcol) { |
144 | 3x |
df <- subset(df, df[[subcol_var]] == subcol_val) |
145 |
} |
|
146 | ||
147 |
## the same s-function can be used as in a_freq_j |
|
148 | 6x |
x_stats <- s_freq_j( |
149 | 6x |
df, |
150 | 6x |
.var = .var, |
151 | 6x |
.df_row = .df_row, |
152 | 6x |
val = val, |
153 | 6x |
alt_df = new_denomdf, |
154 | 6x |
parent_df = new_denomdf, |
155 | 6x |
id = id, |
156 | 6x |
denom = denom, |
157 | 6x |
.N_col = .N_col, |
158 | 6x |
countsource = "df" |
159 |
) |
|
160 | ||
161 | 6x |
.stats_adj <- .stats |
162 | ||
163 | 6x |
res_prepinrows <- h_a_freq_prepinrows( |
164 | 6x |
x_stats, |
165 | 6x |
.stats_adj, |
166 | 6x |
.formats, |
167 | 6x |
labelstr, |
168 | 6x |
label_fstr, |
169 | 6x |
label, |
170 | 6x |
.indent_mods, |
171 | 6x |
.labels_n, |
172 | 6x |
na_str |
173 |
) |
|
174 |
# res_prepinrows is list with elements |
|
175 |
# x_stats .formats .labels .indent_mods .format_na_strs |
|
176 |
# make these elements available in current environment |
|
177 | 6x |
x_stats <- res_prepinrows$x_stats |
178 | 6x |
.formats <- res_prepinrows$.formats |
179 | 6x |
.labels <- res_prepinrows$.labels |
180 | 6x |
.indent_mods <- res_prepinrows$.indent_mods |
181 | 6x |
.format_na_strs <- res_prepinrows$.format_na_strs |
182 | ||
183 |
### final step: turn requested stats into rtables rows |
|
184 | 6x |
inrows <- in_rows( |
185 | 6x |
.list = x_stats, |
186 | 6x |
.formats = .formats, |
187 | 6x |
.labels = .labels, |
188 | 6x |
.indent_mods = .indent_mods, |
189 | 6x |
.format_na_strs = .format_na_strs |
190 |
) |
|
191 | ||
192 | 6x |
return(inrows) |
193 |
} |
1 |
#' Helper for Finding AVISIT after which CHG are all Missing |
|
2 |
#' |
|
3 |
#' @param df (`data.frame`)\cr with `CHG` and `AVISIT` variables. |
|
4 |
#' |
|
5 |
#' @return A string with either the factor level after which `AVISIT` is all missing, |
|
6 |
#' or `NA`. |
|
7 |
#' @export |
|
8 |
#' |
|
9 |
#' @examples |
|
10 |
#' df <- data.frame( |
|
11 |
#' AVISIT = factor(c(1, 2, 3, 4, 5)), |
|
12 |
#' CHG = c(5, NA, NA, NA, 3) |
|
13 |
#' ) |
|
14 |
#' find_missing_chg_after_avisit(df) |
|
15 |
#' |
|
16 |
#' df2 <- data.frame( |
|
17 |
#' AVISIT = factor(c(1, 2, 3, 4, 5)), |
|
18 |
#' CHG = c(5, NA, 3, NA, NA) |
|
19 |
#' ) |
|
20 |
#' find_missing_chg_after_avisit(df2) |
|
21 |
#' |
|
22 |
#' df3 <- data.frame( |
|
23 |
#' AVISIT = factor(c(1, 2, 3, 4, 5)), |
|
24 |
#' CHG = c(NA, NA, NA, NA, NA) |
|
25 |
#' ) |
|
26 |
#' find_missing_chg_after_avisit(df3) |
|
27 |
find_missing_chg_after_avisit <- function(df) { |
|
28 | 8x |
checkmate::assert_data_frame(df) |
29 | 7x |
checkmate::assert_factor(df$AVISIT, unique = TRUE, any.missing = FALSE) |
30 | 6x |
checkmate::assert_numeric(df$CHG) |
31 | ||
32 |
# Ensure the dataframe is sorted by AVISIT |
|
33 | 5x |
df <- df[order(df$AVISIT), ] |
34 | ||
35 |
# Last visit with available data. |
|
36 | 5x |
visit_levels_available <- as.integer(df[!is.na(df$CHG), ]$AVISIT) |
37 | ||
38 | 5x |
if (!length(visit_levels_available)) { |
39 | 1x |
return(levels(df$AVISIT)[1]) |
40 |
} |
|
41 | 4x |
visit_levels_max_available <- max(visit_levels_available) |
42 | ||
43 |
# Visits with missing data. |
|
44 | 4x |
visit_levels_missing <- as.integer(df[is.na(df$CHG), ]$AVISIT) |
45 | ||
46 |
# Missing visits at the end. |
|
47 | 4x |
visit_levels_missing_end <- visit_levels_missing[visit_levels_missing > visit_levels_max_available] |
48 | ||
49 |
# Return first one if there is any. |
|
50 | 4x |
if (length(visit_levels_missing_end)) { |
51 | 3x |
levels(df$AVISIT)[min(visit_levels_missing_end)] |
52 |
} else { |
|
53 |
NA_character_ |
|
54 |
} |
|
55 |
} |
|
56 | ||
57 |
# Copy from rbmi package, because this was only added recently, so not yet available in our package version. |
|
58 | ||
59 |
#' Create a `rbmi` ready cluster |
|
60 |
#' |
|
61 |
#' @param cluster_or_cores Number of parallel processes to use or an existing cluster to make use of |
|
62 |
#' @param objects a named list of objects to export into the sub-processes |
|
63 |
#' @param packages a character vector of libraries to load in the sub-processes |
|
64 |
#' |
|
65 |
#' This function is a wrapper around `parallel::makePSOCKcluster()` but takes |
|
66 |
#' care of configuring `rbmi` to be used in the sub-processes as well as loading |
|
67 |
#' user defined objects and libraries and setting the seed for reproducibility. |
|
68 |
#' |
|
69 |
#' @return If `cluster_or_cores` is `1` this function will return `NULL`. If `cluster_or_cores` |
|
70 |
#' is a number greater than `1`, a cluster with `cluster_or_cores` cores is returned. |
|
71 |
#' |
|
72 |
#' If `cluster_or_cores` is a cluster created via `parallel::makeCluster()` then this function |
|
73 |
#' returns it after inserting the relevant `rbmi` objects into the existing cluster. |
|
74 |
#' |
|
75 |
#' @examples |
|
76 |
#' \dontrun{ |
|
77 |
#' make_rbmi_cluster(5) |
|
78 |
#' closeAllConnections() |
|
79 |
#' |
|
80 |
#' VALUE <- 5 |
|
81 |
#' myfun <- function(x) { |
|
82 |
#' x + day(VALUE) |
|
83 |
#' } |
|
84 |
#' make_rbmi_cluster(5, list(VALUE = VALUE, myfun = myfun), c("lubridate")) |
|
85 |
#' closeAllConnections() |
|
86 |
#' |
|
87 |
#' cl <- parallel::makeCluster(5) |
|
88 |
#' make_rbmi_cluster(cl) |
|
89 |
#' closeAllConnections() |
|
90 |
#' } |
|
91 |
#' @export |
|
92 |
make_rbmi_cluster <- function(cluster_or_cores = 1, objects = NULL, packages = NULL) { |
|
93 | 6x |
if (is.numeric(cluster_or_cores) && cluster_or_cores == 1) { |
94 | 1x |
return(NULL) |
95 | 5x |
} else if (is.numeric(cluster_or_cores)) { |
96 | 2x |
cl <- parallel::makePSOCKcluster(cluster_or_cores) |
97 | 3x |
} else if (methods::is(cluster_or_cores, "cluster")) { |
98 | 2x |
cl <- cluster_or_cores |
99 |
} else { |
|
100 | 1x |
stop(sprintf("`cluster_or_cores` has unsupported class of: %s", paste(class(cluster_or_cores), collapse = ", "))) |
101 |
} |
|
102 | ||
103 |
# Load user defined objects into the globalname space |
|
104 | 4x |
if (!is.null(objects) && length(objects)) { |
105 | 1x |
export_env <- list2env(objects) |
106 | 1x |
parallel::clusterExport(cl, names(objects), export_env) |
107 |
} |
|
108 | ||
109 |
# Load user defined packages |
|
110 | 4x |
packages <- c(packages, "assertthat") |
111 |
# Remove attempts to load `rbmi` as this will be covered later |
|
112 | 4x |
packages <- setdiff(packages, "rbmi") |
113 | 4x |
devnull <- parallel::clusterCall( |
114 | 4x |
cl, |
115 | 4x |
function(pkgs) lapply(pkgs, function(x) library(x, character.only = TRUE)), |
116 | 4x |
as.list(packages) |
117 |
) |
|
118 | ||
119 |
# Ensure reproducibility |
|
120 | 4x |
parallel::clusterSetRNGStream(cl, sample.int(1)) |
121 | ||
122 |
# If user has previously configured `rbmi` sub-processes then early exit |
|
123 | 4x |
exported_rbmi <- unlist(parallel::clusterEvalQ(cl, exists("..exported..parallel..rbmi"))) |
124 | 4x |
if (all(exported_rbmi)) { |
125 | ! |
return(cl) |
126 |
} |
|
127 | ||
128 |
# Ensure that exported and unexported objects are all directly accessible from the globalenv in the sub-processes |
|
129 | 4x |
is_in_rbmi_development <- FALSE |
130 | 4x |
if (is_in_rbmi_development) { |
131 | ! |
devnull <- parallel::clusterEvalQ(cl, pkgload::load_all()) |
132 |
} else { |
|
133 | 4x |
devnull <- parallel::clusterEvalQ(cl, { |
134 | 8x |
.namespace <- getNamespace("rbmi") |
135 | 8x |
for (.nsfun in ls(.namespace)) { |
136 | 1504x |
assign(.nsfun, get(.nsfun, envir = .namespace)) |
137 |
} |
|
138 |
}) |
|
139 |
} |
|
140 | ||
141 |
# Set variable to signify `rbmi` has been configured |
|
142 | 4x |
devnull <- parallel::clusterEvalQ(cl, { |
143 | 8x |
..exported..parallel..rbmi <- TRUE |
144 |
}) |
|
145 | ||
146 | 4x |
return(cl) |
147 |
} |
|
148 | ||
149 |
#' Parallelise Lapply |
|
150 |
#' |
|
151 |
#' Simple wrapper around `lapply` and [`parallel::clusterApplyLB`] to abstract away |
|
152 |
#' the logic of deciding which one to use |
|
153 |
#' @param cl Cluster created by [`parallel::makeCluster()`] or `NULL` |
|
154 |
#' @param fun Function to be run |
|
155 |
#' @param x object to be looped over |
|
156 |
#' @param ... extra arguments passed to `fun` |
|
157 |
#' @return `list` of results of calling `fun` on elements of `x`. |
|
158 |
par_lapply <- function(cl, fun, x, ...) { |
|
159 | 2x |
result <- if (is.null(cl)) { |
160 | 1x |
lapply(x, fun, ...) |
161 |
} else { |
|
162 | 1x |
parallel::clusterApplyLB(cl, x, fun, ...) |
163 |
} |
|
164 | 2x |
return(result) |
165 |
} |
|
166 | ||
167 |
#' Analyse Multiple Imputed Datasets |
|
168 |
#' |
|
169 |
#' @description |
|
170 |
#' This function takes multiple imputed datasets (as generated by |
|
171 |
#' the [rbmi::impute()] function) and runs an analysis function on |
|
172 |
#' each of them. |
|
173 |
#' |
|
174 |
#' @importFrom assertthat assert_that |
|
175 |
#' |
|
176 |
#' @details |
|
177 |
#' This function works by performing the following steps: |
|
178 |
#' |
|
179 |
#' 1. Extract a dataset from the `imputations` object. |
|
180 |
#' 2. Apply any delta adjustments as specified by the `delta` argument. |
|
181 |
#' 3. Run the analysis function `fun` on the dataset. |
|
182 |
#' 4. Repeat steps 1-3 across all of the datasets inside the `imputations` |
|
183 |
#' object. |
|
184 |
#' 5. Collect and return all of the analysis results. |
|
185 |
#' |
|
186 |
#' The analysis function `fun` must take a `data.frame` as its first |
|
187 |
#' argument. All other options to [rbmi_analyse()] are passed onto `fun` |
|
188 |
#' via `...`. |
|
189 |
#' `fun` must return a named list with each element itself being a |
|
190 |
#' list containing a single |
|
191 |
#' numeric element called `est` (or additionally `se` and `df` if |
|
192 |
#' you had originally specified [rbmi::method_bayes()] or [rbmi::method_approxbayes()]) |
|
193 |
#' i.e.: |
|
194 |
#' \preformatted{ |
|
195 |
#' myfun <- function(dat, ...) { |
|
196 |
#' mod_1 <- lm(data = dat, outcome ~ group) |
|
197 |
#' mod_2 <- lm(data = dat, outcome ~ group + covar) |
|
198 |
#' x <- list( |
|
199 |
#' trt_1 = list( |
|
200 |
#' est = coef(mod_1)[['group']], # Use [[ ]] for safety |
|
201 |
#' se = sqrt(vcov(mod_1)['group', 'group']), # Use ['',''] |
|
202 |
#' df = df.residual(mod_1) |
|
203 |
#' ), |
|
204 |
#' trt_2 = list( |
|
205 |
#' est = coef(mod_2)[['group']], # Use [[ ]] for safety |
|
206 |
#' se = sqrt(vcov(mod_2)['group', 'group']), # Use ['',''] |
|
207 |
#' df = df.residual(mod_2) |
|
208 |
#' ) |
|
209 |
#' ) |
|
210 |
#' return(x) |
|
211 |
#' } |
|
212 |
#' } |
|
213 |
#' |
|
214 |
#' Please note that the `vars$subjid` column (as defined in the original call to |
|
215 |
#' [rbmi::draws()]) will be scrambled in the data.frames that are provided to `fun`. |
|
216 |
#' This is to say they will not contain the original subject values and as such |
|
217 |
#' any hard coding of subject ids is strictly to be avoided. |
|
218 |
#' |
|
219 |
#' By default `fun` is the [rbmi_ancova()] function. |
|
220 |
#' Please note that this function |
|
221 |
#' requires that a `vars` object, as created by [rbmi::set_vars()], is provided via |
|
222 |
#' the `vars` argument e.g. `rbmi_analyse(imputeObj, vars = rbmi::set_vars(...))`. Please |
|
223 |
#' see the documentation for [rbmi_ancova()] for full details. |
|
224 |
#' Please also note that the theoretical justification for the conditional mean imputation |
|
225 |
#' method (`method = method_condmean()` in [rbmi::draws()]) relies on the fact that ANCOVA is |
|
226 |
#' a linear transformation of the outcomes. |
|
227 |
#' Thus care is required when applying alternative analysis functions in this setting. |
|
228 |
#' |
|
229 |
#' The `delta` argument can be used to specify offsets to be applied |
|
230 |
#' to the outcome variable in the imputed datasets prior to the analysis. |
|
231 |
#' This is typically used for sensitivity or tipping point analyses. The |
|
232 |
#' delta dataset must contain columns `vars$subjid`, `vars$visit` (as specified |
|
233 |
#' in the original call to [rbmi::draws()]) and `delta`. Essentially this `data.frame` |
|
234 |
#' is merged onto the imputed dataset by `vars$subjid` and `vars$visit` and then |
|
235 |
#' the outcome variable is modified by: |
|
236 |
#' |
|
237 |
#' ``` |
|
238 |
#' imputed_data[[vars$outcome]] <- imputed_data[[vars$outcome]] + imputed_data[['delta']] |
|
239 |
#' ``` |
|
240 |
#' |
|
241 |
#' Please note that in order to provide maximum flexibility, the `delta` argument |
|
242 |
#' can be used to modify any/all outcome values including those that were not |
|
243 |
#' imputed. Care must be taken when defining offsets. It is recommend that you |
|
244 |
#' use the helper function [rbmi::delta_template()] to define the delta datasets as |
|
245 |
#' this provides utility variables such as `is_missing` which can be used to identify |
|
246 |
#' exactly which visits have been imputed. |
|
247 |
#' |
|
248 |
#' @seealso [rbmi::extract_imputed_dfs()] for manually extracting imputed |
|
249 |
#' datasets. |
|
250 |
#' @seealso [rbmi::delta_template()] for creating delta data.frames. |
|
251 |
#' @seealso [rbmi_ancova()] for the default analysis function. |
|
252 |
#' |
|
253 |
#' @param imputations An `imputations` object as created by [rbmi::impute()]. |
|
254 |
#' @param fun An analysis function to be applied to each imputed dataset. See details. |
|
255 |
#' @param delta A `data.frame` containing the delta transformation to be applied to the imputed |
|
256 |
#' datasets prior to running `fun`. See details. |
|
257 |
#' @param ... Additional arguments passed onto `fun`. |
|
258 |
#' @param cluster_or_cores The number of parallel processes to use when running this function. Can also be a |
|
259 |
#' cluster object created by [`make_rbmi_cluster()`]. See the parallelisation section below. |
|
260 |
#' @param .validate Should `imputations` be checked to ensure it conforms to the required format |
|
261 |
#' (default = `TRUE`) ? Can gain a small performance increase if this is set to `FALSE` when |
|
262 |
#' analysing a large number of samples. |
|
263 |
#' |
|
264 |
#' @section Parallelisation: |
|
265 |
#' To speed up the evaluation of `rbmi_analyse()` you can use the `cluster_or_cores` argument to enable parallelisation. |
|
266 |
#' Simply providing an integer will get `rbmi` to automatically spawn that many background processes |
|
267 |
#' to parallelise across. If you are using a custom analysis function then you need to ensure |
|
268 |
#' that any libraries or global objects required by your function are available in the |
|
269 |
#' sub-processes. To do this you need to use the [`make_rbmi_cluster()`] function for example: |
|
270 |
#' ``` |
|
271 |
#' my_custom_fun <- function(...) <some analysis code> |
|
272 |
#' cl <- make_rbmi_cluster( |
|
273 |
#' 4, |
|
274 |
#' objects = list('my_custom_fun' = my_custom_fun), |
|
275 |
#' packages = c('dplyr', 'nlme') |
|
276 |
#' ) |
|
277 |
#' rbmi_analyse( |
|
278 |
#' imputations = imputeObj, |
|
279 |
#' fun = my_custom_fun, |
|
280 |
#' cluster_or_cores = cl |
|
281 |
#' ) |
|
282 |
#' parallel::stopCluster(cl) |
|
283 |
#' ``` |
|
284 |
#' |
|
285 |
#' Note that there is significant overhead both with setting up the sub-processes and with |
|
286 |
#' transferring data back-and-forth between the main process and the sub-processes. As such |
|
287 |
#' parallelisation of the `rbmi_analyse()` function tends to only be worth it when you have |
|
288 |
#' `> 2000` samples generated by [rbmi::draws()]. Conversely using parallelisation if your samples |
|
289 |
#' are smaller than this may lead to longer run times than just running it sequentially. |
|
290 |
#' |
|
291 |
#' It is important to note that the implementation of parallel processing within [rbmi::analyse()`] has |
|
292 |
#' been optimised around the assumption that the parallel processes will be spawned on the same |
|
293 |
#' machine and not a remote cluster. One such optimisation is that the required data is saved to |
|
294 |
#' a temporary file on the local disk from which it is then read into each sub-process. This is |
|
295 |
#' done to avoid the overhead of transferring the data over the network. Our assumption is that |
|
296 |
#' if you are at the stage where you need to be parallelising your analysis over a remote cluster |
|
297 |
#' then you would likely be better off parallelising across multiple `rbmi` runs rather than within |
|
298 |
#' a single `rbmi` run. |
|
299 |
#' |
|
300 |
#' Finally, if you are doing a tipping point analysis you can get a reasonable performance |
|
301 |
#' improvement by re-using the cluster between each call to `rbmi_analyse()` e.g. |
|
302 |
#' ``` |
|
303 |
#' cl <- make_rbmi_cluster(4) |
|
304 |
#' ana_1 <- rbmi_analyse( |
|
305 |
#' imputations = imputeObj, |
|
306 |
#' delta = delta_plan_1, |
|
307 |
#' cluster_or_cores = cl |
|
308 |
#' ) |
|
309 |
#' ana_2 <- rbmi_analyse( |
|
310 |
#' imputations = imputeObj, |
|
311 |
#' delta = delta_plan_2, |
|
312 |
#' cluster_or_cores = cl |
|
313 |
#' ) |
|
314 |
#' ana_3 <- rbmi_analyse( |
|
315 |
#' imputations = imputeObj, |
|
316 |
#' delta = delta_plan_3, |
|
317 |
#' cluster_or_cores = cl |
|
318 |
#' ) |
|
319 |
#' parallel::clusterStop(cl) |
|
320 |
#' ``` |
|
321 |
#' |
|
322 |
#' @return An `analysis` object, as defined by `rbmi`, representing the desired |
|
323 |
#' analysis applied to each of the imputed datasets in `imputations`. |
|
324 |
#' @examples |
|
325 |
#' library(rbmi) |
|
326 |
#' library(dplyr) |
|
327 |
#' |
|
328 |
#' dat <- antidepressant_data |
|
329 |
#' dat$GENDER <- as.factor(dat$GENDER) |
|
330 |
#' dat$POOLINV <- as.factor(dat$POOLINV) |
|
331 |
#' set.seed(123) |
|
332 |
#' pat_ids <- sample(levels(dat$PATIENT), nlevels(dat$PATIENT) / 4) |
|
333 |
#' dat <- dat |> |
|
334 |
#' filter(PATIENT %in% pat_ids) |> |
|
335 |
#' droplevels() |
|
336 |
#' dat <- expand_locf( |
|
337 |
#' dat, |
|
338 |
#' PATIENT = levels(dat$PATIENT), |
|
339 |
#' VISIT = levels(dat$VISIT), |
|
340 |
#' vars = c("BASVAL", "THERAPY"), |
|
341 |
#' group = c("PATIENT"), |
|
342 |
#' order = c("PATIENT", "VISIT") |
|
343 |
#' ) |
|
344 |
#' dat_ice <- dat |> |
|
345 |
#' arrange(PATIENT, VISIT) |> |
|
346 |
#' filter(is.na(CHANGE)) |> |
|
347 |
#' group_by(PATIENT) |> |
|
348 |
#' slice(1) |> |
|
349 |
#' ungroup() |> |
|
350 |
#' select(PATIENT, VISIT) |> |
|
351 |
#' mutate(strategy = "JR") |
|
352 |
#' dat_ice <- dat_ice[-which(dat_ice$PATIENT == 3618), ] |
|
353 |
#' vars <- set_vars( |
|
354 |
#' outcome = "CHANGE", |
|
355 |
#' visit = "VISIT", |
|
356 |
#' subjid = "PATIENT", |
|
357 |
#' group = "THERAPY", |
|
358 |
#' covariates = c("THERAPY") |
|
359 |
#' ) |
|
360 |
#' drawObj <- draws( |
|
361 |
#' data = dat, |
|
362 |
#' data_ice = dat_ice, |
|
363 |
#' vars = vars, |
|
364 |
#' method = method_condmean(type = "jackknife", covariance = "csh"), |
|
365 |
#' quiet = TRUE |
|
366 |
#' ) |
|
367 |
#' references <- c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO") |
|
368 |
#' imputeObj <- impute(drawObj, references) |
|
369 |
#' |
|
370 |
#' rbmi_analyse(imputations = imputeObj, vars = vars) |
|
371 |
#' @export |
|
372 |
rbmi_analyse <- function(imputations, fun = rbmi_ancova, delta = NULL, ..., cluster_or_cores = 1, .validate = TRUE) { |
|
373 |
# nocov |
|
374 | ||
375 | ! |
if (.validate) rbmi::validate(imputations) |
376 | ||
377 | ! |
assertthat::assert_that(is.function(fun), msg = "`fun` must be a function") |
378 | ||
379 | ! |
assertthat::assert_that(is.null(delta) | is.data.frame(delta), msg = "`delta` must be NULL or a data.frame") |
380 | ||
381 | ! |
vars <- imputations$data$vars |
382 | ||
383 | ! |
if (.validate) devnull <- lapply(imputations$imputations, function(x) rbmi::validate(x)) |
384 | ||
385 | ! |
if (!is.null(delta)) { |
386 | ! |
expected_vars <- c(vars$subjid, vars$visit, "delta") |
387 | ! |
assertthat::assert_that( |
388 | ! |
all(expected_vars %in% names(delta)), |
389 | ! |
msg = sprintf("The following variables must exist witin `delta`: `%s`", paste0(expected_vars, collapse = "`, `")) |
390 |
) |
|
391 |
} |
|
392 | ||
393 |
# Mangle name to avoid any conflicts with user defined objects if running in a cluster |
|
394 | ! |
..rbmi..analysis..imputations <- imputations |
395 | ! |
..rbmi..analysis..delta <- delta |
396 | ! |
..rbmi..analysis..fun <- fun |
397 | ! |
objects <- list( |
398 | ! |
..rbmi..analysis..imputations = ..rbmi..analysis..imputations, |
399 | ! |
..rbmi..analysis..delta = ..rbmi..analysis..delta, |
400 | ! |
..rbmi..analysis..fun = ..rbmi..analysis..fun |
401 |
) |
|
402 | ||
403 | ! |
cl <- make_rbmi_cluster(cluster_or_cores) |
404 | ||
405 | ! |
if (methods::is(cl, "cluster")) { |
406 | ! |
..rbmi..analysis..data..path <- tempfile() |
407 | ! |
saveRDS(objects, file = ..rbmi..analysis..data..path, compress = FALSE) |
408 | ! |
devnull <- parallel::clusterExport(cl, "..rbmi..analysis..data..path", environment()) |
409 | ! |
devnull <- parallel::clusterEvalQ(cl, { |
410 | ! |
..rbmi..analysis..objects <- readRDS(..rbmi..analysis..data..path) |
411 | ! |
list2env(..rbmi..analysis..objects, envir = environment()) |
412 |
}) |
|
413 |
} |
|
414 | ||
415 |
# If the user provided the clusters object directly then do not close it on completion |
|
416 | ! |
if (!methods::is(cluster_or_cores, "cluster")) { |
417 | ! |
on.exit( |
418 |
{ |
|
419 | ! |
if (!is.null(cl)) parallel::stopCluster(cl) |
420 |
}, |
|
421 | ! |
add = TRUE, |
422 | ! |
after = FALSE |
423 |
) |
|
424 |
} |
|
425 | ||
426 |
# Chunk up requests for significant speed improvement when running in parallel |
|
427 | ! |
number_of_cores <- ifelse(is.null(cl), 1, length(cl)) |
428 | ! |
indexes <- seq_along(imputations$imputations) |
429 | ! |
indexes_split <- split(indexes, (indexes %% number_of_cores) + 1) |
430 | ||
431 | ! |
results <- par_lapply( |
432 | ! |
cl, |
433 | ! |
function(indicies, ...) { |
434 | ! |
inner_fun <- function(idx, ...) { |
435 | ! |
dat2 <- (utils::getFromNamespace("extract_imputed_df", "rbmi"))( |
436 | ! |
..rbmi..analysis..imputations$imputations[[idx]], |
437 | ! |
..rbmi..analysis..imputations$data, |
438 | ! |
..rbmi..analysis..delta |
439 |
) |
|
440 | ! |
..rbmi..analysis..fun(dat2, ...) |
441 |
} |
|
442 | ! |
lapply(indicies, inner_fun, ...) |
443 |
}, |
|
444 | ! |
indexes_split, |
445 |
... |
|
446 |
) |> |
|
447 | ! |
unlist(recursive = FALSE, use.names = FALSE) |
448 | ||
449 |
# Re-order to ensure results are returned in same order as imputations |
|
450 | ! |
results <- results[order(unlist(indexes_split, use.names = FALSE))] |
451 | ! |
names(results) <- NULL |
452 | ||
453 | ! |
fun_name <- deparse(substitute(fun)) |
454 | ! |
if (length(fun_name) > 1) { |
455 | ! |
fun_name <- "<Anonymous Function>" |
456 | ! |
} else if (is.null(fun_name)) { |
457 | ! |
fun_name <- "<NULL>" |
458 |
} |
|
459 | ||
460 | ! |
ret <- (utils::getFromNamespace("as_analysis", "rbmi"))( |
461 | ! |
results = results, |
462 | ! |
fun_name = fun_name, |
463 | ! |
delta = delta, |
464 | ! |
fun = fun, |
465 | ! |
method = imputations$method |
466 |
) |
|
467 | ! |
rbmi::validate(ret) |
468 | ! |
return(ret) |
469 |
} |
1 |
#' Tabulation for Exposure Tables |
|
2 |
#' |
|
3 |
#' |
|
4 |
#' @details |
|
5 |
#' Creates statistics needed for standard exposure table. |
|
6 |
#' This includes differences and 95% CI and total treatment years. |
|
7 |
#' This is designed to be used as an analysis (afun in `analyze`) function. |
|
8 |
#' |
|
9 |
#' @name a_summarize_ex_j |
|
10 |
NULL |
|
11 | ||
12 | ||
13 |
#' @inheritParams proposal_argument_convention |
|
14 |
#' @describeIn a_summarize_ex_j Statistics function needed for the exposure tables. |
|
15 |
#' |
|
16 |
#' @param daysconv (`numeric`)\cr conversion required to get the values into days |
|
17 |
#' (i.e 1 if original PARAMCD unit is days, 30.4375 if original PARAMCD unit is in months) |
|
18 |
#' @param ancova (`logical`)\cr If FALSE, only descriptive methods will be used. \cr |
|
19 |
#' If TRUE, ANCOVA methods will be used for each of the columns : AVAL, CHG, DIFF. \cr |
|
20 |
#' @param comp_btw_group (`logical`)\cr If TRUE, comparison between groups will be performed. |
|
21 |
#' \cr When ancova = FALSE, the estimate of between group difference (on CHG) will be based upon two-sample t-test. |
|
22 |
#' \cr When ancova = TRUE, the same ANCOVA model will be used for the estimate of between group difference (on CHG). |
|
23 |
#' @param interaction_y (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE. |
|
24 |
#' @param interaction_item (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE. |
|
25 |
#' @param conf_level (`proportion`)\cr Confidence level of the interval |
|
26 |
#' @param variables (named list of strings)\cr |
|
27 |
#' list of additional analysis variables, with expected elements: |
|
28 |
#' * arm (string)\cr |
|
29 |
#' group variable, for which the covariate adjusted means of multiple groups will be summarized. |
|
30 |
#' Specifically, the first level of arm variable is taken as the reference group. |
|
31 |
#' * covariates (character)\cr |
|
32 |
#' a vector that can contain single variable names (such as 'X1'), and/or interaction terms indicated by 'X1 * X2'. |
|
33 |
s_summarize_ex_j <- function( |
|
34 |
df, |
|
35 |
.var, |
|
36 |
.df_row, |
|
37 |
.spl_context, |
|
38 |
comp_btw_group = TRUE, |
|
39 |
ref_path = NULL, |
|
40 |
ancova = FALSE, |
|
41 |
interaction_y, |
|
42 |
interaction_item, |
|
43 |
conf_level, |
|
44 |
daysconv, |
|
45 |
variables) { |
|
46 | 5x |
control <- control_analyze_vars() |
47 | 5x |
control$conf_level <- conf_level |
48 | 5x |
x_stats <- s_summary(df[[.var]], na.rm = TRUE, .var, control = control) |
49 |
## add extra for subject years |
|
50 | 5x |
subj_years <- x_stats[["sum"]] * daysconv / 365.25 |
51 | 5x |
x_stats[["total_subject_years"]] <- c(x_stats[["sum"]], subj_years) |
52 | 5x |
names(x_stats[["total_subject_years"]]) <- c("total", "subject_years") |
53 | ||
54 | 5x |
cur_col_id <- .spl_context$cur_col_id[[length(.spl_context$split)]] |
55 | 5x |
indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE) |
56 | ||
57 | 5x |
if (indiffcol) { |
58 |
# blank out all stats |
|
59 | 2x |
x_stats <- sapply( |
60 | 2x |
names(x_stats), |
61 | 2x |
FUN = function(x) { |
62 | 60x |
x_stats[[x]] <- NULL |
63 |
}, |
|
64 | 2x |
simplify = FALSE, |
65 | 2x |
USE.NAMES = TRUE |
66 |
) |
|
67 |
# diff between group will be updated in mean_sd stat |
|
68 | 2x |
if (comp_btw_group) { |
69 | 2x |
trt_var_refpath <- h_get_trtvar_refpath(ref_path, .spl_context, df) |
70 |
# trt_var_refpath is list with elements trt_var trt_var_refspec cur_trt_grp ctrl_grp make these elements |
|
71 |
# available in current environment |
|
72 | 2x |
trt_var <- trt_var_refpath$trt_var |
73 | 2x |
trt_var_refspec <- trt_var_refpath$trt_var_refspec |
74 | 2x |
cur_trt_grp <- trt_var_refpath$cur_trt_grp |
75 | 2x |
ctrl_grp <- trt_var_refpath$ctrl_grp |
76 | ||
77 | 2x |
.in_ref_col <- FALSE |
78 | ! |
if (trt_var == ctrl_grp) .in_ref_col <- TRUE |
79 | ||
80 | 2x |
.ref_group <- .df_row[.df_row[[trt_var]] == ctrl_grp, ] |
81 | ||
82 | 2x |
if (ancova) { |
83 |
# ancova method for diff between group |
|
84 | 2x |
x_stats2 <- s_summarize_ancova_j( |
85 | 2x |
df = df, |
86 | 2x |
.var = .var, |
87 | 2x |
.ref_group = .ref_group, |
88 | 2x |
.in_ref_col = .in_ref_col, |
89 | 2x |
.df_row = .df_row, |
90 | 2x |
conf_level = conf_level, |
91 | 2x |
interaction_y = interaction_y, |
92 | 2x |
interaction_item = interaction_item, |
93 | 2x |
variables = variables |
94 |
) |
|
95 | 2x |
diffstat <- x_stats2[["lsmean_diffci"]] |
96 |
} else { |
|
97 |
# descriptive method for diff between group |
|
98 | ! |
x_stats2 <- s_summarize_desc_j( |
99 | ! |
df = df, |
100 | ! |
.var = .var, |
101 | ! |
.ref_group = .ref_group, |
102 | ! |
.in_ref_col = .in_ref_col, |
103 | ! |
control = control |
104 |
) |
|
105 | ! |
diffstat <- x_stats2[["mean_diffci"]] |
106 |
} |
|
107 |
# actual update with the diffstat |
|
108 | 2x |
x_stats[["mean_sd"]] <- diffstat |
109 |
} |
|
110 |
} |
|
111 | ||
112 | 5x |
return(x_stats) |
113 |
} |
|
114 | ||
115 |
#' @title Analysis Function For Exposure Tables |
|
116 |
#' @description |
|
117 |
#' A function to create the appropriate statistics needed for exposure table |
|
118 |
#' @inheritParams proposal_argument_convention |
|
119 |
#' |
|
120 |
#' @describeIn a_summarize_ex_j Formatted analysis function which is used as `afun`. |
|
121 |
#' |
|
122 |
#' @return |
|
123 |
#' * `a_summarize_ex_j()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
124 |
#' |
|
125 |
#' @aliases a_summarize_ex_j |
|
126 |
#' @examples |
|
127 |
#' library(dplyr) |
|
128 |
#' ADEX <- ex_adsl %>% select(USUBJID, ARM, TRTSDTM, EOSSTT, EOSDY) |
|
129 |
#' |
|
130 |
#' trtvar <- "ARM" |
|
131 |
#' ctrl_grp <- "B: Placebo" |
|
132 |
#' cutoffd <- as.Date("2023-09-24") |
|
133 |
#' |
|
134 |
#' ADEX <- ADEX |> |
|
135 |
#' create_colspan_var( |
|
136 |
#' non_active_grp = ctrl_grp, |
|
137 |
#' non_active_grp_span_lbl = " ", |
|
138 |
#' active_grp_span_lbl = "Active Study Agent", |
|
139 |
#' colspan_var = "colspan_trt", |
|
140 |
#' trt_var = trtvar |
|
141 |
#' ) |> |
|
142 |
#' mutate( |
|
143 |
#' diff_header = "Difference in Means (95% CI)", |
|
144 |
#' diff_label = paste(!!rlang::sym(trtvar), "vs", ctrl_grp), |
|
145 |
#' TRTDURY = case_when( |
|
146 |
#' !is.na(EOSDY) ~ EOSDY, |
|
147 |
#' TRUE ~ as.integer(cutoffd - as.Date(TRTSDTM) + 1) |
|
148 |
#' ) |
|
149 |
#' ) |
|
150 |
#' |
|
151 |
#' colspan_trt_map <- create_colspan_map(ADEX, |
|
152 |
#' non_active_grp = ctrl_grp, |
|
153 |
#' non_active_grp_span_lbl = " ", |
|
154 |
#' active_grp_span_lbl = "Active Study Agent", |
|
155 |
#' colspan_var = "colspan_trt", |
|
156 |
#' trt_var = trtvar |
|
157 |
#' ) |
|
158 |
#' |
|
159 |
#' ref_path <- c("colspan_trt", "", trtvar, ctrl_grp) |
|
160 |
#' |
|
161 |
#' lyt <- basic_table() |> |
|
162 |
#' split_cols_by( |
|
163 |
#' "colspan_trt", |
|
164 |
#' split_fun = trim_levels_to_map(map = colspan_trt_map) |
|
165 |
#' ) |> |
|
166 |
#' split_cols_by(trtvar) |> |
|
167 |
#' split_cols_by("diff_header", nested = FALSE) |> |
|
168 |
#' split_cols_by( |
|
169 |
#' trtvar, |
|
170 |
#' split_fun = remove_split_levels(ctrl_grp), |
|
171 |
#' labels_var = "diff_label" |
|
172 |
#' ) |> |
|
173 |
#' analyze("EOSDY", |
|
174 |
#' afun = a_summarize_ex_j, var_labels = "Duration of treatment (Days)", |
|
175 |
#' show_labels = "visible", |
|
176 |
#' indent_mod = 0L, |
|
177 |
#' extra_args = list( |
|
178 |
#' daysconv = 1, |
|
179 |
#' ref_path = ref_path, |
|
180 |
#' variables = list(arm = trtvar, covariates = NULL), |
|
181 |
#' ancova = TRUE, |
|
182 |
#' comp_btw_group = TRUE |
|
183 |
#' ) |
|
184 |
#' ) |
|
185 |
#' |
|
186 |
#' result <- build_table(lyt, ADEX, alt_counts_df = ADEX) |
|
187 |
#' result |
|
188 |
#' @export |
|
189 |
a_summarize_ex_j <- function( |
|
190 |
df, |
|
191 |
.var, |
|
192 |
.df_row, |
|
193 |
.spl_context, |
|
194 |
comp_btw_group = TRUE, |
|
195 |
ref_path = NULL, |
|
196 |
ancova = FALSE, |
|
197 |
interaction_y = FALSE, |
|
198 |
interaction_item = NULL, |
|
199 |
conf_level = 0.95, |
|
200 |
variables, |
|
201 |
.stats = c("mean_sd", "median", "range", "quantiles", "total_subject_years"), |
|
202 |
.formats = c(diff_mean_est_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)")), |
|
203 |
.labels = c(quantiles = "Interquartile range"), |
|
204 |
.indent_mods = NULL, |
|
205 |
na_str = rep("NA", 3), |
|
206 |
daysconv = 1) { |
|
207 | 5x |
if (!is.numeric(df[[.var]])) { |
208 | ! |
stop("a_summarize_ex_j issue: input variable must be numeric.") |
209 |
} |
|
210 | ||
211 | 5x |
if (comp_btw_group && is.null(ref_path)) { |
212 | ! |
stop("a_summarize_ex_j issue: argument ref_path cannot be NULL.") |
213 |
} |
|
214 | ||
215 | 5x |
if (comp_btw_group && ancova && is.null(variables)) { |
216 | ! |
stop("a_summarize_ex_j issue: argument variables must be defined when ancova is requested.") |
217 |
} |
|
218 | ||
219 | 5x |
x_stats <- s_summarize_ex_j( |
220 | 5x |
df = df, |
221 | 5x |
.var = .var, |
222 | 5x |
.df_row = .df_row, |
223 | 5x |
.spl_context = .spl_context, |
224 | 5x |
comp_btw_group = comp_btw_group, |
225 | 5x |
ref_path = ref_path, |
226 | 5x |
ancova = ancova, |
227 | 5x |
interaction_y = interaction_y, |
228 | 5x |
interaction_item = interaction_item, |
229 | 5x |
conf_level = conf_level, |
230 | 5x |
daysconv = daysconv, |
231 | 5x |
variables = variables |
232 |
) |
|
233 | ||
234 |
# Fill in formatting defaults |
|
235 | 5x |
.stats_in <- .stats |
236 | 5x |
.stats <- tern_get_stats("analyze_vars_numeric", stats_in = .stats, custom_stats_in = NULL) |
237 | 5x |
if ("total_subject_years" %in% .stats_in) { |
238 |
# place the extra statistic at the appropriate place within .stats vector |
|
239 | 5x |
i <- match("total_subject_years", .stats_in) |
240 | 5x |
x <- .stats_in[i:length(.stats_in)] |
241 | 5x |
if (length(x) == 1) { |
242 | 5x |
.stats <- c(.stats, "total_subject_years") |
243 |
} else { |
|
244 | ! |
i2 <- min(match(x, .stats), na.rm = TRUE) |
245 | ! |
if (i2 == 1) { |
246 | ! |
.stats <- c("total_subject_years", .stats) |
247 |
} else { |
|
248 | ! |
.stats <- c(.stats[1:(i2 - 1)], "total_subject_years", .stats[i2:length(.stats)]) |
249 |
} |
|
250 |
} |
|
251 |
} |
|
252 | ||
253 | 5x |
.stats_ext <- c(.stats, "diff_mean_est_ci") |
254 | ||
255 | 5x |
.formats <- junco_get_formats_from_stats(.stats_ext, .formats) |
256 | 5x |
.labels <- junco_get_labels_from_stats(.stats, .labels, label_attr_from_stats = get_label_attr_from_stats(x_stats)) |
257 | 5x |
.indent_mods <- junco_get_indents_from_stats(.stats, .indent_mods) |
258 | ||
259 | 5x |
.names <- names(.labels) |
260 | 5x |
.labels <- .unlist_keep_nulls(.labels) |
261 | 5x |
.indent_mods <- .unlist_keep_nulls(.indent_mods) |
262 | ||
263 | 5x |
cur_col_id <- .spl_context$cur_col_id[[length(.spl_context$split)]] |
264 | 5x |
indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE) |
265 | ||
266 | 5x |
if (indiffcol && comp_btw_group) { |
267 | 2x |
.formats[["mean_sd"]] <- .formats[["diff_mean_est_ci"]] |
268 |
} |
|
269 | 5x |
.formats[["diff_mean_est_ci"]] <- NULL |
270 | ||
271 | 5x |
if (!is.null(na_str)) { |
272 | 5x |
.format_na_strs <- lapply(names(.formats), FUN = function(x) { |
273 | 25x |
na_str |
274 |
}) |
|
275 |
} else { |
|
276 | ! |
.format_na_strs <- NULL |
277 |
} |
|
278 | ||
279 | 5x |
x_stats <- x_stats[.stats] |
280 | 5x |
ret <- in_rows( |
281 | 5x |
.list = x_stats, |
282 | 5x |
.formats = .formats, |
283 | 5x |
.names = .names, |
284 | 5x |
.labels = .labels, |
285 | 5x |
.indent_mods = .indent_mods, |
286 | 5x |
.format_na_strs = .format_na_strs |
287 |
) |
|
288 | 5x |
return(ret) |
289 |
} |
1 |
#' Adding Labels To Variables For Model |
|
2 |
#' |
|
3 |
#' @param vars (`list`)\cr variables to use. |
|
4 |
#' @param data (`data.frame`)\cr data to use. |
|
5 |
#' @param x (`character`)\cr an element in vars. |
|
6 |
#' |
|
7 |
#' @name labels |
|
8 |
#' @keywords internal |
|
9 |
NULL |
|
10 | ||
11 |
#' @describeIn labels checks if element in `vars` is not `NULL` and not empty. |
|
12 |
h_is_specified <- function(x, vars) { |
|
13 | 112x |
!is.null(vars[[x]]) && (length(vars[[x]]) > 0) |
14 |
} |
|
15 | ||
16 |
#' @describeIn labels checks if element in vars is not NULL and exists in dataset. |
|
17 |
h_is_specified_and_in_data <- function(x, vars, data) { |
|
18 | 70x |
h_is_specified(x, vars) && all(vars[[x]] %in% names(data)) |
19 |
} |
|
20 | ||
21 |
#' @describeIn labels gets label for each element in vars. |
|
22 |
h_check_and_get_label <- function(x, vars, data) { |
|
23 | 70x |
checkmate::assert_true(h_is_specified_and_in_data(x, vars, data)) |
24 | 70x |
res <- NULL |
25 | 70x |
for (v in vars[[x]]) { |
26 | 84x |
label <- attr(data[[v]], "label") |
27 | 84x |
string <- ifelse(!is.null(label), label, v) |
28 | 84x |
res <- c(res, stats::setNames(string, v)) |
29 |
} |
|
30 | 70x |
res |
31 |
} |
|
32 | ||
33 |
#' Extraction of Covariate Parts from Character Vector |
|
34 |
#' |
|
35 |
#' @param covariates (`character`)\cr specification in the usual way, see examples. |
|
36 |
#' |
|
37 |
#' @return Character vector of the covariates involved in `covariates` specification. |
|
38 |
#' @keywords internal |
|
39 |
h_get_covariate_parts <- function(covariates) { |
|
40 | 14x |
checkmate::assert_character(covariates, null.ok = TRUE) |
41 | 14x |
if (is.null(covariates)) { |
42 | ! |
NULL |
43 |
} else { |
|
44 | 14x |
unique(unlist(strsplit(covariates, split = "\\*|:"))) |
45 |
} |
|
46 |
} |
|
47 | ||
48 |
#' @describeIn labels returns the list of variables with labels. |
|
49 |
h_labels <- function(vars, data) { |
|
50 | 14x |
checkmate::assert_list(vars) |
51 | 14x |
checkmate::assert_data_frame(data) |
52 | 14x |
labels <- list() |
53 | 14x |
labels$response <- h_check_and_get_label("response", vars, data) |
54 | 14x |
labels$id <- h_check_and_get_label("id", vars, data) |
55 | 14x |
labels$visit <- h_check_and_get_label("visit", vars, data) |
56 | 14x |
if (h_is_specified("arm", vars)) { |
57 | 14x |
labels$arm <- h_check_and_get_label("arm", vars, data) |
58 |
} |
|
59 | 14x |
if (h_is_specified("covariates", vars)) { |
60 | 14x |
vars$parts <- h_get_covariate_parts(vars$covariates) |
61 | 14x |
labels$parts <- h_check_and_get_label("parts", vars, data) |
62 |
} |
|
63 | 14x |
if (h_is_specified("weights", vars)) { |
64 | ! |
labels$weights <- h_check_and_get_label("weights", vars, data) |
65 |
} |
|
66 | 14x |
return(labels) |
67 |
} |
|
68 | ||
69 |
#' Building Model Formula |
|
70 |
#' |
|
71 |
#' This builds the model formula which is used inside [fit_mmrm_j()] and provided |
|
72 |
#' to [mmrm::mmrm()] internally. It can be instructive to look at the resulting |
|
73 |
#' formula directly sometimes. |
|
74 |
#' |
|
75 |
#' @param vars (`list`)\cr variables to use in the model. |
|
76 |
#' @param cor_struct (`string`)\cr specify the covariance structure to use. |
|
77 |
#' @return Formula to use in [mmrm::mmrm()]. |
|
78 |
#' @export |
|
79 |
#' |
|
80 |
#' @examples |
|
81 |
#' vars <- list( |
|
82 |
#' response = "AVAL", covariates = c("RACE", "SEX"), |
|
83 |
#' id = "USUBJID", arm = "ARMCD", visit = "AVISIT" |
|
84 |
#' ) |
|
85 |
#' build_formula(vars, "auto-regressive") |
|
86 |
#' build_formula(vars) |
|
87 |
build_formula <- function( |
|
88 |
vars, |
|
89 |
cor_struct = c( |
|
90 |
"unstructured", |
|
91 |
"toeplitz", |
|
92 |
"heterogeneous toeplitz", |
|
93 |
"ante-dependence", |
|
94 |
"heterogeneous ante-dependence", |
|
95 |
"auto-regressive", |
|
96 |
"heterogeneous auto-regressive", |
|
97 |
"compound symmetry", |
|
98 |
"heterogeneous compound symmetry" |
|
99 |
)) { |
|
100 | 9x |
checkmate::assert_list(vars) |
101 | 9x |
cor_struct <- match.arg(cor_struct) |
102 | 9x |
covariates_part <- paste(vars$covariates, collapse = " + ") |
103 | 9x |
arm_visit_part <- if (is.null(vars$arm)) { |
104 | ! |
vars$visit |
105 |
} else { |
|
106 | 9x |
paste0(vars$arm, "*", vars$visit) |
107 |
} |
|
108 | 9x |
random_effects_fun <- switch(cor_struct, |
109 | 9x |
unstructured = "us", |
110 | 9x |
toeplitz = "toep", |
111 | 9x |
`heterogeneous toeplitz` = "toeph", |
112 | 9x |
`ante-dependence` = "ad", |
113 | 9x |
`heterogeneous ante-dependence` = "adh", |
114 | 9x |
`auto-regressive` = "ar1", |
115 | 9x |
`heterogeneous auto-regressive` = "ar1h", |
116 | 9x |
`compound symmetry` = "cs", |
117 | 9x |
`heterogeneous compound symmetry` = "csh" |
118 |
) |
|
119 | 9x |
random_effects_part <- paste0(random_effects_fun, "(", vars$visit, " | ", vars$id, ")") |
120 | 9x |
rhs_formula <- paste(arm_visit_part, "+", random_effects_part) |
121 | 9x |
if (covariates_part != "") { |
122 | 9x |
rhs_formula <- paste(covariates_part, "+", rhs_formula) |
123 |
} |
|
124 | 9x |
stats::as.formula(paste(vars$response, "~", rhs_formula)) |
125 |
} |
|
126 | ||
127 |
#' Extract Least Square Means from `MMRM` |
|
128 |
#' |
|
129 |
#' Extracts the least square means from an `MMRM` fit. |
|
130 |
#' |
|
131 |
#' @param fit (`mmrm`)\cr result of [mmrm::mmrm()]. |
|
132 |
#' @inheritParams fit_mmrm_j |
|
133 |
#' @param averages (`list`)\cr named list of visit levels which should be averaged |
|
134 |
#' and reported along side the single visits. |
|
135 |
#' @param weights (`string`)\cr type of weights to be used for the least square means, |
|
136 |
#' see [emmeans::emmeans()] for details. |
|
137 |
#' @return A list with data frames `estimates` and `contrasts`. |
|
138 |
#' The attributes `averages` and `weights` save the settings used. |
|
139 |
#' |
|
140 |
#' @export |
|
141 |
get_mmrm_lsmeans <- function(fit, vars, conf_level, weights, averages = list()) { |
|
142 | 10x |
checkmate::assert_class(fit, "mmrm") |
143 | 10x |
checkmate::assert_list(averages, types = "character") |
144 | 10x |
emmeans_res <- h_get_emmeans_res(fit, vars, weights) |
145 | ||
146 |
# Get least square means estimates for single visits, and possibly averaged visits. |
|
147 | 10x |
estimates <- h_get_single_visit_estimates(emmeans_res, conf_level) |
148 | 10x |
if (length(averages)) { |
149 | 4x |
average_specs <- h_get_average_visit_specs(emmeans_res, vars, averages, fit) |
150 | 4x |
average_estimates <- h_get_spec_visit_estimates(emmeans_res, average_specs, conf_level) |
151 | 4x |
estimates <- rbind(estimates, average_estimates) |
152 |
} |
|
153 | 10x |
has_arm_var <- !is.null(vars$arm) |
154 | 10x |
if (!has_arm_var) { |
155 | ! |
return(list(estimates = estimates)) |
156 |
} |
|
157 |
# Continue with contrasts when we have an arm variable. |
|
158 | 10x |
contrast_specs <- h_single_visit_contrast_specs(emmeans_res, vars) |
159 | 10x |
contrast_estimates <- h_get_spec_visit_estimates(emmeans_res, contrast_specs, conf_level, tests = TRUE) |
160 | 10x |
if (length(averages)) { |
161 | 4x |
average_contrast_specs <- h_average_visit_contrast_specs(contrast_specs, averages) |
162 | 4x |
average_contrasts <- h_get_spec_visit_estimates(emmeans_res, average_contrast_specs, conf_level, tests = TRUE) |
163 | 4x |
contrast_estimates <- rbind(contrast_estimates, average_contrasts) |
164 |
} |
|
165 | ||
166 | 10x |
relative_reduc_df <- h_get_relative_reduc_df(estimates, vars) |
167 | 10x |
contrast_estimates <- merge(contrast_estimates, relative_reduc_df, by = c(vars$arm, vars$visit), sort = FALSE) |
168 | 10x |
contrast_estimates[[vars$arm]] <- factor(contrast_estimates[[vars$arm]]) |
169 | 10x |
contrast_estimates[[vars$visit]] <- factor(contrast_estimates[[vars$visit]]) |
170 | 10x |
structure(list(estimates = estimates, contrasts = contrast_estimates), averages = averages, weights = weights) |
171 |
} |
|
172 | ||
173 |
#' `MMRM` Analysis |
|
174 |
#' |
|
175 |
#' Does the `MMRM` analysis. Multiple other functions can be called on the result to produce |
|
176 |
#' tables and graphs. |
|
177 |
#' |
|
178 |
#' @param vars (named `list` of `string` or `character`)\cr specifying the variables in the `MMRM`. |
|
179 |
#' The following elements need to be included as character vectors and match corresponding columns |
|
180 |
#' in `data`: |
|
181 |
#' |
|
182 |
#' - `response`: the response variable. |
|
183 |
#' - `covariates`: the additional covariate terms (might also include interactions). |
|
184 |
#' - `id`: the subject ID variable. |
|
185 |
#' - `arm`: the treatment group variable (factor). |
|
186 |
#' - `visit`: the visit variable (factor). |
|
187 |
#' - `weights`: optional weights variable (if `NULL` or omitted then no weights will be used). |
|
188 |
#' |
|
189 |
#' Note that the main effects and interaction of `arm` and `visit` are by default |
|
190 |
#' included in the model. |
|
191 |
#' @param data (`data.frame`)\cr with all the variables specified in |
|
192 |
#' `vars`. Records with missing values in any independent variables |
|
193 |
#' will be excluded. |
|
194 |
#' @param conf_level (`proportion`)\cr confidence level of the interval. |
|
195 |
#' @param cor_struct (`string`)\cr specifying the covariance structure, defaults to |
|
196 |
#' `'unstructured'`. See the details. |
|
197 |
#' @param averages_emmeans (`list`)\cr optional named list of visit levels which should be averaged |
|
198 |
#' and reported along side the single visits. |
|
199 |
#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `'counterfactual'` by default. |
|
200 |
#' @param ... additional arguments for [mmrm::mmrm()], in particular `reml` and options listed in |
|
201 |
#' [mmrm::mmrm_control()]. |
|
202 |
#' |
|
203 |
#' @details Multiple different degree of freedom adjustments are available via the `method` argument |
|
204 |
#' for [mmrm::mmrm()]. In addition, covariance matrix adjustments are available via `vcov`. |
|
205 |
#' Please see [mmrm::mmrm_control()] for details and additional useful options. |
|
206 |
#' |
|
207 |
#' For the covariance structure (`cor_struct`), the user can choose among the following options. |
|
208 |
#' |
|
209 |
#' - `unstructured`: Unstructured covariance matrix. This is the most flexible choice and default. |
|
210 |
#' If there are `T` visits, then `T * (T+1) / 2` variance parameters are used. |
|
211 |
#' - `toeplitz`: Homogeneous Toeplitz covariance matrix, which uses `T` variance parameters. |
|
212 |
#' - `heterogeneous toeplitz`: Heterogeneous Toeplitz covariance matrix, |
|
213 |
#' which uses `2 * T - 1` variance parameters. |
|
214 |
#' - `ante-dependence`: Homogeneous Ante-Dependence covariance matrix, which uses `T` variance parameters. |
|
215 |
#' - `heterogeneous ante-dependence`: Heterogeneous Ante-Dependence covariance matrix, |
|
216 |
#' which uses `2 * T - 1` variance parameters. |
|
217 |
#' - `auto-regressive`: Homogeneous Auto-Regressive (order 1) covariance matrix, |
|
218 |
#' which uses 2 variance parameters. |
|
219 |
#' - `heterogeneous auto-regressive`: Heterogeneous Auto-Regressive (order 1) covariance matrix, |
|
220 |
#' which uses `T + 1` variance parameters. |
|
221 |
#' - `compound symmetry`: Homogeneous Compound Symmetry covariance matrix, which uses 2 |
|
222 |
#' variance parameters. |
|
223 |
#' - `heterogeneous compound symmetry`: Heterogeneous Compound Symmetry covariance matrix, which uses |
|
224 |
#' `T + 1` variance parameters. |
|
225 |
#' |
|
226 |
#' @return A `tern_model` object which is a list with model results: |
|
227 |
#' |
|
228 |
#' - `fit`: The `mmrm` object which was fitted to the data. Note that via `mmrm::component(fit, 'optimizer')` |
|
229 |
#' the finally used optimization algorithm can be obtained, which can be useful for refitting the model |
|
230 |
#' later on. |
|
231 |
#' - `cov_estimate`: The matrix with the covariance matrix estimate. |
|
232 |
#' - `diagnostics`: A list with model diagnostic statistics (REML criterion, AIC, corrected AIC, BIC). |
|
233 |
#' - `lsmeans`: This is a list with data frames `estimates` and `contrasts`. |
|
234 |
#' The attributes `averages` and `weights` save the settings used |
|
235 |
#' (`averages_emmeans` and `weights_emmeans`). |
|
236 |
#' - `vars`: The variable list. |
|
237 |
#' - `labels`: Corresponding list with variable labels extracted from `data`. |
|
238 |
#' - `cor_struct`: input. |
|
239 |
#' - `ref_level`: The reference level for the arm variable, which is always the first level. |
|
240 |
#' - `treatment_levels`: The treatment levels for the arm variable. |
|
241 |
#' - `conf_level`: The confidence level which was used to construct the `lsmeans` confidence intervals. |
|
242 |
#' - `additional`: List with any additional inputs passed via `...` |
|
243 |
#' |
|
244 |
#' @export |
|
245 |
#' |
|
246 |
#' @note This function has the `_j` suffix to distinguish it from [mmrm::fit_mmrm()]. |
|
247 |
#' It is a copy from the `tern.mmrm` package and later will be replaced by tern.mmrm::fit_mmrm(). |
|
248 |
#' No new features are included in this function here. |
|
249 |
#' |
|
250 |
#' @examples |
|
251 |
#' mmrm_results <- fit_mmrm_j( |
|
252 |
#' vars = list( |
|
253 |
#' response = "FEV1", |
|
254 |
#' covariates = c("RACE", "SEX"), |
|
255 |
#' id = "USUBJID", |
|
256 |
#' arm = "ARMCD", |
|
257 |
#' visit = "AVISIT" |
|
258 |
#' ), |
|
259 |
#' data = mmrm::fev_data, |
|
260 |
#' cor_struct = "unstructured", |
|
261 |
#' weights_emmeans = "equal", |
|
262 |
#' averages_emmeans = list( |
|
263 |
#' "VIS1+2" = c("VIS1", "VIS2") |
|
264 |
#' ) |
|
265 |
#' ) |
|
266 |
fit_mmrm_j <- function( |
|
267 |
vars = list(response = "AVAL", covariates = c(), id = "USUBJID", arm = "ARM", visit = "AVISIT"), |
|
268 |
data, |
|
269 |
conf_level = 0.95, |
|
270 |
cor_struct = "unstructured", |
|
271 |
weights_emmeans = "counterfactual", |
|
272 |
averages_emmeans = list(), |
|
273 |
...) { |
|
274 | 9x |
labels <- h_labels(vars, data) |
275 | 9x |
formula <- build_formula(vars, cor_struct) |
276 | 9x |
weights <- if (!is.null(vars$weights)) data[[vars$weights]] else NULL |
277 | ||
278 | 9x |
fit <- mmrm::mmrm(formula = formula, data = data, weights = weights, reml = TRUE, ...) |
279 | 9x |
lsmeans <- get_mmrm_lsmeans( |
280 | 9x |
fit = fit, |
281 | 9x |
vars = vars, |
282 | 9x |
conf_level = conf_level, |
283 | 9x |
averages = averages_emmeans, |
284 | 9x |
weights = weights_emmeans |
285 |
) |
|
286 | 9x |
cov_estimate <- mmrm::VarCorr(fit) |
287 | 9x |
visit_levels <- rownames(cov_estimate) |
288 | 9x |
contrasts_from_visits <- match(visit_levels, lsmeans$contrasts[[vars$visit]]) |
289 | 9x |
df <- stats::setNames(lsmeans$contrasts$df[contrasts_from_visits], visit_levels) |
290 | 9x |
results <- list( |
291 | 9x |
fit = fit, |
292 | 9x |
cov_estimate = cov_estimate, |
293 | 9x |
lsmeans = lsmeans, |
294 | 9x |
vars = vars, |
295 | 9x |
labels = labels, |
296 | 9x |
mse = diag(cov_estimate), |
297 | 9x |
df = df, |
298 | 9x |
cor_struct = cor_struct, |
299 | 9x |
ref_level = if (is.null(vars$arm)) NULL else levels(data[[vars$arm]])[1], |
300 | 9x |
treatment_levels = if (is.null(vars$arm)) NULL else levels(data[[vars$arm]])[-1], |
301 | 9x |
conf_level = conf_level, |
302 | 9x |
additional = list(...) |
303 |
) |
|
304 | 9x |
class(results) <- "tern_model" |
305 | 9x |
return(results) |
306 |
} |
1 |
#' @name remove_col_count |
|
2 |
#' |
|
3 |
#' @title Removal of Unwanted Column Counts |
|
4 |
#' |
|
5 |
#' @description |
|
6 |
#' Remove the N=xx column headers for specified span_label_var columns - default is 'rrisk_header |
|
7 |
#' @details This works for only the lowest level of column splitting (since colcounts is used) |
|
8 |
#' @param obj table tree object |
|
9 |
#' @param span_label_var the spanning header text variable value for which column headers will be removed from |
|
10 |
#' |
|
11 |
#' @return table tree object with column counts in specified columns removed |
|
12 |
#' @export |
|
13 |
#' |
|
14 |
remove_col_count <- function(obj, span_label_var = "rrisk_header") { |
|
15 |
## programatically figure out which ones we want |
|
16 | 1x |
unwanted_count <- function(pth) pth[1] == span_label_var |
17 | 1x |
to_blank <- sapply(col_paths(obj), unwanted_count) |
18 | 1x |
col_counts(obj)[to_blank] <- NA_integer_ |
19 | 1x |
return(obj) |
20 |
} |
1 |
h_get_eair_df <- function(levii, df, denom_df, .var, id, occ_var, occ_dy, fup_var) { |
|
2 | 210x |
dfii <- df[df[[.var]] == levii & !is.na(df[[.var]]), ] |
3 | ||
4 | 210x |
df_denom <- unique(denom_df[, c(id, fup_var)]) |
5 | 210x |
df_num <- subset(dfii, dfii[[occ_var]] == "Y")[, c(id, .var, occ_var, occ_dy)] |
6 | ||
7 |
### construct modified fup var subjects not in numerator - use fup_var from df_denom |
|
8 | 210x |
df_denom$mod_fup_var <- df_denom[[fup_var]] |
9 | ||
10 |
### add vars from df_num onto df_denom |
|
11 | 210x |
df_denom <- dplyr::left_join(df_denom, df_num, by = id) |
12 | ||
13 |
# subjects in numerator dataset, use occ_dy variable/365.25 |
|
14 | 210x |
id_to_update <- df_denom[[id]] %in% df_num[[id]] |
15 | 210x |
df_denom[id_to_update, "mod_fup_var"] <- df_denom[id_to_update, occ_dy] / 365.25 |
16 | ||
17 | 210x |
return(list(df_denom = df_denom, df_num = df_num)) |
18 |
} |
|
19 | ||
20 |
extract_x_stats <- function(list_with_stats, stat_nms) { |
|
21 | 15x |
sapply( |
22 | 15x |
stat_nms, |
23 | 15x |
function(stat) { |
24 | 60x |
sapply( |
25 | 60x |
names(list_with_stats), |
26 | 60x |
function(x) { |
27 | 600x |
list_with_stats[[x]][[stat]] |
28 |
}, |
|
29 | 60x |
simplify = FALSE |
30 |
) |
|
31 |
}, |
|
32 | 15x |
simplify = FALSE |
33 |
) |
|
34 |
} |
1 |
#' @keywords internal |
|
2 |
memoised_fit_mmrm <- memoise::memoise(fit_mmrm_j) |
|
3 | ||
4 |
#' Helper Function to Fit the MMRM and Return LS Mean Estimates and Contrasts |
|
5 |
#' |
|
6 |
#' @inheritParams proposal_argument_convention |
|
7 |
#' @param df_parent (`data.frame`)\cr data set containing all analysis variables |
|
8 |
#' from all visits and arms. |
|
9 |
#' @param ref_arm_level (`string`)\cr the reference arm which should be compared |
|
10 |
#' against. |
|
11 |
#' @param ref_visit_levels (`character`)\cr the reference visits which should not |
|
12 |
#' be included in the model fit. |
|
13 |
#' @param ... additional options passed to [fit_mmrm_j()]. |
|
14 |
#' |
|
15 |
#' @return The resulting estimates and contrasts LS means as returned by |
|
16 |
#' [tidy.tern_model()]. |
|
17 |
#' @keywords internal |
|
18 |
h_summarize_mmrm <- function(.var, df_parent, variables, ref_arm_level, ref_visit_levels, ...) { |
|
19 | 51x |
checkmate::assert_string(.var) |
20 | 51x |
variables$response <- .var |
21 | ||
22 | 51x |
checkmate::assert_string(ref_arm_level) |
23 | 51x |
arm_levels <- levels(df_parent[[variables$arm]]) |
24 | 51x |
if (arm_levels[1L] != ref_arm_level) { |
25 | 51x |
checkmate::assert_subset(ref_arm_level, arm_levels[-1L]) |
26 | 51x |
df_parent[[variables$arm]] <- stats::relevel(df_parent[[variables$arm]], ref = ref_arm_level) |
27 |
} |
|
28 | 51x |
checkmate::assert_character(ref_visit_levels) |
29 | 51x |
in_ref_visits <- df_parent[[variables$visit]] %in% ref_visit_levels |
30 | 51x |
df_parent <- df_parent[!in_ref_visits, , drop = FALSE] |
31 | 51x |
checkmate::assert_true(nrow(df_parent) > 1) |
32 | 51x |
new_levels <- setdiff(levels(df_parent[[variables$visit]]), ref_visit_levels) |
33 | 51x |
df_parent[[variables$visit]] <- factor(df_parent[[variables$visit]], levels = new_levels) |
34 | 51x |
mod_fit <- memoised_fit_mmrm(vars = variables, data = df_parent, ...) |
35 | 51x |
tidy(mod_fit) |
36 |
} |
|
37 | ||
38 |
#' Dynamic tabulation of MMRM results with tables |
|
39 |
#' |
|
40 |
#' @description `r lifecycle::badge('stable')` |
|
41 |
#' |
|
42 |
#' These functions can be used to produce tables for MMRM results, within |
|
43 |
#' tables which are split by arms and visits. This is helpful when higher-level |
|
44 |
#' row splits are needed (e.g. splits by parameter or subgroup). |
|
45 |
#' |
|
46 |
#' @name summarize_mmrm |
|
47 |
#' @examples |
|
48 |
#' set.seed(123) |
|
49 |
#' longdat <- data.frame( |
|
50 |
#' ID = rep(DM$ID, 5), |
|
51 |
#' AVAL = c( |
|
52 |
#' rep(0, nrow(DM)), |
|
53 |
#' rnorm(n = nrow(DM) * 4) |
|
54 |
#' ), |
|
55 |
#' VISIT = factor(rep(paste0("V", 0:4), each = nrow(DM))) |
|
56 |
#' ) |> |
|
57 |
#' dplyr::inner_join(DM, by = "ID") |
|
58 |
#' |
|
59 |
NULL |
|
60 | ||
61 |
#' @describeIn summarize_mmrm Statistics function which is extracting estimates, |
|
62 |
#' not including any results when in the reference visit, and only showing LS mean |
|
63 |
#' estimates when in the reference arm and not in reference visit. It uses |
|
64 |
#' [s_lsmeans()] for the final processing. |
|
65 |
#' |
|
66 |
#' @inheritParams proposal_argument_convention |
|
67 |
#' @param ref_levels (`list`)\cr with `visit` and `arm` reference levels. |
|
68 |
#' @param ... eventually passed to [fit_mmrm_j()] via [h_summarize_mmrm()]. |
|
69 |
#' @export |
|
70 |
s_summarize_mmrm <- function( |
|
71 |
df, |
|
72 |
.var, |
|
73 |
variables, |
|
74 |
ref_levels, |
|
75 |
.spl_context, |
|
76 |
alternative = c("two.sided", "less", "greater"), |
|
77 |
show_relative = c("reduction", "increase"), |
|
78 |
...) { |
|
79 | 63x |
alternative <- match.arg(alternative) |
80 | ||
81 | 63x |
checkmate::assert_list(variables, names = "unique") |
82 | 63x |
visit_var <- variables$visit |
83 | 63x |
arm_var <- variables$arm |
84 | ||
85 | 63x |
checkmate::assert_list(ref_levels, names = "unique") |
86 | 63x |
checkmate::assert_subset(c(visit_var, arm_var), names(ref_levels)) |
87 | 63x |
ref_visits <- ref_levels[[visit_var]] |
88 | 63x |
ref_arm <- ref_levels[[arm_var]] |
89 | 63x |
checkmate::assert_string(ref_arm) |
90 | ||
91 | 63x |
current_visit <- as.character(unique(df[[visit_var]])) |
92 | 63x |
current_arm <- as.character(unique(df[[arm_var]])) |
93 | 63x |
checkmate::assert_string(current_visit) |
94 | 63x |
checkmate::assert_string(current_arm) |
95 | ||
96 | 63x |
in_ref_visits <- current_visit %in% ref_visits |
97 | 63x |
in_ref_arm <- current_arm == ref_arm |
98 | ||
99 | 63x |
if (in_ref_visits) { |
100 |
## this is returned |
|
101 | 13x |
list( |
102 | 13x |
n = NULL, |
103 | 13x |
adj_mean_se = NULL, |
104 | 13x |
adj_mean_ci = NULL, |
105 | 13x |
adj_mean_est_ci = NULL, |
106 | 13x |
diff_mean_se = NULL, |
107 | 13x |
diff_mean_ci = NULL, |
108 | 13x |
diff_mean_est_ci = NULL, |
109 | 13x |
change = NULL, |
110 | 13x |
p_value = NULL |
111 |
) |
|
112 |
} else { # non ref visit |
|
113 | 50x |
n_splits <- nrow(.spl_context) |
114 | ||
115 |
# Check that the current row split is by the visit variable. |
|
116 | 50x |
current_split_var <- .spl_context[n_splits, "split"] |
117 | 50x |
checkmate::assert_true(identical(current_split_var, visit_var)) |
118 | ||
119 |
# Then take the data frame with all visits and fit the MMRM on it. |
|
120 | 50x |
df_parent <- .spl_context[n_splits - 1, "full_parent_df"][[1]] |
121 | 50x |
lsm_df <- h_summarize_mmrm( |
122 | 50x |
.var = .var, |
123 | 50x |
df_parent = df_parent, |
124 | 50x |
variables = variables, |
125 | 50x |
ref_arm_level = ref_arm, |
126 | 50x |
ref_visit_levels = ref_visits, |
127 |
... |
|
128 |
) |
|
129 | ||
130 |
# Subset to the current table cell we are looking at. |
|
131 | 50x |
matches_visit <- lsm_df[[visit_var]] == current_visit |
132 | 50x |
matches_arm <- lsm_df[[arm_var]] == current_arm |
133 | 50x |
matches_both <- which(matches_visit & matches_arm) |
134 | 50x |
checkmate::assert_int(matches_both) |
135 | ||
136 | 50x |
s_lsmeans( |
137 | 50x |
lsm_df[matches_both, ], |
138 | 50x |
.in_ref_col = in_ref_arm, |
139 | 50x |
alternative = alternative, |
140 | 50x |
show_relative = show_relative |
141 |
) |
|
142 |
} # end ref visits if/else |
|
143 |
} |
|
144 | ||
145 |
#' @describeIn summarize_mmrm Formatted analysis function which is used as `afun`. |
|
146 |
#' |
|
147 |
#' @return |
|
148 |
#' * `a_summarize_mmrm()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
149 |
#' |
|
150 |
#' @examples |
|
151 |
#' basic_table() |> |
|
152 |
#' split_rows_by("VISIT") |> |
|
153 |
#' split_cols_by("ARM") |> |
|
154 |
#' analyze( |
|
155 |
#' vars = "AVAL", |
|
156 |
#' afun = a_summarize_mmrm, |
|
157 |
#' na_str = tern::default_na_str(), |
|
158 |
#' show_labels = "hidden", |
|
159 |
#' extra_args = list( |
|
160 |
#' variables = list( |
|
161 |
#' covariates = c("AGE"), |
|
162 |
#' id = "ID", |
|
163 |
#' arm = "ARM", |
|
164 |
#' visit = "VISIT" |
|
165 |
#' ), |
|
166 |
#' conf_level = 0.9, |
|
167 |
#' cor_struct = "toeplitz", |
|
168 |
#' ref_levels = list(VISIT = "V0", ARM = "B: Placebo") |
|
169 |
#' ) |
|
170 |
#' ) |> |
|
171 |
#' build_table(longdat) |> |
|
172 |
#' prune_table(all_zero) |
|
173 |
#' @export |
|
174 |
a_summarize_mmrm <- function( |
|
175 |
df, |
|
176 |
.var, |
|
177 |
.spl_context, |
|
178 |
..., |
|
179 |
.stats = NULL, |
|
180 |
.formats = NULL, |
|
181 |
.labels = NULL, |
|
182 |
.indent_mods = NULL) { |
|
183 |
# Check for additional parameters to the statistics function |
|
184 | 60x |
dots_extra_args <- list(...) |
185 | ||
186 |
# Only support default stats, not custom stats |
|
187 | 60x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
188 | ||
189 |
# Apply statistics function |
|
190 | 60x |
x_stats <- .apply_stat_functions( |
191 | 60x |
default_stat_fnc = s_summarize_mmrm, |
192 | 60x |
custom_stat_fnc_list = NULL, |
193 | 60x |
args_list = c(df = list(df), .var = .var, .spl_context = list(.spl_context), dots_extra_args) |
194 |
) |
|
195 | ||
196 |
# Format according to specifications |
|
197 | 60x |
format_stats( |
198 | 60x |
x_stats, |
199 | 60x |
method_groups = "summarize_mmrm", |
200 | 60x |
stats_in = .stats, |
201 | 60x |
formats_in = .formats, |
202 | 60x |
labels_in = .labels, |
203 | 60x |
indents_in = .indent_mods |
204 |
) |
|
205 |
} |
1 |
postfun_cog <- function(ret, spl, fulldf, .spl_context, |
|
2 |
var_names = c("AVAL", "CHG", "BASE"), |
|
3 |
stats = list( |
|
4 |
main = c(N = "N", mean = "Mean", SD = "SD", SE = "SE", Med = "Med", Min = "Min", Max = "Max"), |
|
5 |
base = c(mean = "Mean") |
|
6 |
)) { |
|
7 | 2x |
all_expr <- expression(TRUE) |
8 | 2x |
colset <- .spl_context[nrow(.spl_context), "value"][[1]] |
9 | ||
10 |
# Check if colset is one of the main variables (default: AVAL or CHG) |
|
11 | 2x |
if (colset == var_names[1] || colset == var_names[2]) { |
12 |
# Create vectors for make_split_result |
|
13 | 1x |
values <- stats$main |
14 | 1x |
labels <- stats$main |
15 | ||
16 |
# Create datasplit and subset_exprs lists with the same length as values |
|
17 | 1x |
datasplit <- replicate(length(values), fulldf, simplify = FALSE) |
18 | 1x |
names(datasplit) <- names(values) |
19 | ||
20 | 1x |
subset_exprs <- replicate(length(values), all_expr, simplify = FALSE) |
21 | ||
22 | 1x |
ret <- rtables::make_split_result( |
23 | 1x |
values = values, |
24 | 1x |
labels = labels, |
25 | 1x |
datasplit = datasplit, |
26 | 1x |
subset_exprs = subset_exprs |
27 |
) |
|
28 | 1x |
} else if (colset == var_names[3]) { # Default: BASE |
29 |
# For base variable, use base stats |
|
30 | ! |
values <- stats$base |
31 | ! |
labels <- stats$base |
32 | ! |
if (names(values)[1] == "mean") { |
33 |
# Keep backward compatibility by adding "Base " prefix to the first label if it's "mean" |
|
34 | ! |
labels[1] <- paste("Base", labels[1]) |
35 |
} |
|
36 | ||
37 |
# Create datasplit and subset_exprs lists with the same length as values |
|
38 | ! |
datasplit <- replicate(length(values), fulldf, simplify = FALSE) |
39 | ! |
names(datasplit) <- names(values) |
40 | ||
41 | ! |
subset_exprs <- replicate(length(values), all_expr, simplify = FALSE) |
42 | ||
43 | ! |
ret <- rtables::make_split_result( |
44 | ! |
values = values, |
45 | ! |
labels = labels, |
46 | ! |
datasplit = datasplit, |
47 | ! |
subset_exprs = subset_exprs |
48 |
) |
|
49 |
} else { |
|
50 | 1x |
stop("something bad happened :(") |
51 |
} |
|
52 | 1x |
ret |
53 |
} |
|
54 | ||
55 |
postfun_eq5d <- function(ret, spl, fulldf, .spl_context) { |
|
56 | 4x |
all_expr <- expression(TRUE) |
57 | 4x |
colset <- .spl_context[nrow(.spl_context), "value"][[1]] |
58 | 4x |
if (colset == "AVAL") { |
59 | 1x |
ret <- make_split_result( |
60 | 1x |
values = c(N = "N", mean = "Mean", SD = "SD", Med = "Med", Min = "Min", Max = "Max"), |
61 | 1x |
labels = c(N = "N", mean = "Mean", SD = "SD", Med = "Med", Min = "Min", Max = "Max"), |
62 | 1x |
datasplit = list(N = fulldf, mean = fulldf, SD = fulldf, Med = fulldf, Min = fulldf, Max = fulldf), |
63 | 1x |
subset_exprs = list(all_expr, all_expr, all_expr, all_expr, all_expr, all_expr) |
64 |
) |
|
65 | 3x |
} else if (colset == "BASE") { |
66 | 1x |
ret <- make_split_result( |
67 | 1x |
values = c(mean_sd = "mean_sd"), |
68 | 1x |
labels = c(mean_sd = "Base Mean (SD)"), |
69 | 1x |
datasplit = list(mean_sd = fulldf), |
70 | 1x |
subset_exprs = list(all_expr) |
71 |
) |
|
72 | 2x |
} else if (colset == "CHG") { |
73 | 1x |
ret <- make_split_result( |
74 | 1x |
values = c(N = "N", mean = "Mean", SE = "SE", SD = "SD", Med = "Med", Min = "Min", Max = "Max"), |
75 | 1x |
labels = c(N = "N", mean = "Mean", SE = "SE", SD = "SD", Med = "Med", Min = "Min", Max = "Max"), |
76 | 1x |
datasplit = list(N = fulldf, mean = fulldf, SE = fulldf, SD = fulldf, Med = fulldf, Min = fulldf, Max = fulldf), |
77 | 1x |
subset_exprs = list(all_expr, all_expr, all_expr, all_expr, all_expr, all_expr, all_expr) |
78 |
) |
|
79 |
} else { |
|
80 | 1x |
stop("something bad happened :(") |
81 |
} |
|
82 | 3x |
ret |
83 |
} |
|
84 | ||
85 |
calc_one_visit <- function(datvec, decimal, statnm, visit, varnm, roundmethod = c("sas", "iec"), exclude_visits, |
|
86 |
var_names = c("AVAL", "CHG", "BASE")) { |
|
87 | 26x |
roundmethod <- match.arg(roundmethod) |
88 | 26x |
if (is.na(decimal)) { |
89 | ! |
decimal <- 0 |
90 |
} |
|
91 | 26x |
if ((varnm == var_names[2] || varnm == var_names[3]) && (visit %in% exclude_visits)) { |
92 | 2x |
return(NULL) |
93 |
} |
|
94 | 24x |
if (roundmethod == "sas") { |
95 | 23x |
switch(statnm, |
96 | 2x |
N = length(stats::na.omit(datvec)), |
97 | 3x |
SE = format( |
98 | 3x |
tidytlg::roundSAS(stats::sd(datvec) / sqrt(length(stats::na.omit(datvec))), decimal + 2), |
99 | 3x |
nsmall = decimal + 2 |
100 |
), |
|
101 | 3x |
SD = format( |
102 | 3x |
tidytlg::roundSAS(stats::sd(datvec), decimal + 2), |
103 | 3x |
nsmall = decimal + |
104 | 3x |
2 |
105 |
), |
|
106 | 6x |
Mean = format(tidytlg::roundSAS(mean(datvec), decimal + 1), nsmall = decimal + 1), |
107 | ! |
mean_sd = paste0( |
108 | ! |
format(tidytlg::roundSAS(mean(datvec), decimal + 1), nsmall = decimal + 1), |
109 |
" (", |
|
110 | ! |
format( |
111 | ! |
tidytlg::roundSAS(stats::sd(datvec), decimal + 2), |
112 | ! |
nsmall = decimal + |
113 | ! |
2 |
114 |
), |
|
115 |
")" |
|
116 |
), |
|
117 | 3x |
Med = format(tidytlg::roundSAS(stats::median(datvec), decimal + 1), nsmall = decimal + 1), |
118 | 3x |
Min = format(tidytlg::roundSAS(min(datvec), decimal), nsmall = decimal), |
119 | 3x |
Max = format(tidytlg::roundSAS(max(datvec), decimal), nsmall = decimal) |
120 |
) |
|
121 |
} else { |
|
122 | 1x |
switch(statnm, |
123 | ! |
N = length(stats::na.omit(datvec)), |
124 | ! |
SE = format(round(stats::sd(datvec) / sqrt(length(stats::na.omit(datvec))), decimal + 2), nsmall = decimal + 2), |
125 | ! |
SD = format(round(stats::sd(datvec), decimal + 2), nsmall = decimal + 2), |
126 | 1x |
Mean = format(round(mean(datvec), decimal + 1), nsmall = decimal + 1), |
127 | ! |
mean_sd = paste0( |
128 | ! |
format(round(mean(datvec), decimal + 1), nsmall = decimal + 1), |
129 |
" (", |
|
130 | ! |
format( |
131 | ! |
round(stats::sd(datvec), decimal + 2), |
132 | ! |
nsmall = decimal + |
133 | ! |
2 |
134 |
), |
|
135 |
")" |
|
136 |
), |
|
137 | ! |
Med = format(round(stats::median(datvec), decimal + 1), nsmall = decimal + 1), |
138 | ! |
Min = format(round(min(datvec), decimal), nsmall = decimal), |
139 | ! |
Max = format(round(max(datvec), decimal), nsmall = decimal) |
140 |
) |
|
141 |
} |
|
142 |
} |
|
143 | ||
144 |
#' @name column_stats |
|
145 |
#' @title Statistics within the column space |
|
146 |
#' @description |
|
147 |
#' A function factory used for obtaining statistics within the columns of your table. |
|
148 |
#' Used in change from baseline tables. This takes the visit names as its row labels. |
|
149 |
#' @param exclude_visits Vector of visit(s) for which you do not want the statistics displayed |
|
150 |
#' in the baseline mean or change from baseline sections of the table. |
|
151 |
#' @param var_names Vector of variable names to use instead of the default AVAL, CHG, BASE. |
|
152 |
#' The first two elements are treated as main variables with full statistics, and the third element |
|
153 |
#' is treated as the base variable. By default, the function expects these specific variable names in your data, |
|
154 |
#' but you can customize them to match your dataset's column names. |
|
155 |
#' @param stats A list with two components, `main` and `base`, that define the statistics to be calculated |
|
156 |
#' for the main variables (default: AVAL, CHG) and the base variable (default: BASE). |
|
157 |
#' Default for main variables: c(N = "N", mean = "Mean", SD = "SD", SE = "SE", Med = "Med", Min = "Min", Max = "Max") |
|
158 |
#' Default for base variable: c(mean = "Mean") |
|
159 |
#' You can customize these statistics by providing your own named vectors in the list. The names are used |
|
160 |
#' internally for calculations, and the values are used as display labels in the table. |
|
161 |
#' |
|
162 |
#' @return an analysis function (for use with [rtables::analyze]) implementing |
|
163 |
#' the specified statistics. |
|
164 |
#' @export |
|
165 |
column_stats <- function(exclude_visits = c("Baseline (DB)"), |
|
166 |
var_names = c("AVAL", "CHG", "BASE"), |
|
167 |
stats = list( |
|
168 |
main = c( |
|
169 |
N = "N", mean = "Mean", SD = "SD", SE = "SE", |
|
170 |
Med = "Med", Min = "Min", Max = "Max" |
|
171 |
), |
|
172 |
base = c(mean = "Mean") |
|
173 |
)) { |
|
174 | 8x |
function(df, .var, .spl_context) { |
175 | 8x |
allcolsplvals <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
176 | 8x |
statnm <- utils::tail(allcolsplvals, 1) |
177 | 8x |
varnm <- allcolsplvals[length(allcolsplvals) - 1] |
178 | 8x |
datvec <- df[[varnm]] |
179 | 8x |
decimalp <- utils::tail(df$dp, 1) |
180 | 8x |
datpervis <- split(datvec, df[[.var]]) ## ,var is AVISIT |
181 | 8x |
in_rows( |
182 | 8x |
.list = mapply( |
183 | 8x |
calc_one_visit, |
184 | 8x |
datvec = datpervis, |
185 | 8x |
decimal = decimalp, |
186 | 8x |
visit = names(datpervis), |
187 | 8x |
MoreArgs = list( |
188 | 8x |
statnm = statnm, |
189 | 8x |
varnm = varnm, |
190 | 8x |
exclude_visits = exclude_visits, |
191 | 8x |
var_names = var_names |
192 |
) |
|
193 |
), |
|
194 | 8x |
.names = names(datpervis) |
195 |
) |
|
196 |
} |
|
197 |
} |
|
198 | ||
199 |
calc_N <- function(datvec, statnm, trt, varnm) { |
|
200 | 3x |
if (varnm != "AVAL") { |
201 | 1x |
return(NULL) |
202 |
} |
|
203 | 2x |
length(stats::na.omit(datvec)) |
204 |
} |
|
205 | ||
206 |
column_N <- function(df, .var, .spl_context, id = "USUBJID", var_names = c("AVAL", "CHG", "BASE")) { |
|
207 | 1x |
allcolsplvals <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
208 | 1x |
statnm <- utils::tail(allcolsplvals, 1) |
209 | 1x |
varnm <- allcolsplvals[length(allcolsplvals) - 1] |
210 | 1x |
datvec <- unique(df[[id]]) |
211 | 1x |
datpertrt <- split(datvec, df[[.var]]) ## ,var is Treatment |
212 | 1x |
in_rows( |
213 | 1x |
.list = mapply(calc_N, datvec = datpertrt, trt = names(datpertrt), MoreArgs = list(statnm = statnm, varnm = varnm)), |
214 | 1x |
.names = names(datpertrt) |
215 |
) |
|
216 |
} |
1 |
#' Tabulation of RBMI Results |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge('stable')` |
|
4 |
#' |
|
5 |
#' These functions can be used to produce tables from RBMI. |
|
6 |
#' |
|
7 |
#' @name tabulate_rbmi |
|
8 |
#' |
|
9 |
#' @note These functions have been forked from `tern.rbmi`. Additional features are: |
|
10 |
#' |
|
11 |
#' * Additional `ref_path` argument. |
|
12 |
#' * Extraction of variance statistics in the `tidy()` method. |
|
13 |
#' * Adapted to `rbmi` forked functions update with more than two treatment groups. |
|
14 |
NULL |
|
15 | ||
16 |
#' @describeIn tabulate_rbmi Helper function to produce data frame with results |
|
17 |
#' of pool for a single visit. |
|
18 |
#' |
|
19 |
#' @param x (`list`)\cr is a list of pooled object from `rbmi` analysis results. |
|
20 |
#' This list includes analysis results, confidence level, hypothesis testing type. |
|
21 |
#' @param visit_name (`string`)\cr single visit level. |
|
22 |
#' @param group_names (`character`)\cr group levels. |
|
23 |
#' @return The `data.frame` with results of pooled analysis for a single visit. |
|
24 |
#' |
|
25 |
#' @export |
|
26 |
h_tidy_pool <- function(x, visit_name, group_names) { |
|
27 | 13x |
checkmate::assert_list(x) |
28 | 13x |
checkmate::assert_string(visit_name) |
29 | 13x |
checkmate::assert_character(group_names) |
30 | ||
31 | 13x |
ref_name <- paste0("lsm_", group_names[1], "_", visit_name) |
32 | 13x |
ref <- x[[ref_name]] |
33 | ||
34 | 13x |
list2df <- \(l) with(l, data.frame(est, ci_l = ci[1], ci_u = ci[2], se, pvalue, df)) |
35 | ||
36 | 13x |
var_name <- paste0("var_", visit_name) |
37 | 13x |
var <- if (var_name %in% names(x)) { |
38 |
# This is the case for ANCOVA. |
|
39 | 11x |
list2df(x[[var_name]]) |
40 |
} else { |
|
41 |
# This is the case for MMRM. |
|
42 | 2x |
var_names <- paste0("var_", group_names[-1], "_", visit_name) |
43 | 2x |
checkmate::assert_subset(var_names, names(x)) |
44 | 2x |
var_dfs <- lapply(x[var_names], list2df) |
45 | 2x |
var <- do.call(rbind, var_dfs) |
46 |
} |
|
47 | ||
48 | 13x |
contr_names <- paste0("trt_", group_names[-1], "_", visit_name) |
49 | 13x |
contr_dfs <- lapply(x[contr_names], list2df) |
50 | 13x |
contr <- do.call(rbind, contr_dfs) |
51 | ||
52 | 13x |
alt_names <- paste0("lsm_", group_names[-1], "_", visit_name) |
53 | 13x |
alt_dfs <- lapply(x[alt_names], list2df) |
54 | 13x |
alt <- do.call(rbind, alt_dfs) |
55 | ||
56 | 13x |
df_ref <- data.frame( |
57 | 13x |
visit = visit_name, |
58 | 13x |
group = group_names[1], |
59 | 13x |
est = ref$est, |
60 | 13x |
se_est = ref$se, |
61 | 13x |
lower_cl_est = ref$ci[1], |
62 | 13x |
upper_cl_est = ref$ci[2], |
63 | 13x |
est_contr = NA_real_, |
64 | 13x |
se_contr = NA_real_, |
65 | 13x |
lower_cl_contr = NA_real_, |
66 | 13x |
upper_cl_contr = NA_real_, |
67 | 13x |
p_value = NA_real_, |
68 | 13x |
relative_reduc = NA_real_, |
69 | 13x |
mse = NA_real_, |
70 | 13x |
df = NA_real_, |
71 | 13x |
stringsAsFactors = FALSE |
72 |
) |
|
73 | 13x |
df_alt <- data.frame( |
74 | 13x |
visit = visit_name, |
75 | 13x |
group = group_names[-1], |
76 | 13x |
est = alt$est, |
77 | 13x |
se_est = alt$se, |
78 | 13x |
lower_cl_est = alt$ci_l, |
79 | 13x |
upper_cl_est = alt$ci_u, |
80 | 13x |
est_contr = contr$est, |
81 | 13x |
se_contr = contr$se, |
82 | 13x |
lower_cl_contr = contr$ci_l, |
83 | 13x |
upper_cl_contr = contr$ci_u, |
84 | 13x |
p_value = contr$pvalue, |
85 | 13x |
relative_reduc = contr$est / df_ref$est, |
86 | 13x |
mse = var$est, |
87 | 13x |
df = var$df, |
88 | 13x |
stringsAsFactors = FALSE |
89 |
) |
|
90 | 13x |
rbind(df_ref, df_alt) |
91 |
} |
|
92 | ||
93 |
#' Helper method (for [`broom::tidy()`]) to prepare a data frame from an |
|
94 |
#' `pool` `rbmi` object containing the LS means and contrasts and multiple visits |
|
95 |
#' |
|
96 |
#' @method tidy pool |
|
97 |
#' @param x (`pool`) is a list of pooled object from `rbmi` analysis results. This list includes |
|
98 |
#' analysis results, confidence level, hypothesis testing type. |
|
99 |
#' @param visits (`character`)\cr all visit levels. Otherwise too hard to guess this. |
|
100 |
#' @param ... Additional arguments. Not used. Needed to match generic signature only. |
|
101 |
#' @importFrom generics tidy |
|
102 |
#' @export |
|
103 |
#' @keywords internal |
|
104 |
#' @return A `data.frame`. |
|
105 |
tidy.pool <- function(x, visits, ...) { |
|
106 | 6x |
ls_raw <- x$pars |
107 | ||
108 | 6x |
has_lsm <- grepl("^lsm_", names(ls_raw)) |
109 | 6x |
has_first_visit <- grepl(paste0("_", visits[1]), names(ls_raw), fixed = TRUE) |
110 | 6x |
is_lsm_first_visit <- has_lsm & has_first_visit |
111 | ||
112 | 6x |
group_names <- names(ls_raw)[is_lsm_first_visit] |
113 | 6x |
group_names <- gsub(pattern = "^lsm_", replacement = "", x = group_names) |
114 | 6x |
group_names <- gsub(pattern = paste0("_", visits[1]), replacement = "", x = group_names, fixed = TRUE) |
115 | ||
116 | 6x |
spl <- rep(visits, each = length(ls_raw) / length(visits)) |
117 | ||
118 | 6x |
ls_split <- split(ls_raw, spl) |
119 | ||
120 | 6x |
ls_df <- mapply( |
121 | 6x |
FUN = h_tidy_pool, |
122 | 6x |
x = ls_split, |
123 | 6x |
visit_name = visits, |
124 | 6x |
MoreArgs = list(group_names = group_names), |
125 | 6x |
SIMPLIFY = FALSE |
126 |
) |
|
127 | ||
128 | 6x |
result <- do.call(rbind, unname(ls_df)) |
129 | ||
130 | 6x |
result$visit <- factor(result$visit, levels = visits) |
131 | 6x |
result$group <- factor(result$group, levels = group_names) |
132 | 6x |
result$conf_level <- x$conf.level |
133 | ||
134 | 6x |
result |
135 |
} |
|
136 | ||
137 |
#' @describeIn tabulate_rbmi Statistics function which is extracting estimates |
|
138 |
#' from a tidied RBMI results data frame. |
|
139 |
#' |
|
140 |
#' @param df (`data.frame`)\cr input with LS means results. |
|
141 |
#' @param .in_ref_col (`flag`)\cr whether reference column is specified. |
|
142 |
#' @param show_relative (`string`)\cr 'reduction' if (`control - treatment`, default) |
|
143 |
#' or 'increase' (`treatment - control`) of relative change from baseline? |
|
144 |
#' @return A list of statistics extracted from a tidied LS means data frame. |
|
145 |
#' @export |
|
146 |
s_rbmi_lsmeans <- function(df, .in_ref_col, show_relative = c("reduction", "increase")) { |
|
147 | 7x |
checkmate::assert_flag(.in_ref_col) |
148 | ||
149 | 7x |
show_relative <- match.arg(show_relative) |
150 | 7x |
if_not_ref <- function(x) if (.in_ref_col) character() else x |
151 | 7x |
list( |
152 | 7x |
adj_mean_se = c(df$est, df$se_est), |
153 | 7x |
adj_mean_ci = with_label(c(df$lower_cl_est, df$upper_cl_est), f_conf_level(df$conf_level)), |
154 | 7x |
diff_mean_se = if_not_ref(c(df$est_contr, df$se_contr)), |
155 | 7x |
diff_mean_ci = with_label( |
156 | 7x |
if_not_ref(c(df$lower_cl_contr, df$upper_cl_contr)), |
157 | 7x |
f_conf_level(df$conf_level) |
158 |
), |
|
159 | 7x |
change = switch(show_relative, |
160 | 7x |
reduction = with_label(if_not_ref(df$relative_reduc), "Relative Reduction (%)"), |
161 | 7x |
increase = with_label(if_not_ref(-df$relative_reduc), "Relative Increase (%)") |
162 |
), |
|
163 | 7x |
p_value = if_not_ref(df$p_value), |
164 | 7x |
additional_title_row = NULL |
165 |
) |
|
166 |
} |
|
167 | ||
168 |
#' @describeIn tabulate_rbmi Formatted Analysis function which is used as `afun`. |
|
169 |
#' |
|
170 |
#' @inheritParams proposal_argument_convention |
|
171 |
#' @export |
|
172 |
a_rbmi_lsmeans <- function( |
|
173 |
df, |
|
174 |
ref_path, |
|
175 |
.spl_context, |
|
176 |
..., |
|
177 |
.stats = NULL, |
|
178 |
.formats = NULL, |
|
179 |
.labels = NULL, |
|
180 |
.indent_mods = NULL) { |
|
181 |
# Check for additional parameters to the statistics function |
|
182 | 4x |
dots_extra_args <- list(...) |
183 | ||
184 |
# Only support default stats, not custom stats |
|
185 | 4x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
186 | ||
187 |
# Obtain reference column information |
|
188 | 4x |
ref <- get_ref_info(ref_path, .spl_context) |
189 | ||
190 |
# Apply statistics function |
|
191 | 4x |
x_stats <- .apply_stat_functions( |
192 | 4x |
default_stat_fnc = s_rbmi_lsmeans, |
193 | 4x |
custom_stat_fnc_list = NULL, |
194 | 4x |
args_list = c(df = list(df), .in_ref_col = ref$in_ref_col, dots_extra_args) |
195 |
) |
|
196 | ||
197 |
# Format according to specifications |
|
198 | 4x |
format_stats( |
199 | 4x |
x_stats, |
200 | 4x |
method_groups = "tabulate_rbmi", |
201 | 4x |
stats_in = .stats, |
202 | 4x |
formats_in = .formats, |
203 | 4x |
labels_in = .labels, |
204 | 4x |
indents_in = .indent_mods |
205 |
) |
|
206 |
} |
1 |
#' Function Factory to Create Padded In Rows Content |
|
2 |
#' |
|
3 |
#' @param length_out (`count` or `NULL`)\cr full length which should be padded |
|
4 |
#' by `NA` which will be printed as empty strings. |
|
5 |
#' @param label (`string`)\cr row label to be used for the first row only. |
|
6 |
#' |
|
7 |
#' @return The function of `content` and `.formats`. |
|
8 |
#' @keywords internal |
|
9 |
pad_in_rows_fct <- function(length_out = NULL, label = "") { |
|
10 | 144x |
checkmate::assert_count(length_out, null.ok = TRUE) |
11 | 144x |
checkmate::assert_string(label) |
12 | ||
13 | 144x |
function(content, .formats) { |
14 | 144x |
content_list <- as.list(content) |
15 | 144x |
if (!is.null(length_out)) { |
16 | 144x |
missing_length <- length_out - length(content_list) |
17 | 144x |
if (missing_length > 0) { |
18 | 23x |
content_list <- c(content_list, rep(NA, missing_length)) |
19 |
} |
|
20 |
} |
|
21 | 144x |
in_rows( |
22 | 144x |
.list = content_list, |
23 | 144x |
.names = as.character(seq_along(content_list)), |
24 | 144x |
.labels = c( |
25 | 144x |
label, |
26 | 144x |
rep( |
27 |
"", |
|
28 | 144x |
length(content_list) - |
29 | 144x |
1L |
30 |
) |
|
31 |
), |
|
32 | 144x |
.formats = .formats, |
33 | 144x |
.format_na_strs = "" |
34 |
) |
|
35 |
} |
|
36 |
} |
|
37 | ||
38 |
#' First Level Column Split for LS Means Wide Table Layouts |
|
39 |
#' |
|
40 |
#' @inheritParams proposal_argument_convention |
|
41 |
#' |
|
42 |
#' @keywords internal |
|
43 |
lsmeans_wide_first_split_fun_fct <- function(include_variance) { |
|
44 | 3x |
checkmate::assert_flag(include_variance) |
45 | ||
46 | 3x |
post_fun <- function(ret, spl, fulldf, .spl_context) { |
47 | 3x |
if (include_variance) { |
48 | 2x |
short_split_result( |
49 | 2x |
reference_group = "Reference Group", |
50 | 2x |
testing_group = "Testing Group", |
51 | 2x |
variance = "", |
52 | 2x |
comparison = "Testing - Reference", |
53 | 2x |
fulldf = fulldf |
54 |
) |
|
55 |
} else { |
|
56 | 1x |
short_split_result( |
57 | 1x |
reference_group = "Reference Group", |
58 | 1x |
testing_group = "Testing Group", |
59 | 1x |
comparison = "Testing - Reference", |
60 | 1x |
fulldf = fulldf |
61 |
) |
|
62 |
} |
|
63 |
} |
|
64 | 3x |
make_split_fun(post = list(post_fun)) |
65 |
} |
|
66 | ||
67 |
#' Second Level Column Split for LS Means Wide Table Layouts |
|
68 |
#' |
|
69 |
#' @inheritParams proposal_argument_convention |
|
70 |
#' @param include_pval (`flag`)\cr whether to include the p-value column. |
|
71 |
#' |
|
72 |
#' @keywords internal |
|
73 |
lsmeans_wide_second_split_fun_fct <- function(pval_sided, conf_level, include_pval) { |
|
74 | 3x |
checkmate::assert_flag(include_pval) |
75 | ||
76 | 3x |
post_fun <- function(ret, spl, fulldf, .spl_context) { |
77 | 11x |
colset <- .spl_context[nrow(.spl_context), "value"][[1]] |
78 | 11x |
if (colset %in% c("reference_group", "testing_group")) { |
79 | 6x |
short_split_result(treatment = "Treatment", n = "N", lsmean = "LS Mean", fulldf = fulldf) |
80 | 5x |
} else if (colset == "variance") { |
81 | 2x |
short_split_result(mse = "M. S. Error", df = "Error DF", fulldf = fulldf) |
82 |
} else { |
|
83 | 3x |
if (include_pval) { |
84 | 2x |
short_split_result( |
85 | 2x |
lsmean = "LS Mean", |
86 | 2x |
se = "SE", |
87 | 2x |
ci = f_conf_level(conf_level), |
88 | 2x |
pval = paste0(abs(as.numeric(pval_sided)), "-sided p-value~[super a]"), |
89 | 2x |
fulldf = fulldf |
90 |
) |
|
91 |
} else { |
|
92 | 1x |
short_split_result(lsmean = "LS Mean", se = "SE", ci = f_conf_level(conf_level), fulldf = fulldf) |
93 |
} |
|
94 |
} |
|
95 |
} |
|
96 | 3x |
make_split_fun(post = list(post_fun)) |
97 |
} |
|
98 | ||
99 |
#' Content Row Analysis Function for LS Means Wide Table Layouts |
|
100 |
#' |
|
101 |
#' @inheritParams proposal_argument_convention |
|
102 |
#' @param variables (`list`)\cr see [fit_ancova()] for required variable |
|
103 |
#' specifications. |
|
104 |
#' @param ref_level (`string`)\cr the reference level of the treatment arm variable. |
|
105 |
#' @param treatment_levels (`character`)\cr the non-reference levels of the treatment arm |
|
106 |
#' variable. |
|
107 |
#' @param pval_sided (`string`)\cr either '2' for two-sided or '1' for 1-sided with greater than |
|
108 |
#' control or '-1' for 1-sided with smaller than control alternative hypothesis. |
|
109 |
#' @param formats (`list`)\cr including `lsmean`, `mse`, `df`, `lsmean_diff`, `se`, |
|
110 |
#' `ci`, `pval` formats. |
|
111 |
#' |
|
112 |
#' @details This assumes a lot of structure of the layout, and is only intended to be used inside |
|
113 |
#' [summarize_lsmeans_wide()], please see there for the layout structure that is needed. |
|
114 |
#' |
|
115 |
#' @keywords internal |
|
116 |
lsmeans_wide_cfun <- function( |
|
117 |
df, |
|
118 |
labelstr, |
|
119 |
.spl_context, |
|
120 |
variables, |
|
121 |
ref_level, |
|
122 |
treatment_levels, |
|
123 |
pval_sided = c("2", "1", "-1"), |
|
124 |
conf_level, |
|
125 |
formats) { |
|
126 | 144x |
this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
127 | 144x |
pad_in_rows <- pad_in_rows_fct(length_out = length(treatment_levels), label = labelstr) |
128 | 144x |
if (this_col_split[1] %in% c("reference_group", "testing_group")) { |
129 | 75x |
this_level <- if (this_col_split[1] == "reference_group") { |
130 | 39x |
ref_level |
131 |
} else { |
|
132 | 36x |
treatment_levels |
133 |
} |
|
134 | 75x |
has_this_level <- df[[variables$arm]] %in% this_level |
135 | 75x |
if (this_col_split[2] == "treatment") { |
136 | 27x |
pad_in_rows(this_level, .formats = "xx") |
137 | 48x |
} else if (this_col_split[2] == "n") { |
138 | 24x |
pad_in_rows(df$n[has_this_level], .formats = "xx") |
139 |
} else { |
|
140 | 24x |
pad_in_rows(df$estimate_est[has_this_level], .formats = formats$lsmean) |
141 |
} |
|
142 | 69x |
} else if (this_col_split[1] == "variance") { |
143 | 20x |
has_trt <- df[[variables$arm]] %in% treatment_levels |
144 | 20x |
all_the_same <- (length(unique(df$mse[has_trt])) == 1) && (length(unique(df$df[has_trt])) == 1) |
145 |
# Note: We only take the first value, because in this ANCOVA case they are identical and we don't want to |
|
146 |
# repeat it in the table. |
|
147 | 20x |
inds <- if (all_the_same) 1 else seq_len(sum(has_trt)) |
148 | 20x |
if (this_col_split[2] == "mse") { |
149 | 10x |
pad_in_rows(df$mse[has_trt][inds], .formats = formats$mse) |
150 |
} else { |
|
151 | 10x |
pad_in_rows(df$df[has_trt][inds], .formats = formats$df) |
152 |
} |
|
153 |
} else { |
|
154 | 49x |
has_trt <- df[[variables$arm]] %in% treatment_levels |
155 | 49x |
switch(this_col_split[2], |
156 | 12x |
"lsmean" = pad_in_rows(df$estimate_contr[has_trt], .formats = formats$lsmean_diff), |
157 | 12x |
"se" = pad_in_rows(df$se_contr[has_trt], .formats = formats$se), |
158 | 12x |
"ci" = pad_in_rows( |
159 | 12x |
tern::combine_vectors(df$lower_cl_contr[has_trt], df$upper_cl_contr[has_trt]), |
160 | 12x |
.formats = formats$ci |
161 |
), |
|
162 |
"pval" = { |
|
163 | 13x |
pval_sided <- match.arg(pval_sided) |
164 | 13x |
pval_col <- switch(pval_sided, |
165 | 13x |
`2` = "p_value", |
166 | 13x |
`1` = "p_value_greater", |
167 | 13x |
`-1` = "p_value_less" |
168 |
) |
|
169 | 13x |
pad_in_rows(df[[pval_col]][has_trt], .formats = formats$pval) |
170 |
} |
|
171 |
) |
|
172 |
} |
|
173 |
} |
|
174 | ||
175 |
#' Layout Generating Function for LS Means Wide Table Layouts |
|
176 |
#' |
|
177 |
#' @inheritParams proposal_argument_convention |
|
178 |
#' @inheritParams lsmeans_wide_cfun |
|
179 |
#' @param lyt empty layout, i.e. result of [rtables::basic_table()] |
|
180 |
#' @param include_variance (`flag`)\cr whether to include the variance statistics |
|
181 |
#' (M.S. error and d.f.). |
|
182 |
#' @param include_pval (`flag`)\cr whether to include the p-value column. |
|
183 |
#' |
|
184 |
#' @return Modified layout. |
|
185 |
#' @export |
|
186 |
#' @examples |
|
187 |
#' variables <- list( |
|
188 |
#' response = "FEV1", |
|
189 |
#' covariates = c("RACE", "SEX"), |
|
190 |
#' arm = "ARMCD", |
|
191 |
#' id = "USUBJID", |
|
192 |
#' visit = "AVISIT" |
|
193 |
#' ) |
|
194 |
#' fit <- fit_ancova( |
|
195 |
#' vars = variables, |
|
196 |
#' data = mmrm::fev_data, |
|
197 |
#' conf_level = 0.9, |
|
198 |
#' weights_emmeans = "equal" |
|
199 |
#' ) |
|
200 |
#' anl <- broom::tidy(fit) |
|
201 |
#' basic_table() |> |
|
202 |
#' summarize_lsmeans_wide( |
|
203 |
#' variables = variables, |
|
204 |
#' ref_level = fit$ref_level, |
|
205 |
#' treatment_levels = fit$treatment_levels, |
|
206 |
#' pval_sided = "2", |
|
207 |
#' conf_level = 0.8 |
|
208 |
#' ) |> |
|
209 |
#' build_table(df = anl) |
|
210 |
summarize_lsmeans_wide <- function( |
|
211 |
lyt, |
|
212 |
variables, |
|
213 |
ref_level, |
|
214 |
treatment_levels, |
|
215 |
conf_level, |
|
216 |
pval_sided = "2", |
|
217 |
include_variance = TRUE, |
|
218 |
include_pval = TRUE, |
|
219 |
formats = list( |
|
220 |
lsmean = jjcsformat_xx("xx.x"), |
|
221 |
mse = jjcsformat_xx("xx.x"), |
|
222 |
df = jjcsformat_xx("xx."), |
|
223 |
lsmean_diff = jjcsformat_xx("xx.x"), |
|
224 |
se = jjcsformat_xx("xx.xx"), |
|
225 |
ci = jjcsformat_xx("(xx.xx, xx.xx)"), |
|
226 |
pval = jjcsformat_pval_fct(0) |
|
227 |
)) { |
|
228 |
# Check that all required format elements are present in the formats parameter |
|
229 | 3x |
checkmate::assert_names( |
230 | 3x |
names(formats), |
231 | 3x |
must.include = c("lsmean", "mse", "df", "lsmean_diff", "se", "ci", "pval"), |
232 | 3x |
.var.name = "formats" |
233 |
) |
|
234 | 3x |
lyt |> |
235 | 3x |
split_cols_by( |
236 | 3x |
variables$arm, |
237 | 3x |
split_fun = lsmeans_wide_first_split_fun_fct(include_variance = include_variance) |
238 |
) |> |
|
239 | 3x |
split_cols_by( |
240 | 3x |
variables$arm, |
241 | 3x |
split_fun = lsmeans_wide_second_split_fun_fct( |
242 | 3x |
include_pval = include_pval, |
243 | 3x |
pval_sided = pval_sided, |
244 | 3x |
conf_level = conf_level |
245 |
) |
|
246 |
) |> |
|
247 | 3x |
split_rows_by(variables$visit, section_div = "") |> |
248 | 3x |
summarize_row_groups( |
249 | 3x |
var = variables$arm, |
250 | 3x |
cfun = lsmeans_wide_cfun, |
251 | 3x |
extra_args = list( |
252 | 3x |
variables = variables, |
253 | 3x |
ref_level = ref_level, |
254 | 3x |
treatment_levels = treatment_levels, |
255 | 3x |
pval_sided = pval_sided, |
256 | 3x |
conf_level = conf_level, |
257 | 3x |
formats = formats |
258 |
) |
|
259 |
) |
|
260 |
} |
1 |
add_blank_line_rcells <- function(ret) { |
|
2 |
# check that ret is expected structure and not NULL |
|
3 | 5x |
if (is.null(ret)) { |
4 | ! |
stop("add_blank_line_rcells: ret cannot be NULL.") |
5 |
} |
|
6 | 5x |
if (!(class(ret) %in% c("RowsVerticalSection", "CellValue"))) { |
7 | ! |
stop("add_blank_line_rcells: ret must be of class RowsVerticalSection or CellValue.") |
8 |
} |
|
9 | ||
10 | 5x |
if (inherits(ret, "RowsVerticalSection")) { |
11 | 5x |
xlabel <- attr(ret, "row_labels") |
12 | 5x |
indent_mods <- lapply(ret, function(obj) { |
13 | 5x |
attr(obj, "indent_mod") |
14 |
}) |
|
15 |
} else { |
|
16 | ! |
xlabel <- attr(ret, "label") |
17 | ! |
indent_mods <- attr(ret, "indent_mod") |
18 |
} |
|
19 | ||
20 | 5x |
fmts <- lapply(ret, obj_format) |
21 | 5x |
na_strs <- lapply(ret, obj_na_str) |
22 |
# ret <- append(ret,rcell(NA_real_,format = 'xx')) use a character version for the new line, rather than NA - to |
|
23 |
# allow NA processing for other stuff |
|
24 | 5x |
ret <- append(ret, rcell(NA, format = "xx")) |
25 | 5x |
fmts <- append(fmts, "xx") |
26 | 5x |
na_strs <- append(na_strs, " ") |
27 | 5x |
indent_mods <- append(indent_mods, 0L) |
28 | 5x |
xlabel <- append(xlabel, " ") |
29 | 5x |
ret <- stats::setNames(ret, xlabel) |
30 | ||
31 |
# perform the update to add the extra line |
|
32 | 5x |
fret <- in_rows( |
33 | 5x |
.list = ret, |
34 | 5x |
.labels = xlabel, |
35 | 5x |
.formats = fmts, |
36 | 5x |
.format_na_strs = na_strs, |
37 | 5x |
.indent_mods = indent_mods |
38 |
) |
|
39 | ||
40 | 5x |
fret |
41 |
} |
1 |
#' Shortcut Layout Function for Standard Continuous Variable Analysis |
|
2 |
#' |
|
3 |
#' @inheritParams proposal_argument_convention |
|
4 |
#' @param formats (`list`)\cr formats including `mean_sd`, `median` and `range` |
|
5 |
#' specifications. |
|
6 |
#' |
|
7 |
#' @return Modified layout. |
|
8 |
#' @export |
|
9 |
analyze_values <- function(lyt, vars, ..., formats) { |
|
10 | 1x |
checkmate::assert_list(formats) |
11 | 1x |
checkmate::assert_names(names(formats), must.include = c("mean_sd", "median", "range")) |
12 | ||
13 | 1x |
analyze_vars( |
14 | 1x |
lyt, |
15 | 1x |
vars = vars, |
16 |
..., |
|
17 | 1x |
.stats = c("n", "mean_sd", "median", "range"), |
18 | 1x |
.formats = c(n = "xx", mean_sd = formats$mean_sd, median = formats$median, range = formats$range), |
19 | 1x |
.labels = c(n = "N", mean_sd = "Mean (SD)", median = "Median", range = "Min, Max"), |
20 | 1x |
.indent_mods = c(n = 0, mean_sd = 1, median = 1, range = 1) |
21 |
) |
|
22 |
} |
1 |
#' @title s_function for proportion of factor levels |
|
2 |
#' @description A simple statistics function which prepares the numbers with percentages |
|
3 |
#' in the required format. The denominator here is from the alternative counts data set |
|
4 |
#' in the given row and column split. |
|
5 |
#' |
|
6 |
#' If a total row is shown, then here just the total number is shown (without 100%). |
|
7 |
#' @param x (`factor`)\cr categorical variable we want to analyze. |
|
8 |
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation. |
|
9 |
#' @param use_alt_counts (`flag`)\cr whether the `.alt_df` should be used for the total, i.e. the denominator. |
|
10 |
#' If not, then the number of non-missing values in `x` is used. |
|
11 |
#' @param show_total (`string`)\cr show the total level optionally on the top or in the bottom |
|
12 |
#' of the factor levels. |
|
13 |
#' @param total_label (`string`)\cr which label to use for the optional total level. |
|
14 |
#' @return The [rtables::in_rows()] result with the proportion statistics. |
|
15 |
#' @seealso [s_proportion_logical()] for tabulating logical `x`. |
|
16 |
#' @export |
|
17 |
s_proportion_factor <- function( |
|
18 |
x, |
|
19 |
.alt_df, |
|
20 |
use_alt_counts = TRUE, |
|
21 |
show_total = c("none", "top", "bottom"), |
|
22 |
total_label = "Total") { |
|
23 | 5x |
checkmate::assert_factor(x) |
24 | 5x |
checkmate::assert_flag(use_alt_counts) |
25 | 5x |
show_total <- match.arg(show_total) |
26 | ||
27 | 5x |
N <- if (use_alt_counts) nrow(.alt_df) else sum(!is.na(x)) |
28 | 5x |
tab <- lapply(as.list(table(x)), function(xi) rcell(xi * c(1, 1 / N), format = jjcsformat_count_fraction)) |
29 | 5x |
if (show_total != "none") { |
30 | 3x |
checkmate::assert_string(total_label) |
31 | 3x |
tab_total <- stats::setNames(list(rcell(N, format = "xx")), total_label) |
32 | ||
33 | 3x |
tab <- if (show_total == "top") { |
34 | 1x |
c(tab_total, tab) |
35 |
} else { |
|
36 | 2x |
c(tab, tab_total) |
37 |
} |
|
38 |
} |
|
39 | 5x |
in_rows(.list = tab) |
40 |
} |
|
41 | ||
42 |
#' @title s_function for proportion of `TRUE` in logical vector |
|
43 |
#' @description A simple statistics function which prepares the numbers with percentages |
|
44 |
#' in the required format. The denominator here is from the alternative counts data set |
|
45 |
#' in the given row and column split. |
|
46 |
#' @param x (`logical`)\cr binary variable we want to analyze. |
|
47 |
#' @param label (`string`)\cr label to use. |
|
48 |
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation. |
|
49 |
#' @return The [rtables::in_rows()] result with the proportion statistics. |
|
50 |
#' @seealso [s_proportion_factor()] for tabulating factor `x`. |
|
51 |
#' @export |
|
52 |
s_proportion_logical <- function(x, label = "Responders", .alt_df) { |
|
53 | 1x |
n <- sum(x) |
54 | 1x |
N <- nrow(.alt_df) |
55 | 1x |
p_hat <- n / N |
56 | 1x |
list(n_prop = with_label(rcell(c(n, p_hat), format = jjcsformat_count_fraction), label)) |
57 |
} |
|
58 | ||
59 |
#' @title c_function for proportion of `TRUE` in logical vector |
|
60 |
#' @description A simple statistics function which prepares the numbers with percentages |
|
61 |
#' in the required format, for use in a split content row. The denominator here is |
|
62 |
#' from the column N. Note that we don't use here .alt_df because that might not |
|
63 |
#' have required row split variables available. |
|
64 |
#' @param x (`logical`)\cr binary variable we want to analyze. |
|
65 |
#' @param labelstr (`string`)\cr label string. |
|
66 |
#' @param labelstr (`string`)\cr label string. |
|
67 |
#' @param label_fstr (`string`)\cr format string for the label. |
|
68 |
#' @param format (`character` or `list`)\cr format for the statistics. |
|
69 |
#' @param .N_col (`numeric`)\cr number of columns. |
|
70 |
#' @param .N_col (`numeric`)\cr number of columns. |
|
71 |
#' @return The [rtables::in_rows()] result with the proportion statistics. |
|
72 |
#' @seealso [s_proportion_logical()] for the related statistics function. |
|
73 |
#' @export |
|
74 |
c_proportion_logical <- function(x, labelstr, label_fstr, format, .N_col) { |
|
75 | 1x |
checkmate::assert_logical(x) |
76 | 1x |
num <- sum(x) |
77 | 1x |
denom <- .N_col |
78 | 1x |
in_rows(est_prop = c(num, num / denom), .formats = format, .labels = sprintf(label_fstr, labelstr)) |
79 |
} |
|
80 | ||
81 |
#' Helper Function to Create Logical Design Matrix from Factor Variable |
|
82 |
#' |
|
83 |
#' @param df (`data.frame`)\cr including a factor variable with name in `.var`. |
|
84 |
#' @param .var (`string`)\cr name of the factor variable. |
|
85 |
#' |
|
86 |
#' @return The logical matrix with dummy encoding of all factor levels. |
|
87 |
#' @keywords internal |
|
88 |
#' @export |
|
89 |
#' @examples |
|
90 |
#' h_get_design_mat(df = data.frame(a = factor(c("a", "b", "a"))), .var = "a") |
|
91 |
h_get_design_mat <- function(df, .var) { |
|
92 | 5x |
checkmate::assert_data_frame(df) |
93 | 5x |
checkmate::assert_string(.var) |
94 | 5x |
checkmate::assert_factor(df[[.var]]) |
95 | ||
96 | 5x |
model_formula <- stats::as.formula(paste("~ -1 +", .var)) |
97 | 5x |
design_mat <- stats::model.matrix(model_formula, df) |
98 | 5x |
mode(design_mat) <- "logical" |
99 | 5x |
colnames(design_mat) <- levels(df[[.var]]) |
100 | 5x |
design_mat |
101 |
} |
|
102 | ||
103 |
#' Formatted Analysis Function For Proportion Confidence Interval for Logical |
|
104 |
#' |
|
105 |
#' @param x (`logical`)\cr including binary response values. |
|
106 |
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation. |
|
107 |
#' @param formats (`list`)\cr including element `prop_ci` with the |
|
108 |
#' required format. Note that the value is in percent already. |
|
109 |
#' @param method (`string`)\cr please see [tern::s_proportion()] for possible |
|
110 |
#' methods. |
|
111 |
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval. |
|
112 |
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval. |
|
113 |
#' |
|
114 |
#' @return The [rtables::rcell()] result. |
|
115 |
#' @export |
|
116 |
#' |
|
117 |
#' @examples |
|
118 |
#' a_proportion_ci_logical( |
|
119 |
#' x = DM$SEX == "F", |
|
120 |
#' .alt_df = DM, |
|
121 |
#' conf_level = 0.95, |
|
122 |
#' formats = list(prop_ci = jjcsformat_xx("xx.xx% - xx.xx%")), |
|
123 |
#' method = "wald" |
|
124 |
#' ) |
|
125 |
a_proportion_ci_logical <- function(x, .alt_df, conf_level, method, formats) { |
|
126 | 5x |
checkmate::assert_logical(x) |
127 | 5x |
checkmate::assert_data_frame(.alt_df) |
128 | 5x |
checkmate::assert_list(formats) |
129 | ||
130 | 5x |
diff_n <- nrow(.alt_df) - length(x) |
131 | 5x |
if (diff_n > 0) { |
132 | ! |
x <- c(x, rep(FALSE, length = diff_n)) |
133 |
} |
|
134 | 5x |
est <- s_proportion(x, conf_level = conf_level, method = method) |
135 | 5x |
rcell(est$prop_ci, format = formats$prop_ci) |
136 |
} |
|
137 | ||
138 |
#' Formatted Analysis Function For Proportion Confidence Interval for Factor |
|
139 |
#' |
|
140 |
#' @param df (`data.frame`)\cr including factor `.var`. |
|
141 |
#' @param .var (`string`)\cr name of the factor variable. |
|
142 |
#' @param .var (`string`)\cr name of the factor variable. |
|
143 |
#' @param \dots see [a_proportion_ci_logical()] for additionally required |
|
144 |
#' arguments. |
|
145 |
#' |
|
146 |
#' @return The [rtables::rcell()] result. |
|
147 |
#' @export |
|
148 |
#' |
|
149 |
#' @examples |
|
150 |
#' a_proportion_ci_factor( |
|
151 |
#' df = DM, |
|
152 |
#' .var = "SEX", |
|
153 |
#' .alt_df = DM, |
|
154 |
#' conf_level = 0.95, |
|
155 |
#' formats = list(prop_ci = jjcsformat_xx("xx.x%, xx.x%")), |
|
156 |
#' method = "clopper-pearson" |
|
157 |
#' ) |
|
158 |
a_proportion_ci_factor <- function(df, .var, ...) { |
|
159 | 1x |
checkmate::assert_factor(df[[.var]]) |
160 | ||
161 | 1x |
design_mat <- h_get_design_mat(df, .var) |
162 | 1x |
design_df <- as.data.frame(design_mat) |
163 | 1x |
res <- lapply(design_df, a_proportion_ci_logical, ...) |
164 | 1x |
in_rows(.list = res, .labels = colnames(design_mat)) |
165 |
} |
|
166 | ||
167 |
#' Split Function for Proportion Analysis Columns (TEFCGIS08 e.g.) |
|
168 |
#' |
|
169 |
#' Here we just split into 3 columns `n`, `%` and `Cum %`. |
|
170 |
#' |
|
171 |
#' @param ret (`list`)\cr return value from the previous split function. |
|
172 |
#' @param spl (`list`)\cr split information. |
|
173 |
#' @param fulldf (`data.frame`)\cr full data frame. |
|
174 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
175 |
#' |
|
176 |
#' @note This split function is used in the proportion table TEFCGIS08 and similar ones. |
|
177 |
#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of |
|
178 |
#' post-processing function. |
|
179 |
prop_post_fun <- function(ret, spl, fulldf, .spl_context) { |
|
180 | 1x |
short_split_result(n = "n", percent = "%", cum_percent = "Cum %", fulldf = fulldf) |
181 |
} |
|
182 | ||
183 |
#' @rdname prop_post_fun |
|
184 |
#' |
|
185 |
#' @param df A data frame that contains all analysis variables. |
|
186 |
#' @param vals A character vector that contains values to use for the split. |
|
187 |
#' @param labels A character vector that contains labels for the statistics (without indent). |
|
188 |
#' @param trim A single logical that indicates whether to trim the values. |
|
189 |
#' @return a split function for use in [rtables::split_rows_by]. |
|
190 |
#' @export |
|
191 |
prop_split_fun <- make_split_fun(post = list(prop_post_fun)) |
|
192 | ||
193 |
#' Formatted Analysis Function for Proportion Analysis (TEFCGIS08 e.g.) |
|
194 |
#' |
|
195 |
#' This function applies to a factor `x` when a column split was prepared with |
|
196 |
#' [prop_split_fun()] before. |
|
197 |
#' |
|
198 |
#' @details In the column named `n`, the counts of the categories as well as an |
|
199 |
#' optional `Total` count will be shown. In the column named `percent`, the |
|
200 |
#' percentages of the categories will be shown, with an optional blank entry for |
|
201 |
#' `Total`. In the column named `cum_percent`, the cumulative percentages will |
|
202 |
#' be shown instead. |
|
203 |
#' |
|
204 |
#' @param x (`factor`)\cr factor variable to analyze. |
|
205 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
206 |
#' @param formats (`list`)\cr formats for the statistics. |
|
207 |
#' @param add_total_level (`flag`)\cr whether to add a total level. |
|
208 |
#' @param x (`factor`)\cr factor variable to analyze. |
|
209 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
210 |
#' @param formats (`list`)\cr formats for the statistics. |
|
211 |
#' @param add_total_level (`flag`)\cr whether to add a total level. |
|
212 |
#' |
|
213 |
#' @return A `VerticalRowsSection` as returned by [rtables::in_rows]. |
|
214 |
#' @export |
|
215 |
prop_table_afun <- function(x, .spl_context, formats, add_total_level = FALSE) { |
|
216 | 8x |
checkmate::assert_list(formats, len = 3, names = "unique") |
217 | 8x |
checkmate::assert_names(names(formats), permutation.of = c("n", "percent", "cum_percent")) |
218 | 8x |
checkmate::assert_flag(add_total_level) |
219 | ||
220 | 8x |
stat <- utils::tail(.spl_context[nrow(.spl_context), "cur_col_split_val"][[1]], 1) |
221 | 8x |
optional_total <- function(x) if (add_total_level) x else NULL |
222 | 8x |
ns <- table(x) |
223 | 8x |
res <- if (stat == "n") { |
224 | 2x |
c(as.list(ns), optional_total(length(x))) |
225 | 8x |
} else if (stat == "percent") { |
226 | 2x |
c(as.list(ns / length(x) * 100), optional_total(list(NULL))) |
227 | 8x |
} else if (stat == "cum_percent") { |
228 | 2x |
c(as.list(cumsum(ns / length(x) * 100)), optional_total(list(NULL))) |
229 |
} else { |
|
230 | 2x |
stop("unexpected proportion statistic in split") |
231 |
} |
|
232 | 6x |
in_rows(.list = res, .labels = c(levels(x), optional_total("Total")), .formats = formats[[stat]]) |
233 |
} |
1 |
#' Split Function Factory for the Response Tables (RESP01) |
|
2 |
#' |
|
3 |
#' The main purpose here is to have a column dependent split into either comparative |
|
4 |
#' statistic (relative risk or odds ratio with p-value) in the 'Overall' column, |
|
5 |
#' and count proportions and corresponding confidence intervals in the other treatment |
|
6 |
#' arm columns. |
|
7 |
#' |
|
8 |
#' @inheritParams proposal_argument_convention |
|
9 |
#' @param method (`string`)\cr which method to use for the comparative statistics. |
|
10 |
#' |
|
11 |
#' @return A split function for use in the response table RESP01 and similar ones. |
|
12 |
#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of |
|
13 |
#' post-processing function. |
|
14 |
#' @export |
|
15 |
#' |
|
16 |
#' @examples |
|
17 |
#' split_fun <- resp01_split_fun_fct( |
|
18 |
#' method = "or_cmh", |
|
19 |
#' conf_level = 0.95 |
|
20 |
#' ) |
|
21 |
resp01_split_fun_fct <- function(method = c("rr", "or_logistic", "or_cmh"), conf_level) { |
|
22 | 2x |
method <- match.arg(method) |
23 | 2x |
(assert_proportion_value)(conf_level) |
24 | ||
25 | 2x |
post_fun <- function(ret, spl, fulldf, .spl_context) { |
26 | 8x |
all_expr <- expression(TRUE) |
27 |
# Here check if we are in 'Overall' column or not. |
|
28 | 8x |
this_col <- .spl_context[nrow(.spl_context), "value"][[1]] |
29 | 8x |
in_overall <- this_col == "Overall" |
30 |
# Accordingly, return left and right column labels and value identifiers. |
|
31 | 8x |
if (in_overall) { |
32 |
# In the Overall column, we want the comparison statistic and corresponding p-value. |
|
33 | 2x |
comp_stat_name <- if (method == "rr") "Relative Risk" else "Odds Ratio" |
34 | 2x |
short_split_result( |
35 | 2x |
comp_stat_ci = paste0(comp_stat_name, " (", f_conf_level(conf_level), ")~[super a]"), |
36 | 2x |
pval = "p-value~[super b]", |
37 | 2x |
fulldf = fulldf |
38 |
) |
|
39 |
} else { |
|
40 |
# In the treatment arm columns, we want counts and CI for proportions. |
|
41 | 6x |
short_split_result( |
42 | 6x |
count_prop = "n (%)", |
43 | 6x |
prop_ci = paste0(f_conf_level(conf_level), " for %"), |
44 | 6x |
fulldf = fulldf |
45 |
) |
|
46 |
} |
|
47 |
} |
|
48 | 2x |
make_split_fun(post = list(post_fun)) |
49 |
} |
|
50 | ||
51 |
#' Content Row Function for Counts of Subgroups in Response Tables (RESP01) |
|
52 |
#' |
|
53 |
#' @inheritParams proposal_argument_convention |
|
54 |
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation. |
|
55 |
#' @param label_fstr (`string`)\cr format string for the label. |
|
56 |
#' |
|
57 |
#' @return The correct [rtables::in_rows()] result. |
|
58 |
#' @export |
|
59 |
#' |
|
60 |
#' @examples |
|
61 |
#' fake_spl_context <- data.frame( |
|
62 |
#' cur_col_split_val = I(list(c(ARM = "A: Drug X", count_prop = "count_prop"))) |
|
63 |
#' ) |
|
64 |
#' resp01_counts_cfun( |
|
65 |
#' df = DM, |
|
66 |
#' labelstr = "Blue", |
|
67 |
#' .spl_context = fake_spl_context, |
|
68 |
#' .alt_df = DM, |
|
69 |
#' label_fstr = "Color: %s" |
|
70 |
#' ) |
|
71 |
resp01_counts_cfun <- function(df, labelstr, .spl_context, .alt_df, label_fstr) { |
|
72 | 3x |
this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
73 | 3x |
if (this_col_split[1] != "Overall" && this_col_split[2] == "count_prop") { |
74 | 1x |
in_rows(nrow(.alt_df), .formats = "xx", .labels = sprintf(label_fstr, labelstr)) |
75 |
} else { |
|
76 | 2x |
NULL |
77 |
} |
|
78 |
} |
|
79 | ||
80 |
#' Formatted Analysis Function for Comparative Statistic in Response Tables (RESP01) |
|
81 |
#' |
|
82 |
#' This function applies to a `logical` column called `.var` from `df`. |
|
83 |
#' The response proportion is compared between the treatment arms identified |
|
84 |
#' by column `arm`. |
|
85 |
#' |
|
86 |
#' @inheritParams proposal_argument_convention |
|
87 |
#' @param include (`flag`)\cr whether to include the results for this variable. |
|
88 |
#' @param arm (`string`)\cr column name in the data frame that identifies the treatment arms. |
|
89 |
#' @param formats (`list`)\cr containing formats for `comp_stat_ci` and `pval`. |
|
90 |
#' @param methods (`list`)\cr containing methods for comparative statistics. The element `comp_stat_ci` can be |
|
91 |
#' 'rr' (relative risk), 'or_cmh' (odds ratio with CMH estimation and p-value) or 'or_logistic' (odds ratio |
|
92 |
#' estimated by conditional or standard logistic regression). The element `pval` can be 'fisher' (Fisher's |
|
93 |
#' exact test) or 'chisq' (chi-square test), only used when using unstratified analyses with 'or_logistic'. |
|
94 |
#' @param stat (`string`)\cr the statistic to return, either `comp_stat_ci` |
|
95 |
#' or `pval`. |
|
96 |
#' |
|
97 |
#' @return The formatted result as [rtables::rcell()]. |
|
98 |
#' @seealso [resp01_a_comp_stat_factor()] for the `factor` equivalent. |
|
99 |
#' @export |
|
100 |
#' |
|
101 |
#' @examples |
|
102 |
#' dm <- droplevels(subset(formatters::DM, SEX %in% c("F", "M"))) |
|
103 |
#' dm$RESP <- as.logical(sample(c(TRUE, FALSE), size = nrow(DM), replace = TRUE)) |
|
104 |
#' |
|
105 |
#' resp01_a_comp_stat_logical( |
|
106 |
#' dm, |
|
107 |
#' .var = "RESP", |
|
108 |
#' conf_level = 0.9, |
|
109 |
#' include = TRUE, |
|
110 |
#' arm = "SEX", |
|
111 |
#' strata = "RACE", |
|
112 |
#' stat = "comp_stat_ci", |
|
113 |
#' method = list(comp_stat_ci = "or_cmh"), |
|
114 |
#' formats = list( |
|
115 |
#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"), |
|
116 |
#' pval = jjcsformat_pval_fct(0.05) |
|
117 |
#' ) |
|
118 |
#' ) |
|
119 |
resp01_a_comp_stat_logical <- function(df, |
|
120 |
.var, |
|
121 |
conf_level, |
|
122 |
include, |
|
123 |
arm, |
|
124 |
strata, |
|
125 |
formats, |
|
126 |
methods, |
|
127 |
stat = c("comp_stat_ci", "pval")) { |
|
128 | 30x |
checkmate::assert_logical(df[[.var]]) |
129 | 30x |
checkmate::assert_factor(df[[arm]], n.levels = 2L) |
130 | 30x |
checkmate::assert_flag(include) |
131 | 30x |
stat <- match.arg(stat) |
132 | ||
133 | 30x |
if (include) { |
134 | 9x |
checkmate::assert_list(methods) |
135 | 9x |
checkmate::assert_subset(methods$comp_stat_ci, c("rr", "or_cmh", "or_logistic")) |
136 | ||
137 | 9x |
df_by <- split(df, f = df[[arm]]) |
138 | 9x |
this_df <- df_by[[1]] |
139 | 9x |
this_ref_group <- df_by[[2]] |
140 | ||
141 | 9x |
this_res <- if (methods$comp_stat_ci == "rr") { |
142 |
# Stratified CMH test for point estimate and p-value, and Wald statistic for the confidence interval |
|
143 | ! |
s_res <- s_relative_risk( |
144 | ! |
this_df, |
145 | ! |
.var = .var, |
146 | ! |
.ref_group = this_ref_group, |
147 | ! |
.in_ref_col = FALSE, |
148 | ! |
variables = list(strata = strata), |
149 | ! |
conf_level = conf_level |
150 |
) |
|
151 | ! |
list(comp_stat_ci = s_res$rel_risk_ci, pval = s_res$pval) |
152 | 9x |
} else if (methods$comp_stat_ci == "or_logistic") { |
153 |
# Odds Ratio Variant 1: (Conditional/Standard) Logistic regression for odds ratio point estimate and |
|
154 |
# confidence interval. |
|
155 | 3x |
s_res <- s_odds_ratio_j( |
156 | 3x |
this_df, |
157 | 3x |
.var = .var, |
158 | 3x |
.ref_group = this_ref_group, |
159 | 3x |
.in_ref_col = FALSE, |
160 | 3x |
.df_row = rbind(this_df, this_ref_group), |
161 | 3x |
variables = list(arm = arm, strata = strata), |
162 | 3x |
conf_level = conf_level, |
163 | 3x |
method = "exact" # Only used if strata are present. |
164 |
) |
|
165 | 3x |
pval <- if (is.null(strata)) { |
166 | 3x |
checkmate::assert_subset(methods$pval, c("fisher", "chisq")) |
167 | ||
168 |
# If not stratified, then the p-value can come either from Fisher's exact or chi-square test. |
|
169 | 3x |
p_res <- s_test_proportion_diff( |
170 | 3x |
this_df, |
171 | 3x |
.var = .var, |
172 | 3x |
.ref_group = this_ref_group, |
173 | 3x |
.in_ref_col = FALSE, |
174 | 3x |
method = methods$pval |
175 |
) |
|
176 | 3x |
p_res$pval |
177 |
} else { |
|
178 | ! |
s_res$pval |
179 |
} |
|
180 | 3x |
list(comp_stat_ci = s_res$or_ci, pval = pval) |
181 | 9x |
} else if (methods$comp_stat_ci == "or_cmh") { |
182 |
# Odds Ratio Variant 2: Stratified CMH test for odds ratio point estimate and confidence interval and |
|
183 |
# corresponding CMH test p-value. |
|
184 | 6x |
s_res <- s_odds_ratio_j( |
185 | 6x |
this_df, |
186 | 6x |
.var = .var, |
187 | 6x |
.ref_group = this_ref_group, |
188 | 6x |
.in_ref_col = FALSE, |
189 | 6x |
.df_row = rbind(this_df, this_ref_group), |
190 | 6x |
variables = list(arm = arm, strata = strata), |
191 | 6x |
conf_level = conf_level, |
192 | 6x |
method = "cmh" |
193 |
) |
|
194 | 6x |
list(comp_stat_ci = s_res$or_ci, pval = s_res$pval) |
195 |
} |
|
196 | 9x |
rcell(this_res[[stat]], format = formats[[stat]]) |
197 |
} else { |
|
198 | 21x |
NULL |
199 |
} |
|
200 |
} |
|
201 | ||
202 |
#' Formatted Analysis Function for Comparative Statistic in Response Tables (RESP01) |
|
203 |
#' |
|
204 |
#' This function applies to a `factor` column called `.var` from `df`. |
|
205 |
#' |
|
206 |
#' @inheritParams proposal_argument_convention |
|
207 |
#' @param include (`character`)\cr for which factor levels to include the comparison |
|
208 |
#' statistic results. |
|
209 |
#' @param \dots see [resp01_a_comp_stat_logical()] for additional required arguments. |
|
210 |
#' |
|
211 |
#' @return The formatted result as [rtables::rcell()]. |
|
212 |
#' @export |
|
213 |
#' |
|
214 |
#' @examples |
|
215 |
#' dm <- droplevels(subset(formatters::DM, SEX %in% c("F", "M"))) |
|
216 |
#' |
|
217 |
#' resp01_a_comp_stat_factor( |
|
218 |
#' dm, |
|
219 |
#' .var = "COUNTRY", |
|
220 |
#' conf_level = 0.9, |
|
221 |
#' include = c("USA", "CHN"), |
|
222 |
#' arm = "SEX", |
|
223 |
#' strata = "RACE", |
|
224 |
#' stat = "comp_stat_ci", |
|
225 |
#' method = list(comp_stat_ci = "or_cmh"), |
|
226 |
#' formats = list( |
|
227 |
#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"), |
|
228 |
#' pval = jjcsformat_pval_fct(0.05) |
|
229 |
#' ) |
|
230 |
#' ) |
|
231 |
resp01_a_comp_stat_factor <- function(df, |
|
232 |
.var, |
|
233 |
include, |
|
234 |
...) { |
|
235 | 3x |
checkmate::assert_factor(df[[.var]]) |
236 | 3x |
checkmate::assert_character(include, null.ok = TRUE) |
237 | 3x |
checkmate::assert_subset(x = include, choices = levels(df[[.var]])) |
238 | ||
239 | 3x |
design_df <- as.data.frame(h_get_design_mat(df, .var)) |
240 | ||
241 | 3x |
res <- sapply(X = levels(df[[.var]]), simplify = FALSE, FUN = function(lvl) { |
242 | 27x |
df[[.var]] <- design_df[[lvl]] |
243 | 27x |
resp01_a_comp_stat_logical(df = df, .var = .var, include = lvl %in% include, ...) |
244 |
}) |
|
245 | 3x |
in_rows(.list = res, .labels = levels(df[[.var]])) |
246 |
} |
|
247 | ||
248 |
#' Formatted Analysis and Content Summary Function for Response Tables (RESP01) |
|
249 |
#' |
|
250 |
#' This function applies to both `factor` and `logical` columns called |
|
251 |
#' `.var` from `df`. Depending on the position in the split, it returns the |
|
252 |
#' right formatted results for the RESP01 and related layouts. |
|
253 |
#' |
|
254 |
#' @inheritParams proposal_argument_convention |
|
255 |
#' @inheritParams resp01_a_comp_stat_logical |
|
256 |
#' @param include_comp (`character` or `flag`)\cr whether to include comparative |
|
257 |
#' statistic results, either `character` for factors or `flag` for logicals. |
|
258 |
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation. |
|
259 |
#' @param arm (`string`)\cr column name in the data frame that identifies the treatment arms. |
|
260 |
#' @param label (`string`)\cr only for logicals, which label to use. (For factors, the |
|
261 |
#' labels are the factor levels.) |
|
262 |
#' @param formats (`list`)\cr containing formats for `prop_ci`, `comp_stat_ci` |
|
263 |
#' and `pval`. |
|
264 |
#' @param methods (`list`)\cr containing methods for comparative statistics. The element `comp_stat_ci` can be |
|
265 |
#' 'rr' (relative risk), 'or_cmh' (odds ratio with CMH estimation and p-value) or 'or_logistic' (odds ratio |
|
266 |
#' estimated by conditional or standard logistic regression). The element `pval` can be 'fisher' (Fisher's |
|
267 |
#' exact test) or 'chisq' (chi-square test), only used when using unstratified analyses with 'or_logistic'. |
|
268 |
#' The element `prop_ci` specifies the method for proportion confidence interval calculation. |
|
269 |
#' |
|
270 |
#' @return The formatted result as [rtables::in_rows()] result. |
|
271 |
#' @export |
|
272 |
#' |
|
273 |
#' @examples |
|
274 |
#' fake_spl_context <- data.frame( |
|
275 |
#' cur_col_split_val = I(list(c(ARM = "A: Drug X", count_prop = "count_prop"))) |
|
276 |
#' ) |
|
277 |
#' dm <- droplevels(subset(DM, SEX %in% c("F", "M"))) |
|
278 |
#' resp01_acfun( |
|
279 |
#' dm, |
|
280 |
#' .alt_df = dm, |
|
281 |
#' .var = "COUNTRY", |
|
282 |
#' .spl_context = fake_spl_context, |
|
283 |
#' conf_level = 0.9, |
|
284 |
#' include_comp = c("USA", "CHN"), |
|
285 |
#' arm = "SEX", |
|
286 |
#' strata = "RACE", |
|
287 |
#' methods = list( |
|
288 |
#' comp_stat_ci = "or_cmh", |
|
289 |
#' pval = "", |
|
290 |
#' prop_ci = "wald" |
|
291 |
#' ), |
|
292 |
#' formats = list( |
|
293 |
#' prop_ci = jjcsformat_xx("xx.% - xx.%"), |
|
294 |
#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"), |
|
295 |
#' pval = jjcsformat_pval_fct(0.05) |
|
296 |
#' ) |
|
297 |
#' ) |
|
298 |
#' fake_spl_context2 <- data.frame( |
|
299 |
#' cur_col_split_val = I(list(c(ARM = "Overall", comp_stat_ci = "comp_stat_ci"))) |
|
300 |
#' ) |
|
301 |
#' resp01_acfun( |
|
302 |
#' dm, |
|
303 |
#' .alt_df = dm, |
|
304 |
#' .var = "COUNTRY", |
|
305 |
#' .spl_context = fake_spl_context2, |
|
306 |
#' conf_level = 0.9, |
|
307 |
#' include_comp = c("USA", "CHN"), |
|
308 |
#' arm = "SEX", |
|
309 |
#' strata = "RACE", |
|
310 |
#' methods = list( |
|
311 |
#' comp_stat_ci = "or_cmh", |
|
312 |
#' pval = "", |
|
313 |
#' prop_ci = "wald" |
|
314 |
#' ), |
|
315 |
#' formats = list( |
|
316 |
#' prop_ci = jjcsformat_xx("xx.% - xx.%"), |
|
317 |
#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"), |
|
318 |
#' pval = jjcsformat_pval_fct(0.05) |
|
319 |
#' ) |
|
320 |
#' ) |
|
321 |
resp01_acfun <- function(df, |
|
322 |
labelstr = NULL, |
|
323 |
label = NULL, |
|
324 |
.var, |
|
325 |
.spl_context, |
|
326 |
include_comp, |
|
327 |
.alt_df, |
|
328 |
conf_level, |
|
329 |
arm, |
|
330 |
strata, |
|
331 |
formats, |
|
332 |
methods) { |
|
333 | 3x |
this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]] |
334 | 3x |
x <- df[[.var]] |
335 | 3x |
is_factor <- is.factor(x) |
336 | 3x |
res <- if (this_col_split[1] != "Overall") { |
337 | 1x |
if (this_col_split[2] == "count_prop") { |
338 | 1x |
if (is_factor) { |
339 | 1x |
s_proportion_factor(x, .alt_df = .alt_df) |
340 |
} else { |
|
341 | ! |
s_proportion_logical(x, .alt_df = .alt_df)$n_prop |
342 |
} |
|
343 |
} else { |
|
344 | ! |
checkmate::assert_true(this_col_split[2] == "prop_ci") |
345 | ! |
if (is_factor) { |
346 | ! |
a_proportion_ci_factor( |
347 | ! |
df, |
348 | ! |
.var = .var, |
349 | ! |
.alt_df = .alt_df, |
350 | ! |
conf_level = conf_level, |
351 | ! |
formats = formats, |
352 | ! |
method = methods$prop_ci |
353 |
) |
|
354 |
} else { |
|
355 | ! |
a_proportion_ci_logical( |
356 | ! |
x, |
357 | ! |
.alt_df = .alt_df, |
358 | ! |
conf_level = conf_level, |
359 | ! |
formats = formats, |
360 | ! |
method = methods$prop_ci |
361 |
) |
|
362 |
} |
|
363 |
} |
|
364 |
} else { |
|
365 | 2x |
checkmate::assert_true(this_col_split[1] == "Overall") |
366 | 2x |
if (is_factor) { |
367 | 1x |
resp01_a_comp_stat_factor( |
368 | 1x |
df, |
369 | 1x |
.var = .var, |
370 | 1x |
conf_level = conf_level, |
371 | 1x |
include = include_comp, |
372 | 1x |
arm = arm, |
373 | 1x |
strata = strata, |
374 | 1x |
stat = this_col_split[2], |
375 | 1x |
formats = formats, |
376 | 1x |
methods = methods |
377 |
) |
|
378 |
} else { |
|
379 | 1x |
resp01_a_comp_stat_logical( |
380 | 1x |
df, |
381 | 1x |
.var = .var, |
382 | 1x |
conf_level = conf_level, |
383 | 1x |
include = include_comp, |
384 | 1x |
arm = arm, |
385 | 1x |
strata = strata, |
386 | 1x |
stat = this_col_split[2], |
387 | 1x |
formats = formats, |
388 | 1x |
methods = methods |
389 |
) |
|
390 |
} |
|
391 |
} |
|
392 | 3x |
if (is_factor) { |
393 | 2x |
res |
394 |
} else { |
|
395 | 1x |
if (!is.null(labelstr)) { |
396 | 1x |
label <- labelstr |
397 |
} |
|
398 | 1x |
in_rows(res, .labels = label) |
399 |
} |
|
400 |
} |
1 |
#' Tabulation of Least Square Means Results |
|
2 |
#' |
|
3 |
#' @description `r lifecycle::badge('stable')` |
|
4 |
#' |
|
5 |
#' These functions can be used to produce tables from LS means, e.g. from [fit_mmrm_j()] |
|
6 |
#' or [fit_ancova()]. |
|
7 |
#' |
|
8 |
#' @note These functions have been forked from the `tern.mmrm` package. Additional features |
|
9 |
#' are: |
|
10 |
#' |
|
11 |
#' * Additional `ref_path` argument for tern.mmrm::summarize_lsmeans(). |
|
12 |
#' * The function is more general in that it also works for LS means results from ANCOVA |
|
13 |
#' * Additional statistic `diff_mean_est_ci` is returned |
|
14 |
#' * P-value sidedness can be chosen |
|
15 |
#' |
|
16 |
#' @name tabulate_lsmeans |
|
17 |
#' @return for `s_lsmeans`, a list containing the same statistics returned by tern.mmrm::s_mmrm_lsmeans, |
|
18 |
#' with the additional `diff_mean_est_ci` three-dimensional statistic. For `a_lsmeans`, |
|
19 |
#' a `VertalRowsSection` as returned by [rtables::in_rows]. |
|
20 |
#' @examples |
|
21 |
#' result <- fit_mmrm_j( |
|
22 |
#' vars = list( |
|
23 |
#' response = "FEV1", |
|
24 |
#' covariates = c("RACE", "SEX"), |
|
25 |
#' id = "USUBJID", |
|
26 |
#' arm = "ARMCD", |
|
27 |
#' visit = "AVISIT" |
|
28 |
#' ), |
|
29 |
#' data = mmrm::fev_data, |
|
30 |
#' cor_struct = "unstructured", |
|
31 |
#' weights_emmeans = "equal" |
|
32 |
#' ) |
|
33 |
#' |
|
34 |
#' df <- broom::tidy(result) |
|
35 |
#' |
|
36 |
#' s_lsmeans(df[8, ], .in_ref_col = FALSE) |
|
37 |
#' s_lsmeans(df[8, ], .in_ref_col = FALSE, alternative = "greater", show_relative = "increase") |
|
38 |
#' |
|
39 |
#' dat_adsl <- mmrm::fev_data |> |
|
40 |
#' dplyr::select(USUBJID, ARMCD) |> |
|
41 |
#' unique() |
|
42 |
#' |
|
43 |
#' basic_table() |> |
|
44 |
#' split_cols_by("ARMCD") |> |
|
45 |
#' add_colcounts() |> |
|
46 |
#' split_rows_by("AVISIT") |> |
|
47 |
#' analyze( |
|
48 |
#' "AVISIT", |
|
49 |
#' afun = a_lsmeans, |
|
50 |
#' show_labels = "hidden", |
|
51 |
#' na_str = tern::default_na_str(), |
|
52 |
#' extra_args = list( |
|
53 |
#' .stats = c( |
|
54 |
#' "n", |
|
55 |
#' "adj_mean_se", |
|
56 |
#' "adj_mean_ci", |
|
57 |
#' "diff_mean_se", |
|
58 |
#' "diff_mean_ci" |
|
59 |
#' ), |
|
60 |
#' .labels = c( |
|
61 |
#' adj_mean_se = "Adj. LS Mean (Std. Error)", |
|
62 |
#' adj_mean_ci = "95% CI", |
|
63 |
#' diff_mean_ci = "95% CI" |
|
64 |
#' ), |
|
65 |
#' .formats = c(adj_mean_se = jjcsformat_xx("xx.x (xx.xx)")), |
|
66 |
#' alternative = "greater", |
|
67 |
#' ref_path = c("ARMCD", result$ref_level) |
|
68 |
#' ) |
|
69 |
#' ) |> |
|
70 |
#' build_table( |
|
71 |
#' df = broom::tidy(result), |
|
72 |
#' alt_counts_df = dat_adsl |
|
73 |
#' ) |
|
74 |
NULL |
|
75 | ||
76 |
#' @describeIn tabulate_lsmeans Helper method (for [broom::tidy()]) to prepare a `data.frame` from an |
|
77 |
#' `tern_model` object containing the least-squares means and contrasts. |
|
78 |
#' @method tidy tern_model |
|
79 |
#' @importFrom generics tidy |
|
80 |
#' @export |
|
81 |
tidy.tern_model <- function(x, ...) { |
|
82 |
# nolint |
|
83 | 57x |
vars <- x$vars |
84 | 57x |
estimates <- x$lsmeans$estimates |
85 | 57x |
df <- if (is.null(vars$arm)) { |
86 | ! |
nams <- names(estimates) |
87 | ! |
to_rename <- match(c("estimate", "se", "df", "lower_cl", "upper_cl"), nams) |
88 | ! |
names(estimates)[to_rename] <- paste0(names(estimates)[to_rename], "_est") |
89 | ! |
estimates |
90 |
} else { |
|
91 | 57x |
contrasts <- x$lsmeans$contrasts |
92 | 57x |
merge(x = estimates, y = contrasts, by = c(vars$visit, vars$arm), suffixes = c("_est", "_contr"), all = TRUE) |
93 |
} |
|
94 | 57x |
df[[vars$arm]] <- factor(df[[vars$arm]], levels = levels(estimates[[vars$arm]])) |
95 | 57x |
df[[vars$visit]] <- factor(df[[vars$visit]], levels = levels(estimates[[vars$visit]])) |
96 | 57x |
df$conf_level <- x$conf_level |
97 | 57x |
if (!is.null(x$mse)) { |
98 | 57x |
df$mse <- x$mse[df[[vars$visit]]] |
99 |
} |
|
100 | 57x |
if (!is.null(x$df)) { |
101 | 57x |
df$df <- x$df[df[[vars$visit]]] |
102 |
} |
|
103 | 57x |
df |
104 |
} |
|
105 | ||
106 |
#' @describeIn tabulate_lsmeans Statistics function which is extracting estimates from a tidied least-squares means |
|
107 |
#' data frame. |
|
108 |
#' @inheritParams proposal_argument_convention |
|
109 |
#' @export |
|
110 |
s_lsmeans <- function(df, |
|
111 |
.in_ref_col, |
|
112 |
alternative = c("two.sided", "less", "greater"), |
|
113 |
show_relative = c("reduction", "increase")) { |
|
114 | 85x |
alternative <- match.arg(alternative) |
115 | 85x |
show_relative <- match.arg(show_relative) |
116 | 85x |
if_not_ref <- function(x) if (.in_ref_col) character() else x |
117 | 85x |
list( |
118 | 85x |
n = df$n, |
119 | 85x |
adj_mean_se = c(df$estimate_est, df$se_est), |
120 | 85x |
adj_mean_ci = with_label( |
121 | 85x |
c(df$lower_cl_est, df$upper_cl_est), |
122 | 85x |
paste0("Adjusted Mean ", f_conf_level(df$conf_level)) |
123 |
), |
|
124 | 85x |
adj_mean_est_ci = with_label( |
125 | 85x |
c(df$estimate_est, df$lower_cl_est, df$upper_cl_est), |
126 | 85x |
paste0("Adjusted Mean (", f_conf_level(df$conf_level), ")") |
127 |
), |
|
128 | 85x |
diff_mean_se = if_not_ref(c(df$estimate_contr, df$se_contr)), |
129 | 85x |
diff_mean_ci = with_label( |
130 | 85x |
if_not_ref(c(df$lower_cl_contr, df$upper_cl_contr)), |
131 | 85x |
paste0("Difference in Adjusted Means ", f_conf_level(df$conf_level)) |
132 |
), |
|
133 | 85x |
diff_mean_est_ci = with_label( |
134 | 85x |
if_not_ref(c(df$estimate_contr, df$lower_cl_contr, df$upper_cl_contr)), |
135 | 85x |
paste0("Difference in Adjusted Means (", f_conf_level(df$conf_level), ")") |
136 |
), |
|
137 | 85x |
change = switch(show_relative, |
138 | 85x |
reduction = with_label(if_not_ref(df$relative_reduc), "Relative Reduction (%)"), |
139 | 85x |
increase = with_label(if_not_ref(-df$relative_reduc), "Relative Increase (%)") |
140 |
), |
|
141 | 85x |
p_value = if_not_ref(switch(alternative, |
142 | 85x |
two.sided = with_label(df$p_value, "2-sided p-value"), |
143 | 85x |
less = with_label(df$p_value_less, "1-sided p-value (less)"), |
144 | 85x |
greater = with_label(df$p_value_greater, "1-sided p-value (greater)") |
145 |
)) |
|
146 |
) |
|
147 |
} |
|
148 | ||
149 |
#' @describeIn tabulate_lsmeans Formatted Analysis function to be used as `afun` |
|
150 |
#' @export |
|
151 |
a_lsmeans <- function(df, |
|
152 |
ref_path, |
|
153 |
.spl_context, |
|
154 |
..., |
|
155 |
.stats = NULL, |
|
156 |
.formats = NULL, |
|
157 |
.labels = NULL, |
|
158 |
.indent_mods = NULL) { |
|
159 |
# Check for additional parameters to the statistics function |
|
160 | 30x |
dots_extra_args <- list(...) |
161 | ||
162 |
# Only support default stats, not custom stats |
|
163 | 30x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
164 | ||
165 |
# Obtain reference column information |
|
166 | 30x |
ref <- get_ref_info(ref_path, .spl_context) |
167 | ||
168 |
# Apply statistics function |
|
169 | 30x |
x_stats <- .apply_stat_functions( |
170 | 30x |
default_stat_fnc = s_lsmeans, |
171 | 30x |
custom_stat_fnc_list = NULL, |
172 | 30x |
args_list = c(df = list(df), .in_ref_col = ref$in_ref_col, dots_extra_args) |
173 |
) |
|
174 | ||
175 |
# Format according to specifications |
|
176 | 30x |
format_stats( |
177 | 30x |
x_stats, |
178 | 30x |
method_groups = "tabulate_lsmeans", |
179 | 30x |
stats_in = .stats, |
180 | 30x |
formats_in = .formats, |
181 | 30x |
labels_in = .labels, |
182 | 30x |
indents_in = .indent_mods |
183 |
) |
|
184 |
} |
1 |
#' Analysis and Content Summary Function Producing Blank Line |
|
2 |
#' |
|
3 |
#' @inheritParams proposal_argument_convention |
|
4 |
#' |
|
5 |
#' @keywords internal |
|
6 |
ac_blank_line <- function(df, labelstr = "") { |
|
7 | 36x |
in_rows(.list = NA_real_, .labels = labelstr, .formats = "xx", .format_na_strs = "") |
8 |
} |
|
9 | ||
10 |
#' Insertion of Blank Lines in a Layout |
|
11 |
#' |
|
12 |
#' @inheritParams proposal_argument_convention |
|
13 |
#' |
|
14 |
#' @description This is a hack for `rtables` in order to be able to add row gaps, |
|
15 |
#' i.e. blank lines. |
|
16 |
#' In particular, by default this function needs to maintain a global state for avoiding |
|
17 |
#' duplicate table names. The global state variable is hidden by using |
|
18 |
#' a dot in front of its name. However, this likely won't work with parallelisation across |
|
19 |
#' multiple threads and also causes non-reproducibility of the resulting `rtables` |
|
20 |
#' object. Therefore also a custom table name can be used. |
|
21 |
#' |
|
22 |
#' @return The modified layout now including a blank line after the current |
|
23 |
#' row content. |
|
24 |
#' @export |
|
25 |
#' |
|
26 |
#' @examples |
|
27 |
#' ADSL <- ex_adsl |
|
28 |
#' |
|
29 |
#' lyt <- basic_table() |> |
|
30 |
#' split_cols_by("ARM") |> |
|
31 |
#' split_rows_by("STRATA1") |> |
|
32 |
#' analyze(vars = "AGE", afun = function(x) { |
|
33 |
#' in_rows( |
|
34 |
#' "Mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.xx (xx.xx)") |
|
35 |
#' ) |
|
36 |
#' }) |> |
|
37 |
#' insert_blank_line() |> |
|
38 |
#' analyze(vars = "AGE", table_names = "AGE_Range", afun = function(x) { |
|
39 |
#' in_rows( |
|
40 |
#' "Range" = rcell(range(x), format = "xx.xx - xx.xx") |
|
41 |
#' ) |
|
42 |
#' }) |
|
43 |
#' build_table(lyt, ADSL) |
|
44 |
insert_blank_line <- function(lyt, table_names = NULL) { |
|
45 | 4x |
varnames_rows <- vars_in_layout(lyt@row_layout) |
46 | 4x |
checkmate::assert_character(varnames_rows, min.len = 1L) |
47 | 4x |
last_varname_rows <- utils::tail(varnames_rows, 1L) |
48 | ||
49 | 4x |
this_table_name <- if (is.null(table_names)) { |
50 | 2x |
default_table_name <- paste0(".post_", last_varname_rows, "_blank") |
51 | ||
52 |
# A named list for tracking table names and counts |
|
53 | 2x |
table_count <- getOption("junco.insert_blank_line") |
54 | ||
55 | 2x |
new_count <- if (default_table_name %in% names(table_count)) { |
56 | 1x |
table_count[[default_table_name]] + 1 |
57 |
} else { |
|
58 | 1x |
1 |
59 |
} |
|
60 | ||
61 | 2x |
table_count[[default_table_name]] <- new_count |
62 | 2x |
options(junco.insert_blank_line = table_count) |
63 | ||
64 | 2x |
paste(default_table_name, new_count, sep = "_") |
65 |
} else { |
|
66 | 2x |
checkmate::assert_string(table_names, min.chars = 1L) |
67 | 2x |
table_names |
68 |
} |
|
69 | ||
70 | 4x |
analyze( |
71 | 4x |
lyt, |
72 | 4x |
vars = last_varname_rows, |
73 | 4x |
afun = ac_blank_line, |
74 | 4x |
show_labels = "hidden", |
75 | 4x |
table_names = this_table_name |
76 |
) |
|
77 |
} |
1 |
#' Get Visit Levels in Order Defined by Numeric Version |
|
2 |
#' |
|
3 |
#' @param visit_cat (`character`)\cr the categorical version. |
|
4 |
#' @param visit_n (`numeric`)\cr the numeric version. |
|
5 |
#' |
|
6 |
#' @return The unique visit levels in the order defined by the numeric version. |
|
7 |
#' @export |
|
8 |
#' |
|
9 |
#' @examples |
|
10 |
#' get_visit_levels( |
|
11 |
#' visit_cat = c("Week 1", "Week 11", "Week 2"), |
|
12 |
#' visit_n = c(1, 5, 2) |
|
13 |
#' ) |
|
14 |
get_visit_levels <- function(visit_cat, visit_n) { |
|
15 | 8x |
checkmate::assert_character(visit_cat) |
16 | 7x |
checkmate::assert_numeric(visit_n) |
17 | ||
18 | 6x |
nvar <- "n" |
19 | 6x |
visit_levels_df <- unique(data.frame(cat = visit_cat, n = visit_n)) |
20 | 6x |
visit_levels_df <- visit_levels_df[order(visit_levels_df$n), ] |
21 | 6x |
visit_levels <- visit_levels_df$cat |
22 | ||
23 | 6x |
checkmate::assert_character(visit_levels, unique = TRUE) |
24 | ||
25 | 5x |
visit_levels |
26 |
} |
1 |
#' Analysis of Covariance |
|
2 |
#' |
|
3 |
#' Performs an analysis of covariance between two groups returning the estimated |
|
4 |
#' "treatment effect" (i.e. the contrast between the two treatment groups) and |
|
5 |
#' the least square means estimates in each group. |
|
6 |
#' |
|
7 |
#' @param data (`data.frame`)\cr A `data.frame` containing the data to be used in the model. |
|
8 |
#' @param vars (`list`)\cr A `vars` object as generated by [rbmi::set_vars()]. Only the `group`, |
|
9 |
#' `visit`, `outcome` and `covariates` elements are required. See details. |
|
10 |
#' @param visits (`character vector`)\cr An optional character vector specifying which visits to |
|
11 |
#' fit the ancova model at. If `NULL`, a separate ancova model will be fit to the |
|
12 |
#' outcomes for each visit (as determined by `unique(data[[vars$visit]])`). |
|
13 |
#' See details. |
|
14 |
#' @param weights (`character`)\cr Character, either `"counterfactual"` (default), `"equal"`, |
|
15 |
#' `"proportional_em"` or `"proportional"`. |
|
16 |
#' Specifies the weighting strategy to be used when calculating the lsmeans. |
|
17 |
#' See the weighting section for more details. |
|
18 |
#' |
|
19 |
#' @details |
|
20 |
#' The function works as follows: |
|
21 |
#' |
|
22 |
#' 1. Select the first value from `visits`. |
|
23 |
#' 2. Subset the data to only the observations that occurred on this visit. |
|
24 |
#' 3. Fit a linear model as `vars$outcome ~ vars$group + vars$covariates`. |
|
25 |
#' 4. Extract the "treatment effect" & least square means for each treatment group. |
|
26 |
#' 5. Repeat points 2-3 for all other values in `visits`. |
|
27 |
#' |
|
28 |
#' If no value for `visits` is provided, then it will be set to |
|
29 |
#' `unique(data[[vars$visit]])`. |
|
30 |
#' |
|
31 |
#' In order to meet the formatting standards set by [rbmi_analyse()], the results will be collapsed |
|
32 |
#' into a single list suffixed by the visit name, e.g.: |
|
33 |
#' ``` |
|
34 |
#' list( |
|
35 |
#' var_visit_1 = list(est = ...), |
|
36 |
#' trt_B_visit_1 = list(est = ...), |
|
37 |
#' lsm_A_visit_1 = list(est = ...), |
|
38 |
#' lsm_B_visit_1 = list(est = ...), |
|
39 |
#' var_visit_2 = list(est = ...), |
|
40 |
#' trt_B_visit_2 = list(est = ...), |
|
41 |
#' lsm_A_visit_2 = list(est = ...), |
|
42 |
#' lsm_B_visit_2 = list(est = ...), |
|
43 |
#' ... |
|
44 |
#' ) |
|
45 |
#' ``` |
|
46 |
#' Please note that "trt" refers to the treatment effects, and "lsm" refers to the least |
|
47 |
#' square mean results. In the above example `vars$group` has two factor levels A and B. |
|
48 |
#' The new "var" refers to the model estimated variance of the residuals. |
|
49 |
#' |
|
50 |
#' If you want to include interaction terms in your model this can be done |
|
51 |
#' by providing them to the `covariates` argument of [rbmi::set_vars()] |
|
52 |
#' e.g. `set_vars(covariates = c("sex*age"))`. |
|
53 |
#' |
|
54 |
#' @return a list of variance (`var_*`), treatment effect (`trt_*`), and |
|
55 |
#' least square mean (`lsm_*`) estimates for each visit, organized as |
|
56 |
#' described in Details above. |
|
57 |
#' |
|
58 |
#' @note These functions have the `rbmi_` prefix to distinguish them from the corresponding |
|
59 |
#' `rbmi` package functions, from which they were copied from. Additional features here |
|
60 |
#' include: |
|
61 |
#' |
|
62 |
#' * Support for more than two treatment groups. |
|
63 |
#' * Variance estimates are returned. |
|
64 |
#' |
|
65 |
#' @seealso [rbmi_analyse()] |
|
66 |
#' @seealso [stats::lm()] |
|
67 |
#' @seealso [rbmi::set_vars()] |
|
68 |
#' @export |
|
69 |
rbmi_ancova <- function( |
|
70 |
data, |
|
71 |
vars, |
|
72 |
visits = NULL, |
|
73 |
weights = c("counterfactual", "equal", "proportional_em", "proportional")) { |
|
74 | 4x |
outcome <- vars[["outcome"]] |
75 | 4x |
group <- vars[["group"]] |
76 | 4x |
covariates <- vars[["covariates"]] |
77 | 4x |
visit <- vars[["visit"]] |
78 | 4x |
weights <- match.arg(weights) |
79 | ||
80 | 4x |
expected_vars <- c( |
81 | 4x |
utils::getFromNamespace("extract_covariates", "rbmi")(covariates), |
82 | 4x |
outcome, |
83 | 4x |
group |
84 |
) |
|
85 | ||
86 | 4x |
if (is.null(visits)) { |
87 | 1x |
visits <- unique(data[[visit]]) |
88 |
} |
|
89 | ||
90 | 4x |
res <- lapply( |
91 | 4x |
visits, |
92 | 4x |
function(x) { |
93 | 6x |
data2 <- data[data[[visit]] == x, ] |
94 | 6x |
res <- rbmi_ancova_single(data2, outcome, group, covariates, weights) |
95 | 6x |
names(res) <- paste0(names(res), "_", x) |
96 | 6x |
return(res) |
97 |
} |
|
98 |
) |
|
99 | 4x |
return(unlist(res, recursive = FALSE)) |
100 |
} |
|
101 | ||
102 | ||
103 |
#' Implements an Analysis of Covariance (ANCOVA) |
|
104 |
#' |
|
105 |
#' @description |
|
106 |
#' Performance analysis of covariance. See [rbmi_ancova()] for full details. |
|
107 |
#' |
|
108 |
#' @param outcome (`string`)\cr name of the outcome variable in `data`. |
|
109 |
#' @param group (`string`)\cr name of the group variable in `data`. |
|
110 |
#' @param covariates (`character vector`)\cr character vector containing the |
|
111 |
#' name of any additional covariates |
|
112 |
#' to be included in the model as well as any interaction terms. |
|
113 |
#' |
|
114 |
#' @inheritParams rbmi_ancova |
|
115 |
#' |
|
116 |
#' @details |
|
117 |
#' - `group` must be a factor variable with only 2 levels. |
|
118 |
#' - `outcome` must be a continuous numeric variable. |
|
119 |
#' @export |
|
120 |
#' @return a list containing `var` with variance estimates as well as |
|
121 |
#' `trt_*` and `lsm_*` entries. See [rbmi_ancova()] for full details. |
|
122 |
#' @examples |
|
123 |
#' |
|
124 |
#' iris2 <- iris[iris$Species %in% c("versicolor", "virginica"), ] |
|
125 |
#' iris2$Species <- factor(iris2$Species) |
|
126 |
#' rbmi_ancova_single(iris2, "Sepal.Length", "Species", c("Petal.Length * Petal.Width")) |
|
127 |
#' |
|
128 |
#' @seealso [rbmi_ancova()] |
|
129 |
rbmi_ancova_single <- function( |
|
130 |
data, |
|
131 |
outcome, |
|
132 |
group, |
|
133 |
covariates, |
|
134 |
weights = c("counterfactual", "equal", "proportional_em", "proportional")) { |
|
135 | 7x |
checkmate::assert_string(outcome) |
136 | 7x |
checkmate::assert_string(group) |
137 | 7x |
weights <- match.arg(weights) |
138 | ||
139 | 7x |
checkmate::assert_factor(data[[group]], n.levels = 2L) |
140 | 7x |
checkmate::assert_numeric(data[[outcome]]) |
141 | 7x |
data2 <- data[, c( |
142 | 7x |
utils::getFromNamespace("extract_covariates", "rbmi")(covariates), |
143 | 7x |
outcome, |
144 | 7x |
group |
145 |
)] |
|
146 | ||
147 | 7x |
frm <- utils::getFromNamespace("as_simple_formula", "rbmi")( |
148 | 7x |
outcome, |
149 | 7x |
c(group, covariates) |
150 |
) |
|
151 | ||
152 | 7x |
mod <- stats::lm(formula = frm, data = data2) |
153 | ||
154 | 7x |
args <- list(model = mod, .weights = weights) |
155 | ||
156 | 7x |
grp_levels <- levels(data2[[group]]) |
157 | ||
158 | 7x |
all_lsm <- lapply( |
159 | 7x |
grp_levels, |
160 | 7x |
\(x) { |
161 | 14x |
do.call( |
162 | 14x |
utils::getFromNamespace("lsmeans", "rbmi"), |
163 | 14x |
c(args, stats::setNames(x, group)) |
164 |
) |
|
165 |
} |
|
166 |
) |
|
167 | 7x |
names(all_lsm) <- paste0("lsm_", grp_levels) |
168 | ||
169 | 7x |
var_est <- summary(mod)$sigma^2 |
170 | 7x |
var_df <- stats::df.residual(mod) |
171 | ||
172 | 7x |
all_trt <- lapply( |
173 | 7x |
paste0(group, grp_levels[-1]), |
174 | 7x |
\(x) { |
175 | 7x |
list( |
176 | 7x |
est = stats::coef(mod)[x], |
177 | 7x |
se = sqrt(stats::vcov(mod)[x, x]), |
178 | 7x |
df = var_df |
179 |
) |
|
180 |
} |
|
181 |
) |
|
182 | 7x |
names(all_trt) <- paste0("trt_", grp_levels[-1]) |
183 | ||
184 | 7x |
var_res <- list( |
185 | 7x |
var = list( |
186 | 7x |
est = var_est, |
187 |
# Reference: Davison, Statistical Methods, p. 371 (just above 8.3.2) |
|
188 | 7x |
se = var_est / sqrt(var_df / 2), |
189 | 7x |
df = var_df |
190 |
) |
|
191 |
) |
|
192 | ||
193 | 7x |
c(var_res, all_trt, all_lsm) |
194 |
} |
1 |
#' Unicode Mapping Table |
|
2 |
#' |
|
3 |
#' A tibble that maps special characters to their Unicode equivalents. |
|
4 |
#' |
|
5 |
#' @format A tibble with columns 'pattern' and 'unicode', where 'pattern' contains |
|
6 |
#' the string to be replaced and 'unicode' contains the Unicode code point in hexadecimal. |
|
7 |
#' |
|
8 |
#' @export |
|
9 |
jj_uc_map <- tibble::tribble(~pattern, ~unicode, ">=", "2265", "<=", "2264") |
|
10 | ||
11 |
unicodify <- function(strs, uc_map) { |
|
12 | 4x |
out <- strs |
13 | 4x |
for (i in seq_len(nrow(uc_map))) { |
14 | 8x |
out <- gsub(uc_map$pattern[i], intToUtf8(strtoi(uc_map$unicode[i], base = 16)), out) |
15 |
} |
|
16 | 4x |
out |
17 |
} |
1 |
#' Analysis Function for Response Variables |
|
2 |
#' |
|
3 |
#' This function calculates counts and percentages for response variables (Y/N values), |
|
4 |
#' with optional risk difference calculations. |
|
5 |
#' |
|
6 |
#' @param df (`data.frame`)\cr data set containing all analysis variables. |
|
7 |
#' @param .var (`string`)\cr variable name that is passed by `rtables`. |
|
8 |
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split. |
|
9 |
#' @param .N_col (`integer`)\cr column-wise N (column count) for the full column being analyzed. |
|
10 |
#' @param .spl_context (`data.frame`)\cr gives information about ancestor split states. |
|
11 |
#' @param resp_var (`string`)\cr response variable name containing Y/N values. |
|
12 |
#' @param id (`string`)\cr subject variable name. |
|
13 |
#' @param drop_levels (`logical`)\cr if TRUE, non-observed levels will not be included. |
|
14 |
#' @param riskdiff (`logical`)\cr if TRUE, risk difference calculations will be performed. |
|
15 |
#' @param ref_path (`string`)\cr column path specifications for the control group. |
|
16 |
#' @param variables (`list`)\cr variables to include in the analysis. |
|
17 |
#' @param conf_level (`proportion`)\cr confidence level of the interval. |
|
18 |
#' @param method (`character`)\cr method for calculating confidence intervals. |
|
19 |
#' @param weights_method (`character`)\cr method for calculating weights. |
|
20 |
#' @param ... Additional arguments passed to other functions. |
|
21 |
#' |
|
22 |
#' @return Formatted analysis function which is used as `afun` in `analyze_vars()` |
|
23 |
#' and as `cfun` in `summarize_row_groups()`. |
|
24 |
#' |
|
25 |
#' @export |
|
26 |
#' |
|
27 |
#' @examples |
|
28 |
#' library(dplyr) |
|
29 |
#' ADSL <- ex_adsl |> select(USUBJID, ARM, SEX) |
|
30 |
#' |
|
31 |
#' ADAE <- ex_adae |> select(USUBJID, ARM, SEX, AEBODSYS, AEDECOD) |
|
32 |
#' |
|
33 |
#' ADAE <- ADAE |> |
|
34 |
#' mutate(TRTEMFL = "Y") |
|
35 |
#' |
|
36 |
#' lyt <- basic_table(show_colcounts = TRUE) |> |
|
37 |
#' split_cols_by("ARM") |> |
|
38 |
#' analyze("SEX", |
|
39 |
#' show_labels = "visible", |
|
40 |
#' afun = a_freq_resp_var_j, |
|
41 |
#' extra_args = list(resp_var = "TRTEMFL", riskdiff = FALSE) |
|
42 |
#' ) |
|
43 |
#' |
|
44 |
#' result <- build_table(lyt, df = ADAE, alt_counts_df = ADSL) |
|
45 |
#' |
|
46 |
#' result |
|
47 |
a_freq_resp_var_j <- function( |
|
48 |
df, |
|
49 |
.var, |
|
50 |
.df_row, |
|
51 |
.N_col, |
|
52 |
.spl_context, |
|
53 |
resp_var = NULL, |
|
54 |
id = "USUBJID", |
|
55 |
drop_levels = FALSE, |
|
56 |
riskdiff = TRUE, |
|
57 |
ref_path = NULL, |
|
58 |
variables = formals(s_proportion_diff)$variables, |
|
59 |
conf_level = formals(s_proportion_diff)$conf_level, |
|
60 |
method = c( |
|
61 |
"wald", |
|
62 |
"waldcc", |
|
63 |
"cmh", |
|
64 |
"ha", |
|
65 |
"newcombe", |
|
66 |
"newcombecc", |
|
67 |
"strat_newcombe", |
|
68 |
"strat_newcombecc" |
|
69 |
), |
|
70 |
weights_method = formals(s_proportion_diff)$weights_method, |
|
71 |
...) { |
|
72 |
# ---- Derive statistics: xx / xx (xx.x%) |
|
73 | ||
74 | 14x |
if (is.null(resp_var)) { |
75 | 1x |
stop( |
76 | 1x |
"afun a_freq_resp_var_j: resp_var cannot be NULL." |
77 |
) |
|
78 |
} |
|
79 | ||
80 | 13x |
resp_var_values <- unique(df[[resp_var]][!is.na(df[[resp_var]])]) |
81 |
if ( |
|
82 | 13x |
is.character(df[[resp_var]]) && |
83 | 13x |
any(is.na(df[[resp_var]])) && |
84 | 13x |
all(resp_var_values == "Y") |
85 |
) { |
|
86 | ! |
stop( |
87 | ! |
paste0( |
88 | ! |
"afun a_freq_resp_var_j: it is not clear if missing resp_var should be considered non-response. ", |
89 | ! |
"Please make it a factor with appropriate Y(/N) levels." |
90 |
) |
|
91 |
) |
|
92 |
} |
|
93 | ||
94 | 13x |
if (length(setdiff(resp_var_values, c("Y", "N"))) > 0) { |
95 | 1x |
stop("afun a_freq_resp_var_j: resp_var must contain only Y/N values.") |
96 |
} |
|
97 | ||
98 | 12x |
if (is.character(df[[resp_var]])) { |
99 | 9x |
df[[resp_var]] <- factor(df[[resp_var]], levels = c("Y", "N")) |
100 |
} |
|
101 | ||
102 | 12x |
df <- df[!is.na(df[[.var]]), ] |
103 | ||
104 |
# nolint start |
|
105 | 12x |
if ((is.factor(df[[resp_var]]) && |
106 | 12x |
(identical(levels(df[[resp_var]]), c("Y", "N")) || identical(levels(df[[resp_var]]), c("N", "Y")))) || |
107 | 12x |
is.character(df[[resp_var]]) |
108 |
) { # nolint end |
|
109 |
# missing values in resp_var should be excluded, not considered as not met response |
|
110 |
# subject will then not contribute to denominator |
|
111 | 12x |
df <- df[!is.na(df[[resp_var]]), ] |
112 | 12x |
.df_row <- .df_row[!is.na(.df_row[[resp_var]]), ] |
113 |
} |
|
114 | ||
115 | 12x |
if (drop_levels) { |
116 | 3x |
obs_levs <- unique(.df_row[[.var]]) |
117 | 3x |
obs_levs <- intersect(levels(.df_row[[.var]]), obs_levs) |
118 | ||
119 | 3x |
val_var <- obs_levs |
120 |
# restrict the levels to the ones specified in val_var |
|
121 | 3x |
df <- df[df[[.var]] %in% val_var, ] |
122 | 3x |
.df_row <- .df_row[.df_row[[.var]] %in% val_var, ] |
123 | ||
124 | 3x |
df <- h_update_factor(df, .var, val_var) |
125 |
} |
|
126 | ||
127 | 12x |
varvec <- df[[.var]] |
128 | ||
129 | 12x |
levs <- if (is.factor(varvec)) levels(varvec) else sort(unique(varvec)) |
130 | ||
131 | 12x |
colid <- .spl_context$cur_col_id[[1]] |
132 | 12x |
inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE) |
133 | ||
134 | 12x |
if (riskdiff) { |
135 | ! |
trt_var_refpath <- h_get_trtvar_refpath( |
136 | ! |
ref_path, |
137 | ! |
.spl_context, |
138 | ! |
df |
139 |
) |
|
140 |
# trt_var_refpath is list with elements |
|
141 |
# trt_var trt_var_refspec cur_trt_grp ctrl_grp |
|
142 |
# make these elements available in current environment |
|
143 | ! |
trt_var <- trt_var_refpath$trt_var |
144 | ! |
trt_var_refspec <- trt_var_refpath$trt_var_refspec |
145 | ! |
cur_trt_grp <- trt_var_refpath$cur_trt_grp |
146 | ! |
ctrl_grp <- trt_var_refpath$ctrl_grp |
147 |
} |
|
148 | ||
149 | 12x |
fn <- function(levii) { |
150 | 45x |
dfii <- df[df[[.var]] == levii, ] |
151 | 45x |
dfrowii <- .df_row[.df_row[[.var]] == levii, ] |
152 | ||
153 | 45x |
if (!inriskdiffcol) { |
154 |
# use the core s_freq_j function on the current level of the incoming variable (.var) |
|
155 |
# note that the response variable will become .var in the below call |
|
156 |
# val is restricted to Y to show number of response on the current level of .var |
|
157 | 45x |
rslt <- s_freq_j( |
158 | 45x |
dfii, |
159 | 45x |
.df_row = dfrowii, |
160 | 45x |
.var = resp_var, |
161 | 45x |
alt_df = NULL, |
162 | 45x |
parent_df = NULL, |
163 | 45x |
val = "Y", |
164 | 45x |
denom = "n_df" |
165 |
) |
|
166 | ||
167 | 45x |
.stat <- "count_unique_denom_fraction" |
168 | 45x |
x_stat <- rslt[[.stat]]$Y |
169 | 45x |
rslt <- rcell(x_stat, format = jjcsformat_count_denom_fraction) |
170 |
} else { |
|
171 |
# use the risk differenc function s_rel_risk_val_j on the current level of the incoming variable (.var) |
|
172 |
# note that the response variable will become .var in the below call |
|
173 |
# val is restricted to Y to show number of response on the current level of .var |
|
174 | ! |
denom_df <- dfrowii |
175 | ||
176 | ! |
rslt <- s_rel_risk_val_j( |
177 | ! |
df = dfii, |
178 | ! |
.var = resp_var, |
179 | ! |
.df_row = dfrowii, |
180 | ! |
val = "Y", |
181 | ! |
denom_df = denom_df, |
182 | ! |
id = id, |
183 | ! |
riskdiff = riskdiff, |
184 |
# treatment/ref group related arguments |
|
185 | ! |
trt_var = trt_var, |
186 | ! |
ctrl_grp = ctrl_grp, |
187 | ! |
cur_trt_grp = cur_trt_grp, |
188 |
# relrisk specific arguments |
|
189 | ! |
variables = variables, |
190 | ! |
conf_level = conf_level, |
191 | ! |
method = method, |
192 | ! |
weights_method = weights_method |
193 |
) |
|
194 | ! |
x_stat <- rslt[["rr_ci_3d"]]$Y |
195 | ! |
rslt <- rcell( |
196 | ! |
x_stat, |
197 | ! |
format = jjcsformat_xx("xx.x (xx.x, xx.x)"), |
198 | ! |
format_na_str = rep("NA", 3) |
199 |
) |
|
200 |
} |
|
201 | ||
202 |
# rslt is a single rcell row |
|
203 | 45x |
return(rslt) |
204 |
} |
|
205 | ||
206 |
### apply function fn to all levels of incoming .var |
|
207 |
### cls is a list of single rcell rows - ie one line per level is presented |
|
208 | 12x |
cls <- lapply(levs, fn) |
209 | ||
210 | 12x |
names(cls) <- levs |
211 | 12x |
return(cls) |
212 |
} |
1 |
#' Workaround statistics function to time point survival estimate with CI |
|
2 |
#' |
|
3 |
#' This is a workaround for [tern::s_surv_timepoint()], which adds a statistic |
|
4 |
#' containing the time point specific survival estimate together with the |
|
5 |
#' confidence interval. |
|
6 |
#' |
|
7 |
#' @inheritParams proposal_argument_convention |
|
8 |
#' |
|
9 |
#' @return for `s_event_free`, a list as returned by the [tern::s_surv_timepoint()] |
|
10 |
#' with an additional three-dimensional statistic `event_free_ci` which |
|
11 |
#' combines the `event_free_rate` and `rate_ci` statistics. |
|
12 |
#' |
|
13 |
#' For `a_event_free`, analogous to [tern::a_surv_timepoint] but with the additional |
|
14 |
#' three-dimensional statistic described above available via `.stats`. |
|
15 |
#' |
|
16 |
#' @name event_free |
|
17 |
#' @order 1 |
|
18 |
NULL |
|
19 | ||
20 |
#' @describeIn event_free Statistics function which works like [tern::s_surv_timepoint()], |
|
21 |
#' the difference is that it returns the additional statistic `event_free_ci`. |
|
22 |
#' @param time_point (`numeric`)\cr time point at which to estimate survival. |
|
23 |
#' @param time_unit (`string`)\cr unit of time for the time point. |
|
24 |
#' @param percent (`flag`)\cr whether to return in percent or not. |
|
25 |
#' @export |
|
26 |
#' |
|
27 |
#' @examples |
|
28 |
#' adtte_f <- tern::tern_ex_adtte |> |
|
29 |
#' dplyr::filter(PARAMCD == "OS") |> |
|
30 |
#' dplyr::mutate( |
|
31 |
#' AVAL = tern::day2month(AVAL), |
|
32 |
#' is_event = CNSR == 0 |
|
33 |
#' ) |
|
34 |
#' |
|
35 |
#' s_event_free( |
|
36 |
#' df = adtte_f, |
|
37 |
#' .var = "AVAL", |
|
38 |
#' time_point = 6, |
|
39 |
#' is_event = "is_event", |
|
40 |
#' time_unit = "month" |
|
41 |
#' ) |
|
42 |
#' @order 3 |
|
43 |
s_event_free <- function( |
|
44 |
df, |
|
45 |
.var, |
|
46 |
time_point, |
|
47 |
time_unit, |
|
48 |
is_event, |
|
49 |
percent = FALSE, |
|
50 |
control = control_surv_timepoint()) { |
|
51 | 20x |
checkmate::assert_string(time_unit, min.chars = 1L) |
52 | 20x |
start <- s_surv_timepoint( |
53 | 20x |
df = df, |
54 | 20x |
.var = .var, |
55 | 20x |
time_point = time_point, |
56 | 20x |
is_event = is_event, |
57 | 20x |
control = control |
58 |
) |
|
59 | 20x |
rates <- c(start$event_free_rate, start$rate_ci) |
60 | 10x |
if (!percent) rates <- rates / 100 |
61 | 20x |
unit_label <- if (percent) "(%) " else "" |
62 | 20x |
c( |
63 | 20x |
start, |
64 | 20x |
list( |
65 | 20x |
event_free_ci = with_label( |
66 | 20x |
rates, |
67 | 20x |
paste0( |
68 | 20x |
time_point, |
69 |
"-", |
|
70 | 20x |
time_unit, |
71 | 20x |
" event-free rate ", |
72 | 20x |
unit_label, |
73 |
"(", |
|
74 | 20x |
obj_label(start$rate_ci), |
75 |
")" |
|
76 |
) |
|
77 |
) |
|
78 |
) |
|
79 |
) |
|
80 |
} |
|
81 | ||
82 |
#' @describeIn event_free Formatted analysis function which is used as `afun`. |
|
83 |
#' |
|
84 |
#' @export |
|
85 |
#' @order 2 |
|
86 |
#' |
|
87 |
#' @examples |
|
88 |
#' adtte_f <- tern::tern_ex_adtte |> |
|
89 |
#' dplyr::filter(PARAMCD == "OS") |> |
|
90 |
#' dplyr::mutate( |
|
91 |
#' AVAL = tern::day2month(AVAL), |
|
92 |
#' is_event = CNSR == 0 |
|
93 |
#' ) |
|
94 |
#' |
|
95 |
#' basic_table() |> |
|
96 |
#' split_cols_by(var = "ARMCD") |> |
|
97 |
#' analyze( |
|
98 |
#' vars = "AVAL", |
|
99 |
#' afun = a_event_free, |
|
100 |
#' show_labels = "hidden", |
|
101 |
#' na_str = tern::default_na_str(), |
|
102 |
#' extra_args = list( |
|
103 |
#' time_unit = "week", |
|
104 |
#' time_point = 3, |
|
105 |
#' is_event = "is_event" |
|
106 |
#' ) |
|
107 |
#' ) |> |
|
108 |
#' build_table(df = adtte_f) |
|
109 |
a_event_free <- function(df, .var, ..., .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL) { |
|
110 |
# Check for additional parameters to the statistics function |
|
111 | 18x |
dots_extra_args <- list(...) |
112 | ||
113 |
# Only support default stats, not custom stats |
|
114 | 18x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
115 | ||
116 |
# Apply statistics function |
|
117 | 18x |
x_stats <- .apply_stat_functions( |
118 | 18x |
default_stat_fnc = s_event_free, |
119 | 18x |
custom_stat_fnc_list = NULL, |
120 | 18x |
args_list = c(df = list(df), .var = .var, dots_extra_args) |
121 |
) |
|
122 | ||
123 |
# Format according to specifications |
|
124 | 18x |
format_stats( |
125 | 18x |
x_stats, |
126 | 18x |
method_groups = "event_free", |
127 | 18x |
stats_in = .stats, |
128 | 18x |
formats_in = .formats, |
129 | 18x |
labels_in = .labels, |
130 | 18x |
indents_in = .indent_mods |
131 |
) |
|
132 |
} |
1 |
#' @note: This has been forked from the `rbmi` package, mainly to support in addition |
|
2 |
#' the pooling of variance estimates. |
|
3 |
rbmi_pool <- function( |
|
4 |
results, |
|
5 |
conf.level = 0.95, |
|
6 |
alternative = c("two.sided", "less", "greater"), |
|
7 |
type = c("percentile", "normal")) { |
|
8 | ! |
rbmi::validate(results) |
9 | ||
10 | ! |
alternative <- match.arg(alternative) |
11 | ! |
type <- match.arg(type) |
12 | ||
13 | ! |
pool_type <- class(results$results)[[1]] |
14 | ! |
checkmate::assert_true(identical(pool_type, "rubin")) |
15 | ||
16 | ! |
results_transpose <- (utils::getFromNamespace("transpose_results", "rbmi"))( |
17 | ! |
results$results, |
18 | ! |
(utils::getFromNamespace("get_pool_components", "rbmi"))(pool_type) |
19 |
) |
|
20 | ||
21 | ! |
pars <- lapply( |
22 | ! |
results_transpose, |
23 | ! |
function(x, ...) mod_pool_internal_rubin(x, ...), |
24 | ! |
conf.level = conf.level, |
25 | ! |
alternative = alternative, |
26 | ! |
type = type, |
27 | ! |
D = results$method$D |
28 |
) |
|
29 | ||
30 | ! |
method <- pool_type |
31 | ||
32 | ! |
ret <- list( |
33 | ! |
pars = pars, |
34 | ! |
conf.level = conf.level, |
35 | ! |
alternative = alternative, |
36 | ! |
N = length(results$results), |
37 | ! |
method = method |
38 |
) |
|
39 | ! |
class(ret) <- "pool" |
40 | ! |
return(ret) |
41 |
} |
|
42 | ||
43 |
mod_pool_internal_rubin <- function(results, conf.level, alternative, type, D) { |
|
44 | 3x |
ests <- results$est |
45 | 3x |
ses <- results$se |
46 | 3x |
dfs <- results$df |
47 | 3x |
alpha <- 1 - conf.level |
48 | ||
49 |
# Note: Need to take median here, because in the MMRM case the d.f. will be slightly different for each imputed |
|
50 |
# data set analysis. |
|
51 | 3x |
v_com <- stats::median(dfs) |
52 | ||
53 | 3x |
res_rubin <- (utils::getFromNamespace("rubin_rules", "rbmi"))(ests = ests, ses = ses, v_com = v_com) |
54 | ||
55 | 3x |
ret <- (utils::getFromNamespace("parametric_ci", "rbmi"))( |
56 | 3x |
point = res_rubin$est_point, |
57 | 3x |
se = sqrt(res_rubin$var_t), |
58 | 3x |
alpha = alpha, |
59 | 3x |
alternative = alternative, |
60 | 3x |
qfun = stats::qt, |
61 | 3x |
pfun = stats::pt, |
62 | 3x |
df = res_rubin$df |
63 |
) |
|
64 |
# Here also return the pooled d.f.: |
|
65 | 3x |
ret$df <- res_rubin$df |
66 | ||
67 | 3x |
return(ret) |
68 |
} |
1 |
#' Extract the left-hand side of a formula |
|
2 |
#' |
|
3 |
#' @keywords internal |
|
4 |
leftside <- function(x) { |
|
5 | ! |
checkmate::assert_formula(x) |
6 | ! |
res <- x[[2L]] |
7 | ! |
checkmate::assert_string(res) |
8 | ! |
res |
9 |
} |
|
10 | ||
11 |
#' Custom unlist function |
|
12 |
#' |
|
13 |
#' Unlist a list, but retain `NULL` as `'NULL'` or `NA`. |
|
14 |
#' |
|
15 |
#' @keywords internal |
|
16 |
.unlist_keep_nulls <- function(lst, null_placeholder = "NULL", recursive = FALSE) { |
|
17 | 1741x |
lapply(lst, function(x) if (is.null(x)) null_placeholder else x) |> |
18 | 1741x |
unlist(recursive = recursive) |
19 |
} |
|
20 | ||
21 | ||
22 |
#' Title Case Conversion |
|
23 |
#' |
|
24 |
#' @param x Input string |
|
25 |
#' @return String converted to title case (first letter of each word capitalized) |
|
26 |
#' @export |
|
27 |
#' @keywords internal |
|
28 |
string_to_title <- function(x) { |
|
29 |
# Accept either character or factor |
|
30 | 2x |
if (is.factor(x)) { |
31 | ! |
x <- as.character(x) |
32 |
} |
|
33 | 2x |
checkmate::assert_character(x, null.ok = TRUE) |
34 | 2x |
x_lower <- tolower(x) |
35 | 2x |
gsub("(^|\\s)(\\w)", "\\1\\U\\2", x_lower, perl = TRUE) |
36 |
} |
|
37 | ||
38 | ||
39 |
#' Check If `.alt_df_full` Is `NULL` |
|
40 |
#' |
|
41 |
#' For example, in `a_patyrs_j()`, if `source` is `"alt_df"`, we need to |
|
42 |
#' check if `.alt_df_full` is `NULL`. |
|
43 |
#' |
|
44 |
#' @noRd |
|
45 |
check_alt_df_full <- function(argument, values, .alt_df_full) { |
|
46 | 468x |
if (!argument %in% values || !is.null(.alt_df_full)) { |
47 | 468x |
return(invisible()) |
48 |
} |
|
49 | ||
50 | ! |
name <- deparse(substitute(argument)) |
51 | ||
52 | ! |
stop(sprintf( |
53 | ! |
'`.alt_df_full` cannot be `NULL` when `%s` is `"%s"`', |
54 | ! |
name, argument |
55 |
)) |
|
56 |
} |
1 |
#' @name a_freq_combos_j |
|
2 |
#' |
|
3 |
#' @title Analysis function count and percentage in column design controlled by combosdf |
|
4 |
#' |
|
5 |
#' @inheritParams proposal_argument_convention |
|
6 |
#' @inheritParams a_freq_j |
|
7 |
#' |
|
8 |
#' @param combosdf The df which provides the mapping of column facets to produce cumulative counts for .N_col.\cr |
|
9 |
#' In the cell facet, these cumulative records must then be removed from the numerator, |
|
10 |
#' which can be done via the filter_var parameter |
|
11 |
#' to avoid unwanted counting of events. |
|
12 |
#' @param do_not_filter A vector of facets (i.e., column headers), identifying headers for which |
|
13 |
#' no filtering of records should occur. |
|
14 |
#' That is, the numerator should contain cumulative counts. Generally, this will be used for a |
|
15 |
#' "Total" column, or something similar. |
|
16 |
#' @param filter_var The variable which identifies the records to count in the numerator for any given column. |
|
17 |
#' Generally, this will contain text matching the column header for the column associated with a given record. |
|
18 |
#' @param flag_var Variable which identifies the occurrence (or first occurrence) of an event. |
|
19 |
#' The flag variable is expected to have a value of "Y" identifying that the event should be counted, or NA otherwise. |
|
20 |
#' @param denom (`string`)\cr |
|
21 |
#' One of \cr |
|
22 |
#' \itemize{ |
|
23 |
#' \item \strong{N_col} Column count, \cr |
|
24 |
#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr |
|
25 |
#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr |
|
26 |
#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr |
|
27 |
#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr |
|
28 |
#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe |
|
29 |
#' (`.row_df` from the rtables splitting machinery).\cr |
|
30 |
#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr |
|
31 |
#' This higher row-level split is specified in the argument `denom_by`.\cr |
|
32 |
#' } |
|
33 |
#' @param .formats (named 'character' or 'list')\cr |
|
34 |
#' formats for the statistics. |
|
35 |
#' @return list of requested statistics with formatted `rtables::CellValue()`.\cr |
|
36 |
#' |
|
37 |
#' @examples |
|
38 |
#' library(dplyr) |
|
39 |
#' ADSL <- ex_adsl |> select(USUBJID, ARM, EOSSTT, EOSDT, EOSDY, TRTSDTM) |
|
40 |
#' |
|
41 |
#' cutoffd <- as.Date("2023-09-24") |
|
42 |
#' |
|
43 |
#' ADSL <- ADSL |> |
|
44 |
#' mutate( |
|
45 |
#' TRTDURY = case_when( |
|
46 |
#' !is.na(EOSDY) ~ EOSDY, |
|
47 |
#' TRUE ~ as.integer(cutoffd - as.Date(TRTSDTM) + 1) |
|
48 |
#' ) |
|
49 |
#' ) |> |
|
50 |
#' mutate(ACAT1 = case_when( |
|
51 |
#' TRTDURY < 183 ~ "0-6 Months", |
|
52 |
#' TRTDURY < 366 ~ "6-12 Months", |
|
53 |
#' TRUE ~ "+12 Months" |
|
54 |
#' )) |> |
|
55 |
#' mutate(ACAT1 = factor(ACAT1, levels = c("0-6 Months", "6-12 Months", "+12 Months"))) |
|
56 |
#' |
|
57 |
#' |
|
58 |
#' ADAE <- ex_adae |> select(USUBJID, ARM, AEBODSYS, AEDECOD, ASTDY) |
|
59 |
#' |
|
60 |
#' ADAE <- ADAE |> |
|
61 |
#' mutate(TRTEMFL = "Y") |> |
|
62 |
#' mutate(ACAT1 = case_when( |
|
63 |
#' ASTDY < 183 ~ "0-6 Months", |
|
64 |
#' ASTDY < 366 ~ "6-12 Months", |
|
65 |
#' TRUE ~ "+12 Months" |
|
66 |
#' )) |> |
|
67 |
#' mutate(ACAT1 = factor(ACAT1, levels = c("0-6 Months", "6-12 Months", "+12 Months"))) |
|
68 |
#' |
|
69 |
#' combodf <- tribble( |
|
70 |
#' ~valname, ~label, ~levelcombo, ~exargs, |
|
71 |
#' "Tot", "Total", c("0-6 Months", "6-12 Months", "+12 Months"), list(), |
|
72 |
#' "A_0-6 Months", "0-6 Months", c("0-6 Months", "6-12 Months", "+12 Months"), list(), |
|
73 |
#' "B_6-12 Months", "6-12 Months", c( "6-12 Months", "+12 Months"), list(), |
|
74 |
#' "C_+12 Months", "+12 Months", c( "+12 Months"), list() |
|
75 |
#' ) |
|
76 |
#' |
|
77 |
#' |
|
78 |
#' lyt <- basic_table(show_colcounts = TRUE) |> |
|
79 |
#' split_cols_by("ARM") |> |
|
80 |
#' split_cols_by("ACAT1", |
|
81 |
#' split_fun = add_combo_levels(combosdf = combodf, trim = FALSE, keep_levels = combodf$valname) |
|
82 |
#' ) |> |
|
83 |
#' analyze("TRTEMFL", |
|
84 |
#' show_labels = "hidden", |
|
85 |
#' afun = a_freq_combos_j, |
|
86 |
#' extra_args = list( |
|
87 |
#' val = "Y", |
|
88 |
#' label = "Subjects with >= 1 AE", |
|
89 |
#' combosdf = combodf, |
|
90 |
#' filter_var = "ACAT1", |
|
91 |
#' do_not_filter = "Tot" |
|
92 |
#' ) |
|
93 |
#' ) |
|
94 |
#' |
|
95 |
#' |
|
96 |
#' result <- build_table(lyt, df = ADAE, alt_counts_df = ADSL) |
|
97 |
#' |
|
98 |
#' result |
|
99 |
#' @export |
|
100 |
a_freq_combos_j <- function( |
|
101 |
df, |
|
102 |
labelstr = NULL, |
|
103 |
.var = NA, |
|
104 |
val = NULL, |
|
105 |
# arguments specific to a_freq_combos_j |
|
106 |
combosdf = NULL, |
|
107 |
do_not_filter = NULL, |
|
108 |
filter_var = NULL, |
|
109 |
flag_var = NULL, |
|
110 |
# arguments specific to a_freq_combos_j till here |
|
111 |
.df_row, |
|
112 |
.spl_context, |
|
113 |
.N_col, |
|
114 |
id = "USUBJID", |
|
115 |
denom = c("N_col", "n_df", "n_altdf", "n_rowdf", "n_parentdf"), |
|
116 |
label = NULL, |
|
117 |
label_fstr = NULL, |
|
118 |
label_map = NULL, |
|
119 |
.alt_df_full = NULL, |
|
120 |
denom_by = NULL, |
|
121 |
.stats = "count_unique_denom_fraction", |
|
122 |
.formats = NULL, |
|
123 |
.labels_n = NULL, |
|
124 |
.indent_mods = NULL, |
|
125 |
na_str = rep("NA", 3)) { |
|
126 | 12x |
denom <- match.arg(denom) |
127 | ||
128 | 12x |
check_alt_df_full(denom, "n_altdf", .alt_df_full) |
129 | ||
130 | 12x |
if (!is.null(combosdf) && !all(c("valname", "label") %in% names(combosdf))) { |
131 | ! |
stop("a_freq_combos_j: combosdf must have variables valname and label.") |
132 |
} |
|
133 | ||
134 | 12x |
res_dataprep <- h_a_freq_dataprep( |
135 | 12x |
df = df, |
136 | 12x |
labelstr = labelstr, |
137 | 12x |
.var = .var, |
138 | 12x |
val = val, |
139 | 12x |
drop_levels = FALSE, |
140 | 12x |
excl_levels = NULL, |
141 | 12x |
new_levels = NULL, |
142 | 12x |
new_levels_after = FALSE, |
143 | 12x |
.df_row = .df_row, |
144 | 12x |
.spl_context = .spl_context, |
145 | 12x |
.N_col = .N_col, |
146 | 12x |
id = id, |
147 | 12x |
denom = denom, |
148 | 12x |
variables = NULL, |
149 | 12x |
label = label, |
150 | 12x |
label_fstr = label_fstr, |
151 | 12x |
label_map = label_map, |
152 | 12x |
.alt_df_full = .alt_df_full, |
153 | 12x |
denom_by = denom_by, |
154 | 12x |
.stats = .stats |
155 |
) |
|
156 |
# res_dataprep is list with elements |
|
157 |
# df .df_row val |
|
158 |
# drop_levels excl_levels |
|
159 |
# alt_df parentdf new_denomdf |
|
160 |
# .stats |
|
161 |
# make these elements available in current environment |
|
162 | 12x |
df <- res_dataprep$df |
163 | 12x |
.df_row <- res_dataprep$.df_row |
164 | 12x |
val <- res_dataprep$val |
165 | 12x |
drop_levels <- res_dataprep$drop_levels |
166 | 12x |
excl_levels <- res_dataprep$excl_levels |
167 | 12x |
alt_df <- res_dataprep$alt_df |
168 | 12x |
parentdf <- res_dataprep$parentdf |
169 | 12x |
new_denomdf <- res_dataprep$new_denomdf |
170 | 12x |
.stats <- .stats |
171 | ||
172 |
## colid can be used to figure out if we're in the combo column or not |
|
173 | 12x |
colid <- .spl_context$cur_col_id[[1]] |
174 | ||
175 |
### this is the core code for subsetting to appropriate combo level |
|
176 | 12x |
df <- h_subset_combo( |
177 | 12x |
df = df, |
178 | 12x |
combosdf = combosdf, |
179 | 12x |
do_not_filter = do_not_filter, |
180 | 12x |
filter_var = filter_var, |
181 | 12x |
flag_var = flag_var, |
182 | 12x |
colid = colid |
183 |
) |
|
184 | ||
185 |
## the same s-function can be used as in a_freq_j |
|
186 | 12x |
x_stats <- s_freq_j( |
187 | 12x |
df, |
188 | 12x |
.var = .var, |
189 | 12x |
.df_row = .df_row, |
190 | 12x |
val = val, |
191 | 12x |
alt_df = new_denomdf, |
192 | 12x |
parent_df = new_denomdf, |
193 | 12x |
id = id, |
194 | 12x |
denom = denom, |
195 | 12x |
.N_col = .N_col, |
196 | 12x |
countsource = "df" |
197 |
) |
|
198 | ||
199 | 12x |
.stats_adj <- .stats |
200 | ||
201 | 12x |
res_prepinrows <- h_a_freq_prepinrows( |
202 | 12x |
x_stats, |
203 | 12x |
.stats_adj, |
204 | 12x |
.formats, |
205 | 12x |
labelstr, |
206 | 12x |
label_fstr, |
207 | 12x |
label, |
208 | 12x |
.indent_mods, |
209 | 12x |
.labels_n, |
210 | 12x |
na_str |
211 |
) |
|
212 |
# res_prepinrows is list with elements |
|
213 |
# x_stats .formats .labels .indent_mods .format_na_strs |
|
214 |
# make these elements available in current environment |
|
215 | 12x |
x_stats <- res_prepinrows$x_stats |
216 | 12x |
.formats <- res_prepinrows$.formats |
217 | 12x |
.labels <- res_prepinrows$.labels |
218 | 12x |
.indent_mods <- res_prepinrows$.indent_mods |
219 | 12x |
.format_na_strs <- res_prepinrows$.format_na_strs |
220 | ||
221 |
### final step: turn requested stats into rtables rows |
|
222 | 12x |
in_rows( |
223 | 12x |
.list = x_stats, |
224 | 12x |
.formats = .formats, |
225 | 12x |
.labels = .labels, |
226 | 12x |
.indent_mods = .indent_mods, |
227 | 12x |
.format_na_strs = .format_na_strs |
228 |
) |
|
229 |
} |
1 |
#' Obtain Reference Information for a Global Reference Group |
|
2 |
#' |
|
3 |
#' This helper function can be used in custom analysis functions, by passing |
|
4 |
#' an extra argument `ref_path` which defines a global reference group by |
|
5 |
#' the corresponding column split hierarchy levels. |
|
6 |
#' |
|
7 |
#' @param ref_path (`character`)\cr reference group specification as an `rtables` |
|
8 |
#' `colpath`, see details. |
|
9 |
#' @param .spl_context see [rtables::spl_context]. |
|
10 |
#' @param .var the variable being analyzed, see [rtables::additional_fun_params]. |
|
11 |
#' |
|
12 |
#' @return A list with `ref_group` and `in_ref_col`, which can be used as |
|
13 |
#' `.ref_group` and `.in_ref_col` as if being directly passed to an analysis |
|
14 |
#' function by `rtables`, see [rtables::additional_fun_params]. |
|
15 |
#' |
|
16 |
#' @details |
|
17 |
#' The reference group is specified in `colpath` hierarchical fashion in `ref_path`: |
|
18 |
#' The first column split variable is the first element, and the level to use is the |
|
19 |
#' second element. It continues until the last column split variable with last |
|
20 |
#' level to use. |
|
21 |
#' Note that depending on `.var`, either a `data.frame` (if `.var` is `NULL`) or |
|
22 |
#' a vector (otherwise) is returned. This allows usage for analysis functions with |
|
23 |
#' `df` and `x` arguments, respectively. |
|
24 |
#' |
|
25 |
#' @export |
|
26 |
#' |
|
27 |
#' @examples |
|
28 |
#' dm <- DM |
|
29 |
#' dm$colspan_trt <- factor( |
|
30 |
#' ifelse(dm$ARM == "B: Placebo", " ", "Active Study Agent"), |
|
31 |
#' levels = c("Active Study Agent", " ") |
|
32 |
#' ) |
|
33 |
#' colspan_trt_map <- create_colspan_map( |
|
34 |
#' dm, |
|
35 |
#' non_active_grp = "B: Placebo", |
|
36 |
#' non_active_grp_span_lbl = " ", |
|
37 |
#' active_grp_span_lbl = "Active Study Agent", |
|
38 |
#' colspan_var = "colspan_trt", |
|
39 |
#' trt_var = "ARM" |
|
40 |
#' ) |
|
41 |
#' |
|
42 |
#' standard_afun <- function(x, .ref_group, .in_ref_col) { |
|
43 |
#' in_rows( |
|
44 |
#' "Difference of Averages" = non_ref_rcell( |
|
45 |
#' mean(x) - mean(.ref_group), |
|
46 |
#' is_ref = .in_ref_col, |
|
47 |
#' format = "xx.xx" |
|
48 |
#' ) |
|
49 |
#' ) |
|
50 |
#' } |
|
51 |
#' |
|
52 |
#' result_afun <- function(x, ref_path, .spl_context, .var) { |
|
53 |
#' ref <- get_ref_info(ref_path, .spl_context, .var) |
|
54 |
#' standard_afun(x, .ref_group = ref$ref_group, .in_ref_col = ref$in_ref_col) |
|
55 |
#' } |
|
56 |
#' |
|
57 |
#' ref_path <- c("colspan_trt", " ", "ARM", "B: Placebo") |
|
58 |
#' |
|
59 |
#' lyt <- basic_table() |> |
|
60 |
#' split_cols_by( |
|
61 |
#' "colspan_trt", |
|
62 |
#' split_fun = trim_levels_to_map(map = colspan_trt_map) |
|
63 |
#' ) |> |
|
64 |
#' split_cols_by("ARM") |> |
|
65 |
#' analyze( |
|
66 |
#' "AGE", |
|
67 |
#' extra_args = list(ref_path = ref_path), |
|
68 |
#' afun = result_afun |
|
69 |
#' ) |
|
70 |
#' |
|
71 |
#' build_table(lyt, dm) |
|
72 |
get_ref_info <- function(ref_path, .spl_context, .var = NULL) { |
|
73 | 65x |
checkmate::check_character(ref_path, min.len = 2L, names = "unnamed") |
74 | 65x |
checkmate::assert_true(length(ref_path) %% 2 == 0) # Even number of elements in ref_path. |
75 | 65x |
leaf_spl_context <- .spl_context[nrow(.spl_context), ] |
76 | 65x |
full_df <- leaf_spl_context$full_parent_df[[1]] |
77 | 65x |
level_indices <- seq(from = 2L, to = length(ref_path), by = 2L) |
78 | 65x |
ref_group_string <- paste(ref_path[level_indices], collapse = ".") |
79 | 65x |
row_in_ref_group <- leaf_spl_context[[ref_group_string]][[1]] |
80 | 65x |
ref_group <- full_df[row_in_ref_group, ] |
81 | 65x |
if (!is.null(.var)) { |
82 | 6x |
ref_group <- ref_group[[.var]] |
83 |
} |
|
84 | 65x |
colvars_indices <- seq(from = 1L, to = length(ref_path) - 1L, by = 2L) |
85 | 65x |
checkmate::assert_true(identical(leaf_spl_context$cur_col_split[[1]], ref_path[colvars_indices])) |
86 | 65x |
in_ref_col <- identical(leaf_spl_context$cur_col_split_val[[1]], ref_path[level_indices]) |
87 | 65x |
list(ref_group = ref_group, in_ref_col = in_ref_col) |
88 |
} |
1 |
#' @name create_colspan_var |
|
2 |
#' @title Creation of Column Spanning Variables |
|
3 |
#' @description |
|
4 |
#' A function used for creating a spanning variable for treatment groups |
|
5 |
#' @details |
|
6 |
#' This function creates a spanning variable for treatment groups that is intended to |
|
7 |
#' be used within the column space. |
|
8 |
#' @param df The name of the data frame in which the spanning variable is to be appended to |
|
9 |
#' @param non_active_grp The value(s) of the treatments that represent the non-active or comparator |
|
10 |
#' treatment groups |
|
11 |
#' default value = c('Placebo') |
|
12 |
#' @param non_active_grp_span_lbl The assigned value of the spanning variable for the non-active or comparator |
|
13 |
#' treatment groups |
|
14 |
#' default value = '' |
|
15 |
#' @param active_grp_span_lbl The assigned value of the spanning variable for the active treatment group(s) |
|
16 |
#' default value = 'Active Study Agent' |
|
17 |
#' @param colspan_var The desired name of the newly created spanning variable |
|
18 |
#' default value = 'colspan_trt' |
|
19 |
#' @param trt_var The name of the treatment variable that is used to determine which |
|
20 |
#' spanning treatment group value to apply. |
|
21 |
#' default value = 'TRT01A' |
|
22 |
#' @returns a data frame that contains the new variable as specified in colspan_var |
|
23 |
#' @rdname colspan_var |
|
24 |
#' @export |
|
25 |
#' @aliases create_colspan_var |
|
26 |
#' @examples |
|
27 |
#' |
|
28 |
#' library(tibble) |
|
29 |
#' |
|
30 |
#' df <- tribble( |
|
31 |
#' ~TRT01A, |
|
32 |
#' "Placebo", |
|
33 |
#' "Active 1", |
|
34 |
#' "Active 2" |
|
35 |
#' ) |
|
36 |
#' |
|
37 |
#' df$TRT01A <- factor(df$TRT01A, levels = c("Placebo", "Active 1", "Active 2")) |
|
38 |
#' |
|
39 |
#' colspan_var <- create_colspan_var( |
|
40 |
#' df = df, |
|
41 |
#' non_active_grp = c("Placebo"), |
|
42 |
#' non_active_grp_span_lbl = " ", |
|
43 |
#' active_grp_span_lbl = "Active Treatment", |
|
44 |
#' colspan_var = "colspan_trt", |
|
45 |
#' trt_var = "TRT01A" |
|
46 |
#' ) |
|
47 |
#' |
|
48 |
#' colspan_var |
|
49 |
create_colspan_var <- function( |
|
50 |
df, |
|
51 |
non_active_grp = c("Placebo"), |
|
52 |
non_active_grp_span_lbl = " ", |
|
53 |
active_grp_span_lbl = "Active Study Agent", |
|
54 |
colspan_var = "colspan_trt", |
|
55 |
trt_var = "TRT01A") { |
|
56 |
# Create a new column with the specified name using base R |
|
57 | 1x |
df[[colspan_var]] <- factor( |
58 | 1x |
ifelse(df[[trt_var]] %in% non_active_grp, non_active_grp_span_lbl, active_grp_span_lbl), |
59 | 1x |
levels = c(active_grp_span_lbl, non_active_grp_span_lbl) |
60 |
) |
|
61 | ||
62 | 1x |
return(df) |
63 |
} |
|
64 | ||
65 |
#' @name create_colspan_map |
|
66 |
#' @title Creation of Column Spanning Mapping Dataframe |
|
67 |
#' @description |
|
68 |
#' A function used for creating a data frame containing the map that is compatible with rtables split function |
|
69 |
#' `trim_levels_to_map` |
|
70 |
#' @details |
|
71 |
#' This function creates a data frame containing the map that is compatible with rtables split function |
|
72 |
#' `trim_levels_to_map`. |
|
73 |
#' The levels of the specified trt_var variable will be stored within the trt_var variable |
|
74 |
#' and the colspan_var variable will contain the corresponding spanning header value for each treatment group. |
|
75 |
#' @inheritParams create_colspan_var |
|
76 |
#' @param active_first whether the active columns come first. |
|
77 |
#' @returns a data frame that contains the map to be used with rtables split function `trim_levels_to_map` |
|
78 |
#' @rdname colspan_map |
|
79 |
#' @export |
|
80 |
#' @examples |
|
81 |
#' library(tibble) |
|
82 |
#' |
|
83 |
#' df <- tribble( |
|
84 |
#' ~TRT01A, |
|
85 |
#' "Placebo", |
|
86 |
#' "Active 1", |
|
87 |
#' "Active 2" |
|
88 |
#' ) |
|
89 |
#' |
|
90 |
#' df$TRT01A <- factor(df$TRT01A, levels = c("Placebo", "Active 1", "Active 2")) |
|
91 |
#' |
|
92 |
#' colspan_map <- create_colspan_map( |
|
93 |
#' df = df, |
|
94 |
#' non_active_grp = c("Placebo"), |
|
95 |
#' non_active_grp_span_lbl = " ", |
|
96 |
#' active_grp_span_lbl = "Active Study Agent", |
|
97 |
#' colspan_var = "colspan_trt", |
|
98 |
#' trt_var = "TRT01A" |
|
99 |
#' ) |
|
100 |
#' |
|
101 |
#' colspan_map |
|
102 |
create_colspan_map <- function( |
|
103 |
df, |
|
104 |
non_active_grp = c("Placebo"), |
|
105 |
non_active_grp_span_lbl = " ", |
|
106 |
active_grp_span_lbl = "Active Study Agent", |
|
107 |
colspan_var = "colspan_trt", |
|
108 |
trt_var = "TRT01A", |
|
109 |
active_first = TRUE) { |
|
110 | 5x |
act_trtlv <- setdiff(levels(df[[trt_var]]), non_active_grp) |
111 | 5x |
act_map_df <- data.frame(a = active_grp_span_lbl, b = act_trtlv) |
112 | 5x |
nact_map_df <- data.frame(a = non_active_grp_span_lbl, b = non_active_grp) |
113 | 5x |
df <- if (active_first) { |
114 | 4x |
rbind(act_map_df, nact_map_df) |
115 |
} else { |
|
116 | 1x |
rbind(nact_map_df, act_map_df) |
117 |
} |
|
118 | 5x |
names(df) <- c(colspan_var, trt_var) |
119 | ||
120 | 5x |
return(df) |
121 |
} |
1 |
#' Simple Content Row Function to Count Rows |
|
2 |
#' |
|
3 |
#' @return a `VertalRowsSection` object (as returned by [rtables::in_rows()] |
|
4 |
#' containing counts from the data. |
|
5 |
#' @keywords internal |
|
6 |
c_row_counts <- function(df, labelstr, label_fstr) { |
|
7 | 18x |
in_rows(count = nrow(df), .formats = "xx", .labels = sprintf(label_fstr, labelstr)) |
8 |
} |
|
9 | ||
10 |
#' Simple Content Row Function to Count Rows from Alternative Data |
|
11 |
#' @return a `VertalRowsSection` object (as returned by [rtables::in_rows()] |
|
12 |
#' containing counts from the alt data. |
|
13 |
#' @keywords internal |
|
14 |
c_row_counts_alt <- function(df, labelstr, label_fstr, .alt_df) { |
|
15 | 9x |
c_row_counts(df = .alt_df, labelstr = labelstr, label_fstr = label_fstr) |
16 |
} |
|
17 | ||
18 |
#' Layout Creating Function Adding Row Counts |
|
19 |
#' |
|
20 |
#' This is a simple wrapper of [rtables::summarize_row_groups()] and the main |
|
21 |
#' additional value is that we can choose whether we want to use the alternative |
|
22 |
#' (usually ADSL) data set for the counts (default) or use the original data set. |
|
23 |
#' |
|
24 |
#' @inheritParams proposal_argument_convention |
|
25 |
#' @param label_fstr (`string`)\cr a `sprintf` style format string. |
|
26 |
#' It can contain up to one `%s` which takes the current split value and |
|
27 |
#' generates the row label. |
|
28 |
#' @param alt_counts (`flag`)\cr whether row counts should be taken from |
|
29 |
#' `alt_counts_df` (`TRUE`) or from `df` (`FALSE`). |
|
30 |
#' |
|
31 |
#' @return A modified layout where the latest row split now has a row group |
|
32 |
#' summaries (as created by [rtables::summarize_row_groups] for the counts. |
|
33 |
#' for the counts. |
|
34 |
#' @export |
|
35 |
#' @examples |
|
36 |
#' basic_table() |> |
|
37 |
#' split_cols_by("ARM") |> |
|
38 |
#' add_colcounts() |> |
|
39 |
#' split_rows_by("RACE", split_fun = drop_split_levels) |> |
|
40 |
#' summarize_row_counts(label_fstr = "RACE value - %s") |> |
|
41 |
#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") |> |
|
42 |
#' build_table(DM, alt_counts_df = rbind(DM, DM)) |
|
43 |
#' |
|
44 |
summarize_row_counts <- function(lyt, label_fstr = "%s", alt_counts = TRUE) { |
|
45 | 2x |
checkmate::assert_flag(alt_counts) |
46 | ||
47 | 2x |
summarize_row_groups( |
48 | 2x |
lyt, |
49 | 2x |
cfun = if (alt_counts) c_row_counts_alt else c_row_counts, |
50 | 2x |
extra_args = list(label_fstr = label_fstr) |
51 |
) |
|
52 |
} |
1 |
#' Proportion difference estimation |
|
2 |
#' |
|
3 |
#' The analysis function [a_proportion_diff_j()] can be used to create a layout element to estimate |
|
4 |
#' the difference in proportion of responders within a studied population. The primary analysis variable, |
|
5 |
#' `vars`, is a logical variable indicating whether a response has occurred for each record. See the `method` |
|
6 |
#' parameter for options of methods to use when constructing the confidence interval of the proportion difference. |
|
7 |
#' A stratification variable can be supplied via the `strata` element of the `variables` argument. |
|
8 |
#' |
|
9 |
#' @param df (`data.frame`)\cr input data frame. |
|
10 |
#' @param .var (`string`)\cr name of the response variable. |
|
11 |
#' @param ref_path (`character`)\cr path to the reference group. |
|
12 |
#' @param .spl_context (`environment`)\cr split context environment. |
|
13 |
#' @param ... Additional arguments passed to the statistics function. |
|
14 |
#' @param .stats (`character`)\cr statistics to calculate. |
|
15 |
#' @param .formats (`list`)\cr formats for the statistics. |
|
16 |
#' @param .labels (`list`)\cr labels for the statistics. |
|
17 |
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics. |
|
18 |
#' @param .ref_group (`data.frame`)\cr reference group data frame. |
|
19 |
#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column. |
|
20 |
#' @param variables (`list`)\cr list with strata variable names. |
|
21 |
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval. |
|
22 |
#' @param method (`string`)\cr method to use for confidence interval calculation. |
|
23 |
#' @param weights_method (`string`)\cr method to use for weights calculation in stratified analysis. |
|
24 |
#' |
|
25 |
#' @name prop_diff |
|
26 |
#' @order 1 |
|
27 |
#' |
|
28 |
#' @note The [a_proportion_diff_j()] function has the `_j` suffix to distinguish it |
|
29 |
#' from [tern::a_proportion_diff()]. The functions here are a copy from the `tern` package |
|
30 |
#' with additional features: |
|
31 |
#' |
|
32 |
#' * Additional statistic `diff_est_ci` is returned. |
|
33 |
#' * `ref_path` needs to be provided as extra argument to specify the control group column. |
|
34 |
#' |
|
35 |
NULL |
|
36 | ||
37 |
#' @describeIn prop_diff Statistics function estimating the difference |
|
38 |
#' in terms of responder proportion. |
|
39 |
#' |
|
40 |
#' @return |
|
41 |
#' * `s_proportion_diff_j()` returns a named list of elements `diff`, |
|
42 |
#' `diff_ci`, `diff_est_ci` and `diff_ci_3d`. |
|
43 |
#' |
|
44 |
#' @note When performing an unstratified analysis, methods `'cmh'`, `'strat_newcombe'`, |
|
45 |
#' and `'strat_newcombecc'` are not permitted. |
|
46 |
#' |
|
47 |
#' @examples |
|
48 |
#' |
|
49 |
#' s_proportion_diff_j( |
|
50 |
#' df = subset(dta, grp == "A"), |
|
51 |
#' .var = "rsp", |
|
52 |
#' .ref_group = subset(dta, grp == "B"), |
|
53 |
#' .in_ref_col = FALSE, |
|
54 |
#' conf_level = 0.90, |
|
55 |
#' method = "ha" |
|
56 |
#' ) |
|
57 |
#' |
|
58 |
#' s_proportion_diff_j( |
|
59 |
#' df = subset(dta, grp == "A"), |
|
60 |
#' .var = "rsp", |
|
61 |
#' .ref_group = subset(dta, grp == "B"), |
|
62 |
#' .in_ref_col = FALSE, |
|
63 |
#' variables = list(strata = c("f1", "f2")), |
|
64 |
#' conf_level = 0.90, |
|
65 |
#' method = "cmh" |
|
66 |
#' ) |
|
67 |
#' |
|
68 |
#' @export |
|
69 |
#' @order 3 |
|
70 |
s_proportion_diff_j <- function( |
|
71 |
df, |
|
72 |
.var, |
|
73 |
.ref_group, |
|
74 |
.in_ref_col, |
|
75 |
variables = list(strata = NULL), |
|
76 |
conf_level = 0.95, |
|
77 |
method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe", "strat_newcombecc"), |
|
78 |
weights_method = "cmh") { |
|
79 | 339x |
start <- s_proportion_diff( |
80 | 339x |
df = df, |
81 | 339x |
.var = .var, |
82 | 339x |
.ref_group = .ref_group, |
83 | 339x |
.in_ref_col = .in_ref_col, |
84 | 339x |
variables = variables, |
85 | 339x |
conf_level = conf_level, |
86 | 339x |
method = method, |
87 | 339x |
weights_method = weights_method |
88 |
) |
|
89 | ||
90 | 339x |
c( |
91 | 339x |
start, |
92 | 339x |
list( |
93 | 339x |
diff_est_ci = with_label( |
94 | 339x |
c(start$diff, start$diff_ci), |
95 | 339x |
paste0("% Difference (", f_conf_level(conf_level), ")") |
96 |
), |
|
97 | 339x |
diff_ci_3d = with_label( |
98 | 339x |
c(start$diff, start$diff_ci), |
99 | 339x |
paste0("Relative Risk (", f_conf_level(conf_level), ")") |
100 |
) |
|
101 |
) |
|
102 |
) |
|
103 |
} |
|
104 | ||
105 |
#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`. |
|
106 |
#' |
|
107 |
#' @return |
|
108 |
#' * `a_proportion_diff_j()` returns the corresponding list with formatted [rtables::CellValue()]. |
|
109 |
#' |
|
110 |
#' @examples |
|
111 |
#' nex <- 100 |
|
112 |
#' dta <- data.frame( |
|
113 |
#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE), |
|
114 |
#' "grp" = sample(c("A", "B"), nex, TRUE), |
|
115 |
#' "f1" = sample(c("a1", "a2"), nex, TRUE), |
|
116 |
#' "f2" = sample(c("x", "y", "z"), nex, TRUE), |
|
117 |
#' stringsAsFactors = TRUE |
|
118 |
#' ) |
|
119 |
#' |
|
120 |
#' l <- basic_table() |> |
|
121 |
#' split_cols_by(var = "grp") |> |
|
122 |
#' analyze( |
|
123 |
#' vars = "rsp", |
|
124 |
#' afun = a_proportion_diff_j, |
|
125 |
#' show_labels = "hidden", |
|
126 |
#' na_str = tern::default_na_str(), |
|
127 |
#' extra_args = list( |
|
128 |
#' conf_level = 0.9, |
|
129 |
#' method = "ha", |
|
130 |
#' ref_path = c("grp", "B") |
|
131 |
#' ) |
|
132 |
#' ) |
|
133 |
#' |
|
134 |
#' build_table(l, df = dta) |
|
135 |
#' @export |
|
136 |
#' @order 2 |
|
137 |
a_proportion_diff_j <- function( |
|
138 |
df, |
|
139 |
.var, |
|
140 |
ref_path, |
|
141 |
.spl_context, |
|
142 |
..., |
|
143 |
.stats = NULL, |
|
144 |
.formats = NULL, |
|
145 |
.labels = NULL, |
|
146 |
.indent_mods = NULL) { |
|
147 |
# Check for additional parameters to the statistics function |
|
148 | 2x |
dots_extra_args <- list(...) |
149 | ||
150 |
# Only support default stats, not custom stats |
|
151 | 2x |
.stats <- .split_std_from_custom_stats(.stats)$default_stats |
152 | ||
153 |
# Obtain reference column information |
|
154 | 2x |
ref <- get_ref_info(ref_path, .spl_context) |
155 | ||
156 |
# Apply statistics function |
|
157 | 2x |
x_stats <- .apply_stat_functions( |
158 | 2x |
default_stat_fnc = s_proportion_diff_j, |
159 | 2x |
custom_stat_fnc_list = NULL, |
160 | 2x |
args_list = c( |
161 | 2x |
df = list(df), |
162 | 2x |
.var = .var, |
163 | 2x |
.ref_group = list(ref$ref_group), |
164 | 2x |
.in_ref_col = ref$in_ref_col, |
165 | 2x |
dots_extra_args |
166 |
) |
|
167 |
) |
|
168 | ||
169 |
# Format according to specifications |
|
170 | 2x |
format_stats( |
171 | 2x |
x_stats, |
172 | 2x |
method_groups = "proportion_diff", |
173 | 2x |
stats_in = .stats, |
174 | 2x |
formats_in = .formats, |
175 | 2x |
labels_in = .labels, |
176 | 2x |
indents_in = .indent_mods |
177 |
) |
|
178 |
} |