junco coverage - 85.77%

Files
Source

## contribute to formatters
#' @name inches_to_spaces
#' @title Conversion of inches to spaces
#' @param ins numeric. Vector of widths in inches
#' @param fontspec font_spec. The font specification to use
#' @param raw logical(1). Should the answer be returned unrounded
#'  (`TRUE`), or rounded to the nearest reasonable value (`FALSE`,
#'  the default)
#' @param tol numeric(1). The numeric tolerance,  values
#'  between an integer `n`, and `n+tol` will be returned
#'  as `n`, rather than `n+1`, if `raw == FALSE`. Ignored
#'  when `raw` is `TRUE`.
#' @return the number of either fractional (`raw = TRUE`) or whole (`raw = FALSE`)
#'   spaces that will fit within `ins` inches in the specified font
#' @export
inches_to_spaces <- function(ins, fontspec, raw = FALSE, tol = sqrt(.Machine$double.eps)) {
  newdev <- open_font_dev(fontspec)
  if (newdev) {
    on.exit(close_font_dev())
  }
  spcw <- grid::convertWidth(grid::unit(1, "strwidth", " "), "inches", valueOnly = TRUE)
  ans <- ins / spcw
  if (!raw) {
    win_tol <- ans - floor(ans) < tol
    ans[win_tol] <- floor(ans)
    ans[!win_tol] <- ceiling(ans)
  }
  ans
}

#' @importFrom formatters wrap_string_ttype
ttype_wrap_vec <- function(vec, fontspec, width, wordbreak_ok = TRUE, ...) {
  lapply(
    vec,
    wrap_string_ttype,
    width_spc = width,
    fontspec = fontspec,
    wordbreak_ok = wordbreak_ok,
    ...
  )
}

#' @name check_wrap_nobreak
#' @title Check Word Wrapping
#' @description Check a set of column widths for word-breaking wrap behavior
#' @param tt TableTree
#' @param colwidths numeric. Column widths (in numbers of spaces under `fontspec`)
#' @param fontspec font_spec.
#'
#' @return `TRUE` if the wrap is able to be done without breaking words,
#' `FALSE` if wordbreaking is required to apply `colwidths`
#' @rdname check_wrap_nobreak
#' @export
check_wrap_nobreak <- function(tt, colwidths, fontspec) {
  newdev <- open_font_dev(fontspec)
  if (newdev) {
    on.exit(close_font_dev())
  }
  mpf <- matrix_form(tt, fontspec = fontspec)
  strs <- mf_strings(mpf)
  colok <- vapply(
    seq_len(ncol(strs)),
    function(i) {
      res <- tryCatch(
        ttype_wrap_vec(strs[, i, drop = TRUE], colwidths[i], fontspec = fontspec, wordbreak_ok = FALSE),
        error = function(e) e
      )
      !methods::is(res, "error")
    },
    TRUE
  )
  all(colok)
}

#' Colwidths for all columns to be forced on one page
#'
#' @param tt TableTree object to calculate column widths for
#' @param fontspec Font specification object
#' @param col_gap Column gap in spaces
#' @param rowlabel_width Width of row labels in spaces
#' @param print_width_ins Print width in inches
#' @param landscape Whether the output is in landscape orientation
#' @param lastcol_gap Whether to include a gap after the last column
#' @keywords internal
smart_colwidths_1page <- function(
    tt,
    fontspec,
    col_gap = 6L,
    rowlabel_width = inches_to_spaces(2, fontspec),
    print_width_ins = ifelse(landscape, 11, 8.5) - 2.12,
    landscape = FALSE,
    lastcol_gap = TRUE) {
  total_cpp <- floor(inches_to_spaces(print_width_ins, fontspec = fontspec, raw = TRUE))

  nc <- ncol(tt)
  remain <- total_cpp - rowlabel_width - col_gap * (nc - !lastcol_gap)

  c(rowlabel_width - col_gap, spread_integer(remain, nc))
}

spaces_to_inches <- function(spcs, fontspec) {
  nchar_ttype(" ", fontspec, raw = TRUE) * spcs
}

no_cellwrap_colwidths <- function(tt, fontspec, col_gap = 4L, label_width_ins = 2) {
  if (is.null(col_gap)) {
    col_gap <- 4L
  }
  mpf <- matrix_form(tt, TRUE, FALSE, fontspec = fontspec, col_gap = col_gap)
  strmat <- mf_strings(mpf)
  label_width_max <- inches_to_spaces(label_width_ins, fontspec)

  nchar_mat <- nchar_ttype(strmat[-seq_len(mf_nlheader(mpf)), , drop = FALSE],
    fontspec = fontspec
  )

  label_width <- min(
    label_width_max,
    max(nchar_mat[, 1, drop = TRUE])
  )
  col_maxes <- apply(nchar_mat[, -1, drop = FALSE], 2, max)
  c(label_width, col_maxes + col_gap)
}

pack_into_lines2 <- function(strs, wrdwidths = nchar_ttype(strs), colwidth, fontspec) {
  if (length(wrdwidths) == 0 || (length(wrdwidths) == 1 && wrdwidths <= colwidth)) {
    return(1)
  }
  newdev <- open_font_dev(fontspec)
  if (newdev) {
    on.exit(close_font_dev())
  }

  csums <- cumsum(wrdwidths + c(0, rep(1, length(wrdwidths) - 1)))
  oneline_wdth <- sum(wrdwidths) + length(wrdwidths) - 1
  if (colwidth >= ceiling(oneline_wdth)) {
    return(1)
  }
  lines <- 0
  wrdsused <- 0
  widthused <- 0
  index_seq <- seq_along(wrdwidths)
  totwrds <- length(wrdwidths)
  adj <- 0
  ## finite precision arithmetic is a dreamscape of infinite wonder
  tol <- sqrt(.Machine$double.eps)
  while (wrdsused < length(wrdwidths)) {
    csums_i <- csums - widthused

    fit <- which(index_seq > wrdsused & csums_i <= colwidth + tol)
    widthused <- sum(widthused, wrdwidths[fit], length(fit))
    wrdsused <- wrdsused + length(fit)
    lines <- lines + 1
  }
  lines
}

recursive_add_poss <- function(
    wlst,
    cur_lst,
    ubnd_width,
    lbnd_width,
    ubnd_lines = calc_total_lns(wlst, fontspec = fontspec, colwidth = ubnd_width)$lines,
    lbnd_lines = calc_total_lns(wlst, fontspec = fontspec, colwidth = lbnd_width)$lines,
    fontspec) {
  if (ubnd_width <= lbnd_width + 1 + sqrt(.Machine$double.eps)) {
    return(cur_lst)
  }
  curw <- floor((ubnd_width + lbnd_width) / 2)
  curlnsdf <- calc_total_lns(wlst, fontspec = fontspec, colwidth = curw)
  to_add <- list(curlnsdf)

  if (curlnsdf$lines != lbnd_lines) {
    to_add <- c(
      to_add,
      recursive_add_poss(
        wlst = wlst,
        fontspec = fontspec,
        ubnd_width = curw,
        ubnd_lines = curlnsdf$lines,
        lbnd_width = lbnd_width,
        lbnd_lines = lbnd_lines,
        cur_lst = list()
      )
    )
  }
  if (curlnsdf$lines != ubnd_lines) {
    to_add <- c(
      to_add,
      recursive_add_poss(
        wlst = wlst,
        fontspec = fontspec,
        ubnd_width = ubnd_width,
        ubnd_lines = ubnd_lines,
        lbnd_width = curw,
        lbnd_lines = curlnsdf$lines,
        cur_lst = list()
      )
    )
  }

  c(cur_lst, to_add)
}

calc_total_lns <- function(wlst, colwidth, fontspec, lns_per_pg = 50) {
  lns <- vapply(
    seq_along(wlst),
    function(i) {
      pack_into_lines2(wrdwidths = wlst[[i]], colwidth = colwidth, fontspec = fontspec)
    },
    1
  )
  nonblnks <- !vapply(wlst, function(x) length(x) == 0, TRUE)
  celllns <- lns[-1]
  cllsum <- sum(celllns)
  hdr <- lns[1]
  data.frame(
    colwidth = colwidth,
    lines = cllsum + ceiling(cllsum / lns_per_pg) * hdr,
    cell_lines = cllsum,
    lbl_lines = hdr,
    min_cell_lines = min(1, celllns[nonblnks], na.rm = TRUE),
    max_cell_lines = max(celllns)
  )
}


calc_poss_lines <- function(wlst, lbound, avail_spc, fontspec) {
  minposs <- calc_total_lns(wlst, lbound + avail_spc, fontspec)
  maxposs <- calc_total_lns(wlst, lbound, fontspec)

  retlst <- list(minposs, maxposs)

  retlst <- recursive_add_poss(
    wlst = wlst,
    ubnd_width = lbound + avail_spc,
    lbnd_width = lbound,
    cur_lst = retlst,
    fontspec = fontspec
  )
  do.call(rbind, retlst)
}


make_poss_wdf <- function(
    mpf,
    incl_header = FALSE,
    col_gap = 3,
    pg_width_ins = 8.88,
    fontspec = font_spec("Times", 9, 1.2)) {
  newdev <- open_font_dev(fontspec)
  if (newdev) {
    on.exit(close_font_dev())
  }
  if (!methods::is(mpf, "MatrixPrintForm")) {
    mpf <- matrix_form(mpf, fontspec = fontspec, col_gap = col_gap)
  }
  strs <- mf_strings(mpf)
  if (!incl_header) {
    strs <- strs[-seq_len(mf_nlheader(mpf)), ]
  }
  nc <- ncol(strs)
  nr <- nrow(strs)
  ## strip out markup so we are not over countin (by as much)
  strs <- matrix(gsub("~[{[][[:alpha:]]+ ([^]}]+)[]}]", "\\1", strs), ncol = nc, nrow = nr)

  possrows <- lapply(seq_len(ncol(strs)), function(ii) {
    res <- one_col_strs(strs[, ii, drop = TRUE], fontspec = fontspec, col_gap = col_gap)
    res$col_num <- ii
    res
  })

  possdf <- do.call(rbind, possrows)
  o <- order(possdf$col_num, possdf$colwidth)
  possdf <- possdf[o, ]
  possdf
}

#'
#' @param mpf (`listing_df` or `MatrixPrintForm` derived thereof)\cr The listing
#' calculate column widths for.
#' @param incl_header (`logical(1)`)\cr Should the constraint to not break up
#' individual words be extended to words in the column labels? Defaults to `TRUE`
#' @param col_gap (`numeric(1)`)\cr Amount of extra space (in spaces) to
#' assume between columns. Defaults to `0.5`
#' @param pg_width_ins (`numeric(1)`)\cr Number of inches in width for
#'  *the portion of the page the listing will be printed to*. Defaults to `8.88`
#'  which corresponds to landscape orientation on a standard page after margins.
#' @param fontspec (`font_spec`)\cr Defaults to Times New Roman 8pt font with 1.2 line
#' height.
#' @param verbose (`logical(1)`)\cr Should additional information messages be
#' displayed during the calculation of the column widths? Defaults to `FALSE`.
#' @returns A vector of column widths suitable to use in `tt_to_tlgrtf` and
#' other exporters.
#' @rdname def_colwidths
#' @export
listing_column_widths <- function(
    mpf,
    incl_header = TRUE,
    col_gap = 0.5,
    pg_width_ins = 8.88,
    fontspec = font_spec("Times", 8, 1.2),
    verbose = FALSE) {
  newdev <- open_font_dev(fontspec)
  if (newdev) {
    on.exit(close_font_dev())
  }
  possdf <- make_poss_wdf(
    mpf = mpf,
    incl_header = incl_header,
    col_gap = col_gap,
    fontspec = fontspec,
    pg_width_ins = pg_width_ins
  )
  optimal <- optimal_widths(
    possdf = possdf,
    tot_spaces = inches_to_spaces(pg_width_ins, fontspec = fontspec),
    verbose = verbose
  )
  optimal$colwidth
}

find_free_colspc <- function(curposs, fullposs, thresh = 0.99, skip = integer(), verbose = FALSE) {
  orig_curposs <- curposs
  maxlns_ind <- which.max(curposs$cell_lines)
  longestcol <- curposs$col_num[maxlns_ind]
  maxlns <- curposs$cell_lines[maxlns_ind]
  adjrow_inds <- setdiff(which(curposs$cell_lines < thresh * maxlns), skip)
  for (arowi in adjrow_inds) {
    col <- curposs$col_num[arowi]
    colwidthii <- curposs$colwidth[arowi]
    fp_ind <- with(fullposs, min(which(col_num == col & cell_lines <= maxlns & colwidth < colwidthii)))
    if (is.finite(fp_ind)) {
      if (verbose) {
        oldwdth <- curposs$colwidth[arowi]
        newwdth <- fullposs$colwidth[fp_ind]
        msg <- sprintf("adjusting column %d width from %d to %d", col, oldwdth, newwdth)
        message(msg)
      }
      curposs[arowi, ] <- fullposs[fp_ind, ]
    }
  }
  curposs
}

constrict_lbl_lns <- function(curdf, possdf, avail_spc, verbose = TRUE) {
  old_lbl_lns <- max(curdf$lbl_lines)
  cols_to_pack <- which(curdf$lbl_lns == old_lbl_lns)
  olddf <- curdf
  success <- TRUE
  for (ii in cols_to_pack) {
    colii <- curdf$col_num[ii]
    cwidthii <- curdf$colwidth[ii]
    possdfii <- possdf[possdf$col_num == colii & possdf$lbl_lines < old_lbl_lns, ]
    if (nrow(possdfii) == 0) {
      success <- FALSE
      break
    }
    newrow <- possdfii[ii, ]
    if (newrow$colwidth - cwidthii > avail_spc) {
      success <- FALSE
      break
    }
    ## assuming sorted
    curdf[ii, ] <- newrow
    avail_spc <- avail_spc + cwidthii - newrow$colwidth
  }

  if (verbose) {
    if (success) {
      msg <- paste(
        "overall number of label rows successfully reduced. Cols affected: ",
        paste(curdf$col_num[cols_to_pack], collapse = ", ")
      )
    } else {
      msg <- paste("Unable to reduce label rows required.")
    }
  }

  if (!success) {
    curdf <- olddf
  }
  curdf
}

optimal_widths <- function(possdf, tot_spaces = 320, max_lbl_lines = 3, verbose = FALSE) {
  odf <- order(possdf$col_num, possdf$colwidth)
  possdf <- possdf[odf, ]
  badlbl <- which(possdf$lbl_lines > max_lbl_lines)
  if (length(badlbl > 0)) {
    if (verbose) {
      message("Excluding ", length(badlbl), " column widths for labels taking over ", max_lbl_lines, " lines.")
    }
    possdf <- possdf[-badlbl, ]
    possdf <- possdf[possdf$lbl_lines <= max_lbl_lines, , drop = FALSE]
  }
  full_possdf <- possdf
  ## already ordered by colnum then width so this the first of each colwidth is the min width for that col
  dups <- duplicated(possdf$col_num)
  curdf <- possdf[!dups, ]
  possdf <- possdf[dups, ] ## without rows for ones in curdf
  spcleft <- tot_spaces - sum(curdf$colwidth)
  if (verbose) {
    message(
      "Optimizng Column Widths\n",
      "Initial lines required: ",
      max(curdf$lines),
      "\n",
      "Available adjustment: ",
      spcleft,
      " spaces\n"
    )
  }
  done <- FALSE
  while (!done) {
    oldwdths <- curdf$colwidth
    curdf <- constrict_lbl_lns(curdf, possdf, verbose = verbose)
    if (all.equal(curdf$colwidth, oldwdths)) {
      done <- TRUE
    }
  }
  change <- TRUE
  while (spcleft > 0 && change && nrow(possdf) > 0) {
    change <- FALSE
    ii <- which.max(curdf$cell_lines)
    spcleft <- tot_spaces - sum(curdf$colwidth)
    colii <- curdf$col_num[ii]
    bef_lns <- curdf$cell_lines[ii]
    bef_width <- curdf$colwidth[ii]
    nextlns <- max(curdf$cell_lines[-ii])
    cand_row_cond <- possdf$col_num == colii & possdf$cell_lines < bef_lns & possdf$colwidth - bef_width <= spcleft
    canddf <- possdf[cand_row_cond, , drop = FALSE]
    if (nrow(canddf) > 0) {
      more_than_next <- canddf$cell_lines >= nextlns
      if (any(more_than_next)) {
        candrow <- canddf[max(which(more_than_next)), ]
      } else {
        candrow <- canddf[nrow(canddf), ]
      }

      if (verbose) {
        message(
          "COL ",
          colii,
          " width: ",
          bef_width,
          "->",
          candrow$colwidth,
          " lines req: ",
          bef_lns,
          "->",
          candrow$cell_lines
        )
      }
      change <- TRUE
      curdf[ii, ] <- candrow
    }
  }
  curdf
}


one_col_strs <- function(strcol, col_gap = 2, fontspec) {
  strspls <- strsplit(strcol, split = "(-| (?=[^/]))", perl = TRUE)
  strspl_widths <- lapply(strspls, nchar_ttype, fontspec = fontspec, raw = TRUE)
  lbound_raw <- max(unlist(strspl_widths))
  lbound <- ceiling(lbound_raw + 2 * col_gap)
  ret <- calc_poss_lines(strspl_widths, lbound, 50, fontspec = fontspec)
  ret
}

## we have permission from the formatters maintainer to use this
## unexported function
j_mf_col_widths <- utils::getFromNamespace("mf_col_widths", "formatters")

#' @name def_colwidths
#'
#' @title Define Column Widths
#'
#' @description
#' `def_colwidths` uses heuristics to determine suitable column widths given a
#' table or listing, and a font.
#'
#' @param tt input Tabletree
#' @param fontspec Font specification
#' @param label_width_ins Label Width in Inches.
#' @param col_gap Column gap in spaces. Defaults to `.5` for listings and `3`
#'   for tables.
#' @param type Type of the table tree, used to determine column width calculation method.
#'
#' @details Listings are assumed to be rendered landscape on standard A1 paper,
#'   such that all columns are rendered on one page. Tables are allowed to
#'   be horizontally paginated, and column widths are determined based only on
#'   required word wrapping. See the `Automatic Column Widths` vignette for
#'   a detailed discussion of the algorithms used.
#' @return a vector of column widths (including the label row pseudo-column in the table
#'   case) suitable for use rendering `tt` in the specified font.
#' @export
#'
def_colwidths <- function(tt,
                          fontspec,
                          label_width_ins = 2,
                          col_gap = ifelse(type == "Listing", .5, 3),
                          type = tlg_type(tt)) {
  if (type == "Figure") {
    ret <- NULL
  } else if (type == "Table") {
    if (
      is.list(tt) &&
        !methods::is(tt, "MatrixPrintForm") &&
        !is.null(j_mf_col_widths(tt[[1]]))
    ) {
      ret <- j_mf_col_widths(tt[[1]])
    } else {
      ret <- no_cellwrap_colwidths(tt, fontspec, col_gap = col_gap, label_width_ins = label_width_ins)
    }
  } else {
    ret <- listing_column_widths(tt, fontspec = fontspec, col_gap = col_gap)
  }
  ret
}

#' Adding Labels To Variables For Model
#'
#' @param vars (`list`)\cr variables to use.
#' @param data (`data.frame`)\cr data to use.
#' @param x (`character`)\cr an element in vars.
#'
#' @name labels
#' @keywords internal
NULL

#' @describeIn labels checks if element in `vars` is not `NULL` and not empty.
h_is_specified <- function(x, vars) {
  !is.null(vars[[x]]) && (length(vars[[x]]) > 0)
}

#' @describeIn labels checks if element in vars is not NULL and exists in dataset.
h_is_specified_and_in_data <- function(x, vars, data) {
  h_is_specified(x, vars) && all(vars[[x]] %in% names(data))
}

#' @describeIn labels gets label for each element in vars.
h_check_and_get_label <- function(x, vars, data) {
  checkmate::assert_true(h_is_specified_and_in_data(x, vars, data))
  res <- NULL
  for (v in vars[[x]]) {
    label <- attr(data[[v]], "label")
    string <- ifelse(!is.null(label), label, v)
    res <- c(res, stats::setNames(string, v))
  }
  res
}

#' Extraction of Covariate Parts from Character Vector
#'
#' @param covariates (`character`)\cr specification in the usual way, see examples.
#'
#' @return Character vector of the covariates involved in `covariates` specification.
#' @keywords internal
h_get_covariate_parts <- function(covariates) {
  checkmate::assert_character(covariates, null.ok = TRUE)
  if (is.null(covariates)) {
    NULL
  } else {
    unique(unlist(strsplit(covariates, split = "\\*|:")))
  }
}

#' @describeIn labels returns the list of variables with labels.
h_labels <- function(vars, data) {
  checkmate::assert_list(vars)
  checkmate::assert_data_frame(data)
  labels <- list()
  labels$response <- h_check_and_get_label("response", vars, data)
  labels$id <- h_check_and_get_label("id", vars, data)
  labels$visit <- h_check_and_get_label("visit", vars, data)
  if (h_is_specified("arm", vars)) {
    labels$arm <- h_check_and_get_label("arm", vars, data)
  }
  if (h_is_specified("covariates", vars)) {
    vars$parts <- h_get_covariate_parts(vars$covariates)
    labels$parts <- h_check_and_get_label("parts", vars, data)
  }
  if (h_is_specified("weights", vars)) {
    labels$weights <- h_check_and_get_label("weights", vars, data)
  }
  return(labels)
}

#' Building Model Formula
#'
#' This builds the model formula which is used inside [fit_mmrm_j()] and provided
#' to [mmrm::mmrm()] internally. It can be instructive to look at the resulting
#' formula directly sometimes.
#'
#' @param vars (`list`)\cr variables to use in the model.
#' @param cor_struct (`string`)\cr specify the covariance structure to use.
#' @return Formula to use in [mmrm::mmrm()].
#' @export
#'
#' @examples
#' vars <- list(
#'   response = "AVAL", covariates = c("RACE", "SEX"),
#'   id = "USUBJID", arm = "ARMCD", visit = "AVISIT"
#' )
#' build_formula(vars, "auto-regressive")
#' build_formula(vars)
build_formula <- function(
    vars,
    cor_struct = c(
      "unstructured",
      "toeplitz",
      "heterogeneous toeplitz",
      "ante-dependence",
      "heterogeneous ante-dependence",
      "auto-regressive",
      "heterogeneous auto-regressive",
      "compound symmetry",
      "heterogeneous compound symmetry"
    )) {
  checkmate::assert_list(vars)
  cor_struct <- match.arg(cor_struct)
  covariates_part <- paste(vars$covariates, collapse = " + ")
  arm_visit_part <- if (is.null(vars$arm)) {
    vars$visit
  } else {
    paste0(vars$arm, "*", vars$visit)
  }
  random_effects_fun <- switch(cor_struct,
    unstructured = "us",
    toeplitz = "toep",
    `heterogeneous toeplitz` = "toeph",
    `ante-dependence` = "ad",
    `heterogeneous ante-dependence` = "adh",
    `auto-regressive` = "ar1",
    `heterogeneous auto-regressive` = "ar1h",
    `compound symmetry` = "cs",
    `heterogeneous compound symmetry` = "csh"
  )
  random_effects_part <- paste0(random_effects_fun, "(", vars$visit, " | ", vars$id, ")")
  rhs_formula <- paste(arm_visit_part, "+", random_effects_part)
  if (covariates_part != "") {
    rhs_formula <- paste(covariates_part, "+", rhs_formula)
  }
  stats::as.formula(paste(vars$response, "~", rhs_formula))
}

#' Extract Least Square Means from `MMRM`
#'
#' Extracts the least square means from an `MMRM` fit.
#'
#' @param fit (`mmrm`)\cr result of [mmrm::mmrm()].
#' @inheritParams fit_mmrm_j
#' @param averages (`list`)\cr named list of visit levels which should be averaged
#'   and reported along side the single visits.
#' @param weights (`string`)\cr type of weights to be used for the least square means,
#'   see [emmeans::emmeans()] for details.
#' @return A list with data frames `estimates` and `contrasts`.
#'   The attributes `averages` and `weights` save the settings used.
#'
#' @export
get_mmrm_lsmeans <- function(fit, vars, conf_level, weights, averages = list()) {
  checkmate::assert_class(fit, "mmrm")
  checkmate::assert_list(averages, types = "character")
  emmeans_res <- h_get_emmeans_res(fit, vars, weights)

  # Get least square means estimates for single visits, and possibly averaged visits.
  estimates <- h_get_single_visit_estimates(emmeans_res, conf_level)
  if (length(averages)) {
    average_specs <- h_get_average_visit_specs(emmeans_res, vars, averages, fit)
    average_estimates <- h_get_spec_visit_estimates(emmeans_res, average_specs, conf_level)
    estimates <- rbind(estimates, average_estimates)
  }
  has_arm_var <- !is.null(vars$arm)
  if (!has_arm_var) {
    return(list(estimates = estimates))
  }
  # Continue with contrasts when we have an arm variable.
  contrast_specs <- h_single_visit_contrast_specs(emmeans_res, vars)
  contrast_estimates <- h_get_spec_visit_estimates(emmeans_res, contrast_specs, conf_level, tests = TRUE)
  if (length(averages)) {
    average_contrast_specs <- h_average_visit_contrast_specs(contrast_specs, averages)
    average_contrasts <- h_get_spec_visit_estimates(emmeans_res, average_contrast_specs, conf_level, tests = TRUE)
    contrast_estimates <- rbind(contrast_estimates, average_contrasts)
  }

  relative_reduc_df <- h_get_relative_reduc_df(estimates, vars)
  contrast_estimates <- merge(contrast_estimates, relative_reduc_df, by = c(vars$arm, vars$visit), sort = FALSE)
  contrast_estimates[[vars$arm]] <- factor(contrast_estimates[[vars$arm]])
  contrast_estimates[[vars$visit]] <- factor(contrast_estimates[[vars$visit]])
  structure(list(estimates = estimates, contrasts = contrast_estimates), averages = averages, weights = weights)
}

#' `MMRM` Analysis
#'
#' Does the `MMRM` analysis. Multiple other functions can be called on the result to produce
#' tables and graphs.
#'
#' @param vars (named `list` of `string` or `character`)\cr specifying the variables in the `MMRM`.
#'   The following elements need to be included as character vectors and match corresponding columns
#'   in `data`:
#'
#'   - `response`: the response variable.
#'   - `covariates`: the additional covariate terms (might also include interactions).
#'   - `id`: the subject ID variable.
#'   - `arm`: the treatment group variable (factor).
#'   - `visit`: the visit variable (factor).
#'   - `weights`: optional weights variable (if `NULL` or omitted then no weights will be used).
#'
#'   Note that the main effects and interaction of `arm` and `visit` are by default
#'   included in the model.
#' @param data (`data.frame`)\cr with all the variables specified in
#'   `vars`. Records with missing values in any independent variables
#'   will be excluded.
#' @param conf_level (`proportion`)\cr confidence level of the interval.
#' @param cor_struct (`string`)\cr specifying the covariance structure, defaults to
#'   `'unstructured'`. See the details.
#' @param averages_emmeans (`list`)\cr optional named list of visit levels which should be averaged
#'   and reported along side the single visits.
#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `'counterfactual'` by default.
#' @param ... additional arguments for [mmrm::mmrm()], in particular `reml` and options listed in
#'   [mmrm::mmrm_control()].
#'
#' @details Multiple different degree of freedom adjustments are available via the `method` argument
#'   for [mmrm::mmrm()]. In addition, covariance matrix adjustments are available via `vcov`.
#'   Please see [mmrm::mmrm_control()] for details and additional useful options.
#'
#'   For the covariance structure (`cor_struct`), the user can choose among the following options.
#'
#'   - `unstructured`: Unstructured covariance matrix. This is the most flexible choice and default.
#'        If there are `T` visits, then `T * (T+1) / 2` variance parameters are used.
#'   - `toeplitz`: Homogeneous Toeplitz covariance matrix, which uses `T` variance parameters.
#'   - `heterogeneous toeplitz`: Heterogeneous Toeplitz covariance matrix,
#'        which uses `2 * T - 1` variance parameters.
#'   - `ante-dependence`: Homogeneous Ante-Dependence covariance matrix, which uses `T` variance parameters.
#'   - `heterogeneous ante-dependence`: Heterogeneous Ante-Dependence covariance matrix,
#'        which uses `2 * T - 1` variance parameters.
#'   - `auto-regressive`: Homogeneous Auto-Regressive (order 1) covariance matrix,
#'        which uses 2 variance parameters.
#'   - `heterogeneous auto-regressive`: Heterogeneous Auto-Regressive (order 1) covariance matrix,
#'        which uses `T + 1` variance parameters.
#'   - `compound symmetry`: Homogeneous Compound Symmetry covariance matrix, which uses 2
#'        variance parameters.
#'   - `heterogeneous compound symmetry`: Heterogeneous Compound Symmetry covariance matrix, which uses
#'        `T + 1` variance parameters.
#'
#' @return A `tern_model` object which is a list with model results:
#'
#'   - `fit`: The `mmrm` object which was fitted to the data. Note that via `mmrm::component(fit, 'optimizer')`
#'       the finally used optimization algorithm can be obtained, which can be useful for refitting the model
#'       later on.
#'   - `cov_estimate`: The matrix with the covariance matrix estimate.
#'   - `diagnostics`: A list with model diagnostic statistics (REML criterion, AIC, corrected AIC, BIC).
#'   - `lsmeans`: This is a list with data frames `estimates` and `contrasts`.
#'        The attributes `averages` and `weights` save the settings used
#'        (`averages_emmeans` and `weights_emmeans`).
#'   - `vars`: The variable list.
#'   - `labels`: Corresponding list with variable labels extracted from `data`.
#'   - `cor_struct`: input.
#'   - `ref_level`: The reference level for the arm variable, which is always the first level.
#'   - `treatment_levels`: The treatment levels for the arm variable.
#'   - `conf_level`: The confidence level which was used to construct the `lsmeans` confidence intervals.
#'   - `additional`: List with any additional inputs passed via `...`
#'
#' @export
#'
#' @note This function has the `_j` suffix to distinguish it from [mmrm::fit_mmrm()].
#'   It is a copy from the `tern.mmrm` package and later will be replaced by tern.mmrm::fit_mmrm().
#'   No new features are included in this function here.
#'
#' @examples
#' mmrm_results <- fit_mmrm_j(
#'   vars = list(
#'     response = "FEV1",
#'     covariates = c("RACE", "SEX"),
#'     id = "USUBJID",
#'     arm = "ARMCD",
#'     visit = "AVISIT"
#'   ),
#'   data = mmrm::fev_data,
#'   cor_struct = "unstructured",
#'   weights_emmeans = "equal",
#'   averages_emmeans = list(
#'     "VIS1+2" = c("VIS1", "VIS2")
#'   )
#' )
fit_mmrm_j <- function(
    vars = list(response = "AVAL", covariates = c(), id = "USUBJID", arm = "ARM", visit = "AVISIT"),
    data,
    conf_level = 0.95,
    cor_struct = "unstructured",
    weights_emmeans = "counterfactual",
    averages_emmeans = list(),
    ...) {
  labels <- h_labels(vars, data)
  formula <- build_formula(vars, cor_struct)
  weights <- if (!is.null(vars$weights)) data[[vars$weights]] else NULL

  fit <- mmrm::mmrm(formula = formula, data = data, weights = weights, reml = TRUE, ...)
  lsmeans <- get_mmrm_lsmeans(
    fit = fit,
    vars = vars,
    conf_level = conf_level,
    averages = averages_emmeans,
    weights = weights_emmeans
  )
  cov_estimate <- mmrm::VarCorr(fit)
  visit_levels <- rownames(cov_estimate)
  contrasts_from_visits <- match(visit_levels, lsmeans$contrasts[[vars$visit]])
  df <- stats::setNames(lsmeans$contrasts$df[contrasts_from_visits], visit_levels)
  results <- list(
    fit = fit,
    cov_estimate = cov_estimate,
    lsmeans = lsmeans,
    vars = vars,
    labels = labels,
    mse = diag(cov_estimate),
    df = df,
    cor_struct = cor_struct,
    ref_level = if (is.null(vars$arm)) NULL else levels(data[[vars$arm]])[1],
    treatment_levels = if (is.null(vars$arm)) NULL else levels(data[[vars$arm]])[-1],
    conf_level = conf_level,
    additional = list(...)
  )
  class(results) <- "tern_model"
  return(results)
}

s_summarize_desc_j <- function(df, .var, .ref_group, .in_ref_col, control = control_analyze_vars()) {
  x <- df[[.var]]
  y1 <- s_summary(x)
  y2 <- NULL

  # diff in means versus control group, based upon 2 sample t.test
  y2$mean_diffci <- numeric()
  if (!is.null(.ref_group) && !.in_ref_col) {
    x1 <- df[[.var]]
    x2 <- .ref_group[[.var]]

    x1 <- x1[!is.na(x1)]
    x2 <- x2[!is.na(x2)]

    if ((length(x1) > 1 && length(x2) > 1)) {
      ttest_stat <- stats::t.test(x1, x2, conf.level = control$conf_level)

      stat <- ttest_stat[c("estimate", "conf.int")]
      stat$diff <- stat$estimate[1] - stat$estimate[2]
      stat <- c(stat$diff, stat$conf.int)

      y2$mean_diffci <- with_label(
        c(mean_diffci = stat),
        paste("Difference in Mean + ", f_conf_level(control$conf_level))
      )
    } else {
      y2b <- s_summary(.ref_group[[.var]])
      diff <- y1[["mean"]] - y2b[["mean"]]
      stat <- c(diff, NA, NA)

      y2$mean_diffci <- with_label(
        c(mean_diffci = stat),
        paste("Difference in Mean + ", f_conf_level(control$conf_level))
      )
    }
  }
  y <- c(y1, y2)

  return(y)
}

s_aval_chg_col1 <- function(df, .var, denom, .N_col, id, indatavar) {
  ## First column AVAL - show n/N (%)
  mystat <- "n"

  if (!is.null(indatavar)) {
    df <- subset(df, !is.na(df[[indatavar]]))
  }

  x <- df[[.var]]

  x_stats <- s_summary(x)
  x_stats <- x_stats[mystat]

  ### Ndenom derivation in case denom = N
  Ndenom <- .N_col
  if (denom == "N") {
    ### as our input dataset has ensured we have unique subjects we can just use the length of x here still safer
    ### to use id variable
    nsub <- length(unique(df[[id]]))
    Ndenom <- nsub
  }

  count_denom_frac <- c(x_stats$n, Ndenom, x_stats$n / Ndenom)
  names(count_denom_frac) <- c("n", "N", "fraction")

  count_frac <- count_denom_frac[c("n", "fraction")]
  count <- count_denom_frac[c("n")]

  y <- list()
  y$count_denom_frac <- count_denom_frac
  y$count_frac <- count_frac
  y$count <- count

  return(y)
}

s_aval_chg_col23_diff <- function(
    df,
    .var,
    .df_row,
    .ref_group,
    .in_ref_col,
    ancova,
    interaction_y,
    interaction_item,
    conf_level,
    variables,
    trt_var,
    ctrl_grp,
    cur_param,
    cur_lvl) {
  .df_row <- subset(.df_row, !is.na(.df_row[[.var]]))
  df <- subset(df, !is.na(df[[.var]]))
  .ref_group <- subset(.ref_group, !is.na(.ref_group[[.var]]))

  if (nrow(.df_row) == 0) {
    #### this is only when input row no non-missing records in any of the columns expected to occur for baseline
    #### timepoint for analysis variable change only here we want a blank cell, not a cell with all NA's NULL is
    #### generating a blank cell
    x_stats <- NULL
    mystat1 <- c("mean_ci_3d", "mean_diffci")
  } else if (!ancova) {
    mystat1 <- c("mean_ci_3d", "mean_diffci")

    control <- control_analyze_vars()
    control$conf_level <- conf_level
    x_stats <- s_summarize_desc_j(
      df = df,
      .var = .var,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      control = control
    )
  } else {
    mystat1 <- c("lsmean_ci", "lsmean_diffci")

    ### sparse data problems with underlying ancova function 1/ if nrow(.df_row) = 0 NULL (blank columns)

    ### 2/ if nrow(df) = 0 no ancova - lsmean and lsmean diff should be na

    ### 3/ if nrow(df) > 0, & nrow(.ref_group) = 0 ancova for ls mean only, NULL .ref_group -- lsmean diff should
    ### be na

    ### 4/ if nrow(df) > 0, & nrow(.ref_group) > 0 and at least one group with 0 data update levels in
    ### .df_row/df/.ref_group to avoid problems for contrast

    #### by making updates to .df_row/df/.ref_group, situation 3 and 4 could be used together with general case

    .df_row_trtlevels <- unique(.df_row[[trt_var]])

    if (NROW(.df_row_trtlevels) == 0) {
      ### No data at all
      x_stats <- NULL
    } else if (NROW(df) == 0 || NROW(.df_row_trtlevels) == 1) {
      ### current column no data/less than 2 treatment levels

      x_stats <- list()
      x_stats[[mystat1[1]]] <- rep(NA, 3)
      x_stats[[mystat1[2]]] <- rep(NA, 3)
    } else {
      ### ancova situation, and some updates to prevent problems with sparse data
      if ((!ctrl_grp %in% .df_row_trtlevels)) {
        # LS means for current group can be estimated from model, but not difference in LS means set .ref_group
        # to NULL to proceed with s_summarize_ancova_j function update to underlying s_ancova_j to cover NULL
        # ref_group in call
        .ref_group <- NULL
      }
      if (NROW(.df_row_trtlevels) < length(levels(.df_row[[trt_var]]))) {
        # missing levels need to be removed from the factor, in order to have the correct estimates, and avoid
        # errors with underlying tern:::s_ancova function
        .df_row[[trt_var]] <- droplevels(.df_row[[trt_var]])
        df[[trt_var]] <- factor(as.character(df[[trt_var]]), levels = levels(.df_row[[trt_var]]))
        .ref_group[[trt_var]] <- factor(as.character(.ref_group[[trt_var]]), levels = levels(.df_row[[trt_var]]))
      }

      x_stats <- s_summarize_ancova_j(
        df = df,
        .var = .var,
        .ref_group = .ref_group,
        .in_ref_col = .in_ref_col,
        .df_row = .df_row,
        conf_level = conf_level,
        interaction_y = interaction_y,
        interaction_item = interaction_item,
        variables = variables
      )
    }
  }

  y <- list(mean_ci_3d = x_stats[[mystat1[1]]], meandiff_ci_3d = x_stats[[mystat1[2]]])
  return(y)
}


xxd_to_xx <- function(str, d = 0) {
  checkmate::assert_integerish(d, null.ok = TRUE)
  if (checkmate::test_list(str, null.ok = FALSE)) {
    checkmate::assert_list(str, null.ok = FALSE)
    # Or it may be a vector of characters
  } else {
    checkmate::assert_character(str, null.ok = FALSE)
  }

  nmstr <- names(str)

  if (any(grepl("xx.d", str, fixed = TRUE))) {
    checkmate::assert_integerish(d)
    str <- gsub("xx.d", paste0("xx.", strrep("x", times = d)), str, fixed = TRUE)
  }
  str <- stats::setNames(str, nmstr)
  return(str)
}

format_xxd <- function(str, d = 0, .df_row, formatting_fun = NULL) {
  # Handling of data precision
  if (!is.numeric(d)) {
    if (is.character(d) && length(d) == 1) {
      # check if d is a variable name available in .df_row
      if (d %in% names(.df_row)) {
        d <- max(.df_row[[d]], na.rm = TRUE)
      } else {
        message(paste("precision has been reset to d = 0, as variable", d, "not present on input"))
        d <- 0
      }
    }
  }
  # convert xxd type of string to xx
  fmt <- xxd_to_xx(str = str, d = d)

  if (!is.null(formatting_fun)) {
    fmt <- formatting_fun(fmt)
  }

  return(fmt)
}

#' @name a_summarize_aval_chg_diff_j
#'
#' @title Analysis function 3-column presentation
#'
#' @inherit proposal_argument_convention
#'
#' @description Analysis functions to produce a 1-row summary presented in
#' a 3-column layout in the columns: column 1: N, column 2: Value, column 3: change\cr
#' In the difference columns, only 1 column will be presented : difference + CI\cr
#' When ancova = `TRUE`, the presented statistics will be based on ANCOVA method (`s_summarize_ancova_j`).\cr
#' mean and ci (both for Value (column 2) and Chg (column 3)) using statistic `lsmean_ci`\cr
#' mean and ci for the difference column are based on same ANCOVA model using statistic `lsmean_diffci`\cr
#' When ancova = `FALSE`, descriptive statistics will be used instead.\cr
#' In the difference column, the 2-sample t-test will be used.
#'
#' @details See Description
#'
#' @inheritParams proposal_argument_convention
#' @param denom (`string`)\cr choice of denominator for proportions. Options are:
#'   * `N`: number of records in this column/row split.
#' \cr There is no check in place that the current split only has one record per subject.
#' Users should be careful with this.
#'   * `.N_col`: number of records in this column intersection (based on alt_counts_df dataset)
#'   \cr (when alt_counts_df is a single record per subjects, this will match number of subjects)
#'
#' @param d (default = 1) \cr choice of Decimal precision.
#' Note that one extra precision will be added, as means are presented.
#'  \cr Options are:
#'   * numerical(1)
#'   * variable name containing information on the precision, this variable
#'   should be available on input dataset. The content of this variable should
#'   then be an integer.
#'
#' @param ancova (`logical`)\cr If FALSE, only descriptive methods will be used. \cr
#' If TRUE Ancova methods will be used for each of the columns : AVAL, CHG, DIFF. \cr
#' @param comp_btw_group (`logical`)\cr If TRUE,
#' \cr When ancova = FALSE, the estimate of between group difference (on CHG)
#' will be based upon a two-sample t-test. \cr
#' \cr When ancova = TRUE, the same ancova model will be used for the estimate of between group difference (on CHG).
#'
#' @param interaction_y (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
#' @param interaction_item (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
#' @param conf_level (`proportion`)\cr Confidence level of the interval
#' @param variables (named list of strings)\cr
#' list of additional analysis variables, with expected elements:
#'    * arm (string)\cr
#' group variable, for which the covariate adjusted means of multiple groups will be summarized.
#' Specifically, the first level of arm variable is taken as the reference group.
#'    * covariates (character)\cr
#' a vector that can contain single variable names (such as 'X1'), and/or interaction terms indicated by 'X1 * X2'.
#'
#'
#' @param format_na_str (`string`)\cr
#'
#' @param indatavar (`string`)\cr If not null, variable name to extra subset
#' incoming df to non-missing values of this variable.
#' @param multivars (`string(3)`)\cr Variables names to use in 3-col layout.
#'
#' @param .stats  (named `list`)\cr column statistics to select for the table.
#' The following column names are to be used: `col1`, `col23`, `coldiff`.\cr
#' For `col1`, the following stats can be specified.\cr
#' For `col23`, only `mean_ci_3d` is available. When ancova=`TRUE` these are LS Means, otherwise, arithmetic means.\cr
#' For `coldiff`, only `meandiff_ci_3d` is available. When ancova=`TRUE` these
#' are LS difference in means, otherwise, difference in means based upon 2-sample t-test.\cr
#' @param .formats (named `list`)\cr formats for the column statistics. `xx.d` style formats can be used.
#' @param .formats_fun (named `list`)\cr formatting functions for the column
#' statistics, to be applied after the conversion of `xx.d` style to the
#' appropriate precision.
#'
#' @rdname a_summarize_aval_chg_diff_j
#' @return A function that can be used in an analyze function call
#' @export
#'
#'
#' @examples
#'
#' library(dplyr)
#'
#' ADEG <- data.frame(
#'   STUDYID = c(
#'     "DUMMY", "DUMMY", "DUMMY", "DUMMY", "DUMMY",
#'     "DUMMY", "DUMMY", "DUMMY", "DUMMY", "DUMMY"
#'   ),
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   TRT01A = c(
#'     "ARMA", "ARMA", "ARMA", "ARMA", "ARMA", "Placebo",
#'     "Placebo", "Placebo", "ARMA", "ARMA"
#'   ),
#'   PARAM = c("BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP"),
#'   AVISIT = c(
#'     "Visit 1", "Visit 1", "Visit 1", "Visit 1", "Visit 1",
#'     "Visit 1", "Visit 1", "Visit 1", "Visit 1", "Visit 1"
#'   ),
#'   AVAL = c(56, 78, 67, 87, 88, 93, 39, 87, 65, 55),
#'   CHG = c(2, 3, -1, 9, -2, 0, 6, -2, 5, 2)
#' )
#'
#' ADEG <- ADEG |>
#'   mutate(
#'     TRT01A = as.factor(TRT01A),
#'     STUDYID = as.factor(STUDYID)
#'   )
#'
#' ADEG$colspan_trt <- factor(ifelse(ADEG$TRT01A == "Placebo", " ", "Active Study Agent"),
#'   levels = c("Active Study Agent", " ")
#' )
#' ADEG$rrisk_header <- "Risk Difference (%) (95% CI)"
#' ADEG$rrisk_label <- paste(ADEG$TRT01A, paste("vs", "Placebo"))
#'
#' colspan_trt_map <- create_colspan_map(ADEG,
#'   non_active_grp = "Placebo",
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = "TRT01A"
#' )
#' ref_path <- c("colspan_trt", " ", "TRT01A", "Placebo")
#'
#' lyt <- basic_table() |>
#'   split_cols_by(
#'     "colspan_trt",
#'     split_fun = trim_levels_to_map(map = colspan_trt_map)
#'   ) |>
#'   split_cols_by("TRT01A") |>
#'   split_rows_by(
#'     "PARAM",
#'     label_pos = "topleft",
#'     split_label = "Blood Pressure",
#'     section_div = " ",
#'     split_fun = drop_split_levels
#'   ) |>
#'   split_rows_by(
#'     "AVISIT",
#'     label_pos = "topleft",
#'     split_label = "Study Visit",
#'     split_fun = drop_split_levels,
#'     child_labels = "hidden"
#'   ) |>
#'   split_cols_by_multivar(
#'     c("AVAL", "AVAL", "CHG"),
#'     varlabels = c("n/N (%)", "Mean (CI)", "CFB (CI)")
#'   ) |>
#'   split_cols_by("rrisk_header", nested = FALSE) |>
#'   split_cols_by(
#'     "TRT01A",
#'     split_fun = remove_split_levels("Placebo"),
#'     labels_var = "rrisk_label"
#'   ) |>
#'   split_cols_by_multivar(c("CHG"), varlabels = c(" ")) |>
#'   analyze("STUDYID",
#'     afun = a_summarize_aval_chg_diff_j,
#'     extra_args = list(
#'       format_na_str = "-", d = 0,
#'       ref_path = ref_path, variables = list(arm = "TRT01A", covariates = NULL)
#'     )
#'   )
#'
#' result <- build_table(lyt, ADEG)
#'
#' result
#' @seealso s_summarize_ancova_j
#' @family Inclusion of ANCOVA Functions
a_summarize_aval_chg_diff_j <- function(
    df,
    .df_row,
    .spl_context,
    ancova = FALSE,
    comp_btw_group = TRUE,
    ref_path = NULL,
    .N_col,
    denom = c("N", ".N_col"),
    indatavar = NULL,
    d = 0,
    id = "USUBJID",
    interaction_y = FALSE,
    interaction_item = NULL,
    conf_level = 0.95,
    variables = list(arm = "TRT01A", covariates = NULL),
    format_na_str = "",
    .stats = list(col1 = "count_denom_frac", col23 = "mean_ci_3d", coldiff = "meandiff_ci_3d"),
    .formats = list(col1 = NULL, col23 = "xx.dx (xx.dx, xx.dx)", coldiff = "xx.dx (xx.dx, xx.dx)"),
    .formats_fun = list(col1 = jjcsformat_count_denom_fraction, col23 = jjcsformat_xx, coldiff = jjcsformat_xx),
    multivars = c("AVAL", "AVAL", "CHG")) {
  denom <- match.arg(denom)

  if (comp_btw_group && is.null(ref_path)) {
    stop("ref_path cannot be NULL, please specify it. See ?get_ref_info for details.")
  }

  if (!(length(multivars) == 3)) {
    stop("argument multivars must be of length 3.")
  }

  if (!(all(multivars %in% names(df)))) {
    stop("all variables specified in multivars must be available on input dataset df.")
  }

  if (multivars[1] == multivars[2]) {
    multivars[2] <- paste0(multivars[2], "._[[2]]_.")
  }

  if (
    !is.list(.stats) ||
      !all(c("col1", "col23", "coldiff") %in% names(.stats)) ||
      !is.list(.formats) ||
      !all(c("col1", "col23", "coldiff") %in% names(.formats)) ||
      !is.list(.formats_fun) ||
      !all(c("col1", "col23", "coldiff") %in% names(.formats_fun))
  ) {
    stop(paste(
      "Issue a_summarize_aval_chg_diff_j \n.stats/.formats/.formats_fun must",
      "be a list with names c(\"col1\", \"col23\", \"coldiff\")."
    ))
  }

  row_vars <- .spl_context$split
  row_val <- .spl_context$cur_col_split_val

  col_split <- .spl_context$cur_col_split

  ## treatment group value/variable KEY assumption : treatment variable is the last split_col prior to
  ## split_cols_by_multivar
  colvars <- col_split[[NROW(.spl_context)]]
  colvars_multivars <- which(colvars == "multivars")
  if (identical(colvars_multivars, integer(0))) {
    stop("Layout must at least contain a split_cols_by_multivar call.")
  }
  if (colvars_multivars == 1) {
    stop("Layout must at least contain a split_cols_by prior to split_cols_by_multivar call.")
  }
  ## KEY assumption : treatment variable is the last split_col prior to split_cols_by_multivar
  trt_var <- .spl_context$cur_col_split[[NROW(.spl_context)]][colvars_multivars - 1]
  trt_val <- .spl_context$cur_col_split_val[[NROW(.spl_context)]][colvars_multivars - 1]

  mysplitlevel <- length(.spl_context$split)
  cur_lvl <- .spl_context$value[[mysplitlevel]]
  cur_col_id <- .spl_context$cur_col_id[[mysplitlevel]]

  cur_param <- .spl_context$value[[mysplitlevel - 1]]

  indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE)

  # Early return if we're not comparing between groups but we're in a difference column
  if (!comp_btw_group && indiffcol) {
    return(rcell(NULL, format = NULL, label = cur_lvl, format_na_str = format_na_str))
  }

  ## variable that is analyzed
  last_val <- utils::tail(.spl_context$cur_col_split_val[[NROW(.spl_context)]], 1)

  # as AVAL will be utilized twice, the second call results in last_val == 'AVAL._[[2]]_.'  take the first part of it
  # in flast_val (will become AVAL) flast_val <- stringr::str_split_1(last_val, pattern = '._')[1]
  flast_val <- unlist(strsplit(last_val, "._"))[1]

  x_stats <- NULL
  fmt_d <- NULL

  .in_ref_col <- FALSE
  .ref_group <- NULL
  if (comp_btw_group) {
    trt_var_refspec <- utils::tail(ref_path, n = 2)[1]
    checkmate::assert_true(identical(trt_var, trt_var_refspec))
    # ctrl_grp
    ctrl_grp <- utils::tail(ref_path, n = 1)

    ### check that ctrl_grp is a level of the treatment variable, in case riskdiff is requested
    if (!ctrl_grp %in% levels(df[[trt_var]])) {
      stop(paste0(
        "control group specification in ref_path argument (",
        ctrl_grp,
        ") is not a level of your treatment group variable (",
        trt_var,
        ")."
      ))
    }

    if (trt_val == ctrl_grp) .in_ref_col <- TRUE

    .ref_group <- .df_row[.df_row[[trt_var]] == ctrl_grp, ]
  }

  if (last_val == multivars[1] && !indiffcol) {
    mystat <- .stats[["col1"]]
    if (is.null(.formats[["col1"]])) {
      fmt <- .formats_fun[["col1"]]
    } else {
      fmt <- .formats[["col1"]]
    }

    x_stats <- s_aval_chg_col1(
      df = df,
      .var = flast_val,
      denom = denom,
      .N_col = .N_col,
      id = id,
      indatavar = indatavar
    )
  } else if (last_val %in% multivars[2:3]) {
    x_stats <- s_aval_chg_col23_diff(
      df = df,
      .var = flast_val,
      .df_row = .df_row,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      ancova = ancova,
      interaction_y = interaction_y,
      interaction_item = interaction_item,
      conf_level = conf_level,
      variables = variables,
      trt_var = trt_var,
      ctrl_grp = ctrl_grp,
      cur_param = cur_param,
      cur_lvl = cur_lvl
    )

    if (comp_btw_group && indiffcol) {
      mystat1 <- "coldiff"
    } else {
      mystat1 <- "col23"
    }
    mystat <- .stats[[mystat1]]

    fmt_d <- .formats[[mystat1]]
    formatting_fun <- .formats_fun[[mystat1]]

    fmt <- format_xxd(fmt_d, d = d, .df_row = .df_row, formatting_fun = formatting_fun)
  }
  x_stats <- x_stats[[mystat]]

  ##
  ret <- rcell(x_stats, format = fmt, label = cur_lvl, format_na_str = format_na_str)

  return(ret)
}

#' Non-blank Sentinel
#'
#' @keywords internal
non_blank_sentinel <- structure("", class = "non_blank_sentinel")

#' Get Control Subset
#'
#' Retrieves a subset of the DataFrame based on treatment variable and control group.
#'
#' @param df Data frame to subset.
#' @param trt_var Treatment variable name.
#' @param ctrl_grp Control group value.
#' @return Subset of the data frame.
#' @keywords internal
get_ctrl_subset <- function(df, trt_var, ctrl_grp) {
  df[df[[trt_var]] == ctrl_grp, ]
}


# sfunction to perform counting of records or subjects on an incoming df and .alt_df

#' Null Function
#'
#' A function that returns NULL.
#'
#' @return NULL
#' @keywords internal
null_fn <- function(...) {
  NULL
}


#' Create Alternative Data Frame
#'
#' Creates an alternative data frame based on the current split context.
#'
#' @param .spl_context Current split context.
#' @param .df_row Current data frame row.
#' @param denomdf Denominator data frame.
#' @param denom_by Denominator grouping variable.
#' @param id Identifier variable.
#' @param variables Variables to include in the analysis.
#' @param denom Denominator type.
#' @return Grand parent dataset.
#' @export
h_create_altdf <- function(.spl_context, .df_row, denomdf, denom_by = NULL, id, variables, denom) {
  ### parent df in the current row-split (all col splits are still in)
  pardf <- .spl_context$full_parent_df[[NROW(.spl_context)]]

  ## if no denomdf defined, use input if you want to use alt_source df, you better pass on the same dataframe as
  ## denomdf in the function call

  colsplit <- .spl_context$cur_col_split[[1]]
  if (length(colsplit) == 1 && tolower(colsplit) == "total") {
    colsplit <- NULL
  }

  if (is.null(denomdf) || denom %in% c("n_df", "n_rowdf")) {
    #### once we have the rtables version >= 0.6.12 -- is.null(denomdf) only will happen when build_table has been
    #### used without alt_counts_df argument denom options c('N_col', 'n_df', 'n_altdf', 'N_colgroup', 'n_rowdf',
    #### 'n_parentdf') note that n_altdf always has a non-null denomdf N_colgroup and n_parentdf will be handled
    #### separately n_parentdf in h_denom_parentdf N_colgroup with h_colexpr_substr

    denomdf <- unique(.df_row[, c(id, variables$strata, colsplit, denom_by), drop = FALSE])
  }

  # grand parent dataset starts of from denomdf -- not yet rowsplits this is for the risk diff columns to include all
  # subjects to start from
  gpardf <- denomdf

  nm_gpardf <- names(gpardf)

  sbgrpvar <- intersect(.spl_context$split, nm_gpardf)

  cursbgrp_value <- NULL

  if (!is.null(denom_by) && length(sbgrpvar) > 0) {
    ### assumption: the subgroup is the first variable in the row-split if this assumption is not valid ---
    ### re-design and let user pass in the subgroup variable
    sbgrpvar <- denom_by[1]
    cur_index <- which(.spl_context$split == sbgrpvar)
    if (length(cur_index) > 0) {
      cursbgrp_value <- .spl_context$value[[cur_index]]

      gpardf <- subset(denomdf, eval(denomdf[[sbgrpvar]] == cursbgrp_value))
    }
    if (length(cur_index) == 0) {
      # cat(paste('sbgrpvar', sbgrpvar)) cat(paste( unique(.spl_context$value), sep = ', '))
    }
  }

  ## note this is a rowsplit only, not yet column split
  return(gpardf)
}

#' No Data to Report String
#'
#' A constant string used when there is no data to display in a table.
#' This is used as a placeholder in tables when no data is available for a particular category.
#'
#' @return A character string with the value "No data to report".
#'
#' @export
#' @keywords internal
no_data_to_report_str <- "No data to report"


#' Update Factor
#'
#' Updates a factor variable in a data frame based on specified values.
#'
#' @param df Data frame containing the variable to update.
#' @param .var Variable name to update.
#' @param val Values to keep.
#' @param excl_levels Levels to exclude from the factor.
#' @return Updated data frame.
#' @export
h_update_factor <- function(df, .var, val = NULL, excl_levels = NULL) {
  if (!is.factor(df[[.var]]) || (is.null(val) && is.null(excl_levels))) {
    return(df)
  }

  if ((!is.null(val) && !is.null(excl_levels))) {
    stop("update_factor cannot be used with both val and excl_levels specified.")
  }

  all_levels <- levels(df[[.var]])

  if (!is.null(val)) {
    exclude_levels <- all_levels[!(all_levels %in% val)]
    remaining_levels <- setdiff(all_levels, exclude_levels)
  }
  if (!is.null(excl_levels)) {
    exclude_levels <- all_levels[all_levels %in% excl_levels]
    remaining_levels <- setdiff(all_levels, exclude_levels)
  }

  ## introduce level No data to report
  if (length(remaining_levels) == 0) {
    remaining_levels <- no_data_to_report_str
  }
  df[[.var]] <- factor(as.character(df[[.var]]), levels = remaining_levels)

  return(df)
}


#' Extract Substring from Column Expression
#'
#' Retrieves the substring from a column expression related to a variable component.
#'
#' get substring from col_expr related to var component
#' intended usage is on strings coming from .spl_context$cur_col_expr
#' these strings are of type '!(is.na(var) & var %in% 'xxx') & !(is.na(var2) & var2 %in% 'xxx')'
#'
#' @param var Variable to extract from the expression.
#' @param col_expr Column expression string.
#' @return Substring corresponding to the variable.
#' @export
h_colexpr_substr <- function(var, col_expr) {
  # reconstructing the strings is not an option as doesn't work for combined columns facets
  cur_col_expr <- as.character(col_expr)

  if (!grepl(var, cur_col_expr, fixed = TRUE)) {
    return(NULL)
  }

  z2 <- paste0("(!is.na(", var, ") & ", var, " %in%")
  start <- regexpr(z2, cur_col_expr, fixed = TRUE)
  end <- start + attr(start, "match.length") - 1
  z1 <- cbind(start, end)

  ### start of the string
  z1_start <- z1[1, 1]

  ### figure out what is the appropriate end expression has several & in string

  positions <- gregexpr("&", cur_col_expr, fixed = TRUE)
  start <- unlist(positions)
  end <- start + attr(positions[[1]], "match.length") - 1
  z3 <- cbind(start, end)[, 1]

  ### get the first & after the end of z3 ((!is.na(TRT01A) & TRT01A %in%)
  h <- z3 > z1[, 2]
  if (any(h)) {
    z4 <- which.max(h)
    z1_end <- z3[z4] - 3
  } else {
    z1_end <- nchar(cur_col_expr)
  }

  col_expr_substr <- substr(cur_col_expr, z1_start, z1_end)

  return(col_expr_substr)
}

#' Get Denominator Parent Data Frame
#'
#' Retrieves the parent data frame based on denominator.
#'
#' @param .spl_context Current split context.
#' @param denom Denominator type.
#' @param denom_by Denominator grouping variable.
#' @return Parent data frame.
#' @export
h_denom_parentdf <- function(.spl_context, denom, denom_by) {
  if (denom != "n_parentdf") {
    return(NULL)
  }
  if (is.null(denom_by)) {
    stop("denom_by must be specified when using denom = 'n_parentdf'.")
  }
  split <- .spl_context$split
  if (split[1] == "root" && !is.null(denom_by)) {
    denom_by <- c("root", denom_by)
  }
  split <- intersect(denom_by, split)
  parentdf <- .spl_context$full_parent_df[[length(split)]]
  return(parentdf)
}

#' Add New Levels to Data Frame
#'
#' Adds new factor levels to a specified variable in the data frame.
#'
#' @param df Data frame to update.
#' @param .var Variable to which new levels will be added.
#' @param new_levels List of new levels to add.
#' @param addstr2levs String to add to new levels.
#' @param new_levels_after Boolean, indicating if new levels should be added after existing levels.
#' @return Updated data frame.
#' @export
h_df_add_newlevels <- function(df, .var, new_levels, addstr2levs = NULL, new_levels_after) {
  varvec <- df[[.var]]

  levs <- if (is.factor(varvec)) levels(varvec) else sort(unique(varvec))

  if (!is.null(new_levels)) {
    ## assumption: new_levels[[1]] : names of the new levels new_levels[[2]] : values of the new levels
    if (length(new_levels[[1]]) != length(new_levels[[2]])) {
      stop(
        "new_levels must be a list of length 2, second element must be a ",
        "list of same length as first element."
      )
    }
    if (any(duplicated(unlist(new_levels[[2]])))) {
      stop(
        "unlist(new_levels[[2]]) contains duplicates: duplicate assignment ",
        "of a level to a new level is not allowed."
      )
    }

    sortnewlevs <- unlist(lapply(X = new_levels[[2]], FUN = function(x) {
      min(which(levs %in% x)) - 0.1
    }))

    if (new_levels_after) {
      sortnewlevs <- unlist(lapply(X = new_levels[[2]], FUN = function(x) {
        max(which(levs %in% x)) + 0.1
      }))
    }

    newlevs <- c(levs, new_levels[[1]])
    sortx <- c(seq_along(levs), sortnewlevs)

    newlevsx <- newlevs[order(sortx)]

    levs <- newlevsx

    ### add newlevels to df
    for (i in seq_along(new_levels[[1]])) {
      levii <- unlist(new_levels[[2]][i])

      addi <- df[df[[.var]] %in% levii, ]
      addi[[.var]] <- new_levels[[1]][i]

      df <- dplyr::bind_rows(df, addi)
    }
  }

  levlabels <- levs
  if (!is.null(addstr2levs)) {
    levlabels <- paste0(levs, addstr2levs)
  }

  df[[.var]] <- factor(as.character(df[[.var]]), levels = levs, labels = levlabels)

  return(df)
}


#' Get Treatment Variable Reference Path
#'
#' Retrieves the treatment variable reference path from the provided context.
#'
#' @param ref_path Reference path for treatment variable.
#' @param .spl_context Current split context.
#' @param df Data frame.
#' @return List containing treatment variable details.
#' @export
h_get_trtvar_refpath <- function(ref_path, .spl_context, df) {
  checkmate::check_character(ref_path, min.len = 2L, names = "unnamed")
  checkmate::assert_true(length(ref_path) %% 2 == 0) # Even number of elements in ref_path.

  trt_var <- utils::tail(.spl_context$cur_col_split[[length(.spl_context$cur_col_split)]], n = 1)
  trt_var_refspec <- utils::tail(ref_path, n = 2)[1]

  checkmate::assert_true(identical(trt_var, trt_var_refspec))

  # current group and ctrl_grp
  cur_trt_grp <- utils::tail(.spl_context$cur_col_split_val[[length(.spl_context$cur_col_split_val)]], n = 1)
  ctrl_grp <- utils::tail(ref_path, n = 1)

  ### check that ctrl_grp is a level of the treatment variable, in case riskdiff is requested
  if (!ctrl_grp %in% levels(df[[trt_var]])) {
    stop(paste0(
      "control group specification in ref_path argument (",
      ctrl_grp,
      ") is not a level of your treatment group variable (",
      trt_var,
      ")."
    ))
  }
  return(list(trt_var = trt_var, trt_var_refspec = trt_var_refspec, cur_trt_grp = cur_trt_grp, ctrl_grp = ctrl_grp))
}


#' Update Data Frame Row
#'
#' Updates a row in the data frame based on various parameters.
#'
#' @param df_row Data frame row to update.
#' @param .var Variable name to update.
#' @param val Values to keep.
#' @param excl_levels Levels to exclude from the factor.
#' @param drop_levels Boolean, indicating if levels should be dropped.
#' @param new_levels New levels to add.
#' @param new_levels_after Boolean, indicating if new levels should be added after existing levels.
#' @param addstr2levs String to add to new levels.
#' @param label Label string.
#' @param label_map Mapping for labels.
#' @param labelstr Label string to replace.
#' @param label_fstr Format string for labels.
#' @param .spl_context Current split context.
#' @return List containing updated data frames and values.
#' @export
h_upd_dfrow <- function(
    df_row,
    .var,
    val,
    excl_levels,
    drop_levels,
    new_levels,
    new_levels_after,
    addstr2levs,
    label,
    label_map,
    labelstr,
    label_fstr,
    .spl_context) {
  if (!is.null(label) && !is.null(label_map)) {
    stop("a_freq_j: label and label_map cannot be used together.")
  }

  if (!is.null(labelstr) && (!is.null(val) || !is.null(new_levels) || !is.null(excl_levels))) {
    stop("a_freq_j: val/excl_levels/new_levels cannot be used in a summarize_row_group call.")
  }

  if (!is.null(new_levels) && (is.null(.var) || is.na(.var))) {
    stop("When using new_levels, var must be provided in analyze call with a_freq_j.")
  }

  if (!is.null(val) && drop_levels == TRUE) {
    stop("argument val cannot be used together with drop_levels = TRUE.")
  }

  if (!is.null(val) && !is.null(excl_levels)) {
    stop("argument val and excl_levels cannot be used together.")
  }

  ## start making updates to factor information on incoming data

  if (!is.null(new_levels)) {
    df_row <- h_df_add_newlevels(
      df = df_row,
      .var = .var,
      new_levels = new_levels,
      new_levels_after = new_levels_after,
      addstr2levs = addstr2levs
    )
  }

  # if character var turn incoming data into factor, with levels from row-based df
  if (!is.na(.var) && is.character(df_row[[.var]]) && is.null(labelstr)) {
    levels <- sort(unique(df_row[[.var]]))
    df_row[[.var]] <- factor(df_row[[.var]], levels = levels)
    drop_levels <- FALSE
  }

  if (!is.null(labelstr)) {
    single_level <- labelstr

    if (!is.null(label_fstr) && grepl("%s", label_fstr, fixed = TRUE)) {
      single_level <- sprintf(label_fstr, single_level)
    }

    df_row[[.var]] <- as.character(df_row[[.var]])
    df_row[[.var]][!is.na(df_row[[.var]])] <- single_level

    df_row[[.var]] <- factor(as.character(df_row[[.var]]), levels = single_level)
    drop_levels <- FALSE
  }

  if (drop_levels) {
    obs_levs <- unique(df_row[[.var]])
    obs_levs <- intersect(levels(df_row[[.var]]), obs_levs)

    if (!is.null(excl_levels)) obs_levs <- setdiff(obs_levs, excl_levels)

    val <- obs_levs
    excl_levels <- NULL
  }

  if (!is.null(val)) {
    # do not yet restrict to val levels, only update factors to the requested levels df_row <-
    df_row <- h_update_factor(df_row, .var, val)
  }
  if (!is.null(excl_levels)) {
    # do not yet exclude the level specified in excl_levels, only update factors to remove requested levels df_row

    df_row <- h_update_factor(df_row, .var, excl_levels = excl_levels)
    val <- levels(df_row[[.var]])
  }

  ## update data with level coming from label -- important that this is done after restriction to val levels!!!
  if (!is.null(label)) {
    # similar to processing when labelstr
    single_level <- label
    df_row[[.var]] <- as.character(df_row[[.var]])
    df_row[[.var]][!is.na(df_row[[.var]])] <- single_level
    val <- label
    df_row[[.var]] <- factor(df_row[[.var]], levels = single_level)
  }

  # now update labels coming from label_map
  if (!is.null(label_map)) {
    split_info <- .spl_context[c("split", "value")]
    new_labels <- h_get_label_map(levels(df_row[[.var]]), label_map, .var, split_info)

    df_row[[.var]] <- factor(as.character(df_row[[.var]]), levels = levels(df_row[[.var]]), labels = new_labels)

    val <- new_labels
  }

  # now apply the updated factors from df_row to df as well due to drop_levels = TRUE can have different results when
  # applying all of the above to df
  col_expr <- .spl_context$cur_col_expr[[1]]
  df <- subset(df_row, eval(col_expr))

  return(list(df_row = df_row, df = df, val = val))
}


#' Get Label Map
#'
#' Maps labels based on the provided label map and split context.
#'
#' @param .labels Current labels.
#' @param label_map Mapping for labels.
#' @param .var Variable name.
#' @param split_info Current split information.
#' @return Mapped labels.
#' @export
h_get_label_map <- function(.labels, label_map, .var, split_info) {
  if (!is.null(label_map)) {
    if (!all(c("split", "value") %in% names(split_info))) {
      stop("split_info does not contain required elements.")
    }

    ### if label_map has a variable from row split, apply current splits on label_map tibble as well
    rowsplits <- split_info$split

    label_map_split <- intersect(names(label_map), rowsplits)

    if (!(length(label_map_split) == 0)) {
      for (i in seq_along(label_map_split)) {
        cursplvar <- label_map_split[i]
        cid <- match(cursplvar, rowsplits)
        cursplval <- split_info$value[cid]

        label_map <- label_map[label_map[[cursplvar]] == cursplval, ]
      }
    }

    if ("var" %in% names(label_map)) {
      label_map <- label_map[label_map[["var"]] == .var, ]
    }

    .labels <- label_map$label[match(.labels, label_map$value)]

    if (anyNA(.labels)) {
      stop("got a label map that doesn't provide labels for all values.")
    }
  }

  return(.labels)
}


#' A Frequency Data Preparation Function
#'
#' Prepares frequency data for analysis.
#'
#' @param df Data frame to prepare.
#' @param labelstr Label string.
#' @param .var Variable name.
#' @param val Values for analysis.
#' @param drop_levels Boolean, indicating if levels should be dropped.
#' @param excl_levels Levels to exclude.
#' @param new_levels New levels to add.
#' @param new_levels_after Boolean for adding new levels after existing ones.
#' @param addstr2levs String to add to new levels.
#' @param .df_row Current data frame row.
#' @param .spl_context Current split context.
#' @param .N_col Number of columns.
#' @param id Identifier variable.
#' @param denom Denominator types.
#' @param variables Variables to include in the analysis.
#' @param label Label string.
#' @param label_fstr Formatted label string.
#' @param label_map Mapping for labels.
#' @param .alt_df_full Alternative full data frame.
#' @param denom_by Denominator grouping variable.
#' @param .stats Statistics to compute.
#' @return List containing prepared data frames and values.
#' @export
h_a_freq_dataprep <- function(
    df,
    labelstr = NULL,
    .var = NA,
    val = NULL,
    drop_levels = FALSE,
    excl_levels = NULL,
    new_levels = NULL,
    new_levels_after = FALSE,
    addstr2levs = NULL,
    .df_row,
    .spl_context,
    .N_col,
    id = "USUBJID",
    denom = c("N_col", "n_df", "n_altdf", "N_colgroup", "n_rowdf", "n_parentdf"),
    variables,
    label = NULL,
    label_fstr = NULL,
    label_map = NULL,
    .alt_df_full = NULL,
    denom_by = NULL,
    .stats) {
  denom <- match.arg(denom)

  df <- df[!is.na(df[[.var]]), ]
  .df_row <- .df_row[!is.na(.df_row[[.var]]), ]

  # if no stats requested, get all stats
  .stats <- junco_get_stats("a_freq_j", stats_in = .stats, custom_stats_in = NULL)

  ### combine all preprocessing of incoming df/.df_row in one function do this outside stats derivation functions
  ### (s_freq_j/) use all of val/excl_levels/drop_levels//new_levels/ label/label_map/labelstr/label_fstr
  upd_dfrow <- h_upd_dfrow(
    df_row = .df_row,
    .var = .var,
    val = val,
    excl_levels = excl_levels,
    drop_levels = drop_levels,
    new_levels = new_levels,
    new_levels_after = new_levels_after,
    addstr2levs = addstr2levs,
    label = label,
    label_map = label_map,
    labelstr = labelstr,
    label_fstr = label_fstr,
    .spl_context = .spl_context
  )

  .df_row <- upd_dfrow$df_row
  df <- upd_dfrow$df

  val <- upd_dfrow$val

  # from here onwards proceed with drop_levels = FALSE action has already been done in h_upd_dfrow, and proper
  # observed values will be passed to val for s_freq_j
  drop_levels <- FALSE
  excl_levels <- NULL

  ### derive appropriate alt_df based upon .spl_context and .alt_df_full note that only row-based splits are done for
  ### now only for variables from the first split_rows_by
  alt_df <- h_create_altdf(
    .spl_context,
    .df_row,
    .alt_df_full,
    denom_by = denom_by,
    id = id,
    variables = variables,
    denom = denom
  )

  new_denomdf <- alt_df

  parentdf <- h_denom_parentdf(.spl_context, denom, denom_by)
  if (denom == "n_parentdf") {
    new_denomdf <- parentdf
  }

  return(list(
    df = df,
    .df_row = .df_row,
    val = val,
    drop_levels = drop_levels,
    excl_levels = excl_levels,
    alt_df = alt_df,
    parentdf = parentdf,
    new_denomdf = new_denomdf,
    .stats = .stats
  ))
}


#' Frequency Preparation in Rows
#'
#' Prepares frequency data in rows based on provided parameters.
#'
#' @param x_stats Statistics data.
#' @param .stats_adj Adjusted statistics.
#' @param .formats Format settings.
#' @param labelstr Label string.
#' @param label_fstr Formatted label string.
#' @param label Label string.
#' @param .indent_mods Indentation settings.
#' @param .labels_n Labels for statistics.
#' @param na_str String for NA values.
#' @return List containing prepared statistics, formats, labels, and indentation.
#' @export
h_a_freq_prepinrows <- function(
    x_stats,
    .stats_adj,
    .formats,
    labelstr,
    label_fstr,
    label,
    .indent_mods,
    .labels_n,
    na_str) {
  # Fill in formatting defaults

  x_stats <- x_stats[.stats_adj]

  levels_per_stats <- lapply(x_stats, names)

  # Fill in formats/indents/labels with custom input and defaults
  .formats <- junco_get_formats_from_stats(.stats_adj, .formats, levels_per_stats)

  # lbls
  all_stats <- junco_get_stats("a_freq_j", stats_in = NULL, custom_stats_in = NULL)
  count_rr_stats <- grep("(count|rr_ci)", all_stats, value = TRUE)
  n_stats <- setdiff(all_stats, count_rr_stats)
  n_stats <- .stats_adj[.stats_adj %in% n_stats]

  if (length(.stats_adj) == 1 && length(n_stats) == 1) {
    if (!is.null(labelstr) && (!is.null(label))) {
      lbls <- label
    } else if (!is.null(labelstr) && !is.null(.labels_n)) {
      lbls <- .labels_n
    } else if (!is.null(labelstr) && is.null(label_fstr)) {
      lbls <- labelstr
    } else if (!is.null(labelstr) && !is.null(label_fstr) && grepl("%s", label_fstr, fixed = TRUE)) {
      lbls <- sprintf(label_fstr, labelstr)
    } else if (!is.null(label)) {
      lbls <- label
    } else {
      .labels <- .labels_n
      lbls <- junco_get_labels_from_stats(.stats_adj, .labels, levels_per_stats)
    }
  } else {
    if (length(n_stats) > 1 && is.null(.labels_n)) {
      msg <- paste0(
        "recommend to specify non-null .labels_n argument when multiple n_stats selected (",
        paste0(n_stats, collapse = ", "),
        ")"
      )
      message(msg)
    }
    ### labels from the non n-stat statistics are present on levels_per_stats labels for the n-stat statistics come
    ### from .labels_n (user) or from defaults
    .labels <- .labels_n
    lbls <- junco_get_labels_from_stats(.stats_adj, .labels, levels_per_stats)
  }
  .labels <- lbls
  .labels <- .unlist_keep_nulls(.labels)

  # indents
  .indent_mods_orig <- .indent_mods
  .indent_mods <- junco_get_indents_from_stats(.stats_adj, .indent_mods, levels_per_stats)

  # adjust indents when n_stat is included and no indents where passed by user
  if (length(n_stats) >= 1 && is.null(.indent_mods_orig)) {
    .indent_mods_new <- sapply(
      names(.indent_mods),
      FUN = function(x) {
        ret <- 0
        if (!(x %in% n_stats)) ret <- 1
        return(ret)
      },
      simplify = FALSE,
      USE.NAMES = TRUE
    )
    .indent_mods <- .indent_mods_new
  }

  .indent_mods <- .unlist_keep_nulls(.indent_mods)

  # .format_na_strs processing if na_str = c(NA, NA, NA) this will ensure the ci (NA, NA, NA) will be represented as
  # NE (NE, NE) the value NE is defined as the default to replace NA in our jjcs format

  if (!is.null(na_str)) {
    # Create a list of na_str values for each format in .formats
    .format_na_strs <- lapply(names(.formats), FUN = function(x) {
      na_str
    })
    names(.format_na_strs) <- names(.formats)
  } else {
    .format_na_strs <- NULL
  }

  # Unlist stats + names
  x_stats <- .unlist_keep_nulls(x_stats)
  names(x_stats) <- names(.formats)

  return(list(
    x_stats = x_stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = .format_na_strs
  ))
}

#' Subset Combination
#'
#' Subsets a data frame based on specified combination criteria.
#'
#' @param df Data frame to subset.
#' @param combosdf Data frame containing combinations.
#' @param do_not_filter Variables to not filter.
#' @param filter_var Variable used for filtering.
#' @param flag_var Flag variable for filtering.
#' @param colid Column ID for identification.
#' @return Subsetted data frame.
#' @export
h_subset_combo <- function(df, combosdf, do_not_filter, filter_var, flag_var, colid) {
  ### this is the core code for subsetting to appropriate combo level
  if (!is.null(flag_var)) {
    df <- df[df[[flag_var]] == "Y", ]
  }

  # get the string related to combosdf text from colid it is the last part of the column id after the .  eg 'Active
  # Study Agent.Apalutamide.Thru 3 months' colid_str is 'Thru 3 months' colid_str <- stringr::str_split_i(colid,
  # '\\.', i = -1)
  colid_str <- tail(unlist(strsplit(colid, "\\.")), 1)

  filter_val <- combosdf[combosdf$valname == colid_str, ]$label

  if (!(colid_str %in% do_not_filter)) {
    df <- df |>
      dplyr::filter(get(filter_var) == filter_val)
  }

  return(df)
}

#' Patient years exposure
#' @description Statistical/Analysis Function for presenting Patient years exposure summary data
#'
#' @name a_patyrs_j
#' @order 1
NULL

#' @describeIn a_patyrs_j Statistical Function for Patient years exposure summary data
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param .var (`string`)\cr variable name containing the patient years data.
#' @param id (`string`)\cr subject variable name.
#' @param .alt_df_full (`dataframe`)\cr alternative dataset for calculations.
#' @param source (`string`)\cr source of data, either "alt_df" or "df".
#' @param inriskdiffcol (`logical`)\cr flag indicating if the function is called within a risk difference column.
#'
#' @return
#' * `s_patyrs_j()` return x a list containing the patient years statistics.
#' The list of available statistics for can be viewed by running `junco_get_stats("a_patyrs_j")`,
#' currently this is just a single statistic `patyrs`, patient years of exposure.
#'
#' @keywords internal
s_patyrs_j <- function(
    df,
    .var,
    id = "USUBJID",
    .alt_df_full,
    source = c("alt_df", "df"),
    inriskdiffcol = FALSE) {
  source <- match.arg(source)

  if (source == "alt_df") {
    if (is.null(.alt_df_full)) {
      stop(paste(
        "s_patyrs_j with source = alt_df requires a non-null .alt_df_full"
      ))
    }
    df <- .alt_df_full
  }
  df <- unique(df[, c(id, .var)])

  x <- list()

  if (!inriskdiffcol) {
    patyrs <- sum(df[[.var]])
  } else {
    patyrs <- list(x = NULL)
  }
  x[["patyrs"]] <- stats::setNames(patyrs, nm = "patyrs")

  return(x)
}

#' @describeIn a_patyrs_j Formatted analysis function for patient years summary which is used
#' as `afun` in `analyze` or `cfun` in `summarize_row_groups`.
#'
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param .var (`string`)\cr variable name containing the patient years data.
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split.
#' @param id (`string`)\cr subject variable name.
#' @param .alt_df_full (`dataframe`)\cr alternative dataset for calculations.
#' @param .formats (named 'character' or 'list')\cr formats for the statistics.
#' @param .labels (named 'character')\cr labels for the statistics.
#' @param source (`string`)\cr source of data, either "alt_df" or "df".
#' @param .spl_context (`data.frame`)\cr gives information about ancestor split states.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#' @return
#' * `a_patyrs_j` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
#'
#' @examples
#' library(tern)
#' library(dplyr)
#' trtvar <- "ARM"
#' ctrl_grp <- "B: Placebo"
#' cutoffd <- as.Date("2023-09-24")
#'
#'
#' adexsum <- ex_adsl %>%
#'   create_colspan_var(
#'     non_active_grp          = ctrl_grp,
#'     non_active_grp_span_lbl = " ",
#'     active_grp_span_lbl     = "Active Study Agent",
#'     colspan_var             = "colspan_trt",
#'     trt_var                 = trtvar
#'   ) %>%
#'   mutate(
#'     rrisk_header = "Risk Difference (95% CI)",
#'     rrisk_label = paste(!!rlang::sym(trtvar), "vs", ctrl_grp),
#'     TRTDURY = case_when(
#'       !is.na(EOSDY) ~ EOSDY,
#'       TRUE ~ as.integer(cutoffd - as.Date(TRTSDTM) + 1)
#'     )
#'   ) %>%
#'   select(USUBJID, !!rlang::sym(trtvar), colspan_trt, rrisk_header, rrisk_label, TRTDURY)
#'
#' adae <- ex_adae %>%
#'   group_by(USUBJID, AEDECOD) %>%
#'   select(USUBJID, AEDECOD, ASTDY) %>%
#'   mutate(rwnum = row_number()) %>%
#'   mutate(AOCCPFL = case_when(
#'     rwnum == 1 ~ "Y",
#'     TRUE ~ NA
#'   )) %>%
#'   filter(AOCCPFL == "Y")
#'
#' aefup <- left_join(adae, adexsum, by = "USUBJID")
#'
#' colspan_trt_map <- create_colspan_map(adexsum,
#'   non_active_grp = ctrl_grp,
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = trtvar
#' )
#'
#' ref_path <- c("colspan_trt", " ", trtvar, ctrl_grp)
#'
#' lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx", top_level_section_div = " ") %>%
#'   split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) %>%
#'   split_cols_by(trtvar) %>%
#'   split_cols_by("rrisk_header", nested = FALSE) %>%
#'   split_cols_by(trtvar, labels_var = "rrisk_label", split_fun = remove_split_levels(ctrl_grp)) %>%
#'   analyze("TRTDURY",
#'     nested = FALSE,
#'     show_labels = "hidden",
#'     afun = a_patyrs_j
#'   )
#' result <- build_table(lyt, aefup, alt_counts_df = adexsum)
#' result
#'
a_patyrs_j <- function(
    df,
    .var,
    .df_row,
    id = "USUBJID",
    .alt_df_full = NULL,
    .formats = NULL,
    .labels = NULL,
    source = c("alt_df", "df"),
    .spl_context,
    .stats = "patyrs") {
  source <- match.arg(source)

  if (length(.stats) > 1 || (length(.stats) == 1 && .stats != "patyrs")) {
    stop("a_patyrs_j: .stats must be 'patyrs'.")
  }

  check_alt_df_full(source, "alt_df", .alt_df_full)

  col_expr <- .spl_context$cur_col_expr[[1]]
  ## colid can be used to figure out if we're in the relative risk columns or not
  colid <- .spl_context$cur_col_id[[1]]
  inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE)

  if (source == "alt_df") {
    ### derive appropriate alt_df based upon .spl_context and .alt_df_full
    ### note that this is not yet within the current column

    alt_df <- h_create_altdf(
      .spl_context,
      .df_row,
      .alt_df_full,
      denom_by = NULL,
      id = id,
      variables = NULL,
      denom = "n_altdf"
    )
    ## restrict to current column
    new_denomdf <- subset(alt_df, eval(col_expr))
  } else {
    new_denomdf <- df
  }

  x_stats <- s_patyrs_j(
    df,
    .alt_df_full = new_denomdf,
    .var = .var,
    id = id,
    source = source,
    inriskdiffcol = inriskdiffcol
  )

  x_stats <- x_stats[.stats]

  levels_per_stats <- lapply(x_stats, names)

  .formats <- junco_get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- junco_get_labels_from_stats(.stats, .labels, levels_per_stats)
  .labels <- .unlist_keep_nulls(.labels)

  x_stats <- x_stats[.stats]

  # Unlist stats + names
  x_stats <- .unlist_keep_nulls(x_stats)
  names(x_stats) <- names(.formats)

  ### final step: turn requested stats into rtables rows
  inrows <- in_rows(
    .list = x_stats,
    .formats = .formats,
    .labels = .labels
  )

  return(inrows)
}

#' Exposure-Adjusted Incidence Rate
#' @description
#' Statistical/Analysis Function for presenting Exposure-Adjusted Incidence Rate summary data
#'
#'
#' @name a_eair100_j
NULL


#' @describeIn a_eair100_j
#' calculates exposure-adjusted incidence rates (EAIR) per 100 person-years for a
#' specific level of a variable.
#'
#'
#' @param levii (`string`)\cr the specific level of the variable to calculate EAIR for.
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split.
#' @param .var (`string`)\cr variable name that is passed by `rtables`.
#' @param .alt_df_full (`dataframe`)\cr alternative dataset for calculations.
#' @param id (`string`)\cr subject variable name.
#' @param diff (`logical`)\cr if TRUE, risk difference calculations will be performed.
#' @param conf_level (`proportion`)\cr confidence level of the interval.
#' @param trt_var (`string`)\cr treatment variable name.
#' @param ctrl_grp (`string`)\cr control group value.
#' @param cur_trt_grp (`string`)\cr current treatment group value.
#' @param inriskdiffcol (`logical`)\cr flag indicating if the function is called within a risk difference column.
#' @param fup_var (`string`)\cr follow-up variable name.
#' @param occ_var (`string`)\cr occurrence variable name.
#' @param occ_dy (`string`)\cr occurrence day variable name.
#'
#' @return
#'  * `s_eair100_levii_j()` returns a list containing the following statistics:
#' \itemize{
#'   \item n_event: Number of events
#'   \item person_years: Total person-years of follow-up
#'   \item eair: Exposure-adjusted incidence rate per 100 person-years
#'   \item eair_diff: Risk difference in EAIR (if diff=TRUE and inriskdiffcol=TRUE)
#'   \item eair_diff_ci: Confidence interval for the risk difference (if diff=TRUE and inriskdiffcol=TRUE)
#' }.\cr
#' The list of available statistics (core columns) can also be viewed by running `junco_get_stats("a_eair100_j")`
#'
#' @keywords internal
s_eair100_levii_j <- function(
    levii,
    df,
    .df_row,
    .var,
    .alt_df_full = NULL,
    id = "USUBJID",
    diff = FALSE,
    # treatment/ref group related arguments
    conf_level = 0.95,
    trt_var = NULL,
    ctrl_grp = NULL,
    cur_trt_grp = NULL,
    inriskdiffcol = FALSE,
    fup_var,
    occ_var,
    occ_dy) {
  if (diff && inriskdiffcol) {
    .alt_df_full_cur_group <- get_ctrl_subset(
      .alt_df_full,
      trt_var = trt_var,
      ctrl_grp = cur_trt_grp
    )
  } else {
    ### within a_eair100_j we need to ensure proper dataframe will be passed to .alt_df_full
    .alt_df_full_cur_group <- .alt_df_full
  }
  cur_dfs <- h_get_eair_df(
    levii,
    df,
    denom_df = .alt_df_full_cur_group,
    .var = .var,
    id = id,
    fup_var = fup_var,
    occ_var = occ_var,
    occ_dy = occ_dy
  )
  cur_df_num <- cur_dfs$df_num
  cur_df_denom <- cur_dfs$df_denom

  ### statistics derivation
  cur_AECOUNT <- length(unique(cur_df_num[[id]]))
  cur_YRSFUP <- sum(cur_df_denom[["mod_fup_var"]])
  cur_eair <- 100 * cur_AECOUNT / cur_YRSFUP

  x <- list()
  x$n_event <- c("n_event" = cur_AECOUNT)
  x$person_years <- c("person_years" = cur_YRSFUP)
  x$eair <- c("eair" = cur_eair)

  if (diff && inriskdiffcol) {
    x$n_event <- c("n_event" = NULL)
    x$person_years <- c("person_years" = NULL)
    x$eair <- c("eair" = NULL)

    alt_df_full_ref_group <- get_ctrl_subset(
      .alt_df_full,
      trt_var = trt_var,
      ctrl_grp = ctrl_grp
    )

    ref_group <- get_ctrl_subset(
      .df_row,
      trt_var = trt_var,
      ctrl_grp = ctrl_grp
    )

    ref_dfs <- h_get_eair_df(
      levii,
      df = ref_group,
      denom_df = alt_df_full_ref_group,
      .var = .var,
      id = id,
      fup_var = fup_var,
      occ_var = occ_var,
      occ_dy = occ_dy
    )

    ref_df_num <- ref_dfs$df_num
    ref_df_denom <- ref_dfs$df_denom

    ### statistics derivation
    ref_AECOUNT <- length(unique(ref_df_num[[id]]))
    ref_YRSFUP <- sum(ref_df_denom[["mod_fup_var"]])
    ref_eair <- 100 * ref_AECOUNT / ref_YRSFUP

    rdiff <- cur_eair - ref_eair

    sd <- sqrt(cur_AECOUNT / cur_YRSFUP^2 + ref_AECOUNT / ref_YRSFUP^2) * 100

    coeff <- stats::qnorm((1 + conf_level) / 2)
    lcl <- rdiff - (coeff * sd)
    ucl <- rdiff + (coeff * sd)

    eair_diff <- c(rdiff, lcl, ucl)

    x$eair_diff <- stats::setNames(
      c("eair_diff" = eair_diff),
      nm = c("estimate", "lcl", "ucl")
    )
  } else {
    x$eair_diff <- c("eair_diff" = NULL)
  }

  return(x)
}

#' @describeIn a_eair100_j
#' Formatted analysis function for exposure adjusted incidence rate summary which is
#' used as `afun` in `analyze` or `cfun` in `summarize_row_groups`.
#'
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param labelstr (`string`)\cr label string for the row.
#' @param .var (`string`)\cr variable name for analysis.
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split.
#' @param .spl_context (`data.frame`)\cr gives information about ancestor split states.
#' @param .alt_df_full (`dataframe`)\cr denominator dataset for calculations.
#' @param id (`string`)\cr subject variable name.
#' @param drop_levels (`logical`)\cr if TRUE, non-observed levels will not be included.
#' @param riskdiff (`logical`)\cr if TRUE, risk difference calculations will be performed.
#' @param ref_path (`string`)\cr column path specifications for the control group.
#' @param .stats (`character`)\cr statistics to select for the table.
#' @param .formats (named 'character' or 'list')\cr formats for the statistics.
#' @param .labels (named 'character')\cr labels for the statistics.
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels.
#' @param na_str (`string`)\cr string used to replace all NA or empty values in the output.
#' @param conf_level (`proportion`)\cr confidence level of the interval.
#' @param fup_var (`string`)\cr variable name for follow-up time.
#' @param occ_var (`string`)\cr variable name for occurrence.
#' @param occ_dy (`string`)\cr variable name for occurrence day.
#'
#' @return
#'  * `a_eair100_j` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
#'
#' @examples
#' library(tern)
#' library(dplyr)
#' trtvar <- "ARM"
#' ctrl_grp <- "B: Placebo"
#' cutoffd <- as.Date("2023-09-24")
#'
#'
#' adexsum <- ex_adsl %>%
#'   create_colspan_var(
#'     non_active_grp          = ctrl_grp,
#'     non_active_grp_span_lbl = " ",
#'     active_grp_span_lbl     = "Active Study Agent",
#'     colspan_var             = "colspan_trt",
#'     trt_var                 = trtvar
#'   ) %>%
#'   mutate(
#'     rrisk_header = "Risk Difference (95% CI)",
#'     rrisk_label = paste(!!rlang::sym(trtvar), "vs", ctrl_grp),
#'     TRTDURY = case_when(
#'       !is.na(EOSDY) ~ EOSDY,
#'       TRUE ~ as.integer(cutoffd - as.Date(TRTSDTM) + 1)
#'     )
#'   ) %>%
#'   select(USUBJID, !!rlang::sym(trtvar), colspan_trt, rrisk_header, rrisk_label, TRTDURY)
#'
#' adexsum$TRTDURY <- as.numeric(adexsum$TRTDURY)
#'
#' adae <- ex_adae %>%
#'   group_by(USUBJID, AEDECOD) %>%
#'   select(USUBJID, AEDECOD, ASTDY) %>%
#'   mutate(rwnum = row_number()) %>%
#'   mutate(AOCCPFL = case_when(
#'     rwnum == 1 ~ "Y",
#'     TRUE ~ NA
#'   )) %>%
#'   filter(AOCCPFL == "Y")
#'
#' aefup <- left_join(adae, adexsum, by = "USUBJID")
#'
#' colspan_trt_map <- create_colspan_map(adexsum,
#'   non_active_grp = ctrl_grp,
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = trtvar
#' )
#'
#' ref_path <- c("colspan_trt", " ", trtvar, ctrl_grp)
#'
#'
#' lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx", top_level_section_div = " ") %>%
#'   split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) %>%
#'   split_cols_by(trtvar) %>%
#'   split_cols_by("rrisk_header", nested = FALSE) %>%
#'   split_cols_by(trtvar, labels_var = "rrisk_label", split_fun = remove_split_levels(ctrl_grp)) %>%
#'   analyze("TRTDURY",
#'     nested = FALSE,
#'     show_labels = "hidden",
#'     afun = a_patyrs_j
#'   ) %>%
#'   analyze(
#'     vars = "AEDECOD",
#'     nested = FALSE,
#'     afun = a_eair100_j,
#'     extra_args = list(
#'       fup_var = "TRTDURY",
#'       occ_var = "AOCCPFL",
#'       occ_dy = "ASTDY",
#'       ref_path = ref_path,
#'       drop_levels = TRUE
#'     )
#'   )
#'
#' result <- build_table(lyt, aefup, alt_counts_df = adexsum)
#' head(result, 5)
#'
a_eair100_j <- function(
    df,
    labelstr = NULL,
    .var,
    .df_row,
    .spl_context,
    .alt_df_full = NULL,
    id = "USUBJID",
    drop_levels = FALSE,
    riskdiff = TRUE,
    ref_path = NULL,
    .stats = c("eair"),
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL,
    na_str = rep("NA", 3),
    # treatment/ref group related arguments
    conf_level = 0.95,
    fup_var,
    occ_var,
    occ_dy) {
  ## prepare for column based split
  col_expr <- .spl_context$cur_col_expr[[1]]
  ## colid can be used to figure out if we're in the relative risk columns or not
  colid <- .spl_context$cur_col_id[[1]]
  inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE)

  # if no stats requested, get all stats
  .stats <- junco_get_stats(
    "a_eair100_j",
    stats_in = .stats,
    custom_stats_in = NULL
  )

  ### combine all preprocessing of incoming df/.df_row in one function
  ### do this outside stats derivation functions (s_freq_j/)
  ### use all of val/excl_levels/drop_levels//new_levels/label/label_map/labelstr/label_fstr
  upd_dfrow <- h_upd_dfrow(
    .df_row,
    .var,
    val = NULL,
    excl_levels = NULL,
    drop_levels = drop_levels,
    new_levels = NULL,
    new_levels_after = FALSE,
    label = NULL,
    label_map = NULL,
    labelstr = labelstr,
    label_fstr = NULL,
    .spl_context = .spl_context
  )

  .df_row <- upd_dfrow$df_row
  df <- upd_dfrow$df

  if (is.null(.alt_df_full)) {
    stop(paste("a_eair100_j: .alt_df_full cannot be NULL."))
  }

  ### derive appropriate alt_df based upon .spl_context and .alt_df_full
  ### note that only row-based splits are done
  ### for now only for variables from the first split_rows_by
  alt_df <- h_create_altdf(
    .spl_context,
    .df_row,
    .alt_df_full,
    denom_by = NULL,
    id = id,
    variables = NULL,
    denom = "n_altdf"
  )
  new_denomdf <- alt_df

  fn_Args <- list(
    df = df,
    .df_row = .df_row,
    .var = .var,
    id = id,
    diff = riskdiff,
    inriskdiffcol = inriskdiffcol,
    fup_var = fup_var,
    occ_var = occ_var,
    occ_dy = occ_dy
  )

  if (riskdiff && inriskdiffcol) {
    trt_var_refpath <- h_get_trtvar_refpath(
      ref_path,
      .spl_context,
      df
    )
    # trt_var_refpath is list with elements
    # trt_var trt_var_refspec cur_trt_grp ctrl_grp
    # make these elements available in current environment
    trt_var <- trt_var_refpath$trt_var
    trt_var_refspec <- trt_var_refpath$trt_var_refspec
    cur_trt_grp <- trt_var_refpath$cur_trt_grp
    ctrl_grp <- trt_var_refpath$ctrl_grp

    fn_args_x <- list(
      .alt_df_full = .alt_df_full,
      # treatment/ref group related arguments
      conf_level = conf_level,
      trt_var = trt_var,
      ctrl_grp = ctrl_grp,
      cur_trt_grp = cur_trt_grp
    )
  } else {
    new_denomdf <- subset(new_denomdf, eval(col_expr))
    fn_args_x <- list(.alt_df_full = new_denomdf)
  }

  fn_Args <- append(fn_Args, fn_args_x)

  levs <- levels(.df_row[[.var]])
  y <- mapply(
    s_eair100_levii_j,
    levii = levs,
    MoreArgs = fn_Args,
    SIMPLIFY = FALSE
  )

  ### rearrange list y to  list to x_stats
  #### this is to ensure the remainder of the code can stay the same as in a_freq_j
  stnms <- c("eair", "eair_diff", "n_event", "person_years")
  x_stats <- extract_x_stats(y, stnms)

  if (!inriskdiffcol) {
    .stats_adj <- .stats
  } else {
    .stats_adj <- replace(.stats, .stats %in% c("eair"), "eair_diff")
  }

  .stats <- .stats_adj

  # Fill in formatting defaults

  if (length(levs) > 1 && length(.stats) > 1) {
    message(
      "a_eair100_j : with multiple stats and multiple levels of analysis
      variable it is recommended to apply an extra split_rows_by on the analysis variable"
    )
  }

  x_stats <- x_stats[.stats]

  levels_per_stats <- lapply(x_stats, names)

  .formats <- junco_get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- junco_get_labels_from_stats(.stats, .labels, levels_per_stats)
  .labels <- .unlist_keep_nulls(.labels)

  .indent_mods <- junco_get_indents_from_stats(
    .stats,
    .indent_mods,
    levels_per_stats
  )
  .indent_mods <- .unlist_keep_nulls(.indent_mods)

  # .format_na_strs processing
  # if na_str = c(NA, NA, NA)
  # this will ensure the ci (NA, NA, NA) will be represented as NE (NE, NE)
  # the value NE is defined as the default to replace NA in our jjcs format

  if (!is.null(na_str)) {
    .format_na_strs <- lapply(names(.formats), FUN = function(x) {
      na_str
    })
  } else {
    .format_na_strs <- NULL
  }

  # Unlist stats + names
  x_stats <- .unlist_keep_nulls(x_stats)
  names(x_stats) <- names(.formats)

  ### final step: turn requested stats into rtables rows
  inrows <- in_rows(
    .list = x_stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = .format_na_strs
  )

  return(inrows)
}

#' Helpers for Processing Least Square Means
#'
#' @param fit result of model fitting function, e.g. [mmrm::mmrm()] or [stats::lm()].
#' @inheritParams fit_mmrm_j
#' @param averages (`list`)\cr optional named list of visit levels which should be averaged
#'   and reported along side the single visits.
#' @param weights (`string`)\cr argument from [emmeans::emmeans()], 'counterfactual' by default.
#' @param specs (`list`)\cr list of least square means specifications, with
#'   elements `coefs` (coefficient list) and `grid` (corresponding `data.frame`).
#' @param emmeans_res (`list`)\cr initial `emmeans` result from [h_get_emmeans_res()].
#'
#' @name lsmeans_helpers
#' @keywords internal
NULL

#' @describeIn lsmeans_helpers returns a list with
#'   `object` (`emmGrid` object containing `emmeans` results) and `grid`
#'   (`data.frame` containing the potential arm and the visit variables
#'   together with the sample size `n` for each combination).
h_get_emmeans_res <- function(fit, vars, weights) {
  data_complete <- stats::model.frame(fit)
  checkmate::assert_data_frame(data_complete)
  checkmate::assert_list(vars)

  emmeans_object <- emmeans::emmeans(fit, data = data_complete, specs = c(vars$visit, vars$arm), weights = weights)

  # Save grid with renamed number of subjects column.
  visit_arm_grid <- emmeans_object@grid
  wgt_index <- match(".wgt.", names(visit_arm_grid))
  names(visit_arm_grid)[wgt_index] <- "n"
  visit_arm_grid$n <- as.integer(visit_arm_grid$n)

  list(object = emmeans_object, grid = visit_arm_grid)
}

#' @describeIn lsmeans_helpers constructs average of visits specifications.
h_get_average_visit_specs <- function(emmeans_res, vars, averages, fit) {
  visit_grid <- emmeans_res$grid[[vars$visit]]
  model_frame <- stats::model.frame(fit)
  averages_list <- list()
  visit_vec <- n_vec <- c()
  if (!is.null(vars$arm)) {
    arm_grid <- emmeans_res$grid[[vars$arm]]
    arm_vec <- c()
  }
  for (i in seq_along(averages)) {
    average_label <- names(averages)[i]
    visits_average <- averages[[i]]
    checkmate::assert_subset(visits_average, choices = levels(visit_grid))
    which_visits_in_average <- visit_grid %in% visits_average
    average_coefs <- as.integer(which_visits_in_average) / length(visits_average)
    zero_coefs <- numeric(length = length(average_coefs))

    if (is.null(vars$arm)) {
      averages_list[[average_label]] <- average_coefs
      visit_vec <- c(visit_vec, average_label)
      is_in_subset <- (model_frame[[vars$visit]] %in% visits_average)
      this_n <- length(unique(model_frame[is_in_subset, vars$id]))
      n_vec <- c(n_vec, this_n)
    } else {
      for (this_arm in levels(arm_grid)) {
        this_coefs <- zero_coefs
        arm_average_label <- paste(this_arm, average_label, sep = ".")
        which_arm <- arm_grid == this_arm
        this_coefs[which_arm] <- average_coefs[which_arm]
        averages_list[[arm_average_label]] <- this_coefs
        arm_vec <- c(arm_vec, this_arm)
        visit_vec <- c(visit_vec, average_label)
        is_in_subset <- (model_frame[[vars$arm]] == this_arm) & (model_frame[[vars$visit]] %in% visits_average)
        this_n <- length(unique(model_frame[is_in_subset, vars$id]))
        n_vec <- c(n_vec, this_n)
      }
    }
  }
  if (is.null(vars$arm)) {
    averages_grid <- data.frame(visit = visit_vec, n = n_vec)
    names(averages_grid) <- c(vars$visit, "n")
  } else {
    averages_grid <- data.frame(arm = arm_vec, visit = visit_vec, n = n_vec)
    names(averages_grid) <- c(vars$arm, vars$visit, "n")
  }
  list(coefs = averages_list, grid = averages_grid)
}

#' @describeIn lsmeans_helpers estimates least square means as a `data.frame`
#'   given specifications.
#'
#' @note The difference here compared to the original tern.mmrm::h_get_spec_visit_estimates()
#'   function is that additional arguments for [emmeans::contrast()] can be passed via the
#'   Once this has been added to the `tern.mmrm` package then its functions can be used instead.
#'
#' @param tests (`flag`)\cr whether to add test results to the estimates.
#' @param ... additional arguments for [emmeans::contrast()].
h_get_spec_visit_estimates <- function(emmeans_res, specs, conf_level, tests = FALSE, ...) {
  checkmate::assert_list(emmeans_res)
  checkmate::assert_list(specs)
  checkmate::assert_number(conf_level)
  checkmate::assert_flag(tests)

  conts <- emmeans::contrast(emmeans_res$object, specs$coefs, ...)
  cis <- stats::confint(conts, level = conf_level)
  res <- cbind(
    specs$grid,
    data.frame(estimate = cis$estimate, se = cis$SE, df = cis$df, lower_cl = cis$lower.CL, upper_cl = cis$upper.CL)
  )
  if (tests) {
    conts_df <- as.data.frame(conts)
    res$t_stat <- conts_df$t.ratio
    res$p_value <- conts_df$p.value
    res$p_value_less <- stats::pt(conts_df$t.ratio, df = conts_df$df, lower.tail = TRUE)
    res$p_value_greater <- stats::pt(conts_df$t.ratio, df = conts_df$df, lower.tail = FALSE)
  }
  res
}

#' @describeIn lsmeans_helpers estimates least square means for single visits.
h_get_single_visit_estimates <- function(emmeans_res, conf_level) {
  checkmate::assert_list(emmeans_res)
  checkmate::assert_number(conf_level)

  cis <- stats::confint(emmeans_res$object, level = conf_level)
  cbind(
    emmeans_res$grid[, setdiff(names(emmeans_res$grid), "n"), drop = FALSE],
    data.frame(estimate = cis$emmean, se = cis$SE, df = cis$df, lower_cl = cis$lower.CL, upper_cl = cis$upper.CL),
    emmeans_res$grid[, "n", drop = FALSE]
  )
}

#' @describeIn lsmeans_helpers constructs `data.frame` with
#'   relative reduction vs. reference arm based on single visit estimates.
#' @param estimates (`data.frame`)\cr single visit least square mean estimates.
h_get_relative_reduc_df <- function(estimates, vars) {
  checkmate::assert_data_frame(estimates)
  checkmate::assert_list(vars)

  ref_arm_level <- estimates[[vars$arm]][1L]
  ref_estimates <- estimates[estimates[[vars$arm]] == ref_arm_level, c(vars$visit, "estimate")]
  names(ref_estimates)[2L] <- "ref"
  result <- merge(estimates[estimates[[vars$arm]] != ref_arm_level, ], ref_estimates, by = vars$visit, sort = FALSE)
  result$relative_reduc <- (result$ref - result$estimate) / result$ref
  result[, c(vars$visit, vars$arm, "relative_reduc")]
}

#' @describeIn lsmeans_helpers constructs single visit contrast specifications.
h_single_visit_contrast_specs <- function(emmeans_res, vars) {
  checkmate::assert_list(emmeans_res)
  checkmate::assert_list(vars)

  emmeans_res$grid$index <- seq_len(nrow(emmeans_res$grid))

  grid_by_visit <- split(emmeans_res$grid, emmeans_res$grid[[vars$visit]])

  arm_levels <- emmeans_res$object@levels[[vars$arm]]
  ref_arm_level <- arm_levels[1L]
  zeros_coefs <- numeric(nrow(emmeans_res$grid))
  overall_list <- list()
  arm_vec <- visit_vec <- c()
  for (j in seq_along(grid_by_visit)) {
    this_grid <- grid_by_visit[[j]]
    ref_index <- which(this_grid[[vars$arm]] == ref_arm_level)
    this_visit <- names(grid_by_visit)[j]
    this_ref_coefs <- zeros_coefs
    this_ref_coefs[this_grid$index[ref_index]] <- -1
    this_list <- list()
    for (i in seq_len(nrow(this_grid))[-ref_index]) {
      this_coefs <- this_ref_coefs
      this_coefs[this_grid$index[i]] <- 1
      this_arm <- as.character(this_grid[[vars$arm]][i])
      arm_vec <- c(arm_vec, this_arm)
      visit_vec <- c(visit_vec, this_visit)
      this_label <- paste(this_arm, this_visit, sep = ".")
      this_list[[this_label]] <- this_coefs
    }
    overall_list <- c(overall_list, this_list)
  }

  grid <- data.frame(arm = arm_vec, visit = visit_vec)
  names(grid) <- c(vars$arm, vars$visit)
  list(coefs = overall_list, grid = grid)
}

#' @describeIn lsmeans_helpers constructs average visits contrast specifications,
#'   given the `specs` for single visit contrasts and the averages required.
h_average_visit_contrast_specs <- function(specs, averages) {
  arm_visit_grid <- specs$grid
  arm_visit_grid$index <- seq_len(nrow(arm_visit_grid))
  grid_by_arm <- split(arm_visit_grid, arm_visit_grid[, 1L])
  overall_list <- list()
  arm_vec <- visit_vec <- c()
  for (j in seq_along(grid_by_arm)) {
    this_arm <- names(grid_by_arm)[j]
    this_grid <- grid_by_arm[[j]]
    for (i in seq_along(averages)) {
      average_label <- names(averages)[i]
      visits_average <- averages[[i]]
      which_visits_in_average <- this_grid[, 2L] %in% visits_average
      averaged_indices <- this_grid$index[which_visits_in_average]
      this_comb <- paste(this_arm, average_label, sep = ".")
      averaged_coefs <- colMeans(do.call(rbind, specs$coefs[averaged_indices]))
      overall_list[[this_comb]] <- averaged_coefs
      arm_vec <- c(arm_vec, this_arm)
      visit_vec <- c(visit_vec, average_label)
    }
  }

  grid <- data.frame(arm = arm_vec, visit = visit_vec)
  names(grid) <- names(arm_visit_grid[, c(1, 2)])

  list(coefs = overall_list, grid = grid)
}

#' Workaround statistics function to add HR with CI
#'
#' This is a workaround for [tern::s_coxph_pairwise()], which adds a statistic
#' containing the hazard ratio estimate together with the confidence interval.
#'
#' @inheritParams proposal_argument_convention
#'
#'
#' @name coxph_hr
#' @return for `s_coxph_hr` a list containing the same statistics returned by [tern::s_coxph_pairwise]
#' and the additional `lr_stat_df` statistic. for `a_coxph_hr`, a `VerticalRowsSection`
#' object.
#' @order 1
NULL

#' @describeIn coxph_hr Statistics function forked from [tern::s_coxph_pairwise()].
#'   the difference is that:
#'   1) It returns the additional statistic `lr_stat_df` (log rank statistic with degrees of freedom).
#' @export
#' @order 3
#'
#' @importFrom survival Surv coxph survdiff
#'
#' @examples
#' adtte_f <- tern::tern_ex_adtte |>
#'   dplyr::filter(PARAMCD == "OS") |>
#'   dplyr::mutate(is_event = CNSR == 0)
#' df <- adtte_f |> dplyr::filter(ARMCD == "ARM A")
#' df_ref <- adtte_f |> dplyr::filter(ARMCD == "ARM B")
#'
#' s_coxph_hr(
#'   df = df,
#'   .ref_group = df_ref,
#'   .in_ref_col = FALSE,
#'   .var = "AVAL",
#'   is_event = "is_event",
#'   strata = NULL
#' )
s_coxph_hr <- function(
    df,
    .ref_group,
    .in_ref_col,
    .var,
    is_event,
    strata = NULL,
    control = control_coxph(),
    alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_string(.var)
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[is_event]])
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  alternative <- match.arg(alternative)
  pval_method <- control$pval_method
  ties <- control$ties
  conf_level <- control$conf_level

  if (.in_ref_col) {
    return(list(
      pvalue = with_label(list(), paste0("p-value (", pval_method, ")")),
      lr_stat_df = list(),
      hr = list(),
      hr_ci = with_label(list(), f_conf_level(conf_level)),
      hr_ci_3d = with_label(list(), paste0("Hazard Ratio (", f_conf_level(conf_level), ")")),
      n_tot = list(),
      n_tot_events = list()
    ))
  }
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))

  df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], arm = group)
  if (is.null(strata)) {
    formula_cox <- survival::Surv(tte, is_event) ~ arm
  } else {
    formula_cox <- stats::as.formula(paste0(
      "survival::Surv(tte, is_event) ~ arm + strata(",
      paste(strata, collapse = ","),
      ")"
    ))
    df_cox <- cbind(df_cox, data[strata])
  }
  cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, ties = ties)
  sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)
  original_survdiff <- survival::survdiff(formula_cox, data = df_cox)
  log_rank_stat <- original_survdiff$chisq

  # See survival::survdiff for the d.f. calculation.
  etmp <- if (is.matrix(original_survdiff$exp)) {
    apply(original_survdiff$exp, 1, sum)
  } else {
    original_survdiff$exp
  }
  log_rank_df <- (sum(1 * (etmp > 0))) - 1
  # Check the consistency of the d.f. with the p-value returned by survival::survdiff.
  log_rank_pvalue <- stats::pchisq(log_rank_stat, log_rank_df, lower.tail = FALSE)
  checkmate::assert_true(all.equal(log_rank_pvalue, original_survdiff$pvalue))

  pval <- switch(pval_method,
    wald = sum_cox$waldtest["pvalue"],
    `log-rank` = log_rank_pvalue,
    likelihood = sum_cox$logtest["pvalue"]
  )

  # Handle one-sided alternatives.
  if (alternative != "two.sided") {
    right_direction <- if (alternative == "less") {
      sum_cox$conf.int[1, 1] < 1
    } else {
      sum_cox$conf.int[1, 1] >= 1
    }
    pval <- if (right_direction) {
      pval / 2
    } else {
      1 - pval / 2
    }
  }

  list(
    pvalue = with_label(unname(pval), paste0("p-value (", pval_method, ")")),
    lr_stat_df = unname(c(log_rank_stat, log_rank_df)),
    hr = sum_cox$conf.int[1, 1],
    hr_ci = with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
    hr_ci_3d = with_label(
      c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])),
      paste0("Hazard Ratio (", f_conf_level(conf_level), ")")
    ),
    n_tot = sum_cox$n,
    n_tot_events = sum_cox$nevent
  )
}

#' @describeIn coxph_hr Formatted analysis function which is used as `afun`.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern::tern_ex_adtte |>
#'   filter(PARAMCD == "OS") |>
#'   mutate(is_event = CNSR == 0)
#'
#' df <- adtte_f |> filter(ARMCD == "ARM A")
#' df_ref_group <- adtte_f |> filter(ARMCD == "ARM B")
#'
#' basic_table() |>
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") |>
#'   add_colcounts() |>
#'   analyze("AVAL",
#'     afun = s_coxph_hr,
#'     extra_args = list(is_event = "is_event"),
#'     var_labels = "Unstratified Analysis",
#'     show_labels = "visible"
#'   ) |>
#'   build_table(df = adtte_f)
#'
#' basic_table() |>
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") |>
#'   add_colcounts() |>
#'   analyze("AVAL",
#'     afun = s_coxph_hr,
#'     extra_args = list(
#'       is_event = "is_event",
#'       strata = "SEX",
#'       control = tern::control_coxph(pval_method = "wald")
#'     ),
#'     var_labels = "Unstratified Analysis",
#'     show_labels = "visible"
#'   ) |>
#'   build_table(df = adtte_f)
#' @export
#' @order 2
a_coxph_hr <- function(
    df,
    .var,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_coxph_hr,
    custom_stat_fnc_list = NULL,
    args_list = c(
      df = list(df),
      .var = .var,
      .ref_group = list(ref$ref_group),
      .in_ref_col = ref$in_ref_col,
      dots_extra_args
    )
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "coxph_hr",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Tabulation of RBMI Results
#'
#' @description `r lifecycle::badge('stable')`
#'
#' These functions can be used to produce tables from RBMI.
#'
#' @name tabulate_rbmi
#'
#' @note These functions have been forked from `tern.rbmi`. Additional features are:
#'
#' * Additional `ref_path` argument.
#' * Extraction of variance statistics in the `tidy()` method.
#' * Adapted to `rbmi` forked functions update with more than two treatment groups.
NULL

#' @describeIn tabulate_rbmi Helper function to produce data frame with results
#'   of pool for a single visit.
#'
#' @param x (`list`)\cr is a list of pooled object from `rbmi` analysis results.
#'   This list includes analysis results, confidence level, hypothesis testing type.
#' @param visit_name (`string`)\cr single visit level.
#' @param group_names (`character`)\cr group levels.
#' @return The `data.frame` with results of pooled analysis for a single visit.
#'
#' @export
h_tidy_pool <- function(x, visit_name, group_names) {
  checkmate::assert_list(x)
  checkmate::assert_string(visit_name)
  checkmate::assert_character(group_names)

  ref_name <- paste0("lsm_", group_names[1], "_", visit_name)
  ref <- x[[ref_name]]

  list2df <- \(l) with(l, data.frame(est, ci_l = ci[1], ci_u = ci[2], se, pvalue, df))

  var_name <- paste0("var_", visit_name)
  var <- if (var_name %in% names(x)) {
    # This is the case for ANCOVA.
    list2df(x[[var_name]])
  } else {
    # This is the case for MMRM.
    var_names <- paste0("var_", group_names[-1], "_", visit_name)
    checkmate::assert_subset(var_names, names(x))
    var_dfs <- lapply(x[var_names], list2df)
    var <- do.call(rbind, var_dfs)
  }

  contr_names <- paste0("trt_", group_names[-1], "_", visit_name)
  contr_dfs <- lapply(x[contr_names], list2df)
  contr <- do.call(rbind, contr_dfs)

  alt_names <- paste0("lsm_", group_names[-1], "_", visit_name)
  alt_dfs <- lapply(x[alt_names], list2df)
  alt <- do.call(rbind, alt_dfs)

  df_ref <- data.frame(
    visit = visit_name,
    group = group_names[1],
    est = ref$est,
    se_est = ref$se,
    lower_cl_est = ref$ci[1],
    upper_cl_est = ref$ci[2],
    est_contr = NA_real_,
    se_contr = NA_real_,
    lower_cl_contr = NA_real_,
    upper_cl_contr = NA_real_,
    p_value = NA_real_,
    relative_reduc = NA_real_,
    mse = NA_real_,
    df = NA_real_,
    stringsAsFactors = FALSE
  )
  df_alt <- data.frame(
    visit = visit_name,
    group = group_names[-1],
    est = alt$est,
    se_est = alt$se,
    lower_cl_est = alt$ci_l,
    upper_cl_est = alt$ci_u,
    est_contr = contr$est,
    se_contr = contr$se,
    lower_cl_contr = contr$ci_l,
    upper_cl_contr = contr$ci_u,
    p_value = contr$pvalue,
    relative_reduc = contr$est / df_ref$est,
    mse = var$est,
    df = var$df,
    stringsAsFactors = FALSE
  )
  rbind(df_ref, df_alt)
}

#' Helper method (for [`broom::tidy()`]) to prepare a data frame from an
#'   `pool` `rbmi` object containing the LS means and contrasts and multiple visits
#'
#' @method tidy pool
#' @param x (`pool`) is a list of pooled object from `rbmi` analysis results. This list includes
#' analysis results, confidence level, hypothesis testing type.
#' @param visits (`character`)\cr all visit levels. Otherwise too hard to guess this.
#' @param ... Additional arguments. Not used. Needed to match generic signature only.
#' @importFrom generics tidy
#' @export
#' @keywords internal
#' @return A `data.frame`.
tidy.pool <- function(x, visits, ...) {
  ls_raw <- x$pars

  has_lsm <- grepl("^lsm_", names(ls_raw))
  has_first_visit <- grepl(paste0("_", visits[1]), names(ls_raw), fixed = TRUE)
  is_lsm_first_visit <- has_lsm & has_first_visit

  group_names <- names(ls_raw)[is_lsm_first_visit]
  group_names <- gsub(pattern = "^lsm_", replacement = "", x = group_names)
  group_names <- gsub(pattern = paste0("_", visits[1]), replacement = "", x = group_names, fixed = TRUE)

  spl <- rep(visits, each = length(ls_raw) / length(visits))

  ls_split <- split(ls_raw, spl)

  ls_df <- mapply(
    FUN = h_tidy_pool,
    x = ls_split,
    visit_name = visits,
    MoreArgs = list(group_names = group_names),
    SIMPLIFY = FALSE
  )

  result <- do.call(rbind, unname(ls_df))

  result$visit <- factor(result$visit, levels = visits)
  result$group <- factor(result$group, levels = group_names)
  result$conf_level <- x$conf.level

  result
}

#' @describeIn tabulate_rbmi Statistics function which is extracting estimates
#'   from a tidied RBMI results data frame.
#'
#' @param df (`data.frame`)\cr input with LS means results.
#' @param .in_ref_col (`flag`)\cr whether reference column is specified.
#' @param show_relative (`string`)\cr 'reduction' if (`control - treatment`, default)
#'   or 'increase' (`treatment - control`) of relative change from baseline?
#' @return A list of statistics extracted from a tidied LS means data frame.
#' @export
s_rbmi_lsmeans <- function(df, .in_ref_col, show_relative = c("reduction", "increase")) {
  checkmate::assert_flag(.in_ref_col)

  show_relative <- match.arg(show_relative)
  if_not_ref <- function(x) if (.in_ref_col) character() else x
  list(
    adj_mean_se = c(df$est, df$se_est),
    adj_mean_ci = with_label(c(df$lower_cl_est, df$upper_cl_est), f_conf_level(df$conf_level)),
    diff_mean_se = if_not_ref(c(df$est_contr, df$se_contr)),
    diff_mean_ci = with_label(
      if_not_ref(c(df$lower_cl_contr, df$upper_cl_contr)),
      f_conf_level(df$conf_level)
    ),
    change = switch(show_relative,
      reduction = with_label(if_not_ref(df$relative_reduc), "Relative Reduction (%)"),
      increase = with_label(if_not_ref(-df$relative_reduc), "Relative Increase (%)")
    ),
    p_value = if_not_ref(df$p_value),
    additional_title_row = NULL
  )
}

#' @describeIn tabulate_rbmi Formatted Analysis function which is used as `afun`.
#'
#' @inheritParams proposal_argument_convention
#' @export
a_rbmi_lsmeans <- function(
    df,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_rbmi_lsmeans,
    custom_stat_fnc_list = NULL,
    args_list = c(df = list(df), .in_ref_col = ref$in_ref_col, dots_extra_args)
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "tabulate_rbmi",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Split Function Factory for the Response Tables (RESP01)
#'
#' The main purpose here is to have a column dependent split into either comparative
#' statistic (relative risk or odds ratio with p-value) in the 'Overall' column,
#' and count proportions and corresponding confidence intervals in the other treatment
#' arm columns.
#'
#' @inheritParams proposal_argument_convention
#' @param method (`string`)\cr which method to use for the comparative statistics.
#'
#' @return A split function for use in the response table RESP01 and similar ones.
#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of
#'   post-processing function.
#' @export
#'
#' @examples
#' split_fun <- resp01_split_fun_fct(
#'   method = "or_cmh",
#'   conf_level = 0.95
#' )
resp01_split_fun_fct <- function(method = c("rr", "or_logistic", "or_cmh"), conf_level) {
  method <- match.arg(method)
  (assert_proportion_value)(conf_level)

  post_fun <- function(ret, spl, fulldf, .spl_context) {
    all_expr <- expression(TRUE)
    # Here check if we are in 'Overall' column or not.
    this_col <- .spl_context[nrow(.spl_context), "value"][[1]]
    in_overall <- this_col == "Overall"
    # Accordingly, return left and right column labels and value identifiers.
    if (in_overall) {
      # In the Overall column, we want the comparison statistic and corresponding p-value.
      comp_stat_name <- if (method == "rr") "Relative Risk" else "Odds Ratio"
      short_split_result(
        comp_stat_ci = paste0(comp_stat_name, " (", f_conf_level(conf_level), ")~[super a]"),
        pval = "p-value~[super b]",
        fulldf = fulldf
      )
    } else {
      # In the treatment arm columns, we want counts and CI for proportions.
      short_split_result(
        count_prop = "n (%)",
        prop_ci = paste0(f_conf_level(conf_level), " for %"),
        fulldf = fulldf
      )
    }
  }
  make_split_fun(post = list(post_fun))
}

#' Content Row Function for Counts of Subgroups in Response Tables (RESP01)
#'
#' @inheritParams proposal_argument_convention
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
#' @param label_fstr (`string`)\cr format string for the label.
#'
#' @return The correct [rtables::in_rows()] result.
#' @export
#'
#' @examples
#' fake_spl_context <- data.frame(
#'   cur_col_split_val = I(list(c(ARM = "A: Drug X", count_prop = "count_prop")))
#' )
#' resp01_counts_cfun(
#'   df = DM,
#'   labelstr = "Blue",
#'   .spl_context = fake_spl_context,
#'   .alt_df = DM,
#'   label_fstr = "Color: %s"
#' )
resp01_counts_cfun <- function(df, labelstr, .spl_context, .alt_df, label_fstr) {
  this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
  if (this_col_split[1] != "Overall" && this_col_split[2] == "count_prop") {
    in_rows(nrow(.alt_df), .formats = "xx", .labels = sprintf(label_fstr, labelstr))
  } else {
    NULL
  }
}

#' Formatted Analysis Function for Comparative Statistic in Response Tables (RESP01)
#'
#' This function applies to a `logical` column called `.var` from `df`.
#' The response proportion is compared between the treatment arms identified
#' by column `arm`.
#'
#' @inheritParams proposal_argument_convention
#' @param include (`flag`)\cr whether to include the results for this variable.
#' @param arm (`string`)\cr column name in the data frame that identifies the treatment arms.
#' @param formats (`list`)\cr containing formats for `comp_stat_ci` and `pval`.
#' @param methods (`list`)\cr containing methods for comparative statistics. The element `comp_stat_ci` can be
#'   'rr' (relative risk), 'or_cmh' (odds ratio with CMH estimation and p-value) or 'or_logistic' (odds ratio
#'   estimated by conditional or standard logistic regression). The element `pval` can be 'fisher' (Fisher's
#'   exact test) or 'chisq' (chi-square test), only used when using unstratified analyses with 'or_logistic'.
#' @param stat (`string`)\cr the statistic to return, either `comp_stat_ci`
#'   or `pval`.
#'
#' @return The formatted result as [rtables::rcell()].
#' @seealso [resp01_a_comp_stat_factor()] for the `factor` equivalent.
#' @export
#'
#' @examples
#' dm <- droplevels(subset(formatters::DM, SEX %in% c("F", "M")))
#' dm$RESP <- as.logical(sample(c(TRUE, FALSE), size = nrow(DM), replace = TRUE))
#'
#' resp01_a_comp_stat_logical(
#'   dm,
#'   .var = "RESP",
#'   conf_level = 0.9,
#'   include = TRUE,
#'   arm = "SEX",
#'   strata = "RACE",
#'   stat = "comp_stat_ci",
#'   method = list(comp_stat_ci = "or_cmh"),
#'   formats = list(
#'     comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
#'     pval = jjcsformat_pval_fct(0.05)
#'   )
#' )
resp01_a_comp_stat_logical <- function(df,
                                       .var,
                                       conf_level,
                                       include,
                                       arm,
                                       strata,
                                       formats,
                                       methods,
                                       stat = c("comp_stat_ci", "pval")) {
  checkmate::assert_logical(df[[.var]])
  checkmate::assert_factor(df[[arm]], n.levels = 2L)
  checkmate::assert_flag(include)
  stat <- match.arg(stat)

  if (include) {
    checkmate::assert_list(methods)
    checkmate::assert_subset(methods$comp_stat_ci, c("rr", "or_cmh", "or_logistic"))

    df_by <- split(df, f = df[[arm]])
    this_df <- df_by[[1]]
    this_ref_group <- df_by[[2]]

    this_res <- if (methods$comp_stat_ci == "rr") {
      # Stratified CMH test for point estimate and p-value, and Wald statistic for the confidence interval
      s_res <- s_relative_risk(
        this_df,
        .var = .var,
        .ref_group = this_ref_group,
        .in_ref_col = FALSE,
        variables = list(strata = strata),
        conf_level = conf_level
      )
      list(comp_stat_ci = s_res$rel_risk_ci, pval = s_res$pval)
    } else if (methods$comp_stat_ci == "or_logistic") {
      # Odds Ratio Variant 1: (Conditional/Standard) Logistic regression for odds ratio point estimate and
      # confidence interval.
      s_res <- s_odds_ratio_j(
        this_df,
        .var = .var,
        .ref_group = this_ref_group,
        .in_ref_col = FALSE,
        .df_row = rbind(this_df, this_ref_group),
        variables = list(arm = arm, strata = strata),
        conf_level = conf_level,
        method = "exact" # Only used if strata are present.
      )
      pval <- if (is.null(strata)) {
        checkmate::assert_subset(methods$pval, c("fisher", "chisq"))

        # If not stratified, then the p-value can come either from Fisher's exact or chi-square test.
        p_res <- s_test_proportion_diff(
          this_df,
          .var = .var,
          .ref_group = this_ref_group,
          .in_ref_col = FALSE,
          method = methods$pval
        )
        p_res$pval
      } else {
        s_res$pval
      }
      list(comp_stat_ci = s_res$or_ci, pval = pval)
    } else if (methods$comp_stat_ci == "or_cmh") {
      # Odds Ratio Variant 2: Stratified CMH test for odds ratio point estimate and confidence interval and
      # corresponding CMH test p-value.
      s_res <- s_odds_ratio_j(
        this_df,
        .var = .var,
        .ref_group = this_ref_group,
        .in_ref_col = FALSE,
        .df_row = rbind(this_df, this_ref_group),
        variables = list(arm = arm, strata = strata),
        conf_level = conf_level,
        method = "cmh"
      )
      list(comp_stat_ci = s_res$or_ci, pval = s_res$pval)
    }
    rcell(this_res[[stat]], format = formats[[stat]])
  } else {
    NULL
  }
}

#' Formatted Analysis Function for Comparative Statistic in Response Tables (RESP01)
#'
#' This function applies to a `factor` column called `.var` from `df`.
#'
#' @inheritParams proposal_argument_convention
#' @param include (`character`)\cr for which factor levels to include the comparison
#'   statistic results.
#' @param \dots see [resp01_a_comp_stat_logical()] for additional required arguments.
#'
#' @return The formatted result as [rtables::rcell()].
#' @export
#'
#' @examples
#' dm <- droplevels(subset(formatters::DM, SEX %in% c("F", "M")))
#'
#' resp01_a_comp_stat_factor(
#'   dm,
#'   .var = "COUNTRY",
#'   conf_level = 0.9,
#'   include = c("USA", "CHN"),
#'   arm = "SEX",
#'   strata = "RACE",
#'   stat = "comp_stat_ci",
#'   method = list(comp_stat_ci = "or_cmh"),
#'   formats = list(
#'     comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
#'     pval = jjcsformat_pval_fct(0.05)
#'   )
#' )
resp01_a_comp_stat_factor <- function(df,
                                      .var,
                                      include,
                                      ...) {
  checkmate::assert_factor(df[[.var]])
  checkmate::assert_character(include, null.ok = TRUE)
  checkmate::assert_subset(x = include, choices = levels(df[[.var]]))

  design_df <- as.data.frame(h_get_design_mat(df, .var))

  res <- sapply(X = levels(df[[.var]]), simplify = FALSE, FUN = function(lvl) {
    df[[.var]] <- design_df[[lvl]]
    resp01_a_comp_stat_logical(df = df, .var = .var, include = lvl %in% include, ...)
  })
  in_rows(.list = res, .labels = levels(df[[.var]]))
}

#' Formatted Analysis and Content Summary Function for Response Tables (RESP01)
#'
#' This function applies to both `factor` and `logical` columns called
#' `.var` from `df`. Depending on the position in the split, it returns the
#' right formatted results for the RESP01 and related layouts.
#'
#' @inheritParams proposal_argument_convention
#' @inheritParams resp01_a_comp_stat_logical
#' @param include_comp (`character` or `flag`)\cr whether to include comparative
#'   statistic results, either `character` for factors or `flag` for logicals.
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
#' @param arm (`string`)\cr column name in the data frame that identifies the treatment arms.
#' @param label (`string`)\cr only for logicals, which label to use. (For factors, the
#'   labels are the factor levels.)
#' @param formats (`list`)\cr containing formats for `prop_ci`, `comp_stat_ci`
#'   and `pval`.
#' @param methods (`list`)\cr containing methods for comparative statistics. The element `comp_stat_ci` can be
#'   'rr' (relative risk), 'or_cmh' (odds ratio with CMH estimation and p-value) or 'or_logistic' (odds ratio
#'   estimated by conditional or standard logistic regression). The element `pval` can be 'fisher' (Fisher's
#'   exact test) or 'chisq' (chi-square test), only used when using unstratified analyses with 'or_logistic'.
#'   The element `prop_ci` specifies the method for proportion confidence interval calculation.
#'
#' @return The formatted result as [rtables::in_rows()] result.
#' @export
#'
#' @examples
#' fake_spl_context <- data.frame(
#'   cur_col_split_val = I(list(c(ARM = "A: Drug X", count_prop = "count_prop")))
#' )
#' dm <- droplevels(subset(DM, SEX %in% c("F", "M")))
#' resp01_acfun(
#'   dm,
#'   .alt_df = dm,
#'   .var = "COUNTRY",
#'   .spl_context = fake_spl_context,
#'   conf_level = 0.9,
#'   include_comp = c("USA", "CHN"),
#'   arm = "SEX",
#'   strata = "RACE",
#'   methods = list(
#'     comp_stat_ci = "or_cmh",
#'     pval = "",
#'     prop_ci = "wald"
#'   ),
#'   formats = list(
#'     prop_ci = jjcsformat_xx("xx.% - xx.%"),
#'     comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
#'     pval = jjcsformat_pval_fct(0.05)
#'   )
#' )
#' fake_spl_context2 <- data.frame(
#'   cur_col_split_val = I(list(c(ARM = "Overall", comp_stat_ci = "comp_stat_ci")))
#' )
#' resp01_acfun(
#'   dm,
#'   .alt_df = dm,
#'   .var = "COUNTRY",
#'   .spl_context = fake_spl_context2,
#'   conf_level = 0.9,
#'   include_comp = c("USA", "CHN"),
#'   arm = "SEX",
#'   strata = "RACE",
#'   methods = list(
#'     comp_stat_ci = "or_cmh",
#'     pval = "",
#'     prop_ci = "wald"
#'   ),
#'   formats = list(
#'     prop_ci = jjcsformat_xx("xx.% - xx.%"),
#'     comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
#'     pval = jjcsformat_pval_fct(0.05)
#'   )
#' )
resp01_acfun <- function(df,
                         labelstr = NULL,
                         label = NULL,
                         .var,
                         .spl_context,
                         include_comp,
                         .alt_df,
                         conf_level,
                         arm,
                         strata,
                         formats,
                         methods) {
  this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
  x <- df[[.var]]
  is_factor <- is.factor(x)
  res <- if (this_col_split[1] != "Overall") {
    if (this_col_split[2] == "count_prop") {
      if (is_factor) {
        s_proportion_factor(x, .alt_df = .alt_df)
      } else {
        s_proportion_logical(x, .alt_df = .alt_df)$n_prop
      }
    } else {
      checkmate::assert_true(this_col_split[2] == "prop_ci")
      if (is_factor) {
        a_proportion_ci_factor(
          df,
          .var = .var,
          .alt_df = .alt_df,
          conf_level = conf_level,
          formats = formats,
          method = methods$prop_ci
        )
      } else {
        a_proportion_ci_logical(
          x,
          .alt_df = .alt_df,
          conf_level = conf_level,
          formats = formats,
          method = methods$prop_ci
        )
      }
    }
  } else {
    checkmate::assert_true(this_col_split[1] == "Overall")
    if (is_factor) {
      resp01_a_comp_stat_factor(
        df,
        .var = .var,
        conf_level = conf_level,
        include = include_comp,
        arm = arm,
        strata = strata,
        stat = this_col_split[2],
        formats = formats,
        methods = methods
      )
    } else {
      resp01_a_comp_stat_logical(
        df,
        .var = .var,
        conf_level = conf_level,
        include = include_comp,
        arm = arm,
        strata = strata,
        stat = this_col_split[2],
        formats = formats,
        methods = methods
      )
    }
  }
  if (is_factor) {
    res
  } else {
    if (!is.null(labelstr)) {
      label <- labelstr
    }
    in_rows(res, .labels = label)
  }
}

#' `ANCOVA` Analysis
#'
#' Does the `ANCOVA` analysis, separately for each visit.
#'
#' @param vars (named `list` of `string` or `character`)\cr specifying the variables in the `ANCOVA` analysis.
#'   The following elements need to be included as character vectors and match corresponding columns
#'   in `data`:
#'
#'   - `response`: the response variable.
#'   - `covariates`: the additional covariate terms (might also include interactions).
#'   - `id`: the subject ID variable (not really needed for the computations but for internal logistics).
#'   - `arm`: the treatment group variable (factor).
#'   - `visit`: the visit variable (factor).
#'
#'   Note that the `arm` variable is by default included in the model, thus should not be part of `covariates`.
#' @param data (`data.frame`)\cr with all the variables specified in
#'   `vars`. Records with missing values in any independent variables
#'   will be excluded.
#' @param conf_level (`proportion`)\cr confidence level of the interval.
#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `'counterfactual'` by default.
#'
#' @return A `tern_model` object which is a list with model results:
#'
#'   - `fit`: A list with a fitted [stats::lm()] result for each visit.
#'   - `mse`: Mean squared error, i.e. variance estimate, for each visit.
#'   - `df`: Degrees of freedom for the variance estimate for each visit.
#'   - `lsmeans`: This is a list with data frames `estimates` and `contrasts`.
#'        The attribute `weights` savse the settings used (`weights_emmeans`).
#'   - `vars`: The variable list.
#'   - `labels`: Corresponding list with variable labels extracted from `data`.
#'   - `ref_level`: The reference level for the arm variable, which is always the first level.
#'   - `treatment_levels`: The treatment levels for the arm variable.
#'   - `conf_level`: The confidence level which was used to construct the `lsmeans` confidence intervals.
#'
#' @export
#' @examples
#' library(mmrm)
#'
#' fit <- fit_ancova(
#'   vars = list(
#'     response = "FEV1",
#'     covariates = c("RACE", "SEX"),
#'     arm = "ARMCD",
#'     id = "USUBJID",
#'     visit = "AVISIT"
#'   ),
#'   data = fev_data,
#'   conf_level = 0.9,
#'   weights_emmeans = "equal"
#' )
#'
fit_ancova <- function(
    vars = list(response = "AVAL", covariates = c(), arm = "ARM", visit = "AVISIT", id = "USUBJID"),
    data,
    conf_level = 0.95,
    weights_emmeans = "proportional") {
  labels <- h_labels(vars, data)

  arm_levels <- levels(data[[vars$arm]])
  ref_level <- arm_levels[1]
  trt_levels <- arm_levels[-1]
  visit_levels <- levels(data[[vars$visit]])
  grid <- list(
    factor(arm_levels, levels = arm_levels),
    factor(rep(visit_levels, each = length(arm_levels)), levels = visit_levels)
  ) |>
    stats::setNames(c(vars$arm, vars$visit)) |>
    as.data.frame()
  checkmate::assert_disjunct(vars$arm, vars$covariates)
  covariates_part <- paste(vars$covariates, collapse = " + ")
  formula <- if (covariates_part != "") {
    stats::as.formula(paste0(vars$response, " ~ ", covariates_part, " + ", vars$arm))
  } else {
    stats::as.formula(paste0(vars$response, " ~ ", vars$arm))
  }
  results_by_visit <- Map(
    data = split(data, data[[vars$visit]]),
    grid = split(grid, grid[[vars$visit]]),
    f = function(data, grid) {
      fit <- stats::lm(formula = formula, data = data)
      summary_fit <- summary(fit)
      emmeans_res <- h_get_emmeans_res(fit, vars = vars["arm"], weights = weights_emmeans)
      estimates <- h_get_single_visit_estimates(emmeans_res, conf_level)
      estimates[[vars$visit]] <- grid[[vars$visit]]
      estimates <- estimates |> dplyr::relocate(!!vars$arm, !!vars$visit)
      contrasts <- h_get_spec_visit_estimates(
        emmeans_res,
        specs = list(
          coefs = "trt.vs.ctrl",
          grid = grid[-1, ] # Without the reference level row.
        ),
        conf_level = conf_level,
        tests = TRUE,
        ref = ref_level
      )
      relative_reduc_df <- h_get_relative_reduc_df(estimates, vars)
      contrasts <- merge(contrasts, relative_reduc_df, by = c(vars$arm, vars$visit), sort = FALSE)
      list(fit = fit, estimates = estimates, contrasts = contrasts, mse = summary_fit$sigma^2, df = summary_fit$df[2])
    }
  )
  lsmeans <- structure(
    list(
      estimates = do.call(rbind, c(lapply(results_by_visit, "[[", "estimates"), make.row.names = FALSE)),
      contrasts = do.call(rbind, c(lapply(results_by_visit, "[[", "contrasts"), make.row.names = FALSE))
    ),
    weights = weights_emmeans
  )
  results <- list(
    fit = lapply(results_by_visit, "[[", "fit"),
    mse = do.call(c, lapply(results_by_visit, "[[", "mse")),
    df = do.call(c, lapply(results_by_visit, "[[", "df")),
    lsmeans = lsmeans,
    vars = vars,
    labels = labels,
    ref_level = ref_level,
    treatment_levels = trt_levels,
    conf_level = conf_level
  )
  class(results) <- "tern_model"
  results
}

#' Analysis and Content Summary Function Producing Blank Line
#'
#' @inheritParams proposal_argument_convention
#'
#' @keywords internal
ac_blank_line <- function(df, labelstr = "") {
  in_rows(.list = NA_real_, .labels = labelstr, .formats = "xx", .format_na_strs = "")
}

#' Insertion of Blank Lines in a Layout
#'
#' @inheritParams proposal_argument_convention
#'
#' @description This is a hack for `rtables` in order to be able to add row gaps,
#' i.e. blank lines.
#' In particular, by default this function needs to maintain a global state for avoiding
#' duplicate table names. The global state variable is hidden by using
#' a dot in front of its name. However, this likely won't work with parallelisation across
#' multiple threads and also causes non-reproducibility of the resulting `rtables`
#' object. Therefore also a custom table name can be used.
#'
#' @return The modified layout now including a blank line after the current
#'   row content.
#' @export
#'
#' @examples
#' ADSL <- ex_adsl
#'
#' lyt <- basic_table() |>
#'   split_cols_by("ARM") |>
#'   split_rows_by("STRATA1") |>
#'   analyze(vars = "AGE", afun = function(x) {
#'     in_rows(
#'       "Mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.xx (xx.xx)")
#'     )
#'   }) |>
#'   insert_blank_line() |>
#'   analyze(vars = "AGE", table_names = "AGE_Range", afun = function(x) {
#'     in_rows(
#'       "Range" = rcell(range(x), format = "xx.xx - xx.xx")
#'     )
#'   })
#' build_table(lyt, ADSL)
insert_blank_line <- function(lyt, table_names = NULL) {
  varnames_rows <- vars_in_layout(lyt@row_layout)
  checkmate::assert_character(varnames_rows, min.len = 1L)
  last_varname_rows <- utils::tail(varnames_rows, 1L)

  this_table_name <- if (is.null(table_names)) {
    default_table_name <- paste0(".post_", last_varname_rows, "_blank")

    # A named list for tracking table names and counts
    table_count <- getOption("junco.insert_blank_line")

    new_count <- if (default_table_name %in% names(table_count)) {
      table_count[[default_table_name]] + 1
    } else {
      1
    }

    table_count[[default_table_name]] <- new_count
    options(junco.insert_blank_line = table_count)

    paste(default_table_name, new_count, sep = "_")
  } else {
    checkmate::assert_string(table_names, min.chars = 1L)
    table_names
  }

  analyze(
    lyt,
    vars = last_varname_rows,
    afun = ac_blank_line,
    show_labels = "hidden",
    table_names = this_table_name
  )
}

#' @name a_freq_subcol_j
#'
#' @title Analysis function count and percentage with extra column-subsetting in
#' selected columns (controlled by subcol_* arguments)
#'
#' @inheritParams proposal_argument_convention
#' @inheritParams a_freq_j
#' @param subcol_split Text to search colid to determine whether further subsetting
#'                     should be performed.
#' @param subcol_var Name of variable containing to be searched for the text
#'                     identified in subcol_val argument.
#' @param subcol_val Value to use to perform further data sub-setting.

#' @param denom (`string`)\cr
#' One of \cr
#' \itemize{
#' \item \strong{N_col} Column count, \cr
#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr
#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr
#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr
#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr
#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe
#' (`.row_df` from the rtables splitting machinery).\cr
#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr
#' This higher row-level split is specified in the argument `denom_by`.\cr
#' }
#' @param .formats (named 'character' or 'list')\cr
#' formats for the statistics.
#'
#' @return list of requested statistics with formatted `rtables::CellValue()`.\cr
#' @export

a_freq_subcol_j <- function(
    df,
    labelstr = NULL,
    .var = NA,
    val = NULL,
    # arguments specific to a_freq_subcol_j
    subcol_split = NULL,
    subcol_var = NULL,
    subcol_val = NULL,
    # arguments specific to a_freq_subcol_j till here
    .df_row,
    .spl_context,
    .N_col,
    id = "USUBJID",
    denom = c("N_col", "n_df", "n_altdf", "n_rowdf", "n_parentdf"),
    label = NULL,
    label_fstr = NULL,
    label_map = NULL,
    .alt_df_full = NULL,
    denom_by = NULL,
    .stats = c("count_unique_denom_fraction"),
    .formats = NULL,
    .labels_n = NULL,
    .indent_mods = NULL,
    na_str = rep("NA", 3)) {
  denom <- match.arg(denom)

  if (!is.null(labelstr) && is.na(.var)) {
    stop(
      "Argument var must be specified in call to summarize_row_groups when using cfun = a_freq_subcol_j."
    )
  }

  check_alt_df_full(denom, "n_altdf", .alt_df_full)

  res_dataprep <- h_a_freq_dataprep(
    df = df,
    labelstr = labelstr,
    .var = .var,
    val = val,
    drop_levels = FALSE,
    excl_levels = NULL,
    new_levels = NULL,
    new_levels_after = FALSE,
    .df_row = .df_row,
    .spl_context = .spl_context,
    .N_col = .N_col,
    id = id,
    denom = denom,
    variables = NULL,
    label = label,
    label_fstr = label_fstr,
    label_map = label_map,
    .alt_df_full = .alt_df_full,
    denom_by = denom_by,
    .stats = .stats
  )
  # res_dataprep is list with elements
  # df .df_row val
  # drop_levels excl_levels
  # alt_df parentdf new_denomdf
  # .stats
  # make these elements available in current environment
  df <- res_dataprep$df
  .df_row <- res_dataprep$.df_row
  val <- res_dataprep$val
  alt_df <- res_dataprep$alt_df
  parentdf <- res_dataprep$parentdf
  new_denomdf <- res_dataprep$new_denomdf
  .stats <- .stats

  ## colid can be used to figure out if we're in subcolum
  colid <- .spl_context$cur_col_id[[1]]

  ### this is the core code for subsetting to appropriate subcol_val
  insubcol <- grepl(subcol_split, colid, fixed = TRUE)
  if (insubcol) {
    df <- subset(df, df[[subcol_var]] == subcol_val)
  }

  ## the same s-function can be used as in a_freq_j
  x_stats <- s_freq_j(
    df,
    .var = .var,
    .df_row = .df_row,
    val = val,
    alt_df = new_denomdf,
    parent_df = new_denomdf,
    id = id,
    denom = denom,
    .N_col = .N_col,
    countsource = "df"
  )

  .stats_adj <- .stats

  res_prepinrows <- h_a_freq_prepinrows(
    x_stats,
    .stats_adj,
    .formats,
    labelstr,
    label_fstr,
    label,
    .indent_mods,
    .labels_n,
    na_str
  )
  # res_prepinrows is list with elements
  # x_stats .formats .labels .indent_mods .format_na_strs
  # make these elements available in current environment
  x_stats <- res_prepinrows$x_stats
  .formats <- res_prepinrows$.formats
  .labels <- res_prepinrows$.labels
  .indent_mods <- res_prepinrows$.indent_mods
  .format_na_strs <- res_prepinrows$.format_na_strs

  ### final step: turn requested stats into rtables rows
  inrows <- in_rows(
    .list = x_stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = .format_na_strs
  )

  return(inrows)
}

#' @keywords internal
memoised_fit_mmrm <- memoise::memoise(fit_mmrm_j)

#' Helper Function to Fit the MMRM and Return LS Mean Estimates and Contrasts
#'
#' @inheritParams proposal_argument_convention
#' @param df_parent (`data.frame`)\cr data set containing all analysis variables
#'   from all visits and arms.
#' @param ref_arm_level (`string`)\cr the reference arm which should be compared
#'   against.
#' @param ref_visit_levels (`character`)\cr the reference visits which should not
#'   be included in the model fit.
#' @param ... additional options passed to [fit_mmrm_j()].
#'
#' @return The resulting estimates and contrasts LS means as returned by
#'   [tidy.tern_model()].
#' @keywords internal
h_summarize_mmrm <- function(.var, df_parent, variables, ref_arm_level, ref_visit_levels, ...) {
  checkmate::assert_string(.var)
  variables$response <- .var

  checkmate::assert_string(ref_arm_level)
  arm_levels <- levels(df_parent[[variables$arm]])
  if (arm_levels[1L] != ref_arm_level) {
    checkmate::assert_subset(ref_arm_level, arm_levels[-1L])
    df_parent[[variables$arm]] <- stats::relevel(df_parent[[variables$arm]], ref = ref_arm_level)
  }
  checkmate::assert_character(ref_visit_levels)
  in_ref_visits <- df_parent[[variables$visit]] %in% ref_visit_levels
  df_parent <- df_parent[!in_ref_visits, , drop = FALSE]
  checkmate::assert_true(nrow(df_parent) > 1)
  new_levels <- setdiff(levels(df_parent[[variables$visit]]), ref_visit_levels)
  df_parent[[variables$visit]] <- factor(df_parent[[variables$visit]], levels = new_levels)
  mod_fit <- memoised_fit_mmrm(vars = variables, data = df_parent, ...)
  tidy(mod_fit)
}

#' Dynamic tabulation of MMRM results with tables
#'
#' @description `r lifecycle::badge('stable')`
#'
#' These functions can be used to produce tables for MMRM results, within
#' tables which are split by arms and visits. This is helpful when higher-level
#' row splits are needed (e.g. splits by parameter or subgroup).
#'
#' @name summarize_mmrm
#' @examples
#' set.seed(123)
#' longdat <- data.frame(
#'   ID = rep(DM$ID, 5),
#'   AVAL = c(
#'     rep(0, nrow(DM)),
#'     rnorm(n = nrow(DM) * 4)
#'   ),
#'   VISIT = factor(rep(paste0("V", 0:4), each = nrow(DM)))
#' ) |>
#'   dplyr::inner_join(DM, by = "ID")
#'
NULL

#' @describeIn summarize_mmrm Statistics function which is extracting estimates,
#'   not including any results when in the reference visit, and only showing LS mean
#'   estimates when in the reference arm and not in reference visit. It uses
#'   [s_lsmeans()] for the final processing.
#'
#' @inheritParams proposal_argument_convention
#' @param ref_levels (`list`)\cr with `visit` and `arm` reference levels.
#' @param ... eventually passed to [fit_mmrm_j()] via [h_summarize_mmrm()].
#' @export
s_summarize_mmrm <- function(
    df,
    .var,
    variables,
    ref_levels,
    .spl_context,
    alternative = c("two.sided", "less", "greater"),
    show_relative = c("reduction", "increase"),
    ...) {
  alternative <- match.arg(alternative)

  checkmate::assert_list(variables, names = "unique")
  visit_var <- variables$visit
  arm_var <- variables$arm

  checkmate::assert_list(ref_levels, names = "unique")
  checkmate::assert_subset(c(visit_var, arm_var), names(ref_levels))
  ref_visits <- ref_levels[[visit_var]]
  ref_arm <- ref_levels[[arm_var]]
  checkmate::assert_string(ref_arm)

  current_visit <- as.character(unique(df[[visit_var]]))
  current_arm <- as.character(unique(df[[arm_var]]))
  checkmate::assert_string(current_visit)
  checkmate::assert_string(current_arm)

  in_ref_visits <- current_visit %in% ref_visits
  in_ref_arm <- current_arm == ref_arm

  if (in_ref_visits) {
    ## this is returned
    list(
      n = NULL,
      adj_mean_se = NULL,
      adj_mean_ci = NULL,
      adj_mean_est_ci = NULL,
      diff_mean_se = NULL,
      diff_mean_ci = NULL,
      diff_mean_est_ci = NULL,
      change = NULL,
      p_value = NULL
    )
  } else { # non ref visit
    n_splits <- nrow(.spl_context)

    # Check that the current row split is by the visit variable.
    current_split_var <- .spl_context[n_splits, "split"]
    checkmate::assert_true(identical(current_split_var, visit_var))

    # Then take the data frame with all visits and fit the MMRM on it.
    df_parent <- .spl_context[n_splits - 1, "full_parent_df"][[1]]
    lsm_df <- h_summarize_mmrm(
      .var = .var,
      df_parent = df_parent,
      variables = variables,
      ref_arm_level = ref_arm,
      ref_visit_levels = ref_visits,
      ...
    )

    # Subset to the current table cell we are looking at.
    matches_visit <- lsm_df[[visit_var]] == current_visit
    matches_arm <- lsm_df[[arm_var]] == current_arm
    matches_both <- which(matches_visit & matches_arm)
    checkmate::assert_int(matches_both)

    s_lsmeans(
      lsm_df[matches_both, ],
      .in_ref_col = in_ref_arm,
      alternative = alternative,
      show_relative = show_relative
    )
  } # end ref visits if/else
}

#' @describeIn summarize_mmrm Formatted analysis function which is used as `afun`.
#'
#' @return
#' * `a_summarize_mmrm()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' basic_table() |>
#'   split_rows_by("VISIT") |>
#'   split_cols_by("ARM") |>
#'   analyze(
#'     vars = "AVAL",
#'     afun = a_summarize_mmrm,
#'     na_str = tern::default_na_str(),
#'     show_labels = "hidden",
#'     extra_args = list(
#'       variables = list(
#'         covariates = c("AGE"),
#'         id = "ID",
#'         arm = "ARM",
#'         visit = "VISIT"
#'       ),
#'       conf_level = 0.9,
#'       cor_struct = "toeplitz",
#'       ref_levels = list(VISIT = "V0", ARM = "B: Placebo")
#'     )
#'   ) |>
#'   build_table(longdat) |>
#'   prune_table(all_zero)
#' @export
a_summarize_mmrm <- function(
    df,
    .var,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_summarize_mmrm,
    custom_stat_fnc_list = NULL,
    args_list = c(df = list(df), .var = .var, .spl_context = list(.spl_context), dots_extra_args)
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "summarize_mmrm",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Helper for Finding AVISIT after which CHG are all Missing
#'
#' @param df (`data.frame`)\cr with `CHG` and `AVISIT` variables.
#'
#' @return A string with either the factor level after which `AVISIT` is all missing,
#'   or `NA`.
#' @export
#'
#' @examples
#' df <- data.frame(
#'   AVISIT = factor(c(1, 2, 3, 4, 5)),
#'   CHG = c(5, NA, NA, NA, 3)
#' )
#' find_missing_chg_after_avisit(df)
#'
#' df2 <- data.frame(
#'   AVISIT = factor(c(1, 2, 3, 4, 5)),
#'   CHG = c(5, NA, 3, NA, NA)
#' )
#' find_missing_chg_after_avisit(df2)
#'
#' df3 <- data.frame(
#'   AVISIT = factor(c(1, 2, 3, 4, 5)),
#'   CHG = c(NA, NA, NA, NA, NA)
#' )
#' find_missing_chg_after_avisit(df3)
find_missing_chg_after_avisit <- function(df) {
  checkmate::assert_data_frame(df)
  checkmate::assert_factor(df$AVISIT, unique = TRUE, any.missing = FALSE)
  checkmate::assert_numeric(df$CHG)

  # Ensure the dataframe is sorted by AVISIT
  df <- df[order(df$AVISIT), ]

  # Last visit with available data.
  visit_levels_available <- as.integer(df[!is.na(df$CHG), ]$AVISIT)

  if (!length(visit_levels_available)) {
    return(levels(df$AVISIT)[1])
  }
  visit_levels_max_available <- max(visit_levels_available)

  # Visits with missing data.
  visit_levels_missing <- as.integer(df[is.na(df$CHG), ]$AVISIT)

  # Missing visits at the end.
  visit_levels_missing_end <- visit_levels_missing[visit_levels_missing > visit_levels_max_available]

  # Return first one if there is any.
  if (length(visit_levels_missing_end)) {
    levels(df$AVISIT)[min(visit_levels_missing_end)]
  } else {
    NA_character_
  }
}

# Copy from rbmi package, because this was only added recently, so not yet available in our package version.

#' Create a `rbmi` ready cluster
#'
#' @param cluster_or_cores Number of parallel processes to use or an existing cluster to make use of
#' @param objects a named list of objects to export into the sub-processes
#' @param packages a character vector of libraries to load in the sub-processes
#'
#' This function is a wrapper around `parallel::makePSOCKcluster()` but takes
#' care of configuring `rbmi` to be used in the sub-processes as well as loading
#' user defined objects and libraries and setting the seed for reproducibility.
#'
#' @return If `cluster_or_cores` is `1` this function will return `NULL`. If `cluster_or_cores`
#'   is a number greater than `1`, a cluster with `cluster_or_cores`  cores is returned.
#'
#' If `cluster_or_cores` is a cluster created via `parallel::makeCluster()` then this function
#' returns it after inserting the relevant `rbmi` objects into the existing cluster.
#'
#' @examples
#' \dontrun{
#' make_rbmi_cluster(5)
#' closeAllConnections()
#'
#' VALUE <- 5
#' myfun <- function(x) {
#'   x + day(VALUE)
#' }
#' make_rbmi_cluster(5, list(VALUE = VALUE, myfun = myfun), c("lubridate"))
#' closeAllConnections()
#'
#' cl <- parallel::makeCluster(5)
#' make_rbmi_cluster(cl)
#' closeAllConnections()
#' }
#' @export
make_rbmi_cluster <- function(cluster_or_cores = 1, objects = NULL, packages = NULL) {
  if (is.numeric(cluster_or_cores) && cluster_or_cores == 1) {
    return(NULL)
  } else if (is.numeric(cluster_or_cores)) {
    cl <- parallel::makePSOCKcluster(cluster_or_cores)
  } else if (methods::is(cluster_or_cores, "cluster")) {
    cl <- cluster_or_cores
  } else {
    stop(sprintf("`cluster_or_cores` has unsupported class of: %s", paste(class(cluster_or_cores), collapse = ", ")))
  }

  # Load user defined objects into the globalname space
  if (!is.null(objects) && length(objects)) {
    export_env <- list2env(objects)
    parallel::clusterExport(cl, names(objects), export_env)
  }

  # Load user defined packages
  packages <- c(packages, "assertthat")
  # Remove attempts to load `rbmi` as this will be covered later
  packages <- setdiff(packages, "rbmi")
  devnull <- parallel::clusterCall(
    cl,
    function(pkgs) lapply(pkgs, function(x) library(x, character.only = TRUE)),
    as.list(packages)
  )

  # Ensure reproducibility
  parallel::clusterSetRNGStream(cl, sample.int(1))

  # If user has previously configured `rbmi` sub-processes then early exit
  exported_rbmi <- unlist(parallel::clusterEvalQ(cl, exists("..exported..parallel..rbmi")))
  if (all(exported_rbmi)) {
    return(cl)
  }

  # Ensure that exported and unexported objects are all directly accessible from the globalenv in the sub-processes
  is_in_rbmi_development <- FALSE
  if (is_in_rbmi_development) {
    devnull <- parallel::clusterEvalQ(cl, pkgload::load_all())
  } else {
    devnull <- parallel::clusterEvalQ(cl, {
      .namespace <- getNamespace("rbmi")
      for (.nsfun in ls(.namespace)) {
        assign(.nsfun, get(.nsfun, envir = .namespace))
      }
    })
  }

  # Set variable to signify `rbmi` has been configured
  devnull <- parallel::clusterEvalQ(cl, {
    ..exported..parallel..rbmi <- TRUE
  })

  return(cl)
}

#' Parallelise Lapply
#'
#' Simple wrapper around `lapply` and [`parallel::clusterApplyLB`] to abstract away
#' the logic of deciding which one to use
#' @param cl Cluster created by [`parallel::makeCluster()`] or `NULL`
#' @param fun Function to be run
#' @param x object to be looped over
#' @param ... extra arguments passed to `fun`
#' @return `list` of results of calling `fun` on elements of `x`.
par_lapply <- function(cl, fun, x, ...) {
  result <- if (is.null(cl)) {
    lapply(x, fun, ...)
  } else {
    parallel::clusterApplyLB(cl, x, fun, ...)
  }
  return(result)
}

#' Analyse Multiple Imputed Datasets
#'
#' @description
#' This function takes multiple imputed datasets (as generated by
#' the [rbmi::impute()] function) and runs an analysis function on
#' each of them.
#'
#' @importFrom assertthat assert_that
#'
#' @details
#' This function works by performing the following steps:
#'
#' 1. Extract a dataset from the `imputations` object.
#' 2. Apply any delta adjustments as specified by the `delta` argument.
#' 3. Run the analysis function `fun` on the dataset.
#' 4. Repeat steps 1-3 across all of the datasets inside the `imputations`
#' object.
#' 5. Collect and return all of the analysis results.
#'
#' The analysis function `fun` must take a `data.frame` as its first
#' argument. All other options to [rbmi_analyse()] are passed onto `fun`
#' via `...`.
#' `fun` must return a named list with each element itself being a
#' list containing a single
#' numeric element called `est` (or additionally `se` and `df` if
#' you had originally specified [rbmi::method_bayes()] or [rbmi::method_approxbayes()])
#' i.e.:
#' \preformatted{
#' myfun <- function(dat, ...) {
#'     mod_1 <- lm(data = dat, outcome ~ group)
#'     mod_2 <- lm(data = dat, outcome ~ group + covar)
#'     x <- list(
#'         trt_1 = list(
#'             est = coef(mod_1)[['group']],  # Use [[ ]] for safety
#'             se = sqrt(vcov(mod_1)['group', 'group']), # Use ['','']
#'             df = df.residual(mod_1)
#'         ),
#'         trt_2 = list(
#'             est = coef(mod_2)[['group']],  # Use [[ ]] for safety
#'             se = sqrt(vcov(mod_2)['group', 'group']), # Use ['','']
#'             df = df.residual(mod_2)
#'         )
#'      )
#'      return(x)
#'  }
#' }
#'
#' Please note that the `vars$subjid` column (as defined in the original call to
#' [rbmi::draws()]) will be scrambled in the data.frames that are provided to `fun`.
#' This is to say they will not contain the original subject values and as such
#' any hard coding of subject ids is strictly to be avoided.
#'
#' By default `fun` is the [rbmi_ancova()] function.
#' Please note that this function
#' requires that a `vars` object, as created by [rbmi::set_vars()], is provided via
#' the `vars` argument e.g. `rbmi_analyse(imputeObj, vars = rbmi::set_vars(...))`. Please
#' see the documentation for [rbmi_ancova()] for full details.
#' Please also note that the theoretical justification for the conditional mean imputation
#' method (`method = method_condmean()` in [rbmi::draws()]) relies on the fact that ANCOVA is
#' a linear transformation of the outcomes.
#' Thus care is required when applying alternative analysis functions in this setting.
#'
#' The `delta` argument can be used to specify offsets to be applied
#' to the outcome variable in the imputed datasets prior to the analysis.
#' This is typically used for sensitivity or tipping point analyses. The
#' delta dataset must contain columns `vars$subjid`, `vars$visit` (as specified
#' in the original call to [rbmi::draws()]) and `delta`. Essentially this `data.frame`
#' is merged onto the imputed dataset by `vars$subjid` and `vars$visit` and then
#' the outcome variable is modified by:
#'
#' ```
#' imputed_data[[vars$outcome]] <- imputed_data[[vars$outcome]] + imputed_data[['delta']]
#' ```
#'
#' Please note that in order to provide maximum flexibility, the `delta` argument
#' can be used to modify any/all outcome values including those that were not
#' imputed. Care must be taken when defining offsets. It is recommend that you
#' use the helper function [rbmi::delta_template()] to define the delta datasets as
#' this provides utility variables such as `is_missing` which can be used to identify
#' exactly which visits have been imputed.
#'
#' @seealso [rbmi::extract_imputed_dfs()] for manually extracting imputed
#' datasets.
#' @seealso [rbmi::delta_template()] for creating delta data.frames.
#' @seealso [rbmi_ancova()] for the default analysis function.
#'
#' @param imputations An `imputations` object as created by [rbmi::impute()].
#' @param fun An analysis function to be applied to each imputed dataset. See details.
#' @param delta A `data.frame` containing the delta transformation to be applied to the imputed
#' datasets prior to running `fun`. See details.
#' @param ... Additional arguments passed onto `fun`.
#' @param cluster_or_cores The number of parallel processes to use when running this function. Can also be a
#' cluster object created by [`make_rbmi_cluster()`]. See the parallelisation section below.
#' @param .validate Should `imputations` be checked to ensure it conforms to the required format
#' (default = `TRUE`) ? Can gain a small performance increase if this is set to `FALSE` when
#' analysing a large number of samples.
#'
#' @section Parallelisation:
#' To speed up the evaluation of `rbmi_analyse()` you can use the `cluster_or_cores` argument to enable parallelisation.
#' Simply providing an integer will get `rbmi` to automatically spawn that many background processes
#' to parallelise across. If you are using a custom analysis function then you need to ensure
#' that any libraries or global objects required by your function are available in the
#' sub-processes. To do this you need to use the [`make_rbmi_cluster()`] function for example:
#' ```
#' my_custom_fun <- function(...) <some analysis code>
#' cl <- make_rbmi_cluster(
#'     4,
#'     objects = list('my_custom_fun' = my_custom_fun),
#'     packages = c('dplyr', 'nlme')
#' )
#' rbmi_analyse(
#'     imputations = imputeObj,
#'     fun = my_custom_fun,
#'     cluster_or_cores = cl
#' )
#' parallel::stopCluster(cl)
#' ```
#'
#' Note that there is significant overhead both with setting up the sub-processes and with
#' transferring data back-and-forth between the main process and the sub-processes. As such
#' parallelisation of the `rbmi_analyse()` function tends to only be worth it when you have
#' `> 2000` samples generated by [rbmi::draws()]. Conversely using parallelisation if your samples
#' are smaller than this may lead to longer run times than just running it sequentially.
#'
#' It is important to note that the implementation of parallel processing within [rbmi::analyse()`] has
#' been optimised around the assumption that the parallel processes will be spawned on the same
#' machine and not a remote cluster. One such optimisation is that the required data is saved to
#' a temporary file on the local disk from which it is then read into each sub-process. This is
#' done to avoid the overhead of transferring the data over the network. Our assumption is that
#' if you are at the stage where you need to be parallelising your analysis over a remote cluster
#' then you would likely be better off parallelising across multiple `rbmi` runs rather than within
#' a single `rbmi` run.
#'
#' Finally, if you are doing a tipping point analysis you can get a reasonable performance
#' improvement by re-using the cluster between each call to `rbmi_analyse()` e.g.
#' ```
#' cl <- make_rbmi_cluster(4)
#' ana_1 <- rbmi_analyse(
#'     imputations = imputeObj,
#'     delta = delta_plan_1,
#'     cluster_or_cores = cl
#' )
#' ana_2 <- rbmi_analyse(
#'     imputations = imputeObj,
#'     delta = delta_plan_2,
#'     cluster_or_cores = cl
#' )
#' ana_3 <- rbmi_analyse(
#'     imputations = imputeObj,
#'     delta = delta_plan_3,
#'     cluster_or_cores = cl
#' )
#' parallel::clusterStop(cl)
#' ```
#'
#' @return An `analysis` object, as defined by `rbmi`, representing the desired
#' analysis applied to each of the imputed datasets in `imputations`.
#' @examples
#' library(rbmi)
#' library(dplyr)
#'
#' dat <- antidepressant_data
#' dat$GENDER <- as.factor(dat$GENDER)
#' dat$POOLINV <- as.factor(dat$POOLINV)
#' set.seed(123)
#' pat_ids <- sample(levels(dat$PATIENT), nlevels(dat$PATIENT) / 4)
#' dat <- dat |>
#'   filter(PATIENT %in% pat_ids) |>
#'   droplevels()
#' dat <- expand_locf(
#'   dat,
#'   PATIENT = levels(dat$PATIENT),
#'   VISIT = levels(dat$VISIT),
#'   vars = c("BASVAL", "THERAPY"),
#'   group = c("PATIENT"),
#'   order = c("PATIENT", "VISIT")
#' )
#' dat_ice <- dat %>%
#'   arrange(PATIENT, VISIT) %>%
#'   filter(is.na(CHANGE)) %>%
#'   group_by(PATIENT) %>%
#'   slice(1) %>%
#'   ungroup() %>%
#'   select(PATIENT, VISIT) %>%
#'   mutate(strategy = "JR")
#' dat_ice <- dat_ice[-which(dat_ice$PATIENT == 3618), ]
#' vars <- set_vars(
#'   outcome = "CHANGE",
#'   visit = "VISIT",
#'   subjid = "PATIENT",
#'   group = "THERAPY",
#'   covariates = c("THERAPY")
#' )
#' drawObj <- draws(
#'   data = dat,
#'   data_ice = dat_ice,
#'   vars = vars,
#'   method = method_condmean(type = "jackknife", covariance = "csh"),
#'   quiet = TRUE
#' )
#' references <- c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO")
#' imputeObj <- impute(drawObj, references)
#'
#' rbmi_analyse(imputations = imputeObj, vars = vars)
#' @export
rbmi_analyse <- function(imputations, fun = rbmi_ancova, delta = NULL, ..., cluster_or_cores = 1, .validate = TRUE) {
  # nocov

  if (.validate) rbmi::validate(imputations)

  assertthat::assert_that(is.function(fun), msg = "`fun` must be a function")

  assertthat::assert_that(is.null(delta) | is.data.frame(delta), msg = "`delta` must be NULL or a data.frame")

  vars <- imputations$data$vars

  if (.validate) devnull <- lapply(imputations$imputations, function(x) rbmi::validate(x))

  if (!is.null(delta)) {
    expected_vars <- c(vars$subjid, vars$visit, "delta")
    assertthat::assert_that(
      all(expected_vars %in% names(delta)),
      msg = sprintf("The following variables must exist witin `delta`: `%s`", paste0(expected_vars, collapse = "`, `"))
    )
  }

  # Mangle name to avoid any conflicts with user defined objects if running in a cluster
  ..rbmi..analysis..imputations <- imputations
  ..rbmi..analysis..delta <- delta
  ..rbmi..analysis..fun <- fun
  objects <- list(
    ..rbmi..analysis..imputations = ..rbmi..analysis..imputations,
    ..rbmi..analysis..delta = ..rbmi..analysis..delta,
    ..rbmi..analysis..fun = ..rbmi..analysis..fun
  )

  cl <- make_rbmi_cluster(cluster_or_cores)

  if (methods::is(cl, "cluster")) {
    ..rbmi..analysis..data..path <- tempfile()
    saveRDS(objects, file = ..rbmi..analysis..data..path, compress = FALSE)
    devnull <- parallel::clusterExport(cl, "..rbmi..analysis..data..path", environment())
    devnull <- parallel::clusterEvalQ(cl, {
      ..rbmi..analysis..objects <- readRDS(..rbmi..analysis..data..path)
      list2env(..rbmi..analysis..objects, envir = environment())
    })
  }

  # If the user provided the clusters object directly then do not close it on completion
  if (!methods::is(cluster_or_cores, "cluster")) {
    on.exit(
      {
        if (!is.null(cl)) parallel::stopCluster(cl)
      },
      add = TRUE,
      after = FALSE
    )
  }

  # Chunk up requests for significant speed improvement when running in parallel
  number_of_cores <- ifelse(is.null(cl), 1, length(cl))
  indexes <- seq_along(imputations$imputations)
  indexes_split <- split(indexes, (indexes %% number_of_cores) + 1)

  results <- par_lapply(
    cl,
    function(indicies, ...) {
      inner_fun <- function(idx, ...) {
        dat2 <- (utils::getFromNamespace("extract_imputed_df", "rbmi"))(
          ..rbmi..analysis..imputations$imputations[[idx]],
          ..rbmi..analysis..imputations$data,
          ..rbmi..analysis..delta
        )
        ..rbmi..analysis..fun(dat2, ...)
      }
      lapply(indicies, inner_fun, ...)
    },
    indexes_split,
    ...
  ) |>
    unlist(recursive = FALSE, use.names = FALSE)

  # Re-order to ensure results are returned in same order as imputations
  results <- results[order(unlist(indexes_split, use.names = FALSE))]
  names(results) <- NULL

  fun_name <- deparse(substitute(fun))
  if (length(fun_name) > 1) {
    fun_name <- "<Anonymous Function>"
  } else if (is.null(fun_name)) {
    fun_name <- "<NULL>"
  }

  ret <- (utils::getFromNamespace("as_analysis", "rbmi"))(
    results = results,
    fun_name = fun_name,
    delta = delta,
    fun = fun,
    method = imputations$method
  )
  rbmi::validate(ret)
  return(ret)
}

#' Function Factory to Create Padded In Rows Content
#'
#' @param length_out (`count` or `NULL`)\cr full length which should be padded
#'   by `NA` which will be printed as empty strings.
#' @param label (`string`)\cr row label to be used for the first row only.
#'
#' @return The function of `content` and `.formats`.
#' @keywords internal
pad_in_rows_fct <- function(length_out = NULL, label = "") {
  checkmate::assert_count(length_out, null.ok = TRUE)
  checkmate::assert_string(label)

  function(content, .formats) {
    content_list <- as.list(content)
    if (!is.null(length_out)) {
      missing_length <- length_out - length(content_list)
      if (missing_length > 0) {
        content_list <- c(content_list, rep(NA, missing_length))
      }
    }
    in_rows(
      .list = content_list,
      .names = as.character(seq_along(content_list)),
      .labels = c(
        label,
        rep(
          "",
          length(content_list) -
            1L
        )
      ),
      .formats = .formats,
      .format_na_strs = ""
    )
  }
}

#' First Level Column Split for LS Means Wide Table Layouts
#'
#' @inheritParams proposal_argument_convention
#'
#' @keywords internal
lsmeans_wide_first_split_fun_fct <- function(include_variance) {
  checkmate::assert_flag(include_variance)

  post_fun <- function(ret, spl, fulldf, .spl_context) {
    if (include_variance) {
      short_split_result(
        reference_group = "Reference Group",
        testing_group = "Testing Group",
        variance = "",
        comparison = "Testing - Reference",
        fulldf = fulldf
      )
    } else {
      short_split_result(
        reference_group = "Reference Group",
        testing_group = "Testing Group",
        comparison = "Testing - Reference",
        fulldf = fulldf
      )
    }
  }
  make_split_fun(post = list(post_fun))
}

#' Second Level Column Split for LS Means Wide Table Layouts
#'
#' @inheritParams proposal_argument_convention
#' @param include_pval (`flag`)\cr whether to include the p-value column.
#'
#' @keywords internal
lsmeans_wide_second_split_fun_fct <- function(pval_sided, conf_level, include_pval) {
  checkmate::assert_flag(include_pval)

  post_fun <- function(ret, spl, fulldf, .spl_context) {
    colset <- .spl_context[nrow(.spl_context), "value"][[1]]
    if (colset %in% c("reference_group", "testing_group")) {
      short_split_result(treatment = "Treatment", n = "N", lsmean = "LS Mean", fulldf = fulldf)
    } else if (colset == "variance") {
      short_split_result(mse = "M. S. Error", df = "Error DF", fulldf = fulldf)
    } else {
      if (include_pval) {
        short_split_result(
          lsmean = "LS Mean",
          se = "SE",
          ci = f_conf_level(conf_level),
          pval = paste0(abs(as.numeric(pval_sided)), "-sided p-value~[super a]"),
          fulldf = fulldf
        )
      } else {
        short_split_result(lsmean = "LS Mean", se = "SE", ci = f_conf_level(conf_level), fulldf = fulldf)
      }
    }
  }
  make_split_fun(post = list(post_fun))
}

#' Content Row Analysis Function for LS Means Wide Table Layouts
#'
#' @inheritParams proposal_argument_convention
#' @param variables (`list`)\cr see [fit_ancova()] for required variable
#'   specifications.
#' @param ref_level (`string`)\cr the reference level of the treatment arm variable.
#' @param treatment_levels (`character`)\cr the non-reference levels of the treatment arm
#'   variable.
#' @param pval_sided (`string`)\cr either '2' for two-sided or '1' for 1-sided with greater than
#'   control or '-1' for 1-sided with smaller than control alternative hypothesis.
#' @param formats (`list`)\cr including `lsmean`, `mse`, `df`, `lsmean_diff`, `se`,
#'   `ci`, `pval` formats.
#'
#' @details This assumes a lot of structure of the layout, and is only intended to be used inside
#'   [summarize_lsmeans_wide()], please see there for the layout structure that is needed.
#'
#' @keywords internal
lsmeans_wide_cfun <- function(
    df,
    labelstr,
    .spl_context,
    variables,
    ref_level,
    treatment_levels,
    pval_sided = c("2", "1", "-1"),
    conf_level,
    formats) {
  this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
  pad_in_rows <- pad_in_rows_fct(length_out = length(treatment_levels), label = labelstr)
  if (this_col_split[1] %in% c("reference_group", "testing_group")) {
    this_level <- if (this_col_split[1] == "reference_group") {
      ref_level
    } else {
      treatment_levels
    }
    has_this_level <- df[[variables$arm]] %in% this_level
    if (this_col_split[2] == "treatment") {
      pad_in_rows(this_level, .formats = "xx")
    } else if (this_col_split[2] == "n") {
      pad_in_rows(df$n[has_this_level], .formats = "xx")
    } else {
      pad_in_rows(df$estimate_est[has_this_level], .formats = formats$lsmean)
    }
  } else if (this_col_split[1] == "variance") {
    has_trt <- df[[variables$arm]] %in% treatment_levels
    all_the_same <- (length(unique(df$mse[has_trt])) == 1) && (length(unique(df$df[has_trt])) == 1)
    # Note: We only take the first value, because in this ANCOVA case they are identical and we don't want to
    # repeat it in the table.
    inds <- if (all_the_same) 1 else seq_len(sum(has_trt))
    if (this_col_split[2] == "mse") {
      pad_in_rows(df$mse[has_trt][inds], .formats = formats$mse)
    } else {
      pad_in_rows(df$df[has_trt][inds], .formats = formats$df)
    }
  } else {
    has_trt <- df[[variables$arm]] %in% treatment_levels
    switch(this_col_split[2],
      "lsmean" = pad_in_rows(df$estimate_contr[has_trt], .formats = formats$lsmean_diff),
      "se" = pad_in_rows(df$se_contr[has_trt], .formats = formats$se),
      "ci" = pad_in_rows(
        tern::combine_vectors(df$lower_cl_contr[has_trt], df$upper_cl_contr[has_trt]),
        .formats = formats$ci
      ),
      "pval" = {
        pval_sided <- match.arg(pval_sided)
        pval_col <- switch(pval_sided,
          `2` = "p_value",
          `1` = "p_value_greater",
          `-1` = "p_value_less"
        )
        pad_in_rows(df[[pval_col]][has_trt], .formats = formats$pval)
      }
    )
  }
}

#' Layout Generating Function for LS Means Wide Table Layouts
#'
#' @inheritParams proposal_argument_convention
#' @inheritParams lsmeans_wide_cfun
#' @param lyt empty layout, i.e. result of [rtables::basic_table()]
#' @param include_variance (`flag`)\cr whether to include the variance statistics
#'   (M.S. error and d.f.).
#' @param include_pval (`flag`)\cr whether to include the p-value column.
#'
#' @return Modified layout.
#' @export
#' @examples
#' variables <- list(
#'   response = "FEV1",
#'   covariates = c("RACE", "SEX"),
#'   arm = "ARMCD",
#'   id = "USUBJID",
#'   visit = "AVISIT"
#' )
#' fit <- fit_ancova(
#'   vars = variables,
#'   data = mmrm::fev_data,
#'   conf_level = 0.9,
#'   weights_emmeans = "equal"
#' )
#' anl <- broom::tidy(fit)
#' basic_table() |>
#'   summarize_lsmeans_wide(
#'     variables = variables,
#'     ref_level = fit$ref_level,
#'     treatment_levels = fit$treatment_levels,
#'     pval_sided = "2",
#'     conf_level = 0.8
#'   ) |>
#'   build_table(df = anl)
summarize_lsmeans_wide <- function(
    lyt,
    variables,
    ref_level,
    treatment_levels,
    conf_level,
    pval_sided = "2",
    include_variance = TRUE,
    include_pval = TRUE,
    formats = list(
      lsmean = jjcsformat_xx("xx.x"),
      mse = jjcsformat_xx("xx.x"),
      df = jjcsformat_xx("xx."),
      lsmean_diff = jjcsformat_xx("xx.x"),
      se = jjcsformat_xx("xx.xx"),
      ci = jjcsformat_xx("(xx.xx, xx.xx)"),
      pval = jjcsformat_pval_fct(0)
    )) {
  # Check that all required format elements are present in the formats parameter
  checkmate::assert_names(
    names(formats),
    must.include = c("lsmean", "mse", "df", "lsmean_diff", "se", "ci", "pval"),
    .var.name = "formats"
  )
  lyt |>
    split_cols_by(
      variables$arm,
      split_fun = lsmeans_wide_first_split_fun_fct(include_variance = include_variance)
    ) |>
    split_cols_by(
      variables$arm,
      split_fun = lsmeans_wide_second_split_fun_fct(
        include_pval = include_pval,
        pval_sided = pval_sided,
        conf_level = conf_level
      )
    ) |>
    split_rows_by(variables$visit, section_div = "") |>
    summarize_row_groups(
      var = variables$arm,
      cfun = lsmeans_wide_cfun,
      extra_args = list(
        variables = variables,
        ref_level = ref_level,
        treatment_levels = treatment_levels,
        pval_sided = pval_sided,
        conf_level = conf_level,
        formats = formats
      )
    )
}

#' Proportion difference estimation
#'
#' The analysis function [a_proportion_diff_j()] can be used to create a layout element to estimate
#' the difference in proportion of responders within a studied population. The primary analysis variable,
#' `vars`, is a logical variable indicating whether a response has occurred for each record. See the `method`
#' parameter for options of methods to use when constructing the confidence interval of the proportion difference.
#' A stratification variable can be supplied via the `strata` element of the `variables` argument.
#'
#' @param df (`data.frame`)\cr input data frame.
#' @param .var (`string`)\cr name of the response variable.
#' @param ref_path (`character`)\cr path to the reference group.
#' @param .spl_context (`environment`)\cr split context environment.
#' @param ... Additional arguments passed to the statistics function.
#' @param .stats (`character`)\cr statistics to calculate.
#' @param .formats (`list`)\cr formats for the statistics.
#' @param .labels (`list`)\cr labels for the statistics.
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
#' @param .ref_group (`data.frame`)\cr reference group data frame.
#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column.
#' @param variables (`list`)\cr list with strata variable names.
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
#' @param method (`string`)\cr method to use for confidence interval calculation.
#' @param weights_method (`string`)\cr method to use for weights calculation in stratified analysis.
#'
#' @name prop_diff
#' @order 1
#'
#' @note The [a_proportion_diff_j()] function has the `_j` suffix to distinguish it
#'   from [tern::a_proportion_diff()]. The functions here are a copy from the `tern` package
#'   with additional features:
#'
#'   * Additional statistic `diff_est_ci` is returned.
#'   * `ref_path` needs to be provided as extra argument to specify the control group column.
#'
NULL

#' @describeIn prop_diff Statistics function estimating the difference
#'   in terms of responder proportion.
#'
#' @return
#' * `s_proportion_diff_j()` returns a named list of elements `diff`,
#'    `diff_ci`, `diff_est_ci` and `diff_ci_3d`.
#'
#' @note When performing an unstratified analysis, methods `'cmh'`, `'strat_newcombe'`,
#'   and `'strat_newcombecc'` are not permitted.
#'
#' @examples
#'
#' s_proportion_diff_j(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' s_proportion_diff_j(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "cmh"
#' )
#'
#' @export
#' @order 3
s_proportion_diff_j <- function(
    df,
    .var,
    .ref_group,
    .in_ref_col,
    variables = list(strata = NULL),
    conf_level = 0.95,
    method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe", "strat_newcombecc"),
    weights_method = "cmh") {
  start <- s_proportion_diff(
    df = df,
    .var = .var,
    .ref_group = .ref_group,
    .in_ref_col = .in_ref_col,
    variables = variables,
    conf_level = conf_level,
    method = method,
    weights_method = weights_method
  )

  c(
    start,
    list(
      diff_est_ci = with_label(
        c(start$diff, start$diff_ci),
        paste0("% Difference (", f_conf_level(conf_level), ")")
      ),
      diff_ci_3d = with_label(
        c(start$diff, start$diff_ci),
        paste0("Relative Risk (", f_conf_level(conf_level), ")")
      )
    )
  )
}

#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
#'
#' @return
#' * `a_proportion_diff_j()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' nex <- 100
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' l <- basic_table() |>
#'   split_cols_by(var = "grp") |>
#'   analyze(
#'     vars = "rsp",
#'     afun = a_proportion_diff_j,
#'     show_labels = "hidden",
#'     na_str = tern::default_na_str(),
#'     extra_args = list(
#'       conf_level = 0.9,
#'       method = "ha",
#'       ref_path = c("grp", "B")
#'     )
#'   )
#'
#' build_table(l, df = dta)
#' @export
#' @order 2
a_proportion_diff_j <- function(
    df,
    .var,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_proportion_diff_j,
    custom_stat_fnc_list = NULL,
    args_list = c(
      df = list(df),
      .var = .var,
      .ref_group = list(ref$ref_group),
      .in_ref_col = ref$in_ref_col,
      dots_extra_args
    )
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "proportion_diff",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Odds ratio estimation
#'
#' @description `r lifecycle::badge('stable')`
#'
#' @param method (`string`)\cr whether to use the correct (`'exact'`) calculation in the conditional likelihood or one
#'   of the approximations, or the CMH method. See [survival::clogit()] for details.
#'
#' @param df (`data.frame`)\cr input data frame.
#' @param .var (`string`)\cr name of the response variable.
#' @param .df_row (`data.frame`)\cr data frame containing all rows.
#' @param ref_path (`character`)\cr path to the reference group.
#' @param .spl_context (`environment`)\cr split context environment.
#' @param ... Additional arguments passed to the statistics function.
#' @param .stats (`character`)\cr statistics to calculate.
#' @param .formats (`list`)\cr formats for the statistics.
#' @param .labels (`list`)\cr labels for the statistics.
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
#' @param .ref_group (`data.frame`)\cr reference group data frame.
#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column.
#' @param variables (`list`)\cr list with arm and strata variable names.
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
#' @param groups_list (`list`)\cr list of groups for combination.
#'
#' @note
#' The `a_odds_ratio_j()` and `s_odds_ratio_j()` functions have the `_j` suffix to distinguish them
#' from [tern::a_odds_ratio()] and [tern::s_odds_ratio()], respectively.
#' These functions differ as follows:
#'
#' * Additional `method = 'cmh'` option is provided to calculate the Cochran-Mantel-Haenszel estimate.
#' * The p-value is returned as an additional statistic.
#'
#' Once these updates are contributed back to `tern`, they can later be replaced by the `tern` versions.
#'
#' @name odds_ratio
#' @order 1
NULL

#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @param na_if_no_events (`flag`)\cr whether the point estimate should be `NA` if there
#'   are no events in one arm. The p-value and confidence interval will still be computed.
#'
#' @return
#' * `s_odds_ratio_j()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`), `pval` and `n_tot`.
#'
#' @examples
#' s_odds_ratio_j(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' s_odds_ratio_j(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' s_odds_ratio_j(
#'   df = subset(dta, grp == "A"),
#'   method = "cmh",
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = c("strata"))
#' )
#' @export
s_odds_ratio_j <- function(
    df,
    .var,
    .ref_group,
    .in_ref_col,
    .df_row,
    variables = list(arm = NULL, strata = NULL),
    conf_level = 0.95,
    groups_list = NULL,
    na_if_no_events = TRUE,
    method = c("exact", "approximate", "efron", "breslow", "cmh")) {
  checkmate::assert_flag(na_if_no_events)
  # New: pval here
  y <- list(or_ci = list(), n_tot = list(), pval = list())
  method <- match.arg(method)
  one_group_empty <- nrow(df) == 0 || nrow(.ref_group) == 0

  if (!.in_ref_col) {
    (assert_proportion_value)(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    if (one_group_empty) {
      y <- list(or_ci = c(est = NA_real_, lcl = 0, ucl = Inf), n_tot = c(n_tot = nrow(df) + nrow(.ref_group)), pval = 1)
    } else if (is.null(variables$strata)) {
      data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref"))
      )
      y <- or_glm_j(data, conf_level = conf_level)
    } else {
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))
      checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow", "cmh"), empty.ok = FALSE)
      # The group variable prepared for stratified analysis must be synchronized with the combination groups
      # definition.
      if (is.null(groups_list)) {
        ref_grp <- as.character(unique(.ref_group[[variables$arm]]))
        trt_grp <- as.character(unique(df[[variables$arm]]))
        grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
      } else {
        # If more than one level in reference col.
        reference <- as.character(unique(.ref_group[[variables$arm]]))
        grp_ref_flag <- vapply(X = groups_list, FUN.VALUE = TRUE, FUN = function(x) all(reference %in% x))
        ref_grp <- names(groups_list)[grp_ref_flag]

        # If more than one level in treatment col.
        treatment <- as.character(unique(df[[variables$arm]]))
        grp_trt_flag <- vapply(X = groups_list, FUN.VALUE = TRUE, FUN = function(x) all(treatment %in% x))
        trt_grp <- names(groups_list)[grp_trt_flag]

        grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp)
        grp <- combine_levels(grp, levels = treatment, new_level = trt_grp)
      }

      # New: CMH method
      if (method == "cmh") {
        data <- data.frame(
          rsp = c(.ref_group[[.var]], df[[.var]]),
          grp = factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref")),
          strata = interaction(rbind(.ref_group[variables$strata], df[variables$strata]))
        )

        y <- or_cmh(data, conf_level = conf_level)
      } else {
        # The reference level in `grp` must be the same as in the `rtables` column split.
        data <- data.frame(rsp = .df_row[[.var]], grp = grp, strata = interaction(.df_row[variables$strata]))

        y_all <- or_clogit_j(data, conf_level = conf_level, method = method)
        checkmate::assert_string(trt_grp)
        # New: pval here
        checkmate::assert_subset(trt_grp, names(y_all$or_ci_pvals))
        y_or_ci_pval <- y_all$or_ci_pvals[[trt_grp]]
        y$or_ci <- y_or_ci_pval[c("est", "lcl", "ucl")]
        y$n_tot <- y_all$n_tot
        y$pval <- y_or_ci_pval["pval"]
      }
    }

    one_group_no_events <- (sum(.ref_group[[.var]]) == 0) || (sum(df[[.var]]) == 0)
    if (na_if_no_events && one_group_no_events) {
      y$or_ci[["est"]] <- NA_real_
    }

    na_because_sparse <- one_group_empty || (na_if_no_events && one_group_no_events)
    if ("est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]]) && !na_because_sparse && method != "approximate") {
      warning(
        "Unable to compute the odds ratio estimate. Please try re-running the function with ",
        "parameter `method` set to \"approximate\"."
      )
    }
  }

  y$or_ci <- with_label(x = y$or_ci, label = paste0("Odds Ratio (", 100 * conf_level, "% CI)"))

  # New: pval here
  y$pval <- unname(y$pval)

  y
}

#' @describeIn odds_ratio Formatted analysis function which is used as `afun`. Note that the
#'   junco specific `ref_path` and `.spl_context` arguments are used for reference column information.
#'
#' @return
#' * `a_odds_ratio_j()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' a_odds_ratio_j(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   ref_path = c("grp", "B"),
#'   .spl_context = data.frame(
#'     cur_col_split = I(list("grp")),
#'     cur_col_split_val = I(list(c(grp = "A"))),
#'     full_parent_df = I(list(dta))
#'   ),
#'   .df_row = dta
#' )
#'
#'
#' l <- basic_table() |>
#'   split_cols_by(var = "grp") |>
#'   analyze(
#'     "rsp",
#'     afun = a_odds_ratio_j,
#'     show_labels = "hidden",
#'     extra_args = list(
#'       ref_path = c("grp", "B"),
#'       .stats = c("or_ci", "pval")
#'     )
#'   )
#'
#' build_table(l, df = dta)
#'
#' l2 <- basic_table() |>
#'   split_cols_by(var = "grp") |>
#'   analyze(
#'     "rsp",
#'     afun = a_odds_ratio_j,
#'     show_labels = "hidden",
#'     extra_args = list(
#'       variables = list(arm = "grp", strata = "strata"),
#'       method = "cmh",
#'       ref_path = c("grp", "A"),
#'       .stats = c("or_ci", "pval")
#'     )
#'   )
#'
#' build_table(l2, df = dta)
#' @export
#' @order 2
a_odds_ratio_j <- function(
    df,
    .var,
    .df_row,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_odds_ratio_j,
    custom_stat_fnc_list = NULL,
    args_list = c(
      df = list(df),
      .var = .var,
      .df_row = list(.df_row),
      .ref_group = list(ref$ref_group),
      .in_ref_col = ref$in_ref_col,
      dots_extra_args
    )
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "odds_ratio",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Helper functions for odds ratio estimation
#'
#' @description `r lifecycle::badge('stable')`
#'
#' Functions to calculate odds ratios in [s_odds_ratio_j()].
#'
#' @inheritParams odds_ratio
#' @inheritParams proposal_argument_convention
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
#'   `strata` for [or_clogit_j()].
#' @return A named `list` of elements `or_ci`, `n_tot` and `pval`.
#'
#' @seealso [odds_ratio]
#'
#' @name h_odds_ratio
NULL

#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' or_glm_j(data, conf_level = 0.95)
#'
#' @export
or_glm_j <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  (assert_proportion_value)(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)
  formula <- stats::as.formula("rsp ~ grp")
  model_fit <- stats::glm(formula = formula, data = data, family = stats::binomial(link = "logit"))

  # Note that here we need to discard the intercept.
  or <- exp(stats::coef(model_fit)[-1])
  or_ci <- exp(stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE])

  values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl"))
  n_tot <- stats::setNames(nrow(model_fit$model), "n_tot")

  # New: pval here
  pval <- summary(model_fit)$coef[-1, "Pr(>|z|)"]

  list(or_ci = values, n_tot = n_tot, pval = pval)
}

#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' or_clogit_j(data, conf_level = 0.95)
#'
#' @export
or_clogit_j <- function(data, conf_level, method = "exact") {
  checkmate::assert_logical(data$rsp)
  (assert_proportion_value)(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))
  checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  strata <- survival::strata
  formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  model_fit <- clogit_with_tryCatch(
    formula = formula,
    data = data,
    method = method
  )

  # New: pval here

  # Create a list with one set of OR estimates, CI and p-value per coefficient, i.e.  comparison of one group vs. the
  # reference group.
  coef_est <- stats::coef(model_fit)
  ci_est <- stats::confint(model_fit, level = conf_level)
  pvals <- summary(model_fit)$coef[, "Pr(>|z|)", drop = FALSE]
  or_ci_pvals <- list()
  for (coef_name in names(coef_est)) {
    grp_name <- gsub("^grp", "", x = coef_name)
    or_ci_pvals[[grp_name]] <- stats::setNames(
      object = c(exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), pvals[coef_name, 1]),
      nm = c("est", "lcl", "ucl", "pval")
    )
  }
  list(or_ci_pvals = or_ci_pvals, n_tot = c(n_tot = model_fit$n))
}

#' @describeIn h_odds_ratio Estimates the odds ratio based on CMH. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' set.seed(123)
#' data <- data.frame(
#'   rsp = as.logical(rbinom(n = 40, size = 1, prob = 0.5)),
#'   grp = letters[sample(1:2, size = 40, replace = TRUE)],
#'   strata = LETTERS[sample(1:2, size = 40, replace = TRUE)],
#'   stringsAsFactors = TRUE
#' )
#'
#' or_cmh(data, conf_level = 0.95)
#'
#' @export
or_cmh <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  # first dimension: treatment, control - therefore we reverse the levels order 2nd dimension: TRUE, FALSE 3rd
  # dimension: levels of strata rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(
    factor(data$grp, levels = rev(levels(data$grp))),
    factor(data$rsp, levels = c("TRUE", "FALSE")),
    data$strata
  )

  trt_ind <- 1
  ctrl_ind <- 2

  resp_ind <- 1
  nonresp_ind <- 2

  n1resp <- t_tbl[trt_ind, resp_ind, ]
  n2resp <- t_tbl[ctrl_ind, resp_ind, ]
  n1non <- t_tbl[trt_ind, nonresp_ind, ]
  n2non <- t_tbl[ctrl_ind, nonresp_ind, ]
  # Example: n2non are the control patient non-responders per stratum.

  # CMH statistic for odds ratio, Treatment over Control
  use_stratum <- (n1resp + n2resp + n1non + n2non) > 0
  n1resp <- n1resp[use_stratum]
  n2resp <- n2resp[use_stratum]
  n1non <- n1non[use_stratum]
  n2non <- n2non[use_stratum]
  n <- n1resp + n2resp + n1non + n2non

  or_num <- sum(n1resp * n2non / n)
  or_denom <- sum(n1non * n2resp / n)
  log_or <- log(or_num) - log(or_denom)
  or <- exp(log_or)

  term1_num <- sum((n1resp + n2non) * n1resp * n2non / n^2)
  term1_denom <- 2 * (sum(n1resp * n2non / n))^2

  term2_num <- sum(((n1resp + n2non) * n1non * n2resp + (n1non + n2resp) * n1resp * n2non) / n^2)
  term2_denom <- 2 * (sum(n1resp * n2non / n)) * (sum(n1non * n2resp / n))

  term3_num <- sum((n1non + n2resp) * n1non * n2resp / n^2)
  term3_denom <- 2 * (sum(n1non * n2resp / n))^2

  var_log_or <- term1_num / term1_denom + term2_num / term2_denom + term3_num / term3_denom
  se_log_or <- sqrt(var_log_or)

  z <- stats::qnorm((1 + conf_level) / 2)
  log_rel_or <- log_or + c(-1, +1) * z * se_log_or
  ci <- exp(log_rel_or)
  or_ci <- stats::setNames(c(or, ci), c("est", "lcl", "ucl"))

  pval <- stats::mantelhaen.test(t_tbl, correct = FALSE)$p.value

  n_tot <- stats::setNames(sum(n), "n_tot")

  list(or_ci = or_ci, n_tot = n_tot, pval = pval)
}

#' Get default statistical methods and their associated formats, labels, and indent modifiers
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' @note These functions have been copied from the `tern` package file
#'   `utils_default_stats_formats_labels.R` from GitHub development version 0.9.7.9017.
#'   Slight modifications have been applied to enhance functionality:
#'
#'   * `tern_get_stats` added the `tern_stats` argument to avoid hardcoding within the function's body.
#'   * `tern_get_labels_from_stats` is more careful when receiving partial `labels_in`
#'      and partial `label_attr_from_stats`.
#'
#'   Once these features are included in the `tern` package, this file could be removed from
#'   the `junco` package, and the functions could be used from the `tern` namespace directly.
#'
#' @name tern_default_stats_formats_labels
NULL


# Utility function used to separate custom stats (user-defined functions) from defaults
.split_std_from_custom_stats <- function(stats_in) {
  out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL)
  if (is.list(stats_in)) {
    is_custom_fnc <- sapply(stats_in, is.function)
    checkmate::assert_list(
      stats_in[is_custom_fnc],
      types = "function",
      names = "named"
    )
    out[["custom_stats"]] <- stats_in[is_custom_fnc]
    out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc])
    all_stats <- names(stats_in) # to keep the order
    all_stats[!is_custom_fnc] <- out[["default_stats"]]
    out[["all_stats"]] <- all_stats
  } else {
    out[["default_stats"]] <- out[["all_stats"]] <- stats_in
  }
  out
}

# Utility function to apply statistical functions
.apply_stat_functions <- function(
    default_stat_fnc,
    custom_stat_fnc_list,
    args_list) {
  # Default checks
  checkmate::assert_function(default_stat_fnc)
  checkmate::assert_list(
    custom_stat_fnc_list,
    types = "function",
    null.ok = TRUE,
    names = "named"
  )
  checkmate::assert_list(args_list)

  # Checking custom stats have same formals
  if (!is.null(custom_stat_fnc_list)) {
    fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]]
    for (fnc in custom_stat_fnc_list) {
      if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) {
        stop(
          "The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
          "as the default statistical function. In this case your custom function has ",
          names(formals(fnc))[[1]],
          " as first parameter, while the default function has ",
          fundamental_call_to_data,
          "."
        )
      }
      if (!any(names(formals(fnc)) == "...")) {
        stop(
          "The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
          "In this case your custom function does not have `...`."
        )
      }
    }
  }

  # Applying
  out_default <- do.call(default_stat_fnc, args = args_list)
  out_custom <- lapply(
    custom_stat_fnc_list,
    function(fnc) do.call(fnc, args = args_list)
  )

  # Merging
  c(out_default, out_custom)
}

#' @describeIn tern_default_stats_formats_labels Get statistics available for a given method
#'   group (analyze function).
#' @keywords internal
tern_get_stats <- function(
    method_groups = "analyze_vars_numeric",
    stats_in = NULL,
    custom_stats_in = NULL,
    add_pval = FALSE,
    tern_defaults = tern_default_stats) {
  checkmate::assert_character(method_groups)
  checkmate::assert_character(stats_in, null.ok = TRUE)
  checkmate::assert_character(custom_stats_in, null.ok = TRUE)
  checkmate::assert_flag(add_pval)

  # Default is still numeric
  if (any(method_groups == "analyze_vars")) {
    method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric"
  }

  type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks

  # Defaults for loop
  out <- NULL

  # Loop for multiple method groups
  for (mgi in method_groups) {
    if (mgi %in% names(tern_defaults)) {
      out_tmp <- tern_defaults[[mgi]]
    } else {
      stop(
        "The selected method group (",
        mgi,
        ") has no default statistical method."
      )
    }
    out <- unique(c(out, out_tmp))
  }

  # Add custom stats
  out <- c(out, custom_stats_in)

  # If you added pval to the stats_in you certainly want it
  if (!is.null(stats_in) && any(grepl("^pval", stats_in))) {
    stats_in_pval_value <- stats_in[grepl("^pval", stats_in)]

    # Must be only one value between choices
    checkmate::assert_choice(
      stats_in_pval_value,
      c("pval", "pval_counts", "pvalue")
    )

    # Mismatch with counts and numeric
    if (
      any(grepl("counts", method_groups)) &&
        stats_in_pval_value != "pval_counts" ||
        any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval"
    ) {
      # nolint
      stop(
        "Inserted p-value (",
        stats_in_pval_value,
        ") is not valid for type ",
        type_tmp,
        ". Use ",
        paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")),
        " instead."
      )
    }

    # Lets add it even if present (thanks to unique)
    add_pval <- TRUE
  }

  # Mainly used in "analyze_vars" but it could be necessary elsewhere
  if (isTRUE(add_pval)) {
    if (any(grepl("counts", method_groups))) {
      out <- unique(c(out, "pval_counts"))
    } else {
      out <- unique(c(out, "pval"))
    }
  }

  # Filtering for stats_in (character vector)
  if (!is.null(stats_in)) {
    out <- intersect(stats_in, out) # It orders them too
  }

  # If intersect did not find matches (and no pval?) -> error
  if (length(out) == 0) {
    stop(
      "The selected method group(s) (",
      paste0(method_groups, collapse = ", "),
      ")",
      " do not have the required default statistical methods:\n",
      paste0(stats_in, collapse = " ")
    )
  }

  out
}

#' @describeIn tern_default_stats_formats_labels Get formats corresponding to a list of statistics.
#' @keywords internal
tern_get_formats_from_stats <- function(
    stats,
    formats_in = NULL,
    levels_per_stats = NULL,
    tern_defaults = tern_default_formats) {
  checkmate::assert_character(stats, min.len = 1)
  # It may be a list if there is a function in the formats
  if (checkmate::test_list(formats_in, null.ok = TRUE)) {
    checkmate::assert_list(formats_in, null.ok = TRUE)
    # Or it may be a vector of characters
  } else {
    checkmate::assert_character(formats_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # If unnamed formats given as formats_in and same number of stats, use one format per stat
  if (
    !is.null(formats_in) &&
      length(formats_in) == length(stats) &&
      is.null(names(formats_in)) &&
      is.null(levels_per_stats)
  ) {
    out <- as.list(formats_in) |> stats::setNames(stats)
    return(out)
  }

  # If levels_per_stats not given, assume one row per statistic
  if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) |> stats::setNames(stats)

  # Apply custom formats
  out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults)

  # Default to NULL if no format
  case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE)
  out[names(out) == out | case_input_is_not_stat] <- list(NULL)

  out
}

#' @describeIn tern_default_stats_formats_labels Get labels corresponding to a list of statistics.
#' @keywords internal
tern_get_labels_from_stats <- function(
    stats,
    labels_in = NULL,
    levels_per_stats = NULL,
    label_attr_from_stats = NULL,
    tern_defaults = tern_default_labels) {
  checkmate::assert_character(stats, min.len = 1)

  # Modification:
  # If any label_attr_from_stats is available and valid, save in labels_in
  # if not specified there already (so don't overwrite labels_in).
  if (!is.null(label_attr_from_stats)) {
    valid_label_attr_from_stats <- label_attr_from_stats[
      nzchar(label_attr_from_stats) &
        !sapply(label_attr_from_stats, is.null) &
        !is.na(label_attr_from_stats)
    ]
    if (length(valid_label_attr_from_stats)) {
      do_save <- setdiff(
        names(valid_label_attr_from_stats),
        names(labels_in)
      )
      labels_in <- c(labels_in, valid_label_attr_from_stats[do_save])
    }
  }

  # It may be a list
  if (checkmate::test_list(labels_in, null.ok = TRUE)) {
    checkmate::assert_list(labels_in, null.ok = TRUE)
    # Or it may be a vector of characters
  } else {
    checkmate::assert_character(labels_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # If unnamed labels given as labels_in and same number of stats, use one label per stat
  if (
    !is.null(labels_in) &&
      length(labels_in) == length(stats) &&
      is.null(names(labels_in)) &&
      is.null(levels_per_stats)
  ) {
    out <- as.list(labels_in) |> stats::setNames(stats)
    return(out)
  }

  # If levels_per_stats not given, assume one row per statistic
  if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) |> stats::setNames(stats)

  # Apply custom labels
  out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults)
  out
}

# Function to loop over each stat and levels to set correct values
.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) {
  out <- list()

  for (stat_i in names(levels_per_stats)) {
    # Get all levels of the statistic
    all_lvls <- levels_per_stats[[stat_i]]

    if ((length(all_lvls) == 1 && all_lvls == stat_i) || is.null(all_lvls)) {
      # One row per statistic
      out[[stat_i]] <- if (stat_i %in% names(user_in)) {
        # 1. Check for stat_i in user input
        user_in[[stat_i]]
      } else if (stat_i %in% names(tern_defaults)) {
        # 2. Check for stat_i in tern defaults
        tern_defaults[[stat_i]]
      } else {
        # 3. Otherwise stat_i
        stat_i
      }
    } else {
      # One row per combination of variable level and statistic
      # Loop over levels for each statistic
      for (lev_i in all_lvls) {
        # Construct row name (stat_i.lev_i)
        row_nm <- paste(stat_i, lev_i, sep = ".")

        out[[row_nm]] <- if (row_nm %in% names(user_in)) {
          # 1. Check for stat_i.lev_i in user input
          user_in[[row_nm]]
        } else if (lev_i %in% names(user_in)) {
          # 2. Check for lev_i in user input
          user_in[[lev_i]]
        } else if (stat_i %in% names(user_in)) {
          # 3. Check for stat_i in user input
          user_in[[stat_i]]
        } else if (lev_i %in% names(tern_defaults)) {
          # 4. Check for lev_i in tern defaults (only used for labels)
          tern_defaults[[lev_i]]
        } else if (stat_i %in% names(tern_defaults)) {
          # 5. Check for stat_i in tern defaults
          tern_defaults[[stat_i]]
        } else {
          # 6. Otherwise lev_i
          lev_i
        }
      }
    }
  }

  out
}

#' @describeIn tern_default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows.
#' @keywords internal
tern_get_indents_from_stats <- function(stats,
                                        indents_in = NULL,
                                        levels_per_stats = NULL,
                                        tern_defaults = as.list(rep(0L, length(stats))) |> stats::setNames(stats)) {
  checkmate::assert_character(stats, min.len = 1)
  # It may be a list
  if (checkmate::test_list(indents_in, null.ok = TRUE)) {
    checkmate::assert_list(indents_in, null.ok = TRUE)
    # Or it may be a vector of integers
  } else {
    checkmate::assert_integerish(indents_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # If levels_per_stats not given, assume one row per statistic
  if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) |> stats::setNames(stats)

  # Single indentation level for all rows
  if (is.null(names(indents_in)) && length(indents_in) == 1) {
    out <- rep(indents_in, length(levels_per_stats |> unlist()))
    return(out)
  }

  # Apply custom indentation
  out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults)
  out
}

# tern_default_labels ----

#' @describeIn tern_default_stats_formats_labels Named `character` vector of default labels for `tern`.
#'   This is only copied here from the latest GitHub version, because otherwise a tern test fails.
#'
#' @keywords internal
tern_default_labels <- c(
  cv = "CV (%)",
  iqr = "IQR",
  geom_cv = "CV % Geometric Mean",
  geom_mean = "Geometric Mean",
  geom_mean_sd = "Geometric Mean (SD)",
  geom_mean_ci = "Geometric Mean 95% CI",
  geom_mean_ci_3d = "Geometric Mean (95% CI)",
  geom_sd = "Geometric SD",
  mad = "Median Absolute Deviation",
  max = "Maximum",
  mean = "Mean",
  mean_ci = "Mean 95% CI",
  mean_ci_3d = "Mean (95% CI)",
  mean_pval = "Mean p-value (H0: mean = 0)",
  mean_sd = "Mean (SD)",
  mean_sdi = "Mean -/+ 1xSD",
  mean_se = "Mean (SE)",
  mean_sei = "Mean -/+ 1xSE",
  median = "Median",
  median_ci = "Median 95% CI",
  median_ci_3d = "Median (95% CI)",
  median_range = "Median (Min - Max)",
  min = "Minimum",
  n = "n",
  n_blq = "n_blq",
  nonunique = "Number of events",
  pval = "p-value (t-test)", # Default for numeric
  pval_counts = "p-value (chi-squared test)", # Default for counts
  quantiles = "25% and 75%-ile",
  quantiles_lower = "25%-ile (95% CI)",
  quantiles_upper = "75%-ile (95% CI)",
  range = "Min - Max",
  range_censor = "Range (censored)",
  range_event = "Range (event)",
  rate = "Adjusted Rate",
  rate_ratio = "Adjusted Rate Ratio",
  sd = "SD",
  se = "SE",
  sum = "Sum",
  unique = "Number of patients with at least one event"
)

#' MMRM Analysis for Imputed Datasets
#'
#' Performs an MMRM for two or more groups returning the estimated
#' 'treatment effect' (i.e. the contrast between treatment groups and the control
#' group) and the least square means estimates in each group.
#'
#' @param data (`data.frame`)\cr containing the data to be used in the model.
#' @param vars (`vars`)\cr list as generated by [rbmi::set_vars()]. Only the `subjid`, `group`,
#' `visit`, `outcome` and `covariates` elements are required. See details.
#' @param cov_struct (`string`)\cr the covariance structure to use. Note that the same
#'   covariance structure is assumed for all treatment groups.
#' @param visits (`NULL` or `character`)\cr An optional character vector specifying
#'   which visits to fit the MMRM at. If `NULL`, the MMRM model will be fit to
#'   the whole dataset.
#' @param weights (`string`)\cr the weighting strategy to be used when calculating the
#'   least square means, either `'counterfactual'` or `'equal'`.
#' @param ... additional arguments passed to [mmrm::mmrm()], in particular
#'   `method` and `vcov` to control the degrees of freedom and variance-covariance
#'   adjustment methods as well as `reml` decide between REML and ML estimation.
#'
#' @details
#' The function works as follows:
#'
#' 1. Optionally select the subset of the `data` corresponding to `visits.
#' 2. Fit an MMRM as `vars$outcome ~ vars$group + vars$visit + vars$covariates`
#'    with the specified covariance structure for visits within subjects.
#' 3. Extract the 'treatment effect' & least square means for each treatment group
#'    vs the control group.
#'
#' In order to meet the formatting standards set by [rbmi::analyse()] the results will be collapsed
#' into a single list suffixed by the visit name, e.g.:
#' ```
#' list(
#'    var_B_visit_1 = list(est = ...),
#'    trt_B_visit_1 = list(est = ...),
#'    lsm_A_visit_1 = list(est = ...),
#'    lsm_B_visit_1 = list(est = ...),
#'    var_B_visit_2 = list(est = ...),
#'    trt_B_visit_2 = list(est = ...),
#'    lsm_A_visit_2 = list(est = ...),
#'    lsm_B_visit_2 = list(est = ...),
#'    ...
#' )
#' ```
#' Please note that 'trt' refers to the treatment effects, and 'lsm' refers to the least
#' square mean results. In the above example `vars$group` has two factor levels A and B.
#' The new 'var' refers to the model estimated variance of the residuals at the given
#' visit, together with the degrees of freedom (which is treatment group specific).
#'
#' If you want to include additional interaction terms in your model this can be done
#' by providing them to the `covariates` argument of [rbmi::set_vars()]
#' e.g. `set_vars(covariates = c('sex*age'))`.
#'
#' @note The `group` and `visit` interaction `group:visit` is not included by
#' default in the model, therefore please add that to `covariates` manually if
#' you want to include it. This will make sense in most cases.
#' @inherit rbmi_ancova return
#'
#' @seealso [rbmi_analyse()]
#' @seealso [mmrm::mmrm()]
#' @seealso [rbmi::set_vars()]
#' @export
rbmi_mmrm <- function(
    data,
    vars,
    cov_struct = c("us", "toep", "cs", "ar1"),
    visits = NULL,
    weights = c("counterfactual", "equal"),
    ...) {
  subjid <- vars[["subjid"]]
  outcome <- vars[["outcome"]]
  group <- vars[["group"]]
  visit <- vars[["visit"]]
  covariates <- vars[["covariates"]]

  checkmate::assert_string(subjid)
  checkmate::assert_string(outcome)
  checkmate::assert_string(group)
  checkmate::assert_string(visit)
  checkmate::assert_character(covariates, null.ok = TRUE)

  cov_struct <- match.arg(cov_struct)
  weights <- match.arg(weights)

  expected_vars <- c((utils::getFromNamespace("extract_covariates", "rbmi"))(covariates), outcome, group, subjid, visit)
  checkmate::assert_subset(expected_vars, names(data))

  checkmate::assert_factor(data[[visit]])
  if (is.null(visits)) {
    visits <- as.character(unique(data[[visit]]))
  }
  checkmate::assert_subset(visits, as.character(data[[visit]]))
  data <- data[data[[visit]] %in% visits, , drop = FALSE]

  covariates_part <- paste(covariates, collapse = " + ")
  grp_visit_part <- paste(group, "+", visit)
  random_effects_part <- paste0(cov_struct, "(", visit, " | ", subjid, ")")
  rhs_formula <- paste(grp_visit_part, "+", random_effects_part)
  if (covariates_part != "") rhs_formula <- paste(covariates_part, "+", rhs_formula)
  formula <- stats::as.formula(paste(outcome, "~", rhs_formula))

  fit <- mmrm::mmrm(formula, data = data, ...)
  res <- lapply(visits, function(x) {
    res <- rbmi_mmrm_single_info(fit, visit_level = x, visit = visit, group = group, weights = weights)
    names(res) <- paste0(names(res), "_", x)
    return(res)
  })
  unlist(res, recursive = FALSE)
}

#' Extract Single Visit Information from a Fitted MMRM for Multiple Imputation Analysis
#'
#' @description
#' Extracts relevant estimates from a given fitted MMRM. See [rbmi_mmrm()] for full details.
#'
#' @param fit (`mmrm`)\cr the fitted MMRM.
#' @param visit_level (`string`)\cr the visit level to extract information for.
#' @param visit (`string`)\cr the name of the visit variable.
#' @param group (`string`)\cr the name of the group variable.
#'
#' @return a list with `trt_*`, `var_*` and `lsm_*` elements. See [rbmi_mmrm] for
#' full details.
#' @inheritParams rbmi_mmrm
#' @seealso [rbmi_mmrm()]
rbmi_mmrm_single_info <- function(fit, visit_level, visit, group, weights) {
  checkmate::assert_class(fit, "mmrm")
  checkmate::assert_string(visit_level)
  checkmate::assert_string(visit)
  checkmate::assert_string(group)

  data <- mmrm::component(fit, "full_frame")
  checkmate::assert_factor(data[[group]])
  grp_levels <- levels(data[[group]])

  visit_at_level <- stats::setNames(list(visit_level), visit)
  em_res <- emmeans::emmeans(fit, group, by = visit, at = visit_at_level, weights = weights)
  em_df <- as.data.frame(em_res)

  all_lsm <- lapply(grp_levels, function(x) {
    this_df <- em_df[em_df[[group]] == x, ]
    with(this_df, list(est = emmean, se = SE, df = df))
  })
  names(all_lsm) <- paste0("lsm_", grp_levels)

  var_est <- mmrm::VarCorr(fit)[visit_level, visit_level]

  cont_res <- emmeans::contrast(em_res, "trt.vs.ctrl", ref = grp_levels[1])
  cont_df <- as.data.frame(cont_res)

  all_var <- lapply(grp_levels[-1], function(x) {
    this_df <- cont_df[cont_df[["contrast"]] == paste(x, "-", grp_levels[1]), ]
    # Note: This should be revisited here for the `se` - currently we take the SE from the LS means contrast, not
    # from the variance estimate!
    with(this_df, list(est = var_est, se = SE, df = df))
  })
  names(all_var) <- paste0("var_", grp_levels[-1])

  all_trt <- lapply(grp_levels[-1], function(x) {
    this_df <- cont_df[cont_df[["contrast"]] == paste(x, "-", grp_levels[1]), ]
    with(this_df, list(est = estimate, se = SE, df = df))
  })
  names(all_trt) <- paste0("trt_", grp_levels[-1])

  c(all_var, all_trt, all_lsm)
}

#' @title s_function for proportion of factor levels
#' @description A simple statistics function which prepares the numbers with percentages
#'   in the required format. The denominator here is from the alternative counts data set
#'   in the given row and column split.
#'
#'   If a total row is shown, then here just the total number is shown (without 100%).
#' @param x (`factor`)\cr categorical variable we want to analyze.
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
#' @param use_alt_counts (`flag`)\cr whether the `.alt_df` should be used for the total, i.e. the denominator.
#'   If not, then the number of non-missing values in `x` is used.
#' @param show_total (`string`)\cr show the total level optionally on the top or in the bottom
#'   of the factor levels.
#' @param total_label (`string`)\cr which label to use for the optional total level.
#' @return The [rtables::in_rows()] result with the proportion statistics.
#' @seealso [s_proportion_logical()] for tabulating logical `x`.
#' @export
s_proportion_factor <- function(
    x,
    .alt_df,
    use_alt_counts = TRUE,
    show_total = c("none", "top", "bottom"),
    total_label = "Total") {
  checkmate::assert_factor(x)
  checkmate::assert_flag(use_alt_counts)
  show_total <- match.arg(show_total)

  N <- if (use_alt_counts) nrow(.alt_df) else sum(!is.na(x))
  tab <- lapply(as.list(table(x)), function(xi) rcell(xi * c(1, 1 / N), format = jjcsformat_count_fraction))
  if (show_total != "none") {
    checkmate::assert_string(total_label)
    tab_total <- stats::setNames(list(rcell(N, format = "xx")), total_label)

    tab <- if (show_total == "top") {
      c(tab_total, tab)
    } else {
      c(tab, tab_total)
    }
  }
  in_rows(.list = tab)
}

#' @title s_function for proportion of `TRUE` in logical vector
#' @description A simple statistics function which prepares the numbers with percentages
#'   in the required format. The denominator here is from the alternative counts data set
#'   in the given row and column split.
#' @param x (`logical`)\cr binary variable we want to analyze.
#' @param label (`string`)\cr label to use.
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
#' @return The [rtables::in_rows()] result with the proportion statistics.
#' @seealso [s_proportion_factor()] for tabulating factor `x`.
#' @export
s_proportion_logical <- function(x, label = "Responders", .alt_df) {
  n <- sum(x)
  N <- nrow(.alt_df)
  p_hat <- n / N
  list(n_prop = with_label(rcell(c(n, p_hat), format = jjcsformat_count_fraction), label))
}

#' @title c_function for proportion of `TRUE` in logical vector
#' @description A simple statistics function which prepares the numbers with percentages
#'   in the required format, for use in a split content row. The denominator here is
#'   from the column N. Note that we don't use here .alt_df because that might not
#'   have required row split variables available.
#' @param x (`logical`)\cr binary variable we want to analyze.
#' @param labelstr (`string`)\cr label string.
#' @param labelstr (`string`)\cr label string.
#' @param label_fstr (`string`)\cr format string for the label.
#' @param format (`character` or `list`)\cr format for the statistics.
#' @param .N_col (`numeric`)\cr number of columns.
#' @param .N_col (`numeric`)\cr number of columns.
#' @return The [rtables::in_rows()] result with the proportion statistics.
#' @seealso [s_proportion_logical()] for the related statistics function.
#' @export
c_proportion_logical <- function(x, labelstr, label_fstr, format, .N_col) {
  checkmate::assert_logical(x)
  num <- sum(x)
  denom <- .N_col
  in_rows(est_prop = c(num, num / denom), .formats = format, .labels = sprintf(label_fstr, labelstr))
}

#' Helper Function to Create Logical Design Matrix from Factor Variable
#'
#' @param df (`data.frame`)\cr including a factor variable with name in `.var`.
#' @param .var (`string`)\cr name of the factor variable.
#'
#' @return The logical matrix with dummy encoding of all factor levels.
#' @keywords internal
#' @export
#' @examples
#' h_get_design_mat(df = data.frame(a = factor(c("a", "b", "a"))), .var = "a")
h_get_design_mat <- function(df, .var) {
  checkmate::assert_data_frame(df)
  checkmate::assert_string(.var)
  checkmate::assert_factor(df[[.var]])

  model_formula <- stats::as.formula(paste("~ -1 +", .var))
  design_mat <- stats::model.matrix(model_formula, df)
  mode(design_mat) <- "logical"
  colnames(design_mat) <- levels(df[[.var]])
  design_mat
}

#' Formatted Analysis Function For Proportion Confidence Interval for Logical
#'
#' @param x (`logical`)\cr including binary response values.
#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
#' @param formats (`list`)\cr including element `prop_ci` with the
#'   required format. Note that the value is in percent already.
#' @param method (`string`)\cr please see [tern::s_proportion()] for possible
#'   methods.
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
#'
#' @return The [rtables::rcell()] result.
#' @export
#'
#' @examples
#' a_proportion_ci_logical(
#'   x = DM$SEX == "F",
#'   .alt_df = DM,
#'   conf_level = 0.95,
#'   formats = list(prop_ci = jjcsformat_xx("xx.xx% - xx.xx%")),
#'   method = "wald"
#' )
a_proportion_ci_logical <- function(x, .alt_df, conf_level, method, formats) {
  checkmate::assert_logical(x)
  checkmate::assert_data_frame(.alt_df)
  checkmate::assert_list(formats)

  diff_n <- nrow(.alt_df) - length(x)
  if (diff_n > 0) {
    x <- c(x, rep(FALSE, length = diff_n))
  }
  est <- s_proportion(x, conf_level = conf_level, method = method)
  rcell(est$prop_ci, format = formats$prop_ci)
}

#' Formatted Analysis Function For Proportion Confidence Interval for Factor
#'
#' @param df (`data.frame`)\cr including factor `.var`.
#' @param .var (`string`)\cr name of the factor variable.
#' @param .var (`string`)\cr name of the factor variable.
#' @param \dots see [a_proportion_ci_logical()] for additionally required
#'   arguments.
#'
#' @return The [rtables::rcell()] result.
#' @export
#'
#' @examples
#' a_proportion_ci_factor(
#'   df = DM,
#'   .var = "SEX",
#'   .alt_df = DM,
#'   conf_level = 0.95,
#'   formats = list(prop_ci = jjcsformat_xx("xx.x%, xx.x%")),
#'   method = "clopper-pearson"
#' )
a_proportion_ci_factor <- function(df, .var, ...) {
  checkmate::assert_factor(df[[.var]])

  design_mat <- h_get_design_mat(df, .var)
  design_df <- as.data.frame(design_mat)
  res <- lapply(design_df, a_proportion_ci_logical, ...)
  in_rows(.list = res, .labels = colnames(design_mat))
}

#' Split Function for Proportion Analysis Columns (TEFCGIS08 e.g.)
#'
#' Here we just split into 3 columns `n`, `%` and `Cum %`.
#'
#' @param ret (`list`)\cr return value from the previous split function.
#' @param spl (`list`)\cr split information.
#' @param fulldf (`data.frame`)\cr full data frame.
#' @param .spl_context (`environment`)\cr split context environment.
#'
#' @note This split function is used in the proportion table TEFCGIS08 and similar ones.
#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of
#'   post-processing function.
prop_post_fun <- function(ret, spl, fulldf, .spl_context) {
  short_split_result(n = "n", percent = "%", cum_percent = "Cum %", fulldf = fulldf)
}

#' @rdname prop_post_fun
#'
#' @param df A data frame that contains all analysis variables.
#' @param vals A character vector that contains values to use for the split.
#' @param labels A character vector that contains labels for the statistics (without indent).
#' @param trim A single logical that indicates whether to trim the values.
#' @return a split function for use in [rtables::split_rows_by].
#' @export
prop_split_fun <- make_split_fun(post = list(prop_post_fun))

#' Formatted Analysis Function for Proportion Analysis (TEFCGIS08 e.g.)
#'
#' This function applies to a factor `x` when a column split was prepared with
#' [prop_split_fun()] before.
#'
#' @details In the column named `n`, the counts of the categories as well as an
#' optional `Total` count will be shown. In the column named `percent`, the
#' percentages of the categories will be shown, with an optional blank entry for
#' `Total`. In the column named `cum_percent`, the cumulative percentages will
#' be shown instead.
#'
#' @param x (`factor`)\cr factor variable to analyze.
#' @param .spl_context (`environment`)\cr split context environment.
#' @param formats (`list`)\cr formats for the statistics.
#' @param add_total_level (`flag`)\cr whether to add a total level.
#' @param x (`factor`)\cr factor variable to analyze.
#' @param .spl_context (`environment`)\cr split context environment.
#' @param formats (`list`)\cr formats for the statistics.
#' @param add_total_level (`flag`)\cr whether to add a total level.
#'
#' @return A `VerticalRowsSection` as returned by [rtables::in_rows].
#' @export
prop_table_afun <- function(x, .spl_context, formats, add_total_level = FALSE) {
  checkmate::assert_list(formats, len = 3, names = "unique")
  checkmate::assert_names(names(formats), permutation.of = c("n", "percent", "cum_percent"))
  checkmate::assert_flag(add_total_level)

  stat <- utils::tail(.spl_context[nrow(.spl_context), "cur_col_split_val"][[1]], 1)
  optional_total <- function(x) if (add_total_level) x else NULL
  ns <- table(x)
  res <- if (stat == "n") {
    c(as.list(ns), optional_total(length(x)))
  } else if (stat == "percent") {
    c(as.list(ns / length(x) * 100), optional_total(list(NULL)))
  } else if (stat == "cum_percent") {
    c(as.list(cumsum(ns / length(x) * 100)), optional_total(list(NULL)))
  } else {
    stop("unexpected proportion statistic in split")
  }
  in_rows(.list = res, .labels = c(levels(x), optional_total("Total")), .formats = formats[[stat]])
}

#' Tabulation for Exposure Tables
#'
#'
#' @details
#' Creates statistics needed for standard exposure table
#' This includes differences and 95% CI and total treatment years.
#' This is designed to be used as an analysis (afun in `analyze`) function.
#'
#' @name a_summarize_ex_j
NULL


#' @inheritParams proposal_argument_convention
#' @describeIn a_summarize_ex_j Statistics function needed for the exposure tables
#'
#' @param daysconv conversion required to get the values into days
#' (i.e 1 if original PARAMCD unit is days, 30.4375 if original PARAMCD unit is in months)
#' @param ancova (`logical`)\cr If FALSE, only descriptive methods will be used. \cr
#' If TRUE Ancova methods will be used for each of the columns : AVAL, CHG, DIFF. \cr
#' @param comp_btw_group (`logical`)\cr If TRUE,
#' \cr When ancova = FALSE, the estimate of between group difference (on CHG) will be based upon two-sample t-test. \cr
#' \cr When ancova = TRUE, the same ancova model will be used for the estimate of between group difference (on CHG).
#'
#' @param interaction_y (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
#' @param interaction_item (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
#' @param conf_level (`proportion`)\cr Confidence level of the interval
#' @param variables (named list of strings)\cr
#' list of additional analysis variables, with expected elements:
#'    * arm (string)\cr
#' group variable, for which the covariate adjusted means of multiple groups will be summarized.
#' Specifically, the first level of arm variable is taken as the reference group.
#'    * covariates (character)\cr
#' a vector that can contain single variable names (such as 'X1'), and/or interaction terms indicated by 'X1 * X2'.
#'
s_summarize_ex_j <- function(
    df,
    .var,
    .df_row,
    .spl_context,
    comp_btw_group = TRUE,
    ref_path = NULL,
    ancova = FALSE,
    interaction_y,
    interaction_item,
    conf_level,
    daysconv,
    variables) {
  control <- control_analyze_vars()
  control$conf_level <- conf_level
  x_stats <- s_summary(df[[.var]], na.rm = TRUE, .var, control = control)
  ## add extra for subject years
  subj_years <- x_stats[["sum"]] * daysconv / 365.25
  x_stats[["total_subject_years"]] <- c(x_stats[["sum"]], subj_years)
  names(x_stats[["total_subject_years"]]) <- c("total", "subject_years")

  cur_col_id <- .spl_context$cur_col_id[[length(.spl_context$split)]]
  indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE)

  if (indiffcol) {
    # blank out all stats
    x_stats <- sapply(
      names(x_stats),
      FUN = function(x) {
        x_stats[[x]] <- NULL
      },
      simplify = FALSE,
      USE.NAMES = TRUE
    )
    # diff between group will be updated in mean_sd stat
    if (comp_btw_group) {
      trt_var_refpath <- h_get_trtvar_refpath(ref_path, .spl_context, df)
      # trt_var_refpath is list with elements trt_var trt_var_refspec cur_trt_grp ctrl_grp make these elements
      # available in current environment
      trt_var <- trt_var_refpath$trt_var
      trt_var_refspec <- trt_var_refpath$trt_var_refspec
      cur_trt_grp <- trt_var_refpath$cur_trt_grp
      ctrl_grp <- trt_var_refpath$ctrl_grp

      .in_ref_col <- FALSE
      if (trt_var == ctrl_grp) .in_ref_col <- TRUE

      .ref_group <- .df_row[.df_row[[trt_var]] == ctrl_grp, ]

      if (ancova) {
        # ancova method for diff between group
        x_stats2 <- s_summarize_ancova_j(
          df = df,
          .var = .var,
          .ref_group = .ref_group,
          .in_ref_col = .in_ref_col,
          .df_row = .df_row,
          conf_level = conf_level,
          interaction_y = interaction_y,
          interaction_item = interaction_item,
          variables = variables
        )
        diffstat <- x_stats2[["lsmean_diffci"]]
      } else {
        # descriptive method for diff between group
        x_stats2 <- s_summarize_desc_j(
          df = df,
          .var = .var,
          .ref_group = .ref_group,
          .in_ref_col = .in_ref_col,
          control = control
        )
        diffstat <- x_stats2[["mean_diffci"]]
      }
      # actual update with the diffstat
      x_stats[["mean_sd"]] <- diffstat
    }
  }

  return(x_stats)
}

#' @title Analysis Function For Exposure Tables
#' @description
#' A function to create the appropriate statistics needed for exposure table
#' @details
#' Creates statistics needed for table. This includes differences and 95% CI and total treatment years.
#' This is designed to be used as an analysis (afun in `analyze`) function.
#' @inheritParams proposal_argument_convention
#'
#' @describeIn a_summarize_ex_j Formatted analysis function which is used as `afun`.
#'
#' @return
#' * `a_summarize_ex_j()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @aliases a_summarize_ex_j
#' @examples
#' library(dplyr)
#'
#' ADEX <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   TRT01A = c(
#'     "ARMA", "ARMA", "ARMA", "ARMA", "ARMA",
#'     "Placebo", "Placebo", "Placebo", "ARMA", "ARMA"
#'   ),
#'   AVAL = c(56, 78, 67, 87, 88, 93, 39, 87, 65, 55)
#' )
#'
#' ADEX <- ADEX |>
#'   mutate(TRT01A = as.factor(TRT01A))
#'
#' ADEX$colspan_trt <- factor(ifelse(ADEX$TRT01A == "Placebo", " ", "Active Study Agent"),
#'   levels = c("Active Study Agent", " ")
#' )
#'
#' ADEX$diff_header <- "Difference in Means (95% CI)"
#' ADEX$diff_label <- paste(ADEX$TRT01A, paste("vs", "Placebo"))
#'
#' colspan_trt_map <- create_colspan_map(ADEX,
#'   non_active_grp = "Placebo",
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = "TRT01A"
#' )
#' ref_path <- c("colspan_trt", "", "TRT01A", "Placebo")
#'
#' lyt <- basic_table() |>
#'   split_cols_by(
#'     "colspan_trt",
#'     split_fun = trim_levels_to_map(map = colspan_trt_map)
#'   ) |>
#'   split_cols_by("TRT01A") |>
#'   split_cols_by("diff_header", nested = FALSE) |>
#'   split_cols_by(
#'     "TRT01A",
#'     split_fun = remove_split_levels("Placebo"),
#'     labels_var = "diff_label"
#'   ) |>
#'   analyze("AVAL",
#'     afun = a_summarize_ex_j, var_labels = "Duration of treatment (Days)",
#'     show_labels = "visible",
#'     indent_mod = 0L,
#'     extra_args = list(
#'       daysconv = 1,
#'       ref_path = ref_path,
#'       variables = list(arm = "TRT01A", covariates = NULL),
#'       ancova = TRUE,
#'       comp_btw_group = TRUE
#'     )
#'   )
#'
#' result <- build_table(lyt, ADEX)
#'
#' result
#' @export
a_summarize_ex_j <- function(
    df,
    .var,
    .df_row,
    .spl_context,
    comp_btw_group = TRUE,
    ref_path = NULL,
    ancova = FALSE,
    interaction_y = FALSE,
    interaction_item = NULL,
    conf_level = 0.95,
    variables,
    .stats = c("mean_sd", "median", "range", "quantiles", "total_subject_years"),
    .formats = c(diff_mean_est_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)")),
    .labels = c(quantiles = "Interquartile range"),
    .indent_mods = NULL,
    na_str = rep("NA", 3),
    daysconv = 1) {
  if (!is.numeric(df[[.var]])) {
    stop("a_summarize_ex_j issue: input variable must be numeric.")
  }

  if (comp_btw_group && is.null(ref_path)) {
    stop("a_summarize_ex_j issue: argument ref_path cannot be NULL.")
  }

  if (comp_btw_group && ancova && is.null(variables)) {
    stop("a_summarize_ex_j issue: argument variables must be defined when ancova is requested.")
  }

  x_stats <- s_summarize_ex_j(
    df = df,
    .var = .var,
    .df_row = .df_row,
    .spl_context = .spl_context,
    comp_btw_group = comp_btw_group,
    ref_path = ref_path,
    ancova = ancova,
    interaction_y = interaction_y,
    interaction_item = interaction_item,
    conf_level = conf_level,
    daysconv = daysconv,
    variables = variables
  )

  # Fill in formatting defaults
  .stats_in <- .stats
  .stats <- tern_get_stats("analyze_vars_numeric", stats_in = .stats, custom_stats_in = NULL)
  if ("total_subject_years" %in% .stats_in) {
    # place the extra statistic at the appropriate place within .stats vector
    i <- match("total_subject_years", .stats_in)
    x <- .stats_in[i:length(.stats_in)]
    if (length(x) == 1) {
      .stats <- c(.stats, "total_subject_years")
    } else {
      i2 <- min(match(x, .stats), na.rm = TRUE)
      if (i2 == 1) {
        .stats <- c("total_subject_years", .stats)
      } else {
        .stats <- c(.stats[1:(i2 - 1)], "total_subject_years", .stats[i2:length(.stats)])
      }
    }
  }

  .stats_ext <- c(.stats, "diff_mean_est_ci")

  .formats <- junco_get_formats_from_stats(.stats_ext, .formats)
  .labels <- junco_get_labels_from_stats(.stats, .labels, label_attr_from_stats = get_label_attr_from_stats(x_stats))
  .indent_mods <- junco_get_indents_from_stats(.stats, .indent_mods)

  .names <- names(.labels)
  .labels <- .unlist_keep_nulls(.labels)
  .indent_mods <- .unlist_keep_nulls(.indent_mods)

  cur_col_id <- .spl_context$cur_col_id[[length(.spl_context$split)]]
  indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE)

  if (indiffcol && comp_btw_group) {
    .formats[["mean_sd"]] <- .formats[["diff_mean_est_ci"]]
  }
  .formats[["diff_mean_est_ci"]] <- NULL

  if (!is.null(na_str)) {
    .format_na_strs <- lapply(names(.formats), FUN = function(x) {
      na_str
    })
  } else {
    .format_na_strs <- NULL
  }

  x_stats <- x_stats[.stats]
  ret <- in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .names,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = .format_na_strs
  )
  return(ret)
}

#' @title Create TableTree as DataFrame via gentlg
#'
#' @param tt TableTree object to convert to a data frame
#' @param fontspec Font specification object
#' @param string_map Unicode mapping for special characters
#' @param markup_df Data frame containing markup information
#' @return `tt` represented as a "tbl" data.frame suitable for passing
#'   to [tidytlg::gentlg] via the `huxme` argument.
tt_to_tbldf <- function(
    tt,
    fontspec = font_spec("Times", 9L, 1),
    string_map = default_str_map,
    markup_df = dps_markup_df) {
  if (!validate_table_struct(tt)) {
    stop(
      "invalid table structure. summarize_row_groups without ",
      "analyze below it in layout structure?"
    )
  }
  mpf <- matrix_form(
    tt,
    indent_rownames = FALSE,
    expand_newlines = FALSE,
    fontspec = fontspec
  )

  strmat <- mf_strings(mpf)
  strmat[, 1] <- gsub("^[[:space:]]+", "", strmat[, 1])
  nhl <- mf_nlheader(mpf)
  strbody <- strmat[-(1:nhl), , drop = FALSE]
  row_type <- apply(
    strbody,
    1,
    function(x) {
      nonempty <- nzchar(x)
      if (all(!nonempty)) {
        ## shouldn't ever happen
        "EMPTY"
      } else if (nonempty[1] && all(!nonempty[-1])) {
        # only 1st cell nonempty
        "HEADER"
      } else {
        if (x[1] == "N") {
          "N"
        } else {
          "VALUE"
        }
      }
    }
  )
  rowdf <- mf_rinfo(mpf)
  rinds <- mf_lgrouping(mpf)[-(1:nhl)] -
    mf_nrheader(mpf)
  indentme <- rowdf$indent[rinds]
  anbr <- cumsum(!is.na(c(0, utils::head(rowdf$trailing_sep, -1))))[rinds] + 1 ## so it starts at 1
  roworder <- seq_len(NROW(rowdf)) - (anbr - 1)
  newrows <- c(0, ifelse(utils::tail(anbr, -1) == utils::head(anbr, -1), 0, 1))

  strbody <- prep_strs_for_rtf(strbody, string_map, markup_df)
  tbldf <- as.data.frame(strbody)

  names(tbldf) <- c("label", paste("col", seq_len(ncol(tbldf) - 1)))
  emptycols <- tbldf[1, ]
  emptycols[1, ] <- ""

  tbldf <- cbind(
    tbldf,
    data.frame(
      row_type = row_type,
      anbr = anbr,
      indentme = indentme,
      roworder = roworder,
      newrows = newrows,
      stringsAsFactors = FALSE
    )
  )
  tbldf
}

brackets_to_rtf <- function(strs) {
  gsub("\\[\\[([^]]+)\\]\\]", "\\\\{\\1}", strs)
}

gutter_width <- .12 # inches

## this is for Times New Roman 9
mar_plus_gutters <- 2 + gutter_width

pg_width_by_orient <- function(landscape = FALSE) {
  fullpg <- ifelse(landscape, 11, 8.5)
  fullpg - mar_plus_gutters
}

tlg_type <- function(tt) {
  if (methods::is(tt, "list") && !methods::is(tt, "listing_df")) {
    tt <- tt[[1]]
  }
  if (methods::is(tt, "ggplot")) {
    ret <- "Figure"
  } else if (methods::is(tt, "VTableTree")) {
    ret <- "Table"
  } else if (methods::is(tt, "listing_df")) {
    ret <- "Listing"
  } else {
    stop(
      "unable to determine tlg type for object of class: ",
      paste(class(tt), collapse = ",")
    )
  }
  ret
}

mpf_to_colspan <- function(
    mpf,
    string_map = default_str_map,
    markup_df = dps_markup_df) {
  if (!methods::is(mpf, "MatrixPrintForm")) {
    stop("figure out how to make this an mpf (MatrixPrintForm) first.")
  }
  strmat <- mf_strings(mpf)
  ## spaces in column header will get underlines, but we dont' want them to
  nhl <- mf_nlheader(mpf)
  strmat[seq_len(nhl - 1), ] <- gsub(
    "^[[:space:]]+$",
    "",
    strmat[seq_len(nhl - 1), ]
  )
  strmat[nhl, ] <- ifelse(nzchar(strmat[nhl, ]), strmat[nhl, ], " ")
  strmat <- prep_strs_for_rtf(strmat, string_map, markup_df)
  nspancols <- nhl - 1L
  if (nspancols > 0) {
    csph <- lapply(seq_len(nspancols), function(ii) {
      spns <- mf_spans(mpf)[ii, ]
      vals <- strmat[ii, ]
      jj <- 1 ## could start at 2 cause 1 is row label/topleft space but why complicate thigns
      ind <- 1
      myrle <- rle(vals)
      while (jj < length(vals)) {
        if (myrle$lengths[ind] == 1) {
          jj <- jj + 1
          ind <- ind + 1
        } else {
          sq <- seq(jj, jj + myrle$lengths[ind] - 1)
          valspns <- spns[sq]
          if (valspns[1] != length(sq)) {
            vals[sq] <- split_val_spans(vals[jj], valspns)
          }
          jj <- tail(sq, 1) + 1
          ind <- ind + 1
        }
      }
      vals
    })
  } else {
    csph <- NULL
  }

  list(colspan = csph, colheader = strmat[nhl, ])
}

split_val_spans <- function(val, spns) {
  if (val == "") {
    return(rep(val, length(spns)))
  }
  padvec <- rep("", length(spns))
  j <- 1
  padnum <- 0
  while (j < length(spns)) {
    onespn <- spns[j]
    if (padnum > 0) {
      padvec[j:(j + onespn - 1)] <- strrep(" ", padnum)
    }
    padnum <- padnum + 1
    j <- j + onespn
  }
  paste0(padvec, val, padvec)
}

partmpf_to_colinds <- function(fullmpf, partmpf) {
  stopifnot(
    mf_nlheader(fullmpf) == mf_nlheader(partmpf),
    mf_ncol(fullmpf) >= mf_ncol(partmpf)
  )
  nlh <- mf_nlheader(fullmpf)
  fullhdr <- mf_strings(fullmpf)[1:nlh, , drop = FALSE]
  parthdr <- mf_strings(partmpf)[1:nlh, , drop = FALSE]
  fullkeys <- apply(fullhdr, 2, paste, collapse = "", simplify = TRUE)
  ## hack to force first key to match uniquely
  fullkeys[1] <- paste(c("xxx", fullkeys[-1]), collapse = "")
  stopifnot(length(fullkeys) == length(unique(fullkeys)))
  partkeys <- apply(parthdr, 2, paste, collapse = "", simplify = TRUE)
  partkeys[1] <- fullkeys[1]
  match(partkeys, fullkeys)
}

subset_border_mat <- function(full_brdr, full_mpf, part_mpf) {
  if (is.null(full_brdr)) {
    return(NULL)
  }
  inds <- partmpf_to_colinds(full_mpf, part_mpf)
  full_brdr[, inds, drop = FALSE]
}


get_ncol <- function(tt) {
  if (is(tt, "listing_df") || is(tt, "VTableTree")) {
    ncol(tt)
  } else if (is(tt, "MatrixPrintForm")) {
    mf_ncol(tt)
  } else if (is.list(tt)) {
    if (is(tt[[1]], "MatrixPrintForm")) {
      mf_ncol(tt[[1]])
    } else {
      ncol(tt[[1]])
    }
  }
}

#' @name tt_to_tlgrtf
#' @title TableTree to .rtf Conversion
#' @description
#' A function to convert TableTree to .rtf
#' @details
#' This function aids in converting the rtables TableTree into the desired .rtf file.
#'
#' @param tt TableTree object to convert to RTF
#' @param file character(1). File to create, including path, but excluding
#' .rtf extension.
#' @param orientation Orientation of the output ("portrait" or "landscape")
#' @param colwidths Column widths for the table
#' @param label_width_ins Label width in inches
#' @param fontspec Font specification object
#' @param pg_width Page width in inches
#' @param margins Margins in inches (top, right, bottom, left)
#' @param paginate Whether to paginate the output
#' @param col_gap Column gap in spaces
#' @param verbose Whether to print verbose output
#' @param tlgtype Type of the output (Table, Listing, or Figure)
#' @param string_map Unicode mapping for special characters
#' @param markup_df Data frame containing markup information
#' @param ... Additional arguments passed to gentlg
#' @inheritParams tidytlg::gentlg
#' @param nosplitin list(row=, col=). Path elements whose children should not be paginated within
#' if it can be avoided. e.g., list(col="TRT01A") means don't split within treatment arms unless
#' all the associated columns don't fit on a single page.
#' @param combined_rtf logical(1). In the case where the result is broken up into multiple
#' parts due to width, should a combined rtf file also be created. Defaults to `FALSE`.
#' @param one_table logical(1). If `tt` is a (non-`MatrixPrintForm`) list,
#' should the parts be added to the rtf within a single table (`TRUE`, the
#' default) or as separate tables. End users will not generally need to set this.
#' @param border_mat matrix. A `m x k` matrix where m is the number of columns of `tt`
#'  and k is the number of lines the header takes up. See [tidytlg::add_bottom_borders]
#'  for what the matrix should contain. Users should only specify this when the
#'  default behavior does not meet their needs.
#' @import rlistings
#' @rdname tt_to_tlgrtf
#' @export
#' @seealso Used in all table and listing scripts
#' @note `file` should always include path. Path will be extracted
#' and passed separately to `gentlg`.
#' @note When `one_table` is `FALSE`, only the width of the row label
#'   pseudocolumn can be directly controlled due to a limitation in
#'   `tidytlg::gentlg`. The proportion of the full page that the first value
#'   in colwidths would take up is preserved and all other columns equally
#'   split the remaining available width. This will cause, e.g., the
#'   elements within the allparts rtf generated when `combined_rtf` is `TRUE`
#'   to differ visually from the content of the individual part rtfs.
#' @return If `file` is non-NULL, this is called for the side-effect of writing
#'   one or more RTF files. Otherwise, returns a list of `huxtable` objects.
tt_to_tlgrtf <- function(
    tt,
    file = NULL,
    orientation = c("portrait", "landscape"),
    colwidths = def_colwidths(
      tt,
      fontspec,
      col_gap = col_gap,
      label_width_ins = label_width_ins,
      type = tlgtype
    ),
    label_width_ins = 2,
    watermark = NULL,
    pagenum = ifelse(tlgtype == "Listing", TRUE, FALSE),
    fontspec = font_spec("Times", 9L, 1.2),
    pg_width = pg_width_by_orient(orientation == "landscape"),
    margins = c(0, 0, 0, 0),
    paginate = tlg_type(tt) == "Table",
    col_gap = ifelse(tlgtype == "Listing", .5, 3),
    nosplitin = list(
      row = character(),
      col = character()
    ),
    verbose = FALSE,
    tlgtype = tlg_type(tt),
    string_map = default_str_map,
    markup_df = dps_markup_df,
    combined_rtf = FALSE,
    one_table = TRUE,
    border_mat = make_header_bordmat(obj = tt),
    ...) {
  orientation <- match.arg(orientation)
  newdev <- open_font_dev(fontspec)
  if (newdev) {
    on.exit(close_font_dev())
  }

  if (tlgtype == "Listing" && nrow(tt) == 0) {
    dat <- as.list(c("No data to report", rep("", ncol(tt) - 1)))
    names(dat) <- names(tt)
    df <- as.data.frame(dat)
    var_labels(df) <- var_labels(tt)

    tt <- as_listing(
      df,
      key_cols = get_keycols(tt),
      disp_cols = listing_dispcols(tt)
    )
  }

  if (tlgtype == "Table" && fontspec$size == 8) {
    opts <- options(
      tidytlg.fontsize.table.footnote = 8,
      tidytlg.fontsize.table = 8
    )
    on.exit(options(opts), add = TRUE)
  }

  if (length(colwidths) == 1) {
    nc <- get_ncol(tt)
    tot_width <- page_lcpp(
      pg_width = pg_width,
      pg_height = 20, # don't care about this,
      font_family = fontspec$family,
      font_size = fontspec$size,
      lineheight = fontspec$lineheight,
      margins = c(0, 0, 0, 0),
      landscape = orientation == "landscape"
    )$cpp
    wdth_left <- tot_width - colwidths
    colwidths <- c(colwidths, rep(floor(wdth_left / nc), nc))
  }

  max_lwidth <- inches_to_spaces(label_width_ins, fontspec)
  if (colwidths[1] > max_lwidth) {
    colwidths[1] <- max_lwidth
  }

  if (!requireNamespace("tidytlg")) {
    stop("tidytlg not installed, cannot go out to rtf.")
  }

  if (paginate) {
    ## implies type Table
    if (tlgtype != "Table") {
      stop(
        "pagination is not currently supported for tlg types other than Table."
      )
    }
    if (methods::is(tt, "VTableTree")) {
      hdrmpf <- matrix_form(tt[1, ])
    } else if (methods::is(tt, "list") && methods::is(tt[[1]], "MatrixPrintForm")) {
      hdrmpf <- tt[[1]]
    } else {
      hrdmpf <- tt
    }
    pags <- paginate_to_mpfs(
      tt,
      fontspec = fontspec,
      landscape = orientation == "landscape",
      colwidths = colwidths,
      col_gap = col_gap,
      pg_width = pg_width,
      pg_height = NULL,
      margins = margins,
      lpp = NULL,
      nosplitin = nosplitin,
      verbose = verbose
    ) ##
    if (has_force_pag(tt)) {
      nslices <- which(
        cumsum(vapply(pags, mf_ncol, 1L)) == ncol(tt)
      )
      oldpags <- pags
      pags <- lapply(
        seq_len(nslices),
        function(ii) {
          inds <- seq(ii, by = nslices, length.out = length(oldpags) / nslices)
          oldpags[inds]
        }
      )
    }
    pag_bord_mats <- lapply(
      seq_along(pags),
      function(i) {
        if (methods::is(pags[[i]], "MatrixPrintForm")) {
          partmpf <- pags[[i]]
        } else {
          partmpf <- pags[[i]][[1]]
        }
        subset_border_mat(border_mat, hdrmpf, partmpf)
      }
    )
    ret <- lapply(
      seq_along(pags),
      function(i) {
        if (!is.null(file) && length(pags) > 1) {
          fmti <- paste0("%0", ceiling(log(length(pags), base = 10)), "d")
          fname <- paste0(file, "part", sprintf(fmti, i), "of", length(pags))
        } else {
          fname <- file
        }
        full_pag_i <- pags[[i]]
        if (
          is.list(full_pag_i) &&
            !methods::is(full_pag_i, "MatrixPrintForm")
        ) {
          pgi_for_cw <- full_pag_i[[1]]
        } else {
          pgi_for_cw <- full_pag_i
        }
        tt_to_tlgrtf(
          full_pag_i,
          file = fname,
          orientation = orientation,
          colwidths = j_mf_col_widths(pgi_for_cw),
          fontspec = fontspec,
          watermark = watermark,
          col_gap = col_gap,
          paginate = FALSE,
          tlgtype = tlgtype,
          string_map = string_map,
          markup_df = markup_df,
          border_mat = pag_bord_mats[[i]],
          ...
        )
      }
    )
    if (combined_rtf) {
      if (length(pags) > 1) {
        tt_to_tlgrtf(
          pags,
          file = paste0(file, "allparts"),
          orientation = orientation,
          fontspec = fontspec,
          watermark = watermark,
          col_gap = col_gap,
          paginate = FALSE,
          tlgtype = "Table",
          string_map = string_map,
          markup_df = markup_df,
          one_table = FALSE,
          ## gentlg isn't vectorized on column widths so we're SOL here...
          colwidths = colwidths, ## this is largely ignored see note in docs
          # colwidths are already on the pags since they are mpfs
          border_mat = pag_bord_mats,
          ...
        )
      } else if (!is.null(file)) { # only one page after pagination
        message(
          "Table ",
          basename(file),
          ": No combined RTF created, output fit within one part."
        )
      }
    }
    if (is.null(file) && length(pags) > 1) {
      ret <- unlist(ret, recursive = FALSE)
    }
    return(ret)
  }

  if (tlgtype == "Table") {
    if (is.list(tt) && !methods::is(tt, "MatrixPrintForm")) {
      df <- lapply(
        tt,
        tt_to_tbldf,
        fontspec = fontspec,
        string_map = string_map,
        markup_df = markup_df
      )
      if (one_table) {
        df <- do.call(
          rbind,
          lapply(
            seq_along(df),
            function(ii) {
              dfii <- df[[ii]]
              dfii$newpage <- 0
              if (ii > 1) {
                dfii$newpage[1] <- 1
              }
              dfii$indentme <- ifelse(dfii$indentme <= 1, 0, dfii$indentme - 1)
              dfii
            }
          )
        )
      }
    } else {
      df <- tt_to_tbldf(
        tt,
        fontspec = fontspec,
        string_map = string_map,
        markup_df = markup_df
      )
    }
  } else {
    df <- tt[, listing_dispcols(tt)]
  }

  ## we only care about the col labels here...
  if (tlgtype == "Table" && is.list(tt) && !methods::is(tt, "MatrixPrintForm")) {
    mpf <- tt[[1]]
    if (!one_table) {
      colinfo <- lapply(
        tt,
        mpf_to_colspan,
        markup_df = markup_df,
        string_map = string_map
      )
      csph <- lapply(colinfo, function(x) x$colspan)
      colheader <- lapply(colinfo, function(x) x$colheader)
    } else {
      colinfo <- mpf_to_colspan(
        mpf,
        markup_df = markup_df,
        string_map = string_map
      )
      csph <- colinfo$colspan
      colheader <- colinfo$colheader
    }
  } else if (methods::is(tt, "MatrixPrintForm")) {
    mpf <- tt
    colinfo <- mpf_to_colspan(
      mpf,
      markup_df = markup_df,
      string_map = string_map
    )
    csph <- colinfo$colspan
    colheader <- colinfo$colheader
  } else {
    mpf <- matrix_form(
      utils::head(tt, 1),
      indent_rownames = FALSE,
      expand_newlines = FALSE,
      fontspec = fontspec
    )
    colinfo <- mpf_to_colspan(
      mpf,
      markup_df = markup_df,
      string_map = string_map
    )
    csph <- colinfo$colspan
    colheader <- colinfo$colheader
  }

  if (is.null(file)) {
    fname <- NULL
    fpath <- tempdir()
  } else {
    fname <- basename(file)
    ## dirname on "table" returns "." so we're good using
    ## this unconditionally as opath
    fpath <- dirname(file)
  }

  if (tlgtype == "Table") {
    colwidths <- cwidths_final_adj(
      labwidth_ins = label_width_ins,
      total_width = pg_width,
      colwidths = colwidths[-1]
    )
  }
  colwidths <- colwidths / sum(colwidths)
  # finite precision arithmetic is a dreamscape of infinite wonder...
  ## sum(rep(1/18, 18)) <= 1 is FALSE...
  if (sum(colwidths) > 1) {
    colwidths <- colwidths - 0.00000000001 ## much smaller than a twip = 1/20 printing point
  }

  if (!one_table && # nolint start
    is.list(tt) && !is(tt, "MatrixPrintForm")) {
    ### gentlg is not vectorized on wcol.  x.x x.x x.x
    ### but it won't break if we only give it one number...
    ### Calling this an ugly hack is an insult to all the hard working hacks
    ### out there
    colwidths <- colwidths[1]
  } # nolint end

  footer_val <- prep_strs_for_rtf(
    c(
      main_footer(mpf),
      prov_footer(mpf)
    ),
    string_map,
    markup_df
  )
  if (length(footer_val) == 0) {
    footer_val <- NULL
  }

  if (!is.null(fname) && tlgtype == "Table" && is.data.frame(df)) {
    utils::write.csv(
      df,
      file = file.path(fpath, paste0(tolower(fname), ".csv")),
      row.names = FALSE
    )
  }

  tidytlg::gentlg(
    df,
    tlf = tlgtype,
    format = "rtf",
    idvars = if (tlgtype == "Listing") get_keycols(tt) else NULL,
    colspan = csph,
    file = fname,
    opath = fpath,
    colheader = colheader,
    title = prep_strs_for_rtf(
      main_title(mpf),
      string_map,
      markup_df
    ),
    footers = footer_val,
    orientation = orientation,
    wcol = colwidths,
    watermark = watermark,
    pagenum = pagenum,
    bottom_borders = border_mat,
    print.hux = !is.null(fname),
    ...
  )
}

## NB x/(x+sum(colwidths)) = labwidth_ins/total_width
cwidths_final_adj <- function(labwidth_ins, total_width, colwidths) {
  prop <- labwidth_ins / total_width
  lwidth <- floor(prop / (1 - prop) * sum(colwidths))
  c(lwidth, colwidths)
}

make_bordmat_row <- function(rowspns) {
  havespn <- rowspns > 1
  if (!any(havespn)) {
    return(rep(0, times = length(rowspns)))
  }

  pos <- 1
  ngrp <- 1
  ret <- numeric(length(rowspns))
  while (pos < length(rowspns)) {
    spnval <- rowspns[pos]
    if (spnval > 1) {
      multipos <- seq(pos, pos + spnval - 1)
      val <- ngrp
    } else {
      multipos <- pos
      val <- 0
    }
    ret[multipos] <- val
    pos <- pos + spnval
    ngrp <- ngrp + 1
  }
  ret
}

fixup_bord_mat <- function(brdmat, hstrs) {
  ## no lines between labels and their counts
  countcells <- matrix(
    grepl("N=", hstrs, fixed = TRUE),
    nrow = nrow(hstrs),
    ncol = ncol(hstrs)
  )

  countcoords <- which(countcells, arr.ind = TRUE)
  for (i in seq_len(nrow(countcoords))) {
    brdmat[countcoords[i, "row"] - 1, countcoords[i, "col"]] <- 0
  }

  brdmat[!nzchar(hstrs) | hstrs == " "] <- 0
  brdmat[nrow(brdmat), ] <- 1
  brdmat[seq_len(nrow(brdmat) - 1), 1] <- 0
  brdmat
}

.make_header_bordmat <- function(
    obj,
    mpf = matrix_form(utils::head(obj, 1), expand_newlines = FALSE)) {
  spns <- mf_spans(mpf)
  nlh <- mf_nlheader(mpf)
  nrh <- mf_nrheader(mpf)
  stopifnot(nlh == nrh)

  hstrs <- mf_strings(mpf)[seq_len(nrh), , drop = FALSE]
  spns <- mf_spans(mpf)[seq_len(nrh), , drop = FALSE]

  brdmat <- do.call(
    rbind,
    lapply(
      seq_len(nrh),
      function(i) make_bordmat_row(spns[i, ])
    )
  )

  brdmat <- fixup_bord_mat(brdmat, hstrs)
  brdmat
}

setGeneric(
  "make_header_bordmat",
  function(
      obj,
      mpf = matrix_form(utils::head(obj, 1), expand_newlines = FALSE)) {
    standardGeneric("make_header_bordmat")
  }
)

setMethod(
  "make_header_bordmat",
  c(obj = "ANY", mpf = "MatrixPrintForm"),
  .make_header_bordmat
)

setMethod(
  "make_header_bordmat",
  c(obj = "listing_df"),
  function(obj, mpf) matrix(1, nrow = 1, ncol = length(listing_dispcols(obj)))
)

setMethod(
  "make_header_bordmat",
  c(obj = "VTableTree", mpf = "missing"),
  function(obj, mpf) {
    make_header_bordmat(
      mpf = matrix_form(
        utils::head(obj, 1),
        expand_newlines = FALSE
      )
    )
  }
)

#' @name a_freq_j
#'
#' @title Analysis/statistical function for count and percentage in core columns
#' and (optional) relative risk columns
#'
#' @inheritParams proposal_argument_convention

#' @param val (`character` or NULL)\cr
#' When NULL, all levels of the incoming variable (variable used in the `analyze` call)
#' will be considered.\cr
#' When a single `string`, only that current level/value of the incoming variable
#' will be considered.\cr
#' When multiple levels, only those levels/values of the incoming variable
#' will be considered.\cr
#' When no values are observed (eg zero row input df),
#' a row with row-label `No data reported` will be included in the table.
#' @param drop_levels (`logical`)\cr If `TRUE` non-observed levels
#' (based upon .df_row) will not be included.\cr
#' Cannot be used together with `val`.
#' @param excl_levels (`character` or NULL)\cr
#' When NULL, no levels of the incoming variable (variable used in the `analyze` call)
#' will be excluded.\cr
#' When multiple levels, those levels/values of the incoming variable
#' will be excluded.\cr
#' Cannot be used together with `val`.
#' @param new_levels (list(2) or NULL)\cr List of length 2.\cr
#'     First element : names of the new levels\cr
#'     Second element: list with values of the new levels.\cr
#' @param new_levels_after (`logical`)\cr If `TRUE` new levels will be added after last level.
#' @param denom (`string`)\cr See Details.
#' @param alt_df (`dataframe`)\cr Will be derived based upon alt_df_full and denom_by within a_freq_j.
#' @param parent_df (`dataframe`)\cr Will be derived within a_freq_j based
#' upon the input dataframe that goes into build_table (df) and denom_by.\cr
#' It is a data frame in the higher row-space than the current input df
#' (which underwent row-splitting by the rtables splitting machinery).
#'
#' @param countsource Either `df` or `alt_df`.\cr
#' When `alt_df` the counts will be based upon the alternative dataframe `alt_df`.\cr
#' This is useful for subgroup processing,
#' to present counts of subjects in a subgroup from the alternative dataframe.
#'
#' @details
#'
#' `denom` controls the denominator used to calculate proportions/percents.
#' It must be one of \cr
#' \itemize{
#' \item \strong{N_col} Column count, \cr
#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr
#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr
#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr
#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr
#' \item \strong{N_colgroup} Number of patients from the column group variable
#' (note that this is based upon the input .alt_df_full dataframe).\cr
#' Note that the argument `colgroup` (column variable) needs to be provided,
#' as it cannot be retrieved directly from the column layout definition.
#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe
#' (`.row_df` from the rtables splitting machinery).\cr
#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr
#' This higher row-level split is specified in the argument `denom_by`.\cr
#' }
#'
#' @return
#' * `s_freq_j`: returns a list of following statistics\cr
#' \itemize{
#' \item n_df
#' \item n_rowdf
#' \item n_parentdf
#' \item n_altdf
#' \item denom
#' \item count
#' \item count_unique
#' \item count_unique_fraction
#' \item count_unique_denom_fraction
#' }
#'
#' @export

#' @importFrom stats setNames
s_freq_j <- function(
    df,
    .var,
    .df_row,
    val = NULL,
    drop_levels = FALSE,
    excl_levels = NULL,
    alt_df,
    parent_df,
    id = "USUBJID",
    denom = c("n_df", "n_altdf", "N_col", "n_rowdf", "n_parentdf"),
    .N_col,
    countsource = c("df", "altdf")) {
  if (is.na(.var) || is.null(.var)) {
    stop("Argument .var cannot be NA or NULL.")
  }

  countsource <- match.arg(countsource)

  if (countsource == "altdf") {
    df <- alt_df
  }

  .alt_df <- alt_df

  n1 <- length(unique(.alt_df[[id]]))
  n2 <- length(unique(df[[id]]))

  n3 <- length(unique(.df_row[[id]]))

  if (is.null(parent_df)) {
    parent_df <- df
  }
  n4 <- length(unique(parent_df[[id]]))


  denom <- match.arg(denom) |> switch(
    "n_altdf" = n1,
    "n_df" = n2,
    "n_rowdf" = n3,
    "N_col" = .N_col,
    "n_parentdf" = n4
  )

  y <- list()

  y$n_altdf <- c("n_altdf" = n1)
  y$n_df <- c("n_df" = n2)
  y$n_rowdf <- c("n_rowdf" = n3)
  y$n_parentdf <- c("n_parentdf" = n4)
  y$denom <- c("denom" = denom)

  if (drop_levels) {
    obs_levs <- unique(.df_row[[.var]])
    obs_levs <- intersect(levels(.df_row[[.var]]), obs_levs)

    if (!is.null(excl_levels)) obs_levs <- setdiff(obs_levs, excl_levels)

    if (!is.null(val)) {
      stop("argument val cannot be used together with drop_levels = TRUE.")
    }
    val <- obs_levs
  }

  if (!is.null(val)) {
    df <- df[df[[.var]] %in% val, ]
    .df_row <- .df_row[.df_row[[.var]] %in% val, ]

    df <- h_update_factor(df, .var, val)
    .df_row <- h_update_factor(.df_row, .var, val)
  }

  if (!is.null(excl_levels) && drop_levels == FALSE) {
    # restrict the levels to the ones specified in val argument
    df <- df[!(df[[.var]] %in% excl_levels), ]
    .df_row <- .df_row[!(.df_row[[.var]] %in% excl_levels), ]

    df <- h_update_factor(df, .var, excl_levels = excl_levels)
    .df_row <- h_update_factor(.df_row, .var, excl_levels = excl_levels)
  }

  x <- df[[.var]]
  x_unique <- unique(df[, c(.var, id)])[[.var]]

  if (identical(levels(df[[.var]]), no_data_to_report_str)) {
    xy <- list()
    nms <- c(
      "count",
      "count_unique",
      "count_unique_fraction",
      "count_unique_denom_fraction"
    )
    xy <- replicate(length(nms), list(setNames(list(NULL), no_data_to_report_str)))
    names(xy) <- nms
    y <- append(y, xy)
  } else {
    y$count <- lapply(
      as.list(table(x, useNA = "ifany")),
      stats::setNames,
      nm = "count"
    )

    y$count_unique <- lapply(
      as.list(table(x_unique, useNA = "ifany")),
      stats::setNames,
      nm = "count_unique"
    )

    y$count_unique_fraction <- lapply(
      y$count_unique,
      function(x) {
        ## we want to return - when denom = 0
        ## this is built into formatting function, when fraction is NA
        c(x, "p" = ifelse(denom > 0, x / denom, NA))
      }
    )

    y$count_unique_denom_fraction <- lapply(
      y$count_unique,
      function(x) {
        ## we want to return - when denom = 0
        ## this is built into formatting function, when fraction is NA
        c(x, "d" = denom, "p" = ifelse(denom > 0, x / denom, NA))
      }
    )
  }

  return(y)
}

s_rel_risk_levii_j <- function(
    levii,
    df,
    .var,
    ref_df,
    ref_denom_df,
    .in_ref_col,
    curgrp_denom_df,
    id,
    variables,
    conf_level,
    method,
    weights_method) {
  dfii <- df[df[[.var]] == levii & !is.na(df[[.var]]), ]
  ref_dfii <- ref_df[ref_df[[.var]] == levii & !is.na(ref_df[[.var]]), ]

  # construction of df_val, based upon curgrp_denom_df, dfii
  df_val <- curgrp_denom_df
  df_val$rsp <- FALSE
  # subjects with value levii observed in df TRUE
  df_val$rsp[df_val[[id]] %in% unique(dfii[[id]])] <- TRUE

  # repeat for ref group, based upon ref_denom_df, ref_dfii
  ref_df_val <- ref_denom_df
  ref_df_val$rsp <- FALSE
  # subjects with value levii observed in ref_df TRUE
  ref_df_val$rsp[ref_df_val[[id]] %in% unique(ref_dfii[[id]])] <- TRUE

  ### once 3-d version of diff_ci is available in tern::s_proportion_diff
  ### we should call tern::s_proportion_diff directly
  res_ci_3d <- s_proportion_diff_j(
    df_val,
    .var = "rsp",
    .ref_group = ref_df_val,
    .in_ref_col,
    variables = variables,
    conf_level = conf_level,
    method = method,
    weights_method = weights_method
  )$diff_est_ci
}


s_rel_risk_val_j <- function(
    df,
    .var,
    .df_row,
    ctrl_grp,
    cur_trt_grp,
    trt_var,
    val = NULL,
    drop_levels = FALSE,
    excl_levels = NULL,
    denom_df,
    id = "USUBJID",
    riskdiff = TRUE,
    variables = list(strata = NULL),
    conf_level = 0.95,
    method = c(
      "waldcc",
      "wald",
      "cmh",
      "ha",
      "newcombe",
      "newcombecc",
      "strat_newcombe",
      "strat_newcombecc"
    ),
    weights_method = "cmh") {
  if (drop_levels) {
    obs_levs <- unique(.df_row[[.var]])
    obs_levs <- intersect(levels(.df_row[[.var]]), obs_levs)

    if (!is.null(excl_levels)) obs_levs <- setdiff(obs_levs, excl_levels)

    if (!is.null(val)) {
      stop("argument val cannot be used together with drop_levels = TRUE, please specify one or the other.")
    }
    val <- obs_levs
  }

  if (!is.null(val)) {
    # restrict the levels to the ones specified in val argument
    df <- df[df[[.var]] %in% val, ]
    .df_row <- .df_row[.df_row[[.var]] %in% val, ]

    df <- h_update_factor(df, .var, val)
    .df_row <- h_update_factor(.df_row, .var, val)
  }

  if (!is.null(excl_levels) && drop_levels == FALSE) {
    # restrict the levels to the ones specified in val argument
    df <- df[!(df[[.var]] %in% excl_levels), ]
    .df_row <- .df_row[!(.df_row[[.var]] %in% excl_levels), ]

    df <- h_update_factor(df, .var, excl_levels = excl_levels)
    .df_row <- h_update_factor(.df_row, .var, excl_levels = excl_levels)
  }

  levs <- levels(df[[.var]])

  if (identical(levs, no_data_to_report_str)) {
    riskdiff <- FALSE
  }
  if (!riskdiff) {
    return(list(rr_ci_3d = setNames(replicate(length(levs), list(NULL)), levs)))
  }
  ### check on denom_df
  if (NROW(denom_df[[id]]) > length(unique(denom_df[[id]]))) {
    stop(
      "\nProblem: a_freq_j \n
           Denominator has multiple records per id. \n
           Please specify colgroup and/or denom_by to refine your denominator for proper relative risk derivation."
    )
  }

  ### are we in reference column?
  .in_ref_col <- (cur_trt_grp == ctrl_grp)

  ### data from reference group - df based
  ref_df <- get_ctrl_subset(.df_row, trt_var = trt_var, ctrl_grp = ctrl_grp)

  ### denominator data from reference group - denom_df based
  ref_denom_df <- get_ctrl_subset(
    denom_df,
    trt_var = trt_var,
    ctrl_grp = ctrl_grp
  )

  # ensure this is unique record per subject
  ref_denom_df <- unique(ref_denom_df[, c(id, variables$strata), drop = FALSE])

  ### denominator data from current group - denom_df based ---
  curgrp_denom_df <- get_ctrl_subset(
    denom_df,
    trt_var = trt_var,
    ctrl_grp = cur_trt_grp
  )

  # ensure this is unique record per subject
  curgrp_denom_df <- unique(curgrp_denom_df[, c(id, variables$strata), drop = FALSE])

  # calculate the stats for each of the levels in levs
  rr_ci_3d <- sapply(
    levs,
    s_rel_risk_levii_j,
    df = df,
    .var = .var,
    ref_df = ref_df,
    ref_denom_df = ref_denom_df,
    .in_ref_col = .in_ref_col,
    curgrp_denom_df = curgrp_denom_df,
    id = id,
    variables = variables,
    conf_level = conf_level,
    method = method,
    weights_method = weights_method,
    USE.NAMES = TRUE,
    simplify = FALSE
  )
  list(rr_ci_3d = rr_ci_3d)
}


#' @name a_freq_j
#'
#'
#' @inheritParams proposal_argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#' See Value for list of available statistics.
#'
#' @param riskdiff (`logical`)\cr
#' When `TRUE`, risk difference calculations will be performed and
#' presented (if required risk difference column splits are included).\cr
#' When `FALSE`, risk difference columns will remain blank
#' (if required risk difference column splits are included).
#' @param ref_path (`string`)\cr Column path specifications for
#' the control group for the relative risk derivation.
#' @param variables Will be passed onto the relative risk function
#' (internal function s_rel_risk_val_j), which is based upon [tern::s_proportion_diff()].\cr
#' See `?tern::s_proportion_diff` for details.
#' @param method Will be passed onto the relative risk function (internal function s_rel_risk_val_j).\cr
#' @param weights_method Will be passed onto the relative risk function (internal function s_rel_risk_val_j).\cr
#' @param label (`string`)\cr
#' When `val`is a single `string`,
#' the row label to be shown on the output can be specified using this argument.\cr
#' When `val` is a `character vector`, the `label_map` argument can be specified
#' to control the row-labels.
#'
#' @param labelstr An argument to ensure this function can be used
#' as a `cfun` in a `summarize_row_groups` call.\cr
#' It is recommended not to utilize this argument for other purposes.\cr
#' The label argument could be used instead (if `val` is a single string)\cr
#' An another approach could be to utilize the `label_map` argument
#' to control the row labels of the incoming analysis variable.
#'
#'
#' @param label_fstr (`string`)\cr
#' a sprintf style format string.
#' It can contain up to one "\%s" which takes the current split value and
#' generates the row/column label.\cr
#' It will be combined with the `labelstr` argument,
#' when utilizing this function as
#' a `cfun` in a `summarize_row_groups` call.\cr
#' It is recommended not to utilize this argument for other purposes.
#' The label argument could be used instead (if `val` is a single string)\cr
#'
#' @param label_map (`tibble`)\cr
#' A mapping tibble to translate levels from the incoming variable into
#' a different row label to be presented on the table.\cr

#'
#' @param .alt_df_full (`dataframe`)\cr Denominator dataset
#' for fraction and relative risk calculations.\cr
#' .alt_df_full is a crucial parameter for the relative risk calculations
#' if this parameter is not set to utilize `alt_counts_df`,
#' then the values in the relative risk columns might not be correct.\cr
#' Once the rtables PR is integrated, this argument gets populated by the rtables
#' split machinery (see [rtables::additional_fun_params]).
#'
#' @param denom_by (`character`)\cr Variables from row-split
#' to be used in the denominator derivation.\cr
#' This controls both `denom = "n_parentdf"` and `denom = "n_altdf"`.\cr
#' When `denom = "n_altdf"`, the denominator is derived from `.alt_df_full`
#' in combination with `denom_by` argument
#' @param .labels_n (named `character`)\cr
#' String to control row labels for the 'n'-statistics.\cr
#' Only useful when more than one 'n'-statistic is requested (rare situations only).
#' @param .formats (named 'character' or 'list')\cr
#' formats for the statistics.
#' @param extrablankline (`logical`)\cr
#' When `TRUE`, an extra blank line will be added after the last value.\cr
#' Avoid using this in template scripts, use section_div = " " instead (once PR for rtables is available)\cr
#' @param extrablanklineafter (`string`)\cr
#' When the row-label matches the string, an extra blank line will be added after
#' that value.
#' @param restr_columns `character`\cr
#' If not NULL, columns not defined in `restr_columns` will be blanked out.
#' @param colgroup The name of the column group variable that is used as source
#' for denominator calculation.\cr
#' Required to be specified when `denom = "N_colgroup"`.
#'
#' @param addstr2levs string, if not NULL will be appended to the rowlabel for that level,
#' eg to add ",n (percent)" at the end of the rowlabels
#'
#' @examples
#' library(dplyr)
#'
#' adsl <- ex_adsl |> select("USUBJID", "SEX", "ARM")
#' adae <- ex_adae |> select("USUBJID", "AEBODSYS", "AEDECOD")
#' adae[["TRTEMFL"]] <- "Y"
#'
#' trtvar <- "ARM"
#' ctrl_grp <- "B: Placebo"
#' adsl$colspan_trt <- factor(ifelse(adsl[[trtvar]] == ctrl_grp, " ", "Active Study Agent"),
#'   levels = c("Active Study Agent", " ")
#' )
#'
#' adsl$rrisk_header <- "Risk Difference (%) (95% CI)"
#' adsl$rrisk_label <- paste(adsl[[trtvar]], paste("vs", ctrl_grp))
#'
#' adae <- adae |> left_join(adsl)
#'
#' colspan_trt_map <- create_colspan_map(adsl,
#'   non_active_grp = "B: Placebo",
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = trtvar
#' )
#'
#' ref_path <- c("colspan_trt", " ", trtvar, ctrl_grp)
#'
#' lyt <- basic_table(show_colcounts = TRUE) |>
#'   split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) |>
#'   split_cols_by(trtvar) |>
#'   split_cols_by("rrisk_header", nested = FALSE) |>
#'   split_cols_by(trtvar, labels_var = "rrisk_label", split_fun = remove_split_levels(ctrl_grp))
#'
#' lyt1 <- lyt |>
#'   analyze("TRTEMFL",
#'     show_labels = "hidden",
#'     afun = a_freq_j,
#'     extra_args = list(
#'       method = "wald",
#'       .stats = c("count_unique_denom_fraction"),
#'       ref_path = ref_path
#'     )
#'   )
#'
#' result1 <- build_table(lyt1, adae, alt_counts_df = adsl)
#'
#' result1
#'
#' x_drug_x <- list(length(unique(subset(adae, adae[[trtvar]] == "A: Drug X")[["USUBJID"]])))
#' N_x_drug_x <- length(unique(subset(adsl, adsl[[trtvar]] == "A: Drug X")[["USUBJID"]]))
#' y_placebo <- list(length(unique(subset(adae, adae[[trtvar]] == ctrl_grp)[["USUBJID"]])))
#' N_y_placebo <- length(unique(subset(adsl, adsl[[trtvar]] == ctrl_grp)[["USUBJID"]]))
#'
#' tern::stat_propdiff_ci(
#'   x = x_drug_x,
#'   N_x = N_x_drug_x,
#'   y = y_placebo,
#'   N_y = N_y_placebo
#' )
#'
#' x_combo <- list(length(unique(subset(adae, adae[[trtvar]] == "C: Combination")[["USUBJID"]])))
#' N_x_combo <- length(unique(subset(adsl, adsl[[trtvar]] == "C: Combination")[["USUBJID"]]))
#'
#' tern::stat_propdiff_ci(
#'   x = x_combo,
#'   N_x = N_x_combo,
#'   y = y_placebo,
#'   N_y = N_y_placebo
#' )
#'
#'
#' extra_args_rr <- list(
#'   denom = "n_altdf",
#'   denom_by = "SEX",
#'   riskdiff = FALSE,
#'   .stats = c("count_unique")
#' )
#'
#' extra_args_rr2 <- list(
#'   denom = "n_altdf",
#'   denom_by = "SEX",
#'   riskdiff = TRUE,
#'   ref_path = ref_path,
#'   method = "wald",
#'   .stats = c("count_unique_denom_fraction"),
#'   na_str = rep("NA", 3)
#' )
#'
#' lyt2 <- basic_table(
#'   top_level_section_div = " ",
#'   colcount_format = "N=xx"
#' ) |>
#'   split_cols_by("colspan_trt", split_fun = trim_levels_to_map(map = colspan_trt_map)) |>
#'   split_cols_by(trtvar, show_colcounts = TRUE) |>
#'   split_cols_by("rrisk_header", nested = FALSE) |>
#'   split_cols_by(trtvar,
#'     labels_var = "rrisk_label", split_fun = remove_split_levels("B: Placebo"),
#'     show_colcounts = FALSE
#'   ) |>
#'   split_rows_by("SEX", split_fun = drop_split_levels) |>
#'   summarize_row_groups("SEX",
#'     cfun = a_freq_j,
#'     extra_args = append(extra_args_rr, list(label_fstr = "Gender: %s"))
#'   ) |>
#'   split_rows_by("TRTEMFL",
#'     split_fun = keep_split_levels("Y"),
#'     indent_mod = -1L,
#'     section_div = c(" ")
#'   ) |>
#'   summarize_row_groups("TRTEMFL",
#'     cfun = a_freq_j,
#'     extra_args = append(extra_args_rr2, list(
#'       label =
#'         "Subjects with >=1 AE", extrablankline = TRUE
#'     ))
#'   ) |>
#'   split_rows_by("AEBODSYS",
#'     split_label = "System Organ Class",
#'     split_fun = trim_levels_in_group("AEDECOD"),
#'     label_pos = "topleft",
#'     section_div = c(" "),
#'     nested = TRUE
#'   ) |>
#'   summarize_row_groups("AEBODSYS",
#'     cfun = a_freq_j,
#'     extra_args = extra_args_rr2
#'   ) |>
#'   analyze("AEDECOD",
#'     afun = a_freq_j,
#'     extra_args = extra_args_rr2
#'   )
#'
#' result2 <- build_table(lyt2, adae, alt_counts_df = adsl)
#'
#' @return
#' * `a_freq_j`: returns a list of requested statistics with formatted `rtables::CellValue()`.\cr
#' Within the relative risk difference columns, the following stats are blanked out:
#' \itemize{
#' \item any of the n-statistics (n_df, n_altdf, n_parentdf, n_rowdf, denom)
#' \item count
#' \item count_unique
#' }
#' For the others (count_unique_fraction, count_unique_denom_fraction),
#' the statistic is replaced by the relative risk difference + confidence interval.
#' @export
#'
a_freq_j <- function(
    df,
    labelstr = NULL,
    .var = NA,
    val = NULL,
    drop_levels = FALSE,
    excl_levels = NULL,
    new_levels = NULL,
    new_levels_after = FALSE,
    addstr2levs = NULL,
    .df_row,
    .spl_context,
    .N_col,
    id = "USUBJID",
    denom = c("N_col", "n_df", "n_altdf", "N_colgroup", "n_rowdf", "n_parentdf"),
    riskdiff = TRUE,
    ref_path = NULL,
    variables = list(strata = NULL),
    conf_level = 0.95,
    method = c(
      "wald",
      "waldcc",
      "cmh",
      "ha",
      "newcombe",
      "newcombecc",
      "strat_newcombe",
      "strat_newcombecc"
    ),
    weights_method = "cmh",
    label = NULL,
    label_fstr = NULL,
    label_map = NULL,
    .alt_df_full = NULL,
    denom_by = NULL,
    .stats = c("count_unique_denom_fraction"),
    .formats = NULL,
    .indent_mods = NULL,
    na_str = rep("NA", 3),
    .labels_n = NULL,
    extrablankline = FALSE,
    extrablanklineafter = NULL,
    restr_columns = NULL,
    colgroup = NULL,
    countsource = c("df", "altdf")) {
  denom <- match.arg(denom)
  method <- match.arg(method)

  if (!is.null(labelstr) && is.na(.var)) {
    stop(
      "Please specify var call to summarize_row_groups when using cfun = a_freq_j, i.e.,\n",
      "summarize_row_groups('varname', cfun = a_freq_j)"
    )
  }

  if (denom == "N_colgroup") {
    if (is.null(colgroup)) {
      stop("Colgroup must be specified when denom = N_colgroup.")
    }

    checkmate::assert_character(colgroup, null.ok = FALSE, max.len = 1)

    if (colgroup == tail(.spl_context$cur_col_split[[1]], 1)) {
      stop(
        "N_colgroup cannot be used when colgroup is lowest column split."
      )
    }
  }

  check_alt_df_full(denom, c("n_altdf", "N_colgroup"), .alt_df_full)

  res_dataprep <- h_a_freq_dataprep(
    df = df,
    labelstr = labelstr,
    .var = .var,
    val = val,
    drop_levels = drop_levels,
    excl_levels = excl_levels,
    new_levels = new_levels,
    new_levels_after = new_levels_after,
    addstr2levs = addstr2levs,
    .df_row = .df_row,
    .spl_context = .spl_context,
    .N_col = .N_col,
    id = id,
    denom = denom,
    variables = variables,
    label = label,
    label_fstr = label_fstr,
    label_map = label_map,
    .alt_df_full = .alt_df_full,
    denom_by = denom_by,
    .stats = .stats
  )
  # res_dataprep is list with elements
  # df .df_row val
  # drop_levels excl_levels
  # alt_df parentdf new_denomdf
  # .stats
  # make these elements available in current environment
  df <- res_dataprep$df
  .df_row <- res_dataprep$.df_row
  val <- res_dataprep$val
  drop_levels <- res_dataprep$drop_levels
  excl_levels <- res_dataprep$excl_levels
  alt_df <- res_dataprep$alt_df
  parentdf <- res_dataprep$parentdf
  new_denomdf <- res_dataprep$new_denomdf
  .stats <- .stats

  ## prepare for column based split
  col_expr <- .spl_context$cur_col_expr[[1]]
  ## colid can be used to figure out if we're in the relative risk columns or not
  colid <- .spl_context$cur_col_id[[1]]
  inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE)

  if (!is.null(colgroup)) {
    colexpr_substr <- h_colexpr_substr(colgroup, .spl_context$cur_col_expr[[1]])

    if (is.null(colexpr_substr)) {
      stop("\n Problem a_freq_j: incorrect colgroup specification.")
    }

    new_denomdf <- subset(.alt_df_full, eval(parse(text = colexpr_substr)))
    .df_row <- subset(.df_row, eval(parse(text = colexpr_substr)))
  }

  if (!inriskdiffcol) {
    if (denom != "N_colgroup" && !is.null(new_denomdf)) {
      ### for this part : perform column split on denominator dataset
      new_denomdf <- subset(new_denomdf, eval(col_expr))
    }
    if (denom == "N_colgroup") {
      denom <- "n_altdf"
    }

    x_stats <- s_freq_j(
      df,
      .var = .var,
      .df_row = .df_row,
      val = val,
      drop_levels = drop_levels,
      excl_levels = excl_levels,
      alt_df = new_denomdf,
      parent_df = new_denomdf,
      id = id,
      denom = denom,
      .N_col = .N_col,
      countsource = countsource
    )
    ## remove relrisk stat from .stats
    .stats_adj <- .stats[!(.stats %in% "rr_ci_3d")]
  } else {
    if (riskdiff && is.null(ref_path)) {
      stop("argument ref_path cannot be NULL.")
    }
    ### denom N_colgroup should not be used in layout with risk diff columns
    if (denom == "N_colgroup") {
      stop(
        "denom N_colgroup cannot be used in a layout with risk diff columns."
      )
    }
    if (!riskdiff) {
      trt_var <- NULL
      ctrl_grp <- NULL
      cur_trt_grp <- NULL
    }

    if (riskdiff) {
      trt_var_refpath <- h_get_trtvar_refpath(
        ref_path,
        .spl_context,
        df
      )
      # trt_var_refpath is list with elements
      # trt_var trt_var_refspec cur_trt_grp ctrl_grp
      # make these elements available in current environment
      trt_var <- trt_var_refpath$trt_var
      trt_var_refspec <- trt_var_refpath$trt_var_refspec
      cur_trt_grp <- trt_var_refpath$cur_trt_grp
      ctrl_grp <- trt_var_refpath$ctrl_grp

      if (!is.null(colgroup) && trt_var == colgroup) {
        stop(
          "\n Problem: a_freq_j: colgroup and treatment variable from ref_path are the same.
             This is not intented for usage with relative risk columns.
             Either remove risk difference columns from layout, set riskdiff = FALSE, or update colgroup."
        )
      }
    }

    x_stats <- s_rel_risk_val_j(
      df,
      .var = .var,
      .df_row = .df_row,
      val = val,
      drop_levels = drop_levels,
      excl_levels = excl_levels,
      denom_df = new_denomdf,
      id = id,
      riskdiff = riskdiff,
      # treatment/ref group related arguments
      trt_var = trt_var,
      ctrl_grp = ctrl_grp,
      cur_trt_grp = cur_trt_grp,
      # relrisk specific arguments
      variables = variables,
      conf_level = conf_level,
      method = method,
      weights_method = weights_method
    )

    ## this will ensure the following stats will be shown as empty column in relative risk column
    xy <- sapply(
      c(
        "count",
        "count_unique",
        "n_df",
        "n_altdf",
        "n_rowdf",
        "n_parentdf",
        "denom"
      ),
      function(x) {
        stats::setNames(list(x = NULL), x)
      },
      USE.NAMES = TRUE,
      simplify = FALSE
    )
    x_stats <- append(x_stats, xy)

    ## restrict to relrisk stat from .stats
    # when both count_unique_fraction and count_unique_denom_fraction are requested, the rr_ci_3d stat is in here twice
    # this does not seem to introduce a problem, although might not be ideal
    # see further
    .stats_adj <- replace(
      .stats,
      .stats %in%
        c(
          "count_unique_fraction",
          "count_unique_denom_fraction",
          "fraction_count_unique_denom"
        ),
      "rr_ci_3d"
    )
  }

  res_prepinrows <- h_a_freq_prepinrows(
    x_stats,
    .stats_adj,
    .formats,
    labelstr,
    label_fstr,
    label,
    .indent_mods,
    .labels_n,
    na_str
  )
  # res_prepinrows is list with elements
  # x_stats .formats .labels .indent_mods .format_na_strs
  # make these elements available in current environment
  x_stats <- res_prepinrows$x_stats
  .formats <- res_prepinrows$.formats
  .labels <- res_prepinrows$.labels
  .indent_mods <- res_prepinrows$.indent_mods
  .format_na_strs <- res_prepinrows$.format_na_strs

  ### blank out columns not in restr_columns
  # get column label
  colid_lbl <- utils::tail(
    .spl_context$cur_col_split_val[[NROW(.spl_context)]],
    1
  )
  if (!is.null(restr_columns) && !(tolower(colid_lbl) %in% tolower(restr_columns))) {
    x_stats <- lapply(x_stats, FUN = function(x) {
      NULL
    })
  }

  ### final step: turn requested stats into rtables rows
  inrows <- in_rows(
    .list = x_stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = .format_na_strs
  )

  ### add extra blankline to the end of inrows --- as long as section_div is not working as expected
  # nolint start
   if (!is.null(inrows) && extrablankline ||
    (!is.null(extrablanklineafter) && length(.labels) == 1 && .labels == extrablanklineafter)) {
    inrows <- add_blank_line_rcells(inrows)
  } # nolint end

  return(inrows)
}

#' Survival time analysis
#'
#' @description `r lifecycle::badge('stable')`
#'
#' The analyze function [kaplan_meier()] creates a layout element to analyze
#' survival time by calculating survival time median, 2 quantiles, each with
#' their confidence intervals, and range (for all, censored, or event patients).
#' The primary analysis variable `vars` is the time variable and the secondary
#' analysis variable `is_event` indicates whether or not an event has occurred.
#'
#' @inheritParams proposal_argument_convention
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [tern::control_surv_time()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are 'plain' (default), 'log', or 'log-log',
#'     see more in [survival::survfit()]. Note option 'none' is not supported.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
#'
#' @note These functions have been forked from the `tern` package file `survival_time.R`.
#'   Here we have the additional features:
#'
#'   * Additional statistics `quantiles_lower`, `quantiles_upper`, `range_with_cens_info` are returned.
#'
#' @examples
#' library(dplyr)
#' library(tern)
#' adtte_f <- tern::tern_ex_adtte |>
#'   filter(PARAMCD == "OS") |>
#'   mutate(
#'     AVAL = tern::day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f |> filter(ARMCD == "ARM A")
#' @keywords internal
#' @name kaplan_meier
#' @order 1
NULL

#' @describeIn kaplan_meier Statistics function which analyzes survival times using Kaplan-Meier.
#'
#' @return
#' * `s_kaplan_meier()` returns the following statistics:
#'   * `quantiles_lower`: Lower quantile estimate and confidence interval for it.
#'   * `median_ci_3d`: Median survival time and confidence interval for it.
#'   * `quantiles_upper`: Upper quantile estimate and confidence interval for it.
#'   * `range_with_cens_info`: Survival time range with censoring information.
#'
#' @importFrom survival survfit
#'
#' @keywords internal
s_kaplan_meier <- function(df, .var, is_event, control = control_surv_time()) {
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_type <- control$conf_type
  conf_level <- control$conf_level

  checkmate::assert_true(control$quantiles[1] < 0.5)
  checkmate::assert_true(control$quantiles[2] > 0.5)
  quantiles <- c(control$quantiles[1], 0.5, control$quantiles[2])

  formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  srv_fit <- survival::survfit(formula = formula, data = df, conf.int = conf_level, conf.type = conf_type)
  srv_qt_tab <- stats::quantile(srv_fit, probs = quantiles)
  quantiles_lower <- vapply(srv_qt_tab, "[", 1, FUN.VALUE = numeric(1))
  median_ci <- vapply(srv_qt_tab, "[", 2, FUN.VALUE = numeric(1))
  quantiles_upper <- vapply(srv_qt_tab, "[", 3, FUN.VALUE = numeric(1))
  range_censor <- range_noinf(df[[.var]][!df[[is_event]]], na.rm = TRUE)
  range_event <- range_noinf(df[[.var]][df[[is_event]]], na.rm = TRUE)
  range <- range_noinf(df[[.var]], na.rm = TRUE)
  lower_censored <- as.numeric(range_censor[1] < range_event[1])
  upper_censored <- as.numeric(range_censor[2] > range_event[2])
  range_with_cens_info <- c(range, lower_censored, upper_censored)
  list(
    quantiles_lower = with_label(
      unname(quantiles_lower),
      paste0(round(quantiles[1] * 100), "th percentile (", f_conf_level(conf_level), ")")
    ),
    median_ci_3d = with_label(unname(median_ci), paste0("Median (", f_conf_level(conf_level), ")")),
    quantiles_upper = with_label(
      unname(quantiles_upper),
      paste0(round(quantiles[3] * 100), "th percentile (", f_conf_level(conf_level), ")")
    ),
    range_with_cens_info = range_with_cens_info
  )
}

#' @describeIn kaplan_meier Formatted analysis function which is used as `afun`
#'
#' @return
#' * `a_kaplan_meier()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_kaplan_meier(
#'   df,
#'   .var = "AVAL",
#'   is_event = "is_event"
#' )
#'
#' basic_table() |>
#'   split_cols_by(var = "ARMCD") |>
#'   add_colcounts() |>
#'   analyze(
#'     vars = "AVAL",
#'     afun = a_kaplan_meier,
#'     var_labels = "Kaplan-Meier estimate of time to event (months)",
#'     show_labels = "visible",
#'     extra_args = list(
#'       is_event = "is_event",
#'       control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
#'     )
#'   ) |>
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
a_kaplan_meier <- function(df, .var, ..., .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_kaplan_meier,
    custom_stat_fnc_list = NULL,
    args_list = c(df = list(df), .var = .var, dots_extra_args)
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "kaplan_meier",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' @title Function factory for xx style formatting
#' @description A function factory to generate formatting functions for value
#' formatting that support the xx style format and control the rounding method
#'
#' @param roundmethod (`string`)\cr choice of rounding methods. Options are:
#'   * `sas`: the underlying rounding method is `tidytlg::roundSAS`, where \cr
#'   roundSAS comes from this Stack Overflow post https://stackoverflow.com/questions/12688717/round-up-from-5
#'   * `iec`: the underlying rounding method is `round`
#'
#' @param na_str_dflt Character to represent NA value
#' @param replace_na_dflt logical(1). Should an `na_string` of "NA" within
#'    the formatters framework be overridden by `na_str_default`? Defaults to
#'    `TRUE`, as a way to have a different default na string behavior from the
#'    base `formatters` framework.
#' @return `format_xx_fct()` format function that can be used in rtables formatting calls
#' @export
#'
#' @family JJCS formats
#' @examples
#' jjcsformat_xx_SAS <- format_xx_fct(roundmethod = "sas")
#' jjcsformat_xx <- jjcsformat_xx_SAS
#' rcell(c(1.453), jjcsformat_xx("xx.xx"))
#' rcell(c(), jjcsformat_xx("xx.xx"))
#' rcell(c(1.453, 2.45638), jjcsformat_xx("xx.xx (xx.xxx)"))
#'
format_xx_fct <- function(roundmethod = c("sas", "iec"), na_str_dflt = "NE",
                          replace_na_dflt = TRUE) {
  roundmethod <- match.arg(roundmethod)

  if (roundmethod == "sas") {
    roundfunc <- tidytlg::roundSAS
  } else {
    roundfunc <- round
  }

  fnct <- function(str, na_str = na_str_dflt) {
    if (grepl("xxx.", str, fixed = TRUE)) {
      stop(
        "Error: jjcs_format_xx: do not use xxx. in input str, replace by xx. instead."
      )
    }
    if (!grepl("xx", str, fixed = TRUE)) {
      stop("Error: jjcs_format_xx: input str must contain xx.")
    }
    positions <- gregexpr(
      pattern = "xx\\.?x*",
      text = str,
      perl = TRUE
    )
    x_positions <- regmatches(x = str, m = positions)[[1]]
    ### str is splitted into pieces as xx. xx xx.xxx
    ### xx is no rounding
    ### xx. rounding to integer
    ### xx.x rounding to 1 decimal, etc

    no_round <- function(x, na_str = na_str_dflt) {
      if (is.na(x)) {
        return(na_str)
      } else {
        return(x)
      }
    }

    roundings <- lapply(X = x_positions, function(x) {
      y <- strsplit(split = "\\.", x = x)[[1]]
      ### "xx.x" will result in c("xx","x")
      ### "xx." will result in "xx"
      ### "xx" will remain "xx"

      if (x == "xx") {
        rounding <- no_round
      } else {
        rounding <- function(x, na_str = na_str_dflt) {
          if (is.na(x)) {
            return(na_str)
          } else {
            format(
              roundfunc(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)),
              nsmall = ifelse(length(y) > 1, nchar(y[2]), 0)
            )
          }
        }
      }
      return(rounding)
    })
    rtable_format <- function(x, output, na_str = na_str_dflt) {
      if (anyNA(na_str) || (replace_na_dflt && any(na_str == "NA"))) {
        na_inds <- which(is.na(na_str) | (replace_na_dflt & na_str == "NA"))
        na_str[na_inds] <- rep(na_str_dflt, length.out = length(na_str))[na_inds]
      }
      if (length(x) == 0 || isTRUE(all(x == ""))) {
        return(NULL)
      } else if (!length(positions[[1]]) == length(x)) {
        stop(
          "Error: input str in call to jjcs_format_xx must contain same number of xx as the number of stats."
        )
      }

      values <- Map(y = x, fun = roundings, na_str = na_str, function(y, fun, na_str) fun(y, na_str = na_str))
      regmatches(x = str, m = positions)[[1]] <- values
      return(str)
    }
    return(rtable_format)
  }
  return(fnct)
}


jjcsformat_xx_SAS <- format_xx_fct(roundmethod = "sas")
jjcsformat_xx_R <- format_xx_fct(roundmethod = "iec")


### if we ever decide to switch rounding method, we just have to update jjcsformat_xx here

#' @title Formatting of values
#' @name jjcsformat_xx
#' @description jjcs formatting function
#' @param str The formatting that is required specified as a text string, eg "xx.xx"
#' @param na_str character. Na string that will be passed from `formatters` into
#'    our formatting functions.
#' @return a formatting function with `"sas"`-style rounding.
#' @export
jjcsformat_xx <- jjcsformat_xx_SAS

#' @name count_fraction
#' @title Formatting count and fraction values
#'
#' @description
#'
#' Formats a count together with fraction (and/or denominator) with special
#' consideration when count is 0, or fraction is 1.
#' \cr See also: tern::format_count_fraction_fixed_dp()
#'
#' @inheritParams format_xx_fct
#' @param x `numeric`\cr with elements `num` and `fraction` or `num`, `denom` and `fraction`.
#' @param d numeric(1). Number of digits to round fraction to (default=1)
#' @param ... Additional arguments passed to other methods.
#' @return A string in the format `count / denom (ratio percent)`. If `count`
#' is 0, the format is `0`. If fraction is >0.99, the format is
#' `count / denom (>99.9 percent)`
#' @family JJCS formats
#' @rdname count_fraction
#' @export
#' @examples
#' jjcsformat_count_fraction(c(7, 0.7))
#' jjcsformat_count_fraction(c(70000, 0.9999999))
#' jjcsformat_count_fraction(c(70000, 1))
#'
jjcsformat_count_fraction <- function(
    x,
    d = 1,
    roundmethod = c("sas", "iec"),
    ...) {
  roundmethod <- match.arg(roundmethod)
  attr(x, "label") <- NULL
  if (any(is.na(x))) {
    return("-")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(
    x[2],
    include_boundaries = TRUE
  )

  fraction <- x[2]

  if (isTRUE(all.equal(fraction, 1))) fraction <- 1

  if (roundmethod == "sas") {
    fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d)
  } else {
    fmtpct <- format(round(fraction * 100, d), nsmall = d)
  }

  result <- if (x[1] == 0) {
    "0"
  } else if (fraction == 1) {
    ## per conventions still report as 100.0%
    paste0(x[1], " (100.0%)")
  } else if (fmtpct == format(0, nsmall = d)) {
    # else if (100*x[2] < 10**(-d)) {
    ### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1,
    # but the actual value of pct <0.1)
    paste0(x[1], " (<", 10**(-d), "%)")
  } else if (fmtpct == format(100, nsmall = d)) {
    # else if (100*x[2] > 100-10**(-d)) {
    ### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9,
    # but the actual value of pct >99.9)
    paste0(x[1], " (>", 100 - 10**(-d), "%)")
  } else {
    paste0(x[1], " (", fmtpct, "%)")
  }
  return(result)
}

#' @title Formatting count, denominator and fraction values
#'
#' @inheritParams count_fraction
#' @param ... Additional arguments passed to other methods.
#' @export
#' @rdname count_denom_fraction
#' @return `x`, formatted into a string with the appropriate
#' format and `d` digits of precision.
#' @examples
#' jjcsformat_count_denom_fraction(c(7, 10, 0.7))
#' jjcsformat_count_denom_fraction(c(70000, 70001, 70000 / 70001))
#' jjcsformat_count_denom_fraction(c(235, 235, 235 / 235))
jjcsformat_count_denom_fraction <- function(
    x,
    d = 1,
    roundmethod = c("sas", "iec"),
    ...) {
  roundmethod <- match.arg(roundmethod)
  attr(x, "label") <- NULL
  if (any(is.na(x))) {
    return("-")
  }
  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(
    x[3],
    include_boundaries = TRUE
  )

  fraction <- x[3]
  if (x[2] == x[1]) fraction <- 1

  fmt_x12 <- paste0(x[1], "/", x[2])

  if (roundmethod == "sas") {
    fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d)
  } else {
    fmtpct <- format(round(fraction * 100, d), nsmall = d)
  }

  result <- if (x[1] == 0) {
    # "0"
    # same as in general situation
    paste0(fmt_x12, " (", fmtpct, "%)")
  } else if (100 * fraction == 100) {
    paste0(fmt_x12, " (100.0%)")
  } else if (100 * fraction < 10**(-d)) {
    ### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, but the actual value of pct <0.1)
    paste0(fmt_x12, " (<", 10**(-d), "%)")
  } else if (100 * fraction > 100 - 10**(-d)) {
    ### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, but the actual value of pct >99.9)
    paste0(fmt_x12, " (>", 100 - 10**(-d), "%)")
  } else {
    paste0(fmt_x12, " (", fmtpct, "%)")
  }
  return(result)
}

#' @title Formatting fraction, count and denominator values
#'
#' @details
#' Formats a 3-dimensional value such that percent values
#' near 0 or 100% are formatted as .e.g, `"<0.1%"` and
#' `">99.9%"`, where the cutoff is controlled by `d`, and
#' formatted as `"xx.x% (xx/xx)"` otherwise, with the
#' precision of the percent also controlled by `d`.
#'
#' @inheritParams count_fraction
#' @param ... Additional arguments passed to other methods.
#' @export
#' @rdname fraction_count_denom
#' @return `x` formatted as a string with `d` digits of precision,
#' with special cased values as described in Details above.
#' @examples
#' jjcsformat_fraction_count_denom(c(7, 10, 0.7))
#' jjcsformat_fraction_count_denom(c(70000, 70001, 70000 / 70001))
#' jjcsformat_fraction_count_denom(c(235, 235, 235 / 235))
jjcsformat_fraction_count_denom <- function(
    x,
    d = 1,
    roundmethod = c("sas", "iec"),
    ...) {
  roundmethod <- match.arg(roundmethod)
  attr(x, "label") <- NULL
  if (any(is.na(x))) {
    return("-")
  }
  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(
    x[3],
    include_boundaries = TRUE
  )

  fraction <- x[3]
  if (x[2] == x[1]) fraction <- 1

  fmt_x12 <- paste0(x[1], "/", x[2])

  if (roundmethod == "sas") {
    fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d)
  } else {
    fmtpct <- format(round(fraction * 100, d), nsmall = d)
  }

  result <- if (x[1] == 0) {
    # "0"
    # same as in general situation
    paste0("(", fmt_x12, ")")
  } else if (100 * fraction == 100) {
    paste0("100.0%", " (", fmt_x12, ")")
  } else if (100 * fraction < 10**(-d)) {
    ### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, but the actual value of pct <0.1)
    paste0("<", 10**(-d), "%", " (", fmt_x12, ")")
  } else if (100 * fraction > 100 - 10**(-d)) {
    ### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, but the actual value of pct >99.9)
    paste0(">", 100 - 10**(-d), "%", " (", fmt_x12, ")")
  } else {
    paste0(fmtpct, "%", " (", fmt_x12, ")")
  }
  return(result)
}

#' @title Function factory for p-value formatting
#'
#' @description A function factory to generate formatting functions for p-value
#' formatting that support rounding close to the significance level specified
#'
#' @param alpha `number`\cr the significance level to account for during rounding.
#' @return The p-value in the standard format. If `count` is 0, the format is `0`.
#'   If it is smaller than 0.001, then `<0.001`, if it is larger than 0.999, then
#'   `>0.999` is returned. Otherwise, 3 digits are used. In the special case that
#'   rounding from below would make the string equal to the specified `alpha`,
#'   then a higher number of digits is used to be able to still see the difference.
#'   For example, 0.0048 is not rounded to 0.005 but stays at 0.0048 if `alpha = 0.005`
#'   is set.
#'
#' @family JJCS formats
#' @export
#'
#' @examples
#' my_pval_format <- jjcsformat_pval_fct(0.005)
#' my_pval_format(0.2802359)
#' my_pval_format(0.0048)
#' my_pval_format(0.00499)
#' my_pval_format(0.004999999)
#' my_pval_format(0.0051)
#' my_pval_format(0.0009)
#' my_pval_format(0.9991)
#'
jjcsformat_pval_fct <- function(alpha = 0.05) {
  checkmate::assert_number(alpha, lower = 0, upper = 1)

  function(x, ...) {
    checkmate::assert_number(
      x,
      lower = 0,
      upper = 1 + .Machine$double.eps, # Be a bit tolerant here.
      na.ok = TRUE
    )
    if (is.na(x)) {
      "NE"
    } else if (x < 0.001) {
      "<0.001"
    } else if (x > 0.999) {
      ">0.999"
    } else {
      xx_format <- "xx.xxx"
      res <- jjcsformat_xx(xx_format)(x)
      while (as.numeric(res) == alpha && x < alpha) {
        # Increase precision by 1 digit until the result
        # is different from threshold alpha.
        xx_format <- paste0(xx_format, "x")
        res <- jjcsformat_xx(xx_format)(x)
      }
      res
    }
  }
}

#' @title Function factory for range with censoring information formatting
#' @description A function factory to generate formatting functions for range formatting
#'   that includes information about the censoring of survival times.
#'
#' @param str `string`\cr the format specifying the number of digits to be used,
#'   for the range values, e.g. `"xx.xx"`.
#' @return A function that formats a numeric vector with 4 elements:
#'   - minimum
#'   - maximum
#'   - censored minimum? (1 if censored, 0 if event)
#'   - censored maximum? (1 if censored, 0 if event)
#'   The range along with the censoring information is returned as a string
#'   with the specified numeric format as `(min, max)`, and the `+` is appended
#'   to `min` or `max` if these have been censored.
#'
#' @family JJCS formats
#' @export
#'
#' @examples
#' my_range_format <- jjcsformat_range_fct("xx.xx")
#' my_range_format(c(0.35235, 99.2342, 1, 0))
#' my_range_format(c(0.35235, 99.2342, 0, 1))
#' my_range_format(c(0.35235, 99.2342, 0, 0))
#' my_range_format(c(0.35235, 99.2342, 1, 1))
jjcsformat_range_fct <- function(str) {
  format_xx <- jjcsformat_xx(str)

  function(x, ...) {
    checkmate::assert_numeric(
      x,
      len = 4L,
      finite = TRUE,
      any.missing = FALSE
    )
    checkmate::assert_true(all(x[c(3, 4)] %in% c(0, 1)))

    res <- vapply(x[c(1, 2)], format_xx, character(1))
    if (x[3] == 1) res[1] <- paste0(res[1], "+")
    if (x[4] == 1) res[2] <- paste0(res[2], "+")
    paste0("(", res[1], ", ", res[2], ")")
  }
}

#' Tabulation of Least Square Means Results
#'
#' @description `r lifecycle::badge('stable')`
#'
#' These functions can be used to produce tables from LS means, e.g. from [fit_mmrm_j()]
#' or [fit_ancova()].
#'
#' @note These functions have been forked from the `tern.mmrm` package. Additional features
#'   are:
#'
#'   * Additional `ref_path` argument for tern.mmrm::summarize_lsmeans().
#'   * The function is more general in that it also works for LS means results from ANCOVA
#'   * Additional statistic `diff_mean_est_ci` is returned
#'   * P-value sidedness can be chosen
#'
#' @name tabulate_lsmeans
#' @return for `s_lsmeans`, a list containing the same statistics returned by [tern.mmrm::s_mmrm_lsmeans],
#' with the additional `diff_mean_est_ci` three-dimensional statistic. For `a_lsmeans`,
#' a `VertalRowsSection` as returned by [rtables::in_rows].
#' @examples
#' result <- fit_mmrm_j(
#'   vars = list(
#'     response = "FEV1",
#'     covariates = c("RACE", "SEX"),
#'     id = "USUBJID",
#'     arm = "ARMCD",
#'     visit = "AVISIT"
#'   ),
#'   data = mmrm::fev_data,
#'   cor_struct = "unstructured",
#'   weights_emmeans = "equal"
#' )
#'
#' df <- broom::tidy(result)
#'
#' s_lsmeans(df[8, ], .in_ref_col = FALSE)
#' s_lsmeans(df[8, ], .in_ref_col = FALSE, alternative = "greater", show_relative = "increase")
#'
#' dat_adsl <- mmrm::fev_data |>
#'   dplyr::select(USUBJID, ARMCD) |>
#'   unique()
#'
#' basic_table() |>
#'   split_cols_by("ARMCD") |>
#'   add_colcounts() |>
#'   split_rows_by("AVISIT") |>
#'   analyze(
#'     "AVISIT",
#'     afun = a_lsmeans,
#'     show_labels = "hidden",
#'     na_str = tern::default_na_str(),
#'     extra_args = list(
#'       .stats = c(
#'         "n",
#'         "adj_mean_se",
#'         "adj_mean_ci",
#'         "diff_mean_se",
#'         "diff_mean_ci"
#'       ),
#'       .labels = c(
#'         adj_mean_se = "Adj. LS Mean (Std. Error)",
#'         adj_mean_ci = "95% CI",
#'         diff_mean_ci = "95% CI"
#'       ),
#'       .formats = c(adj_mean_se = jjcsformat_xx("xx.x (xx.xx)")),
#'       alternative = "greater",
#'       ref_path = c("ARMCD", result$ref_level)
#'     )
#'   ) |>
#'   build_table(
#'     df = broom::tidy(result),
#'     alt_counts_df = dat_adsl
#'   )
NULL

#' @describeIn tabulate_lsmeans Helper method (for [broom::tidy()]) to prepare a `data.frame` from an
#'   `tern_model` object containing the least-squares means and contrasts.
#' @method tidy tern_model
#' @importFrom generics tidy
#' @export
tidy.tern_model <- function(x, ...) {
  # nolint
  vars <- x$vars
  estimates <- x$lsmeans$estimates
  df <- if (is.null(vars$arm)) {
    nams <- names(estimates)
    to_rename <- match(c("estimate", "se", "df", "lower_cl", "upper_cl"), nams)
    names(estimates)[to_rename] <- paste0(names(estimates)[to_rename], "_est")
    estimates
  } else {
    contrasts <- x$lsmeans$contrasts
    merge(x = estimates, y = contrasts, by = c(vars$visit, vars$arm), suffixes = c("_est", "_contr"), all = TRUE)
  }
  df[[vars$arm]] <- factor(df[[vars$arm]], levels = levels(estimates[[vars$arm]]))
  df[[vars$visit]] <- factor(df[[vars$visit]], levels = levels(estimates[[vars$visit]]))
  df$conf_level <- x$conf_level
  if (!is.null(x$mse)) {
    df$mse <- x$mse[df[[vars$visit]]]
  }
  if (!is.null(x$df)) {
    df$df <- x$df[df[[vars$visit]]]
  }
  df
}

#' @describeIn tabulate_lsmeans Statistics function which is extracting estimates from a tidied least-squares means
#'   data frame.
#' @inheritParams proposal_argument_convention
#' @export
s_lsmeans <- function(df,
                      .in_ref_col,
                      alternative = c("two.sided", "less", "greater"),
                      show_relative = c("reduction", "increase")) {
  alternative <- match.arg(alternative)
  show_relative <- match.arg(show_relative)
  if_not_ref <- function(x) if (.in_ref_col) character() else x
  list(
    n = df$n,
    adj_mean_se = c(df$estimate_est, df$se_est),
    adj_mean_ci = with_label(
      c(df$lower_cl_est, df$upper_cl_est),
      paste0("Adjusted Mean ", f_conf_level(df$conf_level))
    ),
    adj_mean_est_ci = with_label(
      c(df$estimate_est, df$lower_cl_est, df$upper_cl_est),
      paste0("Adjusted Mean (", f_conf_level(df$conf_level), ")")
    ),
    diff_mean_se = if_not_ref(c(df$estimate_contr, df$se_contr)),
    diff_mean_ci = with_label(
      if_not_ref(c(df$lower_cl_contr, df$upper_cl_contr)),
      paste0("Difference in Adjusted Means ", f_conf_level(df$conf_level))
    ),
    diff_mean_est_ci = with_label(
      if_not_ref(c(df$estimate_contr, df$lower_cl_contr, df$upper_cl_contr)),
      paste0("Difference in Adjusted Means (", f_conf_level(df$conf_level), ")")
    ),
    change = switch(show_relative,
      reduction = with_label(if_not_ref(df$relative_reduc), "Relative Reduction (%)"),
      increase = with_label(if_not_ref(-df$relative_reduc), "Relative Increase (%)")
    ),
    p_value = if_not_ref(switch(alternative,
      two.sided = with_label(df$p_value, "2-sided p-value"),
      less = with_label(df$p_value_less, "1-sided p-value (less)"),
      greater = with_label(df$p_value_greater, "1-sided p-value (greater)")
    ))
  )
}

#' @describeIn tabulate_lsmeans Formatted Analysis function to be used as `afun`
#' @export
a_lsmeans <- function(df,
                      ref_path,
                      .spl_context,
                      ...,
                      .stats = NULL,
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_lsmeans,
    custom_stat_fnc_list = NULL,
    args_list = c(df = list(df), .in_ref_col = ref$in_ref_col, dots_extra_args)
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "tabulate_lsmeans",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' @note: This has been forked from the `rbmi` package, mainly to support in addition
#'   the pooling of variance estimates.
rbmi_pool <- function(
    results,
    conf.level = 0.95,
    alternative = c("two.sided", "less", "greater"),
    type = c("percentile", "normal")) {
  rbmi::validate(results)

  alternative <- match.arg(alternative)
  type <- match.arg(type)

  pool_type <- class(results$results)[[1]]
  checkmate::assert_true(identical(pool_type, "rubin"))

  results_transpose <- (utils::getFromNamespace("transpose_results", "rbmi"))(
    results$results,
    (utils::getFromNamespace("get_pool_components", "rbmi"))(pool_type)
  )

  pars <- lapply(
    results_transpose,
    function(x, ...) mod_pool_internal_rubin(x, ...),
    conf.level = conf.level,
    alternative = alternative,
    type = type,
    D = results$method$D
  )

  method <- pool_type

  ret <- list(
    pars = pars,
    conf.level = conf.level,
    alternative = alternative,
    N = length(results$results),
    method = method
  )
  class(ret) <- "pool"
  return(ret)
}

mod_pool_internal_rubin <- function(results, conf.level, alternative, type, D) {
  ests <- results$est
  ses <- results$se
  dfs <- results$df
  alpha <- 1 - conf.level

  # Note: Need to take median here, because in the MMRM case the d.f. will be slightly different for each imputed
  # data set analysis.
  v_com <- stats::median(dfs)

  res_rubin <- (utils::getFromNamespace("rubin_rules", "rbmi"))(ests = ests, ses = ses, v_com = v_com)

  ret <- (utils::getFromNamespace("parametric_ci", "rbmi"))(
    point = res_rubin$est_point,
    se = sqrt(res_rubin$var_t),
    alpha = alpha,
    alternative = alternative,
    qfun = stats::qt,
    pfun = stats::pt,
    df = res_rubin$df
  )
  # Here also return the pooled d.f.:
  ret$df <- res_rubin$df

  return(ret)
}

#' rm_other_facets_fact
#' @param nm character. names of facets to keep. all other facets will be
#' removed
#' @returns a function suitable for use within the `post` portion make_split_fun

rm_other_facets_fact <- function(nm) {
  function(ret, spl, .spl_context, fulldf) {
    keep <- which(names(ret$values) %in% nm)
    stopifnot(length(keep) > 0)
    # values already have subsetting expressions on them
    make_split_result(ret$values[keep], ret$datasplit[keep], ret$labels[keep])
  }
}

#' @name real_add_overall_facet
#'
#' @title Add Overall Facet
#'
#' @description
#' A function to help add an overall facet to your tables
#' @param name character(1). Name/virtual 'value' for the new facet
#' @param label character(1). label for the new facet
#' @note current add_overall_facet is bugged, can use that directly after it's fixed
#' https://github.com/insightsengineering/rtables/issues/768
#' @examples
#'
#' splfun <- make_split_fun(post = list(real_add_overall_facet('Total', 'Total')))
#' @export
#' @returns function usable directly as a split function.
#'
real_add_overall_facet <- function(name, label) {
  function(ret, spl, .spl_context, fulldf) {
    add_to_split_result(
      ret,
      values = name,
      datasplit = stats::setNames(list(fulldf), name),
      labels = stats::setNames(label, name),
      subset_exprs = quote(TRUE)
    )
  }
}

#' @name make_combo_splitfun
#'
#' @title Split Function Helper
#'
#' @description
#' A function which aids the construction for users to create their own split function for combined columns
#' @param nm character(1). Name/virtual 'value' for the new facet
#' @param label character(1). label for the new facet
#' @param levels character or NULL. The levels to combine into the new facet,
#' or NULL, indicating the facet should include all incoming data.
#' @param rm_other_facets logical(1). Should facets other than the newly
#' created one be removed. Defaults to `TRUE`
#' @export
#' @returns function usable directly as a split function.
#' @examples
#' aesevall_spf <- make_combo_splitfun(nm = 'AESEV_ALL', label  = 'Any AE', levels = NULL)
#'
make_combo_splitfun <- function(nm, label = nm, levels = NULL, rm_other_facets = TRUE) {
  if (is.null(levels)) {
    fn <- real_add_overall_facet(name = nm, label = label)
  } else {
    fn <- add_combo_facet(name = nm, label = label, levels = levels)
  }
  if (rm_other_facets) {
    rmfun <- rm_other_facets_fact(nm)
  } else {
    rmfun <- NULL
  }
  make_split_fun(post = c(list(fn), if (rm_other_facets) list(rmfun)))
}

blank_regex <- "^[[:space:]]*$"

combine_nonblank <- function(name, label) {
  function(ret, spl, .spl_context, fulldf) {
    df <- fulldf[!grepl(blank_regex, fulldf[[spl_variable(spl)]]), ]
    add_to_split_result(
      ret,
      values = name,
      datasplit = stats::setNames(list(df), name),
      labels = stats::setNames(label, name)
    )
  }
}

rm_blank_levels <- function(df, spl, ...) {
  var <- spl_variable(spl)
  varvec <- df[[var]]
  oldlevs <- levels(varvec)
  newlevs <- oldlevs[!grepl(blank_regex, oldlevs)]
  df <- df[!grepl(blank_regex, varvec), ]
  df[[var]] <- factor(df[[var]], levels = newlevs)
  df
}


find_torm <- function(spl_ret, torm, torm_regex, keep_matches) {
  if (!is.null(torm)) {
    ans_lgl <- names(spl_ret$datasplit) %in% torm
  } else {
    ans_lgl <- grepl(torm_regex, names(spl_ret$datasplit))
  }
  if (keep_matches) {
    ans_lgl <- !ans_lgl
  }
  which(ans_lgl)
}

.check_rem_cond <- function(cond_str, cond_regex, spl_ctx, pos, type) {
  if (is.null(cond_str) && is.null(cond_regex)) {
    return(TRUE)
  }
  if (!is.null(cond_str) && !is.null(cond_regex)) {
    stop(
      "Got both ",
      paste(paste0(type, c("", "_regex")), collapse = " and "),
      ". Please specify at most one of these."
    )
  }
  ctx_data <- spl_ctx[[type]][pos]
  if (!is.null(cond_str)) {
    any(cond_str %in% ctx_data)
  } else {
    any(grepl(cond_regex, ctx_data))
  }
}

## handle support for NA and negative positions only called once but code is nicer with it factored out here
resolve_ancestor_pos <- function(anc_pos, numrows) {
  if (is.na(anc_pos)) {
    anc_pos <- seq_len(numrows)
  } else if (any(anc_pos < 0)) {
    if (!all(anc_pos < 0)) {
      stop("Got mix of negative and non-negative values for ancestor_pos; ", "this is not supported.")
    }
    ## direct parent is actually NROW(.spl_context) so avoid off-by-1 error with the + 1 here
    anc_pos <- numrows - (anc_pos + 1)
  }
  anc_pos
}

#' @name cond_rm_facets
#' @title Conditional Removal of Facets
#' @param facets character or NULL. Vector of facet names to be removed
#' if condition(s) are met
#' @param facets_regex character(1). Regular expression to identify facet
#' names to be removed if condition(s) are met.
#' @param ancestor_pos numeric(1). Row in spl_context to check the condition
#' within. E.g., 1 represents the first split, 2 represents the second split
#' nested within the first, etc. NA specifies that the conditions
#' should be checked at all split levels. Negative integers indicate position
#' counting back from the current one, e.g., -1 indicates the direct parent
#' (most recent split before this one). Negative and positive/NA positions
#' cannot be mixed.
#' @param split character(1) or NULL. If specified, name of the split
#' at position `ancestor_pos` must be identical to this value for
#' the removal condition to be met.
#' @param split_regex character(1) or NULL. If specified, a regular expression
#' the name of the split at position `ancestor_pos` must match for
#' the removal condition to be met. Cannot be specified at the same time
#' as `split`.
#' @param value character(1) or NULL. If specified, split (facet) value
#' at position `ancestor_pos` must be identical to this value for
#' removal condition to be met.
#' @param value_regex character(1) or NULL. If specified, a regular expression
#' the value of the split at position `ancestor_pos` must match for
#' the removal condition to be met. Cannot be specified at the same time
#' as `value`.
#' @param keep_matches logical(1). Given the specified condition is met,
#' should the facets removed be those matching `facets`/`facets_regex`
#' (`FALSE`, the default), or those *not* matching (`TRUE`).
#'
#' @details Facet removal occurs when the specified condition(s)
#' on the split(s) and or value(s) are met within at least one
#' of the split_context rows indicated by `ancestor_pos`; otherwise
#' the set of facets is returned unchanged.
#'
#' If facet removal is performed, either *all* facets which match `facets` (or
#' `facets_regex` will be removed ( the default `keep_matches == FALSE`
#'  case), or all *non-matching* facets will be removed (when
#'   `keep_matches_only == TRUE`).
#'
#' @note A degenerate table is likely to be returned if all facets
#' are removed.
#'
#' @returns a function suitable for use in `make_split_fun`'s
#' `post` argument which encodes the specified condition.
#' @export
#' @examples
#'
#' rm_a_from_placebo <- cond_rm_facets(
#'   facets = "A",
#'   ancestor_pos = NA,
#'   value_regex = "Placeb",
#'   split = "ARM"
#' )
#' mysplit <- make_split_fun(post = list(rm_a_from_placebo))
#'
#' lyt <- basic_table() |>
#'   split_cols_by("ARM") |>
#'   split_cols_by("STRATA1", split_fun = mysplit) |>
#'   analyze("AGE", mean, format = "xx.x")
#' build_table(lyt, ex_adsl)
#'
#' rm_bc_from_combo <- cond_rm_facets(
#'   facets = c("B", "C"),
#'   ancestor_pos = -1,
#'   value_regex = "Combi"
#' )
#' mysplit2 <- make_split_fun(post = list(rm_bc_from_combo))
#'
#' lyt2 <- basic_table() |>
#'   split_cols_by("ARM") |>
#'   split_cols_by("STRATA1", split_fun = mysplit2) |>
#'   analyze("AGE", mean, format = "xx.x")
#' tbl2 <- build_table(lyt2, ex_adsl)
#' tbl2
#'
#' rm_bc_from_combo2 <- cond_rm_facets(
#'   facets_regex = "^A$",
#'   ancestor_pos = -1,
#'   value_regex = "Combi",
#'   keep_matches = TRUE
#' )
#' mysplit3 <- make_split_fun(post = list(rm_bc_from_combo2))
#'
#' lyt3 <- basic_table() |>
#'   split_cols_by("ARM") |>
#'   split_cols_by("STRATA1", split_fun = mysplit3) |>
#'   analyze("AGE", mean, format = "xx.x")
#' tbl3 <- build_table(lyt3, ex_adsl)
#'
#' stopifnot(identical(cell_values(tbl2), cell_values(tbl3)))
cond_rm_facets <- function(
    facets = NULL,
    facets_regex = NULL,
    ancestor_pos = 1,
    split = NULL,
    split_regex = NULL,
    value = NULL,
    value_regex = NULL,
    keep_matches = FALSE) {
  ## detect errors/miscalling which don't even require us to have the facets
  if (is.null(split) && is.null(split_regex) && is.null(value) && is.null(value_regex)) {
    stop(
      "Must specify condition in terms of at least one of ",
      "split name (split or split_regex) or ",
      "facet value (value or value_regex)."
    )
  }
  if (is.null(facets) && is.null(facets_regex)) {
    stop("Must specify facets either facets or facets_regex, got neither.")
  } else if (!is.null(facets) && !is.null(facets_regex)) {
    stop("Got both facets and facets_regex, this is not supported, please specify only one.")
  }
  function(ret, spl, .spl_context, fulldf) {
    torm_ind <- c()
    ancestor_pos <- resolve_ancestor_pos(ancestor_pos, NROW(.spl_context))
    torm_ind <- find_torm(ret, facets, facets_regex, keep_matches = keep_matches)
    fct_abbrev <- ifelse(is.null(facets_regex), paste(facets, collapse = ", "), facets_regex)
    if (length(torm_ind) == 0) {
      # nocov start
      warning(
        "No facets matched removal criteria [",
        fct_abbrev,
        "] ",
        "in function created with cond_rm_facets.\n",
        "Occured at path: ",
        spl_context_to_disp_path(.spl_context),
        call. = FALSE
      )
      # nocov end
    } else if (length(torm_ind) == length(ret$datasplit)) {
      # nocov start
      warning(
        "All facets matched removal criteria [",
        fct_abbrev,
        "] in function created with cond_rm_facets. ",
        "This will result in a degenerate table (if the condition ",
        "is met) within row splitting and in table-creation failing ",
        "entirely in column splitting.\n",
        "Occured at path: ",
        spl_context_to_disp_path(.spl_context),
        call. = FALSE
      )
      # nocov end
    }
    if (
      .check_rem_cond(split, split_regex, .spl_context, ancestor_pos, type = "split") &&
        .check_rem_cond(value, value_regex, .spl_context, ancestor_pos, type = "value")
    ) {
      ## find_torm handles the keep matching case, so by this point torm_ind is always the ones to remove
      ret <- lapply(ret, function(part) part[-torm_ind])
    }

    ret
  }
}


#' @name rm_levels
#'
#' @title Removal of Levels
#'
#' @description
#' custom function for removing level inside pre step in make_split_fun.
#'
#' @param excl Choose which level(s) to remove
#' @return a function implementing pre-processing split behavior (for use in
#'   `make_split_fun(pre = )` which removes the levels in `excl` from the data
#'   before facets are generated.
#' @export
#'
rm_levels <- function(excl) {
  function(df, spl, ...) {
    var <- spl_variable(spl)
    varvec <- df[[var]]
    oldlevs <- levels(varvec)

    exclevs <- oldlevs %in% excl
    newlevs <- oldlevs[!exclevs]
    df[[var]] <- factor(df[[var]], levels = newlevs)
    df
  }
}

#' Shortcut for Creating Custom Column Splits
#'
#' This is a short cut for a common use of [rtables::make_split_result()] where you need to create
#' custom column splits with different labels but using the same full dataset for each column.
#' It automatically sets up the values, datasplit (using the same full dataset for each column),
#' and subset_exprs (using TRUE for all subsets) parameters for make_split_result().
#'
#' @param ... sequence of named labels for the columns.
#' @param fulldf (`data.frame`)\cr the `fulldf` which will be used for each column.
#' @return The result from [rtables::make_split_result()].
#'
#' @keywords internal
short_split_result <- function(..., fulldf) {
  labels <- c(...)
  values <- stats::setNames(names(labels), names(labels))
  datasplit <- stats::setNames(replicate(n = length(labels), list(fulldf)), names(labels))
  subset_exprs <- replicate(n = length(labels), list(expression(TRUE)))
  make_split_result(values = values, labels = labels, datasplit = datasplit, subset_exprs = subset_exprs)
}

#' Relative risk estimation
#'
#' The analysis function [a_relative_risk()] is used to create a layout element
#' to estimate the relative risk for response within a studied population. Only
#' the CMH method is available currently.
#' The primary analysis variable, `vars`, is a logical variable indicating
#' whether a response has occurred for each record.
#' A stratification variable must be supplied via the
#' `strata` element of the `variables` argument.
#'
#' @details The variance of the CMH relative risk estimate is calculated using
#'   the Greenland and Robins (1985) variance estimation.
#'
#' @param df (`data.frame`)\cr input data frame.
#' @param .var (`string`)\cr name of the response variable.
#' @param ref_path (`character`)\cr path to the reference group.
#' @param .spl_context (`environment`)\cr split context environment.
#' @param ... Additional arguments passed to the statistics function.
#' @param .stats (`character`)\cr statistics to calculate.
#' @param .formats (`list`)\cr formats for the statistics.
#' @param .labels (`list`)\cr labels for the statistics.
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
#' @param .ref_group (`data.frame`)\cr reference group data frame.
#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column.
#' @param variables (`list`)\cr list with strata variable names.
#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
#' @param method (`string`)\cr method to use for relative risk calculation.
#' @param weights_method (`string`)\cr method to use for weights calculation in stratified analysis.
#'
#' @note This has been adapted from the `odds_ratio` functions in the `tern` package.
#'
#' @name relative_risk
NULL

#' @describeIn relative_risk Statistics function estimating the relative risk for response.
#'
#' @return
#' * `s_relative_risk()` returns a named list of elements `rel_risk_ci` and `pval`.
#'
#' @examples
#' nex <- 100
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_relative_risk(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90
#' )
#' @export
s_relative_risk <- function(
    df,
    .var,
    .ref_group,
    .in_ref_col,
    variables = list(strata = NULL),
    conf_level = 0.95,
    method = "cmh",
    weights_method = "cmh") {
  method <- match.arg(method)
  weights_method <- match.arg(weights_method)
  checkmate::assert_character(variables$strata, null.ok = FALSE)
  y <- list(rel_risk_ci = list(), pval = list())

  if (!.in_ref_col) {
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref"))

    strata_colnames <- variables$strata
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

    assert_df_with_variables(df, strata_vars)
    assert_df_with_variables(.ref_group, strata_vars)

    # Merging interaction strata for reference group rows data and remaining
    strata <- c(interaction(.ref_group[strata_colnames]), interaction(df[strata_colnames]))
    strata <- as.factor(strata)

    y <- prop_ratio_cmh(rsp, grp, strata, conf_level)[c("rel_risk_ci", "pval")]

    one_group_no_events <- (sum(.ref_group[[.var]]) == 0) || (sum(df[[.var]]) == 0)
    if (one_group_no_events) {
      y$rel_risk_ci <- c(est = NA_real_, lcl = 0, ucl = Inf)
    }
  }

  y$rel_risk_ci <- with_label(
    x = y$rel_risk_ci,
    label = paste0("Relative risk (", f_conf_level(conf_level), ")")
  )
  y$pval <- unname(y$pval)

  y
}

#' @describeIn relative_risk Formatted analysis function which is used as `afun`. Note that the
#'   junco specific `ref_path` and `.spl_context` arguments are used for reference column information.
#'
#' @return
#' * `a_relative_risk()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' nex <- 100
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' l <- basic_table() |>
#'   split_cols_by(var = "grp") |>
#'   analyze(
#'     vars = "rsp",
#'     afun = a_relative_risk,
#'     extra_args = list(
#'       conf_level = 0.90,
#'       variables = list(strata = "f1"),
#'       ref_path = c("grp", "B")
#'     )
#'   )
#'
#' build_table(l, df = dta)
#' @export
#' @order 2
a_relative_risk <- function(
    df,
    .var,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_relative_risk,
    custom_stat_fnc_list = NULL,
    args_list = c(
      df = list(df),
      .var = .var,
      .ref_group = list(ref$ref_group),
      .in_ref_col = ref$in_ref_col,
      dots_extra_args
    )
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "relative_risk",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}


#' @keywords internal
safe_mh_test <- function(...) {
  tryCatch(
    stats::mantelhaen.test(...),
    error = function(e) list(p.value = NA_real_)
  )
}

#' Relative Risk CMH Statistic
#'
#' Calculates the relative risk which is defined as the ratio between the
#' response rates between the experimental treatment group and the control treatment group, adjusted
#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights.
#'
#' @inheritParams proposal_argument_convention
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#'
#' @return a list with elements `rel_risk_ci` and `pval`.
#' @examples
#'
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_ratio_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_ratio_cmh <- function(rsp, grp, strata, conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  has_single_stratum <- nlevels(strata) == 1
  check_diff_prop_ci(
    rsp = rsp,
    grp = grp,
    conf_level = conf_level,
    strata = strata
  )

  if (any(tapply(rsp, strata, length) < 5)) {
    warning("Less than 5 observations in some strata.")
  }

  # first dimension: treatment, control - therefore we reverse the levels order 2nd dimension: TRUE, FALSE 3rd
  # dimension: levels of strata rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(factor(grp, levels = rev(levels(grp))), factor(rsp, levels = c("TRUE", "FALSE")), strata)

  # Index 1 is for treatment, index 2 is for control, e.g. n1 is number of treatment patients per stratum.
  n1resp <- t_tbl[1, 1, levels(strata)]
  n2resp <- t_tbl[2, 1, levels(strata)]

  t1 <- t_tbl[1, 1:2, , drop = TRUE]
  t2 <- t_tbl[2, 1:2, , drop = TRUE]
  if (has_single_stratum) {
    n1 <- sum(t1)
    n2 <- sum(t2)
    checkmate::assert_count(n1)
    checkmate::assert_count(n2)
  } else {
    n1 <- colSums(t1)
    n2 <- colSums(t2)
    checkmate::assert_integerish(n1)
    checkmate::assert_integerish(n2)
    checkmate::assert_true(identical(length(n1), length(n2)))
  }

  # CMH statistic for relative risk, Treatment over Control
  use_stratum <- (n1 > 0) & (n2 > 0)
  n1 <- n1[use_stratum]
  n2 <- n2[use_stratum]
  n1resp <- n1resp[use_stratum]
  n2resp <- n2resp[use_stratum]
  nresp <- n1resp + n2resp
  n <- n1 + n2

  rel_risk_num <- sum(n1resp * n2 / n)
  rel_risk_denom <- sum(n2resp * n1 / n)
  log_rel_risk <- log(rel_risk_num) - log(rel_risk_denom)
  rel_risk <- exp(log_rel_risk)

  var_num <- sum((n1 * n2 * nresp - n1resp * n2resp * n) / n^2)
  var_denom <- rel_risk_num * rel_risk_denom
  var_log_rel_risk <- var_num / var_denom
  se_log_rel_risk <- sqrt(var_log_rel_risk)

  z <- stats::qnorm((1 + conf_level) / 2)
  log_rel_risk_ci <- log_rel_risk + c(-1, +1) * z * se_log_rel_risk
  ci <- exp(log_rel_risk_ci)
  rel_risk_ci <- stats::setNames(c(rel_risk, ci), c("est", "lcl", "ucl"))

  pval <- safe_mh_test(t_tbl, correct = FALSE)$p.value

  list(rel_risk_ci = rel_risk_ci, pval = pval)
}

#' @name safe_prune_table
#'
#' @title Safely Prune Table With Empty Table Message If Needed
#'
#' @inheritParams rtables::prune_table
#'
#' @param empty_msg character(1). The message to place in the table
#' if no rows were left after pruning
#'
#' @param spancols logical(1). Should `empty_msg` be spanned
#' across the table's columns (`TRUE`) or placed in the
#' rows row label (`FALSE`). Defaults to `FALSE` currently.
#'
#' @rdname safe_prune_table
#' @return `tt` pruned based on the arguments, or, if
#' pruning would remove all rows, a TableTree with the
#' same column structure, and one row containing the
#' empty message spanning all columns
#'
#' @export
#' @examples
#' prfun <- function(tt) TRUE
#'
#' lyt <- basic_table() |>
#'   split_cols_by("ARM") |>
#'   split_cols_by("STRATA1") |>
#'   split_rows_by("SEX") |>
#'   analyze("AGE")
#' tbl <- build_table(lyt, ex_adsl)
#'
#' safe_prune_table(tbl, prfun)
safe_prune_table <- function(
    tt,
    prune_func = prune_empty_level,
    stop_depth = NA,
    empty_msg = " - No Data To Display - ",
    spancols = FALSE) {
  ret <- prune_table(tt = tt, prune_func = prune_func, stop_depth = stop_depth, depth = 0)
  if (is.null(ret)) {
    ret <- tt[integer(), , keep_titles = TRUE, keep_topleft = TRUE, keep_footers = TRUE]

    if (spancols) {
      ## this will eventually be the only option...
      ret <- sanitize_table_struct(ret, empty_msg = empty_msg)
    } else {
      tree_children(ret) <- list(rrowl(empty_msg, rep(" ", ncol(tt))))
    }
  }
  ret
}

#' @name count_pruner
#'
#' @title Count Pruner
#'
#' @description
#' This is a pruning constructor function which identifies records to be pruned
#' based on the count (assumed to be the first statistic displayed when a compound
#' statistic (e.g., ## / ## (XX.X percent) is presented).
#'
#' @param  count   count threshold.  Function will keep all records strictly greater
#'                 than this threshold.
#' @param  cols    column path (character or integer (column indices))
#' @param  cat_include     Category to be considered for pruning
#' @param  cat_exclude logical Category to be excluded from pruning
#' @export
#'
#'
#' @examples
#'
#' ADSL <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   TRT01P = factor(
#'     c(
#'       "ARMA", "ARMB", "ARMA", "ARMB", "ARMB",
#'       "Placebo", "Placebo", "Placebo", "ARMA", "ARMB"
#'     )
#'   ),
#'   FASFL = c("Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y"),
#'   SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
#'   PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
#' )
#'
#' lyt <- basic_table() |>
#'   split_cols_by("TRT01P") |>
#'   add_overall_col("Total") |>
#'   analyze("FASFL",
#'     var_labels = "Analysis set:",
#'     afun = a_freq_j,
#'     extra_args = list(label = "Full", val = "Y"),
#'     show_labels = "visible"
#'   ) |>
#'   analyze("SAFFL",
#'     var_labels = "Analysis set:",
#'     afun = a_freq_j,
#'     extra_args = list(label = "Safety", val = "Y"),
#'     show_labels = "visible"
#'   ) |>
#'   analyze("PKFL",
#'     var_labels = "Analysis set:",
#'     afun = a_freq_j,
#'     extra_args = list(label = "PK", val = "Y"),
#'     show_labels = "visible"
#'   )
#'
#' result <- build_table(lyt, ADSL)
#'
#' result
#'
#' result <- prune_table(
#'   result,
#'   prune_func = count_pruner(cat_exclude = c("Safety"), cols = "Total")
#' )
#'
#' result
#'
#' @rdname count_pruner
#' @returns  function that can be utilized as pruning function in prune_table
#'
count_pruner <- function(count = 0, cat_include = NULL, cat_exclude = NULL, cols = c("TRT01A")) {
  function(tt) {
    # Do not ever prune the following rows.  a row that should be kept in the table will get the value of FALSE

    if ( # nolint start
      !methods::is(tt, "TableRow") ||
        methods::is(tt, "LabelRow") ||
        obj_label(tt) == " " ||
        (!is.null(cat_include) &&
          !obj_label(tt) %in% cat_include) ||
        (!is.null(cat_exclude) && obj_label(tt) %in% cat_exclude)
    ) { # nolint end
      return(FALSE)
    }

    # Check the remaining rows to see if any meet the specified threshold.

    if (!is.null(cols)) {
      tt <- subset_cols(tt, cols)
    }

    # init return value to FALSE (not remove row)
    remove <- FALSE

    colpaths <- col_paths(tt)
    # identify non relative risk columns as in rrisk columns first element is not a count, but diff percentage with
    # small total columns and count > 0 (eg count = 1, n per group = 10), you can run into count1 = 1, countpbo =
    # 0, diffpct = 10, this row should not be considered though
    cp_nonrelrisk <- vapply(
      colpaths,
      function(pth) {
        !any(grepl("rrisk", tolower(pth)))
      },
      FUN.VALUE = TRUE
    )

    ## only continue if at least one non relative risk column selected
    if (any(cp_nonrelrisk)) {
      # get cell_values of non rel risk columns
      cell_vals <- cell_values(tt)[cp_nonrelrisk]

      len_cell_vals <- lapply(cell_vals, function(x) {
        length(x)
      })

      if (any(len_cell_vals == 0)) {
        stop("column cell values has not appropriate structure (a column with NULL value).")
      }

      # get count (first element)
      counts <- unlist(
        lapply(cell_vals, function(x) {
          x[[1]]
        }),
        use.names = FALSE
      )

      # check that we do have counts only
      checkmate::assert_integerish(counts)

      keep <- counts > count
      # the row should be kept if at least one column has count above threshold
      keep <- any(keep)

      # pruning function should return TRUE if row has to be removed, ie opposite of keep
      remove <- !keep
    }

    return(remove)
  }
}


#' @name bspt_pruner
#'
#' @title Pruning Function for pruning based on a fraction and/or a difference from the control arm
#'
#' @description
#' This is a pruning constructor function which identifies records to be pruned
#' based on the the fraction from the percentages. In addition to just looking at a fraction within an arm
#' this function also allows further flexibility to also prune based on a comparison versus the control arm.
#' @param  fraction   fraction threshold.  Function will keep all records strictly greater
#'                 than this threshold.
#' @param  cols    column path.
#' @param  keeprowtext     Row to be excluded from pruning.
#' @param reg_expr Apply keeprowtext as a regular expression (grepl with fixed = TRUE)
#' @param  control        Control Group
#' @param  diff_from_control  Difference from control threshold.
#' @param  only_more_often    TRUE: Only consider when column pct is more often
#' than control. FALSE: Also select a row where column pct is less often than
#' control and abs(diff) above threshold
#'
#' @export
#'
#'
#' @examples
#' ADSL <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   TRT01P = c(
#'     "ARMA", "ARMB", "ARMA", "ARMB", "ARMB",
#'     "Placebo", "Placebo", "Placebo", "ARMA", "ARMB"
#'   ),
#'   FASFL = c("Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y"),
#'   SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
#'   PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
#' )
#'
#' ADSL <- ADSL |>
#'   dplyr::mutate(TRT01P = as.factor(TRT01P)) |>
#'   dplyr::mutate(SAFFL = factor(SAFFL, c("Y", "N"))) |>
#'   dplyr::mutate(PKFL = factor(PKFL, c("Y", "N")))
#'
#' lyt <- basic_table() |>
#'   split_cols_by("TRT01P") |>
#'   add_overall_col("Total") |>
#'   split_rows_by(
#'     "FASFL",
#'     split_fun = drop_and_remove_levels("N"),
#'     child_labels = "hidden"
#'   ) |>
#'   analyze("FASFL",
#'     var_labels = "Analysis set:",
#'     afun = a_freq_j,
#'     show_labels = "visible",
#'     extra_args = list(label = "Full", .stats = "count_unique_fraction")
#'   ) |>
#'   split_rows_by(
#'     "SAFFL",
#'     split_fun = remove_split_levels("N"),
#'     child_labels = "hidden"
#'   ) |>
#'   analyze("SAFFL",
#'     var_labels = "Analysis set:",
#'     afun = a_freq_j,
#'     show_labels = "visible",
#'     extra_args = list(label = "Safety", .stats = "count_unique_fraction")
#'   ) |>
#'   split_rows_by(
#'     "PKFL",
#'     split_fun = remove_split_levels("N"),
#'     child_labels = "hidden"
#'   ) |>
#'   analyze("PKFL",
#'     var_labels = "Analysis set:",
#'     afun = a_freq_j,
#'     show_labels = "visible",
#'     extra_args = list(label = "PK", .stats = "count_unique_fraction")
#'   )
#'
#' result <- build_table(lyt, ADSL)
#'
#' result
#'
#' result <- prune_table(
#'   result,
#'   prune_func = bspt_pruner(
#'     fraction = 0.05,
#'     keeprowtext = "Safety",
#'     cols = c("Total")
#'   )
#' )
#'
#' result
#' @rdname bspt_pruner
#' @returns  function that can be utilized as pruning function in prune_table
#'
bspt_pruner <- function(
    fraction = 0.05,
    keeprowtext = "Analysis set: Safety",
    reg_expr = FALSE,
    control = NULL,
    diff_from_control = NULL,
    only_more_often = TRUE,
    cols = c("TRT01A")) {
  if (is.null(fraction) && is.null(diff_from_control)) {
    stop("At least one of fraction or diff_from_control must be non-NULL.")
  }
  if (!is.null(diff_from_control) && is.null(control)) {
    stop("control must be specified when diff_from_control is not NULL.")
  }

  function(tt) {
    # Do not ever prune the following rows.
    if (!methods::is(tt, "TableRow") || methods::is(tt, "LabelRow")) {
      return(FALSE)
    }

    if (
      reg_expr &&
        any(sapply(keeprowtext, function(x) {
          grepl(x, obj_label(tt), fixed = TRUE)
        }))
    ) {
      return(FALSE)
    }
    if (!reg_expr && obj_label(tt) %in% keeprowtext) {
      return(FALSE)
    }

    # init return value to FALSE (not remove row)
    remove <- FALSE

    # needed later for control column
    tt_all_cols <- tt

    if (!is.null(cols)) {
      tt <- subset_cols(tt, cols)
    }

    colpaths <- col_paths(tt)
    # identify non relative risk columns
    cp_nonrelrisk <- vapply(
      colpaths,
      function(pth) {
        !any(grepl("rrisk", tolower(pth)))
      },
      FUN.VALUE = TRUE
    )

    ## only continue if at least one non relative risk column selected
    if (any(cp_nonrelrisk)) {
      # get rid of rel risk columns (if present)
      cell_vals <- cell_values(tt)[cp_nonrelrisk]

      len_cell_vals <- lapply(cell_vals, function(x) {
        length(x)
      })

      if (any(len_cell_vals < 2)) {
        stop("column cell values has not appropriate structure (less than 2 values in cell).")
      }

      # get count and percentage columns counts in first col pcts in second col
      counts <- unname(lst_slicer(cell_vals, 1, numeric(1)))
      pcts <- unname(lst_slicer(cell_vals, 2, numeric(1)))

      checkmate::check_integerish(counts)
      # similarly check that pcts is indeed a percentage
      if (!all(dplyr::between(pcts, 0, 1))) {
        stop("second value column cell is not a percentage.")
      }

      if (!is.null(fraction)) {
        # avoid problems with FALSE for 0.05 >= 0.05
        check_pcts <- pcts >= fraction |
          vapply(
            pcts,
            function(x) {
              isTRUE(all.equal(x, fraction))
            },
            TRUE
          )
      }

      if (!is.null(diff_from_control)) {
        # get control group from all columns tt (in case the columns selected would not include the control
        # group)
        colpaths0 <- col_paths(subset_cols(tt_all_cols, c(control)))
        cp0_nonrelrisk <- vapply(
          colpaths0,
          function(pth) {
            !any(grepl("rrisk", tolower(pth)))
          },
          FUN.VALUE = TRUE
        )
        colpaths0 <- colpaths0[cp0_nonrelrisk]

        if (length(colpaths0) != 1) {
          stop("control group spec does not result in single column.")
        } else {
          colpaths0 <- colpaths0[[1]]
          # get percent from control group
          pct0 <- cell_values(tt_all_cols, colpath = c(control))[[1]][2]

          if (!dplyr::between(pct0, 0, 1)) {
            stop("second value of control column cell is not a percentage.")
          }
        }

        # check if control group column was also present in main column selector
        colid0 <- which(sapply(colpaths, function(x) {
          all(x == colpaths0)
        }))

        pctdiffs <- pcts - pct0

        # exclude control column from this (would only be relevant if we want to check criteria on all columns
        # rather than any column)
        if (!identical(colid0, integer(0))) pctdiffs <- pctdiffs[-colid0]

        # if not only_more_often be equally strict on worse from control, better as control ie also select rows
        # where column is less often than control pct = 0.07 pct0 = 0.10 (diff = -0.03 )
        if (!only_more_often) pctdiffs <- abs(pctdiffs)

        # avoid problems with FALSE for 0.02 >= 0.02
        check_diffpcts <- pctdiffs >= diff_from_control |
          vapply(
            pctdiffs,
            function(x) {
              isTRUE(all.equal(x, diff_from_control))
            },
            TRUE
          )
      }

      # final step to check if row should be kept in table
      if (!is.null(fraction) && is.null(diff_from_control)) {
        # keep row in table if any column pct above threshold
        keep <- any(check_pcts)
      } else if ((!is.null(fraction) && !is.null(diff_from_control))) {
        # if any column pct above threshold AND any diff pct above threshold note any is used in both
        # expressions separately situation pct = 0.04 pct0 = 0.06 should be kept (fraction = 0.05, abs(diff
        # control) = 0.02) this problem only arises in the non-default situation when only_more_often is FALSE
        # (ie when column pct is less often than control) this would not be the case if we would any(check_pcts
        # & check_diffpcts) as 0.04 is below fraction
        keep <- any(check_pcts) && any(check_diffpcts)
      } else if ((is.null(fraction) && !is.null(diff_from_control))) {
        keep <- any(check_diffpcts)
      }

      # pruning function should return TRUE if row has to be removed, ie opposite of keep
      remove <- !keep
    }

    return(remove)
  }
}

lst_slicer <- function(lst, ind, type) {
  vapply(lst, `[[`, i = ind, type)
}

#' @name remove_rows
#'
#' @title
#' Pruning function to remove specific rows of a table regardless of counts
#' @description
#' This function will remove all rows of a table based on the row text
#' provided by the user.
#' @param  removerowtext  define a text string for which any row with row text will be removed.
#' @param reg_expr Apply removerowtext as a regular expression (grepl with fixed = TRUE)
#' @export
#' @rdname remove_rows
#'
#'
#' @examples
#' ADSL <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   TRT01P = c(
#'     "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", "Placebo",
#'     "Placebo", "Placebo", "ARMA", "ARMB"
#'   ),
#'   Category = c(
#'     "Cat 1", "Cat 2", "Cat 1", "Unknown", "Cat 2",
#'     "Cat 1", "Unknown", "Cat 1", "Cat 2", "Cat 1"
#'   ),
#'   SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
#'   PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
#' )
#'
#' ADSL <- ADSL |>
#'   dplyr::mutate(TRT01P = as.factor(TRT01P))
#'
#' lyt <- basic_table() |>
#'   split_cols_by("TRT01P") |>
#'   analyze(
#'     "Category",
#'     afun = a_freq_j,
#'     extra_args = list(.stats = "count_unique_fraction")
#'   )
#'
#' result <- build_table(lyt, ADSL)
#'
#' result
#'
#' result <- prune_table(result, prune_func = remove_rows(removerowtext = "Unknown"))
#'
#' result
#' @aliases remove_rows
#' @returns function that can be utilized as pruning function in prune_table
#'
remove_rows <- function(removerowtext = NULL, reg_expr = FALSE) {
  function(tt) {
    if (!methods::is(tt, "TableRow") || methods::is(tt, "LabelRow")) {
      return(FALSE)
    }

    ret <- FALSE
    if (!is.null(removerowtext)) {
      if (!reg_expr) {
        ret <- obj_label(tt) %in% removerowtext
      } else {
        ret <- any(sapply(removerowtext, function(x) {
          grepl(x, obj_label(tt), fixed = TRUE)
        }))
      }
    }

    ret
  }
}

#' @name keep_non_null_rows
#'
#' @title Pruning Function to accommodate removal of completely NULL rows within a table
#'
#' @description
#' Condition function on individual analysis rows. Flag as FALSE when all
#' columns are NULL, as then the row should not be kept. To be utilized as a
#' row_condition in function tern::keep_rows
#'
#' @param  tr      table tree object
#' @export
#'
#'
#' @examples
#'
#' library(dplyr)
#'
#' ADSL <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   TRT01P = c(
#'     "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", "Placebo",
#'     "Placebo", "Placebo", "ARMA", "ARMB"
#'   ),
#'   AGE = c(34, 56, 75, 81, 45, 75, 48, 19, 32, 31),
#'   SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
#'   PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
#' )
#'
#' ADSL <- ADSL |>
#'   mutate(TRT01P = as.factor(TRT01P))
#'
#' create_blank_line <- function(x) {
#'   list(
#'     "Mean" = rcell(mean(x), format = "xx.x"),
#'     " " = rcell(NULL),
#'     "Max" = rcell(max(x))
#'   )
#' }
#'
#' lyt <- basic_table() |>
#'   split_cols_by("TRT01P") |>
#'   analyze("AGE", afun = create_blank_line)
#'
#' result <- build_table(lyt, ADSL)
#'
#' result
#' result <- prune_table(result, prune_func = tern::keep_rows(keep_non_null_rows))
#'
#' result
#' @rdname keep_non_null_rows
#' @returns a function that can be utilized as a row_condition in the tern::keep_rows function
#'
keep_non_null_rows <- function(tr) {
  if (methods::is(tr, "DataRow")) {
    r_cellvalue_null <- unlist(lapply(cell_values(tr), is.null))
    check <- all(r_cellvalue_null)

    ### if check TRUE (all cell values NULL -- need to return FALSE as need to remove)
    ret <- !check
  } else {
    ret <- TRUE
  }
  ret
}

#' Difference test for two proportions
#'
#' @description `r lifecycle::badge('stable')`
#'
#' The analysis function [a_test_proportion_diff()] can be used to create a layout element to test
#' the difference between two proportions. The primary analysis variable, `vars`, indicates whether a
#' response has occurred for each record. See the `method` parameter for options of methods to use
#' to calculate the p-value. Additionally, a stratification variable can be supplied via the `strata`
#' element of the `variables` argument. The argument `alternative` specifies the direction of the
#' alternative hypothesis.
#'
#' @inheritParams proposal_argument_convention
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`; specifies the test used
#'   to calculate the p-value.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#' @seealso [h_prop_diff_test]
#'
#' @note These functions have been forked from the `tern` package. Additional features are:
#'
#'   * Additional `alternative` argument for the sidedness of the test.
#'   * Additional `ref_path` argument for flexible reference column path specification.
#'
#' @name prop_diff_test
#' @order 1
NULL

#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#' @keywords internal
s_test_proportion_diff <- function(
    df,
    .var,
    .ref_group,
    .in_ref_col,
    variables = list(strata = NULL),
    method = c("chisq", "fisher", "cmh"),
    alternative = c("two.sided", "less", "greater")) {
  method <- match.arg(method)
  alternative <- match.arg(alternative)
  y <- list(pval = list())

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))
    rsp <- factor(c(.ref_group[[.var]], df[[.var]]), levels = c("TRUE", "FALSE"))
    grp <- factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref"))

    if (!is.null(variables$strata) || method == "cmh") {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    tbl <- switch(method,
      cmh = table(grp, rsp, strata),
      table(grp, rsp)
    )

    y$pval <- switch(method,
      chisq = prop_chisq(tbl, alternative),
      cmh = prop_cmh(tbl, alternative),
      fisher = prop_fisher(tbl, alternative)
    )
  }

  y$pval <- with_label(y$pval, d_test_proportion_diff_j(method, alternative))
  y
}

#' Description of the difference test between two proportions
#'
#' @description `r lifecycle::badge('stable')`
#'
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return A `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff_j <- function(method, alternative) {
  checkmate::assert_string(method)
  meth_part <- switch(method,
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test",
    stop(paste(method, "does not have a description"))
  )
  alt_part <- switch(alternative,
    two.sided = "",
    less = ", 1-sided, direction less",
    greater = ", 1-sided, direction greater"
  )
  paste0("p-value (", meth_part, alt_part, ")")
}

#' @describeIn prop_diff_test Formatted analysis function which is used as `afun`
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' l <- basic_table() |>
#'   split_cols_by(var = "grp") |>
#'   analyze(
#'     vars = "rsp",
#'     afun = a_test_proportion_diff,
#'     show_labels = "hidden",
#'     extra_args = list(
#'       method = "cmh",
#'       variables = list(strata = "strata"),
#'       ref_path = c("grp", "B")
#'     )
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
a_test_proportion_diff <- function(
    df,
    .var,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_test_proportion_diff,
    custom_stat_fnc_list = NULL,
    args_list = c(
      df = list(df),
      .var = .var,
      .ref_group = list(ref$ref_group),
      .in_ref_col = ref$in_ref_col,
      dots_extra_args
    )
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "test_proportion_diff",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Helper functions to test proportion differences
#'
#' Helper functions to implement various tests on the difference between two proportions.
#'
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @return A p-value.
#'
#' @seealso [prop_diff_test()] for implementation of these helper functions.
#'
#' @name h_prop_diff_test
NULL

#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#' @keywords internal
prop_chisq <- function(tbl, alternative) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]
  if (any(colSums(tbl) == 0)) {
    return(1)
  }
  stats::prop.test(tbl, correct = FALSE, alternative = alternative)$p.value
}

#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test.
#'  Internally calls [stats::mantelhaen.test()].
#'
#' @note strata with less than five observations will result in a warning and
#'  possibly incorrect results; strata with less than two observations are
#'  automatically discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#'
#' @keywords internal
prop_cmh <- function(ary, alternative) {
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)
  strata_sizes <- apply(ary, MARGIN = 3, sum)
  if (any(strata_sizes < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    ary <- ary[, , strata_sizes > 1]
  }
  stats::mantelhaen.test(ary, correct = FALSE, alternative = alternative)$p.value
}

#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#' @keywords internal
prop_fisher <- function(tbl, alternative) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]
  stats::fisher.test(tbl, alternative = alternative)$p.value
}

#' @note This has been forked from [tern::h_ancova()], because the new
#'   `weights_emmeans` option was added here.
h_ancova <- function(
    .var,
    .df_row,
    variables,
    weights_emmeans,
    interaction_item = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))

  assert_df_with_variables(
    .df_row,
    list(rsp = .var)
  )
  arm <- variables$arm
  covariates <- variables$covariates
  if (!is.null(covariates) && length(covariates) > 0) {
    var_list <- get_covariates(covariates)
    assert_df_with_variables(
      .df_row,
      var_list
    )
  }
  covariates_part <- paste(covariates, collapse = " + ")
  formula_str <- paste0(.var, " ~ ", arm)
  if (covariates_part != "") {
    formula_str <- paste0(formula_str, "+", covariates_part)
  }
  formula <- stats::as.formula(formula_str)
  specs <- arm
  if (!is.null(interaction_item)) {
    specs <- c(specs, interaction_item)
  }
  lm_fit <- stats::lm(formula = formula, data = .df_row)
  emmeans::emmeans(
    lm_fit,
    specs = specs,
    data = .df_row,
    weights = weights_emmeans
  )
}

#' @title Junco Extended ANCOVA Function
#' @name s_ancova_j
#' @inheritParams tern::s_ancova
#' @param df : need to check on how to inherit params from tern::s_ancova
#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `"counterfactual"` by default.
#' @description Extension to tern:::s_ancova, 3 extra statistics are returned
#'   * `lsmean_se`: Marginal mean and estimated SE in the group.
#'   * `lsmean_ci`: Marginal mean and associated confidence interval in the group.
#'   * `lsmean_diffci`: Difference in mean and associated confidence level in one combined statistic.
#'   In addition, the LS mean weights can be specified.
#'   In addition, also a NULL .ref_group can be specified, the lsmean_diff related estimates will be returned as NA.
#' @export
#' @return  returns a named list of 8 statistics (3 extra compared to `tern:::s_ancova()`).
#' @family Inclusion of ANCOVA Functions
#' @examples
#' library(dplyr)
#' library(tern)
#'
#' df <- iris |> filter(Species == "virginica")
#' .df_row <- iris
#' .var <- "Petal.Length"
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
#' .ref_group <- iris |> filter(Species == "setosa")
#' conf_level <- 0.95
#' s_ancova_j(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, conf_level)
s_ancova_j <- function(
    df,
    .var,
    .df_row,
    variables,
    .ref_group,
    .in_ref_col,
    conf_level,
    interaction_y = FALSE,
    interaction_item = NULL,
    weights_emmeans = "counterfactual") {
  arm <- variables$arm

  .df_row <- subset(.df_row, !is.na(.df_row[[.var]]))
  df <- subset(df, !is.na(df[[.var]]))
  .ref_group <- subset(.ref_group, !is.na(.ref_group[[.var]]))

  n_obs_trt_lvls <- length(unique(.df_row[[arm]]))

  ### sparse data problems with underlying ancova function
  if (NROW(.df_row) == 0) {
    ret <- NULL
  } else if (NROW(df) == 0 || n_obs_trt_lvls < 2) {
    ## all stats are NA
    ret <- list(
      n = with_label(0, "n"),
      lsmean = with_label(NA, "Adjusted Mean"),
      lsmean_se = with_label(rep(NA, 2), "Adjusted Mean (SE)"),
      lsmean_ci = with_label(
        rep(NA, 3),
        paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")")
      ),
      lsmean_diff = with_label(NA, "Difference in Adjusted Means"),
      lsmean_diff_ci = with_label(
        rep(NA, 2),
        paste("Difference in Adjusted Means", f_conf_level(conf_level))
      ),
      lsmean_diffci = with_label(
        rep(NA, 3),
        paste0(
          "Difference in Adjusted Means",
          " (",
          f_conf_level(conf_level),
          ")"
        )
      ),
      pval = with_label(NA, "p-value")
    )
  } else {
    if (NROW(.ref_group) == 0) {
      .ref_group <- NULL
    } else if (length(levels(.df_row[[arm]])) > n_obs_trt_lvls) {
      # missing arm levels need to be removed from the factor, in order to have the correct estimates,
      # and avoid errors with further code
      .df_row[[arm]] <- droplevels(.df_row[[arm]])
      df[[arm]] <- factor(
        as.character(df[[arm]]),
        levels = levels(.df_row[[arm]])
      )
      .ref_group[[arm]] <- factor(
        as.character(.ref_group[[arm]]),
        levels = levels(.df_row[[arm]])
      )
    }

    emmeans_fit <- h_ancova(
      .var = .var,
      variables = variables,
      .df_row = .df_row,
      weights_emmeans = weights_emmeans,
      interaction_item = interaction_item
    )
    sum_fit <- summary(
      emmeans_fit,
      level = conf_level
    )
    arm <- variables$arm
    sum_level <- as.character(unique(df[[arm]]))

    # Ensure that there is only one element in sum_level.
    checkmate::assert_scalar(sum_level)

    sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

    # Get the index of the ref arm
    if (isTRUE(interaction_y)) {
      y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
      # convert characters selected in interaction_y into the numeric order
      interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
      sum_fit_level <- sum_fit_level[interaction_y, ]
      # if interaction is called, reset the index
      ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
      ref_key <- utils::tail(ref_key, n = 1)
      ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
    } else {
      y <- df[[.var]]
      # Get the index of the ref arm when interaction is not called
      ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
      ref_key <- utils::tail(ref_key, n = 1)
    }

    if (.in_ref_col) {
      ret <- list(
        n = with_label(length(y[!is.na(y)]), "n"),
        lsmean = with_label(sum_fit_level$emmean, "Adjusted Mean"),
        lsmean_se = with_label(
          c(sum_fit_level$emmean, sum_fit_level$SE),
          "Adjusted Mean (SE)"
        ),
        lsmean_ci = with_label(
          c(
            sum_fit_level$emmean,
            sum_fit_level$lower.CL,
            sum_fit_level$upper.CL
          ),
          paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")")
        ),
        lsmean_diff = with_label(
          character(),
          "Difference in Adjusted Means"
        ),
        lsmean_diff_ci = with_label(
          character(),
          paste("Difference in Adjusted Means", f_conf_level(conf_level))
        ),
        lsmean_diffci = with_label(
          character(),
          paste0(
            "Difference in Adjusted Means",
            " (",
            f_conf_level(conf_level),
            ")"
          )
        ),
        pval = with_label(character(), "p-value")
      )
    } else {
      if (!is.null(.ref_group)) {
        # Estimate the differences between the marginal means.
        emmeans_contrasts <- emmeans::contrast(
          emmeans_fit,
          # Compare all arms versus the control arm.
          method = "trt.vs.ctrl",
          # Take the arm factor from .ref_group as the control arm.
          ref = ref_key,
          level = conf_level
        )
        sum_contrasts <- summary(
          emmeans_contrasts,
          # Derive confidence intervals, t-tests and p-values.
          infer = TRUE,
          # Do not adjust the p-values for multiplicity.
          adjust = "none"
        )
        contrast_lvls <- gsub(
          paste0(" - ", .ref_group[[arm]][1], ".*"),
          "",
          sum_contrasts$contrast
        )
        if (!is.null(interaction_item)) {
          sum_contrasts_level <- sum_contrasts[
            grepl(sum_level, contrast_lvls, fixed = TRUE), ,
            drop = FALSE
          ]
        } else {
          sum_contrasts_level <- sum_contrasts[
            (sum_level == contrast_lvls | paste0("(", sum_level, ")") == contrast_lvls), ,
            drop = FALSE
          ]
        }
        if (interaction_y != FALSE) {
          sum_contrasts_level <- sum_contrasts_level[interaction_y, , drop = FALSE]
        }
      } else {
        sum_contrasts_level <- list()
        sum_contrasts_level[["estimate"]] <- NA
        sum_contrasts_level[["lower.CL"]] <- NA
        sum_contrasts_level[["upper.CL"]] <- NA
        sum_contrasts_level[["p.value"]] <- NA
      }
      ret <- list(
        n = with_label(length(y[!is.na(y)]), "n"),
        lsmean = with_label(sum_fit_level$emmean, "Adjusted Mean"),
        lsmean_se = with_label(
          c(sum_fit_level$emmean, sum_fit_level$SE),
          "Adjusted Mean (SE)"
        ),
        lsmean_ci = with_label(
          c(
            sum_fit_level$emmean,
            sum_fit_level$lower.CL,
            sum_fit_level$upper.CL
          ),
          paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")")
        ),
        lsmean_diff = with_label(
          sum_contrasts_level$estimate,
          "Difference in Adjusted Means"
        ),
        lsmean_diff_ci = with_label(
          c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
          paste("Difference in Adjusted Means", f_conf_level(conf_level))
        ),
        lsmean_diffci = with_label(
          c(
            sum_contrasts_level$estimate,
            sum_contrasts_level$lower.CL,
            sum_contrasts_level$upper.CL
          ),
          paste0(
            "Difference in Adjusted Means",
            " (",
            f_conf_level(conf_level),
            ")"
          )
        ),
        pval = with_label(sum_contrasts_level$p.value, "p-value")
      )
    }
  }
  return(ret)
}

#' @name s_summarize_ancova_j
#' @title ANCOVA Summary Function
#' @description Combination of tern::s_summary, and ANCOVA based estimates for mean and diff between columns,
#' based on ANCOVA function `s_ancova_j`
#' @inherit s_ancova_j
#' @param ... Additional arguments passed to `s_ancova_j`.
#' @details Combination of tern::s_summary, and ANCOVA based estimates for mean and diff between columns,
#' based on ANCOVA function `s_ancova_j`
#' @return returns the statistics from tern::s_summary(x), appended with a new statistics based upon ANCOVA
#' @export
#' @family Inclusion of ANCOVA Functions
# @seealso s_ancova_j
#' @examples
#' library(dplyr)
#' library(tern)
#'
#' df <- iris |> filter(Species == "virginica")
#' .df_row <- iris
#' .var <- "Petal.Length"
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
#' .ref_group <- iris |> filter(Species == "setosa")
#' conf_level <- 0.95
#' s_summarize_ancova_j(
#'   df,
#'   .var = .var,
#'   .df_row = .df_row,
#'   variables = variables,
#'   .ref_group = .ref_group,
#'   .in_ref_col = FALSE,
#'   conf_level = conf_level
#' )
s_summarize_ancova_j <- function(
    df,
    .var,
    .df_row,
    .ref_group,
    .in_ref_col,
    ...) {
  x <- df[[.var]]
  y1 <- s_summary(x)
  y2 <- s_ancova_j(
    df = df,
    .var = .var,
    .ref_group = .ref_group,
    .in_ref_col = .in_ref_col,
    .df_row = .df_row,
    ...
  )
  c(y1, y2)
}

#' @describeIn s_summarize_ancova_j Formatted analysis function which is used as `afun`. Note that the
#'   junco specific `ref_path` and `.spl_context` arguments are used for reference column information.
#'
#' @param ref_path (`character`)\cr path to the reference group.
#' @param .spl_context (`environment`)\cr split context environment.
#' @param ... Additional arguments passed to the statistics function.
#' @param .stats (`character`)\cr statistics to calculate.
#' @param .formats (`list`)\cr formats for the statistics.
#' @param .labels (`list`)\cr labels for the statistics.
#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
#'
#' @return
#' * `a_summarize_ancova_j()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#'
#' basic_table() |>
#'   split_cols_by("Species") |>
#'   add_colcounts() |>
#'   analyze(
#'     vars = "Petal.Length",
#'     afun = a_summarize_ancova_j,
#'     show_labels = "hidden",
#'     na_str = tern::default_na_str(),
#'     table_names = "unadj",
#'     var_labels = "Unadjusted comparison",
#'     extra_args = list(
#'       variables = list(arm = "Species", covariates = NULL),
#'       conf_level = 0.95,
#'       .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means"),
#'       ref_path = c("Species", "setosa")
#'     )
#'   ) |>
#'   analyze(
#'     vars = "Petal.Length",
#'     afun = a_summarize_ancova_j,
#'     show_labels = "hidden",
#'     na_str = tern::default_na_str(),
#'     table_names = "adj",
#'     var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)",
#'     extra_args = list(
#'       variables = list(
#'         arm = "Species",
#'         covariates = c("Sepal.Length", "Sepal.Width")
#'       ),
#'       conf_level = 0.95,
#'       ref_path = c("Species", "setosa")
#'     )
#'   ) |>
#'   build_table(iris)
#'
#' @export
#' @order 2
a_summarize_ancova_j <- function(
    df,
    .var,
    .df_row,
    ref_path,
    .spl_context,
    ...,
    .stats = NULL,
    .formats = NULL,
    .labels = NULL,
    .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Obtain reference column information
  ref <- get_ref_info(ref_path, .spl_context)

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_summarize_ancova_j,
    custom_stat_fnc_list = NULL,
    args_list = c(
      df = list(df),
      .var = .var,
      .df_row = list(.df_row),
      .ref_group = list(ref$ref_group),
      .in_ref_col = ref$in_ref_col,
      dots_extra_args
    )
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "summarize_ancova_j",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Extract Estimates from Multivariate Cox Regression Model Fit Object
#'
#' @param x (`coxreg.multivar`)\cr from [tern::fit_coxreg_multivar()].
#' @export
#' @return A data frame containing Cox regression results with columns for term,
#'   coef_se (coefficient and standard error), p.value, hr (hazard ratio),
#'   hr_ci (confidence interval for hazard ratio), and labels (formatted term labels).
#' @keywords internal
#'
#' @examples
#' anl <- tern::tern_ex_adtte |>
#'   dplyr::mutate(EVENT = 1 - CNSR)
#'
#' variables <- list(
#'   time = "AVAL",
#'   event = "EVENT",
#'   arm = "ARM",
#'   covariates = c("SEX", "AGE")
#' )
#'
#' control <- tern::control_coxreg(
#'   conf_level = 0.9,
#'   ties = "efron"
#' )
#'
#' fit <- tern::fit_coxreg_multivar(
#'   data = anl,
#'   variables = variables,
#'   control = control
#' )
#'
#' h_extract_coxreg_multivar(fit)
h_extract_coxreg_multivar <- function(x) {
  checkmate::assert_class(x, "coxreg.multivar")
  vars <- c(x$vars$arm, x$vars$covariates)

  # Extract the table.
  tab <- broom::tidy(x$mod, exponentiate = FALSE, conf.int = TRUE, conf.level = x$control$conf_level)
  tab$hr <- exp(tab$estimate)
  tab$hr_ci <- Map(lcl = exp(tab$conf.low), ucl = exp(tab$conf.high), f = function(lcl, ucl) c(lcl, ucl))
  tab$coef_se <- Map(coef = tab$estimate, se = tab$std.error, f = function(coef, se) c(coef, se))
  colnms <- c("term", "coef_se", "p.value", "hr", "hr_ci")
  tab <- tab[, colnms, drop = FALSE]

  # Format nicely the term labels.
  var_to_term <- sapply(vars, function(v) which(startsWith(tab$term, v)), USE.NAMES = TRUE, simplify = FALSE)
  tab$labels <- unlist(lapply(vars, function(v) {
    inds <- var_to_term[[v]]
    strip_term <- gsub(paste0("^", v), "", tab$term[inds])
    this_df <- x$dat[v]
    if (is.character(this_df[[1]])) {
      this_df[[1]] <- as.factor(this_df[[1]])
    }
    if (is.factor(this_df[[1]])) {
      ref_level <- levels(this_df[[1]])[1]
      var_name <- if (v == x$vars$arm) "Treatment" else labels_or_names(this_df)
      paste0(var_name, " (", strip_term, " vs. ", ref_level, ")")
    } else {
      labels_or_names(this_df)
    }
  }))
  tab
}

#' First Level Column Split Function for TEFOS03 (mmy) Table Layout
#' @seealso [rtables::make_split_fun()] for details.
#' @keywords internal
tefos03_first_post_fun <- function(ret, spl, fulldf, .spl_context) {
  all_expr <- expression(TRUE)
  short_split_result(model_fit = "Model Fit", hazard_ratio = "Hazard Ratio", fulldf = fulldf)
}
tefos03_first_split_fun <- make_split_fun(post = list(tefos03_first_post_fun))

#' Second Level Column Split Function Factory for TEFOS03 (mmy) Table Layout
#'
#' @inheritParams proposal_argument_convention
#'
#' @return Split function to use in the TEFOS03 (mmy) and related table layouts.
#'
#' @seealso [tefos03_first_post_fun()] for the first level split.
#' @keywords internal
tefos03_second_split_fun_fct <- function(conf_level) {
  post_fun <- function(ret, spl, fulldf, .spl_context) {
    all_expr <- expression(TRUE)
    colset <- .spl_context[nrow(.spl_context), "value"][[1]]
    if (colset == "model_fit") {
      short_split_result(coef_se = "Coeff. (SE)", pval = "p-value", fulldf = fulldf)
    } else {
      short_split_result(hr_est = "Estimate", hr_ci = f_conf_level(conf_level), fulldf = fulldf)
    }
  }
  make_split_fun(post = list(post_fun))
}

#' @importFrom tern fit_coxreg_multivar
#' @keywords internal
memoised_fit_coxreg_multivar <- memoise::memoise(fit_coxreg_multivar)

#' Analysis Function for TEFOS03 and Related Table Layouts
#'
#' @inheritParams proposal_argument_convention
#' @param control (`list`)\cr from [tern::control_coxreg()].
#' @param formats (`list`)\cr including `coef_se`, `hr_est`, `hr_ci` and `pval` formats.
#' @param variables (`list`)\cr see [tern::fit_coxreg_multivar()] for required variable
#'   specifications.
#'
#' @keywords internal
tefos03_afun <- function(df, .var, .spl_context, variables, control, formats) {
  this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
  model_fit <- memoised_fit_coxreg_multivar(data = df, variables = variables, control = control)
  tab <- h_extract_coxreg_multivar(model_fit)
  if (this_col_split[1] == "model_fit") {
    if (this_col_split[2] == "coef_se") {
      in_rows(.list = tab$coef_se, .formats = formats$coef_se, .labels = tab$labels)
    } else {
      in_rows(.list = tab$p.value, .formats = formats$pval, .labels = tab$labels)
    }
  } else {
    if (this_col_split[2] == "hr_est") {
      in_rows(.list = tab$hr, .formats = formats$hr_est, .labels = tab$labels)
    } else {
      in_rows(.list = tab$hr_ci, .formats = formats$hr_ci, .labels = tab$labels)
    }
  }
}

#' Layout Generating Function for TEFOS03 and Related Cox Regression Layouts
#'
#' @inheritParams proposal_argument_convention
#' @inheritParams tefos03_afun
#' @param var (`string`)\cr any variable from the data, because this is not used.
#'
#' @return `lyt` modified to add the desired cox regression table section.
#' @export
#' @examples
#' anl <- tern::tern_ex_adtte |>
#'   dplyr::mutate(EVENT = 1 - CNSR)
#'
#' variables <- list(
#'   time = "AVAL",
#'   event = "EVENT",
#'   arm = "ARM",
#'   covariates = c("SEX", "AGE")
#' )
#'
#' basic_table() |>
#'   summarize_coxreg_multivar(
#'     var = "STUDYID",
#'     variables = variables
#'   ) |>
#'   build_table(df = anl)
summarize_coxreg_multivar <- function(
    lyt,
    var,
    variables,
    control = control_coxreg(),
    formats = list(
      coef_se = jjcsformat_xx("xx.xx (xx.xx)"),
      hr_est = jjcsformat_xx("xx.xx"),
      hr_ci = jjcsformat_xx("(xx.xx, xx.xx)"),
      pval = jjcsformat_pval_fct(0)
    )) {
  second_split_fun <- tefos03_second_split_fun_fct(conf_level = control$conf_level)
  lyt |>
    split_cols_by(var = var, split_fun = tefos03_first_split_fun) |>
    split_cols_by(var = var, split_fun = second_split_fun) |>
    analyze(
      vars = var,
      var_labels = "Model Parameter",
      show_labels = "visible",
      afun = tefos03_afun,
      extra_args = list(variables = variables, control = control, formats = formats)
    )
}

#' @name jjcs_num_formats
#'
#' @title Numeric Formatting Function
#'
#' @description
#' Formatting setter for selected numerical statistics
#'
#' @param d precision of individual values
#' @param cap cap to numerical precision (d > cap -- will use precision as if cap was specified as precision)
#'
#' @return list:
#'  - fmt : named vector with formatting function (jjcsformat_xx) for numerical stats: range, median, mean_sd, sd
#'  - spec : named vector with formatting specifications for numerical stats: range, median, mean_sd, sd
#' @export
#' @examples
#' P1_precision <- jjcs_num_formats(d=0)$fmt
#' jjcs_num_formats(2)$fmt
#' jjcs_num_formats(2)$spec
jjcs_num_formats <- function(d, cap = 4) {
  fmt_xx <- function(d, cap) {
    prec <- NULL
    ### report as is
    if (is.na(d)) {
      return("xx")
    }
    checkmate::assertCount(d)

    ### set a cap to number of decimal precision
    d <- min(d, cap)
    if (d > 0) prec <- paste0(prec, paste0(rep("x", d), collapse = ""))
    if (d >= 0) prec <- paste0("xx.", prec)

    return(prec)
  }

  fmt_rng <- function(d, cap) {
    prec <- fmt_xx(d, cap = cap)
    fmt <- list()
    fmt$spec <- paste0(prec, ", ", prec)
    fmt$fmt <- jjcsformat_xx(fmt$spec)

    return(fmt)
  }

  fmt_median <- function(d, cap) {
    prec <- fmt_xx(d, cap = cap)
    fmt <- list()
    fmt$spec <- prec
    fmt$fmt <- jjcsformat_xx(fmt$spec)
    return(fmt)
  }

  fmt_meansd <- function(d, cap) {
    prec1 <- fmt_xx(d, cap = cap)
    prec2 <- fmt_xx(d + 1, cap = cap + 1)
    fmt <- list()
    fmt$spec <- paste0(prec1, " (", prec2, ")")
    fmt$fmt <- jjcsformat_xx(fmt$spec)
    return(fmt)
  }
  fmt_sd <- function(d, cap) {
    prec <- fmt_xx(d, cap)
    fmt <- list()
    fmt$spec <- prec
    fmt$fmt <- jjcsformat_xx(fmt$spec)
    return(fmt)
  }

  ## apply formats for each of the stats with appropriate d and cap
  xfmt_rng <- fmt_rng(d, cap = cap)
  xfmt_meansd <- fmt_meansd(d + 1, cap = cap + 1)
  xfmt_sd <- fmt_sd(d + 2, cap = cap + 2)
  xfmt_median <- fmt_median(d + 1, cap = cap + 1)

  fmts <- list()

  fmts$fmt <- c(range = xfmt_rng$fmt, mean_sd = xfmt_meansd$fmt, sd = xfmt_sd$fmt, median = xfmt_median$fmt)

  fmts$spec <- c(range = xfmt_rng$spec, mean_sd = xfmt_meansd$spec, sd = xfmt_sd$spec, median = xfmt_median$spec)

  return(fmts)
}

#' Workaround statistics function to time point survival estimate with CI
#'
#' This is a workaround for [tern::s_surv_timepoint()], which adds a statistic
#' containing the time point specific survival estimate together with the
#' confidence interval.
#'
#' @inheritParams proposal_argument_convention
#'
#' @return for `s_event_free`, a list as returned by the [tern::s_surv_timepoint()]
#' with an additional three-dimensional statistic `event_free_ci` which
#' combines the `event_free_rate` and `rate_ci` statistics.
#'
#' For `a_event_free`, analogous to [tern::a_surv_timepoint] but with the additional
#' three-dimensional statistic described above  available via `.stats`.
#'
#' @name event_free
#' @order 1
NULL

#' @describeIn event_free Statistics function which works like [tern::s_surv_timepoint()],
#'   the difference is that it returns the additional statistic `event_free_ci`.
#' @param time_point (`numeric`)\cr time point at which to estimate survival.
#' @param time_unit (`string`)\cr unit of time for the time point.
#' @param percent (`flag`)\cr whether to return in percent or not.
#' @export
#'
#' @examples
#' adtte_f <- tern::tern_ex_adtte |>
#'   dplyr::filter(PARAMCD == "OS") |>
#'   dplyr::mutate(
#'     AVAL = tern::day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' s_event_free(
#'   df = adtte_f,
#'   .var = "AVAL",
#'   time_point = 6,
#'   is_event = "is_event",
#'   time_unit = "month"
#' )
#' @order 3
s_event_free <- function(
    df,
    .var,
    time_point,
    time_unit,
    is_event,
    percent = FALSE,
    control = control_surv_timepoint()) {
  checkmate::assert_string(time_unit, min.chars = 1L)
  start <- s_surv_timepoint(
    df = df,
    .var = .var,
    time_point = time_point,
    is_event = is_event,
    control = control
  )
  rates <- c(start$event_free_rate, start$rate_ci)
  if (!percent) rates <- rates / 100
  unit_label <- if (percent) "(%) " else ""
  c(
    start,
    list(
      event_free_ci = with_label(
        rates,
        paste0(
          time_point,
          "-",
          time_unit,
          " event-free rate ",
          unit_label,
          "(",
          obj_label(start$rate_ci),
          ")"
        )
      )
    )
  )
}

#' @describeIn event_free Formatted analysis function which is used as `afun`.
#'
#' @export
#' @order 2
#'
#' @examples
#' adtte_f <- tern::tern_ex_adtte |>
#'   dplyr::filter(PARAMCD == "OS") |>
#'   dplyr::mutate(
#'     AVAL = tern::day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' basic_table() |>
#'   split_cols_by(var = "ARMCD") |>
#'   analyze(
#'     vars = "AVAL",
#'     afun = a_event_free,
#'     show_labels = "hidden",
#'     na_str = tern::default_na_str(),
#'     extra_args = list(
#'       time_unit = "week",
#'       time_point = 3,
#'       is_event = "is_event"
#'     )
#'   ) |>
#'   build_table(df = adtte_f)
a_event_free <- function(df, .var, ..., .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)

  # Only support default stats, not custom stats
  .stats <- .split_std_from_custom_stats(.stats)$default_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_event_free,
    custom_stat_fnc_list = NULL,
    args_list = c(df = list(df), .var = .var, dots_extra_args)
  )

  # Format according to specifications
  format_stats(
    x_stats,
    method_groups = "event_free",
    stats_in = .stats,
    formats_in = .formats,
    labels_in = .labels,
    indents_in = .indent_mods
  )
}

#' Split Function for Compliance Columns (TEFSCNCMP01 e.g.)
#'
#' Here we just split into 3 columns for expected, received and missing visits.
#'
#' @inheritParams proposal_argument_convention
#' @param ret (`list`)\cr result from previous split function steps.
#' @param spl (`split`)\cr split object.
#' @param fulldf (`data.frame`)\cr full data frame.
#' @param vals (`character`)\cr values to use for the split.
#' @param trim (`logical`)\cr whether to trim the values.
#'
#' @note This split function is used in the proportion table TEFSCNCMP01 and similar ones.
#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of
#'   post-processing function.
cmp_post_fun <- function(ret, spl, fulldf, .spl_context) {
  short_split_result(
    expected = "Expected, N",
    received = "Received, n (%)",
    missing = "Missing, n (%)",
    fulldf = fulldf
  )
}
#' @rdname cmp_post_fun
#' @return a split function for use with [rtables::split_rows_by]
#' when creating proportion-based tables with compliance columns.
#' @export
cmp_split_fun <- make_split_fun(post = list(cmp_post_fun))

#' @title Summary Analysis Function for Compliance Columns (TEFSCNCMP01 e.g.)
#' @description A simple statistics function which prepares the numbers with percentages
#'   in the required format, for use in a split content row. The denominator here is
#'   from the expected visits column.
#' @inherit proposal_argument_convention
#' @param variables (`list`)\cr with variable names of logical columns for
#'   `expected`, `received` and `missing` visits.
#' @param formats (`list`)\cr with the `count_percent` format to use for the received
#'   and missing visits columns.
#' @return The [rtables::in_rows()] result with the counts and proportion statistics.
#' @seealso [cmp_post_fun()] for the corresponding split function.
#' @export
cmp_cfun <- function(df, labelstr, .spl_context, variables, formats) {
  this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
  this_afun_col <- this_col_split[2]

  exp <- df[[variables$expected]]
  checkmate::assert_logical(exp)
  n_exp <- sum(exp)
  rec <- df[[variables$received]]
  mis <- df[[variables$missing]]

  if (this_afun_col == "expected") {
    in_rows(n_exp, .labels = labelstr, .formats = "xx.")
  } else if (this_afun_col == "received") {
    checkmate::assert_logical(rec)
    checkmate::assert_true(all(exp[rec]))

    # Check if count_percent format is available
    if (!is.null(formats) && !("count_percent" %in% names(formats))) {
      stop("'count_percent' format must be provided in the formats argument")
    }

    in_rows(sum(rec) * c(1, 1 / n_exp), .formats = formats$count_percent, .labels = labelstr)
  } else if (this_afun_col == "missing") {
    checkmate::assert_logical(mis)
    checkmate::assert_true(all(exp[mis]))
    checkmate::assert_true(!any(rec[mis]))

    # Check if count_percent format is available
    if (!is.null(formats) && !("count_percent" %in% names(formats))) {
      stop("'count_percent' format must be provided in the formats argument")
    }

    in_rows(sum(mis) * c(1, 1 / n_exp), .formats = formats$count_percent, .labels = labelstr)
  }
}

postfun_cog <- function(ret, spl, fulldf, .spl_context,
                        var_names = c("AVAL", "CHG", "BASE"),
                        stats = list(
                          main = c(N = "N", mean = "Mean", SD = "SD", SE = "SE", Med = "Med", Min = "Min", Max = "Max"),
                          base = c(mean = "Mean")
                        )) {
  all_expr <- expression(TRUE)
  colset <- .spl_context[nrow(.spl_context), "value"][[1]]

  # Check if colset is one of the main variables (default: AVAL or CHG)
  if (colset == var_names[1] || colset == var_names[2]) {
    # Create vectors for make_split_result
    values <- stats$main
    labels <- stats$main

    # Create datasplit and subset_exprs lists with the same length as values
    datasplit <- replicate(length(values), fulldf, simplify = FALSE)
    names(datasplit) <- names(values)

    subset_exprs <- replicate(length(values), all_expr, simplify = FALSE)

    ret <- rtables::make_split_result(
      values = values,
      labels = labels,
      datasplit = datasplit,
      subset_exprs = subset_exprs
    )
  } else if (colset == var_names[3]) { # Default: BASE
    # For base variable, use base stats
    values <- stats$base
    labels <- stats$base
    if (names(values)[1] == "mean") {
      # Keep backward compatibility by adding "Base " prefix to the first label if it's "mean"
      labels[1] <- paste("Base", labels[1])
    }

    # Create datasplit and subset_exprs lists with the same length as values
    datasplit <- replicate(length(values), fulldf, simplify = FALSE)
    names(datasplit) <- names(values)

    subset_exprs <- replicate(length(values), all_expr, simplify = FALSE)

    ret <- rtables::make_split_result(
      values = values,
      labels = labels,
      datasplit = datasplit,
      subset_exprs = subset_exprs
    )
  } else {
    stop("something bad happened :(")
  }
  ret
}

postfun_eq5d <- function(ret, spl, fulldf, .spl_context) {
  all_expr <- expression(TRUE)
  colset <- .spl_context[nrow(.spl_context), "value"][[1]]
  if (colset == "AVAL") {
    ret <- make_split_result(
      values = c(N = "N", mean = "Mean", SD = "SD", Med = "Med", Min = "Min", Max = "Max"),
      labels = c(N = "N", mean = "Mean", SD = "SD", Med = "Med", Min = "Min", Max = "Max"),
      datasplit = list(N = fulldf, mean = fulldf, SD = fulldf, Med = fulldf, Min = fulldf, Max = fulldf),
      subset_exprs = list(all_expr, all_expr, all_expr, all_expr, all_expr, all_expr)
    )
  } else if (colset == "BASE") {
    ret <- make_split_result(
      values = c(mean_sd = "mean_sd"),
      labels = c(mean_sd = "Base Mean (SD)"),
      datasplit = list(mean_sd = fulldf),
      subset_exprs = list(all_expr)
    )
  } else if (colset == "CHG") {
    ret <- make_split_result(
      values = c(N = "N", mean = "Mean", SE = "SE", SD = "SD", Med = "Med", Min = "Min", Max = "Max"),
      labels = c(N = "N", mean = "Mean", SE = "SE", SD = "SD", Med = "Med", Min = "Min", Max = "Max"),
      datasplit = list(N = fulldf, mean = fulldf, SE = fulldf, SD = fulldf, Med = fulldf, Min = fulldf, Max = fulldf),
      subset_exprs = list(all_expr, all_expr, all_expr, all_expr, all_expr, all_expr, all_expr)
    )
  } else {
    stop("something bad happened :(")
  }
  ret
}

calc_one_visit <- function(datvec, decimal, statnm, visit, varnm, roundmethod = c("sas", "iec"), exclude_visits,
                           var_names = c("AVAL", "CHG", "BASE")) {
  roundmethod <- match.arg(roundmethod)
  if (is.na(decimal)) {
    decimal <- 0
  }
  if ((varnm == var_names[2] || varnm == var_names[3]) && (visit %in% exclude_visits)) {
    return(NULL)
  }
  if (roundmethod == "sas") {
    switch(statnm,
      N = length(stats::na.omit(datvec)),
      SE = format(
        tidytlg::roundSAS(stats::sd(datvec) / sqrt(length(stats::na.omit(datvec))), decimal + 2),
        nsmall = decimal + 2
      ),
      SD = format(
        tidytlg::roundSAS(stats::sd(datvec), decimal + 2),
        nsmall = decimal +
          2
      ),
      Mean = format(tidytlg::roundSAS(mean(datvec), decimal + 1), nsmall = decimal + 1),
      mean_sd = paste0(
        format(tidytlg::roundSAS(mean(datvec), decimal + 1), nsmall = decimal + 1),
        " (",
        format(
          tidytlg::roundSAS(stats::sd(datvec), decimal + 2),
          nsmall = decimal +
            2
        ),
        ")"
      ),
      Med = format(tidytlg::roundSAS(stats::median(datvec), decimal + 1), nsmall = decimal + 1),
      Min = format(tidytlg::roundSAS(min(datvec), decimal), nsmall = decimal),
      Max = format(tidytlg::roundSAS(max(datvec), decimal), nsmall = decimal)
    )
  } else {
    switch(statnm,
      N = length(stats::na.omit(datvec)),
      SE = format(round(stats::sd(datvec) / sqrt(length(stats::na.omit(datvec))), decimal + 2), nsmall = decimal + 2),
      SD = format(round(stats::sd(datvec), decimal + 2), nsmall = decimal + 2),
      Mean = format(round(mean(datvec), decimal + 1), nsmall = decimal + 1),
      mean_sd = paste0(
        format(round(mean(datvec), decimal + 1), nsmall = decimal + 1),
        " (",
        format(
          round(stats::sd(datvec), decimal + 2),
          nsmall = decimal +
            2
        ),
        ")"
      ),
      Med = format(round(stats::median(datvec), decimal + 1), nsmall = decimal + 1),
      Min = format(round(min(datvec), decimal), nsmall = decimal),
      Max = format(round(max(datvec), decimal), nsmall = decimal)
    )
  }
}

#' @name column_stats
#' @title Statistics within the column space
#' @description
#' A function factory used for obtaining statistics within the columns of your table.
#' Used in change from baseline tables. This takes the visit names as its row labels.
#' @param exclude_visits Vector of visit(s) for which you do not want the statistics displayed
#' in the baseline mean or change from baseline sections of the table.
#' @param var_names Vector of variable names to use instead of the default AVAL, CHG, BASE.
#' The first two elements are treated as main variables with full statistics, and the third element
#' is treated as the base variable. By default, the function expects these specific variable names in your data,
#' but you can customize them to match your dataset's column names.
#' @param stats A list with two components, `main` and `base`, that define the statistics to be calculated
#' for the main variables (default: AVAL, CHG) and the base variable (default: BASE).
#' Default for main variables: c(N = "N", mean = "Mean", SD = "SD", SE = "SE", Med = "Med", Min = "Min", Max = "Max")
#' Default for base variable: c(mean = "Mean")
#' You can customize these statistics by providing your own named vectors in the list. The names are used
#' internally for calculations, and the values are used as display labels in the table.
#'
#' @return an analysis function (for use with [rtables::analyze]) implementing
#'   the specified statistics.
#' @export
column_stats <- function(exclude_visits = c("Baseline (DB)"),
                         var_names = c("AVAL", "CHG", "BASE"),
                         stats = list(
                           main = c(
                             N = "N", mean = "Mean", SD = "SD", SE = "SE",
                             Med = "Med", Min = "Min", Max = "Max"
                           ),
                           base = c(mean = "Mean")
                         )) {
  function(df, .var, .spl_context) {
    allcolsplvals <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
    statnm <- utils::tail(allcolsplvals, 1)
    varnm <- allcolsplvals[length(allcolsplvals) - 1]
    datvec <- df[[varnm]]
    decimalp <- utils::tail(df$dp, 1)
    datpervis <- split(datvec, df[[.var]]) ## ,var is AVISIT
    in_rows(
      .list = mapply(
        calc_one_visit,
        datvec = datpervis,
        decimal = decimalp,
        visit = names(datpervis),
        MoreArgs = list(
          statnm = statnm,
          varnm = varnm,
          exclude_visits = exclude_visits,
          var_names = var_names
        )
      ),
      .names = names(datpervis)
    )
  }
}

calc_N <- function(datvec, statnm, trt, varnm) {
  if (varnm != "AVAL") {
    return(NULL)
  }
  length(stats::na.omit(datvec))
}

column_N <- function(df, .var, .spl_context, id = "USUBJID", var_names = c("AVAL", "CHG", "BASE")) {
  allcolsplvals <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
  statnm <- utils::tail(allcolsplvals, 1)
  varnm <- allcolsplvals[length(allcolsplvals) - 1]
  datvec <- unique(df[[id]])
  datpertrt <- split(datvec, df[[.var]]) ## ,var is Treatment
  in_rows(
    .list = mapply(calc_N, datvec = datpertrt, trt = names(datpertrt), MoreArgs = list(statnm = statnm, varnm = varnm)),
    .names = names(datpertrt)
  )
}

#' Simple Content Row Function to Count Rows
#'
#' @return a `VertalRowsSection` object (as returned by [rtables::in_rows()]
#'   containing counts from the data.
#' @keywords internal
c_row_counts <- function(df, labelstr, label_fstr) {
  in_rows(count = nrow(df), .formats = "xx", .labels = sprintf(label_fstr, labelstr))
}

#' Simple Content Row Function to Count Rows from Alternative Data
#' @return a `VertalRowsSection` object (as returned by [rtables::in_rows()]
#'   containing counts from the alt data.
#' @keywords internal
c_row_counts_alt <- function(df, labelstr, label_fstr, .alt_df) {
  c_row_counts(df = .alt_df, labelstr = labelstr, label_fstr = label_fstr)
}

#' Layout Creating Function Adding Row Counts
#'
#' This is a simple wrapper of [rtables::summarize_row_groups()] and the main
#' additional value is that we can choose whether we want to use the alternative
#' (usually ADSL) data set for the counts (default) or use the original data set.
#'
#' @inheritParams proposal_argument_convention
#' @param label_fstr (`string`)\cr a `sprintf` style format string.
#'   It can contain up to one `%s` which takes the current split value and
#'   generates the row label.
#' @param alt_counts (`flag`)\cr whether row counts should be taken from
#'   `alt_counts_df` (`TRUE`) or from `df` (`FALSE`).
#'
#' @return A modified layout where the latest row split now has a row group
#'   summaries (as created by [rtables::summarize_row_groups] for the counts.
#'   for the counts.
#' @export
#' @examples
#' basic_table() |>
#'   split_cols_by("ARM") |>
#'   add_colcounts() |>
#'   split_rows_by("RACE", split_fun = drop_split_levels) |>
#'   summarize_row_counts(label_fstr = "RACE value - %s") |>
#'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") |>
#'   build_table(DM, alt_counts_df = rbind(DM, DM))
#'
summarize_row_counts <- function(lyt, label_fstr = "%s", alt_counts = TRUE) {
  checkmate::assert_flag(alt_counts)

  summarize_row_groups(
    lyt,
    cfun = if (alt_counts) c_row_counts_alt else c_row_counts,
    extra_args = list(label_fstr = label_fstr)
  )
}

#' Unicode Mapping Table
#'
#' A tibble that maps special characters to their Unicode equivalents.
#'
#' @format A tibble with columns 'pattern' and 'unicode', where 'pattern' contains
#' the string to be replaced and 'unicode' contains the Unicode code point in hexadecimal.
#'
#' @export
jj_uc_map <- tibble::tribble(~pattern, ~unicode, ">=", "2265", "<=", "2264")

unicodify <- function(strs, uc_map) {
  out <- strs
  for (i in seq_len(nrow(uc_map))) {
    out <- gsub(uc_map$pattern[i], intToUtf8(strtoi(uc_map$unicode[i], base = 16)), out)
  }
  out
}

#' @name a_freq_combos_j
#'
#' @title Analysis function count and percentage in column design controlled by combosdf
#'
#' @inheritParams proposal_argument_convention
#' @inheritParams a_freq_j
#'
#' @param combosdf The df which provides the mapping of facets to produce cumulative counts for .N_col.
#' @note: These extra records must then be removed from the numerator via the filter_var parameter
#' to avoid double counting of events.
#' @param do_not_filter A vector of facets (i.e., column headers), identifying headers for which
#' no filtering of records should occur.
#' That is, the numerator should contain cumulative counts.  Generally, this will be used for a
#' "Total" column, or something similar.
#' @param filter_var The variable which identifies the records to count in the numerator for any given column.
#' Generally, this will contain text matching the column header for the column associated with a given record.
#' @param flag_var Variable which identifies the occurrence (or first occurrence) of an event.
#' The flag variable is expected to have a value of "Y" identifying that the event should be counted, or NA otherwise.

#' @param denom (`string`)\cr
#' One of \cr
#' \itemize{
#' \item \strong{N_col} Column count, \cr
#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr
#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr
#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr
#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr
#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe
#' (`.row_df` from the rtables splitting machinery).\cr
#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr
#' This higher row-level split is specified in the argument `denom_by`.\cr
#' }
#' @param .formats (named 'character' or 'list')\cr
#' formats for the statistics.
#' @return list of requested statistics with formatted `rtables::CellValue()`.\cr
#' @export

a_freq_combos_j <- function(
    df,
    labelstr = NULL,
    .var = NA,
    val = NULL,
    # arguments specific to a_freq_combos_j
    combosdf = NULL,
    do_not_filter = NULL,
    filter_var = NULL,
    flag_var = NULL,
    # arguments specific to a_freq_combos_j till here
    .df_row,
    .spl_context,
    .N_col,
    id = "USUBJID",
    denom = c("N_col", "n_df", "n_altdf", "n_rowdf", "n_parentdf"),
    label = NULL,
    label_fstr = NULL,
    label_map = NULL,
    .alt_df_full = NULL,
    denom_by = NULL,
    .stats = "count_unique_denom_fraction",
    .formats = NULL,
    .labels_n = NULL,
    .indent_mods = NULL,
    na_str = rep("NA", 3)) {
  denom <- match.arg(denom)

  check_alt_df_full(denom, "n_altdf", .alt_df_full)

  if (!is.null(combosdf) && !all(c("valname", "label") %in% names(combosdf))) {
    stop("a_freq_combos_j: combosdf must have variables valname and label.")
  }

  res_dataprep <- h_a_freq_dataprep(
    df = df,
    labelstr = labelstr,
    .var = .var,
    val = val,
    drop_levels = FALSE,
    excl_levels = NULL,
    new_levels = NULL,
    new_levels_after = FALSE,
    .df_row = .df_row,
    .spl_context = .spl_context,
    .N_col = .N_col,
    id = id,
    denom = denom,
    variables = NULL,
    label = label,
    label_fstr = label_fstr,
    label_map = label_map,
    .alt_df_full = .alt_df_full,
    denom_by = denom_by,
    .stats = .stats
  )
  # res_dataprep is list with elements
  # df .df_row val
  # drop_levels excl_levels
  # alt_df parentdf new_denomdf
  # .stats
  # make these elements available in current environment
  df <- res_dataprep$df
  .df_row <- res_dataprep$.df_row
  val <- res_dataprep$val
  drop_levels <- res_dataprep$drop_levels
  excl_levels <- res_dataprep$excl_levels
  alt_df <- res_dataprep$alt_df
  parentdf <- res_dataprep$parentdf
  new_denomdf <- res_dataprep$new_denomdf
  .stats <- .stats

  ## colid can be used to figure out if we're in the combo column or not
  colid <- .spl_context$cur_col_id[[1]]

  ### this is the core code for subsetting to appropriate combo level
  df <- h_subset_combo(
    df = df,
    combosdf = combosdf,
    do_not_filter = do_not_filter,
    filter_var = filter_var,
    flag_var = flag_var,
    colid = colid
  )

  ## the same s-function can be used as in a_freq_j
  x_stats <- s_freq_j(
    df,
    .var = .var,
    .df_row = .df_row,
    val = val,
    alt_df = new_denomdf,
    parent_df = new_denomdf,
    id = id,
    denom = denom,
    .N_col = .N_col,
    countsource = "df"
  )

  .stats_adj <- .stats

  res_prepinrows <- h_a_freq_prepinrows(
    x_stats,
    .stats_adj,
    .formats,
    labelstr,
    label_fstr,
    label,
    .indent_mods,
    .labels_n,
    na_str
  )
  # res_prepinrows is list with elements
  # x_stats .formats .labels .indent_mods .format_na_strs
  # make these elements available in current environment
  x_stats <- res_prepinrows$x_stats
  .formats <- res_prepinrows$.formats
  .labels <- res_prepinrows$.labels
  .indent_mods <- res_prepinrows$.indent_mods
  .format_na_strs <- res_prepinrows$.format_na_strs

  ### final step: turn requested stats into rtables rows
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = .format_na_strs
  )
}

#' Get default statistical methods and their associated formats, labels, and indent modifiers
#'
#' @description `r lifecycle::badge('experimental')`
#'
#' Utility functions to get valid statistic methods for different method groups
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers
#' (`.indent_mods`). This utility is used across `junco`, but some of its working principles can be
#' seen in [tern::analyze_vars()]. See notes to understand why this is experimental.
#'
#' @param stats (`character`)\cr statistical methods to return defaults for.
#' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a
#'   statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name),
#'   each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only
#'   calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be
#'   flattened such that the names of the list elements returned by the function have the format `statistic.level` (or
#'   just `statistic` for statistics calculated for a single row). Defaults to `NULL`.
#'
#' @details
#' Current choices for `type` are `counts` and `numeric` for [tern::analyze_vars()] and affect `junco_get_stats()`.
#'
#' @note
#' These defaults are experimental because we use the names of functions to retrieve the default
#' statistics. This should be generalized in groups of methods according to more reasonable groupings.
#'
#' These functions have been modified from the `tern` file `utils_default_stats_formats_labels.R`.
#' This file contains `junco` specific wrappers of functions called within the `afun` functions,
#' in order to point to `junco` specific default statistics, formats and labels.
#'
#' @name default_stats_formats_labels
NULL

#' @describeIn default_stats_formats_labels Get statistics available for a given method
#'   group (analyze function). To check available defaults see `junco_default_stats` list.
#'
#' @param method_groups (`character`)\cr indicates the statistical method group (`junco` analyze function)
#'   to retrieve default statistics for. A character vector can be used to specify more than one statistical
#'   method group.
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical
#'   functions are used, `stats_in` needs to have them in too.
#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics.
#' @param add_pval (`flag`)\cr should `'pval'` (or `'pval_counts'` if `method_groups` contains
#'   `'analyze_vars_counts'`) be added to the statistical methods?
#'
#' @return
#' * `junco_get_stats()` returns a `character` vector of statistical methods.
#'
#' @export
junco_get_stats <- function(
    method_groups = "analyze_vars_numeric",
    stats_in = NULL,
    custom_stats_in = NULL,
    add_pval = FALSE) {
  tern_get_stats(
    method_groups = method_groups,
    stats_in = stats_in,
    custom_stats_in = custom_stats_in,
    add_pval = add_pval,
    tern_defaults = junco_default_stats
  )
}

#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
#'   To check available defaults see list `junco_default_formats`.
#'
#' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with
#'   values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows
#'   with no value is provided.
#'
#' @return
#' * `junco_get_formats_from_stats()` returns a named list of formats as strings or functions.
#'
#' @note Formats in `tern` or `junco` and `rtables` can be functions that take in the table cell value and
#'   return a string. This is well documented in `vignette('custom_appearance', package = 'rtables')`.
#'
#' @export
junco_get_formats_from_stats <- function(stats, formats_in = NULL, levels_per_stats = NULL) {
  tern_get_formats_from_stats(
    stats = stats,
    formats_in = formats_in,
    levels_per_stats = levels_per_stats,
    tern_defaults = junco_default_formats
  )
}

#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics.
#'   To check for available defaults see list `junco_default_labels`.
#'
#' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the
#'   variable level (if rows correspond to levels of a variable) or statistic name will be used as label.
#' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list
#'   of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `''`
#'   values are provided.
#'
#' @return
#' * `junco_get_labels_from_stats()` returns a named list of labels as strings.
#'
#' @export
junco_get_labels_from_stats <- function(
    stats,
    labels_in = NULL,
    levels_per_stats = NULL,
    label_attr_from_stats = NULL) {
  tern_get_labels_from_stats(
    stats = stats,
    labels_in = labels_in,
    levels_per_stats = levels_per_stats,
    label_attr_from_stats = label_attr_from_stats,
    tern_defaults = junco_default_labels
  )
}

#' @describeIn default_stats_formats_labels Get label attributes from statistics list.
#' @param x_stats (`list`)\cr with the statistics results.
#' @keywords internal
get_label_attr_from_stats <- function(x_stats) {
  sapply(x_stats, obj_label)
}

#' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows.
#'
#' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for
#'   all values.
#'
#' @return
#' * `junco_get_indents_from_stats()` returns a named list of indentation modifiers as integers. By default all of the
#'   indentations will be zero.
#'
#' @export
junco_get_indents_from_stats <- function(stats, indents_in = NULL, levels_per_stats = NULL) {
  # For statistics still remaining without default indentation after looking in junco_default_indents, use
  # indentation 0 as default.
  remaining_stats <- setdiff(stats, names(junco_default_indents))
  default_indents <- c(
    junco_default_indents,
    as.list(rep(0L, length(remaining_stats))) |>
      stats::setNames(remaining_stats)
  )

  tern_get_indents_from_stats(
    stats = stats,
    indents_in = indents_in,
    levels_per_stats = levels_per_stats,
    tern_defaults = default_indents
  )
}

#' @describeIn default_stats_formats_labels Format statistics results according to format specifications.
#'
#' @return
#' * `format_stats()` returns the correspondingly formatted [rtables::in_rows()] result.
#'
#' @export
format_stats <- function(x_stats, method_groups, stats_in, formats_in, labels_in, indents_in) {
  .stats <- junco_get_stats(method_groups, stats_in = stats_in)

  .formats <- junco_get_formats_from_stats(stats = .stats, formats_in = formats_in)

  label_attr <- get_label_attr_from_stats(x_stats)
  .labels <- junco_get_labels_from_stats(stats = .stats, labels_in = labels_in, label_attr_from_stats = label_attr)
  .labels <- .unlist_keep_nulls(.labels)

  .indent_mods <- junco_get_indents_from_stats(stats = .stats, indents_in = indents_in)
  .indent_mods <- .unlist_keep_nulls(.indent_mods)

  x_stats <- x_stats[.stats]

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .labels = .labels,
    .indent_mods = .indent_mods
  )
}


# junco_default_stats -----------------------------------------------------------

#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `junco`.
#'
#' @format
#' * `junco_default_stats` is a named list of available statistics, with each element
#'   named for their corresponding statistical method group.
#'
#' @export
junco_default_stats <- list(
  coxph_hr = c("n_tot", "n_tot_events", "hr", "hr_ci", "hr_ci_3d", "pvalue", "lr_stat_df"),
  event_free = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_ci"),
  kaplan_meier = c("quantiles_lower", "median_ci_3d", "quantiles_upper", "range_with_cens_info"),
  odds_ratio = c("n_tot", "or_ci", "pval"),
  proportion_diff = c("diff", "diff_ci", "diff_est_ci"),
  relative_risk = c("rel_risk_ci", "pval"),
  summarize_ancova_j = c(
    "n",
    "mean_sd",
    "median",
    "range",
    "quantiles",
    "lsmean_se",
    "lsmean_ci",
    "lsmean_diffci",
    "pval"
  ),
  summarize_mmrm = c(
    "n",
    "adj_mean_se",
    "adj_mean_ci",
    "adj_mean_est_ci",
    "diff_mean_se",
    "diff_mean_ci",
    "diff_mean_est_ci",
    "change",
    "p_value"
  ),
  tabulate_lsmeans = c(
    "n",
    "adj_mean_se",
    "adj_mean_ci",
    "adj_mean_est_ci",
    "diff_mean_se",
    "diff_mean_ci",
    "diff_mean_est_ci",
    "change",
    "p_value"
  ),
  tabulate_rbmi = c(
    "adj_mean_se",
    "adj_mean_ci",
    "diff_mean_se",
    "diff_mean_ci",
    "change",
    "p_value",
    "additional_title_row"
  ),
  test_proportion_diff = c("pval"),
  a_freq_j = c(
    "n_altdf",
    "n_df",
    "n_rowdf",
    "n_parentdf",
    "denom",
    "count",
    "count_unique",
    "count_unique_fraction",
    "count_unique_denom_fraction"
  ),
  a_patyrs_j = c("patyrs"),
  a_eair100_j = c("eair", "n_event", "person_years")
)

# junco_default_formats ---------------------------------------------------------
junco_default_formats_start <- c(
  adj_mean_se = jjcsformat_xx("xx.xxx (xx.xxx)"),
  adj_mean_ci = jjcsformat_xx("(xx.xxx, xx.xxx)"),
  adj_mean_est_ci = jjcsformat_xx("xx.xxx (xx.xxx, xx.xxx)"),
  change = "xx.x%",
  diff = jjcsformat_xx("xx.x"),
  diff_ci = jjcsformat_xx("(xx.x, xx.x)"),
  diff_est_ci = jjcsformat_xx("xx.x (xx.x, xx.x)"),
  diff_mean_se = jjcsformat_xx("xx.xxx (xx.xxx)"),
  diff_mean_ci = jjcsformat_xx("(xx.xxx, xx.xxx)"),
  diff_mean_est_ci = jjcsformat_xx("xx.xxx (xx.xxx, xx.xxx)"),
  event_free_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  event_free_rate = jjcsformat_xx("xx.xx"),
  hr = jjcsformat_xx("xx.xx"),
  hr_ci = jjcsformat_xx("(xx.xx, xx.xx)"),
  hr_ci_3d = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  quantiles_upper = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  lsmean = jjcsformat_xx("xx.xx"),
  lsmean_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  lsmean_diff = jjcsformat_xx("xx.xx"),
  lsmean_diffci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  lsmean_diff_ci = jjcsformat_xx("(xx.xx, xx.xx)"),
  lsmean_se = jjcsformat_xx("xx.xx (xx.xx)"),
  mean_sd = jjcsformat_xx("xx.xx (xx.xxx)"),
  median = jjcsformat_xx("xx.xx"),
  median_ci = jjcsformat_xx("(xx.xx, xx.xx)"),
  median_ci_3d = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  n = jjcsformat_xx("xx."),
  n_tot = jjcsformat_xx("xx."),
  n_tot_events = jjcsformat_xx("xx."),
  or_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
  pt_at_risk = "xx",
  pval = jjcsformat_pval_fct(0),
  pvalue = jjcsformat_pval_fct(0),
  p_value = jjcsformat_pval_fct(0),
  quantiles = jjcsformat_xx("xx.xx, xx.xx"),
  range = jjcsformat_xx("xx.xx, xx.xx"),
  range_with_cens_info = jjcsformat_range_fct("xx.xx"),
  rate_ci = jjcsformat_xx("(xx.xx, xx.xx)"),
  rate_se = jjcsformat_xx("xx.xx"),
  rel_risk_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
  quantiles_upper = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  n_altdf = "xx",
  n_df = "xx",
  n_rowdf = "xx",
  n_parentdf = "xx",
  denom = "xx",
  count = "xx",
  count_unique = "xx",
  count_unique_fraction = jjcsformat_count_fraction,
  count_unique_denom_fraction = jjcsformat_count_denom_fraction,
  rr_ci_3d = jjcsformat_xx("xx.x (xx.x, xx.x)"),
  patyrs = jjcsformat_xx("xx.x"),
  eair = jjcsformat_xx("xx.x"),
  eair_diff = jjcsformat_xx("xx.xx (xx.xx, xx.xx)"),
  n_event = "xx",
  person_years = jjcsformat_xx("xx.xx"),
  total_subject_years = jjcsformat_xx("xx.x (xx.x)")
)

tern_formats_only <- setdiff(names(tern_default_formats), names(junco_default_formats_start))
#' @describeIn default_stats_formats_labels Named vector of default formats for `junco`.
#'
#' @format
#' * `junco_default_formats` is a named vector of available default formats, with each element
#'   named for their corresponding statistic.
#'
#' @export
junco_default_formats <- c(junco_default_formats_start, tern_default_formats[tern_formats_only])

# junco_default_labels ----------------------------------------------------------
junco_default_labels_start <- c(
  additional_title_row = "Additional Title",
  adj_mean_se = "Adjusted Mean (SE)",
  adj_mean_est_ci = "Adjusted Mean (CI)",
  diff = "Difference in Response rate (%)",
  diff_mean_se = "Difference in Adjusted Means (SE)",
  diff_mean_est_ci = "Difference in Adjusted Means (CI)",
  hr = "Hazard Ratio",
  lr_stat_df = "Log-Rank Chi-Squared",
  mean_sd = "Mean (SD)",
  median = "Median",
  n_tot = "Total n",
  n_tot_events = "Total events",
  pval = "p-value",
  pvalue = "p-value",
  p_value = "p-value",
  range = "Min, max",
  range_with_cens_info = "Min, max",
  n_altdf = "N",
  n_df = "N",
  n_rowdf = "N",
  n_parentdf = "N",
  denom = "N",
  patyrs = "Patient years",
  n_event = "Number of events",
  person_years = "Person years",
  total_subject_years = "Total treatment (subject years)"
)
tern_labels_only <- setdiff(names(tern_default_labels), names(junco_default_labels_start))

#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `junco`.
#'
#' @format
#' * `junco_default_labels` is a named `character` vector of available default labels, with each element
#'   named for their corresponding statistic.
#'
#' @export
junco_default_labels <- c(junco_default_labels_start, tern_default_labels[tern_labels_only])

#' @describeIn default_stats_formats_labels Named `integer` vector of default indents for `junco`.
#'
#' @format
#' * `junco_default_indents` is a named `integer` vector of available default indents, with each element
#'   named for their corresponding statistic. Only indentations different from zero need to be
#'   recorded here.
#'
#' @export
junco_default_indents <- c(
  additional_title_row = 1L,
  adj_mean_ci = 1L,
  adj_mean_est_ci = 1L,
  change = 1L,
  diff_ci = 1L,
  diff_mean_ci = 1L,
  diff_mean_est_ci = 1L,
  hr_ci = 1L,
  or_ci = 1L,
  pval = 1L,
  p_value = 1L,
  rate_se = 1L,
  rate_ci = 1L,
  rel_risk_ci = 1L
)

#' Analysis Function for Response Variables
#'
#' This function calculates counts and percentages for response variables (Y/N values),
#' with optional risk difference calculations.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param .var (`string`)\cr variable name that is passed by `rtables`.
#' @param .df_row (`data.frame`)\cr data frame across all of the columns for the given row split.
#' @param .N_col (`integer`)\cr column-wise N (column count) for the full column being analyzed.
#' @param .spl_context (`data.frame`)\cr gives information about ancestor split states.
#' @param resp_var (`string`)\cr response variable name containing Y/N values.
#' @param id (`string`)\cr subject variable name.
#' @param drop_levels (`logical`)\cr if TRUE, non-observed levels will not be included.
#' @param riskdiff (`logical`)\cr if TRUE, risk difference calculations will be performed.
#' @param ref_path (`string`)\cr column path specifications for the control group.
#' @param variables (`list`)\cr variables to include in the analysis.
#' @param conf_level (`proportion`)\cr confidence level of the interval.
#' @param method (`character`)\cr method for calculating confidence intervals.
#' @param weights_method (`character`)\cr method for calculating weights.
#' @param ... Additional arguments passed to other functions.
#'
#' @return A list of rcell objects containing the response statistics.
#'
#' @export
#' @keywords internal
a_freq_resp_var_j <- function(
    df,
    .var,
    .df_row,
    .N_col,
    .spl_context,
    resp_var = NULL,
    id = "USUBJID",
    drop_levels = FALSE,
    riskdiff = TRUE,
    ref_path = NULL,
    variables = formals(s_proportion_diff)$variables,
    conf_level = formals(s_proportion_diff)$conf_level,
    method = c(
      "wald",
      "waldcc",
      "cmh",
      "ha",
      "newcombe",
      "newcombecc",
      "strat_newcombe",
      "strat_newcombecc"
    ),
    weights_method = formals(s_proportion_diff)$weights_method,
    ...) {
  # ---- Derive statistics: xx / xx (xx.x%)

  if (is.null(resp_var)) {
    stop(
      "afun a_freq_resp_var_j: resp_var cannot be NULL."
    )
  }

  resp_var_values <- unique(df[[resp_var]][!is.na(df[[resp_var]])])
  if (
    is.character(df[[resp_var]]) &&
      any(is.na(df[[resp_var]])) &&
      all(resp_var_values == "Y")
  ) {
    stop(
      paste0(
        "afun a_freq_resp_var_j: it is not clear if missing resp_var should be considered non-response. ",
        "Please make it a factor with appropriate Y(/N) levels."
      )
    )
  }

  if (length(setdiff(resp_var_values, c("Y", "N"))) > 0) {
    stop("afun a_freq_resp_var_j: resp_var must contain only Y/N values.")
  }

  if (is.character(df[[resp_var]])) {
    df[[resp_var]] <- factor(df[[resp_var]], levels = c("Y", "N"))
  }

  df <- df[!is.na(df[[.var]]), ]

  # nolint start
  if ((is.factor(df[[resp_var]]) &&
    (identical(levels(df[[resp_var]]), c("Y", "N")) || identical(levels(df[[resp_var]]), c("N", "Y")))) ||
    is.character(df[[resp_var]])
  ) { # nolint end
    # missing values in resp_var should be excluded, not considered as not met response
    # subject will then not contribute to denominator
    df <- df[!is.na(df[[resp_var]]), ]
    .df_row <- .df_row[!is.na(.df_row[[resp_var]]), ]
  }

  if (drop_levels) {
    obs_levs <- unique(.df_row[[.var]])
    obs_levs <- intersect(levels(.df_row[[.var]]), obs_levs)

    val_var <- obs_levs
    # restrict the levels to the ones specified in val_var
    df <- df[df[[.var]] %in% val_var, ]
    .df_row <- .df_row[.df_row[[.var]] %in% val_var, ]

    df <- h_update_factor(df, .var, val_var)
  }

  varvec <- df[[.var]]

  levs <- if (is.factor(varvec)) levels(varvec) else sort(unique(varvec))

  colid <- .spl_context$cur_col_id[[1]]
  inriskdiffcol <- grepl("difference", tolower(colid), fixed = TRUE)

  if (riskdiff) {
    trt_var_refpath <- h_get_trtvar_refpath(
      ref_path,
      .spl_context,
      df
    )
    # trt_var_refpath is list with elements
    # trt_var trt_var_refspec cur_trt_grp ctrl_grp
    # make these elements available in current environment
    trt_var <- trt_var_refpath$trt_var
    trt_var_refspec <- trt_var_refpath$trt_var_refspec
    cur_trt_grp <- trt_var_refpath$cur_trt_grp
    ctrl_grp <- trt_var_refpath$ctrl_grp
  }

  fn <- function(levii) {
    dfii <- df[df[[.var]] == levii, ]
    dfrowii <- .df_row[.df_row[[.var]] == levii, ]

    if (!inriskdiffcol) {
      # use the core s_freq_j function on the current level of the incoming variable (.var)
      # note that the response variable will become .var in the below call
      # val is restricted to Y to show number of response on the current level of .var
      rslt <- s_freq_j(
        dfii,
        .df_row = dfrowii,
        .var = resp_var,
        alt_df = NULL,
        parent_df = NULL,
        val = "Y",
        denom = "n_df"
      )

      .stat <- "count_unique_denom_fraction"
      x_stat <- rslt[[.stat]]$Y
      rslt <- rcell(x_stat, format = jjcsformat_count_denom_fraction)
    } else {
      # use the risk differenc function s_rel_risk_val_j on the current level of the incoming variable (.var)
      # note that the response variable will become .var in the below call
      # val is restricted to Y to show number of response on the current level of .var
      denom_df <- dfrowii

      rslt <- s_rel_risk_val_j(
        df = dfii,
        .var = resp_var,
        .df_row = dfrowii,
        val = "Y",
        denom_df = denom_df,
        id = id,
        riskdiff = riskdiff,
        # treatment/ref group related arguments
        trt_var = trt_var,
        ctrl_grp = ctrl_grp,
        cur_trt_grp = cur_trt_grp,
        # relrisk specific arguments
        variables = variables,
        conf_level = conf_level,
        method = method,
        weights_method = weights_method
      )
      x_stat <- rslt[["rr_ci_3d"]]$Y
      rslt <- rcell(
        x_stat,
        format = jjcsformat_xx("xx.x (xx.x, xx.x)"),
        format_na_str = rep("NA", 3)
      )
    }

    # rslt is a single rcell row
    return(rslt)
  }

  ### apply function fn to all levels of incoming .var
  ### cls is a list of single rcell rows - ie one line per level is presented
  cls <- lapply(levs, fn)

  names(cls) <- levs
  return(cls)
}

#' @name jj_complex_scorefun
#' @title Complex Scoring Function
#' @description
#' A function used for sorting AE tables (and others) as required.
#' @details
#' This sort function sorts as follows:
#' Takes all the columns from a specified spanning column header (default= colspan_trt) and sorts by the last treatment
#' column within this.
#' If no spanning column header variable exists (e.g you have only one active treatment arm and have decided to
#' remove the spanning header from your layout) it will sort by the first treatment column in your table.
#' This function is not really designed for tables that have sub-columns, however if users wish to override any
#' default sorting behavior, they can simply specify their own colpath to use for sorting on (default=NULL)
#' @param spanningheadercolvar name of spanning header variable that defines the active treatment columns.
#' If you do not have an active treatment spanning header column then user can define this as NA.
#' @param usefirstcol This allows you to just use the first column of the table to sort on.
#' @param colpath name of column path that is needed to sort by (default=NULL).
#' This overrides other arguments if specified
#' (except firstcat and lastcat which will be applied if requested on this colpath)
#' @param firstcat If you wish to put any category at the top of the list despite any n's user can specify here.
#' @param lastcat If you wish to put any category at the bottom of the list despite any n's user can specify here.
#' @export
#' @returns a function which can be used as a score function (scorefun in `sort_at_path`).
# @examples #result <- sort_at_path(result, c('root', 'AEBODSYS'), scorefun = jj_complex_scorefun())
#' @examples
#' ADAE <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   AEBODSYS = c(
#'     "SOC 1", "SOC 2", "SOC 1", "SOC 2", "SOC 2",
#'     "SOC 2", "SOC 2", "SOC 1", "SOC 2", "SOC 1"
#'   ),
#'   AEDECOD = c(
#'     "Coded Term 2", "Coded Term 1", "Coded Term 3", "Coded Term 4",
#'     "Coded Term 4", "Coded Term 4", "Coded Term 5", "Coded Term 3",
#'     "Coded Term 1", "Coded Term 2"
#'   ),
#'   TRT01A = c(
#'     "ARMA", "ARMB", "ARMA", "ARMB", "ARMB",
#'     "Placebo", "Placebo", "Placebo", "ARMA", "ARMB"
#'   ),
#'   TRTEMFL = c("Y", "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "Y")
#' )
#'
#' ADAE <- ADAE |>
#'   dplyr::mutate(TRT01A = as.factor(TRT01A))
#'
#' ADAE$colspan_trt <- factor(ifelse(ADAE$TRT01A == "Placebo", " ", "Active Study Agent"),
#'   levels = c("Active Study Agent", " ")
#' )
#'
#' ADAE$rrisk_header <- "Risk Difference (%) (95% CI)"
#' ADAE$rrisk_label <- paste(ADAE$TRT01A, paste("vs", "Placebo"))
#'
#' colspan_trt_map <- create_colspan_map(ADAE,
#'   non_active_grp = "Placebo",
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = "TRT01A"
#' )
#'
#' ref_path <- c("colspan_trt", " ", "TRT01A", "Placebo")
#'
#' lyt <- basic_table() |>
#'   split_cols_by(
#'     "colspan_trt",
#'     split_fun = trim_levels_to_map(map = colspan_trt_map)
#'   ) |>
#'   split_cols_by("TRT01A") |>
#'   split_cols_by("rrisk_header", nested = FALSE) |>
#'   split_cols_by(
#'     "TRT01A",
#'     labels_var = "rrisk_label",
#'     split_fun = remove_split_levels("Placebo")
#'   ) |>
#'   analyze(
#'     "TRTEMFL",
#'     a_freq_j,
#'     show_labels = "hidden",
#'     extra_args = list(
#'       method = "wald",
#'       label = "Subjects with >=1 AE",
#'       ref_path = ref_path,
#'       .stats = "count_unique_fraction"
#'     )
#'   ) |>
#'   split_rows_by("AEBODSYS",
#'     split_label = "System Organ Class",
#'     split_fun = trim_levels_in_group("AEDECOD"),
#'     label_pos = "topleft",
#'     section_div = c(" "),
#'     nested = FALSE
#'   ) |>
#'   summarize_row_groups(
#'     "AEBODSYS",
#'     cfun = a_freq_j,
#'     extra_args = list(
#'       method = "wald",
#'       ref_path = ref_path,
#'       .stats = "count_unique_fraction"
#'     )
#'   ) |>
#'   analyze(
#'     "AEDECOD",
#'     afun = a_freq_j,
#'     extra_args = list(
#'       method = "wald",
#'       ref_path = ref_path,
#'       .stats = "count_unique_fraction"
#'     )
#'   )
#'
#' result <- build_table(lyt, ADAE)
#'
#' result
#'
#' result <- sort_at_path(
#'   result,
#'   c("root", "AEBODSYS"),
#'   scorefun = jj_complex_scorefun()
#' )
#'
#' result <- sort_at_path(
#'   result,
#'   c("root", "AEBODSYS", "*", "AEDECOD"),
#'   scorefun = jj_complex_scorefun()
#' )
#'
#' result
#' @rdname complex_scoring_function
#' @aliases jj_complex_scorefun
jj_complex_scorefun <- function(
    spanningheadercolvar = "colspan_trt",
    usefirstcol = FALSE,
    colpath = NULL,
    firstcat = NULL,
    lastcat = NULL) {
  paths <- NULL

  function(tt) {
    if (is.null(paths)) {
      paths <<- col_paths(tt)
      if (is.null(colpath)) {
        if (is.na(spanningheadercolvar)) {
          atrt_paths <- vapply(paths, function(pth) pth[[1]] != " ", TRUE)
          use_paths <- atrt_paths
          act_trt_lst <- paths[use_paths]
          first_list <- sapply(act_trt_lst, utils::head, 1)
          first_at_value <- utils::head(first_list, n = 1)
          colpath <<- first_at_value
        } else {
          atrt_paths <- vapply(paths, function(pth) pth[[1]] == spanningheadercolvar && pth[[2]] != " ", TRUE)
          use_paths <- atrt_paths
          act_trt_lst <- paths[use_paths]
          last_list <- sapply(act_trt_lst, utils::tail, 1)
          last_at_value <- utils::tail(last_list, n = 1)
          colpath <<- last_at_value
        }

        if (usefirstcol) {
          atrt_paths <- vapply(paths, function(pth) pth[[1]] != " ", TRUE)
          use_paths <- atrt_paths
          act_trt_lst <- paths[use_paths]
          first_list <- sapply(act_trt_lst, utils::head, 1)
          first_at_value <- utils::head(first_list, n = 1)
          colpath <<- first_at_value
        }
      }
    }
    score <- unlist(cell_values(tt, colpath = colpath), use.names = FALSE)[1]

    if (length(score) == 0) {
      score <- 1
    }

    if (!is.null(firstcat)) {
      if (obj_name(tt) == firstcat) {
        score <- Inf
      }
    }
    if (!is.null(lastcat)) {
      if (obj_name(tt) == lastcat) {
        score <- -99999
      }
    }
    if (obj_name(tt) == " ") {
      score <- -Inf
    }

    return(score)
  }
}

#' @name create_colspan_var
#' @title Creation of Column Spanning Variables
#' @description
#' A function used for creating a spanning variable for treatment groups
#' @details
#' This function creates a spanning variable for treatment groups that is intended to
#' be used within the column space.
#' @param df The name of the data frame in which the spanning variable is to be appended to
#' @param non_active_grp The value(s) of the treatments that represent the non-active or comparator
#' treatment groups
#' default value = c('Placebo')
#' @param non_active_grp_span_lbl The assigned value of the spanning variable for the non-active or comparator
#' treatment groups
#' default value = ''
#' @param active_grp_span_lbl The assigned value of the spanning variable for the active treatment group(s)
#' default value = 'Active Study Agent'
#' @param colspan_var The desired name of the newly created spanning variable
#' default value = 'colspan_trt'
#' @param trt_var The name of the treatment variable that is used to determine which
#' spanning treatment group value to apply.
#' default value = 'TRT01A'
#' @returns a data frame that contains the new variable as specified in colspan_var
#' @rdname colspan_var
#' @export
#' @aliases create_colspan_var
#' @examples
#'
#' library(tibble)
#'
#' df <- tribble(
#'   ~TRT01A,
#'   "Placebo",
#'   "Active 1",
#'   "Active 2"
#' )
#'
#' df$TRT01A <- factor(df$TRT01A, levels = c("Placebo", "Active 1", "Active 2"))
#'
#' colspan_var <- create_colspan_var(
#'   df = df,
#'   non_active_grp = c("Placebo"),
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Treatment",
#'   colspan_var = "colspan_trt",
#'   trt_var = "TRT01A"
#' )
#'
#' colspan_var
create_colspan_var <- function(
    df,
    non_active_grp = c("Placebo"),
    non_active_grp_span_lbl = " ",
    active_grp_span_lbl = "Active Study Agent",
    colspan_var = "colspan_trt",
    trt_var = "TRT01A") {
  # Create a new column with the specified name using base R
  df[[colspan_var]] <- factor(
    ifelse(df[[trt_var]] %in% non_active_grp, non_active_grp_span_lbl, active_grp_span_lbl),
    levels = c(active_grp_span_lbl, non_active_grp_span_lbl)
  )

  return(df)
}

#' @name create_colspan_map
#' @title Creation of Column Spanning Mapping Dataframe
#' @description
#' A function used for creating a data frame containing the map that is compatible with rtables split function
#' `trim_levels_to_map`
#' @details
#' This function creates a data frame containing the map that is compatible with rtables split function
#' `trim_levels_to_map`.
#' The levels of the specified trt_var variable will be stored within the trt_var variable
#' and the colspan_var variable will contain the corresponding spanning header value for each treatment group.
#' @inheritParams create_colspan_var
#' @param active_first whether the active columns come first.
#' @returns a data frame that contains the map to be used with rtables split function `trim_levels_to_map`
#' @rdname colspan_map
#' @export
#' @examples
#' library(tibble)
#'
#' df <- tribble(
#'   ~TRT01A,
#'   "Placebo",
#'   "Active 1",
#'   "Active 2"
#' )
#'
#' df$TRT01A <- factor(df$TRT01A, levels = c("Placebo", "Active 1", "Active 2"))
#'
#' colspan_map <- create_colspan_map(
#'   df = df,
#'   non_active_grp = c("Placebo"),
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = "TRT01A"
#' )
#'
#' colspan_map
create_colspan_map <- function(
    df,
    non_active_grp = c("Placebo"),
    non_active_grp_span_lbl = " ",
    active_grp_span_lbl = "Active Study Agent",
    colspan_var = "colspan_trt",
    trt_var = "TRT01A",
    active_first = TRUE) {
  act_trtlv <- setdiff(levels(df[[trt_var]]), non_active_grp)
  act_map_df <- data.frame(a = active_grp_span_lbl, b = act_trtlv)
  nact_map_df <- data.frame(a = non_active_grp_span_lbl, b = non_active_grp)
  df <- if (active_first) {
    rbind(act_map_df, nact_map_df)
  } else {
    rbind(nact_map_df, act_map_df)
  }
  names(df) <- c(colspan_var, trt_var)

  return(df)
}

.find_titles_file <- function(path = ".") {
  if (file.exists(file.path(path, "titles.csv"))) {
    file.path(path, "titles.csv")
  } else if (file.exists(file.path(path, "titles.xlsx"))) {
    file.path(path, "titles.xlsx")
  } else {
    stop(
      "No titles.csv/xlsx file detected at path ", path,
      "please specify parent directory, full file path, or ",
      "pass the full titles data.frame directly."
    )
  }
}

#' @importFrom utils read.csv
.read_titles_file <- function(file) {
  if (grepl("csv$", file, ignore.case = TRUE)) {
    df <- read.csv(file, check.names = FALSE)
  } else if (grepl("xlsx$", file, ignore.case = TRUE)) {
    if (!requireNamespace("readxl")) {
      stop("readxl package is required for xslx file support, please install it.")
    }
    df <- readxl::read_excel(path = file, sheet = 1, range = readxl::cell_cols("A:C"))
  } else {
    stop("Unrecognized titles file type. file: ", file)
  }
  df
}

# ----
#' @name  get_titles_from_file
#' @title Get Titles/Footers For Table From Sources
#'
#' @description Retrieves the titles and footnotes for a given table from a CSV/XLSX file or a data.frame.
#'
#' @details Retrieves the titles for a given output id (see below) and outputs
#'          a list containing the title and footnote objects supported by
#'          rtables. Both titles.csv and titles.xlsx (*if `readxl` is
#'          installed*) files are supported, with titles.csv being checked
#'          first.
#'
#'          Data is expected to have `TABLE ID`, `IDENTIFIER`, and `TEXT` columns,
#'          where `IDENTIFIER` has the value `TITLE` for a title and `FOOT*` for
#'          footer materials where `*` is a positive integer. `TEXT` contains
#'          the value of the title/footer to be applied.
#'
#' @param file (`character(1)`)\cr A path to CSV or xlsx file containing title
#'   and footer information for one or more outputs. See Details. Ignored if
#'   `title_df` is specified.
#' @param input_path (`character(1)`)\cr A path to look for
#'   titles.csv/titles.xlsx. Ignored if `file` or `title_df` is specified.
#' @param title_df (`data.frame`)\cr A data.frame containing titles and footers for
#'   one or more  outputs. See Details.
#' @param id character. The identifier for the table of interest.
#'
#' @export
#' @seealso Used in all template script
#' @returns  List object containing: title, subtitles, main_footer, prov_footer
#'           for the table of interest.  Note: the subtitles and prov_footer are
#'           currently set to NULL. Suitable for use with [`set_titles()`].
#'
# ----

get_titles_from_file <- function(id,
                                 file = .find_titles_file(input_path),
                                 input_path = ".",
                                 title_df = .read_titles_file(file)) {
  ## "TABLE ID" gets munged to "TABLE.ID"
  title_df <- title_df[title_df[["TABLE ID"]] == id, , drop = FALSE]

  message(paste0("Static titles file/data.frame used: "))

  msg <- NULL

  if (nrow(title_df) == 0) {
    msg <- paste0(
      paste("Warning: Table ID", id, "not found in Title file."),
      "\n",
      "A dummy title will be generated to be able to get rtf file produced.",
      "\n",
      "Ensure the titles file gets updated to include the table identifier"
    )

    title <- "Table ID not found in titles file, dummy title for rtf generation purpose"
    main_footer <- "Ensure the titles file gets updated to include the table identifier"
  } else {
    title <- title_df[title_df$IDENTIFIER == "TITLE", ]$TEXT

    if (length(title) != 1) {
      msg <- "Warning: Title file should contain exactly one title record per Table ID"
    } else {
      message(file)
    }

    main_footer <- title_df[grep("^FOOT", title_df$IDENTIFIER), ]$TEXT

    if (length(main_footer) == 0) {
      main_footer <- character()
    }
  }

  if (!is.null(msg)) {
    warning(msg)
  }

  # ---- Return titles and footnotes.

  title_foot <- list(
    title = title,
    subtitles = NULL,
    main_footer = main_footer,
    prov_footer = NULL
  )

  return(title_foot)
}

# ----
#' @name  set_titles
#' @title Set Output Titles
#'
#' @description Retrieves titles and footnotes from the list specified in the titles
#'              argument and appends them to the table tree specified in the obj argument.
#'
#'
#' @param obj The table tree to which the titles and footnotes will be appended.
#' @param titles The list object containing the titles and footnotes to be appended.
#' @seealso Used in all template scripts
#' @export
#'
#' @returns  The table tree object specified in the obj argument, with titles
#'           and footnotes appended.
#'
# ----

set_titles <- function(obj, titles) {
  ## add title and footers
  main_title(obj) <- titles$title
  if (!is.null(titles$subtitles)) {
    subtitles(obj) <- titles$subtitles
  }
  main_footer(obj) <- titles$main_footer
  if (!is.null(titles$prov_footer)) {
    prov_footer(obj) <- titles$prov_footer
  }

  return(obj)
}

make_dps_regex <- function(keyword, type = c("[", "{")) {
  mustart <- match.arg(type)
  muend <- ifelse(mustart == "[", "]", "}")

  paste0("[~][", mustart, "]", keyword, " ([^", muend, "]+)[", muend, "]")
}

handle_one_markup <- function(strs, keyword, rtfstart, rtfend) {
  start <- ifelse(nzchar(rtfstart), paste0("\\", rtfstart, " "), "")
  end <- ifelse(nzchar(rtfstart), paste0("\\", rtfend, " "), "")
  out <- gsub(make_dps_regex(keyword), paste0(start, "\\1", end), strs)
  out <- gsub(make_dps_regex(keyword, "{"), paste0(start, "\\1", end), out)
  out
}


dps_markup_df <- tibble::tribble(
  ~keyword,
  ~rtfstart,
  ~rtfend,
  "super",
  "\\super",
  "\\nosupersub",
  "sub",
  "\\sub",
  "\\nosupersub",
  "optional",
  "",
  ""
)


#' Default String Mapping for Special Characters
#'
#' A tibble that maps special characters to their UTF-8 equivalents for use in RTF output.
#' Currently it maps ">=" and "<=" to the Unicode characters.
#'
#' @return A tibble with columns 'pattern' and 'value', where 'pattern' contains
#'   the string to be replaced and 'value' contains the replacement.
#'
#' @export
#' @keywords internal
default_str_map <- tibble::tribble(
  ~pattern,
  ~value,
  ">=",
  intToUtf8(strtoi(2265, base = 16)),
  "<=",
  intToUtf8(strtoi(2264, base = 16))
)

convert_dps_markup <- function(strs, markup_df = dps_markup_df) {
  for (i in seq_len(nrow(markup_df))) {
    strs <- handle_one_markup(strs, markup_df$keyword[i], markup_df$rtfstart[i], markup_df$rtfend[i])
  }
  strs
}

strmodify <- function(strs, replacement_str = default_str_map) {
  map_tbl <- replacement_str
  if (!is.null(map_tbl) && nrow(map_tbl) > 0) {
    for (i in seq_len(nrow(map_tbl))) {
      pattern <- map_tbl$pattern[[i]]
      value <- map_tbl$value[i]
      strs <- gsub(pattern, value, strs, fixed = TRUE)
    }
  }
  return(strs)
}

prep_strs_for_rtf <- function(strs, string_map = default_str_map, markup_df = dps_markup_df) {
  strs <- convert_dps_markup(strs, markup_df)
  strs <- strmodify(strs, string_map)
  return(strs)
}


#' Relabel Variables in a Dataset
#'
#' This function relabels variables in a dataset based on a provided list of labels.
#' It can either replace existing labels or only add labels to variables without them.
#'
#' @param x (`data.frame`)\cr dataset containing variables to be relabeled.
#' @param lbl_list (`list`)\cr named list of labels to apply to variables.
#' @param replace_existing (`logical`)\cr if TRUE, existing labels will be replaced;
#'   if FALSE, only variables without labels will be updated.
#'
#' @return The dataset with updated variable labels.
#'
#' @export
#' @keywords internal
var_relabel_list <- function(x, lbl_list, replace_existing = TRUE) {
  if (replace_existing) {
    vars_to_relabel <- intersect(names(x), names(lbl_list))
  } else {
    get_variables_with_empty_labels <- function(x) {
      labels <- var_labels(x)
      mask <- lapply(labels, is.na) |> unlist()
      return(names(labels[mask]))
    }

    vars_without_labels <- get_variables_with_empty_labels(x)
    vars_to_relabel <- intersect(vars_without_labels, names(lbl_list))
  }
  do.call(var_relabel, c(list(x = x), lbl_list[vars_to_relabel]))
}

#' @name  response_by_var
#' @title Count denom fraction statistic
#'
#' @description Derives the count_denom_fraction statistic (i.e., 'xx /xx (xx.x percent)' )
#'              Summarizes the number of unique subjects with a response = 'Y' for a given variable
#'              (e.g. TRTEMFL) within each category of another variable (e.g., SEX).
#'              Note that the denominator is derived using input df,
#'              in order to have these aligned with alt_source_df, it is expected that df includes all subjects.
#'
#' @details This is an analysis function for use within `analyze`. Arguments
#'          `df`, `.var` will be populated automatically by rtables during
#'          the tabulation process.
#'
#' @param df           Name of dataframe being analyzed.
#' @param labelstr     Custom label for the variable being analyzed.
#' @param .var         Name of the variable being analyzed. Records with non-missing
#'                     values will be counted in the denominator.
#' @param .N_col numeric(1). The total for the current column.
#' @param resp_var     Name of variable, for which, records with a value of 'Y'
#'                     will be counted in the numerator.
#' @param id           Name of column in df which will have patient identifiers
#' @param .format      Format for the count/denominator/fraction output.
#' @param ...          Additional arguments passed to the function.
# @examples #analyze(vars='AGEGR1_DECODE', var_labels = 'Age group (years), n/Ns (%)', afun = response_by_var,
# extra_args = list(resp_var = 'TRTEMFL'))
#' @examples
#'
#' library(dplyr)
#'
#' ADAE <- data.frame(
#'   USUBJID = c(
#'     "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
#'     "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
#'   ),
#'   SEX_DECODE = c(
#'     "Female", "Female", "Male", "Female", "Male",
#'     "Female", "Male", "Female", "Male", "Female"
#'   ),
#'   TRT01A = c(
#'     "ARMA", "ARMB", "ARMA", "ARMB", "ARMB",
#'     "Placebo", "Placebo", "Placebo", "ARMA", "ARMB"
#'   ),
#'   TRTEMFL = c("Y", "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "Y")
#' )
#'
#' ADAE <- ADAE |>
#'   mutate(
#'     TRT01A = as.factor(TRT01A),
#'     SEX_DECODE = as.factor(SEX_DECODE)
#'   )
#'
#' lyt <- basic_table() |>
#'   split_cols_by("TRT01A") |>
#'   analyze(
#'     vars = "SEX_DECODE",
#'     var_labels = "Sex, n/Ns (%)",
#'     show_labels = "visible",
#'     afun = response_by_var,
#'     extra_args = list(resp_var = "TRTEMFL"),
#'     nested = FALSE
#'   )
#'
#' result <- build_table(lyt, ADAE)
#'
#' result
#' @return a `RowsVerticalSection` for use by the internal tabulation machinery of `rtables`
#' @export

response_by_var <- function(
    df,
    labelstr = NULL,
    .var,
    .N_col,
    resp_var = NULL,
    id = "USUBJID",
    .format = jjcsformat_count_denom_fraction,
    ...) {
  # Derive statistics: xx / xx (xx.x%)

  if (is.null(resp_var)) {
    stop("afun response_by_var: resp_var cannot be NULL.")
  }

  resp_var_values <- unique(df[[resp_var]][!is.na(df[[resp_var]])])
  if (is.character(df[[resp_var]]) && any(is.na(df[[resp_var]])) && all(resp_var_values == "Y")) {
    stop(paste(
      "afun response_by_var: not clear if missing resp_var should be considered",
      "non-response. Please make it a factor with appropriate Y(/N) levels."
    ))
  }

  if (length(setdiff(resp_var_values, c("Y", "N"))) > 0) {
    stop("afun response_by_var: resp_var must contain only Y/N values.")
  }

  df <- df[!is.na(df[[.var]]), ]

  if ( # nolint start
    (is.factor(df[[resp_var]]) &&
      (identical(levels(df[[resp_var]]), c("Y", "N")) || identical(levels(df[[resp_var]]), c("N", "Y")))) ||
      is.character(df[[resp_var]])
  ) { # nolint end
    # missing values in resp_var should be excluded, not considered as not met response subject will then not
    # contribute to denominator
    df <- df[!is.na(df[[resp_var]]), ]
  }

  ## in other cases, missing values in resp_var will be considered not met response and subject will contribute to
  ## denominator
  varvec <- df[[.var]]

  # For group summaries, varvec will be a singular value, equal to the current split, whereas for afun summaries,
  # varvec will consist of all possible values of var.
  if (!is.null(labelstr)) {
    levs <- labelstr
  } else {
    levs <- if (is.factor(varvec)) levels(varvec) else unique(varvec)
  }

  fn <- function(levii) {
    dfii <- df[df[[.var]] == levii, ]

    den <- NROW(unique(dfii[, id]))
    num <- NROW(unique(dfii[dfii[[resp_var]] == "Y" & !is.na(dfii[[resp_var]]), id]))

    rcell(c(num * 1, den * 1, num * 1 / den), format = .format)
  }

  cls <- lapply(levs, fn)
  names(cls) <- levs

  # Hand off results to analyze

  in_rows(.list = cls)
}

#' Shortcut Layout Function for Standard Continuous Variable Analysis
#'
#' @inheritParams proposal_argument_convention
#' @param formats (`list`)\cr formats including `mean_sd`, `median` and `range`
#'   specifications.
#'
#' @note This is used in `tefmad01` and `tefmad03a` e.g.
#' @return Modified layout.
#' @export
analyze_values <- function(lyt, vars, ..., formats) {
  checkmate::assert_list(formats)
  checkmate::assert_names(names(formats), must.include = c("mean_sd", "median", "range"))

  analyze_vars(
    lyt,
    vars = vars,
    ...,
    .stats = c("n", "mean_sd", "median", "range"),
    .formats = c(n = "xx", mean_sd = formats$mean_sd, median = formats$median, range = formats$range),
    .labels = c(n = "N", mean_sd = "Mean (SD)", median = "Median", range = "Min, Max"),
    .indent_mods = c(n = 0, mean_sd = 1, median = 1, range = 1)
  )
}

add_blank_line_rcells <- function(ret) {
  # check that ret is expected structure and not NULL
  if (is.null(ret)) {
    stop("add_blank_line_rcells: ret cannot be NULL.")
  }
  if (!(class(ret) %in% c("RowsVerticalSection", "CellValue"))) {
    stop("add_blank_line_rcells:  ret must be of class RowsVerticalSection or CellValue.")
  }

  if (inherits(ret, "RowsVerticalSection")) {
    xlabel <- attr(ret, "row_labels")
    indent_mods <- lapply(ret, function(obj) {
      attr(obj, "indent_mod")
    })
  } else {
    xlabel <- attr(ret, "label")
    indent_mods <- attr(ret, "indent_mod")
  }

  fmts <- lapply(ret, obj_format)
  na_strs <- lapply(ret, obj_na_str)
  # ret <- append(ret,rcell(NA_real_,format = 'xx')) use a character version for the new line, rather than NA - to
  # allow NA processing for other stuff
  ret <- append(ret, rcell(NA, format = "xx"))
  fmts <- append(fmts, "xx")
  na_strs <- append(na_strs, " ")
  indent_mods <- append(indent_mods, 0L)
  xlabel <- append(xlabel, " ")
  ret <- stats::setNames(ret, xlabel)

  # perform the update to add the extra line
  fret <- in_rows(
    .list = ret,
    .labels = xlabel,
    .formats = fmts,
    .format_na_strs = na_strs,
    .indent_mods = indent_mods
  )

  fret
}

h_get_eair_df <- function(levii, df, denom_df, .var, id, occ_var, occ_dy, fup_var) {
  dfii <- df[df[[.var]] == levii & !is.na(df[[.var]]), ]

  df_denom <- unique(denom_df[, c(id, fup_var)])
  df_num <- subset(dfii, dfii[[occ_var]] == "Y")[, c(id, .var, occ_var, occ_dy)]

  ### construct modified fup var subjects not in numerator - use fup_var from df_denom
  df_denom$mod_fup_var <- df_denom[[fup_var]]

  ### add vars from df_num onto df_denom
  df_denom <- dplyr::left_join(df_denom, df_num, by = id)

  # subjects in numerator dataset, use occ_dy variable/365.25
  id_to_update <- df_denom[[id]] %in% df_num[[id]]
  df_denom[id_to_update, "mod_fup_var"] <- df_denom[id_to_update, occ_dy] / 365.25

  return(list(df_denom = df_denom, df_num = df_num))
}

extract_x_stats <- function(list_with_stats, stat_nms) {
  sapply(
    stat_nms,
    function(stat) {
      sapply(
        names(list_with_stats),
        function(x) {
          list_with_stats[[x]][[stat]]
        },
        simplify = FALSE
      )
    },
    simplify = FALSE
  )
}

#' Analysis of Covariance
#'
#' Performs an analysis of covariance between two groups returning the estimated
#' "treatment effect" (i.e. the contrast between the two treatment groups) and
#' the least square means estimates in each group.
#'
#' @param data A `data.frame` containing the data to be used in the model.
#' @param vars A `vars` object as generated by [rbmi::set_vars()]. Only the `group`,
#' `visit`, `outcome` and `covariates` elements are required. See details.
#' @param visits An optional character vector specifying which visits to
#' fit the ancova model at. If `NULL`, a separate ancova model will be fit to the
#' outcomes for each visit (as determined by `unique(data[[vars$visit]])`).
#' See details.
#' @param weights Character, either `"counterfactual"` (default), `"equal"`,
#' `"proportional_em"` or `"proportional"`.
#' Specifies the weighting strategy to be used when calculating the lsmeans.
#' See the weighting section for more details.
#'
#' @details
#' The function works as follows:
#'
#' 1. Select the first value from `visits`.
#' 2. Subset the data to only the observations that occurred on this visit.
#' 3. Fit a linear model as  `vars$outcome ~ vars$group + vars$covariates`.
#' 4. Extract the "treatment effect" & least square means for each treatment group.
#' 5. Repeat points 2-3 for all other values in `visits`.
#'
#' If no value for `visits` is provided then it will be set to
#' `unique(data[[vars$visit]])`.
#'
#' In order to meet the formatting standards set by [rbmi_analyse()] the results will be collapsed
#' into a single list suffixed by the visit name, e.g.:
#' ```
#' list(
#'    var_visit_1 = list(est = ...),
#'    trt_B_visit_1 = list(est = ...),
#'    lsm_A_visit_1 = list(est = ...),
#'    lsm_B_visit_1 = list(est = ...),
#'    var_visit_2 = list(est = ...),
#'    trt_B_visit_2 = list(est = ...),
#'    lsm_A_visit_2 = list(est = ...),
#'    lsm_B_visit_2 = list(est = ...),
#'    ...
#' )
#' ```
#' Please note that "trt" refers to the treatment effects, and "lsm" refers to the least
#' square mean results. In the above example `vars$group` has two factor levels A and B.
#' The new "var" refers to the model estimated variance of the residuals.
#'
#' If you want to include interaction terms in your model this can be done
#' by providing them to the `covariates` argument of [rbmi::set_vars()]
#' e.g. `set_vars(covariates = c("sex*age"))`.
#'
#' @return a list of variance (`var_*`), treatment effect (`trt_*`), and
#'   least square mean (`lsm_*`) estimates for each visit, organized as
#'   described in Details above.
#'
#' @note These functions have the `rbmi_` prefix to distinguish them from the corresponding
#'   `rbmi` package functions, from which they were copied from. Additional features here
#'   include:
#'
#'   * Support for more than two treatment groups.
#'   * Variance estimates are returned.
#'
#' @seealso [rbmi_analyse()]
#' @seealso [stats::lm()]
#' @seealso [rbmi::set_vars()]
#' @export
rbmi_ancova <- function(
    data,
    vars,
    visits = NULL,
    weights = c("counterfactual", "equal", "proportional_em", "proportional")) {
  outcome <- vars[["outcome"]]
  group <- vars[["group"]]
  covariates <- vars[["covariates"]]
  visit <- vars[["visit"]]
  weights <- match.arg(weights)

  expected_vars <- c(
    utils::getFromNamespace("extract_covariates", "rbmi")(covariates),
    outcome,
    group
  )

  if (is.null(visits)) {
    visits <- unique(data[[visit]])
  }

  res <- lapply(
    visits,
    function(x) {
      data2 <- data[data[[visit]] == x, ]
      res <- rbmi_ancova_single(data2, outcome, group, covariates, weights)
      names(res) <- paste0(names(res), "_", x)
      return(res)
    }
  )
  return(unlist(res, recursive = FALSE))
}


#' Implements an Analysis of Covariance (ANCOVA)
#'
#' @description
#' Performance analysis of covariance. See [rbmi_ancova()] for full details.
#'
#' @param outcome string, the name of the outcome variable in `data`.
#' @param group string, the name of the group variable in `data`.
#' @param covariates character vector containing the name of any additional covariates
#' to be included in the model as well as any interaction terms.
#'
#' @inheritParams rbmi_ancova
#'
#' @details
#' - `group` must be a factor variable with only 2 levels.
#' - `outcome` must be a continuous numeric variable.
#' @export
#' @return a list containing `var` with variance estimates as well as
#' `trt_*` and `lsm_*` entries. See [rbmi_ancova()] for full details.
#' @examples
#'
#' iris2 <- iris[iris$Species %in% c("versicolor", "virginica"), ]
#' iris2$Species <- factor(iris2$Species)
#' rbmi_ancova_single(iris2, "Sepal.Length", "Species", c("Petal.Length * Petal.Width"))
#'
#' @seealso [rbmi_ancova()]
rbmi_ancova_single <- function(
    data,
    outcome,
    group,
    covariates,
    weights = c("counterfactual", "equal", "proportional_em", "proportional")) {
  checkmate::assert_string(outcome)
  checkmate::assert_string(group)
  weights <- match.arg(weights)

  checkmate::assert_factor(data[[group]], n.levels = 2L)
  checkmate::assert_numeric(data[[outcome]])
  data2 <- data[, c(
    utils::getFromNamespace("extract_covariates", "rbmi")(covariates),
    outcome,
    group
  )]

  frm <- utils::getFromNamespace("as_simple_formula", "rbmi")(
    outcome,
    c(group, covariates)
  )

  mod <- stats::lm(formula = frm, data = data2)

  args <- list(model = mod, .weights = weights)

  grp_levels <- levels(data2[[group]])

  all_lsm <- lapply(
    grp_levels,
    \(x) {
      do.call(
        utils::getFromNamespace("lsmeans", "rbmi"),
        c(args, stats::setNames(x, group))
      )
    }
  )
  names(all_lsm) <- paste0("lsm_", grp_levels)

  var_est <- summary(mod)$sigma^2
  var_df <- stats::df.residual(mod)

  all_trt <- lapply(
    paste0(group, grp_levels[-1]),
    \(x) {
      list(
        est = stats::coef(mod)[x],
        se = sqrt(stats::vcov(mod)[x, x]),
        df = var_df
      )
    }
  )
  names(all_trt) <- paste0("trt_", grp_levels[-1])

  var_res <- list(
    var = list(
      est = var_est,
      # Reference: Davison, Statistical Methods, p. 371 (just above 8.3.2)
      se = var_est / sqrt(var_df / 2),
      df = var_df
    )
  )

  c(var_res, all_trt, all_lsm)
}

#' Get Visit Levels in Order Defined by Numeric Version
#'
#' @param visit_cat (`character`)\cr the categorical version.
#' @param visit_n (`numeric`)\cr the numeric version.
#'
#' @return The unique visit levels in the order defined by the numeric version.
#' @export
#'
#' @examples
#' get_visit_levels(
#'   visit_cat = c("Week 1", "Week 11", "Week 2"),
#'   visit_n = c(1, 5, 2)
#' )
get_visit_levels <- function(visit_cat, visit_n) {
  checkmate::assert_character(visit_cat)
  checkmate::assert_numeric(visit_n)

  nvar <- "n"
  visit_levels_df <- unique(data.frame(cat = visit_cat, n = visit_n))
  visit_levels_df <- visit_levels_df[order(visit_levels_df$n), ]
  visit_levels <- visit_levels_df$cat

  checkmate::assert_character(visit_levels, unique = TRUE)

  visit_levels
}

#' @name remove_col_count
#'
#' @title Removal of Unwanted Column Counts
#'
#' @description
#' Remove the N=xx column headers for specified span_label_var columns - default is 'rrisk_header
#' @details This works for only the lowest level of column splitting (since colcounts is used)
#' @param obj table tree object
#' @param span_label_var the spanning header text variable value for which column headers will be removed from
#'
#' @return table tree object with column counts in specified columns removed
#' @export
#'
remove_col_count <- function(obj, span_label_var = "rrisk_header") {
  ## programatically figure out which ones we want
  unwanted_count <- function(pth) pth[1] == span_label_var
  to_blank <- sapply(col_paths(obj), unwanted_count)
  col_counts(obj)[to_blank] <- NA_integer_
  return(obj)
}

#' Obtain Reference Information for a Global Reference Group
#'
#' This helper function can be used in custom analysis functions, by passing
#' an extra argument `ref_path` which defines a global reference group by
#' the corresponding column split hierarchy levels.
#'
#' @param ref_path (`character`)\cr reference group specification as an `rtables`
#'   `colpath`, see details.
#' @param .spl_context see [rtables::spl_context].
#' @param .var the variable being analyzed, see [rtables::additional_fun_params].
#'
#' @return A list with `ref_group` and `in_ref_col`, which can be used as
#'   `.ref_group` and `.in_ref_col` as if being directly passed to an analysis
#'   function by `rtables`, see [rtables::additional_fun_params].
#'
#' @details
#' The reference group is specified in `colpath` hierarchical fashion in `ref_path`:
#' The first column split variable is the first element, and the level to use is the
#' second element. It continues until the last column split variable with last
#' level to use.
#' Note that depending on `.var`, either a `data.frame` (if `.var` is `NULL`) or
#' a vector (otherwise) is returned. This allows usage for analysis functions with
#' `df` and `x` arguments, respectively.
#'
#' @export
#'
#' @examples
#' dm <- DM
#' dm$colspan_trt <- factor(
#'   ifelse(dm$ARM == "B: Placebo", " ", "Active Study Agent"),
#'   levels = c("Active Study Agent", " ")
#' )
#' colspan_trt_map <- create_colspan_map(
#'   dm,
#'   non_active_grp = "B: Placebo",
#'   non_active_grp_span_lbl = " ",
#'   active_grp_span_lbl = "Active Study Agent",
#'   colspan_var = "colspan_trt",
#'   trt_var = "ARM"
#' )
#'
#' standard_afun <- function(x, .ref_group, .in_ref_col) {
#'   in_rows(
#'     "Difference of Averages" = non_ref_rcell(
#'       mean(x) - mean(.ref_group),
#'       is_ref = .in_ref_col,
#'       format = "xx.xx"
#'     )
#'   )
#' }
#'
#' result_afun <- function(x, ref_path, .spl_context, .var) {
#'   ref <- get_ref_info(ref_path, .spl_context, .var)
#'   standard_afun(x, .ref_group = ref$ref_group, .in_ref_col = ref$in_ref_col)
#' }
#'
#' ref_path <- c("colspan_trt", " ", "ARM", "B: Placebo")
#'
#' lyt <- basic_table() |>
#'   split_cols_by(
#'     "colspan_trt",
#'     split_fun = trim_levels_to_map(map = colspan_trt_map)
#'   ) |>
#'   split_cols_by("ARM") |>
#'   analyze(
#'     "AGE",
#'     extra_args = list(ref_path = ref_path),
#'     afun = result_afun
#'   )
#'
#' build_table(lyt, dm)
get_ref_info <- function(ref_path, .spl_context, .var = NULL) {
  checkmate::check_character(ref_path, min.len = 2L, names = "unnamed")
  checkmate::assert_true(length(ref_path) %% 2 == 0) # Even number of elements in ref_path.
  leaf_spl_context <- .spl_context[nrow(.spl_context), ]
  full_df <- leaf_spl_context$full_parent_df[[1]]
  level_indices <- seq(from = 2L, to = length(ref_path), by = 2L)
  ref_group_string <- paste(ref_path[level_indices], collapse = ".")
  row_in_ref_group <- leaf_spl_context[[ref_group_string]][[1]]
  ref_group <- full_df[row_in_ref_group, ]
  if (!is.null(.var)) {
    ref_group <- ref_group[[.var]]
  }
  colvars_indices <- seq(from = 1L, to = length(ref_path) - 1L, by = 2L)
  checkmate::assert_true(identical(leaf_spl_context$cur_col_split[[1]], ref_path[colvars_indices]))
  in_ref_col <- identical(leaf_spl_context$cur_col_split_val[[1]], ref_path[level_indices])
  list(ref_group = ref_group, in_ref_col = in_ref_col)
}

1		## contribute to formatters
2		#' @name inches_to_spaces
3		#' @title Conversion of inches to spaces
4		#' @param ins numeric. Vector of widths in inches
5		#' @param fontspec font_spec. The font specification to use
6		#' @param raw logical(1). Should the answer be returned unrounded
7		#' (`TRUE`), or rounded to the nearest reasonable value (`FALSE`,
8		#' the default)
9		#' @param tol numeric(1). The numeric tolerance, values
10		#' between an integer `n`, and `n+tol` will be returned
11		#' as `n`, rather than `n+1`, if `raw == FALSE`. Ignored
12		#' when `raw` is `TRUE`.
13		#' @return the number of either fractional (`raw = TRUE`) or whole (`raw = FALSE`)
14		#' spaces that will fit within `ins` inches in the specified font
15		#' @export
16		inches_to_spaces <- function(ins, fontspec, raw = FALSE, tol = sqrt(.Machine$double.eps)) {
17	48x	newdev <- open_font_dev(fontspec)
18	48x	if (newdev) {
19	!	on.exit(close_font_dev())
20		}
21	48x	spcw <- grid::convertWidth(grid::unit(1, "strwidth", " "), "inches", valueOnly = TRUE)
22	48x	ans <- ins / spcw
23	48x	if (!raw) {
24	48x	win_tol <- ans - floor(ans) < tol
25	48x	ans[win_tol] <- floor(ans)
26	48x	ans[!win_tol] <- ceiling(ans)
27		}
28	48x	ans
29		}
30
31		#' @importFrom formatters wrap_string_ttype
32		ttype_wrap_vec <- function(vec, fontspec, width, wordbreak_ok = TRUE, ...) {
33	!	lapply(
34	!	vec,
35	!	wrap_string_ttype,
36	!	width_spc = width,
37	!	fontspec = fontspec,
38	!	wordbreak_ok = wordbreak_ok,
39		...
40		)
41		}
42
43		#' @name check_wrap_nobreak
44		#' @title Check Word Wrapping
45		#' @description Check a set of column widths for word-breaking wrap behavior
46		#' @param tt TableTree
47		#' @param colwidths numeric. Column widths (in numbers of spaces under `fontspec`)
48		#' @param fontspec font_spec.
49		#'
50		#' @return `TRUE` if the wrap is able to be done without breaking words,
51		#' `FALSE` if wordbreaking is required to apply `colwidths`
52		#' @rdname check_wrap_nobreak
53		#' @export
54		check_wrap_nobreak <- function(tt, colwidths, fontspec) {
55	!	newdev <- open_font_dev(fontspec)
56	!	if (newdev) {
57	!	on.exit(close_font_dev())
58		}
59	!	mpf <- matrix_form(tt, fontspec = fontspec)
60	!	strs <- mf_strings(mpf)
61	!	colok <- vapply(
62	!	seq_len(ncol(strs)),
63	!	function(i) {
64	!	res <- tryCatch(
65	!	ttype_wrap_vec(strs[, i, drop = TRUE], colwidths[i], fontspec = fontspec, wordbreak_ok = FALSE),
66	!	error = function(e) e
67		)
68	!	!methods::is(res, "error")
69		},
70	!	TRUE
71		)
72	!	all(colok)
73		}
74
75		#' Colwidths for all columns to be forced on one page
76		#'
77		#' @param tt TableTree object to calculate column widths for
78		#' @param fontspec Font specification object
79		#' @param col_gap Column gap in spaces
80		#' @param rowlabel_width Width of row labels in spaces
81		#' @param print_width_ins Print width in inches
82		#' @param landscape Whether the output is in landscape orientation
83		#' @param lastcol_gap Whether to include a gap after the last column
84		#' @keywords internal
85		smart_colwidths_1page <- function(
86		tt,
87		fontspec,
88		col_gap = 6L,
89		rowlabel_width = inches_to_spaces(2, fontspec),
90		print_width_ins = ifelse(landscape, 11, 8.5) - 2.12,
91		landscape = FALSE,
92		lastcol_gap = TRUE) {
93	!	total_cpp <- floor(inches_to_spaces(print_width_ins, fontspec = fontspec, raw = TRUE))
94
95	!	nc <- ncol(tt)
96	!	remain <- total_cpp - rowlabel_width - col_gap * (nc - !lastcol_gap)
97
98	!	c(rowlabel_width - col_gap, spread_integer(remain, nc))
99		}
100
101		spaces_to_inches <- function(spcs, fontspec) {
102	!	nchar_ttype(" ", fontspec, raw = TRUE) * spcs
103		}
104
105		no_cellwrap_colwidths <- function(tt, fontspec, col_gap = 4L, label_width_ins = 2) {
106	6x	if (is.null(col_gap)) {
107	!	col_gap <- 4L
108		}
109	6x	mpf <- matrix_form(tt, TRUE, FALSE, fontspec = fontspec, col_gap = col_gap)
110	6x	strmat <- mf_strings(mpf)
111	6x	label_width_max <- inches_to_spaces(label_width_ins, fontspec)
112
113	6x	nchar_mat <- nchar_ttype(strmat[-seq_len(mf_nlheader(mpf)), , drop = FALSE],
114	6x	fontspec = fontspec
115		)
116
117	6x	label_width <- min(
118	6x	label_width_max,
119	6x	max(nchar_mat[, 1, drop = TRUE])
120		)
121	6x	col_maxes <- apply(nchar_mat[, -1, drop = FALSE], 2, max)
122	6x	c(label_width, col_maxes + col_gap)
123		}
124
125		pack_into_lines2 <- function(strs, wrdwidths = nchar_ttype(strs), colwidth, fontspec) {
126	3980x	if (length(wrdwidths) == 0 \|\| (length(wrdwidths) == 1 && wrdwidths <= colwidth)) {
127	1265x	return(1)
128		}
129	2715x	newdev <- open_font_dev(fontspec)
130	2715x	if (newdev) {
131	!	on.exit(close_font_dev())
132		}
133
134	2715x	csums <- cumsum(wrdwidths + c(0, rep(1, length(wrdwidths) - 1)))
135	2715x	oneline_wdth <- sum(wrdwidths) + length(wrdwidths) - 1
136	2715x	if (colwidth >= ceiling(oneline_wdth)) {
137	1842x	return(1)
138		}
139	873x	lines <- 0
140	873x	wrdsused <- 0
141	873x	widthused <- 0
142	873x	index_seq <- seq_along(wrdwidths)
143	873x	totwrds <- length(wrdwidths)
144	873x	adj <- 0
145		## finite precision arithmetic is a dreamscape of infinite wonder
146	873x	tol <- sqrt(.Machine$double.eps)
147	873x	while (wrdsused < length(wrdwidths)) {
148	2080x	csums_i <- csums - widthused
149
150	2080x	fit <- which(index_seq > wrdsused & csums_i <= colwidth + tol)
151	2080x	widthused <- sum(widthused, wrdwidths[fit], length(fit))
152	2080x	wrdsused <- wrdsused + length(fit)
153	2080x	lines <- lines + 1
154		}
155	873x	lines
156		}
157
158		recursive_add_poss <- function(
159		wlst,
160		cur_lst,
161		ubnd_width,
162		lbnd_width,
163		ubnd_lines = calc_total_lns(wlst, fontspec = fontspec, colwidth = ubnd_width)$lines,
164		lbnd_lines = calc_total_lns(wlst, fontspec = fontspec, colwidth = lbnd_width)$lines,
165		fontspec) {
166	180x	if (ubnd_width <= lbnd_width + 1 + sqrt(.Machine$double.eps)) {
167	39x	return(cur_lst)
168		}
169	141x	curw <- floor((ubnd_width + lbnd_width) / 2)
170	141x	curlnsdf <- calc_total_lns(wlst, fontspec = fontspec, colwidth = curw)
171	141x	to_add <- list(curlnsdf)
172
173	141x	if (curlnsdf$lines != lbnd_lines) {
174	80x	to_add <- c(
175	80x	to_add,
176	80x	recursive_add_poss(
177	80x	wlst = wlst,
178	80x	fontspec = fontspec,
179	80x	ubnd_width = curw,
180	80x	ubnd_lines = curlnsdf$lines,
181	80x	lbnd_width = lbnd_width,
182	80x	lbnd_lines = lbnd_lines,
183	80x	cur_lst = list()
184		)
185		)
186		}
187	141x	if (curlnsdf$lines != ubnd_lines) {
188	80x	to_add <- c(
189	80x	to_add,
190	80x	recursive_add_poss(
191	80x	wlst = wlst,
192	80x	fontspec = fontspec,
193	80x	ubnd_width = ubnd_width,
194	80x	ubnd_lines = ubnd_lines,
195	80x	lbnd_width = curw,
196	80x	lbnd_lines = curlnsdf$lines,
197	80x	cur_lst = list()
198		)
199		)
200		}
201
202	141x	c(cur_lst, to_add)
203		}
204
205		calc_total_lns <- function(wlst, colwidth, fontspec, lns_per_pg = 50) {
206	221x	lns <- vapply(
207	221x	seq_along(wlst),
208	221x	function(i) {
209	3980x	pack_into_lines2(wrdwidths = wlst[[i]], colwidth = colwidth, fontspec = fontspec)
210		},
211	221x	1
212		)
213	221x	nonblnks <- !vapply(wlst, function(x) length(x) == 0, TRUE)
214	221x	celllns <- lns[-1]
215	221x	cllsum <- sum(celllns)
216	221x	hdr <- lns[1]
217	221x	data.frame(
218	221x	colwidth = colwidth,
219	221x	lines = cllsum + ceiling(cllsum / lns_per_pg) * hdr,
220	221x	cell_lines = cllsum,
221	221x	lbl_lines = hdr,
222	221x	min_cell_lines = min(1, celllns[nonblnks], na.rm = TRUE),
223	221x	max_cell_lines = max(celllns)
224		)
225		}
226
227
228		calc_poss_lines <- function(wlst, lbound, avail_spc, fontspec) {
229	20x	minposs <- calc_total_lns(wlst, lbound + avail_spc, fontspec)
230	20x	maxposs <- calc_total_lns(wlst, lbound, fontspec)
231
232	20x	retlst <- list(minposs, maxposs)
233
234	20x	retlst <- recursive_add_poss(
235	20x	wlst = wlst,
236	20x	ubnd_width = lbound + avail_spc,
237	20x	lbnd_width = lbound,
238	20x	cur_lst = retlst,
239	20x	fontspec = fontspec
240		)
241	20x	do.call(rbind, retlst)
242		}
243
244
245		make_poss_wdf <- function(
246		mpf,
247		incl_header = FALSE,
248		col_gap = 3,
249		pg_width_ins = 8.88,
250		fontspec = font_spec("Times", 9, 1.2)) {
251	2x	newdev <- open_font_dev(fontspec)
252	2x	if (newdev) {
253	!	on.exit(close_font_dev())
254		}
255	2x	if (!methods::is(mpf, "MatrixPrintForm")) {
256	2x	mpf <- matrix_form(mpf, fontspec = fontspec, col_gap = col_gap)
257		}
258	2x	strs <- mf_strings(mpf)
259	2x	if (!incl_header) {
260	!	strs <- strs[-seq_len(mf_nlheader(mpf)), ]
261		}
262	2x	nc <- ncol(strs)
263	2x	nr <- nrow(strs)
264		## strip out markup so we are not over countin (by as much)
265	2x	strs <- matrix(gsub("~[{[][[:alpha:]]+ ([^]}]+)[]}]", "\\1", strs), ncol = nc, nrow = nr)
266
267	2x	possrows <- lapply(seq_len(ncol(strs)), function(ii) {
268	20x	res <- one_col_strs(strs[, ii, drop = TRUE], fontspec = fontspec, col_gap = col_gap)
269	20x	res$col_num <- ii
270	20x	res
271		})
272
273	2x	possdf <- do.call(rbind, possrows)
274	2x	o <- order(possdf$col_num, possdf$colwidth)
275	2x	possdf <- possdf[o, ]
276	2x	possdf
277		}
278
279		#'
280		#' @param mpf (`listing_df` or `MatrixPrintForm` derived thereof)\cr The listing
281		#' calculate column widths for.
282		#' @param incl_header (`logical(1)`)\cr Should the constraint to not break up
283		#' individual words be extended to words in the column labels? Defaults to `TRUE`
284		#' @param col_gap (`numeric(1)`)\cr Amount of extra space (in spaces) to
285		#' assume between columns. Defaults to `0.5`
286		#' @param pg_width_ins (`numeric(1)`)\cr Number of inches in width for
287		#' the portion of the page the listing will be printed to. Defaults to `8.88`
288		#' which corresponds to landscape orientation on a standard page after margins.
289		#' @param fontspec (`font_spec`)\cr Defaults to Times New Roman 8pt font with 1.2 line
290		#' height.
291		#' @param verbose (`logical(1)`)\cr Should additional information messages be
292		#' displayed during the calculation of the column widths? Defaults to `FALSE`.
293		#' @returns A vector of column widths suitable to use in `tt_to_tlgrtf` and
294		#' other exporters.
295		#' @rdname def_colwidths
296		#' @export
297		listing_column_widths <- function(
298		mpf,
299		incl_header = TRUE,
300		col_gap = 0.5,
301		pg_width_ins = 8.88,
302		fontspec = font_spec("Times", 8, 1.2),
303		verbose = FALSE) {
304	2x	newdev <- open_font_dev(fontspec)
305	2x	if (newdev) {
306	!	on.exit(close_font_dev())
307		}
308	2x	possdf <- make_poss_wdf(
309	2x	mpf = mpf,
310	2x	incl_header = incl_header,
311	2x	col_gap = col_gap,
312	2x	fontspec = fontspec,
313	2x	pg_width_ins = pg_width_ins
314		)
315	2x	optimal <- optimal_widths(
316	2x	possdf = possdf,
317	2x	tot_spaces = inches_to_spaces(pg_width_ins, fontspec = fontspec),
318	2x	verbose = verbose
319		)
320	2x	optimal$colwidth
321		}
322
323		find_free_colspc <- function(curposs, fullposs, thresh = 0.99, skip = integer(), verbose = FALSE) {
324	!	orig_curposs <- curposs
325	!	maxlns_ind <- which.max(curposs$cell_lines)
326	!	longestcol <- curposs$col_num[maxlns_ind]
327	!	maxlns <- curposs$cell_lines[maxlns_ind]
328	!	adjrow_inds <- setdiff(which(curposs$cell_lines < thresh * maxlns), skip)
329	!	for (arowi in adjrow_inds) {
330	!	col <- curposs$col_num[arowi]
331	!	colwidthii <- curposs$colwidth[arowi]
332	!	fp_ind <- with(fullposs, min(which(col_num == col & cell_lines <= maxlns & colwidth < colwidthii)))
333	!	if (is.finite(fp_ind)) {
334	!	if (verbose) {
335	!	oldwdth <- curposs$colwidth[arowi]
336	!	newwdth <- fullposs$colwidth[fp_ind]
337	!	msg <- sprintf("adjusting column %d width from %d to %d", col, oldwdth, newwdth)
338	!	message(msg)
339		}
340	!	curposs[arowi, ] <- fullposs[fp_ind, ]
341		}
342		}
343	!	curposs
344		}
345
346		constrict_lbl_lns <- function(curdf, possdf, avail_spc, verbose = TRUE) {
347	2x	old_lbl_lns <- max(curdf$lbl_lines)
348	2x	cols_to_pack <- which(curdf$lbl_lns == old_lbl_lns)
349	2x	olddf <- curdf
350	2x	success <- TRUE
351	2x	for (ii in cols_to_pack) {
352	!	colii <- curdf$col_num[ii]
353	!	cwidthii <- curdf$colwidth[ii]
354	!	possdfii <- possdf[possdf$col_num == colii & possdf$lbl_lines < old_lbl_lns, ]
355	!	if (nrow(possdfii) == 0) {
356	!	success <- FALSE
357	!	break
358		}
359	!	newrow <- possdfii[ii, ]
360	!	if (newrow$colwidth - cwidthii > avail_spc) {
361	!	success <- FALSE
362	!	break
363		}
364		## assuming sorted
365	!	curdf[ii, ] <- newrow
366	!	avail_spc <- avail_spc + cwidthii - newrow$colwidth
367		}
368
369	2x	if (verbose) {
370	!	if (success) {
371	!	msg <- paste(
372	!	"overall number of label rows successfully reduced. Cols affected: ",
373	!	paste(curdf$col_num[cols_to_pack], collapse = ", ")
374		)
375		} else {
376	!	msg <- paste("Unable to reduce label rows required.")
377		}
378		}
379
380	2x	if (!success) {
381	!	curdf <- olddf
382		}
383	2x	curdf
384		}
385
386		optimal_widths <- function(possdf, tot_spaces = 320, max_lbl_lines = 3, verbose = FALSE) {
387	2x	odf <- order(possdf$col_num, possdf$colwidth)
388	2x	possdf <- possdf[odf, ]
389	2x	badlbl <- which(possdf$lbl_lines > max_lbl_lines)
390	2x	if (length(badlbl > 0)) {
391	2x	if (verbose) {
392	!	message("Excluding ", length(badlbl), " column widths for labels taking over ", max_lbl_lines, " lines.")
393		}
394	2x	possdf <- possdf[-badlbl, ]
395	2x	possdf <- possdf[possdf$lbl_lines <= max_lbl_lines, , drop = FALSE]
396		}
397	2x	full_possdf <- possdf
398		## already ordered by colnum then width so this the first of each colwidth is the min width for that col
399	2x	dups <- duplicated(possdf$col_num)
400	2x	curdf <- possdf[!dups, ]
401	2x	possdf <- possdf[dups, ] ## without rows for ones in curdf
402	2x	spcleft <- tot_spaces - sum(curdf$colwidth)
403	2x	if (verbose) {
404	!	message(
405	!	"Optimizng Column Widths\n",
406	!	"Initial lines required: ",
407	!	max(curdf$lines),
408	!	"\n",
409	!	"Available adjustment: ",
410	!	spcleft,
411	!	" spaces\n"
412		)
413		}
414	2x	done <- FALSE
415	2x	while (!done) {
416	2x	oldwdths <- curdf$colwidth
417	2x	curdf <- constrict_lbl_lns(curdf, possdf, verbose = verbose)
418	2x	if (all.equal(curdf$colwidth, oldwdths)) {
419	2x	done <- TRUE
420		}
421		}
422	2x	change <- TRUE
423	2x	while (spcleft > 0 && change && nrow(possdf) > 0) {
424	8x	change <- FALSE
425	8x	ii <- which.max(curdf$cell_lines)
426	8x	spcleft <- tot_spaces - sum(curdf$colwidth)
427	8x	colii <- curdf$col_num[ii]
428	8x	bef_lns <- curdf$cell_lines[ii]
429	8x	bef_width <- curdf$colwidth[ii]
430	8x	nextlns <- max(curdf$cell_lines[-ii])
431	8x	cand_row_cond <- possdf$col_num == colii & possdf$cell_lines < bef_lns & possdf$colwidth - bef_width <= spcleft
432	8x	canddf <- possdf[cand_row_cond, , drop = FALSE]
433	8x	if (nrow(canddf) > 0) {
434	6x	more_than_next <- canddf$cell_lines >= nextlns
435	6x	if (any(more_than_next)) {
436	3x	candrow <- canddf[max(which(more_than_next)), ]
437		} else {
438	3x	candrow <- canddf[nrow(canddf), ]
439		}
440
441	6x	if (verbose) {
442	!	message(
443	!	"COL ",
444	!	colii,
445	!	" width: ",
446	!	bef_width,
447		"->",
448	!	candrow$colwidth,
449	!	" lines req: ",
450	!	bef_lns,
451		"->",
452	!	candrow$cell_lines
453		)
454		}
455	6x	change <- TRUE
456	6x	curdf[ii, ] <- candrow
457		}
458		}
459	2x	curdf
460		}
461
462
463		one_col_strs <- function(strcol, col_gap = 2, fontspec) {
464	20x	strspls <- strsplit(strcol, split = "(-\| (?=[^/]))", perl = TRUE)
465	20x	strspl_widths <- lapply(strspls, nchar_ttype, fontspec = fontspec, raw = TRUE)
466	20x	lbound_raw <- max(unlist(strspl_widths))
467	20x	lbound <- ceiling(lbound_raw + 2 * col_gap)
468	20x	ret <- calc_poss_lines(strspl_widths, lbound, 50, fontspec = fontspec)
469	20x	ret
470		}
471
472		## we have permission from the formatters maintainer to use this
473		## unexported function
474		j_mf_col_widths <- utils::getFromNamespace("mf_col_widths", "formatters")
475
476		#' @name def_colwidths
477		#'
478		#' @title Define Column Widths
479		#'
480		#' @description
481		#' `def_colwidths` uses heuristics to determine suitable column widths given a
482		#' table or listing, and a font.
483		#'
484		#' @param tt input Tabletree
485		#' @param fontspec Font specification
486		#' @param label_width_ins Label Width in Inches.
487		#' @param col_gap Column gap in spaces. Defaults to `.5` for listings and `3`
488		#' for tables.
489		#' @param type Type of the table tree, used to determine column width calculation method.
490		#'
491		#' @details Listings are assumed to be rendered landscape on standard A1 paper,
492		#' such that all columns are rendered on one page. Tables are allowed to
493		#' be horizontally paginated, and column widths are determined based only on
494		#' required word wrapping. See the `Automatic Column Widths` vignette for
495		#' a detailed discussion of the algorithms used.
496		#' @return a vector of column widths (including the label row pseudo-column in the table
497		#' case) suitable for use rendering `tt` in the specified font.
498		#' @export
499		#'
500		def_colwidths <- function(tt,
501		fontspec,
502		label_width_ins = 2,
503		col_gap = ifelse(type == "Listing", .5, 3),
504		type = tlg_type(tt)) {
505	8x	if (type == "Figure") {
506	!	ret <- NULL
507	8x	} else if (type == "Table") {
508		if (
509	6x	is.list(tt) &&
510	6x	!methods::is(tt, "MatrixPrintForm") &&
511	6x	!is.null(j_mf_col_widths(tt[[1]]))
512		) {
513	!	ret <- j_mf_col_widths(tt[[1]])
514		} else {
515	6x	ret <- no_cellwrap_colwidths(tt, fontspec, col_gap = col_gap, label_width_ins = label_width_ins)
516		}
517		} else {
518	2x	ret <- listing_column_widths(tt, fontspec = fontspec, col_gap = col_gap)
519		}
520	8x	ret
521		}

1		#' Adding Labels To Variables For Model
2		#'
3		#' @param vars (`list`)\cr variables to use.
4		#' @param data (`data.frame`)\cr data to use.
5		#' @param x (`character`)\cr an element in vars.
6		#'
7		#' @name labels
8		#' @keywords internal
9		NULL
10
11		#' @describeIn labels checks if element in `vars` is not `NULL` and not empty.
12		h_is_specified <- function(x, vars) {
13	112x	!is.null(vars[[x]]) && (length(vars[[x]]) > 0)
14		}
15
16		#' @describeIn labels checks if element in vars is not NULL and exists in dataset.
17		h_is_specified_and_in_data <- function(x, vars, data) {
18	70x	h_is_specified(x, vars) && all(vars[[x]] %in% names(data))
19		}
20
21		#' @describeIn labels gets label for each element in vars.
22		h_check_and_get_label <- function(x, vars, data) {
23	70x	checkmate::assert_true(h_is_specified_and_in_data(x, vars, data))
24	70x	res <- NULL
25	70x	for (v in vars[[x]]) {
26	84x	label <- attr(data[[v]], "label")
27	84x	string <- ifelse(!is.null(label), label, v)
28	84x	res <- c(res, stats::setNames(string, v))
29		}
30	70x	res
31		}
32
33		#' Extraction of Covariate Parts from Character Vector
34		#'
35		#' @param covariates (`character`)\cr specification in the usual way, see examples.
36		#'
37		#' @return Character vector of the covariates involved in `covariates` specification.
38		#' @keywords internal
39		h_get_covariate_parts <- function(covariates) {
40	14x	checkmate::assert_character(covariates, null.ok = TRUE)
41	14x	if (is.null(covariates)) {
42	!	NULL
43		} else {
44	14x	unique(unlist(strsplit(covariates, split = "\\*\|:")))
45		}
46		}
47
48		#' @describeIn labels returns the list of variables with labels.
49		h_labels <- function(vars, data) {
50	14x	checkmate::assert_list(vars)
51	14x	checkmate::assert_data_frame(data)
52	14x	labels <- list()
53	14x	labels$response <- h_check_and_get_label("response", vars, data)
54	14x	labels$id <- h_check_and_get_label("id", vars, data)
55	14x	labels$visit <- h_check_and_get_label("visit", vars, data)
56	14x	if (h_is_specified("arm", vars)) {
57	14x	labels$arm <- h_check_and_get_label("arm", vars, data)
58		}
59	14x	if (h_is_specified("covariates", vars)) {
60	14x	vars$parts <- h_get_covariate_parts(vars$covariates)
61	14x	labels$parts <- h_check_and_get_label("parts", vars, data)
62		}
63	14x	if (h_is_specified("weights", vars)) {
64	!	labels$weights <- h_check_and_get_label("weights", vars, data)
65		}
66	14x	return(labels)
67		}
68
69		#' Building Model Formula
70		#'
71		#' This builds the model formula which is used inside [fit_mmrm_j()] and provided
72		#' to [mmrm::mmrm()] internally. It can be instructive to look at the resulting
73		#' formula directly sometimes.
74		#'
75		#' @param vars (`list`)\cr variables to use in the model.
76		#' @param cor_struct (`string`)\cr specify the covariance structure to use.
77		#' @return Formula to use in [mmrm::mmrm()].
78		#' @export
79		#'
80		#' @examples
81		#' vars <- list(
82		#' response = "AVAL", covariates = c("RACE", "SEX"),
83		#' id = "USUBJID", arm = "ARMCD", visit = "AVISIT"
84		#' )
85		#' build_formula(vars, "auto-regressive")
86		#' build_formula(vars)
87		build_formula <- function(
88		vars,
89		cor_struct = c(
90		"unstructured",
91		"toeplitz",
92		"heterogeneous toeplitz",
93		"ante-dependence",
94		"heterogeneous ante-dependence",
95		"auto-regressive",
96		"heterogeneous auto-regressive",
97		"compound symmetry",
98		"heterogeneous compound symmetry"
99		)) {
100	9x	checkmate::assert_list(vars)
101	9x	cor_struct <- match.arg(cor_struct)
102	9x	covariates_part <- paste(vars$covariates, collapse = " + ")
103	9x	arm_visit_part <- if (is.null(vars$arm)) {
104	!	vars$visit
105		} else {
106	9x	paste0(vars$arm, "*", vars$visit)
107		}
108	9x	random_effects_fun <- switch(cor_struct,
109	9x	unstructured = "us",
110	9x	toeplitz = "toep",
111	9x	`heterogeneous toeplitz` = "toeph",
112	9x	`ante-dependence` = "ad",
113	9x	`heterogeneous ante-dependence` = "adh",
114	9x	`auto-regressive` = "ar1",
115	9x	`heterogeneous auto-regressive` = "ar1h",
116	9x	`compound symmetry` = "cs",
117	9x	`heterogeneous compound symmetry` = "csh"
118		)
119	9x	random_effects_part <- paste0(random_effects_fun, "(", vars$visit, " \| ", vars$id, ")")
120	9x	rhs_formula <- paste(arm_visit_part, "+", random_effects_part)
121	9x	if (covariates_part != "") {
122	9x	rhs_formula <- paste(covariates_part, "+", rhs_formula)
123		}
124	9x	stats::as.formula(paste(vars$response, "~", rhs_formula))
125		}
126
127		#' Extract Least Square Means from `MMRM`
128		#'
129		#' Extracts the least square means from an `MMRM` fit.
130		#'
131		#' @param fit (`mmrm`)\cr result of [mmrm::mmrm()].
132		#' @inheritParams fit_mmrm_j
133		#' @param averages (`list`)\cr named list of visit levels which should be averaged
134		#' and reported along side the single visits.
135		#' @param weights (`string`)\cr type of weights to be used for the least square means,
136		#' see [emmeans::emmeans()] for details.
137		#' @return A list with data frames `estimates` and `contrasts`.
138		#' The attributes `averages` and `weights` save the settings used.
139		#'
140		#' @export
141		get_mmrm_lsmeans <- function(fit, vars, conf_level, weights, averages = list()) {
142	10x	checkmate::assert_class(fit, "mmrm")
143	10x	checkmate::assert_list(averages, types = "character")
144	10x	emmeans_res <- h_get_emmeans_res(fit, vars, weights)
145
146		# Get least square means estimates for single visits, and possibly averaged visits.
147	10x	estimates <- h_get_single_visit_estimates(emmeans_res, conf_level)
148	10x	if (length(averages)) {
149	4x	average_specs <- h_get_average_visit_specs(emmeans_res, vars, averages, fit)
150	4x	average_estimates <- h_get_spec_visit_estimates(emmeans_res, average_specs, conf_level)
151	4x	estimates <- rbind(estimates, average_estimates)
152		}
153	10x	has_arm_var <- !is.null(vars$arm)
154	10x	if (!has_arm_var) {
155	!	return(list(estimates = estimates))
156		}
157		# Continue with contrasts when we have an arm variable.
158	10x	contrast_specs <- h_single_visit_contrast_specs(emmeans_res, vars)
159	10x	contrast_estimates <- h_get_spec_visit_estimates(emmeans_res, contrast_specs, conf_level, tests = TRUE)
160	10x	if (length(averages)) {
161	4x	average_contrast_specs <- h_average_visit_contrast_specs(contrast_specs, averages)
162	4x	average_contrasts <- h_get_spec_visit_estimates(emmeans_res, average_contrast_specs, conf_level, tests = TRUE)
163	4x	contrast_estimates <- rbind(contrast_estimates, average_contrasts)
164		}
165
166	10x	relative_reduc_df <- h_get_relative_reduc_df(estimates, vars)
167	10x	contrast_estimates <- merge(contrast_estimates, relative_reduc_df, by = c(vars$arm, vars$visit), sort = FALSE)
168	10x	contrast_estimates[[vars$arm]] <- factor(contrast_estimates[[vars$arm]])
169	10x	contrast_estimates[[vars$visit]] <- factor(contrast_estimates[[vars$visit]])
170	10x	structure(list(estimates = estimates, contrasts = contrast_estimates), averages = averages, weights = weights)
171		}
172
173		#' `MMRM` Analysis
174		#'
175		#' Does the `MMRM` analysis. Multiple other functions can be called on the result to produce
176		#' tables and graphs.
177		#'
178		#' @param vars (named `list` of `string` or `character`)\cr specifying the variables in the `MMRM`.
179		#' The following elements need to be included as character vectors and match corresponding columns
180		#' in `data`:
181		#'
182		#' - `response`: the response variable.
183		#' - `covariates`: the additional covariate terms (might also include interactions).
184		#' - `id`: the subject ID variable.
185		#' - `arm`: the treatment group variable (factor).
186		#' - `visit`: the visit variable (factor).
187		#' - `weights`: optional weights variable (if `NULL` or omitted then no weights will be used).
188		#'
189		#' Note that the main effects and interaction of `arm` and `visit` are by default
190		#' included in the model.
191		#' @param data (`data.frame`)\cr with all the variables specified in
192		#' `vars`. Records with missing values in any independent variables
193		#' will be excluded.
194		#' @param conf_level (`proportion`)\cr confidence level of the interval.
195		#' @param cor_struct (`string`)\cr specifying the covariance structure, defaults to
196		#' `'unstructured'`. See the details.
197		#' @param averages_emmeans (`list`)\cr optional named list of visit levels which should be averaged
198		#' and reported along side the single visits.
199		#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `'counterfactual'` by default.
200		#' @param ... additional arguments for [mmrm::mmrm()], in particular `reml` and options listed in
201		#' [mmrm::mmrm_control()].
202		#'
203		#' @details Multiple different degree of freedom adjustments are available via the `method` argument
204		#' for [mmrm::mmrm()]. In addition, covariance matrix adjustments are available via `vcov`.
205		#' Please see [mmrm::mmrm_control()] for details and additional useful options.
206		#'
207		#' For the covariance structure (`cor_struct`), the user can choose among the following options.
208		#'
209		#' - `unstructured`: Unstructured covariance matrix. This is the most flexible choice and default.
210		#' If there are `T` visits, then `T * (T+1) / 2` variance parameters are used.
211		#' - `toeplitz`: Homogeneous Toeplitz covariance matrix, which uses `T` variance parameters.
212		#' - `heterogeneous toeplitz`: Heterogeneous Toeplitz covariance matrix,
213		#' which uses `2 * T - 1` variance parameters.
214		#' - `ante-dependence`: Homogeneous Ante-Dependence covariance matrix, which uses `T` variance parameters.
215		#' - `heterogeneous ante-dependence`: Heterogeneous Ante-Dependence covariance matrix,
216		#' which uses `2 * T - 1` variance parameters.
217		#' - `auto-regressive`: Homogeneous Auto-Regressive (order 1) covariance matrix,
218		#' which uses 2 variance parameters.
219		#' - `heterogeneous auto-regressive`: Heterogeneous Auto-Regressive (order 1) covariance matrix,
220		#' which uses `T + 1` variance parameters.
221		#' - `compound symmetry`: Homogeneous Compound Symmetry covariance matrix, which uses 2
222		#' variance parameters.
223		#' - `heterogeneous compound symmetry`: Heterogeneous Compound Symmetry covariance matrix, which uses
224		#' `T + 1` variance parameters.
225		#'
226		#' @return A `tern_model` object which is a list with model results:
227		#'
228		#' - `fit`: The `mmrm` object which was fitted to the data. Note that via `mmrm::component(fit, 'optimizer')`
229		#' the finally used optimization algorithm can be obtained, which can be useful for refitting the model
230		#' later on.
231		#' - `cov_estimate`: The matrix with the covariance matrix estimate.
232		#' - `diagnostics`: A list with model diagnostic statistics (REML criterion, AIC, corrected AIC, BIC).
233		#' - `lsmeans`: This is a list with data frames `estimates` and `contrasts`.
234		#' The attributes `averages` and `weights` save the settings used
235		#' (`averages_emmeans` and `weights_emmeans`).
236		#' - `vars`: The variable list.
237		#' - `labels`: Corresponding list with variable labels extracted from `data`.
238		#' - `cor_struct`: input.
239		#' - `ref_level`: The reference level for the arm variable, which is always the first level.
240		#' - `treatment_levels`: The treatment levels for the arm variable.
241		#' - `conf_level`: The confidence level which was used to construct the `lsmeans` confidence intervals.
242		#' - `additional`: List with any additional inputs passed via `...`
243		#'
244		#' @export
245		#'
246		#' @note This function has the `_j` suffix to distinguish it from [mmrm::fit_mmrm()].
247		#' It is a copy from the `tern.mmrm` package and later will be replaced by tern.mmrm::fit_mmrm().
248		#' No new features are included in this function here.
249		#'
250		#' @examples
251		#' mmrm_results <- fit_mmrm_j(
252		#' vars = list(
253		#' response = "FEV1",
254		#' covariates = c("RACE", "SEX"),
255		#' id = "USUBJID",
256		#' arm = "ARMCD",
257		#' visit = "AVISIT"
258		#' ),
259		#' data = mmrm::fev_data,
260		#' cor_struct = "unstructured",
261		#' weights_emmeans = "equal",
262		#' averages_emmeans = list(
263		#' "VIS1+2" = c("VIS1", "VIS2")
264		#' )
265		#' )
266		fit_mmrm_j <- function(
267		vars = list(response = "AVAL", covariates = c(), id = "USUBJID", arm = "ARM", visit = "AVISIT"),
268		data,
269		conf_level = 0.95,
270		cor_struct = "unstructured",
271		weights_emmeans = "counterfactual",
272		averages_emmeans = list(),
273		...) {
274	9x	labels <- h_labels(vars, data)
275	9x	formula <- build_formula(vars, cor_struct)
276	9x	weights <- if (!is.null(vars$weights)) data[[vars$weights]] else NULL
277
278	9x	fit <- mmrm::mmrm(formula = formula, data = data, weights = weights, reml = TRUE, ...)
279	9x	lsmeans <- get_mmrm_lsmeans(
280	9x	fit = fit,
281	9x	vars = vars,
282	9x	conf_level = conf_level,
283	9x	averages = averages_emmeans,
284	9x	weights = weights_emmeans
285		)
286	9x	cov_estimate <- mmrm::VarCorr(fit)
287	9x	visit_levels <- rownames(cov_estimate)
288	9x	contrasts_from_visits <- match(visit_levels, lsmeans$contrasts[[vars$visit]])
289	9x	df <- stats::setNames(lsmeans$contrasts$df[contrasts_from_visits], visit_levels)
290	9x	results <- list(
291	9x	fit = fit,
292	9x	cov_estimate = cov_estimate,
293	9x	lsmeans = lsmeans,
294	9x	vars = vars,
295	9x	labels = labels,
296	9x	mse = diag(cov_estimate),
297	9x	df = df,
298	9x	cor_struct = cor_struct,
299	9x	ref_level = if (is.null(vars$arm)) NULL else levels(data[[vars$arm]])[1],
300	9x	treatment_levels = if (is.null(vars$arm)) NULL else levels(data[[vars$arm]])[-1],
301	9x	conf_level = conf_level,
302	9x	additional = list(...)
303		)
304	9x	class(results) <- "tern_model"
305	9x	return(results)
306		}

1		s_summarize_desc_j <- function(df, .var, .ref_group, .in_ref_col, control = control_analyze_vars()) {
2	5x	x <- df[[.var]]
3	5x	y1 <- s_summary(x)
4	5x	y2 <- NULL
5
6		# diff in means versus control group, based upon 2 sample t.test
7	5x	y2$mean_diffci <- numeric()
8	5x	if (!is.null(.ref_group) && !.in_ref_col) {
9	3x	x1 <- df[[.var]]
10	3x	x2 <- .ref_group[[.var]]
11
12	3x	x1 <- x1[!is.na(x1)]
13	3x	x2 <- x2[!is.na(x2)]
14
15	3x	if ((length(x1) > 1 && length(x2) > 1)) {
16	3x	ttest_stat <- stats::t.test(x1, x2, conf.level = control$conf_level)
17
18	3x	stat <- ttest_stat[c("estimate", "conf.int")]
19	3x	stat$diff <- stat$estimate[1] - stat$estimate[2]
20	3x	stat <- c(stat$diff, stat$conf.int)
21
22	3x	y2$mean_diffci <- with_label(
23	3x	c(mean_diffci = stat),
24	3x	paste("Difference in Mean + ", f_conf_level(control$conf_level))
25		)
26		} else {
27	!	y2b <- s_summary(.ref_group[[.var]])
28	!	diff <- y1[["mean"]] - y2b[["mean"]]
29	!	stat <- c(diff, NA, NA)
30
31	!	y2$mean_diffci <- with_label(
32	!	c(mean_diffci = stat),
33	!	paste("Difference in Mean + ", f_conf_level(control$conf_level))
34		)
35		}
36		}
37	5x	y <- c(y1, y2)
38
39	5x	return(y)
40		}
41
42		s_aval_chg_col1 <- function(df, .var, denom, .N_col, id, indatavar) {
43		## First column AVAL - show n/N (%)
44	4x	mystat <- "n"
45
46	4x	if (!is.null(indatavar)) {
47	!	df <- subset(df, !is.na(df[[indatavar]]))
48		}
49
50	4x	x <- df[[.var]]
51
52	4x	x_stats <- s_summary(x)
53	4x	x_stats <- x_stats[mystat]
54
55		### Ndenom derivation in case denom = N
56	4x	Ndenom <- .N_col
57	4x	if (denom == "N") {
58		### as our input dataset has ensured we have unique subjects we can just use the length of x here still safer
59		### to use id variable
60	4x	nsub <- length(unique(df[[id]]))
61	4x	Ndenom <- nsub
62		}
63
64	4x	count_denom_frac <- c(x_stats$n, Ndenom, x_stats$n / Ndenom)
65	4x	names(count_denom_frac) <- c("n", "N", "fraction")
66
67	4x	count_frac <- count_denom_frac[c("n", "fraction")]
68	4x	count <- count_denom_frac[c("n")]
69
70	4x	y <- list()
71	4x	y$count_denom_frac <- count_denom_frac
72	4x	y$count_frac <- count_frac
73	4x	y$count <- count
74
75	4x	return(y)
76		}
77
78		s_aval_chg_col23_diff <- function(
79		df,
80		.var,
81		.df_row,
82		.ref_group,
83		.in_ref_col,
84		ancova,
85		interaction_y,
86		interaction_item,
87		conf_level,
88		variables,
89		trt_var,
90		ctrl_grp,
91		cur_param,
92		cur_lvl) {
93	10x	.df_row <- subset(.df_row, !is.na(.df_row[[.var]]))
94	10x	df <- subset(df, !is.na(df[[.var]]))
95	10x	.ref_group <- subset(.ref_group, !is.na(.ref_group[[.var]]))
96
97	10x	if (nrow(.df_row) == 0) {
98		#### this is only when input row no non-missing records in any of the columns expected to occur for baseline
99		#### timepoint for analysis variable change only here we want a blank cell, not a cell with all NA's NULL is
100		#### generating a blank cell
101	!	x_stats <- NULL
102	!	mystat1 <- c("mean_ci_3d", "mean_diffci")
103	10x	} else if (!ancova) {
104	5x	mystat1 <- c("mean_ci_3d", "mean_diffci")
105
106	5x	control <- control_analyze_vars()
107	5x	control$conf_level <- conf_level
108	5x	x_stats <- s_summarize_desc_j(
109	5x	df = df,
110	5x	.var = .var,
111	5x	.ref_group = .ref_group,
112	5x	.in_ref_col = .in_ref_col,
113	5x	control = control
114		)
115		} else {
116	5x	mystat1 <- c("lsmean_ci", "lsmean_diffci")
117
118		### sparse data problems with underlying ancova function 1/ if nrow(.df_row) = 0 NULL (blank columns)
119
120		### 2/ if nrow(df) = 0 no ancova - lsmean and lsmean diff should be na
121
122		### 3/ if nrow(df) > 0, & nrow(.ref_group) = 0 ancova for ls mean only, NULL .ref_group -- lsmean diff should
123		### be na
124
125		### 4/ if nrow(df) > 0, & nrow(.ref_group) > 0 and at least one group with 0 data update levels in
126		### .df_row/df/.ref_group to avoid problems for contrast
127
128		#### by making updates to .df_row/df/.ref_group, situation 3 and 4 could be used together with general case
129
130	5x	.df_row_trtlevels <- unique(.df_row[[trt_var]])
131
132	5x	if (NROW(.df_row_trtlevels) == 0) {
133		### No data at all
134	!	x_stats <- NULL
135	5x	} else if (NROW(df) == 0 \|\| NROW(.df_row_trtlevels) == 1) {
136		### current column no data/less than 2 treatment levels
137
138	!	x_stats <- list()
139	!	x_stats[[mystat1[1]]] <- rep(NA, 3)
140	!	x_stats[[mystat1[2]]] <- rep(NA, 3)
141		} else {
142		### ancova situation, and some updates to prevent problems with sparse data
143	5x	if ((!ctrl_grp %in% .df_row_trtlevels)) {
144		# LS means for current group can be estimated from model, but not difference in LS means set .ref_group
145		# to NULL to proceed with s_summarize_ancova_j function update to underlying s_ancova_j to cover NULL
146		# ref_group in call
147	!	.ref_group <- NULL
148		}
149	5x	if (NROW(.df_row_trtlevels) < length(levels(.df_row[[trt_var]]))) {
150		# missing levels need to be removed from the factor, in order to have the correct estimates, and avoid
151		# errors with underlying tern:::s_ancova function
152	!	.df_row[[trt_var]] <- droplevels(.df_row[[trt_var]])
153	!	df[[trt_var]] <- factor(as.character(df[[trt_var]]), levels = levels(.df_row[[trt_var]]))
154	!	.ref_group[[trt_var]] <- factor(as.character(.ref_group[[trt_var]]), levels = levels(.df_row[[trt_var]]))
155		}
156
157	5x	x_stats <- s_summarize_ancova_j(
158	5x	df = df,
159	5x	.var = .var,
160	5x	.ref_group = .ref_group,
161	5x	.in_ref_col = .in_ref_col,
162	5x	.df_row = .df_row,
163	5x	conf_level = conf_level,
164	5x	interaction_y = interaction_y,
165	5x	interaction_item = interaction_item,
166	5x	variables = variables
167		)
168		}
169		}
170
171	10x	y <- list(mean_ci_3d = x_stats[[mystat1[1]]], meandiff_ci_3d = x_stats[[mystat1[2]]])
172	10x	return(y)
173		}
174
175
176		xxd_to_xx <- function(str, d = 0) {
177	10x	checkmate::assert_integerish(d, null.ok = TRUE)
178	10x	if (checkmate::test_list(str, null.ok = FALSE)) {
179	!	checkmate::assert_list(str, null.ok = FALSE)
180		# Or it may be a vector of characters
181		} else {
182	10x	checkmate::assert_character(str, null.ok = FALSE)
183		}
184
185	10x	nmstr <- names(str)
186
187	10x	if (any(grepl("xx.d", str, fixed = TRUE))) {
188	10x	checkmate::assert_integerish(d)
189	10x	str <- gsub("xx.d", paste0("xx.", strrep("x", times = d)), str, fixed = TRUE)
190		}
191	10x	str <- stats::setNames(str, nmstr)
192	10x	return(str)
193		}
194
195		format_xxd <- function(str, d = 0, .df_row, formatting_fun = NULL) {
196		# Handling of data precision
197	10x	if (!is.numeric(d)) {
198	!	if (is.character(d) && length(d) == 1) {
199		# check if d is a variable name available in .df_row
200	!	if (d %in% names(.df_row)) {
201	!	d <- max(.df_row[[d]], na.rm = TRUE)
202		} else {
203	!	message(paste("precision has been reset to d = 0, as variable", d, "not present on input"))
204	!	d <- 0
205		}
206		}
207		}
208		# convert xxd type of string to xx
209	10x	fmt <- xxd_to_xx(str = str, d = d)
210
211	10x	if (!is.null(formatting_fun)) {
212	10x	fmt <- formatting_fun(fmt)
213		}
214
215	10x	return(fmt)
216		}
217
218		#' @name a_summarize_aval_chg_diff_j
219		#'
220		#' @title Analysis function 3-column presentation
221		#'
222		#' @inherit proposal_argument_convention
223		#'
224		#' @description Analysis functions to produce a 1-row summary presented in
225		#' a 3-column layout in the columns: column 1: N, column 2: Value, column 3: change\cr
226		#' In the difference columns, only 1 column will be presented : difference + CI\cr
227		#' When ancova = `TRUE`, the presented statistics will be based on ANCOVA method (`s_summarize_ancova_j`).\cr
228		#' mean and ci (both for Value (column 2) and Chg (column 3)) using statistic `lsmean_ci`\cr
229		#' mean and ci for the difference column are based on same ANCOVA model using statistic `lsmean_diffci`\cr
230		#' When ancova = `FALSE`, descriptive statistics will be used instead.\cr
231		#' In the difference column, the 2-sample t-test will be used.
232		#'
233		#' @details See Description
234		#'
235		#' @inheritParams proposal_argument_convention
236		#' @param denom (`string`)\cr choice of denominator for proportions. Options are:
237		#' * `N`: number of records in this column/row split.
238		#' \cr There is no check in place that the current split only has one record per subject.
239		#' Users should be careful with this.
240		#' * `.N_col`: number of records in this column intersection (based on alt_counts_df dataset)
241		#' \cr (when alt_counts_df is a single record per subjects, this will match number of subjects)
242		#'
243		#' @param d (default = 1) \cr choice of Decimal precision.
244		#' Note that one extra precision will be added, as means are presented.
245		#' \cr Options are:
246		#' * numerical(1)
247		#' * variable name containing information on the precision, this variable
248		#' should be available on input dataset. The content of this variable should
249		#' then be an integer.
250		#'
251		#' @param ancova (`logical`)\cr If FALSE, only descriptive methods will be used. \cr
252		#' If TRUE Ancova methods will be used for each of the columns : AVAL, CHG, DIFF. \cr
253		#' @param comp_btw_group (`logical`)\cr If TRUE,
254		#' \cr When ancova = FALSE, the estimate of between group difference (on CHG)
255		#' will be based upon a two-sample t-test. \cr
256		#' \cr When ancova = TRUE, the same ancova model will be used for the estimate of between group difference (on CHG).
257		#'
258		#' @param interaction_y (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
259		#' @param interaction_item (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
260		#' @param conf_level (`proportion`)\cr Confidence level of the interval
261		#' @param variables (named list of strings)\cr
262		#' list of additional analysis variables, with expected elements:
263		#' * arm (string)\cr
264		#' group variable, for which the covariate adjusted means of multiple groups will be summarized.
265		#' Specifically, the first level of arm variable is taken as the reference group.
266		#' * covariates (character)\cr
267		#' a vector that can contain single variable names (such as 'X1'), and/or interaction terms indicated by 'X1 * X2'.
268		#'
269		#'
270		#' @param format_na_str (`string`)\cr
271		#'
272		#' @param indatavar (`string`)\cr If not null, variable name to extra subset
273		#' incoming df to non-missing values of this variable.
274		#' @param multivars (`string(3)`)\cr Variables names to use in 3-col layout.
275		#'
276		#' @param .stats (named `list`)\cr column statistics to select for the table.
277		#' The following column names are to be used: `col1`, `col23`, `coldiff`.\cr
278		#' For `col1`, the following stats can be specified.\cr
279		#' For `col23`, only `mean_ci_3d` is available. When ancova=`TRUE` these are LS Means, otherwise, arithmetic means.\cr
280		#' For `coldiff`, only `meandiff_ci_3d` is available. When ancova=`TRUE` these
281		#' are LS difference in means, otherwise, difference in means based upon 2-sample t-test.\cr
282		#' @param .formats (named `list`)\cr formats for the column statistics. `xx.d` style formats can be used.
283		#' @param .formats_fun (named `list`)\cr formatting functions for the column
284		#' statistics, to be applied after the conversion of `xx.d` style to the
285		#' appropriate precision.
286		#'
287		#' @rdname a_summarize_aval_chg_diff_j
288		#' @return A function that can be used in an analyze function call
289		#' @export
290		#'
291		#'
292		#' @examples
293		#'
294		#' library(dplyr)
295		#'
296		#' ADEG <- data.frame(
297		#' STUDYID = c(
298		#' "DUMMY", "DUMMY", "DUMMY", "DUMMY", "DUMMY",
299		#' "DUMMY", "DUMMY", "DUMMY", "DUMMY", "DUMMY"
300		#' ),
301		#' USUBJID = c(
302		#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
303		#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
304		#' ),
305		#' TRT01A = c(
306		#' "ARMA", "ARMA", "ARMA", "ARMA", "ARMA", "Placebo",
307		#' "Placebo", "Placebo", "ARMA", "ARMA"
308		#' ),
309		#' PARAM = c("BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP", "BP"),
310		#' AVISIT = c(
311		#' "Visit 1", "Visit 1", "Visit 1", "Visit 1", "Visit 1",
312		#' "Visit 1", "Visit 1", "Visit 1", "Visit 1", "Visit 1"
313		#' ),
314		#' AVAL = c(56, 78, 67, 87, 88, 93, 39, 87, 65, 55),
315		#' CHG = c(2, 3, -1, 9, -2, 0, 6, -2, 5, 2)
316		#' )
317		#'
318		#' ADEG <- ADEG \|>
319		#' mutate(
320		#' TRT01A = as.factor(TRT01A),
321		#' STUDYID = as.factor(STUDYID)
322		#' )
323		#'
324		#' ADEG$colspan_trt <- factor(ifelse(ADEG$TRT01A == "Placebo", " ", "Active Study Agent"),
325		#' levels = c("Active Study Agent", " ")
326		#' )
327		#' ADEG$rrisk_header <- "Risk Difference (%) (95% CI)"
328		#' ADEG$rrisk_label <- paste(ADEG$TRT01A, paste("vs", "Placebo"))
329		#'
330		#' colspan_trt_map <- create_colspan_map(ADEG,
331		#' non_active_grp = "Placebo",
332		#' non_active_grp_span_lbl = " ",
333		#' active_grp_span_lbl = "Active Study Agent",
334		#' colspan_var = "colspan_trt",
335		#' trt_var = "TRT01A"
336		#' )
337		#' ref_path <- c("colspan_trt", " ", "TRT01A", "Placebo")
338		#'
339		#' lyt <- basic_table() \|>
340		#' split_cols_by(
341		#' "colspan_trt",
342		#' split_fun = trim_levels_to_map(map = colspan_trt_map)
343		#' ) \|>
344		#' split_cols_by("TRT01A") \|>
345		#' split_rows_by(
346		#' "PARAM",
347		#' label_pos = "topleft",
348		#' split_label = "Blood Pressure",
349		#' section_div = " ",
350		#' split_fun = drop_split_levels
351		#' ) \|>
352		#' split_rows_by(
353		#' "AVISIT",
354		#' label_pos = "topleft",
355		#' split_label = "Study Visit",
356		#' split_fun = drop_split_levels,
357		#' child_labels = "hidden"
358		#' ) \|>
359		#' split_cols_by_multivar(
360		#' c("AVAL", "AVAL", "CHG"),
361		#' varlabels = c("n/N (%)", "Mean (CI)", "CFB (CI)")
362		#' ) \|>
363		#' split_cols_by("rrisk_header", nested = FALSE) \|>
364		#' split_cols_by(
365		#' "TRT01A",
366		#' split_fun = remove_split_levels("Placebo"),
367		#' labels_var = "rrisk_label"
368		#' ) \|>
369		#' split_cols_by_multivar(c("CHG"), varlabels = c(" ")) \|>
370		#' analyze("STUDYID",
371		#' afun = a_summarize_aval_chg_diff_j,
372		#' extra_args = list(
373		#' format_na_str = "-", d = 0,
374		#' ref_path = ref_path, variables = list(arm = "TRT01A", covariates = NULL)
375		#' )
376		#' )
377		#'
378		#' result <- build_table(lyt, ADEG)
379		#'
380		#' result
381		#' @seealso s_summarize_ancova_j
382		#' @family Inclusion of ANCOVA Functions
383		a_summarize_aval_chg_diff_j <- function(
384		df,
385		.df_row,
386		.spl_context,
387		ancova = FALSE,
388		comp_btw_group = TRUE,
389		ref_path = NULL,
390		.N_col,
391		denom = c("N", ".N_col"),
392		indatavar = NULL,
393		d = 0,
394		id = "USUBJID",
395		interaction_y = FALSE,
396		interaction_item = NULL,
397		conf_level = 0.95,
398		variables = list(arm = "TRT01A", covariates = NULL),
399		format_na_str = "",
400		.stats = list(col1 = "count_denom_frac", col23 = "mean_ci_3d", coldiff = "meandiff_ci_3d"),
401		.formats = list(col1 = NULL, col23 = "xx.dx (xx.dx, xx.dx)", coldiff = "xx.dx (xx.dx, xx.dx)"),
402		.formats_fun = list(col1 = jjcsformat_count_denom_fraction, col23 = jjcsformat_xx, coldiff = jjcsformat_xx),
403		multivars = c("AVAL", "AVAL", "CHG")) {
404	14x	denom <- match.arg(denom)
405
406	14x	if (comp_btw_group && is.null(ref_path)) {
407	!	stop("ref_path cannot be NULL, please specify it. See ?get_ref_info for details.")
408		}
409
410	14x	if (!(length(multivars) == 3)) {
411	!	stop("argument multivars must be of length 3.")
412		}
413
414	14x	if (!(all(multivars %in% names(df)))) {
415	!	stop("all variables specified in multivars must be available on input dataset df.")
416		}
417
418	14x	if (multivars[1] == multivars[2]) {
419	14x	multivars[2] <- paste0(multivars[2], "._[[2]]_.")
420		}
421
422		if (
423	14x	!is.list(.stats) \|\|
424	14x	!all(c("col1", "col23", "coldiff") %in% names(.stats)) \|\|
425	14x	!is.list(.formats) \|\|
426	14x	!all(c("col1", "col23", "coldiff") %in% names(.formats)) \|\|
427	14x	!is.list(.formats_fun) \|\|
428	14x	!all(c("col1", "col23", "coldiff") %in% names(.formats_fun))
429		) {
430	!	stop(paste(
431	!	"Issue a_summarize_aval_chg_diff_j \n.stats/.formats/.formats_fun must",
432	!	"be a list with names c(\"col1\", \"col23\", \"coldiff\")."
433		))
434		}
435
436	14x	row_vars <- .spl_context$split
437	14x	row_val <- .spl_context$cur_col_split_val
438
439	14x	col_split <- .spl_context$cur_col_split
440
441		## treatment group value/variable KEY assumption : treatment variable is the last split_col prior to
442		## split_cols_by_multivar
443	14x	colvars <- col_split[[NROW(.spl_context)]]
444	14x	colvars_multivars <- which(colvars == "multivars")
445	14x	if (identical(colvars_multivars, integer(0))) {
446	!	stop("Layout must at least contain a split_cols_by_multivar call.")
447		}
448	14x	if (colvars_multivars == 1) {
449	!	stop("Layout must at least contain a split_cols_by prior to split_cols_by_multivar call.")
450		}
451		## KEY assumption : treatment variable is the last split_col prior to split_cols_by_multivar
452	14x	trt_var <- .spl_context$cur_col_split[[NROW(.spl_context)]][colvars_multivars - 1]
453	14x	trt_val <- .spl_context$cur_col_split_val[[NROW(.spl_context)]][colvars_multivars - 1]
454
455	14x	mysplitlevel <- length(.spl_context$split)
456	14x	cur_lvl <- .spl_context$value[[mysplitlevel]]
457	14x	cur_col_id <- .spl_context$cur_col_id[[mysplitlevel]]
458
459	14x	cur_param <- .spl_context$value[[mysplitlevel - 1]]
460
461	14x	indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE)
462
463		# Early return if we're not comparing between groups but we're in a difference column
464	14x	if (!comp_btw_group && indiffcol) {
465	!	return(rcell(NULL, format = NULL, label = cur_lvl, format_na_str = format_na_str))
466		}
467
468		## variable that is analyzed
469	14x	last_val <- utils::tail(.spl_context$cur_col_split_val[[NROW(.spl_context)]], 1)
470
471		# as AVAL will be utilized twice, the second call results in last_val == 'AVAL._[[2]]_.' take the first part of it
472		# in flast_val (will become AVAL) flast_val <- stringr::str_split_1(last_val, pattern = '._')[1]
473	14x	flast_val <- unlist(strsplit(last_val, "._"))[1]
474
475	14x	x_stats <- NULL
476	14x	fmt_d <- NULL
477
478	14x	.in_ref_col <- FALSE
479	14x	.ref_group <- NULL
480	14x	if (comp_btw_group) {
481	14x	trt_var_refspec <- utils::tail(ref_path, n = 2)[1]
482	14x	checkmate::assert_true(identical(trt_var, trt_var_refspec))
483		# ctrl_grp
484	14x	ctrl_grp <- utils::tail(ref_path, n = 1)
485
486		### check that ctrl_grp is a level of the treatment variable, in case riskdiff is requested
487	14x	if (!ctrl_grp %in% levels(df[[trt_var]])) {
488	!	stop(paste0(
489	!	"control group specification in ref_path argument (",
490	!	ctrl_grp,
491	!	") is not a level of your treatment group variable (",
492	!	trt_var,
493		")."
494		))
495		}
496
497	6x	if (trt_val == ctrl_grp) .in_ref_col <- TRUE
498
499	14x	.ref_group <- .df_row[.df_row[[trt_var]] == ctrl_grp, ]
500		}
501
502	14x	if (last_val == multivars[1] && !indiffcol) {
503	4x	mystat <- .stats[["col1"]]
504	4x	if (is.null(.formats[["col1"]])) {
505	4x	fmt <- .formats_fun[["col1"]]
506		} else {
507	!	fmt <- .formats[["col1"]]
508		}
509
510	4x	x_stats <- s_aval_chg_col1(
511	4x	df = df,
512	4x	.var = flast_val,
513	4x	denom = denom,
514	4x	.N_col = .N_col,
515	4x	id = id,
516	4x	indatavar = indatavar
517		)
518	10x	} else if (last_val %in% multivars[2:3]) {
519	10x	x_stats <- s_aval_chg_col23_diff(
520	10x	df = df,
521	10x	.var = flast_val,
522	10x	.df_row = .df_row,
523	10x	.ref_group = .ref_group,
524	10x	.in_ref_col = .in_ref_col,
525	10x	ancova = ancova,
526	10x	interaction_y = interaction_y,
527	10x	interaction_item = interaction_item,
528	10x	conf_level = conf_level,
529	10x	variables = variables,
530	10x	trt_var = trt_var,
531	10x	ctrl_grp = ctrl_grp,
532	10x	cur_param = cur_param,
533	10x	cur_lvl = cur_lvl
534		)
535
536	10x	if (comp_btw_group && indiffcol) {
537	2x	mystat1 <- "coldiff"
538		} else {
539	8x	mystat1 <- "col23"
540		}
541	10x	mystat <- .stats[[mystat1]]
542
543	10x	fmt_d <- .formats[[mystat1]]
544	10x	formatting_fun <- .formats_fun[[mystat1]]
545
546	10x	fmt <- format_xxd(fmt_d, d = d, .df_row = .df_row, formatting_fun = formatting_fun)
547		}
548	14x	x_stats <- x_stats[[mystat]]
549
550		##
551	14x	ret <- rcell(x_stats, format = fmt, label = cur_lvl, format_na_str = format_na_str)
552
553	14x	return(ret)
554		}

1		#' Helpers for Processing Least Square Means
2		#'
3		#' @param fit result of model fitting function, e.g. [mmrm::mmrm()] or [stats::lm()].
4		#' @inheritParams fit_mmrm_j
5		#' @param averages (`list`)\cr optional named list of visit levels which should be averaged
6		#' and reported along side the single visits.
7		#' @param weights (`string`)\cr argument from [emmeans::emmeans()], 'counterfactual' by default.
8		#' @param specs (`list`)\cr list of least square means specifications, with
9		#' elements `coefs` (coefficient list) and `grid` (corresponding `data.frame`).
10		#' @param emmeans_res (`list`)\cr initial `emmeans` result from [h_get_emmeans_res()].
11		#'
12		#' @name lsmeans_helpers
13		#' @keywords internal
14		NULL
15
16		#' @describeIn lsmeans_helpers returns a list with
17		#' `object` (`emmGrid` object containing `emmeans` results) and `grid`
18		#' (`data.frame` containing the potential arm and the visit variables
19		#' together with the sample size `n` for each combination).
20		h_get_emmeans_res <- function(fit, vars, weights) {
21	30x	data_complete <- stats::model.frame(fit)
22	30x	checkmate::assert_data_frame(data_complete)
23	30x	checkmate::assert_list(vars)
24
25	30x	emmeans_object <- emmeans::emmeans(fit, data = data_complete, specs = c(vars$visit, vars$arm), weights = weights)
26
27		# Save grid with renamed number of subjects column.
28	30x	visit_arm_grid <- emmeans_object@grid
29	30x	wgt_index <- match(".wgt.", names(visit_arm_grid))
30	30x	names(visit_arm_grid)[wgt_index] <- "n"
31	30x	visit_arm_grid$n <- as.integer(visit_arm_grid$n)
32
33	30x	list(object = emmeans_object, grid = visit_arm_grid)
34		}
35
36		#' @describeIn lsmeans_helpers constructs average of visits specifications.
37		h_get_average_visit_specs <- function(emmeans_res, vars, averages, fit) {
38	4x	visit_grid <- emmeans_res$grid[[vars$visit]]
39	4x	model_frame <- stats::model.frame(fit)
40	4x	averages_list <- list()
41	4x	visit_vec <- n_vec <- c()
42	4x	if (!is.null(vars$arm)) {
43	4x	arm_grid <- emmeans_res$grid[[vars$arm]]
44	4x	arm_vec <- c()
45		}
46	4x	for (i in seq_along(averages)) {
47	5x	average_label <- names(averages)[i]
48	5x	visits_average <- averages[[i]]
49	5x	checkmate::assert_subset(visits_average, choices = levels(visit_grid))
50	5x	which_visits_in_average <- visit_grid %in% visits_average
51	5x	average_coefs <- as.integer(which_visits_in_average) / length(visits_average)
52	5x	zero_coefs <- numeric(length = length(average_coefs))
53
54	5x	if (is.null(vars$arm)) {
55	!	averages_list[[average_label]] <- average_coefs
56	!	visit_vec <- c(visit_vec, average_label)
57	!	is_in_subset <- (model_frame[[vars$visit]] %in% visits_average)
58	!	this_n <- length(unique(model_frame[is_in_subset, vars$id]))
59	!	n_vec <- c(n_vec, this_n)
60		} else {
61	5x	for (this_arm in levels(arm_grid)) {
62	10x	this_coefs <- zero_coefs
63	10x	arm_average_label <- paste(this_arm, average_label, sep = ".")
64	10x	which_arm <- arm_grid == this_arm
65	10x	this_coefs[which_arm] <- average_coefs[which_arm]
66	10x	averages_list[[arm_average_label]] <- this_coefs
67	10x	arm_vec <- c(arm_vec, this_arm)
68	10x	visit_vec <- c(visit_vec, average_label)
69	10x	is_in_subset <- (model_frame[[vars$arm]] == this_arm) & (model_frame[[vars$visit]] %in% visits_average)
70	10x	this_n <- length(unique(model_frame[is_in_subset, vars$id]))
71	10x	n_vec <- c(n_vec, this_n)
72		}
73		}
74		}
75	4x	if (is.null(vars$arm)) {
76	!	averages_grid <- data.frame(visit = visit_vec, n = n_vec)
77	!	names(averages_grid) <- c(vars$visit, "n")
78		} else {
79	4x	averages_grid <- data.frame(arm = arm_vec, visit = visit_vec, n = n_vec)
80	4x	names(averages_grid) <- c(vars$arm, vars$visit, "n")
81		}
82	4x	list(coefs = averages_list, grid = averages_grid)
83		}
84
85		#' @describeIn lsmeans_helpers estimates least square means as a `data.frame`
86		#' given specifications.
87		#'
88		#' @note The difference here compared to the original tern.mmrm::h_get_spec_visit_estimates()
89		#' function is that additional arguments for [emmeans::contrast()] can be passed via the
90		#' Once this has been added to the `tern.mmrm` package then its functions can be used instead.
91		#'
92		#' @param tests (`flag`)\cr whether to add test results to the estimates.
93		#' @param ... additional arguments for [emmeans::contrast()].
94		h_get_spec_visit_estimates <- function(emmeans_res, specs, conf_level, tests = FALSE, ...) {
95	38x	checkmate::assert_list(emmeans_res)
96	38x	checkmate::assert_list(specs)
97	38x	checkmate::assert_number(conf_level)
98	38x	checkmate::assert_flag(tests)
99
100	38x	conts <- emmeans::contrast(emmeans_res$object, specs$coefs, ...)
101	38x	cis <- stats::confint(conts, level = conf_level)
102	38x	res <- cbind(
103	38x	specs$grid,
104	38x	data.frame(estimate = cis$estimate, se = cis$SE, df = cis$df, lower_cl = cis$lower.CL, upper_cl = cis$upper.CL)
105		)
106	38x	if (tests) {
107	34x	conts_df <- as.data.frame(conts)
108	34x	res$t_stat <- conts_df$t.ratio
109	34x	res$p_value <- conts_df$p.value
110	34x	res$p_value_less <- stats::pt(conts_df$t.ratio, df = conts_df$df, lower.tail = TRUE)
111	34x	res$p_value_greater <- stats::pt(conts_df$t.ratio, df = conts_df$df, lower.tail = FALSE)
112		}
113	38x	res
114		}
115
116		#' @describeIn lsmeans_helpers estimates least square means for single visits.
117		h_get_single_visit_estimates <- function(emmeans_res, conf_level) {
118	30x	checkmate::assert_list(emmeans_res)
119	30x	checkmate::assert_number(conf_level)
120
121	30x	cis <- stats::confint(emmeans_res$object, level = conf_level)
122	30x	cbind(
123	30x	emmeans_res$grid[, setdiff(names(emmeans_res$grid), "n"), drop = FALSE],
124	30x	data.frame(estimate = cis$emmean, se = cis$SE, df = cis$df, lower_cl = cis$lower.CL, upper_cl = cis$upper.CL),
125	30x	emmeans_res$grid[, "n", drop = FALSE]
126		)
127		}
128
129		#' @describeIn lsmeans_helpers constructs `data.frame` with
130		#' relative reduction vs. reference arm based on single visit estimates.
131		#' @param estimates (`data.frame`)\cr single visit least square mean estimates.
132		h_get_relative_reduc_df <- function(estimates, vars) {
133	30x	checkmate::assert_data_frame(estimates)
134	30x	checkmate::assert_list(vars)
135
136	30x	ref_arm_level <- estimates[[vars$arm]][1L]
137	30x	ref_estimates <- estimates[estimates[[vars$arm]] == ref_arm_level, c(vars$visit, "estimate")]
138	30x	names(ref_estimates)[2L] <- "ref"
139	30x	result <- merge(estimates[estimates[[vars$arm]] != ref_arm_level, ], ref_estimates, by = vars$visit, sort = FALSE)
140	30x	result$relative_reduc <- (result$ref - result$estimate) / result$ref
141	30x	result[, c(vars$visit, vars$arm, "relative_reduc")]
142		}
143
144		#' @describeIn lsmeans_helpers constructs single visit contrast specifications.
145		h_single_visit_contrast_specs <- function(emmeans_res, vars) {
146	10x	checkmate::assert_list(emmeans_res)
147	10x	checkmate::assert_list(vars)
148
149	10x	emmeans_res$grid$index <- seq_len(nrow(emmeans_res$grid))
150
151	10x	grid_by_visit <- split(emmeans_res$grid, emmeans_res$grid[[vars$visit]])
152
153	10x	arm_levels <- emmeans_res$object@levels[[vars$arm]]
154	10x	ref_arm_level <- arm_levels[1L]
155	10x	zeros_coefs <- numeric(nrow(emmeans_res$grid))
156	10x	overall_list <- list()
157	10x	arm_vec <- visit_vec <- c()
158	10x	for (j in seq_along(grid_by_visit)) {
159	40x	this_grid <- grid_by_visit[[j]]
160	40x	ref_index <- which(this_grid[[vars$arm]] == ref_arm_level)
161	40x	this_visit <- names(grid_by_visit)[j]
162	40x	this_ref_coefs <- zeros_coefs
163	40x	this_ref_coefs[this_grid$index[ref_index]] <- -1
164	40x	this_list <- list()
165	40x	for (i in seq_len(nrow(this_grid))[-ref_index]) {
166	64x	this_coefs <- this_ref_coefs
167	64x	this_coefs[this_grid$index[i]] <- 1
168	64x	this_arm <- as.character(this_grid[[vars$arm]][i])
169	64x	arm_vec <- c(arm_vec, this_arm)
170	64x	visit_vec <- c(visit_vec, this_visit)
171	64x	this_label <- paste(this_arm, this_visit, sep = ".")
172	64x	this_list[[this_label]] <- this_coefs
173		}
174	40x	overall_list <- c(overall_list, this_list)
175		}
176
177	10x	grid <- data.frame(arm = arm_vec, visit = visit_vec)
178	10x	names(grid) <- c(vars$arm, vars$visit)
179	10x	list(coefs = overall_list, grid = grid)
180		}
181
182		#' @describeIn lsmeans_helpers constructs average visits contrast specifications,
183		#' given the `specs` for single visit contrasts and the averages required.
184		h_average_visit_contrast_specs <- function(specs, averages) {
185	4x	arm_visit_grid <- specs$grid
186	4x	arm_visit_grid$index <- seq_len(nrow(arm_visit_grid))
187	4x	grid_by_arm <- split(arm_visit_grid, arm_visit_grid[, 1L])
188	4x	overall_list <- list()
189	4x	arm_vec <- visit_vec <- c()
190	4x	for (j in seq_along(grid_by_arm)) {
191	4x	this_arm <- names(grid_by_arm)[j]
192	4x	this_grid <- grid_by_arm[[j]]
193	4x	for (i in seq_along(averages)) {
194	5x	average_label <- names(averages)[i]
195	5x	visits_average <- averages[[i]]
196	5x	which_visits_in_average <- this_grid[, 2L] %in% visits_average
197	5x	averaged_indices <- this_grid$index[which_visits_in_average]
198	5x	this_comb <- paste(this_arm, average_label, sep = ".")
199	5x	averaged_coefs <- colMeans(do.call(rbind, specs$coefs[averaged_indices]))
200	5x	overall_list[[this_comb]] <- averaged_coefs
201	5x	arm_vec <- c(arm_vec, this_arm)
202	5x	visit_vec <- c(visit_vec, average_label)
203		}
204		}
205
206	4x	grid <- data.frame(arm = arm_vec, visit = visit_vec)
207	4x	names(grid) <- names(arm_visit_grid[, c(1, 2)])
208
209	4x	list(coefs = overall_list, grid = grid)
210		}

1		#' Workaround statistics function to add HR with CI
2		#'
3		#' This is a workaround for [tern::s_coxph_pairwise()], which adds a statistic
4		#' containing the hazard ratio estimate together with the confidence interval.
5		#'
6		#' @inheritParams proposal_argument_convention
7		#'
8		#'
9		#' @name coxph_hr
10		#' @return for `s_coxph_hr` a list containing the same statistics returned by [tern::s_coxph_pairwise]
11		#' and the additional `lr_stat_df` statistic. for `a_coxph_hr`, a `VerticalRowsSection`
12		#' object.
13		#' @order 1
14		NULL
15
16		#' @describeIn coxph_hr Statistics function forked from [tern::s_coxph_pairwise()].
17		#' the difference is that:
18		#' 1) It returns the additional statistic `lr_stat_df` (log rank statistic with degrees of freedom).
19		#' @export
20		#' @order 3
21		#'
22		#' @importFrom survival Surv coxph survdiff
23		#'
24		#' @examples
25		#' adtte_f <- tern::tern_ex_adtte \|>
26		#' dplyr::filter(PARAMCD == "OS") \|>
27		#' dplyr::mutate(is_event = CNSR == 0)
28		#' df <- adtte_f \|> dplyr::filter(ARMCD == "ARM A")
29		#' df_ref <- adtte_f \|> dplyr::filter(ARMCD == "ARM B")
30		#'
31		#' s_coxph_hr(
32		#' df = df,
33		#' .ref_group = df_ref,
34		#' .in_ref_col = FALSE,
35		#' .var = "AVAL",
36		#' is_event = "is_event",
37		#' strata = NULL
38		#' )
39		s_coxph_hr <- function(
40		df,
41		.ref_group,
42		.in_ref_col,
43		.var,
44		is_event,
45		strata = NULL,
46		control = control_coxph(),
47		alternative = c("two.sided", "less", "greater")) {
48	9x	checkmate::assert_string(.var)
49	9x	checkmate::assert_numeric(df[[.var]])
50	9x	checkmate::assert_logical(df[[is_event]])
51	9x	assert_df_with_variables(df, list(tte = .var, is_event = is_event))
52	9x	alternative <- match.arg(alternative)
53	9x	pval_method <- control$pval_method
54	9x	ties <- control$ties
55	9x	conf_level <- control$conf_level
56
57	9x	if (.in_ref_col) {
58	2x	return(list(
59	2x	pvalue = with_label(list(), paste0("p-value (", pval_method, ")")),
60	2x	lr_stat_df = list(),
61	2x	hr = list(),
62	2x	hr_ci = with_label(list(), f_conf_level(conf_level)),
63	2x	hr_ci_3d = with_label(list(), paste0("Hazard Ratio (", f_conf_level(conf_level), ")")),
64	2x	n_tot = list(),
65	2x	n_tot_events = list()
66		))
67		}
68	7x	data <- rbind(.ref_group, df)
69	7x	group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))
70
71	7x	df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], arm = group)
72	7x	if (is.null(strata)) {
73	3x	formula_cox <- survival::Surv(tte, is_event) ~ arm
74		} else {
75	4x	formula_cox <- stats::as.formula(paste0(
76	4x	"survival::Surv(tte, is_event) ~ arm + strata(",
77	4x	paste(strata, collapse = ","),
78		")"
79		))
80	4x	df_cox <- cbind(df_cox, data[strata])
81		}
82	7x	cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, ties = ties)
83	7x	sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)
84	7x	original_survdiff <- survival::survdiff(formula_cox, data = df_cox)
85	7x	log_rank_stat <- original_survdiff$chisq
86
87		# See survival::survdiff for the d.f. calculation.
88	7x	etmp <- if (is.matrix(original_survdiff$exp)) {
89	4x	apply(original_survdiff$exp, 1, sum)
90		} else {
91	3x	original_survdiff$exp
92		}
93	7x	log_rank_df <- (sum(1 * (etmp > 0))) - 1
94		# Check the consistency of the d.f. with the p-value returned by survival::survdiff.
95	7x	log_rank_pvalue <- stats::pchisq(log_rank_stat, log_rank_df, lower.tail = FALSE)
96	7x	checkmate::assert_true(all.equal(log_rank_pvalue, original_survdiff$pvalue))
97
98	7x	pval <- switch(pval_method,
99	7x	wald = sum_cox$waldtest["pvalue"],
100	7x	`log-rank` = log_rank_pvalue,
101	7x	likelihood = sum_cox$logtest["pvalue"]
102		)
103
104		# Handle one-sided alternatives.
105	7x	if (alternative != "two.sided") {
106	2x	right_direction <- if (alternative == "less") {
107	1x	sum_cox$conf.int[1, 1] < 1
108		} else {
109	1x	sum_cox$conf.int[1, 1] >= 1
110		}
111	2x	pval <- if (right_direction) {
112	1x	pval / 2
113		} else {
114	1x	1 - pval / 2
115		}
116		}
117
118	7x	list(
119	7x	pvalue = with_label(unname(pval), paste0("p-value (", pval_method, ")")),
120	7x	lr_stat_df = unname(c(log_rank_stat, log_rank_df)),
121	7x	hr = sum_cox$conf.int[1, 1],
122	7x	hr_ci = with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
123	7x	hr_ci_3d = with_label(
124	7x	c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])),
125	7x	paste0("Hazard Ratio (", f_conf_level(conf_level), ")")
126		),
127	7x	n_tot = sum_cox$n,
128	7x	n_tot_events = sum_cox$nevent
129		)
130		}
131
132		#' @describeIn coxph_hr Formatted analysis function which is used as `afun`.
133		#'
134		#' @examples
135		#' library(dplyr)
136		#'
137		#' adtte_f <- tern::tern_ex_adtte \|>
138		#' filter(PARAMCD == "OS") \|>
139		#' mutate(is_event = CNSR == 0)
140		#'
141		#' df <- adtte_f \|> filter(ARMCD == "ARM A")
142		#' df_ref_group <- adtte_f \|> filter(ARMCD == "ARM B")
143		#'
144		#' basic_table() \|>
145		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") \|>
146		#' add_colcounts() \|>
147		#' analyze("AVAL",
148		#' afun = s_coxph_hr,
149		#' extra_args = list(is_event = "is_event"),
150		#' var_labels = "Unstratified Analysis",
151		#' show_labels = "visible"
152		#' ) \|>
153		#' build_table(df = adtte_f)
154		#'
155		#' basic_table() \|>
156		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") \|>
157		#' add_colcounts() \|>
158		#' analyze("AVAL",
159		#' afun = s_coxph_hr,
160		#' extra_args = list(
161		#' is_event = "is_event",
162		#' strata = "SEX",
163		#' control = tern::control_coxph(pval_method = "wald")
164		#' ),
165		#' var_labels = "Unstratified Analysis",
166		#' show_labels = "visible"
167		#' ) \|>
168		#' build_table(df = adtte_f)
169		#' @export
170		#' @order 2
171		a_coxph_hr <- function(
172		df,
173		.var,
174		ref_path,
175		.spl_context,
176		...,
177		.stats = NULL,
178		.formats = NULL,
179		.labels = NULL,
180		.indent_mods = NULL) {
181		# Check for additional parameters to the statistics function
182	6x	dots_extra_args <- list(...)
183
184		# Only support default stats, not custom stats
185	6x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
186
187		# Obtain reference column information
188	6x	ref <- get_ref_info(ref_path, .spl_context)
189
190		# Apply statistics function
191	6x	x_stats <- .apply_stat_functions(
192	6x	default_stat_fnc = s_coxph_hr,
193	6x	custom_stat_fnc_list = NULL,
194	6x	args_list = c(
195	6x	df = list(df),
196	6x	.var = .var,
197	6x	.ref_group = list(ref$ref_group),
198	6x	.in_ref_col = ref$in_ref_col,
199	6x	dots_extra_args
200		)
201		)
202
203		# Format according to specifications
204	6x	format_stats(
205	6x	x_stats,
206	6x	method_groups = "coxph_hr",
207	6x	stats_in = .stats,
208	6x	formats_in = .formats,
209	6x	labels_in = .labels,
210	6x	indents_in = .indent_mods
211		)
212		}

1		#' Tabulation of RBMI Results
2		#'
3		#' @description `r lifecycle::badge('stable')`
4		#'
5		#' These functions can be used to produce tables from RBMI.
6		#'
7		#' @name tabulate_rbmi
8		#'
9		#' @note These functions have been forked from `tern.rbmi`. Additional features are:
10		#'
11		#' * Additional `ref_path` argument.
12		#' * Extraction of variance statistics in the `tidy()` method.
13		#' * Adapted to `rbmi` forked functions update with more than two treatment groups.
14		NULL
15
16		#' @describeIn tabulate_rbmi Helper function to produce data frame with results
17		#' of pool for a single visit.
18		#'
19		#' @param x (`list`)\cr is a list of pooled object from `rbmi` analysis results.
20		#' This list includes analysis results, confidence level, hypothesis testing type.
21		#' @param visit_name (`string`)\cr single visit level.
22		#' @param group_names (`character`)\cr group levels.
23		#' @return The `data.frame` with results of pooled analysis for a single visit.
24		#'
25		#' @export
26		h_tidy_pool <- function(x, visit_name, group_names) {
27	13x	checkmate::assert_list(x)
28	13x	checkmate::assert_string(visit_name)
29	13x	checkmate::assert_character(group_names)
30
31	13x	ref_name <- paste0("lsm_", group_names[1], "_", visit_name)
32	13x	ref <- x[[ref_name]]
33
34	13x	list2df <- \(l) with(l, data.frame(est, ci_l = ci[1], ci_u = ci[2], se, pvalue, df))
35
36	13x	var_name <- paste0("var_", visit_name)
37	13x	var <- if (var_name %in% names(x)) {
38		# This is the case for ANCOVA.
39	11x	list2df(x[[var_name]])
40		} else {
41		# This is the case for MMRM.
42	2x	var_names <- paste0("var_", group_names[-1], "_", visit_name)
43	2x	checkmate::assert_subset(var_names, names(x))
44	2x	var_dfs <- lapply(x[var_names], list2df)
45	2x	var <- do.call(rbind, var_dfs)
46		}
47
48	13x	contr_names <- paste0("trt_", group_names[-1], "_", visit_name)
49	13x	contr_dfs <- lapply(x[contr_names], list2df)
50	13x	contr <- do.call(rbind, contr_dfs)
51
52	13x	alt_names <- paste0("lsm_", group_names[-1], "_", visit_name)
53	13x	alt_dfs <- lapply(x[alt_names], list2df)
54	13x	alt <- do.call(rbind, alt_dfs)
55
56	13x	df_ref <- data.frame(
57	13x	visit = visit_name,
58	13x	group = group_names[1],
59	13x	est = ref$est,
60	13x	se_est = ref$se,
61	13x	lower_cl_est = ref$ci[1],
62	13x	upper_cl_est = ref$ci[2],
63	13x	est_contr = NA_real_,
64	13x	se_contr = NA_real_,
65	13x	lower_cl_contr = NA_real_,
66	13x	upper_cl_contr = NA_real_,
67	13x	p_value = NA_real_,
68	13x	relative_reduc = NA_real_,
69	13x	mse = NA_real_,
70	13x	df = NA_real_,
71	13x	stringsAsFactors = FALSE
72		)
73	13x	df_alt <- data.frame(
74	13x	visit = visit_name,
75	13x	group = group_names[-1],
76	13x	est = alt$est,
77	13x	se_est = alt$se,
78	13x	lower_cl_est = alt$ci_l,
79	13x	upper_cl_est = alt$ci_u,
80	13x	est_contr = contr$est,
81	13x	se_contr = contr$se,
82	13x	lower_cl_contr = contr$ci_l,
83	13x	upper_cl_contr = contr$ci_u,
84	13x	p_value = contr$pvalue,
85	13x	relative_reduc = contr$est / df_ref$est,
86	13x	mse = var$est,
87	13x	df = var$df,
88	13x	stringsAsFactors = FALSE
89		)
90	13x	rbind(df_ref, df_alt)
91		}
92
93		#' Helper method (for [`broom::tidy()`]) to prepare a data frame from an
94		#' `pool` `rbmi` object containing the LS means and contrasts and multiple visits
95		#'
96		#' @method tidy pool
97		#' @param x (`pool`) is a list of pooled object from `rbmi` analysis results. This list includes
98		#' analysis results, confidence level, hypothesis testing type.
99		#' @param visits (`character`)\cr all visit levels. Otherwise too hard to guess this.
100		#' @param ... Additional arguments. Not used. Needed to match generic signature only.
101		#' @importFrom generics tidy
102		#' @export
103		#' @keywords internal
104		#' @return A `data.frame`.
105		tidy.pool <- function(x, visits, ...) {
106	6x	ls_raw <- x$pars
107
108	6x	has_lsm <- grepl("^lsm_", names(ls_raw))
109	6x	has_first_visit <- grepl(paste0("_", visits[1]), names(ls_raw), fixed = TRUE)
110	6x	is_lsm_first_visit <- has_lsm & has_first_visit
111
112	6x	group_names <- names(ls_raw)[is_lsm_first_visit]
113	6x	group_names <- gsub(pattern = "^lsm_", replacement = "", x = group_names)
114	6x	group_names <- gsub(pattern = paste0("_", visits[1]), replacement = "", x = group_names, fixed = TRUE)
115
116	6x	spl <- rep(visits, each = length(ls_raw) / length(visits))
117
118	6x	ls_split <- split(ls_raw, spl)
119
120	6x	ls_df <- mapply(
121	6x	FUN = h_tidy_pool,
122	6x	x = ls_split,
123	6x	visit_name = visits,
124	6x	MoreArgs = list(group_names = group_names),
125	6x	SIMPLIFY = FALSE
126		)
127
128	6x	result <- do.call(rbind, unname(ls_df))
129
130	6x	result$visit <- factor(result$visit, levels = visits)
131	6x	result$group <- factor(result$group, levels = group_names)
132	6x	result$conf_level <- x$conf.level
133
134	6x	result
135		}
136
137		#' @describeIn tabulate_rbmi Statistics function which is extracting estimates
138		#' from a tidied RBMI results data frame.
139		#'
140		#' @param df (`data.frame`)\cr input with LS means results.
141		#' @param .in_ref_col (`flag`)\cr whether reference column is specified.
142		#' @param show_relative (`string`)\cr 'reduction' if (`control - treatment`, default)
143		#' or 'increase' (`treatment - control`) of relative change from baseline?
144		#' @return A list of statistics extracted from a tidied LS means data frame.
145		#' @export
146		s_rbmi_lsmeans <- function(df, .in_ref_col, show_relative = c("reduction", "increase")) {
147	7x	checkmate::assert_flag(.in_ref_col)
148
149	7x	show_relative <- match.arg(show_relative)
150	7x	if_not_ref <- function(x) if (.in_ref_col) character() else x
151	7x	list(
152	7x	adj_mean_se = c(df$est, df$se_est),
153	7x	adj_mean_ci = with_label(c(df$lower_cl_est, df$upper_cl_est), f_conf_level(df$conf_level)),
154	7x	diff_mean_se = if_not_ref(c(df$est_contr, df$se_contr)),
155	7x	diff_mean_ci = with_label(
156	7x	if_not_ref(c(df$lower_cl_contr, df$upper_cl_contr)),
157	7x	f_conf_level(df$conf_level)
158		),
159	7x	change = switch(show_relative,
160	7x	reduction = with_label(if_not_ref(df$relative_reduc), "Relative Reduction (%)"),
161	7x	increase = with_label(if_not_ref(-df$relative_reduc), "Relative Increase (%)")
162		),
163	7x	p_value = if_not_ref(df$p_value),
164	7x	additional_title_row = NULL
165		)
166		}
167
168		#' @describeIn tabulate_rbmi Formatted Analysis function which is used as `afun`.
169		#'
170		#' @inheritParams proposal_argument_convention
171		#' @export
172		a_rbmi_lsmeans <- function(
173		df,
174		ref_path,
175		.spl_context,
176		...,
177		.stats = NULL,
178		.formats = NULL,
179		.labels = NULL,
180		.indent_mods = NULL) {
181		# Check for additional parameters to the statistics function
182	4x	dots_extra_args <- list(...)
183
184		# Only support default stats, not custom stats
185	4x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
186
187		# Obtain reference column information
188	4x	ref <- get_ref_info(ref_path, .spl_context)
189
190		# Apply statistics function
191	4x	x_stats <- .apply_stat_functions(
192	4x	default_stat_fnc = s_rbmi_lsmeans,
193	4x	custom_stat_fnc_list = NULL,
194	4x	args_list = c(df = list(df), .in_ref_col = ref$in_ref_col, dots_extra_args)
195		)
196
197		# Format according to specifications
198	4x	format_stats(
199	4x	x_stats,
200	4x	method_groups = "tabulate_rbmi",
201	4x	stats_in = .stats,
202	4x	formats_in = .formats,
203	4x	labels_in = .labels,
204	4x	indents_in = .indent_mods
205		)
206		}

1		#' Split Function Factory for the Response Tables (RESP01)
2		#'
3		#' The main purpose here is to have a column dependent split into either comparative
4		#' statistic (relative risk or odds ratio with p-value) in the 'Overall' column,
5		#' and count proportions and corresponding confidence intervals in the other treatment
6		#' arm columns.
7		#'
8		#' @inheritParams proposal_argument_convention
9		#' @param method (`string`)\cr which method to use for the comparative statistics.
10		#'
11		#' @return A split function for use in the response table RESP01 and similar ones.
12		#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of
13		#' post-processing function.
14		#' @export
15		#'
16		#' @examples
17		#' split_fun <- resp01_split_fun_fct(
18		#' method = "or_cmh",
19		#' conf_level = 0.95
20		#' )
21		resp01_split_fun_fct <- function(method = c("rr", "or_logistic", "or_cmh"), conf_level) {
22	2x	method <- match.arg(method)
23	2x	(assert_proportion_value)(conf_level)
24
25	2x	post_fun <- function(ret, spl, fulldf, .spl_context) {
26	8x	all_expr <- expression(TRUE)
27		# Here check if we are in 'Overall' column or not.
28	8x	this_col <- .spl_context[nrow(.spl_context), "value"][[1]]
29	8x	in_overall <- this_col == "Overall"
30		# Accordingly, return left and right column labels and value identifiers.
31	8x	if (in_overall) {
32		# In the Overall column, we want the comparison statistic and corresponding p-value.
33	2x	comp_stat_name <- if (method == "rr") "Relative Risk" else "Odds Ratio"
34	2x	short_split_result(
35	2x	comp_stat_ci = paste0(comp_stat_name, " (", f_conf_level(conf_level), ")~[super a]"),
36	2x	pval = "p-value~[super b]",
37	2x	fulldf = fulldf
38		)
39		} else {
40		# In the treatment arm columns, we want counts and CI for proportions.
41	6x	short_split_result(
42	6x	count_prop = "n (%)",
43	6x	prop_ci = paste0(f_conf_level(conf_level), " for %"),
44	6x	fulldf = fulldf
45		)
46		}
47		}
48	2x	make_split_fun(post = list(post_fun))
49		}
50
51		#' Content Row Function for Counts of Subgroups in Response Tables (RESP01)
52		#'
53		#' @inheritParams proposal_argument_convention
54		#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
55		#' @param label_fstr (`string`)\cr format string for the label.
56		#'
57		#' @return The correct [rtables::in_rows()] result.
58		#' @export
59		#'
60		#' @examples
61		#' fake_spl_context <- data.frame(
62		#' cur_col_split_val = I(list(c(ARM = "A: Drug X", count_prop = "count_prop")))
63		#' )
64		#' resp01_counts_cfun(
65		#' df = DM,
66		#' labelstr = "Blue",
67		#' .spl_context = fake_spl_context,
68		#' .alt_df = DM,
69		#' label_fstr = "Color: %s"
70		#' )
71		resp01_counts_cfun <- function(df, labelstr, .spl_context, .alt_df, label_fstr) {
72	3x	this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
73	3x	if (this_col_split[1] != "Overall" && this_col_split[2] == "count_prop") {
74	1x	in_rows(nrow(.alt_df), .formats = "xx", .labels = sprintf(label_fstr, labelstr))
75		} else {
76	2x	NULL
77		}
78		}
79
80		#' Formatted Analysis Function for Comparative Statistic in Response Tables (RESP01)
81		#'
82		#' This function applies to a `logical` column called `.var` from `df`.
83		#' The response proportion is compared between the treatment arms identified
84		#' by column `arm`.
85		#'
86		#' @inheritParams proposal_argument_convention
87		#' @param include (`flag`)\cr whether to include the results for this variable.
88		#' @param arm (`string`)\cr column name in the data frame that identifies the treatment arms.
89		#' @param formats (`list`)\cr containing formats for `comp_stat_ci` and `pval`.
90		#' @param methods (`list`)\cr containing methods for comparative statistics. The element `comp_stat_ci` can be
91		#' 'rr' (relative risk), 'or_cmh' (odds ratio with CMH estimation and p-value) or 'or_logistic' (odds ratio
92		#' estimated by conditional or standard logistic regression). The element `pval` can be 'fisher' (Fisher's
93		#' exact test) or 'chisq' (chi-square test), only used when using unstratified analyses with 'or_logistic'.
94		#' @param stat (`string`)\cr the statistic to return, either `comp_stat_ci`
95		#' or `pval`.
96		#'
97		#' @return The formatted result as [rtables::rcell()].
98		#' @seealso [resp01_a_comp_stat_factor()] for the `factor` equivalent.
99		#' @export
100		#'
101		#' @examples
102		#' dm <- droplevels(subset(formatters::DM, SEX %in% c("F", "M")))
103		#' dm$RESP <- as.logical(sample(c(TRUE, FALSE), size = nrow(DM), replace = TRUE))
104		#'
105		#' resp01_a_comp_stat_logical(
106		#' dm,
107		#' .var = "RESP",
108		#' conf_level = 0.9,
109		#' include = TRUE,
110		#' arm = "SEX",
111		#' strata = "RACE",
112		#' stat = "comp_stat_ci",
113		#' method = list(comp_stat_ci = "or_cmh"),
114		#' formats = list(
115		#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
116		#' pval = jjcsformat_pval_fct(0.05)
117		#' )
118		#' )
119		resp01_a_comp_stat_logical <- function(df,
120		.var,
121		conf_level,
122		include,
123		arm,
124		strata,
125		formats,
126		methods,
127		stat = c("comp_stat_ci", "pval")) {
128	30x	checkmate::assert_logical(df[[.var]])
129	30x	checkmate::assert_factor(df[[arm]], n.levels = 2L)
130	30x	checkmate::assert_flag(include)
131	30x	stat <- match.arg(stat)
132
133	30x	if (include) {
134	9x	checkmate::assert_list(methods)
135	9x	checkmate::assert_subset(methods$comp_stat_ci, c("rr", "or_cmh", "or_logistic"))
136
137	9x	df_by <- split(df, f = df[[arm]])
138	9x	this_df <- df_by[[1]]
139	9x	this_ref_group <- df_by[[2]]
140
141	9x	this_res <- if (methods$comp_stat_ci == "rr") {
142		# Stratified CMH test for point estimate and p-value, and Wald statistic for the confidence interval
143	!	s_res <- s_relative_risk(
144	!	this_df,
145	!	.var = .var,
146	!	.ref_group = this_ref_group,
147	!	.in_ref_col = FALSE,
148	!	variables = list(strata = strata),
149	!	conf_level = conf_level
150		)
151	!	list(comp_stat_ci = s_res$rel_risk_ci, pval = s_res$pval)
152	9x	} else if (methods$comp_stat_ci == "or_logistic") {
153		# Odds Ratio Variant 1: (Conditional/Standard) Logistic regression for odds ratio point estimate and
154		# confidence interval.
155	3x	s_res <- s_odds_ratio_j(
156	3x	this_df,
157	3x	.var = .var,
158	3x	.ref_group = this_ref_group,
159	3x	.in_ref_col = FALSE,
160	3x	.df_row = rbind(this_df, this_ref_group),
161	3x	variables = list(arm = arm, strata = strata),
162	3x	conf_level = conf_level,
163	3x	method = "exact" # Only used if strata are present.
164		)
165	3x	pval <- if (is.null(strata)) {
166	3x	checkmate::assert_subset(methods$pval, c("fisher", "chisq"))
167
168		# If not stratified, then the p-value can come either from Fisher's exact or chi-square test.
169	3x	p_res <- s_test_proportion_diff(
170	3x	this_df,
171	3x	.var = .var,
172	3x	.ref_group = this_ref_group,
173	3x	.in_ref_col = FALSE,
174	3x	method = methods$pval
175		)
176	3x	p_res$pval
177		} else {
178	!	s_res$pval
179		}
180	3x	list(comp_stat_ci = s_res$or_ci, pval = pval)
181	9x	} else if (methods$comp_stat_ci == "or_cmh") {
182		# Odds Ratio Variant 2: Stratified CMH test for odds ratio point estimate and confidence interval and
183		# corresponding CMH test p-value.
184	6x	s_res <- s_odds_ratio_j(
185	6x	this_df,
186	6x	.var = .var,
187	6x	.ref_group = this_ref_group,
188	6x	.in_ref_col = FALSE,
189	6x	.df_row = rbind(this_df, this_ref_group),
190	6x	variables = list(arm = arm, strata = strata),
191	6x	conf_level = conf_level,
192	6x	method = "cmh"
193		)
194	6x	list(comp_stat_ci = s_res$or_ci, pval = s_res$pval)
195		}
196	9x	rcell(this_res[[stat]], format = formats[[stat]])
197		} else {
198	21x	NULL
199		}
200		}
201
202		#' Formatted Analysis Function for Comparative Statistic in Response Tables (RESP01)
203		#'
204		#' This function applies to a `factor` column called `.var` from `df`.
205		#'
206		#' @inheritParams proposal_argument_convention
207		#' @param include (`character`)\cr for which factor levels to include the comparison
208		#' statistic results.
209		#' @param \dots see [resp01_a_comp_stat_logical()] for additional required arguments.
210		#'
211		#' @return The formatted result as [rtables::rcell()].
212		#' @export
213		#'
214		#' @examples
215		#' dm <- droplevels(subset(formatters::DM, SEX %in% c("F", "M")))
216		#'
217		#' resp01_a_comp_stat_factor(
218		#' dm,
219		#' .var = "COUNTRY",
220		#' conf_level = 0.9,
221		#' include = c("USA", "CHN"),
222		#' arm = "SEX",
223		#' strata = "RACE",
224		#' stat = "comp_stat_ci",
225		#' method = list(comp_stat_ci = "or_cmh"),
226		#' formats = list(
227		#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
228		#' pval = jjcsformat_pval_fct(0.05)
229		#' )
230		#' )
231		resp01_a_comp_stat_factor <- function(df,
232		.var,
233		include,
234		...) {
235	3x	checkmate::assert_factor(df[[.var]])
236	3x	checkmate::assert_character(include, null.ok = TRUE)
237	3x	checkmate::assert_subset(x = include, choices = levels(df[[.var]]))
238
239	3x	design_df <- as.data.frame(h_get_design_mat(df, .var))
240
241	3x	res <- sapply(X = levels(df[[.var]]), simplify = FALSE, FUN = function(lvl) {
242	27x	df[[.var]] <- design_df[[lvl]]
243	27x	resp01_a_comp_stat_logical(df = df, .var = .var, include = lvl %in% include, ...)
244		})
245	3x	in_rows(.list = res, .labels = levels(df[[.var]]))
246		}
247
248		#' Formatted Analysis and Content Summary Function for Response Tables (RESP01)
249		#'
250		#' This function applies to both `factor` and `logical` columns called
251		#' `.var` from `df`. Depending on the position in the split, it returns the
252		#' right formatted results for the RESP01 and related layouts.
253		#'
254		#' @inheritParams proposal_argument_convention
255		#' @inheritParams resp01_a_comp_stat_logical
256		#' @param include_comp (`character` or `flag`)\cr whether to include comparative
257		#' statistic results, either `character` for factors or `flag` for logicals.
258		#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
259		#' @param arm (`string`)\cr column name in the data frame that identifies the treatment arms.
260		#' @param label (`string`)\cr only for logicals, which label to use. (For factors, the
261		#' labels are the factor levels.)
262		#' @param formats (`list`)\cr containing formats for `prop_ci`, `comp_stat_ci`
263		#' and `pval`.
264		#' @param methods (`list`)\cr containing methods for comparative statistics. The element `comp_stat_ci` can be
265		#' 'rr' (relative risk), 'or_cmh' (odds ratio with CMH estimation and p-value) or 'or_logistic' (odds ratio
266		#' estimated by conditional or standard logistic regression). The element `pval` can be 'fisher' (Fisher's
267		#' exact test) or 'chisq' (chi-square test), only used when using unstratified analyses with 'or_logistic'.
268		#' The element `prop_ci` specifies the method for proportion confidence interval calculation.
269		#'
270		#' @return The formatted result as [rtables::in_rows()] result.
271		#' @export
272		#'
273		#' @examples
274		#' fake_spl_context <- data.frame(
275		#' cur_col_split_val = I(list(c(ARM = "A: Drug X", count_prop = "count_prop")))
276		#' )
277		#' dm <- droplevels(subset(DM, SEX %in% c("F", "M")))
278		#' resp01_acfun(
279		#' dm,
280		#' .alt_df = dm,
281		#' .var = "COUNTRY",
282		#' .spl_context = fake_spl_context,
283		#' conf_level = 0.9,
284		#' include_comp = c("USA", "CHN"),
285		#' arm = "SEX",
286		#' strata = "RACE",
287		#' methods = list(
288		#' comp_stat_ci = "or_cmh",
289		#' pval = "",
290		#' prop_ci = "wald"
291		#' ),
292		#' formats = list(
293		#' prop_ci = jjcsformat_xx("xx.% - xx.%"),
294		#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
295		#' pval = jjcsformat_pval_fct(0.05)
296		#' )
297		#' )
298		#' fake_spl_context2 <- data.frame(
299		#' cur_col_split_val = I(list(c(ARM = "Overall", comp_stat_ci = "comp_stat_ci")))
300		#' )
301		#' resp01_acfun(
302		#' dm,
303		#' .alt_df = dm,
304		#' .var = "COUNTRY",
305		#' .spl_context = fake_spl_context2,
306		#' conf_level = 0.9,
307		#' include_comp = c("USA", "CHN"),
308		#' arm = "SEX",
309		#' strata = "RACE",
310		#' methods = list(
311		#' comp_stat_ci = "or_cmh",
312		#' pval = "",
313		#' prop_ci = "wald"
314		#' ),
315		#' formats = list(
316		#' prop_ci = jjcsformat_xx("xx.% - xx.%"),
317		#' comp_stat_ci = jjcsformat_xx("xx.xx (xx.xx - xx.xx)"),
318		#' pval = jjcsformat_pval_fct(0.05)
319		#' )
320		#' )
321		resp01_acfun <- function(df,
322		labelstr = NULL,
323		label = NULL,
324		.var,
325		.spl_context,
326		include_comp,
327		.alt_df,
328		conf_level,
329		arm,
330		strata,
331		formats,
332		methods) {
333	3x	this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
334	3x	x <- df[[.var]]
335	3x	is_factor <- is.factor(x)
336	3x	res <- if (this_col_split[1] != "Overall") {
337	1x	if (this_col_split[2] == "count_prop") {
338	1x	if (is_factor) {
339	1x	s_proportion_factor(x, .alt_df = .alt_df)
340		} else {
341	!	s_proportion_logical(x, .alt_df = .alt_df)$n_prop
342		}
343		} else {
344	!	checkmate::assert_true(this_col_split[2] == "prop_ci")
345	!	if (is_factor) {
346	!	a_proportion_ci_factor(
347	!	df,
348	!	.var = .var,
349	!	.alt_df = .alt_df,
350	!	conf_level = conf_level,
351	!	formats = formats,
352	!	method = methods$prop_ci
353		)
354		} else {
355	!	a_proportion_ci_logical(
356	!	x,
357	!	.alt_df = .alt_df,
358	!	conf_level = conf_level,
359	!	formats = formats,
360	!	method = methods$prop_ci
361		)
362		}
363		}
364		} else {
365	2x	checkmate::assert_true(this_col_split[1] == "Overall")
366	2x	if (is_factor) {
367	1x	resp01_a_comp_stat_factor(
368	1x	df,
369	1x	.var = .var,
370	1x	conf_level = conf_level,
371	1x	include = include_comp,
372	1x	arm = arm,
373	1x	strata = strata,
374	1x	stat = this_col_split[2],
375	1x	formats = formats,
376	1x	methods = methods
377		)
378		} else {
379	1x	resp01_a_comp_stat_logical(
380	1x	df,
381	1x	.var = .var,
382	1x	conf_level = conf_level,
383	1x	include = include_comp,
384	1x	arm = arm,
385	1x	strata = strata,
386	1x	stat = this_col_split[2],
387	1x	formats = formats,
388	1x	methods = methods
389		)
390		}
391		}
392	3x	if (is_factor) {
393	2x	res
394		} else {
395	1x	if (!is.null(labelstr)) {
396	1x	label <- labelstr
397		}
398	1x	in_rows(res, .labels = label)
399		}
400		}

1		#' `ANCOVA` Analysis
2		#'
3		#' Does the `ANCOVA` analysis, separately for each visit.
4		#'
5		#' @param vars (named `list` of `string` or `character`)\cr specifying the variables in the `ANCOVA` analysis.
6		#' The following elements need to be included as character vectors and match corresponding columns
7		#' in `data`:
8		#'
9		#' - `response`: the response variable.
10		#' - `covariates`: the additional covariate terms (might also include interactions).
11		#' - `id`: the subject ID variable (not really needed for the computations but for internal logistics).
12		#' - `arm`: the treatment group variable (factor).
13		#' - `visit`: the visit variable (factor).
14		#'
15		#' Note that the `arm` variable is by default included in the model, thus should not be part of `covariates`.
16		#' @param data (`data.frame`)\cr with all the variables specified in
17		#' `vars`. Records with missing values in any independent variables
18		#' will be excluded.
19		#' @param conf_level (`proportion`)\cr confidence level of the interval.
20		#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `'counterfactual'` by default.
21		#'
22		#' @return A `tern_model` object which is a list with model results:
23		#'
24		#' - `fit`: A list with a fitted [stats::lm()] result for each visit.
25		#' - `mse`: Mean squared error, i.e. variance estimate, for each visit.
26		#' - `df`: Degrees of freedom for the variance estimate for each visit.
27		#' - `lsmeans`: This is a list with data frames `estimates` and `contrasts`.
28		#' The attribute `weights` savse the settings used (`weights_emmeans`).
29		#' - `vars`: The variable list.
30		#' - `labels`: Corresponding list with variable labels extracted from `data`.
31		#' - `ref_level`: The reference level for the arm variable, which is always the first level.
32		#' - `treatment_levels`: The treatment levels for the arm variable.
33		#' - `conf_level`: The confidence level which was used to construct the `lsmeans` confidence intervals.
34		#'
35		#' @export
36		#' @examples
37		#' library(mmrm)
38		#'
39		#' fit <- fit_ancova(
40		#' vars = list(
41		#' response = "FEV1",
42		#' covariates = c("RACE", "SEX"),
43		#' arm = "ARMCD",
44		#' id = "USUBJID",
45		#' visit = "AVISIT"
46		#' ),
47		#' data = fev_data,
48		#' conf_level = 0.9,
49		#' weights_emmeans = "equal"
50		#' )
51		#'
52		fit_ancova <- function(
53		vars = list(response = "AVAL", covariates = c(), arm = "ARM", visit = "AVISIT", id = "USUBJID"),
54		data,
55		conf_level = 0.95,
56		weights_emmeans = "proportional") {
57	5x	labels <- h_labels(vars, data)
58
59	5x	arm_levels <- levels(data[[vars$arm]])
60	5x	ref_level <- arm_levels[1]
61	5x	trt_levels <- arm_levels[-1]
62	5x	visit_levels <- levels(data[[vars$visit]])
63	5x	grid <- list(
64	5x	factor(arm_levels, levels = arm_levels),
65	5x	factor(rep(visit_levels, each = length(arm_levels)), levels = visit_levels)
66		) \|>
67	5x	stats::setNames(c(vars$arm, vars$visit)) \|>
68	5x	as.data.frame()
69	5x	checkmate::assert_disjunct(vars$arm, vars$covariates)
70	5x	covariates_part <- paste(vars$covariates, collapse = " + ")
71	5x	formula <- if (covariates_part != "") {
72	5x	stats::as.formula(paste0(vars$response, " ~ ", covariates_part, " + ", vars$arm))
73		} else {
74	!	stats::as.formula(paste0(vars$response, " ~ ", vars$arm))
75		}
76	5x	results_by_visit <- Map(
77	5x	data = split(data, data[[vars$visit]]),
78	5x	grid = split(grid, grid[[vars$visit]]),
79	5x	f = function(data, grid) {
80	20x	fit <- stats::lm(formula = formula, data = data)
81	20x	summary_fit <- summary(fit)
82	20x	emmeans_res <- h_get_emmeans_res(fit, vars = vars["arm"], weights = weights_emmeans)
83	20x	estimates <- h_get_single_visit_estimates(emmeans_res, conf_level)
84	20x	estimates[[vars$visit]] <- grid[[vars$visit]]
85	20x	estimates <- estimates \|> dplyr::relocate(!!vars$arm, !!vars$visit)
86	20x	contrasts <- h_get_spec_visit_estimates(
87	20x	emmeans_res,
88	20x	specs = list(
89	20x	coefs = "trt.vs.ctrl",
90	20x	grid = grid[-1, ] # Without the reference level row.
91		),
92	20x	conf_level = conf_level,
93	20x	tests = TRUE,
94	20x	ref = ref_level
95		)
96	20x	relative_reduc_df <- h_get_relative_reduc_df(estimates, vars)
97	20x	contrasts <- merge(contrasts, relative_reduc_df, by = c(vars$arm, vars$visit), sort = FALSE)
98	20x	list(fit = fit, estimates = estimates, contrasts = contrasts, mse = summary_fit$sigma^2, df = summary_fit$df[2])
99		}
100		)
101	5x	lsmeans <- structure(
102	5x	list(
103	5x	estimates = do.call(rbind, c(lapply(results_by_visit, "[[", "estimates"), make.row.names = FALSE)),
104	5x	contrasts = do.call(rbind, c(lapply(results_by_visit, "[[", "contrasts"), make.row.names = FALSE))
105		),
106	5x	weights = weights_emmeans
107		)
108	5x	results <- list(
109	5x	fit = lapply(results_by_visit, "[[", "fit"),
110	5x	mse = do.call(c, lapply(results_by_visit, "[[", "mse")),
111	5x	df = do.call(c, lapply(results_by_visit, "[[", "df")),
112	5x	lsmeans = lsmeans,
113	5x	vars = vars,
114	5x	labels = labels,
115	5x	ref_level = ref_level,
116	5x	treatment_levels = trt_levels,
117	5x	conf_level = conf_level
118		)
119	5x	class(results) <- "tern_model"
120	5x	results
121		}

1		#' Analysis and Content Summary Function Producing Blank Line
2		#'
3		#' @inheritParams proposal_argument_convention
4		#'
5		#' @keywords internal
6		ac_blank_line <- function(df, labelstr = "") {
7	36x	in_rows(.list = NA_real_, .labels = labelstr, .formats = "xx", .format_na_strs = "")
8		}
9
10		#' Insertion of Blank Lines in a Layout
11		#'
12		#' @inheritParams proposal_argument_convention
13		#'
14		#' @description This is a hack for `rtables` in order to be able to add row gaps,
15		#' i.e. blank lines.
16		#' In particular, by default this function needs to maintain a global state for avoiding
17		#' duplicate table names. The global state variable is hidden by using
18		#' a dot in front of its name. However, this likely won't work with parallelisation across
19		#' multiple threads and also causes non-reproducibility of the resulting `rtables`
20		#' object. Therefore also a custom table name can be used.
21		#'
22		#' @return The modified layout now including a blank line after the current
23		#' row content.
24		#' @export
25		#'
26		#' @examples
27		#' ADSL <- ex_adsl
28		#'
29		#' lyt <- basic_table() \|>
30		#' split_cols_by("ARM") \|>
31		#' split_rows_by("STRATA1") \|>
32		#' analyze(vars = "AGE", afun = function(x) {
33		#' in_rows(
34		#' "Mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.xx (xx.xx)")
35		#' )
36		#' }) \|>
37		#' insert_blank_line() \|>
38		#' analyze(vars = "AGE", table_names = "AGE_Range", afun = function(x) {
39		#' in_rows(
40		#' "Range" = rcell(range(x), format = "xx.xx - xx.xx")
41		#' )
42		#' })
43		#' build_table(lyt, ADSL)
44		insert_blank_line <- function(lyt, table_names = NULL) {
45	4x	varnames_rows <- vars_in_layout(lyt@row_layout)
46	4x	checkmate::assert_character(varnames_rows, min.len = 1L)
47	4x	last_varname_rows <- utils::tail(varnames_rows, 1L)
48
49	4x	this_table_name <- if (is.null(table_names)) {
50	2x	default_table_name <- paste0(".post_", last_varname_rows, "_blank")
51
52		# A named list for tracking table names and counts
53	2x	table_count <- getOption("junco.insert_blank_line")
54
55	2x	new_count <- if (default_table_name %in% names(table_count)) {
56	1x	table_count[[default_table_name]] + 1
57		} else {
58	1x	1
59		}
60
61	2x	table_count[[default_table_name]] <- new_count
62	2x	options(junco.insert_blank_line = table_count)
63
64	2x	paste(default_table_name, new_count, sep = "_")
65		} else {
66	2x	checkmate::assert_string(table_names, min.chars = 1L)
67	2x	table_names
68		}
69
70	4x	analyze(
71	4x	lyt,
72	4x	vars = last_varname_rows,
73	4x	afun = ac_blank_line,
74	4x	show_labels = "hidden",
75	4x	table_names = this_table_name
76		)
77		}

1		#' @name a_freq_subcol_j
2		#'
3		#' @title Analysis function count and percentage with extra column-subsetting in
4		#' selected columns (controlled by subcol_* arguments)
5		#'
6		#' @inheritParams proposal_argument_convention
7		#' @inheritParams a_freq_j
8		#' @param subcol_split Text to search colid to determine whether further subsetting
9		#' should be performed.
10		#' @param subcol_var Name of variable containing to be searched for the text
11		#' identified in subcol_val argument.
12		#' @param subcol_val Value to use to perform further data sub-setting.
13
14		#' @param denom (`string`)\cr
15		#' One of \cr
16		#' \itemize{
17		#' \item \strong{N_col} Column count, \cr
18		#' \item \strong{n_df} Number of patients (based upon the main input dataframe `df`),\cr
19		#' \item \strong{n_altdf} Number of patients from the secondary dataframe (`.alt_df_full`),\cr
20		#' Note that argument `denom_by` will perform a row-split on the `.alt_df_full` dataframe.\cr
21		#' It is a requirement that variables specified in `denom_by` are part of the row split specifications. \cr
22		#' \item \strong{n_rowdf} Number of patients from the current row-level dataframe
23		#' (`.row_df` from the rtables splitting machinery).\cr
24		#' \item \strong{n_parentdf} Number of patients from a higher row-level split than the current split.\cr
25		#' This higher row-level split is specified in the argument `denom_by`.\cr
26		#' }
27		#' @param .formats (named 'character' or 'list')\cr
28		#' formats for the statistics.
29		#'
30		#' @return list of requested statistics with formatted `rtables::CellValue()`.\cr
31		#' @export
32
33		a_freq_subcol_j <- function(
34		df,
35		labelstr = NULL,
36		.var = NA,
37		val = NULL,
38		# arguments specific to a_freq_subcol_j
39		subcol_split = NULL,
40		subcol_var = NULL,
41		subcol_val = NULL,
42		# arguments specific to a_freq_subcol_j till here
43		.df_row,
44		.spl_context,
45		.N_col,
46		id = "USUBJID",
47		denom = c("N_col", "n_df", "n_altdf", "n_rowdf", "n_parentdf"),
48		label = NULL,
49		label_fstr = NULL,
50		label_map = NULL,
51		.alt_df_full = NULL,
52		denom_by = NULL,
53		.stats = c("count_unique_denom_fraction"),
54		.formats = NULL,
55		.labels_n = NULL,
56		.indent_mods = NULL,
57		na_str = rep("NA", 3)) {
58	6x	denom <- match.arg(denom)
59
60	6x	if (!is.null(labelstr) && is.na(.var)) {
61	!	stop(
62	!	"Argument var must be specified in call to summarize_row_groups when using cfun = a_freq_subcol_j."
63		)
64		}
65
66	6x	check_alt_df_full(denom, "n_altdf", .alt_df_full)
67
68	6x	res_dataprep <- h_a_freq_dataprep(
69	6x	df = df,
70	6x	labelstr = labelstr,
71	6x	.var = .var,
72	6x	val = val,
73	6x	drop_levels = FALSE,
74	6x	excl_levels = NULL,
75	6x	new_levels = NULL,
76	6x	new_levels_after = FALSE,
77	6x	.df_row = .df_row,
78	6x	.spl_context = .spl_context,
79	6x	.N_col = .N_col,
80	6x	id = id,
81	6x	denom = denom,
82	6x	variables = NULL,
83	6x	label = label,
84	6x	label_fstr = label_fstr,
85	6x	label_map = label_map,
86	6x	.alt_df_full = .alt_df_full,
87	6x	denom_by = denom_by,
88	6x	.stats = .stats
89		)
90		# res_dataprep is list with elements
91		# df .df_row val
92		# drop_levels excl_levels
93		# alt_df parentdf new_denomdf
94		# .stats
95		# make these elements available in current environment
96	6x	df <- res_dataprep$df
97	6x	.df_row <- res_dataprep$.df_row
98	6x	val <- res_dataprep$val
99	6x	alt_df <- res_dataprep$alt_df
100	6x	parentdf <- res_dataprep$parentdf
101	6x	new_denomdf <- res_dataprep$new_denomdf
102	6x	.stats <- .stats
103
104		## colid can be used to figure out if we're in subcolum
105	6x	colid <- .spl_context$cur_col_id[[1]]
106
107		### this is the core code for subsetting to appropriate subcol_val
108	6x	insubcol <- grepl(subcol_split, colid, fixed = TRUE)
109	6x	if (insubcol) {
110	3x	df <- subset(df, df[[subcol_var]] == subcol_val)
111		}
112
113		## the same s-function can be used as in a_freq_j
114	6x	x_stats <- s_freq_j(
115	6x	df,
116	6x	.var = .var,
117	6x	.df_row = .df_row,
118	6x	val = val,
119	6x	alt_df = new_denomdf,
120	6x	parent_df = new_denomdf,
121	6x	id = id,
122	6x	denom = denom,
123	6x	.N_col = .N_col,
124	6x	countsource = "df"
125		)
126
127	6x	.stats_adj <- .stats
128
129	6x	res_prepinrows <- h_a_freq_prepinrows(
130	6x	x_stats,
131	6x	.stats_adj,
132	6x	.formats,
133	6x	labelstr,
134	6x	label_fstr,
135	6x	label,
136	6x	.indent_mods,
137	6x	.labels_n,
138	6x	na_str
139		)
140		# res_prepinrows is list with elements
141		# x_stats .formats .labels .indent_mods .format_na_strs
142		# make these elements available in current environment
143	6x	x_stats <- res_prepinrows$x_stats
144	6x	.formats <- res_prepinrows$.formats
145	6x	.labels <- res_prepinrows$.labels
146	6x	.indent_mods <- res_prepinrows$.indent_mods
147	6x	.format_na_strs <- res_prepinrows$.format_na_strs
148
149		### final step: turn requested stats into rtables rows
150	6x	inrows <- in_rows(
151	6x	.list = x_stats,
152	6x	.formats = .formats,
153	6x	.labels = .labels,
154	6x	.indent_mods = .indent_mods,
155	6x	.format_na_strs = .format_na_strs
156		)
157
158	6x	return(inrows)
159		}

1		#' @keywords internal
2		memoised_fit_mmrm <- memoise::memoise(fit_mmrm_j)
3
4		#' Helper Function to Fit the MMRM and Return LS Mean Estimates and Contrasts
5		#'
6		#' @inheritParams proposal_argument_convention
7		#' @param df_parent (`data.frame`)\cr data set containing all analysis variables
8		#' from all visits and arms.
9		#' @param ref_arm_level (`string`)\cr the reference arm which should be compared
10		#' against.
11		#' @param ref_visit_levels (`character`)\cr the reference visits which should not
12		#' be included in the model fit.
13		#' @param ... additional options passed to [fit_mmrm_j()].
14		#'
15		#' @return The resulting estimates and contrasts LS means as returned by
16		#' [tidy.tern_model()].
17		#' @keywords internal
18		h_summarize_mmrm <- function(.var, df_parent, variables, ref_arm_level, ref_visit_levels, ...) {
19	51x	checkmate::assert_string(.var)
20	51x	variables$response <- .var
21
22	51x	checkmate::assert_string(ref_arm_level)
23	51x	arm_levels <- levels(df_parent[[variables$arm]])
24	51x	if (arm_levels[1L] != ref_arm_level) {
25	51x	checkmate::assert_subset(ref_arm_level, arm_levels[-1L])
26	51x	df_parent[[variables$arm]] <- stats::relevel(df_parent[[variables$arm]], ref = ref_arm_level)
27		}
28	51x	checkmate::assert_character(ref_visit_levels)
29	51x	in_ref_visits <- df_parent[[variables$visit]] %in% ref_visit_levels
30	51x	df_parent <- df_parent[!in_ref_visits, , drop = FALSE]
31	51x	checkmate::assert_true(nrow(df_parent) > 1)
32	51x	new_levels <- setdiff(levels(df_parent[[variables$visit]]), ref_visit_levels)
33	51x	df_parent[[variables$visit]] <- factor(df_parent[[variables$visit]], levels = new_levels)
34	51x	mod_fit <- memoised_fit_mmrm(vars = variables, data = df_parent, ...)
35	51x	tidy(mod_fit)
36		}
37
38		#' Dynamic tabulation of MMRM results with tables
39		#'
40		#' @description `r lifecycle::badge('stable')`
41		#'
42		#' These functions can be used to produce tables for MMRM results, within
43		#' tables which are split by arms and visits. This is helpful when higher-level
44		#' row splits are needed (e.g. splits by parameter or subgroup).
45		#'
46		#' @name summarize_mmrm
47		#' @examples
48		#' set.seed(123)
49		#' longdat <- data.frame(
50		#' ID = rep(DM$ID, 5),
51		#' AVAL = c(
52		#' rep(0, nrow(DM)),
53		#' rnorm(n = nrow(DM) * 4)
54		#' ),
55		#' VISIT = factor(rep(paste0("V", 0:4), each = nrow(DM)))
56		#' ) \|>
57		#' dplyr::inner_join(DM, by = "ID")
58		#'
59		NULL
60
61		#' @describeIn summarize_mmrm Statistics function which is extracting estimates,
62		#' not including any results when in the reference visit, and only showing LS mean
63		#' estimates when in the reference arm and not in reference visit. It uses
64		#' [s_lsmeans()] for the final processing.
65		#'
66		#' @inheritParams proposal_argument_convention
67		#' @param ref_levels (`list`)\cr with `visit` and `arm` reference levels.
68		#' @param ... eventually passed to [fit_mmrm_j()] via [h_summarize_mmrm()].
69		#' @export
70		s_summarize_mmrm <- function(
71		df,
72		.var,
73		variables,
74		ref_levels,
75		.spl_context,
76		alternative = c("two.sided", "less", "greater"),
77		show_relative = c("reduction", "increase"),
78		...) {
79	63x	alternative <- match.arg(alternative)
80
81	63x	checkmate::assert_list(variables, names = "unique")
82	63x	visit_var <- variables$visit
83	63x	arm_var <- variables$arm
84
85	63x	checkmate::assert_list(ref_levels, names = "unique")
86	63x	checkmate::assert_subset(c(visit_var, arm_var), names(ref_levels))
87	63x	ref_visits <- ref_levels[[visit_var]]
88	63x	ref_arm <- ref_levels[[arm_var]]
89	63x	checkmate::assert_string(ref_arm)
90
91	63x	current_visit <- as.character(unique(df[[visit_var]]))
92	63x	current_arm <- as.character(unique(df[[arm_var]]))
93	63x	checkmate::assert_string(current_visit)
94	63x	checkmate::assert_string(current_arm)
95
96	63x	in_ref_visits <- current_visit %in% ref_visits
97	63x	in_ref_arm <- current_arm == ref_arm
98
99	63x	if (in_ref_visits) {
100		## this is returned
101	13x	list(
102	13x	n = NULL,
103	13x	adj_mean_se = NULL,
104	13x	adj_mean_ci = NULL,
105	13x	adj_mean_est_ci = NULL,
106	13x	diff_mean_se = NULL,
107	13x	diff_mean_ci = NULL,
108	13x	diff_mean_est_ci = NULL,
109	13x	change = NULL,
110	13x	p_value = NULL
111		)
112		} else { # non ref visit
113	50x	n_splits <- nrow(.spl_context)
114
115		# Check that the current row split is by the visit variable.
116	50x	current_split_var <- .spl_context[n_splits, "split"]
117	50x	checkmate::assert_true(identical(current_split_var, visit_var))
118
119		# Then take the data frame with all visits and fit the MMRM on it.
120	50x	df_parent <- .spl_context[n_splits - 1, "full_parent_df"][[1]]
121	50x	lsm_df <- h_summarize_mmrm(
122	50x	.var = .var,
123	50x	df_parent = df_parent,
124	50x	variables = variables,
125	50x	ref_arm_level = ref_arm,
126	50x	ref_visit_levels = ref_visits,
127		...
128		)
129
130		# Subset to the current table cell we are looking at.
131	50x	matches_visit <- lsm_df[[visit_var]] == current_visit
132	50x	matches_arm <- lsm_df[[arm_var]] == current_arm
133	50x	matches_both <- which(matches_visit & matches_arm)
134	50x	checkmate::assert_int(matches_both)
135
136	50x	s_lsmeans(
137	50x	lsm_df[matches_both, ],
138	50x	.in_ref_col = in_ref_arm,
139	50x	alternative = alternative,
140	50x	show_relative = show_relative
141		)
142		} # end ref visits if/else
143		}
144
145		#' @describeIn summarize_mmrm Formatted analysis function which is used as `afun`.
146		#'
147		#' @return
148		#' * `a_summarize_mmrm()` returns the corresponding list with formatted [rtables::CellValue()].
149		#'
150		#' @examples
151		#' basic_table() \|>
152		#' split_rows_by("VISIT") \|>
153		#' split_cols_by("ARM") \|>
154		#' analyze(
155		#' vars = "AVAL",
156		#' afun = a_summarize_mmrm,
157		#' na_str = tern::default_na_str(),
158		#' show_labels = "hidden",
159		#' extra_args = list(
160		#' variables = list(
161		#' covariates = c("AGE"),
162		#' id = "ID",
163		#' arm = "ARM",
164		#' visit = "VISIT"
165		#' ),
166		#' conf_level = 0.9,
167		#' cor_struct = "toeplitz",
168		#' ref_levels = list(VISIT = "V0", ARM = "B: Placebo")
169		#' )
170		#' ) \|>
171		#' build_table(longdat) \|>
172		#' prune_table(all_zero)
173		#' @export
174		a_summarize_mmrm <- function(
175		df,
176		.var,
177		.spl_context,
178		...,
179		.stats = NULL,
180		.formats = NULL,
181		.labels = NULL,
182		.indent_mods = NULL) {
183		# Check for additional parameters to the statistics function
184	60x	dots_extra_args <- list(...)
185
186		# Only support default stats, not custom stats
187	60x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
188
189		# Apply statistics function
190	60x	x_stats <- .apply_stat_functions(
191	60x	default_stat_fnc = s_summarize_mmrm,
192	60x	custom_stat_fnc_list = NULL,
193	60x	args_list = c(df = list(df), .var = .var, .spl_context = list(.spl_context), dots_extra_args)
194		)
195
196		# Format according to specifications
197	60x	format_stats(
198	60x	x_stats,
199	60x	method_groups = "summarize_mmrm",
200	60x	stats_in = .stats,
201	60x	formats_in = .formats,
202	60x	labels_in = .labels,
203	60x	indents_in = .indent_mods
204		)
205		}

1		#' Helper for Finding AVISIT after which CHG are all Missing
2		#'
3		#' @param df (`data.frame`)\cr with `CHG` and `AVISIT` variables.
4		#'
5		#' @return A string with either the factor level after which `AVISIT` is all missing,
6		#' or `NA`.
7		#' @export
8		#'
9		#' @examples
10		#' df <- data.frame(
11		#' AVISIT = factor(c(1, 2, 3, 4, 5)),
12		#' CHG = c(5, NA, NA, NA, 3)
13		#' )
14		#' find_missing_chg_after_avisit(df)
15		#'
16		#' df2 <- data.frame(
17		#' AVISIT = factor(c(1, 2, 3, 4, 5)),
18		#' CHG = c(5, NA, 3, NA, NA)
19		#' )
20		#' find_missing_chg_after_avisit(df2)
21		#'
22		#' df3 <- data.frame(
23		#' AVISIT = factor(c(1, 2, 3, 4, 5)),
24		#' CHG = c(NA, NA, NA, NA, NA)
25		#' )
26		#' find_missing_chg_after_avisit(df3)
27		find_missing_chg_after_avisit <- function(df) {
28	8x	checkmate::assert_data_frame(df)
29	7x	checkmate::assert_factor(df$AVISIT, unique = TRUE, any.missing = FALSE)
30	6x	checkmate::assert_numeric(df$CHG)
31
32		# Ensure the dataframe is sorted by AVISIT
33	5x	df <- df[order(df$AVISIT), ]
34
35		# Last visit with available data.
36	5x	visit_levels_available <- as.integer(df[!is.na(df$CHG), ]$AVISIT)
37
38	5x	if (!length(visit_levels_available)) {
39	1x	return(levels(df$AVISIT)[1])
40		}
41	4x	visit_levels_max_available <- max(visit_levels_available)
42
43		# Visits with missing data.
44	4x	visit_levels_missing <- as.integer(df[is.na(df$CHG), ]$AVISIT)
45
46		# Missing visits at the end.
47	4x	visit_levels_missing_end <- visit_levels_missing[visit_levels_missing > visit_levels_max_available]
48
49		# Return first one if there is any.
50	4x	if (length(visit_levels_missing_end)) {
51	3x	levels(df$AVISIT)[min(visit_levels_missing_end)]
52		} else {
53		NA_character_
54		}
55		}
56
57		# Copy from rbmi package, because this was only added recently, so not yet available in our package version.
58
59		#' Create a `rbmi` ready cluster
60		#'
61		#' @param cluster_or_cores Number of parallel processes to use or an existing cluster to make use of
62		#' @param objects a named list of objects to export into the sub-processes
63		#' @param packages a character vector of libraries to load in the sub-processes
64		#'
65		#' This function is a wrapper around `parallel::makePSOCKcluster()` but takes
66		#' care of configuring `rbmi` to be used in the sub-processes as well as loading
67		#' user defined objects and libraries and setting the seed for reproducibility.
68		#'
69		#' @return If `cluster_or_cores` is `1` this function will return `NULL`. If `cluster_or_cores`
70		#' is a number greater than `1`, a cluster with `cluster_or_cores` cores is returned.
71		#'
72		#' If `cluster_or_cores` is a cluster created via `parallel::makeCluster()` then this function
73		#' returns it after inserting the relevant `rbmi` objects into the existing cluster.
74		#'
75		#' @examples
76		#' \dontrun{
77		#' make_rbmi_cluster(5)
78		#' closeAllConnections()
79		#'
80		#' VALUE <- 5
81		#' myfun <- function(x) {
82		#' x + day(VALUE)
83		#' }
84		#' make_rbmi_cluster(5, list(VALUE = VALUE, myfun = myfun), c("lubridate"))
85		#' closeAllConnections()
86		#'
87		#' cl <- parallel::makeCluster(5)
88		#' make_rbmi_cluster(cl)
89		#' closeAllConnections()
90		#' }
91		#' @export
92		make_rbmi_cluster <- function(cluster_or_cores = 1, objects = NULL, packages = NULL) {
93	5x	if (is.numeric(cluster_or_cores) && cluster_or_cores == 1) {
94	1x	return(NULL)
95	4x	} else if (is.numeric(cluster_or_cores)) {
96	2x	cl <- parallel::makePSOCKcluster(cluster_or_cores)
97	2x	} else if (methods::is(cluster_or_cores, "cluster")) {
98	1x	cl <- cluster_or_cores
99		} else {
100	1x	stop(sprintf("`cluster_or_cores` has unsupported class of: %s", paste(class(cluster_or_cores), collapse = ", ")))
101		}
102
103		# Load user defined objects into the globalname space
104	3x	if (!is.null(objects) && length(objects)) {
105	1x	export_env <- list2env(objects)
106	1x	parallel::clusterExport(cl, names(objects), export_env)
107		}
108
109		# Load user defined packages
110	3x	packages <- c(packages, "assertthat")
111		# Remove attempts to load `rbmi` as this will be covered later
112	3x	packages <- setdiff(packages, "rbmi")
113	3x	devnull <- parallel::clusterCall(
114	3x	cl,
115	3x	function(pkgs) lapply(pkgs, function(x) library(x, character.only = TRUE)),
116	3x	as.list(packages)
117		)
118
119		# Ensure reproducibility
120	3x	parallel::clusterSetRNGStream(cl, sample.int(1))
121
122		# If user has previously configured `rbmi` sub-processes then early exit
123	3x	exported_rbmi <- unlist(parallel::clusterEvalQ(cl, exists("..exported..parallel..rbmi")))
124	3x	if (all(exported_rbmi)) {
125	!	return(cl)
126		}
127
128		# Ensure that exported and unexported objects are all directly accessible from the globalenv in the sub-processes
129	3x	is_in_rbmi_development <- FALSE
130	3x	if (is_in_rbmi_development) {
131	!	devnull <- parallel::clusterEvalQ(cl, pkgload::load_all())
132		} else {
133	3x	devnull <- parallel::clusterEvalQ(cl, {
134	6x	.namespace <- getNamespace("rbmi")
135	6x	for (.nsfun in ls(.namespace)) {
136	1128x	assign(.nsfun, get(.nsfun, envir = .namespace))
137		}
138		})
139		}
140
141		# Set variable to signify `rbmi` has been configured
142	3x	devnull <- parallel::clusterEvalQ(cl, {
143	6x	..exported..parallel..rbmi <- TRUE
144		})
145
146	3x	return(cl)
147		}
148
149		#' Parallelise Lapply
150		#'
151		#' Simple wrapper around `lapply` and [`parallel::clusterApplyLB`] to abstract away
152		#' the logic of deciding which one to use
153		#' @param cl Cluster created by [`parallel::makeCluster()`] or `NULL`
154		#' @param fun Function to be run
155		#' @param x object to be looped over
156		#' @param ... extra arguments passed to `fun`
157		#' @return `list` of results of calling `fun` on elements of `x`.
158		par_lapply <- function(cl, fun, x, ...) {
159	2x	result <- if (is.null(cl)) {
160	1x	lapply(x, fun, ...)
161		} else {
162	1x	parallel::clusterApplyLB(cl, x, fun, ...)
163		}
164	2x	return(result)
165		}
166
167		#' Analyse Multiple Imputed Datasets
168		#'
169		#' @description
170		#' This function takes multiple imputed datasets (as generated by
171		#' the [rbmi::impute()] function) and runs an analysis function on
172		#' each of them.
173		#'
174		#' @importFrom assertthat assert_that
175		#'
176		#' @details
177		#' This function works by performing the following steps:
178		#'
179		#' 1. Extract a dataset from the `imputations` object.
180		#' 2. Apply any delta adjustments as specified by the `delta` argument.
181		#' 3. Run the analysis function `fun` on the dataset.
182		#' 4. Repeat steps 1-3 across all of the datasets inside the `imputations`
183		#' object.
184		#' 5. Collect and return all of the analysis results.
185		#'
186		#' The analysis function `fun` must take a `data.frame` as its first
187		#' argument. All other options to [rbmi_analyse()] are passed onto `fun`
188		#' via `...`.
189		#' `fun` must return a named list with each element itself being a
190		#' list containing a single
191		#' numeric element called `est` (or additionally `se` and `df` if
192		#' you had originally specified [rbmi::method_bayes()] or [rbmi::method_approxbayes()])
193		#' i.e.:
194		#' \preformatted{
195		#' myfun <- function(dat, ...) {
196		#' mod_1 <- lm(data = dat, outcome ~ group)
197		#' mod_2 <- lm(data = dat, outcome ~ group + covar)
198		#' x <- list(
199		#' trt_1 = list(
200		#' est = coef(mod_1)[['group']], # Use [[ ]] for safety
201		#' se = sqrt(vcov(mod_1)['group', 'group']), # Use ['','']
202		#' df = df.residual(mod_1)
203		#' ),
204		#' trt_2 = list(
205		#' est = coef(mod_2)[['group']], # Use [[ ]] for safety
206		#' se = sqrt(vcov(mod_2)['group', 'group']), # Use ['','']
207		#' df = df.residual(mod_2)
208		#' )
209		#' )
210		#' return(x)
211		#' }
212		#' }
213		#'
214		#' Please note that the `vars$subjid` column (as defined in the original call to
215		#' [rbmi::draws()]) will be scrambled in the data.frames that are provided to `fun`.
216		#' This is to say they will not contain the original subject values and as such
217		#' any hard coding of subject ids is strictly to be avoided.
218		#'
219		#' By default `fun` is the [rbmi_ancova()] function.
220		#' Please note that this function
221		#' requires that a `vars` object, as created by [rbmi::set_vars()], is provided via
222		#' the `vars` argument e.g. `rbmi_analyse(imputeObj, vars = rbmi::set_vars(...))`. Please
223		#' see the documentation for [rbmi_ancova()] for full details.
224		#' Please also note that the theoretical justification for the conditional mean imputation
225		#' method (`method = method_condmean()` in [rbmi::draws()]) relies on the fact that ANCOVA is
226		#' a linear transformation of the outcomes.
227		#' Thus care is required when applying alternative analysis functions in this setting.
228		#'
229		#' The `delta` argument can be used to specify offsets to be applied
230		#' to the outcome variable in the imputed datasets prior to the analysis.
231		#' This is typically used for sensitivity or tipping point analyses. The
232		#' delta dataset must contain columns `vars$subjid`, `vars$visit` (as specified
233		#' in the original call to [rbmi::draws()]) and `delta`. Essentially this `data.frame`
234		#' is merged onto the imputed dataset by `vars$subjid` and `vars$visit` and then
235		#' the outcome variable is modified by:
236		#'
237		#' ```
238		#' imputed_data[[vars$outcome]] <- imputed_data[[vars$outcome]] + imputed_data[['delta']]
239		#' ```
240		#'
241		#' Please note that in order to provide maximum flexibility, the `delta` argument
242		#' can be used to modify any/all outcome values including those that were not
243		#' imputed. Care must be taken when defining offsets. It is recommend that you
244		#' use the helper function [rbmi::delta_template()] to define the delta datasets as
245		#' this provides utility variables such as `is_missing` which can be used to identify
246		#' exactly which visits have been imputed.
247		#'
248		#' @seealso [rbmi::extract_imputed_dfs()] for manually extracting imputed
249		#' datasets.
250		#' @seealso [rbmi::delta_template()] for creating delta data.frames.
251		#' @seealso [rbmi_ancova()] for the default analysis function.
252		#'
253		#' @param imputations An `imputations` object as created by [rbmi::impute()].
254		#' @param fun An analysis function to be applied to each imputed dataset. See details.
255		#' @param delta A `data.frame` containing the delta transformation to be applied to the imputed
256		#' datasets prior to running `fun`. See details.
257		#' @param ... Additional arguments passed onto `fun`.
258		#' @param cluster_or_cores The number of parallel processes to use when running this function. Can also be a
259		#' cluster object created by [`make_rbmi_cluster()`]. See the parallelisation section below.
260		#' @param .validate Should `imputations` be checked to ensure it conforms to the required format
261		#' (default = `TRUE`) ? Can gain a small performance increase if this is set to `FALSE` when
262		#' analysing a large number of samples.
263		#'
264		#' @section Parallelisation:
265		#' To speed up the evaluation of `rbmi_analyse()` you can use the `cluster_or_cores` argument to enable parallelisation.
266		#' Simply providing an integer will get `rbmi` to automatically spawn that many background processes
267		#' to parallelise across. If you are using a custom analysis function then you need to ensure
268		#' that any libraries or global objects required by your function are available in the
269		#' sub-processes. To do this you need to use the [`make_rbmi_cluster()`] function for example:
270		#' ```
271		#' my_custom_fun <- function(...) <some analysis code>
272		#' cl <- make_rbmi_cluster(
273		#' 4,
274		#' objects = list('my_custom_fun' = my_custom_fun),
275		#' packages = c('dplyr', 'nlme')
276		#' )
277		#' rbmi_analyse(
278		#' imputations = imputeObj,
279		#' fun = my_custom_fun,
280		#' cluster_or_cores = cl
281		#' )
282		#' parallel::stopCluster(cl)
283		#' ```
284		#'
285		#' Note that there is significant overhead both with setting up the sub-processes and with
286		#' transferring data back-and-forth between the main process and the sub-processes. As such
287		#' parallelisation of the `rbmi_analyse()` function tends to only be worth it when you have
288		#' `> 2000` samples generated by [rbmi::draws()]. Conversely using parallelisation if your samples
289		#' are smaller than this may lead to longer run times than just running it sequentially.
290		#'
291		#' It is important to note that the implementation of parallel processing within [rbmi::analyse()`] has
292		#' been optimised around the assumption that the parallel processes will be spawned on the same
293		#' machine and not a remote cluster. One such optimisation is that the required data is saved to
294		#' a temporary file on the local disk from which it is then read into each sub-process. This is
295		#' done to avoid the overhead of transferring the data over the network. Our assumption is that
296		#' if you are at the stage where you need to be parallelising your analysis over a remote cluster
297		#' then you would likely be better off parallelising across multiple `rbmi` runs rather than within
298		#' a single `rbmi` run.
299		#'
300		#' Finally, if you are doing a tipping point analysis you can get a reasonable performance
301		#' improvement by re-using the cluster between each call to `rbmi_analyse()` e.g.
302		#' ```
303		#' cl <- make_rbmi_cluster(4)
304		#' ana_1 <- rbmi_analyse(
305		#' imputations = imputeObj,
306		#' delta = delta_plan_1,
307		#' cluster_or_cores = cl
308		#' )
309		#' ana_2 <- rbmi_analyse(
310		#' imputations = imputeObj,
311		#' delta = delta_plan_2,
312		#' cluster_or_cores = cl
313		#' )
314		#' ana_3 <- rbmi_analyse(
315		#' imputations = imputeObj,
316		#' delta = delta_plan_3,
317		#' cluster_or_cores = cl
318		#' )
319		#' parallel::clusterStop(cl)
320		#' ```
321		#'
322		#' @return An `analysis` object, as defined by `rbmi`, representing the desired
323		#' analysis applied to each of the imputed datasets in `imputations`.
324		#' @examples
325		#' library(rbmi)
326		#' library(dplyr)
327		#'
328		#' dat <- antidepressant_data
329		#' dat$GENDER <- as.factor(dat$GENDER)
330		#' dat$POOLINV <- as.factor(dat$POOLINV)
331		#' set.seed(123)
332		#' pat_ids <- sample(levels(dat$PATIENT), nlevels(dat$PATIENT) / 4)
333		#' dat <- dat \|>
334		#' filter(PATIENT %in% pat_ids) \|>
335		#' droplevels()
336		#' dat <- expand_locf(
337		#' dat,
338		#' PATIENT = levels(dat$PATIENT),
339		#' VISIT = levels(dat$VISIT),
340		#' vars = c("BASVAL", "THERAPY"),
341		#' group = c("PATIENT"),
342		#' order = c("PATIENT", "VISIT")
343		#' )
344		#' dat_ice <- dat %>%
345		#' arrange(PATIENT, VISIT) %>%
346		#' filter(is.na(CHANGE)) %>%
347		#' group_by(PATIENT) %>%
348		#' slice(1) %>%
349		#' ungroup() %>%
350		#' select(PATIENT, VISIT) %>%
351		#' mutate(strategy = "JR")
352		#' dat_ice <- dat_ice[-which(dat_ice$PATIENT == 3618), ]
353		#' vars <- set_vars(
354		#' outcome = "CHANGE",
355		#' visit = "VISIT",
356		#' subjid = "PATIENT",
357		#' group = "THERAPY",
358		#' covariates = c("THERAPY")
359		#' )
360		#' drawObj <- draws(
361		#' data = dat,
362		#' data_ice = dat_ice,
363		#' vars = vars,
364		#' method = method_condmean(type = "jackknife", covariance = "csh"),
365		#' quiet = TRUE
366		#' )
367		#' references <- c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO")
368		#' imputeObj <- impute(drawObj, references)
369		#'
370		#' rbmi_analyse(imputations = imputeObj, vars = vars)
371		#' @export
372		rbmi_analyse <- function(imputations, fun = rbmi_ancova, delta = NULL, ..., cluster_or_cores = 1, .validate = TRUE) {
373		# nocov
374
375	!	if (.validate) rbmi::validate(imputations)
376
377	!	assertthat::assert_that(is.function(fun), msg = "`fun` must be a function")
378
379	!	assertthat::assert_that(is.null(delta) \| is.data.frame(delta), msg = "`delta` must be NULL or a data.frame")
380
381	!	vars <- imputations$data$vars
382
383	!	if (.validate) devnull <- lapply(imputations$imputations, function(x) rbmi::validate(x))
384
385	!	if (!is.null(delta)) {
386	!	expected_vars <- c(vars$subjid, vars$visit, "delta")
387	!	assertthat::assert_that(
388	!	all(expected_vars %in% names(delta)),
389	!	msg = sprintf("The following variables must exist witin `delta`: `%s`", paste0(expected_vars, collapse = "`, `"))
390		)
391		}
392
393		# Mangle name to avoid any conflicts with user defined objects if running in a cluster
394	!	..rbmi..analysis..imputations <- imputations
395	!	..rbmi..analysis..delta <- delta
396	!	..rbmi..analysis..fun <- fun
397	!	objects <- list(
398	!	..rbmi..analysis..imputations = ..rbmi..analysis..imputations,
399	!	..rbmi..analysis..delta = ..rbmi..analysis..delta,
400	!	..rbmi..analysis..fun = ..rbmi..analysis..fun
401		)
402
403	!	cl <- make_rbmi_cluster(cluster_or_cores)
404
405	!	if (methods::is(cl, "cluster")) {
406	!	..rbmi..analysis..data..path <- tempfile()
407	!	saveRDS(objects, file = ..rbmi..analysis..data..path, compress = FALSE)
408	!	devnull <- parallel::clusterExport(cl, "..rbmi..analysis..data..path", environment())
409	!	devnull <- parallel::clusterEvalQ(cl, {
410	!	..rbmi..analysis..objects <- readRDS(..rbmi..analysis..data..path)
411	!	list2env(..rbmi..analysis..objects, envir = environment())
412		})
413		}
414
415		# If the user provided the clusters object directly then do not close it on completion
416	!	if (!methods::is(cluster_or_cores, "cluster")) {
417	!	on.exit(
418		{
419	!	if (!is.null(cl)) parallel::stopCluster(cl)
420		},
421	!	add = TRUE,
422	!	after = FALSE
423		)
424		}
425
426		# Chunk up requests for significant speed improvement when running in parallel
427	!	number_of_cores <- ifelse(is.null(cl), 1, length(cl))
428	!	indexes <- seq_along(imputations$imputations)
429	!	indexes_split <- split(indexes, (indexes %% number_of_cores) + 1)
430
431	!	results <- par_lapply(
432	!	cl,
433	!	function(indicies, ...) {
434	!	inner_fun <- function(idx, ...) {
435	!	dat2 <- (utils::getFromNamespace("extract_imputed_df", "rbmi"))(
436	!	..rbmi..analysis..imputations$imputations[[idx]],
437	!	..rbmi..analysis..imputations$data,
438	!	..rbmi..analysis..delta
439		)
440	!	..rbmi..analysis..fun(dat2, ...)
441		}
442	!	lapply(indicies, inner_fun, ...)
443		},
444	!	indexes_split,
445		...
446		) \|>
447	!	unlist(recursive = FALSE, use.names = FALSE)
448
449		# Re-order to ensure results are returned in same order as imputations
450	!	results <- results[order(unlist(indexes_split, use.names = FALSE))]
451	!	names(results) <- NULL
452
453	!	fun_name <- deparse(substitute(fun))
454	!	if (length(fun_name) > 1) {
455	!	fun_name <- "<Anonymous Function>"
456	!	} else if (is.null(fun_name)) {
457	!	fun_name <- "<NULL>"
458		}
459
460	!	ret <- (utils::getFromNamespace("as_analysis", "rbmi"))(
461	!	results = results,
462	!	fun_name = fun_name,
463	!	delta = delta,
464	!	fun = fun,
465	!	method = imputations$method
466		)
467	!	rbmi::validate(ret)
468	!	return(ret)
469		}

1		#' Function Factory to Create Padded In Rows Content
2		#'
3		#' @param length_out (`count` or `NULL`)\cr full length which should be padded
4		#' by `NA` which will be printed as empty strings.
5		#' @param label (`string`)\cr row label to be used for the first row only.
6		#'
7		#' @return The function of `content` and `.formats`.
8		#' @keywords internal
9		pad_in_rows_fct <- function(length_out = NULL, label = "") {
10	144x	checkmate::assert_count(length_out, null.ok = TRUE)
11	144x	checkmate::assert_string(label)
12
13	144x	function(content, .formats) {
14	144x	content_list <- as.list(content)
15	144x	if (!is.null(length_out)) {
16	144x	missing_length <- length_out - length(content_list)
17	144x	if (missing_length > 0) {
18	23x	content_list <- c(content_list, rep(NA, missing_length))
19		}
20		}
21	144x	in_rows(
22	144x	.list = content_list,
23	144x	.names = as.character(seq_along(content_list)),
24	144x	.labels = c(
25	144x	label,
26	144x	rep(
27		"",
28	144x	length(content_list) -
29	144x	1L
30		)
31		),
32	144x	.formats = .formats,
33	144x	.format_na_strs = ""
34		)
35		}
36		}
37
38		#' First Level Column Split for LS Means Wide Table Layouts
39		#'
40		#' @inheritParams proposal_argument_convention
41		#'
42		#' @keywords internal
43		lsmeans_wide_first_split_fun_fct <- function(include_variance) {
44	3x	checkmate::assert_flag(include_variance)
45
46	3x	post_fun <- function(ret, spl, fulldf, .spl_context) {
47	3x	if (include_variance) {
48	2x	short_split_result(
49	2x	reference_group = "Reference Group",
50	2x	testing_group = "Testing Group",
51	2x	variance = "",
52	2x	comparison = "Testing - Reference",
53	2x	fulldf = fulldf
54		)
55		} else {
56	1x	short_split_result(
57	1x	reference_group = "Reference Group",
58	1x	testing_group = "Testing Group",
59	1x	comparison = "Testing - Reference",
60	1x	fulldf = fulldf
61		)
62		}
63		}
64	3x	make_split_fun(post = list(post_fun))
65		}
66
67		#' Second Level Column Split for LS Means Wide Table Layouts
68		#'
69		#' @inheritParams proposal_argument_convention
70		#' @param include_pval (`flag`)\cr whether to include the p-value column.
71		#'
72		#' @keywords internal
73		lsmeans_wide_second_split_fun_fct <- function(pval_sided, conf_level, include_pval) {
74	3x	checkmate::assert_flag(include_pval)
75
76	3x	post_fun <- function(ret, spl, fulldf, .spl_context) {
77	11x	colset <- .spl_context[nrow(.spl_context), "value"][[1]]
78	11x	if (colset %in% c("reference_group", "testing_group")) {
79	6x	short_split_result(treatment = "Treatment", n = "N", lsmean = "LS Mean", fulldf = fulldf)
80	5x	} else if (colset == "variance") {
81	2x	short_split_result(mse = "M. S. Error", df = "Error DF", fulldf = fulldf)
82		} else {
83	3x	if (include_pval) {
84	2x	short_split_result(
85	2x	lsmean = "LS Mean",
86	2x	se = "SE",
87	2x	ci = f_conf_level(conf_level),
88	2x	pval = paste0(abs(as.numeric(pval_sided)), "-sided p-value~[super a]"),
89	2x	fulldf = fulldf
90		)
91		} else {
92	1x	short_split_result(lsmean = "LS Mean", se = "SE", ci = f_conf_level(conf_level), fulldf = fulldf)
93		}
94		}
95		}
96	3x	make_split_fun(post = list(post_fun))
97		}
98
99		#' Content Row Analysis Function for LS Means Wide Table Layouts
100		#'
101		#' @inheritParams proposal_argument_convention
102		#' @param variables (`list`)\cr see [fit_ancova()] for required variable
103		#' specifications.
104		#' @param ref_level (`string`)\cr the reference level of the treatment arm variable.
105		#' @param treatment_levels (`character`)\cr the non-reference levels of the treatment arm
106		#' variable.
107		#' @param pval_sided (`string`)\cr either '2' for two-sided or '1' for 1-sided with greater than
108		#' control or '-1' for 1-sided with smaller than control alternative hypothesis.
109		#' @param formats (`list`)\cr including `lsmean`, `mse`, `df`, `lsmean_diff`, `se`,
110		#' `ci`, `pval` formats.
111		#'
112		#' @details This assumes a lot of structure of the layout, and is only intended to be used inside
113		#' [summarize_lsmeans_wide()], please see there for the layout structure that is needed.
114		#'
115		#' @keywords internal
116		lsmeans_wide_cfun <- function(
117		df,
118		labelstr,
119		.spl_context,
120		variables,
121		ref_level,
122		treatment_levels,
123		pval_sided = c("2", "1", "-1"),
124		conf_level,
125		formats) {
126	144x	this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
127	144x	pad_in_rows <- pad_in_rows_fct(length_out = length(treatment_levels), label = labelstr)
128	144x	if (this_col_split[1] %in% c("reference_group", "testing_group")) {
129	75x	this_level <- if (this_col_split[1] == "reference_group") {
130	39x	ref_level
131		} else {
132	36x	treatment_levels
133		}
134	75x	has_this_level <- df[[variables$arm]] %in% this_level
135	75x	if (this_col_split[2] == "treatment") {
136	27x	pad_in_rows(this_level, .formats = "xx")
137	48x	} else if (this_col_split[2] == "n") {
138	24x	pad_in_rows(df$n[has_this_level], .formats = "xx")
139		} else {
140	24x	pad_in_rows(df$estimate_est[has_this_level], .formats = formats$lsmean)
141		}
142	69x	} else if (this_col_split[1] == "variance") {
143	20x	has_trt <- df[[variables$arm]] %in% treatment_levels
144	20x	all_the_same <- (length(unique(df$mse[has_trt])) == 1) && (length(unique(df$df[has_trt])) == 1)
145		# Note: We only take the first value, because in this ANCOVA case they are identical and we don't want to
146		# repeat it in the table.
147	20x	inds <- if (all_the_same) 1 else seq_len(sum(has_trt))
148	20x	if (this_col_split[2] == "mse") {
149	10x	pad_in_rows(df$mse[has_trt][inds], .formats = formats$mse)
150		} else {
151	10x	pad_in_rows(df$df[has_trt][inds], .formats = formats$df)
152		}
153		} else {
154	49x	has_trt <- df[[variables$arm]] %in% treatment_levels
155	49x	switch(this_col_split[2],
156	12x	"lsmean" = pad_in_rows(df$estimate_contr[has_trt], .formats = formats$lsmean_diff),
157	12x	"se" = pad_in_rows(df$se_contr[has_trt], .formats = formats$se),
158	12x	"ci" = pad_in_rows(
159	12x	tern::combine_vectors(df$lower_cl_contr[has_trt], df$upper_cl_contr[has_trt]),
160	12x	.formats = formats$ci
161		),
162		"pval" = {
163	13x	pval_sided <- match.arg(pval_sided)
164	13x	pval_col <- switch(pval_sided,
165	13x	`2` = "p_value",
166	13x	`1` = "p_value_greater",
167	13x	`-1` = "p_value_less"
168		)
169	13x	pad_in_rows(df[[pval_col]][has_trt], .formats = formats$pval)
170		}
171		)
172		}
173		}
174
175		#' Layout Generating Function for LS Means Wide Table Layouts
176		#'
177		#' @inheritParams proposal_argument_convention
178		#' @inheritParams lsmeans_wide_cfun
179		#' @param lyt empty layout, i.e. result of [rtables::basic_table()]
180		#' @param include_variance (`flag`)\cr whether to include the variance statistics
181		#' (M.S. error and d.f.).
182		#' @param include_pval (`flag`)\cr whether to include the p-value column.
183		#'
184		#' @return Modified layout.
185		#' @export
186		#' @examples
187		#' variables <- list(
188		#' response = "FEV1",
189		#' covariates = c("RACE", "SEX"),
190		#' arm = "ARMCD",
191		#' id = "USUBJID",
192		#' visit = "AVISIT"
193		#' )
194		#' fit <- fit_ancova(
195		#' vars = variables,
196		#' data = mmrm::fev_data,
197		#' conf_level = 0.9,
198		#' weights_emmeans = "equal"
199		#' )
200		#' anl <- broom::tidy(fit)
201		#' basic_table() \|>
202		#' summarize_lsmeans_wide(
203		#' variables = variables,
204		#' ref_level = fit$ref_level,
205		#' treatment_levels = fit$treatment_levels,
206		#' pval_sided = "2",
207		#' conf_level = 0.8
208		#' ) \|>
209		#' build_table(df = anl)
210		summarize_lsmeans_wide <- function(
211		lyt,
212		variables,
213		ref_level,
214		treatment_levels,
215		conf_level,
216		pval_sided = "2",
217		include_variance = TRUE,
218		include_pval = TRUE,
219		formats = list(
220		lsmean = jjcsformat_xx("xx.x"),
221		mse = jjcsformat_xx("xx.x"),
222		df = jjcsformat_xx("xx."),
223		lsmean_diff = jjcsformat_xx("xx.x"),
224		se = jjcsformat_xx("xx.xx"),
225		ci = jjcsformat_xx("(xx.xx, xx.xx)"),
226		pval = jjcsformat_pval_fct(0)
227		)) {
228		# Check that all required format elements are present in the formats parameter
229	3x	checkmate::assert_names(
230	3x	names(formats),
231	3x	must.include = c("lsmean", "mse", "df", "lsmean_diff", "se", "ci", "pval"),
232	3x	.var.name = "formats"
233		)
234	3x	lyt \|>
235	3x	split_cols_by(
236	3x	variables$arm,
237	3x	split_fun = lsmeans_wide_first_split_fun_fct(include_variance = include_variance)
238		) \|>
239	3x	split_cols_by(
240	3x	variables$arm,
241	3x	split_fun = lsmeans_wide_second_split_fun_fct(
242	3x	include_pval = include_pval,
243	3x	pval_sided = pval_sided,
244	3x	conf_level = conf_level
245		)
246		) \|>
247	3x	split_rows_by(variables$visit, section_div = "") \|>
248	3x	summarize_row_groups(
249	3x	var = variables$arm,
250	3x	cfun = lsmeans_wide_cfun,
251	3x	extra_args = list(
252	3x	variables = variables,
253	3x	ref_level = ref_level,
254	3x	treatment_levels = treatment_levels,
255	3x	pval_sided = pval_sided,
256	3x	conf_level = conf_level,
257	3x	formats = formats
258		)
259		)
260		}

1		#' Proportion difference estimation
2		#'
3		#' The analysis function [a_proportion_diff_j()] can be used to create a layout element to estimate
4		#' the difference in proportion of responders within a studied population. The primary analysis variable,
5		#' `vars`, is a logical variable indicating whether a response has occurred for each record. See the `method`
6		#' parameter for options of methods to use when constructing the confidence interval of the proportion difference.
7		#' A stratification variable can be supplied via the `strata` element of the `variables` argument.
8		#'
9		#' @param df (`data.frame`)\cr input data frame.
10		#' @param .var (`string`)\cr name of the response variable.
11		#' @param ref_path (`character`)\cr path to the reference group.
12		#' @param .spl_context (`environment`)\cr split context environment.
13		#' @param ... Additional arguments passed to the statistics function.
14		#' @param .stats (`character`)\cr statistics to calculate.
15		#' @param .formats (`list`)\cr formats for the statistics.
16		#' @param .labels (`list`)\cr labels for the statistics.
17		#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
18		#' @param .ref_group (`data.frame`)\cr reference group data frame.
19		#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column.
20		#' @param variables (`list`)\cr list with strata variable names.
21		#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
22		#' @param method (`string`)\cr method to use for confidence interval calculation.
23		#' @param weights_method (`string`)\cr method to use for weights calculation in stratified analysis.
24		#'
25		#' @name prop_diff
26		#' @order 1
27		#'
28		#' @note The [a_proportion_diff_j()] function has the `_j` suffix to distinguish it
29		#' from [tern::a_proportion_diff()]. The functions here are a copy from the `tern` package
30		#' with additional features:
31		#'
32		#' * Additional statistic `diff_est_ci` is returned.
33		#' * `ref_path` needs to be provided as extra argument to specify the control group column.
34		#'
35		NULL
36
37		#' @describeIn prop_diff Statistics function estimating the difference
38		#' in terms of responder proportion.
39		#'
40		#' @return
41		#' * `s_proportion_diff_j()` returns a named list of elements `diff`,
42		#' `diff_ci`, `diff_est_ci` and `diff_ci_3d`.
43		#'
44		#' @note When performing an unstratified analysis, methods `'cmh'`, `'strat_newcombe'`,
45		#' and `'strat_newcombecc'` are not permitted.
46		#'
47		#' @examples
48		#'
49		#' s_proportion_diff_j(
50		#' df = subset(dta, grp == "A"),
51		#' .var = "rsp",
52		#' .ref_group = subset(dta, grp == "B"),
53		#' .in_ref_col = FALSE,
54		#' conf_level = 0.90,
55		#' method = "ha"
56		#' )
57		#'
58		#' s_proportion_diff_j(
59		#' df = subset(dta, grp == "A"),
60		#' .var = "rsp",
61		#' .ref_group = subset(dta, grp == "B"),
62		#' .in_ref_col = FALSE,
63		#' variables = list(strata = c("f1", "f2")),
64		#' conf_level = 0.90,
65		#' method = "cmh"
66		#' )
67		#'
68		#' @export
69		#' @order 3
70		s_proportion_diff_j <- function(
71		df,
72		.var,
73		.ref_group,
74		.in_ref_col,
75		variables = list(strata = NULL),
76		conf_level = 0.95,
77		method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe", "strat_newcombecc"),
78		weights_method = "cmh") {
79	339x	start <- s_proportion_diff(
80	339x	df = df,
81	339x	.var = .var,
82	339x	.ref_group = .ref_group,
83	339x	.in_ref_col = .in_ref_col,
84	339x	variables = variables,
85	339x	conf_level = conf_level,
86	339x	method = method,
87	339x	weights_method = weights_method
88		)
89
90	339x	c(
91	339x	start,
92	339x	list(
93	339x	diff_est_ci = with_label(
94	339x	c(start$diff, start$diff_ci),
95	339x	paste0("% Difference (", f_conf_level(conf_level), ")")
96		),
97	339x	diff_ci_3d = with_label(
98	339x	c(start$diff, start$diff_ci),
99	339x	paste0("Relative Risk (", f_conf_level(conf_level), ")")
100		)
101		)
102		)
103		}
104
105		#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
106		#'
107		#' @return
108		#' * `a_proportion_diff_j()` returns the corresponding list with formatted [rtables::CellValue()].
109		#'
110		#' @examples
111		#' nex <- 100
112		#' dta <- data.frame(
113		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
114		#' "grp" = sample(c("A", "B"), nex, TRUE),
115		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
116		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
117		#' stringsAsFactors = TRUE
118		#' )
119		#'
120		#' l <- basic_table() \|>
121		#' split_cols_by(var = "grp") \|>
122		#' analyze(
123		#' vars = "rsp",
124		#' afun = a_proportion_diff_j,
125		#' show_labels = "hidden",
126		#' na_str = tern::default_na_str(),
127		#' extra_args = list(
128		#' conf_level = 0.9,
129		#' method = "ha",
130		#' ref_path = c("grp", "B")
131		#' )
132		#' )
133		#'
134		#' build_table(l, df = dta)
135		#' @export
136		#' @order 2
137		a_proportion_diff_j <- function(
138		df,
139		.var,
140		ref_path,
141		.spl_context,
142		...,
143		.stats = NULL,
144		.formats = NULL,
145		.labels = NULL,
146		.indent_mods = NULL) {
147		# Check for additional parameters to the statistics function
148	2x	dots_extra_args <- list(...)
149
150		# Only support default stats, not custom stats
151	2x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
152
153		# Obtain reference column information
154	2x	ref <- get_ref_info(ref_path, .spl_context)
155
156		# Apply statistics function
157	2x	x_stats <- .apply_stat_functions(
158	2x	default_stat_fnc = s_proportion_diff_j,
159	2x	custom_stat_fnc_list = NULL,
160	2x	args_list = c(
161	2x	df = list(df),
162	2x	.var = .var,
163	2x	.ref_group = list(ref$ref_group),
164	2x	.in_ref_col = ref$in_ref_col,
165	2x	dots_extra_args
166		)
167		)
168
169		# Format according to specifications
170	2x	format_stats(
171	2x	x_stats,
172	2x	method_groups = "proportion_diff",
173	2x	stats_in = .stats,
174	2x	formats_in = .formats,
175	2x	labels_in = .labels,
176	2x	indents_in = .indent_mods
177		)
178		}

1		#' Odds ratio estimation
2		#'
3		#' @description `r lifecycle::badge('stable')`
4		#'
5		#' @param method (`string`)\cr whether to use the correct (`'exact'`) calculation in the conditional likelihood or one
6		#' of the approximations, or the CMH method. See [survival::clogit()] for details.
7		#'
8		#' @param df (`data.frame`)\cr input data frame.
9		#' @param .var (`string`)\cr name of the response variable.
10		#' @param .df_row (`data.frame`)\cr data frame containing all rows.
11		#' @param ref_path (`character`)\cr path to the reference group.
12		#' @param .spl_context (`environment`)\cr split context environment.
13		#' @param ... Additional arguments passed to the statistics function.
14		#' @param .stats (`character`)\cr statistics to calculate.
15		#' @param .formats (`list`)\cr formats for the statistics.
16		#' @param .labels (`list`)\cr labels for the statistics.
17		#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
18		#' @param .ref_group (`data.frame`)\cr reference group data frame.
19		#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column.
20		#' @param variables (`list`)\cr list with arm and strata variable names.
21		#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
22		#' @param groups_list (`list`)\cr list of groups for combination.
23		#'
24		#' @note
25		#' The `a_odds_ratio_j()` and `s_odds_ratio_j()` functions have the `_j` suffix to distinguish them
26		#' from [tern::a_odds_ratio()] and [tern::s_odds_ratio()], respectively.
27		#' These functions differ as follows:
28		#'
29		#' * Additional `method = 'cmh'` option is provided to calculate the Cochran-Mantel-Haenszel estimate.
30		#' * The p-value is returned as an additional statistic.
31		#'
32		#' Once these updates are contributed back to `tern`, they can later be replaced by the `tern` versions.
33		#'
34		#' @name odds_ratio
35		#' @order 1
36		NULL
37
38		#' @describeIn odds_ratio Statistics function which estimates the odds ratio
39		#' between a treatment and a control. A `variables` list with `arm` and `strata`
40		#' variable names must be passed if a stratified analysis is required.
41		#'
42		#' @param na_if_no_events (`flag`)\cr whether the point estimate should be `NA` if there
43		#' are no events in one arm. The p-value and confidence interval will still be computed.
44		#'
45		#' @return
46		#' * `s_odds_ratio_j()` returns a named list with the statistics `or_ci`
47		#' (containing `est`, `lcl`, and `ucl`), `pval` and `n_tot`.
48		#'
49		#' @examples
50		#' s_odds_ratio_j(
51		#' df = subset(dta, grp == "A"),
52		#' .var = "rsp",
53		#' .ref_group = subset(dta, grp == "B"),
54		#' .in_ref_col = FALSE,
55		#' .df_row = dta
56		#' )
57		#'
58		#' s_odds_ratio_j(
59		#' df = subset(dta, grp == "A"),
60		#' .var = "rsp",
61		#' .ref_group = subset(dta, grp == "B"),
62		#' .in_ref_col = FALSE,
63		#' .df_row = dta,
64		#' variables = list(arm = "grp", strata = "strata")
65		#' )
66		#'
67		#' s_odds_ratio_j(
68		#' df = subset(dta, grp == "A"),
69		#' method = "cmh",
70		#' .var = "rsp",
71		#' .ref_group = subset(dta, grp == "B"),
72		#' .in_ref_col = FALSE,
73		#' .df_row = dta,
74		#' variables = list(arm = "grp", strata = c("strata"))
75		#' )
76		#' @export
77		s_odds_ratio_j <- function(
78		df,
79		.var,
80		.ref_group,
81		.in_ref_col,
82		.df_row,
83		variables = list(arm = NULL, strata = NULL),
84		conf_level = 0.95,
85		groups_list = NULL,
86		na_if_no_events = TRUE,
87		method = c("exact", "approximate", "efron", "breslow", "cmh")) {
88	17x	checkmate::assert_flag(na_if_no_events)
89		# New: pval here
90	17x	y <- list(or_ci = list(), n_tot = list(), pval = list())
91	17x	method <- match.arg(method)
92	17x	one_group_empty <- nrow(df) == 0 \|\| nrow(.ref_group) == 0
93
94	17x	if (!.in_ref_col) {
95	15x	(assert_proportion_value)(conf_level)
96	15x	assert_df_with_variables(df, list(rsp = .var))
97	15x	assert_df_with_variables(.ref_group, list(rsp = .var))
98
99	15x	if (one_group_empty) {
100	1x	y <- list(or_ci = c(est = NA_real_, lcl = 0, ucl = Inf), n_tot = c(n_tot = nrow(df) + nrow(.ref_group)), pval = 1)
101	14x	} else if (is.null(variables$strata)) {
102	6x	data <- data.frame(
103	6x	rsp = c(.ref_group[[.var]], df[[.var]]),
104	6x	grp = factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref"))
105		)
106	6x	y <- or_glm_j(data, conf_level = conf_level)
107		} else {
108	8x	assert_df_with_variables(.df_row, c(list(rsp = .var), variables))
109	8x	checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow", "cmh"), empty.ok = FALSE)
110		# The group variable prepared for stratified analysis must be synchronized with the combination groups
111		# definition.
112	8x	if (is.null(groups_list)) {
113	8x	ref_grp <- as.character(unique(.ref_group[[variables$arm]]))
114	8x	trt_grp <- as.character(unique(df[[variables$arm]]))
115	8x	grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
116		} else {
117		# If more than one level in reference col.
118	!	reference <- as.character(unique(.ref_group[[variables$arm]]))
119	!	grp_ref_flag <- vapply(X = groups_list, FUN.VALUE = TRUE, FUN = function(x) all(reference %in% x))
120	!	ref_grp <- names(groups_list)[grp_ref_flag]
121
122		# If more than one level in treatment col.
123	!	treatment <- as.character(unique(df[[variables$arm]]))
124	!	grp_trt_flag <- vapply(X = groups_list, FUN.VALUE = TRUE, FUN = function(x) all(treatment %in% x))
125	!	trt_grp <- names(groups_list)[grp_trt_flag]
126
127	!	grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp)
128	!	grp <- combine_levels(grp, levels = treatment, new_level = trt_grp)
129		}
130
131		# New: CMH method
132	8x	if (method == "cmh") {
133	7x	data <- data.frame(
134	7x	rsp = c(.ref_group[[.var]], df[[.var]]),
135	7x	grp = factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref")),
136	7x	strata = interaction(rbind(.ref_group[variables$strata], df[variables$strata]))
137		)
138
139	7x	y <- or_cmh(data, conf_level = conf_level)
140		} else {
141		# The reference level in `grp` must be the same as in the `rtables` column split.
142	1x	data <- data.frame(rsp = .df_row[[.var]], grp = grp, strata = interaction(.df_row[variables$strata]))
143
144	1x	y_all <- or_clogit_j(data, conf_level = conf_level, method = method)
145	1x	checkmate::assert_string(trt_grp)
146		# New: pval here
147	1x	checkmate::assert_subset(trt_grp, names(y_all$or_ci_pvals))
148	1x	y_or_ci_pval <- y_all$or_ci_pvals[[trt_grp]]
149	1x	y$or_ci <- y_or_ci_pval[c("est", "lcl", "ucl")]
150	1x	y$n_tot <- y_all$n_tot
151	1x	y$pval <- y_or_ci_pval["pval"]
152		}
153		}
154
155	15x	one_group_no_events <- (sum(.ref_group[[.var]]) == 0) \|\| (sum(df[[.var]]) == 0)
156	15x	if (na_if_no_events && one_group_no_events) {
157	4x	y$or_ci[["est"]] <- NA_real_
158		}
159
160	15x	na_because_sparse <- one_group_empty \|\| (na_if_no_events && one_group_no_events)
161	15x	if ("est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]]) && !na_because_sparse && method != "approximate") {
162	!	warning(
163	!	"Unable to compute the odds ratio estimate. Please try re-running the function with ",
164	!	"parameter `method` set to \"approximate\"."
165		)
166		}
167		}
168
169	17x	y$or_ci <- with_label(x = y$or_ci, label = paste0("Odds Ratio (", 100 * conf_level, "% CI)"))
170
171		# New: pval here
172	17x	y$pval <- unname(y$pval)
173
174	17x	y
175		}
176
177		#' @describeIn odds_ratio Formatted analysis function which is used as `afun`. Note that the
178		#' junco specific `ref_path` and `.spl_context` arguments are used for reference column information.
179		#'
180		#' @return
181		#' * `a_odds_ratio_j()` returns the corresponding list with formatted [rtables::CellValue()].
182		#'
183		#' @examples
184		#' set.seed(12)
185		#' dta <- data.frame(
186		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
187		#' grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
188		#' strata = factor(sample(c("C", "D"), 100, TRUE))
189		#' )
190		#'
191		#' a_odds_ratio_j(
192		#' df = subset(dta, grp == "A"),
193		#' .var = "rsp",
194		#' ref_path = c("grp", "B"),
195		#' .spl_context = data.frame(
196		#' cur_col_split = I(list("grp")),
197		#' cur_col_split_val = I(list(c(grp = "A"))),
198		#' full_parent_df = I(list(dta))
199		#' ),
200		#' .df_row = dta
201		#' )
202		#'
203		#'
204		#' l <- basic_table() \|>
205		#' split_cols_by(var = "grp") \|>
206		#' analyze(
207		#' "rsp",
208		#' afun = a_odds_ratio_j,
209		#' show_labels = "hidden",
210		#' extra_args = list(
211		#' ref_path = c("grp", "B"),
212		#' .stats = c("or_ci", "pval")
213		#' )
214		#' )
215		#'
216		#' build_table(l, df = dta)
217		#'
218		#' l2 <- basic_table() \|>
219		#' split_cols_by(var = "grp") \|>
220		#' analyze(
221		#' "rsp",
222		#' afun = a_odds_ratio_j,
223		#' show_labels = "hidden",
224		#' extra_args = list(
225		#' variables = list(arm = "grp", strata = "strata"),
226		#' method = "cmh",
227		#' ref_path = c("grp", "A"),
228		#' .stats = c("or_ci", "pval")
229		#' )
230		#' )
231		#'
232		#' build_table(l2, df = dta)
233		#' @export
234		#' @order 2
235		a_odds_ratio_j <- function(
236		df,
237		.var,
238		.df_row,
239		ref_path,
240		.spl_context,
241		...,
242		.stats = NULL,
243		.formats = NULL,
244		.labels = NULL,
245		.indent_mods = NULL) {
246		# Check for additional parameters to the statistics function
247	4x	dots_extra_args <- list(...)
248
249		# Only support default stats, not custom stats
250	4x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
251
252		# Obtain reference column information
253	4x	ref <- get_ref_info(ref_path, .spl_context)
254
255		# Apply statistics function
256	4x	x_stats <- .apply_stat_functions(
257	4x	default_stat_fnc = s_odds_ratio_j,
258	4x	custom_stat_fnc_list = NULL,
259	4x	args_list = c(
260	4x	df = list(df),
261	4x	.var = .var,
262	4x	.df_row = list(.df_row),
263	4x	.ref_group = list(ref$ref_group),
264	4x	.in_ref_col = ref$in_ref_col,
265	4x	dots_extra_args
266		)
267		)
268
269		# Format according to specifications
270	4x	format_stats(
271	4x	x_stats,
272	4x	method_groups = "odds_ratio",
273	4x	stats_in = .stats,
274	4x	formats_in = .formats,
275	4x	labels_in = .labels,
276	4x	indents_in = .indent_mods
277		)
278		}
279
280		#' Helper functions for odds ratio estimation
281		#'
282		#' @description `r lifecycle::badge('stable')`
283		#'
284		#' Functions to calculate odds ratios in [s_odds_ratio_j()].
285		#'
286		#' @inheritParams odds_ratio
287		#' @inheritParams proposal_argument_convention
288		#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
289		#' `strata` for [or_clogit_j()].
290		#' @return A named `list` of elements `or_ci`, `n_tot` and `pval`.
291		#'
292		#' @seealso [odds_ratio]
293		#'
294		#' @name h_odds_ratio
295		NULL
296
297		#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
298		#' exactly 2 groups in `data` as specified by the `grp` variable.
299		#'
300		#' @examples
301		#' data <- data.frame(
302		#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
303		#' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
304		#' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
305		#' stringsAsFactors = TRUE
306		#' )
307		#'
308		#' or_glm_j(data, conf_level = 0.95)
309		#'
310		#' @export
311		or_glm_j <- function(data, conf_level) {
312	7x	checkmate::assert_logical(data$rsp)
313	7x	(assert_proportion_value)(conf_level)
314	7x	assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
315	7x	checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
316
317	7x	data$grp <- as_factor_keep_attributes(data$grp)
318	7x	assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)
319	7x	formula <- stats::as.formula("rsp ~ grp")
320	7x	model_fit <- stats::glm(formula = formula, data = data, family = stats::binomial(link = "logit"))
321
322		# Note that here we need to discard the intercept.
323	7x	or <- exp(stats::coef(model_fit)[-1])
324	7x	or_ci <- exp(stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE])
325
326	7x	values <- stats::setNames(c(or, or_ci), c("est", "lcl", "ucl"))
327	7x	n_tot <- stats::setNames(nrow(model_fit$model), "n_tot")
328
329		# New: pval here
330	7x	pval <- summary(model_fit)$coef[-1, "Pr(>\|z\|)"]
331
332	7x	list(or_ci = values, n_tot = n_tot, pval = pval)
333		}
334
335		#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
336		#' the whole data set including all groups, since the results are not the same as when doing
337		#' pairwise comparisons between the groups.
338		#'
339		#' @examples
340		#' data <- data.frame(
341		#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
342		#' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
343		#' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
344		#' stringsAsFactors = TRUE
345		#' )
346		#'
347		#' or_clogit_j(data, conf_level = 0.95)
348		#'
349		#' @export
350		or_clogit_j <- function(data, conf_level, method = "exact") {
351	1x	checkmate::assert_logical(data$rsp)
352	1x	(assert_proportion_value)(conf_level)
353	1x	assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
354	1x	checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
355	1x	checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))
356	1x	checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)
357
358	1x	data$grp <- as_factor_keep_attributes(data$grp)
359	1x	data$strata <- as_factor_keep_attributes(data$strata)
360
361		# Deviation from convention: `survival::strata` must be simply `strata`.
362	1x	strata <- survival::strata
363	1x	formula <- stats::as.formula("rsp ~ grp + strata(strata)")
364	1x	model_fit <- clogit_with_tryCatch(
365	1x	formula = formula,
366	1x	data = data,
367	1x	method = method
368		)
369
370		# New: pval here
371
372		# Create a list with one set of OR estimates, CI and p-value per coefficient, i.e. comparison of one group vs. the
373		# reference group.
374	1x	coef_est <- stats::coef(model_fit)
375	1x	ci_est <- stats::confint(model_fit, level = conf_level)
376	1x	pvals <- summary(model_fit)$coef[, "Pr(>\|z\|)", drop = FALSE]
377	1x	or_ci_pvals <- list()
378	1x	for (coef_name in names(coef_est)) {
379	1x	grp_name <- gsub("^grp", "", x = coef_name)
380	1x	or_ci_pvals[[grp_name]] <- stats::setNames(
381	1x	object = c(exp(c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])), pvals[coef_name, 1]),
382	1x	nm = c("est", "lcl", "ucl", "pval")
383		)
384		}
385	1x	list(or_ci_pvals = or_ci_pvals, n_tot = c(n_tot = model_fit$n))
386		}
387
388		#' @describeIn h_odds_ratio Estimates the odds ratio based on CMH. Note that there must be
389		#' exactly 2 groups in `data` as specified by the `grp` variable.
390		#'
391		#' @examples
392		#' set.seed(123)
393		#' data <- data.frame(
394		#' rsp = as.logical(rbinom(n = 40, size = 1, prob = 0.5)),
395		#' grp = letters[sample(1:2, size = 40, replace = TRUE)],
396		#' strata = LETTERS[sample(1:2, size = 40, replace = TRUE)],
397		#' stringsAsFactors = TRUE
398		#' )
399		#'
400		#' or_cmh(data, conf_level = 0.95)
401		#'
402		#' @export
403		or_cmh <- function(data, conf_level) {
404	7x	checkmate::assert_logical(data$rsp)
405	7x	assert_proportion_value(conf_level)
406	7x	assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
407	7x	checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
408
409	7x	data$grp <- as_factor_keep_attributes(data$grp)
410	7x	assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)
411
412		# first dimension: treatment, control - therefore we reverse the levels order 2nd dimension: TRUE, FALSE 3rd
413		# dimension: levels of strata rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
414	7x	t_tbl <- table(
415	7x	factor(data$grp, levels = rev(levels(data$grp))),
416	7x	factor(data$rsp, levels = c("TRUE", "FALSE")),
417	7x	data$strata
418		)
419
420	7x	trt_ind <- 1
421	7x	ctrl_ind <- 2
422
423	7x	resp_ind <- 1
424	7x	nonresp_ind <- 2
425
426	7x	n1resp <- t_tbl[trt_ind, resp_ind, ]
427	7x	n2resp <- t_tbl[ctrl_ind, resp_ind, ]
428	7x	n1non <- t_tbl[trt_ind, nonresp_ind, ]
429	7x	n2non <- t_tbl[ctrl_ind, nonresp_ind, ]
430		# Example: n2non are the control patient non-responders per stratum.
431
432		# CMH statistic for odds ratio, Treatment over Control
433	7x	use_stratum <- (n1resp + n2resp + n1non + n2non) > 0
434	7x	n1resp <- n1resp[use_stratum]
435	7x	n2resp <- n2resp[use_stratum]
436	7x	n1non <- n1non[use_stratum]
437	7x	n2non <- n2non[use_stratum]
438	7x	n <- n1resp + n2resp + n1non + n2non
439
440	7x	or_num <- sum(n1resp * n2non / n)
441	7x	or_denom <- sum(n1non * n2resp / n)
442	7x	log_or <- log(or_num) - log(or_denom)
443	7x	or <- exp(log_or)
444
445	7x	term1_num <- sum((n1resp + n2non) * n1resp * n2non / n^2)
446	7x	term1_denom <- 2 * (sum(n1resp * n2non / n))^2
447
448	7x	term2_num <- sum(((n1resp + n2non) * n1non * n2resp + (n1non + n2resp) * n1resp * n2non) / n^2)
449	7x	term2_denom <- 2 * (sum(n1resp * n2non / n)) * (sum(n1non * n2resp / n))
450
451	7x	term3_num <- sum((n1non + n2resp) * n1non * n2resp / n^2)
452	7x	term3_denom <- 2 * (sum(n1non * n2resp / n))^2
453
454	7x	var_log_or <- term1_num / term1_denom + term2_num / term2_denom + term3_num / term3_denom
455	7x	se_log_or <- sqrt(var_log_or)
456
457	7x	z <- stats::qnorm((1 + conf_level) / 2)
458	7x	log_rel_or <- log_or + c(-1, +1) * z * se_log_or
459	7x	ci <- exp(log_rel_or)
460	7x	or_ci <- stats::setNames(c(or, ci), c("est", "lcl", "ucl"))
461
462	7x	pval <- stats::mantelhaen.test(t_tbl, correct = FALSE)$p.value
463
464	7x	n_tot <- stats::setNames(sum(n), "n_tot")
465
466	7x	list(or_ci = or_ci, n_tot = n_tot, pval = pval)
467		}

1		#' Get default statistical methods and their associated formats, labels, and indent modifiers
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' @note These functions have been copied from the `tern` package file
6		#' `utils_default_stats_formats_labels.R` from GitHub development version 0.9.7.9017.
7		#' Slight modifications have been applied to enhance functionality:
8		#'
9		#' * `tern_get_stats` added the `tern_stats` argument to avoid hardcoding within the function's body.
10		#' * `tern_get_labels_from_stats` is more careful when receiving partial `labels_in`
11		#' and partial `label_attr_from_stats`.
12		#'
13		#' Once these features are included in the `tern` package, this file could be removed from
14		#' the `junco` package, and the functions could be used from the `tern` namespace directly.
15		#'
16		#' @name tern_default_stats_formats_labels
17		NULL
18
19
20		# Utility function used to separate custom stats (user-defined functions) from defaults
21		.split_std_from_custom_stats <- function(stats_in) {
22	144x	out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL)
23	144x	if (is.list(stats_in)) {
24	1x	is_custom_fnc <- sapply(stats_in, is.function)
25	1x	checkmate::assert_list(
26	1x	stats_in[is_custom_fnc],
27	1x	types = "function",
28	1x	names = "named"
29		)
30	1x	out[["custom_stats"]] <- stats_in[is_custom_fnc]
31	1x	out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc])
32	1x	all_stats <- names(stats_in) # to keep the order
33	1x	all_stats[!is_custom_fnc] <- out[["default_stats"]]
34	1x	out[["all_stats"]] <- all_stats
35		} else {
36	143x	out[["default_stats"]] <- out[["all_stats"]] <- stats_in
37		}
38	144x	out
39		}
40
41		# Utility function to apply statistical functions
42		.apply_stat_functions <- function(
43		default_stat_fnc,
44		custom_stat_fnc_list,
45		args_list) {
46		# Default checks
47	144x	checkmate::assert_function(default_stat_fnc)
48	144x	checkmate::assert_list(
49	144x	custom_stat_fnc_list,
50	144x	types = "function",
51	144x	null.ok = TRUE,
52	144x	names = "named"
53		)
54	144x	checkmate::assert_list(args_list)
55
56		# Checking custom stats have same formals
57	144x	if (!is.null(custom_stat_fnc_list)) {
58	1x	fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]]
59	1x	for (fnc in custom_stat_fnc_list) {
60	1x	if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) {
61	!	stop(
62	!	"The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
63	!	"as the default statistical function. In this case your custom function has ",
64	!	names(formals(fnc))[[1]],
65	!	" as first parameter, while the default function has ",
66	!	fundamental_call_to_data,
67		"."
68		)
69		}
70	1x	if (!any(names(formals(fnc)) == "...")) {
71	!	stop(
72	!	"The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
73	!	"In this case your custom function does not have `...`."
74		)
75		}
76		}
77		}
78
79		# Applying
80	144x	out_default <- do.call(default_stat_fnc, args = args_list)
81	144x	out_custom <- lapply(
82	144x	custom_stat_fnc_list,
83	144x	function(fnc) do.call(fnc, args = args_list)
84		)
85
86		# Merging
87	144x	c(out_default, out_custom)
88		}
89
90		#' @describeIn tern_default_stats_formats_labels Get statistics available for a given method
91		#' group (analyze function).
92		#' @keywords internal
93		tern_get_stats <- function(
94		method_groups = "analyze_vars_numeric",
95		stats_in = NULL,
96		custom_stats_in = NULL,
97		add_pval = FALSE,
98		tern_defaults = tern_default_stats) {
99	1111x	checkmate::assert_character(method_groups)
100	1111x	checkmate::assert_character(stats_in, null.ok = TRUE)
101	1111x	checkmate::assert_character(custom_stats_in, null.ok = TRUE)
102	1111x	checkmate::assert_flag(add_pval)
103
104		# Default is still numeric
105	1111x	if (any(method_groups == "analyze_vars")) {
106	3x	method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric"
107		}
108
109	1111x	type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks
110
111		# Defaults for loop
112	1111x	out <- NULL
113
114		# Loop for multiple method groups
115	1111x	for (mgi in method_groups) {
116	1112x	if (mgi %in% names(tern_defaults)) {
117	1111x	out_tmp <- tern_defaults[[mgi]]
118		} else {
119	1x	stop(
120	1x	"The selected method group (",
121	1x	mgi,
122	1x	") has no default statistical method."
123		)
124		}
125	1111x	out <- unique(c(out, out_tmp))
126		}
127
128		# Add custom stats
129	1110x	out <- c(out, custom_stats_in)
130
131		# If you added pval to the stats_in you certainly want it
132	1110x	if (!is.null(stats_in) && any(grepl("^pval", stats_in))) {
133	21x	stats_in_pval_value <- stats_in[grepl("^pval", stats_in)]
134
135		# Must be only one value between choices
136	21x	checkmate::assert_choice(
137	21x	stats_in_pval_value,
138	21x	c("pval", "pval_counts", "pvalue")
139		)
140
141		# Mismatch with counts and numeric
142		if (
143	20x	any(grepl("counts", method_groups)) &&
144	20x	stats_in_pval_value != "pval_counts" \|\|
145	20x	any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval"
146		) {
147		# nolint
148	2x	stop(
149	2x	"Inserted p-value (",
150	2x	stats_in_pval_value,
151	2x	") is not valid for type ",
152	2x	type_tmp,
153	2x	". Use ",
154	2x	paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")),
155	2x	" instead."
156		)
157		}
158
159		# Lets add it even if present (thanks to unique)
160	18x	add_pval <- TRUE
161		}
162
163		# Mainly used in "analyze_vars" but it could be necessary elsewhere
164	1107x	if (isTRUE(add_pval)) {
165	21x	if (any(grepl("counts", method_groups))) {
166	1x	out <- unique(c(out, "pval_counts"))
167		} else {
168	20x	out <- unique(c(out, "pval"))
169		}
170		}
171
172		# Filtering for stats_in (character vector)
173	1107x	if (!is.null(stats_in)) {
174	591x	out <- intersect(stats_in, out) # It orders them too
175		}
176
177		# If intersect did not find matches (and no pval?) -> error
178	1107x	if (length(out) == 0) {
179	2x	stop(
180	2x	"The selected method group(s) (",
181	2x	paste0(method_groups, collapse = ", "),
182		")",
183	2x	" do not have the required default statistical methods:\n",
184	2x	paste0(stats_in, collapse = " ")
185		)
186		}
187
188	1105x	out
189		}
190
191		#' @describeIn tern_default_stats_formats_labels Get formats corresponding to a list of statistics.
192		#' @keywords internal
193		tern_get_formats_from_stats <- function(
194		stats,
195		formats_in = NULL,
196		levels_per_stats = NULL,
197		tern_defaults = tern_default_formats) {
198	636x	checkmate::assert_character(stats, min.len = 1)
199		# It may be a list if there is a function in the formats
200	636x	if (checkmate::test_list(formats_in, null.ok = TRUE)) {
201	635x	checkmate::assert_list(formats_in, null.ok = TRUE)
202		# Or it may be a vector of characters
203		} else {
204	1x	checkmate::assert_character(formats_in, null.ok = TRUE)
205		}
206	636x	checkmate::assert_list(levels_per_stats, null.ok = TRUE)
207
208		# If unnamed formats given as formats_in and same number of stats, use one format per stat
209		if (
210	636x	!is.null(formats_in) &&
211	636x	length(formats_in) == length(stats) &&
212	636x	is.null(names(formats_in)) &&
213	636x	is.null(levels_per_stats)
214		) {
215	!	out <- as.list(formats_in) \|> stats::setNames(stats)
216	!	return(out)
217		}
218
219		# If levels_per_stats not given, assume one row per statistic
220	153x	if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) \|> stats::setNames(stats)
221
222		# Apply custom formats
223	636x	out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults)
224
225		# Default to NULL if no format
226	636x	case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE)
227	636x	out[names(out) == out \| case_input_is_not_stat] <- list(NULL)
228
229	636x	out
230		}
231
232		#' @describeIn tern_default_stats_formats_labels Get labels corresponding to a list of statistics.
233		#' @keywords internal
234		tern_get_labels_from_stats <- function(
235		stats,
236		labels_in = NULL,
237		levels_per_stats = NULL,
238		label_attr_from_stats = NULL,
239		tern_defaults = tern_default_labels) {
240	596x	checkmate::assert_character(stats, min.len = 1)
241
242		# Modification:
243		# If any label_attr_from_stats is available and valid, save in labels_in
244		# if not specified there already (so don't overwrite labels_in).
245	596x	if (!is.null(label_attr_from_stats)) {
246	148x	valid_label_attr_from_stats <- label_attr_from_stats[
247	148x	nzchar(label_attr_from_stats) &
248	148x	!sapply(label_attr_from_stats, is.null) &
249	148x	!is.na(label_attr_from_stats)
250		]
251	148x	if (length(valid_label_attr_from_stats)) {
252	136x	do_save <- setdiff(
253	136x	names(valid_label_attr_from_stats),
254	136x	names(labels_in)
255		)
256	136x	labels_in <- c(labels_in, valid_label_attr_from_stats[do_save])
257		}
258		}
259
260		# It may be a list
261	596x	if (checkmate::test_list(labels_in, null.ok = TRUE)) {
262	571x	checkmate::assert_list(labels_in, null.ok = TRUE)
263		# Or it may be a vector of characters
264		} else {
265	25x	checkmate::assert_character(labels_in, null.ok = TRUE)
266		}
267	596x	checkmate::assert_list(levels_per_stats, null.ok = TRUE)
268
269		# If unnamed labels given as labels_in and same number of stats, use one label per stat
270		if (
271	596x	!is.null(labels_in) &&
272	596x	length(labels_in) == length(stats) &&
273	596x	is.null(names(labels_in)) &&
274	596x	is.null(levels_per_stats)
275		) {
276	!	out <- as.list(labels_in) \|> stats::setNames(stats)
277	!	return(out)
278		}
279
280		# If levels_per_stats not given, assume one row per statistic
281	156x	if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) \|> stats::setNames(stats)
282
283		# Apply custom labels
284	596x	out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults)
285	596x	out
286		}
287
288		# Function to loop over each stat and levels to set correct values
289		.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) {
290	1843x	out <- list()
291
292	1843x	for (stat_i in names(levels_per_stats)) {
293		# Get all levels of the statistic
294	3230x	all_lvls <- levels_per_stats[[stat_i]]
295
296	3230x	if ((length(all_lvls) == 1 && all_lvls == stat_i) \|\| is.null(all_lvls)) {
297		# One row per statistic
298	1946x	out[[stat_i]] <- if (stat_i %in% names(user_in)) {
299		# 1. Check for stat_i in user input
300	433x	user_in[[stat_i]]
301	1946x	} else if (stat_i %in% names(tern_defaults)) {
302		# 2. Check for stat_i in tern defaults
303	1504x	tern_defaults[[stat_i]]
304		} else {
305		# 3. Otherwise stat_i
306	9x	stat_i
307		}
308		} else {
309		# One row per combination of variable level and statistic
310		# Loop over levels for each statistic
311	1284x	for (lev_i in all_lvls) {
312		# Construct row name (stat_i.lev_i)
313	7115x	row_nm <- paste(stat_i, lev_i, sep = ".")
314
315	7115x	out[[row_nm]] <- if (row_nm %in% names(user_in)) {
316		# 1. Check for stat_i.lev_i in user input
317	1x	user_in[[row_nm]]
318	7115x	} else if (lev_i %in% names(user_in)) {
319		# 2. Check for lev_i in user input
320	5x	user_in[[lev_i]]
321	7115x	} else if (stat_i %in% names(user_in)) {
322		# 3. Check for stat_i in user input
323	1x	user_in[[stat_i]]
324	7115x	} else if (lev_i %in% names(tern_defaults)) {
325		# 4. Check for lev_i in tern defaults (only used for labels)
326	!	tern_defaults[[lev_i]]
327	7115x	} else if (stat_i %in% names(tern_defaults)) {
328		# 5. Check for stat_i in tern defaults
329	4712x	tern_defaults[[stat_i]]
330		} else {
331		# 6. Otherwise lev_i
332	2396x	lev_i
333		}
334		}
335		}
336		}
337
338	1843x	out
339		}
340
341		#' @describeIn tern_default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows.
342		#' @keywords internal
343		tern_get_indents_from_stats <- function(stats,
344		indents_in = NULL,
345		levels_per_stats = NULL,
346		tern_defaults = as.list(rep(0L, length(stats))) \|> stats::setNames(stats)) {
347	630x	checkmate::assert_character(stats, min.len = 1)
348		# It may be a list
349	630x	if (checkmate::test_list(indents_in, null.ok = TRUE)) {
350	598x	checkmate::assert_list(indents_in, null.ok = TRUE)
351		# Or it may be a vector of integers
352		} else {
353	32x	checkmate::assert_integerish(indents_in, null.ok = TRUE)
354		}
355	630x	checkmate::assert_list(levels_per_stats, null.ok = TRUE)
356
357		# If levels_per_stats not given, assume one row per statistic
358	152x	if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) \|> stats::setNames(stats)
359
360		# Single indentation level for all rows
361	630x	if (is.null(names(indents_in)) && length(indents_in) == 1) {
362	19x	out <- rep(indents_in, length(levels_per_stats \|> unlist()))
363	19x	return(out)
364		}
365
366		# Apply custom indentation
367	611x	out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults)
368	611x	out
369		}
370
371		# tern_default_labels ----
372
373		#' @describeIn tern_default_stats_formats_labels Named `character` vector of default labels for `tern`.
374		#' This is only copied here from the latest GitHub version, because otherwise a tern test fails.
375		#'
376		#' @keywords internal
377		tern_default_labels <- c(
378		cv = "CV (%)",
379		iqr = "IQR",
380		geom_cv = "CV % Geometric Mean",
381		geom_mean = "Geometric Mean",
382		geom_mean_sd = "Geometric Mean (SD)",
383		geom_mean_ci = "Geometric Mean 95% CI",
384		geom_mean_ci_3d = "Geometric Mean (95% CI)",
385		geom_sd = "Geometric SD",
386		mad = "Median Absolute Deviation",
387		max = "Maximum",
388		mean = "Mean",
389		mean_ci = "Mean 95% CI",
390		mean_ci_3d = "Mean (95% CI)",
391		mean_pval = "Mean p-value (H0: mean = 0)",
392		mean_sd = "Mean (SD)",
393		mean_sdi = "Mean -/+ 1xSD",
394		mean_se = "Mean (SE)",
395		mean_sei = "Mean -/+ 1xSE",
396		median = "Median",
397		median_ci = "Median 95% CI",
398		median_ci_3d = "Median (95% CI)",
399		median_range = "Median (Min - Max)",
400		min = "Minimum",
401		n = "n",
402		n_blq = "n_blq",
403		nonunique = "Number of events",
404		pval = "p-value (t-test)", # Default for numeric
405		pval_counts = "p-value (chi-squared test)", # Default for counts
406		quantiles = "25% and 75%-ile",
407		quantiles_lower = "25%-ile (95% CI)",
408		quantiles_upper = "75%-ile (95% CI)",
409		range = "Min - Max",
410		range_censor = "Range (censored)",
411		range_event = "Range (event)",
412		rate = "Adjusted Rate",
413		rate_ratio = "Adjusted Rate Ratio",
414		sd = "SD",
415		se = "SE",
416		sum = "Sum",
417		unique = "Number of patients with at least one event"
418		)

1		#' MMRM Analysis for Imputed Datasets
2		#'
3		#' Performs an MMRM for two or more groups returning the estimated
4		#' 'treatment effect' (i.e. the contrast between treatment groups and the control
5		#' group) and the least square means estimates in each group.
6		#'
7		#' @param data (`data.frame`)\cr containing the data to be used in the model.
8		#' @param vars (`vars`)\cr list as generated by [rbmi::set_vars()]. Only the `subjid`, `group`,
9		#' `visit`, `outcome` and `covariates` elements are required. See details.
10		#' @param cov_struct (`string`)\cr the covariance structure to use. Note that the same
11		#' covariance structure is assumed for all treatment groups.
12		#' @param visits (`NULL` or `character`)\cr An optional character vector specifying
13		#' which visits to fit the MMRM at. If `NULL`, the MMRM model will be fit to
14		#' the whole dataset.
15		#' @param weights (`string`)\cr the weighting strategy to be used when calculating the
16		#' least square means, either `'counterfactual'` or `'equal'`.
17		#' @param ... additional arguments passed to [mmrm::mmrm()], in particular
18		#' `method` and `vcov` to control the degrees of freedom and variance-covariance
19		#' adjustment methods as well as `reml` decide between REML and ML estimation.
20		#'
21		#' @details
22		#' The function works as follows:
23		#'
24		#' 1. Optionally select the subset of the `data` corresponding to `visits.
25		#' 2. Fit an MMRM as `vars$outcome ~ vars$group + vars$visit + vars$covariates`
26		#' with the specified covariance structure for visits within subjects.
27		#' 3. Extract the 'treatment effect' & least square means for each treatment group
28		#' vs the control group.
29		#'
30		#' In order to meet the formatting standards set by [rbmi::analyse()] the results will be collapsed
31		#' into a single list suffixed by the visit name, e.g.:
32		#' ```
33		#' list(
34		#' var_B_visit_1 = list(est = ...),
35		#' trt_B_visit_1 = list(est = ...),
36		#' lsm_A_visit_1 = list(est = ...),
37		#' lsm_B_visit_1 = list(est = ...),
38		#' var_B_visit_2 = list(est = ...),
39		#' trt_B_visit_2 = list(est = ...),
40		#' lsm_A_visit_2 = list(est = ...),
41		#' lsm_B_visit_2 = list(est = ...),
42		#' ...
43		#' )
44		#' ```
45		#' Please note that 'trt' refers to the treatment effects, and 'lsm' refers to the least
46		#' square mean results. In the above example `vars$group` has two factor levels A and B.
47		#' The new 'var' refers to the model estimated variance of the residuals at the given
48		#' visit, together with the degrees of freedom (which is treatment group specific).
49		#'
50		#' If you want to include additional interaction terms in your model this can be done
51		#' by providing them to the `covariates` argument of [rbmi::set_vars()]
52		#' e.g. `set_vars(covariates = c('sex*age'))`.
53		#'
54		#' @note The `group` and `visit` interaction `group:visit` is not included by
55		#' default in the model, therefore please add that to `covariates` manually if
56		#' you want to include it. This will make sense in most cases.
57		#' @inherit rbmi_ancova return
58		#'
59		#' @seealso [rbmi_analyse()]
60		#' @seealso [mmrm::mmrm()]
61		#' @seealso [rbmi::set_vars()]
62		#' @export
63		rbmi_mmrm <- function(
64		data,
65		vars,
66		cov_struct = c("us", "toep", "cs", "ar1"),
67		visits = NULL,
68		weights = c("counterfactual", "equal"),
69		...) {
70	1x	subjid <- vars[["subjid"]]
71	1x	outcome <- vars[["outcome"]]
72	1x	group <- vars[["group"]]
73	1x	visit <- vars[["visit"]]
74	1x	covariates <- vars[["covariates"]]
75
76	1x	checkmate::assert_string(subjid)
77	1x	checkmate::assert_string(outcome)
78	1x	checkmate::assert_string(group)
79	1x	checkmate::assert_string(visit)
80	1x	checkmate::assert_character(covariates, null.ok = TRUE)
81
82	1x	cov_struct <- match.arg(cov_struct)
83	1x	weights <- match.arg(weights)
84
85	1x	expected_vars <- c((utils::getFromNamespace("extract_covariates", "rbmi"))(covariates), outcome, group, subjid, visit)
86	1x	checkmate::assert_subset(expected_vars, names(data))
87
88	1x	checkmate::assert_factor(data[[visit]])
89	1x	if (is.null(visits)) {
90	1x	visits <- as.character(unique(data[[visit]]))
91		}
92	1x	checkmate::assert_subset(visits, as.character(data[[visit]]))
93	1x	data <- data[data[[visit]] %in% visits, , drop = FALSE]
94
95	1x	covariates_part <- paste(covariates, collapse = " + ")
96	1x	grp_visit_part <- paste(group, "+", visit)
97	1x	random_effects_part <- paste0(cov_struct, "(", visit, " \| ", subjid, ")")
98	1x	rhs_formula <- paste(grp_visit_part, "+", random_effects_part)
99	1x	if (covariates_part != "") rhs_formula <- paste(covariates_part, "+", rhs_formula)
100	1x	formula <- stats::as.formula(paste(outcome, "~", rhs_formula))
101
102	1x	fit <- mmrm::mmrm(formula, data = data, ...)
103	1x	res <- lapply(visits, function(x) {
104	3x	res <- rbmi_mmrm_single_info(fit, visit_level = x, visit = visit, group = group, weights = weights)
105	3x	names(res) <- paste0(names(res), "_", x)
106	3x	return(res)
107		})
108	1x	unlist(res, recursive = FALSE)
109		}
110
111		#' Extract Single Visit Information from a Fitted MMRM for Multiple Imputation Analysis
112		#'
113		#' @description
114		#' Extracts relevant estimates from a given fitted MMRM. See [rbmi_mmrm()] for full details.
115		#'
116		#' @param fit (`mmrm`)\cr the fitted MMRM.
117		#' @param visit_level (`string`)\cr the visit level to extract information for.
118		#' @param visit (`string`)\cr the name of the visit variable.
119		#' @param group (`string`)\cr the name of the group variable.
120		#'
121		#' @return a list with `trt_`, `var_` and `lsm_*` elements. See [rbmi_mmrm] for
122		#' full details.
123		#' @inheritParams rbmi_mmrm
124		#' @seealso [rbmi_mmrm()]
125		rbmi_mmrm_single_info <- function(fit, visit_level, visit, group, weights) {
126	3x	checkmate::assert_class(fit, "mmrm")
127	3x	checkmate::assert_string(visit_level)
128	3x	checkmate::assert_string(visit)
129	3x	checkmate::assert_string(group)
130
131	3x	data <- mmrm::component(fit, "full_frame")
132	3x	checkmate::assert_factor(data[[group]])
133	3x	grp_levels <- levels(data[[group]])
134
135	3x	visit_at_level <- stats::setNames(list(visit_level), visit)
136	3x	em_res <- emmeans::emmeans(fit, group, by = visit, at = visit_at_level, weights = weights)
137	3x	em_df <- as.data.frame(em_res)
138
139	3x	all_lsm <- lapply(grp_levels, function(x) {
140	9x	this_df <- em_df[em_df[[group]] == x, ]
141	9x	with(this_df, list(est = emmean, se = SE, df = df))
142		})
143	3x	names(all_lsm) <- paste0("lsm_", grp_levels)
144
145	3x	var_est <- mmrm::VarCorr(fit)[visit_level, visit_level]
146
147	3x	cont_res <- emmeans::contrast(em_res, "trt.vs.ctrl", ref = grp_levels[1])
148	3x	cont_df <- as.data.frame(cont_res)
149
150	3x	all_var <- lapply(grp_levels[-1], function(x) {
151	6x	this_df <- cont_df[cont_df[["contrast"]] == paste(x, "-", grp_levels[1]), ]
152		# Note: This should be revisited here for the `se` - currently we take the SE from the LS means contrast, not
153		# from the variance estimate!
154	6x	with(this_df, list(est = var_est, se = SE, df = df))
155		})
156	3x	names(all_var) <- paste0("var_", grp_levels[-1])
157
158	3x	all_trt <- lapply(grp_levels[-1], function(x) {
159	6x	this_df <- cont_df[cont_df[["contrast"]] == paste(x, "-", grp_levels[1]), ]
160	6x	with(this_df, list(est = estimate, se = SE, df = df))
161		})
162	3x	names(all_trt) <- paste0("trt_", grp_levels[-1])
163
164	3x	c(all_var, all_trt, all_lsm)
165		}

1		#' @title s_function for proportion of factor levels
2		#' @description A simple statistics function which prepares the numbers with percentages
3		#' in the required format. The denominator here is from the alternative counts data set
4		#' in the given row and column split.
5		#'
6		#' If a total row is shown, then here just the total number is shown (without 100%).
7		#' @param x (`factor`)\cr categorical variable we want to analyze.
8		#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
9		#' @param use_alt_counts (`flag`)\cr whether the `.alt_df` should be used for the total, i.e. the denominator.
10		#' If not, then the number of non-missing values in `x` is used.
11		#' @param show_total (`string`)\cr show the total level optionally on the top or in the bottom
12		#' of the factor levels.
13		#' @param total_label (`string`)\cr which label to use for the optional total level.
14		#' @return The [rtables::in_rows()] result with the proportion statistics.
15		#' @seealso [s_proportion_logical()] for tabulating logical `x`.
16		#' @export
17		s_proportion_factor <- function(
18		x,
19		.alt_df,
20		use_alt_counts = TRUE,
21		show_total = c("none", "top", "bottom"),
22		total_label = "Total") {
23	5x	checkmate::assert_factor(x)
24	5x	checkmate::assert_flag(use_alt_counts)
25	5x	show_total <- match.arg(show_total)
26
27	5x	N <- if (use_alt_counts) nrow(.alt_df) else sum(!is.na(x))
28	5x	tab <- lapply(as.list(table(x)), function(xi) rcell(xi * c(1, 1 / N), format = jjcsformat_count_fraction))
29	5x	if (show_total != "none") {
30	3x	checkmate::assert_string(total_label)
31	3x	tab_total <- stats::setNames(list(rcell(N, format = "xx")), total_label)
32
33	3x	tab <- if (show_total == "top") {
34	1x	c(tab_total, tab)
35		} else {
36	2x	c(tab, tab_total)
37		}
38		}
39	5x	in_rows(.list = tab)
40		}
41
42		#' @title s_function for proportion of `TRUE` in logical vector
43		#' @description A simple statistics function which prepares the numbers with percentages
44		#' in the required format. The denominator here is from the alternative counts data set
45		#' in the given row and column split.
46		#' @param x (`logical`)\cr binary variable we want to analyze.
47		#' @param label (`string`)\cr label to use.
48		#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
49		#' @return The [rtables::in_rows()] result with the proportion statistics.
50		#' @seealso [s_proportion_factor()] for tabulating factor `x`.
51		#' @export
52		s_proportion_logical <- function(x, label = "Responders", .alt_df) {
53	1x	n <- sum(x)
54	1x	N <- nrow(.alt_df)
55	1x	p_hat <- n / N
56	1x	list(n_prop = with_label(rcell(c(n, p_hat), format = jjcsformat_count_fraction), label))
57		}
58
59		#' @title c_function for proportion of `TRUE` in logical vector
60		#' @description A simple statistics function which prepares the numbers with percentages
61		#' in the required format, for use in a split content row. The denominator here is
62		#' from the column N. Note that we don't use here .alt_df because that might not
63		#' have required row split variables available.
64		#' @param x (`logical`)\cr binary variable we want to analyze.
65		#' @param labelstr (`string`)\cr label string.
66		#' @param labelstr (`string`)\cr label string.
67		#' @param label_fstr (`string`)\cr format string for the label.
68		#' @param format (`character` or `list`)\cr format for the statistics.
69		#' @param .N_col (`numeric`)\cr number of columns.
70		#' @param .N_col (`numeric`)\cr number of columns.
71		#' @return The [rtables::in_rows()] result with the proportion statistics.
72		#' @seealso [s_proportion_logical()] for the related statistics function.
73		#' @export
74		c_proportion_logical <- function(x, labelstr, label_fstr, format, .N_col) {
75	1x	checkmate::assert_logical(x)
76	1x	num <- sum(x)
77	1x	denom <- .N_col
78	1x	in_rows(est_prop = c(num, num / denom), .formats = format, .labels = sprintf(label_fstr, labelstr))
79		}
80
81		#' Helper Function to Create Logical Design Matrix from Factor Variable
82		#'
83		#' @param df (`data.frame`)\cr including a factor variable with name in `.var`.
84		#' @param .var (`string`)\cr name of the factor variable.
85		#'
86		#' @return The logical matrix with dummy encoding of all factor levels.
87		#' @keywords internal
88		#' @export
89		#' @examples
90		#' h_get_design_mat(df = data.frame(a = factor(c("a", "b", "a"))), .var = "a")
91		h_get_design_mat <- function(df, .var) {
92	5x	checkmate::assert_data_frame(df)
93	5x	checkmate::assert_string(.var)
94	5x	checkmate::assert_factor(df[[.var]])
95
96	5x	model_formula <- stats::as.formula(paste("~ -1 +", .var))
97	5x	design_mat <- stats::model.matrix(model_formula, df)
98	5x	mode(design_mat) <- "logical"
99	5x	colnames(design_mat) <- levels(df[[.var]])
100	5x	design_mat
101		}
102
103		#' Formatted Analysis Function For Proportion Confidence Interval for Logical
104		#'
105		#' @param x (`logical`)\cr including binary response values.
106		#' @param .alt_df (`data.frame`)\cr alternative data frame used for denominator calculation.
107		#' @param formats (`list`)\cr including element `prop_ci` with the
108		#' required format. Note that the value is in percent already.
109		#' @param method (`string`)\cr please see [tern::s_proportion()] for possible
110		#' methods.
111		#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
112		#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
113		#'
114		#' @return The [rtables::rcell()] result.
115		#' @export
116		#'
117		#' @examples
118		#' a_proportion_ci_logical(
119		#' x = DM$SEX == "F",
120		#' .alt_df = DM,
121		#' conf_level = 0.95,
122		#' formats = list(prop_ci = jjcsformat_xx("xx.xx% - xx.xx%")),
123		#' method = "wald"
124		#' )
125		a_proportion_ci_logical <- function(x, .alt_df, conf_level, method, formats) {
126	5x	checkmate::assert_logical(x)
127	5x	checkmate::assert_data_frame(.alt_df)
128	5x	checkmate::assert_list(formats)
129
130	5x	diff_n <- nrow(.alt_df) - length(x)
131	5x	if (diff_n > 0) {
132	!	x <- c(x, rep(FALSE, length = diff_n))
133		}
134	5x	est <- s_proportion(x, conf_level = conf_level, method = method)
135	5x	rcell(est$prop_ci, format = formats$prop_ci)
136		}
137
138		#' Formatted Analysis Function For Proportion Confidence Interval for Factor
139		#'
140		#' @param df (`data.frame`)\cr including factor `.var`.
141		#' @param .var (`string`)\cr name of the factor variable.
142		#' @param .var (`string`)\cr name of the factor variable.
143		#' @param \dots see [a_proportion_ci_logical()] for additionally required
144		#' arguments.
145		#'
146		#' @return The [rtables::rcell()] result.
147		#' @export
148		#'
149		#' @examples
150		#' a_proportion_ci_factor(
151		#' df = DM,
152		#' .var = "SEX",
153		#' .alt_df = DM,
154		#' conf_level = 0.95,
155		#' formats = list(prop_ci = jjcsformat_xx("xx.x%, xx.x%")),
156		#' method = "clopper-pearson"
157		#' )
158		a_proportion_ci_factor <- function(df, .var, ...) {
159	1x	checkmate::assert_factor(df[[.var]])
160
161	1x	design_mat <- h_get_design_mat(df, .var)
162	1x	design_df <- as.data.frame(design_mat)
163	1x	res <- lapply(design_df, a_proportion_ci_logical, ...)
164	1x	in_rows(.list = res, .labels = colnames(design_mat))
165		}
166
167		#' Split Function for Proportion Analysis Columns (TEFCGIS08 e.g.)
168		#'
169		#' Here we just split into 3 columns `n`, `%` and `Cum %`.
170		#'
171		#' @param ret (`list`)\cr return value from the previous split function.
172		#' @param spl (`list`)\cr split information.
173		#' @param fulldf (`data.frame`)\cr full data frame.
174		#' @param .spl_context (`environment`)\cr split context environment.
175		#'
176		#' @note This split function is used in the proportion table TEFCGIS08 and similar ones.
177		#' @seealso [rtables::make_split_fun()] describing the requirements for this kind of
178		#' post-processing function.
179		prop_post_fun <- function(ret, spl, fulldf, .spl_context) {
180	1x	short_split_result(n = "n", percent = "%", cum_percent = "Cum %", fulldf = fulldf)
181		}
182
183		#' @rdname prop_post_fun
184		#'
185		#' @param df A data frame that contains all analysis variables.
186		#' @param vals A character vector that contains values to use for the split.
187		#' @param labels A character vector that contains labels for the statistics (without indent).
188		#' @param trim A single logical that indicates whether to trim the values.
189		#' @return a split function for use in [rtables::split_rows_by].
190		#' @export
191		prop_split_fun <- make_split_fun(post = list(prop_post_fun))
192
193		#' Formatted Analysis Function for Proportion Analysis (TEFCGIS08 e.g.)
194		#'
195		#' This function applies to a factor `x` when a column split was prepared with
196		#' [prop_split_fun()] before.
197		#'
198		#' @details In the column named `n`, the counts of the categories as well as an
199		#' optional `Total` count will be shown. In the column named `percent`, the
200		#' percentages of the categories will be shown, with an optional blank entry for
201		#' `Total`. In the column named `cum_percent`, the cumulative percentages will
202		#' be shown instead.
203		#'
204		#' @param x (`factor`)\cr factor variable to analyze.
205		#' @param .spl_context (`environment`)\cr split context environment.
206		#' @param formats (`list`)\cr formats for the statistics.
207		#' @param add_total_level (`flag`)\cr whether to add a total level.
208		#' @param x (`factor`)\cr factor variable to analyze.
209		#' @param .spl_context (`environment`)\cr split context environment.
210		#' @param formats (`list`)\cr formats for the statistics.
211		#' @param add_total_level (`flag`)\cr whether to add a total level.
212		#'
213		#' @return A `VerticalRowsSection` as returned by [rtables::in_rows].
214		#' @export
215		prop_table_afun <- function(x, .spl_context, formats, add_total_level = FALSE) {
216	8x	checkmate::assert_list(formats, len = 3, names = "unique")
217	8x	checkmate::assert_names(names(formats), permutation.of = c("n", "percent", "cum_percent"))
218	8x	checkmate::assert_flag(add_total_level)
219
220	8x	stat <- utils::tail(.spl_context[nrow(.spl_context), "cur_col_split_val"][[1]], 1)
221	8x	optional_total <- function(x) if (add_total_level) x else NULL
222	8x	ns <- table(x)
223	8x	res <- if (stat == "n") {
224	2x	c(as.list(ns), optional_total(length(x)))
225	8x	} else if (stat == "percent") {
226	2x	c(as.list(ns / length(x) * 100), optional_total(list(NULL)))
227	8x	} else if (stat == "cum_percent") {
228	2x	c(as.list(cumsum(ns / length(x) * 100)), optional_total(list(NULL)))
229		} else {
230	2x	stop("unexpected proportion statistic in split")
231		}
232	6x	in_rows(.list = res, .labels = c(levels(x), optional_total("Total")), .formats = formats[[stat]])
233		}

1		#' Tabulation for Exposure Tables
2		#'
3		#'
4		#' @details
5		#' Creates statistics needed for standard exposure table
6		#' This includes differences and 95% CI and total treatment years.
7		#' This is designed to be used as an analysis (afun in `analyze`) function.
8		#'
9		#' @name a_summarize_ex_j
10		NULL
11
12
13		#' @inheritParams proposal_argument_convention
14		#' @describeIn a_summarize_ex_j Statistics function needed for the exposure tables
15		#'
16		#' @param daysconv conversion required to get the values into days
17		#' (i.e 1 if original PARAMCD unit is days, 30.4375 if original PARAMCD unit is in months)
18		#' @param ancova (`logical`)\cr If FALSE, only descriptive methods will be used. \cr
19		#' If TRUE Ancova methods will be used for each of the columns : AVAL, CHG, DIFF. \cr
20		#' @param comp_btw_group (`logical`)\cr If TRUE,
21		#' \cr When ancova = FALSE, the estimate of between group difference (on CHG) will be based upon two-sample t-test. \cr
22		#' \cr When ancova = TRUE, the same ancova model will be used for the estimate of between group difference (on CHG).
23		#'
24		#' @param interaction_y (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
25		#' @param interaction_item (`character`)\cr Will be passed onto the `tern` function `s_ancova`, when ancova = TRUE.
26		#' @param conf_level (`proportion`)\cr Confidence level of the interval
27		#' @param variables (named list of strings)\cr
28		#' list of additional analysis variables, with expected elements:
29		#' * arm (string)\cr
30		#' group variable, for which the covariate adjusted means of multiple groups will be summarized.
31		#' Specifically, the first level of arm variable is taken as the reference group.
32		#' * covariates (character)\cr
33		#' a vector that can contain single variable names (such as 'X1'), and/or interaction terms indicated by 'X1 * X2'.
34		#'
35		s_summarize_ex_j <- function(
36		df,
37		.var,
38		.df_row,
39		.spl_context,
40		comp_btw_group = TRUE,
41		ref_path = NULL,
42		ancova = FALSE,
43		interaction_y,
44		interaction_item,
45		conf_level,
46		daysconv,
47		variables) {
48	5x	control <- control_analyze_vars()
49	5x	control$conf_level <- conf_level
50	5x	x_stats <- s_summary(df[[.var]], na.rm = TRUE, .var, control = control)
51		## add extra for subject years
52	5x	subj_years <- x_stats[["sum"]] * daysconv / 365.25
53	5x	x_stats[["total_subject_years"]] <- c(x_stats[["sum"]], subj_years)
54	5x	names(x_stats[["total_subject_years"]]) <- c("total", "subject_years")
55
56	5x	cur_col_id <- .spl_context$cur_col_id[[length(.spl_context$split)]]
57	5x	indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE)
58
59	5x	if (indiffcol) {
60		# blank out all stats
61	2x	x_stats <- sapply(
62	2x	names(x_stats),
63	2x	FUN = function(x) {
64	60x	x_stats[[x]] <- NULL
65		},
66	2x	simplify = FALSE,
67	2x	USE.NAMES = TRUE
68		)
69		# diff between group will be updated in mean_sd stat
70	2x	if (comp_btw_group) {
71	2x	trt_var_refpath <- h_get_trtvar_refpath(ref_path, .spl_context, df)
72		# trt_var_refpath is list with elements trt_var trt_var_refspec cur_trt_grp ctrl_grp make these elements
73		# available in current environment
74	2x	trt_var <- trt_var_refpath$trt_var
75	2x	trt_var_refspec <- trt_var_refpath$trt_var_refspec
76	2x	cur_trt_grp <- trt_var_refpath$cur_trt_grp
77	2x	ctrl_grp <- trt_var_refpath$ctrl_grp
78
79	2x	.in_ref_col <- FALSE
80	!	if (trt_var == ctrl_grp) .in_ref_col <- TRUE
81
82	2x	.ref_group <- .df_row[.df_row[[trt_var]] == ctrl_grp, ]
83
84	2x	if (ancova) {
85		# ancova method for diff between group
86	2x	x_stats2 <- s_summarize_ancova_j(
87	2x	df = df,
88	2x	.var = .var,
89	2x	.ref_group = .ref_group,
90	2x	.in_ref_col = .in_ref_col,
91	2x	.df_row = .df_row,
92	2x	conf_level = conf_level,
93	2x	interaction_y = interaction_y,
94	2x	interaction_item = interaction_item,
95	2x	variables = variables
96		)
97	2x	diffstat <- x_stats2[["lsmean_diffci"]]
98		} else {
99		# descriptive method for diff between group
100	!	x_stats2 <- s_summarize_desc_j(
101	!	df = df,
102	!	.var = .var,
103	!	.ref_group = .ref_group,
104	!	.in_ref_col = .in_ref_col,
105	!	control = control
106		)
107	!	diffstat <- x_stats2[["mean_diffci"]]
108		}
109		# actual update with the diffstat
110	2x	x_stats[["mean_sd"]] <- diffstat
111		}
112		}
113
114	5x	return(x_stats)
115		}
116
117		#' @title Analysis Function For Exposure Tables
118		#' @description
119		#' A function to create the appropriate statistics needed for exposure table
120		#' @details
121		#' Creates statistics needed for table. This includes differences and 95% CI and total treatment years.
122		#' This is designed to be used as an analysis (afun in `analyze`) function.
123		#' @inheritParams proposal_argument_convention
124		#'
125		#' @describeIn a_summarize_ex_j Formatted analysis function which is used as `afun`.
126		#'
127		#' @return
128		#' * `a_summarize_ex_j()` returns the corresponding list with formatted [rtables::CellValue()].
129		#'
130		#' @aliases a_summarize_ex_j
131		#' @examples
132		#' library(dplyr)
133		#'
134		#' ADEX <- data.frame(
135		#' USUBJID = c(
136		#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
137		#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
138		#' ),
139		#' TRT01A = c(
140		#' "ARMA", "ARMA", "ARMA", "ARMA", "ARMA",
141		#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMA"
142		#' ),
143		#' AVAL = c(56, 78, 67, 87, 88, 93, 39, 87, 65, 55)
144		#' )
145		#'
146		#' ADEX <- ADEX \|>
147		#' mutate(TRT01A = as.factor(TRT01A))
148		#'
149		#' ADEX$colspan_trt <- factor(ifelse(ADEX$TRT01A == "Placebo", " ", "Active Study Agent"),
150		#' levels = c("Active Study Agent", " ")
151		#' )
152		#'
153		#' ADEX$diff_header <- "Difference in Means (95% CI)"
154		#' ADEX$diff_label <- paste(ADEX$TRT01A, paste("vs", "Placebo"))
155		#'
156		#' colspan_trt_map <- create_colspan_map(ADEX,
157		#' non_active_grp = "Placebo",
158		#' non_active_grp_span_lbl = " ",
159		#' active_grp_span_lbl = "Active Study Agent",
160		#' colspan_var = "colspan_trt",
161		#' trt_var = "TRT01A"
162		#' )
163		#' ref_path <- c("colspan_trt", "", "TRT01A", "Placebo")
164		#'
165		#' lyt <- basic_table() \|>
166		#' split_cols_by(
167		#' "colspan_trt",
168		#' split_fun = trim_levels_to_map(map = colspan_trt_map)
169		#' ) \|>
170		#' split_cols_by("TRT01A") \|>
171		#' split_cols_by("diff_header", nested = FALSE) \|>
172		#' split_cols_by(
173		#' "TRT01A",
174		#' split_fun = remove_split_levels("Placebo"),
175		#' labels_var = "diff_label"
176		#' ) \|>
177		#' analyze("AVAL",
178		#' afun = a_summarize_ex_j, var_labels = "Duration of treatment (Days)",
179		#' show_labels = "visible",
180		#' indent_mod = 0L,
181		#' extra_args = list(
182		#' daysconv = 1,
183		#' ref_path = ref_path,
184		#' variables = list(arm = "TRT01A", covariates = NULL),
185		#' ancova = TRUE,
186		#' comp_btw_group = TRUE
187		#' )
188		#' )
189		#'
190		#' result <- build_table(lyt, ADEX)
191		#'
192		#' result
193		#' @export
194		a_summarize_ex_j <- function(
195		df,
196		.var,
197		.df_row,
198		.spl_context,
199		comp_btw_group = TRUE,
200		ref_path = NULL,
201		ancova = FALSE,
202		interaction_y = FALSE,
203		interaction_item = NULL,
204		conf_level = 0.95,
205		variables,
206		.stats = c("mean_sd", "median", "range", "quantiles", "total_subject_years"),
207		.formats = c(diff_mean_est_ci = jjcsformat_xx("xx.xx (xx.xx, xx.xx)")),
208		.labels = c(quantiles = "Interquartile range"),
209		.indent_mods = NULL,
210		na_str = rep("NA", 3),
211		daysconv = 1) {
212	5x	if (!is.numeric(df[[.var]])) {
213	!	stop("a_summarize_ex_j issue: input variable must be numeric.")
214		}
215
216	5x	if (comp_btw_group && is.null(ref_path)) {
217	!	stop("a_summarize_ex_j issue: argument ref_path cannot be NULL.")
218		}
219
220	5x	if (comp_btw_group && ancova && is.null(variables)) {
221	!	stop("a_summarize_ex_j issue: argument variables must be defined when ancova is requested.")
222		}
223
224	5x	x_stats <- s_summarize_ex_j(
225	5x	df = df,
226	5x	.var = .var,
227	5x	.df_row = .df_row,
228	5x	.spl_context = .spl_context,
229	5x	comp_btw_group = comp_btw_group,
230	5x	ref_path = ref_path,
231	5x	ancova = ancova,
232	5x	interaction_y = interaction_y,
233	5x	interaction_item = interaction_item,
234	5x	conf_level = conf_level,
235	5x	daysconv = daysconv,
236	5x	variables = variables
237		)
238
239		# Fill in formatting defaults
240	5x	.stats_in <- .stats
241	5x	.stats <- tern_get_stats("analyze_vars_numeric", stats_in = .stats, custom_stats_in = NULL)
242	5x	if ("total_subject_years" %in% .stats_in) {
243		# place the extra statistic at the appropriate place within .stats vector
244	5x	i <- match("total_subject_years", .stats_in)
245	5x	x <- .stats_in[i:length(.stats_in)]
246	5x	if (length(x) == 1) {
247	5x	.stats <- c(.stats, "total_subject_years")
248		} else {
249	!	i2 <- min(match(x, .stats), na.rm = TRUE)
250	!	if (i2 == 1) {
251	!	.stats <- c("total_subject_years", .stats)
252		} else {
253	!	.stats <- c(.stats[1:(i2 - 1)], "total_subject_years", .stats[i2:length(.stats)])
254		}
255		}
256		}
257
258	5x	.stats_ext <- c(.stats, "diff_mean_est_ci")
259
260	5x	.formats <- junco_get_formats_from_stats(.stats_ext, .formats)
261	5x	.labels <- junco_get_labels_from_stats(.stats, .labels, label_attr_from_stats = get_label_attr_from_stats(x_stats))
262	5x	.indent_mods <- junco_get_indents_from_stats(.stats, .indent_mods)
263
264	5x	.names <- names(.labels)
265	5x	.labels <- .unlist_keep_nulls(.labels)
266	5x	.indent_mods <- .unlist_keep_nulls(.indent_mods)
267
268	5x	cur_col_id <- .spl_context$cur_col_id[[length(.spl_context$split)]]
269	5x	indiffcol <- grepl("difference", tolower(cur_col_id), fixed = TRUE)
270
271	5x	if (indiffcol && comp_btw_group) {
272	2x	.formats[["mean_sd"]] <- .formats[["diff_mean_est_ci"]]
273		}
274	5x	.formats[["diff_mean_est_ci"]] <- NULL
275
276	5x	if (!is.null(na_str)) {
277	5x	.format_na_strs <- lapply(names(.formats), FUN = function(x) {
278	25x	na_str
279		})
280		} else {
281	!	.format_na_strs <- NULL
282		}
283
284	5x	x_stats <- x_stats[.stats]
285	5x	ret <- in_rows(
286	5x	.list = x_stats,
287	5x	.formats = .formats,
288	5x	.names = .names,
289	5x	.labels = .labels,
290	5x	.indent_mods = .indent_mods,
291	5x	.format_na_strs = .format_na_strs
292		)
293	5x	return(ret)
294		}

1		#' Survival time analysis
2		#'
3		#' @description `r lifecycle::badge('stable')`
4		#'
5		#' The analyze function [kaplan_meier()] creates a layout element to analyze
6		#' survival time by calculating survival time median, 2 quantiles, each with
7		#' their confidence intervals, and range (for all, censored, or event patients).
8		#' The primary analysis variable `vars` is the time variable and the secondary
9		#' analysis variable `is_event` indicates whether or not an event has occurred.
10		#'
11		#' @inheritParams proposal_argument_convention
12		#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
13		#' [tern::control_surv_time()]. Some possible parameter options are:
14		#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
15		#' * `conf_type` (`string`)\cr confidence interval type. Options are 'plain' (default), 'log', or 'log-log',
16		#' see more in [survival::survfit()]. Note option 'none' is not supported.
17		#' * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
18		#'
19		#' @note These functions have been forked from the `tern` package file `survival_time.R`.
20		#' Here we have the additional features:
21		#'
22		#' * Additional statistics `quantiles_lower`, `quantiles_upper`, `range_with_cens_info` are returned.
23		#'
24		#' @examples
25		#' library(dplyr)
26		#' library(tern)
27		#' adtte_f <- tern::tern_ex_adtte \|>
28		#' filter(PARAMCD == "OS") \|>
29		#' mutate(
30		#' AVAL = tern::day2month(AVAL),
31		#' is_event = CNSR == 0
32		#' )
33		#' df <- adtte_f \|> filter(ARMCD == "ARM A")
34		#' @keywords internal
35		#' @name kaplan_meier
36		#' @order 1
37		NULL
38
39		#' @describeIn kaplan_meier Statistics function which analyzes survival times using Kaplan-Meier.
40		#'
41		#' @return
42		#' * `s_kaplan_meier()` returns the following statistics:
43		#' * `quantiles_lower`: Lower quantile estimate and confidence interval for it.
44		#' * `median_ci_3d`: Median survival time and confidence interval for it.
45		#' * `quantiles_upper`: Upper quantile estimate and confidence interval for it.
46		#' * `range_with_cens_info`: Survival time range with censoring information.
47		#'
48		#' @importFrom survival survfit
49		#'
50		#' @keywords internal
51		s_kaplan_meier <- function(df, .var, is_event, control = control_surv_time()) {
52	10x	checkmate::assert_string(.var)
53	10x	assert_df_with_variables(df, list(tte = .var, is_event = is_event))
54	10x	checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
55	10x	checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)
56
57	10x	conf_type <- control$conf_type
58	10x	conf_level <- control$conf_level
59
60	10x	checkmate::assert_true(control$quantiles[1] < 0.5)
61	10x	checkmate::assert_true(control$quantiles[2] > 0.5)
62	10x	quantiles <- c(control$quantiles[1], 0.5, control$quantiles[2])
63
64	10x	formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
65	10x	srv_fit <- survival::survfit(formula = formula, data = df, conf.int = conf_level, conf.type = conf_type)
66	10x	srv_qt_tab <- stats::quantile(srv_fit, probs = quantiles)
67	10x	quantiles_lower <- vapply(srv_qt_tab, "[", 1, FUN.VALUE = numeric(1))
68	10x	median_ci <- vapply(srv_qt_tab, "[", 2, FUN.VALUE = numeric(1))
69	10x	quantiles_upper <- vapply(srv_qt_tab, "[", 3, FUN.VALUE = numeric(1))
70	10x	range_censor <- range_noinf(df[[.var]][!df[[is_event]]], na.rm = TRUE)
71	10x	range_event <- range_noinf(df[[.var]][df[[is_event]]], na.rm = TRUE)
72	10x	range <- range_noinf(df[[.var]], na.rm = TRUE)
73	10x	lower_censored <- as.numeric(range_censor[1] < range_event[1])
74	10x	upper_censored <- as.numeric(range_censor[2] > range_event[2])
75	10x	range_with_cens_info <- c(range, lower_censored, upper_censored)
76	10x	list(
77	10x	quantiles_lower = with_label(
78	10x	unname(quantiles_lower),
79	10x	paste0(round(quantiles[1] * 100), "th percentile (", f_conf_level(conf_level), ")")
80		),
81	10x	median_ci_3d = with_label(unname(median_ci), paste0("Median (", f_conf_level(conf_level), ")")),
82	10x	quantiles_upper = with_label(
83	10x	unname(quantiles_upper),
84	10x	paste0(round(quantiles[3] * 100), "th percentile (", f_conf_level(conf_level), ")")
85		),
86	10x	range_with_cens_info = range_with_cens_info
87		)
88		}
89
90		#' @describeIn kaplan_meier Formatted analysis function which is used as `afun`
91		#'
92		#' @return
93		#' * `a_kaplan_meier()` returns the corresponding list with formatted [rtables::CellValue()].
94		#'
95		#' @examples
96		#' a_kaplan_meier(
97		#' df,
98		#' .var = "AVAL",
99		#' is_event = "is_event"
100		#' )
101		#'
102		#' basic_table() \|>
103		#' split_cols_by(var = "ARMCD") \|>
104		#' add_colcounts() \|>
105		#' analyze(
106		#' vars = "AVAL",
107		#' afun = a_kaplan_meier,
108		#' var_labels = "Kaplan-Meier estimate of time to event (months)",
109		#' show_labels = "visible",
110		#' extra_args = list(
111		#' is_event = "is_event",
112		#' control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
113		#' )
114		#' ) \|>
115		#' build_table(df = adtte_f)
116		#'
117		#' @export
118		#' @order 2
119		a_kaplan_meier <- function(df, .var, ..., .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL) {
120		# Check for additional parameters to the statistics function
121	8x	dots_extra_args <- list(...)
122
123		# Only support default stats, not custom stats
124	8x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
125
126		# Apply statistics function
127	8x	x_stats <- .apply_stat_functions(
128	8x	default_stat_fnc = s_kaplan_meier,
129	8x	custom_stat_fnc_list = NULL,
130	8x	args_list = c(df = list(df), .var = .var, dots_extra_args)
131		)
132
133		# Format according to specifications
134	8x	format_stats(
135	8x	x_stats,
136	8x	method_groups = "kaplan_meier",
137	8x	stats_in = .stats,
138	8x	formats_in = .formats,
139	8x	labels_in = .labels,
140	8x	indents_in = .indent_mods
141		)
142		}

1		#' @title Function factory for xx style formatting
2		#' @description A function factory to generate formatting functions for value
3		#' formatting that support the xx style format and control the rounding method
4		#'
5		#' @param roundmethod (`string`)\cr choice of rounding methods. Options are:
6		#' * `sas`: the underlying rounding method is `tidytlg::roundSAS`, where \cr
7		#' roundSAS comes from this Stack Overflow post https://stackoverflow.com/questions/12688717/round-up-from-5
8		#' * `iec`: the underlying rounding method is `round`
9		#'
10		#' @param na_str_dflt Character to represent NA value
11		#' @param replace_na_dflt logical(1). Should an `na_string` of "NA" within
12		#' the formatters framework be overridden by `na_str_default`? Defaults to
13		#' `TRUE`, as a way to have a different default na string behavior from the
14		#' base `formatters` framework.
15		#' @return `format_xx_fct()` format function that can be used in rtables formatting calls
16		#' @export
17		#'
18		#' @family JJCS formats
19		#' @examples
20		#' jjcsformat_xx_SAS <- format_xx_fct(roundmethod = "sas")
21		#' jjcsformat_xx <- jjcsformat_xx_SAS
22		#' rcell(c(1.453), jjcsformat_xx("xx.xx"))
23		#' rcell(c(), jjcsformat_xx("xx.xx"))
24		#' rcell(c(1.453, 2.45638), jjcsformat_xx("xx.xx (xx.xxx)"))
25		#'
26		format_xx_fct <- function(roundmethod = c("sas", "iec"), na_str_dflt = "NE",
27		replace_na_dflt = TRUE) {
28	!	roundmethod <- match.arg(roundmethod)
29
30	!	if (roundmethod == "sas") {
31	!	roundfunc <- tidytlg::roundSAS
32		} else {
33	!	roundfunc <- round
34		}
35
36	!	fnct <- function(str, na_str = na_str_dflt) {
37	450x	if (grepl("xxx.", str, fixed = TRUE)) {
38	!	stop(
39	!	"Error: jjcs_format_xx: do not use xxx. in input str, replace by xx. instead."
40		)
41		}
42	450x	if (!grepl("xx", str, fixed = TRUE)) {
43	1x	stop("Error: jjcs_format_xx: input str must contain xx.")
44		}
45	449x	positions <- gregexpr(
46	449x	pattern = "xx\\.?x*",
47	449x	text = str,
48	449x	perl = TRUE
49		)
50	449x	x_positions <- regmatches(x = str, m = positions)[[1]]
51		### str is splitted into pieces as xx. xx xx.xxx
52		### xx is no rounding
53		### xx. rounding to integer
54		### xx.x rounding to 1 decimal, etc
55
56	449x	no_round <- function(x, na_str = na_str_dflt) {
57	23x	if (is.na(x)) {
58	!	return(na_str)
59		} else {
60	23x	return(x)
61		}
62		}
63
64	449x	roundings <- lapply(X = x_positions, function(x) {
65	609x	y <- strsplit(split = "\\.", x = x)[[1]]
66		### "xx.x" will result in c("xx","x")
67		### "xx." will result in "xx"
68		### "xx" will remain "xx"
69
70	609x	if (x == "xx") {
71	34x	rounding <- no_round
72		} else {
73	575x	rounding <- function(x, na_str = na_str_dflt) {
74	1172x	if (is.na(x)) {
75	25x	return(na_str)
76		} else {
77	1147x	format(
78	1147x	roundfunc(x, digits = ifelse(length(y) > 1, nchar(y[2]), 0)),
79	1147x	nsmall = ifelse(length(y) > 1, nchar(y[2]), 0)
80		)
81		}
82		}
83		}
84	609x	return(rounding)
85		})
86	449x	rtable_format <- function(x, output, na_str = na_str_dflt) {
87	876x	if (anyNA(na_str) \|\| (replace_na_dflt && any(na_str == "NA"))) {
88	290x	na_inds <- which(is.na(na_str) \| (replace_na_dflt & na_str == "NA"))
89	290x	na_str[na_inds] <- rep(na_str_dflt, length.out = length(na_str))[na_inds]
90		}
91	876x	if (length(x) == 0 \|\| isTRUE(all(x == ""))) {
92	!	return(NULL)
93	876x	} else if (!length(positions[[1]]) == length(x)) {
94	3x	stop(
95	3x	"Error: input str in call to jjcs_format_xx must contain same number of xx as the number of stats."
96		)
97		}
98
99	873x	values <- Map(y = x, fun = roundings, na_str = na_str, function(y, fun, na_str) fun(y, na_str = na_str))
100	873x	regmatches(x = str, m = positions)[[1]] <- values
101	873x	return(str)
102		}
103	449x	return(rtable_format)
104		}
105	!	return(fnct)
106		}
107
108
109		jjcsformat_xx_SAS <- format_xx_fct(roundmethod = "sas")
110		jjcsformat_xx_R <- format_xx_fct(roundmethod = "iec")
111
112
113		### if we ever decide to switch rounding method, we just have to update jjcsformat_xx here
114
115		#' @title Formatting of values
116		#' @name jjcsformat_xx
117		#' @description jjcs formatting function
118		#' @param str The formatting that is required specified as a text string, eg "xx.xx"
119		#' @param na_str character. Na string that will be passed from `formatters` into
120		#' our formatting functions.
121		#' @return a formatting function with `"sas"`-style rounding.
122		#' @export
123		jjcsformat_xx <- jjcsformat_xx_SAS
124
125		#' @name count_fraction
126		#' @title Formatting count and fraction values
127		#'
128		#' @description
129		#'
130		#' Formats a count together with fraction (and/or denominator) with special
131		#' consideration when count is 0, or fraction is 1.
132		#' \cr See also: tern::format_count_fraction_fixed_dp()
133		#'
134		#' @inheritParams format_xx_fct
135		#' @param x `numeric`\cr with elements `num` and `fraction` or `num`, `denom` and `fraction`.
136		#' @param d numeric(1). Number of digits to round fraction to (default=1)
137		#' @param ... Additional arguments passed to other methods.
138		#' @return A string in the format `count / denom (ratio percent)`. If `count`
139		#' is 0, the format is `0`. If fraction is >0.99, the format is
140		#' `count / denom (>99.9 percent)`
141		#' @family JJCS formats
142		#' @rdname count_fraction
143		#' @export
144		#' @examples
145		#' jjcsformat_count_fraction(c(7, 0.7))
146		#' jjcsformat_count_fraction(c(70000, 0.9999999))
147		#' jjcsformat_count_fraction(c(70000, 1))
148		#'
149		jjcsformat_count_fraction <- function(
150		x,
151		d = 1,
152		roundmethod = c("sas", "iec"),
153		...) {
154	25x	roundmethod <- match.arg(roundmethod)
155	25x	attr(x, "label") <- NULL
156	25x	if (any(is.na(x))) {
157	!	return("-")
158		}
159
160	25x	checkmate::assert_vector(x)
161	25x	checkmate::assert_integerish(x[1])
162	25x	assert_proportion_value(
163	25x	x[2],
164	25x	include_boundaries = TRUE
165		)
166
167	25x	fraction <- x[2]
168
169	!	if (isTRUE(all.equal(fraction, 1))) fraction <- 1
170
171	25x	if (roundmethod == "sas") {
172	24x	fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d)
173		} else {
174	1x	fmtpct <- format(round(fraction * 100, d), nsmall = d)
175		}
176
177	25x	result <- if (x[1] == 0) {
178	!	"0"
179	25x	} else if (fraction == 1) {
180		## per conventions still report as 100.0%
181	!	paste0(x[1], " (100.0%)")
182	25x	} else if (fmtpct == format(0, nsmall = d)) {
183		# else if (100x[2] < 10*(-d)) {
184		### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1,
185		# but the actual value of pct <0.1)
186	!	paste0(x[1], " (<", 10**(-d), "%)")
187	25x	} else if (fmtpct == format(100, nsmall = d)) {
188		# else if (100x[2] > 100-10*(-d)) {
189		### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9,
190		# but the actual value of pct >99.9)
191	!	paste0(x[1], " (>", 100 - 10**(-d), "%)")
192		} else {
193	25x	paste0(x[1], " (", fmtpct, "%)")
194		}
195	25x	return(result)
196		}
197
198		#' @title Formatting count, denominator and fraction values
199		#'
200		#' @inheritParams count_fraction
201		#' @param ... Additional arguments passed to other methods.
202		#' @export
203		#' @rdname count_denom_fraction
204		#' @return `x`, formatted into a string with the appropriate
205		#' format and `d` digits of precision.
206		#' @examples
207		#' jjcsformat_count_denom_fraction(c(7, 10, 0.7))
208		#' jjcsformat_count_denom_fraction(c(70000, 70001, 70000 / 70001))
209		#' jjcsformat_count_denom_fraction(c(235, 235, 235 / 235))
210		jjcsformat_count_denom_fraction <- function(
211		x,
212		d = 1,
213		roundmethod = c("sas", "iec"),
214		...) {
215	313x	roundmethod <- match.arg(roundmethod)
216	313x	attr(x, "label") <- NULL
217	313x	if (any(is.na(x))) {
218	55x	return("-")
219		}
220	258x	checkmate::assert_vector(x)
221	258x	checkmate::assert_integerish(x[1])
222	258x	assert_proportion_value(
223	258x	x[3],
224	258x	include_boundaries = TRUE
225		)
226
227	258x	fraction <- x[3]
228	109x	if (x[2] == x[1]) fraction <- 1
229
230	258x	fmt_x12 <- paste0(x[1], "/", x[2])
231
232	258x	if (roundmethod == "sas") {
233	257x	fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d)
234		} else {
235	1x	fmtpct <- format(round(fraction * 100, d), nsmall = d)
236		}
237
238	258x	result <- if (x[1] == 0) {
239		# "0"
240		# same as in general situation
241	4x	paste0(fmt_x12, " (", fmtpct, "%)")
242	258x	} else if (100 * fraction == 100) {
243	109x	paste0(fmt_x12, " (100.0%)")
244	258x	} else if (100 * fraction < 10**(-d)) {
245		### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, but the actual value of pct <0.1)
246	1x	paste0(fmt_x12, " (<", 10**(-d), "%)")
247	258x	} else if (100 * fraction > 100 - 10**(-d)) {
248		### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, but the actual value of pct >99.9)
249	1x	paste0(fmt_x12, " (>", 100 - 10**(-d), "%)")
250		} else {
251	143x	paste0(fmt_x12, " (", fmtpct, "%)")
252		}
253	258x	return(result)
254		}
255
256		#' @title Formatting fraction, count and denominator values
257		#'
258		#' @details
259		#' Formats a 3-dimensional value such that percent values
260		#' near 0 or 100% are formatted as .e.g, `"<0.1%"` and
261		#' `">99.9%"`, where the cutoff is controlled by `d`, and
262		#' formatted as `"xx.x% (xx/xx)"` otherwise, with the
263		#' precision of the percent also controlled by `d`.
264		#'
265		#' @inheritParams count_fraction
266		#' @param ... Additional arguments passed to other methods.
267		#' @export
268		#' @rdname fraction_count_denom
269		#' @return `x` formatted as a string with `d` digits of precision,
270		#' with special cased values as described in Details above.
271		#' @examples
272		#' jjcsformat_fraction_count_denom(c(7, 10, 0.7))
273		#' jjcsformat_fraction_count_denom(c(70000, 70001, 70000 / 70001))
274		#' jjcsformat_fraction_count_denom(c(235, 235, 235 / 235))
275		jjcsformat_fraction_count_denom <- function(
276		x,
277		d = 1,
278		roundmethod = c("sas", "iec"),
279		...) {
280	2x	roundmethod <- match.arg(roundmethod)
281	2x	attr(x, "label") <- NULL
282	2x	if (any(is.na(x))) {
283	!	return("-")
284		}
285	2x	checkmate::assert_vector(x)
286	2x	checkmate::assert_integerish(x[1])
287	2x	assert_proportion_value(
288	2x	x[3],
289	2x	include_boundaries = TRUE
290		)
291
292	2x	fraction <- x[3]
293	!	if (x[2] == x[1]) fraction <- 1
294
295	2x	fmt_x12 <- paste0(x[1], "/", x[2])
296
297	2x	if (roundmethod == "sas") {
298	1x	fmtpct <- format(tidytlg::roundSAS(fraction * 100, d), nsmall = d)
299		} else {
300	1x	fmtpct <- format(round(fraction * 100, d), nsmall = d)
301		}
302
303	2x	result <- if (x[1] == 0) {
304		# "0"
305		# same as in general situation
306	!	paste0("(", fmt_x12, ")")
307	2x	} else if (100 * fraction == 100) {
308	!	paste0("100.0%", " (", fmt_x12, ")")
309	2x	} else if (100 * fraction < 10**(-d)) {
310		### example pct = 0.09999 ### <0.1% (even if fmtpct == 0.1, but the actual value of pct <0.1)
311	!	paste0("<", 10**(-d), "%", " (", fmt_x12, ")")
312	2x	} else if (100 * fraction > 100 - 10**(-d)) {
313		### example pct = 99.90001 ### >99.9% (even if fmtpct == 99.9, but the actual value of pct >99.9)
314	!	paste0(">", 100 - 10**(-d), "%", " (", fmt_x12, ")")
315		} else {
316	2x	paste0(fmtpct, "%", " (", fmt_x12, ")")
317		}
318	2x	return(result)
319		}
320
321		#' @title Function factory for p-value formatting
322		#'
323		#' @description A function factory to generate formatting functions for p-value
324		#' formatting that support rounding close to the significance level specified
325		#'
326		#' @param alpha `number`\cr the significance level to account for during rounding.
327		#' @return The p-value in the standard format. If `count` is 0, the format is `0`.
328		#' If it is smaller than 0.001, then `<0.001`, if it is larger than 0.999, then
329		#' `>0.999` is returned. Otherwise, 3 digits are used. In the special case that
330		#' rounding from below would make the string equal to the specified `alpha`,
331		#' then a higher number of digits is used to be able to still see the difference.
332		#' For example, 0.0048 is not rounded to 0.005 but stays at 0.0048 if `alpha = 0.005`
333		#' is set.
334		#'
335		#' @family JJCS formats
336		#' @export
337		#'
338		#' @examples
339		#' my_pval_format <- jjcsformat_pval_fct(0.005)
340		#' my_pval_format(0.2802359)
341		#' my_pval_format(0.0048)
342		#' my_pval_format(0.00499)
343		#' my_pval_format(0.004999999)
344		#' my_pval_format(0.0051)
345		#' my_pval_format(0.0009)
346		#' my_pval_format(0.9991)
347		#'
348		jjcsformat_pval_fct <- function(alpha = 0.05) {
349	26x	checkmate::assert_number(alpha, lower = 0, upper = 1)
350
351	26x	function(x, ...) {
352	114x	checkmate::assert_number(
353	114x	x,
354	114x	lower = 0,
355	114x	upper = 1 + .Machine$double.eps, # Be a bit tolerant here.
356	114x	na.ok = TRUE
357		)
358	114x	if (is.na(x)) {
359	!	"NE"
360	114x	} else if (x < 0.001) {
361	34x	"<0.001"
362	80x	} else if (x > 0.999) {
363	3x	">0.999"
364		} else {
365	77x	xx_format <- "xx.xxx"
366	77x	res <- jjcsformat_xx(xx_format)(x)
367	77x	while (as.numeric(res) == alpha && x < alpha) {
368		# Increase precision by 1 digit until the result
369		# is different from threshold alpha.
370	3x	xx_format <- paste0(xx_format, "x")
371	3x	res <- jjcsformat_xx(xx_format)(x)
372		}
373	77x	res
374		}
375		}
376		}
377
378		#' @title Function factory for range with censoring information formatting
379		#' @description A function factory to generate formatting functions for range formatting
380		#' that includes information about the censoring of survival times.
381		#'
382		#' @param str `string`\cr the format specifying the number of digits to be used,
383		#' for the range values, e.g. `"xx.xx"`.
384		#' @return A function that formats a numeric vector with 4 elements:
385		#' - minimum
386		#' - maximum
387		#' - censored minimum? (1 if censored, 0 if event)
388		#' - censored maximum? (1 if censored, 0 if event)
389		#' The range along with the censoring information is returned as a string
390		#' with the specified numeric format as `(min, max)`, and the `+` is appended
391		#' to `min` or `max` if these have been censored.
392		#'
393		#' @family JJCS formats
394		#' @export
395		#'
396		#' @examples
397		#' my_range_format <- jjcsformat_range_fct("xx.xx")
398		#' my_range_format(c(0.35235, 99.2342, 1, 0))
399		#' my_range_format(c(0.35235, 99.2342, 0, 1))
400		#' my_range_format(c(0.35235, 99.2342, 0, 0))
401		#' my_range_format(c(0.35235, 99.2342, 1, 1))
402		jjcsformat_range_fct <- function(str) {
403	1x	format_xx <- jjcsformat_xx(str)
404
405	1x	function(x, ...) {
406	4x	checkmate::assert_numeric(
407	4x	x,
408	4x	len = 4L,
409	4x	finite = TRUE,
410	4x	any.missing = FALSE
411		)
412	4x	checkmate::assert_true(all(x[c(3, 4)] %in% c(0, 1)))
413
414	4x	res <- vapply(x[c(1, 2)], format_xx, character(1))
415	2x	if (x[3] == 1) res[1] <- paste0(res[1], "+")
416	2x	if (x[4] == 1) res[2] <- paste0(res[2], "+")
417	4x	paste0("(", res[1], ", ", res[2], ")")
418		}
419		}

1		#' Tabulation of Least Square Means Results
2		#'
3		#' @description `r lifecycle::badge('stable')`
4		#'
5		#' These functions can be used to produce tables from LS means, e.g. from [fit_mmrm_j()]
6		#' or [fit_ancova()].
7		#'
8		#' @note These functions have been forked from the `tern.mmrm` package. Additional features
9		#' are:
10		#'
11		#' * Additional `ref_path` argument for tern.mmrm::summarize_lsmeans().
12		#' * The function is more general in that it also works for LS means results from ANCOVA
13		#' * Additional statistic `diff_mean_est_ci` is returned
14		#' * P-value sidedness can be chosen
15		#'
16		#' @name tabulate_lsmeans
17		#' @return for `s_lsmeans`, a list containing the same statistics returned by [tern.mmrm::s_mmrm_lsmeans],
18		#' with the additional `diff_mean_est_ci` three-dimensional statistic. For `a_lsmeans`,
19		#' a `VertalRowsSection` as returned by [rtables::in_rows].
20		#' @examples
21		#' result <- fit_mmrm_j(
22		#' vars = list(
23		#' response = "FEV1",
24		#' covariates = c("RACE", "SEX"),
25		#' id = "USUBJID",
26		#' arm = "ARMCD",
27		#' visit = "AVISIT"
28		#' ),
29		#' data = mmrm::fev_data,
30		#' cor_struct = "unstructured",
31		#' weights_emmeans = "equal"
32		#' )
33		#'
34		#' df <- broom::tidy(result)
35		#'
36		#' s_lsmeans(df[8, ], .in_ref_col = FALSE)
37		#' s_lsmeans(df[8, ], .in_ref_col = FALSE, alternative = "greater", show_relative = "increase")
38		#'
39		#' dat_adsl <- mmrm::fev_data \|>
40		#' dplyr::select(USUBJID, ARMCD) \|>
41		#' unique()
42		#'
43		#' basic_table() \|>
44		#' split_cols_by("ARMCD") \|>
45		#' add_colcounts() \|>
46		#' split_rows_by("AVISIT") \|>
47		#' analyze(
48		#' "AVISIT",
49		#' afun = a_lsmeans,
50		#' show_labels = "hidden",
51		#' na_str = tern::default_na_str(),
52		#' extra_args = list(
53		#' .stats = c(
54		#' "n",
55		#' "adj_mean_se",
56		#' "adj_mean_ci",
57		#' "diff_mean_se",
58		#' "diff_mean_ci"
59		#' ),
60		#' .labels = c(
61		#' adj_mean_se = "Adj. LS Mean (Std. Error)",
62		#' adj_mean_ci = "95% CI",
63		#' diff_mean_ci = "95% CI"
64		#' ),
65		#' .formats = c(adj_mean_se = jjcsformat_xx("xx.x (xx.xx)")),
66		#' alternative = "greater",
67		#' ref_path = c("ARMCD", result$ref_level)
68		#' )
69		#' ) \|>
70		#' build_table(
71		#' df = broom::tidy(result),
72		#' alt_counts_df = dat_adsl
73		#' )
74		NULL
75
76		#' @describeIn tabulate_lsmeans Helper method (for [broom::tidy()]) to prepare a `data.frame` from an
77		#' `tern_model` object containing the least-squares means and contrasts.
78		#' @method tidy tern_model
79		#' @importFrom generics tidy
80		#' @export
81		tidy.tern_model <- function(x, ...) {
82		# nolint
83	57x	vars <- x$vars
84	57x	estimates <- x$lsmeans$estimates
85	57x	df <- if (is.null(vars$arm)) {
86	!	nams <- names(estimates)
87	!	to_rename <- match(c("estimate", "se", "df", "lower_cl", "upper_cl"), nams)
88	!	names(estimates)[to_rename] <- paste0(names(estimates)[to_rename], "_est")
89	!	estimates
90		} else {
91	57x	contrasts <- x$lsmeans$contrasts
92	57x	merge(x = estimates, y = contrasts, by = c(vars$visit, vars$arm), suffixes = c("_est", "_contr"), all = TRUE)
93		}
94	57x	df[[vars$arm]] <- factor(df[[vars$arm]], levels = levels(estimates[[vars$arm]]))
95	57x	df[[vars$visit]] <- factor(df[[vars$visit]], levels = levels(estimates[[vars$visit]]))
96	57x	df$conf_level <- x$conf_level
97	57x	if (!is.null(x$mse)) {
98	57x	df$mse <- x$mse[df[[vars$visit]]]
99		}
100	57x	if (!is.null(x$df)) {
101	57x	df$df <- x$df[df[[vars$visit]]]
102		}
103	57x	df
104		}
105
106		#' @describeIn tabulate_lsmeans Statistics function which is extracting estimates from a tidied least-squares means
107		#' data frame.
108		#' @inheritParams proposal_argument_convention
109		#' @export
110		s_lsmeans <- function(df,
111		.in_ref_col,
112		alternative = c("two.sided", "less", "greater"),
113		show_relative = c("reduction", "increase")) {
114	85x	alternative <- match.arg(alternative)
115	85x	show_relative <- match.arg(show_relative)
116	85x	if_not_ref <- function(x) if (.in_ref_col) character() else x
117	85x	list(
118	85x	n = df$n,
119	85x	adj_mean_se = c(df$estimate_est, df$se_est),
120	85x	adj_mean_ci = with_label(
121	85x	c(df$lower_cl_est, df$upper_cl_est),
122	85x	paste0("Adjusted Mean ", f_conf_level(df$conf_level))
123		),
124	85x	adj_mean_est_ci = with_label(
125	85x	c(df$estimate_est, df$lower_cl_est, df$upper_cl_est),
126	85x	paste0("Adjusted Mean (", f_conf_level(df$conf_level), ")")
127		),
128	85x	diff_mean_se = if_not_ref(c(df$estimate_contr, df$se_contr)),
129	85x	diff_mean_ci = with_label(
130	85x	if_not_ref(c(df$lower_cl_contr, df$upper_cl_contr)),
131	85x	paste0("Difference in Adjusted Means ", f_conf_level(df$conf_level))
132		),
133	85x	diff_mean_est_ci = with_label(
134	85x	if_not_ref(c(df$estimate_contr, df$lower_cl_contr, df$upper_cl_contr)),
135	85x	paste0("Difference in Adjusted Means (", f_conf_level(df$conf_level), ")")
136		),
137	85x	change = switch(show_relative,
138	85x	reduction = with_label(if_not_ref(df$relative_reduc), "Relative Reduction (%)"),
139	85x	increase = with_label(if_not_ref(-df$relative_reduc), "Relative Increase (%)")
140		),
141	85x	p_value = if_not_ref(switch(alternative,
142	85x	two.sided = with_label(df$p_value, "2-sided p-value"),
143	85x	less = with_label(df$p_value_less, "1-sided p-value (less)"),
144	85x	greater = with_label(df$p_value_greater, "1-sided p-value (greater)")
145		))
146		)
147		}
148
149		#' @describeIn tabulate_lsmeans Formatted Analysis function to be used as `afun`
150		#' @export
151		a_lsmeans <- function(df,
152		ref_path,
153		.spl_context,
154		...,
155		.stats = NULL,
156		.formats = NULL,
157		.labels = NULL,
158		.indent_mods = NULL) {
159		# Check for additional parameters to the statistics function
160	30x	dots_extra_args <- list(...)
161
162		# Only support default stats, not custom stats
163	30x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
164
165		# Obtain reference column information
166	30x	ref <- get_ref_info(ref_path, .spl_context)
167
168		# Apply statistics function
169	30x	x_stats <- .apply_stat_functions(
170	30x	default_stat_fnc = s_lsmeans,
171	30x	custom_stat_fnc_list = NULL,
172	30x	args_list = c(df = list(df), .in_ref_col = ref$in_ref_col, dots_extra_args)
173		)
174
175		# Format according to specifications
176	30x	format_stats(
177	30x	x_stats,
178	30x	method_groups = "tabulate_lsmeans",
179	30x	stats_in = .stats,
180	30x	formats_in = .formats,
181	30x	labels_in = .labels,
182	30x	indents_in = .indent_mods
183		)
184		}

1		#' @note: This has been forked from the `rbmi` package, mainly to support in addition
2		#' the pooling of variance estimates.
3		rbmi_pool <- function(
4		results,
5		conf.level = 0.95,
6		alternative = c("two.sided", "less", "greater"),
7		type = c("percentile", "normal")) {
8	!	rbmi::validate(results)
9
10	!	alternative <- match.arg(alternative)
11	!	type <- match.arg(type)
12
13	!	pool_type <- class(results$results)[[1]]
14	!	checkmate::assert_true(identical(pool_type, "rubin"))
15
16	!	results_transpose <- (utils::getFromNamespace("transpose_results", "rbmi"))(
17	!	results$results,
18	!	(utils::getFromNamespace("get_pool_components", "rbmi"))(pool_type)
19		)
20
21	!	pars <- lapply(
22	!	results_transpose,
23	!	function(x, ...) mod_pool_internal_rubin(x, ...),
24	!	conf.level = conf.level,
25	!	alternative = alternative,
26	!	type = type,
27	!	D = results$method$D
28		)
29
30	!	method <- pool_type
31
32	!	ret <- list(
33	!	pars = pars,
34	!	conf.level = conf.level,
35	!	alternative = alternative,
36	!	N = length(results$results),
37	!	method = method
38		)
39	!	class(ret) <- "pool"
40	!	return(ret)
41		}
42
43		mod_pool_internal_rubin <- function(results, conf.level, alternative, type, D) {
44	3x	ests <- results$est
45	3x	ses <- results$se
46	3x	dfs <- results$df
47	3x	alpha <- 1 - conf.level
48
49		# Note: Need to take median here, because in the MMRM case the d.f. will be slightly different for each imputed
50		# data set analysis.
51	3x	v_com <- stats::median(dfs)
52
53	3x	res_rubin <- (utils::getFromNamespace("rubin_rules", "rbmi"))(ests = ests, ses = ses, v_com = v_com)
54
55	3x	ret <- (utils::getFromNamespace("parametric_ci", "rbmi"))(
56	3x	point = res_rubin$est_point,
57	3x	se = sqrt(res_rubin$var_t),
58	3x	alpha = alpha,
59	3x	alternative = alternative,
60	3x	qfun = stats::qt,
61	3x	pfun = stats::pt,
62	3x	df = res_rubin$df
63		)
64		# Here also return the pooled d.f.:
65	3x	ret$df <- res_rubin$df
66
67	3x	return(ret)
68		}

1		#' rm_other_facets_fact
2		#' @param nm character. names of facets to keep. all other facets will be
3		#' removed
4		#' @returns a function suitable for use within the `post` portion make_split_fun
5
6		rm_other_facets_fact <- function(nm) {
7	1x	function(ret, spl, .spl_context, fulldf) {
8	1x	keep <- which(names(ret$values) %in% nm)
9	1x	stopifnot(length(keep) > 0)
10		# values already have subsetting expressions on them
11	1x	make_split_result(ret$values[keep], ret$datasplit[keep], ret$labels[keep])
12		}
13		}
14
15		#' @name real_add_overall_facet
16		#'
17		#' @title Add Overall Facet
18		#'
19		#' @description
20		#' A function to help add an overall facet to your tables
21		#' @param name character(1). Name/virtual 'value' for the new facet
22		#' @param label character(1). label for the new facet
23		#' @note current add_overall_facet is bugged, can use that directly after it's fixed
24		#' https://github.com/insightsengineering/rtables/issues/768
25		#' @examples
26		#'
27		#' splfun <- make_split_fun(post = list(real_add_overall_facet('Total', 'Total')))
28		#' @export
29		#' @returns function usable directly as a split function.
30		#'
31		real_add_overall_facet <- function(name, label) {
32	1x	function(ret, spl, .spl_context, fulldf) {
33	1x	add_to_split_result(
34	1x	ret,
35	1x	values = name,
36	1x	datasplit = stats::setNames(list(fulldf), name),
37	1x	labels = stats::setNames(label, name),
38	1x	subset_exprs = quote(TRUE)
39		)
40		}
41		}
42
43		#' @name make_combo_splitfun
44		#'
45		#' @title Split Function Helper
46		#'
47		#' @description
48		#' A function which aids the construction for users to create their own split function for combined columns
49		#' @param nm character(1). Name/virtual 'value' for the new facet
50		#' @param label character(1). label for the new facet
51		#' @param levels character or NULL. The levels to combine into the new facet,
52		#' or NULL, indicating the facet should include all incoming data.
53		#' @param rm_other_facets logical(1). Should facets other than the newly
54		#' created one be removed. Defaults to `TRUE`
55		#' @export
56		#' @returns function usable directly as a split function.
57		#' @examples
58		#' aesevall_spf <- make_combo_splitfun(nm = 'AESEV_ALL', label = 'Any AE', levels = NULL)
59		#'
60		make_combo_splitfun <- function(nm, label = nm, levels = NULL, rm_other_facets = TRUE) {
61	1x	if (is.null(levels)) {
62	!	fn <- real_add_overall_facet(name = nm, label = label)
63		} else {
64	1x	fn <- add_combo_facet(name = nm, label = label, levels = levels)
65		}
66	1x	if (rm_other_facets) {
67	1x	rmfun <- rm_other_facets_fact(nm)
68		} else {
69	!	rmfun <- NULL
70		}
71	1x	make_split_fun(post = c(list(fn), if (rm_other_facets) list(rmfun)))
72		}
73
74		blank_regex <- "^[[:space:]]*$"
75
76		combine_nonblank <- function(name, label) {
77	!	function(ret, spl, .spl_context, fulldf) {
78	!	df <- fulldf[!grepl(blank_regex, fulldf[[spl_variable(spl)]]), ]
79	!	add_to_split_result(
80	!	ret,
81	!	values = name,
82	!	datasplit = stats::setNames(list(df), name),
83	!	labels = stats::setNames(label, name)
84		)
85		}
86		}
87
88		rm_blank_levels <- function(df, spl, ...) {
89	!	var <- spl_variable(spl)
90	!	varvec <- df[[var]]
91	!	oldlevs <- levels(varvec)
92	!	newlevs <- oldlevs[!grepl(blank_regex, oldlevs)]
93	!	df <- df[!grepl(blank_regex, varvec), ]
94	!	df[[var]] <- factor(df[[var]], levels = newlevs)
95	!	df
96		}
97
98
99		find_torm <- function(spl_ret, torm, torm_regex, keep_matches) {
100	2x	if (!is.null(torm)) {
101	2x	ans_lgl <- names(spl_ret$datasplit) %in% torm
102		} else {
103	!	ans_lgl <- grepl(torm_regex, names(spl_ret$datasplit))
104		}
105	2x	if (keep_matches) {
106	!	ans_lgl <- !ans_lgl
107		}
108	2x	which(ans_lgl)
109		}
110
111		.check_rem_cond <- function(cond_str, cond_regex, spl_ctx, pos, type) {
112	4x	if (is.null(cond_str) && is.null(cond_regex)) {
113	!	return(TRUE)
114		}
115	4x	if (!is.null(cond_str) && !is.null(cond_regex)) {
116	!	stop(
117	!	"Got both ",
118	!	paste(paste0(type, c("", "_regex")), collapse = " and "),
119	!	". Please specify at most one of these."
120		)
121		}
122	4x	ctx_data <- spl_ctx[[type]][pos]
123	4x	if (!is.null(cond_str)) {
124	4x	any(cond_str %in% ctx_data)
125		} else {
126	!	any(grepl(cond_regex, ctx_data))
127		}
128		}
129
130		## handle support for NA and negative positions only called once but code is nicer with it factored out here
131		resolve_ancestor_pos <- function(anc_pos, numrows) {
132	2x	if (is.na(anc_pos)) {
133	2x	anc_pos <- seq_len(numrows)
134	!	} else if (any(anc_pos < 0)) {
135	!	if (!all(anc_pos < 0)) {
136	!	stop("Got mix of negative and non-negative values for ancestor_pos; ", "this is not supported.")
137		}
138		## direct parent is actually NROW(.spl_context) so avoid off-by-1 error with the + 1 here
139	!	anc_pos <- numrows - (anc_pos + 1)
140		}
141	2x	anc_pos
142		}
143
144		#' @name cond_rm_facets
145		#' @title Conditional Removal of Facets
146		#' @param facets character or NULL. Vector of facet names to be removed
147		#' if condition(s) are met
148		#' @param facets_regex character(1). Regular expression to identify facet
149		#' names to be removed if condition(s) are met.
150		#' @param ancestor_pos numeric(1). Row in spl_context to check the condition
151		#' within. E.g., 1 represents the first split, 2 represents the second split
152		#' nested within the first, etc. NA specifies that the conditions
153		#' should be checked at all split levels. Negative integers indicate position
154		#' counting back from the current one, e.g., -1 indicates the direct parent
155		#' (most recent split before this one). Negative and positive/NA positions
156		#' cannot be mixed.
157		#' @param split character(1) or NULL. If specified, name of the split
158		#' at position `ancestor_pos` must be identical to this value for
159		#' the removal condition to be met.
160		#' @param split_regex character(1) or NULL. If specified, a regular expression
161		#' the name of the split at position `ancestor_pos` must match for
162		#' the removal condition to be met. Cannot be specified at the same time
163		#' as `split`.
164		#' @param value character(1) or NULL. If specified, split (facet) value
165		#' at position `ancestor_pos` must be identical to this value for
166		#' removal condition to be met.
167		#' @param value_regex character(1) or NULL. If specified, a regular expression
168		#' the value of the split at position `ancestor_pos` must match for
169		#' the removal condition to be met. Cannot be specified at the same time
170		#' as `value`.
171		#' @param keep_matches logical(1). Given the specified condition is met,
172		#' should the facets removed be those matching `facets`/`facets_regex`
173		#' (`FALSE`, the default), or those not matching (`TRUE`).
174		#'
175		#' @details Facet removal occurs when the specified condition(s)
176		#' on the split(s) and or value(s) are met within at least one
177		#' of the split_context rows indicated by `ancestor_pos`; otherwise
178		#' the set of facets is returned unchanged.
179		#'
180		#' If facet removal is performed, either all facets which match `facets` (or
181		#' `facets_regex` will be removed ( the default `keep_matches == FALSE`
182		#' case), or all non-matching facets will be removed (when
183		#' `keep_matches_only == TRUE`).
184		#'
185		#' @note A degenerate table is likely to be returned if all facets
186		#' are removed.
187		#'
188		#' @returns a function suitable for use in `make_split_fun`'s
189		#' `post` argument which encodes the specified condition.
190		#' @export
191		#' @examples
192		#'
193		#' rm_a_from_placebo <- cond_rm_facets(
194		#' facets = "A",
195		#' ancestor_pos = NA,
196		#' value_regex = "Placeb",
197		#' split = "ARM"
198		#' )
199		#' mysplit <- make_split_fun(post = list(rm_a_from_placebo))
200		#'
201		#' lyt <- basic_table() \|>
202		#' split_cols_by("ARM") \|>
203		#' split_cols_by("STRATA1", split_fun = mysplit) \|>
204		#' analyze("AGE", mean, format = "xx.x")
205		#' build_table(lyt, ex_adsl)
206		#'
207		#' rm_bc_from_combo <- cond_rm_facets(
208		#' facets = c("B", "C"),
209		#' ancestor_pos = -1,
210		#' value_regex = "Combi"
211		#' )
212		#' mysplit2 <- make_split_fun(post = list(rm_bc_from_combo))
213		#'
214		#' lyt2 <- basic_table() \|>
215		#' split_cols_by("ARM") \|>
216		#' split_cols_by("STRATA1", split_fun = mysplit2) \|>
217		#' analyze("AGE", mean, format = "xx.x")
218		#' tbl2 <- build_table(lyt2, ex_adsl)
219		#' tbl2
220		#'
221		#' rm_bc_from_combo2 <- cond_rm_facets(
222		#' facets_regex = "^A$",
223		#' ancestor_pos = -1,
224		#' value_regex = "Combi",
225		#' keep_matches = TRUE
226		#' )
227		#' mysplit3 <- make_split_fun(post = list(rm_bc_from_combo2))
228		#'
229		#' lyt3 <- basic_table() \|>
230		#' split_cols_by("ARM") \|>
231		#' split_cols_by("STRATA1", split_fun = mysplit3) \|>
232		#' analyze("AGE", mean, format = "xx.x")
233		#' tbl3 <- build_table(lyt3, ex_adsl)
234		#'
235		#' stopifnot(identical(cell_values(tbl2), cell_values(tbl3)))
236		cond_rm_facets <- function(
237		facets = NULL,
238		facets_regex = NULL,
239		ancestor_pos = 1,
240		split = NULL,
241		split_regex = NULL,
242		value = NULL,
243		value_regex = NULL,
244		keep_matches = FALSE) {
245		## detect errors/miscalling which don't even require us to have the facets
246	4x	if (is.null(split) && is.null(split_regex) && is.null(value) && is.null(value_regex)) {
247	1x	stop(
248	1x	"Must specify condition in terms of at least one of ",
249	1x	"split name (split or split_regex) or ",
250	1x	"facet value (value or value_regex)."
251		)
252		}
253	3x	if (is.null(facets) && is.null(facets_regex)) {
254	1x	stop("Must specify facets either facets or facets_regex, got neither.")
255	2x	} else if (!is.null(facets) && !is.null(facets_regex)) {
256	1x	stop("Got both facets and facets_regex, this is not supported, please specify only one.")
257		}
258	1x	function(ret, spl, .spl_context, fulldf) {
259	2x	torm_ind <- c()
260	2x	ancestor_pos <- resolve_ancestor_pos(ancestor_pos, NROW(.spl_context))
261	2x	torm_ind <- find_torm(ret, facets, facets_regex, keep_matches = keep_matches)
262	2x	fct_abbrev <- ifelse(is.null(facets_regex), paste(facets, collapse = ", "), facets_regex)
263	2x	if (length(torm_ind) == 0) {
264		# nocov start
265	1x	warning(
266	1x	"No facets matched removal criteria [",
267	1x	fct_abbrev,
268		"] ",
269	1x	"in function created with cond_rm_facets.\n",
270	1x	"Occured at path: ",
271	1x	spl_context_to_disp_path(.spl_context),
272	1x	call. = FALSE
273		)
274		# nocov end
275	2x	} else if (length(torm_ind) == length(ret$datasplit)) {
276		# nocov start
277	1x	warning(
278	1x	"All facets matched removal criteria [",
279	1x	fct_abbrev,
280	1x	"] in function created with cond_rm_facets. ",
281	1x	"This will result in a degenerate table (if the condition ",
282	1x	"is met) within row splitting and in table-creation failing ",
283	1x	"entirely in column splitting.\n",
284	1x	"Occured at path: ",
285	1x	spl_context_to_disp_path(.spl_context),
286	1x	call. = FALSE
287		)
288		# nocov end
289		}
290	1x	if (
291	2x	.check_rem_cond(split, split_regex, .spl_context, ancestor_pos, type = "split") &&
292	2x	.check_rem_cond(value, value_regex, .spl_context, ancestor_pos, type = "value")
293		) {
294		## find_torm handles the keep matching case, so by this point torm_ind is always the ones to remove
295	1x	ret <- lapply(ret, function(part) part[-torm_ind])
296		}
297
298	2x	ret
299		}
300		}
301
302
303		#' @name rm_levels
304		#'
305		#' @title Removal of Levels
306		#'
307		#' @description
308		#' custom function for removing level inside pre step in make_split_fun.
309		#'
310		#' @param excl Choose which level(s) to remove
311		#' @return a function implementing pre-processing split behavior (for use in
312		#' `make_split_fun(pre = )` which removes the levels in `excl` from the data
313		#' before facets are generated.
314		#' @export
315		#'
316		rm_levels <- function(excl) {
317	2x	function(df, spl, ...) {
318	2x	var <- spl_variable(spl)
319	2x	varvec <- df[[var]]
320	2x	oldlevs <- levels(varvec)
321
322	2x	exclevs <- oldlevs %in% excl
323	2x	newlevs <- oldlevs[!exclevs]
324	2x	df[[var]] <- factor(df[[var]], levels = newlevs)
325	2x	df
326		}
327		}
328
329		#' Shortcut for Creating Custom Column Splits
330		#'
331		#' This is a short cut for a common use of [rtables::make_split_result()] where you need to create
332		#' custom column splits with different labels but using the same full dataset for each column.
333		#' It automatically sets up the values, datasplit (using the same full dataset for each column),
334		#' and subset_exprs (using TRUE for all subsets) parameters for make_split_result().
335		#'
336		#' @param ... sequence of named labels for the columns.
337		#' @param fulldf (`data.frame`)\cr the `fulldf` which will be used for each column.
338		#' @return The result from [rtables::make_split_result()].
339		#'
340		#' @keywords internal
341		short_split_result <- function(..., fulldf) {
342	37x	labels <- c(...)
343	37x	values <- stats::setNames(names(labels), names(labels))
344	37x	datasplit <- stats::setNames(replicate(n = length(labels), list(fulldf)), names(labels))
345	37x	subset_exprs <- replicate(n = length(labels), list(expression(TRUE)))
346	37x	make_split_result(values = values, labels = labels, datasplit = datasplit, subset_exprs = subset_exprs)
347		}

1		#' Relative risk estimation
2		#'
3		#' The analysis function [a_relative_risk()] is used to create a layout element
4		#' to estimate the relative risk for response within a studied population. Only
5		#' the CMH method is available currently.
6		#' The primary analysis variable, `vars`, is a logical variable indicating
7		#' whether a response has occurred for each record.
8		#' A stratification variable must be supplied via the
9		#' `strata` element of the `variables` argument.
10		#'
11		#' @details The variance of the CMH relative risk estimate is calculated using
12		#' the Greenland and Robins (1985) variance estimation.
13		#'
14		#' @param df (`data.frame`)\cr input data frame.
15		#' @param .var (`string`)\cr name of the response variable.
16		#' @param ref_path (`character`)\cr path to the reference group.
17		#' @param .spl_context (`environment`)\cr split context environment.
18		#' @param ... Additional arguments passed to the statistics function.
19		#' @param .stats (`character`)\cr statistics to calculate.
20		#' @param .formats (`list`)\cr formats for the statistics.
21		#' @param .labels (`list`)\cr labels for the statistics.
22		#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
23		#' @param .ref_group (`data.frame`)\cr reference group data frame.
24		#' @param .in_ref_col (`logical`)\cr whether the current column is the reference column.
25		#' @param variables (`list`)\cr list with strata variable names.
26		#' @param conf_level (`numeric`)\cr confidence level for the confidence interval.
27		#' @param method (`string`)\cr method to use for relative risk calculation.
28		#' @param weights_method (`string`)\cr method to use for weights calculation in stratified analysis.
29		#'
30		#' @note This has been adapted from the `odds_ratio` functions in the `tern` package.
31		#'
32		#' @name relative_risk
33		NULL
34
35		#' @describeIn relative_risk Statistics function estimating the relative risk for response.
36		#'
37		#' @return
38		#' * `s_relative_risk()` returns a named list of elements `rel_risk_ci` and `pval`.
39		#'
40		#' @examples
41		#' nex <- 100
42		#' dta <- data.frame(
43		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
44		#' "grp" = sample(c("A", "B"), nex, TRUE),
45		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
46		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
47		#' stringsAsFactors = TRUE
48		#' )
49		#'
50		#' s_relative_risk(
51		#' df = subset(dta, grp == "A"),
52		#' .var = "rsp",
53		#' .ref_group = subset(dta, grp == "B"),
54		#' .in_ref_col = FALSE,
55		#' variables = list(strata = c("f1", "f2")),
56		#' conf_level = 0.90
57		#' )
58		#' @export
59		s_relative_risk <- function(
60		df,
61		.var,
62		.ref_group,
63		.in_ref_col,
64		variables = list(strata = NULL),
65		conf_level = 0.95,
66		method = "cmh",
67		weights_method = "cmh") {
68	3x	method <- match.arg(method)
69	3x	weights_method <- match.arg(weights_method)
70	3x	checkmate::assert_character(variables$strata, null.ok = FALSE)
71	3x	y <- list(rel_risk_ci = list(), pval = list())
72
73	3x	if (!.in_ref_col) {
74	2x	rsp <- c(.ref_group[[.var]], df[[.var]])
75	2x	grp <- factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref"))
76
77	2x	strata_colnames <- variables$strata
78	2x	checkmate::assert_character(strata_colnames, null.ok = FALSE)
79	2x	strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)
80
81	2x	assert_df_with_variables(df, strata_vars)
82	2x	assert_df_with_variables(.ref_group, strata_vars)
83
84		# Merging interaction strata for reference group rows data and remaining
85	2x	strata <- c(interaction(.ref_group[strata_colnames]), interaction(df[strata_colnames]))
86	2x	strata <- as.factor(strata)
87
88	2x	y <- prop_ratio_cmh(rsp, grp, strata, conf_level)[c("rel_risk_ci", "pval")]
89
90	2x	one_group_no_events <- (sum(.ref_group[[.var]]) == 0) \|\| (sum(df[[.var]]) == 0)
91	2x	if (one_group_no_events) {
92	1x	y$rel_risk_ci <- c(est = NA_real_, lcl = 0, ucl = Inf)
93		}
94		}
95
96	3x	y$rel_risk_ci <- with_label(
97	3x	x = y$rel_risk_ci,
98	3x	label = paste0("Relative risk (", f_conf_level(conf_level), ")")
99		)
100	3x	y$pval <- unname(y$pval)
101
102	3x	y
103		}
104
105		#' @describeIn relative_risk Formatted analysis function which is used as `afun`. Note that the
106		#' junco specific `ref_path` and `.spl_context` arguments are used for reference column information.
107		#'
108		#' @return
109		#' * `a_relative_risk()` returns the corresponding list with formatted [rtables::CellValue()].
110		#'
111		#' @examples
112		#' nex <- 100
113		#' dta <- data.frame(
114		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
115		#' "grp" = sample(c("A", "B"), nex, TRUE),
116		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
117		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
118		#' stringsAsFactors = TRUE
119		#' )
120		#'
121		#' l <- basic_table() \|>
122		#' split_cols_by(var = "grp") \|>
123		#' analyze(
124		#' vars = "rsp",
125		#' afun = a_relative_risk,
126		#' extra_args = list(
127		#' conf_level = 0.90,
128		#' variables = list(strata = "f1"),
129		#' ref_path = c("grp", "B")
130		#' )
131		#' )
132		#'
133		#' build_table(l, df = dta)
134		#' @export
135		#' @order 2
136		a_relative_risk <- function(
137		df,
138		.var,
139		ref_path,
140		.spl_context,
141		...,
142		.stats = NULL,
143		.formats = NULL,
144		.labels = NULL,
145		.indent_mods = NULL) {
146		# Check for additional parameters to the statistics function
147	2x	dots_extra_args <- list(...)
148
149		# Only support default stats, not custom stats
150	2x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
151
152		# Obtain reference column information
153	2x	ref <- get_ref_info(ref_path, .spl_context)
154
155		# Apply statistics function
156	2x	x_stats <- .apply_stat_functions(
157	2x	default_stat_fnc = s_relative_risk,
158	2x	custom_stat_fnc_list = NULL,
159	2x	args_list = c(
160	2x	df = list(df),
161	2x	.var = .var,
162	2x	.ref_group = list(ref$ref_group),
163	2x	.in_ref_col = ref$in_ref_col,
164	2x	dots_extra_args
165		)
166		)
167
168		# Format according to specifications
169	2x	format_stats(
170	2x	x_stats,
171	2x	method_groups = "relative_risk",
172	2x	stats_in = .stats,
173	2x	formats_in = .formats,
174	2x	labels_in = .labels,
175	2x	indents_in = .indent_mods
176		)
177		}
178
179
180		#' @keywords internal
181		safe_mh_test <- function(...) {
182	4x	tryCatch(
183	4x	stats::mantelhaen.test(...),
184	4x	error = function(e) list(p.value = NA_real_)
185		)
186		}
187
188		#' Relative Risk CMH Statistic
189		#'
190		#' Calculates the relative risk which is defined as the ratio between the
191		#' response rates between the experimental treatment group and the control treatment group, adjusted
192		#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights.
193		#'
194		#' @inheritParams proposal_argument_convention
195		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
196		#'
197		#' @return a list with elements `rel_risk_ci` and `pval`.
198		#' @examples
199		#'
200		#' set.seed(2)
201		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
202		#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
203		#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
204		#' strata_data <- data.frame(
205		#' "f1" = sample(c("a", "b"), 100, TRUE),
206		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
207		#' stringsAsFactors = TRUE
208		#' )
209		#'
210		#' prop_ratio_cmh(
211		#' rsp = rsp, grp = grp, strata = interaction(strata_data),
212		#' conf_level = 0.90
213		#' )
214		#'
215		#' @export
216		prop_ratio_cmh <- function(rsp, grp, strata, conf_level = 0.95) {
217	4x	grp <- as_factor_keep_attributes(grp)
218	4x	strata <- as_factor_keep_attributes(strata)
219	4x	has_single_stratum <- nlevels(strata) == 1
220	4x	check_diff_prop_ci(
221	4x	rsp = rsp,
222	4x	grp = grp,
223	4x	conf_level = conf_level,
224	4x	strata = strata
225		)
226
227	4x	if (any(tapply(rsp, strata, length) < 5)) {
228	!	warning("Less than 5 observations in some strata.")
229		}
230
231		# first dimension: treatment, control - therefore we reverse the levels order 2nd dimension: TRUE, FALSE 3rd
232		# dimension: levels of strata rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
233	4x	t_tbl <- table(factor(grp, levels = rev(levels(grp))), factor(rsp, levels = c("TRUE", "FALSE")), strata)
234
235		# Index 1 is for treatment, index 2 is for control, e.g. n1 is number of treatment patients per stratum.
236	4x	n1resp <- t_tbl[1, 1, levels(strata)]
237	4x	n2resp <- t_tbl[2, 1, levels(strata)]
238
239	4x	t1 <- t_tbl[1, 1:2, , drop = TRUE]
240	4x	t2 <- t_tbl[2, 1:2, , drop = TRUE]
241	4x	if (has_single_stratum) {
242	!	n1 <- sum(t1)
243	!	n2 <- sum(t2)
244	!	checkmate::assert_count(n1)
245	!	checkmate::assert_count(n2)
246		} else {
247	4x	n1 <- colSums(t1)
248	4x	n2 <- colSums(t2)
249	4x	checkmate::assert_integerish(n1)
250	4x	checkmate::assert_integerish(n2)
251	4x	checkmate::assert_true(identical(length(n1), length(n2)))
252		}
253
254		# CMH statistic for relative risk, Treatment over Control
255	4x	use_stratum <- (n1 > 0) & (n2 > 0)
256	4x	n1 <- n1[use_stratum]
257	4x	n2 <- n2[use_stratum]
258	4x	n1resp <- n1resp[use_stratum]
259	4x	n2resp <- n2resp[use_stratum]
260	4x	nresp <- n1resp + n2resp
261	4x	n <- n1 + n2
262
263	4x	rel_risk_num <- sum(n1resp * n2 / n)
264	4x	rel_risk_denom <- sum(n2resp * n1 / n)
265	4x	log_rel_risk <- log(rel_risk_num) - log(rel_risk_denom)
266	4x	rel_risk <- exp(log_rel_risk)
267
268	4x	var_num <- sum((n1 * n2 * nresp - n1resp * n2resp * n) / n^2)
269	4x	var_denom <- rel_risk_num * rel_risk_denom
270	4x	var_log_rel_risk <- var_num / var_denom
271	4x	se_log_rel_risk <- sqrt(var_log_rel_risk)
272
273	4x	z <- stats::qnorm((1 + conf_level) / 2)
274	4x	log_rel_risk_ci <- log_rel_risk + c(-1, +1) * z * se_log_rel_risk
275	4x	ci <- exp(log_rel_risk_ci)
276	4x	rel_risk_ci <- stats::setNames(c(rel_risk, ci), c("est", "lcl", "ucl"))
277
278	4x	pval <- safe_mh_test(t_tbl, correct = FALSE)$p.value
279
280	4x	list(rel_risk_ci = rel_risk_ci, pval = pval)
281		}

1		#' @name safe_prune_table
2		#'
3		#' @title Safely Prune Table With Empty Table Message If Needed
4		#'
5		#' @inheritParams rtables::prune_table
6		#'
7		#' @param empty_msg character(1). The message to place in the table
8		#' if no rows were left after pruning
9		#'
10		#' @param spancols logical(1). Should `empty_msg` be spanned
11		#' across the table's columns (`TRUE`) or placed in the
12		#' rows row label (`FALSE`). Defaults to `FALSE` currently.
13		#'
14		#' @rdname safe_prune_table
15		#' @return `tt` pruned based on the arguments, or, if
16		#' pruning would remove all rows, a TableTree with the
17		#' same column structure, and one row containing the
18		#' empty message spanning all columns
19		#'
20		#' @export
21		#' @examples
22		#' prfun <- function(tt) TRUE
23		#'
24		#' lyt <- basic_table() \|>
25		#' split_cols_by("ARM") \|>
26		#' split_cols_by("STRATA1") \|>
27		#' split_rows_by("SEX") \|>
28		#' analyze("AGE")
29		#' tbl <- build_table(lyt, ex_adsl)
30		#'
31		#' safe_prune_table(tbl, prfun)
32		safe_prune_table <- function(
33		tt,
34		prune_func = prune_empty_level,
35		stop_depth = NA,
36		empty_msg = " - No Data To Display - ",
37		spancols = FALSE) {
38	6x	ret <- prune_table(tt = tt, prune_func = prune_func, stop_depth = stop_depth, depth = 0)
39	6x	if (is.null(ret)) {
40	3x	ret <- tt[integer(), , keep_titles = TRUE, keep_topleft = TRUE, keep_footers = TRUE]
41
42	3x	if (spancols) {
43		## this will eventually be the only option...
44	1x	ret <- sanitize_table_struct(ret, empty_msg = empty_msg)
45		} else {
46	2x	tree_children(ret) <- list(rrowl(empty_msg, rep(" ", ncol(tt))))
47		}
48		}
49	6x	ret
50		}
51
52		#' @name count_pruner
53		#'
54		#' @title Count Pruner
55		#'
56		#' @description
57		#' This is a pruning constructor function which identifies records to be pruned
58		#' based on the count (assumed to be the first statistic displayed when a compound
59		#' statistic (e.g., ## / ## (XX.X percent) is presented).
60		#'
61		#' @param count count threshold. Function will keep all records strictly greater
62		#' than this threshold.
63		#' @param cols column path (character or integer (column indices))
64		#' @param cat_include Category to be considered for pruning
65		#' @param cat_exclude logical Category to be excluded from pruning
66		#' @export
67		#'
68		#'
69		#' @examples
70		#'
71		#' ADSL <- data.frame(
72		#' USUBJID = c(
73		#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
74		#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
75		#' ),
76		#' TRT01P = factor(
77		#' c(
78		#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB",
79		#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMB"
80		#' )
81		#' ),
82		#' FASFL = c("Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y"),
83		#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
84		#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
85		#' )
86		#'
87		#' lyt <- basic_table() \|>
88		#' split_cols_by("TRT01P") \|>
89		#' add_overall_col("Total") \|>
90		#' analyze("FASFL",
91		#' var_labels = "Analysis set:",
92		#' afun = a_freq_j,
93		#' extra_args = list(label = "Full", val = "Y"),
94		#' show_labels = "visible"
95		#' ) \|>
96		#' analyze("SAFFL",
97		#' var_labels = "Analysis set:",
98		#' afun = a_freq_j,
99		#' extra_args = list(label = "Safety", val = "Y"),
100		#' show_labels = "visible"
101		#' ) \|>
102		#' analyze("PKFL",
103		#' var_labels = "Analysis set:",
104		#' afun = a_freq_j,
105		#' extra_args = list(label = "PK", val = "Y"),
106		#' show_labels = "visible"
107		#' )
108		#'
109		#' result <- build_table(lyt, ADSL)
110		#'
111		#' result
112		#'
113		#' result <- prune_table(
114		#' result,
115		#' prune_func = count_pruner(cat_exclude = c("Safety"), cols = "Total")
116		#' )
117		#'
118		#' result
119		#'
120		#' @rdname count_pruner
121		#' @returns function that can be utilized as pruning function in prune_table
122		#'
123		count_pruner <- function(count = 0, cat_include = NULL, cat_exclude = NULL, cols = c("TRT01A")) {
124	6x	function(tt) {
125		# Do not ever prune the following rows. a row that should be kept in the table will get the value of FALSE
126
127	6x	if ( # nolint start
128	1089x	!methods::is(tt, "TableRow") \|\|
129	1089x	methods::is(tt, "LabelRow") \|\|
130	1089x	obj_label(tt) == " " \|\|
131	1089x	(!is.null(cat_include) &&
132	1089x	!obj_label(tt) %in% cat_include) \|\|
133	1089x	(!is.null(cat_exclude) && obj_label(tt) %in% cat_exclude)
134	6x	) { # nolint end
135	655x	return(FALSE)
136		}
137
138		# Check the remaining rows to see if any meet the specified threshold.
139
140	434x	if (!is.null(cols)) {
141	434x	tt <- subset_cols(tt, cols)
142		}
143
144		# init return value to FALSE (not remove row)
145	434x	remove <- FALSE
146
147	434x	colpaths <- col_paths(tt)
148		# identify non relative risk columns as in rrisk columns first element is not a count, but diff percentage with
149		# small total columns and count > 0 (eg count = 1, n per group = 10), you can run into count1 = 1, countpbo =
150		# 0, diffpct = 10, this row should not be considered though
151	434x	cp_nonrelrisk <- vapply(
152	434x	colpaths,
153	434x	function(pth) {
154	1308x	!any(grepl("rrisk", tolower(pth)))
155		},
156	434x	FUN.VALUE = TRUE
157		)
158
159		## only continue if at least one non relative risk column selected
160	434x	if (any(cp_nonrelrisk)) {
161		# get cell_values of non rel risk columns
162	434x	cell_vals <- cell_values(tt)[cp_nonrelrisk]
163
164	434x	len_cell_vals <- lapply(cell_vals, function(x) {
165	1302x	length(x)
166		})
167
168	434x	if (any(len_cell_vals == 0)) {
169	!	stop("column cell values has not appropriate structure (a column with NULL value).")
170		}
171
172		# get count (first element)
173	434x	counts <- unlist(
174	434x	lapply(cell_vals, function(x) {
175	1302x	x[[1]]
176		}),
177	434x	use.names = FALSE
178		)
179
180		# check that we do have counts only
181	434x	checkmate::assert_integerish(counts)
182
183	434x	keep <- counts > count
184		# the row should be kept if at least one column has count above threshold
185	434x	keep <- any(keep)
186
187		# pruning function should return TRUE if row has to be removed, ie opposite of keep
188	434x	remove <- !keep
189		}
190
191	434x	return(remove)
192		}
193		}
194
195
196		#' @name bspt_pruner
197		#'
198		#' @title Pruning Function for pruning based on a fraction and/or a difference from the control arm
199		#'
200		#' @description
201		#' This is a pruning constructor function which identifies records to be pruned
202		#' based on the the fraction from the percentages. In addition to just looking at a fraction within an arm
203		#' this function also allows further flexibility to also prune based on a comparison versus the control arm.
204		#' @param fraction fraction threshold. Function will keep all records strictly greater
205		#' than this threshold.
206		#' @param cols column path.
207		#' @param keeprowtext Row to be excluded from pruning.
208		#' @param reg_expr Apply keeprowtext as a regular expression (grepl with fixed = TRUE)
209		#' @param control Control Group
210		#' @param diff_from_control Difference from control threshold.
211		#' @param only_more_often TRUE: Only consider when column pct is more often
212		#' than control. FALSE: Also select a row where column pct is less often than
213		#' control and abs(diff) above threshold
214		#'
215		#' @export
216		#'
217		#'
218		#' @examples
219		#' ADSL <- data.frame(
220		#' USUBJID = c(
221		#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
222		#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
223		#' ),
224		#' TRT01P = c(
225		#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB",
226		#' "Placebo", "Placebo", "Placebo", "ARMA", "ARMB"
227		#' ),
228		#' FASFL = c("Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y"),
229		#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
230		#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
231		#' )
232		#'
233		#' ADSL <- ADSL \|>
234		#' dplyr::mutate(TRT01P = as.factor(TRT01P)) \|>
235		#' dplyr::mutate(SAFFL = factor(SAFFL, c("Y", "N"))) \|>
236		#' dplyr::mutate(PKFL = factor(PKFL, c("Y", "N")))
237		#'
238		#' lyt <- basic_table() \|>
239		#' split_cols_by("TRT01P") \|>
240		#' add_overall_col("Total") \|>
241		#' split_rows_by(
242		#' "FASFL",
243		#' split_fun = drop_and_remove_levels("N"),
244		#' child_labels = "hidden"
245		#' ) \|>
246		#' analyze("FASFL",
247		#' var_labels = "Analysis set:",
248		#' afun = a_freq_j,
249		#' show_labels = "visible",
250		#' extra_args = list(label = "Full", .stats = "count_unique_fraction")
251		#' ) \|>
252		#' split_rows_by(
253		#' "SAFFL",
254		#' split_fun = remove_split_levels("N"),
255		#' child_labels = "hidden"
256		#' ) \|>
257		#' analyze("SAFFL",
258		#' var_labels = "Analysis set:",
259		#' afun = a_freq_j,
260		#' show_labels = "visible",
261		#' extra_args = list(label = "Safety", .stats = "count_unique_fraction")
262		#' ) \|>
263		#' split_rows_by(
264		#' "PKFL",
265		#' split_fun = remove_split_levels("N"),
266		#' child_labels = "hidden"
267		#' ) \|>
268		#' analyze("PKFL",
269		#' var_labels = "Analysis set:",
270		#' afun = a_freq_j,
271		#' show_labels = "visible",
272		#' extra_args = list(label = "PK", .stats = "count_unique_fraction")
273		#' )
274		#'
275		#' result <- build_table(lyt, ADSL)
276		#'
277		#' result
278		#'
279		#' result <- prune_table(
280		#' result,
281		#' prune_func = bspt_pruner(
282		#' fraction = 0.05,
283		#' keeprowtext = "Safety",
284		#' cols = c("Total")
285		#' )
286		#' )
287		#'
288		#' result
289		#' @rdname bspt_pruner
290		#' @returns function that can be utilized as pruning function in prune_table
291		#'
292		bspt_pruner <- function(
293		fraction = 0.05,
294		keeprowtext = "Analysis set: Safety",
295		reg_expr = FALSE,
296		control = NULL,
297		diff_from_control = NULL,
298		only_more_often = TRUE,
299		cols = c("TRT01A")) {
300	12x	if (is.null(fraction) && is.null(diff_from_control)) {
301	1x	stop("At least one of fraction or diff_from_control must be non-NULL.")
302		}
303	11x	if (!is.null(diff_from_control) && is.null(control)) {
304	1x	stop("control must be specified when diff_from_control is not NULL.")
305		}
306
307	10x	function(tt) {
308		# Do not ever prune the following rows.
309	137x	if (!methods::is(tt, "TableRow") \|\| methods::is(tt, "LabelRow")) {
310	24x	return(FALSE)
311		}
312
313	10x	if (
314	113x	reg_expr &&
315	113x	any(sapply(keeprowtext, function(x) {
316	!	grepl(x, obj_label(tt), fixed = TRUE)
317		}))
318		) {
319	!	return(FALSE)
320		}
321	113x	if (!reg_expr && obj_label(tt) %in% keeprowtext) {
322	10x	return(FALSE)
323		}
324
325		# init return value to FALSE (not remove row)
326	103x	remove <- FALSE
327
328		# needed later for control column
329	103x	tt_all_cols <- tt
330
331	103x	if (!is.null(cols)) {
332	103x	tt <- subset_cols(tt, cols)
333		}
334
335	103x	colpaths <- col_paths(tt)
336		# identify non relative risk columns
337	103x	cp_nonrelrisk <- vapply(
338	103x	colpaths,
339	103x	function(pth) {
340	329x	!any(grepl("rrisk", tolower(pth)))
341		},
342	103x	FUN.VALUE = TRUE
343		)
344
345		## only continue if at least one non relative risk column selected
346	103x	if (any(cp_nonrelrisk)) {
347		# get rid of rel risk columns (if present)
348	103x	cell_vals <- cell_values(tt)[cp_nonrelrisk]
349
350	103x	len_cell_vals <- lapply(cell_vals, function(x) {
351	296x	length(x)
352		})
353
354	103x	if (any(len_cell_vals < 2)) {
355	!	stop("column cell values has not appropriate structure (less than 2 values in cell).")
356		}
357
358		# get count and percentage columns counts in first col pcts in second col
359	103x	counts <- unname(lst_slicer(cell_vals, 1, numeric(1)))
360	103x	pcts <- unname(lst_slicer(cell_vals, 2, numeric(1)))
361
362	103x	checkmate::check_integerish(counts)
363		# similarly check that pcts is indeed a percentage
364	103x	if (!all(dplyr::between(pcts, 0, 1))) {
365	!	stop("second value column cell is not a percentage.")
366		}
367
368	103x	if (!is.null(fraction)) {
369		# avoid problems with FALSE for 0.05 >= 0.05
370	103x	check_pcts <- pcts >= fraction \|
371	103x	vapply(
372	103x	pcts,
373	103x	function(x) {
374	296x	isTRUE(all.equal(x, fraction))
375		},
376	103x	TRUE
377		)
378		}
379
380	103x	if (!is.null(diff_from_control)) {
381		# get control group from all columns tt (in case the columns selected would not include the control
382		# group)
383	78x	colpaths0 <- col_paths(subset_cols(tt_all_cols, c(control)))
384	78x	cp0_nonrelrisk <- vapply(
385	78x	colpaths0,
386	78x	function(pth) {
387	83x	!any(grepl("rrisk", tolower(pth)))
388		},
389	78x	FUN.VALUE = TRUE
390		)
391	78x	colpaths0 <- colpaths0[cp0_nonrelrisk]
392
393	78x	if (length(colpaths0) != 1) {
394	1x	stop("control group spec does not result in single column.")
395		} else {
396	77x	colpaths0 <- colpaths0[[1]]
397		# get percent from control group
398	77x	pct0 <- cell_values(tt_all_cols, colpath = c(control))[[1]][2]
399
400	77x	if (!dplyr::between(pct0, 0, 1)) {
401	!	stop("second value of control column cell is not a percentage.")
402		}
403		}
404
405		# check if control group column was also present in main column selector
406	77x	colid0 <- which(sapply(colpaths, function(x) {
407	248x	all(x == colpaths0)
408		}))
409
410	77x	pctdiffs <- pcts - pct0
411
412		# exclude control column from this (would only be relevant if we want to check criteria on all columns
413		# rather than any column)
414	66x	if (!identical(colid0, integer(0))) pctdiffs <- pctdiffs[-colid0]
415
416		# if not only_more_often be equally strict on worse from control, better as control ie also select rows
417		# where column is less often than control pct = 0.07 pct0 = 0.10 (diff = -0.03 )
418	22x	if (!only_more_often) pctdiffs <- abs(pctdiffs)
419
420		# avoid problems with FALSE for 0.02 >= 0.02
421	77x	check_diffpcts <- pctdiffs >= diff_from_control \|
422	77x	vapply(
423	77x	pctdiffs,
424	77x	function(x) {
425	161x	isTRUE(all.equal(x, diff_from_control))
426		},
427	77x	TRUE
428		)
429		}
430
431		# final step to check if row should be kept in table
432	102x	if (!is.null(fraction) && is.null(diff_from_control)) {
433		# keep row in table if any column pct above threshold
434	25x	keep <- any(check_pcts)
435	77x	} else if ((!is.null(fraction) && !is.null(diff_from_control))) {
436		# if any column pct above threshold AND any diff pct above threshold note any is used in both
437		# expressions separately situation pct = 0.04 pct0 = 0.06 should be kept (fraction = 0.05, abs(diff
438		# control) = 0.02) this problem only arises in the non-default situation when only_more_often is FALSE
439		# (ie when column pct is less often than control) this would not be the case if we would any(check_pcts
440		# & check_diffpcts) as 0.04 is below fraction
441	77x	keep <- any(check_pcts) && any(check_diffpcts)
442	!	} else if ((is.null(fraction) && !is.null(diff_from_control))) {
443	!	keep <- any(check_diffpcts)
444		}
445
446		# pruning function should return TRUE if row has to be removed, ie opposite of keep
447	102x	remove <- !keep
448		}
449
450	102x	return(remove)
451		}
452		}
453
454		lst_slicer <- function(lst, ind, type) {
455	206x	vapply(lst, `[[`, i = ind, type)
456		}
457
458		#' @name remove_rows
459		#'
460		#' @title
461		#' Pruning function to remove specific rows of a table regardless of counts
462		#' @description
463		#' This function will remove all rows of a table based on the row text
464		#' provided by the user.
465		#' @param removerowtext define a text string for which any row with row text will be removed.
466		#' @param reg_expr Apply removerowtext as a regular expression (grepl with fixed = TRUE)
467		#' @export
468		#' @rdname remove_rows
469		#'
470		#'
471		#' @examples
472		#' ADSL <- data.frame(
473		#' USUBJID = c(
474		#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
475		#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
476		#' ),
477		#' TRT01P = c(
478		#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", "Placebo",
479		#' "Placebo", "Placebo", "ARMA", "ARMB"
480		#' ),
481		#' Category = c(
482		#' "Cat 1", "Cat 2", "Cat 1", "Unknown", "Cat 2",
483		#' "Cat 1", "Unknown", "Cat 1", "Cat 2", "Cat 1"
484		#' ),
485		#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
486		#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
487		#' )
488		#'
489		#' ADSL <- ADSL \|>
490		#' dplyr::mutate(TRT01P = as.factor(TRT01P))
491		#'
492		#' lyt <- basic_table() \|>
493		#' split_cols_by("TRT01P") \|>
494		#' analyze(
495		#' "Category",
496		#' afun = a_freq_j,
497		#' extra_args = list(.stats = "count_unique_fraction")
498		#' )
499		#'
500		#' result <- build_table(lyt, ADSL)
501		#'
502		#' result
503		#'
504		#' result <- prune_table(result, prune_func = remove_rows(removerowtext = "Unknown"))
505		#'
506		#' result
507		#' @aliases remove_rows
508		#' @returns function that can be utilized as pruning function in prune_table
509		#'
510		remove_rows <- function(removerowtext = NULL, reg_expr = FALSE) {
511	5x	function(tt) {
512	97x	if (!methods::is(tt, "TableRow") \|\| methods::is(tt, "LabelRow")) {
513	5x	return(FALSE)
514		}
515
516	92x	ret <- FALSE
517	92x	if (!is.null(removerowtext)) {
518	72x	if (!reg_expr) {
519	37x	ret <- obj_label(tt) %in% removerowtext
520		} else {
521	35x	ret <- any(sapply(removerowtext, function(x) {
522	52x	grepl(x, obj_label(tt), fixed = TRUE)
523		}))
524		}
525		}
526
527	92x	ret
528		}
529		}
530
531		#' @name keep_non_null_rows
532		#'
533		#' @title Pruning Function to accommodate removal of completely NULL rows within a table
534		#'
535		#' @description
536		#' Condition function on individual analysis rows. Flag as FALSE when all
537		#' columns are NULL, as then the row should not be kept. To be utilized as a
538		#' row_condition in function tern::keep_rows
539		#'
540		#' @param tr table tree object
541		#' @export
542		#'
543		#'
544		#' @examples
545		#'
546		#' library(dplyr)
547		#'
548		#' ADSL <- data.frame(
549		#' USUBJID = c(
550		#' "XXXXX01", "XXXXX02", "XXXXX03", "XXXXX04", "XXXXX05",
551		#' "XXXXX06", "XXXXX07", "XXXXX08", "XXXXX09", "XXXXX10"
552		#' ),
553		#' TRT01P = c(
554		#' "ARMA", "ARMB", "ARMA", "ARMB", "ARMB", "Placebo",
555		#' "Placebo", "Placebo", "ARMA", "ARMB"
556		#' ),
557		#' AGE = c(34, 56, 75, 81, 45, 75, 48, 19, 32, 31),
558		#' SAFFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N"),
559		#' PKFL = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N")
560		#' )
561		#'
562		#' ADSL <- ADSL \|>
563		#' mutate(TRT01P = as.factor(TRT01P))
564		#'
565		#' create_blank_line <- function(x) {
566		#' list(
567		#' "Mean" = rcell(mean(x), format = "xx.x"),
568		#' " " = rcell(NULL),
569		#' "Max" = rcell(max(x))
570		#' )
571		#' }
572		#'
573		#' lyt <- basic_table() \|>
574		#' split_cols_by("TRT01P") \|>
575		#' analyze("AGE", afun = create_blank_line)
576		#'
577		#' result <- build_table(lyt, ADSL)
578		#'
579		#' result
580		#' result <- prune_table(result, prune_func = tern::keep_rows(keep_non_null_rows))
581		#'
582		#' result
583		#' @rdname keep_non_null_rows
584		#' @returns a function that can be utilized as a row_condition in the tern::keep_rows function
585		#'
586		keep_non_null_rows <- function(tr) {
587	42x	if (methods::is(tr, "DataRow")) {
588	41x	r_cellvalue_null <- unlist(lapply(cell_values(tr), is.null))
589	41x	check <- all(r_cellvalue_null)
590
591		### if check TRUE (all cell values NULL -- need to return FALSE as need to remove)
592	41x	ret <- !check
593		} else {
594	1x	ret <- TRUE
595		}
596	42x	ret
597		}

1		#' Difference test for two proportions
2		#'
3		#' @description `r lifecycle::badge('stable')`
4		#'
5		#' The analysis function [a_test_proportion_diff()] can be used to create a layout element to test
6		#' the difference between two proportions. The primary analysis variable, `vars`, indicates whether a
7		#' response has occurred for each record. See the `method` parameter for options of methods to use
8		#' to calculate the p-value. Additionally, a stratification variable can be supplied via the `strata`
9		#' element of the `variables` argument. The argument `alternative` specifies the direction of the
10		#' alternative hypothesis.
11		#'
12		#' @inheritParams proposal_argument_convention
13		#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`; specifies the test used
14		#' to calculate the p-value.
15		#' @param .stats (`character`)\cr statistics to select for the table.
16		#'
17		#' @seealso [h_prop_diff_test]
18		#'
19		#' @note These functions have been forked from the `tern` package. Additional features are:
20		#'
21		#' * Additional `alternative` argument for the sidedness of the test.
22		#' * Additional `ref_path` argument for flexible reference column path specification.
23		#'
24		#' @name prop_diff_test
25		#' @order 1
26		NULL
27
28		#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
29		#'
30		#' @return
31		#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
32		#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
33		#'
34		#' @keywords internal
35		s_test_proportion_diff <- function(
36		df,
37		.var,
38		.ref_group,
39		.in_ref_col,
40		variables = list(strata = NULL),
41		method = c("chisq", "fisher", "cmh"),
42		alternative = c("two.sided", "less", "greater")) {
43	8x	method <- match.arg(method)
44	8x	alternative <- match.arg(alternative)
45	8x	y <- list(pval = list())
46
47	8x	if (!.in_ref_col) {
48	6x	assert_df_with_variables(df, list(rsp = .var))
49	6x	assert_df_with_variables(.ref_group, list(rsp = .var))
50	6x	rsp <- factor(c(.ref_group[[.var]], df[[.var]]), levels = c("TRUE", "FALSE"))
51	6x	grp <- factor(rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "Not-ref"))
52
53	6x	if (!is.null(variables$strata) \|\| method == "cmh") {
54	3x	strata <- variables$strata
55	3x	checkmate::assert_false(is.null(strata))
56	3x	strata_vars <- stats::setNames(as.list(strata), strata)
57	3x	assert_df_with_variables(df, strata_vars)
58	3x	assert_df_with_variables(.ref_group, strata_vars)
59	3x	strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
60		}
61
62	6x	tbl <- switch(method,
63	6x	cmh = table(grp, rsp, strata),
64	6x	table(grp, rsp)
65		)
66
67	6x	y$pval <- switch(method,
68	6x	chisq = prop_chisq(tbl, alternative),
69	6x	cmh = prop_cmh(tbl, alternative),
70	6x	fisher = prop_fisher(tbl, alternative)
71		)
72		}
73
74	8x	y$pval <- with_label(y$pval, d_test_proportion_diff_j(method, alternative))
75	8x	y
76		}
77
78		#' Description of the difference test between two proportions
79		#'
80		#' @description `r lifecycle::badge('stable')`
81		#'
82		#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
83		#'
84		#' @inheritParams s_test_proportion_diff
85		#'
86		#' @return A `string` describing the test from which the p-value is derived.
87		#'
88		#' @export
89		d_test_proportion_diff_j <- function(method, alternative) {
90	8x	checkmate::assert_string(method)
91	8x	meth_part <- switch(method,
92	8x	chisq = "Chi-Squared Test",
93	8x	cmh = "Cochran-Mantel-Haenszel Test",
94	8x	fisher = "Fisher's Exact Test",
95	8x	stop(paste(method, "does not have a description"))
96		)
97	8x	alt_part <- switch(alternative,
98	8x	two.sided = "",
99	8x	less = ", 1-sided, direction less",
100	8x	greater = ", 1-sided, direction greater"
101		)
102	8x	paste0("p-value (", meth_part, alt_part, ")")
103		}
104
105		#' @describeIn prop_diff_test Formatted analysis function which is used as `afun`
106		#'
107		#' @return
108		#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
109		#'
110		#' @examples
111		#' dta <- data.frame(
112		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
113		#' grp = factor(rep(c("A", "B"), each = 50)),
114		#' strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
115		#' )
116		#'
117		#' l <- basic_table() \|>
118		#' split_cols_by(var = "grp") \|>
119		#' analyze(
120		#' vars = "rsp",
121		#' afun = a_test_proportion_diff,
122		#' show_labels = "hidden",
123		#' extra_args = list(
124		#' method = "cmh",
125		#' variables = list(strata = "strata"),
126		#' ref_path = c("grp", "B")
127		#' )
128		#' )
129		#'
130		#' build_table(l, df = dta)
131		#'
132		#' @export
133		#' @order 2
134		a_test_proportion_diff <- function(
135		df,
136		.var,
137		ref_path,
138		.spl_context,
139		...,
140		.stats = NULL,
141		.formats = NULL,
142		.labels = NULL,
143		.indent_mods = NULL) {
144		# Check for additional parameters to the statistics function
145	2x	dots_extra_args <- list(...)
146
147		# Only support default stats, not custom stats
148	2x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
149
150		# Obtain reference column information
151	2x	ref <- get_ref_info(ref_path, .spl_context)
152
153		# Apply statistics function
154	2x	x_stats <- .apply_stat_functions(
155	2x	default_stat_fnc = s_test_proportion_diff,
156	2x	custom_stat_fnc_list = NULL,
157	2x	args_list = c(
158	2x	df = list(df),
159	2x	.var = .var,
160	2x	.ref_group = list(ref$ref_group),
161	2x	.in_ref_col = ref$in_ref_col,
162	2x	dots_extra_args
163		)
164		)
165
166		# Format according to specifications
167	2x	format_stats(
168	2x	x_stats,
169	2x	method_groups = "test_proportion_diff",
170	2x	stats_in = .stats,
171	2x	formats_in = .formats,
172	2x	labels_in = .labels,
173	2x	indents_in = .indent_mods
174		)
175		}
176
177		#' Helper functions to test proportion differences
178		#'
179		#' Helper functions to implement various tests on the difference between two proportions.
180		#'
181		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
182		#'
183		#' @return A p-value.
184		#'
185		#' @seealso [prop_diff_test()] for implementation of these helper functions.
186		#'
187		#' @name h_prop_diff_test
188		NULL
189
190		#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()].
191		#'
192		#' @keywords internal
193		prop_chisq <- function(tbl, alternative) {
194	3x	checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
195	3x	tbl <- tbl[, c("TRUE", "FALSE")]
196	3x	if (any(colSums(tbl) == 0)) {
197	!	return(1)
198		}
199	3x	stats::prop.test(tbl, correct = FALSE, alternative = alternative)$p.value
200		}
201
202		#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test.
203		#' Internally calls [stats::mantelhaen.test()].
204		#'
205		#' @note strata with less than five observations will result in a warning and
206		#' possibly incorrect results; strata with less than two observations are
207		#' automatically discarded.
208		#'
209		#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
210		#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
211		#'
212		#' @keywords internal
213		prop_cmh <- function(ary, alternative) {
214	9x	checkmate::assert_array(ary)
215	9x	checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
216	9x	checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)
217	9x	strata_sizes <- apply(ary, MARGIN = 3, sum)
218	9x	if (any(strata_sizes < 5)) {
219	3x	warning("<5 data points in some strata. CMH test may be incorrect.")
220	3x	ary <- ary[, , strata_sizes > 1]
221		}
222	9x	stats::mantelhaen.test(ary, correct = FALSE, alternative = alternative)$p.value
223		}
224
225		#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()].
226		#'
227		#' @keywords internal
228		prop_fisher <- function(tbl, alternative) {
229	6x	checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
230	6x	tbl <- tbl[, c("TRUE", "FALSE")]
231	6x	stats::fisher.test(tbl, alternative = alternative)$p.value
232		}

1		#' @note This has been forked from [tern::h_ancova()], because the new
2		#' `weights_emmeans` option was added here.
3		h_ancova <- function(
4		.var,
5		.df_row,
6		variables,
7		weights_emmeans,
8		interaction_item = NULL) {
9	17x	checkmate::assert_string(.var)
10	17x	checkmate::assert_list(variables)
11	17x	checkmate::assert_subset(names(variables), c("arm", "covariates"))
12
13	17x	assert_df_with_variables(
14	17x	.df_row,
15	17x	list(rsp = .var)
16		)
17	17x	arm <- variables$arm
18	17x	covariates <- variables$covariates
19	17x	if (!is.null(covariates) && length(covariates) > 0) {
20	7x	var_list <- get_covariates(covariates)
21	7x	assert_df_with_variables(
22	7x	.df_row,
23	7x	var_list
24		)
25		}
26	17x	covariates_part <- paste(covariates, collapse = " + ")
27	17x	formula_str <- paste0(.var, " ~ ", arm)
28	17x	if (covariates_part != "") {
29	7x	formula_str <- paste0(formula_str, "+", covariates_part)
30		}
31	17x	formula <- stats::as.formula(formula_str)
32	17x	specs <- arm
33	17x	if (!is.null(interaction_item)) {
34	!	specs <- c(specs, interaction_item)
35		}
36	17x	lm_fit <- stats::lm(formula = formula, data = .df_row)
37	17x	emmeans::emmeans(
38	17x	lm_fit,
39	17x	specs = specs,
40	17x	data = .df_row,
41	17x	weights = weights_emmeans
42		)
43		}
44
45		#' @title Junco Extended ANCOVA Function
46		#' @name s_ancova_j
47		#' @inheritParams tern::s_ancova
48		#' @param df : need to check on how to inherit params from tern::s_ancova
49		#' @param weights_emmeans (`string`)\cr argument from [emmeans::emmeans()], `"counterfactual"` by default.
50		#' @description Extension to tern:::s_ancova, 3 extra statistics are returned
51		#' * `lsmean_se`: Marginal mean and estimated SE in the group.
52		#' * `lsmean_ci`: Marginal mean and associated confidence interval in the group.
53		#' * `lsmean_diffci`: Difference in mean and associated confidence level in one combined statistic.
54		#' In addition, the LS mean weights can be specified.
55		#' In addition, also a NULL .ref_group can be specified, the lsmean_diff related estimates will be returned as NA.
56		#' @export
57		#' @return returns a named list of 8 statistics (3 extra compared to `tern:::s_ancova()`).
58		#' @family Inclusion of ANCOVA Functions
59		#' @examples
60		#' library(dplyr)
61		#' library(tern)
62		#'
63		#' df <- iris \|> filter(Species == "virginica")
64		#' .df_row <- iris
65		#' .var <- "Petal.Length"
66		#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
67		#' .ref_group <- iris \|> filter(Species == "setosa")
68		#' conf_level <- 0.95
69		#' s_ancova_j(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, conf_level)
70		s_ancova_j <- function(
71		df,
72		.var,
73		.df_row,
74		variables,
75		.ref_group,
76		.in_ref_col,
77		conf_level,
78		interaction_y = FALSE,
79		interaction_item = NULL,
80		weights_emmeans = "counterfactual") {
81	16x	arm <- variables$arm
82
83	16x	.df_row <- subset(.df_row, !is.na(.df_row[[.var]]))
84	16x	df <- subset(df, !is.na(df[[.var]]))
85	16x	.ref_group <- subset(.ref_group, !is.na(.ref_group[[.var]]))
86
87	16x	n_obs_trt_lvls <- length(unique(.df_row[[arm]]))
88
89		### sparse data problems with underlying ancova function
90	16x	if (NROW(.df_row) == 0) {
91	!	ret <- NULL
92	16x	} else if (NROW(df) == 0 \|\| n_obs_trt_lvls < 2) {
93		## all stats are NA
94	!	ret <- list(
95	!	n = with_label(0, "n"),
96	!	lsmean = with_label(NA, "Adjusted Mean"),
97	!	lsmean_se = with_label(rep(NA, 2), "Adjusted Mean (SE)"),
98	!	lsmean_ci = with_label(
99	!	rep(NA, 3),
100	!	paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")")
101		),
102	!	lsmean_diff = with_label(NA, "Difference in Adjusted Means"),
103	!	lsmean_diff_ci = with_label(
104	!	rep(NA, 2),
105	!	paste("Difference in Adjusted Means", f_conf_level(conf_level))
106		),
107	!	lsmean_diffci = with_label(
108	!	rep(NA, 3),
109	!	paste0(
110	!	"Difference in Adjusted Means",
111		" (",
112	!	f_conf_level(conf_level),
113		")"
114		)
115		),
116	!	pval = with_label(NA, "p-value")
117		)
118		} else {
119	16x	if (NROW(.ref_group) == 0) {
120	!	.ref_group <- NULL
121	16x	} else if (length(levels(.df_row[[arm]])) > n_obs_trt_lvls) {
122		# missing arm levels need to be removed from the factor, in order to have the correct estimates,
123		# and avoid errors with further code
124	!	.df_row[[arm]] <- droplevels(.df_row[[arm]])
125	!	df[[arm]] <- factor(
126	!	as.character(df[[arm]]),
127	!	levels = levels(.df_row[[arm]])
128		)
129	!	.ref_group[[arm]] <- factor(
130	!	as.character(.ref_group[[arm]]),
131	!	levels = levels(.df_row[[arm]])
132		)
133		}
134
135	16x	emmeans_fit <- h_ancova(
136	16x	.var = .var,
137	16x	variables = variables,
138	16x	.df_row = .df_row,
139	16x	weights_emmeans = weights_emmeans,
140	16x	interaction_item = interaction_item
141		)
142	16x	sum_fit <- summary(
143	16x	emmeans_fit,
144	16x	level = conf_level
145		)
146	16x	arm <- variables$arm
147	16x	sum_level <- as.character(unique(df[[arm]]))
148
149		# Ensure that there is only one element in sum_level.
150	16x	checkmate::assert_scalar(sum_level)
151
152	16x	sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]
153
154		# Get the index of the ref arm
155	16x	if (isTRUE(interaction_y)) {
156	!	y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
157		# convert characters selected in interaction_y into the numeric order
158	!	interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
159	!	sum_fit_level <- sum_fit_level[interaction_y, ]
160		# if interaction is called, reset the index
161	!	ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
162	!	ref_key <- utils::tail(ref_key, n = 1)
163	!	ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
164		} else {
165	16x	y <- df[[.var]]
166		# Get the index of the ref arm when interaction is not called
167	16x	ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
168	16x	ref_key <- utils::tail(ref_key, n = 1)
169		}
170
171	16x	if (.in_ref_col) {
172	4x	ret <- list(
173	4x	n = with_label(length(y[!is.na(y)]), "n"),
174	4x	lsmean = with_label(sum_fit_level$emmean, "Adjusted Mean"),
175	4x	lsmean_se = with_label(
176	4x	c(sum_fit_level$emmean, sum_fit_level$SE),
177	4x	"Adjusted Mean (SE)"
178		),
179	4x	lsmean_ci = with_label(
180	4x	c(
181	4x	sum_fit_level$emmean,
182	4x	sum_fit_level$lower.CL,
183	4x	sum_fit_level$upper.CL
184		),
185	4x	paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")")
186		),
187	4x	lsmean_diff = with_label(
188	4x	character(),
189	4x	"Difference in Adjusted Means"
190		),
191	4x	lsmean_diff_ci = with_label(
192	4x	character(),
193	4x	paste("Difference in Adjusted Means", f_conf_level(conf_level))
194		),
195	4x	lsmean_diffci = with_label(
196	4x	character(),
197	4x	paste0(
198	4x	"Difference in Adjusted Means",
199		" (",
200	4x	f_conf_level(conf_level),
201		")"
202		)
203		),
204	4x	pval = with_label(character(), "p-value")
205		)
206		} else {
207	12x	if (!is.null(.ref_group)) {
208		# Estimate the differences between the marginal means.
209	12x	emmeans_contrasts <- emmeans::contrast(
210	12x	emmeans_fit,
211		# Compare all arms versus the control arm.
212	12x	method = "trt.vs.ctrl",
213		# Take the arm factor from .ref_group as the control arm.
214	12x	ref = ref_key,
215	12x	level = conf_level
216		)
217	12x	sum_contrasts <- summary(
218	12x	emmeans_contrasts,
219		# Derive confidence intervals, t-tests and p-values.
220	12x	infer = TRUE,
221		# Do not adjust the p-values for multiplicity.
222	12x	adjust = "none"
223		)
224	12x	contrast_lvls <- gsub(
225	12x	paste0(" - ", .ref_group[[arm]][1], ".*"),
226		"",
227	12x	sum_contrasts$contrast
228		)
229	12x	if (!is.null(interaction_item)) {
230	!	sum_contrasts_level <- sum_contrasts[
231	!	grepl(sum_level, contrast_lvls, fixed = TRUE), ,
232	!	drop = FALSE
233		]
234		} else {
235	12x	sum_contrasts_level <- sum_contrasts[
236	12x	(sum_level == contrast_lvls \| paste0("(", sum_level, ")") == contrast_lvls), ,
237	12x	drop = FALSE
238		]
239		}
240	12x	if (interaction_y != FALSE) {
241	!	sum_contrasts_level <- sum_contrasts_level[interaction_y, , drop = FALSE]
242		}
243		} else {
244	!	sum_contrasts_level <- list()
245	!	sum_contrasts_level[["estimate"]] <- NA
246	!	sum_contrasts_level[["lower.CL"]] <- NA
247	!	sum_contrasts_level[["upper.CL"]] <- NA
248	!	sum_contrasts_level[["p.value"]] <- NA
249		}
250	12x	ret <- list(
251	12x	n = with_label(length(y[!is.na(y)]), "n"),
252	12x	lsmean = with_label(sum_fit_level$emmean, "Adjusted Mean"),
253	12x	lsmean_se = with_label(
254	12x	c(sum_fit_level$emmean, sum_fit_level$SE),
255	12x	"Adjusted Mean (SE)"
256		),
257	12x	lsmean_ci = with_label(
258	12x	c(
259	12x	sum_fit_level$emmean,
260	12x	sum_fit_level$lower.CL,
261	12x	sum_fit_level$upper.CL
262		),
263	12x	paste0("Adjusted Mean", " (", f_conf_level(conf_level), ")")
264		),
265	12x	lsmean_diff = with_label(
266	12x	sum_contrasts_level$estimate,
267	12x	"Difference in Adjusted Means"
268		),
269	12x	lsmean_diff_ci = with_label(
270	12x	c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
271	12x	paste("Difference in Adjusted Means", f_conf_level(conf_level))
272		),
273	12x	lsmean_diffci = with_label(
274	12x	c(
275	12x	sum_contrasts_level$estimate,
276	12x	sum_contrasts_level$lower.CL,
277	12x	sum_contrasts_level$upper.CL
278		),
279	12x	paste0(
280	12x	"Difference in Adjusted Means",
281		" (",
282	12x	f_conf_level(conf_level),
283		")"
284		)
285		),
286	12x	pval = with_label(sum_contrasts_level$p.value, "p-value")
287		)
288		}
289		}
290	16x	return(ret)
291		}
292
293		#' @name s_summarize_ancova_j
294		#' @title ANCOVA Summary Function
295		#' @description Combination of tern::s_summary, and ANCOVA based estimates for mean and diff between columns,
296		#' based on ANCOVA function `s_ancova_j`
297		#' @inherit s_ancova_j
298		#' @param ... Additional arguments passed to `s_ancova_j`.
299		#' @details Combination of tern::s_summary, and ANCOVA based estimates for mean and diff between columns,
300		#' based on ANCOVA function `s_ancova_j`
301		#' @return returns the statistics from tern::s_summary(x), appended with a new statistics based upon ANCOVA
302		#' @export
303		#' @family Inclusion of ANCOVA Functions
304		# @seealso s_ancova_j
305		#' @examples
306		#' library(dplyr)
307		#' library(tern)
308		#'
309		#' df <- iris \|> filter(Species == "virginica")
310		#' .df_row <- iris
311		#' .var <- "Petal.Length"
312		#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
313		#' .ref_group <- iris \|> filter(Species == "setosa")
314		#' conf_level <- 0.95
315		#' s_summarize_ancova_j(
316		#' df,
317		#' .var = .var,
318		#' .df_row = .df_row,
319		#' variables = variables,
320		#' .ref_group = .ref_group,
321		#' .in_ref_col = FALSE,
322		#' conf_level = conf_level
323		#' )
324		s_summarize_ancova_j <- function(
325		df,
326		.var,
327		.df_row,
328		.ref_group,
329		.in_ref_col,
330		...) {
331	14x	x <- df[[.var]]
332	14x	y1 <- s_summary(x)
333	14x	y2 <- s_ancova_j(
334	14x	df = df,
335	14x	.var = .var,
336	14x	.ref_group = .ref_group,
337	14x	.in_ref_col = .in_ref_col,
338	14x	.df_row = .df_row,
339		...
340		)
341	14x	c(y1, y2)
342		}
343
344		#' @describeIn s_summarize_ancova_j Formatted analysis function which is used as `afun`. Note that the
345		#' junco specific `ref_path` and `.spl_context` arguments are used for reference column information.
346		#'
347		#' @param ref_path (`character`)\cr path to the reference group.
348		#' @param .spl_context (`environment`)\cr split context environment.
349		#' @param ... Additional arguments passed to the statistics function.
350		#' @param .stats (`character`)\cr statistics to calculate.
351		#' @param .formats (`list`)\cr formats for the statistics.
352		#' @param .labels (`list`)\cr labels for the statistics.
353		#' @param .indent_mods (`list`)\cr indentation modifications for the statistics.
354		#'
355		#' @return
356		#' * `a_summarize_ancova_j()` returns the corresponding list with formatted [rtables::CellValue()].
357		#'
358		#' @examples
359		#'
360		#' basic_table() \|>
361		#' split_cols_by("Species") \|>
362		#' add_colcounts() \|>
363		#' analyze(
364		#' vars = "Petal.Length",
365		#' afun = a_summarize_ancova_j,
366		#' show_labels = "hidden",
367		#' na_str = tern::default_na_str(),
368		#' table_names = "unadj",
369		#' var_labels = "Unadjusted comparison",
370		#' extra_args = list(
371		#' variables = list(arm = "Species", covariates = NULL),
372		#' conf_level = 0.95,
373		#' .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means"),
374		#' ref_path = c("Species", "setosa")
375		#' )
376		#' ) \|>
377		#' analyze(
378		#' vars = "Petal.Length",
379		#' afun = a_summarize_ancova_j,
380		#' show_labels = "hidden",
381		#' na_str = tern::default_na_str(),
382		#' table_names = "adj",
383		#' var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)",
384		#' extra_args = list(
385		#' variables = list(
386		#' arm = "Species",
387		#' covariates = c("Sepal.Length", "Sepal.Width")
388		#' ),
389		#' conf_level = 0.95,
390		#' ref_path = c("Species", "setosa")
391		#' )
392		#' ) \|>
393		#' build_table(iris)
394		#'
395		#' @export
396		#' @order 2
397		a_summarize_ancova_j <- function(
398		df,
399		.var,
400		.df_row,
401		ref_path,
402		.spl_context,
403		...,
404		.stats = NULL,
405		.formats = NULL,
406		.labels = NULL,
407		.indent_mods = NULL) {
408		# Check for additional parameters to the statistics function
409	6x	dots_extra_args <- list(...)
410
411		# Only support default stats, not custom stats
412	6x	.stats <- .split_std_from_custom_stats(.stats)$default_stats
413
414		# Obtain reference column information
415	6x	ref <- get_ref_info(ref_path, .spl_context)
416
417		# Apply statistics function
418	6x	x_stats <- .apply_stat_functions(
419	6x	default_stat_fnc = s_summarize_ancova_j,
420	6x	custom_stat_fnc_list = NULL,
421	6x	args_list = c(
422	6x	df = list(df),
423	6x	.var = .var,
424	6x	.df_row = list(.df_row),
425	6x	.ref_group = list(ref$ref_group),
426	6x	.in_ref_col = ref$in_ref_col,
427	6x	dots_extra_args
428		)
429		)
430
431		# Format according to specifications
432	6x	format_stats(
433	6x	x_stats,
434	6x	method_groups = "summarize_ancova_j",
435	6x	stats_in = .stats,
436	6x	formats_in = .formats,
437	6x	labels_in = .labels,
438	6x	indents_in = .indent_mods
439		)
440		}

1		#' Extract Estimates from Multivariate Cox Regression Model Fit Object
2		#'
3		#' @param x (`coxreg.multivar`)\cr from [tern::fit_coxreg_multivar()].
4		#' @export
5		#' @return A data frame containing Cox regression results with columns for term,
6		#' coef_se (coefficient and standard error), p.value, hr (hazard ratio),
7		#' hr_ci (confidence interval for hazard ratio), and labels (formatted term labels).
8		#' @keywords internal
9		#'
10		#' @examples
11		#' anl <- tern::tern_ex_adtte \|>
12		#' dplyr::mutate(EVENT = 1 - CNSR)
13		#'
14		#' variables <- list(
15		#' time = "AVAL",
16		#' event = "EVENT",
17		#' arm = "ARM",
18		#' covariates = c("SEX", "AGE")
19		#' )
20		#'
21		#' control <- tern::control_coxreg(
22		#' conf_level = 0.9,
23		#' ties = "efron"
24		#' )
25		#'
26		#' fit <- tern::fit_coxreg_multivar(
27		#' data = anl,
28		#' variables = variables,
29		#' control = control
30		#' )
31		#'
32		#' h_extract_coxreg_multivar(fit)
33		h_extract_coxreg_multivar <- function(x) {
34	22x	checkmate::assert_class(x, "coxreg.multivar")
35	22x	vars <- c(x$vars$arm, x$vars$covariates)
36
37		# Extract the table.
38	22x	tab <- broom::tidy(x$mod, exponentiate = FALSE, conf.int = TRUE, conf.level = x$control$conf_level)
39	22x	tab$hr <- exp(tab$estimate)
40	22x	tab$hr_ci <- Map(lcl = exp(tab$conf.low), ucl = exp(tab$conf.high), f = function(lcl, ucl) c(lcl, ucl))
41	22x	tab$coef_se <- Map(coef = tab$estimate, se = tab$std.error, f = function(coef, se) c(coef, se))
42	22x	colnms <- c("term", "coef_se", "p.value", "hr", "hr_ci")
43	22x	tab <- tab[, colnms, drop = FALSE]
44
45		# Format nicely the term labels.
46	22x	var_to_term <- sapply(vars, function(v) which(startsWith(tab$term, v)), USE.NAMES = TRUE, simplify = FALSE)
47	22x	tab$labels <- unlist(lapply(vars, function(v) {
48	67x	inds <- var_to_term[[v]]
49	67x	strip_term <- gsub(paste0("^", v), "", tab$term[inds])
50	67x	this_df <- x$dat[v]
51	67x	if (is.character(this_df[[1]])) {
52	1x	this_df[[1]] <- as.factor(this_df[[1]])
53		}
54	67x	if (is.factor(this_df[[1]])) {
55	45x	ref_level <- levels(this_df[[1]])[1]
56	45x	var_name <- if (v == x$vars$arm) "Treatment" else labels_or_names(this_df)
57	45x	paste0(var_name, " (", strip_term, " vs. ", ref_level, ")")
58		} else {
59	22x	labels_or_names(this_df)
60		}
61		}))
62	22x	tab
63		}
64
65		#' First Level Column Split Function for TEFOS03 (mmy) Table Layout
66		#' @seealso [rtables::make_split_fun()] for details.
67		#' @keywords internal
68		tefos03_first_post_fun <- function(ret, spl, fulldf, .spl_context) {
69	5x	all_expr <- expression(TRUE)
70	5x	short_split_result(model_fit = "Model Fit", hazard_ratio = "Hazard Ratio", fulldf = fulldf)
71		}
72		tefos03_first_split_fun <- make_split_fun(post = list(tefos03_first_post_fun))
73
74		#' Second Level Column Split Function Factory for TEFOS03 (mmy) Table Layout
75		#'
76		#' @inheritParams proposal_argument_convention
77		#'
78		#' @return Split function to use in the TEFOS03 (mmy) and related table layouts.
79		#'
80		#' @seealso [tefos03_first_post_fun()] for the first level split.
81		#' @keywords internal
82		tefos03_second_split_fun_fct <- function(conf_level) {
83	4x	post_fun <- function(ret, spl, fulldf, .spl_context) {
84	8x	all_expr <- expression(TRUE)
85	8x	colset <- .spl_context[nrow(.spl_context), "value"][[1]]
86	8x	if (colset == "model_fit") {
87	4x	short_split_result(coef_se = "Coeff. (SE)", pval = "p-value", fulldf = fulldf)
88		} else {
89	4x	short_split_result(hr_est = "Estimate", hr_ci = f_conf_level(conf_level), fulldf = fulldf)
90		}
91		}
92	4x	make_split_fun(post = list(post_fun))
93		}
94
95		#' @importFrom tern fit_coxreg_multivar
96		#' @keywords internal
97		memoised_fit_coxreg_multivar <- memoise::memoise(fit_coxreg_multivar)
98
99		#' Analysis Function for TEFOS03 and Related Table Layouts
100		#'
101		#' @inheritParams proposal_argument_convention
102		#' @param control (`list`)\cr from [tern::control_coxreg()].
103		#' @param formats (`list`)\cr including `coef_se`, `hr_est`, `hr_ci` and `pval` formats.
104		#' @param variables (`list`)\cr see [tern::fit_coxreg_multivar()] for required variable
105		#' specifications.
106		#'
107		#' @keywords internal
108		tefos03_afun <- function(df, .var, .spl_context, variables, control, formats) {
109	21x	this_col_split <- .spl_context[nrow(.spl_context), "cur_col_split_val"][[1]]
110	21x	model_fit <- memoised_fit_coxreg_multivar(data = df, variables = variables, control = control)
111	21x	tab <- h_extract_coxreg_multivar(model_fit)
112	21x	if (this_col_split[1] == "model_fit") {
113	11x	if (this_col_split[2] == "coef_se") {
114	5x	in_rows(.list = tab$coef_se, .formats = formats$coef_se, .labels = tab$labels)
115		} else {
116	6x	in_rows(.list = tab$p.value, .formats = formats$pval, .labels = tab$labels)
117		}
118		} else {
119	10x	if (this_col_split[2] == "hr_est") {
120	5x	in_rows(.list = tab$hr, .formats = formats$hr_est, .labels = tab$labels)
121		} else {
122	5x	in_rows(.list = tab$hr_ci, .formats = formats$hr_ci, .labels = tab$labels)
123		}
124		}
125		}
126
127		#' Layout Generating Function for TEFOS03 and Related Cox Regression Layouts
128		#'
129		#' @inheritParams proposal_argument_convention
130		#' @inheritParams tefos03_afun
131		#' @param var (`string`)\cr any variable from the data, because this is not used.
132		#'
133		#' @return `lyt` modified to add the desired cox regression table section.
134		#' @export
135		#' @examples
136		#' anl <- tern::tern_ex_adtte \|>
137		#' dplyr::mutate(EVENT = 1 - CNSR)
138		#'
139		#' variables <- list(
140		#' time = "AVAL",
141		#' event = "EVENT",
142		#' arm = "ARM",
143		#' covariates = c("SEX", "AGE")
144		#' )
145		#'
146		#' basic_table() \|>
147		#' summarize_coxreg_multivar(
148		#' var = "STUDYID",
149		#' variables = variables
150		#' ) \|>
151		#' build_table(df = anl)
152		summarize_coxreg_multivar <- function(
153		lyt,
154		var,
155		variables,
156		control = control_coxreg(),
157		formats = list(
158		coef_se = jjcsformat_xx("xx.xx (xx.xx)"),
159		hr_est = jjcsformat_xx("xx.xx"),
160		hr_ci = jjcsformat_xx("(xx.xx, xx.xx)"),
161		pval = jjcsformat_pval_fct(0)
162		)) {
163	3x	second_split_fun <- tefos03_second_split_fun_fct(conf_level = control$conf_level)
164	3x	lyt \|>
165	3x	split_cols_by(var = var, split_fun = tefos03_first_split_fun) \|>
166	3x	split_cols_by(var = var, split_fun = second_split_fun) \|>
167	3x	analyze(
168	3x	vars = var,
169	3x	var_labels = "Model Parameter",
170	3x	show_labels = "visible",
171	3x	afun = tefos03_afun,
172	3x	extra_args = list(variables = variables, control = control, formats = formats)
173		)
174		}

1		#' @name jjcs_num_formats
2		#'
3		#' @title Numeric Formatting Function
4		#'
5		#' @description
6		#' Formatting setter for selected numerical statistics
7		#'
8		#' @param d precision of individual values
9		#' @param cap cap to numerical precision (d > cap -- will use precision as if cap was specified as precision)
10		#'
11		#' @return list:
12		#' - fmt : named vector with formatting function (jjcsformat_xx) for numerical stats: range, median, mean_sd, sd
13		#' - spec : named vector with formatting specifications for numerical stats: range, median, mean_sd, sd
14		#' @export
15		#' @examples
16		#' P1_precision <- jjcs_num_formats(d=0)$fmt
17		#' jjcs_num_formats(2)$fmt
18		#' jjcs_num_formats(2)$spec
19		jjcs_num_formats <- function(d, cap = 4) {
20	6x	fmt_xx <- function(d, cap) {
21	22x	prec <- NULL
22		### report as is
23	22x	if (is.na(d)) {
24	5x	return("xx")
25		}
26	17x	checkmate::assertCount(d)
27
28		### set a cap to number of decimal precision
29	15x	d <- min(d, cap)
30	14x	if (d > 0) prec <- paste0(prec, paste0(rep("x", d), collapse = ""))
31	15x	if (d >= 0) prec <- paste0("xx.", prec)
32
33	15x	return(prec)
34		}
35
36	6x	fmt_rng <- function(d, cap) {
37	6x	prec <- fmt_xx(d, cap = cap)
38	4x	fmt <- list()
39	4x	fmt$spec <- paste0(prec, ", ", prec)
40	4x	fmt$fmt <- jjcsformat_xx(fmt$spec)
41
42	4x	return(fmt)
43		}
44
45	6x	fmt_median <- function(d, cap) {
46	4x	prec <- fmt_xx(d, cap = cap)
47	4x	fmt <- list()
48	4x	fmt$spec <- prec
49	4x	fmt$fmt <- jjcsformat_xx(fmt$spec)
50	4x	return(fmt)
51		}
52
53	6x	fmt_meansd <- function(d, cap) {
54	4x	prec1 <- fmt_xx(d, cap = cap)
55	4x	prec2 <- fmt_xx(d + 1, cap = cap + 1)
56	4x	fmt <- list()
57	4x	fmt$spec <- paste0(prec1, " (", prec2, ")")
58	4x	fmt$fmt <- jjcsformat_xx(fmt$spec)
59	4x	return(fmt)
60		}
61	6x	fmt_sd <- function(d, cap) {
62	4x	prec <- fmt_xx(d, cap)
63	4x	fmt <- list()
64	4x	fmt$spec <- prec
65	4x	fmt$fmt <- jjcsformat_xx(fmt$spec)
66	4x	return(fmt)
67		}
68
69		## apply formats for each of the stats with appropriate d and cap
70	6x	xfmt_rng <- fmt_rng(d, cap = cap)
71	4x	xfmt_meansd <- fmt_meansd(d + 1, cap = cap + 1)
72	4x	xfmt_sd <- fmt_sd(d + 2, cap = cap + 2)
73	4x	xfmt_median <- fmt_median(d + 1, cap = cap + 1)
74
75	4x	fmts <- list()
76
77	4x	fmts$fmt <- c(range = xfmt_rng$fmt, mean_sd = xfmt_meansd$fmt, sd = xfmt_sd$fmt, median = xfmt_median$fmt)
78
79	4x	fmts$spec <- c(range = xfmt_rng$spec, mean_sd = xfmt_meansd$spec, sd = xfmt_sd$spec, median = xfmt_median$spec)
80
81	4x	return(fmts)
82		}