Skip to contents
### Packages to compile rmarkdown
library(knitr)
library(rmarkdown)
### Packages for nice tables
library(kableExtra)


font_size <- 9
cache <- 0


### Set global option for font_size for bkable in this document 

options(bkable_font_size = font_size)
options(bkable_full_width = TRUE)


knitr::opts_chunk$set(cache = cache, cache.comments = FALSE, echo = TRUE, warning = FALSE, message = FALSE, error = FALSE, fig.width = 6, fig.height = 5, fig.align = "center", tidy=TRUE, tidy.opts = list(width.cutoff = 80))


### Necessary for knit_child
knitr::opts_knit$set(output.dir = getwd())
library(ggplot2)

ggplot2::theme_set(cowplot::theme_cowplot())


library(BiomarkerWrappers)

Load data

bdata object contains random survival and biomarker data. It is not expected to observe any biologically meaningful associations. The goal for this data sets is solely to demonstrate the functionality of this package.

data(bdata)



### Dichotomize gene expression into low and high

genes <- c("BCL2", "GeneA", "GeneB", "GeneC", "GeneD")

data_cat2 <- dplyr::mutate_all(bdata[, genes], wrapper_cut_2groups)

colnames(data_cat2) <- paste0(genes, "_cat2")


### Stratify gene expression into quartiles

data_cat4 <- dplyr::mutate_all(bdata[, genes], wrapper_cut_quartiles)

colnames(data_cat4) <- paste0(genes, "_cat4")


bdata <- cbind(bdata, data_cat2, data_cat4)

Define variable names

variable_names <- format_variable_names(bdata)

variable_names
##          Patient_ID                 Age                 Sex                 IPI 
##        "Patient ID"               "Age"               "Sex"               "IPI" 
##   Geographic_Region       Treatment_Arm                 PFS           PFS_Event 
## "Geographic Region"     "Treatment Arm"               "PFS"         "PFS Event" 
##            Response      Cell_Of_Origin         BCL2_by_IHC        BCL2_by_FISH 
##          "Response"    "Cell Of Origin"       "BCL2 by IHC"      "BCL2 by FISH" 
##                BCL2               GeneA               GeneB               GeneC 
##              "BCL2"             "GeneA"             "GeneB"             "GeneC" 
##               GeneD          BEP_RNAseq                 ORR           BCL2_cat2 
##             "GeneD"        "BEP RNAseq"               "ORR"         "BCL2 cat2" 
##          GeneA_cat2          GeneB_cat2          GeneC_cat2          GeneD_cat2 
##        "GeneA cat2"        "GeneB cat2"        "GeneC cat2"        "GeneD cat2" 
##           BCL2_cat4          GeneA_cat4          GeneB_cat4          GeneC_cat4 
##         "BCL2 cat4"        "GeneA cat4"        "GeneB cat4"        "GeneC cat4" 
##          GeneD_cat4 
##        "GeneD cat4"
### Adjust variable names

variable_names[["GeneA_cat2"]] <- "Gene A stratified by median"

Tile plot

### Plot generated using geom_tile

wrapper_tile_plot1_core(bdata, y_vars = c("BCL2_by_IHC", "BCL2_by_FISH"))

cat("\n\n ------ \n\n")

### Plot generated using geom_col. It has substantially smaller size when
### sample size is large.

wrapper_tile_plot2_core(bdata, y_vars = c("BCL2_by_IHC", "BCL2_by_FISH"))

Cox regression

In wrapper_cox_regression_core_simple(), covariate_vars Vector with names of covariates that are included in the formula of the simple additive model: ~ covariate_vars[1] + covariate_vars[2] + covariate_vars[3] + ....

This function is the core function and should not be used for the analysis, it is a function called by the other functions such as wrapper_cox_regression_biomarker and wrapper_cox_regression_treatment.

In wrapper_cox_regression_biomarker and wrapper_cox_regression_treatment, biomarker_vars is a vector with biomarker names. For each biomarker a separate Cox regression model is fitted. The results are combined in a single table.

In wrapper_cox_regression_biomarker and wrapper_cox_regression_treatment, adjustment_vars are added additionally to biomarker_vars and treatment_var to the model, for example, ~ biomarker_vars[i] + treatment_var + adjustment_vars[1] + adjustment_vars[2] + ...

adjustment_vars would be the official stratification factors from the study plus any other variables you want to use for adjustment, for example, any other known prognostic factors.

strata_vars, would be the same as adjustment_vars. The difference is that when you define strata_vars they are used in Cox regression as “true stratification factors” with the formula ~ biomarker_vars[i] + treatment_var + strata(strata_vars[1] + strata_vars[2] + ...)

See https://stats.stackexchange.com/questions/256148/stratification-in-cox-model

strat1_var, strat2_var are used to split the data (I should call them split1_var, split2_var). Then the analysis is done within the subgroups of each of the variables. For example, if strat2_var = "PDL1", the Cox regression is run separately within the PDL1 low and PDL1 high.

### Biomarker effect

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

biomarker_vars <- c("GeneA_cat2", "GeneA")
adjustment_vars <- "IPI"

treatment_var = "Treatment_Arm"


wrapper_cox_regression_biomarker(data, tte_var = tte_var, censor_var = censor_var,
    biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars, treatment_var = treatment_var)
Biomarker effect on PFS. Adjusted, unstratified analysis. Cox regression model includes the biomarker and IPI.
Treatment Arm Biomarker Subgroup Total N Total Events N Events MST MST 95% CI HR HR 95% CI P-value Adj. P-value
CTRL GeneA cat2 low 204 85 104 41 (39.4%) NA (76.9 - NA)
high 204 85 100 44 (44.0%) 83.1 (82.1 - NA) 1.00 (0.65 - 1.55) 0.9828 0.9828
TRT GeneA cat2 low 196 83 96 40 (41.7%) 99.9 (82.2 - NA)
high 196 83 100 43 (43.0%) 100.3 (81.6 - NA) 0.91 (0.59 - 1.41) 0.6827 0.9828
CTRL GeneA 204 85 1.06 (0.86 - 1.31) 0.5723 0.9828
TRT GeneA 196 83 1.03 (0.82 - 1.29) 0.8129 0.9828
## To represent the results with a forest plot

x <- wrapper_cox_regression_biomarker(data, tte_var = tte_var, censor_var = censor_var,
    biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars, treatment_var = treatment_var,
    print_total = FALSE, print_adjpvalues = FALSE)

bforest(x)

### Treatment effect within biomarker subgroups

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

treatment_var = "Treatment_Arm"
biomarker_vars <- c("GeneA_cat2", "GeneB_cat2")
adjustment_vars <- "IPI"


wrapper_cox_regression_treatment(data, tte_var = tte_var, censor_var = censor_var,
    treatment_var = treatment_var, biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars)
Treatment effect on PFS. Adjusted, unstratified analysis. Cox regression model includes the treatment and IPI.
Biomarker Biomarker Subgroup Treatment Subgroup Total N Total Events N Events MST MST 95% CI HR HR 95% CI P-value Adj. P-value
GeneA cat2 low CTRL 200 81 104 41 (39.4%) NA (76.9 - NA)
TRT 200 81 96 40 (41.7%) 99.9 (82.2 - NA) 0.83 (0.53 - 1.30) 0.4156 0.5541
GeneA cat2 high CTRL 200 87 100 44 (44.0%) 83.1 (82.1 - NA)
TRT 200 87 100 43 (43.0%) 100.3 (81.6 - NA) 0.75 (0.49 - 1.15) 0.1917 0.3835
GeneB cat2 low CTRL 200 73 96 35 (36.5%) 98.3 (82.6 - NA)
TRT 200 73 104 38 (36.5%) NA (91.0 - NA) 0.92 (0.58 - 1.47) 0.7371 0.7371
GeneB cat2 high CTRL 200 95 108 50 (46.3%) 80.4 (63.6 - NA)
TRT 200 95 92 45 (48.9%) 87.5 (82.1 - NA) 0.63 (0.41 - 0.96) 0.0301 * 0.1204
## To represent the results with a forest plot

x <- wrapper_cox_regression_treatment(data, tte_var = tte_var, censor_var = censor_var,
    treatment_var = treatment_var, biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars,
    print_total = FALSE, print_adjpvalues = FALSE)


bforest(x)

### Treatment-biomarker interaction effect


data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

treatment_var = "Treatment_Arm"
biomarker_vars <- c("GeneA_cat2", "GeneA")
adjustment_vars <- "IPI"



wrapper_cox_regression_interaction(data, tte_var, censor_var, treatment_var = treatment_var,
    biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars)
Effect of interaction between biomarker and treatment on PFS. Adjusted, unstratified analysis. Cox regression model includes: biomarker, treatment and IPI.
Biomarker Biomarker Effect Treatment Effect Total N HR HR 95% CI P-value Adj. P-value
GeneA cat2 high vs low TRT vs CTRL 400 0.92 (0.50 - 1.70) 0.7985 0.8593
GeneA TRT vs CTRL 400 0.97 (0.72 - 1.32) 0.8593 0.8593

Log-rank test

### Biomarker effect

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

biomarker_vars <- c("GeneA_cat2", "GeneB_cat2")
treatment_var = "Treatment_Arm"


wrapper_log_rank_test_biomarker(data, tte_var = tte_var, censor_var = censor_var,
    biomarker_vars = biomarker_vars, treatment_var = treatment_var)
Biomarker effect on PFS. Unstratified log-rank test.
Treatment Arm Biomarker Subgroup Total N Total Events N Events MST MST 95% CI HR HR 95% CI P-value Adj. P-value
CTRL GeneA cat2 low 204 85 104 41 (39.4%) NA (76.9 - NA) 0.8211 0.8917
high 204 85 100 44 (44.0%) 83.1 (82.1 - NA) 1.05 (0.69 - 1.61)
TRT GeneA cat2 low 196 83 96 40 (41.7%) 99.9 (82.2 - NA) 0.8917 0.8917
high 196 83 100 43 (43.0%) 100.3 (81.6 - NA) 0.97 (0.63 - 1.49)
CTRL GeneB cat2 low 204 85 96 35 (36.5%) 98.3 (82.6 - NA) 0.0120 * 0.0478 *
high 204 85 108 50 (46.3%) 80.4 (63.6 - NA) 1.73 (1.12 - 2.68)
TRT GeneB cat2 low 196 83 104 38 (36.5%) NA (91.0 - NA) 0.2437 0.4874
high 196 83 92 45 (48.9%) 87.5 (82.1 - NA) 1.29 (0.84 - 1.99)
### Treatment effect within biomarker subgroups


data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

treatment_var = "Treatment_Arm"
biomarker_vars <- c("GeneA_cat2", "GeneB_cat2")


wrapper_log_rank_test_treatment(data, tte_var = tte_var, censor_var = censor_var,
    treatment_var = treatment_var, biomarker_vars = biomarker_vars)
Treatment effect on PFS. Unstratified log-rank test.
Biomarker Biomarker Subgroup Treatment Subgroup Total N Total Events N Events MST MST 95% CI HR HR 95% CI P-value Adj. P-value
GeneA cat2 low CTRL 200 81 104 41 (39.4%) NA (76.9 - NA) 0.5693 0.7591
TRT 200 81 96 40 (41.7%) 99.9 (82.2 - NA) 0.88 (0.57 - 1.37)
GeneA cat2 high CTRL 200 87 100 44 (44.0%) 83.1 (82.1 - NA) 0.2526 0.5052
TRT 200 87 100 43 (43.0%) 100.3 (81.6 - NA) 0.78 (0.51 - 1.19)
GeneB cat2 low CTRL 200 73 96 35 (36.5%) 98.3 (82.6 - NA) 0.9213 0.9213
TRT 200 73 104 38 (36.5%) NA (91.0 - NA) 0.98 (0.62 - 1.55)
GeneB cat2 high CTRL 200 95 108 50 (46.3%) 80.4 (63.6 - NA) 0.0893 . 0.3574
TRT 200 95 92 45 (48.9%) 87.5 (82.1 - NA) 0.70 (0.47 - 1.06)
x <- wrapper_log_rank_test_treatment(data, tte_var = tte_var, censor_var = censor_var,
    treatment_var = treatment_var, biomarker_vars = biomarker_vars, print_nevent = FALSE,
    print_mst = FALSE, print_total = FALSE)


bkable(x)
Treatment effect on PFS. Unstratified log-rank test.
Biomarker Biomarker Subgroup Treatment Subgroup N HR HR 95% CI P-value Adj. P-value
GeneA cat2 low CTRL 104 0.5693 0.7591
TRT 96 0.88 (0.57 - 1.37)
GeneA cat2 high CTRL 100 0.2526 0.5052
TRT 100 0.78 (0.51 - 1.19)
GeneB cat2 low CTRL 96 0.9213 0.9213
TRT 104 0.98 (0.62 - 1.55)
GeneB cat2 high CTRL 108 0.0893 . 0.3574
TRT 92 0.70 (0.47 - 1.06)

KM plot

Core

data <- bdata

tte_var <- "PFS"
censor_var <- "PFS_Event"
covariate_var <- "Treatment_Arm"


wrapper_KM_plot_core(data = data, tte_var = tte_var, censor_var = censor_var, covariate_var = covariate_var)

Core stratified

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"
covariate_var <- "GeneA_cat2"

strat1_var = "Treatment_Arm"
strat2_var = "Cell_Of_Origin"


wrapper_KM_plot_core_strat(data = data, tte_var = tte_var, censor_var = censor_var,
    covariate_var = covariate_var, strat1_var = strat1_var, strat2_var = strat2_var,
    linetypes = c(2, 1))

Display tables with Cox regression results

### Display results from Cox regression as table on the KM plot

data <- bdata

tte_var <- "PFS"
censor_var <- "PFS_Event"
biomarker_var <- "GeneA_cat2"


x <- wrapper_cox_regression_biomarker(data, tte_var = tte_var, censor_var = censor_var,
    biomarker_vars = biomarker_var, print_total = FALSE, print_adjpvalues = FALSE)


tb <- boutput(x)[, -c(1)]

tb <- tibble::tibble(x = 1, y = 1, label = list(tb))


ggp <- wrapper_KM_plot_core(data = data, tte_var = tte_var, censor_var = censor_var,
    covariate_var = biomarker_var)

ggp + ggpp::geom_table_npc(data = tb, aes(npcx = x, npcy = y, label = label), size = 3,
    hjust = 1, vjust = 1, table.theme = ggpp::ttheme_gtminimal)

### Display results from Cox regression as table on the KM plot using one-level
### stratification

data <- bdata

tte_var <- "PFS"
censor_var <- "PFS_Event"
biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"


x <- wrapper_cox_regression_biomarker(data, tte_var = tte_var, censor_var = censor_var,
    biomarker_vars = biomarker_var, treatment_var = treatment_var, print_total = FALSE,
    print_adjpvalues = FALSE)

### To have the same level order

bresults(x)[[treatment_var]] <- factor(bresults(x)[[treatment_var]], levels = levels(data[,
    treatment_var]))


tb <- boutput(x)[, -c(1, 2)]

tb <- split(tb, bresults(x)[[treatment_var]])

tb <- tibble::tibble(x = seq_len(nlevels(data[, treatment_var]))/nlevels(data[, treatment_var]),
    y = 1, label = tb)


ggp <- wrapper_KM_plot_core_strat(data = data, tte_var = tte_var, censor_var = censor_var,
    covariate_var = biomarker_var, strat1_var = treatment_var)

ggp + ggpp::geom_table_npc(data = tb, aes(npcx = x, npcy = y, label = label), size = 3,
    hjust = 1, vjust = 1, table.theme = ggpp::ttheme_gtminimal)

### Display results from Cox regression as table on the KM plot using two-level
### stratification

data <- bdata

tte_var <- "PFS"
censor_var <- "PFS_Event"
biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"

strat2_var <- "Cell_Of_Origin"


x <- wrapper_cox_regression_biomarker(data, tte_var = tte_var, censor_var = censor_var,
    biomarker_vars = biomarker_var, treatment_var = treatment_var, strat2_var = strat2_var,
    print_total = FALSE, print_adjpvalues = FALSE)

### To have the same level order

bresults(x)[[treatment_var]] <- factor(bresults(x)[[treatment_var]], levels = levels(data[,
    treatment_var]))
bresults(x)[[strat2_var]] <- factor(bresults(x)[[strat2_var]], levels = levels(data[,
    strat2_var]))


tb <- boutput(x)[, -c(1, 2, 3)]

tb <- split(tb, list(bresults(x)[[treatment_var]], bresults(x)[[strat2_var]]))

tb <- tibble::tibble(x = rep(seq_len(nlevels(data[, treatment_var]))/nlevels(data[,
    treatment_var]), times = nlevels(data[, strat2_var])), y = rep(rev(seq_len(nlevels(data[,
    strat2_var]))/nlevels(data[, strat2_var])), each = nlevels(data[, treatment_var])),
    label = tb)


ggp <- wrapper_KM_plot_core_strat(data = data, tte_var = tte_var, censor_var = censor_var,
    covariate_var = biomarker_var, strat1_var = treatment_var, strat2_var = strat2_var,
    strat1_label_both = FALSE, strat2_label_both = FALSE)

ggp + ggpp::geom_table_npc(data = tb, aes(npcx = x, npcy = y, label = label), size = 3,
    hjust = 1, vjust = 1, table.theme = ggpp::ttheme_gtminimal)

Curves per treatment arm and biomarker subgroups in one plot

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"


wrapper_KM_plot_interaction(data, tte_var = tte_var, censor_var = censor_var, biomarker_var = biomarker_var,
    treatment_var = treatment_var)

wrapper_KM_plot_interaction(data, tte_var = tte_var, censor_var = censor_var, biomarker_var = biomarker_var,
    treatment_var = treatment_var, colors = c("blue", "blue2", "red", "red2"), linetypes = c(2,
        1, 2, 1), line_size = 0.5)

Biomarker effect per treatment arm

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"


wrapper_KM_plot_biomarker(data, tte_var = tte_var, censor_var = censor_var, biomarker_var = biomarker_var,
    treatment_var = treatment_var)

Treatment effect per biomarker subgroup

data <- bdata
tte_var <- "PFS"
censor_var <- "PFS_Event"

biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"


wrapper_KM_plot_treatment(data, tte_var = tte_var, censor_var = censor_var, biomarker_var = biomarker_var,
    treatment_var = treatment_var)

Logistic regression

### Biomarker effect

data <- bdata
response_var <- "ORR"

biomarker_vars <- c("GeneA_cat2", "GeneA")
treatment_var = "Treatment_Arm"
adjustment_vars <- "IPI"


wrapper_logistic_regression_biomarker(data, response_var = response_var, biomarker_vars = biomarker_vars,
    adjustment_vars = adjustment_vars, treatment_var = treatment_var)
Biomarker effect on ORR. Adjusted analysis. Logistic regression model includes the biomarker and IPI.
Treatment Arm Biomarker Subgroup Total N N NR R R 95% CI OR OR 95% CI P-value Adj. P-value
CTRL GeneA cat2 low 204 104 14 (13.5%) 90 (86.5%) (78.45 - 92.44)
high 204 100 17 (17.0%) 83 (83.0%) (74.18 - 89.77) 0.83 (0.38 - 1.83) 0.6467 0.6467
TRT GeneA cat2 low 196 96 18 (18.8%) 78 (81.2%) (72.00 - 88.49)
high 196 100 15 (15.0%) 85 (85.0%) (76.47 - 91.35) 1.32 (0.62 - 2.84) 0.4706 0.6275
CTRL GeneA 204 0.86 (0.60 - 1.25) 0.4412 0.6275
TRT GeneA 196 1.21 (0.80 - 1.83) 0.3735 0.6275
### Treatment effect within biomarker subgroups

data <- bdata
response_var <- "ORR"

treatment_var = "Treatment_Arm"
biomarker_vars <- c("GeneA_cat2", "GeneB_cat2")
adjustment_vars <- "IPI"


wrapper_logistic_regression_treatment(data, response_var = response_var, treatment_var = treatment_var,
    biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars)
Treatment effect on ORR. Adjusted analysis. Logistic regression model includes the treatment and IPI.
Biomarker Biomarker Subgroup Treatment Subgroup Total N N NR R R 95% CI OR OR 95% CI P-value Adj. P-value
GeneA cat2 low CTRL 200 104 14 (13.5%) 90 (86.5%) (78.45 - 92.44)
TRT 200 96 18 (18.8%) 78 (81.2%) (72.00 - 88.49) 0.67 (0.31 - 1.44) 0.3098 0.6195
GeneA cat2 high CTRL 200 100 17 (17.0%) 83 (83.0%) (74.18 - 89.77)
TRT 200 100 15 (15.0%) 85 (85.0%) (76.47 - 91.35) 1.27 (0.58 - 2.79) 0.5434 0.6679
GeneB cat2 low CTRL 200 96 8 (8.3%) 88 (91.7%) (84.24 - 96.33)
TRT 200 104 15 (14.4%) 89 (85.6%) (77.33 - 91.70) 0.57 (0.22 - 1.42) 0.2261 0.6195
GeneB cat2 high CTRL 200 108 23 (21.3%) 85 (78.7%) (69.78 - 86.00)
TRT 200 92 18 (19.6%) 74 (80.4%) (70.85 - 87.97) 1.17 (0.58 - 2.36) 0.6679 0.6679
### Treatment-biomarker interaction effect


data <- bdata
response_var <- "ORR"

treatment_var = "Treatment_Arm"
biomarker_vars <- c("GeneA_cat2", "GeneA")
adjustment_vars <- "IPI"



wrapper_logistic_regression_interaction(data, response_var = response_var, treatment_var = treatment_var,
    biomarker_vars = biomarker_vars, adjustment_vars = adjustment_vars)
Effect of interaction between biomarker and treatment on ORR.Adjusted analysis. Logistic regression model includes the biomarker, treatment and IPI.
Biomarker Biomarker Effect Treatment Effect Total n OR OR 95% CI P-value Adj. P-value
GeneA cat2 high vs low TRT vs CTRL 400 1.83 (0.62 - 5.39) 0.2766 0.2766
GeneA TRT vs CTRL 400 1.42 (0.82 - 2.48) 0.2113 0.2766

Pearson’s test

### Biomarker effect

data <- bdata
response_var <- "ORR"

biomarker_vars <- c("GeneA_cat2")
treatment_var = "Treatment_Arm"

wrapper_pearsons_test_biomarker(data, response_var = response_var, biomarker_vars = biomarker_vars,
    treatment_var = treatment_var)
Biomarker effect on ORR. Unstratified Pearson’s Chi-squared test.
Treatment Arm Biomarker Subgroup Total N N NR R R 95% CI Difference Difference 95% CI P-value Adj. P-value
CTRL GeneA cat2 low 204 104 14 (13.5%) 90 (86.5%) (78.45 - 92.44) -3.54 (-14.38 - 7.30) 0.6110 0.6110
high 204 100 17 (17.0%) 83 (83.0%) (74.18 - 89.77)
TRT GeneA cat2 low 196 96 18 (18.8%) 78 (81.2%) (72.00 - 88.49) 3.75 (-7.76 - 15.26) 0.6097 0.6110
high 196 100 15 (15.0%) 85 (85.0%) (76.47 - 91.35)
### Treatment effect within biomarker subgroups


data <- bdata
response_var <- "ORR"

biomarker_vars <- c("GeneA_cat2")
treatment_var = "Treatment_Arm"

wrapper_pearsons_test_treatment(data, response_var = response_var, biomarker_vars = biomarker_vars,
    treatment_var = treatment_var)
Treatment effect on ORR. Unstratified Pearson’s Chi-squared test.
Biomarker Biomarker Subgroup Treatment Subgroup Total N N NR R R 95% CI Difference Difference 95% CI P-value Adj. P-value
GeneA cat2 low CTRL 200 104 14 (13.5%) 90 (86.5%) (78.45 - 92.44) -5.29 (-16.49 - 5.91) 0.4087 0.8174
TRT 200 96 18 (18.8%) 78 (81.2%) (72.00 - 88.49)
GeneA cat2 high CTRL 200 100 17 (17.0%) 83 (83.0%) (74.18 - 89.77) 2.00 (-9.16 - 13.16) 0.8471 0.8471
TRT 200 100 15 (15.0%) 85 (85.0%) (76.47 - 91.35)

Cochran-Mantel-Haenszel Chi-Squared Test

### Biomarker effect

data <- bdata
response_var <- "ORR"

biomarker_vars <- c("GeneA_cat2")
treatment_var = "Treatment_Arm"
strata_vars <- "IPI"


wrapper_pearsons_test_biomarker(data, response_var = response_var, biomarker_vars = biomarker_vars,
    treatment_var = treatment_var, strata_vars = strata_vars)
Biomarker effect on ORR. Stratified Cochran-Mantel-Haenszel Chi-squared test. Stratification factors: IPI.
Treatment Arm Biomarker Subgroup Total N N NR R R 95% CI Difference Difference 95% CI P-value Adj. P-value
CTRL GeneA cat2 low 204 104 14 (13.5%) 90 (86.5%) (78.45 - 92.44) -3.54 (-14.38 - 7.30) 0.7990 0.7990
high 204 100 17 (17.0%) 83 (83.0%) (74.18 - 89.77)
TRT GeneA cat2 low 196 96 18 (18.8%) 78 (81.2%) (72.00 - 88.49) 3.75 (-7.76 - 15.26) 0.6010 0.7990
high 196 100 15 (15.0%) 85 (85.0%) (76.47 - 91.35)
### Treatment effect within biomarker subgroups


data <- bdata
response_var <- "ORR"

biomarker_vars <- c("GeneA_cat2")
treatment_var = "Treatment_Arm"
strata_vars <- "IPI"


wrapper_pearsons_test_treatment(data, response_var = response_var, biomarker_vars = biomarker_vars,
    treatment_var = treatment_var, strata_vars = strata_vars)
Treatment effect on ORR. Stratified Cochran-Mantel-Haenszel Chi-squared test. Stratification factors: IPI.
Biomarker Biomarker Subgroup Treatment Subgroup Total N N NR R R 95% CI Difference Difference 95% CI P-value Adj. P-value
GeneA cat2 low CTRL 200 104 14 (13.5%) 90 (86.5%) (78.45 - 92.44) -5.29 (-16.49 - 5.91) 0.4133 0.6850
TRT 200 96 18 (18.8%) 78 (81.2%) (72.00 - 88.49)
GeneA cat2 high CTRL 200 100 17 (17.0%) 83 (83.0%) (74.18 - 89.77) 2.00 (-9.16 - 13.16) 0.6850 0.6850
TRT 200 100 15 (15.0%) 85 (85.0%) (76.47 - 91.35)

Barplot response

### Biomarker effect

data <- bdata
response_var <- "ORR"

biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"


wrapper_bar_plot_biomarker(data, response_var = response_var, biomarker_var = biomarker_var,
    treatment_var = treatment_var, variable_names = variable_names, less_legends = TRUE,
    ylab = "ORR Proportion (%)", strat1_label_both = FALSE, legend_colors_title = FALSE)

### Treatment effect

data <- bdata
response_var <- "ORR"

biomarker_var <- "GeneA_cat2"
treatment_var = "Treatment_Arm"


wrapper_bar_plot_treatment(data, response_var = response_var, biomarker_var = biomarker_var,
    treatment_var = treatment_var, variable_names = variable_names, less_legends = TRUE,
    ylab = "R Proportion (%)", strat1_label_both = FALSE, legend_colors_title = FALSE,
    skip_levels = c("NR"), legend_position = "none")

Barplot

data <- bdata

x_var = "GeneA_cat2"
y_var = "Response"


wrapper_bar_plot_core(data = data, x_var = x_var, y_var = y_var)

wrapper_bar_plot_core(data = data, x_var = x_var, y_var = y_var, y_type = "Count")

wrapper_bar_plot_core(data = data, x_var = y_var, y_var = x_var, y_type = "Proportion")

wrapper_bar_plot_core(data = data, x_var = y_var, y_var = x_var, y_type = "Count")

data <- bdata

x_var = "GeneA_cat2"
y_var = "Response"
skip_levels <- c("NE", "PD", "SD")
show_subtotal_proportions <- TRUE

wrapper_bar_plot_core(data = data, x_var = x_var, y_var = y_var, show_subtotal_proportions = show_subtotal_proportions,
    skip_levels = skip_levels, ylim = c(0, 100))

Stratified

data <- bdata

x_var = "GeneA_cat2"
y_var = "Response"
facet_var <- "Treatment_Arm"


wrapper_bar_plot_core(data = data, x_var = x_var, y_var = y_var, facet_var = facet_var)

data <- bdata

x_var = "GeneA_cat2"
y_var = "Response"

strat1_var = "Treatment_Arm"
strat2_var = "Cell_Of_Origin"


wrapper_bar_plot_core_strat(data = data, x_var = x_var, y_var = y_var, strat1_var = strat1_var,
    strat2_var = strat2_var)

wrapper_bar_plot_core_strat(data = data, x_var = x_var, y_var = y_var, strat1_var = strat1_var,
    strat2_var = strat2_var, less_legends = TRUE)

Complex barplot

data <- bdata

x_var <- "Cell_Of_Origin"
y_vars <- c("GeneA_cat2", "GeneB_cat2", "GeneC_cat2")


wrapper_bar_plot_yvars_core_strat(data, x_var = x_var, y_vars = y_vars, values_to = "Gene expression")

data <- bdata

x_var <- "Cell_Of_Origin"
y_vars <- c("GeneA_cat2", "GeneB_cat2", "GeneC_cat2")

skip_levels <- "low"
method <- "dodge"


wrapper_bar_plot_yvars_core_strat(data, x_var = x_var, y_vars = y_vars, skip_levels = skip_levels,
    method = method, values_to = "Gene expression", names_to = "Gene")

Fisher’s test

data <- bdata

col_var <- "Cell_Of_Origin"
row_vars <- c("GeneA_cat2", "GeneB_cat2", "GeneC_cat2")


wrapper_fishers_test(data, col_var = col_var, row_vars = row_vars)
Fisher’s exact test.
Cell Of Origin
Covariate Subgroup GCB UNCLASSIFIED ABC P-value Adj. P-value
GeneA cat2 low 116 (59.5%) 23 (11.8%) 56 (28.7%) 0.0810 . 0.0810 .
high 102 (52.0%) 39 (19.9%) 55 (28.1%)
GeneB cat2 low 124 (62.9%) 27 (13.7%) 46 (23.4%) 0.0156 * 0.0468 *
high 94 (48.5%) 35 (18.0%) 65 (33.5%)
GeneC cat2 low 109 (56.5%) 23 (11.9%) 61 (31.6%) 0.0782 . 0.0810 .
high 109 (55.1%) 39 (19.7%) 50 (25.3%)
wrapper_fishers_test(data, col_var = col_var, row_vars = row_vars, margin = 2)
Fisher’s exact test.
Cell Of Origin
Covariate Subgroup GCB UNCLASSIFIED ABC P-value Adj. P-value
GeneA cat2 low 116 (53.2%) 23 (37.1%) 56 (50.5%) 0.0810 . 0.0810 .
high 102 (46.8%) 39 (62.9%) 55 (49.5%)
GeneB cat2 low 124 (56.9%) 27 (43.5%) 46 (41.4%) 0.0156 * 0.0468 *
high 94 (43.1%) 35 (56.5%) 65 (58.6%)
GeneC cat2 low 109 (50.0%) 23 (37.1%) 61 (55.0%) 0.0782 . 0.0810 .
high 109 (50.0%) 39 (62.9%) 50 (45.0%)

Box plot

data <- bdata

x_var <- "Cell_Of_Origin"
y_var <- "GeneA"


wrapper_box_plot_core(data = data, x_var = x_var, y_var = y_var)

Stratified

data <- bdata

x_var <- "Cell_Of_Origin"
y_var <- "GeneA"
facet_var <- "Treatment_Arm"
dodge_var = NULL


wrapper_box_plot_core(data = data, x_var = x_var, y_var = y_var, facet_var = facet_var,
    dodge_var = dodge_var)

data <- bdata

x_var <- "Treatment_Arm"
y_var <- "GeneA"
facet_var <- NULL
dodge_var = "Cell_Of_Origin"


wrapper_box_plot_core(data = data, x_var = x_var, y_var = y_var, facet_var = facet_var,
    dodge_var = dodge_var)

data <- bdata

x_var <- "Cell_Of_Origin"
y_var <- "GeneA"


strat1_var = "Treatment_Arm"


wrapper_box_plot_core_strat(data = data, x_var = x_var, y_var = y_var, strat1_var = strat1_var)

Complex boxplot

data <- bdata
y_vars <- c("GeneA", "GeneB", "GeneC")


wrapper_box_plot_yvars_core_strat(data, y_vars = y_vars)

data <- bdata
y_vars <- c("GeneA", "GeneB", "GeneC")

x_var = "Cell_Of_Origin"
facet_var = NULL
dodge_var = NULL


wrapper_box_plot_yvars_core_strat(data, y_vars = y_vars, x_var = x_var, facet_var = facet_var,
    dodge_var = dodge_var)

data <- bdata
y_vars <- c("GeneA", "GeneB", "GeneC")

x_var = NULL
facet_var = NULL
dodge_var = "Cell_Of_Origin"


wrapper_box_plot_yvars_core_strat(data, y_vars = y_vars, x_var = x_var, facet_var = facet_var,
    dodge_var = dodge_var)

Kruskal–Wallis H test

data <- bdata

cat_vars <- "Cell_Of_Origin"
num_vars <- c("GeneA", "GeneB")


wrapper_kruskal_test(data, num_vars = num_vars, cat_vars = cat_vars)
Kruskal-Wallis H test.
Cell Of Origin
Covariate Statistic GCB UNCLASSIFIED ABC P-value Adj. P-value
GeneA N 218 62 111 0.0265 * 0.0440 *
Median 7.95 8.53 7.98
GeneB N 218 62 111 0.0440 * 0.0440 *
Median 7.92 8.10 8.12
wrapper_kruskal_test(data, num_vars = num_vars, cat_vars = cat_vars, print_pvalues = FALSE)
Kruskal-Wallis H test.
Cell Of Origin
Covariate Statistic GCB UNCLASSIFIED ABC
GeneA N 218 62 111
Median 7.95 8.53 7.98
GeneB N 218 62 111
Median 7.92 8.10 8.12
data <- bdata

cat_vars <- c("Cell_Of_Origin", "IPI", "Treatment_Arm")
num_vars <- "GeneA"


wrapper_kruskal_test(data, num_vars = num_vars, cat_vars = cat_vars)
Kruskal-Wallis H test.
GeneA
Covariate Subgroup N Median Difference P-value Adj. P-value
Cell Of Origin GCB 218 7.95 NA 0.0265 * 0.0794 .
UNCLASSIFIED 62 8.53
ABC 111 7.98
IPI Low 79 7.87 NA 0.0867 . 0.1301
Low-Intermediate 147 7.93
High-Intermediate 108 8.29
High 66 7.90
Treatment Arm CTRL 204 7.98 0.06 0.9043 0.9043
TRT 196 8.03

Point plot

data <- bdata

x_var <- "GeneA"
y_var <- "GeneB"


wrapper_point_plot_core(data = data, x_var = x_var, y_var = y_var)

Stratified

data <- bdata

x_var <- "GeneA"
y_var <- "GeneB"
facet_var <- "Treatment_Arm"
color_point_var <- "Cell_Of_Origin"


wrapper_point_plot_core(data = data, x_var = x_var, y_var = y_var, facet_var = facet_var,
    color_point_var = color_point_var)

data <- bdata

x_var <- "GeneD"
y_var <- "GeneB"
facet_var <- "Treatment_Arm"
strat1_var <- "Cell_Of_Origin"


wrapper_point_plot_core_strat(data = data, x_var = x_var, y_var = y_var, facet_var = facet_var,
    strat1_var = strat1_var)

Characteristics of ITT and BEP

data <- bdata

strat_var <- "Treatment_Arm"
covariate_vars <- c("Age", "IPI")


wrapper_characteristics_core(data, strat_var = strat_var, covariate_vars = covariate_vars)
Treatment Arm
Covariate CTRL TRT
Age
N 252 248
NAs 0 0
Median 63.00 63.00
Mean 60.73 61.05
IPI
N 252 248
NAs 0 0
Low 52 (20.63%) 48 (19.35%)
Low-Intermediate 95 (37.70%) 88 (35.48%)
High-Intermediate 71 (28.17%) 64 (25.81%)
High 34 (13.49%) 48 (19.35%)
bep_vars <- "BEP_RNAseq"
covariate_vars <- c("Age", "IPI")

treatment_var <- "Treatment_Arm"
itt_name = "ITT"


wrapper_characteristics_bep(data, covariate_vars = covariate_vars, bep_vars = bep_vars,
    treatment_var = treatment_var, itt_name = itt_name)
Covariate ITT ITT : CTRL ITT : TRT BEP RNAseq BEP RNAseq : CTRL BEP RNAseq : TRT
Age
N 500 252 248 400 204 196
NAs 0 0 0 0 0 0
Median 63.00 63.00 63.00 63.00 63.00 63.00
Mean 60.89 60.73 61.05 61.71 61.50 61.93
IPI
N 500 252 248 400 204 196
NAs 0 0 0 0 0 0
Low 100 (20.00%) 52 (20.63%) 48 (19.35%) 79 (19.75%) 44 (21.57%) 35 (17.86%)
Low-Intermediate 183 (36.60%) 95 (37.70%) 88 (35.48%) 147 (36.75%) 73 (35.78%) 74 (37.76%)
High-Intermediate 135 (27.00%) 71 (28.17%) 64 (25.81%) 108 (27.00%) 59 (28.92%) 49 (25.00%)
High 82 (16.40%) 34 (13.49%) 48 (19.35%) 66 (16.50%) 28 (13.73%) 38 (19.39%)
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] BiomarkerWrappers_0.0.0.60 ggplot2_3.5.1             
## [3] kableExtra_1.4.0           rmarkdown_2.27            
## [5] knitr_1.47                
## 
## loaded via a namespace (and not attached):
##   [1] polynom_1.4-1         gridExtra_2.3         formatR_1.14         
##   [4] rlang_1.1.4           magrittr_2.0.3        clue_0.3-65          
##   [7] GetoptLong_1.0.5      matrixStats_1.3.0     compiler_4.4.1       
##  [10] png_0.1-8             systemfonts_1.1.0     vctrs_0.6.5          
##  [13] stringr_1.5.1         pkgconfig_2.0.3       shape_1.4.6.1        
##  [16] crayon_1.5.3          fastmap_1.2.0         backports_1.5.0      
##  [19] labeling_0.4.3        KMsurv_0.1-5          utf8_1.2.4           
##  [22] markdown_1.13         forestplot_3.1.3      ggbeeswarm_0.7.2     
##  [25] ragg_1.3.2            purrr_1.0.2           xfun_0.45            
##  [28] cachem_1.1.0          jsonlite_1.8.8        highr_0.11           
##  [31] broom_1.0.6           parallel_4.4.1        cluster_2.1.6        
##  [34] R6_2.5.1              bslib_0.7.0           stringi_1.8.4        
##  [37] RColorBrewer_1.1-3    limma_3.60.3          car_3.1-2            
##  [40] jquerylib_0.1.4       Rcpp_1.0.12           iterators_1.0.14     
##  [43] zoo_1.8-12            IRanges_2.38.0        Matrix_1.7-0         
##  [46] splines_4.4.1         tidyselect_1.2.1      rstudioapi_0.16.0    
##  [49] abind_1.4-5           yaml_2.3.8            ggtext_0.1.2         
##  [52] doParallel_1.0.17     codetools_0.2-20      lattice_0.22-6       
##  [55] tibble_3.2.1          plyr_1.8.9            withr_3.0.0          
##  [58] evaluate_0.24.0       desc_1.4.3            survival_3.7-0       
##  [61] ggpp_0.5.8            xml2_1.3.6            circlize_0.4.16      
##  [64] survMisc_0.5.6        pillar_1.9.0          ggpubr_0.6.0         
##  [67] carData_3.0-5         checkmate_2.3.1       foreach_1.5.2        
##  [70] stats4_4.4.1          generics_0.1.3        S4Vectors_0.42.0     
##  [73] commonmark_1.9.1      munsell_0.5.1         scales_1.3.0         
##  [76] xtable_1.8-4          glue_1.7.0            survminer_0.4.9      
##  [79] tools_4.4.1           data.table_1.15.4     ggsignif_0.6.4       
##  [82] fs_1.6.4              cowplot_1.1.3         grid_4.4.1           
##  [85] tidyr_1.3.1           colorspace_2.1-0      beeswarm_0.4.0       
##  [88] vipor_0.4.7           cli_3.6.3             km.ci_0.5-6          
##  [91] textshaping_0.4.0     fansi_1.0.6           viridisLite_0.4.2    
##  [94] svglite_2.1.3         ComplexHeatmap_2.20.0 dplyr_1.1.4          
##  [97] gtable_0.3.5          rstatix_0.7.2         sass_0.4.9           
## [100] digest_0.6.36         BiocGenerics_0.50.0   rjson_0.2.21         
## [103] htmlwidgets_1.6.4     farver_2.1.2          memoise_2.0.1        
## [106] htmltools_0.5.8.1     pkgdown_2.0.9         lifecycle_1.0.4      
## [109] GlobalOptions_0.1.2   statmod_1.5.0         gridtext_0.1.5       
## [112] MASS_7.3-61