camr_combine_flags
A function is proposed for inclusion with no current implementation.
Combine a set of 0/1 columns into one character vector (or factor) with the pasted column names of those columns where a 1 is present. Intended for use with REDCap checkbox fields, but usable for any set of logical vectors.
combine_ckb <-
function (cols, fn_rename=\(x) str_remove(x, '^.*___'), collapse='|', as.factor=FALSE) {
# Use this function within mutate(). cols is a tidyselect specification.
df_subset <- across(cols)
# Function to be applied to each row. Get the names of columns that have a 1.
acc <- \(x) names(df_subset)[which(x == 1)]
# Rename the values if the user does not want to keep the original column names.
if (!is.null(fn_rename))
acc <- compose(fn_rename, acc)
# Combine the values into a single string.
acc <- compose(\(x) paste(x, collapse=collapse), acc)
# Apply the function and return, unless the user wants a factor.
if (isFALSE(as.factor))
return(apply(df_subset, 1, acc))
# The user wants a factor, so define a function to extract the factor levels.
combinedlevels <- function(labels) {
n <- length(labels)
l <- (2^n) -1
m <- apply(matrix(1:l, l), 1, \(x) as.integer(intToBits(x)))[1:n,]
f <- apply(m, 2, \(x) paste(labels[!!x], collapse=collapse))
f[order(colSums(m))]
}
# Extract all the possible combinations to produce levels for the factor.
fl <- combinedlevels(fn_rename(names(df_subset)))
# Apply the function and convert to factor.
factor(apply(df_subset, 1, acc), fl)
}
Example:
# Given a REDCap Dataframe with Checkbox Fields...
# record_id redcap_event_name sex age race___1 race___2 race___3
# <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 baseline_arm_1 0 30 1 0 1
# 2 2 baseline_arm_1 1 25 0 0 1
# 3 3 baseline_arm_1 1 31 1 1 0
# 4 4 baseline_arm_1 0 21 1 1 1
proc_race <- \(x) recode(x, race___1='White', race___2='Black', race___3='Asian')
data |>
mutate(
codes = combine_ckb(starts_with('race_')),
labels = combine_ckb(starts_with('race_'), proc_race, collapse=', ', as.factor=TRUE)
)
# record_id redcap_event_name sex age race___1 race___2 race___3 codes labels
# <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
# 1 1 baseline_arm_1 0 30 1 0 1 1|3 White, Asian
# 2 2 baseline_arm_1 1 25 0 0 1 3 Asian
# 3 3 baseline_arm_1 1 31 1 1 0 1|2 White, Black
# 4 4 baseline_arm_1 0 21 1 1 1 1|2|3 White, Black, Asian