Module 4 Solution — Date & Text Handling

✅ Module 4 — Date & Text Handling (Solution)

Integrating R4DS Chapters 14 (Strings), 15 (Regular Expressions), 16 (Factors)

📦 Setup and Libraries

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tibble)
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(stringr)    # R4DS Chapter 14: Strings
library(forcats)    # R4DS Chapter 16: Factors

✅ Exercise 1 Solution: Date Parsing Challenge

# Create the dataset: six AEs whose start dates arrive in six different layouts
ae_raw <- tibble(
  USUBJID = c("001-001", "001-002", "001-003", "001-004", "001-005", "001-006"),
  AEDECOD = c("HEADACHE", "NAUSEA", "FATIGUE", "DIZZINESS", "RASH", "COUGH"),
  AESTDTC_MESSY = c("2024-01-20", "25/01/2024", "01/18/2024", "20240122", "2024/01/25", "Jan 26, 2024"),
  RFSTDTC = rep("2024-01-15", 6)
)

# Solution: Parse dates and calculate study days
#
# How the parsing is organised:
#   * Each branch first recognises a layout with a regex, then applies the
#     matching lubridate parser.
#   * case_when() evaluates EVERY right-hand side on the whole column, so each
#     parser also sees strings meant for other branches. `quiet = TRUE` keeps
#     those expected misses from raising "failed to parse" warnings.
#   * Year-first dates are accepted with "-" or "/" separators.
#   * dd/mm/yyyy and mm/dd/yyyy share one shape, so they cannot be told apart
#     by regex. Day-first is tried first and month-first is the fallback;
#     a genuinely ambiguous value such as "03/04/2024" is read as 3 April.
#
# NOTE: the console output printed in this document was captured before the
# parsing was corrected. After re-rendering, rows 3 and 5 parse to 2024-01-18
# and 2024-01-25 (AESTDY 4 and 11) and the parse warnings disappear.
ae_dates <- ae_raw %>%
  mutate(
    AESTDT = case_when(
      # 2024-01-20 or 2024/01/25
      str_detect(AESTDTC_MESSY, "^\\d{4}[-/]\\d{2}[-/]\\d{2}$") ~
        ymd(AESTDTC_MESSY, quiet = TRUE),
      # 20240122
      str_detect(AESTDTC_MESSY, "^\\d{8}$") ~
        ymd(AESTDTC_MESSY, quiet = TRUE),
      # 25/01/2024 (day first) or 01/18/2024 (month first)
      str_detect(AESTDTC_MESSY, "^\\d{2}/\\d{2}/\\d{4}$") ~
        coalesce(
          dmy(AESTDTC_MESSY, quiet = TRUE),
          mdy(AESTDTC_MESSY, quiet = TRUE)
        ),
      # Jan 26, 2024 (one- or two-digit day)
      str_detect(AESTDTC_MESSY, "^[A-Za-z]{3}\\s+\\d{1,2},\\s+\\d{4}$") ~
        mdy(AESTDTC_MESSY, quiet = TRUE),
      # Anything unrecognised stays missing instead of being guessed
      TRUE ~ as.Date(NA)
    ),

    # Parse RFSTDTC (reference start date, always ISO 8601 here)
    RFSTDT = ymd(RFSTDTC),

    # Calculate study day. There is no study day 0: dates on or after the
    # reference date get +1, dates before it keep the plain (negative)
    # difference. A missing date propagates to a missing study day.
    AESTDY = as.numeric(AESTDT - RFSTDT),
    AESTDY = if_else(AESTDY >= 0, AESTDY + 1, AESTDY)
  )
Warning: There were 5 warnings in `mutate()`.
The first warning was:
ℹ In argument: `AESTDT = case_when(...)`.
Caused by warning:
!  3 failed to parse.
ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
print("✅ Parsed dates and study days:")
[1] "✅ Parsed dates and study days:"
print(ae_dates)
# A tibble: 6 × 7
  USUBJID AEDECOD   AESTDTC_MESSY RFSTDTC    AESTDT     RFSTDT     AESTDY
  <chr>   <chr>     <chr>         <chr>      <date>     <date>      <dbl>
1 001-001 HEADACHE  2024-01-20    2024-01-15 2024-01-20 2024-01-15      6
2 001-002 NAUSEA    25/01/2024    2024-01-15 2024-01-25 2024-01-15     11
3 001-003 FATIGUE   01/18/2024    2024-01-15 NA         2024-01-15     NA
4 001-004 DIZZINESS 20240122      2024-01-15 2024-01-22 2024-01-15      8
5 001-005 RASH      2024/01/25    2024-01-15 NA         2024-01-15     NA
6 001-006 COUGH     Jan 26, 2024  2024-01-15 2024-01-26 2024-01-15     12

✅ Exercise 2 Solution: Study Day Categories

# Bucket each AE by when it started relative to first dose, and flag early AEs
ae_with_categories <- mutate(
  ae_dates,

  # Study-period label. Branches are tested from the latest period down, so
  # each row takes the first threshold it exceeds; a missing study day is
  # caught first and labelled explicitly.
  STUDYDAY_PERIOD = case_when(
    is.na(AESTDY) ~ "Unknown",
    AESTDY > 28 ~ "After Month 1",
    AESTDY > 14 ~ "Month 1",
    AESTDY > 7 ~ "Week 2",
    AESTDY > 0 ~ "Week 1",
    TRUE ~ "Pre-treatment"
  ),

  # "Y" when onset falls on study days 1-7, "N" otherwise; stays NA when the
  # study day itself is missing
  EARLY_AE = if_else(between(AESTDY, 1, 7), "Y", "N")
)

print("✅ With study day categories:")
[1] "✅ With study day categories:"
print(ae_with_categories)
# A tibble: 6 × 9
  USUBJID AEDECOD   AESTDTC_MESSY RFSTDTC    AESTDT     RFSTDT     AESTDY
  <chr>   <chr>     <chr>         <chr>      <date>     <date>      <dbl>
1 001-001 HEADACHE  2024-01-20    2024-01-15 2024-01-20 2024-01-15      6
2 001-002 NAUSEA    25/01/2024    2024-01-15 2024-01-25 2024-01-15     11
3 001-003 FATIGUE   01/18/2024    2024-01-15 NA         2024-01-15     NA
4 001-004 DIZZINESS 20240122      2024-01-15 2024-01-22 2024-01-15      8
5 001-005 RASH      2024/01/25    2024-01-15 NA         2024-01-15     NA
6 001-006 COUGH     Jan 26, 2024  2024-01-15 2024-01-26 2024-01-15     12
# ℹ 2 more variables: STUDYDAY_PERIOD <chr>, EARLY_AE <chr>

✅ Exercise 3 Solution: String Cleaning Challenge (R4DS Chapter 14)

# Create messy adverse event terms
ae_messy_text <- tibble(
  USUBJID = c("001-001", "001-002", "001-003", "001-004", "001-005"),
  AEDECOD_RAW = c(
    "  mild headache  ",
    "SEVERE nausea (grade 3)",
    "fatigue - moderate",
    "  DIZZINESS mild  ",
    "skin rash (MODERATE)"
  ),
  MEDICATION = c("Ibuprofen 400mg", "ondansetron 8 MG", "caffeine 200mg", "rest", "hydrocortisone 1%"),
  TREATMENT_ARM = c("Active", "Placebo", "Active", "Active", "Placebo"),
  SEVERITY = c("Mild", "Moderate", "Severe", "Mild", "Moderate")
)

# Solution: Clean and extract information
ae_cleaned <- ae_messy_text %>%
  mutate(
    # Clean adverse event terms
    AEDECOD_CLEAN = AEDECOD_RAW %>%
      str_trim() %>%                           # Remove leading/trailing spaces
      str_to_upper() %>%                       # Convert to uppercase
      str_replace_all("\\([^)]*\\)", "") %>%   # Remove parentheses and contents
      str_replace_all(" - ", " ") %>%          # Remove dashes
      str_replace_all("\\s+", " ") %>%         # Collapse runs of whitespace
      str_trim(),                              # Trim what the removals left over

    # Extract severity from the free text (case-insensitive). The first
    # matching branch wins. NOTE: this overwrites the SEVERITY column that
    # came with the input data, so the text-derived value replaces it.
    SEVERITY = case_when(
      str_detect(AEDECOD_RAW, "(?i)mild") ~ "MILD",
      str_detect(AEDECOD_RAW, "(?i)moderate") ~ "MODERATE",
      str_detect(AEDECOD_RAW, "(?i)severe") ~ "SEVERE",
      TRUE ~ "UNKNOWN"
    ),

    # Extract base term: drop a severity word at the start or end of the
    # cleaned term. Severity inside parentheses is already gone by this point.
    AETERM_BASE = AEDECOD_CLEAN %>%
      str_replace_all("^(MILD|MODERATE|SEVERE)\\s+", "") %>%
      str_replace_all("\\s+(MILD|MODERATE|SEVERE)$", ""),

    # Create specific AE flags
    HEADACHE_FLAG = if_else(str_detect(AETERM_BASE, "HEADACHE"), "Y", "N"),
    NAUSEA_FLAG = if_else(str_detect(AETERM_BASE, "NAUSEA"), "Y", "N"),
    FATIGUE_FLAG = if_else(str_detect(AETERM_BASE, "FATIGUE"), "Y", "N"),

    # Extract numeric dose from medication. The optional "(\\.\\d+)?" keeps
    # decimal doses whole ("2.5mg" -> 2.5); matching digits alone would stop
    # at the decimal point. NA when the text contains no number.
    DOSE_NUMERIC = as.numeric(str_extract(MEDICATION, "\\d+(\\.\\d+)?")),

    # Clean medication names: upper-case, then strip "<number><unit>" with the
    # same decimal-aware number pattern so no stray "2." is left behind
    MED_CLEAN = MEDICATION %>%
      str_to_upper() %>%
      str_replace_all("\\d+(\\.\\d+)?\\s?[A-Z%]+", "") %>%
      str_trim()
  )

print("✅ Cleaned text data:")
[1] "✅ Cleaned text data:"
print(ae_cleaned)
# A tibble: 5 × 12
  USUBJID AEDECOD_RAW            MEDICATION TREATMENT_ARM SEVERITY AEDECOD_CLEAN
  <chr>   <chr>                  <chr>      <chr>         <chr>    <chr>
1 001-001 "  mild headache  "    Ibuprofen… Active        MILD     MILD HEADACHE
2 001-002 "SEVERE nausea (grade… ondansetr… Placebo       SEVERE   SEVERE NAUSEA
3 001-003 "fatigue - moderate"   caffeine … Active        MODERATE FATIGUE MODE…
4 001-004 "  DIZZINESS mild  "   rest       Active        MILD     DIZZINESS MI…
5 001-005 "skin rash (MODERATE)" hydrocort… Placebo       MODERATE SKIN RASH
# ℹ 6 more variables: AETERM_BASE <chr>, HEADACHE_FLAG <chr>,
#   NAUSEA_FLAG <chr>, FATIGUE_FLAG <chr>, DOSE_NUMERIC <dbl>, MED_CLEAN <chr>

✅ Exercise 4 Solution: Regular Expressions Practice (R4DS Chapter 15)

# Create clinical data with patterns to validate
clinical_data <- tibble(
  USUBJID = c("001-001", "002-001", "001-ABC", "999-123", "001-99"),
  PHONE = c("(555) 123-4567", "555-123-4567", "555.123.4567", "5551234567", "invalid-phone"),
  EMAIL = c("investigator@clinic.com", "bad.email", "test@site.org", "missing@", "@incomplete.com"),
  LAB_RESULT = c("WBC: 7.2 K/uL", "Hemoglobin: 12.5 g/dL", "Glucose: 95 mg/dL", "Invalid result", ""),
  VISIT_DATE = c("2024-01-15", "2024/01/20", "15-Jan-2024", "invalid-date", "2024-13-45")
)

# Solution: Use regular expressions for validation
clinical_validated <- clinical_data %>%
  mutate(
    # Validate subject ID format (###-###)
    VALID_SUBJID = str_detect(USUBJID, "^\\d{3}-\\d{3}$"),

    # Validate phone number formats: optional parentheses around the area
    # code, and an optional "-", "." or whitespace between the digit groups
    VALID_PHONE = str_detect(PHONE, "^(\\(\\d{3}\\)|\\d{3})[-.\\s]?\\d{3}[-.\\s]?\\d{4}$"),

    # Validate email addresses (local part, "@", domain, dot, 2+ letter TLD)
    VALID_EMAIL = str_detect(EMAIL, "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"),

    # Extract the numeric value that follows the colon. Anchoring on the
    # colon keeps digits inside a test name from being taken as the result.
    # str_match() column 2 is the capture group; NA when nothing matches.
    LAB_VALUE = as.numeric(str_match(LAB_RESULT, ":\\s*(\\d+\\.?\\d*)")[, 2]),

    # Extract lab test name (text before the colon). The colon is required,
    # so a string without one ("Invalid result") yields NA instead of being
    # reported as a test name.
    LAB_TEST = str_match(LAB_RESULT, "^([^:]+):")[, 2],

    # Validate date formats (YYYY-MM-DD only). This checks the layout only;
    # "2024-13-45" has the right shape even though it is not a real date.
    VALID_DATE_FORMAT = str_detect(VISIT_DATE, "^\\d{4}-\\d{2}-\\d{2}$"),

    # Extract year from valid dates only: the layout must match AND the value
    # must be a real calendar date. Everything else gets NA rather than a
    # year pulled from an invalid string.
    VISIT_YEAR = if_else(
      VALID_DATE_FORMAT & !is.na(ymd(VISIT_DATE, quiet = TRUE)),
      str_sub(VISIT_DATE, 1, 4),
      NA_character_
    )
  )

print("✅ Regular expression validation results:")
[1] "✅ Regular expression validation results:"
print(clinical_validated)
# A tibble: 5 × 12
  USUBJID PHONE EMAIL LAB_RESULT VISIT_DATE VALID_SUBJID VALID_PHONE VALID_EMAIL
  <chr>   <chr> <chr> <chr>      <chr>      <lgl>        <lgl>       <lgl>
1 001-001 (555… inve… "WBC: 7.2… 2024-01-15 TRUE         TRUE        TRUE
2 002-001 555-… bad.… "Hemoglob… 2024/01/20 TRUE         TRUE        FALSE
3 001-ABC 555.… test… "Glucose:… 15-Jan-20… FALSE        TRUE        TRUE
4 999-123 5551… miss… "Invalid … invalid-d… TRUE         TRUE        FALSE
5 001-99  inva… @inc… ""         2024-13-45 FALSE        FALSE       FALSE
# ℹ 4 more variables: LAB_VALUE <dbl>, LAB_TEST <chr>, VALID_DATE_FORMAT <lgl>,
#   VISIT_YEAR <chr>

✅ Exercise 5 Solution: Factor Management Practice (R4DS Chapter 16)

# Raw clinical dataset: eight subjects, every column still plain character
clinical_factors <- tibble(
  USUBJID = sprintf("%03d-%03d", 1:8, 1:8),
  SEVERITY = c(
    "Mild", "Severe", "Moderate", "Mild",
    "Severe", "Moderate", "Mild", "Moderate"
  ),
  TREATMENT = c(
    "Placebo", "Low Dose", "High Dose", "Placebo",
    "Low Dose", "High Dose", "Placebo", "Low Dose"
  ),
  OUTCOME = c(
    "Recovered", "Ongoing", "Recovered", "Worsened",
    "Recovered", "Ongoing", "Recovered", "Ongoing"
  ),
  VISIT_NAME = c(
    "Screening", "Day 1", "Week 2", "Month 1",
    "Week 4", "End of Study", "Follow-up", "Unscheduled"
  )
)

# Solution: turn each character column into the factor type that suits it
clinical_factors_clean <- mutate(
  clinical_factors,

  # Severity has a natural ranking, so it becomes an ordered factor
  SEVERITY_FACTOR = ordered(SEVERITY, levels = c("Mild", "Moderate", "Severe")),

  # Treatment keeps its level order but gets display labels with the dose
  TREATMENT_FACTOR = factor(
    TREATMENT,
    levels = c("Placebo", "Low Dose", "High Dose"),
    labels = c("Placebo", "Low Dose (5mg)", "High Dose (10mg)")
  ),

  # Outcome has no ranking; default (alphabetical) levels are fine
  OUTCOME_FACTOR = as.factor(OUTCOME),

  # Visits follow the schedule order given here, not alphabetical order
  VISIT_FACTOR = factor(
    VISIT_NAME,
    levels = c(
      "Screening", "Day 1", "Week 2", "Week 4",
      "Month 1", "End of Study", "Follow-up", "Unscheduled"
    )
  ),

  # Risk category from severity and treatment, as an ordered factor.
  # The first matching rule wins; anything unmatched falls back to "Medium".
  RISK_CATEGORY = ordered(
    case_when(
      SEVERITY == "Severe" & TREATMENT == "High Dose" ~ "High",
      SEVERITY == "Moderate" | (SEVERITY == "Severe" & TREATMENT != "High Dose") ~ "Medium",
      SEVERITY == "Mild" ~ "Low",
      TRUE ~ "Medium"
    ),
    levels = c("Low", "Medium", "High")
  )
)

print("✅ Factor management results:")
[1] "✅ Factor management results:"
print(clinical_factors_clean)
# A tibble: 8 × 10
  USUBJID SEVERITY TREATMENT OUTCOME VISIT_NAME SEVERITY_FACTOR TREATMENT_FACTOR
  <chr>   <chr>    <chr>     <chr>   <chr>      <ord>           <fct>
1 001-001 Mild     Placebo   Recove… Screening  Mild            Placebo
2 002-002 Severe   Low Dose  Ongoing Day 1      Severe          Low Dose (5mg)
3 003-003 Moderate High Dose Recove… Week 2     Moderate        High Dose (10mg)
4 004-004 Mild     Placebo   Worsen… Month 1    Mild            Placebo
5 005-005 Severe   Low Dose  Recove… Week 4     Severe          Low Dose (5mg)
6 006-006 Moderate High Dose Ongoing End of St… Moderate        High Dose (10mg)
7 007-007 Mild     Placebo   Recove… Follow-up  Mild            Placebo
8 008-008 Moderate Low Dose  Ongoing Unschedul… Moderate        Low Dose (5mg)
# ℹ 3 more variables: OUTCOME_FACTOR <fct>, VISIT_FACTOR <fct>,
#   RISK_CATEGORY <ord>
# Factor manipulation exercises solutions:

# Frequency of each severity level
severity_counts <- table(clinical_factors_clean[["SEVERITY_FACTOR"]])

# Treatment levels re-sorted from most to least frequent
treatment_reordered <- clinical_factors_clean %>%
  pull(TREATMENT_FACTOR) %>%
  fct_infreq()

# Eight visit levels merged into three broader groups
visit_collapsed <- clinical_factors_clean %>%
  pull(VISIT_FACTOR) %>%
  fct_collapse(
    Early = c("Screening", "Day 1"),
    Late = c("End of Study", "Follow-up"),
    Other = c("Week 2", "Week 4", "Month 1", "Unscheduled")
  )

print("✅ Factor manipulation results:")
[1] "✅ Factor manipulation results:"
print(severity_counts)

    Mild Moderate   Severe
       3        3        2 
print(levels(treatment_reordered))
[1] "Placebo"          "Low Dose (5mg)"   "High Dose (10mg)"
print(levels(visit_collapsed))
[1] "Early" "Other" "Late" 

✅ Exercise 6 Solution: AESTDY Derivation Practice

# Create a more complex dataset for AESTDY practice
# (date-times to the minute; the last record has no start or end)
complex_ae <- tibble(
  USUBJID = c("001-001", "001-001", "001-002", "001-002", "001-003"),
  AESEQ = c(1, 2, 1, 2, 1),
  AEDECOD = c("HEADACHE", "NAUSEA", "FATIGUE", "HEADACHE", "DIZZINESS"),
  AESTDTC = c("2024-01-20T08:30", "2024-01-25T14:15", "2024-01-18T09:00", "2024-01-22T16:30", NA),
  AEENDTC = c("2024-01-21T10:00", "2024-01-26T08:00", "2024-01-20T18:00", "2024-01-23T12:00", NA),
  RFSTDTC = c("2024-01-15T09:00", "2024-01-15T09:00", "2024-01-16T10:00", "2024-01-16T10:00", "2024-01-15T09:00")
)

# Solution: Derive comprehensive study day variables
complex_ae_derived <- complex_ae %>%
  mutate(
    # Parse start/end/reference date-times (NA input stays NA)
    AESTDT = ymd_hm(AESTDTC),
    AEENDT = ymd_hm(AEENDTC),
    RFSTDT = ymd_hm(RFSTDTC),

    # Calculate study days from the DATE parts only, so the time of day never
    # shifts the result. There is no study day 0: on or after the reference
    # date the day is (difference + 1); before it, the plain negative
    # difference is kept.
    AESTDY = as.numeric(as.Date(AESTDT) - as.Date(RFSTDT)),
    AESTDY = if_else(AESTDY >= 0, AESTDY + 1, AESTDY),
    AEENDY = as.numeric(as.Date(AEENDT) - as.Date(RFSTDT)),
    AEENDY = if_else(AEENDY >= 0, AEENDY + 1, AEENDY),

    # Calculate duration as whole calendar days between start and end
    AE_DURATION_DAYS = as.numeric(as.Date(AEENDT) - as.Date(AESTDT)),

    # Handle missing dates explicitly: study day is NA unless both dates exist
    AESTDY_SAFE = case_when(
      is.na(AESTDT) | is.na(RFSTDT) ~ NA_real_,
      TRUE ~ AESTDY
    ),

    # Create validation flags. The first matching rule wins. The raw text is
    # checked first (truly missing), then the parsed value, so a timestamp
    # that is present but could not be parsed is reported as such instead of
    # slipping through as "Complete".
    VALID_DATES = case_when(
      is.na(AESTDTC) ~ "Missing AE start date",
      is.na(RFSTDTC) ~ "Missing reference date",
      is.na(AESTDT) ~ "Unparseable AE start date",
      is.na(RFSTDT) ~ "Unparseable reference date",
      !is.na(AEENDTC) & is.na(AEENDT) ~ "Unparseable AE end date",
      !is.na(AEENDTC) & AEENDT < AESTDT ~ "End date before start date",
      TRUE ~ "Complete"
    )
  )

print("✅ Complex AESTDY derivations:")
[1] "✅ Complex AESTDY derivations:"
print(complex_ae_derived)
# A tibble: 5 × 14
  USUBJID AESEQ AEDECOD   AESTDTC          AEENDTC   RFSTDTC AESTDT
  <chr>   <dbl> <chr>     <chr>            <chr>     <chr>   <dttm>
1 001-001     1 HEADACHE  2024-01-20T08:30 2024-01-… 2024-0… 2024-01-20 08:30:00
2 001-001     2 NAUSEA    2024-01-25T14:15 2024-01-… 2024-0… 2024-01-25 14:15:00
3 001-002     1 FATIGUE   2024-01-18T09:00 2024-01-… 2024-0… 2024-01-18 09:00:00
4 001-002     2 HEADACHE  2024-01-22T16:30 2024-01-… 2024-0… 2024-01-22 16:30:00
5 001-003     1 DIZZINESS <NA>             <NA>      2024-0… NA
# ℹ 7 more variables: AEENDT <dttm>, RFSTDT <dttm>, AESTDY <dbl>, AEENDY <dbl>,
#   AE_DURATION_DAYS <dbl>, AESTDY_SAFE <dbl>, VALID_DATES <chr>

✅ Bonus Solution: Combined Date, Text, Regex, and Factors Challenge

# Combine everything from R4DS Chapters 14, 15, 16

# SEVERITY and TREATMENT are simulated with sample(); fixing the seed makes
# every run of this chunk produce the same values.
set.seed(2024)

final_challenge <- complex_ae_derived %>%
  # Add simulated severity and treatment values (one draw per row)
  mutate(
    SEVERITY = sample(c("Mild", "Moderate", "Severe"), n(), replace = TRUE),
    TREATMENT = sample(c("Placebo", "Active"), n(), replace = TRUE)
  ) %>%
  mutate(
    # Create comprehensive AE description. Missing pieces are handled
    # explicitly so the text never contains a literal "NA": an unknown study
    # day is spelled out, and the duration clause is dropped when unknown.
    AE_DESCRIPTION = paste0(
      AEDECOD, " - ", SEVERITY, " severity, ",
      if_else(
        is.na(AESTDY_SAFE),
        "Study Day unknown",
        paste0("Study Day ", AESTDY_SAFE)
      ),
      if_else(
        !is.na(AE_DURATION_DAYS),
        paste0(", Duration: ", AE_DURATION_DAYS, " days"),
        ""
      ),
      " (", TREATMENT, " arm)"
    ),

    # Create analysis-ready flags. EARLY_ONSET and LONG_DURATION stay NA when
    # the value they depend on is missing.
    ONGOING_AE = if_else(is.na(AEENDTC), "Y", "N"),
    EARLY_ONSET = if_else(AESTDY_SAFE <= 7 & AESTDY_SAFE >= 1, "Y", "N"),
    LONG_DURATION = if_else(AE_DURATION_DAYS > 7, "Y", "N"),

    # Create factors: severity is ranked, treatment is not
    SEVERITY_FACTOR = factor(SEVERITY, levels = c("Mild", "Moderate", "Severe"), ordered = TRUE),
    TREATMENT_FACTOR = factor(TREATMENT, levels = c("Placebo", "Active"))
  )

print("✅ Final combined challenge:")
[1] "✅ Final combined challenge:"
print(final_challenge)
# A tibble: 5 × 22
  USUBJID AESEQ AEDECOD   AESTDTC          AEENDTC   RFSTDTC AESTDT
  <chr>   <dbl> <chr>     <chr>            <chr>     <chr>   <dttm>
1 001-001     1 HEADACHE  2024-01-20T08:30 2024-01-… 2024-0… 2024-01-20 08:30:00
2 001-001     2 NAUSEA    2024-01-25T14:15 2024-01-… 2024-0… 2024-01-25 14:15:00
3 001-002     1 FATIGUE   2024-01-18T09:00 2024-01-… 2024-0… 2024-01-18 09:00:00
4 001-002     2 HEADACHE  2024-01-22T16:30 2024-01-… 2024-0… 2024-01-22 16:30:00
5 001-003     1 DIZZINESS <NA>             <NA>      2024-0… NA
# ℹ 15 more variables: AEENDT <dttm>, RFSTDT <dttm>, AESTDY <dbl>,
#   AEENDY <dbl>, AE_DURATION_DAYS <dbl>, AESTDY_SAFE <dbl>, VALID_DATES <chr>,
#   SEVERITY <chr>, TREATMENT <chr>, AE_DESCRIPTION <chr>, ONGOING_AE <chr>,
#   EARLY_ONSET <chr>, LONG_DURATION <chr>, SEVERITY_FACTOR <ord>,
#   TREATMENT_FACTOR <fct>

📊 Summary Statistics

cat("=== SOLUTION SUMMARY ===\n")
=== SOLUTION SUMMARY ===
cat("AEs by study day period:\n")
AEs by study day period:
print(table(ae_with_categories$STUDYDAY_PERIOD, useNA = "ifany"))

Unknown  Week 1  Week 2
      2       1       3 
cat("\nAEs by severity:\n") 

AEs by severity:
print(table(ae_cleaned$SEVERITY, useNA = "ifany"))

    MILD MODERATE   SEVERE
       2        2        1 
cat("\nEarly AEs (≤7 days):\n")

Early AEs (≤7 days):
print(sum(ae_with_categories$EARLY_AE == "Y", na.rm = TRUE))
[1] 1
cat("\nRegular expression validation summary:\n")

Regular expression validation summary:
print(summary(clinical_validated[c("VALID_SUBJID", "VALID_PHONE", "VALID_EMAIL")]))
 VALID_SUBJID    VALID_PHONE     VALID_EMAIL
 Mode :logical   Mode :logical   Mode :logical
 FALSE:2         FALSE:1         FALSE:3
 TRUE :3         TRUE :4         TRUE :2        
cat("\nFactor level summary:\n")

Factor level summary:
print(summary(clinical_factors_clean$SEVERITY_FACTOR))
    Mild Moderate   Severe
       3        3        2 

🎉 Module 4 Solution Complete!

You’ve successfully mastered R4DS concepts:

Date parsing with lubridate (ymd, dmy, mdy)
Study day calculations (AESTDY) with proper handling of missing data
String manipulation with stringr (R4DS Chapter 14)
Regular expressions for clinical data validation (R4DS Chapter 15)
Factor management with forcats (R4DS Chapter 16)
Clinical pattern matching and data quality validation
Ordered factors for severity assessment and risk categorization
Combined operations integrating dates, strings, regex, and factors

🔗 R4DS Integration Success

This module successfully integrated concepts from:

- Chapter 14: Strings — complete string manipulation toolkit
- Chapter 15: Regular expressions — advanced pattern matching for clinical validation
- Chapter 16: Factors — categorical data management for clinical variables

Excellent work! You’re ready for Module 5: Functions & Macro Translation! 🚀


💾 Part 4 — Exporting Tables

# NOTE(review): `ft` (a flextable) and `gt_tbl` (a gt table) are not created
# anywhere in this document; this chunk presumably relies on an earlier
# listing section, with flextable and gt already attached. Confirm before
# running it on its own.

# Both writers fail when the target folder is missing, so create it first.
# This is a no-op if the folder already exists.
dir.create("output", showWarnings = FALSE, recursive = TRUE)

# Export flextable to Word: start an empty document, append the table, save
library(officer)
doc <- read_docx()
doc <- body_add_flextable(doc, ft)
print(doc, target = "output/subject_listing.docx")

# Export gt table to HTML
# webshot2 is only needed by gtsave() for image/PDF targets; HTML output
# does not use it.
library(webshot2)
gtsave(gt_tbl, filename = "output/ae_listing.html")

✅ Summary

  • Created static listings using flextable and gt
  • Added formatting and conditional coloring
  • Created interactive listings using reactable
  • Practiced exporting tables to Word and HTML