Module 4 Solution — Date & Text Handling

✅ Module 4 — Date & Text Handling (Solution)

Integrating R4DS Chapters 14 (Strings), 15 (Regular Expressions), 16 (Factors)

📦 Setup and Libraries

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(tibble)
library(lubridate)


Attaching package: 'lubridate'

The following objects are masked from 'package:base':

    date, intersect, setdiff, union

library(stringr)    # R4DS Chapter 14: Strings
library(forcats)    # R4DS Chapter 16: Factors

✅ Exercise 1 Solution: Date Parsing Challenge

# Raw adverse-event records, one row per subject. The onset date is recorded
# in a different layout on every row; the reference start date is the same
# ISO date throughout.
ae_raw <- tribble(
  ~USUBJID,  ~AEDECOD,    ~AESTDTC_MESSY, ~RFSTDTC,
  "001-001", "HEADACHE",  "2024-01-20",   "2024-01-15",
  "001-002", "NAUSEA",    "25/01/2024",   "2024-01-15",
  "001-003", "FATIGUE",   "01/18/2024",   "2024-01-15",
  "001-004", "DIZZINESS", "20240122",     "2024-01-15",
  "001-005", "RASH",      "2024/01/25",   "2024-01-15",
  "001-006", "COUGH",     "Jan 26, 2024", "2024-01-15"
)

# Solution: Parse dates and calculate study days
#
# Each AESTDTC_MESSY value is matched to a layout by regex and parsed with the
# corresponding lubridate function. case_when() evaluates every right-hand
# side on the whole column, so the parsers are called with quiet = TRUE to
# avoid "failed to parse" warnings for rows that belong to another branch.
ae_dates <- ae_raw %>%
  mutate(
    AESTDT = case_when(
      # Year-first with "-" or "/" separators (2024-01-20, 2024/01/25)
      str_detect(AESTDTC_MESSY, "^\\d{4}[-/]\\d{2}[-/]\\d{2}$") ~
        ymd(AESTDTC_MESSY, quiet = TRUE),
      # dd/mm/yyyy and mm/dd/yyyy have the same shape, so a regex cannot tell
      # them apart: try day-first, then fall back to month-first when that is
      # not a valid calendar date. Values valid in both readings (e.g.
      # 03/04/2024) resolve as day-first.
      str_detect(AESTDTC_MESSY, "^\\d{2}/\\d{2}/\\d{4}$") ~
        coalesce(
          dmy(AESTDTC_MESSY, quiet = TRUE),
          mdy(AESTDTC_MESSY, quiet = TRUE)
        ),
      # Compact yyyymmdd
      str_detect(AESTDTC_MESSY, "^\\d{8}$") ~
        ymd(AESTDTC_MESSY, quiet = TRUE),
      # Abbreviated month name, e.g. "Jan 26, 2024" (one- or two-digit day)
      str_detect(AESTDTC_MESSY, "^[A-Za-z]{3}\\s+\\d{1,2},\\s+\\d{4}$") ~
        mdy(AESTDTC_MESSY, quiet = TRUE),
      # Anything else stays missing
      TRUE ~ as.Date(NA)
    ),

    # Parse RFSTDTC
    RFSTDT = ymd(RFSTDTC),

    # Calculate study day. There is no Day 0: the reference date is Day 1 and
    # the day before it is Day -1, so 1 is added only on or after RFSTDT.
    AESTDY = as.numeric(AESTDT - RFSTDT),
    AESTDY = if_else(AESTDY >= 0, AESTDY + 1, AESTDY)
  )

Warning: There were 5 warnings in `mutate()`.
The first warning was:
ℹ In argument: `AESTDT = case_when(...)`.
Caused by warning:
!  3 failed to parse.
ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.

print("✅ Parsed dates and study days:")

[1] "✅ Parsed dates and study days:"

print(ae_dates)

# A tibble: 6 × 7
  USUBJID AEDECOD   AESTDTC_MESSY RFSTDTC    AESTDT     RFSTDT     AESTDY
  <chr>   <chr>     <chr>         <chr>      <date>     <date>      <dbl>
1 001-001 HEADACHE  2024-01-20    2024-01-15 2024-01-20 2024-01-15      6
2 001-002 NAUSEA    25/01/2024    2024-01-15 2024-01-25 2024-01-15     11
3 001-003 FATIGUE   01/18/2024    2024-01-15 NA         2024-01-15     NA
4 001-004 DIZZINESS 20240122      2024-01-15 2024-01-22 2024-01-15      8
5 001-005 RASH      2024/01/25    2024-01-15 NA         2024-01-15     NA
6 001-006 COUGH     Jan 26, 2024  2024-01-15 2024-01-26 2024-01-15     12

✅ Exercise 2 Solution: Study Day Categories

# Label each AE with the study period of its onset day and flag early onsets
ae_with_categories <- mutate(
  ae_dates,
  # Branches are tested top to bottom, so each upper bound only has to
  # separate a bucket from the later ones
  STUDYDAY_PERIOD = case_when(
    is.na(AESTDY) ~ "Unknown",
    AESTDY <= 0 ~ "Pre-treatment",
    AESTDY <= 7 ~ "Week 1",
    AESTDY <= 14 ~ "Week 2",
    AESTDY <= 28 ~ "Month 1",
    .default = "After Month 1"
  ),
  # "Y" for onset on study days 1 through 7, "N" otherwise; a missing study
  # day gives a missing flag
  EARLY_AE = ifelse(between(AESTDY, 1, 7), "Y", "N")
)

print("✅ With study day categories:")

[1] "✅ With study day categories:"

print(ae_with_categories)

# A tibble: 6 × 9
  USUBJID AEDECOD   AESTDTC_MESSY RFSTDTC    AESTDT     RFSTDT     AESTDY
  <chr>   <chr>     <chr>         <chr>      <date>     <date>      <dbl>
1 001-001 HEADACHE  2024-01-20    2024-01-15 2024-01-20 2024-01-15      6
2 001-002 NAUSEA    25/01/2024    2024-01-15 2024-01-25 2024-01-15     11
3 001-003 FATIGUE   01/18/2024    2024-01-15 NA         2024-01-15     NA
4 001-004 DIZZINESS 20240122      2024-01-15 2024-01-22 2024-01-15      8
5 001-005 RASH      2024/01/25    2024-01-15 NA         2024-01-15     NA
6 001-006 COUGH     Jan 26, 2024  2024-01-15 2024-01-26 2024-01-15     12
# ℹ 2 more variables: STUDYDAY_PERIOD <chr>, EARLY_AE <chr>

✅ Exercise 3 Solution: String Cleaning Challenge (R4DS Chapter 14)

# Free-text adverse event terms with inconsistent case, padding and embedded
# severity wording, plus medication strings that mix drug name and dose
ae_messy_text <- tribble(
  ~USUBJID, ~AEDECOD_RAW, ~MEDICATION, ~TREATMENT_ARM, ~SEVERITY,
  "001-001", "  mild headache  ", "Ibuprofen 400mg", "Active", "Mild",
  "001-002", "SEVERE nausea (grade 3)", "ondansetron 8 MG", "Placebo", "Moderate",
  "001-003", "fatigue - moderate", "caffeine 200mg", "Active", "Severe",
  "001-004", "  DIZZINESS mild  ", "rest", "Active", "Mild",
  "001-005", "skin rash (MODERATE)", "hydrocortisone 1%", "Placebo", "Moderate"
)

# Solution: Clean and extract information
#
# Standardises the raw AE text, derives severity and a severity-free base
# term, flags selected events, and splits the medication string into a numeric
# dose and a cleaned name.
ae_cleaned <- ae_messy_text %>%
  mutate(
    # Clean adverse event terms
    AEDECOD_CLEAN = AEDECOD_RAW %>%
      str_trim() %>%                       # Remove leading/trailing spaces
      str_to_upper() %>%                   # Convert to uppercase
      str_remove_all("\\([^)]*\\)") %>%    # Remove parentheses and contents
      str_replace_all(" - ", " ") %>%      # Turn spaced dashes into a space
      str_squish(),                        # Collapse repeated spaces and trim

    # Extract severity from the raw text. This overwrites the SEVERITY column
    # that ae_messy_text already carries. The first matching word wins.
    SEVERITY = case_when(
      str_detect(AEDECOD_RAW, "(?i)mild") ~ "MILD",
      str_detect(AEDECOD_RAW, "(?i)moderate") ~ "MODERATE",
      str_detect(AEDECOD_RAW, "(?i)severe") ~ "SEVERE",
      TRUE ~ "UNKNOWN"
    ),

    # Extract base term: drop a severity word at the start or at the end
    AETERM_BASE = AEDECOD_CLEAN %>%
      str_remove("^(MILD|MODERATE|SEVERE)\\s+") %>%
      str_remove("\\s+(MILD|MODERATE|SEVERE)$"),

    # Create specific AE flags
    HEADACHE_FLAG = ifelse(str_detect(AETERM_BASE, "HEADACHE"), "Y", "N"),
    NAUSEA_FLAG = ifelse(str_detect(AETERM_BASE, "NAUSEA"), "Y", "N"),
    FATIGUE_FLAG = ifelse(str_detect(AETERM_BASE, "FATIGUE"), "Y", "N"),

    # Extract numeric dose from medication. The optional fractional part keeps
    # decimal doses intact ("2.5mg" gives 2.5 rather than 2); medications
    # without a number give NA.
    DOSE_NUMERIC = as.numeric(str_extract(MEDICATION, "\\d+(?:\\.\\d+)?")),

    # Clean medication names: uppercase, then strip the dose (number, optional
    # space, unit letters or "%"), using the same number pattern as above
    MED_CLEAN = MEDICATION %>%
      str_to_upper() %>%
      str_remove_all("\\d+(?:\\.\\d+)?\\s?[A-Z%]+") %>%
      str_trim()
  )

print("✅ Cleaned text data:")

[1] "✅ Cleaned text data:"

print(ae_cleaned)

# A tibble: 5 × 12
  USUBJID AEDECOD_RAW            MEDICATION TREATMENT_ARM SEVERITY AEDECOD_CLEAN
  <chr>   <chr>                  <chr>      <chr>         <chr>    <chr>
1 001-001 "  mild headache  "    Ibuprofen… Active        MILD     MILD HEADACHE
2 001-002 "SEVERE nausea (grade… ondansetr… Placebo       SEVERE   SEVERE NAUSEA
3 001-003 "fatigue - moderate"   caffeine … Active        MODERATE FATIGUE MODE…
4 001-004 "  DIZZINESS mild  "   rest       Active        MILD     DIZZINESS MI…
5 001-005 "skin rash (MODERATE)" hydrocort… Placebo       MODERATE SKIN RASH
# ℹ 6 more variables: AETERM_BASE <chr>, HEADACHE_FLAG <chr>,
#   NAUSEA_FLAG <chr>, FATIGUE_FLAG <chr>, DOSE_NUMERIC <dbl>, MED_CLEAN <chr>

✅ Exercise 4 Solution: Regular Expressions Practice (R4DS Chapter 15)

# Sample records mixing well-formed and malformed identifiers, phone numbers,
# e-mail addresses, lab result strings and visit dates
clinical_data <- tribble(
  ~USUBJID, ~PHONE, ~EMAIL, ~LAB_RESULT, ~VISIT_DATE,
  "001-001", "(555) 123-4567", "investigator@clinic.com",
  "WBC: 7.2 K/uL", "2024-01-15",
  "002-001", "555-123-4567", "bad.email",
  "Hemoglobin: 12.5 g/dL", "2024/01/20",
  "001-ABC", "555.123.4567", "test@site.org",
  "Glucose: 95 mg/dL", "15-Jan-2024",
  "999-123", "5551234567", "missing@",
  "Invalid result", "invalid-date",
  "001-99", "invalid-phone", "@incomplete.com",
  "", "2024-13-45"
)

# Solution: Use regular expressions for validation
#
# Adds one logical flag per validated field plus values extracted from the
# lab result and visit date strings.
clinical_validated <- clinical_data %>%
  mutate(
    # Validate subject ID format (###-###)
    VALID_SUBJID = str_detect(USUBJID, "^\\d{3}-\\d{3}$"),

    # Validate phone number formats: area code with or without parentheses,
    # then 3 and 4 digits, each optionally preceded by "-", "." or whitespace
    VALID_PHONE = str_detect(
      PHONE,
      "^(\\(\\d{3}\\)|\\d{3})[-.\\s]?\\d{3}[-.\\s]?\\d{4}$"
    ),

    # Validate email addresses
    VALID_EMAIL = str_detect(
      EMAIL,
      "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
    ),

    # Extract the first number in the lab result (NA when there is none)
    LAB_VALUE = as.numeric(str_extract(LAB_RESULT, "\\d+\\.?\\d*")),

    # Extract lab test name (before colon). The lookahead requires the colon
    # to be present, so strings without one ("Invalid result") give NA
    # instead of being returned whole.
    LAB_TEST = str_extract(LAB_RESULT, "^[^:]+(?=:)"),

    # Validate date formats (YYYY-MM-DD only); this checks the shape, not
    # whether the date exists on the calendar
    VALID_DATE_FORMAT = str_detect(VISIT_DATE, "^\\d{4}-\\d{2}-\\d{2}$"),

    # Extract year from valid dates only: the value must have the YYYY-MM-DD
    # shape and parse as a real date, so "2024/01/20" and "2024-13-45" give NA
    VISIT_YEAR = if_else(
      VALID_DATE_FORMAT & !is.na(ymd(VISIT_DATE, quiet = TRUE)),
      str_sub(VISIT_DATE, 1, 4),
      NA_character_
    )
  )

print("✅ Regular expression validation results:")

[1] "✅ Regular expression validation results:"

print(clinical_validated)

# A tibble: 5 × 12
  USUBJID PHONE EMAIL LAB_RESULT VISIT_DATE VALID_SUBJID VALID_PHONE VALID_EMAIL
  <chr>   <chr> <chr> <chr>      <chr>      <lgl>        <lgl>       <lgl>
1 001-001 (555… inve… "WBC: 7.2… 2024-01-15 TRUE         TRUE        TRUE
2 002-001 555-… bad.… "Hemoglob… 2024/01/20 TRUE         TRUE        FALSE
3 001-ABC 555.… test… "Glucose:… 15-Jan-20… FALSE        TRUE        TRUE
4 999-123 5551… miss… "Invalid … invalid-d… TRUE         TRUE        FALSE
5 001-99  inva… @inc… ""         2024-13-45 FALSE        FALSE       FALSE
# ℹ 4 more variables: LAB_VALUE <dbl>, LAB_TEST <chr>, VALID_DATE_FORMAT <lgl>,
#   VISIT_YEAR <chr>

✅ Exercise 5 Solution: Factor Management Practice (R4DS Chapter 16)

# Eight subjects with character-coded severity, treatment, outcome and visit,
# to be converted to factors below
clinical_factors <- tribble(
  ~USUBJID,  ~SEVERITY,  ~TREATMENT,  ~OUTCOME,    ~VISIT_NAME,
  "001-001", "Mild",     "Placebo",   "Recovered", "Screening",
  "002-002", "Severe",   "Low Dose",  "Ongoing",   "Day 1",
  "003-003", "Moderate", "High Dose", "Recovered", "Week 2",
  "004-004", "Mild",     "Placebo",   "Worsened",  "Month 1",
  "005-005", "Severe",   "Low Dose",  "Recovered", "Week 4",
  "006-006", "Moderate", "High Dose", "Ongoing",   "End of Study",
  "007-007", "Mild",     "Placebo",   "Recovered", "Follow-up",
  "008-008", "Moderate", "Low Dose",  "Ongoing",   "Unscheduled"
)

# Turn the character columns into factors with explicit level order and
# derive an ordered risk category from severity and treatment
clinical_factors_clean <- mutate(
  clinical_factors,
  # Severity is ordinal: Mild < Moderate < Severe
  SEVERITY_FACTOR = ordered(
    SEVERITY,
    levels = c("Mild", "Moderate", "Severe")
  ),
  # Treatment keeps dose order and gets display labels that include the dose
  TREATMENT_FACTOR = factor(
    TREATMENT,
    levels = c("Placebo", "Low Dose", "High Dose"),
    labels = c("Placebo", "Low Dose (5mg)", "High Dose (10mg)")
  ),
  # Outcome uses the default level order
  OUTCOME_FACTOR = factor(OUTCOME),
  # Visits follow the listed schedule order rather than alphabetical order
  VISIT_FACTOR = factor(
    VISIT_NAME,
    levels = c(
      "Screening", "Day 1", "Week 2", "Week 4",
      "Month 1", "End of Study", "Follow-up", "Unscheduled"
    )
  ),
  # Risk: Severe on High Dose is High, Moderate or any other Severe is
  # Medium, Mild is Low; anything unmatched falls back to Medium
  RISK_CATEGORY = ordered(
    case_when(
      SEVERITY == "Severe" & TREATMENT == "High Dose" ~ "High",
      SEVERITY == "Moderate" |
        (SEVERITY == "Severe" & TREATMENT != "High Dose") ~ "Medium",
      SEVERITY == "Mild" ~ "Low",
      .default = "Medium"
    ),
    levels = c("Low", "Medium", "High")
  )
)

print("✅ Factor management results:")

[1] "✅ Factor management results:"

print(clinical_factors_clean)

# A tibble: 8 × 10
  USUBJID SEVERITY TREATMENT OUTCOME VISIT_NAME SEVERITY_FACTOR TREATMENT_FACTOR
  <chr>   <chr>    <chr>     <chr>   <chr>      <ord>           <fct>
1 001-001 Mild     Placebo   Recove… Screening  Mild            Placebo
2 002-002 Severe   Low Dose  Ongoing Day 1      Severe          Low Dose (5mg)
3 003-003 Moderate High Dose Recove… Week 2     Moderate        High Dose (10mg)
4 004-004 Mild     Placebo   Worsen… Month 1    Mild            Placebo
5 005-005 Severe   Low Dose  Recove… Week 4     Severe          Low Dose (5mg)
6 006-006 Moderate High Dose Ongoing End of St… Moderate        High Dose (10mg)
7 007-007 Mild     Placebo   Recove… Follow-up  Mild            Placebo
8 008-008 Moderate Low Dose  Ongoing Unschedul… Moderate        Low Dose (5mg)
# ℹ 3 more variables: OUTCOME_FACTOR <fct>, VISIT_FACTOR <fct>,
#   RISK_CATEGORY <ord>

# Factor manipulation exercises solutions:
# counts per severity level
severity_counts <- table(clinical_factors_clean[["SEVERITY_FACTOR"]])

# treatment levels reordered by how often each occurs
treatment_reordered <- clinical_factors_clean %>%
  pull(TREATMENT_FACTOR) %>%
  fct_infreq()

# visits merged into three coarser groups
visit_collapsed <- clinical_factors_clean %>%
  pull(VISIT_FACTOR) %>%
  fct_collapse(
    "Early" = c("Screening", "Day 1"),
    "Late" = c("End of Study", "Follow-up"),
    "Other" = c("Week 2", "Week 4", "Month 1", "Unscheduled")
  )

print("✅ Factor manipulation results:")

[1] "✅ Factor manipulation results:"

print(severity_counts)


    Mild Moderate   Severe
       3        3        2

print(levels(treatment_reordered))

[1] "Placebo"          "Low Dose (5mg)"   "High Dose (10mg)"

print(levels(visit_collapsed))

[1] "Early" "Other" "Late"

✅ Exercise 6 Solution: AESTDY Derivation Practice

# Adverse events with date-time stamps, several events per subject, and one
# record whose start and end are both missing
complex_ae <- tribble(
  ~USUBJID, ~AESEQ, ~AEDECOD, ~AESTDTC, ~AEENDTC, ~RFSTDTC,
  "001-001", 1, "HEADACHE",
  "2024-01-20T08:30", "2024-01-21T10:00", "2024-01-15T09:00",
  "001-001", 2, "NAUSEA",
  "2024-01-25T14:15", "2024-01-26T08:00", "2024-01-15T09:00",
  "001-002", 1, "FATIGUE",
  "2024-01-18T09:00", "2024-01-20T18:00", "2024-01-16T10:00",
  "001-002", 2, "HEADACHE",
  "2024-01-22T16:30", "2024-01-23T12:00", "2024-01-16T10:00",
  "001-003", 1, "DIZZINESS",
  NA_character_, NA_character_, "2024-01-15T09:00"
)

# Solution: Derive comprehensive study day variables
#
# Parses the date-time strings, derives start/end study days and duration
# from the date parts, and labels each record's date quality.
complex_ae_derived <- complex_ae %>%
  mutate(
    # Parse start dates/times (missing strings stay NA)
    AESTDT = ymd_hm(AESTDTC),
    AEENDT = ymd_hm(AEENDTC),
    RFSTDT = ymd_hm(RFSTDTC),

    # Calculate study days from the date part only. There is no Day 0: the
    # reference date is Day 1 and the day before it is Day -1, so 1 is added
    # only when the event date is on or after the reference date.
    AESTDY = as.numeric(as.Date(AESTDT) - as.Date(RFSTDT)),
    AESTDY = if_else(AESTDY >= 0, AESTDY + 1, AESTDY),
    AEENDY = as.numeric(as.Date(AEENDT) - as.Date(RFSTDT)),
    AEENDY = if_else(AEENDY >= 0, AEENDY + 1, AEENDY),

    # Calculate duration as the difference between end and start dates
    AE_DURATION_DAYS = as.numeric(as.Date(AEENDT) - as.Date(AESTDT)),

    # Handle missing dates explicitly
    AESTDY_SAFE = case_when(
      is.na(AESTDT) | is.na(RFSTDT) ~ NA_real_,
      TRUE ~ AESTDY
    ),

    # Create validation flags. Missing strings are reported first; strings
    # that are present but failed to parse get their own label instead of
    # being reported as "Complete".
    VALID_DATES = case_when(
      is.na(AESTDTC) ~ "Missing AE start date",
      is.na(RFSTDTC) ~ "Missing reference date",
      is.na(AESTDT) ~ "Unparseable AE start date",
      is.na(RFSTDT) ~ "Unparseable reference date",
      !is.na(AEENDTC) & is.na(AEENDT) ~ "Unparseable AE end date",
      !is.na(AEENDTC) & AEENDT < AESTDT ~ "End date before start date",
      TRUE ~ "Complete"
    )
  )

print("✅ Complex AESTDY derivations:")

[1] "✅ Complex AESTDY derivations:"

print(complex_ae_derived)

# A tibble: 5 × 14
  USUBJID AESEQ AEDECOD   AESTDTC          AEENDTC   RFSTDTC AESTDT
  <chr>   <dbl> <chr>     <chr>            <chr>     <chr>   <dttm>
1 001-001     1 HEADACHE  2024-01-20T08:30 2024-01-… 2024-0… 2024-01-20 08:30:00
2 001-001     2 NAUSEA    2024-01-25T14:15 2024-01-… 2024-0… 2024-01-25 14:15:00
3 001-002     1 FATIGUE   2024-01-18T09:00 2024-01-… 2024-0… 2024-01-18 09:00:00
4 001-002     2 HEADACHE  2024-01-22T16:30 2024-01-… 2024-0… 2024-01-22 16:30:00
5 001-003     1 DIZZINESS <NA>             <NA>      2024-0… NA
# ℹ 7 more variables: AEENDT <dttm>, RFSTDT <dttm>, AESTDY <dbl>, AEENDY <dbl>,
#   AE_DURATION_DAYS <dbl>, AESTDY_SAFE <dbl>, VALID_DATES <chr>

✅ Bonus Solution: Combined Date, Text, Regex, and Factors Challenge

# Combine everything from R4DS Chapters 14, 15, 16
#
# SEVERITY and TREATMENT are simulated with sample(); the seed is fixed so the
# rendered results are the same on every run.
set.seed(2024)
final_challenge <- complex_ae_derived %>%
  # Add severity and treatment factors
  mutate(
    SEVERITY = sample(c("Mild", "Moderate", "Severe"), n(), replace = TRUE),
    TREATMENT = sample(c("Placebo", "Active"), n(), replace = TRUE)
  ) %>%
  mutate(
    # Create comprehensive AE description with factors. A missing study day
    # is spelled out instead of being pasted as the literal text "NA"; a
    # missing duration is simply left out.
    AE_DESCRIPTION = paste0(
      AEDECOD, " - ", SEVERITY, " severity, ",
      if_else(
        is.na(AESTDY_SAFE),
        "Study Day unknown",
        paste0("Study Day ", AESTDY_SAFE)
      ),
      if_else(
        !is.na(AE_DURATION_DAYS),
        paste0(", Duration: ", AE_DURATION_DAYS, " days"),
        ""
      ),
      " (", TREATMENT, " arm)"
    ),

    # Create analysis-ready flags (NA when the underlying value is missing,
    # except ONGOING_AE, which is "Y" exactly when the end date is missing)
    ONGOING_AE = ifelse(is.na(AEENDTC), "Y", "N"),
    EARLY_ONSET = ifelse(AESTDY_SAFE <= 7 & AESTDY_SAFE >= 1, "Y", "N"),
    LONG_DURATION = ifelse(AE_DURATION_DAYS > 7, "Y", "N"),

    # Create factors
    SEVERITY_FACTOR = factor(
      SEVERITY,
      levels = c("Mild", "Moderate", "Severe"),
      ordered = TRUE
    ),
    TREATMENT_FACTOR = factor(TREATMENT, levels = c("Placebo", "Active"))
  )

print("✅ Final combined challenge:")

[1] "✅ Final combined challenge:"

print(final_challenge)

# A tibble: 5 × 22
  USUBJID AESEQ AEDECOD   AESTDTC          AEENDTC   RFSTDTC AESTDT
  <chr>   <dbl> <chr>     <chr>            <chr>     <chr>   <dttm>
1 001-001     1 HEADACHE  2024-01-20T08:30 2024-01-… 2024-0… 2024-01-20 08:30:00
2 001-001     2 NAUSEA    2024-01-25T14:15 2024-01-… 2024-0… 2024-01-25 14:15:00
3 001-002     1 FATIGUE   2024-01-18T09:00 2024-01-… 2024-0… 2024-01-18 09:00:00
4 001-002     2 HEADACHE  2024-01-22T16:30 2024-01-… 2024-0… 2024-01-22 16:30:00
5 001-003     1 DIZZINESS <NA>             <NA>      2024-0… NA
# ℹ 15 more variables: AEENDT <dttm>, RFSTDT <dttm>, AESTDY <dbl>,
#   AEENDY <dbl>, AE_DURATION_DAYS <dbl>, AESTDY_SAFE <dbl>, VALID_DATES <chr>,
#   SEVERITY <chr>, TREATMENT <chr>, AE_DESCRIPTION <chr>, ONGOING_AE <chr>,
#   EARLY_ONSET <chr>, LONG_DURATION <chr>, SEVERITY_FACTOR <ord>,
#   TREATMENT_FACTOR <fct>

📊 Summary Statistics

cat("=== SOLUTION SUMMARY ===\n")

=== SOLUTION SUMMARY ===

cat("AEs by study day period:\n")

AEs by study day period:

# Count AEs per STUDYDAY_PERIOD label; useNA = "ifany" adds an NA entry only
# when the column actually contains missing values
print(table(ae_with_categories$STUDYDAY_PERIOD, useNA = "ifany"))


Unknown  Week 1  Week 2
      2       1       3

cat("\nAEs by severity:\n")


AEs by severity:

# Count AEs per extracted SEVERITY value; useNA = "ifany" adds an NA entry
# only when missing values are present
print(table(ae_cleaned$SEVERITY, useNA = "ifany"))


    MILD MODERATE   SEVERE
       2        2        1

cat("\nEarly AEs (≤7 days):\n")


Early AEs (≤7 days):

# Number of rows whose EARLY_AE flag is "Y"; na.rm = TRUE leaves out rows
# where the comparison is NA because the flag is missing
print(sum(ae_with_categories$EARLY_AE == "Y", na.rm = TRUE))

[1] 1

cat("\nRegular expression validation summary:\n")


Regular expression validation summary:

# TRUE/FALSE counts for the subject ID, phone and e-mail validation flags
print(summary(clinical_validated[c("VALID_SUBJID", "VALID_PHONE", "VALID_EMAIL")]))

 VALID_SUBJID    VALID_PHONE     VALID_EMAIL
 Mode :logical   Mode :logical   Mode :logical
 FALSE:2         FALSE:1         FALSE:3
 TRUE :3         TRUE :4         TRUE :2

cat("\nFactor level summary:\n")


Factor level summary:

# summary() on a factor reports the number of rows per level, in level order
print(summary(clinical_factors_clean$SEVERITY_FACTOR))

    Mild Moderate   Severe
       3        3        2

🎉 Module 4 Solution Complete!

You’ve successfully mastered R4DS concepts:

✅ Date parsing with lubridate (ymd, dmy, mdy)
✅ Study day calculations (AESTDY) with proper handling of missing data
✅ String manipulation with stringr (R4DS Chapter 14)
✅ Regular expressions for clinical data validation (R4DS Chapter 15)
✅ Factor management with forcats (R4DS Chapter 16)
✅ Clinical pattern matching and data quality validation
✅ Ordered factors for severity assessment and risk categorization
✅ Combined operations integrating dates, strings, regex, and factors

🔗 R4DS Integration Success

This module successfully integrated concepts from:

- Chapter 14: Strings - Complete string manipulation toolkit
- Chapter 15: Regular expressions - Advanced pattern matching for clinical validation
- Chapter 16: Factors - Categorical data management for clinical variables

Excellent work! You’re ready for Module 5: Functions & Macro Translation! 🚀

💾 Part 4 — Exporting Tables

# Export flextable to Word
# NOTE(review): `ft` and `gt_tbl` are not defined in the visible part of this
# file; presumably a flextable and a gt table built in an earlier section.
# Confirm before running this chunk on its own.
library(officer)

# Both exports write into "output/"; create it first so the writes do not
# fail when the directory does not exist yet
dir.create("output", showWarnings = FALSE, recursive = TRUE)

doc <- read_docx()
# body_add_flextable() comes from flextable, which is not attached in this
# file, so it is called through its namespace
doc <- flextable::body_add_flextable(doc, ft)
print(doc, target = "output/subject_listing.docx")

# Export gt table to HTML
library(webshot2)
# gtsave() comes from gt, which is likewise not attached in this file
gt::gtsave(gt_tbl, filename = "output/ae_listing.html")

✅ Summary

Created static listings using flextable and gt
Added formatting and conditional coloring
Created interactive listings using reactable
Practiced exporting tables to Word and HTML