This guide demonstrates practical applications of date/time manipulation with lubridate and text processing with stringr for clinical programming scenarios.
Overview
Clinical programming frequently involves:
- Date Operations: Study day calculations, age derivations, date validations
- Text Processing: Term standardization, ID parsing, data cleaning
- Format Conversions: ISO 8601 compliance, controlled terminology
- Pattern Matching: Validation rules, data extraction
Required Libraries
# Attach the packages used by the examples in this guide.
# Each call sits on its own line: R cannot parse several calls fused together
# without a newline or `;` between them. stringr was attached twice; once is
# enough.
library(dplyr)     # mutate(), select(), case_when(), between(), %>%
library(tibble)    # tibble()
library(tidyr)
library(lubridate) # ymd(), dmy(), mdy(), ymd_hms(), interval(), time_length()
library(stringr)   # str_detect(), str_extract(), str_replace_all(), ...
library(readr)

# Banner for the guide.
cat("=== Date & Text Functions Guide ===\n")
=== Date & Text Functions Guide ===
# Subtitle, followed by one blank line before the first section.
cat("lubridate and stringr for clinical programming\n\n", sep = "")
lubridate and stringr for clinical programming
Date/Time Operations with lubridate
1. Basic Date Creation and Parsing
# Section banner for the date creation/parsing examples.
cat("=== Date Creation and Parsing ===\n", sep = "")
=== Date Creation and Parsing ===
# One-row tibble showing several equivalent ways of building date and
# date-time values with lubridate.
dates_example <- tibble(
  # Character input in year-month-day, month-day-year and day-month-year order
  date_ymd = ymd("2024-03-15"),
  date_mdy = mdy("03/15/2024"),
  date_dmy = dmy("15/03/2024"),

  # Assembled from numeric year / month / day components
  date_make = make_date(year = 2024, month = 3, day = 15),

  # Values taken from the system clock when the script runs
  today_date = today(),
  now_datetime = now(),

  # ISO 8601-style strings (common in clinical data)
  iso_datetime = ymd_hms("2024-03-15 14:30:25"),
  iso_date_only = ymd("2024-03-15")
)

print("Date creation examples:")
# Parsing clinical date formats commonly seen in EDC systems.
clinical_dates <- c(
  "2024-01-15",          # ISO standard
  "15-JAN-2024",         # SAS-like format
  "01/15/24",            # US format, short year
  "2024-01-15T10:30:00", # ISO with time
  "15JAN2024:10:30:00"   # SAS datetime format
)

# Each string is parsed with the lubridate function matching its field order.
# The two date-time inputs are converted to Date explicitly with as_date():
# combining Date and POSIXct values in a single c() call relies on implicit
# coercion by c.Date(), which only handles POSIXct correctly on recent R
# versions. The explicit conversion keeps `parsed` a plain Date column
# everywhere.
# `locale` is pinned for the inputs containing English month abbreviations so
# that parsing does not depend on the session locale.
parsed_dates <- tibble(
  original = clinical_dates,
  parsed = c(
    ymd(clinical_dates[1]),
    dmy(clinical_dates[2], locale = "en_US.UTF-8"),
    mdy(clinical_dates[3]),
    as_date(ymd_hms(clinical_dates[4])),
    as_date(dmy_hms(clinical_dates[5], locale = "en_US.UTF-8"))
  )
)

# print() shows escape sequences literally ("\n" would appear as backslash-n),
# so the blank separator line is emitted with cat() instead.
cat("\n")
print("Clinical date parsing:")
# Demographics with birth dates.
# The seed makes the randomly drawn birth and consent dates reproducible
# between runs of the guide.
set.seed(20240115)
demographics <- tibble(
  USUBJID = paste0("STUDY-001-", sprintf("%03d", 1:10)),
  BIRTH_DATE = sample(
    seq(as.Date("1950-01-01"), as.Date("1995-12-31"), by = "day"),
    10
  ),
  ICF_DATE = as.Date("2024-01-10") + days(sample(0:20, 10, replace = TRUE)),
  RFSTDTC = as.Date("2024-01-15")
)

# Multiple age derivations.
# Ages in completed years are taken from calendar-aware intervals. Dividing a
# day count by 365.25 is only an approximation and can be one year short on a
# birthday (e.g. 2001-01-15 to 2024-01-15 is 8400 days -> 22.998 "years").
age_calculations <- demographics %>%
  mutate(
    # Age at informed consent
    AGE_AT_ICF = floor(time_length(interval(BIRTH_DATE, ICF_DATE), "years")),

    # Age at study start (standard)
    AGE = floor(time_length(interval(BIRTH_DATE, RFSTDTC), "years")),

    # Kept for backward compatibility; now identical to AGE
    AGE_PRECISE = floor(time_length(interval(BIRTH_DATE, RFSTDTC), "years")),

    # Age in months (for pediatric studies)
    AGE_MONTHS = floor(time_length(interval(BIRTH_DATE, RFSTDTC), "months")),

    # Age groups for analysis. Missing ages stay missing instead of falling
    # through to "65+", and subjects under 18 are not labelled "18-29".
    AGEGROUP = case_when(
      is.na(AGE) ~ NA_character_,
      AGE < 18 ~ "<18",
      AGE < 30 ~ "18-29",
      AGE < 50 ~ "30-49",
      AGE < 65 ~ "50-64",
      TRUE ~ "65+"
    ),

    # Birth year for cohort analysis
    BIRTH_YEAR = year(BIRTH_DATE),

    # Exact number of days between birth and study start
    DAYS_SINCE_BIRTH = as.numeric(difftime(RFSTDTC, BIRTH_DATE, units = "days"))
  )

print("Age calculations:")
# Clinical trials often span multiple time zones.
multi_site_data <- tibble(
  SITE_ID = c("001", "002", "003", "004"),
  SITE_NAME = c("New York", "London", "Tokyo", "Sydney"),
  TIMEZONE = c(
    "America/New_York", "Europe/London", "Asia/Tokyo", "Australia/Sydney"
  ),
  LOCAL_TIME = c(
    "2024-03-15 14:30:00", "2024-03-15 19:30:00",
    "2024-03-16 03:30:00", "2024-03-16 05:30:00"
  )
)

# Convert each site's local wall-clock time to UTC.
utc_times <- multi_site_data %>%
  mutate(
    # ymd_hms() without `tz` labels the wall-clock reading as UTC; this column
    # therefore holds the local clock time, not a true instant.
    LOCAL_DATETIME = ymd_hms(LOCAL_TIME),

    # Reinterpret each clock time in its own site time zone and express the
    # resulting instant in UTC. Previously the local time was formatted
    # unchanged, so UTC_ISO was not actually UTC.
    UTC_DATETIME = force_tzs(
      LOCAL_DATETIME,
      tzones = TIMEZONE,
      tzone_out = "UTC"
    ),

    # Format for CDISC submission (ISO 8601)
    UTC_ISO = format(UTC_DATETIME, "%Y-%m-%dT%H:%M:%S"),

    # Study day based on the local calendar date. There is no day 0: dates on
    # or after the start are counted from 1, earlier dates are negative.
    STUDY_START = ymd("2024-03-15"),
    STUDY_DAY = if_else(
      date(LOCAL_DATETIME) >= STUDY_START,
      as.numeric(date(LOCAL_DATETIME) - STUDY_START) + 1,
      as.numeric(date(LOCAL_DATETIME) - STUDY_START)
    )
  )

print("Multi-site time zone handling:")
# Blank line, then the banner for the end-to-end processing example.
cat("\n=== Complete Data Processing Pipeline ===\n", sep = "")
=== Complete Data Processing Pipeline ===
# Simulate raw clinical data with common issues: inconsistent ID casing and
# separators, mixed date formats, stray whitespace, and free-text dosing.
raw_clinical_data <- tibble(
  subject_id = c("abc-123-001", "ABC-123-002", "abc 123 003", "ABC-123-004"),
  birth_date = c("1985-06-15", "15-JUN-1970", "1990/03/20", "1988-12-01"),
  consent_date = c(
    "2024-01-10 09:30", "2024-01-12 14:15",
    "2024-01-08 11:00", "2024-01-15 10:45"
  ),
  adverse_event = c(" headache ", "NAUSEA", "stomach pain", ""),
  medication = c(
    "Aspirin 81mg daily", "Tylenol 500 MG PRN",
    "Ibuprofen", "Metformin 500mg BID"
  ),
  comments = c(
    "patient doing well", "MILD SYMPTOMS ONLY",
    "no issues", "follow-up needed"
  )
)

#' Clean and standardize raw clinical records
#'
#' @param data A data frame with character columns `subject_id`, `birth_date`,
#'   `consent_date`, `adverse_event`, `medication` and `comments`.
#' @param study_start Length-one `Date` used as the reference date for
#'   `CONSENT_STUDY_DAY`. Defaults to 2024-01-15, the value previously
#'   hard-coded in the function body.
#' @return A data frame with one row per input row and the columns `USUBJID`,
#'   `BIRTH_DATE`, `CONSENT_DATE`, `AGE_AT_CONSENT`, `AE_TERM_CLEAN`,
#'   `MED_NAME`, `MED_DOSE`, `MED_UNIT`, `COMMENTS_CLEAN`, `BIRTH_DATE_VALID`,
#'   `AGE_REASONABLE`, `HAS_AE` and `CONSENT_STUDY_DAY`.
process_clinical_data <- function(data, study_start = as.Date("2024-01-15")) {
  stopifnot(
    is.data.frame(data),
    inherits(study_start, "Date"),
    length(study_start) == 1
  )

  required_cols <- c(
    "subject_id", "birth_date", "consent_date",
    "adverse_event", "medication", "comments"
  )
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s): ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # A unit is only accepted directly after a digit (optionally separated by
  # whitespace) and is matched case-insensitively, so "500 MG" is recognized
  # and a "g" inside a drug name is not mistaken for a unit.
  unit_after_dose <- regex("(?<=\\d)\\s*(?:mg|g)\\b", ignore_case = TRUE)

  data %>%
    mutate(
      # Standardize subject IDs: trim, join parts with "-", upper-case
      USUBJID = str_to_upper(
        str_replace_all(str_trim(subject_id), "\\s+", "-")
      ),

      # Parse and standardize dates. case_when() evaluates every right-hand
      # side on the full column, so each parser also sees values in the other
      # formats; quiet = TRUE silences the resulting "failed to parse"
      # warnings. The patterns still decide which parse result is used.
      BIRTH_DATE = case_when(
        str_detect(birth_date, "^\\d{4}-\\d{2}-\\d{2}$") ~
          ymd(birth_date, quiet = TRUE),
        str_detect(birth_date, "^\\d{2}-[A-Za-z]{3}-\\d{4}$") ~
          dmy(birth_date, quiet = TRUE),
        str_detect(birth_date, "^\\d{4}/\\d{2}/\\d{2}$") ~
          ymd(birth_date, quiet = TRUE),
        TRUE ~ as.Date(NA)
      ),
      CONSENT_DATETIME = ymd_hm(consent_date),
      CONSENT_DATE = date(CONSENT_DATETIME),

      # Age at consent in completed years
      AGE_AT_CONSENT = floor(
        time_length(interval(BIRTH_DATE, CONSENT_DATE), "years")
      ),

      # Standardize adverse events; blank terms become missing
      AE_TERM_CLEAN = case_when(
        str_trim(adverse_event) == "" ~ NA_character_,
        str_detect(str_to_lower(adverse_event), "head") ~ "Headache",
        str_detect(str_to_lower(adverse_event), "nausea|stomach") ~ "Nausea",
        TRUE ~ str_to_title(str_trim(adverse_event))
      ),

      # Parse medications. The leading run of letters/spaces is the name; it
      # is trimmed so "Aspirin 81mg" yields "Aspirin", not "Aspirin ".
      MED_NAME = str_trim(str_extract(medication, "^[A-Za-z\\s]+")),
      MED_DOSE = as.numeric(str_extract(medication, "\\d+")),
      MED_UNIT = str_to_lower(
        str_trim(str_extract(medication, unit_after_dose))
      ),

      # Clean comments: collapse whitespace, sentence case
      COMMENTS_CLEAN = str_to_sentence(str_squish(comments)),

      # Data quality flags
      BIRTH_DATE_VALID = !is.na(BIRTH_DATE),
      AGE_REASONABLE = between(AGE_AT_CONSENT, 18, 90),
      HAS_AE = !is.na(AE_TERM_CLEAN),

      # Study day relative to study_start. There is no day 0: the reference
      # date itself is day 1, the day before it is day -1.
      CONSENT_STUDY_DAY = if_else(
        CONSENT_DATE >= study_start,
        as.numeric(CONSENT_DATE - study_start) + 1,
        as.numeric(CONSENT_DATE - study_start)
      )
    ) %>%
    # Select final variables
    select(
      USUBJID, BIRTH_DATE, CONSENT_DATE, AGE_AT_CONSENT,
      AE_TERM_CLEAN, MED_NAME, MED_DOSE, MED_UNIT,
      COMMENTS_CLEAN, BIRTH_DATE_VALID, AGE_REASONABLE,
      HAS_AE, CONSENT_STUDY_DAY
    )
}

# Process the data
processed_data <- process_clinical_data(raw_clinical_data)
Warning: There were 3 warnings in `mutate()`.
The first warning was:
ℹ In argument: `BIRTH_DATE = case_when(...)`.
Caused by warning:
! 1 failed to parse.
ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.