## 9.11 Exercise solutions

# Exercise 1
## Recode
# Attach display labels to the continuous variables and derive labelled
# factor versions of the numerically coded columns, plus a banded version of
# tumour thickness.
melanoma <- melanoma %>%
  mutate(
    # Continuous variables: add a label only
    age  = ff_label(age, "Age (years)"),
    year = ff_label(year, "Year"),

    # Sex: 0 = Female, 1 = Male
    sex.factor = ff_label(
      fct_recode(factor(sex), "Female" = "0", "Male" = "1"),
      "Sex"
    ),

    # Ulceration: 0 = Absent, 1 = Present
    ulcer.factor = ff_label(
      fct_recode(factor(ulcer), "Absent" = "0", "Present" = "1"),
      "Ulcerated tumour"
    ),

    # Status: recode the three outcome codes, then move "Alive" to the front
    # so that it becomes the first (reference) level
    status.factor = factor(status) %>%
      fct_recode(
        "Died melanoma" = "1",
        "Alive"         = "2",
        "Died - other"  = "3"
      ) %>%
      fct_relevel("Alive") %>%
      ff_label("Status"),

    # Thickness bands: [0,1], (1,2], (2,4], (4,max]; include.lowest keeps a
    # thickness of exactly 0 inside the first band
    t_stage.factor = cut(
      thickness,
      breaks = c(0, 1, 2, 4, max(thickness, na.rm = TRUE)),
      include.lowest = TRUE
    )
  )

# Plot
# Left panel: counts of patients by sex, filled by mort_5yr (legend hidden
# because the right panel carries it)
p1 <- ggplot(data = melanoma, mapping = aes(x = sex.factor, fill = mort_5yr)) +
  geom_bar() +
  theme(legend.position = "none")

# Right panel: same bars stacked to a common height, i.e. proportions
p2 <- ggplot(data = melanoma, mapping = aes(x = sex.factor, fill = mort_5yr)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")

# Combine the two panels into a single figure
p1 + p2
# Exercise 2
## Recode T-stage first
# Rename the interval labels of t_stage.factor to T1-T4 and attach a
# display label.
melanoma <- melanoma %>%
  mutate(
    t_stage.factor = t_stage.factor %>%
      fct_recode(
        "T1" = "[0,1]",
        "T2" = "(1,2]",
        "T3" = "(2,4]",
        "T4" = "(4,17.4]"
      ) %>%
      ff_label("T-stage")
  )

# Summarise age, T-stage and ulceration by sex, with p-values, missing
# values included, and continuous variables reported as medians
dependent <- "sex.factor"
explanatory <- c("age", "t_stage.factor", "ulcer.factor")
summary_factorlist(
  melanoma, dependent, explanatory,
  cont = "median",
  na_include = TRUE,
  p = TRUE
)

# Men have more T4 tumours and they are more likely to be ulcerated. 
# Exercise 3
# Fit the model of mort_5yr on sex, age, T-stage and ulceration, and request
# model metrics alongside the results table
dependent <- "mort_5yr"
explanatory <- c("sex.factor", "age", "t_stage.factor", "ulcer.factor")
finalfit(melanoma, dependent, explanatory, metrics = TRUE)

# c-statistic = 0.798
# In multivariable model, male vs female OR 1.26 (0.57-2.76, p=0.558).
# No relationship after accounting for T-stage and tumour ulceration.
# Sex is confounded by these two variables. 
# Exercise 4
# Odds ratio plot for the same outcome and explanatory variables
dependent <- "mort_5yr"
explanatory <- c("sex.factor", "age", "t_stage.factor", "ulcer.factor")
or_plot(melanoma, dependent, explanatory)
# Diagnostic plots for the reduced model (ulceration + T-stage only).
# ggfortify supplies the autoplot() method used on the fitted model.
library(ggfortify)
dependent <- "mort_5yr"
explanatory_multi <- c("ulcer.factor", "t_stage.factor")
melanoma %>%
  # Pass explanatory_multi, not the four-variable `explanatory` vector left
  # over from the odds ratio plot, so the diagnostics describe this model
  glmmulti(dependent, explanatory_multi) %>%
  autoplot(which = 1:6)