Code
library(tidyverse)
RMST difference plots over three time points. Different variations with the same data.
library(tidyverse)
<- read_rds(
rmst_models ::here("..", "results", "predictions",
here"marginal_rmst_difference_unrestricted_time_points.RDS")
)
# 1. collect your five standsurv outputs into a named list
# rmst_models <- list(
# Overall = s_rmst_overall,
# `HPA count` = s_rmst_hpa,
# Sex1 = lst_sex[[1]],
# Sex2 = lst_sex[[2]],
# Age1 = lst_age[[1]],
# Age2 = lst_age[[2]],
# Age3 = lst_age[[3]],
# Age4 = lst_age[[4]],
# Age5 = lst_age[[5]],
# Occupation1 = lst_occ[[1]],
# Occupation2 = lst_occ[[2]],
# Occupation3 = lst_occ[[3]],
# Occupation4 = lst_occ[[4]],
# Occupation5 = lst_occ[[5]]
# )
attr(rmst_models$HPA_3_test, "standpred_contrast")
# A tibble: 3 × 5
time contrast difference difference_lci difference_uci
<dbl> <chr> <dbl> <dbl> <dbl>
1 1826 treated=HPA, n_tests_within_5_… 1.84 1.23 2.46
2 3653 treated=HPA, n_tests_within_5_… 9.38 7.38 11.4
3 7306 treated=HPA, n_tests_within_5_… 39.5 34.3 44.7
<- imap_dfr(rmst_models, function(fit_obj, model_name) {
rmst_table # extract the contrast‐table
attr(fit_obj, "standpred_contrast") %>%
as_tibble() %>%
#filter(time %in% keep_times) %>%
mutate(
Model = model_name,
Subgroup = case_when(
== "treated=HPA vs treated=Control" ~ "Overall",
contrast == "treated=HPA, n_tests_within_5_fac_vs_ctrl=HPA, 1 test vs treated=Control, n_tests_within_5_fac_vs_ctrl=HPA, 1 test" ~ "HPA, 1 test",
contrast == "treated=HPA, n_tests_within_5_fac_vs_ctrl=HPA, 2 tests vs treated=Control, n_tests_within_5_fac_vs_ctrl=HPA, 2 tests" ~ "HPA, 2 tests",
contrast == "treated=HPA, n_tests_within_5_fac_vs_ctrl=HPA, ≥3 tests vs treated=Control, n_tests_within_5_fac_vs_ctrl=HPA, ≥3 tests" ~ "HPA, ≥3 tests",
contrast == "treated=HPA, Sex=Men vs treated=Control, Sex=Men" ~ "Men",
contrast == "treated=HPA, Sex=Women vs treated=Control, Sex=Women" ~ "Women",
contrast == "treated=HPA, Age=25 vs treated=Control, Age=25" ~ "Age 25",
contrast == "treated=HPA, Age=35 vs treated=Control, Age=35" ~ "Age 35",
contrast == "treated=HPA, Age=45 vs treated=Control, Age=45" ~ "Age 45",
contrast == "treated=HPA, Age=55 vs treated=Control, Age=55" ~ "Age 55",
contrast == "treated=HPA, Age=65 vs treated=Control, Age=65" ~ "Age 65",
contrast == "treated=HPA, age_group=<35 vs treated=Control, age_group=<35" ~ "Age grp <35",
contrast == "treated=HPA, age_group=35–49 vs treated=Control, age_group=35–49" ~ "Age grp 35-49",
contrast == "treated=HPA, age_group=≥50 vs treated=Control, age_group=≥50" ~ "Age grp ≥50",
contrast == "treated=HPA, ssyk_WBHL_group=White-collar high-skilled vs treated=Control, ssyk_WBHL_group=White-collar high-skilled"
contrast ~ "White-collar high-skilled",
== "treated=HPA, ssyk_WBHL_group=White-collar low-skilled vs treated=Control, ssyk_WBHL_group=White-collar low-skilled"
contrast ~ "White-collar low-skilled",
== "treated=HPA, ssyk_WBHL_group=Blue-collar high-skilled vs treated=Control, ssyk_WBHL_group=Blue-collar high-skilled"
contrast ~ "Blue-collar high-skilled",
== "treated=HPA, ssyk_WBHL_group=Blue-collar low-skilled vs treated=Control, ssyk_WBHL_group=Blue-collar low-skilled"
contrast ~ "Blue-collar low-skilled",
TRUE ~ NA_character_
),Time = case_when(time == 1826 ~ "5 yr",
== 3653 ~ "10 yr",
time == 7306 ~ "20 yr"),
time _years = time / 365.25,
τRMST = sprintf("%.2f (%.2f–%.2f)",
Δ
difference,
difference_lci,
difference_uci),## convert to years gained per 1 000 persons
years_gain_per_1000 = difference * 1000 / 365.25,
years_gain_per_1000_lci = difference_lci * 1000 / 365.25,
years_gain_per_1000_uci = difference_uci * 1000 / 365.25,
## optional pretty label
years_gain_per_1000_fmt = sprintf("%.2f (%.2f–%.2f)",
years_gain_per_1000,
years_gain_per_1000_lci,
years_gain_per_1000_uci),months_gain_per_100py = difference / τ_years / 12 * 100
# years gained per 100 py
%>%
) select(Model, Subgroup, Time, ΔRMST, difference, difference_lci, difference_uci, years_gain_per_1000, years_gain_per_1000_lci, years_gain_per_1000_uci, months_gain_per_100py)
|> filter(Model != "HPA count")
})
|> print(n = 20) rmst_table
# A tibble: 27 × 11
Model Subgroup Time ΔRMST difference difference_lci difference_uci
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Overall Overall 5 yr 0.82… 0.821 0.560 1.08
2 Overall Overall 10 yr 3.91… 3.91 3.08 4.74
3 Overall Overall 20 yr 17.0… 17.1 15.2 18.9
4 Sex1 Men 5 yr 0.98… 0.981 0.540 1.42
5 Sex1 Men 10 yr 4.58… 4.58 3.18 5.99
6 Sex1 Men 20 yr 18.4… 18.4 14.9 22.0
7 Sex2 Women 5 yr 0.64… 0.645 0.388 0.902
8 Sex2 Women 10 yr 3.11… 3.11 2.26 3.97
9 Sex2 Women 20 yr 15.2… 15.2 13.0 17.5
10 Age_grp1 Age grp <35 5 yr 0.41… 0.414 0.160 0.669
11 Age_grp1 Age grp <35 10 yr 1.60… 1.60 0.615 2.58
12 Age_grp1 Age grp <35 20 yr 5.35… 5.35 1.69 9.00
13 Age_grp2 Age grp 35-49 5 yr 0.59… 0.593 0.238 0.948
14 Age_grp2 Age grp 35-49 10 yr 3.17… 3.17 1.49 4.84
15 Age_grp2 Age grp 35-49 20 yr 14.7… 14.7 3.54 25.9
16 Age_grp3 Age grp ≥50 5 yr 1.12… 1.12 0.644 1.60
17 Age_grp3 Age grp ≥50 10 yr 5.04… 5.04 1.97 8.10
18 Age_grp3 Age grp ≥50 20 yr 20.1… 20.1 -0.937 41.2
19 HPA_1_test HPA, 1 test 5 yr 0.43… 0.432 0.0845 0.779
20 HPA_1_test HPA, 1 test 10 yr 2.15… 2.15 1.04 3.26
# ℹ 7 more rows
# ℹ 4 more variables: years_gain_per_1000 <dbl>, years_gain_per_1000_lci <dbl>,
# years_gain_per_1000_uci <dbl>, months_gain_per_100py <dbl>
# rmst_wide <- rmst_table %>%
# pivot_wider(
# names_from = Time,
# values_from = ΔRMST
# ) %>%
# # optional: order the rows exactly as you listed
# mutate(
# Model = factor(Model,
# levels = c("Overall","HPA count","Sex","Age","Occupation")),
# Subgroup = factor(Subgroup,
# levels = c(
# "Overall",
# "HPA, 1 test","HPA, 2 tests","HPA, ≥3 tests",
# "Men","Women",
# "Age 25","Age 35","Age 45","Age 55","Age 65",
# "White-collar high-skilled","White-collar low-skilled",
# "Blue-collar high-skilled","Blue-collar low-skilled",
# "Unknown occupation"
# ))
# ) %>%
# arrange(Model, Subgroup) |>
# select(-Model) |>
# # 1) rename the columns
# rename(
# `Group` = Subgroup,
# `ΔRMST 5 yr` = `5 yr`,
# `ΔRMST 10 yr` = `10 yr`
# )
# 2) create a forrest plot with the RMST differences
<-
rmst_plot_data |>
rmst_table mutate(
Model = factor(Model, levels = c("Overall", "HPA count", "Sex1", "Sex2",
"Age_grp1", "Age_grp2", "Age_grp3")),
Subgroup = factor(Subgroup, levels = c(
"Overall",
"HPA, 1 test", "HPA, 2 tests", "HPA, ≥3 tests",
"Men", "Women",
"Age 25", "Age 35", "Age 45", "Age 55", "Age 65",
"Age grp <35", "Age grp 35-49", "Age grp ≥50",
"White-collar high-skilled", "White-collar low-skilled",
"Blue-collar high-skilled", "Blue-collar low-skilled"
)),Subgroup = fct_rev(Subgroup),
Time = factor(Time, levels = c("5 yr", "10 yr", "20 yr")),
Time = fct_rev(Time)
)
|>
rmst_plot_data ggplot(aes(x = years_gain_per_1000, y = Subgroup, color = Time)) +
geom_point(aes(shape = Time), size = 3 ) +
geom_errorbarh(aes(xmin = years_gain_per_1000_lci, xmax = years_gain_per_1000_uci, group = Time), height = 0.2) +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
scale_shape_manual(values = c(16, 17, 18)) +
labs(
x = "Years gained per 1.000 persons",
y = "Subgroup",
title = "RMST differences by subgroup vs controls"
+
) theme_minimal() +
theme(
legend.position = "bottom",
legend.title = element_blank()
)
%>%
rmst_plot_data ggplot(aes(x = difference, # days gained per person
y = Subgroup,
colour = Time)) +
geom_linerange(aes(xmin = difference_lci,
xmax = difference_uci,
group = Time),
size = 3,
position = position_dodge2(width = 1)) +
geom_point(aes(group = Time),
fill = "white",
color = "white",
shape = 21,
size = .5,
position = position_dodge2(width = 1)) +
geom_text(aes(label = ΔRMST, x = 40),
position = position_dodge2(width = 1),
hjust = -0.2,
size = 3) +
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey") +
scale_colour_manual(values = c("grey40", "#f57c52", "#3a7880")) +
scale_shape_manual(values = c(16, 17, 18)) +
scale_x_continuous(
limits = c(-1, 60),
name = "Δ-RMST (days) per avarage individual",
sec.axis = dup_axis(~ . * 1000 / 365.25, # multiply primary scale by 1 000
name = "Δ-RMST (years) per 1,000 individuals")
+
) guides(colour = guide_legend(reverse = TRUE),
shape = guide_legend(reverse = TRUE)) +
theme_minimal(base_size = 11, base_family = "sans") +
theme(
legend.position = "bottom",
legend.title = element_blank(),
panel.grid.major = element_line(size = .3, colour = "grey90"),
panel.grid.minor = element_blank(),
axis.title.x.top = element_text(margin = margin(b = 8)),
plot.title = element_text(face = "bold", size = 12, hjust = .5)
+
)
labs(title = "difference in restricted mean CVD‑free time (Δ‑rmst) by subgroup")
# difference in restricted mean CVD‑free time (years) per 1 000 individuals
#"difference in restricted mean CVD‑free time (days) vs controls"
#
RMST differences by subgroup
library(showtext)
font_add_google("Roboto", "roboto")
showtext_auto()
# --- 1. wrangle -----------------------------------------------------
<- rmst_table %>%
rmst_line_data mutate(
groups = case_when(
str_starts(Model, "Age") ~ "Age group",
str_starts(Model, "Sex") ~ "Sex",
TRUE ~ Model # keep original for the rest
),Model = factor(Model, c("Overall", "Sex", "Age group", "HPA count")),
# numeric x-axis
time_years = readr::parse_number(Time), # 5 / 10 / 20
# keep factor ordering you already set up
Subgroup = fct_rev(Subgroup)
%>%
)
## ── add τ = 0 for every group / subgroup ─────────────────────────
complete(Subgroup, groups,
time_years = c(0, 5, 10, 20), # <-- include 0
fill = list(
years_gain_per_1000 = 0,
years_gain_per_1000_lci = 0,
years_gain_per_1000_uci = 0,
difference = 0,
difference_lci = 0,
difference_uci = 0,
RMST = "0", # text label if you use it
ΔTime = "0 yr" # optional, for consistency
%>%
)) arrange(Model, Subgroup, time_years)
|> print(n = 20) rmst_line_data
# A tibble: 216 × 13
Subgroup groups time_years Model Time ΔRMST difference difference_lci
<fct> <chr> <dbl> <fct> <chr> <chr> <dbl> <dbl>
1 Overall Overall 5 Overall 5 yr 0.82… 0.821 0.560
2 Overall Overall 10 Overall 10 yr 3.91… 3.91 3.08
3 Overall Overall 20 Overall 20 yr 17.0… 17.1 15.2
4 Women Age group 0 <NA> 0 yr 0 0 0
5 Women HPA_1_test 0 <NA> 0 yr 0 0 0
6 Women HPA_2_tests 0 <NA> 0 yr 0 0 0
7 Women HPA_3_tests 0 <NA> 0 yr 0 0 0
8 Women Overall 0 <NA> 0 yr 0 0 0
9 Women Sex 0 <NA> 0 yr 0 0 0
10 Women Age group 5 <NA> 0 yr 0 0 0
11 Women HPA_1_test 5 <NA> 0 yr 0 0 0
12 Women HPA_2_tests 5 <NA> 0 yr 0 0 0
13 Women HPA_3_tests 5 <NA> 0 yr 0 0 0
14 Women Overall 5 <NA> 0 yr 0 0 0
15 Women Sex 5 <NA> 5 yr 0.64… 0.645 0.388
16 Women Age group 10 <NA> 0 yr 0 0 0
17 Women HPA_1_test 10 <NA> 0 yr 0 0 0
18 Women HPA_2_tests 10 <NA> 0 yr 0 0 0
19 Women HPA_3_tests 10 <NA> 0 yr 0 0 0
20 Women Overall 10 <NA> 0 yr 0 0 0
# ℹ 196 more rows
# ℹ 5 more variables: difference_uci <dbl>, years_gain_per_1000 <dbl>,
# years_gain_per_1000_lci <dbl>, years_gain_per_1000_uci <dbl>,
# months_gain_per_100py <dbl>
## ---- 1. original wrangling -----------------------------------------
<- rmst_table %>%
rmst_line_data mutate(
groups = case_when(
str_starts(Model, "Age") ~ "Age group",
str_starts(Model, "Sex") ~ "Sex",
TRUE ~ Model
),groups = factor(groups, c("Overall", "Sex", "Age group", "HPA count")),
time_years = readr::parse_number(Time), # 5 / 10 / 20
Subgroup = fct_rev(Subgroup)
)
## ---- 2. make ONE baseline row per (Model, Subgroup) ----------------
<- rmst_line_data %>%
baseline distinct(Model, Subgroup, groups) %>% # one per curve
mutate(
time_years = 0,
difference = 0,
difference_lci = 0,
difference_uci = 0,
RMST = "0", # if you keep the label
ΔTime = "0 yr" # optional
)
<- bind_rows(rmst_line_data, baseline) %>%
rmst_line_data0 arrange(Model, Subgroup, time_years) |>
mutate(Subgroup = fct_reorder(Subgroup,
# or years_gain_per_1000
difference, .desc = TRUE))
<- c(
my_subgroup_cols "Overall" = "#418286",
"Men" = "#418286",
"Women" = "#e1800f",
"Age grp <35" = "#e1800f",
"Age grp 35-49" = "grey30",
"Age grp ≥50" = "#418286",
"HPA, 1 test" = "#418286",
"HPA, 2 tests" = "grey30",
"HPA, ≥3 tests" = "#e1800f"
)
# --- 2. plot --------------------------------------------------------
ggplot(rmst_line_data0 ,
aes(x = time_years,
y = difference,
group = Subgroup,
colour = Subgroup,
fill = Subgroup)
+
)
## shade *area under the curve* -------------------------
geom_line()+
geom_ribbon(aes(ymin = 0, ymax = difference),
alpha = 1, linewidth = 0) +
# geom_text(aes(label = Subgroup, x= 3),
# position = position_dodge(width = 0.7),
# data = rmst_line_data0 %>% filter(time_years == 20),
# hjust = -0.2,
# size = 3) +
geom_text(data = rmst_line_data0 %>%
filter(time_years == 20),
aes(label = Subgroup,
color = Subgroup),
position = position_nudge(x = 1), # nudge out to the right
hjust = 0, size = 3.5) +
geom_point(data = rmst_line_data0 %>% filter(time_years > 0),
position = position_dodge(width = 1.2),
color = "grey10") +
geom_errorbar(aes(ymin = difference_lci,
ymax = difference_uci),
position = position_dodge(width = 1.2),
width = 0, # no horizontal whiskers
linewidth = 1,
alpha = .7, color = "grey10") +
## … or, if you’d rather show the 95 % CI band instead:
# geom_ribbon(aes(ymin = years_gain_per_1000_lci,
# ymax = years_gain_per_1000_uci),
# alpha = 0.15, linewidth = 0)
## connect the points -----------------------------------
# markers
scale_y_continuous(
name = "RMST Δ (days), HPAs vs controls",
expand = expansion(mult = c(0, .05)),
sec.axis = sec_axis(
trans = ~ . * 1000 / 365.25,
name = "RMST Δ (years / 1,000 persons), HPAs vs controls",
breaks = c(0, 27, 55, 82, 110),
)+
)
## axes & guides ----------------------------------------
scale_x_continuous(
position = "top",
limits = c(0, 28),
breaks = c(0, 5, 10, 20),
labels = c("0 years\nfrom first\nHPA", "5", "10", "20")) +
#scale_y_continuous(expand = expansion(mult = c(0, .05))) + # 0 at baseline
scale_colour_manual(values = my_subgroup_cols) +
scale_fill_manual(values = my_subgroup_cols) +
# coord_flip() +
## labels & titles --------------------------------------
labs(
x = "", # Years from first HPA
y = "RMST Δ (years / 1 000 persons), HPAs vs controls",
title = "",
caption = "Shaded area represents accumulated Δ to controls"
+
)
## layout options ---------------------------------------
facet_wrap(~ groups, ncol = 2,) + # avoids spaghetti
theme_minimal(base_size = 11, base_family = "Roboto") +
theme(
text = element_text(family = "Roboto"),
axis.title = element_text(color = "grey35"),
legend.position = "none", # facets are labelled
panel.spacing = unit(1, "lines"),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
strip.text = element_blank(),
strip.background = element_blank()
)
ggsave(
::here("..", "results", "figures", "rmstd_area_plot.pdf"),
herewidth = 210, height = 150, units = "mm"
)
<- rmst_plot_data |>
data_clean filter(!is.na(Subgroup)) |>
mutate(
time_numeric = readr::parse_number(as.character(Time)),
# Create clean subgroup labels
subgroup_clean = case_when(
== "Overall" ~ "Overall",
Subgroup == "Men" ~ "Men",
Subgroup == "Women" ~ "Women",
Subgroup == "Age grp <35" ~ "Age <35",
Subgroup == "Age grp 35-49" ~ "Age 35-49",
Subgroup == "Age grp ≥50" ~ "Age ≥50",
Subgroup str_detect(as.character(Subgroup), "HPA") ~ as.character(Subgroup),
TRUE ~ as.character(Subgroup)
),# Create category for faceting/coloring
category = case_when(
== "Overall" ~ "Overall",
Subgroup %in% c("Men", "Women") ~ "Sex",
Subgroup str_detect(as.character(Subgroup), "Age grp") ~ "Age Group",
str_detect(as.character(Subgroup), "HPA") ~ "HPA Count",
str_detect(as.character(Subgroup), "collar") ~ "Occupation",
TRUE ~ "Other"
)|>
) mutate(
Model = if_else(is.na(Model), "HPA Count", Model),
)
print(data_clean, n = 20)
# A tibble: 27 × 14
Model Subgroup Time ΔRMST difference difference_lci difference_uci
<chr> <fct> <fct> <chr> <dbl> <dbl> <dbl>
1 Overall Overall 5 yr 0.82 … 0.821 0.560 1.08
2 Overall Overall 10 yr 3.91 … 3.91 3.08 4.74
3 Overall Overall 20 yr 17.08… 17.1 15.2 18.9
4 Sex1 Men 5 yr 0.98 … 0.981 0.540 1.42
5 Sex1 Men 10 yr 4.58 … 4.58 3.18 5.99
6 Sex1 Men 20 yr 18.43… 18.4 14.9 22.0
7 Sex2 Women 5 yr 0.64 … 0.645 0.388 0.902
8 Sex2 Women 10 yr 3.11 … 3.11 2.26 3.97
9 Sex2 Women 20 yr 15.22… 15.2 13.0 17.5
10 Age_grp1 Age grp <35 5 yr 0.41 … 0.414 0.160 0.669
11 Age_grp1 Age grp <35 10 yr 1.60 … 1.60 0.615 2.58
12 Age_grp1 Age grp <35 20 yr 5.35 … 5.35 1.69 9.00
13 Age_grp2 Age grp 35-49 5 yr 0.59 … 0.593 0.238 0.948
14 Age_grp2 Age grp 35-49 10 yr 3.17 … 3.17 1.49 4.84
15 Age_grp2 Age grp 35-49 20 yr 14.72… 14.7 3.54 25.9
16 Age_grp3 Age grp ≥50 5 yr 1.12 … 1.12 0.644 1.60
17 Age_grp3 Age grp ≥50 10 yr 5.04 … 5.04 1.97 8.10
18 Age_grp3 Age grp ≥50 20 yr 20.15… 20.1 -0.937 41.2
19 HPA Count HPA, 1 test 5 yr 0.43 … 0.432 0.0845 0.779
20 HPA Count HPA, 1 test 10 yr 2.15 … 2.15 1.04 3.26
# ℹ 7 more rows
# ℹ 7 more variables: years_gain_per_1000 <dbl>, years_gain_per_1000_lci <dbl>,
# years_gain_per_1000_uci <dbl>, months_gain_per_100py <dbl>,
# time_numeric <dbl>, subgroup_clean <chr>, category <chr>
# Check data before filtering
print("Data summary before filtering:")
[1] "Data summary before filtering:"
print(table(data_clean$category))
Age Group HPA Count Overall Sex
9 9 3 6
print("Unique subgroups:")
[1] "Unique subgroups:"
print(unique(data_clean$Subgroup))
[1] Overall Men Women Age grp <35 Age grp 35-49
[6] Age grp ≥50 HPA, 1 test HPA, 2 tests HPA, ≥3 tests
18 Levels: Blue-collar low-skilled ... Overall
# Apply filtering
<- data_clean |>
data_clean filter(category %in% c("Overall", "Sex", "Age Group", "HPA Count"))
library(tidyverse)
library(ggrounded)
## helper table – keep only the strata you want to show
<- tribble(
keep ~category, ~subgroup,
"Overall", "Overall",
"Sex", "Men",
"Sex", "Women",
"Age Group", "Age <35",
"Age Group", "Age 35-49",
"Age Group", "Age ≥50",
"HPA Count", "HPA, 1 test",
"HPA Count", "HPA, 2 tests",
"HPA Count", "HPA, ≥3 tests"
)
## prepare data – ordered by 20-year benefit inside each category
<- data_clean |>
df_plot semi_join(keep, by = c("category",
"subgroup_clean" = "subgroup")) |>
mutate(
time_facet = factor(time_numeric,
levels = c(5, 10, 20),
labels = c("5 years", "10 years", "20 years"))
|>
) group_by(category, subgroup_clean) |>
mutate(order_key = years_gain_per_1000[time_numeric == 20]) |>
ungroup() |>
group_by(category) |>
mutate(subgroup_clean = fct_reorder(subgroup_clean,
.desc = TRUE)) |>
order_key, ungroup()
## compute a small x-offset so text sits nicely to the right
<- (max(df_plot$years_gain_per_1000_uci, na.rm = TRUE) -
text_nudge min(df_plot$years_gain_per_1000_lci, na.rm = TRUE)) * 0.02
## forest-style plot ---------------------------------------------------
ggplot(df_plot,
aes(x = years_gain_per_1000,
y = subgroup_clean)) +
# dashed reference line at zero benefit
geom_vline(xintercept = 0, linetype = "dashed",
colour = "grey30") +
# horizontal bar from 0 → estimate
geom_segment(aes(x = 0,
xend = years_gain_per_1000,
yend = subgroup_clean),
lineend = c('butt'),
linewidth = 12,
colour = "steelblue") +
# geom_col_rounded(fill = "grey80",
# radius = grid::unit(4, "pt"),
# width = 0.85) +
# 95 % confidence interval
geom_errorbarh(aes(xmin = years_gain_per_1000_lci,
xmax = years_gain_per_1000_uci),
height = 0,
linewidth = 1.2,
color = "grey20") +
# numeric label just above the bar, centred on the point estimate
geom_text(aes(label = scales::number(years_gain_per_1000,
accuracy = 0.1)),
nudge_x = text_nudge,
nudge_y = 0.25, # shift upward so it clears the bar
size = 3.2,
hjust = 0) +
geom_text(aes(label = paste0("(",
::number(years_gain_per_1000_lci, accuracy = 0.1),
scales"–",
::number(years_gain_per_1000_uci, accuracy = 0.1), ")"
scales
)
),nudge_x = text_nudge,
nudge_y = -0.25, # shift upward so it clears the bar
size = 3.2,
hjust = 0) +
facet_grid(rows = vars(category),
cols = vars(time_facet),
scales = "free_y",
space = "free_y",
switch = "y") +
# turn off the x axis completely
scale_x_continuous(expand = expansion(mult = c(0, 0.9))) +
theme_minimal(base_size = 13) +
theme(
axis.text.x = element_blank(),
#axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major.x = element_blank(),
strip.text.y = element_text(angle = 0, face = "bold"),
strip.text.x = element_text(face = "bold"),
panel.grid.major.y = element_blank(),
panel.spacing.x = unit(1.2, "lines")
+
)
labs(
title = "",
subtitle = "",
y = NULL,
x = "Δ-RMST (years) per 1,000 individuals",
caption = ""
)
library(tidyverse)
library(ggrounded) # devtools::install_github("botan/ggrounded")
# ── 1. filter & prepare your plotting df (as before) ────────────────────
<- data_clean |>
df_plot semi_join(
tribble(
~category, ~subgroup,
"Overall", "Overall",
"Sex", "Men",
"Sex", "Women",
"Age Group", "Age <35",
"Age Group", "Age 35-44",
"Age Group", "Age ≥50",
"HPA Count", "HPA 0",
"HPA Count", "HPA 1",
"HPA Count", "HPA 2+"
),by = c("category", "subgroup_clean" = "subgroup")
|>
) mutate(
time_facet = factor(time_numeric,
levels = c(5, 10, 20),
labels = c("5 years", "10 years", "20 years"))
|>
) group_by(category, subgroup_clean) |>
mutate(order_key = difference[time_numeric == 20]) |>
ungroup() |>
group_by(category) |>
mutate(subgroup_clean = fct_reorder(subgroup_clean, order_key, .desc = TRUE)) |>
ungroup()
# ── 2. draw with rounded bars (geom_col_rounded) ────────────────────────
ggplot(df_plot, aes(x = subgroup_clean, y = difference)) +
# rounded‐end bars (exact length = difference)
geom_col_rounded(
radius = grid::unit(4, "pt"),
width = 0.7,
fill = "grey80",
colour = NA
+
)
# 95 % CI whiskers
geom_errorbar(
aes(ymin = difference_lci, ymax = difference_uci),
width = 0.2,
size = 0.6
+
)
# numeric labels just beyond bar end
geom_text(
aes(label = scales::number(difference, accuracy = 0.1)),
hjust = -0.1,
size = 3
+
)
# horizontal orientation
coord_flip() +
# one row per category, three cols for 5/10/20 y
facet_grid(category ~ time_facet,
scales = "free_y", space = "free_y") +
# drop the x‐axis (now vertical), keep a little padding
scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
labs(
title = "Accumulated treatment benefit (ΔRMST)",
subtitle = "bars = point estimates • whiskers = 95 % CI",
x = NULL,
y = "ΔRMST (years)",
caption = "Flexible parametric survival model, Swedish registers"
+
)
theme_minimal(base_size = 13) +
theme(
axis.text.y = element_text(size = 11),
axis.ticks.y = element_blank(),
panel.grid.major.y = element_blank(),
strip.text.y = element_text(angle = 0, face = "bold"),
strip.text.x = element_text(face = "bold"),
panel.spacing.x = unit(1.2, "lines")
)