RMSTD difference plots

Author

Daniel Väisänen

Description

RMST difference plots over three time points. Different variations with the same data.

Code
library(tidyverse)

Data RMSTD

Code
rmst_models <- read_rds(
  here::here("..", "results", "predictions",
             "marginal_rmst_difference_unrestricted_time_points.RDS")
)
Code
# 1. collect your five standsurv outputs into a named list
# rmst_models <- list(
#   Overall     = s_rmst_overall,
#   `HPA count` = s_rmst_hpa,
#   Sex1         = lst_sex[[1]],
#   Sex2         = lst_sex[[2]],
#   Age1          = lst_age[[1]],
#   Age2         = lst_age[[2]],
#   Age3          = lst_age[[3]],
#   Age4          = lst_age[[4]],
#   Age5          = lst_age[[5]],
#   Occupation1  = lst_occ[[1]],
#   Occupation2  = lst_occ[[2]],
#   Occupation3  = lst_occ[[3]],
#   Occupation4  = lst_occ[[4]],
#   Occupation5  = lst_occ[[5]]
# )

attr(rmst_models$HPA_3_test, "standpred_contrast")
# A tibble: 3 × 5
   time contrast                        difference difference_lci difference_uci
  <dbl> <chr>                                <dbl>          <dbl>          <dbl>
1  1826 treated=HPA, n_tests_within_5_…       1.84           1.23           2.46
2  3653 treated=HPA, n_tests_within_5_…       9.38           7.38          11.4 
3  7306 treated=HPA, n_tests_within_5_…      39.5           34.3           44.7 
Code
rmst_table <- imap_dfr(rmst_models, function(fit_obj, model_name) {
  # extract the contrast‐table
  attr(fit_obj, "standpred_contrast") %>%
    as_tibble() %>%
    #filter(time %in% keep_times) %>%
    mutate(
      Model    = model_name,
      Subgroup = case_when(
        contrast == "treated=HPA vs treated=Control"                                                                 ~ "Overall",
        contrast == "treated=HPA, n_tests_within_5_fac_vs_ctrl=HPA, 1 test vs treated=Control, n_tests_within_5_fac_vs_ctrl=HPA, 1 test"          ~ "HPA, 1 test",
        contrast == "treated=HPA, n_tests_within_5_fac_vs_ctrl=HPA, 2 tests vs treated=Control, n_tests_within_5_fac_vs_ctrl=HPA, 2 tests"        ~ "HPA, 2 tests",
        contrast == "treated=HPA, n_tests_within_5_fac_vs_ctrl=HPA, ≥3 tests vs treated=Control, n_tests_within_5_fac_vs_ctrl=HPA, ≥3 tests"             ~ "HPA, ≥3 tests",
        contrast == "treated=HPA, Sex=Men vs treated=Control, Sex=Men"                                               ~ "Men",
        contrast == "treated=HPA, Sex=Women vs treated=Control, Sex=Women"                                           ~ "Women",
        contrast == "treated=HPA, Age=25 vs treated=Control, Age=25"                                                 ~ "Age 25",
        contrast == "treated=HPA, Age=35 vs treated=Control, Age=35"                                                 ~ "Age 35",
        contrast == "treated=HPA, Age=45 vs treated=Control, Age=45"                                                 ~ "Age 45",
        contrast == "treated=HPA, Age=55 vs treated=Control, Age=55"                                                 ~ "Age 55",
        contrast == "treated=HPA, Age=65 vs treated=Control, Age=65"                                                 ~ "Age 65",
        contrast == "treated=HPA, age_group=<35 vs treated=Control, age_group=<35"                                   ~ "Age grp <35",
        contrast == "treated=HPA, age_group=35–49 vs treated=Control, age_group=35–49"                               ~ "Age grp 35-49",
        contrast == "treated=HPA, age_group=≥50 vs treated=Control, age_group=≥50"                                   ~ "Age grp ≥50",
        contrast == "treated=HPA, ssyk_WBHL_group=White-collar high-skilled vs treated=Control, ssyk_WBHL_group=White-collar high-skilled"
                                                                                                                     ~ "White-collar high-skilled",
        contrast == "treated=HPA, ssyk_WBHL_group=White-collar low-skilled vs treated=Control, ssyk_WBHL_group=White-collar low-skilled"
                                                                                                                     ~ "White-collar low-skilled",
        contrast == "treated=HPA, ssyk_WBHL_group=Blue-collar high-skilled vs treated=Control, ssyk_WBHL_group=Blue-collar high-skilled"
                                                                                                                     ~ "Blue-collar high-skilled",
        contrast == "treated=HPA, ssyk_WBHL_group=Blue-collar low-skilled vs treated=Control, ssyk_WBHL_group=Blue-collar low-skilled"
                                                                                                                     ~ "Blue-collar low-skilled",
        TRUE                                                                                                          ~ NA_character_
      ),
        Time  = case_when(time == 1826 ~ "5 yr",
                          time == 3653 ~ "10 yr",
                          time == 7306 ~ "20 yr"),
      τ_years  = time / 365.25,
      ΔRMST = sprintf("%.2f (%.2f–%.2f)",
                      difference,
                      difference_lci,
                      difference_uci),
            ## convert to years gained per 1 000 persons
      years_gain_per_1000      = difference      * 1000 / 365.25,
      years_gain_per_1000_lci  = difference_lci  * 1000 / 365.25,
      years_gain_per_1000_uci  = difference_uci  * 1000 / 365.25,
      ## optional pretty label
      years_gain_per_1000_fmt  = sprintf("%.2f (%.2f–%.2f)",
                                         years_gain_per_1000,
                                         years_gain_per_1000_lci,
                                         years_gain_per_1000_uci),
      months_gain_per_100py = difference / τ_years / 12 * 100
                                                # years gained per 100 py
    ) %>%
    select(Model, Subgroup, Time, ΔRMST, difference, difference_lci, difference_uci, years_gain_per_1000, years_gain_per_1000_lci, years_gain_per_1000_uci, months_gain_per_100py)
}) |>  filter(Model != "HPA count")

rmst_table |> print(n = 20)
# A tibble: 27 × 11
   Model      Subgroup      Time  ΔRMST difference difference_lci difference_uci
   <chr>      <chr>         <chr> <chr>      <dbl>          <dbl>          <dbl>
 1 Overall    Overall       5 yr  0.82…      0.821         0.560           1.08 
 2 Overall    Overall       10 yr 3.91…      3.91          3.08            4.74 
 3 Overall    Overall       20 yr 17.0…     17.1          15.2            18.9  
 4 Sex1       Men           5 yr  0.98…      0.981         0.540           1.42 
 5 Sex1       Men           10 yr 4.58…      4.58          3.18            5.99 
 6 Sex1       Men           20 yr 18.4…     18.4          14.9            22.0  
 7 Sex2       Women         5 yr  0.64…      0.645         0.388           0.902
 8 Sex2       Women         10 yr 3.11…      3.11          2.26            3.97 
 9 Sex2       Women         20 yr 15.2…     15.2          13.0            17.5  
10 Age_grp1   Age grp <35   5 yr  0.41…      0.414         0.160           0.669
11 Age_grp1   Age grp <35   10 yr 1.60…      1.60          0.615           2.58 
12 Age_grp1   Age grp <35   20 yr 5.35…      5.35          1.69            9.00 
13 Age_grp2   Age grp 35-49 5 yr  0.59…      0.593         0.238           0.948
14 Age_grp2   Age grp 35-49 10 yr 3.17…      3.17          1.49            4.84 
15 Age_grp2   Age grp 35-49 20 yr 14.7…     14.7           3.54           25.9  
16 Age_grp3   Age grp ≥50   5 yr  1.12…      1.12          0.644           1.60 
17 Age_grp3   Age grp ≥50   10 yr 5.04…      5.04          1.97            8.10 
18 Age_grp3   Age grp ≥50   20 yr 20.1…     20.1          -0.937          41.2  
19 HPA_1_test HPA, 1 test   5 yr  0.43…      0.432         0.0845          0.779
20 HPA_1_test HPA, 1 test   10 yr 2.15…      2.15          1.04            3.26 
# ℹ 7 more rows
# ℹ 4 more variables: years_gain_per_1000 <dbl>, years_gain_per_1000_lci <dbl>,
#   years_gain_per_1000_uci <dbl>, months_gain_per_100py <dbl>
Code
# rmst_wide <- rmst_table %>%
#   pivot_wider(
#     names_from  = Time,
#     values_from = ΔRMST
#   ) %>%
#   # optional: order the rows exactly as you listed
#   mutate(
#     Model = factor(Model,
#                    levels = c("Overall","HPA count","Sex","Age","Occupation")),
#     Subgroup = factor(Subgroup,
#                       levels = c(
#                         "Overall",
#                         "HPA, 1 test","HPA, 2 tests","HPA, ≥3 tests",
#                         "Men","Women",
#                         "Age 25","Age 35","Age 45","Age 55","Age 65",
#                         "White-collar high-skilled","White-collar low-skilled",
#                         "Blue-collar high-skilled","Blue-collar low-skilled",
#                         "Unknown occupation"
#                       ))
#   ) %>%
#   arrange(Model, Subgroup) |>
#   select(-Model) |>
#   # 1) rename the columns
#   rename(
#     `Group` = Subgroup,
#     `ΔRMST 5 yr`     = `5 yr`,
#     `ΔRMST 10 yr`    = `10 yr`
#   )
Code
# 2) create a forrest plot with the RMST differences

rmst_plot_data <-
rmst_table |>
    mutate(
      Model    = factor(Model,    levels = c("Overall", "HPA count", "Sex1", "Sex2",
                  "Age_grp1", "Age_grp2", "Age_grp3")),
      Subgroup = factor(Subgroup, levels =  c(
  "Overall",
  "HPA, 1 test", "HPA, 2 tests", "HPA, ≥3 tests",
  "Men", "Women",
  "Age 25", "Age 35", "Age 45", "Age 55", "Age 65",
  "Age grp <35", "Age grp 35-49", "Age grp ≥50",
  "White-collar high-skilled", "White-collar low-skilled",
  "Blue-collar high-skilled", "Blue-collar low-skilled"
)),
Subgroup = fct_rev(Subgroup),
      Time     = factor(Time,     levels = c("5 yr", "10 yr", "20 yr")),
Time = fct_rev(Time)
    )


rmst_plot_data |>
ggplot(aes(x = years_gain_per_1000, y = Subgroup, color = Time)) +
  geom_point(aes(shape = Time), size = 3 ) +
  geom_errorbarh(aes(xmin = years_gain_per_1000_lci, xmax = years_gain_per_1000_uci, group = Time), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  scale_shape_manual(values = c(16, 17, 18)) +
  labs(
    x = "Years gained per 1.000 persons",
    y = "Subgroup",
    title = "RMST differences by subgroup vs controls"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )

Code
rmst_plot_data %>%
  ggplot(aes(x = difference,          # days gained per person
             y = Subgroup,
             colour = Time)) +
    geom_linerange(aes(xmin = difference_lci,
                     xmax = difference_uci,
                     group = Time),
                 size = 3,
                     position = position_dodge2(width = 1)) +
  geom_point(aes(group = Time),
             fill = "white",
             color = "white",
             shape = 21,
             size = .5,
             position = position_dodge2(width = 1)) +
  geom_text(aes(label = ΔRMST, x = 40),
           position = position_dodge2(width = 1),
            hjust = -0.2,
            size = 3) +


  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey") +
  scale_colour_manual(values = c("grey40", "#f57c52", "#3a7880")) +
  scale_shape_manual(values = c(16, 17, 18)) +
  scale_x_continuous(
    limits = c(-1, 60),
      name = "Δ-RMST (days) per avarage individual",
      sec.axis = dup_axis(~ .  * 1000 / 365.25,          # multiply primary scale by 1 000
                          name = "Δ-RMST (years) per 1,000 individuals")
  ) +
   guides(colour = guide_legend(reverse = TRUE),
         shape  = guide_legend(reverse = TRUE))  +

  theme_minimal(base_size = 11, base_family = "sans") +
  theme(
    legend.position   = "bottom",
    legend.title      = element_blank(),
    panel.grid.major  = element_line(size = .3, colour = "grey90"),
    panel.grid.minor  = element_blank(),
    axis.title.x.top  = element_text(margin = margin(b = 8)),
    plot.title        = element_text(face = "bold", size = 12, hjust = .5)
  ) +

  labs(title = "difference in restricted mean CVD‑free time (Δ‑rmst) by subgroup")

Code
# difference in restricted mean CVD‑free time (years) per 1 000 individuals
#"difference in restricted mean CVD‑free time (days) vs controls"
#

Area plot

RMST differences by subgroup

data

Code
library(showtext)
font_add_google("Roboto", "roboto")
showtext_auto()

# --- 1. wrangle -----------------------------------------------------
rmst_line_data <- rmst_table %>%
  mutate(
    groups = case_when(
      str_starts(Model, "Age") ~ "Age group",
      str_starts(Model, "Sex") ~ "Sex",
      TRUE                     ~ Model          # keep original for the rest
    ),
    Model = factor(Model, c("Overall", "Sex", "Age group", "HPA count")),
    # numeric x-axis
    time_years = readr::parse_number(Time),                    # 5 / 10 / 20
    # keep factor ordering you already set up
    Subgroup   = fct_rev(Subgroup)
  )%>%

  ## ── add τ = 0 for every group / subgroup ─────────────────────────
  complete(Subgroup, groups,
           time_years = c(0, 5, 10, 20),      # <-- include 0
           fill = list(
             years_gain_per_1000        = 0,
             years_gain_per_1000_lci    = 0,
             years_gain_per_1000_uci    = 0,
             difference                 = 0,
             difference_lci             = 0,
             difference_uci             = 0,
             ΔRMST                      = "0",    # text label if you use it
             Time                       = "0 yr"  # optional, for consistency
           )) %>%
  arrange(Model, Subgroup, time_years)

rmst_line_data |>  print(n = 20)
# A tibble: 216 × 13
   Subgroup groups      time_years Model   Time  ΔRMST difference difference_lci
   <fct>    <chr>            <dbl> <fct>   <chr> <chr>      <dbl>          <dbl>
 1 Overall  Overall              5 Overall 5 yr  0.82…      0.821          0.560
 2 Overall  Overall             10 Overall 10 yr 3.91…      3.91           3.08 
 3 Overall  Overall             20 Overall 20 yr 17.0…     17.1           15.2  
 4 Women    Age group            0 <NA>    0 yr  0          0              0    
 5 Women    HPA_1_test           0 <NA>    0 yr  0          0              0    
 6 Women    HPA_2_tests          0 <NA>    0 yr  0          0              0    
 7 Women    HPA_3_tests          0 <NA>    0 yr  0          0              0    
 8 Women    Overall              0 <NA>    0 yr  0          0              0    
 9 Women    Sex                  0 <NA>    0 yr  0          0              0    
10 Women    Age group            5 <NA>    0 yr  0          0              0    
11 Women    HPA_1_test           5 <NA>    0 yr  0          0              0    
12 Women    HPA_2_tests          5 <NA>    0 yr  0          0              0    
13 Women    HPA_3_tests          5 <NA>    0 yr  0          0              0    
14 Women    Overall              5 <NA>    0 yr  0          0              0    
15 Women    Sex                  5 <NA>    5 yr  0.64…      0.645          0.388
16 Women    Age group           10 <NA>    0 yr  0          0              0    
17 Women    HPA_1_test          10 <NA>    0 yr  0          0              0    
18 Women    HPA_2_tests         10 <NA>    0 yr  0          0              0    
19 Women    HPA_3_tests         10 <NA>    0 yr  0          0              0    
20 Women    Overall             10 <NA>    0 yr  0          0              0    
# ℹ 196 more rows
# ℹ 5 more variables: difference_uci <dbl>, years_gain_per_1000 <dbl>,
#   years_gain_per_1000_lci <dbl>, years_gain_per_1000_uci <dbl>,
#   months_gain_per_100py <dbl>
Code
## ---- 1. original wrangling -----------------------------------------
rmst_line_data <- rmst_table %>%
  mutate(
    groups = case_when(
      str_starts(Model, "Age") ~ "Age group",
      str_starts(Model, "Sex") ~ "Sex",
      TRUE                     ~ Model
    ),
    groups = factor(groups, c("Overall", "Sex", "Age group", "HPA count")),
    time_years = readr::parse_number(Time),   # 5 / 10 / 20
    Subgroup   = fct_rev(Subgroup)
  )

## ---- 2. make ONE baseline row per (Model, Subgroup) ----------------
baseline <- rmst_line_data %>%
  distinct(Model, Subgroup, groups) %>%          # one per curve
  mutate(
    time_years                   = 0,
    difference          = 0,
    difference_lci      = 0,
    difference_uci      = 0,
    ΔRMST                        = "0",          # if you keep the label
    Time                         = "0 yr"        # optional
  )

rmst_line_data0 <- bind_rows(rmst_line_data, baseline) %>%
  arrange(Model, Subgroup, time_years) |>
    mutate(Subgroup = fct_reorder(Subgroup,
                                difference,   # or years_gain_per_1000
                                .desc = TRUE))


my_subgroup_cols <- c(
  "Overall" = "#418286",
  "Men"   = "#418286",
  "Women" = "#e1800f",
  "Age grp <35"   = "#e1800f",
  "Age grp 35-49" = "grey30",
  "Age grp ≥50"   = "#418286",
  "HPA, 1 test" = "#418286",
  "HPA, 2 tests" = "grey30",
  "HPA, ≥3 tests" = "#e1800f"
)

plot

Code
# --- 2. plot --------------------------------------------------------
ggplot(rmst_line_data0 ,
       aes(x = time_years,
           y = difference,
           group = Subgroup,
           colour = Subgroup,
           fill   = Subgroup)
       ) +


  ## shade *area under the curve* -------------------------
geom_line()+
  geom_ribbon(aes(ymin = 0, ymax = difference),
              alpha = 1, linewidth = 0)      +

  # geom_text(aes(label = Subgroup, x= 3),
  #           position = position_dodge(width = 0.7),
  #           data = rmst_line_data0 %>% filter(time_years == 20),
  #           hjust = -0.2,
  #           size = 3) +

    geom_text(data = rmst_line_data0 %>%
  filter(time_years == 20),
            aes(label = Subgroup,
                color = Subgroup),
            position = position_nudge(x = 1),   # nudge out to the right
            hjust = 0, size = 3.5) +

    geom_point(data = rmst_line_data0 %>% filter(time_years > 0),
             position = position_dodge(width = 1.2),
             color = "grey10") +
   geom_errorbar(aes(ymin = difference_lci,
                    ymax = difference_uci),
                 position = position_dodge(width = 1.2),
                width = 0,            # no horizontal whiskers
                linewidth = 1,
                alpha = .7, color = "grey10") +
  ## … or, if you’d rather show the 95 % CI band instead:
  # geom_ribbon(aes(ymin = years_gain_per_1000_lci,
  #                 ymax = years_gain_per_1000_uci),
  #             alpha = 0.15, linewidth = 0)

  ## connect the points -----------------------------------
                                 # markers

  scale_y_continuous(
    name = "RMST Δ (days), HPAs vs controls",
    expand = expansion(mult = c(0, .05)),
    sec.axis = sec_axis(
      trans = ~ . * 1000 / 365.25,
      name = "RMST Δ (years / 1,000 persons), HPAs vs controls",
      breaks = c(0, 27, 55, 82, 110),
    )
  ) +

  ## axes & guides ----------------------------------------
 scale_x_continuous(
   position = "top",
   limits = c(0, 28),
   breaks = c(0, 5, 10, 20),
   labels = c("0 years\nfrom first\nHPA", "5", "10", "20")) +
#scale_y_continuous(expand = expansion(mult = c(0, .05)))  + # 0 at baseline
  scale_colour_manual(values = my_subgroup_cols) +
  scale_fill_manual(values = my_subgroup_cols) +
 # coord_flip() +

  ## labels & titles --------------------------------------
labs(
  x = "", # Years from first HPA
      y = "RMST Δ (years / 1 000 persons), HPAs vs controls",
      title = "",
      caption = "Shaded area represents accumulated Δ to controls"
    ) +

  ## layout options ---------------------------------------
  facet_wrap(~ groups, ncol = 2,) +       # avoids spaghetti
  theme_minimal(base_size = 11, base_family = "Roboto") +
    theme(
       text = element_text(family = "Roboto"),
       axis.title = element_text(color = "grey35"),
      legend.position = "none",                                # facets are labelled
      panel.spacing    = unit(1, "lines"),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
          strip.text       = element_blank(),
    strip.background = element_blank()
    )

save

Code
ggsave(
  here::here("..", "results", "figures", "rmstd_area_plot.pdf"),
  width = 210, height = 150, units = "mm"
)

Bar plot

data

Code
data_clean <- rmst_plot_data |>
 filter(!is.na(Subgroup)) |>
  mutate(
    time_numeric = readr::parse_number(as.character(Time)),
    # Create clean subgroup labels
    subgroup_clean = case_when(
      Subgroup == "Overall" ~ "Overall",
      Subgroup == "Men" ~ "Men",
      Subgroup == "Women" ~ "Women",
      Subgroup == "Age grp <35" ~ "Age <35",
      Subgroup == "Age grp 35-49" ~ "Age 35-49",
      Subgroup == "Age grp ≥50" ~ "Age ≥50",
      str_detect(as.character(Subgroup), "HPA") ~ as.character(Subgroup),
      TRUE ~ as.character(Subgroup)
    ),
    # Create category for faceting/coloring
    category = case_when(
      Subgroup == "Overall" ~ "Overall",
      Subgroup %in% c("Men", "Women") ~ "Sex",
      str_detect(as.character(Subgroup), "Age grp") ~ "Age Group",
      str_detect(as.character(Subgroup), "HPA") ~ "HPA Count",
      str_detect(as.character(Subgroup), "collar") ~ "Occupation",
      TRUE ~ "Other"
    )
  ) |>
  mutate(
   Model = if_else(is.na(Model), "HPA Count", Model),
  )

print(data_clean, n = 20)
# A tibble: 27 × 14
   Model     Subgroup      Time  ΔRMST  difference difference_lci difference_uci
   <chr>     <fct>         <fct> <chr>       <dbl>          <dbl>          <dbl>
 1 Overall   Overall       5 yr  0.82 …      0.821         0.560           1.08 
 2 Overall   Overall       10 yr 3.91 …      3.91          3.08            4.74 
 3 Overall   Overall       20 yr 17.08…     17.1          15.2            18.9  
 4 Sex1      Men           5 yr  0.98 …      0.981         0.540           1.42 
 5 Sex1      Men           10 yr 4.58 …      4.58          3.18            5.99 
 6 Sex1      Men           20 yr 18.43…     18.4          14.9            22.0  
 7 Sex2      Women         5 yr  0.64 …      0.645         0.388           0.902
 8 Sex2      Women         10 yr 3.11 …      3.11          2.26            3.97 
 9 Sex2      Women         20 yr 15.22…     15.2          13.0            17.5  
10 Age_grp1  Age grp <35   5 yr  0.41 …      0.414         0.160           0.669
11 Age_grp1  Age grp <35   10 yr 1.60 …      1.60          0.615           2.58 
12 Age_grp1  Age grp <35   20 yr 5.35 …      5.35          1.69            9.00 
13 Age_grp2  Age grp 35-49 5 yr  0.59 …      0.593         0.238           0.948
14 Age_grp2  Age grp 35-49 10 yr 3.17 …      3.17          1.49            4.84 
15 Age_grp2  Age grp 35-49 20 yr 14.72…     14.7           3.54           25.9  
16 Age_grp3  Age grp ≥50   5 yr  1.12 …      1.12          0.644           1.60 
17 Age_grp3  Age grp ≥50   10 yr 5.04 …      5.04          1.97            8.10 
18 Age_grp3  Age grp ≥50   20 yr 20.15…     20.1          -0.937          41.2  
19 HPA Count HPA, 1 test   5 yr  0.43 …      0.432         0.0845          0.779
20 HPA Count HPA, 1 test   10 yr 2.15 …      2.15          1.04            3.26 
# ℹ 7 more rows
# ℹ 7 more variables: years_gain_per_1000 <dbl>, years_gain_per_1000_lci <dbl>,
#   years_gain_per_1000_uci <dbl>, months_gain_per_100py <dbl>,
#   time_numeric <dbl>, subgroup_clean <chr>, category <chr>
Code
# Check data before filtering
print("Data summary before filtering:")
[1] "Data summary before filtering:"
Code
print(table(data_clean$category))

Age Group HPA Count   Overall       Sex 
        9         9         3         6 
Code
print("Unique subgroups:")
[1] "Unique subgroups:"
Code
print(unique(data_clean$Subgroup))
[1] Overall       Men           Women         Age grp <35   Age grp 35-49
[6] Age grp ≥50   HPA, 1 test   HPA, 2 tests  HPA, ≥3 tests
18 Levels: Blue-collar low-skilled ... Overall
Code
# Apply filtering
data_clean <- data_clean |>
  filter(category %in% c("Overall", "Sex", "Age Group", "HPA Count"))

plot

Code
library(tidyverse)
library(ggrounded)
## helper table – keep only the strata you want to show
keep <- tribble(
  ~category,   ~subgroup,
  "Overall",   "Overall",
  "Sex",       "Men",
  "Sex",       "Women",
  "Age Group", "Age <35",
  "Age Group", "Age 35-49",
  "Age Group", "Age ≥50",
  "HPA Count", "HPA, 1 test",
  "HPA Count", "HPA, 2 tests",
  "HPA Count", "HPA, ≥3 tests"
)

## prepare data – ordered by 20-year benefit inside each category
df_plot <- data_clean |>
  semi_join(keep, by = c("category",
                         "subgroup_clean" = "subgroup")) |>
  mutate(
    time_facet = factor(time_numeric,
                        levels = c(5, 10, 20),
                        labels = c("5 years", "10 years", "20 years"))
  ) |>
  group_by(category, subgroup_clean) |>
  mutate(order_key = years_gain_per_1000[time_numeric == 20]) |>
  ungroup() |>
  group_by(category) |>
  mutate(subgroup_clean = fct_reorder(subgroup_clean,
                                      order_key, .desc = TRUE)) |>
  ungroup()

## compute a small x-offset so text sits nicely to the right
text_nudge <- (max(df_plot$years_gain_per_1000_uci, na.rm = TRUE) -
               min(df_plot$years_gain_per_1000_lci, na.rm = TRUE)) * 0.02

## forest-style plot ---------------------------------------------------
ggplot(df_plot,
       aes(x = years_gain_per_1000,
           y = subgroup_clean)) +

  # dashed reference line at zero benefit
  geom_vline(xintercept = 0, linetype = "dashed",
             colour = "grey30") +

  # horizontal bar from 0 → estimate
  geom_segment(aes(x    = 0,
                   xend = years_gain_per_1000,
                   yend = subgroup_clean),
               lineend = c('butt'),
               linewidth = 12,
               colour    = "steelblue") +

    # geom_col_rounded(fill    = "grey80",
    #                radius  = grid::unit(4, "pt"),
    #                width   = 0.85) +

  # 95 % confidence interval
  geom_errorbarh(aes(xmin = years_gain_per_1000_lci,
                     xmax = years_gain_per_1000_uci),
                 height    = 0,
                 linewidth = 1.2,
                 color = "grey20") +

  # numeric label just above the bar, centred on the point estimate
  geom_text(aes(label = scales::number(years_gain_per_1000,
                                       accuracy = 0.1)),
            nudge_x = text_nudge,
            nudge_y = 0.25,          # shift upward so it clears the bar
            size    = 3.2,
            hjust   = 0) +

    geom_text(aes(label = paste0("(",
      scales::number(years_gain_per_1000_lci, accuracy = 0.1),
      "–",
      scales::number(years_gain_per_1000_uci, accuracy = 0.1), ")"
    )
  ),
            nudge_x = text_nudge,
            nudge_y = -0.25,          # shift upward so it clears the bar
            size    = 3.2,
            hjust   = 0) +

  facet_grid(rows  = vars(category),
             cols  = vars(time_facet),
             scales = "free_y",
             space  = "free_y",
             switch = "y") +

  # turn off the x axis completely
  scale_x_continuous(expand = expansion(mult = c(0, 0.9))) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.x        = element_blank(),
    #axis.title.x       = element_blank(),
    axis.ticks.x       = element_blank(),
    panel.grid.major.x = element_blank(),
    strip.text.y       = element_text(angle = 0, face = "bold"),
    strip.text.x       = element_text(face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.spacing.x    = unit(1.2, "lines")
  ) +

  labs(
    title    = "",
    subtitle = "",
    y        = NULL,
    x = "Δ-RMST (years) per 1,000 individuals",
    caption  = ""
  )

flipped

Code
library(tidyverse)
library(ggrounded)   # devtools::install_github("botan/ggrounded")

# ── 1. filter & prepare your plotting df (as before) ────────────────────
df_plot <- data_clean |>
  semi_join(
    tribble(
      ~category,   ~subgroup,
      "Overall",   "Overall",
      "Sex",       "Men",
      "Sex",       "Women",
      "Age Group", "Age <35",
      "Age Group", "Age 35-44",
      "Age Group", "Age ≥50",
      "HPA Count", "HPA 0",
      "HPA Count", "HPA 1",
      "HPA Count", "HPA 2+"
    ),
    by = c("category", "subgroup_clean" = "subgroup")
  ) |>
  mutate(
    time_facet = factor(time_numeric,
                        levels = c(5, 10, 20),
                        labels = c("5 years", "10 years", "20 years"))
  ) |>
  group_by(category, subgroup_clean) |>
  mutate(order_key = difference[time_numeric == 20]) |>
  ungroup() |>
  group_by(category) |>
  mutate(subgroup_clean = fct_reorder(subgroup_clean, order_key, .desc = TRUE)) |>
  ungroup()

# ── 2. draw with rounded bars (geom_col_rounded) ────────────────────────
ggplot(df_plot, aes(x = subgroup_clean, y = difference)) +

  # rounded‐end bars (exact length = difference)
  geom_col_rounded(
    radius = grid::unit(4, "pt"),
    width  = 0.7,
    fill   = "grey80",
    colour = NA
  ) +

  # 95 % CI whiskers
  geom_errorbar(
    aes(ymin = difference_lci, ymax = difference_uci),
    width = 0.2,
    size  = 0.6
  ) +

  # numeric labels just beyond bar end
  geom_text(
    aes(label = scales::number(difference, accuracy = 0.1)),
    hjust = -0.1,
    size  = 3
  ) +

  # horizontal orientation
  coord_flip() +

  # one row per category, three cols for 5/10/20 y
  facet_grid(category ~ time_facet,
             scales = "free_y", space = "free_y") +

  # drop the x‐axis (now vertical), keep a little padding
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +

  labs(
    title    = "Accumulated treatment benefit (ΔRMST)",
    subtitle = "bars = point estimates • whiskers = 95 % CI",
    x        = NULL,
    y        = "ΔRMST (years)",
    caption  = "Flexible parametric survival model, Swedish registers"
  ) +

  theme_minimal(base_size = 13) +
  theme(
    axis.text.y        = element_text(size = 11),
    axis.ticks.y       = element_blank(),
    panel.grid.major.y = element_blank(),
    strip.text.y       = element_text(angle = 0, face = "bold"),
    strip.text.x       = element_text(face = "bold"),
    panel.spacing.x    = unit(1.2, "lines")
  )