Instantenous 10 year risk of CVD in HPA participants vs controls

Author

Daniel Väisänen

Description

Forrestplot style plot of all different cox-models.

Load libraries

Code

library(tidyverse)
library(ckbplotr)
library(glue)
library(scales)
library(showtext)
library(systemfonts)

Setup fonts

Code

font_add_google("Source Sans 3", "sourcesans")

font_add_google("Roboto", "roboto")
showtext_auto()

Load and prepare data

Code

df_forrest_plot <-
  read.csv(here::here(
    "..",
    "results",
    "models",
    "ten_yr_lifestyle_interaction_HR.csv"
  ))

# Data preparation and sorting
df_forrest_plot <- df_forrest_plot |>
  # Filter out old occupation groups
  filter(!exposure %in% c("m8")) |>

  # Create participant/event summary columns
  mutate(
    ctrl = glue("{comma(n_Control)} ({comma(events_Control)})"),
    hpa = glue("{comma(n_HPA)} ({comma(events_HPA)})")
  ) |>

  # Add header rows
  add_row(label = "Sex", block = "Sociodemographics", ctrl = " ", hpa = " ") |>
  add_row(
    label = "Age group",
    block = "Sociodemographics",
    ctrl = " ",
    hpa = " "
  ) |>
  add_row(
    label = "Education",
    block = "Sociodemographics",
    ctrl = " ",
    hpa = " "
  ) |>
  add_row(
    label = "Occupation",
    block = "Sociodemographics",
    ctrl = " ",
    hpa = " "
  ) |>
  add_row(
    label = "Income (% of median)",
    block = "Sociodemographics",
    ctrl = " ",
    hpa = " "
  ) |>
  add_row(label = "BMI", block = "Lifestyle related", ctrl = " ", hpa = " ") |>
  add_row(
    label = "VO₂max (mL·kg⁻¹·min⁻¹)",
    block = "Lifestyle related",
    ctrl = " ",
    hpa = " "
  ) |>
  add_row(
    label = "Exercise",
    block = "Lifestyle related",
    ctrl = " ",
    hpa = " "
  ) |>
  add_row(
    label = "Health",
    block = "Lifestyle related",
    ctrl = " ",
    hpa = " "
  ) |>

  # Clean up labels
  mutate(
    label = case_when(
      label == "HPA, 1 test" ~ "1 assessment",
      label == "HPA, 2 tests" ~ "2 assessments",
      label == "HPA, ≥3 tests" ~ "≥3 assessments",

      label == "BMI: Underweight" ~ "< 18.5",
      label == "BMI: Normal weight" ~ "18.5-25",
      label == "BMI: Overweight" ~ "25-30",
      label == "BMI: Obesity" ~ "≥30",

      label == "VO2max: Very high" ~ "≥50",
      label == "VO2max: High" ~ "40–49",
      label == "VO2max: Moderate" ~ "30–39",
      label == "VO2max: Low" ~ "20–29",
      label == "VO2max: Very low" ~ "<20",

      label == "Exercise: Good" ~ "3 or more times/week",
      label == "Exercise: Moderate" ~ "1-2 times/week",
      label == "Exercise: Low" ~ "Never/sometime",

      label == "Health: Good" ~ "Good or very good",
      label == "Health: Moderate" ~ "Neither poor nor good",
      label == "Health: Low" ~ "Poor or very poor",

      label == "Education" & exposure == "m11" ~ "Education professionals",

      TRUE ~ label
    ),

    # Set factor levels for proper ordering
    label = factor(
      label,
      levels = c(
        # Main models
        "M1: birth cohort",
        "M2: + age, sex, comorbidity & CVD",
        "M3: + income",
        "M4: + occupation",
        "M5: + municipality & origin",

        # Number of tests
        "≥3 assessments",
        "2 assessments",
        "1 assessment",

        # Sociodemographics
        "Sex",
        "Women",
        "Men",
        "Age group",
        "<35 yrs",
        "35–49 yrs",
        "≥50 yrs",
        "Education",
        "Tertiary",
        "Secondary",
        "Primary",
        "Occupation",
        "Managers",
        "Science and engineering",
        "Other professionals",
        "Education professionals",
        "Health care",
        "Associate professionals",
        "Administration and customer service",
        "Service and shop sales",
        "Personal care",
        "Manufacturing",
        "Mechanical manufacturing",
        "Building",
        "Transport",
        "Agriculture and forestry",
        "Cleaners",
        "Other elementary occupations",
        "Income (% of median)",
        "≥200%",
        "120–199%",
        "80–119%",
        "60–79%",
        "<60%",

        # Lifestyle related
        "BMI",
        "<18.5",
        "18.5-25",
        "25-30",
        "≥30",
        "VO₂max (mL·kg⁻¹·min⁻¹)",
        "≥50",
        "40–49",
        "30–39",
        "20–29",
        "<20",
        "Exercise",
        "3 or more times/week",
        "1-2 times/week",
        "Never/sometime",
        "Health",
        "Good or very good",
        "Neither poor nor good",
        "Poor or very poor"
      )
    ),

    # Clean up block names
    block = case_when(
      block == "Main models" ~ "Main models",
      block == "Number of tests" ~ "HPAs within 5 yrs",
      block == "Lifestyle related" ~ "Lifestyle related*",
      TRUE ~ block
    ),

    block = factor(
      block,
      levels = c(
        "Main models",
        "HPAs within 5 yrs",
        "Sociodemographics",
        "Lifestyle related*"
      )
    ),

    # Set group colors
    group_color = case_when(
      block == "Main models" ~ block,
      block == "HPAs within 5 yrs" ~ block,
      exposure == "m6" ~ "Sex",
      exposure == "m7" ~ "Age",
      exposure == "m10" ~ "Education",
      exposure == "m11" ~ "Occupation",
      exposure == "m9" ~ "Income",
      block == "Lifestyle related*" ~ exposure,
    )
  ) |>

  # Final cleanup
  arrange(block, label) |>
  mutate(
    row = row_number(),
    across(
      where(is.character),
      ~ .x |> str_replace_all("\\p{Z}", " ") |> str_squish()
    )
  )

Prepare data for forest plot

Code

# Build plotting data
my_results <- df_forrest_plot |>
  transmute(
    subgroup = as.character(label),
    ctrl = ctrl,
    hpa = hpa,
    est = estimate,
    lci = conf.low,
    uci = conf.high,
    exposure = exposure,
    block = block,
    group_color = group_color
  ) |>
  drop_na(subgroup)

# Build row labels
row_labels <- df_forrest_plot |>
  transmute(
    row = row,
    subgroup = as.character(label),
    group = as.character(block)
  ) |>
  distinct() |>
  mutate(label = subgroup) |>
  arrange(row) |>
  drop_na(subgroup)

# Define color palette
my_cols <- c(
  "Main models" = "#418286",
  "HPAs within 5 yrs" = "#e1800f",
  "Sex" = "#418286",
  "Age" = "#e1800f",
  "Education" = "#418286",
  "Occupation" = "#e1800f",
  "Income" = "#418286",
  "BMI" = "#e1800f",
  "Exercise" = "#418286",
  "Health" = "#e1800f",
  "VO2" = "#418286"
)

Create forest plot

Code

forest_plot(
  my_results,
  col.key = "subgroup",
  row.labels = row_labels,
  row.labels.levels = c("group", "label"),
  col.lci = "lci",
  col.uci = "uci",
  exponentiate = FALSE,

  # Layout positioning
  col.left.pos = unit(c(1.2, 31), "mm"),
  left.space = unit(60, "mm"),
  col.right.pos = unit(1.2, "mm"),
  right.space = unit(32.7, "mm"),

  # Axis settings
  xlab = "HPA vs controls\nHazard ratio (95% CI)",
  xlim = c(0.4, 1.5),
  xticks = c(0.6, .8, 1, 1),
  nullval = 1,

  # Column settings
  col.left = c("ctrl", "hpa"),
  col.left.heading = c("Control\nN (events)", "HPA\nN (events)"),
  col.right.heading = "Hazard ratio\n(95% CI)",

  # Visual styling
  shape = 15,
  pointsize = 2.5,
  plotcolour = "grey20",
  colour = "group_color",
  cicolour = "group_color",
  fill = "#e1800f",
  plot.margin = margin(8, 8, 2, 8, "mm"),

  # Additional styling
  addarg = list(nullline = c("linetype = 'solid'", "colour = 'grey20'")),

  add = list(
    start = list(
      geom_vline(
        xintercept = c(0.6, 0.8, 1.2),
        linetype = "dashed",
        colour = "grey",
        size = 0.3
      )
    ),
    end = list(
      scale_x_log10(
        breaks = c(0.4, 0.6, 0.8, 1, 1.2, 1.5),
        labels = c("0.4", "0.6", "0.8", "1", "1.2", "1.5"),
        expand = expansion(mult = 0),
        sec.axis = dup_axis(
          name = NULL,
          breaks = c(.6, 0.8, 1, 1.2),
          labels = c("0.6", "0.8", "1", "1.2")
        )
      ),

      scale_colour_manual(
        values = my_cols,
        name = "Exposure\nblock"
      ),

      labs(
        caption = "*Lifestyle related variables were available only for HPA participants"
      ),

      scale_fill_manual(
        values = my_cols,
        guide = "none"
      ),

      theme(
        axis.title.x.bottom = element_blank(),
        axis.title.x.top = element_text(face = "bold", vjust = 1),
        axis.text.x.top = element_text(),
        axis.line.x.top = element_blank(),
        axis.ticks.length.x.top = unit(0, "mm"),
        axis.ticks.x.top = element_line(colour = "black"),
        panel.background = element_rect(fill = "white", color = "white"),
        plot.background = element_rect(fill = "white", color = "white"),
        text = element_text(
          family = c(
            'Source Sans 3',
            'roboto-flex',
            'dejavu-bold',
            'roboto',
            'Source Sans 3'
          )
        ),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        plot.caption = element_text(
          hjust = 2.2,
          size = 9,
          face = "italic",
          margin = margin(t = 5)
        )
      ),

      annotate(
        "segment",
        x = 0.75,
        xend = 1.25,
        y = Inf,
        yend = Inf,
        colour = "grey60",
        size = 0.5,
        inherit.aes = FALSE
      )
    )
  )
)

Save plot

Code

ggsave(
  here::here(
    "..",
    "results",
    "figures",
    "forest_plot_ten_yr_lifestyle_interaction_HR.pdf"
  ),
  width = 210,
  height = 280,
  units = "mm",
  dpi = 300
)

ggsave(
  here::here(
    "..",
    "results",
    "figures",
    "forest_plot_ten_yr_lifestyle_interaction_HR.svg"
  ),
  width = 210,
  height = 280,
  units = "mm",
  dpi = 300
)