library (tidyverse)
library (magrittr)
link_list_dk <- list (
"1996_atc_code_data.txt" = "https://medstat.dk/da/download/file/MTk5Nl9hdGNfY29kZV9kYXRhLnR4dA==" ,
"1997_atc_code_data.txt" = "https://medstat.dk/da/download/file/MTk5N19hdGNfY29kZV9kYXRhLnR4dA==" ,
"1998_atc_code_data.txt" = "https://medstat.dk/da/download/file/MTk5OF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"1999_atc_code_data.txt" = "https://medstat.dk/da/download/file/MTk5OV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2000_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwMF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2001_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwMV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2002_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwMl9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2003_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwM19hdGNfY29kZV9kYXRhLnR4dA==" ,
"2004_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwNF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2006_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwNl9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2007_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwN19hdGNfY29kZV9kYXRhLnR4dA==" ,
"2008_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwOF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2009_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAwOV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2010_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxMF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2011_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxMV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2012_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxMl9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2013_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxM19hdGNfY29kZV9kYXRhLnR4dA==" ,
"2014_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxNF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2015_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxNV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2016_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxNl9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2017_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxN19hdGNfY29kZV9kYXRhLnR4dA==" ,
"2018_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxOF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2019_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAxOV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2020_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAyMF9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2021_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAyMV9hdGNfY29kZV9kYXRhLnR4dA==" ,
"2022_atc_code_data.txt" = "https://medstat.dk/da/download/file/MjAyMl9hdGNfY29kZV9kYXRhLnR4dA==" ,
"atc_code_text.txt" = "https://medstat.dk/da/download/file/YXRjX2NvZGVfdGV4dC50eHQ=" ,
"atc_groups.txt" = "https://medstat.dk/da/download/file/YXRjX2dyb3Vwcy50eHQ=" ,
"population_data.txt" = "https://medstat.dk/da/download/file/cG9wdWxhdGlvbl9kYXRhLnR4dA=="
)
# ATC codes are stored in files
seq_atc <- c (1 : 26 )
atc_code_data_list <- link_list_dk[seq_atc]
# drugs names
eng_drug <- read_delim (link_list_dk[["atc_groups.txt" ]], delim = ";" , col_names = c (paste0 ("V" , c (1 : 6 ))), col_types = cols (V1 = col_character (), V2 = col_character (), V3 = col_character (), V4 = col_character (), V5 = col_character (), V6 = col_character ())) %>%
# keep drug classes labels in English
filter (V5 == "1" ) %>%
select (ATC = V1,
drug = V2,
unit_dk = V4)
# drugs data
atc_data <- map (atc_code_data_list, ~ read_delim (file = .x, delim = ";" , trim_ws = T, col_names = c (paste0 ("V" , c (1 : 14 ))), col_types = cols (V1 = col_character (), V2 = col_character (), V3 = col_character (), V4 = col_character (), V5 = col_character (),V6 = col_character (), V7 = col_character (), V8 = col_character (), V9 = col_character (), V10 = col_character (), V11 = col_character (), V12 = col_character (), V13 = col_character (), V14 = col_character ()))) %>%
# bind atc_data from all years
bind_rows ()
# population data
pop_data <- read_delim (link_list_dk[["population_data.txt" ]], delim = ";" , col_names = c (paste0 ("V" , c (1 : 7 ))), col_types = cols (V1 = col_character (), V2 = col_character (), V3 = col_character (), V4 = col_character (), V5 = col_character (), V6 = col_character (), V7 = col_character ())) %>%
# rename and keep columns
select (year = V1,
region_text = V2,
region = V3,
gender = V4,
age = V5,
denominator_per_year = V6) %>%
# human-reading friendly label on sex categories
mutate (
gender_text = case_when (
gender == "1" ~ "men" ,
gender == "2" ~ "women" ,
T ~ as.character (gender)
)
) %>%
# make numeric variables
mutate_at (vars (year, age, denominator_per_year), as.numeric) %>%
arrange (year, age, region, gender)
atc_data %<>%
# rename and keep columns
rename (
ATC = V1,
year = V2,
sector = V3,
region = V4,
gender = V5, # 0 - both; 1 - male; 2 - female; A - age in categories
age = V6,
number_of_people = V7,
patients_per_1000_inhabitants = V8,
turnover = V9,
regional_grant_paid = V10,
quantity_sold_1000_units = V11,
quantity_sold_units_per_unit_1000_inhabitants_per_day = V12,
percentage_of_sales_in_the_primary_sector = V13
)
atc_data %<>%
# clean columns names and set-up labels
mutate (
year = as.numeric (year),
region_text = case_when (
region == "0" ~ "DK" ,
region == "1" ~ "Region Hovedstaden" ,
region == "2" ~ "Region Midtjylland" ,
region == "3" ~ "Region Nordjylland" ,
region == "4" ~ "Region Sjælland" ,
region == "5" ~ "Region Syddanmark" ,
T ~ NA_character_
),
gender_text = case_when (
gender == "0" ~ "both sexes" ,
gender == "1" ~ "men" ,
gender == "2" ~ "women" ,
T ~ as.character (gender)
)
) %>%
mutate_at (vars (turnover, regional_grant_paid, quantity_sold_1000_units, quantity_sold_units_per_unit_1000_inhabitants_per_day, number_of_people, patients_per_1000_inhabitants), as.numeric) %>%
select (- V14) %>%
filter (
region == "0" ,
sector == "0" ,
gender == "2"
)
atc_data %<>%
# deal with non-numeric age in groups
filter (
str_detect (age, "[0-9][0-9][0-9]" )
) %>%
mutate (
age = parse_number (age)
) %>%
select (year, ATC, gender, age, number_of_people, patients_per_1000_inhabitants, region_text, gender_text)
regex_pregabalin <- "^N02BF02$"
atc_data %<>% filter (str_detect (ATC, regex_pregabalin))
atc_data %<>% left_join (eng_drug)
atc_data %<>% left_join (pop_data)
# group to the same categories as in Swedish data
atc_data_se <- readxl:: read_xlsx (path = "C:/Users/au595736/OneDrive - Aarhus Universitet/ISPOR 2023/Statistikdatabasen_2023-03-15 10_54_00.xlsx" , sheet = 1 ) %>%
pivot_longer (cols = c ("2006" : "2022" )) %>%
mutate (
name = as.numeric (name),
country = "Sweden"
)
atc_data %<>% mutate (
age_cat = cut (age, c (- Inf , 14 , 19 , 24 , 29 , 34 , 39 , 44 , 49 , Inf ))
) %>%
group_by (age_cat, year) %>%
mutate (
number_of_people_age_cat = sum (number_of_people),
denominator_per_year_age_cat = sum (denominator_per_year),
patients_per_1000_inhabitants_age_cat = (number_of_people_age_cat/ denominator_per_year_age_cat)* 1000
) %>%
ungroup () %>%
filter (
age_cat != "(-Inf,14]" ,
age_cat != "(49, Inf]"
) %>%
mutate (
age_cat = case_when (
age_cat == "(14,19]" ~ "15-19" ,
age_cat == "(19,24]" ~ "20-24" ,
age_cat == "(24,29]" ~ "25-29" ,
age_cat == "(29,34]" ~ "30-34" ,
age_cat == "(34,39]" ~ "35-39" ,
age_cat == "(39,44]" ~ "40-44" ,
age_cat == "(44,49]" ~ "45-49"
),
country = "Denmark"
) %>%
distinct (age_cat, year, number_of_people_age_cat, denominator_per_year_age_cat, patients_per_1000_inhabitants_age_cat)
atc <- full_join (atc_data, atc_data_se, by = c ("year" = "name" , "age_cat" = "Ålder" , "patients_per_1000_inhabitants_age_cat" = "value" , "country" = "country" )) %>%
select (year, age_cat, patients_per_1000_inhabitants_age_cat, country)
ggplot <- atc %>%
# filter(!is.na(patients_per_1000_inhabitants_age_cat)) %>%
mutate (label = if_else (year == 2022 , as.character (age_cat), NA_character_ ),
value = case_when (
age_cat == "30-34" ~ "#3E9286" ,
age_cat == "35-39" ~ "#E8B88A" ,
T ~ "gray"
)) %>%
ggplot (aes (x = year, y = patients_per_1000_inhabitants_age_cat, group = age_cat, color = value)) +
geom_line () +
facet_grid (cols = vars (country), scales = "free" , drop = F) +
theme_light (base_size = 14 , base_family = "Ubuntu" ) +
scale_x_continuous (breaks = c (seq (2004 , 2022 , 5 ))) +
scale_color_identity () +
theme (plot.caption = element_text (hjust = 0 , size = 10 ),
legend.position = "none" ,
panel.spacing = unit (0.8 , "cm" )) +
labs (y = "Female patients of reproductive age \n per 1,000 women in the population" , title = "Pregabalin utilization in women" , subtitle = "aged between 15 and 49 years in DK and SE \n 2004-2022" , caption = "Elena Dudukina | @evpatora" ) +
ggrepel:: geom_label_repel (aes (label = label), na.rm = TRUE , nudge_x = 3 , hjust = 0.5 , direction = "y" , segment.size = 0.1 , segment.colour = "black" , show.legend = F, family = "Ubuntu" )
ggsave (filename = paste0 (Sys.Date (), "_pregabalin use in DK and SE.pdf" ), plot = ggplot, path = "C:/Users/au595736/OneDrive - Aarhus Universitet/ISPOR 2023/" , dpi = 900 , height = 6 , width = 8 , device = cairo_pdf)