Function correct_prevalence()
is used for estimating the
true prevalence if the serological test used is imperfect
Arguments:
age
the age vector
pos
the positive count vector (optional if status is
provided).
tot
the total count vector (optional if status is
provided).
status
the serostatus vector (optional if pos &
tot are provided).
bayesian
whether to adjust sero-prevalence using the
Bayesian or frequentist approach. If set to TRUE
, true
sero-prevalence is estimated using MCMC.
init_se
sensitivity of the serological test (default
value 0.95
)
init_sp
specificity of the serological test (default
value 0.8
)
study_size_se
(applicable when
bayesian=TRUE
) sample size for sensitivity validation study
(default value 1000
)
study_size_sp
(applicable when
bayesian=TRUE
) sample size for specificity validation study
(default value 1000
)
chains
(applicable when bayesian=TRUE
)
number of Markov chains (default to 1
)
warmup
(applicable when bayesian=TRUE
)
number of warm up runs (default value 1000
)
iter
(applicable when bayesian=TRUE
)
number of iterations (default value 2000
)
The function will return a list of 2 items:
info
if bayesian = TRUE
contains estimated values for se,
sp and corrected seroprevalence
else return the formula for computing corrected seroprevalence
corrected_sero
return a data.frame with
age
, sero
(corrected sero) and
pos
, tot
(adjusted based on corrected
prevalence)
# ---- estimate real prevalence using Bayesian approach ----
data <- rubella_uk_1986_1987
output <- correct_prevalence(data$age, pos = data$pos, tot = data$tot, warmup = 1000, iter = 4000, init_se=0.9, init_sp = 0.8, study_size_se=1000, study_size_sp=3000)
#>
#> SAMPLING FOR MODEL 'prevalence_correction' NOW (CHAIN 1).
#> Chain 1:
#> Chain 1: Gradient evaluation took 2.3e-05 seconds
#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
#> Chain 1: Adjust your expectations accordingly!
#> Chain 1:
#> Chain 1:
#> Chain 1: Iteration: 1 / 4000 [ 0%] (Warmup)
#> Chain 1: Iteration: 400 / 4000 [ 10%] (Warmup)
#> Chain 1: Iteration: 800 / 4000 [ 20%] (Warmup)
#> Chain 1: Iteration: 1001 / 4000 [ 25%] (Sampling)
#> Chain 1: Iteration: 1400 / 4000 [ 35%] (Sampling)
#> Chain 1: Iteration: 1800 / 4000 [ 45%] (Sampling)
#> Chain 1: Iteration: 2200 / 4000 [ 55%] (Sampling)
#> Chain 1: Iteration: 2600 / 4000 [ 65%] (Sampling)
#> Chain 1: Iteration: 3000 / 4000 [ 75%] (Sampling)
#> Chain 1: Iteration: 3400 / 4000 [ 85%] (Sampling)
#> Chain 1: Iteration: 3800 / 4000 [ 95%] (Sampling)
#> Chain 1: Iteration: 4000 / 4000 [100%] (Sampling)
#> Chain 1:
#> Chain 1: Elapsed Time: 0.438 seconds (Warm-up)
#> Chain 1: 0.88 seconds (Sampling)
#> Chain 1: 1.318 seconds (Total)
#> Chain 1:
# check fitted value
output$info[1:2, ]
#> mean se_mean sd 2.5% 25% 50%
#> est_se 0.9276338 9.946100e-05 0.005901742 0.9161846 0.9236231 0.9276495
#> est_sp 0.8029867 8.780135e-05 0.006854400 0.7889984 0.7985877 0.8031301
#> 75% 97.5% n_eff Rhat
#> est_se 0.9316438 0.9390469 3520.908 0.9996704
#> est_sp 0.8075381 0.8162755 6094.478 0.9999055
# ---- estimate real prevalence using frequentist approach ----
freq_output <- correct_prevalence(data$age, pos = data$pos, tot = data$tot, bayesian = FALSE, init_se=0.9, init_sp = 0.8)
# check info
freq_output$info
#> [1] "Formula: real_sero = (apparent_sero + sp - 1) / (se + sp -1)"
# compare original prevalence and corrected prevalence
ggplot()+
geom_point(aes(x = data$age, y = data$pos/data$tot, color="apparent prevalence")) +
geom_point(aes(x = output$corrected_se$age, y = output$corrected_se$sero, color="estimated prevalence (bayesian)" )) +
geom_point(aes(x = freq_output$corrected_se$age, y = freq_output$corrected_se$sero, color="estimated prevalence (frequentist)" )) +
scale_color_manual(
values = c(
"apparent prevalence" = "red",
"estimated prevalence (bayesian)" = "blueviolet",
"estimated prevalence (frequentist)" = "royalblue")
)+
labs(x = "Age", y = "Prevalence")
Data after seroprevalence correction
Bayesian approach
suppressWarnings(
corrected_data <- farrington_model(
age = output$corrected_se$age, pos = output$corrected_se$pos, tot = output$corrected_se$tot,
start=list(alpha=0.07,beta=0.1,gamma=0.03))
)
plot(corrected_data)
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.
Frequentist approach
suppressWarnings(
corrected_data <- farrington_model(
age = freq_output$corrected_se$age, pos = freq_output$corrected_se$pos, tot = freq_output$corrected_se$tot,
start=list(alpha=0.07,beta=0.1,gamma=0.03))
)
plot(corrected_data)
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.
Original data
suppressWarnings(
original_data <- farrington_model(
age = data$age, pos = data$pos, tot = data$tot,
start=list(alpha=0.07,beta=0.1,gamma=0.03))
)
plot(original_data)
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.