Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Sample size and effect sizes for the simulation. Each b_x_y is the
# coefficient of x in the linear predictor (liability) of y.
n <- 1e6
b_g_ldl <- 0.5
b_ldl_ihd <- 0.2
b_ldl_vd <- 0.3
b_age_ihd <- 0.1
b_age_vd <- 0.1
b_age_death <- 0.2
b_ihd_death <- 1.5

# Exposure side: standard-normal age, genotype as a 0/1/2 count (binomial with
# two trials and probability 0.4), and ldl shifted by genotype plus unit noise.
# The random draws are made in a fixed order so that a given RNG state always
# yields the same data.
age <- rnorm(n)
g <- rbinom(n, size = 2, prob = 0.4)
ldl <- b_g_ldl * g + rnorm(n)

# Binary outcomes: each one is a Bernoulli draw whose probability is the
# logistic transform of a noisy linear liability.
ihd_liability <- b_age_ihd * age + b_ldl_ihd * ldl + rnorm(n)
ihd <- rbinom(n, size = 1, prob = plogis(ihd_liability))
death_liability <- b_age_death * age + b_ihd_death * ihd + rnorm(n)
death <- rbinom(n, size = 1, prob = plogis(death_liability))
vd_liability <- b_age_vd * age + b_ldl_vd * ldl + rnorm(n)
vd <- rbinom(n, size = 1, prob = plogis(vd_liability))

# Diagnosis depends on ldl: among vd cases, only those with ldl above the
# case mean are recorded as diagnosed (TRUE); the remaining cases are set to
# NA, and everyone without vd is FALSE.
mean_ldl_vd <- mean(ldl[vd == 1])
vd_diagnosed <- vd == 1
vd_diagnosed[vd_diagnosed & ldl <= mean_ldl_vd] <- NA
table(vd_diagnosed)
vd_diagnosed
FALSE TRUE
474742 262284
# Outcome for the complementary "unknown" diagnosis, derived from vd_diagnosed:
#   vd_diagnosed NA    (vd case that was not diagnosed) -> 1
#   vd_diagnosed TRUE  (diagnosed vd case)              -> NA
#   vd_diagnosed FALSE (no vd)                          -> 0
unknown_diagnosed <- as.numeric(vd_diagnosed)
unknown_diagnosed[which(vd_diagnosed)] <- NA
unknown_diagnosed[is.na(vd_diagnosed)] <- 1

# Collect every simulated variable in one analysis table, then tabulate two of
# the outcomes.
dat <- tibble(
  age = age, ldl = ldl, ihd = ihd, death = death, vd = vd, g = g,
  vd_diagnosed = vd_diagnosed, unknown_diagnosed = unknown_diagnosed
)
table(dat[["death"]])
table(dat[["unknown_diagnosed"]])
# Load the ivreg package, which provides ivreg() for instrumental-variable
# (two-stage least squares) regression.
library (ivreg)
# IV regression of the binary ihd outcome on ldl, using genotype g as the
# instrument for ldl (formula form: outcome ~ regressor | instrument).
summary (ivreg (ihd ~ ldl | g, data = dat))
Call:
ivreg(formula = ihd ~ ldl | g, data = dat)
Residuals:
Min 1Q Median 3Q Max
-0.7223 -0.5110 0.4048 0.4820 0.6625
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5009955 0.0007576 661.29 <2e-16 ***
ldl 0.0385066 0.0014332 26.87 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1e+00 1e+06 1.205e+05 <2e-16 ***
Wu-Hausman 1e+00 1e+06 2.083e+00 0.149
Sargan 0e+00 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4979 on 999998 degrees of freedom
Multiple R-Squared: 0.007339, Adjusted R-squared: 0.007338
Wald test: 721.8 on 1 and 999998 DF, p-value: < 2.2e-16
# Same IV model with death as the outcome. In the simulation ldl enters the
# death liability only through ihd, so any ldl effect here is indirect.
summary (ivreg (death ~ ldl | g, data = dat))
Call:
ivreg(formula = death ~ ldl | g, data = dat)
Residuals:
Min 1Q Median 3Q Max
-0.7011 -0.6359 0.3485 0.3607 0.4160
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.6380094 0.0007291 875.104 < 2e-16 ***
ldl 0.0109827 0.0013792 7.963 1.68e-15 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1e+00 1e+06 1.205e+05 <2e-16 ***
Wu-Hausman 1e+00 1e+06 1.270e-01 0.721
Sargan 0e+00 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4791 on 999998 degrees of freedom
Multiple R-Squared: 0.0006391, Adjusted R-squared: 0.0006381
Wald test: 63.41 on 1 and 999998 DF, p-value: 1.683e-15
# IV model for the true (fully observed) vd status, fitted on all rows; this
# is the reference the diagnosis-based outcomes can be compared against.
summary (ivreg (vd ~ ldl | g, data = dat))
Call:
ivreg(formula = vd ~ ldl | g, data = dat)
Residuals:
Min 1Q Median 3Q Max
-0.8361 -0.5120 0.3585 0.4708 0.7639
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5006012 0.0007536 664.31 <2e-16 ***
ldl 0.0618878 0.0014256 43.41 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1e+00 1e+06 1.205e+05 <2e-16 ***
Wu-Hausman 1e+00 1e+06 1.186e+00 0.276
Sargan 0e+00 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4952 on 999998 degrees of freedom
Multiple R-Squared: 0.01642, Adjusted R-squared: 0.01642
Wald test: 1885 on 1 and 999998 DF, p-value: < 2.2e-16
# IV model for the diagnosed-vd outcome. vd_diagnosed is NA for vd cases whose
# ldl is at or below the vd-case mean, so those rows are not used in the fit
# (note the reduced residual degrees of freedom in the output).
summary (ivreg (vd_diagnosed ~ ldl | g, data = dat))
Call:
ivreg(formula = vd_diagnosed ~ ldl | g, data = dat)
Residuals:
Min 1Q Median 3Q Max
-1.44993 -0.33306 -0.08287 0.42341 0.92120
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.206503 0.001020 202.4 <2e-16 ***
ldl 0.229349 0.001379 166.3 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1 737024 89237.91 < 2e-16 ***
Wu-Hausman 1 737023 15.16 9.9e-05 ***
Sargan 0 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4148 on 737024 degrees of freedom
Multiple R-Squared: 0.2492, Adjusted R-squared: 0.2492
Wald test: 2.765e+04 on 1 and 737024 DF, p-value: < 2.2e-16
# IV model for the "unknown" outcome: 1 for vd cases that were not diagnosed
# as vd, 0 for people without vd; diagnosed vd cases are NA and not used.
summary (ivreg (unknown_diagnosed ~ ldl | g, data = dat))
Call:
ivreg(formula = unknown_diagnosed ~ ldl | g, data = dat)
Residuals:
Min 1Q Median 3Q Max
-1.0940 -0.3711 -0.2081 0.5517 0.7138
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3644671 0.0005445 669.32 <2e-16 ***
ldl -0.1483687 0.0018707 -79.31 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1 737714 71584.97 < 2e-16 ***
Wu-Hausman 1 737713 20.41 6.24e-06 ***
Sargan 0 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4596 on 737714 degrees of freedom
Multiple R-Squared: 0.07912, Adjusted R-squared: 0.07912
Wald test: 6291 on 1 and 737714 DF, p-value: < 2.2e-16
# Repeat the diagnosed-vd model after dropping rows with death == 1, i.e.
# fitting only on individuals whose simulated death indicator is 0.
summary (ivreg (vd_diagnosed ~ ldl | g, data = dat %>% subset (death != 1 )))
Call:
ivreg(formula = vd_diagnosed ~ ldl | g, data = dat %>% subset(death !=
1))
Residuals:
Min 1Q Median 3Q Max
-1.3291 -0.3276 -0.0848 0.4186 0.8088
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.206043 0.001613 127.7 <2e-16 ***
ldl 0.228060 0.002278 100.1 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1 261600 31766.914 <2e-16 ***
Wu-Hausman 1 261599 3.263 0.0709 .
Sargan 0 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4107 on 261600 degrees of freedom
Multiple R-Squared: 0.2548, Adjusted R-squared: 0.2548
Wald test: 1.003e+04 on 1 and 261600 DF, p-value: < 2.2e-16
# Repeat the "unknown" outcome model on the same death == 0 subset.
summary (ivreg (unknown_diagnosed ~ ldl | g, data = dat %>% subset (death != 1 )))
Call:
ivreg(formula = unknown_diagnosed ~ ldl | g, data = dat %>% subset(death !=
1))
Residuals:
Min 1Q Median 3Q Max
-1.0323 -0.3744 -0.2083 0.5509 0.7160
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3632821 0.0008972 404.92 <2e-16 ***
ldl -0.1503424 0.0031415 -47.86 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 1 267055 25804.97 < 2e-16 ***
Wu-Hausman 1 267054 21.08 4.41e-06 ***
Sargan 0 NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4619 on 267055 degrees of freedom
Multiple R-Squared: 0.07365, Adjusted R-squared: 0.07364
Wald test: 2290 on 1 and 267055 DF, p-value: < 2.2e-16