residual-height notebook 2

Author

Gibran Hemani

Published

February 13, 2026

dat <- sim(b_xy = -0.5, b_uy = -0.2, b_ux = 1, b_prs=1, h2=0.7, r2_env_height=0.15, r2_prs=0.4, n=100000)[[2]]
str(dat)
tibble [100,000 × 9] (S3: tbl_df/tbl/data.frame)
 $ prs_explained  : num [1:100000] -0.4 0.114 0.213 -0.449 0.138 ...
 $ prs_unexplained: num [1:100000] -0.21 -0.692 -0.491 0.825 0.526 ...
 $ prs            : num [1:100000] -0.61 -0.577 -0.277 0.377 0.665 ...
 $ u              : num [1:100000] -0.106 0.406 0.151 -0.251 0.682 ...
 $ ex             : num [1:100000] 0.518 -0.422 -0.401 -0.494 -0.104 ...
 $ x              : num [1:100000] -0.199 -0.593 -0.527 -0.369 1.242 ...
 $ residual_x     : num [1:100000] 0.2013 -0.7078 -0.74 0.0798 1.104 ...
 $ ey             : num [1:100000] -0.955 -0.132 0.382 -1.381 -0.833 ...
 $ y              : num [1:100000] -0.8345 0.0837 0.6148 -1.1465 -1.5905 ...
gx_to_gp <- function(liability, varexp, prevalence) {
    x_prime <- qnorm(prevalence, 0, 1, lower.tail=FALSE)
    p <- pnorm(x_prime, mean=liability, sd = sqrt(1 - varexp), lower.tail=FALSE)
    return(p)
}

generate_binary_outcome <- function(liability, varexp, prevalence) {
    cc <- rbinom(length(liability), 1, gx_to_gp(liability, varexp, prevalence))
    return(cc)
}

binary_2sls <- function(x, y, prs) {
    mod <- summary(glm(y ~ prs, family="binomial"))
    bgy <- mod$coefficients[2, 1]
    se_bgy <- mod$coefficients[2, 2]

    mod <- summary(lm(x ~ prs))
    bgx <- mod$coefficients[2, 1]
    se_bgx <- mod$coefficients[2, 2]
    beta_xy <- bgy / bgx
    se_beta_xy <- sqrt((se_bgy / bgx)^2 + (bgy * se_bgx / bgx^2)^2)
    return(list(beta_xy = beta_xy, se_beta_xy = se_beta_xy))
}

continuous_2sls <- function(x, y, prs) {
    mod <- summary(lm(y ~ prs))
    bgy <- mod$coefficients[2, 1]
    se_bgy <- mod$coefficients[2, 2]

    mod <- summary(lm(x ~ prs))
    bgx <- mod$coefficients[2, 1]
    se_bgx <- mod$coefficients[2, 2]
    beta_xy <- bgy / bgx
    se_beta_xy <- sqrt((se_bgy / bgx)^2 + (bgy * se_bgx / bgx^2)^2)
    return(list(beta_xy = beta_xy, se_beta_xy = se_beta_xy))
}

dat$cc <- generate_binary_outcome(dat$y, varexp = 0.2, prevalence = 0.1)

continuous_2sls(dat$x, dat$y, dat$prs_explained)
binary_2sls(dat$x, dat$cc, dat$prs_explained)
$beta_xy
-0.497850997027835
$se_beta_xy
0.00707376641206964
$beta_xy
-0.626409705538854
$se_beta_xy
0.0160154339944029
param <- expand.grid(
    varexp = c(0.2, 0.5, 0.8),
    prevalence = c(0.01, 0.1, 0.5)
)

binary_res <- lapply(1:nrow(param), \(i) {
    varexp <- param[i,]$varexp
    prevalence <- param[i,]$prevalence
    dat$cc <- generate_binary_outcome(dat$y, varexp = varexp, prevalence = prevalence)
    res_continuous <- continuous_2sls(dat$x, dat$y, dat$prs_explained)
    res_binary <- binary_2sls(dat$x, dat$cc, dat$prs_explained)
    tibble(
        varexp = varexp,
        prevalence = prevalence,
        method = c("binary", "continuous"),
        beta_xy = c(res_binary$beta_xy, res_continuous$beta_xy),
        se_beta_xy = c(res_binary$se_beta_xy, res_continuous$se_beta_xy)
    )
}) %>% bind_rows()

ggplot(binary_res, aes(x = beta_xy, y = se_beta_xy, color = method)) +
    geom_point() +
    facet_grid(varexp ~ prevalence) +
    labs(x = "Estimated beta_xy", y = "Standard error of beta_xy", color = "Method")

estimate_beta_cy(
    height = dat$x,
    prs = dat$prs_explained,
    outcome = dat$cc,
    var_c = 0.15,
    h2 = 0.7,
    n_boot = 100
)

estimate_beta_cy(
    height = dat$x,
    prs = dat$prs_explained,
    outcome = dat$y,
    var_c = 0.15,
    h2 = 0.7,
    n_boot = 100
)
Binary outcome detected (prevalence = 0.189)
Liability scale transformation factor = 0.2705
$beta_xy
height: -0.353678285546259
$se_beta_xy
0.00836783982859172
$beta_cy_method1
height: -0.0461408201620962
$beta_cy_method2
height: -0.0461408201621222
$beta_cy_combined
height: -0.0461408201621133
$se_method1
0.0642545301989432
$se_method2
0.0466293324546653
$se_combined
0.0380479336137564
$cov_estimators
5.18850345151184e-05
$correlation_estimators
0.0173172592745444
$weight_method1
0.342370006827975
$weight_method2
0.657629993172025
$r2_prs
0.281387979315542
$is_binary
TRUE
$prevalence
0.18905
$outcome_scale
'liability'
$beta_cy_method1_observed
height: -0.012482458056538
$beta_cy_method2_observed
height: -0.0124824580565451
$boot_distribution_method1
  1. -0.0611031904933605
  2. -0.135169080428818
  3. -0.139630417357704
  4. -0.156230633575275
  5. -0.119066010525505
  6. 0.0205816882683652
  7. 0.080569271731431
  8. -0.0527727037713818
  9. -0.0498117314459362
  10. -0.0787439314490554
  11. 0.041691414202236
  12. -0.0609742220878743
  13. 0.0397545020862654
  14. -0.0954466012620649
  15. -0.0591472885341207
  16. -0.0585753434347414
  17. -0.0162898277995962
  18. -0.105405567851969
  19. 0.0198725815306079
  20. -0.122971528519119
  21. -0.0434949530082326
  22. -0.0622390114312051
  23. 0.0757112913282528
  24. -0.0151974663859412
  25. -0.116032428498501
  26. 0.0806090973291542
  27. -0.129793099597847
  28. 0.0371662748236907
  29. -0.0484091990962174
  30. -0.053298371618411
  31. -0.00561247893316303
  32. -0.0686899047729173
  33. 0.00749802630228404
  34. -0.0598614575650598
  35. -0.121132656930218
  36. 0.000982959868562029
  37. -0.0806600994747147
  38. -0.043797327737302
  39. 0.037344789315098
  40. -0.101471647641016
  41. -0.06088227629016
  42. -0.0420804320225053
  43. -0.0225762662441897
  44. -0.073730119682587
  45. 0.0408548152575747
  46. -0.0344591629950885
  47. 0.0382961762728246
  48. -0.0211696677122442
  49. -0.118105800628724
  50. -0.0360727545944138
  51. -0.0207528663455216
  52. 0.0259590111030823
  53. -0.108834868225128
  54. 0.0209172890814239
  55. -0.00841872670066864
  56. -0.0886134145444476
  57. -0.0121242066573223
  58. -0.0120022724837197
  59. -0.0863692445534315
  60. -0.0105136817488118
  61. -0.109834948624145
  62. -0.0524489238679272
  63. 0.0421164689889682
  64. -0.0692817925022548
  65. -0.0750808129470261
  66. -0.139994570485952
  67. -0.10529482754501
  68. -0.00906185948704938
  69. -0.0424167002940804
  70. -0.0278478007856253
  71. -0.0682019069045269
  72. -0.0427361827046255
  73. -0.173127137533978
  74. -0.0686733587952739
  75. -0.056433295853779
  76. -0.0770850539543246
  77. -0.052261924502032
  78. -0.0246860279526018
  79. -0.0724053705073356
  80. -0.0178154070971077
  81. -0.00365709639212909
  82. 0.0439906158214736
  83. 9.3172723094532e-05
  84. -0.0521812699715958
  85. 0.0383029205540669
  86. 0.102361982320747
  87. -0.260909275675758
  88. 0.0672883814651478
  89. -0.0941630129920866
  90. 0.102542943248134
  91. -0.0455534770373631
  92. 0.00612279782910471
  93. -0.0598015006251095
  94. -0.148969568495035
  95. -0.0245998959426125
  96. -0.040854015979174
  97. -0.146992108183965
  98. -0.0655688569651259
  99. 0.0145760157573712
  100. -0.0251416546763345
$boot_distribution_method2
  1. -0.0652278245705572
  2. -0.104650626868185
  3. -0.113056574457559
  4. -0.0945793262498817
  5. -0.101569936702741
  6. -0.0265829238892682
  7. 0.0292920175141744
  8. -0.087682005918698
  9. -0.0592853920776707
  10. -0.0731726695504334
  11. 0.0257606459816746
  12. -0.0685198125204218
  13. 0.015087702267375
  14. -0.0722248795388675
  15. -0.0396232631309289
  16. -0.0771622496094649
  17. -0.00965308231199817
  18. -0.0884132588714599
  19. -0.0241147025590098
  20. -0.113912765098206
  21. -0.0528612609615785
  22. -0.0404603064127261
  23. 0.0382899789869132
  24. -0.0167458503278753
  25. -0.12514307401458
  26. 0.0405238362724186
  27. -0.108546341675922
  28. -0.00408163724004184
  29. -0.0826122096248635
  30. -0.0533845155778164
  31. -0.0237342762112691
  32. -0.064053665904024
  33. -0.0208504732247801
  34. -0.0711807293854162
  35. -0.110322320963719
  36. -0.0300852487189132
  37. -0.0714529760448407
  38. -0.0356023969553724
  39. 0.0133827003732791
  40. -0.0784652912766696
  41. -0.0532794933065255
  42. -0.0625523855787123
  43. -0.0216974410389273
  44. -0.0619901822883103
  45. 0.0123211903912979
  46. -0.0241138549295462
  47. -0.00260650974968311
  48. -0.0351973835872563
  49. -0.0710054075846515
  50. -0.0444341983998022
  51. -0.0110788458400416
  52. 0.0181073797348081
  53. -0.078523480124807
  54. 0.0220511992277951
  55. -0.0285536538276524
  56. -0.0762069611480715
  57. -0.0265961304900171
  58. -0.0178983001983176
  59. -0.084151750407915
  60. -0.016452289940594
  61. -0.0983578674944817
  62. -0.0447059399782941
  63. 0.0194687654654045
  64. -0.0597526454723468
  65. -0.0683541557797384
  66. -0.125784181966446
  67. -0.083293725878232
  68. -0.034211427401645
  69. -0.0412799394959916
  70. 0.000485459087808684
  71. -0.0709076677296979
  72. -0.0527319789048432
  73. -0.140697611245831
  74. -0.068678899573747
  75. -0.0299791196313125
  76. -0.0689337749031155
  77. -0.063454270599156
  78. -0.0567547738398126
  79. -0.0653908346359646
  80. -0.0470383796348957
  81. -0.00823525867640807
  82. 0.0242222037493185
  83. -0.0153889859976803
  84. -0.0569068866363121
  85. 0.0241691026948184
  86. 0.0619171053039704
  87. -0.191423449680033
  88. 0.0274568382979265
  89. -0.0609507817970366
  90. 0.0736247469107616
  91. -0.0668609025927943
  92. -0.00893319266005886
  93. -0.0513381878313581
  94. -0.118940878537881
  95. -0.0342167235115769
  96. -0.0550974678645145
  97. -0.119563117226794
  98. -0.0471305447376192
  99. -0.00327214786177437
  100. -0.030755677306192
$boot_distribution_combined
  1. -0.0638156735733845
  2. -0.115099230022118
  3. -0.122154661232727
  4. -0.11568688475983
  5. -0.107560067616903
  6. -0.0104351753028205
  7. 0.0468478113906564
  8. -0.0757301079041615
  9. -0.0560418948224978
  10. -0.075080102524705
  11. 0.0312148632061231
  12. -0.0659364286725095
  13. 0.0235328746897928
  14. -0.0801753005637959
  15. -0.0463077038415292
  16. -0.0707986504155139
  17. -0.0119253049099026
  18. -0.0942309158131398
  19. -0.0090547758049033
  20. -0.117014213992477
  21. -0.0496545180436386
  22. -0.0479166817986072
  23. 0.0511019139487295
  24. -0.016215730107103
  25. -0.122023862247033
  26. 0.0542478273741143
  27. -0.115820594330724
  28. 0.010040410694858
  29. -0.0709021246766336
  30. -0.0533550224698466
  31. -0.0175299163534287
  32. -0.0656409750372231
  33. -0.0111447972481363
  34. -0.0673053502149931
  35. -0.114023455762382
  36. -0.0194484259326864
  37. -0.0746052189563927
  38. -0.0384080954631364
  39. 0.0215866009279022
  40. -0.0863419776622178
  41. -0.0558824581685441
  42. -0.0555434026998917
  43. -0.0219983244304536
  44. -0.0660095847341488
  45. 0.022090247731592
  46. -0.0276557781225834
  47. 0.0113973431431254
  48. -0.0303947144073475
  49. -0.0871311694727508
  50. -0.0415714908270597
  51. -0.0143909403065567
  52. 0.0207955428199749
  53. -0.0889011902756792
  54. 0.0216629824032396
  55. -0.0216600586897062
  56. -0.0804545586820998
  57. -0.0216413778286035
  58. -0.0158796771494128
  59. -0.0849109538936565
  60. -0.0144190886134249
  61. -0.10228727583921
  62. -0.0473569054254567
  63. 0.027222659875405
  64. -0.0630151396060412
  65. -0.0706571614400322
  66. -0.130649392780898
  67. -0.0908262432061099
  68. -0.0256009696630043
  69. -0.041669132298195
  70. -0.00921499928851772
  71. -0.0699812963775092
  72. -0.0493097180915236
  73. -0.151800508382532
  74. -0.0686770025773833
  75. -0.0390362361252268
  76. -0.0717245283675347
  77. -0.0596223469894626
  78. -0.0457753970914436
  79. -0.067792401330141
  80. -0.0370333103275994
  81. -0.00666783322387989
  82. 0.0309903151254405
  83. -0.0100883592107368
  84. -0.0552889772265468
  85. 0.0290080980117947
  86. 0.0757642181243608
  87. -0.215213312400637
  88. 0.0410939640040569
  89. -0.0723216536180582
  90. 0.0835254699882406
  91. -0.0595658791598948
  92. -0.00377847309348199
  93. -0.0542357722903421
  94. -0.129221801323547
  95. -0.0309242101911271
  96. -0.0502209371452765
  97. -0.128953981048085
  98. -0.053443269820847
  99. 0.00283852803837933
  100. -0.0288336043400753
Continuous outcome detected - standardized to mean=0, SD=1
$beta_xy
height: -0.441665535052701
$se_beta_xy
0.00528348139012448
$beta_cy_method1
height: -0.165954112166875
$beta_cy_method2
height: -0.165954112166932
$beta_cy_combined
height: -0.165954112166916
$se_method1
0.0401685981744717
$se_method2
0.0292640184437315
$se_combined
0.0269693133336915
$cov_estimators
0.000389186019560699
$correlation_estimators
0.331082780199051
$weight_method1
0.276198223019937
$weight_method2
0.723801776980063
$r2_prs
0.281387979315542
$is_binary
FALSE
$prevalence
<NA>
$outcome_scale
'standardized'
$beta_cy_method1_observed
height: -0.165954112166875
$beta_cy_method2_observed
height: -0.165954112166932
$boot_distribution_method1
  1. -0.201080149534597
  2. -0.141986241730799
  3. -0.117992732362003
  4. -0.234078495267115
  5. -0.198139496172601
  6. -0.169901487133364
  7. -0.219311061001482
  8. -0.202691888846666
  9. -0.139387591249826
  10. -0.143533783033519
  11. -0.20139024933716
  12. -0.131939542347391
  13. -0.182179716321898
  14. -0.246757857627421
  15. -0.1007136685065
  16. -0.208860558880945
  17. -0.106211936450269
  18. -0.143161972668909
  19. -0.172665742473354
  20. -0.14934277410912
  21. -0.212169846032858
  22. -0.1698829744339
  23. -0.186058899620131
  24. -0.15198439422287
  25. -0.134086974194455
  26. -0.142712698959012
  27. -0.174861051623753
  28. -0.231885334860841
  29. -0.160361219837752
  30. -0.158182310786196
  31. -0.186017737163378
  32. -0.10404919494268
  33. -0.151216138236994
  34. -0.137823530039221
  35. -0.118465958980409
  36. -0.246967996908424
  37. -0.133031676581657
  38. -0.157065628461535
  39. -0.159713528843896
  40. -0.1546551446749
  41. -0.123983467660231
  42. -0.151145393514394
  43. -0.161621358629921
  44. -0.245922446713711
  45. -0.14578944366245
  46. -0.184883500452221
  47. -0.129485534936634
  48. -0.174601407857576
  49. -0.224856958729769
  50. -0.159858524100665
  51. -0.144174454129412
  52. -0.154487056212054
  53. -0.154419920955245
  54. -0.19183105558998
  55. -0.170427287908341
  56. -0.130975792442076
  57. -0.154918142569359
  58. -0.166314263600151
  59. -0.210058320097843
  60. -0.254272844404001
  61. -0.15922022015918
  62. -0.220100981200499
  63. -0.133058995852077
  64. -0.145392329889137
  65. -0.116319367183657
  66. -0.129477440555102
  67. -0.231506108093738
  68. -0.128681873275744
  69. -0.248938361317341
  70. -0.177045115971261
  71. -0.14912645048928
  72. -0.135253323620739
  73. -0.137059929184164
  74. -0.204411929375568
  75. -0.251610842808553
  76. -0.173152409110393
  77. -0.176604958309179
  78. -0.167423909291602
  79. -0.152941611521434
  80. -0.170485976545322
  81. -0.18239432664296
  82. -0.0757607054596917
  83. -0.104856076655347
  84. -0.271247756902795
  85. -0.170263412951742
  86. -0.147889486564038
  87. -0.201135270251341
  88. -0.244233025931237
  89. -0.189926407640508
  90. -0.177175495219979
  91. -0.164555823561874
  92. -0.181345122288013
  93. -0.186207406443152
  94. -0.0828697596409811
  95. -0.187953696453619
  96. -0.189810480735222
  97. -0.156560171137261
  98. -0.216972190791485
  99. -0.190278224842864
  100. -0.176162472366338
$boot_distribution_method2
  1. -0.189023247474652
  2. -0.147645214443561
  3. -0.123346812318564
  4. -0.215228704147959
  5. -0.188592171856754
  6. -0.154094450621137
  7. -0.198485458322053
  8. -0.206278923158365
  9. -0.135047129427976
  10. -0.13669786837128
  11. -0.205080464368643
  12. -0.153665238017864
  13. -0.16743611671424
  14. -0.23333631572506
  15. -0.122411146185772
  16. -0.184052972893622
  17. -0.137854615180077
  18. -0.15296174714388
  19. -0.178070993834158
  20. -0.148522337295296
  21. -0.200037332526188
  22. -0.161872414269867
  23. -0.167351033493232
  24. -0.15775434771434
  25. -0.146554375462847
  26. -0.146537920558163
  27. -0.165065807591714
  28. -0.223106707810149
  29. -0.158039951534927
  30. -0.156631695223541
  31. -0.165661724086163
  32. -0.12413587920615
  33. -0.166828375511943
  34. -0.14495905001803
  35. -0.1383541702391
  36. -0.228396757415416
  37. -0.155474542614563
  38. -0.157372770924548
  39. -0.162294393756779
  40. -0.149731418027964
  41. -0.136009617606024
  42. -0.157876236996877
  43. -0.132829212630696
  44. -0.22164551174682
  45. -0.16380754716289
  46. -0.159011878387065
  47. -0.140934085418359
  48. -0.177168149266043
  49. -0.218581250077076
  50. -0.160850218139862
  51. -0.164185721595368
  52. -0.148508841603871
  53. -0.142274482453375
  54. -0.174475874867003
  55. -0.164351211525489
  56. -0.144585879019798
  57. -0.168389965234683
  58. -0.175599434430052
  59. -0.199168812838767
  60. -0.216916074136781
  61. -0.162672343273242
  62. -0.20692413808712
  63. -0.142723545768491
  64. -0.152482475337673
  65. -0.127320708177366
  66. -0.137756432611885
  67. -0.205593481982909
  68. -0.143316534658032
  69. -0.227691623297184
  70. -0.164525026605583
  71. -0.165075349057447
  72. -0.145049151486755
  73. -0.127080926629979
  74. -0.200930870781448
  75. -0.222359238505474
  76. -0.17406081960274
  77. -0.173040342755499
  78. -0.156542957655167
  79. -0.157451822058959
  80. -0.167301495854578
  81. -0.175600392273362
  82. -0.105617730486561
  83. -0.132445986720648
  84. -0.247316327825268
  85. -0.169433738040939
  86. -0.16038671529588
  87. -0.180275153379003
  88. -0.227313321123545
  89. -0.179184563615351
  90. -0.165641164533858
  91. -0.157272321819679
  92. -0.199273472152803
  93. -0.199049366732507
  94. -0.114367186839892
  95. -0.179031093164698
  96. -0.177781138115106
  97. -0.158977300027737
  98. -0.182812012803223
  99. -0.172264826833272
  100. -0.163921762531624
$boot_distribution_combined
  1. -0.192353342398734
  2. -0.146082216236178
  3. -0.121868024948655
  4. -0.220434982959367
  5. -0.191229125867386
  6. -0.158460326017025
  7. -0.204237452775431
  8. -0.205288190655562
  9. -0.136245957270257
  10. -0.138585935853706
  11. -0.204061233534385
  12. -0.147664639479807
  13. -0.171508272726792
  14. -0.23704332174868
  15. -0.116418341406742
  16. -0.190904784060735
  17. -0.129114963543314
  18. -0.150255066847897
  19. -0.176578073013328
  20. -0.148748940485374
  21. -0.203388311197496
  22. -0.164084916752567
  23. -0.172518112873977
  24. -0.156160696813088
  25. -0.143110901386841
  26. -0.14548140114982
  27. -0.16777123658741
  28. -0.225531349002105
  29. -0.15868108171532
  30. -0.157059972486533
  31. -0.17128401872586
  32. -0.118587972706217
  33. -0.162516303319237
  34. -0.14298823207956
  35. -0.132861081630404
  36. -0.233526100762663
  37. -0.1492758628968
  38. -0.15728793872205
  39. -0.161581563453986
  40. -0.151091342578484
  41. -0.132688016361225
  42. -0.15601718998759
  43. -0.140781552192613
  44. -0.228350758045046
  45. -0.15883097899388
  46. -0.166157574428104
  47. -0.137772016119152
  48. -0.176459219850073
  49. -0.22031458965514
  50. -0.160576314008456
  51. -0.158658645080894
  52. -0.150160013855483
  53. -0.145629030985389
  54. -0.179269344942879
  55. -0.166029413025366
  56. -0.140826797291883
  57. -0.164669071753681
  58. -0.173034886746397
  59. -0.202176475393287
  60. -0.227233947702351
  61. -0.161718873003492
  62. -0.210563558740048
  63. -0.14005421425529
  64. -0.150524189763834
  65. -0.124282157344067
  66. -0.135469789717406
  67. -0.2127505032685
  68. -0.139274467189746
  69. -0.233559934583322
  70. -0.167983053040434
  71. -0.160670291613794
  72. -0.142343561237152
  73. -0.129837109402956
  74. -0.201892332979372
  75. -0.230438479634467
  76. -0.173809918238981
  77. -0.174024883237174
  78. -0.159548257161917
  79. -0.156206109923048
  80. -0.168181043762603
  81. -0.177476864873559
  82. -0.0973712732294781
  83. -0.124825702587332
  84. -0.253926146010808
  85. -0.169662892776987
  86. -0.156935002927472
  87. -0.186036680591131
  88. -0.231986513525451
  89. -0.182151441847057
  90. -0.168826926173089
  91. -0.159284012058236
  92. -0.194321693778469
  93. -0.195502440120494
  94. -0.105667653417853
  95. -0.18149550033781
  96. -0.18110362117088
  97. -0.158309693323377
  98. -0.192246993261625
  99. -0.177240095354072
  100. -0.167302624836475
generate_binary_outcome2 <- function(liability, varexp, prevalence) {
    n <- length(liability)
    liability <- scale(liability) * sqrt(varexp)*2 + log(prevalence / (1 - prevalence))
    cc <- rbinom(length(liability), 1, plogis(liability))
    return(cc)
}
dat$cc2 <- generate_binary_outcome2(dat$y, varexp = 0.4, prevalence = 0.4)
dat$cc <- generate_binary_outcome(dat$y, varexp = 0.4, prevalence = 0.4)
print(mean(dat$cc))
cor(dat$y, dat$cc2)
cor(dat$y, dat$cc)
[1] 0.42636
0.480093929886205
0.655762667041965
param <- expand.grid(
    varexp = c(0.2, 0.5, 0.8),
    prevalence = c(0.01, 0.1, 0.5)
)

binary_res <- lapply(1:nrow(param), \(i) {
    varexp <- param[i,]$varexp
    prevalence <- param[i,]$prevalence
    dat$cc2 <- generate_binary_outcome2(dat$y, varexp = varexp, prevalence = prevalence)
    res_continuous <- continuous_2sls(dat$x, dat$y, dat$prs_explained)
    res_binary <- binary_2sls(dat$x, dat$cc2, dat$prs_explained)
    tibble(
        varexp = varexp,
        prevalence = prevalence,
        method = c("binary", "continuous"),
        beta_xy = c(res_binary$beta_xy, res_continuous$beta_xy),
        se_beta_xy = c(res_binary$se_beta_xy, res_continuous$se_beta_xy)
    )
}) %>% bind_rows()

tidyr::pivot_wider(binary_res, names_from = method, values_from = c(beta_xy, se_beta_xy)) %>%
ggplot(., aes(x=beta_xy_continuous, y = beta_xy_binary)) +
    geom_point(aes(colour=prevalence)) +
    facet_grid(. ~ varexp) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed")

dat <- sim(b_xy = -0.1, b_uy = -0.2, b_ux = 1, b_prs=1, h2=0.7, r2_env_height=0.15, r2_prs=0.4, n=100000)[[2]]

param <- expand.grid(
    varexp = c(0.2, 0.5, 0.8),
    prevalence = c(0.01, 0.1, 0.5),
    rep = 1:100
)
binary_res <- lapply(1:nrow(param), \(i) {
    varexp <- param[i,]$varexp
    prevalence <- param[i,]$prevalence
    dat$cc2 <- generate_binary_outcome2(dat$y, varexp = varexp, prevalence = prevalence)
    res_continuous <- continuous_2sls(dat$x, dat$y, dat$prs_explained)
    res_binary <- binary_2sls(dat$x, dat$cc2, dat$prs_explained)
    dat$cc <- generate_binary_outcome(dat$y, varexp = varexp, prevalence = prevalence)
    res_binary1 <- binary_2sls(dat$x, dat$cc, dat$prs_explained)
    tibble(
        
        varexp = varexp,
        prevalence = prevalence,
        method = c("binary2", "continuous", "binary1"),
        beta_xy = c(res_binary$beta_xy, res_continuous$beta_xy, res_binary1$beta_xy),
        se_beta_xy = c(res_binary$se_beta_xy, res_continuous$se_beta_xy, res_binary1$se_beta_xy)
    )
}) %>% bind_rows()
ggplot(binary_res, aes(x=method, y = beta_xy)) +
    geom_boxplot(aes(fill=as.factor(prevalence))) +
    facet_grid(. ~ varexp) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed")

simdat <- function(n, prevalence, beta_xl) {
    # 1. Simulate Exposure X (Standard Normal)
    x <- rnorm(n)

    # 2. Simulate Liability L
    # L = beta*X + epsilon, where epsilon is standard normal noise
    epsilon <- rnorm(n)
    l <- beta_xl * x + epsilon

    # 3. Define Threshold T based on prevalence
    threshold <- qnorm(1 - prevalence, mean = 0, sd = sqrt(var(l)))
    y <- as.numeric(l > threshold)

    return(tibble(x = x, l = l, y = y))
}

estimation <- function(dat) {
    # A. Estimate X -> L (Linear Regression - the "Hidden" Truth)
    model_linear <- lm(l ~ x, dat)
    hat_beta_xl <- coef(model_linear)["x"]

    # B. Estimate X -> Y (Logistic GLM - what we usually observe)
    model_logit <- glm(y ~ x, data = dat, family = binomial(link = "logit"))
    hat_beta_logit <- coef(model_logit)["x"]

    # C. Estimate X -> Y (Probit GLM - the LTM-appropriate GLM)
    model_probit <- glm(y ~ x, data = dat, family = binomial(link = "probit"))
    hat_beta_probit <- coef(model_probit)["x"]

    return(tibble(
        method = c("Linear", "Logistic", "Probit", "Logistic (Transformed)"),
        beta_hat = c(hat_beta_xl, hat_beta_logit, hat_beta_probit, hat_beta_logit * (sqrt(3) / pi))
    ))
}

set.seed(42)
simdat(100000, prevalence = 0.1, beta_xl = -0.5) %>% estimation()
A tibble: 4 × 2
method beta_hat
<chr> <dbl>
Linear -0.5028558
Logistic -0.9754070
Probit -0.5108572
Logistic (Transformed) -0.5377701
library(furrr)
plan("multicore", workers=4)
param <- expand.grid(
    n = 100000,
    prevalence = c(0.01, 0.1, 0.5),
    beta_xl = c(-0.5, -0.3, 0)
) %>% furrr::future_pmap(., \(n, prevalence, beta_xl) {
    dat <- simdat(n = n, prevalence = prevalence, beta_xl = beta_xl)
    est <- estimation(dat)
    bind_cols(tibble(n = n, prevalence = prevalence, beta_xl = beta_xl), est)
}) %>% bind_rows()
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-1>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-1> (ed502c4b95166bb22746467ddf260d51-1); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-2>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-2> (ed502c4b95166bb22746467ddf260d51-2); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-3>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-3> (ed502c4b95166bb22746467ddf260d51-3); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-4>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-4> (ed502c4b95166bb22746467ddf260d51-4); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
ggplot(param, aes(x=beta_xl, y=beta_hat, color=method)) +
geom_point() +
geom_abline(slope = 1, intercept = 0, linetype = "dashed")

generate_binary_outcome3 <- function(liability, varexp=1, prevalence) {
    n <- length(liability)
    threshold <- qnorm(1 - prevalence, mean = 0, sd = sqrt(var(liability)))
    cc <- as.numeric(liability > threshold)
    return(cc)
}

binary_2sls_probit <- function(x, y, prs) {
    mod <- summary(glm(y ~ prs, family=binomial(link = "probit")))
    bgy <- mod$coefficients[2, 1]
    se_bgy <- mod$coefficients[2, 2]

    mod <- summary(lm(x ~ prs))
    bgx <- mod$coefficients[2, 1]
    se_bgx <- mod$coefficients[2, 2]
    beta_xy <- bgy / bgx
    se_beta_xy <- sqrt((se_bgy / bgx)^2 + (bgy * se_bgx / bgx^2)^2)
    return(list(beta_xy = beta_xy, se_beta_xy = se_beta_xy))
}

param <- expand.grid(
    prevalence = c(0.01, 0.1, 0.5),
    b_xy = c(-0.5, -0.3, 0),
    rep = 1:50
)
plan("multicore", workers=8)
binary_res <- future_pmap(param, \(prevalence, rep, b_xy) {
    dat <- sim(b_xy = b_xy, b_uy = -0.2, b_ux = 1, b_prs=1, h2=0.7, r2_env_height=0.15, r2_prs=0.4, n=100000)[[2]]

    dat$cc <- generate_binary_outcome3(dat$y, prevalence = prevalence)
    res_continuous <- continuous_2sls(dat$x, dat$y, dat$prs_explained)
    res_binary <- binary_2sls_probit(dat$x, dat$cc, dat$prs_explained)
    tibble(
        prevalence = prevalence,
        method = c("binary", "continuous"),
        beta_xy = c(res_binary$beta_xy, res_continuous$beta_xy),
        se_beta_xy = c(res_binary$se_beta_xy, res_continuous$se_beta_xy),
        rep = rep,
        b_xy = b_xy
    )
}) %>% bind_rows()
str(binary_res)
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-25>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-25> (ed502c4b95166bb22746467ddf260d51-25); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-26>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-26> (ed502c4b95166bb22746467ddf260d51-26); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-27>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-27> (ed502c4b95166bb22746467ddf260d51-27); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-28>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-28> (ed502c4b95166bb22746467ddf260d51-28); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-29>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-29> (ed502c4b95166bb22746467ddf260d51-29); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-30>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-30> (ed502c4b95166bb22746467ddf260d51-30); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-31>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-31> (ed502c4b95166bb22746467ddf260d51-31); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
Warning message:
“UNRELIABLE VALUE: Future (<unnamed-32>) unexpectedly generated random numbers without specifying argument 'seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced. To disable this check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore". [future <unnamed-32> (ed502c4b95166bb22746467ddf260d51-32); on ed502c4b95166bb22746467ddf260d51@DT7MTP4F2G<63874>]”
tibble [900 × 6] (S3: tbl_df/tbl/data.frame)
 $ prevalence: num [1:900] 0.01 0.01 0.1 0.1 0.5 0.5 0.01 0.01 0.1 0.1 ...
 $ method    : chr [1:900] "binary" "continuous" "binary" "continuous" ...
 $ beta_xy   : num [1:900] -0.444 -0.497 -0.465 -0.51 -0.442 ...
 $ se_beta_xy: num [1:900] 0.0238 0.00702 0.01087 0.007 0.00797 ...
 $ rep       : int [1:900] 1 1 1 1 1 1 1 1 1 1 ...
 $ b_xy      : num [1:900] -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -0.3 -0.3 -0.3 -0.3 ...
ggplot(binary_res, aes(x=method, y = beta_xy-b_xy)) +
    geom_boxplot(aes(colour=as.factor(prevalence))) +
    facet_grid(method ~ b_xy)
    

# Function to estimate beta_cy from height, PRS, and outcome

binary_2sls <- function(x, y, prs, method=c("probit", "logistic", "absolute")[1]) {
    if(method == "probit") {
        mod <- summary(glm(y ~ prs, family=binomial(link = "probit")))
    } else if(method == "logistic") {
        mod <- summary(glm(y ~ prs, family=binomial(link = "logit")))
    } else if(method == "absolute") {
        mod <- summary(lm(y ~ prs))
    }
    bgy <- mod$coefficients[2, 1]
    se_bgy <- mod$coefficients[2, 2]

    mod <- summary(lm(x ~ prs))
    bgx <- mod$coefficients[2, 1]
    se_bgx <- mod$coefficients[2, 2]
    beta_xy <- bgy / bgx
    se_beta_xy <- sqrt((se_bgy / bgx)^2 + (bgy * se_bgx / bgx^2)^2)
    return(list(beta_xy = beta_xy, se_beta_xy = se_beta_xy))
}

estimate_beta_cy2 <- function(height, prs, outcome, var_c = 0.15, h2 = 0.7, n_boot = 1000, binary_method="probit") {
    stopifnot(binary_method %in% c("probit", "logistic", "absolute"))
    
    # Detect if outcome is binary or continuous
    unique_vals <- unique(outcome)
    is_binary <- length(unique_vals) == 2 && all(sort(unique_vals) == c(0, 1))
    
    # Store original outcome for prevalence calculation
    outcome_original <- outcome
    
    # Calculate prevalence if binary
    if (is_binary) {
        prevalence <- mean(outcome)
        # Calculate liability scale threshold transformation factor
        threshold <- qnorm(prevalence)
        liability_scale_factor <- dnorm(threshold)
        cat(sprintf("Binary outcome detected (prevalence = %.3f)\n", prevalence))
        cat(sprintf("Liability scale transformation factor = %.4f\n", liability_scale_factor))
    } else {
        # Standardize continuous outcome
        outcome <- (outcome - mean(outcome)) / sd(outcome)
        cat("Continuous outcome detected - standardized to mean=0, SD=1\n")
        prevalence <- NA
        liability_scale_factor <- 1
    }

    

    # Use instrumental variable regression to estimate causal effect
    if(is_binary) {
        iv_res <- binary_2sls(height, outcome, prs, method = binary_method)
    } else {
        iv_res <- binary_2sls(height, outcome, prs, method = "absolute")
    }
    beta_xy_prs <- iv_res$beta_xy
    se_beta_xy <- iv_res$se_beta_xy
    
    # Calculate R^2 for PRS
    model_prs <- lm(height ~ prs)
    prs_explained <- fitted.values(model_prs)
    r2_prs <- var(prs_explained) / var(height)
    
    # Estimate beta_cy using method 1 (from covariance of height and outcome)
    beta_cy_method1_observed <- (cov(height, outcome) - beta_xy_prs * var(height)) / var_c
    
    # Method 2: Using height residual
    # Get residual height (height not explained by PRS)
    residual_height <- residuals(model_prs)
    
    # Estimate beta_cy using method 2 (from residual height)
    beta_cy_method2_observed <- (cov(residual_height, outcome) - beta_xy_prs_observed * var(residual_height)) / var_c
    
    # Transform to liability scale if binary
    beta_cy_method1 <- beta_cy_method1_observed / liability_scale_factor
    beta_cy_method2 <- beta_cy_method2_observed / liability_scale_factor
    
    # Parametric bootstrap using 2SLS SE for beta_xy
    n <- length(height)
    boot_results <- matrix(NA, n_boot, 2)
    
    for(i in 1:n_boot) {
        # Sample with replacement
        idx <- sample(1:n, n, replace = TRUE)
        h_boot <- height[idx]
        p_boot <- prs[idx]
        y_boot <- outcome[idx]
        
        # Parametrically resample beta_xy from its sampling distribution
        beta_xy_boot <- rnorm(1, mean = beta_xy_prs, sd = se_beta_xy)
        beta_xy_boot <- beta_xy_boot / liability_scale_factor
        
        # Method 1 bootstrap
        boot_observed_1 <- (cov(h_boot, y_boot) - beta_xy_boot_observed * var(h_boot)) / var_c
        boot_results[i, 1] <- boot_observed_1 / liability_scale_factor
        
        # Method 2 bootstrap
        model_prs_boot <- lm(h_boot ~ p_boot)
        res_boot <- residuals(model_prs_boot)
        boot_observed_2 <- (cov(res_boot, y_boot) - beta_xy_boot_observed * var(res_boot)) / var_c
        boot_results[i, 2] <- boot_observed_2 / liability_scale_factor
    }
    
    # Calculate standard errors
    se_method1 <- sd(boot_results[, 1])
    se_method2 <- sd(boot_results[, 2])
    
    # Calculate covariance between the two estimators using GLS
    # cov(β̂_cy,1, β̂_cy,2) ≈ 1/(N·(σ_C²)²) * [var(Y)var(X,X_res) + var(X,Y)var(X_res,Y)]
    cov_x_xres <- cov(height, residual_height)
    cov_x_y <- cov(height, outcome)
    cov_xres_y <- cov(residual_height, outcome)
    var_y <- var(outcome)
    
    # Covariance of the two beta_cy estimates
    cov_estimators <- (1 / (n * var_c^2)) * (var_y * cov_x_xres + cov_x_y * cov_xres_y)
    
    # Build variance-covariance matrix
    var_method1 <- se_method1^2
    var_method2 <- se_method2^2
    Sigma <- matrix(c(var_method1, cov_estimators,
                      cov_estimators, var_method2), nrow = 2)
    
    # GLS weights (inverse of variance-covariance matrix)
    Sigma_inv <- solve(Sigma)
    weights <- Sigma_inv %*% c(1, 1)
    weight_sum <- sum(weights)
    
    # GLS combined estimate
    beta_cy_combined <- (weights[1] * beta_cy_method1 + weights[2] * beta_cy_method2) / weight_sum
    
    # Variance of combined estimate: var(β̂_combined) = 1 / (1' Σ^(-1) 1)
    var_combined <- as.numeric(1 / (t(c(1, 1)) %*% Sigma_inv %*% c(1, 1)))
    se_combined <- sqrt(var_combined)
    
    # Apply GLS weights to bootstrap distributions
    boot_combined <- (weights[1] * boot_results[, 1] + weights[2] * boot_results[, 2]) / weight_sum
    
    # Return results
    return(list(
        beta_xy = beta_xy_prs,
        se_beta_xy = se_beta_xy,
        beta_cy_method1 = beta_cy_method1,
        beta_cy_method2 = beta_cy_method2,
        beta_cy_combined = beta_cy_combined,
        se_method1 = se_method1,
        se_method2 = se_method2,
        se_combined = se_combined,
        cov_estimators = cov_estimators,
        correlation_estimators = cov_estimators / (se_method1 * se_method2),
        weight_method1 = weights[1] / weight_sum,
        weight_method2 = weights[2] / weight_sum,
        r2_prs = r2_prs,
        is_binary = is_binary,
        prevalence = if(is_binary) prevalence else NA,
        outcome_scale = if(is_binary) "liability" else "standardized",
        beta_cy_method1_observed = beta_cy_method1_observed,
        beta_cy_method2_observed = beta_cy_method2_observed,
        boot_distribution_method1 = boot_results[, 1],
        boot_distribution_method2 = boot_results[, 2],
        boot_distribution_combined = boot_combined
    ))
}

Expected betas for

library(dplyr)
sim_dan <- function(n=100000, bgx1=0.1, bgx2=0.2, bx1y=0.3, bex2=0.4, bey=0.5) {
    g <- rbinom(n, 2, 0.4)
    e <- rnorm(n)
    x1 <- bgx1 * g + rnorm(n)
    x2 <- bgx2 * g + bex2 * e + rnorm(n)
    y <- bx1y * x1 + bey * e + rnorm(n)
    return(list(
        dat = tibble(g = g, x1 = x1, x2 = x2, y = y, e = e),
        param = tibble(bgx1 = bgx1, bgx2 = bgx2, bx1y = bx1y, bex2 = bex2, bey = bey)
    ))
}

dat <- sim_dan()

cov(dat$dat$x2, dat$dat$y)
0.203145466403477
dat$param$bgx2 * dat$param$bgx1 * dat$param$bx1y * var(dat$dat$g) + dat$param$bex2 * dat$param$bey * var(dat$dat$e)
0.201365693134032