The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(purrr)#' Simulate simple dataset and compare p-values from linear regression and bootstrap#' #' @param n Number of observations#' @param b True effect size#' @param nboot Number of bootstrap samples#' @param sim Simulation number#' #' #' @return A tibble with p-valuesboot_sim <-function(n, b, nboot, sim=1) {# Simple simulation of y being influenced by x x <-rnorm(n) y <-rnorm(n) + b * x# Linear model mod <-summary(lm(y ~ x)) bhat <- mod$coefficients[2, 1] pval <- mod$coefficients[2, 4]# Get p-value using bootstrap# Generate a null distribution of the slope# Do this by resampling x bboot <-numeric(nboot)for(i in1:nboot) { x1 <-sample(x) bboot[i] <-lm(y ~ x1)$coefficients[2] }# This is a non-parametric p-value pvalboot_nonpara <-sum(abs(bboot) >abs(b)) / nboot# This is a parametric p-value pvalboot_para <-pnorm(bhat, mean=mean(bboot), sd=sd(bboot), low=F)# Compare parametric pval, non para pval and linear model pvaltibble(b=b,n=n,sim=sim,nboot=nboot,pval = pval,pvalboot_nonpara = pvalboot_nonpara,pvalboot_para = pvalboot_para )}
Example run of this simulation
boot_sim(1000, 0.3, 1000)
# A tibble: 1 × 7
b n sim nboot pval pvalboot_nonpara pvalboot_para
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.3 1000 1 1000 1.50e-18 0 1.10e-18
To evaluate performance do it over a range of simulation values