Background
Remove variables from an inverted matrix
From https://stats.stackexchange.com/questions/450146/updating-the-inverse-covariance-matrix-after-deleting-the-i-th-column-and-row-of/450186#450186 :
inverse.update <- function (x, i) {
a <- x[- i,- i, drop= FALSE ]
b <- x[- i,i, drop= FALSE ]
c <- x[i,- i, drop= FALSE ]
d <- x[i,i]
a - b %*% solve (d) %*% c # For production code, should throw an error when d is 0.
}
#
# Example.
#
A <- matrix (c (2 ,- 1 ,0 , - 1 ,2 ,- 1 , 0 ,- 1 ,2 ), 3 )
A.inv <- solve (A)
i <- 2
(x.1 <- solve (A[- i,- i])) # The desired result, directly obtained
[,1] [,2]
[1,] 0.5 0.0
[2,] 0.0 0.5
(x.0 <- inverse.update (A.inv, i)) # The result via an update
[,1] [,2]
[1,] 0.5 0.0
[2,] 0.0 0.5
n <- 100
p <- qr.Q (qr (matrix (rnorm (n^ 2 ), n)))
Sigma <- crossprod (p, p* (n: 1 ))
Sigma[1 : 10 ,1 : 10 ]
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 57.9385521 0.3208281 -0.4977199 -1.0868291 -1.6627980 1.3160580
[2,] 0.3208281 52.4695149 -0.9527482 -0.3403789 -2.0292978 -0.9617932
[3,] -0.4977199 -0.9527482 52.5479548 -3.9389601 -0.3454971 -0.5248873
[4,] -1.0868291 -0.3403789 -3.9389601 55.6025900 5.4163401 3.5721138
[5,] -1.6627980 -2.0292978 -0.3454971 5.4163401 56.2864414 4.2190419
[6,] 1.3160580 -0.9617932 -0.5248873 3.5721138 4.2190419 50.0241607
[7,] 2.2595601 2.0683065 2.2184804 2.4608634 0.1627784 1.5666412
[8,] 2.6967693 -1.7750383 0.3179390 -0.7733417 -0.7389027 -0.3089740
[9,] -0.4383502 -0.9908480 1.1284561 -2.3302970 0.9202238 -1.2106191
[10,] -1.3535960 3.2367953 1.3182395 0.1211423 -3.2249309 1.3989303
[,7] [,8] [,9] [,10]
[1,] 2.2595601 2.6967693 -0.4383502 -1.3535960
[2,] 2.0683065 -1.7750383 -0.9908480 3.2367953
[3,] 2.2184804 0.3179390 1.1284561 1.3182395
[4,] 2.4608634 -0.7733417 -2.3302970 0.1211423
[5,] 0.1627784 -0.7389027 0.9202238 -3.2249309
[6,] 1.5666412 -0.3089740 -1.2106191 1.3989303
[7,] 49.9562189 3.2698375 -3.2764609 -2.2851399
[8,] 3.2698375 48.6555301 4.8965876 -3.3894722
[9,] -3.2764609 4.8965876 51.4523676 2.5008322
[10,] -2.2851399 -3.3894722 2.5008322 48.0365556
With just one variable
Sigma.inv <- solve (Sigma)
to_remove <- 10
a <- solve (Sigma[- to_remove,- to_remove])
b <- inverse.update (Sigma.inv, to_remove)
cor (c (a),c (b))
With mutiple variables
to_remove <- sample (1 : 100 , 10 )
a <- solve (Sigma[- to_remove,- to_remove])
b <- inverse.update (Sigma.inv, to_remove)
cor (c (a), c (b))
Time complexity
fn <- function (n, nrem) {
p <- qr.Q (qr (matrix (rnorm (n^ 2 ), n)))
Sigma <- crossprod (p, p* (n: 1 ))
Sigma.inv <- solve (Sigma)
to_remove <- sample (1 : n, nrem, replace= FALSE )
t1 <- Sys.time ()
a <- solve (Sigma[- to_remove,- to_remove])
at <- Sys.time () - t1
t1 <- Sys.time ()
b <- inverse.update (Sigma.inv, to_remove)
bt <- Sys.time () - t1
stopifnot (all.equal (a, b))
return (c (at, bt))
}
fn (1000 , 10 )
Time differences in secs
[1] 0.631145954 0.007416964
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
param <- expand.grid (
n = 300 ,
nrem = seq (1 , 290 , 10 ),
t1= NA ,
t2= NA
)
set.seed (100 )
res <- lapply (1 : nrow (param), function (i) {
message (i)
p <- param[i,]
a <- fn (p$ n, p$ nrem)
p$ t1 <- a[1 ]
p$ t2 <- a[2 ]
return (p)
}) %>% bind_rows ()
res$ frac_rem <- res$ nrem / res$ n
res$ frac_time <- as.numeric (res$ t2) / as.numeric (res$ t1)
plot (res$ frac_rem, log (res$ frac_time))
abline (1 ,0 )
If you are removing more than 50% of the matrix then this method probably won’t help with speed.
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Ventura 13.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/London
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] dplyr_1.1.4
loaded via a namespace (and not attached):
[1] digest_0.6.35 utf8_1.2.4 R6_2.5.1 fastmap_1.2.0
[5] tidyselect_1.2.1 xfun_0.44 magrittr_2.0.3 glue_1.7.0
[9] tibble_3.2.1 knitr_1.47 pkgconfig_2.0.3 htmltools_0.5.8.1
[13] rmarkdown_2.27 generics_0.1.3 lifecycle_1.0.4 cli_3.6.2
[17] fansi_1.0.6 vctrs_0.6.5 compiler_4.4.1 tools_4.4.1
[21] pillar_1.9.0 evaluate_0.23 yaml_2.3.8 rlang_1.1.3
[25] jsonlite_1.8.8 htmlwidgets_1.6.4