Removing variables from an inverted matrix

Author

Gibran Hemani

Published

July 29, 2024

Background

Remove variables from an inverted matrix

From https://stats.stackexchange.com/questions/450146/updating-the-inverse-covariance-matrix-after-deleting-the-i-th-column-and-row-of/450186#450186 :

inverse.update <- function(x, i) {
  a <- x[-i,-i, drop=FALSE]
  b <- x[-i,i, drop=FALSE]
  c <- x[i,-i, drop=FALSE]
  d <- x[i,i]
  a - b %*% solve(d) %*% c # For production code, should throw an error when d is 0.
}
#
# Example.
#
A <- matrix(c(2,-1,0, -1,2,-1, 0,-1,2), 3)
A.inv <- solve(A)
i <- 2
(x.1 <- solve(A[-i,-i]))           # The desired result, directly obtained
     [,1] [,2]
[1,]  0.5  0.0
[2,]  0.0  0.5
(x.0 <- inverse.update(A.inv, i))  # The result via an update
     [,1] [,2]
[1,]  0.5  0.0
[2,]  0.0  0.5
n <- 100
p <- qr.Q(qr(matrix(rnorm(n^2), n)))
Sigma <- crossprod(p, p*(n:1))
Sigma[1:10,1:10]
            [,1]       [,2]       [,3]       [,4]       [,5]       [,6]
 [1,] 57.9385521  0.3208281 -0.4977199 -1.0868291 -1.6627980  1.3160580
 [2,]  0.3208281 52.4695149 -0.9527482 -0.3403789 -2.0292978 -0.9617932
 [3,] -0.4977199 -0.9527482 52.5479548 -3.9389601 -0.3454971 -0.5248873
 [4,] -1.0868291 -0.3403789 -3.9389601 55.6025900  5.4163401  3.5721138
 [5,] -1.6627980 -2.0292978 -0.3454971  5.4163401 56.2864414  4.2190419
 [6,]  1.3160580 -0.9617932 -0.5248873  3.5721138  4.2190419 50.0241607
 [7,]  2.2595601  2.0683065  2.2184804  2.4608634  0.1627784  1.5666412
 [8,]  2.6967693 -1.7750383  0.3179390 -0.7733417 -0.7389027 -0.3089740
 [9,] -0.4383502 -0.9908480  1.1284561 -2.3302970  0.9202238 -1.2106191
[10,] -1.3535960  3.2367953  1.3182395  0.1211423 -3.2249309  1.3989303
            [,7]       [,8]       [,9]      [,10]
 [1,]  2.2595601  2.6967693 -0.4383502 -1.3535960
 [2,]  2.0683065 -1.7750383 -0.9908480  3.2367953
 [3,]  2.2184804  0.3179390  1.1284561  1.3182395
 [4,]  2.4608634 -0.7733417 -2.3302970  0.1211423
 [5,]  0.1627784 -0.7389027  0.9202238 -3.2249309
 [6,]  1.5666412 -0.3089740 -1.2106191  1.3989303
 [7,] 49.9562189  3.2698375 -3.2764609 -2.2851399
 [8,]  3.2698375 48.6555301  4.8965876 -3.3894722
 [9,] -3.2764609  4.8965876 51.4523676  2.5008322
[10,] -2.2851399 -3.3894722  2.5008322 48.0365556

With just one variable

Sigma.inv <- solve(Sigma)

to_remove <- 10
a <- solve(Sigma[-to_remove,-to_remove])
b <- inverse.update(Sigma.inv, to_remove)

cor(c(a),c(b))
[1] 1

With mutiple variables

to_remove <- sample(1:100, 10)
a <- solve(Sigma[-to_remove,-to_remove])
b <- inverse.update(Sigma.inv, to_remove)
cor(c(a), c(b))
[1] 1

Time complexity

fn <- function(n, nrem) {
    p <- qr.Q(qr(matrix(rnorm(n^2), n)))
    Sigma <- crossprod(p, p*(n:1))
    Sigma.inv <- solve(Sigma)
    to_remove <- sample(1:n, nrem, replace=FALSE)
    t1 <- Sys.time()
    a <- solve(Sigma[-to_remove,-to_remove])
    at <- Sys.time() - t1

    t1 <- Sys.time()
    b <- inverse.update(Sigma.inv, to_remove)
    bt <- Sys.time() - t1
    stopifnot(all.equal(a, b))
    return(c(at, bt))
}

fn(1000, 10)
Time differences in secs
[1] 0.631145954 0.007416964
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
param <- expand.grid(
    n = 300,
    nrem = seq(1, 290, 10),
    t1=NA,
    t2=NA
)
set.seed(100)
res <- lapply(1:nrow(param), function(i) {
    message(i)
    p <- param[i,]
    a <- fn(p$n, p$nrem)

    p$t1 <- a[1]
    p$t2 <- a[2]
    return(p)
}) %>% bind_rows()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
res$frac_rem <- res$nrem / res$n
res$frac_time <- as.numeric(res$t2) / as.numeric(res$t1)

plot(res$frac_rem, log(res$frac_time))
abline(1,0)

If you are removing more than 50% of the matrix then this method probably won’t help with speed.


sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Ventura 13.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dplyr_1.1.4

loaded via a namespace (and not attached):
 [1] digest_0.6.35     utf8_1.2.4        R6_2.5.1          fastmap_1.2.0    
 [5] tidyselect_1.2.1  xfun_0.44         magrittr_2.0.3    glue_1.7.0       
 [9] tibble_3.2.1      knitr_1.47        pkgconfig_2.0.3   htmltools_0.5.8.1
[13] rmarkdown_2.27    generics_0.1.3    lifecycle_1.0.4   cli_3.6.2        
[17] fansi_1.0.6       vctrs_0.6.5       compiler_4.4.1    tools_4.4.1      
[21] pillar_1.9.0      evaluate_0.23     yaml_2.3.8        rlang_1.1.3      
[25] jsonlite_1.8.8    htmlwidgets_1.6.4