library(data.table)library(dplyr)#' Get just one row of the triangle GRM matrix#' #' @param i ID to extract (location ID rather than character ID e.g. row 100)#' @param ki List of all IDs to extract (location IDs again)#' @param b GRM file rootname#' #' @return vector of length i's position in kifindone <-function(i, ki, b) { f <-file(b, "rb")seek(f, where = (i * (i+1) /2- i) *4) a <-readBin(f, "double", n=i, size=4)close(f) x <- ki[ki <= i] a[x]}#' Get all rows of the triangle GRM matrix for required individuals#' #' @param ki List of all IDs to extract (location IDs again)#' @param b GRM file rootnamefindall <-function(ki, b) { n <-length(ki) mat <-matrix(0, n, n)for(i in1:n) {message(i) mat[i,1:i] <- mat[1:i, i] <-findone(ki[i], ki, b) }return(mat)}#' Get only diagonal elements from GRM#' #' @param n sample size of GRM#' @param b GRM file rootname#' #' @return vector of length n, should all be close to 1diags <-function(n, b) { f <-file(b, "rb") x <-sapply(1:n, \(i) {seek(f, where = (i-1)*4, origin="current")readBin(f, "double", n=1, size=4) })close(f)return(x)}# takes a few minutesd <-diags(ids, b)length(d)# The first few look goodhead(d)# But most of them are way offsummary(d)# Once we get to 10% of the sample the matrix is no longer following the expected patternwhich(d <0.75)[1]nrow(ids)# The matrix was generated by concatenating chunks from plink. Perhaps it's not possible to do this# Expected size of the GRMn <-nrow(ids)n * (n+1) /2*4# This matches the exact size of the GRMmat <-"/local-scratch/data/ukb/genetic/variants/arrays/imputed/draft/grm/full-ukb-indep"ids <-fread(paste0(mat, ".grm.id")) %>%as_tibble()idsb <-paste0(mat, ".grm.bin")file.exists(b)f <-file(b, "rb")readBin(f, "double", n=1, size=4)seek(f, where=1)readBin(f, "double", n=1, size=4)readBin(f, "double", n=1, size=4)readBin(f, "double", n=1, size=4)readBin(f, "double", n=1, size=4)readBin(f, "double", n=1, size=4)close(f)ki <-sort(sample(1:nrow(ids), 100, replace=FALSE))findall(ki, b)findone(ki[15], ki, b)findone(9, 1:10, b)diags <-function(ids, b) { f <-file(b, "rb") x <-sapply(1:nrow(ids), \(i) {seek(f, where = (i-1)*4, origin="current")readBin(f, "double", n=1, size=4) })close(f)return(x)}d <-diags(ids, b)length(d)summary(d)