Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ Description: Provides data source agnostic utility functions to support
License: GPL (>= 3)
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Imports:
bit64,
checkmate,
glue,
Matrix,
Matrix (>= 1.5-3),
grDevices,
stats
Suggests:
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(add_cluster_info)
export(colScaleM)
export(cosine_sim)
export(dataset_names)
export(geomScaleM)
export(id2char)
export(partner_summary2adjacency_matrix)
export(prepare_cosine_matrix)
export(register_dataset)
export(rowScaleM)
importFrom(grDevices,hcl.colors)
importFrom(stats,as.dist)
importFrom(stats,hclust)
Expand Down
79 changes: 79 additions & 0 deletions R/sparsemat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#' Efficient column and row scaling for sparse matrices
#'
#' @details Conventionally in connectivity matrices, rows are upstream input
#' neurons and columns are downstream output neurons. \code{colScaleM} will
#' therefore normalise by the total input onto each downstream neuron while
#' \code{rowScaleM} will normalise by the total output from each upstream
#' neuron.
#'
#' These functions will work with dense inputs but are less efficient than
#' their base R cousins (e.g. \code{scale}). As an example, normalising a 4x4k
#' subset of VNC connectivity data took \itemize{
#'
#' \item \code{colScaleM} sparse 3.5ms
#'
#' \item \code{colScaleM} dense 1460ms
#'
#' \item \code{scale} dense 365ms
#'
#' }
#'
#' In conclusion sparse matrices with \code{colScaleM}/\code{rowScaleM} are
#' the way to go for real connectivity matrices! For large matrices they will
#' be \emph{much} faster and for small ones, both methods will be fast anyway.
#'
#' Note also that these functions were originally called \code{colScale} etc
#' but then the \code{\link[Matrix]{Matrix}} package added functions of the
#' same name. The functions in this package differ in two respects from the
#' \code{Matrix::\link[Matrix:colScale]{colScale,rowScale}} functions. First
#' they calculate the required row or column sums to perform the scaling.
#' Second, they handle the inevitable zero sum rows or columns via the default
#' \code{na.rm} argument.
#'
#' @param A a sparse (or dense) matrix
#' @param na.rm when \code{T}, the default, converts any NA values (usually
#' resulting from divide by zero errors when a neuron has no partners) to 0
#' @description \code{colScale} normalises a matrix by the sum of each column
#'
#' @return A scaled sparse matrix
#' @export
#'
#' @seealso \code{Matrix::\link[Matrix]{colScale}} on which these are based and,
#' for the original version,
#' \url{https://stackoverflow.com/questions/39284774/column-rescaling-for-a-very-large-sparse-matrix-in-r}
#'
#' @examples
#' library(Matrix)
#' set.seed(42)
#' A <- Matrix(rbinom(100,10,0.05), nrow = 10)
#' rownames(A)=letters[1:10]
#' colnames(A)=LETTERS[1:10]
#'
#' colScaleM(A)
#' rowScaleM(A)
#' geomScaleM(A)
colScaleM <- function(A, na.rm=TRUE) {
scalefac = 1 / Matrix::colSums(A)
if(na.rm) scalefac[!is.finite(scalefac)]=0
Matrix::colScale(A, scalefac)
}

#' @export
#' @rdname colScaleM
#' @description \code{rowScale} normalises a matrix by the sum of each row
rowScaleM <- function(A, na.rm=TRUE) {
scalefac = 1 / Matrix::rowSums(A)
if(na.rm) scalefac[!is.finite(scalefac)]=0
Matrix::rowScale(A, scalefac)
}

#' @export
#' @rdname colScaleM
#' @description sqrt of both column and row normalisation
geomScaleM <- function(A, na.rm=TRUE) {
sf1=sqrt(1/Matrix::rowSums(A))
if(na.rm) sf1[!is.finite(sf1)]=0
sf2=sqrt(1/Matrix::colSums(A))
if(na.rm) sf2[!is.finite(sf2)]=0
Matrix::dimScale(A, d1 = sf1, d2 = sf2)
}
2 changes: 2 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ Lifecycle
Natverse
ORCID
PNs
VNC
connectome
connectomics
etc
fafbseg
flywire
natverse
Expand Down
77 changes: 77 additions & 0 deletions man/colScaleM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions tests/testthat/test-sparsemat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
test_that("multiplication works", {
# library(Matrix)
# set.seed(42)
# A <- Matrix(rbinom(100,10,0.05), nrow = 10)
# rownames(A)=letters[1:10]
# colnames(A)=LETTERS[1:10]
A=new("dgCMatrix", i = c(0L, 1L, 3L, 4L, 6L, 8L, 9L, 1L, 2L, 5L,
6L, 0L, 2L, 3L, 7L, 9L, 0L, 1L, 3L, 5L, 8L, 9L, 3L, 5L, 6L, 7L,
8L, 9L, 3L, 5L, 6L, 0L, 1L, 2L, 4L, 7L, 8L, 5L, 3L, 4L, 0L, 3L,
4L, 5L, 8L, 9L), p = c(0L, 7L, 11L, 16L, 22L, 28L, 31L, 37L,
38L, 40L, 46L), Dim = c(10L, 10L), Dimnames = list(c("a", "b",
"c", "d", "e", "f", "g", "h", "i", "j"), c("A", "B", "C", "D",
"E", "F", "G", "H", "I", "J")), x = c(2, 2, 1, 1, 1, 1, 1, 1,
2, 2, 2, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1), factors = list())
colScaleM(A)
csA=new("dgCMatrix", i = c(0L, 1L, 3L, 4L, 6L, 8L, 9L, 1L, 2L, 5L,
6L, 0L, 2L, 3L, 7L, 9L, 0L, 1L, 3L, 5L, 8L, 9L, 3L, 5L, 6L, 7L,
8L, 9L, 3L, 5L, 6L, 0L, 1L, 2L, 4L, 7L, 8L, 5L, 3L, 4L, 0L, 3L,
4L, 5L, 8L, 9L), p = c(0L, 7L, 11L, 16L, 22L, 28L, 31L, 37L,
38L, 40L, 46L), Dim = c(10L, 10L), Dimnames = list(c("a", "b",
"c", "d", "e", "f", "g", "h", "i", "j"), c("A", "B", "C", "D",
"E", "F", "G", "H", "I", "J")), x = c(0.222222222222222, 0.222222222222222,
0.111111111111111, 0.111111111111111, 0.111111111111111, 0.111111111111111,
0.111111111111111, 0.142857142857143, 0.285714285714286, 0.285714285714286,
0.285714285714286, 0.125, 0.375, 0.25, 0.125, 0.125, 0.166666666666667,
0.166666666666667, 0.166666666666667, 0.166666666666667, 0.166666666666667,
0.166666666666667, 0.222222222222222, 0.222222222222222, 0.111111111111111,
0.111111111111111, 0.222222222222222, 0.111111111111111, 0.333333333333333,
0.333333333333333, 0.333333333333333, 0.142857142857143, 0.285714285714286,
0.142857142857143, 0.142857142857143, 0.142857142857143, 0.142857142857143,
1, 0.5, 0.5, 0.125, 0.25, 0.25, 0.125, 0.125, 0.125), factors = list())
expect_equal(colScaleM(A), csA)

expect_equal(geomScaleM(A),
sqrt(colScaleM(A)*rowScaleM(A)))
})