Skip to content

Commit b22ab75

Browse files
committed
still testing
1 parent 81d8f09 commit b22ab75

File tree

4 files changed

+62
-67
lines changed

4 files changed

+62
-67
lines changed

R/bambu-assignDist.R

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -82,33 +82,27 @@ generateIncompatibleCounts <- function(incompatibleCountMatrix, annotations){
8282
#' Generate non-unique counts
8383
#' @noRd
8484
generateNonUniqueCounts <- function(readClassDt, countMatrix, annotations){
85-
#fuse multi align RCs by gene
86-
x <- readClassDt %>% filter(multi_align & !is.na(eqClass.match))
87-
x <- x %>% distinct(eqClassId, .keep_all = TRUE)
88-
nonuniqueCounts <- countMatrix[x$eqClass.match,, drop = FALSE]
89-
if(nrow(x)>1 & length(unique(x$gene_sid))>1){
90-
nonuniqueCounts.gene <- sparse.model.matrix(~ factor(x$gene_sid) - 1)
91-
nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
92-
#covert ids into gene ids
93-
geneids <- as.numeric(levels(factor(x$gene_sid)))
94-
geneids <- x$txid[match(geneids, x$gene_sid)]
95-
geneids <- mcols(annotations)$GENEID[as.numeric(geneids)]
96-
rownames(nonuniqueCounts) <- geneids
97-
} else{
98-
warning("The factor variable 'gene_sid' has only one level. Adjusting output.")
99-
nonuniqueCounts.gene <- Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
100-
nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
101-
#covert ids into gene ids
102-
geneids <- as.numeric(levels(factor(x$gene_sid)))
103-
geneids <- x$txid[match(geneids, x$gene_sid)]
104-
geneids <- mcols(annotations)$GENEID[as.numeric(geneids)]
105-
rownames(nonuniqueCounts) <- rownames(geneMat)[1:nrow(nonuniqueCounts)]
106-
107-
}
108-
#create matrix for all annotated genes
109-
genes <- levels(factor(unique(mcols(annotations)$GENEID)))
110-
geneMat <- sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
111-
rownames(geneMat) <- genes
112-
geneMat[rownames(nonuniqueCounts),] <- nonuniqueCounts
113-
return(geneMat)
85+
#fuse multi align RCs by gene
86+
x <- readClassDt %>% filter(multi_align & !is.na(eqClass.match))
87+
x <- x %>% distinct(eqClassId, .keep_all = TRUE)
88+
nonuniqueCounts <- countMatrix[x$eqClass.match,, drop = FALSE]
89+
if(nrow(x)>1 & length(unique(x$gene_sid))>1){
90+
nonuniqueCounts.gene <- sparse.model.matrix(~ factor(x$gene_sid) - 1)
91+
nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
92+
} else{
93+
warning("The factor variable 'gene_sid' has only one level. Adjusting output.")
94+
nonuniqueCounts.gene <- Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
95+
nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
96+
}
97+
#covert ids into gene ids
98+
geneids <- as.numeric(levels(factor(x$gene_sid)))
99+
geneids <- x$txid[match(geneids, x$gene_sid)]
100+
geneids <- mcols(annotations)$GENEID[as.numeric(geneids)]
101+
rownames(nonuniqueCounts) <- geneids
102+
#create matrix for all annotated genes
103+
genes <- levels(factor(unique(mcols(annotations)$GENEID)))
104+
geneMat <- sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
105+
rownames(geneMat) <- genes
106+
geneMat[rownames(nonuniqueCounts),] <- nonuniqueCounts
107+
return(geneMat)
114108
}

R/bambu-processReads.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
1616
readClass.outputDir=NULL, yieldSize=1000000, bpParameters,
1717
stranded=FALSE, verbose=FALSE, isoreParameters = setIsoreParameters(NULL),
1818
processByChromosome = FALSE, processByBam = TRUE, trackReads = trackReads, fusionMode = fusionMode,
19-
demultiplexed = FALSE, cleanReads = FALSE, dedupUMI = FALSE, sampleNames = NULL, barcodesToFilter = NULL, trustReadStartEnd = FALSE) {
19+
demultiplexed = FALSE, cleanReads = FALSE, dedupUMI = FALSE, sampleNames = NULL, barcodesToFilter = NULL) {
2020
genomeSequence <- checkInputSequence(genomeSequence)
2121
# ===# create BamFileList object from character #===#
2222
if (is(reads, "BamFile")) {
@@ -125,7 +125,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
125125
yieldSize = NULL, stranded = FALSE, min.readCount = 2,
126126
fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE,
127127
verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE,
128-
cleanReads = FALSE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL, trustReadStartEnd = FALSE) {
128+
cleanReads = FALSE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) {
129129
if(verbose) message(names(bam.file)[1])
130130
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI)
131131
if(verbose) message(paste0("Number of alignments/reads: ",length(readGrgList)))
@@ -196,7 +196,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
196196
annotations,genomeSequence, stranded = stranded, verbose = verbose)
197197
se <- isore.constructReadClasses(readGrgList,
198198
unlisted_junctions, uniqueJunctions, runName = "TODO",
199-
annotations, stranded, verbose, trustReadStartEnd = FALSE)
199+
annotations, stranded, verbose)
200200

201201
}
202202

R/bambu-processReads_utilityConstructReadClasses.R

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#' @noRd
1111
isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
1212
uniqueJunctions, runName = "sample1",
13-
annotations, stranded = FALSE, verbose = FALSE, trustReadStartEnd = FALSE) {
13+
annotations, stranded = FALSE, verbose = FALSE) {
1414
#split reads into single exon and multi exon reads
1515
reads.singleExon <- unlist(readGrgList[elementNROWS(readGrgList) == 1],
1616
use.names = FALSE)
@@ -29,7 +29,7 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
2929
uniqueJunctions = uniqueJunctions,
3030
unlisted_junctions = unlisted_junctions,
3131
readGrgList = readGrgList,
32-
stranded = stranded, annotations, trustReadStartEnd = FALSE)}
32+
stranded = stranded, annotations)}
3333
else{exonsByRC.spliced = GRangesList()}
3434
end.ptm <- proc.time()
3535
rm(readGrgList, unlisted_junctions, uniqueJunctions)
@@ -57,7 +57,7 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
5757
#' @importFrom GenomicRanges match
5858
#' @noRd
5959
constructSplicedReadClasses <- function(uniqueJunctions, unlisted_junctions,
60-
readGrgList, annotations, stranded = FALSE, trustReadStartEnd = FALSE) {
60+
readGrgList, annotations, stranded = FALSE) {
6161
options(scipen = 999)
6262
allToUniqueJunctionMatch <- GenomicRanges::match(unlisted_junctions,
6363
uniqueJunctions, ignore.strand = TRUE)
@@ -91,7 +91,7 @@ constructSplicedReadClasses <- function(uniqueJunctions, unlisted_junctions,
9191
rm(lowConfidenceReads, uniqueJunctions, allToUniqueJunctionMatch)
9292
readTable <- createReadTable(start(unlisted_junctions),
9393
end(unlisted_junctions), mcols(unlisted_junctions)$id, readGrgList,
94-
readStrand, readConfidence, annotations, trustReadStartEnd = FALSE)
94+
readStrand, readConfidence, annotations)
9595
exonsByReadClass <- createExonsByReadClass(readTable)
9696
readTable <- readTable %>% dplyr::select(chr.rc = chr, strand.rc = strand,
9797
startSD = startSD, endSD = endSD, firstExonGroup = firstExonGroup,
@@ -159,8 +159,7 @@ correctReadStrandById <- function(strand, id, stranded = FALSE){
159159
#' row_number .groups
160160
#' @noRd
161161
createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end,
162-
unlisted_junctions_id, readGrgList,readStrand, readConfidence, annotations, trustReadStartEnd = FALSE) {
163-
firstExons <- selectFirstExonFromRead(readGrgList)
162+
unlisted_junctions_id, readGrgList,readStrand, readConfidence, annotations) {
164163
readRanges <- unlist(range(ranges(readGrgList)), use.names = FALSE)
165164
intronStartCoordinatesInt <-
166165
as.integer(min(splitAsList(unlisted_junctions_start,
@@ -178,22 +177,29 @@ createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end,
178177
start = pmin(start(readRanges), intronStartCoordinatesInt),
179178
end = pmax(end(readRanges), intronEndCoordinatesInt),
180179
strand = readStrand, confidenceType = readConfidence,
181-
firstExon5prime = ifelse(strand != "-", start(firstExons), end(firstExons)), #assume * is +
182-
firstExon3prime = ifelse(strand != "-", end(firstExons), start(firstExons)),
183180
alignmentStrand = as.character(getStrandFromGrList(readGrgList))=='+',
184181
readId = mcols(readGrgList)$id,
185182
sampleID = mcols(readGrgList)$sampleID)
183+
readTable <- readTable %>%
184+
mutate(intronStartCoordinatesInt = intronStartCoordinatesInt,
185+
intronEndCoordinatesInt = intronEndCoordinatesInt,
186+
firstExon5prime = ifelse(strand != "-", start, end), #assume * is +
187+
firstExon3prime = ifelse(strand != "-", intronStartCoordinatesInt+1, intronEndCoordinatesInt-1),
188+
lastExon5prime = ifelse(strand != "-", intronEndCoordinatesInt-1, intronStartCoordinatesInt+1),
189+
lastExon3prime = ifelse(strand != "-", end, start)
190+
) %>%
191+
select(-intronStartCoordinatesInt, -intronEndCoordinatesInt)
186192
rm(readRanges, readStrand, unlisted_junctions_start,
187193
unlisted_junctions_end, unlisted_junctions_id, readConfidence,
188194
intronStartCoordinatesInt, intronEndCoordinatesInt)
189-
readTable <- readsPotentialTss(readTable, annotations, trustReadStartEnd = FALSE)
195+
readTable <- splitReadClassByStartEnd(readTable, annotations)
190196
## currently 80%/20% quantile of reads is used to identify start/end sites
191197
readTable <- readTable %>%
192-
group_by(chr, strand, intronEnds, intronStarts, confidenceType, firstExonGroup) %>%
198+
group_by(chr, strand, intronEnds, intronStarts, confidenceType, firstExonGroup, lastExonGroup) %>%
193199
summarise(readCount = n(), startSD = sd(start), endSD = sd(end),
194200
start = nth(x = start, n = ceiling(readCount / 5), order_by = start),
195201
end = nth(x = end, n = ceiling(readCount / 1.25), order_by = end),
196-
firstExonGroup = unique(firstExonGroup),
202+
firstExonGroup = unique(firstExonGroup), lastExonGroup = unique(lastExonGroup),
197203
readCount.posStrand = sum(alignmentStrand, na.rm = TRUE),
198204
readIds = list(readId), sampleIDs = list(sampleID),
199205
.groups = 'drop') %>%
@@ -202,42 +208,37 @@ createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end,
202208
return(readTable)
203209
}
204210

205-
readsPotentialTss <- function(readTable, annotations, trustReadStartEnd = TRUE){
211+
splitReadClassByStartEnd <- function(readTable, annotations){
206212
exons <- unlist(annotations)
207-
annoTable <- tibble(Tx = names(exons),
213+
mcols(exons) <- cbind(mcols(exons),
214+
mcols(annotations)[rep(seq_along(annotations), elementNROWS(annotations)), ])
215+
annoTable <- tibble(TXNAME = names(exons),
216+
GENEID = mcols(exons)$GENEID,
208217
exonRank = mcols(exons)$exon_rank,
209218
chr = as.character(seqnames(exons)),
210219
start = start(exons),
211220
end = end(exons),
212221
strand = as.character(strand(exons)),
213222
firstExon5prime = ifelse(strand != "-", start(exons), end(exons)), #assume * is +
214-
firstExon3prime = ifelse(strand != "-", end(exons), start(exons)))
223+
firstExon3prime = ifelse(strand != "-", end(exons), start(exons)),
224+
lastExon5prime = ifelse(strand != "-", start(exons), end(exons)), #assume * is +
225+
lastExon3prime = ifelse(strand != "-", end(exons), start(exons)))
215226
readTable = bind_rows(readTable, annoTable)
216-
#add Tx id for mapped reads
227+
#add gene id id for mapped reads
217228
readTable <- readTable %>%
218229
filter(strand != "*") %>%
219230
group_by(chr, strand, firstExon3prime) %>%
220-
mutate(Tx = ifelse(is.na(Tx), Tx[!is.na(Tx)][1], Tx)) %>% # is it possible that two tx from annotation have same exon
231+
mutate(GENEID = ifelse(is.na(GENEID), GENEID[!is.na(GENEID)][1], GENEID)) %>% # is it possible that two tx from annotation have same exon
232+
ungroup() %>%
233+
group_by(chr, strand, lastExon5prime) %>%
234+
mutate(GENEID = ifelse(is.na(GENEID), GENEID[!is.na(GENEID)][1], GENEID)) %>% # is it possible that two tx from annotation have same exon
221235
ungroup()
222236
#add first exon group for reads
223237
readTable <- readTable %>%
224-
group_by(Tx) %>%
225-
arrange(firstExon5prime, .by_group = TRUE) %>%
226-
mutate(firstExonGroup = findInterval(start,sort(start[is.na(readId)]), left.open = F)) %>%
227-
ungroup()
228-
229-
if(trustReadStartEnd == TRUE){
230-
readTable <- readTable %>%
231-
group_by(Tx, firstExon3prime, firstExonGroup) %>%
232-
#mutate(potentialTss = ifelse(strand != "-", min(start[!is.na(readId)]), max(end[!is.na(readId)]))) %>%
233-
ungroup() %>% filter(!is.na(readId))
234-
}
235-
else{
236-
readTable <- readTable %>%
237-
group_by(Tx, firstExon3prime, firstExonGroup) %>%
238-
#mutate(potentialTss = ifelse(strand != "-", start[is.na(readId)], end[is.na(readId)])) %>%
239-
ungroup() %>% filter(!is.na(readId))
240-
}
238+
group_by(GENEID) %>%
239+
mutate(firstExonGroup = findInterval(start,sort(start[is.na(readId)]))) %>%
240+
mutate(lastExonGroup = findInterval(end,sort(end[is.na(readId)]), left.open = T)) %>%
241+
ungroup() %>% filter(!is.na(readId))
241242
return(readTable)
242243
}
243244

R/bambu.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
141141
trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE,
142142
fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL,
143143
sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL,
144-
processByChromosome = FALSE, processByBam = TRUE, trustReadStartEnd = FALSE) {
144+
processByChromosome = FALSE, processByBam = TRUE) {
145145
message(paste0("Running Bambu-v", "3.9.0"))
146146
if(!is.null(mode)){
147147
if(mode == "bulk"){
@@ -209,7 +209,7 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
209209
processByChromosome = processByChromosome, processByBam = processByBam,
210210
demultiplexed = demultiplexed,
211211
sampleNames = sampleNames, cleanReads = cleanReads,
212-
dedupUMI = dedupUMI,barcodesToFilter = barcodesToFilter, trustReadStartEnd = FALSE)
212+
dedupUMI = dedupUMI,barcodesToFilter = barcodesToFilter)
213213
}
214214

215215
#warnings = handleWarnings(readClassList, verbose)

0 commit comments

Comments
 (0)