1010# ' @noRd
1111isore.constructReadClasses <- function (readGrgList , unlisted_junctions ,
1212 uniqueJunctions , runName = " sample1" ,
13- annotations , stranded = FALSE , verbose = FALSE , trustReadStartEnd = FALSE ) {
13+ annotations , stranded = FALSE , verbose = FALSE ) {
1414 # split reads into single exon and multi exon reads
1515 reads.singleExon <- unlist(readGrgList [elementNROWS(readGrgList ) == 1 ],
1616 use.names = FALSE )
@@ -29,7 +29,7 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
2929 uniqueJunctions = uniqueJunctions ,
3030 unlisted_junctions = unlisted_junctions ,
3131 readGrgList = readGrgList ,
32- stranded = stranded , annotations , trustReadStartEnd = FALSE )}
32+ stranded = stranded , annotations )}
3333 else {exonsByRC.spliced = GRangesList()}
3434 end.ptm <- proc.time()
3535 rm(readGrgList , unlisted_junctions , uniqueJunctions )
@@ -57,7 +57,7 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions,
5757# ' @importFrom GenomicRanges match
5858# ' @noRd
5959constructSplicedReadClasses <- function (uniqueJunctions , unlisted_junctions ,
60- readGrgList , annotations , stranded = FALSE , trustReadStartEnd = FALSE ) {
60+ readGrgList , annotations , stranded = FALSE ) {
6161 options(scipen = 999 )
6262 allToUniqueJunctionMatch <- GenomicRanges :: match(unlisted_junctions ,
6363 uniqueJunctions , ignore.strand = TRUE )
@@ -91,7 +91,7 @@ constructSplicedReadClasses <- function(uniqueJunctions, unlisted_junctions,
9191 rm(lowConfidenceReads , uniqueJunctions , allToUniqueJunctionMatch )
9292 readTable <- createReadTable(start(unlisted_junctions ),
9393 end(unlisted_junctions ), mcols(unlisted_junctions )$ id , readGrgList ,
94- readStrand , readConfidence , annotations , trustReadStartEnd = FALSE )
94+ readStrand , readConfidence , annotations )
9595 exonsByReadClass <- createExonsByReadClass(readTable )
9696 readTable <- readTable %> % dplyr :: select(chr.rc = chr , strand.rc = strand ,
9797 startSD = startSD , endSD = endSD , firstExonGroup = firstExonGroup ,
@@ -159,8 +159,7 @@ correctReadStrandById <- function(strand, id, stranded = FALSE){
159159# ' row_number .groups
160160# ' @noRd
161161createReadTable <- function (unlisted_junctions_start , unlisted_junctions_end ,
162- unlisted_junctions_id , readGrgList ,readStrand , readConfidence , annotations , trustReadStartEnd = FALSE ) {
163- firstExons <- selectFirstExonFromRead(readGrgList )
162+ unlisted_junctions_id , readGrgList ,readStrand , readConfidence , annotations ) {
164163 readRanges <- unlist(range(ranges(readGrgList )), use.names = FALSE )
165164 intronStartCoordinatesInt <-
166165 as.integer(min(splitAsList(unlisted_junctions_start ,
@@ -178,22 +177,29 @@ createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end,
178177 start = pmin(start(readRanges ), intronStartCoordinatesInt ),
179178 end = pmax(end(readRanges ), intronEndCoordinatesInt ),
180179 strand = readStrand , confidenceType = readConfidence ,
181- firstExon5prime = ifelse(strand != " -" , start(firstExons ), end(firstExons )), # assume * is +
182- firstExon3prime = ifelse(strand != " -" , end(firstExons ), start(firstExons )),
183180 alignmentStrand = as.character(getStrandFromGrList(readGrgList ))== ' +' ,
184181 readId = mcols(readGrgList )$ id ,
185182 sampleID = mcols(readGrgList )$ sampleID )
183+ readTable <- readTable %> %
184+ mutate(intronStartCoordinatesInt = intronStartCoordinatesInt ,
185+ intronEndCoordinatesInt = intronEndCoordinatesInt ,
186+ firstExon5prime = ifelse(strand != " -" , start , end ), # assume * is +
187+ firstExon3prime = ifelse(strand != " -" , intronStartCoordinatesInt + 1 , intronEndCoordinatesInt - 1 ),
188+ lastExon5prime = ifelse(strand != " -" , intronEndCoordinatesInt - 1 , intronStartCoordinatesInt + 1 ),
189+ lastExon3prime = ifelse(strand != " -" , end , start )
190+ ) %> %
191+ select(- intronStartCoordinatesInt , - intronEndCoordinatesInt )
186192 rm(readRanges , readStrand , unlisted_junctions_start ,
187193 unlisted_junctions_end , unlisted_junctions_id , readConfidence ,
188194 intronStartCoordinatesInt , intronEndCoordinatesInt )
189- readTable <- readsPotentialTss (readTable , annotations , trustReadStartEnd = FALSE )
195+ readTable <- splitReadClassByStartEnd (readTable , annotations )
190196 # # currently 80%/20% quantile of reads is used to identify start/end sites
191197 readTable <- readTable %> %
192- group_by(chr , strand , intronEnds , intronStarts , confidenceType , firstExonGroup ) %> %
198+ group_by(chr , strand , intronEnds , intronStarts , confidenceType , firstExonGroup , lastExonGroup ) %> %
193199 summarise(readCount = n(), startSD = sd(start ), endSD = sd(end ),
194200 start = nth(x = start , n = ceiling(readCount / 5 ), order_by = start ),
195201 end = nth(x = end , n = ceiling(readCount / 1.25 ), order_by = end ),
196- firstExonGroup = unique(firstExonGroup ),
202+ firstExonGroup = unique(firstExonGroup ), lastExonGroup = unique( lastExonGroup ),
197203 readCount.posStrand = sum(alignmentStrand , na.rm = TRUE ),
198204 readIds = list (readId ), sampleIDs = list (sampleID ),
199205 .groups = ' drop' ) %> %
@@ -202,42 +208,37 @@ createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end,
202208 return (readTable )
203209}
204210
#' Assign reads to first-exon and last-exon groups using annotated exons
#'
#' Annotated exons are appended to the read table as extra rows (these rows
#' have `readId == NA`), used to (1) transfer a GENEID to every read that
#' shares an exon boundary with an annotated exon and (2) bin read start/end
#' positions between the annotated exon starts/ends of that gene. The
#' annotation rows are removed again before returning.
#'
#' @param readTable tibble of reads; the code below reads the columns `chr`,
#'   `strand`, `start`, `end`, `firstExon3prime`, `lastExon5prime` and
#'   `readId` from it.
#' @param annotations list-like of exon ranges per transcript (it is
#'   unlisted, and `mcols(annotations)` is expected to provide `GENEID`;
#'   the exons are expected to carry `exon_rank`).
#' @return `readTable` restricted to read rows with strand other than "*",
#'   with the added columns `GENEID`, `firstExonGroup` and `lastExonGroup`
#'   (plus the annotation-only columns, which are NA for reads).
#' @noRd
splitReadClassByStartEnd <- function(readTable, annotations) {
    exons <- unlist(annotations)
    # copy the per-transcript metadata onto every exon of that transcript;
    # drop = FALSE keeps a one-column metadata table from collapsing to a
    # bare vector, which would lose the column name (e.g. GENEID)
    txIndexPerExon <- rep(seq_along(annotations), elementNROWS(annotations))
    mcols(exons) <- cbind(mcols(exons),
        mcols(annotations)[txIndexPerExon, , drop = FALSE])
    # every annotated exon is offered both as a candidate first exon and as a
    # candidate last exon, hence the identical 5'/3' definitions
    annoTable <- tibble(TXNAME = names(exons),
        GENEID = mcols(exons)$GENEID,
        exonRank = mcols(exons)$exon_rank,
        chr = as.character(seqnames(exons)),
        start = start(exons),
        end = end(exons),
        strand = as.character(strand(exons)),
        firstExon5prime = ifelse(strand != "-", start(exons), end(exons)), # assume * is +
        firstExon3prime = ifelse(strand != "-", end(exons), start(exons)),
        lastExon5prime = ifelse(strand != "-", start(exons), end(exons)), # assume * is +
        lastExon3prime = ifelse(strand != "-", end(exons), start(exons)))
    # annotation rows get NA in all read-only columns (notably readId)
    readTable <- bind_rows(readTable, annoTable)
    # add gene id for mapped reads: a read inherits the GENEID of an annotated
    # exon sharing its first-exon 3' end, and otherwise of one sharing its
    # last-exon 5' end
    # NOTE: if exons of several genes share the boundary, the first non-NA
    # GENEID within the group is used
    readTable <- readTable %>%
        filter(strand != "*") %>%
        group_by(chr, strand, firstExon3prime) %>%
        mutate(GENEID = ifelse(is.na(GENEID),
            GENEID[!is.na(GENEID)][1], GENEID)) %>%
        ungroup() %>%
        group_by(chr, strand, lastExon5prime) %>%
        mutate(GENEID = ifelse(is.na(GENEID),
            GENEID[!is.na(GENEID)][1], GENEID)) %>%
        ungroup()
    # add first and last exon group for reads: within each gene, bin the read
    # start (end) between the sorted starts (ends) of the annotated exons;
    # only annotation rows (readId is NA) define the break points
    readTable <- readTable %>%
        group_by(GENEID) %>%
        mutate(firstExonGroup = findInterval(start,
            sort(start[is.na(readId)]))) %>%
        mutate(lastExonGroup = findInterval(end,
            sort(end[is.na(readId)]), left.open = TRUE)) %>%
        ungroup() %>%
        filter(!is.na(readId))
    return(readTable)
}
243244
0 commit comments