Skip to content

Commit 262bfba

Browse files
committed
Fixing Issue #9 and other minor changes
1 parent dd65469 commit 262bfba

File tree

3 files changed

+50
-23
lines changed

3 files changed

+50
-23
lines changed

NGS-Utils/README.pod

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -737,12 +737,18 @@ features from C<< --cat-ncRNAs >> module.
737737

738738
Run the C<< CPC >> module. No options are required.
739739

740+
=item --blast-strand (Optional)
741+
742+
Specify which query strand to search against the database: 1 = plus strand, 2 = minus strand, 3 = both strands.
743+
744+
Default: 1
745+
740746
=item --skip-cpc
741747

742748
Skip running C<< CPC >> altogether. Use this option if you think CPC is flagging a lot of
743749
your transcripts as B<I<coding>> and instead rely only on Infernal search results.
744750

745-
=item --skip-blastall-core
751+
=item --skip-cpc-core
746752

747753
Skip running the core C<< CPC >> process once you know you have output from C<< CPC >>.
748754
This option can be used when lncRNApipe fails for some reason after C<< blastall >>
@@ -831,6 +837,6 @@ This program is distributed under the Artistic License 2.0.
831837

832838
=head1 DATE
833839

834-
Feb-12-2018
840+
Feb-14-2018
835841

836842
=cut

NGS-Utils/categorize_ncRNAs.pl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
use Fcntl qw / :flock SEEK_END /;
1010

1111
my ($LASTCHANGEDBY) = q$LastChangedBy: konganti $ =~ m/.+?\:(.+)/;
12-
my ($LASTCHANGEDDATE) = q$LastChangedDate: 2016-01-26 12:11:27 -0500 (Tue, 26 Jan 2016) $ =~ m/.+?\:(.+)/;
12+
my ($LASTCHANGEDDATE) = q$LastChangedDate: 2018-02-14 08:47:27 -0500 (Wed, 14 Feb 2018) $ =~ m/.+?\:(.+)/;
1313
my ($VERSION) = q$LastChangedRevision: 1043 $ =~ m/.+?(\d+)/;
1414
my $AUTHORFULLNAME = 'Kranti Konganti';
1515

@@ -987,8 +987,8 @@ sub check_gtf_attributes {
987987
qq/chr3\tCufflinks\ttranscript\t30549662\t30551349\t1000\t-\t.\tgene_id "CUFF.22498"; transcript_id "CUFF.22498.1"; FPKM "2.5052666329"; frac "1.000000"; conf_lo "1.676755"; conf_hi "3.353509"; cov "4.749121";\n/ .
988988
qq/chr3\tCufflinks\texon\t30549662\t30550273\t1000\t-\t.\tgene_id "CUFF.22498"; transcript_id "CUFF.22498.1"; exon_number "1"; FPKM "2.5052666329"; frac "1.000000"; conf_lo "1.676755"; conf_hi "3.353509"; cov "4.749121";\n/ .
989989
qq/chr3\tCufflinks\texon\t30551033\t30551349\t1000\t-\t.\tgene_id "CUFF.22498"; transcript_id "CUFF.22498.1"; exon_number "2"; FPKM "2.5052666329"; frac "1.000000"; conf_lo "1.676755"; conf_hi "3.353509"; cov "4.749121";\n/ .
990-
qq/\nYour File:\n----------\n/ . $io->execute_get_sys_cmd_output("head -n 3 $file"), 'INFO!');
991-
$io->warning('Sit back and relax. We got it covered ... Formatting the GTF file to process with lncRNApipe.', 'INFO!');
990+
qq/\nYour File:\n----------\n/ . $io->execute_get_sys_cmd_output("head -n 3 $file"), 'INFO!!');
991+
$io->warning('Sit back and relax. We got it covered ... Formatting the GTF file to process with lncRNApipe.', 'INFO!!');
992992
$file = format_gtf($file, $label);
993993
}
994994
return $file;
@@ -1039,7 +1039,7 @@ sub format_gtf {
10391039
sub check_for_zero_class {
10401040
for (0 .. $#ARGV) {
10411041
if (-e $get_putative_ncRNAs_chkpt && !-e $p_file_names_gtf->[$_]) {
1042-
$io->warning("We could not find any requested class codes [ in $cuffcmp ] ...\nBailing out!");
1042+
#$io->warning("We could not find any requested class codes [ in $cuffcmp ] ...\nBailing out!");
10431043
$io->execute_system_command("touch $cat_class_zero");
10441044
exit 0;
10451045
}
@@ -1293,6 +1293,6 @@ =head1 COPYRIGHT
12931293
12941294
=head1 DATE
12951295
1296-
Jan-26-2016
1296+
Feb-14-2018
12971297
12981298
=cut

NGS-Utils/lncRNApipe

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ use Data::Dumper;
1212
Getopt::Long::Configure('prefix=--');
1313

1414
my ($LASTCHANGEDBY) = q$LastChangedBy: konganti $ =~ m/.+?\:(.+)/;
15-
my ($LASTCHANGEDDATE) = q$LastChangedDate: 2018-02-12 09:27:27 -0500 (Mon, 12 February 2018) $ =~ m/.+?\:(.+)/;
16-
my ($VERSION) = q$LastChangedRevision: 1.2.2 $ =~ m/.+?\:\s*(.*)\s*.*/;
15+
my ($LASTCHANGEDDATE) = q$LastChangedDate: 2018-02-14 11:27:27 -0500 (Mon, 14 Feb 2018) $ =~ m/.+?\:(.+)/;
16+
my ($VERSION) = q$LastChangedRevision: 1.2.3 $ =~ m/.+?\:\s*(.*)\s*.*/;
1717
my $AUTHORFULLNAME = 'Kranti Konganti';
1818

1919
my ($help, $quiet, $setup, $get_uq_sc_opts,
@@ -25,7 +25,7 @@ my ($help, $quiet, $setup, $get_uq_sc_opts,
2525
$lncRNApipe_ver_info, $no_update_check,
2626
$skip_get_uq, $lncRNApipe_succ, $num_cpu_by_2,
2727
$skip_cpc, $debug, $sp_list, $pipe_config,
28-
$conf_run_id, $setup_compiler,
28+
$conf_run_id, $setup_compiler, $blast_strand,
2929
$cpanm_args, $donot_wait4jobIDs);
3030

3131
my $kill_jobs_thru_conf = my $show_lncrna_counts = [];
@@ -38,11 +38,12 @@ my $is_valid_option = GetOptions('help:s' => \$help,
3838
'fetch-seq:s' => \$fetch_sc_opts,
3939
'get-uq-feat=s' => \$get_uq_sc_opts,
4040
'cpc' => \$start_cpc,
41+
'blast-strand:i' => \$blast_strand,
4142
'rnafold:s' => \$start_rnafold,
4243
'rm-int-plots' => \$rm_int_plots,
4344
'infernal:s' => \$start_infernal,
4445
'cpu=i' => \$num_cpu,
45-
'skip-blastall-core' => \$skip_cpc_core,
46+
'skip-cpc-core' => \$skip_cpc_core,
4647
'skip-rnafold-core' => \$skip_rnafold_core,
4748
'skip-cmscan-core' => \$skip_cmscan_core,
4849
'coverage-infernal|cov-inf=f' => \$inf_cov,
@@ -208,6 +209,20 @@ sub run_lncRNApipe {
208209

209210
my $dep_tools_fh = $io->open_file('<', $USER_HOME . '/.lncRNApipe.depconf');
210211
get_deps($dep_tools_fh);
212+
213+
# Check blast strand options for CPC.
214+
if (defined $blast_strand && $blast_strand !~ m/^[123]$/) {
215+
$io->error("The binary blastall takes only 1, 2, or 3 for -S argument, wherein\n" .
216+
"1 means search the plus strand of the query sequence\n" .
217+
"2 means search the minus strand of the query sequence\n" .
218+
"3 means earch both strands of the query sequence\n\n" .
219+
"You entered: $blast_strand !");
220+
}
221+
elsif (!defined $blast_strand) {
222+
#$io->warning("Will run CPC using blastall [ with default option: -S 1 ]." .
223+
# "\nUse --blast-strand option with lncRNApipe to change this.", 'INFO!!');
224+
$blast_strand = 1;
225+
}
211226

212227
# User has requested to run the pipeline from assembly stage. Handle it here.
213228
run_lncRNApipe_from_conf($io, $shall_I_run, $s_time) if (ref($shall_I_run) eq 'HASH');
@@ -317,7 +332,7 @@ sub run_lncRNApipe {
317332
$gtfToGenePred_failed =~ s/[\s\r\n]+//g;
318333

319334
if ($gtfToGenePred_failed =~ m/exonFramesfieldisbeingadded/i) {
320-
$io->warning('Trying without -genePredExt ...', 'INFO!');
335+
$io->warning('Trying without -genePredExt ...', 'INFO!!');
321336
$gtfToGenePred_failed = $is_failed = $io->execute_get_sys_cmd_output($deps->{'bin-gtfToGenePred'} . ' -geneNameAsName2 ' .
322337
$annot . ' ' . $cat_dir . '/' . $io->file_basename($annot) . '.txt',
323338
"Command call:\n" .
@@ -367,7 +382,7 @@ sub run_lncRNApipe {
367382

368383
# Rare case exit code. When categorize_ncRNAs.pl could not find cuffcompare class codes requested by user or pipeline.
369384
if (-e $cat_class_zero) {
370-
$io->warning("Could not find any cuffcompare class codes as requested [ in $cuffcmp_dir/lncRNApipe_cuffcmp.tracking ] ...", 'INFO!');
385+
$io->warning("We could not find any cuffcompare class codes as requested [ in $cuffcmp_dir/lncRNApipe_cuffcmp.tracking ] ...", 'INFO!!');
371386
$lncRNApipe_succ = 1;
372387

373388
undef $fetch_sc_opts;
@@ -598,13 +613,13 @@ sub run_lncRNApipe {
598613
$cpc_cpu = $num_cpu_by_2 if (defined $num_cpu);
599614

600615
if (!defined $skip_cpc_core) {
601-
$io->execute_system_command('NUM_CPU=' . $cpc_cpu . ' ' . $deps->{'cpc'} .
602-
' "' . $lncRNApipe_unique_seq_file . ' ' . $CPC_out_file . ' ' . $cpc_work_dir . ' ' . $cpc_work_dir . '" ' .
603-
$blastall_path->($deps->{'blastall'}),
616+
$io->execute_system_command('BLAST_STRAND=' . $blast_strand . ' NUM_CPU=' . $cpc_cpu . ' ' . $deps->{'cpc'} .
617+
' "' . $lncRNApipe_unique_seq_file . ' ' . $CPC_out_file . ' ' . $cpc_work_dir . ' ' .
618+
$cpc_work_dir . '" ' . $blastall_path->($deps->{'blastall'}),
604619
"Command call:\n" .
605-
"-------------\n" . 'NUM_CPU=' . $cpc_cpu . ' ' . $deps->{'cpc'} .
606-
' "' . $lncRNApipe_unique_seq_file . ' ' . $CPC_out_file . ' ' . $cpc_work_dir . ' ' . $cpc_work_dir . '" ' .
607-
$blastall_path->($deps->{'blastall'}));
620+
"-------------\n" . 'BLAST_STRAND=' . $blast_strand . ' NUM_CPU=' . $cpc_cpu . ' ' . $deps->{'cpc'} .
621+
' "' . $lncRNApipe_unique_seq_file . ' ' . $CPC_out_file . ' ' . $cpc_work_dir . ' ' .
622+
$cpc_work_dir . '" ' . $blastall_path->($deps->{'blastall'}));
608623
}
609624
} if (defined $start_cpc);
610625

@@ -798,7 +813,7 @@ sub run_lncRNApipe {
798813
"Truncated output from $cat_dir/*.CPC.predict.txt");
799814

800815
$io->error("Could not get final putative lncRNA count.\n" .
801-
"Try re-running the final module. Take a look at --skip-blastall-core, --skip-rnafold-core and --skip-cmscan-core options." .
816+
"Try re-running the final module. Take a look at --skip-cpc-core, --skip-rnafold-core and --skip-cmscan-core options." .
802817
"\nThis may also mean that the putative lncRNAs that were used with CPC may have all been flagged as \"coding\".");
803818
}
804819
else {
@@ -2708,7 +2723,7 @@ sub get_job_dependency_chain {
27082723
"sleep 10; else break; fi; done;";
27092724
}
27102725
elsif ($file =~ m/job_IDs_cufflinks/i) {
2711-
$job_deps_str = "while [ true ]; do if [ -e \"$check_file_presence\" ] && [ -s \"$check_file_presence\" ]; then " .
2726+
$job_deps_str = "while [ true ]; do if [ -e \"$check_file_presence\" ] && [ -s \"$check_file_presence\" ] && [ ! -z \"\$(ps aux | grep -P 'cufflinks.+?($job_deps_str)' | grep -v grep)\" ]; then " .
27122727
"break; else sleep 10; fi; done;";
27132728
}
27142729
}
@@ -3773,12 +3788,18 @@ features from C<< --cat-ncRNAs >> module.
37733788
37743789
Run the C<< CPC >> module. No options are required.
37753790
3791+
=item --blast-strand (Optional)
3792+
3793+
Specify which query strand to search against the database: 1 = plus strand, 2 = minus strand, 3 = both strands.
3794+
3795+
Default: 1
3796+
37763797
=item --skip-cpc
37773798
37783799
Skip running C<< CPC >> altogether. Use this option if you think CPC is flagging a lot of
37793800
your transcripts as B<I<coding>> and instead rely only on Infernal search results.
37803801
3781-
=item --skip-blastall-core
3802+
=item --skip-cpc-core
37823803
37833804
Skip running the core C<< CPC >> process once you know you have output from C<< CPC >>.
37843805
This option can be used when lncRNApipe fails for some reason after C<< blastall >>
@@ -3867,6 +3888,6 @@ This program is distributed under the Artistic License 2.0.
38673888
38683889
=head1 DATE
38693890
3870-
Feb-12-2018
3891+
Feb-14-2018
38713892
38723893
=cut

0 commit comments

Comments
 (0)