Skip to content

Commit 64fd510

Browse files
authored
Merge pull request #78 from SATAY-LL/release_1.1.5
Release 1.1.5
2 parents 68e2c5b + 3b2798e commit 64fd510

File tree

8 files changed

+106
-22
lines changed

8 files changed

+106
-22
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [1.1.5]-2022-03-22
8+
9+
### Modified
10+
11+
- Update the plot that generates the volcano module.
12+
- Update font size, colors and size of the plot.
13+
- Allowing for save the figure as a png file.
14+
- Allowing to annotate the plot based on the fold change and p values.
15+
716
## [1.1.4] - 2022-01-25
817

918
### Modified

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,5 @@ message: "If you use this software, please cite it using these metadata."
2626
repository-code: "https://github.com/SATAY-LL/Transposonmapper"
2727
title: transposonmapper
2828
doi: 10.5281/zenodo.4636301
29-
version: 1.1.4
29+
version: 1.1.5
3030
...

notebooks/pipeline_workflow.ipynb

Lines changed: 33 additions & 7 deletions
Large diffs are not rendered by default.

notebooks/volcano_WTvsdnrp1-1.png

2 MB
Loading

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 1.1.4
2+
current_version = 1.1.5
33

44
[bumpversion:file:transposonmapper/__version__.py]
55
search = __version__ = "{current_version}"

tests/statistics/test_volcano.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ def test_output():
1010
file_b= ['dnrp1-1_merged-DpnII-NlaIII-a_trimmed.sorted.bam_pergene.txt',
1111
'dnrp1-1_merged-DpnII-NlaIII-b_trimmed.sorted.bam_pergene.txt']
1212

13-
data=volcano(path,file_a,path,file_b)
13+
data=volcano(path,file_a,path,file_b,fold_change_interval=[1,-1],p_value_interval=[2,2])
1414

1515
assert isinstance(data,pandas.core.frame.DataFrame) , "It is expected a dataframe"

transposonmapper/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.1.4"
1+
__version__ = "1.1.5"

transposonmapper/statistics/volcanoplot.py

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import os, sys
22

33
import matplotlib.pyplot as plt
4+
import numpy as np
45

56

67
from transposonmapper.statistics.volcano_helpers import apply_stats, info_from_datasets, make_datafile
78

8-
def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', significance_threshold=0.01, normalize=True, trackgene_list=[], figure_title=""):
9+
def volcano(path_a, filelist_a, path_b, filelist_b,fold_change_interval,p_value_interval,variable='read_per_gene',
10+
significance_threshold=0.01, normalize=True, trackgene_list=[], figure_title="",savefigure=False):
911
"""This script creates a volcanoplot to show the significance of fold change between two datasets.
1012
It is based on this website:
1113
- https://towardsdatascience.com/inferential-statistics-series-t-test-using-numpy-2718f8f9bf2f
@@ -50,6 +52,16 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
5052
The format of the pergene file should be TAB separated and NOT COMMA separated. if you have
5153
it as comma separated you can convert to tab separated using the command line with
5254
this command: `cat oldfile.txt | tr '[,]' '[\t]' > newfile.txt`
55+
56+
fold_change_interval : numpy.array
57+
58+
a vector of two elements where the first element(positive value) set the upper threshold for the positive
59+
values and the second element(negative value) set the lower threshold for the negative values.
60+
61+
62+
p_value_interval : numpy.array
63+
a vector of two elements where the first element set the upper threshold for the positive
64+
values and the second element set the lower threshold for the negative values.
5365
variable : str, optional
5466
tn_per_gene, read_per_gene or Nreadsperinsrt , by default 'read_per_gene'
5567
significance_threshold : float, optional
@@ -61,6 +73,7 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
6173
Enter a list of gene name(s) which will be highlighted in the plot (e.g. ['cdc42', 'nrp1']), by default []
6274
figure_title : str, optional
6375
The title of the figure if not empty, by default ""
76+
savefigure : bool, optional
6477
6578
6679
Returns
@@ -101,26 +114,29 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
101114
grid = plt.GridSpec(1, 1, wspace=0.0, hspace=0.0)
102115
ax = plt.subplot(grid[0,0])
103116

104-
colors = {False:'black', True:'red'} # based on p-value significance
105-
sc = ax.scatter(x=volcano_df['fold_change'], y=volcano_df['p_value'], alpha=0.4, marker='.', c=volcano_df['significance'].apply(lambda x:colors[x]))
117+
colors = {False:'gray', True:'red'} # based on p-value significance
118+
sc = ax.scatter(x=volcano_df['fold_change'], y=volcano_df['p_value'], alpha=0.4,
119+
s=100, c=volcano_df['significance'].apply(lambda x:colors[x]),label=variable)
106120
ax.grid(True, which='major', axis='both', alpha=0.4)
107-
ax.set_xlabel('Log2 FC')
108-
ax.set_ylabel('-1*Log10 p-value')
121+
ax.set_xlabel('Log2 FC',fontsize=16)
122+
ax.set_ylabel('-*Log10 p-value',fontsize=16)
123+
ax.tick_params(axis='both', which='major', labelsize=14)
124+
ax.legend(loc='best',fontsize=16)
109125
if not figure_title == "":
110-
ax.set_title(variable + " - " + figure_title)
126+
ax.set_title(figure_title,fontsize=20)
111127
else:
112-
ax.set_title(variable)
113-
ax.scatter(x=[],y=[],marker='.',color='black', label='p-value > {}'.format(significance_threshold)) #set empty scatterplot for legend
114-
ax.scatter(x=[],y=[],marker='.',color='red', label='p-value < {}'.format(significance_threshold)) #set empty scatterplot for legend
115-
ax.legend()
128+
ax.set_title(variable,fontsize=20)
129+
# ax.scatter(x=[],y=[],marker='.',color='black', label='p-value > {}'.format(significance_threshold)) #set empty scatterplot for legend
130+
# ax.scatter(x=[],y=[],marker='.',color='red', label='p-value < {}'.format(significance_threshold)) #set empty scatterplot for legend
131+
# ax.legend()
116132
if not trackgene_list == []:
117133
genenames_array = volcano_df['gene_names'].to_numpy()
118134
for trackgene in trackgene_list:
119135
trackgene = trackgene.upper()
120136
if trackgene in genenames_array:
121137
trackgene_index = tnread_gene_a.loc[tnread_gene_a['gene_names'] == trackgene].index[0]
122138
trackgene_annot = ax.annotate(volcano_df.iloc[trackgene_index,:]['gene_names'], (volcano_df.iloc[trackgene_index,:]['fold_change'], volcano_df.iloc[trackgene_index,:]['p_value']),
123-
size=10, c='green', bbox=dict(boxstyle="round", fc="w"))
139+
size=12, c='green', bbox=dict(boxstyle="round", fc="w"))
124140
trackgene_annot.get_bbox_patch().set_alpha(0.6)
125141
else:
126142
print('WARNING: %s not found' % trackgene)
@@ -133,6 +149,36 @@ def volcano(path_a, filelist_a, path_b, filelist_b, variable='read_per_gene', si
133149
arrowprops=dict(arrowstyle="->"))
134150
annot.set_visible(False)
135151

152+
## Annotate based on fold change and p values
153+
if not fold_change_interval == [] or not p_value_interval == []:
154+
target=volcano_df[(volcano_df["fold_change"]>fold_change_interval[0]) & (volcano_df["p_value"]>p_value_interval[0])]["gene_names"]
155+
target_left=volcano_df[(volcano_df["fold_change"]<fold_change_interval[1]) & (volcano_df["p_value"]>p_value_interval[1])]["gene_names"]
156+
157+
if len(target)!=0:
158+
for i in np.arange(0,len(target)):
159+
160+
index=np.where(volcano_df==target.iloc[i])[0][0]
161+
162+
163+
164+
trackgene_annot = ax.annotate(volcano_df.iloc[index,:]['gene_names'], (volcano_df.iloc[index,:]['fold_change'],
165+
volcano_df.iloc[index,:]['p_value']),
166+
size=12, c='green', bbox=dict(boxstyle="round", fc="w"))
167+
trackgene_annot.get_bbox_patch().set_alpha(0.6)
168+
169+
if len(target_left)!=0:
170+
for i in np.arange(0,len(target_left)):
171+
172+
index=np.where(volcano_df==target_left.iloc[i])[0][0]
173+
174+
175+
176+
trackgene_annot = ax.annotate(volcano_df.iloc[index,:]['gene_names'], (volcano_df.iloc[index,:]['fold_change'],
177+
volcano_df.iloc[index,:]['p_value']),
178+
size=12, c='green', bbox=dict(boxstyle="round", fc="w"))
179+
trackgene_annot.get_bbox_patch().set_alpha(0.6)
180+
181+
136182

137183

138184
def update_annot(ind):
@@ -162,6 +208,9 @@ def hover(event):
162208

163209
fig.canvas.mpl_connect("motion_notify_event", hover)
164210

211+
if savefigure:
212+
fig.savefig("volcano_"+filelist_a[0].split("_")[0]+"vs"+filelist_b[0].split("_")[0]+".png", dpi=400)
213+
165214

166215
## return function
167216
return(volcano_df)

0 commit comments

Comments
 (0)