Skip to content

Commit f66d58a

Browse files
authored
Merge pull request #179 from ProteinsWebTeam/dev
Merge dev into master
2 parents 072237c + af0b84d commit f66d58a

21 files changed

+389
-125
lines changed

config/elastic_mapping.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@
8181
"protein_af_score": {
8282
"type": "float"
8383
},
84+
"protein_bfvd_score": {
85+
"type": "float"
86+
},
8487
"proteome_acc": {
8588
"type": "keyword"
8689
},
@@ -105,6 +108,12 @@
105108
"structure_protein_acc": {
106109
"type": "keyword"
107110
},
111+
"structure_protein_db": {
112+
"type": "keyword"
113+
},
114+
"structure_protein_length": {
115+
"type": "long"
116+
},
108117
"structure_protein_locations": {
109118
"type": "object",
110119
"enabled": false

docs/README.md

Lines changed: 92 additions & 51 deletions
Large diffs are not rendered by default.

docs/examples/fetch-alphafold-for-entry.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121
def get_uniprot_accessions(source_db, query):
2222
api_url = "https://www.ebi.ac.uk/interpro/api"
2323
url = f"{api_url}/protein/UniProt/entry/{source_db}/{query}/?"
24-
url += urlencode({"has_model": True, "page_size": 100})
25-
24+
url += urlencode({"with": "alphafold", "page_size": 100})
2625
accessions = []
2726

2827
while True:
@@ -38,10 +37,9 @@ def get_uniprot_accessions(source_db, query):
3837

3938
return accessions
4039

41-
4240
def get_mem_db(query):
4341
url = f"https://www.ebi.ac.uk/interpro/api/utils/accession/{query}"
44-
42+
4543
with urlopen(url) as res:
4644
if res.status != 200:
4745
sys.stderr.write(f"error: no results found for {query}\n")
@@ -53,8 +51,7 @@ def get_mem_db(query):
5351
sys.stderr.write(f"error: {query} is not an entry\n")
5452

5553
return obj["source_database"]
56-
57-
54+
5855
def download_af_pdb(accession, outdir):
5956
url = f"https://alphafold.ebi.ac.uk/api/prediction/{accession}"
6057
with urlopen(url) as res:
@@ -69,8 +66,8 @@ def download_af_pdb(accession, outdir):
6966
for chunk in res:
7067
fh.write(chunk)
7168

72-
7369
def main():
70+
7471
query = sys.argv[1]
7572
outdir = sys.argv[2]
7673

docs/modifiers.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,18 @@ Information on member database signatures integrated/unintegrated in InterPro en
196196
| `filter_by_entry=< InterPro accession >` | x | Selected taxon hierarchy and counters for the InterPro entry accession specified | https://www.ebi.ac.uk:443/interpro/api/taxonomy/uniprot/1?filter_by_entry=IPR001165 |
197197
| `filter_by_entry_db=< db name >` | x | Selected taxon hierarchy and counters for the database name specified (e.g. interpro, pfam, smart...) | https://www.ebi.ac.uk:443/interpro/api/taxonomy/uniprot/1?filter_by_entry_db=interpro |
198198

199-
## /api/protein/uniprot/entry/InterPro/< _interpro accession_ >
199+
## /api/protein/uniprot/entry/< _source database_ >/< _entry accession_ >
200200

201201
Proteins with an AlphaFold model.
202202

203203
| Modifier | Compatible with other modifiers | Data returned | Example |
204204
|--------------------------|---------------------------------|---------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
205-
| `has_model=[true,false]` | x | List of proteins with/without an AlphaFold prediction for the InterPro entry selected | https://www.ebi.ac.uk:443/interpro/api/protein/uniprot/entry/InterPro/IPR000001/?has_model=true |
205+
| `has_model=[true,false]` | x | List of proteins with/without an AlphaFold prediction for the < _source database_ > entry selected | https://www.ebi.ac.uk:443/interpro/api/protein/uniprot/entry/InterPro/IPR000001/?has_model=true |
206+
207+
208+
Proteins with an AlphaFold or BFVD model.
209+
210+
| Modifier | Compatible with other modifiers | Data returned | Example |
211+
|--------------------------|---------------------------------|---------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
212+
| `with=[alphafold,bfvd]` | x | List of proteins with an AlphaFold prediction or a BFVD prediction for the < _source database_ > entry selected | https://www.ebi.ac.uk:443/interpro/api/protein/uniprot/entry/InterPro/IPR000001/?with=alphafold |
213+

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# remember to check the version is compatible with python 3.8
2-
Django==4.2.13
3-
djangorestframework==3.15.1
2+
Django==4.2.19
3+
djangorestframework==3.15.2
44
PyYAML==6.0
55
jsonfield2==4.0.0.post0
66
pymysql==1.1.1
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Generated by Django 4.2.19 on 2025-02-24 14:11
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
("webfront", "0034_set_wikipedia"),
11+
]
12+
13+
operations = [
14+
migrations.CreateModel(
15+
name="InterProNMatches",
16+
fields=[
17+
("match_id", models.AutoField(primary_key=True, serialize=False)),
18+
("in_interpro", models.BooleanField(db_column="in_interpro")),
19+
("is_preferred", models.BooleanField(db_column="is_preferred")),
20+
("locations", models.JSONField()),
21+
(
22+
"entry",
23+
models.ForeignKey(
24+
db_column="entry_acc",
25+
null=True,
26+
on_delete=django.db.models.deletion.SET_NULL,
27+
to="webfront.entry",
28+
),
29+
),
30+
(
31+
"protein_acc",
32+
models.ForeignKey(
33+
db_column="protein_acc",
34+
null=True,
35+
on_delete=django.db.models.deletion.SET_NULL,
36+
to="webfront.protein",
37+
),
38+
),
39+
],
40+
options={
41+
"db_table": "webfront_interpro_n",
42+
},
43+
),
44+
]
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Generated by Django 4.2.19 on 2025-02-24 14:14
2+
3+
from django.db import migrations
4+
import jsonfield.fields
5+
6+
7+
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
("webfront", "0035_interpronmatches"),
11+
]
12+
13+
operations = [
14+
migrations.AlterField(
15+
model_name="entry",
16+
name="overlaps_with",
17+
field=jsonfield.fields.JSONField(default=list),
18+
),
19+
migrations.AlterField(
20+
model_name="protein",
21+
name="structure",
22+
field=jsonfield.fields.JSONField(default=dict, null=True),
23+
),
24+
migrations.AlterField(
25+
model_name="set",
26+
name="authors",
27+
field=jsonfield.fields.JSONField(default=list),
28+
),
29+
migrations.AlterField(
30+
model_name="set",
31+
name="literature",
32+
field=jsonfield.fields.JSONField(default=list),
33+
),
34+
migrations.AlterField(
35+
model_name="set",
36+
name="wikipedia",
37+
field=jsonfield.fields.JSONField(default=list),
38+
),
39+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 4.2.13 on 2025-03-11 09:46
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('webfront', '0036_default_value_lists_dicts'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='protein',
15+
name='in_bfvd',
16+
field=models.BooleanField(default=False),
17+
),
18+
]

webfront/models/interpro_new.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class Entry(models.Model):
3535
hierarchy = JSONField(null=True)
3636
cross_references = JSONField(null=True)
3737
entry_date = models.DateTimeField(null=True)
38-
overlaps_with = JSONField(default=[])
38+
overlaps_with = JSONField(default=list)
3939
is_public = models.BooleanField(default=False)
4040
deletion_date = models.DateTimeField(null=True)
4141
counts = JSONField(null=True)
@@ -86,9 +86,10 @@ class Protein(models.Model):
8686
source_database = models.CharField(
8787
max_length=20, default="unreviewed", db_index=True
8888
)
89-
structure = JSONField(default={}, null=True)
89+
structure = JSONField(default=dict, null=True)
9090
is_fragment = models.BooleanField(default=False)
9191
in_alphafold = models.BooleanField(default=False)
92+
in_bfvd = models.BooleanField(default=False)
9293
tax_id = models.CharField(max_length=20, null=False, default="")
9394
ida_id = models.CharField(max_length=40, null=True)
9495
ida = models.TextField(null=True)
@@ -267,9 +268,9 @@ class Set(models.Model):
267268
source_database = models.CharField(max_length=20, db_index=True)
268269
relationships = JSONField(null=True)
269270
counts = JSONField(null=True)
270-
authors = JSONField(null=True)
271-
literature = JSONField(null=True)
272-
wikipedia = JSONField(null=True)
271+
authors = JSONField(default=list)
272+
literature = JSONField(default=list)
273+
wikipedia = JSONField(default=list)
273274

274275

275276
class Release_Note(models.Model):
@@ -296,7 +297,9 @@ class InterProNMatches(models.Model):
296297
entry = models.ForeignKey(
297298
Entry, db_column="entry_acc", on_delete=models.SET_NULL, null=True
298299
)
300+
in_interpro = models.BooleanField(db_column="in_interpro", null=False)
301+
is_preferred = models.BooleanField(db_column="is_preferred", null=False)
299302
locations = models.JSONField()
300303

301304
class Meta:
302-
db_table = "webfront_interpro_n"
305+
db_table = "webfront_interpro_n"

webfront/searcher/elastic_controller.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,19 @@ def tune_counter_facet_for_entry(self, facet, endpoint, extra_counters):
216216

217217
def tune_counter_facet_for_protein(self, facet, endpoint, extra_counters):
218218
if endpoint == "protein":
219+
if self.queryset_manager.main_endpoint == "structure":
220+
acc_field = "structure_protein_acc"
221+
db_field = "structure_protein_db"
222+
else:
223+
acc_field = "protein_acc"
224+
db_field = "protein_db"
225+
226+
facet["aggs"]["databases"]["terms"]["field"] = db_field
227+
facet["aggs"]["databases"]["aggs"]["unique"]["cardinality"]["field"] = acc_field
228+
219229
facet["aggs"]["uniprot"] = {
220-
"filter": {"exists": {"field": "protein_acc"}},
221-
"aggs": {"unique": {"cardinality": {"field": "protein_acc"}}},
230+
"filter": {"exists": {"field": acc_field}},
231+
"aggs": {"unique": {"cardinality": {"field": acc_field}}},
222232
}
223233
self.add_extra_counters(facet, "uniprot", extra_counters)
224234

@@ -310,19 +320,26 @@ def get_group_obj_of_field_by_query(
310320

311321
return output
312322

323+
def get_cardinality_field(self, endpoint):
324+
filters = self.queryset_manager.filters
325+
if endpoint == "protein" and filters.get("structure") and not filters.get("entry"):
326+
return "structure_protein_acc"
327+
328+
return endpoint + "_acc"
329+
313330
def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=None):
314331
should_keep_elastic_order = False
315332
qs = self.queryset_manager.get_searcher_query() if query is None else query
316333
if qs == "":
317334
qs = "*:*"
318335
facet = {
319336
"aggs": {
320-
"ngroups": {"cardinality": {"field": endpoint + "_acc"}},
337+
"ngroups": {"cardinality": {"field": self.get_cardinality_field(endpoint)}},
321338
"groups": {
322339
"composite": {
323340
"size": rows,
324341
"sources": [
325-
{"source": {"terms": {"field": "{}_acc".format(endpoint)}}}
342+
{"source": {"terms": {"field": self.get_cardinality_field(endpoint)}}}
326343
],
327344
}
328345
},

0 commit comments

Comments
 (0)