Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: Linting

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

jobs:
lint:
name: Lint
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.7", "3.10" ]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: pip install tox
- name: Check code quality with flake8
run: tox -e flake8
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1002,3 +1002,11 @@ Generation of analogues of the specified molecule and unbiased library enumerati

* ``enumerationInstance.getReconstructedMols(mol)`` - fragments the provided molecule (which must be an RDKit molecule object, not a SMILES string). Returns two dictionaries - *allSyntheticPathways* and *allSynthons* - obtained during fragmentation.

# Development

This code is auto-formatted with [`black`](https://github.com/psf/black) and [`isort`](https://pycqa.github.io/isort/), and continuous integration checks that the formatting has been properly applied. Please run the following before submitting any pull request:

```shell
$ pip install tox
$ tox
```
60 changes: 37 additions & 23 deletions SyntOn_BBScaffoldGeneration.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import sys,os
import os
import sys

from rdkit import Chem

srcPath = os.path.split(os.path.realpath(__file__))[0]
sys.path.insert(1, srcPath)
from src.UsefulFunctions import *
from src.SyntOn_BBs import *
from src.UsefulFunctions import *


def main(args):
with open(args.output + "_Scaffolds.smi", "w") as out:
Expand All @@ -21,7 +25,9 @@ def main(args):
out.write(" " + scaffold + "\n")
else:
out.write(" linearMolecule\n")
scaffoldsCountSorted = {r: scaffoldsCount[r] for r in sorted(scaffoldsCount, key=scaffoldsCount.get, reverse=True)}
scaffoldsCountSorted = {
r: scaffoldsCount[r] for r in sorted(scaffoldsCount, key=scaffoldsCount.get, reverse=True)
}
scaffoldsCount.clear()
with open(args.output + "_scaffoldsCounts.smi", "w") as outCounts:
for scaffold in scaffoldsCountSorted:
Expand All @@ -30,41 +36,49 @@ def main(args):
cumSum = 0
TotalCompNumb = sum(scaffoldsCountSorted.values())
TotalScaffNumb = len(scaffoldsCountSorted)
for ind,scaff in enumerate(scaffoldsCountSorted):
for ind, scaff in enumerate(scaffoldsCountSorted):
cumSum += scaffoldsCountSorted[scaff]
outCumPer.write(str(int(round((ind + 1) / TotalScaffNumb * 100))) + " " + str(
int(round(cumSum / TotalCompNumb * 100))) + "\n")
outCumPer.write(
str(int(round((ind + 1) / TotalScaffNumb * 100)))
+ " "
+ str(int(round(cumSum / TotalCompNumb * 100)))
+ "\n"
)
scaffoldsCountSorted.clear()
scaffoldPlot(args.output + "_cumulativeprecentage.smi", args.output)



def scaffoldPlot(cumPercentageFile, outName):
from matplotlib import pyplot as plt
from numpy import genfromtxt
Data = genfromtxt(cumPercentageFile, delimiter=' ', names=['x', 'y'])

Data = genfromtxt(cumPercentageFile, delimiter=" ", names=["x", "y"])
fig, ax = plt.subplots()
ax.tick_params(axis='both', which='major', labelsize=12)
plt.plot(Data['x'], Data['y'], color="darkgreen")
ax.tick_params(axis="both", which="major", labelsize=12)
plt.plot(Data["x"], Data["y"], color="darkgreen")
plt.ylim(ymin=0, ymax=100)
plt.xlim(xmin=0, xmax=100)
plt.ylabel("Fraction of BBs, %", fontweight='bold', fontsize=14)
plt.xlabel("Fraction of scaffolds, %", fontweight='bold', fontsize=14)
plt.title("Cumulative Scaffold Frequency Plot", fontweight='bold', fontsize=14)
plt.ylabel("Fraction of BBs, %", fontweight="bold", fontsize=14)
plt.xlabel("Fraction of scaffolds, %", fontweight="bold", fontsize=14)
plt.title("Cumulative Scaffold Frequency Plot", fontweight="bold", fontsize=14)
plt.savefig("Scaffolds_FreqPlot_" + outName + ".png")


if __name__ == '__main__':
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="BBs Scaffold analysis. Generates meaningful BBs scaffolds after removing ring-containing leaving and protective groups. Count scaffolds occurrence in the provided collection of BBs, and construct cumulative scaffold frequency plot.",
epilog="Code implementation: Yuliana Zabolotna, Alexandre Varnek\n"
" Laboratoire de Chémoinformatique, Université de Strasbourg.\n\n"
"Knowledge base (SMARTS library): Dmitriy M.Volochnyuk, Sergey V.Ryabukhin, Kostiantyn Gavrylenko, Olexandre Oksiuta\n"
" Institute of Organic Chemistry, National Academy of Sciences of Ukraine\n"
" Kyiv National Taras Shevchenko University\n"
"2021 Strasbourg, Kiev",
prog="SyntOn_BBScaffoldGeneration", formatter_class=argparse.RawTextHelpFormatter)

parser = argparse.ArgumentParser(
description="BBs Scaffold analysis. Generates meaningful BBs scaffolds after removing ring-containing leaving and protective groups. Count scaffolds occurrence in the provided collection of BBs, and construct cumulative scaffold frequency plot.",
epilog="Code implementation: Yuliana Zabolotna, Alexandre Varnek\n"
" Laboratoire de Chémoinformatique, Université de Strasbourg.\n\n"
"Knowledge base (SMARTS library): Dmitriy M.Volochnyuk, Sergey V.Ryabukhin, Kostiantyn Gavrylenko, Olexandre Oksiuta\n"
" Institute of Organic Chemistry, National Academy of Sciences of Ukraine\n"
" Kyiv National Taras Shevchenko University\n"
"2021 Strasbourg, Kiev",
prog="SyntOn_BBScaffoldGeneration",
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument("-i", "--input", type=str, help="Input BBs file.")
parser.add_argument("-o", "--output", type=str, help="Output files suffix name.")
args = parser.parse_args()
main(args)
main(args)
Loading