@@ -47,12 +47,35 @@ export MMSEQS_FORCE_MERGE=1
4747
4848OUTDB=" $( abspath " ${OUTDB} " ) "
4949
50- if notExists " ${TMP_PATH} /seqDB" ; then
50+ # If the first argument has no .dbtype file, build seqDB from the inputs with createdb; otherwise copy the existing DB with cpdb.
51+ if notExists " ${1} .dbtype" ; then
5152 # shellcheck disable=SC2086
5253 " ${MMSEQS} " createdb " $@ " " ${TMP_PATH} /seqDB" ${CREATEDB_PAR} \
5354 || fail " createdb failed"
54- fi
55+
56+ else # $1 is treated as an existing DB: it and its _h / _ss companions are copied instead of running createdb
57+ echo " Input DB already exists. Generating associated metadata."
58+
59+ if notExists " ${TMP_PATH} /seqDB.index" ; then # guard on the copy target, so the copy is presumably skipped on a re-run
60+ # shellcheck disable=SC2086
61+ " ${MMSEQS} " cpdb " $1 " " ${TMP_PATH} /seqDB" ${VERBOSITY} \
62+ || fail " cpdb failed"
63+ fi
64+
65+ if notExists " ${TMP_PATH} /seqDB_h.index" ; then # same guard for the _h companion DB
66+ # shellcheck disable=SC2086
67+ " ${MMSEQS} " cpdb " $1 _h" " ${TMP_PATH} /seqDB_h" ${VERBOSITY} \
68+ || fail " cpdb failed"
69+ fi
5570
71+ if notExists " ${OUTDB} _ss.index" ; then # unlike the two copies above, this one is written next to ${OUTDB}, not into ${TMP_PATH}
72+ # shellcheck disable=SC2086
73+ " ${MMSEQS} " cpdb " $1 _ss" " ${OUTDB} _ss" ${VERBOSITY} \
74+ || fail " cpdb failed"
75+ fi
76+
77+ touch " ${TMP_PATH} /seqDB.external" # marker file: the lookup-generation step tests for it to select the sorted pipeline
78+ fi
5679
5780if [ " $( " ${MMSEQS} " dbtype " ${TMP_PATH} /seqDB" ) " = " Nucleotide" ]; then
5881
@@ -99,10 +122,18 @@ elif [ "$("${MMSEQS}" dbtype "${TMP_PATH}/seqDB")" = "Aminoacid" ]; then
99122 | awk -F ' [\t#]' ' NF{NF-=1};1' OFS=' \t' \
100123 | awk -F' \t' ' $5=="-1" { temp = $4; $4 = $3; $3 = temp } 1' OFS=' \t' \
101124 | sort -k1,1n > " ${TMP_PATH} /seqDB_h_pref.tmp"
102-
103- join -t " $( printf ' \t' ) " -o ' 1.1 2.2 2.3 2.4 1.3' " ${TMP_PATH} /seqDB.lookup" " ${TMP_PATH} /seqDB_h_pref.tmp" \
104- | awk -F ' [\t]' ' { if (setid == $NF) { counter++ } else { counter = 1; setid = $NF }; print $1"\t"$2"_"counter-1"_"$3"_"$4"\t"$NF }' \
105- > " ${TMP_PATH} /seqDB.lookup.tmp"
125+
126+ if notExists " ${TMP_PATH} /seqDB.external" ; then # no marker file: join seqDB.lookup with seqDB_h_pref.tmp and number entries in join order
127+ join -t " $( printf ' \t' ) " -o ' 1.1 2.2 2.3 2.4 1.3' " ${TMP_PATH} /seqDB.lookup" " ${TMP_PATH} /seqDB_h_pref.tmp" \
128+ | awk -F ' [\t]' ' { if (setid == $NF) { counter++ } else { counter = 1; setid = $NF }; print $1"\t"$2"_"counter-1"_"$3"_"$4"\t"$NF }' \
129+ > " ${TMP_PATH} /seqDB.lookup.tmp"
130+ else # marker present: same join, but rows are version-sorted on field 2 before the awk counter runs, then put back in numeric order on field 1. NOTE(review): sort -V is not specified by POSIX; confirm it is available on every target platform
131+ join -t " $( printf ' \t' ) " -o ' 1.1 2.2 2.3 2.4 1.3' " ${TMP_PATH} /seqDB.lookup" " ${TMP_PATH} /seqDB_h_pref.tmp" \
132+ | sort -k2,2 -V\
133+ | awk -F ' [\t]' ' { if (setid == $NF) { counter++ } else { counter = 1; setid = $NF }; print $1"\t"$2"_"counter-1"_"$3"_"$4"\t"$NF }' \
134+ | sort -k1,1n\
135+ > " ${TMP_PATH} /seqDB.lookup.tmp"
136+ fi # either branch leaves its result in seqDB.lookup.tmp
106137 fi
107138
108139 mv -f -- " ${TMP_PATH} /seqDB.lookup.tmp" " ${TMP_PATH} /seqDB.lookup"
@@ -148,6 +179,8 @@ if notExists "${OUTDB}_set_size.index"; then
148179 || fail " result2stats failed"
149180fi
150181
182+ # TODO: add a check for the lookup format
183+
151184if [ -n " ${REMOVE_TMP} " ]; then
152185 echo " Remove temporary files"
153186 rm -f " ${TMP_PATH} /createsetdb.sh"
0 commit comments