Skip to content

Commit dc216e6

Browse files
committed
createsetdb can take an already created db
1 parent 56a7f5f commit dc216e6

File tree

1 file changed

+39
-6
lines changed

1 file changed

+39
-6
lines changed

data/createsetdb.sh

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,35 @@ export MMSEQS_FORCE_MERGE=1
4747

4848
OUTDB="$(abspath "${OUTDB}")"
4949

50-
if notExists "${TMP_PATH}/seqDB"; then
50+
# Check whether the first argument is an already created DB (its "${1}.dbtype" file exists); run createdb only if it is not
51+
if notExists "${1}.dbtype"; then
5152
# shellcheck disable=SC2086
5253
"${MMSEQS}" createdb "$@" "${TMP_PATH}/seqDB" ${CREATEDB_PAR} \
5354
|| fail "createdb failed"
54-
fi
55+
56+
else
57+
echo "Input DB already exists. Generating associated metadata."
58+
59+
if notExists "${TMP_PATH}/seqDB.index"; then
60+
# shellcheck disable=SC2086
61+
"${MMSEQS}" cpdb "$1" "${TMP_PATH}/seqDB" ${VERBOSITY} \
62+
|| fail "cpdb failed"
63+
fi
64+
65+
if notExists "${TMP_PATH}/seqDB_h.index"; then
66+
# shellcheck disable=SC2086
67+
"${MMSEQS}" cpdb "$1_h" "${TMP_PATH}/seqDB_h" ${VERBOSITY} \
68+
|| fail "cpdb failed"
69+
fi
5570

71+
if notExists "${OUTDB}_ss.index"; then
72+
# shellcheck disable=SC2086
73+
"${MMSEQS}" cpdb "$1_ss" "${OUTDB}_ss" ${VERBOSITY} \
74+
|| fail "cpdb failed"
75+
fi
76+
77+
touch "${TMP_PATH}/seqDB.external"
78+
fi
5679

5780
if [ "$("${MMSEQS}" dbtype "${TMP_PATH}/seqDB")" = "Nucleotide" ]; then
5881

@@ -99,10 +122,18 @@ elif [ "$("${MMSEQS}" dbtype "${TMP_PATH}/seqDB")" = "Aminoacid" ]; then
99122
|awk -F '[\t#]' 'NF{NF-=1};1' OFS='\t' \
100123
|awk -F'\t' '$5=="-1" { temp = $4; $4 = $3; $3 = temp } 1' OFS='\t' \
101124
|sort -k1,1n > "${TMP_PATH}/seqDB_h_pref.tmp"
102-
103-
join -t "$(printf '\t')" -o '1.1 2.2 2.3 2.4 1.3' "${TMP_PATH}/seqDB.lookup" "${TMP_PATH}/seqDB_h_pref.tmp" \
104-
|awk -F '[\t]' '{ if (setid == $NF) { counter++ } else { counter = 1; setid = $NF }; print $1"\t"$2"_"counter-1"_"$3"_"$4"\t"$NF }' \
105-
> "${TMP_PATH}/seqDB.lookup.tmp"
125+
126+
if notExists "${TMP_PATH}/seqDB.external"; then
127+
join -t "$(printf '\t')" -o '1.1 2.2 2.3 2.4 1.3' "${TMP_PATH}/seqDB.lookup" "${TMP_PATH}/seqDB_h_pref.tmp" \
128+
|awk -F '[\t]' '{ if (setid == $NF) { counter++ } else { counter = 1; setid = $NF }; print $1"\t"$2"_"counter-1"_"$3"_"$4"\t"$NF }' \
129+
> "${TMP_PATH}/seqDB.lookup.tmp"
130+
else
131+
join -t "$(printf '\t')" -o '1.1 2.2 2.3 2.4 1.3' "${TMP_PATH}/seqDB.lookup" "${TMP_PATH}/seqDB_h_pref.tmp" \
132+
|sort -k2,2 -V\
133+
|awk -F '[\t]' '{ if (setid == $NF) { counter++ } else { counter = 1; setid = $NF }; print $1"\t"$2"_"counter-1"_"$3"_"$4"\t"$NF }' \
134+
|sort -k1,1n\
135+
> "${TMP_PATH}/seqDB.lookup.tmp"
136+
fi
106137
fi
107138

108139
mv -f -- "${TMP_PATH}/seqDB.lookup.tmp" "${TMP_PATH}/seqDB.lookup"
@@ -148,6 +179,8 @@ if notExists "${OUTDB}_set_size.index"; then
148179
|| fail "result2stats failed"
149180
fi
150181

182+
# TODO: add a check for the lookup format
183+
151184
if [ -n "${REMOVE_TMP}" ]; then
152185
echo "Remove temporary files"
153186
rm -f "${TMP_PATH}/createsetdb.sh"

0 commit comments

Comments
 (0)