-
Notifications
You must be signed in to change notification settings - Fork 0
/
find-best-timeseries.sh
executable file
·34 lines (30 loc) · 1.11 KB
/
find-best-timeseries.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env bash
# Stage 1: annotate every sample listed in rothman.unenriched.simple with the
# size of the second line (the first read's sequence) of its forward FASTQ.
#
#   input  rows: accession date wtp            (whitespace-separated)
#   output rows: accession, date, wtp, length  (tab-separated)
#
# The stage is skipped when rothman.unenriched.lengths.simple already exists.
# The table is assembled in a temporary file and renamed only once the loop
# has finished, so an interrupted run cannot leave a truncated file behind
# that later runs would mistake for a complete one.
if [[ ! -e rothman.unenriched.lengths.simple ]]; then
  if [[ ! -r rothman.unenriched.simple ]]; then
    echo "error: cannot read rothman.unenriched.simple" >&2
    exit 1
  fi

  # `|| [[ -n ... ]]` keeps a final row that lacks a trailing newline.
  while read -r accession date wtp || [[ -n "$accession" ]]; do
    # Skip blank lines instead of emitting a row without an accession.
    [[ -n "$accession" ]] || continue

    # NOTE: the value is what `wc -c` reports for the line, so it includes the
    # trailing newline; it is kept as-is because later stages consume it.
    # aws's stderr is discarded, presumably because `head` closes the pipe
    # after two lines and the aborted download would otherwise be noisy.
    # stdin is redirected so aws can never swallow rows meant for `read`.
    length=$(
      aws s3 cp "s3://prjna729801/${accession}_1.fastq.gz" - \
        2>/dev/null </dev/null |
        gunzip |
        head -n 2 |
        tail -n 1 |
        wc -c |
        tr -d '[:space:]'
    )

    # With stderr hidden, an empty result is the only visible sign that the
    # download or decompression produced nothing.
    if [[ "$length" == 0 ]]; then
      echo "warning: no data read for ${accession}; recording length 0" >&2
    fi

    printf '%s\t%s\t%s\t%s\n' "$accession" "$date" "$wtp" "$length"
  done < rothman.unenriched.simple |
    tr ' ' '\t' > rothman.unenriched.lengths.simple.tmp

  mv -- rothman.unenriched.lengths.simple.tmp rothman.unenriched.lengths.simple
fi
# Stage 2: feed rothman.unenriched.lengths.simple to ./find-best-timeseries.py
# on stdin and store whatever it prints in longest-timeseries.tsv.
#
# The stage is skipped when longest-timeseries.tsv already exists. The helper
# writes to a temporary file that is renamed only if it exits successfully;
# otherwise a failed or missing helper would leave an empty output file that
# every later run would accept as a finished result.
if [[ ! -e longest-timeseries.tsv ]]; then
  if ! ./find-best-timeseries.py \
    < rothman.unenriched.lengths.simple \
    > longest-timeseries.tsv.tmp; then
    rm -f -- longest-timeseries.tsv.tmp
    echo "error: find-best-timeseries.py failed;" \
      "longest-timeseries.tsv was not written" >&2
    exit 1
  fi
  mv -- longest-timeseries.tsv.tmp longest-timeseries.tsv
fi
# Stage 3: count the reads in the forward FASTQ of every accession listed in
# longest-timeseries.tsv and append that count as a fourth column.
#
#   input  rows: accession date wtp             (whitespace-separated)
#   output rows: accession, date, wtp, n_reads  (tab-separated)
#
# Side effect: leaves one <accession>.n_reads file per accession in the
# current directory. The stage is skipped when
# longest-timeseries-with-lengths.tsv already exists.
if [[ ! -e longest-timeseries-with-lengths.tsv ]]; then
  if [[ ! -r longest-timeseries.tsv ]]; then
    echo "error: cannot read longest-timeseries.tsv" >&2
    exit 1
  fi

  # Download and count up to 32 accessions in parallel.
  #  * Reads are counted as lines / 4 (one FASTQ record per four lines).
  #    Counting lines that start with '@' over-counts, because '@' is also a
  #    valid quality character and a quality line may begin with it.
  #  * The accession is handed to the inner shell as $1 instead of being
  #    spliced into the command text.
  #  * pipefail makes a failed download or decompression fail the job instead
  #    of silently recording a count of 0; the count file only appears under
  #    its final name once the whole pipeline has succeeded.
  if ! awk '{print $1}' longest-timeseries.tsv |
    xargs -P 32 -I {} bash -c '
      set -o pipefail
      aws s3 cp "s3://prjna729801/${1}_1.fastq.gz" - |
        gunzip |
        awk "END { print int(NR / 4) }" > "${1}.n_reads.tmp" &&
        mv -- "${1}.n_reads.tmp" "${1}.n_reads"
    ' _ {}; then
    echo "error: counting reads failed for at least one accession;" \
      "longest-timeseries-with-lengths.tsv was not written" >&2
    exit 1
  fi

  # Join each row with its count. The table is written to a temporary file
  # and renamed at the end so a partial table is never mistaken for a
  # finished one by the existence check above.
  while read -r accession date wtp || [[ -n "$accession" ]]; do
    # Skip blank lines instead of emitting a row without an accession.
    [[ -n "$accession" ]] || continue
    printf '%s\t%s\t%s\t%s\n' \
      "$accession" "$date" "$wtp" "$(< "${accession}.n_reads")"
  done < longest-timeseries.tsv |
    tr ' ' '\t' > longest-timeseries-with-lengths.tsv.tmp

  mv -- longest-timeseries-with-lengths.tsv.tmp \
    longest-timeseries-with-lengths.tsv
fi