containers-ftw/scientific-example-ftw:latest
$ singularity pull shub://containers-ftw/scientific-example-ftw:latest
Singularity Recipe
# To build the container:
# sudo singularity build <container> Singularity
Bootstrap: docker
From: ubuntu:14.04
# =======================
# global
# =======================
%post
# Global build step: set up the en_US.UTF-8 locale, install the base packages
# used by the apps below, and add the CRAN repository to install R.
locale-gen "en_US.UTF-8"
dpkg-reconfigure locales
export LANGUAGE="en_US.UTF-8"
echo 'LANGUAGE="en_US.UTF-8"' >> /etc/default/locale
echo 'LC_ALL="en_US.UTF-8"' >> /etc/default/locale
apt-get update
# `parallel` is included because the run-rtg app drives `rtg map` through it.
apt-get install -y apt-transport-https build-essential cmake curl libsm6 libxrender1 libfontconfig1 wget git unzip python-setuptools ruby bc parallel
# Register the CRAN signing key and the CRAN repository for trusty, then
# install R from it.
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 51716619E084DAB9
echo "deb https://cloud.r-project.org/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list
apt-get update
apt-get install -y r-base-dev gdebi-core
apt-get clean
%runscript
# Default entry point. Without arguments, list the entries under /scif/apps
# and print a usage example; otherwise replace this shell with the given
# command.
if [ $# -eq 0 ]; then
    # printf rather than echo: whether echo expands "\n" depends on the shell.
    printf '\nThe following software is installed in this image:\n'
    ls /scif/apps | sort -u --ignore-case
    echo "Example usage: singularity run --app <name> <container> [command] [args] [options]"
else
    exec "$@"
fi
%help
This example container provides several steps of a genomic analysis pipeline.
You will generally want to choose a directory on your host to hold the data ($DATA),
bind it to /scif/data, and run the following steps:
# Download data (these three steps can be run in parallel)
singularity run --app download-fastq -B $DATA:/scif/data <container>
singularity run --app download-reference -B $DATA:/scif/data <container>
singularity run --app download-rtg -B $DATA:/scif/data <container>
# Run the analysis steps once the downloads have finished
singularity run --app simulate-reads -B $DATA:/scif/data <container>
singularity run --app transcript -B $DATA:/scif/data <container>
singularity run --app bwa-index-align -B $DATA:/scif/data <container>
singularity run --app run-rtg -B $DATA:/scif/data <container>
%environment
# Locations and settings shared by every app. All data lives below DATADIR;
# the reference, FASTQ and RTG directories are derived from it.
export DATADIR=/scif/data
export REF_DIR="${DATADIR}/Reference"
export FASTQ_DIR="${DATADIR}/Fastq"
export RTG_DIR="${DATADIR}/RTG"
# Number of processing units as reported by nproc.
export NUMCORES="$(nproc)"
# =======================
# download
# =======================
%apprun download-fastq
# Download the GM12878 2x75 split FASTQ archive into $FASTQ_DIR and merge its
# per-replicate/per-lane files into one gzipped file per mate:
# $FASTQ_DIR/rna_1.fq.gz and $FASTQ_DIR/rna_2.fq.gz.
set -e  # stop at the first failed step instead of processing a partial download
mkdir -p "$FASTQ_DIR"
wget -P "$FASTQ_DIR" ftp://ngs.sanger.ac.uk/production/gencode/rgasp/RGASP1/inputdata/human_fastq/GM12878_2x75_split.tgz
tar --directory "$FASTQ_DIR" -xzf "$FASTQ_DIR/GM12878_2x75_split.tgz"
# Concatenate in sorted path order. find lists files in directory order, which
# does not guarantee that the _1 and _2 files come out in the same lane order;
# sorting both lists the same way keeps the two mate files in sync.
for mate in 1 2; do
    find "$FASTQ_DIR/GM12878_2x75_split" -name "GM12878_2x75_rep[1-2].lane[1-3]_${mate}.fq" -print0 \
        | LC_ALL=C sort -z \
        | xargs -0 cat > "$FASTQ_DIR/rna_${mate}.fq"
    gzip "$FASTQ_DIR/rna_${mate}.fq"
done
rm -r "$FASTQ_DIR/GM12878_2x75_split"
%apprun download-reference
# Download the GENCODE v25 transcript FASTA and the Ensembl release-85 GRCh38
# primary assembly FASTA into $REF_DIR and decompress them.
mkdir -p "$REF_DIR"
# Each file is decompressed only if its own download succeeded; a failure of
# the first file does not prevent the second from being attempted.
wget -P "$REF_DIR" ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_25/gencode.v25.transcripts.fa.gz \
    && gzip -d "$REF_DIR/gencode.v25.transcripts.fa.gz"
wget -P "$REF_DIR" ftp://ftp.ensembl.org/pub/release-85/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz \
    && gzip -d "$REF_DIR/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
%apprun download-rtg
# Download the paired *.10M.fastq.gz files for HG002, HG003 and HG004 into
# $RTG_DIR.
# url info for AJtrio was taken from this url
# https://raw.githubusercontent.com/genome-in-a-bottle/giab_data_indexes/master/AshkenazimTrio/sequence.index.AJtrio_Illumina_2x250bps_06012016
mkdir -p "$RTG_DIR"
## These files are fetched over HTTPS from shared Box links.
wget https://stanfordmedicine.box.com/shared/static/beky9c9u05xmljtgj4kq9iuik33xqtbq.gz -O "$RTG_DIR/HG002.1.10M.fastq.gz"
wget https://stanfordmedicine.box.com/shared/static/isod88qhvfy11d3jlxy2hc1am3axhqg9.gz -O "$RTG_DIR/HG002.2.10M.fastq.gz"
wget https://stanfordmedicine.box.com/shared/static/wu7kn19y16org4sxvp5r7nw25x3kcc18.gz -O "$RTG_DIR/HG003.1.10M.fastq.gz"
wget https://stanfordmedicine.box.com/shared/static/o2cdwpn55nuw98kmoq5o0ci67ojz4647.gz -O "$RTG_DIR/HG003.2.10M.fastq.gz"
wget https://stanfordmedicine.box.com/shared/static/sdufnqkmspj4r8sd1h1cskx5sge7t6c4.gz -O "$RTG_DIR/HG004.1.10M.fastq.gz"
wget https://stanfordmedicine.box.com/shared/static/5h4t29utrxcg9hbyen4lf6v6qid251bk.gz -O "$RTG_DIR/HG004.2.10M.fastq.gz"
# =======================
# simulate reads
# =======================
%apphelp simulate-reads
You may optionally set any of the following environment variables (defaults shown in parentheses):
READS (100000000)
READ_LEN (150)
GENOME_SIZE (3400000000)
%appenv simulate-reads
# Simulation parameters. A non-empty value already present in the environment
# is kept; otherwise the default is assigned.
: "${READS:=100000000}"
: "${READ_LEN:=150}"
: "${GENOME_SIZE:=3400000000}"
export READS READ_LEN GENOME_SIZE
%appinstall simulate-reads
# Download the ART binary archive, unpack it, and move its contents into the
# relative bin/ directory, making the art_* programs executable for the owner.
art_tarball=artbinmountrainier20160605linux64tgz.tgz
wget "https://www.niehs.nih.gov/research/resources/assets/docs/${art_tarball}"
tar -x -z -v -f "${art_tarball}"
mv art_bin_MountRainier/* bin/
chmod u+x bin/art_*
%apprun simulate-reads
# Simulate paired-end reads from the GRCh38 primary assembly with art_illumina
# and gzip the two resulting files, $FASTQ_DIR/dna_1.fq and $FASTQ_DIR/dna_2.fq.
GENOME="$REF_DIR/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
# Fold coverage = READS * READ_LEN / GENOME_SIZE. float() forces true division:
# under Python 2 the all-integer expression is floored (the defaults would give
# 4 instead of ~4.41).
FOLD_COVERAGE=$(python -c "print(float($READS) * $READ_LEN / $GENOME_SIZE)")
# NOTE(review): --len is fixed at 75 and does not follow READ_LEN; presumably
# READ_LEN counts both mates of a pair (2 x 75) -- confirm.
art_illumina --rndSeed 1 --in "$GENOME" --paired --len 75 --fcov "$FOLD_COVERAGE" --seqSys HS25 --mflen 500 --sdev 20 --noALN --out "$FASTQ_DIR/dna_" \
    && gzip "$FASTQ_DIR/dna_1.fq" \
    && gzip "$FASTQ_DIR/dna_2.fq"
# =======================
# quantify transcripts
# =======================
%appinstall transcript
# Install Anaconda3 4.1.1 at /scif/apps/transcript and add kallisto from the
# bioconda channel.
# The existing app directory is removed first; presumably so that the
# installer can create its prefix from scratch -- confirm.
cd /scif/apps && rm -rf transcript
# The installer is executed right after download, so fetch it over HTTPS.
wget https://repo.continuum.io/archive/Anaconda3-4.1.1-Linux-x86_64.sh
# -b: batch (non-interactive) install, -p: installation prefix.
bash Anaconda3-4.1.1-Linux-x86_64.sh -b -p ./transcript
rm Anaconda3-4.1.1-Linux-x86_64.sh && cd transcript
bin/conda update -y conda
bin/conda update -y anaconda
bin/conda config --add channels bioconda
bin/conda install -y --channel bioconda kallisto
bin/conda clean -y --all
%apprun transcript
# Build a kallisto index from the GENCODE transcripts, then quantify the
# paired RNA reads against it, writing the results to $SINGULARITY_APPDATA/rna.
# The shared REF_DIR / FASTQ_DIR variables are used throughout so the index is
# read from the same location it is written to.
kallisto index "$REF_DIR/gencode.v25.transcripts.fa" -i "$REF_DIR/kallisto_index"
OUT_DIR="${SINGULARITY_APPDATA}/rna" # /scif/data/transcript
mkdir -p "$OUT_DIR"
kallisto quant -b 100 --seed=1 --plaintext -t "$NUMCORES" \
    -i "$REF_DIR/kallisto_index" \
    "$FASTQ_DIR/rna_1.fq.gz" "$FASTQ_DIR/rna_2.fq.gz" \
    -o "$OUT_DIR"
# =======================
# bwa index and align
# =======================
%appinstall bwa-index-align
# Build bwa v0.7.15 from source and install samtools 1.5 for this app.
# Clone bwa into ./build, check out the release tag, and compile it.
git clone https://github.com/lh3/bwa.git build
cd build && git checkout v0.7.15 && make
# Move bwa and bwakit into the bin/ directory that sits next to build/.
mv -t ../bin bwa bwakit
# NOTE(review): liblzma-dev is presumably a build dependency of the samtools
# step below -- confirm.
apt-get install -y liblzma-dev
# Return to the parent directory, then download and unpack samtools 1.5.
cd .. && wget https://github.com/samtools/samtools/releases/download/1.5/samtools-1.5.tar.bz2
tar -xvjf samtools-1.5.tar.bz2
# Configure samtools with the app root as its install prefix, then build and
# install it.
cd samtools-1.5 && ./configure --prefix=${SINGULARITY_APPROOT}
make && make install
%apprun bwa-index-align
# Index the GRCh38 primary assembly with bwa, align the paired DNA reads
# produced by the simulate-reads app, and write the result as BAM to
# $DATADIR/Bam/container.bam.
REFERENCE="$REF_DIR/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
mkdir -p "$DATADIR/Bam"
bwa index -a bwtsw "$REFERENCE"
# bwa mem writes its alignments to stdout; samtools reads them from stdin ("-").
bwa mem -t "$NUMCORES" "$REFERENCE" "$FASTQ_DIR/dna_1.fq.gz" "$FASTQ_DIR/dna_2.fq.gz" \
    | samtools view -bhS - > "$DATADIR/Bam/container.bam"
%applabels bwa-index-align
bwa-version v0.7.15
samtools-version v1.5
# =======================
# rtg
# =======================
%appinstall run-rtg
# Download the rtg-core 3.6.2 non-commercial linux-x64 zip, unpack it, and
# move its contents into the relative bin/ directory.
rtg_release=rtg-core-non-commercial-3.6.2
wget "https://github.com/RealTimeGenomics/rtg-core/releases/download/3.6.2/${rtg_release}-linux-x64.zip"
unzip "${rtg_release}-linux-x64.zip"
mv "${rtg_release}"/* bin/
%appenv run-rtg
# Resources for the run-rtg app: MEM is handed to rtg as RTG_MEM and THREADS
# as --threads. Values already set in the environment win over the defaults.
MEM=${MEM:-4g}
# Note the "-": ${THREADS:2} would be a substring expansion, not a default.
THREADS=${THREADS:-2}
export MEM THREADS
%applabels run-rtg
rtg-version 3.6.2
%apprun run-rtg
# Format the reference as an RTG SDF, map each sample's paired FASTQ files
# with `rtg map`, then run `rtg family` on the three resulting alignments.
REFERENCE="$DATADIR/Reference/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
rtg format --format fasta --output="$REFERENCE.sdf" "$REFERENCE"
# One mapping job at a time; --xapply pairs the n-th sample id ({1}) with the
# n-th read-group string ({2}). The input FASTQ files are read from $RTG_DIR,
# which is where the download-rtg app stores them.
parallel --jobs 1 --xapply rtg RTG_MEM="$MEM" map --format fastq --quality-format sanger \
    --template "$REFERENCE.sdf" --output "$RTG_DIR/container.{1}" \
    --left "$RTG_DIR/{1}.1.10M.fastq.gz" --right "$RTG_DIR/{1}.2.10M.fastq.gz" \
    --sam-rg {2} --threads "$THREADS" \
    ::: HG002 HG003 HG004 \
    ::: "@RG\tID:HG002\tSM:NA24385\tPL:ILLUMINA" "@RG\tID:HG003\tSM:NA24149\tPL:ILLUMINA" "@RG\tID:HG004\tSM:NA24143\tPL:ILLUMINA"
# The --son/--father/--mother sample names match the SM fields of the read
# groups given to `rtg map` above.
rtg RTG_MEM="$MEM" family \
--output "$RTG_DIR/container.trio" \
--template "$REFERENCE.sdf" \
--machine-errors illumina \
--avr-model illumina-wgs.avr \
--threads "$THREADS" \
--son NA24385 \
--father NA24149 \
--mother NA24143 \
"$RTG_DIR/container.HG002/alignments.bam" \
"$RTG_DIR/container.HG003/alignments.bam" \
"$RTG_DIR/container.HG004/alignments.bam"
Collection
- Name: containers-ftw/scientific-example-ftw
- License: None
View on Datalad
Metrics
key | value |
---|---|
id | /containers/containers-ftw-scientific-example-ftw-latest |
collection name | containers-ftw/scientific-example-ftw |
branch | master |
tag | latest |
commit | 691f39d32ff395251790482253f9d5261a0cdfb0 |
version (container hash) | a815513812973bb04c8052de1445c973 |
build date | 2017-10-17T21:47:54.094Z |
size (MB) | None |
size (bytes) | 1174466591 |
SIF | Download URL (please use pull with shub://) |
Datalad URL | View on Datalad |
Singularity Recipe | Singularity Recipe on Datalad |
Feedback
Was this page helpful?
Glad to hear it! Please tell us how we can improve.
Sorry to hear that. Please tell us how we can improve.