bneron/CRISPRCasFinder:latest

$ singularity pull shub://bneron/CRISPRCasFinder:latest

Singularity Recipe

# To build the container (Singularity version >=2.6):
# sudo singularity build CRISPRCasFinder Singularity

BootStrap: docker
from: ubuntu:xenial

%labels
	MAINTAINER Bertrand Neron <bneron@pasteur.fr>
	AUTHOR Couvin David, Bernheim Aude, Toffano-Nioche Claire, Touchon Marie, Michalik Juraj, Neron Bertrand, Rocha Eduardo, Vergnaud Gilles, Gautheret Daniel, Pourcel Christine.
    CRISPRCasFinder.version 4.2.18

%help
    Name:
      CRISPRCasFinder standalone version 4.2.18

    Synopsis:
      A perl script to identify CRISPR arrays and associated Cas genes in DNA sequences

    Usage:
      ./CRISPRCasFinder.img <filename.fasta>
      OR
      singularity run CRISPRCasFinder.img [options] -in <filename.fasta>

      --Please note <filename.fasta> must be in Fasta format. Please also note that when several options are called, the option "-in or -i" must precede the input FASTA file.

    General:
      -help or -h           This help
      -version or -v        The current version of the program will be displayed

    Other options:

      [Input/Output and -so]
      -in or -i [XXX]       Input Fasta file (with extensions: .fasta, .fna, .mfa, .fa)
      -outdir or -out [XXX] Output directory (if users do not use this option, a default directory will be created with the date and time)
      -keepAll or -keep     Option allowing to keep secondary folders/files (Prodigal/Prokka, CasFinder, rawFASTA, Properties); (default: 0)
      -LOG or -log          Option allowing to write LOG files (default: 0)
      -HTML or -html        Option allowing to display results as a static HTML web page (default value: 0). The web page created (index.html) will be dependent of a CSS file (crispr.css)
      -copyCSS [XXX]        Option allowing to copy provided CSS file into "Visualization" repository if option -HTML is set (default: '/usr/local/share/CRISPRCasFinder/crispr.css')
      -soFile or -so [XXX]  Option allowing to use the shared object file if it is not present in current directory (default: '/.singularity.d/libs/sel392v2.so')

     [Detection of CRISPR arrays]
      -mismDRs or -md [XXX] Percentage of mismatches allowed between DRs (default: 20)
      -truncDR or -t [XXX]  Percentage of mismatches allowed for truncated DR (default: 33.3)
      -minDR or -mr [XXX]   Minimal size of DRs (default: 23)
      -maxDR or -xr [XXX]   Maximal size of DRs (default: 55)
      -minSP or -ms [XXX]   Minimal size of Spacers (default: 25)
      -maxSP or -xs [XXX]   Maximal size of Spacers (default: 60)
      -noMism or -n Option used to disallow mismatches (default value is 1 when this option is not called, i.e. mismatches are allowed by default)
      -percSPmin or -pm [XXX]       Minimal Spacers size in function of DR size (default: 0.6)
      -percSPmax or -px [XXX]       Maximal Spacers size in function of DR size (default: 2.5)
      -spSim or -s [XXX]    Maximal allowed percentage of similarity between Spacers (default: 60)
      -DBcrispr or -dbc [XXX]       Option allowing to use a CSV file of all CRISPR candidates contained in CRISPRdb (from last update) (default: '/usr/local/share/CRISPRCasFinder/CRISPR_crisprdb.csv')
      -repeats or -rpts [XXX]       Option allowing to use a consensus repeats list generated by CRISPRdb (default: '/usr/local/share/CRISPRCasFinder/Repeat_List.csv')
      -DIRrepeat or -drpt [XXX]     Option allowing to use a file containing repeat IDs and orientation according to CRISPRDirection (default: '/usr/local/share/CRISPRCasFinder/repeatDirection.tsv')
      -flank or -fl [XXX]   Option allowing to set size of flanking regions in base pairs (bp) for each analyzed CRISPR array (default: 100)
      -levelMin or -lMin [XXX]      Option allowing to choose the minimum evidence-level corresponding to CRISPR arrays we want to display (default:1)

      [Detection of Cas clusters]
      -cas or -cs   Search corresponding Cas genes using Prokka (default kingdom: "Bacteria") and MacSyFinder (default: 0)
      -ccvRep or -ccvr      Option used to write the CRISPR-Cas vicinity report (CRISPRs and Cas) if option -cas is set (default: 0)
      -vicinity or -vi [XXX]        Option used to define number of nucleotides separating a CRISPR array from its neighboring Cas system (default: 600)
      -CASFinder or -cf [XXX]       Option allowing to use a custom CasFinder instead of using the CasFinder provided by Institut Pasteur  (default: '/usr/local/share/macsyfinder/CasFinder-2.0')
      -cpuMacSyFinder or -cpuM [XXX]        Option allowing to set number of CPUs to use for MacSyFinder (default: 1)
      -rcfowce      Option allowing to run Casfinder only when any CRISPR exists (default: 0) (set if -cas is set)
      -definition or -def [XXX]     Option allowing to specify CasFinder definition (if option -cas is set) to be more or less stringent (allowed values: 'General', 'Typing' or 'SubTyping'; default: 'General')
      -gffAnnot or -gff [XXX]       Option allowing user to provide an annotation GFF file (if options -cas and -faa are set) (default: '')
      -proteome or -faa [XXX]       Option allowing user to provide a proteome file '.faa' (if options -cas and -gff are set) (default: '')
      -cluster or -ccc [XXX]        Option allowing to constitute clusters or groups of CRISPR or Cas systems given a determined threshold e.g. 20kb (default: 0) (set if -cas is set)
      -getSummaryCasfinder or -gscf Option allowing to get summary file of Cas-finder (MacSyFinder) and copy it to TSV repository (default: 0)
      -geneticCode or -gcode [XXX]  Option allowing to modify the genetic code (translation table) for CDS annotation (default: 11)

      [Use Prokka instead of Prodigal (default option)]
      -useProkka or -prokka Option allowing to use Prokka instead of Prodigal (default: 0)
      -cpuProkka or -cpuP [XXX]     Option allowing to set number of CPUs to use for Prokka (default: 1)
      -metagenome or -meta  Option allowing to better analyze metagenome with Prokka (default: )
      -ArchaCas or -ac      same option as -cas using "Archaea" as default kingdom instead of "Bacteria" (default: 0). Option to be used when -prokka is used.

    Options waiting for a given parameter (filename, text, or number) are followed by symbols "[XXX]". Other options could be considered as booleans (yes or no, 1 or 0).

    #####################################################

    The input file should meet these constraints:

    - the file name must not contain multiple dots (an acceptable file name is e.g. "multifasta.fna")
    - the sequence must be identified/named (the ID follows character ">", and a description could be added after a space character),
    - the ID should not contain special characters such as "|$%" or multiple dots,
    - the file must contain nucleotides (not amino acids),
    - the file could contain several sequences in FASTA format,
    - each ID must be unique,
    - the ID and the file name must not be too long,
    - the ID will be used for output.

    Examples:
    (1): ./CRISPRCasFinder.simg test.fasta
    In this example, your result folder will be in the directory named: "Result_test"

    (2): ./CRISPRCasFinder -in test.fasta -md 20 -t 33.3 -mr 23 -xr 55 -ms 25 -xs 60 -pm 0.6 -px 2.5 -s 60

    (3): ./CRISPRCasFinder -in genomes100.fna -drpt my_repeatDirection.tsv -rpts my_Repeat_List.csv -cs -fr -dbc my_CRISPR_crisprdb.csv -html

    (4): ./CRISPRCasFinder -in metagenome.fna -rcfowce -prokka -log -out Results_metagenome -cpuProkka 8 -cpuMacSyFinder 8 -meta

    (5): ./CRISPRCasFinder -in sequence.fasta -cas -log -out RES_Sequence -def G -force

%environment
    # Force the plain "C" locale for everything run inside the container.
    export LC_ALL='C'

%post
	# Base system preparation: build tool-chain first, then the generic
	# utilities needed by the rest of this section.
	# Keep apt/dpkg from prompting during the unattended build.
	export DEBIAN_FRONTEND=noninteractive
	apt-get update
	apt-get install -y apt-utils zlib1g-dev make gcc
	# dash is too restricted
	ln -nsf /bin/bash /bin/sh
	# to be runnable on tars @ Institut Pasteur
	# (presumably a bind-mount point -- TODO confirm).
	# -p keeps the build from aborting if the directory already exists.
	mkdir -p /pasteur

	apt-get update -y
	apt-get install -y curl default-jre python perl parallel cpanminus patch wget unzip

    ###################
    # Bioinfo package #
    ###################
    # Distribution packages for the bioinformatics tools and Perl libraries.
    apt-get install -y \
        hmmer \
        emboss emboss-lib \
        ncbi-blast+ \
        bioperl \
        bioperl-run \
        libdatetime-perl \
        libxml-simple-perl \
        libdigest-md5-perl \
        clustalw \
        muscle \
        prodigal \
        aragorn \
        infernal

    # Make the packaged clustalw binary reachable under the name "clustalw2".
    # The link target must be "clustalw": the previous "clustalw2 -> clustalw2"
    # link pointed at itself and could never be resolved.
    # NOTE(review): assumes the clustalw package installs /usr/bin/clustalw -- confirm.
    cd /usr/bin
    test -e clustalw2 || ln -s clustalw clustalw2
    cd /

    # Perl modules installed from CPAN, one cpanm invocation per module,
    # in the same order as before.
    for perl_module in \
        Try::Tiny \
        Test::Most \
        JSON::Parse \
        Date::Calc \
        Class::Struct \
        Bio::DB::Fasta \
        File::Copy
    do
        cpanm "${perl_module}"
    done

    # These two are requested together in a single cpanm run.
    cpanm Bio::Seq Bio::SeqIO

    # The alignment wrappers are installed with --force.
    for forced_module in \
        Bio::Tools::Run::Alignment::Clustalw \
        Bio::Tools::Run::Alignment::Muscle
    do
        cpanm --force "${forced_module}"
    done

    # Installation root shared by every component installed below.
    prefix="/usr/local"

    ##########
    # vmatch #
    ##########
    # Download the prebuilt vmatch distribution, compile the sel392 selection
    # function as a shared object, and install the binaries with a "2" suffix.
    PN="vmatch"
    PV="2.3.0"
    P="${PN}-${PV}"
    P_SRC="${prefix}/src/${PN}"

    mkdir -p "${P_SRC}"
    cd "${P_SRC}"
    distribution='Linux_x86_64'
    vmatch="${PN}-${PV}-${distribution}-64bit"
    vmatch_url="http://vmatch.de/distributions/${vmatch}.tar.gz"
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as the tarball; --show-error keeps the reason visible
    # despite --silent.
    curl -L -O --silent --show-error --fail "${vmatch_url}"
    tar -zxf "${vmatch}.tar.gz"
    cd "${vmatch}"
    gcc -Wall -Werror -fPIC -O3 -shared SELECT/sel392.c -m64 -o sel392v2.so
    # copy the shared library in LD_LIBRARY_PATH
    install -m 0775 sel392v2.so /.singularity.d/libs/sel392v2.so
    cd /.singularity.d/libs/
    # Also expose the library under its unversioned name.
    ln -s sel392v2.so sel392.so
    cd "${P_SRC}/${vmatch}"
    install -m 0775 vmatch "${prefix}/bin/vmatch2"
    install -m 0775 vsubseqselect "${prefix}/bin/vsubseqselect2"
    install -m 0775 mkvtree "${prefix}/bin/mkvtree2"
    cd /

    ###############
    # macsyfinder #
    ###############
    # Download the MacSyFinder source tarball and install it with setup.py
    # using the distribution's "python" interpreter.
    PN="macsyfinder"
    PV="1.0.5"
    P="${PN}-${PV}"
    P_SRC="${prefix}/src/${PN}"

    mkdir -p "${P_SRC}"
    cd "${P_SRC}"
    # NOTE(review): JFrog shut down the Bintray service in 2021, so this URL
    # is expected to fail on new builds; a replacement location for the
    # ${P} tarball must be chosen and verified.
    macsyfinder_url="https://dl.bintray.com/gem-pasteur/MacSyFinder/${P}.tar.gz"
    # --fail aborts here on an HTTP error instead of handing an error page
    # to tar; --show-error keeps the reason visible despite --silent.
    curl -L -O --silent --show-error --fail "${macsyfinder_url}"
    tar -xzf "${P}.tar.gz"
    cd "${P}"
    python setup.py build
    python setup.py install
    cd /

    #######################
    # prokka dependencies #
    #######################

    ###########
    # signalp #
    ###########

    # Not installed: signalp cannot be redistributed here for licensing reasons.

    ###########
    # tbl2asn #
    ###########
    # The tbl2asn shipped in the distribution package ncbi-tools-bin is too old,
    # so the NCBI binary is fetched directly.
    # NOTE(review): this remark was written about "trusty" while the base image
    # is xenial -- confirm it still applies.
    PN="tbl2asn"
    PV="1.12"
    P="${PN}-${PV}"
    P_SRC="${prefix}/src/${PN}"

    mkdir -p "${P_SRC}"
    cd "${P_SRC}"
    tbl2asn_url="ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools/converters/by_program/${PN}/linux64.${PN}.gz"
    wget "${tbl2asn_url}"
    # Decompress in place, then install the binary under its plain name.
    gunzip "linux64.${PN}.gz"
    install -m 0755 "linux64.${PN}" "${prefix}/bin/${PN}"

    ##########
    # prokka #
    ##########
    # Download the prokka release tarball, install its scripts, bundled
    # binaries and databases under ${prefix}, then index the databases.
    PN="prokka"
    PV="1.12"
    P="${PN}-${PV}"
    P_SRC="${prefix}/src/${PN}"

    mkdir -p "${P_SRC}"
    cd "${P_SRC}"

    prokka_url="http://www.vicbioinformatics.com/${P}.tar.gz"
    # --fail aborts here on an HTTP error instead of handing an error page
    # to tar; --show-error keeps the reason visible despite --silent.
    curl -L -O --silent --show-error --fail "${prokka_url}"
    tar -xzf "${P}.tar.gz"
    cd "${P}"

    prokka_data="${prefix}/share/${PN}"
    prokka_db="${prokka_data}/db"
    mkdir -p "${prokka_db}"
    # copy database
    cp -pr db/* "${prokka_db}"

    # tell prokka where to find its tools and db once installed
    sed -i -e "s|my \$BINDIR.*|my \$BINDIR=\"${prefix}/libexec/prokka\";|" \
           -e "s|my \$DBDIR.*|my \$DBDIR=\"${prokka_db}\";|" \
           bin/prokka

    # install prokka scripts
    for bin in bin/*;
    do
        install -m 0755 "${bin}" "${prefix}/bin/"
    done

    # install prokka binaries
    mkdir -p "${prefix}/libexec/${PN}"

    for p in binaries/linux/*;
    do
        install -m 0755 "${p}" "${prefix}/libexec/${PN}"
    done
    # parallel is installed via the package manager, so only minced is taken
    # from binaries/common
    install -m 0755 binaries/common/minced "${prefix}/libexec/${PN}/"
    install -m 0644 binaries/common/minced.jar "${prefix}/libexec/${PN}/"

    # setup prokka db
    prokka_cmd="${prefix}/bin/${PN}"

    "${prokka_cmd}" --setupdb
    cd /

    ###################
    # CRISPRCasFinder #
    ###################
    # Fetch the CRISPRCasFinder sources, apply the patch shipped with them and
    # install the script plus its supplementary data files.
    # The working directory is left inside the unpacked source tree.
    PN="CRISPRCasFinder"
    PV="4.2.18"
    P="${PN}-${PV}"

    mkdir -p "${prefix}/src/${PN}"
    cd "${prefix}/src/${PN}"

    # NOTE(review): this downloads the tip of the master branch, not a release
    # matching PV, so rebuilds may pick up different sources -- consider
    # pinning a tag or commit.
    cripsr_cas_url="https://github.com/bneron/${PN}/archive/master.zip"
    # --fail aborts here on an HTTP error instead of handing an error page
    # to unzip; --show-error keeps the reason visible despite --silent.
    curl -L -o "${PN}.zip" --silent --show-error --fail "${cripsr_cas_url}"

    unzip "${PN}.zip"
    mv "${PN}-master" "${PN}"

    cd "${PN}"
    crispr_data="${prefix}/share/${PN}"
    mkdir -p "${crispr_data}"

    patch CRISPRCasFinder.pl CRISPRCasFinder.patch

    install -m 0755 CRISPRCasFinder.pl "${prefix}/bin/CRISPRCasFinder"
    install -m 0644 supplementary_files/crispr.css "${crispr_data}"
    install -m 0644 supplementary_files/Repeat_List.csv "${crispr_data}"
    install -m 0644 supplementary_files/CRISPR_crisprdb.csv "${crispr_data}"
    install -m 0644 supplementary_files/repeatDirection.tsv "${crispr_data}"

    #############
    # CasFinder #
    #############
    # Swap the models bundled with macsyfinder for the CasFinder models that
    # ship with CRISPRCasFinder. The relative CasFinder-2.0.2 paths below are
    # resolved from the current directory.
    cas_data="${prefix}/share/macsyfinder/"
    # drop the definitions and profiles that came with macsyfinder
    rm -R "${cas_data}DEF"/*
    rm -R "${cas_data}profiles"
    # put the CRISPRCasFinder definitions and profiles in their place
    cp -r CasFinder-2.0.2/definitions/* "${cas_data}/DEF/"
    cp -r CasFinder-2.0.2/profiles "${cas_data}"
    cd /


%test
    # Smoke test: run CRISPRCasFinder on the sequence shipped in install_test
    # and compare the two TSV reports it produces with the reference copies.
    # Exits with the failing command's status on error, 0 otherwise.
    stamp=$(date '+%Y-%m-%d-%H:%M:%S')
    result_dir="/tmp/test_CRISPRCasFinder_${stamp}"
    prefix=/usr/local
    crispr_cas_src="${prefix}/src/CRISPRCasFinder/CRISPRCasFinder/"

    # Capture the status with "|| returncode=$?" so that the failure message
    # below is still printed if this section is executed with errexit (-e).
    # NOTE(review): confirm whether the targeted Singularity version does so.
    returncode=0
    CRISPRCasFinder -def General -cas -i "${crispr_cas_src}/install_test/sequence.fasta" -out "${result_dir}" -keep || returncode=$?
    if [ ${returncode} -ne 0 ];
    then
        echo "Test failed see ${result_dir} for details."
        exit ${returncode}
    fi

    for f in "Cas_REPORT.tsv" "Crisprs_REPORT.tsv";
    do
        returncode=0
        diff "${crispr_cas_src}/install_test/${f}" "${result_dir}/TSV/${f}" || returncode=$?
        if [ ${returncode} -ne 0 ];
        then
            echo "Test failed see ${result_dir} for details."
            exit ${returncode}
        fi
    done

    # Results are only kept when something failed.
    rm -Rf "${result_dir}"
    exit 0

%runscript
    # Hand every command-line argument, unchanged, to the installed
    # CRISPRCasFinder script; exec replaces this shell with that process.
    exec /usr/local/bin/CRISPRCasFinder "$@"

Collection

Name: bneron/CRISPRCasFinder
License: Other

View on Datalad

Metrics

key	value
id	/containers/bneron-CRISPRCasFinder-latest
collection name	bneron/CRISPRCasFinder
branch	master
tag	latest
commit	d45799276dfb0a8b1e6394206f54b8182775a892
version (container hash)	eab8327f8bf135e25bc0229979e4ebf2
build date	2020-12-31T14:33:44.826Z
size (MB)	2884
size (bytes)	845594655
SIF	Download URL (please use pull with shub://)
Datalad URL	View on Datalad
Singularity Recipe	Singularity Recipe on Datalad

We cannot guarantee that all containers will still exist on GitHub.

Feedback

Was this page helpful?

Glad to hear it! Please tell us how we can improve.

Sorry to hear that. Please tell us how we can improve.