#!/bin/bash #PBS -S /bin/bash #PBS -N AF2 #PBS -o $PBS_JOBID.out #PBS -e $PBS_JOBID.err #Half node for a sequence < 1000 residues #Full node otherwise and for the multimer version. #PBS -l nodes=1:ppn=16 #PBS -l walltime=24:00:00 #PBS -A simlab_project #PBS -q alphafold_1n #script version 20.10.2022 ### FOR EVERYTHING BELOW, I ADVISE YOU TO MODIFY THE USER-part ONLY ### WORKDIR="/" NUM_NODES=$(cat $PBS_NODEFILE|uniq|wc -l) if [ ! -n "$PBS_O_HOME" ] || [ ! -n "$PBS_JOBID" ]; then echo "At least one variable is needed but not defined. Please touch your manager about." exit 1 else if [ $NUM_NODES -le 1 ]; then WORKDIR+="scratch/" export WORKDIR+=$(echo $PBS_O_HOME |sed 's#.*/\(home\|workdir\)/\(.*_team\)*.*#\2#g')"/$PBS_JOBID/" mkdir $WORKDIR rsync -ap $PBS_O_WORKDIR/ $WORKDIR/ # if you need to check your job output during execution (example: each hour) you can uncomment the following line # /shared/scripts/ADMIN__auto-rsync.example 3600 & else export WORKDIR=$PBS_O_WORKDIR fi fi echo "your current dir is: $PBS_O_WORKDIR" echo "your workdir is: $WORKDIR" echo "number of nodes: $NUM_NODES" echo "number of cores: "$(cat $PBS_NODEFILE|wc -l) echo "your execution environment: "$(cat $PBS_NODEFILE|uniq|while read line; do printf "%s" "$line "; done) cd $WORKDIR # If you're using only one node, it's counterproductive to use IB network for your MPI process communications if [ $NUM_NODES -eq 1 ]; then export PSM_DEVICES=self,shm export OMPI_MCA_mtl=^psm export OMPI_MCA_btl=shm,self else # Since we are using a single IB card per node which can initiate only up to a maximum of 16 PSM contexts # we have to share PSM contexts between processes # CIN is here the number of cores in node CIN=$(cat /proc/cpuinfo | grep -i processor | wc -l) if [ $(($CIN/16)) -ge 2 ]; then PPN=$(grep $HOSTNAME $PBS_NODEFILE|wc -l) if [ $CIN -eq 40 ]; then export PSM_SHAREDCONTEXTS_MAX=$(($PPN/4)) elif [ $CIN -eq 32 ]; then export PSM_SHAREDCONTEXTS_MAX=$(($PPN/2)) else echo "This computing node is not supported by this script" fi echo "PSM_SHAREDCONTEXTS_MAX defined to $PSM_SHAREDCONTEXTS_MAX" else echo "no PSM_SHAREDCONTEXTS_MAX to define" fi fi function get_gpu-ids() { if [ $PBS_NUM_PPN -eq $(cat /proc/cpuinfo | grep -cE "^processor.*:") ]; then echo "0,1" && return fi if [ -e /dev/cpuset/torque/$PBS_JOBID/cpus ]; then FILE="/dev/cpuset/torque/$PBS_JOBID/cpus" elif [ -e /dev/cpuset/torque/$PBS_JOBID/cpuset.cpus ]; then FILE="/dev/cpuset/torque/$PBS_JOBID/cpuset.cpus" else FILE="" fi if [ -e $FILE ]; then if [ $(cat $FILE | sed -r 's/^([0-9]).*$/\1/') -eq 0 ]; then echo "0" && return else echo "1" && return fi else echo "0,1" && return fi } gpus=$(get_gpu-ids) ## USER Part module load gcc/8.3.0 module load miniconda-py3/latest conda activate alphafold nb_cores=$(cat $PBS_NODEFILE|wc -l) #Run cd $WORKDIR/ AF2_path="/scratch-nv/software/alphafold/alphafold_repo" AF2_db_path="/scratch-nv/software/alphafold/alphafold_db" d1=`date +%s` echo $(date) # Use either one by uncommenting the command line. # Just change the name of the fasta file. #Monomer #bash ${AF2_path}/run_alphafold.sh -n $nb_cores -a $gpus -d ${AF2_db_path} -o . -f myfasta.fasta -t 2021-12-01 #Multimer #bash ${AF2_path}/run_alphafold.sh -n $nb_cores -a $gpus -d ${AF2_db_path} -o . -f sequences.fasta -t 2021-11-01 -m multimer d2=$(date +%s) echo $(date) diff=$((($d2 - $d1)/60)) echo "Time spent (min) : ${diff}" ## DO NOT MODIFY THE PART OF SCRIPT: you will be accountable for any damage you cause # At the term of your job, you need to get back all produced data synchronizing workdir folder with you starting job folder and delete the temporary one (workdir) if [ $NUM_NODES -le 1 ]; then cd $PBS_O_WORKDIR rsync -ap $WORKDIR/ $PBS_O_WORKDIR/ rm -rf $WORKDIR fi ## END-DO