#!/bin/bash
#$ -S /bin/bash 


#########################################################################
#                                                                       #
# morePhyML : a BASH script to improve ML tree search with PhyML 3.*    #
  VERSION=1.14                                                         
# Copyright (C) 2010  Alexis Criscuolo                                  #
#                                                                       #
# morePhyML is free software;  you can redistribute it and/or modify    #
# it under the terms of the  GNU General Public License as published    #
# by the Free Software Foundation;  either version 2 of the License,    #
# or (at your option) any later version.                                #
#                                                                       #
# morePhyML is  distributed in the hope  that it will be useful, but    #
# WITHOUT  ANY  WARRANTY;  without  even  the  implied  warranty  of    #
# MERCHANTABILITY or  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    #
# General Public License for more details.                              #
#                                                                       #
# You should have received  a copy of the GNU General Public License    #
# along  with  this program;  if  not,  write to the  Free  Software    #
# Foundation Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307    #
#                                                                       # 
# Contact:                                                              #
# > before 2011                                                         #
#  Unité de Biologie Moléculaire du Gène chez les Extrêmophiles         #
#  Département de Microbiologie                                         #
#  INSTITUT  PASTEUR                                                    #
#  25 rue du Dr Roux - 75015 Paris  (France)                            #
# > since 2011                                                          #
#  PF8 - Génotypage des Pathogènes et Santé Publique                    #
#  INSTITUT  PASTEUR                                                    #
#  28 rue du Dr Roux - 75724 Paris  (France)                            #
#                                                                       #
# alexis.criscuolo@pasteur.fr                                           #
#                                                                       #
#########################################################################



#########################################################################
#                                                                       #
# ================                                                      #
# = INTRODUCTION =                                                      #
# ================                                                      #
#                                                                       #
# The  ML tree  reconstruction  is performed  here with the software    #
# PhyML (Guindon and Gascuel 2003; Guindon et al. 2010). PhyML first    #
# infers an initial tree  (i.e. BioNJ tree, MP tree, random tree, or    #
# user-defined tree)  that is  secondly used  as starting tree  in a    #
# heuristic local search in order to optimize the ML criterion.         #
# As  the accuracy  of  a  phylogenetic tree inference  based  on  a    #
# heuristic local search  is  highly dependent  on both the starting    #
# tree  (Criscuolo et  al. 2006)  and the  size of  the neighborhood    #
# (e.g., NNI, SPR;  see  Swofford et al. 1996), morePhyML  has  been    #
# implemented to  improve the  ML tree  search with  PhyML.  A first    #
# phylogenetic tree  is inferred  by PhyML.  Secondly,  this tree is    #
# used as  starting tree to  perform another NNI-based  local search    #
# but on  a bootstrap replicate  of the multiple sequence alignment;    #
# this last 'noisy' tree is then used as  starting  tree  to perform    #
# a new  NNI-based  tree search  on the  initial  multiple  sequence    # 
# alignment.  This ratchet loop (i.e., NNI-based ML tree search on a    # 
# bootstrap replicate of the initial multiple alignment, followed by    # 
# another NNI-based ML tree search) is iterated as long as it allows    # 
# trees  with  better  log-likelihood  value  to  be  reached.  This    #
# approach,  closely related  to the ratchet technique  (Nixon 1999;    # 
# see also  Morrison 2007),  often allows the ML criterion (and  the    #
# corresponding  phylogenetic tree)  to be improved.  Finally,  when    #
# the ratchet loop  does not allow  the ML criterion  to be improved    #
# further, a last (and slower) SPR-based tree search is performed to    #
# escape from a possible local optimum.                                 #
#                                                                       #
# Criscuolo A,  Berry V,  Douzery EJP,  Gascuel O (2006) SDM: a fast    #
#      distance-based   approach   for   (super)tree   building   in    #
#      phylogenomics. Syst Biol 55:740–755                              #
# Gascuel O  (1997)  BIONJ:  an improved version of the NJ algorithm    #
#     based  on  a  simple  model  of  sequence  data. Mol Biol Evol    #
#     14:685–695                                                        #
# Guindon S, Gascuel O  (2003) A simple, fast and accurate method to    #
#     estimate large phylogenies  by  maximum-likelihood.  Syst Biol    #
#     52:696–704                                                        #
# Guindon  S,  Dufayard  JF,  Lefort  V,  Anisimova  M,  Hordijk  W,    #
#     Gascuel O  (2010)  New  algorithms  and  methods  to  estimate    #
#     maximum-likelihood phylogenies:  assessing  the performance of    #
#     PhyML 3.0. Syst Biol 59:307-321.                                  #
# Morrison DA  (2007)  Increasing the efficiency of searches for the    #
#     Maximum  Likelihood  tree in a phylogenetic  analysis of up to    #
#     150 nucleotide sequences. Syst Biol 56:988-1010                   #
# Nixon KC  (1999)  The  parsimony ratchet,  a  new method for rapid    #
#     parsimony analysis. Cladistics 15:407–414                         #
# Swofford DL, Olsen GJ, Waddell PJ, Hillis DM  (1996)  Phylogenetic    #
#     inference.  In:  Molecular Systematics.  Edited by  Hillis DM,    #
#     Moritz C, Mable BK. Sinauer Associates, Massachusetts, 407-509    #
#                                                                       #
#########################################################################



#########################################################################
#                                                                       #
# ================                                                      #
# = INSTALLATION =                                                      #
# ================                                                      #
#                                                                       #
# Prior to any launch,  first verify here that the path to the PhyML    #
# binary on your computer is correct:                                   #

                                                        ################################################
                                                        ################################################
  PHYML="./phyml_3.0_linux32";                          ## <=== WRITE HERE THE PATH TO THE PHYML      ##
                                                        ##      BINARY (VERSION 3.0 MINIMUM)          ##
                                                        ################################################
                                                        ################################################
# Note: morePhyML stops at start-up if this path does not exist.        #

# Secondly,  give the execute permission on the script  morePhyML.sh    #
# by using the following command:                                       #
#                                                                       #
#   chmod +x morePhyML.sh                                               #
#                                                                       #
#########################################################################



#########################################################################
#                                                                       #
# =============                                                         #
# = EXECUTION =                                                         #
# =============                                                         #
#                                                                       #
# You can launch morePhyML with the following command:                  #
#                                                                       #
#   ./morePhyML.sh [options]                                            #
#                                                                       #
#                                                                       #
# Most of the  options in  morePhyML  (e.g., -i, -d, -q, -m, -f, -t,    #
# -v, -c, -a, and -u)  are  identical  to  those  available  in  the    #
# command-line  interface of  PhyML:                                    #
#                                                                       #
#   -i <file>         input file name (no absolute path name)           #
#                                                                       #
#   -d <nt|aa>        'nt' for  nucleotide sequences  (default)  or     #
#                     'aa' for amino acid sequences                     #
#                                                                       #
#   -q                sequential alignment (default is interleaved)     #
#                                                                       #
#   -m <model>        nt:  GTR (default), JC69, K80, F81, F84, TN93,    #
#                          HKY85, K81uf, TIM, TVM ...                   #
#                     aa:  LG (default),  WAG, JTT,  MtREV, Dayhoff,    #
#                          VT, DCMut, RtREV, CpREV, Blosum62, MtMam,    #
#                          MtArt, HIVw, HIVb                            #
#                     (see below the list of all evolutionary models)   #
#                                                                       #
#   -f <e|m>          character frequencies:  <e>mpirical, estimated    #
#                     by <m>l (for nt) or from the <m>odel (for aa),    #
#                     or set by the user (for nt) (m by default)        #
#                                                                       #
#   -t <e|real>       ts/tv ratio (<e>stimated by default; only with    #
#                     the nt models K80, HKY85 and TN93)                #
#                                                                       #
#   -v <e|real>       proportion of invariable sites (0 by default)     #
#                                                                       #
#   -c <integer>      the  number  of   relative  substitution  rate    #
#                     categories (1 by default; must be positive)       #
#                                                                       #
#   -a <e|real>       alpha parameter  (<e>stimated by  default when    #
#                     option -c is set)                                 #
#                                                                       #
#   -u <file>         user tree file name                               #
#                                                                       #
#   -p                MP starting tree                                  #
#                                                                       #
#   -n <integer>      number of random starting trees to be used        #
#                                                                       #
#   -s <NNI|SPR|BEST> tree topology search first performed: NNI, SPR    #
#                     (default), or the best of NNI and SPR searches    #
#                     (BEST). NNIs are fast,  whereas SPRs are a bit    #
#                     slower but give better results.                   #
#                                                                       #
#   -z <nt|aa>        to launch morePhyML with  BioNJ starting tree,    #
#                     first ML tree search using  SPR tree swapping,    #
#                     and standard evolutionary models, i.e             # 
#                          nt: GTR+Gamma4+I+F,  aa: LG+Gamma4+I         #
#                                                                       #
#   -b <0|-1|-2>      branch support:  no confidence value (0), aLRT    #
#                     statistics  (-1),   or  khi2-based  parametric    #
#                     supports  (-2).   By default,  SH-like  branch    #
#                     supports are computed.                            #
#                                                                       #
#   -x                this  option  allows  saving  the  first  tree    #
#                     (the tree  output by  phyml before performing    #
#                     the ratchet technique)                            #
#                                                                       #
#   -l                to write the likelihood for each character        #
#                                                                       #
#   -e <string>       extension name for output files (default: txt)    #
#                                                                       #
#   -?                to get help                                       #
#                                                                       #
#                                                                       #
# For more details, see the morePhyML documentation at:                 #
#   ftp://ftp.pasteur.fr/pub/gensoft/projects/morePhyML/                #
# See also the PhyML documentation at:                                  #
#   http://www.atgc-montpellier.fr/phyml/usersguide.php?type=command    #
#                                                                       #
# Given a data file 'INFILE',  morePhyML creates different temporary    #
# files:                                                                #
#   - INFILE_launcher.sh                                                #
#   - INFILE_starting_tree.txt                                          #
#   - INFILE_phyml_tree.txt                                             #
#   - INFILE_phyml_stats.txt                                            #
#   - INFILE_phyml_boot_trees.txt                                       #
#   - INFILE_phyml_boot_stats.txt                                       #
#   - INFILE_phyml_rand_trees.txt.                                      #
# All the temporary files  are deleted  when morePhyML ends  without    #
# error, and results are written into two output files:                 #
#   - INFILE_morephyml_tree.txt                                         #
#   - INFILE_morephyml_stats.txt.                                       #
# Two other output files are written when the option -x is set:         #
#   - INFILE_phyml_first_tree.txt                                       #
#   - INFILE_phyml_first_stats.txt.                                     #
# One other output file is written when the option -l is set:           #
#   - INFILE_morephyml_lk.txt.                                          #
#                                                                       #
#########################################################################







###############################
##### beginning morePhyML #####
###############################
# Stop right away, with a non-zero status, when the configured PhyML binary
# cannot be found.  The expansion is quoted so that an empty path (or one
# containing blanks) is tested as a single word instead of slipping through.
if [ ! -e "$PHYML" ]
then
  echo "   problem with phyml binary: '$PHYML' does not exist" ;
  exit 1 ;
fi
# From here on PHYML holds a command line that pipes one empty line into the
# binary (presumably so that PhyML never waits for keyboard input).
PHYML="echo | $PHYML";

# Help screen: shown when the first argument is -? or when fewer than two
# command-line words were given; the script stops right after printing it.
if [ $# -le 1 ] || [ "$1" = "-?" ]
then
  printf '%s\n' \
    "" \
    " morePhyML v.$VERSION" \
    "" \
    " Please cite this paper if you use this software in your publications:" \
    "   Criscuolo A (2011) morePhyML: improving the phylogenetic tree space exploration" \
    "   with PhyML 3. Molecular Phylogenetics and Evolution (in press)" \
    "" \
    " USAGE :" \
    "    ./morePhyML.sh [options]" \
    "  where 'options' are :" \
    "   -i <file>          input file name" \
    "   -d <nt|aa>         'nt' for nucleotide (default) or 'aa' for amino acid" \
    "   -q                 sequential alignment (default is interleaved)" \
    "   -m <model>         nt: JC69,F81,K80,HKY85,F84,TN93e,TN93,TPM1e,K81,TPM1u,TPM2e,TPM2u," \
    "                          TPM3e,TPM3u,TIM1e,TIM1u,TIM2e,TIM2u,TIM3e,TIM3u,TVMe,TVMu," \
    "                          SYM,GTR(default)" \
    "                      aa: LG(default),WAG,JTT,MtREV,Dayhoff,DCMut,RtREV,CpREV,VT," \
    "                          Blosum62,MtMam,MtArt,HIVw,HIVb" \
    "   -f <e|m>           equilibrium frequencies: <e>mpirical, or estimated by <m>l" \
    "                      for nt or from the <m>odel for aa (m by default)" \
    "   -t <e|real>        ts/tv ratio (<e>stimated by default with nt only)" \
    "   -v <e|real>        proportion of invariable sites (0 by default)" \
    "   -c <integer>       number of relative substitution rate categories (1 by default)" \
    "   -a <e|real>        alpha parameter (<e>stimated by default when option -c is set)" \
    "   -u <file>          user tree file name" \
    "   -p                 most parsimonious starting tree" \
    "   -n <integer>       number of random starting trees to be used" \
    "   -s <NNI|SPR|BEST>  first tree swapping: NNI, SPR (default), or both (BEST)" \
    "   -b <0|-1|-2>       branch support: none (0), aLRT (-1), or khi2-based parametric" \
    "                      supports (-2); by default, SH-like branch supports are computed" \
    "   -z <nt|aa>         to launch morePhyML with BioNJ starting tree, first ML tree search" \
    "                      using SPR tree swapping, and standard evolutionary models, i.e " \
    "                      nt: -m GTR -f m -c 4 -a e -v e,   aa: -m LG -f m -c 4 -a e -v e" \
    "   -l                 to write the likelihood for each character" \
    "   -x                 to write the results outputed by the first run of phyml" \
    "   -e                 to set a new output filename extension (default: txt)" \
    "" \
    "  Examples :" \
    "    Both are equivalent for nucleotide sequences:" \
    "      ./morePhyML.sh -i input.phy -d nt -q -m GTR -f m -v e -c 4 -a e -s SPR" \
    "      ./morePhyML.sh -i input.phy -q -z nt" \
    "    Both are equivalent for amino acid sequences:" \
    "      ./morePhyML.sh -i input.phy -d aa -q -m LG -f m -v e -c 4 -a e -s SPR" \
    "      ./morePhyML.sh -i input.phy -q -z aa" \
    ""
  exit
fi



########################################
########################################
##### ACCEPTED EVOLUTIONARY MODELS #####
########################################
########################################
# Model names accepted by option -m.  Nucleotide models come first ...
MODELS=(
  JC69 F81 K80 HKY85 F84 TN93e TN93
  TPM1e K81 TPM1u TPM2e TPM2u TPM3e TPM3u
  TIM1e TIM1u TIM2e TIM2u TIM3e TIM3u
  TVMe TVMu SYM GTR
)
# ... followed by the amino acid models.
MODELS+=(
  LG WAG JTT MtREV Dayhoff DCMut RtREV CpREV VT
  Blosum62 MtMam MtArt HIVw HIVb
)






###########################
##### parsing options #####
###########################
# Every option starts at a sentinel value ("null", "false" or empty) so that
# the later stages can tell "not given on the command line" from a real value.
opt_i="null"; opt_d="null"; opt_q="";
opt_m="null"; opt_f="null"; opt_t="null"; opt_v="null"; opt_c="null"; opt_a="null";
opt_u="null"; opt_n="null"; opt_p="false"; opt_s="null";
opt_b="null";
opt_x="false"; opt_l="false";
opt_z="null"; opt_e="txt";

# Succeeds when $1 is a strictly positive integer (digits only, optional '+').
is_positive_integer() {
  local digits=${1#+}
  case "$digits" in
    ''|*[!0-9]*) return 1 ;;
    *[1-9]*)     return 0 ;;
  esac
  return 1
}

# Succeeds when $1 is written as a decimal number, e.g. 0, .5, 0.25 or 1e-3.
is_real() {
  local pattern='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'
  [[ "$1" =~ $pattern ]]
}

# Succeeds when $1 is a decimal number lying in the closed interval [0,1].
# The comparison is delegated to awk because the shell only compares integers.
is_proportion() {
  is_real "$1" && awk -v x="$1" 'BEGIN { exit !(x + 0 >= 0 && x + 0 <= 1) }'
}

# Succeeds when $1 is a strictly positive decimal number.
is_positive_real() {
  is_real "$1" && awk -v x="$1" 'BEGIN { exit !(x + 0 > 0) }'
}

# Reads the command-line options given as arguments into the opt_* variables.
# Any invalid value prints a message and terminates the script with status 1.
# Reads:  MODELS (list of accepted model names)
# Writes: opt_i opt_d opt_q opt_m opt_f opt_t opt_v opt_c opt_a opt_b opt_s
#         opt_u opt_p opt_n opt_x opt_l opt_z opt_e
parse_options() {
  local model known
  while getopts i:d:m:f:t:v:c:a:b:s:u:n:z:e:qxpl option
  do
    case $option in
    i)
      opt_i=$OPTARG;
      if [ ! -e "$opt_i" ]; then echo "   problem with the input file (option -i) '$opt_i' does not exist" ; exit 1 ; fi
     ;;
    d)
      opt_d=$OPTARG;
      if [ "$opt_d" != "nt" ] && [ "$opt_d" != "aa" ]
      then echo "   option -d must be set to 'nt' or 'aa'" ; exit 1 ; fi
     ;;
    q)
      opt_q="-q"
     ;;
    m)
      opt_m=$OPTARG;
      known="false";
      for model in "${MODELS[@]}"
      do
        if [ "$opt_m" = "$model" ]; then known="true"; break; fi
      done
      if [ "$known" != "true" ]; then echo "   incorrect substitution model (option -m)" ; exit 1 ; fi
     ;;
    f)
      opt_f=$OPTARG
     ;;
    t)
      opt_t=$OPTARG
     ;;
    v)
      # 'e' asks PhyML to estimate the proportion; otherwise a real in [0,1]
      opt_v=$OPTARG
      if [ "$opt_v" != "e" ] && ! is_proportion "$opt_v"
      then echo "   the proportion of invariable sites must range from 0 to 1 (option -v)" ; exit 1 ; fi
     ;;
    c)
      opt_c=$OPTARG
      if ! is_positive_integer "$opt_c"
      then echo "   the number of relative substitution rate categories must be a positive integer (option -c)" ; exit 1 ; fi
     ;;
    a)
      # 'e' asks PhyML to estimate alpha; otherwise a real greater than 0
      opt_a=$OPTARG
      if [ "$opt_a" != "e" ] && ! is_positive_real "$opt_a"
      then echo "   the alpha parameter must be positive (option -a)" ; exit 1 ; fi
     ;;
    b)
      opt_b=$OPTARG
      if [ "$opt_b" != "0" ] && [ "$opt_b" != "-1" ] && [ "$opt_b" != "-2" ]
      then echo "   the type of branch support must be 0, -1 or -2 (option -b)" ; exit 1 ; fi
     ;;
    s)
      opt_s=$OPTARG
      if [ "$opt_s" != "NNI" ] && [ "$opt_s" != "SPR" ] && [ "$opt_s" != "BEST" ]
      then echo "   tree swapping must be NNI, SPR or BEST (option -s)" ; exit 1 ; fi
     ;;
    u)
      opt_u=$OPTARG
      if [ ! -e "$opt_u" ]; then echo "   problem with the user tree file (option -u) '$opt_u' does not exist" ; exit 1 ; fi
     ;;
    p)
      opt_p="true";
     ;;
    n)
      opt_n=$OPTARG
      if ! is_positive_integer "$opt_n"
      then echo "   the number of random starting trees must be a positive integer (option -n)" ; exit 1 ; fi
     ;;
    x)
      opt_x="true";
     ;;
    l)
      opt_l="true";
     ;;
    z)
      # shortcut: presets data type, model, frequencies, invariable sites,
      # rate categories, alpha and first tree swapping in one go
      opt_z=$OPTARG
      if [ "$opt_z" = "nt" ]
      then
        opt_d="nt"; opt_m="GTR"; opt_f="m"; opt_v="e"; opt_c=4; opt_a="e"; opt_s="SPR";
      elif [ "$opt_z" = "aa" ]
      then
        opt_d="aa"; opt_m="LG"; opt_f="m"; opt_v="e"; opt_c=4; opt_a="e"; opt_s="SPR";
      else
        echo "   option -z must be set to 'nt' or 'aa'" ; exit 1 ;
      fi
     ;;
    e)
      opt_e=$OPTARG
     ;;
   esac
  done
}

parse_options "$@"

###########################
##### default options #####
###########################
# Replace every option still holding the "null" sentinel by its default.

# data type: nucleotides
[ "$opt_d" != "null" ] || opt_d="nt"

# model: GTR for nucleotide data, LG for any other data type
if [ "$opt_m" = "null" ]
then
  case "$opt_d" in
    nt) opt_m="GTR" ;;
    *)  opt_m="LG"  ;;
  esac
fi

# proportion of invariable sites: 0
[ "$opt_v" != "null" ] || opt_v=0

# a single rate category by default; when -c was given without -a,
# the alpha parameter is set to 'e'
if [ "$opt_c" = "null" ]
then
  opt_c=1
elif [ "$opt_a" = "null" ]
then
  opt_a="e"
fi

# first tree swapping: SPR
[ "$opt_s" != "null" ] || opt_s="SPR"

# branch support: -4
[ "$opt_b" != "null" ] || opt_b=-4

##########################################
##### building option line for phyML #####
##########################################
# Nucleotide data: some model names are replaced by a six-digit code for
# option -m, and the frequency option is forced or defaulted depending on the
# model:
#   - JC69, K80              : no -f option at all
#   - names ending with 'e',
#     K81 and SYM            : fixed frequencies 0.25,0.25,0.25,0.25
#   - every other model      : -f m unless the user chose a value
# Amino acid data: only the -f default (m) is applied.
if [ "$opt_d" = "nt" ]
then
  case "$opt_m" in
    JC69|K80)
      opt_f="null" ;;
    F81|HKY85|F84|TN93|GTR)
      if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TN93e)
      opt_m="010020"; opt_f="0.25,0.25,0.25,0.25" ;;
    TPM1e|K81)
      opt_m="012210"; opt_f="0.25,0.25,0.25,0.25" ;;
    TPM1u)
      opt_m="012210"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TPM2e)
      opt_m="010212"; opt_f="0.25,0.25,0.25,0.25" ;;
    TPM2u)
      opt_m="010212"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TPM3e)
      opt_m="012012"; opt_f="0.25,0.25,0.25,0.25" ;;
    TPM3u)
      opt_m="012012"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TIM1e)
      opt_m="012230"; opt_f="0.25,0.25,0.25,0.25" ;;
    TIM1u)
      opt_m="012230"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TIM2e)
      opt_m="010232"; opt_f="0.25,0.25,0.25,0.25" ;;
    TIM2u)
      opt_m="010232"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TIM3e)
      opt_m="012032"; opt_f="0.25,0.25,0.25,0.25" ;;
    TIM3u)
      opt_m="012032"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    TVMe)
      opt_m="012314"; opt_f="0.25,0.25,0.25,0.25" ;;
    TVMu)
      opt_m="012314"; if [ "$opt_f" = "null" ]; then opt_f="m"; fi ;;
    SYM)
      opt_m="012345"; opt_f="0.25,0.25,0.25,0.25" ;;
  esac
elif [ "$opt_d" = "aa" ]
then
  if [ "$opt_f" = "null" ]; then opt_f="m"; fi
fi

# Assemble the option string shared by every PhyML call; -t and -f are only
# added when they hold a value, -a only when -c is not 1.
options="-i $opt_i $opt_q -d $opt_d -m $opt_m -v $opt_v";
[ "$opt_t" = "null" ] || options+=" -t $opt_t"
[ "$opt_f" = "null" ] || options+=" -f $opt_f"
options+=" -c $opt_c"
if ! [ $opt_c -eq 1 ]; then options+=" -a $opt_a"; fi
echo $options ;
############################################
##### initializing variables and files #####
############################################
# Every file name is derived from the input file name (option -i).
# Names ending with $opt_e follow the extension chosen with option -e.
TREE_START="${opt_i}_starting_tree.${opt_e}";
TREE_BEST="${opt_i}_morephyml_tree.${opt_e}";
STAT_BEST="${opt_i}_morephyml_stats.${opt_e}";
LK_BEST="${opt_i}_morephyml_lk.${opt_e}";

FIRST_TREE="${opt_i}_phyml_first_tree.${opt_e}";
FIRST_STAT="${opt_i}_phyml_first_stats.${opt_e}";

OUTPUT_TREE="${opt_i}_phyml_tree.txt";
OUTPUT_STAT="${opt_i}_phyml_stats.txt";
OUTPUT_LK="${opt_i}_phyml_lk.txt";
OUTPUT_TREE_BOOT="${opt_i}_phyml_boot_trees.txt";
OUTPUT_STAT_BOOT="${opt_i}_phyml_boot_stats.txt";
OUTPUT_TREE_RAND="${opt_i}_phyml_rand_trees.txt";

LAUNCHER="${opt_i}_launcher.sh";

# Files that must not be used as user tree (option -u) and that are removed
# before starting.  Kept in an array so that each name stays a single word.
MOREPHYML_FILES=( "$TREE_START" "$TREE_BEST" "$STAT_BEST" "$LK_BEST"
                  "$FIRST_TREE" "$FIRST_STAT"
                  "$OUTPUT_TREE" "$OUTPUT_STAT"
                  "$OUTPUT_TREE_BOOT" "$OUTPUT_STAT_BOOT" "$OUTPUT_TREE_RAND"
                  "$LAUNCHER" )

for file in "${MOREPHYML_FILES[@]}"
do
  if [ "$opt_u" = "$file" ]
  then
    # the user tree would be overwritten or deleted: refuse to run
    echo "   problem with the user tree file (option -u) '$opt_u' is an incorrect file name" ;
    echo "   rename this file and avoid following file names : " ;
    for reserved in "${MOREPHYML_FILES[@]}"
    do
      echo "     $reserved" ;
    done
    exit 1
  fi
  # drop leftovers from a previous run
  if [ -e "$file" ]; then rm -- "$file" ; fi
done

# Create the launcher script holding a single empty line.  (The former
# 'echo $EOF' relied on a variable that is not assigned anywhere in the
# visible part of the script.)  Each PhyML run rewrites this file with the
# actual command line before executing it.
echo > "$LAUNCHER" ;
                                                  ###################################################
                                                  ###################################################
RUN_PHYML="/bin/sh $LAUNCHER";                    ## <=== PHYML LAUNCHER: MODIFY THIS LINE WHEN A  ##
                                                  ##      SPECIAL COMMAND IS NEEDED TO RUN PHYML   ##
                                                  ###################################################
                                                  ###################################################
###################################
##### inferring starting tree #####
###################################
# Prints three blank lines followed by the framed banner of an iteration.
#   $1 - text displayed after "iteration $STEP : "
# Reads: STEP
print_iteration_banner() {
  local frame="oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo";
  echo "" ; echo "" ; echo "" ;
  echo "$frame" ;
  echo "$frame" ;
  echo "  iteration $STEP : $1" ;
  echo "$frame" ;
  echo "$frame" ;
}

# Writes into the launcher the command of the first ML tree search, according
# to the requested starting tree: random (-n), MP (-p), user-defined (-u) or,
# when none of these is set, the BioNJ branch.  The launcher itself is
# executed right after this section.
STEP=0;
if [ "$opt_n" != "null" ]
then
  # random starting trees are always searched with SPR moves
  opt_s="SPR";

  if [ "$opt_n" -eq 1 ]
  then print_iteration_banner "random starting tree ($opt_s moves)" ;
  else print_iteration_banner "$opt_n random starting trees ($opt_s moves)" ;
  fi

  runline="$PHYML $options -b 0 --rand_start --n_rand_starts $opt_n --r_seed $STEP -s $opt_s";
  echo "$runline ;" > "$LAUNCHER" ;

  # RUN_PHYML is left unquoted on purpose: it holds a command and its argument
  $RUN_PHYML ;
  # a missing/empty tree file or a statistics file without log-likelihood
  # means that PhyML failed: report it with a non-zero exit status
  if [ ! -e "$OUTPUT_TREE" ] || [ -z "$(head -1 "$OUTPUT_TREE")" ]
  then echo ; echo "  problem with phyml binary:" ; echo "  $PHYML" | cut -c9- ; exit 1 ; fi
  if [ ! -e "$OUTPUT_STAT" ] || [ -z "$(grep -m 1 -F "Log-likelihood" "$OUTPUT_STAT")" ]
  then echo ; echo "  problem with phyml binary:" ; echo "  $PHYML" | cut -c9- ; exit 1 ; fi

  mv "$OUTPUT_TREE" "$TREE_BEST" ; rm "$OUTPUT_STAT" "$OUTPUT_TREE_RAND" ;

  # second pass: the tree just obtained is given back to PhyML as user tree
  echo "$PHYML $options -b 0 -u $TREE_BEST -olr --r_seed $STEP ;" > "$LAUNCHER" ;

elif [ "$opt_p" = "true" ]
then
  print_iteration_banner "MP starting tree ($opt_s moves)" ;

  runline="$PHYML $options -b 0 -s $opt_s -p --r_seed $STEP";
  echo "$runline ;" > "$LAUNCHER" ;

elif [ "$opt_u" != "null" ]
then
  print_iteration_banner "user-defined starting tree ($opt_s moves)" ;

  runline="$PHYML $options -b 0 -s $opt_s -u $opt_u --r_seed $STEP";
  echo "$runline ;" > "$LAUNCHER" ;

else
  print_iteration_banner "BioNJ starting tree ($opt_s moves)" ;

  runline="$PHYML $options -b 0 -s $opt_s --r_seed $STEP";
  echo "$runline ;" > "$LAUNCHER" ;

fi

# Execute the launcher prepared above (RUN_PHYML is left unquoted on purpose:
# it holds a command followed by its argument).
$RUN_PHYML ;
# A missing/empty tree file, or a statistics file without any log-likelihood
# line, means that PhyML failed: stop with a non-zero exit status.
if [ ! -e "$OUTPUT_TREE" ] || [ -z "$(head -1 "$OUTPUT_TREE")" ]
then echo ; echo "  problem with phyml binary:" ; echo "  $PHYML" | cut -c9- ; exit 1 ; fi
if [ ! -e "$OUTPUT_STAT" ] || [ -z "$(grep -m 1 -F "Log-likelihood" "$OUTPUT_STAT")" ]
then echo ; echo "  problem with phyml binary:" ; echo "  $PHYML" | cut -c9- ; exit 1 ; fi

# option -x: keep a copy of the results of this first run
if [ "$opt_x" = "true" ]; then cp "$OUTPUT_TREE" "$FIRST_TREE" ; cp "$OUTPUT_STAT" "$FIRST_STAT" ; fi

# Reduce the "Log-likelihood" line to a bare string: control characters are
# removed, then every '.', ' ', '-', ':' and every letter of "Log-likelihood".
# The sign and the decimal point are therefore lost (presumably only digits
# remain); finally the last character is dropped.
LK=$(grep "Log-likelihood" "$OUTPUT_STAT" | tr -d '[:cntrl:]' | tr -d '. Log\-likelihood:');
LK=${LK:0:$(( ${#LK} - 1 ))};

BEST_LK=$LK ;
#cat $OUTPUT_TREE | sed 's/:0/:1/g' > $TREE_START ;
# The next starting tree is the tree just found with every "digits.digits"
# number and every ':' removed (presumably its branch lengths and supports).
sed 's/\([0123456789]*\)\.\([0123456789]*\)//g' "$OUTPUT_TREE" | sed 's/://g' > "$TREE_START" ;
# the first results become the best tree and statistics found so far
mv "$OUTPUT_TREE" "$TREE_BEST" ; mv "$OUTPUT_STAT" "$STAT_BEST" ;


################################
##### running ratchet loop #####
################################
# Repeats PhyML runs until an iteration brings no better log-likelihood.
# Two kinds of iteration alternate, driven by $ratchet and $swap:
#   ratchet="true"  : NNI search from $TREE_START with "-b 1"; the run is expected
#                     to leave trees in $OUTPUT_TREE and $OUTPUT_TREE_BOOT
#                     (presumably the latter comes from one bootstrap replicate and
#                     serves as the perturbed "ratchet" tree -- confirm with PhyML doc).
#   ratchet="false" : SPR search from $TREE_START with "-b 0", followed by a second
#                     run with "-olr" on the resulting topology.
# Transitions at the end of each iteration:
#   ratchet, run failed   -> SPR iteration from whatever trees are available
#   ratchet, improved     -> another ratchet iteration from the bootstrap tree
#   ratchet, not improved -> SPR iteration from bootstrap + current + best trees
#   SPR,     run failed   -> stop
#   SPR,     improved     -> ratchet iteration from the improved tree
#   SPR,     not improved -> stop
# Likelihoods are handled as integers (|lnL| * 10^4, see LK below): smaller is better.
# Updates STEP, BEST_LK, $TREE_BEST, $STAT_BEST and $TREE_START.

# Succeeds when file $1 exists and its first line is not empty.
has_first_line() { [ -e "$1" ] && [ -n "$(head -1 "$1")" ] ; }

# Prints the integer gain $1 (units of 10^-4) as "<integer part>.<4 digits>".
lk_gain_string() { printf '%d.%04d' $(( $1 / 10000 )) $(( $1 % 10000 )) ; }

swap="NNI"; ratchet="true";
one_more_time="true";
while [ "$one_more_time" = "true" ]
do
  STEP=$(( $STEP + 1 ));

  line=$(grep "Log-likelihood" "$STAT_BEST");
  echo ; echo ; echo ; echo ; echo ; echo ;
  echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
  echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
  if [ "$ratchet" = "true" ]
  then echo "  iteration $STEP : ML tree searching and ratchet ($swap moves)" ;
  else echo "  iteration $STEP : ML tree searching ($swap moves)" ;
  fi
  echo "  $line";
  echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
  echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;

  # Command line of this iteration; the iteration number doubles as random seed.
  runline="$PHYML $options -s $swap -u $TREE_START --r_seed $STEP";
  if [ "$ratchet" = "true" ]
  then
    runline="$runline -b 1";
    #alpha=$(echo $(grep "Gamma" $STAT_BEST | sed 's/\(.*\)://g'));
    #if [ -n "$alpha" ]; then runline="$runline -a $alpha"; fi
    # Reuse the value found after the last ':' on the "invariant" line of the best
    # stats file, if there is one (the inner unquoted echo trims surrounding blanks).
    pinv=$(echo $(grep "invariant" "$STAT_BEST" | sed 's/\(.*\)://g'))
    if [ -n "$pinv" ]; then runline="$runline -v $pinv"; fi
  else
    runline="$runline -b 0";
  fi
  echo "$runline ;" > "$LAUNCHER" ;

  $RUN_PHYML ;

  # A run is considered failed when an expected tree file is missing or starts
  # with an empty line.
  error="false";
  if ! has_first_line "$OUTPUT_TREE"; then error="true" ; fi
  if [ "$ratchet" = "true" ] && ! has_first_line "$OUTPUT_TREE_BOOT"; then error="true" ; fi

  if [ "$ratchet" = "false" ] && [ "$error" = "false" ]
  then
    # SPR iteration: strip every decimal number and ':' from the tree just found
    # and run PhyML again on that topology with -olr
    # (presumably: optimise branch lengths and rates only -- confirm with PhyML doc).
    sed 's/\([0123456789]*\)\.\([0123456789]*\)//g' "$OUTPUT_TREE" | sed 's/://g' > "$TREE_START" ;
    rm -f "$OUTPUT_STAT" "$OUTPUT_TREE" ;

    echo "$PHYML $options -b 0 -u $TREE_START -olr --r_seed $STEP ;" > "$LAUNCHER" ;

    $RUN_PHYML ;
    # The second run can fail too; without a tree there is nothing to restart from.
    if ! has_first_line "$OUTPUT_TREE"; then error="true" ; fi
  fi

  # Integer likelihood of this iteration: delete control characters, then '.',
  # blanks, ':', '-' and the letters of the label, and drop the last character left.
  # When the stats file is missing or yields no number, diff stays 0 ("no
  # improvement") instead of triggering an arithmetic error.
  diff=0;
  if [ -e "$OUTPUT_STAT" ]
  then
    LK=$(grep "Log-likelihood" "$OUTPUT_STAT" | tr -d '[:cntrl:]' | tr -d '. Log\-likelihood:');
    LK=${LK%?};
    case "$LK" in
      ''|*[!0-9]*) ;;
      *) diff=$(( $BEST_LK - $LK )) ;;
    esac
  fi

  if [ $diff -gt 0 ] && has_first_line "$OUTPUT_TREE"
  then
    echo "" ; echo "" ; echo "" ;
    echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
    echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
    line=$(grep "Log-likelihood" "$STAT_BEST");
    echo "  $line";
    line=$(grep "Log-likelihood" "$OUTPUT_STAT");
    echo "  $line";
    echo "  . Log-likelihood improvement:         $(lk_gain_string "$diff")" ;
    echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
    echo "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo" ;
    BEST_LK=$LK ; cp "$OUTPUT_TREE" "$TREE_BEST" ; cp "$OUTPUT_STAT" "$STAT_BEST" ;
  fi

  # -f: after a failed run these files may not exist.
  rm -f "$OUTPUT_STAT" ;
  if [ "$ratchet" = "true" ]; then rm -f "$OUTPUT_STAT_BOOT" ; fi

  if [ "$ratchet" = "true" ]
  then
    if [ "$error" = "true" ]
    then
      # Failed ratchet run: rebuild the starting-tree file from the trees that do
      # exist (':0' rewritten as ':1' in the current and best trees) and try SPR.
      rm -f "$TREE_START" ;
      if [ -e "$OUTPUT_TREE_BOOT" ]; then cat "$OUTPUT_TREE_BOOT" >> "$TREE_START" ; rm "$OUTPUT_TREE_BOOT" ; fi
      if [ -e "$OUTPUT_TREE" ]; then sed 's/:0/:1/g' "$OUTPUT_TREE" >> "$TREE_START" ; rm "$OUTPUT_TREE" ; fi
      if [ -e "$TREE_BEST" ]; then sed 's/:0/:1/g' "$TREE_BEST" >> "$TREE_START" ; fi
      ratchet="false";
      swap="SPR";
    elif [ $diff -gt 0 ]
    then
      # Improvement: keep ratcheting, starting from the bootstrap tree.
      one_more_time="true";
      rm "$OUTPUT_TREE" ;
      mv "$OUTPUT_TREE_BOOT" "$TREE_START" ;
      ratchet="true";
      swap="NNI";
    else
      # No improvement: switch to SPR, starting from the bootstrap tree followed
      # by the current and the best trees (':0' rewritten as ':1' in the last two).
      one_more_time="true";
      cp "$OUTPUT_TREE_BOOT" "$TREE_START" ;
      sed 's/:0/:1/g' "$OUTPUT_TREE" >> "$TREE_START" ;
      sed 's/:0/:1/g' "$TREE_BEST" >> "$TREE_START" ;
      rm "$OUTPUT_TREE_BOOT" "$OUTPUT_TREE" ;
      ratchet="false";
      swap="SPR";
    fi
  else
    if [ "$error" = "true" ]
    then
      # Failed SPR run: stop and keep the best tree found so far.
      one_more_time="false";
      rm -f "$OUTPUT_TREE" ;
    elif [ $diff -gt 0 ]
    then
      # SPR found a better tree: go back to ratchet iterations from it.
      one_more_time="true";
      sed 's/:0/:1/g' "$OUTPUT_TREE" > "$TREE_START" ;
      rm "$OUTPUT_TREE" ;
      ratchet="true";
      swap="NNI";
    else
      # Neither ratchet nor SPR improved the likelihood: the search is over.
      one_more_time="false";
      rm -f "$OUTPUT_TREE" ;
    fi
  fi

done

################################################
##### final computations with optimal tree #####
################################################
# Last PhyML run on the best tree found, then clean-up and summary.
# Expansions are deliberately left unquoted, as everywhere else in this script.

# Best tree with every decimal number and every ':' removed -> starting tree.
sed -e 's/\([0123456789]*\)\.\([0123456789]*\)//g' -e 's/://g' $TREE_BEST > $TREE_START
# cat $TREE_BEST > $TREE_START ;

# Final command line: user-requested -b value, -olr, and the next unused seed;
# --print_site_lnl is appended when option -l was given.
final_seed=$(( STEP + 1 ))
runline="$PHYML $options -u $TREE_START -b $opt_b -olr --r_seed $final_seed"
case "$opt_l" in
  true) runline="$runline --print_site_lnl" ;;
esac
printf '%s ;\n' "$runline" > $LAUNCHER

$RUN_PHYML

# The outputs of this last run become the final "best" files.
mv $OUTPUT_TREE $TREE_BEST
mv $OUTPUT_STAT $STAT_BEST

# Shorten numbers written just before a ':' in the final tree (both substitutions
# are applied in this order on every line), using $TREE_START as scratch file.
sed -e 's/1.0000000000:/1.000:/g' -e 's/0000000:/:/g' $TREE_BEST > $TREE_START
mv $TREE_START $TREE_BEST

case "$opt_l" in
  true) mv $OUTPUT_LK $LK_BEST ;;
esac

# Put back the first-iteration files saved under option -x, when they exist.
if [ -e $FIRST_TREE ]
then
  mv $FIRST_TREE $OUTPUT_TREE
fi
if [ -e $FIRST_STAT ]
then
  mv $FIRST_STAT $OUTPUT_STAT
fi

rm $LAUNCHER

# Summary: number of iterations and final log-likelihood line.
hrule="oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo"
line=$(grep "Log-likelihood" $STAT_BEST)
printf '\n%s\n%s\n' "$hrule" "$hrule"
printf '  %s iterations \n' "$STEP"
printf '  %s \n' "$line"
printf '%s\n%s\n\n' "$hrule" "$hrule"


