/* -----------------------------------------------------------------
 file      : /home/schuerer/prosite/src/translate.c

 author    : Schuerer <schuerer@pasteur.fr>
 creation  : <Fri May 18 09:32:01 2001>
 Time-stamp: <Tue Feb 19 14:42:17 2002>
 Dev-stage : under construction

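 description : translation of PROSITE patterns into regular expression
               strings and syntax verification of PROSITE patterns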


-------------------------------------------------------------------- */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>

#ifdef STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#endif

#include <sys/types.h>
#include <regex.h>

#include "mytag.h"
#include "error.h"
#include "translate.h"

/* NOTE(review): PARA, NO_PARA and END are not referenced by any code
   visible in this file -- confirm whether they are still needed. */
#define PARA 0
#define NO_PARA 1
#define END 2

/* internal macros */

/* internal prototypes */

/* Forward declaration of the file-local residue-letter test defined
   further down; it is used by prosite_verif(). */
static int valid_aa(char c);

/* function definition */

/* Translate a PROSITE pattern into a regular expression string.
 *
 * The pattern is rewritten character by character:
 *   '-'          dropped (nothing is emitted)
 *   '[' , ']'    kept as '[' and ']'
 *   '{' , '}'    become "[^" and "]"
 *   '(' , ')'    become '{' and '}'
 *   '<' , '>'    become '^' and '$'
 *   'x' , 'X'    become '.'
 *   'n' , 'N'    become '.' when pa->is_nucl is non-zero
 *   anything else is copied unchanged
 *
 * re   : not used here; the expression is not compiled by this function.
 * pros : NUL-terminated PROSITE pattern.
 * pa   : parameters; only the is_nucl field is read.
 *
 * Returns a NUL-terminated string obtained from malloc(); the caller owns
 * it.  error_fatal("memory", NULL) is called when the allocation fails.
 */
char *pros2re (regex_t *re, char *pros, para_t *pa) {

  char *pre, *prei, *prosi;
  char out;

  (void) re; /* kept in the signature for the callers, unused here */

  /* One input character produces at most two output characters
     ('{' -> "[^"), so 2 * length + 1 can never be exceeded, whatever the
     input looks like. */
  if ((pre = (char *) malloc(2 * strlen(pros) + 1)) == NULL)
    error_fatal("memory", NULL);

  prei = pre;
  for (prosi = pros; *prosi != '\0'; prosi++) {

    switch (*prosi) {
    case '-': continue; /* separator: emit nothing, never step backwards */
    case '{': case '[': out = '['; break;
    case '}': case ']': out = ']'; break;
    case '(': out = '{'; break;
    case ')': out = '}'; break;
    case '>': out = '$'; break;
    case '<': out = '^'; break;
    case 'x': out = '.'; break;
    case 'X': out = '.'; break; /* needed for case insensitive search */
    default:  out = *prosi; break;
    }

    /* in nucleotide mode 'n' / 'N' is a wildcard as well */
    if (pa->is_nucl && (out == 'n' || out == 'N')) out = '.';
    *prei++ = out;

    /* '{' opens an exclusion list: negate the bracket expression */
    if (*prosi == '{') *prei++ = '^';
  }
  *prei = '\0';

  return pre;
}



/* Tell whether c is an accepted residue letter.
 *
 * Accepted: every upper-case letter except 'J', 'O' and 'X'
 * ('U', selenocysteine, is accepted).
 *
 * Returns 1 when c is accepted, 0 otherwise.
 */
static int valid_aa(char c) {
  /* <ctype.h> functions require a value representable as unsigned char
     (or EOF); a plain char may be negative. */
  unsigned char uc = (unsigned char) c;

  return (isalpha(uc) && isupper(uc) && (strchr("JOX", c) == NULL)); /* U = Selenocysteine */
}

/* automaton of valid prosite patterns


                             _A-Z_                      _0-9_              _0-9_                 
                             \   /                      \   /              \   /                 
      __<__    __[_ (2) _A-Z_ (4) __]__   __(_ (7) _0-9_ (8) __,_ (9) _0-9_ (10) __)_   ___>___  
     /     \  /                        \ /                  \                        \ /       \ 
  [(0)] --- (1) ---------A-Z--------- ((6))                   \____)_______________ ((11))    ((12))
 [start]    /|\                       / | \                                          /  \     (stop)
           / | \_{_ (3) _A-Z_ (5) _}_/  |  \________________________________________/    \      
           | |               /   \      |                                                |      
           | |              /_A-Z_\     |                                                |      
	   | \_____________-___________/                                                 |      
	   \_________________________-_______________-___________________________________/      
                                                                                               
				      
*/

/*

                                    
                               A-Z __________>______(13) _______]_______________   
                               \ //                                             \                           
              __[___(2)___A-Z___(3)__]__                                         |
     ___<___ / ___________-____________ \                  _0-9_                 |
    /       \|/                        \|                  \   /                 |
  ((0)) ___ (1) _________A-Z__________((6))__(__(10)__0-9__(11)__)__((12))__>__((15)) 
           //|\                         /        |                    | \        |
          // | \__{__(4)__A-Z__(5)__}__/         /                    |  \       |
         | | |                / \\__________>__ / __ (14)______}_________________/
         | | |                A-Z              /                       
         | | \____________________________-__ / ______________________/  |
	 | |                                 /                           |
         | \___x___(7)__(__(8)__0-9__(9)__,_/                            |
	 \___-_____/                / \\___________________)_____________/
                                    0-9          
                      

*/

/*

                                    
                               A-Z __________>______(13) _______]_______________   
                               \ //                                             \                           
              __[___(2)___A-Z___(3)__]__   __________________________            |
     ___<___ /                          \ /                _0-9_     \   _______ |
    /       \|                          |/                 \   /      \ /       \|
  ((0)) ___ (1) _________A-Z___________(6)___(__(10)__0-9__(11)__)___(12)___>__((15)) 
            /|\                         /        |                    | \\__      |
           / | \__{__(4)__A-Z__(5)__}__/         /                    |  \  \     |
           | |                / \\__________>__ / __ (14)______}_________________/
           | |                A-Z              /                             
           | \____________________________-__ / ______________________/  |  |
	   |                                 /                           |  |
           \___x___(7)__(__(8)__0-9__(9)__,_/                            |  |
	            \               / \\___________________)_____________/  |
                     \              0-9                                     |
                      \_____________________________________________________/

*/

/* Check the syntax of a PROSITE pattern with a finite-state automaton.
 *
 * pprosite : NUL-terminated pattern to check.
 * joker    : wildcard character; its upper-case form is accepted too.
 *
 * States (the numbering appears to follow the last of the diagrams that
 * precede this function):
 *    0  start, an optional '<' is consumed
 *    1  start of a position: '[', '{', wildcard or residue expected
 *    2  just after '[', a residue is required
 *    3  inside '[...]': residues, ']' or '>'
 *    4  just after '{', a residue is required
 *    5  inside '{...}': residues, '}' or '>'
 *    6  after a residue or a closed group: '(', '-' or '>'
 *    7  after the wildcard: '(', '-' or '>'
 *    8  after the wildcard's '(', a digit is required
 *    9  inside the first number of a wildcard count: digit, ',' or ')'
 *   10  a digit is required (count after a residue/group, or after ',')
 *   11  inside a number that must be closed by ')'
 *   12  after ')': '-' or '>'
 *   13  after '>' inside '[...]', ']' is required
 *   14  after '>' inside '{...}', '}' is required
 *   15  final state, no further character is allowed
 *
 * Returns
 *   CORRECT_PROSITE    the whole pattern was consumed in state 6, 7, 12 or 15
 *   CARELESS_PROSITE   scanning stopped in state 6, 7 or 12
 *   CASE_PROSITE       scanning stopped on 'X' in state 1, or in states 1-5
 *                      on a character whose upper-case form is a valid residue
 *   UNKNOWNAA_PROSITE  scanning stopped in states 1-5 on any other character
 *   INCORRECT_PROSITE  every other failure, including an empty pattern
 */
int prosite_verif (char *pprosite, char joker) {

  int etat = 0;            /* current automaton state */
  char *ppro = pprosite;   /* read cursor */
  int err = 0;             /* set when no transition accepts *ppro */
  char ucjoker;

  /* <ctype.h> functions need an unsigned char value */
  ucjoker = (char) toupper((unsigned char) joker);

  while (*ppro != '\0') {

    switch (etat) {
    case 0:
      if (*ppro == '<') ppro++;
      etat = 1;
      break;
    case 1:
      if (*ppro == '[') { ppro++; etat = 2; }
      else if (*ppro == '{') { ppro++; etat = 4; }
      else if (*ppro == joker || *ppro == ucjoker) { ppro++; etat = 7; }
      else if (valid_aa(*ppro)) { ppro++; etat = 6; }
      else err = 1;
      break;
    case 2:
      if (valid_aa(*ppro)) { ppro++; etat = 3; }
      else err = 1;
      break;
    case 3:
      if (valid_aa(*ppro)) ppro++;
      else if (*ppro == ']') { ppro++; etat = 6; }
      else if (*ppro == '>') { ppro++; etat = 13; }
      else err = 1;
      break;
    case 4:
      if (valid_aa(*ppro)) { ppro++; etat = 5; }
      else err = 1;
      break;
    case 5:
      if (valid_aa(*ppro)) ppro++;
      else if (*ppro == '}') { ppro++; etat = 6; }
      else if (*ppro == '>') { ppro++; etat = 14; }
      else err = 1;
      break;
    case 6:
      if (*ppro == '(') { ppro++; etat = 10; }
      else if (*ppro == '-') { ppro++; etat = 1; }
      else if (*ppro == '>') { ppro++; etat = 15; }
      else err = 1;
      break;
    case 7:
      if (*ppro == '(') { ppro++; etat = 8; }
      else if (*ppro == '-') { ppro++; etat = 1; }
      else if (*ppro == '>') { ppro++; etat = 15; }
      else err = 1;
      break;
    case 8:
      if (isdigit((unsigned char) *ppro)) { ppro++; etat = 9; }
      else err = 1;
      break;
    case 9:
      if (isdigit((unsigned char) *ppro)) ppro++;
      else if (*ppro == ',') { ppro++; etat = 10; }
      else if (*ppro == ')') { ppro++; etat = 12; }
      /* without this branch an unexpected character is never consumed
         and the loop does not terminate */
      else err = 1;
      break;
    case 10:
      if (isdigit((unsigned char) *ppro)) { ppro++; etat = 11; }
      else err = 1;
      break;
    case 11:
      if (isdigit((unsigned char) *ppro)) ppro++;
      else if (*ppro == ')') { ppro++; etat = 12; }
      else err = 1;
      break;
    case 12:
      if (*ppro == '-') { ppro++; etat = 1; }
      else if (*ppro == '>') { ppro++; etat = 15; }
      else err = 1;
      break;
    case 13:
      if (*ppro == ']') { ppro++; etat = 15; }
      else err = 1;
      break;
    case 14:
      if (*ppro == '}') { ppro++; etat = 15; }
      else err = 1;
      break;
    default: /* 15 */
      err = 1;
      break;
    }

    if (err) break;

  }

  /* Scanning stopped before the end: classify the offending character. */
  if (*ppro) {

    if ((etat == 6 || etat == 7 || etat == 12) && *ppro != '-')
      return CARELESS_PROSITE;

    if (etat == 1 && *ppro == 'X') return CASE_PROSITE;

    if (etat > 0 && etat < 6 &&
        !valid_aa(*ppro)) {
      if (valid_aa((char) toupper((unsigned char) *ppro))) return CASE_PROSITE;
      else return UNKNOWNAA_PROSITE;
    }

    else return INCORRECT_PROSITE;
  }

  /* The whole pattern was read: it must have ended in an accepting state. */
  if (etat != 15 && etat != 12 && etat != 6 && etat != 7)
    return INCORRECT_PROSITE;

  return CORRECT_PROSITE;
}


/* Map a verification status code to a human readable message.
 *
 * err : one of the *_PROSITE codes; any code without a dedicated message
 *       (INCORRECT_PROSITE included) yields the generic text.
 *
 * Returns a pointer to a string literal; it must not be modified or freed.
 */
char *error_verif_prosite (int err) {

  if (err == UNKNOWNAA_PROSITE)
    return "unknown aminoacids";

  if (err == CARELESS_PROSITE)
    return "aminoacid positions are not separated by '-'";

  if (err == CASE_PROSITE)
    return "aminoacids has to be upper case letters and 'x' wildcards lower case";

  /* INCORRECT_PROSITE and everything else */
  return "invalid format of prosite pattern";
}






