/* -----------------------------------------------------------------
 file      : /home/schuerer/prosite/src/pat.c

 author    : Schuerer <schuerer@pasteur.fr>
 creation  : <Wed May 16 23:28:26 2001>
 Time-stamp: <Tue Feb 19 13:43:07 2002>
 Dev-stage : under construction

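 description : Readers for pattern sources: plain pattern lists
               (getplist) and PROSITE database entries (getprosite,
               fopen_prosite), plus initialisation and release of
               pat_t records (pat_init, pat_free).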


-------------------------------------------------------------------- */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>

#ifdef STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#endif

#include "mytag.h"
#include "error.h"
#include "pat.h"

#define BUFSIZE 100

/* internal macros */

/* internal prototypes */

/* function definitions */

/*
 * getplist - read the next pattern from a plain pattern list.
 *
 * Exactly one line of IN is consumed.  A line is either
 *     pattern
 * or
 *     id <blanks> pattern
 * where fields are separated by spaces or tabs.  Leading white space is
 * skipped and anything following the pattern field is ignored.
 *
 * p  : receives the result.  It is reset with pat_init(), its type is
 *      set to PONLY, and p->pat (plus p->ac when an id is present) are
 *      filled with freshly malloc'ed strings.
 * IN : stream to read from.  It has to be seekable: a line longer than
 *      the current buffer is re-read after the buffer has been enlarged.
 *
 * returns EOF   when the stream is already exhausted (p is untouched),
 *         TRUNC when the line could not be read up to its newline,
 *         ERR   when the line holds no pattern,
 *         OK    otherwise.
 */
int getplist(pat_t *p,FILE *IN) {

  int c;   /* int, not char: EOF must stay distinct from every byte value */
  char *buf, *ebuf, *isbuf, *iebuf;
  int bufsize, plen;

  /* no more pattern */
  if ((c = fgetc(IN)) == EOF) return EOF;
  else if (ungetc(c, IN) != c)
    error_fatal("ungetc getplist", NULL);

  /* read */
  bufsize = BUFSIZE;
  if ((buf = (char *) malloc(bufsize*sizeof(char))) == NULL)
    error_fatal ("memory" , NULL);

  pat_init(p);
  p->type = PONLY;

  ebuf = NULL;
  while (fgets(buf, bufsize, IN) != NULL &&
         (ebuf = strchr(buf, '\n')) == NULL) {

    /* last line of the file has no terminating newline */
    if (feof(IN)) { free (buf); return TRUNC;}

    /* line longer than the buffer: step back over the part just read,
       enlarge the buffer and read the line again from its start.
       The length is negated after the conversion to long so that no
       out-of-range unsigned value is ever converted. */
    if (fseek(IN, -(long) strlen(buf), SEEK_CUR) != 0)
      error_fatal ("file", NULL);
    bufsize += BUFSIZE;
    if ((buf = (char *) realloc(buf, bufsize*sizeof(char))) == NULL)
      error_fatal("memory", NULL);
  }

  /* fgets() gave up before any newline was seen: nothing usable */
  if (ebuf == NULL) { free (buf); return TRUNC; }
  *ebuf = '\0';

  /* storage */
  isbuf = buf;
  /* <ctype.h> functions need an unsigned char value */
  while( *isbuf && isspace((unsigned char) *isbuf) ) isbuf++;

  if ( (iebuf = strpbrk(isbuf, "\t ")) != NULL ) {
    /* have an id for the pattern */
    plen = (int) (iebuf - isbuf);
    if ((p->ac = (char *) malloc((plen+1)*sizeof(char))) == NULL)
      error_fatal("memory", NULL);
    memcpy(p->ac, isbuf, (size_t) plen);
    p->ac[plen] = '\0';

    /* skip separation blanks */
    while( *iebuf && isspace((unsigned char) *iebuf) ) iebuf++;
    isbuf = iebuf;
  }

  if ( *isbuf ) {
    /* the pattern runs up to the next blank or to the end of the line */
    if ( (iebuf = strpbrk(isbuf, "\t ")) == NULL ) iebuf = ebuf;
    plen = (int) (iebuf - isbuf);
    if ((p->pat = (char *) malloc((plen+1)*sizeof(char))) == NULL)
      error_fatal("memory", NULL);
    memcpy(p->pat, isbuf, (size_t) plen);
    p->pat[plen] = '\0';
  }
  else {
    /* delimiters at the end, so pat = id (both NULL on a blank line) */
    p->pat = p->ac;
    p->ac = NULL;
  }

  free (buf);
  if ( p->pat == NULL) return ERR;
  return OK;
}

/* Return a freshly malloc'ed, NUL-terminated copy of the LEN characters
   starting at SRC; memory exhaustion is reported via error_fatal(). */
static char *prosite_copy(const char *src, size_t len) {

  char *dst;

  if ((dst = (char *) malloc((len + 1) * sizeof(char))) == NULL)
    error_fatal("memory", NULL);
  memcpy(dst, src, len);
  dst[len] = '\0';
  return dst;
}

/*
 * getprosite - read the next entry of a PROSITE formatted file.
 *
 * Lines are consumed up to and including the "//" line that closes the
 * entry.  One trailing '.' is removed from every line.  Each line starts
 * with a two letter code followed by blanks; the codes handled are
 *   ID  text up to ';' -> p->id, then the entry type (PATTERN, RULE or
 *       MATRIX) -> p->type
 *   AC  text up to ';' -> p->ac
 *   DO  text up to ';' -> p->acdoc
 *   DE  rest of line, successive DE lines are concatenated -> p->desc
 *   PA  rest of line, successive PA lines are concatenated -> p->pat
 *   CC  only a line starting with "CC   /SKIP-FLAG=TRUE" is honoured and
 *       sets p->skip
 * Lines with any other code are ignored.
 *
 * p  : receives the result; it is reset with pat_init() first and its
 *      string fields are filled with malloc'ed storage.
 * IN : stream to read from.  It has to be seekable: a line longer than
 *      the current buffer is re-read after the buffer has been enlarged.
 *
 * returns EOF   when the stream is already exhausted (p is untouched),
 *         TRUNC when the last line of the file lacks its newline,
 *         OK    otherwise (also when the file ends, after a complete
 *               line, before any "//" was seen).
 * Lines shorter than two characters, an unknown entry type and a PATTERN
 * entry without PA line are reported through error_fatal().
 *
 * NOTE(review): p->type is only assigned when an ID line is met and
 * pat_init() does not set it, so the final type test relies on the
 * entry having an ID line -- confirm that callers guarantee this.
 */
int getprosite(pat_t *p, FILE *IN) {

  char *buf, *ebuf, *bufe, *bufi;
  int bufsize;
  int plen, dlen;
  int has_sep;
  size_t len;

  /* no more sequences */
  if (fgetc(IN) == EOF)
    return EOF;
  if (fseek(IN, -1, SEEK_CUR) != 0)
    error_fatal("file", NULL);

  /* read */
  bufsize = BUFSIZE;
  if ((buf = (char *) malloc(bufsize*sizeof(char))) == NULL)
    error_fatal ("memory" , NULL);

  plen = dlen = 0;
  pat_init(p);
  while (fgets(buf, bufsize, IN) != NULL) {

    /* last line of the file has no terminating newline */
    if (feof(IN)) {
      free (buf);
      return TRUNC;
    }

    /* end of line verification: when the line did not fit, step back
       over the part just read, enlarge the buffer and read it again.
       The length is negated after the conversion to long so that no
       out-of-range unsigned value is ever converted. */
    if ((ebuf = strchr(buf, '\n')) == NULL) {
      if (fseek(IN, -(long) strlen(buf), SEEK_CUR) != 0)
        error_fatal("file", NULL);
      bufsize += BUFSIZE;
      if ((buf = (char *) realloc(buf, bufsize*sizeof(char))) == NULL)
        error_fatal("memory", NULL);
      continue;
    }
    *ebuf = '\0';
    /* drop one trailing '.'; the ebuf > buf test keeps an empty line
       from looking at the byte in front of the buffer */
    if (ebuf > buf && *(ebuf - 1) == '.') *(--ebuf) = '\0';

    /* end entry */
    if (strcmp(buf, "//") == 0)
      break;

    if ((ebuf - buf) < 2)
      error_fatal(buf, "line is not in prosite format");

    /* storage: bufi is moved to the first non-blank after the code */
    bufi = buf + 2;
    while (*bufi && isspace((unsigned char) *bufi)) bufi++;
    bufe = bufi;
    if (strncmp(buf, "ID", 2) == 0) {
      /* parsing ID */
      while (*bufe && *bufe != ';') bufe++;
      has_sep = (*bufe == ';');
      free(p->id);              /* a repeated ID line replaces the old one */
      p->id = prosite_copy(bufi, (size_t) (bufe - bufi));
      /* parsing type: it follows the ';' when there is one, otherwise
         the type is empty and rejected below */
      bufi = has_sep ? bufe + 1 : bufe;
      while (*bufi && isspace((unsigned char) *bufi)) bufi++;
      bufe = bufi;
      while (*bufe && *bufe != '.') bufe++;
      *bufe = '\0';
      if (strcmp(bufi, "PATTERN") == 0) p->type = PAT;
      else if (strcmp(bufi, "RULE") == 0) p->type = RULE;
      else if (strcmp(bufi, "MATRIX") == 0) p->type = MATRIX;
      else error_fatal (bufi, "unknown type of prosite pattern");
    }
    else if (strncmp(buf, "DE", 2) == 0) {
      /* append the rest of the line, terminator included */
      len = (size_t) (ebuf - bufi);
      if ((p->desc = (char *) realloc(p->desc, (dlen+len+1)*sizeof(char))) == NULL)
        error_fatal("memory", NULL);
      memcpy(p->desc + dlen, bufi, len + 1);
      dlen += (int) len;
    }
    else if (strncmp(buf, "AC", 2) == 0) {
      /* parsing AC */
      while (*bufe && *bufe != ';') bufe++;
      free(p->ac);
      p->ac = prosite_copy(bufi, (size_t) (bufe - bufi));
    }
    else if (strncmp(buf, "DO", 2) == 0) {
      /* parsing DO */
      while (*bufe && *bufe != ';') bufe++;
      free(p->acdoc);
      p->acdoc = prosite_copy(bufi, (size_t) (bufe - bufi));
    }
    else if (strncmp(buf, "PA", 2) == 0) {
      /* append the rest of the line, terminator included */
      len = (size_t) (ebuf - bufi);
      if ((p->pat = (char *) realloc(p->pat, (plen+len+1)*sizeof(char))) == NULL)
        error_fatal("memory", NULL);
      memcpy(p->pat + plen, bufi, len + 1);
      plen += (int) len;
    }
    else if (strncmp(buf, "CC   /SKIP-FLAG=TRUE", 20) == 0 ) {
      p->skip = TRUE;
    }
  }
  free(buf);

  if (p->type == PAT && p->pat == NULL)
    error_fatal(p->id, "missing pattern");

  return OK;

}

/* pat_init - put P into its empty state: every string field is set to
   NULL and the skip flag is cleared.  Nothing is freed here and the
   type field is left as it is. */
void pat_init (pat_t *p) {
  p->id    = NULL;
  p->ac    = NULL;
  p->acdoc = NULL;
  p->desc  = NULL;
  p->pat   = NULL;

  p->skip = FALSE;
}

/* pat_free - release the strings owned by P (id, ac, acdoc, desc, pat).
   Fields that are NULL are skipped for free, since free(NULL) does
   nothing.  P itself is not freed and its fields are not reset. */
void pat_free (pat_t *p) {

  free(p->id);
  free(p->ac);
  free(p->acdoc);
  free(p->desc);
  free(p->pat);

}

/*
 * fopen_prosite - open a PROSITE formatted file and skip its header.
 *
 * The file is opened for reading and lines are consumed up to and
 * including the first line that starts with "//".  Every line in front
 * of it has to be at least two characters long and start with "CC".
 *
 * file : name of the file to open.
 *
 * returns the open stream, positioned just after the header.  When the
 *         file ends, after a complete line, before any "//" was seen,
 *         the stream is returned positioned at end of file.
 * An unreadable file, a last line without newline, a too short line and
 * a line that does not start with "CC" are reported via error_fatal().
 */
FILE *fopen_prosite (char *file) {

  FILE *IN;
  char *buf, *ebuf;
  int bufsize;


  if ((IN = fopen(file, "r")) == NULL) {
    error_fatal (file, NULL);
    return NULL;
  }

  bufsize = BUFSIZE;
  if ((buf = (char *) malloc(bufsize*sizeof(char))) == NULL)
    error_fatal ("memory" , NULL);

  while (fgets(buf, bufsize, IN) != NULL) {

    /* last line of the file has no terminating newline */
    if (feof(IN))
      error_fatal (file, "unexpected end of file");

    /* end of line verification: when the line did not fit, step back
       over the part just read, enlarge the buffer and read it again.
       The length is negated after the conversion to long so that no
       out-of-range unsigned value is ever converted. */
    if ((ebuf = strchr(buf, '\n')) == NULL) {
      if (fseek(IN, -(long) strlen(buf), SEEK_CUR) != 0)
        error_fatal("file", NULL);
      bufsize += BUFSIZE;
      if ((buf = (char *) realloc(buf, bufsize*sizeof(char))) == NULL)
        error_fatal("memory", NULL);
      continue;
    }
    *ebuf = '\0';

    if ((ebuf - buf) < 2)
      error_fatal (file, "contains too short lines ");

    /* read to end of comments */
    if (strncmp(buf, "//", 2) == 0)
      break;

    if (strncmp(buf, "CC", 2) != 0)
      error_fatal (file, "not a prosite file");
  }

  free(buf);
  return IN;
}












