/* -----------------------------------------------------------------
 file      : /home/schuerer/prosite/src/fasta.c

 author    : Schuerer <schuerer@pasteur.fr>
 creation  : <Wed May 16 23:28:18 2001>
 Time-stamp: <Thu Nov 15 11:00:20 2001>
 Dev-stage : under construction

 description : FASTA file access: open a file, read one entry at a
               time, and release the strings of an entry.

-------------------------------------------------------------------- */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>

#ifdef STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#endif

#include "mytag.h"
#include "error.h"
#include "fasta.h"

#define BUFSIZE 100

#define IDCOM 0
#define SEQ 1

/* internal macros */

/* internal prototypes */

static int strcpy_space (char *dest, char *src);

/* function definitions */

/*
 * Read the next FASTA entry from IN into *s.
 *
 * The character at the current stream position is consumed without being
 * inspected; both fopen_fasta() and the end-of-entry handling below leave
 * the stream positioned on the '>' that opens an entry.  The first
 * non-blank line is split at its first ' ' into s->id (before the blank)
 * and s->desc (after it, leading whitespace skipped; "" when there is no
 * blank).  Every following non-blank line has all whitespace removed and
 * is appended to s->seq.  Reading stops at end of file or at a line whose
 * first character is '>', which is pushed back for the next call.
 *
 * s->id, s->desc and s->seq are reset to NULL and then allocated here with
 * malloc/realloc; fseq_free() releases them.
 *
 * Returns:
 *   EOF    the stream was already at end of file (s is not modified)
 *   TRUNC  the input ended (or a read failed) before a line was closed by
 *          '\n'; fields filled so far stay allocated in s
 *   NOSEQ  no sequence line was found; s->id / s->desc may be allocated
 *   OK     an entry was read
 *
 * Allocation and ungetc failures are reported through error_fatal().
 * NOTE(review): error_fatal() is presumably non-returning -- confirm.
 */
int getfseq(fseq_t *s, FILE *IN) {

  char *buf, *tmp, *ebuf, *bufi;
  char *eid, *scom;
  size_t bufsize, len;
  size_t idlen, desclen, blen, slen;
  int state;

  /* consume the leading '>'; nothing to do if the stream is exhausted */
  (void) fgetc(IN);
  if (feof(IN))
    return EOF;

  bufsize = BUFSIZE;
  if ((buf = malloc(bufsize)) == NULL)
    error_fatal("memory", NULL);

  state = IDCOM;
  slen = 0;
  s->seq = s->id = s->desc = NULL;

  /* fgets with size 2 fetches exactly one character: the first of a line */
  while (fgets(buf, 2, IN) != NULL) {

    /* start of the next entry: leave the '>' for the next call */
    if (*buf == '>') {
      if (ungetc('>', IN) != '>')
        error_fatal("ungetc >", NULL);
      break;
    }

    /* read the rest of the line, enlarging the buffer only when it is
       full; only the freshly read part is searched for the newline */
    len = strlen(buf);
    ebuf = strchr(buf, '\n');
    while (ebuf == NULL) {
      if (len + 1 >= bufsize) {
        bufsize += BUFSIZE;
        if ((tmp = realloc(buf, bufsize)) == NULL)
          error_fatal("memory", NULL);
        buf = tmp;
      }
      /* bufsize - len >= 2 here, so fgets can always make progress */
      if (fgets(buf + len, (int) (bufsize - len), IN) == NULL)
        break;
      ebuf = strchr(buf + len, '\n');
      len += strlen(buf + len);
    }

    /* end of file or read error before the end of the line */
    if (ebuf == NULL) {
      free(buf);
      return TRUNC;
    }

    *ebuf = '\0';

    /* skip lines holding only whitespace */
    bufi = buf;
    while (isspace((unsigned char) *bufi)) { bufi++; }
    if (*bufi == '\0') { continue; }

    /* store the line */
    switch (state) {
    case IDCOM:
      /* id: everything before the first blank, or the whole line */
      eid = strchr(buf, ' ');
      if (eid == NULL) {
        eid = scom = ebuf;
      } else {
        scom = eid + 1;
        while (scom != ebuf && isspace((unsigned char) *scom)) scom++;
      }
      idlen = (size_t) (eid - buf);
      if ((s->id = malloc(idlen + 1)) == NULL)
        error_fatal("memory", NULL);
      memcpy(s->id, buf, idlen);
      s->id[idlen] = '\0';

      /* description: remainder of the line, possibly empty */
      desclen = (size_t) (ebuf - scom);
      if ((s->desc = malloc(desclen + 1)) == NULL)
        error_fatal("memory", NULL);
      memcpy(s->desc, scom, desclen + 1);  /* includes the '\0' at *ebuf */

      state = SEQ;
      break;

    default: /* SEQ */
      /* strip whitespace in place, then append to the sequence */
      (void) strcpy_space(buf, buf);
      blen = strlen(buf);
      if ((tmp = realloc(s->seq, slen + blen + 1)) == NULL)
        error_fatal("memory", NULL);
      s->seq = tmp;
      memcpy(s->seq + slen, buf, blen + 1);
      slen += blen;
      break;
    }
  }
  free(buf);

  if (s->seq == NULL)
    return NOSEQ;

  return OK;
}

/*
 * Copy src to dest while dropping every whitespace character (as defined
 * by isspace), and NUL-terminate dest.
 *
 * dest may be the same buffer as src: the write position never gets ahead
 * of the read position.  dest must be able to hold at least as many bytes
 * as src, terminator included.
 *
 * Returns the number of whitespace characters that were dropped.
 */
static int strcpy_space (char *dest, char *src) {

  int sp = 0;
  char *srci;
  char *desti = dest;

  for (srci = src; *srci != '\0'; srci++) {
    /* <ctype.h> functions need a value in the unsigned char range;
       a plain char may be negative */
    if (isspace((unsigned char) *srci)) sp++;
    else *(desti++) = *srci;
  }
  *desti = '\0';

  return sp;
}

/*
 * Release the id, description and sequence strings held by *s.
 *
 * The structure itself is not freed.  The three pointers are reset to
 * NULL afterwards so that calling this function again on the same
 * structure is harmless (free(NULL) is a no-op).
 */
void fseq_free(fseq_t *s) {
  free(s->id);
  free(s->desc);
  free(s->seq);
  s->id = s->desc = s->seq = NULL;
}

/*
 * Open a FASTA file for reading and check that it starts like one.
 *
 * file is a path, or "-" to read from stdin.  Leading whitespace is
 * skipped; the first other character must be '>' and is pushed back, so
 * the returned stream is positioned on it.
 *
 * Failures (cannot open, nothing but whitespace, first character not '>',
 * push-back failure) are reported through error_fatal().
 * NOTE(review): error_fatal() is presumably non-returning -- confirm.
 */
FILE *fopen_fasta (char *file) {

  FILE *IN;
  int c;  /* int, not char: must be able to hold EOF as returned by fgetc */

  if (strcmp(file, "-") == 0) IN = stdin;
  else if ((IN = fopen(file, "r")) == NULL)
    error_fatal (file, NULL);

  /* skip leading whitespace; isspace(EOF) is 0, so EOF ends the loop */
  do {
    c = fgetc(IN);
  } while (isspace(c));

  if (feof(IN))
    error_fatal (file, "empty file");

  if (c != '>')
    error_fatal (file, "not in fasta format");

  if (ungetc(c, IN) != '>')
    error_fatal (file, NULL);
  return IN;
}

