#! /local/bin/perl -w
# __________________________________________________________________
# file      : /home/schuerer/prose/utils/prose+.pl
#
# author    : Schuerer <schuerer@pasteur.fr>
#
# creation  : <Tue Feb 12 15:07:16 2002>
#
# Time-stamp: <Thu Oct 18 14:47:53 2001>
#
# Dev-stage : under construction
#
# __________________________________________________________________

# POD Documentation

=head1 NAME

prose+ - add PROSITE documentation to the output of B<prose>

=head1 SYNOPSIS

B<prose+> [F<prose.out>]

=head1 DESCRIPTION

B<prose+> takes a B<prose> output and adds the documentation from the
F<prosite.doc> file to this output. The B<prose> output has to be produced
with the B<-d> option in order to get the accession number of the 
prosite documentation entry.

=head1 EXAMPLES

=over 4

=item golden sp:TAUD_ECOLI |flat2fasta -| prose -d -| prose+ 

=back

=head1 ENVIRONMENT

=over 4

=item PROSITEDOC

Used to specify a prosite documentation directory.

=item PROSITEDATA

Used to specify a prosite database directory. 
If the B<PROSITEDOC> environment variable is not set, this directory is
searched for the documentation instead.

=back

=head1 SEE ALSO

=over 4

B<prose>(1)

=back

=head1 AUTHOR

Schuerer <schuerer@pasteur.fr>, Pasteur Institute.

=head1 APPENDIX

=cut

# __________________________________________________________________

# Let the code begin ..

use strict;

# Directory that holds prosite.doc: PROSITEDOC takes precedence, PROSITEDATA
# is the fallback, and the empty string is used when neither is set.
my $docpath = $ENV{'PROSITEDOC'} || $ENV{'PROSITEDATA'} || "";

# Documentation accession numbers, in order of first appearance.
my @prositedocs = ();

# Accession numbers already recorded.  A hash lookup compares the whole
# string exactly, whereas matching the accession as a regex against the list
# would treat substrings as duplicates and interpret metacharacters.
my %seen_doc = ();

# get documentation accession numbers and reprint prose output
print "Pattern occurences:\n";
while (my $line = <>) {

    # Every occurrence line must consist of exactly seven whitespace
    # separated fields; anything else aborts the run.
    my @fields = split(/\s+/, $line);
    $#fields == 6 || die "not enough fields in the prose output (use -d option)\n";

    # The sixth field is taken as the documentation accession number
    # (the one prose adds with -d, according to the POD above).
    my $accession = $fields[5];
    push(@prositedocs, $accession) unless $seen_doc{$accession}++;

    # print occurrence unchanged
    print $line;
}

exit if $#prositedocs < 0; # no pattern found

# get documentation
print "\nDocumentation:";

# Three-argument open with a lexical handle: the path is never parsed for
# mode characters and the handle cannot clash with another bareword.
my $docfile = "$docpath/prosite.doc";
open(my $doc_fh, '<', $docfile) or die "$docfile: $!\n";

# Header line of an entry: {<pattern accession>; <pattern name>}
my $header_re = qr/^\{\w+;\s+(\w+)\}/;

foreach my $doc ( sort @prositedocs ) {

    # \Q..\E makes the accession match literally.
    my $entry_start = qr/^\{\Q$doc\E\}/;
    my $found       = 0;

    # Search forward from the current position first (cheap when the file
    # is ordered like the sorted accessions), then once more from the top.
    # Rewinding after a miss keeps one missing or out-of-order entry from
    # swallowing every accession that follows it.
    SCAN:
    for ( 1 .. 2 ) {
        while ( defined( my $entry_line = <$doc_fh> ) ) {
            if ( $entry_line =~ $entry_start ) {
                $found = 1;
                last SCAN;
            }
        }
        seek( $doc_fh, 0, 0 ) or die "$docfile: $!\n";
    }

    if ( !$found ) {
        warn "$doc: no documentation entry found in $docfile\n";
        next;
    }

    # The line right after the accession is always consumed as the header;
    # its name is only used when the pattern really matched, so a
    # malformed header no longer prints an undefined $1.
    my @names  = ();
    my $header = <$doc_fh>;
    push( @names, $1 ) if defined $header && $header =~ $header_re;

    # An entry may list several header lines before its text starts
    # (NOTE(review): based on the prosite.doc layout, confirm against the
    # installed file).  Collect them all; the first line that is not a
    # header is the marker line and is dropped.
    while ( defined( my $next_line = <$doc_fh> ) ) {
        last if $next_line !~ $header_re;
        push( @names, $1 );
    }

    print "\n$doc ", join( ' ', @names ), "\n";

    # Copy the entry text up to, but not including, the {END} marker.
    while ( defined( my $text_line = <$doc_fh> ) ) {
        last if $text_line =~ /^\{END\}/;
        print $text_line;
    }

}
close $doc_fh;





