#!/usr/bin/perl
# Description: Prodigal gene prediction client
# Author: Peter Fischer Hallin
# Email: pfh@cbs.dtu.dk
# Version: Genome Atlas 3.0 ws2
# Date: 2009-09-01
#
# Reads FASTA formatted sequences from STDIN, submits them to the Genome Atlas
# web service (runProdigal), waits for the queued job to finish, fetches the
# resulting annotations and prints them to STDOUT either as tab separated
# annotation records (-ann) or as FASTA formatted ORFs (-fasta, the default).
# Progress information is written to STDERR.
#
# usage: prodigal.pl -t 11 -fasta < file.fsa
# usage: prodigal.pl -t 11 -ann   < file.fsa
#
# Note: "-t" works because Getopt::Long accepts unique abbreviations of "-ta".

use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;

my ( $opt_fasta, $opt_ann, $opt_ta );
GetOptions(
    "ann"   => \$opt_ann,      # output annotation format
    "fasta" => \$opt_fasta,    # output fasta format
    "ta:s"  => \$opt_ta,       # choose translation table (only 11 and 4 are supported)
) or die "usage: $0 [-ta 11|4] [-ann] [-fasta] < file.fsa\n";

# "ta:s" takes an optional value, so a bare "-ta" yields the empty string;
# treat that like an absent option and fall back to translation table 11.
$opt_ta    = 11 unless defined $opt_ta and length $opt_ta;
$opt_fasta = 1  unless defined $opt_ann or defined $opt_fasta;

# Project-local helper; expected to provide WSDLclient() and wait_job().
require "xml-compile.pl";

# create proxy to genome atlas
my $prodigal = WSDLclient(
    'http://www.cbs.dtu.dk/ws/GenomeAtlas/GenomeAtlas_3_0_ws2.wsdl'
);

# ---------------------------------------------------------------------------
# Parse FASTA from STDIN into @input: one hash per entry with the keys
# id (first word of the header), comment (rest of the header) and seq.
# ---------------------------------------------------------------------------
my @input;
while ( my $line = <STDIN> ) {

    # Strip the line ending (including a CR from DOS files) so it does not
    # end up in the captured header comment.
    $line =~ s/\s+\z//;

    if ( $line =~ /^>(\S+)\s*(.*)/ ) {
        push @input, { id => $1, comment => $2 };
    }
    elsif ( $line =~ /([A-Za-z\-]+)/ ) {

        # Without this guard a sequence line preceding the first header would
        # try to modify $input[-1] of an empty array, which is a fatal error
        # with an unhelpful message.
        die "sequence data found before the first FASTA header\n"
            unless @input;
        $input[-1]{seq} .= $1;
    }
}
die "no FASTA entries read from STDIN\n" unless @input;

# ---------------------------------------------------------------------------
# Submit the job. The position of each entry in @input is used as the
# sequence id sent to the service, so results can be mapped back below.
# ---------------------------------------------------------------------------
my @SEQUENCE = map {
    +{
        id      => $_,
        seq     => $input[$_]{seq},
        comment => $input[$_]{comment},
    }
} 0 .. $#input;

my $response = $prodigal->{runProdigal}->(
    parameters => {
        parameters => {
            transl_tbl   => $opt_ta,
            sequencedata => { sequence => [@SEQUENCE] },
        },
    },
);

die "error obtaining jobid\n"
    unless defined $response->{queueentry}->{queueentry};
my $jobid = $response->{queueentry}->{queueentry}->{jobid};

wait_job( $prodigal->{pollQueue}, $jobid );

# fetch the result
$response = $prodigal->{prodigalFetchResult}->( job => { jobid => $jobid } );

print STDERR "# parsing 'anndata' object\n";
my $anndata = $response->{parameters}->{anndata};
printf STDERR "# annotations from %s version %s\n",
    $anndata->{annsource}->{method},
    $anndata->{annsource}->{version};

# ---------------------------------------------------------------------------
# Report every annotated feature of every sequence.
# ---------------------------------------------------------------------------
foreach my $ann ( @{ $anndata->{ann} || [] } ) {

    # The sequence id returned by the service is the index into @input.
    my $record  = $input[ $ann->{sequence}->{id} ] || {};
    my $id      = $record->{id};
    my $comment = $record->{comment};
    my $seq     = $record->{seq};

    foreach my $feature ( @{ $ann->{annrecords}->{annrecord} || [] } ) {

        # begin/end are objects offering numify(); convert to plain numbers
        # before doing arithmetic with them.
        my $begin  = $feature->{range}->{begin}->numify;
        my $end    = $feature->{range}->{end}->numify;
        my $strand = $feature->{strand};
        my $score  = $feature->{score}[0]->{value};

        if ($opt_ann) {
            print join( "\t",
                $feature->{feature}, $begin, $end, $strand,
                $score, $id, $comment ),
                "\n";
        }

        if ($opt_fasta) {
            my $len = ( $end - $begin ) + 1;
            my $orf;

            # The id is passed as a printf argument (not interpolated into the
            # format) so a '%' in a FASTA id cannot corrupt the output.
            if ( $strand eq "+" ) {
                printf ">%s_CDS_%d-%d /score=%d\n", $id, $begin, $end, $score;

                # drop the last three bases of the feature
                $orf = substr( $seq, $begin - 1, $len - 3 );
            }
            else {

                # On the reverse strand the three bases to drop are the first
                # three of the forward-strand range; then reverse-complement.
                $orf = scalar reverse substr( $seq, $begin + 2, $len - 3 );

                # Complement including IUPAC ambiguity codes; identical to the
                # plain A/C/G/T mapping for unambiguous input.
                $orf =~ tr/ACGTUMRWSYKVHDBNacgtumrwsykvhdbn/TGCAAKYWSRMBDHVNtgcaakywsrmbdhvn/;
                printf ">%s_CDS_%d-%d /score=%d\n", $id, $end, $begin, $score;
            }

            # print the sequence wrapped at 60 characters per line
            for ( my $i = 0 ; $i < length($orf) ; $i += 60 ) {
                print substr( $orf, $i, 60 ), "\n";
            }
        }
    }
}