#!/usr/bin/python
# Description: Use eFetch to retrieve a protein sequence given its id and
#              predict if it's a signalling peptide
# Author: Francisco Roque
# Email: chico@cbs.dtu.dk
# Version: SignalP 3.1
# Date: 2008-01-23
#
# Usage: pass the protein id as the first command-line argument
#        (test with sequence Q9BS26)
#
# Flow: fetch the protein record through the eFetch SOAP service, submit its
# sequence to the SignalP SOAP service, poll until the job reports FINISHED,
# then print every returned record whose comment field is 'Y'.
#
# Every print below uses the single-argument parenthesised form, which emits
# the same text under the Python 2 print statement and also parses under
# Python 3.

import sys
import time

from SOAPpy import WSDL

# Seconds to wait between two consecutive polls of the SignalP job queue
POLL_INTERVAL_SECONDS = 5

# Receive the protein id from the command line; fail with a usage message
# instead of a bare IndexError when it is missing
if len(sys.argv) < 2:
    sys.stderr.write('Usage: %s <protein_id>\n' % sys.argv[0])
    sys.exit(1)
protid = sys.argv[1]

# Define wsdl locations
EFetchWSDL = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/efetch.wsdl'
SignalPWSDL = 'http://www.cbs.dtu.dk/ws/SignalP/SignalP_3_1.wsdl'

# Establish the endpoint for the eFetch wsdl
EUtilsProxy = WSDL.Proxy(EFetchWSDL)

# Define the input data to eFetch
eFetchIn = {'db': 'protein', 'id': protid}

# Run the run_eFetch service with the input data
eFetchOut = EUtilsProxy.run_eFetch(eFetchIn)

# Look the record up once; the accession and the sequence are both reused
# further down
gbseq = eFetchOut['GBSeq']
accession = gbseq['GBSeq_primary-accession']
sequence = gbseq['GBSeq_sequence']
print('Fetched sequence %s' % accession)

# Define Signalp input data (a bit more complex): one entry carrying the
# accession as identifier and the fetched sequence, for organism type 'euk'
SignalPIn = {
    'organism': 'euk',
    'sequences': {
        'entry': {
            'ident': accession,
            'seq': sequence,
        },
    },
}

# Establish the endpoint for the SignalP wsdl
SignalPProxy = WSDL.Proxy(SignalPWSDL)

# The service will return a jobid (Asynchronous service)
jobid = SignalPProxy.runService(SignalPIn)
print('SignalP service launched with jobid: %s' % jobid.jobid)

# The pollQueue service needs to be run to get the job status.
# Do not continue until the job is finished. The sleep sits between polls, so
# there is neither a back-to-back double poll on entry nor a wasted wait once
# FINISHED has been reported.
# NOTE(review): this loop never ends if the service reports a terminal status
# other than 'FINISHED' (e.g. a rejected job) -- confirm the status vocabulary
# of pollQueue and add an abort path if one exists.
job = SignalPProxy.pollQueue(jobid)
while job.status != 'FINISHED':
    print('The job is %s' % job.status)
    time.sleep(POLL_INTERVAL_SECONDS)
    job = SignalPProxy.pollQueue(jobid)
print('The job is FINISHED, fetching results.')

# Getting the output of the server
SignalPOut = SignalPProxy.fetchResult(jobid)

# The sequence is the same for every record, so upper-case it only once
sequence_upper = sequence.upper()

# loop over the output and print if predicted as a signalling peptide
# NOTE(review): assumes gff_record iterates as a sequence of records -- confirm
# how SOAPpy unmarshals a response that contains a single record.
for record in SignalPOut.prediction.gff_record:
    if record.comment == 'Y':
        print("seqname: %s" % record.seqname)
        print("score: %s" % record.score)
        print("seq: %s" % sequence_upper)
        print("start: %s" % record.start)
        print("end: %s" % record.end)