<?xml version="1.0" encoding="UTF-8"?>
<!--WARNING: Do not change anything inside this workflow file. BioExtract won't recognize your changes, or may even refuse to import it. Instead, modify the workflow in BioExtract after importing. This workflow file can be imported into BioExtract only.-->
  <s:bioextract xmlns:s="http://bioextract.org" version="2.1" log="0">
<!-- Root element of the workflow. Its children appear in this order:
     the workflow description, the processor definitions, the links that
     connect processor ports, and the output sink. -->
<!-- Workflow metadata: title, author, uid and the human-readable description. -->
<s:workflowdescription description="This workflow retrieves Liliopsida chloroplast petb gene sequences from NCBI Nucleotide, removes duplicate sequences and saves the results at BioExtract Server. These results are then converted into GenBank format and fed into Fetch Translation, which removes the translation from the CDS coding region. Translations are then used to build a multiple alignment using ClustalW." author="guest" uid="3faeca21-0776-4ac0-9b57-9fc2f03c3793" title="Liliopsida Protein Alignment"/>
<!-- Processor definitions, all serialized on the single line below:
       query            : string constant holding the NCBI nuccore search expression
       Xmknr            : described as removing duplicate FASTA records
                          (sequence type "dna", small and large cluster values both 90)
       FormatConversion : "From Format" fasta, "To Format" genbank
       FetchTranslation : "Feature name" CDS, "Qualifier name" gene
       ClustalW         : sequence type "protein", pairwise alignment "fast",
                          weight matrix "blosum", output format "clu"
     The same line also opens the first link element, whose sink attribute is on
     the following line: query:value feeds Xmknr:sequence_usa.
     No comment may be placed between those two lines because the tag is still open. -->
<s:processor name="query"><s:stringconstant>NCBI:nuccore:liliopsida AND Definition=chloroplast AND Definition=petb AND Definition=gene</s:stringconstant></s:processor><s:processor name="Xmknr"><s:description>"Reads multiple sequence records in FASTA format and removes duplicates"</s:description><s:helpurl>"http://www.vmatch.de/"</s:helpurl><s:parameter name="Sequence type (protein or DNA)" value="dna"/><s:parameter name="Minimum sequence length to include" value="1"/><s:parameter name="xdrop value for edit distance" value="2"/><s:parameter name="Small cluster value" value="90"/><s:parameter name="Large cluster value" value="90"/></s:processor><s:processor name="FormatConversion"><s:description>"Convert from one allowed format to another allowed format"</s:description><s:helpurl>"http://emboss.sourceforge.net/docs/themes/SequenceFormats.html"</s:helpurl><s:parameter name="To Format" value="genbank"/><s:parameter name="From Format" value="fasta"/></s:processor><s:processor name="FetchTranslation"><s:description>"Extracts translations from feature tables"</s:description><s:helpurl>""</s:helpurl><s:parameter name="Feature name" value="CDS"/><s:parameter name="Qualifier name" value="gene"/></s:processor><s:processor name="ClustalW"><s:description>"Multiple sequence alignment for DNA or proteins"</s:description><s:helpurl>"http://align.genome.jp/clustalw/clustalw_help.html"</s:helpurl><s:parameter name="Output format" value="clu"/><s:parameter name="Pairwise alignment" value="fast"/><s:parameter name="Sequence type" value="protein"/><s:parameter name="K-tuple(word) size" value="1"/><s:parameter name="Window size" value="5"/><s:parameter name="Gap penalty" value="3"/><s:parameter name="Number of top diagonals" value="5"/><s:parameter name="Scoring method" value="percent"/><s:parameter name="Gap open penalty" value="10"/><s:parameter name="Gap extension penalty" value="0.1"/><s:parameter name="Select weight matrix" value="blosum"/></s:processor><s:link source="query:value" 
sink="Xmknr:sequence_usa" />
<!-- Xmknr:outseq feeds FormatConversion:sequences_direct_data. -->
<s:link source="Xmknr:outseq" sink="FormatConversion:sequences_direct_data" />
<!-- FormatConversion:Graphics_in_PNG is routed to the sink named "FormatConversion"
     declared below.
     NOTE(review): no link in this file references the FetchTranslation or ClustalW
     processors, although the workflow description says data flows through both.
     Confirm against the workflow as displayed in BioExtract before relying on it. -->
 <s:link source="FormatConversion:Graphics_in_PNG" sink="FormatConversion" />
<!-- Workflow output sink; its metadata declares the single MIME type image/png. -->
<s:sink name="FormatConversion">
 <s:metadata> <s:mimeTypes>
<s:mimeType>image/png</s:mimeType>
 </s:mimeTypes> </s:metadata> </s:sink>
  </s:bioextract>

