# Perl for Bioinformatics: Perl scripts for bioinformatics.

# Perl program to convert a DNA sequence into a protein sequence.

# Driver: read a DNA sequence file and print the protein translation of
# every record in it.
#
# Input format: lines starting with '>' are record headers; every other
# non-blank line is sequence data belonging to the most recent header (or to
# an unnamed leading record when the file has no header at all).
#
# Usage: the file to read is the first command-line argument; when no
# argument is given, "Dna_seq.txt" in the current directory is used.
#
# Output (STDOUT): each header line as read, followed by whatever
# protein_converter() prints for that record's concatenated sequence.
{
    # Pragmas are lexically scoped to this block so that enabling them here
    # does not change how the rest of the file is compiled.
    use strict;
    use warnings;

    my $dna_file = @ARGV ? $ARGV[0] : 'Dna_seq.txt';

    # Three-argument open with a lexical handle; fail loudly instead of
    # silently translating nothing when the file is missing or unreadable.
    open my $dna_fh, '<', $dna_file
        or die "Cannot open '$dna_file' for reading: $!\n";

    my $seq       = '';    # sequence accumulated for the current record
    my $in_record = 0;     # true once a header or sequence line has been seen

    while ( my $line = <$dna_fh> ) {
        if ( $line =~ /^\s*>/ ) {

            # A new header closes the previous record. Nothing is flushed
            # before the very first header, so no empty translation is
            # printed at the top of the output.
            protein_converter($seq) if $in_record;
            print $line;
            $seq       = '';
            $in_record = 1;
        }
        elsif ( $line =~ /\S/ ) {
            chomp $line;
            $seq .= $line;
            $in_record = 1;
        }
    }

    close $dna_fh;

    # Flush the final record; EOF is not followed by a header to trigger it.
    protein_converter($seq) if $in_record;
}

# Translate a DNA sequence into a protein sequence and print it.
#
# Parameters:
#   $seq - DNA sequence as a string of A/C/G/T. Case is ignored and all
#          whitespace (spaces, newlines, carriage returns) is removed before
#          translation. An undefined value is treated as an empty sequence.
#
# Output (currently selected handle, normally STDOUT):
#   The translation, reading codons from the first base in frame 1,
#   followed by two newlines. Each complete codon prints as:
#     - its one-letter amino-acid code,
#     - "Stop" for TAA/TAG/TGA (translation continues past stop codons),
#     - "X" for any triplet not in the codon table (e.g. one containing N),
#       so that later residues keep their positions.
#   One or two leftover bases at the end of the sequence are ignored.
#
# Returns: nothing meaningful.
sub protein_converter {
    use strict;
    use warnings;

    my $seq = shift;
    $seq = '' unless defined $seq;

    # Normalise the input so the table lookup below sees clean upper-case
    # triplets regardless of how the sequence was formatted in the file.
    $seq =~ s/\s+//g;
    $seq = uc $seq;

    # Codon table: codons are the hash keys, amino acids are the values.
    my %amino_acid_for = (
        ATT => 'I', ATC => 'I', ATA => 'I',
        CTT => 'L', CTC => 'L', CTA => 'L', CTG => 'L', TTA => 'L', TTG => 'L',
        GTT => 'V', GTC => 'V', GTA => 'V', GTG => 'V',
        TTT => 'F', TTC => 'F',
        ATG => 'M',
        TGT => 'C', TGC => 'C',
        GCT => 'A', GCC => 'A', GCA => 'A', GCG => 'A',
        GGT => 'G', GGC => 'G', GGA => 'G', GGG => 'G',
        CCT => 'P', CCC => 'P', CCA => 'P', CCG => 'P',
        ACT => 'T', ACC => 'T', ACA => 'T', ACG => 'T',
        TCT => 'S', TCC => 'S', TCA => 'S', TCG => 'S', AGT => 'S', AGC => 'S',
        TAT => 'Y', TAC => 'Y',
        TGG => 'W',
        CAA => 'Q', CAG => 'Q',
        AAT => 'N', AAC => 'N',
        CAT => 'H', CAC => 'H',
        GAA => 'E', GAG => 'E',
        GAT => 'D', GAC => 'D',
        AAA => 'K', AAG => 'K',
        CGT => 'R', CGC => 'R', CGA => 'R', CGG => 'R', AGA => 'R', AGG => 'R',
        TAA => 'Stop', TAG => 'Stop', TGA => 'Stop',
    );

    my $protein = '';

    # Start offset of the last complete codon; negative when the sequence
    # has fewer than three bases, in which case the loop body never runs.
    my $last_start = length($seq) - 3;

    for ( my $pos = 0; $pos <= $last_start; $pos += 3 ) {
        my $codon = substr $seq, $pos, 3;
        $protein .= exists $amino_acid_for{$codon}
                  ? $amino_acid_for{$codon}
                  : 'X';
    }

    print "$protein\n\n";
    return;
}