-
Perl codons: m protéines n organismes
27.02.16 Paris
#
# Perl pour compter les codons d'un ensemble de protéines pour plusieurs organismes.
#
# Les noms des organismes (@noms) sont ceux de KEGG. Les protéines sont extraites de KEGG au format 60 caractères et mises chacune dans un fichier txt (sans le .txt) dont le nom est le nom de l'organisme concaténé au nom de la protéine, par exemple : smf6115.
# Keep the organism list (@noms) and the protein list (@z) up to date.
# Do not remove the leading "a" of protein names such as a6115: the prefix is
# what allows names made of digits to be written in the list, and the counting
# loop strips the first character of each name when it builds the file name.
#
# KEGG organism codes, one per organism to process.
my @noms = qw(
    smf  hcr  ssdc sbw  ple  bfl  rip  pub  cad  tme
    hhl  ial  dte  axl  thl  bbd  hmr  vpr  nis  nse
    hhd  hth  pdi  fbt  tli  amo  mah  ssm  lfc  bvs
    caa  dal  mcu  din  aba  dba  sus  saci
    tai  gau  vin  age  mts  cmi  ksk  tsu  aae  sbn
    bmf  sho  zin  crp
);
# Protein identifiers. Each one is written with a leading "a" placeholder
# character in front of the real name (e.g. a6115 for 6115).
my @z = qw(
    a6115
    a6114
    a27771
    a2777A
    a61142
    a6119
    a27761
    a27762
);
# The 64 codons, grouped by the amino acid(s) they encode.
# The order is significant: the report code further down addresses codons
# by fixed index ranges into this array.
my @cod = (
    qw(tta ttg ctt cta ctg ctc),    # Leu
    qw(aaa aag ttt ttc),            # Lys Phe
    qw(tct tca agt tcg tcc agc),    # Ser
    qw(tat tac aat aac),            # Tyr Asn
    qw(aga cga cgt agg cgc cgg),    # Arg
    qw(cat cac caa cag),            # His Gln
    qw(gga ggt ggc ggg),            # Gly
    qw(gaa gag gat gac),            # Glu Asp
    qw(gca gct gcg gcc),            # Ala
    qw(tgt tgc atg),                # Cys Met
    qw(cca cct ccg ccc),            # Pro
    qw(tgg),                        # Trp
    qw(gtt gta gtg gtc),            # Val
    qw(act aca acg acc),            # Thr
    qw(att ata atc),                # Ile
    qw(taa tag tga),                # Stp
);
# Number of proteins and index of the last one.
my $m  = @z;
my $pt = $m - 1;

# Number of organisms and index of the last one.
my $n  = @noms;
my $nt = $n - 1;

# One row of 64 codon counters per organism.
# The rows are built with map so that each organism gets its own array
# reference. Writing `[ (0) x 64 ] x $n` does not do that: with an
# unparenthesised left operand, `x` is string repetition and yields a single
# string made of the stringified reference.
my @q = map { [ (0) x 64 ] } 1 .. $n;

# File name of each protein for the organism currently being processed.
my @fic = ('a') x $m;

# Scratch loop variables used by the code below.
my $i = 0;
my $j = 0;

# Codon counters for the organism currently being processed
# (same order as @cod).
my @p = (0) x 64;
# Codon string -> its position in @cod, so a triplet is resolved with one hash
# lookup instead of a scan over all 64 codons. Relies on @cod holding distinct
# codons.
my %codon_index;
@codon_index{@cod} = ( 0 .. $#cod );

# For every organism: build the input file name of each protein, accumulate
# the codon counts of all those files into @p, then store a copy of the
# totals as that organism's row in @q.
foreach my $org ( 0 .. $nt ) {
    @p = (0) x 64;

    foreach my $prot ( 0 .. $pt ) {
        # File name = organism code + protein name without its first
        # character (the "a" placeholder), e.g. smf + 6115.
        $fic[$prot] = $noms[$org] . substr( $z[$prot], 1 );
    }

    foreach my $prot ( 0 .. $pt ) {
        comptfich( $fic[$prot] );
    }

    # Store a fresh array reference rather than writing through whatever
    # $q[$org] currently holds, so the row is always a real, distinct array.
    $q[$org] = [@p];
}

# comptfich($filename)
#
# Reads the sequence file $filename line by line and adds, for every
# recognised codon, 1 to the matching counter in the file-level array @p.
# Each line is cut into consecutive triplets starting at its first character,
# so codons are only read in frame when line lengths are multiples of 3
# (as with 60-character lines). Triplets that are not in @cod are ignored.
#
# A file that cannot be opened is reported on STDERR and contributes nothing
# to the counts; processing of the other files continues.
#
# Returns nothing.
sub comptfich {
    my ($filename) = @_;

    my $fh;
    unless ( open( $fh, '<', $filename ) ) {
        warn "comptfich: cannot open '$filename': $!\n";
        return;
    }

    while ( defined( my $line = <$fh> ) ) {
        chomp $line;
        my $length = length $line;

        # Walk the line three characters at a time; stop before a trailing
        # incomplete triplet.
        for ( my $pos = 0 ; $pos + 3 <= $length ; $pos += 3 ) {
            my $index = $codon_index{ substr( $line, $pos, 3 ) };
            $p[$index]++ if defined $index;
        }
    }

    close $fh;
    return;
}
# Column layout of the report, shared by the header line and the data lines
# so that the two can never drift apart.
# Each entry is [ amino-acid labels, codon indices into @cod ]:
#   - header line: the labels are printed, then the codons at those indices;
#   - data lines:  one empty cell per label, then the counts at those indices.
# The amino-acid labels of a group name the codons of the previous group(s)
# as well as its own; index 49 (tgg, Trp) is deliberately placed right after
# the Cys/Met codons.
my @report_layout = (
    [ [qw(Leu)],             [ 0 .. 9 ] ],
    [ [qw(Lys Phe Ser)],     [ 10 .. 19 ] ],
    [ [qw(Tyr Asn Arg)],     [ 20 .. 29 ] ],
    [ [qw(His Gln Gly)],     [ 30 .. 37 ] ],
    [ [qw(Glu Asp Ala)],     [ 38 .. 44, 49 ] ],
    [ [qw(Cys Met Trp Pro)], [ 45 .. 48 ] ],
    [ [qw(Val)],             [ 50 .. 53 ] ],
    [ [qw(Thr)],             [ 54 .. 57 ] ],
    [ [qw(Ile)],             [ 58 .. 60 ] ],
    [ [qw(Stp)],             [ 61 .. 63 ] ],
);

# _csv_line(@cells)
#
# Returns one output line: every cell followed by ";" and a final newline.
# An undefined cell is written as an empty cell.
sub _csv_line {
    my @cells = @_;
    my $line  = '';
    for my $cell (@cells) {
        $line .= ( defined $cell ? $cell : '' ) . ';';
    }
    return $line . "\n";
}

# _report_lines(\@proteins, \@codons, \@organisms, \@counts)
#
# Builds the whole report and returns it as a list of lines:
#   1. the protein names;
#   2. the header line (amino-acid labels and codons, per @report_layout);
#   3. one line per organism: its name in the first cell, then its codon
#      counts laid out under the header's codon columns.
# $counts->[$i] holds the 64 counters of $organisms->[$i], in @codons order.
sub _report_lines {
    my ( $proteins, $codons, $organisms, $counts ) = @_;

    my @lines = ( _csv_line( @{$proteins} ) );

    my @header;
    for my $group (@report_layout) {
        my ( $labels, $indices ) = @{$group};
        push @header, @{$labels}, @{$codons}[ @{$indices} ];
    }
    push @lines, _csv_line(@header);

    for my $org ( 0 .. $#{$organisms} ) {
        my @tally = @{ $counts->[$org] || [] };
        my @row;
        for my $group (@report_layout) {
            my ( $labels, $indices ) = @{$group};
            push @row, ('') x @{$labels}, @tally[ @{$indices} ];
        }

        # The very first cell sits under the "Leu" label of the header and
        # carries the organism name instead of staying empty.
        $row[0] = $organisms->[$org];
        push @lines, _csv_line(@row);
    }

    return @lines;
}

my @report = _report_lines( \@z, \@cod, \@noms, \@q );
print @report;
-
Commentaires