#!/usr/local/bin/perl -w # # Usage: izaak -targetdir file1.pdb [file2.pdb, ... ] # chomp($cwd = `pwd`); # Get target directory if( $ARGV[0] =~ /^-(\w+)/ ) { $TARGETDIR = "$cwd/$1"; if( -d "$TARGETDIR" ) { warn "Results will be stored in $TARGETDIR, which already exists\n"; } else { mkdir ("$TARGETDIR", 0755) or die "Sorry; can't create the directory $TARGETDIR\n"; } shift; } else { die "Usage: angler -targetdir files\n"; } $pi = atan2(1, 1) * 4; # This converts "subchain" letters to numbers for easier indexing %resNUM = ( "" => "0", " A" => "1", " B" => "2", " C" => "3", " D" => "4", " E" => "5", " F" => "6", " G" => "7", " H" => "8", " I" => "9", " J" => "10", " K" => "11", " L" => "10", " M" => "12", " N" => "13", " O" => "14", " P" => "15", " Q" => "16", " R" => "17", " S" => "18", " T" => "19", " U" => "20", " V" => "21", " W" => "22", " X" => "23", " Y" => "24", " Z" => "25", ); # We need the names of the amino acid names so we don't read lines # whose residues are not amino acids @AA = ( 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LYS', 'LEU', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', ); # This is the template for unpacking lines of a pdf file # We need to check for anomalies at the marked spot # VV $TEMPLATE = "A13 A3 A1 A3 A2 A4 A4 A8 A8 A8"; $null1 = $null2 = ""; # Open a master file to list files that have been "izaak"ed if( -e "$TARGETDIR/.masterfile" ) { open( MASTER, "$TARGETDIR/.masterfile" ) ; while() { chomp; @masterlist = split; } close(MASTER); } FILE: while ($file = shift(@ARGV)) { # First check the masterlist of files that have been read. # Skip to the next FILE if this one is already in the list; # otherwise add this to the list and continue if ( grep( /$file/, @masterlist ) ) { print "*** Already read $file; continuing\n"; next FILE; } else { push(@masterlist, $file, " " ); } open (FILE, "$file"); print "*** Reading the file $file\n"; # initialize @resNUM = (); @next_N = (0, 0, 0); @C = (0, 0, 0); $previous_res = $relevant_res = ""; foreach $aminoacid (@AA) { @{ $DIHED{$aminoacid} } = (""); } # Get residue information from the file LINE: while () { next LINE unless /^ATOM/; ($null1, $atom, $anomaly, $residue, $resletter, $resnum, $null2, $x, $y, $z) = unpack($TEMPLATE, $_); # Skip a line if its residue is not an amino acid next LINE unless grep( /$residue/, @AA ); # If there are subchain letters then $resid takes the values 10001, 10002, ..., 20001, ...., 80999. if ($resletter =~ /[A-Z]/) { $resid = $resNUM{$resletter} * 10000 + $resnum; } elsif ($resletter =~ /[1-9]/) { $resid = $resletter * 10000 + $resnum; } else { $resid = $resnum; } # For later work we need a list of $resid numbers actually built, so we push them onto # the array @resNUM, but only once for each residue. Also, we can't analyze the first # residue in any subchain, so we leave it out of the array. push(@resNUM, $resid) unless grep(/$resid/, @resNUM); if ($atom eq "N") { @N = @next_N; @next_N = ($x, $y, $z); $resrelevant = $resid - 1; $resback = $resid - 2; if ( grep(/\b$resback\b/, @resNUM) ) { $phi = sprintf( "%6.1f", &dihedral(@pre_C, @C, @CA, @N) ); $psi = sprintf( "%6.1f", &dihedral(@next_N, @N, @CA, @C) ); push( @{ $DIHED{$relevant_res} }, $phi, $psi, $previous_res, $residue, $file, $resrelevant, $anomaly, "\n"); } } if ($atom eq "C") { @pre_C = @C; @C = ($x, $y, $z); } if ($atom eq "CA") { @CA = ($x, $y, $z); $previous_res = $relevant_res; $relevant_res = $residue; } } close (FILE); # Now add the information to the various AMINO ACID files. # These files are described by the three-letter $residue names. @KEYS = keys( %{DIHED} ); foreach $residue ( @KEYS ) { open(FD, ">>$TARGETDIR/$residue"); print FD "@{ $DIHED{$residue} }"; close(FD); } # After all the files have been processed, update the MASTER file open(MASTER, ">$TARGETDIR/.masterfile"); print MASTER "@masterlist"; close(MASTER); } ############################# Subroutines ############################# ##################### # dihedral angles # ##################### # input: output: # pre_C, C, CA, N PHI # next_N, N, CA, C PSI sub dihedral { # get coords of the four points my ($x1, $y1, $z1, $x2, $y2, $z2, $x3, $y3, $z3, $x4, $y4, $z4) = @_; # make V1, V2 and the spine my @V1 = &displacement($x4, $y4, $z4, $x1, $y1, $z1); my @V2 = &displacement($x4, $y4, $z4, $x2, $y2, $z2, ); my @S = &unitvector( &displacement($x4, $y4, $z4, $x3, $y3, $z3) ); # make the X basis vector @P1 = &scalarmul( &dotproduct(@V1, @S), @S ); my @X = &unitvector( &displacement(@P1, @V1) ); # make the Y basis vector my @Y = &crossproduct( @S, @X ); # make R2 and get its coordinates with respect to X, Y basis my @P2 = &scalarmul( &dotproduct(@V2, @S), @S ); @R2 = &displacement(@P2, @V2); my $x = &dotproduct( @R2, @X ); my $y = &dotproduct( @R2, @Y ); my $angle = atan2($y, $x) * 180 / $pi; return $angle; } ##################### # Vector operations # ##################### # input: two points # output: displacement vector from first to second sub displacement { my ($x1, $y1, $z1, $x2, $y2, $z2) = @_; @displacement = ($x2 - $x1, $y2 - $y1, $z2 - $z1); } # input: vector # output: unit vector in its direction sub unitvector { my ($x, $y, $z) = @_; my $norm = sqrt($x * $x + $y * $y + $z * $z); @unitvector = ($x/$norm, $y/$norm, $z/$norm); } # input: two vectors # output: their cross product in order given sub crossproduct { my ($x1, $y1, $z1, $x2, $y2, $z2) = @_; @crossproduct = ($y1 * $z2 - $y2 * $z1, $z1 * $x2 - $z2 * $x1, $x1 * $y2 - $x2 * $y1); } # input: two vectors # output: their scalar product sub dotproduct { my ($x1, $y1, $z1, $x2, $y2, $z2) = @_; $dotproduct = $x1 * $x2 + $y1 * $y2 + $z1 * $z2; } # input: scalar, vector # output: vector that is their product sub scalarmul { my $scalar = shift; foreach $i (0..2) { $output[$i] = $scalar * $_[$i]; } return @output; }