GLYLIB  0.3.0b
load_pdb.c
Go to the documentation of this file.
00001 /*Author: Michael Tessier with changes by many others 
00002         including BLFoley, possibly also Spandana Makeneni,
00003         Anita Nivedha, Matthew Tessier */
00004 #include <load_pdb.h>
00005 assembly* load_pdb(char* file_name)
00006 {
00007   char* temp;
00008   WARN=1;SILENT=1;INWC=0;DATE=0;UNCUT=1;UNx=1;UNy=1;UNz=1;
00009   temp = suf; strcpy(temp,".pdb");
00010   temp = sufc; strcpy(temp,"_change.txt"); ACT='0';
00011   DEBUG=-1;LASTRES=-1;LASTOKX=0;LASTOKY=0;LASTOKZ=0;
00012   UNCTOL=0;CRYX=0;CRYY=0;CRYZ=0;LASTX=0;LASTY=0;LASTZ=0;
00013   int ma,ra;
00014   assembly* asmbl;
00015   IN = myfopen(file_name, "r");
00016   char find_LINK='n',find_CONECT='n';
00017   /*
00018   init_struct() scans pdb file for number of lines
00019   allocates memory for line structures initializes 
00020   structures that contain line formats
00021 
00022   This, among other PDB functions, needs to be made less "global".
00023   */
00024   init_struct();
00025   rewind(IN);
00026   /*Read in lines to ln structure */
00027   //printf("Reading in %s...\n",file_name);
00028   for(ma=0;ma<INWC;ma++){
00029   if(DEBUG>=1){printf("made it to here main line loop %d ...\n", ma);} 
00030         rwm_line(ma+1); 
00031         if(strncmp(ln[ma].f[0].c,"CONECT",6)==0) find_CONECT='y';
00032         if(strncmp(ln[ma].f[0].c,"LINK",4)==0) find_LINK='y';
00033         }
00034 
00035   /*Determine the number of molecules in the pdb */
00036   //printf("There are %d molecule(s)\n",howManyMolecules());
00037   asmbl = getAssembly();
00038   set_assembly_molindexes(asmbl);
00039   if(find_CONECT=='y')
00040     { 
00041     set_assembly_atom_molbonds_from_PDB_CONECT(asmbl, ln, file_name, INWC); 
00042 /* 
00043  * dropping this because apparently not working...
00044     for(ma=0;ma<asmbl[0].nm;ma++)
00045         {
00046         for(ra=0;ra<asmbl[0].m[ma][0].nr;ra++)
00047             {
00048             set_residue_atom_nodes_from_bonds(&asmbl[0].m[ma][0].r[ra]);
00049             }
00050         set_molecule_atom_nodes_from_bonds(asmbl[0].m[ma]);
00051 */
00052         /*
00053         The atom-level CONECT cards are more trusted than LINK cards,
00054         so they are used unless there are no CONECT cards.
00055         */
00056 /*
00057  * also this
00058         set_molecule_residue_molbonds(asmbl[0].m[ma]);
00059         set_molecule_residue_nodes_from_bonds(asmbl[0].m[ma]);
00060         }
00061 */
00062     }
00063 /* and this isn't even written
00064   if((find_LINK=='y')&&(find_CONECT=='n'))
00065     { 
00066     set_assembly_residue_molbonds_from_PDB_LINK(asmbl, ln, file_name, INWC); 
00067     }
00068 */
00069   //printf("PDB Information Successfully Read.\n");
00070   free(ln);
00071   return asmbl;
00072 }
00073 
00074 molecule load_pdb_from_slurp(fileslurp in){
00075  int i,hldr;
00076  char* temp;
00077  init_struct_slurp(in);
00078  for(i = 0; i < in.n; i++){     //Assign data to the rwm line
00079   rwm_line_char(in.L[i],i+1);
00080   if(isAtom((ln+i))){           //And determine the smallest residue
00081    temp  = (*(ln+i)).f[8].c; sscanf(temp,"%d",&hldr);
00082   }
00083  }
00084  INWC = in.n;
00085 // printf("%d\n",INWC);
00086  return (*getMolecule());
00087 }
00088 
00089 void set_assembly_atom_molbonds_from_PDB_CONECT(assembly *A, linedef *ln, const char *file_name, int nlines)
00090 {
00091 int ai,bi,li,ncurrent=-1,this_n,that_n,list_loc,na=A[0].na;
00092 char badorder='n';
00093 molindex_set found_a;
00094 int this_ai,alist[na],bonds[na],blist[na],llist[na];
00095 int conect_first=-1,conect_last=-1,tmpi,tmpj;
00096 /*
00097     0.  Check that atoms are all in order.  Complain if not, but try anyway.
00098 */
00099  for(ai=0;ai<A[0].na;ai++){
00100   if(A[0].a[ai][0].n<=ncurrent) { badorder = 'y'; }
00101 /*printf("ncurrent is %d ; n is %d \n",ncurrent,A[0].a[ai][0].n); */
00102 /*if((A[0].a[ai][0].n-ncurrent)!=1) printf("ncurrent (%d) differs from n (%d) by %d\n",ncurrent,A[0].a[ai][0].n,A[0].a[ai][0].n-ncurrent);*/
00103   ncurrent=A[0].a[ai][0].n;
00104   }
00105  if(badorder=='y')
00106   {
00107   printf("\nAtoms serials (numbers) in file %s are not in increasing order.\n",file_name);
00108   printf("This might cause bonding problems.\n");
00109   }
00110 for(ai=0;ai<A[0].na;ai++) 
00111   { 
00112   alist[ai]=-1; 
00113   blist[ai]=-1; 
00114   llist[ai]=-1; 
00115   bonds[ai]=0; 
00116   }
00117 /*
00118     1.  Find numbers for atoms and number of bonds for each.
00119 */ 
00120 ncurrent=0;
00121  for(li=0;li<nlines;li++)
00122   {
00123   if(strcmp(ln[li].f[0].c,"CONECT")==0)
00124    { 
00125    if(conect_first==-1) conect_first=li;
00126    sscanf(ln[li].f[1].c,"%d",&this_n);
00127    list_loc=-1;
00128    for(ai=0;ai<ncurrent;ai++) { if(alist[ai]==this_n) list_loc=ai; }
00129 //printf("this_n is %d ; list_loc is %d \n",this_n,list_loc); 
00130    if(list_loc<0)
00131     { 
00132     list_loc=ncurrent;
00133     ncurrent++;
00134     alist[list_loc]=this_n;
00135     llist[list_loc]=li;
00136     }
00137 //printf(" ---  list_loc is %d \n",list_loc); 
00138    for(bi=2;bi<15;bi++)
00139     {
00140     if(ln[li].f[bi].c==NULL) break;
00141     tmpi=sscanf(ln[li].f[bi].c,"%d",&tmpj); 
00142     if(tmpi<=0) { continue; }
00143 //printf("\t\ttmpi is %d ; tmpj is %d\n",tmpi,tmpj); 
00144     bonds[list_loc]++;
00145     }
00146 //printf("\t\tbonds[%d] is  %d\n",list_loc,bonds[list_loc]); 
00147     conect_last=li;
00148    }
00149   }
00150 /*printf("conect first/last are:  %d   %d  \n",conect_first,conect_last); */
00151 /*
00152     2.  Set atom-level connections.
00153 */ 
00154   for(ai=0;ai<na;ai++) 
00155    {
00156    if(alist[ai]==-1) continue;
00157    for(ncurrent=0;ncurrent<na;ncurrent++) { blist[ncurrent]=-1; }
00158    found_a=find_assembly_top_level_atoms_by_n(A,alist[ai]);
00159 /*printf("found_a.nP is %d and P[0] i-m-r-a is %d-%d-%d-%d n=%d N=%s\n",found_a.nP,\
00160         found_a.P[0].i,found_a.P[0].m,found_a.P[0].r,found_a.P[0].a,\
00161         A[0].m[found_a.P[0].m][0].r[found_a.P[0].r].a[found_a.P[0].a].n,A[0].m[found_a.P[0].m][0].r[found_a.P[0].r].a[found_a.P[0].a].N);
00162 */
00163    if(found_a.nP==0)
00164     {
00165     printf("\nBonding specified in file %s for non-existent atom serial number %d\n",file_name,this_n);
00166     printf("Skipping this one and hoping for better next time.\n");
00167     continue;
00168     }
00169    if(found_a.nP>1)
00170     {
00171     printf("\nBonding specified in file %s for duplicated atom serial number %d\n",file_name,this_n);
00172     printf("Choosing first in list and moving on.\n");
00173     }
00174    this_ai=found_a.P[0].i;
00175    free(found_a.P);
00176    A[0].a[this_ai][0].nmb=bonds[ai];
00177    A[0].a[this_ai][0].mb=(molbond*)calloc(bonds[ai],sizeof(molbond));
00178    list_loc=0;
00179    for(li=conect_first;li<=conect_last;li++)
00180     {
00181     tmpi=sscanf(ln[li].f[1].c,"%d",&tmpj);
00182     if(tmpi<=0) {mywhine("trouble reading ln[li].f[1].c in set_assembly_atom_molbonds_from_PDB_CONECT");} 
00183     if(tmpj==alist[ai])
00184      {
00185      for(bi=2;bi<15;bi++)
00186       {
00187       if(ln[li].f[bi].c==NULL) break;
00188       tmpi=sscanf(ln[li].f[bi].c,"%d",&that_n); 
00189       if(tmpi<=0) { continue; }
00190       /* If there is an entry in this location */
00191       found_a=find_assembly_top_level_atoms_by_n(A,that_n);
00192       if(found_a.nP==0)
00193        {
00194        printf("\nBonding specified in file %s for non-existent atom serial number %d\n",file_name,that_n);
00195        printf("Skipping this one and hoping for better next time.\n");
00196        continue;
00197        }
00198       if(found_a.nP>1)
00199        {
00200         printf("\nBonding specified in file %s for duplicated atom serial number %d\n",file_name,that_n);
00201         printf("Choosing first in list and moving on.\n");
00202         } 
00203        A[0].a[this_ai][0].mb[list_loc].s=A[0].a[this_ai][0].moli;
00204        A[0].a[this_ai][0].mb[list_loc].t=found_a.P[0];
00205        free(found_a.P);
00206        list_loc++;
00207        if(list_loc>A[0].a[this_ai][0].nmb){mywhine("list_loc>A[0].a[this_ai][0].nmb in set_assembly_atom_molbonds_from_PDB_CONECT");}
00208        }
00209      }
00210     }
00211    if(list_loc<A[0].a[this_ai][0].nmb){mywhine("list_loc<A[0].a[this_ai][0].nmb in set_assembly_atom_molbonds_from_PDB_CONECT");}
00212    }
00213 return;
00214 }
00215 
00216 void set_assembly_residue_molbonds_from_PDB_LINK(assembly *asmbl, linedef *ln, const char *file_name, int nlines)
00217 {
00218 mywhine("The function set_assembly_residue_molbonds_from_PDB_LINK has not been written yet.\n");
00219 }
00220 
00221 
00222 fileslurp isolateInputPDB(fileslurp S){
00223  int i,itr;
00224  char* plc;
00225  fileslurp O; O.n = 0; itr = 0;
00226  for(i = 0; i < S.n; i++){
00227   if(strstr(S.L[i],"INPUT-PDBQ: ATOM")!=NULL ||         //These cover the two types of
00228      strstr(S.L[i],"INPUT-PDBQ: HETATM")!=NULL ||       //AD3 style input data
00229      strstr(S.L[i],"INPUT-LIGAND-PDBQT: ATOM")!=NULL || //These cover the two types of
00230      strstr(S.L[i],"INPUT-LIGAND-PDBQT: HETATM")!=NULL) //AD4 style input data
00231    O.n++;
00232  }
00233  O.L = (char**)calloc(O.n,sizeof(char*));
00234  for(i = 0; i < S.n; i++){
00235   if(strstr(S.L[i],"INPUT-PDBQ: ATOM")!=NULL ||
00236      strstr(S.L[i],"INPUT-PDBQ: HETATM")!=NULL ||
00237      strstr(S.L[i],"INPUT-LIGAND-PDBQT: ATOM")!=NULL ||
00238      strstr(S.L[i],"INPUT-LIGAND-PDBQT: HETATM")!=NULL){
00239    if(strstr(S.L[i],"INPUT-PDBQ: ")!=NULL)
00240     plc = S.L[i]+12;    //Based on what starts the line, duplicate the line minus
00241    else                 //The first couple of words
00242     plc = S.L[i]+20;
00243    O.L[itr] = strdup(plc); itr++;
00244   }
00245  }
00246  for(i = 0; i < S.n; i++)
00247   free(S.L[i]);
00248  free(S.L);
00249  return O;
00250 }
00251 
00252 fileslurp isolateDockedPDB(fileslurp S){
00253  int i,itr;
00254  char* plc;
00255  fileslurp O; O.n = 0; itr = 0;
00256  for(i = 0; i < S.n; i++){
00257   if(strstr(S.L[i],"DOCKED: ATOM")!=NULL)
00258    O.n++;
00259  }
00260  O.L = (char**)calloc(O.n,sizeof(char*));
00261  for(i = 0; i < S.n; i++){
00262   if(strstr(S.L[i],"DOCKED: ATOM")!=NULL){
00263    plc = S.L[i]+8;
00264    O.L[itr] = strdup(plc); itr++;
00265   }
00266  }
00267  for(i = 0; i < S.n; i++)
00268   free(S.L[i]);
00269  free(S.L);
00270  return O;
00271 }
00272 
00273 
00274 int howManyMolecules()
00275 {
00276  int i; int counter = 0;
00277  char in_molecule_switch = 'n';
00278  int check_status=0;
00279 /* Adding a little robustness to this function.
00280         20100727, BLFoley.  
00281    In particular, making a check for the possibility that there
00282         are no molecules found.  Also fixing issue due to there
00283         being multiple TER, CONECT, etc., cards in a row. */ 
00284  for(i = 0; i < INWC; i++) {
00285         check_status = isAtom((ln+i)); /* is this an atom? (ATOM or HETATM) */
00286         if(in_molecule_switch == 'n' && check_status == 1){
00287                 in_molecule_switch = 'y'; /* we have entered a molecule */
00288                 counter++;
00289                 }
00290         check_status = endOfMol((ln+i)); /* If a card that usually ends a 
00291                 molecule: TER, LINK, CONECT, END, ENDMDL, MASTER */
00292         if(check_status == 1){ in_molecule_switch = 'n'; }
00293         } 
00294  return counter;
00295 }
00296 
00297 assembly* getAssembly()
00298 {
00299   int molNum = howManyMolecules();
00300   int i = 0;int j = 0;int k = 0;int ka = 0; int la=0;int atom_num;
00301   int init=0, next_mol_line=0, ri=0,natoms=0,resNum,Kref=0;
00302   char in_molecule_switch = 'n';
00303   char is_same_residue = 'y';
00304   double x,y,z;
00305   char atmName[5], atmElem[3], resName[4],IC=' ',cID=' ',*temp; 
00306   assembly* asmbl = (assembly*) calloc (1 , sizeof(assembly));
00307   (*asmbl).nm = molNum;
00308   molecule *curMol=NULL; residue* curRes=NULL; 
00309   atom* curAtm;// atom* atm;
00310   /* made further changes for molecule double pointers in assembly
00311         starting on 20100723.  BLFoley */ 
00312 
00313   (*asmbl).m = (molecule**)calloc(molNum,sizeof(molecule*));//added by MNT on 20080806
00314   for(init=0;init<molNum;init++){ 
00315         (*asmbl).m[init] = (molecule*) calloc (1,sizeof(molecule)); 
00316         (*asmbl).m[init][0].nr=-1; /* initializing as empty/not-seen. */ 
00317         }
00318   (*asmbl).nr = 0; // initializing 
00319   (*asmbl).r = (residue**) calloc(1, sizeof(residue*));
00320   (*asmbl).na = 0; // initializing 
00321   (*asmbl).a = (atom**) calloc(1, sizeof(atom*));
00322   i=0; 
00323   strcpy(atmName,"");
00324   strcpy(resName,"");
00325   strcpy(atmElem,"");
00326 /*printf("First allocate of residues and atoms:\n");*/
00327 /*printf("\t molecule %d, init is %d; residue %d -- nm is %d, A.nr is %d, A.na is %d\n",j,init,k,(*asmbl).nm,(*asmbl).nr,(*asmbl).na);*/
00328 /* 
00329         i = line number
00330         j = molecule number
00331         k = residue number (within the molecule, not absolute)
00332         ka = absolute residue number 
00333         (*curRes).ni = current atom to place (set back to zero when done)
00334         la = absolute atom number 
00335 */
00336 
00337 in_molecule_switch = 'n';
00338 for(i = 0; i < INWC; i++) {
00339         if(j==molNum){break;}
00340         if(in_molecule_switch == 'n'){ /* If we are not in a molecule. */
00341                 while( (i<(INWC)) && (isAtom(ln+i)==0) ){ i++; } /* advance to first ATOM/HETATM line */
00342                 if(i==INWC){break;}
00343                 curMol = &asmbl[0].m[j][0];
00344                 (*curMol).Des=(char*)calloc(2,sizeof(char));
00345                 (*curMol).Des[0]=(*(ln+i)).f[7].c[0]; (*curMol).Des[1]='\0';
00346                 (*curMol).nr = findTotalResidue(i);
00347                 (*curMol).r = (residue*) calloc ((*curMol).nr,sizeof(residue));
00348                 /*Initialize the residue numbers to -1 so the program knows they're not used yet. */
00349                 for(ri=0;ri<(*curMol).nr;ri++){
00350                         initialize_residue(&curMol[0].r[ri]);
00351                         (*curMol).r[ri].n = -1;
00352                         (*curMol).r[ri].ni = 0;
00353                         (*curMol).r[ri].moli.m=j; 
00354                         (*curMol).r[ri].moli.r=ri; 
00355                         (*curMol).r[ri].moli.a=-1;
00356                         }
00357                 /*Get the total atoms in the molecule while we're here. */
00358                 next_mol_line=getResInfo((*curMol).r,i);
00359                 natoms=0;
00360                 for(ri = 0; ri < (*curMol).nr; ri++){
00361                         (*curMol).r[ri].a = (atom*) calloc ((*curMol).r[ri].na,sizeof(atom));
00362                         natoms+=(*curMol).r[ri].na;
00363 //printf("na is %d for residue %d(ka=%d)\n",(*curMol).r[ri].na,ri,ka);
00364                         }
00365                 /* Allocate the top-level atoms and residues for this molecule */
00366                 (*asmbl).nr+=(*curMol).nr;
00367                 (*asmbl).r = (residue**) realloc ((*asmbl).r, (*asmbl).nr*sizeof(residue*));
00368                 (*asmbl).na+=natoms; 
00369                 (*asmbl).a = (atom**) realloc ((*asmbl).a, (*asmbl).na*sizeof(atom*));
00370                 /* set the top-level residues */
00371                 for(ri = 0; ri < (*curMol).nr; ri++) { (*asmbl).r[ka+ri] = &curMol[0].r[ri]; }
00372                 in_molecule_switch = 'y';/* At this point, we should be at an ATOM/HETATM entry. */
00373                 curRes = curMol[0].r;
00374                 curAtm = (*curRes).a;
00375                 ka+=curMol[0].nr;
00376                 } 
00377         if(i==INWC){ /* If we ran out of lines... */
00378                 if((*curMol).nr==-1){ /* If this is molecule does not currently contain residues */
00379                         (*curMol).nr = 0;
00380                         (*curMol).r = NULL;
00381                         (*curMol).N = strdup("EMPTY_MOLECULE");
00382                         if(j!=molNum){printf("Found %d molecules, but ran out of atoms before molecule %d.\n",molNum,j+1);
00383                                 printf("This is probably a very, very bad thing, but we're ignoring it for now.\n");}
00384                         } 
00385                 else if ((*curMol).nr>0){ /* if this molecule already contains residues */
00386                         printf("Got to i=INWC while in a molecule.  Go fix code.\n"); }
00387                 else { printf("Got to i=INWC for an uncoded value of nr.  Go fix code.\n");}
00388                 }
00389         while( (i<INWC) && (in_molecule_switch=='y') ) 
00390                 { /* while we are in a molecule */
00391 /*printf("line %d: >>%s<<\n",i,(*(ln+i)).f[0].c);*/
00392                 if(endOfMol((ln+i)) == 1){
00393                         in_molecule_switch='n';
00394                         if(next_mol_line!=i) {printf("i is %d and next_mol_line is %d ... should match.\n",i,next_mol_line);}
00395                         break;
00396                         }
00397                 else {
00398                         if(isAtom((ln+i)) == 1) {
00399 //printf("\nTOP:  k = %d ; ka = %d ; j = %d ; i = %d\n",k,ka,j,i);
00400                                 /* Figure out which residue the atom goes in */
00401                                 is_same_residue='y';
00402                                 temp  = ln[i].f[8].c; sscanf(temp,"%d",&resNum);
00403                                 temp = ln[i].f[5].c; sscanf(temp,"%s",resName);
00404                                 IC = ln[i].f[9].c[0];
00405                                 cID = ln[i].f[7].c[0];
00406 /*
00407 printf("0. resnum is %d ;    resname is >>%s<<     cID is >>%c<<           IC is >>%c<< \n",resNum,resName,cID,IC);
00408 printf("   *curres.n is %d ; (*curRes).N is >>%s<< (*curRes).cID is >>%s<< (*curRes).IC is >>%s<< \n",(*curRes).n,(*curRes).N,(*curRes).cID,(*curRes).IC);
00409 */
00410                                 if((*curRes).n!=resNum){ is_same_residue='n';}
00411                                 if(strcmp((*curRes).N,resName)!=0){ is_same_residue='n';}
00412                                 if((*curRes).IC[0]!=IC){ is_same_residue='n';}
00413                                 if((*curRes).cID[0]!=cID){ is_same_residue='n';}
00414                                 if(is_same_residue=='n'){ /* try the rest of the residues */
00415                                 Kref=k+1;
00416                                 for(k=Kref;k<(*curMol).nr;k++) {
00417                                         is_same_residue='y';
00418                                         temp  = ln[i].f[8].c; sscanf(temp,"%d",&resNum);
00419                                         temp = ln[i].f[5].c; sscanf(temp,"%s",resName);
00420                                         IC = ln[i].f[9].c[0];
00421                                         cID = ln[i].f[7].c[0];
00422                                         curRes = &curMol[0].r[k];
00423                                         if(curRes[0].na==1) break;
00424 /*
00425 printf("2. resnum is %d ;    resname is >>%s<<     cID is >>%c<<           IC is >>%c<< \n",resNum,resName,cID,IC);
00426 printf("   *curres.n is %d ; (*curRes).N is >>%s<< (*curRes).cID is >>%s<< (*curRes).IC is >>%s<< \n",(*curRes).n,(*curRes).N,(*curRes).cID,(*curRes).IC);
00427 */
00428                                         if((*curRes).n!=resNum){ is_same_residue='n';}
00429                                         if(strcmp((*curRes).N,resName)!=0){ is_same_residue='n';}
00430                                         if((*curRes).IC[0]!=IC){ is_same_residue='n';}
00431                                         if((*curRes).cID[0]!=cID){ is_same_residue='n';}
00432 /*
00433 printf("\t2. Is same residue (N=%s  n=%d IC=%c cID=%c) = %c\n",(*curRes).N,(*curRes).n,(*curRes).IC[0],(*curRes).cID[0],is_same_residue);
00434 */
00435                                         if(is_same_residue=='y') { break; }
00436                                         }}
00437                                 if(is_same_residue=='n'){ /* try the first residues */
00438                                 for(k=0;k<Kref;k++) {
00439                                         is_same_residue='y';
00440                                         temp  = ln[i].f[8].c; sscanf(temp,"%d",&resNum);
00441                                         temp = ln[i].f[5].c; sscanf(temp,"%s",resName);
00442                                         IC = ln[i].f[9].c[0];
00443                                         cID = ln[i].f[7].c[0];
00444                                         curRes = &(*curMol).r[k];
00445                                         if((*curRes).n!=resNum){ is_same_residue='n';}
00446                                         if(strcmp((*curRes).N,resName)!=0){ is_same_residue='n';}
00447                                         if((*curRes).IC[0]!=IC){ is_same_residue='n';}
00448                                         if((*curRes).cID[0]!=cID){ is_same_residue='n';}
00449 /*
00450 printf("\t3. Is same residue (N=%s  n=%d IC=%c cID=%c) = %c\n",(*curRes).N,(*curRes).n,(*curRes).IC[0],(*curRes).cID[0],is_same_residue);
00451 */
00452                                         if(is_same_residue=='y') { break; }
00453                                         }}
00454                                 if(is_same_residue=='n'){mywhine("Can't find residue in load_pdb's getAssembly.");}
00455 //printf("ni is %d and na is %d \n",(*curRes).ni,(*curRes).na);
00456                                 if((*curRes).ni==(*curRes).na){mywhine("(*curRes).ni==(*curRes).na in getAssembly\n");}
00457                                 curAtm = &(*curRes).a[(*curRes).ni];
00458                                 asmbl[0].a[la] = &(*curRes).a[(*curRes).ni];
00459                                 //Getting the atom # //
00460                                 sscanf( (*(ln+i)).f[1].c ,"%d",&atom_num);
00461                                 //Getting the atom name //temp  = (*(ln+i)).f[3].c;
00462                                 if(((*(ln+i)).f[3].c==NULL)||((*(ln+i)).f[3].c[0]=='\0')){strcpy(atmName,"    ");}
00463                                 else{sscanf( (*(ln+i)).f[3].c ,"%s",atmName);}
00464                                 if(((*(ln+i)).f[17].c==NULL)||((*(ln+i)).f[17].c[0]=='\0')){strcpy(atmElem,"  ");}
00465                                 else{sscanf( (*(ln+i)).f[17].c ,"%s",atmElem); }
00466 /*
00467 printf("\t atmName is >>>%s<<< atmElem is >>>%s<<<\n",atmName,atmElem);
00468 */ 
00469                                 //Getting the X, Y and Z coordinates
00470                                 sscanf( (*(ln+i)).f[11].c ,"%lf",&x);
00471                                 sscanf( (*(ln+i)).f[12].c ,"%lf",&y);
00472                                 sscanf( (*(ln+i)).f[13].c ,"%lf",&z);   
00473                                 (*curAtm).n = atom_num;//set the atom #
00474                                 (*curAtm).N = strdup(atmName);//set the atom name
00475                                 (*curAtm).E = strdup(atmElem);//set the atom element
00476                                 // Record the chain identifier at the atom level
00477                                 if((*(ln+i)).f[7].c!=NULL){
00478                                         (*curAtm).cID=(char*)calloc(2,sizeof(char));
00479                                         (*curAtm).cID[0] = (*(ln+i)).f[7].c[0];
00480                                         (*curAtm).cID[1] = '\0';
00481                                         }
00482                                 else{strcpy((*curAtm).cID," ");}
00483 /*
00484 printf("\t the (*curAtm).cID is >>>%s<<< E is >>>%s<<<  the atom serial is %d\n",(*curAtm).cID,(*curAtm).E,(*curAtm).n); 
00485 */
00486                                 //set the atom's coordinates
00487                                 (*curAtm).x.i = x; (*curAtm).x.j = y; (*curAtm).x.k = z;
00488                                 (*curAtm).moli.m=j; (*curAtm).moli.r=k; (*curAtm).moli.a=(*curRes).ni;
00489                                 (*curRes).ni++;
00490                                 if(((*curRes).na==1)&&(k<(*curMol).nr)) { curAtm = (*curRes).a; }
00491                                 if((*curRes).ni==(*curRes).na) (*curRes).ni=0;
00492                                 la++;
00493                                 }
00494                         }
00495                 i++;
00496                 } /* close while we are in a molecule */
00497         j++;
00498         }
00499 /*printf("ka=%d ; (*asmbl).nr=%d ; la=%d ; (*asmbl).na=%d\n",ka,(*asmbl).nr,la,(*asmbl).na);*/
00500 if((*asmbl).na != la){mywhine("(*asmbl).na != la in load_pdb's getAssembly");}
00501 if((*asmbl).nr != ka){mywhine("(*asmbl).nr != ka in load_pdb's getAssembly");}
00502 return asmbl;
00503 }
00504 
00505 int findTotalResidue(int start)
00506 {
00507   int count=0;int i = start;int thisRes;//int curRes = 0;
00508   int inList,j;
00509   char *temp, icode=' ',chainid=' ';
00510   int  *known = (int*)calloc(1,sizeof(int));
00511   char *knownI = (char*)calloc(1,sizeof(char));
00512   char *knownID = (char*)calloc(1,sizeof(char));
00513 
00514 //printf(" i is %d, and INWC is %d and ln[i].a-b are %d-%d\n",i,INWC,ln[i].a,ln[i].b);
00515 //printf("isAtomln+i is %d\n",isAtom(ln+i));
00516 //printf("endof Mol is %d\n",endOfMol(ln+i));  
00517 while( (i!=INWC) && (isAtom((ln+i))==0) ) { i++; }
00518 
00519   while( (i != INWC) && (endOfMol((ln+i)) == 0) )
00520   {
00521     if(isAtom((ln+i)) == 1)
00522     {
00523      temp  = (*(ln+i)).f[8].c;
00524      icode = ln[i].f[9].c[0];  // we know this is only one character
00525      chainid = ln[i].f[7].c[0];  // we know this is only one character
00526      sscanf(temp,"%d",&thisRes);
00527      inList = 0;
00528      //The New Way
00529      for(j = 0; j < count; j++){
00530 //printf("thisRes=%d icode=%c chainid=%c\n",thisRes,icode,chainid);
00531 //printf("known=%d knownI=%c knownID=%c  j=%d\n",known[j],knownI[j],knownID[j],j);
00532       if((known[j]==thisRes)&&(knownI[j]==icode)&&(knownID[j]==chainid)){inList=1; break;}
00533 //printf("still here\n");
00534      }
00535      if(!inList){
00536       count++;
00537       known = (int*)realloc(known,count*sizeof(int));
00538       known[count-1] = thisRes;
00539       knownI = (char*)realloc(knownI,count*sizeof(char));
00540       knownI[count-1] = icode;
00541       knownID = (char*)realloc(knownID,count*sizeof(char));
00542       knownID[count-1] = chainid;
00543 //printf("known=%d knownI=%c knownID=%c  count-1=%d\n",known[count-1],knownI[count-1],knownID[count-1],count-1);
00544      }
00545     }
00546    i++;
00547   }
00548 free(known);
00549 free(knownI);
00550 free(knownID);
00551 //printf("count is %d\n",count);
00552   return count;
00553 }
00554 
00555 int endOfMol(linedef* line)
00556 {
00557 /* Originally, this checked for these cards:
00558         CONECT, LINK, TER.  
00559    Adding ENDMDL, MASTER and END.  20100727, BLFoley */
00560  switch((*line).a){ 
00561 /*  if(((*line).a == 4 && (*line).b == 2)||
00562      ((*line).a == 2 && ((*line).b == 9||(*line).b == 3))) */
00563         case 0:
00564                 switch ((*line).b) {
00565                         case 1: /* END */
00566                         case 3: /* MASTER */
00567                                 return 1;
00568                         default: break;
00569                         }
00570         case 2:
00571                 switch ((*line).b) {
00572                         case 3:  /* CONECT */
00573                         case 9: /* LINK */
00574                                 return 1;
00575                         default: break;
00576                         }
00577         case 4:
00578                 switch ((*line).b) {
00579                         case 0: /* ENDMDL */
00580                         case 2: /* TER */
00581                                 return 1;
00582                         default: break; 
00583                         }
00584         default: return 0;
00585         }
00586 }
00587 
00588 int isAtom(linedef* line)
00589 {
00590   if(((*line).a == 2 || (*line).a == 3) && (*line).b == 1)
00591     return 1;
00592   else
00593     return 0;
00594 }
00595 int getResInfo(residue* res, int start)
00596 {
00597 int resNum,j;int i = start;
00598 int count = 0, current_status=0;
00599 residue* curRes = (res+0);
00600 char name [10];char* temp;char* resName = name;
00601 char IC=' ', cID=' ',is_same='y';
00602 
00603 //printf("\n**1.  i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);
00604 
00605 while( (current_status == 0) && (i<INWC) ){ 
00606         current_status = isAtom(ln+i); /* find out if we have an atom line  */
00607         i++; } /* if not, check the next line */
00608 //printf("i is %d  and INWC is %d \n",i,INWC);
00609 //printf("current ststus is %d\n",current_status);
00610 if(i==INWC){ /* if we got to the end with no atoms */
00611 //if current status is that we have an atom and i==1 then break
00612         if(current_status==1 && i==1){
00613         
00614         }
00615         else{
00616         (*curRes).N = strdup("EMPTY_RESIDUE");
00617         (*curRes).n = -100;
00618         (*curRes).na = 0;
00619         return INWC;
00620         }
00621         } 
00622 i--; /* still here? decrement the counter to undo the while loop above */
00623 
00624 while( (i != INWC)  && (endOfMol((ln+i)) == 0) ){ 
00625         if(isAtom(ln+i) == 1){
00626                 temp  = ln[i].f[8].c; sscanf(temp,"%d",&resNum);
00627                 temp = ln[i].f[5].c; sscanf(temp,"%s",resName);
00628                 IC = ln[i].f[9].c[0];
00629                 cID = ln[i].f[7].c[0];
00630                 is_same='y';
00631                 if(resNum != (*curRes).n) is_same='n';
00632                 if((curRes[0].IC==NULL)||(IC!=curRes[0].IC[0])) is_same='n';
00633                 if((curRes[0].cID==NULL)||(cID!=curRes[0].cID[0])) is_same='n';
00634 /*printf("resNum is %d and (*curRes).n is %d ; resName is >%s<<\n",resNum,(*curRes).n,resName);*/
00635 /*printf("**2.  i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);*/
00636                 //If this is a different residue than the one in the previous line
00637                 /*if((resNum != (*curRes).n)||(curRes[0].IC==NULL)||(IC!=curRes[0].IC[0]))*/
00638                 if(is_same=='n')
00639                         {
00640 /*printf("This residue is different from the previous.  Checking for broken residue.\n");*/
00641                         curRes = NULL;
00642                         for(j = 0; j < count; j++){//Cycle through all of the found residues
00643                                 if(((*(res+j)).n==resNum)&&((*(res+j)).IC[0]==IC)){ /* if already seen... */
00644                                         curRes = (res+j); break;} /* reset residue pointer */
00645                                 } 
00646                         /* If it is not a known residue, assign it a new slot */
00647                         if(curRes == NULL || curRes == 0x0){ curRes = (res+count); count++; }
00648                         }
00649                 if((*curRes).n < 0) { /* If this residue has not been found yet */
00650 /*printf("This residue has not been found yet.  Assigning values\n");*/
00651                         if(resName!=NULL){(*curRes).N = strdup(resName);}/* Set the residue name */
00652                         else{(*curRes).N = strdup("   ");}
00653                         curRes[0].IC=(char*)calloc(2, sizeof(char));
00654                         (*curRes).IC[0] = IC;   /* Set the IC */
00655                         (*curRes).IC[1] = '\0'; /* Set the IC */
00656                         curRes[0].cID=(char*)calloc(2, sizeof(char));
00657                         (*curRes).cID[0] = cID; /* Set the cID */
00658                         (*curRes).cID[1] = '\0';        /* Set the cID */
00659                         (*curRes).n = resNum;           /* Set the actual residue number */
00660                         (*curRes).na = 0;                       /* ...and make sure the total # of atoms is 0 */
00661                         } /* Otherwise we can assume all of these have already been set */
00662                         
00663 /*printf("  -->  resNum is %d and (*curRes).n is %d, (*curRes).na is %d \n",resNum,(*curRes).n,(*curRes).na);*/
00664 /*printf("**3.  i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);*/
00665                 (*curRes).na++;
00666 /*printf(" --> (curRes). n=%d, na=%d, N=%s\n",(*curRes).n,(*curRes).na,(*curRes).N);*/
00667 /*printf("**4.  i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);*/
00668                 }//End if an atom 
00669         i++; //..and then incriment to the next line
00670         } //End loop through file
00671 return i;
00672 }
00673 
00674 molecule* getMolecule(void){
00675   int molNum = 1;
00676   int i = 0;//int j = 0;int k = 0;
00677   int ntX,j,rI,next_mol_line;
00678   double dblY;
00679   char* temp;char name[6] = ""; //char* atmName = name;
00680   char icode=' ',chainid=' ';
00681   molecule* mol = (molecule*)calloc(molNum,sizeof(molecule));
00682   residue* curRes;// residue* res;
00683   atom* curAtm;// atom* atm;
00684   //Get all the information for the first molecule
00685   (*mol).nr = findTotalResidue(i);
00686   //printf("the number of residues are %d\n",(*mol).nr);
00687   (*mol).r = (residue*)calloc((*mol).nr,sizeof(residue));
00688   //Initialize the residue numbers to -1 so the program knows they're not used yet
00689   for(j = 0; j < (*mol).nr; j++){(*mol).r[j].n = -1;}
00690   next_mol_line=getResInfo((*mol).r,i);
00691 //printf("next_mol_line is %d\n",next_mol_line);
00692   int l[(*mol).nr];
00693   for(i = 0; i < (*mol).nr; i++){
00694         //printf("the number of atoms are %d\n",(*mol).r[i].na);
00695         (*mol).r[i].a = (atom*)calloc((*mol).r[i].na,sizeof(atom));
00696    l[i] = 0;
00697   }
00698   //Set the current residue, and residue index to 0
00699   rI = 0;
00700   curRes = ((*mol).r+0);
00701   for(i = 0; i < INWC; i++)
00702   {
00703     if(isAtom((ln+i)) == 1)
00704     {
00705 //printf("in loop for isAtom\n");
00706       temp  = (*(ln+i)).f[8].c; sscanf(temp,"%d",&ntX);
00707       icode  = (*(ln+i)).f[9].c[0]; 
00708       chainid  = (*(ln+i)).f[7].c[0]; 
00709       if(((*curRes).n != ntX)||(icode!=curRes[0].IC[0])||(chainid!=curRes[0].cID[0]))//If the residue numbers do not match
00710       {//Seach through all of the resiudes until one is found that matches
00711        for(j = 0; j < (*mol).nr; j++){//then assign it to curRes
00712         if(mol[0].r[j].n == ntX){rI = j; break;}
00713        }
00714        curRes = ((*mol).r+rI);
00715       }
00716       curAtm = ((*curRes).a+l[rI]);
00717 /*printf("\t still in loop for isAtom\n"); */
00718       //Getting the record name
00719       temp = (*(ln+i)).f[0].c; sscanf(temp,"%s",name);
00720       if(name!=NULL){(*curAtm).D = strdup(name);}
00721       else{(*curAtm).D = strdup("");}
00722       //Getting the atom #
00723       temp  = (*(ln+i)).f[1].c; sscanf(temp,"%d",&ntX);
00724       (*curAtm).n = ntX;
00725       //Getting the atom name
00726       temp  = (*(ln+i)).f[3].c; sscanf(temp,"%s",name);
00727       if(name!=NULL){(*curAtm).N = strdup(name);}
00728       else{(*curAtm).D = strdup("    ");}
00729 
00730       //Getting the chain identifier
00731       if((*(ln+i)).f[7].c!=NULL){
00732         (*curAtm).cID = (char*)calloc(2,sizeof(char));
00733         (*curAtm).cID[0] = (*(ln+i)).f[7].c[0];
00734         (*curAtm).cID[1] = '\0';
00735         }
00736       else{(*curAtm).cID = strdup(" ");}
00737 /*printf("\t the (*curAtm).cID is >>>%s<<<\n",(*curAtm).cID); */
00738 
00739       //Getting the X coordinate
00740       temp  = (*(ln+i)).f[11].c; sscanf(temp,"%lf",&dblY); (*curAtm).x.i = dblY;
00741       //Getting the Y coordinate
00742       temp  = (*(ln+i)).f[12].c; sscanf(temp,"%lf",&dblY); (*curAtm).x.j = dblY;
00743       //Getting the Z coordinate
00744       temp  = (*(ln+i)).f[13].c; sscanf(temp,"%lf",&dblY); (*curAtm).x.k = dblY;   
00745 
00746       l[rI]++;
00747 //printf("the current atom's xyz are: %f  %f  %f \n",(*curAtm).x.i,(*curAtm).x.j,(*curAtm).x.k);
00748     }
00749   }
00750 
00751 //printf("the number of atoms in the molecule's first residue is %d\n",(*mol).r[0].na);
00752 
00753  return mol;
00754 }
00755 
 All Classes Files Functions Variables Typedefs Defines