GLYLIB
0.3.0b
|
00001 /*Author: Michael Tessier with changes by many others 00002 including BLFoley, possibly also Spandana Makeneni, 00003 Anita Nivedha, Matthew Tessier */ 00004 #include <load_pdb.h> 00005 assembly* load_pdb(char* file_name) 00006 { 00007 char* temp; 00008 WARN=1;SILENT=1;INWC=0;DATE=0;UNCUT=1;UNx=1;UNy=1;UNz=1; 00009 temp = suf; strcpy(temp,".pdb"); 00010 temp = sufc; strcpy(temp,"_change.txt"); ACT='0'; 00011 DEBUG=-1;LASTRES=-1;LASTOKX=0;LASTOKY=0;LASTOKZ=0; 00012 UNCTOL=0;CRYX=0;CRYY=0;CRYZ=0;LASTX=0;LASTY=0;LASTZ=0; 00013 int ma,ra; 00014 assembly* asmbl; 00015 IN = myfopen(file_name, "r"); 00016 char find_LINK='n',find_CONECT='n'; 00017 /* 00018 init_struct() scans pdb file for number of lines 00019 allocates memory for line structures initializes 00020 structures that contain line formats 00021 00022 This, among other PDB functions, needs to be made less "global". 00023 */ 00024 init_struct(); 00025 rewind(IN); 00026 /*Read in lines to ln structure */ 00027 //printf("Reading in %s...\n",file_name); 00028 for(ma=0;ma<INWC;ma++){ 00029 if(DEBUG>=1){printf("made it to here main line loop %d ...\n", ma);} 00030 rwm_line(ma+1); 00031 if(strncmp(ln[ma].f[0].c,"CONECT",6)==0) find_CONECT='y'; 00032 if(strncmp(ln[ma].f[0].c,"LINK",4)==0) find_LINK='y'; 00033 } 00034 00035 /*Determine the number of molecules in the pdb */ 00036 //printf("There are %d molecule(s)\n",howManyMolecules()); 00037 asmbl = getAssembly(); 00038 set_assembly_molindexes(asmbl); 00039 if(find_CONECT=='y') 00040 { 00041 set_assembly_atom_molbonds_from_PDB_CONECT(asmbl, ln, file_name, INWC); 00042 /* 00043 * dropping this because apparently not working... 00044 for(ma=0;ma<asmbl[0].nm;ma++) 00045 { 00046 for(ra=0;ra<asmbl[0].m[ma][0].nr;ra++) 00047 { 00048 set_residue_atom_nodes_from_bonds(&asmbl[0].m[ma][0].r[ra]); 00049 } 00050 set_molecule_atom_nodes_from_bonds(asmbl[0].m[ma]); 00051 */ 00052 /* 00053 The atom-level CONECT cards are more trusted than LINK cards, 00054 so they are used unless there are no CONECT cards. 00055 */ 00056 /* 00057 * also this 00058 set_molecule_residue_molbonds(asmbl[0].m[ma]); 00059 set_molecule_residue_nodes_from_bonds(asmbl[0].m[ma]); 00060 } 00061 */ 00062 } 00063 /* and this isn't even written 00064 if((find_LINK=='y')&&(find_CONECT=='n')) 00065 { 00066 set_assembly_residue_molbonds_from_PDB_LINK(asmbl, ln, file_name, INWC); 00067 } 00068 */ 00069 //printf("PDB Information Successfully Read.\n"); 00070 free(ln); 00071 return asmbl; 00072 } 00073 00074 molecule load_pdb_from_slurp(fileslurp in){ 00075 int i,hldr; 00076 char* temp; 00077 init_struct_slurp(in); 00078 for(i = 0; i < in.n; i++){ //Assign data to the rwm line 00079 rwm_line_char(in.L[i],i+1); 00080 if(isAtom((ln+i))){ //And determine the smallest residue 00081 temp = (*(ln+i)).f[8].c; sscanf(temp,"%d",&hldr); 00082 } 00083 } 00084 INWC = in.n; 00085 // printf("%d\n",INWC); 00086 return (*getMolecule()); 00087 } 00088 00089 void set_assembly_atom_molbonds_from_PDB_CONECT(assembly *A, linedef *ln, const char *file_name, int nlines) 00090 { 00091 int ai,bi,li,ncurrent=-1,this_n,that_n,list_loc,na=A[0].na; 00092 char badorder='n'; 00093 molindex_set found_a; 00094 int this_ai,alist[na],bonds[na],blist[na],llist[na]; 00095 int conect_first=-1,conect_last=-1,tmpi,tmpj; 00096 /* 00097 0. Check that atoms are all in order. Complain if not, but try anyway. 00098 */ 00099 for(ai=0;ai<A[0].na;ai++){ 00100 if(A[0].a[ai][0].n<=ncurrent) { badorder = 'y'; } 00101 /*printf("ncurrent is %d ; n is %d \n",ncurrent,A[0].a[ai][0].n); */ 00102 /*if((A[0].a[ai][0].n-ncurrent)!=1) printf("ncurrent (%d) differs from n (%d) by %d\n",ncurrent,A[0].a[ai][0].n,A[0].a[ai][0].n-ncurrent);*/ 00103 ncurrent=A[0].a[ai][0].n; 00104 } 00105 if(badorder=='y') 00106 { 00107 printf("\nAtoms serials (numbers) in file %s are not in increasing order.\n",file_name); 00108 printf("This might cause bonding problems.\n"); 00109 } 00110 for(ai=0;ai<A[0].na;ai++) 00111 { 00112 alist[ai]=-1; 00113 blist[ai]=-1; 00114 llist[ai]=-1; 00115 bonds[ai]=0; 00116 } 00117 /* 00118 1. Find numbers for atoms and number of bonds for each. 00119 */ 00120 ncurrent=0; 00121 for(li=0;li<nlines;li++) 00122 { 00123 if(strcmp(ln[li].f[0].c,"CONECT")==0) 00124 { 00125 if(conect_first==-1) conect_first=li; 00126 sscanf(ln[li].f[1].c,"%d",&this_n); 00127 list_loc=-1; 00128 for(ai=0;ai<ncurrent;ai++) { if(alist[ai]==this_n) list_loc=ai; } 00129 //printf("this_n is %d ; list_loc is %d \n",this_n,list_loc); 00130 if(list_loc<0) 00131 { 00132 list_loc=ncurrent; 00133 ncurrent++; 00134 alist[list_loc]=this_n; 00135 llist[list_loc]=li; 00136 } 00137 //printf(" --- list_loc is %d \n",list_loc); 00138 for(bi=2;bi<15;bi++) 00139 { 00140 if(ln[li].f[bi].c==NULL) break; 00141 tmpi=sscanf(ln[li].f[bi].c,"%d",&tmpj); 00142 if(tmpi<=0) { continue; } 00143 //printf("\t\ttmpi is %d ; tmpj is %d\n",tmpi,tmpj); 00144 bonds[list_loc]++; 00145 } 00146 //printf("\t\tbonds[%d] is %d\n",list_loc,bonds[list_loc]); 00147 conect_last=li; 00148 } 00149 } 00150 /*printf("conect first/last are: %d %d \n",conect_first,conect_last); */ 00151 /* 00152 2. Set atom-level connections. 00153 */ 00154 for(ai=0;ai<na;ai++) 00155 { 00156 if(alist[ai]==-1) continue; 00157 for(ncurrent=0;ncurrent<na;ncurrent++) { blist[ncurrent]=-1; } 00158 found_a=find_assembly_top_level_atoms_by_n(A,alist[ai]); 00159 /*printf("found_a.nP is %d and P[0] i-m-r-a is %d-%d-%d-%d n=%d N=%s\n",found_a.nP,\ 00160 found_a.P[0].i,found_a.P[0].m,found_a.P[0].r,found_a.P[0].a,\ 00161 A[0].m[found_a.P[0].m][0].r[found_a.P[0].r].a[found_a.P[0].a].n,A[0].m[found_a.P[0].m][0].r[found_a.P[0].r].a[found_a.P[0].a].N); 00162 */ 00163 if(found_a.nP==0) 00164 { 00165 printf("\nBonding specified in file %s for non-existent atom serial number %d\n",file_name,this_n); 00166 printf("Skipping this one and hoping for better next time.\n"); 00167 continue; 00168 } 00169 if(found_a.nP>1) 00170 { 00171 printf("\nBonding specified in file %s for duplicated atom serial number %d\n",file_name,this_n); 00172 printf("Choosing first in list and moving on.\n"); 00173 } 00174 this_ai=found_a.P[0].i; 00175 free(found_a.P); 00176 A[0].a[this_ai][0].nmb=bonds[ai]; 00177 A[0].a[this_ai][0].mb=(molbond*)calloc(bonds[ai],sizeof(molbond)); 00178 list_loc=0; 00179 for(li=conect_first;li<=conect_last;li++) 00180 { 00181 tmpi=sscanf(ln[li].f[1].c,"%d",&tmpj); 00182 if(tmpi<=0) {mywhine("trouble reading ln[li].f[1].c in set_assembly_atom_molbonds_from_PDB_CONECT");} 00183 if(tmpj==alist[ai]) 00184 { 00185 for(bi=2;bi<15;bi++) 00186 { 00187 if(ln[li].f[bi].c==NULL) break; 00188 tmpi=sscanf(ln[li].f[bi].c,"%d",&that_n); 00189 if(tmpi<=0) { continue; } 00190 /* If there is an entry in this location */ 00191 found_a=find_assembly_top_level_atoms_by_n(A,that_n); 00192 if(found_a.nP==0) 00193 { 00194 printf("\nBonding specified in file %s for non-existent atom serial number %d\n",file_name,that_n); 00195 printf("Skipping this one and hoping for better next time.\n"); 00196 continue; 00197 } 00198 if(found_a.nP>1) 00199 { 00200 printf("\nBonding specified in file %s for duplicated atom serial number %d\n",file_name,that_n); 00201 printf("Choosing first in list and moving on.\n"); 00202 } 00203 A[0].a[this_ai][0].mb[list_loc].s=A[0].a[this_ai][0].moli; 00204 A[0].a[this_ai][0].mb[list_loc].t=found_a.P[0]; 00205 free(found_a.P); 00206 list_loc++; 00207 if(list_loc>A[0].a[this_ai][0].nmb){mywhine("list_loc>A[0].a[this_ai][0].nmb in set_assembly_atom_molbonds_from_PDB_CONECT");} 00208 } 00209 } 00210 } 00211 if(list_loc<A[0].a[this_ai][0].nmb){mywhine("list_loc<A[0].a[this_ai][0].nmb in set_assembly_atom_molbonds_from_PDB_CONECT");} 00212 } 00213 return; 00214 } 00215 00216 void set_assembly_residue_molbonds_from_PDB_LINK(assembly *asmbl, linedef *ln, const char *file_name, int nlines) 00217 { 00218 mywhine("The function set_assembly_residue_molbonds_from_PDB_LINK has not been written yet.\n"); 00219 } 00220 00221 00222 fileslurp isolateInputPDB(fileslurp S){ 00223 int i,itr; 00224 char* plc; 00225 fileslurp O; O.n = 0; itr = 0; 00226 for(i = 0; i < S.n; i++){ 00227 if(strstr(S.L[i],"INPUT-PDBQ: ATOM")!=NULL || //These cover the two types of 00228 strstr(S.L[i],"INPUT-PDBQ: HETATM")!=NULL || //AD3 style input data 00229 strstr(S.L[i],"INPUT-LIGAND-PDBQT: ATOM")!=NULL || //These cover the two types of 00230 strstr(S.L[i],"INPUT-LIGAND-PDBQT: HETATM")!=NULL) //AD4 style input data 00231 O.n++; 00232 } 00233 O.L = (char**)calloc(O.n,sizeof(char*)); 00234 for(i = 0; i < S.n; i++){ 00235 if(strstr(S.L[i],"INPUT-PDBQ: ATOM")!=NULL || 00236 strstr(S.L[i],"INPUT-PDBQ: HETATM")!=NULL || 00237 strstr(S.L[i],"INPUT-LIGAND-PDBQT: ATOM")!=NULL || 00238 strstr(S.L[i],"INPUT-LIGAND-PDBQT: HETATM")!=NULL){ 00239 if(strstr(S.L[i],"INPUT-PDBQ: ")!=NULL) 00240 plc = S.L[i]+12; //Based on what starts the line, duplicate the line minus 00241 else //The first couple of words 00242 plc = S.L[i]+20; 00243 O.L[itr] = strdup(plc); itr++; 00244 } 00245 } 00246 for(i = 0; i < S.n; i++) 00247 free(S.L[i]); 00248 free(S.L); 00249 return O; 00250 } 00251 00252 fileslurp isolateDockedPDB(fileslurp S){ 00253 int i,itr; 00254 char* plc; 00255 fileslurp O; O.n = 0; itr = 0; 00256 for(i = 0; i < S.n; i++){ 00257 if(strstr(S.L[i],"DOCKED: ATOM")!=NULL) 00258 O.n++; 00259 } 00260 O.L = (char**)calloc(O.n,sizeof(char*)); 00261 for(i = 0; i < S.n; i++){ 00262 if(strstr(S.L[i],"DOCKED: ATOM")!=NULL){ 00263 plc = S.L[i]+8; 00264 O.L[itr] = strdup(plc); itr++; 00265 } 00266 } 00267 for(i = 0; i < S.n; i++) 00268 free(S.L[i]); 00269 free(S.L); 00270 return O; 00271 } 00272 00273 00274 int howManyMolecules() 00275 { 00276 int i; int counter = 0; 00277 char in_molecule_switch = 'n'; 00278 int check_status=0; 00279 /* Adding a little robustness to this function. 00280 20100727, BLFoley. 00281 In particular, making a check for the possibility that there 00282 are no molecules found. Also fixing issue due to there 00283 being multiple TER, CONECT, etc., cards in a row. */ 00284 for(i = 0; i < INWC; i++) { 00285 check_status = isAtom((ln+i)); /* is this an atom? (ATOM or HETATM) */ 00286 if(in_molecule_switch == 'n' && check_status == 1){ 00287 in_molecule_switch = 'y'; /* we have entered a molecule */ 00288 counter++; 00289 } 00290 check_status = endOfMol((ln+i)); /* If a card that usually ends a 00291 molecule: TER, LINK, CONECT, END, ENDMDL, MASTER */ 00292 if(check_status == 1){ in_molecule_switch = 'n'; } 00293 } 00294 return counter; 00295 } 00296 00297 assembly* getAssembly() 00298 { 00299 int molNum = howManyMolecules(); 00300 int i = 0;int j = 0;int k = 0;int ka = 0; int la=0;int atom_num; 00301 int init=0, next_mol_line=0, ri=0,natoms=0,resNum,Kref=0; 00302 char in_molecule_switch = 'n'; 00303 char is_same_residue = 'y'; 00304 double x,y,z; 00305 char atmName[5], atmElem[3], resName[4],IC=' ',cID=' ',*temp; 00306 assembly* asmbl = (assembly*) calloc (1 , sizeof(assembly)); 00307 (*asmbl).nm = molNum; 00308 molecule *curMol=NULL; residue* curRes=NULL; 00309 atom* curAtm;// atom* atm; 00310 /* made further changes for molecule double pointers in assembly 00311 starting on 20100723. BLFoley */ 00312 00313 (*asmbl).m = (molecule**)calloc(molNum,sizeof(molecule*));//added by MNT on 20080806 00314 for(init=0;init<molNum;init++){ 00315 (*asmbl).m[init] = (molecule*) calloc (1,sizeof(molecule)); 00316 (*asmbl).m[init][0].nr=-1; /* initializing as empty/not-seen. */ 00317 } 00318 (*asmbl).nr = 0; // initializing 00319 (*asmbl).r = (residue**) calloc(1, sizeof(residue*)); 00320 (*asmbl).na = 0; // initializing 00321 (*asmbl).a = (atom**) calloc(1, sizeof(atom*)); 00322 i=0; 00323 strcpy(atmName,""); 00324 strcpy(resName,""); 00325 strcpy(atmElem,""); 00326 /*printf("First allocate of residues and atoms:\n");*/ 00327 /*printf("\t molecule %d, init is %d; residue %d -- nm is %d, A.nr is %d, A.na is %d\n",j,init,k,(*asmbl).nm,(*asmbl).nr,(*asmbl).na);*/ 00328 /* 00329 i = line number 00330 j = molecule number 00331 k = residue number (within the molecule, not absolute) 00332 ka = absolute residue number 00333 (*curRes).ni = current atom to place (set back to zero when done) 00334 la = absolute atom number 00335 */ 00336 00337 in_molecule_switch = 'n'; 00338 for(i = 0; i < INWC; i++) { 00339 if(j==molNum){break;} 00340 if(in_molecule_switch == 'n'){ /* If we are not in a molecule. */ 00341 while( (i<(INWC)) && (isAtom(ln+i)==0) ){ i++; } /* advance to first ATOM/HETATM line */ 00342 if(i==INWC){break;} 00343 curMol = &asmbl[0].m[j][0]; 00344 (*curMol).Des=(char*)calloc(2,sizeof(char)); 00345 (*curMol).Des[0]=(*(ln+i)).f[7].c[0]; (*curMol).Des[1]='\0'; 00346 (*curMol).nr = findTotalResidue(i); 00347 (*curMol).r = (residue*) calloc ((*curMol).nr,sizeof(residue)); 00348 /*Initialize the residue numbers to -1 so the program knows they're not used yet. */ 00349 for(ri=0;ri<(*curMol).nr;ri++){ 00350 initialize_residue(&curMol[0].r[ri]); 00351 (*curMol).r[ri].n = -1; 00352 (*curMol).r[ri].ni = 0; 00353 (*curMol).r[ri].moli.m=j; 00354 (*curMol).r[ri].moli.r=ri; 00355 (*curMol).r[ri].moli.a=-1; 00356 } 00357 /*Get the total atoms in the molecule while we're here. */ 00358 next_mol_line=getResInfo((*curMol).r,i); 00359 natoms=0; 00360 for(ri = 0; ri < (*curMol).nr; ri++){ 00361 (*curMol).r[ri].a = (atom*) calloc ((*curMol).r[ri].na,sizeof(atom)); 00362 natoms+=(*curMol).r[ri].na; 00363 //printf("na is %d for residue %d(ka=%d)\n",(*curMol).r[ri].na,ri,ka); 00364 } 00365 /* Allocate the top-level atoms and residues for this molecule */ 00366 (*asmbl).nr+=(*curMol).nr; 00367 (*asmbl).r = (residue**) realloc ((*asmbl).r, (*asmbl).nr*sizeof(residue*)); 00368 (*asmbl).na+=natoms; 00369 (*asmbl).a = (atom**) realloc ((*asmbl).a, (*asmbl).na*sizeof(atom*)); 00370 /* set the top-level residues */ 00371 for(ri = 0; ri < (*curMol).nr; ri++) { (*asmbl).r[ka+ri] = &curMol[0].r[ri]; } 00372 in_molecule_switch = 'y';/* At this point, we should be at an ATOM/HETATM entry. */ 00373 curRes = curMol[0].r; 00374 curAtm = (*curRes).a; 00375 ka+=curMol[0].nr; 00376 } 00377 if(i==INWC){ /* If we ran out of lines... */ 00378 if((*curMol).nr==-1){ /* If this is molecule does not currently contain residues */ 00379 (*curMol).nr = 0; 00380 (*curMol).r = NULL; 00381 (*curMol).N = strdup("EMPTY_MOLECULE"); 00382 if(j!=molNum){printf("Found %d molecules, but ran out of atoms before molecule %d.\n",molNum,j+1); 00383 printf("This is probably a very, very bad thing, but we're ignoring it for now.\n");} 00384 } 00385 else if ((*curMol).nr>0){ /* if this molecule already contains residues */ 00386 printf("Got to i=INWC while in a molecule. Go fix code.\n"); } 00387 else { printf("Got to i=INWC for an uncoded value of nr. Go fix code.\n");} 00388 } 00389 while( (i<INWC) && (in_molecule_switch=='y') ) 00390 { /* while we are in a molecule */ 00391 /*printf("line %d: >>%s<<\n",i,(*(ln+i)).f[0].c);*/ 00392 if(endOfMol((ln+i)) == 1){ 00393 in_molecule_switch='n'; 00394 if(next_mol_line!=i) {printf("i is %d and next_mol_line is %d ... should match.\n",i,next_mol_line);} 00395 break; 00396 } 00397 else { 00398 if(isAtom((ln+i)) == 1) { 00399 //printf("\nTOP: k = %d ; ka = %d ; j = %d ; i = %d\n",k,ka,j,i); 00400 /* Figure out which residue the atom goes in */ 00401 is_same_residue='y'; 00402 temp = ln[i].f[8].c; sscanf(temp,"%d",&resNum); 00403 temp = ln[i].f[5].c; sscanf(temp,"%s",resName); 00404 IC = ln[i].f[9].c[0]; 00405 cID = ln[i].f[7].c[0]; 00406 /* 00407 printf("0. resnum is %d ; resname is >>%s<< cID is >>%c<< IC is >>%c<< \n",resNum,resName,cID,IC); 00408 printf(" *curres.n is %d ; (*curRes).N is >>%s<< (*curRes).cID is >>%s<< (*curRes).IC is >>%s<< \n",(*curRes).n,(*curRes).N,(*curRes).cID,(*curRes).IC); 00409 */ 00410 if((*curRes).n!=resNum){ is_same_residue='n';} 00411 if(strcmp((*curRes).N,resName)!=0){ is_same_residue='n';} 00412 if((*curRes).IC[0]!=IC){ is_same_residue='n';} 00413 if((*curRes).cID[0]!=cID){ is_same_residue='n';} 00414 if(is_same_residue=='n'){ /* try the rest of the residues */ 00415 Kref=k+1; 00416 for(k=Kref;k<(*curMol).nr;k++) { 00417 is_same_residue='y'; 00418 temp = ln[i].f[8].c; sscanf(temp,"%d",&resNum); 00419 temp = ln[i].f[5].c; sscanf(temp,"%s",resName); 00420 IC = ln[i].f[9].c[0]; 00421 cID = ln[i].f[7].c[0]; 00422 curRes = &curMol[0].r[k]; 00423 if(curRes[0].na==1) break; 00424 /* 00425 printf("2. resnum is %d ; resname is >>%s<< cID is >>%c<< IC is >>%c<< \n",resNum,resName,cID,IC); 00426 printf(" *curres.n is %d ; (*curRes).N is >>%s<< (*curRes).cID is >>%s<< (*curRes).IC is >>%s<< \n",(*curRes).n,(*curRes).N,(*curRes).cID,(*curRes).IC); 00427 */ 00428 if((*curRes).n!=resNum){ is_same_residue='n';} 00429 if(strcmp((*curRes).N,resName)!=0){ is_same_residue='n';} 00430 if((*curRes).IC[0]!=IC){ is_same_residue='n';} 00431 if((*curRes).cID[0]!=cID){ is_same_residue='n';} 00432 /* 00433 printf("\t2. Is same residue (N=%s n=%d IC=%c cID=%c) = %c\n",(*curRes).N,(*curRes).n,(*curRes).IC[0],(*curRes).cID[0],is_same_residue); 00434 */ 00435 if(is_same_residue=='y') { break; } 00436 }} 00437 if(is_same_residue=='n'){ /* try the first residues */ 00438 for(k=0;k<Kref;k++) { 00439 is_same_residue='y'; 00440 temp = ln[i].f[8].c; sscanf(temp,"%d",&resNum); 00441 temp = ln[i].f[5].c; sscanf(temp,"%s",resName); 00442 IC = ln[i].f[9].c[0]; 00443 cID = ln[i].f[7].c[0]; 00444 curRes = &(*curMol).r[k]; 00445 if((*curRes).n!=resNum){ is_same_residue='n';} 00446 if(strcmp((*curRes).N,resName)!=0){ is_same_residue='n';} 00447 if((*curRes).IC[0]!=IC){ is_same_residue='n';} 00448 if((*curRes).cID[0]!=cID){ is_same_residue='n';} 00449 /* 00450 printf("\t3. Is same residue (N=%s n=%d IC=%c cID=%c) = %c\n",(*curRes).N,(*curRes).n,(*curRes).IC[0],(*curRes).cID[0],is_same_residue); 00451 */ 00452 if(is_same_residue=='y') { break; } 00453 }} 00454 if(is_same_residue=='n'){mywhine("Can't find residue in load_pdb's getAssembly.");} 00455 //printf("ni is %d and na is %d \n",(*curRes).ni,(*curRes).na); 00456 if((*curRes).ni==(*curRes).na){mywhine("(*curRes).ni==(*curRes).na in getAssembly\n");} 00457 curAtm = &(*curRes).a[(*curRes).ni]; 00458 asmbl[0].a[la] = &(*curRes).a[(*curRes).ni]; 00459 //Getting the atom # // 00460 sscanf( (*(ln+i)).f[1].c ,"%d",&atom_num); 00461 //Getting the atom name //temp = (*(ln+i)).f[3].c; 00462 if(((*(ln+i)).f[3].c==NULL)||((*(ln+i)).f[3].c[0]=='\0')){strcpy(atmName," ");} 00463 else{sscanf( (*(ln+i)).f[3].c ,"%s",atmName);} 00464 if(((*(ln+i)).f[17].c==NULL)||((*(ln+i)).f[17].c[0]=='\0')){strcpy(atmElem," ");} 00465 else{sscanf( (*(ln+i)).f[17].c ,"%s",atmElem); } 00466 /* 00467 printf("\t atmName is >>>%s<<< atmElem is >>>%s<<<\n",atmName,atmElem); 00468 */ 00469 //Getting the X, Y and Z coordinates 00470 sscanf( (*(ln+i)).f[11].c ,"%lf",&x); 00471 sscanf( (*(ln+i)).f[12].c ,"%lf",&y); 00472 sscanf( (*(ln+i)).f[13].c ,"%lf",&z); 00473 (*curAtm).n = atom_num;//set the atom # 00474 (*curAtm).N = strdup(atmName);//set the atom name 00475 (*curAtm).E = strdup(atmElem);//set the atom element 00476 // Record the chain identifier at the atom level 00477 if((*(ln+i)).f[7].c!=NULL){ 00478 (*curAtm).cID=(char*)calloc(2,sizeof(char)); 00479 (*curAtm).cID[0] = (*(ln+i)).f[7].c[0]; 00480 (*curAtm).cID[1] = '\0'; 00481 } 00482 else{strcpy((*curAtm).cID," ");} 00483 /* 00484 printf("\t the (*curAtm).cID is >>>%s<<< E is >>>%s<<< the atom serial is %d\n",(*curAtm).cID,(*curAtm).E,(*curAtm).n); 00485 */ 00486 //set the atom's coordinates 00487 (*curAtm).x.i = x; (*curAtm).x.j = y; (*curAtm).x.k = z; 00488 (*curAtm).moli.m=j; (*curAtm).moli.r=k; (*curAtm).moli.a=(*curRes).ni; 00489 (*curRes).ni++; 00490 if(((*curRes).na==1)&&(k<(*curMol).nr)) { curAtm = (*curRes).a; } 00491 if((*curRes).ni==(*curRes).na) (*curRes).ni=0; 00492 la++; 00493 } 00494 } 00495 i++; 00496 } /* close while we are in a molecule */ 00497 j++; 00498 } 00499 /*printf("ka=%d ; (*asmbl).nr=%d ; la=%d ; (*asmbl).na=%d\n",ka,(*asmbl).nr,la,(*asmbl).na);*/ 00500 if((*asmbl).na != la){mywhine("(*asmbl).na != la in load_pdb's getAssembly");} 00501 if((*asmbl).nr != ka){mywhine("(*asmbl).nr != ka in load_pdb's getAssembly");} 00502 return asmbl; 00503 } 00504 00505 int findTotalResidue(int start) 00506 { 00507 int count=0;int i = start;int thisRes;//int curRes = 0; 00508 int inList,j; 00509 char *temp, icode=' ',chainid=' '; 00510 int *known = (int*)calloc(1,sizeof(int)); 00511 char *knownI = (char*)calloc(1,sizeof(char)); 00512 char *knownID = (char*)calloc(1,sizeof(char)); 00513 00514 //printf(" i is %d, and INWC is %d and ln[i].a-b are %d-%d\n",i,INWC,ln[i].a,ln[i].b); 00515 //printf("isAtomln+i is %d\n",isAtom(ln+i)); 00516 //printf("endof Mol is %d\n",endOfMol(ln+i)); 00517 while( (i!=INWC) && (isAtom((ln+i))==0) ) { i++; } 00518 00519 while( (i != INWC) && (endOfMol((ln+i)) == 0) ) 00520 { 00521 if(isAtom((ln+i)) == 1) 00522 { 00523 temp = (*(ln+i)).f[8].c; 00524 icode = ln[i].f[9].c[0]; // we know this is only one character 00525 chainid = ln[i].f[7].c[0]; // we know this is only one character 00526 sscanf(temp,"%d",&thisRes); 00527 inList = 0; 00528 //The New Way 00529 for(j = 0; j < count; j++){ 00530 //printf("thisRes=%d icode=%c chainid=%c\n",thisRes,icode,chainid); 00531 //printf("known=%d knownI=%c knownID=%c j=%d\n",known[j],knownI[j],knownID[j],j); 00532 if((known[j]==thisRes)&&(knownI[j]==icode)&&(knownID[j]==chainid)){inList=1; break;} 00533 //printf("still here\n"); 00534 } 00535 if(!inList){ 00536 count++; 00537 known = (int*)realloc(known,count*sizeof(int)); 00538 known[count-1] = thisRes; 00539 knownI = (char*)realloc(knownI,count*sizeof(char)); 00540 knownI[count-1] = icode; 00541 knownID = (char*)realloc(knownID,count*sizeof(char)); 00542 knownID[count-1] = chainid; 00543 //printf("known=%d knownI=%c knownID=%c count-1=%d\n",known[count-1],knownI[count-1],knownID[count-1],count-1); 00544 } 00545 } 00546 i++; 00547 } 00548 free(known); 00549 free(knownI); 00550 free(knownID); 00551 //printf("count is %d\n",count); 00552 return count; 00553 } 00554 00555 int endOfMol(linedef* line) 00556 { 00557 /* Originally, this checked for these cards: 00558 CONECT, LINK, TER. 00559 Adding ENDMDL, MASTER and END. 20100727, BLFoley */ 00560 switch((*line).a){ 00561 /* if(((*line).a == 4 && (*line).b == 2)|| 00562 ((*line).a == 2 && ((*line).b == 9||(*line).b == 3))) */ 00563 case 0: 00564 switch ((*line).b) { 00565 case 1: /* END */ 00566 case 3: /* MASTER */ 00567 return 1; 00568 default: break; 00569 } 00570 case 2: 00571 switch ((*line).b) { 00572 case 3: /* CONECT */ 00573 case 9: /* LINK */ 00574 return 1; 00575 default: break; 00576 } 00577 case 4: 00578 switch ((*line).b) { 00579 case 0: /* ENDMDL */ 00580 case 2: /* TER */ 00581 return 1; 00582 default: break; 00583 } 00584 default: return 0; 00585 } 00586 } 00587 00588 int isAtom(linedef* line) 00589 { 00590 if(((*line).a == 2 || (*line).a == 3) && (*line).b == 1) 00591 return 1; 00592 else 00593 return 0; 00594 } 00595 int getResInfo(residue* res, int start) 00596 { 00597 int resNum,j;int i = start; 00598 int count = 0, current_status=0; 00599 residue* curRes = (res+0); 00600 char name [10];char* temp;char* resName = name; 00601 char IC=' ', cID=' ',is_same='y'; 00602 00603 //printf("\n**1. i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c); 00604 00605 while( (current_status == 0) && (i<INWC) ){ 00606 current_status = isAtom(ln+i); /* find out if we have an atom line */ 00607 i++; } /* if not, check the next line */ 00608 //printf("i is %d and INWC is %d \n",i,INWC); 00609 //printf("current ststus is %d\n",current_status); 00610 if(i==INWC){ /* if we got to the end with no atoms */ 00611 //if current status is that we have an atom and i==1 then break 00612 if(current_status==1 && i==1){ 00613 00614 } 00615 else{ 00616 (*curRes).N = strdup("EMPTY_RESIDUE"); 00617 (*curRes).n = -100; 00618 (*curRes).na = 0; 00619 return INWC; 00620 } 00621 } 00622 i--; /* still here? decrement the counter to undo the while loop above */ 00623 00624 while( (i != INWC) && (endOfMol((ln+i)) == 0) ){ 00625 if(isAtom(ln+i) == 1){ 00626 temp = ln[i].f[8].c; sscanf(temp,"%d",&resNum); 00627 temp = ln[i].f[5].c; sscanf(temp,"%s",resName); 00628 IC = ln[i].f[9].c[0]; 00629 cID = ln[i].f[7].c[0]; 00630 is_same='y'; 00631 if(resNum != (*curRes).n) is_same='n'; 00632 if((curRes[0].IC==NULL)||(IC!=curRes[0].IC[0])) is_same='n'; 00633 if((curRes[0].cID==NULL)||(cID!=curRes[0].cID[0])) is_same='n'; 00634 /*printf("resNum is %d and (*curRes).n is %d ; resName is >%s<<\n",resNum,(*curRes).n,resName);*/ 00635 /*printf("**2. i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);*/ 00636 //If this is a different residue than the one in the previous line 00637 /*if((resNum != (*curRes).n)||(curRes[0].IC==NULL)||(IC!=curRes[0].IC[0]))*/ 00638 if(is_same=='n') 00639 { 00640 /*printf("This residue is different from the previous. Checking for broken residue.\n");*/ 00641 curRes = NULL; 00642 for(j = 0; j < count; j++){//Cycle through all of the found residues 00643 if(((*(res+j)).n==resNum)&&((*(res+j)).IC[0]==IC)){ /* if already seen... */ 00644 curRes = (res+j); break;} /* reset residue pointer */ 00645 } 00646 /* If it is not a known residue, assign it a new slot */ 00647 if(curRes == NULL || curRes == 0x0){ curRes = (res+count); count++; } 00648 } 00649 if((*curRes).n < 0) { /* If this residue has not been found yet */ 00650 /*printf("This residue has not been found yet. Assigning values\n");*/ 00651 if(resName!=NULL){(*curRes).N = strdup(resName);}/* Set the residue name */ 00652 else{(*curRes).N = strdup(" ");} 00653 curRes[0].IC=(char*)calloc(2, sizeof(char)); 00654 (*curRes).IC[0] = IC; /* Set the IC */ 00655 (*curRes).IC[1] = '\0'; /* Set the IC */ 00656 curRes[0].cID=(char*)calloc(2, sizeof(char)); 00657 (*curRes).cID[0] = cID; /* Set the cID */ 00658 (*curRes).cID[1] = '\0'; /* Set the cID */ 00659 (*curRes).n = resNum; /* Set the actual residue number */ 00660 (*curRes).na = 0; /* ...and make sure the total # of atoms is 0 */ 00661 } /* Otherwise we can assume all of these have already been set */ 00662 00663 /*printf(" --> resNum is %d and (*curRes).n is %d, (*curRes).na is %d \n",resNum,(*curRes).n,(*curRes).na);*/ 00664 /*printf("**3. i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);*/ 00665 (*curRes).na++; 00666 /*printf(" --> (curRes). n=%d, na=%d, N=%s\n",(*curRes).n,(*curRes).na,(*curRes).N);*/ 00667 /*printf("**4. i is %d; atom name is %s; atom number is %s\n",i,(*(ln+i)).f[3].c,(*(ln+i)).f[1].c);*/ 00668 }//End if an atom 00669 i++; //..and then incriment to the next line 00670 } //End loop through file 00671 return i; 00672 } 00673 00674 molecule* getMolecule(void){ 00675 int molNum = 1; 00676 int i = 0;//int j = 0;int k = 0; 00677 int ntX,j,rI,next_mol_line; 00678 double dblY; 00679 char* temp;char name[6] = ""; //char* atmName = name; 00680 char icode=' ',chainid=' '; 00681 molecule* mol = (molecule*)calloc(molNum,sizeof(molecule)); 00682 residue* curRes;// residue* res; 00683 atom* curAtm;// atom* atm; 00684 //Get all the information for the first molecule 00685 (*mol).nr = findTotalResidue(i); 00686 //printf("the number of residues are %d\n",(*mol).nr); 00687 (*mol).r = (residue*)calloc((*mol).nr,sizeof(residue)); 00688 //Initialize the residue numbers to -1 so the program knows they're not used yet 00689 for(j = 0; j < (*mol).nr; j++){(*mol).r[j].n = -1;} 00690 next_mol_line=getResInfo((*mol).r,i); 00691 //printf("next_mol_line is %d\n",next_mol_line); 00692 int l[(*mol).nr]; 00693 for(i = 0; i < (*mol).nr; i++){ 00694 //printf("the number of atoms are %d\n",(*mol).r[i].na); 00695 (*mol).r[i].a = (atom*)calloc((*mol).r[i].na,sizeof(atom)); 00696 l[i] = 0; 00697 } 00698 //Set the current residue, and residue index to 0 00699 rI = 0; 00700 curRes = ((*mol).r+0); 00701 for(i = 0; i < INWC; i++) 00702 { 00703 if(isAtom((ln+i)) == 1) 00704 { 00705 //printf("in loop for isAtom\n"); 00706 temp = (*(ln+i)).f[8].c; sscanf(temp,"%d",&ntX); 00707 icode = (*(ln+i)).f[9].c[0]; 00708 chainid = (*(ln+i)).f[7].c[0]; 00709 if(((*curRes).n != ntX)||(icode!=curRes[0].IC[0])||(chainid!=curRes[0].cID[0]))//If the residue numbers do not match 00710 {//Seach through all of the resiudes until one is found that matches 00711 for(j = 0; j < (*mol).nr; j++){//then assign it to curRes 00712 if(mol[0].r[j].n == ntX){rI = j; break;} 00713 } 00714 curRes = ((*mol).r+rI); 00715 } 00716 curAtm = ((*curRes).a+l[rI]); 00717 /*printf("\t still in loop for isAtom\n"); */ 00718 //Getting the record name 00719 temp = (*(ln+i)).f[0].c; sscanf(temp,"%s",name); 00720 if(name!=NULL){(*curAtm).D = strdup(name);} 00721 else{(*curAtm).D = strdup("");} 00722 //Getting the atom # 00723 temp = (*(ln+i)).f[1].c; sscanf(temp,"%d",&ntX); 00724 (*curAtm).n = ntX; 00725 //Getting the atom name 00726 temp = (*(ln+i)).f[3].c; sscanf(temp,"%s",name); 00727 if(name!=NULL){(*curAtm).N = strdup(name);} 00728 else{(*curAtm).D = strdup(" ");} 00729 00730 //Getting the chain identifier 00731 if((*(ln+i)).f[7].c!=NULL){ 00732 (*curAtm).cID = (char*)calloc(2,sizeof(char)); 00733 (*curAtm).cID[0] = (*(ln+i)).f[7].c[0]; 00734 (*curAtm).cID[1] = '\0'; 00735 } 00736 else{(*curAtm).cID = strdup(" ");} 00737 /*printf("\t the (*curAtm).cID is >>>%s<<<\n",(*curAtm).cID); */ 00738 00739 //Getting the X coordinate 00740 temp = (*(ln+i)).f[11].c; sscanf(temp,"%lf",&dblY); (*curAtm).x.i = dblY; 00741 //Getting the Y coordinate 00742 temp = (*(ln+i)).f[12].c; sscanf(temp,"%lf",&dblY); (*curAtm).x.j = dblY; 00743 //Getting the Z coordinate 00744 temp = (*(ln+i)).f[13].c; sscanf(temp,"%lf",&dblY); (*curAtm).x.k = dblY; 00745 00746 l[rI]++; 00747 //printf("the current atom's xyz are: %f %f %f \n",(*curAtm).x.i,(*curAtm).x.j,(*curAtm).x.k); 00748 } 00749 } 00750 00751 //printf("the number of atoms in the molecule's first residue is %d\n",(*mol).r[0].na); 00752 00753 return mol; 00754 } 00755