GLYLIB
0.3.0b
|
00001 /* File read_prep.c begun on 20071120 by BLFoley 00002 00003 Purpose: Read prep file contents into a molecule structure 00004 00005 NOTE: At present, this function only reads in the prep file. 00006 It will not set bonding, etc., in the main residue. 00007 00008 NOTE: Much information is read into the void pointer space. 00009 00010 Since prep files are generated on a per-residue basis, each 00011 entry will be considered part of the molecule structure. 00012 00013 This function will rely on read_prepres, which will read in 00014 a single residue. That function will rely on read_prepatom 00015 as well. Both read_prepres and read_prepatom will return 00016 enture residues/atoms. 00017 00018 In order to allow for the addition of many prep files to an 00019 expanding database of residues, this function will add residues 00020 to an existing molecule structure (rather than returning a 00021 new structure). 00022 00023 NOTE: THE PARENT MOLECULE STRUCTURE MUST BE PREINITIALIZED 00024 THIS INCLUDES AN INITIAL ALLOCATION OF THE RESIDUE 00025 SUB STRUCTURES (M.nr should be set to zero if there are 00026 no existing residues in the molecule). 00027 */ 00028 #include <mylib.h> 00029 #include <molecules.h> 00030 #include <general.h> 00031 #include <AMBER/read_prep.h> 00032 #include <declarations.h> 00033 //#include "../inc/mylib.h" 00034 //#include "../inc/molecules.h" 00035 //#include "../inc/general.h" 00036 //#include "../inc/read_prep.h" 00037 //#include "../inc/declarations.h" 00038 00039 00040 00041 void read_prep(molecule *M, fileset F, types *TYP){ 00042 int rpa=0,stopflag=1,rpuse=0,rpsz=0,rpbins=0; 00043 char line[501]; 00044 ambermprepinfo *amp; 00045 fpos_t f_linestart; 00046 00047 M[0].nVP=1; 00048 amp=(ambermprepinfo*)calloc(1,sizeof(ambermprepinfo)); 00049 F.F=myfreopen(F.N,"r",F.F); // don't depend on calling function to have opened 00050 amp[0].FN=strdup(F.N); 00051 fgets(line,500,F.F); 00052 sscanf(line,"%d %d %d",&[0].IDBGEN,&[0].IREST,&[0].ITYPF); 00053 fgets(line,500,F.F); 00054 amp[0].NAMDBF=strdup(line); 00055 M[0].VP=amp; 00056 00057 rpuse=malloc_usable_size(M[0].r); 00058 rpsz=sizeof(residue); 00059 rpbins=rpuse/rpsz; 00060 if(rpbins<1){mywhine("read_prep: no space in residue -- is molecule not initialized?");} 00061 00062 //printf("about to read through the file. Current line is\n\t>>%s<<\n",line); 00063 fgetpos(F.F,&f_linestart);// get current position 00064 while(fgets(line,500,F.F)!=NULL){// while not end of file 00065 //printf("\treading a line ... line is\n>>%s<<\n",line); 00066 stopflag=1; 00067 for(rpa=0;rpa<strlen(line);rpa++){// if the first word is not "STOP" (and only that??) 00068 if((line[rpa]!='\t')&&(line[rpa]!=' ')){ 00069 //printf("rpa is %d and line[rpa] is >>%c<< and strlen(line) is %d\n",rpa,line[rpa],strlen(line)); 00070 if(rpa+4>strlen(line)){break;} 00071 else{ 00072 if((line[rpa]=='S')&&(line[rpa+1]=='T')&&(line[rpa+2]=='O')&&(line[rpa+3]=='P')){ 00073 stopflag=0; 00074 } 00075 break; 00076 } 00077 } 00078 } 00079 //printf("stopflag is %d\n",stopflag); 00080 if(stopflag==0){break;} // break loop 00081 //printf("about to allocate space for the residue...\n"); 00082 M[0].nr++;// allocate new memory for a residue 00083 M[0].r=(residue*)realloc(M[0].r,M[0].nr*sizeof(residue)); 00084 fsetpos(F.F,&f_linestart); // rewind to start of line 00085 M[0].r[M[0].nr-1]=read_prepres(F,TYP);// call read_prepres to read in the residue information 00086 fgetpos(F.F,&f_linestart);// get current position 00087 } // end while not end of file 00088 return; 00089 } 00090 00091 /******************** read_prepres ****************/ 00092 // START HERE -- add in the void pointer read stuff 00093 /* this function reads in residue information from a prep file 00094 -- currently, it only reads in number, name, type and charge 00095 -- connectivity, etc., shall be defined later, though it will 00096 read in the information */ 00097 residue read_prepres(fileset F, types *TYP){ // will the position be what is expected? 00098 amberrprepinfo *arp; 00099 residue R; 00100 char line[501],desc[1001],ctmp[51],ctmp2[51]; // hope no prep file line is longer than this... 00101 char dumat[21]; // the symbol used for dummy atoms 00102 char *linestat; // did the line read work? 00103 int itmp=0,rra=0,rline=0; 00104 char whinetext[1001]; 00105 00106 // initialize R 00107 R.n=0; // residue number given in input file 00108 R.N=strdup(""); // reside name -- can live with residue names of 200 chars or less??? :-) 00109 R.na=0; // number of atoms in residue 00110 R.m=0; // molecular weight 00111 R.COM=zero_coord(); // center of mass for residue 00112 R.a=(atom*)calloc(1,sizeof(atom)); // atom structures (na of these) 00113 R.nbs=0; // number of bond sets 00114 R.bs=(molbondset*)calloc(1,sizeof(molbondset)); // (consecutive bonds, use these for plotting, etc.) 00115 //int nr; // number of simple rings (no cage structures, etc.) 00116 //int nrbs; // number of ring bondsets defined 00117 //bondset *rbs; // bondsets for rings 00118 //int nrc; // number of ring/reference coordinate sets defined 00119 //coord_3D *rc; // coordinates for ring/reference centers 00120 //int nrp; // number of ring planes defined 00121 //plane *rp; // equations for average/approximate/exact/etc. ring planes (where useful) 00122 R.ni=1; // number of other indices 00123 R.i=(int*)calloc(R.ni,sizeof(int)); // other indices, as needed (ni of these) 00124 R.nd=1; // number of double-precision parameters 00125 R.d=(double*)calloc(R.nd,sizeof(double)); // other parameters, as needed (nd of these) 00126 00127 // allocate space for prepinfo 00128 arp=(amberrprepinfo*)calloc(1,sizeof(amberrprepinfo)); 00129 arp[0].IOPR=(llcharset*)calloc(1,sizeof(llcharset)); 00130 00131 // Read residue header information into residue structure 00132 /* A sample header is: 00133 0-[ALPHA-D-ALLO-] terminal, RESP 0.010 HF/6-31Gstar//HF/6-31Gstar 00134 0na.dat 00135 0NA INT 0 00136 CORRECT OMIT DU BEG 00137 0.1940 00138 00139 By line, they are: 00140 1-2] R.D 00141 3] R.N, error if not INT (for now), error if not zero (for now) 00142 4] error if not "CORRECT", ignore, read in dummy atom name, ignore 00143 5] R.d[0] 00144 */ 00145 // first two lines (description) 00146 linestat=fgets(desc,500,F.F); // first line 00147 //printf("line 1 looks like: >>%s<<\n",desc); 00148 if(linestat==NULL){read_eek("EOF during residue header read, line 1",F.N);} 00149 arp[0].TITLE=strdup(desc); // add description to arp (destined for void pointer) 00150 R.D=strdup(desc); // copy to residue structure 00151 linestat=fgets(line,500,F.F); // second line 00152 //printf("line 2 looks like: >>%s<<\n",line); 00153 if(linestat==NULL){read_eek("EOF during residue header read, line 2",F.N);} 00154 arp[0].NAMF=strdup(line); 00155 // next line (name and other) 00156 linestat=fgets(line,500,F.F); // third line 00157 if(linestat==NULL){read_eek("EOF during residue header read, line 3",F.N);} 00158 sscanf(line,"%s %s %d",desc,ctmp,&itmp); 00159 //printf("line 3 looks like: >>%s<<\n",line); 00160 R.N=strdup(desc); // set the residue name 00161 arp[0].NAMRES=strdup(desc); 00162 if(strcmp(ctmp,"INT")!=0){read_eek("entry not INT on line 3 of residue header",F.N);} 00163 if(itmp!=0){read_eek("KFORM not zero on line 3 of residue header",F.N);} 00164 arp[0].INTX=strdup(ctmp); 00165 arp[0].KFORM=itmp; 00166 // fourth line (dummy atom symbol & check) 00167 linestat=fgets(line,500,F.F); // fourth line (dummy atom symbol & check) 00168 if(linestat==NULL)read_eek("EOF during residue header read, line 4",F.N); 00169 sscanf(line,"%s %s %s %s",desc,ctmp,dumat,ctmp2); 00170 if(strcmp(desc,"CORRECT")!=0){read_eek("entry not CORRECT on line 4 of residue header",F.N); } 00171 arp[0].IFIXC=strdup(desc); 00172 arp[0].IOMIT=strdup(ctmp); 00173 arp[0].ISYMDU=strdup(dumat); 00174 arp[0].IPOS=strdup(ctmp2); 00175 // fifth line (charge on residue) 00176 linestat=fgets(line,500,F.F); // fourth line (dummy atom symbol & check) 00177 if(linestat==NULL)read_eek("EOF during residue header read, line 5 (residue charge)",F.N); 00178 sscanf(line,"%lf",&arp[0].CUT); 00179 // scan first three dummy atom lines, make sure that the third field 00180 // of each is dumat and ignore otherwise (for now, at least) 00181 for(rra=0;rra<3;rra++){ 00182 linestat=fgets(line,500,F.F); // fourth line (dummy atom symbol & check) 00183 if(linestat==NULL){ 00184 sprintf(whinetext,"EOF during residue header read, dummy atom line %d",(rra+1)); 00185 read_eek(whinetext,F.N); } 00186 sscanf(line,"%s %s %s ",desc,ctmp,ctmp2); 00187 if(strcmp(ctmp2,dumat)!=0){ 00188 //printf("\t dumat is >>%s<<, and line looks like: >>%s<<\n",dumat,line); 00189 sprintf(whinetext,"dummy atom symbol, line %d, does not match declared value",(rra+1)); 00190 read_eek(whinetext,F.N); } 00191 } 00192 00193 while(fgets(line,500,F.F)!=NULL){ // while not end of file 00194 rline=1; 00195 //printf("line is: %s\n",line); 00196 for(rra=0;rra<strlen(line);rra++){ // check for an empty line 00197 if((line[rra]!='\t')&&(line[rra]!=' ')&&(line[rra]!='\n')){ // if this character is NOT a space or tab 00198 rline=0; 00199 break; } } 00200 if(rline==1) break;// if an empty line, break loop 00201 //printf("calling read atom for line: %s\n",line); 00202 R.na++;// allocate space 00203 R.a=(atom*)realloc(R.a,R.na*sizeof(atom)); 00204 R.a[R.na-1]=read_prepatom(line,TYP);//read_prepatom 00205 // update molecular weight for residue 00206 R.m+=TYP[0].a[R.a[R.na-1].t].m; 00207 } // end read of the residue's atoms 00208 00209 // while not "DONE", read any after-atoms residue information 00210 while(fgets(line,500,F.F)!=NULL){ // while not end of file 00211 rline=1; 00212 for(rra=0;rra<strlen(line);rra++){ // check for an empty line 00213 if((line[rra]!='\t')&&(line[rra]!=' ')){ // if this character is NOT a space or tab 00214 rline=0; 00215 break; } } 00216 if(rline==1) continue;// if an empty line, check next line 00217 if((line[0]=='D')&&(line[1]=='O')&&(line[2]=='N')&&(line[3]=='E')){break;} 00218 arp[0].nIOPR++; 00219 arp[0].IOPR=(llcharset*)realloc(arp[0].IOPR,arp[0].nIOPR*sizeof(llcharset)); 00220 arp[0].IOPR[arp[0].nIOPR-1].T=strdup(line); 00221 } // end scan of post-atom residue info 00222 00223 //printf("Residue %s contains %d atoms\n",R.N,R.na); 00224 // for future: 00225 // scan atom info to set bonding 00226 R.nVP=1; 00227 R.VP=arp; 00228 00229 return R; 00230 } 00231 00232 00233 /******************* read_prepatom *******************/ 00234 00235 atom read_prepatom(const char *line, types *TYP){ 00236 amberaprepinfo *aap; 00237 atom A; 00238 char N[21],T[21],C[21]; // if any entry is longer than 21 chars, something is wrong... 00239 char whinetext[201]; // error message text 00240 int raa=0; 00241 00242 // Most of this will go into an amberaprepinfo structure. 00243 // Some bits also go into an atom structure 00244 // Here is a typical prep-file atom line: 00245 // >> 4 C1 CG M 3 2 1 1.400 113.9 60.0 0.4780<< 00246 // 1 2 3 4 5 6 7 8 9 10 11 00247 // The atom structure bits: 00248 // 1: A.n 00249 // 2: A.N=strdup(2) 00250 // 3: A.T=strdup(3) --and-- A.t=number for type 3 00251 00252 aap=(amberaprepinfo*)calloc(1,sizeof(amberaprepinfo)); 00253 00254 sscanf(line,"%d %s %s %s %d %d %d %lf %lf %lf %lf",\ 00255 &aap[0].I,N,T,C,&aap[0].NA,&aap[0].NB,&aap[0].NC,&aap[0].R,&aap[0].THETA,&aap[0].PHI,&aap[0].CHG); 00256 A.N=strdup(N); 00257 aap[0].IGRAPH=strdup(N); 00258 A.T=strdup(T); 00259 aap[0].ISYMBL=strdup(T); 00260 aap[0].ITREE=strdup(C); 00261 A.n=aap[0].I-3; 00262 // set the atom's type according to the data in TYP 00263 A.t=-1; 00264 for(raa=0;raa<TYP[0].na;raa++){ 00265 //printf("TYP[0].a[raa].N is >>%s<< ; A.T is >>%s<<\n",TYP[0].a[raa].N,A.T); 00266 if(strcmp(TYP[0].a[raa].N,A.T)==0){ 00267 A.t=raa; 00268 break;} } 00269 if(A.t==-1){ 00270 sprintf(whinetext,"read_prepatom:\n\tMatch not found for atom type %s. The prep file line follows:\n\n%s\n\n",A.T,line); 00271 mywhine(whinetext); 00272 } 00273 //printf("The entire line is:\n"); 00274 /*printf("%d %s %s %s %d %d %d %f %f %f %f\n",aap[0].I,aap[0].IGRAPH,aap[0].ISYMBL,\ 00275 aap[0].ITREE,aap[0].NA,aap[0].NB,aap[0].NC,aap[0].R,aap[0].THETA,aap[0].PHI,aap[0].CHG);*/ 00276 // 00277 A.nVP=1; 00278 A.VP=aap; 00279 00280 return A; 00281 } 00282