GLYLIB  0.3.0b
read_prep.c
Go to the documentation of this file.
00001 /* File read_prep.c begun on 20071120 by BLFoley
00002 
00003 Purpose:  Read prep file contents into a molecule structure
00004 
00005 NOTE:   At present, this function only reads in the prep file.
00006         It will not set bonding, etc., in the main residue.
00007 
00008 NOTE:   Much information is read into the void pointer space.
00009 
00010 Since prep files are generated on a per-residue basis, each 
00011 entry will be considered part of the molecule structure.
00012 
00013 This function will rely on read_prepres, which will read in
00014 a single residue.  That function will rely on read_prepatom
00015 as well.  Both read_prepres and read_prepatom will return
00016 enture residues/atoms.
00017 
00018 In order to allow for the addition of many prep files to an
00019 expanding database of residues, this function will add residues
00020 to an existing molecule structure (rather than returning a
00021 new structure).  
00022 
00023 NOTE:   THE PARENT MOLECULE STRUCTURE MUST BE PREINITIALIZED
00024         THIS INCLUDES AN INITIAL ALLOCATION OF THE RESIDUE
00025         SUB STRUCTURES (M.nr should be set to zero if there are
00026         no existing residues in the molecule).
00027 */ 
00028 #include <mylib.h>
00029 #include <molecules.h>
00030 #include <general.h>
00031 #include <AMBER/read_prep.h>
00032 #include <declarations.h>
00033 //#include "../inc/mylib.h"
00034 //#include "../inc/molecules.h"
00035 //#include "../inc/general.h"
00036 //#include "../inc/read_prep.h"
00037 //#include "../inc/declarations.h"
00038 
00039 
00040 
00041 void read_prep(molecule *M, fileset F, types *TYP){
00042 int rpa=0,stopflag=1,rpuse=0,rpsz=0,rpbins=0;
00043 char line[501];
00044 ambermprepinfo *amp;
00045 fpos_t f_linestart;
00046 
00047 M[0].nVP=1;
00048 amp=(ambermprepinfo*)calloc(1,sizeof(ambermprepinfo));
00049 F.F=myfreopen(F.N,"r",F.F); // don't depend on calling function to have opened
00050 amp[0].FN=strdup(F.N);
00051 fgets(line,500,F.F);
00052 sscanf(line,"%d %d %d",&amp[0].IDBGEN,&amp[0].IREST,&amp[0].ITYPF);
00053 fgets(line,500,F.F);
00054 amp[0].NAMDBF=strdup(line);
00055 M[0].VP=amp;
00056 
00057 rpuse=malloc_usable_size(M[0].r);
00058 rpsz=sizeof(residue);
00059 rpbins=rpuse/rpsz;
00060 if(rpbins<1){mywhine("read_prep: no space in residue -- is molecule not initialized?");}
00061 
00062 //printf("about to read through the file. Current line is\n\t>>%s<<\n",line);
00063 fgetpos(F.F,&f_linestart);// get current position
00064 while(fgets(line,500,F.F)!=NULL){// while not end of file
00065 //printf("\treading a line ...  line is\n>>%s<<\n",line);
00066         stopflag=1;
00067         for(rpa=0;rpa<strlen(line);rpa++){// if the first word is not "STOP" (and only that??)
00068                 if((line[rpa]!='\t')&&(line[rpa]!=' ')){
00069 //printf("rpa is %d and line[rpa] is >>%c<< and strlen(line) is %d\n",rpa,line[rpa],strlen(line));
00070                         if(rpa+4>strlen(line)){break;}
00071                         else{ 
00072                                 if((line[rpa]=='S')&&(line[rpa+1]=='T')&&(line[rpa+2]=='O')&&(line[rpa+3]=='P')){ 
00073                                         stopflag=0;
00074                                         }
00075                                 break;
00076                                 }
00077                         }
00078                 }
00079 //printf("stopflag is %d\n",stopflag);
00080         if(stopflag==0){break;} // break loop
00081 //printf("about to allocate space for the residue...\n");
00082         M[0].nr++;// allocate new memory for a residue
00083         M[0].r=(residue*)realloc(M[0].r,M[0].nr*sizeof(residue));
00084         fsetpos(F.F,&f_linestart); // rewind to start of line
00085         M[0].r[M[0].nr-1]=read_prepres(F,TYP);// call read_prepres to read in the residue information
00086         fgetpos(F.F,&f_linestart);// get current position 
00087         } // end while not end of file
00088 return;
00089 }
00090 
00091 /******************** read_prepres ****************/
00092 // START HERE -- add in the void pointer read stuff
00093 /* this function reads in residue information from a prep file
00094         -- currently, it only reads in number, name, type and charge
00095         -- connectivity, etc., shall be defined later, though it will
00096                 read in the information */
00097 residue read_prepres(fileset F, types *TYP){ // will the position be what is expected?
00098 amberrprepinfo *arp;
00099 residue R;
00100 char line[501],desc[1001],ctmp[51],ctmp2[51]; // hope no prep file line is longer than this...
00101 char dumat[21]; // the symbol used for dummy atoms 
00102 char *linestat; // did the line read work?
00103 int itmp=0,rra=0,rline=0;
00104 char whinetext[1001];
00105 
00106 // initialize R
00107 R.n=0; // residue number given in input file
00108 R.N=strdup(""); // reside name -- can live with residue names of 200 chars or less???  :-)
00109 R.na=0; // number of atoms in residue
00110 R.m=0; // molecular weight
00111 R.COM=zero_coord(); // center of mass for residue
00112 R.a=(atom*)calloc(1,sizeof(atom)); // atom structures (na of these)
00113 R.nbs=0; // number of bond sets
00114 R.bs=(molbondset*)calloc(1,sizeof(molbondset)); // (consecutive bonds, use these for plotting, etc.)
00115 //int nr; // number of simple rings (no cage structures, etc.)
00116 //int nrbs; // number of ring bondsets defined
00117 //bondset *rbs; // bondsets for rings
00118 //int nrc; // number of ring/reference coordinate sets defined
00119 //coord_3D *rc; // coordinates for ring/reference centers
00120 //int nrp; // number of ring planes defined
00121 //plane *rp; // equations for average/approximate/exact/etc. ring planes (where useful)
00122 R.ni=1; // number of other indices
00123 R.i=(int*)calloc(R.ni,sizeof(int)); // other indices, as needed (ni of these)
00124 R.nd=1; // number of double-precision parameters
00125 R.d=(double*)calloc(R.nd,sizeof(double)); // other parameters, as needed (nd of these)
00126 
00127 // allocate space for prepinfo
00128 arp=(amberrprepinfo*)calloc(1,sizeof(amberrprepinfo));
00129 arp[0].IOPR=(llcharset*)calloc(1,sizeof(llcharset));
00130 
00131 // Read residue header information into residue structure
00132 /* A sample header is:
00133 0-[ALPHA-D-ALLO-] terminal, RESP 0.010 HF/6-31Gstar//HF/6-31Gstar
00134 0na.dat
00135 0NA   INT 0
00136 CORRECT OMIT DU BEG
00137  0.1940
00138 
00139 By line, they are:
00140 1-2] R.D
00141 3] R.N, error if not INT (for now), error if not zero (for now)
00142 4] error if not "CORRECT", ignore, read in dummy atom name, ignore
00143 5] R.d[0]
00144 */
00145 // first two lines (description)
00146 linestat=fgets(desc,500,F.F); // first line
00147 //printf("line 1 looks like: >>%s<<\n",desc);
00148 if(linestat==NULL){read_eek("EOF during residue header read, line 1",F.N);}
00149 arp[0].TITLE=strdup(desc); // add description to arp (destined for void pointer)
00150 R.D=strdup(desc); // copy to residue structure
00151 linestat=fgets(line,500,F.F); // second line
00152 //printf("line 2 looks like: >>%s<<\n",line);
00153 if(linestat==NULL){read_eek("EOF during residue header read, line 2",F.N);}
00154 arp[0].NAMF=strdup(line);
00155 // next line (name and other)
00156 linestat=fgets(line,500,F.F); // third line
00157 if(linestat==NULL){read_eek("EOF during residue header read, line 3",F.N);}
00158 sscanf(line,"%s %s %d",desc,ctmp,&itmp);
00159 //printf("line 3 looks like: >>%s<<\n",line);
00160 R.N=strdup(desc); // set the residue name
00161 arp[0].NAMRES=strdup(desc);
00162 if(strcmp(ctmp,"INT")!=0){read_eek("entry not INT on line 3 of residue header",F.N);}
00163 if(itmp!=0){read_eek("KFORM not zero on line 3 of residue header",F.N);}
00164 arp[0].INTX=strdup(ctmp);
00165 arp[0].KFORM=itmp;
00166 // fourth line (dummy atom symbol & check)
00167 linestat=fgets(line,500,F.F); // fourth line (dummy atom symbol & check)
00168 if(linestat==NULL)read_eek("EOF during residue header read, line 4",F.N);
00169 sscanf(line,"%s %s %s %s",desc,ctmp,dumat,ctmp2);
00170 if(strcmp(desc,"CORRECT")!=0){read_eek("entry not CORRECT on line 4 of residue header",F.N); }
00171 arp[0].IFIXC=strdup(desc);
00172 arp[0].IOMIT=strdup(ctmp);
00173 arp[0].ISYMDU=strdup(dumat);
00174 arp[0].IPOS=strdup(ctmp2);
00175 // fifth line (charge on residue)
00176 linestat=fgets(line,500,F.F); // fourth line (dummy atom symbol & check)
00177 if(linestat==NULL)read_eek("EOF during residue header read, line 5 (residue charge)",F.N);
00178 sscanf(line,"%lf",&arp[0].CUT);
00179 // scan first three dummy atom lines, make sure that the third field
00180 //      of each is dumat and ignore otherwise (for now, at least)
00181 for(rra=0;rra<3;rra++){
00182         linestat=fgets(line,500,F.F); // fourth line (dummy atom symbol & check)
00183         if(linestat==NULL){
00184                 sprintf(whinetext,"EOF during residue header read, dummy atom line %d",(rra+1));
00185                 read_eek(whinetext,F.N); }
00186         sscanf(line,"%s %s %s ",desc,ctmp,ctmp2);
00187         if(strcmp(ctmp2,dumat)!=0){
00188 //printf("\t dumat is >>%s<<, and line looks like: >>%s<<\n",dumat,line);
00189                 sprintf(whinetext,"dummy atom symbol, line %d, does not match declared value",(rra+1));
00190                 read_eek(whinetext,F.N); }
00191         }
00192 
00193 while(fgets(line,500,F.F)!=NULL){ // while not end of file
00194         rline=1;
00195 //printf("line is: %s\n",line);
00196         for(rra=0;rra<strlen(line);rra++){ // check for an empty line
00197                 if((line[rra]!='\t')&&(line[rra]!=' ')&&(line[rra]!='\n')){ // if this character is NOT a space or tab
00198                         rline=0;
00199                         break; } }
00200         if(rline==1) break;// if an empty line, break loop
00201 //printf("calling read atom for line: %s\n",line);
00202         R.na++;//       allocate space
00203         R.a=(atom*)realloc(R.a,R.na*sizeof(atom));
00204         R.a[R.na-1]=read_prepatom(line,TYP);//read_prepatom
00205         // update molecular weight for residue
00206         R.m+=TYP[0].a[R.a[R.na-1].t].m;
00207         } // end read of the residue's atoms
00208 
00209 // while not "DONE", read any after-atoms residue information
00210 while(fgets(line,500,F.F)!=NULL){ // while not end of file
00211         rline=1;
00212         for(rra=0;rra<strlen(line);rra++){ // check for an empty line
00213                 if((line[rra]!='\t')&&(line[rra]!=' ')){ // if this character is NOT a space or tab
00214                         rline=0;
00215                         break; } }
00216         if(rline==1) continue;// if an empty line, check next line 
00217         if((line[0]=='D')&&(line[1]=='O')&&(line[2]=='N')&&(line[3]=='E')){break;}
00218         arp[0].nIOPR++;
00219         arp[0].IOPR=(llcharset*)realloc(arp[0].IOPR,arp[0].nIOPR*sizeof(llcharset));
00220         arp[0].IOPR[arp[0].nIOPR-1].T=strdup(line);
00221         } // end scan of post-atom residue info
00222 
00223 //printf("Residue %s contains %d atoms\n",R.N,R.na);
00224 // for future:
00225 //   scan atom info to set bonding
00226 R.nVP=1;
00227 R.VP=arp;
00228 
00229 return R;       
00230 }
00231 
00232 
00233 /******************* read_prepatom *******************/
00234 
00235 atom read_prepatom(const char *line, types *TYP){
00236 amberaprepinfo *aap;
00237 atom A;
00238 char N[21],T[21],C[21]; // if any entry is longer than 21 chars, something is wrong...
00239 char whinetext[201]; // error message text
00240 int raa=0;
00241 
00242 // Most of this will go into an amberaprepinfo structure.
00243 // Some bits also go into an atom structure
00244 // Here is a typical prep-file atom line:
00245 //      >> 4 C1   CG  M  3  2  1  1.400   113.9      60.0     0.4780<<
00246 //         1  2   3   4  5  6  7    8      9          10       11
00247 //      The atom structure bits:
00248 //      1: A.n
00249 //      2: A.N=strdup(2)
00250 //      3: A.T=strdup(3)  --and-- A.t=number for type 3
00251 
00252 aap=(amberaprepinfo*)calloc(1,sizeof(amberaprepinfo));
00253 
00254 sscanf(line,"%d %s %s %s %d %d %d %lf %lf %lf %lf",\
00255         &aap[0].I,N,T,C,&aap[0].NA,&aap[0].NB,&aap[0].NC,&aap[0].R,&aap[0].THETA,&aap[0].PHI,&aap[0].CHG);
00256 A.N=strdup(N);
00257 aap[0].IGRAPH=strdup(N);
00258 A.T=strdup(T);
00259 aap[0].ISYMBL=strdup(T);
00260 aap[0].ITREE=strdup(C);
00261 A.n=aap[0].I-3; 
00262 // set the atom's type according to the data in TYP
00263 A.t=-1;
00264 for(raa=0;raa<TYP[0].na;raa++){
00265 //printf("TYP[0].a[raa].N is >>%s<< ; A.T is >>%s<<\n",TYP[0].a[raa].N,A.T);
00266         if(strcmp(TYP[0].a[raa].N,A.T)==0){ 
00267                 A.t=raa; 
00268                 break;} }
00269 if(A.t==-1){
00270         sprintf(whinetext,"read_prepatom:\n\tMatch not found for atom type %s.  The prep file line follows:\n\n%s\n\n",A.T,line);
00271         mywhine(whinetext);
00272         }
00273 //printf("The entire line is:\n");
00274 /*printf("%d %s %s %s %d %d %d %f %f %f %f\n",aap[0].I,aap[0].IGRAPH,aap[0].ISYMBL,\
00275         aap[0].ITREE,aap[0].NA,aap[0].NB,aap[0].NC,aap[0].R,aap[0].THETA,aap[0].PHI,aap[0].CHG);*/
00276 //
00277 A.nVP=1;
00278 A.VP=aap;
00279 
00280 return A;
00281 }
00282 
 All Classes Files Functions Variables Typedefs Defines