GLYLIB  0.3.0b
find_molecules.c
Go to the documentation of this file.
00001 /* File find_molecules.c begun on 20080625 by BLFoley 
00002         Purpose: Contain functions designed to determine which atoms and/or
00003         residues are parts of molecules and to separate them into molecules.
00004 
00005         For example, find_molecules_molbond_array will:
00006                 Given an array of molbond structures, use the bonding to 
00007                 determine which atoms (and residues) are parts of molecules.
00008                 The molecules found will also be numbered. */
00009 
00010 // change this before adding to library -- doesn't need whole prmtop header
00011 // this is here because the first function written was written for a read
00012 // of an amber prmtop file.
00013 #include "AMBER/amber_prmtop.h"
00014 
00015 typedef struct {
00016         int n; // number of
00017         int *i; // n of these 
00018 } tempintset;
00019 
00020 void follow_find_molecule_amber_prmtop_bond_target(molbond *MB, molindex *AT, tempintset *SI, tempintset *TI, int i, int mN);
00021 
00022 /***************  find_molecules_molbond_array() ******************/
00023 /* Finds molecules in a molbond array based on bonding patterns 
00024         Updates an array of NATOM molindices with molecule information.
00025         Also writes molecule info to the molbond array.  
00026         In both cases, it overwrites existing information. 
00027         NOTES:  
00028         1.  There should be no useful molecule or residue information
00029         in the molbond array (it will be igored).
00030         2.  The index, i, in the molindex structures should correspond to the
00031         absolute atom number as indexed in the molindex array.
00032         This way, any residue and atom info you have previously will
00033         be retained -- and it makes this function loads easier to write.
00034         Later, you can renumber your residues and atoms however you like,
00035         but any existing molecule information will be overwritten.
00036 */
00037 void find_molecules_molbond_array(int nMB, molbond *MB, int NATOM, molindex *AT){
00038 int a=0,b=0,currmol=0,localmol=0; // current and local molecule numbers
00039 tempintset *SI,*TI;
00040 
00041 // set all AT molecule indices to -1 (not seen)
00042 for(a=0;a<NATOM;a++){
00043 //printf("(find mols) AT[%d].a is %d ; .i is %d \n",a,AT[a].a,AT[a].i);
00044         AT[a].m=-1;
00045         }
00046 
00047 // index each source and target to all relevant indices in the AT array
00048 //      calloc space  (other way???)
00049 SI=(tempintset*)calloc(NATOM,sizeof(tempintset));
00050 TI=(tempintset*)calloc(NATOM,sizeof(tempintset));
00051 for(a=0;a<NATOM;a++){ // allocate space 
00052         SI[a].i=(int*)calloc(1,sizeof(int));
00053         TI[a].i=(int*)calloc(1,sizeof(int)); }
00054 // set backward indices for the atoms to the bonds
00055 //for(a=0;a<nMB;a++){
00056 //printf("MB[%d] : s = %d ; t = %d\n",a,MB[a].s.i,MB[a].t.i);}
00057 for(a=0;a<nMB;a++){ 
00058 //printf("SI[MB[a].s.i].n is %d TI[MB[a].t.i].n is %d \n",SI[MB[a].s.i].n,TI[MB[a].t.i].n);
00059         SI[MB[a].s.i].n++;
00060         TI[MB[a].t.i].n++;
00061         SI[MB[a].s.i].i=(int*)realloc(SI[MB[a].s.i].i,SI[MB[a].s.i].n*sizeof(int));
00062         TI[MB[a].t.i].i=(int*)realloc(TI[MB[a].t.i].i,TI[MB[a].t.i].n*sizeof(int));
00063         SI[MB[a].s.i].i[SI[MB[a].s.i].n-1]=a;
00064         TI[MB[a].t.i].i[TI[MB[a].t.i].n-1]=a;
00065 //printf("  MB[%d].s.i is %d ; SI[MB[a].s.i].n-1 is %d \t",a,MB[a].s.i,SI[MB[a].s.i].n-1);
00066 //printf("  MB[%d].t.i is %d ; TI[MB[a].t.i].n-1 is %d \n",a,MB[a].t.i,TI[MB[a].t.i].n-1);
00067         }
00068 for(a=0;a<NATOM;a++){
00069 //printf("SI[%d].n is %d\n",a,SI[a].n); 
00070 for(b=0;b<SI[a].n;b++){
00071 //printf("\tSI[%d].i[%d] is %d\n",a,b,SI[a].i[b]);
00072 }
00073 //printf("TI[%d].n is %d\n",a,TI[a].n); 
00074 for(b=0;b<TI[a].n;b++){
00075 //printf("\tTI[%d].i[%d] is %d\n",a,b,TI[a].i[b]);
00076 }
00077 }
00078 for(a=0;a<nMB;a++){
00079 //printf("MB[%d] : s = %d ; t = %d\n",a,MB[a].s.i,MB[a].t.i);
00080 }
00081 
00082 // Find the molecules
00083 for(a=0;a<nMB;a++){ // loop through the molbond array 
00084         localmol=-1; // = we don't know the local molecule number yet
00085         // see if s or t already belong to a molecule 
00086         // if so, set localmol to that number
00087 //printf("MB[%d].s.i is %d ;  AT[MB[a].s.i].m is %d  ; MB[a].t.i is %d ;  AT[MB[a].t.i].m is %d  \n",a,MB[a].s.i,AT[MB[a].s.i].m,MB[a].t.i,AT[MB[a].t.i].m);
00088         if(AT[MB[a].s.i].m!=-1) {localmol=AT[MB[a].s.i].m;}
00089         if(AT[MB[a].t.i].m!=-1) {
00090                 if(localmol!=-1){
00091                         if(AT[MB[a].t.i].m!=AT[MB[a].s.i].m){
00092                         mywhine("molecule mismatch in find_molecules_molbond_array");} 
00093                         }
00094                 if(localmol==-1){ localmol=AT[MB[a].s.i].m=AT[MB[a].t.i].m;}
00095                 else {AT[MB[a].t.i].m=localmol;}
00096                 }
00097         // if we still don't know the local molecule number
00098         if(localmol==-1){ // set localmol to currmol and increment currmol
00099                 localmol=currmol;
00100                 currmol++;
00101                 // set s and t AT's as belonging to localmol
00102                 //AT[MB[a].s.i].m=AT[MB[a].t.i].m=localmol;
00103                 }
00104         // set molecule info local to the bonds, too
00105         //MB[a].s.m=MB[a].t.m=localmol; 
00106         //follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[a].t.i,localmol);
00107         // try this way instead... Start with sources only
00108         follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[a].s.i,localmol);
00109         }
00110 // check our work:
00111 for(a=0;a<nMB;a++){ 
00112 //printf("a=%d ; MB[a].s.m=%d ; MB[a].t.m=%d\n",a,MB[a].s.m,MB[a].t.m);
00113         if(MB[a].s.m==-1) {mywhine("source MB molecule not defined at end of assignment.");}
00114         if(MB[a].t.m==-1) {mywhine("target MB molecule not defined at end of assignment.");}
00115         }
00116 for(a=0;a>NATOM;a++){ // assign lone atoms to molecules
00117         if(AT[a].m==-1){
00118                 AT[a].m=currmol;
00119                 currmol++;
00120                 } 
00121         }
00122 
00123 for(a=0;a<NATOM;a++){ // allocate space 
00124         free(SI[a].i);
00125         free(TI[a].i);
00126         }
00127 free(SI);
00128 free(TI);
00129 return;
00130 }
00131 
00132 /************** follow_find_molecule_amber_prmtop_bond_target() ************/
00133 /* Follow bonds along the molecule recursively */
00134 void follow_find_molecule_amber_prmtop_bond_target(molbond *MB, molindex *AT, tempintset *SI, tempintset *TI, int i, int mN){
00135 int b=0,c=0; // for counting 
00136 // for sanity:
00137 int     sourcei=0,  // index for the instance of this atom being a bond source
00138         targeti=0,  // index for the instance of this atom being a bond target
00139         sbondi=0,   // index into the molbond array for this source index
00140         tbondi=0,   // index into the molbond array for this target index
00141         stbondi=0;  // index into molbond array for sources of targets or targets as sources
00142 
00143 //Each bond indicator has a source atom and a target atom.  So, for each target atom,
00144 //(1) find all bonds for which that target is a source
00145 //printf("in recursive follow at top\n");
00146 //printf("m is %d ; the atom index, i, is %d ; SI[i].n is %d ; TI[i].n is %d \n",mN,i,SI[i].n,TI[i].n);
00147 
00148 for(b=0;b<SI[i].n;b++){
00149 //printf("SI-i=%d (recursive follow, m=%d) the first source bond is %d  \n",i,mN,SI[i].i[b]);
00150 //printf("\t the source-target atoms are %d-%d ; its molecule is %d \n",MB[SI[i].i[b]].s.i,MB[SI[i].i[b]].t.i,AT[MB[SI[i].i[b]].s.i].m);
00151         sourcei=SI[i].i[b]; // the b-th target associated with this source
00152         sbondi=MB[sourcei].s.i; // the bonding index info associated with this source
00153         if((AT[sbondi].m!=-1)&&(AT[sbondi].m!=mN)){mywhine("AT[sbondi].m!=-1)&&(AT[sbondi].m!=mN, in SI recursive follow\n");}
00154         AT[sbondi].m=MB[sourcei].s.m=MB[sourcei].t.m=mN; // set molecule for the source
00155 //printf("sbondi=%d ; just set MB[%d].s/t.m=%d\n",sbondi,sourcei,MB[sourcei].s.m);
00156         tbondi=MB[sourcei].t.i;
00157         if((AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN)){mywhine("AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN, in SI recursive follow\n");} 
00158         if(AT[tbondi].m==-1){
00159                 AT[tbondi].m=MB[sourcei].t.m=mN; // set molecule for the target
00160                 for(c=0;c<SI[tbondi].n;c++){ // for each other bond for which this target is a source
00161                         targeti=SI[tbondi].i[c];
00162                         stbondi=MB[targeti].s.i;
00163                         // follow each of the sources in those other targets, but only if not already assigned
00164                         if((AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN)){
00165                                 mywhine("AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN, in SI-TI recursive follow\n");}
00166                         follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[targeti].s.i,mN); 
00167                         }
00168 //printf("just set MB[%d].t.m=%d\n",sourcei,MB[sourcei].t.m);
00169                 for(c=0;c<TI[tbondi].n;c++){ // for each other bond for which this target is a target
00170                         targeti=TI[tbondi].i[c];
00171                         stbondi=MB[targeti].s.i;
00172                         // follow each of the sources in those other targets, but only if not already assigned
00173                         if((AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN)){
00174                                 mywhine("AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN, in SI-TI recursive follow\n");}
00175                         follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[targeti].s.i,mN); 
00176                         }
00177                 }
00178         }
00179 // If this source is also a target for another bond, follow that source, too
00180 for(b=0;b<TI[i].n;b++){
00181 //printf("TI-i=%d (recursive follow, m=%d) the first source bond is %d  \n",i,mN,TI[i].i[b]);
00182 //printf("\t the target-source atoms are %d-%d ; its molecule is %d \n",MB[TI[i].i[b]].t.i,MB[TI[i].i[b]].s.i,AT[MB[TI[i].i[b]].t.i].m);
00183         targeti=TI[i].i[b];
00184         tbondi=MB[targeti].t.i;
00185         if(AT[tbondi].m!=mN){mywhine("AT[tbondi].m!=mN in TI recursive follow\n");} // should already be set...
00186         //if((AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN)){mywhine("AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN, in TI recursive follow\n");}
00187         //AT[MB[TI[i].i[b]].t.i].m=mN; // set molecule for the target
00188         sbondi=MB[TI[i].i[b]].s.i;
00189         if(AT[sbondi].m==-1){ // if this source has not already been seen
00190                 //AT[sbondi].m=mN; // set molecule for the source -- should not be necessary
00191                 follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,sbondi,mN); }
00192         }
00193 
00194 return;
00195 }
 All Classes Files Functions Variables Typedefs Defines