GLYLIB
0.3.0b
|
00001 /* File find_molecules.c begun on 20080625 by BLFoley 00002 Purpose: Contain functions designed to determine which atoms and/or 00003 residues are parts of molecules and to separate them into molecules. 00004 00005 For example, find_molecules_molbond_array will: 00006 Given an array of molbond structures, use the bonding to 00007 determine which atoms (and residues) are parts of molecules. 00008 The molecules found will also be numbered. */ 00009 00010 // change this before adding to library -- doesn't need whole prmtop header 00011 // this is here because the first function written was written for a read 00012 // of an amber prmtop file. 00013 #include "AMBER/amber_prmtop.h" 00014 00015 typedef struct { 00016 int n; // number of 00017 int *i; // n of these 00018 } tempintset; 00019 00020 void follow_find_molecule_amber_prmtop_bond_target(molbond *MB, molindex *AT, tempintset *SI, tempintset *TI, int i, int mN); 00021 00022 /*************** find_molecules_molbond_array() ******************/ 00023 /* Finds molecules in a molbond array based on bonding patterns 00024 Updates an array of NATOM molindices with molecule information. 00025 Also writes molecule info to the molbond array. 00026 In both cases, it overwrites existing information. 00027 NOTES: 00028 1. There should be no useful molecule or residue information 00029 in the molbond array (it will be igored). 00030 2. The index, i, in the molindex structures should correspond to the 00031 absolute atom number as indexed in the molindex array. 00032 This way, any residue and atom info you have previously will 00033 be retained -- and it makes this function loads easier to write. 00034 Later, you can renumber your residues and atoms however you like, 00035 but any existing molecule information will be overwritten. 00036 */ 00037 void find_molecules_molbond_array(int nMB, molbond *MB, int NATOM, molindex *AT){ 00038 int a=0,b=0,currmol=0,localmol=0; // current and local molecule numbers 00039 tempintset *SI,*TI; 00040 00041 // set all AT molecule indices to -1 (not seen) 00042 for(a=0;a<NATOM;a++){ 00043 //printf("(find mols) AT[%d].a is %d ; .i is %d \n",a,AT[a].a,AT[a].i); 00044 AT[a].m=-1; 00045 } 00046 00047 // index each source and target to all relevant indices in the AT array 00048 // calloc space (other way???) 00049 SI=(tempintset*)calloc(NATOM,sizeof(tempintset)); 00050 TI=(tempintset*)calloc(NATOM,sizeof(tempintset)); 00051 for(a=0;a<NATOM;a++){ // allocate space 00052 SI[a].i=(int*)calloc(1,sizeof(int)); 00053 TI[a].i=(int*)calloc(1,sizeof(int)); } 00054 // set backward indices for the atoms to the bonds 00055 //for(a=0;a<nMB;a++){ 00056 //printf("MB[%d] : s = %d ; t = %d\n",a,MB[a].s.i,MB[a].t.i);} 00057 for(a=0;a<nMB;a++){ 00058 //printf("SI[MB[a].s.i].n is %d TI[MB[a].t.i].n is %d \n",SI[MB[a].s.i].n,TI[MB[a].t.i].n); 00059 SI[MB[a].s.i].n++; 00060 TI[MB[a].t.i].n++; 00061 SI[MB[a].s.i].i=(int*)realloc(SI[MB[a].s.i].i,SI[MB[a].s.i].n*sizeof(int)); 00062 TI[MB[a].t.i].i=(int*)realloc(TI[MB[a].t.i].i,TI[MB[a].t.i].n*sizeof(int)); 00063 SI[MB[a].s.i].i[SI[MB[a].s.i].n-1]=a; 00064 TI[MB[a].t.i].i[TI[MB[a].t.i].n-1]=a; 00065 //printf(" MB[%d].s.i is %d ; SI[MB[a].s.i].n-1 is %d \t",a,MB[a].s.i,SI[MB[a].s.i].n-1); 00066 //printf(" MB[%d].t.i is %d ; TI[MB[a].t.i].n-1 is %d \n",a,MB[a].t.i,TI[MB[a].t.i].n-1); 00067 } 00068 for(a=0;a<NATOM;a++){ 00069 //printf("SI[%d].n is %d\n",a,SI[a].n); 00070 for(b=0;b<SI[a].n;b++){ 00071 //printf("\tSI[%d].i[%d] is %d\n",a,b,SI[a].i[b]); 00072 } 00073 //printf("TI[%d].n is %d\n",a,TI[a].n); 00074 for(b=0;b<TI[a].n;b++){ 00075 //printf("\tTI[%d].i[%d] is %d\n",a,b,TI[a].i[b]); 00076 } 00077 } 00078 for(a=0;a<nMB;a++){ 00079 //printf("MB[%d] : s = %d ; t = %d\n",a,MB[a].s.i,MB[a].t.i); 00080 } 00081 00082 // Find the molecules 00083 for(a=0;a<nMB;a++){ // loop through the molbond array 00084 localmol=-1; // = we don't know the local molecule number yet 00085 // see if s or t already belong to a molecule 00086 // if so, set localmol to that number 00087 //printf("MB[%d].s.i is %d ; AT[MB[a].s.i].m is %d ; MB[a].t.i is %d ; AT[MB[a].t.i].m is %d \n",a,MB[a].s.i,AT[MB[a].s.i].m,MB[a].t.i,AT[MB[a].t.i].m); 00088 if(AT[MB[a].s.i].m!=-1) {localmol=AT[MB[a].s.i].m;} 00089 if(AT[MB[a].t.i].m!=-1) { 00090 if(localmol!=-1){ 00091 if(AT[MB[a].t.i].m!=AT[MB[a].s.i].m){ 00092 mywhine("molecule mismatch in find_molecules_molbond_array");} 00093 } 00094 if(localmol==-1){ localmol=AT[MB[a].s.i].m=AT[MB[a].t.i].m;} 00095 else {AT[MB[a].t.i].m=localmol;} 00096 } 00097 // if we still don't know the local molecule number 00098 if(localmol==-1){ // set localmol to currmol and increment currmol 00099 localmol=currmol; 00100 currmol++; 00101 // set s and t AT's as belonging to localmol 00102 //AT[MB[a].s.i].m=AT[MB[a].t.i].m=localmol; 00103 } 00104 // set molecule info local to the bonds, too 00105 //MB[a].s.m=MB[a].t.m=localmol; 00106 //follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[a].t.i,localmol); 00107 // try this way instead... Start with sources only 00108 follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[a].s.i,localmol); 00109 } 00110 // check our work: 00111 for(a=0;a<nMB;a++){ 00112 //printf("a=%d ; MB[a].s.m=%d ; MB[a].t.m=%d\n",a,MB[a].s.m,MB[a].t.m); 00113 if(MB[a].s.m==-1) {mywhine("source MB molecule not defined at end of assignment.");} 00114 if(MB[a].t.m==-1) {mywhine("target MB molecule not defined at end of assignment.");} 00115 } 00116 for(a=0;a>NATOM;a++){ // assign lone atoms to molecules 00117 if(AT[a].m==-1){ 00118 AT[a].m=currmol; 00119 currmol++; 00120 } 00121 } 00122 00123 for(a=0;a<NATOM;a++){ // allocate space 00124 free(SI[a].i); 00125 free(TI[a].i); 00126 } 00127 free(SI); 00128 free(TI); 00129 return; 00130 } 00131 00132 /************** follow_find_molecule_amber_prmtop_bond_target() ************/ 00133 /* Follow bonds along the molecule recursively */ 00134 void follow_find_molecule_amber_prmtop_bond_target(molbond *MB, molindex *AT, tempintset *SI, tempintset *TI, int i, int mN){ 00135 int b=0,c=0; // for counting 00136 // for sanity: 00137 int sourcei=0, // index for the instance of this atom being a bond source 00138 targeti=0, // index for the instance of this atom being a bond target 00139 sbondi=0, // index into the molbond array for this source index 00140 tbondi=0, // index into the molbond array for this target index 00141 stbondi=0; // index into molbond array for sources of targets or targets as sources 00142 00143 //Each bond indicator has a source atom and a target atom. So, for each target atom, 00144 //(1) find all bonds for which that target is a source 00145 //printf("in recursive follow at top\n"); 00146 //printf("m is %d ; the atom index, i, is %d ; SI[i].n is %d ; TI[i].n is %d \n",mN,i,SI[i].n,TI[i].n); 00147 00148 for(b=0;b<SI[i].n;b++){ 00149 //printf("SI-i=%d (recursive follow, m=%d) the first source bond is %d \n",i,mN,SI[i].i[b]); 00150 //printf("\t the source-target atoms are %d-%d ; its molecule is %d \n",MB[SI[i].i[b]].s.i,MB[SI[i].i[b]].t.i,AT[MB[SI[i].i[b]].s.i].m); 00151 sourcei=SI[i].i[b]; // the b-th target associated with this source 00152 sbondi=MB[sourcei].s.i; // the bonding index info associated with this source 00153 if((AT[sbondi].m!=-1)&&(AT[sbondi].m!=mN)){mywhine("AT[sbondi].m!=-1)&&(AT[sbondi].m!=mN, in SI recursive follow\n");} 00154 AT[sbondi].m=MB[sourcei].s.m=MB[sourcei].t.m=mN; // set molecule for the source 00155 //printf("sbondi=%d ; just set MB[%d].s/t.m=%d\n",sbondi,sourcei,MB[sourcei].s.m); 00156 tbondi=MB[sourcei].t.i; 00157 if((AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN)){mywhine("AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN, in SI recursive follow\n");} 00158 if(AT[tbondi].m==-1){ 00159 AT[tbondi].m=MB[sourcei].t.m=mN; // set molecule for the target 00160 for(c=0;c<SI[tbondi].n;c++){ // for each other bond for which this target is a source 00161 targeti=SI[tbondi].i[c]; 00162 stbondi=MB[targeti].s.i; 00163 // follow each of the sources in those other targets, but only if not already assigned 00164 if((AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN)){ 00165 mywhine("AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN, in SI-TI recursive follow\n");} 00166 follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[targeti].s.i,mN); 00167 } 00168 //printf("just set MB[%d].t.m=%d\n",sourcei,MB[sourcei].t.m); 00169 for(c=0;c<TI[tbondi].n;c++){ // for each other bond for which this target is a target 00170 targeti=TI[tbondi].i[c]; 00171 stbondi=MB[targeti].s.i; 00172 // follow each of the sources in those other targets, but only if not already assigned 00173 if((AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN)){ 00174 mywhine("AT[stbondi].m!=-1)&&(AT[stbondi].m!=mN, in SI-TI recursive follow\n");} 00175 follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,MB[targeti].s.i,mN); 00176 } 00177 } 00178 } 00179 // If this source is also a target for another bond, follow that source, too 00180 for(b=0;b<TI[i].n;b++){ 00181 //printf("TI-i=%d (recursive follow, m=%d) the first source bond is %d \n",i,mN,TI[i].i[b]); 00182 //printf("\t the target-source atoms are %d-%d ; its molecule is %d \n",MB[TI[i].i[b]].t.i,MB[TI[i].i[b]].s.i,AT[MB[TI[i].i[b]].t.i].m); 00183 targeti=TI[i].i[b]; 00184 tbondi=MB[targeti].t.i; 00185 if(AT[tbondi].m!=mN){mywhine("AT[tbondi].m!=mN in TI recursive follow\n");} // should already be set... 00186 //if((AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN)){mywhine("AT[tbondi].m!=-1)&&(AT[tbondi].m!=mN, in TI recursive follow\n");} 00187 //AT[MB[TI[i].i[b]].t.i].m=mN; // set molecule for the target 00188 sbondi=MB[TI[i].i[b]].s.i; 00189 if(AT[sbondi].m==-1){ // if this source has not already been seen 00190 //AT[sbondi].m=mN; // set molecule for the source -- should not be necessary 00191 follow_find_molecule_amber_prmtop_bond_target(MB,AT,SI,TI,sbondi,mN); } 00192 } 00193 00194 return; 00195 }