RNAlib-2.0.1
|
00001 #include <ctype.h> 00002 #define NBASES 8 00003 /*@notnull@*/ 00004 00005 static const char Law_and_Order[] = "_ACGUTXKI"; 00006 static int BP_pair[NBASES][NBASES]= 00007 /* _ A C G U X K I */ 00008 {{ 0, 0, 0, 0, 0, 0, 0, 0}, 00009 { 0, 0, 0, 0, 5, 0, 0, 5}, 00010 { 0, 0, 0, 1, 0, 0, 0, 0}, 00011 { 0, 0, 2, 0, 3, 0, 0, 0}, 00012 { 0, 6, 0, 4, 0, 0, 0, 6}, 00013 { 0, 0, 0, 0, 0, 0, 2, 0}, 00014 { 0, 0, 0, 0, 0, 1, 0, 0}, 00015 { 0, 6, 0, 0, 5, 0, 0, 0}}; 00016 00017 #define MAXALPHA 20 /* maximal length of alphabet */ 00018 00019 static short alias[MAXALPHA+1]; 00020 static int pair[MAXALPHA+1][MAXALPHA+1]; 00021 /* rtype[pair[i][j]]:=pair[j][i] */ 00022 static int rtype[8] = {0, 2, 1, 4, 3, 6, 5, 7}; 00023 00024 #ifdef _OPENMP 00025 #pragma omp threadprivate(Law_and_Order, BP_pair, alias, pair, rtype) 00026 #endif 00027 00028 /* for backward compatibility */ 00029 #define ENCODE(c) encode_char(c) 00030 00031 static int encode_char(char c) { 00032 /* return numerical representation of base used e.g. in pair[][] */ 00033 int code; 00034 if (energy_set>0) code = (int) (c-'A')+1; 00035 else { 00036 char *pos; 00037 pos = strchr(Law_and_Order, c); 00038 if (pos==NULL) code=0; 00039 else code = (int) (pos-Law_and_Order); 00040 if (code>5) code = 0; 00041 if (code>4) code--; /* make T and U equivalent */ 00042 } 00043 return code; 00044 } 00045 00046 /*@+boolint +charint@*/ 00047 /*@null@*/ 00048 extern char *nonstandards; 00049 extern void nrerror(const char message[]); 00050 static void make_pair_matrix(void) 00051 { 00052 int i,j; 00053 00054 if (energy_set==0) { 00055 for (i=0; i<5; i++) alias[i] = (short) i; 00056 alias[5] = 3; /* X <-> G */ 00057 alias[6] = 2; /* K <-> C */ 00058 alias[7] = 0; /* I <-> default base '@' */ 00059 for (i=0; i<NBASES; i++) { 00060 for (j=0; j<NBASES; j++) 00061 pair[i][j] = BP_pair[i][j]; 00062 } 00063 if (noGU) pair[3][4] = pair[4][3] =0; 00064 if (nonstandards!=NULL) { /* allow nonstandard bp's */ 00065 for (i=0; i<(int)strlen(nonstandards); i+=2) 00066 pair[encode_char(nonstandards[i])] 00067 [encode_char(nonstandards[i+1])]=7; 00068 } 00069 for (i=0; i<NBASES; i++) { 00070 for (j=0; j<NBASES; j++) 00071 rtype[pair[i][j]] = pair[j][i]; 00072 } 00073 } else { 00074 for (i=0; i<=MAXALPHA; i++) { 00075 for (j=0; j<=MAXALPHA; j++) 00076 pair[i][j] = 0; 00077 } 00078 if (energy_set==1) { 00079 for (i=1; i<MAXALPHA;) { 00080 alias[i++] = 3; /* A <-> G */ 00081 alias[i++] = 2; /* B <-> C */ 00082 } 00083 for (i=1; i<MAXALPHA; i++) { 00084 pair[i][i+1] = 2; /* AB <-> GC */ 00085 i++; 00086 pair[i][i-1] = 1; /* BA <-> CG */ 00087 } 00088 } 00089 else if (energy_set==2) { 00090 for (i=1; i<MAXALPHA;) { 00091 alias[i++] = 1; /* A <-> A*/ 00092 alias[i++] = 4; /* B <-> U */ 00093 } 00094 for (i=1; i<MAXALPHA; i++) { 00095 pair[i][i+1] = 5; /* AB <-> AU */ 00096 i++; 00097 pair[i][i-1] = 6; /* BA <-> UA */ 00098 } 00099 } 00100 else if (energy_set==3) { 00101 for (i=1; i<MAXALPHA-2; ) { 00102 alias[i++] = 3; /* A <-> G */ 00103 alias[i++] = 2; /* B <-> C */ 00104 alias[i++] = 1; /* C <-> A */ 00105 alias[i++] = 4; /* D <-> U */ 00106 } 00107 for (i=1; i<MAXALPHA-2; i++) { 00108 pair[i][i+1] = 2; /* AB <-> GC */ 00109 i++; 00110 pair[i][i-1] = 1; /* BA <-> CG */ 00111 i++; 00112 pair[i][i+1] = 5; /* CD <-> AU */ 00113 i++; 00114 pair[i][i-1] = 6; /* DC <-> UA */ 00115 } 00116 } 00117 else nrerror("What energy_set are YOU using??"); 00118 for (i=0; i<=MAXALPHA; i++) { 00119 for (j=0; j<=MAXALPHA; j++) 00120 rtype[pair[i][j]] = pair[j][i]; 00121 } 00122 } 00123 } 00124 00125 static short *encode_sequence(const char *sequence, short how){ 00126 unsigned int i,l = (unsigned int)strlen(sequence); 00127 short *S = (short *) space(sizeof(short)*(l+2)); 00128 00129 switch(how){ 00130 /* standard encoding as always used for S */ 00131 case 0: for(i=1; i<=l; i++) /* make numerical encoding of sequence */ 00132 S[i]= (short) encode_char(toupper(sequence[i-1])); 00133 S[l+1] = S[1]; 00134 S[0] = (short) l; 00135 break; 00136 /* encoding for mismatches of nostandard bases (normally used for S1) */ 00137 case 1: for(i=1; i<=l; i++) 00138 S[i] = alias[(short) encode_char(toupper(sequence[i-1]))]; 00139 S[l+1] = S[1]; 00140 S[0] = S[l]; 00141 break; 00142 } 00143 00144 return S; 00145 }