RNAlib-2.1.9h
pair_mat.h
1 #include <ctype.h>
2 
3 #include "energy_const.h"
4 #include "utils.h"
5 #include "fold_vars.h"
6 
/* Number of base codes covered by BP_pair: codes 0-7 are RNA, 8-15 are DNA. */
#define NBASES 16

/*@notnull@*/

/*
 * Lookup alphabet for encode_char(): the index of a letter in this string
 * is its raw numerical code ('_' = 0, 'A' = 1, 'C' = 2, 'G' = 3, 'U' = 4,
 * 'T' = 5, 'X' = 6, 'K' = 7, 'I' = 8).
 */
static const char Law_and_Order[] = "_ACGUTXKI";
12 /*
13 static int BP_pair[NBASES][NBASES]=
14 */
15 /* _ A C G U X K I */
16 /*
17  {{ 0, 0, 0, 0, 0, 0, 0, 0},
18  { 0, 0, 0, 0, 5, 0, 0, 5},
19  { 0, 0, 0, 1, 0, 0, 0, 0},
20  { 0, 0, 2, 0, 3, 0, 0, 0},
21  { 0, 6, 0, 4, 0, 0, 0, 6},
22  { 0, 0, 0, 0, 0, 0, 2, 0},
23  { 0, 0, 0, 0, 0, 1, 0, 0},
24  { 0, 6, 0, 0, 5, 0, 0, 0}};
25 */
26 
27 /*
28  in the block below, uppercase letters ACGU... are
29  used for RNA nucleotides whereas lowercase letters
30  are used for DNA nucleotides
31 */
32 static int BP_pair[NNUCLEOTIDES_HYBRID][NNUCLEOTIDES_HYBRID]=
33 /* _ A C G U X K I _ a c g t x k i*/
34 /*_*/{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
35 /*A*/ { 0, 0, 0, 0, 5, 0, 0, 5, 0, 0, 0, 0,13, 0, 0,13},/*Ai?*/
36 /*C*/ { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0},
37 /*G*/ { 0, 0, 2, 0, 3, 0, 0, 0, 0, 0,10, 0,11, 0, 0, 0},
38 /*U*/ { 0, 6, 0, 4, 0, 0, 0, 6, 0,14, 0,12, 0, 0, 0,14},
39 /*X*/ { 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,10, 0},
40 /*K*/ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0},
41 /*I*/ { 0, 6, 0, 0, 5, 0, 0, 0, 0,14, 0, 0,13, 0, 0, 0},
42 /*_*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
43 /*a*/ { 0, 0, 0, 0,21, 0, 0,21, 0, 0, 0, 0,29, 0, 0,29},/*ai?*/
44 /*c*/ { 0, 0, 0,17, 0, 0, 0, 0, 0, 0, 0,25, 0, 0, 0, 0},
45 /*g*/ { 0, 0,18, 0,19, 0, 0, 0, 0, 0,26, 0,27, 0, 0, 0},
46 /*u*/ { 0,22, 0,20, 0, 0, 0,22, 0,30, 0,28, 0, 0, 0,30},
47 /*x*/ { 0, 0, 0, 0, 0, 0,18, 0, 0, 0, 0, 0, 0, 0,26, 0},
48 /*k*/ { 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0,25, 0, 0},
49 /*i*/ { 0,22, 0, 0,21, 0, 0, 0, 0,30, 0, 0,29, 0, 0, 0}};
50 
51 /* define the above as possibly right */
52 
53 #define MAXALPHA 20 /* maximal length of alphabet */
54 
55 static short alias[MAXALPHA+1];
56 static int pair[MAXALPHA+1][MAXALPHA+1];
57 /* rtype[pair[i][j]]:=pair[j][i] */
58 /*
59 static int rtype[8] = {0, 2, 1, 4, 3, 6, 5, 7};
60 */
61 static int rtype[NBPAIRS_HYBRID] = {
62  0, 2, 1, 4, 3, 6, 5, 7,
63  0,18,17,20,19,22,21, 7,
64  0,10, 9,12,11,14,13, 7,
65  0,26,25,28,27,30,29, 7};
66 
67 #ifdef _OPENMP
68 #pragma omp threadprivate(Law_and_Order, BP_pair, alias, pair, rtype)
69 #endif
70 
71 /* for backward compatibility */
72 #define ENCODE(c) encode_char(c)
73 
74 static int encode_char(char c) {
75  /* return numerical representation of base used e.g. in pair[][] */
76  int code;
77  if (energy_set>0) code = (int) (c-'A')+1;
78  else {
79  const char *pos;
80  pos = strchr(Law_and_Order, c);
81  if (pos==NULL) code=0;
82  else code = (int) (pos-Law_and_Order);
83  if (code>5) code = 0;
84  if (code>4) code--; /* make T and U equivalent */
85  }
86  return code;
87 }
88 
89 /*@+boolint +charint@*/
90 /*@null@*/
91 extern char *nonstandards;
92 extern void nrerror(const char message[]);
93 static void make_pair_matrix(void)
94 {
95  int i,j;
96 
97  if (energy_set==0) {
98  /* RNA encoding */
99  for (i=0; i<5; i++) alias[i] = (short) i;
100  alias[5] = 3; /* X <-> G */
101  alias[6] = 2; /* K <-> C */
102  alias[7] = 0; /* I <-> default base '@' */
103  /* DNA encoding */
104  for (i=8; i<13; i++) alias[i] = (short) i;
105  alias[13] = 11; /* x <-> g */
106  alias[14] = 10; /* k <-> c */
107  alias[15] = 8; /* i <-> default base '@' */
108 
109  for (i=0; i<NBASES; i++) {
110  for (j=0; j<NBASES; j++)
111  pair[i][j] = BP_pair[i][j];
112  }
113  if (noGU){
114  pair[3][4] = pair[4][3] = 0; /* RNA */
115  pair[11][4] = pair[4][11] = 0; /* RNA-DNA hybrid */
116  pair[3][12] = pair[12][3] = 0;
117  pair[11][12] = pair[12][11] = 0; /* DNA */
118  }
119 
120  /* extend this in the near future for hybrids if necessary !!! */
121  if (nonstandards!=NULL) { /* allow nonstandard bp's */
122  for (i=0; i<(int)strlen(nonstandards); i+=2)
123  pair[encode_char(nonstandards[i])]
124  [encode_char(nonstandards[i+1])]=7;
125  }
126  for (i=0; i<NBASES; i++) {
127  for (j=0; j<NBASES; j++)
128  rtype[pair[i][j]] = pair[j][i];
129  }
130 
131  /* we dont modify this for the hybrid case */
132  } else {
133  for (i=0; i<=MAXALPHA; i++) {
134  for (j=0; j<=MAXALPHA; j++)
135  pair[i][j] = 0;
136  }
137  if (energy_set==1) {
138  for (i=1; i<MAXALPHA;) {
139  alias[i++] = 3; /* A <-> G */
140  alias[i++] = 2; /* B <-> C */
141  }
142  for (i=1; i<MAXALPHA; i++) {
143  pair[i][i+1] = 2; /* AB <-> GC */
144  i++;
145  pair[i][i-1] = 1; /* BA <-> CG */
146  }
147  }
148  else if (energy_set==2) {
149  for (i=1; i<MAXALPHA;) {
150  alias[i++] = 1; /* A <-> A*/
151  alias[i++] = 4; /* B <-> U */
152  }
153  for (i=1; i<MAXALPHA; i++) {
154  pair[i][i+1] = 5; /* AB <-> AU */
155  i++;
156  pair[i][i-1] = 6; /* BA <-> UA */
157  }
158  }
159  else if (energy_set==3) {
160  for (i=1; i<MAXALPHA-2; ) {
161  alias[i++] = 3; /* A <-> G */
162  alias[i++] = 2; /* B <-> C */
163  alias[i++] = 1; /* C <-> A */
164  alias[i++] = 4; /* D <-> U */
165  }
166  for (i=1; i<MAXALPHA-2; i++) {
167  pair[i][i+1] = 2; /* AB <-> GC */
168  i++;
169  pair[i][i-1] = 1; /* BA <-> CG */
170  i++;
171  pair[i][i+1] = 5; /* CD <-> AU */
172  i++;
173  pair[i][i-1] = 6; /* DC <-> UA */
174  }
175  }
176  else nrerror("What energy_set are YOU using??");
177  for (i=0; i<=MAXALPHA; i++) {
178  for (j=0; j<=MAXALPHA; j++)
179  rtype[pair[i][j]] = pair[j][i];
180  }
181  }
182 }
183 
184 static short *encode_sequence(const char *sequence, short how){
185  unsigned int i,l = (unsigned int)strlen(sequence);
186  short *S = (short *) space(sizeof(short)*(l+2));
187 
188  switch(how){
189  /* standard encoding as always used for S */
190  case 0: for(i=1; i<=l; i++) /* make numerical encoding of sequence */
191  S[i]= (short) encode_char(toupper(sequence[i-1]));
192  S[l+1] = S[1];
193  S[0] = (short) l;
194  break;
195  /* encoding for mismatches of nostandard bases (normally used for S1) */
196  case -1:
197  case 1: for(i=1; i<=l; i++)
198  S[i] = alias[(short) encode_char(toupper(sequence[i-1]))];
199  S[l+1] = S[1];
200  S[0] = S[l];
201  break;
202  /* encode DNA */
203  case -2: for(i=1; i<=l; i++)
204  S[i] = ((short) encode_char(toupper(sequence[i-1])))+8;
205  S[l+1] = S[1];
206  S[0] = (short) l;
207  break;
208  case -3: for(i=1; i<=l; i++)
209  S[i] = alias[(short) encode_char(toupper(sequence[i-1]))]+8;
210  S[l+1] = S[1];
211  S[0] = S[l];
212  break;
213  /*encoding dna for rna-dna hybrids */
214  default: for(i=1; i<how; i++)
215  S[i] = alias[(short) encode_char(toupper(sequence[i-1]))];
216  for(; i<=l; i++)
217  S[i] = alias[(short) encode_char(toupper(sequence[i-1]))]+8;
218  S[l+1] = S[1];
219  S[0] = (short) l;
220  break;
221  }
222 
223  return S;
224 }
void * space(unsigned size)
Allocate space safely.
#define NNUCLEOTIDES_HYBRID
Definition: energy_const.h:36
char * nonstandards
contains allowed non standard base pairs
Various utility- and helper-functions used throughout the Vienna RNA package.
#define MAXALPHA
Maximal length of alphabet.
Definition: data_structures.h:22
void nrerror(const char message[])
Die with an error message.
#define NBPAIRS_HYBRID
Definition: energy_const.h:34
Here are all declarations of the global variables used throughout RNAlib.
int noGU
Global switch to forbid/allow GU base pairs at all.
int energy_set
0 = BP; 1 = any with GC; 2 = any with AU parameters