RNAlib-2.1.9h
pair_mat.h
1#include <ctype.h>
2
3#include "energy_const.h"
4#include "utils.h"
5#include "fold_vars.h"
6
7#define NBASES 16
8
9/*@notnull@*/
10
11static const char Law_and_Order[] = "_ACGUTXKI";
12/*
13static int BP_pair[NBASES][NBASES]=
14*/
15/* _ A C G U X K I */
16/*
17 {{ 0, 0, 0, 0, 0, 0, 0, 0},
18 { 0, 0, 0, 0, 5, 0, 0, 5},
19 { 0, 0, 0, 1, 0, 0, 0, 0},
20 { 0, 0, 2, 0, 3, 0, 0, 0},
21 { 0, 6, 0, 4, 0, 0, 0, 6},
22 { 0, 0, 0, 0, 0, 0, 2, 0},
23 { 0, 0, 0, 0, 0, 1, 0, 0},
24 { 0, 6, 0, 0, 5, 0, 0, 0}};
25*/
26
27/*
28 in the block below, uppercase letters ACGU... are
29 used for RNA nucleotides whereas lowercase letters
30 are used for DNA nucleotides
31*/
/*
 Pair-type codes, indexed as BP_pair[row base][column base]; 0 means the two
 bases do not pair. The RNA/RNA quadrant (upper left) uses the codes 1-6.
 The same pairs carry an offset of 8 in the RNA-row/DNA-column quadrant,
 16 in the DNA-row/RNA-column quadrant and 24 in the DNA/DNA quadrant.
*/
32static int BP_pair[NNUCLEOTIDES_HYBRID][NNUCLEOTIDES_HYBRID]=
33/* _ A C G U X K I _ a c g t x k i*/
34/*_*/{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
35/*A*/ { 0, 0, 0, 0, 5, 0, 0, 5, 0, 0, 0, 0,13, 0, 0,13},/*Ai?*/
36/*C*/ { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0},
37/*G*/ { 0, 0, 2, 0, 3, 0, 0, 0, 0, 0,10, 0,11, 0, 0, 0},
38/*U*/ { 0, 6, 0, 4, 0, 0, 0, 6, 0,14, 0,12, 0, 0, 0,14},
39/*X*/ { 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,10, 0},
40/*K*/ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0},
41/*I*/ { 0, 6, 0, 0, 5, 0, 0, 0, 0,14, 0, 0,13, 0, 0, 0},
42/*_*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
43/*a*/ { 0, 0, 0, 0,21, 0, 0,21, 0, 0, 0, 0,29, 0, 0,29},/*ai?*/
44/*c*/ { 0, 0, 0,17, 0, 0, 0, 0, 0, 0, 0,25, 0, 0, 0, 0},
45/*g*/ { 0, 0,18, 0,19, 0, 0, 0, 0, 0,26, 0,27, 0, 0, 0},
46/*t*/ { 0,22, 0,20, 0, 0, 0,22, 0,30, 0,28, 0, 0, 0,30},
47/*x*/ { 0, 0, 0, 0, 0, 0,18, 0, 0, 0, 0, 0, 0, 0,26, 0},
48/*k*/ { 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0,25, 0, 0},
49/*i*/ { 0,22, 0, 0,21, 0, 0, 0, 0,30, 0, 0,29, 0, 0, 0}};
50
51/* define the above as possibly right */
52
53#define MAXALPHA 20 /* maximal length of alphabet */
54
/* alias[code]: base code substituted for `code`; filled by make_pair_matrix() */
55static short alias[MAXALPHA+1];
/* pair[i][j]: pair-type code for base codes i and j, 0 if they do not pair;
   filled by make_pair_matrix() */
56static int pair[MAXALPHA+1][MAXALPHA+1];
57/* rtype[pair[i][j]]:=pair[j][i] */
58/*
59static int rtype[8] = {0, 2, 1, 4, 3, 6, 5, 7};
60*/
/* The initial values below give, for each pair type, the type of the same
   pair read in the opposite direction; make_pair_matrix() rewrites the
   entries from the current pair[][] table. */
61static int rtype[NBPAIRS_HYBRID] = {
62 0, 2, 1, 4, 3, 6, 5, 7,
63 0,18,17,20,19,22,21, 7,
64 0,10, 9,12,11,14,13, 7,
65 0,26,25,28,27,30,29, 7};
66
67#ifdef _OPENMP
68#pragma omp threadprivate(Law_and_Order, BP_pair, alias, pair, rtype)
69#endif
70
71/* for backward compatibility */
72#define ENCODE(c) encode_char(c)
73
74static int encode_char(char c) {
75 /* return numerical representation of base used e.g. in pair[][] */
76 int code;
77 if (energy_set>0) code = (int) (c-'A')+1;
78 else {
79 const char *pos;
80 pos = strchr(Law_and_Order, c);
81 if (pos==NULL) code=0;
82 else code = (int) (pos-Law_and_Order);
83 if (code>5) code = 0;
84 if (code>4) code--; /* make T and U equivalent */
85 }
86 return code;
87}
88
89/*@+boolint +charint@*/
90/*@null@*/
91extern char *nonstandards;
92extern void nrerror(const char message[]);
93static void make_pair_matrix(void)
94{
95 int i,j;
96
97 if (energy_set==0) {
98 /* RNA encoding */
99 for (i=0; i<5; i++) alias[i] = (short) i;
100 alias[5] = 3; /* X <-> G */
101 alias[6] = 2; /* K <-> C */
102 alias[7] = 0; /* I <-> default base '@' */
103 /* DNA encoding */
104 for (i=8; i<13; i++) alias[i] = (short) i;
105 alias[13] = 11; /* x <-> g */
106 alias[14] = 10; /* k <-> c */
107 alias[15] = 8; /* i <-> default base '@' */
108
109 for (i=0; i<NBASES; i++) {
110 for (j=0; j<NBASES; j++)
111 pair[i][j] = BP_pair[i][j];
112 }
113 if (noGU){
114 pair[3][4] = pair[4][3] = 0; /* RNA */
115 pair[11][4] = pair[4][11] = 0; /* RNA-DNA hybrid */
116 pair[3][12] = pair[12][3] = 0;
117 pair[11][12] = pair[12][11] = 0; /* DNA */
118 }
119
120 /* extend this in the near future for hybrids if necessary !!! */
121 if (nonstandards!=NULL) { /* allow nonstandard bp's */
122 for (i=0; i<(int)strlen(nonstandards); i+=2)
123 pair[encode_char(nonstandards[i])]
124 [encode_char(nonstandards[i+1])]=7;
125 }
126 for (i=0; i<NBASES; i++) {
127 for (j=0; j<NBASES; j++)
128 rtype[pair[i][j]] = pair[j][i];
129 }
130
131 /* we dont modify this for the hybrid case */
132 } else {
133 for (i=0; i<=MAXALPHA; i++) {
134 for (j=0; j<=MAXALPHA; j++)
135 pair[i][j] = 0;
136 }
137 if (energy_set==1) {
138 for (i=1; i<MAXALPHA;) {
139 alias[i++] = 3; /* A <-> G */
140 alias[i++] = 2; /* B <-> C */
141 }
142 for (i=1; i<MAXALPHA; i++) {
143 pair[i][i+1] = 2; /* AB <-> GC */
144 i++;
145 pair[i][i-1] = 1; /* BA <-> CG */
146 }
147 }
148 else if (energy_set==2) {
149 for (i=1; i<MAXALPHA;) {
150 alias[i++] = 1; /* A <-> A*/
151 alias[i++] = 4; /* B <-> U */
152 }
153 for (i=1; i<MAXALPHA; i++) {
154 pair[i][i+1] = 5; /* AB <-> AU */
155 i++;
156 pair[i][i-1] = 6; /* BA <-> UA */
157 }
158 }
159 else if (energy_set==3) {
160 for (i=1; i<MAXALPHA-2; ) {
161 alias[i++] = 3; /* A <-> G */
162 alias[i++] = 2; /* B <-> C */
163 alias[i++] = 1; /* C <-> A */
164 alias[i++] = 4; /* D <-> U */
165 }
166 for (i=1; i<MAXALPHA-2; i++) {
167 pair[i][i+1] = 2; /* AB <-> GC */
168 i++;
169 pair[i][i-1] = 1; /* BA <-> CG */
170 i++;
171 pair[i][i+1] = 5; /* CD <-> AU */
172 i++;
173 pair[i][i-1] = 6; /* DC <-> UA */
174 }
175 }
176 else nrerror("What energy_set are YOU using??");
177 for (i=0; i<=MAXALPHA; i++) {
178 for (j=0; j<=MAXALPHA; j++)
179 rtype[pair[i][j]] = pair[j][i];
180 }
181 }
182}
183
184static short *encode_sequence(const char *sequence, short how){
185 unsigned int i,l = (unsigned int)strlen(sequence);
186 short *S = (short *) space(sizeof(short)*(l+2));
187
188 switch(how){
189 /* standard encoding as always used for S */
190 case 0: for(i=1; i<=l; i++) /* make numerical encoding of sequence */
191 S[i]= (short) encode_char(toupper(sequence[i-1]));
192 S[l+1] = S[1];
193 S[0] = (short) l;
194 break;
195 /* encoding for mismatches of nostandard bases (normally used for S1) */
196 case -1:
197 case 1: for(i=1; i<=l; i++)
198 S[i] = alias[(short) encode_char(toupper(sequence[i-1]))];
199 S[l+1] = S[1];
200 S[0] = S[l];
201 break;
202 /* encode DNA */
203 case -2: for(i=1; i<=l; i++)
204 S[i] = ((short) encode_char(toupper(sequence[i-1])))+8;
205 S[l+1] = S[1];
206 S[0] = (short) l;
207 break;
208 case -3: for(i=1; i<=l; i++)
209 S[i] = alias[(short) encode_char(toupper(sequence[i-1]))]+8;
210 S[l+1] = S[1];
211 S[0] = S[l];
212 break;
213 /*encoding dna for rna-dna hybrids */
214 default: for(i=1; i<how; i++)
215 S[i] = alias[(short) encode_char(toupper(sequence[i-1]))];
216 for(; i<=l; i++)
217 S[i] = alias[(short) encode_char(toupper(sequence[i-1]))]+8;
218 S[l+1] = S[1];
219 S[0] = (short) l;
220 break;
221 }
222
223 return S;
224}
#define MAXALPHA
Maximal length of alphabet.
Definition data_structures.h:22
#define NBPAIRS_HYBRID
Definition energy_const.h:34
#define NNUCLEOTIDES_HYBRID
Definition energy_const.h:36
Here are all declarations of the global variables used throughout RNAlib.
char * nonstandards
contains allowed non standard base pairs
int noGU
Global switch to forbid/allow GU base pairs at all.
int energy_set
0 = BP; 1 = any with GC parameters; 2 = any with AU parameters
Various utility- and helper-functions used throughout the Vienna RNA package.
void * space(unsigned size)
Allocate space safely.