RNAlib-2.2.0-RC2
multibranch_loops.h
Go to the documentation of this file.
1 #ifndef VIENNA_RNA_PACKAGE_MULTIBRANCH_LOOPS_H
2 #define VIENNA_RNA_PACKAGE_MULTIBRANCH_LOOPS_H
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <math.h>
7 #include <ctype.h>
8 #include <string.h>
9 #include <ViennaRNA/fold_vars.h>
10 #include <ViennaRNA/energy_par.h>
11 #include <ViennaRNA/params.h>
12 #include <ViennaRNA/constraints.h>
13 #include <ViennaRNA/gquad.h>
14 
15 #ifdef __GNUC__
16 # define INLINE inline
17 #else
18 # define INLINE
19 #endif
20 
49 INLINE PRIVATE int E_MLstem( int type,
50  int si1,
51  int sj1,
52  vrna_param_t *P);
53 
60 INLINE PRIVATE double exp_E_MLstem(int type,
61  int si1,
62  int sj1,
63  vrna_exp_param_t *P);
64 
65 
66 
72 INLINE PRIVATE int E_mb_loop_stack(int i, int j, vrna_fold_compound *vc);
73 
74 /*
75 #################################
76 # BEGIN OF FUNCTION DEFINITIONS #
77 #################################
78 */
79 
80 
81 INLINE PRIVATE int
82 E_mb_loop_fast( int i,
83  int j,
85  int *dmli1,
86  int *dmli2){
87 
88  int decomp, en;
89  unsigned char type, tt;
90  short S_i1, S_j1;
91 
92  int cp = vc->cutpoint;
93  char *ptype = vc->ptype;
94  short *S = vc->sequence_encoding;
95  int *indx = vc->jindx;
96  char *hc = vc->hc->matrix;
97  int *hc_up = vc->hc->up_ml;
98  vrna_sc_t *sc = vc->sc;
99  int *fc = vc->matrices->fc;
100  vrna_param_t *P = vc->params;
101 
102  int ij = indx[j] + i;
103  int hc_decompose = hc[ij];
104  int e = INF;
105  int dangle_model = P->model_details.dangles;
106  int *rtype = &(P->model_details.rtype[0]);
107 
108  type = (unsigned char)ptype[ij];
109 
110  if(cp < 0){
111  S_i1 = S[i+1];
112  S_j1 = S[j-1];
113  } else {
114  S_i1 = ((i >= cp) || ((i + 1) < cp)) ? S[i+1] : -1;
115  S_j1 = (((j - 1) >= cp) || (j < cp)) ? S[j-1] : -1;
116  }
117 
118  if(hc_decompose & VRNA_CONSTRAINT_CONTEXT_MB_LOOP){
119  if((S_i1 >= 0) && (S_j1 >= 0)){ /* regular multi branch loop */
120  decomp = dmli1[j-1];
121  tt = rtype[type];
122  switch(dangle_model){
123  /* no dangles */
124  case 0: if(decomp != INF){
125  decomp += E_MLstem(tt, -1, -1, P);
126  if(sc){
127  if(sc->en_basepair)
128  decomp += sc->en_basepair[ij];
129  }
130  }
131  break;
132 
133  /* double dangles */
134  case 2: decomp += E_MLstem(tt, S_j1, S_i1, P);
135  if(sc){
136  if(sc->en_basepair)
137  decomp += sc->en_basepair[ij];
138  }
139  break;
140 
141  /* normal dangles, aka dangles = 1 || 3 */
142  default: if(decomp != INF){
143  decomp += E_MLstem(tt, -1, -1, P);
144  if(sc){
145  if(sc->en_basepair)
146  decomp += sc->en_basepair[ij];
147  }
148  }
149  if(hc_up[i+1]){
150  if(dmli2[j-1] != INF){
151  en = dmli2[j-1] + E_MLstem(tt, -1, S_i1, P) + P->MLbase;
152  if(sc){
153  if(sc->free_energies)
154  en += sc->free_energies[i+1][1];
155 
156  if(sc->en_basepair)
157  en += sc->en_basepair[ij];
158  }
159  decomp = MIN2(decomp, en);
160  }
161  }
162  if(hc_up[j-1] && hc_up[i+1]){
163  if(dmli2[j-2] != INF){
164  en = dmli2[j-2] + E_MLstem(tt, S_j1, S_i1, P) + 2*P->MLbase;
165  if(sc){
166  if(sc->free_energies)
167  en += sc->free_energies[i+1][1]
168  + sc->free_energies[j-1][1];
169 
170  if(sc->en_basepair)
171  en += sc->en_basepair[ij];
172  }
173  decomp = MIN2(decomp, en);
174  }
175  }
176  if(hc_up[j-1]){
177  if(dmli1[j-2] != INF){
178  en = dmli1[j-2] + E_MLstem(tt, S_j1, -1, P) + P->MLbase;
179  if(sc){
180  if(sc->free_energies)
181  en += sc->free_energies[j-1][1];
182 
183  if(sc->en_basepair)
184  en += sc->en_basepair[ij];
185  }
186  decomp = MIN2(decomp, en);
187  }
188  }
189  break;
190  }
191  if(decomp != INF)
192  e = decomp + P->MLclosing;
193  }
194 
195  if(!((i >= cp) || (j < cp))){ /* multibrach like cofold structure with cut somewhere between i and j */
196  decomp = fc[i+1] + fc[j-1];
197  tt = rtype[type];
198  switch(dangle_model){
199  case 0: decomp += E_ExtLoop(tt, -1, -1, P);
200  break;
201  case 2: decomp += E_ExtLoop(tt, S_j1, S_i1, P);
202  break;
203  default: decomp += E_ExtLoop(tt, -1, -1, P);
204  if((hc_up[i+1]) && (hc_up[j-1])){
205  en = fc[i+2] + fc[j-2] + E_ExtLoop(tt, S_j1, S_i1, P);
206  decomp = MIN2(decomp, en);
207  }
208  if(hc_up[i+1]){
209  en = fc[i+2] + fc[j-1] + E_ExtLoop(tt, -1, S_i1, P);
210  decomp = MIN2(decomp, en);
211  }
212  if(hc_up[j-1]){
213  en = fc[i+1] + fc[j-2] + E_ExtLoop(tt, S_j1, -1, P);
214  decomp = MIN2(decomp, en);
215  }
216  break;
217  }
218  e = MIN2(e, decomp);
219  }
220  }
221  return e;
222 }
223 
224 INLINE PRIVATE int
226  int j,
227  vrna_fold_compound *vc){
228 
229  int e, decomp, en, i1k, k1j1, ij, k;
230  unsigned char type, type_2;
231 
232  int *indx = vc->jindx;
233  char *hc = vc->hc->matrix;
234  int *c = vc->matrices->c;
235  int *fML = vc->matrices->fML;
236  vrna_param_t *P = vc->params;
237  vrna_md_t *md = &(P->model_details);
238  int turn = md->min_loop_size;
239  char *ptype = vc->ptype;
240  int *rtype = &(md->rtype[0]);
241  vrna_sc_t *sc = vc->sc;
242 
243  e = INF;
244  ij = indx[j] + i;
245  type = ptype[ij];
246 
247  if(hc[ij] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP){
248  decomp = INF;
249  k1j1 = indx[j-1] + i + 2 + turn + 1;
250  for (k = i+2+turn; k < j-2-turn; k++, k1j1++){
251  i1k = indx[k] + i + 1;
253  type_2 = rtype[(unsigned char)ptype[i1k]];
254  en = c[i1k]+P->stack[type][type_2]+fML[k1j1];
255  decomp = MIN2(decomp, en);
256  }
257  if(hc[k1j1] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC){
258  type_2 = rtype[(unsigned char)ptype[k1j1]];
259  en = c[k1j1]+P->stack[type][type_2]+fML[i1k];
260  decomp = MIN2(decomp, en);
261  }
262  }
263  /* no TermAU penalty if coax stack */
264  decomp += 2*P->MLintern[1] + P->MLclosing;
265  if(sc){
266  if(sc->en_basepair)
267  decomp += sc->en_basepair[ij];
268  }
269  e = decomp;
270 
271  }
272  return e;
273 }
274 
275 INLINE PRIVATE int
276 E_ml_rightmost_stem(int i,
277  int j,
278  vrna_fold_compound *vc){
279 
280  int en;
281  vrna_param_t *P = vc->params;
282  int length = vc->length;
283  short *S = vc->sequence_encoding;
284  int *indx = vc->jindx;
285  char *hc = vc->hc->matrix;
286  int *hc_up = vc->hc->up_ml;
287  vrna_sc_t *sc = vc->sc;
288  int *c = vc->matrices->c;
289  int *fm = (P->model_details.uniq_ML) ? vc->matrices->fM1 : vc->matrices->fML;
290  int *ggg = vc->matrices->ggg;
291  int ij = indx[j] + i;
292  int type = vc->ptype[ij];
293  int hc_decompose = hc[ij];
294  int dangle_model = P->model_details.dangles;
295  int with_gquad = P->model_details.gquad;
296  int cp = vc->cutpoint;
297  int e = INF;
298 
299  if((cp < 0) || (((i - 1) >= cp) || (i < cp))){
300  if((cp < 0) || ((j >= cp) || ((j + 1) < cp))){
301  if(hc_decompose & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC){
302  e = c[ij];
303  if(e != INF){
304  switch(dangle_model){
305  case 2: e += E_MLstem(type, (i==1) ? S[length] : S[i-1], S[j+1], P);
306  break;
307  default: e += E_MLstem(type, -1, -1, P);
308  break;
309  }
310  }
311  }
312 
313  if(with_gquad)
314  if((cp < 0) || ((i >= cp) || (j < cp))){
315  en = ggg[ij] + E_MLstem(0, -1, -1, P);
316  e = MIN2(e, en);
317  }
318 
319  }
320 
321  if((cp < 0) || (((j - 1) >= cp) || (j < cp)))
322  if(hc_up[j]){
323  if(fm[indx[j - 1] + i] != INF){
324  en = fm[indx[j - 1] + i] + P->MLbase;
325  if(sc)
326  if(sc->free_energies)
327  en += sc->free_energies[j][1];
328 
329  e = MIN2(e, en);
330  }
331  }
332  }
333  return e;
334 }
335 
336 INLINE PRIVATE int
337 E_ml_stems_fast(int i,
338  int j,
339  vrna_fold_compound *vc,
340  int *fmi,
341  int *dmli){
342 
343  int k, en, decomp, mm5, mm3, type_2, k1j, stop;
344 
345  int length = (int)vc->length;
346  char *ptype = vc->ptype;
347  short *S = vc->sequence_encoding;
348  int *indx = vc->jindx;
349  char *hc = vc->hc->matrix;
350  int *hc_up = vc->hc->up_ml;
351  vrna_sc_t *sc = vc->sc;
352  int *c = vc->matrices->c;
353  int *fm = vc->matrices->fML;
354  vrna_param_t *P = vc->params;
355  int ij = indx[j] + i;
356  int dangle_model = P->model_details.dangles;
357  int turn = P->model_details.min_loop_size;
358  int type = ptype[ij];
359  int *rtype = &(P->model_details.rtype[0]);
360  int circular = P->model_details.circ;
361  int cp = vc->cutpoint;
362  int e = INF;
363 
364  /* extension with one unpaired nucleotide at the right (3' site)
365  or full branch of (i,j)
366  */
367  e = E_ml_rightmost_stem(i,j,vc);
368 
369  /* extension with one unpaired nucleotide at 5' site
370  and all other variants which are needed for odd
371  dangle models
372  */
373  if((cp < 0) || (((i - 1) >= cp) || (i < cp))){
374  switch(dangle_model){
375  /* no dangles */
376  case 0: /* fall through */
377 
378  /* double dangles */
379  case 2: if((cp < 0) || ((i >= cp) || ((i + 1) < cp)))
380  if(hc_up[i]){
381  if(fm[ij + 1] != INF){
382  en = fm[ij + 1] + P->MLbase;
383  if(sc)
384  if(sc->free_energies)
385  en += sc->free_energies[i][1];
386  e = MIN2(e, en);
387  }
388  }
389  break;
390 
391  /* normal dangles, aka dangle_model = 1 || 3 */
392  default: mm5 = ((i>1) || circular) ? S[i] : -1;
393  mm3 = ((j<length) || circular) ? S[j] : -1;
394  if((cp < 0) || ((i >= cp) || ((i + 1) < cp)))
395  if(hc_up[i]){
396  if(fm[ij+1] != INF){
397  en = fm[ij+1] + P->MLbase;
398  if(sc)
399  if(sc->free_energies)
400  en += sc->free_energies[i][1];
401  e = MIN2(e, en);
402  }
403  if(hc[ij+1] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC){
404  if(c[ij+1] != INF){
405  type = ptype[ij+1];
406  en = c[ij+1] + E_MLstem(type, mm5, -1, P) + P->MLbase;
407  if(sc)
408  if(sc->free_energies)
409  en += sc->free_energies[i][1];
410  e = MIN2(e, en);
411  }
412  }
413  }
414 
415  if((cp < 0) || (((j - 1) >= cp) || (j < cp)))
416  if(hc_up[j]){
417  if(hc[indx[j-1]+i] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC){
418  if(c[indx[j-1]+i] != INF){
419  type = ptype[indx[j-1]+i];
420  en = c[indx[j-1]+i] + E_MLstem(type, -1, mm3, P) + P->MLbase;
421  if(sc)
422  if(sc->free_energies)
423  en += sc->free_energies[j][1];
424  e = MIN2(e, en);
425  }
426  }
427  }
428 
429  if( (cp < 0)
430  || ( (((j - 1) >= cp) || (j < cp))
431  && ((i >= cp) || ((i + 1) < cp))))
432  if(hc[indx[j-1]+i+1] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC){
433  if(hc_up[i] && hc_up[j]){
434  if(c[indx[j-1]+i+1] != INF){
435  type = ptype[indx[j-1]+i+1];
436  en = c[indx[j-1]+i+1] + E_MLstem(type, mm5, mm3, P) + 2*P->MLbase;
437  if(sc)
438  if(sc->free_energies)
439  en += sc->free_energies[j][1] + sc->free_energies[i][1];
440  e = MIN2(e, en);
441  }
442  }
443  }
444  break;
445  }
446  }
447 
448  /* modular decomposition -------------------------------*/
449  k1j = indx[j] + i + turn + 2;
450  stop = (cp > 0) ? (cp - 1) : (j - 2 - turn);
451  for (decomp = INF, k = i + 1 + turn; k <= stop; k++, k1j++){
452  if((fmi[k] != INF ) && (fm[k1j] != INF)){
453  en = fmi[k] + fm[k1j];
454  decomp = MIN2(decomp, en);
455  }
456  }
457  k++; k1j++;
458  for (;k <= j - 2 - turn; k++, k1j++){
459  if((fmi[k] != INF) && (fm[k1j] != INF)){
460  en = fmi[k] + fm[k1j];
461  decomp = MIN2(decomp, en);
462  }
463  }
464 
465  dmli[j] = decomp; /* store for use in fast ML decompositon */
466  e = MIN2(e, decomp);
467 
468  /* coaxial stacking */
469  if (dangle_model==3) {
470  /* additional ML decomposition as two coaxially stacked helices */
471  int ik;
472  k1j = indx[j]+i+turn+2;
473  for (decomp = INF, k = i + 1 + turn; k <= stop; k++, k1j++){
474  ik = indx[k]+i;
475  if((hc[ik] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC) && (hc[k1j] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC)){
476  type = rtype[(unsigned char)ptype[ik]];
477  type_2 = rtype[(unsigned char)ptype[k1j]];
478  en = c[ik] + c[k1j] + P->stack[type][type_2];
479  decomp = MIN2(decomp, en);
480  }
481  }
482  k++; k1j++;
483  for (; k <= j-2-turn; k++, k1j++){
484  ik = indx[k]+i;
485  if((hc[ik] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC) && (hc[k1j] & VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC)){
486  type = rtype[(unsigned char)ptype[ik]];
487  type_2 = rtype[(unsigned char)ptype[k1j]];
488  en = c[ik] + c[k1j] + P->stack[type][type_2];
489  decomp = MIN2(decomp, en);
490  }
491  }
492 
493  decomp += 2*P->MLintern[1]; /* no TermAU penalty if coax stack */
494 #if 0
495  /* This is needed for Y shaped ML loops with coax stacking of
496  interior pairts, but backtracking will fail if activated */
497  DMLi[j] = MIN2(DMLi[j], decomp);
498  DMLi[j] = MIN2(DMLi[j], DMLi[j-1]+P->MLbase);
499  DMLi[j] = MIN2(DMLi[j], DMLi1[j]+P->MLbase);
500  new_fML = MIN2(new_fML, DMLi[j]);
501 #endif
502  e = MIN2(e, decomp);
503  }
504 
505  fmi[j] = e;
506 
507  return e;
508 }
509 
510 
511 
512 INLINE PRIVATE int E_MLstem(int type, int si1, int sj1, vrna_param_t *P){
513  int energy = 0;
514  if(si1 >= 0 && sj1 >= 0){
515  energy += P->mismatchM[type][si1][sj1];
516  }
517  else if (si1 >= 0){
518  energy += P->dangle5[type][si1];
519  }
520  else if (sj1 >= 0){
521  energy += P->dangle3[type][sj1];
522  }
523 
524  if(type > 2)
525  energy += P->TerminalAU;
526 
527  energy += P->MLintern[type];
528 
529  return energy;
530 }
531 
532 
533 
534 INLINE PRIVATE double
535 exp_E_MLstem( int type,
536  int si1,
537  int sj1,
538  vrna_exp_param_t *P){
539 
540  double energy = 1.0;
541  if(si1 >= 0 && sj1 >= 0){
542  energy = P->expmismatchM[type][si1][sj1];
543  }
544  else if(si1 >= 0){
545  energy = P->expdangle5[type][si1];
546  }
547  else if(sj1 >= 0){
548  energy = P->expdangle3[type][sj1];
549  }
550 
551  if(type > 2)
552  energy *= P->expTermAU;
553 
554  energy *= P->expMLintern[type];
555  return energy;
556 }
557 
562 #endif
int * ggg
Energies of g-quadruplexes.
Definition: data_structures.h:429
int uniq_ML
Flag to ensure unique multibranch loop decomposition during folding.
Definition: model.h:186
int * c
Energy array, given that i-j pair.
Definition: data_structures.h:422
struct vrna_hc_t * hc
The hard constraints data structure used for structure prediction.
Definition: data_structures.h:707
int * fM1
Second ML array, only for unique multibrnach loop decomposition.
Definition: data_structures.h:427
int cutpoint
The position of the (cofold) cutpoint within the provided sequence. If there is no cutpoint...
Definition: data_structures.h:703
char * matrix
Upper triangular matrix encoding where a base pair or unpaired nucleotide is allowed.
Definition: constraints.h:379
struct vrna_param_t * params
The precomputed free energy contributions for each type of loop.
Definition: data_structures.h:712
int * jindx
DP matrix accessor.
Definition: data_structures.h:716
int * fML
Multi-loop auxiliary energy array.
Definition: data_structures.h:426
int * up_ml
A linear array that holds the number of allowed unpaired nucleotides in a multi branched loop...
Definition: constraints.h:391
int gquad
Include G-quadruplexes in structure prediction.
Definition: model.h:184
int ** free_energies
Energy contribution for unpaired sequence stretches.
Definition: constraints.h:402
#define MIN2(A, B)
Get the minimum of two comparable values.
Definition: utils.h:118
vrna_md_t model_details
Model details to be used in the recursions.
Definition: params.h:79
The most basic data structure required by many functions throughout the RNAlib.
Definition: data_structures.h:689
#define VRNA_CONSTRAINT_CONTEXT_MB_LOOP
Hard constraints flag, base pair is enclosed in an interior loop.
Definition: constraints.h:221
PRIVATE int E_ExtLoop(int type, int si1, int sj1, vrna_param_t *P)
Definition: exterior_loops.h:454
int * fc
Energy from i to cutpoint (and vice versa if i>cut)
Definition: data_structures.h:425
#define INF
Definition: energy_const.h:16
#define VRNA_CONSTRAINT_CONTEXT_MB_LOOP_ENC
Hard constraints flag, base pair is enclosed in a multi branch loop.
Definition: constraints.h:229
vrna_mx_mfe_t * matrices
The MFE DP matrices.
Definition: data_structures.h:709
int min_loop_size
Minimum size of hairpin loops.
Definition: model.h:194
int dangles
Specifies the dangle model used in any energy evaluation (0,1,2 or 3)
Definition: model.h:172
unsigned int length
The length of the sequence (or sequence alignment)
Definition: data_structures.h:702
Various functions related to G-quadruplex computations.
char * ptype
Pair type array.
Definition: data_structures.h:740
The datastructure that contains temperature scaled energy parameters.
Definition: params.h:41
The soft constraints data structure.
Definition: constraints.h:401
int circ
Assume RNA to be circular instead of linear.
Definition: model.h:183
The data structure that contains the complete model details used throughout the calculations.
Definition: model.h:169
PRIVATE int E_mb_loop_stack(int i, int j, vrna_fold_compound *vc)
Evaluate energy of a multi branch helices stacking onto closing pair (i,j)
Definition: multibranch_loops.h:225
Here all all declarations of the global variables used throughout RNAlib.
short * sequence_encoding
Numerical encoding of the input sequence.
Definition: data_structures.h:735
struct vrna_sc_t * sc
The soft constraints for usage in structure prediction and evaluation.
Definition: data_structures.h:754
int * en_basepair
Energy contribution for base pairs.
Definition: constraints.h:403
The datastructure that contains temperature scaled Boltzmann weights of the energy parameters...
Definition: params.h:86