RNAlib-2.2.0-RC3
hairpin_loops.h
Go to the documentation of this file.
1 #ifndef VIENNA_RNA_PACKAGE_HAIRPIN_LOOPS_H
2 #define VIENNA_RNA_PACKAGE_HAIRPIN_LOOPS_H
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <math.h>
7 #include <ctype.h>
8 #include <string.h>
9 #include <ViennaRNA/fold_vars.h>
10 #include <ViennaRNA/energy_par.h>
12 #include <ViennaRNA/params.h>
13 #include <ViennaRNA/constraints.h>
15 #include <ViennaRNA/gquad.h>
16 
17 #ifdef __GNUC__
18 # define INLINE inline
19 #else
20 # define INLINE
21 #endif
22 
23 #ifdef ON_SAME_STRAND
24 #undef ON_SAME_STRAND
25 #endif
26 
27 #define ON_SAME_STRAND(I,J,C) (((I)>=(C))||((J)<(C)))
28 
71 INLINE PRIVATE int E_Hairpin(int size,
72  int type,
73  int si1,
74  int sj1,
75  const char *string,
76  vrna_param_t *P);
77 
96 INLINE PRIVATE double
97 exp_E_Hairpin( int u,
98  int type,
99  short si1,
100  short sj1,
101  const char *string,
102  vrna_exp_param_t *P);
103 
104 
105 INLINE PRIVATE int
107  int i,
108  int j);
109 
110 INLINE PRIVATE int
112  int i,
113  int j);
114 
115 /*
116 #################################
117 # BEGIN OF FUNCTION DEFINITIONS #
118 #################################
119 */
120 
121 INLINE PRIVATE int
122 E_Hairpin(int size,
123  int type,
124  int si1,
125  int sj1,
126  const char *string,
127  vrna_param_t *P){
128 
129  int energy;
130 
131  if(size <= 30)
132  energy = P->hairpin[size];
133  else
134  energy = P->hairpin[30] + (int)(P->lxc*log((size)/30.));
135 
136  if(size < 3) return energy; /* should only be the case when folding alignments */
137 
138  if(P->model_details.special_hp){
139  if(size == 4){ /* check for tetraloop bonus */
140  char tl[7]={0}, *ts;
141  strncpy(tl, string, 6);
142  if ((ts=strstr(P->Tetraloops, tl)))
143  return (P->Tetraloop_E[(ts - P->Tetraloops)/7]);
144  }
145  else if(size == 6){
146  char tl[9]={0}, *ts;
147  strncpy(tl, string, 8);
148  if ((ts=strstr(P->Hexaloops, tl)))
149  return (energy = P->Hexaloop_E[(ts - P->Hexaloops)/9]);
150  }
151  else if(size == 3){
152  char tl[6]={0,0,0,0,0,0}, *ts;
153  strncpy(tl, string, 5);
154  if ((ts=strstr(P->Triloops, tl))) {
155  return (P->Triloop_E[(ts - P->Triloops)/6]);
156  }
157  return (energy + (type>2 ? P->TerminalAU : 0));
158  }
159  }
160  energy += P->mismatchH[type][si1][sj1];
161 
162  return energy;
163 }
164 
173 INLINE PRIVATE int
175  int i,
176  int j){
177 
178  int u, *hc_up;
179  char hc;
180 
181  u = j - i - 1;
182  hc_up = vc->hc->up_hp;
183  hc = vc->hc->matrix[vc->jindx[j] + i];
184 
185  /* is this base pair allowed to close a hairpin (like) loop ? */
187  /* are all nucleotides in the loop allowed to be unpaired ? */
188  if(hc_up[i+1] >= u){
189  return vrna_eval_hp_loop(vc, i, j);
190  }
191  }
192  return INF;
193 }
194 
199 INLINE PRIVATE int
201  int i,
202  int j){
203 
204  int u, *hc_up;
205  char hc;
206 
207  u = vc->length - j + i - 1;
208  hc_up = vc->hc->up_hp;
209  hc = vc->hc->matrix[vc->jindx[j] + i];
210 
211  /* is this base pair allowed to close a hairpin (like) loop ? */
213  /* are all nucleotides in the loop allowed to be unpaired ? */
214  if(hc_up[j+1] >= u){
215  return vrna_eval_ext_hp_loop(vc, i, j);
216  }
217  }
218  return INF;
219 }
220 
227 INLINE PRIVATE int
229  int i,
230  int j){
231 
232  int u, e, type;
233  char loopseq[10];
234  short *S;
235  vrna_param_t *P;
236  vrna_sc_t *sc;
237  vrna_md_t *md;
238 
239  S = vc->sequence_encoding;
240  P = vc->params;
241  sc = vc->sc;
242  md = &(P->model_details);
243  u = vc->length - j + i - 1;
244  type = md->pair[S[j]][S[i]];
245 
246  if(type == 0)
247  type = 7;
248 
249  if (u<7) {
250  strcpy(loopseq , vc->sequence + j - 1);
251  strncat(loopseq, vc->sequence, i);
252  }
253 
254  e = E_Hairpin(u, type, S[j + 1], S[i - 1], loopseq, P);
255 
256  if(sc){
257  if(sc->free_energies)
258  e += sc->free_energies[j + 1][vc->length - j]
259  + sc->free_energies[1][i - 1];
260 
261  if(sc->f)
262  e += sc->f(j, i, i, j, VRNA_DECOMP_PAIR_HP, sc->data);
263  }
264 
265  return e;
266 }
267 
281 INLINE PRIVATE int
283  int i,
284  int j){
285 
286  int u, e, s, ij, cp, type, *types, *idx, n_seq;
287  short *S, **SS, **S5, **S3;
288  char **Ss;
289  unsigned short **a2s;
290  vrna_param_t *P;
291  vrna_sc_t *sc, **scs;
292  vrna_md_t *md;
293 
294  cp = vc->cutpoint;
295  idx = vc->jindx;
296  P = vc->params;
297  md = &(P->model_details);
298  e = INF;
299 
300  switch(vc->type){
301  /* single sequences and cofolding hybrids */
303  sc = vc->sc;
304  u = j - i - 1;
305  ij = idx[j] + i;
306  type = md->pair[S[i]][S[j]];
307 
308  if(type == 0)
309  type = 7;
310 
311  if((cp < 0) || ON_SAME_STRAND(i, j, cp)){ /* regular hairpin loop */
312  e = E_Hairpin(u, type, S[i+1], S[j-1], vc->sequence+i-1, P);
313  } else { /* hairpin-like exterior loop (for cofolding) */
314  short si, sj;
315  si = ON_SAME_STRAND(i, i + 1, cp) ? S[i+1] : -1;
316  sj = ON_SAME_STRAND(j - 1, j, cp) ? S[j-1] : -1;
317  if (md->dangles)
318  e = E_ExtLoop(md->rtype[type], sj, si, P);
319  else
320  e = E_ExtLoop(md->rtype[type], -1, -1, P);
321  }
322 
323  /* add soft constraints */
324  if(sc){
325  if(sc->free_energies)
326  e += sc->free_energies[i+1][u];
327 
328  if(sc->en_basepair){
329  e += sc->en_basepair[ij];
330  }
331  if(sc->f)
332  e += sc->f(i, j, i, j, VRNA_DECOMP_PAIR_HP, sc->data);
333  }
334  break;
335  /* sequence alignments */
336  case VRNA_VC_TYPE_ALIGNMENT: SS = vc->S;
337  S5 = vc->S5; /*S5[s][i] holds next base 5' of i in sequence s*/
338  S3 = vc->S3; /*Sl[s][i] holds next base 3' of i in sequence s*/
339  Ss = vc->Ss;
340  a2s = vc->a2s;
341  scs = vc->scs;
342  n_seq = vc->n_seq;
343  types = (int *)vrna_alloc(sizeof(int) * n_seq);
344 
345  for (s=0; s<n_seq; s++) {
346  types[s] = md->pair[SS[s][i]][SS[s][j]];
347  if (types[s]==0) types[s]=7;
348  }
349 
350  for(e = s = 0; s < n_seq; s++){
351  u = a2s[s][j-1] - a2s[s][i];
352  e += (u < 3) ? 600 : E_Hairpin(u, types[s], S3[s][i], S5[s][j], Ss[s]+(a2s[s][i-1]), P); /* ??? really 600 ??? */
353 
354  }
355 
356  if(scs)
357  for(s = 0; s < n_seq; s++){
358  if(scs[s]){
359  u = a2s[s][j-1]-a2s[s][i];
360 
361  if(scs[s]->free_energies)
362  e += scs[s]->free_energies[a2s[s][i]+1][u];
363 
364  if(scs[s]->en_basepair)
365  e += scs[s]->en_basepair[idx[j] + i];
366 
367  if(scs[s]->f)
368  e += scs[s]->f(i, j, i, j, VRNA_DECOMP_PAIR_HP, scs[s]->data);
369  }
370  }
371 
372  free(types);
373 
374  break;
375  /* nothing */
376  default: break;
377  }
378 
379 
380  return e;
381 }
382 
383 /*
384 *************************************
385 * Partition function variants below *
386 *************************************
387 */
388 
389 
390 INLINE PRIVATE double
392  int type,
393  short si1,
394  short sj1,
395  const char *string,
396  vrna_exp_param_t *P){
397 
398  double q, kT;
399  kT = P->kT; /* kT in cal/mol */
400 
401  if(u <= 30)
402  q = P->exphairpin[u];
403  else
404  q = P->exphairpin[30] * exp( -(P->lxc*log( u/30.))*10./kT);
405 
406  if(u < 3) return q; /* should only be the case when folding alignments */
407 
408  if(P->model_details.special_hp){
409  if(u==4){
410  char tl[7]={0,0,0,0,0,0,0}, *ts;
411  strncpy(tl, string, 6);
412  if ((ts=strstr(P->Tetraloops, tl))){
413  if(type != 7)
414  return (P->exptetra[(ts-P->Tetraloops)/7]);
415  else
416  q *= P->exptetra[(ts-P->Tetraloops)/7];
417  }
418  }
419  else if(u==6){
420  char tl[9]={0,0,0,0,0,0,0,0,0}, *ts;
421  strncpy(tl, string, 8);
422  if ((ts=strstr(P->Hexaloops, tl)))
423  return (P->exphex[(ts-P->Hexaloops)/9]);
424  }
425  else if(u==3){
426  char tl[6]={0,0,0,0,0,0}, *ts;
427  strncpy(tl, string, 5);
428  if ((ts=strstr(P->Triloops, tl)))
429  return (P->exptri[(ts-P->Triloops)/6]);
430  if (type>2)
431  return q * P->expTermAU;
432  else
433  return q;
434  }
435  }
436  q *= P->expmismatchH[type][si1][sj1];
437 
438  return q;
439 }
440 
446 INLINE PRIVATE double
448  int i,
449  int j){
450 
451  int u, ij, type;
452  char hc;
453  double q;
454 
455  int cp = vc->cutpoint;
456  short *S = vc->sequence_encoding;
457  int *idx = vc->jindx;
458  vrna_exp_param_t *P = vc->exp_params;
459  int *hc_up = vc->hc->up_hp;
460  vrna_sc_t *sc = vc->sc;
461  FLT_OR_DBL *scale = vc->exp_matrices->scale;
462 
463  q = 0.;
464  u = j - i - 1;
465  ij = idx[j] + i;
466  type = vc->ptype[ij];
467  hc = vc->hc->matrix[ij];
468 
469  /* is this base pair allowed to close a hairpin (like) loop ? */
471  /* are all nucleotides in the loop allowed to be unpaired ? */
472  if(hc_up[i+1] >= u){
473 
474  if((cp < 0) || ON_SAME_STRAND(i, j, cp)){ /* regular hairpin loop */
475  q = exp_E_Hairpin(u, type, S[i+1], S[j-1], vc->sequence+i-1, P)
476  * scale[u+2];
477  } else { /* hairpin-like exterior loop (for cofolding) */
478 
479  }
480 
481  /* add soft constraints */
482  if(sc){
483  if(sc->boltzmann_factors)
484  q *= sc->boltzmann_factors[i+1][u];
485 
486  if(sc->exp_en_basepair)
487  q *= sc->exp_en_basepair[ij];
488 
489  if(sc->exp_f)
490  q *= sc->exp_f(i, j, i, j, VRNA_DECOMP_PAIR_HP, sc->data);
491  }
492  }
493  }
494  return q;
495 }
496 
504 INLINE PRIVATE int
506  int i,
507  int j,
508  int en,
509  bondT *bp_stack,
510  int *stack_count){
511 
512  int e;
513  vrna_sc_t *sc;
514 
515  sc = NULL;
516  e = vrna_E_hp_loop(vc, i, j);
517 
518  if(e == en){
519  switch(vc->type){
520  case VRNA_VC_TYPE_SINGLE: sc = vc->sc;
521  break;
522  case VRNA_VC_TYPE_ALIGNMENT: if(vc->scs)
523  sc = vc->scs[0];
524  break;
525  default: break;
526  }
527 
528  if(sc)
529  if(sc->bt){
530  PAIR *ptr, *aux_bps;
531  aux_bps = sc->bt(i, j, -1, -1, VRNA_DECOMP_PAIR_HP, sc->data);
532  for(ptr = aux_bps; ptr && ptr->i != -1; ptr++){
533  bp_stack[++(*stack_count)].i = ptr->i;
534  bp_stack[(*stack_count)].j = ptr->j;
535  }
536  free(aux_bps);
537  }
538 
539  return 1;
540  }
541 
542  return 0;
543 }
544 
550 #endif
void * vrna_alloc(unsigned size)
Allocate space safely.
int special_hp
Include special hairpin contributions for tri, tetra and hexaloops.
Definition: model.h:178
FLT_OR_DBL ** boltzmann_factors
Boltzmann Factors of the energy contributions for unpaired sequence stretches.
Definition: constraints.h:416
int * up_hp
A linear array that holds the number of allowed unpaired nucleotides in a hairpin loop...
Definition: constraints.h:397
PRIVATE int E_Hairpin(int size, int type, int si1, int sj1, const char *string, vrna_param_t *P)
Compute the Energy of a hairpin-loop.
Definition: hairpin_loops.h:122
struct vrna_hc_t * hc
The hard constraints data structure used for structure prediction.
Definition: data_structures.h:716
#define VRNA_DECOMP_PAIR_HP
Generalized constraint folding flag indicating hairpin loop decomposition step.
Definition: constraints.h:266
int cutpoint
The position of the (cofold) cutpoint within the provided sequence. If there is no cutpoint...
Definition: data_structures.h:712
short ** S5
S5[s][i] holds next base 5' of i in sequence s.
Definition: data_structures.h:797
PRIVATE int vrna_eval_ext_hp_loop(vrna_fold_compound *vc, int i, int j)
Evaluate free energy of an exterior hairpin loop.
Definition: hairpin_loops.h:228
PRIVATE int vrna_eval_hp_loop(vrna_fold_compound *vc, int i, int j)
Evaluate free energy of a hairpin loop.
Definition: hairpin_loops.h:282
char * matrix
Upper triangular matrix encoding where a base pair or unpaired nucleotide is allowed.
Definition: constraints.h:391
struct vrna_param_t * params
The precomputed free energy contributions for each type of loop.
Definition: data_structures.h:721
PAIR *(* bt)(int, int, int, int, char, void *)
A function pointer used to obtain backtraced base pairs in loop regions that were altered by soft con...
Definition: constraints.h:433
int * jindx
DP matrix accessor.
Definition: data_structures.h:725
int ** free_energies
Energy contribution for unpaired sequence stretches.
Definition: constraints.h:414
Definition: data_structures.h:681
vrna_md_t model_details
Model details to be used in the recursions.
Definition: params.h:79
short ** S3
Sl[s][i] holds next base 3' of i in sequence s.
Definition: data_structures.h:800
unsigned int n_seq
The number of sequences in the alignment.
Definition: data_structures.h:785
vrna_mx_pf_t * exp_matrices
The PF DP matrices.
Definition: data_structures.h:719
FLT_OR_DBL * exp_en_basepair
Boltzmann Factors of the energy contribution for base pairs.
Definition: constraints.h:417
PRIVATE int vrna_E_hp_loop(vrna_fold_compound *vc, int i, int j)
Evaluate the free energy of a hairpin loop and consider possible hard constraints.
Definition: hairpin_loops.h:174
The most basic data structure required by many functions throughout the RNAlib.
Definition: data_structures.h:698
PRIVATE int E_ExtLoop(int type, int si1, int sj1, vrna_param_t *P)
Definition: exterior_loops.h:460
#define INF
Definition: energy_const.h:16
Energy evaluation of exterior loops for MFE and partition function calculations.
vrna_md_t model_details
Model details to be used in the recursions.
Definition: params.h:136
short ** S
Numerical encoding of the sequences in the alignment.
Definition: data_structures.h:794
int dangles
Specifies the dangle model used in any energy evaluation (0,1,2 or 3)
Definition: model.h:172
unsigned int length
The length of the sequence (or sequence alignment)
Definition: data_structures.h:711
PRIVATE double vrna_exp_E_hp_loop(vrna_fold_compound *vc, int i, int j)
High-Level function for hairpin loop energy evaluation (partition function variant) ...
Definition: hairpin_loops.h:447
vrna_vc_t type
The type of the vrna_fold_compound.
Definition: data_structures.h:704
Various functions related to G-quadruplex computations.
char * ptype
Pair type array.
Definition: data_structures.h:749
struct vrna_exp_param_t * exp_params
The precomputed free energy contributions as Boltzmann factors.
Definition: data_structures.h:722
Base pair.
Definition: data_structures.h:73
#define VRNA_CONSTRAINT_CONTEXT_HP_LOOP
Hard constraints flag, base pair encloses hairpin loop.
Definition: constraints.h:209
The datastructure that contains temperature scaled energy parameters.
Definition: params.h:41
Definition: data_structures.h:682
The soft constraints data structure.
Definition: constraints.h:413
The data structure that contains the complete model details used throughout the calculations.
Definition: model.h:169
void * data
A pointer to the data object provided for for pseudo energy contribution functions of the generalized...
Definition: constraints.h:470
Here all all declarations of the global variables used throughout RNAlib.
PRIVATE double exp_E_Hairpin(int u, int type, short si1, short sj1, const char *string, vrna_exp_param_t *P)
Compute Boltzmann weight of a hairpin loop.
Definition: hairpin_loops.h:391
Base pair data structure used in subopt.c.
Definition: data_structures.h:97
short * sequence_encoding
Numerical encoding of the input sequence.
Definition: data_structures.h:744
struct vrna_sc_t * sc
The soft constraints for usage in structure prediction and evaluation.
Definition: data_structures.h:763
PRIVATE int vrna_E_ext_hp_loop(vrna_fold_compound *vc, int i, int j)
Evaluate the free energy of an exterior hairpin loop and consider possible hard constraints.
Definition: hairpin_loops.h:200
int * en_basepair
Energy contribution for base pairs.
Definition: constraints.h:415
The datastructure that contains temperature scaled Boltzmann weights of the energy parameters...
Definition: params.h:86
PRIVATE int vrna_BT_hp_loop(vrna_fold_compound *vc, int i, int j, int en, bondT *bp_stack, int *stack_count)
Backtrack a hairpin loop closed by .
Definition: hairpin_loops.h:505
char * sequence
The input sequence string.
Definition: data_structures.h:741
FLT_OR_DBL(* exp_f)(int, int, int, int, char, void *)
A function pointer used for pseudo energy contribution boltzmann factors in PF calculations.
Definition: constraints.h:444
int(* f)(int, int, int, int, char, void *)
A function pointer used for pseudo energy contribution in MFE calculations.
Definition: constraints.h:423
struct vrna_sc_t ** scs
A set of soft constraints (for each sequence in the alignment)
Definition: data_structures.h:808