RNAlib-2.2.0-RC2
data_structures.h
Go to the documentation of this file.
1 #ifndef VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H
2 #define VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H
3 
13 #include <ViennaRNA/energy_const.h>
14 #include <ViennaRNA/model.h>
15 #include <ViennaRNA/params.h>
16 
17 /* To use floats instead of doubles in pf_fold(), comment out the next line. */
18 #define LARGE_PF
19 
20 #ifdef LARGE_PF
21 #define FLT_OR_DBL double /* LARGE_PF is defined: FLT_OR_DBL expands to double */
22 #else
23 #define FLT_OR_DBL float /* LARGE_PF is not defined: FLT_OR_DBL expands to float */
24 #endif
25 
26 #ifndef NBASES
27 #define NBASES 8 /* fallback value; only takes effect when NBASES is not already defined */
28 #endif
29 
33 #define MAXDOS 1000
34 
35 /*
36 * ############################################################
37 * Here are the type definitions of various datastructures
38 * shared among the Vienna RNA Package
39 * ############################################################
40 */
41 
45 typedef struct plist { /* used as input parameter in functions of PS_dot.h and others */
46  int i;
47  int j;
48  float p;
49  int type;
50 } plist;
51 
55 typedef struct cpair { /* used as input parameter in functions of PS_dot.c */
56  int i,j,mfe;
57  float p, hue, sat;
58 } cpair;
59 
60 
64 typedef struct sect { /* stack of partial structures for backtracking */
65  int i;
66  int j;
67  int ml;
68 } sect;
69 
73 typedef struct bondT { /* base pair */
74  unsigned int i;
75  unsigned int j;
76 } bondT;
77 
81 typedef struct bondTEn { /* base pair with associated energy */
82  int i;
83  int j;
84  int energy;
85 } bondTEn;
86 
87 
88 /*
89 * ############################################################
90 * SUBOPT data structures
91 * ############################################################
92 */
93 
97 typedef struct { /* base pair data structure used in subopt.c */
98  int i;
99  int j;
100 } PAIR;
101 
105 typedef struct { /* sequence interval stack element used in subopt.c */
106  int i;
107  int j;
108  int array_flag;
109 } INTERVAL;
110 
114 typedef struct { /* solution element from subopt.c */
115  float energy; /* free energy of structure in kcal/mol */
116  char *structure; /* structure in dot-bracket notation */
117 } SOLUTION;
118 
119 /*
120 * ############################################################
121 * COFOLD data structures
122 * ############################################################
123 */
124 
128 typedef struct cofoldF {
129  /* free energies for: */
130  double F0AB; /* null model without DuplexInit */
131  double FAB; /* all states with DuplexInit correction */
132  double FcAB; /* true hybrid states only */
133  double FA; /* monomer A */
134  double FB; /* monomer B */
135 } cofoldF;
136 
140 typedef struct ConcEnt {
141  double A0; /* start concentration A */
142  double B0; /* start concentration B */
143  double ABc; /* end concentration AB */
144  double AAc; /* presumably end concentration AA, by analogy with ABc -- TODO confirm */
145  double BBc; /* presumably end concentration BB, by analogy with ABc -- TODO confirm */
146  double Ac; /* presumably end concentration A -- TODO confirm */
147  double Bc; /* presumably end concentration B -- TODO confirm */
148 } ConcEnt;
149 
153 typedef struct pairpro{ /* COFOLD section: bundles five plist pointers, one per member below */
154  struct plist *AB;
155  struct plist *AA;
156  struct plist *A;
157  struct plist *B;
158  struct plist *BB;
159 } pairpro;
160 
171 typedef struct { /* a base pair info structure */
172  unsigned i; /* nucleotide position i */
173  unsigned j; /* nucleotide position j */
174  float p; /* probability */
175  float ent; /* pseudo entropy */
176  short bp[8];
177  char comp; /* 1 iff pair is in mfe structure */
178 } pair_info;
179 
180 
181 /*
182 * ############################################################
183 * FINDPATH data structures
184 * ############################################################
185 */
186 
190 typedef struct move { /* FINDPATH section: one move, typedef'd as move_t */
191  int i; /* i,j > 0: insert; i,j < 0: delete */
192  int j; /* see i: the sign of i,j selects insert vs. delete */
193  int when; /* 0 if still available, else resulting distance from start */
194  int E;
195 } move_t;
196 
200 typedef struct intermediate { /* NOTE(review): listing lines 204-205 (remaining members and closing line) are not shown in this listing */
201  short *pt; /* pair table */
202  int Sen; /* saddle energy so far */
203  int curr_en; /* current energy */
206 
210 typedef struct path { /* FINDPATH section: typedef'd as path_t */
211  double en; /* presumably an energy value -- TODO confirm */
212  char *s; /* presumably a structure string -- TODO confirm */
213 } path_t;
214 
215 /*
216 * ############################################################
217 * RNAup data structures
218 * ############################################################
219 */
220 
224 typedef struct pu_contrib { /* contributions to p_u */
225  double **H; /* hairpin loops */
226  double **I; /* interior loops */
227  double **M; /* multi loops */
228  double **E; /* exterior loop */
229  int length; /* length of the input sequence */
230  int w; /* longest unpaired region */
231 } pu_contrib;
232 
236 typedef struct interact {
237  double *Pi; /* probabilities of interaction */
238  double *Gi; /* free energies of interaction */
239  double Gikjl; /* full free energy for interaction between [k,i] and, presumably, [j,l] -- the index text is truncated; confirm */
241  double Gikjl_wo; /* Gikjl without contributions for prob_unpaired */
242  int i; /* NOTE(review): index description is truncated; presumably the upper end of interval [k,i] */
243  int k; /* NOTE(review): presumably the lower end of interval [k,i] -- confirm */
244  int j; /* NOTE(review): presumably the lower end of interval [j,l] -- confirm */
245  int l; /* NOTE(review): presumably the upper end of interval [j,l] -- confirm */
246  int length; /* length of longer sequence */
247 } interact;
248 
252 typedef struct pu_out { /* collection of all "free energy of being unpaired" values for output */
253  int len; /* sequence length */
254  int u_vals; /* number of different -u values */
255  int contribs; /* [-c "SHIME"] */
256  char **header; /* header line */
257  double **u_values; /* (the -u values * [-c "SHIME"]) * seq len */
258 } pu_out;
259 
263 typedef struct constrain{ /* constraints for cofolding */
264  int *indx;
265  char *ptype;
266 } constrain;
267 
268 /*
269 * ############################################################
270 * RNAduplex data structures
271 * ############################################################
272 */
273 
277 typedef struct { /* RNAduplex section: anonymous struct typedef'd as duplexT; members are not described in this listing */
278  int i;
279  int j;
280  int end;
281  char *structure;
282  double energy;
283  double energy_backtrack;
284  double opening_backtrack_x;
285  double opening_backtrack_y;
286  int offset;
287  double dG1;
288  double dG2;
289  double ddG;
290  int tb;
291  int te;
292  int qb;
293  int qe;
294 } duplexT;
295 
296 /*
297 * ############################################################
298 * RNAsnoop data structures
299 * ############################################################
300 */
301 
305 typedef struct node { /* RNAsnoop section: self-referential node, typedef'd as folden */
306  int k;
307  int energy;
308  struct node *next; /* link to another struct node */
309 } folden;
310 
314 typedef struct { /* RNAsnoop section: anonymous struct typedef'd as snoopT; members are not described in this listing */
315  int i;
316  int j;
317  int u;
318  char *structure;
319  float energy;
320  float Duplex_El;
321  float Duplex_Er;
322  float Loop_E;
323  float Loop_D;
324  float pscd;
325  float psct;
326  float pscg;
327  float Duplex_Ol;
328  float Duplex_Or;
329  float Duplex_Ot;
330  float fullStemEnergy;
331 } snoopT;
332 
333 
334 /*
335 * ############################################################
336 * PKplex data structures
337 * ############################################################
338 */
339 
343 typedef struct dupVar{ /* PKplex section; members are not described in this listing */
344  int i;
345  int j;
346  int end;
347  char *pk_helix;
348  char *structure;
349  double energy;
350  int offset;
351  double dG1;
352  double dG2;
353  double ddG;
354  int tb;
355  int te;
356  int qb;
357  int qe;
358  int inactive;
359  int processed;
360 } dupVar;
361 
367 /*
368 * ############################################################
369 * VRNA fold compound related functions
370 * ############################################################
371 */
372 
390 typedef enum { /* specifies the type of a Dynamic Programming (DP) matrix data structure; NOTE(review): the enumerators (listing lines 391-397) are not shown in this listing */
398 } vrna_mx_t;
399 
400 
404 typedef struct{ /* Minimum Free Energy (MFE) Dynamic Programming (DP) matrices */
408  vrna_mx_t type;
409  unsigned int length; /* length of the sequence, therefore an indicator of the size of the DP matrices */
414 #if __STDC_VERSION__ >= 201112L
415  /* C11 support for unnamed unions/structs: the two member groups below become anonymous structs that share storage inside one anonymous union; without C11 they are plain consecutive members */
416  union {
417  struct {
418 #endif
419 
422  int *c; /* energy array, given that i-j pair */
423  int *f5; /* energy of 5' end */
424  int *f3; /* energy of 3' end */
425  int *fc; /* energy from i to cutpoint (and vice versa if i>cut) */
426  int *fML; /* multi-loop auxiliary energy array */
427  int *fM1; /* second ML array, only for unique multibranch loop decomposition */
428  int *fM2; /* energy for a multibranch loop region with exactly two stems, extending to 3' end */
429  int *ggg; /* energies of g-quadruplexes */
430  int Fc; /* minimum free energy of entire circular RNA */
431  int FcH;
432  int FcI;
433  int FcM;
438 #if __STDC_VERSION__ >= 201112L
439  /* C11 support for unnamed unions/structs */
440  };
441  struct {
442 #endif
443 
447  int ***E_F5;
448  int **l_min_F5;
449  int **l_max_F5;
450  int *k_min_F5;
451  int *k_max_F5;
452 
453  int ***E_F3;
454  int **l_min_F3;
455  int **l_max_F3;
456  int *k_min_F3;
457  int *k_max_F3;
458 
459  int ***E_C;
460  int **l_min_C;
461  int **l_max_C;
462  int *k_min_C;
463  int *k_max_C;
464 
465  int ***E_M;
466  int **l_min_M;
467  int **l_max_M;
468  int *k_min_M;
469  int *k_max_M;
470 
471  int ***E_M1;
472  int **l_min_M1;
473  int **l_max_M1;
474  int *k_min_M1;
475  int *k_max_M1;
476 
477  int ***E_M2;
478  int **l_min_M2;
479  int **l_max_M2;
480  int *k_min_M2;
481  int *k_max_M2;
482 
483  int **E_Fc;
484  int *l_min_Fc;
485  int *l_max_Fc;
486  int k_min_Fc;
487  int k_max_Fc;
488 
489  int **E_FcH;
490  int *l_min_FcH;
491  int *l_max_FcH;
492  int k_min_FcH;
493  int k_max_FcH;
494 
495  int **E_FcI;
496  int *l_min_FcI;
497  int *l_max_FcI;
498  int k_min_FcI;
499  int k_max_FcI;
500 
501  int **E_FcM;
502  int *l_min_FcM;
503  int *l_max_FcM;
504  int k_min_FcM;
505  int k_max_FcM;
506 
507  /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
508  int *E_F5_rem;
509  int *E_F3_rem;
510  int *E_C_rem;
511  int *E_M_rem;
512  int *E_M1_rem;
513  int *E_M2_rem;
514 
515  int E_Fc_rem;
516  int E_FcH_rem;
517  int E_FcI_rem;
518  int E_FcM_rem;
519 
520 #ifdef COUNT_STATES
521  unsigned long ***N_F5;
522  unsigned long ***N_C;
523  unsigned long ***N_M;
524  unsigned long ***N_M1;
525 #endif
526 
531 #if __STDC_VERSION__ >= 201112L
532  /* C11 support for unnamed unions/structs */
533  }; /* the anonymous struct is a member declaration and must end with ';' */
534  };
535 #endif
536 } vrna_mx_mfe_t;
537 
541 typedef struct{ /* Partition function (PF) Dynamic Programming (DP) matrices */
545  vrna_mx_t type;
546  unsigned int length;
547 
552 #if __STDC_VERSION__ >= 201112L
553  /* C11 support for unnamed unions/structs: the two member groups below become anonymous structs that share storage inside one anonymous union; without C11 they are plain consecutive members */
554  union {
555  struct {
556 #endif
557 
561  FLT_OR_DBL *q;
562  FLT_OR_DBL *qb;
563  FLT_OR_DBL *qm;
564  FLT_OR_DBL *qm1;
565  FLT_OR_DBL *probs;
566  FLT_OR_DBL *q1k;
567  FLT_OR_DBL *qln;
568  FLT_OR_DBL *G;
569 
570  FLT_OR_DBL qo;
571  FLT_OR_DBL *qm2;
572  FLT_OR_DBL qho;
573  FLT_OR_DBL qio;
574  FLT_OR_DBL qmo;
575 
576  FLT_OR_DBL *scale;
577  FLT_OR_DBL *expMLbase;
582 #if __STDC_VERSION__ >= 201112L
583  /* C11 support for unnamed unions/structs */
584  };
585  struct {
586 #endif
587 
591  FLT_OR_DBL ***Q;
592  int **l_min_Q;
593  int **l_max_Q;
594  int *k_min_Q;
595  int *k_max_Q;
596 
597 
598  FLT_OR_DBL ***Q_B;
599  int **l_min_Q_B;
600  int **l_max_Q_B;
601  int *k_min_Q_B;
602  int *k_max_Q_B;
603 
604  FLT_OR_DBL ***Q_M;
605  int **l_min_Q_M;
606  int **l_max_Q_M;
607  int *k_min_Q_M;
608  int *k_max_Q_M;
609 
610  FLT_OR_DBL ***Q_M1;
611  int **l_min_Q_M1;
612  int **l_max_Q_M1;
613  int *k_min_Q_M1;
614  int *k_max_Q_M1;
615 
616  FLT_OR_DBL ***Q_M2;
617  int **l_min_Q_M2;
618  int **l_max_Q_M2;
619  int *k_min_Q_M2;
620  int *k_max_Q_M2;
621 
622  FLT_OR_DBL **Q_c;
623  int *l_min_Q_c;
624  int *l_max_Q_c;
625  int k_min_Q_c;
626  int k_max_Q_c;
627 
628  FLT_OR_DBL **Q_cH;
629  int *l_min_Q_cH;
630  int *l_max_Q_cH;
631  int k_min_Q_cH;
632  int k_max_Q_cH;
633 
634  FLT_OR_DBL **Q_cI;
635  int *l_min_Q_cI;
636  int *l_max_Q_cI;
637  int k_min_Q_cI;
638  int k_max_Q_cI;
639 
640  FLT_OR_DBL **Q_cM;
641  int *l_min_Q_cM;
642  int *l_max_Q_cM;
643  int k_min_Q_cM;
644  int k_max_Q_cM;
645 
646  /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
647  FLT_OR_DBL *Q_rem;
648  FLT_OR_DBL *Q_B_rem;
649  FLT_OR_DBL *Q_M_rem;
650  FLT_OR_DBL *Q_M1_rem;
651  FLT_OR_DBL *Q_M2_rem;
652 
653  FLT_OR_DBL Q_c_rem;
654  FLT_OR_DBL Q_cH_rem;
655  FLT_OR_DBL Q_cI_rem;
656  FLT_OR_DBL Q_cM_rem;
661 #if __STDC_VERSION__ >= 201112L
662  /* C11 support for unnamed unions/structs */
663  };
664  };
665 #endif
666 } vrna_mx_pf_t;
667 
671 typedef enum { /* specifies the type of a vrna_fold_compound; NOTE(review): the enumerators (listing lines 672-673) are not shown in this listing */
674 } vrna_vc_t;
675 
676 
689 typedef struct{ /* the most basic data structure required by many functions throughout the RNAlib; NOTE(review): several members and the closing line are not shown in this listing */
690 
702  unsigned int length; /* the length of the sequence (or sequence alignment) */
703  int cutpoint; /* the position of the (cofold) cutpoint within the provided sequence */
707  struct vrna_hc_t *hc; /* the hard constraints data structure used for structure prediction */
715  int *iindx; /* DP matrix accessor */
716  int *jindx; /* DP matrix accessor */
722 #if __STDC_VERSION__ >= 201112L
723  /* C11 support for unnamed unions/structs: the two member groups below become anonymous structs that share storage inside one anonymous union; without C11 they are plain consecutive members */
724  union {
725  struct {
726 #endif
727 
732  char *sequence; /* the input sequence string */
739  short *sequence_encoding2;
740  char *ptype; /* pair type array */
754  struct vrna_sc_t *sc; /* the soft constraints for usage in structure prediction and evaluation */
762 #if __STDC_VERSION__ >= 201112L
763  /* C11 support for unnamed unions/structs */
764  };
765  struct {
766 #endif
767 
772  char **sequences; /* the aligned sequences */
776  unsigned int n_seq; /* the number of sequences in the alignment */
779  char *cons_seq; /* the consensus sequence of the aligned sequences */
782  short *S_cons; /* numerical encoding of the consensus sequence */
785  short **S; /* numerical encoding of the sequences in the alignment */
788  short **S5; /* S5[s][i] holds next base 5' of i in sequence s */
791  short **S3; /* S3[s][i] holds next base 3' of i in sequence s */
794  char **Ss;
795  unsigned short **a2s;
796  int *pscore; /* precomputed array of pair types expressed as pairing scores */
799  struct vrna_sc_t **scs; /* a set of soft constraints (for each sequence in the alignment) */
802  int oldAliEn; /* use old alifold energies (with gaps) */
803 
807 #if __STDC_VERSION__ >= 201112L
808  };
809  };
810 #endif
811 
818  unsigned int maxD1; /* maximum allowed base pair distance to first reference */
819  unsigned int maxD2; /* maximum allowed base pair distance to second reference */
820  short *reference_pt1; /* a pairtable of the first reference structure */
821  short *reference_pt2; /* a pairtable of the second reference structure */
823  unsigned int *referenceBPs1; /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
824  unsigned int *referenceBPs2; /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
825  unsigned int *bpdist; /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
827  unsigned int *mm1; /* maximum matching matrix, reference struct 1 disallowed */
828  unsigned int *mm2; /* maximum matching matrix, reference struct 2 disallowed */
835 
836 
837 /* The definitions below should be used with functions that return, receive, or destroy fold compound data structures. Each value is a distinct power of two -- presumably so that several can be OR-ed into one `unsigned int options` argument; TODO confirm against the implementation. */
838 
845 #define VRNA_OPTION_MFE 1
846 
853 #define VRNA_OPTION_PF 2
854 
855 #define VRNA_OPTION_HYBRID 4
856 
857 #define VRNA_OPTION_DIST_CLASS 16
858 
859 #define VRNA_OPTION_LFOLD 32
860 
870 #define VRNA_OPTION_EVAL_ONLY 8
871 
872 
873 
901 vrna_fold_compound *vrna_get_fold_compound( const char *sequence, /* retrieve a vrna_fold_compound data structure for single sequences and hybridizing sequences */
902  vrna_md_t *md_p,
903  unsigned int options);
904 
932 vrna_fold_compound *vrna_get_fold_compound_ali( const char **sequences, /* retrieve a vrna_fold_compound data structure for sequence alignments */
933  vrna_md_t *md_p,
934  unsigned int options);
935 
936 
937 vrna_fold_compound *vrna_get_fold_compound_2D(const char *sequence, /* NOTE(review): not described in this listing; takes two extra strings s1 and s2 -- presumably reference structures, confirm */
938  const char *s1,
939  const char *s2,
940  vrna_md_t *md_p,
941  unsigned int options);
942 
951 
960 
969 
974 #endif
int Sen
saddle energy so far
Definition: data_structures.h:202
int * f3
Energy of 3' end.
Definition: data_structures.h:424
Definition: data_structures.h:305
unsigned int length
Length of the sequence, therefore an indicator of the size of the DP matrices.
Definition: data_structures.h:409
double FAB
all states with DuplexInit correction
Definition: data_structures.h:131
int * ggg
Energies of g-quadruplexes.
Definition: data_structures.h:429
unsigned int * referenceBPs2
Matrix containing number of basepairs of reference structure2 in interval [i,j].
Definition: data_structures.h:824
int w
longest unpaired region
Definition: data_structures.h:230
int * c
Energy array, given that i-j pair.
Definition: data_structures.h:422
DP matrices suitable for local structure prediction.
Definition: data_structures.h:392
vrna_fold_compound * vrna_get_fold_compound_ali(const char **sequences, vrna_md_t *md_p, unsigned int options)
Retrieve a vrna_fold_compound data structure for sequence alignments.
struct vrna_hc_t * hc
The hard constraints data structure used for structure prediction.
Definition: data_structures.h:707
int * fM1
Second ML array, only for unique multibranch loop decomposition.
Definition: data_structures.h:427
int cutpoint
The position of the (cofold) cutpoint within the provided sequence. If there is no cutpoint...
Definition: data_structures.h:703
The hard constraints data structure.
Definition: constraints.h:378
short ** S5
S5[s][i] holds next base 5' of i in sequence s.
Definition: data_structures.h:788
unsigned j
nucleotide position j
Definition: data_structures.h:173
char ** header
header line
Definition: data_structures.h:256
int contribs
[-c "SHIME"]
Definition: data_structures.h:255
Base pair with associated energy.
Definition: data_structures.h:81
int oldAliEn
use old alifold energies (with gaps)
Definition: data_structures.h:200
Definition: data_structures.h:128
struct vrna_param_t * params
The precomputed free energy contributions for each type of loop.
Definition: data_structures.h:712
move_t * moves
remaining moves to target
Definition: data_structures.h:204
double FcAB
true hybrid states only
Definition: data_structures.h:132
int * jindx
DP matrix accessor.
Definition: data_structures.h:716
int * fML
Multi-loop auxiliary energy array.
Definition: data_structures.h:426
short * S_cons
Numerical encoding of the consensus sequence.
Definition: data_structures.h:782
The model details data structure and its corresponding modifiers.
Definition: data_structures.h:190
double * Gi
free energies of interaction
Definition: data_structures.h:238
float energy
Free Energy of structure in kcal/mol.
Definition: data_structures.h:115
float p
Probability.
Definition: data_structures.h:174
Definition: data_structures.h:672
Default DP matrices.
Definition: data_structures.h:391
Definition: data_structures.h:314
int * pscore
Precomputed array of pair types expressed as pairing scores.
Definition: data_structures.h:796
double FB
monomer B
Definition: data_structures.h:134
short ** S3
S3[s][i] holds next base 3' of i in sequence s.
Definition: data_structures.h:791
unsigned int n_seq
The number of sequences in the alignment.
Definition: data_structures.h:776
int length
length of longer sequence
Definition: data_structures.h:246
Collection of all free energies of being unpaired, gathered for output.
Definition: data_structures.h:252
vrna_fold_compound * vrna_get_fold_compound(const char *sequence, vrna_md_t *md_p, unsigned int options)
Retrieve a vrna_fold_compound data structure for single sequences and hybridizing sequences...
vrna_mx_pf_t * exp_matrices
The PF DP matrices.
Definition: data_structures.h:710
double Gikjl
full free energy for interaction between [k,i] (k<i) in the longer sequence and [j,l] (j<l) in the shorter sequence
Definition: data_structures.h:239
Definition: data_structures.h:343
double ** I
interior loops
Definition: data_structures.h:226
void vrna_free_mfe_matrices(vrna_fold_compound *vc)
Free memory occupied by the Minimum Free Energy (MFE) Dynamic Programming (DP) matrices.
The most basic data structure required by many functions throughout the RNAlib.
Definition: data_structures.h:689
Solution element from subopt.c.
Definition: data_structures.h:114
unsigned int * mm2
Maximum matching matrix, reference struct 2 disallowed.
Definition: data_structures.h:828
double ** M
multi loops
Definition: data_structures.h:227
constraints for cofolding
Definition: data_structures.h:263
char ** sequences
The aligned sequences.
Definition: data_structures.h:772
Definition: data_structures.h:277
int curr_en
current energy
Definition: data_structures.h:203
unsigned int * referenceBPs1
Matrix containing number of basepairs of reference structure1 in interval [i,j].
Definition: data_structures.h:823
int * fc
Energy from i to cutpoint (and vice versa if i>cut)
Definition: data_structures.h:425
unsigned int maxD2
Maximum allowed base pair distance to second reference.
Definition: data_structures.h:819
A base pair info structure.
Definition: data_structures.h:171
this datastructure is used as input parameter in functions of PS_dot.h and others ...
Definition: data_structures.h:45
void vrna_free_fold_compound(vrna_fold_compound *vc)
Free memory occupied by a vrna_fold_compound.
double ** H
hairpin loops
Definition: data_structures.h:225
void vrna_free_pf_matrices(vrna_fold_compound *vc)
Free memory occupied by the Partition Function (PF) Dynamic Programming (DP) matrices.
Partition function (PF) Dynamic Programming (DP) matrices data structure required within the vrna_fol...
Definition: data_structures.h:541
vrna_mx_mfe_t * matrices
The MFE DP matrices.
Definition: data_structures.h:709
double F0AB
Null model without DuplexInit.
Definition: data_structures.h:130
int * iindx
DP matrix accessor.
Definition: data_structures.h:715
int k
k
Definition: data_structures.h:243
char * cons_seq
The consensus sequence of the aligned sequences.
Definition: data_structures.h:779
double A0
start concentration A
Definition: data_structures.h:141
short ** S
Numerical encoding of the sequences in the alignment.
Definition: data_structures.h:785
short * reference_pt2
A pairtable of the second reference structure.
Definition: data_structures.h:821
short * reference_pt1
A pairtable of the first reference structure.
Definition: data_structures.h:820
unsigned int * mm1
Maximum matching matrix, reference struct 1 disallowed.
Definition: data_structures.h:827
Sequence interval stack element used in subopt.c.
Definition: data_structures.h:105
Definition: data_structures.h:236
short * pt
pair table
Definition: data_structures.h:201
unsigned int length
The length of the sequence (or sequence alignment)
Definition: data_structures.h:702
int l
j<l in shorter seq
Definition: data_structures.h:245
DP matrices suitable for distance class partitioned structure prediction.
Definition: data_structures.h:395
double ABc
End concentration AB.
Definition: data_structures.h:143
vrna_vc_t type
The type of the vrna_fold_compound.
Definition: data_structures.h:695
char * ptype
Pair type array.
Definition: data_structures.h:740
struct vrna_exp_param_t * exp_params
The precomputed free energy contributions as Boltzmann factors.
Definition: data_structures.h:713
double ** u_values
(the -u values * [-c "SHIME"]) * seq len
Definition: data_structures.h:257
Base pair.
Definition: data_structures.h:73
Stack of partial structures for backtracking.
Definition: data_structures.h:64
double ** E
exterior loop
Definition: data_structures.h:228
int len
sequence length
Definition: data_structures.h:253
The datastructure that contains temperature scaled energy parameters.
Definition: params.h:41
Definition: data_structures.h:673
The soft constraints data structure.
Definition: constraints.h:401
The data structure that contains the complete model details used throughout the calculations.
Definition: model.h:169
this datastructure is used as input parameter in functions of PS_dot.c
Definition: data_structures.h:55
int i
k<i in longer seq
Definition: data_structures.h:242
Definition: data_structures.h:210
int length
length of the input sequence
Definition: data_structures.h:229
double Gikjl_wo
Gikjl without contributions for prob_unpaired.
Definition: data_structures.h:241
int j
j
Definition: data_structures.h:244
Base pair data structure used in subopt.c.
Definition: data_structures.h:97
short * sequence_encoding
Numerical encoding of the input sequence.
Definition: data_structures.h:735
double * Pi
probabilities of interaction
Definition: data_structures.h:237
struct vrna_sc_t * sc
The soft constraints for usage in structure prediction and evaluation.
Definition: data_structures.h:754
vrna_vc_t
An enumerator that is used to specify the type of a vrna_fold_compound.
Definition: data_structures.h:671
unsigned i
nucleotide position i
Definition: data_structures.h:172
unsigned int * bpdist
Matrix containing base pair distance of reference structure 1 and 2 on interval [i,j].
Definition: data_structures.h:825
contributions to p_u
Definition: data_structures.h:224
Definition: data_structures.h:153
The datastructure that contains temperature scaled Boltzmann weights of the energy parameters...
Definition: params.h:86
unsigned int maxD1
Maximum allowed base pair distance to first reference.
Definition: data_structures.h:818
float ent
Pseudo entropy for $p(i,j) = S_i + S_j - p_{ij}\ln(p_{ij})$.
Definition: data_structures.h:175
double B0
start concentration B
Definition: data_structures.h:142
int * fM2
Energy for a multibranch loop region with exactly two stems, extending to 3' end. ...
Definition: data_structures.h:428
char * ptype_pf_compat
ptype array indexed via iindx
Definition: data_structures.h:749
char * sequence
The input sequence string.
Definition: data_structures.h:732
int Fc
Minimum Free Energy of entire circular RNA.
Definition: data_structures.h:430
vrna_mx_t
An enumerator that is used to specify the type of a Dynamic Programming (DP) matrix data structure...
Definition: data_structures.h:390
Definition: data_structures.h:140
Minimum Free Energy (MFE) Dynamic Programming (DP) matrices data structure required within the vrna_f...
Definition: data_structures.h:404
int * f5
Energy of 5' end.
Definition: data_structures.h:423
int u_vals
number of different -u values
Definition: data_structures.h:254
double FA
monomer A
Definition: data_structures.h:133
char * structure
Structure in dot-bracket notation.
Definition: data_structures.h:116
char comp
1 iff pair is in mfe structure
Definition: data_structures.h:177
struct vrna_sc_t ** scs
A set of soft constraints (for each sequence in the alignment)
Definition: data_structures.h:799