RNAlib-2.0.3b
H/data_structures.h
Go to the documentation of this file.
00001 #ifndef __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
00002 #define __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
00003 
00004 #include "energy_const.h"
00005 
00011 /* Precision switch: to use floats instead of doubles in pf_fold(), comment out the next line */
00012 #define LARGE_PF
00013 #ifdef  LARGE_PF
00014 #define FLT_OR_DBL double    /* selected while LARGE_PF is defined (the default here) */
00015 #else
00016 #define FLT_OR_DBL float     /* selected only when the LARGE_PF definition above is removed */
00017 #endif
00018 
00019 #ifndef NBASES               /* an earlier definition of NBASES takes precedence */
00020 #define NBASES 8             /* NOTE(review): presumably the number of distinct base codes - confirm */
00021 #endif
00022 
00023 #ifndef MAXALPHA             /* an earlier definition of MAXALPHA takes precedence */
00024 
00027 #define MAXALPHA              20   /* NOTE(review): presumably the maximal alphabet length - confirm */
00028 #endif
00029 
00033 #define MAXDOS                1000 /* defined unconditionally (no #ifndef guard); NOTE(review): meaning not stated here - confirm */
00034 
00035 
00036 
00037 
00038 /*
00039 * ############################################################
00040 * Here are the type definitions of various data structures
00041 * shared among the Vienna RNA Package
00042 * ############################################################
00043 */
00044 
00048 typedef struct plist {  /* an index pair (i, j) with an associated float value p */
00049   int i;                /* first index */
00050   int j;                /* second index */
00051   float p;              /* NOTE(review): presumably a pairing probability, like pair_info.p - confirm */
00052 } plist;
00053 
00057 typedef struct cpair {  /* an index pair (i, j) with an int mfe value and three float attributes */
00058   int i,j,mfe;          /* NOTE(review): mfe presumably relates the pair to the minimum-free-energy structure - confirm */
00059   float p, hue, sat;    /* NOTE(review): p presumably a probability, hue/sat presumably colour values for plotting - confirm */
00060 } cpair;
00061 
00066 typedef struct {   /* a 2D point stored as two single-precision coordinates */
00067   float X; /* X coordinate */
00068   float Y; /* Y coordinate */
00069 } COORDINATE;
00070 
00074 typedef struct sect {  /* an (i, j) index pair tagged with an int ml */
00075   int  i;              /* first index */
00076   int  j;              /* second index */
00077   int ml;              /* NOTE(review): presumably selects the matrix/loop kind the segment belongs to - confirm */
00078 } sect;
00079 
00083 typedef struct bondT {  /* a pair of unsigned indices; NOTE(review): presumably the two positions of a base pair - confirm */
00084    unsigned int i;      /* first index */
00085    unsigned int j;      /* second index */
00086 } bondT;
00087 
00091 typedef struct bondTEn {  /* like bondT but with signed indices and an int energy attached */
00092    int i;                 /* first index */
00093    int j;                 /* second index */
00094    int energy;            /* NOTE(review): units not stated here - confirm */
00095 } bondTEn;
00096 
00101 typedef struct{         /* switches selecting the energy-model variant; embedded by value in paramT and pf_paramT */
00102   int     dangles;      /*  dangle model (0,1,2 or 3) */
00103   int     special_hp;   /*  include special hairpin contributions for tri-, tetra- and hexaloops */
00104   int     noLP;         /*  only consider canonical structures, i.e. no 'lonely' base pairs */
00105   int     noGU;         /*  do not allow GU pairs */
00106   int     noGUclosure;  /*  do not allow loops to be closed by a GU pair */
00107   int     logML;        /*  use logarithmic scaling for multiloops */
00108 } model_detailsT;
00109 
00113 typedef struct{   /* integer energy-parameter tables plus the temperature and model settings they belong to */
00114   int id;         /* NOTE(review): presumably identifies/versions this parameter set - confirm */
00115   int stack[NBPAIRS+1][NBPAIRS+1];   /* NBPAIRS and MAXLOOP are not defined in this file; presumably from energy_const.h - confirm */
00116   int hairpin[31];
00117   int bulge[MAXLOOP+1];
00118   int internal_loop[MAXLOOP+1];
00119   int mismatchExt[NBPAIRS+1][5][5];  /* mismatch tables: first index runs over NBPAIRS+1, then two indices of extent 5 */
00120   int mismatchI[NBPAIRS+1][5][5];
00121   int mismatch1nI[NBPAIRS+1][5][5];
00122   int mismatch23I[NBPAIRS+1][5][5];
00123   int mismatchH[NBPAIRS+1][5][5];
00124   int mismatchM[NBPAIRS+1][5][5];
00125   int dangle5[NBPAIRS+1][5];
00126   int dangle3[NBPAIRS+1][5];
00127   int int11[NBPAIRS+1][NBPAIRS+1][5][5];         /* two NBPAIRS+1 indices followed by 2, 3 or 4 indices of extent 5 */
00128   int int21[NBPAIRS+1][NBPAIRS+1][5][5][5];
00129   int int22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
00130   int ninio[5];
00131   double  lxc;
00132   int     MLbase;
00133   int     MLintern[NBPAIRS+1];
00134   int     MLclosing;
00135   int     TerminalAU;
00136   int     DuplexInit;
00137   int     Tetraloop_E[200];
00138   char    Tetraloops[1401];   /* 1401 = 200*7 + 1; NOTE(review): presumably 200 entries of 7 chars plus NUL, matching Tetraloop_E - confirm */
00139   int     Triloop_E[40];
00140   char    Triloops[241];      /* 241 = 40*6 + 1; NOTE(review): presumably 40 entries of 6 chars plus NUL, matching Triloop_E - confirm */
00141   int     Hexaloop_E[40];
00142   char    Hexaloops[1801];    /* 1801 = 200*9 + 1, yet Hexaloop_E has only 40 entries; NOTE(review): sizes look inconsistent - confirm */
00143   int     TripleC;
00144   int     MultipleCA;
00145   int     MultipleCB;
00146 
00147   double  temperature;  /*  temperature used for loop contribution scaling */
00148 
00149   model_detailsT model_details;   /* stored by value, not by pointer */
00150 
00151 }  paramT;
00152 
00156 typedef struct{   /* double-valued counterpart of paramT: same table shapes with exp-prefixed names, plus kT, pf_scale and alpha */
00157   int     id;     /* NOTE(review): presumably identifies/versions this parameter set - confirm */
00158   double  expstack[NBPAIRS+1][NBPAIRS+1];   /* NBPAIRS and MAXLOOP are not defined in this file; presumably from energy_const.h - confirm */
00159   double  exphairpin[31];
00160   double  expbulge[MAXLOOP+1];
00161   double  expinternal[MAXLOOP+1];
00162   double  expmismatchExt[NBPAIRS+1][5][5];
00163   double  expmismatchI[NBPAIRS+1][5][5];
00164   double  expmismatch23I[NBPAIRS+1][5][5];  /* note: 23I precedes 1nI here, the reverse of the member order in paramT */
00165   double  expmismatch1nI[NBPAIRS+1][5][5];
00166   double  expmismatchH[NBPAIRS+1][5][5];
00167   double  expmismatchM[NBPAIRS+1][5][5];
00168   double  expdangle5[NBPAIRS+1][5];
00169   double  expdangle3[NBPAIRS+1][5];
00170   double  expint11[NBPAIRS+1][NBPAIRS+1][5][5];
00171   double  expint21[NBPAIRS+1][NBPAIRS+1][5][5][5];
00172   double  expint22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
00173   double  expninio[5][MAXLOOP+1];   /* two-dimensional here, whereas paramT.ninio is int[5] */
00174   double  lxc;
00175   double  expMLbase;
00176   double  expMLintern[NBPAIRS+1];
00177   double  expMLclosing;
00178   double  expTermAU;
00179   double  expDuplexInit;
00180   double  exptetra[40];     /* 40 entries, whereas paramT.Tetraloop_E has 200; NOTE(review): confirm this is intended */
00181   double  exptri[40];
00182   double  exphex[40];
00183   char    Tetraloops[1401];
00184   double  expTriloop[40];   /* NOTE(review): exists alongside exptri[40]; which of the two is actually used is not visible here */
00185   char    Triloops[241];
00186   char    Hexaloops[1801];
00187   double  expTripleC;
00188   double  expMultipleCA;
00189   double  expMultipleCB;
00190 
00191   double  kT;
00192   double  pf_scale;
00193 
00194   double  temperature;  /*  temperature used for loop contribution scaling */
00195   double  alpha;        /*  used for scaling the thermodynamic temperature in Boltzmann factors
00196                             independently of the energy contributions */
00197 
00198   model_detailsT model_details;   /* stored by value, not by pointer */
00199 
00200 }  pf_paramT;
00201 
00202 
00203 
00204 /*
00205 * ############################################################
00206 * SUBOPT data structures
00207 * ############################################################
00208 */
00209 
00210 
00214 typedef struct {  /* a plain pair of int indices */
00215   int i;          /* first index */
00216   int j;          /* second index */
00217 } PAIR;
00218 
00222 typedef struct {      /* an index interval (i, j) tagged with an int array_flag */
00223     int i;            /* first index */
00224     int j;            /* second index */
00225     int array_flag;   /* NOTE(review): presumably selects which array the interval refers to - confirm */
00226 } INTERVAL;
00227 
00231 typedef struct {                           /* one result entry: a structure string and its energy */
00232   float energy;                            /* energy of structure */
00233   char *structure;                         /* NOTE(review): presumably a NUL-terminated structure string; ownership not visible here */
00234 } SOLUTION;
00235 
00236 /*
00237 * ############################################################
00238 * COFOLD data structures
00239 * ############################################################
00240 */
00241 typedef struct cofoldF {  /* set of free energies for two strands A and B */
00242   /* free energies for: */
00243   double F0AB; /* null model without DuplexInit */
00244   double FAB;  /* all states with DuplexInit correction */
00245   double FcAB; /* true hybrid states only */
00246   double FA;   /* monomer A */
00247   double FB;   /* monomer B */
00248 } cofoldF;
00249 
00250 typedef struct ConcEnt {  /* start concentrations of A and B together with resulting concentrations */
00251   double A0;    /* start concentration A */
00252   double B0;    /* start concentration B */
00253   double ABc;   /* end concentration AB */
00254   double AAc;   /* NOTE(review): presumably end concentration AA, by analogy with ABc - confirm */
00255   double BBc;   /* NOTE(review): presumably end concentration BB - confirm */
00256   double Ac;    /* NOTE(review): presumably end concentration A - confirm */
00257   double Bc;    /* NOTE(review): presumably end concentration B - confirm */
00258 } ConcEnt;
00259 
00260 typedef struct pairpro{  /* five plist pointers, one each for AB, AA, A, B and BB */
00261   struct plist *AB;      /* NOTE(review): presumably each points to a plist array for the named species; length/termination not visible here */
00262   struct plist *AA;
00263   struct plist *A;
00264   struct plist *B;
00265   struct plist *BB;
00266 }pairpro;
00267 
00277 typedef struct {      /* per-pair record: probability, pseudo entropy, pair-type counts and an mfe flag */
00278    unsigned i;        /* i,j in [0, n-1] */
00279    unsigned j;
00280    float p;      /* probability */
00281    float ent;    /* pseudo entropy for p(i,j) = S_i + S_j - p_ij*ln(p_ij) */
00282    short bp[8];  /* frequencies of pair_types */
00283    char comp;    /* 1 iff pair is in mfe structure */
00284 } pair_info;
00285 
00286 
00287 /*
00288 * ############################################################
00289 * FINDPATH data structures
00290 * ############################################################
00291 */
00292 
00293 typedef struct move {  /* one insert/delete move on the pair (i, j); the sign of i,j encodes the direction */
00294   int i;  /* i,j>0 insert; i,j<0 delete */
00295   int j;
00296   int when;  /* 0 if still available, else resulting distance from start */
00297   int E;     /* NOTE(review): presumably the energy associated with the move; units not stated - confirm */
00298 } move_t;
00299 
00300 typedef struct intermediate {  /* an intermediate state: pair table, energies so far and the moves still to apply */
00301   short *pt;     /* pair table */
00302   int Sen;       /* saddle energy so far */
00303   int curr_en;   /* current energy */
00304   move_t *moves; /* remaining moves to target */
00305 } intermediate_t;
00306 
00307 typedef struct path {  /* one path entry: a string s and a double en */
00308   double en;           /* NOTE(review): presumably the energy of s - confirm */
00309   char *s;             /* NOTE(review): presumably a structure string; ownership not visible here */
00310 } path_t;
00311 
00312 /*
00313 * ############################################################
00314 * RNAup data structures
00315 * ############################################################
00316 */
00317 typedef struct pu_contrib { /* contributions to prob_unpaired, split by loop type: */
00318   double **H; /* hairpin loops */
00319   double **I; /* interior loops */
00320   double **M; /* multi loops */
00321   double **E; /* exterior loop */
00322   int length; /* length of the input sequence */
00323   int w;      /* longest unpaired region */
00324 } pu_contrib;
00325 
00326 typedef struct interact { /* interaction between region [k,i] of a longer and [j,l] of a shorter sequence */
00327   double *Pi; /* probabilities of interaction */
00328   double *Gi; /* free energies of interaction */
00329   double Gikjl; /* full free energy for interaction between [k,i] k<i
00330                    in longer seq and [j,l] j<l in shorter seq */
00331   double Gikjl_wo; /* Gikjl without contributions for prob_unpaired */
00332   int i; /* k<i in longer seq */
00333   int k; /* k<i in longer seq */
00334   int j; /* j<l in shorter seq */
00335   int l; /* j<l in shorter seq */
00336   int length; /* length of longer sequence */
00337 } interact;
00338 
00339 typedef struct pu_out { /* collects all free energies of being unpaired
00340                            for output */
00341   int len;        /* sequence length */
00342   int u_vals;     /* number of different -u values */
00343   int contribs;   /* [-c "SHIME"] */
00344   char **header;  /* header line */
00345   double **u_values; /* (different -u values * [-c "SHIME"]) * seq len */
00346 } pu_out;
00347 
00348 typedef struct constrain { /* constraints for cofolding */
00349   int *indx;               /* NOTE(review): presumably an index array used to address ptype - confirm */
00350   char *ptype;             /* NOTE(review): presumably pair types, as in the ptype members of the TwoD*_vars structs - confirm */
00351 } constrain;
00352 
00353 /*
00354 * ############################################################
00355 * RNAduplex data structures
00356 * ############################################################
00357 */
00358 
00359 typedef struct {   /* one duplex result; shares most members with dupVar and adds energy_backtrack and opening_backtrack_x/_y */
00360   int i;           /* NOTE(review): i, j and end are presumably sequence positions of the duplex - confirm */
00361   int j;
00362   int end;
00363   char *structure; /* NOTE(review): presumably a structure string; ownership not visible here */
00364   double energy;
00365   double energy_backtrack;
00366   double opening_backtrack_x;
00367   double opening_backtrack_y;
00368   int offset;
00369   double dG1;      /* NOTE(review): dG1, dG2 and ddG are presumably energy terms of the two strands and their combination - confirm */
00370   double dG2;
00371   double ddG;
00372   int tb;          /* NOTE(review): tb/te and qb/qe presumably mark begin/end in target and query - confirm */
00373   int te;
00374   int qb;
00375   int qe;
00376 } duplexT;
00377 
00378 /*
00379 * ############################################################
00380 * RNAsnoop data structures
00381 * ############################################################
00382 */
00383 
00384 typedef struct node {  /* singly linked list node (struct tag node, typedef name folden) holding k and an energy */
00385   int k;
00386   int energy;          /* NOTE(review): units not stated here - confirm */
00387   struct node *next;   /* next node in the list; NOTE(review): presumably NULL at the end - confirm */
00388 } folden;
00389 
00390 typedef struct {   /* one RNAsnoop result: three int positions, a structure string and single-precision energy terms */
00391   int i;
00392   int j;
00393   int u;
00394   char *structure; /* NOTE(review): presumably a structure string; ownership not visible here */
00395   float energy;
00396   float Duplex_El; /* NOTE(review): the l/r suffixes presumably denote left and right duplex parts - confirm */
00397   float Duplex_Er;
00398   float Loop_E;
00399   float Loop_D;
00400   float pscd;
00401   float psct;
00402   float pscg;
00403   float Duplex_Ol;
00404   float Duplex_Or;
00405   float Duplex_Ot;
00406 } snoopT;
00407 
00408 
00409 
00410 
00411 
00412 
00413 
00414 /*
00415 * ############################################################
00416 * PKplex data structures
00417 * ############################################################
00418 */
00419 
00420 typedef struct dupVar{  /* same members as duplexT except energy_backtrack and opening_backtrack_x/_y */
00421   int i;                /* NOTE(review): i, j and end are presumably sequence positions of the duplex - confirm */
00422   int j;
00423   int end;
00424   char *structure;      /* NOTE(review): presumably a structure string; ownership not visible here */
00425   double energy;
00426   int offset;
00427   double dG1;
00428   double dG2;
00429   double ddG;
00430   int tb;               /* NOTE(review): tb/te and qb/qe presumably mark begin/end in target and query - confirm */
00431   int te;
00432   int qb;
00433   int qe;
00434 } dupVar;
00435 
00436 
00437 
00438 /*
00439 * ############################################################
00440 * 2Dfold data structures
00441 * ############################################################
00442 */
00443 
00456 typedef struct{   /* one 2Dfold result: an energy and a string for a (k, l) pair */
00457   int k;          /* NOTE(review): k and l are presumably distances to the two reference structures - confirm */
00458   int l;
00459   float en;       /* NOTE(review): presumably the energy of s - confirm */
00460   char *s;        /* NOTE(review): presumably a structure string; ownership not visible here */
00461 } TwoDfold_solution;
00462 
00468 typedef struct{   /* working state for 2Dfold: inputs, reference-structure data, energy arrays and their (k,l) bounds */
00469   paramT          *P;       /* energy parameter set (paramT) */
00470   int             do_backtrack;
00471   char            *ptype;   /* precomputed array of pair types */
00472   char            *sequence;
00473   short           *S, *S1;  /* NOTE(review): presumably integer encodings of sequence - confirm */
00474   unsigned int    maxD1;    /* NOTE(review): maxD1/maxD2 presumably cap the distance to reference structure 1/2 - confirm */
00475   unsigned int    maxD2;
00476 
00477 
00478   unsigned int    *mm1;         /* maximum matching matrix, reference struct 1 disallowed */
00479   unsigned int    *mm2;         /* maximum matching matrix, reference struct 2 disallowed */
00480 
00481   int             *my_iindx;    /* index for moving in quadratic distance dimensions */
00482 
00483   double          temperature;
00484 
00485   unsigned int    *referenceBPs1; /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
00486   unsigned int    *referenceBPs2; /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
00487   unsigned int    *bpdist;        /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
00488 
00489   short           *reference_pt1; /* NOTE(review): presumably pair tables of the two reference structures - confirm */
00490   short           *reference_pt2;
00491   int             circ;
00492   int             dangles;
00493   unsigned int    seq_length;
00494 
00495   int             ***E_F5;        /* triple-pointer int arrays; NOTE(review): presumably indexed by position and then (k,l) - confirm */
00496   int             ***E_F3;
00497   int             ***E_C;
00498   int             ***E_M;
00499   int             ***E_M1;
00500   int             ***E_M2;
00501 
00502   int             **E_Fc;         /* double-pointer int arrays; their bounds below (_fc, _fcH, ...) use int* for l and plain int for k */
00503   int             **E_FcH;
00504   int             **E_FcI;
00505   int             **E_FcM;
00506 
00507   int             **l_min_values; /* groups of l/k min/max bounds follow; NOTE(review): each group presumably bounds the (k,l) range of one E_* array, matched by suffix - confirm */
00508   int             **l_max_values;
00509   int             *k_min_values;
00510   int             *k_max_values;
00511 
00512   int             **l_min_values_m;
00513   int             **l_max_values_m;
00514   int             *k_min_values_m;
00515   int             *k_max_values_m;
00516 
00517   int             **l_min_values_m1;
00518   int             **l_max_values_m1;
00519   int             *k_min_values_m1;
00520   int             *k_max_values_m1;
00521 
00522   int             **l_min_values_f;
00523   int             **l_max_values_f;
00524   int             *k_min_values_f;
00525   int             *k_max_values_f;
00526 
00527   int             **l_min_values_f3;
00528   int             **l_max_values_f3;
00529   int             *k_min_values_f3;
00530   int             *k_max_values_f3;
00531 
00532   int             **l_min_values_m2;
00533   int             **l_max_values_m2;
00534   int             *k_min_values_m2;
00535   int             *k_max_values_m2;
00536 
00537   int             *l_min_values_fc;
00538   int             *l_max_values_fc;
00539   int             k_min_values_fc;
00540   int             k_max_values_fc;
00541 
00542   int             *l_min_values_fcH;
00543   int             *l_max_values_fcH;
00544   int             k_min_values_fcH;
00545   int             k_max_values_fcH;
00546 
00547   int             *l_min_values_fcI;
00548   int             *l_max_values_fcI;
00549   int             k_min_values_fcI;
00550   int             k_max_values_fcI;
00551 
00552   int             *l_min_values_fcM;
00553   int             *l_max_values_fcM;
00554   int             k_min_values_fcM;
00555   int             k_max_values_fcM;
00556 
00557   /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
00558   int             *E_F5_rem;
00559   int             *E_F3_rem;
00560   int             *E_C_rem;
00561   int             *E_M_rem;
00562   int             *E_M1_rem;
00563   int             *E_M2_rem;
00564 
00565   int             E_Fc_rem;
00566   int             E_FcH_rem;
00567   int             E_FcI_rem;
00568   int             E_FcM_rem;
00569 
00570 #ifdef COUNT_STATES
00571   unsigned long             ***N_F5;   /* these four members exist only when COUNT_STATES is defined, so the struct size depends on that macro */
00572   unsigned long             ***N_C;
00573   unsigned long             ***N_M;
00574   unsigned long             ***N_M1;
00575 #endif
00576 } TwoDfold_vars;
00577 
00588 typedef struct{    /* one 2Dpfold result: a value q for a (k, l) pair */
00589   int k;           /* NOTE(review): k and l are presumably distances to the two reference structures - confirm */
00590   int l;
00591   FLT_OR_DBL  q;   /* double or float depending on LARGE_PF; NOTE(review): presumably a partition function value - confirm */
00592 } TwoDpfold_solution;
00593 
00599 typedef struct{   /* working state for 2Dpfold: inputs, reference-structure data, FLT_OR_DBL arrays and their (k,l) bounds */
00600 
00601   unsigned int    alloc;
00602   char            *ptype;   /* precomputed array of pair types */
00603   char            *sequence;
00604   short           *S, *S1;  /* NOTE(review): presumably integer encodings of sequence - confirm */
00605   double          temperature;      /* temperature in last call to scale_pf_params */
00606   double          init_temp;      /* NOTE(review): the original comment here repeated the one for temperature; presumably the temperature at initialization - confirm */
00607   unsigned int    maxD1;    /* NOTE(review): maxD1/maxD2 presumably cap the distance to reference structure 1/2 - confirm */
00608   unsigned int    maxD2;
00609 
00610   FLT_OR_DBL  *scale;
00611   FLT_OR_DBL  pf_scale;
00612   pf_paramT   *pf_params;     /* holds all [unscaled] pf parameters */
00613 
00614   int             *my_iindx;         /* index for moving in quadratic distance dimensions */
00615   int             *jindx;         /* index for moving in the triangle matrix qm1 */
00616 
00617   unsigned int    *referenceBPs1;    /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
00618   unsigned int    *referenceBPs2;    /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
00619   short           *reference_pt1;    /* NOTE(review): presumably pair tables of the two reference structures - confirm */
00620   short           *reference_pt2;
00621   unsigned int    *mm1;         /* maximum matching matrix, reference struct 1 disallowed */
00622   unsigned int    *mm2;         /* maximum matching matrix, reference struct 2 disallowed */
00623   unsigned int    *bpdist;      /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
00624   int             circ;
00625   int             dangles;
00626   unsigned int    seq_length;
00627 
00628   FLT_OR_DBL      ***Q;         /* triple-pointer FLT_OR_DBL arrays; NOTE(review): presumably indexed by position and then (k,l) - confirm */
00629   FLT_OR_DBL      ***Q_B;
00630   FLT_OR_DBL      ***Q_M;
00631   FLT_OR_DBL      ***Q_M1;
00632   FLT_OR_DBL      ***Q_M2;
00633 
00634   FLT_OR_DBL      **Q_c;        /* double-pointer arrays; their bounds below (_qc, _qcH, ...) use int* for l and plain int for k */
00635   FLT_OR_DBL      **Q_cH;
00636   FLT_OR_DBL      **Q_cI;
00637   FLT_OR_DBL      **Q_cM;
00638 
00639   int             **l_min_values;  /* groups of l/k min/max bounds follow; NOTE(review): each group presumably bounds the (k,l) range of one Q* array, matched by suffix - confirm */
00640   int             **l_max_values;
00641   int             *k_min_values;
00642   int             *k_max_values;
00643 
00644   int             **l_min_values_b;
00645   int             **l_max_values_b;
00646   int             *k_min_values_b;
00647   int             *k_max_values_b;
00648 
00649   int             **l_min_values_m;
00650   int             **l_max_values_m;
00651   int             *k_min_values_m;
00652   int             *k_max_values_m;
00653 
00654   int             **l_min_values_m1;
00655   int             **l_max_values_m1;
00656   int             *k_min_values_m1;
00657   int             *k_max_values_m1;
00658 
00659   int             **l_min_values_m2;
00660   int             **l_max_values_m2;
00661   int             *k_min_values_m2;
00662   int             *k_max_values_m2;
00663 
00664   int             *l_min_values_qc;
00665   int             *l_max_values_qc;
00666   int             k_min_values_qc;
00667   int             k_max_values_qc;
00668 
00669   int             *l_min_values_qcH;
00670   int             *l_max_values_qcH;
00671   int             k_min_values_qcH;
00672   int             k_max_values_qcH;
00673 
00674   int             *l_min_values_qcI;
00675   int             *l_max_values_qcI;
00676   int             k_min_values_qcI;
00677   int             k_max_values_qcI;
00678 
00679   int             *l_min_values_qcM;
00680   int             *l_max_values_qcM;
00681   int             k_min_values_qcM;
00682   int             k_max_values_qcM;
00683 
00684   /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
00685   FLT_OR_DBL      *Q_rem;
00686   FLT_OR_DBL      *Q_B_rem;
00687   FLT_OR_DBL      *Q_M_rem;
00688   FLT_OR_DBL      *Q_M1_rem;
00689   FLT_OR_DBL      *Q_M2_rem;
00690 
00691   FLT_OR_DBL      Q_c_rem;
00692   FLT_OR_DBL      Q_cH_rem;
00693   FLT_OR_DBL      Q_cI_rem;
00694   FLT_OR_DBL      Q_cM_rem;
00695 
00696 } TwoDpfold_vars;
00697 
00698 #endif