RNAlib-2.0.2
H/data_structures.h
Go to the documentation of this file.
00001 #ifndef __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
00002 #define __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
00003 
00004 #include "energy_const.h"
00005 
00011 /* To use floats instead of doubles in pf_fold(), comment out the next line. */
00012 #define LARGE_PF
00013 #ifdef  LARGE_PF /* LARGE_PF defined: FLT_OR_DBL expands to double */
00014 #define FLT_OR_DBL double
00015 #else /* LARGE_PF not defined: FLT_OR_DBL expands to float */
00016 #define FLT_OR_DBL float
00017 #endif /* LARGE_PF */
00018 
00019 #ifndef NBASES /* keeps any NBASES value defined before this point; 8 is only the fallback */
00020 #define NBASES 8
00021 #endif /* NBASES */
00022 
00023 #ifndef MAXALPHA /* fallback of 20 applies only if MAXALPHA is not already defined; presumably the maximal alphabet size - TODO confirm */
00024 
00027 #define MAXALPHA              20
00028 #endif /* MAXALPHA */
00029 
00033 #define MAXDOS                1000 /* NOTE(review): not used in this header; meaning must be confirmed against its users */
00034 
00035 
00036 
00037 
00038 /*
00039 * ############################################################
00040 * Here are the type definitions of various data structures
00041 * shared among the Vienna RNA Package
00042 * ############################################################
00043 */
00044 
00048 typedef struct plist { /* (i, j, p) record; presumably one entry of a pair-probability list - TODO confirm */
00049   int i; /* first int index of the entry */
00050   int j; /* second int index of the entry */
00051   float p; /* single-precision value attached to (i, j); presumably a probability - TODO confirm */
00052 } plist;
00053 
00057 typedef struct cpair { /* record of three ints and three floats; presumably a base pair with drawing attributes - TODO confirm */
00058   int i,j,mfe; /* i, j: presumably pair indices; mfe: presumably an MFE-related flag or value - TODO confirm */
00059   float p, hue, sat; /* p: presumably a probability; hue, sat: presumably color hue and saturation - TODO confirm */
00060 } cpair;
00061 
00066 typedef struct { /* pair of single-precision X/Y coordinates */
00067   float X; /* X coordinate */
00068   float Y; /* Y coordinate */
00069 } COORDINATE;
00070 
00074 typedef struct sect { /* three-int record (i, j, ml); presumably a sequence segment used during backtracking - TODO confirm */
00075   int  i;
00076   int  j;
00077   int ml; /* NOTE(review): meaning not shown in this header; confirm against users */
00078 } sect;
00079 
00083 typedef struct bondT { /* pair of unsigned indices (i, j); presumably one base pair - TODO confirm */
00084    unsigned int i;
00085    unsigned int j;
00086 } bondT;
00087 
00091 typedef struct bondTEn { /* (i, j) plus an int energy; unlike bondT the indices here are signed ints */
00092    int i;
00093    int j;
00094    int energy; /* integer energy value; units are not stated in this header */
00095 } bondTEn;
00096 
00100 typedef struct{ /* integer-valued parameter set; NBPAIRS and MAXLOOP are not defined in this file (presumably from energy_const.h) */
00101   int id; /* identifier; NOTE(review): how it is assigned is not shown here */
00102   int stack[NBPAIRS+1][NBPAIRS+1]; /* 2D table, both indices in [0, NBPAIRS] */
00103   int hairpin[31]; /* 31 entries, indices 0..30 */
00104   int bulge[MAXLOOP+1]; /* indices 0..MAXLOOP */
00105   int internal_loop[MAXLOOP+1]; /* indices 0..MAXLOOP */
00106   int mismatchExt[NBPAIRS+1][5][5]; /* the mismatch* tables all share the shape [NBPAIRS+1][5][5] */
00107   int mismatchI[NBPAIRS+1][5][5];
00108   int mismatch1nI[NBPAIRS+1][5][5];
00109   int mismatch23I[NBPAIRS+1][5][5];
00110   int mismatchH[NBPAIRS+1][5][5];
00111   int mismatchM[NBPAIRS+1][5][5];
00112   int dangle5[NBPAIRS+1][5]; /* dangle5/dangle3 share the shape [NBPAIRS+1][5] */
00113   int dangle3[NBPAIRS+1][5];
00114   int int11[NBPAIRS+1][NBPAIRS+1][5][5]; /* int11/int21/int22 add one [5] dimension each */
00115   int int21[NBPAIRS+1][NBPAIRS+1][5][5][5];
00116   int int22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
00117   int ninio[5];
00118   double lxc; /* one of only two double members here (the other is temperature) */
00119   int MLbase;
00120   int MLintern[NBPAIRS+1];
00121   int MLclosing;
00122   int TerminalAU;
00123   int DuplexInit;
00124   int Tetraloop_E[200]; /* 200 int entries */
00125   char Tetraloops[1401]; /* 1401 = 200*7 + 1; presumably 200 fixed-width entries plus terminator - TODO confirm */
00126   int Triloop_E[40]; /* 40 int entries */
00127   char Triloops[241]; /* 241 = 40*6 + 1; presumably 40 fixed-width entries plus terminator - TODO confirm */
00128   int Hexaloop_E[40]; /* 40 int entries */
00129   char Hexaloops[1801]; /* NOTE(review): 1801 = 200*9 + 1, yet Hexaloop_E has only 40 entries - confirm the sizes are meant to differ */
00130   int TripleC;
00131   int MultipleCA;
00132   int MultipleCB;
00133   double temperature; /* temperature associated with this parameter set; units not stated in this header */
00134 }  paramT;
00135 
00139 typedef struct{ /* double-valued parameter set; most members mirror paramT members with an "exp" prefix (presumably Boltzmann factors - TODO confirm) */
00140   int     id; /* identifier; NOTE(review): how it is assigned is not shown here */
00141   double  expstack[NBPAIRS+1][NBPAIRS+1]; /* same shape as paramT.stack */
00142   double  exphairpin[31]; /* 31 entries, indices 0..30 */
00143   double  expbulge[MAXLOOP+1]; /* indices 0..MAXLOOP */
00144   double  expinternal[MAXLOOP+1]; /* indices 0..MAXLOOP */
00145   double  expmismatchExt[NBPAIRS+1][5][5]; /* the expmismatch* tables all share the shape [NBPAIRS+1][5][5] */
00146   double  expmismatchI[NBPAIRS+1][5][5];
00147   double  expmismatch23I[NBPAIRS+1][5][5];
00148   double  expmismatch1nI[NBPAIRS+1][5][5];
00149   double  expmismatchH[NBPAIRS+1][5][5];
00150   double  expmismatchM[NBPAIRS+1][5][5];
00151   double  expdangle5[NBPAIRS+1][5];
00152   double  expdangle3[NBPAIRS+1][5];
00153   double  expint11[NBPAIRS+1][NBPAIRS+1][5][5];
00154   double  expint21[NBPAIRS+1][NBPAIRS+1][5][5][5];
00155   double  expint22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
00156   double  expninio[5][MAXLOOP+1]; /* 2D here, whereas paramT.ninio is a 1D array of 5 */
00157   double  lxc;
00158   double  expMLbase;
00159   double  expMLintern[NBPAIRS+1];
00160   double  expMLclosing;
00161   double  expTermAU;
00162   double  expDuplexInit;
00163   double  exptetra[40]; /* NOTE(review): 40 entries, while paramT.Tetraloop_E has 200 and Tetraloops below holds 1401 chars - confirm this cannot overflow */
00164   double  exptri[40]; /* NOTE(review): both exptri and expTriloop exist with 40 entries each - confirm which one is in use */
00165   double  exphex[40];
00166   char    Tetraloops[1401]; /* same size as paramT.Tetraloops */
00167   double  expTriloop[40];
00168   char    Triloops[241]; /* same size as paramT.Triloops */
00169   char    Hexaloops[1801]; /* same size as paramT.Hexaloops */
00170   double  expTripleC;
00171   double  expMultipleCA;
00172   double  expMultipleCB;
00173   double  pf_scale; /* scaling factor; NOTE(review): how it is applied is not shown in this header */
00174   double  temperature; /* temperature associated with this parameter set; units not stated in this header */
00175   double  alpha; /* used for scaling the thermodynamic temperature independently from the energy contributions */
00176   double  kT; /* presumably the thermal energy kT derived from temperature (and alpha) - TODO confirm */
00177   int     dangles; /* dangle setting stored with the parameters; valid values are not shown here */
00178 }  pf_paramT;
00179 
00180 
00181 
00182 /*
00183 * ############################################################
00184 * SUBOPT data structures
00185 * ############################################################
00186 */
00187 
00188 
00192 typedef struct { /* pair of int indices (i, j); listed under the SUBOPT data structures */
00193   int i;
00194   int j;
00195 } PAIR;
00196 
00200 typedef struct { /* (i, j) plus a flag; listed under the SUBOPT data structures */
00201     int i; /* presumably the interval start - TODO confirm */
00202     int j; /* presumably the interval end - TODO confirm */
00203     int array_flag; /* NOTE(review): meaning of the flag values is not shown in this header */
00204 } INTERVAL;
00205 
00209 typedef struct { /* one structure with its energy; listed under the SUBOPT data structures */
00210   float energy;                            /* energy of structure */
00211   char *structure; /* the structure as a character string; ownership and encoding are not shown here */
00212 } SOLUTION;
00213 
00214 /*
00215 * ############################################################
00216 * COFOLD data structures
00217 * ############################################################
00218 */
00219 typedef struct cofoldF { /* set of five free energies for two molecules A and B */
00220   /* free energies for: */
00221   double F0AB; /* null model without DuplexInit */
00222   double FAB;  /* all states with DuplexInit correction */
00223   double FcAB; /* true hybrid states only */
00224   double FA;   /* monomer A */
00225   double FB;   /* monomer B */
00226 } cofoldF;
00227 
00228 typedef struct ConcEnt { /* start concentrations of A and B together with end concentrations */
00229   double A0;    /* start concentration A */
00230   double B0;    /* start concentration B */
00231   double ABc;   /* end concentration AB */
00232   double AAc;   /* presumably end concentration AA, by analogy with ABc - TODO confirm */
00233   double BBc;   /* presumably end concentration BB - TODO confirm */
00234   double Ac;    /* presumably end concentration A - TODO confirm */
00235   double Bc;    /* presumably end concentration B - TODO confirm */
00236 } ConcEnt;
00237 
00238 typedef struct pairpro{ /* five pointers to struct plist, one each for AB, AA, A, B and BB */
00239   struct plist *AB; /* NOTE(review): ownership and termination of each plist array are not shown here */
00240   struct plist *AA;
00241   struct plist *A;
00242   struct plist *B;
00243   struct plist *BB;
00244 }pairpro;
00245 
00255 typedef struct { /* per-pair record: indices, probability, pseudo entropy, pair-type frequencies and an mfe flag */
00256    unsigned i;        /* i,j in [0, n-1] */
00257    unsigned j;        /* second index, same range as i */
00258    float p;      /* probability */
00259    float ent;    /* pseudo entropy for p(i,j) = S_i + S_j - p_ij*ln(p_ij) */
00260    short bp[8];  /* frequencies of pair_types */
00261    char comp;    /* 1 iff pair is in mfe structure */
00262 } pair_info;
00263 
00264 
00265 /*
00266 * ############################################################
00267 * FINDPATH data structures
00268 * ############################################################
00269 */
00270 
00271 typedef struct move { /* one move; the sign of i and j distinguishes insertion from deletion */
00272   int i;  /* i,j>0 insert; i,j<0 delete */
00273   int j;  /* see i: same sign convention */
00274   int when;  /* 0 if still available, else resulting distance from start */
00275   int E;  /* NOTE(review): presumably the energy associated with this move - confirm */
00276 } move_t;
00277 
00278 typedef struct intermediate { /* intermediate state: pair table, energies so far, and the moves still remaining */
00279   short *pt;     /* pair table */
00280   int Sen;       /* saddle energy so far */
00281   int curr_en;   /* current energy */
00282   move_t *moves; /* remaining moves to target */
00283 } intermediate_t;
00284 
00285 typedef struct path { /* a double paired with a string; presumably one step of a folding path - TODO confirm */
00286   double en; /* presumably the energy of this step - TODO confirm */
00287   char *s;   /* presumably the structure string of this step; ownership not shown here - TODO confirm */
00288 } path_t;
00289 
00290 /*
00291 * ############################################################
00292 * RNAup data structures
00293 * ############################################################
00294 */
00295 typedef struct pu_contrib { /* contributions to prob_unpaired in the four loop types below */
00296   double **H; /* hairpin loops */
00297   double **I; /* interior loops */
00298   double **M; /* multi loops */
00299   double **E; /* exterior loop */
00300   int length; /* length of the input sequence */
00301   int w;      /* longest unpaired region */
00302 } pu_contrib;
00303 
00304 typedef struct interact { /* interaction probabilities and free energies between a longer and a shorter sequence; NOTE(review): the previous header comment was copied from pu_contrib */
00305   double *Pi; /* probabilities of interaction */
00306   double *Gi; /* free energies of interaction */
00307   double Gikjl; /* full free energy for interaction between [k,i] k<i
00308                    in longer seq and [j,l] j<l in shorter seq */
00309   double Gikjl_wo; /* Gikjl without contributions for prob_unpaired */
00310   int i; /* k<i in longer seq */
00311   int k; /* k<i in longer seq */
00312   int j; /* j<l in shorter seq */
00313   int l; /* j<l in shorter seq */
00314   int length; /* length of longer sequence */
00315 } interact;
00316 
00317 typedef struct pu_out { /* collect all free_energy of being unpaired
00318                            values for output */
00319   int len;        /* sequence length */
00320   int u_vals;     /* number of different -u values */
00321   int contribs;   /* [-c "SHIME"] */
00322   char **header;  /* header line */
00323   double **u_values; /* (different -u values * [-c "SHIME"]) * seq len */
00324 } pu_out;
00325 
00326 typedef struct constrain { /* constraints for cofolding */
00327   int *indx;   /* int index array; NOTE(review): indexing scheme is not shown in this header */
00328   char *ptype; /* char array; presumably pair types as in the 2Dfold structs' ptype - TODO confirm */
00329 } constrain;
00330 
00331 /*
00332 * ############################################################
00333 * RNAduplex data structures
00334 * ############################################################
00335 */
00336 
00337 typedef struct { /* duplex record; listed under the RNAduplex data structures */
00338   int i;
00339   int j;
00340   int end;
00341   char *structure; /* structure string; ownership not shown here */
00342   double energy;
00343   double energy_backtrack;    /* the three *_backtrack members are absent from the otherwise parallel dupVar */
00344   double opening_backtrack_x;
00345   double opening_backtrack_y;
00346   int offset;
00347   double dG1; /* NOTE(review): dG1, dG2 and ddG are energy terms whose exact definition is not shown here */
00348   double dG2;
00349   double ddG;
00350   int tb; /* tb/te/qb/qe: presumably target begin/end and query begin/end - TODO confirm */
00351   int te;
00352   int qb;
00353   int qe;
00354 } duplexT;
00355 
00356 /*
00357 * ############################################################
00358 * RNAsnoop data structures
00359 * ############################################################
00360 */
00361 
00362 typedef struct node { /* singly linked list node (struct tag "node", typedef name "folden") */
00363   int k;
00364   int energy;
00365   struct node *next; /* link to another node; NOTE(review): list termination convention is not shown here */
00366 } folden;
00367 
00368 typedef struct { /* result record with single-precision energy terms; listed under the RNAsnoop data structures */
00369   int i;
00370   int j;
00371   int u;
00372   char *structure; /* structure string; ownership not shown here */
00373   float energy;
00374   float Duplex_El; /* NOTE(review): the Duplex_* and Loop_* members are float terms whose exact meaning is not shown in this header */
00375   float Duplex_Er;
00376   float Loop_E;
00377   float Loop_D;
00378   float pscd;
00379   float psct;
00380   float pscg;
00381   float Duplex_Ol;
00382   float Duplex_Or;
00383   float Duplex_Ot;
00384 } snoopT;
00385 
00386 
00387 
00388 
00389 
00390 
00391 
00392 /*
00393 * ############################################################
00394 * PKplex data structures
00395 * ############################################################
00396 */
00397 
00398 typedef struct dupVar{ /* listed under the PKplex data structures; same members as duplexT minus the three *_backtrack doubles */
00399   int i;
00400   int j;
00401   int end;
00402   char *structure; /* structure string; ownership not shown here */
00403   double energy;
00404   int offset;
00405   double dG1; /* NOTE(review): dG1, dG2 and ddG are energy terms whose exact definition is not shown here */
00406   double dG2;
00407   double ddG;
00408   int tb; /* tb/te/qb/qe: presumably target begin/end and query begin/end - TODO confirm */
00409   int te;
00410   int qb;
00411   int qe;
00412 } dupVar;
00413 
00414 
00415 
00416 /*
00417 * ############################################################
00418 * 2Dfold data structures
00419 * ############################################################
00420 */
00421 
00434 typedef struct{ /* one 2Dfold solution: a (k, l) pair with a float energy and a string */
00435   int k; /* presumably the distance to reference structure 1 - TODO confirm */
00436   int l; /* presumably the distance to reference structure 2 - TODO confirm */
00437   float en; /* energy value; units not stated in this header */
00438   char *s; /* presumably the structure string; ownership not shown here - TODO confirm */
00439 } TwoDfold_solution;
00440 
00446 typedef struct{ /* state container for 2Dfold: parameters, inputs, reference data, int matrices and their k/l bounds */
00447   paramT          *P; /* integer parameter set */
00448   int             do_backtrack;
00449   char            *ptype;   /* precomputed array of pair types */
00450   char            *sequence;
00451   short           *S, *S1; /* two short arrays; presumably encoded forms of sequence - TODO confirm */
00452   unsigned int    maxD1; /* maxD1/maxD2: presumably the maximal distances to reference structures 1 and 2 - TODO confirm */
00453   unsigned int    maxD2;
00454 
00455 
00456   unsigned int    *mm1;         /* maximum matching matrix, reference struct 1 disallowed */
00457   unsigned int    *mm2;         /* maximum matching matrix, reference struct 2 disallowed */
00458 
00459   int             *my_iindx;    /* index for moving in quadratic distance dimensions */
00460 
00461   double          temperature;
00462 
00463   unsigned int    *referenceBPs1; /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
00464   unsigned int    *referenceBPs2; /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
00465   unsigned int    *bpdist;        /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
00466 
00467   short           *reference_pt1; /* presumably the pair table of reference structure 1 - TODO confirm */
00468   short           *reference_pt2; /* presumably the pair table of reference structure 2 - TODO confirm */
00469   int             circ; /* presumably non-zero for circular sequences - TODO confirm */
00470   int             dangles;
00471   unsigned int    seq_length;
00472 
00473   int             ***E_F5; /* E_F5 .. E_M2: int*** arrays; presumably energy matrices addressed by position and (k,l) - TODO confirm */
00474   int             ***E_F3;
00475   int             ***E_C;
00476   int             ***E_M;
00477   int             ***E_M1;
00478   int             ***E_M2;
00479 
00480   int             **E_Fc; /* E_Fc .. E_FcM: int** arrays, one level of indirection less than the E_* arrays above */
00481   int             **E_FcH;
00482   int             **E_FcI;
00483   int             **E_FcM;
00484 
00485   int             **l_min_values; /* each l_min/l_max/k_min/k_max group below presumably bounds the populated (k,l) range of the array named by its suffix - TODO confirm */
00486   int             **l_max_values;
00487   int             *k_min_values;
00488   int             *k_max_values;
00489 
00490   int             **l_min_values_m;
00491   int             **l_max_values_m;
00492   int             *k_min_values_m;
00493   int             *k_max_values_m;
00494 
00495   int             **l_min_values_m1;
00496   int             **l_max_values_m1;
00497   int             *k_min_values_m1;
00498   int             *k_max_values_m1;
00499 
00500   int             **l_min_values_f;
00501   int             **l_max_values_f;
00502   int             *k_min_values_f;
00503   int             *k_max_values_f;
00504 
00505   int             **l_min_values_f3;
00506   int             **l_max_values_f3;
00507   int             *k_min_values_f3;
00508   int             *k_max_values_f3;
00509 
00510   int             **l_min_values_m2;
00511   int             **l_max_values_m2;
00512   int             *k_min_values_m2;
00513   int             *k_max_values_m2;
00514 
00515   int             *l_min_values_fc; /* the fc/fcH/fcI/fcM groups use int* for l bounds and plain int for k bounds */
00516   int             *l_max_values_fc;
00517   int             k_min_values_fc;
00518   int             k_max_values_fc;
00519 
00520   int             *l_min_values_fcH;
00521   int             *l_max_values_fcH;
00522   int             k_min_values_fcH;
00523   int             k_max_values_fcH;
00524 
00525   int             *l_min_values_fcI;
00526   int             *l_max_values_fcI;
00527   int             k_min_values_fcI;
00528   int             k_max_values_fcI;
00529 
00530   int             *l_min_values_fcM;
00531   int             *l_max_values_fcM;
00532   int             k_min_values_fcM;
00533   int             k_max_values_fcM;
00534 
00535   /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
00536   int             *E_F5_rem;
00537   int             *E_F3_rem;
00538   int             *E_C_rem;
00539   int             *E_M_rem;
00540   int             *E_M1_rem;
00541   int             *E_M2_rem;
00542 
00543   int             E_Fc_rem; /* the four E_Fc*_rem members are plain ints, not arrays */
00544   int             E_FcH_rem;
00545   int             E_FcI_rem;
00546   int             E_FcM_rem;
00547 
00548 #ifdef COUNT_STATES /* the N_* members exist only when COUNT_STATES is defined, which changes the struct size */
00549   unsigned long             ***N_F5;
00550   unsigned long             ***N_C;
00551   unsigned long             ***N_M;
00552   unsigned long             ***N_M1;
00553 #endif /* COUNT_STATES */
00554 } TwoDfold_vars;
00555 
00566 typedef struct{ /* one 2Dpfold solution: a (k, l) pair with a FLT_OR_DBL value */
00567   int k; /* presumably the distance to reference structure 1 - TODO confirm */
00568   int l; /* presumably the distance to reference structure 2 - TODO confirm */
00569   FLT_OR_DBL  q; /* double or float depending on LARGE_PF; presumably a partition function value - TODO confirm */
00570 } TwoDpfold_solution;
00571 
00577 typedef struct{ /* state container for 2Dpfold: inputs, scaling, reference data, FLT_OR_DBL matrices and their k/l bounds */
00578 
00579   unsigned int    alloc; /* NOTE(review): presumably an allocation flag or size - confirm against users */
00580   char            *ptype;   /* precomputed array of pair types */
00581   char            *sequence;
00582   short           *S, *S1; /* two short arrays; presumably encoded forms of sequence - TODO confirm */
00583   double          temperature;      /* temperature in last call to scale_pf_params */
00584   double          init_temp;      /* NOTE(review): original comment duplicated the one on temperature; presumably the initial temperature - confirm */
00585   unsigned int    maxD1; /* maxD1/maxD2: presumably the maximal distances to reference structures 1 and 2 - TODO confirm */
00586   unsigned int    maxD2;
00587 
00588   FLT_OR_DBL  *scale; /* array of scaling values; NOTE(review): indexing is not shown in this header */
00589   FLT_OR_DBL  pf_scale;
00590   pf_paramT   *pf_params;     /* holds all [unscaled] pf parameters */
00591 
00592   int             *my_iindx;         /* index for moving in quadratic distance dimensions */
00593   int             *jindx;         /* index for moving in the triangle matrix qm1 */
00594 
00595   unsigned int    *referenceBPs1;    /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
00596   unsigned int    *referenceBPs2;    /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
00597   short           *reference_pt1; /* presumably the pair table of reference structure 1 - TODO confirm */
00598   short           *reference_pt2; /* presumably the pair table of reference structure 2 - TODO confirm */
00599   unsigned int    *mm1;         /* maximum matching matrix, reference struct 1 disallowed */
00600   unsigned int    *mm2;         /* maximum matching matrix, reference struct 2 disallowed */
00601   unsigned int    *bpdist;      /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
00602   int             circ; /* presumably non-zero for circular sequences - TODO confirm */
00603   int             dangles;
00604   unsigned int    seq_length;
00605 
00606   FLT_OR_DBL      ***Q; /* Q .. Q_M2: FLT_OR_DBL*** arrays; presumably partition function matrices addressed by position and (k,l) - TODO confirm */
00607   FLT_OR_DBL      ***Q_B;
00608   FLT_OR_DBL      ***Q_M;
00609   FLT_OR_DBL      ***Q_M1;
00610   FLT_OR_DBL      ***Q_M2;
00611 
00612   FLT_OR_DBL      **Q_c; /* Q_c .. Q_cM: FLT_OR_DBL** arrays, one level of indirection less than the Q* arrays above */
00613   FLT_OR_DBL      **Q_cH;
00614   FLT_OR_DBL      **Q_cI;
00615   FLT_OR_DBL      **Q_cM;
00616 
00617   int             **l_min_values; /* each l_min/l_max/k_min/k_max group below presumably bounds the populated (k,l) range of the array named by its suffix - TODO confirm */
00618   int             **l_max_values;
00619   int             *k_min_values;
00620   int             *k_max_values;
00621 
00622   int             **l_min_values_b;
00623   int             **l_max_values_b;
00624   int             *k_min_values_b;
00625   int             *k_max_values_b;
00626 
00627   int             **l_min_values_m;
00628   int             **l_max_values_m;
00629   int             *k_min_values_m;
00630   int             *k_max_values_m;
00631 
00632   int             **l_min_values_m1;
00633   int             **l_max_values_m1;
00634   int             *k_min_values_m1;
00635   int             *k_max_values_m1;
00636 
00637   int             **l_min_values_m2;
00638   int             **l_max_values_m2;
00639   int             *k_min_values_m2;
00640   int             *k_max_values_m2;
00641 
00642   int             *l_min_values_qc; /* the qc/qcH/qcI/qcM groups use int* for l bounds and plain int for k bounds */
00643   int             *l_max_values_qc;
00644   int             k_min_values_qc;
00645   int             k_max_values_qc;
00646 
00647   int             *l_min_values_qcH;
00648   int             *l_max_values_qcH;
00649   int             k_min_values_qcH;
00650   int             k_max_values_qcH;
00651 
00652   int             *l_min_values_qcI;
00653   int             *l_max_values_qcI;
00654   int             k_min_values_qcI;
00655   int             k_max_values_qcI;
00656 
00657   int             *l_min_values_qcM;
00658   int             *l_max_values_qcM;
00659   int             k_min_values_qcM;
00660   int             k_max_values_qcM;
00661 
00662   /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
00663   FLT_OR_DBL      *Q_rem;
00664   FLT_OR_DBL      *Q_B_rem;
00665   FLT_OR_DBL      *Q_M_rem;
00666   FLT_OR_DBL      *Q_M1_rem;
00667   FLT_OR_DBL      *Q_M2_rem;
00668 
00669   FLT_OR_DBL      Q_c_rem; /* the four Q_c*_rem members are plain FLT_OR_DBL values, not arrays */
00670   FLT_OR_DBL      Q_cH_rem;
00671   FLT_OR_DBL      Q_cI_rem;
00672   FLT_OR_DBL      Q_cM_rem;
00673 
00674 } TwoDpfold_vars;
00675 
00676 #endif