RNAlib-2.0.1
H/data_structures.h
Go to the documentation of this file.
00001 #ifndef __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
00002 #define __VIENNA_RNA_PACKAGE_DATA_STRUCTURES_H__
00003 
00004 #include "energy_const.h"
00005 
00011 /* to use floats instead of doubles in pf_fold(), comment out the next line */
00012 #define LARGE_PF
00013 #ifdef  LARGE_PF
00014 #define FLT_OR_DBL double /* FLT_OR_DBL is double while LARGE_PF is defined */
00015 #else
00016 #define FLT_OR_DBL float /* ... and float otherwise */
00017 #endif
00018 
00019 #ifndef NBASES /* only a default: a value defined before this point wins */
00020 #define NBASES 8
00021 #endif
00022 
00023 #ifndef MAXALPHA /* only a default: a value defined before this point wins */
00024 
00027 #define MAXALPHA              20 /* presumably the maximal alphabet size -- TODO confirm */
00028 #endif
00029 
00033 #define MAXDOS                1000 /* defined unconditionally (no #ifndef guard, unlike its neighbours) */
00034 
00035 #ifndef FILENAME_MAX_LENGTH /* NOTE: this guard also covers FILENAME_ID_LENGTH below */
00036 
00042 #define FILENAME_MAX_LENGTH   80
00043 
00049 #define FILENAME_ID_LENGTH    42 /* not defined here if FILENAME_MAX_LENGTH was predefined */
00050 #endif
00051 
00052 
00053 
00054 /*
00055 * ############################################################
00056 * Here are the type definitions of various data structures
00057 * shared among the Vienna RNA Package
00058 * ############################################################
00059 */
00060 
00064 typedef struct plist { /* index pair (i, j) with an associated float value p */
00065   int i;
00066   int j;
00067   float p; /* presumably the probability of pair (i, j) -- TODO confirm against users */
00068 } plist;
00069 
00073 typedef struct cpair { /* index pair (i, j) with an integer mfe field and three float attributes */
00074   int i,j,mfe;
00075   float p, hue, sat; /* presumably probability plus hue/saturation for colored plots -- TODO confirm */
00076 } cpair;
00077 
00082 typedef struct { /* a 2D point stored as two floats */
00083   float X; /* X coords */
00084   float Y; /* Y coords */
00085 } COORDINATE;
00086 
00090 typedef struct sect { /* index interval (i, j) plus an integer tag ml */
00091   int  i;
00092   int  j;
00093   int ml; /* NOTE(review): meaning not visible in this header; presumably selects which array a backtracking step refers to -- confirm */
00094 } sect;
00095 
00099 typedef struct bondT { /* pair of unsigned indices (i, j); presumably one base pair -- TODO confirm */
00100    unsigned int i;
00101    unsigned int j;
00102 } bondT;
00103 
00107 typedef struct bondTEn { /* like bondT but with signed indices and an integer energy */
00108    int i;
00109    int j;
00110    int energy; /* integer energy; units are not stated in this header */
00111 } bondTEn;
00112 
00116 typedef struct{ /* integer-valued energy parameter set; NBPAIRS and MAXLOOP are not defined in this file (presumably from energy_const.h) */
00117   int id;
00118   int stack[NBPAIRS+1][NBPAIRS+1];
00119   int hairpin[31];
00120   int bulge[MAXLOOP+1];
00121   int internal_loop[MAXLOOP+1];
00122   int mismatchExt[NBPAIRS+1][5][5];
00123   int mismatchI[NBPAIRS+1][5][5];
00124   int mismatch1nI[NBPAIRS+1][5][5];
00125   int mismatch23I[NBPAIRS+1][5][5];
00126   int mismatchH[NBPAIRS+1][5][5];
00127   int mismatchM[NBPAIRS+1][5][5];
00128   int dangle5[NBPAIRS+1][5];
00129   int dangle3[NBPAIRS+1][5];
00130   int int11[NBPAIRS+1][NBPAIRS+1][5][5];
00131   int int21[NBPAIRS+1][NBPAIRS+1][5][5][5];
00132   int int22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
00133   int ninio[5];
00134   double lxc;
00135   int MLbase;
00136   int MLintern[NBPAIRS+1];
00137   int MLclosing;
00138   int TerminalAU;
00139   int DuplexInit;
00140   int Tetraloop_E[200];
00141   char Tetraloops[1401]; /* presumably 200 entries of 7 chars + NUL, matching Tetraloop_E[200] -- TODO confirm */
00142   int Triloop_E[40];
00143   char Triloops[241]; /* presumably 40 entries of 6 chars + NUL, matching Triloop_E[40] -- TODO confirm */
00144   int Hexaloop_E[40];
00145   char Hexaloops[1801]; /* NOTE(review): if entries are 9 chars wide this holds 200 entries, but Hexaloop_E has only 40 slots -- confirm loaders cap at 40 */
00146   int TripleC;
00147   int MultipleCA;
00148   int MultipleCB;
00149   double temperature; /* presumably the temperature these parameters were scaled to -- TODO confirm */
00150 }  paramT;
00151 
00155 typedef struct{ /* double-valued counterpart of paramT; most members mirror a paramT member with an "exp" prefix (presumably Boltzmann factors -- TODO confirm) */
00156   int     id;
00157   double  expstack[NBPAIRS+1][NBPAIRS+1];
00158   double  exphairpin[31];
00159   double  expbulge[MAXLOOP+1];
00160   double  expinternal[MAXLOOP+1];
00161   double  expmismatchExt[NBPAIRS+1][5][5];
00162   double  expmismatchI[NBPAIRS+1][5][5];
00163   double  expmismatch23I[NBPAIRS+1][5][5];
00164   double  expmismatch1nI[NBPAIRS+1][5][5];
00165   double  expmismatchH[NBPAIRS+1][5][5];
00166   double  expmismatchM[NBPAIRS+1][5][5];
00167   double  expdangle5[NBPAIRS+1][5];
00168   double  expdangle3[NBPAIRS+1][5];
00169   double  expint11[NBPAIRS+1][NBPAIRS+1][5][5];
00170   double  expint21[NBPAIRS+1][NBPAIRS+1][5][5][5];
00171   double  expint22[NBPAIRS+1][NBPAIRS+1][5][5][5][5];
00172   double  expninio[5][MAXLOOP+1]; /* two-dimensional here, whereas paramT.ninio is int[5] */
00173   double  lxc;
00174   double  expMLbase;
00175   double  expMLintern[NBPAIRS+1];
00176   double  expMLclosing;
00177   double  expTermAU;
00178   double  expDuplexInit;
00179   double  exptetra[40]; /* NOTE(review): only 40 slots, while paramT.Tetraloop_E has 200 and Tetraloops[1401] below is sized for more -- confirm writers stay within 40 */
00180   double  exptri[40]; /* NOTE(review): expTriloop[40] below looks like a parallel array -- confirm which one is actually used */
00181   double  exphex[40];
00182   char    Tetraloops[1401];
00183   double  expTriloop[40];
00184   char    Triloops[241];
00185   char    Hexaloops[1801];
00186   double  expTripleC;
00187   double  expMultipleCA;
00188   double  expMultipleCB;
00189   double  temperature;
00190   double  kT; /* presumably the thermal energy at 'temperature' -- TODO confirm units */
00191 }  pf_paramT;
00192 
00193 
00194 
00195 /*
00196 * ############################################################
00197 * SUBOPT data structures
00198 * ############################################################
00199 */
00200 
00201 
00205 typedef struct { /* plain pair of signed indices (i, j), used by the SUBOPT code per the section banner */
00206   int i;
00207   int j;
00208 } PAIR;
00209 
00213 typedef struct { /* index interval (i, j) plus an integer array_flag */
00214     int i;
00215     int j;
00216     int array_flag; /* NOTE(review): presumably identifies the array the interval belongs to -- confirm */
00217 } INTERVAL;
00218 
00222 typedef struct { /* one structure together with its energy */
00223   float energy;                            /* energy of structure */
00224   char *structure; /* structure string; ownership is not stated in this header */
00225 } SOLUTION;
00226 
00227 /*
00228 * ############################################################
00229 * COFOLD data structures
00230 * ############################################################
00231 */
00232 typedef struct cofoldF { /* set of free energies for a pair of molecules A and B */
00233   /* free energies for: */
00234   double F0AB; /* null model without DuplexInit */
00235   double FAB;  /* all states with DuplexInit correction */
00236   double FcAB; /* true hybrid states only */
00237   double FA;   /* monomer A */
00238   double FB;   /* monomer B */
00239 } cofoldF;
00240 
00241 typedef struct ConcEnt { /* start concentrations of A and B plus resulting concentrations */
00242   double A0;    /*start concentration A*/
00243   double B0;    /*start concentration B*/
00244   double ABc;   /*End concentration AB*/
00245   double AAc;   /* presumably end concentration AA, by analogy with ABc -- TODO confirm */
00246   double BBc;   /* presumably end concentration BB -- TODO confirm */
00247   double Ac;    /* presumably end concentration A -- TODO confirm */
00248   double Bc;    /* presumably end concentration B -- TODO confirm */
00249 } ConcEnt;
00250 
00251 typedef struct pairpro{ /* five plist pointers, one each for AB, AA, A, B and BB */
00252   struct plist *AB;
00253   struct plist *AA;
00254   struct plist *A;
00255   struct plist *B;
00256   struct plist *BB; /* NOTE(review): whether these point to arrays and how they are terminated is not visible here */
00257 }pairpro;
00258 
00268 typedef struct { /* per-pair record: indices, probability, pseudo entropy, pair-type counts, mfe flag */
00269    unsigned i;        /* i,j in [0, n-1] */
00270    unsigned j;
00271    float p;      /* probability */
00272    float ent;    /* pseudo entropy for p(i,j) = S_i + S_j - p_ij*ln(p_ij) */
00273    short bp[8];  /* frequencies of pair_types */
00274    char comp;    /* 1 iff pair is in mfe structure */
00275 } pair_info;
00276 
00277 
00278 /*
00279 * ############################################################
00280 * FINDPATH data structures
00281 * ############################################################
00282 */
00283 
00284 typedef struct move { /* one insert/delete move; the sign of i and j encodes the direction */
00285   int i;  /* i,j>0 insert; i,j<0 delete */
00286   int j;
00287   int when;  /* 0 if still available, else resulting distance from start */
00288   int E; /* integer energy value attached to the move; exact meaning not stated in this header */
00289 } move_t;
00290 
00291 typedef struct intermediate { /* one intermediate state: pair table, energies, remaining moves */
00292   short *pt;     /* pair table */
00293   int Sen;       /* saddle energy so far */
00294   int curr_en;   /* current energy */
00295   move_t *moves; /* remaining moves to target */
00296 } intermediate_t;
00297 
00298 typedef struct path { /* one entry of a path: an energy and a string */
00299   double en; /* energy value for this entry */
00300   char *s;   /* presumably the structure string at this step -- TODO confirm */
00301 } path_t;
00302 
00303 /*
00304 * ############################################################
00305 * RNAup data structures
00306 * ############################################################
00307 */
00308 typedef struct pu_contrib { /* contributions to prob_unpaired in */
00309   double **H; /* hairpin loops */
00310   double **I; /* interior loops */
00311   double **M; /* multi loops */
00312   double **E; /* exterior loop */
00313   int length; /* length of the input sequence */
00314   int w;      /* longest unpaired region */
00315 } pu_contrib; /* NOTE(review): dimensions and indexing of the double** matrices are not visible in this header */
00316 
00317 typedef struct interact { /* interaction result; NOTE(review): the original header comment was copied from pu_contrib */
00318   double *Pi; /* probabilities of interaction */
00319   double *Gi; /* free energies of interaction */
00320   double Gikjl; /* full free energy for interaction between [k,i] k<i
00321                    in longer seq and [j,l] j<l in shorter seq */
00322   double Gikjl_wo; /* Gikjl without contributions for prob_unpaired */
00323   int i; /* k<i in longer seq */
00324   int k; /* k<i in longer seq */
00325   int j; /*j<l in shorter seq */
00326   int l; /*j<l in shorter seq */
00327   int length; /* length of longer sequence */
00328 } interact;
00329 
00330 typedef struct pu_out { /* collect all free_energy of being unpaired
00331                            values for output */
00332   int len;        /* sequence length */
00333   int u_vals;     /* number of different -u values */
00334   int contribs;   /* [-c "SHIME"] */
00335   char **header;  /* header line */
00336   double **u_values; /* (different -u values * [-c "SHIME"]) * seq len */
00337 } pu_out;
00338 
00339 typedef struct constrain { /* constraints for cofolding */
00340   int *indx;   /* index array; layout not visible in this header */
00341   char *ptype; /* presumably an array of pair types, as for the ptype members of the 2Dfold structs -- TODO confirm */
00342 } constrain;
00343 
00344 /*
00345 * ############################################################
00346 * RNAduplex data structures
00347 * ############################################################
00348 */
00349 
00350 typedef struct { /* duplex result record: positions, structure string and several energy terms */
00351   int i;
00352   int j;
00353   int end;
00354   char *structure; /* structure string; ownership is not stated in this header */
00355   double energy;
00356   double energy_backtrack;
00357   double opening_backtrack_x;
00358   double opening_backtrack_y;
00359   int offset;
00360   double dG1;
00361   double dG2;
00362   double ddG;
00363   int tb; /* NOTE(review): tb/te and qb/qe presumably are begin/end positions in target and query -- confirm */
00364   int te;
00365   int qb;
00366   int qe;
00367 } duplexT;
00368 
00369 /*
00370 * ############################################################
00371 * RNAsnoop data structures
00372 * ############################################################
00373 */
00374 
00375 typedef struct node { /* self-referential node carrying an index k and an energy; typedef'd as folden */
00376   int k;
00377   int energy;
00378   struct node *next; /* link to another node of the same type */
00379 } folden;
00380 
00381 typedef struct { /* RNAsnoop result record (per the section banner): positions, structure string and float energy terms */
00382   int i;
00383   int j;
00384   int u;
00385   char *structure; /* structure string; ownership is not stated in this header */
00386   float energy;
00387   float Duplex_El; /* NOTE(review): the meaning of the individual Duplex_* and Loop_* terms is not documented in this header */
00388   float Duplex_Er;
00389   float Loop_E;
00390   float Loop_D;
00391   float pscd;
00392   float psct;
00393   float pscg;
00394   float Duplex_Ol;
00395   float Duplex_Or;
00396   float Duplex_Ot;
00397 } snoopT;
00398 
00399 
00400 
00401 
00402 
00403 
00404 
00405 /*
00406 * ############################################################
00407 * PKplex data structures
00408 * ############################################################
00409 */
00410 
00411 typedef struct dupVar{ /* same members as duplexT except energy_backtrack and opening_backtrack_x/_y */
00412   int i;
00413   int j;
00414   int end;
00415   char *structure; /* structure string; ownership is not stated in this header */
00416   double energy;
00417   int offset;
00418   double dG1;
00419   double dG2;
00420   double ddG;
00421   int tb; /* NOTE(review): tb/te and qb/qe presumably are begin/end positions in target and query -- confirm */
00422   int te;
00423   int qb;
00424   int qe;
00425 } dupVar;
00426 
00427 
00428 
00429 /*
00430 * ############################################################
00431 * 2Dfold data structures
00432 * ############################################################
00433 */
00434 
00447 typedef struct{ /* one 2Dfold result: a (k, l) pair with an energy and a string */
00448   int k; /* presumably the distance to the first reference structure -- TODO confirm */
00449   int l; /* presumably the distance to the second reference structure -- TODO confirm */
00450   float en;
00451   char *s; /* presumably the structure string -- TODO confirm */
00452 } TwoDfold_solution;
00453 
00459 typedef struct{ /* working state for 2Dfold: inputs, reference data, energy arrays (E_*) and their k/l bounds */
00460   paramT          *P;
00461   int             do_backtrack;
00462   char            *ptype;   /* precomputed array of pair types */
00463   char            *sequence;
00464   short           *S, *S1;
00465   unsigned int    maxD1;
00466   unsigned int    maxD2;
00467 
00468 
00469   unsigned int    *mm1;         /* maximum matching matrix, reference struct 1 disallowed */
00470   unsigned int    *mm2;         /* maximum matching matrix, reference struct 2 disallowed */
00471 
00472   int             *my_iindx;    /* index for moving in quadratic distance dimensions */
00473 
00474   double          temperature;
00475 
00476   unsigned int    *referenceBPs1; /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
00477   unsigned int    *referenceBPs2; /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
00478   unsigned int    *bpdist;        /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
00479 
00480   short           *reference_pt1;
00481   short           *reference_pt2;
00482   int             circ;
00483   int             dangles;
00484   unsigned int    seq_length;
00485 
00486   int             ***E_F5; /* triple-pointer energy arrays; NOTE(review): index order is not visible in this header */
00487   int             ***E_F3;
00488   int             ***E_C;
00489   int             ***E_M;
00490   int             ***E_M1;
00491   int             ***E_M2;
00492 
00493   int             **E_Fc;
00494   int             **E_FcH;
00495   int             **E_FcI;
00496   int             **E_FcM;
00497 
00498   int             **l_min_values; /* each group of four below holds min/max bounds for l and k; the suffix names the array it presumably belongs to -- TODO confirm */
00499   int             **l_max_values;
00500   int             *k_min_values;
00501   int             *k_max_values;
00502 
00503   int             **l_min_values_m;
00504   int             **l_max_values_m;
00505   int             *k_min_values_m;
00506   int             *k_max_values_m;
00507 
00508   int             **l_min_values_m1;
00509   int             **l_max_values_m1;
00510   int             *k_min_values_m1;
00511   int             *k_max_values_m1;
00512 
00513   int             **l_min_values_f;
00514   int             **l_max_values_f;
00515   int             *k_min_values_f;
00516   int             *k_max_values_f;
00517 
00518   int             **l_min_values_f3;
00519   int             **l_max_values_f3;
00520   int             *k_min_values_f3;
00521   int             *k_max_values_f3;
00522 
00523   int             **l_min_values_m2;
00524   int             **l_max_values_m2;
00525   int             *k_min_values_m2;
00526   int             *k_max_values_m2;
00527 
00528   int             *l_min_values_fc;
00529   int             *l_max_values_fc;
00530   int             k_min_values_fc;
00531   int             k_max_values_fc;
00532 
00533   int             *l_min_values_fcH;
00534   int             *l_max_values_fcH;
00535   int             k_min_values_fcH;
00536   int             k_max_values_fcH;
00537 
00538   int             *l_min_values_fcI;
00539   int             *l_max_values_fcI;
00540   int             k_min_values_fcI;
00541   int             k_max_values_fcI;
00542 
00543   int             *l_min_values_fcM;
00544   int             *l_max_values_fcM;
00545   int             k_min_values_fcM;
00546   int             k_max_values_fcM;
00547 
00548   /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
00549   int             *E_F5_rem;
00550   int             *E_F3_rem;
00551   int             *E_C_rem;
00552   int             *E_M_rem;
00553   int             *E_M1_rem;
00554   int             *E_M2_rem;
00555 
00556   int             E_Fc_rem;
00557   int             E_FcH_rem;
00558   int             E_FcI_rem;
00559   int             E_FcM_rem;
00560 
00561 #ifdef COUNT_STATES /* the N_* members exist only when COUNT_STATES is defined, so the struct layout depends on it */
00562   unsigned long             ***N_F5;
00563   unsigned long             ***N_C;
00564   unsigned long             ***N_M;
00565   unsigned long             ***N_M1;
00566 #endif
00567 } TwoDfold_vars;
00568 
00579 typedef struct{ /* one 2Dpfold result: a (k, l) pair with a value q of type FLT_OR_DBL */
00580   int k;
00581   int l;
00582   FLT_OR_DBL  q; /* presumably the partition function for (k, l) -- TODO confirm */
00583 } TwoDpfold_solution;
00584 
00590 typedef struct{ /* working state for 2Dpfold: inputs, scaling, reference data, Q_* arrays and their k/l bounds */
00591 
00592   unsigned int    alloc;
00593   char            *ptype;   /* precomputed array of pair types */
00594   char            *sequence;
00595   short           *S, *S1;
00596   double          temperature;      /* temperature in last call to scale_pf_params */
00597   double          init_temp;      /* NOTE(review): original comment duplicated the one on 'temperature'; presumably the initial temperature -- confirm */
00598   unsigned int    maxD1;
00599   unsigned int    maxD2;
00600 
00601   FLT_OR_DBL  *scale;
00602   FLT_OR_DBL  pf_scale;
00603   pf_paramT   *pf_params;     /* holds all [unscaled] pf parameters */
00604 
00605   int             *my_iindx;         /* index for moving in quadratic distance dimensions */
00606   int             *jindx;         /* index for moving in the triangle matrix qm1 */
00607 
00608   unsigned int    *referenceBPs1;    /* matrix containing number of basepairs of reference structure1 in interval [i,j] */
00609   unsigned int    *referenceBPs2;    /* matrix containing number of basepairs of reference structure2 in interval [i,j] */
00610   short           *reference_pt1;
00611   short           *reference_pt2;
00612   unsigned int    *mm1;         /* maximum matching matrix, reference struct 1 disallowed */
00613   unsigned int    *mm2;         /* maximum matching matrix, reference struct 2 disallowed */
00614   unsigned int    *bpdist;      /* matrix containing base pair distance of reference structure 1 and 2 on interval [i,j] */
00615   int             circ;
00616   int             dangles;
00617   unsigned int    seq_length;
00618 
00619   FLT_OR_DBL      ***Q; /* triple-pointer arrays of FLT_OR_DBL; NOTE(review): index order is not visible in this header */
00620   FLT_OR_DBL      ***Q_B;
00621   FLT_OR_DBL      ***Q_M;
00622   FLT_OR_DBL      ***Q_M1;
00623   FLT_OR_DBL      ***Q_M2;
00624 
00625   FLT_OR_DBL      **Q_c;
00626   FLT_OR_DBL      **Q_cH;
00627   FLT_OR_DBL      **Q_cI;
00628   FLT_OR_DBL      **Q_cM;
00629 
00630   int             **l_min_values; /* each group of four below holds min/max bounds for l and k; the suffix names the array it presumably belongs to -- TODO confirm */
00631   int             **l_max_values;
00632   int             *k_min_values;
00633   int             *k_max_values;
00634 
00635   int             **l_min_values_b;
00636   int             **l_max_values_b;
00637   int             *k_min_values_b;
00638   int             *k_max_values_b;
00639 
00640   int             **l_min_values_m;
00641   int             **l_max_values_m;
00642   int             *k_min_values_m;
00643   int             *k_max_values_m;
00644 
00645   int             **l_min_values_m1;
00646   int             **l_max_values_m1;
00647   int             *k_min_values_m1;
00648   int             *k_max_values_m1;
00649 
00650   int             **l_min_values_m2;
00651   int             **l_max_values_m2;
00652   int             *k_min_values_m2;
00653   int             *k_max_values_m2;
00654 
00655   int             *l_min_values_qc;
00656   int             *l_max_values_qc;
00657   int             k_min_values_qc;
00658   int             k_max_values_qc;
00659 
00660   int             *l_min_values_qcH;
00661   int             *l_max_values_qcH;
00662   int             k_min_values_qcH;
00663   int             k_max_values_qcH;
00664 
00665   int             *l_min_values_qcI;
00666   int             *l_max_values_qcI;
00667   int             k_min_values_qcI;
00668   int             k_max_values_qcI;
00669 
00670   int             *l_min_values_qcM;
00671   int             *l_max_values_qcM;
00672   int             k_min_values_qcM;
00673   int             k_max_values_qcM;
00674 
00675   /* auxiliary arrays for remaining set of coarse graining (k,l) > (k_max, l_max) */
00676   FLT_OR_DBL      *Q_rem;
00677   FLT_OR_DBL      *Q_B_rem;
00678   FLT_OR_DBL      *Q_M_rem;
00679   FLT_OR_DBL      *Q_M1_rem;
00680   FLT_OR_DBL      *Q_M2_rem;
00681 
00682   FLT_OR_DBL      Q_c_rem;
00683   FLT_OR_DBL      Q_cH_rem;
00684   FLT_OR_DBL      Q_cI_rem;
00685   FLT_OR_DBL      Q_cM_rem;
00686 
00687 } TwoDpfold_vars;
00688 
00689 #endif