RNAlib-2.0.4
|
00001 00785 { 0, 2, 2, 2, 2, 1, 1}, /* Null replaced */ 00786 { 2, 0, 2, 2, 2, INF, INF}, /* H replaced */ 00787 { 2, 2, 0, 1, 2, INF, INF}, /* B replaced */ 00788 { 2, 2, 1, 0, 2, INF, INF}, /* I replaced */ 00789 { 2, 2, 2, 2, 0, INF, INF}, /* M replaced */ 00790 { 1, INF, INF, INF, INF, 0, INF}, /* S replaced */ 00791 { 1, INF, INF, INF, INF, INF, 0}, /* E replaced */ 00792 00793 00794 /* Null, H, B, I, M, S, E */ 00795 { 0, 100, 5, 5, 75, 5, 5}, /* Null replaced */ 00796 { 100, 0, 8, 8, 8, INF, INF}, /* H replaced */ 00797 { 5, 8, 0, 3, 8, INF, INF}, /* B replaced */ 00798 { 5, 8, 3, 0, 8, INF, INF}, /* I replaced */ 00799 { 75, 8, 8, 8, 0, INF, INF}, /* M replaced */ 00800 { 5, INF, INF, INF, INF, 0, INF}, /* S replaced */ 00801 { 5, INF, INF, INF, INF, INF, 0}, /* E replaced */ 00802 \endverbatim 00803 00804 The lower matrix uses the costs given in \ref shapiro_90 "Shapiro (1990)". 00805 All distance functions use the following global variables: 00806 00807 \verbatim 00808 int cost_matrix; 00809 \endverbatim 00810 \copybrief cost_matrix 00811 00812 \verbatim 00813 int edit_backtrack; 00814 \endverbatim 00815 \copybrief edit_backtrack 00816 00817 \verbatim 00818 char *aligned_line[4]; 00819 \endverbatim 00820 \copybrief aligned_line 00821 00822 \see utils.h, dist_vars.h and stringdist.h for more details 00823 00824 <h3>Functions for Tree Edit Distances</h3> 00825 00826 \verbatim 00827 Tree *make_tree (char *struc) 00828 \endverbatim 00829 \copybrief make_tree() 00830 00831 \verbatim 00832 float tree_edit_distance (Tree *T1, 00833 Tree *T2) 00834 \endverbatim 00835 \copybrief tree_edit_distance() 00836 00837 \verbatim 00838 void free_tree(Tree *t) 00839 \endverbatim 00840 \copybrief free_tree() 00841 00842 \see dist_vars.h and treedist.h for prototypes and more detailed descriptions 00843 00844 <h3>Functions for String Alignment</h3> 00845 00846 \verbatim 00847 swString *Make_swString (char *string) 00848 \endverbatim 00849 \copybrief Make_swString() 00850 
00851 \verbatim 00852 float string_edit_distance (swString *T1, 00853 swString *T2) 00854 \endverbatim 00855 \copybrief string_edit_distance() 00856 00857 \see dist_vars.h and stringdist.h for prototypes and more detailed descriptions 00858 00859 <h3>Functions for Comparison of Base Pair Probabilities</h3> 00860 00861 For comparison of base pair probability matrices, the matrices are first 00862 condensed into probability profiles which are then compared by alignment. 00863 00864 \verbatim 00865 float *Make_bp_profile_bppm ( double *bppm, 00866 int length) 00867 \endverbatim 00868 \copybrief Make_bp_profile_bppm() 00869 00870 \verbatim 00871 float profile_edit_distance ( const float *T1, 00872 const float *T2) 00873 \endverbatim 00874 \copybrief profile_edit_distance() 00875 00876 \see ProfileDist.h for prototypes and more details of the above functions 00877 00878 \ref mp_utils "Next Page: Utilities" 00879 00880 \page mp_utils Utilities - Odds and Ends 00881 00882 \anchor toc 00883 00884 <h3>Table of Contents</h3> 00885 <hr> 00886 00887 \li \ref utils_ss 00888 \li \ref utils_dot 00889 \li \ref utils_aln 00890 \li \ref utils_seq 00891 \li \ref utils_struc 00892 \li \ref utils_misc 00893 00894 <hr> 00895 00896 \section utils_ss Producing secondary structure graphs 00897 00898 \verbatim 00899 int PS_rna_plot ( char *string, 00900 char *structure, 00901 char *file) 00902 \endverbatim 00903 \copybrief PS_rna_plot() 00904 00905 \verbatim 00906 int PS_rna_plot_a ( 00907 char *string, 00908 char *structure, 00909 char *file, 00910 char *pre, 00911 char *post) 00912 \endverbatim 00913 \copybrief PS_rna_plot_a() 00914 00915 \verbatim 00916 int gmlRNA (char *string, 00917 char *structure, 00918 char *ssfile, 00919 char option) 00920 \endverbatim 00921 \copybrief gmlRNA() 00922 00923 \verbatim 00924 int ssv_rna_plot (char *string, 00925 char *structure, 00926 char *ssfile) 00927 \endverbatim 00928 \copybrief ssv_rna_plot() 00929 00930 \verbatim 00931 int svg_rna_plot (char 
*string, 00932 char *structure, 00933 char *ssfile) 00934 \endverbatim 00935 \copybrief svg_rna_plot() 00936 00937 \verbatim 00938 int xrna_plot ( char *string, 00939 char *structure, 00940 char *ssfile) 00941 \endverbatim 00942 \copybrief xrna_plot() 00943 00944 \verbatim 00945 int rna_plot_type 00946 \endverbatim 00947 \copybrief rna_plot_type 00948 00949 Two low-level functions provide direct access to the graph layout 00950 algorithms: 00951 00952 \verbatim 00953 int simple_xy_coordinates ( short *pair_table, 00954 float *X, 00955 float *Y) 00956 \endverbatim 00957 \copybrief simple_xy_coordinates() 00958 00959 \verbatim 00960 int naview_xy_coordinates ( short *pair_table, 00961 float *X, 00962 float *Y) 00963 \endverbatim 00964 \copybrief naview_xy_coordinates() 00965 00966 \see PS_dot.h and naview.h for more detailed descriptions. 00967 00968 \htmlonly 00969 <hr> 00970 <a href="#toc">Table of Contents</a> 00971 <hr> 00972 \endhtmlonly 00973 00974 \section utils_dot Producing (colored) dot plots for base pair probabilities 00975 00976 \verbatim 00977 int PS_color_dot_plot ( char *string, 00978 cpair *pi, 00979 char *filename) 00980 \endverbatim 00981 \copybrief PS_color_dot_plot() 00982 00983 \verbatim 00984 int PS_color_dot_plot_turn (char *seq, 00985 cpair *pi, 00986 char *filename, 00987 int winSize) 00988 \endverbatim 00989 \copybrief PS_color_dot_plot_turn() 00990 00991 \verbatim 00992 int PS_dot_plot_list (char *seq, 00993 char *filename, 00994 plist *pl, 00995 plist *mf, 00996 char *comment) 00997 \endverbatim 00998 \copybrief PS_dot_plot_list() 00999 01000 \verbatim 01001 int PS_dot_plot_turn (char *seq, 01002 struct plist *pl, 01003 char *filename, 01004 int winSize) 01005 \endverbatim 01006 \copybrief PS_dot_plot_turn() 01007 01008 \see PS_dot.h for more detailed descriptions. 
01009 01010 \section utils_aln Producing (colored) alignments 01011 01012 \verbatim 01013 int PS_color_aln ( 01014 const char *structure, 01015 const char *filename, 01016 const char *seqs[], 01017 const char *names[]) 01018 \endverbatim 01019 \copybrief PS_color_aln() 01020 01021 \htmlonly 01022 <hr> 01023 <a href="#toc">Table of Contents</a> 01024 <hr> 01025 \endhtmlonly 01026 01027 \section utils_seq RNA sequence related utilities 01028 01029 Several functions provide useful applications to RNA sequences 01030 01031 \verbatim 01032 char *random_string (int l, 01033 const char symbols[]) 01034 \endverbatim 01035 \copybrief random_string() 01036 01037 \verbatim 01038 int hamming ( const char *s1, 01039 const char *s2) 01040 \endverbatim 01041 \copybrief hamming() 01042 01043 \verbatim 01044 void str_DNA2RNA(char *sequence); 01045 \endverbatim 01046 \copybrief str_DNA2RNA() 01047 01048 \verbatim 01049 void str_uppercase(char *sequence); 01050 \endverbatim 01051 \copybrief str_uppercase() 01052 01053 \htmlonly 01054 <hr> 01055 <a href="#toc">Table of Contents</a> 01056 <hr> 01057 \endhtmlonly 01058 01059 \section utils_struc RNA secondary structure related utilities 01060 01061 \verbatim 01062 char *pack_structure (const char *struc) 01063 \endverbatim 01064 \copybrief pack_structure() 01065 01066 \verbatim 01067 char *unpack_structure (const char *packed) 01068 \endverbatim 01069 \copybrief unpack_structure() 01070 01071 \verbatim 01072 short *make_pair_table (const char *structure) 01073 \endverbatim 01074 \copybrief make_pair_table() 01075 01076 \verbatim 01077 short *copy_pair_table (const short *pt) 01078 \endverbatim 01079 \copybrief copy_pair_table() 01080 01081 \htmlonly 01082 <hr> 01083 <a href="#toc">Table of Contents</a> 01084 <hr> 01085 \endhtmlonly 01086 01087 \section utils_misc Miscellaneous Utilities 01088 01089 \verbatim 01090 void print_tty_input_seq (void) 01091 \endverbatim 01092 \copybrief print_tty_input_seq() 01093 01094 \verbatim 01095 void 
print_tty_constraint_full (void) 01096 \endverbatim 01097 \copybrief print_tty_constraint_full() 01098 01099 \verbatim 01100 void print_tty_constraint (unsigned int option) 01101 \endverbatim 01102 \copybrief print_tty_constraint() 01103 01104 \verbatim 01105 int *get_iindx (unsigned int length) 01106 \endverbatim 01107 \copybrief get_iindx() 01108 01109 \verbatim 01110 int *get_indx (unsigned int length) 01111 \endverbatim 01112 \copybrief get_indx() 01113 01114 \verbatim 01115 void constrain_ptypes ( 01116 const char *constraint, 01117 unsigned int length, 01118 char *ptype, 01119 int *BP, 01120 int min_loop_size, 01121 unsigned int idx_type) 01122 \endverbatim 01123 \copybrief constrain_ptypes() 01124 01125 \verbatim 01126 char *get_line(FILE *fp); 01127 \endverbatim 01128 \copybrief get_line() 01129 01130 \verbatim 01131 unsigned int read_record( 01132 char **header, 01133 char **sequence, 01134 char ***rest, 01135 unsigned int options); 01136 \endverbatim 01137 \copybrief read_record() 01138 01139 \verbatim 01140 char *time_stamp (void) 01141 \endverbatim 01142 \copybrief time_stamp() 01143 01144 \verbatim 01145 void warn_user (const char message[]) 01146 \endverbatim 01147 \copybrief warn_user() 01148 01149 \verbatim 01150 void nrerror (const char message[]) 01151 \endverbatim 01152 \copybrief nrerror() 01153 01154 \verbatim 01155 void init_rand (void) 01156 \endverbatim 01157 \copybrief init_rand() 01158 01159 \verbatim 01160 unsigned short xsubi[3]; 01161 \endverbatim 01162 \copybrief xsubi 01163 01164 \verbatim 01165 double urn (void) 01166 \endverbatim 01167 \copybrief urn() 01168 01169 \verbatim 01170 int int_urn (int from, int to) 01171 \endverbatim 01172 \copybrief int_urn() 01173 01174 \verbatim 01175 void *space (unsigned size) 01176 \endverbatim 01177 \copybrief space() 01178 01179 \verbatim 01180 void *xrealloc ( void *p, 01181 unsigned size) 01182 \endverbatim 01183 \copybrief xrealloc() 01184 01185 \see utils.h for a complete overview and 
detailed description of the utility functions 01186 01187 \htmlonly 01188 <hr> 01189 <a href="#toc">Table of Contents</a> 01190 <hr> 01191 \endhtmlonly 01192 01193 \ref mp_example "Next Page: Examples" 01194 01195 \page mp_example Example - A Small Example Program 01196 01197 The following program exercises most commonly used functions of the library. 01198 The program folds two sequences using both the mfe and partition function 01199 algorithms and calculates the tree edit and profile distance of the 01200 resulting structures and base pairing probabilities. 01201 01202 \verbatim 01203 #include <stdio.h> 01204 #include <math.h> 01205 #include "utils.h" 01206 #include "fold_vars.h" 01207 #include "fold.h" 01208 #include "part_func.h" 01209 #include "inverse.h" 01210 #include "RNAstruct.h" 01211 #include "treedist.h" 01212 #include "stringdist.h" 01213 #include "ProfileDist.h" 01214 01215 void main() 01216 { 01217 char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC", 01218 *struct1,* struct2,* xstruc; 01219 float e1, e2, tree_dist, string_dist, profile_dist, kT; 01220 Tree *T1, *T2; 01221 swString *S1, *S2; 01222 float **pf1, **pf2; 01223 FLT_OR_DBL *bppm; 01224 /* fold at 30C instead of the default 37C */ 01225 temperature = 30.; /* must be set *before* initializing */ 01226 01227 /* allocate memory for structure and fold */ 01228 struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1)); 01229 e1 = fold(seq1, struct1); 01230 01231 struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1)); 01232 e2 = fold(seq2, struct2); 01233 01234 free_arrays(); /* free arrays used in fold() */ 01235 01236 /* produce tree and string representations for comparison */ 01237 xstruc = expand_Full(struct1); 01238 T1 = make_tree(xstruc); 01239 S1 = Make_swString(xstruc); 01240 free(xstruc); 01241 01242 xstruc = expand_Full(struct2); 01243 T2 = make_tree(xstruc); 01244 S2 = Make_swString(xstruc); 01245 free(xstruc); 01246 01247 /* calculate tree edit distance and aligned structures with 
gaps */ 01248 edit_backtrack = 1; 01249 tree_dist = tree_edit_distance(T1, T2); 01250 free_tree(T1); free_tree(T2); 01251 unexpand_aligned_F(aligned_line); 01252 printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist); 01253 01254 /* same thing using string edit (alignment) distance */ 01255 string_dist = string_edit_distance(S1, S2); 01256 free(S1); free(S2); 01257 printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n", 01258 aligned_line[0], e1, aligned_line[1], e2, string_dist); 01259 01260 /* for longer sequences one should also set a scaling factor for 01261 partition function folding, e.g.: */ 01262 kT = (temperature+273.15)*1.98717/1000.; /* kT in kcal/mol */ 01263 pf_scale = exp(-e1/kT/strlen(seq1)); 01264 01265 /* calculate partition function and base pair probabilities */ 01266 e1 = pf_fold(seq1, struct1); 01267 /* get the base pair probability matrix for the previous run of pf_fold() */ 01268 bppm = export_bppm(); 01269 pf1 = Make_bp_profile_bppm(bppm, strlen(seq1)); 01270 01271 e2 = pf_fold(seq2, struct2); 01272 /* get the base pair probability matrix for the previous run of pf_fold() */ 01273 bppm = export_bppm(); 01274 pf2 = Make_bp_profile_bppm(bppm, strlen(seq2)); 01275 01276 free_pf_arrays(); /* free space allocated for pf_fold() */ 01277 01278 profile_dist = profile_edit_distance(pf1, pf2); 01279 printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n", 01280 aligned_line[0], e1, aligned_line[1], e2, profile_dist); 01281 01282 free_profile(pf1); free_profile(pf2); 01283 } 01284 \endverbatim 01285 01286 In a typical Unix environment you would compile this program using: 01287 \verbatim 01288 cc ${OPENMP_CFLAGS} -c example.c -I${hpath} 01289 \endverbatim 01290 and link using 01291 \verbatim 01292 cc ${OPENMP_CFLAGS} -o example example.o -L${lpath} -lRNA -lm 01293 \endverbatim 01294 where \e ${hpath} and \e ${lpath} point to the location of the header 01295 files and library, respectively. 
01296 \note By default, the RNAlib is compiled with built-in \e OpenMP multithreading 01297 support. Thus, when linking your own object files to the library you have to pass 01298 the compiler-specific \e ${OPENMP_CFLAGS} (e.g. '-fopenmp' for \b gcc) even if your code does not 01299 use OpenMP-specific code. However, in that case the \e OpenMP flags may be omitted when compiling 01300 example.c 01301 01302 \ref mp_ref "Next Page: References" 01303 01304 \page mp_ref References 01305 01306 -# \anchor mathews_04 D.H. Mathews, M.D. Disney, J.L. Childs, S.J. Schroeder, M. Zuker, D.H. Turner (2004)\n 01307 Incorporating chemical modification constraints into a dynamic programming algorithm for prediction of 01308 RNA secondary structure, Proc Natl Acad Sci U S A, 101(19):7287-92 01309 -# \anchor mathews_99 D.H. Mathews, J. Sabina, M. Zuker and D.H. Turner (1999)\n 01310 Expanded sequence dependence of thermodynamic parameters provides 01311 robust prediction of RNA secondary structure, JMB, 288: 911-940 01312 -# \anchor zuker_81 M. Zuker and P. Stiegler (1981)\n 01313 Optimal computer folding of large RNA sequences using 01314 thermodynamic and auxiliary information, Nucl Acid Res 9: 133-148 01315 -# \anchor dimitrov_04 D.A. Dimitrov, M. Zuker (2004)\n 01316 Prediction of hybridization and melting for double stranded nucleic 01317 acids, Biophysical J. 87: 215-226 01318 -# \anchor mccaskill_90 J.S. McCaskill (1990)\n 01319 The equilibrium partition function and base pair binding 01320 probabilities for RNA secondary structures, Biopolymers 29: 1105-1119 01321 -# \anchor turner_88 D.H. Turner, N. Sugimoto and S.M. Freier (1988)\n 01322 RNA structure prediction, Ann Rev Biophys Biophys Chem 17: 167-192 01323 -# \anchor jaeger_89 J.A. Jaeger, D.H. Turner and M. Zuker (1989)\n 01324 Improved predictions of secondary structures for RNA, 01325 Proc. Natl. Acad. Sci. 86: 7706-7710 01326 -# \anchor he_91 L. He, R. Kierzek, J. SantaLucia, A.E. Walter and D.H. 
Turner (1991)\n 01327 Nearest-Neighbor Parameters For GU Mismatches, 01328 Biochemistry 30: 11124-11132 01329 -# \anchor peritz_91 A.E. Peritz, R. Kierzek, N. Sugimoto, D.H. Turner (1991)\n 01330 Thermodynamic Study of Internal Loops in Oligoribonucleotides ... , 01331 Biochemistry 30: 6428-6435 01332 -# \anchor walter_94 A. Walter, D. Turner, J. Kim, M. Lyttle, P. Müller, D. Mathews and M. Zuker (1994)\n 01333 Coaxial stacking of helices enhances binding of Oligoribonucleotides.., 01334 Proc. Natl. Acad. Sci. 91: 9218-9222 01335 -# \anchor shapiro_88 B.A. Shapiro (1988)\n 01336 An algorithm for comparing multiple RNA secondary structures, 01337 CABIOS 4, 381-393 01338 -# \anchor shapiro_90 B.A. Shapiro and K. Zhang (1990)\n 01339 Comparing multiple RNA secondary structures using tree comparison, 01340 CABIOS 6, 309-318 01341 -# \anchor bruccoleri_88 R. Bruccoleri and G. Heinrich (1988)\n 01342 An improved algorithm for nucleic acid secondary structure display, 01343 CABIOS 4, 167-173 01344 -# \anchor fontana_93a W. Fontana, D.A.M. Konings, P.F. Stadler, P. Schuster (1993)\n 01345 Statistics of RNA secondary structures, Biopolymers 33, 1389-1404 01346 -# \anchor fontana_93b W. Fontana, P.F. Stadler, E.G. Bornberg-Bauer, T. Griesmacher, I.L. 01347 Hofacker, M. Tacker, P. Tarazona, E.D. Weinberger, P. Schuster (1993)\n 01348 RNA folding and combinatory landscapes, Phys. Rev. E 47: 2083-2099 01349 -# \anchor hofacker_94a I.L. Hofacker, W. Fontana, P.F. Stadler, S. Bonhoeffer, M. Tacker, P. 01350 Schuster (1994) Fast Folding and Comparison of RNA Secondary Structures. 01351 Monatshefte f. Chemie 125: 167-188 01352 -# \anchor hofacker_94b I.L. Hofacker (1994) The Rules of the Evolutionary Game for RNA: 01353 A Statistical Characterization of the Sequence to Structure Mapping in RNA. 01354 PhD Thesis, University of Vienna. 01355 -# \anchor hofacker_02 I.L. Hofacker, M. Fekete, P.F. Stadler (2002). 01356 Secondary Structure Prediction for Aligned RNA Sequences. 
01357 J. Mol. Biol. 319:1059-1066 01358 -# \anchor adams_79 D. Adams (1979)\n 01359 The hitchhiker's guide to the galaxy, Pan Books, London 01360 01361 **/ 01362