RNAlib-2.1.9h
mainpage.h
1/*
2#############################################################
3# The next comment is used to order the modules correctly #
4#############################################################
5*/
6
108/*
109#############################################################
110# Now the mainpage text is following #
111#############################################################
112*/
113
327 { 0, 2, 2, 2, 2, 1, 1}, /* Null replaced */
328 { 2, 0, 2, 2, 2, INF, INF}, /* H replaced */
329 { 2, 2, 0, 1, 2, INF, INF}, /* B replaced */
330 { 2, 2, 1, 0, 2, INF, INF}, /* I replaced */
331 { 2, 2, 2, 2, 0, INF, INF}, /* M replaced */
332 { 1, INF, INF, INF, INF, 0, INF}, /* S replaced */
333 { 1, INF, INF, INF, INF, INF, 0}, /* E replaced */
334
335
336/* Null, H, B, I, M, S, E */
337 { 0, 100, 5, 5, 75, 5, 5}, /* Null replaced */
338 { 100, 0, 8, 8, 8, INF, INF}, /* H replaced */
339 { 5, 8, 0, 3, 8, INF, INF}, /* B replaced */
340 { 5, 8, 3, 0, 8, INF, INF}, /* I replaced */
341 { 75, 8, 8, 8, 0, INF, INF}, /* M replaced */
342 { 5, INF, INF, INF, INF, 0, INF}, /* S replaced */
343 { 5, INF, INF, INF, INF, INF, 0}, /* E replaced */
344\endverbatim
345
346The lower matrix uses the costs given in \cite shapiro:1990.
347All distance functions use the following global variables:
348
349\verbatim
350int cost_matrix;
351\endverbatim
352\copybrief cost_matrix
353
354\verbatim
355int edit_backtrack;
356\endverbatim
357\copybrief edit_backtrack
358
359\verbatim
360char *aligned_line[4];
361\endverbatim
362\copybrief aligned_line
363
364\see utils.h, dist_vars.h and stringdist.h for more details
365
366<h3>Functions for Tree Edit Distances</h3>
367
368\verbatim
369Tree *make_tree (char *struc)
370\endverbatim
371\copybrief make_tree()
372
373\verbatim
374float tree_edit_distance (Tree *T1,
375 Tree *T2)
376\endverbatim
377\copybrief tree_edit_distance()
378
379\verbatim
380void free_tree(Tree *t)
381\endverbatim
382\copybrief free_tree()
383
384\see dist_vars.h and treedist.h for prototypes and more detailed descriptions
385
386<h3>Functions for String Alignment</h3>
387
388\verbatim
389swString *Make_swString (char *string)
390\endverbatim
391\copybrief Make_swString()
392
393\verbatim
394float string_edit_distance (swString *T1,
395 swString *T2)
396\endverbatim
397\copybrief string_edit_distance()
398
399\see dist_vars.h and stringdist.h for prototypes and more detailed descriptions
400
401<h3>Functions for Comparison of Base Pair Probabilities</h3>
402
403For comparison of base pair probability matrices, the matrices are first
404condensed into probability profiles which are then compared by alignment.
405
406\verbatim
407float *Make_bp_profile_bppm ( double *bppm,
408 int length)
409\endverbatim
410\copybrief Make_bp_profile_bppm()
411
412\verbatim
413float profile_edit_distance ( const float *T1,
414 const float *T2)
415\endverbatim
416\copybrief profile_edit_distance()
417
418\see ProfileDist.h for prototypes and more details of the above functions
419
420\ref mp_utils "Next Page: Utilities"
421
422\page mp_utils Utilities - Odds and Ends
423
424\anchor toc
425
426<h3>Table of Contents</h3>
427<hr>
428
429\li \ref utils_ss
430\li \ref utils_dot
431\li \ref utils_aln
432\li \ref utils_seq
433\li \ref utils_struc
434\li \ref utils_misc
435
436<hr>
437
438\section utils_ss Producing secondary structure graphs
439
440\verbatim
441int PS_rna_plot ( char *string,
442 char *structure,
443 char *file)
444\endverbatim
445\copybrief PS_rna_plot()
446
447\verbatim
448int PS_rna_plot_a (
449 char *string,
450 char *structure,
451 char *file,
452 char *pre,
453 char *post)
454\endverbatim
455\copybrief PS_rna_plot_a()
456
457\verbatim
458int gmlRNA (char *string,
459 char *structure,
460 char *ssfile,
461 char option)
462\endverbatim
463\copybrief gmlRNA()
464
465\verbatim
466int ssv_rna_plot (char *string,
467 char *structure,
468 char *ssfile)
469\endverbatim
470\copybrief ssv_rna_plot()
471
472\verbatim
473int svg_rna_plot (char *string,
474 char *structure,
475 char *ssfile)
476\endverbatim
477\copybrief svg_rna_plot()
478
479\verbatim
480int xrna_plot ( char *string,
481 char *structure,
482 char *ssfile)
483\endverbatim
484\copybrief xrna_plot()
485
486\verbatim
487int rna_plot_type
488\endverbatim
489\copybrief rna_plot_type
490
491Two low-level functions provide direct access to the graph layout
492algorithms:
493
494\verbatim
495int simple_xy_coordinates ( short *pair_table,
496 float *X,
497 float *Y)
498\endverbatim
499\copybrief simple_xy_coordinates()
500
501\verbatim
502int naview_xy_coordinates ( short *pair_table,
503 float *X,
504 float *Y)
505\endverbatim
506\copybrief naview_xy_coordinates()
507
508\see PS_dot.h and naview.h for more detailed descriptions.
509
510\htmlonly
511<hr>
512<a href="#toc">Table of Contents</a>
513<hr>
514\endhtmlonly
515
516\section utils_dot Producing (colored) dot plots for base pair probabilities
517
518\verbatim
519int PS_color_dot_plot ( char *string,
520 cpair *pi,
521 char *filename)
522\endverbatim
523\copybrief PS_color_dot_plot()
524
525\verbatim
526int PS_color_dot_plot_turn (char *seq,
527 cpair *pi,
528 char *filename,
529 int winSize)
530\endverbatim
531\copybrief PS_color_dot_plot_turn()
532
533\verbatim
534int PS_dot_plot_list (char *seq,
535 char *filename,
536 plist *pl,
537 plist *mf,
538 char *comment)
539\endverbatim
540\copybrief PS_dot_plot_list()
541
542\verbatim
543int PS_dot_plot_turn (char *seq,
544 struct plist *pl,
545 char *filename,
546 int winSize)
547\endverbatim
548\copybrief PS_dot_plot_turn()
549
550\see PS_dot.h for more detailed descriptions.
551
552\section utils_aln Producing (colored) alignments
553
554\verbatim
555int PS_color_aln (
556 const char *structure,
557 const char *filename,
558 const char *seqs[],
559 const char *names[])
560\endverbatim
561\copybrief PS_color_aln()
562
563\htmlonly
564<hr>
565<a href="#toc">Table of Contents</a>
566<hr>
567\endhtmlonly
568
569\section utils_seq RNA sequence related utilities
570
571Several functions provide useful applications to RNA sequences:
572
573\verbatim
574char *random_string (int l,
575 const char symbols[])
576\endverbatim
577\copybrief random_string()
578
579\verbatim
580int hamming ( const char *s1,
581 const char *s2)
582\endverbatim
583\copybrief hamming()
584
585\verbatim
586void str_DNA2RNA(char *sequence);
587\endverbatim
588\copybrief str_DNA2RNA()
589
590\verbatim
591void str_uppercase(char *sequence);
592\endverbatim
593\copybrief str_uppercase()
594
595\htmlonly
596<hr>
597<a href="#toc">Table of Contents</a>
598<hr>
599\endhtmlonly
600
601\section utils_struc RNA secondary structure related utilities
602
603\verbatim
604char *pack_structure (const char *struc)
605\endverbatim
606\copybrief pack_structure()
607
608\verbatim
609char *unpack_structure (const char *packed)
610\endverbatim
611\copybrief unpack_structure()
612
613\verbatim
614short *make_pair_table (const char *structure)
615\endverbatim
616\copybrief make_pair_table()
617
618\verbatim
619short *copy_pair_table (const short *pt)
620\endverbatim
621\copybrief copy_pair_table()
622
623\htmlonly
624<hr>
625<a href="#toc">Table of Contents</a>
626<hr>
627\endhtmlonly
628
629\section utils_misc Miscellaneous Utilities
630
631\verbatim
632void print_tty_input_seq (void)
633\endverbatim
634\copybrief print_tty_input_seq()
635
636\verbatim
637void print_tty_constraint_full (void)
638\endverbatim
639\copybrief print_tty_constraint_full()
640
641\verbatim
642void print_tty_constraint (unsigned int option)
643\endverbatim
644\copybrief print_tty_constraint()
645
646\verbatim
647int *get_iindx (unsigned int length)
648\endverbatim
649\copybrief get_iindx()
650
651\verbatim
652int *get_indx (unsigned int length)
653\endverbatim
654\copybrief get_indx()
655
656\verbatim
657void constrain_ptypes (
658 const char *constraint,
659 unsigned int length,
660 char *ptype,
661 int *BP,
662 int min_loop_size,
663 unsigned int idx_type)
664\endverbatim
665\copybrief constrain_ptypes()
666
667\verbatim
668char *get_line(FILE *fp);
669\endverbatim
670\copybrief get_line()
671
672\verbatim
673unsigned int read_record(
674 char **header,
675 char **sequence,
676 char ***rest,
677 unsigned int options);
678\endverbatim
679\copybrief read_record()
680
681\verbatim
682char *time_stamp (void)
683\endverbatim
684\copybrief time_stamp()
685
686\verbatim
687void warn_user (const char message[])
688\endverbatim
689\copybrief warn_user()
690
691\verbatim
692void nrerror (const char message[])
693\endverbatim
694\copybrief nrerror()
695
696\verbatim
697void init_rand (void)
698\endverbatim
699\copybrief init_rand()
700
701\verbatim
702unsigned short xsubi[3];
703\endverbatim
704\copybrief xsubi
705
706\verbatim
707double urn (void)
708\endverbatim
709\copybrief urn()
710
711\verbatim
712int int_urn (int from, int to)
713\endverbatim
714\copybrief int_urn()
715
716\verbatim
717void *space (unsigned size)
718\endverbatim
719\copybrief space()
720
721\verbatim
722void *xrealloc ( void *p,
723 unsigned size)
724\endverbatim
725\copybrief xrealloc()
726
727\see utils.h for a complete overview and detailed description of the utility functions
728
729\htmlonly
730<hr>
731<a href="#toc">Table of Contents</a>
732<hr>
733\endhtmlonly
734
735\ref mp_example "Next Page: Examples"
736
737\page mp_example Example - A Small Example Program
738
739The following program exercises the most commonly used functions of the library.
740The program folds two sequences using both the mfe and partition function
741algorithms and calculates the tree edit and profile distance of the
742resulting structures and base pairing probabilities.
743
744\code{.c}
745#include <stdio.h>
746#include <stdlib.h>
747#include <math.h>
748#include <string.h>
749#include "utils.h"
750#include "fold_vars.h"
751#include "fold.h"
752#include "part_func.h"
753#include "inverse.h"
754#include "RNAstruct.h"
755#include "treedist.h"
756#include "stringdist.h"
757#include "profiledist.h"
758
759void main()
760{
761 char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
762 *struct1,* struct2,* xstruc;
763 float e1, e2, tree_dist, string_dist, profile_dist, kT;
764 Tree *T1, *T2;
765 swString *S1, *S2;
766 float *pf1, *pf2;
767 FLT_OR_DBL *bppm;
768 /* fold at 30C instead of the default 37C */
769 temperature = 30.; /* must be set *before* initializing */
770
771 /* allocate memory for structure and fold */
772 struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
773 e1 = fold(seq1, struct1);
774
775 struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
776 e2 = fold(seq2, struct2);
777
778 free_arrays(); /* free arrays used in fold() */
779
780 /* produce tree and string representations for comparison */
781 xstruc = expand_Full(struct1);
782 T1 = make_tree(xstruc);
783 S1 = Make_swString(xstruc);
784 free(xstruc);
785
786 xstruc = expand_Full(struct2);
787 T2 = make_tree(xstruc);
788 S2 = Make_swString(xstruc);
789 free(xstruc);
790
791 /* calculate tree edit distance and aligned structures with gaps */
792 edit_backtrack = 1;
793 tree_dist = tree_edit_distance(T1, T2);
794 free_tree(T1); free_tree(T2);
795 unexpand_aligned_F(aligned_line);
796 printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);
797
798 /* same thing using string edit (alignment) distance */
799 string_dist = string_edit_distance(S1, S2);
800 free(S1); free(S2);
801 printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n",
802 aligned_line[0], e1, aligned_line[1], e2, string_dist);
803
804 /* for longer sequences one should also set a scaling factor for
805 partition function folding, e.g: */
806 kT = (temperature+273.15)*1.98717/1000.; /* kT in kcal/mol */
807 pf_scale = exp(-e1/kT/strlen(seq1));
808
809 /* calculate partition function and base pair probabilities */
810 e1 = pf_fold(seq1, struct1);
811 /* get the base pair probability matrix for the previous run of pf_fold() */
812 bppm = export_bppm();
813 pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));
814
815 e2 = pf_fold(seq2, struct2);
816 /* get the base pair probability matrix for the previous run of pf_fold() */
817 bppm = export_bppm();
818 pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));
819
820 free_pf_arrays(); /* free space allocated for pf_fold() */
821
822 profile_dist = profile_edit_distance(pf1, pf2);
823 printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n",
824 aligned_line[0], e1, aligned_line[1], e2, profile_dist);
825
826 free_profile(pf1); free_profile(pf2);
827}
828\endcode
829
830In a typical Unix environment you would compile this program using:
831\verbatim
832cc ${OPENMP_CFLAGS} -c example.c -I${hpath}
833\endverbatim
834and link using
835\verbatim
836cc ${OPENMP_CFLAGS} -o example example.o -L${lpath} -lRNA -lm
837\endverbatim
838where \e ${hpath} and \e ${lpath} point to the location of the header
839files and library, respectively.
840\note By default, the RNAlib is compiled with built-in \e OpenMP multithreading
841support. Thus, when linking your own object files to the library you have to pass
842the compiler-specific \e ${OPENMP_CFLAGS} (e.g. '-fopenmp' for \b gcc) even if your code does not
843use OpenMP-specific code. However, in that case the \e OpenMP flags may be omitted when compiling
844example.c.
845
846
847**/
848
int PS_dot_plot_list(char *seq, char *filename, plist *pl, plist *mf, char *comment)
Produce a postscript dot-plot from two pair lists.
int PS_rna_plot(char *string, char *structure, char *file)
Produce a secondary structure graph in PostScript and write it to 'filename'.
int xrna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure plot for further editing in XRNA.
int PS_rna_plot_a(char *string, char *structure, char *file, char *pre, char *post)
Produce a secondary structure graph in PostScript including additional annotation macros and write it...
int gmlRNA(char *string, char *structure, char *ssfile, char option)
Produce a secondary structure graph in Graph Meta Language (gml) and write it to a file.
int ssv_rna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure graph in SStructView format.
int svg_rna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure plot in SVG format and write it to a file.
Parsing and Coarse Graining of Structures.
void unexpand_aligned_F(char *align[2])
Converts two aligned structures in expanded notation.
char * expand_Full(const char *structure)
Convert the full structure from bracket notation to the expanded notation including root.
int edit_backtrack
Produce an alignment of the two structures being compared by tracing the editing path giving the mini...
int cost_matrix
Specify the cost matrix to be used for distance calculations.
char * aligned_line[4]
Contains the two aligned structures after a call to one of the distance functions with edit_backtrack...
MFE calculations and energy evaluations for single RNA sequences.
Here are all declarations of the global variables used throughout RNAlib.
double temperature
Rescale energy parameters to a temperature in degC.
double pf_scale
A scaling factor used by pf_fold() to avoid overflows.
void free_arrays(void)
Free arrays for mfe folding.
float fold(const char *sequence, char *structure)
Compute minimum free energy and an appropriate secondary structure of an RNA sequence.
double * export_bppm(void)
Get a pointer to the base pair probability array.
float pf_fold(const char *sequence, char *structure)
Compute the partition function of an RNA sequence.
void free_pf_arrays(void)
Free arrays for the partition function recursions.
Inverse folding routines.
Partition function of single RNA sequences.
int rna_plot_type
Switch for changing the secondary structure layout algorithm.
int simple_xy_coordinates(short *pair_table, float *X, float *Y)
Calculate nucleotide coordinates for secondary structure plot the Simple way
float * Make_bp_profile_bppm(double *bppm, int length)
condense pair probability matrix into a vector containing probabilities for unpaired,...
void free_profile(float *T)
free space allocated in Make_bp_profile
float profile_edit_distance(const float *T1, const float *T2)
Align the 2 probability profiles T1, T2 .
Functions for String Alignment.
swString * Make_swString(char *string)
Convert a structure into a format suitable for string_edit_distance().
float string_edit_distance(swString *T1, swString *T2)
Calculate the string edit distance of T1 and T2.
Definition dist_vars.h:41
this datastructure is used as input parameter in functions of PS_dot.c
Definition data_structures.h:58
this datastructure is used as input parameter in functions of PS_dot.h and others
Definition data_structures.h:48
Definition dist_vars.h:46
Functions for Tree Edit Distances.
Tree * make_tree(char *struc)
Constructs a Tree ( essentially the postorder list ) of the structure 'struc', for use in tree_edit_d...
float tree_edit_distance(Tree *T1, Tree *T2)
Calculates the edit distance of the two trees.
void free_tree(Tree *t)
Free the memory allocated for Tree t.
Various utility- and helper-functions used throughout the Vienna RNA package.
int * get_iindx(unsigned int length)
Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function relat...
void str_uppercase(char *sequence)
Convert an input sequence to uppercase.
short * copy_pair_table(const short *pt)
Get an exact copy of a pair table.
char * unpack_structure(const char *packed)
Unpack secondary structure previously packed with pack_structure()
void constrain_ptypes(const char *constraint, unsigned int length, char *ptype, int *BP, int min_loop_size, unsigned int idx_type)
Insert constraining pair types according to constraint structure string.
void print_tty_constraint(unsigned int option)
Print structure constraint characters to stdout. (constraint support is specified by option parameter...
char * time_stamp(void)
Get a timestamp.
int int_urn(int from, int to)
Generates a pseudo random integer in a specified range.
void print_tty_input_seq(void)
Print a line to stdout that asks for an input sequence.
void * space(unsigned size)
Allocate space safely.
void * xrealloc(void *p, unsigned size)
Reallocate space safely.
void init_rand(void)
Make random number seeds.
char * pack_structure(const char *struc)
Pack secondary structure, 5:1 compression using base 3 encoding.
char * get_line(FILE *fp)
Read a line of arbitrary length from a stream.
double urn(void)
get a random number from [0..1]
short * make_pair_table(const char *structure)
Create a pair table of a secondary structure.
void str_DNA2RNA(char *sequence)
Convert a DNA input sequence to RNA alphabet.
char * random_string(int l, const char symbols[])
Create a random string using characters from a specified symbol set.
int hamming(const char *s1, const char *s2)
Calculate hamming distance between two sequences.
int * get_indx(unsigned int length)
Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions.
void print_tty_constraint_full(void)
Print structure constraint characters to stdout (full constraint support)
void warn_user(const char message[])
Print a warning message.
unsigned short xsubi[3]
Current 48 bit random number.
unsigned int read_record(char **header, char **sequence, char ***rest, unsigned int options)