RNAlib-2.2.0RC0
mainpage.h
1 /*
2 #############################################################
3 # The next comment is used to order the modules correctly #
4 #############################################################
5 */
6 
117 /*
118 #############################################################
119 # Now the mainpage text is following #
120 #############################################################
121 */
122 
336  { 0, 2, 2, 2, 2, 1, 1}, /* Null replaced */
337  { 2, 0, 2, 2, 2, INF, INF}, /* H replaced */
338  { 2, 2, 0, 1, 2, INF, INF}, /* B replaced */
339  { 2, 2, 1, 0, 2, INF, INF}, /* I replaced */
340  { 2, 2, 2, 2, 0, INF, INF}, /* M replaced */
341  { 1, INF, INF, INF, INF, 0, INF}, /* S replaced */
342  { 1, INF, INF, INF, INF, INF, 0}, /* E replaced */
343 
344 
345 /* Null, H, B, I, M, S, E */
346  { 0, 100, 5, 5, 75, 5, 5}, /* Null replaced */
347  { 100, 0, 8, 8, 8, INF, INF}, /* H replaced */
348  { 5, 8, 0, 3, 8, INF, INF}, /* B replaced */
349  { 5, 8, 3, 0, 8, INF, INF}, /* I replaced */
350  { 75, 8, 8, 8, 0, INF, INF}, /* M replaced */
351  { 5, INF, INF, INF, INF, 0, INF}, /* S replaced */
352  { 5, INF, INF, INF, INF, INF, 0}, /* E replaced */
353 \endverbatim
354 
355 The lower matrix uses the costs given in \cite shapiro:1990.
356 All distance functions use the following global variables:
357 
358 \verbatim
359 int cost_matrix;
360 \endverbatim
361 \copybrief cost_matrix
362 
363 \verbatim
364 int edit_backtrack;
365 \endverbatim
366 \copybrief edit_backtrack
367 
368 \verbatim
369 char *aligned_line[4];
370 \endverbatim
371 \copybrief aligned_line
372 
373 \see utils.h, dist_vars.h and stringdist.h for more details
374 
375 <h3>Functions for Tree Edit Distances</h3>
376 
377 \verbatim
378 Tree *make_tree (char *struc)
379 \endverbatim
380 \copybrief make_tree()
381 
382 \verbatim
383 float tree_edit_distance (Tree *T1,
384  Tree *T2)
385 \endverbatim
386 \copybrief tree_edit_distance()
387 
388 \verbatim
389 void free_tree(Tree *t)
390 \endverbatim
391 \copybrief free_tree()
392 
393 \see dist_vars.h and treedist.h for prototypes and more detailed descriptions
394 
395 <h3>Functions for String Alignment</h3>
396 
397 \verbatim
398 swString *Make_swString (char *string)
399 \endverbatim
400 \copybrief Make_swString()
401 
402 \verbatim
403 float string_edit_distance (swString *T1,
404  swString *T2)
405 \endverbatim
406 \copybrief string_edit_distance()
407 
408 \see dist_vars.h and stringdist.h for prototypes and more detailed descriptions
409 
410 <h3>Functions for Comparison of Base Pair Probabilities</h3>
411 
412 For comparison of base pair probability matrices, the matrices are first
413 condensed into probability profiles which are then compared by alignment.
414 
415 \verbatim
416 float *Make_bp_profile_bppm ( double *bppm,
417  int length)
418 \endverbatim
419 \copybrief Make_bp_profile_bppm()
420 
421 \verbatim
422 float profile_edit_distance ( const float *T1,
423  const float *T2)
424 \endverbatim
425 \copybrief profile_edit_distance()
426 
427 \see ProfileDist.h for prototypes and more details of the above functions
428 
429 \ref mp_utils "Next Page: Utilities"
430 
431 \page mp_utils Utilities - Odds and Ends
432 
433 \anchor toc
434 
435 <h3>Table of Contents</h3>
436 <hr>
437 
438 \li \ref utils_ss
439 \li \ref utils_dot
440 \li \ref utils_aln
441 \li \ref utils_seq
442 \li \ref utils_struc
443 \li \ref utils_misc
444 
445 <hr>
446 
447 \section utils_ss Producing secondary structure graphs
448 
449 \verbatim
450 int PS_rna_plot ( char *string,
451  char *structure,
452  char *file)
453 \endverbatim
454 \copybrief PS_rna_plot()
455 
456 \verbatim
457 int PS_rna_plot_a (
458  char *string,
459  char *structure,
460  char *file,
461  char *pre,
462  char *post)
463 \endverbatim
464 \copybrief PS_rna_plot_a()
465 
466 \verbatim
467 int gmlRNA (char *string,
468  char *structure,
469  char *ssfile,
470  char option)
471 \endverbatim
472 \copybrief gmlRNA()
473 
474 \verbatim
475 int ssv_rna_plot (char *string,
476  char *structure,
477  char *ssfile)
478 \endverbatim
479 \copybrief ssv_rna_plot()
480 
481 \verbatim
482 int svg_rna_plot (char *string,
483  char *structure,
484  char *ssfile)
485 \endverbatim
486 \copybrief svg_rna_plot()
487 
488 \verbatim
489 int xrna_plot ( char *string,
490  char *structure,
491  char *ssfile)
492 \endverbatim
493 \copybrief xrna_plot()
494 
495 \verbatim
496 int rna_plot_type
497 \endverbatim
498 \copybrief rna_plot_type
499 
500 Two low-level functions provide direct access to the graph layout
501 algorithms:
502 
503 \verbatim
504 int simple_xy_coordinates ( short *pair_table,
505  float *X,
506  float *Y)
507 \endverbatim
508 \copybrief simple_xy_coordinates()
509 
510 \verbatim
511 int naview_xy_coordinates ( short *pair_table,
512  float *X,
513  float *Y)
514 \endverbatim
515 \copybrief naview_xy_coordinates()
516 
517 \see PS_dot.h and naview.h for more detailed descriptions.
518 
519 \htmlonly
520 <hr>
521 <a href="#toc">Table of Contents</a>
522 <hr>
523 \endhtmlonly
524 
525 \section utils_dot Producing (colored) dot plots for base pair probabilities
526 
527 \verbatim
528 int PS_color_dot_plot ( char *string,
529  cpair *pi,
530  char *filename)
531 \endverbatim
532 \copybrief PS_color_dot_plot()
533 
534 \verbatim
535 int PS_color_dot_plot_turn (char *seq,
536  cpair *pi,
537  char *filename,
538  int winSize)
539 \endverbatim
540 \copybrief PS_color_dot_plot_turn()
541 
542 \verbatim
543 int PS_dot_plot_list (char *seq,
544  char *filename,
545  plist *pl,
546  plist *mf,
547  char *comment)
548 \endverbatim
549 \copybrief PS_dot_plot_list()
550 
551 \verbatim
552 int PS_dot_plot_turn (char *seq,
553  struct plist *pl,
554  char *filename,
555  int winSize)
556 \endverbatim
557 \copybrief PS_dot_plot_turn()
558 
559 \see PS_dot.h for more detailed descriptions.
560 
561 \section utils_aln Producing (colored) alignments
562 
563 \verbatim
564 int PS_color_aln (
565  const char *structure,
566  const char *filename,
567  const char *seqs[],
568  const char *names[])
569 \endverbatim
570 \copybrief PS_color_aln()
571 
572 \htmlonly
573 <hr>
574 <a href="#toc">Table of Contents</a>
575 <hr>
576 \endhtmlonly
577 
578 \section utils_seq RNA sequence related utilities
579 
580 Several functions provide useful operations on RNA sequences:
581 
582 \verbatim
583 char *random_string (int l,
584  const char symbols[])
585 \endverbatim
586 \copybrief random_string()
587 
588 \verbatim
589 int hamming ( const char *s1,
590  const char *s2)
591 \endverbatim
592 \copybrief hamming()
593 
594 \verbatim
595 void str_DNA2RNA(char *sequence);
596 \endverbatim
597 \copybrief str_DNA2RNA()
598 
599 \verbatim
600 void str_uppercase(char *sequence);
601 \endverbatim
602 \copybrief str_uppercase()
603 
604 \htmlonly
605 <hr>
606 <a href="#toc">Table of Contents</a>
607 <hr>
608 \endhtmlonly
609 
610 \section utils_struc RNA secondary structure related utilities
611 
612 \verbatim
613 char *pack_structure (const char *struc)
614 \endverbatim
615 \copybrief pack_structure()
616 
617 \verbatim
618 char *unpack_structure (const char *packed)
619 \endverbatim
620 \copybrief unpack_structure()
621 
622 \verbatim
623 short *make_pair_table (const char *structure)
624 \endverbatim
625 \copybrief make_pair_table()
626 
627 \verbatim
628 short *copy_pair_table (const short *pt)
629 \endverbatim
630 \copybrief copy_pair_table()
631 
632 \htmlonly
633 <hr>
634 <a href="#toc">Table of Contents</a>
635 <hr>
636 \endhtmlonly
637 
638 \section utils_misc Miscellaneous Utilities
639 
640 \verbatim
641 void print_tty_input_seq (void)
642 \endverbatim
643 \copybrief print_tty_input_seq()
644 
645 \verbatim
646 void print_tty_constraint_full (void)
647 \endverbatim
648 \copybrief print_tty_constraint_full()
649 
650 \verbatim
651 void print_tty_constraint (unsigned int option)
652 \endverbatim
653 \copybrief print_tty_constraint()
654 
655 \verbatim
656 int *get_iindx (unsigned int length)
657 \endverbatim
658 \copybrief get_iindx()
659 
660 \verbatim
661 int *get_indx (unsigned int length)
662 \endverbatim
663 \copybrief get_indx()
664 
665 \verbatim
666 void constrain_ptypes (
667  const char *constraint,
668  unsigned int length,
669  char *ptype,
670  int *BP,
671  int min_loop_size,
672  unsigned int idx_type)
673 \endverbatim
674 \copybrief constrain_ptypes()
675 
676 \verbatim
677 char *get_line(FILE *fp);
678 \endverbatim
679 \copybrief get_line()
680 
681 \verbatim
682 unsigned int read_record(
683  char **header,
684  char **sequence,
685  char ***rest,
686  unsigned int options);
687 \endverbatim
688 \copybrief read_record()
689 
690 \verbatim
691 char *time_stamp (void)
692 \endverbatim
693 \copybrief time_stamp()
694 
695 \verbatim
696 void warn_user (const char message[])
697 \endverbatim
698 \copybrief warn_user()
699 
700 \verbatim
701 void nrerror (const char message[])
702 \endverbatim
703 \copybrief nrerror()
704 
705 \verbatim
706 void init_rand (void)
707 \endverbatim
708 \copybrief init_rand()
709 
710 \verbatim
711 unsigned short xsubi[3];
712 \endverbatim
713 \copybrief xsubi
714 
715 \verbatim
716 double urn (void)
717 \endverbatim
718 \copybrief urn()
719 
720 \verbatim
721 int int_urn (int from, int to)
722 \endverbatim
723 \copybrief int_urn()
724 
725 \verbatim
726 void *space (unsigned size)
727 \endverbatim
728 \copybrief space()
729 
730 \verbatim
731 void *xrealloc ( void *p,
732  unsigned size)
733 \endverbatim
734 \copybrief xrealloc()
735 
736 \see utils.h for a complete overview and detailed description of the utility functions
737 
738 \htmlonly
739 <hr>
740 <a href="#toc">Table of Contents</a>
741 <hr>
742 \endhtmlonly
743 
744 \ref mp_example "Next Page: Examples"
745 
746 \page mp_example Example - A Small Example Program
747 
748 The following program exercises most commonly used functions of the library.
749 The program folds two sequences using both the mfe and partition function
750 algorithms and calculates the tree edit and profile distance of the
751 resulting structures and base pairing probabilities.
752 
753 \code{.c}
754 #include <stdio.h>
755 #include <stdlib.h>
756 #include <math.h>
757 #include <string.h>
758 #include "utils.h"
759 #include "fold_vars.h"
760 #include "fold.h"
761 #include "part_func.h"
762 #include "inverse.h"
763 #include "RNAstruct.h"
764 #include "treedist.h"
765 #include "stringdist.h"
766 #include "profiledist.h"
767 
768 void main()
769 {
770  char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
771  *struct1,* struct2,* xstruc;
772  float e1, e2, tree_dist, string_dist, profile_dist, kT;
773  Tree *T1, *T2;
774  swString *S1, *S2;
775  float *pf1, *pf2;
776  FLT_OR_DBL *bppm;
777  /* fold at 30C instead of the default 37C */
778  temperature = 30.; /* must be set *before* initializing */
779 
780  /* allocate memory for structure and fold */
781  struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
782  e1 = fold(seq1, struct1);
783 
784  struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
785  e2 = fold(seq2, struct2);
786 
787  free_arrays(); /* free arrays used in fold() */
788 
789  /* produce tree and string representations for comparison */
790  xstruc = expand_Full(struct1);
791  T1 = make_tree(xstruc);
792  S1 = Make_swString(xstruc);
793  free(xstruc);
794 
795  xstruc = expand_Full(struct2);
796  T2 = make_tree(xstruc);
797  S2 = Make_swString(xstruc);
798  free(xstruc);
799 
800  /* calculate tree edit distance and aligned structures with gaps */
801  edit_backtrack = 1;
802  tree_dist = tree_edit_distance(T1, T2);
803  free_tree(T1); free_tree(T2);
805  printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);
806 
807  /* same thing using string edit (alignment) distance */
808  string_dist = string_edit_distance(S1, S2);
809  free(S1); free(S2);
810  printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n",
811  aligned_line[0], e1, aligned_line[1], e2, string_dist);
812 
813  /* for longer sequences one should also set a scaling factor for
814  partition function folding, e.g: */
815  kT = (temperature+273.15)*1.98717/1000.; /* kT in kcal/mol */
816  pf_scale = exp(-e1/kT/strlen(seq1));
817 
818  /* calculate partition function and base pair probabilities */
819  e1 = pf_fold(seq1, struct1);
820  /* get the base pair probability matrix for the previous run of pf_fold() */
821  bppm = export_bppm();
822  pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));
823 
824  e2 = pf_fold(seq2, struct2);
825  /* get the base pair probability matrix for the previous run of pf_fold() */
826  bppm = export_bppm();
827  pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));
828 
829  free_pf_arrays(); /* free space allocated for pf_fold() */
830 
831  profile_dist = profile_edit_distance(pf1, pf2);
832  printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n",
833  aligned_line[0], e1, aligned_line[1], e2, profile_dist);
834 
835  free_profile(pf1); free_profile(pf2);
836 }
837 \endcode
838 
839 In a typical Unix environment you would compile this program using:
840 \verbatim
841 cc ${OPENMP_CFLAGS} -c example.c -I${hpath}
842 \endverbatim
843 and link using
844 \verbatim
845 cc ${OPENMP_CFLAGS} -o example example.o -L${lpath} -lRNA -lm
846 \endverbatim
847 where \e ${hpath} and \e ${lpath} point to the location of the header
848 files and library, respectively.
849 \note By default, the RNAlib is compiled with built-in \e OpenMP multithreading
850 support. Thus, when linking your own object files to the library you have to pass
851 the compiler specific \e ${OPENMP_CFLAGS} (e.g. '-fopenmp' for \b gcc) even if your code does not
852 use OpenMP specific code. However, in that case the \e OpenMP flags may be omitted when compiling
853 example.c
854 
855 
856 **/
857 
Tree * make_tree(char *struc)
Constructs a Tree ( essentially the postorder list ) of the structure 'struc', for use in tree_edit_d...
Functions for String Alignment.
float string_edit_distance(swString *T1, swString *T2)
Calculate the string edit distance of T1 and T2.
int svg_rna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure plot in SVG format and write it to a file.
void * space(unsigned size)
Allocate space safely.
void print_tty_constraint_full(void)
Print structure constraint characters to stdout (full constraint support)
swString * Make_swString(char *string)
Convert a structure into a format suitable for string_edit_distance().
char * expand_Full(const char *structure)
Convert the full structure from bracket notation to the expanded notation including root...
void warn_user(const char message[])
Print a warning message.
MFE calculations for single RNA sequences.
char * time_stamp(void)
Get a timestamp.
void free_tree(Tree *t)
Free the memory allocated for Tree t.
int edit_backtrack
Produce an alignment of the two structures being compared by tracing the editing path giving the mini...
float profile_edit_distance(const float *T1, const float *T2)
Align the 2 probability profiles T1, T2 .
int gmlRNA(char *string, char *structure, char *ssfile, char option)
Produce a secondary structure graph in Graph Meta Language (gml) and write it to a file...
void free_profile(float *T)
free space allocated in Make_bp_profile
int ssv_rna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure graph in SStructView format.
char * get_line(FILE *fp)
Read a line of arbitrary length from a stream.
float fold(const char *sequence, char *structure)
Compute minimum free energy and an appropriate secondary structure of an RNA sequence.
int int_urn(int from, int to)
Generates a pseudo random integer in a specified range.
Definition: dist_vars.h:46
void unexpand_aligned_F(char *align[2])
Converts two aligned structures in expanded notation.
short * make_pair_table(const char *structure)
Create a pair table of a secondary structure.
Functions for Tree Edit Distances.
int cost_matrix
Specify the cost matrix to be used for distance calculations.
void print_tty_input_seq(void)
Print a line to stdout that asks for an input sequence.
Various utility- and helper-functions used throughout the Vienna RNA package.
Parsing and Coarse Graining of Structures.
int rna_plot_type
Switch for changing the secondary structure layout algorithm.
int simple_xy_coordinates(short *pair_table, float *X, float *Y)
Calculate nucleotide coordinates for secondary structure plot the Simple way
void print_tty_constraint(unsigned int option)
Print structure constraint characters to stdout. (constraint support is specified by option parameter...
float tree_edit_distance(Tree *T1, Tree *T2)
Calculates the edit distance of the two trees.
char * pack_structure(const char *struc)
Pack secondary structure, 5:1 compression using base 3 encoding.
char * random_string(int l, const char symbols[])
Create a random string using characters from a specified symbol set.
void str_DNA2RNA(char *sequence)
Convert a DNA input sequence to RNA alphabet.
unsigned int read_record(char **header, char **sequence, char ***rest, unsigned int options)
Get a data record from stdin.
int * get_indx(unsigned int length)
Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions...
float pf_fold(const char *sequence, char *structure)
Compute the partition function of an RNA sequence.
FLT_OR_DBL * export_bppm(void)
Get a pointer to the base pair probability array. Accessing the base pair probabilities for a pair (i...
this datastructure is used as input parameter in functions of PS_dot.h and others ...
Definition: data_structures.h:94
double pf_scale
A scaling factor used by pf_fold() to avoid overflows.
void constrain_ptypes(const char *constraint, unsigned int length, char *ptype, int *BP, int min_loop_size, unsigned int idx_type)
Insert constraining pair types according to constraint structure string.
#define INF
Definition: energy_const.h:16
char * aligned_line[4]
Contains the two aligned structures after a call to one of the distance functions with edit_backtrack...
void free_pf_arrays(void)
Free arrays for the partition function recursions.
int PS_rna_plot(char *string, char *structure, char *file)
Produce a secondary structure graph in PostScript and write it to 'filename'.
void nrerror(const char message[])
Die with an error message.
char * unpack_structure(const char *packed)
Unpack secondary structure previously packed with pack_structure()
int xrna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure plot for further editing in XRNA.
int * get_iindx(unsigned int length)
Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function relat...
void * xrealloc(void *p, unsigned size)
Reallocate space safely.
Definition: dist_vars.h:41
Partition function of single RNA sequences.
short * copy_pair_table(const short *pt)
Get an exact copy of a pair table.
int PS_rna_plot_a(char *string, char *structure, char *file, char *pre, char *post)
Produce a secondary structure graph in PostScript including additional annotation macros and write it...
Inverse folding routines.
float * Make_bp_profile_bppm(FLT_OR_DBL *bppm, int length)
condense pair probability matrix into a vector containing probabilities for unpaired, upstream paired and downstream paired.
this datastructure is used as input parameter in functions of PS_dot.c
Definition: data_structures.h:104
Here are all declarations of the global variables used throughout RNAlib.
double temperature
Rescale energy parameters to a temperature in degC.
double urn(void)
get a random number from [0..1]
void init_rand(void)
Make random number seeds.
unsigned short xsubi[3]
Current 48 bit random number.
void free_arrays(void)
Free arrays for mfe folding.
void str_uppercase(char *sequence)
Convert an input sequence to uppercase.
int PS_dot_plot_list(char *seq, char *filename, plist *pl, plist *mf, char *comment)
Produce a postscript dot-plot from two pair lists.
int hamming(const char *s1, const char *s2)
Calculate hamming distance between two sequences.