RNAlib-2.2.0-RC2
mainpage.h
1 /*
2 #############################################################
3 # The next comment is used to order the modules correctly #
4 #############################################################
5 */
6 
133 /*
134 #############################################################
135 # Now the mainpage text is following #
136 #############################################################
137 */
138 
352  { 0, 2, 2, 2, 2, 1, 1}, /* Null replaced */
353  { 2, 0, 2, 2, 2, INF, INF}, /* H replaced */
354  { 2, 2, 0, 1, 2, INF, INF}, /* B replaced */
355  { 2, 2, 1, 0, 2, INF, INF}, /* I replaced */
356  { 2, 2, 2, 2, 0, INF, INF}, /* M replaced */
357  { 1, INF, INF, INF, INF, 0, INF}, /* S replaced */
358  { 1, INF, INF, INF, INF, INF, 0}, /* E replaced */
359 
360 
361 /* Null, H, B, I, M, S, E */
362  { 0, 100, 5, 5, 75, 5, 5}, /* Null replaced */
363  { 100, 0, 8, 8, 8, INF, INF}, /* H replaced */
364  { 5, 8, 0, 3, 8, INF, INF}, /* B replaced */
365  { 5, 8, 3, 0, 8, INF, INF}, /* I replaced */
366  { 75, 8, 8, 8, 0, INF, INF}, /* M replaced */
367  { 5, INF, INF, INF, INF, 0, INF}, /* S replaced */
368  { 5, INF, INF, INF, INF, INF, 0}, /* E replaced */
369 @endverbatim
370 
371 The lower matrix uses the costs given in @cite shapiro:1990.
372 All distance functions use the following global variables:
373 
374 @verbatim
375 int cost_matrix;
376 @endverbatim
377 @copybrief cost_matrix
378 
379 @verbatim
380 int edit_backtrack;
381 @endverbatim
382 @copybrief edit_backtrack
383 
384 @verbatim
385 char *aligned_line[4];
386 @endverbatim
387 @copybrief aligned_line
388 
389 @see utils.h, dist_vars.h and stringdist.h for more details
390 
391 <h3>Functions for Tree Edit Distances</h3>
392 
393 @verbatim
394 Tree *make_tree (char *struc)
395 @endverbatim
396 @copybrief make_tree()
397 
398 @verbatim
399 float tree_edit_distance (Tree *T1,
400  Tree *T2)
401 @endverbatim
402 @copybrief tree_edit_distance()
403 
404 @verbatim
405 void free_tree(Tree *t)
406 @endverbatim
407 @copybrief free_tree()
408 
409 @see dist_vars.h and treedist.h for prototypes and more detailed descriptions
410 
411 <h3>Functions for String Alignment</h3>
412 
413 @verbatim
414 swString *Make_swString (char *string)
415 @endverbatim
416 @copybrief Make_swString()
417 
418 @verbatim
419 float string_edit_distance (swString *T1,
420  swString *T2)
421 @endverbatim
422 @copybrief string_edit_distance()
423 
424 @see dist_vars.h and stringdist.h for prototypes and more detailed descriptions
425 
426 <h3>Functions for Comparison of Base Pair Probabilities</h3>
427 
428 For comparison of base pair probability matrices, the matrices are first
429 condensed into probability profiles which are then compared by alignment.
430 
431 @verbatim
432 float *Make_bp_profile_bppm ( double *bppm,
433  int length)
434 @endverbatim
435 @copybrief Make_bp_profile_bppm()
436 
437 @verbatim
438 float profile_edit_distance ( const float *T1,
439  const float *T2)
440 @endverbatim
441 @copybrief profile_edit_distance()
442 
443 @see ProfileDist.h for prototypes and more details of the above functions
444 
445 @ref mp_utils "Next Page: Utilities"
446 
447 @page mp_utils Utilities - Odds and Ends
448 
449 @anchor toc
450 
451 <h3>Table of Contents</h3>
452 <hr>
453 
454 \li \ref utils_ss
455 \li \ref utils_dot
456 \li \ref utils_aln
457 \li \ref utils_seq
458 \li \ref utils_struc
459 \li \ref utils_misc
460 
461 <hr>
462 
463 @section utils_ss Producing secondary structure graphs
464 
465 @verbatim
466 int PS_rna_plot ( char *string,
467  char *structure,
468  char *file)
469 @endverbatim
470 @copybrief PS_rna_plot()
471 
472 @verbatim
473 int PS_rna_plot_a (
474  char *string,
475  char *structure,
476  char *file,
477  char *pre,
478  char *post)
479 @endverbatim
480 @copybrief PS_rna_plot_a()
481 
482 @verbatim
483 int gmlRNA (char *string,
484  char *structure,
485  char *ssfile,
486  char option)
487 @endverbatim
488 @copybrief gmlRNA()
489 
490 @verbatim
491 int ssv_rna_plot (char *string,
492  char *structure,
493  char *ssfile)
494 @endverbatim
495 @copybrief ssv_rna_plot()
496 
497 @verbatim
498 int svg_rna_plot (char *string,
499  char *structure,
500  char *ssfile)
501 @endverbatim
502 @copybrief svg_rna_plot()
503 
504 @verbatim
505 int xrna_plot ( char *string,
506  char *structure,
507  char *ssfile)
508 @endverbatim
509 @copybrief xrna_plot()
510 
511 @verbatim
512 int rna_plot_type
513 @endverbatim
514 @copybrief rna_plot_type
515 
516 Two low-level functions provide direct access to the graph layout
517 algorithms:
518 
519 @verbatim
520 int simple_xy_coordinates ( short *pair_table,
521  float *X,
522  float *Y)
523 @endverbatim
524 @copybrief simple_xy_coordinates()
525 
526 @verbatim
527 int naview_xy_coordinates ( short *pair_table,
528  float *X,
529  float *Y)
530 @endverbatim
531 @copybrief naview_xy_coordinates()
532 
533 @see PS_dot.h and naview.h for more detailed descriptions.
534 
535 @htmlonly
536 <hr>
537 <a href="#toc">Table of Contents</a>
538 <hr>
539 @endhtmlonly
540 
541 @section utils_dot Producing (colored) dot plots for base pair probabilities
542 
543 @verbatim
544 int PS_color_dot_plot ( char *string,
545  cpair *pi,
546  char *filename)
547 @endverbatim
548 @copybrief PS_color_dot_plot()
549 
550 @verbatim
551 int PS_color_dot_plot_turn (char *seq,
552  cpair *pi,
553  char *filename,
554  int winSize)
555 @endverbatim
556 @copybrief PS_color_dot_plot_turn()
557 
558 @verbatim
559 int PS_dot_plot_list (char *seq,
560  char *filename,
561  plist *pl,
562  plist *mf,
563  char *comment)
564 @endverbatim
565 @copybrief PS_dot_plot_list()
566 
567 @verbatim
568 int PS_dot_plot_turn (char *seq,
569  struct plist *pl,
570  char *filename,
571  int winSize)
572 @endverbatim
573 @copybrief PS_dot_plot_turn()
574 
575 @see PS_dot.h for more detailed descriptions.
576 
577 @section utils_aln Producing (colored) alignments
578 
579 @verbatim
580 int PS_color_aln (
581  const char *structure,
582  const char *filename,
583  const char *seqs[],
584  const char *names[])
585 @endverbatim
586 @copybrief PS_color_aln()
587 
588 @htmlonly
589 <hr>
590 <a href="#toc">Table of Contents</a>
591 <hr>
592 @endhtmlonly
593 
594 @section utils_seq RNA sequence related utilities
595 
596 Several functions provide useful applications to RNA sequences:
597 
598 @verbatim
599 char *random_string (int l,
600  const char symbols[])
601 @endverbatim
602 @copybrief random_string()
603 
604 @verbatim
605 int hamming ( const char *s1,
606  const char *s2)
607 @endverbatim
608 @copybrief hamming()
609 
610 @verbatim
611 void str_DNA2RNA(char *sequence);
612 @endverbatim
613 @copybrief str_DNA2RNA()
614 
615 @verbatim
616 void str_uppercase(char *sequence);
617 @endverbatim
618 @copybrief str_uppercase()
619 
620 @htmlonly
621 <hr>
622 <a href="#toc">Table of Contents</a>
623 <hr>
624 @endhtmlonly
625 
626 @section utils_struc RNA secondary structure related utilities
627 
628 @verbatim
629 char *pack_structure (const char *struc)
630 @endverbatim
631 @copybrief pack_structure()
632 
633 @verbatim
634 char *unpack_structure (const char *packed)
635 @endverbatim
636 @copybrief unpack_structure()
637 
638 @verbatim
639 short *make_pair_table (const char *structure)
640 @endverbatim
641 @copybrief make_pair_table()
642 
643 @verbatim
644 short *copy_pair_table (const short *pt)
645 @endverbatim
646 @copybrief copy_pair_table()
647 
648 @htmlonly
649 <hr>
650 <a href="#toc">Table of Contents</a>
651 <hr>
652 @endhtmlonly
653 
654 @section utils_misc Miscellaneous Utilities
655 
656 @verbatim
657 void print_tty_input_seq (void)
658 @endverbatim
659 @copybrief print_tty_input_seq()
660 
661 @verbatim
662 void print_tty_constraint_full (void)
663 @endverbatim
664 @copybrief print_tty_constraint_full()
665 
666 @verbatim
667 void print_tty_constraint (unsigned int option)
668 @endverbatim
669 @copybrief print_tty_constraint()
670 
671 @verbatim
672 int *get_iindx (unsigned int length)
673 @endverbatim
674 @copybrief get_iindx()
675 
676 @verbatim
677 int *get_indx (unsigned int length)
678 @endverbatim
679 @copybrief get_indx()
680 
681 @verbatim
682 void constrain_ptypes (
683  const char *constraint,
684  unsigned int length,
685  char *ptype,
686  int *BP,
687  int min_loop_size,
688  unsigned int idx_type)
689 @endverbatim
690 @copybrief constrain_ptypes()
691 
692 @verbatim
693 char *get_line(FILE *fp);
694 @endverbatim
695 @copybrief get_line()
696 
697 @verbatim
698 unsigned int read_record(
699  char **header,
700  char **sequence,
701  char ***rest,
702  unsigned int options);
703 @endverbatim
704 @copybrief read_record()
705 
706 @verbatim
707 char *time_stamp (void)
708 @endverbatim
709 @copybrief time_stamp()
710 
711 @verbatim
712 void warn_user (const char message[])
713 @endverbatim
714 @copybrief warn_user()
715 
716 @verbatim
717 void nrerror (const char message[])
718 @endverbatim
719 @copybrief nrerror()
720 
721 @verbatim
722 void init_rand (void)
723 @endverbatim
724 @copybrief init_rand()
725 
726 @verbatim
727 unsigned short xsubi[3];
728 @endverbatim
729 @copybrief xsubi
730 
731 @verbatim
732 double urn (void)
733 @endverbatim
734 @copybrief urn()
735 
736 @verbatim
737 int int_urn (int from, int to)
738 @endverbatim
739 @copybrief int_urn()
740 
741 @verbatim
742 void *space (unsigned size)
743 @endverbatim
744 @copybrief space()
745 
746 @verbatim
747 void *xrealloc ( void *p,
748  unsigned size)
749 @endverbatim
750 @copybrief xrealloc()
751 
752 @see utils.h for a complete overview and detailed description of the utility functions
753 
754 @htmlonly
755 <hr>
756 <a href="#toc">Table of Contents</a>
757 <hr>
758 @endhtmlonly
759 
760 @ref mp_example "Next Page: Examples"
761 
762 @page mp_example Example - A Small Example Program
763 
764 The following program exercises most commonly used functions of the library.
765 The program folds two sequences using both the mfe and partition function
766 algorithms and calculates the tree edit and profile distance of the
767 resulting structures and base pairing probabilities.
768 
769 @code{.c}
770 #include <stdio.h>
771 #include <stdlib.h>
772 #include <math.h>
773 #include <string.h>
774 #include "utils.h"
775 #include "fold_vars.h"
776 #include "fold.h"
777 #include "part_func.h"
778 #include "inverse.h"
779 #include "RNAstruct.h"
780 #include "treedist.h"
781 #include "stringdist.h"
782 #include "profiledist.h"
783 
784 void main()
785 {
786  char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
787  *struct1,* struct2,* xstruc;
788  float e1, e2, tree_dist, string_dist, profile_dist, kT;
789  Tree *T1, *T2;
790  swString *S1, *S2;
791  float *pf1, *pf2;
792  FLT_OR_DBL *bppm;
793  /* fold at 30C instead of the default 37C */
794  temperature = 30.; /* must be set *before* initializing */
795 
796  /* allocate memory for structure and fold */
797  struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
798  e1 = fold(seq1, struct1);
799 
800  struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
801  e2 = fold(seq2, struct2);
802 
803  free_arrays(); /* free arrays used in fold() */
804 
805  /* produce tree and string representations for comparison */
806  xstruc = expand_Full(struct1);
807  T1 = make_tree(xstruc);
808  S1 = Make_swString(xstruc);
809  free(xstruc);
810 
811  xstruc = expand_Full(struct2);
812  T2 = make_tree(xstruc);
813  S2 = Make_swString(xstruc);
814  free(xstruc);
815 
816  /* calculate tree edit distance and aligned structures with gaps */
817  edit_backtrack = 1;
818  tree_dist = tree_edit_distance(T1, T2);
819  free_tree(T1); free_tree(T2);
821  printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);
822 
823  /* same thing using string edit (alignment) distance */
824  string_dist = string_edit_distance(S1, S2);
825  free(S1); free(S2);
826  printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n",
827  aligned_line[0], e1, aligned_line[1], e2, string_dist);
828 
829  /* for longer sequences one should also set a scaling factor for
830  partition function folding, e.g: */
831  kT = (temperature+273.15)*1.98717/1000.; /* kT in kcal/mol */
832  pf_scale = exp(-e1/kT/strlen(seq1));
833 
834  /* calculate partition function and base pair probabilities */
835  e1 = pf_fold(seq1, struct1);
836  /* get the base pair probability matrix for the previous run of pf_fold() */
837  bppm = export_bppm();
838  pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));
839 
840  e2 = pf_fold(seq2, struct2);
841  /* get the base pair probability matrix for the previous run of pf_fold() */
842  bppm = export_bppm();
843  pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));
844 
845  free_pf_arrays(); /* free space allocated for pf_fold() */
846 
847  profile_dist = profile_edit_distance(pf1, pf2);
848  printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n",
849  aligned_line[0], e1, aligned_line[1], e2, profile_dist);
850 
851  free_profile(pf1); free_profile(pf2);
852 }
853 @endcode
854 
855 In a typical Unix environment you would compile this program using:
856 @verbatim
857 cc ${OPENMP_CFLAGS} -c example.c -I${hpath}
858 @endverbatim
859 and link using
860 @verbatim
861 cc ${OPENMP_CFLAGS} -o example example.o -L${lpath} -lRNA -lm
862 @endverbatim
863 where @e ${hpath} and @e ${lpath} point to the location of the header
864 files and library, respectively.
865 @note By default, RNAlib is compiled with built-in @e OpenMP multithreading
866 support. Thus, when linking your own object files to the library you have to pass
867 the compiler-specific @e ${OPENMP_CFLAGS} (e.g. '-fopenmp' for @b gcc) even if your code does not
868 use OpenMP-specific code. However, in that case the @e OpenMP flags may be omitted when compiling
869 example.c.
870 
871 
872 **/
873 
Tree * make_tree(char *struc)
Constructs a Tree ( essentially the postorder list ) of the structure 'struc', for use in tree_edit_d...
Functions for String Alignment.
float string_edit_distance(swString *T1, swString *T2)
Calculate the string edit distance of T1 and T2.
int simple_xy_coordinates(short *pair_table, float *X, float *Y)
Calculate nucleotide coordinates for secondary structure plot the Simple way
char * get_line(FILE *fp)
Read a line of arbitrary length from a stream.
swString * Make_swString(char *string)
Convert a structure into a format suitable for string_edit_distance().
int hamming(const char *s1, const char *s2)
Calculate hamming distance between two sequences.
void init_rand(void)
Make random number seeds.
MFE calculations for single RNA sequences.
void free_tree(Tree *t)
Free the memory allocated for Tree t.
int edit_backtrack
Produce an alignment of the two structures being compared by tracing the editing path giving the mini...
float profile_edit_distance(const float *T1, const float *T2)
Align the 2 probability profiles T1, T2 .
void free_profile(float *T)
free space allocated in Make_bp_profile
char * expand_Full(const char *structure)
Convert the full structure from bracket notation to the expanded notation including root...
double urn(void)
get a random number from [0..1]
void * xrealloc(void *p, unsigned size)
Reallocate space safely.
int int_urn(int from, int to)
Generates a pseudo random integer in a specified range.
void unexpand_aligned_F(char *align[2])
Converts two aligned structures in expanded notation.
float fold(const char *sequence, char *structure)
Compute minimum free energy and an appropriate secondary structure of an RNA sequence.
void * space(unsigned size)
Allocate space safely.
Definition: dist_vars.h:46
int ssv_rna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure graph in SStructView format.
Functions for Tree Edit Distances.
int cost_matrix
Specify the cost matrix to be used for distance calculations.
General utility- and helper-functions used throughout the ViennaRNA Package.
Parsing and Coarse Graining of Structures.
float tree_edit_distance(Tree *T1, Tree *T2)
Calculates the edit distance of the two trees.
int rna_plot_type
Switch for changing the secondary structure layout algorithm.
short * make_pair_table(const char *structure)
Create a pair table of a secondary structure.
int PS_rna_plot(char *string, char *structure, char *file)
Produce a secondary structure graph in PostScript and write it to 'filename'.
unsigned int read_record(char **header, char **sequence, char ***rest, unsigned int options)
Get a data record from stdin.
float pf_fold(const char *sequence, char *structure)
Compute the partition function of an RNA sequence.
char * random_string(int l, const char symbols[])
Create a random string using characters from a specified symbol set.
FLT_OR_DBL * export_bppm(void)
Get a pointer to the base pair probability array. Accessing the base pair probabilities for a pair (i...
this datastructure is used as input parameter in functions of PS_dot.h and others ...
Definition: data_structures.h:45
char * time_stamp(void)
Get a timestamp.
void warn_user(const char message[])
Print a warning message.
#define INF
Definition: energy_const.h:16
char * aligned_line[4]
Contains the two aligned structures after a call to one of the distance functions with edit_backtrack...
short * copy_pair_table(const short *pt)
Get an exact copy of a pair table.
void free_pf_arrays(void)
Free arrays for the partition function recursions.
void nrerror(const char message[])
Die with an error message.
Definition: dist_vars.h:41
int svg_rna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure plot in SVG format and write it to a file.
Partition function of single RNA sequences.
int PS_rna_plot_a(char *string, char *structure, char *file, char *pre, char *post)
Produce a secondary structure graph in PostScript including additional annotation macros and write it...
Inverse folding routines.
float * Make_bp_profile_bppm(FLT_OR_DBL *bppm, int length)
condense pair probability matrix into a vector containing probabilities for unpaired, upstream paired and downstream paired.
int gmlRNA(char *string, char *structure, char *ssfile, char option)
Produce a secondary structure graph in Graph Meta Language (gml) and write it to a file...
this datastructure is used as input parameter in functions of PS_dot.c
Definition: data_structures.h:55
void str_uppercase(char *sequence)
Convert an input sequence to uppercase.
double temperature
Rescale energy parameters to a temperature in degC.
char * unpack_structure(const char *packed)
Unpack secondary structure previously packed with pack_structure()
char * pack_structure(const char *struc)
Pack secondary structure, 5:1 compression using base 3 encoding.
double pf_scale
A scaling factor used by pf_fold() to avoid overflows.
Here are all declarations of the global variables used throughout RNAlib.
int PS_dot_plot_list(char *seq, char *filename, plist *pl, plist *mf, char *comment)
Produce a postscript dot-plot from two pair lists.
void print_tty_input_seq(void)
Print a line to stdout that asks for an input sequence.
void str_DNA2RNA(char *sequence)
Convert a DNA input sequence to RNA alphabet.
unsigned short xsubi[3]
Current 48 bit random number.
void free_arrays(void)
Free arrays for mfe folding.
int xrna_plot(char *string, char *structure, char *ssfile)
Produce a secondary structure plot for further editing in XRNA.