Parsing, converting, comparing secondary structures

Overview

// typedefs

typedef struct vrna_hx_s vrna_hx_t
typedef struct vrna_elem_prob_s vrna_ep_t

// structs

struct vrna_elem_prob_s
struct vrna_hx_s

// global variables

int loop_size[STRUC]
int helix_size[STRUC]
int loop_degree[STRUC]
int loops
int unpaired
int pairs

// global functions

char* b2HIT (const char* structure)
char* b2C (const char* structure)
char* b2Shapiro (const char* structure)
char* add_root (const char* structure)
char* expand_Shapiro (const char* coarse)
char* expand_Full (const char* structure)
char* unexpand_Full (const char* ffull)
char* unweight (const char* wcoarse)
void unexpand_aligned_F (char* align [2])
void parse_structure (const char* structure)
char* vrna_db_pack (const char* struc)
char* vrna_db_unpack (const char* packed)
short* vrna_ptable (const char* structure)

short* vrna_ptable_from_string (
    const char* string,
    unsigned int options
    )

short* vrna_pt_pk_get (const char* structure)
short* vrna_ptable_copy (const short* pt)
short* vrna_pt_ali_get (const char* structure)
short* vrna_pt_snoop_get (const char* structure)
int* vrna_loopidx_from_ptable (const short* pt)

void vrna_db_flatten (
    char* structure,
    unsigned int options
    )

void vrna_db_flatten_to (
    char* string,
    const char target [3],
    unsigned int options
    )

char* vrna_db_from_ptable (short* pt)
char* vrna_db_from_WUSS (const char* wuss)

int vrna_bp_distance (
    const char* str1,
    const char* str2
    )

unsigned int* vrna_refBPcnt_matrix (
    const short* reference_pt,
    unsigned int turn
    )

unsigned int* vrna_refBPdist_matrix (
    const short* pt1,
    const short* pt2,
    unsigned int turn
    )

char* vrna_db_from_probs (
    const FLT_OR_DBL* pr,
    unsigned int length
    )

char vrna_bpp_symbol (const float* x)

char* vrna_db_from_bp_stack (
    vrna_bp_stack_t* bp,
    unsigned int length
    )

void vrna_letter_structure (
    char* structure,
    vrna_bp_stack_t* bp,
    unsigned int length
    )

vrna_ep_t* vrna_plist (
    const char* struc,
    float pr
    )

char* vrna_db_from_plist (
    vrna_ep_t* pairs,
    unsigned int n
    )

char* vrna_db_to_element_string (const char* structure)
vrna_hx_t* vrna_hx_from_ptable (short* pt)

vrna_hx_t* vrna_hx_merge (
    const vrna_hx_t* list,
    int maxdist
    )

void assign_plist_from_db (
    vrna_ep_t** pl,
    const char* struc,
    float pr
    )

char* pack_structure (const char* struc)
char* unpack_structure (const char* packed)
short* make_pair_table (const char* structure)
short* make_pair_table_pk (const char* structure)
short* copy_pair_table (const short* pt)
short* alimake_pair_table (const char* structure)
short* make_pair_table_snoop (const char* structure)
int* make_loop_index_pt (short* pt)

int bp_distance (
    const char* str1,
    const char* str2
    )

unsigned int* make_referenceBP_array (
    short* reference_pt,
    unsigned int turn
    )

unsigned int* compute_BPdifferences (
    short* pt1,
    short* pt2,
    unsigned int turn
    )

void parenthesis_structure (
    char* structure,
    vrna_bp_stack_t* bp,
    int length
    )

void parenthesis_zuker (
    char* structure,
    vrna_bp_stack_t* bp,
    int length
    )

void letter_structure (
    char* structure,
    vrna_bp_stack_t* bp,
    int length
    )

void bppm_to_structure (
    char* structure,
    FLT_OR_DBL* pr,
    unsigned int length
    )

char bppm_symbol (const float* x)

// macros

#define STRUC
#define VRNA_BRACKETS_ALPHA
#define VRNA_BRACKETS_ANG
#define VRNA_BRACKETS_CLY
#define VRNA_BRACKETS_DEFAULT
#define VRNA_BRACKETS_RND
#define VRNA_BRACKETS_SQR
#define VRNA_PLIST_TYPE_BASEPAIR
#define VRNA_PLIST_TYPE_GQUAD
#define VRNA_PLIST_TYPE_H_MOTIF
#define VRNA_PLIST_TYPE_I_MOTIF
#define VRNA_PLIST_TYPE_STACK
#define VRNA_PLIST_TYPE_UD_MOTIF

Detailed Documentation

Typedefs

typedef struct vrna_hx_s vrna_hx_t
Convenience typedef for data structure vrna_hx_s .
typedef struct vrna_elem_prob_s vrna_ep_t
Convenience typedef for data structure vrna_elem_prob_s.

Global Variables

int loop_size [STRUC]
contains a list of all loop sizes. loop_size[0] contains the number of external bases.
int helix_size [STRUC]
contains a list of all stack sizes.
int loop_degree [STRUC]
contains the corresponding list of loop degrees.
int loops
contains the number of loops ( and therefore of stacks ).
int unpaired
contains the number of unpaired bases.
int pairs
contains the number of base pairs in the last parsed structure.

Global Functions

char* b2HIT (const char* structure)
Converts the full structure from bracket notation to the HIT notation including root.

Parameters:

structure  

Returns:

char* b2C (const char* structure)
Converts the full structure from bracket notation to a coarse grained notation using the ‘H’ ‘B’ ‘I’ ‘M’ and ‘R’ identifiers.

Parameters:

structure  

Returns:

char* b2Shapiro (const char* structure)
Converts the full structure from bracket notation to the weighted coarse grained notation using the ‘H’ ‘B’ ‘I’ ‘M’ ‘S’ ‘E’ and ‘R’ identifiers.

Parameters:

structure  

Returns:

char* add_root (const char* structure)
Adds a root to an un-rooted tree in any notation except bracket notation.

Parameters:

structure  

Returns:

char* expand_Shapiro (const char* coarse)
Inserts missing ‘S’ identifiers in unweighted coarse grained structures as obtained from b2C() .

Parameters:

coarse  

Returns:

char* expand_Full (const char* structure)
Convert the full structure from bracket notation to the expanded notation including root.

Parameters:

structure  

Returns:

char* unexpand_Full (const char* ffull)
Restores the bracket notation from an expanded full or HIT tree, that is any tree using only identifiers ‘U’ ‘P’ and ‘R’.

Parameters:

ffull  

Returns:

char* unweight (const char* wcoarse)
Strip weights from any weighted tree.

Parameters:

wcoarse  

Returns:

void unexpand_aligned_F (char* align [2])
Converts two aligned structures in expanded notation.

Takes two aligned structures as produced by tree_edit_distance() function back to bracket notation with ‘_’ as the gap character. The result overwrites the input.

Parameters:

align  
void parse_structure (const char* structure)
Collects a statistic of structure elements of the full structure in bracket notation.

The function writes to the following global variables: loop_size , loop_degree , helix_size , loops , pairs , unpaired

Parameters:

structure  

Returns:

char* vrna_db_pack (const char* struc)
Pack secondary structure, 5:1 compression using base 3 encoding.

Returns a binary string encoding of the secondary structure using a 5:1 compression scheme. The string is NULL terminated and can therefore be used with standard string functions such as strcmp(). Useful for programs that need to keep many structures in memory.

Parameters:

struc The secondary structure in dot-bracket notation

Returns:

The binary encoded structure

See also:

vrna_db_unpack()

char* vrna_db_unpack (const char* packed)
Unpack secondary structure previously packed with vrna_db_pack()

Translate a compressed binary string produced by vrna_db_pack() back into the familiar dot-bracket notation.

Parameters:

packed The binary encoded packed secondary structure

Returns:

The unpacked secondary structure in dot-bracket notation

See also:

vrna_db_pack()

short* vrna_ptable (const char* structure)
Create a pair table of a secondary structure.

Returns a newly allocated table, such that table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure.

Parameters:

structure The secondary structure in dot-bracket notation

Returns:

A pointer to the created pair_table

short* vrna_ptable_from_string (
    const char* string,
    unsigned int options
    )
Create a pair table for a secondary structure string.

This function takes an input string of a secondary structure annotation in Dot-Bracket Notation (a.k.a. Dot-Parenthesis Notation) or Extended Dot-Bracket Notation , and converts it into a pair table representation.

Parameters:

string Secondary structure in Extended Dot-Bracket Notation
options A bitmask to specify which brackets are recognized during conversion to pair table

Returns:

A pointer to a new pair table of the provided secondary structure

short* vrna_pt_pk_get (const char* structure)
Create a pair table of a secondary structure (pseudo-knot version)

Returns a newly allocated table, such that table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure.

In contrast to vrna_ptable() this function also recognizes the base pairs denoted by ‘[‘ and ‘]’ brackets.

Parameters:

structure The secondary structure in (extended) dot-bracket notation

Returns:

A pointer to the created pair_table

short* vrna_ptable_copy (const short* pt)
Get an exact copy of a pair table.

Parameters:

pt The pair table to be copied

Returns:

A pointer to the copy of ‘pt’

short* vrna_pt_ali_get (const char* structure)
Create a pair table of a secondary structure (snoop align version)
short* vrna_pt_snoop_get (const char* structure)
Create a pair table of a secondary structure (snoop version)
Returns a newly allocated table, such that: table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure. The special pseudoknotted H/ACA-mRNA structure is taken into account.
int* vrna_loopidx_from_ptable (const short* pt)
Get a loop index representation of a structure.
void vrna_db_flatten (
    char* structure,
    unsigned int options
    )
Substitute pairs of brackets in a string with parentheses.

This function can be used to convert brackets of unusual types, such as angular brackets <> , to dot-bracket format. The options parameter is used to specify which types of brackets will be replaced by round parentheses () .

SWIG Wrapper Notes This function flattens an input structure string in-place! The second parameter is optional and defaults to VRNA_BRACKETS_DEFAULT .

An overloaded version of this function exists, where an additional second parameter can be passed to specify the target brackets, i.e. the type of matching pair characters all brackets will be flattened to. Therefore, in the scripting language interface this function is a replacement for vrna_db_flatten_to() .

Parameters:

structure The structure string where brackets are flattened in-place
options A bitmask to specify which types of brackets should be flattened out
void vrna_db_flatten_to (
    char* string,
    const char target [3],
    unsigned int options
    )
Substitute pairs of brackets in a string with another type of pair characters.

This function can be used to replace brackets in a structure annotation string, such as square brackets [] , to another type of pair characters, e.g. angular brackets <> .

The target array must contain a character for the ‘pair open’ annotation at position 0, and one for ‘pair close’ at position 1. The options parameter is used to specify which types of brackets will be replaced by the new pairs.

SWIG Wrapper Notes This function is available as an overloaded version of vrna_db_flatten()

Parameters:

string The structure string where brackets are flattened in-place
target The new pair characters the string will be flattened to
options A bitmask to specify which types of brackets should be flattened out
char* vrna_db_from_ptable (short* pt)
Convert a pair table into dot-parenthesis notation.

Parameters:

pt The pair table to be converted

Returns:

A char pointer to the dot-bracket string

char* vrna_db_from_WUSS (const char* wuss)
Convert a WUSS annotation string to dot-bracket format.

Parameters:

wuss The input string in WUSS notation

Returns:

A dot-bracket notation of the input secondary structure

Note

This function flattens all brackets, and treats pseudo-knots annotated by matching pairs of upper/lowercase letters as unpaired nucleotides

int vrna_bp_distance (
    const char* str1,
    const char* str2
    )
Compute the “base pair” distance between two secondary structures str1 and str2.

The structures should have the same length. The distance is the number of base pairs present in one structure but not in the other, which is the same as the edit distance with open-pair and close-pair as the move-set.

Parameters:

str1 First structure in dot-bracket notation
str2 Second structure in dot-bracket notation

Returns:

The base pair distance between str1 and str2

unsigned int* vrna_refBPcnt_matrix (
    const short* reference_pt,
    unsigned int turn
    )
Make a reference base pair count matrix.
Get an upper triangular matrix containing the number of basepairs of a reference structure for each interval [i,j] with i<j. Access it via iindx!!!
unsigned int* vrna_refBPdist_matrix (
    const short* pt1,
    const short* pt2,
    unsigned int turn
    )
Make a reference base pair distance matrix.
Get an upper triangular matrix containing the base pair distance of two reference structures for each interval [i,j] with i<j. Access it via iindx!!!
char* vrna_db_from_probs (
    const FLT_OR_DBL* pr,
    unsigned int length
    )
Create a dot-bracket like structure string from base pair probability matrix.
char vrna_bpp_symbol (const float* x)
Get a pseudo dot bracket notation for a given probability information.
char* vrna_db_from_bp_stack (
    vrna_bp_stack_t* bp,
    unsigned int length
    )
Create a dot-bracket/parenthesis structure from backtracking stack.

This function is capable of creating dot-bracket structures from suboptimal structure prediction sensu M. Zuker.

Parameters:

bp Base pair stack containing the traced base pairs
length The length of the structure

Returns:

The secondary structure in dot-bracket notation as provided in the input

vrna_ep_t* vrna_plist (
    const char* struc,
    float pr
    )
Create a vrna_ep_t from a dot-bracket string.

The dot-bracket string is parsed and for each base pair an entry in the plist is created. The probability of each pair in the list is set by a function parameter.

The end of the plist is marked by sequence positions i as well as j equal to 0. This condition should be used to stop looping over its entries

Parameters:

struc The secondary structure in dot-bracket notation
pr The probability for each base pair used in the plist

Returns:

The plist array

char* vrna_db_from_plist (
    vrna_ep_t* pairs,
    unsigned int n
    )
Convert a list of base pairs into dot-bracket notation.

Parameters:

pairs A vrna_ep_t containing the pairs to be included in the dot-bracket string
n The length of the structure (number of nucleotides)

Returns:

The dot-bracket string containing the provided base pairs

See also:

vrna_plist()

void assign_plist_from_db (
    vrna_ep_t** pl,
    const char* struc,
    float pr
    )
Create a vrna_ep_t from a dot-bracket string.

The dot-bracket string is parsed and for each base pair an entry in the plist is created. The probability of each pair in the list is set by a function parameter.

The end of the plist is marked by sequence positions i as well as j equal to 0. This condition should be used to stop looping over its entries

Deprecated Use vrna_plist() instead

Parameters:

pl A pointer to the vrna_ep_t that is to be created
struc The secondary structure in dot-bracket notation
pr The probability for each base pair
char* pack_structure (const char* struc)
Pack secondary structure, 5:1 compression using base 3 encoding.

Returns a binary string encoding of the secondary structure using a 5:1 compression scheme. The string is NULL terminated and can therefore be used with standard string functions such as strcmp(). Useful for programs that need to keep many structures in memory.

Deprecated Use vrna_db_pack() as a replacement

Parameters:

struc The secondary structure in dot-bracket notation

Returns:

The binary encoded structure

char* unpack_structure (const char* packed)
Unpack secondary structure previously packed with pack_structure()

Translate a compressed binary string produced by pack_structure() back into the familiar dot-bracket notation.

Deprecated Use vrna_db_unpack() as a replacement

Parameters:

packed The binary encoded packed secondary structure

Returns:

The unpacked secondary structure in dot-bracket notation

short* make_pair_table (const char* structure)
Create a pair table of a secondary structure.

Returns a newly allocated table, such that table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure.

Deprecated Use vrna_ptable() instead

Parameters:

structure The secondary structure in dot-bracket notation

Returns:

A pointer to the created pair_table

short* copy_pair_table (const short* pt)
Get an exact copy of a pair table.

Deprecated Use vrna_ptable_copy() instead

Parameters:

pt The pair table to be copied

Returns:

A pointer to the copy of ‘pt’

short* alimake_pair_table (const char* structure)

Pair table for snoop align

Deprecated Use vrna_pt_ali_get() instead!

short* make_pair_table_snoop (const char* structure)
Returns a newly allocated table, such that: table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure. The special pseudoknotted H/ACA-mRNA structure is taken into account.

Deprecated Use vrna_pt_snoop_get() instead!
int bp_distance (
    const char* str1,
    const char* str2
    )
Compute the “base pair” distance between two secondary structures str1 and str2.

The structures should have the same length. The distance is the number of base pairs present in one structure but not in the other, which is the same as the edit distance with open-pair and close-pair as the move-set.

Deprecated Use vrna_bp_distance instead

Parameters:

str1 First structure in dot-bracket notation
str2 Second structure in dot-bracket notation

Returns:

The base pair distance between str1 and str2

unsigned int* make_referenceBP_array (
    short* reference_pt,
    unsigned int turn
    )
Make a reference base pair count matrix.

Get an upper triangular matrix containing the number of basepairs of a reference structure for each interval [i,j] with i<j. Access it via iindx!!!

Deprecated Use vrna_refBPcnt_matrix() instead

unsigned int* compute_BPdifferences (
    short* pt1,
    short* pt2,
    unsigned int turn
    )
Make a reference base pair distance matrix.

Get an upper triangular matrix containing the base pair distance of two reference structures for each interval [i,j] with i<j. Access it via iindx!!!

Deprecated Use vrna_refBPdist_matrix() instead

void parenthesis_structure (
    char* structure,
    vrna_bp_stack_t* bp,
    int length
    )
Create a dot-bracket/parenthesis structure from backtracking stack.

Deprecated use vrna_parenthesis_structure() instead

Note

This function is threadsafe

void parenthesis_zuker (
    char* structure,
    vrna_bp_stack_t* bp,
    int length
    )
Create a dot-bracket/parenthesis structure from backtracking stack obtained by zuker suboptimal calculation in cofold.c.

Deprecated use vrna_parenthesis_zuker instead

Note

This function is threadsafe

void bppm_to_structure (
    char* structure,
    FLT_OR_DBL* pr,
    unsigned int length
    )
Create a dot-bracket like structure string from base pair probability matrix.
Deprecated Use vrna_db_from_probs() instead!
char bppm_symbol (const float* x)
Get a pseudo dot bracket notation for a given probability information.
Deprecated Use vrna_bpp_symbol() instead!

Macros

#define VRNA_BRACKETS_ALPHA
Bitflag to indicate secondary structure notations using uppercase/lowercase letters from the Latin alphabet.
#define VRNA_BRACKETS_ANG
Bitflag to indicate secondary structure notations using angular brackets, <>
#define VRNA_BRACKETS_CLY
Bitflag to indicate secondary structure notations using curly brackets, {}
#define VRNA_BRACKETS_DEFAULT
Default bitmask to indicate secondary structure notation using any pair of brackets.
#define VRNA_BRACKETS_RND
Bitflag to indicate secondary structure notations using round brackets (parentheses), ()
#define VRNA_BRACKETS_SQR
Bitflag to indicate secondary structure notations using square brackets, []