Utilities for sequence alignments

Overview

// typedefs

typedef struct vrna_pinfo_s vrna_pinfo_t
typedef struct vrna_pinfo_s pair_info

// structs

struct vrna_pinfo_s

// global functions

int vrna_aln_mpi (const char** alignment)

vrna_pinfo_t* vrna_aln_pinfo (
    vrna_fold_compound_t* vc,
    const char* structure,
    double threshold
    )

int* vrna_aln_pscore (
    const char** alignment,
    vrna_md_t* md
    )

char** vrna_aln_slice (
    const char** alignment,
    unsigned int i,
    unsigned int j
    )

void vrna_aln_free (char** alignment)
char** vrna_aln_uppercase (const char** alignment)
char** vrna_aln_toRNA (const char** alignment)

char** vrna_aln_copy (
    const char** alignment,
    unsigned int options
    )

float* vrna_aln_conservation_struct (
    const char** alignment,
    const char* structure,
    const vrna_md_t* md
    )

float* vrna_aln_conservation_col (
    const char** alignment,
    const vrna_md_t* md_p,
    unsigned int options
    )

int read_clustal (
    FILE* clust,
    char* AlignedSeqs [],
    char* names []
    )

char* consensus (const char* AS [])
char* consens_mis (const char* AS [])
char* get_ungapped_sequence (const char* seq)

int get_mpi (
    char* Alseq [],
    int n_seq,
    int length,
    int* mini
    )

void encode_ali_sequence (
    const char* sequence,
    short* S,
    short* s5,
    short* s3,
    char* ss,
    unsigned short* as,
    int circ
    )

void alloc_sequence_arrays (
    const char** sequences,
    short*** S,
    short*** S5,
    short*** S3,
    unsigned short*** a2s,
    char*** Ss,
    int circ
    )

void free_sequence_arrays (
    unsigned int n_seq,
    short*** S,
    short*** S5,
    short*** S3,
    unsigned short*** a2s,
    char*** Ss
    )

// macros

#define VRNA_ALN_DEFAULT
#define VRNA_ALN_DNA
#define VRNA_ALN_LOWERCASE
#define VRNA_ALN_RNA
#define VRNA_ALN_UPPERCASE
#define VRNA_MEASURE_SHANNON_ENTROPY

Detailed Documentation

Typedefs

typedef struct vrna_pinfo_s vrna_pinfo_t
Typename for the data structure vrna_pinfo_s that represents base pair info.
typedef struct vrna_pinfo_s pair_info
Old typename of vrna_pinfo_s .
Deprecated Use vrna_pinfo_t instead!

Global Functions

int vrna_aln_mpi (const char** alignment)
Get the mean pairwise identity (MPI) of a sequence alignment.

Parameters:

alignment Aligned sequences

Returns:

The mean pairwise identity

vrna_pinfo_t* vrna_aln_pinfo (
    vrna_fold_compound_t* vc,
    const char* structure,
    double threshold
    )
Retrieve an array of vrna_pinfo_t structures from precomputed pair probabilities.

This array of structures contains information about positionwise pair probabilities, base pair entropy and more.

Parameters:

vc The vrna_fold_compound_t of type VRNA_FC_TYPE_COMPARATIVE with precomputed partition function matrices
structure An optional structure in dot-bracket notation (may be NULL)
threshold Do not include results with pair probabilities below threshold

Returns:

The vrna_pinfo_t array

See also:

vrna_pinfo_t and vrna_pf()

char** vrna_aln_slice (
    const char** alignment,
    unsigned int i,
    unsigned int j
    )
Slice out a subalignment from a larger alignment.

Parameters:

alignment The input alignment
i The first column of the subalignment (1-based)
j The last column of the subalignment (1-based)

Returns:

The subalignment between column \(i\) and \(j\)

Note

The user is responsible for freeing the memory occupied by the returned subalignment.

See also:

vrna_aln_free()

void vrna_aln_free (char** alignment)
Free memory occupied by a set of aligned sequences.

Parameters:

alignment The input alignment
char** vrna_aln_uppercase (const char** alignment)
Create a copy of an alignment with only uppercase letters in the sequences.

Parameters:

alignment The input sequence alignment (last entry must be NULL terminated)

Returns:

A copy of the input alignment where lowercase sequence letters are replaced by uppercase letters

See also:

vrna_aln_copy

char** vrna_aln_toRNA (const char** alignment)
Create a copy of an alignment where DNA alphabet is replaced by RNA alphabet.

Parameters:

alignment The input sequence alignment (last entry must be NULL terminated)

Returns:

A copy of the input alignment where DNA alphabet is replaced by RNA alphabet (T -> U)

See also:

vrna_aln_copy

char** vrna_aln_copy (
    const char** alignment,
    unsigned int options
    )
Make a copy of a multiple sequence alignment.

This function allows one to create a copy of a multiple sequence alignment. The options parameter additionally allows for sequence manipulation, such as converting DNA to RNA alphabet, and conversion to uppercase letters.

Parameters:

alignment The input sequence alignment (last entry must be NULL terminated)
options Option flags indicating whether the aligned sequences should be converted

Returns:

A (manipulated) copy of the input alignment

float* vrna_aln_conservation_struct (
    const char** alignment,
    const char* structure,
    const vrna_md_t* md
    )
Compute base pair conservation of a consensus structure.

This function computes the base pair conservation (fraction of canonical base pairs) of a consensus structure given a multiple sequence alignment. The base pair types that are considered canonical may be specified using the #vrna_md_t.pairs array. Passing NULL as parameter md results in default pairing rules, i.e. canonical Watson-Crick and GU Wobble pairs.

SWIG Wrapper Notes This function is available in an overloaded form where the last parameter may be omitted, indicating md = NULL

Parameters:

alignment The input sequence alignment (last entry must be NULL terminated)
structure The consensus structure in dot-bracket notation
md Model details that specify compatible base pairs (may be NULL)

Returns:

A 1-based vector of base pair conservations

float* vrna_aln_conservation_col (
    const char** alignment,
    const vrna_md_t* md_p,
    unsigned int options
    )
Compute nucleotide conservation in an alignment.

This function computes the conservation of nucleotides in alignment columns. The simplest measure is Shannon Entropy and can be selected by passing the VRNA_MEASURE_SHANNON_ENTROPY flag in the options parameter.

SWIG Wrapper Notes This function is available in an overloaded form where the last two parameters may be omitted, indicating md = NULL , and options = VRNA_MEASURE_SHANNON_ENTROPY , respectively.

Parameters:

alignment The input sequence alignment (last entry must be NULL terminated)
md_p Model details that specify known nucleotides (may be NULL)
options A flag indicating which measure of conservation should be applied

Returns:

A 1-based vector of column conservations

Note

Currently, only VRNA_MEASURE_SHANNON_ENTROPY is supported as conservation measure.

int get_mpi (
    char* Alseq [],
    int n_seq,
    int length,
    int* mini
    )
Get the mean pairwise identity (MPI) of a sequence alignment.

Deprecated Use vrna_aln_mpi() as a replacement

Parameters:

Alseq  
n_seq The number of sequences in the alignment
length The length of the alignment
mini  

Returns:

The mean pairwise identity

void encode_ali_sequence (
    const char* sequence,
    short* S,
    short* s5,
    short* s3,
    char* ss,
    unsigned short* as,
    int circ
    )
Get arrays with encoded sequence of the alignment.

This function assumes that enough space is already allocated in S, s5, s3, ss and as (the size of each must be at least sequence length + 2).

Parameters:

sequence The gapped sequence from the alignment
S pointer to an array that holds encoded sequence
s5 pointer to an array that holds the next base 5’ of alignment position i
s3 pointer to an array that holds the next base 3’ of alignment position i
ss  
as  
circ assume the molecules to be circular instead of linear (circ=0)
void alloc_sequence_arrays (
    const char** sequences,
    short*** S,
    short*** S5,
    short*** S3,
    unsigned short*** a2s,
    char*** Ss,
    int circ
    )
Allocate memory for sequence array used to deal with aligned sequences.

Note that these arrays will also be initialized according to the sequence alignment given

Parameters:

sequences The aligned sequences
S A pointer to the array of encoded sequences
S5 A pointer to the array that contains the next 5’ nucleotide of a sequence position
S3 A pointer to the array that contains the next 3’ nucleotide of a sequence position
a2s A pointer to the array that contains the alignment to sequence position mapping
Ss A pointer to the array that contains the ungapped sequence
circ assume the molecules to be circular instead of linear (circ=0)
void free_sequence_arrays (
    unsigned int n_seq,
    short*** S,
    short*** S5,
    short*** S3,
    unsigned short*** a2s,
    char*** Ss
    )
Free the memory of the sequence arrays used to deal with aligned sequences.

This function frees the memory previously allocated with alloc_sequence_arrays()

Parameters:

n_seq The number of aligned sequences
S A pointer to the array of encoded sequences
S5 A pointer to the array that contains the next 5’ nucleotide of a sequence position
S3 A pointer to the array that contains the next 3’ nucleotide of a sequence position
a2s A pointer to the array that contains the alignment to sequence position mapping
Ss A pointer to the array that contains the ungapped sequence

Macros

#define VRNA_MEASURE_SHANNON_ENTROPY
Flag indicating Shannon Entropy measure.
Shannon Entropy is defined as :math:`H = - \sum_c p_c \cdot \log_2 p_c`