Utilities to deal with Nucleotide Alphabets

Functions to cope with various aspects related to the nucleotide sequence alphabet.


#include <ViennaRNA/sequence.h>
#include <ViennaRNA/sequence.h>


typedef struct vrna_sequence_s vrna_seq_t
#include <ViennaRNA/sequence.h>

Type name for the nucleotide sequence representation data structure vrna_sequence_s.

typedef struct vrna_alignment_s vrna_msa_t
#include <ViennaRNA/sequence.h>


enum vrna_seq_type_e

An enumerator used in vrna_sequence_s to distinguish different nucleotide sequences.



Nucleotide sequence represents an Unknown type.

enumerator VRNA_SEQ_RNA

Nucleotide sequence represents an RNA type.

enumerator VRNA_SEQ_DNA

Nucleotide sequence represents a DNA type.


unsigned int vrna_sequence_length_max(unsigned int options)
#include <ViennaRNA/alphabet.h>
int vrna_nucleotide_IUPAC_identity(char a, char b)
#include <ViennaRNA/alphabet.h>
void vrna_ptypes_prepare(vrna_fold_compound_t *fc, unsigned int options)
#include <ViennaRNA/alphabet.h>
char *vrna_ptypes(const short *S, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>

Get an array of the numerical encoding for each possible base pair (i, j).


This array is always indexed in column-wise order, in contrast to the previously different indexing schemes of the MFE and partition function (pf) variants!

short *vrna_seq_encode(const char *sequence, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>

Get a numerical representation of the nucleotide sequence.

SWIG Wrapper Notes:

In the target scripting language, this function is wrapped as the overloaded function seq_encode(), where the last parameter, the model_details data structure, is optional. If it is omitted, default model settings are applied, i.e., the default nucleotide letter conversion. The wrapped function returns a list/tuple of integer representations of the input sequence. See, e.g., RNA.seq_encode() in the Python API.

  • sequence – The input sequence in upper-case letters

  • md – A pointer to a vrna_md_t data structure that specifies the conversion type


Returns: A list of integer encodings for each sequence letter (1-based). Position 0 denotes the length of the list.

short *vrna_seq_encode_simple(const char *sequence, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>

Get a numerical representation of the nucleotide sequence (simple version)

int vrna_nucleotide_encode(char c, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>

Encode a nucleotide character to numerical value.

This function encodes a nucleotide character to its numerical representation as required by many functions in RNAlib.

  • c – The nucleotide character to encode

  • md – The model details that determine the kind of encoding


Returns: The encoded nucleotide

char vrna_nucleotide_decode(int enc, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>

Decode a numerical representation of a nucleotide back into the nucleotide alphabet.

This function decodes a numerical representation of a nucleotide character back into the nucleotide alphabet.

  • enc – The encoded nucleotide

  • md – The model details that determine the kind of decoding


Returns: The decoded nucleotide character

void vrna_aln_encode(const char *sequence, short **S_p, short **s5_p, short **s3_p, char **ss_p, unsigned int **as_p, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>
unsigned int vrna_get_ptype_md(int i, int j, vrna_md_t *md)
#include <ViennaRNA/alphabet.h>
unsigned int vrna_get_ptype(int ij, char *ptype)
#include <ViennaRNA/alphabet.h>
unsigned int vrna_get_ptype_window(int i, int j, char **ptype)
#include <ViennaRNA/alphabet.h>
vrna_seq_t *vrna_sequence(const char *string, unsigned int options)
#include <ViennaRNA/sequence.h>
int vrna_sequence_add(vrna_fold_compound_t *fc, const char *string, unsigned int options)
#include <ViennaRNA/sequence.h>
int vrna_sequence_remove(vrna_fold_compound_t *fc, unsigned int i)
#include <ViennaRNA/sequence.h>
void vrna_sequence_remove_all(vrna_fold_compound_t *fc)
#include <ViennaRNA/sequence.h>
void vrna_sequence_prepare(vrna_fold_compound_t *fc)
#include <ViennaRNA/sequence.h>
int vrna_sequence_order_update(vrna_fold_compound_t *fc, const unsigned int *order)
#include <ViennaRNA/sequence.h>
int vrna_msa_add(vrna_fold_compound_t *fc, const char **alignment, const char **names, const unsigned char *orientation, const unsigned long long *start, const unsigned long long *genome_size, unsigned int options)
#include <ViennaRNA/sequence.h>
struct vrna_sequence_s
#include <ViennaRNA/sequence.h>

Data structure representing a nucleotide sequence.

Public Members

vrna_seq_type_e type

The type of sequence.

char *name
char *string

The string representation of the sequence.

short *encoding

The integer representation of the sequence.

short *encoding5
short *encoding3
unsigned int length

The length of the sequence.

struct vrna_alignment_s

Public Members

unsigned int n_seq
vrna_seq_t *sequences
char **gapfree_seq
unsigned int *gapfree_size
unsigned long long *genome_size
unsigned long long *start
unsigned char *orientation
unsigned int **a2s