Python Examples
MFE Prediction (flat interface)
import RNA
# The RNA sequence
seq = "GAGUAGUGGAACCAGGCUAUGUUUGUGACUCGCAGACUAACA"
# compute minimum free energy (MFE) and corresponding structure
(ss, mfe) = RNA.fold(seq)
# print output
print("{}\n{} [ {:6.2f} ]".format(seq, ss, mfe))
MFE Prediction (object oriented interface)
import RNA;
sequence = "CGCAGGGAUACCCGCG"
# create new fold_compound object
fc = RNA.fold_compound(sequence)
# compute minimum free energy (mfe) and corresponding structure
(ss, mfe) = fc.mfe()
# print output
print("{} [ {:6.2f} ]".format(ss, mfe))
Suboptimal Structure Prediction
import RNA
sequence = "GGGGAAAACCCC"
# Set global switch for unique ML decomposition
RNA.cvar.uniq_ML = 1
subopt_data = { 'counter' : 1, 'sequence' : sequence }
# Print a subopt result as FASTA record
def print_subopt_result(structure, energy, data):
if not structure == None:
print(">subopt {:d}".format(data['counter']))
print("{}\n{} [{:6.2f}]".format(data['sequence'], structure, energy))
# increase structure counter
data['counter'] = data['counter'] + 1
# Create a 'fold_compound' for our sequence
a = RNA.fold_compound(sequence)
# Enumerate all structures 500 dacal/mol = 5 kcal/mol arround
# the MFE and print each structure using the function above
a.subopt_cb(500, print_subopt_result, subopt_data);
Boltzmann Sampling
a.k.a. Probabilistic Backtracing
import RNA
sequence = "UGGGAAUAGUCUCUUCCGAGUCUCGCGGGCGACGGGCGAUCUUCGAAAGUGGAAUCCGUACUUAUACCGCCUGUGCGGACUACUAUCCUGACCACAUAGU"
def store_structure(s, data):
"""
A simple callback function that stores
a structure sample into a list
"""
if s:
data.append(s)
"""
First we prepare a fold_compound object
"""
# create model details
md = RNA.md()
# activate unique multibranch loop decomposition
md.uniq_ML = 1
# create fold compound object
fc = RNA.fold_compound(sequence, md)
# compute MFE
(ss, mfe) = fc.mfe()
# rescale Boltzmann factors according to MFE
fc.exp_params_rescale(mfe)
# compute partition function to fill DP matrices
fc.pf()
"""
Now we are ready to perform Boltzmann sampling
"""
# 1. backtrace a single sub-structure of length 10
print("{}".format(fc.pbacktrack5(10)))
# 2. backtrace a single sub-structure of length 50
print("{}".format(fc.pbacktrack5(50)))
# 3. backtrace multiple sub-structures of length 10 at once
for s in fc.pbacktrack5(20, 10):
print("{}".format(s))
# 4. backtrace multiple sub-structures of length 50 at once
for s in fc.pbacktrack5(100, 50):
print("{}".format(s))
# 5. backtrace a single structure (full length)
print("{}".format(fc.pbacktrack()))
# 6. backtrace multiple structures at once
for s in fc.pbacktrack(100):
print("{}".format(s))
# 7. backtrace multiple structures non-redundantly
for s in fc.pbacktrack(100, RNA.PBACKTRACK_NON_REDUNDANT):
print("{}".format(s))
# 8. backtrace multiple structures non-redundantly (with resume option)
num_samples = 500
iterations = 15
d = None # pbacktrack memory object
s_list = []
for i in range(0, iterations):
d, ss = fc.pbacktrack(num_samples, d, RNA.PBACKTRACK_NON_REDUNDANT)
s_list = s_list + list(ss)
for s in s_list:
print("{}".format(s))
# 9. backtrace multiple sub-structures of length 50 in callback mode
ss = []
i = fc.pbacktrack5(100, 50, store_structure, ss)
for s in ss:
print("{}".format(s))
# 10. backtrace multiple full-length structures in callback mode
ss = list()
i = fc.pbacktrack(100, store_structure, ss)
for s in ss:
print("{}".format(s))
# 11. non-redundantly backtrace multiple full-length structures in callback mode
ss = list()
i = fc.pbacktrack(100, store_structure, ss, RNA.PBACKTRACK_NON_REDUNDANT)
for s in ss:
print("{}".format(s))
# 12. non-redundantly backtrace multiple full length structures
# in callback mode with resume option
ss = []
d = None # pbacktrack memory object
for i in range(0, iterations):
d, i = fc.pbacktrack(num_samples, store_structure, ss, d, RNA.PBACKTRACK_NON_REDUNDANT)
for s in ss:
print("{}".format(s))
# 13. backtrace a single substructure from the sequence interval [10:50]
print("{}".format(fc.pbacktrack_sub(10, 50)))
# 14. backtrace multiple substructures from the sequence interval [10:50]
for s in fc.pbacktrack_sub(100, 10, 50):
print("{}".format(s))
# 15. backtrace multiple substructures from the sequence interval [10:50] non-redundantly
for s in fc.pbacktrack_sub(100, 10, 50, RNA.PBACKTRACK_NON_REDUNDANT):
print("{}".format(s))
RNAfold -p MEA equivalent
#!/usr/bin/python
#
import RNA
seq = "AUUUCCACUAGAGAAGGUCUAGAGUGUUUGUCGUUUGUCAGAAGUCCCUAUUCCAGGUACGAACACGGUGGAUAUGUUCGACGACAGGAUCGGCGCACUA"
# create fold_compound data structure (required for all subsequently applied algorithms)
fc = RNA.fold_compound(seq)
# compute MFE and MFE structure
(mfe_struct, mfe) = fc.mfe()
# rescale Boltzmann factors for partition function computation
fc.exp_params_rescale(mfe)
# compute partition function
(pp, pf) = fc.pf()
# compute centroid structure
(centroid_struct, dist) = fc.centroid()
# compute free energy of centroid structure
centroid_en = fc.eval_structure(centroid_struct)
# compute MEA structure
(MEA_struct, MEA) = fc.MEA()
# compute free energy of MEA structure
MEA_en = fc.eval_structure(MEA_struct)
# print everything like RNAfold -p --MEA
print("{}\n{} ({:6.2f})".format(seq, mfe_struct, mfe))
print("{} [{:6.2f}]".format(pp, pf))
print("{} {{{:6.2f} d={:.2f}}}".format(centroid_struct, centroid_en, dist))
print("{} {{{:6.2f} MEA={:.2f}}}".format(MEA_struct, MEA_en, MEA))
print(" frequency of mfe structure in ensemble {:g}; ensemble diversity {:-6.2f}".format(fc.pr_structure(mfe_struct), fc.mean_bp_distance()))
MFE Consensus Structure Prediction
import RNA
# The RNA sequence alignment
sequences = [
"CUGCCUCACAACGUUUGUGCCUCAGUUACCCGUAGAUGUAGUGAGGGU",
"CUGCCUCACAACAUUUGUGCCUCAGUUACUCAUAGAUGUAGUGAGGGU",
"---CUCGACACCACU---GCCUCGGUUACCCAUCGGUGCAGUGCGGGU"
]
# compute the consensus sequence
cons = RNA.consensus(sequences)
# predict Minmum Free Energy and corresponding secondary structure
(ss, mfe) = RNA.alifold(sequences);
# print output
print("{}\n{} [ {:6.2f} ]".format(cons, ss, mfe))
MFE Prediction (deviating from default settings)
import RNA
# The RNA sequence
seq = "GAGUAGUGGAACCAGGCUAUGUUUGUGACUCGCAGACUAACA"
# create a new model details structure
md = RNA.md()
# change temperature and dangle model
md.temperature = 20.0 # 20 Deg Celcius
md.dangles = 1 # Dangle Model 1
# create a fold compound
fc = RNA.fold_compound(seq, md)
# predict Minmum Free Energy and corresponding secondary structure
(ss, mfe) = fc.mfe()
# print sequence, structure and MFE
print("{}\n{} [ {:6.2f} ]".format(seq, ss, mfe))
Fun with Soft Constraints
import RNA
seq1 = "CUCGUCGCCUUAAUCCAGUGCGGGCGCUAGACAUCUAGUUAUCGCCGCAA"
# Turn-off dangles globally
RNA.cvar.dangles = 0
# Data structure that will be passed to our MaximumMatching() callback with two components:
# 1. a 'dummy' fold_compound to evaluate loop energies w/o constraints, 2. a fresh set of energy parameters
mm_data = { 'dummy': RNA.fold_compound(seq1), 'params': RNA.param() }
# Nearest Neighbor Parameter reversal functions
revert_NN = {
RNA.DECOMP_PAIR_HP: lambda i, j, k, l, f, p: - f.eval_hp_loop(i, j) - 100,
RNA.DECOMP_PAIR_IL: lambda i, j, k, l, f, p: - f.eval_int_loop(i, j, k, l) - 100,
RNA.DECOMP_PAIR_ML: lambda i, j, k, l, f, p: - p.MLclosing - p.MLintern[0] - (j - i - k + l - 2) * p.MLbase - 100,
RNA.DECOMP_ML_ML_STEM: lambda i, j, k, l, f, p: - p.MLintern[0] - (l - k - 1) * p.MLbase,
RNA.DECOMP_ML_STEM: lambda i, j, k, l, f, p: - p.MLintern[0] - (j - i - k + l) * p.MLbase,
RNA.DECOMP_ML_ML: lambda i, j, k, l, f, p: - (j - i - k + l) * p.MLbase,
RNA.DECOMP_ML_ML_ML: lambda i, j, k, l, f, p: 0,
RNA.DECOMP_ML_UP: lambda i, j, k, l, f, p: - (j - i + 1) * p.MLbase,
RNA.DECOMP_EXT_STEM: lambda i, j, k, l, f, p: - f.eval_ext_stem(k, l),
RNA.DECOMP_EXT_EXT: lambda i, j, k, l, f, p: 0,
RNA.DECOMP_EXT_STEM_EXT: lambda i, j, k, l, f, p: - f.eval_ext_stem(i, k),
RNA.DECOMP_EXT_EXT_STEM: lambda i, j, k, l, f, p: - f.eval_ext_stem(l, j),
}
# Maximum Matching callback function (will be called by RNAlib in each decomposition step)
def MaximumMatching(i, j, k, l, d, data):
return revert_NN[d](i, j, k, l, data['dummy'], data['params'])
# Create a 'fold_compound' for our sequence
fc = RNA.fold_compound(seq1)
# Add maximum matching soft-constraints
fc.sc_add_f(MaximumMatching)
fc.sc_add_data(mm_data, None)
# Call MFE algorithm
(s, mm) = fc.mfe()
# print result
print("{}\n{} (MM: {:d})".format(seq1, s, int(-mm)))
Parsing Alignments
Reading the first entry from a STOCKHOLM 1.0 formatted MSA file msa.stk
may look like this:
num, names, aln, id, ss = RNA.file_msa_read("msa.stk")
Similarly, if the file contains more than one alignment, one can use the RNA.file_msa_read_record()
function to subsequently read each alignment separately:
with open("msa.stk") as f:
while True:
num, names, aln, id, ss = RNA.file_msa_read_record(f)
if num < 0:
break
elif num == 0:
print("empty alignment")
else:
print(names, aln)
After successfully reading the first record, the variable num
contains the number of
sequences in the alignment (the actual return value of the C-function), while the variables
names
, aln
, id
, and ss
are lists of the sequence names and aligned sequences,
as well as strings holding the alignment ID and the structure as stated in the SS_cons
line,
respectively.
Note
The last two return values may be empty strings in case the alignment does not provide the required data.
Logging
The example below demonstrates how log messages issued by the RNAlib C-library can be re-routed to the native Python logging module.
import RNA
import logging
# Set the default format and log-level for the Python `logging` module
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)
def reroute_logs(e, data):
"""
The wrapper callback function that receives RNAlib log messages
and passes them through to the Python 'logging' module
Parameters
----------
e: dict
A dictionary that contains the log message, the log level,
as well as source code file and line number that issued the
log message
data: object
A data object that is passed through to this callback
"""
log_reroute = {
RNA.LOG_LEVEL_ERROR: logging.error,
RNA.LOG_LEVEL_WARNING: logging.warning,
RNA.LOG_LEVEL_INFO: logging.info,
RNA.LOG_LEVEL_DEBUG: logging.debug
}
# compose an example log message
message = f"ViennaRNA: \"{e['message']}\" ({e['file_name']}:{e['line_number']})"
# send log message to appropriate logging channel
if e['level'] in log_reroute:
log_reroute[e['level']](message)
# turn-off RNAlib self logging
RNA.log_level_set(RNA.LOG_LEVEL_SILENT)
# attach RNAlib logging to python logging and capture all
# logs with level at least DEBUG
RNA.log_cb_add(reroute_logs, level = RNA.LOG_LEVEL_DEBUG)
# compose an example call that might issue a few debug- or other
# log messages
md = RNA.md(circ=1, gquad=1)
s = RNA.random_string(10, "ACGU")
fc = RNA.fold_compound(s, md)
fc.mfe()