Generated on Wed Apr 29 2015 11:51:40 for GGL-4.1.2 by doxygen 1.8.3.1
MoleculeDecomposition.hh
Go to the documentation of this file.
1 
2 #ifndef MOLECULEDECOMPOSITION_HH_
3 #define MOLECULEDECOMPOSITION_HH_
4 
5 #include <stdexcept>
6 
7 #include "sgm/Graph_Interface.hh"
9 #include "sgm/Match_Reporter.hh"
10 #include "sgm/RingReporter.hh"
11 #include "sgm/GM_vf2.hh"
12 #include "sgm/SGM_vf2.hh"
13 
14 #include "ggl/Graph.hh"
15 #include "ggl/chem/Molecule.hh"
17 
18 namespace ggl {
19  namespace chem {
20 
21 
22  /*! @brief Molecule energy estimation ala Jankowski et al.
23  *
24  * Molecule decomposition approach to analyze a molecules thermodynamics.
25  * The approach follows the paper by Jankowski et al. (2008)
26  *
27  * Group Contribution Method for Thermodynamic Analysis of Complex
28  * Metabolic Networks
29  * Matthew D. Jankowski, Christopher S. Henry, Linda J. Broadbelt
30  * and Vassily Hatzimanikatis
31  * Biophysical Journal, Volume 95, Issue 3, 1487-1499, 1 August 2008
32  * doi:10.1529/biophysj.107.124784
33  *
34  *
35  * @author Martin Mann - 2010 - http://www.bioinf.uni-freiburg.de/~mmann/
36  *
37  */
39  public:
40 
41  /*!
42  * Abstract class to report matched molecule components and
43  * accordingly the labeled match-graph.
44  */
46  public:
47  //! construction
49  //! destruction
51 
52  /*!
53  * Reports a component that was matched and its according ID
54  * in the final match graph
55  * @param component the component that was matched
56  * @param matchCount the number of matches found
57  * @param matchID the ID of the component in the final match graph
58  */
59  virtual
60  void
61  reportComponent( const MoleculeComponent & component,
62  const size_t matchCount,
63  const size_t matchID ) = 0;
64  /*!
65  * Reports an interaction pattern that was matched
66  * @param interactionDescription the interaction that was matched
67  * @param matchCount the number of matches found
68  */
69  virtual
70  void
71  reportInteraction( const std::string & interactionDescription,
72  const size_t matchCount ) = 0;
73 
74  /*!
75  * Reports the matching of a full small molecule.
76  * @param smallMolecule the small molecule matched
77  */
78  virtual
79  void
80  reportSmallMolecule( const MoleculeComponent & smallMolecule ) = 0;
81 
82  /*!
83  * Reports the final match graph where each matched component ID
84  * is given as class information of the matched nodes.
85  *
86  * @param graph the matched graph
87  */
88  virtual
89  void
90  reportMatchGraph( const sgm::Graph_Interface & graph ) = 0;
91 
92  /*!
93  * Reports whether or not the complete graph (all nodes) were
94  * successfully mapped or not.
95  *
96  * @param matchComplete true, if all nodes have been mapped;
97  * false otherwise.
98  */
99  virtual
100  void
101  reportMatchComplete( const bool matchComplete ) = 0;
102 
103  /*!
104  * Reports the matching of a polyphosphate chain. Only the number
105  * and match ID of the middle phosphates is given. The end
106  * phosphate is reported as independent component.
107  * @param numOfMiddlePhosphates the number of middle phosphate
108  * matches found
109  * @param matchID the ID of the component in the final match graph
110  */
111  virtual
112  void
113  reportPolyPhosphate( const size_t numOfMiddlePhosphates
114  , const size_t matchID ) = 0;
115  };
116 
117 
118  protected:
119 
120  /*!
121  * helper class to order MoleculeComponent pointers by the priority
122  * of the MoleculeComponents within an ordered STL container.
123  */
125  public:
126 
127  /*!
128  * operator to order MoleculeComponent pointers by the priority
129  * of the MoleculeComponents.
130  * @param c1 the first component
131  * @param c2 the second component
132  * @return true if the first component has higher priority compared
133  * to the second; false otherwise
134  */
135  bool operator()(const MoleculeComponent & c1,
136  const MoleculeComponent & c2 ) const;
137  };
138 
139  typedef std::set< MoleculeComponent, priority_order > ComponentContainer;
140 
141  //! set of all groups that are matched exclusively starting with the
142  //! one with lowest priority value
144 
145  //! set of all interaction patterns to be screened for
147 
148  //! set of all small molecules to be screened for
150 
151  //! correction term if a molecule is a hydrocarbon, i.e.
152  //! it contains only H and C
153  static double correctionHydroCarbon;
154 
155  //! correction term for each heteroaromatic ring within the molecule
157 
158  //! correction term for each three-membered ring
159  //! independently of the atom labels
161 
162  //! correction term for each amide within the molecule; note,
163  //! for nitrogen atom involved in two amides only one occurrence is
164  //! considered
165  static double correctionPerAmide;
166 
167  //! correction term for each thioester within the molecule; note,
168  //! for sulfur atom involved in two amides only one occurrence is
169  //! considered
170  static double correctionPerThioester;
171 
172  //! correction term for vicinal chlorines within the molecule
174 
175  //! energy contribution term for a middle phosphate within a phosphate
176  //! chain
178 
179  //! Access to the groups member. This function
180  //! initializes the container if not already done.
181  //! @return the groups container to be matched.
182  //! @thrown std::logic_error in case the group initialization failed
183  //! which should never happen
184  static
185  const ComponentContainer &
186  getGroups( void ) throw (std::logic_error);
187 
188  //! Access to the interactions member. This function
189  //! initializes the container if not already done.
190  //! @return the interactions container to be matched.
191  //! @thrown std::logic_error in case the group initialization failed
192  //! which should never happen
193  static
194  const ComponentContainer &
195  getInteractions( void ) throw (std::logic_error);
196 
197  //! Access to the smallMolecules member. This function
198  //! initializes the container if not already done.
199  //! @return the smallMolecules container to be matched.
200  //! @thrown std::logic_error in case the group initialization failed
201  //! which should never happen
202  static
203  const ComponentContainer &
204  getSmallMolecules( void ) throw (std::logic_error);
205 
206  //! Computes the number of amide matches within the molecule where for
207  //! nitrogen atom involved in two amides only one occurrence is
208  //! counted
209  //! @param matcher the sub graph matcher to be used
210  //! @param molGraph the molecule graph to check
211  //! @return the number of amides within the molecule
212  static
213  size_t
214  getAmideNumber( sgm::SubGraphMatching & matcher
215  , sgm::Graph_Interface & molGraph );
216 
217  //! Computes the number of thioester matches within the molecule
218  //! where for sulfur atoms involved in two thioester only one
219  //! occurrence is counted
220  //! @param matcher the sub graph matcher to be used
221  //! @param molGraph the molecule graph to check
222  //! @return the number of thioester within the molecule
223  static
224  size_t
225  getThioesterNumber( sgm::SubGraphMatching & matcher
226  , sgm::Graph_Interface & molGraph );
227 
228  //! Corrects the mapping of cyclic phosphates. Due to their symmetric
229  //! symmetric pattern they are matched twice. Thus, one oxygen is
230  //! mapped too much.
231  //! @param curMatchedIDs the node sets for each match that has to be
232  //! corrected
233  static
234  void
235  correctCyclicPhosphates( std::set< std::set<size_t> > & curMatchedIDs );
236 
237  //! Corrects the mapping of enclosed primary phosphates. Due to their
238  //! symmetric symmetric pattern they are matched all along the
239  //! phosphate fragment and not only at the ends.
240  //! @param curMatchedIDs the node sets for each match that has to be
241  //! corrected
242  static
243  void
244  correctEnclosedPhosphates( std::set< std::set<size_t> > & curMatchedIDs );
245 
246  protected:
247 
248  //! the energy of the currently parsed molecule
249  double energy;
250  //! the currently matched MoleculeComponent
252  //! the matched set of node indices for current MoleculeComponent
253  std::set< std::set<size_t> > curMatchedIDs;
254 
255  //! set of all nodes participating in a ring within the current
256  //! molecule handled
258 
259  //! descriptor of a ring within the molecule
261  public:
262  typedef std::pair<size_t,size_t> Edge;
263  typedef std::set<Edge> EdgeSet;
264  public:
265  //! edge set of this ring
266  EdgeSet edges;
267 
268  //! constructor
269  //! @param edges the edge set to set
270  RingDescriptor( const EdgeSet& edges )
271  : edges(edges)
272  {}
273 
274  //! constructor
275  //! @param ringList the ring to describe
276  RingDescriptor( const sgm::RingReporter::RingList & ringList );
277 
278  //! decides order based on ring set size
279  bool
280  operator <( const RingDescriptor& toCompare ) const;
281 
282  /*!
283  * Checks whether or not this ring is contained within the other
284  * larger ring as a fuse with some other ring. Therefore, the
285  * edge set difference leaves only one edge.
286  * @param largerRing the larger ring this ring might be contained
287  * @return true, if the set difference of this edge set with the
288  * edge set of the larger ring leaves only one edge
289  */
290  bool
291  isContained( const RingDescriptor& largerRing ) const;
292 
293  };
294  //! string encoding of all rings within the current molecule grouped
295  //! by their type
296  std::map< MoleculeComponent::RingFragmentType, std::vector< RingDescriptor > > curMolRings;
297 
298  //! an optional decomposition reporter
300 
301  //! the graph matching implementation to be used
303  //! the sub graph matching implementation to be used
305 
306  /*!
307  * Node constraint to ensure that a matched node is part of a ring
308  * based on the given ring node set information.
309  *
310  * This increases the probability that the ring fragment constraints,
311  * that are checked only for full matches, are fulfilled.
312  */
313  class MC_MC_RingNode : public sgm::MC_Node {
314 
315  public:
316 
317  //! the set of possible ring node ids the constrainedNodeID can
318  //! be matched on
320 
321  //! construction
322  //! @param constrainedNodeID the node ID to be constrained
323  //! @param ringNodes the set of possible ring node ids the
324  //! constrainedNodeID can be matched on
326  , const RingNodes & ringNodes)
327  : sgm::MC_Node(constrainedNodeID), ringNodes(&ringNodes)
328  {}
329 
330  //! copy construction
331  //! @param toCopy the object to copy
333  : sgm::MC_Node(toCopy.constrainedNodeID)
334  , ringNodes(toCopy.ringNodes)
335  {}
336 
337  //! destruction
338  virtual
340  {}
341 
342  /*!
343  * Creates a new heap object that equals the current object.
344  * NOTE: YOU have to delete it later on! There is no garbage
345  * collection!
346  * @return a new allocated object that equals this
347  */
348  virtual
349  MC_Node*
350  clone( void ) const {
351  return new MC_MC_RingNode(*this);
352  }
353 
354  /*!
355  * Checks whether or not a given label is part of the constraint
356  * information. This check is needed by some parsers to verify the
357  * wildcard definition.
358  *
359  * @param label the label of interest
360  * @return false, since no label information is used/stored
361  */
362  virtual
363  bool
364  isConstrainedLabel(const std::string& label) const {
365  return false;
366  }
367 
368  /*!
369  * Creates a new heap object that equals the current
370  * object but uses the new indices given by old2newIndexMapping.
371  * NOTE: YOU have to delete it later on! There is no garbage
372  * collection!
373  * @param old2newIndexMapping the index mapping to be used for the
374  * remapping
375  * @param unmatchedIndex an optional specific index that marks
376  * unmatched nodes within old2newIndexMapping. if this
377  * constrains one of these nodes, no remapping is done and
378  * NULL is returned
379  * @return a new allocated object
380  */
381  virtual
382  MC_Node*
383  remap( const sgm::Match & old2newIndexMapping, const size_t unmatchedIndex = UINT_MAX )
384  {
385  assert(this->constrainedNodeID < old2newIndexMapping.size());
386  // check if this node is an unmatched node and thus to be ignored
387  if (old2newIndexMapping.at(this->constrainedNodeID)==unmatchedIndex) {
388  return NULL;
389  }
390  // create copy
391  MC_MC_RingNode* copy = new MC_MC_RingNode(*this);
392  // do remapping
393  copy->constrainedNodeID = old2newIndexMapping.at(this->constrainedNodeID);
394  // return remapped copy
395  return copy;
396  }
397 
398 
399 
400  /*!
401  * Checks whether or not a match on a given target fulfills the
402  * additional node constraint for the pattern matching.
403  *
404  * @param pattern the pattern graph that was matched
405  * @param target the target graph the pattern was matched on
406  * @param matchedTargetID the matched node index within
407  * the target graph
408  * @return true if the match is valid; false if the constraint is
409  * violated
410  */
411  virtual
412  bool
414  const sgm::Graph_Interface & target,
415  const size_t matchedTargetID ) const
416  {
417  assert(ringNodes != NULL);
418  // check whether or not the given target id is among the
419  // known ring nodes
420  return ringNodes->find(matchedTargetID) != ringNodes->end();
421  }
422 
423 
424 
425  };
426 
427  protected:
428 
429  /*!
430  * Checks whether or not a given ring fragment is present in the
431  * molecule.
432  *
433  * @param ringFragment the ring fragment to check
434  * @param match the node matching that triggered the check
435  * @return true, if the ring fragment is matched; false otherwise
436  */
437  bool
439  , const sgm::Match & match);
440 
441  //! node label map typedef for shorter parameter lists
442  typedef boost::property_map< Molecule, PropNodeLabel>::type NodeLabelMap;
443  //! node index map typedef for shorter parameter lists
444  typedef boost::property_map< Molecule, PropNodeIndex>::const_type NodeIndexMap;
445 
446  /*!
447  * Extend the labeling of a phosphate chain starting from a given
448  * phosphorus node using a specified component label.
449  *
450  * @param m the molecule to label
451  * @param mNodeLabel the node label access for m
452  * @param mNodeIndex the node index access for m
453  * @param idxNextP the phosphorus node to start the labeling
454  * @param compID the label for the phosphate chain
455  * @return the length of the chain
456  */
457  size_t
459  , NodeLabelMap & mNodeLabel
460  , NodeIndexMap & mNodeIndex
461  , const size_t idxNextP
462  , const size_t compID
463  );
464 
465  public:
466 
467  //! construction
468  //! @param fullMatcher the graph matching implementation to be used
469  //! @param matcher the sub graph matching implementation to be used
472 
473  //! construction with a given reporter
474  //! @param fullMatcher the graph matching implementation to be used
475  //! @param matcher the sub graph matching implementation to be used
476  //! @param reporter the decomposition reporter to be used
480 
481  //! destruction
482  virtual ~MoleculeDecomposition();
483 
484  /*!
485  * Decomposes the given molecule into MoleculeComponents and derives
486  * an estimate of its energy in kcal/mol.
487  *
488  * @param mol the molecule to be analyzed
489  * @return an estimate of its energy in kcal/mol
490  */
491  double
492  getEnergy( const Molecule & mol );
493 
494  public:
495 
496  //! Handles the occurence of a MoleculeComponent within a decomposed
497  //! molecule graph.
498  //!
499  //! @param componentPattern the pattern of the MoleculeComponent to
500  //! be matched.
501  //! @param targetMol the molecule graph the pattern was found within
502  //! @param match contains the indices of the matched pattern nodes in
503  //! the target graph. match[i] corresponds to the mapping of the ith
504  //! vertex in the pattern graph.
505  void
506  reportHit ( const sgm::Pattern_Interface & componentPattern,
507  const sgm::Graph_Interface & targetMol,
508  const sgm::Match & match );
509 
510  /*!
511  * Is called to report a ring. The ring is stored for lookup of ring
512  * constraints of the MoleculeComponents matched.
513  * @param graph the graph that contains the ring
514  * @param ringList the ring to report
515  */
516  void
517  reportRing( const sgm::Graph_Interface& graph, const RingList & ringList );
518 
519  };
520 
521 
522 
523  } // namespace chem
524 } // namespace ggl
525 
526 #include "ggl/chem/MoleculeDecomposition.icc"
527 
528 #endif /* MOLECULEDECOMPOSITION_HH_ */