// /usr/include/OpenMS/DATASTRUCTURES/QTCluster.h is in libopenms-dev 1.11.1-5.
// This file is owned by root:root, with mode 0o644.
// The actual contents of the file can be viewed below.
// --------------------------------------------------------------------------
// OpenMS -- Open-Source Mass Spectrometry
// --------------------------------------------------------------------------
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
//
// This software is released under a three-clause BSD license:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of any author or any participating institution
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// For a full list of authors, refer to the file AUTHORS.
// --------------------------------------------------------------------------
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// --------------------------------------------------------------------------
// $Maintainer: Hendrik Weisser $
// $Authors: Steffen Sass, Hendrik Weisser $
// --------------------------------------------------------------------------
#ifndef OPENMS_DATASTRUCTURES_QTCLUSTER_H
#define OPENMS_DATASTRUCTURES_QTCLUSTER_H
#include <OpenMS/DATASTRUCTURES/GridFeature.h>
#include <OpenMS/CHEMISTRY/AASequence.h>
#include <boost/unordered_map.hpp>
namespace OpenMS
{
// Boost compatibility switch: with 1.47 several classes (specifically
// unordered_map) got moved into a new boost::unordered namespace. This header
// names the container as OpenMSBoost::unordered_map, so whichever namespace
// is selected below determines where that name is looked up.
namespace OpenMSBoost
{
#if !(OPENMS_BOOST_VERSION_MINOR > 47)
  // Minor version <= 47 (or macro not defined): look names up in boost itself.
  using namespace boost;
#else
  // Minor version > 47: look names up in the dedicated boost::unordered namespace.
  using namespace boost::unordered;
#endif
}
/**
@brief A representation of a QT cluster used for feature grouping.
Ultimately, a cluster represents a group of corresponding features (or consensus features) from different input maps (feature maps or consensus maps).
Clusters are defined by their center points (one feature each). A cluster also stores a number of potential cluster elements (other features) from different input maps, together with their distances to the cluster center.
Every feature that satisfies certain constraints with respect to the cluster center is a @e potential cluster element. However, since a feature group can only contain one feature from each input map, only the "best" (i.e. closest to the cluster center) such feature is considered a true cluster element.
The QT clustering algorithm has the characteristic of initially producing all possible, overlapping clusters. Iteratively, the best cluster is then extracted and the clustering is recomputed for the remaining points.
In our implementation, multiple rounds of clustering are not necessary. Instead, the clustering is updated in each iteration. This is the reason for storing all potential cluster elements: When a certain cluster is finalized, its elements have to be removed from the remaining clusters, and affected clusters change their composition. (Note that clusters can also be invalidated by this, if the cluster center is being removed.)
The quality of a cluster is the normalized average distance to the cluster center for present and missing cluster elements. The distance value for missing elements (if the cluster contains no feature from a certain input map) is the user-defined threshold that marks the maximum allowed radius of a cluster.
@see QTClusterFinder
@ingroup Datastructures
*/
class OPENMS_DLLAPI QTCluster
{
private:
/**
* @brief Mapping: input map -> distance to center (ordered!) -> neighboring point
* @note There should never be an empty sub-map! (When a sub-map becomes empty, it should be removed from the overall map.)
*/
typedef OpenMSBoost::unordered_map<Size, std::multimap<DoubleReal, GridFeature *> > NeighborMap;
/// Pointer to the cluster center
GridFeature * center_point_;
/**
* @brief Neighbors of the cluster center, sorted by distance, for different input maps.
*
* The first (best) point in each sub-map is considered a cluster element.
*/
NeighborMap neighbors_;
/// Maximum distance of a point that can still belong to the cluster
DoubleReal max_distance_;
/// Number of input maps
Size num_maps_;
/// Quality of the cluster
DoubleReal quality_;
/// Has the cluster changed (if yes, quality needs to be recomputed)?
bool changed_;
/// Keep track of peptide IDs and use them for matching?
bool use_IDs_;
/**
* @brief Set of annotations of the cluster
*
* The set of peptide sequences that is compatible to the cluster center and results in the best cluster quality.
*/
std::set<AASequence> annotations_;
/// Base constructor (not accessible)
QTCluster();
/// Computes the quality of the cluster
void computeQuality_();
/**
* @brief Finds the optimal annotation (peptide sequences) for the cluster
*
* The optimal annotation is the one that results in the best quality. It is stored in @p annotations_;
*
* @returns The total distance between cluster elements and the center.
*/
DoubleReal optimizeAnnotations_();
bool valid_;
public:
/**
* @brief Detailed constructor
* @param center_point Pointer to the center point
* @param num_maps Number of input maps
* @param max_distance Maximum allowed distance of two points
* @param use_IDs Use peptide annotations?
*/
QTCluster(GridFeature * center_point, Size num_maps,
DoubleReal max_distance, bool use_IDs);
/// Destructor
virtual ~QTCluster();
/// Returns the RT value of the cluster
DoubleReal getCenterRT() const;
/// Returns the m/z value of the cluster center
DoubleReal getCenterMZ() const;
/// Returns the size of the cluster (number of elements, incl. center)
Size size() const;
/// Compare by quality
bool operator<(QTCluster & cluster);
/**
* @brief Adds a new element/neighbor to the cluster
* @note There is no check whether the element/neighbor already exists in the cluster!
* @param element The element to be added
* @param distance Distance of the element to the center point
*/
void add(GridFeature * element, DoubleReal distance);
/// Gets the clustered elements
void getElements(OpenMSBoost::unordered_map<Size, GridFeature *> & elements);
/**
* @brief Updates the cluster after data points were removed
* @return Whether the cluster is still valid (it's not if the cluster center is among the removed points).
*/
bool update(const OpenMSBoost::unordered_map<Size, GridFeature *> & removed);
/// Returns the cluster quality
DoubleReal getQuality();
/// Return the set of peptide sequences annotated to the cluster center
const std::set<AASequence> & getAnnotations();
inline void setInvalid() {valid_ = false;}
inline bool isInvalid() {return !valid_;}
NeighborMap getNeighbors() {return neighbors_;}
};
}
#endif // OPENMS_DATASTRUCTURES_QTCLUSTER_H