/usr/include/shogun/kernel/WeightedCommWordStringKernel.h is in libshogun-dev 1.1.0-4ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 1999-2009 Soeren Sonnenburg
* Written (W) 1999-2008 Gunnar Raetsch
* Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
*/
#ifndef _WEIGHTEDCOMMWORDSTRINGKERNEL_H___
#define _WEIGHTEDCOMMWORDSTRINGKERNEL_H___
#include <shogun/lib/common.h>
#include <shogun/mathematics/Math.h>
#include <shogun/kernel/CommWordStringKernel.h>
namespace shogun
{
class CCommWordStringKernel;
/** @brief The WeightedCommWordString kernel may be used to compute the weighted
* spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
* length is weighted by some coefficient \f$\beta_k\f$) from strings that have
* been mapped into unsigned 16bit integers.
*
* These 16bit integers correspond to k-mers. To be applicable in this kernel
* they need to be sorted (e.g. via the SortWordString pre-processor).
*
* It basically uses the algorithm in the unix "comm" command (hence the name)
* to compute:
*
* \f[
* k({\bf x},{\bf x'})= \sum_{k=1}^K\beta_k\Phi_k({\bf x})\cdot \Phi_k({\bf x'})
* \f]
*
* where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
* \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
* vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
*
* Note that this representation is especially tuned to small alphabets
* (like the 2-bit alphabet DNA), for which it enables spectrum kernels
* of order 8.
*
* For this kernel the linadd speedups are quite efficiently implemented using
* direct maps.
*
* The class derives from CCommWordStringKernel and adds two protected
* members: the degree and a pointer to the per-degree weights.
*/
class CWeightedCommWordStringKernel: public CCommWordStringKernel
{
public:
/** default constructor */
CWeightedCommWordStringKernel();
/** constructor
*
* @param size cache size
* @param use_sign if sign shall be used
*/
CWeightedCommWordStringKernel(int32_t size, bool use_sign);
/** constructor
*
* @param l features of left-hand side
* @param r features of right-hand side
* @param use_sign if sign shall be used (defaults to false)
* @param size cache size (defaults to 10)
*/
CWeightedCommWordStringKernel(
CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r,
bool use_sign=false, int32_t size=10);
/** destructor */
virtual ~CWeightedCommWordStringKernel();
/** initialize kernel
*
* @param l features of left-hand side
* @param r features of right-hand side
* @return if initializing was successful
*/
virtual bool init(CFeatures* l, CFeatures* r);
/** clean up kernel */
virtual void cleanup();
/** compute optimized
*
* @param idx index to compute
* @return optimized value at given index
*/
virtual float64_t compute_optimized(int32_t idx);
/** add to normal
*
* @param idx where to add
* @param weight what to add
*/
virtual void add_to_normal(int32_t idx, float64_t weight);
/** merge normal
*
* NOTE(review): takes no arguments and returns nothing; what exactly is
* merged is defined in the implementation file - confirm there.
*/
void merge_normal();
/** set weighted degree weights
*
* NOTE(review): takes no arguments; presumably the weights are derived
* from the stored degree - confirm in the implementation file.
*
* @return if setting was successful
*/
bool set_wd_weights();
/** set custom weights (swig compatible)
*
* @param w weights
* @param d degree (must match number of weights)
* @return if setting was successful
*/
bool set_weights(float64_t* w, int32_t d);
/** return what type of kernel we are
*
* @return kernel type K_WEIGHTEDCOMMWORDSTRING
*/
virtual EKernelType get_kernel_type() { return K_WEIGHTEDCOMMWORDSTRING; }
/** return the kernel's name
*
* @return name WeightedCommWordStringKernel
*/
virtual const char* get_name() const { return "WeightedCommWordStringKernel"; }
/** return feature type the kernel can deal with
*
* @return feature type F_WORD
*/
inline virtual EFeatureType get_feature_type() { return F_WORD; }
/** compute scoring
*
* @param max_degree maximum degree
* @param num_feat number of features (passed by non-const reference;
* NOTE(review): presumably written by the call - confirm)
* @param num_sym number of symbols (passed by non-const reference;
* NOTE(review): presumably written by the call - confirm)
* @param target target
* @param num_suppvec number of support vectors
* @param IDX IDX
* @param alphas alphas
* @param do_init if initialization shall be performed (defaults to true)
* @return computed score, returned as a pointer to float64_t
*/
virtual float64_t* compute_scoring(
int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
float64_t* target, int32_t num_suppvec, int32_t* IDX,
float64_t* alphas, bool do_init=true);
protected:
/** helper for compute
*
* @param idx_a index a
* @param idx_b index b
* @param do_sort if sorting shall be performed
* @return a float64_t; NOTE(review): presumably the kernel value for the
* pair (idx_a, idx_b) - confirm in the implementation file
*/
virtual float64_t compute_helper(
int32_t idx_a, int32_t idx_b, bool do_sort);
private:
/** argument-less private helper, distinct from the public
* init(CFeatures*, CFeatures*) overload.
*
* NOTE(review): presumably shared set-up used by the constructors -
* confirm in the implementation file.
*/
void init();
protected:
/** degree */
int32_t degree;
/** weights for each of the subkernels of degree 1...d */
float64_t* weights;
};
}
#endif /* _WEIGHTEDCOMMWORDSTRINGKERNEL_H___ */
|