This file is indexed.

/usr/include/apertium-3.4/apertium/tmx_alignment.h is in apertium-dev 3.4.0~r61013-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*************************************************************************
*                                                                        *
*  (C) Copyright 2004. Media Research Centre at the                      *
*  Sociology and Communications Department of the                        *
*  Budapest University of Technology and Economics.                      *
*                                                                        *
*  Developed by Daniel Varga.                                            *
*                                                                        *
*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
*                                                                        *
*************************************************************************/
#ifndef __TMXALIGNER_ALIGNMENT_ALIGNMENT_H
#define __TMXALIGNER_ALIGNMENT_ALIGNMENT_H

#include <apertium/tmx_quasi_diagonal.h>

#include <set>
#include <string>
#include <utility>
#include <vector>

namespace TMXAligner
{

// One double value per sentence of a text.
// Right now the values stored are sentence lengths.
typedef std::vector<double> SentenceValues;

// Matrix of double values held in a QuasiDiagonal container.
// The container is declared in apertium/tmx_quasi_diagonal.h
// (called quasiDiagonal.h in the original hunalign sources).
typedef QuasiDiagonal<double> AlignMatrix;

// Contains directions, a bit like a force field: one unsigned char per cell,
// held in the same QuasiDiagonal container type that AlignMatrix uses.
// NOTE(review): the encoding of the direction values is not visible in this
// header -- look it up in the implementation before relying on it.
typedef QuasiDiagonal<unsigned char> TrelliMatrix;

// A Rundle (x,y) is a cut point that splits the bitext into two sub-bitexts:
// [0,x)+[0,y) and [x,huSize)+[y,enSize).
// first (x) is therefore a position on the Hungarian side and
// second (y) a position on the English side.
typedef std::pair<int,int> Rundle;

// A Trail is a strictly ordered list of Rundles.
// It cuts the bitext into small bitexts.
// Such a small bitext is called a hole or segmentum.
// A hole can contain zero Hungarian sentences,
// or it can contain zero English sentences, but not both.
// A Trail is sometimes referred to as a Ladder.
typedef std::vector<Rundle> Trail;

// A BisentenceList is formally identical to a Trail (it is the same
// std::vector of std::pair<int,int>), but semantically very different:
// it represents an ordered list of bisentences, not a list of cut points.
// There are some functions which utilize the formal identity,
// manipulating both structures.
typedef std::vector< std::pair<int,int> > BisentenceList;

// OBSOLETE:
// TrailValues gives scores to the Rundles of a Trail (of the same size).
// Conceptually TrailValues should be attached to Trails.
// A TrailValues structure always accompanies a Trails list,
// but their consistency must be maintained by hand, pre-OO-style. (TODO)
// typedef std::vector<double> TrailValues;

// OBSOLETE:
// Has exactly the same relation to a BisentenceList as
// a TrailValues has to a Trail. But note that these
// scores mark the confidence in a bisentence. This is
// very different from the confidence in a rundle.
// typedef std::vector<double> BisentenceValues;

// Takes two length values and returns a double.
// NOTE(review): judging by the names, this presumably measures how well the
// combined length of two sentences matches the length of a single sentence
// (i.e. a score for two-to-one holes) -- the formula is in the implementation
// file; confirm there.
double closeness( double twoSentenceLength, double oneSentenceLength );

// NOTE(review): presumably the score given to a hole that skips a sentence
// (zero sentences on one side) during alignment -- confirm against the
// implementation of align().
const double skipScore = -0.3;


// The main align function.
// Inputs:
//   w        - a confidence value for every sentence pair.
//   huLength - sentence lengths for the Hungarian side.
//   enLength - sentence lengths for the English side.
//              (The lengths are used for a Gale-Church-like scoring.)
// Outputs (non-const references):
//   bestTrail - the trail with the best total score.
//   dynMatrix - the computed dynamic-programming matrix:
//               dynMatrix[huPos][enPos] gives the similarity of the
//               [0,huPos) and [0,enPos) intervals.
void align( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
            Trail& bestTrail, AlignMatrix& dynMatrix );


// NOTE(review): presumably returns true when the hole of bestTrail at index
// pos contains exactly one sentence on each side -- confirm against the
// implementation, including which range of pos values is valid.
bool oneToOne( const Trail& bestTrail, int pos );

// Collects the bisentences found along bestTrail into bisentenceList
// (output parameter).
// NOTE(review): whether bisentenceList is cleared first, and exactly which
// holes count as bisentences, is not visible in this header -- see the
// implementation.
void trailToBisentenceList( const Trail& bestTrail,
                            BisentenceList& bisentenceList );

// Scores the precision-recall of a BisentenceList against a hand-aligned bicorpus.
// The manual bisentence list is built from trailHand; the compared sets
// consist of bisentences.
// For best results, zero-to-many holes of the hand alignment should be
// subdivided into zero-to-ones beforehand.
// NOTE(review): how precision and recall are combined into the single
// returned double is not visible in this header.
double scoreBisentenceList( const BisentenceList& bisentenceList, const Trail& trailHand );

// The same precision-recall calculation as scoreBisentenceList, but for Trails:
// trailAuto is compared against the hand-made trailHand, and the compared
// sets consist of Rundles instead of bisentences.
double scoreTrail         ( const Trail&          trailAuto,      const Trail& trailHand );


// Marker values for matrix cells outside and inside the radius of a trail.
// NOTE(review): presumably these are what borderDetailedAlignMatrix() writes,
// with the large negative number acting as "minus infinity" -- confirm
// against its implementation.
const int outsideOfRadiusValue = -1000000;
const int insideOfRadiusValue  = 0;

// Fills every cell of m that lies outside the given radius around the trail
// with "minus infinity".
// Returns true on success. Returns false if, during the fill, the radius
// around the trail intersected the outside of the quasidiagonal area;
// in that case the operation is left unfinished, so m is only partially filled.
bool borderDetailedAlignMatrix( AlignMatrix& m, const Trail& trail, int radius );

// Debug helpers that dump the contents of a matrix.
// NOTE(review): the output destination and the exact effect of the
// `graphical` flag are not visible in this header -- see the implementation.

// Dumps an AlignMatrix (double cells).
void dumpAlignMatrix( const AlignMatrix& m, bool graphical );

// Generic overload for a QuasiDiagonal of any cell type.
// Only the declaration is given here; the definition has to be supplied
// elsewhere for every T that is actually used.
template <class T>
void dumpAlignMatrix( const QuasiDiagonal<T>& alignMatrix );

// Dumps a QuasiDiagonal with int cells.
void dumpAlignMatrix( const QuasiDiagonal<int>& alignMatrix, bool graphical );

// Debug helper that dumps the direction matrix (see TrelliMatrix).
// NOTE(review): output destination and format are defined in the implementation.
void dumpTrelliMatrix( const TrelliMatrix& trellis );


} // namespace TMXAligner

#endif // #define __TMXALIGNER_ALIGNMENT_ALIGNMENT_H