/usr/include/hfst/parsers/lexc-utils.h is in libhfst45-dev 3.10.0~r2798-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// Copyright (c) 2016 University of Helsinki
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
// See the file COPYING included with this distribution for more
// information.
//! @file lexc-utils.h
//!
//! @brief Various string handling methods for HFST lexc.
#ifndef GUARD_lexc_utils_h
#define GUARD_lexc_utils_h
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include <vector>
#include <string>
using namespace std;
// Operation codes and helper symbol for minimum-edit-distance (MED) alignment.
// These are unprefixed preprocessor macros that are not #undef'd in the part
// of the header visible here, so they are seen by every file that includes it.
// NOTE(review): the diag/left/down tags presumably name the direction of the
// step taken in the edit-distance matrix -- confirm against the implementation.
#define SUBSTITUTE 2 // diag: diagonal step
#define DELETE 1 // left: step to the left
#define INSERT 0 // down: step downwards
// NOTE(review): presumably a placeholder symbol standing in for epsilon while
// aligning -- confirm against the implementation.
#define EPSILON_ "@@ANOTHER_EPSILON@@"
namespace hfst { namespace lexc {

// INTERNAL SYMBOL MARKERS
//
// Fixed prefix/suffix strings for the internal symbol forms handled by the
// encode/decode helpers declared below.
// NOTE(review): which helper uses which pair is inferred from the names only
// (e.g. LEXC_JOINER_* with joinerEncode()/joinerDecode()) -- confirm against
// the implementation file.

//! @brief Opening marker of an internal joiner symbol.
const char LEXC_JOINER_START[] = "$_LEXC_JOINER.";
//! @brief Closing marker of an internal joiner symbol.
const char LEXC_JOINER_END[] = "_$";
//! @brief Opening marker of a "left" lexicon-name flag (an R.LEXNAME flag).
const char LEXC_FLAG_LEFT_START[] = "$R.LEXNAME.";
//! @brief Opening marker of a "right" lexicon-name flag (a P.LEXNAME flag).
const char LEXC_FLAG_RIGHT_START[] = "$P.LEXNAME.";
//! @brief Closing marker shared by both lexicon-name flag forms.
const char LEXC_FLAG_END[] = "$";
//! @brief Opening marker of an internal definition symbol.
const char LEXC_DFN_START[] = "@_LEXC_DEFINITION.";
//! @brief Closing marker of an internal definition symbol.
const char LEXC_DFN_END[] = "_@";
//! @brief Opening marker of an internal regular-expression key.
const char REG_EX_START[] = "$_REG.";
//! @brief Closing marker of an internal regular-expression key.
const char REG_EX_END[] = "_$";

// RECODE LEXC STYLE
//
// The functions in this group that take `std::string&` also return
// `std::string&`.
// NOTE(review): presumably each one rewrites @a s in place and returns that
// same reference -- confirm against the implementation file.

//! @brief Strips lexc style percent escaping from a string.
//!
//! E.g. like stripslashes() in PHP.
std::string& stripPercents(std::string& s);

//! @brief Percent encode critical characters in raw string for lexc.
std::string& addPercents(std::string& s);

//! @brief Find flag representation of given joiner name string.
//!
//! NOTE(review): @a left presumably selects between the LEXC_FLAG_LEFT_START
//! and LEXC_FLAG_RIGHT_START forms -- confirm against the implementation.
std::string& flagJoinerEncode(std::string& s, bool left);

//! @brief Find inner representation of given joiner name string.
std::string& joinerEncode(std::string& s);

//! @brief Format inner representation of joiner string in readable format as
//! it was in lexc source.
std::string& joinerDecode(std::string& s);

//! @brief Find inner representation for regex map key of given joiner name
//! string.
std::string& regExpresionEncode(std::string& s);

//! @brief Format inner representation of a regex map key in readable format
//! as it was in lexc source.
//!
//! NOTE(review): the previous comment here was a verbatim copy of the one on
//! joinerDecode() and spoke of a "joiner string"; this wording assumes the
//! function is the inverse of regExpresionEncode() -- confirm.
std::string& regExpresionDecode(std::string& s);

//! @brief Replaces @ZERO@ with "0" in a string
//!
//! Takes its argument by value and returns the result by value.
std::string replace_zero(const std::string s);

// FLEX HANDLING

//! @brief Set filename used for position messages.
void set_infile_name(const char* s);

//! @brief Initialise memory of file positions to zeros.
//!
//! Sets all members of current yylloc structure to zeros.
void token_reset_positions();

//! @brief Keep memory of positions of last parsed tokens for error messages.
//!
//! Counts length, height and width of the given token. Update yylloc structure
//! provided by lex and yacc, for location data.
void token_update_positions(const char* token);

// NOTE(review): the char*-returning helpers below presumably hand back a
// strdup()-style heap copy that the caller must free() -- the names and the
// existing briefs suggest it, but ownership is not stated here; confirm.

//! @brief writes token positions in standard format.
char* strdup_token_positions();

//! @brief create some sensible representation of current token.
char* strdup_token_part();

//! @brief Strips percent escaping and strdups
//!
//! NOTE(review): the effect of @a do_zeros is not documented here; presumably
//! it toggles special treatment of zero symbols -- confirm.
char* strip_percents(const char* s, bool do_zeros);

//! @brief Strips initial and final white space and strdups
char* strstrip(const char* s);

//! @brief extracts the variable substring part from token.
//! Omits constant string prefix, suffix and optionally strips spaces.
char* strdup_nonconst_part(const char* token,
                           const char* prefix,
                           const char* suffix,
                           bool strip);

// help flex/yacc with meaningful error messages

//! @brief print error_at_line style error message for current token
//!
//! NOTE(review): @a status, @a errnum and @a format presumably carry the same
//! meaning as the like-named arguments of GNU error_at_line() -- confirm.
void error_at_current_token(int status, int errnum, const char* format);

//! @brief Finds med alignment between two strings
//!
//! Given an upper-lower string lexicon entry, the upper-lower pair is aligned
//! by minimum edit distance with the following costs:
//!   x:x costs 0
//!   x:y costs ∞
//!   x:0 costs 1
//!   0:x costs 1
//! This means that if we have a lexicon entry like:
//!   abc:bc
//! As this is compiled into a transducer for the entry, we align it
//! a:0 b:b c:c (instead of the default a:b b:c c:0).
//! Additionally, if we have a lexicon entry like:
//!   abc:xyz
//! we align it 0:x 0:y 0:z a:0 b:0 c:0 (instead of x:0 y:0 z:0 0:a 0:b 0:c)
//!
//! The types are spelled with explicit std:: qualification so that this
//! declaration does not depend on a using-directive being in effect; the
//! declared function type is the same either way. The misspelt name
//! ("alingment") is kept because it is the public identifier.
std::pair<std::vector<std::string>, std::vector<std::string> >
find_med_alingment(const std::vector<std::string> &s1,
                   const std::vector<std::string> &s2);

} }
// vim: set ft=cpp.doxygen:
#endif // GUARD_lexc_utils_h
|