This file is indexed.

/usr/include/hphp/zend/zend-html.h is in hhvm-dev 3.11.1+dfsg-1ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.00 of the Zend license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.zend.com/license/2_00.txt.                                |
   | If you did not receive a copy of the Zend license and are unable to  |
   | obtain it through the world-wide-web, please send a note to          |
   | license@zend.com so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/

#ifndef incl_HPHP_ZEND_HTML_H_
#define incl_HPHP_ZEND_HTML_H_

#include <cstdint>

// Avoid dragging in the icu namespace.
#ifndef U_USING_ICU_NAMESPACE
#define U_USING_ICU_NAMESPACE 0
#endif

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
/**
 * Major departures from Zend:
 *
 * 1. We only support the UTF-8 and ISO-8859-1 encodings.
 *    The major reason is that the original get_next_char() bothers me:
 *    it sacrifices performance for character sets that people rarely use
 *    or shouldn't use. UTF-8 should really be the standard string
 *    format everywhere, and we ought to write code specialized for it to
 *    take full advantage of it: one example would be the new html encoding
 *    function that simply does *p one byte at a time, iterating through the
 *    string to look for special characters for entity escaping.
 *
 * 2. The HTML encoding function no longer encodes entities other than the
 *    basic ones. There is no need to encode them, since all browsers support
 *    UTF-8 natively, and we are ok to send out UTF-8 encoded characters
 *    without turning them into printable ASCII. Basic entities are encoded
 *    for a different reason! In fact, I personally don't see why the HTML
 *    spec has that extended list of entities, other than as a historical
 *    artifact.
 *
 * 3. Double encoding parameter is not supported. That really sounds like
 *    a workaround of buggy coding. I don't find a legit use for that yet.
 */

/**
 * Two 64-bit words, i.e. 128 bits of storage in total. Passed by const
 * pointer to string_html_encode_extra().
 *
 * NOTE(review): presumably a bitmap with one bit per 7-bit ASCII code
 * (128 codes == 128 bits) -- confirm against the implementation.
 */
struct AsciiMap {
  uint64_t map[2];
};

/**
 * Flag values for the `flags` parameter of string_html_encode_extra().
 * Every enumerator is a distinct power of two.
 *
 * NOTE(review): presumably intended to be OR-ed together; the precise effect
 * of each flag is defined by the implementation and is not visible in this
 * header -- confirm before relying on any particular combination.
 */
enum StringHtmlEncoding {
  STRING_HTML_ENCODE_UTF8 = 1,
  STRING_HTML_ENCODE_NBSP = 2,
  STRING_HTML_ENCODE_HIGH = 4,
  STRING_HTML_ENCODE_UTF8IZE_REPLACE = 8
};

/**
 * Quote-handling and mode flags for entity encoding. Apart from
 * ENT_BM_NOQUOTES (zero), every enumerator is a distinct power of two.
 *
 * This is a scoped enum, so its values do not convert implicitly to an
 * integer; an explicit cast is needed to build an integer bitmask from them.
 *
 * NOTE(review): presumably these are the bits expected in the `qsBitmask`
 * argument of string_html_encode() -- confirm against the implementation.
 */
enum class EntBitmask {
  ENT_BM_NOQUOTES = 0,   /* leave all quotes alone */
  ENT_BM_SINGLE = 1,     /* escape single quotes only */
  ENT_BM_DOUBLE = 2,     /* escape double quotes only */
  ENT_BM_IGNORE = 4,     /* silently discard invalid chars */
  ENT_BM_SUBSTITUTE = 8, /* replace invalid chars with U+FFFD */
  ENT_BM_XML1 = 16,      /* XML1 mode */
  ENT_BM_XHTML = 32,     /* XHTML mode */
};

/**
 * Character set identifiers used by the entity tables and by
 * determine_charset().
 *
 * The unscoped enum is wrapped in its own namespace so that the cs_*
 * enumerators are not injected directly into HPHP; they are reached as
 * entity_charset_enum::cs_utf_8 and so on. The enum type itself is exposed
 * in HPHP through the entity_charset typedef below.
 *
 * Enumerators take the default consecutive values starting at
 * cs_terminator == 0.
 *
 * NOTE(review): cs_terminator presumably marks the last entry of the array
 * returned by html_get_entity_map(), and cs_end presumably serves as a count
 * of the preceding enumerators -- confirm against the implementation.
 */
namespace entity_charset_enum {
enum entity_charset_impl {
  cs_terminator, cs_8859_1, cs_cp1252,
  cs_8859_15, cs_utf_8, cs_big5, cs_gb2312,
  cs_big5hkscs, cs_sjis, cs_eucjp, cs_koi8r,
  cs_cp1251, cs_8859_5, cs_cp866, cs_macroman,
  cs_unknown,
  cs_end
};
}
// Short name for the charset enum type, usable directly inside HPHP.
typedef entity_charset_enum::entity_charset_impl entity_charset;

/**
 * Record describing a single basic HTML entity.
 *
 * NOTE(review): the field meanings below are inferred from the field names
 * only; the code that fills and reads these records is not in this header.
 */
struct HtmlBasicEntity {
  unsigned short charcode; /* presumably the character code being escaped */
  const char *entity;      /* presumably the entity text -- confirm format */
  int entitylen;           /* presumably the length of `entity` in bytes */
  int flags;               /* presumably quote-style flags -- TODO confirm */
};

// Element type of an entity table: a const pointer to const char, so neither
// the pointer stored in a table slot nor the characters it points at can be
// modified through this type.
typedef const char *const entity_table_t;

/**
 * Describes one entity table: the charset it belongs to, the range of
 * character codes it covers, and a pointer to the table entries.
 *
 * NOTE(review): presumably `table` holds one entry per code in
 * [basechar, endchar], indexed by (code - basechar) -- confirm against the
 * implementation.
 */
struct html_entity_map {
  entity_charset charset; /* charset identifier */
  unsigned short basechar; /* char code at start of table */
  unsigned short endchar;  /* last char code in the table */
  entity_table_t *table;   /* the table of mappings */
};

/*
 * Returns a pointer to read-only html_entity_map data.
 *
 * NOTE(review): presumably the first element of an array whose last entry
 * has charset == cs_terminator; the array layout and the lifetime of the
 * returned pointer are not visible in this header -- confirm.
 */
const html_entity_map* html_get_entity_map();

/*
 * Maps a charset name (C string) to its entity_charset identifier.
 *
 * Returns cs_unknown if and only if the name is not found.
 * If the input is null, returns the default charset, cs_utf_8.
 */
entity_charset determine_charset(const char*);

/**
 * HTML-encodes `input` and returns the result as a char buffer.
 *
 * `len` is taken by non-const reference, so the callee is able to modify it.
 *
 * NOTE(review): the following is not visible in this header and should be
 * confirmed against the implementation:
 *  - `len` is presumably the input length on entry and the output length on
 *    return;
 *  - `qsBitmask` is presumably a combination of EntBitmask values;
 *  - `utf8`, `dEncode` and `htmlEnt` presumably select UTF-8 handling,
 *    double encoding, and full entity encoding respectively (note that the
 *    design notes at the top of this namespace say double encoding is not
 *    supported);
 *  - who owns the returned buffer and how it must be released.
 */
char *string_html_encode(const char *input, int &len,
                         const int64_t qsBitmask, bool utf8,
                         bool dEncode, bool htmlEnt);
/**
 * Variant of HTML encoding driven by StringHtmlEncoding flags and a
 * caller-supplied AsciiMap.
 *
 * `len` is taken by non-const reference, so the callee is able to modify it.
 *
 * NOTE(review): the following is not visible in this header and should be
 * confirmed against the implementation:
 *  - `len` is presumably the input length on entry and the output length on
 *    return;
 *  - `asciiMap` presumably selects which ASCII characters get escaped, and
 *    whether it may be null;
 *  - who owns the returned buffer and how it must be released.
 */
char *string_html_encode_extra(const char *input, int &len,
                               StringHtmlEncoding flags,
                               const AsciiMap *asciiMap);

/**
 * Decodes HTML entities in `input` and returns the decoded string.
 *
 * Can return nullptr if the charset could not be detected from the given
 * `charset_hint`. Passing nullptr as `charset_hint` selects the default
 * charset, UTF-8 (see determine_charset).
 *
 * `len` is taken by non-const reference, so the callee is able to modify it.
 * `xhp` defaults to false when omitted.
 *
 * NOTE(review): the following is not visible in this header and should be
 * confirmed against the implementation:
 *  - `len` is presumably the input length on entry and the output length on
 *    return;
 *  - `decode_double_quote` / `decode_single_quote` presumably control
 *    whether the quote entities are decoded, and `all` presumably selects
 *    the full entity table rather than only the basic entities;
 *  - the effect of `xhp`;
 *  - who owns the returned buffer and how it must be released.
 */
char *string_html_decode(const char *input, int &len,
                         bool decode_double_quote, bool decode_single_quote,
                         const char *charset_hint,
                         bool all, bool xhp = false );

///////////////////////////////////////////////////////////////////////////////
}

#endif