/usr/lib/ruby/vendor_ruby/hpricot/xchar.rb is in ruby-hpricot 0.8.6-5ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | #!/usr/bin/env ruby
# The XChar library is provided courtesy of Sam Ruby (See
# http://intertwingly.net/stories/2005/09/28/xchar.rb)
# --------------------------------------------------------------------
######################################################################
module Hpricot
####################################################################
# XML Character converter, from Sam Ruby:
# (see http://intertwingly.net/stories/2005/09/28/xchar.rb).
#
module XChar # :nodoc:
# See
# http://intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows
# for details.
CP1252 = { # :nodoc:
128 => 8364, # euro sign
130 => 8218, # single low-9 quotation mark
131 => 402, # latin small letter f with hook
132 => 8222, # double low-9 quotation mark
133 => 8230, # horizontal ellipsis
134 => 8224, # dagger
135 => 8225, # double dagger
136 => 710, # modifier letter circumflex accent
137 => 8240, # per mille sign
138 => 352, # latin capital letter s with caron
139 => 8249, # single left-pointing angle quotation mark
140 => 338, # latin capital ligature oe
142 => 381, # latin capital letter z with caron
145 => 8216, # left single quotation mark
146 => 8217, # right single quotation mark
147 => 8220, # left double quotation mark
148 => 8221, # right double quotation mark
149 => 8226, # bullet
150 => 8211, # en dash
151 => 8212, # em dash
152 => 732, # small tilde
153 => 8482, # trade mark sign
154 => 353, # latin small letter s with caron
155 => 8250, # single right-pointing angle quotation mark
156 => 339, # latin small ligature oe
158 => 382, # latin small letter z with caron
159 => 376, # latin capital letter y with diaeresis
}
# See http://www.w3.org/TR/REC-xml/#dt-chardata for details.
PREDEFINED = {
34 => '"', # quotation mark
38 => '&', # ampersand
60 => '<', # left angle bracket
62 => '>' # right angle bracket
}
PREDEFINED_U = PREDEFINED.inject({}) { |hsh, (k, v)| hsh[v] = k; hsh }
# See http://www.w3.org/TR/REC-xml/#charsets for details.
VALID = [
0x9, 0xA, 0xD,
(0x20..0xD7FF),
(0xE000..0xFFFD),
(0x10000..0x10FFFF)
]
end
class << self
# XML escaped version of chr
def xchr(str)
n = XChar::CP1252[str] || str
case n when *XChar::VALID
XChar::PREDEFINED[n] or (n<128 ? n.chr : "&##{n};")
else
'*'
end
end
# XML escaped version of to_s
def xs(str)
str.to_s.unpack('U*').map {|n| xchr(n)}.join # ASCII, UTF-8
rescue
str.to_s.unpack('C*').map {|n| xchr(n)}.join # ISO-8859-1, WIN-1252
end
# XML unescape
def uxs(str)
str.to_s.
gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || 63).chr }. # 63 = ?? (query char)
gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }.
gsub(/\&\#x([0-9a-fA-F]+);/) { [$1.to_i(16)].pack("U*") }
end
end
end
|