This file is indexed.

/usr/share/arm/util/hostnames.py is in tor-arm 1.4.5.0-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
"""
Service providing hostname resolution via reverse DNS lookups. This provides
both resolution via a thread pool (looking up several addresses at a time) and
caching of the results. If used, it's advisable that this service is stopped
when it's no longer needed. All calls are both non-blocking and thread safe.

Be aware that this relies on querying the system's DNS servers, possibly
leaking the requested addresses to third parties.
"""

# The only points of concern in terms of concurrent calls are the RESOLVER and
# RESOLVER.resolvedCache. This service provides (mostly) non-locking thread
# safety via the following invariants:
# - Resolver and cache instances are non-destructible
#     Nothing can be removed or invalidated. Rather, halting resolvers and
#     trimming the cache are done via reassignment (pointing the RESOLVER or
#     RESOLVER.resolvedCache to another copy).
# - Functions create and use local references to the resolver and its cache
#     This is for consistency (ie, all operations are done on the same resolver
#     or cache instance regardless of concurrent assignments). Usually it's
#     assigned to a local variable called 'resolverRef' or 'cacheRef'.
# - Locks aren't necessary, but used to help in the following cases:
#     - When assigning to the RESOLVER (to avoid orphaned instances with
#       running thread pools).
#     - When adding/removing from the cache (prevents workers from updating
#       an outdated cache reference).

import time
import socket
import threading
import itertools
import Queue
import distutils.sysconfig

from util import log, sysTools

RESOLVER = None                       # hostname resolver (service is stopped if None)
RESOLVER_LOCK = threading.RLock()     # regulates assignment to the RESOLVER
RESOLVER_COUNTER = itertools.count()  # atomic counter, providing the age for new entries (for trimming)
DNS_ERROR_CODES = ("1(FORMERR)", "2(SERVFAIL)", "3(NXDOMAIN)", "4(NOTIMP)", "5(REFUSED)", "6(YXDOMAIN)",
                   "7(YXRRSET)", "8(NXRRSET)", "9(NOTAUTH)", "10(NOTZONE)", "16(BADVERS)")

CONFIG = {"queries.hostnames.poolSize": 5,
          "queries.hostnames.useSocketModule": False,
          "cache.hostnames.size": 700000,
          "cache.hostnames.trimSize": 200000,
          "log.hostnameCacheTrimmed": log.INFO}

def loadConfig(config):
  config.update(CONFIG, {
    "queries.hostnames.poolSize": 1,
    "cache.hostnames.size": 100,
    "cache.hostnames.trimSize": 10})
  
  CONFIG["cache.hostnames.trimSize"] = min(CONFIG["cache.hostnames.trimSize"], CONFIG["cache.hostnames.size"] / 2)

def start():
  """
  Primes the service to start resolving addresses. Calling this explicitly is
  not necessary since resolving any address will start the service if it isn't
  already running.
  """
  
  global RESOLVER
  RESOLVER_LOCK.acquire()
  if not isRunning(): RESOLVER = _Resolver()
  RESOLVER_LOCK.release()

def stop():
  """
  Halts further resolutions and stops the service. This joins on the resolver's
  thread pool and clears its lookup cache.
  """
  
  global RESOLVER
  RESOLVER_LOCK.acquire()
  if isRunning():
    # Releases resolver instance. This is done first so concurrent calls to the
    # service won't try to use it. However, using a halted instance is fine and
    # all calls currently in progress can still proceed on the RESOLVER's local
    # references.
    resolverRef, RESOLVER = RESOLVER, None
    
    # joins on its worker thread pool
    resolverRef.stop()
    for t in resolverRef.threadPool: t.join()
  RESOLVER_LOCK.release()

def setPaused(isPause):
  """
  Allows or prevents further hostname resolutions (resolutions still make use of
  cached entries if available). This starts the service if it isn't already
  running.
  
  Arguments:
    isPause - puts a freeze on further resolutions if true, allows them to
              continue otherwise
  """
  
  # makes sure a running resolver is set with the pausing setting
  RESOLVER_LOCK.acquire()
  start()
  RESOLVER.isPaused = isPause
  RESOLVER_LOCK.release()

def isRunning():
  """
  Returns True if the service is currently running, False otherwise.
  """
  
  return bool(RESOLVER)

def isPaused():
  """
  Returns True if the resolver is paused, False otherwise.
  """
  
  resolverRef = RESOLVER
  if resolverRef: return resolverRef.isPaused
  else: return False

def isResolving():
  """
  Returns True if addresses are currently waiting to be resolved, False
  otherwise.
  """
  
  resolverRef = RESOLVER
  if resolverRef: return not resolverRef.unresolvedQueue.empty()
  else: return False

def resolve(ipAddr, timeout = 0, suppressIOExc = True):
  """
  Provides the hostname associated with a given IP address. By default this is
  a non-blocking call, fetching cached results if available and queuing the
  lookup if not. This provides None if the lookup fails (with a suppressed
  exception) or timeout is reached without resolution. This starts the service
  if it isn't already running.
  
  If paused this simply returns the cached reply (no request is queued and
  returns immediately regardless of the timeout argument).
  
  Requests may raise the following exceptions:
  - ValueError - address was unresolvable (includes the DNS error response)
  - IOError - lookup failed due to os or network issues (suppressed by default)
  
  Arguments:
    ipAddr        - ip address to be resolved
    timeout       - maximum duration to wait for a resolution (blocks to
                    completion if None)
    suppressIOExc - suppresses lookup errors and re-runs failed calls if true,
                    raises otherwise
  """
  
  # starts the service if it isn't already running (making sure we have an
  # instance in a thread safe fashion before continuing)
  resolverRef = RESOLVER
  if resolverRef == None:
    RESOLVER_LOCK.acquire()
    start()
    resolverRef = RESOLVER
    RESOLVER_LOCK.release()
  
  if resolverRef.isPaused:
    # get cache entry, raising if an exception and returning if a hostname
    cacheRef = resolverRef.resolvedCache
    
    if ipAddr in cacheRef:
      entry = cacheRef[ipAddr][0]
      if suppressIOExc and type(entry) == IOError: return None
      elif isinstance(entry, Exception): raise entry
      else: return entry
    else: return None
  elif suppressIOExc:
    # if resolver has cached an IOError then flush the entry (this defaults to
    # suppression since these error may be transient)
    cacheRef = resolverRef.resolvedCache
    flush = ipAddr in cacheRef and type(cacheRef[ipAddr]) == IOError
    
    try: return resolverRef.getHostname(ipAddr, timeout, flush)
    except IOError: return None
  else: return resolverRef.getHostname(ipAddr, timeout)

def getPendingCount():
  """
  Provides an approximate count of the number of addresses still pending
  resolution.
  """
  
  resolverRef = RESOLVER
  if resolverRef: return resolverRef.unresolvedQueue.qsize()
  else: return 0

def getRequestCount():
  """
  Provides the number of resolutions requested since starting the service.
  """
  
  resolverRef = RESOLVER
  if resolverRef: return resolverRef.totalResolves
  else: return 0

def _resolveViaSocket(ipAddr):
  """
  Performs hostname lookup via the socket module's gethostbyaddr function. This
  raises an IOError if the lookup fails (network issue) and a ValueError in
  case of DNS errors (address unresolvable).
  
  Arguments:
    ipAddr - ip address to be resolved
  """
  
  try:
    # provides tuple like: ('localhost', [], ['127.0.0.1'])
    return socket.gethostbyaddr(ipAddr)[0]
  except socket.herror, exc:
    if exc[0] == 2: raise IOError(exc[1]) # "Host name lookup failure"
    else: raise ValueError(exc[1]) # usually "Unknown host"
  except socket.error, exc: raise ValueError(exc[1])

def _resolveViaHost(ipAddr):
  """
  Performs a host lookup for the given IP, returning the resolved hostname.
  This raises an IOError if the lookup fails (os or network issue), and a
  ValueError in the case of DNS errors (address is unresolvable).
  
  Arguments:
    ipAddr - ip address to be resolved
  """
  
  hostname = sysTools.call("host %s" % ipAddr)[0].split()[-1:][0]
  
  if hostname == "reached":
    # got message: ";; connection timed out; no servers could be reached"
    raise IOError("lookup timed out")
  elif hostname in DNS_ERROR_CODES:
    # got error response (can't do resolution on address)
    raise ValueError("address is unresolvable: %s" % hostname)
  else:
    # strips off ending period and returns hostname
    return hostname[:-1]

class _Resolver():
  """
  Performs reverse DNS resolutions. Lookups are a network bound operation so
  this spawns a pool of worker threads to do several at a time in parallel.
  """
  
  def __init__(self):
    # IP Address => (hostname/error, age), resolution failures result in a
    # ValueError with the lookup's status
    self.resolvedCache = {}
    
    self.resolvedLock = threading.RLock() # governs concurrent access when modifying resolvedCache
    self.unresolvedQueue = Queue.Queue()  # unprocessed lookup requests
    self.recentQueries = []               # recent resolution requests to prevent duplicate requests
    self.threadPool = []                  # worker threads that process requests
    self.totalResolves = 0                # counter for the total number of addresses queried to be resolved
    self.isPaused = False                 # prevents further resolutions if true
    self.halt = False                     # if true, tells workers to stop
    self.cond = threading.Condition()     # used for pausing threads
    
    # Determines if resolutions are made using os 'host' calls or python's
    # 'socket.gethostbyaddr'. The following checks if the system has the
    # gethostbyname_r function, which determines if python resolutions can be
    # done in parallel or not. If so, this is preferable.
    isSocketResolutionParallel = distutils.sysconfig.get_config_var("HAVE_GETHOSTBYNAME_R")
    self.useSocketResolution = CONFIG["queries.hostnames.useSocketModule"] and isSocketResolutionParallel
    
    for _ in range(CONFIG["queries.hostnames.poolSize"]):
      t = threading.Thread(target = self._workerLoop)
      t.setDaemon(True)
      t.start()
      self.threadPool.append(t)
  
  def getHostname(self, ipAddr, timeout, flushCache = False):
    """
    Provides the hostname, queuing the request and returning None if the
    timeout is reached before resolution. If a problem's encountered then this
    either raises an IOError (for os and network issues) or ValueError (for DNS
    resolution errors).
    
    Arguments:
      ipAddr     - ip address to be resolved
      timeout    - maximum duration to wait for a resolution (blocks to
                   completion if None)
      flushCache - if true the cache is skipped and address re-resolved
    """
    
    # if outstanding requests are done then clear recentQueries to allow
    # entries removed from the cache to be re-run
    if self.unresolvedQueue.empty(): self.recentQueries = []
    
    # copies reference cache (this is important in case the cache is trimmed
    # during this call)
    cacheRef = self.resolvedCache
    
    if not flushCache and ipAddr in cacheRef:
      # cached response is available - raise if an error, return if a hostname
      response = cacheRef[ipAddr][0]
      if isinstance(response, Exception): raise response
      else: return response
    elif flushCache or ipAddr not in self.recentQueries:
      # new request - queue for resolution
      self.totalResolves += 1
      self.recentQueries.append(ipAddr)
      self.unresolvedQueue.put(ipAddr)
    
    # periodically check cache if requester is willing to wait
    if timeout == None or timeout > 0:
      startTime = time.time()
      
      while timeout == None or time.time() - startTime < timeout:
        if ipAddr in cacheRef:
          # address was resolved - raise if an error, return if a hostname
          response = cacheRef[ipAddr][0]
          if isinstance(response, Exception): raise response
          else: return response
        else: time.sleep(0.1)
    
    return None # timeout reached without resolution
  
  def stop(self):
    """
    Halts further resolutions and terminates the thread.
    """
    
    self.cond.acquire()
    self.halt = True
    self.cond.notifyAll()
    self.cond.release()
  
  def _workerLoop(self):
    """
    Simple producer-consumer loop followed by worker threads. This takes
    addresses from the unresolvedQueue, attempts to look up its hostname, and
    adds its results or the error to the resolved cache. Resolver reference
    provides shared resources used by the thread pool.
    """
    
    while not self.halt:
      # if resolver is paused then put a hold on further resolutions
      if self.isPaused:
        self.cond.acquire()
        if not self.halt: self.cond.wait(1)
        self.cond.release()
        continue
      
      # snags next available ip, timeout is because queue can't be woken up
      # when 'halt' is set
      try: ipAddr = self.unresolvedQueue.get_nowait()
      except Queue.Empty:
        # no elements ready, wait a little while and try again
        self.cond.acquire()
        if not self.halt: self.cond.wait(1)
        self.cond.release()
        continue
      if self.halt: break
      
      try:
        if self.useSocketResolution: result = _resolveViaSocket(ipAddr)
        else: result = _resolveViaHost(ipAddr)
      except IOError, exc: result = exc # lookup failed
      except ValueError, exc: result = exc # dns error
      
      self.resolvedLock.acquire()
      self.resolvedCache[ipAddr] = (result, RESOLVER_COUNTER.next())
      
      # trim cache if excessively large (clearing out oldest entries)
      if len(self.resolvedCache) > CONFIG["cache.hostnames.size"]:
        # Providing for concurrent, non-blocking calls require that entries are
        # never removed from the cache, so this creates a new, trimmed version
        # instead.
        
        # determines minimum age of entries to be kept
        currentCount = RESOLVER_COUNTER.next()
        newCacheSize = CONFIG["cache.hostnames.size"] - CONFIG["cache.hostnames.trimSize"]
        threshold = currentCount - newCacheSize
        newCache = {}
        
        msg = "trimming hostname cache from %i entries to %i" % (len(self.resolvedCache), newCacheSize)
        log.log(CONFIG["log.hostnameCacheTrimmed"], msg)
        
        # checks age of each entry, adding to toDelete if too old
        for ipAddr, entry in self.resolvedCache.iteritems():
          if entry[1] >= threshold: newCache[ipAddr] = entry
        
        self.resolvedCache = newCache
      
      self.resolvedLock.release()