/usr/lib/python2.7/dist-packages/gplugs/urlinfo.py is in gozerbot 0.99.1-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

# urlinfo.py
# -*- coding: utf-8 -*-

"""
Catches URLs spoken on a channel and replies with information about them,
such as the page title or image dimensions.
Uses http://whatisthisfile.appspot.com/ via XML-RPC.

Example:
19:20 <@raspi> http://www.youtube.com/watch?v=9RZ-hYPAMFQ
19:20 <@bot> Title: "YouTube - Black Knight Holy Grail"
19:28 <@raspi> test http://www.raspi.fi foobar http://raspi.fi/wp-includes/images/rss.png
19:28 <@bot> 1. Title: "raspi.fi" Redirect: http://raspi.fi/  2. Image: 14x14
"""

__author__ = u"Pekka 'raspi' Järvinen - http://raspi.fi/"
__license__ = 'BSD'

from gozerbot.generic import handle_exception, rlog
from gozerbot.callbacks import callbacks
from gozerbot.commands import cmnds
from gozerbot.plughelp import plughelp
from gozerbot.persist.persist import Persist
from gozerbot.examples import examples
from gozerbot.datadir import datadir

import re
import urlparse
import xmlrpclib
import socket
import os

plughelp.add('urlinfo', 'Gets information about URLs spoken on channel')

cfg = Persist(datadir + os.sep + 'plugs' + os.sep + 'urlinfo' + os.sep + 'urlinfo', {})
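# cfg.data is persisted as nested dicts, botname -> channel -> enabled flag,
# e.g. {'main': {'#test': True}} (illustrative names, not defaults).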


# Keep only URL-like words, dropping everything else word by word
def sanitize(text):
  text = text.strip()

  # Collapse runs of whitespace into single spaces
  text = re.sub(r'\s+', ' ', text)

  tmp = ''
  for i in text.split(' '):
    if len(i) >= 5:
      if i.find('www.') != -1 or i.find('http') != -1:
        # Word has to contain www. or http somewhere
        tmp += i + ' '

  tmp = tmp.strip()

  tmp2 = ''
  for i in tmp.split(' '):
    if not i:
      # Nothing URL-like was found; avoid indexing an empty string
      continue
    if (i[0] == '(' and i[-1] == ')') or \
       (i[0] == '[' and i[-1] == ']') or \
       (i[0] == '<' and i[-1] == '>') or \
       (i[0] == '{' and i[-1] == '}'):
      # Strip one matching pair of surrounding ()[]{}<> brackets
      tmp2 += i[1:-1] + ' '
    else:
      tmp2 += i + ' '

  tmp2 = tmp2.strip()
  tmp = ''
  for i in tmp2.split(' '):
    if i.find('www.') == 0:
      # Prefix bare www. addresses with http://
      tmp += 'http://' + i + ' '
    else:
      tmp += i + ' '

  return tmp.strip()
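
# Illustrative behaviour of sanitize() (hypothetical input, traced from the
# rules above):
#   sanitize('test http://www.raspi.fi foobar (www.example.com)')
#   -> 'http://www.raspi.fi http://www.example.com'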

# Extract valid URLs from text
def getUrls(text):
  regex = r"http[s]?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]"
  p = re.compile(regex)
  urls = []

  for i in text.split(' '):
    for x in p.findall(i):
      # Round-trip through urlparse and deduplicate
      url = urlparse.urlparse(x)
      if url.geturl() not in urls:
        urls.append(url.geturl())

  return urls
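
# Illustrative behaviour of getUrls() (hypothetical input): trailing
# punctuation is trimmed by the regex and duplicates are dropped:
#   getUrls('see http://raspi.fi/, twice: http://raspi.fi/,')
#   -> ['http://raspi.fi/']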

# Query the XML-RPC service for information about each URL in the text
def getUrlInfo(text):
  out = ''
  text = sanitize(text)
  urls = getUrls(text)

  if urls:
    socket.setdefaulttimeout(30)
    server = xmlrpclib.ServerProxy("http://whatisthisfile.appspot.com/xmlrpc")
    idx = 1
    for i in urls:
      o = ''
      try:
        rlog(10, 'urlinfo', "XMLRPC query: %s" % i)
        urlinfo = server.app.query(i)

        if 'html' in urlinfo:
          if 'title' in urlinfo['html']:
            o += 'Title: "%s" ' % urlinfo['html']['title'].strip()
        elif 'image' in urlinfo:
          o += 'Image: %dx%d ' % (urlinfo['image']['width'], urlinfo['image']['height'])

        if 'real_url' in urlinfo and urlinfo['real_url'] != i:
          o += 'Redirect: %s ' % urlinfo['real_url']

        if o:
          if len(urls) > 1:
            out += ' %d. ' % idx
            idx += 1

          out += o

      except Exception:
        # Lookup failed (timeout, XML-RPC fault, etc.): skip this URL
        pass
  return out.strip()

# Precondition: does this channel message possibly contain URLs?
def catchHasUrls(bot, ievent):
  if cfg.data.get(bot.name, {}).get(ievent.printto):
    if len(ievent.txt) >= 5:
      if (ievent.txt.find('www.') != -1) or (ievent.txt.find('http') != -1):
        return 1
  return 0

# Catch channel chat
def catchUrls(bot, ievent):
  ievent.reply(getUrlInfo(ievent.txt))

callbacks.add('PRIVMSG', catchUrls, catchHasUrls, threaded=True)
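
# Note: the PRIVMSG callback runs catchUrls in a thread, and only when the
# cheap catchHasUrls precondition returned 1, so ordinary chatter is skipped.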

# Enable on channel
def handle_urlinfo_enable(bot, ievent):
  if bot.name not in cfg.data:
    cfg.data[bot.name] = {}
  cfg.data[bot.name][ievent.printto] = True
  cfg.save()
  ievent.reply('urlinfo enabled')

cmnds.add('urlinfo-enable', handle_urlinfo_enable, 'OPER')
examples.add('urlinfo-enable', 'enable urlinfo in the channel', 'urlinfo-enable')

# Disable on channel
def handle_urlinfo_disable(bot, ievent):
  if bot.name in cfg.data:
    cfg.data[bot.name][ievent.printto] = False
    cfg.save()
  ievent.reply('urlinfo disabled')

cmnds.add('urlinfo-disable', handle_urlinfo_disable, 'OPER')
examples.add('urlinfo-disable', 'disable urlinfo in the channel', 'urlinfo-disable')

def handle_urlinfo_list(bot, ievent):
  chans = []
  names = cfg.data.keys()
  names.sort()

  for name in names:
    # Only list channels where urlinfo is actually enabled, not ones
    # that were enabled once and later disabled
    targets = [t for t in cfg.data[name] if cfg.data[name][t]]
    targets.sort()
    if targets:
      chans.append('%s: %s' % (name, ' '.join(targets)))
  if not chans:
    ievent.reply('none')
  else:
    ievent.reply('urlinfo enabled on channels: %s' % ', '.join(chans))

cmnds.add('urlinfo-list', handle_urlinfo_list, 'OPER')
examples.add('urlinfo-list', 'show in which channels urlinfo is enabled', 'urlinfo-list')
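
if __name__ == '__main__':
  # Minimal manual test outside the bot: a sketch that assumes gozerbot is
  # importable and a datadir is configured (the imports above need both),
  # plus network access to whatisthisfile.appspot.com. Not executed when
  # loaded as a plugin.
  print getUrlInfo('test http://www.raspi.fi foobar')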