/usr/share/pyshared/jsb/utils/urlstats.py is in jsonbot 0.84.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# jsb/utils/urlstats.py
#
#
""" persist stats of an url. """
## jsb imports
from jsb.lib.persist import Persist, PersistCollection
from jsb.lib.datadir import getdatadir
from jsb.utils.statdict import StatDict
from jsb.utils.name import stripname
from jsb.utils.url import striphtml, Url
## basic imports
import time
import logging
import os
## UrlStats class
class UrlStats(Persist):

    """ persisted word statistics of a single url. """

    def __init__(self, url):
        """ wrap url in a Url object and bind the persist file under <datadir>/spider/stats/. """
        # time.time() of the last input() call; 0 means no scan has run on this instance.
        self.scantime = 0
        self.url = Url(url)
        self.fname = getdatadir() + os.sep + 'spider' + os.sep + 'stats' + os.sep + stripname(url)
        Persist.__init__(self, self.fname)

    def get(self):
        """ fetch the url and pass the content to input(); returns None when the fetch yields nothing. """
        # geturl2 is not among the module-level imports, so the bare call raised
        # NameError; import it here from the url helper module the file already uses.
        from jsb.utils.url import geturl2
        # pass the address string (the attribute input() also reads), not the Url wrapper.
        content = geturl2(self.url.url)
        if content: return self.input(content)

    def input(self, html):
        """ count the words in html, store the counts in the persisted data and return them. """
        self.scantime = time.time()
        text = striphtml(html)
        # split() already separates on newlines; deleting them first would glue
        # the last word of a line onto the first word of the next one.
        words = text.split()
        stats = StatDict()
        for w in words:
            stats.upitem(w)
        self.data.url = self.url.url
        self.data.words = stats
        self.save()
        logging.warning("%s words found for %s", len(stats), self.url.url)
        return stats

    def stats(self):
        """ return a StatDict built from the persisted word counts. """
        # NOTE(review): behaviour when no scan has been saved yet depends on what
        # self.data.words yields for a missing key - confirm against Persist/StatDict.
        return StatDict(self.data.words)
|