/usr/bin/nd_verifymirrors is in neurodebian-dev 0.37.2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
#emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 noet:
"""Script to do rudimentary checks of NeuroDebian mirrors to verify they are in good shape
"""
import sys
from urllib import urlopen
from ConfigParser import SafeConfigParser
from lxml import etree  # provides etree.HTMLParser() used in fetch_listing() below
#cfg_path="/etc/neurodebian/neurodebian.cfg"
cfg_path="./neurodebian.cfg"
main_mirror='us-nh'
# read configuration
cfg = SafeConfigParser()
cfg.read(cfg_path)
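# For illustration only -- the '[mirrors]' section of neurodebian.cfg is
# expected to map short mirror names to base URLs, e.g. (example values):
#   [mirrors]
#   us-nh = http://neuro.debian.net/debian
#   au = http://mirror.aarnet.edu.au/pub/neurodebian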
# load information about mirrors
mirrors = cfg.options('mirrors')
urls = dict([(x, cfg.get('mirrors', x)) for x in mirrors])
# all mirrors except the main one are the slaves to be verified
slave_mirrors = [m for m in mirrors if m != main_mirror]
#if True:
def fetch_listing(url):
"""Traverses whole website, obtains listing of all files available
TODO: eventually use scrapy, but stable one has only 0.8 while
0.16 is out... so -- later
"""
print url
    #url = 'http://neuro.debian.net/debian/dists/dapper/'
    #url = "http://mirror.aarnet.edu.au/pub/neurodebian/dists/dapper/"
    parser = etree.HTMLParser()
    from lxml.html import parse, submit_form, fromstring
    #page = etree.parse(urlopen('http://neuro.debian.net/debian/dists/dapper/'), parser)
    #page = objectify.parse(urlopen('http://neuro.debian.net/debian/dists/dapper/'), parser)
    page = parse(url).getroot()
    #page = fromstring(''.join(urlopen(url).readlines()))
    #page.make_links_absolute(url)
    # go through all rows with links
    rows = [row for row in page.iter('tr')]
    res = {}
    for row in rows:
        pass
    # do I need parent actually for anything? yes -- time/size
    # actually -- of no use since presence/presentation heavily varies
    # across mirrors, so let's not rely on them
    links = [ (l[0].getparent().getparent(),
               l[2].endswith('/'),) +
              l
              for l in page.iterlinks()
              if (l[1] == 'href'
                  and not (
                      l[2][0] in ('/', '?')
                      or l[2].startswith('http://')
                      or l[2].startswith('mailto:')
                      )) ]
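    # each entry is the (element, attribute, href, position) tuple from
    # iterlinks(), prefixed with the link's grandparent row and a flag for
    # whether the href points at a subdirectory; only relative links remain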
    for p, isdir, a, _, name, _ in links:
        print name
        if isdir:
            fetch_listing('%s/%s' %
                          (url, name))
if False:
    for m, url in urls.iteritems():
        print "Mirror %s" % m
        fetch_listing(url + '/dists/dapper')
else:
    fetch_listing(urls[main_mirror] + '/dists/dapper')
"""
au has fancier index pages, so we would need to distil page first more
"""
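The script above is Python 2 only (print statements, urllib.urlopen, ConfigParser) and depends on lxml. For comparison, a minimal sketch of the same recursive index-page traversal using only the Python 3 standard library might look like the following; the class name and starting URL are illustrative and not part of the packaged script.

#!/usr/bin/python3
"""Sketch only: recursively list files under a mirror URL, similar in
spirit to fetch_listing() in nd_verifymirrors, but for Python 3."""

from html.parser import HTMLParser
from urllib.parse import urljoin
from urllib.request import urlopen


class IndexLinkParser(HTMLParser):
    """Collect href attributes from an Apache-style directory index."""

    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href' and value:
                    self.hrefs.append(value)


def fetch_listing(url):
    """Print every file below `url`, descending into subdirectories."""
    if not url.endswith('/'):
        url += '/'
    parser = IndexLinkParser()
    parser.feed(urlopen(url).read().decode('utf-8', 'replace'))
    for href in parser.hrefs:
        # skip parent/sort links and absolute or external URLs, mirroring
        # the filter in the original list comprehension
        if href.startswith(('/', '?', 'http://', 'https://', 'mailto:')):
            continue
        if href.endswith('/'):
            fetch_listing(urljoin(url, href))
        else:
            print(urljoin(url, href))


if __name__ == '__main__':
    # starting point is an example; the original script walks <mirror>/dists/dapper
    fetch_listing('http://neuro.debian.net/debian/dists/dapper')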