/usr/share/ask/bootstrap/random-file is in ask 1.1.1-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#! /usr/bin/python
# Copyright (c) 2011-2012, Universite de Versailles St-Quentin-en-Yvelines
#
# This file is part of ASK. ASK is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
This module selects n random samples from the set of points stored in a file.
"""
import random
from common.configuration import Configuration
from common.util import fatal
def select_n_random(input_file, output_file, n, n_factors=None):
    """
    Select n random points from input_file and write them to output_file.

    Every line of input_file is treated as one point. n lines are drawn
    with random.sample (so no line position is picked twice) and, for each
    of them, only the first n_factors whitespace-separated fields are
    written to output_file, joined by single spaces, one point per line.

    Arguments:
    input_file  -- path of the text file holding the candidate points
    output_file -- path of the file that receives the sampled points; it
                   is opened in "w" mode, so existing content is replaced
    n           -- number of points to sample
    n_factors   -- number of leading fields to keep for each point. When
                   None (the default) len(conf("factors")) is used, which
                   requires a module-level `conf` to exist, as is the case
                   when this file is run as a script.

    If n is larger than the number of lines in input_file, fatal() is
    called with an explanatory message.

    This approach is inefficient since it loads the entire file in memory
    to sample it. If it becomes a bottleneck we should try the approach
    outlined in
    http://www.bryceboe.com/2009/03/23/random-lines-from-a-file/
    """
    # Context managers guarantee the handles are released even if reading,
    # sampling or writing raises.
    with open(input_file, "r") as inpf:
        lines = inpf.readlines()

    if n > len(lines):
        fatal("Not enough points in {0} to satisfy your sampling size of {1}"
              .format(input_file, n))

    samples = random.sample(lines, n)

    # Resolve the field count once, before touching the output file, rather
    # than querying the configuration for every sampled line.
    if n_factors is None:
        n_factors = len(conf("factors"))

    with open(output_file, "w") as outf:
        for s in samples:
            outf.write(" ".join(s.split()[:n_factors]) + "\n")
if __name__ == "__main__":
    import argparse

    # Command line: <configuration> <output_file>, both positional.
    arg_parser = argparse.ArgumentParser(
        description="Select random samples from a set of points in the filename")
    for positional in ('configuration', 'output_file'):
        arg_parser.add_argument(positional)
    cli_args = arg_parser.parse_args()

    # Must stay a module-level name called `conf`: select_n_random looks it
    # up as a global to find out how many factors to keep.
    conf = Configuration(cli_args.configuration)

    # Seed the generator only when the bootstrap parameters provide a seed.
    bootstrap_params = conf("modules.bootstrap.params")
    if "seed" in bootstrap_params:
        random.seed(conf("modules.bootstrap.params.seed", int))

    data_file = conf("modules.bootstrap.params.data_file")
    destination = cli_args.output_file
    sample_size = conf("modules.bootstrap.params.n", int)
    select_n_random(data_file, destination, sample_size)
|