This file is indexed.

/usr/bin/svm-checkdata is in libsvm-tools 3.12-1.1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
#! /usr/bin/python

#
# A format checker for LIBSVM
#

#
# Copyright (c) 2007, Rong-En Fan
#
# All rights reserved.
#
# This program is distributed under the same license of the LIBSVM package.
# 

from sys import argv, exit
import os.path

def err(line_no, msg):
	"""Print a diagnostic for one dataset line to standard output.

	line_no -- number of the offending line, as shown to the user
	msg     -- text describing what is wrong with that line
	"""
	report = "line {0}: {1}".format(line_no, msg)
	print(report)

def my_float(x):
	"""Convert the string x to a float, refusing NaN and infinity.

	Behaves like float(), except that any text containing "nan" or "inf"
	(in any letter case, so "Infinity" and "-INF" too) is rejected.

	Raises ValueError for rejected text and for anything float() itself
	cannot parse.
	"""
	lowered = x.lower()
	if "nan" in lowered or "inf" in lowered:
		raise ValueError

	return float(x)

def _check_label(line_no, nodes):
	"""Validate and remove the leading label token of one dataset line.

	nodes is the whitespace-split token list of the line; its first element
	(the label) is popped, so that only feature tokens remain afterwards.
	A label containing ',' is treated as a multi-label and every
	comma-separated part must be a finite number.

	Returns True when the label is valid, False after reporting an error.
	"""
	if not nodes:
		err(line_no, "missing label, perhaps an empty line?")
		return False

	label = nodes.pop(0)

	if ',' in label:
		# multi-label format
		try:
			for part in label.split(','):
				my_float(part)
		except ValueError:
			err(line_no, "label {0} is not a valid multi-label form".format(label))
			return False
	else:
		try:
			my_float(label)
		except ValueError:
			err(line_no, "label {0} is not a number".format(label))
			return False

	return True

def _check_features(line_no, nodes):
	"""Validate the <index>:<value> feature tokens of one dataset line.

	Each token must split into exactly an integer index and a finite real
	value; indices must be non-negative and strictly ascending.

	Returns True when every token is valid, False if any error was reported.
	"""
	all_valid = True
	prev_index = -1
	for i, node in enumerate(nodes):
		try:
			(index, value) = node.split(':')
			index = int(index)
			my_float(value)
		except ValueError:
			# raised by a wrong number of ':' parts, a non-integer index,
			# or a value that my_float() rejects
			err(line_no, "feature '{0}' not an <index>:<value> pair, <index> integer, <value> real number ".format(node))
			all_valid = False
			continue

		# precomputed kernel's index starts from 0 and LIBSVM
		# checks it. Hence, don't treat index 0 as an error.
		if index < 0:
			err(line_no, "feature index must be positive; wrong feature {0}".format(node))
			all_valid = False
		elif index <= prev_index:
			# prev_index starts at -1, so this branch is only reachable for i >= 1
			err(line_no, "feature indices must be in an ascending order, previous/current features {0} {1}".format(nodes[i-1], node))
			all_valid = False
		prev_index = index

	return all_valid

def main():
	"""Check the dataset named on the command line against the LIBSVM format.

	Expects exactly one command-line argument, the dataset path. Prints a
	usage message or a "not found" message and calls exit(1) when the
	argument is missing or the path does not exist. Otherwise every line is
	checked for a trailing newline, a valid label and valid features, and
	each problem is reported through err().

	Returns 1 if at least one line had an error, 0 otherwise.
	"""
	if len(argv) != 2:
		print("Usage: {0} dataset".format(argv[0]))
		exit(1)

	dataset = argv[1]

	if not os.path.exists(dataset):
		print("dataset {0} not found".format(dataset))
		exit(1)

	error_line_count = 0
	# the with-statement guarantees the file is closed even if a check fails
	with open(dataset, 'r') as dataset_file:
		for line_no, line in enumerate(dataset_file, 1):
			line_ok = True

			# each line must end with a newline character
			if not line.endswith('\n'):
				err(line_no, "missing a newline character in the end")
				line_ok = False

			nodes = line.split()

			# _check_label() pops the label, leaving only features in nodes
			if not _check_label(line_no, nodes):
				line_ok = False

			if not _check_features(line_no, nodes):
				line_ok = False

			if not line_ok:
				error_line_count += 1

	if error_line_count > 0:
		print("Found {0} lines with error.".format(error_line_count))
		return 1
	else:
		print("No error.")
		return 0

# Run the checker only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
	status = main()
	exit(status)