#!/usr/bin/env python

#
# A format checker for LIBSVM
#

#
# Copyright (c) 2007, Rong-En Fan
#
# All rights reserved.
#
# This program is distributed under the same license of the LIBSVM package.
#

# NOTE: this script was recovered from a whitespace-collapsed git patch:
#   commit  295923589f23aeb8e17a788d87b3b385bcc4067e
#   from    linlin, Tue, 6 Oct 2020 17:22:22 +0200
#   subject [PATCH] New translations checkdata.py (Chinese Simplified)
#   path    lang/zh/gklearn/gedlib/lib/libsvm.3.22/tools/checkdata.py

from sys import argv, exit
import os.path


def err(line_no, msg):
    """Print one diagnostic for dataset line ``line_no`` to stdout."""
    print("line {0}: {1}".format(line_no, msg))


# works like float() but does not accept nan and inf
def my_float(x):
    """Convert the string ``x`` to a float, rejecting nan and inf.

    Any string containing "nan" or "inf" (case-insensitive) is refused,
    which also covers spellings such as "-inf" and "Infinity".

    Raises:
        ValueError: if ``x`` mentions nan/inf or is not a valid float.
    """
    lowered = x.lower()
    if "nan" in lowered or "inf" in lowered:
        raise ValueError("nan and inf are not accepted: {0!r}".format(x))

    return float(x)


def _label_has_error(line_no, nodes):
    """Validate the label token (``nodes[0]``); report and return True on error."""
    if not nodes:
        err(line_no, "missing label, perhaps an empty line?")
        return True

    label = nodes[0]
    if ',' in label:
        # multi-label format: every comma-separated part must be a number
        try:
            for part in label.split(','):
                my_float(part)
        except ValueError:
            err(line_no, "label {0} is not a valid multi-label form".format(label))
            return True
    else:
        try:
            my_float(label)
        except ValueError:
            err(line_no, "label {0} is not a number".format(label))
            return True

    return False


def _features_have_error(line_no, features):
    """Validate the ``index:value`` tokens; report and return True on any error."""
    has_error = False
    prev_index = -1
    for i, node in enumerate(features):
        try:
            # split() must yield exactly two parts, otherwise unpacking
            # raises ValueError just like a bad int()/float() conversion.
            index, value = node.split(':')
            index = int(index)
            my_float(value)
        except ValueError:
            err(line_no, "feature '{0}' not an <index>:<value> pair, <index> integer, <value> real number ".format(node))
            has_error = True
            continue

        # precomputed kernel's index starts from 0 and LIBSVM
        # checks it. Hence, don't treat index 0 as an error.
        if index < 0:
            err(line_no, "feature index must be positive; wrong feature {0}".format(node))
            has_error = True
        elif index <= prev_index:
            # prev_index >= 0 here, so a previous token exists and i >= 1.
            err(line_no, "feature indices must be in an ascending order, previous/current features {0} {1}".format(features[i - 1], node))
            has_error = True
        prev_index = index

    return has_error


def main():
    """Check the dataset named by ``argv[1]`` against the LIBSVM format.

    Every line must end with a newline, start with a numeric label (or a
    comma-separated multi-label) and continue with ``index:value`` features
    whose indices are non-negative and strictly ascending. A diagnostic is
    printed for each problem found.

    Returns:
        0 if no line has an error, 1 otherwise. Exits with status 1 when
        the command line is wrong or the dataset file does not exist.
    """
    if len(argv) != 2:
        print("Usage: {0} dataset".format(argv[0]))
        exit(1)

    dataset = argv[1]

    if not os.path.exists(dataset):
        print("dataset {0} not found".format(dataset))
        exit(1)

    error_line_count = 0
    # The context manager closes the file even if checking is interrupted.
    with open(dataset, 'r') as data_file:
        for line_no, line in enumerate(data_file, 1):
            line_error = False

            # each line must end with a newline character
            if not line.endswith('\n'):
                err(line_no, "missing a newline character in the end")
                line_error = True

            nodes = line.split()

            # Run both checks unconditionally so every problem on the
            # line is reported, not only the first one.
            if _label_has_error(line_no, nodes):
                line_error = True
            if _features_have_error(line_no, nodes[1:]):
                line_error = True

            if line_error:
                error_line_count += 1

    if error_line_count > 0:
        print("Found {0} lines with error.".format(error_line_count))
        return 1

    print("No error.")
    return 0


if __name__ == "__main__":
    exit(main())