You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

svm-predict.c 5.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. #include <stdio.h>
  2. #include <ctype.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <errno.h>
  6. #include "svm.h"
  /*
   * Silent stand-in for printf: accepts a format string and any further
   * arguments, ignores all of them, writes nothing and returns 0.
   * main() installs it as the `info` callback when -q is given.
   */
  int print_null(const char *s,...)
  {
      (void)s;    /* deliberately unused */
      return 0;
  }
  /* Callback used for informational messages; starts out as printf. */
  static int (*info)(const char *fmt, ...) = printf;

  /* Node array that predict() fills with the features of the current instance. */
  struct svm_node *x;

  /* Number of svm_node elements x is allocated for; doubled when it runs out. */
  int max_nr_attr = 64;

  /* Model that predictions are computed with. */
  struct svm_model *model;

  /* Non-zero when probability output was requested with -b. */
  int predict_probability = 0;

  /* Line buffer shared by readline() and predict(), and its size in bytes. */
  static char *line = NULL;
  static int max_line_len;
  15. static char* readline(FILE *input)
  16. {
  17. int len;
  18. if(fgets(line,max_line_len,input) == NULL)
  19. return NULL;
  20. while(strrchr(line,'\n') == NULL)
  21. {
  22. max_line_len *= 2;
  23. line = (char *) realloc(line,max_line_len);
  24. len = (int) strlen(line);
  25. if(fgets(line+len,max_line_len-len,input) == NULL)
  26. break;
  27. }
  28. return line;
  29. }
  /*
   * Report a malformed input line and stop the program.
   *
   * line_num: number printed in the message (callers pass the 1-based
   *           position of the offending instance).
   * Does not return: the process exits with status 1.
   */
  void exit_input_error(int line_num)
  {
      FILE *const err = stderr;

      fprintf(err, "Wrong input format at line %d\n", line_num);
      exit(1);
  }
  35. void predict(FILE *input, FILE *output)
  36. {
  37. int correct = 0;
  38. int total = 0;
  39. double error = 0;
  40. double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
  41. int svm_type=svm_get_svm_type(model);
  42. int nr_class=svm_get_nr_class(model);
  43. double *prob_estimates=NULL;
  44. int j;
  45. if(predict_probability)
  46. {
  47. if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
  48. info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
  49. else
  50. {
  51. int *labels=(int *) malloc(nr_class*sizeof(int));
  52. svm_get_labels(model,labels);
  53. prob_estimates = (double *) malloc(nr_class*sizeof(double));
  54. fprintf(output,"labels");
  55. for(j=0;j<nr_class;j++)
  56. fprintf(output," %d",labels[j]);
  57. fprintf(output,"\n");
  58. free(labels);
  59. }
  60. }
  61. max_line_len = 1024;
  62. line = (char *)malloc(max_line_len*sizeof(char));
  63. while(readline(input) != NULL)
  64. {
  65. int i = 0;
  66. double target_label, predict_label;
  67. char *idx, *val, *label, *endptr;
  68. int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
  69. label = strtok(line," \t\n");
  70. if(label == NULL) // empty line
  71. exit_input_error(total+1);
  72. target_label = strtod(label,&endptr);
  73. if(endptr == label || *endptr != '\0')
  74. exit_input_error(total+1);
  75. while(1)
  76. {
  77. if(i>=max_nr_attr-1) // need one more for index = -1
  78. {
  79. max_nr_attr *= 2;
  80. x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
  81. }
  82. idx = strtok(NULL,":");
  83. val = strtok(NULL," \t");
  84. if(val == NULL)
  85. break;
  86. errno = 0;
  87. x[i].index = (int) strtol(idx,&endptr,10);
  88. if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
  89. exit_input_error(total+1);
  90. else
  91. inst_max_index = x[i].index;
  92. errno = 0;
  93. x[i].value = strtod(val,&endptr);
  94. if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
  95. exit_input_error(total+1);
  96. ++i;
  97. }
  98. x[i].index = -1;
  99. if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
  100. {
  101. predict_label = svm_predict_probability(model,x,prob_estimates);
  102. fprintf(output,"%g",predict_label);
  103. for(j=0;j<nr_class;j++)
  104. fprintf(output," %g",prob_estimates[j]);
  105. fprintf(output,"\n");
  106. }
  107. else
  108. {
  109. predict_label = svm_predict(model,x);
  110. fprintf(output,"%g\n",predict_label);
  111. }
  112. if(predict_label == target_label)
  113. ++correct;
  114. error += (predict_label-target_label)*(predict_label-target_label);
  115. sump += predict_label;
  116. sumt += target_label;
  117. sumpp += predict_label*predict_label;
  118. sumtt += target_label*target_label;
  119. sumpt += predict_label*target_label;
  120. ++total;
  121. }
  122. if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
  123. {
  124. info("Mean squared error = %g (regression)\n",error/total);
  125. info("Squared correlation coefficient = %g (regression)\n",
  126. ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
  127. ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
  128. );
  129. }
  130. else
  131. info("Accuracy = %g%% (%d/%d) (classification)\n",
  132. (double)correct/total*100,correct,total);
  133. if(predict_probability)
  134. free(prob_estimates);
  135. }
  /*
   * Print the command-line usage text to standard output and terminate the
   * program with exit status 1.  Does not return.
   */
  void exit_with_help()
  {
      static const char usage[] =
          "Usage: svm-predict [options] test_file model_file output_file\n"
          "options:\n"
          "-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n"
          "-q : quiet mode (no outputs)\n";

      /* The text has no conversion specifiers, so it is written verbatim. */
      fputs(usage, stdout);
      exit(1);
  }
  146. int main(int argc, char **argv)
  147. {
  148. FILE *input, *output;
  149. int i;
  150. // parse options
  151. for(i=1;i<argc;i++)
  152. {
  153. if(argv[i][0] != '-') break;
  154. ++i;
  155. switch(argv[i-1][1])
  156. {
  157. case 'b':
  158. predict_probability = atoi(argv[i]);
  159. break;
  160. case 'q':
  161. info = &print_null;
  162. i--;
  163. break;
  164. default:
  165. fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
  166. exit_with_help();
  167. }
  168. }
  169. if(i>=argc-2)
  170. exit_with_help();
  171. input = fopen(argv[i],"r");
  172. if(input == NULL)
  173. {
  174. fprintf(stderr,"can't open input file %s\n",argv[i]);
  175. exit(1);
  176. }
  177. output = fopen(argv[i+2],"w");
  178. if(output == NULL)
  179. {
  180. fprintf(stderr,"can't open output file %s\n",argv[i+2]);
  181. exit(1);
  182. }
  183. if((model=svm_load_model(argv[i+1]))==0)
  184. {
  185. fprintf(stderr,"can't open model file %s\n",argv[i+1]);
  186. exit(1);
  187. }
  188. x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
  189. if(predict_probability)
  190. {
  191. if(svm_check_probability_model(model)==0)
  192. {
  193. fprintf(stderr,"Model does not support probabiliy estimates\n");
  194. exit(1);
  195. }
  196. }
  197. else
  198. {
  199. if(svm_check_probability_model(model)!=0)
  200. info("Model supports probability estimates, but disabled in prediction.\n");
  201. }
  202. predict(input,output);
  203. svm_free_and_destroy_model(&model);
  204. free(x);
  205. free(line);
  206. fclose(input);
  207. fclose(output);
  208. return 0;
  209. }

A Python package for graph kernels, graph edit distances and graph pre-image problem.