-
Notifications
You must be signed in to change notification settings - Fork 0
/
pcaoptions.hh
173 lines (156 loc) · 4.69 KB
/
pcaoptions.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#ifndef __PCAOPTIONS_HH__
#define __PCAOPTIONS_HH__
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <iostream>
namespace hashpca
{
// Usage text printed alongside every command-line error.
//
// The pointer itself is const so that the definition has internal linkage:
// a non-const namespace-scope pointer defined in a header is an
// external-linkage definition and produces duplicate-symbol link errors as
// soon as the header is included from more than one translation unit.
const char* const help =
  "Usage: pca [options] input [input2]\n"
  "Available options:\n"
  "-b <int> : size of feature hash (default: 65535)\n"
  "-k <int> : rank of approximation (default: 40)\n"
  "-m <name> : model file\n"
  "-t : projection mode\n"
  "-z : dont prefix projection with index\n"
  "-f : flush after each output line\n"
  "-e : evidence normalize\n"
  "-s : tanh(0.85)ify\n"
  "-c : center data\n"
  "-w : do not whiten projection\n"
  "-w0 : whiten only first component, scale others\n"
  "-a : hash all features (including integers)\n"
  "-q ab : pair features from a and b\n";
// Settings collected from the command line.  A default-constructed
// instance holds the values that apply when no switch is given.
struct PcaOptions
{
  // Whitening choice: ALL is the default, NONE is selected by "-w" and
  // FIRST by "-w0".
  enum WhitenType { ALL, NONE, FIRST };

  unsigned int hashsize;    // -b : size of feature hash
  unsigned int rank;        // -k : rank of approximation
  const char*  model;       // -m : model file, null when not given
  bool         project;     // -t : projection mode
  bool         withprefix;  // cleared by -z
  bool         flush;       // -f : flush after each output line
  bool         normalize;   // -e : evidence normalize
  bool         tanhify;     // -s
  bool         center;      // -c : center data
  WhitenType   whiten;      // -w / -w0
  bool         hashall;     // -a : hash all features
  const char*  dashq;       // -q : argument as given, null when not given

  // Initialize every field to its documented default.
  PcaOptions ()
    : hashsize   (65535),
      rank       (40),
      model      (0),
      project    (false),
      withprefix (true),
      flush      (false),
      normalize  (false),
      tanhify    (false),
      center     (false),
      whiten     (ALL),
      hashall    (false),
      dashq      (0)
  {
  }
};
// Parse argv[0] as an integer and return it.
//
// argc : number of arguments remaining in argv; must be at least 1.
// argv : remaining arguments; argv[0] is the text to convert.
//
// The text is converted with strtol using base 0, so decimal, octal
// ("0...") and hexadecimal ("0x...") spellings are accepted.  Trailing
// whitespace is tolerated; any other trailing character is an error.
//
// On a missing argument, malformed text, or a value that does not fit in
// an int, an error and the usage text are written to std::cerr and the
// process exits with status 1.
//
// Declared inline because it is defined in a header.
inline int
parse_int (int   argc,
           char* argv[])
{
  if (argc < 1)
    {
      std::cerr << "ERROR: missing expected integer argument" << std::endl;
      std::cerr << help << std::endl;
      std::exit (1);
    }

  char* endptr = 0;

  // strtol reports overflow only through errno, so clear it first.
  errno = 0;
  const long parsed = std::strtol (argv[0], &endptr, 0);

  const bool no_digits = (endptr == argv[0]);

  // isspace has undefined behaviour for negative char values, hence the
  // conversion to unsigned char.
  const bool trailing_junk =
    (*endptr != '\0'
     && ! std::isspace (static_cast<unsigned char> (*endptr)));

  // Reject values that strtol clamped, and values that fit in a long but
  // would be silently truncated by the conversion to int.
  const bool out_of_range =
    (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX);

  if (no_digits || trailing_junk || out_of_range)
    {
      std::cerr << "ERROR: invalid integer argument '"
                << argv[0] << "'" << std::endl;
      std::cerr << help << std::endl;
      std::exit (1);
    }

  return static_cast<int> (parsed);
}
// Return argv[0], the string value of the option being parsed.
//
// argc : number of arguments remaining in argv; must be at least 1.
// argv : remaining arguments.
//
// The returned pointer is argv[0] itself; nothing is copied.  When no
// argument remains, an error and the usage text are written to std::cerr
// and the process exits with status 1.
//
// Declared inline because it is defined in a header; without it, including
// the header from two translation units yields duplicate definitions.
inline char*
parse_string (int   argc,
              char* argv[])
{
  if (argc < 1)
    {
      std::cerr << "ERROR: missing expected string argument" << std::endl;
      std::cerr << help << std::endl;
      std::exit (1);
    }

  return argv[0];
}
PcaOptions
parse_pca_options (int& argc,
char**& argv)
{
PcaOptions options;
--argc;
++argv;
while (argc > 0 && argv[0][0] == '-')
{
switch (argv[0][1])
{
case '-':
--argc;
++argv;
return options;
case 'b':
--argc;
++argv;
options.hashsize = parse_int (argc, argv);
break;
case 'k':
--argc;
++argv;
options.rank = parse_int (argc, argv);
break;
case 'e':
options.normalize = true;
break;
case 's':
options.tanhify = true;
break;
case 't':
options.project = true;
break;
case 'z':
options.withprefix = false;
break;
case 'f':
options.flush = true;
break;
case 'c':
options.center = true;
break;
case 'w':
options.whiten = (argv[0][2] == '0') ? PcaOptions::FIRST
: PcaOptions::NONE;
break;
case 'a':
options.hashall = true;
break;
case 'm':
--argc;
++argv;
options.model = parse_string (argc, argv);
break;
case 'q':
--argc;
++argv;
options.dashq = parse_string (argc, argv);
break;
default:
std::cerr << "ERROR: unrecognized switch " << argv[0] << std::endl;
std::cerr << help << std::endl;
exit (1);
break;
}
--argc;
++argv;
}
return options;
}
}
#endif // __PCAOPTIONS_HH__