Skip to content

Commit

Permalink
fix: Skip of newline for single-examples (#4636)
Browse files Browse the repository at this point in the history
* fix: Skip of newline for single-examples

* remove from cs test
  • Loading branch information
bassmang authored Sep 8, 2023
1 parent dfef808 commit de52303
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 17 deletions.
3 changes: 2 additions & 1 deletion cs/unittest/RunTests.tt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ var skipList = new[] { 13, 32, 39, 258, 40, 259, 41, 260, 59, 60, 61, 66, 68, 90
256, 299, 300, 306, 310, 311, 327, 328, 329, 330, 331, 367, 368, 396, 397, 398, // DSJSON not supported
383, 389, 390, 391, 392, 393, // no data file
400, 404, // positional args
- 405, 406, 407, 411, 415, 417, 456, 457, 458, 459, 460, 461, 462 // DSJSON not supported
+ 405, 406, 407, 411, 415, 417, 456, 457, 458, 459, 460, 461, 462, // DSJSON not supported
+ 464 // Empty lines not supported
};

var outputModels = new Dictionary<string, TestCase>();
Expand Down
13 changes: 13 additions & 0 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6004,5 +6004,18 @@
"input_files": [
"train-sets/automl_spin_off.txt"
]
},
{
"id": 464,
"desc": "Ignore empty lines on single-examples",
"vw_command": "-d train-sets/single_empty_lines.txt -p single_empty_lines.predict",
"diff_files": {
"stderr": "test-sets/ref/single_empty_lines.stderr",
"single_empty_lines.predict": "pred-sets/ref/single_empty_lines.predict",
"stdout": "test-sets/ref/single_empty_lines.stdout"
},
"input_files": [
"train-sets/single_empty_lines.txt"
]
}
]
1 change: 1 addition & 0 deletions test/pred-sets/ref/single_empty_lines.predict
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
23 changes: 23 additions & 0 deletions test/test-sets/ref/single_empty_lines.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
predictions = single_empty_lines.predict
using no cache
Reading datafile = train-sets/single_empty_lines.txt
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
Enabled learners: gd, scorer-identity, count_label
Input label = SIMPLE
Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 1.0000 0.0000 2

finished run
number of examples = 1
weighted example sum = 1.000000
weighted label sum = 1.000000
average loss = 1.000000
best constant = 1.000000
best constant's loss = 0.000000
total feature number = 2
Empty file.
7 changes: 3 additions & 4 deletions test/train-sets/ref/empty-set.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@ Input label = SIMPLE
Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
- n.a. n.a. 1 1.0 unknown 0.0000 1

finished run
- number of examples = 1
- weighted example sum = 1.000000
+ number of examples = 0
+ weighted example sum = 0.000000
weighted label sum = 0.000000
average loss = n.a.
- total feature number = 1
+ total feature number = 0
23 changes: 11 additions & 12 deletions test/train-sets/ref/topk-train.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,20 @@ average since example example current current cur
loss last counter weight label predict features
9.000000 9.000000 1 1.0 3.0000 0.0000 4
4.590362 0.180723 2 2.0 0.0000 0.4251 4
- 3.928039 2.603395 4 4.0 unknown 0.2876 1
- 3.523584 3.119128 8 8.0 unknown 0.4184 1
- 2.610412 1.697241 16 16.0 unknown 0.6151 1
- 1.917275 1.224138 32 32.0 unknown 0.7335 1
- 1.246961 0.576646 64 64.0 unknown 0.8100 1
- 0.784439 0.321916 128 128.0 unknown 0.8650 1
- 0.439552 0.094665 256 256.0 unknown 0.9058 1
- 0.226776 0.014000 512 512.0 unknown 0.9328 1
- 0.113599 0.000422 1024 1024.0 unknown 0.9396 1
+ 3.008577 1.426792 4 4.0 0.0000 0.5002 4
+ 2.893238 2.777898 8 8.0 1.0000 0.7497 4
+ 2.321989 1.750740 16 16.0 2.0000 1.5635 4
+ 1.640977 0.959966 32 32.0 3.0000 1.4030 4
+ 1.041363 0.441749 64 64.0 3.0000 2.2510 4
+ 0.623755 0.206147 128 128.0 0.0000 0.4018 4
+ 0.336533 0.049310 256 256.0 0.0000 0.1610 4
+ 0.170349 0.004165 512 512.0 1.0000 1.0024 4

finished run
- number of examples per pass = 12
+ number of examples per pass = 9
passes used = 100
- weighted example sum = 1200.000000
+ weighted example sum = 900.000000
weighted label sum = 1500.000000
average loss = 0.096938
best constant = 1.666667
- total feature number = 3900
+ total feature number = 3600
9 changes: 9 additions & 0 deletions test/train-sets/single_empty_lines.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
1 | x:1








1 change: 1 addition & 0 deletions vowpalwabbit/core/src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class single_example_handler
}
else if (ec->end_pass) { _context.template process<example, end_pass>(*ec); }
else if (is_save_cmd(ec)) { _context.template process<example, save>(*ec); }
+ else if (ec->is_newline) { VW::finish_example(_context.get_master(), *ec); }
else { _context.template process<example, learn_ex>(*ec); }
}

Expand Down

0 comments on commit de52303

Please sign in to comment.