Repositories » TXT0 Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in tip

Redesign.

Parsers map documents and tokens to integers.
TFile structure changed and its I/O routines portably serialize integers and floating-point values.
Most of the code has been bundled in txt.h and txt.c; it needs refactoring.
raw2t and t2mem correctly process tiny.txt, but write out an incorrect vocabulary file.
ii.c is broken at this point.

Changeset 7196fc40134c

Parent a74c848dfbb6

by Rup Palchowdhury

Changes to 27 files · Browse files at 7196fc40134c Showing diff from parent a74c848dfbb6 Diff from another changeset...

Change 1 of 2 Show Entire File Makefile Stacked
 
15
16
17
18
19
20
21
22
23
24
 
59
60
61
62
63
64
65
 
 
 
 
66
67
68
69
 
 
 
70
71
72
73
74
75
76
77
78
79
80
 
 
 
 
 
 
 
 
 
 
81
82
83
84
85
86
87
88
89
90
91
 
 
 
 
 
 
 
 
 
 
 
15
16
17
 
18
19
 
20
21
22
 
57
58
59
 
 
 
 
60
61
62
63
64
 
 
 
65
66
67
68
 
 
 
 
 
 
 
 
 
 
69
70
71
72
73
74
75
76
77
78
79
 
 
 
 
 
 
 
 
 
 
80
81
82
83
84
85
86
87
88
89
@@ -15,10 +15,8 @@
 POSTCOMPILE = mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d    SRC = \ - tokenizer.c \   tfile.c \   txt.c \ - parser.c \   crc.c \   porter.c   @@ -59,33 +57,33 @@
 clean:   rm -f $(O)/*.o $(DEPDIR)/*.d raw2t t2mem ii   -.PHONY: parsetest -parsetest: - ./raw2t -x -n -c TREC <test/alice.txt | ./t2mem | diff -q - test/alice.mem - ./raw2t -x -n -c TRECQUERY <test/alice_query.txt | ./t2mem | diff -q - test/alice_query.mem +# .PHONY: parsetest +# parsetest: +# ./raw2t -x -n -c TREC <test/alice.txt | ./t2mem | diff -q - test/alice.mem +# ./raw2t -x -n -c TRECQUERY <test/alice_query.txt | ./t2mem | diff -q - test/alice_query.mem   -.PHONY: iitest -iitest: - ./ii -s test/alice_query.t <test/alice.t | sort -k1,1 -k3,3nr | diff -q - test/alice.rank +# .PHONY: iitest +# iitest: +# ./ii -s test/alice_query.t <test/alice.t | sort -k1,1 -k3,3nr | diff -q - test/alice.rank   -.PHONY: alice -alice: - ./raw2t -x -n -c TREC <test/alice.txt >/tmp/alice.t - ./raw2t -x -n -c TRECQUERY <test/alice_query.txt >/tmp/alice_query.t - ./t2mem </tmp/alice.t >/tmp/alice.mem - ./t2mem </tmp/alice_query.t >/tmp/alice_query.mem - ./ii -s /tmp/alice_query.t </tmp/alice.t >/tmp/alice.res - sort -k1,1 -k3,3nr /tmp/alice.res >/tmp/alice.rank - awk -f txt2trecrun.awk </tmp/alice.rank >/tmp/alice.run - ~/ir/trec_eval.9.0/trec_eval -q test/alice.qrel /tmp/alice.run >/tmp/alice.eval +# .PHONY: alice +# alice: +# ./raw2t -x -n -c TREC <test/alice.txt >/tmp/alice.t +# ./raw2t -x -n -c TRECQUERY <test/alice_query.txt >/tmp/alice_query.t +# ./t2mem </tmp/alice.t >/tmp/alice.mem +# ./t2mem </tmp/alice_query.t >/tmp/alice_query.mem +# ./ii -s /tmp/alice_query.t </tmp/alice.t >/tmp/alice.res +# sort -k1,1 -k3,3nr /tmp/alice.res >/tmp/alice.rank +# awk -f txt2trecrun.awk </tmp/alice.rank >/tmp/alice.run +# ~/ir/trec_eval.9.0/trec_eval -q test/alice.qrel /tmp/alice.run >/tmp/alice.eval   -.PHONY: ap -ap: - ./raw2t -x -n -c TREC <test/ap.txt >/tmp/ap.t - ./raw2t -x -n -c TRECQUERY <test/ap_query.txt >/tmp/ap_query.t - ./t2mem </tmp/ap.t >/tmp/ap.mem - ./t2mem </tmp/ap_query.t >/tmp/ap_query.mem - ./ii -s /tmp/ap_query.t </tmp/ap.t >/tmp/ap.res - sort -k1,1 -k3,3nr /tmp/ap.res 
>/tmp/ap.rank - awk -f txt2trecrun.awk </tmp/ap.rank >/tmp/ap.run - ~/ir/trec_eval.9.0/trec_eval -q test/ap.qrel /tmp/ap.run >/tmp/ap.eval +# .PHONY: ap +# ap: +# ./raw2t -x -n -c TREC <test/ap.txt >/tmp/ap.t +# ./raw2t -x -n -c TRECQUERY <test/ap_query.txt >/tmp/ap_query.t +# ./t2mem </tmp/ap.t >/tmp/ap.mem +# ./t2mem </tmp/ap_query.t >/tmp/ap_query.mem +# ./ii -s /tmp/ap_query.t </tmp/ap.t >/tmp/ap.res +# sort -k1,1 -k3,3nr /tmp/ap.res >/tmp/ap.rank +# awk -f txt2trecrun.awk </tmp/ap.rank >/tmp/ap.run +# ~/ir/trec_eval.9.0/trec_eval -q test/ap.qrel /tmp/ap.run >/tmp/ap.eval
Change 1 of 1 Show Entire File cw.h Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
@@ -0,0 +1,310 @@
+/* common word arrays, the word count is the first element */ + +char *cwSMART[572] = {"571", "a", "a's", "able", "about", "above", "according", + "accordingly", "across", "actually", "after", "afterwards", + "again", "against", "ain't", "all", "allow", "allows", + "almost", "alone", "along", "already", "also", "although", + "always", "am", "among", "amongst", "an", "and", "another", + "any", "anybody", "anyhow", "anyone", "anything", "anyway", + "anyways", "anywhere", "apart", "appear", "appreciate", + "appropriate", "are", "aren't", "around", "as", "aside", + "ask", "asking", "associated", "at", "available", "away", + "awfully", "b", "be", "became", "because", "become", + "becomes", "becoming", "been", "before", "beforehand", + "behind", "being", "believe", "below", "beside", "besides", + "best", "better", "between", "beyond", "both", "brief", + "but", "by", "c", "c'mon", "c's", "came", "can", "can't", + "cannot", "cant", "cause", "causes", "certain", "certainly", + "changes", "clearly", "co", "com", "come", "comes", + "concerning", "consequently", "consider", "considering", + "contain", "containing", "contains", "corresponding", + "could", "couldn't", "course", "currently", "d", + "definitely", "described", "despite", "did", "didn't", + "different", "do", "does", "doesn't", "doing", "don't", + "done", "down", "downwards", "during", "e", "each", "edu", + "eg", "eight", "either", "else", "elsewhere", "enough", + "entirely", "especially", "et", "etc", "even", "ever", + "every", "everybody", "everyone", "everything", + "everywhere", "ex", "exactly", "example", "except", "f", + "far", "few", "fifth", "first", "five", "followed", + "following", "follows", "for", "former", "formerly", + "forth", "four", "from", "further", "furthermore", "g", + "get", "gets", "getting", "given", "gives", "go", "goes", + "going", "gone", "got", "gotten", "greetings", "h", "had", + "hadn't", "happens", "hardly", "has", "hasn't", "have", + "haven't", "having", "he", "he's", "hello", 
"help", "hence", + "her", "here", "here's", "hereafter", "hereby", "herein", + "hereupon", "hers", "herself", "hi", "him", "himself", + "his", "hither", "hopefully", "how", "howbeit", "however", + "i", "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", + "immediate", "in", "inasmuch", "inc", "indeed", "indicate", + "indicated", "indicates", "inner", "insofar", "instead", + "into", "inward", "is", "isn't", "it", "it'd", "it'll", + "it's", "its", "itself", "j", "just", "k", "keep", "keeps", + "kept", "know", "knows", "known", "l", "last", "lately", + "later", "latter", "latterly", "least", "less", "lest", + "let", "let's", "like", "liked", "likely", "little", "look", + "looking", "looks", "ltd", "m", "mainly", "many", "may", + "maybe", "me", "mean", "meanwhile", "merely", "might", + "more", "moreover", "most", "mostly", "much", "must", "my", + "myself", "n", "name", "namely", "nd", "near", "nearly", + "necessary", "need", "needs", "neither", "never", + "nevertheless", "new", "next", "nine", "no", "nobody", + "non", "none", "noone", "nor", "normally", "not", "nothing", + "novel", "now", "nowhere", "o", "obviously", "of", "off", + "often", "oh", "ok", "okay", "old", "on", "once", "one", + "ones", "only", "onto", "or", "other", "others", + "otherwise", "ought", "our", "ours", "ourselves", "out", + "outside", "over", "overall", "own", "p", "particular", + "particularly", "per", "perhaps", "placed", "please", + "plus", "possible", "presumably", "probably", "provides", + "q", "que", "quite", "qv", "r", "rather", "rd", "re", + "really", "reasonably", "regarding", "regardless", + "regards", "relatively", "respectively", "right", "s", + "said", "same", "saw", "say", "saying", "says", "second", + "secondly", "see", "seeing", "seem", "seemed", "seeming", + "seems", "seen", "self", "selves", "sensible", "sent", + "serious", "seriously", "seven", "several", "shall", "she", + "should", "shouldn't", "since", "six", "so", "some", + "somebody", "somehow", "someone", "something", 
"sometime", + "sometimes", "somewhat", "somewhere", "soon", "sorry", + "specified", "specify", "specifying", "still", "sub", + "such", "sup", "sure", "t", "t's", "take", "taken", "tell", + "tends", "th", "than", "thank", "thanks", "thanx", "that", + "that's", "thats", "the", "their", "theirs", "them", + "themselves", "then", "thence", "there", "there's", + "thereafter", "thereby", "therefore", "therein", "theres", + "thereupon", "these", "they", "they'd", "they'll", + "they're", "they've", "think", "third", "this", "thorough", + "thoroughly", "those", "though", "three", "through", + "throughout", "thru", "thus", "to", "together", "too", + "took", "toward", "towards", "tried", "tries", "truly", + "try", "trying", "twice", "two", "u", "un", "under", + "unfortunately", "unless", "unlikely", "until", "unto", + "up", "upon", "us", "use", "used", "useful", "uses", + "using", "usually", "uucp", "v", "value", "various", "very", + "via", "viz", "vs", "w", "want", "wants", "was", "wasn't", + "way", "we", "we'd", "we'll", "we're", "we've", "welcome", + "well", "went", "were", "weren't", "what", "what's", + "whatever", "when", "whence", "whenever", "where", + "where's", "whereafter", "whereas", "whereby", "wherein", + "whereupon", "wherever", "whether", "which", "while", + "whither", "who", "who's", "whoever", "whole", "whom", + "whose", "why", "will", "willing", "wish", "with", "within", + "without", "won't", "wonder", "would", "would", "wouldn't", + "x", "y", "yes", "yet", "you", "you'd", "you'll", "you're", + "you've", "your", "yours", "yourself", "yourselves", "z", + "zero"}; + +char *cwLucene[34] = {"33", "a", "an", "and", "are", "as", "at", "be", "but", "by", + "for", "if", "in", "into", "is", "it", "no", "not", "of", + "on", "or", "such", "that", "the", "their", "then", "there", + "these", "they", "this", "to", "was", "will", "with"}; + +char *cwSER[18] = {"17", "a", "the", "an", "at", "by", "into", "on", "for", "from", + "to", "with", "of", "and", "or", "in", "not", 
"et"}; + +char *cwIndri[419] = {"418", "a", "about", "above", "according", "across", "after", + "afterwards", "again", "against", "albeit", "all", "almost", + "alone", "along", "already", "also", "although", "always", + "am", "among", "amongst", "an", "and", "another", "any", + "anybody", "anyhow", "anyone", "anything", "anyway", + "anywhere", "apart", "are", "around", "as", "at", "av", + "be", "became", "because", "become", "becomes", "becoming", + "been", "before", "beforehand", "behind", "being", "below", + "beside", "besides", "between", "beyond", "both", "but", + "by", "can", "cannot", "canst", "certain", "cf", "choose", + "contrariwise", "cos", "could", "cu", "day", "do", "does", + "doesn't", "doing", "dost", "doth", "double", "down", + "dual", "during", "each", "either", "else", "elsewhere", + "enough", "et", "etc", "even", "ever", "every", "everybody", + "everyone", "everything", "everywhere", "except", + "excepted", "excepting", "exception", "exclude", + "excluding", "exclusive", "far", "farther", "farthest", + "few", "ff", "first", "for", "formerly", "forth", "forward", + "from", "front", "further", "furthermore", "furthest", + "get", "go", "had", "halves", "hardly", "has", "hast", + "hath", "have", "he", "hence", "henceforth", "her", "here", + "hereabouts", "hereafter", "hereby", "herein", "hereto", + "hereupon", "hers", "herself", "him", "himself", "hindmost", + "his", "hither", "hitherto", "how", "however", "howsoever", + "i", "ie", "if", "in", "inasmuch", "inc", "include", + "included", "including", "indeed", "indoors", "inside", + "insomuch", "instead", "into", "inward", "inwards", "is", + "it", "its", "itself", "just", "kind", "kg", "km", "last", + "latter", "latterly", "less", "lest", "let", "like", + "little", "ltd", "many", "may", "maybe", "me", "meantime", + "meanwhile", "might", "moreover", "most", "mostly", "more", + "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", + "need", "neither", "never", "nevertheless", "next", "no", + 
"nobody", "none", "nonetheless", "noone", "nope", "nor", + "not", "nothing", "notwithstanding", "now", "nowadays", + "nowhere", "of", "off", "often", "ok", "on", "once", "one", + "only", "onto", "or", "other", "others", "otherwise", + "ought", "our", "ours", "ourselves", "out", "outside", + "over", "own", "per", "perhaps", "plenty", "provide", + "quite", "rather", "really", "round", "said", "sake", + "same", "sang", "save", "saw", "see", "seeing", "seem", + "seemed", "seeming", "seems", "seen", "seldom", "selves", + "sent", "several", "shalt", "she", "should", "shown", + "sideways", "since", "slept", "slew", "slung", "slunk", + "smote", "so", "some", "somebody", "somehow", "someone", + "something", "sometime", "sometimes", "somewhat", + "somewhere", "spake", "spat", "spoke", "spoken", "sprang", + "sprung", "stave", "staves", "still", "such", "supposing", + "than", "that", "the", "thee", "their", "them", + "themselves", "then", "thence", "thenceforth", "there", + "thereabout", "thereabouts", "thereafter", "thereby", + "therefore", "therein", "thereof", "thereon", "thereto", + "thereupon", "these", "they", "this", "those", "thou", + "though", "thrice", "through", "throughout", "thru", "thus", + "thy", "thyself", "till", "to", "together", "too", "toward", + "towards", "ugh", "unable", "under", "underneath", "unless", + "unlike", "until", "up", "upon", "upward", "upwards", "us", + "use", "used", "using", "very", "via", "vs", "want", "was", + "we", "week", "well", "were", "what", "whatever", + "whatsoever", "when", "whence", "whenever", "whensoever", + "where", "whereabouts", "whereafter", "whereas", "whereat", + "whereby", "wherefore", "wherefrom", "wherein", "whereinto", + "whereof", "whereon", "wheresoever", "whereto", "whereunto", + "whereupon", "wherever", "wherewith", "whether", "whew", + "which", "whichever", "whichsoever", "while", "whilst", + "whither", "who", "whoa", "whoever", "whole", "whom", + "whomever", "whomsoever", "whose", "whosoever", "why", + "will", 
"wilt", "with", "within", "without", "worse", + "worst", "would", "wow", "ye", "yet", "year", "yippee", + "you", "your", "yours", "yourself", "yourselves"}; + +char *cwTerrier[734] = {"733", "x", "y", "your", "yours", "yourself", "yourselves", "you", + "yond", "yonder", "yon", "ye", "yet", "z", "zillion", "j", + "u", "umpteen", "usually", "us", "username", "uponed", + "upons", "uponing", "upon", "ups", "upping", "upped", "up", + "unto", "until", "unless", "unlike", "unliker", "unlikest", + "under", "underneath", "use", "used", "usedest", "r", + "rath", "rather", "rathest", "rathe", "re", "relate", + "related", "relatively", "regarding", "really", "res", + "respecting", "respectively", "q", "quite", "que", "qua", + "n", "neither", "neaths", "neath", "nethe", "nethermost", + "necessary", "necessariest", "necessarier", "never", + "nevertheless", "nigh", "nighest", "nigher", "nine", + "noone", "nobody", "nobodies", "nowhere", "nowheres", "no", + "noes", "nor", "nos", "no-one", "none", "not", + "notwithstanding", "nothings", "nothing", "nathless", + "natheless", "t", "ten", "tills", "till", "tilled", + "tilling", "to", "towards", "toward", "towardest", + "towarder", "together", "too", "thy", "thyself", "thus", + "than", "that", "those", "thou", "though", "thous", + "thouses", "thoroughest", "thorougher", "thorough", + "thoroughly", "thru", "thruer", "thruest", "thro", + "through", "throughout", "throughest", "througher", "thine", + "this", "thises", "they", "thee", "the", "then", "thence", + "thenest", "thener", "them", "themselves", "these", + "therer", "there", "thereby", "therest", "thereafter", + "therein", "thereupon", "therefore", "their", "theirs", + "thing", "things", "three", "two", "o", "oh", "owt", + "owning", "owned", "own", "owns", "others", "other", + "otherwise", "otherwisest", "otherwiser", "of", "often", + "oftener", "oftenest", "off", "offs", "offest", "one", + "ought", "oughts", "our", "ours", "ourselves", "ourself", + "out", "outest", "outed", 
"outwith", "outs", "outside", + "over", "overallest", "overaller", "overalls", "overall", + "overs", "or", "orer", "orest", "on", "oneself", "onest", + "ons", "onto", "a", "atween", "at", "athwart", "atop", + "afore", "afterward", "afterwards", "after", "afterest", + "afterer", "ain", "an", "any", "anything", "anybody", + "anyone", "anyhow", "anywhere", "anent", "anear", "and", + "andor", "another", "around", "ares", "are", "aest", "aer", + "against", "again", "accordingly", "abaft", "abafter", + "abaftest", "abovest", "above", "abover", "abouter", + "aboutest", "about", "aid", "amidst", "amid", "among", + "amongst", "apartest", "aparter", "apart", "appeared", + "appears", "appear", "appearing", "appropriating", + "appropriate", "appropriatest", "appropriates", + "appropriater", "appropriated", "already", "always", "also", + "along", "alongside", "although", "almost", "all", "allest", + "aller", "allyou", "alls", "albeit", "awfully", "as", + "aside", "asides", "aslant", "ases", "astrider", "astride", + "astridest", "astraddlest", "astraddler", "astraddle", + "availablest", "availabler", "available", "aughts", "aught", + "vs", "v", "variousest", "variouser", "various", "via", + "vis-a-vis", "vis-a-viser", "vis-a-visest", "viz", "very", + "veriest", "verier", "versus", "k", "g", "go", "gone", + "good", "got", "gotta", "gotten", "get", "gets", "getting", + "b", "by", "byandby", "by-and-by", "bist", "both", "but", + "buts", "be", "beyond", "because", "became", "becomes", + "become", "becoming", "becomings", "becominger", + "becomingest", "behind", "behinds", "before", "beforehand", + "beforehandest", "beforehander", "bettered", "betters", + "better", "bettering", "betwixt", "between", "beneath", + "been", "below", "besides", "beside", "m", "my", "myself", + "mucher", "muchest", "much", "must", "musts", "musths", + "musth", "main", "make", "mayest", "many", "mauger", + "maugre", "me", "meanwhiles", "meanwhile", "mostly", "most", + "moreover", "more", "might", "mights", 
"midst", "midsts", + "h", "huh", "humph", "he", "hers", "herself", "her", + "hereby", "herein", "hereafters", "hereafter", "hereupon", + "hence", "hadst", "had", "having", "haves", "have", "has", + "hast", "hardly", "hae", "hath", "him", "himself", "hither", + "hitherest", "hitherer", "his", "how-do-you-do", "however", + "how", "howbeit", "howdoyoudo", "hoos", "hoo", "w", + "woulded", "woulding", "would", "woulds", "was", "wast", + "we", "wert", "were", "with", "withal", "without", "within", + "why", "what", "whatever", "whateverer", "whateverest", + "whatsoeverer", "whatsoeverest", "whatsoever", "whence", + "whencesoever", "whenever", "whensoever", "when", "whenas", + "whether", "wheen", "whereto", "whereupon", "wherever", + "whereon", "whereof", "where", "whereby", "wherewithal", + "wherewith", "whereinto", "wherein", "whereafter", + "whereas", "wheresoever", "wherefrom", "which", "whichever", + "whichsoever", "whilst", "while", "whiles", "whithersoever", + "whither", "whoever", "whosoever", "whoso", "whose", + "whomever", "s", "syne", "syn", "shalling", "shall", + "shalled", "shalls", "shoulding", "should", "shoulded", + "shoulds", "she", "sayyid", "sayid", "said", "saider", + "saidest", "same", "samest", "sames", "samer", "saved", + "sans", "sanses", "sanserifs", "sanserif", "so", "soer", + "soest", "sobeit", "someone", "somebody", "somehow", "some", + "somewhere", "somewhat", "something", "sometimest", + "sometimes", "sometimer", "sometime", "several", + "severaler", "severalest", "serious", "seriousest", + "seriouser", "senza", "send", "sent", "seem", "seems", + "seemed", "seemingest", "seeminger", "seemings", "seven", + "summat", "sups", "sup", "supping", "supped", "such", + "since", "sine", "sines", "sith", "six", "stop", "stopped", + "p", "plaintiff", "plenty", "plenties", "please", "pleased", + "pleases", "per", "perhaps", "particulars", "particularly", + "particular", "particularest", "particularer", "pro", + "providing", "provides", "provided", 
"provide", "probably", + "l", "layabout", "layabouts", "latter", "latterest", + "latterer", "latterly", "latters", "lots", "lotting", + "lotted", "lot", "lest", "less", "ie", "ifs", "if", "i", + "info", "information", "itself", "its", "it", "is", "idem", + "idemer", "idemest", "immediate", "immediately", + "immediatest", "immediater", "in", "inwards", "inwardest", + "inwarder", "inward", "inasmuch", "into", "instead", + "insofar", "indicates", "indicated", "indicate", + "indicating", "indeed", "inc", "f", "fact", "facts", "fs", + "figupon", "figupons", "figuponing", "figuponed", "few", + "fewer", "fewest", "frae", "from", "failing", "failings", + "five", "furthers", "furtherer", "furthered", "furtherest", + "further", "furthering", "furthermore", "fourscore", + "followthrough", "for", "forwhy", "fornenst", "formerly", + "former", "formerer", "formerest", "formers", "forbye", + "forby", "fore", "forever", "forer", "fores", "four", "d", + "ddays", "dday", "do", "doing", "doings", "doe", "does", + "doth", "downwarder", "downwardest", "downward", + "downwards", "downs", "done", "doner", "dones", "donest", + "dos", "dost", "did", "differentest", "differenter", + "different", "describing", "describe", "describes", + "described", "despiting", "despites", "despited", "despite", + "during", "c", "cum", "circa", "chez", "cer", "certain", + "certainest", "certainer", "cest", "canst", "cannot", + "cant", "cants", "canting", "cantest", "canted", "co", + "could", "couldst", "comeon", "comeons", "come-ons", + "come-on", "concerning", "concerninger", "concerningest", + "consequently", "considering", "e", "eg", "eight", "either", + "even", "evens", "evenser", "evensest", "evened", "evenest", + "ever", "everyone", "everything", "everybody", "everywhere", + "every", "ere", "each", "et", "etc", "elsewhere", "else", + "ex", "excepted", "excepts", "except", "excepting", "exes", + "enough"};
Change 1 of 12 Show Entire File ii.c Stacked
 
6
7
8
9
10
11
12
 
20
21
22
23
24
 
 
 
 
 
 
25
26
27
28
29
30
31
32
 
 
 
 
 
33
34
35
36
37
 
 
 
 
 
 
 
38
39
40
 
49
50
51
52
 
53
54
55
 
 
 
 
56
57
58
59
60
61
 
 
62
63
64
 
66
67
68
 
69
70
 
 
71
 
 
72
 
73
74
75
 
89
90
91
92
93
94
 
 
 
 
95
96
97
98
99
 
 
 
 
 
 
 
 
 
100
101
102
103
104
 
 
105
106
 
107
108
 
109
110
111
 
 
112
 
 
 
 
 
 
 
 
 
 
 
 
113
114
115
 
122
123
124
125
 
126
127
128
 
 
 
 
129
130
131
132
133
134
 
 
135
136
137
 
164
165
166
167
 
168
169
170
 
 
 
 
 
 
 
171
172
173
174
175
176
 
 
177
178
179
180
181
182
183
184
 
185
186
187
188
189
 
 
190
191
192
 
197
198
199
200
201
202
 
 
203
204
205
206
 
 
 
207
208
209
210
211
212
213
 
 
 
 
 
 
 
214
215
216
217
218
219
220
 
 
 
 
 
 
 
 
 
 
 
221
222
 
223
224
225
 
239
240
241
 
242
243
244
 
302
303
304
 
 
 
305
306
307
 
312
313
314
315
 
316
317
318
319
 
 
 
 
320
321
322
 
 
 
 
 
 
 
323
324
325
 
 
 
 
 
 
 
 
326
327
328
329
330
331
332
333
334
335
 
336
337
338
 
343
344
345
 
 
 
 
346
347
348
 
6
7
8
 
9
10
11
 
19
20
21
 
 
22
23
24
25
26
27
28
29
30
 
 
 
 
 
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
 
59
60
61
 
62
63
64
 
65
66
67
68
69
70
71
72
73
 
74
75
76
77
78
 
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
 
109
110
111
 
 
 
112
113
114
115
116
117
118
119
 
120
121
122
123
124
125
126
127
128
129
 
 
 
 
130
131
132
 
133
134
 
135
136
 
 
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
 
161
162
163
 
164
165
166
 
167
168
169
170
171
172
173
174
175
 
176
177
178
179
180
 
207
208
209
 
210
211
212
 
213
214
215
216
217
218
219
220
221
222
223
 
 
224
225
226
227
228
229
230
231
 
 
232
233
234
235
 
 
236
237
238
239
240
 
245
246
247
 
 
 
248
249
250
 
 
 
251
252
253
254
255
256
257
258
259
 
260
261
262
263
264
265
266
267
268
269
270
 
 
 
271
272
273
274
275
276
277
278
279
280
281
282
 
283
284
285
286
 
300
301
302
303
304
305
306
 
364
365
366
367
368
369
370
371
372
 
377
378
379
 
380
381
 
 
 
382
383
384
385
386
387
 
388
389
390
391
392
393
394
395
396
 
397
398
399
400
401
402
403
404
405
406
407
408
409
 
 
 
 
 
410
411
412
413
 
418
419
420
421
422
423
424
425
426
427
@@ -6,7 +6,6 @@
 #include <kak/kcommon.h>  #include <kak/klist.h>  #include <kak/khash.h> -#include "tokenizer.h"  #include "tfile.h"  #include "txt.h"   @@ -20,21 +19,32 @@
  Post *post_;   Node *np, *npq_, *npq, *nppost_;   Query *q_, *q; - uint32_t k, k_, sumtf, uterm, tf; - char ts[KB]; + uint32_t k, k_, sumtf, uterm, tf, n_ts, c, e; + char *ts; + + c = e = 0; + n_ts = 64; + ts = malloc(sizeof(char) * n_ts);     /* read in the query file */   - tfile = readTFile(fp); - if (tfile == NULL) { - fprintf(stderr, "readTFile() failed\n"); - exit(1); - } + /* if ((tfile = readTFile(fp)) == NULL) */ + /* eprintf("buildq: readTFile() failed\n"); */ + + if ((tfile = receiveTFile(fp)) == NULL) + eprintf("buildq: readTFile() failed\n");     for (np = tfile->list; np != NULL; np = np->next) { /* for each query */     tdoc = np->data;   + if (tdoc->h->n_txt <= 0) { + e++; + weprintf("buildq: Skipped empty query %s", tdoc->id); + continue; + } + + c++;   sscanf(tdoc->rsrc[0], "%u", &sumtf);   sscanf(tdoc->rsrc[1], "%u", &uterm);   @@ -49,16 +59,20 @@
    k = k_ = 0;   - for (int j = 0; j < uterm; j++) { /* for each term in query vector */ + while (k < tdoc->h->n_txt) { /* for each 'term frequency' pair in the vector */     /* pick a term */ - for(; tdoc->txt[k] != ' '; k++); + for(; tdoc->txt[k] != ' '; k++) + ; + if (k - k_ >= n_ts) + ts = erealloc(ts, n_ts<<=1);   memcpy(ts, &(tdoc->txt[k_]), k - k_);   ts[k - k_] = '\0';   k++; k_ = k;     /* and then pick its frequency */ - for(; tdoc->txt[k] != ' '; k++); + for(; tdoc->txt[k] != ' '; k++) + ;   sscanf(&(tdoc->txt[k_]), "%u", &tf);   k++; k_ = k;   @@ -66,10 +80,16 @@
  post_ = newpost(ts, tf);   nppost_ = newnode(post_);   q->tlist = addfront(q->tlist, nppost_); +   /* needn't check for duplicates, terms are unique */   } + + fprintf(stderr, "\rqueries built: %d/%d", c, tfile->h->n);   } + fprintf(stderr, "\n"); + free(ts);   freeTFile(tfile, TRECQUERY); + weprintf("buildq: Skipped %d empty query(ies)\n", e);  }    void bubblesort(Post *p[NDOCS], int nel, int (*cmp)(const void*, const void*)) @@ -89,27 +109,46 @@
 THeader *buildii(Hash *hdoc, Hash *hterm, FILE *fp)  {   int pflag_, tflag_; - uint32_t k, k_, sumtf, uterm, sumsqtf, maxtf, tf; - char ts[KB]; - THeader *h; + uint32_t k, k_, sumtf, uterm, sumsqtf, maxtf, tf, n_ts, c, e; + char *ts; + /* THeader *h; */ + TFile *tfile;   TDoc *tdoc;   Doc *doc;   Post *post, *post_;   Term *term, *term_; - Node *npdoc, *npdoc_, *npterm, *npterm_; + Node *np, *npdoc, *npdoc_, *npterm, *npterm_; + + c = e = 0; + n_ts = 64; + ts = malloc(sizeof(char) * n_ts); + /* h = _newTHeader(TREC); */ + + if ((tfile = receiveTFile(fp)) == NULL) + eprintf("receiveTFile failed\n");   - h = _newTHeader(TREC); - - if ((h = readTHeader(h, fp)) == NULL) - exit(0); + /* if ((h = readTHeader(h, fp)) == NULL) */ + /* eprintf("buildii: readTHeader failed\n"); */   - for(int i = 1; i <= h->n; i++) { /* for each doc */ + /* for(int i = 1; i <= tfile->h->n; i++) { /\* for each doc *\/ */   - tdoc = newTDoc(TREC); + /* tdoc = newTDoc(TREC); */   - if ((tdoc = readTDoc(tdoc, fp, TREC)) == NULL) - exit(0); + /* if ((tdoc = readTDoc(tdoc, fp, TREC)) == NULL) */ + /* eprintf("buildii: readTDoc failed\n"); */   + for (np = tfile->list; np != NULL; np = np->next) { /* for each doc */ + + tdoc = np->data; + + if (tdoc->h->n_txt <= 0) { + e++; + weprintf("buildii: Skipped empty doc %s", tdoc->id); + freeTDoc(tdoc, TREC); + continue; + } + + c++;   sscanf(tdoc->rsrc[0], "%u", &sumtf);   sscanf(tdoc->rsrc[1], "%u", &uterm);   sscanf(tdoc->rsrc[2], "%u", &sumsqtf); @@ -122,16 +161,20 @@
    k = k_ = 0;   - for (int j = 0; j < uterm; j++) { /* for each term */ + while (k < tdoc->h->n_txt) { /* for each 'term frequency' pair*/     /* pick a term */ - for(; tdoc->txt[k] != ' '; k++); + for(; tdoc->txt[k] != ' '; k++) + ; + if (k - k_ >= n_ts) + ts = erealloc(ts, n_ts<<=1);   memcpy(ts, &(tdoc->txt[k_]), k - k_);   ts[k - k_] = '\0';   k++; k_ = k;     /* and then pick its frequency */ - for(; tdoc->txt[k] != ' '; k++); + for(; tdoc->txt[k] != ' '; k++) + ;   sscanf(&(tdoc->txt[k_]), "%u", &tf);   k++; k_ = k;   @@ -164,29 +207,34 @@
  free(term_);   }   freeTDoc(tdoc, TREC); - fprintf(stderr, "\rindexed: %d/%d", i, h->n); + fprintf(stderr, "\rindexed: %d/%d", c, tfile->h->n);   }   fprintf(stderr, "\n"); - return h; + free(ts); + + /*TODO: because tfile->h is needed, freeTFile() isn't called, + * but this isn't neat */ + + weprintf("buildii: Skipped %d empty docs\n", e); + return tfile->h;  }    void search(Query *q, Hash *hdoc, Hash *hterm, THeader *h, Model m)  { - double tf_, idf_, qtf_, w_; - Post *post_t, *post_d; + double tf_, df_, qtf_, qdf_, wd_, wq_, dot; + Post *post_q, *post_d;   Term *term, term__;   Doc *doc, doc__;   Node *npterm, node__, *npdoc, *npscore, *npscore_;   Score *score, *score_;   Hash *hresult;   - /* hpost = newhash(NDOCS, cmppost, hashpost); */ - hresult = newhash(NDOCS, cmpscore, hashscore); + hresult = newhash(NDOCS, cmpscore_p, hashscore);     for (Node *np = q->tlist; np != NULL; np = np->next) { /* for a term */   - post_t = (Post *)(np->data); - term__.s = post_t->id; + post_q = (Post *)(np->data); + term__.s = post_q->id;   node__.data = &term__;   npterm = hlookup(hterm, &node__, 0);   @@ -197,29 +245,42 @@
    for (Node *np1 = term->plist; np1 != NULL; np1 = np1->next) { /* for a post */   - post_d = (Post *)(np1->data); - - doc__.id = post_d->id; + post_d = (Post *)(np1->data); + doc__.id = post_d->id;   node__.data = &doc__; - hdoc->cmp = cmpdoc_p; /* dismantle cmp_fn */ - npdoc = hlookup(hdoc, &node__, 0); - hdoc->cmp = cmpdoc; /* put it back */ + hdoc->cmp = cmpdoc_p; /* dismantle cmp_fn */ + npdoc = hlookup(hdoc, &node__, 0); + hdoc->cmp = cmpdoc; /* put it back */     if (npdoc == NULL)   continue;     doc = (Doc *)(npdoc->data);   - /* compute and combine f(tf)), f(df) and f(qtf) */ + /* transform tf and df of the term (using some + * f(tf) and f(df)) and compute the two + * weights for the term; one in the document + * and one in the query and then compute the + * vector dot product */ + + /* transformations */   tf_ = (*m.tf)(post_d->tf, doc->sumtf, doc->uterm,   doc->sumsqtf, doc->maxtf, doc->nb,   h->sumnb, h->sumnu, h->sumnt,   h->n); - idf_ = (*m.df)(term->df, h->n); - qtf_ = (*m.qtf)(post_t->tf); - w_ = tf_ * idf_ * qtf_; + df_ = (*m.df)(term->df, h->n); + qtf_ = (*m.qtf)(post_q->tf, doc->sumtf, doc->uterm, + doc->sumsqtf, doc->maxtf, doc->nb, + h->sumnb, h->sumnu, h->sumnt, + h->n); + qdf_ = (*m.qdf)(term->df, h->n); + + /* weight computation */ + wd_ = tf_ * df_; /* document term weight */ + wq_ = qtf_ * qdf_; /* query term weight */ + dot = wd_ * wq_; /* for the vector dot product */   - score_ = newscore_s(post_d->id, w_); + score_ = newscore_s(post_d->id, dot);   npscore_ = newnode(score_);   npscore = hlookup(hresult, npscore_, 1);   @@ -239,6 +300,7 @@
  }   }   + fflush(stdout);   freehash(hresult, freescore_s);     /*TODO: figure out a way to free memory neatly*/ @@ -302,6 +364,9 @@
  hq = newhash(NHASH, cmpquery, hashquery);     h = buildii(hdoc, hterm, stdin); + + /* exit(0); /\* DEBUG *\/ */ +   /* fprinthash(stdout, hterm, fprintterm); */   /* hstats(hdoc, NDOCS); */   @@ -312,27 +377,37 @@
  /* fprinthash(stdout, hq, fprintquery); */   }   - /* test model, default */ + /* test model nnn.nnn, default */   - m.tf = &_tf; - m.df = &_df; - m.qtf = &_qtf; + m.tf = &SMART_n_n_tf; + m.df = &SMART__n__df; + m.qtf = &SMART_n_n_tf; + m.qdf = &SMART__n__df;     if (opt_m) { - if (strcmp(mstr, "SMART_dtb") == 0) { + if (strcmp(mstr, "SMART_dnb_dtn") == 0) { + m.tf = &SMART_d_b_tf; + m.df = &SMART__n__df; + m.qtf = &SMART_d_n_tf; + m.qdf = &SMART__t__df; + } + else if (strcmp(mstr, "SMART_dtb_nnn") == 0) {   m.tf = &SMART_d_b_tf;   m.df = &SMART__t__df; - m.qtf = &_qtf; + m.qtf = &SMART_n_n_tf; + m.qdf = &SMART__n__df; + } + else if (strcmp(mstr, "SMART_bnn_bnn") == 0) { + m.tf = &SMART_b_n_tf; + m.df = &SMART__n__df; + m.qtf = &SMART_b_n_tf; + m.qdf = &SMART__n__df;   }   else if (strcmp(mstr, "OKAPI_BM25") == 0) {   m.tf = &OKAPI_BM25_tf;   m.df = &OKAPI_BM25_df;   m.qtf = &OKAPI_BM25_qtf; - } - else if (strcmp(mstr, "SMART_bxx") == 0) { - m.tf = &SMART_b___tf; - m.df = &_df; - m.qtf = &_qtf; + m.qdf = &SMART__n__df;   }   }   @@ -343,6 +418,10 @@
  /* fprintf(stdout, "\n"); */   search(hq->tab[i]->data, hdoc, hterm, h, m);   } + + freehash(hq, freequery); + freehash(hterm, freeterm); + freehash(hdoc, freedoc);     /* fprintf(stderr, "\n"); */   /* fprintf(stderr, "doctab[]\n"); */
Change 1 of 1 Show Entire File raw2t.c Stacked
 
14
15
16
 
17
18
19
 
 
20
21
 
22
23
24
25
26
27
 
28
29
30
31
32
33
 
 
34
35
36
 
 
 
37
 
 
 
38
39
40
41
42
 
 
 
 
43
44
45
 
46
 
 
 
 
 
47
48
49
50
51
52
53
54
55
56
57
 
 
 
 
58
59
60
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
63
64
65
66
67
68
69
70
71
72
 
14
15
16
17
18
 
 
19
20
21
 
22
23
24
25
 
 
 
26
27
28
29
30
 
 
31
32
33
 
 
34
35
36
37
38
39
40
41
42
43
 
 
44
45
46
47
48
49
 
50
51
52
53
54
55
56
57
 
 
 
 
 
 
 
 
 
 
58
59
60
61
62
63
 
 
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
 
 
 
 
 
 
 
90
91
92
@@ -14,59 +14,79 @@
 #include <stdlib.h>  #include <string.h>  #include <kak/eprintf.h> +#include <kak/kcommon.h>  #include <kak/klist.h> -#include "crc.h" -#include "tokenizer.h" +#include <kak/khash.h> +#include "txt.h"  #include "tfile.h" -#include "parser.h" +#include "cw.h"    void usage(char *progname)  { - fprintf(stderr, "usage: %s [-c] [-n] [-x] type\n", progname); - fprintf(stderr, "type = TREC | WARC | TRECQUERY\n"); - exit(1); + eprintf("usage: %s [-c] [-n] [-x] [-q] name\n", progname);  }    int main(int argc, char *argv[])  { - FILE *fplog; - TFile *tfile; + FILE *fplog, *fpt, *fpd; + TFile *tfile;   Parser *parser; - E_TDocType type; - int c, n, x, t; + Hash *ht, *hd; + char type, name[11]; + int c, n, x_min, x_max, q;   + if (argc < 2) + usage(argv[0]); +   esetprogname(estrdup(argv[0]));   fplog = fopen(strcat(estrdup(argv[0]), "-error.log"), "w");   esetstream(fplog); - - c = n = x = t = 0; + + name[0] = '\0'; name[10] = '\0'; + + c = n = x_min = x_max = q = 0;     for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-c") == 0) { + if (strcmp(argv[i], "-c") == 0)   c = 1; + else if (strcmp(argv[i], "-n") == 0) + n = 1; + else if (strcmp(argv[i], "-x") == 0) { + x_min = 4; + x_max = 20;   } - else if (strcmp(argv[i], "-n") == 0) { - n = 1; - } - else if (strcmp(argv[i], "-x") == 0) { - x = 3; - } - else { - t = 1; - type = getTDocType(argv[i]); - } + else if (strcmp(argv[i], "-q") == 0) + q = 1; + else + strncpy(name, argv[i], 10);   }   - if (!t) - usage(argv[0]); + type = 'd'; + if (q) + type = 'q'; + + ht = newhash(NHASHT, cmptoken_str, hashtoken_str); + hd = newhash(NHASHD, cmptoken_str, hashtoken_str); + parser = newparser(type, cwSMART, c, n, x_min, x_max); + + tfile = parse(ht, hd, parser, stdin); + + send(stdout, tfile); + fflush(stdout); + freeTFile(tfile); + + /* write out {token, id} pairs */ + fpt = fopen(strcat(estrdup(name), "_vocab.txt"), "w"); + walkhash(ht, writetoken, fpt); + fflush(fpt); fclose(fpt); + freehash(ht, freetoken); + + /* write out 
{docid, id} pairs*/ + fpd = fopen(strcat(estrdup(name), "_docid.txt"), "w"); + walkhash(hd, writetoken, fpd); + fflush(fpd); fclose(fpd); + freehash(hd, freetoken);   - parser = newparser(type, c, n, x); - - tfile = parse(parser, stdin); - writeTFile(tfile, stdout); - - freeTFile(tfile, type); -   fclose(fplog);     return 0;
Change 1 of 1 Show Entire File t2mem.c Stacked
 
1
2
3
 
4
5
6
 
 
7
8
9
10
11
12
 
13
14
15
16
17
18
19
20
 
 
21
22
23
 
1
2
3
4
5
6
 
7
8
9
10
11
12
13
 
14
15
16
17
18
19
 
 
 
20
21
22
23
24
@@ -1,23 +1,24 @@
 #include <stdio.h>  #include <stdlib.h>  #include <string.h> +#include <kak/kcommon.h>  #include <kak/eprintf.h>  #include <kak/klist.h> -#include "tokenizer.h" +#include <kak/khash.h> +#include "txt.h"  #include "tfile.h"    int main(int argc, char *argv[])  {   TFile *tfile; - FILE *fplog; + FILE *fplog;     esetprogname(estrdup(argv[0]));   fplog = fopen(strcat(estrdup(argv[0]), "-error.log"), "w");   esetstream(fplog);   - tfile = readTFile(stdin); - if (tfile) - printTFile(tfile); + tfile = receive(stdin); + printTFile(tfile, stdout);     fclose(fplog);   return 0;
Change 1 of 1 Show Entire File test/​alice.eval Stacked
 
79
80
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
@@ -79,33 +79,60 @@
 P_200 3 0.0050  P_500 3 0.0020  P_1000 3 0.0010 +num_ret 4 3 +num_rel 4 1 +num_rel_ret 4 1 +map 4 1.0000 +Rprec 4 1.0000 +bpref 4 1.0000 +recip_rank 4 1.0000 +iprec_at_recall_0.00 4 1.0000 +iprec_at_recall_0.10 4 1.0000 +iprec_at_recall_0.20 4 1.0000 +iprec_at_recall_0.30 4 1.0000 +iprec_at_recall_0.40 4 1.0000 +iprec_at_recall_0.50 4 1.0000 +iprec_at_recall_0.60 4 1.0000 +iprec_at_recall_0.70 4 1.0000 +iprec_at_recall_0.80 4 1.0000 +iprec_at_recall_0.90 4 1.0000 +iprec_at_recall_1.00 4 1.0000 +P_5 4 0.2000 +P_10 4 0.1000 +P_15 4 0.0667 +P_20 4 0.0500 +P_30 4 0.0333 +P_100 4 0.0100 +P_200 4 0.0050 +P_500 4 0.0020 +P_1000 4 0.0010  runid all . -num_q all 3 -num_ret all 9 -num_rel all 5 -num_rel_ret all 5 -map all 0.8333 -gm_map all 0.7937 -Rprec all 0.6667 -bpref all 0.6667 -recip_rank all 0.8333 -iprec_at_recall_0.00 all 0.8333 -iprec_at_recall_0.10 all 0.8333 -iprec_at_recall_0.20 all 0.8333 -iprec_at_recall_0.30 all 0.8333 -iprec_at_recall_0.40 all 0.8333 -iprec_at_recall_0.50 all 0.8333 -iprec_at_recall_0.60 all 0.8333 -iprec_at_recall_0.70 all 0.8333 -iprec_at_recall_0.80 all 0.8333 -iprec_at_recall_0.90 all 0.8333 -iprec_at_recall_1.00 all 0.8333 -P_5 all 0.3333 -P_10 all 0.1667 -P_15 all 0.1111 -P_20 all 0.0833 -P_30 all 0.0556 -P_100 all 0.0167 -P_200 all 0.0083 -P_500 all 0.0033 -P_1000 all 0.0017 +num_q all 4 +num_ret all 12 +num_rel all 6 +num_rel_ret all 6 +map all 0.8750 +gm_map all 0.8409 +Rprec all 0.7500 +bpref all 0.7500 +recip_rank all 0.8750 +iprec_at_recall_0.00 all 0.8750 +iprec_at_recall_0.10 all 0.8750 +iprec_at_recall_0.20 all 0.8750 +iprec_at_recall_0.30 all 0.8750 +iprec_at_recall_0.40 all 0.8750 +iprec_at_recall_0.50 all 0.8750 +iprec_at_recall_0.60 all 0.8750 +iprec_at_recall_0.70 all 0.8750 +iprec_at_recall_0.80 all 0.8750 +iprec_at_recall_0.90 all 0.8750 +iprec_at_recall_1.00 all 0.8750 +P_5 all 0.3000 +P_10 all 0.1500 +P_15 all 0.1000 +P_20 all 0.0750 +P_30 all 0.0500 +P_100 all 0.0150 +P_200 all 0.0075 +P_500 all 0.0030 +P_1000 all 
0.0015
Change 1 of 1 Show Entire File test/​alice.mem Stacked
 
1
2
 
3
4
 
5
6
7
8
9
 
 
 
 
10
11
12
 
13
14
15
16
17
 
 
 
 
18
19
20
 
21
22
23
24
25
 
 
 
 
26
 
1
 
2
3
 
4
5
 
 
 
 
6
7
8
9
10
11
 
12
13
 
 
 
 
14
15
16
17
18
19
 
20
21
 
 
 
 
22
23
24
25
26
@@ -1,26 +1,26 @@
 MG CRC V T BYTES N SUMNB SUMNU SUMNT R BO -41 2930072002 1 0 649 3 494 63 70 4 44 +41 136804978 1 0 562 3 407 52 57 4 44  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -1210775004 181 2 2 2 2 1 +134067299 159 2 2 2 2 1  tdoc->id: A. -tdoc->txt: rude 1 open 1 curios 1 look 1 cut 1 alic 2 learn 1 hear 1 desk 1 veri 2 remark 1 great 1 hair 1 sever 1 wide 1 first 1 hatter 2 speech 1 write 1 ey 1 person 1 time 1 raven 1 make 1 . -tdoc->rsrc[0]: 27. -tdoc->rsrc[1]: 24. -tdoc->rsrc[2]: 33. +tdoc->txt: rude 1 open 1 curios 1 cut 1 alic 2 learn 1 hear 1 desk 1 remark 1 great 1 hair 1 sever 1 wide 1 hatter 2 speech 1 write 1 ey 1 person 1 time 1 raven 1 make 1 . +tdoc->rsrc[0]: 23. +tdoc->rsrc[1]: 21. +tdoc->rsrc[2]: 27.  tdoc->rsrc[3]: 2.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -3853360751 187 2 2 2 2 1 +4054011989 153 2 2 2 2 1  tdoc->id: B. -tdoc->txt: adventur 1 issu 1 wrong 2 alic 1 1897 1 nevar 1 front 2 never 2 carrol 1 veri 1 propos 1 final 1 flat 1 note 1 himself 1 earli 1 spell 1 lewi 1 answer 1 produc 1 raven 1 though 1 revis 2 . -tdoc->rsrc[0]: 27. -tdoc->rsrc[1]: 23. -tdoc->rsrc[2]: 35. +tdoc->txt: adventur 1 issu 1 wrong 2 alic 1 1897 1 nevar 1 front 2 carrol 1 propos 1 final 1 flat 1 note 1 earli 1 spell 1 lewi 1 answer 1 produc 1 raven 1 revis 2 . +tdoc->rsrc[0]: 22. +tdoc->rsrc[1]: 19. +tdoc->rsrc[2]: 28.  tdoc->rsrc[3]: 2.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -3471635844 126 2 2 2 2 1 +2606170364 95 2 2 2 2 1  tdoc->id: C. -tdoc->txt: alic 1 both 1 dip 1 more 1 gave 1 1990 1 martin 1 gardner 1 flap 1 sent 1 answer 1 quill 1 reader 1 annot 1 possibl 1 slope 1 . -tdoc->rsrc[0]: 16. -tdoc->rsrc[1]: 16. -tdoc->rsrc[2]: 16. +tdoc->txt: alic 1 dip 1 gave 1 1990 1 martin 1 gardner 1 flap 1 answer 1 quill 1 reader 1 annot 1 slope 1 . +tdoc->rsrc[0]: 12. +tdoc->rsrc[1]: 12. +tdoc->rsrc[2]: 12.  tdoc->rsrc[3]: 1.
Change 1 of 1 Show Entire File test/​alice.qrel Stacked
 
7
8
9
 
 
 
 
7
8
9
10
11
12
@@ -7,3 +7,6 @@
 3 0 A 0  3 0 B 0  3 0 C 1 +4 0 A 0 +4 0 B 0 +4 0 C 1
Change 1 of 1 Show Entire File test/​alice.rank Stacked
 
7
8
9
10
 
11
12
 
7
8
9
 
10
11
12
@@ -7,6 +7,6 @@
 3 A 6.000000  3 B 2.000000  3 C 2.000000 -4 C 5.000000 +4 C 4.000000  4 A 2.000000  4 B 1.000000
Change 1 of 1 Show Entire File test/​alice.res Stacked
 
9
10
11
12
 
 
9
10
11
 
12
@@ -9,4 +9,4 @@
 3 C 2.000000  4 A 2.000000  4 B 1.000000 -4 C 5.000000 +4 C 4.000000
Change 1 of 1 Show Entire File test/​alice.run Stacked
 
7
8
9
10
 
11
12
 
7
8
9
 
10
11
12
@@ -7,6 +7,6 @@
 3 Q0 A 0 6.000000 .  3 Q0 B 1 2.000000 .  3 Q0 C 2 2.000000 . -4 Q0 C 0 5.000000 . +4 Q0 C 0 4.000000 .  4 Q0 A 1 2.000000 .  4 Q0 B 2 1.000000 .
 
 
 
1
2
 
3
4
5
 
25
26
27
28
 
29
30
31
32
33
 
 
 
 
34
 
1
 
2
3
4
5
 
25
26
27
 
28
29
 
 
 
 
30
31
32
33
34
@@ -1,5 +1,5 @@
 MG CRC V T BYTES N SUMNB SUMNU SUMNT R BO -41 194642396 1 0 400 4 220 26 26 4 44 +41 4267932026 1 0 393 4 213 25 25 4 44  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  3472369368 63 2 1 1 1 1  tdoc->id: 1. @@ -25,10 +25,10 @@
 tdoc->rsrc[2]: 8.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -864232047 41 2 1 1 1 1 +3844629313 34 2 1 1 1 1  tdoc->id: 4. -tdoc->txt: alic 1 more 1 martin 1 gardner 1 annot 1 . -tdoc->rsrc[0]: 5. -tdoc->rsrc[1]: 5. -tdoc->rsrc[2]: 5. +tdoc->txt: alic 1 martin 1 gardner 1 annot 1 . +tdoc->rsrc[0]: 4. +tdoc->rsrc[1]: 4. +tdoc->rsrc[2]: 4.  tdoc->rsrc[3]: 1.
 
 
Change 1 of 12 Show Entire File test/​ap.eval Stacked
 
106
107
108
109
 
110
111
112
 
113
114
 
115
116
117
 
119
120
121
122
123
124
125
126
127
 
 
 
 
 
 
128
129
130
 
133
134
135
136
 
137
138
139
 
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
 
 
 
 
 
 
 
 
 
 
 
 
 
155
156
157
 
160
161
162
163
 
164
165
166
 
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
183
184
 
214
215
216
217
 
218
219
220
 
221
222
223
 
227
228
229
230
231
232
233
234
 
 
 
 
 
235
236
237
 
241
242
243
244
 
245
246
247
 
248
249
250
251
252
253
254
255
256
257
258
259
260
261
 
 
 
 
 
 
 
 
 
 
 
 
262
263
264
 
268
269
270
271
 
272
273
274
 
275
276
277
278
279
280
281
282
283
284
285
286
287
288
 
 
 
 
 
 
 
 
 
 
 
 
289
290
291
292
293
 
 
294
295
296
297
298
 
299
300
301
 
322
323
324
325
 
326
327
328
 
329
330
331
332
333
334
335
336
337
338
339
340
341
342
 
 
 
 
 
 
 
 
 
 
 
 
 
343
344
345
346
347
348
 
349
350
351
 
376
377
378
379
 
380
381
382
 
383
384
385
386
387
388
389
390
391
392
393
394
395
396
 
 
 
 
 
 
 
 
 
 
 
 
397
398
399
400
 
401
402
403
 
511
512
513
514
 
515
516
517
 
518
519
520
521
522
523
524
525
526
527
528
529
530
531
 
 
 
 
 
 
 
 
 
 
 
 
532
533
534
 
540
541
542
543
 
544
545
546
547
 
 
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
564
565
566
567
 
 
 
568
569
570
 
106
107
108
 
109
110
111
 
112
113
 
114
115
116
117
 
119
120
121
 
 
 
 
 
 
122
123
124
125
126
127
128
129
130
 
133
134
135
 
136
137
138
 
139
140
141
 
 
 
 
 
 
 
 
 
 
 
 
 
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
 
160
161
162
 
163
164
165
 
166
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
 
214
215
216
 
217
218
219
 
220
221
222
223
 
227
228
229
 
 
 
 
 
230
231
232
233
234
235
236
237
 
241
242
243
 
244
245
246
 
247
248
249
 
 
 
 
 
 
 
 
 
 
 
 
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
 
268
269
270
 
271
272
273
 
274
275
276
 
 
 
 
 
 
 
 
 
 
 
 
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
 
 
292
293
294
295
296
297
 
298
299
300
301
 
322
323
324
 
325
326
327
 
328
329
 
 
 
 
 
 
 
 
 
 
 
 
 
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
 
348
349
350
351
 
376
377
378
 
379
380
381
 
382
383
384
 
 
 
 
 
 
 
 
 
 
 
 
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
 
400
401
402
403
 
511
512
513
 
514
515
516
 
517
518
519
 
 
 
 
 
 
 
 
 
 
 
 
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
 
540
541
542
 
543
544
545
 
 
546
547
548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
 
 
 
565
566
567
568
569
570
@@ -106,12 +106,12 @@
 P_200 207 0.0050  P_500 207 0.0020  P_1000 207 0.0010 -num_ret 208 954 +num_ret 208 716  num_rel 208 2  num_rel_ret 208 2 -map 208 0.7000 +map 208 0.6667  Rprec 208 0.5000 -bpref 208 0.7500 +bpref 208 0.5000  recip_rank 208 1.0000  iprec_at_recall_0.00 208 1.0000  iprec_at_recall_0.10 208 1.0000 @@ -119,12 +119,12 @@
 iprec_at_recall_0.30 208 1.0000  iprec_at_recall_0.40 208 1.0000  iprec_at_recall_0.50 208 1.0000 -iprec_at_recall_0.60 208 0.4000 -iprec_at_recall_0.70 208 0.4000 -iprec_at_recall_0.80 208 0.4000 -iprec_at_recall_0.90 208 0.4000 -iprec_at_recall_1.00 208 0.4000 -P_5 208 0.4000 +iprec_at_recall_0.60 208 0.3333 +iprec_at_recall_0.70 208 0.3333 +iprec_at_recall_0.80 208 0.3333 +iprec_at_recall_0.90 208 0.3333 +iprec_at_recall_1.00 208 0.3333 +P_5 208 0.2000  P_10 208 0.2000  P_15 208 0.1333  P_20 208 0.1000 @@ -133,25 +133,25 @@
 P_200 208 0.0100  P_500 208 0.0040  P_1000 208 0.0020 -num_ret 211 1111 +num_ret 211 1056  num_rel 211 2  num_rel_ret 211 2 -map 211 0.1296 +map 211 0.0883  Rprec 211 0.0000  bpref 211 0.5000 -recip_rank 211 0.2500 -iprec_at_recall_0.00 211 0.2500 -iprec_at_recall_0.10 211 0.2500 -iprec_at_recall_0.20 211 0.2500 -iprec_at_recall_0.30 211 0.2500 -iprec_at_recall_0.40 211 0.2500 -iprec_at_recall_0.50 211 0.2500 -iprec_at_recall_0.60 211 0.0091 -iprec_at_recall_0.70 211 0.0091 -iprec_at_recall_0.80 211 0.0091 -iprec_at_recall_0.90 211 0.0091 -iprec_at_recall_1.00 211 0.0091 -P_5 211 0.2000 +recip_rank 211 0.1667 +iprec_at_recall_0.00 211 0.1667 +iprec_at_recall_0.10 211 0.1667 +iprec_at_recall_0.20 211 0.1667 +iprec_at_recall_0.30 211 0.1667 +iprec_at_recall_0.40 211 0.1667 +iprec_at_recall_0.50 211 0.1667 +iprec_at_recall_0.60 211 0.0099 +iprec_at_recall_0.70 211 0.0099 +iprec_at_recall_0.80 211 0.0099 +iprec_at_recall_0.90 211 0.0099 +iprec_at_recall_1.00 211 0.0099 +P_5 211 0.0000  P_10 211 0.1000  P_15 211 0.0667  P_20 211 0.0500 @@ -160,25 +160,25 @@
 P_200 211 0.0050  P_500 211 0.0040  P_1000 211 0.0020 -num_ret 213 1426 +num_ret 213 596  num_rel 213 1  num_rel_ret 213 1 -map 213 0.2500 +map 213 0.1667  Rprec 213 0.0000 -bpref 213 1.0000 -recip_rank 213 0.2500 -iprec_at_recall_0.00 213 0.2500 -iprec_at_recall_0.10 213 0.2500 -iprec_at_recall_0.20 213 0.2500 -iprec_at_recall_0.30 213 0.2500 -iprec_at_recall_0.40 213 0.2500 -iprec_at_recall_0.50 213 0.2500 -iprec_at_recall_0.60 213 0.2500 -iprec_at_recall_0.70 213 0.2500 -iprec_at_recall_0.80 213 0.2500 -iprec_at_recall_0.90 213 0.2500 -iprec_at_recall_1.00 213 0.2500 -P_5 213 0.2000 +bpref 213 0.0000 +recip_rank 213 0.1667 +iprec_at_recall_0.00 213 0.1667 +iprec_at_recall_0.10 213 0.1667 +iprec_at_recall_0.20 213 0.1667 +iprec_at_recall_0.30 213 0.1667 +iprec_at_recall_0.40 213 0.1667 +iprec_at_recall_0.50 213 0.1667 +iprec_at_recall_0.60 213 0.1667 +iprec_at_recall_0.70 213 0.1667 +iprec_at_recall_0.80 213 0.1667 +iprec_at_recall_0.90 213 0.1667 +iprec_at_recall_1.00 213 0.1667 +P_5 213 0.0000  P_10 213 0.1000  P_15 213 0.0667  P_20 213 0.0500 @@ -214,10 +214,10 @@
 P_200 215 0.0050  P_500 215 0.0020  P_1000 215 0.0020 -num_ret 221 1298 +num_ret 221 1221  num_rel 221 2  num_rel_ret 221 2 -map 221 0.1311 +map 221 0.1316  Rprec 221 0.0000  bpref 221 0.5000  recip_rank 221 0.2500 @@ -227,11 +227,11 @@
 iprec_at_recall_0.30 221 0.2500  iprec_at_recall_0.40 221 0.2500  iprec_at_recall_0.50 221 0.2500 -iprec_at_recall_0.60 221 0.0123 -iprec_at_recall_0.70 221 0.0123 -iprec_at_recall_0.80 221 0.0123 -iprec_at_recall_0.90 221 0.0123 -iprec_at_recall_1.00 221 0.0123 +iprec_at_recall_0.60 221 0.0132 +iprec_at_recall_0.70 221 0.0132 +iprec_at_recall_0.80 221 0.0132 +iprec_at_recall_0.90 221 0.0132 +iprec_at_recall_1.00 221 0.0132  P_5 221 0.2000  P_10 221 0.1000  P_15 221 0.0667 @@ -241,24 +241,24 @@
 P_200 221 0.0100  P_500 221 0.0040  P_1000 221 0.0020 -num_ret 222 613 +num_ret 222 519  num_rel 222 1  num_rel_ret 222 1 -map 222 0.1429 +map 222 0.1667  Rprec 222 0.0000  bpref 222 0.0000 -recip_rank 222 0.1429 -iprec_at_recall_0.00 222 0.1429 -iprec_at_recall_0.10 222 0.1429 -iprec_at_recall_0.20 222 0.1429 -iprec_at_recall_0.30 222 0.1429 -iprec_at_recall_0.40 222 0.1429 -iprec_at_recall_0.50 222 0.1429 -iprec_at_recall_0.60 222 0.1429 -iprec_at_recall_0.70 222 0.1429 -iprec_at_recall_0.80 222 0.1429 -iprec_at_recall_0.90 222 0.1429 -iprec_at_recall_1.00 222 0.1429 +recip_rank 222 0.1667 +iprec_at_recall_0.00 222 0.1667 +iprec_at_recall_0.10 222 0.1667 +iprec_at_recall_0.20 222 0.1667 +iprec_at_recall_0.30 222 0.1667 +iprec_at_recall_0.40 222 0.1667 +iprec_at_recall_0.50 222 0.1667 +iprec_at_recall_0.60 222 0.1667 +iprec_at_recall_0.70 222 0.1667 +iprec_at_recall_0.80 222 0.1667 +iprec_at_recall_0.90 222 0.1667 +iprec_at_recall_1.00 222 0.1667  P_5 222 0.0000  P_10 222 0.1000  P_15 222 0.0667 @@ -268,34 +268,34 @@
 P_200 222 0.0050  P_500 222 0.0020  P_1000 222 0.0010 -num_ret 227 1075 +num_ret 227 1007  num_rel 227 4  num_rel_ret 227 4 -map 227 0.0743 +map 227 0.0763  Rprec 227 0.0000  bpref 227 0.0625 -recip_rank 227 0.0625 -iprec_at_recall_0.00 227 0.0968 -iprec_at_recall_0.10 227 0.0968 -iprec_at_recall_0.20 227 0.0968 -iprec_at_recall_0.30 227 0.0968 -iprec_at_recall_0.40 227 0.0968 -iprec_at_recall_0.50 227 0.0968 -iprec_at_recall_0.60 227 0.0968 -iprec_at_recall_0.70 227 0.0968 -iprec_at_recall_0.80 227 0.0690 -iprec_at_recall_0.90 227 0.0690 -iprec_at_recall_1.00 227 0.0690 +recip_rank 227 0.0455 +iprec_at_recall_0.00 227 0.1071 +iprec_at_recall_0.10 227 0.1071 +iprec_at_recall_0.20 227 0.1071 +iprec_at_recall_0.30 227 0.1071 +iprec_at_recall_0.40 227 0.1071 +iprec_at_recall_0.50 227 0.1071 +iprec_at_recall_0.60 227 0.1071 +iprec_at_recall_0.70 227 0.1071 +iprec_at_recall_0.80 227 0.0727 +iprec_at_recall_0.90 227 0.0727 +iprec_at_recall_1.00 227 0.0727  P_5 227 0.0000  P_10 227 0.0000  P_15 227 0.0000 -P_20 227 0.0500 -P_30 227 0.0667 +P_20 227 0.0000 +P_30 227 0.1000  P_100 227 0.0400  P_200 227 0.0200  P_500 227 0.0080  P_1000 227 0.0040 -num_ret 228 1596 +num_ret 228 1533  num_rel 228 1  num_rel_ret 228 1  map 228 0.5000 @@ -322,30 +322,30 @@
 P_200 228 0.0050  P_500 228 0.0020  P_1000 228 0.0010 -num_ret 229 1333 +num_ret 229 905  num_rel 229 1  num_rel_ret 229 1 -map 229 0.0204 +map 229 0.0093  Rprec 229 0.0000 -bpref 229 1.0000 -recip_rank 229 0.0204 -iprec_at_recall_0.00 229 0.0204 -iprec_at_recall_0.10 229 0.0204 -iprec_at_recall_0.20 229 0.0204 -iprec_at_recall_0.30 229 0.0204 -iprec_at_recall_0.40 229 0.0204 -iprec_at_recall_0.50 229 0.0204 -iprec_at_recall_0.60 229 0.0204 -iprec_at_recall_0.70 229 0.0204 -iprec_at_recall_0.80 229 0.0204 -iprec_at_recall_0.90 229 0.0204 -iprec_at_recall_1.00 229 0.0204 +bpref 229 0.0000 +recip_rank 229 0.0093 +iprec_at_recall_0.00 229 0.0093 +iprec_at_recall_0.10 229 0.0093 +iprec_at_recall_0.20 229 0.0093 +iprec_at_recall_0.30 229 0.0093 +iprec_at_recall_0.40 229 0.0093 +iprec_at_recall_0.50 229 0.0093 +iprec_at_recall_0.60 229 0.0093 +iprec_at_recall_0.70 229 0.0093 +iprec_at_recall_0.80 229 0.0093 +iprec_at_recall_0.90 229 0.0093 +iprec_at_recall_1.00 229 0.0093  P_5 229 0.0000  P_10 229 0.0000  P_15 229 0.0000  P_20 229 0.0000  P_30 229 0.0000 -P_100 229 0.0100 +P_100 229 0.0000  P_200 229 0.0050  P_500 229 0.0020  P_1000 229 0.0010 @@ -376,28 +376,28 @@
 P_200 235 0.0050  P_500 235 0.0020  P_1000 235 0.0010 -num_ret 239 1558 +num_ret 239 1433  num_rel 239 1  num_rel_ret 239 1 -map 239 0.0556 +map 239 0.0345  Rprec 239 0.0000  bpref 239 0.0000 -recip_rank 239 0.0556 -iprec_at_recall_0.00 239 0.0556 -iprec_at_recall_0.10 239 0.0556 -iprec_at_recall_0.20 239 0.0556 -iprec_at_recall_0.30 239 0.0556 -iprec_at_recall_0.40 239 0.0556 -iprec_at_recall_0.50 239 0.0556 -iprec_at_recall_0.60 239 0.0556 -iprec_at_recall_0.70 239 0.0556 -iprec_at_recall_0.80 239 0.0556 -iprec_at_recall_0.90 239 0.0556 -iprec_at_recall_1.00 239 0.0556 +recip_rank 239 0.0345 +iprec_at_recall_0.00 239 0.0345 +iprec_at_recall_0.10 239 0.0345 +iprec_at_recall_0.20 239 0.0345 +iprec_at_recall_0.30 239 0.0345 +iprec_at_recall_0.40 239 0.0345 +iprec_at_recall_0.50 239 0.0345 +iprec_at_recall_0.60 239 0.0345 +iprec_at_recall_0.70 239 0.0345 +iprec_at_recall_0.80 239 0.0345 +iprec_at_recall_0.90 239 0.0345 +iprec_at_recall_1.00 239 0.0345  P_5 239 0.0000  P_10 239 0.0000  P_15 239 0.0000 -P_20 239 0.0500 +P_20 239 0.0000  P_30 239 0.0333  P_100 239 0.0100  P_200 239 0.0050 @@ -511,24 +511,24 @@
 P_200 249 0.0050  P_500 249 0.0020  P_1000 249 0.0010 -num_ret 250 1302 +num_ret 250 1041  num_rel 250 1  num_rel_ret 250 1 -map 250 0.0196 +map 250 0.0172  Rprec 250 0.0000  bpref 250 1.0000 -recip_rank 250 0.0196 -iprec_at_recall_0.00 250 0.0196 -iprec_at_recall_0.10 250 0.0196 -iprec_at_recall_0.20 250 0.0196 -iprec_at_recall_0.30 250 0.0196 -iprec_at_recall_0.40 250 0.0196 -iprec_at_recall_0.50 250 0.0196 -iprec_at_recall_0.60 250 0.0196 -iprec_at_recall_0.70 250 0.0196 -iprec_at_recall_0.80 250 0.0196 -iprec_at_recall_0.90 250 0.0196 -iprec_at_recall_1.00 250 0.0196 +recip_rank 250 0.0172 +iprec_at_recall_0.00 250 0.0172 +iprec_at_recall_0.10 250 0.0172 +iprec_at_recall_0.20 250 0.0172 +iprec_at_recall_0.30 250 0.0172 +iprec_at_recall_0.40 250 0.0172 +iprec_at_recall_0.50 250 0.0172 +iprec_at_recall_0.60 250 0.0172 +iprec_at_recall_0.70 250 0.0172 +iprec_at_recall_0.80 250 0.0172 +iprec_at_recall_0.90 250 0.0172 +iprec_at_recall_1.00 250 0.0172  P_5 250 0.0000  P_10 250 0.0000  P_15 250 0.0000 @@ -540,31 +540,31 @@
 P_1000 250 0.0010  runid all .  num_q all 20 -num_ret all 19167 +num_ret all 16928  num_rel all 43  num_rel_ret all 35 -map all 0.2938 -gm_map all 0.0476 +map all 0.2854 +gm_map all 0.0430  Rprec all 0.1950 -bpref all 0.4444 -recip_rank all 0.3420 -iprec_at_recall_0.00 all 0.3438 -iprec_at_recall_0.10 all 0.3438 -iprec_at_recall_0.20 all 0.3438 -iprec_at_recall_0.30 all 0.3438 -iprec_at_recall_0.40 all 0.3433 -iprec_at_recall_0.50 all 0.3114 -iprec_at_recall_0.60 all 0.2574 -iprec_at_recall_0.70 all 0.2574 -iprec_at_recall_0.80 all 0.2558 -iprec_at_recall_0.90 all 0.2472 -iprec_at_recall_1.00 all 0.2472 -P_5 all 0.1200 +bpref all 0.3319 +recip_rank all 0.3322 +iprec_at_recall_0.00 all 0.3354 +iprec_at_recall_0.10 all 0.3354 +iprec_at_recall_0.20 all 0.3354 +iprec_at_recall_0.30 all 0.3354 +iprec_at_recall_0.40 all 0.3349 +iprec_at_recall_0.50 all 0.3031 +iprec_at_recall_0.60 all 0.2500 +iprec_at_recall_0.70 all 0.2500 +iprec_at_recall_0.80 all 0.2481 +iprec_at_recall_0.90 all 0.2394 +iprec_at_recall_1.00 all 0.2394 +P_5 all 0.0900  P_10 all 0.0700  P_15 all 0.0500 -P_20 all 0.0425 -P_30 all 0.0317 -P_100 all 0.0130 +P_20 all 0.0375 +P_30 all 0.0333 +P_100 all 0.0125  P_200 all 0.0068  P_500 all 0.0031  P_1000 all 0.0018
Show Entire File test/​ap.mem Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Show Entire File test/​ap.rank Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Show Entire File test/​ap.res Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Show Entire File test/​ap.run Stacked
This file's diff was not loaded because this changeset is very large. Load changes
 
 
Change 1 of 7 Show Entire File test/​ap_query.mem Stacked
 
1
2
 
3
4
5
 
33
34
35
36
 
37
38
39
40
41
 
 
 
 
42
43
44
 
49
50
51
52
 
53
54
55
56
57
 
 
 
 
58
59
60
 
65
66
67
68
 
69
70
71
72
73
 
 
 
 
74
75
76
 
77
78
79
80
81
 
 
 
 
82
83
84
 
89
90
91
92
 
93
94
95
96
97
 
 
 
 
98
99
100
 
101
102
103
104
105
 
 
 
 
106
107
108
 
113
114
115
116
 
117
118
119
120
121
 
 
 
 
122
123
124
 
153
154
155
156
 
157
158
159
160
161
 
 
 
 
162
 
1
 
2
3
4
5
 
33
34
35
 
36
37
 
 
 
 
38
39
40
41
42
43
44
 
49
50
51
 
52
53
 
 
 
 
54
55
56
57
58
59
60
 
65
66
67
 
68
69
 
 
 
 
70
71
72
73
74
75
 
76
77
 
 
 
 
78
79
80
81
82
83
84
 
89
90
91
 
92
93
 
 
 
 
94
95
96
97
98
99
 
100
101
 
 
 
 
102
103
104
105
106
107
108
 
113
114
115
 
116
117
 
 
 
 
118
119
120
121
122
123
124
 
153
154
155
 
156
157
 
 
 
 
158
159
160
161
162
@@ -1,5 +1,5 @@
 MG CRC V T BYTES N SUMNB SUMNU SUMNT R BO -41 1594079031 1 0 2198 20 1416 161 163 4 44 +41 1346727550 1 0 2091 20 1312 147 149 4 44  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  1370178390 34 4 1 1 1 1  tdoc->id: 203. @@ -33,12 +33,12 @@
 tdoc->rsrc[2]: 7.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -1224799587 117 4 2 2 2 1 +2321808533 112 4 2 2 2 1  tdoc->id: 208. -tdoc->txt: convers 1 garbag 1 energi 1 fertil 1 bioconvers 1 materi 1 biolog 1 plant 1 wast 1 us 1 latest 1 product 1 develop 1 . -tdoc->rsrc[0]: 13. -tdoc->rsrc[1]: 13. -tdoc->rsrc[2]: 13. +tdoc->txt: convers 1 garbag 1 energi 1 fertil 1 bioconvers 1 materi 1 biolog 1 plant 1 wast 1 latest 1 product 1 develop 1 . +tdoc->rsrc[0]: 12. +tdoc->rsrc[1]: 12. +tdoc->rsrc[2]: 12.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  2404694064 123 4 2 2 2 1 @@ -49,12 +49,12 @@
 tdoc->rsrc[2]: 17.  tdoc->rsrc[3]: 2.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -2661465462 64 4 1 1 1 1 +2080920698 52 4 1 1 1 1  tdoc->id: 213. -tdoc->txt: crime 1 defend 1 absolv 1 more 1 be 1 result 1 test 1 convict 1 . -tdoc->rsrc[0]: 8. -tdoc->rsrc[1]: 8. -tdoc->rsrc[2]: 8. +tdoc->txt: crime 1 defend 1 absolv 1 result 1 test 1 convict 1 . +tdoc->rsrc[0]: 6. +tdoc->rsrc[1]: 6. +tdoc->rsrc[2]: 6.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  1641096067 69 4 1 1 1 1 @@ -65,20 +65,20 @@
 tdoc->rsrc[2]: 8.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -1149143928 113 4 2 2 2 1 +1863533793 105 4 2 2 2 1  tdoc->id: 221. -tdoc->txt: youth 1 govern 1 step 1 organ 1 gang 1 carnag 1 taken 1 drug 1 civic 1 halt 1 church 1 engag 1 warfar 1 commun 1 . -tdoc->rsrc[0]: 14. -tdoc->rsrc[1]: 14. -tdoc->rsrc[2]: 14. +tdoc->txt: youth 1 govern 1 step 1 organ 1 gang 1 carnag 1 drug 1 civic 1 halt 1 church 1 engag 1 warfar 1 commun 1 . +tdoc->rsrc[0]: 13. +tdoc->rsrc[1]: 13. +tdoc->rsrc[2]: 13.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -530199975 59 4 1 1 1 1 +893047418 51 4 1 1 1 1  tdoc->id: 222. -tdoc->txt: crime 1 suggest 1 deterr 1 punish 1 data 1 capit 1 avail 1 . -tdoc->rsrc[0]: 7. -tdoc->rsrc[1]: 7. -tdoc->rsrc[2]: 7. +tdoc->txt: crime 1 suggest 1 deterr 1 punish 1 data 1 capit 1 . +tdoc->rsrc[0]: 6. +tdoc->rsrc[1]: 6. +tdoc->rsrc[2]: 6.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  3442007547 98 4 2 2 2 1 @@ -89,20 +89,20 @@
 tdoc->rsrc[2]: 11.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -1381855207 88 4 1 1 1 1 +2387253857 78 4 1 1 1 1  tdoc->id: 228. -tdoc->txt: year 1 stori 1 environment 1 success 1 recoveri 1 biggest 1 recent 1 concern 1 pollut 1 . -tdoc->rsrc[0]: 9. -tdoc->rsrc[1]: 9. -tdoc->rsrc[2]: 9. +tdoc->txt: year 1 stori 1 environment 1 success 1 recoveri 1 biggest 1 recent 1 pollut 1 . +tdoc->rsrc[0]: 8. +tdoc->rsrc[1]: 8. +tdoc->rsrc[2]: 8.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -3204500280 52 4 1 1 1 1 +1526072462 33 4 1 1 1 1  tdoc->id: 229. -tdoc->txt: peopl 1 schizophrenia 1 suffer 1 help 1 be 1 done 1 . -tdoc->rsrc[0]: 6. -tdoc->rsrc[1]: 6. -tdoc->rsrc[2]: 6. +tdoc->txt: peopl 1 schizophrenia 1 suffer 1 . +tdoc->rsrc[0]: 3. +tdoc->rsrc[1]: 3. +tdoc->rsrc[2]: 3.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  2875597069 25 4 1 1 1 1 @@ -113,12 +113,12 @@
 tdoc->rsrc[2]: 3.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -3992499472 104 4 2 2 2 1 +1024109324 80 4 1 1 1 1  tdoc->id: 239. -tdoc->txt: exist 1 unit 1 caus 1 problem 1 specif 1 certain 1 seem 1 region 1 cancer 1 condit 1 state 1 concentr 1 . -tdoc->rsrc[0]: 12. -tdoc->rsrc[1]: 12. -tdoc->rsrc[2]: 12. +tdoc->txt: exist 1 unit 1 problem 1 specif 1 region 1 cancer 1 condit 1 state 1 concentr 1 . +tdoc->rsrc[0]: 9. +tdoc->rsrc[1]: 9. +tdoc->rsrc[2]: 9.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3  3393299640 85 4 1 1 1 1 @@ -153,10 +153,10 @@
 tdoc->rsrc[2]: 7.  tdoc->rsrc[3]: 1.  CRC TXT ID RSRC0 RSRC1 RSRC2 RSRC3 -3509051979 103 4 2 2 2 1 +50265599 85 4 2 2 2 1  tdoc->id: 250. -tdoc->txt: involv 1 crime 1 correl 1 show 1 ammunit 1 data 1 between 1 firearm 2 sale 1 posit 1 commiss 1 avail 1 . -tdoc->rsrc[0]: 13. -tdoc->rsrc[1]: 12. -tdoc->rsrc[2]: 15. +tdoc->txt: involv 1 crime 1 correl 1 show 1 ammunit 1 data 1 firearm 2 sale 1 posit 1 commiss 1 . +tdoc->rsrc[0]: 11. +tdoc->rsrc[1]: 10. +tdoc->rsrc[2]: 13.  tdoc->rsrc[3]: 2.
 
 
Show Entire File tfile.c Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Change 1 of 2 Show Entire File tfile.h Stacked
 
1
2
3
4
5
 
 
 
 
 
 
6
7
 
8
9
10
11
 
12
13
14
15
 
 
16
17
18
19
20
21
22
23
24
25
 
 
 
26
27
28
 
 
 
29
30
31
32
 
 
 
 
 
 
 
 
 
33
34
35
36
37
38
39
 
 
 
40
41
42
 
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
 
8
9
10
11
 
12
13
14
15
 
16
17
18
 
 
19
20
 
 
 
 
 
21
22
23
24
25
 
26
27
28
29
 
 
 
30
31
32
33
34
35
36
37
38
39
40
41
 
 
 
 
42
43
44
45
46
47
 
49
50
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
@@ -1,42 +1,47 @@
-#define TREC_RSRC 4 /* number of resources for a TDoc of type TREC */ -#define MAX_RSRC 10 /* maximum number of resources */ -#define TXTBUFSIZE 10240 /* TDoc->txt buffer size, 10KB */ -#define IDBUFSIZE 64 /* A UUID (if used) would be max 36 chars */ -#define RSRCBUFSIZE 32 +#define NTERM 10000 +#define BTHEADER 26 /* packed bytes for THeader; 8 elements; 1*2 + 4*6 */ +#define THEADER 26 /* bytes in memory */ +#define BTDOCHEADER 54 /* packed bytes for TDocHeader; 12 elements; 1*2 + 4*9 + 16*1 */ +#define TDOCHEADER 46 /* bytes in memory; 1*2 + 4*9 + 8*1 */ +#define BFLOAT 16 /* a double formatted as +1.12345678e+01, using +1.8e */   -typedef enum {TREC, WARC, TRECQUERY} E_TDocType; +typedef unsigned char uchar;    typedef struct TFile TFile;  typedef struct THeader THeader; -typedef struct TSubHeader TSubHeader; +typedef struct TDocHeader TDocHeader;  typedef struct TDoc TDoc;    struct THeader { - uint32_t mg; + uint8_t type; + uint8_t bo;   uint32_t crc; - uint32_t ver; - uint32_t type;   uint32_t b;   uint32_t n; - uint32_t sumnb; - uint32_t sumnu; - uint32_t sumnt; - uint32_t r; - uint32_t bo; + uint32_t sumb; + uint32_t sumu; + uint32_t sumtf;  };   -struct TSubHeader { +struct TDocHeader { + uint8_t type; + uint8_t bo;   uint32_t crc; - uint32_t n_txt; - uint32_t n_id; - uint32_t n_rsrc[10]; + uint32_t id; + uint32_t b_txt; + uint32_t b_tf; + uint32_t sumb; + uint32_t sumu; + uint32_t sumtf; + uint32_t maxtf; + uint32_t sumsqtf; + double sumsqlogtf;  };    struct TDoc { - TSubHeader *h; - char *txt; - char *id; - char **rsrc; + TDocHeader *h; + uint32_t *txt; + uint32_t *tf;  };    struct TFile { @@ -44,19 +49,33 @@
  Node *list;  };   -E_TDocType getTDocType(char*); -THeader *_newTHeader(E_TDocType); -void _freeTHeader(void*); -TSubHeader *_newTSubHeader(E_TDocType); -TDoc *newTDoc(E_TDocType); -TFile *newTFile(E_TDocType); -void freeTDoc(void*, E_TDocType); -void freeTFile(TFile*, E_TDocType); -int writeTFile(TFile*, FILE*); -THeader *readTHeader(THeader*, FILE*); -TDoc *readTDoc(TDoc*, FILE*, E_TDocType); -TFile *readTFile(FILE*); -void _printTHeader(THeader*); -void _printTSubHeader(TSubHeader*, E_TDocType); -void _printTDoc(TDoc*, E_TDocType); -void printTFile(TFile*); +THeader *_newTHeader(void); +void _freeTHeader(void*); +TDocHeader *_newTDocHeader(void); +void _freeTDocHeader(void*); + +TDoc *newTDoc(void); +void freeTDoc(void*); +void updateTDoc(void*, void*); + +TFile *newTFile(void); +void freeTFile(void*); + +void _printTHeader(THeader*, FILE*); +void _printTDocHeader(TDocHeader*, FILE*); +void _printTDoc(TDoc*, FILE*); +void printTFile(TFile*, FILE*); + +int _pack(uchar *buf, char *fmt, ...); +int _unpack(uchar *buf, char *fmt, ...); + +int packTHeader(uint8_t *buf, THeader *h); +int packTDocHeader(uint8_t *buf, TDocHeader *h); +int packTDoc_payload(uchar *buf, TDoc *tdoc); + +int unpackTHeader(THeader *h, int n, uchar *buf); +int unpackTDocHeader(TDocHeader *h, int n, uchar *buf); +int unpackTDoc_payload(TDoc *tdoc, int n, uchar *buf); + +void send(FILE *fp, TFile *tfile); +TFile *receive(FILE *fp);
Show Entire File txt.c Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Change 1 of 2 Show Entire File txt.h Stacked
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
3
4
 
6
7
8
9
10
11
 
 
 
12
13
14
15
16
17
18
19
20
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
23
24
25
26
27
28
 
 
 
 
 
29
30
31
32
33
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
36
37
38
39
 
 
40
41
42
43
44
 
 
 
 
 
 
 
 
 
 
45
46
47
48
49
50
51
 
 
 
52
53
54
55
56
57
58
 
59
60
61
62
63
64
 
65
66
67
68
69
70
71
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
 
 
 
 
 
 
 
 
93
94
95
96
97
98
99
100
 
101
102
 
 
 
 
103
104
105
106
107
108
 
109
110
111
 
112
113
114
115
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 
24
25
26
 
 
 
27
28
29
30
 
31
 
 
 
 
 
 
 
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
 
 
 
 
 
66
67
68
69
70
71
72
 
 
 
 
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
 
 
95
96
97
98
 
 
 
99
100
101
102
103
104
105
106
107
108
109
110
 
 
 
 
 
111
112
113
114
 
 
 
 
 
 
115
116
 
 
 
 
 
117
118
 
 
 
 
 
 
 
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
 
 
 
 
 
 
 
 
 
 
162
 
 
 
 
 
 
 
163
164
165
166
167
168
169
170
171
172
173
 
174
 
 
 
175
176
 
177
178
179
180
181
 
 
 
 
 
182
183
184
 
185
186
 
 
 
187
188
189
@@ -1,4 +1,22 @@
-typedef struct Doc Doc; +#define KB 1024 +#define MB 1048576 +#define ASCII 128 +#define CR 13 +#define LF 10 +#define BUFSIZE 10240 +#define MINTERMLEN 1 +#define MAXTERMLEN 59 /* Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch */ +#define NHASH 4093 +#define NHASHT 100003 +#define NHASHD 100003 + +/* typedef struct Doc Doc; */ +typedef struct TFile TFile; +typedef struct stemmer Stemmer; +typedef struct Stack Stack; +typedef struct Tokenizer Tokenizer; +typedef struct Token Token; +typedef struct Parser Parser;  typedef struct Query Query;  typedef struct Term Term;  typedef struct Post Post; @@ -6,110 +24,166 @@
 typedef struct Model Model;    typedef double (tf_fn)(uint32_t, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t, - uint32_t); + uint32_t, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t, + uint32_t);  typedef double (df_fn)(uint32_t, uint32_t); -typedef double (qtf_fn)(uint32_t);   -struct Doc { - char *id; - uint32_t nb; - uint32_t sumtf; - uint32_t uterm; - uint32_t sumsqtf; - uint32_t maxtf; +enum {KEEPCASE, LOWERCASE}; + +typedef enum {CTRLCHAR = 1, PRINTCHAR, SEPCHAR} E_asciitype; + + +/* struct Doc { */ +/* uint32_t id; */ +/* uint32_t sumb; */ +/* uint32_t sumu; */ +/* uint32_t sumtf; */ +/* uint32_t maxtf; */ +/* char *s; */ +/* }; */ + +/* struct Query { */ +/* uint32_t id; */ +/* uint32_t sumtf; */ +/* uint32_t uterm; */ +/* char *s; */ +/* Node *tlist; */ +/* }; */ + +/* struct Term { */ +/* uint32_t id; */ +/* uint32_t df; */ +/* Node *plist; */ +/* }; */ + +struct Stack { + char s[KB]; + int p; + int n;  };   -struct Query { - char *id; - uint32_t sumtf; - uint32_t uterm; - Node *tlist; +struct Tokenizer { + char delimiters[ASCII]; + int asciitab[ASCII]; + Stack *mem; + int casechange;  };   -struct Term { - char *s; - unsigned df; - Node *plist; +struct Token { + char *str; + uint32_t l; + uint32_t id; + uint8_t t; +}; + +struct Parser { + char type; + int c; + int n; + int x_min; + int x_max; + Hash *hcw; /* hash of common words */ + Stemmer *stemmer; + int n_tag; + char endtag[64]; + char idtag[64]; + char *tag[64];  };    struct Post { - char* id; - int tf; + uint32_t id; + uint32_t tf;  };   -struct Score { - char* id; - double n; +/* struct Score { */ +/* uint32_t id; */ +/* double n; */ +/* }; */ + +struct Model { + tf_fn *tf; + df_fn *df; + tf_fn *qtf; + df_fn *qdf;  };   -struct Model { - tf_fn *tf; - df_fn *df; - qtf_fn *qtf; -}; +/* TODO: move to libk */ +void walkhash(Hash*, void (*f)(void*, void*), void*); +uint32_t _inthash(void*, uint32_t);   -Doc *newdoc(char*, uint32_t, uint32_t, uint32_t, 
uint32_t, uint32_t); -void freedoc(void*); -int cmpdoc(void*, void*); -int cmpdoc_p(void*, void*); -unsigned hashdoc(void*, unsigned); -void fprintdoc(FILE*, void*); +int cmp_p(void*, void*);   -Query *newquery(char*); -void freequery(void*); -int cmpquery(void*, void*); -unsigned hashquery(void*, unsigned); -void fprintquery(FILE*, void*); +int cmpdoc_id(void*, void*);   -Term *newterm(char*, unsigned, Post*); -void freeterm(void*); -int cmpterm(void*, void*); -int cmptermp(void*, void*); -unsigned hashterm(void*, unsigned); -int term_match_handler(void*, void*); -void fprintterm(FILE*, void*); +/* Tokenizer */ +void reset(Stack*, int); +void push(Stack*, char*); +char pop(Stack*); +void printstack(Stack*); +Tokenizer *newtokenizer(char*, int, Stack*); + +/* Token */ +Token *newtoken(char*, uint32_t, uint8_t, uint32_t); +void freetoken(void*); +int cmptoken_str(void*, void*); +int cmptoken_id(void*, void*); +uint32_t hashtoken_str(void*, uint32_t); +uint32_t hashtoken_id(void*, uint32_t); +int gettoken(Token*, FILE*, Tokenizer*); +void writetoken(void*, void*); + +/* Post */ +Post *newpost(uint32_t, uint32_t); +int cmppost(void*, void*); +int cmppost_tf(const void*, const void*); +uint32_t hashpost(void*, uint32_t); + +/* Parser */ +Parser *newparser(char, char**, int, int, int, int); +void freeparser(Parser*); +TFile *parse(Hash*, Hash*, Parser*, FILE*); + +/* Query *newquery(char*); */ +/* void freequery(void*); */ +/* int cmpquery(void*, void*); */ +/* uint32_t hashquery(void*, uint32_t); */ +/* void fprintquery(FILE*, void*); */ + +/* Term *newterm(char*, uint32_t, Post*); */ +/* void freeterm(void*); */ +/* int cmpterm(void*, void*); */ +/* int cmptermp(void*, void*); */ +/* uint32_t hashterm(void*, uint32_t); */ +/* int term_match_handler(void*, void*); */ +/* void fprintterm(FILE*, void*); */  /* void printtree(void*, void*); */   -Post *newpost(char*, int); -void freepost(void*); -Post *newpost_s(char*, int); -void freepost_s(void*); -int cmppost(void*, 
void*); -int cmppost_p(void*, void*); -int cmppost_tf(const void*, const void*); -void fprintpost(FILE*, void*); -unsigned hashpost(void*, unsigned); -  /* Score */ -Score *newscore(char*, double); -void freescore(void*); -Score *newscore_s(char*, double); -void freescore_s(void*); -int cmpscore(void*, void*); -int cmpscore_n(void*, void*); -unsigned hashscore(void*, unsigned); +/* Score *newscore(char*, double); */ +/* void freescore(void*); */ +/* Score *newscore_s(char*, double); */ +/* void freescore_s(void*); */ +/* int cmpscore(void*, void*); */ +/* int cmpscore_p(void*, void*); */ +/* int cmpscore_n(void*, void*); */ +/* uint32_t hashscore(void*, uint32_t); */    /* Models */   -/** test **/   -tf_fn _tf; -df_fn _df; -qtf_fn _qtf; +/** SMART **/   -/** SMART bxx **/ +tf_fn SMART_n_n_tf; +tf_fn SMART_b_n_tf; +tf_fn SMART_d_b_tf; +tf_fn SMART_d_n_tf;   -tf_fn SMART_b___tf; - -/** SMART dtb **/ - -tf_fn SMART_d_b_tf; +df_fn SMART__n__df;  df_fn SMART__t__df;   -/** OKAPI BM25 **/ +/** OKAPI **/   -tf_fn OKAPI_BM25_tf; -df_fn OKAPI_BM25_df; -qtf_fn OKAPI_BM25_qtf; +tf_fn OKAPI_BM25_tf; +df_fn OKAPI_BM25_df; +tf_fn OKAPI_BM25_qtf;
Change 1 of 1 Show Entire File txt2trecrun.awk Stacked
 
24
25
26
 
27
 
24
25
26
27
28
@@ -24,4 +24,5 @@
  n = split(line[j], a);   print a[1] " Q0 " a[2] " " j " " a[3] " ."   } + fflush(stdout)  }