Repositories » TXT0
Clone URL:  
Pushed to one repository · View In Graph Contained in v0.1 and tip

Tokdoc's buffer was split in two: the document (doc) and the document ID (id). Consequently, the portions of code using Tokdoc were changed too.

Changeset 21bb44f9ef38

Parent 8e6308a6c85d

by Rup Palchowdhury

Changes to 8 files · Browse files at 21bb44f9ef38 Showing diff from parent 8e6308a6c85d Diff from another changeset...

Change 1 of 3 Show Entire File parser.c Stacked
 
25
26
27
28
 
29
30
31
 
43
44
45
46
 
47
48
49
50
51
52
 
53
54
 
55
56
57
58
 
 
 
59
60
 
61
62
63
64
65
 
 
66
67
68
69
 
70
71
72
 
79
80
81
82
83
84
85
86
 
 
 
 
 
87
88
89
 
25
26
27
 
28
29
30
31
 
43
44
45
 
46
47
48
49
50
51
 
52
53
 
54
55
 
 
 
56
57
58
59
 
60
61
62
63
64
 
65
66
67
68
69
 
70
71
72
73
 
80
81
82
 
 
 
 
 
83
84
85
86
87
88
89
90
@@ -25,7 +25,7 @@
  char *sepid = " \t\r\n<>";   Log *log;   - log = newlog("toktrec"); + log = newlog("raw2t");     lowmem = KB;   @@ -43,30 +43,31 @@
    tok = newtok(TREC_ADHOC);   tokdoc = newtokdoc(TREC_ADHOC); - token.str = tokdoc->buf->p; + token.str = tokdoc->doc->p;     while((n = gettoken(&token, fp, tokenizer_text)) > 0) {   bytesread += n;   if ((token.type == CTAG) && (strcmp(token.str, "doc") == 0)) {   /* drop trailing ' ' */ - shiftpointer(tokdoc->buf, -1); + shiftpointer(tokdoc->doc, -1);   /* fill doc header */ - tokdoc->header->block[RES1DOCLEN] = tokdoc->buf->i; + tokdoc->header->block[RES1DOCLEN] = tokdoc->doc->i;   tokdoc->header->block[RES2IDLEN] = docid->len; - /* append resource block */ - memcpy(tokdoc->buf->p, docid->str, docid->len); - shiftpointer(tokdoc->buf, docid->len); + /* copy resource block */ + memcpy(tokdoc->id->p, docid->str, docid->len); + shiftpointer(tokdoc->id, docid->len);   /* compute crc of doc buffer and add it to the header */ - crc = compute_crc(tokdoc->buf->b, tokdoc->buf->i); + crc = compute_crc(tokdoc->doc->b, tokdoc->doc->i);   tokdoc->header->block[RES0CRC32] = crc;   /* add doc block to list */   tok->list = addfront(tok->list, newnode((void *)tokdoc));   /* increment data length and doc count */ - tok->header->block[LENGTH] += 4 * tok->header->block[NUMRES] + tokdoc->buf->i; + tok->header->block[LENGTH] += 4 * tok->header->block[NUMRES] + + tokdoc->doc->i + tokdoc->id->i;   tok->header->block[NUMDOCS]++;   /* get new placeholder for doc */   tokdoc = newtokdoc(TREC_ADHOC); - token.str = tokdoc->buf->p; + token.str = tokdoc->doc->p;   continue;   }   if ((token.type == OTAG) && (strcmp(token.str, "docno") == 0)) { @@ -79,11 +80,11 @@
  }   if (token.type == TERM) {   token.str[token.len] = ' '; - token.str = shiftpointer(tokdoc->buf, token.len + 1); - if ((tokdoc->buf->n - tokdoc->buf->i) <= lowmem) { - tokdoc->buf = resizebuffer(tokdoc->buf, - tokdoc->buf->n <<= 1); - token.str = tokdoc->buf->p; + token.str = shiftpointer(tokdoc->doc, token.len + 1); + if ((tokdoc->doc->n - tokdoc->doc->i) <= lowmem) { + tokdoc->doc = resizebuffer(tokdoc->doc, + tokdoc->doc->n <<= 1); + token.str = tokdoc->doc->p;   }   }   }
Change 1 of 1 Show Entire File raw2t.c Stacked
 
25
26
27
28
 
29
30
 
25
26
27
 
28
29
30
@@ -25,6 +25,6 @@
 {   Tok *tok;   tok = parse(stdin); - writetok(tok, stdout); + write_t(tok, stdout);   return 0;  }
Change 1 of 1 Show Entire File t2mem.c Stacked
 
11
12
13
14
 
15
16
17
 
11
12
13
 
14
15
16
17
@@ -11,7 +11,7 @@
 {   Tok *tok;   tok = newtok(TREC_ADHOC); - readtok(tok, stdin); + read_t(tok, stdin);   printtok(tok);   return 0;  }
Change 1 of 1 Show Entire File test/​big.out Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
@@ -1,230 +1,230 @@
 MG CRC V T BYTES N R BO  41 169037966 1 0 962838 227 3 32  CRC BYTES RSRC2 -3925551471 2991 13 -3264953395 4730 13 -4241454615 3334 13 -2625667217 2764 13 -1058679520 5052 13 -4063206699 5835 13 -1153118023 4933 13 -2195962243 6978 13 -1111704573 5566 13 -2649897190 4587 13 -1860820776 5322 13 -1177012594 5201 13 -2794791849 5889 13 -1313904980 4517 13 -394002083 4751 13 -2837314096 5593 13 -2670818068 4214 13 -2841477430 3872 13 -4177591302 3506 13 -740350867 2966 13 -11626970 3311 13 -177317172 3178 13 -522001294 3746 13 -925725237 3509 13 -4080308966 3902 13 -3701419689 6653 13 -4056362161 719 13 -1071253640 13352 13 -2964786617 4531 13 -4231344809 3810 13 -2550979654 3279 13 -2537446800 5272 13 -1231419629 631 13 -4166237816 8466 13 -401557764 3622 13 -833456995 3778 13 -1491732478 1898 13 -2655523327 1409 13 -203875697 946 13 -2632406934 4607 13 -3216302044 3226 13 -316238398 997 13 -3778083220 1207 13 -1848041888 1326 13 -1780124768 4000 13 -310408654 4879 13 -1206688563 3724 13 -744915707 5147 13 -2488506728 4622 13 -1966254892 3860 13 -3934897259 1674 13 -1615748910 5013 13 -1668821957 3614 13 -187915392 1435 13 -3657634659 1576 13 -3916594599 4367 13 -3414566740 4174 13 -1681188814 5474 13 -3691481637 3044 13 -3666861624 2836 13 -2951190684 2617 13 -2968125999 2070 13 -1359805633 6545 13 -2452631397 20311 13 -1902302050 6469 13 -504489067 7846 13 -770037341 5869 13 -1990464080 6678 13 -328521501 5699 13 -1858938346 6846 13 -1849531494 4897 13 -2057461336 3922 13 -3084882611 5454 13 -3818136598 5281 13 -2218127775 7742 13 -4145340257 3025 13 -3238198644 1215 13 -1286885316 1894 13 -848114428 1627 13 -1212627955 2175 13 -2558714491 1528 13 -1207623193 2223 13 -525146734 721 13 -1362390821 1383 13 -459830258 1111 13 -2656059263 953 13 -3246975925 720 13 -2266412421 2394 13 -3578084641 999 13 -1815527854 997 13 -88237530 1021 13 -2590276805 1107 13 -4038003146 969 13 -2098790741 1100 13 -3043936362 915 13 -2752487837 1188 13 -3179933358 1038 13 -3864631119 
957 13 -3574107262 785 13 -3877537595 1043 13 -3757103806 1006 13 -3447995105 1256 13 -3817726519 1157 13 -3736008280 951 13 -1138529311 830 13 -2808777233 1168 13 -4073084305 1251 13 -779953231 1059 13 -387114700 1493 13 -1551016954 1543 13 -2862424906 1865 13 -1183149070 1638 13 -229452541 1447 13 -2258929346 1546 13 -835807606 1417 13 -3458471976 1620 13 -294093196 1337 13 -1208980554 1266 13 -3959925998 1341 13 -3081657503 1212 13 -1352211515 1416 13 -3183054240 1198 13 -2081860774 1246 13 -2426300866 1194 13 -3338281372 1366 13 -1727554154 1365 13 -2167755494 1317 13 -901986643 1302 13 -3776414648 1479 13 -994459809 1338 13 -985427076 1479 13 -4187027136 1674 13 -2054991260 1292 13 -2447317768 761 13 -3990481177 1080 13 -3432161830 1585 13 -1459350126 1560 13 -1116660049 591 13 -210083529 5951 13 -440009097 5191 13 -274932304 2411 13 -1687909996 6835 13 -1320358383 2520 13 -2756350560 1431 13 -1389479088 3011 13 -3216388483 804 13 -3750366589 6035 13 -3192901537 409 13 -290011697 17042 13 -4094449100 20102 13 -2749619686 16829 13 -852093675 13959 13 -3660782595 11544 13 -2040545018 10721 13 -3688084506 14675 13 -3213345845 12920 13 -1649182198 18850 13 -4074113234 25757 13 -3376699113 11119 13 -1759518318 6710 13 -4282077491 3783 13 -50617862 1198 13 -3360204330 1396 13 -1593313328 1090 13 -299290642 1007 13 -626236774 844 13 -874282412 849 13 -4178652911 875 13 -485106180 1209 13 -1547289433 963 13 -3854564980 945 13 -2031689692 1069 13 -3576575130 964 13 -1774026540 876 13 -2731230180 952 13 -1194563574 796 13 -2021580690 879 13 -2746817610 949 13 -3559739251 850 13 -3544315329 1178 13 -1052073627 1078 13 -3505097484 1079 13 -180625815 965 13 -2104211249 1040 13 -1441160327 1065 13 -3945939376 1129 13 -370274851 1024 13 -711481412 1312 13 -840447793 788 13 -3451505519 882 13 -3877604264 1075 13 -830438090 1283 13 -2764297879 1510 13 -1243627180 1284 13 -3196862562 1116 13 -2373579825 6662 13 -2100855598 9967 13 -1274206954 1012 13 -304207352 7976 13 
-2441654000 22139 13 -614190297 14272 13 -1181072069 30103 13 -3440517487 25849 13 -1308934667 13421 13 -771330616 19740 13 -2076792446 9595 13 -1291267012 10778 13 -2096899717 1363 13 -1277349134 999 13 -438749173 1020 13 -2731140236 1352 13 -3654717063 1126 13 -3411581499 1039 13 -2420227075 1194 13 -1223436288 1128 13 -705023929 921 13 -1716388376 1118 13 -1363892464 933 13 -3303009813 536 13 -1221600795 1282 13 -1211327181 1575 13 -761086354 1651 13 -2593664197 1512 13 -1768252647 1763 13 -3623700305 36456 13 -1668009117 3974 13 -3469697369 50184 13 +3952458608 2991 13 +4180596825 4730 13 +2603024509 3334 13 +2987787322 2764 13 +4084004272 5052 13 +3380200943 5835 13 +3819459172 4933 13 +2609290778 6978 13 +2431927993 5566 13 +3388015699 4587 13 +906055163 5322 13 +16106216 5201 13 +938852138 5889 13 +781890933 4517 13 +2195828880 4751 13 +3523227799 5593 13 +4265303115 4214 13 +3707907638 3872 13 +2009298664 3506 13 +575874746 2966 13 +3506339302 3311 13 +133480422 3178 13 +2509484477 3746 13 +3794640683 3509 13 +3351139712 3902 13 +412411444 6653 13 +4140561728 719 13 +1807076554 13352 13 +26431906 4531 13 +1375837495 3810 13 +1006039288 3279 13 +3242596812 5272 13 +1538116916 631 13 +4168295957 8466 13 +1302802656 3622 13 +2006363633 3778 13 +3472146439 1898 13 +1978981996 1409 13 +2107710363 946 13 +1074751083 4607 13 +2294917085 3226 13 +1870736336 997 13 +2712923136 1207 13 +865069572 1326 13 +1076618723 4000 13 +392408174 4879 13 +705424812 3724 13 +619334514 5147 13 +4096661586 4622 13 +973638959 3860 13 +2079979648 1674 13 +813855634 5013 13 +3099350440 3614 13 +3817261412 1435 13 +3564327096 1576 13 +4107303729 4367 13 +4162289021 4174 13 +1741989643 5474 13 +3884446162 3044 13 +2024276653 2836 13 +606221974 2617 13 +4160521530 2070 13 +3412058102 6545 13 +2615406904 20311 13 +2052931854 6469 13 +1729394907 7846 13 +34888158 5869 13 +640224006 6678 13 +993431315 5699 13 +3173126984 6846 13 +3544268353 4897 13 +3437302198 3922 13 +1390412822 5454 13 
+2052678690 5281 13 +2174221159 7742 13 +2152304676 3025 13 +2926780970 1215 13 +2939177932 1894 13 +630509569 1627 13 +2885598578 2175 13 +56103701 1528 13 +3154743790 2223 13 +3141758185 721 13 +33326339 1383 13 +506330287 1111 13 +739901609 953 13 +3607068541 720 13 +1762721079 2394 13 +3075102083 999 13 +3857294159 997 13 +2391158583 1021 13 +2030790662 1107 13 +2787931618 969 13 +497408694 1100 13 +2543097490 915 13 +288387595 1188 13 +1074141573 1038 13 +3991313986 957 13 +1299252534 785 13 +3543133149 1043 13 +810433522 1006 13 +1858399967 1256 13 +1573755693 1157 13 +2085563328 951 13 +4141520346 830 13 +1554391084 1168 13 +4110243330 1251 13 +611837462 1059 13 +525523161 1493 13 +1965983515 1543 13 +1697459641 1865 13 +1104922517 1638 13 +2568973614 1447 13 +1551545644 1546 13 +1186901197 1417 13 +1440447057 1620 13 +1838299568 1337 13 +3553775430 1266 13 +1059795212 1341 13 +120735459 1212 13 +1368528429 1416 13 +986306003 1198 13 +3518197070 1246 13 +3995884515 1194 13 +903244080 1366 13 +1849844065 1365 13 +3327649755 1317 13 +2203743065 1302 13 +3983327727 1479 13 +3890386091 1338 13 +2515113501 1479 13 +2423315224 1674 13 +290209643 1292 13 +3193944903 761 13 +2998511305 1080 13 +3381557804 1585 13 +1661168854 1560 13 +2069049030 591 13 +3529664844 5951 13 +1312284188 5191 13 +291743233 2411 13 +1812966710 6835 13 +2174636846 2520 13 +670174851 1431 13 +1932873654 3011 13 +1597361638 804 13 +59598318 6035 13 +2269520878 409 13 +3498142390 17042 13 +3482711408 20102 13 +2563397332 16829 13 +4222326415 13959 13 +660985568 11544 13 +1423512806 10721 13 +1306074334 14675 13 +258826600 12920 13 +449475310 18850 13 +2350922497 25757 13 +2366256303 11119 13 +215614198 6710 13 +3667537775 3783 13 +993300258 1198 13 +1362805935 1396 13 +1918043544 1090 13 +2041784928 1007 13 +135142202 844 13 +166618355 849 13 +3881577619 875 13 +791862182 1209 13 +3315318989 963 13 +1656545570 945 13 +1737403637 1069 13 +1639795272 964 13 +1019142940 876 13 +890751442 952 13 
+4221374085 796 13 +2222705165 879 13 +1270008051 949 13 +1104304484 850 13 +25904953 1178 13 +3914112955 1078 13 +276099111 1079 13 +613649907 965 13 +1919857326 1040 13 +3108793425 1065 13 +3793294114 1129 13 +3298286411 1024 13 +3392674745 1312 13 +851560912 788 13 +2787928750 882 13 +2738467249 1075 13 +318840841 1283 13 +2107257226 1510 13 +4289550963 1284 13 +2203787076 1116 13 +3730976554 6662 13 +542597751 9967 13 +3736706221 1012 13 +2178581706 7976 13 +388767019 22139 13 +596444352 14272 13 +574814969 30103 13 +3437068797 25849 13 +1092599606 13421 13 +2176314925 19740 13 +2963402358 9595 13 +2090815261 10778 13 +828250602 1363 13 +3843542618 999 13 +2283523746 1020 13 +4276895247 1352 13 +77922060 1126 13 +1227103602 1039 13 +3998858903 1194 13 +3025965497 1128 13 +1089585828 921 13 +2356047571 1118 13 +1737155699 933 13 +3073072316 536 13 +743724141 1282 13 +3453913025 1575 13 +230427021 1651 13 +955324241 1512 13 +3665569555 1763 13 +3726957448 36456 13 +1694611063 3974 13 +4077357517 50184 13
Change 1 of 1 Show Entire File test/​tiny.out Stacked
 
1
2
3
4
5
 
 
 
1
2
3
 
 
4
5
@@ -1,5 +1,5 @@
 MG CRC V T BYTES N R BO  41 92408083 1 0 85 2 3 32  CRC BYTES RSRC2 -1488783105 11 1 -3671375795 48 1 +2357529937 11 1 +2300437156 48 1
Change 1 of 1 Show Entire File test/​two.out Stacked
 
1
2
3
4
5
 
 
 
1
2
3
 
 
4
5
@@ -1,5 +1,5 @@
 MG CRC V T BYTES N R BO  41 2403490936 1 0 11592 2 3 32  CRC BYTES RSRC2 -2215502427 6189 13 -1763768720 5353 13 +296340404 6189 13 +4097792306 5353 13
Change 1 of 4 Show Entire File tok.c Stacked
 
38
39
40
41
 
 
42
43
44
 
51
52
53
54
 
55
56
57
 
62
63
64
65
 
 
66
67
68
69
70
 
71
72
 
73
74
 
 
75
76
77
78
79
80
81
82
83
84
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
87
 
88
89
90
 
124
125
126
127
128
129
130
 
 
 
131
132
 
38
39
40
 
41
42
43
44
45
 
52
53
54
 
55
56
57
58
 
63
64
65
 
66
67
68
69
70
71
 
72
73
 
74
75
 
76
77
78
79
80
81
 
 
 
 
 
 
 
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
 
142
143
144
 
 
 
 
145
146
147
148
149
@@ -38,7 +38,8 @@
  Tokdoc *d;   d = (Tokdoc *)malloc(sizeof(Tokdoc));   d->header = _newtokdocheader(type); - d->buf = newbuffer(10*KB, CHARBUF); + d->doc = newbuffer(10*KB, CHARBUF); + d->id = newbuffer(100, CHARBUF);   return d;  }   @@ -51,7 +52,7 @@
  return t;  }   -int writetok(Tok *tok, FILE *fp) +int write_t(Tok *tok, FILE *fp)  {   /* TODO: test n */   int n; @@ -62,29 +63,46 @@
  tokdoc = p->data;   n = fwrite(tokdoc->header->block,   4, tok->header->block[NUMRES], fp); - n = fwrite(tokdoc->buf->b, 1, tokdoc->buf->i, fp); + n = fwrite(tokdoc->doc->b, 1, tokdoc->header->block[RES1DOCLEN], fp); + n = fwrite(tokdoc->id->b, 1, tokdoc->header->block[RES2IDLEN], fp);   }   return n;  }   -int readtok(Tok *tok, FILE *fp) +int read_t(Tok *tok, FILE *fp)  { - int i, j, n, docsize; + int i, j, n, toread;   Tokdoc *tokdoc; - n = fread(tok->header->block, 4, 8, fp); + n = fread(tok->header->block, 4, TOK_HEADER_BLOCKS, fp); +   for (i = 0; i < tok->header->block[NUMDOCS]; i++) {   tokdoc = newtokdoc(TREC_ADHOC);   n = fread(tokdoc->header->block,   4, tok->header->block[NUMRES], fp); - docsize = 0; - for (j = 1; j < tok->header->block[NUMRES]; j++) - docsize += tokdoc->header->block[j]; - if (docsize > tokdoc->buf->n) - tokdoc->buf = resizebuffer(tokdoc->buf, docsize); - n = fread(tokdoc->buf->b, 1, docsize, fp); - shiftpointer(tokdoc->buf, docsize); + + /* read the doc text */ + toread = tokdoc->header->block[RES1DOCLEN]; + if (toread > tokdoc->doc->n) + tokdoc->doc = resizebuffer(tokdoc->doc, toread); + if((n = fread(tokdoc->doc->b, 1, toread, fp)) != toread) { + printf("read %d of %d bytes\n", n, toread); + printf("ABORT: failed to read doc\n"); + exit(0); + } + shiftpointer(tokdoc->doc, n); + + /* read the doc id */ + toread = tokdoc->header->block[RES2IDLEN]; + if((n = fread(tokdoc->id->b, 1, toread, fp)) != toread) { + printf("read %d of %d bytes\n", n, toread); + printf("ABORT: failed to read docid\n"); + exit(0); + } + shiftpointer(tokdoc->id, n); +   tok->list = addfront(tok->list, newnode((void *)tokdoc));   } +   return n;  }   @@ -124,9 +142,8 @@
  tokdoc = p->data;   _printtokdocheader(tokdoc->header, tok->header->block[NUMRES]);   /* Avoid printing a huge doc */ - /* - ((char *)tokdoc->buf->b)[tokdoc->buf->i] = '\0'; - printf(":%s:\n", tokdoc->buf->b); - */ + /* ((char *)tokdoc->doc->b)[tokdoc->header->block[RES1DOCLEN]] = '\0'; */ + /* ((char *)tokdoc->id->b)[tokdoc->header->block[RES2IDLEN]] = '\0'; */ + /* printf("%s: %s\n", tokdoc->id->b, tokdoc->doc->b); */   }  }
Change 1 of 2 Show Entire File tok.h Stacked
 
19
20
21
22
 
 
23
24
25
 
31
32
33
34
35
 
 
36
37
38
 
19
20
21
 
22
23
24
25
26
 
32
33
34
 
 
35
36
37
38
39
@@ -19,7 +19,8 @@
   struct Tokdoc {   Tokdocheader *header; - Buffer *buf; + Buffer *doc; + Buffer *id;  };    struct Tok { @@ -31,8 +32,8 @@
 Tokdocheader *_newtokdocheader(E_doctype);  Tokdoc *newtokdoc(E_doctype);  Tok *newtok(E_doctype); -int writetok(Tok*, FILE*); -int readtok(Tok*, FILE*); +int write_t(Tok*, FILE*); +int read_t(Tok*, FILE*);  void _printtokheader(Tokheader*);  void _printtokdocheader(Tokdocheader*, int numres);  void printtok(Tok*);