Repositories » TXT0
Clone URL:  
Pushed to one repository · View In Graph Contained in tip

The first resource in a TDoc of type 'TREC' is the term count of the
'txt' portion. Serializing integers requires more work, so for now
TDoc->rsrc[0]->b is written with sprintf() to store the string
representation of the integer. This wastes space but works for the time
being.

Changeset dd8ba384c059

Parent 4720e0fa4df5

by Rup Palchowdhury

Changes to 2 files · Browse files at dd8ba384c059 Showing diff from parent 4720e0fa4df5 Diff from another changeset...

Change 1 of 1 Show Entire File parser.c Stacked
 
46
47
48
49
 
 
 
 
50
51
52
53
54
55
56
57
 
46
47
48
 
49
50
51
52
53
54
55
56
 
57
58
59
@@ -46,12 +46,14 @@
    /* fill TDoc->rsrc[i], TDoc->txt and TDoc->id   * are complete */ - memcpy(tdoc->rsrc[0], &n_term, sizeof(uint32_t)); + + tdoc->h->n_rsrc[0] = sprintf(tdoc->rsrc[0]->b, "%u", n_term); + if (tdoc->h->n_rsrc[0] == -1) + tdoc->h->n_rsrc[0] = 0;   n_term = 0;     /* fill remaining parts of TDoc->TSubHeader */   tdoc->h->n_txt = tdoc->txt->i; - tdoc->h->n_rsrc[0] = sizeof(uint32_t);   crc = crc_wordwise(crc_model, 0, NULL, 0);   crc = crc_wordwise(crc_model, crc, (unsigned char *)tdoc->txt->b,   tdoc->h->n_txt);
Change 1 of 10 Show Entire File tfile.c Stacked
 
95
96
97
98
 
99
100
101
 
127
128
129
130
131
 
 
132
133
134
 
139
140
141
142
143
144
 
 
145
146
147
148
149
150
151
152
 
 
 
153
154
155
 
157
158
159
160
161
 
 
162
163
164
165
166
 
167
168
169
 
175
176
177
178
 
179
180
181
 
209
210
211
212
213
 
 
214
215
216
 
221
222
223
224
225
226
227
228
 
229
230
231
232
233
234
235
236
237
238
 
239
240
241
 
244
245
246
247
248
249
250
251
 
 
 
 
 
252
253
254
255
256
 
257
258
259
 
280
281
282
283
 
284
285
286
287
288
 
289
290
291
 
293
294
295
 
 
296
297
298
299
 
300
301
 
302
303
 
304
305
306
307
308
309
310
311
 
 
 
 
312
313
314
315
316
317
318
319
 
 
 
 
 
 
320
321
322
 
95
96
97
 
98
99
100
101
 
127
128
129
 
 
130
131
132
133
134
 
139
140
141
 
 
 
142
143
144
145
146
147
 
 
 
 
148
149
150
151
152
153
 
155
156
157
 
 
158
159
160
161
162
163
 
164
165
166
167
 
173
174
175
 
176
177
178
179
 
207
208
209
 
 
210
211
212
213
214
 
219
220
221
 
222
223
224
 
225
226
227
228
229
230
 
231
232
233
 
234
235
236
237
 
240
241
242
 
 
 
 
 
243
244
245
246
247
248
249
250
251
 
252
253
254
255
 
276
277
278
 
279
280
281
282
283
 
284
285
286
287
 
289
290
291
292
293
294
295
296
 
297
298
 
299
300
 
301
302
303
 
 
 
 
 
304
305
306
307
308
309
 
 
 
 
 
 
 
310
311
312
313
314
315
316
317
318
@@ -95,7 +95,7 @@
 int writeTFile(TFile *tfile, FILE *fp)  {   uint16_t v, check; - int i, n; + int i, j, n;   Node *p;   TDoc *tdoc;   @@ -127,8 +127,8 @@
  n = fwrite(&tdoc->h->n_txt, sizeof(uint32_t), 1, fp); v |= n; v <<= 1;   n = fwrite(&tdoc->h->n_id, sizeof(uint32_t), 1, fp); v |= n;   if (tdoc->h->r > 0) { - for (i = 0; i < tdoc->h->r; i++) { - n = fwrite(&tdoc->h->n_rsrc[i], sizeof(uint32_t), 1, fp); + for (j = 0; j < tdoc->h->r; j++) { + n = fwrite(&tdoc->h->n_rsrc[j], sizeof(uint32_t), 1, fp);   v <<= 1; v |= n;   check <<=1; check |= 1;   } @@ -139,17 +139,15 @@
  }     /* write the TDoc->txt */ - v = 0; - n = fwrite(tdoc->txt->b, tdoc->h->n_txt, 1, fp); v |= n; - if (v != 0x0001) { /* 0000 0000 0000 0001 */ + n = fwrite(tdoc->txt->b, tdoc->h->n_txt, 1, fp); + if (n != 1) {   fprintf(stderr, "ERROR: failed to write TDoc->txt\n");   return 0;   }   - /* write the TDoc->id */ - v = 0; - n = fwrite(tdoc->id->b, tdoc->h->n_id, 1, fp); v |= n; - if (v != 0x0001) { /* 0000 0000 0000 0001 */ + /* write the TDoc->id */ + n = fwrite(tdoc->id->b, tdoc->h->n_id, 1, fp); + if (n != 1) {   fprintf(stderr, "ERROR: failed to write TDoc->id\n");   return 0;   } @@ -157,13 +155,13 @@
  if (tdoc->h->r > 0) {   v = 0;   check = 0x0000; - for (i = 0; i < tdoc->h->r; i++) { - n = fwrite(tdoc->rsrc[i]->b, tdoc->h->n_rsrc[i], 1, fp); + for (j = 0; j < tdoc->h->r; j++) { + n = fwrite(tdoc->rsrc[j]->b, tdoc->h->n_rsrc[j], 1, fp);   v <<= 1; v |= n;   check <<= 1; check |= n;   }   if (v != check) { /* the lower tdoc->h->r bits of 'check' were set */ - fprintf(stderr, "ERROR: failed to write a TDoc->rsrc[i]\n"); + fprintf(stderr, "ERROR: failed to write a TDoc->rsrc[j]\n");   return 0;   }   } @@ -175,7 +173,7 @@
 TFile* readTFile(FILE *fp)  {   uint16_t v, check; - int i, n; + int i, j, n;   TFile *tfile;   TDoc *tdoc;   @@ -209,8 +207,8 @@
  n = fread(&tdoc->h->n_txt, sizeof(uint32_t), 1, fp); v |= n; v <<= 1;   n = fread(&tdoc->h->n_id, sizeof(uint32_t), 1, fp); v |= n;   if (tdoc->h->r > 0) { - for (i = 0; i < tdoc->h->r; i++) { - n = fread(&tdoc->h->n_rsrc[i], sizeof(uint32_t), 1, fp); + for (j = 0; j < tdoc->h->r; j++) { + n = fread(&tdoc->h->n_rsrc[j], sizeof(uint32_t), 1, fp);   v <<= 1; v |= n;   check <<=1; check |= 1;   } @@ -221,21 +219,19 @@
  }     /* fill TDoc->txt */ - v = 0;   if (tdoc->txt->n < tdoc->h->n_txt)   tdoc->txt = resizebuffer(tdoc->txt, tdoc->h->n_txt);   n = fread(tdoc->txt->b, tdoc->h->n_txt, 1, fp); - if (v != 0x0001) { /* 0000 0000 0000 0001 */ + if (n != 1) {   fprintf(stderr, "ERROR: failed to read TDoc->txt\n");   return NULL;   }     /* fill TDoc->id */ - v = 0;   if (tdoc->id->n < tdoc->h->n_id)   tdoc->id = resizebuffer(tdoc->id, tdoc->h->n_id);   n = fread(tdoc->id->b, tdoc->h->n_id, 1, fp); - if (v != 0x0001) { /* 0000 0000 0000 0001 */ + if (n != 1) {   fprintf(stderr, "ERROR: failed to read TDoc->id\n");   return NULL;   } @@ -244,16 +240,16 @@
  if (tdoc->h->r > 0) {   v = 0;   check = 0x0000; - for (i = 0; i < tdoc->h->r; i++) { - if (tdoc->rsrc[i]->n < tdoc->h->n_rsrc[i]) - tdoc->rsrc[i] = resizebuffer(tdoc->rsrc[i], - tdoc->h->n_rsrc[i]); - n = fread(tdoc->rsrc[i]->b, tdoc->h->n_rsrc[i], 1, fp); + for (j = 0; j < tdoc->h->r; j++) { + if (tdoc->rsrc[j]->n < tdoc->h->n_rsrc[j]) + tdoc->rsrc[j] = resizebuffer(tdoc->rsrc[j], + tdoc->h->n_rsrc[j]); + n = fread(tdoc->rsrc[j]->b, tdoc->h->n_rsrc[j], 1, fp);   v <<= 1; v |= n;   check <<= 1; check |= n;   }   if (v != check) { /* the lower tdoc->h->r bits of 'check' were set */ - fprintf(stderr, "ERROR: failed to read a TDoc->rsrc[i]\n"); + fprintf(stderr, "ERROR: failed to read a TDoc->rsrc[j]\n");   return 0;   }   } @@ -280,12 +276,12 @@
  static int flag = 1;   if (flag) {   flag = 0; - printf("%-10s %-10s %-10s %-10s ", "CRC", "TXT", "ID", "R"); + printf("%-10s %-10s %-3s %-2s ", "CRC", "TXT", "ID", "R");   for (i = 0; i < h->r; i++)   printf("RSRC%-6u ", i);   printf("\n");   } - printf("%-10u %-10u %-10u %-10u", h->crc, h->n_txt, h->n_id, h->r); + printf("%-10u %-10u %-3u %-2u ", h->crc, h->n_txt, h->n_id, h->r);   for (i = 0; i < h->r; i++)   printf("%-10u ", h->n_rsrc[i]);   printf("\n"); @@ -293,30 +289,30 @@
   void _printTDoc(TDoc *tdoc)  { + int i; +   _printTSubHeader(tdoc->h);     /* print the doc ID */ - /* +   ((char *)tdoc->id->b)[tdoc->h->n_id] = '\0'; - printf("tdoc->id:\n"); + printf("tdoc->id: ");   printf("%s\n", tdoc->id->b); - */ +     /* print the tokenized doc text */ - /* - ((char *)tdoc->txt->b)[tdoc->h->n_txt] = '\0'; - printf("tdoc->txt:\n"); - printf("%s\n", tdoc->txt->b); - */   + ((char *)tdoc->txt->b)[tdoc->h->n_txt] = '\0'; + printf("tdoc->txt: "); + printf("%s\n", tdoc->txt->b); +   /* print the resource blocks */ - /* - for (i = 0; i < tdoc->h->r; i++) { - ((char *)tdoc->rsrc[i]->b)[tdoc->h->n_rsrc[i]] = '\0'; - printf("tdoc->rsrc[%d]:\n", i); - printf("%s\n", tdoc->rsrc[i]->b); - } - */ + + for (i = 0; i < tdoc->h->r; i++) { + ((char *)tdoc->rsrc[i]->b)[tdoc->h->n_rsrc[i]] = '\0'; + printf("tdoc->rsrc[%d]: ", i); + printf("%s\n", tdoc->rsrc[i]->b); + }  }    void printTFile(TFile *tfile)