Repositories » TXT0 Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in tip

tip Remove CRC computation; it does not make sense. Consequently, remove the CRC members from the THeader and TDocHeader structures and all references to them in the functions in tfile.c. The byte sizes of the headers were changed accordingly (4 bytes were dropped from each). The TFile pretty-printing functions were also modified to drop CRC.

Changeset a557c5a83d29

Parent 903c968226d9

by Rup Palchowdhury

Changes to 5 files · Browse files at a557c5a83d29 Showing diff from parent 903c968226d9 Diff from another changeset...

Change 1 of 1 Show Entire File Makefile Stacked
 
17
18
19
20
21
22
23
 
17
18
19
 
20
21
22
@@ -17,7 +17,6 @@
 SRC = \   tfile.c \   txt.c \ - crc.c \   parser.c \   porter.c  
Change 1 of 10 Show Entire File parser.c Stacked
 
7
8
9
10
11
12
13
 
15
16
17
18
19
20
21
22
23
 
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
 
280
281
282
283
 
284
285
286
 
300
301
302
303
304
305
306
 
420
421
422
423
424
425
426
427
428
429
430
431
 
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
 
459
460
461
 
475
476
477
478
479
480
481
482
 
598
599
600
601
602
603
604
605
606
607
608
609
610
 
622
623
624
625
626
627
628
629
630
631
632
 
7
8
9
 
10
11
12
 
14
15
16
 
 
 
17
18
19
 
117
118
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
121
122
 
257
258
259
 
260
261
262
263
 
277
278
279
 
280
281
282
 
396
397
398
 
 
 
 
 
 
399
400
401
 
414
415
416
 
 
 
 
 
 
417
418
419
420
421
 
422
423
424
425
 
439
440
441
 
 
442
443
444
 
560
561
562
 
 
 
 
 
 
 
563
564
565
 
577
578
579
 
 
 
 
 
 
580
581
@@ -7,7 +7,6 @@
 #include <kak/klist.h>  #include <kak/khash.h>  #include "stemmer.h" -#include "crc.h"  #include "tfile.h"  #include "txt.h"  #include "parser.h" @@ -15,9 +14,6 @@
 const uint8_t TERM = 0x01;  const uint8_t OTAG = 0x02;  const uint8_t CTAG = 0x03; -static char *str = "width=32 poly=0x04c11db7 init=0x00000000 refin=false refout=false xorout=0xffffffff check=0x765e7680 name=\"CRC-32/POSIX\""; -static char crc_s[128]; -static model_t crc_m;    /* Tokenizer */   @@ -121,25 +117,6 @@
   /* Parser */   -static void __initcrc() -{ - int ret; - strcpy(crc_s, str); - crc_m.name = NULL; - ret = read_model(&crc_m, crc_s); - if (ret == 2) - eprintf("__initcrc() out of memory\n"); - else if (ret == 1) - eprintf("__initcrc() %s: unusable model\n", - crc_m.name == NULL ? "<no name>" : crc_m.name); - crc_table_wordwise(&crc_m); -} - -static void __deinitcrc() -{ - free(crc_m.name); - crc_m.name = NULL; -}    Parser *newparser(char type, char **cw, int c, int n, int x_min, int x_max)  { @@ -280,7 +257,7 @@
   TFile *parse(Parser *parser, FILE *fp)  { - uint32_t b, tid, did, i, l, size, stemlen, e, crc; + uint32_t b, tid, did, i, l, size, stemlen, e;   uint8_t *buf, s[MAXTERMLEN+1], *bp, docid[MAXTERMLEN+1];   TFile *tfile;   TDoc *tdoc; @@ -300,7 +277,6 @@
  e = 0;   size = MB;   - __initcrc();   tfile = newTFile();     while ((b = getdoc(&buf, parser, fp)) != 0) { @@ -420,12 +396,6 @@
  continue;   }   - /* complete TDocHeader */ - crc = crc_wordwise(&crc_m, 0, NULL, 0); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tdoc->h, BTDOCHEADER); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tdoc->txt, tdoc->h->sumu * 4); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tdoc->tf, tdoc->h->sumu * 4); - tdoc->h->crc = crc;     /* Attach TDoc to TFile->list */   tfile->list = addfront(tfile->list, newnode((void *)tdoc)); @@ -444,18 +414,12 @@
    tfile->h->b += BTHEADER + tfile->h->bt + tfile->h->bd;   - crc = crc_wordwise(&crc_m, 0, NULL, 0); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tfile->h, BTHEADER); - tfile->h->crc = crc; - - __deinitcrc(); -   return tfile;  }    TFile *parseq(THeader *h, Hash *hv, Parser *parser, FILE *fp)  { - uint32_t b, tid, did, i, l, size, stemlen, e, crc; + uint32_t b, tid, did, i, l, size, stemlen, e;   uint8_t *buf, s[MAXTERMLEN+1], *bp, docid[MAXTERMLEN+1];   TFile *tfile;   TDoc *tdoc; @@ -475,8 +439,6 @@
  e = 0;   size = MB;   - __initcrc(); -   tfile = newTFile();     /* copy the global stats of the given vocabulary block and patch the @@ -598,13 +560,6 @@
  continue;   }   - /* complete TDocHeader */ - crc = crc_wordwise(&crc_m, 0, NULL, 0); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tdoc->h, BTDOCHEADER); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tdoc->txt, tdoc->h->sumu * 4); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tdoc->tf, tdoc->h->sumu * 4); - tdoc->h->crc = crc; -   /* Attach TDoc to TFile->list */   tfile->list = addfront(tfile->list, newnode((void *)tdoc));   @@ -622,11 +577,5 @@
    tfile->h->b += BTHEADER + tfile->h->bt + tfile->h->bd;   - crc = crc_wordwise(&crc_m, 0, NULL, 0); - crc = crc_wordwise(&crc_m, crc, (uint8_t *)tfile->h, BTHEADER); - tfile->h->crc = crc; - - __deinitcrc(); -   return tfile;  }
Change 1 of 1 Show Entire File t2mem.c Stacked
 
22
23
24
25
 
26
27
28
 
22
23
24
 
25
26
27
28
@@ -22,7 +22,7 @@
    tfile = newTFile();   - receive(stdin, tfile); + readTFile(stdin, tfile);   printTFile(tfile, stdout);     fpv = fopen("vocab.txt", "w");
Change 1 of 8 Show Entire File tfile.c Stacked
 
17
18
19
20
21
22
23
 
35
36
37
38
39
40
41
 
115
116
117
118
119
 
 
120
121
122
123
124
 
 
125
126
127
 
129
130
131
132
133
 
 
134
135
136
137
138
 
 
139
140
141
 
394
395
396
397
398
 
 
399
400
401
 
424
425
426
427
428
 
 
429
430
431
 
440
441
442
443
444
 
 
445
446
447
 
495
496
497
498
499
 
 
500
501
502
 
17
18
19
 
20
21
22
 
34
35
36
 
37
38
39
 
113
114
115
 
 
116
117
118
119
120
 
 
121
122
123
124
125
 
127
128
129
 
 
130
131
132
133
134
 
 
135
136
137
138
139
 
392
393
394
 
 
395
396
397
398
399
 
422
423
424
 
 
425
426
427
428
429
 
438
439
440
 
 
441
442
443
444
445
 
493
494
495
 
 
496
497
498
499
500
@@ -17,7 +17,6 @@
  h = (THeader *)emalloc(sizeof(THeader));   h->type = 0x01;   h->bo = THEADER; - h->crc = 0;   h->b = 0;   h->nt = 0;   h->nd = 0; @@ -35,7 +34,6 @@
  h = (TDocHeader *)emalloc(sizeof(TDocHeader));   h->type = 0x02;   h->bo = TDOCHEADER; - h->crc = 0;   h->id = 0;   h->sumb = 0;   h->sumu = 0; @@ -115,13 +113,13 @@
 void _printTHeader(THeader *h, FILE *stream)  {   fprintf(stream, - "%-2s %-2s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", - "T", "BO", "CRC", "B", + "%-2s %-2s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "T", "BO", "B",   "NT", "BT", "ND", "BD",   "SUMB", "SUMU", "SUMTF");   fprintf(stream, - "%-2u %-2u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", - h->type, h->bo, h->crc, h->b, + "%-2u %-2u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", + h->type, h->bo, h->b,   h->nt, h->bt, h->nd, h->bd,   h->sumb, h->sumu, h->sumtf);  } @@ -129,13 +127,13 @@
 void _printTDocHeader(TDocHeader *h, FILE *stream)  {   fprintf(stream, - "%-2s %-2s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-16s\n", - "T", "BO", "CRC", "ID", + "%-2s %-2s %-10s %-10s %-10s %-10s %-10s %-10s %-16s\n", + "T", "BO", "ID",   "SUMB", "SUMU", "SUMTF", "MAXTF",   "SUMSQTF", "SUMSQLOGTF");   fprintf(stream, - "%-2u %-2u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %+1.8le\n", - h->type, h->bo, h->crc, h->id, + "%-2u %-2u %-10u %-10u %-10u %-10u %-10u %-10u %+1.8le\n", + h->type, h->bo, h->id,   h->sumb, h->sumu, h->sumtf, h->maxtf,   h->sumsqtf, h->sumsqlogtf);  } @@ -394,8 +392,8 @@
   int packTHeader(uchar *buf, THeader *h)  { - return _pack(buf, "cciiiiiiiii", - h->type, h->bo, h->crc, h->b, + return _pack(buf, "cciiiiiiii", + h->type, h->bo, h->b,   h->nt, h->nd, h->bt, h->bd,   h->sumb, h->sumu, h->sumtf);  } @@ -424,8 +422,8 @@
   int packTDocHeader(uchar *buf, TDocHeader *h)  { - return _pack(buf, "cciiiiiiif", - h->type, h->bo, h->crc, h->id, + return _pack(buf, "cciiiiiif", + h->type, h->bo, h->id,   h->sumb, h->sumu, h->sumtf, h->maxtf,   h->sumsqtf, h->sumsqlogtf);  } @@ -440,8 +438,8 @@
 int unpackTHeader(THeader *h, int n, uint8_t *buf)  {   int m; - if ((m = _unpack(buf, "cciiiiiiiii", - &h->type, &h->bo, &h->crc, &h->b, + if ((m = _unpack(buf, "cciiiiiiii", + &h->type, &h->bo, &h->b,   &h->nt, &h->nd, &h->bt, &h->bd,   &h->sumb, &h->sumu, &h->sumtf)) != n)   return -1; @@ -495,8 +493,8 @@
 int unpackTDocHeader(TDocHeader *h, int n, uint8_t *buf)  {   int m; - if ((m = _unpack(buf, "cciiiiiiif", - &h->type, &h->bo, &h->crc, &h->id, + if ((m = _unpack(buf, "cciiiiiif", + &h->type, &h->bo, &h->id,   &h->sumb, &h->sumu, &h->sumtf, &h->maxtf,   &h->sumsqtf, &h->sumsqlogtf)) != n)   return -1;
Change 1 of 3 Show Entire File tfile.h Stacked
 
1
2
3
4
5
 
 
 
 
6
7
8
9
 
 
10
11
12
 
18
19
20
21
22
23
24
 
32
33
34
35
36
37
38
 
1
 
 
 
 
2
3
4
5
6
7
 
 
8
9
10
11
12
 
18
19
20
 
21
22
23
 
31
32
33
 
34
35
36
@@ -1,12 +1,12 @@
 #define NTERM 10000 -#define BTHEADER 38 /* packed bytes for THeader; 8 elements; 1*2 + 4*9 */ -#define THEADER 38 /* bytes in memory */ -#define BTDOCHEADER 46 /* packed bytes for TDocHeader; 10 elements; 1*2 + 4*7 + 16*1 */ -#define TDOCHEADER 38 /* bytes in memory; 1*2 + 4*7 + 8*1 */ +#define BTHEADER 34 /* packed bytes for THeader; 1*2 + 4*8 */ +#define THEADER 34 /* bytes in memory */ +#define BTDOCHEADER 42 /* packed bytes for TDocHeader; 1*2 + 4*6 + 16*1 */ +#define TDOCHEADER 34 /* bytes in memory; 1*2 + 4*6 + 8*1 */    #define BFLOAT 16 /* a double formatted as +1.12345678e+01, using - * +1.8e is 15 bytes long. One extra byte for - * 3-digit exponent. (Why 3?)*/ + * format specifier +1.8e is 15 bytes long. One + * extra byte for 3-digit exponent. (Why 3?)*/    typedef unsigned char uchar;   @@ -18,7 +18,6 @@
 struct THeader {   uint8_t type;   uint8_t bo; - uint32_t crc;   uint32_t b;   uint32_t nt;   uint32_t nd; @@ -32,7 +31,6 @@
 struct TDocHeader {   uint8_t type;   uint8_t bo; - uint32_t crc;   uint32_t id;   uint32_t sumb;   uint32_t sumu;