Repositories » TXT0 Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in tip

Refactored and added error handling.

Changeset 7aa7a5f55649

Parent 4cc65e232f6a

by Rup Palchowdhury

Changes to 23 files · Browse files at 7aa7a5f55649 Showing diff from parent 4cc65e232f6a Diff from another changeset...

Change 1 of 1 Show Entire File .hgignore Stacked
 
5
6
7
8
 
 
 
 
 
5
6
7
 
 
8
9
10
@@ -5,4 +5,6 @@
 .d/*  log/*  *.t -*.ii \ No newline at end of file
+*.ii +attic/* +test/attic/*
Change 1 of 4 Show Entire File Makefile Stacked
 
17
18
19
 
20
21
22
23
24
25
26
 
27
28
29
30
31
32
33
 
34
35
36
 
41
42
43
44
45
46
47
48
49
 
66
67
68
69
 
70
71
72
 
75
76
77
78
 
 
17
18
19
20
21
22
 
 
 
 
 
23
24
25
26
 
 
 
 
27
28
29
30
 
35
36
37
 
 
 
38
39
40
 
57
58
59
 
60
61
62
63
 
66
67
68
 
69
@@ -17,20 +17,14 @@
 SRC = \   tokenizer.c \   tfile.c \ + txt.c \   parser.c \   crc.c \ - porter.c \ - post.c \ - term.c \ - doc.c \ - query.c + porter.c    OBJ = $(patsubst %.c,$(O)/%.o,$(SRC))   -all: raw2t t2mem ii1 - -qparse: $(O)/qparse.o $(OBJ) - $(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) -o $@ $^ $(LIBS) +all: raw2t t2mem ii    raw2t: $(O)/raw2t.o $(OBJ)   $(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) -o $@ $^ $(LIBS) @@ -41,9 +35,6 @@
 ii: $(O)/ii.o $(OBJ)   $(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) -o $@ $^ $(LIBS)   -ii1: $(O)/ii1.o $(OBJ) - $(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) -o $@ $^ $(LIBS) -  $(O)/%.o: %.c $(DEPDIR)/%.d   $(CC) $(DEPFLAGS) $(CFLAGS) $(INCLUDES) -c $< -o $@   $(POSTCOMPILE) @@ -66,7 +57,7 @@
   .PHONY: clean  clean: - rm -f $(O)/*.o $(DEPDIR)/*.d raw2t t2mem + rm -f $(O)/*.o $(DEPDIR)/*.d raw2t t2mem ii    .PHONY: parsetest  parsetest: @@ -75,4 +66,4 @@
   .PHONY: iitest  iitest: - ./ii1 -s test/alice_query.t <test/alice.t | diff -q - test/alice.res + ./ii -s test/alice_query.t <test/alice.t | sort -k1,1 -k3,3nr | diff -q - test/alice.rank
Change 1 of 1 Show Entire File README Stacked
 
11
12
13
14
 
15
16
17
 
11
12
13
 
14
15
16
17
@@ -11,7 +11,7 @@
 Build inverted index from tokenized corpus and search (-s) using  queries.   - ./ii1 -s q.t <d.t >res + ./ii -s q.t <d.t >res    Rank the search result.  
Change 1 of 1 Show Entire File doc.c Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,52 +0,0 @@
-#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "doc.h" - -Doc *newdoc(char *id, uint32_t n_term, uint32_t n_uterm, uint32_t n_byte) -{ - Doc *d; - d = (Doc *)malloc(sizeof(Doc)); - d->id = strdup(id); - d->n_term = n_term; - d->n_uterm = n_uterm; - d->n_byte = n_byte; - return d; -} - -void freedoc(void *d) -{ - if (d == NULL) - return; - free(((Doc *)d)->id); - free(d); -} - -int p_cmpdoc(void *d1, void *d2) -{ - if (((Doc *)d1)->id == ((Doc *)d2)->id) - return 0; - return 1; -} - -int cmpdoc(void *d1, void *d2) -{ - return strcmp(((Doc *)d1)->id, ((Doc *)d2)->id); -} - -unsigned hashdoc(void *data, unsigned hsize) -{ - static unsigned MULTIPLIER = 31; - unsigned h; - h = 0; - for (char *p = ((Doc *)data)->id; *p != '\0'; p++) - h = MULTIPLIER * h + *p; - return h % hsize; -} - -void fprintfdoc(FILE *stream, void *data) -{ - Doc *d; - d = (Doc *)data; - fprintf(stream, " %s:%u:%u:%u", d->id, d->n_term, d->n_uterm, d->n_byte); -}
Change 1 of 1 Show Entire File doc.h Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,15 +0,0 @@
-typedef struct Doc Doc; - -struct Doc { - char *id; - uint32_t n_term; - uint32_t n_uterm; - uint32_t n_byte; -}; - -Doc *newdoc(char*, uint32_t, uint32_t, uint32_t); -void freedoc(void*); -int cmpdoc(void*, void*); -unsigned hashdoc(void*, unsigned); -void fprintdoc(FILE*, void*); -
Change 1 of 9 Show Entire File ii.c Stacked
 
2
3
4
 
5
6
7
8
9
10
11
12
13
14
 
15
16
17
 
108
109
110
111
112
113
114
115
 
167
168
169
 
170
171
172
 
179
180
181
182
183
184
185
 
187
188
189
190
191
 
 
192
193
 
194
195
196
197
198
199
200
 
201
202
203
 
205
206
207
208
 
209
210
211
212
213
214
215
216
 
 
 
 
 
 
 
 
 
217
218
219
 
241
242
243
244
 
245
246
247
 
262
263
264
265
 
 
 
 
 
266
267
268
 
293
294
295
 
296
297
 
2
3
4
5
6
 
7
8
 
 
9
10
 
 
11
12
13
14
 
105
106
107
 
 
108
109
110
 
162
163
164
165
166
167
168
 
175
176
177
 
178
179
180
 
182
183
184
 
 
185
186
187
 
188
189
190
191
192
193
194
 
195
196
197
198
 
200
201
202
 
203
204
 
 
205
206
207
 
 
208
209
210
211
212
213
214
215
216
217
218
219
 
241
242
243
 
244
245
246
247
 
262
263
264
 
265
266
267
268
269
270
271
272
 
297
298
299
300
301
302
@@ -2,16 +2,13 @@
 #include <stdlib.h>  #include <string.h>  #include <assert.h> +#include <kak/eprintf.h>  #include <kak/kcommon.h> -#include <kak/klog.h>  #include <kak/klist.h>  #include <kak/khash.h> -#include "post.h" -#include "term.h"  #include "tokenizer.h"  #include "tfile.h" -#include "doc.h" -#include "query.h" +#include "txt.h"    enum {NTERMS = 100003, NDOCS = 100003, NHASH = 4093};  /* enum {NTERMS = 10007, NDOCS = 4093, NHASH = 4093}; */ @@ -108,8 +105,6 @@
    for(int i = 1; i <= h->n; i++) { /* for each doc */   - fprintf(stderr, "\r%d/%d", i, h->n); -   tdoc = newTDoc(TREC);     if ((tdoc = readTDoc(tdoc, fp, TREC)) == NULL) @@ -167,6 +162,7 @@
  free(term_);   }   freeTDoc(tdoc, TREC); + fprintf(stderr, "\rindexed: %d/%d", i, h->n);   }   _freeTHeader(h);   fprintf(stderr, "\n"); @@ -179,7 +175,6 @@
  Post *post, *post_, *post_t, *post_d;   Term *term, term__;   Node *nppost, *nppost_, *npterm, node__; - Post *res[NDOCS]; /*TODO: use a list in stead? */   Hash *hpost;     hpost = newhash(NDOCS, cmppost, hashpost); @@ -187,17 +182,17 @@
    for (Node *np = q->tlist; np != NULL; np = np->next) { /* for a term */   - post_t = (Post *)(np->data); - term__.s = post_t->id; + post_t = (Post *)(np->data); + term__.s = post_t->id;   node__.data = &term__; - npterm = hlookup(hterm, &node__, 0); + npterm = hlookup(hterm, &node__, 0);     if (npterm == NULL)   continue;     term = (Term *)(npterm->data);   - for (Node *np1 = term->plist; np1 != NULL; np1 = np1->next) { + for (Node *np1 = term->plist; np1 != NULL; np1 = np1->next) { /* for a doc */   post_d = (Post *)(np1->data);   post_ = newpost(post_d->id, post_d->tf * post_t->tf);   nppost_ = newnode(post_); @@ -205,15 +200,20 @@
  if (nppost != nppost_) { /* merge */   post = (Post *)(nppost->data);   post->tf += post_->tf; - free(nppost_); + freenode(nppost_, freepost);   } - else - res[n_d++] = post_;   }   }   - for (int i = 0; i < n_d; i++) - printf("%s %s %d\n", q->id, res[i]->id, res[i]->tf); + /* print result */ + for (int i = 0; i < hpost->n; i++) { + for (Node *np = hpost->tab[i]; np != NULL; np = np->next) { + post = (Post *)(np->data); + fprintf(stdout, "%s %s %d\n", q->id, post->id, post->tf); + } + } + + freehash(hpost, freepost);     /* results in a list that points to nodes in the hash table */   /* @@ -241,7 +241,7 @@
   void usage(char *progname)  { - fprintf(stderr, "usage: %s [-s query]\n", progname); + fprintf(stderr, "usage: %s [-s query] <doc.t\n", progname);   exit(1);  }   @@ -262,7 +262,11 @@
  }     Hash *hdoc, *hterm, *hq; - FILE *fpq; + FILE *fpq, *fplog; + + esetprogname(estrdup(argv[0])); + fplog = fopen(strcat(estrdup(argv[0]), "-error.log"), "w"); + esetstream(fplog);     hdoc = newhash(NDOCS, cmpdoc, hashdoc);   hterm = newhash(NTERMS, cmpterm, hashterm); @@ -293,5 +297,6 @@
  /* fprintf(stderr, "hterm[]\n"); */   /* hstats(hterm, NTERMS); */   + fclose(fplog);   return 0;  }
Change 1 of 1 Show Entire File ii1.c Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,295 +0,0 @@
-#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <kak/kcommon.h> -#include <kak/klog.h> -#include <kak/klist.h> -#include <kak/khash.h> -#include "post.h" -#include "term.h" -#include "tokenizer.h" -#include "tfile.h" -#include "doc.h" - -enum {NTERMS = 100003, NDOCS = 100003}; -/* enum {NTERMS = 10007, NDOCS = 4093}; */ - -void hprint(Hash *h, unsigned size) -{ - float fill, avglen; - int c, maxlen, sumlen, chainlen; - Node *np, *np1; - Term *t; - Post *p; - c = maxlen = sumlen = 0; - for (int i = 0; i < size; i++) { - if (h->tab[i] != NULL) { - c++; - chainlen = 0; - for (np = h->tab[i]; np != NULL; np = np->next) { - t = (Term *)(np->data); - chainlen++; - printf("%s:%d ", t->s, t->df); - for (np1 = t->plist; np1 != NULL; np1 = np1->next) { - p = (Post *)(np1->data); - printf("%s:%d ", p->id, p->tf); - } - printf("\n"); - } - sumlen += chainlen; - if (chainlen > maxlen) maxlen = chainlen; - } - } - - fill = (float)c / (float)size * 100; - avglen = (float)sumlen / (float)c; - fprintf(stderr, "Fill: %5.2f%% (%d/%d)\n", fill, c, size); - fprintf(stderr, "Avg chain length: %5.2f\n", avglen); - fprintf(stderr, "Max chain length: %d\n", maxlen); - fprintf(stderr, "Node count: %d\n", sumlen); -} - -unsigned getquery(char q[100][100], FILE *fp) -{ - Tokenizer *tok; - static char *septxt = " ;,.:`'\"?!(){}[]<>~^&*_-+=#$%@|\\/"; - char buf[KB]; - unsigned n; - - if (fgets(buf, KB, fp) == NULL) { - if (!feof(fp)) - fprintf(stderr, "Error reading query file.\n"); - return 0; - } - n = 0; - tok = newtokenizer(septxt, LOWERCASE, NULL); - n = tokenize(q, buf, strlen(buf), tok); - return n; -} - -int postcmptf(const void *p1, const void *p2) -{ - return ((Post *)p1)->tf - ((Post *)p2)->tf; -} - -void bubblesort(Post *p[NDOCS], int nel, int (*cmp)(const void*, const void*)) -{ - Post *tmp; - for (int i = 0; i < nel - 1; i++) { - for (int j = i + 1; j < nel; j++) { - if ((*cmp)((const void *)(p[i]), (const void *)(p[j])) > 0) { - tmp = p[i]; - p[i] = p[j]; - p[j] = tmp; - } - } - } -} - -void buildii(Hash *hdoc, Hash *hterm, FILE *fp) -{ - int j_, pflag_, tflag_; - unsigned n_term; - char *p_txt, *ts; - THeader *h; - TDoc *tdoc; - Doc *doc; - Post *post, *post_; - Term *term, *term_; - Node *npdoc, *npdoc_, *npterm, *npterm_; - - h = _newTHeader(TREC); - - if ((h = readTHeader(h, fp)) == NULL) - exit(0); - - for(int i = 1; i <= h->n; i++) { /* for each doc */ - - fprintf(stderr, "\r%d/%d", i, h->n); - - tdoc = newTDoc(TREC); - - if ((tdoc = readTDoc(tdoc, fp)) == NULL) - exit(0); - - tdoc->id[tdoc->h->n_id] = '\0'; - sscanf(tdoc->rsrc[0], "%u", &n_term); - doc = newdoc(strdup(tdoc->id), tdoc->h->n_txt, n_term); - npdoc_ = newnode(doc); - npdoc = hlookup(hdoc, npdoc_, 1); - if (npdoc != npdoc_) { /* a repeating doc id */ - free(doc); - free(npdoc_); - } - - j_ = 0; - p_txt = tdoc->txt; - - for (int j = 0; j < tdoc->h->n_txt; j++) { /* for each char */ - - if (tdoc->txt[j] != ' ') - continue; - - /* at a term-separator */ - - ts = strndup(p_txt, j - j_); - post_ = newpost((char *)(((Doc *)(npdoc->data))->id), 1); - term_ = newterm(ts, 1, post_); - pflag_ = tflag_ = 0; - npterm_ = newnode(term_); - npterm = hlookup(hterm, npterm_, 1); /* lookup or create new */ - - if (npterm != npterm_) { /* term exists */ - tflag_ = 1; /* mark to free */ - term = (Term *)(npterm->data); - post = (Post *)(term->plist->data); - if (p_postcmp(post_, post) == 0) { /* post exists */ - post->tf++; - pflag_ = 1; /* mark to free */ - } - else { /* attach a new post */ - term->plist = addfront(term->plist, term_->plist); - term->df++; - } - } - - /* update */ - - j_ = j + 1; - p_txt = &(tdoc->txt[j + 1]); - - /* cleanup */ - - if (pflag_) { - free(post_); - free(term_->plist); - } - if (tflag_) - free(term_); - } - freeTDoc(tdoc); - } - _freeTHeader(h); - fprintf(stderr, "\n"); -} - -void retrieve(Hash *hdoc, Hash *hterm, FILE *qfp) -{ - char q[100][100]; - int n_qt, n_q, n_d; - float idf; - Post *post, *post_; - Term *term, *term_; - Node *nppost, *nppost_, *npterm, *npterm_; - Post *res[NDOCS]; - Hash *hpost; - - hpost = newhash(NDOCS, p_postcmp, posthash); - - n_q = 0; - - while ((n_qt = getquery(q, qfp)) > 0) { /* for a query */ - - for (int i = 0; i < NDOCS; i++) { - res[i] = NULL; - hpost->tab[i] = NULL; - } - - n_q++; - n_d = 0; - - for (int i = 0; i < n_qt; i++) { /* for a query-term */ - - term_ = newterm(q[i], 0, NULL); - npterm_ = newnode(term_); - npterm = hlookup(hterm, npterm_, 0); - free(npterm_); - free(term_); - if (npterm == NULL) - continue; - - term = (Term *)(npterm->data); - - for (Node *np = term->plist; np != NULL; np = np->next) { - post_ = (Post *)(np->data); - nppost_ = newnode(post_); - nppost = hlookup(hpost, nppost_, 1); - if (nppost != nppost_) { /* merge */ - post = (Post *)(nppost->data); - post->tf += post_->tf; - free(nppost_); - } - else - res[n_d++] = post_; - } - } - - - /* results in a array of Posts */ - /* for (int i = 0; i < n_d; i++) */ - /* printf("%d %s %d\n", n_q, res[i]->id, res[i]->tf); */ - - - /* FIXIT: qsort isn't working */ - /* qsort(res, n_d, sizeof(Post *), postcmptf); */ - - bubblesort(res, n_d, postcmptf); - - for (int i = 0; i < n_d; i++) - printf("%d %s %d\n", n_q, res[i]->id, res[i]->tf); - - /* results in a list that points to nodes in the hash table */ - /* - for (Node *np = reslist; np != NULL; np = np->next) { - post = (Post *)(np->data); - printf("%d %s %d\n", n_q, post->id, post->tf); - } - */ - - /*TODO: figure out a way to free memory neatly*/ - /* freelist(reslist, NULL); */ - /* reslist = NULL; */ - - /* results in a hash table */ - /* for (int i = 0; i < NDOCS; i++) { */ - /* if (hpost[i] == NULL) */ - /* continue; */ - /* for (Node *np = hpost[i]; np != NULL; np = np->next) { */ - /* post = (Post *)(np->data); */ - /* /\* post->tf /= idf; *\/ /\*FIX: post->tf is int *\/ */ - /* printf("%d %s %d\n", n_q, post->id, post->tf); */ - /* } */ - /* } */ - - } -} - -int main(void) -{ - Hash *hdoc, *hterm; - FILE *qfp; - - hdoc = newhash(NDOCS, doccmp, dochash); - hterm = newhash(NTERMS, termcmp, termhash); - - buildii(hdoc, hterm, stdin); - /* hprint(hterm, NTERMS); */ - /* hstats(hdoc, NDOCS); */ - - /* exit(0); */ - - qfp = fopen("test/qtiny.txt", "r"); - - retrieve(hdoc, hterm, qfp); - - fclose(qfp); - - /* fprintf(stderr, "\n"); */ - /* fprintf(stderr, "doctab[]\n"); */ - /* hstats(doctab, NDOCS); */ - /* fprintf(stderr, "hterm[]\n"); */ - /* hstats(hterm, NTERMS); */ - - return 0; -}
Change 1 of 5 Show Entire File parser.c Stacked
 
3
4
5
 
6
7
8
 
10
11
12
13
 
14
15
16
 
51
52
53
54
 
55
56
57
 
85
86
87
88
 
89
90
91
 
166
167
168
169
 
170
171
172
 
3
4
5
6
7
8
9
 
11
12
13
 
14
15
16
17
 
52
53
54
 
55
56
57
58
 
86
87
88
 
89
90
91
92
 
167
168
169
 
170
171
172
173
@@ -3,6 +3,7 @@
 #include <string.h>  #include <stdint.h>  #include <malloc/malloc.h> +#include <kak/eprintf.h>  #include <kak/kcommon.h>  #include <kak/klist.h>  #include <kak/khash.h> @@ -10,7 +11,7 @@
 #include "stemmer.h"  #include "tokenizer.h"  #include "tfile.h" -#include "post.h" +#include "txt.h"  #include "parser.h"    static char *str = "width=32 poly=0x04c11db7 init=0x00000000 refin=false refout=false xorout=0xffffffff check=0x765e7680 name=\"CRC-32/POSIX\""; @@ -51,7 +52,7 @@
  char s[KB];   char *cfile = "test/common_words.txt";   - p = (Parser *)malloc(sizeof(Parser)); + p = (Parser *)emalloc(sizeof(Parser));   p->type = type;     switch (type) { @@ -85,7 +86,7 @@
  while (fgets(s, KB, fp)) {   if (s[strlen(s)-1] == '\n')   s[strlen(s)-1] = '\0'; - npterm_ = newnode(strdup(s)); + npterm_ = newnode(estrdup(s));   hlookup(p->hterm, npterm_, 1);   /* assuming incoming terms are unique */   } @@ -166,7 +167,7 @@
  post->id, post->tf);   n_txt += m;   if (txtsize - n_txt < lowmem) - tdoc->txt = realloc(tdoc->txt, txtsize <<= 1); + tdoc->txt = erealloc(tdoc->txt, txtsize <<= 1);   n_uterm++;   n_term += post->tf;   }
Change 1 of 1 Show Entire File post.c Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,69 +0,0 @@
-#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "post.h" - -Post *newpost(char *id, int tf) -{ - Post *p; - p = (Post *)malloc(sizeof(Post)); - p->id = strdup(id); - p->tf = tf; - return p; -} - -void freepost(void *p) -{ - if (p == NULL) - return; - free(((Post *)p)->id); - free(p); -} - -Post *newpost_s(char *id, int tf) /* shallow */ -{ - Post *p; - p = (Post *)malloc(sizeof(Post)); - p->id = id; - p->tf = tf; - return p; -} - -void freepost_s(void *p) /* shallow */ -{ - free(p); -} - -int cmppost(void *d1, void *d2) -{ - return strcmp(((Post *)d1)->id, ((Post *)d2)->id); -} - -int cmppost_p(void *d1, void *d2) -{ - if (((Post *)d1)->id == ((Post *)d2)->id) - return 0; - return 1; -} - -int cmppost_tf(const void *p1, const void *p2) -{ - return ((Post *)p1)->tf - ((Post *)p2)->tf; -} - -void fprintpost(FILE *stream, void *data) -{ - Post *p; - p = (Post *)data; - fprintf(stream, " %s:%u", p->id, p->tf); -} - -unsigned hashpost(void *data, unsigned hsize) -{ - static unsigned MULTIPLIER = 31; - unsigned h; - h = 0; - for (char *p = ((Post *)data)->id; *p != '\0'; p++) - h = MULTIPLIER * h + *p; - return h % hsize; -}
Change 1 of 1 Show Entire File post.h Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,16 +0,0 @@
-typedef struct Post Post; - -struct Post { - char* id; - int tf; -}; - -Post *newpost(char*, int); -void freepost(void*); -Post *newpost_s(char*, int); -void freepost_s(void*); -int cmppost(void*, void*); -int cmppost_p(void*, void*); -int cmppost_tf(const void*, const void*); -void fprintpost(FILE*, void*); -unsigned hashpost(void*, unsigned);
Change 1 of 1 Show Entire File query.c Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,50 +0,0 @@
-#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "kak/klist.h" -#include "post.h" -#include "term.h" -#include "query.h" - -Query *newquery(char *id) -{ - Query *q; - q = (Query *)malloc(sizeof(Query)); - q->id = strdup(id); - q->n_term = 0; - q->n_uterm = 0; - q->tlist = NULL; - return q; -} -void freequery(void *data) -{ - if (data == NULL) - return; - free(((Query *)data)->id); - freelist(((Query *)data)->tlist, freepost); - free(data); -} - -int cmpquery(void *d1, void *d2) -{ - return strcmp(((Query *)d1)->id, ((Query *)d2)->id); -} - -unsigned hashquery(void *data, unsigned hsize) -{ - static unsigned MULTIPLIER = 31; - unsigned h; - h = 0; - for (char *p = ((Post *)data)->id; *p != '\0'; p++) - h = MULTIPLIER * h + *p; - return h % hsize; -} - -void fprintquery(FILE *stream, void *data) -{ - Query *q; - q = (Query *)data; - fprintf(stream, " %s:%u:%u", q->id, q->n_term, q->n_uterm); - for (Node *np = q->tlist; np != NULL; np = np->next) - fprintpost(stream, np->data); -}
Change 1 of 1 Show Entire File query.h Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,14 +0,0 @@
-typedef struct Query Query; - -struct Query { - char *id; - uint32_t n_term; - uint32_t n_uterm; - Node *tlist; -}; - -Query *newquery(char*); -void freequery(void*); -int cmpquery(void*, void*); -unsigned hashquery(void*, unsigned); -void fprintquery(FILE*, void*);
Change 1 of 3 Show Entire File raw2t.c Stacked
 
13
14
15
16
 
17
18
19
 
29
30
31
 
32
33
34
35
36
 
 
 
 
37
38
39
 
62
63
64
 
 
65
66
 
13
14
15
 
16
17
18
19
 
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
 
67
68
69
70
71
72
73
@@ -13,7 +13,7 @@
 #include <stdio.h>  #include <stdlib.h>  #include <string.h> -#include <malloc/malloc.h> +#include <kak/eprintf.h>  #include <kak/klist.h>  #include "crc.h"  #include "tokenizer.h" @@ -29,11 +29,16 @@
   int main(int argc, char *argv[])  { + FILE *fplog;   TFile *tfile;   Parser *parser;   E_TDocType type;   int c, n, x, t;   + esetprogname(estrdup(argv[0])); + fplog = fopen(strcat(estrdup(argv[0]), "-error.log"), "w"); + esetstream(fplog); +   c = n = x = t = 0;     for (int i = 1; i < argc; i++) { @@ -62,5 +67,7 @@
    freeTFile(tfile, type);   + fclose(fplog); +   return 0;  }
Change 1 of 1 Show Entire File t2mem.c Stacked
 
1
2
3
4
 
5
6
7
8
9
 
10
11
 
 
 
 
 
 
12
13
14
 
 
15
16
 
1
2
3
 
4
5
6
7
8
 
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
@@ -1,16 +1,24 @@
 #include <stdio.h>  #include <stdlib.h>  #include <string.h> -#include <kak/klog.h> +#include <kak/eprintf.h>  #include <kak/klist.h>  #include "tokenizer.h"  #include "tfile.h"   -int main(void) +int main(int argc, char *argv[])  {   TFile *tfile; + FILE *fplog; + + esetprogname(estrdup(argv[0])); + fplog = fopen(strcat(estrdup(argv[0]), "-error.log"), "w"); + esetstream(fplog); +   tfile = readTFile(stdin);   if (tfile)   printTFile(tfile); + + fclose(fplog);   return 0;  }
Change 1 of 1 Show Entire File term.c Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,88 +0,0 @@
-#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "kak/klist.h" -#include "post.h" -#include "term.h" - -Term *newterm(char *s, unsigned df, Post *post) -{ - Term *t; - t = (Term *)malloc(sizeof(Term)); - t->s = strdup(s); - t->df = df; - t->plist = newnode(post); - return t; -} - -void freeterm(void *term) -{ - if (term == NULL) - return; - free(((Term *)term)->s); - free(term); -} - -unsigned hashterm(void *data, unsigned hsize) -{ - static unsigned MULTIPLIER = 31; - unsigned h; - h = 0; - for (char *p = ((Term *)data)->s; *p != '\0'; p++) - h = MULTIPLIER * h + *p; - return h % hsize; -} - -int cmptermp(void *d1, void *d2) -{ - if (((Term *)d1)->s == ((Term *)d2)->s) - return 0; - return 1; -} - -int cmpterm(void *d1, void *d2) -{ - return strcmp(((Term *)d1)->s, ((Term *)d2)->s); -} - -int term_match_handler(void *d1, void *d2) -{ - Term *t, *newt; - Post *p, *newp; - - newt = (Term *)d1; - t = (Term *)d2; - - newp = (Post *)(newt->plist->data); - p = (Post *)(t->plist->data); - - if (cmppost_p(newp, p) == 0) - p->tf++; - else { - t->plist = addfront(t->plist, newt->plist); - t->df++; - } - - return 0; -} - -void fprintterm(FILE *stream, void *data) -{ - Term *t; - t = (Term *)data; - fprintf(stream, " %s:%u", t->s, t->df); - for (Node *np = t->plist; np != NULL; np = np->next) - fprintpost(stream, np->data); -} - -/* TODO: printpost doesn't need the format string, so modify apply */ -/* -void printtree(void *d, void *arg) -{ - char *fmt; - fmt = (char *)arg; - printf(fmt, ((Term *)d)->s, ((Term *)d)->df); - apply(((Term *)d)->plist, printpost, "%s:%d "); - printf("\n"); -} -*/
Change 1 of 1 Show Entire File term.h Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,16 +0,0 @@
-typedef struct Term Term; - -struct Term { - char *s; - unsigned df; - Node *plist; -}; - -unsigned hashterm(void*, unsigned); -Term *newterm(char*, unsigned, Post*); -int cmpterm(void*, void*); -int cmptermp(void*, void*); -int term_match_handler(void*, void*); -void freeterm(void*); -void fprintterm(FILE*, void*); -/* void printtree(void*, void*); */
Show Entire File test/​big.txt Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Change 1 of 1 Show Entire File test/​tiny.txt Stacked
 
1
2
3
4
5
6
7
8
 
 
 
 
 
 
 
 
 
@@ -1,8 +0,0 @@
-<DOC> -<DOCNO>A</DOCNO> -hello world! -</DOC> -<DOC> -<DOCNO>B</DOCNO> -"Where do you think you are going?", shouted Captain. -</DOC>
Change 1 of 1 Show Entire File test/​two.txt Stacked
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@@ -1,230 +0,0 @@
- -<DOC> -<DOCNO> AP890101-0001 </DOCNO> -<FILEID>AP-NR-01-01-89 2358EST</FILEID> -<FIRST>r a PM-APArts:60sMovies 01-01 1073</FIRST> -<SECOND>PM-AP Arts: 60s Movies,1100</SECOND> -<HEAD>You Don't Need a Weatherman To Know '60s Films Are Here</HEAD> -<HEAD>Eds: Also in Monday AMs report.</HEAD> -<BYLINE>By HILLEL ITALIE</BYLINE> -<BYLINE>Associated Press Writer</BYLINE> -<DATELINE>NEW YORK (AP) </DATELINE> -<TEXT> - The celluloid torch has been passed to a new -generation: filmmakers who grew up in the 1960s. - ``Platoon,'' ``Running on Empty,'' ``1969'' and ``Mississippi -Burning'' are among the movies released in the past two years from -writers and directors who brought their own experiences of that -turbulent decade to the screen. - ``The contemporaries of the '60s are some of the filmmakers of -the '80s. It's natural,'' said Robert Friedman, the senior vice -president of worldwide advertising and publicity at Warner Bros. - Chris Gerolmo, who wrote the screenplay for ``Mississippi -Burning,'' noted that the sheer passage of time has allowed him and -others to express their feelings about the decade. - ``Distance is important,'' he said. ``I believe there's a lot of -thinking about that time and America in general.'' - The Vietnam War was a defining experience for many people in the -'60s, shattering the consensus that the United States had a right, -even a moral duty to intervene in conflicts around the world. Even -today, politicians talk disparagingly of the ``Vietnam Syndrome'' in -referring to the country's reluctance to use military force to -settle disputes. - ``I think future historians will talk about Vietnam as one of the -near destructions of American society,'' said Urie Brofenbrenner, a -professor of sociology at Cornell University. - ``In World War II, we knew what we were fighting for, but not in -Vietnam.'' - ``Full Metal Jacket,'' ``Gardens of Stone,'' ``Platoon,'' ``Good -Morning, Vietnam,'' ``Hamburger Hill'' and ``Bat 21'' all use the -war as a dramatic backdrop and show how it shaped characters' lives. - The Vietnam War has remained an emotional issue in the United -States as veterans have struggled to come to terms with their -experiences. One was Oliver Stone, who wrote and directed the -Academy Award-winning ``Platoon.'' - ``I saw `Platoon' eight times,'' said John J. Anderson, a Palm -Beach County sheriff's lieutenant who served in Vietnam in 1966-67. -``I cried the first time I saw it ... and the third and fourth -times. `Platoon' helped me understand.'' - Stone, who based ``Platoon'' on some of his own experiences as a -grunt, said the film brought up issues that had yet to be resolved. - ``People are responding to the fact that it's real. They're -curious about the war in Vietnam after 20 years,'' he said. - While Southeast Asia was the pivotal foreign issue in American -society of the '60s, civil rights was the major domestic issue. The -civil rights movement reached its peak in the ``Freedom Summer'' of -1964, when large groups of volunteers headed South to help register -black voters. - In ``Five Corners,'' a movie about the summer of '64 in the Bronx -starring Jodie Foster, her friend, played by Tim Robbins, leaves his -neighborhood to volunteer in the South after seeing the Rev. Martin -Luther King Jr. on television. - Alan Parker's ``Mississippi Burning'' focuses on an incident that -clouded the Mississippi Summer Project _ when 1,000 young volunteers -from mainstream America swept into the state to help register black -voters. The movie is a fictionalized account of the disappearance -and slaying of three civil rights workers: Michael Schwerner, Andrew -Goodman and James Chaney. - They were reported missing on June 21, several hours after being -stopped for speeding near Philadelphia, Miss. After a nationally -publicized search, their bodies were discovered Aug. 4 on a farm -just outside the town. - One of those who recalled the incident was Gerolmo, a student in -the New York public school system at the time. The screenwriter said -the incident had a powerful effect on his way of thinking. - ``It was the first time I ever considered that our country could -be wrong,'' Gerolmo said. - The film stars Willem Dafoe and Gene Hackman star as FBI agents -who try to find the bodies of the missing workers and overcome -fierce local resistance to solve the crime. - In a more offbeat and outrageous way, John Waters' ``Hairspray'' -discusses integration in Baltimore in 1963 when a group of -teen-agers tries to break down the barriers of a segregated dance -show. - Also set in Baltimore is Barry Levinson's ``Tin Men,'' starring -Danny DeVito and Richard Dreyfuss as two slick aluminum siding -salesmen in the early '60s. The movie mirrored a squarely -middle-class culture, one that was not caught up in sex, politics -and drugs. - Instead of focusing on a well-known historic event, -writer-director Ernest Thompson takes a more personal approach in -``1969.'' Robert Downey Jr. and Keifer Sutherland star as college -students who battle their parents and each other over sex, drugs and -the Vietnam War. - ``I was 19 in 1969. It was a fulcrum time for me,'' said -Thompson, who was a student at American University at the time. ``I -think it was just the right time in my growth as an artist and as a -man to try to write about something that happened in my youth.'' - ``Running on Empty'' takes place in the '80s but the '60s are -much in evidence. Judd Hirsch and Christine Lahti play anti-war -activists who sabatoged a napalm plant in 1970 and are forced to -live underground with their two children. - Naomi Foner, who wrote ``Running on Empty'' and also served as -the film's executive producer, grew up in Brooklyn, N.Y., the -daughter of sociologists. Her own experiences made Foner well -qualified to give ``Running on Empty'' its strong political theme. - ``I lived through that time and I've wanted to find the right way -to present it to this generation,'' said Foner, a member of the -radical Students for a Democratic Society while attending graduate -school at Columbia University. - Foner, who also taught in Harlem's Head Start program and helped -register voters in South Carolina, said many young people are -curious about what happened in the '60s. - ``A lot of them think it was an exciting time that they were -sorry to have missed,'' she said. - Brofenbrenner said movies are a good indicator of the concerns of -the general public: ``The principle impact of the media is that they -reflect the values of the larger society. - ``Film is a very powerful art medium,'' he said. ``I believe it -very accurately reflects not only the prevailing but the coming -trends. It's because film writers, like other writers, are -perceptive people. They get the message of what's going on.'' -</TEXT> -</DOC> -<DOC> -<DOCNO> AP890101-0002 </DOCNO> -<FILEID>AP-NR-01-01-89 2359EST</FILEID> -<FIRST>r a PM-FutureFactory 01-01 0872</FIRST> -<SECOND>PM-Future Factory,0897</SECOND> -<HEAD>University Erects A Factory Of The Future</HEAD> -<HEAD>Eds: Also in Monday AMs report.</HEAD> -<BYLINE>By DONNA BRYSON</BYLINE> -<BYLINE>Associated Press Writer</BYLINE> -<DATELINE>ROLLA, Mo. (AP) </DATELINE> -<TEXT> - For students working in a miniature factory at -the University of Missouri-Rolla, the future of American business is -now. - ``When our students go into industry, they will have -state-of-the-art knowledge'' that will affect decisions about -expanding the role of robots and automated machines in the -workplace, said Sema Alptekin, designer of the futuristic business -laboratory. - Ms. Alptekin is a mechanical and industrial engineer who directs -the university's computer-integrated manufacturing and packaging -laboratory. - The lab, established two years ago at a cost of $120,000, is the -only project of its kind in the state, and one of the more advanced -such programs in the country. - Tom Akas of the Society of Manufacturing Engineers said Ms. -Alptekin has created ``a model for similar laboratories ... Students -can see how the factory of the future operates.'' - The Society of Manufacturing Engineers has recognized Ms. -Alptekin as an Outstanding Young Manufacturing Engineer. - In her lab, a conveyer belt at waist level carries parts from -storage shelves to a robot that dominates the room like a silent, -4-foot metal sentry. The robot is a V-shaped, jointed arm on a -pivoting base. The arm can be fitted to allow it to grasp, lift and -turn objects of differing sizes to suit a variety of tasks. The -robot then swivels to place parts in position at the automated lathe -or milling machine. - The entire factory would fit inside a basketball half-court. The -machinery is of the type used to make small parts in metal cutting -shops, Ms. Alptekin said. - Students learn to program a computer and automated machines -linked to it in a complete manufacturing operation _ retrieving raw -materials from the storage shelf unit, which can be programmed to -supply appropriate parts from its inventory; lifting and placing the -parts in position with the robot's arm; and shaping parts into -finished products at the lathe. - Students using the lab have designed and produced engraved key -chains and intricate mazes and puzzles out of plastic and plexiglass. - But the product is less important than the process. Ms. Alptekin -said former students have entered fields as disparate as aviation -and financial consulting. - In the factory of the future, according to the university's -model, human chatter will be replaced by the click-clack of machines. - ``Ours is quite unique, it's totally integrated,'' Ms. Alptekin -said, referring to programming that allows components such as the -automated storage system to ``talk to'' the computer that controls -the manufacturing process, informing it about the availability of -parts. - Ms. Alptekin said it is this aspect that sets the lab apart even -from industries where robots and automated machines have become -almost commonplace. There, she said, robots perform specific tasks -in ``islands of automation,'' but human workers remain responsible -for keeping inventory and coordinating different aspects of the -production line. - Ms. Alptekin said automation is widespread in the automobile -industry, but she would like to see it expanded. - ``It should be used in any industry,'' she said. ``It's difficult -to justify economically, but if quality is the concern, then it can -be easily justified. - ``At least our students will be aware of the enhancements and -developments in this area, and when they make decisions as engineers -and managers, they will take this into consideration,'' she said. - A master's candidate analyzed the existing system with an eye to -altering it to create a new product. Ms. Alptekin said the student -identified what new production components were necessary and -determined how they could be integrated into the system. - A doctoral candidate is developing computer software that will -allow the system to diagnose itself. The system could be programmed -to recognize when a component is not working, and then decide to -switch to manufacturing a product that does not involve the disabled -component, much as human managers would react. - Graduate student Bob Borchelt said the equipment he is using is -on the cutting edge of manufacturing. - ``Truthfully, I can't say that I was expecting this level of -expertise,'' he said. ``I know that our lab is one of the best of -its kind in an educational institution.'' - The lab, like the entire engineering management program, calls on -computing, mechanical, electrical and chemical skills _ and teamwork. - ``For us, for managers, the entire system is important,'' Ms. -Alptekin said. - Ms. Alptekin said she is continually fine-tuning the lab. This -past year, the original robot was replaced with one able to perform -more tasks. She now is considering purchasing a robot with vision -sophisticated enough to act as an inspector. - The university spent $30,000 to upgrade lab equipment in 1987. An -estimated $60,000 to $70,000 was earmarked in 1988. - International Business Machines Corp. recently pledged $1.2 -million in computer equipment and software to the university as part -of an IBM program to aid 48 college-based robotics labs across the -country. - ``Studies show that there's a severe national shortage of -instructional materials in this growing and critical area,'' said -Andy Russell, a spokesman for IBM. ``IBM will benefit because we -will be helping to train the (computer-integrated manufacturing) -workers and decision makers of today and tomorrow.'' -</TEXT> -</DOC>
Change 1 of 11 Show Entire File tfile.c Stacked
 
1
2
3
 
4
5
6
 
8
9
10
11
 
12
13
14
 
30
31
32
33
 
34
35
36
 
44
45
46
47
 
48
49
50
 
 
51
52
 
53
54
 
55
56
57
58
59
60
61
 
62
63
64
 
235
236
237
238
 
239
240
241
 
244
245
246
247
 
248
249
250
 
257
258
259
260
 
261
262
263
 
320
321
322
323
 
324
325
326
 
329
330
331
332
 
333
334
335
 
342
343
344
345
 
346
347
348
 
357
358
359
360
 
361
362
363
 
1
2
3
4
5
6
7
 
9
10
11
 
12
13
14
15
 
31
32
33
 
34
35
36
37
 
45
46
47
 
48
49
 
 
50
51
52
 
53
54
 
55
56
57
58
59
60
61
 
62
63
64
65
 
236
237
238
 
239
240
241
242
 
245
246
247
 
248
249
250
251
 
258
259
260
 
261
262
263
264
 
321
322
323
 
324
325
326
327
 
330
331
332
 
333
334
335
336
 
343
344
345
 
346
347
348
349
 
358
359
360
 
361
362
363
364
@@ -1,6 +1,7 @@
 #include <stdio.h>  #include <stdlib.h>  #include <string.h> +#include <kak/eprintf.h>  #include <kak/klist.h>  #include "tokenizer.h"  #include "tfile.h" @@ -8,7 +9,7 @@
 THeader *_newTHeader(E_TDocType type)  {   THeader *h; - h = (THeader *)malloc(sizeof(THeader)); + h = (THeader *)emalloc(sizeof(THeader));   h->mg = 41;   h->crc = 0;   h->ver = 1; @@ -30,7 +31,7 @@
 TSubHeader *_newTSubHeader(E_TDocType type)  {   TSubHeader *h; - h = (TSubHeader *)malloc(sizeof(TSubHeader)); + h = (TSubHeader *)emalloc(sizeof(TSubHeader));   h->crc = 0;   h->n_txt = 0;   h->n_id = 0; @@ -44,21 +45,21 @@
  r = 0;   if (type == TREC)   r = TREC_R; - tdoc = (TDoc *)malloc(sizeof(TDoc)); + tdoc = (TDoc *)emalloc(sizeof(TDoc));   tdoc->h = _newTSubHeader(type); - tdoc->txt = (char *)malloc(sizeof(char) * TXTBUFSIZE); - tdoc->id = (char *)malloc(sizeof(char) * IDBUFSIZE); + tdoc->txt = (char *)emalloc(sizeof(char) * TXTBUFSIZE); + tdoc->id = (char *)emalloc(sizeof(char) * IDBUFSIZE);   tdoc->rsrc = NULL; - tdoc->rsrc = (char **)malloc(sizeof(char *) * r); + tdoc->rsrc = (char **)emalloc(sizeof(char *) * r);   for (int i = 0; i < r; i++) - tdoc->rsrc[i] = (char *)malloc(sizeof(char) * RSRCBUFSIZE); + tdoc->rsrc[i] = (char *)emalloc(sizeof(char) * RSRCBUFSIZE);   return tdoc;  }    TFile *newTFile(E_TDocType type)  {   TFile *t; - t = (TFile *)malloc(sizeof(TFile)); + t = (TFile *)emalloc(sizeof(TFile));   t->h = _newTHeader(type);   t->list = NULL;   return t; @@ -235,7 +236,7 @@
    /* fill TDoc->txt */   if (tdoc->h->n_txt > TXTBUFSIZE) - tdoc->txt = (char *)realloc(tdoc->txt, tdoc->h->n_txt); + tdoc->txt = (char *)erealloc(tdoc->txt, tdoc->h->n_txt);   n = fread(tdoc->txt, tdoc->h->n_txt, 1, fp);   if (n != 1) {   fprintf(stderr, "ERROR: failed to read TDoc->txt\n"); @@ -244,7 +245,7 @@
    /* fill TDoc->id */   if (tdoc->h->n_id > IDBUFSIZE) - tdoc->id = (char *)realloc(tdoc->id, tdoc->h->n_id); + tdoc->id = (char *)erealloc(tdoc->id, tdoc->h->n_id);   n = fread(tdoc->id, tdoc->h->n_id, 1, fp);   if (n != 1) {   fprintf(stderr, "ERROR: failed to read TDoc->id\n"); @@ -257,7 +258,7 @@
  check = 0x0000;   for (int j = 0; j < r; j++) {   if (tdoc->h->n_rsrc[j] > RSRCBUFSIZE) - tdoc->rsrc[j] = realloc(tdoc->rsrc[j], + tdoc->rsrc[j] = erealloc(tdoc->rsrc[j],   tdoc->h->n_rsrc[j]);   n = fread(tdoc->rsrc[j], tdoc->h->n_rsrc[j], 1, fp);   v <<= 1; v |= n; @@ -320,7 +321,7 @@
    /* fill TDoc->txt */   if (tdoc->h->n_txt > TXTBUFSIZE) - tdoc->txt = (char *)realloc(tdoc->txt, tdoc->h->n_txt); + tdoc->txt = (char *)erealloc(tdoc->txt, tdoc->h->n_txt);   n = fread(tdoc->txt, tdoc->h->n_txt, 1, fp);   if (n != 1) {   fprintf(stderr, "ERROR: failed to read TDoc->txt\n"); @@ -329,7 +330,7 @@
    /* fill TDoc->id */   if (tdoc->h->n_id > IDBUFSIZE) - tdoc->id = (char *)realloc(tdoc->id, tdoc->h->n_id); + tdoc->id = (char *)erealloc(tdoc->id, tdoc->h->n_id);   n = fread(tdoc->id, tdoc->h->n_id, 1, fp);   if (n != 1) {   fprintf(stderr, "ERROR: failed to read TDoc->id\n"); @@ -342,7 +343,7 @@
  check = 0x0000;   for (int j = 0; j < tfile->h->r; j++) {   if (tdoc->h->n_rsrc[j] > RSRCBUFSIZE) - tdoc->rsrc[j] = realloc(tdoc->rsrc[j], + tdoc->rsrc[j] = erealloc(tdoc->rsrc[j],   tdoc->h->n_rsrc[j]);   n = fread(tdoc->rsrc[j], tdoc->h->n_rsrc[j], 1, fp);   v <<= 1; v |= n; @@ -357,7 +358,7 @@
  /* add to the list a new node with a TDoc payload */   tfile->list = addfront(tfile->list, newnode((void *)tdoc));   - fprintf(stderr, "\r%d/%d", i+1, tfile->h->n); + fprintf(stderr, "\rread: %d/%d", i+1, tfile->h->n);   }     fprintf(stderr, "\n");
Change 1 of 3 Show Entire File tokenizer.c Stacked
 
1
2
3
 
4
5
6
 
42
43
44
45
 
46
47
48
 
63
64
65
66
 
67
68
 
 
69
70
71
72
 
73
74
75
 
1
2
3
4
5
6
7
 
43
44
45
 
46
47
48
49
 
64
65
66
 
67
68
 
69
70
71
72
73
 
74
75
76
77
@@ -1,6 +1,7 @@
 #include <stdio.h>  #include <stdlib.h>  #include <string.h> +#include <kak/eprintf.h>  #include "tokenizer.h"    void reset(Stack *stk, int n) @@ -42,7 +43,7 @@
  Tokenizer *t;   int i;   char *c; - t = (Tokenizer *)malloc(sizeof(Tokenizer)); + t = (Tokenizer *)emalloc(sizeof(Tokenizer));   strcpy(t->delimiters, delimiters);   memset(t->asciitab, SEPCHAR, sizeof(int) * ASCII);   /* ASCII chars 0 - 31, 127 and those from the delimiter string @@ -63,13 +64,14 @@
 Token *newtoken(char *str, unsigned n)  {   Token *t; - t = (Token *)malloc(sizeof(Token)); + t = (Token *)emalloc(sizeof(Token));   if (str == NULL) { - t->str = (char *)calloc(n, sizeof(char)); + t->str = (char *)emalloc(sizeof(char) * n); + memset(t->str, '\0', sizeof(char) * n);   t->l = 0;   }   else { - t->str = strdup(str); + t->str = estrdup(str);   t->l = strlen(str);   }   t->type = TERM;
Change 1 of 1 Show Entire File txt.c Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
@@ -0,0 +1,252 @@
+#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <kak/eprintf.h> +#include "kak/klist.h" +#include "txt.h" + +/* Doc */ + +Doc *newdoc(char *id, uint32_t n_term, uint32_t n_uterm, uint32_t n_byte) +{ + Doc *d; + d = (Doc *)emalloc(sizeof(Doc)); + d->id = estrdup(id); + d->n_term = n_term; + d->n_uterm = n_uterm; + d->n_byte = n_byte; + return d; +} + +void freedoc(void *d) +{ + if (d == NULL) + return; + free(((Doc *)d)->id); + free(d); +} + +int p_cmpdoc(void *d1, void *d2) +{ + if (((Doc *)d1)->id == ((Doc *)d2)->id) + return 0; + return 1; +} + +int cmpdoc(void *d1, void *d2) +{ + return strcmp(((Doc *)d1)->id, ((Doc *)d2)->id); +} + +unsigned hashdoc(void *data, unsigned hsize) +{ + static unsigned MULTIPLIER = 31; + unsigned h; + h = 0; + for (char *p = ((Doc *)data)->id; *p != '\0'; p++) + h = MULTIPLIER * h + *p; + return h % hsize; +} + +void fprintfdoc(FILE *stream, void *data) +{ + Doc *d; + d = (Doc *)data; + fprintf(stream, " %s:%u:%u:%u", d->id, d->n_term, d->n_uterm, d->n_byte); +} + +/* Query */ + +Query *newquery(char *id) +{ + Query *q; + q = (Query *)emalloc(sizeof(Query)); + q->id = estrdup(id); + q->n_term = 0; + q->n_uterm = 0; + q->tlist = NULL; + return q; +} +void freequery(void *data) +{ + if (data == NULL) + return; + free(((Query *)data)->id); + freelist(((Query *)data)->tlist, freepost); + free(data); +} + +int cmpquery(void *d1, void *d2) +{ + return strcmp(((Query *)d1)->id, ((Query *)d2)->id); +} + +unsigned hashquery(void *data, unsigned hsize) +{ + static unsigned MULTIPLIER = 31; + unsigned h; + h = 0; + for (char *p = ((Post *)data)->id; *p != '\0'; p++) + h = MULTIPLIER * h + *p; + return h % hsize; +} + +void fprintquery(FILE *stream, void *data) +{ + Query *q; + q = (Query *)data; + fprintf(stream, " %s:%u:%u", q->id, q->n_term, q->n_uterm); + for (Node *np = q->tlist; np != NULL; np = np->next) + fprintpost(stream, np->data); +} + +/* Term */ + +Term *newterm(char *s, unsigned df, Post *post) +{ + Term *t; + t = (Term *)emalloc(sizeof(Term)); + t->s = estrdup(s); + t->df = df; + t->plist = newnode(post); + return t; +} + +void freeterm(void *term) +{ + if (term == NULL) + return; + free(((Term *)term)->s); + free(term); +} + +unsigned hashterm(void *data, unsigned hsize) +{ + static unsigned MULTIPLIER = 31; + unsigned h; + h = 0; + for (char *p = ((Term *)data)->s; *p != '\0'; p++) + h = MULTIPLIER * h + *p; + return h % hsize; +} + +int cmptermp(void *d1, void *d2) +{ + if (((Term *)d1)->s == ((Term *)d2)->s) + return 0; + return 1; +} + +int cmpterm(void *d1, void *d2) +{ + return strcmp(((Term *)d1)->s, ((Term *)d2)->s); +} + +int term_match_handler(void *d1, void *d2) +{ + Term *t, *newt; + Post *p, *newp; + + newt = (Term *)d1; + t = (Term *)d2; + + newp = (Post *)(newt->plist->data); + p = (Post *)(t->plist->data); + + if (cmppost_p(newp, p) == 0) + p->tf++; + else { + t->plist = addfront(t->plist, newt->plist); + t->df++; + } + + return 0; +} + +void fprintterm(FILE *stream, void *data) +{ + Term *t; + t = (Term *)data; + fprintf(stream, " %s:%u", t->s, t->df); + for (Node *np = t->plist; np != NULL; np = np->next) + fprintpost(stream, np->data); +} + +/* TODO: printpost doesn't need the format string, so modify apply */ +/* +void printtree(void *d, void *arg) +{ + char *fmt; + fmt = (char *)arg; + printf(fmt, ((Term *)d)->s, ((Term *)d)->df); + apply(((Term *)d)->plist, printpost, "%s:%d "); + printf("\n"); +} +*/ + +/* Post */ + +Post *newpost(char *id, int tf) +{ + Post *p; + p = (Post *)emalloc(sizeof(Post)); + p->id = estrdup(id); + p->tf = tf; + return p; +} + +void freepost(void *p) +{ + if (p == NULL) + return; + free(((Post *)p)->id); + free(p); +} + +Post *newpost_s(char *id, int tf) /* shallow */ +{ + Post *p; + p = (Post *)emalloc(sizeof(Post)); + p->id = id; + p->tf = tf; + return p; +} + +void freepost_s(void *p) /* shallow */ +{ + free(p); +} + +int cmppost(void *d1, void *d2) +{ + return strcmp(((Post *)d1)->id, ((Post *)d2)->id); +} + +int cmppost_p(void *d1, void *d2) +{ + if (((Post *)d1)->id == ((Post *)d2)->id) + return 0; + return 1; +} + +int cmppost_tf(const void *p1, const void *p2) +{ + return ((Post *)p1)->tf - ((Post *)p2)->tf; +} + +void fprintpost(FILE *stream, void *data) +{ + Post *p; + p = (Post *)data; + fprintf(stream, " %s:%u", p->id, p->tf); +} + +unsigned hashpost(void *data, unsigned hsize) +{ + static unsigned MULTIPLIER = 31; + unsigned h; + h = 0; + for (char *p = ((Post *)data)->id; *p != '\0'; p++) + h = MULTIPLIER * h + *p; + return h % hsize; +}
Change 1 of 1 Show Entire File txt.h Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
@@ -0,0 +1,60 @@
+typedef struct Doc Doc; +typedef struct Query Query; +typedef struct Term Term; +typedef struct Post Post; + +struct Doc { + char *id; + uint32_t n_term; + uint32_t n_uterm; + uint32_t n_byte; +}; + +struct Query { + char *id; + uint32_t n_term; + uint32_t n_uterm; + Node *tlist; +}; + +struct Term { + char *s; + unsigned df; + Node *plist; +}; + +struct Post { + char* id; + int tf; +}; + +Doc *newdoc(char*, uint32_t, uint32_t, uint32_t); +void freedoc(void*); +int cmpdoc(void*, void*); +unsigned hashdoc(void*, unsigned); +void fprintdoc(FILE*, void*); + +Query *newquery(char*); +void freequery(void*); +int cmpquery(void*, void*); +unsigned hashquery(void*, unsigned); +void fprintquery(FILE*, void*); + +Term *newterm(char*, unsigned, Post*); +void freeterm(void*); +int cmpterm(void*, void*); +int cmptermp(void*, void*); +unsigned hashterm(void*, unsigned); +int term_match_handler(void*, void*); +void fprintterm(FILE*, void*); +/* void printtree(void*, void*); */ + +Post *newpost(char*, int); +void freepost(void*); +Post *newpost_s(char*, int); +void freepost_s(void*); +int cmppost(void*, void*); +int cmppost_p(void*, void*); +int cmppost_tf(const void*, const void*); +void fprintpost(FILE*, void*); +unsigned hashpost(void*, unsigned);