Repositories » TXT0
Clone URL:  
Pushed to one repository · View In Graph Contained in v0.1 and tip

Blip

Changeset bdbfa0ec0b98

by Rup Palchowdhury

Changes to 18 files · Browse files at bdbfa0ec0b98 Diff from another changeset...

Change 1 of 1 Show Entire File LICENSE Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
@@ -0,0 +1,21 @@
+The MIT License (MIT) + +Copyright (c) 2016 Rup Palchowdhury + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
Change 1 of 1 Show Entire File Makefile Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
@@ -0,0 +1,59 @@
# Build file for the raw2t / t2mem tools.
#
# Objects go to $(O)/, auto-generated header dependencies to $(DEPDIR)/.
# Both directories are created when the Makefile is parsed so that a fresh
# checkout (where neither exists) can compile.

# subdirectory for objects
O=obj
# subdirectory holding the quicktest fixtures
TEST=test

CC = gcc
CFLAGS = -g -Wall
INCLUDES = -I/usr/local/include
LDFLAGS = -L/usr/local/lib
LIBS = -lk

DEPDIR = .d
$(shell mkdir -p $(DEPDIR) $(O) >/dev/null)
# Write dependencies to a temporary .Td file while compiling ...
DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)/$*.Td

# ... and rename it only after the compile succeeded, so a failed build
# never leaves a truncated .d file behind.
POSTCOMPILE = mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d

# sources shared by every program
SRC = \
	tokenizer.c \
	tok.c \
	parser.c

# programs; each has its main() in <name>.c
PROGS = raw2t t2mem

OBJ = $(patsubst %.c,$(O)/%.o,$(SRC))

.PHONY: all
all: $(PROGS)

raw2t: $(O)/raw2t.o $(OBJ)
	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) -o $@ $^ $(LIBS)

t2mem: $(O)/t2mem.o $(OBJ)
	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) -o $@ $^ $(LIBS)

$(O)/%.o: %.c $(DEPDIR)/%.d
	$(CC) $(DEPFLAGS) $(CFLAGS) $(INCLUDES) -c $< -o $@
	$(POSTCOMPILE)

# Empty rule: a missing .d file is not an error, it just forces a compile.
$(DEPDIR)/%.d: ;
.PRECIOUS: $(DEPDIR)/%.d

# Pull in the generated dependencies for the shared sources and for the
# program mains, so header edits rebuild raw2t.o / t2mem.o as well.
-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(SRC)) $(PROGS))

# .PHONY: install
# install: libk.a
# mkdir -p $(PREFIX)/include/kak
# cp -p *.h $(PREFIX)/include/kak/
# cp -p libk.a $(PREFIX)/lib/

# .PHONY: uninstall
# uninstall:
# rm -f $(PREFIX)/lib/libk.a
# rm -f $(PREFIX)/include/kak/*.h

.PHONY: clean
clean:
	rm -f $(O)/* $(DEPDIR)/* raw2t t2mem

# Dump the pre-tokenized fixtures with t2mem and compare against the
# expected output. Depends on t2mem so the test never runs a stale or
# missing binary.
.PHONY: quicktest
quicktest: t2mem
	./t2mem <$(TEST)/tinytrecdoc.t | diff -q - $(TEST)/tinytrecdoc.out
	./t2mem <$(TEST)/smalltrecdoc.t | diff -q - $(TEST)/smalltrecdoc.out
Change 1 of 1 Show Entire File parser.c Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
@@ -0,0 +1,107 @@
+#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <malloc/malloc.h> +#include <kak/kbuffer.h> +#include <kak/klog.h> +#include <kak/klist.h> +#include "tokenizer.h" +#include "tok.h" +#include "parser.h" + +Tok* parse(FILE *fp) +{ + Tokenizer *tokenizer_text, *tokenizer_id; + Token token, *docid; + Stack mem; + Tok *tok; + Tokdoc *tokdoc; + unsigned lowmem, ntok, ndoc, nresize, bytesread, crc; + int n; + char *septext = " \t\r\n;,.:`'\"?!(){}[]<>~^&*_-=#$%@|\\/"; + char *sepid = " \t\r\n<>"; + Log *log; + + log = newlog("toktrec"); + + lowmem = KB; + + reset(&mem, 3); + + tokenizer_text = newtokenizer(septext, LOWERCASE, &mem); + tokenizer_id = newtokenizer(sepid, KEEPCASE, &mem); + + docid = newtoken(128); + + ntok = 0; /* token count */ + ndoc = 0; /* doc count */ + nresize = 0; /* realloc count */ + bytesread = 0; /* bytes read in */ + + tok = newtok(TREC_ADHOC); + tokdoc = newtokdoc(TREC_ADHOC); + token.str = tokdoc->buf->p; + + while((n = gettoken(&token, fp, tokenizer_text)) > 0) { + bytesread += n; + if ((token.type == CTAG) && (strcmp(token.str, "doc") == 0)) { + /* drop trailing ' ' */ + shiftpointer(tokdoc->buf, -1); + /* fill doc header */ + tokdoc->header->block[RES1DOCLEN] = tokdoc->buf->i; + tokdoc->header->block[RES2IDLEN] = docid->len; + /* append resource block */ + memcpy(tokdoc->buf->p, docid->str, docid->len); + shiftpointer(tokdoc->buf, docid->len); + /* compute and add crc to header */ + /* TODO + crc = crc_bytewise(&model, crc, tokdoc->buf->b, tokdoc->buf->i); + */ + crc = 42; + tokdoc->header->block[RES0CRC32] = crc; + /* add doc block to list */ + tok->list = addfront(tok->list, newnode((void *)tokdoc)); + /* increment data length and doc count */ + tok->header->block[LENGTH] += 4 * tok->header->block[NUMRES] + tokdoc->buf->i; + tok->header->block[NUMDOCS]++; + /* get new placeholder for doc */ + tokdoc = newtokdoc(TREC_ADHOC); + token.str = tokdoc->buf->p; + continue; + } + if ((token.type == OTAG) && 
(strcmp(token.str, "docno") == 0)) { + n = gettoken(&token, fp, tokenizer_id); + bytesread += n; + /* save a copy of the docid for later */ + memcpy(docid->str, token.str, token.len); + docid->len = token.len; + continue; + } + if (token.type == TERM) { + token.str[token.len] = ' '; + token.str = shiftpointer(tokdoc->buf, token.len + 1); + if ((tokdoc->buf->n - tokdoc->buf->i) <= lowmem) { + tokdoc->buf = resizebuffer(tokdoc->buf, + tokdoc->buf->n <<= 1); + token.str = tokdoc->buf->p; + } + } + } + + bytesread++; + + crc = 42; + tok->header->block[CRC32] = crc; + + fprintf(log->fp, "%16s: %u\n", "bytes read", bytesread); + fprintf(log->fp, "%16s: %u\n", "Magic", tok->header->block[MAGIC]); + fprintf(log->fp, "%16s: %u\n", "CRC32",tok->header->block[CRC32]); + fprintf(log->fp, "%16s: %u\n", "Version", tok->header->block[VERSION]); + fprintf(log->fp, "%16s: %u\n", "Type", tok->header->block[TYPE]); + fprintf(log->fp, "%16s: %u\n", "Length", tok->header->block[LENGTH]); + fprintf(log->fp, "%16s: %u\n", "#docs", tok->header->block[NUMDOCS]); + fprintf(log->fp, "%16s: %u\n", "#resources", tok->header->block[NUMRES]); + fprintf(log->fp, "%16s: %u\n", "offset",tok->header->block[OFFSET]); + + return tok; +}
Change 1 of 1 Show Entire File parser.h Stacked
 
 
 
1
@@ -0,0 +1,1 @@
+Tok* parse(FILE*);
Change 1 of 1 Show Entire File raw2t.c Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
@@ -0,0 +1,30 @@
+/* + Usage: raw2t <[in] >[out] + + Tokenizes all content within the <DOC> tags of a TREC document, + excluding the markup tags, and the document-id. + + Spaces, punctuations and special characters are treated as + token-separators. + + Letters are converted to lower case. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <malloc/malloc.h> +#include <kak/kbuffer.h> +#include <kak/klog.h> +#include <kak/klist.h> +#include "tokenizer.h" +#include "tok.h" +#include "parser.h" + +int main(int argc, char *argv[]) +{ + Tok *tok; + tok = parse(stdin); + writetok(tok, stdout); + return 0; +}
Change 1 of 1 Show Entire File t2mem.c Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
@@ -0,0 +1,17 @@
+#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <kak/kbuffer.h> +#include <kak/klog.h> +#include <kak/klist.h> +#include "tokenizer.h" +#include "tok.h" + +int main(void) +{ + Tok *tok; + tok = newtok(TREC_ADHOC); + readtok(tok, stdin); + printtok(tok); + return 0; +}
Show Entire File test/​alice.txt Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Change 1 of 1 Show Entire File test/​smalltrecdoc.out Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
@@ -0,0 +1,19 @@
+ Magic 41 + CRC32 42 + Version 1 + Type 0 + Length 7771 + N 2 + R 3 + Offset 32 + + resource0 42 + resource1 2991 + resource2 13 +:08 519 720 ov 08 519 620 m pc week june 4 1990 v7 n22 p84 2 full text copyright ziff davis publishing co 1990 m ibm ps 2 model 80 a31 hardware review one of five evaluations of microcomputers with file server capabilities evaluation gerber barry tracy martha chandler doug m ibm ps 2 80 a31 fileserver priced at 18 880 is a relatively low priced machine with important server features including its 25 mhz 386 central processing unit and a small computer systems interface based disk adapter and drives p the system can be modified to hold six hard disk drives but includes five 3 50 inch ones p netware 386 ran without a hitch on the model 80 a31 o however pc week could not get novell s netware 386 driver for ibm s scsi controller and the dx nlm exerciser to work together well p the system can be used as a low end computer aided design machine according to users m company international business machines corp products o ticker ibm o product ibm ps 2 80 a31 microcomputer o topic evaluation file servers microcomputers m ibm m ps 2 model 80 a31 m the model 80 a31 is housed in a standard ps 2 tower chassis p its important server oriented features are its 25mhz 386 cpu and a scsi based disk adapter and drives p a caching and non caching version of the scsi adapter are available p pc week s test machine was equipped with the faster caching version the system holds five 3 1 2 inch hard disk drives but can be modified to hold six m pc week could not get novell s netware 386 driver for ibm s scsi controller and the dx nlm exerciser developed by netframe and pc week to work together properly p technical staff from novell and netframe were unable to resolve the problem in time for pc week to test the model 80 a31 s disk i o m aside from the dx nlm test netware 386 ran flawlessly on the model 80 a31 m ibm can be reached in armonk n y at 800 426 2468 m 
buyers say p m a relatively low price was the main reason michael bourne systems analyst for ohio state university in columbus decided to go with the model 80 a31 o we get 40 percent off he said p i d like to have a compaq systempro but pricewise you can t beat the ibm m while the systempro has been designed exclusively as a high performance server the model 80 a31 is more flexible bourne noted p the model 80 a31 can be a low end cad machine a small network server an excellent stand alone os 2 workstation he said p you have to have a pretty big network to require a systempro m he currently uses the model 80 a31 as a stand alone but it will eventually be moved to the university s lan services division to become a file server o we wanted something to simultaneously support dos aix and os 2 with reasonable speed he said m bourne has had some problems with the system p i had 2m bytes of memory die on me i had the vga go flakey i had two or three system board problems and i had a keyboard go out he said p i haven t seen these problems repeated on other ibm models but ibm is always good about repairing on warranty mZF108-519-720: + + resource0 42 + resource1 4730 + resource2 13 +:08 522 878 m pc week june 4 1990 v7 n22 p87 1 full text copyright ziff davis publishing co 1990 m nlms test how well super servers take stress network loadable modules gerber barry m pc week worked with netframe inc to develop and test two netware 386 netware loadable modules nlm that were able to stress a netware server as though a number of workstations were attached to it p this would serve as a server stress test needed in order to evaluate super server products p each nlm s parameters can be adjusted and set before the test is run p in testing one disk controller and one local area network adapter were used p the netware 386 s monitor nlm was loaded to verify server central processing unit utilization p in the end however measuring such performance requires real time testing m topic file 
servers testing evaluation performance measurement modules m by barry gerber m traditionally server stress tests have been done by attaching a number of workstations to the server through one or more network adapters p the more workstations attached the greater the stress on the server p pc week s applications test suite also takes this approach to server testing m however the i o capacity of super servers like those tested for this review is so high that an unrealistically large number of workstations would be needed to stress them p thus pc week worked with netframe inc to develop and test two netware 386 netware loadable modules nlms that can stress a netware server without any attached workstations m one of these nlms dx nlm tests the i o capacity of disk controllers and drives within a given netware 386 server while the other nlm nx nlm tests the i o capacity of network adapters within the server m dx nlm can be selectively run against any combination of disk controllers and attached disks p similarly nx nlm can be used to test network i o on any set of lan adapters p the two nlms can be run separately or simultaneously m picking parameters m each of the nlms has adjustable parameters that can be set before the test is run p for the disk tests these include access pattern random or sequential test type read or write number of kilobytes per i o 1 to 60 and number of i os concurrently queued per disk drive 1 to 64 p the lan i o nlm allows for the setting of parameters for fixed or random packet size bytes per packet when the fixed packet size option is selected 64 to 1 472 bytes and number of i os concurrently queued per lan adapter 1 to 32 p parameters also can specify whether the test for each adapter should involve only transmission only reception or both transmission and reception m in pc week s tests one disk controller and one lan adapter were used p the following dx nlm and nx nlm parameters were used m dx nlm m access pattern sequential m test type read 
m number of kilobytes per i o 32 m number of concurrent i os queued m per drive 3 m nx nlm m packet length fixed m bytes per packet 1 154 m number of concurrent sends to queue m per device 3 m device activity transmit m during all tests netware 386 s monitor nlm was loaded to verify server cpu utilization as reported by dx nlm and nx nlm p the ethernet adapter was not connected to a network p instead it was outfitted with a standard ethernet t connector terminated on each side with a standard 50 ohm terminator m the server disk and lan performance were determined by calculating the percent of the server s cpu capacity required for 1m byte per second of either disk or lan i o p this value is obtained by dividing the i o rate obtained in a test in megabytes per second into one and multiplying server utilization for the test by the result of the division m to some extent total server capacity can be estimated by summing the cpu utilization required for each 1m byte increment of disk or lan i o p however this should be done with some caution since capacity will be limited by the actual number of disk controllers or lan adapters that can be plugged into a given server s bus and by the number of disk drives that can be attached to a disk controller m furthermore it should not be assumed that disk and lan i o will be the only processes placing stress on server capacity p print and communications services as well as other applications running on the server may also contribute to this load m finally none of the results obtained with these test nlms should be taken as an indicator of server performance with particular applications and data o measuring such performance requires real world real time testing m binary and source code for dx nlm and nx nlm along with documentation for both can be downloaded from the pc week extra on line service p for instructions on how to access pc week extra see page 173 oZF108-522-878: +
Change 1 of 1 Show Entire File test/​smalltrecdoc.txt Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
@@ -0,0 +1,203 @@
+<DOC> +<DOCNO> ZF108-519-720 </DOCNO> +<DOCID>08 519 720 OV: 08 519 620.&M; </DOCID> + +<JOURNAL>PC Week June 4 1990 v7 n22 p84(2) +* Full Text COPYRIGHT Ziff-Davis Publishing Co. 1990.&M; +</JOURNAL> +<TITLE>IBM: PS/2 Model 80-A31. (Hardware Review) (one of five evaluations +of microcomputers with file server capabilities) (evaluation) +</TITLE> +<AUTHOR>Gerber, Barry; Tracy, Martha; Chandler, Doug.&M; +</AUTHOR> +<SUMMARY>IBM PS/2 80-A31 fileserver, priced at $18,880, is a relatively +low-priced machine with important server features, including its +25-MHz 386 central processing unit and a small computer systems +interface-based disk adapter and drives.&P; The system can be +modified to hold six hard-disk drives, but includes five 3.50-inch +ones.&P; NetWare 386 ran without a hitch on the Model 80-A31.&O; +However, PC Week could not get Novell's NetWare 386 driver for +IBM's SCSI controller, and the DX.NLM exerciser to work together +well.&P; The system can be used as a low-end computer-aided design +machine, according to users.&M; +</SUMMARY> +<DESCRIPT> +Company: International Business Machines Corp. (products).&O; +Ticker: IBM.&O; +Product: IBM PS/2 80 A31 (Microcomputer).&O; +Topic: Evaluation +File Servers +Microcomputers.&M; +</DESCRIPT> +<TEXT> + +IBM&M; + +PS/2 MODEL 80-A31&M; + +The Model 80-A31 is housed in a standard PS/2 tower chassis.&P; Its important +server-oriented features are its 25MHz 386 CPU and a SCSI-based disk adapter +and drives.&P; (A caching and non-caching version of the SCSI adapter are +available.&P; PC Week's test machine was equipped with the faster, caching +version.) 
The system holds five 3-1/2-inch hard-disk drives, but can be +modified to hold six.&M; + +PC Week could not get Novell's NetWare 386 driver for IBM's SCSI controller, +and the DX.NLM exerciser, developed by NetFrame and PC Week, to work together +properly.&P; Technical staff from Novell and NetFrame were unable to resolve +the problem in time for PC Week to test the Model 80-A31's disk I/O.&M; + +Aside from the DX.NLM test, NetWare 386 ran flawlessly on the Model 80-A31.&M; + +IBM can be reached in Armonk, N.Y., at (800) 426-2468.&M; + +---------- + +Buyers say .&P; . .&M; + +A relatively low price was the main reason Michael Bourne, systems analyst +for Ohio State University in Columbus, decided to go with the Model 80-A31.&O; +"We get 40 percent off," he said.&P; "I'd like to have a Compaq Systempro, but +pricewise, you can't beat the IBM."&M; + +While the Systempro has been designed exclusively as a high-performance +server, the Model 80-A31 is more flexible, Bourne noted.&P; "The Model 80-A31 +can be a low-end CAD machine, a small network server, an excellent +stand-alone OS/2 workstation," he said.&P; "You have to have a pretty big +network to require a Systempro."&M; + +He currently uses the Model 80-A31 as a stand-alone, but it will eventually +be moved to the university's LAN services division to become a file server.&O; +"We wanted something to simultaneously support DOS, AIX and OS/2 with +reasonable speed," he said.&M; + +Bourne has had some problems with the system.&P; "I had 2M bytes of memory die +on me, I had the VGA go flakey, I had two or three system board problems, and +I had a keyboard go out," he said.&P; "I haven't seen these problems repeated +on other IBM models, but IBM is always good about repairing on warranty."&M; +</TEXT> +</DOC> +<DOC> +<DOCNO> ZF108-522-878 </DOCNO> +<DOCID>08 522 878.&M;</DOCID> + +<JOURNAL>PC Week June 4 1990 v7 n22 p87(1) +* Full Text COPYRIGHT Ziff-Davis Publishing Co. 
1990.&M; +</JOURNAL> +<TITLE>NLMs test how well super servers take stress. (Network Loadable +Modules) +</TITLE> +<AUTHOR>Gerber, Barry.&M; +</AUTHOR> +<SUMMARY>PC Week worked with NetFrame Inc to develop and test two NetWare +386 NetWare Loadable Modules (NLM) that were able to stress a +NetWare server as though a number of workstations were attached to +it.&P; This would serve as a server stress test, needed in order to +evaluate super server products.&P; Each NLM's parameters can be +adjusted and set before the test is run.&P; In testing, one disk +controller and one local area network adapter were used.&P; The +NetWare 386's Monitor NLM was loaded to verify server central +processing unit utilization.&P; In the end, however, measuring such +performance requires real-time testing.&M; +</SUMMARY> +<DESCRIPT> +Topic: File Servers +Testing +Evaluation +Performance Measurement +Modules.&M; +</DESCRIPT> +<TEXT> + +By Barry Gerber&M; + +Traditionally, server stress tests have been done by attaching a number of +workstations to the server through one or more network adapters.&P; The more +workstations attached, the greater the stress on the server.&P; PC Week's +Applications Test Suite also takes this approach to server testing.&M; + +However, the I/O capacity of super servers like those tested for this review +is so high that an unrealistically large number of workstations would be +needed to stress them.&P; Thus, PC Week worked with NetFrame Inc. 
to develop +and test two NetWare 386 NetWare Loadable Modules (NLMs) that can stress a +NetWare server without any attached workstations.&M; + +One of these NLMs, DX.NLM, tests the I/O capacity of disk controllers and +drives within a given NetWare 386 server, while the other NLM, NX.NLM, tests +the I/O capacity of network adapters within the server.&M; + +DX.NLM can be selectively run against any combination of disk controllers and +attached disks.&P; Similarly, NX.NLM can be used to test network I/O on any set +of LAN adapters.&P; The two NLMs can be run separately or simultaneously.&M; + +Picking Parameters&M; + +Each of the NLMs has adjustable parameters that can be set before the test is +run.&P; For the disk tests, these include access pattern (random or +sequential), test type (read or write), number of kilobytes per I/O (1 to +60), and number of I/Os concurrently queued per disk drive (1 to 64).&P; The +LAN I/O NLM allows for the setting of parameters for fixed or random packet +size, bytes per packet when the fixed packet-size option is selected (64 to +1,472 bytes), and number of I/Os concurrently queued per LAN adapter (1 to +32).&P; Parameters also can specify whether the test for each adapter should +involve only transmission, only reception or both transmission and reception.&M; + +In PC Week's tests, one disk controller and one LAN adapter were used.&P; The +following DX.NLM and NX.NLM parameters were used:&M; + +DX.NLM&M; + +Access pattern: Sequential&M; + +Test type: Read&M; + +Number of kilobytes per I/O: 32&M; + +Number of concurrent I/Os queued&M; + +per drive: 3&M; + +NX.NLM&M; + +Packet length: Fixed&M; + +Bytes per packet: 1,154&M; + +Number of concurrent sends to queue&M; + +per device: 3&M; + +Device activity: Transmit&M; + +During all tests, NetWare 386's Monitor NLM was loaded to verify server CPU +utilization as reported by DX.NLM and NX.NLM.&P; The Ethernet adapter was not +connected to a network.&P; Instead it was outfitted with a 
standard Ethernet T +connector terminated on each side with a standard, 50-ohm terminator.&M; + +The server-disk and LAN performance were determined by calculating the +percent of the server's CPU capacity required for 1M byte per second of +either disk or LAN I/O.&P; This value is obtained by dividing the I/O rate +obtained in a test (in megabytes per second) into one and multiplying server +utilization for the test by the result of the division.&M; + +To some extent, total server capacity can be estimated by summing the CPU +utilization required for each 1M-byte increment of disk or LAN I/O.&P; However, +this should be done with some caution, since capacity will be limited by the +actual number of disk controllers or LAN adapters that can be plugged into a +given server's bus and by the number of disk drives that can be attached to a +disk controller.&M; + +Furthermore, it should not be assumed that disk and LAN I/O will be the only +processes placing stress on server capacity.&P; Print and communications +services as well as other applications running on the server may also +contribute to this load.&M; + +Finally, none of the results obtained with these test NLMs should be taken as +an indicator of server performance with particular applications and data.&O; +Measuring such performance requires real-world, real-time testing.&M; + +Binary and source code for DX.NLM and NX.NLM, along with documentation for +both, can be downloaded from the PC Week Extra! on-line service.&P; (For +instructions on how to access PC Week Extra!, see Page 173.)&O; +</TEXT> +</DOC>
Change 1 of 1 Show Entire File test/​text.txt Stacked
 
 
 
1
@@ -0,0 +1,1 @@
+I gaze out the window of my third-floor apartment you so frequently visited and peer down to the dumpster with the same eyes I used to watch you with. It appears my letter fire has started a dumpster fire. The flames spread like my passion for you once did — searingly hot and incredibly dangerous to everyone in the immediate area.
Change 1 of 1 Show Entire File test/​tinytrecdoc.out Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
@@ -0,0 +1,19 @@
+ Magic 41 + CRC32 42 + Version 1 + Type 0 + Length 85 + N 2 + R 3 + Offset 32 + + resource0 42 + resource1 11 + resource2 1 +:hello worldA: + + resource0 42 + resource1 48 + resource2 1 +:where do you think you are going shouted captainB: +
Change 1 of 1 Show Entire File test/​tinytrecdoc.txt Stacked
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
@@ -0,0 +1,8 @@
+<DOC> +<DOCNO>A</DOCNO> +hello world! +</DOC> +<DOC> +<DOCNO>B</DOCNO> +"Where do you think you are going?", shouted Captain. +</DOC>
Show Entire File test/​trecdoc.txt Stacked
This file's diff was not loaded because this changeset is very large. Load changes
Change 1 of 1 Show Entire File test/​two.txt Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
@@ -0,0 +1,230 @@
+ +<DOC> +<DOCNO> AP890101-0001 </DOCNO> +<FILEID>AP-NR-01-01-89 2358EST</FILEID> +<FIRST>r a PM-APArts:60sMovies 01-01 1073</FIRST> +<SECOND>PM-AP Arts: 60s Movies,1100</SECOND> +<HEAD>You Don't Need a Weatherman To Know '60s Films Are Here</HEAD> +<HEAD>Eds: Also in Monday AMs report.</HEAD> +<BYLINE>By HILLEL ITALIE</BYLINE> +<BYLINE>Associated Press Writer</BYLINE> +<DATELINE>NEW YORK (AP) </DATELINE> +<TEXT> + The celluloid torch has been passed to a new +generation: filmmakers who grew up in the 1960s. + ``Platoon,'' ``Running on Empty,'' ``1969'' and ``Mississippi +Burning'' are among the movies released in the past two years from +writers and directors who brought their own experiences of that +turbulent decade to the screen. + ``The contemporaries of the '60s are some of the filmmakers of +the '80s. It's natural,'' said Robert Friedman, the senior vice +president of worldwide advertising and publicity at Warner Bros. + Chris Gerolmo, who wrote the screenplay for ``Mississippi +Burning,'' noted that the sheer passage of time has allowed him and +others to express their feelings about the decade. + ``Distance is important,'' he said. ``I believe there's a lot of +thinking about that time and America in general.'' + The Vietnam War was a defining experience for many people in the +'60s, shattering the consensus that the United States had a right, +even a moral duty to intervene in conflicts around the world. Even +today, politicians talk disparagingly of the ``Vietnam Syndrome'' in +referring to the country's reluctance to use military force to +settle disputes. + ``I think future historians will talk about Vietnam as one of the +near destructions of American society,'' said Urie Brofenbrenner, a +professor of sociology at Cornell University. 
+ ``In World War II, we knew what we were fighting for, but not in +Vietnam.'' + ``Full Metal Jacket,'' ``Gardens of Stone,'' ``Platoon,'' ``Good +Morning, Vietnam,'' ``Hamburger Hill'' and ``Bat 21'' all use the +war as a dramatic backdrop and show how it shaped characters' lives. + The Vietnam War has remained an emotional issue in the United +States as veterans have struggled to come to terms with their +experiences. One was Oliver Stone, who wrote and directed the +Academy Award-winning ``Platoon.'' + ``I saw `Platoon' eight times,'' said John J. Anderson, a Palm +Beach County sheriff's lieutenant who served in Vietnam in 1966-67. +``I cried the first time I saw it ... and the third and fourth +times. `Platoon' helped me understand.'' + Stone, who based ``Platoon'' on some of his own experiences as a +grunt, said the film brought up issues that had yet to be resolved. + ``People are responding to the fact that it's real. They're +curious about the war in Vietnam after 20 years,'' he said. + While Southeast Asia was the pivotal foreign issue in American +society of the '60s, civil rights was the major domestic issue. The +civil rights movement reached its peak in the ``Freedom Summer'' of +1964, when large groups of volunteers headed South to help register +black voters. + In ``Five Corners,'' a movie about the summer of '64 in the Bronx +starring Jodie Foster, her friend, played by Tim Robbins, leaves his +neighborhood to volunteer in the South after seeing the Rev. Martin +Luther King Jr. on television. + Alan Parker's ``Mississippi Burning'' focuses on an incident that +clouded the Mississippi Summer Project _ when 1,000 young volunteers +from mainstream America swept into the state to help register black +voters. The movie is a fictionalized account of the disappearance +and slaying of three civil rights workers: Michael Schwerner, Andrew +Goodman and James Chaney. 
+ They were reported missing on June 21, several hours after being +stopped for speeding near Philadelphia, Miss. After a nationally +publicized search, their bodies were discovered Aug. 4 on a farm +just outside the town. + One of those who recalled the incident was Gerolmo, a student in +the New York public school system at the time. The screenwriter said +the incident had a powerful effect on his way of thinking. + ``It was the first time I ever considered that our country could +be wrong,'' Gerolmo said. + The film stars Willem Dafoe and Gene Hackman star as FBI agents +who try to find the bodies of the missing workers and overcome +fierce local resistance to solve the crime. + In a more offbeat and outrageous way, John Waters' ``Hairspray'' +discusses integration in Baltimore in 1963 when a group of +teen-agers tries to break down the barriers of a segregated dance +show. + Also set in Baltimore is Barry Levinson's ``Tin Men,'' starring +Danny DeVito and Richard Dreyfuss as two slick aluminum siding +salesmen in the early '60s. The movie mirrored a squarely +middle-class culture, one that was not caught up in sex, politics +and drugs. + Instead of focusing on a well-known historic event, +writer-director Ernest Thompson takes a more personal approach in +``1969.'' Robert Downey Jr. and Keifer Sutherland star as college +students who battle their parents and each other over sex, drugs and +the Vietnam War. + ``I was 19 in 1969. It was a fulcrum time for me,'' said +Thompson, who was a student at American University at the time. ``I +think it was just the right time in my growth as an artist and as a +man to try to write about something that happened in my youth.'' + ``Running on Empty'' takes place in the '80s but the '60s are +much in evidence. Judd Hirsch and Christine Lahti play anti-war +activists who sabatoged a napalm plant in 1970 and are forced to +live underground with their two children. 
+ Naomi Foner, who wrote ``Running on Empty'' and also served as +the film's executive producer, grew up in Brooklyn, N.Y., the +daughter of sociologists. Her own experiences made Foner well +qualified to give ``Running on Empty'' its strong political theme. + ``I lived through that time and I've wanted to find the right way +to present it to this generation,'' said Foner, a member of the +radical Students for a Democratic Society while attending graduate +school at Columbia University. + Foner, who also taught in Harlem's Head Start program and helped +register voters in South Carolina, said many young people are +curious about what happened in the '60s. + ``A lot of them think it was an exciting time that they were +sorry to have missed,'' she said. + Brofenbrenner said movies are a good indicator of the concerns of +the general public: ``The principle impact of the media is that they +reflect the values of the larger society. + ``Film is a very powerful art medium,'' he said. ``I believe it +very accurately reflects not only the prevailing but the coming +trends. It's because film writers, like other writers, are +perceptive people. They get the message of what's going on.'' +</TEXT> +</DOC> +<DOC> +<DOCNO> AP890101-0002 </DOCNO> +<FILEID>AP-NR-01-01-89 2359EST</FILEID> +<FIRST>r a PM-FutureFactory 01-01 0872</FIRST> +<SECOND>PM-Future Factory,0897</SECOND> +<HEAD>University Erects A Factory Of The Future</HEAD> +<HEAD>Eds: Also in Monday AMs report.</HEAD> +<BYLINE>By DONNA BRYSON</BYLINE> +<BYLINE>Associated Press Writer</BYLINE> +<DATELINE>ROLLA, Mo. (AP) </DATELINE> +<TEXT> + For students working in a miniature factory at +the University of Missouri-Rolla, the future of American business is +now. + ``When our students go into industry, they will have +state-of-the-art knowledge'' that will affect decisions about +expanding the role of robots and automated machines in the +workplace, said Sema Alptekin, designer of the futuristic business +laboratory. + Ms. 
Alptekin is a mechanical and industrial engineer who directs +the university's computer-integrated manufacturing and packaging +laboratory. + The lab, established two years ago at a cost of $120,000, is the +only project of its kind in the state, and one of the more advanced +such programs in the country. + Tom Akas of the Society of Manufacturing Engineers said Ms. +Alptekin has created ``a model for similar laboratories ... Students +can see how the factory of the future operates.'' + The Society of Manufacturing Engineers has recognized Ms. +Alptekin as an Outstanding Young Manufacturing Engineer. + In her lab, a conveyer belt at waist level carries parts from +storage shelves to a robot that dominates the room like a silent, +4-foot metal sentry. The robot is a V-shaped, jointed arm on a +pivoting base. The arm can be fitted to allow it to grasp, lift and +turn objects of differing sizes to suit a variety of tasks. The +robot then swivels to place parts in position at the automated lathe +or milling machine. + The entire factory would fit inside a basketball half-court. The +machinery is of the type used to make small parts in metal cutting +shops, Ms. Alptekin said. + Students learn to program a computer and automated machines +linked to it in a complete manufacturing operation _ retrieving raw +materials from the storage shelf unit, which can be programmed to +supply appropriate parts from its inventory; lifting and placing the +parts in position with the robot's arm; and shaping parts into +finished products at the lathe. + Students using the lab have designed and produced engraved key +chains and intricate mazes and puzzles out of plastic and plexiglass. + But the product is less important than the process. Ms. Alptekin +said former students have entered fields as disparate as aviation +and financial consulting. + In the factory of the future, according to the university's +model, human chatter will be replaced by the click-clack of machines. 
+ ``Ours is quite unique, it's totally integrated,'' Ms. Alptekin +said, referring to programming that allows components such as the +automated storage system to ``talk to'' the computer that controls +the manufacturing process, informing it about the availability of +parts. + Ms. Alptekin said it is this aspect that sets the lab apart even +from industries where robots and automated machines have become +almost commonplace. There, she said, robots perform specific tasks +in ``islands of automation,'' but human workers remain responsible +for keeping inventory and coordinating different aspects of the +production line. + Ms. Alptekin said automation is widespread in the automobile +industry, but she would like to see it expanded. + ``It should be used in any industry,'' she said. ``It's difficult +to justify economically, but if quality is the concern, then it can +be easily justified. + ``At least our students will be aware of the enhancements and +developments in this area, and when they make decisions as engineers +and managers, they will take this into consideration,'' she said. + A master's candidate analyzed the existing system with an eye to +altering it to create a new product. Ms. Alptekin said the student +identified what new production components were necessary and +determined how they could be integrated into the system. + A doctoral candidate is developing computer software that will +allow the system to diagnose itself. The system could be programmed +to recognize when a component is not working, and then decide to +switch to manufacturing a product that does not involve the disabled +component, much as human managers would react. + Graduate student Bob Borchelt said the equipment he is using is +on the cutting edge of manufacturing. + ``Truthfully, I can't say that I was expecting this level of +expertise,'' he said. 
``I know that our lab is one of the best of +its kind in an educational institution.'' + The lab, like the entire engineering management program, calls on +computing, mechanical, electrical and chemical skills _ and teamwork. + ``For us, for managers, the entire system is important,'' Ms. +Alptekin said. + Ms. Alptekin said she is continually fine-tuning the lab. This +past year, the original robot was replaced with one able to perform +more tasks. She now is considering purchasing a robot with vision +sophisticated enough to act as an inspector. + The university spent $30,000 to upgrade lab equipment in 1987. An +estimated $60,000 to $70,000 was earmarked in 1988. + International Business Machines Corp. recently pledged $1.2 +million in computer equipment and software to the university as part +of an IBM program to aid 48 college-based robotics labs across the +country. + ``Studies show that there's a severe national shortage of +instructional materials in this growing and critical area,'' said +Andy Russell, a spokesman for IBM. ``IBM will benefit because we +will be helping to train the (computer-integrated manufacturing) +workers and decision makers of today and tomorrow.'' +</TEXT> +</DOC>
Change 1 of 1 Show Entire File tok.c Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@@ -0,0 +1,123 @@
+#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <kak/kbuffer.h> +#include <kak/klist.h> +#include "tokenizer.h" +#include "tok.h" + +Tokheader *_newtokheader(E_doctype type) +{ + Tokheader *h; + h = (Tokheader *)malloc(sizeof(Tokheader)); + h->block[MAGIC] = 41; + h->block[CRC32] = 42; + h->block[VERSION] = 1; + h->block[TYPE] = type; + h->block[LENGTH] = 0; + h->block[NUMDOCS] = 0; + h->block[NUMRES] = 0; + h->block[OFFSET] = 32; + if (type == TREC_ADHOC) + h->block[NUMRES] = 3; + return h; +} + +Tokdocheader *_newtokdocheader(E_doctype type) +{ + Tokdocheader *h; + h = (Tokdocheader *)malloc(sizeof(Tokdocheader)); + h->block[RES0CRC32] = 0; + h->block[RES1DOCLEN] = 0; + h->block[RES2IDLEN] = 0; + return h; +} + +Tokdoc *newtokdoc(E_doctype type) +{ + Tokdoc *d; + d = (Tokdoc *)malloc(sizeof(Tokdoc)); + d->header = _newtokdocheader(type); + d->buf = newbuffer(10*KB, CHARBUF); + return d; +} + +Tok *newtok(E_doctype type) +{ + Tok *t; + t = (Tok *)malloc(sizeof(Tok)); + t->header = _newtokheader(type); + t->list = NULL; + return t; +} + +int writetok(Tok *tok, FILE *fp) +{ + /* TODO: test n */ + int i, n; + Node *p; + Tokdoc *tokdoc; + n = fwrite(tok->header->block, 4, 8, fp); + for (p = tok->list; p != NULL; p = p->next) { + tokdoc = p->data; + n = fwrite(tokdoc->header->block, + 4, tok->header->block[NUMRES], fp); + n = fwrite(tokdoc->buf->b, 1, tokdoc->buf->i, fp); + } + return n; +} + +int readtok(Tok *tok, FILE *fp) +{ + int i, j, n, docsize; + Tokdoc *tokdoc; + n = fread(tok->header->block, 4, 8, fp); + for (i = 0; i < tok->header->block[NUMDOCS]; i++) { + tokdoc = newtokdoc(TREC_ADHOC); + n = fread(tokdoc->header->block, + 4, tok->header->block[NUMRES], fp); + docsize = 0; + for (j = 1; j < tok->header->block[NUMRES]; j++) + docsize += tokdoc->header->block[j]; + if (docsize > tokdoc->buf->n) + tokdoc->buf = resizebuffer(tokdoc->buf, docsize); + n = fread(tokdoc->buf->b, 1, docsize, fp); + shiftpointer(tokdoc->buf, docsize); + 
tok->list = addfront(tok->list, newnode((void *)tokdoc)); + } + return n; +} + +void _printtokheader(Tokheader *h) +{ + printf("%16s %d\n", "Magic", h->block[MAGIC]); + printf("%16s %d\n", "CRC32", h->block[CRC32]); + printf("%16s %d\n", "Version", h->block[VERSION]); + printf("%16s %d\n", "Type", h->block[TYPE]); + printf("%16s %d\n", "Length", h->block[LENGTH]); + printf("%16s %d\n", "N", h->block[NUMDOCS]); + printf("%16s %d\n", "R", h->block[NUMRES]); + printf("%16s %d\n", "Offset", h->block[OFFSET]); +} + +void _printtokdocheader(Tokdocheader *h, int numres) +{ + int i, size; + for (i = 0; i < numres; i++) + printf("%16s%d %d\n", "resource", i, h->block[i]); +} + +void printtok(Tok *tok) +{ + Node *p; + Tokdoc *tokdoc; + _printtokheader(tok->header); + printf("\n"); + for (p = tok->list; p != NULL; p = p->next) { + tokdoc = p->data; + _printtokdocheader(tokdoc->header, tok->header->block[NUMRES]); + ((char *)tokdoc->buf->b)[tokdoc->buf->i] = '\0'; + printf(":%s:\n", tokdoc->buf->b); + printf("\n"); + } +}
Change 1 of 1 Show Entire File tok.h Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
@@ -0,0 +1,41 @@
+#define TOK_HEADER_BLOCKS 8 +#define TOKDOC_HEADER_BLOCKS 12 + +typedef struct Tok Tok; +typedef struct Tokheader Tokheader; +typedef struct Tokdocheader Tokdocheader; +typedef struct Tokdoc Tokdoc; +typedef enum {TREC_ADHOC, TREC_WEB, TREC_TWITTER} E_doctype; +typedef enum {MAGIC, CRC32, VERSION, TYPE, LENGTH, NUMDOCS, NUMRES, OFFSET} E_tokheader; +typedef enum {RES0CRC32, RES1DOCLEN, RES2IDLEN, + RES3, RES4, RES5, + RES6, RES7, RES8, RES9, RES10} E_tokdocheader; + + +struct Tokheader { + int block[TOK_HEADER_BLOCKS]; +}; + +struct Tokdocheader { + int block[TOKDOC_HEADER_BLOCKS]; +}; + +struct Tokdoc { + Tokdocheader *header; + Buffer *buf; +}; + +struct Tok { + Tokheader *header; + Node *list; +}; + +Tokheader *_newtokheader(E_doctype); +Tokdocheader *_newtokdocheader(E_doctype); +Tokdoc *newtokdoc(E_doctype); +Tok *newtok(E_doctype); +int writetok(Tok*, FILE*); +int readtok(Tok*, FILE*); +void _printtokheader(Tokheader*); +void _printtokdocheader(Tokdocheader*, int); +void printtok(Tok*);
Change 1 of 1 Show Entire File tokenizer.c Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@@ -0,0 +1,125 @@
+#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "tokenizer.h" + +void reset(Stack *stk, int n) +{ + stk->p = -1; + stk->n = n; +} + +void push(Stack *stk, char *c) +{ + int i; + if (stk->p == (stk->n - 1)) { + for (i = 0; i < stk->p; i++) + stk->s[i] = stk->s[i+1]; + stk->p--; + } + stk->s[++stk->p] = *c; +} + +char pop(Stack *stk) +{ + if (stk->p >= 0) { + stk->p--; + return stk->s[stk->p + 1]; + } + return 0; +} + +void printstack(Stack *stk) +{ + int i; + for (i = 0; i <= stk->p; i++) + printf("%d:%d ", i, stk->s[i]); + printf("\n"); +} + +Tokenizer *newtokenizer(char *delimiters, int casechange, Stack *mem) +{ + Tokenizer *t; + int i; + char *s; + t = (Tokenizer *)malloc(sizeof(Tokenizer)); + strcpy(t->delimiters, delimiters); + for (i = 0; i < NDELIMS; i++) + t->dtab[i] = 0; + s = t->delimiters; + for (; *s; s++) + t->dtab[(int)(*s)] = 1; + t->mem = mem; + t->casechange = casechange; + return t; +} + +Token *newtoken(int n) +{ + Token *t; + t = (Token *)malloc(sizeof(Token)); + t->str = (char *)malloc(n); + t->type = TERM; + t->len = 0; + return t; +} + +int gettoken(Token *tok, FILE *fp, Tokenizer *t) +{ + int n; + char c, *s, c_[2]; + int len, bytes; + bytes = 0; + len = 0; + tok->type = TERM; + s = tok->str; + while ((n = fread(&c, 1, 1, fp)) > 0) { + bytes += n; + if (t->dtab[(int)c] == 1) { + push(t->mem, &c); + if (len == 0) + continue; + *s = '\0'; + if (c == '>') { + pop(t->mem); + c_[1] = pop(t->mem); + c_[0] = pop(t->mem); + if (c_[1] == '<') + tok->type = OTAG; + else if (c_[1] == '/' && c_[0] == '<') + tok->type = CTAG; + } + tok->len = len; + return bytes; + } + if (t->casechange == LOWERCASE && c >= 65 && c <= 90) + c += 32; + *s = c; + s++; + len++; + } + return n; +} + +int tokenize(char *dest, char *src, int size, Tokenizer *t) +{ + int i, n; + static int len = 0; + n = 0; + for (i = 0; i < size; i++, src++) { + if (t->dtab[(int)(*src)] == 1) { + if (len > 0) { + *dest = ' '; + dest++; + len = 0; + n++; + } + continue; 
+ } + *dest = *src; + dest++; + len++; + n++; + } + return n; +}
Change 1 of 1 Show Entire File tokenizer.h Stacked
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
@@ -0,0 +1,38 @@
/*
 * tokenizer.h - delimiter-table tokenizer and the bounded character stack
 * it uses to remember recently seen delimiters.
 */
#ifndef TOKENIZER_H
#define TOKENIZER_H

#include <stdio.h>  /* FILE, used by gettoken() */

/* Size of the delimiter lookup table and of the stored delimiter string. */
#define NDELIMS 128
/* Byte-size units. */
#define KB 1024
#define MB 1048576
#define GB 1073741824

/* Values for Tokenizer.casechange. */
enum {KEEPCASE, LOWERCASE};
/* Values for Token.type: plain term, opening tag, closing tag. */
enum {TERM, OTAG, CTAG};

typedef struct Tokenizer Tokenizer;
typedef struct Token Token;
typedef struct Stack Stack;

/* Fixed-storage character stack. */
struct Stack {
    char s[KB];     /* storage */
    int p;          /* index of the top entry, -1 when empty */
    int n;          /* capacity in use, set by reset() */
};

struct Tokenizer {
    char delimiters[NDELIMS];   /* copy of the delimiter string */
    int dtab[NDELIMS];          /* dtab[c] == 1 when c is a delimiter */
    Stack *mem;                 /* stack gettoken() pushes delimiters onto */
    int casechange;             /* KEEPCASE or LOWERCASE */
};

struct Token {
    char *str;      /* token text, allocated by newtoken() */
    int type;       /* TERM, OTAG or CTAG */
    int len;        /* length of str in characters */
};

/* Empty the stack and set its capacity to n. */
void reset(Stack *stk, int n);
/* Push *c, discarding the oldest entry when the stack is full. */
void push(Stack *stk, char *c);
/* Pop the top character; returns 0 when the stack is empty. */
char pop(Stack *stk);
/* Print the stack contents to stdout. */
void printstack(Stack *stk);
/* Allocate a tokenizer for the given delimiter set and case policy. */
Tokenizer *newtokenizer(char *delimiters, int casechange, Stack *mem);
/* Allocate a token with an n-byte text buffer. */
Token *newtoken(int n);
/* Read the next token from fp into tok. */
int gettoken(Token *tok, FILE *fp, Tokenizer *t);
/* Copy src to dest, collapsing delimiter runs into single spaces. */
int tokenize(char *dest, char *src, int size, Tokenizer *t);

#endif /* TOKENIZER_H */