"Fossies" - the Fresh Open Source Software Archive 
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* */
34 /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35 /* -------------------------------------------------------------------- */
36 /* Code to read SOLE format XML as utterances. */
37 /* */
38 /*************************************************************************/
39
40 #include <cstdlib>
41 #include <cstdio>
42 #include "EST_THash.h"
43 #include "EST_error.h"
44 #include "solexml.h"
45 #include "rxp/XML_Parser.h"
46
47 static EST_Regex simpleIDRegex(".*#id(w\\([0-9]+\\))");
48 static EST_Regex rangeIDRegex(".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
49
50 class Parse_State
51 {
52 public:
53 int depth;
54 EST_String relName;
55 EST_Utterance *utt;
56 EST_Relation *rel;
57 EST_Item *parent;
58 EST_Item *current;
59
60 EST_THash<EST_String, EST_Item_Content *> contents;
61
62 Parse_State() : contents(100) {}
63 };
64
65 class Sole_Parser_Class : public XML_Parser_Class
66 {
67 protected:
68 virtual void document_open(XML_Parser_Class &c,
69 XML_Parser &p,
70 void *data);
71 virtual void document_close(XML_Parser_Class &c,
72 XML_Parser &p,
73 void *data);
74
75 virtual void element_open(XML_Parser_Class &c,
76 XML_Parser &p,
77 void *data,
78 const char *name,
79 XML_Attribute_List &attributes);
80 virtual void element(XML_Parser_Class &c,
81 XML_Parser &p,
82 void *data,
83 const char *name,
84 XML_Attribute_List &attributes);
85 virtual void element_close(XML_Parser_Class &c,
86 XML_Parser &p,
87 void *data,
88 const char *name);
89
90 virtual void pcdata(XML_Parser_Class &c,
91 XML_Parser &p,
92 void *data,
93 const char *chars);
94 virtual void cdata(XML_Parser_Class &c,
95 XML_Parser &p,
96 void *data,
97 const char *chars);
98
99 virtual void processing(XML_Parser_Class &c,
100 XML_Parser &p,
101 void *data,
102 const char *instruction);
103 virtual void error(XML_Parser_Class &c,
104 XML_Parser &p,
105 void *data);
106 };
107
108 static void print_attributes(XML_Attribute_List &attributes)
109 {
110 XML_Attribute_List::Entries them;
111
112 for(them.begin(attributes); them ; them++)
113 printf(" %s='%s'",
114 (const char *)them->k,
115 (const char *)them->v);
116 }
117
118 EST_read_status solexml_read(FILE *file,
119 const EST_String &name,
120 EST_Utterance &u,
121 int &max_id)
122 {
123 (void)max_id;
124 (void)print_attributes; // just to shut -Wall up.
125 Sole_Parser_Class pclass;
126 Parse_State state;
127
128 u.clear();
129
130 state.utt=&u;
131
132 XML_Parser *parser = pclass.make_parser(file, name, &state);
133 parser->track_context(TRUE);
134
135 CATCH_ERRORS()
136 return read_format_error;
137
138 parser->go();
139
140 END_CATCH_ERRORS();
141
142 return read_ok;
143 }
144
145 static void ensure_relation(Parse_State *state)
146 {
147 if (state->rel==NULL)
148 {
149 state->rel = state->utt->create_relation(state->relName);
150 }
151 }
152
153 static EST_Item_Content *get_contents(Parse_State *state, EST_String id)
154 {
155 EST_Item_Content *c = state->contents.val(id);
156 if (c==NULL)
157 {
158 c = new EST_Item_Content();
159 state->contents.add_item(id, c);
160 }
161
162 return c;
163 }
164
165 static void extract_ids(XML_Attribute_List &attributes,
166 EST_TList<EST_String> &ids)
167 {
168 EST_String val;
169 static int count;
170 if (attributes.present("id"))
171 {
172 val = attributes.val("id");
173 ids.append(val);
174 }
175 else if (attributes.present("href"))
176 {
177 val = attributes.val("href");
178 int starts[EST_Regex_max_subexpressions];
179 int ends[EST_Regex_max_subexpressions];
180
181 if (val.matches(simpleIDRegex, 0, starts, ends))
182 {
183 EST_String n = val.at(starts[1], ends[1]-starts[1]);
184
185 ids.append("w" + n);
186 }
187 else if (val.matches(rangeIDRegex, 0, starts, ends))
188 {
189 int n1 = atoi(val.at(starts[1], ends[1]-starts[1]));
190 int n2 = atoi(val.at(starts[2], ends[2]-starts[2]));
191
192 for(int i=n1; i<=n2; i++)
193 {
194 char buf[100];
195 sprintf(buf, "w%d", i);
196
197 ids.append(buf);
198 }
199
200 }
201 else
202 EST_warning("element with bad ID or HREF '%s'", (const char *)val);
203 }
204 else
205 {
206 char buf[100];
207 sprintf(buf, "n%d", ++count);
208
209 ids.append(buf);
210 return;
211 }
212
213 }
214
215
216 /** Now we define the callbacks.
217 */
218
219 void Sole_Parser_Class::document_open(XML_Parser_Class &c,
220 XML_Parser &p,
221 void *data)
222 {
223 (void)c; (void)p;
224 Parse_State *state = (Parse_State *)data;
225
226 state->depth=1;
227 state->rel=NULL;
228 state->parent=NULL;
229 state->current=NULL;
230 }
231
232 void Sole_Parser_Class::document_close(XML_Parser_Class &c,
233 XML_Parser &p,
234 void *data)
235 {
236 (void)c; (void)p; (void)data;
237 }
238
239
240 void Sole_Parser_Class::element_open(XML_Parser_Class &c,
241 XML_Parser &p,
242 void *data,
243 const char *name,
244 XML_Attribute_List &attributes)
245 {
246 (void)c; (void)p; (void)attributes;
247 Parse_State *state = (Parse_State *)data;
248
249 state->depth++;
250
251 if (strcmp(name, "solexml")==0)
252 {
253 state->relName=attributes.val("relation");
254 printf("start solexml relation=%s\n", (const char *)state->relName);
255 return;
256 }
257 else if (strcmp(name, "text-elem")==0)
258 {
259 // ignore these
260 return;
261 }
262
263 ensure_relation(state);
264
265 if (strcmp(name, "anaphora-elem")==0
266 || strcmp(name, "wordlist")==0
267 || strcmp(name, "w")==0)
268 {
269 EST_TList<EST_String> ids;
270 extract_ids(attributes, ids);
271
272 EST_Litem *idp = ids.head();
273 bool first=TRUE;
274 for(; idp!= NULL; idp = idp->next())
275 {
276 EST_String id = ids(idp);
277 if (id==EST_String::Empty)
278 XML_Parser_Class::error(c, p, data, EST_String("Element With No Id"));
279
280 if (first)
281 first=FALSE;
282 else
283 {
284 state->current = state->parent;
285 state->parent=iup(state->parent);
286 }
287
288
289 EST_Item_Content *cont = get_contents(state, id);
290
291 cont->set_name(id);
292
293 XML_Attribute_List::Entries them;
294 for(them.begin(attributes); them ; them++)
295 {
296 EST_String k = them->k;
297 EST_String v = them->v;
298 cont->f.set(k,v);
299 }
300
301 EST_Item *item;
302
303 if (state->current == NULL)
304 if (state->parent == NULL)
305 item = state->rel->append();
306 else
307 item = state->parent->insert_below();
308 else
309 item = state->current->insert_after();
310
311 item->set_contents(cont);
312
313 state->current=NULL;
314 state->parent=item;
315 }
316 }
317 else
318 EST_warning("SOLE XML Parser: unknown element %s", name);
319 }
320
321
322 void Sole_Parser_Class::element(XML_Parser_Class &c,
323 XML_Parser &p,
324 void *data,
325 const char *name,
326 XML_Attribute_List &attributes)
327 {
328 (void)c; (void)p; (void)attributes;
329 Parse_State *state = (Parse_State *)data;
330
331 if (strcmp(name, "language")==0)
332 {
333 state->utt->f.set("language", attributes.val("name"));
334 return;
335 }
336
337 element_open(c, p, data, name, attributes);
338 element_close(c, p, data, name);
339 }
340
341
342 void Sole_Parser_Class::element_close(XML_Parser_Class &c,
343 XML_Parser &p,
344 void *data,
345 const char *name)
346 {
347 (void)c; (void)p; (void)name;
348 Parse_State *state = (Parse_State *)data;
349
350 if (strcmp(name, "anaphora-elem")==0
351 || strcmp(name, "wordlist")==0
352 || strcmp(name, "w")==0)
353 {
354 state->depth--;
355 state->current = state->parent;
356 state->parent=iup(state->parent);
357 }
358 }
359
360
361 void Sole_Parser_Class::pcdata(XML_Parser_Class &c,
362 XML_Parser &p,
363 void *data,
364 const char *chars)
365 {
366 (void)c;
367
368 Parse_State *state = (Parse_State *)data;
369
370 if (state->parent != NULL && p.context(0) == "w")
371 state->parent->set(EST_String("word"), chars);
372
373 // printf("SOLE XML Parser [pcdata[%s]] %d\n", chars, state->depth);
374 }
375
376
377 void Sole_Parser_Class::cdata(XML_Parser_Class &c,
378 XML_Parser &p,
379 void *data,
380 const char *chars)
381 {
382 (void)c; (void)p; (void)data; (void)chars;
383 // Parse_State *state = (Parse_State *)data;
384
385 // printf("SOLE XML Parser [cdata[%s]] %d\n", chars, state->depth);
386 }
387
388
389 void Sole_Parser_Class::processing(XML_Parser_Class &c,
390 XML_Parser &p,
391 void *data,
392 const char *instruction)
393 {
394 (void)c; (void)p;
395 Parse_State *state = (Parse_State *)data;
396
397 printf("SOLE XML Parser [proc[%s]] %d\n", instruction, state->depth);
398 }
399
400
401 void Sole_Parser_Class::error(XML_Parser_Class &c,
402 XML_Parser &p,
403 void *data)
404 {
405 (void)c; (void)p; (void)data;
406 // Parse_State *state = (Parse_State *)data;
407
408 EST_error("SOLE XML Parser %s", get_error(p));
409
410 est_error_throw();
411 }