libdms5
view src/tools/src/dsXMLReader.cxx @ 26:559fd6c22e81
mingw support
| author | dms@camomile |
|---|---|
| date | Sun Jan 27 23:43:20 2008 +0300 (17 months ago) |
| parents | |
| children | |
line source
1 /*
2 *
3 * $Id: dsXMLReader.C,v 1.6 2004/10/19 09:40:08 dsamersoff Exp $
4 *
5 */
#include <dsXMLReader.h>

#include <cstdlib>
9 using namespace std;
10 using namespace libdms5;
12 dsXMLReader::dsXMLReader(const char *encoding) {
13 _byte_index = 0;
14 _line_num = 1;
15 _col_num = 1;
17 _encoding = (encoding) ? dsStrdup(encoding) : 0;
18 }
20 dsXMLReader::~dsXMLReader() {
21 while (! _st.empty() ) {
22 char *s = _st.top();
23 _st.pop();
24 delete s;
25 }
27 if (_encoding)
28 delete _encoding;
29 }
31 // Handlers
32 void dsXMLReader::StartElementHandler(const char *name, dsHashTable *atts) {
33 return;
34 }
36 void dsXMLReader::EndElementHandler(const char *name) {
37 return;
38 }
40 void dsXMLReader::CharacterDataHandler(const char *s, int len) {
41 return;
42 }
44 void dsXMLReader::ProcessingInstructionHandler(const char *tagret, const char *data) {
45 return;
46 }
48 void dsXMLReader::CommentHandler(const char *data) {
49 return;
50 }
53 dsXMLReaderDebug::dsXMLReaderDebug(const char *encoding):
54 dsXMLReader(encoding) {
56 }
58 // Handlers
59 void dsXMLReaderDebug::StartElementHandler(const char *name, dsHashTable *atts) {
60 cerr << "StartElementHandler:" << GetCurrentLineNumber() << ":" << GetCurrentColumnNumber() << " ";
61 cerr << name << " atts: ";
63 for (int i = 0; i < atts->HSize(); ++i ) {
64 dsHashTableItem *dp = atts->walk(i);
65 if (dp) {
66 cerr << dp->key << "=" << (const char *) dp->body << "; ";
67 }
68 }
70 cerr << endl;
71 return;
72 }
74 void dsXMLReaderDebug::EndElementHandler(const char *name) {
75 cerr << "EndElementHandler:" << GetCurrentLineNumber() << ":" << GetCurrentColumnNumber() << " ";
76 cerr << name << endl;
78 return;
79 }
81 void dsXMLReaderDebug::CharacterDataHandler(const char *s, int len) {
82 cerr << "CharacterDataHandler for <" << GetElementStack().top() << "> :" << GetCurrentLineNumber() << ":" << GetCurrentColumnNumber() << " ";
83 cerr.write(s,len);
84 cerr << endl;
86 return;
87 }
89 void dsXMLReaderDebug::ProcessingInstructionHandler(const char *target, const char *data) {
90 cerr << "ProcessingInstructionHandler:" << GetCurrentLineNumber() << ":" << GetCurrentColumnNumber() << " ";
91 cerr << target << " data: " << data << endl;
93 return;
94 }
96 void dsXMLReaderDebug::CommentHandler(const char *data) {
97 return;
98 }
101 /* ======================================================= */
/* States and tag kinds used by dsXMLReader::Parse(). */
enum ParserStates {
    /* --- parser modes --- */
    READ_TEXT,          /* collecting character data between tags */
    READ_TAG_NAME,      /* collecting an element name */
    READ_ATTR_NAME,     /* collecting an attribute name */
    READ_ATTR_VALUE,    /* collecting an attribute value */
    READ_MODIFIER,      /* just after '<': next may be '/', '?', '!' or a name */

    READ_PI_NAME,       /* collecting the target of a "<?" instruction */
    READ_PI_VALUE,      /* collecting the data of a "<?" instruction */
    READ_PI_CLOSE,      /* '?' seen inside an instruction, '>' expected */

    READ_ENTITY,        /* set after readEntity() so the following '>' is ignored */

    /* --- kind of the tag being read --- */
    IS_OPEN_TAG,
    IS_CLOSE_TAG,
    IS_SELF_CLOSE_TAG,

    /* not referenced in this file */
    HAS_ATTR,
    NO_ATTR
};
112 /* read comments, entity or doctype, rudimental yet */
113 void dsXMLReader::readEntity(istream& is){
114 dsStrstream os;
115 char ch, pch[2];
117 is >> pch[0];
118 is >> pch[1];
120 if (!is.good())
121 throw dsXMLReaderException(_line_num, _col_num, "Unexpected end of file");
123 /* handling COMMENT */
124 if (pch[0] == '-' && pch[1] == '-') { //reading comments
125 pch[0] = 0; pch[1] = 0;
127 while (1) {
128 if (!is.good())
129 throw dsXMLReaderException(_line_num, _col_num, "Unexpected end of file reading comment");
131 is >> ch;
132 if (ch == '>' && pch[0] == '-' && pch[1] == '-') {
133 is.putback(ch);
134 break;
135 }
137 if (pch[0] > 0)
138 os << pch[0];
140 pch[0] = pch[1]; pch[1] = ch;
142 }
145 CommentHandler( os.str() );
146 return;
147 }
148 }
150 void dsXMLReader::Parse(istream& is){
151 char ch;
152 int mode = READ_TEXT;
153 int lit = 0;
154 int closed_tag = IS_OPEN_TAG; // 1 - normal close; 2 - self_close
156 dsStrstream pcdata, attrName, attrVal;
157 dsStrstream *curr = &pcdata;
159 dsHashTable atts(50);
161 is.unsetf( ios::skipws );
163 while ( is.good() ) {
164 is >> ch;
166 /* update error stat counts */
167 ++_byte_index;
168 ++_col_num;
170 switch (ch) {
171 case '<' :
172 if ( mode == READ_TAG_NAME || mode == READ_ATTR_NAME || mode == READ_ATTR_VALUE)
173 throw dsXMLReaderException(_line_num, _col_num, "Unclosed tag");
175 if (pcdata.pcount() > 0) {
176 CharacterDataHandler(pcdata.str(), pcdata.pcount() );
177 pcdata.clear();
178 }
179 mode = READ_MODIFIER;
180 break;
182 case '>':
183 if (mode == READ_ATTR_NAME) // extra spaces after tag name or last attribute
184 mode = READ_TAG_NAME;
186 if (mode == READ_ATTR_VALUE) {
187 if (lit)
188 throw dsXMLReaderException(_line_num, _col_num, "Unmatched <%c>", lit);
190 atts.insert( attrName.str(), (const char *) attrVal.str() );
191 attrName.clear(); attrVal.clear();
192 mode = READ_TAG_NAME;
193 }
195 if (mode == READ_TAG_NAME) {
196 // Open or self-closed tag
197 if (closed_tag != IS_CLOSE_TAG) {
198 _st.push( strdup(pcdata.str()) );
199 StartElementHandler( pcdata.str(), &atts );
200 atts.clear();
201 }
203 //Close or self-closed tag
204 if (closed_tag != IS_OPEN_TAG) {
205 EndElementHandler( pcdata.str() );
207 char *s = _st.top(); _st.pop(); delete s;
208 }
210 }
212 if ( mode == READ_PI_CLOSE) {
213 ProcessingInstructionHandler(pcdata.str(), attrName.str() );
214 attrName.clear();
215 }
217 /* reinit states */
218 pcdata.clear();
219 curr = &pcdata;
220 closed_tag = IS_OPEN_TAG;
221 lit = 0;
222 mode = READ_TEXT;
223 break;
225 case '=':
226 if (mode == READ_ATTR_NAME) {
227 curr = &attrVal;
228 mode = READ_ATTR_VALUE;
229 break;
230 }
232 (*curr) << ch;
233 break;
235 case '?':
236 if (mode == READ_MODIFIER) {
237 mode = READ_PI_NAME;
238 break;
239 }
241 if (mode == READ_PI_NAME || mode == READ_PI_VALUE) {
242 mode = READ_PI_CLOSE;
243 break;
244 }
246 (*curr) << ch;
247 break;
249 case '!':
250 if (mode == READ_MODIFIER) {
251 readEntity(is);
252 mode = READ_ENTITY; // is set to ignore '>' next read
253 break;
254 }
256 (*curr) << ch;
257 break;
259 case '/':
260 if (mode == READ_ATTR_VALUE && !lit) {
261 atts.insert( attrName.str(), (const char *) attrVal.str() );
262 attrName.clear(); attrVal.clear();
264 curr = &attrName;
266 mode = READ_ATTR_NAME;
267 closed_tag = IS_SELF_CLOSE_TAG;
268 break;
269 }
271 if (mode == READ_MODIFIER) {
272 closed_tag = IS_CLOSE_TAG;
273 break;
274 }
276 if (mode==READ_ATTR_NAME || mode == READ_TAG_NAME) {
277 closed_tag = IS_SELF_CLOSE_TAG;
278 break;
279 }
281 (*curr) << ch;
282 break;
283 case '\n':
284 ++_line_num;
285 _col_num = 0;
286 case '\r':
287 case ' ':
288 case '\t':
289 if (mode == READ_TEXT && pcdata.pcount() == 0) // skip leading space
290 break;
292 if (mode == READ_ATTR_NAME) //Skip spaces before '=' DMS: 20030804
293 break;
295 if (mode == READ_TAG_NAME) {
296 mode = READ_ATTR_NAME;
297 curr = &attrName;
298 break;
299 }
301 if (mode == READ_ATTR_VALUE && !lit) {
302 if ( attrVal.pcount() ) { // skip leading spacesi after '='
303 atts.insert( attrName.str(), (const char *) attrVal.str() );
304 attrName.clear(); attrVal.clear();
306 curr = &attrName;
308 mode = READ_ATTR_NAME;
309 }
310 break;
311 }
313 if ( mode == READ_PI_NAME ) {
314 curr = &attrName;
315 mode = READ_PI_VALUE;
316 break;
317 }
319 (*curr) << ch;
320 break;
322 case '\'':
323 case '"' :
324 if (mode == READ_ATTR_VALUE) {
325 if (!lit) { // open literal
326 lit = ch;
327 break;
328 }
330 if (ch == lit) { // close literal
331 lit = 0;
333 atts.insert( attrName.str(), (const char *) attrVal.str() );
334 attrName.clear(); attrVal.clear();
335 curr = &attrName;
337 mode = READ_ATTR_NAME;
338 break;
339 }
340 }
342 (*curr) << ch;
343 break;
345 default:
346 if (mode == READ_MODIFIER) {
347 mode = READ_TAG_NAME;
348 }
350 (*curr) << ch;
351 }
352 }
353 }
