-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathxml-parser.c
87 lines (72 loc) · 2.09 KB
/
xml-parser.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "node-collection.h"
#include "parsers/dtd.h"
#include "parsers/node.h"
#include "util.h"
/**
 * Detects a Unicode byte-order mark (BOM) at the beginning of `file` and
 * leaves the stream positioned on the first byte that follows it.
 *
 * Up to four bytes are read and compared against the UTF-32LE, UTF-32BE,
 * UTF-8, UTF-16BE and UTF-16LE signatures. The detected encoding (or the
 * absence of a BOM) is reported through PRINT_DEBUG only; nothing is
 * returned to the caller.
 *
 * When no BOM is found the stream is rewound to absolute offset 0, so the
 * function is meant to be called on a stream positioned at the start of
 * the file.
 *
 * @param file Readable, seekable stream to inspect.
 */
void skipBOM(FILE *file) {
  unsigned char c[4];
  size_t nbytes = fread(c, sizeof(char), sizeof(c), file);
  size_t bom_len = 0; /* length of the recognised BOM, 0 if none */

  PRINT_DEBUG("BOM: ");

  /*
   * Longest signatures first: the UTF-32LE BOM (FF FE 00 00) begins with
   * the UTF-16LE BOM (FF FE), so testing the 2-byte form first would make
   * the 4-byte form unreachable.
   */
  if (nbytes >= 4 && memcmp(c, "\xFF\xFE\x00\x00", 4) == 0) {
    PRINT_DEBUG("UTF-32LE\n");
    bom_len = 4;
  } else if (nbytes >= 4 && memcmp(c, "\x00\x00\xFE\xFF", 4) == 0) {
    PRINT_DEBUG("UTF-32BE\n");
    bom_len = 4;
  } else if (nbytes >= 3 && memcmp(c, "\xEF\xBB\xBF", 3) == 0) {
    PRINT_DEBUG("UTF-8\n");
    bom_len = 3;
  } else if (nbytes >= 2 && memcmp(c, "\xFE\xFF", 2) == 0) {
    PRINT_DEBUG("UTF-16BE\n");
    bom_len = 2;
  } else if (nbytes >= 2 && memcmp(c, "\xFF\xFE", 2) == 0) {
    PRINT_DEBUG("UTF-16LE\n");
    bom_len = 2;
  }

  if (bom_len == 0) {
    PRINT_DEBUG("No BOM detected\n");
    /* Nothing to skip: hand the stream back from its very first byte. */
    fseek(file, 0, SEEK_SET);
  } else if (nbytes > bom_len) {
    /* Un-read the bytes that were fetched beyond the end of the BOM. */
    fseek(file, -(long)(nbytes - bom_len), SEEK_CUR);
  }
}
XMLDocument *parseXML(FILE *file) {
skipBOM(file);
skipWhitespaces(file);
XMLDocument *document = malloc(sizeof(XMLDocument));
document->nodes = initNodeCollection();
document->rootIndex = -1;
XMLElementNode *root = NULL;
XMLNode *currentNode = NULL;
while (fgetc(file) == '<') {
currentNode = parseNode(file, NULL);
if (currentNode) {
if (currentNode->type == ELEMENT) {
XMLElementNode *elementNode = (XMLElementNode *)currentNode;
if (!root) {
root = elementNode;
} else {
PRINT_ERROR("Found multiple root elements\n");
freeXMLNode(currentNode);
}
}
addNodeToCollection(document->nodes, currentNode);
if (currentNode == (XMLNode *)root) {
document->rootIndex = document->nodes->lastIndex;
PRINT_DEBUG("Found root element %i\n", document->rootIndex);
}
}
if (!currentNode)
break;
skipWhitespaces(file);
}
return document;
}