-
Notifications
You must be signed in to change notification settings - Fork 26
/
Markup.cpp
1253 lines (1129 loc) · 31 KB
/
Markup.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#define _CRT_SECURE_NO_WARNINGS
#define NOMINMAX
// Markup.cpp: implementation of the NBC_CMarkup class.
//
// NBC_CMarkup Release 6.5 Lite
// Copyright (C) 1999-2003 First Objective Software, Inc. All rights reserved
// This entire notice must be retained in this source code
// Redistributing this source code requires written permission
// This software is provided "as is", with no warranty.
// Latest fixes enhancements and documentation at www.firstobject.com
#include "Markup.h"
#include <assert.h>
#include <algorithm>
#include <stdarg.h>
#include <string.h>
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
void CMarkup::SetIndent(int nIndent)
{
mnIndent = nIndent;
}
// Copy the navigation indexes, node type, position array and document text
// from another CMarkup.
//
// Self-assignment is treated as a no-op. Previously m_aPos was cleared before
// being copied from the source, so assigning an object to itself wiped its
// own position array.
//
// NOTE(review): m_csError and the indent members are not copied, exactly as
// before -- confirm that this is intended.
void CMarkup::operator=(const CMarkup& markup)
{
    if (this == &markup)
        return;

    // Current position state
    m_iPosParent = markup.m_iPosParent;
    m_iPos = markup.m_iPos;
    m_iPosChild = markup.m_iPosChild;
    m_iPosFree = markup.m_iPosFree;
    m_nNodeType = markup.m_nNodeType;

    // Parsed element index and the text it refers to
    m_aPos = markup.m_aPos;
    m_csDoc = markup.m_csDoc;
    MARKUP_SETDEBUGSTATE;
}
// Replace the document text with szDoc (NULL means empty) and parse it.
// Returns true when x_ParseElem found a top-level element; otherwise the
// index is cleared and false is returned.
bool CMarkup::SetDoc(const char* szDoc)
{
    // Forget any previous parse: slot 1 becomes the first free ElemPos again
    m_iPosFree = 1;
    ResetPos();

    // Take a private copy of the text
    if (szDoc == NULL)
        m_csDoc.erase();
    else
        m_csDoc = szDoc;

    // Pre-size the position array: one slot per 64 bytes of text plus 8.
    // The array is only ever grown here, never shrunk.
    const std::string::size_type nWanted = m_csDoc.length() / 64 + 8;
    if (nWanted > m_aPos.size())
        m_aPos.resize(nWanted);

    // Parse starting from slot 0, which acts as parent of the top-level element
    bool bParsedOk = false;
    if (!m_csDoc.empty())
    {
        m_aPos[0].Clear();
        const int iTop = x_ParseElem(0);
        if (iTop > 0)
        {
            m_aPos[0].iElemChild = iTop;
            bParsedOk = true;
        }
    }

    // Nothing usable was parsed (empty text or parse failure): wipe the index
    if (!bParsedOk)
    {
        m_aPos[0].Clear();
        m_iPosFree = 1;
    }
    ResetPos();

    // Refill the indent scratch buffer with spaces and terminate it.
    // NOTE(review): the hard-coded 999 assumes mtIndent holds 1000 chars -- confirm against Markup.h
    memset(mtIndent, ' ', sizeof(mtIndent));
    mtIndent[999] = 0;
    return bParsedOk;
}
// True when the position array exists and slot 0 has a child element,
// i.e. the last parse recorded a top-level element.
bool CMarkup::IsWellFormed()
{
    if (m_aPos.empty())
        return false;
    return m_aPos[0].iElemChild ? true : false;
}
bool CMarkup::FindElem(const char* szName)
{
// Change current position only if found
//
if (!m_aPos.empty())
{
int iPos = x_FindElem(m_iPosParent, m_iPos, szName);
if (iPos)
{
// Assign new position
x_SetPos(m_aPos[iPos].iElemParent, iPos, 0);
return true;
}
}
return false;
}
// Advance the child position to the next child element of the main position,
// optionally restricted to elements named szName. The current positions are
// changed only when a child is found.
//
// Shorthand: when there is no current main position, FindElem() is tried
// first so that the search happens under the first element found.
//
// Returns false without touching m_aPos when the position array is empty,
// matching the guard used by FindElem and IsWellFormed (previously this
// function indexed m_aPos unconditionally).
bool CMarkup::FindChildElem(const char* szName)
{
    if (m_aPos.empty())
        return false;

    if (! m_iPos)
        FindElem();

    const int iPosChild = x_FindElem(m_iPos, m_iPosChild, szName);
    if (! iPosChild)
        return false;

    // Assign new position: main position is the parent of the found child
    const int iPos = m_aPos[iPosChild].iElemParent;
    x_SetPos(m_aPos[iPos].iElemParent, iPos, iPosChild);
    return true;
}
// Tag name of the element at the main position, or an empty string when
// there is no main position.
std::string CMarkup::GetTagName() const
{
    if (!m_iPos)
        return std::string();
    return x_GetTagName(m_iPos);
}
// Descend one level: the main position becomes the parent position and the
// child position becomes the main position.
//
// Since release 6.3 this succeeds even without a current child position (a
// following FindElem then finds the first element). The older shorthand that
// called FindChildElem() first was removed and is NOT backwards compatible.
bool CMarkup::IntoElem()
{
    const bool bAtElement = m_iPos && m_nNodeType == MNT_ELEMENT;
    if (!bAtElement)
        return false;

    x_SetPos(m_iPos, m_iPosChild, 0);
    return true;
}
// Ascend one level: the parent position becomes the main position and the
// old main position becomes the child position. Fails when there is no
// parent position.
bool CMarkup::OutOfElem()
{
    if (!m_iPosParent)
        return false;

    const int iGrandParent = m_aPos[m_iPosParent].iElemParent;
    x_SetPos(iGrandParent, m_iPosParent, m_iPos);
    return true;
}
//////////////////////////////////////////////////////////////////////
// Private Methods
//////////////////////////////////////////////////////////////////////
// Hand out the index of the next unused ElemPos in m_aPos and advance
// m_iPosFree, enlarging the array first when it is exhausted.
//
// The array grows by 50%, but always by at least one slot: the plain
// "n + n/2" rule does not grow at all for n <= 1, which would hand out an
// index past the end of the array. The ">=" test likewise covers the case
// where m_iPosFree is already beyond the array.
int CMarkup::x_GetFreePos()
{
    const int nSlots = (int)m_aPos.size();
    if (m_iPosFree >= nSlots)
    {
        const int nGrown = m_iPosFree + m_iPosFree / 2;
        m_aPos.resize(std::max(nGrown, m_iPosFree + 1));
    }
    return m_iPosFree++;
}
// Give back the most recently handed-out ElemPos slot by stepping m_iPosFree
// back by one, so that x_GetFreePos() can reuse the index.
// Always returns 0, which callers can return directly as "no element".
int CMarkup::x_ReleasePos()
{
    m_iPosFree -= 1;
    return 0;
}
// Record a parse error in m_csError, release the ElemPos slot that was being
// filled, and return -1.
// When szName is given, szError is used as a format string with szName as its
// argument; otherwise szError is stored as is.
int CMarkup::x_ParseError(const char* szError, const char* szName)
{
    if (szName == NULL)
        m_csError = szError;
    else
        m_csError = Format(szError, szName);

    x_ReleasePos();
    return -1;
}
int CMarkup::x_ParseElem(int iPosParent)
{
// Parse the next element under iPosParent, including all elements nested in it.
// This is either called by SetDoc, x_AddSubDoc, or itself recursively
// m_aPos[iPosParent].nEndL is where to start parsing for the child element
//
// Return value:
//   > 0  index of the newly filled ElemPos for the element that was parsed
//   0    the next tag is an end tag (probably the parent's); the slot is released
//   -1   parse error; m_csError holds the message (see x_ParseError)
//
// In all cases we need to get a new ElemPos, but release it if unused
//
int iPos = x_GetFreePos();
m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
m_aPos[iPos].iElemParent = iPosParent;
m_aPos[iPos].iElemChild = 0;
m_aPos[iPos].iElemNext = 0;
// Start Tag
// A loop is used to ignore all remarks tags and special tags
// i.e. <?xml version="1.0"?>, and <!-- comment here -->
// So any tag beginning with ? or ! is ignored
// Loop past ignored tags
TokenPos token(m_csDoc.c_str());
token.nNext = m_aPos[iPosParent].nEndL;
std::string csName;
// csName stays empty until a real element start tag has been read
while (csName.empty())
{
// Look for left angle bracket of start tag
m_aPos[iPos].nStartL = token.nNext;
if (! x_FindChar(token.szDoc, m_aPos[iPos].nStartL, '<'))
return x_ParseError("Element tag not found");
// Set parent's End tag to start looking from here (or later)
m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
// Determine whether this is an element, or bypass other type of node
token.nNext = m_aPos[iPos].nStartL + 1;
if (x_FindToken(token))
{
if (token.bIsString)
return x_ParseError("Tag starts with quote");
char cFirstChar = m_csDoc[token.nL];
if (cFirstChar == '?' || cFirstChar == '!')
{
// Not an element: rewind to the '<' and let x_ParseNode skip the whole node
token.nNext = m_aPos[iPos].nStartL;
if (! x_ParseNode(token))
return x_ParseError("Invalid node");
}
else if (cFirstChar != '/')
{
csName = x_GetToken(token);
// Look for end of tag
if (! x_FindChar(token.szDoc, token.nNext, '>'))
return x_ParseError("End of tag not found");
}
else
return x_ReleasePos(); // probably end tag of parent
}
else
return x_ParseError("Abrupt end within tag");
}
// token.nNext now indexes the '>' that closes the start tag
m_aPos[iPos].nStartR = token.nNext;
// Is ending mark within start tag, i.e. empty element?
if (m_csDoc[m_aPos[iPos].nStartR-1] == '/')
{
// Empty element
// Close tag left is set to ending mark, and right to open tag right
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1;
m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
}
else // look for end tag
{
// Element probably has contents
// Determine where to start looking for left angle bracket of end tag
// This is done by recursively parsing the contents of this element
int iInner, iInnerPrev = 0;
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
// Each successful recursive call yields one child; chain the children together
while ((iInner = x_ParseElem(iPos)) > 0)
{
// Set links to iInner
if (iInnerPrev)
m_aPos[iInnerPrev].iElemNext = iInner;
else
m_aPos[iPos].iElemChild = iInner;
iInnerPrev = iInner;
// Set offset to reflect child
m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
}
// A child failed to parse: propagate the error (m_csError was set by the child)
if (iInner == -1)
return -1;
// Look for left angle bracket of end tag
if (! x_FindChar(token.szDoc, m_aPos[iPos].nEndL, '<'))
return x_ParseError("End tag of %s element not found", csName.c_str());
// Look through tokens of end tag
// Expected non-string token sequence: '/', the element name, then '>'
token.nNext = m_aPos[iPos].nEndL + 1;
int nTokenCount = 0;
while (x_FindToken(token))
{
++nTokenCount;
if (! token.bIsString)
{
// Is first token not an end slash mark?
if (nTokenCount == 1 && m_csDoc[token.nL] != '/')
return x_ParseError("Expecting end tag of element %s", csName.c_str());
else if (nTokenCount == 2 && ! token.Match(csName.c_str()))
return x_ParseError("End tag does not correspond to %s", csName.c_str());
// Else is it a right angle bracket?
else if (m_csDoc[token.nL] == '>')
break;
}
}
// Was a right angle bracket not found?
// (either the document ended, or fewer than two tokens preceded the '>')
if (! token.szDoc[token.nL] || nTokenCount < 2)
return x_ParseError("End tag not completed for element %s", csName.c_str());
m_aPos[iPos].nEndR = token.nL;
}
// Successfully parsed element (and contained elements)
return iPos;
}
// Scan szDoc forward from index nChar for the character c.
// On return nChar is the index where scanning stopped: the position of c when
// found (returns true), or the position of the terminating null (returns false).
bool CMarkup::x_FindChar(const char* szDoc, int& nChar, char c)
{
    int nScan = nChar;
    while (szDoc[nScan] != '\0' && szDoc[nScan] != c)
        ++nScan;

    nChar = nScan;
    return szDoc[nScan] != '\0';
}
// Skip spaces, tabs, line feeds and carriage returns starting at nChar.
// Returns true with nChar on the first non-whitespace character, or false
// with nChar on the terminating null when only whitespace remained.
bool CMarkup::x_FindAny(const char* szDoc, int& nChar)
{
    for (;; ++nChar)
    {
        const char cHere = szDoc[nChar];
        const bool bBlank = (cHere == ' ' || cHere == '\t'
            || cHere == '\n' || cHere == '\r');
        if (!bBlank)
            return cHere != '\0';
    }
}
// Locate the next token at or after token.nNext, skipping leading whitespace.
//
// On success returns true with:
//   token.nL / token.nR  first / last character index of the token
//   token.nNext          index just past the token (past the closing quote
//                        for a quoted string, unless the document ended)
//   token.bIsString      true when the token was a quoted string; nL and nR
//                        then exclude the quotes
// At end of document returns false with nL, nR and nNext all set to the end.
bool CMarkup::x_FindToken(CMarkup::TokenPos& token)
{
    const char* const pText = token.szDoc;
    int nCursor = token.nNext;
    token.bIsString = false;

    // Nothing but whitespace left?
    if (!x_FindAny(pText, nCursor))
    {
        token.nL = nCursor;
        token.nR = nCursor;
        token.nNext = nCursor;
        return false;
    }

    const char cLead = pText[nCursor];
    if (cLead == '\"' || cLead == '\'')
    {
        // Quoted string: the token is what lies between the matching quotes
        token.bIsString = true;
        ++nCursor;
        token.nL = nCursor;
        x_FindChar(pText, nCursor, cLead);
        token.nR = nCursor - 1;

        // Step over the closing quote, unless the document ended first
        if (pText[nCursor] != '\0')
            ++nCursor;
    }
    else
    {
        // Bare token: runs up to whitespace or one of the special characters
        static const char szStoppers[] = " \t\n\r<>=\\/?!";
        token.nL = nCursor;
        while (pText[nCursor] != '\0' && strchr(szStoppers, pText[nCursor]) == NULL)
            ++nCursor;

        // A special character on its own is a one-character token
        if (nCursor == token.nL)
            ++nCursor;
        token.nR = nCursor - 1;
    }

    token.nNext = nCursor;
    return true;
}
// Copy the text of a token out of the document.
// token.nL and token.nR are inclusive indexes into m_csDoc; an inverted range
// yields an empty string.
std::string CMarkup::x_GetToken(const CMarkup::TokenPos& token) const
{
    if (token.nR < token.nL)
        return "";

    // Inclusive range, so one more than the difference -- except when nR is
    // not inside the document, in which case the extra character is dropped
    int nCount = token.nR - token.nL;
    if (token.nR < (int)(m_csDoc.length()))
        ++nCount;
    return Mid(m_csDoc, token.nL, nCount);
}
// Find the next sibling element after iPos (or the first child of iPosParent
// when iPos is 0). With a NULL or empty szPath the very next element is
// returned; otherwise siblings are walked until one whose tag name matches
// szPath. Returns the element index, or 0 when there is none.
int CMarkup::x_FindElem(int iPosParent, int iPos, const char* szPath)
{
    int iCandidate = iPos ? m_aPos[iPos].iElemNext
                          : m_aPos[iPosParent].iElemChild;

    // No name filter: the next element is the answer, whatever it is
    const bool bAnyName = (szPath == NULL || szPath[0] == '\0');
    if (bAnyName)
        return iCandidate;

    TokenPos token(m_csDoc.c_str());
    for (; iCandidate; iCandidate = m_aPos[iCandidate].iElemNext)
    {
        // The tag name is the first token after the '<' of the start tag
        token.nNext = m_aPos[iCandidate].nStartL + 1;
        x_FindToken(token);
        if (token.Match(szPath))
            return iCandidate;
    }
    return 0;
}
// Classify the node that starts at token.nNext.
//
// Returns the MNT_* node type, or 0 when there is no node (end of document,
// an end tag, or a node that is not terminated). token.nL is set to the node
// start. For every type except MNT_ELEMENT, token.nNext is moved to the
// character after the node; token.nR is not determined for elements or when
// nothing is found.
//
// Nodes starting with '<' are one of:
//   <!--...-->        comment
//   <!DOCTYPE ...>    dtd
//   <?target ...?>    processing instruction
//   <![CDATA[...]]>   cdata section
//   <NAME ...>        element
int CMarkup::x_ParseNode(CMarkup::TokenPos& token)
{
    const char* const pText = token.szDoc;
    token.nL = token.nNext;
    const char cLead = pText[token.nL];

    // End of document
    if (cLead == '\0')
        return 0;

    // Anything not starting with '<' is whitespace or text up to the next '<'
    if (cLead != '<')
    {
        token.nNext = token.nL;
        if (!x_FindAny(pText, token.nNext) || pText[token.nNext] == '<')
            return MNT_WHITESPACE;
        x_FindChar(pText, token.nNext, '<');
        return MNT_TEXT;
    }

    // At least two more characters are needed to tell the node types apart
    if (pText[token.nL + 1] == '\0' || pText[token.nL + 2] == '\0')
        return 0;

    int nKind = 0;
    const char* szTerminator = NULL;
    switch (pText[token.nL + 1])
    {
    case '/':
        // End tag means no node found within parent element
        return 0;

    case '?':
        nKind = MNT_PROCESSING_INSTRUCTION;
        szTerminator = "?>";
        break;

    case '!':
        if (pText[token.nL + 2] == '[')
        {
            nKind = MNT_CDATA_SECTION;
            szTerminator = "]]>";
        }
        else if (pText[token.nL + 2] == '-')
        {
            nKind = MNT_COMMENT;
            szTerminator = "-->";
        }
        else
        {
            // Document type: tokenize so that quoted strings and nested
            // [...] sections cannot hide the closing '>'
            int nDepth = 0;
            bool bClosed = false;
            while (!bClosed && x_FindToken(token))
            {
                if (token.bIsString)
                    continue;
                const char cTok = pText[token.nL];
                if (cTok == '[')
                    ++nDepth;
                else if (cTok == ']')
                    --nDepth;
                else if (cTok == '>' && nDepth == 0)
                    bClosed = true;
            }
            if (!bClosed)
                return 0;
            nKind = MNT_DOCUMENT_TYPE;
        }
        break;

    default:
        nKind = MNT_ELEMENT;
        break;
    }

    // Types with a fixed terminator: jump to just past it
    if (szTerminator != NULL)
    {
        const char* pHit = strstr(pText + token.nNext, szTerminator);
        if (pHit == NULL)
            return 0; // not well-formed
        token.nNext = (int)(pHit - pText) + (int)strlen(szTerminator);
    }
    return nKind;
}
// Tag name of the element at index iPos: the first token after the '<' of its
// start tag. Returns an empty string for index 0 or when no token is found.
std::string CMarkup::x_GetTagName(int iPos) const
{
    if (!iPos)
        return "";

    TokenPos token(m_csDoc.c_str());
    token.nNext = m_aPos[iPos].nStartL + 1;
    if (!x_FindToken(token))
        return "";
    return x_GetToken(token);
}
// Walk the tokens of a start tag, beginning at token.nNext, looking for an
// attribute.
//
// szAttrib NULL or empty: stop at the next attribute name; returns true with
//                         token positioned on that name.
// szAttrib given:         look for that name; returns true with token
//                         positioned on its quoted value.
// Returns false when '>', '/' or '?' (end of the tag) or the end of the
// document is reached first.
bool CMarkup::x_FindAttrib(CMarkup::TokenPos& token, const char* szAttrib) const
{
    // Token number at which the wanted name was seen; 0 while not yet seen
    int nNameIdx = 0;

    for (int nTokenIdx = 0; x_FindToken(token); ++nTokenIdx)
    {
        if (token.bIsString)
        {
            // The value is the string two tokens after the name: name = "value"
            if (nNameIdx && nTokenIdx == nNameIdx + 2)
                return true;
            continue;
        }

        const char cLead = m_csDoc[token.nL];

        // End of the tag: attrib not found
        if (cLead == '>' || cLead == '/' || cLead == '?')
            return false;

        // Equal sign is never a name
        if (cLead == '=')
            continue;

        // Token 0 is the tag name itself; once the name has been matched,
        // later bare tokens are of no interest
        if (nNameIdx || !nTokenIdx)
            continue;

        // Potential attribute name
        if (!szAttrib || !szAttrib[0])
            return true; // return with token at attrib name
        if (token.Match(szAttrib))
            nNameIdx = nTokenIdx;
    }

    // Not found
    return false;
}
// Value of attribute szAttrib on the element at iPos, with escape codes
// converted back to characters. Returns an empty string when iPos is 0, the
// current node is not an element, szAttrib is NULL, or the attribute is absent.
std::string CMarkup::x_GetAttrib(int iPos, const char* szAttrib) const
{
    if (!iPos || m_nNodeType != MNT_ELEMENT || !szAttrib)
        return "";

    TokenPos token(m_csDoc.c_str());
    token.nNext = m_aPos[iPos].nStartL + 1;
    if (!x_FindAttrib(token, szAttrib))
        return "";

    // Keep the right edge inside the document
    int nValueR = token.nR;
    if (!(token.nR < (int)(m_csDoc.length())))
        nValueR -= 1;
    return x_TextFromDoc(token.nL, nValueR);
}
// Set attribute szAttrib to szValue on the element at iPos.
//
// If the attribute already exists only its value is replaced (an empty value
// leaves attrib="" rather than removing the attribute); otherwise a new
// ` name="value"` pair is inserted at the end of the start tag. The value is
// escaped with x_TextToDoc. Element offsets after the edit are shifted by the
// change in length.
//
// Returns false, leaving the document untouched, when iPos is 0, the current
// node is not an element, or szAttrib is NULL or empty. The name check is
// required because x_FindAttrib treats a NULL/empty name as "find the next
// attribute" and returns with the token on that attribute's NAME, which
// would then be overwritten with the value. A NULL szValue is treated as an
// empty value.
bool CMarkup::x_SetAttrib(int iPos, const char* szAttrib, const char* szValue)
{
    if (! iPos || m_nNodeType != MNT_ELEMENT)
        return false;
    if (szAttrib == NULL || szAttrib[0] == '\0')
        return false;
    if (szValue == NULL)
        szValue = "";

    // Start scanning right after the '<' of the start tag
    TokenPos token(m_csDoc.c_str());
    token.nNext = m_aPos[iPos].nStartL + 1;

    // Default insertion point: just before '>' (or before "/>" for an empty element)
    int nInsertAt = m_aPos[iPos].nStartR - (m_aPos[iPos].IsEmptyElement() ? 1 : 0);

    // Create insertion text depending on whether attribute already exists
    int nReplace = 0;
    std::string csInsert;
    if (x_FindAttrib(token, szAttrib))
    {
        // Replace value only; the token spans the text between the quotes
        csInsert = x_TextToDoc(szValue, true);
        nInsertAt = token.nL;
        nReplace = token.nR - token.nL + 1;
    }
    else
    {
        // Insert string name value pair
        csInsert = " ";
        csInsert += szAttrib;
        csInsert += "=\"";
        csInsert += x_TextToDoc(szValue, true);
        csInsert += "\"";
    }

    x_DocChange(nInsertAt, nReplace, csInsert);

    // Everything from the end of this start tag onwards moved by nAdjust
    const int nAdjust = (int)csInsert.length() - nReplace;
    m_aPos[iPos].nStartR += nAdjust;
    m_aPos[iPos].AdjustEnd(nAdjust);
    x_Adjust(iPos, nAdjust);
    MARKUP_SETDEBUGSTATE;
    return true;
}
// Text content of the element at iPos.
// Returns an empty string when the element has child elements or is an empty
// element. If the content begins (after whitespace) with a CDATA section that
// closes before the end tag, the raw CDATA body is returned; otherwise the
// content between the tags is returned with escape codes converted.
std::string CMarkup::x_GetData(int iPos) const
{
    if (m_aPos[iPos].iElemChild || m_aPos[iPos].IsEmptyElement())
        return "";

    const char* pText = (const char*)(m_csDoc.c_str());
    const int nContentL = m_aPos[iPos].nStartR + 1;   // first char after start tag
    const int nContentEnd = m_aPos[iPos].nEndL;       // '<' of the end tag

    // See if it is a CDATA section
    int nScan = nContentL;
    const bool bCData = x_FindAny(pText, nScan)
        && pText[nScan] == '<'
        && nScan + 11 < nContentEnd
        && strncmp(&pText[nScan], "<![CDATA[", 9) == 0;
    if (bCData)
    {
        const int nBody = nScan + 9;
        const int nClose = (int)m_csDoc.find("]]>", nBody);
        if (nClose != -1 && nClose < nContentEnd)
            return Mid(m_csDoc, nBody, nClose - nBody);
    }

    return x_TextFromDoc(nContentL, nContentEnd - 1);
}
// Convert text as seen outside the XML document to XML-friendly text by
// replacing special characters with ampersand escape codes,
// e.g. "6>7" becomes "6&gt;7".
//
//   <   &lt;    less than
//   &   &amp;   ampersand
//   >   &gt;    greater than
//
// and, only when bAttrib is true (attribute values):
//
//   '   &apos;  apostrophe or single quote
//   "   &quot;  double quote
//
// A NULL szText yields an empty string.
//
// Fixes relative to the previous version:
//  - the replacement table holds the escape codes again (it had been reduced
//    to the bare characters, with an unterminated literal for the quote);
//  - the result is built by appending to a std::string instead of copying
//    into a raw buffer with strncpy/strcpy_s sized for the whole buffer,
//    which could write past the end of the buffer;
//  - NULL input no longer reaches strlen.
std::string CMarkup::x_TextToDoc(const char* szText, bool bAttrib) const
{
    static const char* const szaReplace[] = { "&lt;", "&amp;", "&gt;", "&apos;", "&quot;" };
    // Characters to escape, in the same order as szaReplace
    const char* pFind = bAttrib ? "<&>\'\"" : "<&>";

    std::string csText;
    if (szText == NULL)
        return csText;

    // Same initial estimate as before: source length plus 10% and a little slack
    const size_t nSourceLen = strlen(szText);
    csText.reserve(nSourceLen + nSourceLen / 10 + 7);

    for (const char* pSource = szText; *pSource; ++pSource)
    {
        const char* pFound = strchr(pFind, *pSource);
        if (pFound != NULL)
            csText += szaReplace[pFound - pFind];
        else
            csText += *pSource;
    }
    return csText;
}
// Convert the document range [nLeft, nRight] (inclusive indexes into m_csDoc)
// from XML-friendly text to text as seen outside the document: the escape
// codes &lt; &amp; &gt; &apos; &quot; are replaced with the characters they
// stand for, e.g. "6&gt;7" becomes "6>7". Any other '&' is copied unchanged.
// The result is always the same length or shorter than the range.
//
// The range is clamped to the document, and an empty or inverted range yields
// an empty string. Previously an inverted range produced a negative buffer
// size and a range extending past the document was read out of bounds.
std::string CMarkup::x_TextFromDoc(int nLeft, int nRight) const
{
    static const char* const szaCode[] = { "lt;", "amp;", "gt;", "apos;", "quot;" };
    static const int anCodeLen[] = { 3,4,3,5,5 };
    static const char szSymbol[] = "<&>\'\"";
    const int nCodes = 5;

    std::string csText;

    // Keep the range inside the document
    const int nDocLength = (int)m_csDoc.length();
    if (nLeft < 0)
        nLeft = 0;
    if (nRight >= nDocLength)
        nRight = nDocLength - 1;
    if (nRight < nLeft)
        return csText;

    const char* pSource = m_csDoc.c_str();
    csText.reserve(nRight - nLeft + 1);

    int nChar = nLeft;
    while (nChar <= nRight)
    {
        const char cSource = pSource[nChar];
        if (cSource == '&')
        {
            // Look for matching &code; that lies completely inside the range
            int nMatch = 0;
            for (; nMatch < nCodes; ++nMatch)
            {
                if (nChar <= nRight - anCodeLen[nMatch]
                    && strncmp(szaCode[nMatch], &pSource[nChar+1], anCodeLen[nMatch]) == 0)
                    break;
            }
            if (nMatch < nCodes)
            {
                // Emit the symbol and step past the ampersand and the code
                csText += szSymbol[nMatch];
                nChar += anCodeLen[nMatch] + 1;
                continue;
            }
            // Unknown code: fall through and copy the '&' as is
        }
        csText += cSource;
        ++nChar;
    }
    return csText;
}
// Insert csInsert into m_csDoc at nLeft, replacing nReplace characters.
//
// nLeft is clamped to [0, document length] and nReplace to the number of
// characters available after nLeft, so out-of-range arguments degrade to an
// insert or append instead of touching memory outside the document.
//
// The splice is done with std::string::replace rather than a hand-rolled
// memmove/memcpy on a raw buffer; this yields the same text and does not
// depend on how the buffer helper treats the tail of the document when the
// edit makes the document shorter.
void CMarkup::x_DocChange(int nLeft, int nReplace, const std::string& csInsert)
{
    const int nDocLength = (int)m_csDoc.length();

    // Make sure nLeft and nReplace are within bounds
    nLeft = std::max(0, std::min(nLeft, nDocLength));
    nReplace = std::max(0, std::min(nReplace, nDocLength - nLeft));

    m_csDoc.replace((std::string::size_type)nLeft,
                    (std::string::size_type)nReplace,
                    csInsert);
}
void CMarkup::x_Adjust(int iPos, int nShift, bool bAfterPos)
{
// Shift the stored document offsets of the elements that come after an edit
// at element iPos by nShift characters. The offsets of iPos itself are not
// touched here; the walk below always moves to another element first.
//
// Loop through affected elements and adjust indexes
// Algorithm:
// 1. update children unless bAfterPos
// (if no children or bAfterPos is true, end tag of iPos not affected)
// 2. update next siblings and their children
// 3. go up until there is a next sibling of a parent and update end tags
// 4. step 2
//
// iPosTop tracks the nearest ancestor that contains the edit: for such an
// element only the end offsets move, not the start offsets.
int iPosTop = m_aPos[iPos].iElemParent;
bool bPosFirst = bAfterPos; // mark as first to skip its children
while (iPos)
{
// Were we at containing parent of affected position?
bool bPosTop = false;
if (iPos == iPosTop)
{
// Move iPosTop up one towards root
iPosTop = m_aPos[iPos].iElemParent;
bPosTop = true;
}
// Traverse to the next update position
if (! bPosTop && ! bPosFirst && m_aPos[iPos].iElemChild)
{
// Depth first
iPos = m_aPos[iPos].iElemChild;
}
else if (m_aPos[iPos].iElemNext)
{
iPos = m_aPos[iPos].iElemNext;
}
else
{
// Look for next sibling of a parent of iPos
// When going back up, parents have already been done except iPosTop
while ((iPos=m_aPos[iPos].iElemParent) != 0 && iPos != iPosTop)
if (m_aPos[iPos].iElemNext)
{
iPos = m_aPos[iPos].iElemNext;
break;
}
}
bPosFirst = false;
// Shift indexes at iPos
// Only the AdjustStart call is conditional; AdjustEnd runs on every pass,
// including the final one where iPos has become 0 (slot 0 of m_aPos)
if (iPos != iPosTop)
m_aPos[iPos].AdjustStart(nShift);
m_aPos[iPos].AdjustEnd(nShift);
}
}
// Work out where a new element or node goes in the document.
//
// Inputs:
//   iPosParent  element under which the new item is placed (0 = outside all elements)
//   iPosRel     element the new item is placed relative to, or 0
//   nOffset     document offset of a non-element node (used when nLength != 0)
//   nLength     length of that non-element node, or 0 when not at one
//   nFlags      bit 0: insert before instead of adding after
//               bit 1: honor whitespace (do not skip ahead to the next node)
// Outputs:
//   nOffset     document offset at which to insert
//   iPosRel     element that will precede the new item among its siblings, or 0
void CMarkup::x_LocateNew(int iPosParent, int& iPosRel, int& nOffset, int nLength, int nFlags)
{
    const bool bBefore = (nFlags & 1) != 0;
    const bool bKeepWhitespace = (nFlags & 2) != 0;
    const bool bParentIsEmptyElem = m_aPos[iPosParent].IsEmptyElement();

    // Pick the raw insertion offset
    std::string::size_type nWhere;
    if (nLength != 0)
    {
        // Located at a non-element node: before it, or just past it
        nWhere = bBefore ? nOffset : nOffset + nLength;
    }
    else if (iPosRel != 0)
    {
        // Located at an element: precede it, or follow its end tag
        nWhere = bBefore ? m_aPos[iPosRel].nStartL : m_aPos[iPosRel].nEndR + 1;
    }
    else if (iPosParent == 0)
    {
        // Outside of all elements: very start or very end of the document
        nWhere = bBefore ? 0 : m_csDoc.length();
    }
    else if (bParentIsEmptyElem)
    {
        // Parent has no separate end tag, so split empty element
        nWhere = m_aPos[iPosParent].nStartR;
    }
    else
    {
        // Inside the parent: after its start tag, or before its end tag
        nWhere = bBefore ? m_aPos[iPosParent].nStartR + 1 : m_aPos[iPosParent].nEndL;
    }

    // Go up to start of next node, unless its splitting an empty element
    if (!bKeepWhitespace && !bParentIsEmptyElem)
    {
        const char* pText = (const char*)m_csDoc.c_str();
        int nProbe = (int)nWhere;
        const bool bFoundText = x_FindAny(pText, nProbe);
        if (!bFoundText || pText[nProbe] == '<')
            nWhere = nProbe;
    }

    // Determine which sibling will come immediately before the new item
    int iPrevSibling = 0;
    if (iPosRel != 0)
    {
        if (!bBefore)
        {
            iPrevSibling = iPosRel;
        }
        else
        {
            // Inserting before iPosRel: find its previous sibling, if it has one
            int iWalk = m_aPos[iPosParent].iElemChild;
            if (iWalk != iPosRel)
            {
                while (m_aPos[iWalk].iElemNext != iPosRel)
                    iWalk = m_aPos[iWalk].iElemNext;
                iPrevSibling = iWalk;
            }
        }
    }
    else if (!bBefore)
    {
        // Appending under the parent: the last existing child precedes the new item
        for (int iWalk = m_aPos[iPosParent].iElemChild; iWalk != 0;
             iWalk = m_aPos[iWalk].iElemNext)
        {
            iPrevSibling = iWalk;
        }
    }

    nOffset = (int)nWhere;
    iPosRel = iPrevSibling;
}
bool CMarkup::x_AddElem(const char* szName, const char* szValue, bool bInsert, bool bAddChild)
{
if (bAddChild)
{
// Adding a child element under main position
if (! m_iPos)
return false;
}
else if (m_iPosParent == 0)
{
// Adding root element
if (IsWellFormed())
return false;
// Locate after any version and DTD
m_aPos[0].nEndL = (int)m_csDoc.length();
}
// Locate where to add element relative to current node
int iPosParent, iPosBefore, nOffset = 0, nLength = 0;