Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merged from trunk.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | asTypedList
Files: files | file ages | folders
SHA3-256: 78a7f45535d9925cd6b7c2dbb4984782a036fbbc1df5e459811412406f0def41
User & Date: rolf 2024-09-27 12:17:40
Context
2024-09-27
16:01
Added a few more tests. Closed-Leaf check-in: 96986493f8 user: rolf tags: asTypedList
12:17
Merged from trunk. check-in: 78a7f45535 user: rolf tags: asTypedList
00:38
Noted the expat update in CHANGES. check-in: ebf8e476e5 user: rolf tags: trunk
00:15
Improved error handling and error messages. check-in: cf19b79d68 user: rolf tags: asTypedList
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to CHANGES.





1
2
3
4
5
6
7





2024-09-08  Rolf Ade  <rolf@pointsman.de>

        Added the flag -keepTextStart to the expat command.

2024-08-31  Rolf Ade  <rolf@pointsman.de>

>
>
>
>







1
2
3
4
5
6
7
8
9
10
11

2024-09-23  Rolf Ade  <rolf@pointsman.de>

        Updated to expat 2.6.3.

2024-09-08  Rolf Ade  <rolf@pointsman.de>

        Added the flag -keepTextStart to the expat command.

2024-08-31  Rolf Ade  <rolf@pointsman.de>

Changes to README.md.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

## tDOM - a XML / DOM / XPath / XSLT / HTML / JSON implementation for Tcl
### Version 0.9.4

tDOM 0.9.4 works with Tcl 8.5, 8.6 and 9.0.

### tDOM contains:

   *  for convenience expat 2.6.2, the XML parser originated from
      James Clark, although you're able to link tDOM with other
      expat versions or the library provided by the system.

   *  building a DOM tree from XML in one go implemented in C for
      maximum performance and minimum memory usage, and DOM I and II
      methods to work on such a tree using either a OO-like or a
      handle syntax.








|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

## tDOM - a XML / DOM / XPath / XSLT / HTML / JSON implementation for Tcl
### Version 0.9.4

tDOM 0.9.4 works with Tcl 8.5, 8.6 and 9.0.

### tDOM contains:

   *  for convenience expat 2.6.3, the XML parser originated from
      James Clark, although you're able to link tDOM with other
      expat versions or the library provided by the system.

   *  building a DOM tree from XML in one go implemented in C for
      maximum performance and minimum memory usage, and DOM I and II
      methods to work on such a tree using either a OO-like or a
      handle syntax.

Changes to expat/VERSION.

1
expat-2.6.2
|
1
expat-2.6.3

Changes to expat/expat.h.

1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);

/* Expat follows the semantic versioning convention.
   See https://semver.org
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 6
#define XML_MICRO_VERSION 2

#ifdef __cplusplus
}
#endif

#endif /* not Expat_INCLUDED */







|






1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);

/* Expat follows the semantic versioning convention.
   See https://semver.org
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 6
#define XML_MICRO_VERSION 3

#ifdef __cplusplus
}
#endif

#endif /* not Expat_INCLUDED */

Changes to expat/siphash.h.

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#define SIP_U8TO64_LE(p)                                                       \
  (((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8)                       \
   | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24)                   \
   | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40)                   \
   | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))

#define SIPHASH_INITIALIZER                                                    \
  { 0, 0, 0, 0, {0}, 0, 0 }

struct siphash {
  uint64_t v0, v1, v2, v3;

  unsigned char buf[8], *p;
  uint64_t c;
}; /* struct siphash */







|
<







122
123
124
125
126
127
128
129

130
131
132
133
134
135
136

#define SIP_U8TO64_LE(p)                                                       \
  (((uint64_t)((p)[0]) << 0) | ((uint64_t)((p)[1]) << 8)                       \
   | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24)                   \
   | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40)                   \
   | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))

#define SIPHASH_INITIALIZER {0, 0, 0, 0, {0}, 0, 0}


struct siphash {
  uint64_t v0, v1, v2, v3;

  unsigned char buf[8], *p;
  uint64_t c;
}; /* struct siphash */

Changes to expat/xmlparse.c.

1
2
3
4
5
6
7
8
/* 2a14271ad4d35e82bde8ba210b4edb7998794bcbae54deab114046a300f9639a (2.6.2+)
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

|







1
2
3
4
5
6
7
8
/* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+)
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

35
36
37
38
39
40
41

42
43
44
45
46
47
48
   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
   Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
   Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
   Copyright (c) 2022      Jann Horn <jannh@google.com>
   Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
   Copyright (c) 2023      Owain Davies <owaind@bath.edu>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>

   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit







>







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
   Copyright (c) 2022      Samanta Navarro <ferivoz@riseup.net>
   Copyright (c) 2022      Jeffrey Walton <noloader@gmail.com>
   Copyright (c) 2022      Jann Horn <jannh@google.com>
   Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
   Copyright (c) 2023      Owain Davies <owaind@bath.edu>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
   Copyright (c) 2024      Berkay Eren Ürün <berkay.ueruen@siemens.com>
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
  int prefixLen;
} TAG_NAME;

/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XMLParseBuffer() when the element is open, the memory for the 'raw'
   version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser reuses these structures, maintaining a list of allocated
   TAG objects in a free list.







|







291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
  int prefixLen;
} TAG_NAME;

/* TAG represents an open element.
   The name of the element is stored in both the document and API
   encodings.  The memory buffer 'buf' is a separately-allocated
   memory area which stores the name.  During the XML_Parse()/
   XML_ParseBuffer() when the element is open, the memory for the 'raw'
   version of the name (in the document encoding) is shared with the
   document buffer.  If the element is open across calls to
   XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
   contain the 'raw' name as well.

   A parser reuses these structures, maintaining a list of allocated
   TAG objects in a free list.
2034
2035
2036
2037
2038
2039
2040






2041
2042
2043
2044
2045
2046
2047
enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
  const char *start;
  enum XML_Status result = XML_STATUS_OK;

  if (parser == NULL)
    return XML_STATUS_ERROR;






  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return XML_STATUS_ERROR;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;







>
>
>
>
>
>







2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
  const char *start;
  enum XML_Status result = XML_STATUS_OK;

  if (parser == NULL)
    return XML_STATUS_ERROR;

  if (len < 0) {
    parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
    return XML_STATUS_ERROR;
  }

  switch (parser->m_parsingStatus.parsing) {
  case XML_SUSPENDED:
    parser->m_errorCode = XML_ERROR_SUSPENDED;
    return XML_STATUS_ERROR;
  case XML_FINISHED:
    parser->m_errorCode = XML_ERROR_FINISHED;
    return XML_STATUS_ERROR;
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860

5861
5862
5863
5864
5865
5866
5867
  openEntity->internalEventPtr = NULL;
  openEntity->internalEventEndPtr = NULL;
  textStart = (const char *)entity->textPtr;
  textEnd = (const char *)(entity->textPtr + entity->textLen);
  /* Set a safe default value in case 'next' does not get set */
  next = textStart;

#ifdef XML_DTD
  if (entity->is_param) {
    int tok
        = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
    result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                      tok, next, &next, XML_FALSE, XML_FALSE,
                      XML_ACCOUNT_ENTITY_EXPANSION);
  } else
#endif /* XML_DTD */
    result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
                       textStart, textEnd, &next, XML_FALSE,
                       XML_ACCOUNT_ENTITY_EXPANSION);


  if (result == XML_ERROR_NONE) {
    if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
      entity->processed = (int)(next - textStart);
      parser->m_processor = internalEntityProcessor;
    } else if (parser->m_openInternalEntities->entity == entity) {
#if XML_GE == 1







<






|
<



>







5849
5850
5851
5852
5853
5854
5855

5856
5857
5858
5859
5860
5861
5862

5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
  openEntity->internalEventPtr = NULL;
  openEntity->internalEventEndPtr = NULL;
  textStart = (const char *)entity->textPtr;
  textEnd = (const char *)(entity->textPtr + entity->textLen);
  /* Set a safe default value in case 'next' does not get set */
  next = textStart;


  if (entity->is_param) {
    int tok
        = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
    result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                      tok, next, &next, XML_FALSE, XML_FALSE,
                      XML_ACCOUNT_ENTITY_EXPANSION);
  } else {

    result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
                       textStart, textEnd, &next, XML_FALSE,
                       XML_ACCOUNT_ENTITY_EXPANSION);
  }

  if (result == XML_ERROR_NONE) {
    if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
      entity->processed = (int)(next - textStart);
      parser->m_processor = internalEntityProcessor;
    } else if (parser->m_openInternalEntities->entity == entity) {
#if XML_GE == 1
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907
5908

5909
5910
5911
5912
5913
5914
5915

  entity = openEntity->entity;
  textStart = ((const char *)entity->textPtr) + entity->processed;
  textEnd = (const char *)(entity->textPtr + entity->textLen);
  /* Set a safe default value in case 'next' does not get set */
  next = textStart;

#ifdef XML_DTD
  if (entity->is_param) {
    int tok
        = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
    result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                      tok, next, &next, XML_FALSE, XML_TRUE,
                      XML_ACCOUNT_ENTITY_EXPANSION);
  } else
#endif /* XML_DTD */
    result = doContent(parser, openEntity->startTagLevel,
                       parser->m_internalEncoding, textStart, textEnd, &next,
                       XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);


  if (result != XML_ERROR_NONE)
    return result;

  if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
    entity->processed = (int)(next - (const char *)entity->textPtr);
    return result;







<






|
<



>







5896
5897
5898
5899
5900
5901
5902

5903
5904
5905
5906
5907
5908
5909

5910
5911
5912
5913
5914
5915
5916
5917
5918
5919
5920

  entity = openEntity->entity;
  textStart = ((const char *)entity->textPtr) + entity->processed;
  textEnd = (const char *)(entity->textPtr + entity->textLen);
  /* Set a safe default value in case 'next' does not get set */
  next = textStart;


  if (entity->is_param) {
    int tok
        = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
    result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
                      tok, next, &next, XML_FALSE, XML_TRUE,
                      XML_ACCOUNT_ENTITY_EXPANSION);
  } else {

    result = doContent(parser, openEntity->startTagLevel,
                       parser->m_internalEncoding, textStart, textEnd, &next,
                       XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
  }

  if (result != XML_ERROR_NONE)
    return result;

  if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
    entity->processed = (int)(next - (const char *)entity->textPtr);
    return result;
5928
5929
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
  // upcoming call to XML_ResumeParser continue with entity content, or it would
  // be ignored altogether.
  if (parser->m_openInternalEntities != NULL
      && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
    return XML_ERROR_NONE;
  }

#ifdef XML_DTD
  if (entity->is_param) {
    int tok;
    parser->m_processor = prologProcessor;
    tok = XmlPrologTok(parser->m_encoding, s, end, &next);
    return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
                    (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
                    XML_ACCOUNT_DIRECT);
  } else
#endif /* XML_DTD */
  {
    parser->m_processor = contentProcessor;
    /* see externalEntityContentProcessor vs contentProcessor */
    result = doContent(parser, parser->m_parentParser ? 1 : 0,
                       parser->m_encoding, s, end, nextPtr,
                       (XML_Bool)! parser->m_parsingStatus.finalBuffer,
                       XML_ACCOUNT_DIRECT);
    if (result == XML_ERROR_NONE) {







<







|
<
<







5933
5934
5935
5936
5937
5938
5939

5940
5941
5942
5943
5944
5945
5946
5947


5948
5949
5950
5951
5952
5953
5954
  // upcoming call to XML_ResumeParser continue with entity content, or it would
  // be ignored altogether.
  if (parser->m_openInternalEntities != NULL
      && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
    return XML_ERROR_NONE;
  }


  if (entity->is_param) {
    int tok;
    parser->m_processor = prologProcessor;
    tok = XmlPrologTok(parser->m_encoding, s, end, &next);
    return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
                    (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
                    XML_ACCOUNT_DIRECT);
  } else {


    parser->m_processor = contentProcessor;
    /* see externalEntityContentProcessor vs contentProcessor */
    result = doContent(parser, parser->m_parentParser ? 1 : 0,
                       parser->m_encoding, s, end, nextPtr,
                       (XML_Bool)! parser->m_parsingStatus.finalBuffer,
                       XML_ACCOUNT_DIRECT);
    if (result == XML_ERROR_NONE) {
7012
7013
7014
7015
7016
7017
7018










7019
7020
7021
7022
7023
7024
7025
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {










      newE->defaultAtts
          = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)







>
>
>
>
>
>
>
>
>
>







7014
7015
7016
7017
7018
7019
7020
7021
7022
7023
7024
7025
7026
7027
7028
7029
7030
7031
7032
7033
7034
7035
7036
7037
    if (! name)
      return 0;
    newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
                                  sizeof(ELEMENT_TYPE));
    if (! newE)
      return 0;
    if (oldE->nDefaultAtts) {
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if ((size_t)oldE->nDefaultAtts
          > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
        return 0;
      }
#endif
      newE->defaultAtts
          = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
      if (! newE->defaultAtts) {
        return 0;
      }
    }
    if (oldE->idAtt)
7554
7555
7556
7557
7558
7559
7560









7561
7562
7563
7564
7565
7566
7567
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {









    dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {







>
>
>
>
>
>
>
>
>







7566
7567
7568
7569
7570
7571
7572
7573
7574
7575
7576
7577
7578
7579
7580
7581
7582
7583
7584
7585
7586
7587
7588
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
      return -1;
    }
#endif
    dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {