Index: expat/VERSION ================================================================== --- expat/VERSION +++ expat/VERSION @@ -1,1 +1,1 @@ -expat-2.3.0 +expat-2.4.2 Index: expat/ascii.h ================================================================== --- expat/ascii.h +++ expat/ascii.h @@ -4,12 +4,15 @@ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser - Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 1999-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2007 Karl Waclawek + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/asciitab.h ================================================================== --- expat/asciitab.h +++ expat/asciitab.h @@ -5,11 +5,13 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/expat.h ================================================================== --- expat/expat.h +++ expat/expat.h @@ -5,11 +5,18 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2005 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2017 Rhodri James Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -115,11 +122,13 @@ XML_ERROR_RESERVED_PREFIX_XMLNS, XML_ERROR_RESERVED_NAMESPACE_URI, /* Added in 2.2.1. */ XML_ERROR_INVALID_ARGUMENT, /* Added in 2.3.0. */ - XML_ERROR_NO_BUFFER + XML_ERROR_NO_BUFFER, + /* Added in 2.4.0. */ + XML_ERROR_AMPLIFICATION_LIMIT_BREACH }; enum XML_Content_Type { XML_CTYPE_EMPTY = 1, XML_CTYPE_ANY, @@ -997,11 +1006,14 @@ XML_FEATURE_MIN_SIZE, XML_FEATURE_SIZEOF_XML_CHAR, XML_FEATURE_SIZEOF_XML_LCHAR, XML_FEATURE_NS, XML_FEATURE_LARGE_SIZE, - XML_FEATURE_ATTR_INFO + XML_FEATURE_ATTR_INFO, + /* Added in Expat 2.4.0. */ + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT /* Additional features must be added to the end of this enum. */ }; typedef struct { enum XML_FeatureEnum feature; @@ -1009,18 +1021,30 @@ long int value; } XML_Feature; XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); + +#ifdef XML_DTD +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor); + +/* Added in Expat 2.4.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +#endif /* Expat follows the semantic versioning convention. See http://semver.org. */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 3 -#define XML_MICRO_VERSION 0 +#define XML_MINOR_VERSION 4 +#define XML_MICRO_VERSION 2 #ifdef __cplusplus } #endif #endif /* not Expat_INCLUDED */ Index: expat/expat_external.h ================================================================== --- expat/expat_external.h +++ expat/expat_external.h @@ -5,11 +5,18 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2004 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2016 Cristian Rodríguez + Copyright (c) 2016-2019 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Yury Gribov Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/iasciitab.h ================================================================== --- expat/iasciitab.h +++ expat/iasciitab.h @@ -5,11 +5,13 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/internal.h ================================================================== --- expat/internal.h +++ expat/internal.h @@ -23,12 +23,16 @@ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser - Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2003 Greg Stein + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2018 Yury Gribov + Copyright (c) 2019 David Loffredo Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -98,26 +102,62 @@ #else # ifndef inline # define inline # endif #endif + +#include // ULONG_MAX + +#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO) +# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" +# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#else +# define EXPAT_FMT_ULL(midpart) "%" midpart "llu" +# if ! defined(ULONG_MAX) +# error Compiler did not define ULONG_MAX for us +# elif ULONG_MAX == 18446744073709551615u // 2^64-1 +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" +# else +# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" +# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" +# endif +#endif #ifndef UNUSED_P # define UNUSED_P(p) (void)p #endif + +/* NOTE BEGIN If you ever patch these defaults to greater values + for non-attack XML payload in your environment, + please file a bug report with libexpat. Thank you! +*/ +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \ + 100.0f +#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ + 8388608 // 8 MiB, 2^23 +/* NOTE END */ + +#include "expat.h" // so we can use type XML_Parser below #ifdef __cplusplus extern "C" { #endif -#ifdef XML_ENABLE_VISIBILITY -# if XML_ENABLE_VISIBILITY -__attribute__((visibility("default"))) -# endif +void _INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef); + +#if defined(XML_DTD) +unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); +unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); +const char *unsignedCharToPrintable(unsigned char c); #endif -void -_INTERNAL_trim_to_complete_utf8_characters(const char *from, - const char **fromLimRef); #ifdef __cplusplus } #endif Index: expat/latin1tab.h ================================================================== --- expat/latin1tab.h +++ expat/latin1tab.h @@ -5,11 +5,13 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/nametab.h ================================================================== --- expat/nametab.h +++ expat/nametab.h @@ -4,12 +4,12 @@ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser - Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/utf8tab.h ================================================================== --- expat/utf8tab.h +++ expat/utf8tab.h @@ -5,11 +5,13 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/winconfig.h ================================================================== --- expat/winconfig.h +++ expat/winconfig.h @@ -4,12 +4,14 @@ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser - Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2005 Karl Waclawek + Copyright (c) 2017-2021 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -38,19 +40,6 @@ #undef WIN32_LEAN_AND_MEAN #include #include -#if defined(HAVE_EXPAT_CONFIG_H) /* e.g. MinGW */ -# include -#else /* !defined(HAVE_EXPAT_CONFIG_H) */ - -# define XML_NS 1 -# define XML_DTD 1 -# define XML_CONTEXT_BYTES 1024 - -/* we will assume all Windows platforms are little endian */ -# define BYTEORDER 1234 - -#endif /* !defined(HAVE_EXPAT_CONFIG_H) */ - #endif /* ndef WINCONFIG_H */ Index: expat/xmlparse.c ================================================================== --- expat/xmlparse.c +++ expat/xmlparse.c @@ -1,15 +1,40 @@ -/* d667b5f8e56e24fdfaf5e38596d419d924a9fadceb987d81d5613ecb7ca51b0e (2.3.0+) +/* 0550bc9a27b099d462d8d1007271cfeaa39852f20cd0d5d2caeadaeb39516fbe (2.4.2+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2000-2006 Fred L. Drake, Jr. + Copyright (c) 2001-2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016 Eric Rahm + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016 Gaurav + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2016 Gustavo Grieco + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Ed Schouten + Copyright (c) 2017-2018 Rhodri James + Copyright (c) 2017 Václav Slavík + Copyright (c) 2017 Viktor Szakats + Copyright (c) 2017 Chanho Park + Copyright (c) 2017 Rolf Eike Beer + Copyright (c) 2017 Hans Wennborg + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Marco Maggi + Copyright (c) 2018 Mariusz Zaborski + Copyright (c) 2019 David Loffredo + Copyright (c) 2019-2020 Ben Wagner + Copyright (c) 2019 Vadim Zeitlin + Copyright (c) 2021 Dong-hee Na Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -27,10 +52,14 @@ NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#define XML_BUILDING_EXPAT 1 + +#include #if ! defined(_GNU_SOURCE) # define _GNU_SOURCE 1 /* syscall prototype */ #endif @@ -46,10 +75,11 @@ #include #include /* UINT_MAX */ #include /* fprintf */ #include /* getenv, rand_s */ #include /* uintptr_t */ +#include /* isnan */ #ifdef _WIN32 # define getpid GetCurrentProcessId #else # include /* gettimeofday() */ @@ -57,17 +87,13 @@ # include /* getpid() */ # include /* O_RDONLY */ # include #endif -#define XML_BUILDING_EXPAT 1 - #ifdef _WIN32 # include "winconfig.h" -#elif defined(HAVE_EXPAT_CONFIG_H) -# include -#endif /* ndef _WIN32 */ +#endif #include "ascii.h" #include "expat.h" #include "siphash.h" @@ -370,10 +396,35 @@ ENTITY *entity; int startTagLevel; XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; +enum XML_Account { + XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ + XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity + expansion */ + XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ +}; + +#ifdef XML_DTD +typedef unsigned long long XmlBigCount; +typedef struct accounting { + XmlBigCount countBytesDirect; + XmlBigCount countBytesIndirect; + int debugLevel; + float maximumAmplificationFactor; // >=1.0 + unsigned long long activationThresholdBytes; +} ACCOUNTING; + +typedef struct entity_stats { + unsigned int countEverOpened; + unsigned int currentDepth; + unsigned int maximumDepthSeen; + int debugLevel; +} ENTITY_STATS; +#endif /* XML_DTD */ + typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end, const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; @@ -400,47 +451,53 @@ const char *s, const char *next); static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, - XML_Bool haveMore, XML_Bool allowClosingDoctype); + XML_Bool haveMore, XML_Bool allowClosingDoctype, + enum XML_Account account); static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, - XML_Bool haveMore); + XML_Bool haveMore, enum XML_Account account); static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, - const char **nextPtr, XML_Bool haveMore); + const char **nextPtr, XML_Bool haveMore, + enum XML_Account account); #ifdef XML_DTD static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ static void freeBindings(XML_Parser parser, BINDING *bindings); static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, - BINDING **bindingsPtr); + BINDING **bindingsPtr, + enum XML_Account account); static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser); static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, - const char *, STRING_POOL *); + const char *, STRING_POOL *, + enum XML_Account account); static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, - const char *, STRING_POOL *); + const char *, STRING_POOL *, + enum XML_Account account); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, - const char *start, const char *end); + const char *start, const char *end, + enum XML_Account account); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static void reportDefault(XML_Parser parser, const ENCODING *enc, @@ -500,10 +557,38 @@ const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, DTD *dtd); static void parserInit(XML_Parser parser, const XML_Char *encodingName); +#ifdef XML_DTD +static float accountingGetCurrentAmplification(XML_Parser rootParser); +static void accountingReportStats(XML_Parser originParser, const char *epilog); +static void accountingOnAbort(XML_Parser originParser); +static void accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, + const char *before, const char *after, + ptrdiff_t bytesMore, int source_line, + enum XML_Account account); +static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, + const char *before, const char *after, + int source_line, + enum XML_Account account); + +static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, + const char *action, int sourceLine); +static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, + int sourceLine); +static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, + int sourceLine); + +static XML_Parser getRootParserOf(XML_Parser parser, + unsigned int *outLevelDiff); +#endif /* XML_DTD */ + +static unsigned long getDebugLevel(const char *variableName, + unsigned long defaultDebugLevel); + #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) #define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolChop(pool) ((void)--(pool->ptr)) #define poolLastChar(pool) (((pool)->ptr)[-1]) @@ -613,10 +698,14 @@ XML_Bool m_isParamEntity; XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; +#ifdef XML_DTD + ACCOUNTING m_accounting; + ENTITY_STATS m_entity_stats; +#endif }; #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) #define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) #define FREE(parser, p) (parser->m_mem.free_fcn((p))) @@ -797,13 +886,12 @@ #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ static unsigned long ENTROPY_DEBUG(const char *label, unsigned long entropy) { - const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG"); - if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) { - fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, + if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { + fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); } return entropy; } @@ -1061,10 +1149,22 @@ parser->m_isParamEntity = XML_FALSE; parser->m_useForeignDTD = XML_FALSE; parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif parser->m_hash_secret_salt = 0; + +#ifdef XML_DTD + memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); + parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); + parser->m_accounting.maximumAmplificationFactor + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_accounting.activationThresholdBytes + = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; + + memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); + parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); +#endif } /* moves list of bindings to m_freeBindingList */ static void FASTCALL moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { @@ -2335,10 +2435,14 @@ return XML_L("invalid argument"); /* Added in 2.3.0. */ case XML_ERROR_NO_BUFFER: return XML_L( "a successful prior call to function XML_GetBuffer is required"); + /* Added in 2.4.0. */ + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + return XML_L( + "limit on input amplification factor (from DTD and entities) breached"); } return NULL; } const XML_LChar *XMLCALL @@ -2371,45 +2475,79 @@ return version; } const XML_Feature *XMLCALL XML_GetFeatureList(void) { - static const XML_Feature features[] - = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), - sizeof(XML_Char)}, - {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), - sizeof(XML_LChar)}, + static const XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, #ifdef XML_UNICODE - {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, #endif #ifdef XML_UNICODE_WCHAR_T - {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, #endif #ifdef XML_DTD - {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif #ifdef XML_CONTEXT_BYTES - {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), - XML_CONTEXT_BYTES}, + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, #endif #ifdef XML_MIN_SIZE - {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, #endif #ifdef XML_NS - {XML_FEATURE_NS, XML_L("XML_NS"), 0}, + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, #endif #ifdef XML_LARGE_SIZE - {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, #endif #ifdef XML_ATTR_INFO - {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif +#ifdef XML_DTD + /* Added in Expat 2.4.0. */ + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_BLAP_MAX_AMP"), + (long int) + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_BLAP_ACT_THRES"), + EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, #endif - {XML_FEATURE_END, NULL, 0}}; + {XML_FEATURE_END, NULL, 0}}; return features; } +#ifdef XML_DTD +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionMaximumAmplification( + XML_Parser parser, float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_accounting.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} +#endif /* XML_DTD */ + /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was processed, and not yet closed, we need to store tag->rawName in a more permanent location, since the parse buffer is about to be discarded. */ @@ -2458,13 +2596,13 @@ } static enum XML_Error PTRCALL contentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { - enum XML_Error result - = doContent(parser, 0, parser->m_encoding, start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + enum XML_Error result = doContent( + parser, 0, parser->m_encoding, start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; @@ -2485,10 +2623,18 @@ const char *end, const char **endPtr) { const char *next = start; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif /* XML_DTD */ + /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to doContent (by detecting XML_TOK_NONE) without processing any xml text declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. */ @@ -2522,10 +2668,14 @@ const char *end, const char **endPtr) { int tok; const char *next = start; /* XmlContentTok doesn't always set the last arg */ parser->m_eventPtr = start; tok = XmlContentTok(parser->m_encoding, start, end, &next); + /* Note: These bytes are accounted later in: + - processXmlDecl + - externalEntityContentProcessor + */ parser->m_eventEndPtr = next; switch (tok) { case XML_TOK_XML_DECL: { enum XML_Error result; @@ -2563,11 +2713,12 @@ static enum XML_Error PTRCALL externalEntityContentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; @@ -2574,11 +2725,11 @@ } static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *s, const char *end, const char **nextPtr, - XML_Bool haveMore) { + XML_Bool haveMore, enum XML_Account account) { /* save one level of indirection */ DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; @@ -2592,10 +2743,21 @@ *eventPP = s; for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); +#ifdef XML_DTD + const char *accountAfter + = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) + ? (haveMore ? s /* i.e. 0 bytes */ : end) + : next; + if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, + account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: if (haveMore) { *nextPtr = s; @@ -2647,10 +2809,18 @@ const XML_Char *name; ENTITY *entity; XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ if (parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; @@ -2765,11 +2935,12 @@ } } } tag->name.str = (XML_Char *)tag->buf; *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); + result + = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); if (result) return result; if (parser->m_startElementHandler) parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, (const XML_Char **)parser->m_atts); @@ -2789,11 +2960,12 @@ name.str = poolStoreString(&parser->m_tempPool, enc, rawName, rawName + XmlNameLength(enc, rawName)); if (! name.str) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); + result = storeAtts(parser, enc, s, &name, &bindings, + XML_ACCOUNT_NONE /* token spans whole start tag */); if (result != XML_ERROR_NONE) { freeBindings(parser, bindings); return result; } poolFinish(&parser->m_tempPool); @@ -2924,11 +3096,12 @@ parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore); + result + = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); if (result != XML_ERROR_NONE) return result; else if (! next) { parser->m_processor = cdataSectionProcessor; return result; @@ -3053,11 +3226,12 @@ - process namespace declarations (check and report them) - generate namespace aware element name (URI, prefix) */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr) { + TAG_NAME *tagNamePtr, BINDING **bindingsPtr, + enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ELEMENT_TYPE *elementType; int nDefaultAtts; const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; @@ -3163,11 +3337,11 @@ } /* normalize the attribute value */ result = storeAttributeValue( parser, enc, isCdata, parser->m_atts[i].valuePtr, - parser->m_atts[i].valueEnd, &parser->m_tempPool); + parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) return result; appAtts[attIndex] = poolStart(&parser->m_tempPool); poolFinish(&parser->m_tempPool); } else { @@ -3552,13 +3726,13 @@ the whole file is parsed with one call. */ static enum XML_Error PTRCALL cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { - enum XML_Error result - = doCdataSection(parser, parser->m_encoding, &start, end, endPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + enum XML_Error result = doCdataSection( + parser, parser->m_encoding, &start, end, endPtr, + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result != XML_ERROR_NONE) return result; if (start) { if (parser->m_parentParser) { /* we are parsing an external entity */ parser->m_processor = externalEntityContentProcessor; @@ -3574,11 +3748,12 @@ /* startPtr gets set to non-null if the section is closed, and to null if the section is not yet closed. */ static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, - const char *end, const char **nextPtr, XML_Bool haveMore) { + const char *end, const char **nextPtr, XML_Bool haveMore, + enum XML_Account account) { const char *s = *startPtr; const char **eventPP; const char **eventEndPP; if (enc == parser->m_encoding) { eventPP = &parser->m_eventPtr; @@ -3592,10 +3767,18 @@ *startPtr = NULL; for (;;) { const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#else + UNUSED_P(account); +#endif *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: if (parser->m_endCdataSectionHandler) parser->m_endCdataSectionHandler(parser->m_handlerArg); @@ -3736,10 +3919,17 @@ /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); @@ -3820,10 +4010,19 @@ const ENCODING *newEncoding = NULL; const char *version = NULL; const char *versionend; const XML_Char *storedversion = NULL; int standalone = -1; + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif + if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, &version, &versionend, &encodingName, &newEncoding, &standalone)) { if (isGeneralTextEntity) return XML_ERROR_TEXT_DECL; @@ -3969,10 +4168,14 @@ const char *next = start; parser->m_eventPtr = start; for (;;) { tok = XmlPrologTok(parser->m_encoding, start, end, &next); + /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: + - storeEntityValue + - processXmlDecl + */ parser->m_eventEndPtr = next; if (tok <= 0) { if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; @@ -3987,11 +4190,12 @@ case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, parser->m_encoding, s, end); + return storeEntityValue(parser, parser->m_encoding, s, end, + XML_ACCOUNT_DIRECT); } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; @@ -4014,10 +4218,18 @@ then, when this routine is entered the next time, XmlPrologTok will return XML_TOK_INVALID, since the BOM is still in the buffer */ else if (tok == XML_TOK_BOM && next == end && ! parser->m_parsingStatus.finalBuffer) { +# ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +# endif + *nextPtr = next; return XML_ERROR_NONE; } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. it doesn't begin with @@ -4056,20 +4268,28 @@ break; } } /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. However, when parsing an external subset, doProlog will not accept a BOM - as valid, and report a syntax error, so we have to skip the BOM + as valid, and report a syntax error, so we have to skip the BOM, and + account for the BOM bytes. */ else if (tok == XML_TOK_BOM) { + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + s = next; tok = XmlPrologTok(parser->m_encoding, s, end, &next); } parser->m_processor = prologProcessor; return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error PTRCALL entityValueProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { @@ -4078,10 +4298,13 @@ const ENCODING *enc = parser->m_encoding; int tok; for (;;) { tok = XmlPrologTok(enc, start, end, &next); + /* Note: These bytes are accounted later in: + - storeEntityValue + */ if (tok <= 0) { if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -4095,11 +4318,11 @@ case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ - return storeEntityValue(parser, enc, s, end); + return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); } start = next; } } @@ -4109,17 +4332,18 @@ prologProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { const char *next = s; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, XML_Bool haveMore, - XML_Bool allowClosingDoctype) { + XML_Bool allowClosingDoctype, enum XML_Account account) { #ifdef XML_DTD static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; #endif /* XML_DTD */ static const XML_Char atypeCDATA[] = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; @@ -4141,10 +4365,14 @@ static const XML_Char notationPrefix[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; + +#ifndef XML_DTD + UNUSED_P(account); +#endif /* save one level of indirection */ DTD *const dtd = parser->m_dtd; const char **eventPP; @@ -4206,10 +4434,23 @@ next = end; break; } } role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); +#ifdef XML_DTD + switch (role) { + case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor + case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl + break; + default: + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } + } +#endif switch (role) { case XML_ROLE_XML_DECL: { enum XML_Error result = processXmlDecl(parser, 0, s, next); if (result != XML_ERROR_NONE) return result; @@ -4481,11 +4722,12 @@ case XML_ROLE_FIXED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { const XML_Char *attVal; enum XML_Error result = storeAttributeValue( parser, enc, parser->m_declAttributeIsCdata, - s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool); + s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, + XML_ACCOUNT_NONE); if (result) return result; attVal = poolStart(&dtd->pool); poolFinish(&dtd->pool); /* ID attributes aren't allowed to have a default */ @@ -4514,12 +4756,13 @@ } } break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { - enum XML_Error result = storeEntityValue( - parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); + enum XML_Error result + = storeEntityValue(parser, enc, s + enc->minBytesPerChar, + next - enc->minBytesPerChar, XML_ACCOUNT_NONE); if (parser->m_declEntity) { parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); poolFinish(&dtd->entityValuePool); @@ -4905,16 +5148,19 @@ break; } if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; handleDefault = XML_FALSE; if (! dtd->paramEntityRead) { dtd->keepProcessing = dtd->standalone; break; @@ -5108,10 +5354,17 @@ parser->m_processor = epilogProcessor; parser->m_eventPtr = s; for (;;) { const char *next = NULL; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif parser->m_eventEndPtr = next; switch (tok) { /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: if (parser->m_defaultHandler) { @@ -5181,10 +5434,13 @@ = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); if (! openEntity) return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif entity->processed = 0; openEntity->next = parser->m_openInternalEntities; parser->m_openInternalEntities = openEntity; openEntity->entity = entity; openEntity->startTagLevel = parser->m_tagLevel; @@ -5199,21 +5455,26 @@ #ifdef XML_DTD if (entity->is_param) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_FALSE); + tok, next, &next, XML_FALSE, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); } else #endif /* XML_DTD */ result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, - textStart, textEnd, &next, XML_FALSE); + textStart, textEnd, &next, XML_FALSE, + XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - textStart); parser->m_processor = internalEntityProcessor; } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif /* XML_DTD */ entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; @@ -5242,24 +5503,28 @@ #ifdef XML_DTD if (entity->is_param) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_TRUE); + tok, next, &next, XML_FALSE, XML_TRUE, + XML_ACCOUNT_ENTITY_EXPANSION); } else #endif /* XML_DTD */ result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding, textStart, textEnd, &next, - XML_FALSE); + XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); if (result != XML_ERROR_NONE) return result; else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - (const char *)entity->textPtr); return result; } else { +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; @@ -5269,19 +5534,21 @@ if (entity->is_param) { int tok; parser->m_processor = prologProcessor; tok = XmlPrologTok(parser->m_encoding, s, end, &next); return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, + XML_ACCOUNT_DIRECT); } else #endif /* XML_DTD */ { parser->m_processor = contentProcessor; /* see externalEntityContentProcessor vs contentProcessor */ return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer); + (XML_Bool)! parser->m_parsingStatus.finalBuffer, + XML_ACCOUNT_DIRECT); } } static enum XML_Error PTRCALL errorProcessor(XML_Parser parser, const char *s, const char *end, @@ -5292,13 +5559,14 @@ return parser->m_errorCode; } static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, STRING_POOL *pool) { + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { enum XML_Error result - = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); + = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); if (result) return result; if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); if (! poolAppendChar(pool, XML_T('\0'))) @@ -5306,15 +5574,27 @@ return XML_ERROR_NONE; } static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, - const char *ptr, const char *end, STRING_POOL *pool) { + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ +#ifndef XML_DTD + UNUSED_P(account); +#endif + for (;;) { - const char *next; + const char *next + = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { + accountingOnAbort(parser); + return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + } +#endif switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == parser->m_encoding) @@ -5370,10 +5650,18 @@ ENTITY *entity; char checkEntityDecl; XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { +#ifdef XML_DTD + /* NOTE: We are replacing 4-6 characters original input for 1 character + * so there is no amplification and hence recording without + * protection. */ + accountingDiffTolerated(parser, tok, (char *)&ch, + ((char *)&ch) + sizeof(XML_Char), __LINE__, + XML_ACCOUNT_ENTITY_EXPANSION); +#endif /* XML_DTD */ if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; break; } name = poolStoreString(&parser->m_temp2Pool, enc, @@ -5447,13 +5735,20 @@ return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = XML_TRUE; +#ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); +#endif result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata, (const char *)entity->textPtr, - (const char *)textEnd, pool); + (const char *)textEnd, pool, + XML_ACCOUNT_ENTITY_EXPANSION); +#ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); +#endif entity->open = XML_FALSE; if (result) return result; } } break; @@ -5479,17 +5774,20 @@ /* not reached */ } static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, - const char *entityTextPtr, const char *entityTextEnd) { + const char *entityTextPtr, const char *entityTextEnd, + enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; #ifdef XML_DTD int oldInEntityValue = parser->m_prologState.inEntityValue; parser->m_prologState.inEntityValue = 1; +#else + UNUSED_P(account); #endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we have to make sure that entityValuePool.start is not null */ if (! pool->blocks) { @@ -5496,12 +5794,23 @@ if (! poolGrow(pool)) return XML_ERROR_NO_MEMORY; } for (;;) { - const char *next; + const char *next + = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + +#ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, + account)) { + accountingOnAbort(parser); + result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; + goto endEntityValue; + } +#endif + switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; @@ -5533,27 +5842,33 @@ } if (entity->systemId) { if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; goto endEntityValue; } + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (! dtd->paramEntityRead) dtd->keepProcessing = dtd->standalone; } else dtd->keepProcessing = dtd->standalone; } else { entity->open = XML_TRUE; + entityTrackingOnOpen(parser, entity, __LINE__); result = storeEntityValue( parser, parser->m_internalEncoding, (const char *)entity->textPtr, - (const char *)(entity->textPtr + entity->textLen)); + (const char *)(entity->textPtr + entity->textLen), + XML_ACCOUNT_ENTITY_EXPANSION); + entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (result) goto endEntityValue; } break; @@ -6910,5 +7225,757 @@ return NULL; /* Copy the original into place */ memcpy(result, s, charsRequired * sizeof(XML_Char)); return result; } + +#ifdef XML_DTD + +static float +accountingGetCurrentAmplification(XML_Parser rootParser) { + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = rootParser->m_accounting.countBytesDirect + ? (countBytesOutput + / (float)(rootParser->m_accounting.countBytesDirect)) + : 1.0f; + assert(! rootParser->m_parentParser); + return amplificationFactor; +} + +static void +accountingReportStats(XML_Parser originParser, const char *epilog) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + if (rootParser->m_accounting.debugLevel < 1) { + return; + } + + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + fprintf(stderr, + "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( + "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, + rootParser->m_accounting.countBytesIndirect, + (double)amplificationFactor, epilog); +} + +static void +accountingOnAbort(XML_Parser originParser) { + accountingReportStats(originParser, " ABORTING\n"); +} + +static void +accountingReportDiff(XML_Parser rootParser, + unsigned int levelsAwayFromRootParser, const char *before, + const char *after, ptrdiff_t bytesMore, int source_line, + enum XML_Account account) { + assert(! rootParser->m_parentParser); + + fprintf(stderr, + " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", + bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP", + levelsAwayFromRootParser, source_line, 10, ""); + + const char ellipis[] = "[..]"; + const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; + const unsigned int contextLength = 10; + + /* Note: Performance is of no concern here */ + const char *walker = before; + if ((rootParser->m_accounting.debugLevel >= 3) + || (after - before) + <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } else { + for (; walker < before + contextLength; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + fprintf(stderr, ellipis); + walker = after - contextLength; + for (; walker < after; walker++) { + fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); + } + } + fprintf(stderr, "\"\n"); +} + +static XML_Bool +accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, + const char *after, int source_line, + enum XML_Account account) { + /* Note: We need to check the token type *first* to be sure that + * we can even access variable , safely. + * E.g. for XML_TOK_NONE may hold an invalid pointer. */ + switch (tok) { + case XML_TOK_INVALID: + case XML_TOK_PARTIAL: + case XML_TOK_PARTIAL_CHAR: + case XML_TOK_NONE: + return XML_TRUE; + } + + if (account == XML_ACCOUNT_NONE) + return XML_TRUE; /* because these bytes have been accounted for, already */ + + unsigned int levelsAwayFromRootParser; + const XML_Parser rootParser + = getRootParserOf(originParser, &levelsAwayFromRootParser); + assert(! rootParser->m_parentParser); + + const int isDirect + = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); + const ptrdiff_t bytesMore = after - before; + + XmlBigCount *const additionTarget + = isDirect ? &rootParser->m_accounting.countBytesDirect + : &rootParser->m_accounting.countBytesIndirect; + + /* Detect and avoid integer overflow */ + if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) + return XML_FALSE; + *additionTarget += bytesMore; + + const XmlBigCount countBytesOutput + = rootParser->m_accounting.countBytesDirect + + rootParser->m_accounting.countBytesIndirect; + const float amplificationFactor + = accountingGetCurrentAmplification(rootParser); + const XML_Bool tolerated + = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) + || (amplificationFactor + <= rootParser->m_accounting.maximumAmplificationFactor); + + if (rootParser->m_accounting.debugLevel >= 2) { + accountingReportStats(rootParser, ""); + accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, + bytesMore, source_line, account); + } + + return tolerated; +} + +unsigned long long +testingAccountingGetCountBytesDirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesDirect; +} + +unsigned long long +testingAccountingGetCountBytesIndirect(XML_Parser parser) { + if (! parser) + return 0; + return parser->m_accounting.countBytesIndirect; +} + +static void +entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, + const char *action, int sourceLine) { + assert(! rootParser->m_parentParser); + if (rootParser->m_entity_stats.debugLevel < 1) + return; + +# if defined(XML_UNICODE) + const char *const entityName = "[..]"; +# else + const char *const entityName = entity->name; +# endif + + fprintf( + stderr, + "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_entity_stats.countEverOpened, + rootParser->m_entity_stats.currentDepth, + rootParser->m_entity_stats.maximumDepthSeen, + (rootParser->m_entity_stats.currentDepth - 1) * 2, "", + entity->is_param ? "%" : "&", entityName, action, entity->textLen, + sourceLine); +} + +static void +entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + rootParser->m_entity_stats.countEverOpened++; + rootParser->m_entity_stats.currentDepth++; + if (rootParser->m_entity_stats.currentDepth + > rootParser->m_entity_stats.maximumDepthSeen) { + rootParser->m_entity_stats.maximumDepthSeen++; + } + + entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); +} + +static void +entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { + const XML_Parser rootParser = getRootParserOf(originParser, NULL); + assert(! rootParser->m_parentParser); + + entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); + rootParser->m_entity_stats.currentDepth--; +} + +static XML_Parser +getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { + XML_Parser rootParser = parser; + unsigned int stepsTakenUpwards = 0; + while (rootParser->m_parentParser) { + rootParser = rootParser->m_parentParser; + stepsTakenUpwards++; + } + assert(! rootParser->m_parentParser); + if (outLevelDiff != NULL) { + *outLevelDiff = stepsTakenUpwards; + } + return rootParser; +} + +const char * +unsignedCharToPrintable(unsigned char c) { + switch (c) { + case 0: + return "\\0"; + case 1: + return "\\x1"; + case 2: + return "\\x2"; + case 3: + return "\\x3"; + case 4: + return "\\x4"; + case 5: + return "\\x5"; + case 6: + return "\\x6"; + case 7: + return "\\x7"; + case 8: + return "\\x8"; + case 9: + return "\\t"; + case 10: + return "\\n"; + case 11: + return "\\xB"; + case 12: + return "\\xC"; + case 13: + return "\\r"; + case 14: + return "\\xE"; + case 15: + return "\\xF"; + case 16: + return "\\x10"; + case 17: + return "\\x11"; + case 18: + return "\\x12"; + case 19: + return "\\x13"; + case 20: + return "\\x14"; + case 21: + return "\\x15"; + case 22: + return "\\x16"; + case 23: + return "\\x17"; + case 24: + return "\\x18"; + case 25: + return "\\x19"; + case 26: + return "\\x1A"; + case 27: + return "\\x1B"; + case 28: + return "\\x1C"; + case 29: + return "\\x1D"; + case 30: + return "\\x1E"; + case 31: + return "\\x1F"; + case 32: + return " "; + case 33: + return "!"; + case 34: + return "\\\""; + case 35: + return "#"; + case 36: + return "$"; + case 37: + return "%"; + case 38: + return "&"; + case 39: + return "'"; + case 40: + return "("; + case 41: + return ")"; + case 42: + return "*"; + case 43: + return "+"; + case 44: + return ","; + case 45: + return "-"; + case 46: + return "."; + case 47: + return "/"; + case 48: + return "0"; + case 49: + return "1"; + case 50: + return "2"; + case 51: + return "3"; + case 52: + return "4"; + case 53: + return "5"; + case 54: + return "6"; + case 55: + return "7"; + case 56: + return "8"; + case 57: + return "9"; + case 58: + return ":"; + case 59: + return ";"; + case 60: + return "<"; + case 61: + return "="; + case 62: + return ">"; + case 63: + return "?"; + case 64: + return "@"; + case 65: + return "A"; + case 66: + return "B"; + case 67: + return "C"; + case 68: + return "D"; + case 69: + return "E"; + case 70: + return "F"; + case 71: + return "G"; + case 72: + return "H"; + case 73: + return "I"; + case 74: + return "J"; + case 75: + return "K"; + case 76: + return "L"; + case 77: + return "M"; + case 78: + return "N"; + case 79: + return "O"; + case 80: + return "P"; + case 81: + return "Q"; + case 82: + return "R"; + case 83: + return "S"; + case 84: + return "T"; + case 85: + return "U"; + case 86: + return "V"; + case 87: + return "W"; + case 88: + return "X"; + case 89: + return "Y"; + case 90: + return "Z"; + case 91: + return "["; + case 92: + return "\\\\"; + case 93: + return "]"; + case 94: + return "^"; + case 95: + return "_"; + case 96: + return "`"; + case 97: + return "a"; + case 98: + return "b"; + case 99: + return "c"; + case 100: + return "d"; + case 101: + return "e"; + case 102: + return "f"; + case 103: + return "g"; + case 104: + return "h"; + case 105: + return "i"; + case 106: + return "j"; + case 107: + return "k"; + case 108: + return "l"; + case 109: + return "m"; + case 110: + return "n"; + case 111: + return "o"; + case 112: + return "p"; + case 113: + return "q"; + case 114: + return "r"; + case 115: + return "s"; + case 116: + return "t"; + case 117: + return "u"; + case 118: + return "v"; + case 119: + return "w"; + case 120: + return "x"; + case 121: + return "y"; + case 122: + return "z"; + case 123: + return "{"; + case 124: + return "|"; + case 125: + return "}"; + case 126: + return "~"; + case 127: + return "\\x7F"; + case 128: + return "\\x80"; + case 129: + return "\\x81"; + case 130: + return "\\x82"; + case 131: + return "\\x83"; + case 132: + return "\\x84"; + case 133: + return "\\x85"; + case 134: + return "\\x86"; + case 135: + return "\\x87"; + case 136: + return "\\x88"; + case 137: + return "\\x89"; + case 138: + return "\\x8A"; + case 139: + return "\\x8B"; + case 140: + return "\\x8C"; + case 141: + return "\\x8D"; + case 142: + return "\\x8E"; + case 143: + return "\\x8F"; + case 144: + return "\\x90"; + case 145: + return "\\x91"; + case 146: + return "\\x92"; + case 147: + return "\\x93"; + case 148: + return "\\x94"; + case 149: + return "\\x95"; + case 150: + return "\\x96"; + case 151: + return "\\x97"; + case 152: + return "\\x98"; + case 153: + return "\\x99"; + case 154: + return "\\x9A"; + case 155: + return "\\x9B"; + case 156: + return "\\x9C"; + case 157: + return "\\x9D"; + case 158: + return "\\x9E"; + case 159: + return "\\x9F"; + case 160: + return "\\xA0"; + case 161: + return "\\xA1"; + case 162: + return "\\xA2"; + case 163: + return "\\xA3"; + case 164: + return "\\xA4"; + case 165: + return "\\xA5"; + case 166: + return "\\xA6"; + case 167: + return "\\xA7"; + case 168: + return "\\xA8"; + case 169: + return "\\xA9"; + case 170: + return "\\xAA"; + case 171: + return "\\xAB"; + case 172: + return "\\xAC"; + case 173: + return "\\xAD"; + case 174: + return "\\xAE"; + case 175: + return "\\xAF"; + case 176: + return "\\xB0"; + case 177: + return "\\xB1"; + case 178: + return "\\xB2"; + case 179: + return "\\xB3"; + case 180: + return "\\xB4"; + case 181: + return "\\xB5"; + case 182: + return "\\xB6"; + case 183: + return "\\xB7"; + case 184: + return "\\xB8"; + case 185: + return "\\xB9"; + case 186: + return "\\xBA"; + case 187: + return "\\xBB"; + case 188: + return "\\xBC"; + case 189: + return "\\xBD"; + case 190: + return "\\xBE"; + case 191: + return "\\xBF"; + case 192: + return "\\xC0"; + case 193: + return "\\xC1"; + case 194: + return "\\xC2"; + case 195: + return "\\xC3"; + case 196: + return "\\xC4"; + case 197: + return "\\xC5"; + case 198: + return "\\xC6"; + case 199: + return "\\xC7"; + case 200: + return "\\xC8"; + case 201: + return "\\xC9"; + case 202: + return "\\xCA"; + case 203: + return "\\xCB"; + case 204: + return "\\xCC"; + case 205: + return "\\xCD"; + case 206: + return "\\xCE"; + case 207: + return "\\xCF"; + case 208: + return "\\xD0"; + case 209: + return "\\xD1"; + case 210: + return "\\xD2"; + case 211: + return "\\xD3"; + case 212: + return "\\xD4"; + case 213: + return "\\xD5"; + case 214: + return "\\xD6"; + case 215: + return "\\xD7"; + case 216: + return "\\xD8"; + case 217: + return "\\xD9"; + case 218: + return "\\xDA"; + case 219: + return "\\xDB"; + case 220: + return "\\xDC"; + case 221: + return "\\xDD"; + case 222: + return "\\xDE"; + case 223: + return "\\xDF"; + case 224: + return "\\xE0"; + case 225: + return "\\xE1"; + case 226: + return "\\xE2"; + case 227: + return "\\xE3"; + case 228: + return "\\xE4"; + case 229: + return "\\xE5"; + case 230: + return "\\xE6"; + case 231: + return "\\xE7"; + case 232: + return "\\xE8"; + case 233: + return "\\xE9"; + case 234: + return "\\xEA"; + case 235: + return "\\xEB"; + case 236: + return "\\xEC"; + case 237: + return "\\xED"; + case 238: + return "\\xEE"; + case 239: + return "\\xEF"; + case 240: + return "\\xF0"; + case 241: + return "\\xF1"; + case 242: + return "\\xF2"; + case 243: + return "\\xF3"; + case 244: + return "\\xF4"; + case 245: + return "\\xF5"; + case 246: + return "\\xF6"; + case 247: + return "\\xF7"; + case 248: + return "\\xF8"; + case 249: + return "\\xF9"; + case 250: + return "\\xFA"; + case 251: + return "\\xFB"; + case 252: + return "\\xFC"; + case 253: + return "\\xFD"; + case 254: + return "\\xFE"; + case 255: + return "\\xFF"; + default: + assert(0); /* never gets here */ + return "dead code"; + } + assert(0); /* never gets here */ +} + +#endif /* XML_DTD */ + +static unsigned long +getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { + const char *const valueOrNull = getenv(variableName); + if (valueOrNull == NULL) { + return defaultDebugLevel; + } + const char *const value = valueOrNull; + + errno = 0; + char *afterValue = (char *)value; + unsigned long debugLevel = strtoul(value, &afterValue, 10); + if ((errno != 0) || (afterValue[0] != '\0')) { + errno = 0; + return defaultDebugLevel; + } + + return debugLevel; +} Index: expat/xmlrole.c ================================================================== --- expat/xmlrole.c +++ expat/xmlrole.c @@ -5,11 +5,19 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2002-2003 Fred L. Drake, Jr. + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2019 David Loffredo + Copyright (c) 2021 Dong-hee Na Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -28,19 +36,17 @@ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include #ifdef _WIN32 # include "winconfig.h" -#else -# ifdef HAVE_EXPAT_CONFIG_H -# include -# endif -#endif /* ndef _WIN32 */ +#endif #include "expat_external.h" #include "internal.h" #include "xmlrole.h" #include "ascii.h" Index: expat/xmlrole.h ================================================================== --- expat/xmlrole.h +++ expat/xmlrole.h @@ -5,11 +5,14 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Karl Waclawek + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/xmltok.c ================================================================== --- expat/xmltok.c +++ expat/xmltok.c @@ -5,11 +5,24 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2001-2003 Fred L. Drake, Jr. + Copyright (c) 2002 Greg Stein + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2005-2009 Steven Solie + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016 Pascal Cuoq + Copyright (c) 2016 Don Lewis + Copyright (c) 2017 Rhodri James + Copyright (c) 2017 Alexander Bluhm + Copyright (c) 2017 Benbuck Nason + Copyright (c) 2017 José Gutiérrez de la Concha + Copyright (c) 2019 David Loffredo + Copyright (c) 2021 Dong-hee Na Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -28,21 +41,19 @@ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include #include /* memcpy */ #include #ifdef _WIN32 # include "winconfig.h" -#else -# ifdef HAVE_EXPAT_CONFIG_H -# include -# endif -#endif /* ndef _WIN32 */ +#endif #include "expat_external.h" #include "internal.h" #include "xmltok.h" #include "nametab.h" @@ -259,12 +270,18 @@ # define BYTE_TO_ASCII(enc, p) (*(p)) #endif #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) -#define IS_INVALID_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#ifdef XML_MIN_SIZE +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n \ + && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#else +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#endif #ifdef XML_MIN_SIZE # define IS_NAME_CHAR_MINBPC(enc, p) \ (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) # define IS_NMSTRT_CHAR_MINBPC(enc, p) \ Index: expat/xmltok.h ================================================================== --- expat/xmltok.h +++ expat/xmltok.h @@ -5,11 +5,15 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2005 Karl Waclawek + Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2017 Rhodri James Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/xmltok_impl.c ================================================================== --- expat/xmltok_impl.c +++ expat/xmltok_impl.c @@ -1,15 +1,23 @@ -/* This file is included! +/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2016 Karl Waclawek + Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2017 Rhodri James + Copyright (c) 2018 Benjamin Peterson + Copyright (c) 2018 Anton Maklakov + Copyright (c) 2019 David Loffredo + Copyright (c) 2020 Boris Kolpackov Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -30,11 +38,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifdef XML_TOK_IMPL_C -# ifndef IS_INVALID_CHAR +# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined # define IS_INVALID_CHAR(enc, ptr, n) (0) # endif # define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ case BT_LEAD##n: \ Index: expat/xmltok_impl.h ================================================================== --- expat/xmltok_impl.h +++ expat/xmltok_impl.h @@ -5,11 +5,12 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2017-2019 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: expat/xmltok_ns.c ================================================================== --- expat/xmltok_ns.c +++ expat/xmltok_ns.c @@ -5,11 +5,15 @@ | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd - Copyright (c) 2000-2017 Expat development team + Copyright (c) 2000 Clark Cooper + Copyright (c) 2002 Greg Stein + Copyright (c) 2002 Fred L. Drake, Jr. + Copyright (c) 2002-2006 Karl Waclawek + Copyright (c) 2017 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including Index: generic/tcldom.c ================================================================== --- generic/tcldom.c +++ generic/tcldom.c @@ -168,24 +168,29 @@ /*---------------------------------------------------------------------------- | Module Globals | \---------------------------------------------------------------------------*/ #ifndef TCL_THREADS - static int storeLineColumn = 0; - static int dontCreateObjCommands = 0; - static int dontCheckCharData = 0; - static int dontCheckName = 0; - static int domCreateCmdMode = 0; + static int storeLineColumn = 0; + static int dontCreateObjCommands = 0; + static int dontCheckCharData = 0; + static int dontCheckName = 0; + static int domCreateCmdMode = 0; + /* BLAP stands for BillionLaughsAttackProtection */ + static float BLAPMaximumAmplification; + unsigned long long BLAPActivationThreshold; # define TSD(x) x # define GetTcldomTSD() #else typedef struct ThreadSpecificData { int storeLineColumn; int dontCreateObjCommands; int dontCheckCharData; int dontCheckName; int domCreateCmdMode; + float BLAPMaximumAmplification; + unsigned long long BLAPActivationThreshold; } ThreadSpecificData; static Tcl_ThreadDataKey dataKey; static Tcl_HashTable sharedDocs; static Tcl_Mutex tableMutex; static int tcldomInitialized;