Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Added code which handles codepoints outside the BMP even for Tcl 8.6 in case of entity serialization. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | wip |
Files: | files | file ages | folders |
SHA3-256: |
96d934bb241f1e2aace3ce0a82fc9bc6 |
User & Date: | rolf 2024-06-26 01:32:29 |
Context
2024-06-26
| ||
01:48 | Updated to HTML 5 entities also for serializing with -htmlEntities. Closed-Leaf check-in: cb90628bb3 user: rolf tags: HTML5Entities | |
01:32 | Added code which handles codepoints outside the BMP even for Tcl 8.6 in case of entity serialization. Closed-Leaf check-in: 96d934bb24 user: rolf tags: wip | |
2024-06-24
| ||
23:39 | Merged from trunk. check-in: 91f818a493 user: rolf tags: wip | |
Changes
Changes to generic/tcldom.c.
︙ | ︙ | |||
2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 | objv[0] = name; objv[1] = attrsList; objv[2] = childList; return Tcl_NewListObj(3, objv); } /*---------------------------------------------------------------------------- | tcldom_AppendEscaped | \---------------------------------------------------------------------------*/ static void tcldom_AppendEscaped ( Tcl_Obj *xmlString, Tcl_Channel chan, char *value, domLength value_length, int outputFlags ) { #define APESC_BUF_SIZE 512 #define AP(c) *b++ = c; #define AE(s) pc1 = s; while(*pc1) *b++ = *pc1++; #define TWOCPE clen2 = UTF8_CHAR_LEN(*(pc+clen)); \ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 | objv[0] = name; objv[1] = attrsList; objv[2] = childList; return Tcl_NewListObj(3, objv); } #if TCL_MAJOR_VERSION < 9 static int tcldom_UtfToUniChar ( const char *src, int *uniChar ) { int clen; Tcl_UniChar uni16; clen = UTF8_CHAR_LEN(*src); if (clen && clen < 4) { clen = Tcl_UtfToUniChar (src, &uni16); *uniChar = uni16; return clen; } else if (clen == 4) { /* This resembles exactly what Tcl 9 does */ if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by three trail bytes. 
*/ *uniChar = (((src[0] & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); if ((unsigned)(*uniChar - 0x10000) <= 0xFFFFF) { return 4; } } } *uniChar = src[0]; return 1; } #else # define tcldom_UtfToUniChar Tcl_UtfToUniChar #endif /*---------------------------------------------------------------------------- | tcldom_AppendEscaped | \---------------------------------------------------------------------------*/ static void tcldom_AppendEscaped ( Tcl_Obj *xmlString, Tcl_Channel chan, char *value, domLength value_length, int outputFlags ) { #define APESC_BUF_SIZE 512 #define AP(c) *b++ = c; #define AE(s) pc1 = s; while(*pc1) *b++ = *pc1++; #define TWOCPE clen2 = UTF8_CHAR_LEN(*(pc+clen)); \ if (clen) tcldom_UtfToUniChar(pc+clen, &uniChar2); #define MCP pc += clen; clen = clen2; char buf[APESC_BUF_SIZE+80], *b, *bLimit, *pc, *pc1, *pEnd, charRef[10]; int charDone, i; int clen = 0, clen2 = 0; int unicode; int uniChar, uniChar2; b = buf; bLimit = b + APESC_BUF_SIZE; pc = pEnd = value; if (value_length != -1) { pEnd = pc + value_length; } |
︙ | ︙ | |||
2437 2438 2439 2440 2441 2442 2443 | AP('&') AP('#') AP('x') AP('9') AP(';') } else { charDone = 0; clen = UTF8_CHAR_LEN(*pc); if (outputFlags & SERIALIZE_HTML_ENTITIES) { charDone = 1; | | | 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 | AP('&') AP('#') AP('x') AP('9') AP(';') } else { charDone = 0; clen = UTF8_CHAR_LEN(*pc); if (outputFlags & SERIALIZE_HTML_ENTITIES) { charDone = 1; tcldom_UtfToUniChar(pc, &uniChar); switch (uniChar) { #include "HTML5ent.inc" default: charDone = 0; } if (charDone) { pc += (clen - 1); } |
︙ | ︙ |
Changes to tests/domDoc.test.
︙ | ︙ | |||
93 94 95 96 97 98 99 100 101 102 103 104 105 106 | test domDoc-1.3.4 {asHTML -htmlEntities} { set doc [dom parse -html "<html>≪̸≪≪⃒\u226A\u20D2</html>"] set result [$doc asHTML -htmlEntities] $doc delete set result } {<html>≪̸≪≪⃒≪⃒</html>} set doc [dom parse <root/>] test domDoc-1.4 {asXML -doctypeDeclaration} { $doc asXML -doctypeDeclaration 1 } {<!DOCTYPE root> <root/> | > > > > > > > | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | test domDoc-1.3.4 {asHTML -htmlEntities} { set doc [dom parse -html "<html>≪̸≪≪⃒\u226A\u20D2</html>"] set result [$doc asHTML -htmlEntities] $doc delete set result } {<html>≪̸≪≪⃒≪⃒</html>} test domDoc-1.3.3 {asHTML -htmlEntities} { set doc [dom parse -html "<html>𝕫</html>"] set result [$doc asHTML -htmlEntities] $doc delete set result } {<html>𝕫</html>} set doc [dom parse <root/>] test domDoc-1.4 {asXML -doctypeDeclaration} { $doc asXML -doctypeDeclaration 1 } {<!DOCTYPE root> <root/> |
︙ | ︙ |