Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Added the isHTML5CustomName method to the dom command. The simple HTML reader now accepts HTML5 custom element names. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
9d8f28621bbf19a174ca8e3bb6c02ffa |
User & Date: | rolf 2024-07-02 22:08:25 |
Context
2024-07-02
| ||
22:09 | Updated the CHANGES file. check-in: d02dcd66ea user: rolf tags: trunk | |
22:08 | Added the isHTML5CustomName method to the dom command. The simple HTML reader now accepts HTML5 custom element names. check-in: 9d8f28621b user: rolf tags: trunk | |
21:58 | Added the isHTML5CustomName method to the dom command. The simple HTML reader uses the machinery to accept HTML5 custom element names. Closed-Leaf check-in: a2c9f83840 user: rolf tags: HTML5CustomNames | |
2024-07-01
| ||
11:42 | Use the enhanced encoding capabilities of Tcl 9 better. And more changes as a result of TIP 699. check-in: 0058351450 user: rolf tags: trunk | |
Changes
Changes to generic/dom.c.
︙ | ︙ | |||
351 352 353 354 355 356 357 358 359 360 361 362 363 364 | if (!clen) return 0; if (UTF8_XMLCHAR((unsigned const char *)p,clen)) p += clen; else return 0; } return 1; } /*--------------------------------------------------------------------------- | domClearString | \--------------------------------------------------------------------------*/ void domClearString ( | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | if (!clen) return 0; if (UTF8_XMLCHAR((unsigned const char *)p,clen)) p += clen; else return 0; } return 1; } /*--------------------------------------------------------------------------- | domIsHTML5CustomName | \--------------------------------------------------------------------------*/ int domIsHTML5CustomName ( const char *str ) { const char *p; int clen, dashseen = 0; Tcl_UniChar uniChar; p = str; if (*p < 'a' || *p > 'z') { return 0; } p++; while (*p) { clen = UTF8_CHAR_LEN(*p); if (clen == 0) return 0; if (clen == 1) { if (*p == '-') { dashseen = 1; p++; } else { if (*p == '.' 
|| (*p >= '0' && *p <= '9') || *p == '_' || (*p >= 'a' && *p <= 'z')) { p++; } else { return 0; } } continue; } if (clen == 4) { p += clen; continue; } clen = Tcl_UtfToUniChar (p, &uniChar); if (uniChar == 0xB7 || (uniChar >= 0xC0 && uniChar <= 0xD6) || (uniChar >= 0xD8 && uniChar <= 0xF6) || (uniChar >= 0xF8 && uniChar <= 0x37D) || (uniChar >= 0x37F && uniChar <= 0x1FFF) || (uniChar >= 0x200C && uniChar <= 0x200D) || (uniChar >= 0x203F && uniChar <= 0x2040) || (uniChar >= 0x2070 && uniChar <= 0x218F) || (uniChar >= 0x2C00 && uniChar <= 0x2FEF) || (uniChar >= 0x3001 && uniChar <= 0xD7FF) || (uniChar >= 0xF900 && uniChar <= 0xFDCF) || (uniChar >= 0xFDF0 && uniChar <= 0xFFFD)) { p += clen; } else { return 0; } } if (!dashseen) return 0; switch (str[0]) { case 'a': if (!strcmp(str,"annotation-xml")) {return 0;} break; case 'c': if (!strcmp(str,"color-profile")) {return 0;} break; case 'f': if (!strcmp(str,"font-face") || !strcmp(str,"font-face-src") || !strcmp(str,"font-face-uri") || !strcmp(str,"font-face-format") || !strcmp(str,"font-face-name")) {return 0;} break; case 'm': if (!strcmp(str,"missing-glyph")) {return 0;} break; } return 1; } /*--------------------------------------------------------------------------- | domClearString | \--------------------------------------------------------------------------*/ void domClearString ( |
︙ | ︙ |
Changes to generic/dom.h.
︙ | ︙ | |||
912 913 914 915 916 917 918 919 920 921 922 923 924 925 | int domIsChar (const char *str); void domClearString (char *str, char *replacement, domLength repllen, Tcl_DString *clearedstr, int *changed); int domIsBMPChar (const char *str); int domIsComment (const char *str); int domIsCDATA (const char *str); int domIsPIValue (const char *str); void domCopyTo (domNode *node, domNode *parent, int copyNS); void domCopyNS (domNode *from, domNode *to); domAttrNode * domCreateXMLNamespaceNode (domNode *parent); void domRenumberTree (domNode *node); int domPrecedes (domNode *node, domNode *other); void domNormalize (domNode *node, int forXPath, domFreeCallback freeCB, void *clientData); | > | 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | int domIsChar (const char *str); void domClearString (char *str, char *replacement, domLength repllen, Tcl_DString *clearedstr, int *changed); int domIsBMPChar (const char *str); int domIsComment (const char *str); int domIsCDATA (const char *str); int domIsPIValue (const char *str); int domIsHTML5CustomName (const char *str); void domCopyTo (domNode *node, domNode *parent, int copyNS); void domCopyNS (domNode *from, domNode *to); domAttrNode * domCreateXMLNamespaceNode (domNode *parent); void domRenumberTree (domNode *node); int domPrecedes (domNode *node, domNode *other); void domNormalize (domNode *node, int forXPath, domFreeCallback freeCB, void *clientData); |
︙ | ︙ |
Changes to generic/domhtml.c.
︙ | ︙ | |||
2555 2556 2557 2558 2559 2560 2561 | int hasContent; domNode *pnode; domNode *node = NULL, *parent_node = parent; domTextNode *tnode; domAttrNode *attrnode, *lastAttr; int ampersandSeen = 0; int only_whites = 0; | | | 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 | int hasContent; domNode *pnode; domNode *node = NULL, *parent_node = parent; domTextNode *tnode; domAttrNode *attrnode, *lastAttr; int ampersandSeen = 0; int only_whites = 0; int hnew, autoclose, ignore, maybeCustomName, rc; char tmp[250], *y = NULL; Tcl_HashEntry *h; domProcessingInstructionNode *pinode; x = &(html[*pos]); while ( (c=*x)!=0 ) { |
︙ | ︙ | |||
2974 2975 2976 2977 2978 2979 2980 2981 | } /*---------------------------------------------------------------- | new tag/element | \---------------------------------------------------------------*/ while ((c=*x)!=0 && c!='/' && c!='>' && c!='<' && !SPACE(c) ) { | > | > > > > > > > | | > > > > > > > > > > | 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 | } /*---------------------------------------------------------------- | new tag/element | \---------------------------------------------------------------*/ maybeCustomName = 0; while ((c=*x)!=0 && c!='/' && c!='>' && c!='<' && !SPACE(c) ) { if (!maybeCustomName && !isalnum(c)) maybeCustomName = 1; x++; } if (!maybeCustomName) { c = *x; *x = '\0'; /* temporarily terminate the string */ x = start + 1; while (*x) { *x = tolower(*x); x++; } *x = c; } else { c = *x; *x = '\0'; /* temporarily terminate the string */ rc = domIsHTML5CustomName (start+1); *x = c; if (!rc) { goto readText; } } hasContent = 1; if (c==0) { RetError("Missing \">\"",(start-html) ); } if ( (x-start)==1) { RetError("Null markup name",(start-html) ); |
︙ | ︙ |
Changes to generic/tcldom.c.
︙ | ︙ | |||
224 225 226 227 228 229 230 231 232 233 234 235 236 237 | " isComment string \n" " isCDATA string \n" " isPIValue string \n" " isName string \n" " isQName string \n" " isNCName string \n" " isPIName string \n" " featureinfo feature \n" ; static char doc_usage[] = "Usage domDoc <method> <args>, where method can be:\n" " documentElement ?objVar? \n" " getElementsByTagName name \n" | > | 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | " isComment string \n" " isCDATA string \n" " isPIValue string \n" " isName string \n" " isQName string \n" " isNCName string \n" " isPIName string \n" " isHTML5CustomName string \n" " featureinfo feature \n" ; static char doc_usage[] = "Usage domDoc <method> <args>, where method can be:\n" " documentElement ?objVar? \n" " getElementsByTagName name \n" |
︙ | ︙ | |||
7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 | "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setStoreLineColumn", "isCharData", "isName", "isPIName", "isQName", "isComment", "isCDATA", "isPIValue", "isNCName", "createDocumentNode", "setNameCheck", "setTextCheck", "setObjectCommands", "featureinfo", "isBMPCharData", "clearString", #ifdef TCL_THREADS "attachDocument", "detachDocument", #endif NULL }; enum domMethod { m_createDocument, m_createDocumentNS, m_createNodeCmd, m_parse, m_setStoreLineColumn, m_isCharData, m_isName, m_isPIName, m_isQName, m_isComment, m_isCDATA, m_isPIValue, m_isNCName, m_createDocumentNode, m_setNameCheck, m_setTextCheck, m_setObjectCommands, | > | > | 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 | "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setStoreLineColumn", "isCharData", "isName", "isPIName", "isQName", "isComment", "isCDATA", "isPIValue", "isNCName", "createDocumentNode", "setNameCheck", "setTextCheck", "setObjectCommands", "featureinfo", "isBMPCharData", "clearString", "isHTML5CustomName", #ifdef TCL_THREADS "attachDocument", "detachDocument", #endif NULL }; enum domMethod { m_createDocument, m_createDocumentNS, m_createNodeCmd, m_parse, m_setStoreLineColumn, m_isCharData, m_isName, m_isPIName, m_isQName, m_isComment, m_isCDATA, m_isPIValue, m_isNCName, m_createDocumentNode, m_setNameCheck, m_setTextCheck, m_setObjectCommands, m_featureinfo, m_isBMPCharData, m_clearString, m_isHTML5CustomName #ifdef TCL_THREADS ,m_attachDocument, m_detachDocument #endif }; static const char *nodeModeValues[] = { "automatic", "command", "token", NULL |
︙ | ︙ | |||
7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 | return tcldom_featureinfo(clientData, interp, --objc, objv+1); case m_isBMPCharData: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2]))); return TCL_OK; case m_clearString: CheckArgs(3,5,2,"?-replace ?replacement?? string"); if (objc >= 4) { option = Tcl_GetString (objv[2]); if (option[0] == '-' && option[1] == 'r') { if (Tcl_GetIndexFromObj (interp, objv[2], clearStringOptions, "option", | > > > > > | 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 | return tcldom_featureinfo(clientData, interp, --objc, objv+1); case m_isBMPCharData: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2]))); return TCL_OK; case m_isHTML5CustomName: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsHTML5CustomName(Tcl_GetString(objv[2]))); return TCL_OK; case m_clearString: CheckArgs(3,5,2,"?-replace ?replacement?? string"); if (objc >= 4) { option = Tcl_GetString (objv[2]); if (option[0] == '-' && option[1] == 'r') { if (Tcl_GetIndexFromObj (interp, objv[2], clearStringOptions, "option", |
︙ | ︙ |
Changes to tests/dom.test.
1 2 3 4 5 6 7 | # Features covered: dom command # # This file contains a collection of tests for the dom command of # tDOM. # # dom-1.*: createDocument, createDocumentNS # dom-2.*: parse | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | # Features covered: dom command # # This file contains a collection of tests for the dom command of # tDOM. # # dom-1.*: createDocument, createDocumentNS # dom-2.*: parse # dom-3.*: is* methods, clearString # dom-4.*: parse -useForeignDTD # dom-5.*: external entities # dom-6.*: use in slave interpreter # dom-7.*: setNameCheck, setTextCheck # dom-8.*: createDocumentNode, documentNodes # dom-9.*: setObjectCommands # dom-10.*: createNodeCmd |
︙ | ︙ | |||
1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 | \u0004\u0005\u0001 } { lappend result [dom clearString -replace some $str] } set result } [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome] test dom-4.1 {-useForeignDTD 0} { set doc [dom parse -useForeignDTD 0 {<root/>}] $doc delete } {} test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} { set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]] | > > > > > > > > > > > > > > > > > > > > > > > > > > | 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 | \u0004\u0005\u0001 } { lappend result [dom clearString -replace some $str] } set result } [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome] test dom-3.46 {isHTML5CustomName} { set result [list] foreach str { abc ab-c ab-C -ab äb-c ab---c ab- ab-ᴨ aÄ-b aA-b aä-b 0n-foo A-b font-face m\u00B7-. n-\uFDF0 o-\u3000 } { lappend result [dom isHTML5CustomName $str] } set result } {0 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0} test dom-4.1 {-useForeignDTD 0} { set doc [dom parse -useForeignDTD 0 {<root/>}] $doc delete } {} test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} { set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]] |
︙ | ︙ |
Changes to tests/htmlreader.test.
︙ | ︙ | |||
2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 | <head><title></title></head><body><form><select id="L" name="nls_language"> <option value="">-- </option><option selected="selected" value="en_US">en_US </option><option value="de_DE">de_DE </option> </select></form></body> </html>} test html-3.1 {Bad data} { set data {line 6 column 17 - Warning: <script> lacks "type" attribute line 10 column 17 - Warning: <script> lacks "type" attribute line 11 column 17 - Warning: <table> lacks "summary" attribute} set doc [dom parse -html $data] set result [$doc asHTML] | > > > > > > > > > > > > > > > > > > > > > > | 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 | <head><title></title></head><body><form><select id="L" name="nls_language"> <option value="">-- </option><option selected="selected" value="en_US">en_US </option><option value="de_DE">de_DE </option> </select></form></body> </html>} test html-2.9 {HTML parsing} {knownBug} { set doc [dom parse -html {<HTML><MATH-field>x = 1 + 1</MATH-field></HTML>}] set result [$doc asHTML] $doc delete set result } {<html><MATH-field>x = 1 + 1</MATH-field></html>} test html-2.10 {HTML parsing} { set doc [dom parse -html {<HTML><mytag>foo</mytag></HTML>}] set result [$doc asHTML] $doc delete set result } {<html><mytag>foo</mytag></html>} test html-2.11 {HTML parsing} { set doc [dom parse -html {<HTML><math-field>x = 1 + 1</math-field></HTML>}] set result [$doc asHTML] $doc delete set result } {<html><math-field>x = 1 + 1</math-field></html>} test html-3.1 {Bad data} { set data {line 6 column 17 - Warning: <script> lacks "type" attribute line 10 column 17 - Warning: <script> lacks "type" attribute line 11 column 17 - Warning: <table> lacks "summary" attribute} set doc [dom parse -html $data] set result [$doc asHTML] |
︙ | ︙ |