Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Added the isHTML5CustomName method to the dom command. The simple HTML reader now accepts HTML5 custom element names. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
9d8f28621bbf19a174ca8e3bb6c02ffa |
User & Date: | rolf 2024-07-02 22:08:25 |
Context
2024-07-02
| ||
22:09 | Updated the CHANGES file. check-in: d02dcd66ea user: rolf tags: trunk | |
22:08 | Added the isHTML5CustomName method to the dom command. The simple HTML reader now accepts HTML5 custom element names. check-in: 9d8f28621b user: rolf tags: trunk | |
21:58 | Added the isHTML5CustomName method to the dom command. The simple HTML reader uses the machinery to accept HTML5 custom element names. Closed-Leaf check-in: a2c9f83840 user: rolf tags: HTML5CustomNames | |
2024-07-01
| ||
11:42 | Use the enhanced encoding capabilities of Tcl 9 better. And more changes as result of TIP 699. check-in: 0058351450 user: rolf tags: trunk | |
Changes
Changes to generic/dom.c.
351 352 353 354 355 356 357 358 359 360 361 362 363 364 |
if (!clen) return 0; if (UTF8_XMLCHAR((unsigned const char *)p,clen)) p += clen; else return 0; } return 1; } /*--------------------------------------------------------------------------- | domClearString | \--------------------------------------------------------------------------*/ void domClearString ( |
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > |
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 |
if (!clen) return 0; if (UTF8_XMLCHAR((unsigned const char *)p,clen)) p += clen; else return 0; } return 1; } /*--------------------------------------------------------------------------- | domIsHTML5CustomName | \--------------------------------------------------------------------------*/ int domIsHTML5CustomName ( const char *str ) { const char *p; int clen, dashseen = 0; Tcl_UniChar uniChar; p = str; if (*p < 'a' || *p > 'z') { return 0; } p++; while (*p) { clen = UTF8_CHAR_LEN(*p); if (clen == 0) return 0; if (clen == 1) { if (*p == '-') { dashseen = 1; p++; } else { if (*p == '.' || (*p >= '0' && *p <= '9') || *p == '_' || (*p >= 'a' && *p <= 'z')) { p++; } else { return 0; } } continue; } if (clen == 4) { p += clen; continue; } clen = Tcl_UtfToUniChar (p, &uniChar); if (uniChar == 0xB7 || (uniChar >= 0xC0 && uniChar <= 0xD6) || (uniChar >= 0xD8 && uniChar <= 0xF6) || (uniChar >= 0xF8 && uniChar <= 0x37D) || (uniChar >= 0x37F && uniChar <= 0x1FFF) || (uniChar >= 0x200C && uniChar <= 0x200D) || (uniChar >= 0x203F && uniChar <= 0x2040) || (uniChar >= 0x2070 && uniChar <= 0x218F) || (uniChar >= 0x2C00 && uniChar <= 0x2FEF) || (uniChar >= 0x3001 && uniChar <= 0xD7FF) || (uniChar >= 0xF900 && uniChar <= 0xFDCF) || (uniChar >= 0xFDF0 && uniChar <= 0xFFFD)) { p += clen; } else { return 0; } } if (!dashseen) return 0; switch (str[0]) { case 'a': if (!strcmp(str,"annotation-xml")) {return 0;} break; case 'c': if (!strcmp(str,"color-profile")) {return 0;} break; case 'f': if (!strcmp(str,"font-face") || !strcmp(str,"font-face-src") || !strcmp(str,"font-face-uri") || !strcmp(str,"font-face-format") || !strcmp(str,"font-face-name")) {return 0;} break; case 'm': if (!strcmp(str,"missing-glyph")) {return 0;} break; } return 1; } /*--------------------------------------------------------------------------- | domClearString | \--------------------------------------------------------------------------*/ void domClearString ( |
Changes to generic/dom.h.
912 913 914 915 916 917 918 919 920 921 922 923 924 925 |
int domIsChar (const char *str); void domClearString (char *str, char *replacement, domLength repllen, Tcl_DString *clearedstr, int *changed); int domIsBMPChar (const char *str); int domIsComment (const char *str); int domIsCDATA (const char *str); int domIsPIValue (const char *str); void domCopyTo (domNode *node, domNode *parent, int copyNS); void domCopyNS (domNode *from, domNode *to); domAttrNode * domCreateXMLNamespaceNode (domNode *parent); void domRenumberTree (domNode *node); int domPrecedes (domNode *node, domNode *other); void domNormalize (domNode *node, int forXPath, domFreeCallback freeCB, void *clientData); |
> |
912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 |
int domIsChar (const char *str);
void domClearString (char *str, char *replacement, domLength repllen,
Tcl_DString *clearedstr, int *changed);
int domIsBMPChar (const char *str);
int domIsComment (const char *str);
int domIsCDATA (const char *str);
int domIsPIValue (const char *str);
/* Returns 1 if str is acceptable as an HTML5 custom element name (starts
 * with a-z, contains at least one '-', uses only the allowed characters
 * and is not one of the reserved names such as "font-face"), otherwise 0. */
int domIsHTML5CustomName (const char *str);
void domCopyTo (domNode *node, domNode *parent, int copyNS);
void domCopyNS (domNode *from, domNode *to);
domAttrNode * domCreateXMLNamespaceNode (domNode *parent);
void domRenumberTree (domNode *node);
int domPrecedes (domNode *node, domNode *other);
void domNormalize (domNode *node, int forXPath,
domFreeCallback freeCB, void *clientData);
|
Changes to generic/domhtml.c.
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
....
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
|
int hasContent; domNode *pnode; domNode *node = NULL, *parent_node = parent; domTextNode *tnode; domAttrNode *attrnode, *lastAttr; int ampersandSeen = 0; int only_whites = 0; int hnew, autoclose, ignore; char tmp[250], *y = NULL; Tcl_HashEntry *h; domProcessingInstructionNode *pinode; x = &(html[*pos]); while ( (c=*x)!=0 ) { ................................................................................ } /*---------------------------------------------------------------- | new tag/element | \---------------------------------------------------------------*/ while ((c=*x)!=0 && c!='/' && c!='>' && c!='<' && !SPACE(c) ) { if (!isalnum(c)) goto readText; *x = tolower(c); x++; } hasContent = 1; if (c==0) { RetError("Missing \">\"",(start-html) ); } if ( (x-start)==1) { RetError("Null markup name",(start-html) ); |
|
>
<
>
>
>
>
>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
|
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
....
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
|
int hasContent; domNode *pnode; domNode *node = NULL, *parent_node = parent; domTextNode *tnode; domAttrNode *attrnode, *lastAttr; int ampersandSeen = 0; int only_whites = 0; int hnew, autoclose, ignore, maybeCustomName, rc; char tmp[250], *y = NULL; Tcl_HashEntry *h; domProcessingInstructionNode *pinode; x = &(html[*pos]); while ( (c=*x)!=0 ) { ................................................................................ } /*---------------------------------------------------------------- | new tag/element | \---------------------------------------------------------------*/ maybeCustomName = 0; while ((c=*x)!=0 && c!='/' && c!='>' && c!='<' && !SPACE(c) ) { if (!maybeCustomName && !isalnum(c)) maybeCustomName = 1; x++; } if (!maybeCustomName) { c = *x; *x = '\0'; /* temporarily terminate the string */ x = start + 1; while (*x) { *x = tolower(*x); x++; } *x = c; } else { c = *x; *x = '\0'; /* temporarily terminate the string */ rc = domIsHTML5CustomName (start+1); *x = c; if (!rc) { goto readText; } } hasContent = 1; if (c==0) { RetError("Missing \">\"",(start-html) ); } if ( (x-start)==1) { RetError("Null markup name",(start-html) ); |
Changes to generic/tcldom.c.
224 225 226 227 228 229 230 231 232 233 234 235 236 237 .... 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 .... 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 |
" isComment string \n" " isCDATA string \n" " isPIValue string \n" " isName string \n" " isQName string \n" " isNCName string \n" " isPIName string \n" " featureinfo feature \n" ; static char doc_usage[] = "Usage domDoc <method> <args>, where method can be:\n" " documentElement ?objVar? \n" " getElementsByTagName name \n" ................................................................................ "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setStoreLineColumn", "isCharData", "isName", "isPIName", "isQName", "isComment", "isCDATA", "isPIValue", "isNCName", "createDocumentNode", "setNameCheck", "setTextCheck", "setObjectCommands", "featureinfo", "isBMPCharData", "clearString", #ifdef TCL_THREADS "attachDocument", "detachDocument", #endif NULL }; enum domMethod { m_createDocument, m_createDocumentNS, m_createNodeCmd, m_parse, m_setStoreLineColumn, m_isCharData, m_isName, m_isPIName, m_isQName, m_isComment, m_isCDATA, m_isPIValue, m_isNCName, m_createDocumentNode, m_setNameCheck, m_setTextCheck, m_setObjectCommands, m_featureinfo, m_isBMPCharData, m_clearString #ifdef TCL_THREADS ,m_attachDocument, m_detachDocument #endif }; static const char *nodeModeValues[] = { "automatic", "command", "token", NULL ................................................................................ return tcldom_featureinfo(clientData, interp, --objc, objv+1); case m_isBMPCharData: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2]))); return TCL_OK; case m_clearString: CheckArgs(3,5,2,"?-replace ?replacement?? string"); if (objc >= 4) { option = Tcl_GetString (objv[2]); if (option[0] == '-' && option[1] == 'r') { if (Tcl_GetIndexFromObj (interp, objv[2], clearStringOptions, "option", |
> > | > > > > > > |
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 .... 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 .... 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 |
" isComment string \n" " isCDATA string \n" " isPIValue string \n" " isName string \n" " isQName string \n" " isNCName string \n" " isPIName string \n" " isHTML5CustomName string \n" " featureinfo feature \n" ; static char doc_usage[] = "Usage domDoc <method> <args>, where method can be:\n" " documentElement ?objVar? \n" " getElementsByTagName name \n" ................................................................................ "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setStoreLineColumn", "isCharData", "isName", "isPIName", "isQName", "isComment", "isCDATA", "isPIValue", "isNCName", "createDocumentNode", "setNameCheck", "setTextCheck", "setObjectCommands", "featureinfo", "isBMPCharData", "clearString", "isHTML5CustomName", #ifdef TCL_THREADS "attachDocument", "detachDocument", #endif NULL }; enum domMethod { m_createDocument, m_createDocumentNS, m_createNodeCmd, m_parse, m_setStoreLineColumn, m_isCharData, m_isName, m_isPIName, m_isQName, m_isComment, m_isCDATA, m_isPIValue, m_isNCName, m_createDocumentNode, m_setNameCheck, m_setTextCheck, m_setObjectCommands, m_featureinfo, m_isBMPCharData, m_clearString, m_isHTML5CustomName #ifdef TCL_THREADS ,m_attachDocument, m_detachDocument #endif }; static const char *nodeModeValues[] = { "automatic", "command", "token", NULL ................................................................................ return tcldom_featureinfo(clientData, interp, --objc, objv+1); case m_isBMPCharData: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2]))); return TCL_OK; case m_isHTML5CustomName: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsHTML5CustomName(Tcl_GetString(objv[2]))); return TCL_OK; case m_clearString: CheckArgs(3,5,2,"?-replace ?replacement?? string"); if (objc >= 4) { option = Tcl_GetString (objv[2]); if (option[0] == '-' && option[1] == 'r') { if (Tcl_GetIndexFromObj (interp, objv[2], clearStringOptions, "option", |
Changes to tests/dom.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
....
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
|
# Features covered: dom command # # This file contains a collection of tests for the dom command of # tDOM. # # dom-1.*: createDocument, createDocumentNS # dom-2.*: parse # dom-3.*: isName, isNCName, isCharData, isPIName, isComment, isCDATA # dom-4.*: parse -useForeignDTD # dom-5.*: external entities # dom-6.*: use in slave interpreter # dom-7.*: setNameCheck, setTextCheck # dom-8.*: createDocumentNode, documentNodes # dom-9.*: setObjectCommands # dom-10.*: createNodeCmd ................................................................................ \u0004\u0005\u0001 } { lappend result [dom clearString -replace some $str] } set result } [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome] test dom-4.1 {-useForeignDTD 0} { set doc [dom parse -useForeignDTD 0 {<root/>}] $doc delete } {} test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} { set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]] |
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
....
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
|
# Features covered: dom command # # This file contains a collection of tests for the dom command of # tDOM. # # dom-1.*: createDocument, createDocumentNS # dom-2.*: parse # dom-3.*: is* methods, clearString # dom-4.*: parse -useForeignDTD # dom-5.*: external entities # dom-6.*: use in slave interpreter # dom-7.*: setNameCheck, setTextCheck # dom-8.*: createDocumentNode, documentNodes # dom-9.*: setObjectCommands # dom-10.*: createNodeCmd ................................................................................ \u0004\u0005\u0001 } { lappend result [dom clearString -replace some $str] } set result } [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome] test dom-3.46 {isHTML5CustomName} { set result [list] foreach str { abc ab-c ab-C -ab äb-c ab---c ab- ab-ᴨ aÄ-b aA-b aä-b 0n-foo A-b font-face m\u00B7-. n-\uFDF0 o-\u3000 } { lappend result [dom isHTML5CustomName $str] } set result } {0 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0} test dom-4.1 {-useForeignDTD 0} { set doc [dom parse -useForeignDTD 0 {<root/>}] $doc delete } {} test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} { set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]] |
Changes to tests/htmlreader.test.
2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 |
<head><title></title></head><body><form><select id="L" name="nls_language"> <option value="">-- </option><option selected="selected" value="en_US">en_US </option><option value="de_DE">de_DE </option> </select></form></body> </html>} test html-3.1 {Bad data} { set data {line 6 column 17 - Warning: <script> lacks "type" attribute line 10 column 17 - Warning: <script> lacks "type" attribute line 11 column 17 - Warning: <table> lacks "summary" attribute} set doc [dom parse -html $data] set result [$doc asHTML] |
> > > > > > > > > > > > > > > > > > > > > > |
2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 |
<head><title></title></head><body><form><select id="L" name="nls_language"> <option value="">-- </option><option selected="selected" value="en_US">en_US </option><option value="de_DE">de_DE </option> </select></form></body> </html>} test html-2.9 {HTML parsing} {knownBug} { set doc [dom parse -html {<HTML><MATH-field>x = 1 + 1</MATH-field></HTML>}] set result [$doc asHTML] $doc delete set result } {<html><MATH-field>x = 1 + 1</MATH-field></html>} test html-2.10 {HTML parsing} { set doc [dom parse -html {<HTML><mytag>foo</mytag></HTML>}] set result [$doc asHTML] $doc delete set result } {<html><mytag>foo</mytag></html>} test html-2.11 {HTML parsing} { set doc [dom parse -html {<HTML><math-field>x = 1 + 1</math-field></HTML>}] set result [$doc asHTML] $doc delete set result } {<html><math-field>x = 1 + 1</math-field></html>} test html-3.1 {Bad data} { set data {line 6 column 17 - Warning: <script> lacks "type" attribute line 10 column 17 - Warning: <script> lacks "type" attribute line 11 column 17 - Warning: <table> lacks "summary" attribute} set doc [dom parse -html $data] set result [$doc asHTML] |