Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Added the isHTML5CustomName method to the dom command. The simple HTML reader uses this machinery to accept HTML5 custom element names.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | HTML5CustomNames
Files: files | file ages | folders
SHA3-256: a2c9f8384072360fde7f2ffc49ba5d3b22f07f67b0d32dadf145a11fd875c355
User & Date: rolf 2024-07-02 21:58:55
Context
2024-07-02
22:08
Added the isHTML5CustomName method to the dom command. The simple HTML reader now accepts HTML5 custom element names. check-in: 9d8f28621b user: rolf tags: trunk
21:58
Added the isHTML5CustomName method to the dom command. The simple HTML reader uses this machinery to accept HTML5 custom element names. Closed-Leaf check-in: a2c9f83840 user: rolf tags: HTML5CustomNames
2024-07-01
13:15
Merged from trunk. check-in: ca48d4518e user: rolf tags: HTML5CustomNames
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/dom.c.

377
378
379
380
381
382
383
384
385
386
387
388
389




390
391
392


393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411

412
413
414
415
416
417
418
    while (*p) {
        clen = UTF8_CHAR_LEN(*p);
        if (clen == 0) return 0;
        if (clen == 1) {
            if (*p == '-') {
                dashseen = 1;
                p++;
                continue;
            } else if (*p == '.'
                       || (*p >= '0' && *p <= '9')
                       || *p == '_'
                       || (*p >= 'a' && *p <= 'z')) {
                p++;




                continue;
            }
            return 0;


        } 
        if (clen == 4) return 1;
        clen = Tcl_UtfToUniChar (str, &uniChar);
        if (uniChar == 0xB7
            || (uniChar >= 0xC0 && uniChar <= 0xD6)
            || (uniChar >= 0xD8 && uniChar <= 0xF6) 
            || (uniChar >= 0xF8 && uniChar <= 0x37D) 
            || (uniChar >= 0x37F && uniChar <= 0x1FFF) 
            || (uniChar >= 0x200C && uniChar <= 0x200D) 
            || (uniChar >= 0x203F && uniChar <= 0x2040)
            || (uniChar >= 0x2070 && uniChar <= 0x218F)
            || (uniChar >= 0x2C00 && uniChar <= 0x2FEF)
            || (uniChar >= 0x3001 && uniChar <= 0xD7FF)
            || (uniChar >= 0xF900 && uniChar <= 0xFDCF)
            || (uniChar >= 0xFDF0 && uniChar <= 0xFFFD)) {
            p += clen;
            continue;
        }
        return 0;

    }
    if (!dashseen) return 0;
    switch (str[0]) {
    case 'a': if (!strcmp(str,"annotation-xml")) {return 0;} break;
    case 'c': if (!strcmp(str,"color-profile")) {return 0;} break;
    case 'f': if (!strcmp(str,"font-face")        ||
                  !strcmp(str,"font-face-src")    ||







|
|
|
|
|
|
>
>
>
>
|
|
|
>
>
|
<
|













<
|
|
>







377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399

400
401
402
403
404
405
406
407
408
409
410
411
412
413

414
415
416
417
418
419
420
421
422
423
    while (*p) {
        clen = UTF8_CHAR_LEN(*p);
        if (clen == 0) return 0;
        if (clen == 1) {
            if (*p == '-') {
                dashseen = 1;
                p++;
            } else {
                if (*p == '.'
                    || (*p >= '0' && *p <= '9')
                    || *p == '_'
                    || (*p >= 'a' && *p <= 'z')) {
                    p++;
                } else {
                    return 0;
                }
            }
            continue;
        } 
        if (clen == 4) {
            p += clen;
            continue;
        }

        clen = Tcl_UtfToUniChar (p, &uniChar);
        if (uniChar == 0xB7
            || (uniChar >= 0xC0 && uniChar <= 0xD6)
            || (uniChar >= 0xD8 && uniChar <= 0xF6) 
            || (uniChar >= 0xF8 && uniChar <= 0x37D) 
            || (uniChar >= 0x37F && uniChar <= 0x1FFF) 
            || (uniChar >= 0x200C && uniChar <= 0x200D) 
            || (uniChar >= 0x203F && uniChar <= 0x2040)
            || (uniChar >= 0x2070 && uniChar <= 0x218F)
            || (uniChar >= 0x2C00 && uniChar <= 0x2FEF)
            || (uniChar >= 0x3001 && uniChar <= 0xD7FF)
            || (uniChar >= 0xF900 && uniChar <= 0xFDCF)
            || (uniChar >= 0xFDF0 && uniChar <= 0xFFFD)) {
            p += clen;

        } else {
            return 0;
        }
    }
    if (!dashseen) return 0;
    switch (str[0]) {
    case 'a': if (!strcmp(str,"annotation-xml")) {return 0;} break;
    case 'c': if (!strcmp(str,"color-profile")) {return 0;} break;
    case 'f': if (!strcmp(str,"font-face")        ||
                  !strcmp(str,"font-face-src")    ||

Changes to generic/tcldom.c.

7885
7886
7887
7888
7889
7890
7891
7892
7893
7894
7895
7896
7897
7898
7899
        case m_isBMPCharData:
            CheckArgs(3,3,2,"string");
            SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2])));
            return TCL_OK;

        case m_isHTML5CustomName:
            CheckArgs(3,3,2,"string");
            SetBooleanResult(domIsHTMLCustomChar(Tcl_GetString(objv[2])));
            return TCL_OK;
        
        case m_clearString:
            CheckArgs(3,5,2,"?-replace ?replacement?? string");
            if (objc >= 4) {
                option = Tcl_GetString (objv[2]);
                if (option[0] == '-' && option[1] == 'r') {







|







7885
7886
7887
7888
7889
7890
7891
7892
7893
7894
7895
7896
7897
7898
7899
        case m_isBMPCharData:
            CheckArgs(3,3,2,"string");
            SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2])));
            return TCL_OK;

        case m_isHTML5CustomName:
            CheckArgs(3,3,2,"string");
            SetBooleanResult(domIsHTML5CustomName(Tcl_GetString(objv[2])));
            return TCL_OK;
        
        case m_clearString:
            CheckArgs(3,5,2,"?-replace ?replacement?? string");
            if (objc >= 4) {
                option = Tcl_GetString (objv[2]);
                if (option[0] == '-' && option[1] == 'r') {

Changes to tests/dom.test.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Features covered: dom command
#
# This file contains a collection of tests for the dom command of
# tDOM.
#
#    dom-1.*:  createDocument, createDocumentNS
#    dom-2.*:  parse
#    dom-3.*:  isName, isNCName, isCharData, isPIName, isComment, isCDATA
#    dom-4.*:  parse -useForeignDTD
#    dom-5.*:  external entities
#    dom-6.*:  use in slave interpreter
#    dom-7.*:  setNameCheck, setTextCheck
#    dom-8.*:  createDocumentNode, documentNodes
#    dom-9.*:  setObjectCommands
#    dom-10.*: createNodeCmd







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Features covered: dom command
#
# This file contains a collection of tests for the dom command of
# tDOM.
#
#    dom-1.*:  createDocument, createDocumentNS
#    dom-2.*:  parse
#    dom-3.*:  is* methods, clearString
#    dom-4.*:  parse -useForeignDTD
#    dom-5.*:  external entities
#    dom-6.*:  use in slave interpreter
#    dom-7.*:  setNameCheck, setTextCheck
#    dom-8.*:  createDocumentNode, documentNodes
#    dom-9.*:  setObjectCommands
#    dom-10.*: createNodeCmd
1183
1184
1185
1186
1187
1188
1189


























1190
1191
1192
1193
1194
1195
1196
        \u0004\u0005\u0001
    } {
        lappend result [dom clearString -replace some $str]
    }
    set result
} [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome]



























test dom-4.1 {-useForeignDTD 0} {
    set doc [dom parse -useForeignDTD 0 {<root/>}]
    $doc delete
} {}

test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} {
    set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]]







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
        \u0004\u0005\u0001
    } {
        lappend result [dom clearString -replace some $str]
    }
    set result
} [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome]

test dom-3.46 {isHTML5CustomName} {
    set result [list]
    foreach str {
        abc
        ab-c
        ab-C
        -ab
        äb-c
        ab---c
        ab-
        ab-ᴨ
        aÄ-b
        aA-b
        aä-b
        0n-foo
        A-b
        font-face
        m\u00B7-.
        n-\uFDF0
        o-\u3000
    } {
        lappend result [dom isHTML5CustomName $str]
    }
    set result
} {0 1 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0}

test dom-4.1 {-useForeignDTD 0} {
    set doc [dom parse -useForeignDTD 0 {<root/>}]
    $doc delete
} {}

test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} {
    set baseURI [tdom::baseURL [file join [pwd] [file dir [info script]] dom.test]]

Changes to tests/htmlreader.test.

2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
<option value="">--
        </option><option selected="selected" value="en_US">en_US
        </option><option value="de_DE">de_DE
        </option>
</select></form></body>
</html>}

test html-2.9 {HTML parsing} {
    set doc [dom parse -html {<HTML><MATH-field>x = 1 + 1</MATH-field></HTML>}]
    set result [$doc asHTML]
    $doc delete
    set result
} {<html>&lt;MATH-field&gt;x = 1 + 1&lt;/MATH-field&gt;</html&gt;}

test html-2.10 {HTML parsing} {







|







2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
<option value="">--
        </option><option selected="selected" value="en_US">en_US
        </option><option value="de_DE">de_DE
        </option>
</select></form></body>
</html>}

test html-2.9 {HTML parsing} {knownBug} {
    set doc [dom parse -html {<HTML><MATH-field>x = 1 + 1</MATH-field></HTML>}]
    set result [$doc asHTML]
    $doc delete
    set result
} {<html>&lt;MATH-field&gt;x = 1 + 1&lt;/MATH-field&gt;</html&gt;}

test html-2.10 {HTML parsing} {