Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | HTML5CustomNames
Files: files | file ages | folders
SHA3-256: 2184b00acacb3debfc10836ef22094f74d83f20fd743b8ebac4258236d28f4a7
User & Date: rolf 2024-06-29 01:35:05
Context
2024-06-30
23:23
wip check-in: e77b8c2886 user: rolf tags: HTML5CustomNames
2024-06-29
01:35
wip check-in: 2184b00aca user: rolf tags: HTML5CustomNames
2024-06-28
00:07
Updated to defines to what expat uses. check-in: b23076ff1f user: rolf tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/dom.c.

351
352
353
354
355
356
357




































































358
359
360
361
362
363
364
        if (!clen) return 0;
        if (UTF8_XMLCHAR((unsigned const char *)p,clen))
            p += clen;
        else return 0;
    }
    return 1;
}





































































/*---------------------------------------------------------------------------
|   domClearString
|
\--------------------------------------------------------------------------*/
void 
domClearString (







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
        if (!clen) return 0;
        if (UTF8_XMLCHAR((unsigned const char *)p,clen))
            p += clen;
        else return 0;
    }
    return 1;
}

/*---------------------------------------------------------------------------
|   domHTMLCustomNameDecodeUtf8
|
|   Decodes the UTF-8 sequence starting at p and stores its code point
|   in *codePoint. Returns the number of bytes consumed (1-4), or 0 if
|   the sequence is malformed: invalid lead byte, missing or invalid
|   continuation byte (this includes a premature terminating NUL, so
|   the caller never steps past the end of the string), or an overlong
|   encoding.
|
\--------------------------------------------------------------------------*/
static int
domHTMLCustomNameDecodeUtf8 (
    const unsigned char *p,
    unsigned long       *codePoint
    )
{
    unsigned long cp, minCp;
    int len, i;

    if (p[0] < 0x80) {
        *codePoint = p[0];
        return 1;
    }
    if ((p[0] & 0xE0) == 0xC0) {
        len = 2; cp = p[0] & 0x1F; minCp = 0x80;
    } else if ((p[0] & 0xF0) == 0xE0) {
        len = 3; cp = p[0] & 0x0F; minCp = 0x800;
    } else if ((p[0] & 0xF8) == 0xF0) {
        len = 4; cp = p[0] & 0x07; minCp = 0x10000;
    } else {
        return 0;
    }
    for (i = 1; i < len; i++) {
        /* A NUL byte fails this test as well, which stops the scan
         * at the end of a truncated sequence. */
        if ((p[i] & 0xC0) != 0x80) return 0;
        cp = (cp << 6) | (unsigned long)(p[i] & 0x3F);
    }
    if (cp < minCp) return 0;   /* overlong encoding */
    *codePoint = cp;
    return len;
}

/*---------------------------------------------------------------------------
|   domIsHTMLCustomName
|
|   Checks whether the NUL-terminated, UTF-8 encoded string str is a
|   valid HTML custom element name:
|
|       [a-z] (PCENChar)* '-' (PCENChar)*
|
|   i.e. it starts with a lower-case ASCII letter, consists only of
|   PCENChar characters, contains at least one '-' and is not one of
|   the reserved hyphenated element names (annotation-xml,
|   color-profile, font-face, ...).
|
|   Returns 1 if str is a valid custom element name, 0 otherwise
|   (including for a NULL or empty string and for malformed UTF-8).
|
\--------------------------------------------------------------------------*/
int
domIsHTMLCustomName (
    const char *str
    )
{
    static const char *const reservedNames[] = {
        "annotation-xml",
        "color-profile",
        "font-face",
        "font-face-src",
        "font-face-uri",
        "font-face-format",
        "font-face-name",
        "missing-glyph",
        NULL
    };
    const unsigned char *p;
    unsigned long cp;
    int clen, i, dashseen = 0;

    if (str == NULL) return 0;
    p = (const unsigned char *)str;
    if (*p < 'a' || *p > 'z') {
        return 0;
    }
    p++;
    while (*p) {
        /* Decode at the current position p (not at the start of the
         * name) so that every character is checked on its own. */
        clen = domHTMLCustomNameDecodeUtf8 (p, &cp);
        if (clen == 0) return 0;
        if (clen == 1) {
            if (cp == '-') {
                dashseen = 1;
            } else if (!(cp == '.'
                         || (cp >= '0' && cp <= '9')
                         || cp == '_'
                         || (cp >= 'a' && cp <= 'z'))) {
                return 0;
            }
        } else if (!(cp == 0xB7
                     || (cp >= 0xC0 && cp <= 0xD6)
                     || (cp >= 0xD8 && cp <= 0xF6)
                     || (cp >= 0xF8 && cp <= 0x37D)
                     || (cp >= 0x37F && cp <= 0x1FFF)
                     || (cp >= 0x200C && cp <= 0x200D)
                     || (cp >= 0x203F && cp <= 0x2040)
                     || (cp >= 0x2070 && cp <= 0x218F)
                     || (cp >= 0x2C00 && cp <= 0x2FEF)
                     || (cp >= 0x3001 && cp <= 0xD7FF)
                     || (cp >= 0xF900 && cp <= 0xFDCF)
                     || (cp >= 0xFDF0 && cp <= 0xFFFD)
                     /* Supplementary planes: keep scanning instead of
                      * accepting the whole name right away. */
                     || (cp >= 0x10000 && cp <= 0xEFFFF))) {
            return 0;
        }
        p += clen;
    }
    if (!dashseen) return 0;
    for (i = 0; reservedNames[i] != NULL; i++) {
        if (!strcmp(str, reservedNames[i])) return 0;
    }
    return 1;
}

/*---------------------------------------------------------------------------
|   domClearString
|
\--------------------------------------------------------------------------*/
void 
domClearString (

Changes to generic/dom.h.

912
913
914
915
916
917
918

919
920
921
922
923
924
925
int            domIsChar (const char *str);
void           domClearString (char *str, char *replacement, domLength repllen,
                               Tcl_DString *clearedstr, int *changed);
int            domIsBMPChar (const char *str);
int            domIsComment (const char *str);
int            domIsCDATA (const char *str);
int            domIsPIValue (const char *str);

void           domCopyTo (domNode *node, domNode *parent, int copyNS);
void           domCopyNS (domNode *from, domNode *to);
domAttrNode *  domCreateXMLNamespaceNode (domNode *parent);
void           domRenumberTree (domNode *node);
int            domPrecedes (domNode *node, domNode *other);
void           domNormalize (domNode *node, int forXPath, 
                             domFreeCallback freeCB, void *clientData);







>







912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
int            domIsChar (const char *str);
void           domClearString (char *str, char *replacement, domLength repllen,
                               Tcl_DString *clearedstr, int *changed);
int            domIsBMPChar (const char *str);
int            domIsComment (const char *str);
int            domIsCDATA (const char *str);
int            domIsPIValue (const char *str);
int            domIsHTMLCustomName (const char *str);
void           domCopyTo (domNode *node, domNode *parent, int copyNS);
void           domCopyNS (domNode *from, domNode *to);
domAttrNode *  domCreateXMLNamespaceNode (domNode *parent);
void           domRenumberTree (domNode *node);
int            domPrecedes (domNode *node, domNode *other);
void           domNormalize (domNode *node, int forXPath, 
                             domFreeCallback freeCB, void *clientData);

Changes to generic/domhtml.c.

2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
    int            hasContent;
    domNode       *pnode;
    domNode       *node = NULL, *parent_node = parent;
    domTextNode   *tnode;
    domAttrNode   *attrnode, *lastAttr;
    int            ampersandSeen = 0;
    int            only_whites   = 0;
    int            hnew, autoclose, ignore;
    char           tmp[250], *y = NULL;
    Tcl_HashEntry *h;
    domProcessingInstructionNode *pinode;

    x = &(html[*pos]);

    while ( (c=*x)!=0 ) {







|







2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
    int            hasContent;
    domNode       *pnode;
    domNode       *node = NULL, *parent_node = parent;
    domTextNode   *tnode;
    domAttrNode   *attrnode, *lastAttr;
    int            ampersandSeen = 0;
    int            only_whites   = 0;
    int            hnew, autoclose, ignore, maybeCustomName, rc;
    char           tmp[250], *y = NULL;
    Tcl_HashEntry *h;
    domProcessingInstructionNode *pinode;

    x = &(html[*pos]);

    while ( (c=*x)!=0 ) {
2974
2975
2976
2977
2978
2979
2980

2981
2982







2983
2984










2985
2986
2987
2988
2989
2990
2991
            }


            /*----------------------------------------------------------------
            |   new tag/element
            |
            \---------------------------------------------------------------*/

            while ((c=*x)!=0 && c!='/' && c!='>' && c!='<' && !SPACE(c) ) {
                if (!isalnum(c)) goto readText;







                *x = tolower(c);
                x++;










            }
            hasContent = 1;
            if (c==0) {
                RetError("Missing \">\"",(start-html) );
            }
            if ( (x-start)==1) {
                RetError("Null markup name",(start-html) );







>

|
>
>
>
>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>







2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
            }


            /*----------------------------------------------------------------
            |   new tag/element
            |
            \---------------------------------------------------------------*/
            maybeCustomName = 0;
            while ((c=*x)!=0 && c!='/' && c!='>' && c!='<' && !SPACE(c) ) {
                if (!maybeCustomName && !isalnum(c)) maybeCustomName = 1;
                x++;
            }
            if (!maybeCustomName) {
                c = *x;
                *x = '\0'; /* temporarily terminate the string */
                x = start + 1;
                while (*x) {
                    *x = tolower(*x);
                    x++;
                }
                *x = c;
            } else {
                c = *x;
                *x = '\0'; /* temporarily terminate the string */
                rc = domIsHTMLCustomName (start+1);
                *x = c;
                if (!rc) {
                    goto readText;
                }
            }
            hasContent = 1;
            if (c==0) {
                RetError("Missing \">\"",(start-html) );
            }
            if ( (x-start)==1) {
                RetError("Null markup name",(start-html) );

Changes to tests/htmlreader.test.

2395
2396
2397
2398
2399
2400
2401






















2402
2403
2404
2405
2406
2407
2408
<head><title></title></head><body><form><select id="L" name="nls_language">
<option value="">--
        </option><option selected="selected" value="en_US">en_US
        </option><option value="de_DE">de_DE
        </option>
</select></form></body>
</html>}























test html-3.1 {Bad data} {
    set data {line 6 column 17 - Warning: <script> lacks "type" attribute
line 10 column 17 - Warning: <script> lacks "type" attribute
        line 11 column 17 - Warning: <table> lacks "summary" attribute}
    set doc [dom parse -html $data]
    set result [$doc asHTML]







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
<head><title></title></head><body><form><select id="L" name="nls_language">
<option value="">--
        </option><option selected="selected" value="en_US">en_US
        </option><option value="de_DE">de_DE
        </option>
</select></form></body>
</html>}

# html-2.9: the element name MATH-field contains a dash together with
# upper-case letters. The expected result shows its start and end tags
# coming back as escaped character data instead of as an element.
# NOTE(review): the tail of the expected string mixes a raw '>' after
# /MATH-field with an escaped one after /html -- confirm against the
# actual parser output.
test html-2.9 {HTML parsing} {
    set doc [dom parse -html {<HTML><MATH-field>x = 1 + 1</MATH-field></HTML>}]
    set result [$doc asHTML]
    $doc delete
    set result
} {<html>&lt;MATH-field&gt;x = 1 + 1&lt;/MATH-field></html&gt;}

# html-2.10: an unknown tag name made only of letters (mytag, no dash)
# is expected to be kept as an element; the upper-case HTML root tag of
# the input is expected to be serialized in lower case.
test html-2.10 {HTML parsing} {
    set doc [dom parse -html {<HTML><mytag>foo</mytag></HTML>}]
    set result [$doc asHTML]
    $doc delete
    set result
} {<html><mytag>foo</mytag></html>}

# html-2.11: a lower-case tag name containing a dash (math-field) is
# expected to be kept as an element.
# NOTE(review): the expected root tag here is upper-case HTML, while
# html-2.10 expects the same input root tag to come back in lower
# case -- confirm which one matches the actual serializer output.
test html-2.11 {HTML parsing} {
    set doc [dom parse -html {<HTML><math-field>x = 1 + 1</math-field></HTML>}]
    set result [$doc asHTML]
    $doc delete
    set result
} {<HTML><math-field>x = 1 + 1</math-field></HTML>}


test html-3.1 {Bad data} {
    set data {line 6 column 17 - Warning: <script> lacks "type" attribute
line 10 column 17 - Warning: <script> lacks "type" attribute
        line 11 column 17 - Warning: <table> lacks "summary" attribute}
    set doc [dom parse -html $data]
    set result [$doc asHTML]