Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Minor code improvements and more tests.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | clearstring
Files: files | file ages | folders
SHA3-256: 522169e2ea43d1e3cc9d0c57713976bec7da0494d0fcfc25172d2ebd9f1b899e
User & Date: rolf 2024-05-20 23:20:33
Context
2024-06-01
23:10
Added an optional replacement string argument to the -replace option of the dom command method clearString. check-in: 945ee29db2 user: rolf tags: trunk
2024-05-20
23:20
Minor code improvements and more tests. Closed-Leaf check-in: 522169e2ea user: rolf tags: clearstring
17:17
Implemented user defined replacement. check-in: 3d780d7563 user: rolf tags: clearstring
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/dom.c.

366
367
368
369
370
371
372
373
374

375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
    char *replacement,
    domLength repllen,
    Tcl_DString *clearedstr,
    int *changed
    )
{
    char *p, *s;
    int   clen, rewrite = 0;
    

    p = str;
    while (*p) {
        clen = UTF8_CHAR_LEN(*p);
        if (!clen || !UTF8_XMLCHAR((unsigned const char*)p,clen)) {
            rewrite = 1;
            *changed = 1;
            Tcl_DStringInit (clearedstr);
            break;
        }
        p += clen;
    }
    if (!rewrite) {
        return;
    }
    Tcl_DStringAppend (clearedstr, str, p-str);
    if (repllen) {
        Tcl_DStringAppend (clearedstr, replacement, repllen);
    }
    if (clen) {







|

>




<






|







366
367
368
369
370
371
372
373
374
375
376
377
378
379

380
381
382
383
384
385
386
387
388
389
390
391
392
393
    char *replacement,
    domLength repllen,
    Tcl_DString *clearedstr,
    int *changed
    )
{
    char *p, *s;
    int   clen;
    
    *changed = 0;
    p = str;
    while (*p) {
        clen = UTF8_CHAR_LEN(*p);
        if (!clen || !UTF8_XMLCHAR((unsigned const char*)p,clen)) {

            *changed = 1;
            Tcl_DStringInit (clearedstr);
            break;
        }
        p += clen;
    }
    if (!*changed) {
        return;
    }
    Tcl_DStringAppend (clearedstr, str, p-str);
    if (repllen) {
        Tcl_DStringAppend (clearedstr, replacement, repllen);
    }
    if (clen) {

Changes to generic/tcldom.c.

8122
8123
8124
8125
8126
8127
8128
8129
8130
8131
8132
8133
8134
8135
8136
                    repllen = 3;
                }
            } else {
                replacement = NULL;
                repllen = 0;
            }
            string = Tcl_GetString (objv[2]);
            changed = 0;
            domClearString (string, replacement, repllen, &cleardString,
                            &changed);
            if (changed) {
                newObj = Tcl_NewStringObj (
                    Tcl_DStringValue (&cleardString),
                    Tcl_DStringLength (&cleardString));
                Tcl_DStringFree (&cleardString);







<







8122
8123
8124
8125
8126
8127
8128

8129
8130
8131
8132
8133
8134
8135
                    repllen = 3;
                }
            } else {
                replacement = NULL;
                repllen = 0;
            }
            string = Tcl_GetString (objv[2]);

            domClearString (string, replacement, repllen, &cleardString,
                            &changed);
            if (changed) {
                newObj = Tcl_NewStringObj (
                    Tcl_DStringValue (&cleardString),
                    Tcl_DStringLength (&cleardString));
                Tcl_DStringFree (&cleardString);

Changes to tests/dom.test.

1161
1162
1163
1164
1165
1166
1167





















1168
1169
1170
1171
1172
1173
1174
        \u0004\u0005\u0001
    } {
        lappend result [dom clearString -replace $str]
    }
    set result
} [list \ufffd a\ufffd \ufffdb a\ufffdb a\ufffd\ufffdb a\ufffdc\ufffdb a\ufffdd\ufffd\ufffdb a\ufffdd\ufffd\ufffd\ufffd\ufffd_foo_bar \ufffd\ufffd\ufffd_foo_bar_baz\ufffd_didum\ufffd \ufffd\ufffd\ufffd_foo_bar_baz\ufffd_didum\uE000 \ufffd\ufffd\ufffd abc \ufffd\ufffd\ufffd]























# Parses a minimal document with -useForeignDTD set to 0 and deletes it
# again. The body ends with the delete call and the expected result is
# the empty string.
test dom-4.1 {-useForeignDTD 0} {
    set doc [dom parse -useForeignDTD 0 {<root/>}]
    # Release the parsed document; its result is what the test compares.
    $doc delete
} {}

test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} {







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
        \u0004\u0005\u0001
    } {
        lappend result [dom clearString -replace $str]
    }
    set result
} [list \ufffd a\ufffd \ufffdb a\ufffdb a\ufffd\ufffdb a\ufffdc\ufffdb a\ufffdd\ufffd\ufffdb a\ufffdd\ufffd\ufffd\ufffd\ufffd_foo_bar \ufffd\ufffd\ufffd_foo_bar_baz\ufffd_didum\ufffd \ufffd\ufffd\ufffd_foo_bar_baz\ufffd_didum\uE000 \ufffd\ufffd\ufffd abc \ufffd\ufffd\ufffd]

# Checks "dom clearString -replace" when an explicit replacement string
# is given. Each input below is passed through the command with "some" as
# the replacement, and the collected results are compared with the list
# that follows the test body. In the expected values every occurrence of
# U+0001..U+0006 and of the U+D800..U+DFFF code points used in the inputs
# shows up as "some", while "abc" and U+E000 come back unchanged.
test dom-3.45 {clearString -replace with replacement} {
    # Collects one processed string per input, in input order.
    set result [list]
    foreach str {
        \u0001
        a\u0002
        \u0003b
        a\u0004b
        a\u0004\u0005b
        a\u0004c\u0005b
        a\u0004d\u0005\u0006b
        a\u0004d\u0005\uD800\uD801\uD802_foo_bar
        \uD800\uD801\uD802_foo_bar_baz\uD802_didum\uDFFF
        \uD800\uD801\uD802_foo_bar_baz\uD802_didum\uE000
        \u0004\u0005\uDABC
        abc
        \u0004\u0005\u0001
    } {
        # The word after -replace is the replacement text.
        lappend result [dom clearString -replace some $str]
    }
    # The value of the last command is the result the test compares.
    set result
} [list some asome someb asomeb asomesomeb asomecsomeb asomedsomesomeb asomedsomesomesomesome_foo_bar somesomesome_foo_bar_bazsome_didumsome somesomesome_foo_bar_bazsome_didum\uE000 somesomesome abc somesomesome]

# Parses a minimal document with -useForeignDTD set to 0 and deletes it
# again. The body ends with the delete call and the expected result is
# the empty string.
test dom-4.1 {-useForeignDTD 0} {
    set doc [dom parse -useForeignDTD 0 {<root/>}]
    # Release the parsed document; its result is what the test compares.
    $doc delete
} {}

test dom-4.2 {-useForeignDTD 1 with document with internal subset} {need_uri} {