Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Implemented user defined replacement. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | clearstring |
Files: | files | file ages | folders |
SHA3-256: |
3d780d75630d8c8d72bfa43c73c793e8 |
User & Date: | rolf 2024-05-20 17:17:42 |
Context
2024-05-20
| ||
23:20 | Minor code improvements and more tests. Closed-Leaf check-in: 522169e2ea user: rolf tags: clearstring | |
17:17 | Implemented user defined replacement. check-in: 3d780d7563 user: rolf tags: clearstring | |
2024-05-17
| ||
00:18 | Updated to expat 2.6.2. check-in: e37386009b user: rolf tags: trunk | |
Changes
Changes to generic/dom.c.
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 |
{ const char *p; int clen; p = str; while (*p) { clen = UTF8_CHAR_LEN(*p); if (clen > 4) return 0; if (UTF8_XMLCHAR((unsigned const char *)p,clen)) p += clen; else return 0; } return 1; } /*--------------------------------------------------------------------------- | domClearString | | Scans str for the first character that fails the UTF8_XMLCHAR check | (or whose UTF8_CHAR_LEN is > 4). If there is none, str itself is | returned and *haveToFree is set to 0. Otherwise a MALLOC'ed copy is | returned and *haveToFree is set to 1; in the copy every offending | character is dropped or, if replace is non-zero, replaced by the | three bytes EF BF BD. length is used only to size the allocation. | \--------------------------------------------------------------------------*/ char * domClearString ( char *str, int *haveToFree, int replace, domLength length ) { const char *s; char *p, *clearedstr; int clen, i, rewrite = 0; /* Find the first offending character; p stops on it. */ p = str; while (*p) { clen = UTF8_CHAR_LEN(*p); if (clen > 4 || !UTF8_XMLCHAR((unsigned const char*)p,clen)) { rewrite = 1; break; } p += clen; } if (!rewrite) { *haveToFree = 0; return str; } s = p; if (replace) { /* Worst case: every char from the first illegal one up to the * end is a single-byte one and will be replaced with a three-byte * one. So we need, in addition to the one byte already included * in length, two additional bytes for every outstanding * char. */ clearedstr = MALLOC (sizeof(char) * (length+(length-(s-str))*2)+1); } else { /* We have at least one code point to skip, so length alone will * be enough for the result string including the closing \0. */ clearedstr = MALLOC (sizeof(char) * length); } /* Copy the clean prefix, then continue behind it in both buffers. */ memcpy (clearedstr, str, (s-str)); p = clearedstr + (s-str); str += (s-str); if (replace) { *p = '\xEF'; p++; *p = '\xBF'; p++; *p = '\xBD'; p++; } str += clen; while (*str) { clen = UTF8_CHAR_LEN(*str); if (clen <= 4 && UTF8_XMLCHAR((unsigned const char*)str,clen)) { for (i = 0; i < clen; i++) { *p = *str; p++; str++; } } else { if (replace) { *p = '\xEF'; p++; *p = '\xBF'; p++; *p = '\xBD'; p++; } /* NOTE(review): clen is derived from the lead byte only; if * UTF8_CHAR_LEN can yield 0 here, str would not advance - confirm. */ str += clen; } } *p = '\0'; *haveToFree = 1; return clearedstr; } /*--------------------------------------------------------------------------- | domIsBMPChar | \--------------------------------------------------------------------------*/ int |
| | | | | | | | | | | | | > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < < < < < < < < < < < < |
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 |
{ const char *p; int clen; p = str; while (*p) { clen = UTF8_CHAR_LEN(*p); if (!clen) return 0; if (UTF8_XMLCHAR((unsigned const char *)p,clen)) p += clen; else return 0; } return 1; }

/*---------------------------------------------------------------------------
|   domClearStringSkip
|
|   Advances p by at most n bytes but never steps over the terminating
|   \0. Needed because the byte count of a character is derived from its
|   lead byte alone; a sequence cut short by the end of the string would
|   otherwise carry the scan pointer past the terminator.
|
\--------------------------------------------------------------------------*/
static char *
domClearStringSkip (
    char *p,
    int   n
    )
{
    while (n > 0 && *p) {
        p++;
        n--;
    }
    return p;
}

/*---------------------------------------------------------------------------
|   domClearString
|
|   Builds a copy of str in which every character that has no valid
|   UTF8_CHAR_LEN or fails the UTF8_XMLCHAR check is removed or, if
|   repllen is non-zero, replaced by the first repllen bytes of
|   replacement.
|
|   str          \0-terminated input; it is not modified.
|   replacement  Bytes to insert for each offending character. Only
|                read if repllen is non-zero.
|   repllen      Length of replacement in bytes; 0 means "just drop".
|   clearedstr   Receives the result. It is initialized (Tcl_DStringInit)
|                by this function only if an offending character is
|                found; in that case the caller owns it afterwards.
|   changed      Set to 1 if an offending character was found. It is
|                never reset here, so the caller has to initialize it
|                to 0 before the call.
|
\--------------------------------------------------------------------------*/
void
domClearString (
    char *str,
    char *replacement,
    domLength repllen,
    Tcl_DString *clearedstr,
    int *changed
    )
{
    char *p, *s;
    int   clen, rewrite = 0;

    /* s marks the start of the not yet copied clean run, p is the
     * scan position. */
    s = str;
    p = str;
    while (*p) {
        clen = UTF8_CHAR_LEN(*p);
        if (clen && UTF8_XMLCHAR((unsigned const char*)p,clen)) {
            /* Bounded advance: stay inside the string even if the
             * sequence is shorter than its lead byte announces. */
            p = domClearStringSkip (p, clen);
            continue;
        }
        if (!rewrite) {
            /* First offending character: only now the result string
             * is actually needed. */
            rewrite = 1;
            *changed = 1;
            Tcl_DStringInit (clearedstr);
        }
        /* Flush the clean run collected so far, then the substitute. */
        Tcl_DStringAppend (clearedstr, s, p-s);
        if (repllen) {
            Tcl_DStringAppend (clearedstr, replacement, repllen);
        }
        /* If it isn't an UTF-8 encoded character (clen == 0) what is
         * it? And how many of whatever it is? Skip a single byte. */
        p = domClearStringSkip (p, clen ? clen : 1);
        s = p;
    }
    if (rewrite) {
        /* Trailing clean run (may be empty). */
        Tcl_DStringAppend (clearedstr, s, p-s);
    }
}

/*--------------------------------------------------------------------------- | domIsBMPChar | \--------------------------------------------------------------------------*/ int |
Changes to generic/dom.h.
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 |
void tcldom_tolower (const char *str, char *str_out, int len); int domIsNAME (const char *name); int domIsPINAME (const char *name); int domIsQNAME (const char *name); int domIsNCNAME (const char *name); int domIsChar (const char *str); char * domClearString (char *str, int *haveToFree, int replace, domLength length); int domIsBMPChar (const char *str); int domIsComment (const char *str); int domIsCDATA (const char *str); int domIsPIValue (const char *str); void domCopyTo (domNode *node, domNode *parent, int copyNS); void domCopyNS (domNode *from, domNode *to); domAttrNode * domCreateXMLNamespaceNode (domNode *parent); |
| | |
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 |
void tcldom_tolower (const char *str, char *str_out, int len); int domIsNAME (const char *name); int domIsPINAME (const char *name); int domIsQNAME (const char *name); int domIsNCNAME (const char *name); int domIsChar (const char *str); void domClearString (char *str, char *replacement, domLength repllen, Tcl_DString *clearedstr, int *changed); int domIsBMPChar (const char *str); int domIsComment (const char *str); int domIsCDATA (const char *str); int domIsPIValue (const char *str); void domCopyTo (domNode *node, domNode *parent, int copyNS); void domCopyNS (domNode *from, domNode *to); domAttrNode * domCreateXMLNamespaceNode (domNode *parent); |
Changes to generic/domxslt.c.
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
....
6199
6200
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
|
} else {
/* definitions for default decimal format */
df = xs->decimalFormats;
}
str = getAttr(node, "decimal-separator", a_decimalSeparator);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "decimal-separator has to be a"
" single char", errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->decimalSeparator);
}
str = getAttr(node, "grouping-separator", a_groupingSeparator);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "groupingSeparator has to be a"
" single char", errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->groupingSeparator);
}
str = getAttr(node, "infinity", a_infinity);
if (str) df->infinity = str;
str = getAttr(node, "minus-sign", a_minusSign);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "minus-sign has to be a single"
" char", errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->minusSign);
}
................................................................................
if (str[1] != '\0') {
reportError (node, "percent has to be a single"
" char", errMsg);
return -1;
}
df->percent = str[0];
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "percent has to be a single"
" char", errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->percent);
}
str = getAttr(node, "per-mille", a_perMille);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "per-mille has to be a single"
" char", errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->perMille);
}
str = getAttr(node, "zero-digit", a_zeroDigit);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "zero-digit has to be a single"
" char", errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->zeroDigit);
}
str = getAttr(node, "digit", a_digit);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "digit has to be a single char",
errMsg);
if (newdf) FREE((char*)df);
return -1;
}
Tcl_UtfToUniChar (str, &df->digit);
}
str = getAttr(node, "pattern-separator", a_patternSeparator);
if (str) {
clen = UTF8_CHAR_LEN (str[0]);
if (str[clen] != '\0') {
reportError (node, "pattern-separator has to be a"
" single char", errMsg);
return -1;
}
Tcl_UtfToUniChar (str, &df->patternSeparator);
}
break;
|
|
|
|
|
|
|
|
|
|
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
....
6199
6200
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
|
} else { /* definitions for default decimal format */ df = xs->decimalFormats; } str = getAttr(node, "decimal-separator", a_decimalSeparator); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "decimal-separator has to be a" " single char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->decimalSeparator); } str = getAttr(node, "grouping-separator", a_groupingSeparator); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "groupingSeparator has to be a" " single char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->groupingSeparator); } str = getAttr(node, "infinity", a_infinity); if (str) df->infinity = str; str = getAttr(node, "minus-sign", a_minusSign); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "minus-sign has to be a single" " char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->minusSign); } ................................................................................ 
if (str[1] != '\0') { reportError (node, "percent has to be a single" " char", errMsg); return -1; } df->percent = str[0]; clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "percent has to be a single" " char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->percent); } str = getAttr(node, "per-mille", a_perMille); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "per-mille has to be a single" " char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->perMille); } str = getAttr(node, "zero-digit", a_zeroDigit); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "zero-digit has to be a single" " char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->zeroDigit); } str = getAttr(node, "digit", a_digit); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "digit has to be a single char", errMsg); if (newdf) FREE((char*)df); return -1; } Tcl_UtfToUniChar (str, &df->digit); } str = getAttr(node, "pattern-separator", a_patternSeparator); if (str) { clen = UTF8_CHAR_LEN (str[0]); if (!clen || str[clen] != '\0') { reportError (node, "pattern-separator has to be a" " single char", errMsg); return -1; } Tcl_UtfToUniChar (str, &df->patternSeparator); } break; |
Changes to generic/tcldom.c.
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 .... 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 .... 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 |
) " createNodeCmd ?-returnNodeCmd? ?-tagName name? ?-jsonType jsonType? ?-namespace URI? (element|comment|text|cdata|pi)Node cmdName \n" " setStoreLineColumn ?boolean? \n" " setNameCheck ?boolean? \n" " setTextCheck ?boolean? \n" " setObjectCommands ?(automatic|token|command)? \n" " isCharData string \n" " clearString string \n" " isBMPCharData string \n" " isComment string \n" " isCDATA string \n" " isPIValue string \n" " isName string \n" " isQName string \n" " isNCName string \n" ................................................................................ Tcl_Interp * interp, int objc, Tcl_Obj * const objv[] ) { GetTcldomDATA; char * method, tmp[300], *clearedStr, *string, *option; int methodIndex, result, i, bool, replace = 0; domLength len; Tcl_CmdInfo cmdInfo; Tcl_Obj * mobjv[MAX_REWRITE_ARGS], *newObj; static const char *domMethods[] = { "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setStoreLineColumn", "isCharData", "isName", "isPIName", "isQName", "isComment", "isCDATA", "isPIValue", "isNCName", "createDocumentNode", ................................................................................ case m_isBMPCharData: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2]))); return TCL_OK; case m_clearString: CheckArgs(3,4,2,"?-replace? string"); if (objc == 4) { option = Tcl_GetString (objv[2]); if (option[0] == '-' && option[1] == 'r') { if (Tcl_GetIndexFromObj (interp, objv[2], clearStringOptions, "option", 0, &i) != TCL_OK) { return TCL_ERROR; } } else { SetResult("expected: clearString ?-replace? string"); return TCL_ERROR; } replace = 1; objc--; objv++; } string = Tcl_GetStringFromObj (objv[2], &len); clearedStr = domClearString (string, &bool, replace, len); if (bool) { newObj = Tcl_NewStringObj (clearedStr, -1); FREE (clearedStr); Tcl_SetObjResult (interp, newObj); } else { Tcl_SetObjResult (interp, objv[2]); } return TCL_OK; } SetResult( dom_usage); return TCL_ERROR; } #ifdef TCL_THREADS |
| | > | | > | | | > < > > > > > > > | > > > > | > | < > > | < > > > < |
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 .... 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 .... 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 |
) " createNodeCmd ?-returnNodeCmd? ?-tagName name? ?-jsonType jsonType? ?-namespace URI? (element|comment|text|cdata|pi)Node cmdName \n" " setStoreLineColumn ?boolean? \n" " setNameCheck ?boolean? \n" " setTextCheck ?boolean? \n" " setObjectCommands ?(automatic|token|command)? \n" " isCharData string \n" " clearString ?-replace ?replacement?? string \n" " isBMPCharData string \n" " isComment string \n" " isCDATA string \n" " isPIValue string \n" " isName string \n" " isQName string \n" " isNCName string \n" ................................................................................ Tcl_Interp * interp, int objc, Tcl_Obj * const objv[] ) { GetTcldomDATA; char * method, tmp[300], *string, *option, *replacement; int methodIndex, result, i, bool, changed; domLength repllen; Tcl_CmdInfo cmdInfo; Tcl_Obj * mobjv[MAX_REWRITE_ARGS], *newObj; Tcl_DString cleardString; static const char *domMethods[] = { "createDocument", "createDocumentNS", "createNodeCmd", "parse", "setStoreLineColumn", "isCharData", "isName", "isPIName", "isQName", "isComment", "isCDATA", "isPIValue", "isNCName", "createDocumentNode", ................................................................................ case m_isBMPCharData: CheckArgs(3,3,2,"string"); SetBooleanResult(domIsBMPChar(Tcl_GetString(objv[2]))); return TCL_OK; case m_clearString: CheckArgs(3,5,2,"?-replace ?replacement?? string"); if (objc >= 4) { option = Tcl_GetString (objv[2]); if (option[0] == '-' && option[1] == 'r') { if (Tcl_GetIndexFromObj (interp, objv[2], clearStringOptions, "option", 0, &i) != TCL_OK) { return TCL_ERROR; } } else { SetResult("expected: clearString ?-replace ?replacement?" 
" string"); return TCL_ERROR; } objc--; objv++; if (objc == 4) { replacement = Tcl_GetStringFromObj (objv[2], &repllen); objc--; objv++; } else { replacement = "\xEF\xBF\xBD\0"; repllen = 3; } } else { replacement = NULL; repllen = 0; } string = Tcl_GetString (objv[2]); changed = 0; domClearString (string, replacement, repllen, &cleardString, &changed); if (changed) { newObj = Tcl_NewStringObj ( Tcl_DStringValue (&cleardString), Tcl_DStringLength (&cleardString)); Tcl_DStringFree (&cleardString); Tcl_SetObjResult (interp, newObj); } else { Tcl_SetObjResult (interp, objv[2]); } return TCL_OK; } SetResult( dom_usage); return TCL_ERROR; } #ifdef TCL_THREADS |