/*
 * Unicode-mode support for ext/libxml and ext/xmlreader.
 *
 * php_libxml.h gains ZVAL_XML_STRING / RETVAL_XML_STRING, which build a zval
 * from a libxml string through UG(utf8_conv) when UG(unicode) is set.
 *
 * php_xmlreader.c accepts IS_UNICODE arguments.  Each such argument is
 * converted to UTF-8 with u_strToUTF8() into an emalloc'd scratch buffer
 * sized at three bytes per input code unit plus one terminator byte, handed
 * to libxml, and released again.  Where two strings are converted they share
 * one buffer: the second string is written after the first one's terminator
 * and is given only the capacity that is left.  XMLReader::next() releases
 * its converted name on every return path, including the early RETURN_TRUE.
 *
 * The two .phpt hunks add the expected output for unicode mode.
 */
Index: libxml/php_libxml.h
===================================================================
RCS file: /repository/php-src/ext/libxml/php_libxml.h,v
retrieving revision 1.19
diff -u -r1.19 php_libxml.h
--- libxml/php_libxml.h	13 Jun 2006 13:12:18 -0000	1.19
+++ libxml/php_libxml.h	17 Jul 2006 17:55:47 -0000
@@ -102,6 +102,35 @@
 PHP_LIBXML_API void php_libxml_initialize();
 PHP_LIBXML_API void php_libxml_shutdown();
 
+/*
+#define ZVAL_XML_STRING(z, s, flags) { \
+	UConverter *conv = NULL; \
+	\
+	if (UG(unicode)) { \
+		UErrorCode status = U_ZERO_ERROR; \
+		conv = ucnv_open("UTF-8", &status); \
+		if (U_FAILURE(status)) { \
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for 'UTF-8' encoding"); \ \
+		} \
+		zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags); \
+	} \
+\
+	ZVAL_U_STRING(conv, z, s, flags); \
+	if (conv) { \
+		ucnv_close(conv); \
+	} \
+}
+*/
+#define ZVAL_XML_STRING(z, s, flags) { \
+	UConverter *libxml_utf_conv = NULL; \
+	if (UG(unicode)) { \
+		libxml_utf_conv = UG(utf8_conv); \
+	} \
+	ZVAL_U_STRING(libxml_utf_conv, z, s, flags); \
+}
+
+#define RETVAL_XML_STRING(s, flags) ZVAL_XML_STRING(return_value, s, flags)
+
 #ifdef ZTS
 #define LIBXML(v) TSRMG(libxml_globals_id, zend_libxml_globals *, v)
 #else
Index: xmlreader/php_xmlreader.c
===================================================================
RCS file: /repository/php-src/ext/xmlreader/php_xmlreader.c,v
retrieving revision 1.33
diff -u -r1.33 php_xmlreader.c
--- xmlreader/php_xmlreader.c	10 May 2006 11:58:56 -0000	1.33
+++ xmlreader/php_xmlreader.c	17 Jul 2006 17:55:49 -0000
@@ -92,9 +92,9 @@
 	switch (hnd->type) {
 		case IS_STRING:
 			if (retchar) {
-				ZVAL_STRING(*retval, (xmlChar *) retchar, 1);
+				ZVAL_XML_STRING(*retval, (char *) retchar, ZSTR_DUPLICATE);
 			} else {
-				ZVAL_EMPTY_STRING(*retval);
+				ZVAL_EMPTY_TEXT(*retval);
 			}
 			break;
 		case IS_BOOL:
@@ -121,17 +121,17 @@
 	zend_object_handlers *std_hnd;
 	int ret = FAILURE;
 
-	if (member->type != IS_STRING) {
+	if (member->type != IS_STRING && member->type != IS_UNICODE) {
 		tmp_member = *member;
 		zval_copy_ctor(&tmp_member);
-		convert_to_string(&tmp_member);
+		convert_to_text(&tmp_member);
 		member = &tmp_member;
 	}
 
 	obj = (xmlreader_object *)zend_objects_get_address(object TSRMLS_CC);
 
 	if (obj->prop_handler != NULL) {
-		ret = zend_hash_find(obj->prop_handler, Z_STRVAL_P(member), Z_STRLEN_P(member)+1, (void **) &hnd);
+		ret = zend_u_hash_find(obj->prop_handler, Z_TYPE_P(member), Z_UNIVAL_P(member), Z_UNILEN_P(member)+1, (void **) &hnd);
 	}
 	if (ret == FAILURE) {
 		std_hnd = zend_get_std_object_handlers();
@@ -155,10 +155,10 @@
 	zend_object_handlers *std_hnd;
 	int ret;
 
-	if (member->type != IS_STRING) {
+	if (member->type != IS_STRING && member->type != IS_UNICODE) {
 		tmp_member = *member;
 		zval_copy_ctor(&tmp_member);
-		convert_to_string(&tmp_member);
+		convert_to_text(&tmp_member);
 		member = &tmp_member;
 	}
 
@@ -166,7 +166,7 @@
 	obj = (xmlreader_object *)zend_objects_get_address(object TSRMLS_CC);
 
 	if (obj->prop_handler != NULL) {
-		ret = zend_hash_find(obj->prop_handler, Z_STRVAL_P(member), Z_STRLEN_P(member)+1, (void **) &hnd);
+		ret = zend_u_hash_find(obj->prop_handler, Z_TYPE_P(member), Z_UNIVAL_P(member), Z_UNILEN_P(member)+1, (void **) &hnd);
 	}
 	if (ret == SUCCESS) {
 		ret = xmlreader_property_reader(obj, hnd, &retval TSRMLS_CC);
@@ -197,10 +197,10 @@
 	zend_object_handlers *std_hnd;
 	int ret;
 
-	if (member->type != IS_STRING) {
+	if (member->type != IS_STRING && member->type != IS_UNICODE) {
 		tmp_member = *member;
 		zval_copy_ctor(&tmp_member);
-		convert_to_string(&tmp_member);
+		convert_to_text(&tmp_member);
 		member = &tmp_member;
 	}
 
@@ -208,7 +208,7 @@
 	obj = (xmlreader_object *)zend_objects_get_address(object TSRMLS_CC);
 
 	if (obj->prop_handler != NULL) {
-		ret = zend_hash_find((HashTable *)obj->prop_handler, Z_STRVAL_P(member), Z_STRLEN_P(member)+1, (void **) &hnd);
+		ret = zend_u_hash_find(obj->prop_handler, Z_TYPE_P(member), Z_UNIVAL_P(member), Z_UNILEN_P(member)+1, (void **) &hnd);
 	}
 	if (ret == SUCCESS) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot write to read-only property");
@@ -233,8 +233,8 @@
 	int isFileUri = 0;
 
 	uri = xmlCreateURI();
-	escsource = xmlURIEscapeStr(source, ":");
-	xmlParseURIReference(uri, escsource);
+	escsource = xmlURIEscapeStr((xmlChar *) source, (xmlChar *) ":");
+	xmlParseURIReference(uri, (char *)escsource);
 	xmlFree(escsource);
 
 	if (uri->scheme != NULL) {
@@ -420,13 +420,14 @@
 static void php_xmlreader_string_arg(INTERNAL_FUNCTION_PARAMETERS, xmlreader_read_one_char_t internal_function) {
 	zval *id;
 	int name_len = 0;
-	char *retchar = NULL;
+	xmlChar *retchar = NULL;
 	xmlreader_object *intern;
-	char *name;
+	void *name;
+	zend_uchar type;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
-		return;
-	}
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &name, &name_len, &type) == FAILURE) {
+		return;
+	}
 
 	if (!name_len) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument cannot be an empty string");
@@ -437,10 +438,29 @@
 
 	intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
 	if (intern && intern->ptr) {
-		retchar = internal_function(intern->ptr, name);
+		if (type == IS_UNICODE) {
+			char *utf8_name;
+			UErrorCode err = U_ZERO_ERROR;
+			int32_t u8TargetLength =0;
+			int32_t u8DestLen =0;
+			u8TargetLength = name_len * 3 + 1;
+
+			utf8_name = emalloc(u8TargetLength);
+
+			u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, name, name_len,&err);
+			if (U_FAILURE(err)) {
+				efree(utf8_name);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+			retchar = internal_function(intern->ptr, (xmlChar *)utf8_name);
+			efree(utf8_name);
+		} else {
+			retchar = internal_function(intern->ptr, (xmlChar *)name);
+		}
 	}
 	if (retchar) {
-		RETVAL_STRING(retchar, 1);
+		RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
 		xmlFree(retchar);
 		return;
 	} else {
@@ -472,7 +492,7 @@
 /* {{{ php_xmlreader_no_arg_string */
 static void php_xmlreader_no_arg_string(INTERNAL_FUNCTION_PARAMETERS, xmlreader_read_char_t internal_function) {
 	zval *id;
-	char *retchar = NULL;
+	xmlChar *retchar = NULL;
 	xmlreader_object *intern;
 
 	id = getThis();
@@ -482,7 +502,7 @@
 		retchar = internal_function(intern->ptr);
 	}
 	if (retchar) {
-		RETVAL_STRING(retchar, 1);
+		RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
 		xmlFree(retchar);
 		return;
 	} else {
@@ -498,9 +518,10 @@
 	int source_len = 0, retval = -1;
 	xmlreader_object *intern;
 	xmlRelaxNGPtr schema = NULL;
-	char *source;
+	void *source;
+	zend_uchar ctype;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!", &source, &source_len) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t!", &source, &source_len, &ctype) == FAILURE) {
 		return;
 	}
 
@@ -514,7 +535,27 @@
 	intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
 	if (intern && intern->ptr) {
 		if (source) {
-			schema = _xmlreader_get_relaxNG(source, source_len, type, NULL, NULL TSRMLS_CC);
+			if (ctype == IS_UNICODE) {
+				char *utf8_name;
+				UErrorCode err = U_ZERO_ERROR;
+				int32_t u8TargetLength =0;
+				int32_t u8DestLen =0;
+				u8TargetLength = source_len * 3 + 1;
+
+				utf8_name = emalloc(u8TargetLength);
+
+				u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, source, source_len,&err);
+				if (U_FAILURE(err)) {
+					efree(utf8_name);
+					php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+					RETURN_FALSE;
+				}
+				schema = _xmlreader_get_relaxNG(utf8_name, u8DestLen, type, NULL, NULL TSRMLS_CC);
+				efree(utf8_name);
+			} else {
+				schema = _xmlreader_get_relaxNG(source, source_len, type, NULL, NULL TSRMLS_CC);
+			}
+
 			if (schema) {
 				retval = xmlTextReaderRelaxNGSetSchema(intern->ptr, schema);
 			}
@@ -577,7 +618,7 @@
 {
 	zval *id;
 	long attr_pos;
-	char *retchar = NULL;
+	xmlChar *retchar = NULL;
 	xmlreader_object *intern;
 
 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &attr_pos) == FAILURE) {
@@ -591,11 +632,11 @@
 		retchar = xmlTextReaderGetAttributeNo(intern->ptr,attr_pos);
 	}
 	if (retchar) {
-		RETVAL_STRING(retchar, 1);
+		RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
 		xmlFree(retchar);
 		return;
 	} else {
-		RETURN_EMPTY_STRING();
+		RETURN_EMPTY_TEXT();
 	}
 }
 /* }}} */
@@ -607,12 +648,21 @@
 	zval *id;
 	int name_len = 0, ns_uri_len = 0;
 	xmlreader_object *intern;
-	char *name, *ns_uri, *retchar = NULL;
+	xmlChar *retchar = NULL;
+	void *name, *ns_uri;
+	zend_uchar name_type, ns_type;
+	UConverter *orig_runtime_conv;
+
+	orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+	UG(runtime_encoding_conv) = UG(utf8_conv);
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss", &name, &name_len, &ns_uri, &ns_uri_len) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &name, &name_len, &name_type, &ns_uri, &ns_uri_len, &ns_type) == FAILURE) {
+		UG(runtime_encoding_conv) = orig_runtime_conv;
 		return;
 	}
 
+	UG(runtime_encoding_conv) = orig_runtime_conv;
+
 	if (name_len == 0 || ns_uri_len == 0) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Attribute Name and Namespace URI cannot be empty");
 		RETURN_FALSE;
@@ -622,14 +672,44 @@
 
 	intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
 	if (intern && intern->ptr) {
-		retchar = xmlTextReaderGetAttributeNs(intern->ptr, name, ns_uri);
+		if (name_type == IS_UNICODE) {
+			char *utf8_buffer, *utf8_name, *utf8_ns;
+			UErrorCode err = U_ZERO_ERROR;
+			int32_t u8TargetLength =0;
+			int32_t u8DestLen =0;
+			/* create buffer large enough for all strings */
+			u8TargetLength = (name_len + ns_uri_len) * 3 + 2;
+
+			utf8_buffer = emalloc(u8TargetLength);
+			utf8_name = utf8_buffer;
+
+			u_strToUTF8(utf8_name, u8TargetLength, &u8DestLen, name, name_len, &err);
+			if (U_FAILURE(err)) {
+				efree(utf8_buffer);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+
+			utf8_ns = &utf8_buffer[u8DestLen+1];
+			u_strToUTF8(utf8_ns, u8TargetLength - u8DestLen - 1, &u8DestLen, ns_uri, ns_uri_len, &err);
+			if (U_FAILURE(err)) {
+				efree(utf8_buffer);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+
+			retchar = xmlTextReaderGetAttributeNs(intern->ptr, (xmlChar *)utf8_name, (xmlChar *)utf8_ns);
+			efree(utf8_buffer);
+		} else {
+			retchar = xmlTextReaderGetAttributeNs(intern->ptr, (xmlChar *)name, (xmlChar *)ns_uri);
+		}
 	}
 	if (retchar) {
-		RETVAL_STRING(retchar, 1);
+		RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
 		xmlFree(retchar);
 		return;
 	} else {
-		RETURN_EMPTY_STRING();
+		RETURN_EMPTY_TEXT();
 	}
 }
 /* }}} */
@@ -687,9 +767,10 @@
 	zval *id;
 	int name_len = 0, retval;
 	xmlreader_object *intern;
-	char *name;
+	void *name;
+	zend_uchar type;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &name, &name_len, &type) == FAILURE) {
 		return;
 	}
 
@@ -702,7 +783,27 @@
 
 	intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
 	if (intern && intern->ptr) {
-		retval = xmlTextReaderMoveToAttribute(intern->ptr, name);
+		if (type == IS_UNICODE) {
+			char *utf8_name;
+			UErrorCode err = U_ZERO_ERROR;
+			int32_t u8TargetLength =0;
+			int32_t u8DestLen =0;
+			u8TargetLength = name_len * 3 + 1;
+
+			utf8_name = emalloc(u8TargetLength);
+
+			u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, name, name_len,&err);
+			if (U_FAILURE(err)) {
+				efree(utf8_name);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+			retval = xmlTextReaderMoveToAttribute(intern->ptr, (xmlChar *)utf8_name);
+			efree(utf8_name);
+		} else {
+			retval = xmlTextReaderMoveToAttribute(intern->ptr, (xmlChar *)name);
+		}
+
 		if (retval == 1) {
 			RETURN_TRUE;
 		}
@@ -748,12 +849,20 @@
 	zval *id;
 	int name_len=0, ns_uri_len=0, retval;
 	xmlreader_object *intern;
-	char *name, *ns_uri;
+	void *name, *ns_uri;
+	zend_uchar name_type, ns_type;
+	UConverter *orig_runtime_conv;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss", &name, &name_len, &ns_uri, &ns_uri_len) == FAILURE) {
+	orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+	UG(runtime_encoding_conv) = UG(utf8_conv);
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &name, &name_len, &name_type, &ns_uri, &ns_uri_len, &ns_type) == FAILURE) {
+		UG(runtime_encoding_conv) = orig_runtime_conv;
 		return;
 	}
 
+	UG(runtime_encoding_conv) = orig_runtime_conv;
+
 	if (name_len == 0 || ns_uri_len == 0) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Attribute Name and Namespace URI cannot be empty");
 		RETURN_FALSE;
@@ -763,7 +872,37 @@
 
 	intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
 	if (intern && intern->ptr) {
-		retval = xmlTextReaderMoveToAttributeNs(intern->ptr, name, ns_uri);
+		if (name_type == IS_UNICODE) {
+			char *utf8_buffer, *utf8_name, *utf8_ns;
+			UErrorCode err = U_ZERO_ERROR;
+			int32_t u8TargetLength =0;
+			int32_t u8DestLen =0;
+			/* create buffer large enough for all strings */
+			u8TargetLength = (name_len + ns_uri_len) * 3 + 2;
+
+			utf8_buffer = emalloc(u8TargetLength);
+			utf8_name = utf8_buffer;
+
+			u_strToUTF8(utf8_name, u8TargetLength, &u8DestLen, name, name_len, &err);
+			if (U_FAILURE(err)) {
+				efree(utf8_buffer);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+
+			utf8_ns = &utf8_buffer[u8DestLen+1];
+			u_strToUTF8(utf8_ns, u8TargetLength - u8DestLen - 1, &u8DestLen, ns_uri, ns_uri_len, &err);
+			if (U_FAILURE(err)) {
+				efree(utf8_buffer);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+
+			retval = xmlTextReaderMoveToAttributeNs(intern->ptr, (xmlChar *)utf8_name, (xmlChar *)utf8_ns);
+			efree(utf8_buffer);
+		} else {
+			retval = xmlTextReaderMoveToAttributeNs(intern->ptr, (xmlChar *)name, (xmlChar *)ns_uri);
+		}
 		if (retval == 1) {
 			RETURN_TRUE;
 		}
@@ -829,9 +968,10 @@
 	zval *id;
 	int retval, name_len=0;
 	xmlreader_object *intern;
-	char *name = NULL;
+	void *name = NULL;
+	zend_uchar name_type;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|t", &name, &name_len, &name_type) == FAILURE) {
 		return;
 	}
 
@@ -845,11 +985,38 @@
 		} else
 #endif
 		retval = xmlTextReaderNext(intern->ptr);
-		while (name != NULL && retval == 1) {
-			if (xmlStrEqual(xmlTextReaderConstLocalName(intern->ptr), name)) {
-				RETURN_TRUE;
+		if (name != NULL && retval == 1) {
+			char *utf8_name;
+
+			if (name_type == IS_UNICODE) {
+				UErrorCode err = U_ZERO_ERROR;
+				int32_t u8TargetLength =0;
+				int32_t u8DestLen =0;
+				u8TargetLength = name_len * 3 + 1;
+
+				utf8_name = emalloc(u8TargetLength);
+
+				u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, name, name_len,&err);
+				if (U_FAILURE(err)) {
+					efree(utf8_name);
+					php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+					RETURN_FALSE;
+				}
+			} else {
+				utf8_name = name;
+			}
+			while (retval == 1) {
+				if (xmlStrEqual(xmlTextReaderConstLocalName(intern->ptr), (xmlChar *)utf8_name)) {
+					if (name_type == IS_UNICODE) { /* release the converted copy before the early return */
+						efree(utf8_name);
+					}
+					RETURN_TRUE;
+				}
+				retval = xmlTextReaderNext(intern->ptr);
+			}
+			if (name_type == IS_UNICODE) {
+				efree(utf8_name);
 			}
-			retval = xmlTextReaderNext(intern->ptr);
 		}
 		if (retval == -1) {
 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "An Error Occured while reading");
@@ -872,15 +1039,24 @@
 	int source_len = 0, encoding_len = 0;
 	long options = 0;
 	xmlreader_object *intern = NULL;
-	char *source, *valid_file = NULL;
-	char *encoding = NULL;
+	char *utf8_buffer=NULL;
+	char *utf8_source, *utf8_encoding = NULL, *valid_file = NULL;
+	void *source, *encoding = NULL;
 	char resolved_path[MAXPATHLEN + 1];
 	xmlTextReaderPtr reader = NULL;
+	zend_uchar source_type, encoding_type;
+	UConverter *orig_runtime_conv;
+
+	orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+	UG(runtime_encoding_conv) = UG(utf8_conv);
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!l", &source, &source_len, &encoding, &encoding_len, &options) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T|T!l", &source, &source_len, &source_type, &encoding, &encoding_len, &encoding_type, &options) == FAILURE) {
+		UG(runtime_encoding_conv) = orig_runtime_conv;
 		return;
 	}
 
+	UG(runtime_encoding_conv) = orig_runtime_conv;
+
 	id = getThis();
 	if (id != NULL) {
 		if (! instanceof_function(Z_OBJCE_P(id), xmlreader_class_entry TSRMLS_CC)) {
@@ -896,10 +1072,45 @@
 		RETURN_FALSE;
 	}
 
-	valid_file = _xmlreader_get_valid_file_path(source, resolved_path, MAXPATHLEN TSRMLS_CC);
+	if (source_type == IS_UNICODE) {
+		UErrorCode err = U_ZERO_ERROR;
+		int32_t u8TargetLength =0;
+		int32_t u8DestLen =0;
+		/* create buffer large enough for all strings */
+		u8TargetLength = (source_len + encoding_len) * 3 + 2;
+
+		utf8_buffer = emalloc(u8TargetLength);
+		utf8_source = utf8_buffer;
+
+		u_strToUTF8(utf8_source, u8TargetLength, &u8DestLen, source, source_len, &err);
+		if (U_FAILURE(err)) {
+			efree(utf8_buffer);
+			php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+			RETURN_FALSE;
+		}
+
+		if (encoding_len > 0) {
+			utf8_encoding = &utf8_buffer[u8DestLen+1];
+			u_strToUTF8(utf8_encoding, u8TargetLength - u8DestLen - 1, &u8DestLen, encoding, encoding_len, &err);
+			if (U_FAILURE(err)) {
+				efree(utf8_buffer);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+		}
+	} else {
+		utf8_source = (char *)source;
+		utf8_encoding = (char *)encoding;
+	}
+
+	valid_file = _xmlreader_get_valid_file_path(utf8_source, resolved_path, MAXPATHLEN TSRMLS_CC);
 
 	if (valid_file) {
-		reader = xmlReaderForFile(valid_file, encoding, options);
+		reader = xmlReaderForFile(valid_file, utf8_encoding, options);
+	}
+
+	if (utf8_buffer != NULL) {
+		efree(utf8_buffer);
 	}
 
 	if (reader == NULL) {
@@ -961,9 +1172,10 @@
 	zval *id;
 	int source_len = 0, retval = -1;
 	xmlreader_object *intern;
-	char *source;
+	void *source;
+	zend_uchar type;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!", &source, &source_len) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t!", &source, &source_len, &type) == FAILURE) {
 		return;
 	}
 
@@ -976,8 +1188,26 @@
 
 	intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
 	if (intern && intern->ptr) {
-		retval = xmlTextReaderSchemaValidate(intern->ptr, source);
-
+		if (source != NULL && type == IS_UNICODE) { /* a NULL source is passed through unconverted */
+			char *utf8_name;
+			UErrorCode err = U_ZERO_ERROR;
+			int32_t u8TargetLength =0;
+			int32_t u8DestLen =0;
+			u8TargetLength = source_len * 3 + 1;
+
+			utf8_name = emalloc(u8TargetLength);
+
+			u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, source, source_len,&err);
+			if (U_FAILURE(err)) {
+				efree(utf8_name);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+			retval = xmlTextReaderSchemaValidate(intern->ptr, utf8_name);
+			efree(utf8_name);
+		} else {
+			retval = xmlTextReaderSchemaValidate(intern->ptr, source);
+		}
 		if (retval == 0) {
 			RETURN_TRUE;
 		}
@@ -1055,16 +1285,26 @@
 	int source_len = 0, encoding_len = 0;
 	long options = 0;
 	xmlreader_object *intern = NULL;
-	char *source, *uri = NULL, *encoding = NULL;
 	int resolved_path_len;
+	char *utf8_buffer=NULL, *uri = NULL;
+	char *utf8_source, *utf8_encoding = NULL;
+	void *source, *encoding = NULL;
 	char *directory=NULL, resolved_path[MAXPATHLEN];
 	xmlParserInputBufferPtr inputbfr;
-	xmlTextReaderPtr reader;
+	xmlTextReaderPtr reader = NULL;
+	zend_uchar source_type, encoding_type;
+	UConverter *orig_runtime_conv;
+
+	orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+	UG(runtime_encoding_conv) = UG(utf8_conv);
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!l", &source, &source_len, &encoding, &encoding_len, &options) == FAILURE) {
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T|T!l", &source, &source_len, &source_type, &encoding, &encoding_len, &encoding_type, &options) == FAILURE) {
+		UG(runtime_encoding_conv) = orig_runtime_conv;
 		return;
 	}
 
+	UG(runtime_encoding_conv) = orig_runtime_conv;
+
 	id = getThis();
 	if (id != NULL && ! instanceof_function(Z_OBJCE_P(id), xmlreader_class_entry TSRMLS_CC)) {
 		id = NULL;
@@ -1079,7 +1319,44 @@
 		RETURN_FALSE;
 	}
 
-	inputbfr = xmlParserInputBufferCreateMem(source, source_len, XML_CHAR_ENCODING_NONE);
+	if (source_type == IS_UNICODE) {
+		UErrorCode err = U_ZERO_ERROR;
+		int32_t u8TargetLength =0;
+		int32_t u8DestLen =0;
+		/* create buffer large enough for all strings */
+		u8TargetLength = (source_len + encoding_len) * 3 + 2;
+
+		utf8_buffer = emalloc(u8TargetLength);
+		utf8_source = utf8_buffer;
+
+		u_strToUTF8(utf8_source, u8TargetLength, &u8DestLen, source, source_len, &err);
+		if (U_FAILURE(err)) {
+			efree(utf8_buffer);
+			php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+			RETURN_FALSE;
+		}
+
+		source_len = u8DestLen;
+
+		if (encoding_len > 0) {
+			utf8_encoding = &utf8_buffer[u8DestLen+1];
+			u_strToUTF8(utf8_encoding, u8TargetLength - u8DestLen - 1, &u8DestLen, encoding, encoding_len, &err);
+			if (U_FAILURE(err)) {
+				efree(utf8_buffer);
+				php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+				RETURN_FALSE;
+			}
+		}
+	} else {
+		utf8_source = (char *)source;
+		utf8_encoding = (char *)encoding;
+	}
+
+	inputbfr = xmlParserInputBufferCreateMem(utf8_source, source_len, XML_CHAR_ENCODING_NONE);
+
+	if (utf8_buffer != NULL) {
+		efree(utf8_buffer);
+	}
 
 	if (inputbfr != NULL) {
 		/* Get the URI of the current script so that we can set the base directory in libxml */
@@ -1210,7 +1487,8 @@
 	ce.create_object = xmlreader_objects_new;
 	xmlreader_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
 
-	zend_hash_init(&xmlreader_prop_handlers, 0, NULL, NULL, 1);
+	zend_u_hash_init(&xmlreader_prop_handlers, 0, NULL, NULL, 1, (zend_bool)zend_ini_long("unicode.semantics", sizeof("unicode.semantics"), 1));
+
 	xmlreader_register_prop_handler(&xmlreader_prop_handlers, "attributeCount", xmlTextReaderAttributeCount, NULL, IS_LONG TSRMLS_CC);
 	xmlreader_register_prop_handler(&xmlreader_prop_handlers, "baseURI", NULL, xmlTextReaderConstBaseUri, IS_STRING TSRMLS_CC);
 	xmlreader_register_prop_handler(&xmlreader_prop_handlers, "depth", xmlTextReaderDepth, NULL, IS_LONG TSRMLS_CC);
Index: xmlreader/tests/012.phpt
===================================================================
RCS file: /repository/php-src/ext/xmlreader/tests/012.phpt,v
retrieving revision 1.1
diff -u -r1.1 012.phpt
--- xmlreader/tests/012.phpt	30 Mar 2006 21:45:27 -0000	1.1
+++ xmlreader/tests/012.phpt	17 Jul 2006 17:55:49 -0000
@@ -67,3 +67,14 @@
 string(0) ""
 string(0) ""
 ===DONE===
+--UEXPECT--
+unicode(0) ""
+NULL
+unicode(0) ""
+unicode(0) ""
+===FILE===
+unicode(0) ""
+NULL
+unicode(0) ""
+unicode(0) ""
+===DONE===
Index: xmlreader/tests/013.phpt
===================================================================
RCS file: /repository/php-src/ext/xmlreader/tests/013.phpt,v
retrieving revision 1.1
diff -u -r1.1 013.phpt
--- xmlreader/tests/013.phpt	31 Mar 2006 20:50:29 -0000	1.1
+++ xmlreader/tests/013.phpt	17 Jul 2006 17:55:49 -0000
@@ -50,3 +50,10 @@
 
 Warning: XMLReader::read(): Element 'foo': %s
 ===DONE===
+--UEXPECTF--
+unicode(3) "123"
+unicode(3) "456"
+===FAIL===
+
+Warning: XMLReader::read(): Element 'foo': %s
+===DONE===