List Info

Thread: Problems with file names in UTF-8 on Windows




Problems with file names in UTF-8 on Windows
user name
2006-09-01 08:16:56
 >> Probably, an alternative version of patch will be
more acceptable.
 >> [...]
 >
 > Roland's patch has been applied to CVS. So any
further patch should
 > be against CVs head i.e. the version with Roland's
code changes. This
 > would also help checking what you are really changing,
please resubmit.
 > [...]
 >
 > Daniel
 >
Here is a patch for the latest version in CVS.

The patched library was compiled with MSVC 5.0, MSVC 7.1, MinGW (gcc 3.4.2),
and Borland C++ 5.5, and was tested under Win 98, Win 2000 SP4, and Win XP SP2.

Changes:
1. Platform-specific code (opening of files and retrieval of status
    information) is moved to separate functions.
2. As xmlInitParser() should be called in multithreaded programs
    before any other library function is used, detection of the
    platform and of the appropriate API features is carried out in
    xmlRegisterDefaultInputCallbacks() and
    xmlRegisterDefaultOutputCallbacks().
    Thus there is no need to complicate the code by
    protecting it with a mutex.
    After initialization the two static function pointers are
    only read, so problems with multithreading will not
    arise there either.
3. The unnecessary increase of the string buffer length is removed
    in function __xmlIOWin32UTF8ToWChar().
4. Dynamic loading of msvcrt.dll is not used at all.
    The functions _wstat()/_wfopen() exist even in
    msvcrt20.dll (Win 95 distribution).
5. Functions xmlMalloc()/xmlFree() are used instead of malloc()/free().
6. The code uses stat() calls. If the C library has a _stat()
    function, an appropriate macro is defined.
7. Code is changed for Windows only.

--- xmlIO.orig.c	2006-09-01 09:58:48.000000000 +0400
+++ xmlIO.c	2006-09-01 12:27:58.000000000 +0400
@@ -50,6 +50,10 @@
 #    endif
 #    define HAVE_STAT
 #  endif
+#else
+#  ifdef HAVE__STAT
+#      define stat _stat
+#  endif
 #endif
 #ifdef HAVE_STAT
 #  ifndef S_ISDIR
@@ -193,7 +197,7 @@
     "unknown address familly",	/* EAFNOSUPPORT
*/
 };
 
-#if defined(WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
 /**
  * __xmlIOWin32UTF8ToWChar:
  * u8String:  uft-8 string
@@ -210,12 +214,12 @@
 		int wLen =
MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,u8String,-1
,NULL,0);
 		if (wLen)
 		{
-			wString = malloc((wLen+1) * sizeof(wchar_t));
+			wString = xmlMalloc(wLen * sizeof(wchar_t));
 			if (wString)
 			{
-				if
(MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,u8String,-
1,wString,wLen+1) == 0)
+				if
(MultiByteToWideChar(CP_UTF8,0,u8String,-1,wString,wLen) ==
0)
 				{
-					free(wString);
+					xmlFree(wString);
 					wString = NULL;
 				}
 			}
@@ -224,73 +228,6 @@
 	
 	return wString;
 }
-
-/**
- * __xmlIOWin32GetWcharFunc:
- * name:  name of function
- *
- * returns function pointer to certain wide character
functions
- * contained in msvcrt.dll on Windows NT or better. 
- * There is no (really working) support for it on
win95/98/Me
- * but to retain compatibility on ascii basis the
capabilities
- * of the os are depicted during runtime (see use of this
function in this file)
- */
-static void *
-__xmlIOWin32GetWcharFunc(const char *name)
-{
-	void *function = NULL;
-	static HANDLE msvcrt = INVALID_HANDLE_VALUE;
-	static HANDLE winMutex = INVALID_HANDLE_VALUE;
-
-	// create Mutex if not already there
-	if (winMutex == INVALID_HANDLE_VALUE)
-	{
-		winMutex = CreateMutexA(NULL, FALSE,
"__xmlIOWin32GetWcharFunc mutex");
-		if (!winMutex)
-			return NULL;
-	}
-
-	// Be atomic
-	if (WaitForSingleObject(winMutex, INFINITE) ==
WAIT_OBJECT_0)
-	{
-		if (msvcrt == INVALID_HANDLE_VALUE)
-		{
-			msvcrt = NULL; // ensure to enter this code just once
-			OSVERSIONINFOEX osvi;
-	 
-	    	ZeroMemory(&osvi,sizeof(OSVERSIONINFOEX));
-	   	osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
-	
-			// Get Operatingsystemversion. If something is wrong
here, the system
-			// is heavily damaged. Refuse to deliver pointers in
this case.
-	   	if (!GetVersionEx((OSVERSIONINFO *)&osvi))
-	   	{
-	      	osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-	      	if (!GetVersionEx((OSVERSIONINFO *)&osvi))
-	      	{
-	      		ReleaseMutex(winMutex);
-	         	return NULL;
-	         }
-	   	}
-	
-			// Only continue on NT or better
-			if (osvi.dwPlatformId == VER_PLATFORM_WIN32_NT)
-			{
-				unsigned int oldErrorMode =
SetErrorMode(SEM_FAILCRITICALERRORS|SEM_NOOPENFILEERRORBOX);
-		
-				msvcrt = LoadLibraryA("msvcrt.dll");
-				SetErrorMode(oldErrorMode);
-			}
-		}
-	
-		if (msvcrt && name)
-			function = (void *)GetProcAddress(msvcrt,name);
-			
-		ReleaseMutex(winMutex);
-	}
-	
-	return function;
-}
 #endif
 
 /**
@@ -639,6 +576,130 @@
  *									*
 
************************************************************
************/
 
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+
+/**
+ *  xmlWrapOpenUtf8:
+ * path:  the path in utf-8 encoding
+ * mode:  type of access (0 - read, 1 - write)
+ *
+ * function opens the file specified by path
+ *
+ */
+static FILE*
+xmlWrapOpenUtf8(const char *path,int mode)
+{
+    FILE *fd = NULL;
+    wchar_t *wPath;
+
+    wPath = __xmlIOWin32UTF8ToWChar(path);
+    if(wPath)
+    {
+       fd = _wfopen(wPath, mode ? L"wb" :
L"rb");
+       xmlFree(wPath);
+    }
+    // maybe path in native encoding
+    if(fd == NULL)
+       fd = fopen(path, mode ? "wb" :
"rb");
+
+    return fd;
+}
+
+/**
+ *  xmlWrapStatUtf8:
+ * path:  the path in utf-8 encoding
+ * info:  structure that stores results
+ *
+ * function obtains information about the file or directory
+ *
+ */
+static int
+xmlWrapStatUtf8(const char *path,struct stat *info)
+{
+#ifdef HAVE_STAT
+    int retval = -1;
+    wchar_t *wPath;
+
+    wPath = __xmlIOWin32UTF8ToWChar(path);
+    if (wPath)
+    {
+       retval = _wstat(wPath,info);
+       xmlFree(wPath);
+    }
+    // maybe path in native encoding
+    if(retval < 0)
+       retval = stat(path,info);
+    return retval;
+#else
+    return -1;
+#endif
+}
+
+/**
+ *  xmlWrapOpenNative:
+ * path:  the path
+ * mode:  type of access (0 - read, 1 - write)
+ *
+ * function opens the file specified by path
+ *
+ */
+static FILE*
+xmlWrapOpenNative(const char *path,int mode)
+{
+    return fopen(path,mode ? "wb" :
"rb");
+}
+
+/**
+ *  xmlWrapStatNative:
+ * path:  the path
+ * info:  structure that stores results
+ *
+ * function obtains information about the file or directory
+ *
+ */
+static int
+xmlWrapStatNative(const char *path,struct stat *info)
+{
+#ifdef HAVE_STAT
+    return stat(path,info);
+#else
+    return -1;
+#endif
+}
+
+static int   (* xmlWrapStat)(const char *,struct stat *) =
xmlWrapStatNative;
+static FILE* (* xmlWrapOpen)(const char *,int mode)      =
xmlWrapOpenNative;
+
+/**
+ * xmlInitPlatformSpecificIo:
+ *
+ * Initialize platform specific features.
+ */
+static void
+xmlInitPlatformSpecificIo
+(void) {
+    static int xmlPlatformIoInitialized = 0;
+    OSVERSIONINFO osvi;
+
+    if(xmlPlatformIoInitialized)
+      return;
+
+    osvi.dwOSVersionInfoSize = sizeof(osvi);
+
+    if(GetVersionEx(&osvi) &&
(osvi.dwPlatformId == VER_PLATFORM_WIN32_NT)) {
+      xmlWrapStat = xmlWrapStatUtf8;
+      xmlWrapOpen = xmlWrapOpenUtf8;
+    } else {
+      xmlWrapStat = xmlWrapStatNative;
+      xmlWrapOpen = xmlWrapOpenNative;
+    }
+
+    xmlPlatformIoInitialized = 1;
+    return;
+}
+
+#endif
+
 /**
  * xmlCheckFilename:
  * path:  the path to check
@@ -656,70 +717,25 @@
 int
 xmlCheckFilename (const char *path)
 {
-#if defined(HAVE_STAT) && !defined(WIN32)
+#ifdef HAVE_STAT
 	struct stat stat_buffer;
 #endif
 	if (path == NULL)
 		return(0);
-  
-#if defined(WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
-	{
-		int retval = 0;
-
-		// One-time autodetect presence of _wstat. Not available
for Win9x
-		static int (*winwstat)(const wchar_t *path,struct _stat
*buffer) = INVALID_HANDLE_VALUE;
-
-		if (winwstat == INVALID_HANDLE_VALUE)
-			winwstat = (int (*)(const wchar_t *,struct _stat
*))__xmlIOWin32GetWcharFunc("_wstat");
 
-		// Try utf-8 path first on systems capable
-		if (winwstat)
-		{
-			wchar_t *wPath = __xmlIOWin32UTF8ToWChar(path);
-			if (wPath)
-			{
-				struct _stat stat_buffer;
-				
-				if (winwstat(wPath,&stat_buffer) == 0)
-				{
-					retval = 1;
-					
-					if (((stat_buffer.st_mode & S_IFDIR) == S_IFDIR))
-						retval = 2;
-				}
-		
-				free(wPath);
-			}
-		}
-
-		// Fallback: Path in utf-8 representation not present or
win9x		
-		if ((winwstat == NULL) || (retval == 0))
-		{
-				struct _stat stat_buffer;
-				
-				if (_stat(path,&stat_buffer) == 0)
-				{
-					retval = 1;
-					
-					if (((stat_buffer.st_mode & S_IFDIR) == S_IFDIR))
-						retval = 2;
-				}
-		}
-		
-		return retval;
-	}
-#else
 #ifdef HAVE_STAT
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+    if (xmlWrapStat(path, &stat_buffer) == -1)
+        return 0;
+#else
     if (stat(path, &stat_buffer) == -1)
         return 0;
-
+#endif
 #ifdef S_ISDIR
     if (S_ISDIR(stat_buffer.st_mode))
         return 2;
-#endif /* S_ISDIR */
+#endif
 #endif /* HAVE_STAT */
-#endif /* WIN32 */
-
     return 1;
 }
 
@@ -811,7 +827,7 @@
 static void *
 xmlFileOpen_real (const char *filename) {
     const char *path = NULL;
-    FILE *fd = NULL;
+    FILE *fd;
 
     if (filename == NULL)
         return(NULL);
@@ -841,30 +857,12 @@
     if (!xmlCheckFilename(path))
         return(NULL);
 
-#if defined(WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
-	{
-		// One-time autodetect presence of _wfopen. Not available
for Win9x
-		static FILE *(*winwfopen)(const wchar_t *path,const
wchar_t *mode) = INVALID_HANDLE_VALUE;
-
-		if (winwfopen == INVALID_HANDLE_VALUE)
-			winwfopen = (FILE *(*)(const wchar_t *,const wchar_t
*))__xmlIOWin32GetWcharFunc("_wfopen");
-
-		// Try to open file unicode path safe. If not available
or win9x fall thru to non-unicode safe fopen()
-		if (winwfopen)
-		{
-			wchar_t *wPath = __xmlIOWin32UTF8ToWChar(path);
-			if (wPath)
-			{
-				fd = winwfopen(wPath, L"rb");
-				free(wPath);
-	   	}
-   	}
-	}	
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+    fd = xmlWrapOpen(path, 0);
+#else
+    fd = fopen(path, "r");
 #endif /* WIN32 */
-
-	if (fd == NULL)
-    	fd = fopen(path, "r");
-   if (fd == NULL) xmlIOErr(0, path);
+    if (fd == NULL) xmlIOErr(0, path);
     return((void *) fd);
 }
 
@@ -905,7 +903,7 @@
 static void *
 xmlFileOpenW (const char *filename) {
     const char *path = NULL;
-    FILE *fd = NULL;
+    FILE *fd;
 
     if (!strcmp(filename, "-")) {
 	fd = stdout;
@@ -930,29 +928,11 @@
     if (path == NULL)
 	return(NULL);
 
-#if defined(WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
-	{
-		// One-time autodetect presence of _wfopen. Not available
for Win9x
-		static FILE *(*winwfopen)(const wchar_t *path,const
wchar_t *mode) = INVALID_HANDLE_VALUE;
-
-		if (winwfopen == INVALID_HANDLE_VALUE)
-			winwfopen = (FILE *(*)(const wchar_t *,const wchar_t
*))__xmlIOWin32GetWcharFunc("_wfopen");
-
-		// Try to open file unicode path safe. If not available
or win9x fall thru to non-unicode safe fopen()
-		if (winwfopen)
-		{
-			wchar_t *wPath = __xmlIOWin32UTF8ToWChar(path);
-			if (wPath)
-			{
-				fd = winwfopen(wPath, L"wb");
-				free(wPath);
-	   	}
-	   }
-	}
-#endif /* WIN32 */
-
-	if (fd == NULL)
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+    fd = xmlWrapOpen(path, 1);
+#else
   	   fd = fopen(path, "wb");
+#endif /* WIN32 */
 
 	 if (fd == NULL) xmlIOErr(0, path);
     return((void *) fd);
@@ -2128,6 +2108,10 @@
     if (xmlInputCallbackInitialized)
 	return;
 
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+    xmlInitPlatformSpecificIo();
+#endif
+
     xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen,
 	                      xmlFileRead, xmlFileClose);
 #ifdef HAVE_ZLIB_H
@@ -2159,6 +2143,10 @@
     if (xmlOutputCallbackInitialized)
 	return;
 
+#if defined(_WIN32) || defined (__DJGPP__) &&
!defined (__CYGWIN__)
+    xmlInitPlatformSpecificIo();
+#endif
+
     xmlRegisterOutputCallbacks(xmlFileMatch, xmlFileOpenW,
 	                      xmlFileWrite, xmlFileClose);
 
_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
xmlgnome.org
http://mail.gnome.org/mailman/listinfo/xml
Problems with file names in UTF-8 on Windows
user name
2006-09-01 09:59:32
On Fri, Sep 01, 2006 at 12:16:56PM +0400, Emelyanov Alexey
wrote:
> >> Probably, an alternative version of patch will
be more acceptable.
> >> [...]
> >
> > Roland's patch has been applied to CVS. So any
further patch should
> > be against CVs head i.e. the version with
Roland's code changes. This
> > would also help checking what you are really
changing, please resubmit.
> > [...]
> >
> > Daniel
> >
> Here is patch for latest version in CVS.
> 
> Patched library was compiled with MSVC 5.0, MSVC 7.1,
Mingw (gcc 3.4.2),
> Borland C++ 5.5 and was tested under Win 98, Win 2000
SP4, Win XP SP2.
> 
> Changes:
> 1. Platform specific code is moved to separate
functions (opening
>    of files and status information retrieving).
> 2. As xmlInitParser() should be called in multithreaded
programs
>    before use of any other library functions, detection
of
>    platform and appropriate API features is carried out
in
>    xmlRegisterDefaultInputCallbacks() and
>    xmlRegisterDefaultOutputCallbacks().
>    Thus there is no need to complicate a code,
>    protecting it with the mutex.
>    After initialization two static function pointers
are
>    used only for reading, therefore problems with
>    multithreading will not arise too.
> 3. Unnecessary increase of string buffer length is
removed in
>    function __xmlIOWin32UTF8ToWChar().
> 4. The dynamic loading of msvcrt.dll is not used at
all.
>    The functions _wstat()/_wfopen() exists even in
>    msvcrt20.dll (Win 95 distribution).
> 5. Functions xmlMalloc()/xmlFree() are used instead of
malloc()/free().
> 6. In a code the stat() calls are used. If there is
_stat()
>    function in C library, appropriate macros is
defined.
> 7. Code is changed for Windows only.

  This looks sensible, I particularly appreciate the large set of compilers
and platforms that you report. Eric and others should still check it again
just in case, but that sounds good! Applied and committed to CVS,

   thanks a lot !

Daniel

-- 
Red Hat Virtualization group http://redhat.com/virtualization/
Daniel Veillard      | virtualization library  http://libvirt.org/
veillardredhat.com  | libxml GNOME XML XSLT toolkit  http://xmlsoft.org/
http://veillard.com/ |
Rpmfind RPM search engine  http://rpmfind.net/
_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
xmlgnome.org
http://mail.gnome.org/mailman/listinfo/xml
[1-2]

about | contact  Other archives ( Real Estate discussion Medical topics )