summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Damien Miller <djm@mindrot.org> 2016-12-12 13:57:10 +1100
committer: Damien Miller <djm@mindrot.org> 2016-12-12 13:58:59 +1100
commit: dda78a03af32e7994f132d923c2046e98b7c56c8 (patch)
tree: 589e87350c833aada6358afc719b7e08f7b64b75
parent: c35995048f41239fc8895aadc3374c5f75180554 (diff)
Force Turkish locales back to C/POSIX; bz#2643
Turkish locales are unique in their handling of the letters 'i' and 'I' (yes, they are different letters) and OpenSSH isn't remotely prepared to deal with that. For now, the best we can do is to force OpenSSH to use the C/POSIX locale and try to preserve the UTF-8 encoding if possible. ok dtucker@
-rw-r--r-- scp.c 2
-rw-r--r-- sftp.c 2
-rw-r--r-- ssh.c 3
-rw-r--r-- utf8.c 42
-rw-r--r-- utf8.h 1
5 files changed, 47 insertions, 3 deletions
diff --git a/scp.c b/scp.c
index c67cd71d..b4db8519 100644
--- a/scp.c
+++ b/scp.c
@@ -379,7 +379,7 @@ main(int argc, char **argv)
/* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
sanitise_stdfd();
- setlocale(LC_CTYPE, "");
+ msetlocale();
/* Copy argv, because we modify it */
newargv = xcalloc(MAXIMUM(argc + 1, 1), sizeof(*newargv));
diff --git a/sftp.c b/sftp.c
index af6e3a69..2b8fdabf 100644
--- a/sftp.c
+++ b/sftp.c
@@ -2272,7 +2272,7 @@ main(int argc, char **argv)
ssh_malloc_init(); /* must be called before any mallocs */
/* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
sanitise_stdfd();
- setlocale(LC_CTYPE, "");
+ msetlocale();
__progname = ssh_get_progname(argv[0]);
memset(&args, '\0', sizeof(args));
diff --git a/ssh.c b/ssh.c
index 8aa8daae..ee0b16dc 100644
--- a/ssh.c
+++ b/ssh.c
@@ -109,6 +109,7 @@
#include "version.h"
#include "ssherr.h"
#include "myproposal.h"
+#include "utf8.h"
#ifdef ENABLE_PKCS11
#include "ssh-pkcs11.h"
@@ -589,7 +590,7 @@ main(int ac, char **av)
*/
umask(022);
- setlocale(LC_CTYPE, "");
+ msetlocale();
/*
* Initialize option structure to indicate that no values have been
diff --git a/utf8.c b/utf8.c
index f563d373..87fa9e89 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,6 +27,7 @@
# include <langinfo.h>
#endif
#include <limits.h>
+#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -288,3 +289,44 @@ mprintf(const char *fmt, ...)
va_end(ap);
return ret;
}
+
+/*
+ * Set up libc for multibyte output in the user's chosen locale.
+ *
+ * XXX: we are known to have problems with Turkish (i/I confusion) so we
+ * deliberately fall back to the C locale for now. Longer term we should
+ * always prefer to select C.[encoding] if possible, but there's no
+ * standardisation in locales between systems, so we'll need to survey
+ * what's out there first.
+ */
+void
+msetlocale(void)
+{
+	const char *vars[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
+	char *cp;
+	int i;
+
+	/*
+	 * We can't yet cope with dotless/dotted I in Turkish locales, so
+	 * fall back to C for these. Unset or empty variables are skipped
+	 * (setlocale() ignores them too); the first set one decides.
+	 */
+	for (i = 0; vars[i] != NULL; i++) {
+		if ((cp = getenv(vars[i])) == NULL || *cp == '\0')
+			continue;
+		if (strncasecmp(cp, "TR", 2) != 0)
+			break;
+		/*
+		 * Prefer C.UTF-8 (or equivalent) to keep a UTF-8 encoding.
+		 */
+		if ((strcasestr(cp, "UTF-8") != NULL ||
+		    strcasestr(cp, "UTF8") != NULL) &&
+		    (setlocale(LC_CTYPE, "C.UTF-8") != NULL ||
+		    setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL))
+			return;
+		setlocale(LC_CTYPE, "C");
+		return;
+	}
+	/* We can handle this locale */
+	setlocale(LC_CTYPE, "");
+}
diff --git a/utf8.h b/utf8.h
index 43ce1d55..88c5a34a 100644
--- a/utf8.h
+++ b/utf8.h
@@ -22,3 +22,4 @@ int fmprintf(FILE *, const char *, ...)
int vfmprintf(FILE *, const char *, va_list);
int snmprintf(char *, size_t, int *, const char *, ...)
__attribute__((format(printf, 4, 5)));
+void msetlocale(void);