aboutsummaryrefslogtreecommitdiffstats
path: root/lib/libc/locale
diff options
context:
space:
mode:
authorsvn2git <svn2git@FreeBSD.org>1994-07-01 00:00:00 -0800
committersvn2git <svn2git@FreeBSD.org>1994-07-01 00:00:00 -0800
commit5e0e9b99dc3fc0ecd49d929db0d57c784b66f481 (patch)
treee779b5a6edddbb949b7990751b12d6f25304ba86 /lib/libc/locale
parenta16f65c7d117419bd266c28a1901ef129a337569 (diff)
downloadsrc-releng/1.tar.gz
src-releng/1.zip
Release FreeBSD 1.1.5.1release/1.1.5.1_cvsreleng/1
This commit was manufactured to restore the state of the 1.1.5.1-RELEASE image. Releases prior to 5.3-RELEASE are omitting the secure/ and crypto/ subdirs.
Diffstat (limited to 'lib/libc/locale')
-rw-r--r--lib/libc/locale/Makefile.inc18
-rw-r--r--lib/libc/locale/ansi.c148
-rw-r--r--lib/libc/locale/euc.4231
-rw-r--r--lib/libc/locale/euc.c220
-rw-r--r--lib/libc/locale/frune.c103
-rw-r--r--lib/libc/locale/isctype.c170
-rw-r--r--lib/libc/locale/mbrune.3157
-rw-r--r--lib/libc/locale/mbrune.c112
-rw-r--r--lib/libc/locale/multibyte.3241
-rw-r--r--lib/libc/locale/none.c93
-rw-r--r--lib/libc/locale/rune.3269
-rw-r--r--lib/libc/locale/rune.c334
-rw-r--r--lib/libc/locale/setlocale.3321
-rw-r--r--lib/libc/locale/setlocale.c198
-rw-r--r--lib/libc/locale/table.c160
-rw-r--r--lib/libc/locale/utf2.487
-rw-r--r--lib/libc/locale/utf2.c148
17 files changed, 2997 insertions, 13 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc
index 855608475d59..aa9d8bb14b56 100644
--- a/lib/libc/locale/Makefile.inc
+++ b/lib/libc/locale/Makefile.inc
@@ -1,6 +1,20 @@
-# @(#)Makefile.inc 5.1 (Berkeley) 2/18/91
+# From: @(#)Makefile.inc 5.1 (Berkeley) 2/18/91
+# $Id: Makefile.inc,v 1.5 1994/04/13 20:12:15 wollman Exp $
# locale sources
.PATH: ${.CURDIR}/${MACHINE}/locale ${.CURDIR}/locale
-SRCS+= lconv.c localeconv.c setlocale.c
+SRCS+= ansi.c frune.c mbrune.c lconv.c localeconv.c rune.c setlocale.c \
+ table.c isctype.c \
+ euc.c none.c utf2.c
+MAN3+= locale/mbrune.3 locale/multibyte.3 locale/rune.3 locale/setlocale.3
+MAN4+= locale/euc.4 locale/utf2.4
+
+MLINKS+= mbrune.3 mbrrune.3 mbrune.3 mbmb.3
+MLINKS+= multibyte.3 mblen.3 multibyte.3 mbstowcs.3 \
+ multibyte.3 mbtowc.3 multibyte.3 wcstombs.3 \
+ multibyte.3 wctomb.3
+MLINKS+= rune.3 setrunelocale.3 rune.3 setinvalidrune.3 \
+ rune.3 sgetrune.3 rune.3 sputrune.3
+MLINKS+= setlocale.3 localeconv.3
+MLINKS+= utf2.4 utf8.4
diff --git a/lib/libc/locale/ansi.c b/lib/libc/locale/ansi.c
new file mode 100644
index 000000000000..e5c8e8ff96de
--- /dev/null
+++ b/lib/libc/locale/ansi.c
@@ -0,0 +1,148 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ansi.c 8.1 (Berkeley) 6/27/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <stdlib.h>
+#include <limits.h>
+#include <stddef.h>
+#include <rune.h>
+
+int
+mblen(s, n)
+ const char *s;
+ size_t n;
+{
+ char const *e;
+
+ if (s == 0 || *s == 0)
+ return (0); /* No support for state dependent encodings. */
+
+ if (sgetrune(s, (int)n, &e) == _INVALID_RUNE)
+ return (s - e);
+ return (e - s);
+}
+
+int
+mbtowc(pwc, s, n)
+ wchar_t *pwc;
+ const char *s;
+ size_t n;
+{
+ char const *e;
+ rune_t r;
+
+ if (s == 0 || *s == 0)
+ return (0); /* No support for state dependent encodings. */
+
+ if ((r = sgetrune(s, (int)n, &e)) == _INVALID_RUNE)
+ return (s - e);
+ if (pwc)
+ *pwc = r;
+ return (e - s);
+}
+
+int
+wctomb(s, wchar)
+ char *s;
+ wchar_t wchar;
+{
+ char *e;
+
+ if (s == 0)
+ return (0); /* No support for state dependent encodings. */
+
+ if (wchar == 0) {
+ *s = 0;
+ return (1);
+ }
+
+ sputrune(wchar, s, MB_CUR_MAX, &e);
+ return (e ? e - s : -1);
+}
+
+size_t
+mbstowcs(pwcs, s, n)
+ wchar_t *pwcs;
+ const char *s;
+ size_t n;
+{
+ char const *e;
+ int cnt = 0;
+
+ if (!pwcs || !s)
+ return (-1);
+
+ while (n-- > 0) {
+ *pwcs = sgetrune(s, MB_LEN_MAX, &e);
+ if (*pwcs == _INVALID_RUNE)
+ return (-1);
+ if (*pwcs++ == 0)
+ break;
+ s = e;
+ ++cnt;
+ }
+ return (cnt);
+}
+
+size_t
+wcstombs(s, pwcs, n)
+ char *s;
+ const wchar_t *pwcs;
+ size_t n;
+{
+ char *e;
+ int cnt = 0;
+
+ if (!pwcs || !s)
+ return (-1);
+
+ while (n > 0) {
+ if (*pwcs == 0) {
+ *s = 0;
+ break;
+ }
+ if (!sputrune(*pwcs++, s, (int)n, &e))
+ return (-1); /* encoding error */
+ if (!e) /* too long */
+ return (cnt);
+ cnt += e - s;
+ s = e;
+ }
+ return (cnt);
+}
diff --git a/lib/libc/locale/euc.4 b/lib/libc/locale/euc.4
new file mode 100644
index 000000000000..966b896b657f
--- /dev/null
+++ b/lib/libc/locale/euc.4
@@ -0,0 +1,231 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Paul Borman at Krystal Technologies.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)euc.4 8.1 (Berkeley) 6/4/93
+.\"
+.Dd "June 4, 1993"
+.Dt EUC 4
+.Os
+.Sh NAME
+.Nm EUC
+.Nd EUC encoding of runes
+.Sh SYNOPSIS
+\fBENCODING "EUC"\fP
+.br
+\fBVARIABLE \fP\fIlen1 mask1 len2 mask2 len3 mask3 len4 mask4 mask\fP
+.Sh DESCRIPTION
+The
+.Nm EUC
+encoding is provided for compatibility with
+.Ux
+based systems.
+See
+.Xr mklocale 1
+for a complete description of the
+.Ev LC_CTYPE
+source file format.
+.Pp
+.Nm EUC
+implements a system of 4 multibyte codesets.
+A multibyte character in the first codeset consists of
+.Ar len1
+bytes starting with a byte in the range of 0x00 to 0x7f.
+To allow use of ASCII,
+.Ar len1
+is always 1.
+A multibyte character in the second codeset consists of
+.Ar len2
+bytes starting with a byte in the range of 0x80-0xff excluding 0x8e and 0x8f.
+A multibyte character in the third codeset consists of
+.Ar len3
+bytes starting with the byte 0x8e.
+A multibyte character in the fourth codeset consists of
+.Ar len4
+bytes starting with the byte 0x8f.
+.Pp
+The
+.Ev rune_t
+encoding of
+.Nm EUC
+multibyte characters is dependent on the
+.Ar len
+and
+.Ar mask
+arguments.
+First, the bytes are moved into a
+.Ev rune_t
+as follows:
+.Bd -literal
+byte0 << ((\fIlen\fPN-1) * 8) | byte1 << ((\fIlen\fPN-2) * 8) | ... | byte\fIlen\fPN-1
+.Ed
+.sp
+The result is then ANDed with
+.Ar ~mask
+and ORed with
+.Ar mask\fPN.
+Codesets 2 and 3 are special in that the leading byte (0x8e or 0x8f) is
+first removed and the
+.Ar len\fPN
+argument is reduced by 1.
+.sp
+For example, the Japanese locale has the following
+.Ev VARIABLE
+line:
+.Bd -literal
+VARIABLE 1 0x0000 2 0x8080 2 0x0080 3 0x8000 0x8080
+.Ed
+.sp
+Codeset 1 consists of the values 0x0000 - 0x007f.
+.sp
+Codeset 2 consists of the values who have the bits 0x8080 set.
+.sp
+Codeset 3 consists of the values 0x0080 - 0x00ff.
+.sp
+Codeset 4 consists of the values 0x8000 - 0xff7f excluding the values
+which have the 0x0080 bit set.
+.sp
+Notice that the global
+.Ar mask
+is set to 0x8080, this implies that from those 2 bits the codeset can
+be determined.
+.Sh "EXAMPLE - Japanese Locale"
+This is a complete example of an
+.Ev LC_CTYPE
+source file for the Japanese locale
+.Bd -literal
+/*
+ * Japanese LOCALE_CTYPE definitions using EUC of JIS character sets
+ */
+
+ENCODING "EUC"
+
+/* JIS JIS JIS */
+/* X201 X208 X201 */
+/* 00-7f 84-fe */
+
+VARIABLE 1 0x0000 2 0x8080 2 0x0080 3 0x8000 0x8080
+
+/*
+ * Code Set 1
+ */
+ALPHA 'A' - 'Z' 'a' - 'z'
+CONTROL 0x00 - 0x1f 0x7f
+DIGIT '0' - '9'
+GRAPH 0x21 - 0x7e
+LOWER 'a' - 'z'
+PUNCT 0x21 - 0x2f 0x3a - 0x40 0x5b - 0x60 0x7b - 0x7e
+SPACE 0x09 - 0x0d 0x20
+UPPER 'A' - 'Z'
+XDIGIT 'a' - 'f' 'A' - 'F'
+BLANK ' ' '\t'
+PRINT 0x20 - 0x7e
+
+MAPLOWER < 'A' - 'Z' : 'a' > < 'a' - 'z' : 'a' >
+MAPUPPER < 'A' - 'Z' : 'A' > < 'a' - 'z' : 'A' >
+TODIGIT < '0' - '9' : 0 >
+TODIGIT < 'A' - 'F' : 10 > < 'a' - 'f' : 10 >
+
+/*
+ * Code Set 2
+ */
+
+SPACE 0xa1a1
+PHONOGRAM 0xa1bc
+SPECIAL 0xa1a2 - 0xa1fe
+PUNCT 0xa1a2 - 0xa1f8 /* A few too many in here... */
+
+SPECIAL 0xa2a1 - 0xa2ae 0xa2ba - 0xa2c1 0xa2ca - 0xa2d0 0xa2dc - 0xa2ea
+SPECIAL 0xa2f2 - 0xa2f9 0xa2fe
+
+DIGIT 0xa3b0 - 0xa3b9
+UPPER 0xa3c1 - 0xa3da /* Romaji */
+LOWER 0xa3e1 - 0xa3fa /* Romaji */
+MAPLOWER < 0xa3c1 - 0xa3da : 0xa3e1 > /* English */
+MAPLOWER < 0xa3e1 - 0xa3fa : 0xa3e1 > /* English */
+MAPUPPER < 0xa3c1 - 0xa3da : 0xa3c1 >
+MAPUPPER < 0xa3e1 - 0xa3fa : 0xa3c1 >
+
+XDIGIT 0xa3c1 - 0xa3c6 0xa3e1 - 0xa3e6
+
+TODIGIT < 0xa3b0 - 0xa3b9 : 0 >
+TODIGIT < 0xa3c1 - 0xa3c6 : 10 > < 0xa3e1 - 0xa3e6 : 10 >
+
+PHONOGRAM 0xa4a1 - 0xa4f3
+PHONOGRAM 0xa5a1 - 0xa5f6
+
+UPPER 0xa6a1 - 0xa6b8 /* Greek */
+LOWER 0xa6c1 - 0xa6d8 /* Greek */
+MAPLOWER < 0xa6a1 - 0xa6b8 : 0xa6c1 > < 0xa6c1 - 0xa6d8 : 0xa6c1 >
+MAPUPPER < 0xa6a1 - 0xa6b8 : 0xa6a1 > < 0xa6c1 - 0xa6d8 : 0xa6a1 >
+
+UPPER 0xa7a1 - 0xa7c1 /* Cyrillic */
+LOWER 0xa7d1 - 0xa7f1 /* Cyrillic */
+MAPLOWER < 0xa7a1 - 0xa7c1 : 0xa7d1 > < 0xa7d1 - 0xa7f1 : 0xa7d1 >
+MAPUPPER < 0xa7a1 - 0xa7c1 : 0xa7a1 > < 0xa7d1 - 0xa7f1 : 0xa7a1 >
+
+SPECIAL 0xa8a1 - 0xa8c0
+
+IDEOGRAM 0xb0a1 - 0xb0fe 0xb1a1 - 0xb1fe 0xb2a1 - 0xb2fe
+IDEOGRAM 0xb3a1 - 0xb3fe 0xb4a1 - 0xb4fe 0xb5a1 - 0xb5fe
+IDEOGRAM 0xb6a1 - 0xb6fe 0xb7a1 - 0xb7fe 0xb8a1 - 0xb8fe
+IDEOGRAM 0xb9a1 - 0xb9fe 0xbaa1 - 0xbafe 0xbba1 - 0xbbfe
+IDEOGRAM 0xbca1 - 0xbcfe 0xbda1 - 0xbdfe 0xbea1 - 0xbefe
+IDEOGRAM 0xbfa1 - 0xbffe 0xc0a1 - 0xc0fe 0xc1a1 - 0xc1fe
+IDEOGRAM 0xc2a1 - 0xc2fe 0xc3a1 - 0xc3fe 0xc4a1 - 0xc4fe
+IDEOGRAM 0xc5a1 - 0xc5fe 0xc6a1 - 0xc6fe 0xc7a1 - 0xc7fe
+IDEOGRAM 0xc8a1 - 0xc8fe 0xc9a1 - 0xc9fe 0xcaa1 - 0xcafe
+IDEOGRAM 0xcba1 - 0xcbfe 0xcca1 - 0xccfe 0xcda1 - 0xcdfe
+IDEOGRAM 0xcea1 - 0xcefe 0xcfa1 - 0xcfd3 0xd0a1 - 0xd0fe
+IDEOGRAM 0xd1a1 - 0xd1fe 0xd2a1 - 0xd2fe 0xd3a1 - 0xd3fe
+IDEOGRAM 0xd4a1 - 0xd4fe 0xd5a1 - 0xd5fe 0xd6a1 - 0xd6fe
+IDEOGRAM 0xd7a1 - 0xd7fe 0xd8a1 - 0xd8fe 0xd9a1 - 0xd9fe
+IDEOGRAM 0xdaa1 - 0xdafe 0xdba1 - 0xdbfe 0xdca1 - 0xdcfe
+IDEOGRAM 0xdda1 - 0xddfe 0xdea1 - 0xdefe 0xdfa1 - 0xdffe
+IDEOGRAM 0xe0a1 - 0xe0fe 0xe1a1 - 0xe1fe 0xe2a1 - 0xe2fe
+IDEOGRAM 0xe3a1 - 0xe3fe 0xe4a1 - 0xe4fe 0xe5a1 - 0xe5fe
+IDEOGRAM 0xe6a1 - 0xe6fe 0xe7a1 - 0xe7fe 0xe8a1 - 0xe8fe
+IDEOGRAM 0xe9a1 - 0xe9fe 0xeaa1 - 0xeafe 0xeba1 - 0xebfe
+IDEOGRAM 0xeca1 - 0xecfe 0xeda1 - 0xedfe 0xeea1 - 0xeefe
+IDEOGRAM 0xefa1 - 0xeffe 0xf0a1 - 0xf0fe 0xf1a1 - 0xf1fe
+IDEOGRAM 0xf2a1 - 0xf2fe 0xf3a1 - 0xf3fe 0xf4a1 - 0xf4a4
+/*
+ * This is for Code Set 3, half-width kana
+ */
+SPECIAL 0xa1 - 0xdf
+PHONOGRAM 0xa1 - 0xdf
+CONTROL 0x84 - 0x97 0x9b - 0x9f 0xe0 - 0xfe
+.Ed
+.Sh "SEE ALSO"
+.Xr mklocale 1 ,
+.Xr setlocale 3
diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c
new file mode 100644
index 000000000000..e58c8556087a
--- /dev/null
+++ b/lib/libc/locale/euc.c
@@ -0,0 +1,220 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <rune.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+rune_t _EUC_sgetrune __P((const char *, size_t, char const **));
+int _EUC_sputrune __P((rune_t, char *, size_t, char **));
+
+typedef struct {
+ int count[4];
+ rune_t bits[4];
+ rune_t mask;
+} _EucInfo;
+
+int
+_EUC_init(rl)
+ _RuneLocale *rl;
+{
+ _EucInfo *ei;
+ int x;
+ char *v, *e;
+
+ rl->sgetrune = _EUC_sgetrune;
+ rl->sputrune = _EUC_sputrune;
+
+ if (!rl->variable) {
+ free(rl);
+ return (EFTYPE);
+ }
+ v = (char *) rl->variable;
+
+ while (*v == ' ' || *v == '\t')
+ ++v;
+
+ if ((ei = malloc(sizeof(_EucInfo))) == NULL) {
+ free(rl);
+ return (ENOMEM);
+ }
+ for (x = 0; x < 4; ++x) {
+ ei->count[x] = (int) strtol(v, &e, 0);
+ if (v == e || !(v = e)) {
+ free(rl);
+ free(ei);
+ return (EFTYPE);
+ }
+ while (*v == ' ' || *v == '\t')
+ ++v;
+ ei->bits[x] = (int) strtol(v, &e, 0);
+ if (v == e || !(v = e)) {
+ free(rl);
+ free(ei);
+ return (EFTYPE);
+ }
+ while (*v == ' ' || *v == '\t')
+ ++v;
+ }
+ ei->mask = (int)strtol(v, &e, 0);
+ if (v == e || !(v = e)) {
+ free(rl);
+ free(ei);
+ return (EFTYPE);
+ }
+ if (sizeof(_EucInfo) <= rl->variable_len) {
+ memcpy(rl->variable, ei, sizeof(_EucInfo));
+ free(ei);
+ } else {
+ rl->variable = &ei;
+ }
+ rl->variable_len = sizeof(_EucInfo);
+ _CurrentRuneLocale = rl;
+ __mb_cur_max = 3;
+ return (0);
+}
+
+#define CEI ((_EucInfo *)(_CurrentRuneLocale->variable))
+
+#define _SS2 0x008e
+#define _SS3 0x008f
+
+static inline int
+_euc_set(c)
+ u_int c;
+{
+ c &= 0xff;
+
+ return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
+}
+rune_t
+_EUC_sgetrune(string, n, result)
+ const char *string;
+ size_t n;
+ char const **result;
+{
+ rune_t rune = 0;
+ int len, set;
+
+ if (n < 1 || (len = CEI->count[set = _euc_set(*string)]) > n) {
+ if (result)
+ *result = string;
+ return (_INVALID_RUNE);
+ }
+ switch (set) {
+ case 3:
+ case 2:
+ --len;
+ ++string;
+ /* FALLTHROUGH */
+ case 1:
+ case 0:
+ while (len-- > 0)
+ rune = (rune << 8) | ((u_int)(*string++) & 0xff);
+ break;
+ }
+ if (result)
+ *result = string;
+ return ((rune & ~CEI->mask) | CEI->bits[set]);
+}
+
+int
+_EUC_sputrune(c, string, n, result)
+ rune_t c;
+ char *string, **result;
+ size_t n;
+{
+ rune_t m = c & CEI->mask;
+ rune_t nm = c & ~m;
+ int i, len;
+
+ if (m == CEI->bits[1]) {
+CodeSet1:
+ /* Codeset 1: The first byte must have 0x80 in it. */
+ i = len = CEI->count[1];
+ if (n >= len) {
+ if (result)
+ *result = string + len;
+ while (i-- > 0)
+ *string++ = (nm >> (i << 3)) | 0x80;
+ } else
+ if (result)
+ *result = (char *) 0;
+ } else {
+ if (m == CEI->bits[0]) {
+ i = len = CEI->count[0];
+ if (n < len) {
+ if (result)
+ *result = NULL;
+ return (len);
+ }
+ } else
+ if (m == CEI->bits[2]) {
+ i = len = CEI->count[2];
+ if (n < len) {
+ if (result)
+ *result = NULL;
+ return (len);
+ }
+ *string++ = _SS2;
+ --i;
+ } else
+ if (m == CEI->bits[3]) {
+ i = len = CEI->count[3];
+ if (n < len) {
+ if (result)
+ *result = NULL;
+ return (len);
+ }
+ *string++ = _SS3;
+ --i;
+ } else
+ goto CodeSet1; /* Bletch */
+ while (i-- > 0)
+ *string++ = (nm >> (i << 3)) & 0xff;
+ if (result)
+ *result = string;
+ }
+ return (len);
+}
diff --git a/lib/libc/locale/frune.c b/lib/libc/locale/frune.c
new file mode 100644
index 000000000000..443d3ba6eb36
--- /dev/null
+++ b/lib/libc/locale/frune.c
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)frune.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <limits.h>
+#include <rune.h>
+#include <stddef.h>
+#include <stdio.h>
+
+long
+fgetrune(fp)
+ FILE *fp;
+{
+ rune_t r;
+ int c, len;
+ char buf[MB_LEN_MAX];
+ char const *result;
+
+ len = 0;
+ do {
+ if ((c = getc(fp)) == EOF) {
+ if (len)
+ break;
+ return (EOF);
+ }
+ buf[len++] = c;
+
+ if ((r = sgetrune(buf, len, &result)) != _INVALID_RUNE)
+ return (r);
+ } while (result == buf && len < MB_LEN_MAX);
+
+ while (--len > 0)
+ ungetc(buf[len], fp);
+ return (_INVALID_RUNE);
+}
+
+int
+fungetrune(r, fp)
+ rune_t r;
+ FILE* fp;
+{
+ int len;
+ char buf[MB_LEN_MAX];
+
+ len = sputrune(r, buf, MB_LEN_MAX, 0);
+ while (len-- > 0)
+ if (ungetc(buf[len], fp) == EOF)
+ return (EOF);
+ return (0);
+}
+
+int
+fputrune(r, fp)
+ rune_t r;
+ FILE *fp;
+{
+ int i, len;
+ char buf[MB_LEN_MAX];
+
+ len = sputrune(r, buf, MB_LEN_MAX, 0);
+
+ for (i = 0; i < len; ++i)
+ if (putc(buf[i], fp) == EOF)
+ return (EOF);
+
+ return (0);
+}
diff --git a/lib/libc/locale/isctype.c b/lib/libc/locale/isctype.c
new file mode 100644
index 000000000000..06cb1254ce68
--- /dev/null
+++ b/lib/libc/locale/isctype.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)isctype.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#define _ANSI_LIBRARY
+#include <ctype.h>
+
+#undef isalnum
+int
+isalnum(c)
+ int c;
+{
+ return(__istype((c), (_A|_D)));
+}
+
+#undef isalpha
+int
+isalpha(c)
+ int c;
+{
+ return (__istype((c), _A));
+}
+
+#undef isascii
+int
+isascii(c)
+ int c;
+{
+ return((c & ~0x7F) == 0);
+}
+
+#undef isblank
+int
+isblank(c)
+ int c;
+{
+ return (__istype((c), _B));
+}
+
+#undef iscntrl
+int
+iscntrl(c)
+ int c;
+{
+ return (__istype((c), _C));
+}
+
+#undef isdigit
+int
+isdigit(c)
+ int c;
+{
+ return (__isctype((c), _D));
+}
+
+#undef isgraph
+int
+isgraph(c)
+ int c;
+{
+ return (__istype((c), _G));
+}
+
+#undef islower
+int
+islower(c)
+ int c;
+{
+ return (__istype((c), _L));
+}
+
+#undef isprint
+int
+isprint(c)
+ int c;
+{
+ return (__istype((c), _R));
+}
+
+#undef ispunct
+int
+ispunct(c)
+ int c;
+{
+ return (__istype((c), _P));
+}
+
+#undef isspace
+int
+isspace(c)
+ int c;
+{
+ return (__istype((c), _S));
+}
+
+#undef isupper
+int
+isupper(c)
+ int c;
+{
+ return (__istype((c), _U));
+}
+
+#undef isxdigit
+int
+isxdigit(c)
+ int c;
+{
+ return (__isctype((c), _X));
+}
+
+#undef toascii
+int
+toascii(c)
+ int c;
+{
+ return (c & 0177);
+}
+
+#undef toupper
+int
+toupper(c)
+ int c;
+{
+ return((c & _CRMASK) ? ___toupper(c) : _CurrentRuneLocale->mapupper[c]);
+}
+
+#undef tolower
+int
+tolower(c)
+ int c;
+{
+ return((c & _CRMASK) ? ___tolower(c) : _CurrentRuneLocale->maplower[c]);
+}
diff --git a/lib/libc/locale/mbrune.3 b/lib/libc/locale/mbrune.3
new file mode 100644
index 000000000000..83e9dfe938d2
--- /dev/null
+++ b/lib/libc/locale/mbrune.3
@@ -0,0 +1,157 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Paul Borman at Krystal Technologies.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)mbrune.3 8.1 (Berkeley) 6/4/93
+.\"
+.Dd "June 4, 1993"
+.Dt MBRUNE 3
+.Os
+.Sh NAME
+.Nm mbrune ,
+.Nm mbrrune ,
+.Nm mbmb
+.Nd multibyte rune support for C
+.Sh SYNOPSIS
+.Fd #include <rune.h>
+.Ft char *
+.Fn mbrune "const char *string" "rune_t rune"
+.Ft char *
+.Fn mbrrune "const char *string" "rune_t rune"
+.Ft char *
+.Fn mbmb "const char *string" "char *pattern"
+.Sh DESCRIPTION
+These routines provide the corresponding functionality of
+.Fn strchr ,
+.Fn strrchr
+and
+.Fn strstr
+for multibyte strings.
+.Pp
+The
+.Fn mbrune
+function locates the first occurrence of
+.Fn rune
+in the string pointed to by
+.Ar string .
+The terminating
+.Dv NULL
+character is considered part of the string.
+If
+.Fa rune
+is
+.Ql \e0 ,
+.Fn mbrune
+locates the terminating
+.Ql \e0 .
+.Pp
+The
+.Fn mbrrune
+function
+locates the last occurrence of
+.Fa rune
+in the string
+.Fa string .
+If
+.Fa rune
+is
+.Ql \e0 ,
+.Fn mbrune
+locates the terminating
+.Ql \e0 .
+.Pp
+The
+.Fn mbmb
+function locates the first occurrence of the null-terminated string
+.Fa pattern
+in the null-terminated string
+.Fa string.
+If
+.Fa pattern
+is the empty string,
+.Fn mbmb
+returns
+.Fa string ;
+if
+.Fa pattern
+occurs nowhere in
+.Fa string ,
+.Fn mbmb
+returns
+.Dv NULL ;
+otherwise
+.Fn mbmb
+returns a pointer to the first character of the first occurrence of
+.Fa pattern .
+.Sh RETURN VALUES
+The function
+.Fn mbrune
+returns a pointer to the located character, or
+.Dv NULL
+if the character does not appear in the string.
+.Pp
+The
+.Fn mbrrune
+function
+returns a pointer to the character, or
+.Dv NULL
+if the character does not appear in the string.
+.Pp
+The
+.Fn mbmb
+function
+returns a pointer to the
+.Fa pattern ,
+or
+.Dv NULL
+if the
+.Fa pattern
+does not appear in the string.
+.Sh "SEE ALSO
+.Xr euc 4 ,
+.Xr mbrune 3 ,
+.Xr rune 3 ,
+.Xr setlocale 3 ,
+.Xr utf2 4
+.Sh HISTORY
+The
+.Fn mbrune ,
+.Fn mbrrune ,
+and
+.Fn mbmb
+functions
+first appeared in Plan 9 from Bell Labs as
+.Fn utfrune ,
+.Fn utfrrune ,
+and
+.Fn utfutf .
diff --git a/lib/libc/locale/mbrune.c b/lib/libc/locale/mbrune.c
new file mode 100644
index 000000000000..92efe838a036
--- /dev/null
+++ b/lib/libc/locale/mbrune.c
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)mbrune.c 8.1 (Berkeley) 6/27/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <limits.h>
+#include <rune.h>
+#include <stddef.h>
+#include <string.h>
+
+char *
+mbrune(string, c)
+ const char *string;
+ rune_t c;
+{
+ char const *result;
+ rune_t r;
+
+ while ((r = sgetrune(string, MB_LEN_MAX, &result))) {
+ if (r == c)
+ return ((char *)string);
+ string = result == string ? string + 1 : result;
+ }
+
+ return (c == *string ? (char *)string : NULL);
+}
+
+char *
+mbrrune(string, c)
+ const char *string;
+ rune_t c;
+{
+ const char *last = 0;
+ char const *result;
+ rune_t r;
+
+ while ((r = sgetrune(string, MB_LEN_MAX, &result))) {
+ if (r == c)
+ last = string;
+ string = result == string ? string + 1 : result;
+ }
+ return (c == *string ? (char *)string : (char *)last);
+}
+
+char *
+mbmb(string, pattern)
+ const char *string;
+ char *pattern;
+{
+ rune_t first, r;
+ size_t plen, slen;
+ char const *result;
+
+ plen = strlen(pattern);
+ slen = strlen(string);
+ if (plen > slen)
+ return (0);
+
+ first = sgetrune(pattern, plen, &result);
+ if (result == string)
+ return (0);
+
+ while (slen >= plen && (r = sgetrune(string, slen, &result))) {
+ if (r == first) {
+ if (strncmp(string, pattern, slen) == 0)
+ return ((char *) string);
+ }
+ if (result == string) {
+ --slen;
+ ++string;
+ } else {
+ slen -= result - string;
+ string = result;
+ }
+ }
+ return (0);
+}
diff --git a/lib/libc/locale/multibyte.3 b/lib/libc/locale/multibyte.3
new file mode 100644
index 000000000000..3ea10e5ace3d
--- /dev/null
+++ b/lib/libc/locale/multibyte.3
@@ -0,0 +1,241 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Donn Seeley of BSDI.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)multibyte.3 8.1 (Berkeley) 6/4/93
+.\"
+.Dd "June 4, 1993"
+.Dt MULTIBYTE 3
+.Os
+.Sh NAME
+.Nm mblen ,
+.Nm mbstowcs ,
+.Nm mbtowc ,
+.Nm wcstombs ,
+.Nm wctomb
+.Nd multibyte character support for C
+.Sh SYNOPSIS
+.Fd #include <stdlib.h>
+.Ft int
+.Fn mblen "const char *mbchar" "int nbytes"
+.Ft size_t
+.Fn mbstowcs "wchar_t *wcstring" "const char *mbstring" "size_t nwchars"
+.Ft int
+.Fn mbtowc "wchar_t *wcharp" "const char *mbchar" "size_t nbytes"
+.Ft size_t
+.Fn wcstombs "char *mbstring" "const wchar_t *wcstring" "size_t nbytes"
+.Ft int
+.Fn wctomb "char *mbchar" "wchar_t wchar"
+.Sh DESCRIPTION
+The basic elements of some written natural languages such as Chinese
+cannot be represented uniquely with single C
+.Va char Ns s .
+The C standard supports two different ways of dealing with
+extended natural language encodings,
+.Em wide
+characters and
+.Em multibyte
+characters.
+Wide characters are an internal representation
+which allows each basic element to map
+to a single object of type
+.Va wchar_t .
+Multibyte characters are used for input and output
+and code each basic element as a sequence of C
+.Va char Ns s .
+Individual basic elements may map into one or more
+.Pq up to Dv MB_CHAR_MAX
+bytes in a multibyte character.
+.Pp
+The current locale
+.Pq Xr setlocale 3
+governs the interpretation of wide and multibyte characters.
+The locale category
+.Dv LC_CTYPE
+specifically controls this interpretation.
+The
+.Va wchar_t
+type is wide enough to hold the largest value
+in the wide character representations for all locales.
+.Pp
+Multibyte strings may contain
+.Sq shift
+indicators to switch to and from
+particular modes within the given representation.
+If explicit bytes are used to signal shifting,
+these are not recognized as separate characters
+but are lumped with a neighboring character.
+There is always a distinguished
+.Sq initial
+shift state.
+The
+.Fn mbstowcs
+and
+.Fn wcstombs
+functions assume that multibyte strings are interpreted
+starting from the initial shift state.
+The
+.Fn mblen ,
+.Fn mbtowc
+and
+.Fn wctomb
+functions maintain static shift state internally.
+A call with a null
+.Fa mbchar
+pointer returns nonzero if the current locale requires shift states,
+zero otherwise;
+if shift states are required, the shift state is reset to the initial state.
+The internal shift states are undefined after a call to
+.Fn setlocale
+with the
+.Dv LC_CTYPE
+or
+.Dv LC_ALL
+categories.
+.Pp
+For convenience in processing,
+the wide character with value 0
+.Pq the null wide character
+is recognized as the wide character string terminator,
+and the character with value 0
+.Pq the null byte
+is recognized as the multibyte character string terminator.
+Null bytes are not permitted within multibyte characters.
+.Pp
+The
+.Fn mblen
+function computes the length in bytes
+of a multibyte character
+.Fa mbchar .
+Up to
+.Fa nbytes
+bytes are examined.
+.Pp
+The
+.Fn mbtowc
+function converts a multibyte character
+.Fa mbchar
+into a wide character and stores the result
+in the object pointed to by
+.Fa wcharp.
+Up to
+.Fa nbytes
+bytes are examined.
+.Pp
+The
+.Fn wctomb
+function converts a wide character
+.Fa wchar
+into a multibyte character and stores
+the result in
+.Fa mbchar .
+The object pointed to by
+.Fa mbchar
+must be large enough to accommodate the multibyte character.
+.Pp
+The
+.Fn mbstowcs
+function converts a multibyte character string
+.Fa mbstring
+into a wide character string
+.Fa wcstring .
+No more than
+.Fa nwchars
+wide characters are stored.
+A terminating null wide character is appended if there is room.
+.Pp
+The
+.Fn wcstombs
+function converts a wide character string
+.Fa wcstring
+into a multibyte character string
+.Fa mbstring .
+Up to
+.Fa nbytes
+bytes are stored in
+.Fa mbstring .
+Partial multibyte characters at the end of the string are not stored.
+The multibyte character string is null terminated if there is room.
+.Sh "RETURN VALUES
+If multibyte characters are not supported in the current locale,
+all of these functions will return \-1 if characters can be processed,
+otherwise 0.
+.Pp
+If
+.Fa mbchar
+is
+.Dv NULL ,
+the
+.Fn mblen ,
+.Fn mbtowc
+and
+.Fn wctomb
+functions return nonzero if shift states are supported,
+zero otherwise.
+If
+.Fa mbchar
+is valid,
+then these functions return
+the number of bytes processed in
+.Fa mbchar ,
+or \-1 if no multibyte character
+could be recognized or converted.
+.Pp
+The
+.Fn mbstowcs
+function returns the number of wide characters converted,
+not counting any terminating null wide character.
+The
+.Fn wcstombs
+function returns the number of bytes converted,
+not counting any terminating null byte.
+If any invalid multibyte characters are encountered,
+both functions return \-1.
+.Sh "SEE ALSO
+.Xr euc 4 ,
+.Xr mbrune 3 ,
+.Xr rune 3 ,
+.Xr setlocale 3 ,
+.Xr utf2 4
+.Sh STANDARDS
+The
+.Fn mblen ,
+.Fn mbstowcs ,
+.Fn mbtowc ,
+.Fn wcstombs
+and
+.Fn wctomb
+functions conform to
+.St -ansiC .
+.Sh BUGS
+The current implementation does not support shift states.
diff --git a/lib/libc/locale/none.c b/lib/libc/locale/none.c
new file mode 100644
index 000000000000..b5d8e44299cd
--- /dev/null
+++ b/lib/libc/locale/none.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)none.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <rune.h>
+#include <errno.h>
+#include <stdlib.h>
+
+rune_t _none_sgetrune __P((const char *, size_t, char const **));
+int _none_sputrune __P((rune_t, char *, size_t, char **));
+
+int
+_none_init(rl)
+ _RuneLocale *rl;
+{
+ rl->sgetrune = _none_sgetrune;
+ rl->sputrune = _none_sputrune;
+ _CurrentRuneLocale = rl;
+ __mb_cur_max = 1;
+ return(0);
+}
+
+rune_t
+_none_sgetrune(string, n, result)
+ const char *string;
+ size_t n;
+ char const **result;
+{
+ int c;
+
+ if (n < 1) {
+ if (result)
+ *result = string;
+ return(_INVALID_RUNE);
+ }
+ if (result)
+ *result = string + 1;
+ return(*string & 0xff);
+}
+
+int
+_none_sputrune(c, string, n, result)
+ rune_t c;
+ char *string, **result;
+ size_t n;
+{
+ if (n >= 1) {
+ if (string)
+ *string = c;
+ if (result)
+ *result = string + 1;
+ } else if (result)
+ *result = (char *)0;
+ return(1);
+}
diff --git a/lib/libc/locale/rune.3 b/lib/libc/locale/rune.3
new file mode 100644
index 000000000000..8d2ca5dd4b5e
--- /dev/null
+++ b/lib/libc/locale/rune.3
@@ -0,0 +1,269 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Paul Borman at Krystal Technologies.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)rune.3 8.1 (Berkeley) 6/27/93
+.\"
+.Dd "June 27, 1993"
+.Dt RUNE 3
+.Os
+.Sh NAME
+.Nm setrunelocale ,
+.Nm setinvalidrune ,
+.Nm sgetrune ,
+.Nm sputrune
+.Nd rune support for C
+.Sh SYNOPSIS
+.Fd #include <rune.h>
+.Fd #include <errno.h>
+.Ft int
+.Fn setrunelocale "char *locale"
+.Ft void
+.Fn setinvalidrune "rune_t rune"
+.Ft rune_t
+.Fn sgetrune "const char *string" "size_t n" "char const **result"
+.Ft int
+.Fn sputrune "rune_t rune" "char *string" "size_t n" "char **result"
+.sp
+.Fd #include <stdio.h>
+.Ft long
+.Fn fgetrune "FILE *stream"
+.Ft int
+.Fn fungetrune "rune_t rune" "FILE *stream"
+.Ft int
+.Fn fputrune "rune_t rune" "FILE *stream"
+.Sh DESCRIPTION
+The
+.Fn setrunelocale
+controls the type of encoding used to represent runes as multibyte strings
+as well as the properties of the runes as defined in
+\fB<ctype.h>\fP.
+The
+.Fa locale
+argument indicates the locale which to load.
+If the locale is successfully loaded,
+.Dv 0
+is returned, otherwise an errno value is returned to indicate the
+type of error.
+.Pp
+The
+.Fn setinvalidrune
+function sets the value of the global value
+.Ev _INVALID_RUNE
+to be
+.Fa rune.
+.Pp
+The
+.Fn sgetrune
+function tries to read a single multibyte character from
+.Fa string ,
+which is at most
+.Fa n
+bytes long.
+If
+.Fn sgetrune
+is successful, the rune is returned.
+If
+.Fa result
+is not
+.Dv NULL ,
+.Fa *result
+will point to the first byte which was not converted in
+.Fa string.
+If the first
+.Fa n
+bytes of
+.Fa string
+do not describe a full multibyte character,
+.Ev _INVALID_RUNE
+is returned and
+.Fa *result
+will point to
+.Fa string.
+If there is an encoding error at the start of
+.Fa string ,
+.Ev _INVALID_RUNE
+is returned and
+.Fa *result
+will point to the second character of
+.Fa string.
+.Pp
+the
+.Fn sputrune
+function tries to encode
+.Fa rune
+as a multibyte string and store it at
+.Fa string ,
+but no more than
+.Fa n
+bytes will be stored.
+If
+.Fa result
+is not
+.Dv NULL ,
+.Fa *result
+will be set to point to the first byte in string following the new
+multibyte character.
+If
+.Fa string
+is
+.Dv NULL ,
+.Fa *result
+will point to
+.Dv "(char *)0 +"
+.Fa x ,
+where
+.Fa x
+is the number of bytes that would be needed to store the multibyte value.
+If the multibyte character would consist of more than
+.Fa n
+bytes and
+.Fa result
+is not
+.Dv NULL ,
+.Fa *result
+will be set to
+.Dv NULL.
+In all cases,
+.Fn sputrune
+will return the number of bytes which would be needed to store
+.Fa rune
+as a multibyte character.
+.Pp
+The
+.Fn fgetrune
+function operates the same as
+.Fn sgetrune
+with the exception that it attempts to read enough bytes from
+.Fa stream
+to decode a single rune. It returns either
+.Ev EOF
+on end of file,
+.Ev _INVALID_RUNE
+on an encoding error, or the rune decoded if all went well.
+.Pp
+The
+.Fn fungetrune
+function function pushes the multibyte encoding, as provided by
+.Fn sputrune ,
+of
+.Fa rune
+onto
+.Fa stream
+such that the next
+.Fn fgetrune
+call will return
+.Fa rune .
+It returns
+.Ev EOF
+if it fails and
+.Dv 0
+on success.
+.Pp
+The
+.Fn fputrune
+function writes the multibyte encoding of
+.Fa rune ,
+as provided by
+.Fn sputrune ,
+onto
+.Fa stream .
+It returns
+.Ev EOF
+on failure and
+.Dv 0
+on success.
+.Sh RETURN VALUES
+The
+.Fn setrunelocale
+function returns one of the following values:
+.Bl -tag -width WWWWWWWW
+.It Dv 0
+.Fa setrunelocale was successful.
+.It Ev EFAULT
+.Fa locale
+was
+.Dv NULL .
+.It Ev ENOENT
+The locale could not be found.
+.It Ev EFTYPE
+The file found was not a valid file.
+.It Ev EINVAL
+The encoding indicated by the locale was unknown.
+.El
+.Pp
+The
+.Fn sgetrune
+function either returns the rune read or
+.Ev _INVALID_RUNE .
+The
+.Fn sputrune
+function returns the number of bytes needed to store
+.Fa rune
+as a multibyte string.
+.Sh FILES
+.Bl -tag -width /usr/share/locale/locale/LC_CTYPE -compact
+.It Pa $PATH_LOCALE/\fIlocale\fP/LC_CTYPE
+.It Pa /usr/share/locale/\fIlocale\fP/LC_CTYPE
+binary LC_CTYPE file for the locale \fIlocale\fP.
+.El
+.Sh "SEE ALSO
+.Xr euc 4 ,
+.Xr mbrune 3 ,
+.Xr setlocale 3 ,
+.Xr utf2 4
+.Sh NOTE
+The ANSI C type
+.Ev wchar_t
+is the same as
+.Ev rune_t .
+.Ev Rune_t
+was chosen to accent the purposeful choice of not basing the
+system with the ANSI C
+primitives, which were, shall we say, less aesthetic.
+.Sh HISTORY
+These functions first appeared in
+.Bx 4.4 .
+.Pp
+The
+.Fn setrunelocale
+function and the other non-ANSI rune functions were inspired by
+.Nm Plan 9 from Bell Labs
+as a much more sane alternative to the ANSI multibyte and
+wide character support.
+.\"They were conceived at the San Diego 1993 Summer USENIX conference by
+.\"Paul Borman of Krystal Technologies, Keith Bostic of CSRG and Andrew Hume
+.\"of Bell Labs.
+.Pp
+All of the ANSI multibyte and wide character
+support functions are built using the rune functions.
diff --git a/lib/libc/locale/rune.c b/lib/libc/locale/rune.c
new file mode 100644
index 000000000000..b239484fac81
--- /dev/null
+++ b/lib/libc/locale/rune.c
@@ -0,0 +1,334 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <rune.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern int _none_init __P((_RuneLocale *));
+extern int _UTF2_init __P((_RuneLocale *));
+extern int _EUC_init __P((_RuneLocale *));
+static _RuneLocale *_Read_RuneMagi __P((FILE *));
+
+static char *PathLocale = 0;
+
+int
+setrunelocale(encoding)
+ char *encoding;
+{
+ FILE *fp;
+ char name[PATH_MAX];
+ _RuneLocale *rl;
+
+ if (!encoding)
+ return(EFAULT);
+
+ /*
+ * The "C" and "POSIX" locale are always here.
+ */
+ if (!strcmp(encoding, "C") || !strcmp(encoding, "POSIX")) {
+ _CurrentRuneLocale = &_DefaultRuneLocale;
+ return(0);
+ }
+
+ if (!PathLocale && !(PathLocale = getenv("PATH_LOCALE")))
+ PathLocale = _PATH_LOCALE;
+
+ sprintf(name, "%s/%s/LC_CTYPE", PathLocale, encoding);
+
+ if ((fp = fopen(name, "r")) == NULL)
+ return(ENOENT);
+
+ if ((rl = _Read_RuneMagi(fp)) == 0) {
+ fclose(fp);
+ return(EFTYPE);
+ }
+
+ if (!rl->encoding[0] || !strcmp(rl->encoding, "UTF2")) {
+ return(_UTF2_init(rl));
+ } else if (!strcmp(rl->encoding, "NONE")) {
+ return(_none_init(rl));
+ } else if (!strcmp(rl->encoding, "EUC")) {
+ return(_EUC_init(rl));
+ } else
+ return(EINVAL);
+}
+
+void
+setinvalidrune(ir)
+ rune_t ir;
+{
+ _INVALID_RUNE = ir;
+}
+
+static _RuneLocale *
+_Read_RuneMagi(fp)
+ FILE *fp;
+{
+ char *data;
+ void *np;
+ void *lastp;
+ _RuneLocale *rl;
+ _RuneEntry *rr;
+ struct stat sb;
+ int x;
+
+ if (fstat(fileno(fp), &sb) < 0)
+ return(0);
+
+ if (sb.st_size < sizeof(_RuneLocale))
+ return(0);
+
+ if ((data = malloc(sb.st_size)) == NULL)
+ return(0);
+
+ rewind(fp); /* Someone might have read the magic number once already */
+
+ if (fread(data, sb.st_size, 1, fp) != 1) {
+ free(data);
+ return(0);
+ }
+
+ rl = (_RuneLocale *)data;
+ lastp = data + sb.st_size;
+
+ rl->variable = rl + 1;
+
+ if (memcmp(rl->magic, _RUNE_MAGIC_1, sizeof(rl->magic))) {
+ free(data);
+ return(0);
+ }
+
+ rl->invalid_rune = ntohl(rl->invalid_rune);
+ rl->variable_len = ntohl(rl->variable_len);
+ rl->runetype_ext.nranges = ntohl(rl->runetype_ext.nranges);
+ rl->maplower_ext.nranges = ntohl(rl->maplower_ext.nranges);
+ rl->mapupper_ext.nranges = ntohl(rl->mapupper_ext.nranges);
+
+ for (x = 0; x < _CACHED_RUNES; ++x) {
+ rl->runetype[x] = ntohl(rl->runetype[x]);
+ rl->maplower[x] = ntohl(rl->maplower[x]);
+ rl->mapupper[x] = ntohl(rl->mapupper[x]);
+ }
+
+ rl->runetype_ext.ranges = (_RuneEntry *)rl->variable;
+ rl->variable = rl->runetype_ext.ranges + rl->runetype_ext.nranges;
+ if (rl->variable > lastp) {
+ free(data);
+ return(0);
+ }
+
+ rl->maplower_ext.ranges = (_RuneEntry *)rl->variable;
+ rl->variable = rl->maplower_ext.ranges + rl->maplower_ext.nranges;
+ if (rl->variable > lastp) {
+ free(data);
+ return(0);
+ }
+
+ rl->mapupper_ext.ranges = (_RuneEntry *)rl->variable;
+ rl->variable = rl->mapupper_ext.ranges + rl->mapupper_ext.nranges;
+ if (rl->variable > lastp) {
+ free(data);
+ return(0);
+ }
+
+ for (x = 0; x < rl->runetype_ext.nranges; ++x) {
+ rr = rl->runetype_ext.ranges;
+
+ rr[x].min = ntohl(rr[x].min);
+ rr[x].max = ntohl(rr[x].max);
+ if ((rr[x].map = ntohl(rr[x].map)) == 0) {
+ int len = rr[x].max - rr[x].min + 1;
+ rr[x].types = rl->variable;
+ rl->variable = rr[x].types + len;
+ if (rl->variable > lastp) {
+ free(data);
+ return(0);
+ }
+ while (len-- > 0)
+ rr[x].types[len] = ntohl(rr[x].types[len]);
+ } else
+ rr[x].types = 0;
+ }
+
+ for (x = 0; x < rl->maplower_ext.nranges; ++x) {
+ rr = rl->maplower_ext.ranges;
+
+ rr[x].min = ntohl(rr[x].min);
+ rr[x].max = ntohl(rr[x].max);
+ rr[x].map = ntohl(rr[x].map);
+ }
+
+ for (x = 0; x < rl->mapupper_ext.nranges; ++x) {
+ rr = rl->mapupper_ext.ranges;
+
+ rr[x].min = ntohl(rr[x].min);
+ rr[x].max = ntohl(rr[x].max);
+ rr[x].map = ntohl(rr[x].map);
+ }
+ if (((char *)rl->variable) + rl->variable_len > (char *)lastp) {
+ free(data);
+ return(0);
+ }
+
+ /*
+ * Go out and zero pointers that should be zero.
+ */
+ if (!rl->variable_len)
+ rl->variable = 0;
+
+ if (!rl->runetype_ext.nranges)
+ rl->runetype_ext.ranges = 0;
+
+ if (!rl->maplower_ext.nranges)
+ rl->maplower_ext.ranges = 0;
+
+ if (!rl->mapupper_ext.nranges)
+ rl->mapupper_ext.ranges = 0;
+
+ return(rl);
+}
+
+unsigned long
+___runetype(c)
+ _BSD_RUNE_T_ c;
+{
+ int x;
+ _RuneRange *rr = &_CurrentRuneLocale->runetype_ext;
+ _RuneEntry *re = rr->ranges;
+
+ if (c == EOF)
+ return(0);
+ for (x = 0; x < rr->nranges; ++x, ++re) {
+ if (c < re->min)
+ return(0L);
+ if (c <= re->max) {
+ if (re->types)
+ return(re->types[c - re->min]);
+ else
+ return(re->map);
+ }
+ }
+ return(0L);
+}
+
+_BSD_RUNE_T_
+___toupper(c)
+ _BSD_RUNE_T_ c;
+{
+ int x;
+ _RuneRange *rr = &_CurrentRuneLocale->mapupper_ext;
+ _RuneEntry *re = rr->ranges;
+
+ if (c == EOF)
+ return(EOF);
+ for (x = 0; x < rr->nranges; ++x, ++re) {
+ if (c < re->min)
+ return(c);
+ if (c <= re->max)
+ return(re->map + c - re->min);
+ }
+ return(c);
+}
+
+_BSD_RUNE_T_
+___tolower(c)
+ _BSD_RUNE_T_ c;
+{
+ int x;
+ _RuneRange *rr = &_CurrentRuneLocale->maplower_ext;
+ _RuneEntry *re = rr->ranges;
+
+ if (c == EOF)
+ return(EOF);
+ for (x = 0; x < rr->nranges; ++x, ++re) {
+ if (c < re->min)
+ return(c);
+ if (c <= re->max)
+ return(re->map + c - re->min);
+ }
+ return(c);
+}
+
+
+#if !defined(_USE_CTYPE_INLINE_) && !defined(_USE_CTYPE_MACROS_)
+/*
+ * See comments in <machine/ansi.h>
+ */
+int
+__istype(c, f)
+ _BSD_RUNE_T_ c;
+ unsigned long f;
+{
+ return ((((c & _CRMASK) ? ___runetype(c)
+ : _CurrentRuneLocale->runetype[c]) & f) ? 1 : 0);
+}
+
+int
+__isctype(_BSD_RUNE_T_ c, unsigned long f)
+ _BSD_RUNE_T_ c;
+ unsigned long f;
+{
+ return ((((c & _CRMASK) ? 0
+ : _DefaultRuneLocale.runetype[c]) & f) ? 1 : 0);
+}
+
+_BSD_RUNE_T_
+toupper(c)
+ _BSD_RUNE_T_ c;
+{
+ return ((c & _CRMASK) ?
+ ___toupper(c) : _CurrentRuneLocale->mapupper[c]);
+}
+
+_BSD_RUNE_T_
+tolower(c)
+ _BSD_RUNE_T_ c;
+{
+ return ((c & _CRMASK) ?
+ ___tolower(c) : _CurrentRuneLocale->maplower[c]);
+}
+#endif
diff --git a/lib/libc/locale/setlocale.3 b/lib/libc/locale/setlocale.3
new file mode 100644
index 000000000000..74cce03bfc5e
--- /dev/null
+++ b/lib/libc/locale/setlocale.3
@@ -0,0 +1,321 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Donn Seeley at BSDI.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)setlocale.3 8.1 (Berkeley) 6/9/93
+.\"
+.Dd June 9, 1993
+.Dt SETLOCALE 3
+.Os
+.Sh NAME
+.Nm setlocale ,
+.Nm localeconv
+.Nd natural language formatting for C
+.Sh SYNOPSIS
+.Fd #include <locale.h>
+.Ft char *
+.Fn setlocale "int category" "const char *locale"
+.Ft struct lconv *
+.Fn localeconv "void"
+.Sh DESCRIPTION
+The
+.Fn setlocale
+function sets the C library's notion
+of natural language formatting style
+for particular sets of routines.
+Each such style is called a
+.Sq locale
+and is invoked using an appropriate name passed as a C string.
+The
+.Fn localeconv
+routine returns the current locale's parameters
+for formatting numbers.
+.Pp
+The
+.Fn setlocale
+function recognizes several categories of routines.
+These are the categories and the sets of routines they select:
+.Pp
+.Bl -tag -width LC_MONETARY
+.It Dv LC_ALL
+Set the entire locale generically.
+.It Dv LC_COLLATE
+Set a locale for string collation routines.
+This controls alphabetic ordering in
+.Fn strcoll
+and
+.Fn strxfrm .
+.It Dv LC_CTYPE
+Set a locale for the
+.Xr ctype 3 ,
+.Xr mbrune 3 ,
+.Xr multibyte 3
+and
+.Xr rune 3
+functions.
+This controls recognition of upper and lower case,
+alphabetic or non-alphabetic characters,
+and so on. The real work is done by the
+.Fn setrunelocale
+function.
+.It Dv LC_MONETARY
+Set a locale for formatting monetary values;
+this affects the
+.Fn localeconv
+function.
+.It Dv LC_NUMERIC
+Set a locale for formatting numbers.
+This controls the formatting of decimal points
+in input and output of floating point numbers
+in functions such as
+.Fn printf
+and
+.Fn scanf ,
+as well as values returned by
+.Fn localeconv .
+.It Dv LC_TIME
+Set a locale for formatting dates and times using the
+.Fn strftime
+function.
+.El
+.Pp
+Only three locales are defined by default,
+the empty string
+.Li "\&""\|""
+which denotes the native environment, and the
+.Li "\&""C""
+and
+.LI "\&""POSIX""
+locales, which denote the C language environment.
+A
+.Fa locale
+argument of
+.Dv NULL
+causes
+.Fn setlocale
+to return the current locale.
+By default, C programs start in the
+.Li "\&""C""
+locale.
+The only function in the library that sets the locale is
+.Fn setlocale ;
+the locale is never changed as a side effect of some other routine.
+.Pp
+The
+.Fn localeconv
+function returns a pointer to a structure
+which provides parameters for formatting numbers,
+especially currency values:
+.Bd -literal -offset indent
+struct lconv {
+ char *decimal_point;
+ char *thousands_sep;
+ char *grouping;
+ char *int_curr_symbol;
+ char *currency_symbol;
+ char *mon_decimal_point;
+ char *mon_thousands_sep;
+ char *mon_grouping;
+ char *positive_sign;
+ char *negative_sign;
+ char int_frac_digits;
+ char frac_digits;
+ char p_cs_precedes;
+ char p_sep_by_space;
+ char n_cs_precedes;
+ char n_sep_by_space;
+ char p_sign_posn;
+ char n_sign_posn;
+};
+.Ed
+.Pp
+The individual fields have the following meanings:
+.Pp
+.Bl -tag -width mon_decimal_point
+.It Fa decimal_point
+The decimal point character, except for currency values.
+.It Fa thousands_sep
+The separator between groups of digits
+before the decimal point, except for currency values.
+.It Fa grouping
+The sizes of the groups of digits, except for currency values.
+This is a pointer to a vector of integers, each of size
+.Va char ,
+representing group size from low order digit groups
+to high order (right to left).
+The list may be terminated with 0 or
+.Dv CHAR_MAX .
+If the list is terminated with 0,
+the last group size before the 0 is repeated to account for all the digits.
+If the list is terminated with
+.Dv CHAR_MAX ,
+no more grouping is performed.
+.It Fa int_curr_symbol
+The standardized international currency symbol.
+.It Fa currency_symbol
+The local currency symbol.
+.It Fa mon_decimal_point
+The decimal point character for currency values.
+.It Fa mon_thousands_sep
+The separator for digit groups in currency values.
+.It Fa mon_grouping
+Like
+.Fa grouping
+but for currency values.
+.It Fa positive_sign
+The character used to denote nonnegative currency values,
+usually the empty string.
+.It Fa negative_sign
+The character used to denote negative currency values,
+usually a minus sign.
+.It Fa int_frac_digits
+The number of digits after the decimal point
+in an international-style currency value.
+.It Fa frac_digits
+The number of digits after the decimal point
+in the local style for currency values.
+.It Fa p_cs_precedes
+1 if the currency symbol precedes the currency value
+for nonnegative values, 0 if it follows.
+.It Fa p_sep_by_space
+1 if a space is inserted between the currency symbol
+and the currency value for nonnegative values, 0 otherwise.
+.It Fa n_cs_precedes
+Like
+.Fa p_cs_precedes
+but for negative values.
+.It Fa n_sep_by_space
+Like
+.Fa p_sep_by_space
+but for negative values.
+.It Fa p_sign_posn
+The location of the
+.Fa positive_sign
+with respect to a nonnegative quantity and the
+.Fa currency_symbol ,
+coded as follows:
+.Bl -tag -width 3n -compact
+.It Li 0
+Parentheses around the entire string.
+.It Li 1
+Before the string.
+.It Li 2
+After the string.
+.It Li 3
+Just before
+.Fa currency_symbol .
+.It Li 4
+Just after
+.Fa currency_symbol .
+.El
+.It Fa n_sign_posn
+Like
+.Fa p_sign_posn
+but for negative currency values.
+.El
+.Pp
+Unless mentioned above,
+an empty string as a value for a field
+indicates a zero length result or
+a value that is not in the current locale.
+A
+.Dv CHAR_MAX
+result similarly denotes an unavailable value.
+.Sh "RETURN VALUES
+The
+.Fn setlocale
+function returns
+.Dv NULL
+and fails to change the locale
+if the given combination of
+.Fa category
+and
+.Fa locale
+makes no sense.
+The
+.Fn localeconv
+function returns a pointer to a static object
+which may be altered by later calls to
+.Fn setlocale
+or
+.Fn localeconv .
+.Sh FILES
+.Bl -tag -width /usr/share/locale/locale/category -compact
+.It Pa $PATH_LOCALE/\fIlocale\fP/\fIcategory\fP
+.It Pa /usr/share/locale/\fIlocale\fP/\fIcategory\fP
+locale file for the locale \fIlocale\fP
+and the category \fIcategory\fP.
+.El
+.Sh "SEE ALSO
+.Xr euc 4 ,
+.Xr mbrune 3 ,
+.Xr multibyte 3 ,
+.Xr rune 3 ,
+.Xr strcoll 3 ,
+.Xr strxfrm 3 ,
+.Xr utf2 4
+.Sh STANDARDS
+The
+.Fn setlocale
+and
+.Fn localeconv
+functions conform to
+.St -ansiC .
+.Sh HISTORY
+The
+.Fn setlocale
+and
+.Fn localeconv
+functions first appeared in 4.4BSD.
+.Sh BUGS
+The current implementation supports only the
+.Li "\&""C""
+and
+.Li "\&""POSIX""
+locales for all but the LC_CTYPE locale.
+.Pp
+In spite of the gnarly currency support in
+.Fn localeconv ,
+the standards don't include any functions
+for generalized currency formatting.
+.Pp
+.Dv LC_COLLATE
+does not make sense for many languages.
+Use of
+.Dv LC_MONETARY
+could lead to misleading results until we have a real time currency
+conversion function.
+.Dv LC_NUMERIC
+and
+.Dv LC_TIME
+are personal choices and should not be wrapped up with the other categories.
diff --git a/lib/libc/locale/setlocale.c b/lib/libc/locale/setlocale.c
index 247a2782b0c9..414f58ef381a 100644
--- a/lib/libc/locale/setlocale.c
+++ b/lib/libc/locale/setlocale.c
@@ -1,6 +1,9 @@
/*
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -32,27 +35,200 @@
*/
#if defined(LIBC_SCCS) && !defined(lint)
-static char sccsid[] = "@(#)setlocale.c 5.2 (Berkeley) 2/24/91";
+static char sccsid[] = "@(#)setlocale.c 8.1 (Berkeley) 7/4/93";
#endif /* LIBC_SCCS and not lint */
+#include <limits.h>
#include <locale.h>
+#include <rune.h>
+#include <stdlib.h>
#include <string.h>
-static char C[] = "C";
+/*
+ * Category names for getenv()
+ */
+static char *categories[_LC_LAST] = {
+ "LC_ALL",
+ "LC_COLLATE",
+ "LC_CTYPE",
+ "LC_MONETARY",
+ "LC_NUMERIC",
+ "LC_TIME",
+};
/*
- * The setlocale function.
- *
- * Sorry, for now we only accept the C locale.
+ * Current locales for each category
*/
+static char current_categories[_LC_LAST][32] = {
+ "C",
+ "C",
+ "C",
+ "C",
+ "C",
+ "C",
+};
+
+/*
+ * The locales we are going to try and load
+ */
+static char new_categories[_LC_LAST][32];
+
+static char current_locale_string[_LC_LAST * 33];
+static char *PathLocale;
+
+static char *currentlocale __P((void));
+static char *loadlocale __P((int));
+
char *
setlocale(category, locale)
int category;
const char *locale;
{
- if ((unsigned int)category >= _LC_LAST)
+ int found, i, len;
+ char *env, *r;
+
+ if (!PathLocale && !(PathLocale = getenv("PATH_LOCALE")))
+ PathLocale = _PATH_LOCALE;
+
+ if (category < 0 || category >= _LC_LAST)
return (NULL);
- if (locale == NULL)
- return (C);
- return(strcmp(locale, C) ? NULL : C);
+
+ if (!locale)
+ return (category ?
+ current_categories[category] : currentlocale());
+
+ /*
+ * Default to the current locale for everything.
+ */
+ for (i = 1; i < _LC_LAST; ++i)
+ (void)strcpy(new_categories[i], current_categories[i]);
+
+ /*
+ * Now go fill up new_categories from the locale argument
+ */
+ if (!*locale) {
+ env = getenv(categories[category]);
+
+ if (!env)
+ env = getenv(categories[0]);
+
+ if (!env)
+ env = getenv("LANG");
+
+ if (!env)
+ env = "C";
+
+ (void) strncpy(new_categories[category], env, 31);
+ new_categories[category][31] = 0;
+ if (!category) {
+ for (i = 1; i < _LC_LAST; ++i) {
+ if (!(env = getenv(categories[i])))
+ env = new_categories[0];
+ (void)strncpy(new_categories[i], env, 31);
+ new_categories[i][31] = 0;
+ }
+ }
+ } else if (category) {
+ (void)strncpy(new_categories[category], locale, 31);
+ new_categories[category][31] = 0;
+ } else {
+ if ((r = strchr(locale, '/')) == 0) {
+ for (i = 1; i < _LC_LAST; ++i) {
+ (void)strncpy(new_categories[i], locale, 31);
+ new_categories[i][31] = 0;
+ }
+ } else {
+ for (i = 1; r[1] == '/'; ++r);
+ if (!r[1])
+ return (NULL); /* Hmm, just slashes... */
+ do {
+ len = r - locale > 31 ? 31 : r - locale;
+ (void)strncpy(new_categories[i++], locale, len);
+ new_categories[i++][len] = 0;
+ locale = r;
+ while (*locale == '/')
+ ++locale;
+ while (*++r && *r != '/');
+ } while (*locale);
+ while (i < _LC_LAST)
+ (void)strcpy(new_categories[i],
+ new_categories[i-1]);
+ }
+ }
+
+ if (category)
+ return (loadlocale(category));
+
+ found = 0;
+ for (i = 1; i < _LC_LAST; ++i)
+ if (loadlocale(i) != NULL)
+ found = 1;
+ if (found)
+ return (currentlocale());
+ return (NULL);
+}
+
+static char *
+currentlocale()
+{
+ int i;
+
+ (void)strcpy(current_locale_string, current_categories[1]);
+
+ for (i = 2; i < _LC_LAST; ++i)
+ if (strcmp(current_categories[1], current_categories[i])) {
+ (void)snprintf(current_locale_string,
+ sizeof(current_locale_string), "%s/%s/%s/%s/%s",
+ current_categories[1], current_categories[2],
+ current_categories[3], current_categories[4],
+ current_categories[5]);
+ break;
+ }
+ return (current_locale_string);
+}
+
+static char *
+loadlocale(category)
+ int category;
+{
+ char name[PATH_MAX];
+
+ if (strcmp(new_categories[category],
+ current_categories[category]) == 0)
+ return (current_categories[category]);
+
+ if (category == LC_CTYPE) {
+ if (setrunelocale(new_categories[LC_CTYPE]))
+ return (NULL);
+ (void)strcpy(current_categories[LC_CTYPE],
+ new_categories[LC_CTYPE]);
+ return (current_categories[LC_CTYPE]);
+ }
+
+ if (!strcmp(new_categories[category], "C") ||
+ !strcmp(new_categories[category], "POSIX")) {
+
+ /*
+ * Some day this will need to reset the locale to the default
+ * C locale. Since we have no way to change them as of yet,
+ * there is no need to reset them.
+ */
+ (void)strcpy(current_categories[category],
+ new_categories[category]);
+ return (current_categories[category]);
+ }
+
+ /*
+ * Some day we will actually look at this file.
+ */
+ (void)snprintf(name, sizeof(name), "%s/%s/%s",
+ PathLocale, new_categories[category], categories[category]);
+
+ switch (category) {
+ case LC_COLLATE:
+ case LC_MONETARY:
+ case LC_NUMERIC:
+ case LC_TIME:
+ return (NULL);
+ }
}
diff --git a/lib/libc/locale/table.c b/lib/libc/locale/table.c
new file mode 100644
index 000000000000..fb7344eafad5
--- /dev/null
+++ b/lib/libc/locale/table.c
@@ -0,0 +1,160 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)table.c 8.1 (Berkeley) 6/27/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <rune.h>
+
+extern rune_t _none_sgetrune __P((const char *, size_t, char const **));
+extern int _none_sputrune __P((rune_t, char *, size_t, char **));
+extern int _none_init __P((char *, char **));
+
+_RuneLocale _DefaultRuneLocale = {
+ _RUNE_MAGIC_1,
+ "none",
+ _none_sgetrune,
+ _none_sputrune,
+ 0xFFFD,
+
+ { /*00*/ _C, _C, _C, _C,
+ _C, _C, _C, _C,
+ /*08*/ _C, _C|_S|_B, _C|_S, _C|_S,
+ _C|_S, _C|_S, _C, _C,
+ /*10*/ _C, _C, _C, _C,
+ _C, _C, _C, _C,
+ /*18*/ _C, _C, _C, _C,
+ _C, _C, _C, _C,
+ /*20*/ _S|_B|_R, _P|_R|_G, _P|_R|_G, _P|_R|_G,
+ _P|_R|_G, _P|_R|_G, _P|_R|_G, _P|_R|_G,
+ /*28*/ _P|_R|_G, _P|_R|_G, _P|_R|_G, _P|_R|_G,
+ _P|_R|_G, _P|_R|_G, _P|_R|_G, _P|_R|_G,
+ /*30*/ _D|_R|_G|_X|0, _D|_R|_G|_X|1, _D|_R|_G|_X|2, _D|_R|_G|_X|3,
+ _D|_R|_G|_X|4, _D|_R|_G|_X|5, _D|_R|_G|_X|6, _D|_R|_G|_X|7,
+ /*38*/ _D|_R|_G|_X|8, _D|_R|_G|_X|9, _P|_R|_G, _P|_R|_G,
+ _P|_R|_G, _P|_R|_G, _P|_R|_G, _P|_R|_G,
+ /*40*/ _P|_R|_G, _U|_X|_R|_G|_A|10, _U|_X|_R|_G|_A|11, _U|_X|_R|_G|_A|12,
+ _U|_X|_R|_G|_A|13, _U|_X|_R|_G|_A|14, _U|_X|_R|_G|_A|15, _U|_R|_G|_A,
+ /*48*/ _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A,
+ _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A,
+ /*50*/ _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A,
+ _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A,
+ /*58*/ _U|_R|_G|_A, _U|_R|_G|_A, _U|_R|_G|_A, _P|_R|_G,
+ _P|_R|_G, _P|_R|_G, _P|_R|_G, _P|_R|_G,
+ /*60*/ _P|_R|_G, _L|_X|_R|_G|_A|10, _L|_X|_R|_G|_A|11, _L|_X|_R|_G|_A|12,
+ _L|_X|_R|_G|_A|13, _L|_X|_R|_G|_A|14, _L|_X|_R|_G|_A|15, _L|_R|_G|_A,
+ /*68*/ _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A,
+ _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A,
+ /*70*/ _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A,
+ _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A,
+ /*78*/ _L|_R|_G|_A, _L|_R|_G|_A, _L|_R|_G|_A, _P|_R|_G,
+ _P|_R|_G, _P|_R|_G, _P|_R|_G, _C,
+ },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+ 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+ 'x', 'y', 'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+ },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
+ 'X', 'Y', 'Z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
+ 'X', 'Y', 'Z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+ },
+};
+
+_RuneLocale *_CurrentRuneLocale = &_DefaultRuneLocale;
+
+int __mb_cur_max = 1;
diff --git a/lib/libc/locale/utf2.4 b/lib/libc/locale/utf2.4
new file mode 100644
index 000000000000..c46f4f986c41
--- /dev/null
+++ b/lib/libc/locale/utf2.4
@@ -0,0 +1,87 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Paul Borman at Krystal Technologies.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)utf2.4 8.1 (Berkeley) 6/4/93
+.\"
+.Dd "June 4, 1993"
+.Dt UTF2 4
+.Os
+.Sh NAME
+.Nm UTF2
+.Nd "Universal character set Transformation Format encoding of runes
+.Sh SYNOPSIS
+\fBENCODING "UTF2"\fP
+.Sh DESCRIPTION
+The
+.Nm UTF2
+encoding is based on a proposed X-Open multibyte
+\s-1FSS-UCS-TF\s+1 (File System Safe Universal Character Set Transformation Format) encoding as used in
+.Nm Plan 9 from Bell Labs.
+Although it is capable of representing more than 16 bits,
+the current implementation is limited to 16 bits as defined by the
+Unicode Standard.
+UTF2 is also called UTF8 in some circles.
+.Pp
+.Nm UTF2
+representation is backwards compatible with ASCII, so 0x00-0x7f refer to the
+ASCII character set. The multibyte encoding of runes between 0x0080 and 0xffff
+consist entirely of bytes whose high order bit is set. The actual
+encoding is represented by the following table:
+.Bd -literal
+[0x0000 - 0x007f] [00000000.0bbbbbbb] -> 0bbbbbbb
+[0x0080 - 0x03ff] [00000bbb.bbbbbbbb] -> 110bbbbb, 10bbbbbb
+[0x0400 - 0xffff] [bbbbbbbb.bbbbbbbb] -> 1110bbbb, 10bbbbbb, 10bbbbbb
+.Ed
+.sp
+If more than a single representation of a value exists (for example,
+0x00; 0xC0 0x80; 0xE0 0x80 0x80) the shortest representation is always
+used (but the longer ones will be correctly decoded).
+.Pp
+The final three encodings provided by X-Open:
+.Bd -literal
+[00000000.000bbbbb.bbbbbbbb.bbbbbbbb] ->
+ 11110bbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
+
+[000000bb.bbbbbbbb.bbbbbbbb.bbbbbbbb] ->
+ 111110bb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
+
+[0bbbbbbb.bbbbbbbb.bbbbbbbb.bbbbbbbb] ->
+ 1111110b, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
+.Ed
+.sp
+which provides for the entire proposed ISO-10646 31 bit standard are currently
+not implemented.
+.Sh "SEE ALSO"
+.Xr mklocale 1 ,
+.Xr setlocale 3
diff --git a/lib/libc/locale/utf2.c b/lib/libc/locale/utf2.c
new file mode 100644
index 000000000000..846fad90ed6f
--- /dev/null
+++ b/lib/libc/locale/utf2.c
@@ -0,0 +1,148 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)utf2.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <errno.h>
+#include <rune.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+rune_t _UTF2_sgetrune __P((const char *, size_t, char const **));
+int _UTF2_sputrune __P((rune_t, char *, size_t, char **));
+
+static _utf_count[16] = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 2, 2, 3, 0,
+};
+
+int
+_UTF2_init(rl)
+ _RuneLocale *rl;
+{
+ rl->sgetrune = _UTF2_sgetrune;
+ rl->sputrune = _UTF2_sputrune;
+ _CurrentRuneLocale = rl;
+ __mb_cur_max = 3;
+ return (0);
+}
+
+rune_t
+_UTF2_sgetrune(string, n, result)
+ const char *string;
+ size_t n;
+ char const **result;
+{
+ int c;
+
+ if (n < 1 || (c = _utf_count[(*string >> 4) & 0xf]) > n) {
+ if (result)
+ *result = string;
+ return (_INVALID_RUNE);
+ }
+ switch (c) {
+ case 1:
+ if (result)
+ *result = string + 1;
+ return (*string & 0xff);
+ case 2:
+ if ((string[1] & 0xC0) != 0x80)
+ goto encoding_error;
+ if (result)
+ *result = string + 2;
+ return (((string[0] & 0x1F) << 6) | (string[1] & 0x3F));
+ case 3:
+ if ((string[1] & 0xC0) != 0x80 || (string[2] & 0xC0) != 0x80)
+ goto encoding_error;
+ if (result)
+ *result = string + 3;
+ return (((string[0] & 0x1F) << 12) | ((string[1] & 0x3F) << 6)
+ | (string[2] & 0x3F));
+ default:
+encoding_error: if (result)
+ *result = string + 1;
+ return (_INVALID_RUNE);
+ }
+}
+
+int
+_UTF2_sputrune(c, string, n, result)
+ rune_t c;
+ char *string, **result;
+ size_t n;
+{
+ if (c & 0xF800) {
+ if (n >= 3) {
+ if (string) {
+ string[0] = 0xE0 | ((c >> 12) & 0x0F);
+ string[1] = 0x80 | ((c >> 6) & 0x3F);
+ string[2] = 0x80 | ((c) & 0x3F);
+ }
+ if (result)
+ *result = string + 3;
+ } else
+ if (result)
+ *result = NULL;
+
+ return (3);
+ } else
+ if (c & 0x0780) {
+ if (n >= 2) {
+ if (string) {
+ string[0] = 0xC0 | ((c >> 6) & 0x1F);
+ string[1] = 0x80 | ((c) & 0x3F);
+ }
+ if (result)
+ *result = string + 2;
+ } else
+ if (result)
+ *result = NULL;
+ return (2);
+ } else {
+ if (n >= 1) {
+ if (string)
+ string[0] = c;
+ if (result)
+ *result = string + 1;
+ } else
+ if (result)
+ *result = NULL;
+ return (1);
+ }
+}