aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDag-Erling Smørgrav <des@FreeBSD.org>2002-04-06 13:49:43 +0000
committerDag-Erling Smørgrav <des@FreeBSD.org>2002-04-06 13:49:43 +0000
commit1c015e5c73d127df0cffd3197a7580ce09bc6c87 (patch)
treeda6b71d0037495c5d3556bc9d98d44b53b789f68
downloadsrc-1c015e5c73d127df0cffd3197a7580ce09bc6c87.tar.gz
src-1c015e5c73d127df0cffd3197a7580ce09bc6c87.zip
Virgin import of NetBSD's sort(1) where it belongs.
Notes
Notes: svn path=/vendor/sort/dist/; revision=93966
-rw-r--r--contrib/sort/Makefile7
-rw-r--r--contrib/sort/append.c205
-rw-r--r--contrib/sort/extern.h69
-rw-r--r--contrib/sort/fields.c339
-rw-r--r--contrib/sort/files.c372
-rw-r--r--contrib/sort/fsort.c358
-rw-r--r--contrib/sort/fsort.h77
-rw-r--r--contrib/sort/init.c355
-rw-r--r--contrib/sort/msort.c390
-rw-r--r--contrib/sort/pathnames.h41
-rw-r--r--contrib/sort/sort.1441
-rw-r--r--contrib/sort/sort.c332
-rw-r--r--contrib/sort/sort.h149
-rw-r--r--contrib/sort/tmp.c84
14 files changed, 3219 insertions, 0 deletions
diff --git a/contrib/sort/Makefile b/contrib/sort/Makefile
new file mode 100644
index 000000000000..7c12a50ec517
--- /dev/null
+++ b/contrib/sort/Makefile
@@ -0,0 +1,7 @@
+# $NetBSD: Makefile,v 1.4 2001/02/19 17:45:24 jdolecek Exp $
+# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
+
+PROG= sort
+SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
+
+.include <bsd.prog.mk>
diff --git a/contrib/sort/append.c b/contrib/sort/append.c
new file mode 100644
index 000000000000..d02a96fcc491
--- /dev/null
+++ b/contrib/sort/append.c
@@ -0,0 +1,205 @@
+/* $NetBSD: append.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: append.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $");
+__SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <stdlib.h>
+#include <string.h>
+
+#define OUTPUT { \
+ if ((n = cpos - ppos) > 1) { \
+ for (; ppos < cpos; ++ppos) \
+ *ppos -= odepth; \
+ ppos -= n; \
+ if (stable_sort) \
+ sradixsort(ppos, n, wts1, REC_D); \
+ else \
+ radixsort(ppos, n, wts1, REC_D); \
+ for (; ppos < cpos; ppos++) { \
+ prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
+ put(prec, fp); \
+ } \
+ } else put(prec, fp); \
+}
+
+/*
+ * copy sorted lines to output; check for uniqueness
+ */
+void
+append(keylist, nelem, depth, fp, put, ftbl)
+ const u_char **keylist;
+ int nelem;
+ int depth;
+ FILE *fp;
+ put_func_t put;
+ struct field *ftbl;
+{
+ u_char *wts, *wts1;
+ int n, odepth;
+ const u_char **cpos, **ppos, **lastkey;
+ const u_char *cend, *pend, *start;
+ const struct recheader *crec, *prec;
+
+ if (*keylist == '\0' && UNIQUE)
+ return;
+ wts1 = wts = ftbl[0].weights;
+ if ((!UNIQUE) && SINGL_FLD) {
+ if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
+ wts1 = Rascii;
+ else if (ftbl[0].flags & F)
+ wts1 = ascii;
+ odepth = depth;
+ }
+ lastkey = keylist + nelem;
+ depth += sizeof(TRECHEADER);
+ if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
+ ppos = keylist;
+ prec = (const RECHEADER *) (*ppos - depth);
+ if (UNIQUE)
+ put(prec, fp);
+ for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
+ crec = (const RECHEADER *) (*cpos - depth);
+ if (crec->length == prec->length) {
+ /*
+ * Set pend and cend so that trailing NUL and
+ * record separator is ignored.
+ */
+ pend = (const u_char *) &prec->data + prec->length - 2;
+ cend = (const u_char *) &crec->data + crec->length - 2;
+ for (start = *cpos; cend >= start; cend--) {
+ if (wts[*cend] != wts[*pend])
+ break;
+ pend--;
+ }
+ if (pend + 1 != *ppos) {
+ if (!UNIQUE) {
+ OUTPUT;
+ } else
+ put(crec, fp);
+ ppos = cpos;
+ prec = crec;
+ }
+ } else {
+ if (!UNIQUE) {
+ OUTPUT;
+ } else
+ put(crec, fp);
+ ppos = cpos;
+ prec = crec;
+ }
+ }
+ if (!UNIQUE) { OUTPUT; }
+ } else if (UNIQUE) {
+ ppos = keylist;
+ prec = (const RECHEADER *) (*ppos - depth);
+ put(prec, fp);
+ for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
+ crec = (const RECHEADER *) (*cpos - depth);
+ if (crec->offset == prec->offset) {
+ /*
+ * Set pend and cend so that trailing NUL and
+ * record separator is ignored.
+ */
+ pend = (const u_char *) &prec->data + prec->offset - 2;
+ cend = (const u_char *) &crec->data + crec->offset - 2;
+ for (start = *cpos; cend >= start; cend--) {
+ if (wts[*cend] != wts[*pend])
+ break;
+ pend--;
+ }
+ if (pend + 1 != *ppos) {
+ ppos = cpos;
+ prec = crec;
+ put(prec, fp);
+ }
+ } else {
+ ppos = cpos;
+ prec = crec;
+ put(prec, fp);
+ }
+ }
+ } else for (cpos = keylist; cpos < lastkey; cpos++) {
+ crec = (const RECHEADER *) (*cpos - depth);
+ put(crec, fp);
+ }
+}
+
+/*
+ * output the already sorted eol bin.
+ */
+void
+rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
+ u_char *buffer;
+ int infl0;
+ int binno, nfiles;
+ FILE *outfp;
+ u_char *bufend;
+{
+ RECHEADER *rec;
+
+ rec = (RECHEADER *) buffer;
+ if (!getnext(binno, infl0, NULL, nfiles,
+ (RECHEADER *) buffer, bufend, 0)) {
+ putline(rec, outfp);
+ while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
+ bufend, 0) == 0) {
+ if (!UNIQUE)
+ putline(rec, outfp);
+ }
+ }
+}
+
+/*
+ * append plain text--used after sorting the biggest bin.
+ */
+void
+concat(a, b)
+ FILE *a, *b;
+{
+ int nread;
+ char buffer[4096];
+
+ rewind(b);
+ while ((nread = fread(buffer, 1, 4096, b)) > 0)
+ EWRITE(buffer, 1, nread, a);
+}
diff --git a/contrib/sort/extern.h b/contrib/sort/extern.h
new file mode 100644
index 000000000000..56fdb5873d52
--- /dev/null
+++ b/contrib/sort/extern.h
@@ -0,0 +1,69 @@
+/* $NetBSD: extern.h,v 1.6 2001/02/19 20:50:17 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.1 (Berkeley) 6/6/93
+ */
+
+void append __P((const u_char **, int, int, FILE *,
+ void (*)(const RECHEADER *, FILE *), struct field *));
+void concat __P((FILE *, FILE *));
+length_t enterkey __P((RECHEADER *, DBT *, int, struct field *));
+void fixit __P((int *, char **));
+void fldreset __P((struct field *));
+FILE *ftmp __P((void));
+void fmerge __P((int, int, struct filelist *, int,
+ get_func_t, FILE *, put_func_t, struct field *));
+void fsort __P((int, int, int, struct filelist *, int, FILE *,
+ struct field *));
+int geteasy __P((int, int, struct filelist *,
+ int, RECHEADER *, u_char *, struct field *));
+int getnext __P((int, int, struct filelist *,
+ int, RECHEADER *, u_char *, struct field *));
+int makekey __P((int, int, struct filelist *,
+ int, RECHEADER *, u_char *, struct field *));
+int makeline __P((int, int, struct filelist *,
+ int, RECHEADER *, u_char *, struct field *));
+void merge __P((int, int, get_func_t, FILE *, put_func_t, struct field *));
+void num_init __P((void));
+void onepass __P((const u_char **, int, long, long *, u_char *, FILE *));
+int optval __P((int, int));
+void order __P((struct filelist *, get_func_t, struct field *));
+void putline __P((const RECHEADER *, FILE *));
+void putrec __P((const RECHEADER *, FILE *));
+void rd_append __P((int, int, int, FILE *, u_char *, u_char *));
+int setfield __P((const char *, struct field *, int));
+void settables __P((int));
diff --git a/contrib/sort/fields.c b/contrib/sort/fields.c
new file mode 100644
index 000000000000..12242f661758
--- /dev/null
+++ b/contrib/sort/fields.c
@@ -0,0 +1,339 @@
+/* $NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Subroutines to generate sort keys. */
+
+#include "sort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $");
+__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#define blancmange(ptr) { \
+ if (BLANK & d_mask[*(ptr)]) \
+ while (BLANK & d_mask[*(++(ptr))]); \
+}
+
+#define NEXTCOL(pos) { \
+ if (!SEP_FLAG) \
+ while (BLANK & l_d_mask[*(++pos)]); \
+ while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
+}
+
+static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
+static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
+
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+
+#define DECIMAL '.'
+#define OFFSET 128
+
+u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
+u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
+u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
+u_char fnum[NBINS], rnum[NBINS];
+
+/*
+ * constructs sort key with leading recheader, followed by the key,
+ * followed by the original line.
+ */
+length_t
+enterkey(keybuf, line, size, fieldtable)
+ RECHEADER *keybuf; /* pointer to start of key */
+ DBT *line;
+ int size;
+ struct field fieldtable[];
+{
+ int i;
+ u_char *l_d_mask;
+ u_char *lineend, *pos;
+ u_char *endkey, *keypos;
+ struct coldesc *clpos;
+ int col = 1;
+ struct field *ftpos;
+ l_d_mask = d_mask;
+ pos = (u_char *) line->data - 1;
+ lineend = (u_char *) line->data + line->size-1;
+ /* don't include rec_delimiter */
+
+ for (i = 0; i < ncols; i++) {
+ clpos = clist + i;
+ for (; (col < clpos->num) && (pos < lineend); col++) {
+ NEXTCOL(pos);
+ }
+ if (pos >= lineend)
+ break;
+ clpos->start = SEP_FLAG ? pos + 1 : pos;
+ NEXTCOL(pos);
+ clpos->end = pos;
+ col++;
+ if (pos >= lineend) {
+ clpos->end = lineend;
+ i++;
+ break;
+ }
+ }
+ for (; i <= ncols; i++)
+ clist[i].start = clist[i].end = lineend;
+ if (clist[0].start < (u_char *) line->data)
+ clist[0].start++;
+
+ keypos = keybuf->data;
+ endkey = (u_char *) keybuf + size - line->size;
+ for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
+ if ((keypos = enterfield(keypos, endkey, ftpos,
+ fieldtable->flags)) == NULL)
+ return (1);
+
+ keybuf->offset = keypos - keybuf->data;
+ keybuf->length = keybuf->offset + line->size;
+ if (keybuf->length + sizeof(TRECHEADER) > size) {
+ /* line too long for buffer */
+ return (1);
+ }
+
+ /*
+ * Make [s]radixsort() only sort by relevant part of key if:
+ * 1. we want to choose unique items by relevant field[s]
+ * 2. we want stable sort and so the items should be sorted only by
+ * the relevant field[s]
+ */
+ if (UNIQUE || (stable_sort && keybuf->offset < line->size))
+ keypos[-1] = REC_D;
+
+ memcpy(keybuf->data + keybuf->offset, line->data, line->size);
+ return (0);
+}
+
+/*
+ * constructs a field (as defined by -k) within a key
+ */
+static u_char *
+enterfield(tablepos, endkey, cur_fld, gflags)
+ struct field *cur_fld;
+ u_char *tablepos, *endkey;
+ int gflags;
+{
+ u_char *start, *end, *lineend, *mask, *lweight;
+ struct column icol, tcol;
+ u_int flags;
+ u_int Rflag;
+
+ icol = cur_fld->icol;
+ tcol = cur_fld->tcol;
+ flags = cur_fld->flags;
+ start = icol.p->start;
+ lineend = clist[ncols].end;
+ if (flags & BI)
+ blancmange(start);
+ start += icol.indent;
+ start = min(start, lineend);
+
+ if (!tcol.num)
+ end = lineend;
+ else {
+ if (tcol.indent) {
+ end = tcol.p->start;
+ if (flags & BT)
+ blancmange(end);
+ end += tcol.indent;
+ end = min(end, lineend);
+ } else
+ end = tcol.p->end;
+ }
+
+ if (flags & N) {
+ Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
+ return number(tablepos, endkey, start, end, Rflag);
+ }
+
+ mask = cur_fld->mask;
+ lweight = cur_fld->weights;
+ for (; start < end; start++)
+ if (mask[*start]) {
+ if (*start <= 1) {
+ if (tablepos+2 >= endkey)
+ return (NULL);
+ *tablepos++ = lweight[1];
+ *tablepos++ = lweight[*start ? 2 : 1];
+ } else {
+ if (tablepos+1 >= endkey)
+ return (NULL);
+ *tablepos++ = lweight[*start];
+ }
+ }
+ *tablepos++ = lweight[0];
+ return (tablepos == endkey ? NULL : tablepos);
+}
+
+/* Uses the first bin to assign sign, expsign, 0, and the first
+ * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
+ * When sorting in forward order:
+ * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
+ * else use (0-99)->(2-102).
+ * If the exponent is >=61, use another byte for each additional 253
+ * in the exponent. Cutoff is at 567.
+ * To avoid confusing the exponent and the mantissa, use a field delimiter
+ * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
+ * only time a field delimiter can come in that position.
+ * Reverse order is done analagously.
+ */
+
+static u_char *
+number(pos, bufend, line, lineend, Rflag)
+ u_char *line, *pos, *bufend, *lineend;
+ int Rflag;
+{
+ int or_sign, parity = 0;
+ int expincr = 1, exponent = -1;
+ int bite, expsign = 1, sign = 1;
+ u_char lastvalue, *nonzero, *tline, *C_TENS;
+ u_char *nweights;
+
+ if (Rflag)
+ nweights = rnum;
+ else
+ nweights = fnum;
+ if (pos > bufend - 8)
+ return (NULL);
+ /*
+ * or_sign sets the sort direction:
+ * (-r: +/-)(sign: +/-)(expsign: +/-)
+ */
+ or_sign = sign ^ expsign ^ Rflag;
+ blancmange(line);
+ if (*line == '-') { /* set the sign */
+ or_sign ^= 1;
+ sign = 0;
+ line++;
+ }
+ /* eat initial zeroes */
+ for (; *line == '0' && line < lineend; line++)
+ ;
+ /* calculate exponents < 0 */
+ if (*line == DECIMAL) {
+ exponent = 1;
+ while (*++line == '0' && line < lineend)
+ exponent++;
+ expincr = 0;
+ expsign = 0;
+ }
+ /* next character better be a digit */
+ if (*line < '1' || *line > '9' || line >= lineend) {
+ *pos++ = nweights[127];
+ return (pos);
+ }
+ if (expincr) {
+ for (tline = line-1; *++tline >= '0' &&
+ *tline <= '9' && tline < lineend;)
+ exponent++;
+ }
+ if (exponent > 567) {
+ *pos++ = nweights[sign ? (expsign ? 254 : 128)
+ : (expsign ? 0 : 126)];
+ warnx("exponent out of bounds");
+ return (pos);
+ }
+ bite = min(exponent, 61);
+ *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
+ : (expsign ? 64-bite : 64+bite)];
+ if (bite >= 61) {
+ do {
+ exponent -= bite;
+ bite = min(exponent, 254);
+ *pos++ = nweights[or_sign ? 254-bite : bite];
+ } while (bite == 254);
+ }
+ C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
+ for (; line < lineend; line++) {
+ if (*line >= '0' && *line <= '9') {
+ if (parity) {
+ *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
+ : *line);
+ if (pos == bufend)
+ return (NULL);
+ if (*line != '0' || lastvalue != '0')
+ nonzero = pos;
+ } else
+ lastvalue = *line;
+ parity ^= 1;
+ } else if(*line == DECIMAL) {
+ if(!expincr) /* a decimal already occurred once */
+ break;
+ expincr = 0;
+ } else
+ break;
+ }
+ if (parity && lastvalue != '0') {
+ *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
+ OFF_TENS[lastvalue] + '0';
+ } else
+ pos = nonzero;
+ if (pos > bufend-1)
+ return (NULL);
+ *pos++ = or_sign ? nweights[254] : nweights[0];
+ return (pos);
+}
+
+/* This forces a gap around the record delimiter
+ * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
+ * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
+ */
+void
+num_init()
+{
+ int i;
+ TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
+ NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
+ OFF_TENS = TENS - '0';
+ OFF_NTENS = NEGTENS - '0';
+ for (i = 1; i < 10; i++) {
+ TENS[i] = TENS[i - 1] + 10;
+ NEGTENS[i] = NEGTENS[i - 1] - 10;
+ }
+ for (i = 0; i < REC_D; i++) {
+ fnum[i] = i;
+ rnum[255 - i] = i;
+ }
+ for (i = REC_D; i <255; i++) {
+ fnum[i] = i + 1;
+ rnum[255 - i] = i - 1;
+ }
+}
diff --git a/contrib/sort/files.c b/contrib/sort/files.c
new file mode 100644
index 000000000000..7c7a58b5ee90
--- /dev/null
+++ b/contrib/sort/files.c
@@ -0,0 +1,372 @@
+/* $NetBSD: files.c,v 1.17 2001/05/15 11:18:23 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+#include "fsort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: files.c,v 1.17 2001/05/15 11:18:23 jdolecek Exp $");
+__SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <string.h>
+
+static int seq __P((FILE *, DBT *, DBT *));
+
+/*
+ * this is the subroutine for file management for fsort().
+ * It keeps the buffers for all temporary files.
+ */
+int
+getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
+ int binno, infl0;
+ struct filelist *filelist;
+ int nfiles;
+ RECHEADER *pos;
+ u_char *end;
+ struct field *dummy;
+{
+ int i;
+ u_char *hp;
+ static size_t nleft = 0;
+ static int cnt = 0, flag = -1;
+ static u_char maxb = 0;
+ static FILE *fp;
+
+ if (nleft == 0) {
+ if (binno < 0) /* reset files. */ {
+ for (i = 0; i < nfiles; i++) {
+ rewind(fstack[infl0 + i].fp);
+ fstack[infl0 + i].max_o = 0;
+ }
+ flag = -1;
+ nleft = cnt = 0;
+ return (-1);
+ }
+ maxb = fstack[infl0].maxb;
+ for (; nleft == 0; cnt++) {
+ if (cnt >= nfiles) {
+ cnt = 0;
+ return (EOF);
+ }
+ fp = fstack[infl0 + cnt].fp;
+ fread(&nleft, sizeof(nleft), 1, fp);
+ if (binno < maxb)
+ fstack[infl0+cnt].max_o
+ += sizeof(nleft) + nleft;
+ else if (binno == maxb) {
+ if (binno != fstack[infl0].lastb) {
+ fseek(fp, fstack[infl0+
+ cnt].max_o, SEEK_SET);
+ fread(&nleft, sizeof(nleft), 1, fp);
+ }
+ if (nleft == 0)
+ fclose(fp);
+ } else if (binno == maxb + 1) { /* skip a bin */
+ fseek(fp, nleft, SEEK_CUR);
+ fread(&nleft, sizeof(nleft), 1, fp);
+ flag = cnt;
+ }
+ }
+ }
+ if ((u_char *) pos > end - sizeof(TRECHEADER))
+ return (BUFFEND);
+ fread(pos, sizeof(TRECHEADER), 1, fp);
+ if (end - pos->data < pos->length) {
+ hp = ((u_char *)pos) + sizeof(TRECHEADER);
+ for (i = sizeof(TRECHEADER); i ; i--)
+ ungetc(*--hp, fp);
+ return (BUFFEND);
+ }
+ fread(pos->data, pos->length, 1, fp);
+ nleft -= pos->length + sizeof(TRECHEADER);
+ if (nleft == 0 && binno == fstack[infl0].maxb)
+ fclose(fp);
+ return (0);
+}
+
+/*
+ * this is called when there is no special key. It's only called
+ * in the first fsort pass.
+ */
+int
+makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
+ int flno, top;
+ struct filelist *filelist;
+ int nfiles;
+ RECHEADER *recbuf;
+ u_char *bufend;
+ struct field *dummy2;
+{
+ static u_char *obufend;
+ static size_t osz;
+ char *pos;
+ static int filenum = 0, overflow = 0;
+ static FILE *fp = 0;
+ int c;
+
+ pos = (char *) recbuf->data;
+ if (overflow) {
+ /*
+ * Buffer shortage is solved by either of two ways:
+ * o flush previous buffered data and start using the
+ * buffer from start (see fsort())
+ * o realloc buffer and bump bufend
+ *
+ * The former is preferred, realloc is only done when
+ * there is exactly one item in buffer which does not fit.
+ */
+ if (bufend == obufend)
+ memmove(pos, bufend - osz, osz);
+
+ pos += osz;
+ overflow = 0;
+ }
+ for (;;) {
+ if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
+ return (EOF);
+ else if (fp == NULL) {
+ if (filenum >= nfiles)
+ return (EOF);
+ if (!(fp = fopen(filelist->names[filenum], "r")))
+ err(2, "%s", filelist->names[filenum]);
+ filenum++;
+ }
+ while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
+ if ((*pos++ = c) == REC_D) {
+ recbuf->offset = 0;
+ recbuf->length = pos - (char *) recbuf->data;
+ return (0);
+ }
+ }
+ if (pos >= (char *)bufend) {
+ if (recbuf->data < bufend) {
+ overflow = 1;
+ obufend = bufend;
+ osz = (pos - (char *) recbuf->data);
+ }
+ return (BUFFEND);
+ } else if (c == EOF) {
+ if (recbuf->data != (u_char *) pos) {
+ *pos++ = REC_D;
+ recbuf->offset = 0;
+ recbuf->length = pos - (char *) recbuf->data;
+ return (0);
+ }
+ FCLOSE(fp);
+ fp = 0;
+ if (flno >= 0)
+ fstack[flno].fp = 0;
+ } else {
+
+ warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
+
+ /* Consume the rest of line from input */
+ while((c = getc(fp)) != REC_D && c != EOF)
+ ;
+
+ recbuf->offset = 0;
+ recbuf->length = 0;
+
+ return (BUFFEND);
+ }
+ }
+}
+
+/*
+ * This generates keys. It's only called in the first fsort pass
+ */
+int
+makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
+ int flno, top;
+ struct filelist *filelist;
+ int nfiles;
+ RECHEADER *recbuf;
+ u_char *bufend;
+ struct field *ftbl;
+{
+ static int filenum = 0;
+ static FILE *dbdesc = 0;
+ static DBT dbkey[1], line[1];
+ static int overflow = 0;
+ int c;
+
+ if (overflow) {
+ overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
+ ftbl);
+ if (overflow)
+ return (BUFFEND);
+ else
+ return (0);
+ }
+
+ for (;;) {
+ if (flno >= 0) {
+ if (!(dbdesc = fstack[flno].fp))
+ return (EOF);
+ } else if (!dbdesc) {
+ if (filenum >= nfiles)
+ return (EOF);
+ dbdesc = fopen(filelist->names[filenum], "r");
+ if (!dbdesc)
+ err(2, "%s", filelist->names[filenum]);
+ filenum++;
+ }
+ if (!(c = seq(dbdesc, line, dbkey))) {
+ if ((signed)line->size > bufend - recbuf->data) {
+ overflow = 1;
+ } else {
+ overflow = enterkey(recbuf, line,
+ bufend - (u_char *) recbuf, ftbl);
+ }
+ if (overflow)
+ return (BUFFEND);
+ else
+ return (0);
+ }
+ if (c == EOF) {
+ FCLOSE(dbdesc);
+ dbdesc = 0;
+ if (flno >= 0)
+ fstack[flno].fp = 0;
+ } else {
+ ((char *) line->data)[60] = '\000';
+ warnx("makekey: line too long: ignoring %.100s...",
+ (char *)line->data);
+ }
+ }
+}
+
+/*
+ * get a key/line pair from fp
+ */
+static int
+seq(fp, line, key)
+ FILE *fp;
+ DBT *key, *line;
+{
+ static char *buf, flag = 1;
+ char *end, *pos;
+ int c;
+
+ if (flag) {
+ flag = 0;
+ buf = (char *) linebuf;
+ end = buf + linebuf_size;
+ line->data = buf;
+ }
+ pos = buf;
+ while ((c = getc(fp)) != EOF) {
+ if ((*pos++ = c) == REC_D) {
+ line->size = pos - buf;
+ return (0);
+ }
+ if (pos == end) {
+ linebuf_size *= 2;
+ linebuf = realloc(linebuf, linebuf_size);
+ if (!linebuf)
+ err(2, "realloc of linebuf to %lu bytes failed",
+ (unsigned long)linebuf_size);
+
+ end = linebuf + linebuf_size;
+ pos = linebuf + (pos - buf);
+ line->data = buf = (char *)linebuf;
+ continue;
+ }
+ }
+ if (pos != buf) {
+ *pos++ = REC_D;
+ line->size = pos - buf;
+ return (0);
+ } else
+ return (EOF);
+}
+
+/*
+ * write a key/line pair to a temporary file
+ */
+void
+putrec(rec, fp)
+ const RECHEADER *rec;
+ FILE *fp;
+{
+ EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
+}
+
+/*
+ * write a line to output
+ */
+void
+putline(rec, fp)
+ const RECHEADER *rec;
+ FILE *fp;
+{
+ EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
+}
+
+/*
+ * get a record from a temporary file. (Used by merge sort.)
+ */
+int
+geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
+ int flno, top;
+ struct filelist *filelist;
+ int nfiles;
+ RECHEADER *rec;
+ u_char *end;
+ struct field *dummy2;
+{
+ int i;
+ FILE *fp;
+
+ fp = fstack[flno].fp;
+ if ((u_char *) rec > end - sizeof(TRECHEADER))
+ return (BUFFEND);
+ if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
+ fclose(fp);
+ fstack[flno].fp = 0;
+ return (EOF);
+ }
+ if (end - rec->data < rec->length) {
+ for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
+ ungetc(*((char *) rec + i), fp);
+ return (BUFFEND);
+ }
+ fread(rec->data, rec->length, 1, fp);
+ return (0);
+}
diff --git a/contrib/sort/fsort.c b/contrib/sort/fsort.c
new file mode 100644
index 000000000000..0847a5686e7f
--- /dev/null
+++ b/contrib/sort/fsort.c
@@ -0,0 +1,358 @@
+/* $NetBSD: fsort.c,v 1.20 2001/05/15 11:49:25 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Read in the next bin. If it fits in one segment sort it;
+ * otherwise refine it by segment deeper by one character,
+ * and try again on smaller bins. Sort the final bin at this level
+ * of recursion to keep the head of fstack at 0.
+ * After PANIC passes, abort to merge sort.
+ */
+#include "sort.h"
+#include "fsort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: fsort.c,v 1.20 2001/05/15 11:49:25 jdolecek Exp $");
+__SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <stdlib.h>
+#include <string.h>
+
+static const u_char **keylist = 0;
+u_char *buffer = 0, *linebuf = 0;
+size_t bufsize = DEFBUFSIZE;
+size_t linebuf_size;
+struct tempfile fstack[MAXFCT];
+extern char *toutpath;
+#define FSORTMAX 4
+int PANIC = FSORTMAX;
+
+#define MSTART (MAXFCT - MERGE_FNUM)
+#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
+
+void
+fsort(binno, depth, top, filelist, nfiles, outfp, ftbl)
+ int binno, depth, top;
+ struct filelist *filelist;
+ int nfiles;
+ FILE *outfp;
+ struct field *ftbl;
+{
+ const u_char **keypos;
+ u_char *bufend, *tmpbuf;
+ u_char *weights;
+ int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
+ int c, nelem, base;
+ long sizes [NBINS+1];
+ get_func_t get;
+ struct recheader *crec;
+ struct field tfield[2];
+ FILE *prevfp, *tailfp[FSORTMAX+1];
+
+ memset(tailfp, 0, sizeof(tailfp));
+ prevfp = outfp;
+ memset(tfield, 0, sizeof(tfield));
+ if (ftbl[0].flags & R)
+ tfield[0].weights = Rascii;
+ else
+ tfield[0].weights = ascii;
+ tfield[0].icol.num = 1;
+ weights = ftbl[0].weights;
+ if (!buffer) {
+ buffer = malloc(bufsize);
+ keylist = malloc(MAXNUM * sizeof(u_char *));
+ memset(keylist, 0, MAXNUM * sizeof(u_char *));
+ if (!SINGL_FLD) {
+ linebuf_size = DEFLLEN;
+ if ((linebuf = malloc(linebuf_size)) == NULL)
+ errx(2, "cannot allocate memory");
+ }
+ }
+ bufend = buffer + bufsize;
+ if (binno >= 0) {
+ base = top + nfiles;
+ get = getnext;
+ } else {
+ base = 0;
+ if (SINGL_FLD)
+ get = makeline;
+ else
+ get = makekey;
+ }
+ for (;;) {
+ memset(sizes, 0, sizeof(sizes));
+ c = ntfiles = 0;
+ if (binno == weights[REC_D] &&
+ !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
+ rd_append(weights[REC_D], top,
+ nfiles, prevfp, buffer, bufend);
+ break;
+ } else if (binno == weights[REC_D]) {
+ depth = 0; /* start over on flat weights */
+ ftbl = tfield;
+ weights = ftbl[0].weights;
+ }
+ while (c != EOF) {
+ keypos = keylist;
+ nelem = 0;
+ crec = (RECHEADER *) buffer;
+
+ do_read:
+ while((c = get(binno, top, filelist, nfiles, crec,
+ bufend, ftbl)) == 0) {
+ *keypos++ = crec->data + depth;
+ if (++nelem == MAXNUM) {
+ c = BUFFEND;
+ break;
+ }
+ crec =(RECHEADER *) ((char *) crec +
+ SALIGN(crec->length) + sizeof(TRECHEADER));
+ }
+
+ if (c == BUFFEND && nelem < MAXNUM
+ && bufsize < MAXBUFSIZE) {
+ const u_char **keyp;
+ u_char *oldb = buffer;
+
+ /* buffer was too small for data, allocate
+ * bigger buffer */
+ bufsize *= 2;
+ buffer = realloc(buffer, bufsize);
+ if (!buffer) {
+ err(2, "failed to realloc buffer to %ld bytes",
+ (unsigned long) bufsize);
+ }
+ bufend = buffer + bufsize;
+
+ /* patch up keylist[] */
+ for(keyp = &keypos[-1]; keyp >= keylist; keyp--)
+ *keyp = buffer + (*keyp - oldb);
+
+ crec = (RECHEADER *) (buffer + ((u_char *)crec - oldb));
+ goto do_read;
+ }
+
+ if (c != BUFFEND && !ntfiles && !mfct) {
+ /* do not push */
+ continue;
+ }
+
+ /* push */
+ if (panic >= PANIC) {
+ fstack[MSTART + mfct].fp = ftmp();
+ if ((stable_sort)
+ ? sradixsort(keylist, nelem,
+ weights, REC_D)
+ : radixsort(keylist, nelem,
+ weights, REC_D) )
+ err(2, NULL);
+ append(keylist, nelem, depth,
+ fstack[MSTART + mfct].fp, putrec,
+ ftbl);
+ mfct++;
+ /* reduce number of open files */
+ if (mfct == MERGE_FNUM ||(c == EOF && ntfiles)) {
+ /*
+ * Only copy extra incomplete crec
+ * data if there are any.
+ */
+ int nodata = (bufend >= (u_char *)crec
+ && bufend <= crec->data);
+
+ if (!nodata) {
+ tmpbuf = malloc(bufend -
+ crec->data);
+ memmove(tmpbuf, crec->data,
+ bufend - crec->data);
+ }
+
+ fstack[base + ntfiles].fp = ftmp();
+ fmerge(0, MSTART, filelist,
+ mfct, geteasy, fstack[base].fp,
+ putrec, ftbl);
+ ntfiles++;
+ mfct = 0;
+
+ if (!nodata) {
+ memmove(crec->data, tmpbuf,
+ bufend - crec->data);
+ free(tmpbuf);
+ }
+ }
+ } else {
+ fstack[base + ntfiles].fp= ftmp();
+ onepass(keylist, depth, nelem, sizes,
+ weights, fstack[base + ntfiles].fp);
+ ntfiles++;
+ }
+ }
+ if (!ntfiles && !mfct) { /* everything in memory--pop */
+ if (nelem > 1
+ && ((stable_sort)
+ ? sradixsort(keylist, nelem, weights, REC_D)
+ : radixsort(keylist, nelem, weights, REC_D) ))
+ err(2, NULL);
+ if (nelem > 0)
+ append(keylist, nelem, depth, outfp, putline, ftbl);
+ break; /* pop */
+ }
+ if (panic >= PANIC) {
+ if (!ntfiles)
+ fmerge(0, MSTART, filelist, mfct, geteasy,
+ outfp, putline, ftbl);
+ else
+ fmerge(0, base, filelist, ntfiles, geteasy,
+ outfp, putline, ftbl);
+ break;
+
+ }
+ total = maxb = lastb = 0; /* find if one bin dominates */
+ for (i = 0; i < NBINS; i++)
+ if (sizes[i]) {
+ if (sizes[i] > sizes[maxb])
+ maxb = i;
+ lastb = i;
+ total += sizes[i];
+ }
+ if (sizes[maxb] < max((total / 2) , BUFSIZE))
+ maxb = lastb; /* otherwise pop after last bin */
+ fstack[base].lastb = lastb;
+ fstack[base].maxb = maxb;
+
+ /* start refining next level. */
+ getnext(-1, base, NULL, ntfiles, crec, bufend, 0); /* rewind */
+ for (i = 0; i < maxb; i++) {
+ if (!sizes[i]) /* bin empty; step ahead file offset */
+ getnext(i, base, NULL,ntfiles, crec, bufend, 0);
+ else
+ fsort(i, depth+1, base, filelist, ntfiles,
+ outfp, ftbl);
+ }
+
+ get = getnext;
+
+ if (lastb != maxb) {
+ if (prevfp != outfp)
+ tailfp[panic] = prevfp;
+ prevfp = ftmp();
+ for (i = maxb+1; i <= lastb; i++)
+ if (!sizes[i])
+ getnext(i, base, NULL, ntfiles, crec,
+ bufend,0);
+ else
+ fsort(i, depth+1, base, filelist,
+ ntfiles, prevfp, ftbl);
+ }
+
+ /* sort biggest (or last) bin at this level */
+ depth++;
+ panic++;
+ binno = maxb;
+ top = base;
+ nfiles = ntfiles; /* so overwrite them */
+ }
+ if (prevfp != outfp) {
+ concat(outfp, prevfp);
+ fclose(prevfp);
+ }
+ for (i = panic; i >= 0; --i)
+ if (tailfp[i]) {
+ concat(outfp, tailfp[i]);
+ fclose(tailfp[i]);
+ }
+
+ /* If on top level, free our structures */
+ if (depth == 0) {
+ free(keylist), keylist = NULL;
+ free(buffer), buffer = NULL;
+ }
+}
+
+/*
+ * This is one pass of radix exchange, dumping the bins to disk.
+ */
+#define swap(a, b, t) t = a, a = b, b = t
+void
+onepass(a, depth, n, sizes, tr, fp)
+ const u_char **a;
+ int depth;
+ long n, sizes[];
+ u_char *tr;
+ FILE *fp;
+{
+ size_t tsizes[NBINS+1];
+ const u_char **bin[257], ***bp, ***bpmax, **top[256], ***tp;
+ static int histo[256];
+ int *hp;
+ int c;
+ const u_char **an, *t, **aj;
+ const u_char **ak, *r;
+
+ memset(tsizes, 0, sizeof(tsizes));
+ depth += sizeof(TRECHEADER);
+ an = &a[n];
+ for (ak = a; ak < an; ak++) {
+ histo[c = tr[**ak]]++;
+ tsizes[c] += ((const RECHEADER *) (*ak -= depth))->length;
+ }
+
+ bin[0] = a;
+ bpmax = bin + 256;
+ tp = top, hp = histo;
+ for (bp = bin; bp < bpmax; bp++) {
+ *tp++ = *(bp+1) = *bp + (c = *hp);
+ *hp++ = 0;
+ if (c <= 1)
+ continue;
+ }
+ for (aj = a; aj < an; *aj = r, aj = bin[c+1])
+ for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
+ swap(*ak, r, t);
+
+ for (ak = a, c = 0; c < 256; c++) {
+ an = bin[c+1];
+ n = an - ak;
+ tsizes[c] += n * sizeof(TRECHEADER);
+ /* tell getnext how many elements in this bin, this segment. */
+ EWRITE(&tsizes[c], sizeof(size_t), 1, fp);
+ sizes[c] += tsizes[c];
+ for (; ak < an; ++ak)
+ putrec((const RECHEADER *) *ak, fp);
+ }
+}
diff --git a/contrib/sort/fsort.h b/contrib/sort/fsort.h
new file mode 100644
index 000000000000..41a755757cc5
--- /dev/null
+++ b/contrib/sort/fsort.h
@@ -0,0 +1,77 @@
+/* $NetBSD: fsort.h,v 1.9 2001/05/14 21:45:20 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fsort.h 8.1 (Berkeley) 6/6/93
+ */
+
+#define BUFSIZE (1<<20)
+#define MAXNUM 131072 /* low guess at average record count */
+#define BUFFEND (EOF-2)
+#define MAXFCT 1000
+#define DEFLLEN 65536
+
+/*
+ * Default (initial) and maximum size of record buffer for fsort().
+ * Note that no more than MAXNUM records are stored in the buffer,
+ * even if the buffer is not full yet.
+ */
+#define DEFBUFSIZE (1 << 20) /* 1MB */
+#define MAXBUFSIZE (8 << 20) /* 10 MB */
+
+/*
+ * Number of files merge() can merge in one pass.
+ * This should be power of two so that it's possible to use this value
+ * for rouding.
+ */
+#define MERGE_FNUM 16
+
+extern u_char *buffer, *linebuf;
+extern size_t bufsize, linebuf_size;
+
+/* temp files in the stack have a file descriptor, a largest bin (maxb)
+ * which becomes the last non-empty bin (lastb) when the actual largest
+ * bin is smaller than max(half the total file, BUFSIZE)
+ * Max_o is the offset of maxb so it can be sought after the other bins
+ * are sorted.
+*/
+struct tempfile {
+ FILE *fp;
+ u_char maxb;
+ u_char lastb;
+ int max_o;
+};
+extern struct tempfile fstack[MAXFCT];
diff --git a/contrib/sort/init.c b/contrib/sort/init.c
new file mode 100644
index 000000000000..3f5c068bdb01
--- /dev/null
+++ b/contrib/sort/init.c
@@ -0,0 +1,355 @@
+/* $NetBSD: init.c,v 1.6 2001/12/31 18:45:04 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: init.c,v 1.6 2001/12/31 18:45:04 thorpej Exp $");
+__SCCSID("@(#)init.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <ctype.h>
+#include <string.h>
+
+static void insertcol __P((struct field *));
+static const char *setcolumn __P((const char *, struct field *, int));
+int setfield __P((const char *, struct field *, int));
+
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+u_char gweights[NBINS];
+
+/*
+ * masks of ignored characters. Alltable is 256 ones.
+ */
+static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
+
+/*
+ * clist (list of columns which correspond to one or more icol or tcol)
+ * is in increasing order of columns.
+ * Fields are kept in increasing order of fields.
+ */
+
+/*
+ * keep clist in order--inserts a column in a sorted array
+ */
+static void
+insertcol(field)
+ struct field *field;
+{
+ int i;
+ for (i = 0; i < ncols; i++)
+ if (field->icol.num <= clist[i].num)
+ break;
+ if (field->icol.num != clist[i].num) {
+ memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
+ clist[i].num = field->icol.num;
+ ncols++;
+ }
+ if (field->tcol.num && field->tcol.num != field->icol.num) {
+ for (i = 0; i < ncols; i++)
+ if (field->tcol.num <= clist[i].num)
+ break;
+ if (field->tcol.num != clist[i].num) {
+ memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
+ clist[i].num = field->tcol.num;
+ ncols++;
+ }
+ }
+}
+
+/*
+ * matches fields with the appropriate columns--n^2 but who cares?
+ */
+void
+fldreset(fldtab)
+ struct field *fldtab;
+{
+ int i;
+ fldtab[0].tcol.p = clist+ncols-1;
+ for (++fldtab; fldtab->icol.num; ++fldtab) {
+ for (i = 0; fldtab->icol.num != clist[i].num; i++)
+ ;
+ fldtab->icol.p = clist + i;
+ if (!fldtab->tcol.num)
+ continue;
+ for (i = 0; fldtab->tcol.num != clist[i].num; i++)
+ ;
+ fldtab->tcol.p = clist + i;
+ }
+}
+
+/*
+ * interprets a column in a -k field
+ */
+static const char *
+setcolumn(pos, cur_fld, gflag)
+ const char *pos;
+ struct field *cur_fld;
+ int gflag;
+{
+ struct column *col;
+ int tmp;
+ col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
+ pos += sscanf(pos, "%d", &(col->num));
+ while (isdigit(*pos))
+ pos++;
+ if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
+ errx(2, "field numbers must be positive");
+ if (*pos == '.') {
+ if (!col->num)
+ errx(2, "cannot indent end of line");
+ ++pos;
+ pos += sscanf(pos, "%d", &(col->indent));
+ while (isdigit(*pos))
+ pos++;
+ if (&cur_fld->icol == col)
+ col->indent--;
+ if (col->indent < 0)
+ errx(2, "illegal offset");
+ }
+ if (optval(*pos, cur_fld->tcol.num))
+ while ((tmp = optval(*pos, cur_fld->tcol.num))) {
+ cur_fld->flags |= tmp;
+ pos++;
+ }
+ if (cur_fld->icol.num == 0)
+ cur_fld->icol.num = 1;
+ return (pos);
+}
+
+int
+setfield(pos, cur_fld, gflag)
+ const char *pos;
+ struct field *cur_fld;
+ int gflag;
+{
+ static int nfields = 0;
+ int tmp;
+
+ if (++nfields == ND)
+ errx(2, "too many sort keys. (Limit is %d)", ND-1);
+
+ cur_fld->weights = ascii;
+ cur_fld->mask = alltable;
+
+ pos = setcolumn(pos, cur_fld, gflag);
+ if (*pos == '\0') /* key extends to EOL. */
+ cur_fld->tcol.num = 0;
+ else {
+ if (*pos != ',')
+ errx(2, "illegal field descriptor");
+ setcolumn((++pos), cur_fld, gflag);
+ }
+ if (!cur_fld->flags)
+ cur_fld->flags = gflag;
+ tmp = cur_fld->flags;
+
+ /*
+ * Assign appropriate mask table and weight table.
+ * If the global weights are reversed, the local field
+ * must be "re-reversed".
+ */
+ if (((tmp & R) ^ (gflag & R)) && (tmp & F))
+ cur_fld->weights = RFtable;
+ else if (tmp & F)
+ cur_fld->weights = Ftable;
+ else if ((tmp & R) ^ (gflag & R))
+ cur_fld->weights = Rascii;
+
+ if (tmp & I)
+ cur_fld->mask = itable;
+ else if (tmp & D)
+ cur_fld->mask = dtable;
+
+ cur_fld->flags |= (gflag & (BI | BT));
+ if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
+ cur_fld->flags &= ~BT;
+
+ if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
+ && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
+ && cur_fld->tcol.indent < cur_fld->icol.indent))
+ errx(2, "fields out of order");
+ insertcol(cur_fld);
+ return (cur_fld->tcol.num);
+}
+
+int
+optval(desc, tcolflag)
+ int desc, tcolflag;
+{
+ switch(desc) {
+ case 'b':
+ if (!tcolflag)
+ return (BI);
+ else
+ return (BT);
+ case 'd': return (D);
+ case 'f': return (F);
+ case 'i': return (I);
+ case 'n': return (N);
+ case 'r': return (R);
+ default: return (0);
+ }
+}
+
+void
+fixit(argc, argv)
+ int *argc;
+ char **argv;
+{
+ int i, j, v, w, x;
+ static char vbuf[ND*20], *vpos, *tpos;
+ vpos = vbuf;
+
+ for (i = 1; i < *argc; i++) {
+ if (argv[i][0] == '+') {
+ tpos = argv[i]+1;
+ argv[i] = vpos;
+ vpos += sprintf(vpos, "-k");
+ tpos += sscanf(tpos, "%d", &v);
+ while (isdigit(*tpos))
+ tpos++;
+ vpos += sprintf(vpos, "%d", v+1);
+ if (*tpos == '.') {
+ ++tpos;
+ tpos += sscanf(tpos, "%d", &x);
+ vpos += sprintf(vpos, ".%d", x+1);
+ }
+ while (*tpos)
+ *vpos++ = *tpos++;
+ vpos += sprintf(vpos, ",");
+ if (argv[i+1] &&
+ argv[i+1][0] == '-' && isdigit(argv[i+1][1])) {
+ tpos = argv[i+1] + 1;
+ tpos += sscanf(tpos, "%d", &w);
+ while (isdigit(*tpos))
+ tpos++;
+ x = 0;
+ if (*tpos == '.') {
+ ++tpos;
+ tpos += sscanf(tpos, "%d", &x);
+ while (isdigit(*tpos))
+ tpos++;
+ }
+ if (x) {
+ vpos += sprintf(vpos, "%d", w+1);
+ vpos += sprintf(vpos, ".%d", x);
+ } else
+ vpos += sprintf(vpos, "%d", w);
+ while (*tpos)
+ *vpos++ = *tpos++;
+ for (j= i+1; j < *argc; j++)
+ argv[j] = argv[j+1];
+ *argc -= 1;
+ }
+ }
+ }
+}
+
+/*
+ * ascii, Rascii, Ftable, and RFtable map
+ * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
+ * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
+ * Note: when sorting in forward order, to encode character zero in a key,
+ * use \001\001; character 1 becomes \001\002. In this case, character 0
+ * is reserved for the field delimiter. Analagously for -r (fld_d = 255).
+ * Note: this is only good for ASCII sorting. For different LC 's,
+ * all bets are off. See also num_init in number.c
+ */
+void
+settables(gflags)
+ int gflags;
+{
+ u_char *wts;
+ int i, incr;
+ for (i=0; i < 256; i++) {
+ ascii[i] = i;
+ if (i > REC_D && i < 255 - REC_D+1)
+ Rascii[i] = 255 - i + 1;
+ else
+ Rascii[i] = 255 - i;
+ if (islower(i)) {
+ Ftable[i] = Ftable[toupper(i)];
+ RFtable[i] = RFtable[toupper(i)];
+ } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
+ Ftable[i] = i + 1;
+ RFtable[i] = Rascii[i] - 1;
+ } else {
+ Ftable[i] = i;
+ RFtable[i] = Rascii[i];
+ }
+ alltable[i] = 1;
+
+ if (i == '\n' || isprint(i))
+ itable[i] = 1;
+ else
+ itable[i] = 0;
+
+ if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
+ dtable[i] = 1;
+ else
+ dtable[i] = 0;
+ }
+
+ Rascii[REC_D] = RFtable[REC_D] = REC_D;
+ if (isupper(REC_D))
+ Ftable[tolower(REC_D)]++;
+
+ if ((gflags & R) && !((gflags & F) && SINGL_FLD))
+ wts = Rascii;
+ else if (!((gflags & F) && SINGL_FLD))
+ wts = ascii;
+ else if (gflags & R)
+ wts = RFtable;
+ else
+ wts = Ftable;
+
+ memmove(gweights, wts, sizeof(gweights));
+ incr = (gflags & R) ? -1 : 1;
+ for (i = 0; i < REC_D; i++)
+ gweights[i] += incr;
+ gweights[REC_D] = ((gflags & R) ? 255 : 0);
+ if (SINGL_FLD && (gflags & F)) {
+ for (i = 0; i < REC_D; i++) {
+ ascii[i] += incr;
+ Rascii[i] += incr;
+ }
+ ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
+ }
+}
diff --git a/contrib/sort/msort.c b/contrib/sort/msort.c
new file mode 100644
index 000000000000..8a2500cc35d2
--- /dev/null
+++ b/contrib/sort/msort.c
@@ -0,0 +1,390 @@
+/* $NetBSD: msort.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+#include "fsort.h"
+
+#ifndef lint
+__RCSID("$NetBSD: msort.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $");
+__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Subroutines using comparisons: merge sort and check order */
+#define DELETE (1)
+
+typedef struct mfile {
+ u_char *end;
+ short flno;
+ struct recheader rec[1];
+} MFILE;
+
+static u_char *wts, *wts1 = NULL;
+
+static int cmp __P((RECHEADER *, RECHEADER *));
+static int insert __P((struct mfile **, struct mfile **, int, int));
+
+void
+fmerge(binno, top, filelist, nfiles, get, outfp, fput, ftbl)
+ int binno, top;
+ struct filelist *filelist;
+ int nfiles;
+ get_func_t get;
+ FILE *outfp;
+ put_func_t fput;
+ struct field *ftbl;
+{
+ FILE *tout;
+ int i, j, last;
+ put_func_t put;
+ struct tempfile *l_fstack;
+
+ wts = ftbl->weights;
+ if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
+ wts1 = (ftbl->flags & R) ? Rascii : ascii;
+
+ if (!buffer) {
+ buffer = malloc(bufsize);
+ if (!buffer)
+ err(2, "fmerge(): realloc");
+
+ if (!linebuf && !SINGL_FLD) {
+ linebuf_size = DEFLLEN;
+ linebuf = malloc(linebuf_size);
+ }
+ }
+
+ if (binno >= 0)
+ l_fstack = fstack + top;
+ else
+ l_fstack = fstack;
+
+ while (nfiles) {
+ put = putrec;
+ for (j = 0; j < nfiles; j += MERGE_FNUM) {
+ if (nfiles <= MERGE_FNUM) {
+ tout = outfp;
+ put = fput;
+ }
+ else
+ tout = ftmp();
+ last = min(MERGE_FNUM, nfiles - j);
+ if (binno < 0) {
+ for (i = 0; i < last; i++)
+ if (!(l_fstack[i+MAXFCT-1-MERGE_FNUM].fp =
+ fopen(filelist->names[j+i], "r")))
+ err(2, "%s",
+ filelist->names[j+i]);
+ merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl);
+ } else {
+ for (i = 0; i< last; i++)
+ rewind(l_fstack[i+j].fp);
+ merge(top+j, last, get, tout, put, ftbl);
+ }
+ if (nfiles > MERGE_FNUM)
+ l_fstack[j/MERGE_FNUM].fp = tout;
+ }
+ nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM;
+ if (nfiles == 1)
+ nfiles = 0;
+ if (binno < 0) {
+ binno = 0;
+ get = geteasy;
+ top = 0;
+ }
+ }
+}
+
+void
+merge(infl0, nfiles, get, outfp, put, ftbl)
+ int infl0, nfiles;
+ get_func_t get;
+ put_func_t put;
+ FILE *outfp;
+ struct field *ftbl;
+{
+ int c, i, j, nf = nfiles;
+ struct mfile *flist[MERGE_FNUM], *cfile;
+ size_t availsz = bufsize;
+ static void *bufs[MERGE_FNUM+1];
+ static size_t bufs_sz[MERGE_FNUM+1];
+
+ /*
+ * We need nfiles + 1 buffers. One is 'buffer', the
+ * rest needs to be allocated.
+ */
+ bufs[0] = buffer;
+ bufs_sz[0] = bufsize;
+ for(i=1; i < nfiles+1; i++) {
+ if (bufs[i])
+ continue;
+
+ bufs[i] = malloc(DEFLLEN);
+ if (!bufs[i])
+ err(2, "merge(): realloc");
+ bufs_sz[i] = DEFLLEN;
+ }
+
+ for (i = j = 0; i < nfiles; i++) {
+ cfile = (struct mfile *) bufs[j];
+ cfile->flno = infl0 + j;
+ cfile->end = (u_char *) bufs[j] + bufs_sz[j];
+ for (c = 1; c == 1;) {
+ if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
+ cfile->rec, cfile->end, ftbl))) {
+ --i;
+ --nfiles;
+ break;
+ }
+
+ if (c == BUFFEND) {
+ cfile = realloc(bufs[j], bufs_sz[j] *= 2);
+ bufs[j] = (void *) cfile;
+
+ if (!cfile)
+ err(2, "merge(): realloc");
+
+ cfile->end = (u_char *)cfile + bufs_sz[j];
+
+ c = 1;
+ continue;
+ }
+
+ if (i)
+ c = insert(flist, &cfile, i, !DELETE);
+ else
+ flist[0] = cfile;
+ }
+ j++;
+ }
+
+ cfile = (struct mfile *) bufs[nf];
+ cfile->flno = flist[0]->flno;
+ cfile->end = (u_char *) cfile + bufs_sz[nf];
+ while (nfiles) {
+ for (c = 1; c == 1;) {
+ if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
+ cfile->rec, cfile->end, ftbl))) {
+ put(flist[0]->rec, outfp);
+ memmove(flist, flist + 1,
+ sizeof(MFILE *) * (--nfiles));
+ cfile->flno = flist[0]->flno;
+ break;
+ }
+ if (c == BUFFEND) {
+ char *oldbuf = (char *) cfile;
+ availsz = (char *) cfile->end - oldbuf;
+ availsz *= 2;
+ cfile = realloc(oldbuf, availsz);
+ for(i=0; i < nf+1; i++) {
+ if (bufs[i] == oldbuf) {
+ bufs[i] = (char *)cfile;
+ bufs_sz[i] = availsz;
+ break;
+ }
+ }
+
+ if (!cfile)
+ err(2, "merge: realloc");
+
+ cfile->end = (u_char *)cfile + availsz;
+ c = 1;
+ continue;
+ }
+
+ if (!(c = insert(flist, &cfile, nfiles, DELETE)))
+ put(cfile->rec, outfp);
+ }
+ }
+
+ if (bufs_sz[0] > bufsize) {
+ buffer = bufs[0];
+ bufsize = bufs_sz[0];
+ }
+}
+
+/*
+ * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
+ * otherwise just inserts *rec in flist.
+ */
+static int
+insert(flist, rec, ttop, delete)
+ struct mfile **flist, **rec;
+ int delete, ttop; /* delete = 0 or 1 */
+{
+ struct mfile *tmprec = *rec;
+ int mid, top = ttop, bot = 0, cmpv = 1;
+
+ for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
+ cmpv = cmp(tmprec->rec, flist[mid]->rec);
+ if (cmpv < 0)
+ top = mid;
+ else if (cmpv > 0)
+ bot = mid;
+ else {
+ if (UNIQUE)
+ break;
+
+ if (stable_sort) {
+ /*
+ * Apply sort by fileno, to give priority
+ * to earlier specified files, hence providing
+ * more stable sort.
+ * If fileno is same, the new record should
+ * be put _after_ the previous entry.
+ */
+ cmpv = tmprec->flno - flist[mid]->flno;
+ if (cmpv >= 0)
+ bot = mid;
+ else /* cmpv == 0 */
+ bot = mid - 1;
+ } else {
+ /* non-stable sort */
+ bot = mid - 1;
+ }
+
+ break;
+ }
+ }
+
+ if (delete) {
+ if (UNIQUE) {
+ if (!bot && cmpv)
+ cmpv = cmp(tmprec->rec, flist[0]->rec);
+ if (!cmpv)
+ return (1);
+ }
+ tmprec = flist[0];
+ if (bot)
+ memmove(flist, flist+1, bot * sizeof(MFILE **));
+ flist[bot] = *rec;
+ *rec = tmprec;
+ (*rec)->flno = flist[0]->flno;
+ return (0);
+ } else {
+ if (!bot && !(UNIQUE && !cmpv)) {
+ cmpv = cmp(tmprec->rec, flist[0]->rec);
+ if (cmpv < 0)
+ bot = -1;
+ }
+ if (UNIQUE && !cmpv)
+ return (1);
+ bot++;
+ memmove(flist + bot+1, flist + bot,
+ (ttop - bot) * sizeof(MFILE **));
+ flist[bot] = *rec;
+ return (0);
+ }
+}
+
+/*
+ * check order on one file
+ */
+void
+order(filelist, get, ftbl)
+ struct filelist *filelist;
+ get_func_t get;
+ struct field *ftbl;
+{
+ u_char *crec_end, *prec_end, *trec_end;
+ int c;
+ RECHEADER *crec, *prec, *trec;
+
+ if (!SINGL_FLD)
+ linebuf = malloc(DEFLLEN);
+ buffer = malloc(2 * (DEFLLEN + sizeof(TRECHEADER)));
+ crec = (RECHEADER *) buffer;
+ crec_end = buffer + DEFLLEN + sizeof(TRECHEADER);
+ prec = (RECHEADER *) (buffer + DEFLLEN + sizeof(TRECHEADER));
+ prec_end = buffer + 2*(DEFLLEN + sizeof(TRECHEADER));
+ wts = ftbl->weights;
+ if (SINGL_FLD && (ftbl->flags & F))
+ wts1 = (ftbl->flags & R) ? Rascii : ascii;
+ else
+ wts1 = NULL;
+ if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl))
+ while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) {
+ if (0 < (c = cmp(prec, crec))) {
+ crec->data[crec->length-1] = 0;
+ errx(1, "found disorder: %s", crec->data+crec->offset);
+ }
+ if (UNIQUE && !c) {
+ crec->data[crec->length-1] = 0;
+ errx(1, "found non-uniqueness: %s",
+ crec->data+crec->offset);
+ }
+ /*
+ * Swap pointers so that this record is on place pointed
+ * to by prec and new record is read to place pointed to by
+ * crec.
+ */
+ trec = prec;
+ prec = crec;
+ crec = trec;
+ trec_end = prec_end;
+ prec_end = crec_end;
+ crec_end = trec_end;
+ }
+ exit(0);
+}
+
+static int
+cmp(rec1, rec2)
+ RECHEADER *rec1, *rec2;
+{
+ int r;
+ u_char *pos1, *pos2, *end;
+ u_char *cwts;
+ for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) {
+ pos1 = rec1->data;
+ pos2 = rec2->data;
+ if (!SINGL_FLD && (UNIQUE || stable_sort))
+ end = pos1 + min(rec1->offset, rec2->offset);
+ else
+ end = pos1 + min(rec1->length, rec2->length);
+
+ for (; pos1 < end; ) {
+ if ((r = cwts[*pos1++] - cwts[*pos2++]))
+ return (r);
+ }
+ }
+ return (0);
+}
diff --git a/contrib/sort/pathnames.h b/contrib/sort/pathnames.h
new file mode 100644
index 000000000000..4f08e6351444
--- /dev/null
+++ b/contrib/sort/pathnames.h
@@ -0,0 +1,41 @@
+/* $NetBSD: pathnames.h,v 1.3 2000/10/07 20:37:06 bjh21 Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pathnames.h 8.1 (Berkeley) 6/6/93
+ */
+
+#define _PATH_STDIN "/dev/stdin"
diff --git a/contrib/sort/sort.1 b/contrib/sort/sort.1
new file mode 100644
index 000000000000..c4a782b84e35
--- /dev/null
+++ b/contrib/sort/sort.1
@@ -0,0 +1,441 @@
+.\" $NetBSD: sort.1,v 1.18 2002/02/08 01:36:33 ross Exp $
+.\"
+.\" Copyright (c) 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
+.\"
+.Dd January 13, 2001
+.Dt SORT 1
+.Os
+.Sh NAME
+.Nm sort
+.Nd sort or merge text files
+.Sh SYNOPSIS
+.Nm sort
+.Op Fl cmubdfHinrsS
+.Op Fl t Ar char
+.Op Fl R Ar char
+.Oo
+.Fl k
+.Ar field1 Ns Op Li \&, Ns Ar field2
+.Oc
+.Op Fl T Ar dir
+.Op Fl o Ar output
+.Op Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility sorts text files by lines.
+Comparisons are based on one or more sort keys extracted
+from each line of input, and are performed lexicographically.
+By default, if keys are not given,
+.Nm
+regards each input line as a single field.
+.Pp
+The following options are available:
+.Bl -tag -width Fl
+.It Fl c
+Check that the single input file is sorted.
+If the file is not sorted,
+.Nm
+produces the appropriate error messages and exits with code 1; otherwise,
+.Nm
+returns 0.
+.Nm
+.Fl c
+produces no output.
+.It Fl m
+Merge only; the input files are assumed to be pre-sorted.
+.It Fl o Ar output
+The argument given is the name of an
+.Ar output
+file to be used instead of the standard output.
+This file can be the same as one of the input files.
+.It Fl T Ar dir
+Use
+.Ar dir
+as the directory for temporary files.
+The default is the value specified in the environment variable
+.Ev TMPDIR or
+.Pa /tmp
+if
+.Ev TMPDIR
+is not defined.
+.It Fl u
+Unique: suppress all but one in each set of lines having equal keys.
+If used with the
+.Fl c
+option, check that there are no lines with duplicate keys.
+.El
+.Pp
+The following options override the default ordering rules.
+When ordering options appear independent of key field
+specifications, the requested field ordering rules are
+applied globally to all sort keys.
+When attached to a specific key (see
+.Fl k ) ,
+the ordering options override
+all global ordering options for that key.
+.Bl -tag -width Fl
+.It Fl d
+Only blank space and alphanumeric characters
+.\" according
+.\" to the current setting of LC_CTYPE
+are used
+in making comparisons.
+.It Fl f
+Considers all lowercase characters that have uppercase
+equivalents to be the same for purposes of comparison.
+.It Fl i
+Ignore all non-printable characters.
+.It Fl n
+An initial numeric string, consisting of optional blank space, optional
+minus sign, and zero or more digits (including decimal point)
+.\" with
+.\" optional radix character and thousands
+.\" separator
+.\" (as defined in the current locale),
+is sorted by arithmetic value.
+(The
+.Fl n
+option no longer implies the
+.Fl b
+option.)
+.It Fl r
+Reverse the sense of comparisons.
+.It Fl S
+Don't use stable sort.
+Default is to use stable sort.
+.It Fl s
+Use stable sort.
+This is the default.
+Provided for compatiblity with other
+.Nm
+implementations only.
+.It Fl H
+Use a merge sort instead of a radix sort.
+This option should be used for files larger than 60Mb.
+.El
+.Pp
+The treatment of field separators can be altered using these options:
+.Bl -tag -width Fl
+.It Fl b
+Ignores leading blank space when determining the start
+and end of a restricted sort key.
+A
+.Fl b
+option specified before the first
+.Fl k
+option applies globally to all
+.Fl k
+options.
+Otherwise, the
+.Fl b
+option can be attached independently to each
+.Ar field
+argument of the
+.Fl k
+option (see below).
+Note that the
+.Fl b
+option has no effect unless key fields are specified.
+.It Fl t Ar char
+.Ar char
+is used as the field separator character.
+The initial
+.Ar char
+is not considered to be part of a field when determining
+key offsets (see below).
+Each occurrence of
+.Ar char
+is significant (for example,
+.Dq Ar charchar
+delimits an empty field).
+If
+.Fl t
+is not specified, the default field separator is a sequence of
+blank-space characters, and consecutive blank spaces do
+.Em not
+delimit an empty field; further, the initial blank space
+.Em is
+considered part of a field when determining key offsets.
+.It Fl R Ar char
+.Ar char
+is used as the record separator character.
+This should be used with discretion;
+.Fl R Ar \*[Lt]alphanumeric\*[Gt]
+usually produces undesirable results.
+The default record separator is newline.
+.It Xo
+.Fl k
+.Ar field1 Ns Op Li \&, Ns Ar field2
+.Xc
+Designates the starting position,
+.Ar field1 ,
+and optional ending position,
+.Ar field2 ,
+of a key field.
+The
+.Fl k
+option replaces the obsolescent options
+.Cm \(pl Ns Ar pos1
+and
+.Fl Ns Ar pos2 .
+.El
+.Pp
+The following operands are available:
+.Bl -tag -width Ar
+.It Ar file
+The pathname of a file to be sorted, merged, or checked.
+If no
+.Ar file
+operands are specified, or if
+a
+.Ar file
+operand is
+.Fl ,
+the standard input is used.
+.El
+.Pp
+A field is defined as a minimal sequence of characters followed by a
+field separator or a newline character.
+By default, the first
+blank space of a sequence of blank spaces acts as the field separator.
+All blank spaces in a sequence of blank spaces are considered
+as part of the next field; for example, all blank spaces at
+the beginning of a line are considered to be part of the
+first field.
+.Pp
+Fields are specified
+by the
+.Fl k
+.Ar field1 Ns Op \&, Ns Ar field2
+argument.
+A missing
+.Ar field2
+argument defaults to the end of a line.
+.Pp
+The arguments
+.Ar field1
+and
+.Ar field2
+have the form
+.Ar m Ns Li \&. Ns Ar n
+and can be followed by one or more of the letters
+.Cm b , d , f , i ,
+.Cm n ,
+and
+.Cm r ,
+which correspond to the options discussed above.
+A
+.Ar field1
+position specified by
+.Ar m Ns Li \&. Ns Ar n
+.Pq Ar m , n No \*[Gt] 0
+is interpreted as the
+.Ar n Ns th
+character in the
+.Ar m Ns th
+field.
+A missing
+.Li \&. Ns Ar n
+in
+.Ar field1
+means
+.Ql \&.1 ,
+indicating the first character of the
+.Ar m Ns th
+field; if the
+.Fl b
+option is in effect,
+.Ar n
+is counted from the first non-blank character in the
+.Ar m Ns th
+field;
+.Ar m Ns Li \&.1b
+refers to the first non-blank character in the
+.Ar m Ns th
+field.
+.Pp
+A
+.Ar field2
+position specified by
+.Ar m Ns Li \&. Ns Ar n
+is interpreted as
+the
+.Ar n Ns th
+character (including separators) of the
+.Ar m Ns th
+field.
+A missing
+.Li \&. Ns Ar n
+indicates the last character of the
+.Ar m Ns th
+field;
+.Ar m
+= \&0
+designates the end of a line.
+Thus the option
+.Fl k
+.Sm off
+.Xo
+.Ar v Li \&. Ar x Li \&,
+.Ar w Li \&. Ar y
+.Xc
+.Sm on
+is synonymous with the obsolescent option
+.Sm off
+.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
+.Fl Ar w-\&1 Li \&. Ar y ;
+.Sm on
+when
+.Ar y
+is omitted,
+.Fl k
+.Sm off
+.Ar v Li \&. Ar x Li \&, Ar w
+.Sm on
+is synonymous with
+.Sm off
+.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
+.Fl Ar w+1 Li \&.0 .
+.Sm on
+The obsolescent
+.Cm \(pl Ns Ar pos1
+.Fl Ns Ar pos2
+option is still supported, except for
+.Fl Ns Ar w Ns Li \&.0b ,
+which has no
+.Fl k
+equivalent.
+.Sh RETURN VALUES
+Sort exits with one of the following values:
+.Bl -tag -width flag -compact
+.It 0
+Normal behavior.
+.It 1
+On disorder (or non-uniqueness) with the
+.Fl c
+option
+.It 2
+An error occurred.
+.El
+.Sh ENVIRONMENT
+If the following environment variable exists, it is utilized by
+.Nm "" .
+.Bl -tag -width Ev
+.It Ev TMPDIR
+.Nm
+uses the contents of the
+.Ev TMPDIR
+environment variable as the path in which to store
+temporary files.
+.El
+.Sh FILES
+.Bl -tag -width outputNUMBER+some -compact
+.It Pa /tmp/sort.*
+Default temporary files.
+.It Pa Ar output Ns NUMBER
+Temporary file which is used for output if
+.Ar output
+already exists.
+Once sorting is finished, this file replaces
+.Ar output
+(via
+.Xr link 2
+and
+.Xr unlink 2 ) .
+.El
+.Sh SEE ALSO
+.Xr comm 1 ,
+.Xr join 1 ,
+.Xr uniq 1 ,
+.Xr qsort 3 ,
+.Xr radixsort 3
+.Sh HISTORY
+A
+.Nm
+command appeared in
+.At v5 .
+This
+.Nm
+implementation appeared in
+.Bx 4.4
+and is used since
+.Nx 1.6 .
+.Sh BUGS
+To sort files larger than 60Mb, use
+.Nm
+.Fl H ;
+files larger than 704Mb must be sorted in smaller pieces, then merged.
+.Sh NOTES
+This
+.Nm
+has no limits on input line length (other than imposed by available
+memory) or any restrictions on bytes allowed within lines.
+.Pp
+To protect data
+.Nm
+.Fl o
+calls
+.Xr link 2
+and
+.Xr unlink 2 ,
+and thus fails on protected directories.
+.Pp
+Input files should be text files.
+If file doesn't end with record separator (which is typically newline), the
+.Nm
+utility silently supplies one.
+.Pp
+The current
+.Nm
+uses lexicographic radix sorting, which requires
+that sort keys be kept in memory (as opposed to previous versions which used quick
+and merge sorts and did not.)
+Thus performance depends highly on efficient choice of sort keys, and the
+.Fl b
+option and the
+.Ar field2
+argument of the
+.Fl k
+option should be used whenever possible.
+Similarly,
+.Nm
+.Fl k1f
+is equivalent to
+.Nm
+.Fl f
+and may take twice as long.
diff --git a/contrib/sort/sort.c b/contrib/sort/sort.c
new file mode 100644
index 000000000000..a119b7da218a
--- /dev/null
+++ b/contrib/sort/sort.c
@@ -0,0 +1,332 @@
+/* $NetBSD: sort.c,v 1.27 2001/05/14 21:52:21 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Sort sorts a file using an optional user-defined key.
+ * Sort uses radix sort for internal sorting, and allows
+ * a choice of merge sort and radix sort for external sorting.
+ */
+
+#include "sort.h"
+#include "fsort.h"
+#include "pathnames.h"
+
+#ifndef lint
+__COPYRIGHT("@(#) Copyright (c) 1993\n\
+ The Regents of the University of California. All rights reserved.\n");
+#endif /* not lint */
+
+#ifndef lint
+__RCSID("$NetBSD: sort.c,v 1.27 2001/05/14 21:52:21 jdolecek Exp $");
+__SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <paths.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+
+int REC_D = '\n';
+u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
+/*
+ * weight tables. Gweights is one of ascii, Rascii..
+ * modified to weight rec_d = 0 (or 255)
+ */
+u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
+int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
+struct coldesc clist[(ND+1)*2];
+int ncols = 0;
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+
+/*
+ * Default to stable sort.
+ */
+int stable_sort = 1;
+
+char toutpath[MAXPATHLEN];
+
+const char *tmpdir; /* where temporary files should be put */
+
+static void cleanup __P((void));
+static void onsignal __P((int));
+static void usage __P((const char *));
+static void many_files __P((void));
+
+int main __P((int argc, char **argv));
+
+int
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ get_func_t get;
+ int ch, i, stdinflag = 0, tmp = 0;
+ char cflag = 0, mflag = 0;
+ char *outfile, *outpath = 0;
+ struct field fldtab[ND+2], *ftpos;
+ struct filelist filelist;
+ FILE *outfp = NULL;
+
+ setlocale(LC_ALL, "");
+
+ memset(fldtab, 0, (ND+2)*sizeof(struct field));
+ memset(d_mask, 0, NBINS);
+ d_mask[REC_D = '\n'] = REC_D_F;
+ SINGL_FLD = SEP_FLAG = 0;
+ d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
+ ftpos = fldtab;
+ many_files();
+
+ fixit(&argc, argv);
+ if (!(tmpdir = getenv("TMPDIR")))
+ tmpdir = _PATH_TMP;
+
+ while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) {
+ switch (ch) {
+ case 'b':
+ fldtab->flags |= BI | BT;
+ break;
+ case 'c':
+ cflag = 1;
+ break;
+ case 'd': case 'f': case 'i': case 'n': case 'r':
+ tmp |= optval(ch, 0);
+ if ((tmp & R) && (tmp & F))
+ fldtab->weights = RFtable;
+ else if (tmp & F)
+ fldtab->weights = Ftable;
+ else if (tmp & R)
+ fldtab->weights = Rascii;
+ fldtab->flags |= tmp;
+ break;
+ case 'H':
+ PANIC = 0;
+ break;
+ case 'k':
+ setfield(optarg, ++ftpos, fldtab->flags);
+ break;
+ case 'm':
+ mflag = 1;
+ break;
+ case 'o':
+ outpath = optarg;
+ break;
+ case 's':
+ /* for GNU sort compatibility (this is our default) */
+ stable_sort = 1;
+ break;
+ case 'S':
+ stable_sort = 0;
+ break;
+ case 't':
+ if (SEP_FLAG)
+ usage("multiple field delimiters");
+ SEP_FLAG = 1;
+ d_mask[' '] &= ~FLD_D;
+ d_mask['\t'] &= ~FLD_D;
+ d_mask[(u_char)*optarg] |= FLD_D;
+ if (d_mask[(u_char)*optarg] & REC_D_F)
+ errx(2, "record/field delimiter clash");
+ break;
+ case 'R':
+ if (REC_D != '\n')
+ usage("multiple record delimiters");
+ if ('\n' == (REC_D = *optarg))
+ break;
+ d_mask['\n'] = d_mask[' '];
+ d_mask[REC_D] = REC_D_F;
+ break;
+ case 'T':
+ /* -T tmpdir */
+ tmpdir = optarg;
+ break;
+ case 'u':
+ UNIQUE = 1;
+ break;
+ case '?':
+ default:
+ usage(NULL);
+ }
+ }
+ if (cflag && argc > optind+1)
+ errx(2, "too many input files for -c option");
+ if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
+ outpath = argv[argc-1];
+ argc -= 2;
+ }
+ if (mflag && argc - optind > (MAXFCT - (16+1))*16)
+ errx(2, "too many input files for -m option");
+ for (i = optind; i < argc; i++) {
+ /* allow one occurrence of /dev/stdin */
+ if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
+ if (stdinflag)
+ warnx("ignoring extra \"%s\" in file list",
+ argv[i]);
+ else
+ stdinflag = 1;
+
+ /* change to /dev/stdin if '-' */
+ if (argv[i][0] == '-')
+ argv[i] = _PATH_STDIN;
+
+ } else if ((ch = access(argv[i], R_OK)))
+ err(2, "%s", argv[i]);
+ }
+ if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
+ SINGL_FLD = 1;
+ fldtab[0].icol.num = 1;
+ } else {
+ if (!fldtab[1].icol.num) {
+ fldtab[0].flags &= ~(BI|BT);
+ setfield("1", ++ftpos, fldtab->flags);
+ }
+ fldreset(fldtab);
+ fldtab[0].flags &= ~F;
+ }
+ settables(fldtab[0].flags);
+ num_init();
+ fldtab->weights = gweights;
+ if (optind == argc) {
+ static const char * const names[] = { _PATH_STDIN, NULL };
+
+ filelist.names = names;
+ optind--;
+ } else
+ filelist.names = (const char * const *) &argv[optind];
+
+ if (SINGL_FLD)
+ get = makeline;
+ else
+ get = makekey;
+
+ if (cflag) {
+ order(&filelist, get, fldtab);
+ /* NOT REACHED */
+ }
+ if (!outpath) {
+ (void)snprintf(toutpath,
+ sizeof(toutpath), "%sstdout", _PATH_DEV);
+ outfile = outpath = toutpath;
+ outfp = stdout;
+ } else if (!(ch = access(outpath, 0)) &&
+ strncmp(_PATH_DEV, outpath, 5)) {
+ static const struct sigaction act =
+ { onsignal, {{0}}, SA_RESTART | SA_RESETHAND };
+ static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
+ SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
+ int outfd;
+ errno = 0;
+ if (access(outpath, W_OK))
+ err(2, "%s", outpath);
+ (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
+ outpath);
+ if ((outfd = mkstemp(toutpath)) == -1)
+ err(2, "Cannot create temporary file `%s'", toutpath);
+ if ((outfp = fdopen(outfd, "w")) == NULL)
+ err(2, "Cannot open temporary file `%s'", toutpath);
+ outfile = toutpath;
+ (void)atexit(cleanup);
+ for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
+ sigaction(sigtable[i], &act, 0);
+ } else
+ outfile = outpath;
+
+ if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
+ err(2, "output file %s", outfile);
+
+ if (mflag) {
+ fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline,
+ fldtab);
+ } else
+ fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab);
+
+ if (outfile != outpath) {
+ if (access(outfile, 0))
+ err(2, "%s", outfile);
+ (void)unlink(outpath);
+ if (link(outfile, outpath))
+ err(2, "cannot link %s: output left in %s",
+ outpath, outfile);
+ (void)unlink(outfile);
+ }
+ exit(0);
+}
+
+static void
+onsignal(sig)
+ int sig;
+{
+ cleanup();
+}
+
+static void
+cleanup()
+{
+ if (toutpath[0])
+ (void)unlink(toutpath);
+}
+
+static void
+usage(msg)
+ const char *msg;
+{
+ if (msg != NULL)
+ (void)fprintf(stderr, "sort: %s\n", msg);
+ (void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] ");
+ (void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n");
+ exit(2);
+}
+
+static void
+many_files()
+{
+#if 0
+ struct rlimit rlp_many_files[1];
+
+ if (getrlimit(RLIMIT_NOFILE, rlp_many_files) == 0) {
+ rlp_many_files->rlim_cur = rlp_many_files->rlim_max;
+ setrlimit(RLIMIT_NOFILE, rlp_many_files);
+ }
+#endif
+}
diff --git a/contrib/sort/sort.h b/contrib/sort/sort.h
new file mode 100644
index 000000000000..a8f2c4b17180
--- /dev/null
+++ b/contrib/sort/sort.h
@@ -0,0 +1,149 @@
+/* $NetBSD: sort.h,v 1.12 2001/02/19 19:31:29 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sort.h 8.1 (Berkeley) 6/6/93
+ */
+
+#include <sys/param.h>
+
+#include <db.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NBINS 256
+#define MAXMERGE 16
+
+/* values for masks, weights, and other flags. */
+#define I 1 /* mask out non-printable characters */
+#define D 2 /* sort alphanumeric characters only */
+#define N 4 /* Field is a number */
+#define F 8 /* weight lower and upper case the same */
+#define R 16 /* Field is reversed with respect to the global weight */
+#define BI 32 /* ignore blanks in icol */
+#define BT 64 /* ignore blanks in tcol */
+
+/* masks for delimiters: blanks, fields, and termination. */
+#define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */
+#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
+#define REC_D_F 4 /* '\n' default; from -T otherwise */
+
+#define ND 10 /* limit on number of -k options. */
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+#define FCLOSE(file) { \
+ if (EOF == fclose(file)) \
+ err(2, "%p", file); \
+}
+
+#define EWRITE(ptr, size, n, f) { \
+ if (!fwrite(ptr, size, n, f)) \
+ err(2, NULL); \
+}
+
+/* length of record is currently limited to maximum string length (size_t) */
+typedef size_t length_t;
+
+/* a record is a key/line pair starting at rec.data. It has a total length
+ * and an offset to the start of the line half of the pair.
+ */
+typedef struct recheader {
+ length_t length;
+ length_t offset;
+ u_char data[1];
+} RECHEADER;
+
+typedef struct trecheader {
+ length_t length;
+ length_t offset;
+} TRECHEADER;
+
+/* This is the column as seen by struct field. It is used by enterfield.
+ * They are matched with corresponding coldescs during initialization.
+ */
+struct column {
+ struct coldesc *p;
+ int num;
+ int indent;
+};
+
+/* a coldesc has a number and pointers to the beginning and end of the
+ * corresponding column in the current line. This is determined in enterkey.
+ */
+typedef struct coldesc {
+ u_char *start;
+ u_char *end;
+ int num;
+} COLDESC;
+
+/* A field has an initial and final column; an omitted final column
+ * implies the end of the line. Flags regulate omission of blanks and
+ * numerical sorts; mask determines which characters are ignored (from -i, -d);
+ * weights determines the sort weights of a character (from -f, -r).
+ */
+struct field {
+ struct column icol;
+ struct column tcol;
+ u_int flags;
+ u_char *mask;
+ u_char *weights;
+};
+
+struct filelist {
+ const char * const * names;
+};
+
+typedef int (*get_func_t) __P((int, int, struct filelist *, int,
+ RECHEADER *, u_char *, struct field *));
+typedef void (*put_func_t) __P((const struct recheader *, FILE *));
+
+extern int PANIC; /* maximum depth of fsort before fmerge is called */
+extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
+extern u_char d_mask[NBINS];
+extern int SINGL_FLD, SEP_FLAG, UNIQUE;
+extern int REC_D;
+extern const char *tmpdir;
+extern int stable_sort;
+extern u_char gweights[NBINS];
+
+#include "extern.h"
diff --git a/contrib/sort/tmp.c b/contrib/sort/tmp.c
new file mode 100644
index 000000000000..d8476d641998
--- /dev/null
+++ b/contrib/sort/tmp.c
@@ -0,0 +1,84 @@
+/* $NetBSD: tmp.c,v 1.8 2001/02/23 08:59:49 jdolecek Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+
+#ifndef lint
+__RCSID("$NetBSD: tmp.c,v 1.8 2001/02/23 08:59:49 jdolecek Exp $");
+__SCCSID("@(#)tmp.c 8.1 (Berkeley) 6/6/93");
+#endif /* not lint */
+
+#include <sys/param.h>
+
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sort.h"
+#include "pathnames.h"
+
+#define _NAME_TMP "sort.XXXXXXXX"
+
+FILE *
+ftmp()
+{
+ sigset_t set, oset;
+ FILE *fp;
+ int fd;
+ char pathb[MAXPATHLEN], *path;
+
+ path = pathb;
+ (void)snprintf(path, sizeof(pathb), "%s%s%s", tmpdir,
+ (tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
+
+ sigfillset(&set);
+ (void)sigprocmask(SIG_BLOCK, &set, &oset);
+ if ((fd = mkstemp(path)) < 0)
+ err(2, "ftmp: mkstemp(\"%s\")", path);
+ if (!(fp = fdopen(fd, "w+")))
+ err(2, "ftmp: fdopen(\"%s\")", path);
+ (void)unlink(path);
+
+ (void)sigprocmask(SIG_SETMASK, &oset, NULL);
+ return (fp);
+}