aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorUlrich Spörlein <uqs@FreeBSD.org>2012-10-18 09:55:16 +0000
committerUlrich Spörlein <uqs@FreeBSD.org>2012-10-18 09:55:16 +0000
commit211d2d512b87f4d2be8961b75432e4f0d5206c9a (patch)
tree314af9fcc18a8f8023bbb995dd9ef62bad563089
downloadsrc-211d2d512b87f4d2be8961b75432e4f0d5206c9a.tar.gz
src-211d2d512b87f4d2be8961b75432e4f0d5206c9a.zip
Vendor import of mdocml 1.12.1vendor/mandoc/1.12.1
It is a suite of ISC licensed tools to compile and render mdoc/man pages and will replace groff for formatting manpages in the base system. http://mdocml.bsd.lv/
Notes
Notes: svn path=/vendor/mdocml/dist/; revision=241675 svn path=/vendor/mdocml/1.12.1/; revision=241676; tag=vendor/mandoc/1.12.1
-rw-r--r--Makefile645
-rw-r--r--TODO372
-rw-r--r--apropos.1328
-rw-r--r--apropos.c239
-rw-r--r--apropos_db.c876
-rw-r--r--apropos_db.h73
-rw-r--r--arch.c39
-rw-r--r--arch.in111
-rw-r--r--att.c39
-rw-r--r--att.in40
-rw-r--r--catman.8111
-rw-r--r--catman.c511
-rw-r--r--cgi.c1203
-rw-r--r--chars.c167
-rw-r--r--chars.in397
-rw-r--r--compat_fgetln.c93
-rw-r--r--compat_getsubopt.c104
-rw-r--r--compat_strlcat.c67
-rw-r--r--compat_strlcpy.c63
-rw-r--r--config.h.post42
-rw-r--r--config.h.pre8
-rw-r--r--demandoc.1109
-rw-r--r--demandoc.c257
-rw-r--r--eqn.7280
-rw-r--r--eqn.c949
-rw-r--r--eqn_html.c81
-rw-r--r--eqn_term.c76
-rw-r--r--example.style.css110
-rw-r--r--external.pngbin0 -> 165 bytes
-rw-r--r--html.c699
-rw-r--r--html.h164
-rw-r--r--index.css48
-rw-r--r--index.sgml364
-rw-r--r--lib.c39
-rw-r--r--lib.in99
-rw-r--r--libman.h85
-rw-r--r--libmandoc.h92
-rw-r--r--libmdoc.h141
-rw-r--r--libroff.h84
-rw-r--r--main.c401
-rw-r--r--main.h61
-rw-r--r--man-cgi.css13
-rw-r--r--man.7913
-rw-r--r--man.c690
-rw-r--r--man.cgi.7123
-rw-r--r--man.h113
-rw-r--r--man_hash.c107
-rw-r--r--man_html.c688
-rw-r--r--man_macro.c484
-rw-r--r--man_term.c1117
-rw-r--r--man_validate.c550
-rw-r--r--mandoc.1669
-rw-r--r--mandoc.3600
-rw-r--r--mandoc.c735
-rw-r--r--mandoc.h432
-rw-r--r--mandoc_char.7743
-rw-r--r--mandocdb.8293
-rw-r--r--mandocdb.c1909
-rw-r--r--mandocdb.h62
-rw-r--r--manpath.c225
-rw-r--r--manpath.h38
-rw-r--r--mdoc.73172
-rw-r--r--mdoc.c987
-rw-r--r--mdoc.h392
-rw-r--r--mdoc_argv.c716
-rw-r--r--mdoc_hash.c94
-rw-r--r--mdoc_html.c2284
-rw-r--r--mdoc_macro.c1787
-rw-r--r--mdoc_man.c637
-rw-r--r--mdoc_term.c2257
-rw-r--r--mdoc_validate.c2403
-rw-r--r--msec.c37
-rw-r--r--msec.in40
-rw-r--r--out.c303
-rw-r--r--out.h71
-rw-r--r--preconv.1158
-rw-r--r--preconv.c528
-rw-r--r--predefs.in65
-rw-r--r--read.c846
-rw-r--r--roff.7989
-rw-r--r--roff.c1768
-rw-r--r--st.c39
-rw-r--r--st.in78
-rw-r--r--style.css144
-rw-r--r--tbl.7348
-rw-r--r--tbl.c175
-rw-r--r--tbl_data.c276
-rw-r--r--tbl_html.c151
-rw-r--r--tbl_layout.c472
-rw-r--r--tbl_opts.c270
-rw-r--r--tbl_term.c444
-rw-r--r--term.c736
-rw-r--r--term.h128
-rw-r--r--term_ascii.c289
-rw-r--r--term_ps.c1185
-rw-r--r--test-fgetln.c11
-rw-r--r--test-getsubopt.c12
-rw-r--r--test-mmap.c10
-rw-r--r--test-strlcat.c8
-rw-r--r--test-strlcpy.c8
-rw-r--r--test-strptime.c13
-rw-r--r--tree.c349
-rw-r--r--vol.c39
-rw-r--r--vol.in35
-rw-r--r--whatis.1190
105 files changed, 45115 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000000..304237b47827
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,645 @@
+.PHONY: all clean install installcgi installwww lint www
+.SUFFIXES: .sgml .html .md5 .h .h.html
+.SUFFIXES: .1 .3 .7 .8
+.SUFFIXES: .1.txt .3.txt .7.txt .8.txt
+.SUFFIXES: .1.pdf .3.pdf .7.pdf .8.pdf
+.SUFFIXES: .1.ps .3.ps .7.ps .8.ps
+.SUFFIXES: .1.html .3.html .7.html .8.html
+.SUFFIXES: .1.xhtml .3.xhtml .7.xhtml .8.xhtml
+
+# Specify this if you want to hard-code the operating system to appear
+# in the lower-left hand corner of -mdoc manuals.
+#
+# CFLAGS += -DOSNAME="\"OpenBSD 4.5\""
+
+VERSION = 1.12.1
+VDATE = 23 March 2012
+
+# Define USE_WCHAR if and only if your system supports multi-byte
+# functions (setlocale(), wcwidth(), putwchar()) AND has
+# __STDC_ISO_10646__ (that is, wchar_t is simply a UCS-4 value).
+# If you define it on a system that DOESN'T, -Tlocale produces garbage.
+# If you don't define it, -Tlocale is a synonym for -Tascii.
+#
+CFLAGS += -DUSE_WCHAR
+
+# If your system has manpath(1), uncomment this. This is most any
+# system that's not OpenBSD or NetBSD. If uncommented, apropos(1),
+# mandocdb(8), and man.cgi will popen(3) manpath(1) to get the MANPATH
+# variable.
+#CFLAGS += -DUSE_MANPATH
+
+# Leave this enabled only if your system supports static binaries;
+# otherwise comment it out. This appears to be only BSD UNIX systems
+# (Mac OS X has no support and Linux requires -pthreads for static libdb).
+STATIC = -static
+
+CFLAGS += -g -DHAVE_CONFIG_H -DVERSION="\"$(VERSION)\""
+CFLAGS += -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings
+PREFIX = /usr/local
+WWWPREFIX = /var/www
+HTDOCDIR = $(WWWPREFIX)/htdocs
+CGIBINDIR = $(WWWPREFIX)/cgi-bin
+BINDIR = $(PREFIX)/bin
+INCLUDEDIR = $(PREFIX)/include/mandoc
+LIBDIR = $(PREFIX)/lib/mandoc
+MANDIR = $(PREFIX)/man
+EXAMPLEDIR = $(PREFIX)/share/examples/mandoc
+INSTALL = install
+INSTALL_PROGRAM = $(INSTALL) -m 0755
+INSTALL_DATA = $(INSTALL) -m 0444
+INSTALL_LIB = $(INSTALL) -m 0644
+INSTALL_SOURCE = $(INSTALL) -m 0644
+INSTALL_MAN = $(INSTALL_DATA)
+
+# Non-BSD systems (Linux, etc.) need -ldb to compile mandocdb and
+# apropos.
+# However, if you don't have -ldb at all (or it's not native), then
+# comment out apropos and mandocdb.
+#
+#DBLIB = -ldb
+DBBIN = apropos mandocdb man.cgi catman whatis
+DBLN = llib-lapropos.ln llib-lmandocdb.ln llib-lman.cgi.ln llib-lcatman.ln
+
+all: mandoc preconv demandoc $(DBBIN)
+
+SRCS = Makefile \
+ TODO \
+ apropos.1 \
+ apropos.c \
+ apropos_db.c \
+ apropos_db.h \
+ arch.c \
+ arch.in \
+ att.c \
+ att.in \
+ catman.8 \
+ catman.c \
+ cgi.c \
+ chars.c \
+ chars.in \
+ compat_fgetln.c \
+ compat_getsubopt.c \
+ compat_strlcat.c \
+ compat_strlcpy.c \
+ config.h.post \
+ config.h.pre \
+ demandoc.1 \
+ demandoc.c \
+ eqn.7 \
+ eqn.c \
+ eqn_html.c \
+ eqn_term.c \
+ example.style.css \
+ external.png \
+ html.c \
+ html.h \
+ index.css \
+ index.sgml \
+ lib.c \
+ lib.in \
+ libman.h \
+ libmandoc.h \
+ libmdoc.h \
+ libroff.h \
+ main.c \
+ main.h \
+ man.7 \
+ man.c \
+ man.cgi.7 \
+ man-cgi.css \
+ man.h \
+ man_hash.c \
+ man_html.c \
+ man_macro.c \
+ man_term.c \
+ man_validate.c \
+ mandoc.1 \
+ mandoc.3 \
+ mandoc.c \
+ mandoc.h \
+ mandoc_char.7 \
+ mandocdb.8 \
+ mandocdb.c \
+ mandocdb.h \
+ manpath.c \
+ manpath.h \
+ mdoc.7 \
+ mdoc.c \
+ mdoc.h \
+ mdoc_argv.c \
+ mdoc_hash.c \
+ mdoc_html.c \
+ mdoc_macro.c \
+ mdoc_man.c \
+ mdoc_term.c \
+ mdoc_validate.c \
+ msec.c \
+ msec.in \
+ out.c \
+ out.h \
+ preconv.1 \
+ preconv.c \
+ predefs.in \
+ read.c \
+ roff.7 \
+ roff.c \
+ st.c \
+ st.in \
+ style.css \
+ tbl.7 \
+ tbl.c \
+ tbl_data.c \
+ tbl_html.c \
+ tbl_layout.c \
+ tbl_opts.c \
+ tbl_term.c \
+ term.c \
+ term.h \
+ term_ascii.c \
+ term_ps.c \
+ test-fgetln.c \
+ test-getsubopt.c \
+ test-mmap.c \
+ test-strlcat.c \
+ test-strlcpy.c \
+ test-strptime.c \
+ tree.c \
+ vol.c \
+ vol.in \
+ whatis.1
+
+LIBMAN_OBJS = man.o \
+ man_hash.o \
+ man_macro.o \
+ man_validate.o
+LIBMAN_LNS = man.ln \
+ man_hash.ln \
+ man_macro.ln \
+ man_validate.ln
+
+LIBMDOC_OBJS = arch.o \
+ att.o \
+ lib.o \
+ mdoc.o \
+ mdoc_argv.o \
+ mdoc_hash.o \
+ mdoc_macro.o \
+ mdoc_validate.o \
+ st.o \
+ vol.o
+LIBMDOC_LNS = arch.ln \
+ att.ln \
+ lib.ln \
+ mdoc.ln \
+ mdoc_argv.ln \
+ mdoc_hash.ln \
+ mdoc_macro.ln \
+ mdoc_validate.ln \
+ st.ln \
+ vol.ln
+
+LIBROFF_OBJS = eqn.o \
+ roff.o \
+ tbl.o \
+ tbl_data.o \
+ tbl_layout.o \
+ tbl_opts.o
+LIBROFF_LNS = eqn.ln \
+ roff.ln \
+ tbl.ln \
+ tbl_data.ln \
+ tbl_layout.ln \
+ tbl_opts.ln
+
+LIBMANDOC_OBJS = $(LIBMAN_OBJS) \
+ $(LIBMDOC_OBJS) \
+ $(LIBROFF_OBJS) \
+ chars.o \
+ mandoc.o \
+ msec.o \
+ read.o
+LIBMANDOC_LNS = $(LIBMAN_LNS) \
+ $(LIBMDOC_LNS) \
+ $(LIBROFF_LNS) \
+ chars.ln \
+ mandoc.ln \
+ msec.ln \
+ read.ln
+
+COMPAT_OBJS = compat_fgetln.o \
+ compat_getsubopt.o \
+ compat_strlcat.o \
+ compat_strlcpy.o
+COMPAT_LNS = compat_fgetln.ln \
+ compat_getsubopt.ln \
+ compat_strlcat.ln \
+ compat_strlcpy.ln
+
+arch.o arch.ln: arch.in
+att.o att.ln: att.in
+chars.o chars.ln: chars.in
+lib.o lib.ln: lib.in
+msec.o msec.ln: msec.in
+roff.o roff.ln: predefs.in
+st.o st.ln: st.in
+vol.o vol.ln: vol.in
+
+$(LIBMAN_OBJS) $(LIBMAN_LNS): libman.h
+$(LIBMDOC_OBJS) $(LIBMDOC_LNS): libmdoc.h
+$(LIBROFF_OBJS) $(LIBROFF_LNS): libroff.h
+$(LIBMANDOC_OBJS) $(LIBMANDOC_LNS): mandoc.h mdoc.h man.h libmandoc.h config.h
+
+$(COMPAT_OBJS) $(COMPAT_LNS): config.h
+
+MANDOC_HTML_OBJS = eqn_html.o \
+ html.o \
+ man_html.o \
+ mdoc_html.o \
+ tbl_html.o
+MANDOC_HTML_LNS = eqn_html.ln \
+ html.ln \
+ man_html.ln \
+ mdoc_html.ln \
+ tbl_html.ln
+
+MANDOC_MAN_OBJS = mdoc_man.o
+MANDOC_MAN_LNS = mdoc_man.ln
+
+MANDOC_TERM_OBJS = eqn_term.o \
+ man_term.o \
+ mdoc_term.o \
+ term.o \
+ term_ascii.o \
+ term_ps.o \
+ tbl_term.o
+MANDOC_TERM_LNS = eqn_term.ln \
+ man_term.ln \
+ mdoc_term.ln \
+ term.ln \
+ term_ascii.ln \
+ term_ps.ln \
+ tbl_term.ln
+
+MANDOC_OBJS = $(MANDOC_HTML_OBJS) \
+ $(MANDOC_MAN_OBJS) \
+ $(MANDOC_TERM_OBJS) \
+ main.o \
+ out.o \
+ tree.o
+MANDOC_LNS = $(MANDOC_HTML_LNS) \
+ $(MANDOC_MAN_LNS) \
+ $(MANDOC_TERM_LNS) \
+ main.ln \
+ out.ln \
+ tree.ln
+
+$(MANDOC_HTML_OBJS) $(MANDOC_HTML_LNS): html.h
+$(MANDOC_TERM_OBJS) $(MANDOC_TERM_LNS): term.h
+$(MANDOC_OBJS) $(MANDOC_LNS): main.h mandoc.h mdoc.h man.h config.h out.h
+
+MANDOCDB_OBJS = mandocdb.o manpath.o
+MANDOCDB_LNS = mandocdb.ln manpath.ln
+
+$(MANDOCDB_OBJS) $(MANDOCDB_LNS): mandocdb.h mandoc.h mdoc.h man.h config.h manpath.h
+
+PRECONV_OBJS = preconv.o
+PRECONV_LNS = preconv.ln
+
+$(PRECONV_OBJS) $(PRECONV_LNS): config.h
+
+APROPOS_OBJS = apropos.o apropos_db.o manpath.o
+APROPOS_LNS = apropos.ln apropos_db.ln manpath.ln
+
+$(APROPOS_OBJS) $(APROPOS_LNS): config.h mandoc.h apropos_db.h manpath.h mandocdb.h
+
+CGI_OBJS = $(MANDOC_HTML_OBJS) \
+ $(MANDOC_MAN_OBJS) \
+ $(MANDOC_TERM_OBJS) \
+ cgi.o \
+ apropos_db.o \
+ manpath.o \
+ out.o \
+ tree.o
+
+CGI_LNS = $(MANDOC_HTML_LNS) \
+ $(MANDOC_MAN_LNS) \
+ $(MANDOC_TERM_LNS) \
+ cgi.ln \
+ apropos_db.ln \
+ manpath.ln \
+ out.ln \
+ tree.ln
+
+$(CGI_OBJS) $(CGI_LNS): main.h mdoc.h man.h out.h config.h mandoc.h apropos_db.h manpath.h mandocdb.h
+
+CATMAN_OBJS = catman.o manpath.o
+CATMAN_LNS = catman.ln manpath.ln
+
+$(CATMAN_OBJS) $(CATMAN_LNS): config.h mandoc.h manpath.h mandocdb.h
+
+DEMANDOC_OBJS = demandoc.o
+DEMANDOC_LNS = demandoc.ln
+
+$(DEMANDOC_OBJS) $(DEMANDOC_LNS): config.h
+
+INDEX_MANS = apropos.1.html \
+ apropos.1.xhtml \
+ apropos.1.ps \
+ apropos.1.pdf \
+ apropos.1.txt \
+ catman.8.html \
+ catman.8.xhtml \
+ catman.8.ps \
+ catman.8.pdf \
+ catman.8.txt \
+ demandoc.1.html \
+ demandoc.1.xhtml \
+ demandoc.1.ps \
+ demandoc.1.pdf \
+ demandoc.1.txt \
+ mandoc.1.html \
+ mandoc.1.xhtml \
+ mandoc.1.ps \
+ mandoc.1.pdf \
+ mandoc.1.txt \
+ whatis.1.html \
+ whatis.1.xhtml \
+ whatis.1.ps \
+ whatis.1.pdf \
+ whatis.1.txt \
+ mandoc.3.html \
+ mandoc.3.xhtml \
+ mandoc.3.ps \
+ mandoc.3.pdf \
+ mandoc.3.txt \
+ eqn.7.html \
+ eqn.7.xhtml \
+ eqn.7.ps \
+ eqn.7.pdf \
+ eqn.7.txt \
+ man.7.html \
+ man.7.xhtml \
+ man.7.ps \
+ man.7.pdf \
+ man.7.txt \
+ man.cgi.7.html \
+ man.cgi.7.xhtml \
+ man.cgi.7.ps \
+ man.cgi.7.pdf \
+ man.cgi.7.txt \
+ mandoc_char.7.html \
+ mandoc_char.7.xhtml \
+ mandoc_char.7.ps \
+ mandoc_char.7.pdf \
+ mandoc_char.7.txt \
+ mdoc.7.html \
+ mdoc.7.xhtml \
+ mdoc.7.ps \
+ mdoc.7.pdf \
+ mdoc.7.txt \
+ preconv.1.html \
+ preconv.1.xhtml \
+ preconv.1.ps \
+ preconv.1.pdf \
+ preconv.1.txt \
+ roff.7.html \
+ roff.7.xhtml \
+ roff.7.ps \
+ roff.7.pdf \
+ roff.7.txt \
+ tbl.7.html \
+ tbl.7.xhtml \
+ tbl.7.ps \
+ tbl.7.pdf \
+ tbl.7.txt \
+ mandocdb.8.html \
+ mandocdb.8.xhtml \
+ mandocdb.8.ps \
+ mandocdb.8.pdf \
+ mandocdb.8.txt
+
+$(INDEX_MANS): mandoc
+
+INDEX_OBJS = $(INDEX_MANS) \
+ man.h.html \
+ mandoc.h.html \
+ mdoc.h.html \
+ mdocml.tar.gz \
+ mdocml.md5
+
+www: index.html
+
+lint: llib-lmandoc.ln llib-lpreconv.ln llib-ldemandoc.ln $(DBLN)
+
+clean:
+ rm -f libmandoc.a $(LIBMANDOC_OBJS)
+ rm -f llib-llibmandoc.ln $(LIBMANDOC_LNS)
+ rm -f mandocdb $(MANDOCDB_OBJS)
+ rm -f llib-lmandocdb.ln $(MANDOCDB_LNS)
+ rm -f preconv $(PRECONV_OBJS)
+ rm -f llib-lpreconv.ln $(PRECONV_LNS)
+ rm -f apropos whatis $(APROPOS_OBJS)
+ rm -f llib-lapropos.ln $(APROPOS_LNS)
+ rm -f man.cgi $(CGI_OBJS)
+ rm -f llib-lman.cgi.ln $(CGI_LNS)
+ rm -f catman $(CATMAN_OBJS)
+ rm -f llib-lcatman.ln $(CATMAN_LNS)
+ rm -f demandoc $(DEMANDOC_OBJS)
+ rm -f llib-ldemandoc.ln $(DEMANDOC_LNS)
+ rm -f mandoc $(MANDOC_OBJS)
+ rm -f llib-lmandoc.ln $(MANDOC_LNS)
+ rm -f config.h config.log $(COMPAT_OBJS) $(COMPAT_LNS)
+ rm -f mdocml.tar.gz mdocml-win32.zip mdocml-win64.zip mdocml-macosx.zip
+ rm -f index.html $(INDEX_OBJS)
+ rm -rf test-fgetln.dSYM
+ rm -rf test-strlcpy.dSYM
+ rm -rf test-strlcat.dSYM
+ rm -rf test-strptime.dSYM
+ rm -rf test-mmap.dSYM
+ rm -rf test-getsubopt.dSYM
+ rm -rf apropos.dSYM
+ rm -rf catman.dSYM
+ rm -rf mandocdb.dSYM
+ rm -rf whatis.dSYM
+
+install: all
+ mkdir -p $(DESTDIR)$(BINDIR)
+ mkdir -p $(DESTDIR)$(EXAMPLEDIR)
+ mkdir -p $(DESTDIR)$(LIBDIR)
+ mkdir -p $(DESTDIR)$(INCLUDEDIR)
+ mkdir -p $(DESTDIR)$(MANDIR)/man1
+ mkdir -p $(DESTDIR)$(MANDIR)/man3
+ mkdir -p $(DESTDIR)$(MANDIR)/man7
+ $(INSTALL_PROGRAM) mandoc preconv demandoc $(DESTDIR)$(BINDIR)
+ $(INSTALL_LIB) libmandoc.a $(DESTDIR)$(LIBDIR)
+ $(INSTALL_LIB) man.h mdoc.h mandoc.h $(DESTDIR)$(INCLUDEDIR)
+ $(INSTALL_MAN) mandoc.1 preconv.1 demandoc.1 $(DESTDIR)$(MANDIR)/man1
+ $(INSTALL_MAN) mandoc.3 $(DESTDIR)$(MANDIR)/man3
+ $(INSTALL_MAN) man.7 mdoc.7 roff.7 eqn.7 tbl.7 mandoc_char.7 $(DESTDIR)$(MANDIR)/man7
+ $(INSTALL_DATA) example.style.css $(DESTDIR)$(EXAMPLEDIR)
+
+installcgi: all
+ mkdir -p $(DESTDIR)$(CGIBINDIR)
+ mkdir -p $(DESTDIR)$(HTDOCDIR)
+ $(INSTALL_PROGRAM) man.cgi $(DESTDIR)$(CGIBINDIR)
+ $(INSTALL_DATA) example.style.css $(DESTDIR)$(HTDOCDIR)/man.css
+ $(INSTALL_DATA) man-cgi.css $(DESTDIR)$(HTDOCDIR)
+
+installwww: www
+ mkdir -p $(PREFIX)/snapshots
+ mkdir -p $(PREFIX)/binaries
+ $(INSTALL_DATA) index.html external.png index.css $(PREFIX)
+ $(INSTALL_DATA) $(INDEX_MANS) style.css $(PREFIX)
+ $(INSTALL_DATA) mandoc.h.html man.h.html mdoc.h.html $(PREFIX)
+ $(INSTALL_DATA) mdocml.tar.gz $(PREFIX)/snapshots
+ $(INSTALL_DATA) mdocml.md5 $(PREFIX)/snapshots
+ $(INSTALL_DATA) mdocml.tar.gz $(PREFIX)/snapshots/mdocml-$(VERSION).tar.gz
+ $(INSTALL_DATA) mdocml.md5 $(PREFIX)/snapshots/mdocml-$(VERSION).md5
+
+libmandoc.a: $(COMPAT_OBJS) $(LIBMANDOC_OBJS)
+ $(AR) rs $@ $(COMPAT_OBJS) $(LIBMANDOC_OBJS)
+
+llib-llibmandoc.ln: $(COMPAT_LNS) $(LIBMANDOC_LNS)
+ $(LINT) $(LINTFLAGS) -Clibmandoc $(COMPAT_LNS) $(LIBMANDOC_LNS)
+
+mandoc: $(MANDOC_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(MANDOC_OBJS) libmandoc.a
+
+llib-lmandoc.ln: $(MANDOC_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Cmandoc $(MANDOC_LNS) llib-llibmandoc.ln
+
+mandocdb: $(MANDOCDB_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(MANDOCDB_OBJS) libmandoc.a $(DBLIB)
+
+llib-lmandocdb.ln: $(MANDOCDB_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Cmandocdb $(MANDOCDB_LNS) llib-llibmandoc.ln
+
+preconv: $(PRECONV_OBJS)
+ $(CC) $(LDFLAGS) -o $@ $(PRECONV_OBJS)
+
+llib-lpreconv.ln: $(PRECONV_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Cpreconv $(PRECONV_LNS) llib-llibmandoc.ln
+
+whatis: apropos
+ cp -f apropos whatis
+
+apropos: $(APROPOS_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(APROPOS_OBJS) libmandoc.a $(DBLIB)
+
+llib-lapropos.ln: $(APROPOS_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Capropos $(APROPOS_LNS) llib-llibmandoc.ln
+
+catman: $(CATMAN_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(CATMAN_OBJS) libmandoc.a $(DBLIB)
+
+llib-lcatman.ln: $(CATMAN_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Ccatman $(CATMAN_LNS) llib-llibmandoc.ln
+
+man.cgi: $(CGI_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) $(STATIC) -o $@ $(CGI_OBJS) libmandoc.a $(DBLIB)
+
+llib-lman.cgi.ln: $(CGI_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Cman.cgi $(CGI_LNS) llib-llibmandoc.ln
+
+demandoc: $(DEMANDOC_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a
+
+llib-ldemandoc.ln: $(DEMANDOC_LNS) llib-llibmandoc.ln
+ $(LINT) $(LINTFLAGS) -Cdemandoc $(DEMANDOC_LNS) llib-llibmandoc.ln
+
+mdocml.md5: mdocml.tar.gz
+ md5 mdocml.tar.gz >$@
+
+mdocml.tar.gz: $(SRCS)
+ mkdir -p .dist/mdocml-$(VERSION)/
+ $(INSTALL_SOURCE) $(SRCS) .dist/mdocml-$(VERSION)
+ ( cd .dist/ && tar zcf ../$@ ./ )
+ rm -rf .dist/
+
+mdocml-win32.zip: $(SRCS) # 32-bit MinGW cross-build; abort on the first failing step
+ mkdir -p .win32/mdocml-$(VERSION)/
+ $(INSTALL_SOURCE) $(SRCS) .win32
+ cp .win32/Makefile .win32/Makefile.old
+ egrep -v -e DUSE_WCHAR -e ^DBBIN .win32/Makefile.old >.win32/Makefile
+ ( cd .win32 && \
+ CC=i686-w64-mingw32-gcc AR=i686-w64-mingw32-ar CFLAGS='-DOSNAME=\"Windows\"' make && \
+ make install PREFIX=mdocml-$(VERSION) && \
+ zip -r ../$@ mdocml-$(VERSION) )
+ rm -rf .win32
+
+mdocml-win64.zip: $(SRCS) # 64-bit MinGW cross-build; abort on the first failing step
+ mkdir -p .win64/mdocml-$(VERSION)/
+ $(INSTALL_SOURCE) $(SRCS) .win64
+ cp .win64/Makefile .win64/Makefile.old
+ egrep -v -e DUSE_WCHAR -e ^DBBIN .win64/Makefile.old >.win64/Makefile
+ ( cd .win64 && \
+ CC=x86_64-w64-mingw32-gcc AR=x86_64-w64-mingw32-ar CFLAGS='-DOSNAME=\"Windows\"' make && \
+ make install PREFIX=mdocml-$(VERSION) && \
+ zip -r ../$@ mdocml-$(VERSION) )
+ rm -rf .win64
+
+mdocml-macosx.zip: $(SRCS) # universal-binary build; abort on the first failing step
+ mkdir -p .macosx/mdocml-$(VERSION)/
+ $(INSTALL_SOURCE) $(SRCS) .macosx
+ ( cd .macosx && \
+ CFLAGS="-arch i386 -arch x86_64 -arch ppc" LDFLAGS="-arch i386 -arch x86_64 -arch ppc" make && \
+ make install PREFIX=mdocml-$(VERSION) && \
+ zip -r ../$@ mdocml-$(VERSION) )
+ rm -rf .macosx
+
+index.html: $(INDEX_OBJS)
+
+config.h: config.h.pre config.h.post
+ rm -f config.log
+ ( cat config.h.pre; \
+ echo; \
+ if $(CC) $(CFLAGS) -Werror -o test-fgetln test-fgetln.c >> config.log 2>&1; then \
+ echo '#define HAVE_FGETLN'; \
+ rm test-fgetln; \
+ fi; \
+ if $(CC) $(CFLAGS) -Werror -o test-strptime test-strptime.c >> config.log 2>&1; then \
+ echo '#define HAVE_STRPTIME'; \
+ rm test-strptime; \
+ fi; \
+ if $(CC) $(CFLAGS) -Werror -o test-getsubopt test-getsubopt.c >> config.log 2>&1; then \
+ echo '#define HAVE_GETSUBOPT'; \
+ rm test-getsubopt; \
+ fi; \
+ if $(CC) $(CFLAGS) -Werror -o test-strlcat test-strlcat.c >> config.log 2>&1; then \
+ echo '#define HAVE_STRLCAT'; \
+ rm test-strlcat; \
+ fi; \
+ if $(CC) $(CFLAGS) -Werror -o test-mmap test-mmap.c >> config.log 2>&1; then \
+ echo '#define HAVE_MMAP'; \
+ rm test-mmap; \
+ fi; \
+ if $(CC) $(CFLAGS) -Werror -o test-strlcpy test-strlcpy.c >> config.log 2>&1; then \
+ echo '#define HAVE_STRLCPY'; \
+ rm test-strlcpy; \
+ fi; \
+ echo; \
+ cat config.h.post \
+ ) > $@
+
+.h.h.html:
+ highlight -I $< >$@
+
+.1.1.txt .3.3.txt .7.7.txt .8.8.txt:
+ ./mandoc -Tascii -Wall,stop $< | col -b >$@
+
+.1.1.html .3.3.html .7.7.html .8.8.html:
+ ./mandoc -Thtml -Wall,stop -Ostyle=style.css,man=%N.%S.html,includes=%I.html $< >$@
+
+.1.1.ps .3.3.ps .7.7.ps .8.8.ps:
+ ./mandoc -Tps -Wall,stop $< >$@
+
+.1.1.xhtml .3.3.xhtml .7.7.xhtml .8.8.xhtml:
+ ./mandoc -Txhtml -Wall,stop -Ostyle=style.css,man=%N.%S.xhtml,includes=%I.html $< >$@
+
+.1.1.pdf .3.3.pdf .7.7.pdf .8.8.pdf:
+ ./mandoc -Tpdf -Wall,stop $< >$@
+
+.sgml.html:
+ validate --warn $<
+ sed -e "s!@VERSION@!$(VERSION)!" -e "s!@VDATE@!$(VDATE)!" $< >$@
diff --git a/TODO b/TODO
new file mode 100644
index 000000000000..a8701361290c
--- /dev/null
+++ b/TODO
@@ -0,0 +1,372 @@
+************************************************************************
+* Official mandoc TODO.
+* $Id: TODO,v 1.129 2012/03/04 23:53:37 schwarze Exp $
+************************************************************************
+
+************************************************************************
+* parser bugs
+************************************************************************
+
+- ".\}" on its own line gets translated to bare ".\&"
+ which forces pset() into man(7)
+ and then triggers an unknown macro error
+ reported by naddy@ Sun, 3 Jul 2011 21:52:24 +0200
+
+************************************************************************
+* formatter bugs
+************************************************************************
+
+- tbl(7): Horizontal and vertical lines are formatted badly:
+ With the box option, there is too much white space at the end of cells.
+ Horizontal lines from "=" lines are a bit too long.
+ yuri dot pankov at gmail dot com Thu, 14 Apr 2011 05:45:26 +0400
+
+************************************************************************
+* missing features
+************************************************************************
+
+--- missing roff features ----------------------------------------------
+
+- The pod2man preamble wants \h'...' with quoted numerical arguments,
+ see for example AUTHORS in MooseX::Getopt.3p, p5-MooseX-Getopt.
+ reported by Andreas Voegele <mail at andreasvoegele dot com>
+ Tue, 22 Nov 2011 15:34:47 +0100 on ports@
+
+- .if n \{
+ .br\}
+ should cause an extra space to be raised.
+
+- .ad (adjust margins)
+ .ad l -- adjust left margin only (flush left)
+ .ad r -- adjust right margin only (flush right)
+ .ad c -- center text on line
+ .ad b -- adjust both margins (alias: .ad n)
+ .na -- temporarily disable adjustment without changing the mode
+ .ad -- re-enable adjustment without changing the mode
+ Adjustment mode is ignored while in no-fill mode (.nf).
+
+- .it (line traps) occur in mysql(1), yasm_arch(7)
+ generated by DocBook XSL Stylesheets v1.71.1 <http://docbook.sf.net/>
+ reported by brad@ Sat, 15 Jan 2011 15:48:18 -0500
+
+- .ns (no-space mode) occurs in xine-config(1)
+ reported by brad@ Sat, 15 Jan 2011 15:45:23 -0500
+
+- xloadimage(1) wants .ti (temporary indent), rep by naddy@
+
+- .ta (tab settings) occurs in ircbug(1) and probably gnats(1)
+ reported by brad@ Sat, 15 Jan 2011 15:50:51 -0500
+
+- \c (interrupted text) occurs in chat(8)
+
+- using undefined strings or macros defines them to be empty
+ wl@ Mon, 14 Nov 2011 14:37:01 +0000
+
+--- missing mdoc features ----------------------------------------------
+
+- fix bad block nesting involving multiple identical explicit blocks
+ see the OpenBSD mdoc_macro.c 1.47 commit message
+
+- .Bl -column .Xo support is missing
+ ultimate goal:
+ restore .Xr and .Dv to
+ lib/libc/compat-43/sigvec.3
+ lib/libc/gen/signal.3
+ lib/libc/sys/sigaction.2
+
+- edge case: decide how to deal with blk_full bad nesting, e.g.
+ .Sh .Nm .Bk .Nm .Ek .Sh found by jmc@ in ssh-keygen(1)
+ from jmc@ Wed, 14 Jul 2010 18:10:32 +0100
+
+- \\ is now implemented correctly
+ * when defining strings and macros using .ds and .de
+ * when parsing roff(7) and man(7) macro arguments
+ It does not yet work in mdoc(7) macro arguments
+ because libmdoc does not yet use mandoc_getarg().
+ Also check what happens in plain text, it must be identical to \e.
+
+- .Bd -filled should not be the same as .Bd -ragged, but align both
+ the left and right margin. In groff, it is implemented in terms
+ of .ad b, which we don't have either. Found in cksum(1).
+
+- implement blank `Bl -column', such as
+ .Bl -column
+ .It foo Ta bar
+ .El
+
+- explicitly disallow nested `Bl -column', which would clobber internal
+ flags defined for struct mdoc_macro
+
+- In .Bl -column .It, the end of the line probably has to be regarded
+ as an implicit .Ta, if there could be one, see the following mildly
+ ugly code from login.conf(5):
+ .Bl -column minpasswordlen program xetcxmotd
+ .It path Ta path Ta value of Dv _PATH_DEFPATH
+ .br
+ Default search path.
+ reported by Michal Mazurek <akfaew at jasminek dot net>
+ via jmc@ Thu, 7 Apr 2011 16:00:53 +0059
+
+- inside `.Bl -column' phrases, punctuation is handled like normal
+ text, e.g. `.Bl -column .It Fl x . Ta ...' should give "-x -."
+
+- inside `.Bl -column' phrases, TERMP_IGNDELIM handling by `Pf'
+ is not safe, e.g. `.Bl -column .It Pf a b .' gives "ab."
+ but should give "ab ."
+
+- set a meaningful default if no `Bl' list type is assigned
+
+- have a blank `It' head for `Bl -tag' not puke
+
+- prohibit `Nm' from having non-text HEAD children
+ (e.g., NetBSD mDNSShared/dns-sd.1)
+ (mdoc_html.c and mdoc_term.c `Nm' handlers can be slightly simplified)
+
+- When there is free text in the SYNOPSIS and that free text contains
+ the .Nm macro, groff somehow understands to treat the .Nm as an in-line
+ macro, while mandoc treats it as a block macro and breaks the line.
+ No idea how the logic for distinguishing in-line and block instances
+ should be, needs investigation.
+ uqs@ Thu, 2 Jun 2011 11:03:51 +0200
+ uqs@ Thu, 2 Jun 2011 11:33:35 +0200
+
+--- missing man features -----------------------------------------------
+
+- groff an-ext.tmac macros (.UR, .UE) occur in xine(5)
+ reported by brad@ Sat, 15 Jan 2011 15:45:23 -0500
+
+- -T[x]html doesn't stipulate non-collapsing spaces in literal mode
+
+--- missing tbl features -----------------------------------------------
+
+- implement basic non-parametric .de to support e.g. sox(1)
+ reported by naddy@ Sat, 16 Oct 2010 23:51:57 +0200
+ *** sox(1) still doesn't work, tbl(1) errors need investigation
+
+- allow standalone `.' to be interpreted as an end-of-layout
+ delimiter instead of being thrown away as a no-op roff line
+ reported by Yuri Pankov, Wed 18 May 2011 11:34:59 CEST
+
+--- missing misc features ----------------------------------------------
+
+- clean up escape sequence handling, creating three classes:
+ (1) fully implemented, or parsed and ignored without loss of content
+ (2) unimplemented, potentially causing loss of content
+ or serious mangling of formatting (e.g. \n) -> ERROR
+ see textproc/mgdiff(1) for nice examples
+ (3) undefined, just output the character -> perhaps WARNING
+
+- The \t escape sequence is the same as a literal tab, see for example
+ the ASCII table in hexdump(1) where
+ .Bl -column \&000_nu \&001_so \&002_st \&003_et \&004_eo
+ .It \&000\ nul\t001\ soh\t002\ stx\t003\ etx\t004\ eot\t005\ enq
+ produces
+ 000 nul 001 soh 002 stx 003 etx 004 eot 005 enq
+ and the example in oldrdist(1)
+
+- look at pages generated from reStructuredText, e.g. devel/mercurial hg(1)
+ These are a weird mixture of man(7) and custom autogenerated low-level
+ roff stuff. Figure out to what extent we can cope.
+ For details, see http://docutils.sourceforge.net/rst.html
+ noted by stsp@ Sat, 24 Apr 2010 09:17:55 +0200
+ reminded by nicm@ Mon, 3 May 2010 09:52:41 +0100
+
+- check compatibility with Plan9:
+ http://swtch.com/usr/local/plan9/tmac/tmac.an
+ http://swtch.com/plan9port/man/man7/man.html
+ "Anthony J. Bentley" <anthonyjbentley@gmail.com> 28 Dec 2010 21:58:40 -0700
+
+************************************************************************
+* formatting issues: ugly output
+************************************************************************
+
+- a column list with blank `Ta' cells triggers a spurious
+ start-with-whitespace printing of a newline
+
+- double quotes inside double quotes are escaped by doubling them
+ implement this in mdoc(7), too
+ so far, we only have it in roff(7) and man(7)
+ reminded by millert@ Thu, 09 Dec 2010 17:29:52 -0500
+
+- perl(1) SYNOPSIS looks bad; reported by deraadt@
+ 1) man(7) seems to need SYNOPSIS .Nm blocks, too
+
+- In .Bl -column,
+ .It Em Authentication<tab>Key Length
+ ought to render "Key Length" with emphasis, too,
+ see OpenBSD iked.conf(5).
+ reported again by Nicolas Joly via wiz@ Wed, 12 Oct 2011 00:20:00 +0200
+
+- empty phrases in .Bl column produce too few blanks
+ try e.g. .Bl -column It Ta Ta
+ reported by millert Fri, 02 Apr 2010 16:13:46 -0400
+
+- .%T can have trailing punctuation. Currently, it puts the trailing
+ punctuation into a trailing MDOC_TEXT element inside its own scope.
+ That element should rather be outside its scope, such that the
+ punctuation does not get underlines. This is not trivial to
+ implement because .%T then needs some features of in_line_eoln() -
+ slurp all arguments into one single text element - and one feature
+ of in_line() - put trailing punctuation out of scope.
+ Found in mount_nfs(8) and exports(5), search for "Appendix".
+
+- in enclosures, mandoc sometimes fancies a bogus end of sentence
+ reminded by jmc@ Thu, 23 Sep 2010 18:13:39 +0059
+
+************************************************************************
+* formatting issues: gratuitous differences
+************************************************************************
+
+- .Rv (and probably .Ex) print different text if an `Nm' has been named
+ or not (run a manual without `Nm blah' to see this). I'm not sure
+ that this exists in the wild, but it's still an error.
+
+- In .Bl -bullet, the groff bullet is "+\b+\bo\bo", the mandoc bullet
+ is just "o\bo".
+ see for example OpenBSD ksh(1)
+
+- The characters "|" and "\*(Ba" should never be bold,
+ not even in the middle of a word, e.g. ".Cm b\*(Bac" in
+ "mknod [-m mode] name b|c major minor"
+ in OpenBSD ksh(1)
+
+- A bogus .Pp between two .It must not produce a double blank line,
+ see between -R and -r in OpenBSD rm(1), before "update" in mount(8),
+ or in DIAGNOSTICS in init(8), or before "is always true" in ksh(1).
+ The same happens with .Pp just before .El, see bgpd.conf(5).
+ Also have `It' complain if `Pp' is invoked at certain times (not
+ -compact?).
+
+- .Pp between two .It in .Bl -column should produce one,
+ not two blank lines, see e.g. login.conf(5).
+ reported by jmc@ Sun, 17 Apr 2011 14:04:58 +0059
+ reported again by sthen@ Wed, 18 Jan 2012 02:09:39 +0000 (UTC)
+
+- If the *first* line after .It is .Pp, break the line right after
+ the tag, do not pad with space characters before breaking.
+ See the description of the a, c, and i commands in sed(1).
+
+- If the first line after .It is .D1, do not assert a blank line
+ in between, see for example tmux(1).
+ reported by nicm@ 13 Jan 2011 00:18:57 +0000
+
+- .Nx 1.0a
+ should be "NetBSD 1.0A", not "NetBSD 1.0a",
+ see OpenBSD ccdconfig(8).
+
+- In .Bl -tag, if a tag exceeds the right margin and must be continued
+ on the next line, it must be indented by -width, not width+1;
+ see "rule block|pass" in OpenBSD ifconfig(8).
+
+- When the -width string contains macros, the macros must be rendered
+ before measuring the width, for example
+ .Bl -tag -width ".Dv message"
+ in magic(5), located in src/usr.bin/file, is the same
+ as -width 7n, not -width 11n.
+ The same applies to .Bl -column column widths;
+ reported again by Nicolas Joly Thu, 1 Mar 2012 13:41:26 +0100 via wiz@ 5 Mar
+
+- The \& zero-width character counts as output.
+ That is, when it is alone on a line between two .Pp,
+ we want three blank lines, not two as in mandoc.
+
+- When .Fn arguments exceed one output line, all but the first
+ should be indented, see e.g. rpc(3);
+ reported by jmc@ on discuss@ Fri, 29 Oct 2010 13:48:33 +0100
+ reported again by Nicolas Joly via wiz@ Sun, 18 Sep 2011 18:24:40 +0200
+ Also, we don't want to break the line within the argument of:
+ .Fa "chtype tl"
+
+- .Ns should work when called at the end of an input line, see
+ the following code in vi(1):
+ .It Xo
+ .Op Ar line
+ .Cm a Ns Op Cm ppend Ns
+ .Op Cm !\&
+ .Xc
+ The input text is appended after the specified line.
+
+- Header lines of excessive length:
+ Port OpenBSD man_term.c rev. 1.25 to mdoc_term.c
+ and document it in mdoc(7) and man(7) COMPATIBILITY
+ found while talking to Chris Bennett
+
+- In man(7), the sequence
+ .HP
+ one line of regular text
+ .SH
+ should not produce two blank lines before the .SH,
+ see for example named-checkconf(8).
+
+- In man(7), the sequence
+ .SH HEADER
+ <blank line>
+ .PP
+ regular text
+ should not produce any blank lines between the header and the text,
+ see for example rsync(1).
+ Reported by naddy@ Mon, 28 Mar 2011 20:45:42 +0200
+
+- In man(7), the sequence
+ regular text
+ .IP
+ .IP "tag"
+ indented text
+ should produce one, not four blank lines between the regular text
+ and the tag, see for example rsync(1).
+ Likewise,
+ regular text
+ .IP
+ indented text
+ should produce one, not two blank lines in between, and
+ regular text
+ .IP
+ .RS
+ .IP tag
+ indented text
+ should produce one, not three blank lines.
+ Reported by naddy@ Mon, 28 Mar 2011 20:45:42 +0200
+
+- trailing whitespace must be ignored even when followed by a font escape,
+ see for example
+ makes
+ \fBdig \fR
+ operate in batch mode
+ in dig(1).
+
+************************************************************************
+* error reporting issues
+************************************************************************
+
+- .TP directly followed by .RS gives an assertion.
+
+************************************************************************
+* performance issues
+************************************************************************
+
+Several areas can be cleaned up to make mandoc even faster. These are
+
+- improve hashing mechanism for macros (quite important: performance)
+
+- improve hashing mechanism for characters (not as important)
+
+- the PDF file is HUGE: this can be reduced by using relative offsets
+
+- instead of re-initialising the roff predefined-strings set before each
+ parse, create a read-only version the first time and copy it
+
+************************************************************************
+* structural issues
+************************************************************************
+
+- We use the input line number at several places to distinguish
+ same-line from different-line input. That plainly doesn't work
+ with user-defined macros, leading to random breakage.
+
+- Find better ways to prevent endless loops
+ in roff(7) macro and string expansion.
+
+- Finish cleanup of date handling.
+ Decide which formats should be recognized where.
+ Update both mdoc(7) and man(7) documentation.
+ Triggered by Tim van der Molen Tue, 22 Feb 2011 20:30:45 +0100
diff --git a/apropos.1 b/apropos.1
new file mode 100644
index 000000000000..7dea132a461e
--- /dev/null
+++ b/apropos.1
@@ -0,0 +1,328 @@
+.\" $Id: apropos.1,v 1.17 2012/03/24 01:46:25 kristaps Exp $
+.\"
+.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: March 24 2012 $
+.Dt APROPOS 1
+.Os
+.Sh NAME
+.Nm apropos
+.Nd search manual page databases
+.Sh SYNOPSIS
+.Nm
+.Op Fl C Ar file
+.Op Fl M Ar manpath
+.Op Fl m Ar manpath
+.Op Fl S Ar arch
+.Op Fl s Ar section
+.Ar expression ...
+.Sh DESCRIPTION
+The
+.Nm
+utility queries manual page databases generated by
+.Xr mandocdb 8 ,
+evaluating an
+.Ar expression
+for each file in each database.
+.Pp
+By default,
+.Nm
+searches for
+.Xr mandocdb 8
+databases in the default paths stipulated by
+.Xr man 1 ,
+and parses terms as case-insensitive substrings
+of manual names and descriptions.
+Multiple terms imply pairwise
+.Fl o .
+If standard output is a TTY, a result may be selected from a list and
+its manual displayed with the pager.
+.Pp
+Its arguments are as follows:
+.Bl -tag -width Ds
+.It Fl C Ar file
+Specify an alternative configuration
+.Ar file
+in
+.Xr man.conf 5
+format.
+.It Fl M Ar manpath
+Use the colon-separated path instead of the default list of paths
+searched for
+.Xr mandocdb 8
+databases.
+Invalid paths, or paths without manual databases, are ignored.
+.It Fl m Ar manpath
+Prepend the colon-separated paths to the list of paths searched
+for
+.Xr mandocdb 8
+databases.
+Invalid paths, or paths without manual databases, are ignored.
+.It Fl S Ar arch
+Search only for a particular architecture.
+.It Fl s Ar section
+Search only for a manual section.
+See
+.Xr man 1
+for a listing of manual sections.
+.El
+.Pp
+An
+.Ar expression
+consists of search terms joined by logical operators
+.Fl a
+.Pq and
+and
+.Fl o
+.Pq or .
+The
+.Fl a
+operator has precedence over
+.Fl o
+and both are evaluated left-to-right.
+.Bl -tag -width Ds
+.It \&( Ar expr No \&)
+True if the subexpression
+.Ar expr
+is true.
+.It Ar expr1 Fl a Ar expr2
+True if both
+.Ar expr1
+and
+.Ar expr2
+are true (logical
+.Qq and ) .
+.It Ar expr1 Oo Fl o Oc Ar expr2
+True if
+.Ar expr1
+and/or
+.Ar expr2
+evaluate to true (logical
+.Qq or ) .
+.It Ar term
+True if
+.Ar term
+is satisfied.
+This has syntax
+.Li [key[,key]*(=~)]?val ,
+where operand
+.Cm key
+is an
+.Xr mdoc 7
+macro to query and
+.Cm val
+is its value.
+See
+.Sx Macro Keys
+for a list of available keys.
+Operator
+.Li \&=
+evaluates a substring, while
+.Li \&~
+evaluates a regular expression.
+.It Fl i Ar term
+If
+.Ar term
+is a regular expression, it
+is evaluated case-insensitively.
+Has no effect on substring terms.
+.El
+.Pp
+Results are sorted by manual title, with output formatted as
+.Pp
+.D1 title(sec) \- description
+.Pp
+Where
+.Qq title
+is the manual's title (note multiple manual names may exist for one
+title),
+.Qq sec
+is the manual section, and
+.Qq description
+is the manual's short description.
+If an architecture is specified for the manual, it is displayed as
+.Pp
+.D1 title(sec/arch) \- description
+.Pp
+If on a TTY, results are prefixed with a numeric identifier.
+.Pp
+.D1 [index] title(sec) \- description
+.Pp
+One may choose a manual by entering the index at the prompt.
+Valid choices are displayed using
+.Ev MANPAGER ,
+or failing that,
+.Ev PAGER
+or just
+.Xr more 1 .
+Source pages are formatted with
+.Xr mandoc 1 ;
+preformatted pages with
+.Xr cat 1 .
+.Ss Macro Keys
+Queries evaluate over a subset of
+.Xr mdoc 7
+macros indexed by
+.Xr mandocdb 8 .
+In addition to the macro keys listed below, the special key
+.Cm any
+may be used to match any available macro key.
+.Pp
+Names and description:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&Nm Ta manual name
+.It Li \&Nd Ta one-line manual description
+.El
+.Pp
+Sections and cross references:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&Sh Ta section header (excluding standard sections)
+.It Li \&Ss Ta subsection header
+.It Li \&Xr Ta cross reference to another manual page
+.It Li \&Rs Ta bibliographic reference
+.El
+.Pp
+Semantic markup for command line utilities:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&Fl Ta command line options (flags)
+.It Li \&Cm Ta command modifier
+.It Li \&Ar Ta command argument
+.It Li \&Ic Ta internal or interactive command
+.It Li \&Ev Ta environmental variable
+.It Li \&Pa Ta file system path
+.El
+.Pp
+Semantic markup for function libraries:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&Lb Ta function library name
+.It Li \&In Ta include file
+.It Li \&Ft Ta function return type
+.It Li \&Fn Ta function name
+.It Li \&Fa Ta function argument type and name
+.It Li \&Vt Ta variable type
+.It Li \&Va Ta variable name
+.It Li \&Dv Ta defined variable or preprocessor constant
+.It Li \&Er Ta error constant
+.It Li \&Ev Ta environmental variable
+.El
+.Pp
+Various semantic markup:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&An Ta author name
+.It Li \&Lk Ta hyperlink
+.It Li \&Mt Ta Do mailto Dc hyperlink
+.It Li \&Cd Ta kernel configuration declaration
+.It Li \&Ms Ta mathematical symbol
+.It Li \&Tn Ta tradename
+.El
+.Pp
+Physical markup:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&Em Ta italic font or underline
+.It Li \&Sy Ta boldface font
+.It Li \&Li Ta typewriter font
+.El
+.Pp
+Text production:
+.Bl -column "xLix" description -offset indent -compact
+.It Li \&St Ta reference to a standards document
+.It Li \&At Ta At No version reference
+.It Li \&Bx Ta Bx No version reference
+.It Li \&Bsx Ta Bsx No version reference
+.It Li \&Nx Ta Nx No version reference
+.It Li \&Fx Ta Fx No version reference
+.It Li \&Ox Ta Ox No version reference
+.It Li \&Dx Ta Dx No version reference
+.El
+.Sh ENVIRONMENT
+.Bl -tag -width Ds
+.It Ev MANPAGER
+Default pager for manuals.
+If this is unset, falls back to
+.Ev PAGER .
+.It Ev PAGER
+The second choice for a manual pager.
+If this is unset, use
+.Xr more 1 .
+.It Ev MANPATH
+Colon-separated paths modifying the default list of paths searched for
+manual databases.
+Invalid paths, or paths without manual databases, are ignored.
+Overridden by
+.Fl M .
+If
+.Ev MANPATH
+begins with a
+.Sq \&: ,
+it is appended to the default list;
+else if it ends with
+.Sq \&: ,
+it is prepended to the default list; else if it contains
+.Sq \&:: ,
+the default list is inserted between the colons.
+If none of these conditions are met, it overrides the default list.
+.El
+.Sh FILES
+.Bl -tag -width "/etc/man.conf" -compact
+.It Pa whatis.db
+name of the
+.Xr mandocdb 8
+keyword database
+.It Pa whatis.index
+name of the
+.Xr mandocdb 8
+filename database
+.It Pa /etc/man.conf
+default
+.Xr man 1
+configuration file
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Search for
+.Qq mdoc
+as a substring and regular expression
+within each manual name and description:
+.Pp
+.Dl $ apropos mdoc
+.Dl $ apropos ~^mdoc$
+.Pp
+Include matches for
+.Qq roff
+and
+.Qq man
+for the regular expression case:
+.Pp
+.Dl $ apropos ~^mdoc$ roff man
+.Dl $ apropos ~^mdoc$ \-o roff \-o man
+.Pp
+Search for
+.Qq optind
+and
+.Qq optarg
+as variable names in the library category:
+.Pp
+.Dl $ apropos \-s 3 Va~^optind \-a Va~^optarg$
+.Sh SEE ALSO
+.Xr more 1 ,
+.Xr re_format 7 ,
+.Xr mandocdb 8
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
diff --git a/apropos.c b/apropos.c
new file mode 100644
index 000000000000..9c3cae96afa0
--- /dev/null
+++ b/apropos.c
@@ -0,0 +1,239 @@
+/* $Id: apropos.c,v 1.30 2012/03/24 02:18:51 kristaps Exp $ */
+/*
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <sys/param.h>
+
+#include <assert.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "apropos_db.h"
+#include "mandoc.h"
+#include "manpath.h"
+
+/*
+ * True if the result array holds exactly one matched entry: it is
+ * non-empty, its first entry is matched, and either it has only one
+ * entry or its second entry is unmatched.
+ * NOTE(review): only inspects entries 0 and 1, so it presumably relies
+ * on matched entries having been sorted to the front (see list()).
+ */
+#define SINGLETON(_res, _sz) \
+ ((_sz) && (_res)[0].matched && \
+ (1 == (_sz) || 0 == (_res)[1].matched))
+/*
+ * True if the result array has no entries or its first entry is
+ * unmatched (same front-sorted assumption as SINGLETON).
+ */
+#define EMPTYSET(_res, _sz) \
+ ((0 == (_sz)) || 0 == (_res)[0].matched)
+
+static int cmp(const void *, const void *);
+static void list(struct res *, size_t, void *);
+static void usage(void);
+
+static char *progname;
+
+/*
+ * Parse options, compile the search expression, run it over the
+ * manual databases and, when standard output is a terminal, let the
+ * user pick a result and display it through a pager.
+ * When the program name begins with "whatis", the arguments are
+ * compiled with termcomp() instead of exprcomp().
+ * Returns EXIT_SUCCESS or EXIT_FAILURE; does not return if one of the
+ * execlp() calls succeeds.
+ */
+int
+main(int argc, char *argv[])
+{
+ int ch, rc, whatis, usecat;
+ struct res *res;
+ struct manpaths paths;
+ const char *prog;
+ pid_t pid;
+ char path[PATH_MAX];
+ int fds[2];
+ size_t terms, ressz, sz;
+ struct opts opts;
+ struct expr *e;
+ char *defpaths, *auxpaths, *conf_file, *cp;
+ extern int optind;
+ extern char *optarg;
+
+ /* Use the basename of argv[0] as the program name. */
+ progname = strrchr(argv[0], '/');
+ if (progname == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+
+ whatis = 0 == strncmp(progname, "whatis", 6);
+
+ memset(&paths, 0, sizeof(struct manpaths));
+ memset(&opts, 0, sizeof(struct opts));
+
+ usecat = 0;
+ ressz = 0;
+ res = NULL;
+ auxpaths = defpaths = NULL;
+ conf_file = NULL;
+ e = NULL;
+ /* An empty path means "no manual selected for display". */
+ path[0] = '\0';
+
+ while (-1 != (ch = getopt(argc, argv, "C:M:m:S:s:")))
+ switch (ch) {
+ case ('C'):
+ conf_file = optarg;
+ break;
+ case ('M'):
+ defpaths = optarg;
+ break;
+ case ('m'):
+ auxpaths = optarg;
+ break;
+ case ('S'):
+ opts.arch = optarg;
+ break;
+ case ('s'):
+ opts.cat = optarg;
+ break;
+ default:
+ usage();
+ return(EXIT_FAILURE);
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /* No expression at all: exit successfully without any output. */
+ if (0 == argc)
+ return(EXIT_SUCCESS);
+
+ rc = 0;
+
+ manpath_parse(&paths, conf_file, defpaths, auxpaths);
+
+ e = whatis ? termcomp(argc, argv, &terms) :
+ exprcomp(argc, argv, &terms);
+
+ if (NULL == e) {
+ fprintf(stderr, "%s: Bad expression\n", progname);
+ goto out;
+ }
+
+ /* list() is invoked as the callback and prints the results. */
+ rc = apropos_search
+ (paths.sz, paths.paths, &opts,
+ e, terms, NULL, &ressz, &res, list);
+
+ /* From here on, "terms" is reused as the 1-based selection. */
+ terms = 1;
+
+ if (0 == rc) {
+ fprintf(stderr, "%s: Bad database\n", progname);
+ goto out;
+ } else if ( ! isatty(STDOUT_FILENO) || EMPTYSET(res, ressz))
+ goto out;
+
+ /* With several results, prompt; an empty line keeps the default. */
+ if ( ! SINGLETON(res, ressz)) {
+ printf("Which manpage would you like [1]? ");
+ fflush(stdout);
+ if (NULL != (cp = fgetln(stdin, &sz)) &&
+ sz > 1 && '\n' == cp[--sz]) {
+ if ((ch = atoi(cp)) <= 0)
+ goto out;
+ terms = (size_t)ch;
+ }
+ }
+
+ /*
+ * Convert the selection to an array index and, if it names a
+ * matched result, remember its file and change into its volume.
+ * NOTE(review): the return value of chdir() is not checked.
+ */
+ if (--terms < ressz && res[terms].matched) {
+ chdir(paths.paths[res[terms].volume]);
+ strlcpy(path, res[terms].file, PATH_MAX);
+ usecat = RESTYPE_CAT == res[terms].type;
+ }
+out:
+ manpath_free(&paths);
+ resfree(res, ressz);
+ exprfree(e);
+
+ if ('\0' == path[0])
+ return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
+
+ if (-1 == pipe(fds)) {
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * The parent becomes the pager, reading from the pipe; the child
+ * becomes the formatter, writing into the pipe.
+ */
+ if (-1 == (pid = fork())) {
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ } else if (pid > 0) {
+ dup2(fds[0], STDIN_FILENO);
+ close(fds[1]);
+ prog = NULL != getenv("MANPAGER") ?
+ getenv("MANPAGER") :
+ (NULL != getenv("PAGER") ?
+ getenv("PAGER") : "more");
+ execlp(prog, prog, (char *)NULL);
+ perror(prog);
+ return(EXIT_FAILURE);
+ }
+
+ dup2(fds[1], STDOUT_FILENO);
+ close(fds[0]);
+ /* Preformatted pages go through cat, source pages through mandoc. */
+ prog = usecat ? "cat" : "mandoc";
+ execlp(prog, prog, path, (char *)NULL);
+ perror(prog);
+ return(EXIT_FAILURE);
+}
+
+/*
+ * Result callback for apropos_search(): sort the results and print
+ * one line per matched entry.
+ *
+ * res: result array, sorted in place.
+ * sz:  number of entries in res.
+ * arg: unused.
+ *
+ * On a terminal, each line is prefixed with a 1-based index so that
+ * the caller can prompt for a selection; a single match is not listed
+ * there because main() displays it directly.
+ * When standard output is not a terminal, every match is printed,
+ * including a single one (previously a lone match produced no output
+ * at all when the output was piped or redirected).
+ */
+/* ARGSUSED */
+static void
+list(struct res *res, size_t sz, void *arg)
+{
+	size_t		 i;
+	int		 tty;
+
+	qsort(res, sz, sizeof(struct res), cmp);
+
+	if (EMPTYSET(res, sz))
+		return;
+
+	tty = isatty(STDOUT_FILENO);
+
+	/* A lone result on a terminal is opened without a menu. */
+	if (tty && SINGLETON(res, sz))
+		return;
+
+	for (i = 0; i < sz && res[i].matched; i++) {
+		if (tty)
+			printf("[%zu] ", i + 1);
+		printf("%s(%s%s%s) - %.70s\n",
+			res[i].title, res[i].cat,
+			*res[i].arch ? "/" : "",
+			*res[i].arch ? res[i].arch : "",
+			res[i].desc);
+	}
+}
+
+/*
+ * qsort(3) comparison function for result records.
+ * Matched records sort before unmatched ones; matched records are
+ * ordered case-insensitively by title.
+ *
+ * The result is consistent in both directions: an unmatched r1 sorts
+ * after a matched r2 (1), a matched r1 sorts before an unmatched r2
+ * (-1), and two unmatched records compare equal (0).  Titles are only
+ * consulted when both records are matched.
+ */
+static int
+cmp(const void *p1, const void *p2)
+{
+	const struct res *r1 = p1;
+	const struct res *r2 = p2;
+
+	if (0 == r1->matched || 0 == r2->matched)
+		return((0 == r1->matched) - (0 == r2->matched));
+
+	return(strcasecmp(r1->title, r2->title));
+}
+
+/*
+ * Print the command-line synopsis, prefixed with the program name,
+ * to standard error.
+ */
+static void
+usage(void)
+{
+
+	fprintf(stderr,
+	    "usage: %s [-C file] [-M manpath] [-m manpath] "
+	    "[-S arch] [-s section] expression ...\n",
+	    progname);
+}
diff --git a/apropos_db.c b/apropos_db.c
new file mode 100644
index 000000000000..8aea771da791
--- /dev/null
+++ b/apropos_db.c
@@ -0,0 +1,876 @@
+/* $Id: apropos_db.c,v 1.31 2012/03/24 01:46:25 kristaps Exp $ */
+/*
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <fcntl.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(__linux__)
+# include <endian.h>
+# include <db_185.h>
+#elif defined(__APPLE__)
+# include <libkern/OSByteOrder.h>
+# include <db.h>
+#else
+# include <db.h>
+#endif
+
+#include "mandocdb.h"
+#include "apropos_db.h"
+#include "mandoc.h"
+
+/*
+ * Free every heap-allocated member of the struct res pointed to by _x.
+ * The structure itself is not freed and its members are not reset.
+ */
+#define RESFREE(_x) \
+ do { \
+ free((_x)->file); \
+ free((_x)->cat); \
+ free((_x)->title); \
+ free((_x)->arch); \
+ free((_x)->desc); \
+ free((_x)->matches); \
+ } while (/*CONSTCOND*/0)
+
+/*
+ * One node in a compiled search expression.
+ * Nodes are chained through "next"; a node created for a parenthesised
+ * group carries its contents in "subexpr" and has no search value.
+ */
+struct expr {
+ int regex; /* is regex? */
+ int index; /* index in match array */
+ uint64_t mask; /* type-mask */
+ int and; /* is rhs of logical AND? */
+ char *v; /* search value */
+ regex_t re; /* compiled re, if regex */
+ struct expr *next; /* next in sequence */
+ struct expr *subexpr;
+};
+
+/*
+ * Maps a macro key name as written in a search term to its type mask.
+ */
+struct type {
+ uint64_t mask;
+ const char *name;
+};
+
+/*
+ * The growing array of result records shared by all searched volumes.
+ */
+struct rectree {
+ struct res *node; /* record array for dir tree */
+ int len; /* length of record array */
+};
+
+/*
+ * Table of macro keys accepted in search terms.
+ * The list ends with an entry whose mask is 0; exprterm() stops its
+ * scan there.  The key "any" selects every type bit.
+ * NOTE(review): "Fo" shares the mask of "Fn" and "Vt" shares the mask
+ * of "Va" -- confirm that the latter is intended.
+ */
+static const struct type types[] = {
+ { TYPE_An, "An" },
+ { TYPE_Ar, "Ar" },
+ { TYPE_At, "At" },
+ { TYPE_Bsx, "Bsx" },
+ { TYPE_Bx, "Bx" },
+ { TYPE_Cd, "Cd" },
+ { TYPE_Cm, "Cm" },
+ { TYPE_Dv, "Dv" },
+ { TYPE_Dx, "Dx" },
+ { TYPE_Em, "Em" },
+ { TYPE_Er, "Er" },
+ { TYPE_Ev, "Ev" },
+ { TYPE_Fa, "Fa" },
+ { TYPE_Fl, "Fl" },
+ { TYPE_Fn, "Fn" },
+ { TYPE_Fn, "Fo" },
+ { TYPE_Ft, "Ft" },
+ { TYPE_Fx, "Fx" },
+ { TYPE_Ic, "Ic" },
+ { TYPE_In, "In" },
+ { TYPE_Lb, "Lb" },
+ { TYPE_Li, "Li" },
+ { TYPE_Lk, "Lk" },
+ { TYPE_Ms, "Ms" },
+ { TYPE_Mt, "Mt" },
+ { TYPE_Nd, "Nd" },
+ { TYPE_Nm, "Nm" },
+ { TYPE_Nx, "Nx" },
+ { TYPE_Ox, "Ox" },
+ { TYPE_Pa, "Pa" },
+ { TYPE_Rs, "Rs" },
+ { TYPE_Sh, "Sh" },
+ { TYPE_Ss, "Ss" },
+ { TYPE_St, "St" },
+ { TYPE_Sy, "Sy" },
+ { TYPE_Tn, "Tn" },
+ { TYPE_Va, "Va" },
+ { TYPE_Va, "Vt" },
+ { TYPE_Xr, "Xr" },
+ { UINT64_MAX, "any" },
+ { 0, NULL }
+};
+
+static DB *btree_open(void);
+static int btree_read(const DBT *, const DBT *,
+ const struct mchars *,
+ uint64_t *, recno_t *, char **);
+static int expreval(const struct expr *, int *);
+static void exprexec(const struct expr *,
+ const char *, uint64_t, struct res *);
+static int exprmark(const struct expr *,
+ const char *, uint64_t, int *);
+static struct expr *exprexpr(int, char *[], int *, int *, size_t *);
+static struct expr *exprterm(char *, int);
+static DB *index_open(void);
+static int index_read(const DBT *, const DBT *, int,
+ const struct mchars *, struct res *);
+static void norm_string(const char *,
+ const struct mchars *, char **);
+static size_t norm_utf8(unsigned int, char[7]);
+static int single_search(struct rectree *, const struct opts *,
+ const struct expr *, size_t terms,
+ struct mchars *, int);
+
+/*
+ * Open the keyword database MANDOC_DB read-only as a btree that
+ * permits duplicate keys.
+ * Returns the database handle, or NULL if dbopen() failed.
+ */
+static DB *
+btree_open(void)
+{
+	BTREEINFO	 cfg;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.flags = R_DUP;
+	cfg.lorder = 4321;
+
+	return(dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &cfg));
+}
+
+/*
+ * Decode one keyword record.
+ * The key must be a nil-terminated string of at least two bytes and
+ * the value must be exactly two 64-bit big-endian words: the type
+ * mask followed by the record number.
+ * On success, stores the normalised keyword in *buf, the mask in
+ * *mask and the record number in *rec, and returns 1.
+ * Returns 0, touching none of the outputs, if the record is malformed.
+ */
+static int
+btree_read(const DBT *k, const DBT *v, const struct mchars *mc,
+		uint64_t *mask, recno_t *rec, char **buf)
+{
+	uint64_t	 words[2];
+	const char	*kw;
+
+	/* Reject records of implausible size. */
+	if (k->size < 2)
+		return(0);
+	if (v->size != sizeof(words))
+		return(0);
+
+	/* The keyword must end in a nil byte. */
+	kw = k->data;
+	if ('\0' != kw[(int)k->size - 1])
+		return(0);
+
+	norm_string(kw, mc, buf);
+
+	memcpy(words, v->data, sizeof(words));
+	*mask = betoh64(words[0]);
+	*rec = betoh64(words[1]);
+	return(1);
+}
+
+/*
+ * Encode the code point "cp" as a nil-terminated UTF-8 byte sequence
+ * in "out", which must have room for seven bytes.
+ * Sequences of one to six bytes are produced, covering values up to
+ * 0x7FFFFFFF.
+ * Returns the number of bytes written, not counting the terminator,
+ * or 0 (leaving "out" untouched) if "cp" is too large to encode.
+ */
+static size_t
+norm_utf8(unsigned int cp, char out[7])
+{
+	/* Largest value that fits a sequence with n trailing bytes. */
+	static const unsigned int	 limit[6] = {
+		0x0000007F, 0x000007FF, 0x0000FFFF,
+		0x001FFFFF, 0x03FFFFFF, 0x7FFFFFFF
+	};
+	/* Marker bits and payload mask of the leading byte. */
+	static const unsigned int	 lead[6] =
+		{ 0, 192, 224, 240, 248, 252 };
+	static const unsigned int	 keep[6] =
+		{ 127, 31, 15, 7, 3, 1 };
+	size_t		 trail, i;
+
+	for (trail = 0; trail < 6; trail++)
+		if (cp <= limit[trail])
+			break;
+
+	if (6 == trail)
+		return(0);
+
+	/* Fill the trailing bytes back to front, six bits at a time. */
+	for (i = trail; i > 0; i--) {
+		out[i] = (char)((cp & 63) | 128);
+		cp >>= 6;
+	}
+
+	out[0] = (char)((cp & keep[trail]) | lead[trail]);
+	out[trail + 1] = '\0';
+	return(trail + 1);
+}
+
+/*
+ * Normalise strings from the index and database.
+ * These strings are escaped as defined by mandoc_char(7) along with
+ * other goop in mandoc.h (e.g., soft hyphens).
+ * This function normalises these into a nice UTF-8 string.
+ * The result is stored in *buf, which is grown with mandoc_realloc()
+ * as needed and is always nil-terminated on return.
+ * Nothing is returned: if an escape sequence fails to parse,
+ * conversion stops and *buf holds what was converted up to that point.
+ */
+static void
+norm_string(const char *val, const struct mchars *mc, char **buf)
+{
+ size_t sz, bsz;
+ char utfbuf[7];
+ const char *seq, *cpp;
+ int len, u, pos;
+ enum mandoc_esc esc;
+ /* Characters that interrupt plain copying. */
+ static const char res[] = { '\\', '\t',
+ ASCII_NBRSP, ASCII_HYPH, '\0' };
+
+ /* Pre-allocate by the length of the input */
+
+ bsz = strlen(val) + 1;
+ *buf = mandoc_realloc(*buf, bsz);
+ pos = 0;
+
+ while ('\0' != *val) {
+ /*
+ * Halt on the first escape sequence.
+ * This also halts on the end of string, in which case
+ * we just copy, fallthrough, and exit the loop.
+ */
+ if ((sz = strcspn(val, res)) > 0) {
+ memcpy(&(*buf)[pos], val, sz);
+ pos += (int)sz;
+ val += (int)sz;
+ }
+
+ /* Soft hyphens become '-'; tabs and no-break spaces become ' '. */
+ if (ASCII_HYPH == *val) {
+ (*buf)[pos++] = '-';
+ val++;
+ continue;
+ } else if ('\t' == *val || ASCII_NBRSP == *val) {
+ (*buf)[pos++] = ' ';
+ val++;
+ continue;
+ } else if ('\\' != *val)
+ break;
+
+ /* Read past the slash. */
+
+ val++;
+ u = 0;
+
+ /*
+ * Parse the escape sequence and see if it's a
+ * predefined character or special character.
+ */
+
+ esc = mandoc_escape(&val, &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
+
+ /*
+ * XXX - this just does UTF-8, but we need to know
+ * beforehand whether we should do text substitution.
+ */
+
+ /*
+ * Only special characters with a known code point are
+ * rendered; every other escape is dropped from the output.
+ */
+ switch (esc) {
+ case (ESCAPE_SPECIAL):
+ if (0 != (u = mchars_spec2cp(mc, seq, len)))
+ break;
+ /* FALLTHROUGH */
+ default:
+ continue;
+ }
+
+ /*
+ * If we have a Unicode codepoint, try to convert that
+ * to a UTF-8 byte string.
+ */
+
+ cpp = utfbuf;
+ if (0 == (sz = norm_utf8(u, utfbuf)))
+ continue;
+
+ /* Copy the rendered glyph into the stream. */
+
+ sz = strlen(cpp);
+ /* Grow by the glyph length so the copy below always fits. */
+ bsz += sz;
+
+ *buf = mandoc_realloc(*buf, bsz);
+
+ memcpy(&(*buf)[pos], cpp, sz);
+ pos += (int)sz;
+ }
+
+ (*buf)[pos] = '\0';
+}
+
+/*
+ * Open the filename index MANDOC_IDX read-only as a record-number
+ * database.
+ * Returns the database handle, or NULL if dbopen() failed.
+ */
+static DB *
+index_open(void)
+{
+
+	return(dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL));
+}
+
+/*
+ * Safely unpack from an index file record into the structure.
+ * The value is read as one type byte ('d', 'a' or 'c') followed by
+ * five nil-terminated strings: file, category, title, architecture
+ * and description, each of which is normalised into "rec".
+ * The record number is copied from the key and "index" is stored as
+ * the volume.
+ * Returns 1 if an entry was unpacked, 0 if the database is insane.
+ * NOTE(review): on a 0 return, "rec" may have been partially filled.
+ */
+static int
+index_read(const DBT *key, const DBT *val, int index,
+ const struct mchars *mc, struct res *rec)
+{
+ size_t left;
+ char *np, *cp;
+ char type;
+
+/*
+ * Read the next nil-terminated string from "cp" into _dst and advance
+ * past it; bail out if no terminator is found in the remaining bytes.
+ */
+#define INDEX_BREAD(_dst) \
+ do { \
+ if (NULL == (np = memchr(cp, '\0', left))) \
+ return(0); \
+ norm_string(cp, mc, &(_dst)); \
+ left -= (np - cp) + 1; \
+ cp = np + 1; \
+ } while (/* CONSTCOND */ 0)
+
+ if (0 == (left = val->size))
+ return(0);
+
+ cp = val->data;
+ /* The key is copied verbatim as the record number. */
+ assert(sizeof(recno_t) == key->size);
+ memcpy(&rec->rec, key->data, key->size);
+ rec->volume = index;
+
+ if ('d' == (type = *cp++))
+ rec->type = RESTYPE_MDOC;
+ else if ('a' == type)
+ rec->type = RESTYPE_MAN;
+ else if ('c' == type)
+ rec->type = RESTYPE_CAT;
+ else
+ return(0);
+
+ /* Account for the type byte consumed above. */
+ left--;
+ INDEX_BREAD(rec->file);
+ INDEX_BREAD(rec->cat);
+ INDEX_BREAD(rec->title);
+ INDEX_BREAD(rec->arch);
+ INDEX_BREAD(rec->desc);
+ return(1);
+}
+
+/*
+ * Search mandocdb databases in paths for expression "expr".
+ * Filter out by "opts".
+ * Call "res" with the results, which may be zero.
+ * On success, the result array and its length are also handed back
+ * through "resp" and "sz"; it is released with resfree().
+ * Return 0 if there was a database error, else return 1.
+ * On error the callback is not invoked, the partial results are freed
+ * and "sz"/"resp" are left as 0/NULL.
+ * This function changes the current working directory.
+ */
+int
+apropos_search(int pathsz, char **paths, const struct opts *opts,
+ const struct expr *expr, size_t terms, void *arg,
+ size_t *sz, struct res **resp,
+ void (*res)(struct res *, size_t, void *))
+{
+ struct rectree tree;
+ struct mchars *mc;
+ int i, rc;
+
+ memset(&tree, 0, sizeof(struct rectree));
+
+ rc = 0;
+ mc = mchars_alloc();
+ *sz = 0;
+ *resp = NULL;
+
+ /*
+ * Main loop. Change into the directory containing manpage
+ * databases. Run our expression over each database in the set.
+ */
+
+ for (i = 0; i < pathsz; i++) {
+ /* Directories that cannot be entered are silently skipped. */
+ if (chdir(paths[i]))
+ continue;
+ if (single_search(&tree, opts, expr, terms, mc, i))
+ continue;
+
+ resfree(tree.node, tree.len);
+ mchars_free(mc);
+ return(0);
+ }
+
+ (*res)(tree.node, tree.len, arg);
+ *sz = tree.len;
+ *resp = tree.node;
+ mchars_free(mc);
+ return(1);
+}
+
+/*
+ * Run the expression over the databases in the current directory and
+ * append one record per candidate manual to "tree".
+ * "vol" is stored in each new record as its volume.
+ * Returns 1 on success, including when either database cannot be
+ * opened (the directory is then skipped), and 0 if reading the
+ * databases failed or a record was malformed.
+ */
+static int
+single_search(struct rectree *tree, const struct opts *opts,
+ const struct expr *expr, size_t terms,
+ struct mchars *mc, int vol)
+{
+ int root, leaf, ch;
+ DBT key, val;
+ DB *btree, *idx;
+ char *buf;
+ struct res *rs;
+ struct res r;
+ uint64_t mask;
+ recno_t rec;
+
+ /*
+ * The lookup tree starts out empty on every call, so records
+ * added for earlier volumes are never reached from "root".
+ */
+ root = -1;
+ leaf = -1;
+ btree = NULL;
+ idx = NULL;
+ buf = NULL;
+ rs = tree->node;
+
+ memset(&r, 0, sizeof(struct res));
+
+ if (NULL == (btree = btree_open()))
+ return(1);
+
+ if (NULL == (idx = index_open())) {
+ (*btree->close)(btree);
+ return(1);
+ }
+
+ while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
+ if ( ! btree_read(&key, &val, mc, &mask, &rec, &buf))
+ break;
+
+ /*
+ * See if this keyword record matches any of the
+ * expressions we have stored.
+ */
+ if ( ! exprmark(expr, buf, mask, NULL))
+ continue;
+
+ /*
+ * O(log n) scan for prior records. Since a record
+ * number is unbounded, this has decent performance over
+ * a complex hash function.
+ */
+
+ for (leaf = root; leaf >= 0; )
+ if (rec > rs[leaf].rec &&
+ rs[leaf].rhs >= 0)
+ leaf = rs[leaf].rhs;
+ else if (rec < rs[leaf].rec &&
+ rs[leaf].lhs >= 0)
+ leaf = rs[leaf].lhs;
+ else
+ break;
+
+ /*
+ * If we find a record, see if it has already evaluated
+ * to true. If it has, great, just keep going. If not,
+ * try to evaluate it now and continue anyway.
+ */
+
+ if (leaf >= 0 && rs[leaf].rec == rec) {
+ if (0 == rs[leaf].matched)
+ exprexec(expr, buf, mask, &rs[leaf]);
+ continue;
+ }
+
+ /*
+ * We have a new file to examine.
+ * Extract the manpage's metadata from the index
+ * database, then begin partial evaluation.
+ */
+
+ key.data = &rec;
+ key.size = sizeof(recno_t);
+
+ if (0 != (*idx->get)(idx, &key, &val, 0))
+ break;
+
+ r.lhs = r.rhs = -1;
+ if ( ! index_read(&key, &val, vol, mc, &r))
+ break;
+
+ /* XXX: this should be elsewhere, I guess? */
+
+ if (opts->cat && strcasecmp(opts->cat, r.cat))
+ continue;
+
+ if (opts->arch && *r.arch)
+ if (strcasecmp(opts->arch, r.arch))
+ continue;
+
+ tree->node = rs = mandoc_realloc
+ (rs, (tree->len + 1) * sizeof(struct res));
+
+ /*
+ * The array slot takes over the strings held by "r";
+ * clearing "r" keeps them from being freed or reused here.
+ */
+ memcpy(&rs[tree->len], &r, sizeof(struct res));
+ memset(&r, 0, sizeof(struct res));
+ rs[tree->len].matches =
+ mandoc_calloc(terms, sizeof(int));
+
+ exprexec(expr, buf, mask, &rs[tree->len]);
+
+ /* Append to our tree. */
+
+ if (leaf >= 0) {
+ if (rec > rs[leaf].rec)
+ rs[leaf].rhs = tree->len;
+ else
+ rs[leaf].lhs = tree->len;
+ } else
+ root = tree->len;
+
+ tree->len++;
+ }
+
+ (*btree->close)(btree);
+ (*idx->close)(idx);
+
+ free(buf);
+ RESFREE(&r);
+ /*
+ * Success only if the loop ended with the sequence call
+ * returning 1; leaving via "break" keeps ch at 0.
+ */
+ return(1 == ch);
+}
+
+/*
+ * Release an array of "sz" result records: first the members of each
+ * record, then the array itself.
+ */
+void
+resfree(struct res *rec, size_t sz)
+{
+	struct res	*cur;
+	size_t		 left;
+
+	cur = rec;
+	for (left = sz; left > 0; left--) {
+		RESFREE(cur);
+		cur++;
+	}
+
+	free(rec);
+}
+
+/*
+ * Compile a list of straight-up terms.
+ * The arguments are re-written into Nm~[[:<:]]term[[:>:]], that is,
+ * a regular expression over manual names matching "term" surrounded
+ * by word boundaries, then pumped through exprterm().
+ * Terms are case-insensitive.
+ * This emulates whatis(1) behaviour.
+ * The number of terms is stored in *tt.
+ * Returns the head of the expression list, or NULL if any term fails
+ * to compile (or if argc is 0).
+ */
+struct expr *
+termcomp(int argc, char *argv[], size_t *tt)
+{
+ char *buf;
+ int pos;
+ struct expr *e, *next;
+ size_t sz;
+
+ buf = NULL;
+ e = NULL;
+ *tt = 0;
+
+ /* Walk backwards and prepend, so the list keeps argument order. */
+ for (pos = argc - 1; pos >= 0; pos--) {
+ /* 18 = 10 for the prefix + 7 for the suffix + 1 for the nil. */
+ sz = strlen(argv[pos]) + 18;
+ buf = mandoc_realloc(buf, sz);
+ strlcpy(buf, "Nm~[[:<:]]", sz);
+ strlcat(buf, argv[pos], sz);
+ strlcat(buf, "[[:>:]]", sz);
+ if (NULL == (next = exprterm(buf, 0))) {
+ free(buf);
+ exprfree(e);
+ return(NULL);
+ }
+ /*
+ * NOTE(review): "index" is never assigned here, so every
+ * term appears to keep index 0 -- confirm this is intended.
+ */
+ next->next = e;
+ e = next;
+ (*tt)++;
+ }
+
+ free(buf);
+ return(e);
+}
+
+/*
+ * Compile the whole argument vector into a logical expression; the
+ * grammar is described in apropos.1.
+ * The number of terms is stored in *tt.
+ * Returns the expression, or NULL if it failed to parse, if arguments
+ * were left over, or if parentheses were left unbalanced.
+ */
+struct expr *
+exprcomp(int argc, char *argv[], size_t *tt)
+{
+	struct expr	*root;
+	int		 cur, depth;
+
+	cur = 0;
+	depth = 0;
+	*tt = 0;
+
+	root = exprexpr(argc, argv, &cur, &depth, tt);
+
+	/* Reject trailing input and unclosed subexpressions. */
+	if (cur < argc || 0 != depth) {
+		exprfree(root);
+		return(NULL);
+	}
+
+	return(root);
+}
+
+/*
+ * Compile an array of tokens into an expression.
+ * An informal expression grammar is defined in apropos(1).
+ * Parsing starts at argv[*pos]; *pos is advanced as tokens are
+ * consumed, *lvl tracks the parenthesis nesting depth, and *tt counts
+ * the terms and supplies each one's index.
+ * Return NULL if we fail doing so. All memory will be cleaned up.
+ * Return the root of the expression sequence if alright.
+ */
+static struct expr *
+exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
+{
+ struct expr *e, *first, *next;
+ int log;
+
+ first = next = NULL;
+
+ for ( ; *pos < argc; (*pos)++) {
+ /* "e" is the previously parsed term, or NULL at the start. */
+ e = next;
+
+ /*
+ * Close out a subexpression.
+ */
+
+ if (NULL != e && 0 == strcmp(")", argv[*pos])) {
+ if (--(*lvl) < 0)
+ goto err;
+ break;
+ }
+
+ /*
+ * Small note: if we're just starting, don't let "-a"
+ * and "-o" be considered logical operators: they're
+ * just tokens unless pairwise joining, in which case we
+ * record their existence (or assume "OR").
+ */
+ log = 0;
+
+ if (NULL != e && 0 == strcmp("-a", argv[*pos]))
+ log = 1;
+ else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
+ log = 2;
+
+ /* An operator must be followed by another token. */
+ if (log > 0 && ++(*pos) >= argc)
+ goto err;
+
+ /*
+ * Now we parse the term part. This can begin with
+ * "-i", in which case the expression is case
+ * insensitive.
+ */
+
+ if (0 == strcmp("(", argv[*pos])) {
+ ++(*pos);
+ ++(*lvl);
+ next = mandoc_calloc(1, sizeof(struct expr));
+ next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
+ if (NULL == next->subexpr) {
+ free(next);
+ next = NULL;
+ }
+ } else if (0 == strcmp("-i", argv[*pos])) {
+ if (++(*pos) >= argc)
+ goto err;
+ next = exprterm(argv[*pos], 0);
+ } else
+ next = exprterm(argv[*pos], 1);
+
+ if (NULL == next)
+ goto err;
+
+ /* Only an explicit "-a" makes this the rhs of an AND. */
+ next->and = log == 1;
+ next->index = (int)(*tt)++;
+
+ /* Append to our chain of expressions. */
+
+ if (NULL == first) {
+ assert(NULL == e);
+ first = next;
+ } else {
+ assert(NULL != e);
+ e->next = next;
+ }
+ }
+
+ return(first);
+err:
+ exprfree(first);
+ return(NULL);
+}
+
+/*
+ * Parse a terminal expression with the grammar as defined in
+ * apropos(1).
+ * "buf" is modified in place: the operator is overwritten with a nil
+ * byte and the key list is split with strsep().
+ * If "cs" is zero, a regular expression is compiled case-insensitively;
+ * "cs" is not consulted for substring terms.
+ * Return NULL if we fail the parse, that is, if the regular expression
+ * does not compile; otherwise return a newly allocated expression that
+ * owns a copy of the search value.
+ */
+static struct expr *
+exprterm(char *buf, int cs)
+{
+ struct expr e;
+ struct expr *p;
+ char *key;
+ int i;
+
+ memset(&e, 0, sizeof(struct expr));
+
+ /* Choose regex or substring match. */
+
+ if (NULL == (e.v = strpbrk(buf, "=~"))) {
+ e.regex = 0;
+ e.v = buf;
+ } else {
+ e.regex = '~' == *e.v;
+ *e.v++ = '\0';
+ }
+
+ /* Determine the record types to search for. */
+
+ e.mask = 0;
+ /* Keys are only present if an operator was found above. */
+ if (buf < e.v) {
+ while (NULL != (key = strsep(&buf, ","))) {
+ i = 0;
+ while (types[i].mask &&
+ strcmp(types[i].name, key))
+ i++;
+ /* An unknown key lands on the terminator and adds nothing. */
+ e.mask |= types[i].mask;
+ }
+ }
+ /* Without any recognised key, search names and descriptions. */
+ if (0 == e.mask)
+ e.mask = TYPE_Nm | TYPE_Nd;
+
+ if (e.regex) {
+ i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE);
+ if (regcomp(&e.re, e.v, i))
+ return(NULL);
+ }
+
+ e.v = mandoc_strdup(e.v);
+
+ p = mandoc_calloc(1, sizeof(struct expr));
+ memcpy(p, &e, sizeof(struct expr));
+ return(p);
+}
+
+/*
+ * Release an expression list: for every node, its subexpression (if
+ * any), its compiled regular expression (if any), its search value
+ * and the node itself.  A NULL list is accepted.
+ */
+void
+exprfree(struct expr *p)
+{
+	struct expr	*next;
+
+	for ( ; NULL != p; p = next) {
+		/* Fetch the successor before the node goes away. */
+		next = p->next;
+
+		if (p->subexpr)
+			exprfree(p->subexpr);
+		if (p->regex)
+			regfree(&p->re);
+
+		free(p->v);
+		free(p);
+	}
+}
+
+/*
+ * Test the keyword "cp" of type "mask" against every term of the
+ * expression, descending into subexpressions.
+ * If "ms" is NULL, return 1 as soon as any term matches and 0 if none
+ * does.
+ * If "ms" is not NULL, set ms[index] to 1 for every matching term and
+ * always return 0.
+ */
+static int
+exprmark(const struct expr *p, const char *cp,
+ uint64_t mask, int *ms)
+{
+
+ for ( ; p; p = p->next) {
+ if (p->subexpr) {
+ if (exprmark(p->subexpr, cp, mask, ms))
+ return(1);
+ continue;
+ } else if ( ! (mask & p->mask))
+ continue;
+
+ /* Substring terms always compare case-insensitively. */
+ if (p->regex) {
+ if (regexec(&p->re, cp, 0, NULL, 0))
+ continue;
+ } else if (NULL == strcasestr(cp, p->v))
+ continue;
+
+ if (NULL == ms)
+ return(1);
+ else
+ ms[p->index] = 1;
+ }
+
+ return(0);
+}
+
+/*
+ * Evaluate the expression over the per-term truth values in "ms".
+ * The truth value of a subexpression is computed recursively and
+ * cached in "ms" at the index of its enclosing node.
+ * Returns non-zero if the expression is true, else 0.
+ */
+static int
+expreval(const struct expr *p, int *ms)
+{
+ int match;
+
+ /*
+ * AND has precedence over OR. Analysis is left-right, though
+ * it doesn't matter because there are no side-effects.
+ * Thus, step through pairwise ANDs and accumulate their Boolean
+ * evaluation. If we encounter a single true AND collection or
+ * standalone term, the whole expression is true (by definition
+ * of OR).
+ */
+
+ for (match = 0; p && ! match; p = p->next) {
+ /* Evaluate a subexpression, if applicable. */
+ if (p->subexpr && ! ms[p->index])
+ ms[p->index] = expreval(p->subexpr, ms);
+
+ match = ms[p->index];
+ /* Fold in every following term joined by AND. */
+ for ( ; p->next && p->next->and; p = p->next) {
+ /* Evaluate a subexpression, if applicable. */
+ if (p->next->subexpr && ! ms[p->next->index])
+ ms[p->next->index] =
+ expreval(p->next->subexpr, ms);
+ match = match && ms[p->next->index];
+ }
+ }
+
+ return(match);
+}
+
+/*
+ * Feed one keyword into the evaluation of a record that has not yet
+ * matched.
+ * The keyword "cp" of type "mask" is first marked against every term,
+ * updating the record's per-term truth values; the whole expression
+ * is then evaluated over those values and the outcome is stored in
+ * the record's "matched" flag.
+ */
+static void
+exprexec(const struct expr *e, const char *cp,
+		uint64_t mask, struct res *r)
+{
+	int		*truth;
+
+	assert(0 == r->matched);
+
+	truth = r->matches;
+	exprmark(e, cp, mask, truth);
+	r->matched = expreval(e, truth);
+}
diff --git a/apropos_db.h b/apropos_db.h
new file mode 100644
index 000000000000..72d4c204a391
--- /dev/null
+++ b/apropos_db.h
@@ -0,0 +1,73 @@
+/* $Id: apropos_db.h,v 1.13 2012/03/24 01:46:25 kristaps Exp $ */
+/*
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef APROPOS_H
+#define APROPOS_H
+
+/*
+ * Input format of the file a result refers to.
+ */
+enum restype {
+ RESTYPE_MAN, /* man(7) file */
+ RESTYPE_MDOC, /* mdoc(7) file */
+ RESTYPE_CAT /* pre-formatted file */
+};
+
+/*
+ * A single search result record.
+ * The first group of fields describes the manual; the trailing fields
+ * are bookkeeping used internally while searching.
+ */
+struct res {
+ enum restype type; /* input file type */
+ char *file; /* file in file-system */
+ char *cat; /* category (3p, 3, etc.) */
+ char *title; /* title (FOO, etc.) */
+ char *arch; /* arch (or empty string) */
+ char *desc; /* description (from Nd) */
+ unsigned int rec; /* record in index */
+ /*
+ * The index volume. This indexes into the array of directories
+ * searched for manual page databases.
+ */
+ unsigned int volume;
+ /*
+ * The following fields are used internally.
+ *
+ * Maintain a binary tree for checking the uniqueness of `rec'
+ * when adding elements to the results array.
+ * Since the results array is dynamic, use offset in the array
+ * instead of a pointer to the structure.
+ */
+ int lhs;
+ int rhs;
+ int matched; /* expression is true */
+ int *matches; /* partial truth evaluations */
+};
+
+/*
+ * Optional restrictions passed to apropos_search().
+ */
+struct opts {
+ const char *arch; /* restrict to architecture */
+ const char *cat; /* restrict to manual section */
+};
+
+__BEGIN_DECLS
+
+struct expr;
+
+int apropos_search(int, char **, const struct opts *,
+ const struct expr *, size_t,
+ void *, size_t *, struct res **,
+ void (*)(struct res *, size_t, void *));
+struct expr *exprcomp(int, char *[], size_t *);
+void exprfree(struct expr *);
+void resfree(struct res *, size_t);
+struct expr *termcomp(int, char *[], size_t *);
+
+__END_DECLS
+
+#endif /*!APROPOS_H*/
diff --git a/arch.c b/arch.c
new file mode 100644
index 000000000000..e764bfe9931f
--- /dev/null
+++ b/arch.c
@@ -0,0 +1,39 @@
+/* $Id: arch.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "mdoc.h"
+#include "mandoc.h"
+#include "libmdoc.h"
+
+/*
+ * Each LINE(x, y) entry of arch.in expands to a comparison of `p'
+ * against x that returns y on an exact match.
+ */
+#define LINE(x, y) \
+ if (0 == strcmp(p, x)) return(y);
+
+/*
+ * Map an architecture token to its formatted name as listed in
+ * arch.in.
+ * Returns NULL if the token is not listed.
+ */
+const char *
+mdoc_a2arch(const char *p)
+{
+
+#include "arch.in"
+
+ return(NULL);
+}
diff --git a/arch.in b/arch.in
new file mode 100644
index 000000000000..5113446e468a
--- /dev/null
+++ b/arch.in
@@ -0,0 +1,111 @@
+/* $Id: arch.in,v 1.12 2012/01/28 14:02:17 joerg Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This file defines the architecture token of the .Dt prologue macro.
+ * All architectures that your system supports (or the manuals of your
+ * system) should be included here. The right-hand-side is the
+ * formatted output.
+ *
+ * Be sure to escape strings.
+ *
+ * REMEMBER TO ADD NEW ARCHITECTURES TO MDOC.7!
+ */
+
+LINE("acorn26", "Acorn26")
+LINE("acorn32", "Acorn32")
+LINE("algor", "Algor")
+LINE("alpha", "Alpha")
+LINE("amd64", "AMD64")
+LINE("amiga", "Amiga")
+LINE("amigappc", "AmigaPPC")
+LINE("arc", "ARC")
+LINE("arm", "ARM")
+LINE("arm26", "ARM26")
+LINE("arm32", "ARM32")
+LINE("armish", "ARMISH")
+LINE("aviion", "AViiON")
+LINE("atari", "ATARI")
+LINE("beagle", "Beagle")
+LINE("bebox", "BeBox")
+LINE("cats", "cats")
+LINE("cesfic", "CESFIC")
+LINE("cobalt", "Cobalt")
+LINE("dreamcast", "Dreamcast")
+LINE("emips", "EMIPS")
+LINE("evbarm", "evbARM")
+LINE("evbmips", "evbMIPS")
+LINE("evbppc", "evbPPC")
+LINE("evbsh3", "evbSH3")
+LINE("ews4800mips", "EWS4800MIPS")
+LINE("hp300", "HP300")
+LINE("hp700", "HP700")
+LINE("hpcarm", "HPCARM")
+LINE("hpcmips", "HPCMIPS")
+LINE("hpcsh", "HPCSH")
+LINE("hppa", "HPPA")
+LINE("hppa64", "HPPA64")
+LINE("ia64", "ia64")
+LINE("i386", "i386")
+LINE("ibmnws", "IBMNWS")
+LINE("iyonix", "Iyonix")
+LINE("landisk", "LANDISK")
+LINE("loongson", "Loongson")
+LINE("luna68k", "Luna68k")
+LINE("luna88k", "Luna88k")
+LINE("m68k", "m68k")
+LINE("mac68k", "Mac68k")
+LINE("macppc", "MacPPC")
+LINE("mips", "MIPS")
+LINE("mips64", "MIPS64")
+LINE("mipsco", "MIPSCo")
+LINE("mmeye", "mmEye")
+LINE("mvme68k", "MVME68k")
+LINE("mvme88k", "MVME88k")
+LINE("mvmeppc", "MVMEPPC")
+LINE("netwinder", "NetWinder")
+LINE("news68k", "NeWS68k")
+LINE("newsmips", "NeWSMIPS")
+LINE("next68k", "NeXT68k")
+LINE("ofppc", "OFPPC")
+LINE("palm", "Palm")
+LINE("pc532", "PC532")
+LINE("playstation2", "PlayStation2")
+LINE("pmax", "PMAX")
+LINE("pmppc", "pmPPC")
+LINE("powerpc", "PowerPC")
+LINE("prep", "PReP")
+LINE("rs6000", "RS6000")
+LINE("sandpoint", "Sandpoint")
+LINE("sbmips", "SBMIPS")
+LINE("sgi", "SGI")
+LINE("sgimips", "SGIMIPS")
+LINE("sh3", "SH3")
+LINE("shark", "Shark")
+LINE("socppc", "SOCPPC")
+LINE("solbourne", "Solbourne")
+LINE("sparc", "SPARC")
+LINE("sparc64", "SPARC64")
+LINE("sun2", "Sun2")
+LINE("sun3", "Sun3")
+LINE("tahoe", "Tahoe")
+LINE("vax", "VAX")
+LINE("x68k", "X68k")
+LINE("x86", "x86")
+LINE("x86_64", "x86_64")
+LINE("xen", "Xen")
+LINE("zaurus", "Zaurus")
diff --git a/att.c b/att.c
new file mode 100644
index 000000000000..24d757ddf75b
--- /dev/null
+++ b/att.c
@@ -0,0 +1,39 @@
+/* $Id: att.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "mdoc.h"
+#include "mandoc.h"
+#include "libmdoc.h"
+
+/*
+ * Each LINE(x, y) entry of att.in expands to a comparison of `p'
+ * against x that returns y on an exact match.
+ */
+#define LINE(x, y) \
+ if (0 == strcmp(p, x)) return(y);
+
+/*
+ * Map an AT&T version token to its formatted string as listed in
+ * att.in.
+ * Returns NULL if the token is not listed.
+ */
+const char *
+mdoc_a2att(const char *p)
+{
+
+#include "att.in"
+
+ return(NULL);
+}
diff --git a/att.in b/att.in
new file mode 100644
index 000000000000..b4ef822158f8
--- /dev/null
+++ b/att.in
@@ -0,0 +1,40 @@
+/* $Id: att.in,v 1.8 2011/07/31 17:30:33 schwarze Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This file defines the AT&T versions of the .At macro. This probably
+ * isn't going to change. The right-hand side is the formatted string.
+ *
+ * Be sure to escape strings.
+ * The non-breaking blanks prevent ending an output line right before
+ * a number. Groff prevents line breaks at the same places.
+ */
+
+LINE("v1", "Version\\~1 AT&T UNIX")
+LINE("v2", "Version\\~2 AT&T UNIX")
+LINE("v3", "Version\\~3 AT&T UNIX")
+LINE("v4", "Version\\~4 AT&T UNIX")
+LINE("v5", "Version\\~5 AT&T UNIX")
+LINE("v6", "Version\\~6 AT&T UNIX")
+LINE("v7", "Version\\~7 AT&T UNIX")
+LINE("32v", "Version\\~32V AT&T UNIX")
+LINE("III", "AT&T System\\~III UNIX")
+LINE("V", "AT&T System\\~V UNIX")
+LINE("V.1", "AT&T System\\~V Release\\~1 UNIX")
+LINE("V.2", "AT&T System\\~V Release\\~2 UNIX")
+LINE("V.3", "AT&T System\\~V Release\\~3 UNIX")
+LINE("V.4", "AT&T System\\~V Release\\~4 UNIX")
diff --git a/catman.8 b/catman.8
new file mode 100644
index 000000000000..f5246f9a6ced
--- /dev/null
+++ b/catman.8
@@ -0,0 +1,111 @@
+.\" $Id: catman.8,v 1.5 2011/12/25 19:35:44 kristaps Exp $
+.\"
+.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: December 25 2011 $
+.Dt CATMAN 8
+.Os
+.Sh NAME
+.Nm catman
+.Nd update a man.cgi manpage cache
+.Sh SYNOPSIS
+.Nm catman
+.Op Fl fv
+.Op Fl C Ar file
+.Op Fl M Ar manpath
+.Op Fl m Ar manpath
+.Op Fl o Ar path
+.Sh DESCRIPTION
+The
+.Nm
+utility updates cached manpages for a jailed
+.Xr man.cgi 7 .
+.Pp
+By default,
+.Nm
+searches for
+.Xr mandocdb 8
+databases in the default paths stipulated by
+.Xr man 1
+and updates the cache in
+.Pa /var/www/cache/man.cgi .
+.Pp
+Its arguments are as follows:
+.Bl -tag -width Ds
+.It Fl f
+Force an update to all files.
+.It Fl v
+Print each file being updated.
+.It Fl C Ar file
+Specify an alternative configuration
+.Ar file
+in
+.Xr man.conf 5
+format.
+.It Fl M Ar manpath
+Use the colon-separated path instead of the default list of paths
+searched for
+.Xr mandocdb 8
+databases.
+Invalid paths, or paths without manual databases, are ignored.
+.It Fl m Ar manpath
+Prepend the colon-separated paths to the list of paths searched
+for
+.Xr mandocdb 8
+databases.
+Invalid paths, or paths without manual databases, are ignored.
+.It Fl o Ar path
+Update into the directory tree under
+.Ar path .
+.El
+.Pp
+Cache updates occur when a
+.Xr mandocdb 8
+database is newer than the cached copy unless
+.Fl f
+is specified, in which case files are always considered out of date.
+Cached manual pages are only updated if older than the master copy.
+.Sh ENVIRONMENT
+.Bl -tag -width Ds
+.It Ev MANPATH
+Colon-separated paths modifying the default list of paths searched for
+manual databases.
+Invalid paths, or paths without manual databases, are ignored.
+Overridden by
+.Fl M .
+If
+.Ev MANPATH
+begins with a
+.Sq \&: ,
+it is appended to the default list;
+else if it ends with
+.Sq \&: ,
+it is prepended to the default list; else if it contains
+.Sq \&:: ,
+the default list is inserted between the colons.
+If none of these conditions are met, it overrides the default list.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr man.cgi 7 ,
+.Xr mandocdb 8
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
diff --git a/catman.c b/catman.c
new file mode 100644
index 000000000000..1d313ea6e00e
--- /dev/null
+++ b/catman.c
@@ -0,0 +1,511 @@
+/* $Id: catman.c,v 1.10 2012/01/03 15:17:20 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef __linux__
+# include <db_185.h>
+#else
+# include <db.h>
+#endif
+
+#include "manpath.h"
+#include "mandocdb.h"
+
+/*
+ * Bounded copy into a path buffer.
+ * On truncation, print a diagnostic (showing the truncated result left
+ * in _dst) and terminate the program.
+ */
+#define	xstrlcpy(_dst, _src, _sz) \
+	do { \
+		if (strlcpy((_dst), (_src), (_sz)) >= (_sz)) { \
+			fprintf(stderr, "%s: Path too long\n", (_dst)); \
+			exit(EXIT_FAILURE); \
+		} \
+	} while (/* CONSTCOND */0)
+
+/*
+ * Bounded append to a path buffer.
+ * On truncation, print a diagnostic (showing the truncated result left
+ * in _dst) and terminate the program.
+ */
+#define	xstrlcat(_dst, _src, _sz) \
+	do { \
+		if (strlcat((_dst), (_src), (_sz)) >= (_sz)) { \
+			fprintf(stderr, "%s: Path too long\n", (_dst)); \
+			exit(EXIT_FAILURE); \
+		} \
+	} while (/* CONSTCOND */0)
+
+static int indexhtml(char *, size_t, char *, size_t);
+static int manup(const struct manpaths *, char *);
+static int mkpath(char *, mode_t, mode_t);
+static int treecpy(char *, char *);
+static int update(char *, char *);
+static void usage(void);
+
+static const char *progname;
+static int verbose;
+static int force;
+
+/*
+ * Parse the command line, build the list of manual paths and update
+ * the cache directory (default /var/www/cache/man.cgi, overridden by
+ * -o) from them.
+ * Exits with EXIT_SUCCESS if manup() succeeds, EXIT_FAILURE otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+ int ch;
+ char *aux, *base, *conf_file;
+ struct manpaths dirs;
+ char buf[MAXPATHLEN];
+ extern char *optarg;
+ extern int optind;
+
+ /* Use the last component of argv[0] as the program name. */
+ progname = strrchr(argv[0], '/');
+ if (progname == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+
+ aux = base = conf_file = NULL;
+ xstrlcpy(buf, "/var/www/cache/man.cgi", MAXPATHLEN);
+
+ while (-1 != (ch = getopt(argc, argv, "C:fm:M:o:v")))
+ switch (ch) {
+ case ('C'):
+ conf_file = optarg;
+ break;
+ case ('f'):
+ force = 1;
+ break;
+ case ('m'):
+ aux = optarg;
+ break;
+ case ('M'):
+ base = optarg;
+ break;
+ case ('o'):
+ xstrlcpy(buf, optarg, MAXPATHLEN);
+ break;
+ case ('v'):
+ verbose++;
+ break;
+ default:
+ usage();
+ return(EXIT_FAILURE);
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /* No positional arguments are accepted. */
+ if (argc > 0) {
+ usage();
+ return(EXIT_FAILURE);
+ }
+
+ memset(&dirs, 0, sizeof(struct manpaths));
+ manpath_parse(&dirs, conf_file, base, aux);
+ ch = manup(&dirs, buf);
+ manpath_free(&dirs);
+ return(ch ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/*
+ * Print a one-line usage summary to standard error.
+ */
+static void
+usage(void)
+{
+
+ fprintf(stderr, "usage: %s "
+ "[-fv] "
+ "[-C file] "
+ "[-o path] "
+ "[-m manpath] "
+ "[-M manpath]\n",
+ progname);
+}
+
+/*
+ * If "src" file doesn't exist (errors out), return -1. Otherwise,
+ * return 1 if "src" is newer (which also happens if "dst" doesn't
+ * exist or if the global "force" flag is set) and 0 otherwise.
+ */
+static int
+isnewer(const char *dst, const char *src)
+{
+ struct stat s1, s2;
+
+ if (-1 == stat(src, &s1))
+ return(-1);
+ if (force)
+ return(1);
+
+ /* Strictly newer: equal modification times count as up to date. */
+ return(-1 == stat(dst, &s2) ? 1 : s1.st_mtime > s2.st_mtime);
+}
+
+/*
+ * Copy the contents of one file into another.
+ * The source is opened before the destination so that an unreadable
+ * source never creates or truncates the destination.
+ * A diagnostic is printed to standard error on any failure.
+ * Returns 0 on failure, 1 on success.
+ */
+static int
+filecpy(const char *dst, const char *src)
+{
+	char		 buf[BUFSIZ];
+	int		 sfd, dfd, rc;
+	ssize_t		 rsz, wsz;
+
+	sfd = dfd = -1;
+	rc = 0;
+
+	if (-1 == (sfd = open(src, O_RDONLY, 0))) {
+		perror(src);
+		goto out;
+	} else if (-1 == (dfd = open(dst, O_CREAT|O_TRUNC|O_WRONLY, 0644))) {
+		perror(dst);
+		goto out;
+	}
+
+	while ((rsz = read(sfd, buf, BUFSIZ)) > 0)
+		if (-1 == (wsz = write(dfd, buf, (size_t)rsz))) {
+			perror(dst);
+			goto out;
+		} else if (wsz < rsz) {
+			fprintf(stderr, "%s: Short write\n", dst);
+			goto out;
+		}
+
+	if (rsz < 0)
+		perror(src);
+	else
+		rc = 1;
+out:
+	if (-1 != sfd)
+		close(sfd);
+	/* A failed close may be the only report of a deferred write error. */
+	if (-1 != dfd && -1 == close(dfd)) {
+		perror(dst);
+		rc = 0;
+	}
+
+	return(rc);
+}
+
+/*
+ * Pass over the recno database and re-create HTML pages if they're
+ * found to be out of date.
+ * The index is read from the destination tree; every file it names is
+ * copied from below "src" to below "dst" when isnewer() says so.
+ * "ssz" and "dsz" are the lengths of the base paths in "src" and
+ * "dst"; both buffers are truncated back to them for each record.
+ * Returns -1 on fatal error, 1 on success (the whole index was read).
+ */
+static int
+indexhtml(char *src, size_t ssz, char *dst, size_t dsz)
+{
+	DB		*idx;
+	DBT		 key, val;
+	int		 c, rc, corrupt;
+	unsigned int	 fl;
+	const char	*f;
+	char		*d;
+	char		 fname[MAXPATHLEN];
+
+	corrupt = 0;
+
+	xstrlcpy(fname, dst, MAXPATHLEN);
+	xstrlcat(fname, "/", MAXPATHLEN);
+	xstrlcat(fname, MANDOC_IDX, MAXPATHLEN);
+
+	idx = dbopen(fname, O_RDONLY, 0, DB_RECNO, NULL);
+	if (NULL == idx) {
+		perror(fname);
+		return(-1);
+	}
+
+	fl = R_FIRST;
+	while (0 == (c = (*idx->seq)(idx, &key, &val, fl))) {
+		fl = R_NEXT;
+		/*
+		 * If the record is zero-length, then it's unassigned.
+		 * Skip past these.
+		 */
+		if (0 == val.size)
+			continue;
+
+		/* The file name follows the first byte of the record. */
+		f = (const char *)val.data + 1;
+		if (NULL == memchr(f, '\0', val.size - 1)) {
+			corrupt = 1;
+			break;
+		}
+
+		src[(int)ssz] = dst[(int)dsz] = '\0';
+
+		xstrlcat(dst, "/", MAXPATHLEN);
+		xstrlcat(dst, f, MAXPATHLEN);
+
+		xstrlcat(src, "/", MAXPATHLEN);
+		xstrlcat(src, f, MAXPATHLEN);
+
+		if (-1 == (rc = isnewer(dst, src))) {
+			fprintf(stderr, "%s: File missing\n", f);
+			break;
+		} else if (0 == rc)
+			continue;
+
+		/* Create the directory that will hold the file. */
+		d = strrchr(dst, '/');
+		assert(NULL != d);
+		*d = '\0';
+
+		if (-1 == mkpath(dst, 0755, 0755)) {
+			perror(dst);
+			break;
+		}
+
+		*d = '/';
+
+		if ( ! filecpy(dst, src))
+			break;
+		if (verbose)
+			printf("%s\n", dst);
+	}
+
+	(*idx->close)(idx);
+
+	/*
+	 * Every other early exit from the loop has already printed its
+	 * own diagnostic, so only report corruption when a malformed
+	 * record was actually seen.
+	 */
+	if (c < 0)
+		perror(fname);
+	else if (corrupt)
+		fprintf(stderr, "%s: Corrupt index\n", fname);
+
+	return(1 == c ? 1 : -1);
+}
+
+/*
+ * Copy both recno and btree databases into the destination.
+ * Call in to begin recreating HTML files.
+ * Both path buffers are extended in place and cut back to their
+ * original lengths after each copy.
+ * Return -1 on fatal error and 1 if the update went well.
+ */
+static int
+update(char *dst, char *src)
+{
+ size_t dsz, ssz;
+
+ /* Remember the base lengths so the buffers can be restored. */
+ dsz = strlen(dst);
+ ssz = strlen(src);
+
+ xstrlcat(src, "/", MAXPATHLEN);
+ xstrlcat(dst, "/", MAXPATHLEN);
+
+ xstrlcat(src, MANDOC_DB, MAXPATHLEN);
+ xstrlcat(dst, MANDOC_DB, MAXPATHLEN);
+
+ if ( ! filecpy(dst, src))
+ return(-1);
+ if (verbose)
+ printf("%s\n", dst);
+
+ dst[(int)dsz] = src[(int)ssz] = '\0';
+
+ xstrlcat(src, "/", MAXPATHLEN);
+ xstrlcat(dst, "/", MAXPATHLEN);
+
+ xstrlcat(src, MANDOC_IDX, MAXPATHLEN);
+ xstrlcat(dst, MANDOC_IDX, MAXPATHLEN);
+
+ if ( ! filecpy(dst, src))
+ return(-1);
+ if (verbose)
+ printf("%s\n", dst);
+
+ dst[(int)dsz] = src[(int)ssz] = '\0';
+
+ return(indexhtml(src, ssz, dst, dsz));
+}
+
+/*
+ * See if btree or recno databases in the destination are out of date
+ * with respect to a single manpath component.
+ * Return -1 on fatal error, 0 if the source is no longer valid (and
+ * shouldn't be listed), and 1 if the update went well.
+ */
+static int
+treecpy(char *dst, char *src)
+{
+ size_t dsz, ssz;
+ int rc;
+
+ dsz = strlen(dst);
+ ssz = strlen(src);
+
+ /* First compare the MANDOC_IDX files. */
+ xstrlcat(src, "/", MAXPATHLEN);
+ xstrlcat(dst, "/", MAXPATHLEN);
+
+ xstrlcat(src, MANDOC_IDX, MAXPATHLEN);
+ xstrlcat(dst, MANDOC_IDX, MAXPATHLEN);
+
+ if (-1 == (rc = isnewer(dst, src)))
+ return(0);
+
+ dst[(int)dsz] = src[(int)ssz] = '\0';
+
+ if (1 == rc)
+ return(update(dst, src));
+
+ /* The index is current; now compare the MANDOC_DB files. */
+ xstrlcat(src, "/", MAXPATHLEN);
+ xstrlcat(dst, "/", MAXPATHLEN);
+
+ xstrlcat(src, MANDOC_DB, MAXPATHLEN);
+ xstrlcat(dst, MANDOC_DB, MAXPATHLEN);
+
+ if (-1 == (rc = isnewer(dst, src)))
+ return(0);
+ else if (rc == 0)
+ return(1);
+
+ dst[(int)dsz] = src[(int)ssz] = '\0';
+
+ return(update(dst, src));
+}
+
+/*
+ * Update the destination's file-tree with respect to changes in the
+ * source manpath components.
+ * "Change" is defined by an updated index or btree database.
+ * Also writes "etc/catman.conf" below "base", listing one "_whatdb"
+ * entry for every component that treecpy() reports as valid.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+manup(const struct manpaths *dirs, char *base)
+{
+ char dst[MAXPATHLEN],
+ src[MAXPATHLEN];
+ const char *path;
+ int i, c;
+ size_t sz;
+ FILE *f;
+
+ /* Create the path and file for the catman.conf file. */
+
+ sz = strlen(base);
+ xstrlcpy(dst, base, MAXPATHLEN);
+ xstrlcat(dst, "/etc", MAXPATHLEN);
+ if (-1 == mkpath(dst, 0755, 0755)) {
+ perror(dst);
+ return(0);
+ }
+
+ xstrlcat(dst, "/catman.conf", MAXPATHLEN);
+ if (NULL == (f = fopen(dst, "w"))) {
+ perror(dst);
+ return(0);
+ } else if (verbose)
+ printf("%s\n", dst);
+
+ for (i = 0; i < dirs->sz; i++) {
+ path = dirs->paths[i];
+ /* Rebuild the destination as base followed by the manpath. */
+ dst[(int)sz] = '\0';
+ xstrlcat(dst, path, MAXPATHLEN);
+ if (-1 == mkpath(dst, 0755, 0755)) {
+ perror(dst);
+ break;
+ }
+
+ xstrlcpy(src, path, MAXPATHLEN);
+ if (-1 == (c = treecpy(dst, src)))
+ break;
+ else if (0 == c)
+ continue;
+
+ /*
+ * We want to use a relative path here because manpath.h
+ * will realpath() when invoked with man.cgi, and we'll
+ * make sure to chdir() into the cache directory before.
+ *
+ * This allows the cache directory to be in an arbitrary
+ * place, working in both chroot() and non-chroot()
+ * "safe" modes.
+ */
+ assert('/' == path[0]);
+ fprintf(f, "_whatdb %s/whatis.db\n", path + 1);
+ }
+
+ fclose(f);
+ /* The loop only runs to completion when no component failed. */
+ return(i == dirs->sz);
+}
+
+/*
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Create the directory "path" together with any missing intermediate
+ * directories.
+ * Intermediate components are created with "dir_mode", the final one
+ * with "mode".
+ * The buffer is modified while working: each separator is temporarily
+ * replaced by a NUL, and on failure the buffer is left cut off at the
+ * component that failed.
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+mkpath(char *path, mode_t mode, mode_t dir_mode)
+{
+ struct stat sb;
+ char *slash;
+ int done, exists;
+
+ slash = path;
+
+ for (;;) {
+ /* Advance to the end of the next path component. */
+ /* LINTED */
+ slash += strspn(slash, "/");
+ /* LINTED */
+ slash += strcspn(slash, "/");
+
+ done = (*slash == '\0');
+ *slash = '\0';
+
+ /* skip existing path components */
+ exists = !stat(path, &sb);
+ if (!done && exists && S_ISDIR(sb.st_mode)) {
+ *slash = '/';
+ continue;
+ }
+
+ if (mkdir(path, done ? mode : dir_mode) == 0) {
+ /* Apply mode bits beyond the permission bits explicitly. */
+ if (mode > 0777 && chmod(path, mode) < 0)
+ return (-1);
+ } else {
+ if (!exists) {
+ /* Not there */
+ return (-1);
+ }
+ if (!S_ISDIR(sb.st_mode)) {
+ /* Is there, but isn't a directory */
+ errno = ENOTDIR;
+ return (-1);
+ }
+ }
+
+ if (done)
+ break;
+
+ *slash = '/';
+ }
+
+ return (0);
+}
diff --git a/cgi.c b/cgi.c
new file mode 100644
index 000000000000..2f5870ff8efd
--- /dev/null
+++ b/cgi.c
@@ -0,0 +1,1203 @@
+/* $Id: cgi.c,v 1.42 2012/03/24 01:46:25 kristaps Exp $ */
+/*
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "apropos_db.h"
+#include "mandoc.h"
+#include "mdoc.h"
+#include "man.h"
+#include "main.h"
+#include "manpath.h"
+#include "mandocdb.h"
+
+#ifdef __linux__
+# include <db_185.h>
+#else
+# include <db.h>
+#endif
+
+/*
+ * The pages the script can serve.
+ * PAGE__MAX is the number of pages and sizes the "pages" name table.
+ */
+enum page {
+ PAGE_INDEX,
+ PAGE_SEARCH,
+ PAGE_SHOW,
+ PAGE__MAX
+};
+
+/*
+ * A named manual root.
+ * The name is what the "manpath" query value is compared against.
+ */
+struct paths {
+ char *name;
+ char *path; /* NOTE(review): presumably the directory; confirm */
+};
+
+/*
+ * A query as passed to the search function.
+ * The string members point into the request string that was parsed;
+ * they are NULL when the corresponding key was not supplied.
+ */
+struct query {
+ const char *arch; /* architecture */
+ const char *sec; /* manual section */
+ const char *expr; /* unparsed expression string */
+ int manroot; /* manroot index (or -1)*/
+ int legacy; /* whether legacy mode */
+};
+
+/*
+ * A request: the parsed query, the known manual roots and the page
+ * being asked for.
+ */
+struct req {
+ struct query q;
+ struct paths *p; /* array of manual roots */
+ size_t psz; /* number of entries in p */
+ enum page page;
+};
+
+static int atou(const char *, unsigned *);
+static void catman(const struct req *, const char *);
+static int cmp(const void *, const void *);
+static void format(const struct req *, const char *);
+static void html_print(const char *);
+static void html_printquery(const struct req *);
+static void html_putchar(char);
+static int http_decode(char *);
+static void http_parse(struct req *, char *);
+static void http_print(const char *);
+static void http_putchar(char);
+static void http_printquery(const struct req *);
+static int pathstop(DIR *);
+static void pathgen(DIR *, char *, struct req *);
+static void pg_index(const struct req *, char *);
+static void pg_search(const struct req *, char *);
+static void pg_show(const struct req *, char *);
+static void resp_bad(void);
+static void resp_baddb(void);
+static void resp_error400(void);
+static void resp_error404(const char *);
+static void resp_begin_html(int, const char *);
+static void resp_begin_http(int, const char *);
+static void resp_end_html(void);
+static void resp_index(const struct req *);
+static void resp_search(struct res *, size_t, void *);
+static void resp_searchform(const struct req *);
+
+static const char *progname; /* cgi script name */
+static const char *cache; /* cache directory */
+static const char *css; /* css directory */
+static const char *host; /* hostname */
+
+static const char * const pages[PAGE__MAX] = {
+ "index", /* PAGE_INDEX */
+ "search", /* PAGE_SEARCH */
+ "show", /* PAGE_SHOW */
+};
+
+/*
+ * This is just OpenBSD's strtol(3) suggestion.
+ * I use it instead of strtonum(3) for portability's sake.
+ * Parse "buf" as a decimal number in the range 0 to INT_MAX and store
+ * it in "v".
+ * Returns 1 on success; returns 0 (leaving "v" untouched) if the string
+ * is empty, has trailing characters or is out of range.
+ */
+static int
+atou(const char *buf, unsigned *v)
+{
+ char *ep;
+ long lval;
+
+ errno = 0;
+ lval = strtol(buf, &ep, 10);
+ if (buf[0] == '\0' || *ep != '\0')
+ return(0);
+ if ((errno == ERANGE && (lval == LONG_MAX ||
+ lval == LONG_MIN)) ||
+ (lval > INT_MAX || lval < 0))
+ return(0);
+
+ *v = (unsigned int)lval;
+ return(1);
+}
+
+/*
+ * Print a character, escaping HTML along the way.
+ * The double quote, ampersand and angle brackets are written as
+ * character entity references; everything else is written verbatim.
+ * This will pass non-ASCII straight to output: be warned!
+ */
+static void
+html_putchar(char c)
+{
+
+	switch (c) {
+	case ('"'):
+		/* The entity is "quot"; "&quote;" is not defined by HTML. */
+		printf("&quot;");
+		break;
+	case ('&'):
+		printf("&amp;");
+		break;
+	case ('>'):
+		printf("&gt;");
+		break;
+	case ('<'):
+		printf("&lt;");
+		break;
+	default:
+		putchar((unsigned char)c);
+		break;
+	}
+}
+/*
+ * Print the query as "&expr=...&sec=...&arch=..." with each value
+ * passed through http_print().
+ * Unset values are printed as empty strings.
+ */
+static void
+http_printquery(const struct req *req)
+{
+
+ printf("&expr=");
+ http_print(req->q.expr ? req->q.expr : "");
+ printf("&sec=");
+ http_print(req->q.sec ? req->q.sec : "");
+ printf("&arch=");
+ http_print(req->q.arch ? req->q.arch : "");
+}
+
+
+/*
+ * Print the query as "&amp;expr=...&amp;sec=...&amp;arch=..." with
+ * each value passed through html_print().
+ * Unset values are printed as empty strings.
+ */
+static void
+html_printquery(const struct req *req)
+{
+
+ printf("&amp;expr=");
+ html_print(req->q.expr ? req->q.expr : "");
+ printf("&amp;sec=");
+ html_print(req->q.sec ? req->q.sec : "");
+ printf("&amp;arch=");
+ html_print(req->q.arch ? req->q.arch : "");
+}
+
+/*
+ * Call through to http_putchar() for each character of the string.
+ * Accepts NULL strings.
+ */
+static void
+http_print(const char *p)
+{
+
+ if (NULL == p)
+ return;
+ while ('\0' != *p)
+ http_putchar(*p++);
+}
+
+/*
+ * Call through to html_putchar() for each character of the string.
+ * Accepts NULL strings.
+ */
+static void
+html_print(const char *p)
+{
+
+ if (NULL == p)
+ return;
+ while ('\0' != *p)
+ html_putchar(*p++);
+}
+
+/*
+ * Parse out key-value pairs from an HTTP request variable.
+ * This can be either a cookie or a POST/GET string, although man.cgi
+ * uses only GET for simplicity.
+ * Pairs are separated by ';' or '&'; the recognised keys are "expr"
+ * (or "query"), "sec" (or "sektion"), "arch", "manpath" and "apropos".
+ * The string is split and decoded in place and the query keeps
+ * pointers into it.
+ */
+static void
+http_parse(struct req *req, char *p)
+{
+ char *key, *val, *manroot;
+ int i, legacy;
+
+ memset(&req->q, 0, sizeof(struct query));
+
+ legacy = -1;
+ manroot = NULL;
+
+ while ('\0' != *p) {
+ key = p;
+ val = NULL;
+
+ /* Terminate this pair, then split it at the first '='. */
+ p += (int)strcspn(p, ";&");
+ if ('\0' != *p)
+ *p++ = '\0';
+ if (NULL != (val = strchr(key, '=')))
+ *val++ = '\0';
+
+ /* Ignore pairs with an empty key or without a value. */
+ if ('\0' == *key || NULL == val || '\0' == *val)
+ continue;
+
+ /* Just abort handling. */
+
+ if ( ! http_decode(key))
+ break;
+ if (NULL != val && ! http_decode(val))
+ break;
+
+ if (0 == strcmp(key, "expr"))
+ req->q.expr = val;
+ else if (0 == strcmp(key, "query"))
+ req->q.expr = val;
+ else if (0 == strcmp(key, "sec"))
+ req->q.sec = val;
+ else if (0 == strcmp(key, "sektion"))
+ req->q.sec = val;
+ else if (0 == strcmp(key, "arch"))
+ req->q.arch = val;
+ else if (0 == strcmp(key, "manpath"))
+ manroot = val;
+ else if (0 == strcmp(key, "apropos"))
+ legacy = 0 == strcmp(val, "0");
+ }
+
+ /* Test for old man.cgi compatibility mode. */
+
+ req->q.legacy = legacy > 0;
+
+ /*
+ * Section "0" means no section when in legacy mode.
+ * For some man.cgi scripts, "default" arch is none.
+ */
+
+ if (req->q.legacy && NULL != req->q.sec)
+ if (0 == strcmp(req->q.sec, "0"))
+ req->q.sec = NULL;
+ if (req->q.legacy && NULL != req->q.arch)
+ if (0 == strcmp(req->q.arch, "default"))
+ req->q.arch = NULL;
+
+ /* Default to first manroot. */
+
+ /* A requested manroot that matches no known name yields -1. */
+ if (NULL != manroot) {
+ for (i = 0; i < (int)req->psz; i++)
+ if (0 == strcmp(req->p[i].name, manroot))
+ break;
+ req->q.manroot = i < (int)req->psz ? i : -1;
+ }
+}
+
+/*
+ * URL-encode a single byte onto standard output: alphanumerics are
+ * written unchanged, a space becomes '+', and everything else becomes
+ * "%xx" with two lower-case hexadecimal digits.
+ */
+static void
+http_putchar(char c)
+{
+	const unsigned char	 uc = (unsigned char)c;
+
+	if (isalnum(uc)) {
+		putchar(uc);
+		return;
+	} else if (' ' == c) {
+		putchar('+');
+		return;
+	}
+
+	/*
+	 * Format the unsigned byte value: where plain char is signed,
+	 * a byte above 0x7f would otherwise be sign-extended and
+	 * printed as eight hex digits.
+	 */
+
+	printf("%%%.2x", (unsigned int)uc);
+}
+
+/*
+ * HTTP-decode a string.  The standard explanation is that this turns
+ * "%4e+foo" into "n foo" in the regular way.  This is done in-place
+ * over the allocated string; the result is never longer than the input.
+ *
+ * Returns 1 on success.  Returns 0 if a '%' is not followed by exactly
+ * two hexadecimal digits or if the escape decodes to a NUL byte; the
+ * buffer content is unspecified in that case.
+ */
+static int
+http_decode(char *p)
+{
+	char		 hex[3];
+	char		*dst;
+	unsigned int	 c;
+
+	hex[2] = '\0';
+
+	/*
+	 * Single pass with separate read (p) and write (dst) cursors;
+	 * dst never overtakes p because an escape shrinks 3 bytes to 1.
+	 */
+
+	for (dst = p; '\0' != *p; p++, dst++) {
+		if ('+' == *p) {
+			*dst = ' ';
+			continue;
+		} else if ('%' != *p) {
+			*dst = *p;
+			continue;
+		}
+
+		/*
+		 * Require two real hex digits.  The short-circuit keeps
+		 * us from reading past a terminator directly after '%'.
+		 */
+
+		if ( ! isxdigit((unsigned char)p[1]) ||
+		    ! isxdigit((unsigned char)p[2]))
+			return(0);
+
+		hex[0] = p[1];
+		hex[1] = p[2];
+		c = (unsigned int)strtoul(hex, NULL, 16);
+
+		/* An embedded NUL would silently truncate the value. */
+
+		if (0 == c)
+			return(0);
+
+		*dst = (char)c;
+		p += 2;
+	}
+
+	*dst = '\0';
+	return(1);
+}
+
+/*
+ * Write the CGI response header block to standard output and flush it.
+ * A "Status:" line built from code and msg is sent for anything other
+ * than 200; msg is not used when code is 200.
+ */
+static void
+resp_begin_http(int code, const char *msg)
+{
+
+	if (200 != code)
+		printf("Status: %d %s\n", code, msg);
+
+	fputs("Content-Type: text/html; charset=utf-8\n", stdout);
+	fputs("Cache-Control: no-cache\n", stdout);
+	fputs("Pragma: no-cache\n", stdout);
+
+	/* The empty line terminates the header block. */
+
+	putchar('\n');
+	fflush(stdout);
+}
+
+/*
+ * Begin an HTML response: send the HTTP headers via resp_begin_http(),
+ * then the document prologue up to and including the opening <BODY>.
+ * Both stylesheet links are rooted at the global css directory.
+ */
+static void
+resp_begin_html(int code, const char *msg)
+{
+
+	resp_begin_http(code, msg);
+
+	fputs("<!DOCTYPE HTML PUBLIC "
+	      " \"-//W3C//DTD HTML 4.01//EN\""
+	      " \"http://www.w3.org/TR/html4/strict.dtd\">\n"
+	      "<HTML>\n"
+	      "<HEAD>\n"
+	      "<META HTTP-EQUIV=\"Content-Type\""
+	      " CONTENT=\"text/html; charset=utf-8\">\n", stdout);
+
+	printf("<LINK REL=\"stylesheet\" HREF=\"%s/man-cgi.css\""
+	       " TYPE=\"text/css\" media=\"all\">\n", css);
+	printf("<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
+	       " TYPE=\"text/css\" media=\"all\">\n", css);
+
+	fputs("<TITLE>System Manpage Reference</TITLE>\n"
+	      "</HEAD>\n"
+	      "<BODY>\n"
+	      "<!-- Begin page content. //-->\n", stdout);
+}
+
+/*
+ * Close the document opened by resp_begin_html().
+ */
+static void
+resp_end_html(void)
+{
+
+	fputs("</BODY>\n", stdout);
+	fputs("</HTML>\n", stdout);
+}
+
+/*
+ * Print the search form, pre-filled from the current query.
+ * The manpath selector is shown only when more than one manroot is
+ * known; the entry matching req->q.manroot is pre-selected, falling
+ * back to the first entry when manroot is -1.
+ */
+static void
+resp_searchform(const struct req *req)
+{
+	const char	*sel;
+	int		 n;
+
+	printf("<!-- Begin search form. //-->\n"
+	       "<DIV ID=\"mancgi\">\n"
+	       "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
+	       "<FIELDSET>\n"
+	       "<LEGEND>Search Parameters</LEGEND>\n"
+	       "<INPUT TYPE=\"submit\" "
+	       " VALUE=\"Search\"> for manuals satisfying \n"
+	       "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
+	       progname);
+	html_print(req->q.expr ? req->q.expr : "");
+
+	fputs("\">, section "
+	      "<INPUT TYPE=\"text\""
+	      " SIZE=\"4\" NAME=\"sec\" VALUE=\"", stdout);
+	html_print(req->q.sec ? req->q.sec : "");
+
+	fputs("\">, arch "
+	      "<INPUT TYPE=\"text\""
+	      " SIZE=\"8\" NAME=\"arch\" VALUE=\"", stdout);
+	html_print(req->q.arch ? req->q.arch : "");
+	fputs("\">", stdout);
+
+	if (req->psz > 1) {
+		puts(", <SELECT NAME=\"manpath\">");
+		for (n = 0; n < (int)req->psz; n++) {
+			sel = "";
+			if (n == req->q.manroot ||
+			    (0 == n && -1 == req->q.manroot))
+				sel = "SELECTED=\"selected\"";
+
+			printf("<OPTION %s VALUE=\"", sel);
+			html_print(req->p[n].name);
+			fputs("\">", stdout);
+			html_print(req->p[n].name);
+			puts("</OPTION>");
+		}
+		puts("</SELECT>");
+	}
+
+	puts(".\n"
+	     "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
+	     "</FIELDSET>\n"
+	     "</FORM>\n"
+	     "</DIV>\n"
+	     "<!-- End search form. //-->");
+}
+
+/*
+ * Emit the index page: a complete 200 response whose body consists of
+ * nothing but the search form.
+ */
+static void
+resp_index(const struct req *req)
+{
+
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ resp_end_html();
+}
+
+/*
+ * Emit a complete "400 Query Malformed" page that links back to the
+ * index page below the script name.
+ */
+static void
+resp_error400(void)
+{
+
+	resp_begin_html(400, "Query Malformed");
+	printf("<H1>Malformed Query</H1>\n"
+	       "<P>\n"
+	       "The query you entered was malformed.\n"
+	       "Try again from the\n"
+	       "<A HREF=\"%s/index.html\">main page</A>.\n"
+	       "</P>", progname);
+	resp_end_html();
+}
+
+/*
+ * Emit a complete "404 Not Found" page naming the requested page.
+ * The name is written with html_print(), so NULL is tolerated.
+ */
+static void
+resp_error404(const char *page)
+{
+
+	resp_begin_html(404, "Not Found");
+
+	fputs("<H1>Page Not Found</H1>\n"
+	      "<P>\n"
+	      "The page you're looking for, \n"
+	      "<B>", stdout);
+	html_print(page);
+	printf("</B>,\n"
+	       "could not be found.\n"
+	       "Try searching from the\n"
+	       "<A HREF=\"%s/index.html\">main page</A>.\n"
+	       "</P>", progname);
+
+	resp_end_html();
+}
+
+/*
+ * Emit a complete "500 Internal Server Error" page with a generic
+ * message.  In the code visible here it is used by main() when the
+ * cache directory cannot be entered or opened.
+ */
+static void
+resp_bad(void)
+{
+ resp_begin_html(500, "Internal Server Error");
+ puts("<P>Generic badness happened.</P>");
+ resp_end_html();
+}
+
+/*
+ * Emit a complete "500 Internal Server Error" page blaming the
+ * database.  Callers use it when an index, manual or catpage file
+ * cannot be opened, read or parsed.
+ */
+static void
+resp_baddb(void)
+{
+
+ resp_begin_html(500, "Internal Server Error");
+ puts("<P>Your database is broken.</P>");
+ resp_end_html();
+}
+
+/*
+ * Render search results.  Passed to apropos_search() as its result
+ * callback and also called directly; arg is the struct req of the
+ * current request.
+ *
+ * Exactly one matching record produces a 303 redirect straight to its
+ * "show" page.  Otherwise a full page is sent: the search form followed
+ * by either "No results found." or a table of matches sorted with cmp().
+ */
+static void
+resp_search(struct res *r, size_t sz, void *arg)
+{
+	const struct req	*req = arg;
+	struct res		*cur;
+	size_t			 i, first, matched;
+
+	if (sz > 0)
+		assert(req->q.manroot >= 0);
+
+	/* Count the matches and remember where the first one is. */
+
+	first = sz;
+	matched = 0;
+	for (i = 0; i < sz; i++) {
+		if ( ! r[i].matched)
+			continue;
+		if (0 == matched++)
+			first = i;
+	}
+
+	/*
+	 * If we have just one result, then jump there now
+	 * without any delay.
+	 */
+
+	if (1 == matched) {
+		cur = &r[first];
+		puts("Status: 303 See Other");
+		printf("Location: http://%s%s/show/%d/%u/%u.html?",
+		       host, progname, req->q.manroot,
+		       cur->volume, cur->rec);
+		http_printquery(req);
+		puts("\n"
+		     "Content-Type: text/html; charset=utf-8\n");
+		return;
+	}
+
+	resp_begin_html(200, NULL);
+	resp_searchform(req);
+	puts("<DIV CLASS=\"results\">");
+
+	if (0 == matched) {
+		puts("<P>\n"
+		     "No results found.\n"
+		     "</P>\n"
+		     "</DIV>");
+		resp_end_html();
+		return;
+	}
+
+	qsort(r, sz, sizeof(struct res), cmp);
+
+	puts("<TABLE>");
+
+	for (i = 0; i < sz; i++) {
+		cur = &r[i];
+		if ( ! cur->matched)
+			continue;
+
+		/* Title cell: link text is "title(cat[/arch])". */
+
+		printf("<TR>\n"
+		       "<TD CLASS=\"title\">\n"
+		       "<A HREF=\"%s/show/%d/%u/%u.html?",
+		       progname, req->q.manroot,
+		       cur->volume, cur->rec);
+		html_printquery(req);
+		fputs("\">", stdout);
+		html_print(cur->title);
+		putchar('(');
+		html_print(cur->cat);
+		if (cur->arch && '\0' != *cur->arch) {
+			putchar('/');
+			html_print(cur->arch);
+		}
+
+		/* Description cell. */
+
+		fputs(")</A>\n"
+		      "</TD>\n"
+		      "<TD CLASS=\"desc\">", stdout);
+		html_print(cur->desc);
+		puts("</TD>\n"
+		     "</TR>");
+	}
+
+	puts("</TABLE>\n"
+	     "</DIV>");
+	resp_end_html();
+}
+
+/*
+ * Page handler for the index page; the sub-path is ignored.
+ */
+/* ARGSUSED */
+static void
+pg_index(const struct req *req, char *path)
+{
+
+ resp_index(req);
+}
+
+/*
+ * Close whichever of the italic/bold elements is open and clear both
+ * flags.
+ */
+static void
+catman_plain(int *italic, int *bold)
+{
+
+	if (*italic)
+		printf("</I>");
+	if (*bold)
+		printf("</B>");
+	*italic = *bold = 0;
+}
+
+/*
+ * Render a preformatted catpage as HTML inside <PRE>, after the usual
+ * page header and search form.  Backspace overstrike sequences are
+ * translated: "_\bX" becomes italic X, certain two-glyph overstrikes
+ * collapse to '*' or '+', and any other "X\bY" becomes bold Y.
+ * If the file cannot be opened, a database error page is sent instead.
+ */
+static void
+catman(const struct req *req, const char *file)
+{
+	FILE		*f;
+	size_t		 len;
+	int		 i;
+	char		*p;
+	int		 italic, bold;
+
+	if (NULL == (f = fopen(file, "r"))) {
+		resp_baddb();
+		return;
+	}
+
+	resp_begin_html(200, NULL);
+	resp_searchform(req);
+	puts("<DIV CLASS=\"catman\">\n"
+	     "<PRE>");
+
+	/* fgetln() lines are not NUL-terminated; len is authoritative. */
+
+	while (NULL != (p = fgetln(f, &len))) {
+		bold = italic = 0;
+		for (i = 0; i < (int)len - 1; i++) {
+			/*
+			 * This means that the catpage is out of state.
+			 * Ignore it and keep going (although the
+			 * catpage is bogus).
+			 */
+
+			if ('\b' == p[i] || '\n' == p[i])
+				continue;
+
+			/*
+			 * Print a regular character.
+			 * Close out any bold/italic scopes.
+			 * If we're in back-space mode, make sure we'll
+			 * have something to enter when we backspace.
+			 */
+
+			if ('\b' != p[i + 1]) {
+				catman_plain(&italic, &bold);
+				html_putchar(p[i]);
+				continue;
+			} else if (i + 2 >= (int)len)
+				continue;
+
+			/*
+			 * From here on p[i + 1] is a backspace and
+			 * p[i + 2] is the overstriking character.
+			 */
+
+			/* Italic mode. */
+
+			if ('_' == p[i]) {
+				if (bold)
+					printf("</B>");
+				if ( ! italic)
+					printf("<I>");
+				bold = 0;
+				italic = 1;
+				i += 2;
+				html_putchar(p[i]);
+				continue;
+			}
+
+			/*
+			 * Handle funny behaviour troff-isms.
+			 * These grok'd from the original man2html.c.
+			 * Both groups compare the character before the
+			 * backspace with the one after it.
+			 */
+
+			if (('+' == p[i] && 'o' == p[i + 2]) ||
+			    ('o' == p[i] && '+' == p[i + 2]) ||
+			    ('|' == p[i] && '=' == p[i + 2]) ||
+			    ('=' == p[i] && '|' == p[i + 2]) ||
+			    ('*' == p[i] && '=' == p[i + 2]) ||
+			    ('=' == p[i] && '*' == p[i + 2]) ||
+			    ('*' == p[i] && '|' == p[i + 2]) ||
+			    ('|' == p[i] && '*' == p[i + 2])) {
+				catman_plain(&italic, &bold);
+				putchar('*');
+				i += 2;
+				continue;
+			} else if (('|' == p[i] && '-' == p[i + 2]) ||
+			    ('-' == p[i] && '|' == p[i + 2]) ||
+			    ('+' == p[i] && '-' == p[i + 2]) ||
+			    ('-' == p[i] && '+' == p[i + 2]) ||
+			    ('+' == p[i] && '|' == p[i + 2]) ||
+			    ('|' == p[i] && '+' == p[i + 2])) {
+				catman_plain(&italic, &bold);
+				putchar('+');
+				i += 2;
+				continue;
+			}
+
+			/* Bold mode. */
+
+			if (italic)
+				printf("</I>");
+			if ( ! bold)
+				printf("<B>");
+			bold = 1;
+			italic = 0;
+			i += 2;
+			html_putchar(p[i]);
+		}
+
+		/*
+		 * Clean up the last character.
+		 * We can get to a newline; don't print that.
+		 */
+
+		catman_plain(&italic, &bold);
+
+		if (i == (int)len - 1 && '\n' != p[i])
+			html_putchar(p[i]);
+
+		putchar('\n');
+	}
+
+	puts("</PRE>\n"
+	     "</DIV>\n"
+	     "</BODY>\n"
+	     "</HTML>");
+
+	fclose(f);
+}
+
+/*
+ * Parse a manual source file and render it as an HTML fragment after
+ * the usual page header and search form.  Cross-reference links point
+ * back at this script's search page.
+ * If the file cannot be opened or parsed, a database error page is
+ * sent instead.
+ */
+static void
+format(const struct req *req, const char *file)
+{
+	struct mparse	*mp;
+	int		 fd;
+	struct mdoc	*mdoc;
+	struct man	*man;
+	void		*vp;
+	enum mandoclevel rc;
+	char		 opts[MAXPATHLEN + 128];
+
+	if (-1 == (fd = open(file, O_RDONLY, 0))) {
+		resp_baddb();
+		return;
+	}
+
+	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+	rc = mparse_readfd(mp, fd, file);
+	close(fd);
+
+	/* Release the parser on every exit path from here on. */
+
+	if (rc >= MANDOCLEVEL_FATAL) {
+		resp_baddb();
+		mparse_free(mp);
+		return;
+	}
+
+	snprintf(opts, sizeof(opts), "fragment,"
+		"man=%s/search.html?sec=%%S&expr=%%N,"
+		/*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
+		progname);
+
+	mparse_result(mp, &mdoc, &man);
+	if (NULL == man && NULL == mdoc) {
+		resp_baddb();
+		mparse_free(mp);
+		return;
+	}
+
+	resp_begin_html(200, NULL);
+	resp_searchform(req);
+
+	vp = html_alloc(opts);
+
+	if (NULL != mdoc)
+		html_mdoc(vp, mdoc);
+	else
+		html_man(vp, man);
+
+	puts("</BODY>\n"
+	     "</HTML>");
+
+	html_free(vp);
+	mparse_free(mp);
+}
+
+/*
+ * Page handler for "show": display one manual identified by the
+ * sub-path "<manroot>/<volume>/<record>", three unsigned numbers.
+ * The record is looked up in the index database of the selected
+ * volume and the file it names is rendered by catman() or format().
+ * A sub-path that does not parse, or that is out of range, gets a 400
+ * page; database trouble gets a 500 page.
+ */
+static void
+pg_show(const struct req *req, char *path)
+{
+ struct manpaths ps;
+ size_t sz;
+ char *sub;
+ char file[MAXPATHLEN];
+ const char *cp;
+ int rc, catm;
+ unsigned int vol, rec, mr;
+ DB *idx;
+ DBT key, val;
+
+ idx = NULL;
+
+ /* Parse out mroot, volume, and record from the path. */
+
+ /* The returns below happen before ps is set up, so they skip "out". */
+ if (NULL == path || NULL == (sub = strchr(path, '/'))) {
+ resp_error400();
+ return;
+ }
+ *sub++ = '\0';
+ if ( ! atou(path, &mr)) {
+ resp_error400();
+ return;
+ }
+ path = sub;
+ if (NULL == (sub = strchr(path, '/'))) {
+ resp_error400();
+ return;
+ }
+ *sub++ = '\0';
+ if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
+ resp_error400();
+ return;
+ } else if (mr >= (unsigned int)req->psz) {
+ resp_error400();
+ return;
+ }
+
+ /*
+ * Begin by chdir()ing into the manroot.
+ * This way we can pick up the database files, which are
+ * relative to the manpath root.
+ */
+
+ if (-1 == chdir(req->p[(int)mr].path)) {
+ perror(req->p[(int)mr].path);
+ resp_baddb();
+ return;
+ }
+
+ memset(&ps, 0, sizeof(struct manpaths));
+ manpath_manconf(&ps, "etc/catman.conf");
+
+ /* From here on, leave through "out" so that ps and idx are released. */
+ if (vol >= (unsigned int)ps.sz) {
+ resp_error400();
+ goto out;
+ }
+
+ /* sz remembers the length of the volume directory for reuse below. */
+ sz = strlcpy(file, ps.paths[vol], MAXPATHLEN);
+ assert(sz < MAXPATHLEN);
+ strlcat(file, "/", MAXPATHLEN);
+ strlcat(file, MANDOC_IDX, MAXPATHLEN);
+
+ /* Open the index recno(3) database. */
+
+ idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
+ if (NULL == idx) {
+ perror(file);
+ resp_baddb();
+ goto out;
+ }
+
+ /* NOTE(review): the key size is hard-coded; assumes rec is 4 bytes wide. */
+ key.data = &rec;
+ key.size = 4;
+
+ /* A negative result is reported as a database error, a positive one as a bad request. */
+ if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
+ rc < 0 ? resp_baddb() : resp_error400();
+ goto out;
+ } else if (0 == val.size) {
+ resp_baddb();
+ goto out;
+ }
+
+ /* The first byte of the value is a type flag: 'c' selects catman(). */
+ cp = (char *)val.data;
+ catm = 'c' == *cp++;
+
+ /* The rest must contain a NUL so that it can be used as a C string. */
+ if (NULL == memchr(cp, '\0', val.size - 1))
+ resp_baddb();
+ else {
+ /* Cut file back to the volume directory, then append the file name. */
+ file[(int)sz] = '\0';
+ strlcat(file, "/", MAXPATHLEN);
+ strlcat(file, cp, MAXPATHLEN);
+ if (catm)
+ catman(req, file);
+ else
+ format(req, file);
+ }
+out:
+ if (idx)
+ (*idx->close)(idx);
+ manpath_free(&ps);
+}
+
+/*
+ * Page handler for "search": run the request's query expression
+ * against the databases of the selected manroot and render the
+ * outcome through resp_search().  The sub-path is not used.
+ * With no usable manroot, or an expression that does not compile,
+ * an empty result page is shown; a failed database search yields a
+ * database error page.
+ */
+static void
+pg_search(const struct req *req, char *path)
+{
+	size_t		  tt, ressz;
+	struct manpaths	  ps;
+	int		  i, sz, rc;
+	const char	 *ep, *start;
+	struct res	 *res;
+	char		**cp;
+	struct opts	  opt;
+	struct expr	 *expr;
+
+	if (req->q.manroot < 0 || 0 == req->psz) {
+		resp_search(NULL, 0, (void *)req);
+		return;
+	}
+
+	memset(&opt, 0, sizeof(struct opts));
+
+	ep = req->q.expr;
+	opt.arch = req->q.arch;
+	opt.cat = req->q.sec;
+	rc = -1;
+	sz = 0;
+	cp = NULL;
+	ressz = 0;
+	res = NULL;
+
+	/*
+	 * Begin by chdir()ing into the root of the manpath.
+	 * This way we can pick up the database files, which are
+	 * relative to the manpath root.
+	 */
+
+	assert(req->q.manroot < (int)req->psz);
+	if (-1 == (chdir(req->p[req->q.manroot].path))) {
+		perror(req->p[req->q.manroot].path);
+		resp_search(NULL, 0, (void *)req);
+		return;
+	}
+
+	memset(&ps, 0, sizeof(struct manpaths));
+	manpath_manconf(&ps, "etc/catman.conf");
+
+	/*
+	 * Poor man's tokenisation: just break apart by spaces.
+	 * Yes, this is half-ass.  But it works for now.
+	 * Each token is copied into its own allocation in cp[].
+	 */
+
+	while (ep && isspace((unsigned char)*ep))
+		ep++;
+
+	while (ep && '\0' != *ep) {
+		cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
+		start = ep;
+		while ('\0' != *ep && ! isspace((unsigned char)*ep))
+			ep++;
+		cp[sz] = mandoc_malloc((ep - start) + 1);
+		memcpy(cp[sz], start, ep - start);
+		cp[sz++][ep - start] = '\0';
+		while (isspace((unsigned char)*ep))
+			ep++;
+	}
+
+	/*
+	 * Pump down into apropos backend.
+	 * The resp_search() function is called with the results.
+	 */
+
+	expr = req->q.legacy ?
+		termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
+
+	if (NULL != expr)
+		rc = apropos_search
+			(ps.sz, ps.paths, &opt, expr, tt,
+			 (void *)req, &ressz, &res, resp_search);
+
+	/*
+	 * ...unless errors occurred.
+	 * rc is still -1 when the expression failed to compile.  The
+	 * request must be handed to resp_search() here as well, since
+	 * it renders the search form from it.
+	 */
+
+	if (0 == rc)
+		resp_baddb();
+	else if (-1 == rc)
+		resp_search(NULL, 0, (void *)req);
+
+	for (i = 0; i < sz; i++)
+		free(cp[i]);
+
+	free(cp);
+	resfree(res, ressz);
+	exprfree(expr);
+	manpath_free(&ps);
+}
+
+/*
+ * CGI entry point.  Reads its configuration from the environment
+ * (CACHE_DIR, SCRIPT_NAME, CSS_DIR, HTTP_HOST), collects the manroots
+ * below the cache directory, parses QUERY_STRING and dispatches on the
+ * first component of PATH_INFO.
+ * Returns EXIT_FAILURE only if the cache directory is unusable.
+ */
+int
+main(void)
+{
+	int		 i;
+	char		 buf[MAXPATHLEN];
+	DIR		*cwd;
+	struct req	 req;
+	char		*p, *path, *subpath;
+
+	/* Scan our run-time environment. */
+
+	if (NULL == (cache = getenv("CACHE_DIR")))
+		cache = "/cache/man.cgi";
+
+	if (NULL == (progname = getenv("SCRIPT_NAME")))
+		progname = "";
+
+	if (NULL == (css = getenv("CSS_DIR")))
+		css = "";
+
+	if (NULL == (host = getenv("HTTP_HOST")))
+		host = "localhost";
+
+	/*
+	 * First we change directory into the cache directory so that
+	 * subsequent scanning for manpath directories is rooted
+	 * relative to the same position.
+	 * The directory is then opened as ".": reopening it by name
+	 * would resolve a relative CACHE_DIR against the new working
+	 * directory.
+	 */
+
+	if (-1 == chdir(cache)) {
+		perror(cache);
+		resp_bad();
+		return(EXIT_FAILURE);
+	} else if (NULL == (cwd = opendir("."))) {
+		perror(cache);
+		resp_bad();
+		return(EXIT_FAILURE);
+	}
+
+	memset(&req, 0, sizeof(struct req));
+
+	strlcpy(buf, ".", MAXPATHLEN);
+	pathgen(cwd, buf, &req);
+	closedir(cwd);
+
+	/* Next parse out the query string. */
+
+	if (NULL != (p = getenv("QUERY_STRING")))
+		http_parse(&req, p);
+
+	/*
+	 * Now juggle paths to extract information.
+	 * We want to extract our filetype (the file suffix), the
+	 * initial path component, then the trailing component(s).
+	 * Start with leading subpath component.
+	 * A missing or empty path, or a bare "/", selects the index.
+	 */
+
+	subpath = path = NULL;
+	req.page = PAGE__MAX;
+
+	if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
+		req.page = PAGE_INDEX;
+
+	if (NULL != path && '/' == *path && '\0' == *++path)
+		req.page = PAGE_INDEX;
+
+	/* Strip file suffix, but only from the last path component. */
+
+	if (NULL != path && NULL != (p = strrchr(path, '.')))
+		if (NULL == strchr(p, '/'))
+			*p = '\0';
+
+	/* Resolve subpath component. */
+
+	if (NULL != path && NULL != (subpath = strchr(path, '/')))
+		*subpath++ = '\0';
+
+	/* Map path into one we recognise. */
+
+	if (NULL != path && '\0' != *path)
+		for (i = 0; i < (int)PAGE__MAX; i++)
+			if (0 == strcmp(pages[i], path)) {
+				req.page = (enum page)i;
+				break;
+			}
+
+	/* Route pages; anything unrecognised is a 404. */
+
+	switch (req.page) {
+	case (PAGE_INDEX):
+		pg_index(&req, subpath);
+		break;
+	case (PAGE_SEARCH):
+		pg_search(&req, subpath);
+		break;
+	case (PAGE_SHOW):
+		pg_show(&req, subpath);
+		break;
+	default:
+		resp_error404(path);
+		break;
+	}
+
+	for (i = 0; i < (int)req.psz; i++) {
+		free(req.p[i].path);
+		free(req.p[i].name);
+	}
+
+	free(req.p);
+	return(EXIT_SUCCESS);
+}
+
+/*
+ * qsort(3) comparison callback ordering struct res entries by title,
+ * ignoring case.
+ */
+static int
+cmp(const void *p1, const void *p2)
+{
+	const struct res	*lhs, *rhs;
+
+	lhs = p1;
+	rhs = p2;
+
+	return(strcasecmp(lhs->title, rhs->title));
+}
+
+/*
+ * Check to see if an "etc" directory holds a regular file named
+ * catman.conf.  If it does, that means that the path contains a tree
+ * created by catman(8) and should be used for indexing.
+ * Returns 1 if the file is found, 0 otherwise.
+ */
+static int
+pathstop(DIR *dir)
+{
+	struct dirent	*ent;
+
+	for (;;) {
+		ent = readdir(dir);
+		if (NULL == ent)
+			return(0);
+		if (DT_REG != ent->d_type)
+			continue;
+		if (0 == strcmp(ent->d_name, "catman.conf"))
+			return(1);
+	}
+}
+
+/*
+ * Scan for indexable paths.
+ * This adds all paths with "etc/catman.conf" to req->p.
+ *
+ * dir is the open directory and path its name; path is a buffer of
+ * MAXPATHLEN bytes that is extended in place while descending.  A
+ * directory holding etc/catman.conf is recorded and not descended
+ * into; any other directory has its non-hidden subdirectories scanned
+ * recursively.  An over-long path or an unreadable directory ends the
+ * scan of the current directory.
+ */
+static void
+pathgen(DIR *dir, char *path, struct req *req)
+{
+	struct dirent	*d;
+	char		*cp;
+	DIR		*cd;
+	int		 rc;
+	size_t		 sz, ssz;
+
+	/* sz is the length of "path/"; children are appended after it. */
+
+	sz = strlcat(path, "/", MAXPATHLEN);
+	if (sz >= MAXPATHLEN) {
+		fprintf(stderr, "%s: Path too long\n", path);
+		return;
+	}
+
+	/*
+	 * First, scan for the "etc" directory.
+	 * If it's found, then see if it should cause us to stop.  This
+	 * happens when a catman.conf is found in the directory.
+	 */
+
+	rc = 0;
+	while (0 == rc && NULL != (d = readdir(dir))) {
+		if (DT_DIR != d->d_type || strcmp(d->d_name, "etc"))
+			continue;
+
+		path[(int)sz] = '\0';
+		ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+		if (ssz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long\n", path);
+			return;
+		} else if (NULL == (cd = opendir(path))) {
+			perror(path);
+			return;
+		}
+
+		rc = pathstop(cd);
+		closedir(cd);
+	}
+
+	if (rc > 0) {
+		/* This also strips the trailing slash. */
+		path[(int)--sz] = '\0';
+		req->p = mandoc_realloc
+			(req->p,
+			 (req->psz + 1) * sizeof(struct paths));
+		/*
+		 * Strip out the leading "./" unless we're just a ".",
+		 * in which case use an empty string as our name.
+		 */
+		req->p[(int)req->psz].path = mandoc_strdup(path);
+		req->p[(int)req->psz].name =
+			cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
+		req->psz++;
+		/*
+		 * The name is just the path with every slash replaced
+		 * by a space.  Simple but effective.
+		 */
+		for ( ; '\0' != *cp; cp++)
+			if ('/' == *cp)
+				*cp = ' ';
+		return;
+	}
+
+	/*
+	 * If no etc/catman.conf was found, recursively enter child
+	 * directory and continue scanning.
+	 */
+
+	rewinddir(dir);
+	while (NULL != (d = readdir(dir))) {
+		if (DT_DIR != d->d_type || '.' == d->d_name[0])
+			continue;
+
+		path[(int)sz] = '\0';
+		ssz = strlcat(path, d->d_name, MAXPATHLEN);
+
+		if (ssz >= MAXPATHLEN) {
+			fprintf(stderr, "%s: Path too long\n", path);
+			return;
+		} else if (NULL == (cd = opendir(path))) {
+			perror(path);
+			return;
+		}
+
+		pathgen(cd, path, req);
+		closedir(cd);
+	}
+}
diff --git a/chars.c b/chars.c
new file mode 100644
index 000000000000..ce03347b5d83
--- /dev/null
+++ b/chars.c
@@ -0,0 +1,167 @@
+/* $Id: chars.c,v 1.52 2011/11/08 00:15:23 kristaps Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "libmandoc.h"
+
+/*
+ * Range of first characters accepted in an escape name.  The hash
+ * table has one bucket per value in this range.
+ */
+#define PRINT_HI 126
+#define PRINT_LO 32
+
+/*
+ * One entry of the character table; entries sharing a hash bucket are
+ * linked through next.
+ */
+struct ln {
+ struct ln *next; /* next entry in the same bucket, or NULL */
+ const char *code; /* escape name used as the lookup key */
+ const char *ascii; /* ASCII rendition */
+ int unicode; /* Unicode codepoint */
+};
+
+/* Number of elements in lines[]; mchars_alloc() walks all of them. */
+#define LINES_MAX 328
+
+/* Expands one chars.in line into a struct ln initialiser with a NULL link. */
+#define CHAR(in, ch, code) \
+ { NULL, (in), (ch), (code) },
+
+/* Bracket the CHAR() lines of chars.in to form the static lines[] array. */
+#define CHAR_TBL_START static struct ln lines[LINES_MAX] = {
+#define CHAR_TBL_END };
+
+#include "chars.in"
+
+/*
+ * Character lookup table handle, created by mchars_alloc() and
+ * released by mchars_free().
+ */
+struct mchars {
+ struct ln **htab; /* buckets indexed by (first character - PRINT_LO) */
+};
+
+/* Look up an escape name of the given length; NULL if it is unknown. */
+static const struct ln *find(const struct mchars *,
+ const char *, size_t);
+
+/*
+ * Release a table obtained from mchars_alloc(): the bucket array
+ * first, then the handle itself.  The entries are static and are not
+ * freed.
+ */
+void
+mchars_free(struct mchars *arg)
+{
+	struct ln	**buckets;
+
+	buckets = arg->htab;
+	free(buckets);
+	free(arg);
+}
+
+/*
+ * Allocate a character lookup table over the static lines[] array.
+ * The result is to be released with mchars_free().
+ */
+struct mchars *
+mchars_alloc(void)
+{
+	struct mchars	 *tab;
+	struct ln	**htab;
+	struct ln	 *pp;
+	int		  i, hash;
+
+	/*
+	 * Constructs a very basic chaining hashtable.  The hash routine
+	 * is simply the integral value of the first character.
+	 * Subsequent entries are chained in the order they're processed.
+	 */
+
+	tab = mandoc_malloc(sizeof(struct mchars));
+	htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
+
+	for (i = 0; i < LINES_MAX; i++) {
+		/*
+		 * The links live in the static table and survive from
+		 * one call to the next.  Clear each one before the
+		 * entry is chained, or a second call would append an
+		 * entry behind its own old successors and close the
+		 * chain into a loop.
+		 */
+
+		lines[i].next = NULL;
+
+		hash = (int)lines[i].code[0] - PRINT_LO;
+
+		if (NULL == (pp = htab[hash])) {
+			htab[hash] = &lines[i];
+			continue;
+		}
+
+		for ( ; pp->next; pp = pp->next)
+			/* Scan ahead. */ ;
+		pp->next = &lines[i];
+	}
+
+	tab->htab = htab;
+	return(tab);
+}
+
+/*
+ * Map an escape name of sz bytes to its Unicode codepoint.
+ * Returns -1 if the name is not in the table.
+ */
+int
+mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
+{
+	const struct ln	*ent;
+
+	ent = find(arg, p, sz);
+	return(NULL != ent ? ent->unicode : -1);
+}
+
+/*
+ * Convert a decimal character number of sz bytes into that character.
+ * Returns '\0' unless the number lies in 1..255 and is printable
+ * according to isprint(3).
+ */
+char
+mchars_num2char(const char *p, size_t sz)
+{
+	int	 num;
+
+	num = mandoc_strntoi(p, sz, 10);
+
+	if (num <= 0 || num >= 256)
+		return('\0');
+	if ( ! isprint(num))
+		return('\0');
+
+	return((char)num);
+}
+
+/*
+ * Convert a hexadecimal codepoint number of sz bytes into a Unicode
+ * codepoint.
+ * Returns '\0' unless the value is above 0x80, no greater than
+ * 0x10FFFF, and outside the surrogate range 0xD800..0xDFFF.
+ */
+int
+mchars_num2uc(const char *p, size_t sz)
+{
+	int	 i;
+
+	if ((i = mandoc_strntoi(p, sz, 16)) < 0)
+		return('\0');
+
+	if (i <= 0x80 || i > 0x10FFFF)
+		return('\0');
+
+	/* Surrogates are reserved for UTF-16 and encode no character. */
+
+	if (i >= 0xD800 && i <= 0xDFFF)
+		return('\0');
+
+	return(i);
+}
+
+/*
+ * Map an escape name of sz bytes to its ASCII rendition and store the
+ * length of that rendition in *rsz.
+ * Returns NULL, with *rsz set to 1, if the name is not in the table.
+ */
+const char *
+mchars_spec2str(const struct mchars *arg,
+		const char *p, size_t sz, size_t *rsz)
+{
+	const struct ln	*ent;
+
+	ent = find(arg, p, sz);
+	if (NULL != ent) {
+		*rsz = strlen(ent->ascii);
+		return(ent->ascii);
+	}
+
+	*rsz = 1;
+	return(NULL);
+}
+
+/*
+ * Find the table entry whose code equals the first sz bytes of p.
+ * Returns NULL for an empty name, for a name whose first character is
+ * outside PRINT_LO..PRINT_HI, and for a name that is not in the table.
+ */
+static const struct ln *
+find(const struct mchars *tab, const char *p, size_t sz)
+{
+	const struct ln	*ent;
+
+	assert(p);
+
+	if (0 == sz)
+		return(NULL);
+	if (p[0] < PRINT_LO || p[0] > PRINT_HI)
+		return(NULL);
+
+	ent = tab->htab[(int)p[0] - PRINT_LO];
+
+	while (NULL != ent) {
+		/*
+		 * Compare the prefix first: code[sz] is only examined
+		 * once the first sz bytes are known to agree.
+		 */
+		if (0 == strncmp(ent->code, p, sz) &&
+		    '\0' == ent->code[(int)sz])
+			break;
+		ent = ent->next;
+	}
+
+	return(ent);
+}
diff --git a/chars.in b/chars.in
new file mode 100644
index 000000000000..a4c45b3c43eb
--- /dev/null
+++ b/chars.in
@@ -0,0 +1,397 @@
+/* $Id: chars.in,v 1.42 2011/10/02 10:02:26 kristaps Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * The ASCII translation tables.
+ *
+ * Each CHAR() entry has three elements.  The first is the name of the
+ * input sequence (\x, \(xx, \*(xx and so on), the second is the ASCII
+ * string produced for it by the front-end, and the third is the
+ * corresponding Unicode codepoint.
+ *
+ * XXX - C-escape strings!
+ * XXX - update LINES_MAX if adding more!
+ */
+
+/* Non-breaking, non-collapsing space uses unit separator. */
+static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
+
+/*
+ * The table proper: one CHAR(name, ascii, codepoint) line per escape.
+ * Aliases of one glyph carry the same ASCII rendition and codepoint.
+ */
+CHAR_TBL_START
+
+/* Spacing. */
+CHAR("c", "", 0)
+CHAR("0", " ", 8194)
+CHAR(" ", ascii_nbrsp, 160)
+CHAR("~", ascii_nbrsp, 160)
+CHAR("%", "", 0)
+CHAR("&", "", 0)
+CHAR("^", "", 0)
+CHAR("|", "", 0)
+CHAR("}", "", 0)
+
+/* Accents. */
+CHAR("a\"", "\"", 779)
+CHAR("a-", "-", 175)
+CHAR("a.", ".", 729)
+CHAR("a^", "^", 770)
+CHAR("\'", "\'", 769)
+CHAR("aa", "\'", 769)
+CHAR("ga", "`", 768)
+CHAR("`", "`", 768)
+CHAR("ab", "`", 774)
+CHAR("ac", ",", 807)
+CHAR("ad", "\"", 776)
+CHAR("ah", "v", 711)
+CHAR("ao", "o", 730)
+CHAR("a~", "~", 771)
+CHAR("ho", ",", 808)
+CHAR("ha", "^", 94)
+CHAR("ti", "~", 126)
+
+/* Quotes. */
+CHAR("Bq", ",,", 8222)
+CHAR("bq", ",", 8218)
+CHAR("lq", "``", 8220)
+CHAR("rq", "\'\'", 8221)
+CHAR("oq", "`", 8216)
+CHAR("cq", "\'", 8217)
+CHAR("aq", "\'", 39)
+CHAR("dq", "\"", 34)
+CHAR("Fo", "<<", 171)
+CHAR("Fc", ">>", 187)
+CHAR("fo", "<", 8249)
+CHAR("fc", ">", 8250)
+
+/* Brackets. */
+CHAR("lB", "[", 91)
+CHAR("rB", "]", 93)
+CHAR("lC", "{", 123)
+CHAR("rC", "}", 125)
+CHAR("la", "<", 60)
+CHAR("ra", ">", 62)
+CHAR("bv", "|", 9130)
+CHAR("braceex", "|", 9130)
+CHAR("bracketlefttp", "|", 9121)
+CHAR("bracketleftbp", "|", 9123)
+CHAR("bracketleftex", "|", 9122)
+CHAR("bracketrighttp", "|", 9124)
+CHAR("bracketrightbp", "|", 9126)
+CHAR("bracketrightex", "|", 9125)
+CHAR("lt", ",-", 9127)
+CHAR("bracelefttp", ",-", 9127)
+CHAR("lk", "{", 9128)
+CHAR("braceleftmid", "{", 9128)
+CHAR("lb", "`-", 9129)
+CHAR("braceleftbp", "`-", 9129)
+CHAR("braceleftex", "|", 9130)
+CHAR("rt", "-.", 9131)
+CHAR("bracerighttp", "-.", 9131)
+CHAR("rk", "}", 9132)
+CHAR("bracerightmid", "}", 9132)
+CHAR("rb", "-\'", 9133)
+CHAR("bracerightbp", "-\'", 9133)
+CHAR("bracerightex", "|", 9130)
+CHAR("parenlefttp", "/", 9115)
+CHAR("parenleftbp", "\\", 9117)
+CHAR("parenleftex", "|", 9116)
+CHAR("parenrighttp", "\\", 9118)
+CHAR("parenrightbp", "/", 9120)
+CHAR("parenrightex", "|", 9119)
+
+/* Greek characters. */
+CHAR("*A", "A", 913)
+CHAR("*B", "B", 914)
+CHAR("*G", "|", 915)
+CHAR("*D", "/\\", 916)
+CHAR("*E", "E", 917)
+CHAR("*Z", "Z", 918)
+CHAR("*Y", "H", 919)
+CHAR("*H", "O", 920)
+CHAR("*I", "I", 921)
+CHAR("*K", "K", 922)
+CHAR("*L", "/\\", 923)
+CHAR("*M", "M", 924)
+CHAR("*N", "N", 925)
+CHAR("*C", "H", 926)
+CHAR("*O", "O", 927)
+CHAR("*P", "TT", 928)
+CHAR("*R", "P", 929)
+CHAR("*S", ">", 931)
+CHAR("*T", "T", 932)
+CHAR("*U", "Y", 933)
+CHAR("*F", "O_", 934)
+CHAR("*X", "X", 935)
+CHAR("*Q", "Y", 936)
+CHAR("*W", "O", 937)
+CHAR("*a", "a", 945)
+CHAR("*b", "B", 946)
+CHAR("*g", "y", 947)
+CHAR("*d", "d", 948)
+CHAR("*e", "e", 949)
+CHAR("*z", "C", 950)
+CHAR("*y", "n", 951)
+CHAR("*h", "0", 952)
+CHAR("*i", "i", 953)
+CHAR("*k", "k", 954)
+CHAR("*l", "\\", 955)
+CHAR("*m", "u", 956)
+CHAR("*n", "v", 957)
+CHAR("*c", "E", 958)
+CHAR("*o", "o", 959)
+CHAR("*p", "n", 960)
+CHAR("*r", "p", 961)
+CHAR("*s", "o", 963)
+CHAR("*t", "t", 964)
+CHAR("*u", "u", 965)
+CHAR("*f", "o", 981)
+CHAR("*x", "x", 967)
+CHAR("*q", "u", 968)
+CHAR("*w", "w", 969)
+CHAR("+h", "0", 977)
+CHAR("+f", "o", 966)
+CHAR("+p", "w", 982)
+CHAR("+e", "e", 1013)
+CHAR("ts", "s", 962)
+
+/* Accented letters. */
+CHAR(",C", "C", 199)
+CHAR(",c", "c", 231)
+CHAR("/L", "L", 321)
+CHAR("/O", "O", 216)
+CHAR("/l", "l", 322)
+CHAR("/o", "o", 248)
+CHAR("oA", "A", 197)
+CHAR("oa", "a", 229)
+CHAR(":A", "A", 196)
+CHAR(":E", "E", 203)
+CHAR(":I", "I", 207)
+CHAR(":O", "O", 214)
+CHAR(":U", "U", 220)
+CHAR(":a", "a", 228)
+CHAR(":e", "e", 235)
+CHAR(":i", "i", 239)
+CHAR(":o", "o", 246)
+CHAR(":u", "u", 252)
+CHAR(":y", "y", 255)
+CHAR("\'A", "A", 193)
+CHAR("\'E", "E", 201)
+CHAR("\'I", "I", 205)
+CHAR("\'O", "O", 211)
+CHAR("\'U", "U", 218)
+CHAR("\'a", "a", 225)
+CHAR("\'e", "e", 233)
+CHAR("\'i", "i", 237)
+CHAR("\'o", "o", 243)
+CHAR("\'u", "u", 250)
+CHAR("^A", "A", 194)
+CHAR("^E", "E", 202)
+CHAR("^I", "I", 206)
+CHAR("^O", "O", 212)
+CHAR("^U", "U", 219)
+CHAR("^a", "a", 226)
+CHAR("^e", "e", 234)
+CHAR("^i", "i", 238)
+CHAR("^o", "o", 244)
+CHAR("^u", "u", 251)
+CHAR("`A", "A", 192)
+CHAR("`E", "E", 200)
+CHAR("`I", "I", 204)
+CHAR("`O", "O", 210)
+CHAR("`U", "U", 217)
+CHAR("`a", "a", 224)
+CHAR("`e", "e", 232)
+CHAR("`i", "i", 236)
+CHAR("`o", "o", 242)
+CHAR("`u", "u", 249)
+CHAR("~A", "A", 195)
+CHAR("~N", "N", 209)
+CHAR("~O", "O", 213)
+CHAR("~a", "a", 227)
+CHAR("~n", "n", 241)
+CHAR("~o", "o", 245)
+
+/* Arrows and lines. */
+CHAR("<-", "<-", 8592)
+CHAR("->", "->", 8594)
+CHAR("<>", "<>", 8596)
+CHAR("da", "v", 8595)
+CHAR("ua", "^", 8593)
+CHAR("va", "^v", 8597)
+CHAR("lA", "<=", 8656)
+CHAR("rA", "=>", 8658)
+CHAR("hA", "<=>", 8660)
+CHAR("dA", "v", 8659)
+CHAR("uA", "^", 8657)
+CHAR("vA", "^=v", 8661)
+
+/* Logic. */
+CHAR("AN", "^", 8743)
+CHAR("OR", "v", 8744)
+CHAR("no", "~", 172)
+CHAR("tno", "~", 172)
+CHAR("te", "3", 8707)
+CHAR("fa", "V", 8704)
+CHAR("st", "-)", 8715)
+CHAR("tf", ".:.", 8756)
+CHAR("3d", ".:.", 8756)
+CHAR("or", "|", 124)
+
+/* Mathematicals. */
+CHAR("pl", "+", 43)
+CHAR("mi", "-", 8722)
+CHAR("-", "-", 45)
+CHAR("-+", "-+", 8723)
+CHAR("+-", "+-", 177)
+CHAR("t+-", "+-", 177)
+CHAR("pc", ".", 183)
+CHAR("md", ".", 8901)
+CHAR("mu", "x", 215)
+CHAR("tmu", "x", 215)
+CHAR("c*", "x", 8855)
+CHAR("c+", "+", 8853)
+CHAR("di", "-:-", 247)
+CHAR("tdi", "-:-", 247)
+CHAR("f/", "/", 8260)
+CHAR("**", "*", 8727)
+CHAR("<=", "<=", 8804)
+CHAR(">=", ">=", 8805)
+CHAR("<<", "<<", 8810)
+CHAR(">>", ">>", 8811)
+CHAR("eq", "=", 61)
+CHAR("!=", "!=", 8800)
+CHAR("==", "==", 8801)
+CHAR("ne", "!==", 8802)
+CHAR("=~", "=~", 8773)
+CHAR("-~", "-~", 8771)
+CHAR("ap", "~", 8764)
+CHAR("~~", "~~", 8776)
+CHAR("~=", "~=", 8780)
+CHAR("pt", "oc", 8733)
+CHAR("es", "{}", 8709)
+CHAR("mo", "E", 8712)
+CHAR("nm", "!E", 8713)
+CHAR("sb", "(=", 8834)
+CHAR("nb", "(!=", 8836)
+CHAR("sp", "=)", 8835)
+CHAR("nc", "!=)", 8837)
+CHAR("ib", "(=", 8838)
+CHAR("ip", "=)", 8839)
+CHAR("ca", "(^)", 8745)
+CHAR("cu", "U", 8746)
+CHAR("/_", "/_", 8736)
+CHAR("pp", "_|_", 8869)
+CHAR("is", "I", 8747)
+CHAR("integral", "I", 8747)
+CHAR("sum", "E", 8721)
+CHAR("product", "TT", 8719)
+CHAR("coproduct", "U", 8720)
+CHAR("gr", "V", 8711)
+CHAR("sr", "\\/", 8730)
+CHAR("sqrt", "\\/", 8730)
+CHAR("lc", "|~", 8968)
+CHAR("rc", "~|", 8969)
+CHAR("lf", "|_", 8970)
+CHAR("rf", "_|", 8971)
+CHAR("if", "oo", 8734)
+CHAR("Ah", "N", 8501)
+CHAR("Im", "I", 8465)
+CHAR("Re", "R", 8476)
+CHAR("pd", "a", 8706)
+CHAR("-h", "/h", 8463)
+CHAR("12", "1/2", 189)
+CHAR("14", "1/4", 188)
+CHAR("34", "3/4", 190)
+
+/* Ligatures. */
+CHAR("ff", "ff", 64256)
+CHAR("fi", "fi", 64257)
+CHAR("fl", "fl", 64258)
+CHAR("Fi", "ffi", 64259)
+CHAR("Fl", "ffl", 64260)
+CHAR("AE", "AE", 198)
+CHAR("ae", "ae", 230)
+CHAR("OE", "OE", 338)
+CHAR("oe", "oe", 339)
+CHAR("ss", "ss", 223)
+CHAR("IJ", "IJ", 306)
+CHAR("ij", "ij", 307)
+
+/* Special letters. */
+CHAR("-D", "D", 208)
+CHAR("Sd", "o", 240)
+CHAR("TP", "b", 222)
+CHAR("Tp", "b", 254)
+CHAR(".i", "i", 305)
+CHAR(".j", "j", 567)
+
+/* Currency. */
+CHAR("Do", "$", 36)
+CHAR("ct", "c", 162)
+CHAR("Eu", "EUR", 8364)
+CHAR("eu", "EUR", 8364)
+CHAR("Ye", "Y", 165)
+CHAR("Po", "L", 163)
+CHAR("Cs", "x", 164)
+CHAR("Fn", "f", 402)
+
+/* Lines. */
+CHAR("ba", "|", 124)
+CHAR("br", "|", 9474)
+CHAR("ul", "_", 95)
+CHAR("rl", "-", 8254)
+CHAR("bb", "|", 166)
+CHAR("sl", "/", 47)
+CHAR("rs", "\\", 92)
+
+/* Text markers. */
+CHAR("ci", "o", 9675)
+CHAR("bu", "o", 8226)
+CHAR("dd", "=", 8225)
+CHAR("dg", "-", 8224)
+CHAR("lz", "<>", 9674)
+CHAR("sq", "[]", 9633)
+CHAR("ps", "9|", 182)
+CHAR("sc", "S", 167)
+CHAR("lh", "<=", 9756)
+CHAR("rh", "=>", 9758)
+CHAR("at", "@", 64)
+CHAR("sh", "#", 35)
+CHAR("CR", "_|", 8629)
+CHAR("OK", "\\/", 10003)
+
+/* Legal symbols. */
+CHAR("co", "(C)", 169)
+CHAR("rg", "(R)", 174)
+CHAR("tm", "tm", 8482)
+
+/* Punctuation. */
+CHAR(".", ".", 46)
+CHAR("r!", "i", 161)
+CHAR("r?", "c", 191)
+CHAR("em", "--", 8212)
+CHAR("en", "-", 8211)
+CHAR("hy", "-", 8208)
+CHAR("e", "\\", 92)
+
+/* Units. */
+CHAR("de", "o", 176)
+CHAR("%0", "%o", 8240)
+CHAR("fm", "\'", 8242)
+CHAR("sd", "\"", 8243)
+CHAR("mc", "mu", 181)
+
+CHAR_TBL_END
diff --git a/compat_fgetln.c b/compat_fgetln.c
new file mode 100644
index 000000000000..49c9985b6e00
--- /dev/null
+++ b/compat_fgetln.c
@@ -0,0 +1,93 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_FGETLN
+
+int dummy;
+
+#else
+
+/* $NetBSD: fgetln.c,v 1.3 2006/09/25 07:18:17 lukem Exp $ */
+
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Read the next line of fp into a static buffer that is reused by later
+ * calls.  *len is set to the line length, counting the newline if any.
+ */
+char *
+fgetln(FILE *fp, size_t *len)
+{
+	static char	*buf = NULL;	/* line buffer, kept between calls */
+	static size_t	 bufsiz = 0;	/* bytes allocated for buf */
+	char		*nbuf, *ptr;
+	size_t		 used = 0;	/* bytes of this line already in buf */
+	int		 oerrno;
+
+	if (buf == NULL) {
+		bufsiz = BUFSIZ;
+		if ((buf = malloc(bufsiz)) == NULL)
+			return NULL;
+	}
+
+	if (fgets(buf, (int)bufsiz, fp) == NULL)
+		return NULL;
+
+	while ((ptr = strchr(&buf[used], '\n')) == NULL) {
+		used += strlen(&buf[used]);
+		if (used + 1 < bufsiz)
+			break;	/* buffer not filled: input ended, no newline */
+		if ((nbuf = realloc(buf, bufsiz + BUFSIZ)) == NULL) {
+			oerrno = errno;
+			free(buf);
+			errno = oerrno;
+			buf = NULL;
+			return NULL;
+		}
+		buf = nbuf;
+		bufsiz += BUFSIZ;
+		/* Resume on top of the old NUL so the pieces join up. */
+		if (fgets(&buf[used], (int)(bufsiz - used), fp) == NULL)
+			break;
+	}
+
+	*len = ptr != NULL ? (size_t)(ptr - buf) + 1 : used;
+	return buf;
+}
+
+#endif
diff --git a/compat_getsubopt.c b/compat_getsubopt.c
new file mode 100644
index 000000000000..9cd415367dad
--- /dev/null
+++ b/compat_getsubopt.c
@@ -0,0 +1,104 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_GETSUBOPT
+
+int dummy;
+
+#else
+
+/* $OpenBSD: getsubopt.c,v 1.4 2005/08/08 08:05:36 espie Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * getsubopt() as specified by SVID cannot tell its caller which piece of
+ * the suboption string failed to match, which makes for poor diagnostics.
+ * Therefore suboptarg is left pointing at the token examined last
+ * (or NULL when none was found).
+ */
+char *suboptarg;
+
+int
+getsubopt(char **optionp, char * const *tokens, char **valuep)
+{
+	char	*cur;
+	int	 idx;
+
+	suboptarg = *valuep = NULL;
+
+	if (optionp == NULL || *optionp == NULL)
+		return(-1);
+
+	/* Step over separators (comma, blank, tab) in front of the token. */
+	cur = *optionp;
+	while (*cur == ',' || *cur == ' ' || *cur == '\t')
+		cur++;
+
+	/* Only separators were left: consume them and report no match. */
+	if (*cur == '\0') {
+		*optionp = cur;
+		return(-1);
+	}
+
+	/* The token begins here; its first byte is accepted unexamined. */
+	suboptarg = cur++;
+	while (*cur != '\0' && *cur != ',' && *cur != '=' &&
+	    *cur != ' ' && *cur != '\t')
+		cur++;
+
+	if (*cur == '=') {
+		/* Split name from value; the value runs to a separator. */
+		*cur++ = '\0';
+		*valuep = cur;
+		while (*cur != '\0' && *cur != ',' && *cur != ' ' && *cur != '\t')
+			cur++;
+	}
+	if (*cur != '\0') {
+		/* Terminate token or value, then drop trailing separators. */
+		*cur++ = '\0';
+		while (*cur == ',' || *cur == ' ' || *cur == '\t')
+			cur++;
+	}
+
+	*optionp = cur;		/* where the next call resumes */
+
+	for (idx = 0; tokens[idx] != NULL; idx++)
+		if (strcmp(suboptarg, tokens[idx]) == 0)
+			return(idx);
+	return(-1);
+}
+
+#endif
diff --git a/compat_strlcat.c b/compat_strlcat.c
new file mode 100644
index 000000000000..543d40b38b05
--- /dev/null
+++ b/compat_strlcat.c
@@ -0,0 +1,67 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STRLCAT
+
+int dummy;
+
+#else
+
+/* $OpenBSD: strlcat.c,v 1.13 2005/08/08 08:05:37 espie Exp $ */
+
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Concatenate src onto the string held in dst, a buffer of siz bytes in
+ * total (not, as with strncat, the room still free).  The resulting
+ * string holds at most siz-1 characters and is NUL terminated, except
+ * when no terminator is found within the first siz bytes of dst.
+ * Returns strlen(src) + MIN(siz, strlen(initial dst)); a return value
+ * >= siz means the result was truncated.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t siz)
+{
+	const char	*from;
+	char		*tail;
+	size_t		 used, room;
+
+	/* Length of the existing string, looking no further than siz bytes. */
+	for (used = 0; used < siz && dst[used] != '\0'; used++)
+		continue;
+
+	room = siz - used;
+	if (room == 0)
+		return(used + strlen(src));
+
+	/* Walk all of src, but store only while a byte stays free for NUL. */
+	tail = dst + used;
+	for (from = src; *from != '\0'; from++)
+		if (room > 1) {
+			*tail++ = *from;
+			room--;
+		}
+	*tail = '\0';
+
+	return(used + (from - src));	/* count does not include NUL */
+}
+
+#endif
diff --git a/compat_strlcpy.c b/compat_strlcpy.c
new file mode 100644
index 000000000000..a7c64ff9997c
--- /dev/null
+++ b/compat_strlcpy.c
@@ -0,0 +1,63 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STRLCPY
+
+int dummy;
+
+#else
+
+/* $OpenBSD: strlcpy.c,v 1.11 2006/05/05 15:27:38 millert Exp $ */
+
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Copy the string src into dst, a buffer of siz bytes.
+ * No more than siz-1 characters are copied, and dst is always NUL
+ * terminated unless siz is 0.
+ * Returns strlen(src); a return value >= siz means that the copy
+ * was truncated.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz)
+{
+	const char	*from = src;
+	char		*to = dst;
+	size_t		 room;
+
+	/* Copy while a byte beyond the current one is left for the NUL. */
+	for (room = siz; room > 1; room--) {
+		if ((*to++ = *from++) == '\0')
+			return(from - src - 1);	/* src fitted entirely */
+	}
+
+	/* dst is full (or siz is 0): terminate it if it has any room. */
+	if (siz != 0)
+		*to = '\0';
+
+	/* Run through what is left of src so that its length is known. */
+	while (*from++ != '\0')
+		continue;
+
+	return(from - src - 1);	/* count does not include NUL */
+}
+
+#endif
diff --git a/config.h.post b/config.h.post
new file mode 100644
index 000000000000..39da2b2f0699
--- /dev/null
+++ b/config.h.post
@@ -0,0 +1,42 @@
+#include <sys/types.h>
+
+#if !defined(__BEGIN_DECLS) /* supply C++ linkage guards if the system has none */
+# ifdef __cplusplus
+# define __BEGIN_DECLS extern "C" {
+# else
+# define __BEGIN_DECLS
+# endif
+#endif
+#if !defined(__END_DECLS)
+# ifdef __cplusplus
+# define __END_DECLS }
+# else
+# define __END_DECLS
+# endif
+#endif
+
+#if defined(__APPLE__) /* map the htobe/betoh names onto the OSSwap* macros */
+# define htobe32(x) OSSwapHostToBigInt32(x)
+# define betoh32(x) OSSwapBigToHostInt32(x)
+# define htobe64(x) OSSwapHostToBigInt64(x)
+# define betoh64(x) OSSwapBigToHostInt64(x)
+#elif defined(__linux__) /* map the betoh names onto be32toh()/be64toh() */
+# define betoh32(x) be32toh(x)
+# define betoh64(x) be64toh(x)
+#endif
+
+#ifndef HAVE_STRLCAT /* declarations for the compat_*.c fallbacks follow */
+extern size_t strlcat(char *, const char *, size_t);
+#endif
+#ifndef HAVE_STRLCPY
+extern size_t strlcpy(char *, const char *, size_t);
+#endif
+#ifndef HAVE_GETSUBOPT
+extern int getsubopt(char **, char * const *, char **);
+extern char *suboptarg; /* token examined by the last getsubopt() call */
+#endif
+#ifndef HAVE_FGETLN
+extern char *fgetln(FILE *, size_t *);
+#endif
+
+#endif /* MANDOC_CONFIG_H */
diff --git a/config.h.pre b/config.h.pre
new file mode 100644
index 000000000000..bc594784856c
--- /dev/null
+++ b/config.h.pre
@@ -0,0 +1,8 @@
+#ifndef MANDOC_CONFIG_H
+#define MANDOC_CONFIG_H
+
+#if defined(__linux__) || defined(__MINT__)
+# define _GNU_SOURCE /* strptime(), getsubopt() */
+#endif
+
+#include <stdio.h>
diff --git a/demandoc.1 b/demandoc.1
new file mode 100644
index 000000000000..845b9c14b55b
--- /dev/null
+++ b/demandoc.1
@@ -0,0 +1,109 @@
+.\" $Id: demandoc.1,v 1.6 2011/12/25 19:35:44 kristaps Exp $
+.\"
+.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: December 25 2011 $
+.Dt DEMANDOC 1
+.Os
+.Sh NAME
+.Nm demandoc
+.Nd emit only text of UNIX manuals
+.Sh SYNOPSIS
+.Nm demandoc
+.Op Fl w
+.Op Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility emits only the text portions of well-formed
+.Xr mdoc 7
+and
+.Xr man 7
+.Ux
+manual files.
+.Pp
+By default,
+.Nm
+parses standard input and outputs only text nodes, preserving line
+and column position.
+Escape sequences are omitted from the output.
+.Pp
+Its arguments are as follows:
+.Bl -tag -width Ds
+.It Fl w
+Output a word list.
+This outputs each word of text on its own line.
+A
+.Qq word ,
+in this case, refers to whitespace-delimited terms beginning with at
+least two letters and not consisting of any escape sequences.
+Words have their leading and trailing punctuation
+.Pq double-quotes, sentence punctuation, etc.
+stripped.
+.It Ar
+The input files.
+.El
+.Pp
+If a document is not well-formed, it is skipped.
+.Pp
+The
+.Fl i ,
+.Fl k ,
+.Fl m ,
+and
+.Fl p
+flags are silently discarded for calling compatibility with the
+historical deroff.
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -compact
+.It 0
+No errors occurred.
+.It 6
+An operating system error occurred, for example memory exhaustion or an
+error accessing input files.
+Such errors cause
+.Nm
+to exit at once, possibly in the middle of parsing or formatting a file.
+Any output written so far is incomplete and should be discarded.
+.El
+.Sh EXAMPLES
+The traditional usage of
+.Nm
+is for spell-checking manuals on
+.Bx .
+This is accomplished as follows (assuming British spelling):
+.Pp
+.Dl $ demandoc -w file.1 | spell -b
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr man 7 ,
+.Xr mdoc 7
+.Sh HISTORY
+.Nm
+replaces the historical deroff utility for handling modern
+.Xr man 7
+and
+.Xr mdoc 7
+documents.
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
diff --git a/demandoc.c b/demandoc.c
new file mode 100644
index 000000000000..2474a358e233
--- /dev/null
+++ b/demandoc.c
@@ -0,0 +1,257 @@
+/* $Id: demandoc.c,v 1.6 2011/09/01 22:25:53 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "man.h"
+#include "mdoc.h"
+#include "mandoc.h"
+
+static void pline(int, int *, int *, int);
+static void pman(const struct man_node *, int *, int *, int);
+static void pmandoc(struct mparse *, int, const char *, int);
+static void pmdoc(const struct mdoc_node *, int *, int *, int);
+static void pstring(const char *, int, int *, int);
+static void usage(void);
+
+static const char *progname;
+
+/* demandoc: print the text of the given manuals, or of standard input. */
+int
+main(int argc, char *argv[])
+{
+	struct mparse	*mp;
+	const char	*slash;
+	int		 ch, i, list;
+	extern int	 optind;
+
+	/* Messages are prefixed with the last path component of argv[0]. */
+	slash = strrchr(argv[0], '/');
+	progname = (NULL == slash) ? argv[0] : slash + 1;
+
+	mp = NULL;
+	list = 0;
+
+	while (-1 != (ch = getopt(argc, argv, "ikm:pw"))) {
+		if ('w' == ch) {
+			/* Print a word list instead of the laid-out text. */
+			list = 1;
+			continue;
+		}
+		/* Accepted for deroff compatibility, otherwise ignored. */
+		if ('i' == ch || 'k' == ch || 'm' == ch || 'p' == ch)
+			continue;
+		usage();
+		return((int)MANDOCLEVEL_BADARG);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+	assert(mp);
+
+	/* Without file operands, the manual is read from standard input. */
+	if (0 == argc)
+		pmandoc(mp, STDIN_FILENO, "<stdin>", list);
+
+	/* The one parser is reused: reset it before each named file. */
+	i = 0;
+	while (i < argc) {
+		mparse_reset(mp);
+		pmandoc(mp, -1, argv[i], list);
+		i++;
+	}
+
+	/* Files that failed to parse do not change the exit status. */
+	mparse_free(mp);
+	return((int)MANDOCLEVEL_OK);
+}
+
+static void
+usage(void)
+{
+ /* Print the one-line synopsis on stderr. */
+ fprintf(stderr, "usage: %s [-w] [files...]\n", progname);
+}
+
+static void
+pmandoc(struct mparse *mp, int fd, const char *fn, int list)
+{
+	struct mdoc	*mdoc;
+	struct man	*man;
+	int		 line = 1, col = 0;
+
+	/* A document that fails to parse is reported and skipped. */
+	if (mparse_readfd(mp, fd, fn) >= MANDOCLEVEL_FATAL) {
+		fprintf(stderr, "%s: Parse failure\n", fn);
+		return;
+	}
+
+	mparse_result(mp, &mdoc, &man);
+	/* Walk whichever syntax tree the parser produced, if any. */
+	if (NULL == mdoc && NULL == man)
+		return;
+	if (NULL != mdoc)
+		pmdoc(mdoc_node(mdoc), &line, &col, list);
+	else
+		pman(man_node(man), &line, &col, list);
+
+	/* Laid-out text still needs its closing newline. */
+	if (0 == list)
+		putchar('\n');
+}
+
+/*
+ * Emit the text of one node with its escape sequences stripped.
+ * With list set, each whitespace-delimited word goes on a line of its
+ * own; otherwise the text is padded out to column col and printed.
+ */
+static void
+pstring(const char *p, int col, int *colp, int list)
+{
+	enum mandoc_esc	 esc;
+	const char	*start, *end;
+	int		 emit;
+
+again:
+	if (list && '\0' != *p) {
+		/* Skip blanks and opening punctuation before the term. */
+		while (isspace((unsigned char)*p))
+			p++;
+
+		while ('\'' == *p || '(' == *p || '"' == *p)
+			p++;
+
+		/* Only a term starting with two letters counts as a word. */
+		emit = isalpha((unsigned char)p[0]) &&
+		    isalpha((unsigned char)p[1]);
+
+		/*
+		 * Find the end of the term.  As the plain-text loop below
+		 * relies on, mandoc_escape() leaves p just past the escape,
+		 * so p is advanced by hand in the last branch only: one more
+		 * step would swallow a delimiter or pass the final NUL.
+		 */
+		for (start = p; '\0' != *p; ) {
+			if ('\\' == *p) {
+				p++;
+				esc = mandoc_escape(&p, NULL, NULL);
+				if (ESCAPE_ERROR == esc)
+					return;
+				emit = 0;	/* escapes disqualify the term */
+			} else if (isspace((unsigned char)*p))
+				break;
+			else
+				p++;
+		}
+
+		/* Strip trailing punctuation, but never the first byte. */
+		end = p - 1;
+		while (end > start && NULL != strchr(".,'\")!?:;", *end))
+			end--;
+
+		if (emit && end - start >= 1) {
+			for ( ; start <= end; start++)
+				if (ASCII_HYPH == *start)
+					putchar('-');
+				else
+					putchar((unsigned char)*start);
+			putchar('\n');
+		}
+
+		if (isspace((unsigned char)*p))
+			goto again;
+
+		return;
+	}
+
+	/* Pad with spaces until the output column matches the input. */
+	while (*colp < col) {
+		putchar(' ');
+		(*colp)++;
+	}
+
+	/* Print the text itself, leaving out every escape sequence. */
+	while ('\0' != *p)
+		if ('\\' == *p) {
+			p++;
+			esc = mandoc_escape(&p, NULL, NULL);
+			if (ESCAPE_ERROR == esc)
+				break;
+		} else {
+			putchar((unsigned char )*p++);
+			(*colp)++;
+		}
+}
+
+/*
+ * Advance the output to input line number line (layout mode only).
+ */
+static void
+pline(int line, int *linep, int *col, int list)
+{
+	/* Word lists are not laid out, so they need no line tracking. */
+	if (list)
+		return;
+
+	/*
+	 * Emit newlines until the output is on the same line as the
+	 * input, then start over at the left margin.
+	 */
+	for ( ; *linep < line; ++*linep)
+		putchar('\n');
+
+	*col = 0;
+}
+
+static void
+pmdoc(const struct mdoc_node *p, int *line, int *col, int list)
+{
+	while (NULL != p) {	/* node first, then children, then siblings */
+		if (p->flags & MDOC_LINE)
+			pline(p->line, line, col, list);
+		if (p->type == MDOC_TEXT)
+			pstring(p->string, p->pos, col, list);
+		if (NULL != p->child)
+			pmdoc(p->child, line, col, list);
+		p = p->next;
+	}
+}
+
+static void
+pman(const struct man_node *p, int *line, int *col, int list)
+{
+	while (NULL != p) {	/* node first, then children, then siblings */
+		if (p->flags & MAN_LINE)
+			pline(p->line, line, col, list);
+		if (p->type == MAN_TEXT)
+			pstring(p->string, p->pos, col, list);
+		if (NULL != p->child)
+			pman(p->child, line, col, list);
+		p = p->next;
+	}
+}
diff --git a/eqn.7 b/eqn.7
new file mode 100644
index 000000000000..f86b9c496bb0
--- /dev/null
+++ b/eqn.7
@@ -0,0 +1,280 @@
+.\" $Id: eqn.7,v 1.28 2011/09/25 18:37:09 schwarze Exp $
+.\"
+.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: September 25 2011 $
+.Dt EQN 7
+.Os
+.Sh NAME
+.Nm eqn
+.Nd eqn language reference for mandoc
+.Sh DESCRIPTION
+The
+.Nm eqn
+language is an equation-formatting language.
+It is used within
+.Xr mdoc 7
+and
+.Xr man 7
+.Ux
+manual pages.
+It describes the
+.Em structure
+of an equation, not its mathematical meaning.
+This manual describes the
+.Nm
+language accepted by the
+.Xr mandoc 1
+utility, which corresponds to the Second Edition eqn specification (see
+.Sx SEE ALSO
+for references).
+.Pp
+Equations within
+.Xr mdoc 7
+or
+.Xr man 7
+documents are enclosed by the standalone
+.Sq \&.EQ
+and
+.Sq \&.EN
+tags.
+Equations are multi-line blocks consisting of formulas and control
+statements.
+.Sh EQUATION STRUCTURE
+Each equation is bracketed by
+.Sq \&.EQ
+and
+.Sq \&.EN
+strings.
+.Em Note :
+these are not the same as
+.Xr roff 7
+macros, and may only be invoked as
+.Sq \&.EQ .
+.Pp
+The equation grammar is as follows, where quoted strings are
+case-sensitive literals in the input:
+.Bd -literal -offset indent
+eqn : box | eqn box
+box : text
+ | \*q{\*q eqn \*q}\*q
+ | \*qdefine\*q text text
+ | \*qndefine\*q text text
+ | \*qtdefine\*q text text
+ | \*qgfont\*q text
+ | \*qgsize\*q text
+ | \*qset\*q text text
+ | \*qundef\*q text
+ | box pos box
+ | box mark
+ | \*qmatrix\*q \*q{\*q [col \*q{\*q list \*q}\*q ]* \*q}\*q
+ | pile \*q{\*q list \*q}\*q
+ | font box
+ | \*qsize\*q text box
+ | \*qleft\*q text eqn [\*qright\*q text]
+col : \*qlcol\*q | \*qrcol\*q | \*qccol\*q | \*qcol\*q
+text : [^space\e\*q]+ | \e\*q.*\e\*q
+pile : \*qlpile\*q | \*qcpile\*q | \*qrpile\*q | \*qpile\*q
+pos : \*qover\*q | \*qsup\*q | \*qsub\*q | \*qto\*q | \*qfrom\*q
+mark : \*qdot\*q | \*qdotdot\*q | \*qhat\*q | \*qtilde\*q | \*qvec\*q
+ | \*qdyad\*q | \*qbar\*q | \*qunder\*q
+font : \*qroman\*q | \*qitalic\*q | \*qbold\*q | \*qfat\*q
+list : eqn
+ | list \*qabove\*q eqn
+space : [\e^~ \et]
+.Ed
+.Pp
+White-space consists of the space, tab, circumflex, and tilde
+characters.
+If within a quoted string, these space characters are retained.
+Quoted strings are also not scanned for replacement definitions.
+.Pp
+The following text terms are translated into a rendered glyph, if
+available: alpha, beta, chi, delta, epsilon, eta, gamma, iota, kappa,
+lambda, mu, nu, omega, omicron, phi, pi, psi, rho, sigma, tau, theta,
+upsilon, xi, zeta, DELTA, GAMMA, LAMBDA, OMEGA, PHI, PI, PSI, SIGMA,
+THETA, UPSILON, XI, inter (intersection), union (union), prod (product),
+int (integral), sum (summation), grad (gradient), del (vector
+differential), times (multiply), cdot (centre-dot), nothing (zero-width
+space), approx (approximately equals), prime (prime), half (one-half),
+partial (partial differential), inf (infinity), >> (much greater), <<
+(much less), \-> (right arrow), <\- (left arrow), +\- (plus-minus), !=
+(not equal), == (equivalence), <= (less-than-equal), and >=
+(more-than-equal).
+.Pp
+The following control statements are available:
+.Bl -tag -width Ds
+.It Cm define
+Replace all occurrences of a key with a value.
+Its syntax is as follows:
+.Pp
+.D1 define Ar key cvalc
+.Pp
+The first character of the value string,
+.Ar c ,
+is used as the delimiter for the value
+.Ar val .
+This allows for arbitrary enclosure of terms (not just quotes), such as
+.Pp
+.D1 define Ar foo 'bar baz'
+.D1 define Ar foo cbar bazc
+.Pp
+It is an error to have an empty
+.Ar key
+or
+.Ar val .
+Note that a quoted
+.Ar key
+causes errors in some
+.Nm
+implementations and should not be considered portable.
+It is not expanded for replacements.
+Definitions may refer to other definitions; these are evaluated
+recursively when text replacement occurs and not when the definition is
+created.
+.Pp
+Definitions can create arbitrary strings, for example, the following is
+a legal construction.
+.Bd -literal -offset indent
+define foo 'define'
+foo bar 'baz'
+.Ed
+.Pp
+Self-referencing definitions will raise an error.
+The
+.Cm ndefine
+statement is a synonym for
+.Cm define ,
+while
+.Cm tdefine
+is discarded.
+.It Cm gfont
+Set the default font of subsequent output.
+Its syntax is as follows:
+.Pp
+.D1 gfont Ar font
+.Pp
+In mandoc, this value is discarded.
+.It Cm gsize
+Set the default size of subsequent output.
+Its syntax is as follows:
+.Pp
+.D1 gsize Ar size
+.Pp
+The
+.Ar size
+value should be an integer.
+.It Cm set
+Set an equation mode.
+In mandoc, both arguments are thrown away.
+Its syntax is as follows:
+.Pp
+.D1 set Ar key val
+.Pp
+The
+.Ar key
+and
+.Ar val
+are not expanded for replacements.
+This statement is a GNU extension.
+.It Cm undef
+Unset a previously-defined key.
+Its syntax is as follows:
+.Pp
+.D1 undef Ar key
+.Pp
+Once invoked, the definition for
+.Ar key
+is discarded.
+The
+.Ar key
+is not expanded for replacements.
+This statement is a GNU extension.
+.El
+.Sh COMPATIBILITY
+This section documents the compatibility of mandoc
+.Nm
+and the troff
+.Nm
+implementation (including GNU troff).
+.Pp
+.Bl -dash -compact
+.It
+The text string
+.Sq \e\*q
+is interpreted as a literal quote in troff.
+In mandoc, this is interpreted as a comment.
+.It
+In troff, the circumflex and tilde white-space symbols map to
+fixed-width spaces.
+In mandoc, these characters are synonyms for the space character.
+.It
+The troff implementation of
+.Nm
+allows for equation alignment with the
+.Cm mark
+and
+.Cm lineup
+tokens.
+mandoc discards these tokens.
+The
+.Cm back Ar n ,
+.Cm fwd Ar n ,
+.Cm up Ar n ,
+and
+.Cm down Ar n
+commands are also ignored.
+.El
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr man 7 ,
+.Xr mandoc_char 7 ,
+.Xr mdoc 7 ,
+.Xr roff 7
+.Rs
+.%A Brian W. Kernighan
+.%A Lorinda L. Cherry
+.%T System for Typesetting Mathematics
+.%J Communications of the ACM
+.%V 18
+.%P 151\(en157
+.%D March, 1975
+.Re
+.Rs
+.%A Brian W. Kernighan
+.%A Lorinda L. Cherry
+.%T Typesetting Mathematics, User's Guide
+.%D 1976
+.Re
+.Rs
+.%A Brian W. Kernighan
+.%A Lorinda L. Cherry
+.%T Typesetting Mathematics, User's Guide (Second Edition)
+.%D 1978
+.Re
+.Sh HISTORY
+The eqn utility, a preprocessor for troff, was originally written by
+Brian W. Kernighan and Lorinda L. Cherry in 1975.
+The GNU reimplementation of eqn, part of the GNU troff package, was
+released in 1989 by James Clark.
+The eqn component of
+.Xr mandoc 1
+was added in 2011.
+.Sh AUTHORS
+This
+.Nm
+reference was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
diff --git a/eqn.c b/eqn.c
new file mode 100644
index 000000000000..37f01bcb5b6e
--- /dev/null
+++ b/eqn.c
@@ -0,0 +1,949 @@
+/* $Id: eqn.c,v 1.38 2011/07/25 15:37:00 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "mandoc.h"
+#include "libmandoc.h"
+#include "libroff.h"
+
+#define EQN_NEST_MAX 128 /* maximum nesting of defines */
+#define EQN_MSG(t, x) mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL) /* report message t at the line/column stored in equation node x */
+
+enum eqn_rest { /* outcome of one parsing step */
+ EQN_DESCOPE, /* a scope-closing token was read: "}", "right" or "above" */
+ EQN_ERR, /* parsing failed */
+ EQN_OK, /* step succeeded, parsing may continue */
+ EQN_EOF /* no further tokens in the equation buffer */
+};
+
+/*
+ * Named symbols understood by the parser.
+ * The enumerators are listed in the same order as the rows of
+ * eqnsyms[], which is sized by EQNSYM__MAX.
+ * (EQNSYM_psi was previously misspelled EQNSYM_ps.)
+ */
+enum	eqn_symt {
+	EQNSYM_alpha,
+	EQNSYM_beta,
+	EQNSYM_chi,
+	EQNSYM_delta,
+	EQNSYM_epsilon,
+	EQNSYM_eta,
+	EQNSYM_gamma,
+	EQNSYM_iota,
+	EQNSYM_kappa,
+	EQNSYM_lambda,
+	EQNSYM_mu,
+	EQNSYM_nu,
+	EQNSYM_omega,
+	EQNSYM_omicron,
+	EQNSYM_phi,
+	EQNSYM_pi,
+	EQNSYM_psi,
+	EQNSYM_rho,
+	EQNSYM_sigma,
+	EQNSYM_tau,
+	EQNSYM_theta,
+	EQNSYM_upsilon,
+	EQNSYM_xi,
+	EQNSYM_zeta,
+	EQNSYM_DELTA,
+	EQNSYM_GAMMA,
+	EQNSYM_LAMBDA,
+	EQNSYM_OMEGA,
+	EQNSYM_PHI,
+	EQNSYM_PI,
+	EQNSYM_PSI,
+	EQNSYM_SIGMA,
+	EQNSYM_THETA,
+	EQNSYM_UPSILON,
+	EQNSYM_XI,
+	EQNSYM_inter,
+	EQNSYM_union,
+	EQNSYM_prod,
+	EQNSYM_int,
+	EQNSYM_sum,
+	EQNSYM_grad,
+	EQNSYM_del,
+	EQNSYM_times,
+	EQNSYM_cdot,
+	EQNSYM_nothing,
+	EQNSYM_approx,
+	EQNSYM_prime,
+	EQNSYM_half,
+	EQNSYM_partial,
+	EQNSYM_inf,
+	EQNSYM_muchgreat,
+	EQNSYM_muchless,
+	EQNSYM_larrow,
+	EQNSYM_rarrow,
+	EQNSYM_pm,
+	EQNSYM_nequal,
+	EQNSYM_equiv,
+	EQNSYM_lessequal,
+	EQNSYM_moreequal,
+	EQNSYM__MAX
+};
+
+enum eqnpartt { /* statement keywords; same order as the rows of eqnparts[] */
+ EQN_DEFINE = 0,
+ EQN_NDEFINE,
+ EQN_TDEFINE,
+ EQN_SET,
+ EQN_UNDEF,
+ EQN_GFONT,
+ EQN_GSIZE,
+ EQN_BACK,
+ EQN_FWD,
+ EQN_UP,
+ EQN_DOWN,
+ EQN__MAX /* number of keywords; sizes eqnparts[] */
+};
+
+struct eqnstr { /* a keyword together with its length */
+ const char *name; /* keyword text */
+ size_t sz; /* length compared by EQNSTREQ(); must equal strlen(name) */
+};
+
+#define STRNEQ(p1, sz1, p2, sz2) \
+ ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) /* non-zero when both counted strings have equal length and content */
+#define EQNSTREQ(x, p, sz) \
+ STRNEQ((x)->name, (x)->sz, (p), (sz)) /* the same comparison against a struct eqnstr entry x */
+
+struct eqnpart { /* a statement keyword and the routine that consumes its arguments */
+ struct eqnstr str; /* keyword */
+ int (*fp)(struct eqn_node *); /* handler; eqn_box() maps non-zero to EQN_OK and zero to EQN_ERR */
+};
+
+struct eqnsym { /* a symbol keyword and its replacement */
+ struct eqnstr str; /* keyword as written in the equation */
+ const char *sym; /* name that eqn_box() wraps as "\[name]" */
+};
+
+
+static enum eqn_rest eqn_box(struct eqn_node *, struct eqn_box *);
+static struct eqn_box *eqn_box_alloc(struct eqn_node *,
+ struct eqn_box *);
+static void eqn_box_free(struct eqn_box *);
+static struct eqn_def *eqn_def_find(struct eqn_node *,
+ const char *, size_t);
+static int eqn_do_gfont(struct eqn_node *);
+static int eqn_do_gsize(struct eqn_node *);
+static int eqn_do_define(struct eqn_node *);
+static int eqn_do_ign1(struct eqn_node *);
+static int eqn_do_ign2(struct eqn_node *);
+static int eqn_do_tdefine(struct eqn_node *);
+static int eqn_do_undef(struct eqn_node *);
+static enum eqn_rest eqn_eqn(struct eqn_node *, struct eqn_box *);
+static enum eqn_rest eqn_list(struct eqn_node *, struct eqn_box *);
+static enum eqn_rest eqn_matrix(struct eqn_node *, struct eqn_box *);
+static const char *eqn_nexttok(struct eqn_node *, size_t *);
+static const char *eqn_nextrawtok(struct eqn_node *, size_t *);
+static const char *eqn_next(struct eqn_node *,
+ char, size_t *, int);
+static void eqn_rewind(struct eqn_node *);
+
+static const struct eqnpart eqnparts[EQN__MAX] = { /* statement keywords and their handlers, searched linearly by eqn_box() */
+ { { "define", 6 }, eqn_do_define }, /* EQN_DEFINE */
+ { { "ndefine", 7 }, eqn_do_define }, /* EQN_NDEFINE */
+ { { "tdefine", 7 }, eqn_do_tdefine }, /* EQN_TDEFINE */
+ { { "set", 3 }, eqn_do_ign2 }, /* EQN_SET */
+ { { "undef", 5 }, eqn_do_undef }, /* EQN_UNDEF */
+ { { "gfont", 5 }, eqn_do_gfont }, /* EQN_GFONT */
+ { { "gsize", 5 }, eqn_do_gsize }, /* EQN_GSIZE */
+ { { "back", 4 }, eqn_do_ign1 }, /* EQN_BACK */
+ { { "fwd", 3 }, eqn_do_ign1 }, /* EQN_FWD */
+ { { "up", 2 }, eqn_do_ign1 }, /* EQN_UP */
+ { { "down", 4 }, eqn_do_ign1 }, /* EQN_DOWN */
+};
+
+static const struct eqnstr eqnmarks[EQNMARK__MAX] = { /* mark keywords; the row index is stored as the box's enum eqn_markt */
+ { "", 0 }, /* EQNMARK_NONE */
+ { "dot", 3 }, /* EQNMARK_DOT */
+ { "dotdot", 6 }, /* EQNMARK_DOTDOT */
+ { "hat", 3 }, /* EQNMARK_HAT */
+ { "tilde", 5 }, /* EQNMARK_TILDE */
+ { "vec", 3 }, /* EQNMARK_VEC */
+ { "dyad", 4 }, /* EQNMARK_DYAD */
+ { "bar", 3 }, /* EQNMARK_BAR */
+ { "under", 5 }, /* EQNMARK_UNDER */
+};
+
+static const struct eqnstr eqnfonts[EQNFONT__MAX] = { /* font keywords; the row index is stored as the box's enum eqn_fontt */
+ { "", 0 }, /* EQNFONT_NONE */
+ { "roman", 5 }, /* EQNFONT_ROMAN */
+ { "bold", 4 }, /* EQNFONT_BOLD */
+ { "fat", 3 }, /* EQNFONT_FAT */
+ { "italic", 6 }, /* EQNFONT_ITALIC */
+};
+
+static const struct eqnstr eqnposs[EQNPOS__MAX] = { /* positioning keywords; the row index is stored as the box's enum eqn_post */
+ { "", 0 }, /* EQNPOS_NONE */
+ { "over", 4 }, /* EQNPOS_OVER */
+ { "sup", 3 }, /* EQNPOS_SUP */
+ { "sub", 3 }, /* EQNPOS_SUB */
+ { "to", 2 }, /* EQNPOS_TO */
+ { "from", 4 }, /* EQNPOS_FROM */
+};
+
+static const struct eqnstr eqnpiles[EQNPILE__MAX] = { /* pile and column keywords; the row index is stored as the box's enum eqn_pilet */
+ { "", 0 }, /* EQNPILE_NONE */
+ { "pile", 4 }, /* EQNPILE_PILE */
+ { "cpile", 5 }, /* EQNPILE_CPILE */
+ { "rpile", 5 }, /* EQNPILE_RPILE */
+ { "lpile", 5 }, /* EQNPILE_LPILE */
+ { "col", 3 }, /* EQNPILE_COL */
+ { "ccol", 4 }, /* EQNPILE_CCOL */
+ { "rcol", 4 }, /* EQNPILE_RCOL */
+ { "lcol", 4 }, /* EQNPILE_LCOL */
+};
+
+/*
+ * Symbol keywords and the names they are rewritten to.
+ * eqn_box() compares a token against each row with EQNSTREQ(), which
+ * first requires the token length to equal the stored length, so every
+ * length here must be exactly strlen() of the keyword.
+ * (The "psi" row used to carry length 2, which made "psi" unmatchable
+ * and let the bare token "ps" match instead.)
+ */
+static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
+	{ { "alpha", 5 }, "*a" }, /* EQNSYM_alpha */
+	{ { "beta", 4 }, "*b" }, /* EQNSYM_beta */
+	{ { "chi", 3 }, "*x" }, /* EQNSYM_chi */
+	{ { "delta", 5 }, "*d" }, /* EQNSYM_delta */
+	{ { "epsilon", 7 }, "*e" }, /* EQNSYM_epsilon */
+	{ { "eta", 3 }, "*y" }, /* EQNSYM_eta */
+	{ { "gamma", 5 }, "*g" }, /* EQNSYM_gamma */
+	{ { "iota", 4 }, "*i" }, /* EQNSYM_iota */
+	{ { "kappa", 5 }, "*k" }, /* EQNSYM_kappa */
+	{ { "lambda", 6 }, "*l" }, /* EQNSYM_lambda */
+	{ { "mu", 2 }, "*m" }, /* EQNSYM_mu */
+	{ { "nu", 2 }, "*n" }, /* EQNSYM_nu */
+	{ { "omega", 5 }, "*w" }, /* EQNSYM_omega */
+	{ { "omicron", 7 }, "*o" }, /* EQNSYM_omicron */
+	{ { "phi", 3 }, "*f" }, /* EQNSYM_phi */
+	{ { "pi", 2 }, "*p" }, /* EQNSYM_pi */
+	{ { "psi", 3 }, "*q" }, /* EQNSYM_psi */
+	{ { "rho", 3 }, "*r" }, /* EQNSYM_rho */
+	{ { "sigma", 5 }, "*s" }, /* EQNSYM_sigma */
+	{ { "tau", 3 }, "*t" }, /* EQNSYM_tau */
+	{ { "theta", 5 }, "*h" }, /* EQNSYM_theta */
+	{ { "upsilon", 7 }, "*u" }, /* EQNSYM_upsilon */
+	{ { "xi", 2 }, "*c" }, /* EQNSYM_xi */
+	{ { "zeta", 4 }, "*z" }, /* EQNSYM_zeta */
+	{ { "DELTA", 5 }, "*D" }, /* EQNSYM_DELTA */
+	{ { "GAMMA", 5 }, "*G" }, /* EQNSYM_GAMMA */
+	{ { "LAMBDA", 6 }, "*L" }, /* EQNSYM_LAMBDA */
+	{ { "OMEGA", 5 }, "*W" }, /* EQNSYM_OMEGA */
+	{ { "PHI", 3 }, "*F" }, /* EQNSYM_PHI */
+	{ { "PI", 2 }, "*P" }, /* EQNSYM_PI */
+	{ { "PSI", 3 }, "*Q" }, /* EQNSYM_PSI */
+	{ { "SIGMA", 5 }, "*S" }, /* EQNSYM_SIGMA */
+	{ { "THETA", 5 }, "*H" }, /* EQNSYM_THETA */
+	{ { "UPSILON", 7 }, "*U" }, /* EQNSYM_UPSILON */
+	{ { "XI", 2 }, "*C" }, /* EQNSYM_XI */
+	{ { "inter", 5 }, "ca" }, /* EQNSYM_inter */
+	{ { "union", 5 }, "cu" }, /* EQNSYM_union */
+	{ { "prod", 4 }, "product" }, /* EQNSYM_prod */
+	{ { "int", 3 }, "integral" }, /* EQNSYM_int */
+	{ { "sum", 3 }, "sum" }, /* EQNSYM_sum */
+	{ { "grad", 4 }, "gr" }, /* EQNSYM_grad */
+	{ { "del", 3 }, "gr" }, /* EQNSYM_del */
+	{ { "times", 5 }, "mu" }, /* EQNSYM_times */
+	{ { "cdot", 4 }, "pc" }, /* EQNSYM_cdot */
+	{ { "nothing", 7 }, "&" }, /* EQNSYM_nothing */
+	{ { "approx", 6 }, "~~" }, /* EQNSYM_approx */
+	{ { "prime", 5 }, "aq" }, /* EQNSYM_prime */
+	{ { "half", 4 }, "12" }, /* EQNSYM_half */
+	{ { "partial", 7 }, "pd" }, /* EQNSYM_partial */
+	{ { "inf", 3 }, "if" }, /* EQNSYM_inf */
+	{ { ">>", 2 }, ">>" }, /* EQNSYM_muchgreat */
+	{ { "<<", 2 }, "<<" }, /* EQNSYM_muchless */
+	{ { "<-", 2 }, "<-" }, /* EQNSYM_larrow */
+	{ { "->", 2 }, "->" }, /* EQNSYM_rarrow */
+	{ { "+-", 2 }, "+-" }, /* EQNSYM_pm */
+	{ { "!=", 2 }, "!=" }, /* EQNSYM_nequal */
+	{ { "==", 2 }, "==" }, /* EQNSYM_equiv */
+	{ { "<=", 2 }, "<=" }, /* EQNSYM_lessequal */
+	{ { ">=", 2 }, ">=" }, /* EQNSYM_moreequal */
+};
+
+/*
+ * Consume one input line while an equation is open.
+ * A line starting with ".EN" finishes the equation through eqn_end()
+ * and returns its result; trailing text after ".EN" only triggers a
+ * MANDOCERR_ARGSLOST message.
+ * Any other line is appended (from offset pos) to the equation buffer,
+ * followed by a single blank in place of the newline, and ROFF_IGN is
+ * returned.
+ */
+/* ARGSUSED */
+enum rofferr
+eqn_read(struct eqn_node **epp, int ln,
+    const char *p, int pos, int *offs)
+{
+	struct eqn_node	*node;
+	const char	*text, *rest;
+	enum rofferr	 rc;
+	size_t		 add, cap;
+
+	node = *epp;
+
+	if (strncmp(p, ".EN", 3) == 0) {
+		rc = eqn_end(epp);
+		/* Skip blanks following the macro name. */
+		rest = p + 3;
+		rest += strspn(rest, " \t");
+		if (*rest != '\0')
+			mandoc_msg(MANDOCERR_ARGSLOST, node->parse, ln, pos, NULL);
+		return(rc);
+	}
+
+	/* Room for the text, the separating blank and the terminator. */
+
+	text = p + pos;
+	add = strlen(text) + 1;
+	node->data = mandoc_realloc(node->data, node->sz + add + 1);
+
+	/* The very first line starts from an empty string. */
+
+	if (node->sz == 0)
+		node->data[0] = '\0';
+
+	node->sz += add;
+	cap = node->sz + 1;
+	strlcat(node->data, text, cap);
+	strlcat(node->data, " ", cap);
+	return(ROFF_IGN);
+}
+
+/*
+ * Allocate a zeroed equation node for an equation opened at
+ * line/pos, with the default global size EQN_DEFSIZE.
+ * If a name is given, it is stored without trailing blanks and tabs;
+ * a name consisting only of blanks is treated as absent.
+ * (The previous trimming loop decremented the length below zero for
+ * such a name and read memory in front of the string.)
+ */
+struct eqn_node *
+eqn_alloc(const char *name, int pos, int line, struct mparse *parse)
+{
+	struct eqn_node	*p;
+	size_t		 sz;
+
+	p = mandoc_calloc(1, sizeof(struct eqn_node));
+
+	if (name && '\0' != *name) {
+		sz = strlen(name);
+		/* Drop trailing white-space without running off the front. */
+		while (sz > 0 &&
+		    (' ' == name[sz - 1] || '\t' == name[sz - 1]))
+			sz--;
+		if (sz > 0)
+			p->eqn.name = mandoc_strndup(name, sz);
+	}
+
+	p->parse = parse;
+	p->eqn.ln = line;
+	p->eqn.pos = pos;
+	p->gsize = EQN_DEFSIZE;
+
+	return(p);
+}
+
+/*
+ * Close the equation referenced by *epp and parse its collected text.
+ * *epp is cleared, a root box of type EQN_ROOT is always created, and
+ * an empty equation returns ROFF_IGN right away.
+ * ROFF_EQN is returned only when parsing ran to the end of the input;
+ * an unmatched closing token is reported as MANDOCERR_EQNNSCOPE.
+ */
+enum rofferr
+eqn_end(struct eqn_node **epp)
+{
+	struct eqn_node	*node;
+	struct eqn_box	*top;
+	enum eqn_rest	 rc;
+
+	node = *epp;
+	*epp = NULL;
+
+	top = mandoc_calloc(1, sizeof(struct eqn_box));
+	top->type = EQN_ROOT;
+	node->eqn.root = top;
+
+	if (0 == node->sz)
+		return(ROFF_IGN);
+
+	rc = eqn_eqn(node, top);
+	if (EQN_DESCOPE == rc) {
+		EQN_MSG(MANDOCERR_EQNNSCOPE, node);
+		rc = EQN_ERR;
+	}
+
+	return(EQN_EOF == rc ? ROFF_EQN : ROFF_IGN);
+}
+
+/*
+ * Open a new EQN_SUBEXPR box below "last" and fill it with boxes
+ * until eqn_box() reports anything other than EQN_OK; that result is
+ * handed back to the caller.
+ */
+static enum eqn_rest
+eqn_eqn(struct eqn_node *ep, struct eqn_box *last)
+{
+	struct eqn_box	*sub;
+	enum eqn_rest	 rc;
+
+	sub = eqn_box_alloc(ep, last);
+	sub->type = EQN_SUBEXPR;
+
+	do {
+		rc = eqn_box(ep, sub);
+	} while (EQN_OK == rc);
+
+	return(rc);
+}
+
+/*
+ * Parse the body of a "matrix": an opening "{", a series of boxes
+ * that must each be an lcol, ccol or rcol column, and a closing "}".
+ * Returns EQN_OK on success and EQN_ERR otherwise.
+ *
+ * eqn_box() may return EQN_OK without creating a box (for example
+ * for "mark", "lineup" or a define statement).  In that case the
+ * matrix may still be empty, so the column check is skipped instead
+ * of dereferencing a NULL bp->last.
+ * NOTE(review): plain "col" (EQNPILE_COL) is rejected here, as in the
+ * original code - confirm whether that is intended.
+ */
+static enum eqn_rest
+eqn_matrix(struct eqn_node *ep, struct eqn_box *last)
+{
+	struct eqn_box	*bp;
+	const char	*start;
+	size_t		 sz;
+	enum eqn_rest	 c;
+
+	bp = eqn_box_alloc(ep, last);
+	bp->type = EQN_MATRIX;
+
+	if (NULL == (start = eqn_nexttok(ep, &sz))) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(EQN_ERR);
+	}
+	if ( ! STRNEQ(start, sz, "{", 1)) {
+		EQN_MSG(MANDOCERR_EQNSYNT, ep);
+		return(EQN_ERR);
+	}
+
+	while (EQN_OK == (c = eqn_box(ep, bp))) {
+		/* Nothing was added yet: nothing to validate. */
+		if (NULL == bp->last)
+			continue;
+		switch (bp->last->pile) {
+		case (EQNPILE_LCOL):
+			/* FALLTHROUGH */
+		case (EQNPILE_CCOL):
+			/* FALLTHROUGH */
+		case (EQNPILE_RCOL):
+			continue;
+		default:
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(EQN_ERR);
+		}
+	}
+
+	if (EQN_DESCOPE != c) {
+		if (EQN_EOF == c)
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(EQN_ERR);
+	}
+
+	/* Look again at the token that ended the scope. */
+
+	eqn_rewind(ep);
+	start = eqn_nexttok(ep, &sz);
+	assert(start);
+	if (STRNEQ(start, sz, "}", 1))
+		return(EQN_OK);
+
+	EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
+	return(EQN_ERR);
+}
+
+/*
+ * Parse a brace-enclosed list: "{" followed by sub-expressions that
+ * are separated by "above" and terminated by "}".
+ * The list is stored in a new EQN_LIST box below "last".
+ * Returns EQN_OK on success and EQN_ERR otherwise.
+ */
+static enum eqn_rest
+eqn_list(struct eqn_node *ep, struct eqn_box *last)
+{
+	struct eqn_box	*list;
+	const char	*tok;
+	size_t		 toksz;
+	enum eqn_rest	 rc;
+
+	list = eqn_box_alloc(ep, last);
+	list->type = EQN_LIST;
+
+	tok = eqn_nexttok(ep, &toksz);
+	if (NULL == tok) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(EQN_ERR);
+	}
+	if ( ! STRNEQ(tok, toksz, "{", 1)) {
+		EQN_MSG(MANDOCERR_EQNSYNT, ep);
+		return(EQN_ERR);
+	}
+
+	for (;;) {
+		rc = eqn_eqn(ep, list);
+		if (EQN_DESCOPE != rc) {
+			/* Errors were already reported further down. */
+			if (EQN_ERR != rc)
+				EQN_MSG(MANDOCERR_EQNSCOPE, ep);
+			return(EQN_ERR);
+		}
+		/* Another element follows only after "above". */
+		eqn_rewind(ep);
+		tok = eqn_nexttok(ep, &toksz);
+		assert(tok);
+		if ( ! STRNEQ(tok, toksz, "above", 5))
+			break;
+	}
+
+	/* Re-read the terminating token; it has to be "}". */
+
+	eqn_rewind(ep);
+	tok = eqn_nexttok(ep, &toksz);
+	assert(tok);
+	if ( ! STRNEQ(tok, toksz, "}", 1)) {
+		EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
+		return(EQN_ERR);
+	}
+
+	return(EQN_OK);
+}
+
+/*
+ * Read the next token and act on it, adding at most one box (possibly
+ * with children) below "last".
+ *
+ * Returns EQN_EOF when the input is exhausted, EQN_DESCOPE when the
+ * token closes a scope ("}", "right", "above"), EQN_ERR on failure
+ * and EQN_OK otherwise.  Note that EQN_OK does not imply that a box
+ * was created: "mark", "lineup" and the statement keywords of
+ * eqnparts[] succeed without producing one.
+ *
+ * Fixes relative to the earlier version:
+ *  - Row 0 of the pile/position/mark/font tables is an empty
+ *    placeholder; it is no longer compared, so an empty quoted token
+ *    becomes an empty text box rather than being taken for a keyword.
+ *  - A font or size modifier followed by something that creates no
+ *    box no longer dereferences a NULL last->last.
+ *  - "size n box" returns after applying the size rather than falling
+ *    through and appending the number as a text box.
+ */
+static enum eqn_rest
+eqn_box(struct eqn_node *ep, struct eqn_box *last)
+{
+	size_t		 sz;
+	const char	*start;
+	char		*left;
+	char		 sym[64];
+	enum eqn_rest	 c;
+	int		 i, size;
+	struct eqn_box	*bp;
+
+	if (NULL == (start = eqn_nexttok(ep, &sz)))
+		return(EQN_EOF);
+
+	/* Scope terminators and ignored alignment tokens. */
+
+	if (STRNEQ(start, sz, "}", 1))
+		return(EQN_DESCOPE);
+	else if (STRNEQ(start, sz, "right", 5))
+		return(EQN_DESCOPE);
+	else if (STRNEQ(start, sz, "above", 5))
+		return(EQN_DESCOPE);
+	else if (STRNEQ(start, sz, "mark", 4))
+		return(EQN_OK);
+	else if (STRNEQ(start, sz, "lineup", 6))
+		return(EQN_OK);
+
+	/* Statements such as define, undef, gsize, ... */
+
+	for (i = 0; i < (int)EQN__MAX; i++) {
+		if ( ! EQNSTREQ(&eqnparts[i].str, start, sz))
+			continue;
+		return((*eqnparts[i].fp)(ep) ?
+		    EQN_OK : EQN_ERR);
+	}
+
+	/* A braced sub-expression. */
+
+	if (STRNEQ(start, sz, "{", 1)) {
+		if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) {
+			if (EQN_ERR != c)
+				EQN_MSG(MANDOCERR_EQNSCOPE, ep);
+			return(EQN_ERR);
+		}
+		eqn_rewind(ep);
+		start = eqn_nexttok(ep, &sz);
+		assert(start);
+		if (STRNEQ(start, sz, "}", 1))
+			return(EQN_OK);
+		EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
+		return(EQN_ERR);
+	}
+
+	/* Piles and columns; index 0 is the empty placeholder. */
+
+	for (i = 1; i < (int)EQNPILE__MAX; i++) {
+		if ( ! EQNSTREQ(&eqnpiles[i], start, sz))
+			continue;
+		if (EQN_OK == (c = eqn_list(ep, last)))
+			last->last->pile = (enum eqn_pilet)i;
+		return(c);
+	}
+
+	if (STRNEQ(start, sz, "matrix", 6))
+		return(eqn_matrix(ep, last));
+
+	/* "left" delim expr [ "right" delim ] */
+
+	if (STRNEQ(start, sz, "left", 4)) {
+		if (NULL == (start = eqn_nexttok(ep, &sz))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		}
+		left = mandoc_strndup(start, sz);
+		c = eqn_eqn(ep, last);
+		if (last->last)
+			last->last->left = left;
+		else
+			free(left);
+		if (EQN_DESCOPE != c)
+			return(c);
+		assert(last->last);
+		eqn_rewind(ep);
+		start = eqn_nexttok(ep, &sz);
+		assert(start);
+		if ( ! STRNEQ(start, sz, "right", 5))
+			return(EQN_DESCOPE);
+		if (NULL == (start = eqn_nexttok(ep, &sz))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		}
+		last->last->right = mandoc_strndup(start, sz);
+		return(EQN_OK);
+	}
+
+	/* Positioning: applies to the previous box, then parse on. */
+
+	for (i = 1; i < (int)EQNPOS__MAX; i++) {
+		if ( ! EQNSTREQ(&eqnposs[i], start, sz))
+			continue;
+		if (NULL == last->last) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(EQN_ERR);
+		}
+		last->last->pos = (enum eqn_post)i;
+		if (EQN_EOF == (c = eqn_box(ep, last))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		}
+		return(c);
+	}
+
+	/* Marks: apply to the previous box as well. */
+
+	for (i = 1; i < (int)EQNMARK__MAX; i++) {
+		if ( ! EQNSTREQ(&eqnmarks[i], start, sz))
+			continue;
+		if (NULL == last->last) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(EQN_ERR);
+		}
+		last->last->mark = (enum eqn_markt)i;
+		if (EQN_EOF == (c = eqn_box(ep, last))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		}
+		return(c);
+	}
+
+	/* Fonts: apply to the box that follows, if one was created. */
+
+	for (i = 1; i < (int)EQNFONT__MAX; i++) {
+		if ( ! EQNSTREQ(&eqnfonts[i], start, sz))
+			continue;
+		if (EQN_EOF == (c = eqn_box(ep, last))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		} else if (EQN_OK == c && NULL != last->last)
+			last->last->font = (enum eqn_fontt)i;
+		return(c);
+	}
+
+	/* "size" n box: apply n to the box that follows. */
+
+	if (STRNEQ(start, sz, "size", 4)) {
+		if (NULL == (start = eqn_nexttok(ep, &sz))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		}
+		size = mandoc_strntoi(start, sz, 10);
+		if (EQN_EOF == (c = eqn_box(ep, last))) {
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+			return(EQN_ERR);
+		} else if (EQN_OK != c)
+			return(c);
+		if (NULL != last->last)
+			last->last->size = size;
+		return(EQN_OK);
+	}
+
+	/* Anything else is text, possibly a named symbol. */
+
+	bp = eqn_box_alloc(ep, last);
+	bp->type = EQN_TEXT;
+	for (i = 0; i < (int)EQNSYM__MAX; i++)
+		if (EQNSTREQ(&eqnsyms[i].str, start, sz)) {
+			snprintf(sym, sizeof(sym), "\\[%s]", eqnsyms[i].sym);
+			bp->text = mandoc_strdup(sym);
+			return(EQN_OK);
+		}
+
+	bp->text = mandoc_strndup(start, sz);
+	return(EQN_OK);
+}
+
+/*
+ * Release an equation node: its box tree, every stored definition
+ * (key and value), the definition array, the text buffer, the name
+ * and finally the node itself.
+ */
+void
+eqn_free(struct eqn_node *p)
+{
+	int		 left;
+
+	eqn_box_free(p->eqn.root);
+
+	left = (int)p->defsz;
+	while (left-- > 0) {
+		free(p->defs[left].key);
+		free(p->defs[left].val);
+	}
+
+	free(p->defs);
+	free(p->data);
+	free(p->eqn.name);
+	free(p);
+}
+
+/*
+ * Create a zeroed box, give it the current global size and append it
+ * as the last child of "parent".  Returns the new box.
+ */
+static struct eqn_box *
+eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
+{
+	struct eqn_box	*kid;
+
+	kid = mandoc_calloc(1, sizeof(struct eqn_box));
+	kid->size = ep->gsize;
+	kid->parent = parent;
+
+	/* Either extend the sibling chain or start it. */
+
+	if (parent->first)
+		parent->last->next = kid;
+	else
+		parent->first = kid;
+
+	parent->last = kid;
+	return(kid);
+}
+
+/*
+ * Free a box together with its children and all following siblings.
+ * A NULL argument is accepted and ignored: eqn_free() passes the root
+ * pointer, which is only set once eqn_end() has run, so it is still
+ * NULL for an equation that was never terminated.
+ * Siblings are walked in a loop; recursion is only used to descend
+ * into children.
+ */
+static void
+eqn_box_free(struct eqn_box *bp)
+{
+	struct eqn_box	*next;
+
+	while (NULL != bp) {
+		next = bp->next;
+		eqn_box_free(bp->first);
+		free(bp->text);
+		free(bp->left);
+		free(bp->right);
+		free(bp);
+		bp = next;
+	}
+}
+
+/*
+ * Fetch the next token with '"' as the quote character and without
+ * applying definitions (repl is 0).
+ */
+static const char *
+eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
+{
+	const char	*tok;
+
+	tok = eqn_next(ep, '"', sz, 0);
+	return(tok);
+}
+
+/*
+ * Fetch the next token with '"' as the quote character, replacing
+ * defined keys by their values (repl is 1).
+ */
+static const char *
+eqn_nexttok(struct eqn_node *ep, size_t *sz)
+{
+	const char	*tok;
+
+	tok = eqn_next(ep, '"', sz, 1);
+	return(tok);
+}
+
+static void
+eqn_rewind(struct eqn_node *ep) /* undo the most recent token read */
+{
+
+ ep->cur = ep->rew; /* rew is where eqn_next() started its last token */
+}
+
+/*
+ * Tokeniser.  Returns a pointer into ep->data to the start of the
+ * next token and stores its length in *sz (sz may be NULL), or
+ * returns NULL at the end of the buffer.  The token is not
+ * NUL-terminated.
+ *
+ * If the token starts with "quote" it extends to the next occurrence
+ * of that character (an unterminated quote is reported and runs to
+ * the end of the buffer).  Otherwise "{" and "}" are single-character
+ * tokens and everything else runs up to a blank, tab, '^', '~', '"',
+ * '{' or '}'.  Trailing blanks, tabs, '^' and '~' are skipped.
+ * ep->rew remembers where the token started so that eqn_rewind() can
+ * put it back.
+ *
+ * If "repl" is non-zero and the unquoted token is a defined key, the
+ * key is replaced in the buffer by its value and scanning restarts at
+ * the same place.  At most EQN_NEST_MAX such replacements are made
+ * per call; after that MANDOCERR_ROFFLOOP is reported and NULL is
+ * returned.  (The counter used to stay at zero, so a definition that
+ * expanded to itself never terminated.)
+ */
+static const char *
+eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
+{
+	char		*start, *next;
+	int		 q, lim;
+	size_t		 ssz, dummy;
+	struct eqn_def	*def;
+
+	if (NULL == sz)
+		sz = &dummy;
+
+	lim = 0;
+	ep->rew = ep->cur;
+again:
+	/* Prevent self-definitions. */
+
+	if (lim >= EQN_NEST_MAX) {
+		EQN_MSG(MANDOCERR_ROFFLOOP, ep);
+		return(NULL);
+	}
+
+	ep->cur = ep->rew;
+	start = &ep->data[(int)ep->cur];
+	q = 0;
+
+	if ('\0' == *start)
+		return(NULL);
+
+	if (quote == *start) {
+		ep->cur++;
+		q = 1;
+	}
+
+	start = &ep->data[(int)ep->cur];
+
+	if ( ! q) {
+		if ('{' == *start || '}' == *start)
+			ssz = 1;
+		else
+			ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
+		next = start + (int)ssz;
+		if ('\0' == *next)
+			next = NULL;
+	} else
+		next = strchr(start, quote);
+
+	if (NULL != next) {
+		*sz = (size_t)(next - start);
+		ep->cur += *sz;
+		/* Step over the closing quote. */
+		if (q)
+			ep->cur++;
+		while (' ' == ep->data[(int)ep->cur] ||
+		    '\t' == ep->data[(int)ep->cur] ||
+		    '^' == ep->data[(int)ep->cur] ||
+		    '~' == ep->data[(int)ep->cur])
+			ep->cur++;
+	} else {
+		if (q)
+			EQN_MSG(MANDOCERR_BADQUOTE, ep);
+		next = strchr(start, '\0');
+		*sz = (size_t)(next - start);
+		ep->cur += *sz;
+	}
+
+	/* Quotes aren't expanded for values. */
+
+	if (q || ! repl)
+		return(start);
+
+	if (NULL != (def = eqn_def_find(ep, start, *sz))) {
+		/* Grow the buffer first if the value is longer. */
+		if (def->valsz > *sz) {
+			ep->sz += def->valsz - *sz;
+			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
+			ep->data[ep->sz] = '\0';
+			start = &ep->data[(int)ep->rew];
+		}
+
+		/* Shift the tail, then drop the value into the gap. */
+		memmove(start + def->valsz, start + *sz,
+		    (strlen(start) - *sz) + 1);
+		memcpy(start, def->val, def->valsz);
+		lim++;
+		goto again;
+	}
+
+	return(start);
+}
+
+/*
+ * Skip one raw argument.  Returns 1 on success; on end of input,
+ * reports MANDOCERR_EQNEOF and returns 0.
+ */
+static int
+eqn_do_ign1(struct eqn_node *ep)
+{
+
+	if (NULL != eqn_nextrawtok(ep, NULL))
+		return(1);
+
+	EQN_MSG(MANDOCERR_EQNEOF, ep);
+	return(0);
+}
+
+/*
+ * Skip two raw arguments.  Returns 1 on success; if either one is
+ * missing, reports MANDOCERR_EQNEOF and returns 0.
+ */
+static int
+eqn_do_ign2(struct eqn_node *ep)
+{
+
+	if (NULL == eqn_nextrawtok(ep, NULL) ||
+	    NULL == eqn_nextrawtok(ep, NULL)) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(0);
+	}
+
+	return(1);
+}
+
+/*
+ * Skip the key and the value of a "tdefine" statement.  The value is
+ * read using the character at the current position as its quote
+ * character.  Returns 1 on success; if either part is missing,
+ * reports MANDOCERR_EQNEOF and returns 0.
+ */
+static int
+eqn_do_tdefine(struct eqn_node *ep)
+{
+
+	if (NULL == eqn_nextrawtok(ep, NULL) ||
+	    NULL == eqn_next(ep, ep->data[(int)ep->cur], NULL, 0)) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(0);
+	}
+
+	return(1);
+}
+
+/*
+ * Handle "define" and "ndefine": read a raw key and a value and
+ * store the pair in ep->defs.  An existing key is overwritten; else
+ * a slot freed by "undef" (keysz of zero) is reused or the array is
+ * grown by one.  The value is read using the character at the current
+ * position as its quote character.
+ * Returns 1 on success; if key or value is missing, reports
+ * MANDOCERR_EQNEOF and returns 0.
+ */
+static int
+eqn_do_define(struct eqn_node *ep)
+{
+	const char	*tok;
+	size_t		 len;
+	struct eqn_def	*slot;
+	int		 idx;
+
+	tok = eqn_nextrawtok(ep, &len);
+	if (NULL == tok) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(0);
+	}
+
+	slot = eqn_def_find(ep, tok, len);
+
+	if (NULL == slot) {
+		/* Look for a vacated slot before growing the array. */
+		idx = 0;
+		while (idx < (int)ep->defsz && 0 != ep->defs[idx].keysz)
+			idx++;
+
+		if (idx == (int)ep->defsz) {
+			ep->defsz++;
+			ep->defs = mandoc_realloc
+				(ep->defs, ep->defsz *
+				 sizeof(struct eqn_def));
+			ep->defs[idx].key = ep->defs[idx].val = NULL;
+		}
+
+		slot = &ep->defs[idx];
+		slot->keysz = len;
+		slot->key = mandoc_realloc(slot->key, len + 1);
+		memcpy(slot->key, tok, len);
+		slot->key[(int)len] = '\0';
+	}
+
+	tok = eqn_next(ep, ep->data[(int)ep->cur], &len, 0);
+	if (NULL == tok) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(0);
+	}
+
+	slot->valsz = len;
+	slot->val = mandoc_realloc(slot->val, len + 1);
+	memcpy(slot->val, tok, len);
+	slot->val[(int)len] = '\0';
+	return(1);
+}
+
+/*
+ * Handle "gfont": the font argument is read and discarded.
+ * Returns 1 on success; on end of input, reports MANDOCERR_EQNEOF
+ * and returns 0.
+ */
+static int
+eqn_do_gfont(struct eqn_node *ep)
+{
+
+	if (NULL != eqn_nextrawtok(ep, NULL))
+		return(1);
+
+	EQN_MSG(MANDOCERR_EQNEOF, ep);
+	return(0);
+}
+
+/*
+ * Handle "gsize": parse the argument as a base-10 integer and store
+ * it as the size given to boxes allocated from now on.
+ * Returns 1 on success; on end of input, reports MANDOCERR_EQNEOF
+ * and returns 0.
+ */
+static int
+eqn_do_gsize(struct eqn_node *ep)
+{
+	const char	*tok;
+	size_t		 len;
+
+	tok = eqn_nextrawtok(ep, &len);
+	if (NULL != tok) {
+		ep->gsize = mandoc_strntoi(tok, len, 10);
+		return(1);
+	}
+
+	EQN_MSG(MANDOCERR_EQNEOF, ep);
+	return(0);
+}
+
+/*
+ * Handle "undef": read a raw key and, if it is defined, mark its
+ * slot as unused by zeroing the key length.  An unknown key is not
+ * an error.
+ * Returns 1 on success; on end of input, reports MANDOCERR_EQNEOF
+ * and returns 0.
+ */
+static int
+eqn_do_undef(struct eqn_node *ep)
+{
+	const char	*tok;
+	struct eqn_def	*hit;
+	size_t		 len;
+
+	tok = eqn_nextrawtok(ep, &len);
+	if (NULL == tok) {
+		EQN_MSG(MANDOCERR_EQNEOF, ep);
+		return(0);
+	}
+
+	hit = eqn_def_find(ep, tok, len);
+	if (NULL != hit)
+		hit->keysz = 0;
+
+	return(1);
+}
+
+/*
+ * Look up the definition whose key equals the sz bytes at "key".
+ * Slots with a key length of zero are unused and never match.
+ * Returns the matching slot or NULL.
+ */
+static struct eqn_def *
+eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
+{
+	struct eqn_def	*def;
+	int		 i;
+
+	for (i = 0; i < (int)ep->defsz; i++) {
+		def = &ep->defs[i];
+		if (0 == def->keysz)
+			continue;
+		if (STRNEQ(def->key, def->keysz, key, sz))
+			return(def);
+	}
+
+	return(NULL);
+}
diff --git a/eqn_html.c b/eqn_html.c
new file mode 100644
index 000000000000..80c82f1de5b5
--- /dev/null
+++ b/eqn_html.c
@@ -0,0 +1,81 @@
+/* $Id: eqn_html.c,v 1.2 2011/07/24 10:09:03 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "out.h"
+#include "html.h"
+
+static const enum htmltag fontmap[EQNFONT__MAX] = { /* HTML element opened for each equation font, indexed by the box's font value */
+ TAG_SPAN, /* EQNFONT_NONE */
+ TAG_SPAN, /* EQNFONT_ROMAN */
+ TAG_B, /* EQNFONT_BOLD */
+ TAG_B, /* EQNFONT_FAT */
+ TAG_I /* EQNFONT_ITALIC */
+};
+
+
+static void eqn_box(struct html *, const struct eqn_box *);
+
+void
+print_eqn(struct html *p, const struct eqn *ep) /* emit one equation wrapped in a span of class "eqn" */
+{
+ struct htmlpair tag;
+ struct tag *t;
+
+ PAIR_CLASS_INIT(&tag, "eqn");
+ t = print_otag(p, TAG_SPAN, 1, &tag); /* opening tag, kept so it can be closed below */
+
+ p->flags |= HTML_NONOSPACE; /* flag is set only while the box tree is printed */
+ eqn_box(p, ep->root);
+ p->flags &= ~HTML_NONOSPACE;
+
+ print_tagq(p, t); /* close the span opened above */
+}
+
+/*
+ * Print a box and all of its following siblings.
+ * For each box, in this order: the font element (if a font is set),
+ * the left delimiter, the text, the children, the end of the font
+ * element, and the right delimiter.
+ */
+static void
+eqn_box(struct html *p, const struct eqn_box *bp)
+{
+	struct tag	*font;
+
+	do {
+		font = NULL;
+		if (EQNFONT_NONE != bp->font)
+			font = print_otag(p, fontmap[(int)bp->font], 0, NULL);
+
+		if (bp->left)
+			print_text(p, bp->left);
+		if (bp->text)
+			print_text(p, bp->text);
+		if (bp->first)
+			eqn_box(p, bp->first);
+
+		if (NULL != font)
+			print_tagq(p, font);
+		if (bp->right)
+			print_text(p, bp->right);
+
+		bp = bp->next;
+	} while (NULL != bp);
+}
diff --git a/eqn_term.c b/eqn_term.c
new file mode 100644
index 000000000000..cfbd8d48f807
--- /dev/null
+++ b/eqn_term.c
@@ -0,0 +1,76 @@
+/* $Id: eqn_term.c,v 1.4 2011/07/24 10:09:03 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "out.h"
+#include "term.h"
+
+static const enum termfont fontmap[EQNFONT__MAX] = { /* terminal font pushed for each equation font, indexed by the box's font value */
+ TERMFONT_NONE, /* EQNFONT_NONE */
+ TERMFONT_NONE, /* EQNFONT_ROMAN */
+ TERMFONT_BOLD, /* EQNFONT_BOLD */
+ TERMFONT_BOLD, /* EQNFONT_FAT */
+ TERMFONT_UNDER /* EQNFONT_ITALIC */
+};
+
+static void eqn_box(struct termp *, const struct eqn_box *);
+
+void
+term_eqn(struct termp *p, const struct eqn *ep) /* print one equation to the terminal */
+{
+
+ p->flags |= TERMP_NONOSPACE; /* flag stays set for the whole equation */
+ eqn_box(p, ep->root);
+ term_word(p, " "); /* a single blank word follows the equation */
+ p->flags &= ~TERMP_NONOSPACE;
+}
+
+/*
+ * Print a box and all of its following siblings.
+ * For each box, in this order: push the font (if one is set), the
+ * left delimiter, "(" for a sub-expression, the text, the children,
+ * ")" for a sub-expression, the right delimiter, pop the font.
+ */
+static void
+eqn_box(struct termp *p, const struct eqn_box *bp)
+{
+	int		 fonted, grouped;
+
+	do {
+		fonted = EQNFONT_NONE != bp->font;
+		grouped = EQN_SUBEXPR == bp->type;
+
+		if (fonted)
+			term_fontpush(p, fontmap[(int)bp->font]);
+		if (bp->left)
+			term_word(p, bp->left);
+		if (grouped)
+			term_word(p, "(");
+
+		if (bp->text)
+			term_word(p, bp->text);
+		if (bp->first)
+			eqn_box(p, bp->first);
+
+		if (grouped)
+			term_word(p, ")");
+		if (bp->right)
+			term_word(p, bp->right);
+		if (fonted)
+			term_fontpop(p);
+
+		bp = bp->next;
+	} while (NULL != bp);
+}
diff --git a/example.style.css b/example.style.css
new file mode 100644
index 000000000000..660f4d132000
--- /dev/null
+++ b/example.style.css
@@ -0,0 +1,110 @@
+/* $Id: example.style.css,v 1.49 2011/12/15 12:18:57 kristaps Exp $ */
+/*
+ * This is an example style-sheet provided for mandoc(1) and the -Thtml
+ * or -Txhtml output mode.
+ * It mimics the appearance of the legacy man.cgi output.
+ * See mdoc(7) and man(7) for macro explanations.
+ */
+
+div.mandoc { min-width: 102ex;
+ width: 102ex;
+ font-family: monospace; } /* This is the outer node of all mandoc -T[x]html documents. */
+div.mandoc h1 { margin-bottom: 0ex; font-size: inherit; margin-left: -4ex; } /* Section header (Sh, SH). */
+div.mandoc h2 { margin-bottom: 0ex; font-size: inherit; margin-left: -2ex; } /* Sub-section header (Ss, SS). */
+div.mandoc table { width: 100%; margin-top: 0ex; margin-bottom: 0ex; } /* All tables. */
+div.mandoc td { vertical-align: top; } /* All table cells. */
+div.mandoc p { } /* Paragraph: Pp, Lp. */
+div.mandoc blockquote { margin-left: 5ex; margin-top: 0ex; margin-bottom: 0ex; } /* D1, Dl. */
+div.mandoc div.section { margin-bottom: 2ex; margin-left: 5ex; } /* Sections (Sh, SH). */
+div.mandoc div.subsection { } /* Sub-sections (Ss, SS). */
+div.mandoc table.synopsis { } /* SYNOPSIS section table. */
+div.mandoc table.foot { } /* Document footer. */
+div.mandoc td.foot-date { width: 50%; } /* Document footer: date. */
+div.mandoc td.foot-os { width: 50%; text-align: right; } /* Document footer: OS/source. */
+div.mandoc table.head { } /* Document header. */
+div.mandoc td.head-ltitle { width: 10%; } /* Document header: left-title. */
+div.mandoc td.head-vol { width: 80%; text-align: center; } /* Document header: volume. */
+div.mandoc td.head-rtitle { width: 10%; text-align: right; } /* Document header: right-title. */
+div.mandoc .display { } /* All Bd, D1, Dl. */
+div.mandoc .list { } /* All Bl. */
+div.mandoc i { } /* Italic: BI, IB, I, (implicit). */
+div.mandoc b { } /* Bold: SB, BI, IB, BR, RB, B, (implicit). */
+div.mandoc small { } /* Small: SB, SM. */
+div.mandoc .emph { font-style: italic; font-weight: normal; } /* Emphasis: Em, Bl -emphasis. */
+div.mandoc .symb { font-style: normal; font-weight: bold; } /* Symbolic: Sy, Ms, Bf -symbolic. */
+div.mandoc .lit { font-style: normal; font-weight: normal; font-family: monospace; } /* Literal: Dl, Li, Ql, Bf -literal, Bl -literal, Bl -unfilled. */
+div.mandoc i.addr { font-weight: normal; } /* Address (Ad). */
+div.mandoc i.arg { font-weight: normal; } /* Command argument (Ar). */
+div.mandoc span.author { } /* Author name (An). */
+div.mandoc b.cmd { font-style: normal; } /* Command (Cm). */
+div.mandoc b.config { font-style: normal; } /* Config statement (Cd). */
+div.mandoc span.define { } /* Defines (Dv). */
+div.mandoc span.desc { } /* Nd. After em-dash. */
+div.mandoc b.diag { font-style: normal; } /* Diagnostic (Bl -diag). */
+div.mandoc span.env { } /* Environment variables (Ev). */
+div.mandoc span.errno { } /* Error string (Er). */
+div.mandoc i.farg { font-weight: normal; } /* Function argument (Fa, Fn). */
+div.mandoc i.file { font-weight: normal; } /* File (Pa). */
+div.mandoc b.flag { font-style: normal; } /* Flag (Fl, Cm). */
+div.mandoc b.fname { font-style: normal; } /* Function name (Fa, Fn, Rv). */
+div.mandoc i.ftype { font-weight: normal; } /* Function types (Ft, Fn). */
+div.mandoc b.includes { font-style: normal; } /* Header includes (In). */
+div.mandoc span.lib { } /* Library (Lb). */
+div.mandoc i.link-sec { font-weight: normal; } /* Section links (Sx). */
+div.mandoc b.macro { font-style: normal; } /* Macro-ish thing (Fd). */
+div.mandoc b.name { font-style: normal; } /* Name of utility (Nm). */
+div.mandoc span.opt { } /* Options (Op, Oo/Oc). */
+div.mandoc span.ref { } /* Citations (Rs). */
+div.mandoc span.ref-auth { } /* Reference author (%A). */
+div.mandoc i.ref-book { font-weight: normal; } /* Reference book (%B). */
+div.mandoc span.ref-city { } /* Reference city (%C). */
+div.mandoc span.ref-date { } /* Reference date (%D). */
+div.mandoc i.ref-issue { font-weight: normal; } /* Reference issuer/publisher (%I). */
+div.mandoc i.ref-jrnl { font-weight: normal; } /* Reference journal (%J). */
+div.mandoc span.ref-num { } /* Reference number (%N). */
+div.mandoc span.ref-opt { } /* Reference optionals (%O). */
+div.mandoc span.ref-page { } /* Reference page (%P). */
+div.mandoc span.ref-corp { } /* Reference corporate/foreign author (%Q). */
+div.mandoc span.ref-rep { } /* Reference report (%R). */
+div.mandoc span.ref-title { text-decoration: underline; } /* Reference title (%T). */
+div.mandoc span.ref-vol { } /* Reference volume (%V). */
+div.mandoc span.type { font-style: italic; font-weight: normal; } /* Variable types (Vt). */
+div.mandoc span.unix { } /* Unices (Ux, Ox, Nx, Fx, Bx, Bsx, Dx). */
+div.mandoc b.utility { font-style: normal; } /* Name of utility (Ex). */
+div.mandoc b.var { font-style: normal; } /* Variables (Rv). */
+div.mandoc a.link-ext { } /* Off-site link (Lk). */
+div.mandoc a.link-includes { } /* Include-file link (In). */
+div.mandoc a.link-mail { } /* Mailto links (Mt). */
+div.mandoc a.link-man { } /* Manual links (Xr). */
+div.mandoc a.link-ref { } /* Reference section links (%Q). */
+div.mandoc a.link-sec { } /* Section links (Sx). */
+div.mandoc dl.list-diag { } /* Formatting for lists. See mdoc(7). */
+div.mandoc dt.list-diag { }
+div.mandoc dd.list-diag { }
+div.mandoc dl.list-hang { }
+div.mandoc dt.list-hang { }
+div.mandoc dd.list-hang { }
+div.mandoc dl.list-inset { }
+div.mandoc dt.list-inset { }
+div.mandoc dd.list-inset { }
+div.mandoc dl.list-ohang { }
+div.mandoc dt.list-ohang { }
+div.mandoc dd.list-ohang { margin-left: 0ex; }
+div.mandoc dl.list-tag { }
+div.mandoc dt.list-tag { }
+div.mandoc dd.list-tag { }
+div.mandoc table.list-col { }
+div.mandoc tr.list-col { }
+div.mandoc td.list-col { }
+div.mandoc ul.list-bul { list-style-type: disc; padding-left: 1em; }
+div.mandoc li.list-bul { }
+div.mandoc ul.list-dash { list-style-type: none; padding-left: 0em; }
+div.mandoc li.list-dash:before { content: "\2014 "; }
+div.mandoc ul.list-hyph { list-style-type: none; padding-left: 0em; }
+div.mandoc li.list-hyph:before { content: "\2013 "; }
+div.mandoc ul.list-item { list-style-type: none; padding-left: 0em; }
+div.mandoc li.list-item { }
+div.mandoc ol.list-enum { padding-left: 2em; }
+div.mandoc li.list-enum { }
+div.mandoc span.eqn { } /* Equation modes. See eqn(7). */
+div.mandoc table.tbl { } /* Table modes. See tbl(7). */
diff --git a/external.png b/external.png
new file mode 100644
index 000000000000..419c06fb960b
--- /dev/null
+++ b/external.png
Binary files differ
diff --git a/html.c b/html.c
new file mode 100644
index 000000000000..326df035fc44
--- /dev/null
+++ b/html.c
@@ -0,0 +1,699 @@
+/* $Id: html.c,v 1.150 2011/10/05 21:35:17 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "mandoc.h"
+#include "libmandoc.h"
+#include "out.h"
+#include "html.h"
+#include "main.h"
+
+/*
+ * Static description of one HTML element: its tag name and the flags
+ * that steer how print_otag() and print_ctag() emit it.
+ */
+struct htmldata {
+ const char *name;
+ int flags;
+#define HTML_CLRLINE (1 << 0) /* No space before the tag; newline after it. */
+#define HTML_NOSTACK (1 << 1) /* Never pushed onto the open-tag stack. */
+#define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */
+};
+
+/*
+ * Element table indexed by enum htmltag.  The entries must stay in the
+ * same order as that enum; the trailing comment names the enumerator
+ * each row belongs to.
+ */
+static const struct htmldata htmltags[TAG_MAX] = {
+ {"html", HTML_CLRLINE}, /* TAG_HTML */
+ {"head", HTML_CLRLINE}, /* TAG_HEAD */
+ {"body", HTML_CLRLINE}, /* TAG_BODY */
+ {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
+ {"title", HTML_CLRLINE}, /* TAG_TITLE */
+ {"div", HTML_CLRLINE}, /* TAG_DIV */
+ {"h1", 0}, /* TAG_H1 */
+ {"h2", 0}, /* TAG_H2 */
+ {"span", 0}, /* TAG_SPAN */
+ {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
+ {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
+ {"a", 0}, /* TAG_A */
+ {"table", HTML_CLRLINE}, /* TAG_TABLE */
+ {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
+ {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
+ {"tr", HTML_CLRLINE}, /* TAG_TR */
+ {"td", HTML_CLRLINE}, /* TAG_TD */
+ {"li", HTML_CLRLINE}, /* TAG_LI */
+ {"ul", HTML_CLRLINE}, /* TAG_UL */
+ {"ol", HTML_CLRLINE}, /* TAG_OL */
+ {"dl", HTML_CLRLINE}, /* TAG_DL */
+ {"dt", HTML_CLRLINE}, /* TAG_DT */
+ {"dd", HTML_CLRLINE}, /* TAG_DD */
+ {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
+ {"p", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_P */
+ {"pre", HTML_CLRLINE }, /* TAG_PRE */
+ {"b", 0 }, /* TAG_B */
+ {"i", 0 }, /* TAG_I */
+ {"code", 0 }, /* TAG_CODE */
+ {"small", 0 }, /* TAG_SMALL */
+};
+
+/*
+ * Attribute names indexed by enum htmlattr; print_otag() looks up the
+ * key of each htmlpair here.  Order must match the enum.
+ */
+static const char *const htmlattrs[ATTR_MAX] = {
+ "http-equiv", /* ATTR_HTTPEQUIV */
+ "content", /* ATTR_CONTENT */
+ "name", /* ATTR_NAME */
+ "rel", /* ATTR_REL */
+ "href", /* ATTR_HREF */
+ "type", /* ATTR_TYPE */
+ "media", /* ATTR_MEDIA */
+ "class", /* ATTR_CLASS */
+ "style", /* ATTR_STYLE */
+ "width", /* ATTR_WIDTH */
+ "id", /* ATTR_ID */
+ "summary", /* ATTR_SUMMARY */
+ "align", /* ATTR_ALIGN */
+ "colspan", /* ATTR_COLSPAN */
+};
+
+/*
+ * Unit suffix written by bufcat_su() for each roff scaling unit,
+ * indexed by su->unit.  cm, in, pc and pt map to themselves; every
+ * other unit is written as "em" or "ex".  SCALE_MM values are also
+ * divided by 100 in bufcat_su() before being printed.
+ */
+static const char *const roffscales[SCALE_MAX] = {
+ "cm", /* SCALE_CM */
+ "in", /* SCALE_IN */
+ "pc", /* SCALE_PC */
+ "pt", /* SCALE_PT */
+ "em", /* SCALE_EM */
+ "em", /* SCALE_MM */
+ "ex", /* SCALE_EN */
+ "ex", /* SCALE_BU */
+ "em", /* SCALE_VS */
+ "ex", /* SCALE_FS */
+};
+
+static void bufncat(struct html *, const char *, size_t);
+static void print_ctag(struct html *, enum htmltag);
+static int print_encode(struct html *, const char *, int);
+static void print_metaf(struct html *, enum mandoc_esc);
+static void print_attr(struct html *, const char *, const char *);
+static void *ml_alloc(char *, enum htmltype);
+
+/*
+ * Allocate a zeroed output state of the given markup type and apply the
+ * suboption string outopts ("style=", "man=", "includes=", "fragment").
+ * Unknown suboptions are ignored.  getsubopt() hands back pointers into
+ * outopts itself, so that string must outlive the returned state.
+ */
+static void *
+ml_alloc(char *outopts, enum htmltype type)
+{
+	struct html	*h;
+	char		*val;
+	const char	*toks[5];
+
+	toks[0] = "style";
+	toks[1] = "man";
+	toks[2] = "includes";
+	toks[3] = "fragment";
+	toks[4] = NULL;
+
+	h = mandoc_calloc(1, sizeof(struct html));
+
+	h->type = type;
+	h->tags.head = NULL;
+	h->symtab = mchars_alloc();
+
+	/* No option string at all: defaults only. */
+	if (NULL == outopts)
+		return(h);
+
+	while ('\0' != *outopts) {
+		switch (getsubopt(&outopts, UNCONST(toks), &val)) {
+		case (0):	/* style */
+			h->style = val;
+			break;
+		case (1):	/* man */
+			h->base_man = val;
+			break;
+		case (2):	/* includes */
+			h->base_includes = val;
+			break;
+		case (3):	/* fragment */
+			h->oflags |= HTML_FRAGMENT;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return(h);
+}
+
+/*
+ * Allocate output state for HTML 4.01 Strict; outopts is handed to
+ * ml_alloc() unchanged.  Release the result with html_free().
+ */
+void *
+html_alloc(char *outopts)
+{
+
+ return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
+}
+
+
+/*
+ * Allocate output state for XHTML 1.0 Strict; outopts is handed to
+ * ml_alloc() unchanged.  Release the result with html_free().
+ */
+void *
+xhtml_alloc(char *outopts)
+{
+
+ return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
+}
+
+
+/*
+ * Release an output state created by html_alloc() or xhtml_alloc():
+ * any tags still on the open-tag stack, the symbol table, and the
+ * state itself.  No closing tags are printed.
+ */
+void
+html_free(void *p)
+{
+	struct html	*h;
+	struct tag	*cur, *next;
+
+	h = (struct html *)p;
+
+	for (cur = h->tags.head; NULL != cur; cur = next) {
+		next = cur->next;
+		free(cur);
+	}
+
+	if (NULL != h->symtab)
+		mchars_free(h->symtab);
+
+	free(h);
+}
+
+
+/*
+ * Emit the generic <head> content: the Content-Type and resource-type
+ * <meta> elements and, when a style sheet was configured, the <link>
+ * element that references it.
+ */
+void
+print_gen_head(struct html *h)
+{
+	struct htmlpair	 tag[4];
+
+	PAIR_INIT(&tag[0], ATTR_HTTPEQUIV, "Content-Type");
+	PAIR_INIT(&tag[1], ATTR_CONTENT, "text/html; charset=utf-8");
+	print_otag(h, TAG_META, 2, tag);
+
+	PAIR_INIT(&tag[0], ATTR_NAME, "resource-type");
+	PAIR_INIT(&tag[1], ATTR_CONTENT, "document");
+	print_otag(h, TAG_META, 2, tag);
+
+	/* The style sheet link is optional. */
+	if (NULL == h->style)
+		return;
+
+	PAIR_INIT(&tag[0], ATTR_REL, "stylesheet");
+	PAIR_INIT(&tag[1], ATTR_HREF, h->style);
+	PAIR_INIT(&tag[2], ATTR_TYPE, "text/css");
+	PAIR_INIT(&tag[3], ATTR_MEDIA, "all");
+	print_otag(h, TAG_LINK, 4, tag);
+}
+
+/*
+ * Switch the in-line font in response to a roff font escape: close the
+ * currently open font tag (if any), remember the outgoing font in
+ * h->metal, and open <b> or <i> for the new font.  Aborts on any escape
+ * type that is not a font escape.
+ */
+static void
+print_metaf(struct html *h, enum mandoc_esc deco)
+{
+	enum htmlfont	 next;
+
+	switch (deco) {
+	case (ESCAPE_FONTPREV):
+		/* Must be read before h->metal is overwritten below. */
+		next = h->metal;
+		break;
+	case (ESCAPE_FONTITALIC):
+		next = HTMLFONT_ITALIC;
+		break;
+	case (ESCAPE_FONTBOLD):
+		next = HTMLFONT_BOLD;
+		break;
+	case (ESCAPE_FONT):
+		/* FALLTHROUGH */
+	case (ESCAPE_FONTROMAN):
+		next = HTMLFONT_NONE;
+		break;
+	default:
+		abort();
+		/* NOTREACHED */
+	}
+
+	if (NULL != h->metaf) {
+		print_tagq(h, h->metaf);
+		h->metaf = NULL;
+	}
+
+	h->metal = h->metac;
+	h->metac = next;
+
+	if (HTMLFONT_BOLD == next)
+		h->metaf = print_otag(h, TAG_B, 0, NULL);
+	else if (HTMLFONT_NONE != next)
+		h->metaf = print_otag(h, TAG_I, 0, NULL);
+}
+
+/*
+ * Approximate the display width of cp, counting every ordinary
+ * character and every character-producing escape as width 1 and all
+ * other escapes as width 0.  Counting stops at the first malformed
+ * escape, in which case the width accumulated so far is returned.
+ */
+int
+html_strlen(const char *cp)
+{
+	int		 ssz, sz;
+	const char	*seq, *p;
+
+	/*
+	 * Account for escaped sequences within string length
+	 * calculations.  This follows the logic in term_strlen() as we
+	 * must calculate the width of produced strings.
+	 * Assume that characters are always width of "1".  This is
+	 * hacky, but it gets the job done for approximation of widths.
+	 */
+
+	sz = 0;
+	while (NULL != (p = strchr(cp, '\\'))) {
+		sz += (int)(p - cp);
+		/*
+		 * Resume right after the backslash, wherever it was
+		 * found; merely incrementing cp is only correct when
+		 * the backslash is the first character.
+		 */
+		cp = p + 1;
+		switch (mandoc_escape(&cp, &seq, &ssz)) {
+		case (ESCAPE_ERROR):
+			return(sz);
+		case (ESCAPE_UNICODE):
+			/* FALLTHROUGH */
+		case (ESCAPE_NUMBERED):
+			/* FALLTHROUGH */
+		case (ESCAPE_SPECIAL):
+			sz++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	assert(sz >= 0);
+	return(sz + (int)strlen(cp));
+}
+
+/*
+ * Write p to stdout as HTML text: "<", ">" and "&" become entities,
+ * ASCII_HYPH becomes a plain "-", and roff escape sequences are decoded
+ * with mandoc_escape().  When norecurse is non-zero, font escapes are
+ * skipped instead of opening font tags (print_attr() uses this for
+ * attribute values).  Output stops at the first malformed escape.
+ * Returns 1 if the string ends with an ESCAPE_NOSPACE escape, else 0.
+ */
+static int
+print_encode(struct html *h, const char *p, int norecurse)
+{
+ size_t sz;
+ int c, len, nospace;
+ const char *seq;
+ enum mandoc_esc esc;
+ /* Characters that interrupt the verbatim copy below. */
+ static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
+
+ nospace = 0;
+
+ while ('\0' != *p) {
+ /* Copy the run of characters needing no special treatment. */
+ sz = strcspn(p, rejs);
+
+ fwrite(p, 1, sz, stdout);
+ p += (int)sz;
+
+ if ('\0' == *p)
+ break;
+
+ switch (*p++) {
+ case ('<'):
+ printf("&lt;");
+ continue;
+ case ('>'):
+ printf("&gt;");
+ continue;
+ case ('&'):
+ printf("&amp;");
+ continue;
+ case (ASCII_HYPH):
+ putchar('-');
+ continue;
+ default:
+ /* Backslash: p now points just past it. */
+ break;
+ }
+
+ esc = mandoc_escape(&p, &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
+
+ switch (esc) {
+ case (ESCAPE_UNICODE):
+ /* Skip past "u" header. */
+ c = mchars_num2uc(seq + 1, len - 1);
+ if ('\0' != c)
+ printf("&#x%x;", c);
+ break;
+ case (ESCAPE_NUMBERED):
+ c = mchars_num2char(seq, len);
+ if ('\0' != c)
+ putchar(c);
+ break;
+ case (ESCAPE_SPECIAL):
+ c = mchars_spec2cp(h->symtab, seq, len);
+ if (c > 0)
+ printf("&#%d;", c);
+ else if (-1 == c && 1 == len)
+ putchar((int)*seq);
+ break;
+ case (ESCAPE_FONT):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTPREV):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTBOLD):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTITALIC):
+ /* FALLTHROUGH */
+ case (ESCAPE_FONTROMAN):
+ if (norecurse)
+ break;
+ print_metaf(h, esc);
+ break;
+ case (ESCAPE_NOSPACE):
+ /* Only reported when it is the last thing in the string. */
+ if ('\0' == *p)
+ nospace = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return(nospace);
+}
+
+
+/*
+ * Print one attribute as ` key="val"'.  The value is passed through
+ * print_encode() with font escapes disabled; the key is written as is.
+ */
+static void
+print_attr(struct html *h, const char *key, const char *val)
+{
+ printf(" %s=\"", key);
+ (void)print_encode(h, val, 1);
+ putchar('\"');
+}
+
+
+/*
+ * Print the opening tag for `tag' with the sz attribute pairs in p and,
+ * unless the element is HTML_NOSTACK, push it onto the open-tag stack.
+ * Returns the new stack entry, which print_tagq() and print_stagq()
+ * accept as their end marker, or NULL for HTML_NOSTACK elements.
+ */
+struct tag *
+print_otag(struct html *h, enum htmltag tag,
+ int sz, const struct htmlpair *p)
+{
+ int i;
+ struct tag *t;
+
+ /* Push this tag onto the stack of open scopes. */
+
+ if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
+ t = mandoc_malloc(sizeof(struct tag));
+ t->tag = tag;
+ t->next = h->tags.head;
+ h->tags.head = t;
+ } else
+ t = NULL;
+
+ /* In-line tags get a separating space unless it is suppressed. */
+
+ if ( ! (HTML_NOSPACE & h->flags))
+ if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
+ /* Manage keeps! */
+ if ( ! (HTML_KEEP & h->flags)) {
+ if (HTML_PREKEEP & h->flags)
+ h->flags |= HTML_KEEP;
+ putchar(' ');
+ } else
+ printf("&#160;");
+ }
+
+ /* HTML_NONOSPACE keeps HTML_NOSPACE set; otherwise it is cleared. */
+
+ if ( ! (h->flags & HTML_NONOSPACE))
+ h->flags &= ~HTML_NOSPACE;
+ else
+ h->flags |= HTML_NOSPACE;
+
+ /* Print out the tag name and attributes. */
+
+ printf("<%s", htmltags[tag].name);
+ for (i = 0; i < sz; i++)
+ print_attr(h, htmlattrs[p[i].key], p[i].val);
+
+ /* Add non-overridable attributes. */
+
+ if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
+ print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
+ print_attr(h, "xml:lang", "en");
+ print_attr(h, "lang", "en");
+ }
+
+ /* Accommodate for XML "well-formed" singleton escaping. */
+
+ if (HTML_AUTOCLOSE & htmltags[tag].flags)
+ switch (h->type) {
+ case (HTML_XHTML_1_0_STRICT):
+ putchar('/');
+ break;
+ default:
+ break;
+ }
+
+ putchar('>');
+
+ /* Whatever follows the tag directly gets no leading space. */
+
+ h->flags |= HTML_NOSPACE;
+
+ if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
+ putchar('\n');
+
+ return(t);
+}
+
+
+/*
+ * Print the closing tag for `tag'.  HTML_CLRLINE elements are followed
+ * by a newline and suppress the space before the next output.
+ * The open-tag stack is not touched here.
+ */
+static void
+print_ctag(struct html *h, enum htmltag tag)
+{
+
+ printf("</%s>", htmltags[tag].name);
+ if (HTML_CLRLINE & htmltags[tag].flags) {
+ h->flags |= HTML_NOSPACE;
+ putchar('\n');
+ }
+}
+
+/*
+ * Emit the document type declaration for the selected markup type.
+ * Anything other than HTML 4.01 Strict is treated as XHTML 1.0 Strict
+ * and is preceded by an XML declaration.
+ */
+void
+print_gen_decls(struct html *h)
+{
+	const char	*name, *doctype, *dtd;
+
+	if (HTML_HTML_4_01_STRICT == h->type) {
+		name = "HTML";
+		doctype = "-//W3C//DTD HTML 4.01//EN";
+		dtd = "http://www.w3.org/TR/html4/strict.dtd";
+	} else {
+		puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+		name = "html";
+		doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
+		dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+	}
+
+	printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
+			name, doctype, dtd);
+}
+
+/*
+ * Print one word of text: emit the separating space (or a non-breaking
+ * space while HTML_KEEP is set), wrap the word in <b> or <i> when a
+ * font mode is active, and encode the word with print_encode().
+ * No font tag may be open on entry.
+ */
+void
+print_text(struct html *h, const char *word)
+{
+
+ if ( ! (HTML_NOSPACE & h->flags)) {
+ /* Manage keeps! */
+ if ( ! (HTML_KEEP & h->flags)) {
+ if (HTML_PREKEEP & h->flags)
+ h->flags |= HTML_KEEP;
+ putchar(' ');
+ } else
+ printf("&#160;");
+ }
+
+ /* Re-open the current font for the duration of this word. */
+ assert(NULL == h->metaf);
+ if (HTMLFONT_NONE != h->metac)
+ h->metaf = HTMLFONT_BOLD == h->metac ?
+ print_otag(h, TAG_B, 0, NULL) :
+ print_otag(h, TAG_I, 0, NULL);
+
+ /*
+ * A word ending in a no-space escape suppresses the next space;
+ * otherwise spacing resumes unless HTML_NONOSPACE is set.
+ */
+ assert(word);
+ if ( ! print_encode(h, word, 0)) {
+ if ( ! (h->flags & HTML_NONOSPACE))
+ h->flags &= ~HTML_NOSPACE;
+ } else
+ h->flags |= HTML_NOSPACE;
+
+ /* Close whichever font tag is open now (the word may have changed it). */
+ if (h->metaf) {
+ print_tagq(h, h->metaf);
+ h->metaf = NULL;
+ }
+
+ h->flags &= ~HTML_IGNDELIM;
+}
+
+
+/*
+ * Close and pop open tags from the top of the stack up to and
+ * including `until'.  If `until' is NULL (or is not on the stack),
+ * every open tag is closed.
+ */
+void
+print_tagq(struct html *h, const struct tag *until)
+{
+	struct tag	*tag;
+	int		 last;
+
+	while ((tag = h->tags.head) != NULL) {
+		/*
+		 * Remember to close out and nullify the current
+		 * meta-font and table, if applicable.
+		 */
+		if (tag == h->metaf)
+			h->metaf = NULL;
+		if (tag == h->tblt)
+			h->tblt = NULL;
+		print_ctag(h, tag->tag);
+		h->tags.head = tag->next;
+		/*
+		 * Decide whether this was the final tag before freeing
+		 * it: the value of a freed pointer is indeterminate and
+		 * must not be compared afterwards.
+		 */
+		last = (NULL != until && tag == until);
+		free(tag);
+		if (last)
+			return;
+	}
+}
+
+
+/*
+ * Close and pop open tags from the top of the stack down to, but not
+ * including, `suntil'.  If `suntil' is NULL (or is not on the stack),
+ * every open tag is closed.
+ */
+void
+print_stagq(struct html *h, const struct tag *suntil)
+{
+	struct tag	*cur;
+
+	for (;;) {
+		cur = h->tags.head;
+		/* Stop on an empty stack or at the (kept) end marker. */
+		if (NULL == cur || cur == suntil)
+			return;
+
+		/*
+		 * Forget the current meta-font and table scopes when
+		 * the tag that represents them goes away.
+		 */
+		if (h->metaf == cur)
+			h->metaf = NULL;
+		if (h->tblt == cur)
+			h->tblt = NULL;
+
+		print_ctag(h, cur->tag);
+		h->tags.head = cur->next;
+		free(cur);
+	}
+}
+
+/*
+ * Reset the scratch buffer h->buf to the empty string.
+ */
+void
+bufinit(struct html *h)
+{
+
+ h->buf[0] = '\0';
+ h->buflen = 0;
+}
+
+/*
+ * Append one declaration of the form "key:val;" to the scratch buffer.
+ */
+void
+bufcat_style(struct html *h, const char *key, const char *val)
+{
+
+ bufcat(h, key);
+ bufcat(h, ":");
+ bufcat(h, val);
+ bufcat(h, ";");
+}
+
+/*
+ * Append the string p to the scratch buffer.  strlcat() returns the
+ * length of the string it tried to create, so the assertion fails when
+ * the result did not fit into BUFSIZ bytes.
+ */
+void
+bufcat(struct html *h, const char *p)
+{
+
+ h->buflen = strlcat(h->buf, p, BUFSIZ);
+ assert(h->buflen < BUFSIZ);
+}
+
+/*
+ * Append printf-style formatted output to the scratch buffer.  Output
+ * that does not fit is truncated; h->buflen is re-derived from the
+ * buffer contents afterwards.
+ */
+void
+bufcat_fmt(struct html *h, const char *fmt, ...)
+{
+	va_list		 ap;
+	char		*dst;
+	size_t		 room;
+
+	dst = h->buf + h->buflen;
+	room = BUFSIZ - h->buflen - 1;
+
+	va_start(ap, fmt);
+	(void)vsnprintf(dst, room, fmt, ap);
+	va_end(ap);
+
+	h->buflen = strlen(h->buf);
+}
+
+/*
+ * Append at most sz bytes of p to the scratch buffer, stopping early at
+ * a NUL in p.  h->buflen grows by the number of bytes actually
+ * appended, so it always equals strlen(h->buf).
+ */
+static void
+bufncat(struct html *h, const char *p, size_t sz)
+{
+	size_t		 n;
+
+	/*
+	 * strncat() stops at the first NUL; count only what it will
+	 * really copy so that buflen does not overshoot.
+	 */
+	for (n = 0; n < sz && '\0' != p[n]; n++)
+		continue;
+
+	assert(h->buflen + n + 1 < BUFSIZ);
+	strncat(h->buf, p, n);
+	h->buflen += n;
+}
+
+/*
+ * Expand the include-file link template h->base_includes into the
+ * scratch buffer, replacing each "%I" with `name'.  Any other "%x"
+ * pair is copied through unchanged, as is a lone "%" at the very end
+ * of the template.  With no template set, the buffer is left empty.
+ */
+void
+buffmt_includes(struct html *h, const char *name)
+{
+	const char	*p, *pp;
+
+	pp = h->base_includes;
+
+	bufinit(h);
+	if (NULL == pp)
+		return;
+
+	while (NULL != (p = strchr(pp, '%'))) {
+		bufncat(h, pp, (size_t)(p - pp));
+		if ('\0' == p[1]) {
+			/*
+			 * Trailing '%': copy it and stop on the
+			 * terminator rather than stepping over it.
+			 */
+			bufcat(h, "%");
+			pp = p + 1;
+			break;
+		}
+		switch (p[1]) {
+		case('I'):
+			bufcat(h, name);
+			break;
+		default:
+			bufncat(h, p, 2);
+			break;
+		}
+		pp = p + 2;
+	}
+	bufcat(h, pp);
+}
+
+/*
+ * Expand the manual link template h->base_man into the scratch buffer,
+ * replacing "%N" with `name' and "%S" with `sec' ("1" if sec is NULL).
+ * Any other "%x" pair is copied through unchanged, as is a lone "%" at
+ * the very end of the template.  With no template set, the buffer is
+ * left empty.
+ */
+void
+buffmt_man(struct html *h,
+		const char *name, const char *sec)
+{
+	const char	*p, *pp;
+
+	pp = h->base_man;
+
+	bufinit(h);
+	if (NULL == pp)
+		return;
+
+	while (NULL != (p = strchr(pp, '%'))) {
+		bufncat(h, pp, (size_t)(p - pp));
+		if ('\0' == p[1]) {
+			/*
+			 * Trailing '%': copy it and stop on the
+			 * terminator rather than stepping over it.
+			 */
+			bufcat(h, "%");
+			pp = p + 1;
+			break;
+		}
+		switch (p[1]) {
+		case('S'):
+			bufcat(h, sec ? sec : "1");
+			break;
+		case('N'):
+			/*
+			 * The name comes from the document; never use
+			 * it as the format string itself.
+			 */
+			bufcat_fmt(h, "%s", name);
+			break;
+		default:
+			bufncat(h, p, 2);
+			break;
+		}
+		pp = p + 2;
+	}
+	bufcat(h, pp);
+}
+
+/*
+ * Append a declaration "p: <value><unit>;" for the scaled width su to
+ * the scratch buffer.  SCALE_MM values are divided by 100 first, and a
+ * result of exactly zero is replaced by 1.
+ */
+void
+bufcat_su(struct html *h, const char *p, const struct roffsu *su)
+{
+	double		 val;
+
+	val = su->scale;
+	if (SCALE_MM == su->unit) {
+		val /= 100.0;
+		if (0.0 == val)
+			val = 1.0;
+	}
+
+	bufcat_fmt(h, "%s: %.2f%s;", p, val, roffscales[su->unit]);
+}
+
+/*
+ * Append src to the scratch buffer as a string of two-digit lowercase
+ * hexadecimal byte values, so that arbitrary text can serve as an
+ * identifier.
+ */
+void
+bufcat_id(struct html *h, const char *src)
+{
+
+	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
+
+	/*
+	 * Go through unsigned char: where plain char is signed, bytes
+	 * >= 0x80 would sign-extend and print as "ffffffXX".
+	 */
+	while ('\0' != *src)
+		bufcat_fmt(h, "%.2x",
+		    (unsigned int)(unsigned char)*src++);
+}
diff --git a/html.h b/html.h
new file mode 100644
index 000000000000..60960702f19b
--- /dev/null
+++ b/html.h
@@ -0,0 +1,164 @@
+/* $Id: html.h,v 1.47 2011/10/05 21:35:17 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef HTML_H
+#define HTML_H
+
+__BEGIN_DECLS
+
+/*
+ * HTML elements the formatter can emit.  The values index the
+ * htmltags[] table in html.c, so both must list the elements in the
+ * same order; TAG_MAX is the element count.
+ */
+enum htmltag {
+ TAG_HTML,
+ TAG_HEAD,
+ TAG_BODY,
+ TAG_META,
+ TAG_TITLE,
+ TAG_DIV,
+ TAG_H1,
+ TAG_H2,
+ TAG_SPAN,
+ TAG_LINK,
+ TAG_BR,
+ TAG_A,
+ TAG_TABLE,
+ TAG_TBODY,
+ TAG_COL,
+ TAG_TR,
+ TAG_TD,
+ TAG_LI,
+ TAG_UL,
+ TAG_OL,
+ TAG_DL,
+ TAG_DT,
+ TAG_DD,
+ TAG_BLOCKQUOTE,
+ TAG_P,
+ TAG_PRE,
+ TAG_B,
+ TAG_I,
+ TAG_CODE,
+ TAG_SMALL,
+ TAG_MAX
+};
+
+/*
+ * HTML attributes the formatter can emit.  The values index the
+ * htmlattrs[] table in html.c, so both must list the attributes in the
+ * same order; ATTR_MAX is the attribute count.
+ */
+enum htmlattr {
+ ATTR_HTTPEQUIV,
+ ATTR_CONTENT,
+ ATTR_NAME,
+ ATTR_REL,
+ ATTR_HREF,
+ ATTR_TYPE,
+ ATTR_MEDIA,
+ ATTR_CLASS,
+ ATTR_STYLE,
+ ATTR_WIDTH,
+ ATTR_ID,
+ ATTR_SUMMARY,
+ ATTR_ALIGN,
+ ATTR_COLSPAN,
+ ATTR_MAX
+};
+
+/*
+ * In-line font modes.  html.c opens <b> for HTMLFONT_BOLD, no tag for
+ * HTMLFONT_NONE, and <i> for anything else.
+ */
+enum htmlfont {
+ HTMLFONT_NONE = 0,
+ HTMLFONT_BOLD,
+ HTMLFONT_ITALIC,
+ HTMLFONT_MAX
+};
+
+/*
+ * One entry on the stack of open tags; allocated by print_otag() and
+ * freed by print_tagq(), print_stagq() or html_free().
+ */
+struct tag {
+ struct tag *next; /* entry below this one, or NULL */
+ enum htmltag tag; /* element that was opened */
+};
+
+/* Stack of open tags, kept as a singly linked list. */
+struct tagq {
+ struct tag *head; /* most recently opened tag, or NULL */
+};
+
+/* One attribute of an opening tag, as passed to print_otag(). */
+struct htmlpair {
+ enum htmlattr key; /* which attribute */
+ const char *val; /* unencoded value; print_otag() encodes it */
+};
+
+/*
+ * Fill in the struct htmlpair pointed to by p with attribute t and
+ * value v.  The value pointer is stored, not copied.
+ */
+#define PAIR_INIT(p, t, v) \
+ do { \
+ (p)->key = (t); \
+ (p)->val = (v); \
+ } while (/* CONSTCOND */ 0)
+
+/* Shorthands for common attributes; STYLE takes the struct html whose buf holds the value. */
+#define PAIR_ID_INIT(p, v) PAIR_INIT(p, ATTR_ID, v)
+#define PAIR_CLASS_INIT(p, v) PAIR_INIT(p, ATTR_CLASS, v)
+#define PAIR_HREF_INIT(p, v) PAIR_INIT(p, ATTR_HREF, v)
+#define PAIR_STYLE_INIT(p, h) PAIR_INIT(p, ATTR_STYLE, (h)->buf)
+#define PAIR_SUMMARY_INIT(p, v) PAIR_INIT(p, ATTR_SUMMARY, v)
+
+/*
+ * Output dialect.  In html.c it selects the document type declaration,
+ * the extra attributes on <html>, and whether singleton tags are
+ * closed with "/>".
+ */
+enum htmltype {
+ HTML_HTML_4_01_STRICT,
+ HTML_XHTML_1_0_STRICT
+};
+
+/*
+ * State of one HTML/XHTML formatter, created by html_alloc() or
+ * xhtml_alloc() and released by html_free().
+ */
+struct html {
+ int flags;
+#define HTML_NOSPACE (1 << 0) /* suppress next space */
+#define HTML_IGNDELIM (1 << 1) /* NOTE(review): cleared by print_text(), never set in html.c -- confirm meaning */
+#define HTML_KEEP (1 << 2) /* emit &#160; instead of a plain space */
+#define HTML_PREKEEP (1 << 3) /* turn on HTML_KEEP after the next space */
+#define HTML_NONOSPACE (1 << 4) /* never add spaces */
+#define HTML_LITERAL (1 << 5) /* literal (e.g., <PRE>) context */
+ struct tagq tags; /* stack of open tags */
+ struct rofftbl tbl; /* current table */
+ struct tag *tblt; /* current open table scope */
+ struct mchars *symtab; /* character-escapes */
+ char *base_man; /* base for manpage href; %N and %S are expanded */
+ char *base_includes; /* base for include href; %I is expanded */
+ char *style; /* style-sheet URI */
+ char buf[BUFSIZ]; /* see bufcat and friends */
+ size_t buflen; /* length of the string in buf */
+ struct tag *metaf; /* current open font scope */
+ enum htmlfont metal; /* last used font */
+ enum htmlfont metac; /* current font mode */
+ enum htmltype type; /* output media type */
+ int oflags; /* output options */
+#define HTML_FRAGMENT (1 << 0) /* don't emit HTML/HEAD/BODY */
+};
+
+void print_gen_decls(struct html *);
+void print_gen_head(struct html *);
+struct tag *print_otag(struct html *, enum htmltag,
+ int, const struct htmlpair *);
+void print_tagq(struct html *, const struct tag *);
+void print_stagq(struct html *, const struct tag *);
+void print_text(struct html *, const char *);
+void print_tblclose(struct html *);
+void print_tbl(struct html *, const struct tbl_span *);
+void print_eqn(struct html *, const struct eqn *);
+
+void bufcat_fmt(struct html *, const char *, ...);
+void bufcat(struct html *, const char *);
+void bufcat_id(struct html *, const char *);
+void bufcat_style(struct html *,
+ const char *, const char *);
+void bufcat_su(struct html *, const char *,
+ const struct roffsu *);
+void bufinit(struct html *);
+void buffmt_man(struct html *,
+ const char *, const char *);
+void buffmt_includes(struct html *, const char *);
+
+int html_strlen(const char *);
+
+__END_DECLS
+
+#endif /*!HTML_H*/
diff --git a/index.css b/index.css
new file mode 100644
index 000000000000..d98316eaf311
--- /dev/null
+++ b/index.css
@@ -0,0 +1,48 @@
+html { min-width: 40em;
+ margin-top: 2em;
+ margin-left: auto;
+ margin-right: auto;
+ width: 80%; }
+
+body { text-align: justify;
+ font-family: Helvetica,Arial,sans-serif;
+ line-height: 120%;
+ font-size: small; }
+
+p,ul,table { margin-left: 3em; }
+
+p.head,
+p.subhead,
+p.foot { margin-left: 0.0em; margin-right: 0.0em; }
+
+p.news { margin-left: 2.0em; }
+
+li { margin: 0.25em; }
+
+h1 { font-size: 110%; }
+h2 { font-size: 105%; margin-left: 1.5em }
+
+p.head { margin-bottom: 0.5em;
+ border-bottom: 1px solid #dddddd;
+ padding-bottom: 0.2em; }
+
+p.subhead { margin-top: 0em;
+ margin-bottom: 1.75em; }
+
+p.foot { border-top: 1px solid #dddddd;
+ color: #666666;
+ padding-top: 0.2em;
+ margin-top: 1.75em; }
+
+span.nm { color: green; }
+
+span.file { font-style: italic; }
+
+span.attn { font-weight: bold; }
+
+span.flag { font-weight: bold; }
+
+a { text-decoration: none; }
+
+a.external { background: transparent url(external.png) center right no-repeat;
+ padding-right: 12px; }
diff --git a/index.sgml b/index.sgml
new file mode 100644
index 000000000000..4386a9ebd955
--- /dev/null
+++ b/index.sgml
@@ -0,0 +1,364 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<HTML>
+ <HEAD>
+ <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+ <LINK REL="stylesheet" HREF="index.css" TYPE="text/css" MEDIA="all">
+ <TITLE>mdocml | UNIX manpage compiler</TITLE>
+ </HEAD>
+ <BODY>
+ <P CLASS="head">
+ <B>mdocml</B> &#8211; UNIX manpage compiler, current version @VERSION@ (@VDATE@)
+ </P>
+ <P CLASS="subhead">
+ Sources: <A HREF="/snapshots/mdocml.tar.gz">current</A>,
+ <A HREF="/cgi-bin/cvsweb/?cvsroot=mdocml">cvsweb</A>
+ (<A HREF="/snapshots/">archives</A>)
+ </P>
+ <H1>
+ <A NAME="description">Description</A>
+ </H1>
+ <P>
+ <SPAN CLASS="nm">mdocml</SPAN> is a suite of tools compiling <I><A HREF="mdoc.7.html">mdoc</A></I>, the roff macro
+ package of choice for BSD manual pages, and <I><A HREF="man.7.html">man</A></I>, the predominant historical package for
+ UNIX manuals. The mission of <SPAN CLASS="nm">mdocml</SPAN> is to deprecate <A
+ HREF="http://www.gnu.org/software/groff/" CLASS="external">groff</A>, the GNU troff implementation, for displaying <I>mdoc</I>
+ pages whilst providing token support for <I>man</I>.
+ </P>
+ <P>
+ Why? groff amounts to over 5 MB of source code, most of which is C++ and all of which is GPL. It runs slowly, produces
+ uncertain output, and varies in operation from system to system. mdocml strives to fix this (respectively small, C, <A
+ CLASS="external" HREF="http://www.isc.org/software/license">ISC</A>-licensed, fast and regular).
+ </P>
+ <P>
+ <SPAN CLASS="nm">mdocml</SPAN> consists of the <A HREF="mandoc.3.html">libmandoc</A> validating compiler and <A
+ HREF="mandoc.1.html">mandoc</A>, which interfaces with the compiler library to format output for UNIX terminals (with
+ support for wide-character locales), XHTML, HTML, PostScript, and PDF.
+ It also includes <A HREF="preconv.1.html">preconv</A>, for recoding multibyte manuals;
+ <A HREF="demandoc.1.html">demandoc</A>, for emitting only text parts of manuals;
+ <A HREF="mandocdb.8.html">mandocdb</A>, for indexing manuals; and
+ <A HREF="apropos.1.html">apropos</A>, <A HREF="whatis.1.html">whatis</A>, and
+ <A HREF="man.cgi.7.html">man.cgi</A> (via <A HREF="catman.8.html">catman</A>) for semantic search of manual content.
+ It is a <A CLASS="external" HREF="http://bsd.lv/">BSD.lv</A> project.
+ </P>
+ <P>
+ <I>Disambiguation</I>: <SPAN CLASS="nm">mdocml</SPAN> is often referred to by its installed binary, <Q>mandoc</Q>.
+ </P>
+ <H2>
+ <A NAME="sources">Sources</A>
+ </H2>
+ <P>
+ <SPAN CLASS="nm">mdocml</SPAN> is in plain-old ANSI C and should build and run on any modern system; however, you'll
+ need <A HREF="http://www.oracle.com/technetwork/database/berkeleydb/overview/index.html">libdb</A> to build <A
+ HREF="apropos.1.html">apropos</A>, <A HREF="whatis.1.html">whatis</A>, <A HREF="man.cgi.7.html">man.cgi</A>, <A
+ HREF="catman.8.html">catman</A>, and <A HREF="mandocdb.8.html">mandocdb</A> (this is installed by default on BSD UNIX
+ systems &mdash; see the <I>Makefile</I> if you're running Linux). To build and install into <I>/usr/local/</I>, just
+ run <CODE>make install</CODE>. Be careful: the <B>preconv</B>, <B>apropos</B>, and <B>whatis</B> binary names are
+ usually taken by existing utilities.
+ </P>
+ <H2>
+ <A NAME="binaries">Binaries</A>
+ </H2>
+ <P>
+ Binary archives consist of pre-compiled binaries, manuals, and other necessary files.
+ Universal (Mac OS X) binaries are compiled for the PPC, i386, and x86_64 architectures.
+ Windows binaries are compiled with <A CLASS="external" HREF="http://www.mingw.org">MingW</A> for the 32-bit (i686) and
+ 64-bit (x86_64) architectures.
+ </P>
+ <H2>
+ Downstream
+ </H2>
+ <P>
+ Several systems come bundled with <SPAN CLASS="nm">mdocml</SPAN> utilities.
+ If your system does not appear below, the maintainers have not contacted me and it should not be considered
+ <Q>official</Q>.
+ Please <A HREF="#contact">contact us</A> if you plan on maintaining a downstream version!
+ </P>
+ <TABLE WIDTH="100%" SUMMARY="Downstream Sources">
+ <COL WIDTH="175">
+ <COL>
+ <TBODY>
+ <TR>
+ <TD>DragonFly BSD</TD>
+ <TD>
+ <A HREF="http://gitweb.dragonflybsd.org/dragonfly.git/tree/HEAD:/usr.bin/mandoc" CLASS="external">usr.bin/mandoc</A>
+ </TD>
+ </TR>
+ <TR>
+ <TD>FreeBSD</TD>
+ <TD>
+ <A HREF="http://www.freebsd.org/cgi/cvsweb.cgi/ports/textproc/mdocml/" CLASS="external">ports/textproc/mdocml</A>
+ </TD>
+ </TR>
+ <TR>
+ <TD>NetBSD</TD>
+ <TD>
+ <A HREF="http://cvsweb.netbsd.org/bsdweb.cgi/src/external/bsd/mdocml/" CLASS="external">src/external/bsd/mdocml</A>
+ </TD>
+ </TR>
+ <TR>
+ <TD>OpenBSD</TD>
+ <TD>
+ <A HREF="http://www.openbsd.org/cgi-bin/cvsweb/src/usr.bin/mandoc/" CLASS="external">src/usr.bin/mandoc</A>
+ </TD>
+ </TR>
+ <TR>
+ <TD>pkgsrc</TD>
+ <TD>
+ <A HREF="http://pkgsrc.se/textproc/mdocml" CLASS="external">textproc/mdocml</A>
+ </TD>
+ </TR>
+ <TR>
+ <TD>Minix3</TD>
+ <TD>
+ <A HREF="http://git.minix3.org/?p=minix.git;a=tree;f=external/bsd/mdocml" CLASS="external">external/bsd/mdocml</A>
+ </TD>
+ </TR>
+ </TBODY>
+ </TABLE>
+ <H1>
+ <A NAME="documentation">Documentation</A>
+ </H1>
+ <P>
+ These manuals are generated automatically and refer to the current release.
+ They are the authoritative documentation for the <SPAN CLASS="nm">mdocml</SPAN> system.
+ </P>
+
+ <TABLE WIDTH="100%" SUMMARY="Documentation">
+ <COL WIDTH="175">
+ <COL>
+ <TBODY>
+ <TR>
+ <TD VALIGN="top"><A HREF="apropos.1.html">apropos(1)</A></TD>
+ <TD VALIGN="top">
+ search the manual page database
+ (<A HREF="apropos.1.txt">text</A> |
+ <A HREF="apropos.1.xhtml">xhtml</A> |
+ <A HREF="apropos.1.pdf">pdf</A> |
+ <A HREF="apropos.1.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="demandoc.1.html">demandoc(1)</A></TD>
+ <TD VALIGN="top">
+ emit only text of UNIX manuals
+ (<A HREF="demandoc.1.txt">text</A> |
+ <A HREF="demandoc.1.xhtml">xhtml</A> |
+ <A HREF="demandoc.1.pdf">pdf</A> |
+ <A HREF="demandoc.1.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="mandoc.1.html">mandoc(1)</A></TD>
+ <TD VALIGN="top">
+ format and display UNIX manuals
+ (<A HREF="mandoc.1.txt">text</A> |
+ <A HREF="mandoc.1.xhtml">xhtml</A> |
+ <A HREF="mandoc.1.pdf">pdf</A> |
+ <A HREF="mandoc.1.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="preconv.1.html">preconv(1)</A></TD>
+ <TD VALIGN="top">
+ recode multibyte UNIX manuals
+ (<A HREF="preconv.1.txt">text</A> |
+ <A HREF="preconv.1.xhtml">xhtml</A> |
+ <A HREF="preconv.1.pdf">pdf</A> |
+ <A HREF="preconv.1.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="whatis.1.html">whatis(1)</A></TD>
+ <TD VALIGN="top">
+ search the manual page database
+ (<A HREF="whatis.1.txt">text</A> |
+ <A HREF="whatis.1.xhtml">xhtml</A> |
+ <A HREF="whatis.1.pdf">pdf</A> |
+ <A HREF="whatis.1.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="mandoc.3.html">mandoc(3)</A></TD>
+ <TD VALIGN="top">
+ mandoc macro compiler library
+ (<A HREF="mandoc.3.txt">text</A> |
+ <A HREF="mandoc.3.xhtml">xhtml</A> |
+ <A HREF="mandoc.3.pdf">pdf</A> |
+ <A HREF="mandoc.3.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="man.7.html">man(7)</A></TD>
+ <TD VALIGN="top">
+ man language reference
+ (<A HREF="man.7.txt">text</A> |
+ <A HREF="man.7.xhtml">xhtml</A> |
+ <A HREF="man.7.pdf">pdf</A> |
+ <A HREF="man.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="man.cgi.7.html">man.cgi(7)</A></TD>
+ <TD VALIGN="top">
+ cgi for manpage query and display
+ (<A HREF="man.cgi.7.txt">text</A> |
+ <A HREF="man.cgi.7.xhtml">xhtml</A> |
+ <A HREF="man.cgi.7.pdf">pdf</A> |
+ <A HREF="man.cgi.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="eqn.7.html">eqn(7)</A></TD>
+ <TD VALIGN="top">
+ eqn-mandoc language reference
+ (<A HREF="eqn.7.txt">text</A> |
+ <A HREF="eqn.7.xhtml">xhtml</A> |
+ <A HREF="eqn.7.pdf">pdf</A> |
+ <A HREF="eqn.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="mandoc_char.7.html">mandoc_char(7)</A></TD>
+ <TD VALIGN="top">
+ mandoc special characters
+ (<A HREF="mandoc_char.7.txt">text</A> |
+ <A HREF="mandoc_char.7.xhtml">xhtml</A> |
+ <A HREF="mandoc_char.7.pdf">pdf</A> |
+ <A HREF="mandoc_char.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="mdoc.7.html">mdoc(7)</A></TD>
+ <TD VALIGN="top">
+ mdoc language reference
+ (<A HREF="mdoc.7.txt">text</A> |
+ <A HREF="mdoc.7.xhtml">xhtml</A> |
+ <A HREF="mdoc.7.pdf">pdf</A> |
+ <A HREF="mdoc.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="roff.7.html">roff(7)</A></TD>
+ <TD VALIGN="top">
+ roff-mandoc language reference
+ (<A HREF="roff.7.txt">text</A> |
+ <A HREF="roff.7.xhtml">xhtml</A> |
+ <A HREF="roff.7.pdf">pdf</A> |
+ <A HREF="roff.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="tbl.7.html">tbl(7)</A></TD>
+ <TD VALIGN="top">
+ tbl-mandoc language reference
+ (<A HREF="tbl.7.txt">text</A> |
+ <A HREF="tbl.7.xhtml">xhtml</A> |
+ <A HREF="tbl.7.pdf">pdf</A> |
+ <A HREF="tbl.7.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="catman.8.html">catman(8)</A></TD>
+ <TD VALIGN="top">
+ update a man.cgi manpage cache
+ (<A HREF="catman.8.txt">text</A> |
+ <A HREF="catman.8.xhtml">xhtml</A> |
+ <A HREF="catman.8.pdf">pdf</A> |
+ <A HREF="catman.8.ps">ps</A>)
+ </TD>
+ </TR>
+ <TR>
+ <TD VALIGN="top"><A HREF="mandocdb.8.html">mandocdb(8)</A></TD>
+ <TD VALIGN="top">
+ index UNIX manuals
+ (<A HREF="mandocdb.8.txt">text</A> |
+ <A HREF="mandocdb.8.xhtml">xhtml</A> |
+ <A HREF="mandocdb.8.pdf">pdf</A> |
+ <A HREF="mandocdb.8.ps">ps</A>)
+ </TD>
+ </TR>
+ </TBODY>
+ </TABLE>
+ <H1>
+ <A NAME="contact">Contact</A>
+ </H1>
+ <P>
+ Use the mailing lists for bug-reports, patches, questions, etc. Please check the
+ <A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/TODO?cvsroot=mdocml">TODO</A> for known issues
+ before posting. All lists are subscription-only: send a blank e-mail to the listed address to subscribe. Beyond that,
+ contact Kristaps at <A HREF="http://mailhide.recaptcha.net/d?k=01M6h_w7twDp58ZgH57eWC_w==&amp;c=Q2DBUt401ePlSeupJFrq_Q==" TITLE="Reveal
+ this e-mail address">kris...</A>@bsd.lv. Archives are available at <A HREF="http://gmane.org/" CLASS="external">Gmane</A>.
+ </P>
+ <TABLE WIDTH="100%" SUMMARY="Mailing Lists">
+ <COL WIDTH="175">
+ <COL>
+ <TBODY>
+ <TR>
+ <TD>
+ disc<A CLASS="external" TITLE="Reveal this e-mail address"
+ HREF="http://www.google.com/recaptcha/mailhide/d?k=01KQ80PFH5n3BBNpF5Gs4sRg==&amp;c=EV1QytpQqTHSItc2IXvZyocgYLPnG5K0JKw_gwMC9yc=">...</A>@mdocml.bsd.lv
+ </TD>
+ <TD>
+ bug-reports, general questions, and announcements
+ </TD>
+ </TR>
+ <TR>
+ <TD>
+ tec<A CLASS="external" TITLE="Reveal this e-mail address"
+ HREF="http://www.google.com/recaptcha/mailhide/d?k=01qDX_iV0RlUOarEvb6mR28g==&amp;c=gRXsTjza0NNCFPaYu-Taj2tF0pmYZSc90EZkFkhkxgo=">...</A>@mdocml.bsd.lv
+ </TD>
+ <TD>
+ patches and system discussions
+ </TD>
+ </TR>
+ <TR>
+ <TD>
+ sou<A CLASS="external" TITLE="Reveal this e-mail address"
+ HREF="http://www.google.com/recaptcha/mailhide/d?k=01prQrAZhhl2EbIwVcRfABsQ==&amp;c=KtTW4Yic9xk-8g40KzJoca4fR3MYXv28g8NC6OQV-T8=">...</A>@mdocml.bsd.lv
+ </TD>
+ <TD>
+ source commit messages
+ </TD>
+ </TR>
+ </TBODY>
+ </TABLE>
+ <H1>
+ <A NAME="news">News</A>
+ </H1>
+ <P CLASS="news">
+ 23-03-2012: version 1.12.1
+ </P>
+ <P>
+ Significant work on <A HREF="apropos.1.html">apropos</A> and <A HREF="mandocdb.8.html">mandocdb</A>. These tools are
+ now much more robust.
+ A <A HREF="whatis.1.html">whatis</A> implementation is now handled as an <A HREF="apropos.1.html">apropos</A> mode.
+ These tools are also able to minimally handle pre-formatted pages, that is, those already formatted by another utility
+ such as GNU troff.
+ </P>
+ <P>
+ The <A HREF="man.cgi.7.html">man.cgi</A> script is also now available for wider testing. It interfaces with <A
+ HREF="mandocdb.8.html">mandocdb</A> manuals cached by <A HREF="catman.8.html">catman</A>. HTML output is generated
+ on-the-fly by <A HREF="mandoc.3.html">libmandoc</A> or internal methods to convert pre-formatted pages.
+ </P>
+ <P>
+ The mailing list archives for the discuss and tech lists are hosted by <A CLASS="external"
+ HREF="http://www.gmane.org">Gmane</A> at <A HREF="http://dir.gmane.org/gmane.comp.tools.mdocml.user"
+ CLASS="external">gmane.comp.tools.mdocml.user</A> and <A HREF="http://dir.gmane.org/gmane.comp.tools.mdocml.devel"
+ CLASS="external">gmane.comp.tools.mdocml.devel</A>, respectively.
+ </P>
+ <P>
+ Lastly, I'm no longer providing binaries, as nobody has asked for them.
+ </P>
+ <P>
+ See <A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/index.sgml?cvsroot=mdocml">cvsweb</A> for
+ historical notes.
+ </P>
+ <P CLASS="foot">
+ <SMALL>
+ Copyright &#169; 2008&#8211;2011
+ <A CLASS="external" HREF="http://kristaps.bsd.lv">Kristaps Dzonsons</A>,
+ $Date: 2012/03/24 02:07:32 $
+ </SMALL>
+ </P>
+ </BODY>
+</HTML>
diff --git a/lib.c b/lib.c
new file mode 100644
index 000000000000..7a18a5dd4fe6
--- /dev/null
+++ b/lib.c
@@ -0,0 +1,39 @@
+/* $Id: lib.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "mdoc.h"
+#include "mandoc.h"
+#include "libmdoc.h"
+/*
+ * Each LINE(x, y) entry pulled in from lib.in expands to a string
+ * comparison of the local variable `p' against the short name x,
+ * returning the formatted string y on an exact match.  The expansion
+ * already ends in a semicolon, so entries are written without one.
+ */
+#define LINE(x, y) \
+ if (0 == strcmp(p, x)) return(y);
+/*
+ * Map a library short name (an .Lb string such as "libc", see the
+ * table in lib.in) to its formatted description.
+ * Returns NULL when p matches none of the lib.in entries.
+ */
+const char *
+mdoc_a2lib(const char *p)
+{
+
+#include "lib.in" /* one LINE() comparison per known library */
+
+ return(NULL); /* no entry matched */
+}
diff --git a/lib.in b/lib.in
new file mode 100644
index 000000000000..230a465ad3dd
--- /dev/null
+++ b/lib.in
@@ -0,0 +1,99 @@
+/* $Id: lib.in,v 1.13 2012/01/28 23:46:28 joerg Exp $ */
+/*
+ * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * These are all possible .Lb strings. When a new library is added, add
+ * its short-string to the left-hand side and formatted string to the
+ * right-hand side.
+ *
+ * Be sure to escape strings.
+ */
+
+LINE("libarchive", "Reading and Writing Streaming Archives Library (libarchive, \\-larchive)")
+LINE("libarm", "ARM Architecture Library (libarm, \\-larm)")
+LINE("libarm32", "ARM32 Architecture Library (libarm32, \\-larm32)")
+LINE("libbluetooth", "Bluetooth Library (libbluetooth, \\-lbluetooth)")
+LINE("libbsm", "Basic Security Module User Library (libbsm, \\-lbsm)")
+LINE("libc", "Standard C Library (libc, \\-lc)")
+LINE("libc_r", "Reentrant C\\~Library (libc_r, \\-lc_r)")
+LINE("libcalendar", "Calendar Arithmetic Library (libcalendar, \\-lcalendar)")
+LINE("libcam", "Common Access Method User Library (libcam, \\-lcam)")
+LINE("libcdk", "Curses Development Kit Library (libcdk, \\-lcdk)")
+LINE("libcipher", "FreeSec Crypt Library (libcipher, \\-lcipher)")
+LINE("libcompat", "Compatibility Library (libcompat, \\-lcompat)")
+LINE("libcrypt", "Crypt Library (libcrypt, \\-lcrypt)")
+LINE("libcurses", "Curses Library (libcurses, \\-lcurses)")
+LINE("libdevinfo", "Device and Resource Information Utility Library (libdevinfo, \\-ldevinfo)")
+LINE("libdevstat", "Device Statistics Library (libdevstat, \\-ldevstat)")
+LINE("libdisk", "Interface to Slice and Partition Labels Library (libdisk, \\-ldisk)")
+LINE("libdwarf", "DWARF Access Library (libdwarf, \\-ldwarf)")
+LINE("libedit", "Command Line Editor Library (libedit, \\-ledit)")
+LINE("libelf", "ELF Access Library (libelf, \\-lelf)")
+LINE("libevent", "Event Notification Library (libevent, \\-levent)")
+LINE("libfetch", "File Transfer Library for URLs (libfetch, \\-lfetch)")
+LINE("libform", "Curses Form Library (libform, \\-lform)")
+LINE("libgeom", "Userland API Library for kernel GEOM subsystem (libgeom, \\-lgeom)")
+LINE("libgpib", "General-Purpose Instrument Bus (GPIB) library (libgpib, \\-lgpib)")
+LINE("libi386", "i386 Architecture Library (libi386, \\-li386)")
+LINE("libintl", "Internationalized Message Handling Library (libintl, \\-lintl)")
+LINE("libipsec", "IPsec Policy Control Library (libipsec, \\-lipsec)")
+LINE("libipx", "IPX Address Conversion Support Library (libipx, \\-lipx)")
+LINE("libiscsi", "iSCSI protocol library (libiscsi, \\-liscsi)")
+LINE("libisns", "Internet Storage Name Service Library (libisns, \\-lisns)")
+LINE("libjail", "Jail Library (libjail, \\-ljail)")
+LINE("libkiconv", "Kernel side iconv library (libkiconv, \\-lkiconv)")
+LINE("libkse", "N:M Threading Library (libkse, \\-lkse)")
+LINE("libkvm", "Kernel Data Access Library (libkvm, \\-lkvm)")
+LINE("libm", "Math Library (libm, \\-lm)")
+LINE("libm68k", "m68k Architecture Library (libm68k, \\-lm68k)")
+LINE("libmagic", "Magic Number Recognition Library (libmagic, \\-lmagic)")
+LINE("libmd", "Message Digest (MD4, MD5, etc.) Support Library (libmd, \\-lmd)")
+LINE("libmemstat", "Kernel Memory Allocator Statistics Library (libmemstat, \\-lmemstat)")
+LINE("libmenu", "Curses Menu Library (libmenu, \\-lmenu)")
+LINE("libnetgraph", "Netgraph User Library (libnetgraph, \\-lnetgraph)")
+LINE("libnetpgp", "Netpgp signing, verification, encryption and decryption (libnetpgp, \\-lnetpgp)")
+LINE("libossaudio", "OSS Audio Emulation Library (libossaudio, \\-lossaudio)")
+LINE("libpam", "Pluggable Authentication Module Library (libpam, \\-lpam)")
+LINE("libpcap", "Capture Library (libpcap, \\-lpcap)")
+LINE("libpci", "PCI Bus Access Library (libpci, \\-lpci)")
+LINE("libpmc", "Performance Counters Library (libpmc, \\-lpmc)")
+LINE("libposix", "POSIX Compatibility Library (libposix, \\-lposix)")
+LINE("libppath", "Property-List Paths Library (libppath, \\-lppath)")
+LINE("libprop", "Property Container Object Library (libprop, \\-lprop)")
+LINE("libpthread", "POSIX Threads Library (libpthread, \\-lpthread)")
+LINE("libpuffs", "puffs Convenience Library (libpuffs, \\-lpuffs)")
+LINE("libquota", "Disk Quota Access and Control Library (libquota, \\-lquota)")
+LINE("librefuse", "File System in Userspace Convenience Library (librefuse, \\-lrefuse)")
+LINE("libresolv", "DNS Resolver Library (libresolv, \\-lresolv)")
+LINE("librpcsec_gss", "RPC GSS-API Authentication Library (librpcsec_gss, \\-lrpcsec_gss)")
+LINE("librpcsvc", "RPC Service Library (librpcsvc, \\-lrpcsvc)")
+LINE("librt", "POSIX Real\\-time Library (librt, \\-lrt)")
+LINE("libsaslc", "Simple Authentication and Security Layer client library (libsaslc, \\-lsaslc)")
+LINE("libsdp", "Bluetooth Service Discovery Protocol User Library (libsdp, \\-lsdp)")
+LINE("libssp", "Buffer Overflow Protection Library (libssp, \\-lssp)")
+LINE("libSystem", "System Library (libSystem, \\-lSystem)")
+LINE("libtermcap", "Termcap Access Library (libtermcap, \\-ltermcap)")
+LINE("libterminfo", "Terminal Information Library (libterminfo, \\-lterminfo)")
+LINE("libthr", "1:1 Threading Library (libthr, \\-lthr)")
+LINE("libufs", "UFS File System Access Library (libufs, \\-lufs)")
+LINE("libugidfw", "File System Firewall Interface Library (libugidfw, \\-lugidfw)")
+LINE("libulog", "User Login Record Library (libulog, \\-lulog)")
+LINE("libusbhid", "USB Human Interface Devices Library (libusbhid, \\-lusbhid)")
+LINE("libutil", "System Utilities Library (libutil, \\-lutil)")
+LINE("libvgl", "Video Graphics Library (libvgl, \\-lvgl)")
+LINE("libx86_64", "x86_64 Architecture Library (libx86_64, \\-lx86_64)")
+LINE("libz", "Compression Library (libz, \\-lz)")
diff --git a/libman.h b/libman.h
new file mode 100644
index 000000000000..4bc5128204f6
--- /dev/null
+++ b/libman.h
@@ -0,0 +1,85 @@
+/* $Id: libman.h,v 1.55 2011/11/07 01:24:40 schwarze Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef LIBMAN_H
+#define LIBMAN_H
+/*
+ * Selects where the next node is put (as a sibling or as a child,
+ * going by the enumerator names); kept in the `next' field of
+ * struct man.
+ */
+enum man_next {
+ MAN_NEXT_SIBLING = 0,
+ MAN_NEXT_CHILD
+};
+/*
+ * State of one man parse: the owning parse handle, status flags
+ * (the MAN_* bits defined inline), the nodes parsed so far and the
+ * document meta-data.
+ */
+struct man {
+ struct mparse *parse; /* parse pointer */
+ int flags; /* parse flags */
+#define MAN_HALT (1 << 0) /* badness happened: die */
+#define MAN_ELINE (1 << 1) /* Next-line element scope. */
+#define MAN_BLINE (1 << 2) /* Next-line block scope. */
+#define MAN_ILINE (1 << 3) /* Ignored in next-line scope. */
+#define MAN_LITERAL (1 << 4) /* Literal input. */
+#define MAN_BPLINE (1 << 5) /* NOTE(review): undocumented; by name presumably related to MAN_BLINE -- confirm */
+#define MAN_NEWLINE (1 << 6) /* first macro/text in a line */
+ enum man_next next; /* where to put the next node */
+ struct man_node *last; /* the last parsed node */
+ struct man_node *first; /* the first parsed node */
+ struct man_meta meta; /* document meta-data */
+ struct roff *roff; /* roff handle; presumably the one given to man_alloc() -- confirm */
+};
+/*
+ * Parameter list shared by all macro handler functions; it is the
+ * signature of the `fp' member of struct man_macro.
+ */
+#define MACRO_PROT_ARGS struct man *m, \
+ enum mant tok, \
+ int line, \
+ int ppos, \
+ int *pos, \
+ char *buf
+/*
+ * Element type of the man_macros table: a handler function plus
+ * behaviour flags (the MAN_* bits defined inline).
+ * NOTE(review): presumably one entry per enum mant token -- confirm.
+ */
+struct man_macro {
+ int (*fp)(MACRO_PROT_ARGS);
+ int flags;
+#define MAN_SCOPED (1 << 0)
+#define MAN_EXPLICIT (1 << 1) /* See blk_imp(). */
+#define MAN_FSCOPED (1 << 2) /* See blk_imp(). */
+#define MAN_NSCOPED (1 << 3) /* See in_line_eoln(). */
+#define MAN_NOCLOSE (1 << 4) /* See blk_exp(). */
+#define MAN_BSCOPE (1 << 5) /* Break BLINE scope. */
+};
+
+extern const struct man_macro *const man_macros;
+
+__BEGIN_DECLS
+
+#define man_pmsg(m, l, p, t) \
+ mandoc_msg((t), (m)->parse, (l), (p), NULL)
+#define man_nmsg(m, n, t) \
+ mandoc_msg((t), (m)->parse, (n)->line, (n)->pos, NULL)
+int man_word_alloc(struct man *, int, int, const char *);
+int man_block_alloc(struct man *, int, int, enum mant);
+int man_head_alloc(struct man *, int, int, enum mant);
+int man_tail_alloc(struct man *, int, int, enum mant);
+int man_body_alloc(struct man *, int, int, enum mant);
+int man_elem_alloc(struct man *, int, int, enum mant);
+void man_node_delete(struct man *, struct man_node *);
+void man_hash_init(void);
+enum mant man_hash_find(const char *);
+int man_macroend(struct man *);
+int man_valid_post(struct man *);
+int man_valid_pre(struct man *, struct man_node *);
+int man_unscope(struct man *,
+ const struct man_node *, enum mandocerr);
+
+__END_DECLS
+
+#endif /*!LIBMAN_H*/
diff --git a/libmandoc.h b/libmandoc.h
new file mode 100644
index 000000000000..de422884a214
--- /dev/null
+++ b/libmandoc.h
@@ -0,0 +1,92 @@
+/* $Id: libmandoc.h,v 1.29 2011/12/02 01:37:14 schwarze Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef LIBMANDOC_H
+#define LIBMANDOC_H
+/*
+ * Result codes of roff_parseln(); also the return type of
+ * tbl_read(), eqn_read() and eqn_end() in libroff.h.
+ */
+enum rofferr {
+ ROFF_CONT, /* continue processing line */
+ ROFF_RERUN, /* re-run roff interpreter with offset */
+ ROFF_APPEND, /* re-run main parser, appending next line */
+ ROFF_REPARSE, /* re-run main parser on the result */
+ ROFF_SO, /* include another file */
+ ROFF_IGN, /* ignore current line */
+ ROFF_TBL, /* a table row was successfully parsed */
+ ROFF_EQN, /* an equation was successfully parsed */
+ ROFF_ERR /* badness: puke and stop */
+};
+/*
+ * Register identifiers taken by roff_regisset(), roff_regget() and
+ * roff_regunset().
+ */
+enum regs {
+ REG_nS = 0, /* nS register */
+ REG__MAX /* presumably the number of registers, not a register itself -- confirm */
+};
+
+__BEGIN_DECLS
+
+struct roff;
+struct mdoc;
+struct man;
+
+void mandoc_msg(enum mandocerr, struct mparse *,
+ int, int, const char *);
+void mandoc_vmsg(enum mandocerr, struct mparse *,
+ int, int, const char *, ...);
+char *mandoc_getarg(struct mparse *, char **, int, int *);
+char *mandoc_normdate(struct mparse *, char *, int, int);
+int mandoc_eos(const char *, size_t, int);
+int mandoc_getcontrol(const char *, int *);
+int mandoc_strntoi(const char *, size_t, int);
+const char *mandoc_a2msec(const char*);
+
+void mdoc_free(struct mdoc *);
+struct mdoc *mdoc_alloc(struct roff *, struct mparse *);
+void mdoc_reset(struct mdoc *);
+int mdoc_parseln(struct mdoc *, int, char *, int);
+int mdoc_endparse(struct mdoc *);
+int mdoc_addspan(struct mdoc *, const struct tbl_span *);
+int mdoc_addeqn(struct mdoc *, const struct eqn *);
+
+void man_free(struct man *);
+struct man *man_alloc(struct roff *, struct mparse *);
+void man_reset(struct man *);
+int man_parseln(struct man *, int, char *, int);
+int man_endparse(struct man *);
+int man_addspan(struct man *, const struct tbl_span *);
+int man_addeqn(struct man *, const struct eqn *);
+
+void roff_free(struct roff *);
+struct roff *roff_alloc(struct mparse *);
+void roff_reset(struct roff *);
+enum rofferr roff_parseln(struct roff *, int,
+ char **, size_t *, int, int *);
+void roff_endparse(struct roff *);
+int roff_regisset(const struct roff *, enum regs);
+unsigned int roff_regget(const struct roff *, enum regs);
+void roff_regunset(struct roff *, enum regs);
+char *roff_strdup(const struct roff *, const char *);
+#if 0
+char roff_eqndelim(const struct roff *);
+void roff_openeqn(struct roff *, const char *,
+ int, int, const char *);
+int roff_closeeqn(struct roff *);
+#endif
+
+const struct tbl_span *roff_span(const struct roff *);
+const struct eqn *roff_eqn(const struct roff *);
+
+__END_DECLS
+
+#endif /*!LIBMANDOC_H*/
diff --git a/libmdoc.h b/libmdoc.h
new file mode 100644
index 000000000000..af1729268a44
--- /dev/null
+++ b/libmdoc.h
@@ -0,0 +1,141 @@
+/* $Id: libmdoc.h,v 1.78 2011/12/02 01:37:14 schwarze Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef LIBMDOC_H
+#define LIBMDOC_H
+/*
+ * Selects where the next node is put (as a sibling or as a child,
+ * going by the enumerator names); kept in the `next' field of
+ * struct mdoc.
+ */
+enum mdoc_next {
+ MDOC_NEXT_SIBLING = 0,
+ MDOC_NEXT_CHILD
+};
+/*
+ * State of one mdoc parse: the owning parse handle, status flags
+ * (the MDOC_* bits defined inline), the nodes parsed so far, the
+ * document meta-data and section bookkeeping.
+ */
+struct mdoc {
+ struct mparse *parse; /* parse pointer */
+ int flags; /* parse flags */
+#define MDOC_HALT (1 << 0) /* error in parse: halt */
+#define MDOC_LITERAL (1 << 1) /* in a literal scope */
+#define MDOC_PBODY (1 << 2) /* in the document body */
+#define MDOC_NEWLINE (1 << 3) /* first macro/text in a line */
+#define MDOC_PHRASELIT (1 << 4) /* literal within a partial phrase */
+#define MDOC_PPHRASE (1 << 5) /* within a partial phrase */
+#define MDOC_FREECOL (1 << 6) /* `It' invocation should close */
+#define MDOC_SYNOPSIS (1 << 7) /* SYNOPSIS-style formatting */
+ enum mdoc_next next; /* where to put the next node */
+ struct mdoc_node *last; /* the last node parsed */
+ struct mdoc_node *first; /* the first node parsed */
+ struct mdoc_meta meta; /* document meta-data */
+ enum mdoc_sec lastnamed; /* presumably the last named section seen -- confirm */
+ enum mdoc_sec lastsec; /* presumably the last section seen -- confirm */
+ struct roff *roff; /* roff handle; presumably the one given to mdoc_alloc() -- confirm */
+};
+/*
+ * Parameter list shared by all macro handler functions; it is the
+ * signature of mdoc_macro() and of the `fp' member of
+ * struct mdoc_macro.
+ */
+#define MACRO_PROT_ARGS struct mdoc *m, \
+ enum mdoct tok, \
+ int line, \
+ int ppos, \
+ int *pos, \
+ char *buf
+/*
+ * Element type of the mdoc_macros table: a handler function plus
+ * behaviour flags (the MDOC_* bits defined inline).
+ * NOTE(review): presumably one entry per enum mdoct token -- confirm.
+ */
+struct mdoc_macro {
+ int (*fp)(MACRO_PROT_ARGS);
+ int flags;
+#define MDOC_CALLABLE (1 << 0)
+#define MDOC_PARSED (1 << 1)
+#define MDOC_EXPLICIT (1 << 2)
+#define MDOC_PROLOGUE (1 << 3)
+#define MDOC_IGNDELIM (1 << 4)
+ /* Reserved words in arguments treated as text. */
+};
+/*
+ * Return values of mdoc_args() and mdoc_zargs(): what kind of
+ * argument, if any, was found.
+ */
+enum margserr {
+ ARGS_ERROR,
+ ARGS_EOLN, /* end-of-line */
+ ARGS_WORD, /* normal word */
+ ARGS_PUNCT, /* series of punctuation */
+ ARGS_QWORD, /* quoted word */
+ ARGS_PHRASE, /* Ta'd phrase (-column) */
+ ARGS_PPHRASE, /* tabbed phrase (-column) */
+ ARGS_PEND /* last phrase (-column) */
+};
+/*
+ * Return values of mdoc_argv().
+ */
+enum margverr {
+ ARGV_ERROR,
+ ARGV_EOLN, /* end of line */
+ ARGV_ARG, /* valid argument */
+ ARGV_WORD /* normal word (or bad argument---same thing) */
+};
+
+/*
+ * A punctuation delimiter is opening, closing, or "middle mark"
+ * punctuation. These govern spacing.
+ * Opening punctuation (e.g., the opening parenthesis) suppresses the
+ * following space; closing punctuation (e.g., the closing parenthesis)
+ * suppresses the leading space; middle punctuation (e.g., the vertical
+ * bar) can do either. The middle punctuation delimiter bends the rules
+ * depending on usage.
+ */
+enum mdelim {
+ DELIM_NONE = 0, /* presumably: not a delimiter -- confirm against mdoc_isdelim() */
+ DELIM_OPEN, /* opening punctuation */
+ DELIM_MIDDLE, /* "middle mark" punctuation */
+ DELIM_CLOSE, /* closing punctuation */
+ DELIM_MAX
+};
+
+extern const struct mdoc_macro *const mdoc_macros;
+
+__BEGIN_DECLS
+
+#define mdoc_pmsg(m, l, p, t) \
+ mandoc_msg((t), (m)->parse, (l), (p), NULL)
+#define mdoc_nmsg(m, n, t) \
+ mandoc_msg((t), (m)->parse, (n)->line, (n)->pos, NULL)
+int mdoc_macro(MACRO_PROT_ARGS);
+int mdoc_word_alloc(struct mdoc *,
+ int, int, const char *);
+int mdoc_elem_alloc(struct mdoc *, int, int,
+ enum mdoct, struct mdoc_arg *);
+int mdoc_block_alloc(struct mdoc *, int, int,
+ enum mdoct, struct mdoc_arg *);
+int mdoc_head_alloc(struct mdoc *, int, int, enum mdoct);
+int mdoc_tail_alloc(struct mdoc *, int, int, enum mdoct);
+int mdoc_body_alloc(struct mdoc *, int, int, enum mdoct);
+int mdoc_endbody_alloc(struct mdoc *m, int line, int pos,
+ enum mdoct tok, struct mdoc_node *body,
+ enum mdoc_endbody end);
+void mdoc_node_delete(struct mdoc *, struct mdoc_node *);
+void mdoc_hash_init(void);
+enum mdoct mdoc_hash_find(const char *);
+const char *mdoc_a2att(const char *);
+const char *mdoc_a2lib(const char *);
+const char *mdoc_a2st(const char *);
+const char *mdoc_a2arch(const char *);
+const char *mdoc_a2vol(const char *);
+int mdoc_valid_pre(struct mdoc *, struct mdoc_node *);
+int mdoc_valid_post(struct mdoc *);
+enum margverr mdoc_argv(struct mdoc *, int, enum mdoct,
+ struct mdoc_arg **, int *, char *);
+void mdoc_argv_free(struct mdoc_arg *);
+enum margserr mdoc_args(struct mdoc *, int,
+ int *, char *, enum mdoct, char **);
+enum margserr mdoc_zargs(struct mdoc *, int,
+ int *, char *, char **);
+int mdoc_macroend(struct mdoc *);
+enum mdelim mdoc_isdelim(const char *);
+
+__END_DECLS
+
+#endif /*!LIBMDOC_H*/
diff --git a/libroff.h b/libroff.h
new file mode 100644
index 000000000000..0bdd5a360478
--- /dev/null
+++ b/libroff.h
@@ -0,0 +1,84 @@
+/* $Id: libroff.h,v 1.27 2011/07/25 15:37:00 kristaps Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef LIBROFF_H
+#define LIBROFF_H
+
+__BEGIN_DECLS
+/*
+ * Which part of a table is currently being parsed; kept in the
+ * `part' field of struct tbl_node.
+ */
+enum tbl_part {
+ TBL_PART_OPTS, /* in options (first line) */
+ TBL_PART_LAYOUT, /* describing layout */
+ TBL_PART_DATA, /* creating data rows */
+ TBL_PART_CDATA /* continue previous row */
+};
+/*
+ * Parse state of one table: where it was invoked, the current parse
+ * phase, and pointers to its rows, spans and heads.
+ */
+struct tbl_node {
+ struct mparse *parse; /* parse point */
+ int pos; /* invocation column */
+ int line; /* invocation line */
+ enum tbl_part part; /* current parse phase */
+ struct tbl opts;
+ struct tbl_row *first_row;
+ struct tbl_row *last_row;
+ struct tbl_span *first_span;
+ struct tbl_span *current_span;
+ struct tbl_span *last_span;
+ struct tbl_head *first_head;
+ struct tbl_head *last_head;
+ struct tbl_node *next; /* presumably links the tables of one parse into a list -- confirm */
+};
+/*
+ * Parse state of one equation; this is the type returned by
+ * eqn_alloc() and taken by eqn_free().
+ * NOTE(review): the field meanings are not documented here.
+ * Presumably `data'/`sz' hold the input text, `cur'/`rew' are
+ * positions within it and `defs'/`defsz' an array of definitions
+ * with its length -- confirm against eqn.c.
+ */
+struct eqn_node {
+ struct eqn_def *defs;
+ size_t defsz;
+ char *data;
+ size_t rew;
+ size_t cur;
+ size_t sz;
+ int gsize;
+ struct eqn eqn;
+ struct mparse *parse;
+ struct eqn_node *next;
+};
+/*
+ * A key/value string pair with explicit sizes; struct eqn_node
+ * points at these through its `defs' member.
+ */
+struct eqn_def {
+ char *key;
+ size_t keysz; /* presumably the length of key -- confirm */
+ char *val;
+ size_t valsz; /* presumably the length of val -- confirm */
+};
+
+struct tbl_node *tbl_alloc(int, int, struct mparse *);
+void tbl_restart(int, int, struct tbl_node *);
+void tbl_free(struct tbl_node *);
+void tbl_reset(struct tbl_node *);
+enum rofferr tbl_read(struct tbl_node *, int, const char *, int);
+int tbl_option(struct tbl_node *, int, const char *);
+int tbl_layout(struct tbl_node *, int, const char *);
+int tbl_data(struct tbl_node *, int, const char *);
+int tbl_cdata(struct tbl_node *, int, const char *);
+const struct tbl_span *tbl_span(struct tbl_node *);
+void tbl_end(struct tbl_node **);
+struct eqn_node *eqn_alloc(const char *, int, int, struct mparse *);
+enum rofferr eqn_end(struct eqn_node **);
+void eqn_free(struct eqn_node *);
+enum rofferr eqn_read(struct eqn_node **, int,
+ const char *, int, int *);
+
+__END_DECLS
+
+#endif /*LIBROFF_H*/
diff --git a/main.c b/main.c
new file mode 100644
index 000000000000..fec83fba513a
--- /dev/null
+++ b/main.c
@@ -0,0 +1,401 @@
+/* $Id: main.c,v 1.165 2011/10/06 22:29:12 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "mandoc.h"
+#include "main.h"
+#include "mdoc.h"
+#include "man.h"
+
+#if !defined(__GNUC__) || (__GNUC__ < 2)
+# if !defined(lint)
+# define __attribute__(x)
+# endif
+#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
+/*
+ * Signatures of the output back-end hooks kept in struct curparse:
+ * format an mdoc document, format a man document, and release the
+ * back-end's private data.  The void pointer is that private data.
+ */
+typedef void (*out_mdoc)(void *, const struct mdoc *);
+typedef void (*out_man)(void *, const struct man *);
+typedef void (*out_free)(void *);
+/*
+ * Output formats selectable with -T; chosen in toptions() and acted
+ * upon in parse().
+ */
+enum outt {
+ OUTT_ASCII = 0, /* -Tascii */
+ OUTT_LOCALE, /* -Tlocale */
+ OUTT_UTF8, /* -Tutf8 */
+ OUTT_TREE, /* -Ttree */
+ OUTT_MAN, /* -Tman */
+ OUTT_HTML, /* -Thtml */
+ OUTT_XHTML, /* -Txhtml */
+ OUTT_LINT, /* -Tlint */
+ OUTT_PS, /* -Tps */
+ OUTT_PDF /* -Tpdf */
+};
+/*
+ * Everything main() carries from option parsing through to output:
+ * the parser handle, the message settings and the chosen output
+ * back-end with its hooks and private data.
+ */
+struct curparse {
+ struct mparse *mp; /* parser handle from mparse_alloc() */
+ enum mandoclevel wlevel; /* ignore messages below this */
+ int wstop; /* stop after a file with a warning */
+ enum outt outtype; /* which output to use */
+ out_mdoc outmdoc; /* mdoc output ptr */
+ out_man outman; /* man output ptr */
+ out_free outfree; /* free output ptr */
+ void *outdata; /* data for output */
+ char outopts[BUFSIZ]; /* buf of output opts */
+};
+
+static int moptions(enum mparset *, char *);
+static void mmsg(enum mandocerr, enum mandoclevel,
+ const char *, int, int, const char *);
+static void parse(struct curparse *, int,
+ const char *, enum mandoclevel *);
+static int toptions(struct curparse *, char *);
+static void usage(void) __attribute__((noreturn));
+static void version(void) __attribute__((noreturn));
+static int woptions(struct curparse *, char *);
+
+static const char *progname;
+/*
+ * Entry point: read the command-line options, then parse and format
+ * each file operand in turn, or standard input when there is none.
+ * Returns the highest mandoclevel that parse() recorded, or
+ * MANDOCLEVEL_BADARG if an option argument was rejected.
+ */
+int
+main(int argc, char *argv[])
+{
+ int c;
+ struct curparse curp;
+ enum mparset type;
+ enum mandoclevel rc;
+
+ progname = strrchr(argv[0], '/'); /* messages use the basename of argv[0] */
+ if (progname == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+
+ memset(&curp, 0, sizeof(struct curparse));
+
+ type = MPARSE_AUTO;
+ curp.outtype = OUTT_ASCII;
+ curp.wlevel = MANDOCLEVEL_FATAL; /* default threshold; -W and -Tlint change it */
+
+ /* LINTED */
+ while (-1 != (c = getopt(argc, argv, "m:O:T:VW:")))
+ switch (c) {
+ case ('m'):
+ if ( ! moptions(&type, optarg))
+ return((int)MANDOCLEVEL_BADARG);
+ break;
+ case ('O'):
+ (void)strlcat(curp.outopts, optarg, BUFSIZ); /* collect -O arguments, comma-terminated; */
+ (void)strlcat(curp.outopts, ",", BUFSIZ); /* NOTE(review): result unchecked, overlong input is cut off */
+ break;
+ case ('T'):
+ if ( ! toptions(&curp, optarg))
+ return((int)MANDOCLEVEL_BADARG);
+ break;
+ case ('W'):
+ if ( ! woptions(&curp, optarg))
+ return((int)MANDOCLEVEL_BADARG);
+ break;
+ case ('V'):
+ version();
+ /* NOTREACHED */
+ default:
+ usage();
+ /* NOTREACHED */
+ }
+
+ curp.mp = mparse_alloc(type, curp.wlevel, mmsg, &curp);
+
+ /*
+ * Conditionally start up the lookaside buffer before parsing.
+ */
+ if (OUTT_MAN == curp.outtype)
+ mparse_keep(curp.mp);
+
+ argc -= optind;
+ argv += optind;
+
+ rc = MANDOCLEVEL_OK;
+
+ if (NULL == *argv) /* no file operands: read standard input */
+ parse(&curp, STDIN_FILENO, "<stdin>", &rc);
+
+ while (*argv) {
+ parse(&curp, -1, *argv, &rc);
+ if (MANDOCLEVEL_OK != rc && curp.wstop) /* -Wstop: give up after the first unclean file */
+ break;
+ ++argv;
+ }
+
+ if (curp.outfree) /* only set once parse() has allocated an output device */
+ (*curp.outfree)(curp.outdata);
+ if (curp.mp)
+ mparse_free(curp.mp);
+
+ return((int)rc);
+}
+
+/*
+ * Write "<program name> <version>" to standard output and exit with
+ * MANDOCLEVEL_OK.  Does not return.
+ */
+static void
+version(void)
+{
+
+	(void)fprintf(stdout, "%s %s\n", progname, VERSION);
+	exit((int)MANDOCLEVEL_OK);
+}
+
+/*
+ * Write the command-line synopsis to standard error and exit with
+ * MANDOCLEVEL_BADARG.  Does not return.
+ *
+ * The synopsis must list exactly the options in the getopt() string
+ * used by main(), "m:O:T:VW:".  In particular there is no -f option:
+ * getopt() rejects it, so it must not be advertised here.
+ */
+static void
+usage(void)
+{
+
+	fprintf(stderr, "usage: %s "
+	    "[-V] "
+	    "[-mformat] "
+	    "[-Ooption] "
+	    "[-Toutput] "
+	    "[-Wlevel] "
+	    "[file...]\n",
+	    progname);
+
+	exit((int)MANDOCLEVEL_BADARG);
+}
+/*
+ * Parse one input and hand the result to the output back-end.
+ * fd is the descriptor to read, or -1 (main() passes -1 together
+ * with a file name; presumably mparse_readfd() then opens the file
+ * itself -- confirm).  The output device and its hooks are set up
+ * the first time a file gets this far.  On return, *level has been
+ * raised to this input's result if that is higher.
+ */
+static void
+parse(struct curparse *curp, int fd,
+ const char *file, enum mandoclevel *level)
+{
+ enum mandoclevel rc;
+ struct mdoc *mdoc;
+ struct man *man;
+
+ /* Begin by parsing the file itself. */
+
+ assert(file);
+ assert(fd >= -1);
+
+ rc = mparse_readfd(curp->mp, fd, file);
+
+ /* Stop immediately if the parse has failed. */
+
+ if (MANDOCLEVEL_FATAL <= rc)
+ goto cleanup;
+
+ /*
+ * With -Wstop and warnings or errors of at least the requested
+ * level, do not produce output.
+ */
+
+ if (MANDOCLEVEL_OK != rc && curp->wstop)
+ goto cleanup;
+
+ /* If unset, allocate output dev now (if applicable). */
+
+ if ( ! (curp->outman && curp->outmdoc)) {
+ switch (curp->outtype) { /* allocate back-end data where the format has any */
+ case (OUTT_XHTML):
+ curp->outdata = xhtml_alloc(curp->outopts);
+ curp->outfree = html_free;
+ break;
+ case (OUTT_HTML):
+ curp->outdata = html_alloc(curp->outopts);
+ curp->outfree = html_free;
+ break;
+ case (OUTT_UTF8):
+ curp->outdata = utf8_alloc(curp->outopts);
+ curp->outfree = ascii_free;
+ break;
+ case (OUTT_LOCALE):
+ curp->outdata = locale_alloc(curp->outopts);
+ curp->outfree = ascii_free;
+ break;
+ case (OUTT_ASCII):
+ curp->outdata = ascii_alloc(curp->outopts);
+ curp->outfree = ascii_free;
+ break;
+ case (OUTT_PDF):
+ curp->outdata = pdf_alloc(curp->outopts);
+ curp->outfree = pspdf_free;
+ break;
+ case (OUTT_PS):
+ curp->outdata = ps_alloc(curp->outopts);
+ curp->outfree = pspdf_free;
+ break;
+ default:
+ break;
+ }
+
+ switch (curp->outtype) { /* choose the man and mdoc formatting hooks */
+ case (OUTT_HTML):
+ /* FALLTHROUGH */
+ case (OUTT_XHTML):
+ curp->outman = html_man;
+ curp->outmdoc = html_mdoc;
+ break;
+ case (OUTT_TREE):
+ curp->outman = tree_man;
+ curp->outmdoc = tree_mdoc;
+ break;
+ case (OUTT_MAN):
+ curp->outmdoc = man_mdoc;
+ curp->outman = man_man;
+ break;
+ case (OUTT_PDF):
+ /* FALLTHROUGH */
+ case (OUTT_ASCII):
+ /* FALLTHROUGH */
+ case (OUTT_UTF8):
+ /* FALLTHROUGH */
+ case (OUTT_LOCALE):
+ /* FALLTHROUGH */
+ case (OUTT_PS):
+ curp->outman = terminal_man;
+ curp->outmdoc = terminal_mdoc;
+ break;
+ default: /* OUTT_LINT: hooks stay NULL, so nothing is formatted below */
+ break;
+ }
+ }
+
+ mparse_result(curp->mp, &mdoc, &man);
+
+ /* Execute the out device, if it exists. */
+
+ if (man && curp->outman)
+ (*curp->outman)(curp->outdata, man);
+ if (mdoc && curp->outmdoc)
+ (*curp->outmdoc)(curp->outdata, mdoc);
+
+ cleanup:
+
+ mparse_reset(curp->mp); /* done on every path, success or failure */
+
+ if (*level < rc) /* keep the highest level seen so far */
+ *level = rc;
+}
+
+/*
+ * Handle the argument of -m, which selects the input parser:
+ * "doc" gives MPARSE_MDOC, "andoc" MPARSE_AUTO and "an" MPARSE_MAN.
+ * Returns 1 on success.  For any other argument, *tflags is left
+ * alone, a complaint goes to standard error and 0 is returned.
+ */
+static int
+moptions(enum mparset *tflags, char *arg)
+{
+
+	if (strcmp(arg, "doc") == 0) {
+		*tflags = MPARSE_MDOC;
+		return(1);
+	}
+	if (strcmp(arg, "andoc") == 0) {
+		*tflags = MPARSE_AUTO;
+		return(1);
+	}
+	if (strcmp(arg, "an") == 0) {
+		*tflags = MPARSE_MAN;
+		return(1);
+	}
+
+	fprintf(stderr, "%s: Bad argument\n", arg);
+	return(0);
+}
+
+/*
+ * Handle the argument of -T, which selects the output format.
+ * The name is looked up in a fixed table; a match stores the format
+ * in curp->outtype and returns 1.  "lint" additionally sets the
+ * message threshold to MANDOCLEVEL_WARNING.  An unknown name is
+ * reported on standard error and 0 is returned.
+ */
+static int
+toptions(struct curparse *curp, char *arg)
+{
+	static const struct {
+		const char	*name;
+		enum outt	 type;
+	} formats[] = {
+		{ "ascii",	OUTT_ASCII },
+		{ "lint",	OUTT_LINT },
+		{ "tree",	OUTT_TREE },
+		{ "man",	OUTT_MAN },
+		{ "html",	OUTT_HTML },
+		{ "utf8",	OUTT_UTF8 },
+		{ "locale",	OUTT_LOCALE },
+		{ "xhtml",	OUTT_XHTML },
+		{ "ps",		OUTT_PS },
+		{ "pdf",	OUTT_PDF }
+	};
+	size_t		 i;
+
+	for (i = 0; i < sizeof(formats) / sizeof(formats[0]); i++) {
+		if (0 != strcmp(arg, formats[i].name))
+			continue;
+		curp->outtype = formats[i].type;
+		if (OUTT_LINT == formats[i].type)
+			curp->wlevel = MANDOCLEVEL_WARNING;
+		return(1);
+	}
+
+	fprintf(stderr, "%s: Bad argument\n", arg);
+	return(0);
+}
+
+/*
+ * Handle the argument of -W, a comma-separated list split with
+ * getsubopt().  "stop" sets curp->wstop; "all" and "warning" set the
+ * message threshold to MANDOCLEVEL_WARNING, "error" to
+ * MANDOCLEVEL_ERROR and "fatal" to MANDOCLEVEL_FATAL.
+ * Returns 1 once the whole list is consumed.  At the first
+ * unrecognised word, reports it on standard error and returns 0.
+ */
+static int
+woptions(struct curparse *curp, char *arg)
+{
+	const char	*toks[] = {
+		"stop", "all", "warning", "error", "fatal", NULL
+	};
+	char		*start, *val;
+	int		 idx;
+
+	while ('\0' != *arg) {
+		/* Remember the start of this word for the error message. */
+		start = arg;
+		idx = getsubopt(&arg, UNCONST(toks), &val);
+
+		if (0 == idx)
+			curp->wstop = 1;
+		else if (1 == idx || 2 == idx)
+			curp->wlevel = MANDOCLEVEL_WARNING;
+		else if (3 == idx)
+			curp->wlevel = MANDOCLEVEL_ERROR;
+		else if (4 == idx)
+			curp->wlevel = MANDOCLEVEL_FATAL;
+		else {
+			fprintf(stderr, "-W%s: Bad argument\n", start);
+			return(0);
+		}
+	}
+
+	return(1);
+}
+
+/*
+ * Message callback handed to mparse_alloc(): write one diagnostic
+ * line to standard error in the form
+ *	file:line:column: level: error[: msg]
+ * The column is printed as col + 1; the trailing ": msg" part only
+ * appears when msg is not NULL.
+ */
+static void
+mmsg(enum mandocerr t, enum mandoclevel lvl,
+		const char *file, int line, int col, const char *msg)
+{
+	const char	*levelstr, *errstr;
+
+	levelstr = mparse_strlevel(lvl);
+	errstr = mparse_strerror(t);
+
+	fprintf(stderr, "%s:%d:%d: %s: %s",
+	    file, line, col + 1, levelstr, errstr);
+
+	if (NULL != msg)
+		fprintf(stderr, ": %s", msg);
+
+	fputc('\n', stderr);
+}
diff --git a/main.h b/main.h
new file mode 100644
index 000000000000..79dcf489ae65
--- /dev/null
+++ b/main.h
@@ -0,0 +1,61 @@
+/* $Id: main.h,v 1.15 2011/10/06 22:29:12 kristaps Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+__BEGIN_DECLS
+/* Forward declarations: only pointers to these types appear below. */
+struct mdoc;
+struct man;
+/* Cast away const from a pointer by going through uintptr_t. */
+#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
+
+
+/*
+ * Definitions for main.c-visible output device functions, e.g., -Thtml
+ * and -Tascii. Note that ascii_alloc() is named as such in
+ * anticipation of latin1_alloc() and so on, all of which map into the
+ * terminal output routines with different character settings.
+ */
+
+void *html_alloc(char *);
+void *xhtml_alloc(char *);
+void html_mdoc(void *, const struct mdoc *);
+void html_man(void *, const struct man *);
+void html_free(void *);
+
+void tree_mdoc(void *, const struct mdoc *);
+void tree_man(void *, const struct man *);
+
+void man_mdoc(void *, const struct mdoc *);
+void man_man(void *, const struct man *);
+
+void *locale_alloc(char *);
+void *utf8_alloc(char *);
+void *ascii_alloc(char *);
+void ascii_free(void *);
+
+void *pdf_alloc(char *);
+void *ps_alloc(char *);
+void pspdf_free(void *);
+
+void terminal_mdoc(void *, const struct mdoc *);
+void terminal_man(void *, const struct man *);
+
+__END_DECLS
+
+#endif /*!MAIN_H*/
diff --git a/man-cgi.css b/man-cgi.css
new file mode 100644
index 000000000000..5300267cfbc1
--- /dev/null
+++ b/man-cgi.css
@@ -0,0 +1,13 @@
+body { font-family: Helvetica, Arial, sans-serif; }
+body > div { padding-left: 2em;
+ padding-top: 1em; }
+body > div#mancgi { padding-left: 0em;
+ padding-top: 0em; }
+body > div.results { font-size: smaller; }
+#mancgi fieldset { text-align: center;
+ border: thin solid silver;
+ border-radius: 1em;
+ font-size: small; }
+#mancgi input[name=expr] { width: 25%; }
+.results td.title { vertical-align: top;
+ padding-right: 1em; }
diff --git a/man.7 b/man.7
new file mode 100644
index 000000000000..1715a7ca119b
--- /dev/null
+++ b/man.7
@@ -0,0 +1,913 @@
+.\" $Id: man.7,v 1.113 2012/01/03 15:16:24 kristaps Exp $
+.\"
+.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: January 3 2012 $
+.Dt MAN 7
+.Os
+.Sh NAME
+.Nm man
+.Nd legacy formatting language for manual pages
+.Sh DESCRIPTION
+Traditionally, the
+.Nm man
+language has been used to write
+.Ux
+manuals for the
+.Xr man 1
+utility.
+It supports limited control of presentational details like fonts,
+indentation and spacing.
+This reference document describes the structure of manual pages
+and the syntax and usage of the man language.
+.Pp
+.Bf -emphasis
+Do not use
+.Nm
+to write your manuals:
+.Ef
+It lacks support for semantic markup.
+Use the
+.Xr mdoc 7
+language, instead.
+.Pp
+In a
+.Nm
+document, lines beginning with the control character
+.Sq \&.
+are called
+.Dq macro lines .
+The first word is the macro name.
+It usually consists of two capital letters.
+For a list of available macros, see
+.Sx MACRO OVERVIEW .
+The words following the macro name are arguments to the macro.
+.Pp
+Lines not beginning with the control character are called
+.Dq text lines .
+They provide free-form text to be printed; the formatting of the text
+depends on the respective processing context:
+.Bd -literal -offset indent
+\&.SH Macro lines change control state.
+Text lines are interpreted within the current state.
+.Ed
+.Pp
+Many aspects of the basic syntax of the
+.Nm
+language are based on the
+.Xr roff 7
+language; see the
+.Em LANGUAGE SYNTAX
+and
+.Em MACRO SYNTAX
+sections in the
+.Xr roff 7
+manual for details, in particular regarding
+comments, escape sequences, whitespace, and quoting.
+.Sh MANUAL STRUCTURE
+Each
+.Nm
+document must contain the
+.Sx \&TH
+macro describing the document's section and title.
+It may occur anywhere in the document, although conventionally it
+appears as the first macro.
+.Pp
+Beyond
+.Sx \&TH ,
+at least one macro or text line must appear in the document.
+.Pp
+The following is a well-formed skeleton
+.Nm
+file for a utility
+.Qq progname :
+.Bd -literal -offset indent
+\&.TH PROGNAME 1 2009-10-10
+\&.SH NAME
+\efBprogname\efR \e(en a description goes here
+\&.\e\(dq .SH LIBRARY
+\&.\e\(dq For sections 2 & 3 only.
+\&.\e\(dq Not used in OpenBSD.
+\&.SH SYNOPSIS
+\efBprogname\efR [\efB\e-options\efR] arguments...
+\&.SH DESCRIPTION
+The \efBfoo\efR utility processes files...
+\&.\e\(dq .SH IMPLEMENTATION NOTES
+\&.\e\(dq Not used in OpenBSD.
+\&.\e\(dq .SH RETURN VALUES
+\&.\e\(dq For sections 2, 3, & 9 only.
+\&.\e\(dq .SH ENVIRONMENT
+\&.\e\(dq For sections 1, 6, 7, & 8 only.
+\&.\e\(dq .SH FILES
+\&.\e\(dq .SH EXIT STATUS
+\&.\e\(dq For sections 1, 6, & 8 only.
+\&.\e\(dq .SH EXAMPLES
+\&.\e\(dq .SH DIAGNOSTICS
+\&.\e\(dq For sections 1, 4, 6, 7, & 8 only.
+\&.\e\(dq .SH ERRORS
+\&.\e\(dq For sections 2, 3, & 9 only.
+\&.\e\(dq .SH SEE ALSO
+\&.\e\(dq .BR foo ( 1 )
+\&.\e\(dq .SH STANDARDS
+\&.\e\(dq .SH HISTORY
+\&.\e\(dq .SH AUTHORS
+\&.\e\(dq .SH CAVEATS
+\&.\e\(dq .SH BUGS
+\&.\e\(dq .SH SECURITY CONSIDERATIONS
+\&.\e\(dq Not used in OpenBSD.
+.Ed
+.Pp
+The sections in a
+.Nm
+document are conventionally ordered as they appear above.
+Sections should be composed as follows:
+.Bl -ohang -offset indent
+.It Em NAME
+The name(s) and a short description of the documented material.
+The syntax for this is generally as follows:
+.Pp
+.D1 \efBname\efR \e(en description
+.It Em LIBRARY
+The name of the library containing the documented material, which is
+assumed to be a function in a section 2 or 3 manual.
+For functions in the C library, this may be as follows:
+.Pp
+.D1 Standard C Library (libc, -lc)
+.It Em SYNOPSIS
+Documents the utility invocation syntax, function call syntax, or device
+configuration.
+.Pp
+For the first, utilities (sections 1, 6, and 8), this is
+generally structured as follows:
+.Pp
+.D1 \efBname\efR [-\efBab\efR] [-\efBc\efR\efIarg\efR] \efBpath\efR...
+.Pp
+For the second, function calls (sections 2, 3, 9):
+.Pp
+.D1 \&.B char *name(char *\efIarg\efR);
+.Pp
+And for the third, configurations (section 4):
+.Pp
+.D1 \&.B name* at cardbus ? function ?
+.Pp
+Manuals not in these sections generally don't need a
+.Em SYNOPSIS .
+.It Em DESCRIPTION
+This expands upon the brief, one-line description in
+.Em NAME .
+It usually contains a break-down of the options (if documenting a
+command).
+.It Em IMPLEMENTATION NOTES
+Implementation-specific notes should be kept here.
+This is useful when implementing standard functions that may have side
+effects or notable algorithmic implications.
+.It Em RETURN VALUES
+This section documents the return values of functions in sections 2, 3, and 9.
+.It Em ENVIRONMENT
+Documents any usages of environment variables, e.g.,
+.Xr environ 7 .
+.It Em FILES
+Documents files used.
+It's helpful to document both the file name and a short description of how
+the file is used (created, modified, etc.).
+.It Em EXIT STATUS
+This section documents the command exit status for
+section 1, 6, and 8 utilities.
+Historically, this information was described in
+.Em DIAGNOSTICS ,
+a practice that is now discouraged.
+.It Em EXAMPLES
+Example usages.
+This often contains snippets of well-formed,
+well-tested invocations.
+Make sure that examples work properly!
+.It Em DIAGNOSTICS
+Documents error conditions.
+This is most useful in section 4 manuals.
+Historically, this section was used in place of
+.Em EXIT STATUS
+for manuals in sections 1, 6, and 8; however, this practice is
+discouraged.
+.It Em ERRORS
+Documents error handling in sections 2, 3, and 9.
+.It Em SEE ALSO
+References other manuals with related topics.
+This section should exist for most manuals.
+.Pp
+.D1 \&.BR bar \&( 1 \&),
+.Pp
+Cross-references should conventionally be ordered
+first by section, then alphabetically.
+.It Em STANDARDS
+References any standards implemented or used, such as
+.Pp
+.D1 IEEE Std 1003.2 (\e(lqPOSIX.2\e(rq)
+.Pp
+If not adhering to any standards, the
+.Em HISTORY
+section should be used.
+.It Em HISTORY
+A brief history of the subject, including where support first appeared.
+.It Em AUTHORS
+Credits to the person or persons who wrote the code and/or documentation.
+Authors should generally be noted by both name and email address.
+.It Em CAVEATS
+Common misuses and misunderstandings should be explained
+in this section.
+.It Em BUGS
+Known bugs, limitations, and work-arounds should be described
+in this section.
+.It Em SECURITY CONSIDERATIONS
+Documents any security precautions that operators should consider.
+.El
+.Sh MACRO OVERVIEW
+This overview is sorted such that macros of similar purpose are listed
+together, to help find the best macro for any given purpose.
+Deprecated macros are not included in the overview, but can be found
+in the alphabetical reference below.
+.Ss Page header and footer meta-data
+.Bl -column "PP, LP, P" description
+.It Sx TH Ta set the title: Ar title section date Op Ar source Op Ar volume
+.It Sx AT Ta display AT&T UNIX version in the page footer (<= 1 argument)
+.It Sx UC Ta display BSD version in the page footer (<= 1 argument)
+.El
+.Ss Sections and paragraphs
+.Bl -column "PP, LP, P" description
+.It Sx SH Ta section header (one line)
+.It Sx SS Ta subsection header (one line)
+.It Sx PP , LP , P Ta start an undecorated paragraph (no arguments)
+.It Sx RS , RE Ta reset the left margin: Op Ar width
+.It Sx IP Ta indented paragraph: Op Ar head Op Ar width
+.It Sx TP Ta tagged paragraph: Op Ar width
+.It Sx HP Ta hanging paragraph: Op Ar width
+.It Sx \&br Ta force output line break in text mode (no arguments)
+.It Sx \&sp Ta force vertical space: Op Ar height
+.It Sx fi , nf Ta fill mode and no-fill mode (no arguments)
+.It Sx in Ta additional indent: Op Ar width
+.El
+.Ss Physical markup
+.Bl -column "PP, LP, P" description
+.It Sx B Ta boldface font
+.It Sx I Ta italic font
+.It Sx R Ta roman (default) font
+.It Sx SB Ta small boldface font
+.It Sx SM Ta small roman font
+.It Sx BI Ta alternate between boldface and italic fonts
+.It Sx BR Ta alternate between boldface and roman fonts
+.It Sx IB Ta alternate between italic and boldface fonts
+.It Sx IR Ta alternate between italic and roman fonts
+.It Sx RB Ta alternate between roman and boldface fonts
+.It Sx RI Ta alternate between roman and italic fonts
+.El
+.Ss Semantic markup
+.Bl -column "PP, LP, P" description
+.It Sx OP Ta optional arguments
+.El
+.Sh MACRO REFERENCE
+This section is a canonical reference to all macros, arranged
+alphabetically.
+For the scoping of individual macros, see
+.Sx MACRO SYNTAX .
+.Ss \&AT
+Sets the volume for the footer for compatibility with man pages from
+.Tn AT&T UNIX
+releases.
+The optional arguments specify which release it is from.
+.Ss \&B
+Text is rendered in bold face.
+.Pp
+See also
+.Sx \&I
+and
+.Sx \&R .
+.Ss \&BI
+Text is rendered alternately in bold face and italic.
+Thus,
+.Sq .BI this word and that
+causes
+.Sq this
+and
+.Sq and
+to render in bold face, while
+.Sq word
+and
+.Sq that
+render in italics.
+Whitespace between arguments is omitted in output.
+.Pp
+Examples:
+.Pp
+.Dl \&.BI bold italic bold italic
+.Pp
+The output of this example will be emboldened
+.Dq bold
+and italicised
+.Dq italic ,
+with spaces stripped between arguments.
+.Pp
+See also
+.Sx \&IB ,
+.Sx \&BR ,
+.Sx \&RB ,
+.Sx \&RI ,
+and
+.Sx \&IR .
+.Ss \&BR
+Text is rendered alternately in bold face and roman (the default font).
+Whitespace between arguments is omitted in output.
+.Pp
+See
+.Sx \&BI
+for an equivalent example.
+.Pp
+See also
+.Sx \&BI ,
+.Sx \&IB ,
+.Sx \&RB ,
+.Sx \&RI ,
+and
+.Sx \&IR .
+.Ss \&DT
+Has no effect.
+Included for compatibility.
+.Ss \&HP
+Begin a paragraph whose initial output line is left-justified, but
+subsequent output lines are indented, with the following syntax:
+.Bd -filled -offset indent
+.Pf \. Sx \&HP
+.Op Cm width
+.Ed
+.Pp
+The
+.Cm width
+argument must conform to
+.Sx Scaling Widths .
+If specified, it's saved for later paragraph left-margins; if unspecified, the
+saved or default width is used.
+.Pp
+See also
+.Sx \&IP ,
+.Sx \&LP ,
+.Sx \&P ,
+.Sx \&PP ,
+and
+.Sx \&TP .
+.Ss \&I
+Text is rendered in italics.
+.Pp
+See also
+.Sx \&B
+and
+.Sx \&R .
+.Ss \&IB
+Text is rendered alternately in italics and bold face.
+Whitespace between arguments is omitted in output.
+.Pp
+See
+.Sx \&BI
+for an equivalent example.
+.Pp
+See also
+.Sx \&BI ,
+.Sx \&BR ,
+.Sx \&RB ,
+.Sx \&RI ,
+and
+.Sx \&IR .
+.Ss \&IP
+Begin an indented paragraph with the following syntax:
+.Bd -filled -offset indent
+.Pf \. Sx \&IP
+.Op Cm head Op Cm width
+.Ed
+.Pp
+The
+.Cm width
+argument defines the width of the left margin and is defined by
+.Sx Scaling Widths .
+It's saved for later paragraph left-margins; if unspecified, the saved or
+default width is used.
+.Pp
+The
+.Cm head
+argument is used as a leading term, flushed to the left margin.
+This is useful for bulleted paragraphs and so on.
+.Pp
+See also
+.Sx \&HP ,
+.Sx \&LP ,
+.Sx \&P ,
+.Sx \&PP ,
+and
+.Sx \&TP .
+.Ss \&IR
+Text is rendered alternately in italics and roman (the default font).
+Whitespace between arguments is omitted in output.
+.Pp
+See
+.Sx \&BI
+for an equivalent example.
+.Pp
+See also
+.Sx \&BI ,
+.Sx \&IB ,
+.Sx \&BR ,
+.Sx \&RB ,
+and
+.Sx \&RI .
+.Ss \&LP
+Begin an undecorated paragraph.
+The scope of a paragraph is closed by a subsequent paragraph,
+sub-section, section, or end of file.
+The saved paragraph left-margin width is reset to the default.
+.Pp
+See also
+.Sx \&HP ,
+.Sx \&IP ,
+.Sx \&P ,
+.Sx \&PP ,
+and
+.Sx \&TP .
+.Ss \&OP
+Optional command-line argument.
+This has the following syntax:
+.Bd -filled -offset indent
+.Pf \. Sx \&OP
+.Cm key Op Cm value
+.Ed
+.Pp
+The
+.Cm key
+is usually a command-line flag and
+.Cm value
+its argument.
+.Ss \&P
+Synonym for
+.Sx \&LP .
+.Pp
+See also
+.Sx \&HP ,
+.Sx \&IP ,
+.Sx \&LP ,
+.Sx \&PP ,
+and
+.Sx \&TP .
+.Ss \&PP
+Synonym for
+.Sx \&LP .
+.Pp
+See also
+.Sx \&HP ,
+.Sx \&IP ,
+.Sx \&LP ,
+.Sx \&P ,
+and
+.Sx \&TP .
+.Ss \&R
+Text is rendered in roman (the default font).
+.Pp
+See also
+.Sx \&I
+and
+.Sx \&B .
+.Ss \&RB
+Text is rendered alternately in roman (the default font) and bold face.
+Whitespace between arguments is omitted in output.
+.Pp
+See
+.Sx \&BI
+for an equivalent example.
+.Pp
+See also
+.Sx \&BI ,
+.Sx \&IB ,
+.Sx \&BR ,
+.Sx \&RI ,
+and
+.Sx \&IR .
+.Ss \&RE
+Explicitly close out the scope of a prior
+.Sx \&RS .
+The default left margin is restored to the state of the original
+.Sx \&RS
+invocation.
+.Ss \&RI
+Text is rendered alternately in roman (the default font) and italics.
+Whitespace between arguments is omitted in output.
+.Pp
+See
+.Sx \&BI
+for an equivalent example.
+.Pp
+See also
+.Sx \&BI ,
+.Sx \&IB ,
+.Sx \&BR ,
+.Sx \&RB ,
+and
+.Sx \&IR .
+.Ss \&RS
+Temporarily reset the default left margin.
+This has the following syntax:
+.Bd -filled -offset indent
+.Pf \. Sx \&RS
+.Op Cm width
+.Ed
+.Pp
+The
+.Cm width
+argument must conform to
+.Sx Scaling Widths .
+If not specified, the saved or default width is used.
+.Pp
+See also
+.Sx \&RE .
+.Ss \&SB
+Text is rendered in small size (one point smaller than the default font)
+bold face.
+.Ss \&SH
+Begin a section.
+The scope of a section is only closed by another section or the end of
+file.
+The paragraph left-margin width is reset to the default.
+.Ss \&SM
+Text is rendered in small size (one point smaller than the default
+font).
+.Ss \&SS
+Begin a sub-section.
+The scope of a sub-section is closed by a subsequent sub-section,
+section, or end of file.
+The paragraph left-margin width is reset to the default.
+.Ss \&TH
+Sets the title of the manual page with the following syntax:
+.Bd -filled -offset indent
+.Pf \. Sx \&TH
+.Ar title section date
+.Op Ar source Op Ar volume
+.Ed
+.Pp
+Conventionally, the document
+.Ar title
+is given in all caps.
+The recommended
+.Ar date
+format is
+.Sy YYYY-MM-DD
+as specified in the ISO-8601 standard;
+if the argument does not conform, it is printed verbatim.
+If the
+.Ar date
+is empty or not specified, the current date is used.
+The optional
+.Ar source
+string specifies the organisation providing the utility.
+The
+.Ar volume
+string replaces the default rendered volume, which is dictated by the
+manual section.
+.Pp
+Examples:
+.Pp
+.Dl \&.TH CVS 5 "1992-02-12" GNU
+.Ss \&TP
+Begin a paragraph where the head, if exceeding the indentation width, is
+followed by a newline; if not, the body follows on the same line after a
+buffer to the indentation width.
+Subsequent output lines are indented.
+The syntax is as follows:
+.Bd -filled -offset indent
+.Pf \. Sx \&TP
+.Op Cm width
+.Ed
+.Pp
+The
+.Cm width
+argument must conform to
+.Sx Scaling Widths .
+If specified, it's saved for later paragraph left-margins; if
+unspecified, the saved or default width is used.
+.Pp
+See also
+.Sx \&HP ,
+.Sx \&IP ,
+.Sx \&LP ,
+.Sx \&P ,
+and
+.Sx \&PP .
+.Ss \&UC
+Sets the volume for the footer for compatibility with man pages from
+BSD releases.
+The optional first argument specifies which release it is from.
+.Ss \&br
+Breaks the current line.
+Consecutive invocations have no further effect.
+.Pp
+See also
+.Sx \&sp .
+.Ss \&fi
+End literal mode begun by
+.Sx \&nf .
+.Ss \&ft
+Change the current font mode.
+See
+.Sx Text Decoration
+for a listing of available font modes.
+.Ss \&in
+Indent relative to the current indentation:
+.Pp
+.D1 Pf \. Sx \&in Op Cm width
+.Pp
+If
+.Cm width
+is signed, the new offset is relative.
+Otherwise, it is absolute.
+This value is reset upon the next paragraph, section, or sub-section.
+.Ss \&na
+Don't align to the right margin.
+.Ss \&nf
+Begin literal mode: all subsequent free-form lines have their end of
+line boundaries preserved.
+May be ended by
+.Sx \&fi .
+Literal mode is implicitly ended by
+.Sx \&SH
+or
+.Sx \&SS .
+.Ss \&sp
+Insert vertical spaces into output with the following syntax:
+.Bd -filled -offset indent
+.Pf \. Sx \&sp
+.Op Cm height
+.Ed
+.Pp
+Insert
+.Cm height
+spaces, which must conform to
+.Sx Scaling Widths .
+If 0, this is equivalent to the
+.Sx \&br
+macro.
+Defaults to 1, if unspecified.
+.Pp
+See also
+.Sx \&br .
+.Sh MACRO SYNTAX
+The
+.Nm
+macros are classified by scope: line scope or block scope.
+Line macros are only scoped to the current line (and, in some
+situations, the subsequent line).
+Block macros are scoped to the current line and subsequent lines until
+closed by another block macro.
+.Ss Line Macros
+Line macros are generally scoped to the current line, with the body
+consisting of zero or more arguments.
+If a macro is scoped to the next line and the line arguments are empty,
+the next line, which must be text, is used instead.
+Thus:
+.Bd -literal -offset indent
+\&.I
+foo
+.Ed
+.Pp
+is equivalent to
+.Sq \&.I foo .
+If next-line macros are invoked consecutively, only the last is used.
+If a next-line macro is followed by a non-next-line macro, an error is
+raised, except for
+.Sx \&br ,
+.Sx \&sp ,
+and
+.Sx \&na .
+.Pp
+The syntax is as follows:
+.Bd -literal -offset indent
+\&.YO \(lBbody...\(rB
+\(lBbody...\(rB
+.Ed
+.Bl -column "MacroX" "ArgumentsX" "ScopeXXXXX" "CompatX" -offset indent
+.It Em Macro Ta Em Arguments Ta Em Scope Ta Em Notes
+.It Sx \&AT Ta <=1 Ta current Ta \&
+.It Sx \&B Ta n Ta next-line Ta \&
+.It Sx \&BI Ta n Ta current Ta \&
+.It Sx \&BR Ta n Ta current Ta \&
+.It Sx \&DT Ta 0 Ta current Ta \&
+.It Sx \&I Ta n Ta next-line Ta \&
+.It Sx \&IB Ta n Ta current Ta \&
+.It Sx \&IR Ta n Ta current Ta \&
+.It Sx \&OP Ta 0, 1 Ta current Ta compat
+.It Sx \&R Ta n Ta next-line Ta \&
+.It Sx \&RB Ta n Ta current Ta \&
+.It Sx \&RI Ta n Ta current Ta \&
+.It Sx \&SB Ta n Ta next-line Ta \&
+.It Sx \&SM Ta n Ta next-line Ta \&
+.It Sx \&TH Ta >1, <6 Ta current Ta \&
+.It Sx \&UC Ta <=1 Ta current Ta \&
+.It Sx \&br Ta 0 Ta current Ta compat
+.It Sx \&fi Ta 0 Ta current Ta compat
+.It Sx \&ft Ta 1 Ta current Ta compat
+.It Sx \&in Ta 1 Ta current Ta compat
+.It Sx \&na Ta 0 Ta current Ta compat
+.It Sx \&nf Ta 0 Ta current Ta compat
+.It Sx \&sp Ta 1 Ta current Ta compat
+.El
+.Pp
+Macros marked as
+.Qq compat
+are included for compatibility with the significant corpus of existing
+manuals that mix dialects of roff.
+These macros should not be used for portable
+.Nm
+manuals.
+.Ss Block Macros
+Block macros comprise a head and body.
+As with in-line macros, the head is scoped to the current line and, in
+one circumstance, the next line (the next-line stipulations as in
+.Sx Line Macros
+apply here as well).
+.Pp
+The syntax is as follows:
+.Bd -literal -offset indent
+\&.YO \(lBhead...\(rB
+\(lBhead...\(rB
+\(lBbody...\(rB
+.Ed
+.Pp
+The closure of body scope may be to the section, where a macro is closed
+by
+.Sx \&SH ;
+sub-section, closed by a section or
+.Sx \&SS ;
+part, closed by a section, sub-section, or
+.Sx \&RE ;
+or paragraph, closed by a section, sub-section, part,
+.Sx \&HP ,
+.Sx \&IP ,
+.Sx \&LP ,
+.Sx \&P ,
+.Sx \&PP ,
+or
+.Sx \&TP .
+No closure refers to an explicit block closing macro.
+.Pp
+As a rule, block macros may not be nested; thus, calling a block macro
+while another block macro scope is open, and the open scope is not
+implicitly closed, is syntactically incorrect.
+.Bl -column "MacroX" "ArgumentsX" "Head ScopeX" "sub-sectionX" "compatX" -offset indent
+.It Em Macro Ta Em Arguments Ta Em Head Scope Ta Em Body Scope Ta Em Notes
+.It Sx \&HP Ta <2 Ta current Ta paragraph Ta \&
+.It Sx \&IP Ta <3 Ta current Ta paragraph Ta \&
+.It Sx \&LP Ta 0 Ta current Ta paragraph Ta \&
+.It Sx \&P Ta 0 Ta current Ta paragraph Ta \&
+.It Sx \&PP Ta 0 Ta current Ta paragraph Ta \&
+.It Sx \&RE Ta 0 Ta current Ta none Ta compat
+.It Sx \&RS Ta 1 Ta current Ta part Ta compat
+.It Sx \&SH Ta >0 Ta next-line Ta section Ta \&
+.It Sx \&SS Ta >0 Ta next-line Ta sub-section Ta \&
+.It Sx \&TP Ta n Ta next-line Ta paragraph Ta \&
+.El
+.Pp
+Macros marked
+.Qq compat
+are as mentioned in
+.Sx Line Macros .
+.Pp
+If a block macro is next-line scoped, it may only be followed by in-line
+macros for decorating text.
+.Ss Font handling
+In
+.Nm
+documents, both
+.Sx Physical markup
+macros and
+.Xr roff 7
+.Ql \ef
+font escape sequences can be used to choose fonts.
+In text lines, the effect of manual font selection by escape sequences
+only lasts until the next macro invocation; in macro lines, it only lasts
+until the end of the macro scope.
+Note that macros like
+.Sx \&BR
+open and close a font scope for each argument.
+.Sh COMPATIBILITY
+This section documents areas of questionable portability between
+implementations of the
+.Nm
+language.
+.Pp
+.Bl -dash -compact
+.It
+Do not depend on
+.Sx \&SH
+or
+.Sx \&SS
+to close out a literal context opened with
+.Sx \&nf .
+This behaviour may not be portable.
+.It
+In quoted literals, GNU troff allowed pair-wise double-quotes to produce
+a standalone double-quote in formatted output.
+It is not known whether this behaviour is exhibited by other formatters.
+.It
+troff suppresses a newline before
+.Sq \(aq
+macro output; in mandoc, it is an alias for the standard
+.Sq \&.
+control character.
+.It
+The
+.Sq \eh
+.Pq horizontal position ,
+.Sq \ev
+.Pq vertical position ,
+.Sq \em
+.Pq text colour ,
+.Sq \eM
+.Pq text filling colour ,
+.Sq \ez
+.Pq zero-length character ,
+.Sq \ew
+.Pq string length ,
+.Sq \ek
+.Pq horizontal position marker ,
+.Sq \eo
+.Pq text overstrike ,
+and
+.Sq \es
+.Pq text size
+escape sequences are all discarded in mandoc.
+.It
+The
+.Sq f
+scaling unit is accepted by mandoc, but rendered as the default unit.
+.It
+The
+.Sx \&sp
+macro does not accept negative values in mandoc.
+In GNU troff, this would result in strange behaviour.
+.It
+In page header lines, GNU troff versions up to and including 1.21
+only print
+.Ar volume
+names explicitly specified in the
+.Sx \&TH
+macro; mandoc and newer groff print the default volume name
+corresponding to the
+.Ar section
+number when no
+.Ar volume
+is given, like in
+.Xr mdoc 7 .
+.El
+.Pp
+The
+.Sx OP
+macro is part of the extended
+.Nm
+macro set, and may not be portable to non-GNU troff implementations.
+.Sh SEE ALSO
+.Xr man 1 ,
+.Xr mandoc 1 ,
+.Xr eqn 7 ,
+.Xr mandoc_char 7 ,
+.Xr mdoc 7 ,
+.Xr roff 7 ,
+.Xr tbl 7
+.Sh HISTORY
+The
+.Nm
+language first appeared as a macro package for the roff typesetting
+system in
+.At v7 .
+It was later rewritten by James Clark as a macro package for groff.
+Eric S. Raymond wrote the extended
+.Nm
+macros for groff in 2007.
+The stand-alone implementation that is part of the
+.Xr mandoc 1
+utility written by Kristaps Dzonsons appeared in
+.Ox 4.6 .
+.Sh AUTHORS
+This
+.Nm
+reference was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
+.Sh CAVEATS
+Do not use this language.
+Use
+.Xr mdoc 7 ,
+instead.
diff --git a/man.c b/man.c
new file mode 100644
index 000000000000..1bea5610e3d7
--- /dev/null
+++ b/man.c
@@ -0,0 +1,690 @@
+/* $Id: man.c,v 1.115 2012/01/03 15:16:24 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "man.h"
+#include "mandoc.h"
+#include "libman.h"
+#include "libmandoc.h"
+
+const char *const __man_macronames[MAN_MAX] = { /* macro name strings; array sized MAN_MAX */
+ "br", "TH", "SH", "SS",
+ "TP", "LP", "PP", "P",
+ "IP", "HP", "SM", "SB",
+ "BI", "IB", "BR", "RB",
+ "R", "B", "I", "IR",
+ "RI", "na", "sp", "nf",
+ "fi", "RE", "RS", "DT",
+ "UC", "PD", "AT", "in",
+ "ft", "OP"
+ };
+/* Pointer alias to the first element of the table above. */
+const char * const *man_macronames = __man_macronames;
+
+static struct man_node *man_node_alloc(struct man *, int, int,
+ enum man_type, enum mant);
+static int man_node_append(struct man *,
+ struct man_node *);
+static void man_node_free(struct man_node *);
+static void man_node_unlink(struct man *,
+ struct man_node *);
+static int man_ptext(struct man *, int, char *, int);
+static int man_pmacro(struct man *, int, char *, int);
+static void man_free1(struct man *);
+static void man_alloc1(struct man *);
+static int man_descope(struct man *, int, int);
+
+/* Return m->first, the first node of the tree; asserts that MAN_HALT is not set. */
+const struct man_node *
+man_node(const struct man *m)
+{
+
+ assert( ! (MAN_HALT & m->flags));
+ return(m->first);
+}
+
+/* Return a pointer to the meta-data embedded in m; asserts that MAN_HALT is not set. */
+const struct man_meta *
+man_meta(const struct man *m)
+{
+
+ assert( ! (MAN_HALT & m->flags));
+ return(&m->meta);
+}
+
+/* Release the current tree and meta-data of man, then set up a fresh root so it can be reused. */
+void
+man_reset(struct man *man)
+{
+
+ man_free1(man);
+ man_alloc1(man);
+}
+
+/* Release everything man owns via man_free1(), then free the man structure itself. */
+void
+man_free(struct man *man)
+{
+
+ man_free1(man);
+ free(man);
+}
+
+
+/*
+ * Allocate a zeroed parser object, initialise the macro hash via
+ * man_hash_init(), record the roff and parse handles, and create the
+ * initial root node with man_alloc1().
+ * Returns the new object.
+ */
+struct man *
+man_alloc(struct roff *roff, struct mparse *parse)
+{
+	struct man	*man;
+
+	man = mandoc_calloc(1, sizeof(*man));
+
+	man_hash_init();
+	man->roff = roff;
+	man->parse = parse;
+
+	man_alloc1(man);
+	return(man);
+}
+
+
+/*
+ * Finish parsing by running man_macroend().
+ * Returns 1 if that succeeds; otherwise sets MAN_HALT in m->flags
+ * and returns 0.  Asserts that MAN_HALT is not already set.
+ */
+int
+man_endparse(struct man *m)
+{
+
+	assert( ! (MAN_HALT & m->flags));
+
+	if ( ! man_macroend(m)) {
+		m->flags |= MAN_HALT;
+		return(0);
+	}
+
+	return(1);
+}
+
+
+/*
+ * Parse one input line.  Sets MAN_NEWLINE in m->flags, then hands
+ * the buffer to man_pmacro() when mandoc_getcontrol() reports a
+ * control character and to man_ptext() otherwise.
+ * mandoc_getcontrol() receives the address of offs, so the offset
+ * passed on is whatever that call leaves in it.
+ * Returns the result of the chosen parser.
+ */
+int
+man_parseln(struct man *m, int ln, char *buf, int offs)
+{
+
+	m->flags |= MAN_NEWLINE;
+
+	assert( ! (MAN_HALT & m->flags));
+
+	if (mandoc_getcontrol(buf, &offs))
+		return(man_pmacro(m, ln, buf, offs));
+
+	return(man_ptext(m, ln, buf, offs));
+}
+
+
+/*
+ * Release what a parser object owns without freeing the object:
+ * the node tree rooted at man->first (if any) and the title, source,
+ * date, vol and msec strings of the meta-data.
+ */
+static void
+man_free1(struct man *man)
+{
+	struct man_meta	*meta;
+
+	if (NULL != man->first)
+		man_node_delete(man, man->first);
+
+	/* free(NULL) is a no-op, so unset fields need no guard. */
+	meta = &man->meta;
+	free(meta->title);
+	free(meta->source);
+	free(meta->date);
+	free(meta->vol);
+	free(meta->msec);
+}
+
+
+/*
+ * Put a parser object into its initial state: zeroed meta-data, no
+ * flags, and a newly allocated MAN_ROOT node that is both the first
+ * and the last node, with the next node to be added as its child.
+ */
+static void
+man_alloc1(struct man *m)
+{
+	struct man_node	*root;
+
+	memset(&m->meta, 0, sizeof(struct man_meta));
+	m->flags = 0;
+
+	root = mandoc_calloc(1, sizeof(*root));
+	root->type = MAN_ROOT;
+	root->tok = MAN_MAX;
+
+	m->first = m->last = root;
+	m->next = MAN_NEXT_CHILD;
+}
+
+/*
+ * Attach node p to the tree at the insertion point described by
+ * man->last and man->next, then make p the new man->last.
+ * Returns 1 on success, or 0 as soon as man_valid_pre() or
+ * man_valid_post() reports failure.
+ */
+static int
+man_node_append(struct man *man, struct man_node *p)
+{
+
+ assert(man->last);
+ assert(man->first);
+ assert(MAN_ROOT != p->type);
+ /* Link p in as the next sibling, or as the child, of man->last. */
+ switch (man->next) {
+ case (MAN_NEXT_SIBLING):
+ man->last->next = p;
+ p->prev = man->last;
+ p->parent = man->last->parent;
+ break;
+ case (MAN_NEXT_CHILD):
+ man->last->child = p;
+ p->parent = man->last;
+ break;
+ default:
+ abort();
+ /* NOTREACHED */
+ }
+
+ assert(p->parent);
+ p->parent->nchild++;
+ /* p is already linked at this point; a failure here returns without unlinking it. */
+ if ( ! man_valid_pre(man, p))
+ return(0);
+ /* Record head, tail and body nodes in their parent block. */
+ switch (p->type) {
+ case (MAN_HEAD):
+ assert(MAN_BLOCK == p->parent->type);
+ p->parent->head = p;
+ break;
+ case (MAN_TAIL):
+ assert(MAN_BLOCK == p->parent->type);
+ p->parent->tail = p;
+ break;
+ case (MAN_BODY):
+ assert(MAN_BLOCK == p->parent->type);
+ p->parent->body = p;
+ break;
+ default:
+ break;
+ }
+
+ man->last = p;
+ /* Only table and text nodes are post-validated here. */
+ switch (p->type) {
+ case (MAN_TBL):
+ /* FALLTHROUGH */
+ case (MAN_TEXT):
+ if ( ! man_valid_post(man))
+ return(0);
+ break;
+ default:
+ break;
+ }
+
+ return(1);
+}
+
+
+/*
+ * Allocate a zeroed node and fill in its line, position, type and
+ * token.  If MAN_NEWLINE is set in m->flags, the node is marked with
+ * MAN_LINE and MAN_NEWLINE is cleared.
+ * Returns the new node, which is not yet linked into the tree.
+ */
+static struct man_node *
+man_node_alloc(struct man *m, int line, int pos,
+		enum man_type type, enum mant tok)
+{
+	struct man_node	*n;
+
+	n = mandoc_calloc(1, sizeof(*n));
+	n->type = type;
+	n->tok = tok;
+	n->line = line;
+	n->pos = pos;
+
+	if (m->flags & MAN_NEWLINE) {
+		n->flags |= MAN_LINE;
+		m->flags &= ~MAN_NEWLINE;
+	}
+
+	return(n);
+}
+
+
+/*
+ * Shared worker for the typed node constructors below: allocate a
+ * node of the given type and token at line/pos, append it to the
+ * tree, and set m->next so that the following node becomes its child.
+ * Returns 1 on success and 0 if man_node_append() fails.
+ */
+static int
+man_typed_alloc(struct man *m, int line, int pos,
+		enum man_type type, enum mant tok)
+{
+	struct man_node	*p;
+
+	p = man_node_alloc(m, line, pos, type, tok);
+	if ( ! man_node_append(m, p))
+		return(0);
+	m->next = MAN_NEXT_CHILD;
+	return(1);
+}
+
+
+/* Append a MAN_ELEM node for macro tok; returns 1 on success, 0 on failure. */
+int
+man_elem_alloc(struct man *m, int line, int pos, enum mant tok)
+{
+
+	return(man_typed_alloc(m, line, pos, MAN_ELEM, tok));
+}
+
+
+/* Append a MAN_TAIL node for macro tok; returns 1 on success, 0 on failure. */
+int
+man_tail_alloc(struct man *m, int line, int pos, enum mant tok)
+{
+
+	return(man_typed_alloc(m, line, pos, MAN_TAIL, tok));
+}
+
+
+/* Append a MAN_HEAD node for macro tok; returns 1 on success, 0 on failure. */
+int
+man_head_alloc(struct man *m, int line, int pos, enum mant tok)
+{
+
+	return(man_typed_alloc(m, line, pos, MAN_HEAD, tok));
+}
+
+
+/* Append a MAN_BODY node for macro tok; returns 1 on success, 0 on failure. */
+int
+man_body_alloc(struct man *m, int line, int pos, enum mant tok)
+{
+
+	return(man_typed_alloc(m, line, pos, MAN_BODY, tok));
+}
+
+
+/* Append a MAN_BLOCK node for macro tok; returns 1 on success, 0 on failure. */
+int
+man_block_alloc(struct man *m, int line, int pos, enum mant tok)
+{
+
+	return(man_typed_alloc(m, line, pos, MAN_BLOCK, tok));
+}
+
+/*
+ * Append a text node holding a copy of "word" (made by roff_strdup())
+ * and arrange for the next node to follow it as a sibling.
+ * Returns 1 on success, 0 if man_node_append() fails.
+ */
+int
+man_word_alloc(struct man *m, int line, int pos, const char *word)
+{
+	struct man_node	*text;
+
+	text = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX);
+	text->string = roff_strdup(m->roff, word);
+
+	if (man_node_append(m, text)) {
+		m->next = MAN_NEXT_SIBLING;
+		return(1);
+	}
+
+	return(0);
+}
+
+
+/*
+ * Free all of the resources held by a node.  This does NOT unlink a
+ * node from its context; for that, see man_node_unlink().
+ */
+static void
+man_node_free(struct man_node *p)
+{
+
+	/* free(NULL) is a no-op, so nodes without a string need no guard. */
+	free(p->string);
+	free(p);
+}
+
+
+/*
+ * Recursively delete "p" and everything below it: each child is
+ * deleted first, then "p" itself is unlinked from the tree and freed.
+ */
+void
+man_node_delete(struct man *m, struct man_node *p)
+{
+	struct man_node	*kid;
+
+	/* Deleting a child unlinks it, which updates p->child. */
+	while (NULL != (kid = p->child))
+		man_node_delete(m, kid);
+
+	man_node_unlink(m, p);
+	man_node_free(p);
+}
+
+/*
+ * Append an equation node referring to "ep", then close any pending
+ * next-line scope via man_descope().  Must not be called once the
+ * parser has halted.  Returns 0 on failure.
+ */
+int
+man_addeqn(struct man *m, const struct eqn *ep)
+{
+	struct man_node	*eqn_node;
+
+	assert( ! (MAN_HALT & m->flags));
+
+	eqn_node = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
+	eqn_node->eqn = ep;
+
+	if (man_node_append(m, eqn_node)) {
+		m->next = MAN_NEXT_SIBLING;
+		return(man_descope(m, ep->ln, ep->pos));
+	}
+
+	return(0);
+}
+
+/*
+ * Append a table node referring to span "sp" (recorded at column 0 of
+ * the span's line), then close any pending next-line scope via
+ * man_descope().  Must not be called once the parser has halted.
+ * Returns 0 on failure.
+ */
+int
+man_addspan(struct man *m, const struct tbl_span *sp)
+{
+	struct man_node	*tbl_node;
+
+	assert( ! (MAN_HALT & m->flags));
+
+	tbl_node = man_node_alloc(m, sp->line, 0, MAN_TBL, MAN_MAX);
+	tbl_node->span = sp;
+
+	if (man_node_append(m, tbl_node)) {
+		m->next = MAN_NEXT_SIBLING;
+		return(man_descope(m, sp->line, 0));
+	}
+
+	return(0);
+}
+
+/*
+ * Close whatever next-line scopes are pending after a node has been
+ * appended.  If MAN_ELINE is set it is cleared and man_unscope() is
+ * called on the parent of the last node.  If MAN_BLINE is set it is
+ * cleared, man_unscope() is called again and a body node is opened at
+ * (line, offs) using the token of the resulting last node.
+ * Returns 1 on success, 0 if man_unscope() or man_body_alloc() fails.
+ */
+static int
+man_descope(struct man *m, int line, int offs)
+{
+ /*
+ * Co-ordinate what happens with having a next-line scope open:
+ * first close out the element scope (if applicable), then close
+ * out the block scope (also if applicable).
+ */
+
+ if (MAN_ELINE & m->flags) {
+ m->flags &= ~MAN_ELINE;
+ if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
+ return(0);
+ }
+
+ /* No block is waiting for its next-line head: nothing more to do. */
+ if ( ! (MAN_BLINE & m->flags))
+ return(1);
+ m->flags &= ~MAN_BLINE;
+
+ if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
+ return(0);
+ return(man_body_alloc(m, line, offs, m->last->tok));
+}
+
+/*
+ * Parse one line of free-form text starting at buf[offs] and append it
+ * as a text node.  In literal mode the text is stored verbatim.  A line
+ * holding only spaces becomes an empty text node.  Otherwise trailing
+ * spaces are cut off by writing a NUL into "buf" (the caller's buffer
+ * is modified) and the node is flagged MAN_EOS if mandoc_eos() reports
+ * an end of sentence.  Returns 0 on failure, else the result of
+ * man_descope().
+ */
+static int
+man_ptext(struct man *m, int line, char *buf, int offs)
+{
+ int i;
+
+ /* Literal free-form text whitespace is preserved. */
+
+ if (MAN_LITERAL & m->flags) {
+ if ( ! man_word_alloc(m, line, offs, buf + offs))
+ return(0);
+ return(man_descope(m, line, offs));
+ }
+
+ /* Pump blank lines directly into the backend. */
+
+ for (i = offs; ' ' == buf[i]; i++)
+ /* Skip leading whitespace. */ ;
+
+ if ('\0' == buf[i]) {
+ /* Allocate a blank entry. */
+ if ( ! man_word_alloc(m, line, offs, ""))
+ return(0);
+ return(man_descope(m, line, offs));
+ }
+
+ /*
+ * Warn if the last un-escaped character is whitespace. Then
+ * strip away the remaining spaces (tabs stay!).
+ */
+
+ /* From here on "i" is the length of the (possibly trimmed) line. */
+ i = (int)strlen(buf);
+ assert(i);
+
+ if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
+ if (i > 1 && '\\' != buf[i - 2])
+ man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE);
+
+ for (--i; i && ' ' == buf[i]; i--)
+ /* Spin back to non-space. */ ;
+
+ /* Jump ahead of escaped whitespace. */
+ i += '\\' == buf[i] ? 2 : 1;
+
+ buf[i] = '\0';
+ }
+
+ if ( ! man_word_alloc(m, line, offs, buf + offs))
+ return(0);
+
+ /*
+ * End-of-sentence check. If the last character is an unescaped
+ * EOS character, then flag the node as being the end of a
+ * sentence. The front-end will know how to interpret this.
+ */
+
+ assert(i);
+ if (mandoc_eos(buf, (size_t)i, 0))
+ m->last->flags |= MAN_EOS;
+
+ return(man_descope(m, line, offs));
+}
+
+/*
+ * Parse one macro line.  "offs" indexes the first character of the
+ * macro name in "buf".  The name is looked up; unknown names are
+ * reported and otherwise ignored.  Any next-line element (ELINE) or
+ * block (BLINE) scope that the new macro clobbers is deleted with a
+ * MANDOCERR_LINESCOPE message, the macro's handler is invoked, and
+ * finally a block scope opened by the previous line is closed if this
+ * macro completed its head.
+ * Returns 1 to continue parsing and 0 on failure; a failing handler
+ * additionally sets MAN_HALT.
+ */
+static int
+man_pmacro(struct man *m, int ln, char *buf, int offs)
+{
+ int i, ppos;
+ enum mant tok;
+ char mac[5];
+ struct man_node *n;
+
+ if ('"' == buf[offs]) {
+ man_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
+ return(1);
+ } else if ('\0' == buf[offs])
+ return(1);
+
+ ppos = offs;
+
+ /*
+ * Copy the first word into a nil-terminated buffer.
+ * Stop copying when a tab, space, or eoln is encountered.
+ */
+
+ /* At most four characters are copied, leaving room for the NUL. */
+ i = 0;
+ while (i < 4 && '\0' != buf[offs] &&
+ ' ' != buf[offs] && '\t' != buf[offs])
+ mac[i++] = buf[offs++];
+
+ mac[i] = '\0';
+
+ /* Only names of one to three characters are looked up. */
+ tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
+
+ if (MAN_MAX == tok) {
+ mandoc_vmsg(MANDOCERR_MACRO, m->parse, ln,
+ ppos, "%s", buf + ppos - 1);
+ return(1);
+ }
+
+ /* The macro is sane. Jump to the next word. */
+
+ while (buf[offs] && ' ' == buf[offs])
+ offs++;
+
+ /*
+ * Trailing whitespace. Note that tabs are allowed to be passed
+ * into the parser as "text", so we only warn about spaces here.
+ */
+
+ if ('\0' == buf[offs] && ' ' == buf[offs - 1])
+ man_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
+
+ /*
+ * Remove prior ELINE macro, as it's being clobbered by a new
+ * macro. Note that NSCOPED macros do not close out ELINE
+ * macros---they don't print text---so we let those slip by.
+ */
+
+ if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
+ m->flags & MAN_ELINE) {
+ n = m->last;
+ assert(MAN_TEXT != n->type);
+
+ /* Remove repeated NSCOPED macros causing ELINE. */
+
+ if (MAN_NSCOPED & man_macros[n->tok].flags)
+ n = n->parent;
+
+ mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line,
+ n->pos, "%s breaks %s", man_macronames[tok],
+ man_macronames[n->tok]);
+
+ man_node_delete(m, n);
+ m->flags &= ~MAN_ELINE;
+ }
+
+ /*
+ * Remove prior BLINE macro that is being clobbered.
+ */
+ if ((m->flags & MAN_BLINE) &&
+ (MAN_BSCOPE & man_macros[tok].flags)) {
+ n = m->last;
+
+ /* Might be a text node like 8 in
+ * .TP 8
+ * .SH foo
+ */
+ if (MAN_TEXT == n->type)
+ n = n->parent;
+
+ /* Remove element that didn't end BLINE, if any. */
+ if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
+ n = n->parent;
+
+ /* Walk from the head up to the block that is being discarded. */
+ assert(MAN_HEAD == n->type);
+ n = n->parent;
+ assert(MAN_BLOCK == n->type);
+ assert(MAN_SCOPED & man_macros[n->tok].flags);
+
+ mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line,
+ n->pos, "%s breaks %s", man_macronames[tok],
+ man_macronames[n->tok]);
+
+ man_node_delete(m, n);
+ m->flags &= ~MAN_BLINE;
+ }
+
+ /*
+ * Save the fact that we're in the next-line for a block. In
+ * this way, embedded roff instructions can "remember" state
+ * when they exit.
+ */
+
+ if (MAN_BLINE & m->flags)
+ m->flags |= MAN_BPLINE;
+
+ /* Call to handler... */
+
+ assert(man_macros[tok].fp);
+ if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &offs, buf))
+ goto err;
+
+ /*
+ * We weren't in a block-line scope when entering the
+ * above-parsed macro, so return.
+ */
+
+ if ( ! (MAN_BPLINE & m->flags)) {
+ m->flags &= ~MAN_ILINE;
+ return(1);
+ }
+ m->flags &= ~MAN_BPLINE;
+
+ /*
+ * If we're in a block scope, then allow this macro to slip by
+ * without closing scope around it.
+ */
+
+ if (MAN_ILINE & m->flags) {
+ m->flags &= ~MAN_ILINE;
+ return(1);
+ }
+
+ /*
+ * If we've opened a new next-line element scope, then return
+ * now, as the next line will close out the block scope.
+ */
+
+ if (MAN_ELINE & m->flags)
+ return(1);
+
+ /* Close out the block scope opened in the prior line. */
+
+ assert(MAN_BLINE & m->flags);
+ m->flags &= ~MAN_BLINE;
+
+ if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
+ return(0);
+ return(man_body_alloc(m, ln, ppos, m->last->tok));
+
+err: /* Error out. */
+
+ m->flags |= MAN_HALT;
+ return(0);
+}
+
+/*
+ * Detach node "n" from its siblings and parent without freeing it.
+ * When a parser "m" is supplied and "n" is its current parse point (or
+ * its first node), the parser is updated so that it no longer refers
+ * to "n".
+ */
+static void
+man_node_unlink(struct man *m, struct man_node *n)
+{
+	struct man_node	*before, *after, *up;
+
+	before = n->prev;
+	after = n->next;
+	up = n->parent;
+
+	/* Splice "n" out of the sibling list. */
+
+	if (NULL != before)
+		before->next = after;
+	if (NULL != after)
+		after->prev = before;
+
+	/* Fix the parent's child count and first-child pointer. */
+
+	if (NULL != up) {
+		up->nchild--;
+		if (n == up->child)
+			up->child = NULL != before ? before : after;
+	}
+
+	/* Without a parser there is no parse point to repair. */
+
+	if (NULL == m)
+		return;
+
+	if (n == m->last) {
+		/*XXX: this can occur when bailing from validation. */
+		/*assert(NULL == n->next);*/
+		if (NULL != before) {
+			m->last = before;
+			m->next = MAN_NEXT_SIBLING;
+		} else {
+			m->last = up;
+			m->next = MAN_NEXT_CHILD;
+		}
+	}
+
+	if (n == m->first)
+		m->first = NULL;
+}
+
+/*
+ * Return the mparse pointer stored in "m".  Both "m" and its parse
+ * pointer must be non-NULL; this is enforced by assertion only.
+ */
+const struct mparse *
+man_mparse(const struct man *m)
+{
+
+ assert(m && m->parse);
+ return(m->parse);
+}
diff --git a/man.cgi.7 b/man.cgi.7
new file mode 100644
index 000000000000..b7afd84b907a
--- /dev/null
+++ b/man.cgi.7
@@ -0,0 +1,123 @@
+.Dd $Mdocdate: March 24 2012 $
+.Dt MAN.CGI 7
+.Os
+.Sh NAME
+.Nm man.cgi
+.Nd cgi for manpage query and display
+.Sh SYNOPSIS
+.Nm
+.Sh DESCRIPTION
+The
+.Nm
+script queries and displays manual pages.
+It interfaces with
+.Xr mandocdb 8
+databases cached with
+.Xr catman 8 .
+.Pp
+To use
+.Nm ,
+create a manual cache with
+.Xr catman 8 .
+Assign this directory to the environment variable
+.Ev CACHE_DIR ,
+defaulting to
+.Pa /cache/man.cgi .
+Copy the
+.Pa man.cgi
+script into your CGI directory (see
+.Sx FILES
+for other relevant files).
+.Pp
+Multiple
+.Xr catman 8
+trees may be managed by
+.Nm :
+directories under
+.Ev CACHE_DIR
+containing
+.Pa etc/catman.conf
+are identified as
+.Qq manroots .
+The path of a manroot under
+.Ev CACHE_DIR
+is converted to a name by replacing path separators with spaces.
+.Pp
+Thus, if
+.Ev CACHE_DIR
+is the default
+.Pa /cache/man.cgi ,
+the web-server is jailed to
+.Pa /var/www ,
+and cache subdirectories
+.Pa ./foo/1
+and
+.Pa ./bar/2
+contain
+.Pa etc/catman.conf ,
+.Nm
+will assign these to manroots
+.Qq foo 1
+and
+.Qq bar 2 ,
+respectively.
+These names will appear as choices when searching for manuals.
+.Pp
+If
+.Nm
+finds only one manroot, or none, then the selection box is omitted.
+If no manroot is specified during search, the first manroot is used by
+default.
+.Sh ENVIRONMENT
+.Bl -tag -width Ds
+.It Ev CACHE_DIR
+The absolute path of the
+.Xr catman 8
+cache directory.
+This must not have a trailing slash.
+.It Ev CSS_DIR
+Prepended to CSS file links in the generated HTML output.
+This must not have a trailing slash.
+.El
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa etc/catman.conf
+Built by
+.Xr catman 8
+and must exist at least once under the configuration directory root.
+.It Pa man.css
+Should be visible in the server document root or within
+.Ev CSS_DIR .
+Included in each page after
+.Pa man-cgi.css ,
+ostensibly for
+.Xr mandoc 1
+HTML output styling.
+.It Pa man-cgi.css
+Should be visible in the server document root or within
+.Ev CSS_DIR .
+Included in each page, ostensibly for general
+.Nm
+styling.
+.El
+.Sh COMPATIBILITY
+The
+.Nm
+script is call-compatible with queries from the traditional
+.Pa man.cgi
+script by Wolfram Schneider.
+However, the results may not be quite the same.
+.Sh SEE ALSO
+.Xr catman 8 ,
+.Xr mandocdb 8
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
+.Sh CAVEATS
+If you're running in a jailed web-server, make sure the
+.Pa /tmp
+directory exists and is writable.
+The databases may need this for scratch space.
diff --git a/man.h b/man.h
new file mode 100644
index 000000000000..4fc3934e6f6f
--- /dev/null
+++ b/man.h
@@ -0,0 +1,113 @@
+/* $Id: man.h,v 1.60 2012/01/03 15:16:24 kristaps Exp $ */
+/*
+ * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MAN_H
+#define MAN_H
+
+/*
+ * Macro tokens.  The numeric value is used as an array index (see
+ * man_macronames), so the order of the enumerators must match the
+ * order of every table indexed by it.  MAN_MAX is not a macro: it is
+ * the number of macros and doubles as the "no token" value.
+ */
+enum mant {
+ MAN_br = 0,
+ MAN_TH,
+ MAN_SH,
+ MAN_SS,
+ MAN_TP,
+ MAN_LP,
+ MAN_PP,
+ MAN_P,
+ MAN_IP,
+ MAN_HP,
+ MAN_SM,
+ MAN_SB,
+ MAN_BI,
+ MAN_IB,
+ MAN_BR,
+ MAN_RB,
+ MAN_R,
+ MAN_B,
+ MAN_I,
+ MAN_IR,
+ MAN_RI,
+ MAN_na,
+ MAN_sp,
+ MAN_nf,
+ MAN_fi,
+ MAN_RE,
+ MAN_RS,
+ MAN_DT,
+ MAN_UC,
+ MAN_PD,
+ MAN_AT,
+ MAN_in,
+ MAN_ft,
+ MAN_OP,
+ MAN_MAX
+};
+
+/* Kinds of AST node; stored in the "type" member of struct man_node. */
+enum man_type {
+ MAN_TEXT, /* text; uses the node's "string" member */
+ MAN_ELEM, /* element macro */
+ MAN_ROOT, /* root of the tree */
+ MAN_BLOCK, /* block macro; parent of HEAD, BODY and TAIL nodes */
+ MAN_HEAD, /* head of a block */
+ MAN_BODY, /* body of a block */
+ MAN_TAIL, /* tail of a block */
+ MAN_TBL, /* table span; uses the node's "span" member */
+ MAN_EQN /* equation; uses the node's "eqn" member */
+};
+
+/* Document meta-data, all taken from the `TH' macro. */
+struct man_meta {
+ char *msec; /* `TH' section (1, 3p, etc.) */
+ char *date; /* `TH' normalised date */
+ char *vol; /* `TH' volume */
+ char *title; /* `TH' title (e.g., FOO) */
+ char *source; /* `TH' source (e.g., GNU) */
+};
+
+/*
+ * One node of the abstract syntax tree.  Which of the trailing members
+ * (string, head/tail/body, span, eqn) is meaningful depends on "type".
+ */
+struct man_node {
+ struct man_node *parent; /* parent AST node */
+ struct man_node *child; /* first child AST node */
+ struct man_node *next; /* sibling AST node */
+ struct man_node *prev; /* prior sibling AST node */
+ int nchild; /* number children */
+ int line; /* input line the node came from */
+ int pos; /* offset within that line */
+ enum mant tok; /* tok or MAN_MAX if none */
+ int flags;
+#define MAN_VALID (1 << 0) /* has been validated */
+#define MAN_EOS (1 << 2) /* at sentence boundary */
+#define MAN_LINE (1 << 3) /* first macro/text on line */
+ enum man_type type; /* AST node type */
+ char *string; /* TEXT node argument */
+ struct man_node *head; /* BLOCK node HEAD ptr */
+ struct man_node *tail; /* BLOCK node TAIL ptr */
+ struct man_node *body; /* BLOCK node BODY ptr */
+ const struct tbl_span *span; /* TBL */
+ const struct eqn *eqn; /* EQN */
+};
+
+/* Names of macros. Index is enum mant. */
+extern const char *const *man_macronames;
+
+__BEGIN_DECLS
+
+struct man;
+
+const struct man_node *man_node(const struct man *);
+const struct man_meta *man_meta(const struct man *);
+const struct mparse *man_mparse(const struct man *);
+
+__END_DECLS
+
+#endif /*!MAN_H*/
diff --git a/man_hash.c b/man_hash.c
new file mode 100644
index 000000000000..86c5c40a199b
--- /dev/null
+++ b/man_hash.c
@@ -0,0 +1,107 @@
+/* $Id: man_hash.c,v 1.25 2011/07/24 18:15:14 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "man.h"
+#include "mandoc.h"
+#include "libman.h"
+
+#define HASH_DEPTH 6
+
+/*
+ * Convert the letter in "x" IN PLACE into the table offset of its
+ * row: upper-case letters have 65 ('A' in ASCII) subtracted,
+ * everything else 97 ('a'), and the result is scaled by HASH_DEPTH.
+ * "x" must be a modifiable lvalue holding an ASCII letter; any other
+ * value yields an offset outside of the table.
+ */
+#define HASH_ROW(x) do { \
+ if (isupper((unsigned char)(x))) \
+ (x) -= 65; \
+ else \
+ (x) -= 97; \
+ (x) *= HASH_DEPTH; \
+ } while (/* CONSTCOND */ 0)
+
+/*
+ * Lookup table with one row of HASH_DEPTH buckets per letter of the
+ * alphabet.  A macro lives in the row of its first letter, with upper
+ * and lower case sharing a row (see HASH_ROW).  Within a row, buckets
+ * are filled front to back in the order macros are registered by
+ * man_hash_init().  Each bucket holds the macro's enum mant value
+ * stored as an unsigned char to save a bit of space, or UCHAR_MAX
+ * when the bucket is unused.
+ */
+static unsigned char table[26 * HASH_DEPTH];
+
+/*
+ * Fill the lookup table from man_macronames: every macro is placed in
+ * the first free bucket of the row selected by its first letter.
+ * Assertions check that MAN_MAX fits below the UCHAR_MAX "empty"
+ * marker, that every name starts with a letter, and that no row
+ * overflows HASH_DEPTH.
+ *
+ * XXX - this hash has global scope, so if intended for use as a library
+ * with multiple callers, it will need re-invocation protection.
+ */
+void
+man_hash_init(void)
+{
+ int i, j, x;
+
+ /* Mark every bucket as unused. */
+ memset(table, UCHAR_MAX, sizeof(table));
+
+ assert(/* LINTED */
+ MAN_MAX < UCHAR_MAX);
+
+ for (i = 0; i < (int)MAN_MAX; i++) {
+ x = man_macronames[i][0];
+
+ assert(isalpha((unsigned char)x));
+
+ /* After this, "x" is the offset of the row's first bucket. */
+ HASH_ROW(x);
+
+ for (j = 0; j < HASH_DEPTH; j++)
+ if (UCHAR_MAX == table[x + j]) {
+ table[x + j] = (unsigned char)i;
+ break;
+ }
+
+ assert(j < HASH_DEPTH);
+ }
+}
+
+
+/*
+ * Look up the macro named "tmp".  Returns its token, or MAN_MAX if the
+ * name is empty, does not start with an ASCII letter, or does not
+ * match any known macro.
+ */
+enum mant
+man_hash_find(const char *tmp)
+{
+	int		 x, y, i;
+	enum mant	 tok;
+
+	/*
+	 * Test the ASCII ranges explicitly instead of calling isalpha():
+	 * that function is locale-dependent and may accept bytes above
+	 * 127, which HASH_ROW would turn into an offset outside of
+	 * table[].  This also rejects the empty string.
+	 */
+
+	x = (unsigned char)tmp[0];
+	if ( ! (('A' <= x && 'Z' >= x) || ('a' <= x && 'z' >= x)))
+		return(MAN_MAX);
+
+	HASH_ROW(x);
+
+	/* Rows are filled front to back: an empty bucket ends the search. */
+
+	for (i = 0; i < HASH_DEPTH; i++) {
+		if (UCHAR_MAX == (y = table[x + i]))
+			return(MAN_MAX);
+
+		tok = (enum mant)y;
+		if (0 == strcmp(tmp, man_macronames[tok]))
+			return(tok);
+	}
+
+	return(MAN_MAX);
+}
diff --git a/man_html.c b/man_html.c
new file mode 100644
index 000000000000..a76ea2d70708
--- /dev/null
+++ b/man_html.c
@@ -0,0 +1,688 @@
+/* $Id: man_html.c,v 1.86 2012/01/03 15:16:24 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "out.h"
+#include "html.h"
+#include "man.h"
+#include "main.h"
+
+/* TODO: preserve ident widths. */
+/* FIXME: have PD set the default vspace width. */
+
+#define INDENT 5
+
+/*
+ * Parameter list shared by all node handlers in this file: document
+ * meta-data, the node being printed, the man-specific HTML state and
+ * the generic HTML output state.
+ */
+#define MAN_ARGS const struct man_meta *m, \
+ const struct man_node *n, \
+ struct mhtml *mh, \
+ struct html *h
+
+/* Formatter state that is specific to man(7) HTML output. */
+struct mhtml {
+ int fl; /* bit-mask of the MANH_* flags below */
+#define MANH_LITERAL (1 << 0) /* literal context */
+};
+
+/*
+ * Handlers for one macro.  Either pointer may be NULL.  "pre" runs
+ * before the node's children; a zero return keeps print_man_node()
+ * from printing the children itself.  "post" runs after them and its
+ * return value is ignored.
+ */
+struct htmlman {
+ int (*pre)(MAN_ARGS);
+ int (*post)(MAN_ARGS);
+};
+
+static void print_bvspace(struct html *,
+ const struct man_node *);
+static void print_man(MAN_ARGS);
+static void print_man_head(MAN_ARGS);
+static void print_man_nodelist(MAN_ARGS);
+static void print_man_node(MAN_ARGS);
+static int a2width(const struct man_node *,
+ struct roffsu *);
+static int man_B_pre(MAN_ARGS);
+static int man_HP_pre(MAN_ARGS);
+static int man_IP_pre(MAN_ARGS);
+static int man_I_pre(MAN_ARGS);
+static int man_OP_pre(MAN_ARGS);
+static int man_PP_pre(MAN_ARGS);
+static int man_RS_pre(MAN_ARGS);
+static int man_SH_pre(MAN_ARGS);
+static int man_SM_pre(MAN_ARGS);
+static int man_SS_pre(MAN_ARGS);
+static int man_alt_pre(MAN_ARGS);
+static int man_br_pre(MAN_ARGS);
+static int man_ign_pre(MAN_ARGS);
+static int man_in_pre(MAN_ARGS);
+static int man_literal_pre(MAN_ARGS);
+static void man_root_post(MAN_ARGS);
+static void man_root_pre(MAN_ARGS);
+
+/*
+ * Handler table, indexed by enum mant; the entries must stay in the
+ * same order as that enumeration.  No macro has a post handler.
+ */
+static const struct htmlman mans[MAN_MAX] = {
+ { man_br_pre, NULL }, /* br */
+ { NULL, NULL }, /* TH */
+ { man_SH_pre, NULL }, /* SH */
+ { man_SS_pre, NULL }, /* SS */
+ { man_IP_pre, NULL }, /* TP */
+ { man_PP_pre, NULL }, /* LP */
+ { man_PP_pre, NULL }, /* PP */
+ { man_PP_pre, NULL }, /* P */
+ { man_IP_pre, NULL }, /* IP */
+ { man_HP_pre, NULL }, /* HP */
+ { man_SM_pre, NULL }, /* SM */
+ { man_SM_pre, NULL }, /* SB */
+ { man_alt_pre, NULL }, /* BI */
+ { man_alt_pre, NULL }, /* IB */
+ { man_alt_pre, NULL }, /* BR */
+ { man_alt_pre, NULL }, /* RB */
+ { NULL, NULL }, /* R */
+ { man_B_pre, NULL }, /* B */
+ { man_I_pre, NULL }, /* I */
+ { man_alt_pre, NULL }, /* IR */
+ { man_alt_pre, NULL }, /* RI */
+ { man_ign_pre, NULL }, /* na */
+ { man_br_pre, NULL }, /* sp */
+ { man_literal_pre, NULL }, /* nf */
+ { man_literal_pre, NULL }, /* fi */
+ { NULL, NULL }, /* RE */
+ { man_RS_pre, NULL }, /* RS */
+ { man_ign_pre, NULL }, /* DT */
+ { man_ign_pre, NULL }, /* UC */
+ { man_ign_pre, NULL }, /* PD */
+ { man_ign_pre, NULL }, /* AT */
+ { man_in_pre, NULL }, /* in */
+ { man_ign_pre, NULL }, /* ft */
+ { man_OP_pre, NULL }, /* OP */
+};
+
+/*
+ * Emit the vertical space (an opening <p>) that precedes a paragraph
+ * block, unless it has to be suppressed:
+ *  - never before a block whose body starts with a table;
+ *  - never before the first node of its parent, except when that
+ *    parent is an RS node that is not the root.
+ */
+static void
+print_bvspace(struct html *h, const struct man_node *n)
+{
+	int	 in_rs;
+
+	if (NULL != n->body && NULL != n->body->child &&
+	    MAN_TBL == n->body->child->type)
+		return;
+
+	in_rs = MAN_ROOT != n->parent->type && MAN_RS == n->parent->tok;
+
+	if ( ! in_rs && NULL == n->prev)
+		return;
+
+	print_otag(h, TAG_P, 0, NULL);
+}
+
+/*
+ * Entry point of the man(7) HTML formatter: print the parsed document
+ * "m" using the HTML state passed as "arg", followed by a newline on
+ * standard output.
+ */
+void
+html_man(void *arg, const struct man *m)
+{
+	struct mhtml	 state;
+	struct html	*out;
+
+	out = (struct html *)arg;
+	memset(&state, 0, sizeof(state));
+
+	print_man(man_meta(m), man_node(m), &state, out);
+	putchar('\n');
+}
+
+/*
+ * Print the whole document.  In fragment mode only a <div> of class
+ * "mandoc" wraps the content; otherwise the declarations, <html>,
+ * <head> and <body> are emitted around that <div>.  All tags from the
+ * outermost one inward are closed at the end.
+ */
+static void
+print_man(MAN_ARGS)
+{
+	struct tag	*outer, *head;
+	struct htmlpair	 cls;
+
+	PAIR_CLASS_INIT(&cls, "mandoc");
+
+	if (HTML_FRAGMENT & h->oflags) {
+		outer = print_otag(h, TAG_DIV, 1, &cls);
+	} else {
+		print_gen_decls(h);
+		outer = print_otag(h, TAG_HTML, 0, NULL);
+		head = print_otag(h, TAG_HEAD, 0, NULL);
+		print_man_head(m, n, mh, h);
+		print_tagq(h, head);
+		print_otag(h, TAG_BODY, 0, NULL);
+		print_otag(h, TAG_DIV, 1, &cls);
+	}
+
+	print_man_nodelist(m, n, mh, h);
+	print_tagq(h, outer);
+}
+
+
+/*
+ * Print the contents of the HTML <head>: the generic head elements
+ * followed by a <title> of the form "TITLE(SECTION)".  Both the title
+ * and the section must be set in the meta-data.
+ */
+/* ARGSUSED */
+static void
+print_man_head(MAN_ARGS)
+{
+
+ print_gen_head(h);
+ assert(m->title);
+ assert(m->msec);
+ /* NOTE(review): appends to h->buf without a bufinit() here - confirm it is empty. */
+ bufcat_fmt(h, "%s(%s)", m->title, m->msec);
+ print_otag(h, TAG_TITLE, 0, NULL);
+ print_text(h, h->buf);
+}
+
+
+/*
+ * Print "n" and all of its following siblings in order; the children
+ * of each node are handled by print_man_node().  This iterates instead
+ * of recursing on n->next so that stack use does not grow with the
+ * number of siblings.
+ */
+static void
+print_man_nodelist(MAN_ARGS)
+{
+
+	for ( ; NULL != n; n = n->next)
+		print_man_node(m, n, mh, h);
+}
+
+
+/*
+ * Print a single node and, unless its pre handler declines, its
+ * children.  Text and table nodes are printed directly and return
+ * early.  For all other nodes the tag stack position is remembered on
+ * entry and everything opened above it is closed again once the
+ * children are done, before the post handler (if any) runs.
+ */
+static void
+print_man_node(MAN_ARGS)
+{
+ int child;
+ struct tag *t;
+
+ /* By default descend into children; remember the current top tag. */
+ child = 1;
+ t = h->tags.head;
+
+ switch (n->type) {
+ case (MAN_ROOT):
+ man_root_pre(m, n, mh, h);
+ break;
+ case (MAN_TEXT):
+ /*
+ * If we have a blank line, output a vertical space.
+ * If we have a space as the first character, break
+ * before printing the line's data.
+ */
+ if ('\0' == *n->string) {
+ print_otag(h, TAG_P, 0, NULL);
+ return;
+ }
+
+ /* In literal context every text node but the first gets a break. */
+ if (' ' == *n->string && MAN_LINE & n->flags)
+ print_otag(h, TAG_BR, 0, NULL);
+ else if (MANH_LITERAL & mh->fl && n->prev)
+ print_otag(h, TAG_BR, 0, NULL);
+
+ print_text(h, n->string);
+ return;
+ case (MAN_EQN):
+ print_eqn(h, n->eqn);
+ break;
+ case (MAN_TBL):
+ /*
+ * This will take care of initialising all of the table
+ * state data for the first table, then tearing it down
+ * for the last one.
+ */
+ print_tbl(h, n->span);
+ return;
+ default:
+ /*
+ * Close out scope of font prior to opening a macro
+ * scope.
+ */
+ if (HTMLFONT_NONE != h->metac) {
+ h->metal = h->metac;
+ h->metac = HTMLFONT_NONE;
+ }
+
+ /*
+ * Close out the current table, if it's open, and unset
+ * the "meta" table state. This will be reopened on the
+ * next table element.
+ */
+ if (h->tblt) {
+ print_tblclose(h);
+ /* The tag stack changed: refresh the saved position. */
+ t = h->tags.head;
+ }
+ if (mans[n->tok].pre)
+ child = (*mans[n->tok].pre)(m, n, mh, h);
+ break;
+ }
+
+ if (child && n->child)
+ print_man_nodelist(m, n->child, mh, h);
+
+ /* This will automatically close out any font scope. */
+ print_stagq(h, t);
+
+ switch (n->type) {
+ case (MAN_ROOT):
+ man_root_post(m, n, mh, h);
+ break;
+ case (MAN_EQN):
+ break;
+ default:
+ if (mans[n->tok].post)
+ (*mans[n->tok].post)(m, n, mh, h);
+ break;
+ }
+}
+
+
+/*
+ * Try to read a width from node "n" into "su".  Succeeds (returns 1)
+ * only if "n" is a text node whose string a2roffsu() accepts with
+ * SCALE_BU as the default unit; returns 0 otherwise.
+ */
+static int
+a2width(const struct man_node *n, struct roffsu *su)
+{
+
+	if (MAN_TEXT == n->type && a2roffsu(n->string, su, SCALE_BU))
+		return(1);
+
+	return(0);
+}
+
+
+/*
+ * Print the document header: a full-width table of class "head" with
+ * three columns holding "TITLE(SECTION)", the volume (empty if unset)
+ * and "TITLE(SECTION)" again.  Both the title and the section must be
+ * set in the meta-data.
+ */
+/* ARGSUSED */
+static void
+man_root_pre(MAN_ARGS)
+{
+ struct htmlpair tag[3];
+ struct tag *t, *tt;
+ char b[BUFSIZ], title[BUFSIZ];
+
+ b[0] = 0;
+ if (m->vol)
+ (void)strlcat(b, m->vol, BUFSIZ);
+
+ assert(m->title);
+ assert(m->msec);
+ /* NOTE(review): the size is BUFSIZ - 1, so the last byte of title[] is never used. */
+ snprintf(title, BUFSIZ - 1, "%s(%s)", m->title, m->msec);
+
+ PAIR_SUMMARY_INIT(&tag[0], "Document Header");
+ PAIR_CLASS_INIT(&tag[1], "head");
+ PAIR_INIT(&tag[2], ATTR_WIDTH, "100%");
+ t = print_otag(h, TAG_TABLE, 3, tag);
+ PAIR_INIT(&tag[0], ATTR_WIDTH, "30%");
+ print_otag(h, TAG_COL, 1, tag);
+ print_otag(h, TAG_COL, 1, tag);
+ print_otag(h, TAG_COL, 1, tag);
+
+ print_otag(h, TAG_TBODY, 0, NULL);
+
+ tt = print_otag(h, TAG_TR, 0, NULL);
+
+ /* Left cell: title and section. */
+ PAIR_CLASS_INIT(&tag[0], "head-ltitle");
+ print_otag(h, TAG_TD, 1, tag);
+ print_text(h, title);
+ print_stagq(h, tt);
+
+ /* Centre cell: volume. */
+ PAIR_CLASS_INIT(&tag[0], "head-vol");
+ PAIR_INIT(&tag[1], ATTR_ALIGN, "center");
+ print_otag(h, TAG_TD, 2, tag);
+ print_text(h, b);
+ print_stagq(h, tt);
+
+ /* Right cell: title and section again; then close the table. */
+ PAIR_CLASS_INIT(&tag[0], "head-rtitle");
+ PAIR_INIT(&tag[1], ATTR_ALIGN, "right");
+ print_otag(h, TAG_TD, 2, tag);
+ print_text(h, title);
+ print_tagq(h, t);
+}
+
+
+/*
+ * Print the document footer: a full-width table of class "foot" with
+ * two columns holding the date and, if set, the source.  The date
+ * must be set in the meta-data.
+ */
+/* ARGSUSED */
+static void
+man_root_post(MAN_ARGS)
+{
+ struct htmlpair tag[3];
+ struct tag *t, *tt;
+
+ PAIR_SUMMARY_INIT(&tag[0], "Document Footer");
+ PAIR_CLASS_INIT(&tag[1], "foot");
+ PAIR_INIT(&tag[2], ATTR_WIDTH, "100%");
+ t = print_otag(h, TAG_TABLE, 3, tag);
+ PAIR_INIT(&tag[0], ATTR_WIDTH, "50%");
+ print_otag(h, TAG_COL, 1, tag);
+ print_otag(h, TAG_COL, 1, tag);
+
+ tt = print_otag(h, TAG_TR, 0, NULL);
+
+ /* Left cell: date. */
+ PAIR_CLASS_INIT(&tag[0], "foot-date");
+ print_otag(h, TAG_TD, 1, tag);
+
+ assert(m->date);
+ print_text(h, m->date);
+ print_stagq(h, tt);
+
+ /* Right cell: source, which may be absent; then close the table. */
+ PAIR_CLASS_INIT(&tag[0], "foot-os");
+ PAIR_INIT(&tag[1], ATTR_ALIGN, "right");
+ print_otag(h, TAG_TD, 2, tag);
+
+ if (m->source)
+ print_text(h, m->source);
+ print_tagq(h, t);
+}
+
+
+/*
+ * Handler for `br' and `sp': print a <div> whose "height" style gives
+ * the vertical space.  For `br' the scale is 0.  For `sp' it is 1
+ * unless an argument is present, in which case the argument is parsed
+ * by a2roffsu() or, if that fails, by atoi().
+ * Returns 0, so the caller does not print the argument as text.
+ */
+/* ARGSUSED */
+static int
+man_br_pre(MAN_ARGS)
+{
+ struct roffsu su;
+ struct htmlpair tag;
+
+ SCALE_VS_INIT(&su, 1);
+
+ /* Note that "n" is re-pointed at the argument node here. */
+ if (MAN_sp == n->tok) {
+ if (NULL != (n = n->child))
+ if ( ! a2roffsu(n->string, &su, SCALE_VS))
+ SCALE_VS_INIT(&su, atoi(n->string));
+ } else
+ su.scale = 0;
+
+ bufinit(h);
+ bufcat_su(h, "height", &su);
+ PAIR_STYLE_INIT(&tag, h);
+ print_otag(h, TAG_DIV, 1, &tag);
+
+ /* So the div isn't empty: */
+ print_text(h, "\\~");
+
+ return(0);
+}
+
+/*
+ * Handler for `SH'.  The block opens a <div> of class "section" and
+ * leaves literal mode, the body opens nothing, and any other part
+ * (the head) opens an <h1>.  Always returns 1 so that children are
+ * printed.
+ */
+/* ARGSUSED */
+static int
+man_SH_pre(MAN_ARGS)
+{
+	struct htmlpair	 cls;
+
+	switch (n->type) {
+	case (MAN_BLOCK):
+		mh->fl &= ~MANH_LITERAL;
+		PAIR_CLASS_INIT(&cls, "section");
+		print_otag(h, TAG_DIV, 1, &cls);
+		break;
+	case (MAN_BODY):
+		break;
+	default:
+		print_otag(h, TAG_H1, 0, NULL);
+		break;
+	}
+
+	return(1);
+}
+
+/*
+ * Handler for the alternating-font macros (BI, IB, RI, IR, BR, RB).
+ * Each child is printed in turn, wrapped in <b> or <i> or left
+ * unwrapped depending on the macro and on whether the child's index
+ * is even or odd.  All children but the first are joined to their
+ * predecessor without a space.  Literal mode is switched off while
+ * the children are printed and restored afterwards.
+ * Returns 0 because the children have already been printed here.
+ */
+/* ARGSUSED */
+static int
+man_alt_pre(MAN_ARGS)
+{
+ const struct man_node *nn;
+ int i, savelit;
+ enum htmltag fp;
+ struct tag *t;
+
+ /* In literal mode the macro starts on a new output line. */
+ if ((savelit = mh->fl & MANH_LITERAL))
+ print_otag(h, TAG_BR, 0, NULL);
+
+ mh->fl &= ~MANH_LITERAL;
+
+ for (i = 0, nn = n->child; nn; nn = nn->next, i++) {
+ t = NULL;
+ /* Pick the tag for this position; TAG_MAX means no tag. */
+ switch (n->tok) {
+ case (MAN_BI):
+ fp = i % 2 ? TAG_I : TAG_B;
+ break;
+ case (MAN_IB):
+ fp = i % 2 ? TAG_B : TAG_I;
+ break;
+ case (MAN_RI):
+ fp = i % 2 ? TAG_I : TAG_MAX;
+ break;
+ case (MAN_IR):
+ fp = i % 2 ? TAG_MAX : TAG_I;
+ break;
+ case (MAN_BR):
+ fp = i % 2 ? TAG_MAX : TAG_B;
+ break;
+ case (MAN_RB):
+ fp = i % 2 ? TAG_B : TAG_MAX;
+ break;
+ default:
+ abort();
+ /* NOTREACHED */
+ }
+
+ if (i)
+ h->flags |= HTML_NOSPACE;
+
+ if (TAG_MAX != fp)
+ t = print_otag(h, fp, 0, NULL);
+
+ print_man_node(m, nn, mh, h);
+
+ if (t)
+ print_tagq(h, t);
+ }
+
+ if (savelit)
+ mh->fl |= MANH_LITERAL;
+
+ return(0);
+}
+
+/*
+ * Handler for `SM' and `SB': open a <small> tag and, for `SB' only, a
+ * <b> tag inside it.  Returns 1 so that children are printed.
+ */
+/* ARGSUSED */
+static int
+man_SM_pre(MAN_ARGS)
+{
+
+ print_otag(h, TAG_SMALL, 0, NULL);
+ if (MAN_SB == n->tok)
+ print_otag(h, TAG_B, 0, NULL);
+ return(1);
+}
+
+/*
+ * Handler for `SS'.  The block opens a <div> of class "subsection"
+ * and leaves literal mode, the body opens nothing, and any other part
+ * (the head) opens an <h2>.  Always returns 1 so that children are
+ * printed.
+ */
+/* ARGSUSED */
+static int
+man_SS_pre(MAN_ARGS)
+{
+	struct htmlpair	 cls;
+
+	switch (n->type) {
+	case (MAN_BLOCK):
+		mh->fl &= ~MANH_LITERAL;
+		PAIR_CLASS_INIT(&cls, "subsection");
+		print_otag(h, TAG_DIV, 1, &cls);
+		break;
+	case (MAN_BODY):
+		break;
+	default:
+		print_otag(h, TAG_H2, 0, NULL);
+		break;
+	}
+
+	return(1);
+}
+
+/*
+ * Handler for the paragraph macros (LP, PP, P).  The head is not
+ * printed at all (returns 0); the block emits leading vertical space
+ * via print_bvspace(); block and body return 1 so that their children
+ * are printed.
+ */
+/* ARGSUSED */
+static int
+man_PP_pre(MAN_ARGS)
+{
+
+	switch (n->type) {
+	case (MAN_HEAD):
+		return(0);
+	case (MAN_BLOCK):
+		print_bvspace(h, n);
+		break;
+	default:
+		break;
+	}
+
+	return(1);
+}
+
+/*
+ * Handler for `IP' and `TP', rendered as a definition list.  The body
+ * opens <dd> and any part other than body and head opens <dl>; both
+ * return 1.  The head opens <dt> and prints a selection of its
+ * children itself, then returns 0.
+ */
+/* ARGSUSED */
+static int
+man_IP_pre(MAN_ARGS)
+{
+	const struct man_node	*kid;
+
+	switch (n->type) {
+	case (MAN_BODY):
+		print_otag(h, TAG_DD, 0, NULL);
+		return(1);
+	case (MAN_HEAD):
+		break;
+	default:
+		print_otag(h, TAG_DL, 0, NULL);
+		return(1);
+	}
+
+	/* FIXME: width specification. */
+
+	print_otag(h, TAG_DT, 0, NULL);
+
+	switch (n->tok) {
+	case (MAN_IP):
+		/* For IP, only print the first header element. */
+		if (NULL != n->child)
+			print_man_node(m, n->child, mh, h);
+		break;
+	case (MAN_TP):
+		/* For TP, only print next-line header elements. */
+		for (kid = n->child; NULL != kid; kid = kid->next)
+			if (kid->line > n->line)
+				print_man_node(m, kid, mh, h);
+		break;
+	default:
+		break;
+	}
+
+	return(0);
+}
+
+/*
+ * Handler for `HP' (hanging paragraph).  The head is not printed
+ * (returns 0) and the body just has its children printed (returns 1).
+ * The block opens a <p> styled with a left margin equal to the width
+ * given in the head, or INDENT if there is none or it cannot be
+ * parsed, and a text indent of the same amount negated.
+ */
+/* ARGSUSED */
+static int
+man_HP_pre(MAN_ARGS)
+{
+ struct htmlpair tag;
+ struct roffsu su;
+ const struct man_node *np;
+
+ if (MAN_HEAD == n->type)
+ return(0);
+ else if (MAN_BLOCK != n->type)
+ return(1);
+
+ np = n->head->child;
+
+ if (NULL == np || ! a2width(np, &su))
+ SCALE_HS_INIT(&su, INDENT);
+
+ bufinit(h);
+
+ print_bvspace(h, n);
+ bufcat_su(h, "margin-left", &su);
+ /* Pull the first line back out by the same amount. */
+ su.scale = -su.scale;
+ bufcat_su(h, "text-indent", &su);
+ PAIR_STYLE_INIT(&tag, h);
+ print_otag(h, TAG_P, 1, &tag);
+ return(1);
+}
+
+/*
+ * Handler for `OP': print "[", then inside a <span> of class "opt"
+ * the first argument in bold and the second argument (if any) in
+ * italics, then "]" with no space before it.  Further arguments are
+ * not printed.  Returns 0 because the arguments are printed here.
+ */
+/* ARGSUSED */
+static int
+man_OP_pre(MAN_ARGS)
+{
+ struct tag *tt;
+ struct htmlpair tag;
+
+ print_text(h, "[");
+ h->flags |= HTML_NOSPACE;
+ PAIR_CLASS_INIT(&tag, "opt");
+ tt = print_otag(h, TAG_SPAN, 1, &tag);
+
+ /* From here on "n" is the first argument, or NULL if there is none. */
+ if (NULL != (n = n->child)) {
+ print_otag(h, TAG_B, 0, NULL);
+ print_text(h, n->string);
+ }
+
+ print_stagq(h, tt);
+
+ if (NULL != n && NULL != n->next) {
+ print_otag(h, TAG_I, 0, NULL);
+ print_text(h, n->next->string);
+ }
+
+ print_stagq(h, tt);
+ h->flags |= HTML_NOSPACE;
+ print_text(h, "]");
+ return(0);
+}
+
+
+/* Handler for `B': open a <b> tag; returns 1 so children are printed. */
+/* ARGSUSED */
+static int
+man_B_pre(MAN_ARGS)
+{
+
+ print_otag(h, TAG_B, 0, NULL);
+ return(1);
+}
+
+/* Handler for `I': open an <i> tag; returns 1 so children are printed. */
+/* ARGSUSED */
+static int
+man_I_pre(MAN_ARGS)
+{
+
+ print_otag(h, TAG_I, 0, NULL);
+ return(1);
+}
+
+/*
+ * Handler for `nf' and `fi'.  `nf' switches literal mode on; any
+ * other macro routed here (`fi') prints a line break and switches it
+ * off.  Returns 0: these macros have nothing to print as children.
+ */
+/* ARGSUSED */
+static int
+man_literal_pre(MAN_ARGS)
+{
+
+	if (MAN_nf == n->tok) {
+		mh->fl |= MANH_LITERAL;
+		return(0);
+	}
+
+	print_otag(h, TAG_BR, 0, NULL);
+	mh->fl &= ~MANH_LITERAL;
+	return(0);
+}
+
+/*
+ * Handler for `in': only a line break is printed; the argument is not
+ * used.  Returns 0 so the argument is not printed as text.
+ */
+/* ARGSUSED */
+static int
+man_in_pre(MAN_ARGS)
+{
+
+ print_otag(h, TAG_BR, 0, NULL);
+ return(0);
+}
+
+/*
+ * Handler for macros that produce no HTML output: print nothing and
+ * return 0 so that the arguments are not printed either.
+ */
+/* ARGSUSED */
+static int
+man_ign_pre(MAN_ARGS)
+{
+
+ return(0);
+}
+
+/*
+ * Handler for `RS'.  The head is not printed (returns 0) and the body
+ * just has its children printed (returns 1).  Any other part (the
+ * block) opens a <div> with a left margin of INDENT, or of the width
+ * given as the first head argument when a2width() can parse it.
+ */
+/* ARGSUSED */
+static int
+man_RS_pre(MAN_ARGS)
+{
+	struct roffsu		 margin;
+	struct htmlpair		 style;
+	const struct man_node	*width;
+
+	switch (n->type) {
+	case (MAN_HEAD):
+		return(0);
+	case (MAN_BODY):
+		return(1);
+	default:
+		break;
+	}
+
+	/* On failure a2width() is expected to leave the default alone. */
+	SCALE_HS_INIT(&margin, INDENT);
+	width = n->head->child;
+	if (NULL != width)
+		a2width(width, &margin);
+
+	bufinit(h);
+	bufcat_su(h, "margin-left", &margin);
+	PAIR_STYLE_INIT(&style, h);
+	print_otag(h, TAG_DIV, 1, &style);
+	return(1);
+}
diff --git a/man_macro.c b/man_macro.c
new file mode 100644
index 000000000000..4bbbc4fa7f16
--- /dev/null
+++ b/man_macro.c
@@ -0,0 +1,484 @@
+/* $Id: man_macro.c,v 1.71 2012/01/03 15:16:24 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "man.h"
+#include "mandoc.h"
+#include "libmandoc.h"
+#include "libman.h"
+
+enum rew { /* verdict of one rewind step, as returned by rew_dohalt() */
+ REW_REWIND, /* stop and rewind until this point */
+ REW_NOHALT, /* keep rewinding */
+ REW_HALT /* stop immediately */
+};
+
+static int blk_close(MACRO_PROT_ARGS);
+static int blk_exp(MACRO_PROT_ARGS);
+static int blk_imp(MACRO_PROT_ARGS);
+static int in_line_eoln(MACRO_PROT_ARGS);
+static int man_args(struct man *, int,
+ int *, char *, char **);
+
+static int rew_scope(enum man_type,
+ struct man *, enum mant);
+static enum rew rew_dohalt(enum mant, enum man_type,
+ const struct man_node *);
+static enum rew rew_block(enum mant, enum man_type,
+ const struct man_node *);
+static void rew_warn(struct man *,
+ struct man_node *, enum mandocerr);
+
+const struct man_macro __man_macros[MAN_MAX] = { /*
+ * One entry per macro, labelled with the macro name: the parse
+ * handler followed by the macro's scope flags.  The parser looks
+ * entries up as man_macros[tok], so the order of the entries must
+ * follow the order of the macro tokens.
+ */
+ { in_line_eoln, MAN_NSCOPED }, /* br */
+ { in_line_eoln, MAN_BSCOPE }, /* TH */
+ { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */
+ { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */
+ { blk_imp, MAN_BSCOPE | MAN_SCOPED | MAN_FSCOPED }, /* TP */
+ { blk_imp, MAN_BSCOPE }, /* LP */
+ { blk_imp, MAN_BSCOPE }, /* PP */
+ { blk_imp, MAN_BSCOPE }, /* P */
+ { blk_imp, MAN_BSCOPE }, /* IP */
+ { blk_imp, MAN_BSCOPE }, /* HP */
+ { in_line_eoln, MAN_SCOPED }, /* SM */
+ { in_line_eoln, MAN_SCOPED }, /* SB */
+ { in_line_eoln, 0 }, /* BI */
+ { in_line_eoln, 0 }, /* IB */
+ { in_line_eoln, 0 }, /* BR */
+ { in_line_eoln, 0 }, /* RB */
+ { in_line_eoln, MAN_SCOPED }, /* R */
+ { in_line_eoln, MAN_SCOPED }, /* B */
+ { in_line_eoln, MAN_SCOPED }, /* I */
+ { in_line_eoln, 0 }, /* IR */
+ { in_line_eoln, 0 }, /* RI */
+ { in_line_eoln, MAN_NSCOPED }, /* na */
+ { in_line_eoln, MAN_NSCOPED }, /* sp */
+ { in_line_eoln, MAN_BSCOPE }, /* nf */
+ { in_line_eoln, MAN_BSCOPE }, /* fi */
+ { blk_close, 0 }, /* RE */
+ { blk_exp, MAN_EXPLICIT }, /* RS */
+ { in_line_eoln, 0 }, /* DT */
+ { in_line_eoln, 0 }, /* UC */
+ { in_line_eoln, 0 }, /* PD */
+ { in_line_eoln, 0 }, /* AT */
+ { in_line_eoln, 0 }, /* in */
+ { in_line_eoln, 0 }, /* ft */
+ { in_line_eoln, 0 }, /* OP */
+};
+
+const struct man_macro * const man_macros = __man_macros; /* read-only alias through which the table is indexed */
+
+
+/*
+ * Report "er" against node "n" while a scope is being closed out.
+ * The message is only issued when a real code was supplied (anything
+ * but MANDOCERR_MAX) and "n" is a block that has not been validated
+ * yet and whose macro carries the MAN_EXPLICIT flag.
+ */
+static void
+rew_warn(struct man *m, struct man_node *n, enum mandocerr er)
+{
+	if (er != MANDOCERR_MAX && n->type == MAN_BLOCK &&
+	    ! (n->flags & MAN_VALID) &&
+	    (man_macros[n->tok].flags & MAN_EXPLICIT)) {
+		assert(er < MANDOCERR_FATAL);
+		man_nmsg(m, n, er);
+	}
+}
+
+
+/*
+ * Rewind scope from m->last up to and including "to", running
+ * post-validation on every node that is closed on the way.
+ * If a code "er" != MANDOCERR_MAX is given, it is reported (through
+ * rew_warn) for explicit blocks that get closed out.
+ * Returns 1 on success and 0 as soon as a post-validation fails.
+ */
+int
+man_unscope(struct man *m, const struct man_node *to,
+		enum mandocerr er)
+{
+	struct man_node	*up;
+	int		 done;
+
+	assert(to);
+
+	m->next = MAN_NEXT_SIBLING;
+
+	for (;;) {
+		done = (m->last == to);
+
+		/*
+		 * Remember the parent before validating: the
+		 * post-validation phase may delete m->last and reset
+		 * it to its parent, which would otherwise cause one
+		 * step of the closing-out to be lost.
+		 */
+		up = done ? NULL : m->last->parent;
+
+		rew_warn(m, m->last, er);
+		if ( ! man_valid_post(m))
+			return 0;
+		if (done)
+			break;
+
+		m->last = up;
+		assert(m->last);
+	}
+
+	return 1;
+}
+
+
+/*
+ * Decide how a rewind of node type "type" relates to an enclosing
+ * scope of macro "ntok": REW_REWIND if a block is being rewound and
+ * the parent of "n" is the body of "ntok"; REW_HALT if "n" itself is
+ * an "ntok" node; REW_NOHALT otherwise.
+ */
+static enum rew
+rew_block(enum mant ntok, enum man_type type, const struct man_node *n)
+{
+	const struct man_node	*up;
+
+	if (type == MAN_BLOCK) {
+		up = n->parent;
+		if (up->tok == ntok && up->type == MAN_BODY)
+			return REW_REWIND;
+	}
+
+	if (n->tok == ntok)
+		return REW_HALT;
+	return REW_NOHALT;
+}
+
+
+/*
+ * Decide what to do with node "n" while rewinding for macro "tok"
+ * and node type "type".
+ * There are three scope levels: scoped to the root (all), scoped to
+ * the section (all less sections), and scoped to subsections (all
+ * less sections and subsections).
+ */
+static enum rew
+rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
+{
+	/* Enclosing scopes in the order they are tried. */
+	static const enum mant	 scopes[] = { MAN_RS, MAN_SS, MAN_SH };
+	enum rew		 verdict;
+	int			 i;
+
+	/* The root is never passed. */
+	if (n->type == MAN_ROOT)
+		return REW_HALT;
+
+	assert(n->parent);
+
+	/* Ordinary nodes must not climb to the level of the root. */
+	if (n->parent->type == MAN_ROOT)
+		return REW_REWIND;
+
+	/* Nodes that are already validated get closed out. */
+	if (n->flags & MAN_VALID)
+		return REW_NOHALT;
+
+	/* A node of our own macro and type: rewind to it. */
+	if (n->type == type && n->tok == tok)
+		return REW_REWIND;
+
+	/*
+	 * Implicit scope-smashing as defined by man.7:
+	 * `SH' checks no enclosing scope, `SS' checks sections,
+	 * `RS' checks subsections and sections, and everything else
+	 * checks offsetters, subsections and sections.
+	 */
+	switch (tok) {
+	case (MAN_SH):
+		i = 3;
+		break;
+	case (MAN_SS):
+		i = 2;
+		break;
+	case (MAN_RS):
+		i = 1;
+		break;
+	default:
+		i = 0;
+		break;
+	}
+
+	for ( ; i < 3; i++) {
+		verdict = rew_block(scopes[i], type, n);
+		if (verdict != REW_NOHALT)
+			return verdict;
+	}
+
+	return REW_NOHALT;
+}
+
+
+/*
+ * Rewinding entails ascending the parse tree until a coherent point;
+ * for example, the `SH' macro will close out any intervening `SS'
+ * scopes.  Every scope closed on the way is validated by
+ * man_unscope().  Returns 1 when nothing needs rewinding, otherwise
+ * the result of man_unscope().
+ */
+static int
+rew_scope(enum man_type type, struct man *m, enum mant tok)
+{
+	struct man_node	*n;
+	enum rew	 verdict;
+
+	n = m->last;
+	while (n != NULL) {
+		verdict = rew_dohalt(tok, type, n);
+
+		/* Stop immediately: nothing is rewound. */
+		if (verdict == REW_HALT)
+			return 1;
+
+		/* Stop here and rewind up to this node. */
+		if (verdict == REW_REWIND)
+			break;
+
+		/* REW_NOHALT: keep climbing. */
+		n = n->parent;
+	}
+
+	assert(n);
+
+	/* MANDOCERR_MAX: no message for scopes closed on the way. */
+	return man_unscope(m, n, MANDOCERR_MAX);
+}
+
+
+/*
+ * Close out a generic explicit macro.  Only `RE' (closing `RS') is
+ * supported; any other token aborts.  If no open `RS' is found among
+ * the ancestors of the current node, MANDOCERR_NOSCOPE is reported,
+ * and the body and block scopes are rewound in either case.
+ */
+/* ARGSUSED */
+int
+blk_close(MACRO_PROT_ARGS)
+{
+	const struct man_node	*up;
+	enum mant		 ntok;
+
+	if (tok != MAN_RE)
+		abort();
+	ntok = MAN_RS;
+
+	/* Look for the block being closed. */
+	up = m->last->parent;
+	while (up != NULL && up->tok != ntok)
+		up = up->parent;
+
+	if (up == NULL)
+		man_pmsg(m, line, ppos, MANDOCERR_NOSCOPE);
+
+	if ( ! rew_scope(MAN_BODY, m, ntok))
+		return 0;
+
+	return rew_scope(MAN_BLOCK, m, ntok) ? 1 : 0;
+}
+
+
+/*
+ * Parse an explicit-block macro: allocate the block and its head,
+ * append every argument on the line to the head as a word, rewind
+ * the head scope and open the body.
+ * Returns 0 on failure, otherwise the result of man_body_alloc().
+ */
+/* ARGSUSED */
+int
+blk_exp(MACRO_PROT_ARGS)
+{
+	char	*word;
+	int	 wpos;
+
+	if ( ! man_block_alloc(m, line, ppos, tok) ||
+	    ! man_head_alloc(m, line, ppos, tok))
+		return 0;
+
+	/* Each word is recorded at the position where it started. */
+	wpos = *pos;
+	while (man_args(m, line, pos, buf, &word)) {
+		if ( ! man_word_alloc(m, line, wpos, word))
+			return 0;
+		wpos = *pos;
+	}
+
+	assert(m);
+	assert(tok != MAN_MAX);
+
+	if ( ! rew_scope(MAN_HEAD, m, tok))
+		return 0;
+
+	return man_body_alloc(m, line, ppos, tok);
+}
+
+
+
+/*
+ * Parse an implicit-block macro.  These contain a MAN_HEAD and a
+ * MAN_BODY contained within a MAN_BLOCK.  Rules for closing out other
+ * scopes, such as `SH' closing out an `SS', are defined in the rew
+ * routines.
+ */
+/* ARGSUSED */
+int
+blk_imp(MACRO_PROT_ARGS)
+{
+	struct man_node	*head;
+	char		*word;
+	int		 wpos;
+
+	/* Close out prior scopes. */
+
+	if ( ! rew_scope(MAN_BODY, m, tok) ||
+	    ! rew_scope(MAN_BLOCK, m, tok))
+		return 0;
+
+	/* Allocate new block & head scope. */
+
+	if ( ! man_block_alloc(m, line, ppos, tok) ||
+	    ! man_head_alloc(m, line, ppos, tok))
+		return 0;
+
+	head = m->last;
+
+	/* Add line arguments. */
+
+	wpos = *pos;
+	while (man_args(m, line, pos, buf, &word)) {
+		if ( ! man_word_alloc(m, line, wpos, word))
+			return 0;
+		wpos = *pos;
+	}
+
+	/*
+	 * Close out head and open body (unless MAN_SCOPED).
+	 * A next-line scoped macro keeps its head open when the scope
+	 * is forced (`TP') or when no argument was added above.
+	 */
+
+	if ((man_macros[tok].flags & MAN_SCOPED) &&
+	    ((man_macros[tok].flags & MAN_FSCOPED) ||
+	     head == m->last)) {
+		m->flags |= MAN_BLINE;
+		return 1;
+	}
+
+	if ( ! rew_scope(MAN_HEAD, m, tok))
+		return 0;
+
+	return man_body_alloc(m, line, ppos, tok);
+}
+
+
+/*
+ * Parse an in-line macro that extends to the end of the line:
+ * allocate the element, append its arguments as words, then either
+ * leave it open for next-line scope or rewind and validate it.
+ * Returns 1 on success and 0 on failure.
+ */
+/* ARGSUSED */
+int
+in_line_eoln(MACRO_PROT_ARGS)
+{
+	struct man_node	*elem;
+	char		*word;
+	int		 wpos;
+
+	if ( ! man_elem_alloc(m, line, ppos, tok))
+		return 0;
+
+	elem = m->last;
+
+	wpos = *pos;
+	while (man_args(m, line, pos, buf, &word)) {
+		if ( ! man_word_alloc(m, line, wpos, word))
+			return 0;
+		wpos = *pos;
+	}
+
+	/*
+	 * If no arguments are specified and this is MAN_SCOPED (i.e.,
+	 * next-line scoped), then set our mode to indicate that we're
+	 * waiting for terms to load into our context.
+	 */
+
+	if (elem == m->last && (man_macros[tok].flags & MAN_SCOPED)) {
+		assert( ! (man_macros[tok].flags & MAN_NSCOPED));
+		m->flags |= MAN_ELINE;
+		return 1;
+	}
+
+	/* Set ignorable context, if applicable. */
+
+	if (man_macros[tok].flags & MAN_NSCOPED) {
+		assert( ! (man_macros[tok].flags & MAN_SCOPED));
+		m->flags |= MAN_ILINE;
+	}
+
+	assert(m->last->type != MAN_ROOT);
+	m->next = MAN_NEXT_SIBLING;
+
+	/*
+	 * Rewind our element scope.  Note that when TH is pruned, we'll
+	 * be back at the root, so make sure that we don't clobber as
+	 * its sibling.
+	 */
+
+	while (m->last != NULL && m->last != elem &&
+	    m->last->type != MAN_ROOT) {
+		if ( ! man_valid_post(m))
+			return 0;
+		m->last = m->last->parent;
+	}
+
+	assert(m->last);
+
+	/*
+	 * Same here regarding whether we're back at the root.
+	 */
+
+	if (m->last->type != MAN_ROOT && ! man_valid_post(m))
+		return 0;
+
+	return 1;
+}
+
+
+/*
+ * Close every scope still open by rewinding to m->first; explicit
+ * blocks closed this way are reported with MANDOCERR_SCOPEEXIT.
+ * Returns the result of man_unscope().
+ */
+int
+man_macroend(struct man *m)
+{
+	int	 rc;
+
+	rc = man_unscope(m, m->first, MANDOCERR_SCOPEEXIT);
+	return rc;
+}
+
+/*
+ * Fetch the next argument from "buf" at offset *pos.
+ * Returns 0 when the end of the line has been reached; in that case
+ * *v points at the terminating NUL.  Otherwise returns 1 with *v set
+ * to the argument returned by mandoc_getarg().
+ */
+static int
+man_args(struct man *m, int line, int *pos, char *buf, char **v)
+{
+	assert(*pos);
+	*v = buf + *pos;
+	assert(**v != ' ');
+
+	if (**v == '\0')
+		return 0;
+
+	*v = mandoc_getarg(m->parse, v, line, pos);
+	return 1;
+}
diff --git a/man_term.c b/man_term.c
new file mode 100644
index 000000000000..69c5c95e442d
--- /dev/null
+++ b/man_term.c
@@ -0,0 +1,1117 @@
+/* $Id: man_term.c,v 1.127 2012/01/03 15:16:24 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "out.h"
+#include "man.h"
+#include "term.h"
+#include "main.h"
+
+#define MAXMARGINS 64 /* maximum number of indented scopes */
+
+/* FIXME: have PD set the default vspace width. */
+
+struct mtermp { /* formatter state carried through one man(7) document */
+ int fl; /* bit flags, see MANT_LITERAL */
+#define MANT_LITERAL (1 << 0) /* literal mode: set by `nf', cleared by `fi' and by SH/SS blocks */
+ size_t lmargin[MAXMARGINS]; /* margins (incl. visible page) */
+ int lmargincur; /* index of current margin */
+ int lmarginsz; /* actual number of nested margins */
+ size_t offset; /* default offset to visible page */
+};
+
+#define DECL_ARGS struct termp *p, \
+ struct mtermp *mt, \
+ const struct man_node *n, \
+ const struct man_meta *m /* parameter list shared by the pre/post handlers and the node printers */
+
+struct termact { /* output handlers of one macro, see termacts[] */
+ int (*pre)(DECL_ARGS); /* called before the children; a zero return skips them */
+ void (*post)(DECL_ARGS); /* called after the children */
+ int flags; /* bit flags, see MAN_NOTEXT */
+#define MAN_NOTEXT (1 << 0) /* Never has text children. */
+};
+
+static int a2width(const struct termp *, const char *);
+static size_t a2height(const struct termp *, const char *);
+
+static void print_man_nodelist(DECL_ARGS);
+static void print_man_node(DECL_ARGS);
+static void print_man_head(struct termp *, const void *);
+static void print_man_foot(struct termp *, const void *);
+static void print_bvspace(struct termp *,
+ const struct man_node *);
+
+static int pre_B(DECL_ARGS);
+static int pre_HP(DECL_ARGS);
+static int pre_I(DECL_ARGS);
+static int pre_IP(DECL_ARGS);
+static int pre_OP(DECL_ARGS);
+static int pre_PP(DECL_ARGS);
+static int pre_RS(DECL_ARGS);
+static int pre_SH(DECL_ARGS);
+static int pre_SS(DECL_ARGS);
+static int pre_TP(DECL_ARGS);
+static int pre_alternate(DECL_ARGS);
+static int pre_ft(DECL_ARGS);
+static int pre_ign(DECL_ARGS);
+static int pre_in(DECL_ARGS);
+static int pre_literal(DECL_ARGS);
+static int pre_sp(DECL_ARGS);
+
+static void post_IP(DECL_ARGS);
+static void post_HP(DECL_ARGS);
+static void post_RS(DECL_ARGS);
+static void post_SH(DECL_ARGS);
+static void post_SS(DECL_ARGS);
+static void post_TP(DECL_ARGS);
+
+static const struct termact termacts[MAN_MAX] = { /*
+ * One entry per macro, labelled with the macro name: the pre
+ * handler, the post handler and the flags.  print_man_node()
+ * looks entries up as termacts[n->tok], so the order of the
+ * entries must follow the order of the macro tokens.
+ */
+ { pre_sp, NULL, MAN_NOTEXT }, /* br */
+ { NULL, NULL, 0 }, /* TH */
+ { pre_SH, post_SH, 0 }, /* SH */
+ { pre_SS, post_SS, 0 }, /* SS */
+ { pre_TP, post_TP, 0 }, /* TP */
+ { pre_PP, NULL, 0 }, /* LP */
+ { pre_PP, NULL, 0 }, /* PP */
+ { pre_PP, NULL, 0 }, /* P */
+ { pre_IP, post_IP, 0 }, /* IP */
+ { pre_HP, post_HP, 0 }, /* HP */
+ { NULL, NULL, 0 }, /* SM */
+ { pre_B, NULL, 0 }, /* SB */
+ { pre_alternate, NULL, 0 }, /* BI */
+ { pre_alternate, NULL, 0 }, /* IB */
+ { pre_alternate, NULL, 0 }, /* BR */
+ { pre_alternate, NULL, 0 }, /* RB */
+ { NULL, NULL, 0 }, /* R */
+ { pre_B, NULL, 0 }, /* B */
+ { pre_I, NULL, 0 }, /* I */
+ { pre_alternate, NULL, 0 }, /* IR */
+ { pre_alternate, NULL, 0 }, /* RI */
+ { pre_ign, NULL, MAN_NOTEXT }, /* na */
+ { pre_sp, NULL, MAN_NOTEXT }, /* sp */
+ { pre_literal, NULL, 0 }, /* nf */
+ { pre_literal, NULL, 0 }, /* fi */
+ { NULL, NULL, 0 }, /* RE */
+ { pre_RS, post_RS, 0 }, /* RS */
+ { pre_ign, NULL, 0 }, /* DT */
+ { pre_ign, NULL, 0 }, /* UC */
+ { pre_ign, NULL, 0 }, /* PD */
+ { pre_ign, NULL, 0 }, /* AT */
+ { pre_in, NULL, MAN_NOTEXT }, /* in */
+ { pre_ft, NULL, MAN_NOTEXT }, /* ft */
+ { pre_OP, NULL, 0 }, /* OP */
+};
+
+
+
+/*
+ * Entry point of the man(7) terminal formatter.
+ * "arg" is the struct termp to render with and "man" the parsed
+ * document.  Terminal defaults are set up (a default indent of 7
+ * when none is configured, a tab width of 5), the symbol table is
+ * allocated on first use, and the node tree is printed between
+ * term_begin() and term_end().
+ */
+void
+terminal_man(void *arg, const struct man *man)
+{
+	struct mtermp		 state;
+	struct termp		*term;
+	const struct man_node	*root;
+	const struct man_meta	*meta;
+
+	term = (struct termp *)arg;
+
+	if (term->defindent == 0)
+		term->defindent = 7;
+
+	term->overstep = 0;
+	term->maxrmargin = term->defrmargin;
+	term->tabwidth = term_len(term, 5);
+
+	if (term->symtab == NULL)
+		term->symtab = mchars_alloc();
+
+	root = man_node(man);
+	meta = man_meta(man);
+
+	term_begin(term, print_man_head, print_man_foot, meta);
+	term->flags |= TERMP_NOSPACE;
+
+	/* Start with no flags and a single margin at the default indent. */
+	memset(&state, 0, sizeof(state));
+	state.lmargin[state.lmargincur] = term_len(term, term->defindent);
+	state.offset = term_len(term, term->defindent);
+
+	if (root->child != NULL)
+		print_man_nodelist(term, &state, root->child, meta);
+
+	term_end(term);
+}
+
+
+/*
+ * Convert a vertical scaling-width string to a height via
+ * term_vspan().  A string that a2roffsu() rejects is read with
+ * atoi() as a count of SCALE_VS units.
+ */
+static size_t
+a2height(const struct termp *p, const char *cp)
+{
+	struct roffsu	 height;
+
+	if (a2roffsu(cp, &height, SCALE_VS) == 0)
+		SCALE_VS_INIT(&height, atoi(cp));
+
+	return term_vspan(p, &height);
+}
+
+
+/*
+ * Convert a horizontal scaling-width string to a width via
+ * term_hspan().  Returns -1 when a2roffsu() rejects the string.
+ */
+static int
+a2width(const struct termp *p, const char *cp)
+{
+	struct roffsu	 width;
+
+	if (a2roffsu(cp, &width, SCALE_BU) == 0)
+		return -1;
+
+	return (int)term_hspan(p, &width);
+}
+
+/*
+ * Print leading vertical space before a block.
+ * This is used for the paragraph macros.
+ * The line is always broken first.  No vertical space follows if the
+ * first child of the block body is a table, or if the block is the
+ * first child of its parent, unless that parent is a non-root `RS'
+ * node.  In every other case one vertical space is printed.
+ */
+static void
+print_bvspace(struct termp *p, const struct man_node *n)
+{
+	term_newln(p);
+
+	if (n->body != NULL && n->body->child != NULL &&
+	    n->body->child->type == MAN_TBL)
+		return;
+
+	if ((n->parent->type == MAN_ROOT || n->parent->tok != MAN_RS) &&
+	    n->prev == NULL)
+		return;
+
+	term_vspace(p);
+}
+
+/*
+ * Handler for ignored macros: no output, and the zero return keeps
+ * print_man_node() from descending into the children.
+ */
+/* ARGSUSED */
+static int
+pre_ign(DECL_ARGS)
+{
+	return 0;
+}
+
+
+/*
+ * `I': switch to the underlined font and let the children print.
+ */
+/* ARGSUSED */
+static int
+pre_I(DECL_ARGS)
+{
+	term_fontrepl(p, TERMFONT_UNDER);
+
+	return 1;
+}
+
+
+/*
+ * `nf' and `fi': break the line, then set (nf) or clear (otherwise)
+ * literal mode.  Always returns zero.
+ */
+/* ARGSUSED */
+static int
+pre_literal(DECL_ARGS)
+{
+	term_newln(p);
+
+	if (n->tok != MAN_nf)
+		mt->fl &= ~MANT_LITERAL;
+	else
+		mt->fl |= MANT_LITERAL;
+
+	/*
+	 * Unlike .IP and .TP, .HP does not have a HEAD.
+	 * So in case a second call to term_flushln() is needed,
+	 * indentation has to be set up explicitly.
+	 */
+	if (n->parent->tok == MAN_HP && p->rmargin < p->maxrmargin) {
+		p->offset = p->rmargin;
+		p->rmargin = p->maxrmargin;
+		p->flags &= ~(TERMP_NOBREAK | TERMP_TWOSPACE);
+		p->flags |= TERMP_NOSPACE;
+	}
+
+	return 0;
+}
+
+/*
+ * The font-alternating macros (RB, RI, BR, BI, IR, IB): print the
+ * arguments with alternating fonts and no space between them.
+ * Literal mode is suspended while printing and switched back on just
+ * before the last argument is printed.  Always returns zero, since
+ * the children are printed here.
+ */
+/* ARGSUSED */
+static int
+pre_alternate(DECL_ARGS)
+{
+	const struct man_node	*arg;
+	enum termfont		 fonts[2];
+	int			 was_literal, idx;
+
+	switch (n->tok) {
+	case (MAN_RB):
+		fonts[0] = TERMFONT_NONE;
+		fonts[1] = TERMFONT_BOLD;
+		break;
+	case (MAN_RI):
+		fonts[0] = TERMFONT_NONE;
+		fonts[1] = TERMFONT_UNDER;
+		break;
+	case (MAN_BR):
+		fonts[0] = TERMFONT_BOLD;
+		fonts[1] = TERMFONT_NONE;
+		break;
+	case (MAN_BI):
+		fonts[0] = TERMFONT_BOLD;
+		fonts[1] = TERMFONT_UNDER;
+		break;
+	case (MAN_IR):
+		fonts[0] = TERMFONT_UNDER;
+		fonts[1] = TERMFONT_NONE;
+		break;
+	case (MAN_IB):
+		fonts[0] = TERMFONT_UNDER;
+		fonts[1] = TERMFONT_BOLD;
+		break;
+	default:
+		abort();
+	}
+
+	was_literal = mt->fl & MANT_LITERAL;
+	mt->fl &= ~MANT_LITERAL;
+
+	idx = 0;
+	for (arg = n->child; arg != NULL; arg = arg->next) {
+		term_fontrepl(p, fonts[idx]);
+		if (was_literal && arg->next == NULL)
+			mt->fl |= MANT_LITERAL;
+		print_man_node(p, mt, arg, m);
+		if (arg->next != NULL)
+			p->flags |= TERMP_NOSPACE;
+		idx = 1 - idx;
+	}
+
+	return 0;
+}
+
+/*
+ * `B' (also used for `SB'): switch to the bold font and let the
+ * children print.
+ */
+/* ARGSUSED */
+static int
+pre_B(DECL_ARGS)
+{
+	term_fontrepl(p, TERMFONT_BOLD);
+
+	return 1;
+}
+
+/*
+ * `OP': print "[", the first argument in bold, the second argument
+ * (if any) underlined, and "]" with no space before it.
+ * Always returns zero, since the arguments are printed here.
+ */
+/* ARGSUSED */
+static int
+pre_OP(DECL_ARGS)
+{
+	term_word(p, "[");
+	p->flags |= TERMP_NOSPACE;
+
+	n = n->child;
+	if (n != NULL) {
+		term_fontrepl(p, TERMFONT_BOLD);
+		term_word(p, n->string);
+
+		if (n->next != NULL) {
+			term_fontrepl(p, TERMFONT_UNDER);
+			term_word(p, n->next->string);
+		}
+	}
+
+	term_fontrepl(p, TERMFONT_NONE);
+	p->flags |= TERMP_NOSPACE;
+	term_word(p, "]");
+	return 0;
+}
+
+/*
+ * `ft': select a font from the first character of the argument.
+ * B, 3 and 4 select bold; I and 2 select underline; R, C and 1
+ * select the plain font; P, or a missing argument, goes back to the
+ * previous font.  Any other character leaves the font unchanged.
+ * Always returns zero.
+ */
+/* ARGSUSED */
+static int
+pre_ft(DECL_ARGS)
+{
+	if (n->child == NULL) {
+		term_fontlast(p);
+		return 0;
+	}
+
+	switch (*n->child->string) {
+	case ('B'):
+		/* FALLTHROUGH */
+	case ('3'):
+		/* FALLTHROUGH */
+	case ('4'):
+		term_fontrepl(p, TERMFONT_BOLD);
+		break;
+	case ('I'):
+		/* FALLTHROUGH */
+	case ('2'):
+		term_fontrepl(p, TERMFONT_UNDER);
+		break;
+	case ('R'):
+		/* FALLTHROUGH */
+	case ('C'):
+		/* FALLTHROUGH */
+	case ('1'):
+		term_fontrepl(p, TERMFONT_NONE);
+		break;
+	case ('P'):
+		term_fontlast(p);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * `in': change the current indentation.
+ * Without an argument, the offset is reset to the default offset of
+ * the current scope.  Otherwise the argument is a width with an
+ * optional sign: "+N" adds to the current offset, "-N" subtracts from
+ * it (stopping at zero) and "N" sets it.  An argument that a2width()
+ * rejects leaves the offset unchanged.  The resulting offset is never
+ * allowed beyond the right margin.  Always returns zero.
+ */
+/* ARGSUSED */
+static int
+pre_in(DECL_ARGS)
+{
+	int		 len, less;
+	size_t		 v;
+	const char	*cp;
+
+	term_newln(p);
+
+	if (NULL == n->child) {
+		p->offset = mt->offset;
+		return(0);
+	}
+
+	cp = n->child->string;
+	less = 0;
+
+	/*
+	 * Step over the sign only if there is one.  Never move the
+	 * pointer before the start of the string: forming such a
+	 * pointer is undefined even if it is incremented again
+	 * before being used.
+	 */
+	if ('-' == *cp) {
+		less = -1;
+		cp++;
+	} else if ('+' == *cp) {
+		less = 1;
+		cp++;
+	}
+
+	if ((len = a2width(p, cp)) < 0)
+		return(0);
+
+	v = (size_t)len;
+
+	if (less < 0)
+		p->offset -= p->offset > v ? v : p->offset;
+	else if (less > 0)
+		p->offset += v;
+	else
+		p->offset = v;
+
+	/* Don't let this creep beyond the right margin. */
+
+	if (p->offset > p->rmargin)
+		p->offset = p->rmargin;
+
+	return(0);
+}
+
+
+/*
+ * `br' and `sp': break the line or print vertical space.
+ * Nothing is printed when the node is the first child of an SH or SS
+ * node.  `br' only breaks the line; `sp' prints as many vertical
+ * spaces as its argument asks for, one by default, and a height of
+ * zero behaves like `br'.  Always returns zero.
+ */
+/* ARGSUSED */
+static int
+pre_sp(DECL_ARGS)
+{
+	size_t	 lines;
+
+	if (n->prev == NULL && n->parent != NULL) {
+		if (n->parent->tok == MAN_SS || n->parent->tok == MAN_SH)
+			return 0;
+	}
+
+	if (n->tok == MAN_br)
+		lines = 0;
+	else if (n->child != NULL)
+		lines = a2height(p, n->child->string);
+	else
+		lines = 1;
+
+	if (lines == 0) {
+		term_newln(p);
+		return 0;
+	}
+
+	while (lines-- > 0)
+		term_vspace(p);
+
+	return 0;
+}
+
+
+/*
+ * `HP': hanging paragraph.
+ * The block prints the leading vertical space.  The body is set up
+ * as a non-breaking column starting at the default offset; its width
+ * is the head argument if that is a valid width, the saved left
+ * margin otherwise, and at least one character.  A valid width is
+ * also saved as the new left margin.  The head returns zero.
+ */
+/* ARGSUSED */
+static int
+pre_HP(DECL_ARGS)
+{
+	const struct man_node	*arg;
+	size_t			 width, minwidth;
+	int			 ival;
+
+	if (n->type == MAN_BLOCK) {
+		print_bvspace(p, n);
+		return 1;
+	}
+	if (n->type != MAN_BODY)
+		return 0;
+
+	p->flags |= TERMP_NOBREAK | TERMP_TWOSPACE;
+
+	/* Calculate offset. */
+
+	width = mt->lmargin[mt->lmargincur];
+	ival = -1;
+
+	arg = n->parent->head->child;
+	if (arg != NULL) {
+		ival = a2width(p, arg->string);
+		if (ival >= 0)
+			width = (size_t)ival;
+	}
+
+	minwidth = term_len(p, 1);
+	if (width < minwidth)
+		width = minwidth;
+
+	p->offset = mt->offset;
+	p->rmargin = mt->offset + width;
+
+	if (ival >= 0)
+		mt->lmargin[mt->lmargincur] = (size_t)ival;
+
+	return 1;
+}
+
+
+/*
+ * Close an `HP': block and body both flush the line; the body also
+ * drops the column flags set by pre_HP() and restores the default
+ * offset and the full right margin.
+ */
+/* ARGSUSED */
+static void
+post_HP(DECL_ARGS)
+{
+	if (n->type != MAN_BLOCK && n->type != MAN_BODY)
+		return;
+
+	term_flushln(p);
+
+	if (n->type == MAN_BODY) {
+		p->flags &= ~(TERMP_NOBREAK | TERMP_TWOSPACE);
+		p->offset = mt->offset;
+		p->rmargin = p->maxrmargin;
+	}
+}
+
+
+/*
+ * Plain paragraphs (LP, PP, P).
+ * The block resets the saved left margin to the default indent and
+ * prints the leading vertical space; head and body reset the offset
+ * to the default offset.  The children of the head are not printed.
+ */
+/* ARGSUSED */
+static int
+pre_PP(DECL_ARGS)
+{
+	if (n->type == MAN_BLOCK) {
+		mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+		print_bvspace(p, n);
+	} else
+		p->offset = mt->offset;
+
+	return n->type != MAN_HEAD;
+}
+
+
+/*
+ * `IP': indented paragraph with an optional tag.
+ * The block prints the leading vertical space.  The width of the tag
+ * column is the optional second head argument if that is a valid
+ * width, the saved left margin otherwise.  The head is printed as a
+ * non-breaking column of that width (at least one character); the
+ * body starts right after it.
+ */
+/* ARGSUSED */
+static int
+pre_IP(DECL_ARGS)
+{
+	const struct man_node	*arg;
+	size_t			 width;
+	int			 was_literal, ival;
+
+	if (n->type == MAN_BLOCK) {
+		print_bvspace(p, n);
+		return 1;
+	}
+
+	if (n->type == MAN_HEAD)
+		p->flags |= TERMP_NOBREAK;
+	else if (n->type == MAN_BODY)
+		p->flags |= TERMP_NOSPACE;
+	else
+		return 1;
+
+	width = mt->lmargin[mt->lmargincur];
+	ival = -1;
+
+	/* Calculate the offset from the optional second argument. */
+	arg = n->parent->head->child;
+	if (arg != NULL && (arg = arg->next) != NULL) {
+		ival = a2width(p, arg->string);
+		if (ival >= 0)
+			width = (size_t)ival;
+	}
+
+	if (n->type == MAN_BODY) {
+		p->offset = mt->offset + width;
+		p->rmargin = p->maxrmargin;
+		return 1;
+	}
+
+	/* The head.  Handle zero-width lengths. */
+	if (width == 0)
+		width = term_len(p, 1);
+
+	p->offset = mt->offset;
+	p->rmargin = mt->offset + width;
+
+	/* Without a valid width, the children print the normal way. */
+	if (ival < 0)
+		return 1;
+
+	/* Set the saved left-margin. */
+	mt->lmargin[mt->lmargincur] = (size_t)ival;
+
+	/* Print only the tag itself, with literal mode suspended. */
+	was_literal = mt->fl & MANT_LITERAL;
+	mt->fl &= ~MANT_LITERAL;
+
+	if (n->child != NULL)
+		print_man_node(p, mt, n->child, m);
+
+	if (was_literal)
+		mt->fl |= MANT_LITERAL;
+
+	return 0;
+}
+
+
+/*
+ * Close an `IP': the head flushes the tag column and restores the
+ * full right margin; the body breaks the line.
+ */
+/* ARGSUSED */
+static void
+post_IP(DECL_ARGS)
+{
+	if (n->type == MAN_HEAD) {
+		term_flushln(p);
+		p->flags &= ~TERMP_NOBREAK;
+		p->rmargin = p->maxrmargin;
+	} else if (n->type == MAN_BODY)
+		term_newln(p);
+}
+
+
+/*
+ * `TP': tagged paragraph.
+ * The block prints the leading vertical space.  The width of the tag
+ * column is the first head argument if it is on the macro line and a
+ * valid width, the saved left margin otherwise.  The head prints
+ * only the children found on later lines, as a non-breaking column
+ * of that width (at least one character); the body starts right
+ * after it.
+ */
+/* ARGSUSED */
+static int
+pre_TP(DECL_ARGS)
+{
+	const struct man_node	*nn;
+	size_t			 width;
+	int			 was_literal, ival;
+
+	if (n->type == MAN_BLOCK) {
+		print_bvspace(p, n);
+		return 1;
+	}
+
+	if (n->type == MAN_HEAD)
+		p->flags |= TERMP_NOBREAK;
+	else if (n->type == MAN_BODY)
+		p->flags |= TERMP_NOSPACE;
+	else
+		return 1;
+
+	width = mt->lmargin[mt->lmargincur];
+	ival = -1;
+
+	/* Calculate offset. */
+
+	nn = n->parent->head->child;
+	if (nn != NULL && nn->string != NULL &&
+	    nn->parent->line == nn->line) {
+		ival = a2width(p, nn->string);
+		if (ival >= 0)
+			width = (size_t)ival;
+	}
+
+	if (n->type == MAN_BODY) {
+		p->offset = mt->offset + width;
+		p->rmargin = p->maxrmargin;
+		return 1;
+	}
+
+	/* The head.  Handle zero-length properly. */
+	if (width == 0)
+		width = term_len(p, 1);
+
+	p->offset = mt->offset;
+	p->rmargin = mt->offset + width;
+
+	was_literal = mt->fl & MANT_LITERAL;
+	mt->fl &= ~MANT_LITERAL;
+
+	/* Don't print same-line elements. */
+	for (nn = n->child; nn != NULL; nn = nn->next) {
+		if (nn->line > n->line)
+			print_man_node(p, mt, nn, m);
+	}
+
+	if (was_literal)
+		mt->fl |= MANT_LITERAL;
+	if (ival >= 0)
+		mt->lmargin[mt->lmargincur] = (size_t)ival;
+
+	return 0;
+}
+
+
+/*
+ * Close a `TP': the head flushes the tag column, clears the column
+ * flags and restores the full right margin; the body breaks the
+ * line.
+ */
+/* ARGSUSED */
+static void
+post_TP(DECL_ARGS)
+{
+	if (n->type == MAN_HEAD) {
+		term_flushln(p);
+		p->flags &= ~(TERMP_NOBREAK | TERMP_TWOSPACE);
+		p->rmargin = p->maxrmargin;
+	} else if (n->type == MAN_BODY)
+		term_newln(p);
+}
+
+
+/*
+ * `SS': subsection header.
+ * The block leaves literal mode, resets the saved margin and the
+ * default offset to the default indent and prints one vertical
+ * space, except when it is the first child of its parent or follows
+ * an `SS' with an empty body.  The head is printed in bold at half
+ * the default indent; the body starts at the default offset.
+ */
+/* ARGSUSED */
+static int
+pre_SS(DECL_ARGS)
+{
+	const struct man_node	*prev;
+
+	switch (n->type) {
+	case (MAN_BLOCK):
+		mt->fl &= ~MANT_LITERAL;
+		mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+		mt->offset = term_len(p, p->defindent);
+
+		prev = n->prev;
+		/* If the first macro, no vspace. */
+		if (prev == NULL)
+			break;
+		/* If following a prior empty `SS', no vspace. */
+		if (prev->tok == MAN_SS && prev->body->child == NULL)
+			break;
+		term_vspace(p);
+		break;
+	case (MAN_HEAD):
+		term_fontrepl(p, TERMFONT_BOLD);
+		p->offset = term_len(p, p->defindent/2);
+		break;
+	case (MAN_BODY):
+		p->offset = mt->offset;
+		break;
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+
+/*
+ * Close an `SS': both the head and the body end with a line break.
+ */
+/* ARGSUSED */
+static void
+post_SS(DECL_ARGS)
+{
+	if (n->type == MAN_HEAD || n->type == MAN_BODY)
+		term_newln(p);
+}
+
+
+/*
+ * `SH': section header.
+ * The block leaves literal mode, resets the saved margin and the
+ * default offset to the default indent and prints one vertical
+ * space, except when it is the first child of its parent or follows
+ * an `SH' with an empty body.  The head is printed in bold at offset
+ * zero; the body starts at the default offset.
+ */
+/* ARGSUSED */
+static int
+pre_SH(DECL_ARGS)
+{
+	const struct man_node	*prev;
+
+	switch (n->type) {
+	case (MAN_BLOCK):
+		mt->fl &= ~MANT_LITERAL;
+		mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+		mt->offset = term_len(p, p->defindent);
+
+		prev = n->prev;
+		/* If the first macro, no vspace. */
+		if (prev == NULL)
+			break;
+		/* If following a prior empty `SH', no vspace. */
+		if (prev->tok == MAN_SH && prev->body->child == NULL)
+			break;
+		term_vspace(p);
+		break;
+	case (MAN_HEAD):
+		term_fontrepl(p, TERMFONT_BOLD);
+		p->offset = 0;
+		break;
+	case (MAN_BODY):
+		p->offset = mt->offset;
+		break;
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+
+/*
+ * Close an `SH': both the head and the body end with a line break.
+ */
+/* ARGSUSED */
+static void
+post_SH(DECL_ARGS)
+{
+	if (n->type == MAN_HEAD || n->type == MAN_BODY)
+		term_newln(p);
+}
+
+/*
+ * `RS': start a relative indent.
+ * The block breaks the line and the head is skipped.  The body
+ * increases the default offset by the head argument if that is a
+ * valid width, by the default indent otherwise, and opens a new
+ * margin level that starts out as a copy of the enclosing one.
+ * Beyond MAXMARGINS nested levels, the last margin slot is reused.
+ */
+/* ARGSUSED */
+static int
+pre_RS(DECL_ARGS)
+{
+	const struct man_node	*arg;
+	size_t			 indent;
+	int			 ival;
+
+	if (n->type == MAN_BLOCK) {
+		term_newln(p);
+		return 1;
+	}
+	if (n->type == MAN_HEAD)
+		return 0;
+
+	indent = term_len(p, p->defindent);
+
+	arg = n->parent->head->child;
+	if (arg != NULL) {
+		ival = a2width(p, arg->string);
+		if (ival >= 0)
+			indent = (size_t)ival;
+	}
+
+	mt->offset += indent;
+	p->rmargin = p->maxrmargin;
+	p->offset = mt->offset < p->rmargin ? mt->offset : p->rmargin;
+
+	/* The nesting depth keeps counting even when slots run out. */
+	mt->lmarginsz++;
+	if (mt->lmarginsz < MAXMARGINS)
+		mt->lmargincur = mt->lmarginsz;
+
+	mt->lmargin[mt->lmargincur] = mt->lmargin[mt->lmargincur - 1];
+	return 1;
+}
+
+/*
+ * Close an `RS': after the body, break the line, take the indent
+ * computed the same way as in pre_RS() back off the default offset
+ * (stopping at zero) and drop one margin level.  Block and head
+ * need no action.
+ */
+/* ARGSUSED */
+static void
+post_RS(DECL_ARGS)
+{
+	const struct man_node	*arg;
+	size_t			 indent;
+	int			 ival;
+
+	if (n->type == MAN_BLOCK || n->type == MAN_HEAD)
+		return;
+
+	term_newln(p);
+
+	indent = term_len(p, p->defindent);
+
+	arg = n->parent->head->child;
+	if (arg != NULL) {
+		ival = a2width(p, arg->string);
+		if (ival >= 0)
+			indent = (size_t)ival;
+	}
+
+	if (mt->offset < indent)
+		mt->offset = 0;
+	else
+		mt->offset -= indent;
+	p->offset = mt->offset;
+
+	mt->lmarginsz--;
+	if (mt->lmarginsz < MAXMARGINS)
+		mt->lmargincur = mt->lmarginsz;
+}
+
+/*
+ * Print one node.  Text, equation and table nodes are printed
+ * directly.  For macro nodes, the pre handler from termacts[] runs
+ * first; the children are printed unless it returned zero; the post
+ * handler runs last.  Unless the macro is flagged MAN_NOTEXT, the
+ * font is reset to plain before and after.
+ */
+static void
+print_man_node(DECL_ARGS)
+{
+	const struct termact	*act;
+	size_t			 saved_rm, saved_rmax;
+	int			 descend;
+
+	switch (n->type) {
+	case (MAN_TEXT):
+		/*
+		 * If we have a blank line, output a vertical space.
+		 * If we have a space as the first character, break
+		 * before printing the line's data.
+		 */
+		if (*n->string == '\0') {
+			term_vspace(p);
+			return;
+		}
+		if (*n->string == ' ' && (n->flags & MAN_LINE))
+			term_newln(p);
+
+		term_word(p, n->string);
+
+		/*
+		 * If we're in a literal context, make sure that words
+		 * together on the same line stay together.  This is a
+		 * POST-printing call, so we check the NEXT word.  Since
+		 * -man doesn't have nested macros, we don't need to be
+		 * more specific than this.
+		 */
+		if ((mt->fl & MANT_LITERAL) &&
+		    ! (p->flags & TERMP_NOBREAK) &&
+		    (n->next == NULL || n->next->line > n->line)) {
+			saved_rm = p->rmargin;
+			saved_rmax = p->maxrmargin;
+			p->rmargin = p->maxrmargin = TERM_MAXMARGIN;
+			p->flags |= TERMP_NOSPACE;
+			term_flushln(p);
+			p->rmargin = saved_rm;
+			p->maxrmargin = saved_rmax;
+		}
+
+		if (n->flags & MAN_EOS)
+			p->flags |= TERMP_SENTENCE;
+		return;
+	case (MAN_EQN):
+		term_eqn(p, n->eqn);
+		return;
+	case (MAN_TBL):
+		/*
+		 * Tables are preceded by a newline.  Then process a
+		 * table line, which will cause line termination.
+		 */
+		if (n->span->flags & TBL_SPAN_FIRST)
+			term_newln(p);
+		term_tbl(p, n->span);
+		return;
+	default:
+		break;
+	}
+
+	act = &termacts[n->tok];
+
+	if ( ! (act->flags & MAN_NOTEXT))
+		term_fontrepl(p, TERMFONT_NONE);
+
+	descend = 1;
+	if (act->pre != NULL)
+		descend = (*act->pre)(p, mt, n, m);
+
+	if (descend && n->child != NULL)
+		print_man_nodelist(p, mt, n->child, m);
+
+	if (act->post != NULL)
+		(*act->post)(p, mt, n, m);
+	if ( ! (act->flags & MAN_NOTEXT))
+		term_fontrepl(p, TERMFONT_NONE);
+
+	if (n->flags & MAN_EOS)
+		p->flags |= TERMP_SENTENCE;
+}
+
+
+/*
+ * Print node "n" and all the siblings following it, in order.
+ * This walks the sibling list in a loop rather than recursing once
+ * per sibling, so that the stack depth does not grow with the number
+ * of siblings in the input document.
+ */
+static void
+print_man_nodelist(DECL_ARGS)
+{
+
+	for ( ; NULL != n; n = n->next)
+		print_man_node(p, mt, n, m);
+}
+
+
+/*
+ * Print the page footer: the manual source in the bottom left
+ * corner, the date in the middle, and the title with the section in
+ * the bottom right corner.  "arg" is the struct man_meta of the
+ * document.
+ * All column boundaries are clamped at zero, so a date or title
+ * wider than the page cannot make the unsigned margin arithmetic
+ * wrap around.
+ */
+static void
+print_man_foot(struct termp *p, const void *arg)
+{
+	char		title[BUFSIZ];
+	size_t		datelen, titlen, one;
+	const struct man_meta *meta;
+
+	meta = (const struct man_meta *)arg;
+	assert(meta->title);
+	assert(meta->msec);
+	assert(meta->date);
+
+	term_fontrepl(p, TERMFONT_NONE);
+
+	term_vspace(p);
+
+	/*
+	 * Temporary, undocumented option to imitate mdoc(7) output.
+	 * In the bottom right corner, use the source instead of
+	 * the title.
+	 */
+
+	if ( ! p->mdocstyle) {
+		term_vspace(p);
+		term_vspace(p);
+		snprintf(title, sizeof(title), "%s(%s)",
+		    meta->title, meta->msec);
+	} else if (meta->source) {
+		strlcpy(title, meta->source, sizeof(title));
+	} else {
+		title[0] = '\0';
+	}
+	datelen = term_strlen(p, meta->date);
+
+	/* Bottom left corner: manual source. */
+
+	p->flags |= TERMP_NOSPACE | TERMP_NOBREAK;
+	p->offset = 0;
+	one = term_len(p, 1);
+	/* Half of what the date leaves free, or nothing at all. */
+	p->rmargin = p->maxrmargin + one > datelen ?
+	    (p->maxrmargin + one - datelen) / 2 : 0;
+
+	if (meta->source)
+		term_word(p, meta->source);
+	term_flushln(p);
+
+	/* At the bottom in the middle: manual date. */
+
+	p->flags |= TERMP_NOSPACE;
+	p->offset = p->rmargin;
+	titlen = term_strlen(p, title);
+	p->rmargin = p->maxrmargin > titlen ?
+	    p->maxrmargin - titlen : 0;
+	if (p->offset + datelen >= p->rmargin)
+		p->rmargin = p->offset + datelen;
+
+	term_word(p, meta->date);
+	term_flushln(p);
+
+	/* Bottom right corner: manual title and section. */
+
+	p->flags &= ~TERMP_NOBREAK;
+	p->flags |= TERMP_NOSPACE;
+	p->offset = p->rmargin;
+	p->rmargin = p->maxrmargin;
+
+	term_word(p, title);
+	term_flushln(p);
+}
+
+
+/*
+ * Print the page header: the title with the section in the top left
+ * corner, the volume in the middle, and the title with the section
+ * again in the top right corner if it fits.  "arg" is the struct
+ * man_meta of the document.
+ * The boundary of the first column is clamped at zero, so a volume
+ * string wider than the page cannot make the unsigned margin
+ * arithmetic wrap around.
+ */
+static void
+print_man_head(struct termp *p, const void *arg)
+{
+	char		buf[BUFSIZ], title[BUFSIZ];
+	size_t		buflen, titlen;
+	const struct man_meta *m;
+
+	m = (const struct man_meta *)arg;
+	assert(m->title);
+	assert(m->msec);
+
+	if (m->vol)
+		strlcpy(buf, m->vol, BUFSIZ);
+	else
+		buf[0] = '\0';
+	buflen = term_strlen(p, buf);
+
+	/* Top left corner: manual title and section. */
+
+	snprintf(title, BUFSIZ, "%s(%s)", m->title, m->msec);
+	titlen = term_strlen(p, title);
+
+	p->flags |= TERMP_NOBREAK | TERMP_NOSPACE;
+	p->offset = 0;
+	/*
+	 * Center the volume if both titles fit next to it;
+	 * otherwise push it against the right margin.
+	 */
+	if (2 * (titlen+1) + buflen < p->maxrmargin)
+		p->rmargin = (p->maxrmargin -
+		    buflen + term_len(p, 1)) / 2;
+	else if (buflen < p->maxrmargin)
+		p->rmargin = p->maxrmargin - buflen;
+	else
+		p->rmargin = 0;
+
+	term_word(p, title);
+	term_flushln(p);
+
+	/* At the top in the middle: manual volume. */
+
+	p->flags |= TERMP_NOSPACE;
+	p->offset = p->rmargin;
+	p->rmargin = p->offset + buflen + titlen < p->maxrmargin ?
+	    p->maxrmargin - titlen : p->maxrmargin;
+
+	term_word(p, buf);
+	term_flushln(p);
+
+	/* Top right corner: title and section, again. */
+
+	p->flags &= ~TERMP_NOBREAK;
+	if (p->rmargin + titlen <= p->maxrmargin) {
+		p->flags |= TERMP_NOSPACE;
+		p->offset = p->rmargin;
+		p->rmargin = p->maxrmargin;
+		term_word(p, title);
+		term_flushln(p);
+	}
+
+	p->flags &= ~TERMP_NOSPACE;
+	p->offset = 0;
+	p->rmargin = p->maxrmargin;
+
+	/*
+	 * Groff prints three blank lines before the content.
+	 * Do the same, except in the temporary, undocumented
+	 * mode imitating mdoc(7) output.
+	 */
+
+	term_vspace(p);
+	if ( ! p->mdocstyle) {
+		term_vspace(p);
+		term_vspace(p);
+	}
+}
diff --git a/man_validate.c b/man_validate.c
new file mode 100644
index 000000000000..e40b089f53b8
--- /dev/null
+++ b/man_validate.c
@@ -0,0 +1,550 @@
+/* $Id: man_validate.c,v 1.80 2012/01/03 15:16:24 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "man.h"
+#include "mandoc.h"
+#include "libman.h"
+#include "libmandoc.h"
+
+/* Parameter list shared by all validation callbacks in this file. */
+#define CHKARGS struct man *m, struct man_node *n
+
+/*
+ * A validation callback.  man_valid_pre() and man_valid_post() stop
+ * and return 0 as soon as one callback returns 0.
+ */
+typedef int (*v_check)(CHKARGS);
+
+/*
+ * Per-macro validation lists.  Each member is either NULL (nothing to
+ * check) or a NULL-terminated array of callbacks run in array order.
+ */
+struct man_valid {
+ v_check *pres; /* run by man_valid_pre() */
+ v_check *posts; /* run by man_valid_post() */
+};
+
+static int check_eq0(CHKARGS);
+static int check_eq2(CHKARGS);
+static int check_le1(CHKARGS);
+static int check_ge2(CHKARGS);
+static int check_le5(CHKARGS);
+static int check_par(CHKARGS);
+static int check_part(CHKARGS);
+static int check_root(CHKARGS);
+static void check_text(CHKARGS);
+
+static int post_AT(CHKARGS);
+static int post_vs(CHKARGS);
+static int post_fi(CHKARGS);
+static int post_ft(CHKARGS);
+static int post_nf(CHKARGS);
+static int post_sec(CHKARGS);
+static int post_TH(CHKARGS);
+static int post_UC(CHKARGS);
+static int pre_sec(CHKARGS);
+
+/*
+ * NULL-terminated callback lists referenced from man_valids[] below.
+ * Callbacks run in the order listed here.
+ */
+static v_check posts_at[] = { post_AT, NULL };
+static v_check posts_br[] = { post_vs, check_eq0, NULL };
+static v_check posts_eq0[] = { check_eq0, NULL };
+static v_check posts_eq2[] = { check_eq2, NULL };
+static v_check posts_fi[] = { check_eq0, post_fi, NULL };
+static v_check posts_ft[] = { post_ft, NULL };
+static v_check posts_nf[] = { check_eq0, post_nf, NULL };
+static v_check posts_par[] = { check_par, NULL };
+static v_check posts_part[] = { check_part, NULL };
+static v_check posts_sec[] = { post_sec, NULL };
+static v_check posts_sp[] = { post_vs, check_le1, NULL };
+static v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL };
+static v_check posts_uc[] = { post_UC, NULL };
+static v_check pres_sec[] = { pre_sec, NULL };
+
+/*
+ * Validation table, indexed by the macro token of a node (n->tok).
+ * The trailing comment on each entry names the macro it belongs to.
+ * NOTE(review): the entry order presumably has to match the macro
+ * token enumeration declared in man.h -- confirm before reordering.
+ */
+static const struct man_valid man_valids[MAN_MAX] = {
+ { NULL, posts_br }, /* br */
+ { NULL, posts_th }, /* TH */
+ { pres_sec, posts_sec }, /* SH */
+ { pres_sec, posts_sec }, /* SS */
+ { NULL, NULL }, /* TP */
+ { NULL, posts_par }, /* LP */
+ { NULL, posts_par }, /* PP */
+ { NULL, posts_par }, /* P */
+ { NULL, NULL }, /* IP */
+ { NULL, NULL }, /* HP */
+ { NULL, NULL }, /* SM */
+ { NULL, NULL }, /* SB */
+ { NULL, NULL }, /* BI */
+ { NULL, NULL }, /* IB */
+ { NULL, NULL }, /* BR */
+ { NULL, NULL }, /* RB */
+ { NULL, NULL }, /* R */
+ { NULL, NULL }, /* B */
+ { NULL, NULL }, /* I */
+ { NULL, NULL }, /* IR */
+ { NULL, NULL }, /* RI */
+ { NULL, posts_eq0 }, /* na */
+ { NULL, posts_sp }, /* sp */
+ { NULL, posts_nf }, /* nf */
+ { NULL, posts_fi }, /* fi */
+ { NULL, NULL }, /* RE */
+ { NULL, posts_part }, /* RS */
+ { NULL, NULL }, /* DT */
+ { NULL, posts_uc }, /* UC */
+ { NULL, NULL }, /* PD */
+ { NULL, posts_at }, /* AT */
+ { NULL, NULL }, /* in */
+ { NULL, posts_ft }, /* ft */
+ { NULL, posts_eq2 }, /* OP */
+};
+
+
+/*
+ * Run the pre-validation callbacks registered for the macro of node n.
+ * Text, root, eqn and tbl nodes have no macro token and always pass.
+ * Returns 0 as soon as one callback fails, 1 otherwise.
+ */
+int
+man_valid_pre(struct man *m, struct man_node *n)
+{
+	v_check		*check;
+
+	if (MAN_TEXT == n->type || MAN_ROOT == n->type ||
+	    MAN_EQN == n->type || MAN_TBL == n->type)
+		return(1);
+
+	check = man_valids[n->tok].pres;
+	if (NULL == check)
+		return(1);
+
+	while (NULL != *check) {
+		if (0 == (*check)(m, n))
+			return(0);
+		check++;
+	}
+
+	return(1);
+}
+
+
+/*
+ * Run the post-validation callbacks for the last node of the parse,
+ * at most once per node: the MAN_VALID flag marks nodes already done.
+ * Returns 0 as soon as one callback fails, 1 otherwise.
+ */
+int
+man_valid_post(struct man *m)
+{
+	v_check		*check;
+
+	if (MAN_VALID & m->last->flags)
+		return(1);
+	m->last->flags |= MAN_VALID;
+
+	if (MAN_TEXT == m->last->type) {
+		check_text(m, m->last);
+		return(1);
+	}
+	if (MAN_ROOT == m->last->type)
+		return(check_root(m, m->last));
+	if (MAN_EQN == m->last->type || MAN_TBL == m->last->type)
+		return(1);
+
+	check = man_valids[m->last->tok].posts;
+	if (NULL == check)
+		return(1);
+
+	/*
+	 * Re-read m->last on every iteration rather than caching it:
+	 * some callbacks call man_node_delete() on the node they are
+	 * given, so a cached pointer could be stale afterwards.
+	 */
+
+	while (NULL != *check) {
+		if (0 == (*check)(m, m->last))
+			return(0);
+		check++;
+	}
+
+	return(1);
+}
+
+
+/*
+ * Validate the root node at the end of the parse: report a scope
+ * still left open, fail on a document without any content, and
+ * supply default meta-data if no title was ever set.
+ */
+static int
+check_root(CHKARGS)
+{
+
+	/* A next-line scope of either kind is still pending. */
+
+	if ((MAN_BLINE | MAN_ELINE) & m->flags)
+		man_nmsg(m, n, MANDOCERR_SCOPEEXIT);
+
+	m->flags &= ~(MAN_BLINE | MAN_ELINE);
+
+	if (NULL == m->first->child) {
+		man_nmsg(m, n, MANDOCERR_NODOCBODY);
+		return(0);
+	}
+
+	if (NULL != m->meta.title)
+		return(1);
+
+	man_nmsg(m, n, MANDOCERR_NOTITLE);
+
+	/*
+	 * Without a title, the date and section have not been
+	 * set either, so fill in all three with defaults.
+	 */
+
+	m->meta.title = mandoc_strdup("unknown");
+	m->meta.msec = mandoc_strdup("1");
+	m->meta.date = mandoc_normdate
+		(m->parse, NULL, n->line, n->pos);
+
+	return(1);
+}
+
+/*
+ * Report every tab character in a text node, giving its offset
+ * within the node's string.  Nothing is reported in literal mode.
+ */
+static void
+check_text(CHKARGS)
+{
+	char		*start, *tab;
+
+	if (MAN_LITERAL & m->flags)
+		return;
+
+	start = n->string;
+	tab = strchr(start, '\t');
+	while (NULL != tab) {
+		man_pmsg(m, n->line, (int)(tab - start), MANDOCERR_BADTAB);
+		tab = strchr(tab + 1, '\t');
+	}
+}
+
+/*
+ * Generate a callback check_<name>() that tests the number of
+ * children of a node against the constant x using the operator ineq.
+ * On a mismatch, MANDOCERR_ARGCOUNT is reported with the expected
+ * relation and the actual count.  The callback returns 1 either way,
+ * so a wrong argument count never aborts validation.
+ */
+#define INEQ_DEFINE(x, ineq, name) \
+static int \
+check_##name(CHKARGS) \
+{ \
+ if (n->nchild ineq (x)) \
+ return(1); \
+ mandoc_vmsg(MANDOCERR_ARGCOUNT, m->parse, n->line, n->pos, \
+ "line arguments %s %d (have %d)", \
+ #ineq, (x), n->nchild); \
+ return(1); \
+}
+
+/* Instantiated as check_eq0, check_eq2, check_le1, check_ge2, check_le5. */
+INEQ_DEFINE(0, ==, eq0)
+INEQ_DEFINE(2, ==, eq2)
+INEQ_DEFINE(1, <=, le1)
+INEQ_DEFINE(2, >=, ge2)
+INEQ_DEFINE(5, <=, le5)
+
+/*
+ * Validate the argument of the `ft' request.  Without an argument
+ * there is nothing to check.  An unrecognised font name is reported
+ * and replaced by the empty string; more than one argument is
+ * reported as well.  Always returns 1.
+ */
+static int
+post_ft(CHKARGS)
+{
+	static const char * const fonts[] = {
+		"1", "2", "3", "4", "I", "P", "R", "B", "BI", "CW"
+	};
+	char		*font;
+	size_t		 i;
+	int		 known;
+
+	if (0 == n->nchild)
+		return(1);
+
+	font = n->child->string;
+	known = 0;
+	for (i = 0; i < sizeof(fonts) / sizeof(fonts[0]); i++) {
+		if (0 == strcmp(font, fonts[i])) {
+			known = 1;
+			break;
+		}
+	}
+
+	if ( ! known) {
+		mandoc_vmsg
+			(MANDOCERR_BADFONT, m->parse,
+			 n->line, n->pos, "%s", font);
+		/* Truncate the name in place after reporting it. */
+		*font = '\0';
+	}
+
+	if (n->nchild > 1)
+		mandoc_vmsg
+			(MANDOCERR_ARGCOUNT, m->parse, n->line,
+			 n->pos, "want one child (have %d)",
+			 n->nchild);
+
+	return(1);
+}
+
+/*
+ * Clear literal mode when a section block node is validated.
+ * Other node types of the section are left alone.  Always returns 1.
+ */
+static int
+pre_sec(CHKARGS)
+{
+
+	if (MAN_BLOCK != n->type)
+		return(1);
+
+	m->flags &= ~MAN_LITERAL;
+	return(1);
+}
+
+/*
+ * A section head without any children is reported with
+ * MANDOCERR_SYNTARGCOUNT and fails validation (returns 0).
+ * Every other node passes (returns 1).
+ */
+static int
+post_sec(CHKARGS)
+{
+
+	if (MAN_HEAD != n->type || 0 != n->nchild)
+		return(1);
+
+	man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT);
+	return(0);
+}
+
+/*
+ * Report a body node that has no children.  Always returns 1.
+ */
+static int
+check_part(CHKARGS)
+{
+
+	if (MAN_BODY != n->type || 0 != n->nchild)
+		return(1);
+
+	mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line,
+			n->pos, "want children (have none)");
+	return(1);
+}
+
+
+/*
+ * Validate a paragraph macro, one node type at a time:
+ * a block whose body is empty is deleted, an empty body is
+ * reported as an ignored paragraph, and arguments on the head
+ * line are reported as lost.  Always returns 1.
+ */
+static int
+check_par(CHKARGS)
+{
+
+	if (MAN_BLOCK == n->type) {
+		if (0 == n->body->nchild)
+			man_node_delete(m, n);
+	} else if (MAN_BODY == n->type) {
+		if (0 == n->nchild)
+			man_nmsg(m, n, MANDOCERR_IGNPAR);
+	} else if (MAN_HEAD == n->type) {
+		if (n->nchild)
+			man_nmsg(m, n, MANDOCERR_ARGSLOST);
+	}
+
+	return(1);
+}
+
+
+/*
+ * Extract the document meta-data from the arguments of the `TH'
+ * macro, in the order TITLE MSEC DATE SOURCE VOL, replacing any
+ * meta-data set earlier.  Missing TITLE, MSEC and DATE become empty
+ * strings; a missing SOURCE stays NULL; a missing VOL is derived
+ * from MSEC when possible.  The `TH' node itself is deleted at the
+ * end.  Always returns 1.
+ */
+static int
+post_TH(CHKARGS)
+{
+	const char	*p;
+	int		 line;
+
+	/* free(NULL) is a no-op, so no guards are needed here. */
+
+	free(m->meta.title);
+	free(m->meta.vol);
+	free(m->meta.source);
+	free(m->meta.msec);
+	free(m->meta.date);
+
+	/* Remember the macro line: n is advanced to the arguments. */
+
+	line = n->line;
+	m->meta.title = m->meta.vol = m->meta.date =
+		m->meta.msec = m->meta.source = NULL;
+
+	/* ->TITLE<- MSEC DATE SOURCE VOL */
+
+	n = n->child;
+	if (n && n->string) {
+		for (p = n->string; '\0' != *p; p++) {
+			/* Only warn about this once... */
+			if (isalpha((unsigned char)*p) &&
+					! isupper((unsigned char)*p)) {
+				man_nmsg(m, n, MANDOCERR_UPPERCASE);
+				break;
+			}
+		}
+		m->meta.title = mandoc_strdup(n->string);
+	} else
+		m->meta.title = mandoc_strdup("");
+
+	/* TITLE ->MSEC<- DATE SOURCE VOL */
+
+	if (n)
+		n = n->next;
+	if (n && n->string)
+		m->meta.msec = mandoc_strdup(n->string);
+	else
+		m->meta.msec = mandoc_strdup("");
+
+	/* TITLE MSEC ->DATE<- SOURCE VOL */
+
+	if (n)
+		n = n->next;
+	if (n && n->string && '\0' != n->string[0])
+		m->meta.date = mandoc_normdate
+			(m->parse, n->string, line, n->pos);
+	else
+		m->meta.date = mandoc_strdup("");
+
+	/*
+	 * TITLE MSEC DATE ->SOURCE<- VOL
+	 * Check the string like for the arguments above, such that
+	 * a node without a string is never handed to mandoc_strdup().
+	 */
+
+	if (n && (n = n->next) && n->string)
+		m->meta.source = mandoc_strdup(n->string);
+
+	/* TITLE MSEC DATE SOURCE ->VOL<- */
+	/* If missing, use the default VOL name for MSEC. */
+
+	if (n && (n = n->next) && n->string)
+		m->meta.vol = mandoc_strdup(n->string);
+	else if ('\0' != m->meta.msec[0] &&
+	    (NULL != (p = mandoc_a2msec(m->meta.msec))))
+		m->meta.vol = mandoc_strdup(p);
+
+	/*
+	 * Remove the `TH' node after we've processed it for our
+	 * meta-data.
+	 */
+	man_node_delete(m, m->last);
+	return(1);
+}
+
+/*
+ * Enter literal mode for `nf'.  If literal mode is already
+ * active, the repeated request is reported.  Always returns 1.
+ */
+static int
+post_nf(CHKARGS)
+{
+
+	if (0 != (m->flags & MAN_LITERAL))
+		man_nmsg(m, n, MANDOCERR_SCOPEREP);
+
+	m->flags |= MAN_LITERAL;
+	return(1);
+}
+
+/*
+ * Leave literal mode for `fi'.  If literal mode is not active,
+ * the stray request is reported.  Always returns 1.
+ */
+static int
+post_fi(CHKARGS)
+{
+
+	if (0 == (m->flags & MAN_LITERAL))
+		man_nmsg(m, n, MANDOCERR_WNOSCOPE);
+
+	m->flags &= ~MAN_LITERAL;
+	return(1);
+}
+
+/*
+ * Set the document source from the argument of the `UC' macro.
+ * The arguments "3" to "7" select a distribution name; a missing,
+ * non-text or unrecognised argument selects the same name as "3".
+ * Any source set earlier is replaced.  Always returns 1.
+ */
+static int
+post_UC(CHKARGS)
+{
+	static const struct {
+		const char	*arg;
+		const char	*name;
+	} versions[] = {
+		{ "3", "3rd Berkeley Distribution" },
+		{ "4", "4th Berkeley Distribution" },
+		{ "5", "4.2 Berkeley Distribution" },
+		{ "6", "4.3 Berkeley Distribution" },
+		{ "7", "4.4 Berkeley Distribution" },
+	};
+
+	const struct man_node *arg;
+	const char	*source;
+	size_t		 i;
+
+	source = versions[0].name;
+	arg = n->child;
+
+	if (NULL != arg && MAN_TEXT == arg->type) {
+		for (i = 0; i < sizeof(versions) / sizeof(versions[0]); i++) {
+			if (0 == strcmp(arg->string, versions[i].arg)) {
+				source = versions[i].name;
+				break;
+			}
+		}
+	}
+
+	free(m->meta.source);
+	m->meta.source = mandoc_strdup(source);
+	return(1);
+}
+
+/*
+ * Set the document source from the arguments of the `AT' macro.
+ * "4" selects System III and "5" selects System V, or System V
+ * Release 2 if a non-empty second text argument follows.
+ * Everything else, including a missing or non-text argument,
+ * selects 7th Edition.  Any source set earlier is replaced.
+ * Always returns 1.
+ */
+static int
+post_AT(CHKARGS)
+{
+	const struct man_node *arg, *rel;
+	const char	*source;
+
+	source = "7th Edition";
+	arg = n->child;
+
+	if (NULL != arg && MAN_TEXT == arg->type) {
+		if (0 == strcmp(arg->string, "4"))
+			source = "System III";
+		else if (0 == strcmp(arg->string, "5")) {
+			rel = arg->next;
+			if (rel && MAN_TEXT == rel->type && rel->string[0])
+				source = "System V Release 2";
+			else
+				source = "System V";
+		}
+	}
+
+	free(m->meta.source);
+	m->meta.source = mandoc_strdup(source);
+	return(1);
+}
+
+/*
+ * Delete a vertical-space request (`br' or `sp') that is the
+ * very first node directly below the root.  Always returns 1.
+ */
+static int
+post_vs(CHKARGS)
+{
+
+	if (NULL != n->prev || MAN_ROOT != n->parent->type)
+		return(1);
+
+	/*
+	 * Don't warn about this because it occurs in pod2man and would
+	 * cause considerable (unfixable) warnage.
+	 */
+	man_node_delete(m, n);
+	return(1);
+}
diff --git a/mandoc.1 b/mandoc.1
new file mode 100644
index 000000000000..dbff0e31caa3
--- /dev/null
+++ b/mandoc.1
@@ -0,0 +1,669 @@
+.\" $Id: mandoc.1,v 1.100 2011/12/25 19:35:44 kristaps Exp $
+.\"
+.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: December 25 2011 $
+.Dt MANDOC 1
+.Os
+.Sh NAME
+.Nm mandoc
+.Nd format and display UNIX manuals
+.Sh SYNOPSIS
+.Nm mandoc
+.Op Fl V
+.Op Fl m Ns Ar format
+.Op Fl O Ns Ar option
+.Op Fl T Ns Ar output
+.Op Fl W Ns Ar level
+.Op Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility formats
+.Ux
+manual pages for display.
+.Pp
+By default,
+.Nm
+reads
+.Xr mdoc 7
+or
+.Xr man 7
+text from stdin, implying
+.Fl m Ns Cm andoc ,
+and produces
+.Fl T Ns Cm ascii
+output.
+.Pp
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl m Ns Ar format
+Input format.
+See
+.Sx Input Formats
+for available formats.
+Defaults to
+.Fl m Ns Cm andoc .
+.It Fl O Ns Ar option
+Comma-separated output options.
+.It Fl T Ns Ar output
+Output format.
+See
+.Sx Output Formats
+for available formats.
+Defaults to
+.Fl T Ns Cm ascii .
+.It Fl V
+Print version and exit.
+.It Fl W Ns Ar level
+Specify the minimum message
+.Ar level
+to be reported on the standard error output and to affect the exit status.
+The
+.Ar level
+can be
+.Cm warning ,
+.Cm error ,
+or
+.Cm fatal .
+The default is
+.Fl W Ns Cm fatal ;
+.Fl W Ns Cm all
+is an alias for
+.Fl W Ns Cm warning .
+See
+.Sx EXIT STATUS
+and
+.Sx DIAGNOSTICS
+for details.
+.Pp
+The special option
+.Fl W Ns Cm stop
+tells
+.Nm
+to exit after parsing a file that causes warnings or errors of at least
+the requested level.
+No formatted output will be produced from that file.
+If both a
+.Ar level
+and
+.Cm stop
+are requested, they can be joined with a comma, for example
+.Fl W Ns Cm error , Ns Cm stop .
+.It Ar file
+Read input from zero or more files.
+If unspecified, reads from stdin.
+If multiple files are specified,
+.Nm
+will halt with the first failed parse.
+.El
+.Ss Input Formats
+The
+.Nm
+utility accepts
+.Xr mdoc 7
+and
+.Xr man 7
+input with
+.Fl m Ns Cm doc
+and
+.Fl m Ns Cm an ,
+respectively.
+The
+.Xr mdoc 7
+format is
+.Em strongly
+recommended;
+.Xr man 7
+should only be used for legacy manuals.
+.Pp
+A third option,
+.Fl m Ns Cm andoc ,
+which is also the default, determines encoding on-the-fly: if the first
+non-comment macro is
+.Sq \&Dd
+or
+.Sq \&Dt ,
+the
+.Xr mdoc 7
+parser is used; otherwise, the
+.Xr man 7
+parser is used.
+.Pp
+If multiple
+files are specified with
+.Fl m Ns Cm andoc ,
+each has its file-type determined this way.
+If multiple files are
+specified and
+.Fl m Ns Cm doc
+or
+.Fl m Ns Cm an
+is specified, then this format is used exclusively.
+.Ss Output Formats
+The
+.Nm
+utility accepts the following
+.Fl T
+arguments, which correspond to output modes:
+.Bl -tag -width "-Tlocale"
+.It Fl T Ns Cm ascii
+Produce 7-bit ASCII output.
+This is the default.
+See
+.Sx ASCII Output .
+.It Fl T Ns Cm html
+Produce strict CSS1/HTML-4.01 output.
+See
+.Sx HTML Output .
+.It Fl T Ns Cm lint
+Parse only: produce no output.
+Implies
+.Fl W Ns Cm warning .
+.It Fl T Ns Cm locale
+Encode output using the current locale.
+See
+.Sx Locale Output .
+.It Fl T Ns Cm man
+Produce
+.Xr man 7
+format output.
+See
+.Sx Man Output .
+.It Fl T Ns Cm pdf
+Produce PDF output.
+See
+.Sx PDF Output .
+.It Fl T Ns Cm ps
+Produce PostScript output.
+See
+.Sx PostScript Output .
+.It Fl T Ns Cm tree
+Produce an indented parse tree.
+.It Fl T Ns Cm utf8
+Encode output in the UTF\-8 multi-byte format.
+See
+.Sx UTF\-8 Output .
+.It Fl T Ns Cm xhtml
+Produce strict CSS1/XHTML-1.0 output.
+See
+.Sx XHTML Output .
+.El
+.Pp
+If multiple input files are specified, these will be processed by the
+corresponding filter in-order.
+.Ss ASCII Output
+Output produced by
+.Fl T Ns Cm ascii ,
+which is the default, is rendered in standard 7-bit ASCII documented in
+.Xr ascii 7 .
+.Pp
+Font styles are applied by using back-spaced encoding such that an
+underlined character
+.Sq c
+is rendered as
+.Sq _ Ns \e[bs] Ns c ,
+where
+.Sq \e[bs]
+is the back-space character number 8.
+Emboldened characters are rendered as
+.Sq c Ns \e[bs] Ns c .
+.Pp
+The special characters documented in
+.Xr mandoc_char 7
+are rendered best-effort in an ASCII equivalent.
+If no equivalent is found,
+.Sq \&?
+is used instead.
+.Pp
+Output width is limited to 78 visible columns unless literal input lines
+exceed this limit.
+.Pp
+The following
+.Fl O
+arguments are accepted:
+.Bl -tag -width Ds
+.It Cm indent Ns = Ns Ar indent
+The left margin for normal text is set to
+.Ar indent
+blank characters instead of the default of five for
+.Xr mdoc 7
+and seven for
+.Xr man 7 .
+Increasing this is not recommended; it may result in degraded formatting,
+for example overfull lines or ugly line breaks.
+.It Cm width Ns = Ns Ar width
+The output width is set to
+.Ar width ,
+which will normalise to \(>=60.
+.El
+.Ss HTML Output
+Output produced by
+.Fl T Ns Cm html
+conforms to HTML-4.01 strict.
+.Pp
+The
+.Pa example.style.css
+file documents style-sheet classes available for customising output.
+If a style-sheet is not specified with
+.Fl O Ns Ar style ,
+.Fl T Ns Cm html
+defaults to simple output readable in any graphical or text-based web
+browser.
+.Pp
+Special characters are rendered in decimal-encoded UTF\-8.
+.Pp
+The following
+.Fl O
+arguments are accepted:
+.Bl -tag -width Ds
+.It Cm fragment
+Omit the
+.Aq !DOCTYPE
+declaration and the
+.Aq html ,
+.Aq head ,
+and
+.Aq body
+elements and only emit the subtree below the
+.Aq body
+element.
+The
+.Cm style
+argument will be ignored.
+This is useful when embedding manual content within existing documents.
+.It Cm includes Ns = Ns Ar fmt
+The string
+.Ar fmt ,
+for example,
+.Ar ../src/%I.html ,
+is used as a template for linked header files (usually via the
+.Sq \&In
+macro).
+Instances of
+.Sq \&%I
+are replaced with the include filename.
+The default is not to present a
+hyperlink.
+.It Cm man Ns = Ns Ar fmt
+The string
+.Ar fmt ,
+for example,
+.Ar ../html%S/%N.%S.html ,
+is used as a template for linked manuals (usually via the
+.Sq \&Xr
+macro).
+Instances of
+.Sq \&%N
+and
+.Sq %S
+are replaced with the linked manual's name and section, respectively.
+If no section is included, section 1 is assumed.
+The default is not to
+present a hyperlink.
+.It Cm style Ns = Ns Ar style.css
+The file
+.Ar style.css
+is used for an external style-sheet.
+This must be a valid absolute or
+relative URI.
+.El
+.Ss Locale Output
+Locale-dependent output encoding is triggered with
+.Fl T Ns Cm locale .
+This option is not available on all systems: systems without locale
+support, or those whose internal representation is not natively UCS-4,
+will fall back to
+.Fl T Ns Cm ascii .
+See
+.Sx ASCII Output
+for font style specification and available command-line arguments.
+.Ss Man Output
+Translate input format into
+.Xr man 7
+output format.
+This is useful for distributing manual sources to legacy systems
+lacking
+.Xr mdoc 7
+formatters.
+.Pp
+If
+.Xr mdoc 7
+is passed as input, it is translated into
+.Xr man 7 .
+If the input format is
+.Xr man 7 ,
+the input is copied to the output, expanding any
+.Xr roff 7
+.Sq so
+requests.
+The parser is also run, and as usual, the
+.Fl W
+level controls which
+.Sx DIAGNOSTICS
+are displayed before copying the input to the output.
+.Ss PDF Output
+PDF-1.1 output may be generated by
+.Fl T Ns Cm pdf .
+See
+.Sx PostScript Output
+for
+.Fl O
+arguments and defaults.
+.Ss PostScript Output
+PostScript
+.Qq Adobe-3.0
+Level-2 pages may be generated by
+.Fl T Ns Cm ps .
+Output pages default to letter sized and are rendered in the Times font
+family, 11-point.
+Margins are calculated as 1/9 the page length and width.
+Line-height is 1.4m.
+.Pp
+Special characters are rendered as in
+.Sx ASCII Output .
+.Pp
+The following
+.Fl O
+arguments are accepted:
+.Bl -tag -width Ds
+.It Cm paper Ns = Ns Ar name
+The paper size
+.Ar name
+may be one of
+.Ar a3 ,
+.Ar a4 ,
+.Ar a5 ,
+.Ar legal ,
+or
+.Ar letter .
+You may also manually specify dimensions as
+.Ar NNxNN ,
+width by height in millimetres.
+If an unknown value is encountered,
+.Ar letter
+is used.
+.El
+.Ss UTF\-8 Output
+Use
+.Fl T Ns Cm utf8
+to force a UTF\-8 locale.
+See
+.Sx Locale Output
+for details and options.
+.Ss XHTML Output
+Output produced by
+.Fl T Ns Cm xhtml
+conforms to XHTML-1.0 strict.
+.Pp
+See
+.Sx HTML Output
+for details; beyond generating XHTML tags instead of HTML tags, these
+output modes are identical.
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values, controlled by the message
+.Ar level
+associated with the
+.Fl W
+option:
+.Pp
+.Bl -tag -width Ds -compact
+.It 0
+No warnings or errors occurred, or those that did were ignored because
+they were lower than the requested
+.Ar level .
+.It 2
+At least one warning occurred, but no error, and
+.Fl W Ns Cm warning
+was specified.
+.It 3
+At least one parsing error occurred, but no fatal error, and
+.Fl W Ns Cm error
+or
+.Fl W Ns Cm warning
+was specified.
+.It 4
+A fatal parsing error occurred.
+.It 5
+Invalid command line arguments were specified.
+No input files have been read.
+.It 6
+An operating system error occurred, for example memory exhaustion or an
+error accessing input files.
+Such errors cause
+.Nm
+to exit at once, possibly in the middle of parsing or formatting a file.
+.El
+.Pp
+Note that selecting
+.Fl T Ns Cm lint
+output mode implies
+.Fl W Ns Cm warning .
+.Sh EXAMPLES
+To page manuals to the terminal:
+.Pp
+.Dl $ mandoc \-Wall,stop mandoc.1 2\*(Gt&1 | less
+.Dl $ mandoc mandoc.1 mdoc.3 mdoc.7 | less
+.Pp
+To produce HTML manuals with
+.Ar style.css
+as the style-sheet:
+.Pp
+.Dl $ mandoc \-Thtml -Ostyle=style.css mdoc.7 \*(Gt mdoc.7.html
+.Pp
+To check over a large set of manuals:
+.Pp
+.Dl $ mandoc \-Tlint `find /usr/src -name \e*\e.[1-9]`
+.Pp
+To produce a series of PostScript manuals for A4 paper:
+.Pp
+.Dl $ mandoc \-Tps \-Opaper=a4 mdoc.7 man.7 \*(Gt manuals.ps
+.Pp
+Convert a modern
+.Xr mdoc 7
+manual to the older
+.Xr man 7
+format, for use on systems lacking an
+.Xr mdoc 7
+parser:
+.Pp
+.Dl $ mandoc \-Tman foo.mdoc \*(Gt foo.man
+.Sh DIAGNOSTICS
+Standard error messages reporting parsing errors are prefixed by
+.Pp
+.Sm off
+.D1 Ar file : line : column : \ level :
+.Sm on
+.Pp
+where the fields have the following meanings:
+.Bl -tag -width "column"
+.It Ar file
+The name of the input file causing the message.
+.It Ar line
+The line number in that input file.
+Line numbering starts at 1.
+.It Ar column
+The column number in that input file.
+Column numbering starts at 1.
+If the issue is caused by a word, the column number usually
+points to the first character of the word.
+.It Ar level
+The message level, printed in capital letters.
+.El
+.Pp
+Message levels have the following meanings:
+.Bl -tag -width "warning"
+.It Cm fatal
+The parser is unable to parse a given input file at all.
+No formatted output is produced from that input file.
+.It Cm error
+An input file contains syntax that cannot be safely interpreted,
+either because it is invalid or because
+.Nm
+does not implement it yet.
+By discarding part of the input or inserting missing tokens,
+the parser is able to continue, and the error does not prevent
+generation of formatted output, but typically, preparing that
+output involves information loss, broken document structure
+or unintended formatting.
+.It Cm warning
+An input file uses obsolete, discouraged or non-portable syntax.
+All the same, the meaning of the input is unambiguous and a correct
+rendering can be produced.
+Documents causing warnings may render poorly when using other
+formatting tools instead of
+.Nm .
+.El
+.Pp
+Messages of the
+.Cm warning
+and
+.Cm error
+levels are hidden unless their level, or a lower level, is requested using a
+.Fl W
+option or
+.Fl T Ns Cm lint
+output mode.
+.Pp
+The
+.Nm
+utility may also print messages related to invalid command line arguments
+or operating system errors, for example when memory is exhausted or
+input files cannot be read.
+Such messages do not carry the prefix described above.
+.Sh COMPATIBILITY
+This section summarises
+.Nm
+compatibility with GNU troff.
+Each input and output format is separately noted.
+.Ss ASCII Compatibility
+.Bl -bullet -compact
+.It
+Unrenderable Unicode codepoints specified with
+.Sq \e[uNNNN]
+escapes are printed as
+.Sq \&?
+in mandoc.
+In GNU troff, these raise an error.
+.It
+The
+.Sq \&Bd \-literal
+and
+.Sq \&Bd \-unfilled
+macros of
+.Xr mdoc 7
+in
+.Fl T Ns Cm ascii
+are synonyms, as are \-filled and \-ragged.
+.It
+In historic GNU troff, the
+.Sq \&Pa
+.Xr mdoc 7
+macro does not underline when scoped under an
+.Sq \&It
+in the FILES section.
+This behaves correctly in
+.Nm .
+.It
+A list or display following the
+.Sq \&Ss
+.Xr mdoc 7
+macro in
+.Fl T Ns Cm ascii
+does not assert a prior vertical break, just as it doesn't with
+.Sq \&Sh .
+.It
+The
+.Sq \&na
+.Xr man 7
+macro in
+.Fl T Ns Cm ascii
+has no effect.
+.It
+Words aren't hyphenated.
+.El
+.Ss HTML/XHTML Compatibility
+.Bl -bullet -compact
+.It
+The
+.Sq \efP
+escape will revert the font to the previous
+.Sq \ef
+escape, not to the last rendered decoration, which is now dictated by
+CSS instead of hard-coded.
+It also will not span past the current scope,
+for the same reason.
+Note that in
+.Sx ASCII Output
+mode, this will work fine.
+.It
+The
+.Xr mdoc 7
+.Sq \&Bl \-hang
+and
+.Sq \&Bl \-tag
+list types render similarly (no break following overreached left-hand
+side) due to the expressive constraints of HTML.
+.It
+The
+.Xr man 7
+.Sq IP
+and
+.Sq TP
+lists render similarly.
+.El
+.Sh SEE ALSO
+.Xr eqn 7 ,
+.Xr man 7 ,
+.Xr mandoc_char 7 ,
+.Xr mdoc 7 ,
+.Xr roff 7 ,
+.Xr tbl 7
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
+.Sh CAVEATS
+In
+.Fl T Ns Cm html
+and
+.Fl T Ns Cm xhtml ,
+the maximum size of an element attribute is determined by
+.Dv BUFSIZ ,
+which is usually 1024 bytes.
+Be aware of this when setting long link
+formats such as
+.Fl O Ns Cm style Ns = Ns Ar really/long/link .
+.Pp
+Nesting elements within next-line element scopes of
+.Fl m Ns Cm an ,
+such as
+.Sq br
+within an empty
+.Sq B ,
+will confuse
+.Fl T Ns Cm html
+and
+.Fl T Ns Cm xhtml
+and cause them to forget the formatting of the prior next-line scope.
+.Pp
+The
+.Sq \(aq
+control character is an alias for the standard macro control character
+and does not emit a line-break as stipulated in GNU troff.
diff --git a/mandoc.3 b/mandoc.3
new file mode 100644
index 000000000000..4d0b20d6507e
--- /dev/null
+++ b/mandoc.3
@@ -0,0 +1,600 @@
+.\" $Id: mandoc.3,v 1.17 2012/01/13 15:27:14 joerg Exp $
+.\"
+.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: January 13 2012 $
+.Dt MANDOC 3
+.Os
+.Sh NAME
+.Nm mandoc ,
+.Nm mandoc_escape ,
+.Nm man_meta ,
+.Nm man_mparse ,
+.Nm man_node ,
+.Nm mchars_alloc ,
+.Nm mchars_free ,
+.Nm mchars_num2char ,
+.Nm mchars_num2uc ,
+.Nm mchars_spec2cp ,
+.Nm mchars_spec2str ,
+.Nm mdoc_meta ,
+.Nm mdoc_node ,
+.Nm mparse_alloc ,
+.Nm mparse_free ,
+.Nm mparse_getkeep ,
+.Nm mparse_keep ,
+.Nm mparse_readfd ,
+.Nm mparse_reset ,
+.Nm mparse_result ,
+.Nm mparse_strerror ,
+.Nm mparse_strlevel
+.Nd mandoc macro compiler library
+.Sh LIBRARY
+.Lb mandoc
+.Sh SYNOPSIS
+.In man.h
+.In mdoc.h
+.In mandoc.h
+.Ft "enum mandoc_esc"
+.Fo mandoc_escape
+.Fa "const char **end"
+.Fa "const char **start"
+.Fa "int *sz"
+.Fc
+.Ft "const struct man_meta *"
+.Fo man_meta
+.Fa "const struct man *man"
+.Fc
+.Ft "const struct mparse *"
+.Fo man_mparse
+.Fa "const struct man *man"
+.Fc
+.Ft "const struct man_node *"
+.Fo man_node
+.Fa "const struct man *man"
+.Fc
+.Ft "struct mchars *"
+.Fn mchars_alloc
+.Ft void
+.Fn mchars_free "struct mchars *p"
+.Ft char
+.Fn mchars_num2char "const char *cp" "size_t sz"
+.Ft int
+.Fn mchars_num2uc "const char *cp" "size_t sz"
+.Ft "const char *"
+.Fo mchars_spec2str
+.Fa "const struct mchars *p"
+.Fa "const char *cp"
+.Fa "size_t sz"
+.Fa "size_t *rsz"
+.Fc
+.Ft int
+.Fo mchars_spec2cp
+.Fa "const struct mchars *p"
+.Fa "const char *cp"
+.Fa "size_t sz"
+.Fc
+.Ft "const struct mdoc_meta *"
+.Fo mdoc_meta
+.Fa "const struct mdoc *mdoc"
+.Fc
+.Ft "const struct mdoc_node *"
+.Fo mdoc_node
+.Fa "const struct mdoc *mdoc"
+.Fc
+.Ft "struct mparse *"
+.Fo mparse_alloc
+.Fa "enum mparset type"
+.Fa "enum mandoclevel wlevel"
+.Fa "mandocmsg msg"
+.Fa "void *msgarg"
+.Fc
+.Ft void
+.Fo mparse_free
+.Fa "struct mparse *parse"
+.Fc
+.Ft "const char *"
+.Fo mparse_getkeep
+.Fa "const struct mparse *parse"
+.Fc
+.Ft void
+.Fo mparse_keep
+.Fa "struct mparse *parse"
+.Fc
+.Ft "enum mandoclevel"
+.Fo mparse_readfd
+.Fa "struct mparse *parse"
+.Fa "int fd"
+.Fa "const char *fname"
+.Fc
+.Ft void
+.Fo mparse_reset
+.Fa "struct mparse *parse"
+.Fc
+.Ft void
+.Fo mparse_result
+.Fa "struct mparse *parse"
+.Fa "struct mdoc **mdoc"
+.Fa "struct man **man"
+.Fc
+.Ft "const char *"
+.Fo mparse_strerror
+.Fa "enum mandocerr"
+.Fc
+.Ft "const char *"
+.Fo mparse_strlevel
+.Fa "enum mandoclevel"
+.Fc
+.Vt extern const char * const * man_macronames;
+.Vt extern const char * const * mdoc_argnames;
+.Vt extern const char * const * mdoc_macronames;
+.Fd "#define ASCII_NBRSP"
+.Fd "#define ASCII_HYPH"
+.Sh DESCRIPTION
+The
+.Nm mandoc
+library parses a
+.Ux
+manual into an abstract syntax tree (AST).
+.Ux
+manuals are composed of
+.Xr mdoc 7
+or
+.Xr man 7 ,
+and may be mixed with
+.Xr roff 7 ,
+.Xr tbl 7 ,
+and
+.Xr eqn 7
+invocations.
+.Pp
+The following describes a general parse sequence:
+.Bl -enum
+.It
+initiate a parsing sequence with
+.Fn mparse_alloc ;
+.It
+parse files or file descriptors with
+.Fn mparse_readfd ;
+.It
+retrieve a parsed syntax tree, if the parse was successful, with
+.Fn mparse_result ;
+.It
+iterate over parse nodes with
+.Fn mdoc_node
+or
+.Fn man_node ;
+.It
+free all allocated memory with
+.Fn mparse_free ,
+or invoke
+.Fn mparse_reset
+and parse new files.
+.El
+.Pp
+The
+.Nm
+library also contains routines for translating character strings into glyphs
+.Pq see Fn mchars_alloc
+and parsing escape sequences from strings
+.Pq see Fn mandoc_escape .
+.Sh REFERENCE
+This section documents the functions, types, and variables available
+via
+.In mandoc.h .
+.Ss Types
+.Bl -ohang
+.It Vt "enum mandoc_esc"
+An escape sequence classification.
+.It Vt "enum mandocerr"
+A fatal error, error, or warning message during parsing.
+.It Vt "enum mandoclevel"
+A classification of an
+.Vt "enum mandocerr"
+as regards system operation.
+.It Vt "struct mchars"
+An opaque pointer to an object allowing for translation between
+character strings and glyphs.
+See
+.Fn mchars_alloc .
+.It Vt "enum mparset"
+The type of parser when reading input.
+This should usually be
+.Dv MPARSE_AUTO
+for auto-detection.
+.It Vt "struct mparse"
+An opaque pointer to a running parse sequence.
+Created with
+.Fn mparse_alloc
+and freed with
+.Fn mparse_free .
+This may be used across parsed input if
+.Fn mparse_reset
+is called between parses.
+.It Vt "mandocmsg"
+A prototype for a function to handle fatal error, error, and warning
+messages emitted by the parser.
+.El
+.Ss Functions
+.Bl -ohang
+.It Fn mandoc_escape
+Scan an escape sequence, i.e., a character string beginning with
+.Sq \e .
+Pass a pointer to the character after the
+.Sq \e
+as
+.Va end ;
+it will be set to the supremum of the parsed escape sequence unless
+returning
+.Dv ESCAPE_ERROR ,
+in which case the string is bogus and should be
+thrown away.
+If not
+.Dv ESCAPE_ERROR
+or
+.Dv ESCAPE_IGNORE ,
+.Va start
+is set to the first relevant character of the substring (font, glyph,
+whatever) of length
+.Va sz .
+Both
+.Va start
+and
+.Va sz
+may be
+.Dv NULL .
+.It Fn man_meta
+Obtain the meta-data of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn man_mparse
+Get the parser used for the current output.
+.It Fn man_node
+Obtain the root node of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn mchars_alloc
+Allocate an
+.Vt "struct mchars *"
+object for translating special characters into glyphs.
+See
+.Xr mandoc_char 7
+for an overview of special characters.
+The object must be freed with
+.Fn mchars_free .
+.It Fn mchars_free
+Free an object created with
+.Fn mchars_alloc .
+.It Fn mchars_num2char
+Convert a character index (e.g., the \eN\(aq\(aq escape) into a
+printable ASCII character.
+Returns \e0 (the nil character) if the input sequence is malformed.
+.It Fn mchars_num2uc
+Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
+a Unicode codepoint.
+Returns \e0 (the nil character) if the input sequence is malformed.
+.It Fn mchars_spec2cp
+Convert a special character into a valid Unicode codepoint.
+Returns \-1 on failure or a non-zero Unicode codepoint on success.
+.It Fn mchars_spec2str
+Convert a special character into an ASCII string.
+Returns
+.Dv NULL
+on failure.
+.It Fn mdoc_meta
+Obtain the meta-data of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn mdoc_node
+Obtain the root node of a successful parse.
+This may only be used on a pointer returned by
+.Fn mparse_result .
+.It Fn mparse_alloc
+Allocate a parser.
+The same parser may be used for multiple files so long as
+.Fn mparse_reset
+is called between parses.
+.Fn mparse_free
+must be called to free the memory allocated by this function.
+.It Fn mparse_free
+Free all memory allocated by
+.Fn mparse_alloc .
+.It Fn mparse_getkeep
+Acquire the keep buffer.
+Must follow a call of
+.Fn mparse_keep .
+.It Fn mparse_keep
+Instruct the parser to retain a copy of its parsed input.
+This can be acquired with subsequent
+.Fn mparse_getkeep
+calls.
+.It Fn mparse_readfd
+Parse a file or file descriptor.
+If
+.Va fd
+is \-1,
+.Va fname
+is opened for reading.
+Otherwise,
+.Va fname
+is assumed to be the name associated with
+.Va fd .
+This may be called multiple times with different parameters; however,
+.Fn mparse_reset
+should be invoked between parses.
+.It Fn mparse_reset
+Reset a parser so that
+.Fn mparse_readfd
+may be used again.
+.It Fn mparse_result
+Obtain the result of a parse.
+Only successful parses
+.Po
+i.e., those where
+.Fn mparse_readfd
+returned less than MANDOCLEVEL_FATAL
+.Pc
+should invoke this function, in which case one of the two pointers will
+be filled in.
+.It Fn mparse_strerror
+Return a statically-allocated string representation of an error code.
+.It Fn mparse_strlevel
+Return a statically-allocated string representation of a level code.
+.El
+.Ss Variables
+.Bl -ohang
+.It Va man_macronames
+The string representation of a man macro as indexed by
+.Vt "enum mant" .
+.It Va mdoc_argnames
+The string representation of a mdoc macro argument as indexed by
+.Vt "enum mdocargt" .
+.It Va mdoc_macronames
+The string representation of a mdoc macro as indexed by
+.Vt "enum mdoct" .
+.El
+.Sh IMPLEMENTATION NOTES
+This section consists of structural documentation for
+.Xr mdoc 7
+and
+.Xr man 7
+syntax trees and strings.
+.Ss Man and Mdoc Strings
+Strings may be extracted from mdoc and man meta-data, or from text
+nodes (MDOC_TEXT and MAN_TEXT, respectively).
+These strings have special non-printing formatting cues embedded in the
+text itself, as well as
+.Xr roff 7
+escapes preserved from input.
+Implementing systems will need to handle both situations to produce
+human-readable text.
+In general, strings may be assumed to consist of 7-bit ASCII characters.
+.Pp
+The following non-printing characters may be embedded in text strings:
+.Bl -tag -width Ds
+.It Dv ASCII_NBRSP
+A non-breaking space character.
+.It Dv ASCII_HYPH
+A soft hyphen.
+.El
+.Pp
+Escape characters are also passed verbatim into text strings.
+An escape character is a sequence of characters beginning with the
+backslash
+.Pq Sq \e .
+To construct human-readable text, these should be intercepted with
+.Fn mandoc_escape
+and converted with one of
+.Fn mchars_num2char ,
+.Fn mchars_spec2str ,
+and so on.
+.Ss Man Abstract Syntax Tree
+This AST is governed by the ontological rules dictated in
+.Xr man 7
+and derives its terminology accordingly.
+.Pp
+The AST is composed of
+.Vt struct man_node
+nodes with element, root and text types as declared by the
+.Va type
+field.
+Each node also provides its parse point (the
+.Va line ,
+.Va sec ,
+and
+.Va pos
+fields), its position in the tree (the
+.Va parent ,
+.Va child ,
+.Va next
+and
+.Va prev
+fields) and some type-specific data.
+.Pp
+The tree itself is arranged according to the following normal form,
+where capitalised non-terminals represent nodes.
+.Pp
+.Bl -tag -width "ELEMENTXX" -compact
+.It ROOT
+\(<- mnode+
+.It mnode
+\(<- ELEMENT | TEXT | BLOCK
+.It BLOCK
+\(<- HEAD BODY
+.It HEAD
+\(<- mnode*
+.It BODY
+\(<- mnode*
+.It ELEMENT
+\(<- ELEMENT | TEXT*
+.It TEXT
+\(<- [[:ascii:]]*
+.El
+.Pp
+The only elements capable of nesting other elements are those with
+next-line scope as documented in
+.Xr man 7 .
+.Ss Mdoc Abstract Syntax Tree
+This AST is governed by the ontological
+rules dictated in
+.Xr mdoc 7
+and derives its terminology accordingly.
+.Qq In-line
+elements described in
+.Xr mdoc 7
+are described simply as
+.Qq elements .
+.Pp
+The AST is composed of
+.Vt struct mdoc_node
+nodes with block, head, body, element, root and text types as declared
+by the
+.Va type
+field.
+Each node also provides its parse point (the
+.Va line ,
+.Va sec ,
+and
+.Va pos
+fields), its position in the tree (the
+.Va parent ,
+.Va child ,
+.Va nchild ,
+.Va next
+and
+.Va prev
+fields) and some type-specific data, in particular, for nodes generated
+from macros, the generating macro in the
+.Va tok
+field.
+.Pp
+The tree itself is arranged according to the following normal form,
+where capitalised non-terminals represent nodes.
+.Pp
+.Bl -tag -width "ELEMENTXX" -compact
+.It ROOT
+\(<- mnode+
+.It mnode
+\(<- BLOCK | ELEMENT | TEXT
+.It BLOCK
+\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
+.It ELEMENT
+\(<- TEXT*
+.It HEAD
+\(<- mnode*
+.It BODY
+\(<- mnode* [ENDBODY mnode*]
+.It TAIL
+\(<- mnode*
+.It TEXT
+\(<- [[:ascii:]]*
+.El
+.Pp
+Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
+the BLOCK production: these refer to punctuation marks.
+Furthermore, although a TEXT node will generally have a non-zero-length
+string, in the specific case of
+.Sq \&.Bd \-literal ,
+an empty line will produce a zero-length string.
+Multiple body parts are only found in invocations of
+.Sq \&Bl \-column ,
+where a new body introduces a new phrase.
+.Pp
+The
+.Xr mdoc 7
+syntax tree accommodates broken block structures as well.
+The ENDBODY node is available to end the formatting associated
+with a given block before the physical end of that block.
+It has a non-null
+.Va end
+field, is of the BODY
+.Va type ,
+has the same
+.Va tok
+as the BLOCK it is ending, and has a
+.Va pending
+field pointing to that BLOCK's BODY node.
+It is an indirect child of that BODY node
+and has no children of its own.
+.Pp
+An ENDBODY node is generated when a block ends while one of its child
+blocks is still open, like in the following example:
+.Bd -literal -offset indent
+\&.Ao ao
+\&.Bo bo ac
+\&.Ac bc
+\&.Bc end
+.Ed
+.Pp
+This example results in the following block structure:
+.Bd -literal -offset indent
+BLOCK Ao
+ HEAD Ao
+ BODY Ao
+ TEXT ao
+ BLOCK Bo, pending -> Ao
+ HEAD Bo
+ BODY Bo
+ TEXT bo
+ TEXT ac
+ ENDBODY Ao, pending -> Ao
+ TEXT bc
+TEXT end
+.Ed
+.Pp
+Here, the formatting of the
+.Sq \&Ao
+block extends from TEXT ao to TEXT ac,
+while the formatting of the
+.Sq \&Bo
+block extends from TEXT bo to TEXT bc.
+It renders as follows in
+.Fl T Ns Cm ascii
+mode:
+.Pp
+.Dl <ao [bo ac> bc] end
+.Pp
+Support for badly-nested blocks is only provided for backward
+compatibility with some older
+.Xr mdoc 7
+implementations.
+Using badly-nested blocks is
+.Em strongly discouraged ;
+for example, the
+.Fl T Ns Cm html
+and
+.Fl T Ns Cm xhtml
+front-ends to
+.Xr mandoc 1
+are unable to render them in any meaningful way.
+Furthermore, behaviour when encountering badly-nested blocks is not
+consistent across troff implementations, especially when using multiple
+levels of badly-nested blocks.
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr eqn 7 ,
+.Xr man 7 ,
+.Xr mandoc_char 7 ,
+.Xr mdoc 7 ,
+.Xr roff 7 ,
+.Xr tbl 7
+.Sh AUTHORS
+The
+.Nm
+library was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
diff --git a/mandoc.c b/mandoc.c
new file mode 100644
index 000000000000..604bb67e6ae4
--- /dev/null
+++ b/mandoc.c
@@ -0,0 +1,735 @@
+/* $Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+#include "mandoc.h"
+#include "libmandoc.h"
+
+#define DATESIZE 32
+
+static int a2time(time_t *, const char *, const char *);
+static char *time2a(time_t);
+static int numescape(const char *);
+
+/*
+ * Step over a (possibly nested) numerical expression.
+ * The context of this function matters: it is only invoked from within
+ * character-terminated escapes (e.g., \s[xxxyyy]), so the contents of
+ * the expression are irrelevant.  All that is needed is to skip a
+ * leading sub-escape or parenthesised group, so that terminators
+ * embedded in it do not confuse the caller.
+ * Returns the number of characters skipped (zero if the input begins
+ * with neither a backslash nor an opening parenthesis), or -1 on
+ * error, in which case the caller should bail.
+ */
+static int
+numescape(const char *start)
+{
+	const char	*next;
+	int		 pos;
+
+	/* The whole expression is a single escape sequence. */
+
+	if ('\\' == *start) {
+		next = start + 1;
+		if (ESCAPE_ERROR == mandoc_escape(&next, NULL, NULL))
+			return(-1);
+		return(next - start);
+	}
+
+	if ('(' != *start)
+		return(0);
+
+	/*
+	 * A parenthesised group: scan for the closing parenthesis,
+	 * letting mandoc_escape() consume any escape sequences found
+	 * on the way, since those may contain parentheses themselves.
+	 */
+
+	for (pos = 1; ')' != start[pos]; ) {
+		pos += (int)strcspn(start + pos, ")\\");
+
+		if ('\0' == start[pos])
+			return(-1);
+		if (')' == start[pos])
+			break;
+
+		/* start[pos] is a backslash: skip the nested escape. */
+		next = start + pos + 1;
+		if (ESCAPE_ERROR == mandoc_escape(&next, NULL, NULL))
+			return(-1);
+		pos = next - start;
+	}
+
+	/* Account for the terminating ')'. */
+	return(pos + 1);
+}
+
+/*
+ * Parse a roff escape sequence and classify it.
+ * On entry, *end must point to the character following the backslash.
+ * On success, *end is advanced past the escape sequence.  If "start"
+ * is non-NULL, *start is set to the beginning of the relevant
+ * substring (glyph name, font name, number and so on); if "sz" is
+ * non-NULL, *sz is set to the length of that substring.  For numeric
+ * escapes containing nested expressions, *start and *sz carry no
+ * useful meaning.
+ * Returns the class of the escape, or ESCAPE_ERROR if the sequence is
+ * malformed or truncated; in that case the output arguments must not
+ * be relied upon.
+ */
+enum mandoc_esc
+mandoc_escape(const char **end, const char **start, int *sz)
+{
+	char		 c, term, numeric;
+	int		 i, lim, ssz, rlim;
+	const char	*cp, *rstart;
+	enum mandoc_esc	 gly;
+
+	cp = *end;
+	rstart = cp;
+	if (start)
+		*start = rstart;
+	i = lim = 0;
+	gly = ESCAPE_ERROR;
+	term = numeric = '\0';
+
+	switch ((c = cp[i++])) {
+	/*
+	 * First the glyphs.  There are several different forms of
+	 * these, but each eventually returns a substring of the glyph
+	 * name.
+	 */
+	case ('('):
+		gly = ESCAPE_SPECIAL;
+		lim = 2;
+		break;
+	case ('['):
+		gly = ESCAPE_SPECIAL;
+		/*
+		 * Unicode escapes are defined in groff as \[uXXXX] to
+		 * \[u10FFFF], where the contained value must be a valid
+		 * Unicode codepoint.  Here, however, only check whether
+		 * it's not a zero-width escape.
+		 */
+		if ('u' == cp[i] && ']' != cp[i + 1])
+			gly = ESCAPE_UNICODE;
+		term = ']';
+		break;
+	case ('C'):
+		if ('\'' != cp[i])
+			return(ESCAPE_ERROR);
+		gly = ESCAPE_SPECIAL;
+		term = '\'';
+		break;
+
+	/*
+	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
+	 * 'X' is the trigger.  These have opaque sub-strings.
+	 */
+	case ('F'):
+		/* FALLTHROUGH */
+	case ('g'):
+		/* FALLTHROUGH */
+	case ('k'):
+		/* FALLTHROUGH */
+	case ('M'):
+		/* FALLTHROUGH */
+	case ('m'):
+		/* FALLTHROUGH */
+	case ('n'):
+		/* FALLTHROUGH */
+	case ('V'):
+		/* FALLTHROUGH */
+	case ('Y'):
+		gly = ESCAPE_IGNORE;
+		/* FALLTHROUGH */
+	case ('f'):
+		/* Only a bare \f arrives here still unclassified. */
+		if (ESCAPE_ERROR == gly)
+			gly = ESCAPE_FONT;
+
+		rstart = &cp[i];
+		if (start)
+			*start = rstart;
+
+		switch (cp[i++]) {
+		case ('('):
+			lim = 2;
+			break;
+		case ('['):
+			term = ']';
+			break;
+		default:
+			/* One-character name: un-consume it. */
+			lim = 1;
+			i--;
+			break;
+		}
+		break;
+
+	/*
+	 * These escapes are of the form \X'Y', where 'X' is the trigger
+	 * and 'Y' is any string.  These have opaque sub-strings.
+	 */
+	case ('A'):
+		/* FALLTHROUGH */
+	case ('b'):
+		/* FALLTHROUGH */
+	case ('D'):
+		/* FALLTHROUGH */
+	case ('o'):
+		/* FALLTHROUGH */
+	case ('R'):
+		/* FALLTHROUGH */
+	case ('X'):
+		/* FALLTHROUGH */
+	case ('Z'):
+		if ('\'' != cp[i++])
+			return(ESCAPE_ERROR);
+		gly = ESCAPE_IGNORE;
+		term = '\'';
+		break;
+
+	/*
+	 * These escapes are of the form \X'N', where 'X' is the trigger
+	 * and 'N' resolves to a numerical expression.
+	 */
+	case ('B'):
+		/* FALLTHROUGH */
+	case ('h'):
+		/* FALLTHROUGH */
+	case ('H'):
+		/* FALLTHROUGH */
+	case ('L'):
+		/* FALLTHROUGH */
+	case ('l'):
+		gly = ESCAPE_NUMBERED;
+		/* FALLTHROUGH */
+	case ('S'):
+		/* FALLTHROUGH */
+	case ('v'):
+		/* FALLTHROUGH */
+	case ('w'):
+		/* FALLTHROUGH */
+	case ('x'):
+		if (ESCAPE_ERROR == gly)
+			gly = ESCAPE_IGNORE;
+		if ('\'' != cp[i++])
+			return(ESCAPE_ERROR);
+		term = numeric = '\'';
+		break;
+
+	/*
+	 * Special handling for the numbered character escape.
+	 * XXX Do any other escapes need similar handling?
+	 */
+	case ('N'):
+		if ('\0' == cp[i])
+			return(ESCAPE_ERROR);
+		*end = &cp[++i];
+		/* cp[i-1] is the delimiter; a digit cannot delimit. */
+		if (isdigit((unsigned char)cp[i-1]))
+			return(ESCAPE_IGNORE);
+		while (isdigit((unsigned char)**end))
+			(*end)++;
+		if (start)
+			*start = &cp[i];
+		if (sz)
+			*sz = *end - &cp[i];
+		/* Skip the closing delimiter unless at end of string. */
+		if ('\0' != **end)
+			(*end)++;
+		return(ESCAPE_NUMBERED);
+
+	/*
+	 * Sizes get a special category of their own.
+	 */
+	case ('s'):
+		gly = ESCAPE_IGNORE;
+
+		rstart = &cp[i];
+		if (start)
+			*start = rstart;
+
+		/* See +/- counts as a sign. */
+		c = cp[i];
+		if ('+' == c || '-' == c || ASCII_HYPH == c)
+			++i;
+
+		switch (cp[i++]) {
+		case ('('):
+			lim = 2;
+			break;
+		case ('['):
+			term = numeric = ']';
+			break;
+		case ('\''):
+			term = numeric = '\'';
+			break;
+		default:
+			lim = 1;
+			i--;
+			break;
+		}
+
+		/* See +/- counts as a sign. */
+		c = cp[i];
+		if ('+' == c || '-' == c || ASCII_HYPH == c)
+			++i;
+
+		break;
+
+	/*
+	 * Anything else is assumed to be a glyph.
+	 */
+	default:
+		gly = ESCAPE_SPECIAL;
+		lim = 1;
+		i--;
+		break;
+	}
+
+	assert(ESCAPE_ERROR != gly);
+
+	rstart = &cp[i];
+	if (start)
+		*start = rstart;
+
+	/*
+	 * If a terminating block has been specified, we need to
+	 * handle the case of recursion, which could have their
+	 * own terminating blocks that mess up our parse.  This, by the
+	 * way, means that the "start" and "size" values will be
+	 * effectively meaningless.
+	 */
+
+	ssz = 0;
+	if (numeric && -1 == (ssz = numescape(&cp[i])))
+		return(ESCAPE_ERROR);
+
+	i += ssz;
+	rlim = -1;
+
+	/*
+	 * We have a character terminator.  Try to read up to that
+	 * character.  If we can't (i.e., the terminator does not occur
+	 * before the end of the string), then return an error; if we
+	 * can, calculate our length, read past the terminating
+	 * character, and exit.
+	 */
+
+	if ('\0' != term) {
+		*end = strchr(&cp[i], term);
+		/* strchr(3) yields a null pointer when not found. */
+		if (NULL == *end)
+			return(ESCAPE_ERROR);
+
+		rlim = *end - &cp[i];
+		if (sz)
+			*sz = rlim;
+		(*end)++;
+		goto out;
+	}
+
+	assert(lim > 0);
+
+	/*
+	 * We have a numeric limit.  If the string is shorter than that,
+	 * stop and return an error.  Else adjust our endpoint, length,
+	 * and return the current glyph.
+	 */
+
+	if ((size_t)lim > strlen(&cp[i]))
+		return(ESCAPE_ERROR);
+
+	rlim = lim;
+	if (sz)
+		*sz = rlim;
+
+	*end = &cp[i] + lim;
+
+out:
+	assert(rlim >= 0 && rstart);
+
+	/* Run post-processors. */
+
+	switch (gly) {
+	case (ESCAPE_FONT):
+		/*
+		 * Pretend that the constant-width font modes are the
+		 * same as the regular font modes.
+		 */
+		if (2 == rlim && 'C' == *rstart)
+			rstart++;
+		else if (1 != rlim)
+			break;
+
+		switch (*rstart) {
+		case ('3'):
+			/* FALLTHROUGH */
+		case ('B'):
+			gly = ESCAPE_FONTBOLD;
+			break;
+		case ('2'):
+			/* FALLTHROUGH */
+		case ('I'):
+			gly = ESCAPE_FONTITALIC;
+			break;
+		case ('P'):
+			gly = ESCAPE_FONTPREV;
+			break;
+		case ('1'):
+			/* FALLTHROUGH */
+		case ('R'):
+			gly = ESCAPE_FONTROMAN;
+			break;
+		}
+		break;
+	case (ESCAPE_SPECIAL):
+		if (1 != rlim)
+			break;
+		/* The one-character glyph \c is a no-space request. */
+		if ('c' == *rstart)
+			gly = ESCAPE_NOSPACE;
+		break;
+	default:
+		break;
+	}
+
+	return(gly);
+}
+
+/*
+ * calloc(3) wrapper that never returns NULL: on allocation failure,
+ * print the error with perror(3) and exit with MANDOCLEVEL_SYSERR.
+ */
+void *
+mandoc_calloc(size_t num, size_t size)
+{
+	void	*mem;
+
+	mem = calloc(num, size);
+	if (NULL != mem)
+		return(mem);
+
+	perror(NULL);
+	exit((int)MANDOCLEVEL_SYSERR);
+}
+
+
+/*
+ * malloc(3) wrapper that never returns NULL: on allocation failure,
+ * print the error with perror(3) and exit with MANDOCLEVEL_SYSERR.
+ */
+void *
+mandoc_malloc(size_t size)
+{
+	void	*mem;
+
+	mem = malloc(size);
+	if (NULL != mem)
+		return(mem);
+
+	perror(NULL);
+	exit((int)MANDOCLEVEL_SYSERR);
+}
+
+
+/*
+ * realloc(3) wrapper: a NULL result from realloc(3) is treated as
+ * fatal, printing the error with perror(3) and exiting with
+ * MANDOCLEVEL_SYSERR.
+ */
+void *
+mandoc_realloc(void *ptr, size_t size)
+{
+	void	*grown;
+
+	grown = realloc(ptr, size);
+	if (NULL != grown)
+		return(grown);
+
+	perror(NULL);
+	exit((int)MANDOCLEVEL_SYSERR);
+}
+
+/*
+ * Copy exactly sz bytes from ptr into a freshly allocated buffer and
+ * null-terminate the copy.  The source need not be null-terminated
+ * but must provide at least sz readable bytes.
+ * Allocation goes through mandoc_malloc().
+ */
+char *
+mandoc_strndup(const char *ptr, size_t sz)
+{
+	char	*p;
+
+	p = mandoc_malloc(sz + 1);
+	memcpy(p, ptr, sz);
+	/* Index with the size_t itself; casting to int could truncate. */
+	p[sz] = '\0';
+	return(p);
+}
+
+/*
+ * strdup(3) wrapper that never returns NULL: on allocation failure,
+ * print the error with perror(3) and exit with MANDOCLEVEL_SYSERR.
+ */
+char *
+mandoc_strdup(const char *ptr)
+{
+	char	*copy;
+
+	copy = strdup(ptr);
+	if (NULL != copy)
+		return(copy);
+
+	perror(NULL);
+	exit((int)MANDOCLEVEL_SYSERR);
+}
+
+/*
+ * Parse a quoted or unquoted roff-style request or macro argument.
+ * Return a pointer to the parsed argument, which is either the original
+ * pointer or advanced by one byte in case the argument is quoted.
+ * Null-terminate the argument in place.
+ * Collapse pairs of quotes inside quoted arguments.
+ * Advance the argument pointer to the next argument,
+ * or to the null byte terminating the argument line.
+ *
+ * The buffer *cpp points into is modified in place.  The "parse" and
+ * "ln" arguments are only handed on to mandoc_msg() for diagnostics.
+ * On return, *pos has been advanced by the number of bytes consumed,
+ * counting the opening quote of a quoted argument.
+ * MANDOCERR_BADQUOTE is reported for a quoted argument that lacks its
+ * closing quote, MANDOCERR_EOLNSPACE when the line ends in blanks.
+ */
+char *
+mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
+{
+ char *start, *cp;
+ int quoted, pairs, white;
+
+ /* Quoting can only start with a new word. */
+ /* quoted: 0 = unquoted, 1 = inside quotes, 2 = closing quote seen. */
+ start = *cpp;
+ quoted = 0;
+ if ('"' == *start) {
+ quoted = 1;
+ start++;
+ }
+
+ /*
+ * pairs counts the bytes dropped so far, one for each collapsed
+ * pair; every later byte is shifted left by that amount.
+ */
+ pairs = 0;
+ white = 0;
+ for (cp = start; '\0' != *cp; cp++) {
+ /* Move left after quoted quotes and escaped backslashes. */
+ if (pairs)
+ cp[-pairs] = cp[0];
+ if ('\\' == cp[0]) {
+ if ('\\' == cp[1]) {
+ /* Poor man's copy mode. */
+ pairs++;
+ cp++;
+ } else if (0 == quoted && ' ' == cp[1])
+ /* Skip escaped blanks. */
+ cp++;
+ } else if (0 == quoted) {
+ if (' ' == cp[0]) {
+ /* Unescaped blanks end unquoted args. */
+ white = 1;
+ break;
+ }
+ } else if ('"' == cp[0]) {
+ if ('"' == cp[1]) {
+ /* Quoted quotes collapse. */
+ pairs++;
+ cp++;
+ } else {
+ /* Unquoted quotes end quoted args. */
+ quoted = 2;
+ break;
+ }
+ }
+ }
+
+ /* Quoted argument without a closing quote. */
+ if (1 == quoted)
+ mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
+
+ /* Null-terminate this argument and move to the next one. */
+ /* With collapsed pairs, the compacted text ends "pairs" bytes early. */
+ if (pairs)
+ cp[-pairs] = '\0';
+ if ('\0' != *cp) {
+ *cp++ = '\0';
+ while (' ' == *cp)
+ cp++;
+ }
+ *pos += (int)(cp - start) + (quoted ? 1 : 0);
+ *cpp = cp;
+
+ /* Blanks were skipped, yet nothing follows them on the line. */
+ if ('\0' == *cp && (white || ' ' == cp[-1]))
+ mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
+
+ return(start);
+}
+
+/*
+ * Parse the date string "p" according to the strptime(3) format "fmt".
+ * On success, meaning the whole string matched, store the result of
+ * mktime(3) in *t and return 1.  Otherwise return 0 and leave *t
+ * untouched.  Without HAVE_STRPTIME, parsing always fails.
+ */
+static int
+a2time(time_t *t, const char *fmt, const char *p)
+{
+	struct tm	 parsed;
+	char		*rest;
+
+	memset(&parsed, 0, sizeof(struct tm));
+
+#ifdef	HAVE_STRPTIME
+	rest = strptime(p, fmt, &parsed);
+#else
+	rest = NULL;
+#endif
+	/* Reject both a failed parse and trailing garbage. */
+	if (NULL == rest || '\0' != *rest)
+		return(0);
+
+	*t = mktime(&parsed);
+	return(1);
+}
+
+/*
+ * Format a time as "Month day, year" in local time.
+ * Returns a buffer allocated with mandoc_malloc() that the caller
+ * must free, or NULL if the time cannot be converted or formatted.
+ */
+static char *
+time2a(time_t t)
+{
+	struct tm	*tm;
+	char		*buf, *p;
+	size_t		 ssz;
+	int		 isz;
+
+	/* localtime(3) returns NULL for times it cannot represent. */
+	tm = localtime(&t);
+	if (NULL == tm)
+		return(NULL);
+
+	/*
+	 * Reserve space:
+	 * up to 9 characters for the month (September) + blank
+	 * up to 2 characters for the day + comma + blank
+	 * 4 characters for the year and a terminating '\0'
+	 */
+	p = buf = mandoc_malloc(10 + 4 + 4 + 1);
+
+	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
+		goto fail;
+	p += (int)ssz;
+
+	if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)))
+		goto fail;
+	p += isz;
+
+	if (0 == strftime(p, 4 + 1, "%Y", tm))
+		goto fail;
+	return(buf);
+
+fail:
+	free(buf);
+	return(NULL);
+}
+
+/*
+ * Normalise a date string into the "Month day, year" form.
+ * A missing, empty or unexpanded "$Mdocdate$" input triggers
+ * MANDOCERR_NODATE and the current time is used instead.
+ * Dates already in %Y-%m-%d form are returned as a verbatim copy.
+ * Input matching none of the known formats triggers MANDOCERR_BADDATE
+ * and is likewise copied verbatim.
+ * The result is always a newly allocated string.
+ */
+char *
+mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
+{
+	char		*out;
+	time_t		 t;
+
+	if (NULL == in || '\0' == *in ||
+	    0 == strcmp(in, "$" "Mdocdate$")) {
+		mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
+		time(&t);
+	}
+	else if (a2time(&t, "%Y-%m-%d", in))
+		t = 0;
+	else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
+	    !a2time(&t, "%b %d, %Y", in)) {
+		mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
+		t = 0;
+	}
+
+	/* A zero time means the input is to be kept unchanged. */
+	out = t ? time2a(t) : NULL;
+	if (NULL != out)
+		return(out);
+
+	/*
+	 * Fall back to the input.  It may be NULL here if formatting
+	 * the current time failed, and strdup(3) must not see NULL.
+	 */
+	return(mandoc_strdup(NULL == in ? "" : in));
+}
+
+/*
+ * Decide whether the sz bytes at p end a sentence.
+ * The text is scanned backwards from its last byte.  Closing quotes,
+ * brackets and parentheses met before any end-of-sentence punctuation
+ * mark the text as enclosed; '.', '!' and '?' count as end-of-sentence
+ * punctuation.  The first other character settles the result: the
+ * punctuation must have been seen and, if enclosed, that character
+ * must be alphanumeric.  If the text holds nothing but such symbols,
+ * it ends a sentence only if punctuation was seen and it is not
+ * enclosed.  Returns 1 for end of sentence, 0 otherwise.
+ */
+int
+mandoc_eos(const char *p, size_t sz, int enclosed)
+{
+	size_t	 left;
+	int	 seen;
+	char	 c;
+
+	/*
+	 * End-of-sentence recognition must include situations where
+	 * some symbols, such as `)', allow prior EOS punctuation to
+	 * propagate outward.
+	 */
+
+	seen = 0;
+	for (left = sz; left > 0; left--) {
+		c = p[left - 1];
+
+		if ('.' == c || '!' == c || '?' == c) {
+			seen = 1;
+			continue;
+		}
+
+		if ('\"' == c || '\'' == c || ']' == c || ')' == c) {
+			if (0 == seen)
+				enclosed = 1;
+			continue;
+		}
+
+		return(seen && (!enclosed || isalnum((unsigned char)c)));
+	}
+
+	return(seen && !enclosed);
+}
+
+/*
+ * Check whether the text at cp[*ppos] begins a macro line, that is,
+ * whether it starts with a control character ('.' or '\'') or with
+ * the two-byte sequence "\.".  If so, advance *ppos past the control
+ * character and any blanks or tabs following it and return one.
+ * Otherwise return zero and leave *ppos alone.
+ */
+int
+mandoc_getcontrol(const char *cp, int *ppos)
+{
+	const char	*s;
+
+	s = cp + *ppos;
+
+	switch (*s) {
+	case ('\\'):
+		if ('.' != s[1])
+			return(0);
+		s += 2;
+		break;
+	case ('.'):
+		/* FALLTHROUGH */
+	case ('\''):
+		s++;
+		break;
+	default:
+		return(0);
+	}
+
+	/* Swallow the whitespace between control character and name. */
+	s += strspn(s, " \t");
+
+	*ppos = (int)(s - cp);
+	return(1);
+}
+
+/*
+ * Convert the first sz bytes of p to an int using strtol(3) with the
+ * given base.
+ * Returns -1 if the input is longer than 31 bytes, is empty, or is
+ * followed by bytes that are not part of the number.  Values outside
+ * the range of int are clamped to INT_MIN or INT_MAX.
+ * Negative numbers are passed through as they are, so a caller that
+ * only accepts non-negative results has to treat every negative
+ * return value as a failure.
+ */
+int
+mandoc_strntoi(const char *p, size_t sz, int base)
+{
+	char	 num[32];
+	char	*endp;
+	long	 val;
+
+	/* Leave room for the terminating null byte. */
+	if (sz >= sizeof(num))
+		return(-1);
+
+	memcpy(num, p, sz);
+	num[sz] = '\0';
+
+	errno = 0;
+	val = strtol(num, &endp, base);
+
+	if ('\0' == num[0] || '\0' != *endp)
+		return(-1);
+
+	if (val > INT_MAX)
+		return(INT_MAX);
+	if (val < INT_MIN)
+		return(INT_MIN);
+
+	return((int)val);
+}
diff --git a/mandoc.h b/mandoc.h
new file mode 100644
index 000000000000..a37effc5f58e
--- /dev/null
+++ b/mandoc.h
@@ -0,0 +1,432 @@
+/* $Id: mandoc.h,v 1.99 2012/02/16 20:51:31 joerg Exp $ */
+/*
+ * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MANDOC_H
+#define MANDOC_H
+
+#define ASCII_NBRSP 31 /* non-breaking space */
+#define ASCII_HYPH 30 /* breakable hyphen */
+
+/*
+ * Status level. This refers to both internal status (i.e., whilst
+ * running, when warnings/errors are reported) and an indicator of a
+ * threshold of when to halt (when said internal state exceeds the
+ * threshold).
+ */
+enum mandoclevel {
+ MANDOCLEVEL_OK = 0,
+ MANDOCLEVEL_RESERVED,
+ MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. */
+ MANDOCLEVEL_ERROR, /* input has been thrown away */
+ MANDOCLEVEL_FATAL, /* input is borked */
+ MANDOCLEVEL_BADARG, /* bad argument in invocation */
+ MANDOCLEVEL_SYSERR, /* system error */
+ MANDOCLEVEL_MAX
+};
+
+/*
+ * All possible things that can go wrong within a parse, be it libroff,
+ * libmdoc, or libman.
+ */
+enum mandocerr {
+ MANDOCERR_OK,
+
+ MANDOCERR_WARNING, /* ===== start of warnings ===== */
+
+ /* related to the prologue */
+ MANDOCERR_NOTITLE, /* no title in document */
+ MANDOCERR_UPPERCASE, /* document title should be all caps */
+ MANDOCERR_BADMSEC, /* unknown manual section */
+ MANDOCERR_NODATE, /* date missing, using today's date */
+ MANDOCERR_BADDATE, /* cannot parse date, using it verbatim */
+ MANDOCERR_PROLOGOOO, /* prologue macros out of order */
+ MANDOCERR_PROLOGREP, /* duplicate prologue macro */
+ MANDOCERR_BADPROLOG, /* macro not allowed in prologue */
+ MANDOCERR_BADBODY, /* macro not allowed in body */
+
+ /* related to document structure */
+ MANDOCERR_SO, /* .so is fragile, better use ln(1) */
+ MANDOCERR_NAMESECFIRST, /* NAME section must come first */
+ MANDOCERR_BADNAMESEC, /* bad NAME section contents */
+ MANDOCERR_NONAME, /* manual name not yet set */
+ MANDOCERR_SECOOO, /* sections out of conventional order */
+ MANDOCERR_SECREP, /* duplicate section name */
+ MANDOCERR_SECMSEC, /* section not in conventional manual section */
+
+ /* related to macros and nesting */
+ MANDOCERR_MACROOBS, /* skipping obsolete macro */
+ MANDOCERR_IGNPAR, /* skipping paragraph macro */
+ MANDOCERR_IGNNS, /* skipping no-space macro */
+ MANDOCERR_SCOPENEST, /* blocks badly nested */
+ MANDOCERR_CHILD, /* child violates parent syntax */
+ MANDOCERR_NESTEDDISP, /* nested displays are not portable */
+ MANDOCERR_SCOPEREP, /* already in literal mode */
+ MANDOCERR_LINESCOPE, /* line scope broken */
+
+ /* related to missing macro arguments */
+ MANDOCERR_MACROEMPTY, /* skipping empty macro */
+ MANDOCERR_ARGCWARN, /* argument count wrong */
+ MANDOCERR_DISPTYPE, /* missing display type */
+ MANDOCERR_LISTFIRST, /* list type must come first */
+ MANDOCERR_NOWIDTHARG, /* tag lists require a width argument */
+ MANDOCERR_FONTTYPE, /* missing font type */
+ MANDOCERR_WNOSCOPE, /* skipping end of block that is not open */
+
+ /* related to bad macro arguments */
+ MANDOCERR_IGNARGV, /* skipping argument */
+ MANDOCERR_ARGVREP, /* duplicate argument */
+ MANDOCERR_DISPREP, /* duplicate display type */
+ MANDOCERR_LISTREP, /* duplicate list type */
+ MANDOCERR_BADATT, /* unknown AT&T UNIX version */
+ MANDOCERR_BADBOOL, /* bad Boolean value */
+ MANDOCERR_BADFONT, /* unknown font */
+ MANDOCERR_BADSTANDARD, /* unknown standard specifier */
+ MANDOCERR_BADWIDTH, /* bad width argument */
+
+ /* related to plain text */
+ MANDOCERR_NOBLANKLN, /* blank line in non-literal context */
+ MANDOCERR_BADTAB, /* tab in non-literal context */
+ MANDOCERR_EOLNSPACE, /* end of line whitespace */
+ MANDOCERR_BADCOMMENT, /* bad comment style */
+ MANDOCERR_BADESCAPE, /* unknown escape sequence */
+ MANDOCERR_BADQUOTE, /* unterminated quoted string */
+
+ /* related to equations */
+ MANDOCERR_EQNQUOTE, /* unexpected literal in equation */
+
+ MANDOCERR_ERROR, /* ===== start of errors ===== */
+
+ /* related to equations */
+ MANDOCERR_EQNNSCOPE, /* unexpected equation scope closure*/
+ MANDOCERR_EQNSCOPE, /* equation scope open on exit */
+ MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */
+ MANDOCERR_EQNEOF, /* unexpected end of equation */
+ MANDOCERR_EQNSYNT, /* equation syntax error */
+
+ /* related to tables */
+ MANDOCERR_TBL, /* bad table syntax */
+ MANDOCERR_TBLOPT, /* bad table option */
+ MANDOCERR_TBLLAYOUT, /* bad table layout */
+ MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
+ MANDOCERR_TBLNODATA, /* no table data cells specified */
+ MANDOCERR_TBLIGNDATA, /* ignore data in cell */
+ MANDOCERR_TBLBLOCK, /* data block still open */
+ MANDOCERR_TBLEXTRADAT, /* ignoring extra data cells */
+
+ MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */
+ MANDOCERR_BADCHAR, /* skipping bad character */
+ MANDOCERR_NAMESC, /* escaped character not allowed in a name */
+ MANDOCERR_NOTEXT, /* skipping text before the first section header */
+ MANDOCERR_MACRO, /* skipping unknown macro */
+ MANDOCERR_REQUEST, /* NOT IMPLEMENTED: skipping request */
+ MANDOCERR_ARGCOUNT, /* argument count wrong */
+ MANDOCERR_NOSCOPE, /* skipping end of block that is not open */
+ MANDOCERR_SCOPEBROKEN, /* missing end of block */
+ MANDOCERR_SCOPEEXIT, /* scope open on exit */
+ MANDOCERR_UNAME, /* uname(3) system call failed */
+ /* FIXME: merge following with MANDOCERR_ARGCOUNT */
+ MANDOCERR_NOARGS, /* macro requires line argument(s) */
+ MANDOCERR_NOBODY, /* macro requires body argument(s) */
+ MANDOCERR_NOARGV, /* macro requires argument(s) */
+ MANDOCERR_LISTTYPE, /* missing list type */
+ MANDOCERR_ARGSLOST, /* line argument(s) will be lost */
+ MANDOCERR_BODYLOST, /* body argument(s) will be lost */
+
+ MANDOCERR_FATAL, /* ===== start of fatal errors ===== */
+
+ MANDOCERR_NOTMANUAL, /* manual isn't really a manual */
+ MANDOCERR_COLUMNS, /* column syntax is inconsistent */
+ MANDOCERR_BADDISP, /* NOT IMPLEMENTED: .Bd -file */
+ MANDOCERR_SYNTARGVCOUNT, /* argument count wrong, violates syntax */
+ MANDOCERR_SYNTCHILD, /* child violates parent syntax */
+ MANDOCERR_SYNTARGCOUNT, /* argument count wrong, violates syntax */
+ MANDOCERR_SOPATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */
+ MANDOCERR_NODOCBODY, /* no document body */
+ MANDOCERR_NODOCPROLOG, /* no document prologue */
+ MANDOCERR_MEM, /* static buffer exhausted */
+ MANDOCERR_MAX
+};
+
+struct tbl { /* table-wide settings and column count */
+ char tab; /* cell-separator */
+ char decimal; /* decimal point */
+ int linesize; /* NOTE(review): presumably the tbl `linesize' option value -- confirm */
+ int opts; /* presumably a bitwise OR of the TBL_OPT_* bits below -- confirm */
+#define TBL_OPT_CENTRE (1 << 0)
+#define TBL_OPT_EXPAND (1 << 1)
+#define TBL_OPT_BOX (1 << 2)
+#define TBL_OPT_DBOX (1 << 3)
+#define TBL_OPT_ALLBOX (1 << 4)
+#define TBL_OPT_NOKEEP (1 << 5)
+#define TBL_OPT_NOSPACE (1 << 6)
+ int cols; /* number of columns */
+};
+
+enum tbl_headt {
+ TBL_HEAD_DATA, /* plug in data from tbl_dat */
+ TBL_HEAD_VERT, /* vertical spacer */
+ TBL_HEAD_DVERT /* double-vertical spacer */
+};
+
+/*
+ * The head of a table specifies all of its columns. When formatting a
+ * tbl_span, iterate over these and plug in data from the tbl_span when
+ * appropriate, using tbl_cell as a guide to placement.
+ */
+struct tbl_head {
+ enum tbl_headt pos; /* kind of column: data or a (double) vertical spacer */
+ int ident; /* 0 <= unique id < cols */
+ struct tbl_head *next; /* next head in the list */
+ struct tbl_head *prev; /* previous head in the list */
+};
+
+enum tbl_cellt {
+ TBL_CELL_CENTRE, /* c, C */
+ TBL_CELL_RIGHT, /* r, R */
+ TBL_CELL_LEFT, /* l, L */
+ TBL_CELL_NUMBER, /* n, N */
+ TBL_CELL_SPAN, /* s, S */
+ TBL_CELL_LONG, /* a, A */
+ TBL_CELL_DOWN, /* ^ */
+ TBL_CELL_HORIZ, /* _, - */
+ TBL_CELL_DHORIZ, /* = */
+ TBL_CELL_VERT, /* | */
+ TBL_CELL_DVERT, /* || */
+ TBL_CELL_MAX
+};
+
+/*
+ * A cell in a layout row.
+ */
+struct tbl_cell {
+ struct tbl_cell *next; /* next cell in the layout row */
+ enum tbl_cellt pos; /* layout key of the cell, see enum tbl_cellt */
+ size_t spacing; /* NOTE(review): presumably the column spacing from the layout -- confirm */
+ int flags; /* presumably a bitwise OR of the modifier bits below -- confirm */
+#define TBL_CELL_TALIGN (1 << 0) /* t, T */
+#define TBL_CELL_BALIGN (1 << 1) /* d, D */
+#define TBL_CELL_BOLD (1 << 2) /* fB, B, b */
+#define TBL_CELL_ITALIC (1 << 3) /* fI, I, i */
+#define TBL_CELL_EQUAL (1 << 4) /* e, E */
+#define TBL_CELL_UP (1 << 5) /* u, U */
+#define TBL_CELL_WIGN (1 << 6) /* z, Z */
+ struct tbl_head *head; /* presumably the column head this cell is placed under -- confirm */
+};
+
+/*
+ * A layout row.
+ */
+struct tbl_row {
+ struct tbl_row *next; /* next layout row */
+ struct tbl_cell *first; /* first layout cell of this row */
+ struct tbl_cell *last; /* last layout cell of this row */
+};
+
+enum tbl_datt {
+ TBL_DATA_NONE, /* has no data */
+ TBL_DATA_DATA, /* consists of data/string */
+ TBL_DATA_HORIZ, /* horizontal line */
+ TBL_DATA_DHORIZ, /* double-horizontal line */
+ TBL_DATA_NHORIZ, /* squeezed horizontal line */
+ TBL_DATA_NDHORIZ /* squeezed double-horizontal line */
+};
+
+/*
+ * A cell within a row of data. The "string" field contains the actual
+ * string value that's in the cell. The rest is layout.
+ */
+struct tbl_dat {
+ struct tbl_cell *layout; /* layout cell */
+ int spans; /* how many spans follow */
+ struct tbl_dat *next; /* next data cell in the row */
+ char *string; /* data (NULL if not TBL_DATA_DATA) */
+ enum tbl_datt pos; /* kind of content, see enum tbl_datt */
+};
+
+enum tbl_spant {
+ TBL_SPAN_DATA, /* span consists of data */
+ TBL_SPAN_HORIZ, /* span is horizontal line */
+ TBL_SPAN_DHORIZ /* span is double horizontal line */
+};
+
+/*
+ * A row of data in a table.
+ */
+struct tbl_span {
+ struct tbl *tbl; /* presumably the table this row belongs to -- confirm */
+ struct tbl_head *head; /* presumably the first column head of that table -- confirm */
+ struct tbl_row *layout; /* layout row */
+ struct tbl_dat *first; /* first data cell of the row */
+ struct tbl_dat *last; /* last data cell of the row */
+ int line; /* parse line */
+ int flags; /* presumably a bitwise OR of the TBL_SPAN_* bits below -- confirm */
+#define TBL_SPAN_FIRST (1 << 0)
+#define TBL_SPAN_LAST (1 << 1)
+ enum tbl_spant pos; /* data row or (double) horizontal line */
+ struct tbl_span *next; /* next row of data */
+};
+
+enum eqn_boxt {
+ EQN_ROOT, /* root of parse tree */
+ EQN_TEXT, /* text (number, variable, whatever) */
+ EQN_SUBEXPR, /* nested `eqn' subexpression */
+ EQN_LIST, /* subexpressions list */
+ EQN_MATRIX /* matrix subexpression */
+};
+
+enum eqn_markt {
+ EQNMARK_NONE = 0,
+ EQNMARK_DOT,
+ EQNMARK_DOTDOT,
+ EQNMARK_HAT,
+ EQNMARK_TILDE,
+ EQNMARK_VEC,
+ EQNMARK_DYAD,
+ EQNMARK_BAR,
+ EQNMARK_UNDER,
+ EQNMARK__MAX
+};
+
+enum eqn_fontt {
+ EQNFONT_NONE = 0,
+ EQNFONT_ROMAN,
+ EQNFONT_BOLD,
+ EQNFONT_FAT,
+ EQNFONT_ITALIC,
+ EQNFONT__MAX
+};
+
+enum eqn_post {
+ EQNPOS_NONE = 0,
+ EQNPOS_OVER,
+ EQNPOS_SUP,
+ EQNPOS_SUB,
+ EQNPOS_TO,
+ EQNPOS_FROM,
+ EQNPOS__MAX
+};
+
+enum eqn_pilet {
+ EQNPILE_NONE = 0,
+ EQNPILE_PILE,
+ EQNPILE_CPILE,
+ EQNPILE_RPILE,
+ EQNPILE_LPILE,
+ EQNPILE_COL,
+ EQNPILE_CCOL,
+ EQNPILE_RCOL,
+ EQNPILE_LCOL,
+ EQNPILE__MAX
+};
+
+ /*
+ * A "box" is a parsed mathematical expression as defined by the eqn.7
+ * grammar.
+ * Boxes form a tree through the first, last, next and parent pointers.
+ * EQN_DEFSIZE expands to INT_MIN, but this header includes nothing
+ * itself: code using the macro must include <limits.h>.
+ */
+struct eqn_box {
+ int size; /* font size of expression */
+#define EQN_DEFSIZE INT_MIN /* presumably marks `size' as unset -- confirm */
+ enum eqn_boxt type; /* type of node */
+ struct eqn_box *first; /* first child node */
+ struct eqn_box *last; /* last child node */
+ struct eqn_box *next; /* node sibling */
+ struct eqn_box *parent; /* parent node */
+ char *text; /* text (or NULL) */
+ char *left; /* NOTE(review): presumably the left delimiter string -- confirm */
+ char *right; /* NOTE(review): presumably the right delimiter string -- confirm */
+ enum eqn_post pos; /* position of next box */
+ enum eqn_markt mark; /* a mark about the box */
+ enum eqn_fontt font; /* font of box */
+ enum eqn_pilet pile; /* equation piling */
+};
+
+/*
+ * An equation consists of a tree of expressions starting at a given
+ * line and position.
+ */
+struct eqn {
+ char *name; /* identifier (or NULL) */
+ struct eqn_box *root; /* root mathematical expression */
+ int ln; /* invocation line */
+ int pos; /* invocation position */
+};
+
+/*
+ * The type of parse sequence. This value is usually passed via the
+ * mandoc(1) command line of -man and -mdoc. It's almost exclusively
+ * -mandoc but the others have been retained for compatibility.
+ */
+enum mparset {
+ MPARSE_AUTO, /* magically determine the document type */
+ MPARSE_MDOC, /* assume -mdoc */
+ MPARSE_MAN /* assume -man */
+};
+
+enum mandoc_esc {
+ ESCAPE_ERROR = 0, /* bail! unparsable escape */
+ ESCAPE_IGNORE, /* escape to be ignored */
+ ESCAPE_SPECIAL, /* a regular special character */
+ ESCAPE_FONT, /* a generic font mode */
+ ESCAPE_FONTBOLD, /* bold font mode */
+ ESCAPE_FONTITALIC, /* italic font mode */
+ ESCAPE_FONTROMAN, /* roman font mode */
+ ESCAPE_FONTPREV, /* previous font mode */
+ ESCAPE_NUMBERED, /* a numbered glyph */
+ ESCAPE_UNICODE, /* a unicode codepoint */
+ ESCAPE_NOSPACE /* suppress space if the last on a line */
+};
+
+typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,
+ const char *, int, int, const char *);
+
+struct mparse;
+struct mchars;
+struct mdoc;
+struct man;
+
+__BEGIN_DECLS
+
+void *mandoc_calloc(size_t, size_t);
+enum mandoc_esc mandoc_escape(const char **, const char **, int *);
+void *mandoc_malloc(size_t);
+void *mandoc_realloc(void *, size_t);
+char *mandoc_strdup(const char *);
+char *mandoc_strndup(const char *, size_t);
+struct mchars *mchars_alloc(void);
+void mchars_free(struct mchars *);
+char mchars_num2char(const char *, size_t);
+int mchars_num2uc(const char *, size_t);
+int mchars_spec2cp(const struct mchars *,
+ const char *, size_t);
+const char *mchars_spec2str(const struct mchars *,
+ const char *, size_t, size_t *);
+struct mparse *mparse_alloc(enum mparset,
+ enum mandoclevel, mandocmsg, void *);
+void mparse_free(struct mparse *);
+void mparse_keep(struct mparse *);
+enum mandoclevel mparse_readfd(struct mparse *, int, const char *);
+enum mandoclevel mparse_readmem(struct mparse *, const void *, size_t,
+ const char *);
+void mparse_reset(struct mparse *);
+void mparse_result(struct mparse *,
+ struct mdoc **, struct man **);
+const char *mparse_getkeep(const struct mparse *);
+const char *mparse_strerror(enum mandocerr);
+const char *mparse_strlevel(enum mandoclevel);
+
+__END_DECLS
+
+#endif /*!MANDOC_H*/
diff --git a/mandoc_char.7 b/mandoc_char.7
new file mode 100644
index 000000000000..acc1b6100d4f
--- /dev/null
+++ b/mandoc_char.7
@@ -0,0 +1,743 @@
+.\" $Id: mandoc_char.7,v 1.51 2011/11/23 10:09:30 kristaps Exp $
+.\"
+.\" Copyright (c) 2003 Jason McIntyre <jmc@openbsd.org>
+.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: November 23 2011 $
+.Dt MANDOC_CHAR 7
+.Os
+.Sh NAME
+.Nm mandoc_char
+.Nd mandoc special characters
+.Sh DESCRIPTION
+This page documents the
+.Xr roff 7
+escape sequences accepted by
+.Xr mandoc 1
+to represent special characters in
+.Xr mdoc 7
+and
+.Xr man 7
+documents.
+.Pp
+The rendering depends on the
+.Xr mandoc 1
+output mode; in ASCII output, most characters are completely
+unintelligible.
+For that reason, using any of the special characters documented here,
+except those discussed in the
+.Sx DESCRIPTION ,
+is strongly discouraged; they are supported merely for backwards
+compatibility with existing documents.
+.Pp
+In particular, in English manual pages, do not use special-character
+escape sequences to represent national language characters in author
+names; instead, provide ASCII transcriptions of the names.
+.Ss Dashes and Hyphens
+In typography there are different types of dashes of various widths:
+the hyphen (-),
+the minus sign (\-),
+the en-dash (\(en),
+and the em-dash (\(em).
+.Pp
+Hyphens are used for adjectives;
+to separate the two parts of a compound word;
+or to separate a word across two successive lines of text.
+The hyphen does not need to be escaped:
+.Bd -unfilled -offset indent
+blue-eyed
+lorry-driver
+.Ed
+.Pp
+The mathematical minus sign is used for negative numbers or subtraction.
+It should be written as
+.Sq \e- :
+.Bd -unfilled -offset indent
+a = 3 \e- 1;
+b = \e-2;
+.Ed
+.Pp
+The en-dash is used to separate the two elements of a range,
+or can be used the same way as an em-dash.
+It should be written as
+.Sq \e(en :
+.Bd -unfilled -offset indent
+pp. 95\e(en97.
+Go away \e(en or else!
+.Ed
+.Pp
+The em-dash can be used to show an interruption
+or can be used the same way as colons, semi-colons, or parentheses.
+It should be written as
+.Sq \e(em :
+.Bd -unfilled -offset indent
+Three things \e(em apples, oranges, and bananas.
+This is not that \e(em rather, this is that.
+.Ed
+.Pp
+Note:
+hyphens, minus signs, and en-dashes look identical under normal ASCII output.
+Other formats, such as PostScript, render them correctly,
+with differing widths.
+.Ss Spaces
+To separate words in normal text, for indenting and alignment
+in literal context, and when none of the following special cases apply,
+just use the normal space character
+.Pq Sq \ .
+.Pp
+When filling text, lines may be broken between words, i.e. at space
+characters.
+To prevent a line break between two particular words,
+use the non-breaking space escape sequence
+.Pq Sq \e~
+instead of the normal space character.
+For example, the input string
+.Dq number\e~1
+will be kept together as
+.Dq number\~1
+on the same output line.
+.Pp
+On request and macro lines, the normal space character serves as an
+argument delimiter.
+To include whitespace in arguments, quoting is usually the best choice.
+In some cases, using either the non-breaking
+.Pq Sq \e~
+or the breaking
+.Pq Sq \e\ \&
+space escape sequence may be preferable.
+To escape macro names and to protect whitespace at the end
+of input lines, the zero-width space
+.Pq Sq \e&
+is often useful.
+For example, in
+.Xr mdoc 7 ,
+a normal space character can be displayed in single quotes in either
+of the following ways:
+.Pp
+.Dl .Sq \(dq \(dq
+.Dl .Sq \e \e&
+.Ss Quotes
+On request and macro lines, the double-quote character
+.Pq Sq \(dq
+is handled specially to allow quoting.
+One way to prevent this special handling is by using the
+.Sq \e(dq
+escape sequence.
+.Pp
+Note that on text lines, literal double-quote characters can be used
+verbatim.
+All other quote-like characters can be used verbatim as well,
+even on request and macro lines.
+.Ss Periods
+The period
+.Pq Sq \&.
+is handled specially at the beginning of an input line,
+where it introduces a
+.Xr roff 7
+request or a macro, and when appearing alone as a macro argument in
+.Xr mdoc 7 .
+In such situations, prepend a zero-width space
+.Pq Sq \e&.
+to make it behave like normal text.
+.Pp
+Do not use the
+.Sq \e.
+escape sequence.
+It does not prevent special handling of the period.
+.Ss Backslashes
+To include a literal backslash
+.Pq Sq \e
+into the output, use the
+.Pq Sq \ee
+escape sequence.
+.Pp
+Note that doubling it
+.Pq Sq \e\e
+is not the right way to output a backslash.
+Because
+.Xr mandoc 1
+does not implement full
+.Xr roff 7
+functionality, it may work with
+.Xr mandoc 1 ,
+but it may have weird effects on complete
+.Xr roff 7
+implementations.
+.Sh SPECIAL CHARACTERS
+Special characters are encoded as
+.Sq \eX
+.Pq for a one-character escape ,
+.Sq \e(XX
+.Pq two-character ,
+and
+.Sq \e[N]
+.Pq N-character .
+For details, see the
+.Em Special Characters
+subsection of the
+.Xr roff 7
+manual.
+.Pp
+Spacing:
+.Bl -column "Input" "Description" -offset indent -compact
+.It Em Input Ta Em Description
+.It \e~ Ta non-breaking, non-collapsing space
+.It \e Ta breaking, non-collapsing n-width space
+.It \e^ Ta zero-width space
+.It \e% Ta zero-width space
+.It \e& Ta zero-width space
+.It \e| Ta zero-width space
+.It \e0 Ta breaking, non-collapsing digit-width space
+.It \ec Ta removes any trailing space (if applicable)
+.El
+.Pp
+Lines:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(ba Ta \(ba Ta bar
+.It \e(br Ta \(br Ta box rule
+.It \e(ul Ta \(ul Ta underscore
+.It \e(rl Ta \(rl Ta overline
+.It \e(bb Ta \(bb Ta broken bar
+.It \e(sl Ta \(sl Ta forward slash
+.It \e(rs Ta \(rs Ta backward slash
+.El
+.Pp
+Text markers:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(ci Ta \(ci Ta circle
+.It \e(bu Ta \(bu Ta bullet
+.It \e(dd Ta \(dd Ta double dagger
+.It \e(dg Ta \(dg Ta dagger
+.It \e(lz Ta \(lz Ta lozenge
+.It \e(sq Ta \(sq Ta white square
+.It \e(ps Ta \(ps Ta paragraph
+.It \e(sc Ta \(sc Ta section
+.It \e(lh Ta \(lh Ta left hand
+.It \e(rh Ta \(rh Ta right hand
+.It \e(at Ta \(at Ta at
+.It \e(sh Ta \(sh Ta hash (pound)
+.It \e(CR Ta \(CR Ta carriage return
+.It \e(OK Ta \(OK Ta check mark
+.El
+.Pp
+Legal symbols:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(co Ta \(co Ta copyright
+.It \e(rg Ta \(rg Ta registered
+.It \e(tm Ta \(tm Ta trademarked
+.El
+.Pp
+Punctuation:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(em Ta \(em Ta em-dash
+.It \e(en Ta \(en Ta en-dash
+.It \e(hy Ta \(hy Ta hyphen
+.It \ee Ta \e Ta back-slash
+.It \e. Ta \. Ta period
+.It \e(r! Ta \(r! Ta upside-down exclamation
+.It \e(r? Ta \(r? Ta upside-down question
+.El
+.Pp
+Quotes:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(Bq Ta \(Bq Ta right low double-quote
+.It \e(bq Ta \(bq Ta right low single-quote
+.It \e(lq Ta \(lq Ta left double-quote
+.It \e(rq Ta \(rq Ta right double-quote
+.It \e(oq Ta \(oq Ta left single-quote
+.It \e(cq Ta \(cq Ta right single-quote
+.It \e(aq Ta \(aq Ta apostrophe quote (text)
+.It \e(dq Ta \(dq Ta double quote (text)
+.It \e(Fo Ta \(Fo Ta left guillemet
+.It \e(Fc Ta \(Fc Ta right guillemet
+.It \e(fo Ta \(fo Ta left single guillemet
+.It \e(fc Ta \(fc Ta right single guillemet
+.El
+.Pp
+Brackets:
+.Bl -column "xxbracketrightbpx" Rendered Description -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(lB Ta \(lB Ta left bracket
+.It \e(rB Ta \(rB Ta right bracket
+.It \e(lC Ta \(lC Ta left brace
+.It \e(rC Ta \(rC Ta right brace
+.It \e(la Ta \(la Ta left angle
+.It \e(ra Ta \(ra Ta right angle
+.It \e(bv Ta \(bv Ta brace extension
+.It \e[braceex] Ta \[braceex] Ta brace extension
+.It \e[bracketlefttp] Ta \[bracketlefttp] Ta top-left hooked bracket
+.It \e[bracketleftbp] Ta \[bracketleftbp] Ta bottom-left hooked bracket
+.It \e[bracketleftex] Ta \[bracketleftex] Ta left hooked bracket extension
+.It \e[bracketrighttp] Ta \[bracketrighttp] Ta top-right hooked bracket
+.It \e[bracketrightbp] Ta \[bracketrightbp] Ta bottom-right hooked bracket
+.It \e[bracketrightex] Ta \[bracketrightex] Ta right hooked bracket extension
+.It \e(lt Ta \(lt Ta top-left hooked brace
+.It \e[bracelefttp] Ta \[bracelefttp] Ta top-left hooked brace
+.It \e(lk Ta \(lk Ta mid-left hooked brace
+.It \e[braceleftmid] Ta \[braceleftmid] Ta mid-left hooked brace
+.It \e(lb Ta \(lb Ta bottom-left hooked brace
+.It \e[braceleftbp] Ta \[braceleftbp] Ta bottom-left hooked brace
+.It \e[braceleftex] Ta \[braceleftex] Ta left hooked brace extension
+.It \e(rt Ta \(rt Ta top-right hooked brace
+.It \e[bracerighttp] Ta \[bracerighttp] Ta top-right hooked brace
+.It \e(rk Ta \(rk Ta mid-right hooked brace
+.It \e[bracerightmid] Ta \[bracerightmid] Ta mid-right hooked brace
+.It \e(rb Ta \(rb Ta bottom-right hooked brace
+.It \e[bracerightbp] Ta \[bracerightbp] Ta bottom-right hooked brace
+.It \e[bracerightex] Ta \[bracerightex] Ta right hooked brace extension
+.It \e[parenlefttp] Ta \[parenlefttp] Ta top-left hooked parenthesis
+.It \e[parenleftbp] Ta \[parenleftbp] Ta bottom-left hooked parenthesis
+.It \e[parenleftex] Ta \[parenleftex] Ta left hooked parenthesis extension
+.It \e[parenrighttp] Ta \[parenrighttp] Ta top-right hooked parenthesis
+.It \e[parenrightbp] Ta \[parenrightbp] Ta bottom-right hooked parenthesis
+.It \e[parenrightex] Ta \[parenrightex] Ta right hooked parenthesis extension
+.El
+.Pp
+Arrows:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(<- Ta \(<- Ta left arrow
+.It \e(-> Ta \(-> Ta right arrow
+.It \e(<> Ta \(<> Ta left-right arrow
+.It \e(da Ta \(da Ta down arrow
+.It \e(ua Ta \(ua Ta up arrow
+.It \e(va Ta \(va Ta up-down arrow
+.It \e(lA Ta \(lA Ta left double-arrow
+.It \e(rA Ta \(rA Ta right double-arrow
+.It \e(hA Ta \(hA Ta left-right double-arrow
+.It \e(uA Ta \(uA Ta up double-arrow
+.It \e(dA Ta \(dA Ta down double-arrow
+.It \e(vA Ta \(vA Ta up-down double-arrow
+.El
+.Pp
+Logical:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(AN Ta \(AN Ta logical and
+.It \e(OR Ta \(OR Ta logical or
+.It \e(no Ta \(no Ta logical not
+.It \e[tno] Ta \[tno] Ta logical not (text)
+.It \e(te Ta \(te Ta existential quantifier
+.It \e(fa Ta \(fa Ta universal quantifier
+.It \e(st Ta \(st Ta such that
+.It \e(tf Ta \(tf Ta therefore
+.It \e(3d Ta \(3d Ta therefore
+.It \e(or Ta \(or Ta bitwise or
+.El
+.Pp
+Mathematical:
+.Bl -column "xxcoproductxx" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(pl Ta \(pl Ta plus
+.It \e(mi Ta \(mi Ta minus
+.It \e- Ta \- Ta minus (text)
+.It \e(-+ Ta \(-+ Ta minus-plus
+.It \e(+- Ta \(+- Ta plus-minus
+.It \e[t+-] Ta \[t+-] Ta plus-minus (text)
+.It \e(pc Ta \(pc Ta centre-dot
+.It \e(mu Ta \(mu Ta multiply
+.It \e[tmu] Ta \[tmu] Ta multiply (text)
+.It \e(c* Ta \(c* Ta circle-multiply
+.It \e(c+ Ta \(c+ Ta circle-plus
+.It \e(di Ta \(di Ta divide
+.It \e[tdi] Ta \[tdi] Ta divide (text)
+.It \e(f/ Ta \(f/ Ta fraction
+.It \e(** Ta \(** Ta asterisk
+.It \e(<= Ta \(<= Ta less-than-equal
+.It \e(>= Ta \(>= Ta greater-than-equal
+.It \e(<< Ta \(<< Ta much less
+.It \e(>> Ta \(>> Ta much greater
+.It \e(eq Ta \(eq Ta equal
+.It \e(!= Ta \(!= Ta not equal
+.It \e(== Ta \(== Ta equivalent
+.It \e(ne Ta \(ne Ta not equivalent
+.It \e(=~ Ta \(=~ Ta congruent
+.It \e(-~ Ta \(-~ Ta asymptotically congruent
+.It \e(ap Ta \(ap Ta asymptotically similar
+.It \e(~~ Ta \(~~ Ta approximately similar
+.It \e(~= Ta \(~= Ta approximately equal
+.It \e(pt Ta \(pt Ta proportionate
+.It \e(es Ta \(es Ta empty set
+.It \e(mo Ta \(mo Ta element
+.It \e(nm Ta \(nm Ta not element
+.It \e(sb Ta \(sb Ta proper subset
+.It \e(nb Ta \(nb Ta not subset
+.It \e(sp Ta \(sp Ta proper superset
+.It \e(nc Ta \(nc Ta not superset
+.It \e(ib Ta \(ib Ta reflexive subset
+.It \e(ip Ta \(ip Ta reflexive superset
+.It \e(ca Ta \(ca Ta intersection
+.It \e(cu Ta \(cu Ta union
+.It \e(/_ Ta \(/_ Ta angle
+.It \e(pp Ta \(pp Ta perpendicular
+.It \e(is Ta \(is Ta integral
+.It \e[integral] Ta \[integral] Ta integral
+.It \e[sum] Ta \[sum] Ta summation
+.It \e[product] Ta \[product] Ta product
+.It \e[coproduct] Ta \[coproduct] Ta coproduct
+.It \e(gr Ta \(gr Ta gradient
+.It \e(sr Ta \(sr Ta square root
+.It \e[sqrt] Ta \[sqrt] Ta square root
+.It \e(lc Ta \(lc Ta left-ceiling
+.It \e(rc Ta \(rc Ta right-ceiling
+.It \e(lf Ta \(lf Ta left-floor
+.It \e(rf Ta \(rf Ta right-floor
+.It \e(if Ta \(if Ta infinity
+.It \e(Ah Ta \(Ah Ta aleph
+.It \e(Im Ta \(Im Ta imaginary
+.It \e(Re Ta \(Re Ta real
+.It \e(pd Ta \(pd Ta partial differential
+.It \e(-h Ta \(-h Ta Planck constant over 2\(*p
+.It \e[12] Ta \[12] Ta one-half
+.It \e[14] Ta \[14] Ta one-fourth
+.It \e[34] Ta \[34] Ta three-fourths
+.El
+.Pp
+Ligatures:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(ff Ta \(ff Ta ff ligature
+.It \e(fi Ta \(fi Ta fi ligature
+.It \e(fl Ta \(fl Ta fl ligature
+.It \e(Fi Ta \(Fi Ta ffi ligature
+.It \e(Fl Ta \(Fl Ta ffl ligature
+.It \e(AE Ta \(AE Ta AE
+.It \e(ae Ta \(ae Ta ae
+.It \e(OE Ta \(OE Ta OE
+.It \e(oe Ta \(oe Ta oe
+.It \e(ss Ta \(ss Ta German eszett
+.It \e(IJ Ta \(IJ Ta IJ ligature
+.It \e(ij Ta \(ij Ta ij ligature
+.El
+.Pp
+Accents:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(a" Ta \(a" Ta Hungarian umlaut
+.It \e(a- Ta \(a- Ta macron
+.It \e(a. Ta \(a. Ta dotted
+.It \e(a^ Ta \(a^ Ta circumflex
+.It \e(aa Ta \(aa Ta acute
+.It \e' Ta \' Ta acute
+.It \e(ga Ta \(ga Ta grave
+.It \e` Ta \` Ta grave
+.It \e(ab Ta \(ab Ta breve
+.It \e(ac Ta \(ac Ta cedilla
+.It \e(ad Ta \(ad Ta dieresis
+.It \e(ah Ta \(ah Ta caron
+.It \e(ao Ta \(ao Ta ring
+.It \e(a~ Ta \(a~ Ta tilde
+.It \e(ho Ta \(ho Ta ogonek
+.It \e(ha Ta \(ha Ta hat (text)
+.It \e(ti Ta \(ti Ta tilde (text)
+.El
+.Pp
+Accented letters:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e('A Ta \('A Ta acute A
+.It \e('E Ta \('E Ta acute E
+.It \e('I Ta \('I Ta acute I
+.It \e('O Ta \('O Ta acute O
+.It \e('U Ta \('U Ta acute U
+.It \e('a Ta \('a Ta acute a
+.It \e('e Ta \('e Ta acute e
+.It \e('i Ta \('i Ta acute i
+.It \e('o Ta \('o Ta acute o
+.It \e('u Ta \('u Ta acute u
+.It \e(`A Ta \(`A Ta grave A
+.It \e(`E Ta \(`E Ta grave E
+.It \e(`I Ta \(`I Ta grave I
+.It \e(`O Ta \(`O Ta grave O
+.It \e(`U Ta \(`U Ta grave U
+.It \e(`a Ta \(`a Ta grave a
+.It \e(`e Ta \(`e Ta grave e
+.It \e(`i Ta \(`i Ta grave i
+.It \e(`o Ta \(`o Ta grave o
+.It \e(`u Ta \(`u Ta grave u
+.It \e(~A Ta \(~A Ta tilde A
+.It \e(~N Ta \(~N Ta tilde N
+.It \e(~O Ta \(~O Ta tilde O
+.It \e(~a Ta \(~a Ta tilde a
+.It \e(~n Ta \(~n Ta tilde n
+.It \e(~o Ta \(~o Ta tilde o
+.It \e(:A Ta \(:A Ta dieresis A
+.It \e(:E Ta \(:E Ta dieresis E
+.It \e(:I Ta \(:I Ta dieresis I
+.It \e(:O Ta \(:O Ta dieresis O
+.It \e(:U Ta \(:U Ta dieresis U
+.It \e(:a Ta \(:a Ta dieresis a
+.It \e(:e Ta \(:e Ta dieresis e
+.It \e(:i Ta \(:i Ta dieresis i
+.It \e(:o Ta \(:o Ta dieresis o
+.It \e(:u Ta \(:u Ta dieresis u
+.It \e(:y Ta \(:y Ta dieresis y
+.It \e(^A Ta \(^A Ta circumflex A
+.It \e(^E Ta \(^E Ta circumflex E
+.It \e(^I Ta \(^I Ta circumflex I
+.It \e(^O Ta \(^O Ta circumflex O
+.It \e(^U Ta \(^U Ta circumflex U
+.It \e(^a Ta \(^a Ta circumflex a
+.It \e(^e Ta \(^e Ta circumflex e
+.It \e(^i Ta \(^i Ta circumflex i
+.It \e(^o Ta \(^o Ta circumflex o
+.It \e(^u Ta \(^u Ta circumflex u
+.It \e(,C Ta \(,C Ta cedilla C
+.It \e(,c Ta \(,c Ta cedilla c
+.It \e(/L Ta \(/L Ta stroke L
+.It \e(/l Ta \(/l Ta stroke l
+.It \e(/O Ta \(/O Ta stroke O
+.It \e(/o Ta \(/o Ta stroke o
+.It \e(oA Ta \(oA Ta ring A
+.It \e(oa Ta \(oa Ta ring a
+.El
+.Pp
+Special letters:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(-D Ta \(-D Ta Eth
+.It \e(Sd Ta \(Sd Ta eth
+.It \e(TP Ta \(TP Ta Thorn
+.It \e(Tp Ta \(Tp Ta thorn
+.It \e(.i Ta \(.i Ta dotless i
+.It \e(.j Ta \(.j Ta dotless j
+.El
+.Pp
+Currency:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(Do Ta \(Do Ta dollar
+.It \e(ct Ta \(ct Ta cent
+.It \e(Eu Ta \(Eu Ta Euro symbol
+.It \e(eu Ta \(eu Ta Euro symbol
+.It \e(Ye Ta \(Ye Ta yen
+.It \e(Po Ta \(Po Ta pound
+.It \e(Cs Ta \(Cs Ta Scandinavian
+.It \e(Fn Ta \(Fn Ta florin
+.El
+.Pp
+Units:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(de Ta \(de Ta degree
+.It \e(%0 Ta \(%0 Ta per-thousand
+.It \e(fm Ta \(fm Ta minute
+.It \e(sd Ta \(sd Ta second
+.It \e(mc Ta \(mc Ta micro
+.El
+.Pp
+Greek letters:
+.Bl -column "Input" "Rendered" "Description" -offset indent -compact
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e(*A Ta \(*A Ta Alpha
+.It \e(*B Ta \(*B Ta Beta
+.It \e(*G Ta \(*G Ta Gamma
+.It \e(*D Ta \(*D Ta Delta
+.It \e(*E Ta \(*E Ta Epsilon
+.It \e(*Z Ta \(*Z Ta Zeta
+.It \e(*Y Ta \(*Y Ta Eta
+.It \e(*H Ta \(*H Ta Theta
+.It \e(*I Ta \(*I Ta Iota
+.It \e(*K Ta \(*K Ta Kappa
+.It \e(*L Ta \(*L Ta Lambda
+.It \e(*M Ta \(*M Ta Mu
+.It \e(*N Ta \(*N Ta Nu
+.It \e(*C Ta \(*C Ta Xi
+.It \e(*O Ta \(*O Ta Omicron
+.It \e(*P Ta \(*P Ta Pi
+.It \e(*R Ta \(*R Ta Rho
+.It \e(*S Ta \(*S Ta Sigma
+.It \e(*T Ta \(*T Ta Tau
+.It \e(*U Ta \(*U Ta Upsilon
+.It \e(*F Ta \(*F Ta Phi
+.It \e(*X Ta \(*X Ta Chi
+.It \e(*Q Ta \(*Q Ta Psi
+.It \e(*W Ta \(*W Ta Omega
+.It \e(*a Ta \(*a Ta alpha
+.It \e(*b Ta \(*b Ta beta
+.It \e(*g Ta \(*g Ta gamma
+.It \e(*d Ta \(*d Ta delta
+.It \e(*e Ta \(*e Ta epsilon
+.It \e(*z Ta \(*z Ta zeta
+.It \e(*y Ta \(*y Ta eta
+.It \e(*h Ta \(*h Ta theta
+.It \e(*i Ta \(*i Ta iota
+.It \e(*k Ta \(*k Ta kappa
+.It \e(*l Ta \(*l Ta lambda
+.It \e(*m Ta \(*m Ta mu
+.It \e(*n Ta \(*n Ta nu
+.It \e(*c Ta \(*c Ta xi
+.It \e(*o Ta \(*o Ta omicron
+.It \e(*p Ta \(*p Ta pi
+.It \e(*r Ta \(*r Ta rho
+.It \e(*s Ta \(*s Ta sigma
+.It \e(*t Ta \(*t Ta tau
+.It \e(*u Ta \(*u Ta upsilon
+.It \e(*f Ta \(*f Ta phi
+.It \e(*x Ta \(*x Ta chi
+.It \e(*q Ta \(*q Ta psi
+.It \e(*w Ta \(*w Ta omega
+.It \e(+h Ta \(+h Ta theta variant
+.It \e(+f Ta \(+f Ta phi variant
+.It \e(+p Ta \(+p Ta pi variant
+.It \e(+e Ta \(+e Ta epsilon variant
+.It \e(ts Ta \(ts Ta sigma terminal
+.El
+.Sh PREDEFINED STRINGS
+Predefined strings are inherited from the macro packages of historical
+troff implementations.
+They are
+.Em not recommended
+for use, as they differ across implementations.
+Manuals using these predefined strings are almost certainly not
+portable.
+.Pp
+Their syntax is similar to special characters, using
+.Sq \e*X
+.Pq for a one-character escape ,
+.Sq \e*(XX
+.Pq two-character ,
+and
+.Sq \e*[N]
+.Pq N-character .
+For details, see the
+.Em Predefined Strings
+subsection of the
+.Xr roff 7
+manual.
+.Bl -column "Input" "Rendered" "Description" -offset indent
+.It Em Input Ta Em Rendered Ta Em Description
+.It \e*(Ba Ta \*(Ba Ta vertical bar
+.It \e*(Ne Ta \*(Ne Ta not equal
+.It \e*(Ge Ta \*(Ge Ta greater-than-equal
+.It \e*(Le Ta \*(Le Ta less-than-equal
+.It \e*(Gt Ta \*(Gt Ta greater-than
+.It \e*(Lt Ta \*(Lt Ta less-than
+.It \e*(Pm Ta \*(Pm Ta plus-minus
+.It \e*(If Ta \*(If Ta infinity
+.It \e*(Pi Ta \*(Pi Ta pi
+.It \e*(Na Ta \*(Na Ta NaN
+.It \e*(Am Ta \*(Am Ta ampersand
+.It \e*R Ta \*R Ta restricted mark
+.It \e*(Tm Ta \*(Tm Ta trade mark
+.It \e*q Ta \*q Ta double-quote
+.It \e*(Rq Ta \*(Rq Ta right-double-quote
+.It \e*(Lq Ta \*(Lq Ta left-double-quote
+.It \e*(lp Ta \*(lp Ta left-parenthesis
+.It \e*(rp Ta \*(rp Ta right-parenthesis
+.It \e*(lq Ta \*(lq Ta left double-quote
+.It \e*(rq Ta \*(rq Ta right double-quote
+.It \e*(ua Ta \*(ua Ta up arrow
+.It \e*(va Ta \*(va Ta up-down arrow
+.It \e*(<= Ta \*(<= Ta less-than-equal
+.It \e*(>= Ta \*(>= Ta greater-than-equal
+.It \e*(aa Ta \*(aa Ta acute
+.It \e*(ga Ta \*(ga Ta grave
+.It \e*(Px Ta \*(Px Ta POSIX standard name
+.It \e*(Ai Ta \*(Ai Ta ANSI standard name
+.El
+.Sh UNICODE CHARACTERS
+The escape sequence
+.Pp
+.Dl \e[uXXXX]
+.Pp
+is interpreted as a Unicode codepoint.
+The codepoint must be in the range above U+0080 and less than U+10FFFF.
+For compatibility, points must be zero-padded to four characters; if
+greater than four characters, no zero padding is allowed.
+Unicode surrogates are not allowed.
+.\" .Pp
+.\" Unicode glyphs attenuate to the
+.\" .Sq \&?
+.\" character if invalid or not rendered by current output media.
+.Sh NUMBERED CHARACTERS
+For backward compatibility with existing manuals,
+.Xr mandoc 1
+also supports the
+.Pp
+.Dl \eN\(aq Ns Ar number Ns \(aq
+.Pp
+escape sequence, inserting the character
+.Ar number
+from the current character set into the output.
+Of course, this is inherently non-portable and is already marked
+as deprecated in the Heirloom roff manual.
+For example, do not use \eN'34', use \e(dq, or even the plain
+.Sq \(dq
+character where possible.
+.Sh COMPATIBILITY
+This section documents compatibility between mandoc and other
+troff implementations, at this time limited to GNU troff
+.Pq Qq groff .
+.Pp
+.Bl -dash -compact
+.It
+The \eN\(aq\(aq escape sequence is limited to printable characters; in
+groff, it accepts arbitrary character numbers.
+.It
+In
+.Fl T Ns Cm ascii ,
+the
+\e(ss, \e(nm, \e(nb, \e(nc, \e(ib, \e(ip, \e(pp, \e[sum], \e[product],
+\e[coproduct], \e(gr, \e(\-h, and \e(a. special characters render
+differently between mandoc and groff.
+.It
+In
+.Fl T Ns Cm html
+and
+.Fl T Ns Cm xhtml ,
+the \e(~=, \e(nb, and \e(nc special characters render differently
+between mandoc and groff.
+.It
+The
+.Fl T Ns Cm ps
+and
+.Fl T Ns Cm pdf
+modes format like
+.Fl T Ns Cm ascii
+instead of rendering glyphs as in groff.
+.It
+The \e[radicalex], \e[sqrtex], and \e(ru special characters have been omitted
+from mandoc either because they are poorly documented or they have no
+known representation.
+.El
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr man 7 ,
+.Xr mdoc 7 ,
+.Xr roff 7
+.Sh AUTHORS
+The
+.Nm
+manual page was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
+.Sh CAVEATS
+The
+.Sq \e*(Ba
+escape mimics the behaviour of the
+.Sq \&|
+character in
+.Xr mdoc 7 ;
+thus, if you wish to render a vertical bar with no side effects, use
+the
+.Sq \e(ba
+escape.
diff --git a/mandocdb.8 b/mandocdb.8
new file mode 100644
index 000000000000..cb48359b3407
--- /dev/null
+++ b/mandocdb.8
@@ -0,0 +1,293 @@
+.\" $Id: mandocdb.8,v 1.17 2011/12/25 21:00:23 schwarze Exp $
+.\"
+.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: December 25 2011 $
+.Dt MANDOCDB 8
+.Os
+.Sh NAME
+.Nm mandocdb
+.Nd index UNIX manuals
+.Sh SYNOPSIS
+.Nm
+.Op Fl avW
+.Op Fl C Ar file
+.Nm
+.Op Fl avW
+.Ar dir ...
+.Nm
+.Op Fl vW
+.Fl d Ar dir
+.Op Ar
+.Nm
+.Op Fl vW
+.Fl u Ar dir
+.Op Ar
+.Nm
+.Fl t Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility extracts keywords from
+.Ux
+manuals and indexes them in a
+.Sx Keyword Database
+and
+.Sx Index Database
+for fast retrieval by
+.Xr apropos 1 ,
+.Xr whatis 1 ,
+and
+.Xr man 1 Ns 's
+.Fl k
+option.
+.Pp
+By default,
+.Nm
+creates databases in each
+.Ar dir
+using the files
+.Sm off
+.Sy man Ar section Li /
+.Op Ar arch Li /
+.Ar title . section
+.Sm on
+and
+.Sm off
+.Sy cat Ar section Li /
+.Op Ar arch Li /
+.Ar title . Sy 0
+.Sm on
+in that directory;
+existing databases are truncated.
+If
+.Ar dir
+is not provided,
+.Nm
+uses the default paths stipulated by
+.Xr man 1 .
+.Pp
+The arguments are as follows:
+.Bl -tag -width "-C file"
+.It Fl a
+Use all directories and files found below
+.Ar dir ... .
+.It Fl C Ar file
+Specify an alternative configuration
+.Ar file
+in
+.Xr man.conf 5
+format.
+.It Fl d Ar dir
+Merge (remove and re-add)
+.Ar
+to the database in
+.Ar dir
+without truncating it.
+.It Fl t Ar
+Check the given
+.Ar files
+for potential problems.
+No databases are modified.
+Implies
+.Fl a
+and
+.Fl W .
+All diagnostic messages are printed to the standard output;
+the standard error output is not used.
+.It Fl u Ar dir
+Remove
+.Ar
+from the database in
+.Ar dir
+without truncating it.
+.It Fl v
+Display all files added to or removed from the index.
+.It Fl W
+Print warnings about potential problems with manual pages
+to the standard error output.
+.El
+.Pp
+If fatal parse errors are encountered while parsing, the offending file
+is printed to stderr, omitted from the index, and the parse continues
+with the next input file.
+.Ss Index Database
+The index database,
+.Pa whatis.index ,
+is a
+.Xr recno 3
+database with record values consisting of
+.Pp
+.Bl -enum -compact
+.It
+the character
+.Cm d ,
+.Cm a ,
+or
+.Cm c
+to indicate the file type
+.Po
+.Xr mdoc 7 ,
+.Xr man 7 ,
+and post-formatted, respectively
+.Pc ,
+.It
+the filename relative to the databases' path,
+.It
+the manual section,
+.It
+the manual title,
+.It
+the architecture
+.Pq often empty ,
+.It
+and the description.
+.El
+.Pp
+Each of the above is NUL-terminated.
+.Pp
+If the record value is zero-length, it is unassigned.
+.Ss Keyword Database
+The keyword database,
+.Pa whatis.db ,
+is a
+.Xr btree 3
+database of NUL-terminated keywords (record length is non-zero string
+length plus one) mapping to a 16-byte binary field consisting of the
+64-bit keyword type and the 64-bit
+.Sx Index Database
+record number, both in network-byte order.
+.Pp
+The type bit-mask consists of the following
+values mapping into
+.Xr mdoc 7
+macro identifiers:
+.Pp
+.Bl -column "x0x0000000000000001ULLx" "xLix" -offset indent -compact
+.It Li 0x0000000000000001ULL Ta \&An
+.It Li 0x0000000000000002ULL Ta \&Ar
+.It Li 0x0000000000000004ULL Ta \&At
+.It Li 0x0000000000000008ULL Ta \&Bsx
+.It Li 0x0000000000000010ULL Ta \&Bx
+.It Li 0x0000000000000020ULL Ta \&Cd
+.It Li 0x0000000000000040ULL Ta \&Cm
+.It Li 0x0000000000000080ULL Ta \&Dv
+.It Li 0x0000000000000100ULL Ta \&Dx
+.It Li 0x0000000000000200ULL Ta \&Em
+.It Li 0x0000000000000400ULL Ta \&Er
+.It Li 0x0000000000000800ULL Ta \&Ev
+.It Li 0x0000000000001000ULL Ta \&Fa
+.It Li 0x0000000000002000ULL Ta \&Fl
+.It Li 0x0000000000004000ULL Ta \&Fn
+.It Li 0x0000000000008000ULL Ta \&Ft
+.It Li 0x0000000000010000ULL Ta \&Fx
+.It Li 0x0000000000020000ULL Ta \&Ic
+.It Li 0x0000000000040000ULL Ta \&In
+.It Li 0x0000000000080000ULL Ta \&Lb
+.It Li 0x0000000000100000ULL Ta \&Li
+.It Li 0x0000000000200000ULL Ta \&Lk
+.It Li 0x0000000000400000ULL Ta \&Ms
+.It Li 0x0000000000800000ULL Ta \&Mt
+.It Li 0x0000000001000000ULL Ta \&Nd
+.It Li 0x0000000002000000ULL Ta \&Nm
+.It Li 0x0000000004000000ULL Ta \&Nx
+.It Li 0x0000000008000000ULL Ta \&Ox
+.It Li 0x0000000010000000ULL Ta \&Pa
+.It Li 0x0000000020000000ULL Ta \&Rs
+.It Li 0x0000000040000000ULL Ta \&Sh
+.It Li 0x0000000080000000ULL Ta \&Ss
+.It Li 0x0000000100000000ULL Ta \&St
+.It Li 0x0000000200000000ULL Ta \&Sy
+.It Li 0x0000000400000000ULL Ta \&Tn
+.It Li 0x0000000800000000ULL Ta \&Va
+.It Li 0x0000001000000000ULL Ta \&Vt
+.It Li 0x0000002000000000ULL Ta \&Xr
+.El
+.Sh IMPLEMENTATION NOTES
+The time to construct a new database pair grows linearly with the
+number of keywords in the input files.
+However, removing or updating entries with
+.Fl u
+or
+.Fl d ,
+respectively, grows as a multiple of the index length and input size.
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa whatis.db
+A
+.Xr btree 3
+keyword database mapping keywords to a type and file reference in
+.Pa whatis.index .
+.It Pa whatis.index
+A
+.Xr recno 3
+database of indexed file-names.
+.It Pa /etc/man.conf
+The default
+.Xr man 1
+configuration file.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -compact
+.It 0
+No errors occurred.
+.It 5
+Invalid command line arguments were specified.
+No input files have been read.
+.It 6
+An operating system error occurred, for example memory exhaustion or an
+error accessing input files.
+Such errors cause
+.Nm
+to exit at once, possibly in the middle of parsing or formatting a file.
+The output databases are corrupt and should be removed.
+.El
+.Sh DIAGNOSTICS
+If the following errors occur, the
+.Nm
+databases should be rebuilt.
+.Bl -diag
+.It "%s: Corrupt database"
+The keyword database file indicated by
+.Pa %s
+is unreadable.
+.It "%s: Corrupt index"
+The index database file indicated by
+.Pa %s
+is unreadable.
+.It "%s: Path too long"
+The path of the file
+.Pa %s
+is too long.
+This usually indicates database corruption or invalid command-line
+arguments.
+.El
+.Sh SEE ALSO
+.Xr apropos 1 ,
+.Xr man 1 ,
+.Xr whatis 1 ,
+.Xr btree 3 ,
+.Xr recno 3 ,
+.Xr man.conf 5
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Kristaps Dzonsons ,
+.Mt kristaps@bsd.lv .
diff --git a/mandocdb.c b/mandocdb.c
new file mode 100644
index 000000000000..e621c1d2a211
--- /dev/null
+++ b/mandocdb.c
@@ -0,0 +1,1909 @@
+/* $Id: mandocdb.c,v 1.46 2012/03/23 06:52:17 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(__linux__)
+# include <endian.h>
+# include <db_185.h>
+#elif defined(__APPLE__)
+# include <libkern/OSByteOrder.h>
+# include <db.h>
+#else
+# include <db.h>
+#endif
+
+#include "man.h"
+#include "mdoc.h"
+#include "mandoc.h"
+#include "mandocdb.h"
+#include "manpath.h"
+
+#define MANDOC_BUFSZ BUFSIZ /* initial size of the keyword and description buffers set up in main() */
+#define MANDOC_SLOP 1024 /* NOTE(review): no use visible in this part of the file; presumably buffer growth slack -- confirm */
+
+#define MANDOC_SRC 0x1 /* struct of src_form bit: index_merge() attempts a source parse when set */
+#define MANDOC_FORM 0x2 /* struct of src_form bit: file is known to be formatted; no source parse unless MANDOC_SRC is set too */
+
+#define WARNING(_f, _b, _fmt, _args...) /* named variadic parameter: GNU C syntax */ \
+ do if (warnings) { /* silent unless the global warnings flag is set */ \
+ fprintf(stderr, "%s: ", (_b)); /* prefix string, e.g. the base directory */ \
+ fprintf(stderr, (_fmt), ##_args); /* the formatted message itself */ \
+ if ('\0' != *(_f)) /* the file name suffix is skipped when _f is empty */ \
+ fprintf(stderr, ": %s", (_f)); \
+ fprintf(stderr, "\n"); \
+ } while (/* CONSTCOND */ 0)
+
+/* Access to the mandoc database on disk: file names and open handles of one keyword/index pair. */
+
+struct mdb {
+ char idxn[MAXPATHLEN]; /* index db filename; also passed to perror() and dbt_put() for diagnostics */
+ char dbn[MAXPATHLEN]; /* keyword db filename; also passed to perror() and dbt_put() for diagnostics */
+ DB *idx; /* index recno database; NULL while not open (test mode leaves it NULL) */
+ DB *db; /* keyword btree database; NULL while not open (test mode leaves it NULL) */
+};
+
+/* Stack of temporarily unused index records, reused by index_merge() before new numbers are handed out. */
+
+struct recs {
+ recno_t *stack; /* pointer to a malloc'ed array; released with free() at the end of main() */
+ size_t size; /* number of allocated slots */
+ size_t cur; /* current number of empty records; index_merge() pops from the top */
+ recno_t last; /* last record number in the index; index_merge() uses it once, then resets it to 0 */
+};
+
+/* Tiny list for files. No need to bring in QUEUE. */
+
+struct of {
+ char *fname; /* heap-allocated */
+ char *sec; /* section expected from the file's location; compared with the section found in the manual */
+ char *arch; /* architecture expected from the file's location; compared with the one found in the manual */
+ char *title; /* title expected from the file name; a mismatch with the manual's title marks the file as skipped */
+ int src_form; /* bit mask of MANDOC_SRC and MANDOC_FORM */
+ struct of *next; /* NULL for last one */
+ struct of *first; /* first in list; index_merge() starts iterating here */
+};
+
+/* Buffer for storing growable data. */
+
+struct buf {
+ char *cp; /* storage; allocated with mandoc_malloc() and released with free() in main() */
+ size_t len; /* current length; set to 0 to start a new string */
+ size_t size; /* total buffer size */
+};
+
+/* Operation we're going to perform; main() rejects a second operation option as conflicting. */
+
+enum op {
+ OP_DEFAULT = 0, /* new dbs from dir list or default config */
+ OP_CONFFILE, /* new databases from custom config file (-C) */
+ OP_UPDATE, /* delete/add entries in existing database (-d) */
+ OP_DELETE, /* delete entries from existing database (-u) */
+ OP_TEST /* change no databases, report potential problems (-t) */
+};
+
+#define MAN_ARGS DB *hash, /* parameter list of pman_node() */ \
+ struct buf *buf, \
+ struct buf *dbuf, \
+ const struct man_node *n
+#define MDOC_ARGS DB *hash, /* parameter list shared by pmdoc_node() and all pmdoc_* handlers */ \
+ struct buf *buf, \
+ struct buf *dbuf, \
+ const struct mdoc_node *n, \
+ const struct mdoc_meta *m
+
+static void buf_appendmdoc(struct buf *,
+ const struct mdoc_node *, int);
+static void buf_append(struct buf *, const char *);
+static void buf_appendb(struct buf *,
+ const void *, size_t);
+static void dbt_put(DB *, const char *, DBT *, DBT *);
+static void hash_put(DB *, const struct buf *, uint64_t);
+static void hash_reset(DB **);
+static void index_merge(const struct of *, struct mparse *, /* defined below without repeating "static"; linkage stays internal */
+ struct buf *, struct buf *, DB *,
+ struct mdb *, struct recs *,
+ const char *);
+static void index_prune(const struct of *, struct mdb *,
+ struct recs *, const char *);
+static void ofile_argbuild(int, char *[], /* builds the file list from command line arguments */
+ struct of **, const char *);
+static void ofile_dirbuild(const char *, const char *, /* builds the file list by scanning a manual tree */
+ const char *, int, struct of **, char *);
+static void ofile_free(struct of *);
+static void pformatted(DB *, struct buf *, struct buf *, /* used by index_merge() when neither parser produced a result */
+ const struct of *, const char *);
+static int pman_node(MAN_ARGS);
+static void pmdoc_node(MDOC_ARGS);
+static int pmdoc_head(MDOC_ARGS); /* from here on: handlers referenced by the mdocs[] table */
+static int pmdoc_body(MDOC_ARGS);
+static int pmdoc_Fd(MDOC_ARGS);
+static int pmdoc_In(MDOC_ARGS);
+static int pmdoc_Fn(MDOC_ARGS);
+static int pmdoc_Nd(MDOC_ARGS);
+static int pmdoc_Nm(MDOC_ARGS);
+static int pmdoc_Sh(MDOC_ARGS);
+static int pmdoc_St(MDOC_ARGS);
+static int pmdoc_Xr(MDOC_ARGS);
+
+#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
+
+struct mdoc_handler { /* element type of the mdocs[] table, which has one row per mdoc(7) macro */
+ int (*fp)(MDOC_ARGS); /* Optional handler; NULL when the macro needs no special treatment. */
+ uint64_t mask; /* Set unless handler returns 0; a TYPE_* keyword type, or 0 for macros that are not indexed. */
+ int flags; /* For use by pmdoc_node; 0 or MDOCF_CHILD. */
+};
+
+static const struct mdoc_handler mdocs[MDOC_MAX] = { /* presumably indexed by mdoc macro token (size MDOC_MAX), so row order matters -- confirm in pmdoc_node */
+ { NULL, 0, 0 }, /* Ap */
+ { NULL, 0, 0 }, /* Dd */
+ { NULL, 0, 0 }, /* Dt */
+ { NULL, 0, 0 }, /* Os */
+ { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
+ { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
+ { NULL, 0, 0 }, /* Pp */
+ { NULL, 0, 0 }, /* D1 */
+ { NULL, 0, 0 }, /* Dl */
+ { NULL, 0, 0 }, /* Bd */
+ { NULL, 0, 0 }, /* Ed */
+ { NULL, 0, 0 }, /* Bl */
+ { NULL, 0, 0 }, /* El */
+ { NULL, 0, 0 }, /* It */
+ { NULL, 0, 0 }, /* Ad */
+ { NULL, TYPE_An, MDOCF_CHILD }, /* An */
+ { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */
+ { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */
+ { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */
+ { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */
+ { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */
+ { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
+ { NULL, 0, 0 }, /* Ex */
+ { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
+ { pmdoc_Fd, TYPE_In, 0 }, /* Fd (indexed under the In mask) */
+ { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
+ { pmdoc_Fn, 0, 0 }, /* Fn (no table mask; left to the handler) */
+ { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
+ { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
+ { pmdoc_In, TYPE_In, 0 }, /* In */
+ { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
+ { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
+ { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
+ { NULL, 0, 0 }, /* Op */
+ { NULL, 0, 0 }, /* Ot */
+ { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
+ { NULL, 0, 0 }, /* Rv */
+ { pmdoc_St, TYPE_St, 0 }, /* St */
+ { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
+ { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt -- NOTE(review): indexed under the Va mask although mandocdb.8 documents a separate Vt bit; confirm this is intended */
+ { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
+ { NULL, 0, 0 }, /* %A */
+ { NULL, 0, 0 }, /* %B */
+ { NULL, 0, 0 }, /* %D */
+ { NULL, 0, 0 }, /* %I */
+ { NULL, 0, 0 }, /* %J */
+ { NULL, 0, 0 }, /* %N */
+ { NULL, 0, 0 }, /* %O */
+ { NULL, 0, 0 }, /* %P */
+ { NULL, 0, 0 }, /* %R */
+ { NULL, 0, 0 }, /* %T */
+ { NULL, 0, 0 }, /* %V */
+ { NULL, 0, 0 }, /* Ac */
+ { NULL, 0, 0 }, /* Ao */
+ { NULL, 0, 0 }, /* Aq */
+ { NULL, TYPE_At, MDOCF_CHILD }, /* At */
+ { NULL, 0, 0 }, /* Bc */
+ { NULL, 0, 0 }, /* Bf */
+ { NULL, 0, 0 }, /* Bo */
+ { NULL, 0, 0 }, /* Bq */
+ { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */
+ { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */
+ { NULL, 0, 0 }, /* Db */
+ { NULL, 0, 0 }, /* Dc */
+ { NULL, 0, 0 }, /* Do */
+ { NULL, 0, 0 }, /* Dq */
+ { NULL, 0, 0 }, /* Ec */
+ { NULL, 0, 0 }, /* Ef */
+ { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */
+ { NULL, 0, 0 }, /* Eo */
+ { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */
+ { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */
+ { NULL, 0, 0 }, /* No */
+ { NULL, 0, 0 }, /* Ns */
+ { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */
+ { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */
+ { NULL, 0, 0 }, /* Pc */
+ { NULL, 0, 0 }, /* Pf */
+ { NULL, 0, 0 }, /* Po */
+ { NULL, 0, 0 }, /* Pq */
+ { NULL, 0, 0 }, /* Qc */
+ { NULL, 0, 0 }, /* Ql */
+ { NULL, 0, 0 }, /* Qo */
+ { NULL, 0, 0 }, /* Qq */
+ { NULL, 0, 0 }, /* Re */
+ { NULL, 0, 0 }, /* Rs */
+ { NULL, 0, 0 }, /* Sc */
+ { NULL, 0, 0 }, /* So */
+ { NULL, 0, 0 }, /* Sq */
+ { NULL, 0, 0 }, /* Sm */
+ { NULL, 0, 0 }, /* Sx */
+ { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */
+ { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */
+ { NULL, 0, 0 }, /* Ux */
+ { NULL, 0, 0 }, /* Xc */
+ { NULL, 0, 0 }, /* Xo */
+ { pmdoc_head, TYPE_Fn, 0 }, /* Fo (indexed under the Fn mask) */
+ { NULL, 0, 0 }, /* Fc */
+ { NULL, 0, 0 }, /* Oo */
+ { NULL, 0, 0 }, /* Oc */
+ { NULL, 0, 0 }, /* Bk */
+ { NULL, 0, 0 }, /* Ek */
+ { NULL, 0, 0 }, /* Bt */
+ { NULL, 0, 0 }, /* Hf */
+ { NULL, 0, 0 }, /* Fr */
+ { NULL, 0, 0 }, /* Ud */
+ { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */
+ { NULL, 0, 0 }, /* Lp */
+ { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */
+ { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */
+ { NULL, 0, 0 }, /* Brq */
+ { NULL, 0, 0 }, /* Bro */
+ { NULL, 0, 0 }, /* Brc */
+ { NULL, 0, 0 }, /* %C */
+ { NULL, 0, 0 }, /* Es */
+ { NULL, 0, 0 }, /* En */
+ { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */
+ { NULL, 0, 0 }, /* %Q */
+ { NULL, 0, 0 }, /* br */
+ { NULL, 0, 0 }, /* sp */
+ { NULL, 0, 0 }, /* %U */
+ { NULL, 0, 0 }, /* Ta */
+};
+
+static const char *progname; /* Last path component of argv[0]; printed in the usage message. */
+static int use_all; /* Use all directories and files (-a, implied by -t). */
+static int verb; /* Output verbosity level; incremented by each -v. */
+static int warnings; /* Potential problems in manuals (-W, implied by -t). */
+
+/*
+ * Program entry point.
+ *
+ * Parses the command line, selects exactly one operation (enum op)
+ * and then either checks the given files (-t), merges files into or
+ * prunes files from one existing database pair (-d, -u), or rebuilds
+ * the databases of every manual tree named on the command line or
+ * returned by manpath_parse().
+ *
+ * Returns MANDOCLEVEL_OK on success and MANDOCLEVEL_BADARG after
+ * printing the usage message.  System errors terminate the program
+ * at once with MANDOCLEVEL_SYSERR, over-long database paths with
+ * MANDOCLEVEL_BADARG.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct mparse	*mp; /* parse sequence */
+	struct manpaths	 dirs;
+	struct mdb	 mdb;
+	struct recs	 recs;
+	enum op		 op; /* current operation */
+	const char	*dir;
+	int		 ch, i, flags;
+	int		 startfd; /* initial working directory, or -1 */
+	char		 dirbuf[MAXPATHLEN];
+	DB		*hash; /* temporary keyword hashtable */
+	BTREEINFO	 info; /* btree configuration */
+	size_t		 sz1, sz2;
+	struct buf	 buf, /* keyword buffer */
+			 dbuf; /* description buffer */
+	struct of	*of; /* list of files for processing */
+	extern int	 optind;
+	extern char	*optarg;
+
+	progname = strrchr(argv[0], '/');
+	if (progname == NULL)
+		progname = argv[0];
+	else
+		++progname;
+
+	memset(&dirs, 0, sizeof(struct manpaths));
+	memset(&mdb, 0, sizeof(struct mdb));
+	memset(&recs, 0, sizeof(struct recs));
+
+	of = NULL;
+	mp = NULL;
+	hash = NULL;
+	op = OP_DEFAULT;
+	dir = NULL;
+	startfd = -1;
+
+	/*
+	 * Each of -C, -d, -t and -u selects an operation;
+	 * a second one is rejected as conflicting.
+	 */
+
+	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
+		switch (ch) {
+		case ('a'):
+			use_all = 1;
+			break;
+		case ('C'):
+			if (op) {
+				fprintf(stderr,
+				    "-C: conflicting options\n");
+				goto usage;
+			}
+			dir = optarg;
+			op = OP_CONFFILE;
+			break;
+		case ('d'):
+			if (op) {
+				fprintf(stderr,
+				    "-d: conflicting options\n");
+				goto usage;
+			}
+			dir = optarg;
+			op = OP_UPDATE;
+			break;
+		case ('t'):
+			/* Test mode reports everything on stdout. */
+			dup2(STDOUT_FILENO, STDERR_FILENO);
+			if (op) {
+				fprintf(stderr,
+				    "-t: conflicting options\n");
+				goto usage;
+			}
+			op = OP_TEST;
+			use_all = 1;
+			warnings = 1;
+			break;
+		case ('u'):
+			if (op) {
+				fprintf(stderr,
+				    "-u: conflicting options\n");
+				goto usage;
+			}
+			dir = optarg;
+			op = OP_DELETE;
+			break;
+		case ('v'):
+			verb++;
+			break;
+		case ('W'):
+			warnings = 1;
+			break;
+		default:
+			goto usage;
+		}
+
+	argc -= optind;
+	argv += optind;
+
+	if (OP_CONFFILE == op && argc > 0) {
+		fprintf(stderr, "-C: too many arguments\n");
+		goto usage;
+	}
+
+	memset(&info, 0, sizeof(BTREEINFO));
+	info.lorder = 4321;
+	info.flags = R_DUP;
+
+	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
+
+	memset(&buf, 0, sizeof(struct buf));
+	memset(&dbuf, 0, sizeof(struct buf));
+
+	buf.size = dbuf.size = MANDOC_BUFSZ;
+
+	buf.cp = mandoc_malloc(buf.size);
+	dbuf.cp = mandoc_malloc(dbuf.size);
+
+	/* Test mode: no database is opened, mdb stays zeroed. */
+
+	if (OP_TEST == op) {
+		ofile_argbuild(argc, argv, &of, ".");
+		if (NULL == of)
+			goto out;
+		index_merge(of, mp, &dbuf, &buf,
+		    hash, &mdb, &recs, ".");
+		goto out;
+	}
+
+	if (OP_UPDATE == op || OP_DELETE == op) {
+		/* mdb was zeroed above, so strlcat() starts empty. */
+		strlcat(mdb.dbn, dir, MAXPATHLEN);
+		strlcat(mdb.dbn, "/", MAXPATHLEN);
+		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
+
+		strlcat(mdb.idxn, dir, MAXPATHLEN);
+		strlcat(mdb.idxn, "/", MAXPATHLEN);
+		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
+
+		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
+			fprintf(stderr, "%s: path too long\n", dir);
+			exit((int)MANDOCLEVEL_BADARG);
+		}
+
+		/* No O_TRUNC: existing entries are kept. */
+		flags = O_CREAT | O_RDWR;
+		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
+		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
+
+		if (NULL == mdb.db) {
+			perror(mdb.dbn);
+			exit((int)MANDOCLEVEL_SYSERR);
+		} else if (NULL == mdb.idx) {
+			perror(mdb.idxn);
+			exit((int)MANDOCLEVEL_SYSERR);
+		}
+
+		ofile_argbuild(argc, argv, &of, dir);
+
+		if (NULL == of)
+			goto out;
+
+		index_prune(of, &mdb, &recs, dir);
+
+		/*
+		 * Go to the root of the respective manual tree.
+		 * This must work or no manuals may be found (they're
+		 * indexed relative to the root).
+		 */
+
+		if (OP_UPDATE == op) {
+			if (-1 == chdir(dir)) {
+				perror(dir);
+				exit((int)MANDOCLEVEL_SYSERR);
+			}
+			index_merge(of, mp, &dbuf, &buf, hash,
+			    &mdb, &recs, dir);
+		}
+
+		goto out;
+	}
+
+	/*
+	 * Configure the directories we're going to scan.
+	 * If we have command-line arguments, use them.
+	 * If not, we use man(1)'s method (see mandocdb.8).
+	 */
+
+	if (argc > 0) {
+		dirs.paths = mandoc_calloc(argc, sizeof(char *));
+		dirs.sz = argc;
+		for (i = 0; i < argc; i++)
+			dirs.paths[i] = mandoc_strdup(argv[i]);
+	} else
+		manpath_parse(&dirs, dir, NULL, NULL);
+
+	/*
+	 * Every tree is entered with chdir(2) below, so a relative
+	 * path in the second or a later position would otherwise be
+	 * resolved against the previous tree instead of the directory
+	 * the program was started in.  Keep a descriptor of the
+	 * starting directory to return to.  Failing to open it is
+	 * not fatal; the former behaviour is kept in that case.
+	 */
+
+	if (dirs.sz > 1)
+		startfd = open(".", O_RDONLY);
+
+	for (i = 0; i < dirs.sz; i++) {
+		/* Absolute paths do not depend on where we are. */
+
+		if (i > 0 && -1 != startfd &&
+		    '/' != dirs.paths[i][0] &&
+		    -1 == fchdir(startfd)) {
+			perror(dirs.paths[i]);
+			exit((int)MANDOCLEVEL_SYSERR);
+		}
+
+		/*
+		 * Go to the root of the respective manual tree.
+		 * This must work or no manuals may be found:
+		 * They are indexed relative to the root.
+		 */
+
+		if (-1 == chdir(dirs.paths[i])) {
+			perror(dirs.paths[i]);
+			exit((int)MANDOCLEVEL_SYSERR);
+		}
+
+		strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN);
+		strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
+
+		/* O_TRUNC: existing databases are rebuilt from scratch. */
+		flags = O_CREAT | O_TRUNC | O_RDWR;
+		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
+		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
+
+		if (NULL == mdb.db) {
+			perror(mdb.dbn);
+			exit((int)MANDOCLEVEL_SYSERR);
+		} else if (NULL == mdb.idx) {
+			perror(mdb.idxn);
+			exit((int)MANDOCLEVEL_SYSERR);
+		}
+
+		/*
+		 * Search for manuals and fill the new database.
+		 */
+
+		strlcpy(dirbuf, dirs.paths[i], MAXPATHLEN);
+		ofile_dirbuild(".", "", "", 0, &of, dirbuf);
+
+		if (NULL != of) {
+			index_merge(of, mp, &dbuf, &buf, hash,
+			    &mdb, &recs, dirs.paths[i]);
+			ofile_free(of);
+			of = NULL;
+		}
+
+		/* Reset the handles so "out" does not close them twice. */
+		(*mdb.db->close)(mdb.db);
+		(*mdb.idx->close)(mdb.idx);
+		mdb.db = NULL;
+		mdb.idx = NULL;
+	}
+
+out:
+	if (mdb.db)
+		(*mdb.db->close)(mdb.db);
+	if (mdb.idx)
+		(*mdb.idx->close)(mdb.idx);
+	if (hash)
+		(*hash->close)(hash);
+	if (mp)
+		mparse_free(mp);
+	if (-1 != startfd)
+		close(startfd);
+
+	manpath_free(&dirs);
+	ofile_free(of);
+	free(buf.cp);
+	free(dbuf.cp);
+	free(recs.stack);
+
+	return(MANDOCLEVEL_OK);
+
+usage:
+	/* Reached before any buffer is allocated; nothing to release. */
+	fprintf(stderr,
+	    "usage: %s [-avW] [-C file] | dir ... | -t file ...\n"
+	    " -d dir [file ...] | "
+	    "-u dir [file ...]\n",
+	    progname);
+
+	return((int)MANDOCLEVEL_BADARG);
+}
+
+void
+index_merge(const struct of *of, struct mparse *mp,
+ struct buf *dbuf, struct buf *buf, DB *hash,
+ struct mdb *mdb, struct recs *recs,
+ const char *basedir)
+{
+ recno_t rec;
+ int ch, skip;
+ DBT key, val;
+ DB *files; /* temporary file name table */
+ char emptystring[1] = {'\0'};
+ struct mdoc *mdoc;
+ struct man *man;
+ char *p;
+ const char *fn, *msec, *march, *mtitle;
+ uint64_t mask;
+ size_t sv;
+ unsigned seq;
+ uint64_t vbuf[2];
+ char type;
+
+ if (warnings) {
+ files = NULL;
+ hash_reset(&files);
+ }
+
+ rec = 0;
+ for (of = of->first; of; of = of->next) {
+ fn = of->fname;
+
+ /*
+ * Try interpreting the file as mdoc(7) or man(7)
+ * source code, unless it is already known to be
+ * formatted. Fall back to formatted mode.
+ */
+
+ mparse_reset(mp);
+ mdoc = NULL;
+ man = NULL;
+
+ if ((MANDOC_SRC & of->src_form ||
+ ! (MANDOC_FORM & of->src_form)) &&
+ MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
+ mparse_result(mp, &mdoc, &man);
+
+ if (NULL != mdoc) {
+ msec = mdoc_meta(mdoc)->msec;
+ march = mdoc_meta(mdoc)->arch;
+ if (NULL == march)
+ march = "";
+ mtitle = mdoc_meta(mdoc)->title;
+ } else if (NULL != man) {
+ msec = man_meta(man)->msec;
+ march = "";
+ mtitle = man_meta(man)->title;
+ } else {
+ msec = of->sec;
+ march = of->arch;
+ mtitle = of->title;
+ }
+
+ /*
+ * Check whether the manual section given in a file
+ * agrees with the directory where the file is located.
+ * Some manuals have suffixes like (3p) on their
+ * section number either inside the file or in the
+ * directory name, some are linked into more than one
+ * section, like encrypt(1) = makekey(8). Do not skip
+ * manuals for such reasons.
+ */
+
+ skip = 0;
+ assert(of->sec);
+ assert(msec);
+ if (strcasecmp(msec, of->sec))
+ WARNING(fn, basedir, "Section \"%s\" manual "
+ "in \"%s\" directory", msec, of->sec);
+ /*
+ * Manual page directories exist for each kernel
+ * architecture as returned by machine(1).
+ * However, many manuals only depend on the
+ * application architecture as returned by arch(1).
+ * For example, some (2/ARM) manuals are shared
+ * across the "armish" and "zaurus" kernel
+ * architectures.
+ * A few manuals are even shared across completely
+ * different architectures, for example fdformat(1)
+ * on amd64, i386, sparc, and sparc64.
+ * Thus, warn about architecture mismatches,
+ * but don't skip manuals for this reason.
+ */
+
+ assert(of->arch);
+ assert(march);
+ if (strcasecmp(march, of->arch))
+ WARNING(fn, basedir, "Architecture \"%s\" "
+ "manual in \"%s\" directory",
+ march, of->arch);
+
+ /*
+ * By default, skip a file if the title given
+ * in the file disagrees with the file name.
+ * Do not warn, this happens for all MLINKs.
+ */
+
+ assert(of->title);
+ assert(mtitle);
+ if (strcasecmp(mtitle, of->title))
+ skip = 1;
+
+ /*
+ * Build a title string for the file. If it matches
+ * the location of the file, remember the title as
+ * found; else, remember it as missing.
+ */
+
+ if (warnings) {
+ buf->len = 0;
+ buf_appendb(buf, mtitle, strlen(mtitle));
+ buf_appendb(buf, "(", 1);
+ buf_appendb(buf, msec, strlen(msec));
+ if ('\0' != *march) {
+ buf_appendb(buf, "/", 1);
+ buf_appendb(buf, march, strlen(march));
+ }
+ buf_appendb(buf, ")", 2);
+ for (p = buf->cp; '\0' != *p; p++)
+ *p = tolower(*p);
+ key.data = buf->cp;
+ key.size = buf->len;
+ val.data = NULL;
+ val.size = 0;
+ if (0 == skip)
+ val.data = emptystring;
+ else {
+ ch = (*files->get)(files, &key, &val, 0);
+ if (ch < 0) {
+ perror("hash");
+ exit((int)MANDOCLEVEL_SYSERR);
+ } else if (ch > 0) {
+ val.data = (void *)fn;
+ val.size = strlen(fn) + 1;
+ } else
+ val.data = NULL;
+ }
+ if (NULL != val.data &&
+ (*files->put)(files, &key, &val, 0) < 0) {
+ perror("hash");
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ }
+
+ if (skip && !use_all)
+ continue;
+
+ /*
+ * The index record value consists of a nil-terminated
+ * filename, a nil-terminated manual section, and a
+ * nil-terminated description. Use the actual
+ * location of the file, such that the user can find
+ * it with man(1). Since the description may not be
+ * set, we set a sentinel to see if we're going to
+ * write a nil byte in its place.
+ */
+
+ dbuf->len = 0;
+ type = mdoc ? 'd' : (man ? 'a' : 'c');
+ buf_appendb(dbuf, &type, 1);
+ buf_appendb(dbuf, fn, strlen(fn) + 1);
+ buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
+ buf_appendb(dbuf, of->title, strlen(of->title) + 1);
+ buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
+
+ sv = dbuf->len;
+
+ /*
+ * Collect keyword/mask pairs.
+ * Each pair will become a new btree node.
+ */
+
+ hash_reset(&hash);
+ if (mdoc)
+ pmdoc_node(hash, buf, dbuf,
+ mdoc_node(mdoc), mdoc_meta(mdoc));
+ else if (man)
+ pman_node(hash, buf, dbuf, man_node(man));
+ else
+ pformatted(hash, buf, dbuf, of, basedir);
+
+ /* Test mode, do not access any database. */
+
+ if (NULL == mdb->db || NULL == mdb->idx)
+ continue;
+
+ /*
+ * Make sure the file name is always registered
+ * as an .Nm search key.
+ */
+ buf->len = 0;
+ buf_append(buf, of->title);
+ hash_put(hash, buf, TYPE_Nm);
+
+ /*
+ * Reclaim an empty index record, if available.
+ * Use its record number for all new btree nodes.
+ */
+
+ if (recs->cur > 0) {
+ recs->cur--;
+ rec = recs->stack[(int)recs->cur];
+ } else if (recs->last > 0) {
+ rec = recs->last;
+ recs->last = 0;
+ } else
+ rec++;
+ vbuf[1] = htobe64(rec);
+
+ /*
+ * Copy from the in-memory hashtable of pending
+ * keyword/mask pairs into the database.
+ */
+
+ seq = R_FIRST;
+ while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
+ seq = R_NEXT;
+ assert(sizeof(uint64_t) == val.size);
+ memcpy(&mask, val.data, val.size);
+ vbuf[0] = htobe64(mask);
+ val.size = sizeof(vbuf);
+ val.data = &vbuf;
+ dbt_put(mdb->db, mdb->dbn, &key, &val);
+ }
+ if (ch < 0) {
+ perror("hash");
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+
+ /*
+ * Apply to the index. If we haven't had a description
+ * set, put an empty one in now.
+ */
+
+ if (dbuf->len == sv)
+ buf_appendb(dbuf, "", 1);
+
+ key.data = &rec;
+ key.size = sizeof(recno_t);
+
+ val.data = dbuf->cp;
+ val.size = dbuf->len;
+
+ if (verb)
+ printf("%s: Adding to index: %s\n", basedir, fn);
+
+ dbt_put(mdb->idx, mdb->idxn, &key, &val);
+ }
+
+ /*
+ * Iterate the remembered file titles and check that
+ * all files can be found by their main title.
+ */
+
+ if (warnings) {
+ seq = R_FIRST;
+ while (0 == (*files->seq)(files, &key, &val, seq)) {
+ seq = R_NEXT;
+ if (val.size)
+ WARNING((char *)val.data, basedir,
+ "Probably unreachable, title "
+ "is %s", (char *)key.data);
+ }
+ (*files->close)(files);
+ }
+}
+
+/*
+ * Scan through all entries in the index file `idx' and prune those
+ * entries in `ofile'.
+ * Pruning consists of removing from `db', then invalidating the entry
+ * in `idx' (zeroing its value size).
+ * Every record found empty or emptied here is pushed onto recs->stack
+ * (recs->cur counts the entries) so that its number can be reused.
+ * On return, recs->last is the last record number read plus one.
+ * A database error, an unterminated file name in the index or a
+ * keyword value of unexpected size terminates the program with
+ * MANDOCLEVEL_SYSERR.
+ */
+static void
+index_prune(const struct of *ofile, struct mdb *mdb,
+ struct recs *recs, const char *basedir)
+{
+ const struct of *of;
+ const char *fn;
+ uint64_t vbuf[2]; /* keyword value; [1] is the big-endian record number */
+ unsigned seq, sseq;
+ DBT key, val;
+ int ch;
+
+ recs->cur = 0; /* start with an empty stack of reusable records */
+ seq = R_FIRST;
+ while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
+ seq = R_NEXT;
+ assert(sizeof(recno_t) == key.size);
+ memcpy(&recs->last, key.data, key.size); /* current record number */
+
+ /* Deleted records are zero-sized. Skip them. */
+
+ if (0 == val.size)
+ goto cont;
+
+ /*
+ * Make sure we're sane.
+ * Read past our mdoc/man/cat type to the next string,
+ * then make sure it's bounded by a NUL.
+ * Failing any of these, we go into our error handler.
+ * (Leaving the loop with ch == 0 is reported as a
+ * corrupt index below.)
+ */
+
+ fn = (char *)val.data + 1;
+ if (NULL == memchr(fn, '\0', val.size - 1))
+ break;
+
+ /*
+ * Search for the file in those we care about.
+ * XXX: build this into a tree. Too slow.
+ */
+
+ for (of = ofile->first; of; of = of->next)
+ if (0 == strcmp(fn, of->fname))
+ break;
+
+ if (NULL == of) /* not one of our files: keep the record */
+ continue;
+
+ /*
+ * Search through the keyword database, throwing out all
+ * references to our file.
+ * Note that this scan reuses `key' and `val', so the
+ * index key and value read above are overwritten.
+ */
+
+ sseq = R_FIRST;
+ while (0 == (ch = (*mdb->db->seq)(mdb->db,
+ &key, &val, sseq))) {
+ sseq = R_NEXT;
+ if (sizeof(vbuf) != val.size)
+ break;
+
+ memcpy(vbuf, val.data, val.size);
+ if (recs->last != betoh64(vbuf[1])) /* other record's keyword */
+ continue;
+
+ if ((ch = (*mdb->db->del)(mdb->db,
+ &key, R_CURSOR)) < 0)
+ break;
+ }
+
+ if (ch < 0) {
+ perror(mdb->dbn);
+ exit((int)MANDOCLEVEL_SYSERR);
+ } else if (1 != ch) { /* inner scan stopped before the end */
+ fprintf(stderr, "%s: corrupt database\n",
+ mdb->dbn);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+
+ if (verb)
+ printf("%s: Deleting from index: %s\n",
+ basedir, fn);
+
+ val.size = 0; /* a zero-sized value marks the record as deleted */
+ ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
+
+ if (ch < 0)
+ break;
+cont:
+ if (recs->cur >= recs->size) { /* grow the stack by MANDOC_SLOP */
+ recs->size += MANDOC_SLOP;
+ recs->stack = mandoc_realloc(recs->stack,
+ recs->size * sizeof(recno_t));
+ }
+
+ recs->stack[(int)recs->cur] = recs->last;
+ recs->cur++;
+ }
+
+ if (ch < 0) {
+ perror(mdb->idxn);
+ exit((int)MANDOCLEVEL_SYSERR);
+ } else if (1 != ch) { /* outer scan stopped before the end */
+ fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+
+ recs->last++;
+}
+
+/*
+ * Append `sz' raw bytes from `cp' to the buffer.
+ * The allocation is enlarged first whenever the bytes would not
+ * leave at least one byte of spare room.
+ */
+static void
+buf_appendb(struct buf *buf, const void *cp, size_t sz)
+{
+	const size_t	 end = buf->len + sz;
+
+	/* Grow past the required size by MANDOC_BUFSZ. */
+
+	while (end >= buf->size) {
+		buf->size = end + MANDOC_BUFSZ;
+		buf->cp = mandoc_realloc(buf->cp, buf->size);
+	}
+
+	memcpy(&buf->cp[(int)buf->len], cp, sz);
+	buf->len = end;
+}
+
+/*
+ * Add a nil-terminated string, terminator included, to the buffer.
+ * Empty strings are ignored.
+ * When the buffer already holds data, its last byte (the terminator
+ * of the previous string) is turned into a space, so that repeated
+ * calls build one space-separated, nil-terminated string.
+ */
+static void
+buf_append(struct buf *buf, const char *cp)
+{
+	const size_t	 len = strlen(cp);
+
+	if (0 == len)
+		return;
+
+	/* Turn the previous terminator into a separator. */
+
+	if (0 != buf->len)
+		buf->cp[(int)buf->len - 1] = ' ';
+
+	buf_appendb(buf, cp, len + 1);
+}
+
+/*
+ * Collect the text of a node, of its following siblings and of all
+ * their descendants into the buffer.
+ * Siblings are handled by iteration and only children by recursion,
+ * because general mdoc nodes here rarely nest deeply.
+ * Pass a non-zero "f" only from pmdoc_Nd, where the description does
+ * not start at the beginning of the buffer: the first text node at
+ * this level is then appended verbatim instead of being joined to
+ * the preceding data with a space.
+ */
+static void
+buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
+{
+
+	while (NULL != n) {
+		if (NULL != n->child)
+			buf_appendmdoc(buf, n->child, f);
+
+		if (MDOC_TEXT == n->type) {
+			if (f) {
+				buf_appendb(buf, n->string,
+				    strlen(n->string) + 1);
+				f = 0;
+			} else
+				buf_append(buf, n->string);
+		}
+
+		n = n->next;
+	}
+}
+
+/*
+ * Close the hash table `*db', if any, and replace it by a freshly
+ * opened in-memory one.  Exits with MANDOCLEVEL_SYSERR if the new
+ * table cannot be opened.
+ */
+static void
+hash_reset(DB **db)
+{
+
+	if (NULL != *db)
+		(*(*db)->close)(*db);
+
+	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
+	if (NULL != *db)
+		return;
+
+	perror("hash");
+	exit((int)MANDOCLEVEL_SYSERR);
+}
+
+/* ARGSUSED */
+static int
+pmdoc_head(MDOC_ARGS)
+{
+
+	/* Request the data for head nodes only. */
+
+	if (MDOC_HEAD != n->type)
+		return(0);
+	return(1);
+}
+
+/* ARGSUSED */
+static int
+pmdoc_body(MDOC_ARGS)
+{
+
+	/* Request the data for body nodes only. */
+
+	if (MDOC_BODY != n->type)
+		return(0);
+	return(1);
+}
+
+/*
+ * Handle `Fd' in the SYNOPSIS section.
+ * For lines of the form "#include <file>" or "#include "file"",
+ * put the file name, stripped of its delimiters and nil-terminated,
+ * into `buf' and return 1.  Return 0 for anything else, including
+ * an inclusion with an empty file name such as "#include <>".
+ */
+/* ARGSUSED */
+static int
+pmdoc_Fd(MDOC_ARGS)
+{
+	const char	*start, *end;
+	size_t		 sz;
+
+	if (SEC_SYNOPSIS != n->sec)
+		return(0);
+	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
+		return(0);
+
+	/*
+	 * Only consider those `Fd' macro fields that begin with an
+	 * "inclusion" token (versus, e.g., #define).
+	 */
+	if (strcmp("#include", n->string))
+		return(0);
+
+	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
+		return(0);
+
+	/*
+	 * Strip away the enclosing angle brackets or quotes and make
+	 * sure we're not zero-length.
+	 */
+
+	start = n->string;
+	if ('<' == *start || '"' == *start)
+		start++;
+
+	if (0 == (sz = strlen(start)))
+		return(0);
+
+	end = &start[(int)sz - 1];
+	if ('>' == *end || '"' == *end)
+		end--;
+
+	/*
+	 * Nothing but a closing delimiter was left, e.g. "<>".
+	 * This is malformed input, not a programming error,
+	 * so skip it rather than abort.
+	 */
+
+	if (end < start)
+		return(0);
+
+	buf_appendb(buf, start, (size_t)(end - start + 1));
+	buf_appendb(buf, "", 1);
+	return(1);
+}
+
+/*
+ * Handle `In': stage the text of the first child in `buf'.
+ * Returns 0 if the first child is missing or is not text.
+ */
+/* ARGSUSED */
+static int
+pmdoc_In(MDOC_ARGS)
+{
+	const struct mdoc_node *nn;
+
+	nn = n->child;
+	if (NULL != nn && MDOC_TEXT == nn->type) {
+		buf_append(buf, nn->string);
+		return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * Handle `Fn': register the function name (TYPE_Fn), the function
+ * type if one precedes the name (TYPE_Ft), and each text argument
+ * (TYPE_Fa).
+ * All keys are put into the hash table right here, so this always
+ * returns 0 to keep the caller from staging anything else.
+ */
+/* ARGSUSED */
+static int
+pmdoc_Fn(MDOC_ARGS)
+{
+	struct mdoc_node *nn;
+	const char	*cp, *sp, *typeend;
+
+	nn = n->child;
+
+	if (NULL == nn || MDOC_TEXT != nn->type)
+		return(0);
+
+	/*
+	 * .Fn "struct type *name" "char *arg"
+	 * The name starts after the last space, if there is one.
+	 * Step over that space: otherwise the pointer symbols would
+	 * never be seen and the key would become " *name".
+	 */
+
+	sp = strrchr(nn->string, ' ');
+	cp = (NULL == sp) ? nn->string : sp + 1;
+
+	/* Strip away pointer symbol. */
+
+	while ('*' == *cp)
+		cp++;
+
+	/* Store the function name. */
+
+	buf_append(buf, cp);
+	hash_put(hash, buf, TYPE_Fn);
+
+	/*
+	 * Store the function type: everything before the last space,
+	 * or the leading pointer symbols if there is no space at all.
+	 */
+
+	typeend = (NULL == sp) ? cp : sp;
+	if (nn->string < typeend) {
+		buf->len = 0;
+		buf_appendb(buf, nn->string,
+		    (size_t)(typeend - nn->string));
+		buf_appendb(buf, "", 1);
+		hash_put(hash, buf, TYPE_Ft);
+	}
+
+	/* Store the arguments. */
+
+	for (nn = nn->next; nn; nn = nn->next) {
+		if (MDOC_TEXT != nn->type)
+			continue;
+		buf->len = 0;
+		buf_append(buf, nn->string);
+		hash_put(hash, buf, TYPE_Fa);
+	}
+
+	return(0);
+}
+
+/*
+ * Handle `St': stage the text of the first child in `buf'.
+ * Returns 0 if the first child is missing or is not text.
+ */
+/* ARGSUSED */
+static int
+pmdoc_St(MDOC_ARGS)
+{
+	const struct mdoc_node *nn;
+
+	nn = n->child;
+	if (NULL != nn && MDOC_TEXT == nn->type) {
+		buf_append(buf, nn->string);
+		return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * Handle `Xr': stage "name.section" in `buf', or "name." if the
+ * section is missing.  Returns 0 if there is no child at all.
+ */
+/* ARGSUSED */
+static int
+pmdoc_Xr(MDOC_ARGS)
+{
+	const struct mdoc_node *name, *sect;
+
+	if (NULL == (name = n->child))
+		return(0);
+
+	sect = name->next;
+
+	buf_appendb(buf, name->string, strlen(name->string));
+
+	if (NULL == sect) {
+		/* The dot and the terminator in one go. */
+		buf_appendb(buf, ".", 2);
+		return(1);
+	}
+
+	buf_appendb(buf, ".", 1);
+	buf_appendb(buf, sect->string, strlen(sect->string) + 1);
+	return(1);
+}
+
+/*
+ * Handle `Nd': for the body node, append the text of all children
+ * to the description buffer `dbuf' and return 1.
+ */
+/* ARGSUSED */
+static int
+pmdoc_Nd(MDOC_ARGS)
+{
+
+	if (MDOC_BODY == n->type) {
+		buf_appendmdoc(dbuf, n->child, 1);
+		return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * Handle `Nm': always accepted in the NAME section; elsewhere only
+ * the head node in the SYNOPSIS is accepted, and a head without
+ * children stages the name recorded in the meta data.
+ */
+/* ARGSUSED */
+static int
+pmdoc_Nm(MDOC_ARGS)
+{
+
+	if (SEC_NAME == n->sec)
+		return(1);
+
+	if (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type) {
+		if (NULL == n->child)
+			buf_append(buf, m->name);
+		return(1);
+	}
+
+	return(0);
+}
+
+/* ARGSUSED */
+static int
+pmdoc_Sh(MDOC_ARGS)
+{
+
+	/* Only the heads of custom sections are of interest. */
+
+	if (SEC_CUSTOM != n->sec)
+		return(0);
+	if (MDOC_HEAD != n->type)
+		return(0);
+	return(1);
+}
+
+/*
+ * Register the content of `buf' as a key in the hash table with the
+ * given type mask.  If the key is already present, the new mask is
+ * ORed into the stored one.  Buffers shorter than two bytes are
+ * ignored.  Exits with MANDOCLEVEL_SYSERR on database errors.
+ */
+static void
+hash_put(DB *db, const struct buf *buf, uint64_t mask)
+{
+	DBT		 key, val;
+	uint64_t	 prev;
+	int		 rc;
+
+	if (buf->len < 2)
+		return;
+
+	key.data = buf->cp;
+	key.size = buf->len;
+
+	rc = (*db->get)(db, &key, &val, 0);
+	if (rc < 0)
+		goto fail;
+
+	if (0 == rc) {
+		/* Known key: merge with the mask stored so far. */
+		assert(sizeof(uint64_t) == val.size);
+		memcpy(&prev, val.data, val.size);
+		mask |= prev;
+	}
+
+	val.data = &mask;
+	val.size = sizeof(uint64_t);
+
+	rc = (*db->put)(db, &key, &val, 0);
+	if (rc >= 0)
+		return;
+fail:
+	perror("hash");
+	exit((int)MANDOCLEVEL_SYSERR);
+}
+
+/*
+ * Store a non-empty key/value pair in the database `db'.
+ * On failure, report the error under the name `dbn' and exit with
+ * MANDOCLEVEL_SYSERR.
+ */
+static void
+dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+{
+
+	assert(key->size);
+	assert(val->size);
+
+	if (0 != (*db->put)(db, key, val, 0)) {
+		perror(dbn);
+		exit((int)MANDOCLEVEL_SYSERR);
+		/* NOTREACHED */
+	}
+}
+
+/*
+ * Visit a node, its descendants and its following siblings, calling
+ * out to the per-macro handlers after clearing the persistent
+ * database key.  If a macro sets the database key and has a mask,
+ * the key is flushed to the hash table.
+ */
+static void
+pmdoc_node(MDOC_ARGS)
+{
+
+	/* Siblings are walked here, children by recursion. */
+
+	for ( ; NULL != n; n = n->next) {
+		switch (n->type) {
+		case (MDOC_HEAD):
+			/* FALLTHROUGH */
+		case (MDOC_BODY):
+			/* FALLTHROUGH */
+		case (MDOC_TAIL):
+			/* FALLTHROUGH */
+		case (MDOC_BLOCK):
+			/* FALLTHROUGH */
+		case (MDOC_ELEM):
+			buf->len = 0;
+
+			/*
+			 * The data is used when there is no handler
+			 * or when the handler returns true.
+			 * A handler returning false skips the element.
+			 */
+
+			if (NULL == mdocs[n->tok].fp ||
+			    (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) {
+
+				/*
+				 * Macros flagged MDOCF_CHILD take the
+				 * text from all their children; for the
+				 * others, the handler fills the buffer.
+				 */
+
+				if (MDOCF_CHILD & mdocs[n->tok].flags)
+					buf_appendmdoc(buf, n->child, 0);
+
+				/*
+				 * The common case: stage one string per
+				 * element.  Macros with a zero mask do
+				 * their own staging in the handler.
+				 */
+
+				if (mdocs[n->tok].mask)
+					hash_put(hash, buf,
+					    mdocs[n->tok].mask);
+			}
+			break;
+		default:
+			break;
+		}
+
+		pmdoc_node(hash, buf, dbuf, n->child, m);
+	}
+}
+
+/*
+ * Walk a man(7) tree looking for the NAME section.
+ * The names found there are registered as TYPE_Nm keys; the text
+ * following the dash is appended to the description buffer `dbuf'
+ * and registered as a TYPE_Nd key.
+ * Returns 1 as soon as a NAME section has been used, which also stops
+ * the walk, such that a second NAME section cannot append another
+ * description to `dbuf'.  Returns 0 if nothing usable was found.
+ */
+static int
+pman_node(MAN_ARGS)
+{
+	const struct man_node *head, *body;
+	char		*start, *sv, *title;
+	size_t		 sz, titlesz;
+
+	if (NULL == n)
+		return(0);
+
+	/*
+	 * We're only searching for one thing: the first text child in
+	 * the BODY of a NAME section.  Since we don't keep track of
+	 * sections in -man, run some hoops to find out whether we're in
+	 * the correct section or not.
+	 */
+
+	if (MAN_BODY == n->type && MAN_SH == n->tok) {
+		body = n;
+		assert(body->parent);
+		if (NULL != (head = body->parent->head) &&
+				1 == head->nchild &&
+				NULL != (head = (head->child)) &&
+				MAN_TEXT == head->type &&
+				0 == strcmp(head->string, "NAME") &&
+				NULL != (body = body->child) &&
+				MAN_TEXT == body->type) {
+
+			title = NULL;
+			titlesz = 0;
+			/*
+			 * Suck the entire NAME section into memory.
+			 * Yes, we might run away.
+			 * But too many manuals have big, spread-out
+			 * NAME sections over many lines.
+			 * Each text line is followed by one space.
+			 */
+			for ( ; NULL != body; body = body->next) {
+				if (MAN_TEXT != body->type)
+					break;
+				if (0 == (sz = strlen(body->string)))
+					continue;
+				title = mandoc_realloc
+					(title, titlesz + sz + 1);
+				memcpy(title + titlesz, body->string, sz);
+				titlesz += sz + 1;
+				title[(int)titlesz - 1] = ' ';
+			}
+			if (NULL == title)
+				return(0);
+
+			title = mandoc_realloc(title, titlesz + 1);
+			title[(int)titlesz] = '\0';
+
+			/* Skip leading space. */
+
+			sv = title;
+			while (isspace((unsigned char)*sv))
+				sv++;
+
+			if (0 == (sz = strlen(sv))) {
+				free(title);
+				return(0);
+			}
+
+			/* Erase trailing space. */
+
+			start = &sv[sz - 1];
+			while (start > sv && isspace((unsigned char)*start))
+				*start-- = '\0';
+
+			if (start == sv) {
+				free(title);
+				return(0);
+			}
+
+			start = sv;
+
+			/*
+			 * Go through a special heuristic dance here.
+			 * This is why -man manuals are great!
+			 * (I'm being sarcastic: my eyes are bleeding.)
+			 * Conventionally, one or more manual names are
+			 * comma-specified prior to a whitespace, then a
+			 * dash, then a description.  Try to puzzle out
+			 * the name parts here.
+			 */
+
+			for ( ;; ) {
+				sz = strcspn(start, " ,");
+				if ('\0' == start[(int)sz])
+					break;
+
+				buf->len = 0;
+				buf_appendb(buf, start, sz);
+				buf_appendb(buf, "", 1);
+
+				hash_put(hash, buf, TYPE_Nm);
+
+				if (' ' == start[(int)sz]) {
+					start += (int)sz + 1;
+					break;
+				}
+
+				assert(',' == start[(int)sz]);
+				start += (int)sz + 1;
+				while (' ' == *start)
+					start++;
+			}
+
+			buf->len = 0;
+
+			/* Neither space nor comma: a lone word. */
+
+			if (sv == start) {
+				buf_append(buf, start);
+				free(title);
+				return(1);
+			}
+
+			while (isspace((unsigned char)*start))
+				start++;
+
+			/* Step over the dash in any of its spellings. */
+
+			if (0 == strncmp(start, "-", 1))
+				start += 1;
+			else if (0 == strncmp(start, "\\-\\-", 4))
+				start += 4;
+			else if (0 == strncmp(start, "\\-", 2))
+				start += 2;
+			else if (0 == strncmp(start, "\\(en", 4))
+				start += 4;
+			else if (0 == strncmp(start, "\\(em", 4))
+				start += 4;
+
+			while (' ' == *start)
+				start++;
+
+			sz = strlen(start) + 1;
+			buf_appendb(dbuf, start, sz);
+			buf_appendb(buf, start, sz);
+
+			hash_put(hash, buf, TYPE_Nd);
+			free(title);
+
+			/*
+			 * The description is complete.  Stop here:
+			 * walking on would let another NAME section
+			 * append a second description to dbuf.
+			 */
+
+			return(1);
+		}
+	}
+
+	for (n = n->child; n; n = n->next)
+		if (pman_node(hash, buf, dbuf, n))
+			return(1);
+
+	return(0);
+}
+
+/*
+ * Parse a formatted manual page.
+ * By necessity, this involves rather crude guesswork.
+ * The title derived from the file name is always registered as a
+ * TYPE_Nm key.  The indented text following the first section header
+ * is searched for "- "; whatever follows becomes the description,
+ * which is appended to `dbuf' and registered as a TYPE_Nd key.
+ * If no such text exists, the title doubles as the description.
+ * A file that cannot be opened only causes a warning.
+ */
+static void
+pformatted(DB *hash, struct buf *buf, struct buf *dbuf,
+		const struct of *of, const char *basedir)
+{
+	FILE		*stream;
+	char		*line, *p, *title;
+	size_t		 len, plen, titlesz;
+
+	if (NULL == (stream = fopen(of->fname, "r"))) {
+		WARNING(of->fname, basedir, "%s", strerror(errno));
+		return;
+	}
+
+	/*
+	 * Always use the title derived from the filename up front,
+	 * do not even try to find it in the file.  This also makes
+	 * sure we don't end up with an orphan index record, even if
+	 * the file content turns out to be completely unintelligible.
+	 */
+
+	buf->len = 0;
+	buf_append(buf, of->title);
+	hash_put(hash, buf, TYPE_Nm);
+
+	/* Skip to first blank line. */
+
+	while (NULL != (line = fgetln(stream, &len)))
+		if ('\n' == *line)
+			break;
+
+	/*
+	 * Assume the first line that is not indented
+	 * is the first section header.  Skip to it.
+	 */
+
+	while (NULL != (line = fgetln(stream, &len)))
+		if ('\n' != *line && ' ' != *line)
+			break;
+
+	/*
+	 * Read up until the next section into a buffer.
+	 * Strip the leading whitespace from each read line and
+	 * replace its trailing newline by a space.
+	 * Ignore empty (whitespace-only) lines.
+	 */
+
+	titlesz = 0;
+	title = NULL;
+
+	while (NULL != (line = fgetln(stream, &len))) {
+		if (' ' != *line || '\n' != line[(int)len - 1])
+			break;
+		while (len > 0 && isspace((unsigned char)*line)) {
+			line++;
+			len--;
+		}
+
+		/*
+		 * The newline counts as whitespace, so nothing at all
+		 * is left of a whitespace-only line.  It must be
+		 * skipped, or the space stored below would end up in
+		 * front of the buffer if this is the first line.
+		 */
+
+		if (0 == len)
+			continue;
+		title = mandoc_realloc(title, titlesz + len);
+		memcpy(title + titlesz, line, len);
+		titlesz += len;
+		title[(int)titlesz - 1] = ' ';
+	}
+
+	/*
+	 * If no page content can be found, or the input line
+	 * is already the next section header, or there is no
+	 * trailing newline, reuse the page title as the page
+	 * description.  Only the buf->len bytes actually in use
+	 * are copied, not the whole allocation.
+	 */
+
+	if (NULL == title || '\0' == *title) {
+		WARNING(of->fname, basedir,
+			"Cannot find NAME section");
+		buf_appendb(dbuf, buf->cp, buf->len);
+		hash_put(hash, buf, TYPE_Nd);
+		fclose(stream);
+		free(title);
+		return;
+	}
+
+	title = mandoc_realloc(title, titlesz + 1);
+	title[(int)titlesz] = '\0';
+
+	/*
+	 * Skip to the first dash.
+	 * Use the remaining text as the description.
+	 */
+
+	if (NULL != (p = strstr(title, "- "))) {
+		for (p += 2; ' ' == *p || '\b' == *p; p++)
+			/* Skip to next word. */ ;
+	} else {
+		WARNING(of->fname, basedir,
+			"No dash in title line");
+		p = title;
+	}
+
+	plen = strlen(p);
+
+	/*
+	 * Strip backspace-encoding from line: drop each backspace
+	 * together with the byte in front of it, if there is one.
+	 * Each move includes the terminating nil byte.
+	 */
+
+	while (NULL != (line = memchr(p, '\b', plen))) {
+		len = line - p;
+		if (0 == len) {
+			memmove(line, line + 1, plen--);
+			continue;
+		}
+		memmove(line - 1, line + 1, plen - len);
+		plen -= 2;
+	}
+
+	buf_appendb(dbuf, p, plen + 1);
+	buf->len = 0;
+	buf_appendb(buf, p, plen + 1);
+	hash_put(hash, buf, TYPE_Nd);
+	fclose(stream);
+	free(title);
+}
+
+/*
+ * Build the list of files to parse from the paths in argv.
+ * Section, architecture, title and source/formatted form of each
+ * file are guessed from its path.  Paths that do not fit into
+ * MAXPATHLEN bytes are reported and left out.
+ * `*of' is left pointing to the last entry of the list; the first
+ * one can be reached through the `first' member.
+ */
+static void
+ofile_argbuild(int argc, char *argv[],
+		struct of **of, const char *basedir)
+{
+	char		 buf[MAXPATHLEN];
+	const char	*sec, *arch, *title;
+	char		*p;
+	int		 i, src_form;
+	struct of	*nof;
+
+	for (i = 0; i < argc; i++) {
+
+		/*
+		 * Work on a private copy of the path: it is cut into
+		 * pieces in place.  The path is assumed to look like
+		 *   man*[/<arch>]/<title>.<section>   or
+		 *   cat<section>[/<arch>]/<title>.0
+		 */
+
+		if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
+			fprintf(stderr, "%s: Path too long\n", argv[i]);
+			continue;
+		}
+
+		src_form = 0;
+		sec = "";
+		arch = "";
+		title = "";
+
+		/* Scan backwards, starting at the last byte. */
+
+		p = strrchr(buf, '\0');
+		while (p-- > buf) {
+			if ('.' == *p && '\0' == *sec) {
+				/* The suffix gives section and form. */
+				*p = '\0';
+				sec = p + 1;
+				if ('0' == *sec)
+					src_form |= MANDOC_FORM;
+				else if ('1' <= *sec && '9' >= *sec)
+					src_form |= MANDOC_SRC;
+				continue;
+			}
+
+			if ('/' != *p)
+				continue;
+
+			if ('\0' == *title) {
+				/* The last path component is the title. */
+				*p = '\0';
+				title = p + 1;
+				continue;
+			}
+
+			/*
+			 * The component above the title is either
+			 * the section directory or the architecture.
+			 */
+
+			if (0 == strncmp("man", p + 1, 3))
+				src_form |= MANDOC_SRC;
+			else if (0 == strncmp("cat", p + 1, 3))
+				src_form |= MANDOC_FORM;
+			else
+				arch = p + 1;
+			break;
+		}
+
+		if ('\0' == *title) {
+			WARNING(argv[i], basedir,
+				"Cannot deduce title from filename");
+			title = buf;
+		}
+
+		/* Build the file structure. */
+
+		nof = mandoc_calloc(1, sizeof(struct of));
+		nof->fname = mandoc_strdup(argv[i]);
+		nof->sec = mandoc_strdup(sec);
+		nof->arch = mandoc_strdup(arch);
+		nof->title = mandoc_strdup(title);
+		nof->src_form = src_form;
+
+		/* Append the structure to the list. */
+
+		if (NULL != *of) {
+			nof->first = (*of)->first;
+			(*of)->next = nof;
+		} else
+			nof->first = nof;
+		*of = nof;
+	}
+}
+
+/*
+ * Recursively build up a list of files to parse.
+ * We use this instead of ftw() and so on because I don't want global
+ * variables hanging around.
+ * This ignores the mandocdb.db and mandocdb.index files, but assumes that
+ * everything else is a manual.
+ * Pass in a pointer to a NULL structure for the first invocation.
+ *
+ * `dir' is the directory to scan and must start with a dot; `psec',
+ * `parch' and `p_src_form' describe the section, architecture and
+ * form inherited from the parent directories.
+ * `basedir' is only used in messages.  It is extended in place
+ * while descending into a subdirectory and restored to its previous
+ * content afterwards, so it must be writable and is assumed to
+ * provide room for MAXPATHLEN bytes.
+ * `*of' is left pointing to the last entry of the list.
+ */
+static void
+ofile_dirbuild(const char *dir, const char* psec, const char *parch,
+		int p_src_form, struct of **of, char *basedir)
+{
+	char		 buf[MAXPATHLEN];
+	size_t		 sz, bsz;
+	DIR		*d;
+	const char	*fn, *sec, *arch;
+	char		*p, *q, *suffix;
+	struct of	*nof;
+	struct dirent	*dp;
+	int		 src_form;
+
+	if (NULL == (d = opendir(dir))) {
+		WARNING("", dir, "%s", strerror(errno));
+		return;
+	}
+
+	while (NULL != (dp = readdir(d))) {
+		fn = dp->d_name;
+
+		if ('.' == *fn)
+			continue;
+
+		src_form = p_src_form;
+
+		if (DT_DIR == dp->d_type) {
+			sec = psec;
+			arch = parch;
+
+			/*
+			 * By default, only use directories called:
+			 *   man<section>/[<arch>/]   or
+			 *   cat<section>/[<arch>/]
+			 */
+
+			if ('\0' == *sec) {
+				if(0 == strncmp("man", fn, 3)) {
+					src_form |= MANDOC_SRC;
+					sec = fn + 3;
+				} else if (0 == strncmp("cat", fn, 3)) {
+					src_form |= MANDOC_FORM;
+					sec = fn + 3;
+				} else {
+					WARNING(fn, basedir, "Bad section");
+					if (use_all)
+						sec = fn;
+					else
+						continue;
+				}
+			} else if ('\0' == *arch) {
+				if (NULL != strchr(fn, '.')) {
+					WARNING(fn, basedir, "Bad architecture");
+					if (0 == use_all)
+						continue;
+				}
+				arch = fn;
+			} else {
+				WARNING(fn, basedir, "Excessive subdirectory");
+				if (0 == use_all)
+					continue;
+			}
+
+			/*
+			 * Remember where basedir ends, in order to
+			 * restore it on every way out of this branch,
+			 * even if appending to it gets truncated.
+			 */
+
+			bsz = strlen(basedir);
+
+			buf[0] = '\0';
+			strlcat(buf, dir, MAXPATHLEN);
+			strlcat(buf, "/", MAXPATHLEN);
+			strlcat(basedir, "/", MAXPATHLEN);
+			strlcat(basedir, fn, MAXPATHLEN);
+			sz = strlcat(buf, fn, MAXPATHLEN);
+
+			if (MAXPATHLEN <= sz) {
+				WARNING(fn, basedir, "Path too long");
+				basedir[bsz] = '\0';
+				continue;
+			}
+
+			ofile_dirbuild(buf, sec, arch,
+					src_form, of, basedir);
+
+			basedir[bsz] = '\0';
+			continue;
+		}
+
+		if (DT_REG != dp->d_type) {
+			WARNING(fn, basedir, "Not a regular file");
+			continue;
+		}
+		if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
+			continue;
+		if ('\0' == *psec) {
+			WARNING(fn, basedir, "File outside section");
+			if (0 == use_all)
+				continue;
+		}
+
+		/*
+		 * By default, skip files where the file name suffix
+		 * does not agree with the section directory
+		 * they are located in.
+		 */
+
+		suffix = strrchr(fn, '.');
+		if (NULL == suffix) {
+			WARNING(fn, basedir, "No filename suffix");
+			if (0 == use_all)
+				continue;
+		} else if ((MANDOC_SRC & src_form &&
+				strcmp(suffix + 1, psec)) ||
+			    (MANDOC_FORM & src_form &&
+				strcmp(suffix + 1, "0"))) {
+			WARNING(fn, basedir, "Wrong filename suffix");
+			if (0 == use_all)
+				continue;
+			if ('0' == suffix[1])
+				src_form |= MANDOC_FORM;
+			else if ('1' <= suffix[1] && '9' >= suffix[1])
+				src_form |= MANDOC_SRC;
+		}
+
+		/*
+		 * Skip formatted manuals if a source version is
+		 * available.  Ignore the age: it is very unlikely
+		 * that people install newer formatted base manuals
+		 * when they used to have source manuals before,
+		 * and in ports, old manuals get removed on update.
+		 */
+		if (0 == use_all && MANDOC_FORM & src_form &&
+				'\0' != *psec) {
+			buf[0] = '\0';
+			strlcat(buf, dir, MAXPATHLEN);
+
+			/*
+			 * Find the section directory in the path:
+			 * the last component, or the one before it
+			 * when inside an architecture directory.
+			 */
+
+			p = strrchr(buf, '/');
+			if ('\0' != *parch && NULL != p)
+				for (p--; p > buf; p--)
+					if ('/' == *p)
+						break;
+			if (NULL == p)
+				p = buf;
+			else
+				p++;
+			if (0 == strncmp("cat", p, 3))
+				memcpy(p, "man", 3);
+			strlcat(buf, "/", MAXPATHLEN);
+			sz = strlcat(buf, fn, MAXPATHLEN);
+			if (sz >= MAXPATHLEN) {
+				WARNING(fn, basedir, "Path too long");
+				continue;
+			}
+
+			/* Swap the suffix for the section name. */
+
+			q = strrchr(buf, '.');
+			if (NULL != q && p < q++) {
+				*q = '\0';
+				sz = strlcat(buf, psec, MAXPATHLEN);
+				if (sz >= MAXPATHLEN) {
+					WARNING(fn, basedir, "Path too long");
+					continue;
+				}
+				if (0 == access(buf, R_OK))
+					continue;
+			}
+		}
+
+		/* Store the path without the leading "./". */
+
+		buf[0] = '\0';
+		assert('.' == dir[0]);
+		if ('/' == dir[1]) {
+			strlcat(buf, dir + 2, MAXPATHLEN);
+			strlcat(buf, "/", MAXPATHLEN);
+		}
+		sz = strlcat(buf, fn, MAXPATHLEN);
+		if (sz >= MAXPATHLEN) {
+			WARNING(fn, basedir, "Path too long");
+			continue;
+		}
+
+		nof = mandoc_calloc(1, sizeof(struct of));
+		nof->fname = mandoc_strdup(buf);
+		nof->sec = mandoc_strdup(psec);
+		nof->arch = mandoc_strdup(parch);
+		nof->src_form = src_form;
+
+		/*
+		 * Remember the file name without the extension,
+		 * to be used as the page title in the database.
+		 * This cuts the name in the directory entry itself.
+		 */
+
+		if (NULL != suffix)
+			*suffix = '\0';
+		nof->title = mandoc_strdup(fn);
+
+		/*
+		 * Add the structure to the list.
+		 */
+
+		if (NULL == *of) {
+			*of = nof;
+			(*of)->first = nof;
+		} else {
+			nof->first = (*of)->first;
+			(*of)->next = nof;
+			*of = nof;
+		}
+	}
+
+	closedir(d);
+}
+
+/*
+ * Release a complete file list, starting from its first entry,
+ * whichever entry is passed in.  A NULL list is fine.
+ */
+static void
+ofile_free(struct of *of)
+{
+	struct of	*cur, *next;
+
+	cur = (NULL == of) ? NULL : of->first;
+
+	for ( ; NULL != cur; cur = next) {
+		/* Fetch the successor before the entry goes away. */
+		next = cur->next;
+		free(cur->fname);
+		free(cur->sec);
+		free(cur->arch);
+		free(cur->title);
+		free(cur);
+	}
+}
diff --git a/mandocdb.h b/mandocdb.h
new file mode 100644
index 000000000000..bda0536e793f
--- /dev/null
+++ b/mandocdb.h
@@ -0,0 +1,62 @@
+/* $Id: mandocdb.h,v 1.6 2012/03/23 02:52:33 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MANDOCDB_H
+#define MANDOCDB_H
+
+#define MANDOC_DB "mandocdb.db" /* database file name; never treated as a manual */
+#define MANDOC_IDX "mandocdb.index" /* index file name; never treated as a manual */
+/*
+ * Keyword type masks: one distinct bit per macro name.
+ * The masks of one keyword are ORed together in a 64-bit value.
+ */
+#define TYPE_An 0x0000000000000001ULL
+#define TYPE_Ar 0x0000000000000002ULL
+#define TYPE_At 0x0000000000000004ULL
+#define TYPE_Bsx 0x0000000000000008ULL
+#define TYPE_Bx 0x0000000000000010ULL
+#define TYPE_Cd 0x0000000000000020ULL
+#define TYPE_Cm 0x0000000000000040ULL
+#define TYPE_Dv 0x0000000000000080ULL
+#define TYPE_Dx 0x0000000000000100ULL
+#define TYPE_Em 0x0000000000000200ULL
+#define TYPE_Er 0x0000000000000400ULL
+#define TYPE_Ev 0x0000000000000800ULL
+#define TYPE_Fa 0x0000000000001000ULL
+#define TYPE_Fl 0x0000000000002000ULL
+#define TYPE_Fn 0x0000000000004000ULL
+#define TYPE_Ft 0x0000000000008000ULL
+#define TYPE_Fx 0x0000000000010000ULL
+#define TYPE_Ic 0x0000000000020000ULL
+#define TYPE_In 0x0000000000040000ULL
+#define TYPE_Lb 0x0000000000080000ULL
+#define TYPE_Li 0x0000000000100000ULL
+#define TYPE_Lk 0x0000000000200000ULL
+#define TYPE_Ms 0x0000000000400000ULL
+#define TYPE_Mt 0x0000000000800000ULL
+#define TYPE_Nd 0x0000000001000000ULL
+#define TYPE_Nm 0x0000000002000000ULL
+#define TYPE_Nx 0x0000000004000000ULL
+#define TYPE_Ox 0x0000000008000000ULL
+#define TYPE_Pa 0x0000000010000000ULL
+#define TYPE_Rs 0x0000000020000000ULL
+#define TYPE_Sh 0x0000000040000000ULL
+#define TYPE_Ss 0x0000000080000000ULL
+#define TYPE_St 0x0000000100000000ULL
+#define TYPE_Sy 0x0000000200000000ULL
+#define TYPE_Tn 0x0000000400000000ULL
+#define TYPE_Va 0x0000000800000000ULL
+#define TYPE_Vt 0x0000001000000000ULL
+#define TYPE_Xr 0x0000002000000000ULL
+
+#endif /*!MANDOCDB_H */
diff --git a/manpath.c b/manpath.c
new file mode 100644
index 000000000000..b6d9574e3070
--- /dev/null
+++ b/manpath.c
@@ -0,0 +1,225 @@
+/* $Id: manpath.c,v 1.8 2011/12/24 22:37:16 kristaps Exp $ */
+/*
+ * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/param.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "manpath.h"
+
+#define MAN_CONF_FILE "/etc/man.conf" /* used when manpath_parse() is given no file */
+#define MAN_CONF_KEY "_whatdb" /* NOTE(review): presumably a man.conf(5) keyword; its use is not visible here */
+
+static void manpath_add(struct manpaths *, const char *);
+static void manpath_parseline(struct manpaths *, char *);
+
+void
+manpath_parse(struct manpaths *dirs, const char *file,
+ char *defp, char *auxp)
+{
+#ifdef USE_MANPATH
+ char cmd[(MAXPATHLEN * 3) + 20];
+ FILE *stream;
+ char *buf;
+ size_t sz, bsz;
+
+ strlcpy(cmd, "manpath", sizeof(cmd));
+ if (file) {
+ strlcat(cmd, " -C ", sizeof(cmd));
+ strlcat(cmd, file, sizeof(cmd));
+ }
+ if (auxp) {
+ strlcat(cmd, " -m ", sizeof(cmd));
+ strlcat(cmd, auxp, sizeof(cmd));
+ }
+ if (defp) {
+ strlcat(cmd, " -M ", sizeof(cmd));
+ strlcat(cmd, defp, sizeof(cmd));
+ }
+
+ /* Open manpath(1). Ignore errors. */
+
+ stream = popen(cmd, "r");
+ if (NULL == stream)
+ return;
+
+ buf = NULL;
+ bsz = 0;
+
+ /* Read in as much output as we can. */
+
+ do {
+ buf = mandoc_realloc(buf, bsz + 1024);
+ sz = fread(buf + (int)bsz, 1, 1024, stream);
+ bsz += sz;
+ } while (sz > 0);
+
+ if ( ! ferror(stream) && feof(stream) &&
+ bsz && '\n' == buf[bsz - 1]) {
+ buf[bsz - 1] = '\0';
+ manpath_parseline(dirs, buf);
+ }
+
+ free(buf);
+ pclose(stream);
+#else
+ char *insert;
+
+ /* Always prepend -m. */
+ manpath_parseline(dirs, auxp);
+
+ /* If -M is given, it overrides everything else. */
+ if (NULL != defp) {
+ manpath_parseline(dirs, defp);
+ return;
+ }
+
+ /* MANPATH and man.conf(5) cooperate. */
+ defp = getenv("MANPATH");
+ if (NULL == file)
+ file = MAN_CONF_FILE;
+
+ /* No MANPATH; use man.conf(5) only. */
+ if (NULL == defp || '\0' == defp[0]) {
+ manpath_manconf(dirs, file);
+ return;
+ }
+
+ /* Prepend man.conf(5) to MANPATH. */
+ if (':' == defp[0]) {
+ manpath_manconf(dirs, file);
+ manpath_parseline(dirs, defp);
+ return;
+ }
+
+ /* Append man.conf(5) to MANPATH. */
+ if (':' == defp[(int)strlen(defp) - 1]) {
+ manpath_parseline(dirs, defp);
+ manpath_manconf(dirs, file);
+ return;
+ }
+
+ /* Insert man.conf(5) into MANPATH. */
+ insert = strstr(defp, "::");
+ if (NULL != insert) {
+ *insert++ = '\0';
+ manpath_parseline(dirs, defp);
+ manpath_manconf(dirs, file);
+ manpath_parseline(dirs, insert + 1);
+ return;
+ }
+
+ /* MANPATH overrides man.conf(5) completely. */
+ manpath_parseline(dirs, defp);