aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBaptiste Daroussin <bapt@FreeBSD.org>2014-11-22 18:08:25 +0000
committerBaptiste Daroussin <bapt@FreeBSD.org>2014-11-22 18:08:25 +0000
commit52c0e9552d2c7c67a39132a9eb3dc5a876a7429e (patch)
tree7ef9f92f249a503d1c005f88d483481725db364a
parent53f5f26c3e5c6d5c310f44f1882afbd9cdd0297b (diff)
downloadsrc-52c0e9552d2c7c67a39132a9eb3dc5a876a7429e.tar.gz
src-52c0e9552d2c7c67a39132a9eb3dc5a876a7429e.zip
Import mandoc 1.13.1vendor/mandoc/1.13.1
Notes
Notes: svn path=/vendor/mdocml/dist/; revision=274876 svn path=/vendor/mdocml/1.13.1/; revision=274877; tag=vendor/mandoc/1.13.1
-rw-r--r--INSTALL187
-rw-r--r--LICENSE44
-rw-r--r--Makefile507
-rw-r--r--Makefile.depend70
-rw-r--r--NEWS82
-rw-r--r--TODO175
-rw-r--r--apropos.1220
-rw-r--r--apropos.c117
-rw-r--r--apropos_db.c884
-rw-r--r--apropos_db.h73
-rw-r--r--arch.c8
-rw-r--r--arch.in6
-rw-r--r--att.c8
-rw-r--r--catman.8111
-rw-r--r--catman.c509
-rw-r--r--cgi.c1267
-rw-r--r--cgi.h.example9
-rw-r--r--chars.c35
-rw-r--r--chars.in44
-rw-r--r--compat_ohash.c339
-rw-r--r--compat_ohash.h73
-rw-r--r--compat_reallocarray.c45
-rw-r--r--compat_sqlite3_errstr.c18
-rw-r--r--compat_strcasestr.c74
-rw-r--r--compat_strsep.c80
-rw-r--r--config.h.post36
-rw-r--r--config.h.pre3
-rwxr-xr-xconfigure49
-rw-r--r--demandoc.c6
-rw-r--r--eqn.c73
-rw-r--r--eqn_html.c10
-rw-r--r--eqn_term.c5
-rw-r--r--external.pngbin165 -> 0 bytes
-rw-r--r--gmdiff9
-rw-r--r--html.c183
-rw-r--r--html.h17
-rw-r--r--index.css48
-rw-r--r--index.sgml438
-rw-r--r--lib.c5
-rw-r--r--lib.in3
-rw-r--r--libman.h14
-rw-r--r--libmandoc.h36
-rw-r--r--libmdoc.h19
-rw-r--r--libroff.h6
-rw-r--r--main.c131
-rw-r--r--main.h4
-rw-r--r--makewhatis.8217
-rw-r--r--man.7116
-rw-r--r--man.c218
-rw-r--r--man.cgi.7122
-rw-r--r--man.cgi.8409
-rw-r--r--man.h6
-rw-r--r--man_hash.c8
-rw-r--r--man_html.c102
-rw-r--r--man_macro.c155
-rw-r--r--man_term.c274
-rw-r--r--man_validate.c370
-rw-r--r--mandoc.1875
-rw-r--r--mandoc.3341
-rw-r--r--mandoc.c226
-rw-r--r--mandoc.db.5144
-rw-r--r--mandoc.h216
-rw-r--r--mandoc_aux.c121
-rw-r--r--mandoc_aux.h33
-rw-r--r--mandoc_escape.3362
-rw-r--r--mandoc_html.3249
-rw-r--r--mandoc_malloc.3197
-rw-r--r--mandocdb.8324
-rw-r--r--mandocdb.c3653
-rw-r--r--mandocdb.h62
-rw-r--r--manpage.c190
-rw-r--r--manpath.c9
-rw-r--r--mansearch.3228
-rw-r--r--mansearch.c861
-rw-r--r--mansearch.h101
-rw-r--r--mansearch_const.c35
-rw-r--r--mchars_alloc.3224
-rw-r--r--mdoc.7578
-rw-r--r--mdoc.c339
-rw-r--r--mdoc.h26
-rw-r--r--mdoc_argv.c112
-rw-r--r--mdoc_hash.c6
-rw-r--r--mdoc_html.c646
-rw-r--r--mdoc_macro.c384
-rw-r--r--mdoc_man.c353
-rw-r--r--mdoc_term.c732
-rw-r--r--mdoc_validate.c1753
-rw-r--r--msec.c3
-rw-r--r--out.c58
-rw-r--r--out.h6
-rw-r--r--read.c365
-rw-r--r--roff.7305
-rw-r--r--roff.c1266
-rw-r--r--st.c5
-rw-r--r--st.in5
-rw-r--r--tbl.c25
-rw-r--r--tbl_data.c49
-rw-r--r--tbl_html.c11
-rw-r--r--tbl_layout.c110
-rw-r--r--tbl_opts.c47
-rw-r--r--tbl_term.c74
-rw-r--r--term.c225
-rw-r--r--term.h22
-rw-r--r--term_ascii.c84
-rw-r--r--term_ps.c212
-rw-r--r--test-betoh64.c18
-rw-r--r--test-fgetln.c8
-rw-r--r--test-getsubopt.c13
-rw-r--r--test-mmap.c7
-rw-r--r--test-ohash.c21
-rw-r--r--test-reallocarray.c7
-rw-r--r--test-sqlite3_errstr.c8
-rw-r--r--test-strcasestr.c13
-rw-r--r--test-strlcat.c7
-rw-r--r--test-strlcpy.c7
-rw-r--r--test-strptime.c9
-rw-r--r--test-strsep.c10
-rw-r--r--tree.c122
-rw-r--r--vol.c5
-rw-r--r--whatis.1171
120 files changed, 14162 insertions, 10593 deletions
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 000000000000..da8eeab9dd4e
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,187 @@
+$Id: INSTALL,v 1.2 2014/08/10 17:22:26 schwarze Exp $
+
+About mdocml, the portable mandoc distribution
+----------------------------------------------
+The mandoc manpage compiler toolset is a suite of tools compiling
+mdoc(7), the roff(7) macro language of choice for BSD manual pages,
+and man(7), the predominant historical language for UNIX manuals.
+The toolset does not yet implement man(1); that is only scheduled
+for the next release, 1.13.2. It can, however, already serve to
+translate source manpages to the output displayed by man(1).
+For general information, see <http://mdocml.bsd.lv/>.
+
+In this document, we describe the installation and deployment of
+mandoc(1), first as a simple, standalone formatter, and then as part of
+the man(1) system.
+
+In case you have questions or want to provide feedback, read
+<http://mdocml.bsd.lv/contact.html>. Consider subscribing to the
+discuss@ mailing list mentioned on that page. If you intend to
+help with the development of mandoc, consider subscribing to the
+tech@ mailing list, too.
+
+Enjoy using the mandoc toolset!
+
+Ingo Schwarze, Karlsruhe, August 2014
+
+
+Installation
+------------
+Before manually installing mandoc on your system, please check
+whether the newest version of mandoc is already installed by default
+or available via a binary package or a ports system. A list of the
+latest bundled and ported versions of mandoc for various operating
+systems is maintained at <http://mdocml.bsd.lv/ports.html>.
+
+If mandoc is installed, you can check the version by running "mandoc -V".
+The version contained in this distribution tarball is listed near
+the beginning of the file "Makefile".
+
+Regarding how packages and ports are maintained for your operating
+system, please consult your operating system documentation.
+To install mandoc manually, the following steps are needed:
+
+1. Decide whether you want to build the base tools mandoc(1),
+preconv(1) and demandoc(1) only or whether you also want to build the
+database tools apropos(1) and makewhatis(8). For the latter,
+the following dependencies are required:
+
+1.1. The SQLite database system, see <http://sqlite.org/>.
+The recommended version of SQLite is 3.8.4.3 or newer. The mandoc
+toolset is known to work with version 3.7.5 or newer. Versions
+older than 3.8.3 may not achieve full performance due to the
+missing SQLITE_DETERMINISTIC optimization flag. Versions older
+than 3.8.0 may not show full error information if opening a database
+fails due to the missing sqlite3_errstr() API. Both are very minor
+problems, apropos(1) is fully usable with SQLite 3.7.5. Versions
+older than 3.7.5 may or may not work, they have not been tested.
+
+1.2. The fts(3) directory traversal functions.
+A compatibility version will be bundled for 1.13.2 but is not available
+yet. If you want apropos(1) and makewhatis(8) but do not have fts(3),
+please stay with mandoc 1.12.3 for now and upgrade first to 1.12.4,
+then to 1.13.2 when these versions are released. Be careful: the
+glibc version of fts(3) is known to be broken on 32bit platforms,
+see <https://sourceware.org/bugzilla/show_bug.cgi?id=15838>.
+
+1.3. Marc Espie's ohash(3) library.
+If your system does not have it, the bundled compatibility version
+will be used, so you probably need not worry about it.
+
+2. If you choose to build the database tools, too, decide whether
+you also want to build the CGI program, man.cgi(8).
+
+3. Read the beginning of the file "Makefile" from "USER SETTINGS"
+to "END OF USER SETTINGS" and edit it as required. In particular,
+disable "BUILD_TARGETS += db-build" if you do not want database
+support or enable "BUILD_TARGETS += cgi-build" if you do want
+the CGI program.
+
+4. Run "make". No separate "./configure" or "make depend" steps
+are needed. The former is run automatically by "make". The latter
+is a maintainer target. If you merely want to build the released
+version as opposed to doing active development, there is no need
+to regenerate the dependency specifications. Any POSIX-compatible
+make, in particular both BSD make and GNU make, should work.
+
+5. Run "make -n install" and check whether everything will be
+installed to the intended places. Otherwise, edit the *DIR variables
+in the Makefile until it is.
+
+6. Run "sudo make install". If you intend to build a binary
+package using some kind of fake root mechanism, you may need a
+command like "make DESTDIR=... install". Read the *-install targets
+in the "Makefile" to understand how DESTDIR is used.
+
+7. To set up a man.cgi(8) server, read its manual page.
+
+8. To use mandoc(1) as your man(1) formatter, read the "Deployment"
+section below.
+
+
+Checking autoconfiguration quality
+----------------------------------
+If you want to check whether automatic configuration works well
+on your platform, consider the following:
+
+The mandoc package intentionally does not use GNU autoconf because
+we consider that toolset a blatant example of overengineering that
+is obsolete nowadays, since all modern operating systems are now
+reasonably close to POSIX and do not need arcane shell magic any
+longer. If your system does need such magic, consider upgrading
+to reasonably modern POSIX-compliant tools rather than asking for
+autoconf-style workarounds.
+
+As far as mandoc is using any features not mandated by ANSI X3.159-1989
+("ANSI C") or IEEE Std 1003.1-2008 ("POSIX") that some modern systems
+do not have, we intend to provide autoconfiguration tests and
+compat_*.c implementations. Please report any that turn out to be
+missing. Note that while we do strive to produce portable code,
+we do not slavishly restrict ourselves to POSIX-only interfaces.
+For improved security and readability, we do use well-designed,
+modern interfaces like reallocarray(3) even if they are still rather
+uncommon, of course bundling compat_*.c implementations as needed.
+
+Where mandoc is using ANSI C or POSIX features that some systems
+still lack and that compat_*.c implementations can be provided for
+without too much hassle, we will consider adding them, too, so
+please report whatever is missing on your platform.
+
+The following steps can be used to manually check the automatic
+configuration on your platform:
+
+1. Run "make clean".
+
+2. Run "make config.h"
+
+3. Read the file "config.log". It shows the compiler commands used
+to test the libraries installed on your system and the standard
+output and standard error output these commands produce. Watch out
+for unexpected failures. Those are most likely to happen if headers
+or libraries are installed in unusual places or interfaces defined
+in unusual headers. You can also look at the file "config.h" and
+check that no expected "#define HAVE_*" lines are missing. The
+list of tests run can be found in the file "configure".
+
+
+Deployment
+----------
+If you want to integrate the mandoc(1) tools with your existing
+man(1) system as a formatter, then contact us first: on systems without
+mandoc(1) as the default, you may have your work cut out for you!
+Usually, you can have your default installation and mandoc(1) work right
+alongside each other by using user-specific versions of the files
+mentioned below.
+
+0. Back up each file you want to change!
+
+1. First see whether your system has "/etc/man.conf" or "/etc/manpath.conf"
+(if it has neither, but man(1) is functional, then let us know) or,
+if running as your own user, a per-user override file. In either
+case, find where man(1) is executing nroff(1) or groff(1) to format
+manuals. Replace these calls with mandoc(1).
+
+2. Then make sure that man(1) isn't running preprocessors, so you may
+need to replace tbl(1), eqn(1), and similar references with cat(1).
+Some man(1) implementations, like that on Mac OS X, let you run "man -d"
+to see how the formatter is invoked. Use this to test your changes. On
+Mac OS X, for instance, man(1) will prepend all files with ".ll" and
+".nr" to set the terminal size, so you need to pass "tail -n+2 |
+mandoc(1)" to disregard them.
+
+3. Finally, make sure that mandoc(1) is actually being invoked instead
+of cached pages being pulled up. You can usually do this by commenting
+out NOCACHE or similar.
+
+mandoc(1) still has a long way to go in understanding non-trivial
+low-level roff(7) markup embedded in some man(7) pages. On the BSD
+systems using mandoc(1), third-party software is generally vetted
+on whether it may be formatted with mandoc(1). If not, groff(1)
+is pulled in as a dependency and used to install a pre-formatted
+"catpage" instead of directly as manual page source.
+
+For more background on switching operating systems to use mandoc(1)
+instead of groff(1) to format manuals, see the two BSDCan presentations
+by Ingo Schwarze:
+<http://www.openbsd.org/papers/bsdcan11-mandoc-openbsd.html>
+<http://www.openbsd.org/papers/bsdcan14-mandoc.pdf>
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000000..35072fb2d1ba
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,44 @@
+$Id: LICENSE,v 1.2 2014/04/23 21:06:41 schwarze Exp $
+
+With the exceptions noted below, all code and documentation
+contained in the mdocml toolkit is protected by the Copyright
+of the following developers:
+
+Copyright (c) 2008, 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+Copyright (c) 2010, 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
+Copyright (c) 2009, 2010, 2011, 2012 Joerg Sonnenberger <joerg@netbsd.org>
+Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
+Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
+Copyright (c) 1998, 2010 Todd C. Miller <Todd.Miller@courtesan.com>
+Copyright (c) 2008 Otto Moerbeek <otto@drijf.net>
+Copyright (c) 2003 Jason McIntyre <jmc@openbsd.org>
+
+See the individual source files for information about who contributed
+to which file during which years.
+
+
+The mdocml distribution as a whole is distributed by its developers
+under the following license:
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+The following files included from outside sources are protected by
+other people's Copyright and are distributed under a 3-clause BSD
+license; see these individual files for details.
+
+compat_getsubopt.c, compat_strcasestr.c, compat_strsep.c:
+Copyright (c) 1990, 1993 The Regents of the University of California
+
+compat_fgetln.c:
+Copyright (c) 1998 The NetBSD Foundation, Inc.
diff --git a/Makefile b/Makefile
index 20b9feaa022b..47f37a7dffc9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,30 @@
-.PHONY: clean install installwww
-.SUFFIXES: .sgml .html .md5 .h .h.html
-.SUFFIXES: .1 .3 .7 .8
-.SUFFIXES: .1.html .3.html .7.html .8.html
+# $Id: Makefile,v 1.435 2014/08/10 02:45:04 schwarze Exp $
+#
+# Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+# Copyright (c) 2011, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+VERSION = 1.13.1
+
+# === USER SETTINGS ====================================================
+
+# --- user settings relevant for all builds ----------------------------
# Specify this if you want to hard-code the operating system to appear
# in the lower-left hand corner of -mdoc manuals.
#
-# CFLAGS += -DOSNAME="\"OpenBSD 5.4\""
-
-VERSION = 1.12.3
-VDATE = 31 December 2013
+# CFLAGS += -DOSNAME="\"OpenBSD 5.5\""
# IFF your system supports multi-byte functions (setlocale(), wcwidth(),
# putwchar()) AND has __STDC_ISO_10646__ (that is, wchar_t is simply a
@@ -19,113 +34,136 @@ VDATE = 31 December 2013
#
CFLAGS += -DUSE_WCHAR
-# If your system has manpath(1), uncomment this. This is most any
-# system that's not OpenBSD or NetBSD. If uncommented, apropos(1),
-# mandocdb(8), and man.cgi will popen(3) manpath(1) to get the MANPATH
-# variable.
-#CFLAGS += -DUSE_MANPATH
-
-# If your system does not support static binaries, comment this,
-# for example on Mac OS X.
-STATIC = -static
-# Linux requires -pthread to statically link with libdb.
-#STATIC += -pthread
-
CFLAGS += -g -DHAVE_CONFIG_H
CFLAGS += -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings
PREFIX = /usr/local
-WWWPREFIX = /var/www
-HTDOCDIR = $(WWWPREFIX)/htdocs
-CGIBINDIR = $(WWWPREFIX)/cgi-bin
BINDIR = $(PREFIX)/bin
INCLUDEDIR = $(PREFIX)/include/mandoc
LIBDIR = $(PREFIX)/lib/mandoc
MANDIR = $(PREFIX)/man
EXAMPLEDIR = $(PREFIX)/share/examples/mandoc
+
INSTALL = install
-INSTALL_PROGRAM = $(INSTALL) -m 0755
+INSTALL_PROGRAM = $(INSTALL) -m 0555
INSTALL_DATA = $(INSTALL) -m 0444
-INSTALL_LIB = $(INSTALL) -m 0644
+INSTALL_LIB = $(INSTALL) -m 0444
INSTALL_SOURCE = $(INSTALL) -m 0644
INSTALL_MAN = $(INSTALL_DATA)
-# Non-BSD systems (Linux, etc.) need -ldb to compile mandocdb and
-# apropos.
-# However, if you don't have -ldb at all (or it's not native), then
-# comment out apropos and mandocdb.
+# --- user settings related to database support ------------------------
+
+# Building apropos(1) and makewhatis(8) requires both SQLite3 and fts(3).
+# To avoid those dependencies, comment the following line.
+# Be careful: the fts(3) implementation in glibc is broken on 32bit
+# machines, see: https://sourceware.org/bugzilla/show_bug.cgi?id=15838
#
-#DBLIB = -ldb
-DBBIN = apropos mandocdb man.cgi catman whatis
-DBLN = llib-lapropos.ln llib-lmandocdb.ln llib-lman.cgi.ln llib-lcatman.ln
+BUILD_TARGETS += db-build
-all: mandoc preconv demandoc $(DBBIN)
+# The remaining settings in this section
+# are only relevant if db-build is enabled.
+# Otherwise, they have no effect either way.
-SRCS = Makefile \
- NEWS \
- TODO \
- apropos.1 \
- apropos.c \
- apropos_db.c \
- apropos_db.h \
+# If your system has manpath(1), uncomment this. This is most any
+# system that's not OpenBSD or NetBSD. If uncommented, apropos(1)
+# and makewhatis(8) will use manpath(1) to get the MANPATH variable.
+#
+#CFLAGS += -DUSE_MANPATH
+
+# On some systems, SQLite3 may be installed below /usr/local.
+# In that case, uncomment the following two lines.
+#
+#CFLAGS += -I/usr/local/include
+#DBLIB += -L/usr/local/lib
+
+# OpenBSD has the ohash functions in libutil.
+# Comment the following line if your system doesn't.
+#
+DBLIB += -lutil
+
+SBINDIR = $(PREFIX)/sbin
+
+# --- user settings related to man.cgi ---------------------------------
+
+# To build man.cgi, copy cgi.h.example to cgi.h, edit it,
+# and enable the following line.
+# Obviously, this requires that db-build is enabled, too.
+#
+#BUILD_TARGETS += cgi-build
+
+# The remaining settings in this section
+# are only relevant if cgi-build is enabled.
+# Otherwise, they have no effect either way.
+
+# If your system does not support static binaries, comment this,
+# for example on Mac OS X.
+#
+STATIC = -static
+
+# Linux requires -pthread for static linking.
+#
+#STATIC += -pthread
+
+WWWPREFIX = /var/www
+HTDOCDIR = $(WWWPREFIX)/htdocs
+CGIBINDIR = $(WWWPREFIX)/cgi-bin
+
+# === END OF USER SETTINGS =============================================
+
+INSTALL_TARGETS = $(BUILD_TARGETS:-build=-install)
+
+BASEBIN = mandoc preconv demandoc
+DBBIN = apropos makewhatis
+CGIBIN = man.cgi
+
+DBLIB += -lsqlite3
+
+TESTSRCS = test-fgetln.c \
+ test-getsubopt.c \
+ test-mmap.c \
+ test-ohash.c \
+ test-reallocarray.c \
+ test-sqlite3_errstr.c \
+ test-strcasestr.c \
+ test-strlcat.c \
+ test-strlcpy.c \
+ test-strptime.c \
+ test-strsep.c
+
+SRCS = apropos.c \
arch.c \
- arch.in \
att.c \
- att.in \
- catman.8 \
- catman.c \
cgi.c \
chars.c \
- chars.in \
compat_fgetln.c \
compat_getsubopt.c \
+ compat_ohash.c \
+ compat_reallocarray.c \
+ compat_sqlite3_errstr.c \
+ compat_strcasestr.c \
compat_strlcat.c \
compat_strlcpy.c \
- config.h.post \
- config.h.pre \
- demandoc.1 \
+ compat_strsep.c \
demandoc.c \
- eqn.7 \
eqn.c \
eqn_html.c \
eqn_term.c \
- example.style.css \
- external.png \
- gmdiff \
html.c \
- html.h \
- index.css \
- index.sgml \
lib.c \
- lib.in \
- libman.h \
- libmandoc.h \
- libmdoc.h \
- libroff.h \
main.c \
- main.h \
- man.7 \
man.c \
- man.cgi.7 \
- man-cgi.css \
- man.h \
man_hash.c \
man_html.c \
man_macro.c \
man_term.c \
man_validate.c \
- mandoc.1 \
- mandoc.3 \
mandoc.c \
- mandoc.h \
- mandoc_char.7 \
- mandocdb.8 \
+ mandoc_aux.c \
mandocdb.c \
- mandocdb.h \
+ manpage.c \
manpath.c \
- manpath.h \
- mdoc.7 \
+ mansearch.c \
+ mansearch_const.c \
mdoc.c \
- mdoc.h \
mdoc_argv.c \
mdoc_hash.c \
mdoc_html.c \
@@ -134,20 +172,11 @@ SRCS = Makefile \
mdoc_term.c \
mdoc_validate.c \
msec.c \
- msec.in \
out.c \
- out.h \
- preconv.1 \
preconv.c \
- predefs.in \
read.c \
- roff.7 \
roff.c \
st.c \
- st.in \
- style.css \
- tbl.3 \
- tbl.7 \
tbl.c \
tbl_data.c \
tbl_html.c \
@@ -155,20 +184,70 @@ SRCS = Makefile \
tbl_opts.c \
tbl_term.c \
term.c \
- term.h \
term_ascii.c \
term_ps.c \
- test-betoh64.c \
- test-fgetln.c \
- test-getsubopt.c \
- test-mmap.c \
- test-strlcat.c \
- test-strlcpy.c \
- test-strptime.c \
tree.c \
vol.c \
+ $(TESTSRCS)
+
+DISTFILES = INSTALL \
+ LICENSE \
+ Makefile \
+ Makefile.depend \
+ NEWS \
+ TODO \
+ apropos.1 \
+ arch.in \
+ att.in \
+ cgi.h.example \
+ chars.in \
+ compat_ohash.h \
+ config.h.post \
+ config.h.pre \
+ configure \
+ demandoc.1 \
+ eqn.7 \
+ example.style.css \
+ gmdiff \
+ html.h \
+ lib.in \
+ libman.h \
+ libmandoc.h \
+ libmdoc.h \
+ libroff.h \
+ main.h \
+ makewhatis.8 \
+ man-cgi.css \
+ man.7 \
+ man.cgi.8 \
+ man.h \
+ mandoc.1 \
+ mandoc.3 \
+ mandoc.db.5 \
+ mandoc.h \
+ mandoc_aux.h \
+ mandoc_char.7 \
+ mandoc_escape.3 \
+ mandoc_html.3 \
+ mandoc_malloc.3 \
+ manpath.h \
+ mansearch.3 \
+ mansearch.h \
+ mchars_alloc.3 \
+ mdoc.7 \
+ mdoc.h \
+ msec.in \
+ out.h \
+ preconv.1 \
+ predefs.in \
+ roff.7 \
+ st.in \
+ style.css \
+ tbl.3 \
+ tbl.7 \
+ term.h \
vol.in \
- whatis.1
+ $(SRCS)
LIBMAN_OBJS = man.o \
man_hash.o \
@@ -198,35 +277,25 @@ LIBMANDOC_OBJS = $(LIBMAN_OBJS) \
$(LIBROFF_OBJS) \
chars.o \
mandoc.o \
+ mandoc_aux.o \
msec.o \
read.o
COMPAT_OBJS = compat_fgetln.o \
compat_getsubopt.o \
+ compat_ohash.o \
+ compat_reallocarray.o \
+ compat_sqlite3_errstr.o \
+ compat_strcasestr.o \
compat_strlcat.o \
- compat_strlcpy.o
-
-arch.o: arch.in
-att.o: att.in
-chars.o: chars.in
-lib.o: lib.in
-msec.o: msec.in
-roff.o: predefs.in
-st.o: st.in
-vol.o: vol.in
-
-$(LIBMAN_OBJS): libman.h
-$(LIBMDOC_OBJS): libmdoc.h
-$(LIBROFF_OBJS): libroff.h
-$(LIBMANDOC_OBJS): mandoc.h mdoc.h man.h libmandoc.h config.h
-$(COMPAT_OBJS): config.h
+ compat_strlcpy.o \
+ compat_strsep.o
MANDOC_HTML_OBJS = eqn_html.o \
html.o \
man_html.o \
mdoc_html.o \
tbl_html.o
-$(MANDOC_HTML_OBJS): html.h
MANDOC_MAN_OBJS = mdoc_man.o
@@ -237,7 +306,6 @@ MANDOC_TERM_OBJS = eqn_term.o \
term_ascii.o \
term_ps.o \
tbl_term.o
-$(MANDOC_TERM_OBJS): term.h
MANDOC_OBJS = $(MANDOC_HTML_OBJS) \
$(MANDOC_MAN_OBJS) \
@@ -245,76 +313,85 @@ MANDOC_OBJS = $(MANDOC_HTML_OBJS) \
main.o \
out.o \
tree.o
-$(MANDOC_OBJS): main.h mandoc.h mdoc.h man.h config.h out.h
-MANDOCDB_OBJS = mandocdb.o manpath.o
-$(MANDOCDB_OBJS): mandocdb.h mandoc.h mdoc.h man.h config.h manpath.h
+MAKEWHATIS_OBJS = mandocdb.o mansearch_const.o manpath.o
PRECONV_OBJS = preconv.o
-$(PRECONV_OBJS): config.h
-APROPOS_OBJS = apropos.o apropos_db.o manpath.o
-$(APROPOS_OBJS): config.h mandoc.h apropos_db.h manpath.h mandocdb.h
+APROPOS_OBJS = apropos.o mansearch.o mansearch_const.o manpath.o
CGI_OBJS = $(MANDOC_HTML_OBJS) \
- $(MANDOC_MAN_OBJS) \
- $(MANDOC_TERM_OBJS) \
cgi.o \
- apropos_db.o \
- manpath.o \
- out.o \
- tree.o
-$(CGI_OBJS): main.h mdoc.h man.h out.h config.h mandoc.h apropos_db.h manpath.h mandocdb.h
+ mansearch.o \
+ mansearch_const.o \
+ out.o
-CATMAN_OBJS = catman.o manpath.o
-$(CATMAN_OBJS): config.h mandoc.h manpath.h mandocdb.h
+MANPAGE_OBJS = manpage.o mansearch.o mansearch_const.o manpath.o
DEMANDOC_OBJS = demandoc.o
-$(DEMANDOC_OBJS): config.h
-INDEX_MANS = apropos.1.html \
- catman.8.html \
+WWW_MANS = apropos.1.html \
demandoc.1.html \
mandoc.1.html \
- whatis.1.html \
+ preconv.1.html \
mandoc.3.html \
+ mandoc_escape.3.html \
+ mandoc_html.3.html \
+ mandoc_malloc.3.html \
+ mansearch.3.html \
+ mchars_alloc.3.html \
tbl.3.html \
+ mandoc.db.5.html \
eqn.7.html \
man.7.html \
- man.cgi.7.html \
mandoc_char.7.html \
mdoc.7.html \
- preconv.1.html \
roff.7.html \
tbl.7.html \
- mandocdb.8.html
-
-$(INDEX_MANS): mandoc
-
-INDEX_OBJS = $(INDEX_MANS) \
+ makewhatis.8.html \
+ man.cgi.8.html \
man.h.html \
mandoc.h.html \
- mdoc.h.html \
- mdocml.tar.gz \
- mdocml.md5
+ mandoc_aux.h.html \
+ manpath.h.html \
+ mansearch.h.html \
+ mdoc.h.html
+
+WWW_OBJS = mdocml.tar.gz \
+ mdocml.sha256
+
+# === DEPENDENCY HANDLING ==============================================
+
+all: base-build $(BUILD_TARGETS)
+
+base-build: $(BASEBIN)
+
+db-build: $(DBBIN)
+
+cgi-build: $(CGIBIN)
+
+install: base-install $(INSTALL_TARGETS)
-www: index.html
+www: $(WWW_OBJS) $(WWW_MANS)
+
+include Makefile.depend
+
+# === TARGETS CONTAINING SHELL COMMANDS ================================
clean:
rm -f libmandoc.a $(LIBMANDOC_OBJS)
- rm -f mandocdb $(MANDOCDB_OBJS)
+ rm -f apropos $(APROPOS_OBJS)
+ rm -f makewhatis $(MAKEWHATIS_OBJS)
rm -f preconv $(PRECONV_OBJS)
- rm -f apropos whatis $(APROPOS_OBJS)
rm -f man.cgi $(CGI_OBJS)
- rm -f catman $(CATMAN_OBJS)
+ rm -f manpage $(MANPAGE_OBJS)
rm -f demandoc $(DEMANDOC_OBJS)
rm -f mandoc $(MANDOC_OBJS)
rm -f config.h config.log $(COMPAT_OBJS)
- rm -f mdocml.tar.gz
- rm -f index.html $(INDEX_OBJS)
+ rm -f $(WWW_MANS) $(WWW_OBJS)
rm -rf *.dSYM
-install: all
+base-install: base-build
mkdir -p $(DESTDIR)$(BINDIR)
mkdir -p $(DESTDIR)$(EXAMPLEDIR)
mkdir -p $(DESTDIR)$(LIBDIR)
@@ -322,31 +399,59 @@ install: all
mkdir -p $(DESTDIR)$(MANDIR)/man1
mkdir -p $(DESTDIR)$(MANDIR)/man3
mkdir -p $(DESTDIR)$(MANDIR)/man7
- $(INSTALL_PROGRAM) mandoc preconv demandoc $(DESTDIR)$(BINDIR)
+ $(INSTALL_PROGRAM) $(BASEBIN) $(DESTDIR)$(BINDIR)
$(INSTALL_LIB) libmandoc.a $(DESTDIR)$(LIBDIR)
- $(INSTALL_LIB) man.h mdoc.h mandoc.h $(DESTDIR)$(INCLUDEDIR)
+ $(INSTALL_LIB) man.h mandoc.h mandoc_aux.h mdoc.h \
+ $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_MAN) mandoc.1 preconv.1 demandoc.1 $(DESTDIR)$(MANDIR)/man1
- $(INSTALL_MAN) mandoc.3 tbl.3 $(DESTDIR)$(MANDIR)/man3
- $(INSTALL_MAN) man.7 mdoc.7 roff.7 eqn.7 tbl.7 mandoc_char.7 $(DESTDIR)$(MANDIR)/man7
+ $(INSTALL_MAN) mandoc.3 mandoc_escape.3 mandoc_malloc.3 \
+ mchars_alloc.3 tbl.3 $(DESTDIR)$(MANDIR)/man3
+ $(INSTALL_MAN) man.7 mdoc.7 roff.7 eqn.7 tbl.7 mandoc_char.7 \
+ $(DESTDIR)$(MANDIR)/man7
$(INSTALL_DATA) example.style.css $(DESTDIR)$(EXAMPLEDIR)
-installcgi: all
+db-install: db-build
+ mkdir -p $(DESTDIR)$(BINDIR)
+ mkdir -p $(DESTDIR)$(SBINDIR)
+ mkdir -p $(DESTDIR)$(MANDIR)/man1
+ mkdir -p $(DESTDIR)$(MANDIR)/man3
+ mkdir -p $(DESTDIR)$(MANDIR)/man5
+ mkdir -p $(DESTDIR)$(MANDIR)/man8
+ $(INSTALL_PROGRAM) apropos $(DESTDIR)$(BINDIR)
+ ln -f $(DESTDIR)$(BINDIR)/apropos $(DESTDIR)$(BINDIR)/whatis
+ $(INSTALL_PROGRAM) makewhatis $(DESTDIR)$(SBINDIR)
+ $(INSTALL_MAN) apropos.1 $(DESTDIR)$(MANDIR)/man1
+ ln -f $(DESTDIR)$(MANDIR)/man1/apropos.1 \
+ $(DESTDIR)$(MANDIR)/man1/whatis.1
+ $(INSTALL_MAN) mansearch.3 $(DESTDIR)$(MANDIR)/man3
+ $(INSTALL_MAN) mandoc.db.5 $(DESTDIR)$(MANDIR)/man5
+ $(INSTALL_MAN) makewhatis.8 $(DESTDIR)$(MANDIR)/man8
+
+cgi-install: cgi-build
mkdir -p $(DESTDIR)$(CGIBINDIR)
mkdir -p $(DESTDIR)$(HTDOCDIR)
+ mkdir -p $(DESTDIR)$(WWWPREFIX)/man/mandoc/man1
+ mkdir -p $(DESTDIR)$(WWWPREFIX)/man/mandoc/man8
$(INSTALL_PROGRAM) man.cgi $(DESTDIR)$(CGIBINDIR)
$(INSTALL_DATA) example.style.css $(DESTDIR)$(HTDOCDIR)/man.css
$(INSTALL_DATA) man-cgi.css $(DESTDIR)$(HTDOCDIR)
-
-installwww: www
- mkdir -p $(PREFIX)/snapshots
- mkdir -p $(PREFIX)/binaries
- $(INSTALL_DATA) index.html external.png index.css $(PREFIX)
- $(INSTALL_DATA) $(INDEX_MANS) style.css $(PREFIX)
- $(INSTALL_DATA) mandoc.h.html man.h.html mdoc.h.html $(PREFIX)
- $(INSTALL_DATA) mdocml.tar.gz $(PREFIX)/snapshots
- $(INSTALL_DATA) mdocml.md5 $(PREFIX)/snapshots
- $(INSTALL_DATA) mdocml.tar.gz $(PREFIX)/snapshots/mdocml-$(VERSION).tar.gz
- $(INSTALL_DATA) mdocml.md5 $(PREFIX)/snapshots/mdocml-$(VERSION).md5
+ $(INSTALL_MAN) apropos.1 $(DESTDIR)$(WWWPREFIX)/man/mandoc/man1/
+ $(INSTALL_MAN) man.cgi.8 $(DESTDIR)$(WWWPREFIX)/man/mandoc/man8/
+
+www-install: www
+ mkdir -p $(DESTDIR)$(HTDOCDIR)/snapshots
+ $(INSTALL_DATA) $(WWW_MANS) style.css $(DESTDIR)$(HTDOCDIR)
+ $(INSTALL_DATA) $(WWW_OBJS) $(DESTDIR)$(HTDOCDIR)/snapshots
+ $(INSTALL_DATA) mdocml.tar.gz \
+ $(DESTDIR)$(HTDOCDIR)/snapshots/mdocml-$(VERSION).tar.gz
+ $(INSTALL_DATA) mdocml.sha256 \
+ $(DESTDIR)$(HTDOCDIR)/snapshots/mdocml-$(VERSION).sha256
+
+depend: config.h
+ mkdep -f Makefile.depend $(CFLAGS) $(SRCS)
+ perl -e 'undef $$/; $$_ = <>; s|/usr/include/\S+||g; \
+ s|\\\n||g; s| +| |g; print;' Makefile.depend > Makefile.tmp
+ mv Makefile.tmp Makefile.depend
libmandoc.a: $(COMPAT_OBJS) $(LIBMANDOC_OBJS)
$(AR) rs $@ $(COMPAT_OBJS) $(LIBMANDOC_OBJS)
@@ -354,81 +459,47 @@ libmandoc.a: $(COMPAT_OBJS) $(LIBMANDOC_OBJS)
mandoc: $(MANDOC_OBJS) libmandoc.a
$(CC) $(LDFLAGS) -o $@ $(MANDOC_OBJS) libmandoc.a
-mandocdb: $(MANDOCDB_OBJS) libmandoc.a
- $(CC) $(LDFLAGS) -o $@ $(MANDOCDB_OBJS) libmandoc.a $(DBLIB)
+makewhatis: $(MAKEWHATIS_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(MAKEWHATIS_OBJS) libmandoc.a $(DBLIB)
preconv: $(PRECONV_OBJS)
$(CC) $(LDFLAGS) -o $@ $(PRECONV_OBJS)
-whatis: apropos
- cp -f apropos whatis
+manpage: $(MANPAGE_OBJS) libmandoc.a
+ $(CC) $(LDFLAGS) -o $@ $(MANPAGE_OBJS) libmandoc.a $(DBLIB)
apropos: $(APROPOS_OBJS) libmandoc.a
$(CC) $(LDFLAGS) -o $@ $(APROPOS_OBJS) libmandoc.a $(DBLIB)
-catman: $(CATMAN_OBJS) libmandoc.a
- $(CC) $(LDFLAGS) -o $@ $(CATMAN_OBJS) libmandoc.a $(DBLIB)
-
man.cgi: $(CGI_OBJS) libmandoc.a
$(CC) $(LDFLAGS) $(STATIC) -o $@ $(CGI_OBJS) libmandoc.a $(DBLIB)
demandoc: $(DEMANDOC_OBJS) libmandoc.a
$(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a
-mdocml.md5: mdocml.tar.gz
- md5 mdocml.tar.gz >$@
+mdocml.sha256: mdocml.tar.gz
+ sha256 mdocml.tar.gz > $@
-mdocml.tar.gz: $(SRCS)
+mdocml.tar.gz: $(DISTFILES)
mkdir -p .dist/mdocml-$(VERSION)/
- $(INSTALL_SOURCE) $(SRCS) .dist/mdocml-$(VERSION)
- ( cd .dist/ && tar zcf ../$@ ./ )
+ $(INSTALL_SOURCE) $(DISTFILES) .dist/mdocml-$(VERSION)
+ chmod 755 .dist/mdocml-$(VERSION)/configure
+ ( cd .dist/ && tar zcf ../$@ mdocml-$(VERSION) )
rm -rf .dist/
-index.html: $(INDEX_OBJS)
-
-config.h: config.h.pre config.h.post
+config.h: configure config.h.pre config.h.post $(TESTSRCS)
rm -f config.log
- ( cat config.h.pre; \
- echo; \
- echo '#define VERSION "$(VERSION)"'; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-fgetln test-fgetln.c >> config.log 2>&1; then \
- echo '#define HAVE_FGETLN'; \
- rm test-fgetln; \
- fi; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-strptime test-strptime.c >> config.log 2>&1; then \
- echo '#define HAVE_STRPTIME'; \
- rm test-strptime; \
- fi; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-getsubopt test-getsubopt.c >> config.log 2>&1; then \
- echo '#define HAVE_GETSUBOPT'; \
- rm test-getsubopt; \
- fi; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-strlcat test-strlcat.c >> config.log 2>&1; then \
- echo '#define HAVE_STRLCAT'; \
- rm test-strlcat; \
- fi; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-mmap test-mmap.c >> config.log 2>&1; then \
- echo '#define HAVE_MMAP'; \
- rm test-mmap; \
- fi; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-strlcpy test-strlcpy.c >> config.log 2>&1; then \
- echo '#define HAVE_STRLCPY'; \
- rm test-strlcpy; \
- fi; \
- if $(CC) $(CFLAGS) -Werror -Wno-unused -o test-betoh64 test-betoh64.c >> config.log 2>&1; then \
- echo '#define HAVE_BETOH64'; \
- rm test-betoh64; \
- fi; \
- echo; \
- cat config.h.post \
- ) > $@
+ CC="$(CC)" CFLAGS="$(CFLAGS)" DBLIB="$(DBLIB)" \
+ VERSION="$(VERSION)" ./configure
-.h.h.html:
- highlight -I $< >$@
+.PHONY: base-install cgi-install db-install install www-install
+.PHONY: clean depend
+.SUFFIXES: .1 .3 .5 .7 .8 .h
+.SUFFIXES: .1.html .3.html .5.html .7.html .8.html .h.html
-.1.1.html .3.3.html .7.7.html .8.8.html:
- ./mandoc -Thtml -Wall,stop -Ostyle=style.css,man=%N.%S.html,includes=%I.html $< >$@
+.h.h.html:
+ highlight -I $< > $@
-.sgml.html:
- validate --warn $<
- sed -e "s!@VERSION@!$(VERSION)!" -e "s!@VDATE@!$(VDATE)!" $< >$@
+.1.1.html .3.3.html .5.5.html .7.7.html .8.8.html: mandoc
+ ./mandoc -Thtml -Wall,stop \
+ -Ostyle=style.css,man=%N.%S.html,includes=%I.html $< > $@
diff --git a/Makefile.depend b/Makefile.depend
new file mode 100644
index 000000000000..dc49310e5222
--- /dev/null
+++ b/Makefile.depend
@@ -0,0 +1,70 @@
+apropos.o: apropos.c config.h manpath.h mansearch.h
+arch.o: arch.c config.h mdoc.h libmdoc.h arch.in
+att.o: att.c config.h mdoc.h libmdoc.h att.in
+cgi.o: cgi.c config.h mandoc.h mandoc_aux.h main.h manpath.h mansearch.h cgi.h
+chars.o: chars.c config.h mandoc.h mandoc_aux.h libmandoc.h chars.in
+compat_fgetln.o: compat_fgetln.c config.h
+compat_getsubopt.o: compat_getsubopt.c config.h
+compat_ohash.o: compat_ohash.c config.h
+compat_reallocarray.o: compat_reallocarray.c config.h
+compat_sqlite3_errstr.o: compat_sqlite3_errstr.c config.h
+compat_strcasestr.o: compat_strcasestr.c config.h
+compat_strlcat.o: compat_strlcat.c config.h
+compat_strlcpy.o: compat_strlcpy.c config.h
+compat_strsep.o: compat_strsep.c config.h
+demandoc.o: demandoc.c config.h man.h mdoc.h mandoc.h
+eqn.o: eqn.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h
+eqn_html.o: eqn_html.c config.h mandoc.h out.h html.h
+eqn_term.o: eqn_term.c config.h mandoc.h out.h term.h
+html.o: html.c config.h mandoc.h mandoc_aux.h libmandoc.h out.h html.h main.h
+lib.o: lib.c config.h mdoc.h libmdoc.h lib.in
+main.o: main.c config.h mandoc.h mandoc_aux.h main.h mdoc.h man.h
+man.o: man.c config.h man.h mandoc.h mandoc_aux.h libman.h libmandoc.h
+man_hash.o: man_hash.c config.h man.h mandoc.h libman.h
+man_html.o: man_html.c config.h mandoc.h mandoc_aux.h out.h html.h man.h main.h
+man_macro.o: man_macro.c config.h man.h mandoc.h libmandoc.h libman.h
+man_term.o: man_term.c config.h mandoc.h mandoc_aux.h out.h man.h term.h main.h
+man_validate.o: man_validate.c config.h man.h mandoc.h mandoc_aux.h libman.h libmandoc.h
+mandoc.o: mandoc.c config.h mandoc.h mandoc_aux.h libmandoc.h
+mandoc_aux.o: mandoc_aux.c config.h mandoc.h mandoc_aux.h
+mandocdb.o: mandocdb.c config.h mdoc.h man.h mandoc.h mandoc_aux.h manpath.h mansearch.h
+manpage.o: manpage.c config.h manpath.h mansearch.h
+manpath.o: manpath.c config.h mandoc_aux.h manpath.h
+mansearch.o: mansearch.c config.h mandoc.h mandoc_aux.h manpath.h mansearch.h
+mansearch_const.o: mansearch_const.c config.h manpath.h mansearch.h
+mdoc.o: mdoc.c config.h mdoc.h mandoc.h mandoc_aux.h libmdoc.h libmandoc.h
+mdoc_argv.o: mdoc_argv.c config.h mdoc.h mandoc.h mandoc_aux.h libmdoc.h libmandoc.h
+mdoc_hash.o: mdoc_hash.c config.h mdoc.h libmdoc.h
+mdoc_html.o: mdoc_html.c config.h mandoc.h mandoc_aux.h out.h html.h mdoc.h main.h
+mdoc_macro.o: mdoc_macro.c config.h mdoc.h mandoc.h libmdoc.h libmandoc.h
+mdoc_man.o: mdoc_man.c config.h mandoc.h mandoc_aux.h out.h man.h mdoc.h main.h
+mdoc_term.o: mdoc_term.c config.h mandoc.h mandoc_aux.h out.h term.h mdoc.h main.h
+mdoc_validate.o: mdoc_validate.c config.h mdoc.h mandoc.h mandoc_aux.h libmdoc.h libmandoc.h
+msec.o: msec.c config.h mandoc.h libmandoc.h msec.in
+out.o: out.c config.h mandoc_aux.h mandoc.h out.h
+preconv.o: preconv.c config.h
+read.o: read.c config.h mandoc.h mandoc_aux.h libmandoc.h mdoc.h man.h main.h
+roff.o: roff.c config.h mandoc.h mandoc_aux.h libroff.h libmandoc.h predefs.in
+st.o: st.c config.h mdoc.h libmdoc.h st.in
+tbl.o: tbl.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h
+tbl_data.o: tbl_data.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h
+tbl_html.o: tbl_html.c config.h mandoc.h out.h html.h
+tbl_layout.o: tbl_layout.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h
+tbl_opts.o: tbl_opts.c config.h mandoc.h libmandoc.h libroff.h
+tbl_term.o: tbl_term.c config.h mandoc.h out.h term.h
+term.o: term.c config.h mandoc.h mandoc_aux.h out.h term.h main.h
+term_ascii.o: term_ascii.c config.h mandoc.h mandoc_aux.h out.h term.h main.h
+term_ps.o: term_ps.c config.h mandoc.h mandoc_aux.h out.h main.h term.h
+tree.o: tree.c config.h mandoc.h mdoc.h man.h main.h
+vol.o: vol.c config.h mdoc.h libmdoc.h vol.in
+test-fgetln.o: test-fgetln.c
+test-getsubopt.o: test-getsubopt.c
+test-mmap.o: test-mmap.c
+test-ohash.o: test-ohash.c
+test-reallocarray.o: test-reallocarray.c
+test-sqlite3_errstr.o: test-sqlite3_errstr.c
+test-strcasestr.o: test-strcasestr.c
+test-strlcat.o: test-strlcat.c
+test-strlcpy.o: test-strlcpy.c
+test-strptime.o: test-strptime.c
+test-strsep.o: test-strsep.c
diff --git a/NEWS b/NEWS
index 6f21a38dea3c..61006b5ee117 100644
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,87 @@
-$Id: NEWS,v 1.3 2013/10/13 16:06:50 schwarze Exp $
+$Id: NEWS,v 1.5 2014/08/10 16:32:57 schwarze Exp $
This file lists the most important changes in the mdocml.bsd.lv distribution.
+Changes in version 1.13.1, released on August 10, 2014
+
+ --- MAJOR NEW FEATURES ---
+ * A complete apropos(1)/makewhatis(8)/man.cgi(8) suite
+ based on SQLite3 is now included.
+ CAVEAT: This also requires a working fts(3) implementation.
+ If your system lacks that *and* you want apropos(1)/makewhatis(8),
+ stay with 1.12.3 for now, then go to 1.12.4 and 1.13.2.
+ * The roff(7) parser now provides an almost complete implementation
+ of numerical expressions.
+ * Warning and error messages have been improved in many ways.
+ Almost all fatal errors were downgraded to normal errors and some
+ even to warnings. Almost all messages now mention the macro where
+ the issue is detected and many indicate the workaround employed.
+ The mandoc(1) manual now includes a list explaining all messages.
+ --- MINOR NEW FEATURES ---
+ * The roff(7) parser now supports the .ami (append to macro with
+ indirectly specified name), .as (append to user-defined
+ string), .dei (define macro with indirectly specified name),
+ .ll (line length), and .rr (remove register) requests.
+ * The roff(7) parser now supports string comparison and numerical
+ conditionals in the .if and .ie requests.
+ * The roff parser now fully supports the \B (validate numerical
+ expression) and partially supports the \w (measure text width)
+ escape sequences.
+ * The terminal formatter now supports the \: (optional line break)
+ escape sequence.
+ * The roff parser now supports expansion of user-defined strings
+ involving indirect references.
+ * The roff(7) parser now handles some pre-defined read-only
+ number registers that occur in the pod2man(1) preamble.
+ * For backward compatibility, the mdoc(7) parser and formatters
+ now support the obsolete macros .En, .Es, .Fr, and .Ot.
+ * The mdoc(7) formatter now partially supports .Bd -centered.
+ * tbl(7) now handles leading and trailing vertical lines.
+ * The build system now provides fallback versions of strcasestr(3)
+ and strsep(3) for systems lacking them.
+ * The mdoc(7) manual now explains how various standards
+ supported by the .St macro are related to each other.
+ --- BUGFIXES ---
+ * In the roff(7) parser, several bugs were fixed with respect
+ to closing conditional blocks on macro lines.
+ * Parsing of roff(7) identifiers and escape sequences was improved
+ in multiple respects.
+ * In the mdoc(7) parser, the handling of defective document
+ prologues was improved in multiple ways.
+ * The mdoc(7) parser no longer skips content before the first section
+ header, and it no longer deletes non-.% content from .Rs blocks.
+ * In the mdoc(7) parser, a crash was fixed related to weird .Sh headers.
+ * In the mdoc(7) parser, handling of .Sm with missing or invalid
+ arguments was corrected.
+ * In the mdoc(7) parser, trailing punctuation at the end of partial
+ implicit macros no longer triggers end-of-sentence spacing.
+ * In the terminal formatter, two crashes were fixed: one triggered by
+ excessive indentation and another by excessively long .Nm arguments.
+ * In the terminal formatter, a floating point rounding bug was
+ fixed that sometimes caused an off-by-one error in indentation.
+ * In the UTF-8 formatter, rendering of accents, breakable hyphens,
+ and non-breakable spaces was corrected.
+ * In the HTML formatter, encoding of special characters was
+ corrected in multiple respects.
+ * In the mdoc(7) formatter, rendering of .Ex and .Rv was
+ improved for various edge cases.
+ * In the mdoc(7) formatter, handling of empty .Bl -inset item
+ heads was improved.
+ * In the man(7) formatter, some bugs were fixed with respect
+ to same-line detection in the context of .TP and .nf macros,
+ and the indentation of .IP and .TP blocks was improved.
+ * The mandoc(3) library no longer prints to stderr.
+ --- THANKS TO ---
+ Abhinav Upadhyay (NetBSD), Andreas Voegele, Anthony Bentley (OpenBSD),
+ Christian Weisgerber (OpenBSD), Havard Eidnes (NetBSD), Jan Stary,
+ Jason McIntyre (OpenBSD), Jeremie Courreges-Anglas (OpenBSD),
+ Joerg Sonnenberger (NetBSD), Juan Francisco Cantero Hurtado (OpenBSD),
+ Marc Espie (OpenBSD), Matthias Scheler (NetBSD), Pascal Stumpf (OpenBSD),
+ Paul Onyschuk (Alpine Linux), Sebastien Marie, Steffen Nurpmeso,
+ Stuart Henderson (OpenBSD), Ted Unangst (OpenBSD), Theo de Raadt (OpenBSD),
+ Thomas Klausner (NetBSD), and Ulrich Spoerlein (FreeBSD)
+ for reporting bugs and missing features.
+
Changes in version 1.12.3, released on December 31, 2013
* In the mdoc(7) SYNOPSIS, line breaks and hanging indentation
diff --git a/TODO b/TODO
index 26f42c23b417..a41df2988270 100644
--- a/TODO
+++ b/TODO
@@ -1,13 +1,15 @@
************************************************************************
* Official mandoc TODO.
-* $Id: TODO,v 1.162 2013/12/25 14:40:34 schwarze Exp $
+* $Id: TODO,v 1.176 2014/08/09 14:24:53 schwarze Exp $
************************************************************************
************************************************************************
* crashes
************************************************************************
-None known.
+- The abort() in bufcat(), html.c, can be triggered via buffmt_includes()
+ by running -Thtml -Oincludes on a file containing a long .In argument.
+ Fixing this will probably require reworking the whole bufcat() concept.
************************************************************************
* missing features
@@ -15,11 +17,6 @@ None known.
--- missing roff features ----------------------------------------------
-- roff.c should treat \n(.H>23 and \n(.V>19 in the pod2man(1)
- preamble as true, see for example AUTHORS in MooseX::Getopt.3p
- reported by Andreas Voegele <mail at andreasvoegele dot com>
- Tue, 22 Nov 2011 15:34:47 +0100 on ports@
-
- .ad (adjust margins)
.ad l -- adjust left margin only (flush left)
.ad r -- adjust right margin only (flush right)
@@ -29,20 +26,9 @@ None known.
.ad -- re-enable adjustment without changing the mode
Adjustment mode is ignored while in no-fill mode (.nf).
-- .as (append to string)
- found by jca@ in ratpoison(1) Sun, 30 Jun 2013 12:01:09 +0200
-
-- .ce (center N lines)
- found by naddy@ in xloadimage(1)
- found by Juan Francisco Cantero Hurtado <iam at juanfra dot info>
- in lang/racket(1) Thu, 20 Jun 2013 03:19:11 +0200
-
- .fc (field control)
found by naddy@ in xloadimage(1)
-- .ll (line length)
- found by naddy@ in textproc/enchant(1) Sat, 12 Oct 2013 03:27:10 +0200
-
- .nr third argument (auto-increment step size, requires \n+)
found by bentley@ in sbcl(1) Mon, 9 Dec 2013 18:36:57 -0700
@@ -51,6 +37,7 @@ None known.
- .ta (tab settings) occurs in ircbug(1) and probably gnats(1)
reported by brad@ Sat, 15 Jan 2011 15:50:51 -0500
+ also Tcl_NewStringObj(3) via wiz@ Wed, 5 Mar 2014 22:27:43 +0100
- .ti (temporary indent)
found by naddy@ in xloadimage(1)
@@ -70,6 +57,10 @@ None known.
- \n+ and \n- numerical register increment and decrement
found by bentley@ in sbcl(1) Mon, 9 Dec 2013 18:36:57 -0700
+- \w'' width measurements
+ would not be very useful without an expression parser, see below
+ needed for Tcl_NewStringObj(3) via wiz@ Wed, 5 Mar 2014 22:27:43 +0100
+
- using undefined strings or macros defines them to be empty
wl@ Mon, 14 Nov 2011 14:37:01 +0000
@@ -96,6 +87,12 @@ None known.
because libmdoc does not yet use mandoc_getarg().
Also check what happens in plain text, it must be identical to \e.
+- .Bd -centered implies -filled, not -unfilled, which is not
+ easy to implement; it requires code similar to .ce, which
+ we don't have either.
+ Besides, groff has a bug causing text right *before* .Bd -centered
+ to be centered as well.
+
- .Bd -filled should not be the same as .Bd -ragged, but align both
the left and right margin. In groff, it is implemented in terms
of .ad b, which we don't have either. Found in cksum(1).
@@ -129,10 +126,19 @@ None known.
- have a blank `It' head for `Bl -tag' not puke
+- check whether it is correct that `D1' uses INDENT+1;
+ does it need its own constant?
+
- prohibit `Nm' from having non-text HEAD children
(e.g., NetBSD mDNSShared/dns-sd.1)
(mdoc_html.c and mdoc_term.c `Nm' handlers can be slightly simplified)
+- support translated section names
+ e.g. x11/scrotwm scrotwm_es.1:21:2: error: NAME section must be first
+ that one uses NOMBRE because it is spanish...
+ deraadt tends to think that section-dependent macro behaviour
+ is a bad idea in the first place, so this may be irrelevant
+
- When there is free text in the SYNOPSIS and that free text contains
the .Nm macro, groff somehow understands to treat the .Nm as an in-line
macro, while mandoc treats it as a block macro and breaks the line.
@@ -143,18 +149,15 @@ None known.
--- missing man features -----------------------------------------------
-- groff an-ext.tmac macros (.UR, .UE) occur in xine(5)
- reported by brad@ Sat, 15 Jan 2011 15:45:23 -0500
- also occur in freeciv-client(6) freeciv-server(6) freeciv-modpack(6)
- reported by bentley@ Tue, 30 Oct 2012 01:05:57 -0600
-
- -T[x]html doesn't stipulate non-collapsing spaces in literal mode
--- missing tbl features -----------------------------------------------
-- implement basic non-parametric .de to support e.g. sox(1)
- reported by naddy@ Sat, 16 Oct 2010 23:51:57 +0200
- *** sox(1) still doesn't work, tbl(1) errors need investigation
+- look at the POSIX manuals in the books/man-pages-posix port,
+ they use some unsupported tbl(7) features.
+
+- investigate tbl(1) errors in sox(1)
+ see also naddy@ Sat, 16 Oct 2010 23:51:57 +0200
- allow standalone `.' to be interpreted as an end-of-layout
delimiter instead of being thrown away as a no-op roff line
@@ -165,14 +168,19 @@ None known.
- italic correction (\/) in PostScript mode
Werner LEMBERG on groff at gnu dot org Sun, 10 Nov 2013 12:47:46
-- The whatis(1) utility looks for whole words in Nm.
- If the file name of a page does not agree with the contents of any
- of its Nm macros (e.g. pool(9)), add the file name as an Nm entry
- to the mandoc.db as well, such that whatis(1) finds it.
- If there is a page with a file name that does not appear as a substring
- neither in Nm nor in Nd, the same fix would allow finding that page
- with apropos(1) using the file name as a key, as well.
- Issue reported by tedu@ Fri, 05 Jul 2013 21:15:23 -0400
+- When makewhatis(8) encounters a FATAL parse error,
+ it silently treats the file as formatted, which makes no sense
+ at all for paths like man1/foo.1 - and which also contradicts
+ what the manual says at the end of the description.
+ The end result will be ENOENT for file names returned
+ by mansearch() in manpage.file.
+
+- makewhatis(8) for preformatted pages:
+ parse the section number from the header line
+ and compare to the section number from the directory name
+
+- Does makewhatis(8) detect missing NAME sections, missing names,
+ and missing descriptions in all the file formats?
- clean up escape sequence handling, creating three classes:
(1) fully implemented, or parsed and ignored without loss of content
@@ -181,6 +189,16 @@ None known.
see textproc/mgdiff(1) for nice examples
(3) undefined, just output the character -> perhaps WARNING
+- kettenis wants base roff, ms, and me Fri, 1 Jan 2010 22:13:15 +0100 (CET)
+
+--- compatibility checks -----------------------------------------------
+
+- is .Bk implemented correctly in modern groff?
+ sobrado@ Tue, 19 Apr 2011 22:12:55 +0200
+
+- compare output to Heirloom roff, Solaris roff, and
+ http://repo.or.cz/w/neatroff.git http://litcave.rudi.ir/
+
- look at pages generated from reStructuredText, e.g. devel/mercurial hg(1)
These are a weird mixture of man(7) and custom autogenerated low-level
roff stuff. Figure out to what extent we can cope.
@@ -188,14 +206,24 @@ None known.
noted by stsp@ Sat, 24 Apr 2010 09:17:55 +0200
reminded by nicm@ Mon, 3 May 2010 09:52:41 +0100
+- look at pages generated from ronn(1) github.com/rtomayko/ronn
+ (based on markdown)
+
- look at pages generated from Texinfo source by yat2m, e.g. security/gnupg
First impression is not that bad.
+- look at pages generated by pandoc; see
+ https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Writers/Man.hs
+ porting planned by kili@ Thu, 19 Jun 2014 19:46:28 +0200
+
- check compatibility with Plan9:
http://swtch.com/usr/local/plan9/tmac/tmac.an
http://swtch.com/plan9port/man/man7/man.html
"Anthony J. Bentley" <anthonyjbentley@gmail.com> 28 Dec 2010 21:58:40 -0700
+- check compatibility with the man(7) formatter
+ https://raw.githubusercontent.com/rofl0r/hardcore-utils/master/man.c
+
************************************************************************
* formatting issues: ugly output
************************************************************************
@@ -227,6 +255,10 @@ None known.
the right solution, it sends mandoc into an endless loop.
reported by Nicolas Joly Sat, 17 Nov 2012 11:49:54 +0100
+- global variables in the SYNOPSIS of section 3 pages
+ .Vt vs .Vt/.Va vs .Ft/.Va vs .Ft/.Fa ...
+ from kristaps@ Tue, 08 Jun 2010 11:13:32 +0200
+
- in enclosures, mandoc sometimes fancies a bogus end of sentence
reminded by jmc@ Thu, 23 Sep 2010 18:13:39 +0059
@@ -234,6 +266,23 @@ None known.
reveals lots of bugs both in groff and mandoc...
reported by bentley@ Wed, 22 May 2013 23:49:30 -0600
+--- PDF issues ---------------------------------------------------------
+
+- PDF output doesn't use a monospaced font for .Bd -literal
+ Example: "mandoc -Tpdf afterboot.8 > output.pdf && pdfviewer output.pdf".
+ Search the text "Routing tables".
+ Also check what PostScript mode does when fixing this.
+ reported by juanfra@ Wed, 04 Jun 2014 21:44:58 +0200
+
+--- HTML issues --------------------------------------------------------
+
+- <dl><dt><dd> formatting is ugly
+ hints are easy to find on the web, e.g.
+ http://stackoverflow.com/questions/1713048/
+ see also matthew@ Fri, 18 Jul 2014 19:25:12 -0700
+
+- check https://github.com/trentm/mdocml
+
************************************************************************
* formatting issues: gratuitous differences
************************************************************************
@@ -246,6 +295,10 @@ None known.
is just "o\bo".
see for example OpenBSD ksh(1)
+- In .Bl -enum -width 0n, groff continues on the same line after
+ the number, mandoc breaks the line.
+ mail to kristaps@ Mon, 20 Jul 2009 02:21:39 +0200
+
- .Pp between two .It in .Bl -column should produce one,
not two blank lines, see e.g. login.conf(5).
reported by jmc@ Sun, 17 Apr 2011 14:04:58 +0059
@@ -299,9 +352,57 @@ None known.
in dig(1).
************************************************************************
+* warning issues
+************************************************************************
+
+- check that MANDOCERR_BADTAB is thrown in the right cases,
+ i.e. when finding a literal tab character in fill mode,
+ and possibly change the wording of the warning message
+ to refer to fill mode, not literal mode
+ See the mail from Werner LEMBERG on the groff list,
+ Fri, 14 Feb 2014 18:54:42 +0100 (CET)
+
+- warn about "new sentence, new line"
+
+- mandoc_special does not really check the escape sequence,
+ but just the overall format
+
+- integrate mdoclint into mandoc ("end-of-line whitespace" thread)
+ from jmc@ Mon, 13 Jul 2009 17:12:09 +0100
+ from kristaps@ Mon, 13 Jul 2009 18:34:53 +0200
+ from jmc@ Mon, 13 Jul 2009 17:45:37 +0059
+ from kristaps@ Mon, 13 Jul 2009 19:02:03 +0200
+
+- -Tlint parser errors and warnings to stdout
+ to tech@mdocml, naddy@ Wed, 28 Sep 2011 11:21:46 +0200
+ wait! kristaps@ Sun, 02 Oct 2011 17:12:52 +0200
+
+- for system errors, use errno/strerror/warn/err
+
+************************************************************************
+* documentation issues
+************************************************************************
+
+- mention hyphenation rules:
+ breaking at letter-letter in text mode (not macro args)
+ proper hyphenation is unimplemented
+
+- talk about spacing around delimiters
+ to jmc@, kristaps@ Sat, 23 Apr 2011 17:41:27 +0200
+
+- mark macros as: page structure domain, manual domain, general text domain
+ is this useful?
+
+- mention /usr/share/misc/mdoc.template in mdoc(7)?
+
+************************************************************************
* performance issues
************************************************************************
+- Why are we using MAP_SHARED, not MAP_PRIVATE for mmap(2)?
+ How does SQLITE_CONFIG_PAGECACHE actually work? Document it!
+ from kristaps@ Sat, 09 Aug 2014 13:51:36 +0200
+
Several areas can be cleaned up to make mandoc even faster. These are
- improve hashing mechanism for macros (quite important: performance)
@@ -328,3 +429,9 @@ Several areas can be cleaned up to make mandoc even faster. These are
Decide which formats should be recognized where.
Update both mdoc(7) and man(7) documentation.
Triggered by Tim van der Molen Tue, 22 Feb 2011 20:30:45 +0100
+
+- Consider creating some views that will make the database more
+ readable from the sqlite3 shell. Consider using them to
+ abstract from the database structure, too.
+ suggested by espie@ Sat, 19 Apr 2014 14:52:57 +0200
+
diff --git a/apropos.1 b/apropos.1
index 5adfeb6c3109..14682420ff00 100644
--- a/apropos.1
+++ b/apropos.1
@@ -1,6 +1,7 @@
-.\" $Id: apropos.1,v 1.16.2.3 2013/10/05 01:25:20 schwarze Exp $
+.\" $Id: apropos.1,v 1.29 2014/04/24 00:28:19 schwarze Exp $
.\"
-.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2011, 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -14,41 +15,49 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: October 5 2013 $
+.Dd $Mdocdate: April 24 2014 $
.Dt APROPOS 1
.Os
.Sh NAME
-.Nm apropos
+.Nm apropos ,
+.Nm whatis
.Nd search manual page databases
.Sh SYNOPSIS
.Nm
.Op Fl C Ar file
.Op Fl M Ar path
.Op Fl m Ar path
+.Op Fl O Ar outkey
.Op Fl S Ar arch
.Op Fl s Ar section
.Ar expression ...
.Sh DESCRIPTION
The
-.Nm
-utility queries manual page databases generated by
-.Xr mandocdb 8 ,
-evaluating on
+.Nm apropos
+and
+.Nm whatis
+utilities query manual page databases generated by
+.Xr makewhatis 8 ,
+evaluating
.Ar expression
for each file in each database.
+By default, it displays the names, section numbers, and description lines
+of all matching manuals.
.Pp
By default,
.Nm
searches for
-.Xr mandocdb 8
+.Xr makewhatis 8
databases in the default paths stipulated by
-.Xr man 1 ,
-parses terms as case-sensitive regular expressions
-.Pq the Li \&~ operator
+.Xr man 1
+and uses case-insensitive substring matching
+.Pq the Cm = No operator
over manual names and descriptions
.Pq the Li \&Nm No and Li \&Nd No macro keys .
Multiple terms imply pairwise
.Fl o .
+.Nm whatis
+maps terms only to case-sensitive manual names.
.Pp
Its arguments are as follows:
.Bl -tag -width Ds
@@ -61,22 +70,32 @@ format.
.It Fl M Ar path
Use the colon-separated path instead of the default list of paths
searched for
-.Xr mandocdb 8
+.Xr makewhatis 8
databases.
Invalid paths, or paths without manual databases, are ignored.
.It Fl m Ar path
Prepend the colon-separated paths to the list of paths searched
for
-.Xr mandocdb 8
+.Xr makewhatis 8
databases.
Invalid paths, or paths without manual databases, are ignored.
+.It Fl O Ar outkey
+Show the values associated with the key
+.Ar outkey
+instead of the manual descriptions.
.It Fl S Ar arch
-Search only for a particular architecture.
-.It Fl s Ar cat
-Search only for a manual section.
+Restrict the search to pages for the specified
+.Xr machine 1
+architecture.
+.Ar arch
+is case insensitive.
+By default, pages for all architectures are shown.
+.It Fl s Ar section
+Restrict the search to the specified section of the manual.
+By default, pages from all sections are shown.
See
.Xr man 1
-for a listing of manual sections.
+for a listing of sections.
.El
.Pp
An
@@ -103,34 +122,40 @@ True if both
and
.Ar expr2
are true (logical
-.Qq and ) .
+.Sq and ) .
.It Ar expr1 Oo Fl o Oc Ar expr2
True if
.Ar expr1
and/or
.Ar expr2
evaluate to true (logical
-.Qq or ) .
+.Sq or ) .
.It Ar term
True if
.Ar term
is satisfied.
This has syntax
-.Li [key[,key]*(=~)]?val ,
-where operand
-.Cm key
+.Sm off
+.Oo
+.Op Ar key Op , Ar key ...
+.Pq Cm = | ~
+.Oc
+.Ar val ,
+.Sm on
+where
+.Ar key
is an
.Xr mdoc 7
macro to query and
-.Cm val
+.Ar val
is its value.
See
.Sx Macro Keys
for a list of available keys.
Operator
-.Li \&=
+.Cm =
evaluates a substring, while
-.Li \&~
+.Cm ~
evaluates a regular expression.
.It Fl i Ar term
If
@@ -140,34 +165,38 @@ is evaluated case-insensitively.
Has no effect on substring terms.
.El
.Pp
-Results are sorted by manual title, with output formatted as
+.Nm whatis
+considers an
+.Ar expression
+to consist of an opaque keyword.
+.Pp
+Results are sorted by manual sections and names, with output formatted as
.Pp
-.D1 title(sec) \- description
+.D1 name[, name...](sec) \- description
.Pp
Where
-.Qq title
-is the manual's title (note multiple manual names may exist for one
-title),
-.Qq sec
+.Dq name
+is the manual's name,
+.Dq sec
is the manual section, and
-.Qq description
+.Dq description
is the manual's short description.
If an architecture is specified for the manual, it is displayed as
.Pp
-.D1 title(cat/arch) \- description
+.D1 name(sec/arch) \- description
.Pp
Resulting manuals may be accessed as
.Pp
-.Dl $ man \-s sec title
+.Dl $ man \-s sec name
.Pp
If an architecture is specified in the output, use
.Pp
-.Dl $ man \-s sec \-S arch title
+.Dl $ man \-s sec \-S arch name
.Ss Macro Keys
Queries evaluate over a subset of
.Xr mdoc 7
macros indexed by
-.Xr mandocdb 8 .
+.Xr makewhatis 8 .
In addition to the macro keys listed below, the special key
.Cm any
may be used to match any available macro key.
@@ -176,6 +205,8 @@ Names and description:
.Bl -column "xLix" description -offset indent -compact
.It Li \&Nm Ta manual name
.It Li \&Nd Ta one-line manual description
+.It Li arch Ta machine architecture (case-insensitive)
+.It Li sec Ta manual section number
.El
.Pp
Sections and cross references:
@@ -239,35 +270,31 @@ Text production:
.It Li \&Dx Ta Dx No version reference
.El
.Sh ENVIRONMENT
-.Bl -tag -width Ds
+.Bl -tag -width MANPATH
.It Ev MANPATH
-Colon-separated paths modifying the default list of paths searched for
-manual databases.
+The standard search path used by
+.Xr man 1
+may be changed by specifying a path in the
+.Ev MANPATH
+environment variable.
Invalid paths, or paths without manual databases, are ignored.
Overridden by
.Fl M .
If
.Ev MANPATH
-begins with a
-.Sq \&: ,
-it is appended to the default list;
-else if it ends with
-.Sq \&: ,
-it is prepended to the default list; else if it contains
-.Sq \&:: ,
-the default list is inserted between the colons.
-If none of these conditions are met, it overrides the default list.
+begins with a colon, it is appended to the default list;
+if it ends with a colon, it is prepended to the default list;
+or if it contains two adjacent colons,
+the standard search path is inserted between the colons.
+If none of these conditions are met, it overrides the
+standard search path.
.El
.Sh FILES
.Bl -tag -width "/etc/man.conf" -compact
.It Pa mandoc.db
name of the
-.Xr mandocdb 8
+.Xr makewhatis 8
keyword database
-.It Pa mandoc.index
-name of the
-.Xr mandocdb 8
-filename database
.It Pa /etc/man.conf
default
.Xr man 1
@@ -277,35 +304,84 @@ configuration file
.Ex -std
.Sh EXAMPLES
Search for
-.Qq mdoc
-as a substring and regular expression
-within each manual name and description:
+.Qq .cf
+as a substring of manual names and descriptions:
.Pp
-.Dl $ apropos mdoc
-.Dl $ apropos ~^mdoc$
+.Dl $ apropos .cf
.Pp
Include matches for
-.Qq roff
+.Qq .cnf
and
-.Qq man
-for the regular expression case:
+.Qq .conf
+as well:
.Pp
-.Dl $ apropos ~^mdoc$ roff man
-.Dl $ apropos ~^mdoc$ \-o roff \-o man
+.Dl $ apropos .cf .cnf .conf
.Pp
-Search for
+Search in names and descriptions using a regular expression:
+.Pp
+.Dl $ apropos '~set.?[ug]id'
+.Pp
+Search for manuals in the library section mentioning both the
.Qq optind
-and
+and the
.Qq optarg
-as variable names in the library category:
+variables:
.Pp
-.Dl $ apropos \-s 3 Va~^optind \-a Va~^optarg$
+.Dl $ apropos \-s 3 Va=optind \-a Va=optarg
+.Pp
+Do exactly the same as calling
+.Xr whatis 1
+with the argument
+.Qq ssh :
+.Pp
+.Dl $ apropos \-\- \-i 'Nm~[[:<:]]ssh[[:>:]]'
+.Pp
+The following two invocations are equivalent:
+.Pp
+.D1 Li $ apropos -S Ar arch Li -s Ar section expression
+.Bd -ragged -offset indent
+.Li $ apropos \e( Ar expression Li \e)
+.Li -a arch~^( Ns Ar arch Ns Li |any)$
+.Li -a sec~^ Ns Ar section Ns Li $
+.Ed
.Sh SEE ALSO
.Xr man 1 ,
.Xr re_format 7 ,
-.Xr mandocdb 8
-.Sh AUTHORS
+.Xr makewhatis 8
+.Sh HISTORY
+An
+.Nm
+utility first appeared in
+.Bx 2 .
+It was rewritten from scratch for
+.Ox 5.6 .
+.Pp
The
+.Fl M
+option and the
+.Ev MANPATH
+variable first appeared in
+.Bx 4.3 ;
+.Fl m
+in
+.Bx 4.3 Reno ;
+.Fl C
+in
+.Bx 4.4 Lite1 ;
+and
+.Fl S
+and
+.Fl s
+in
+.Ox 4.5 .
+.Sh AUTHORS
+.An -nosplit
+.An Bill Joy
+wrote the original
+.Bx
.Nm
-utility was written by
-.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
+in February 1979.
+The current version was written by
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
+and
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org .
diff --git a/apropos.c b/apropos.c
index f5d1425f8615..80b6bc6d036e 100644
--- a/apropos.c
+++ b/apropos.c
@@ -1,7 +1,7 @@
-/* $Id: apropos.c,v 1.27.2.1 2013/09/17 23:23:10 schwarze Exp $ */
+/* $Id: apropos.c,v 1.39 2014/04/20 16:46:04 schwarze Exp $ */
/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -22,30 +22,28 @@
#include <assert.h>
#include <getopt.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
-#include "apropos_db.h"
-#include "mandoc.h"
#include "manpath.h"
+#include "mansearch.h"
-static int cmp(const void *, const void *);
-static void list(struct res *, size_t, void *);
-
-static char *progname;
int
main(int argc, char *argv[])
{
- int ch, rc, whatis;
- struct res *res;
+ int ch, whatis;
+ struct mansearch search;
+ size_t i, sz;
+ struct manpage *res;
struct manpaths paths;
- size_t terms, ressz;
- struct opts opts;
- struct expr *e;
char *defpaths, *auxpaths;
char *conf_file;
+ char *progname;
+ const char *outkey;
extern char *optarg;
extern int optind;
@@ -58,30 +56,31 @@ main(int argc, char *argv[])
whatis = (0 == strncmp(progname, "whatis", 6));
memset(&paths, 0, sizeof(struct manpaths));
- memset(&opts, 0, sizeof(struct opts));
+ memset(&search, 0, sizeof(struct mansearch));
- ressz = 0;
- res = NULL;
auxpaths = defpaths = NULL;
conf_file = NULL;
- e = NULL;
+ outkey = "Nd";
- while (-1 != (ch = getopt(argc, argv, "C:M:m:S:s:")))
+ while (-1 != (ch = getopt(argc, argv, "C:M:m:O:S:s:")))
switch (ch) {
- case ('C'):
+ case 'C':
conf_file = optarg;
break;
- case ('M'):
+ case 'M':
defpaths = optarg;
break;
- case ('m'):
+ case 'm':
auxpaths = optarg;
break;
- case ('S'):
- opts.arch = optarg;
+ case 'O':
+ outkey = optarg;
+ break;
+ case 'S':
+ search.arch = optarg;
break;
- case ('s'):
- opts.cat = optarg;
+ case 's':
+ search.sec = optarg;
break;
default:
goto usage;
@@ -93,64 +92,32 @@ main(int argc, char *argv[])
if (0 == argc)
goto usage;
- rc = 0;
+ search.deftype = whatis ? TYPE_Nm : TYPE_Nm | TYPE_Nd;
+ search.flags = whatis ? MANSEARCH_WHATIS : 0;
manpath_parse(&paths, conf_file, defpaths, auxpaths);
+ mansearch_setup(1);
+ ch = mansearch(&search, &paths, argc, argv, outkey, &res, &sz);
+ manpath_free(&paths);
- e = whatis ? termcomp(argc, argv, &terms) :
- exprcomp(argc, argv, &terms);
-
- if (NULL == e) {
- fprintf(stderr, "%s: Bad expression\n", progname);
- goto out;
- }
-
- rc = apropos_search
- (paths.sz, paths.paths, &opts,
- e, terms, NULL, &ressz, &res, list);
+ if (0 == ch)
+ goto usage;
- if (0 == rc) {
- fprintf(stderr, "%s: Bad database\n", progname);
- goto out;
+ for (i = 0; i < sz; i++) {
+ printf("%s - %s\n", res[i].names,
+ NULL == res[i].output ? "" : res[i].output);
+ free(res[i].file);
+ free(res[i].names);
+ free(res[i].output);
}
-out:
- manpath_free(&paths);
- resfree(res, ressz);
- exprfree(e);
- return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
-
+ free(res);
+ mansearch_setup(0);
+ return(sz ? EXIT_SUCCESS : EXIT_FAILURE);
usage:
fprintf(stderr, "usage: %s [-C file] [-M path] [-m path] "
+ "[-O outkey] "
"[-S arch] [-s section]%s ...\n", progname,
whatis ? " name" : "\n expression");
return(EXIT_FAILURE);
}
-
-/* ARGSUSED */
-static void
-list(struct res *res, size_t sz, void *arg)
-{
- size_t i;
-
- qsort(res, sz, sizeof(struct res), cmp);
-
- for (i = 0; i < sz; i++) {
- if ( ! res[i].matched)
- continue;
- printf("%s(%s%s%s) - %.70s\n",
- res[i].title,
- res[i].cat,
- *res[i].arch ? "/" : "",
- *res[i].arch ? res[i].arch : "",
- res[i].desc);
- }
-}
-
-static int
-cmp(const void *p1, const void *p2)
-{
-
- return(strcasecmp(((const struct res *)p1)->title,
- ((const struct res *)p2)->title));
-}
diff --git a/apropos_db.c b/apropos_db.c
deleted file mode 100644
index 786fc7bd8fd4..000000000000
--- a/apropos_db.c
+++ /dev/null
@@ -1,884 +0,0 @@
-/* $Id: apropos_db.c,v 1.32.2.3 2013/10/10 23:43:04 schwarze Exp $ */
-/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/param.h>
-
-#include <assert.h>
-#include <fcntl.h>
-#include <regex.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#if defined(__APPLE__)
-# include <libkern/OSByteOrder.h>
-#elif defined(__linux__)
-# include <endian.h>
-#elif defined(__sun)
-# include <sys/byteorder.h>
-#else
-# include <sys/endian.h>
-#endif
-
-#if defined(__linux__) || defined(__sun)
-# include <db_185.h>
-#else
-# include <db.h>
-#endif
-
-#include "mandocdb.h"
-#include "apropos_db.h"
-#include "mandoc.h"
-
-#define RESFREE(_x) \
- do { \
- free((_x)->file); \
- free((_x)->cat); \
- free((_x)->title); \
- free((_x)->arch); \
- free((_x)->desc); \
- free((_x)->matches); \
- } while (/*CONSTCOND*/0)
-
-struct expr {
- int regex; /* is regex? */
- int index; /* index in match array */
- uint64_t mask; /* type-mask */
- int and; /* is rhs of logical AND? */
- char *v; /* search value */
- regex_t re; /* compiled re, if regex */
- struct expr *next; /* next in sequence */
- struct expr *subexpr;
-};
-
-struct type {
- uint64_t mask;
- const char *name;
-};
-
-struct rectree {
- struct res *node; /* record array for dir tree */
- int len; /* length of record array */
-};
-
-static const struct type types[] = {
- { TYPE_An, "An" },
- { TYPE_Ar, "Ar" },
- { TYPE_At, "At" },
- { TYPE_Bsx, "Bsx" },
- { TYPE_Bx, "Bx" },
- { TYPE_Cd, "Cd" },
- { TYPE_Cm, "Cm" },
- { TYPE_Dv, "Dv" },
- { TYPE_Dx, "Dx" },
- { TYPE_Em, "Em" },
- { TYPE_Er, "Er" },
- { TYPE_Ev, "Ev" },
- { TYPE_Fa, "Fa" },
- { TYPE_Fl, "Fl" },
- { TYPE_Fn, "Fn" },
- { TYPE_Fn, "Fo" },
- { TYPE_Ft, "Ft" },
- { TYPE_Fx, "Fx" },
- { TYPE_Ic, "Ic" },
- { TYPE_In, "In" },
- { TYPE_Lb, "Lb" },
- { TYPE_Li, "Li" },
- { TYPE_Lk, "Lk" },
- { TYPE_Ms, "Ms" },
- { TYPE_Mt, "Mt" },
- { TYPE_Nd, "Nd" },
- { TYPE_Nm, "Nm" },
- { TYPE_Nx, "Nx" },
- { TYPE_Ox, "Ox" },
- { TYPE_Pa, "Pa" },
- { TYPE_Rs, "Rs" },
- { TYPE_Sh, "Sh" },
- { TYPE_Ss, "Ss" },
- { TYPE_St, "St" },
- { TYPE_Sy, "Sy" },
- { TYPE_Tn, "Tn" },
- { TYPE_Va, "Va" },
- { TYPE_Va, "Vt" },
- { TYPE_Xr, "Xr" },
- { UINT64_MAX, "any" },
- { 0, NULL }
-};
-
-static DB *btree_open(void);
-static int btree_read(const DBT *, const DBT *,
- const struct mchars *,
- uint64_t *, recno_t *, char **);
-static int expreval(const struct expr *, int *);
-static void exprexec(const struct expr *,
- const char *, uint64_t, struct res *);
-static int exprmark(const struct expr *,
- const char *, uint64_t, int *);
-static struct expr *exprexpr(int, char *[], int *, int *, size_t *);
-static struct expr *exprterm(char *, int);
-static DB *index_open(void);
-static int index_read(const DBT *, const DBT *, int,
- const struct mchars *, struct res *);
-static void norm_string(const char *,
- const struct mchars *, char **);
-static size_t norm_utf8(unsigned int, char[7]);
-static int single_search(struct rectree *, const struct opts *,
- const struct expr *, size_t terms,
- struct mchars *, int);
-
-/*
- * Open the keyword mandoc-db database.
- */
-static DB *
-btree_open(void)
-{
- BTREEINFO info;
- DB *db;
-
- memset(&info, 0, sizeof(BTREEINFO));
- info.lorder = 4321;
- info.flags = R_DUP;
-
- db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info);
- if (NULL != db)
- return(db);
-
- return(NULL);
-}
-
-/*
- * Read a keyword from the database and normalise it.
- * Return 0 if the database is insane, else 1.
- */
-static int
-btree_read(const DBT *k, const DBT *v, const struct mchars *mc,
- uint64_t *mask, recno_t *rec, char **buf)
-{
- uint64_t vbuf[2];
-
- /* Are our sizes sane? */
- if (k->size < 2 || sizeof(vbuf) != v->size)
- return(0);
-
- /* Is our string nil-terminated? */
- if ('\0' != ((const char *)k->data)[(int)k->size - 1])
- return(0);
-
- norm_string((const char *)k->data, mc, buf);
- memcpy(vbuf, v->data, v->size);
- *mask = betoh64(vbuf[0]);
- *rec = betoh64(vbuf[1]);
- return(1);
-}
-
-/*
- * Take a Unicode codepoint and produce its UTF-8 encoding.
- * This isn't the best way to do this, but it works.
- * The magic numbers are from the UTF-8 packaging.
- * They're not as scary as they seem: read the UTF-8 spec for details.
- */
-static size_t
-norm_utf8(unsigned int cp, char out[7])
-{
- int rc;
-
- rc = 0;
-
- if (cp <= 0x0000007F) {
- rc = 1;
- out[0] = (char)cp;
- } else if (cp <= 0x000007FF) {
- rc = 2;
- out[0] = (cp >> 6 & 31) | 192;
- out[1] = (cp & 63) | 128;
- } else if (cp <= 0x0000FFFF) {
- rc = 3;
- out[0] = (cp >> 12 & 15) | 224;
- out[1] = (cp >> 6 & 63) | 128;
- out[2] = (cp & 63) | 128;
- } else if (cp <= 0x001FFFFF) {
- rc = 4;
- out[0] = (cp >> 18 & 7) | 240;
- out[1] = (cp >> 12 & 63) | 128;
- out[2] = (cp >> 6 & 63) | 128;
- out[3] = (cp & 63) | 128;
- } else if (cp <= 0x03FFFFFF) {
- rc = 5;
- out[0] = (cp >> 24 & 3) | 248;
- out[1] = (cp >> 18 & 63) | 128;
- out[2] = (cp >> 12 & 63) | 128;
- out[3] = (cp >> 6 & 63) | 128;
- out[4] = (cp & 63) | 128;
- } else if (cp <= 0x7FFFFFFF) {
- rc = 6;
- out[0] = (cp >> 30 & 1) | 252;
- out[1] = (cp >> 24 & 63) | 128;
- out[2] = (cp >> 18 & 63) | 128;
- out[3] = (cp >> 12 & 63) | 128;
- out[4] = (cp >> 6 & 63) | 128;
- out[5] = (cp & 63) | 128;
- } else
- return(0);
-
- out[rc] = '\0';
- return((size_t)rc);
-}
-
-/*
- * Normalise strings from the index and database.
- * These strings are escaped as defined by mandoc_char(7) along with
- * other goop in mandoc.h (e.g., soft hyphens).
- * This function normalises these into a nice UTF-8 string.
- * Returns 0 if the database is fucked.
- */
-static void
-norm_string(const char *val, const struct mchars *mc, char **buf)
-{
- size_t sz, bsz;
- char utfbuf[7];
- const char *seq, *cpp;
- int len, u, pos;
- enum mandoc_esc esc;
- static const char res[] = { '\\', '\t',
- ASCII_NBRSP, ASCII_HYPH, '\0' };
-
- /* Pre-allocate by the length of the input */
-
- bsz = strlen(val) + 1;
- *buf = mandoc_realloc(*buf, bsz);
- pos = 0;
-
- while ('\0' != *val) {
- /*
- * Halt on the first escape sequence.
- * This also halts on the end of string, in which case
- * we just copy, fallthrough, and exit the loop.
- */
- if ((sz = strcspn(val, res)) > 0) {
- memcpy(&(*buf)[pos], val, sz);
- pos += (int)sz;
- val += (int)sz;
- }
-
- if (ASCII_HYPH == *val) {
- (*buf)[pos++] = '-';
- val++;
- continue;
- } else if ('\t' == *val || ASCII_NBRSP == *val) {
- (*buf)[pos++] = ' ';
- val++;
- continue;
- } else if ('\\' != *val)
- break;
-
- /* Read past the slash. */
-
- val++;
- u = 0;
-
- /*
- * Parse the escape sequence and see if it's a
- * predefined character or special character.
- */
-
- esc = mandoc_escape(&val, &seq, &len);
- if (ESCAPE_ERROR == esc)
- break;
-
- /*
- * XXX - this just does UTF-8, but we need to know
- * beforehand whether we should do text substitution.
- */
-
- switch (esc) {
- case (ESCAPE_SPECIAL):
- if (0 != (u = mchars_spec2cp(mc, seq, len)))
- break;
- /* FALLTHROUGH */
- default:
- continue;
- }
-
- /*
- * If we have a Unicode codepoint, try to convert that
- * to a UTF-8 byte string.
- */
-
- cpp = utfbuf;
- if (0 == (sz = norm_utf8(u, utfbuf)))
- continue;
-
- /* Copy the rendered glyph into the stream. */
-
- sz = strlen(cpp);
- bsz += sz;
-
- *buf = mandoc_realloc(*buf, bsz);
-
- memcpy(&(*buf)[pos], cpp, sz);
- pos += (int)sz;
- }
-
- (*buf)[pos] = '\0';
-}
-
-/*
- * Open the filename-index mandoc-db database.
- * Returns NULL if opening failed.
- */
-static DB *
-index_open(void)
-{
- DB *db;
-
- db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL);
- if (NULL != db)
- return(db);
-
- return(NULL);
-}
-
-/*
- * Safely unpack from an index file record into the structure.
- * Returns 1 if an entry was unpacked, 0 if the database is insane.
- */
-static int
-index_read(const DBT *key, const DBT *val, int index,
- const struct mchars *mc, struct res *rec)
-{
- size_t left;
- char *np, *cp;
- char type;
-
-#define INDEX_BREAD(_dst) \
- do { \
- if (NULL == (np = memchr(cp, '\0', left))) \
- return(0); \
- norm_string(cp, mc, &(_dst)); \
- left -= (np - cp) + 1; \
- cp = np + 1; \
- } while (/* CONSTCOND */ 0)
-
- if (0 == (left = val->size))
- return(0);
-
- cp = val->data;
- assert(sizeof(recno_t) == key->size);
- memcpy(&rec->rec, key->data, key->size);
- rec->volume = index;
-
- if ('d' == (type = *cp++))
- rec->type = RESTYPE_MDOC;
- else if ('a' == type)
- rec->type = RESTYPE_MAN;
- else if ('c' == type)
- rec->type = RESTYPE_CAT;
- else
- return(0);
-
- left--;
- INDEX_BREAD(rec->file);
- INDEX_BREAD(rec->cat);
- INDEX_BREAD(rec->title);
- INDEX_BREAD(rec->arch);
- INDEX_BREAD(rec->desc);
- return(1);
-}
-
-/*
- * Search mandocdb databases in paths for expression "expr".
- * Filter out by "opts".
- * Call "res" with the results, which may be zero.
- * Return 0 if there was a database error, else return 1.
- */
-int
-apropos_search(int pathsz, char **paths, const struct opts *opts,
- const struct expr *expr, size_t terms, void *arg,
- size_t *sz, struct res **resp,
- void (*res)(struct res *, size_t, void *))
-{
- struct rectree tree;
- struct mchars *mc;
- int i;
-
- memset(&tree, 0, sizeof(struct rectree));
-
- mc = mchars_alloc();
- *sz = 0;
- *resp = NULL;
-
- /*
- * Main loop. Change into the directory containing manpage
- * databases. Run our expession over each database in the set.
- */
-
- for (i = 0; i < pathsz; i++) {
- assert('/' == paths[i][0]);
- if (chdir(paths[i]))
- continue;
- if (single_search(&tree, opts, expr, terms, mc, i))
- continue;
-
- resfree(tree.node, tree.len);
- mchars_free(mc);
- return(0);
- }
-
- (*res)(tree.node, tree.len, arg);
- *sz = tree.len;
- *resp = tree.node;
- mchars_free(mc);
- return(1);
-}
-
-static int
-single_search(struct rectree *tree, const struct opts *opts,
- const struct expr *expr, size_t terms,
- struct mchars *mc, int vol)
-{
- int root, leaf, ch;
- DBT key, val;
- DB *btree, *idx;
- char *buf;
- struct res *rs;
- struct res r;
- uint64_t mask;
- recno_t rec;
-
- root = -1;
- leaf = -1;
- btree = NULL;
- idx = NULL;
- buf = NULL;
- rs = tree->node;
-
- memset(&r, 0, sizeof(struct res));
-
- if (NULL == (btree = btree_open()))
- return(1);
-
- if (NULL == (idx = index_open())) {
- (*btree->close)(btree);
- return(1);
- }
-
- while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) {
- if ( ! btree_read(&key, &val, mc, &mask, &rec, &buf))
- break;
-
- /*
- * See if this keyword record matches any of the
- * expressions we have stored.
- */
- if ( ! exprmark(expr, buf, mask, NULL))
- continue;
-
- /*
- * O(log n) scan for prior records. Since a record
- * number is unbounded, this has decent performance over
- * a complex hash function.
- */
-
- for (leaf = root; leaf >= 0; )
- if (rec > rs[leaf].rec &&
- rs[leaf].rhs >= 0)
- leaf = rs[leaf].rhs;
- else if (rec < rs[leaf].rec &&
- rs[leaf].lhs >= 0)
- leaf = rs[leaf].lhs;
- else
- break;
-
- /*
- * If we find a record, see if it has already evaluated
- * to true. If it has, great, just keep going. If not,
- * try to evaluate it now and continue anyway.
- */
-
- if (leaf >= 0 && rs[leaf].rec == rec) {
- if (0 == rs[leaf].matched)
- exprexec(expr, buf, mask, &rs[leaf]);
- continue;
- }
-
- /*
- * We have a new file to examine.
- * Extract the manpage's metadata from the index
- * database, then begin partial evaluation.
- */
-
- key.data = &rec;
- key.size = sizeof(recno_t);
-
- if (0 != (*idx->get)(idx, &key, &val, 0))
- break;
-
- r.lhs = r.rhs = -1;
- if ( ! index_read(&key, &val, vol, mc, &r))
- break;
-
- /* XXX: this should be elsewhere, I guess? */
-
- if (opts->cat && strcasecmp(opts->cat, r.cat))
- continue;
-
- if (opts->arch && *r.arch)
- if (strcasecmp(opts->arch, r.arch))
- continue;
-
- tree->node = rs = mandoc_realloc
- (rs, (tree->len + 1) * sizeof(struct res));
-
- memcpy(&rs[tree->len], &r, sizeof(struct res));
- memset(&r, 0, sizeof(struct res));
- rs[tree->len].matches =
- mandoc_calloc(terms, sizeof(int));
-
- exprexec(expr, buf, mask, &rs[tree->len]);
-
- /* Append to our tree. */
-
- if (leaf >= 0) {
- if (rec > rs[leaf].rec)
- rs[leaf].rhs = tree->len;
- else
- rs[leaf].lhs = tree->len;
- } else
- root = tree->len;
-
- tree->len++;
- }
-
- (*btree->close)(btree);
- (*idx->close)(idx);
-
- free(buf);
- RESFREE(&r);
- return(1 == ch);
-}
-
-void
-resfree(struct res *rec, size_t sz)
-{
- size_t i;
-
- for (i = 0; i < sz; i++)
- RESFREE(&rec[i]);
- free(rec);
-}
-
-/*
- * Compile a list of straight-up terms.
- * The arguments are re-written into ~[[:<:]]term[[:>:]], or "term"
- * surrounded by word boundaries, then pumped through exprterm().
- * Terms are case-insensitive.
- * This emulates whatis(1) behaviour.
- */
-struct expr *
-termcomp(int argc, char *argv[], size_t *tt)
-{
- char *buf;
- int pos;
- struct expr *e, *next;
- size_t sz;
-
- buf = NULL;
- e = NULL;
- *tt = 0;
-
- for (pos = argc - 1; pos >= 0; pos--) {
- sz = strlen(argv[pos]) + 18;
- buf = mandoc_realloc(buf, sz);
- strlcpy(buf, "Nm~[[:<:]]", sz);
- strlcat(buf, argv[pos], sz);
- strlcat(buf, "[[:>:]]", sz);
- if (NULL == (next = exprterm(buf, 0))) {
- free(buf);
- exprfree(e);
- return(NULL);
- }
- next->next = e;
- e = next;
- (*tt)++;
- }
-
- free(buf);
- return(e);
-}
-
-/*
- * Compile a sequence of logical expressions.
- * See apropos.1 for a grammar of this sequence.
- */
-struct expr *
-exprcomp(int argc, char *argv[], size_t *tt)
-{
- int pos, lvl;
- struct expr *e;
-
- pos = lvl = 0;
- *tt = 0;
-
- e = exprexpr(argc, argv, &pos, &lvl, tt);
-
- if (0 == lvl && pos >= argc)
- return(e);
-
- exprfree(e);
- return(NULL);
-}
-
-/*
- * Compile an array of tokens into an expression.
- * An informal expression grammar is defined in apropos(1).
- * Return NULL if we fail doing so. All memory will be cleaned up.
- * Return the root of the expression sequence if alright.
- */
-static struct expr *
-exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt)
-{
- struct expr *e, *first, *next;
- int log;
-
- first = next = NULL;
-
- for ( ; *pos < argc; (*pos)++) {
- e = next;
-
- /*
- * Close out a subexpression.
- */
-
- if (NULL != e && 0 == strcmp(")", argv[*pos])) {
- if (--(*lvl) < 0)
- goto err;
- break;
- }
-
- /*
- * Small note: if we're just starting, don't let "-a"
- * and "-o" be considered logical operators: they're
- * just tokens unless pairwise joining, in which case we
- * record their existence (or assume "OR").
- */
- log = 0;
-
- if (NULL != e && 0 == strcmp("-a", argv[*pos]))
- log = 1;
- else if (NULL != e && 0 == strcmp("-o", argv[*pos]))
- log = 2;
-
- if (log > 0 && ++(*pos) >= argc)
- goto err;
-
- /*
- * Now we parse the term part. This can begin with
- * "-i", in which case the expression is case
- * insensitive.
- */
-
- if (0 == strcmp("(", argv[*pos])) {
- ++(*pos);
- ++(*lvl);
- next = mandoc_calloc(1, sizeof(struct expr));
- next->subexpr = exprexpr(argc, argv, pos, lvl, tt);
- if (NULL == next->subexpr) {
- free(next);
- next = NULL;
- }
- } else if (0 == strcmp("-i", argv[*pos])) {
- if (++(*pos) >= argc)
- goto err;
- next = exprterm(argv[*pos], 0);
- } else
- next = exprterm(argv[*pos], 1);
-
- if (NULL == next)
- goto err;
-
- next->and = log == 1;
- next->index = (int)(*tt)++;
-
- /* Append to our chain of expressions. */
-
- if (NULL == first) {
- assert(NULL == e);
- first = next;
- } else {
- assert(NULL != e);
- e->next = next;
- }
- }
-
- return(first);
-err:
- exprfree(first);
- return(NULL);
-}
-
-/*
- * Parse a terminal expression with the grammar as defined in
- * apropos(1).
- * Return NULL if we fail the parse.
- */
-static struct expr *
-exprterm(char *buf, int cs)
-{
- struct expr e;
- struct expr *p;
- char *key;
- int i;
-
- memset(&e, 0, sizeof(struct expr));
-
- /* Choose regex or substring match. */
-
- if (NULL == (e.v = strpbrk(buf, "=~"))) {
- e.regex = 0;
- e.v = buf;
- } else {
- e.regex = '~' == *e.v;
- *e.v++ = '\0';
- }
-
- /* Determine the record types to search for. */
-
- e.mask = 0;
- if (buf < e.v) {
- while (NULL != (key = strsep(&buf, ","))) {
- i = 0;
- while (types[i].mask &&
- strcmp(types[i].name, key))
- i++;
- e.mask |= types[i].mask;
- }
- }
- if (0 == e.mask)
- e.mask = TYPE_Nm | TYPE_Nd;
-
- if (e.regex) {
- i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE);
- if (regcomp(&e.re, e.v, i))
- return(NULL);
- }
-
- e.v = mandoc_strdup(e.v);
-
- p = mandoc_calloc(1, sizeof(struct expr));
- memcpy(p, &e, sizeof(struct expr));
- return(p);
-}
-
-void
-exprfree(struct expr *p)
-{
- struct expr *pp;
-
- while (NULL != p) {
- if (p->subexpr)
- exprfree(p->subexpr);
- if (p->regex)
- regfree(&p->re);
- free(p->v);
- pp = p->next;
- free(p);
- p = pp;
- }
-}
-
-static int
-exprmark(const struct expr *p, const char *cp,
- uint64_t mask, int *ms)
-{
-
- for ( ; p; p = p->next) {
- if (p->subexpr) {
- if (exprmark(p->subexpr, cp, mask, ms))
- return(1);
- continue;
- } else if ( ! (mask & p->mask))
- continue;
-
- if (p->regex) {
- if (regexec(&p->re, cp, 0, NULL, 0))
- continue;
- } else if (NULL == strcasestr(cp, p->v))
- continue;
-
- if (NULL == ms)
- return(1);
- else
- ms[p->index] = 1;
- }
-
- return(0);
-}
-
-static int
-expreval(const struct expr *p, int *ms)
-{
- int match;
-
- /*
- * AND has precedence over OR. Analysis is left-right, though
- * it doesn't matter because there are no side-effects.
- * Thus, step through pairwise ANDs and accumulate their Boolean
- * evaluation. If we encounter a single true AND collection or
- * standalone term, the whole expression is true (by definition
- * of OR).
- */
-
- for (match = 0; p && ! match; p = p->next) {
- /* Evaluate a subexpression, if applicable. */
- if (p->subexpr && ! ms[p->index])
- ms[p->index] = expreval(p->subexpr, ms);
-
- match = ms[p->index];
- for ( ; p->next && p->next->and; p = p->next) {
- /* Evaluate a subexpression, if applicable. */
- if (p->next->subexpr && ! ms[p->next->index])
- ms[p->next->index] =
- expreval(p->next->subexpr, ms);
- match = match && ms[p->next->index];
- }
- }
-
- return(match);
-}
-
-/*
- * First, update the array of terms for which this expression evaluates
- * to true.
- * Second, logically evaluate all terms over the updated array of truth
- * values.
- * If this evaluates to true, mark the expression as satisfied.
- */
-static void
-exprexec(const struct expr *e, const char *cp,
- uint64_t mask, struct res *r)
-{
-
- assert(0 == r->matched);
- exprmark(e, cp, mask, r->matches);
- r->matched = expreval(e, r->matches);
-}
diff --git a/apropos_db.h b/apropos_db.h
deleted file mode 100644
index 72d4c204a391..000000000000
--- a/apropos_db.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* $Id: apropos_db.h,v 1.13 2012/03/24 01:46:25 kristaps Exp $ */
-/*
- * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifndef APROPOS_H
-#define APROPOS_H
-
-enum restype {
- RESTYPE_MAN, /* man(7) file */
- RESTYPE_MDOC, /* mdoc(7) file */
- RESTYPE_CAT /* pre-formatted file */
-};
-
-struct res {
- enum restype type; /* input file type */
- char *file; /* file in file-system */
- char *cat; /* category (3p, 3, etc.) */
- char *title; /* title (FOO, etc.) */
- char *arch; /* arch (or empty string) */
- char *desc; /* description (from Nd) */
- unsigned int rec; /* record in index */
- /*
- * The index volume. This indexes into the array of directories
- * searched for manual page databases.
- */
- unsigned int volume;
- /*
- * The following fields are used internally.
- *
- * Maintain a binary tree for checking the uniqueness of `rec'
- * when adding elements to the results array.
- * Since the results array is dynamic, use offset in the array
- * instead of a pointer to the structure.
- */
- int lhs;
- int rhs;
- int matched; /* expression is true */
- int *matches; /* partial truth evaluations */
-};
-
-struct opts {
- const char *arch; /* restrict to architecture */
- const char *cat; /* restrict to manual section */
-};
-
-__BEGIN_DECLS
-
-struct expr;
-
-int apropos_search(int, char **, const struct opts *,
- const struct expr *, size_t,
- void *, size_t *, struct res **,
- void (*)(struct res *, size_t, void *));
-struct expr *exprcomp(int, char *[], size_t *);
-void exprfree(struct expr *);
-void resfree(struct res *, size_t);
-struct expr *termcomp(int, char *[], size_t *);
-
-__END_DECLS
-
-#endif /*!APROPOS_H*/
diff --git a/arch.c b/arch.c
index e764bfe9931f..3e746d8337ca 100644
--- a/arch.c
+++ b/arch.c
@@ -1,4 +1,4 @@
-/* $Id: arch.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */
+/* $Id: arch.c,v 1.11 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -18,22 +18,20 @@
#include "config.h"
#endif
-#include <stdlib.h>
#include <string.h>
-#include <time.h>
#include "mdoc.h"
-#include "mandoc.h"
#include "libmdoc.h"
#define LINE(x, y) \
if (0 == strcmp(p, x)) return(y);
+
const char *
mdoc_a2arch(const char *p)
{
-#include "arch.in"
+#include "arch.in"
return(NULL);
}
diff --git a/arch.in b/arch.in
index d0c445f308b2..a22ffd58ba7b 100644
--- a/arch.in
+++ b/arch.in
@@ -1,4 +1,4 @@
-/* $Id: arch.in,v 1.14 2013/09/16 22:12:57 schwarze Exp $ */
+/* $Id: arch.in,v 1.15 2014/04/27 22:42:15 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -65,8 +65,8 @@ LINE("ibmnws", "IBMNWS")
LINE("iyonix", "Iyonix")
LINE("landisk", "LANDISK")
LINE("loongson", "Loongson")
-LINE("luna68k", "Luna68k")
-LINE("luna88k", "Luna88k")
+LINE("luna68k", "LUNA68K")
+LINE("luna88k", "LUNA88K")
LINE("m68k", "m68k")
LINE("mac68k", "Mac68k")
LINE("macppc", "MacPPC")
diff --git a/att.c b/att.c
index 24d757ddf75b..059639af37aa 100644
--- a/att.c
+++ b/att.c
@@ -1,4 +1,4 @@
-/* $Id: att.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */
+/* $Id: att.c,v 1.11 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -18,22 +18,20 @@
#include "config.h"
#endif
-#include <stdlib.h>
#include <string.h>
-#include <time.h>
#include "mdoc.h"
-#include "mandoc.h"
#include "libmdoc.h"
#define LINE(x, y) \
if (0 == strcmp(p, x)) return(y);
+
const char *
mdoc_a2att(const char *p)
{
-#include "att.in"
+#include "att.in"
return(NULL);
}
diff --git a/catman.8 b/catman.8
deleted file mode 100644
index f5246f9a6ced..000000000000
--- a/catman.8
+++ /dev/null
@@ -1,111 +0,0 @@
-.\" $Id: catman.8,v 1.5 2011/12/25 19:35:44 kristaps Exp $
-.\"
-.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
-.\"
-.\" Permission to use, copy, modify, and distribute this software for any
-.\" purpose with or without fee is hereby granted, provided that the above
-.\" copyright notice and this permission notice appear in all copies.
-.\"
-.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-.\"
-.Dd $Mdocdate: December 25 2011 $
-.Dt CATMAN 8
-.Os
-.Sh NAME
-.Nm catman
-.Nd update a man.cgi manpage cache
-.Sh SYNOPSIS
-.Nm catman
-.Op Fl fv
-.Op Fl C Ar file
-.Op Fl M Ar manpath
-.Op Fl m Ar manpath
-.Op Fl o Ar path
-.Sh DESCRIPTION
-The
-.Nm
-utility updates cached manpages for a jailed
-.Xr man.cgi 7 .
-.Pp
-By default,
-.Nm
-searches for
-.Xr mandocdb 8
-databases in the default paths stipulated by
-.Xr man 1
-and updates the cache in
-.Pa /var/www/cache/man.cgi .
-.Pp
-Its arguments are as follows:
-.Bl -tag -width Ds
-.It Fl f
-Force an update to all files.
-.It Fl v
-Print each file being updated.
-.It Fl C Ar file
-Specify an alternative configuration
-.Ar file
-in
-.Xr man.conf 5
-format.
-.It Fl M Ar manpath
-Use the colon-separated path instead of the default list of paths
-searched for
-.Xr mandocdb 8
-databases.
-Invalid paths, or paths without manual databases, are ignored.
-.It Fl m Ar manpath
-Prepend the colon-separated paths to the list of paths searched
-for
-.Xr mandocdb 8
-databases.
-Invalid paths, or paths without manual databases, are ignored.
-.It Fl o Ar path
-Update into the directory tree under
-.Ar path .
-.El
-.Pp
-Cache updates occur when a
-.Xr mandocdb 8
-database is older than the cached copy unless
-.Fl f
-is specified, in which case files are always considered out of date.
-Cached manual pages are only updated if older than the master copy.
-.Sh ENVIRONMENT
-.Bl -tag -width Ds
-.It Ev MANPATH
-Colon-separated paths modifying the default list of paths searched for
-manual databases.
-Invalid paths, or paths without manual databases, are ignored.
-Overridden by
-.Fl M .
-If
-.Ev MANPATH
-begins with a
-.Sq \&: ,
-it is appended to the default list;
-else if it ends with
-.Sq \&: ,
-it is prepended to the default list; else if it contains
-.Sq \&:: ,
-the default list is inserted between the colons.
-If none of these conditions are met, it overrides the default list.
-.El
-.Sh EXIT STATUS
-.Ex -std
-.Sh SEE ALSO
-.Xr mandoc 1 ,
-.Xr man.cgi 7 ,
-.Xr mandocdb 8
-.Sh AUTHORS
-The
-.Nm
-utility was written by
-.An Kristaps Dzonsons ,
-.Mt kristaps@bsd.lv .
diff --git a/catman.c b/catman.c
deleted file mode 100644
index 8767e5e3f6db..000000000000
--- a/catman.c
+++ /dev/null
@@ -1,509 +0,0 @@
-/* $Id: catman.c,v 1.11.2.2 2013/10/11 00:06:48 schwarze Exp $ */
-/*
- * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/param.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#if defined(__linux__) || defined(__sun)
-# include <db_185.h>
-#else
-# include <db.h>
-#endif
-
-#include "manpath.h"
-#include "mandocdb.h"
-
-#define xstrlcpy(_dst, _src, _sz) \
- do if (strlcpy((_dst), (_src), (_sz)) >= (_sz)) { \
- fprintf(stderr, "%s: Path too long", (_dst)); \
- exit(EXIT_FAILURE); \
- } while (/* CONSTCOND */0)
-
-#define xstrlcat(_dst, _src, _sz) \
- do if (strlcat((_dst), (_src), (_sz)) >= (_sz)) { \
- fprintf(stderr, "%s: Path too long", (_dst)); \
- exit(EXIT_FAILURE); \
- } while (/* CONSTCOND */0)
-
-static int indexhtml(char *, size_t, char *, size_t);
-static int manup(const struct manpaths *, char *);
-static int mkpath(char *, mode_t, mode_t);
-static int treecpy(char *, char *);
-static int update(char *, char *);
-static void usage(void);
-
-static const char *progname;
-static int verbose;
-static int force;
-
-int
-main(int argc, char *argv[])
-{
- int ch;
- char *aux, *base, *conf_file;
- struct manpaths dirs;
- char buf[MAXPATHLEN];
- extern char *optarg;
- extern int optind;
-
- progname = strrchr(argv[0], '/');
- if (progname == NULL)
- progname = argv[0];
- else
- ++progname;
-
- aux = base = conf_file = NULL;
- xstrlcpy(buf, "/var/www/cache/man.cgi", MAXPATHLEN);
-
- while (-1 != (ch = getopt(argc, argv, "C:fm:M:o:v")))
- switch (ch) {
- case ('C'):
- conf_file = optarg;
- break;
- case ('f'):
- force = 1;
- break;
- case ('m'):
- aux = optarg;
- break;
- case ('M'):
- base = optarg;
- break;
- case ('o'):
- xstrlcpy(buf, optarg, MAXPATHLEN);
- break;
- case ('v'):
- verbose++;
- break;
- default:
- usage();
- return(EXIT_FAILURE);
- }
-
- argc -= optind;
- argv += optind;
-
- if (argc > 0) {
- usage();
- return(EXIT_FAILURE);
- }
-
- memset(&dirs, 0, sizeof(struct manpaths));
- manpath_parse(&dirs, conf_file, base, aux);
- ch = manup(&dirs, buf);
- manpath_free(&dirs);
- return(ch ? EXIT_SUCCESS : EXIT_FAILURE);
-}
-
-static void
-usage(void)
-{
-
- fprintf(stderr, "usage: %s "
- "[-fv] "
- "[-C file] "
- "[-o path] "
- "[-m manpath] "
- "[-M manpath]\n",
- progname);
-}
-
-/*
- * If "src" file doesn't exist (errors out), return -1. Otherwise,
- * return 1 if "src" is newer (which also happens "dst" doesn't exist)
- * and 0 otherwise.
- */
-static int
-isnewer(const char *dst, const char *src)
-{
- struct stat s1, s2;
-
- if (-1 == stat(src, &s1))
- return(-1);
- if (force)
- return(1);
-
- return(-1 == stat(dst, &s2) ? 1 : s1.st_mtime > s2.st_mtime);
-}
-
-/*
- * Copy the contents of one file into another.
- * Returns 0 on failure, 1 on success.
- */
-static int
-filecpy(const char *dst, const char *src)
-{
- char buf[BUFSIZ];
- int sfd, dfd, rc;
- ssize_t rsz, wsz;
-
- sfd = dfd = -1;
- rc = 0;
-
- if (-1 == (dfd = open(dst, O_CREAT|O_TRUNC|O_WRONLY, 0644))) {
- perror(dst);
- goto out;
- } else if (-1 == (sfd = open(src, O_RDONLY, 0))) {
- perror(src);
- goto out;
- }
-
- while ((rsz = read(sfd, buf, BUFSIZ)) > 0)
- if (-1 == (wsz = write(dfd, buf, (size_t)rsz))) {
- perror(dst);
- goto out;
- } else if (wsz < rsz) {
- fprintf(stderr, "%s: Short write\n", dst);
- goto out;
- }
-
- if (rsz < 0)
- perror(src);
- else
- rc = 1;
-out:
- if (-1 != sfd)
- close(sfd);
- if (-1 != dfd)
- close(dfd);
-
- return(rc);
-}
-
-/*
- * Pass over the recno database and re-create HTML pages if they're
- * found to be out of date.
- * Returns -1 on fatal error, 1 on success.
- */
-static int
-indexhtml(char *src, size_t ssz, char *dst, size_t dsz)
-{
- DB *idx;
- DBT key, val;
- int c, rc;
- unsigned int fl;
- const char *f;
- char *d;
- char fname[MAXPATHLEN];
-
- xstrlcpy(fname, dst, MAXPATHLEN);
- xstrlcat(fname, "/", MAXPATHLEN);
- xstrlcat(fname, MANDOC_IDX, MAXPATHLEN);
-
- idx = dbopen(fname, O_RDONLY, 0, DB_RECNO, NULL);
- if (NULL == idx) {
- perror(fname);
- return(-1);
- }
-
- fl = R_FIRST;
- while (0 == (c = (*idx->seq)(idx, &key, &val, fl))) {
- fl = R_NEXT;
- /*
- * If the record is zero-length, then it's unassigned.
- * Skip past these.
- */
- if (0 == val.size)
- continue;
-
- f = (const char *)val.data + 1;
- if (NULL == memchr(f, '\0', val.size - 1))
- break;
-
- src[(int)ssz] = dst[(int)dsz] = '\0';
-
- xstrlcat(dst, "/", MAXPATHLEN);
- xstrlcat(dst, f, MAXPATHLEN);
-
- xstrlcat(src, "/", MAXPATHLEN);
- xstrlcat(src, f, MAXPATHLEN);
-
- if (-1 == (rc = isnewer(dst, src))) {
- fprintf(stderr, "%s: File missing\n", f);
- break;
- } else if (0 == rc)
- continue;
-
- d = strrchr(dst, '/');
- assert(NULL != d);
- *d = '\0';
-
- if (-1 == mkpath(dst, 0755, 0755)) {
- perror(dst);
- break;
- }
-
- *d = '/';
-
- if ( ! filecpy(dst, src))
- break;
- if (verbose)
- printf("%s\n", dst);
- }
-
- (*idx->close)(idx);
-
- if (c < 0)
- perror(fname);
- else if (0 == c)
- fprintf(stderr, "%s: Corrupt index\n", fname);
-
- return(1 == c ? 1 : -1);
-}
-
-/*
- * Copy both recno and btree databases into the destination.
- * Call in to begin recreating HTML files.
- * Return -1 on fatal error and 1 if the update went well.
- */
-static int
-update(char *dst, char *src)
-{
- size_t dsz, ssz;
-
- dsz = strlen(dst);
- ssz = strlen(src);
-
- xstrlcat(src, "/", MAXPATHLEN);
- xstrlcat(dst, "/", MAXPATHLEN);
-
- xstrlcat(src, MANDOC_DB, MAXPATHLEN);
- xstrlcat(dst, MANDOC_DB, MAXPATHLEN);
-
- if ( ! filecpy(dst, src))
- return(-1);
- if (verbose)
- printf("%s\n", dst);
-
- dst[(int)dsz] = src[(int)ssz] = '\0';
-
- xstrlcat(src, "/", MAXPATHLEN);
- xstrlcat(dst, "/", MAXPATHLEN);
-
- xstrlcat(src, MANDOC_IDX, MAXPATHLEN);
- xstrlcat(dst, MANDOC_IDX, MAXPATHLEN);
-
- if ( ! filecpy(dst, src))
- return(-1);
- if (verbose)
- printf("%s\n", dst);
-
- dst[(int)dsz] = src[(int)ssz] = '\0';
-
- return(indexhtml(src, ssz, dst, dsz));
-}
-
-/*
- * See if btree or recno databases in the destination are out of date
- * with respect to a single manpath component.
- * Return -1 on fatal error, 0 if the source is no longer valid (and
- * shouldn't be listed), and 1 if the update went well.
- */
-static int
-treecpy(char *dst, char *src)
-{
- size_t dsz, ssz;
- int rc;
-
- dsz = strlen(dst);
- ssz = strlen(src);
-
- xstrlcat(src, "/", MAXPATHLEN);
- xstrlcat(dst, "/", MAXPATHLEN);
-
- xstrlcat(src, MANDOC_IDX, MAXPATHLEN);
- xstrlcat(dst, MANDOC_IDX, MAXPATHLEN);
-
- if (-1 == (rc = isnewer(dst, src)))
- return(0);
-
- dst[(int)dsz] = src[(int)ssz] = '\0';
-
- if (1 == rc)
- return(update(dst, src));
-
- xstrlcat(src, "/", MAXPATHLEN);
- xstrlcat(dst, "/", MAXPATHLEN);
-
- xstrlcat(src, MANDOC_DB, MAXPATHLEN);
- xstrlcat(dst, MANDOC_DB, MAXPATHLEN);
-
- if (-1 == (rc = isnewer(dst, src)))
- return(0);
- else if (rc == 0)
- return(1);
-
- dst[(int)dsz] = src[(int)ssz] = '\0';
-
- return(update(dst, src));
-}
-
-/*
- * Update the destination's file-tree with respect to changes in the
- * source manpath components.
- * "Change" is defined by an updated index or btree database.
- * Returns 1 on success, 0 on failure.
- */
-static int
-manup(const struct manpaths *dirs, char *base)
-{
- char dst[MAXPATHLEN],
- src[MAXPATHLEN];
- const char *path;
- size_t i;
- int c;
- size_t sz;
- FILE *f;
-
- /* Create the path and file for the catman.conf file. */
-
- sz = strlen(base);
- xstrlcpy(dst, base, MAXPATHLEN);
- xstrlcat(dst, "/etc", MAXPATHLEN);
- if (-1 == mkpath(dst, 0755, 0755)) {
- perror(dst);
- return(0);
- }
-
- xstrlcat(dst, "/catman.conf", MAXPATHLEN);
- if (NULL == (f = fopen(dst, "w"))) {
- perror(dst);
- return(0);
- } else if (verbose)
- printf("%s\n", dst);
-
- for (i = 0; i < dirs->sz; i++) {
- path = dirs->paths[i];
- dst[(int)sz] = '\0';
- xstrlcat(dst, path, MAXPATHLEN);
- if (-1 == mkpath(dst, 0755, 0755)) {
- perror(dst);
- break;
- }
-
- xstrlcpy(src, path, MAXPATHLEN);
- if (-1 == (c = treecpy(dst, src)))
- break;
- else if (0 == c)
- continue;
-
- /*
- * We want to use a relative path here because manpath.h
- * will realpath() when invoked with man.cgi, and we'll
- * make sure to chdir() into the cache directory before.
- *
- * This allows the cache directory to be in an arbitrary
- * place, working in both chroot() and non-chroot()
- * "safe" modes.
- */
- assert('/' == path[0]);
- fprintf(f, "_whatdb %s/whatis.db\n", path + 1);
- }
-
- fclose(f);
- return(i == dirs->sz);
-}
-
-/*
- * Copyright (c) 1983, 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-static int
-mkpath(char *path, mode_t mode, mode_t dir_mode)
-{
- struct stat sb;
- char *slash;
- int done, exists;
-
- slash = path;
-
- for (;;) {
- /* LINTED */
- slash += strspn(slash, "/");
- /* LINTED */
- slash += strcspn(slash, "/");
-
- done = (*slash == '\0');
- *slash = '\0';
-
- /* skip existing path components */
- exists = !stat(path, &sb);
- if (!done && exists && S_ISDIR(sb.st_mode)) {
- *slash = '/';
- continue;
- }
-
- if (mkdir(path, done ? mode : dir_mode) == 0) {
- if (mode > 0777 && chmod(path, mode) < 0)
- return (-1);
- } else {
- if (!exists) {
- /* Not there */
- return (-1);
- }
- if (!S_ISDIR(sb.st_mode)) {
- /* Is there, but isn't a directory */
- errno = ENOTDIR;
- return (-1);
- }
- }
-
- if (done)
- break;
-
- *slash = '/';
- }
-
- return (0);
-}
diff --git a/cgi.c b/cgi.c
index 64bde45ce2c7..1e38e3d872a5 100644
--- a/cgi.c
+++ b/cgi.c
@@ -1,6 +1,7 @@
-/* $Id: cgi.c,v 1.46 2013/10/11 00:06:48 schwarze Exp $ */
+/* $Id: cgi.c,v 1.92 2014/08/05 15:29:30 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2014 Ingo Schwarze <schwarze@usta.de>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -18,134 +19,103 @@
#include "config.h"
#endif
-#include <sys/wait.h>
-
-#include <assert.h>
#include <ctype.h>
#include <errno.h>
-#include <dirent.h>
#include <fcntl.h>
#include <limits.h>
-#include <regex.h>
-#include <stdio.h>
-#include <stdarg.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#if defined(__sun)
-/* for stat() */
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#endif
-
-#include "apropos_db.h"
#include "mandoc.h"
-#include "mdoc.h"
-#include "man.h"
+#include "mandoc_aux.h"
#include "main.h"
#include "manpath.h"
-#include "mandocdb.h"
-
-#if defined(__linux__) || defined(__sun)
-# include <db_185.h>
-#else
-# include <db.h>
-#endif
-
-enum page {
- PAGE_INDEX,
- PAGE_SEARCH,
- PAGE_SHOW,
- PAGE__MAX
-};
-
-struct paths {
- char *name;
- char *path;
-};
+#include "mansearch.h"
+#include "cgi.h"
/*
* A query as passed to the search function.
*/
struct query {
- const char *arch; /* architecture */
- const char *sec; /* manual section */
- const char *expr; /* unparsed expression string */
- int manroot; /* manroot index (or -1)*/
- int legacy; /* whether legacy mode */
+ char *manpath; /* desired manual directory */
+ char *arch; /* architecture */
+ char *sec; /* manual section */
+ char *query; /* unparsed query expression */
+ int equal; /* match whole names, not substrings */
};
struct req {
- struct query q;
- struct paths *p;
- size_t psz;
- enum page page;
+ struct query q;
+ char **p; /* array of available manpaths */
+ size_t psz; /* number of available manpaths */
};
-static int atou(const char *, unsigned *);
static void catman(const struct req *, const char *);
-static int cmp(const void *, const void *);
static void format(const struct req *, const char *);
static void html_print(const char *);
-static void html_printquery(const struct req *);
static void html_putchar(char);
static int http_decode(char *);
-static void http_parse(struct req *, char *);
+static void http_parse(struct req *, const char *);
static void http_print(const char *);
static void http_putchar(char);
-static void http_printquery(const struct req *);
-static int pathstop(DIR *);
-static void pathgen(DIR *, char *, struct req *);
-static void pg_index(const struct req *, char *);
-static void pg_search(const struct req *, char *);
-static void pg_show(const struct req *, char *);
-static void resp_bad(void);
-static void resp_baddb(void);
-static void resp_error400(void);
-static void resp_error404(const char *);
+static void http_printquery(const struct req *, const char *);
+static void pathgen(struct req *);
+static void pg_error_badrequest(const char *);
+static void pg_error_internal(void);
+static void pg_index(const struct req *);
+static void pg_noresult(const struct req *, const char *);
+static void pg_search(const struct req *);
+static void pg_searchres(const struct req *,
+ struct manpage *, size_t);
+static void pg_show(struct req *, const char *);
static void resp_begin_html(int, const char *);
static void resp_begin_http(int, const char *);
static void resp_end_html(void);
-static void resp_index(const struct req *);
-static void resp_search(struct res *, size_t, void *);
static void resp_searchform(const struct req *);
+static void resp_show(const struct req *, const char *);
+static void set_query_attr(char **, char **);
+static int validate_filename(const char *);
+static int validate_manpath(const struct req *, const char *);
+static int validate_urifrag(const char *);
-static const char *progname; /* cgi script name */
-static const char *cache; /* cache directory */
-static const char *css; /* css directory */
-static const char *host; /* hostname */
+static const char *scriptname; /* CGI script name */
-static const char * const pages[PAGE__MAX] = {
- "index", /* PAGE_INDEX */
- "search", /* PAGE_SEARCH */
- "show", /* PAGE_SHOW */
+static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
+static const char *const sec_numbers[] = {
+ "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
-
-/*
- * This is just OpenBSD's strtol(3) suggestion.
- * I use it instead of strtonum(3) for portability's sake.
- */
-static int
-atou(const char *buf, unsigned *v)
-{
- char *ep;
- long lval;
-
- errno = 0;
- lval = strtol(buf, &ep, 10);
- if (buf[0] == '\0' || *ep != '\0')
- return(0);
- if ((errno == ERANGE && (lval == LONG_MAX ||
- lval == LONG_MIN)) ||
- (lval > INT_MAX || lval < 0))
- return(0);
-
- *v = (unsigned int)lval;
- return(1);
-}
+static const char *const sec_names[] = {
+ "All Sections",
+ "1 - General Commands",
+ "2 - System Calls",
+ "3 - Subroutines",
+ "3p - Perl Subroutines",
+ "4 - Special Files",
+ "5 - File Formats",
+ "6 - Games",
+ "7 - Macros and Conventions",
+ "8 - Maintenance Commands",
+ "9 - Kernel Interface"
+};
+static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
+
+static const char *const arch_names[] = {
+ "amd64", "alpha", "armish", "armv7",
+ "aviion", "hppa", "hppa64", "i386",
+ "ia64", "landisk", "loongson", "luna88k",
+ "macppc", "mips64", "octeon", "sgi",
+ "socppc", "solbourne", "sparc", "sparc64",
+ "vax", "zaurus",
+ "amiga", "arc", "arm32", "atari",
+ "beagle", "cats", "hp300", "mac68k",
+ "mvme68k", "mvme88k", "mvmeppc", "palm",
+ "pc532", "pegasos", "pmax", "powerpc",
+ "sun3", "wgrisc", "x68k"
+};
+static const int arch_MAX = sizeof(arch_names) / sizeof(char *);
/*
* Print a character, escaping HTML along the way.
@@ -173,29 +143,30 @@ html_putchar(char c)
break;
}
}
-static void
-http_printquery(const struct req *req)
-{
-
- printf("&expr=");
- http_print(req->q.expr ? req->q.expr : "");
- printf("&sec=");
- http_print(req->q.sec ? req->q.sec : "");
- printf("&arch=");
- http_print(req->q.arch ? req->q.arch : "");
-}
-
static void
-html_printquery(const struct req *req)
+http_printquery(const struct req *req, const char *sep)
{
- printf("&amp;expr=");
- html_print(req->q.expr ? req->q.expr : "");
- printf("&amp;sec=");
- html_print(req->q.sec ? req->q.sec : "");
- printf("&amp;arch=");
- html_print(req->q.arch ? req->q.arch : "");
+ if (NULL != req->q.query) {
+ printf("query=");
+ http_print(req->q.query);
+ }
+ if (0 == req->q.equal)
+ printf("%sapropos=1", sep);
+ if (NULL != req->q.sec) {
+ printf("%ssec=", sep);
+ http_print(req->q.sec);
+ }
+ if (NULL != req->q.arch) {
+ printf("%sarch=", sep);
+ http_print(req->q.arch);
+ }
+ if (NULL != req->q.manpath &&
+ strcmp(req->q.manpath, req->p[0])) {
+ printf("%smanpath=", sep);
+ http_print(req->q.manpath);
+ }
}
static void
@@ -223,81 +194,114 @@ html_print(const char *p)
}
/*
- * Parse out key-value pairs from an HTTP request variable.
- * This can be either a cookie or a POST/GET string, although man.cgi
- * uses only GET for simplicity.
+ * Transfer the responsibility for the allocated string *val
+ * to the query structure.
+ */
+static void
+set_query_attr(char **attr, char **val)
+{
+
+ free(*attr);
+ if (**val == '\0') {
+ *attr = NULL;
+ free(*val);
+ } else
+ *attr = *val;
+ *val = NULL;
+}
+
+/*
+ * Parse the QUERY_STRING for key-value pairs
+ * and store the values into the query structure.
*/
static void
-http_parse(struct req *req, char *p)
+http_parse(struct req *req, const char *qs)
{
- char *key, *val, *manroot;
- int i, legacy;
+ char *key, *val;
+ size_t keysz, valsz;
- memset(&req->q, 0, sizeof(struct query));
+ req->q.manpath = NULL;
+ req->q.arch = NULL;
+ req->q.sec = NULL;
+ req->q.query = NULL;
+ req->q.equal = 1;
- legacy = -1;
- manroot = NULL;
+ key = val = NULL;
+ while (*qs != '\0') {
- while ('\0' != *p) {
- key = p;
- val = NULL;
+ /* Parse one key. */
- p += (int)strcspn(p, ";&");
- if ('\0' != *p)
- *p++ = '\0';
- if (NULL != (val = strchr(key, '=')))
- *val++ = '\0';
+ keysz = strcspn(qs, "=;&");
+ key = mandoc_strndup(qs, keysz);
+ qs += keysz;
+ if (*qs != '=')
+ goto next;
- if ('\0' == *key || NULL == val || '\0' == *val)
- continue;
+ /* Parse one value. */
- /* Just abort handling. */
-
- if ( ! http_decode(key))
- break;
- if (NULL != val && ! http_decode(val))
- break;
-
- if (0 == strcmp(key, "expr"))
- req->q.expr = val;
- else if (0 == strcmp(key, "query"))
- req->q.expr = val;
- else if (0 == strcmp(key, "sec"))
- req->q.sec = val;
- else if (0 == strcmp(key, "sektion"))
- req->q.sec = val;
- else if (0 == strcmp(key, "arch"))
- req->q.arch = val;
- else if (0 == strcmp(key, "manpath"))
- manroot = val;
- else if (0 == strcmp(key, "apropos"))
- legacy = 0 == strcmp(val, "0");
- }
+ valsz = strcspn(++qs, ";&");
+ val = mandoc_strndup(qs, valsz);
+ qs += valsz;
- /* Test for old man.cgi compatibility mode. */
+ /* Decode and catch encoding errors. */
- req->q.legacy = legacy > 0;
+ if ( ! (http_decode(key) && http_decode(val)))
+ goto next;
- /*
- * Section "0" means no section when in legacy mode.
- * For some man.cgi scripts, "default" arch is none.
- */
+ /* Handle key-value pairs. */
+
+ if ( ! strcmp(key, "query"))
+ set_query_attr(&req->q.query, &val);
+
+ else if ( ! strcmp(key, "apropos"))
+ req->q.equal = !strcmp(val, "0");
+
+ else if ( ! strcmp(key, "manpath")) {
+#ifdef COMPAT_OLDURI
+ if ( ! strncmp(val, "OpenBSD ", 8)) {
+ val[7] = '-';
+ if ('C' == val[8])
+ val[8] = 'c';
+ }
+#endif
+ set_query_attr(&req->q.manpath, &val);
+ }
+
+ else if ( ! (strcmp(key, "sec")
+#ifdef COMPAT_OLDURI
+ && strcmp(key, "sektion")
+#endif
+ )) {
+ if ( ! strcmp(val, "0"))
+ *val = '\0';
+ set_query_attr(&req->q.sec, &val);
+ }
- if (req->q.legacy && NULL != req->q.sec)
- if (0 == strcmp(req->q.sec, "0"))
- req->q.sec = NULL;
- if (req->q.legacy && NULL != req->q.arch)
- if (0 == strcmp(req->q.arch, "default"))
- req->q.arch = NULL;
+ else if ( ! strcmp(key, "arch")) {
+ if ( ! strcmp(val, "default"))
+ *val = '\0';
+ set_query_attr(&req->q.arch, &val);
+ }
- /* Default to first manroot. */
+ /*
+ * The key must be freed in any case.
+ * The val may have been handed over to the query
+ * structure, in which case it is now NULL.
+ */
+next:
+ free(key);
+ key = NULL;
+ free(val);
+ val = NULL;
- if (NULL != manroot) {
- for (i = 0; i < (int)req->psz; i++)
- if (0 == strcmp(req->p[i].name, manroot))
- break;
- req->q.manroot = i < (int)req->psz ? i : -1;
+ if (*qs != '\0')
+ qs++;
}
+
+ /* Fall back to the default manpath. */
+
+ if (req->q.manpath == NULL)
+ req->q.manpath = mandoc_strdup(req->p[0]);
}
static void
@@ -323,11 +327,13 @@ static int
http_decode(char *p)
{
char hex[3];
+ char *q;
int c;
hex[2] = '\0';
- for ( ; '\0' != *p; p++) {
+ q = p;
+ for ( ; '\0' != *p; p++, q++) {
if ('%' == *p) {
if ('\0' == (hex[0] = *(p + 1)))
return(0);
@@ -338,13 +344,13 @@ http_decode(char *p)
if ('\0' == c)
return(0);
- *p = (char)c;
- memmove(p + 1, p + 3, strlen(p + 3) + 1);
+ *q = (char)c;
+ p += 2;
} else
- *p = '+' == *p ? ' ' : *p;
+ *q = '+' == *p ? ' ' : *p;
}
- *p = '\0';
+ *q = '\0';
return(1);
}
@@ -353,12 +359,12 @@ resp_begin_http(int code, const char *msg)
{
if (200 != code)
- printf("Status: %d %s\n", code, msg);
+ printf("Status: %d %s\r\n", code, msg);
- puts("Content-Type: text/html; charset=utf-8\n"
- "Cache-Control: no-cache\n"
- "Pragma: no-cache\n"
- "");
+ printf("Content-Type: text/html; charset=utf-8\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Pragma: no-cache\r\n"
+ "\r\n");
fflush(stdout);
}
@@ -380,10 +386,11 @@ resp_begin_html(int code, const char *msg)
" TYPE=\"text/css\" media=\"all\">\n"
"<LINK REL=\"stylesheet\" HREF=\"%s/man.css\""
" TYPE=\"text/css\" media=\"all\">\n"
- "<TITLE>System Manpage Reference</TITLE>\n"
+ "<TITLE>%s</TITLE>\n"
"</HEAD>\n"
"<BODY>\n"
- "<!-- Begin page content. //-->\n", css, css);
+ "<!-- Begin page content. //-->\n",
+ CSS_DIR, CSS_DIR, CUSTOMIZE_TITLE);
}
static void
@@ -399,192 +406,288 @@ resp_searchform(const struct req *req)
{
int i;
+ puts(CUSTOMIZE_BEGIN);
puts("<!-- Begin search form. //-->");
printf("<DIV ID=\"mancgi\">\n"
- "<FORM ACTION=\"%s/search.html\" METHOD=\"get\">\n"
+ "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
"<FIELDSET>\n"
- "<LEGEND>Search Parameters</LEGEND>\n"
- "<INPUT TYPE=\"submit\" "
- " VALUE=\"Search\"> for manuals satisfying \n"
- "<INPUT TYPE=\"text\" NAME=\"expr\" VALUE=\"",
- progname);
- html_print(req->q.expr ? req->q.expr : "");
- printf("\">, section "
- "<INPUT TYPE=\"text\""
- " SIZE=\"4\" NAME=\"sec\" VALUE=\"");
- html_print(req->q.sec ? req->q.sec : "");
- printf("\">, arch "
- "<INPUT TYPE=\"text\""
- " SIZE=\"8\" NAME=\"arch\" VALUE=\"");
- html_print(req->q.arch ? req->q.arch : "");
- printf("\">");
+ "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
+ scriptname);
+
+ /* Write query input box. */
+
+ printf( "<TABLE><TR><TD>\n"
+ "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
+ if (NULL != req->q.query)
+ html_print(req->q.query);
+ puts("\" SIZE=\"40\">");
+
+ /* Write submission and reset buttons. */
+
+ printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
+ "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
+
+ /* Write show radio button */
+
+ printf( "</TD><TD>\n"
+ "<INPUT TYPE=\"radio\" ");
+ if (req->q.equal)
+ printf("CHECKED=\"checked\" ");
+ printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
+ "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
+
+ /* Write section selector. */
+
+ puts( "</TD></TR><TR><TD>\n"
+ "<SELECT NAME=\"sec\">");
+ for (i = 0; i < sec_MAX; i++) {
+ printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
+ if (NULL != req->q.sec &&
+ 0 == strcmp(sec_numbers[i], req->q.sec))
+ printf(" SELECTED=\"selected\"");
+ printf(">%s</OPTION>\n", sec_names[i]);
+ }
+ puts("</SELECT>");
+
+ /* Write architecture selector. */
+
+ printf( "<SELECT NAME=\"arch\">\n"
+ "<OPTION VALUE=\"default\"");
+ if (NULL == req->q.arch)
+ printf(" SELECTED=\"selected\"");
+ puts(">All Architectures</OPTION>");
+ for (i = 0; i < arch_MAX; i++) {
+ printf("<OPTION VALUE=\"%s\"", arch_names[i]);
+ if (NULL != req->q.arch &&
+ 0 == strcmp(arch_names[i], req->q.arch))
+ printf(" SELECTED=\"selected\"");
+ printf(">%s</OPTION>\n", arch_names[i]);
+ }
+ puts("</SELECT>");
+
+ /* Write manpath selector. */
+
if (req->psz > 1) {
- puts(", <SELECT NAME=\"manpath\">");
+ puts("<SELECT NAME=\"manpath\">");
for (i = 0; i < (int)req->psz; i++) {
- printf("<OPTION %s VALUE=\"",
- (i == req->q.manroot) ||
- (0 == i && -1 == req->q.manroot) ?
- "SELECTED=\"selected\"" : "");
- html_print(req->p[i].name);
+ printf("<OPTION ");
+ if (NULL == req->q.manpath ? 0 == i :
+ 0 == strcmp(req->q.manpath, req->p[i]))
+ printf("SELECTED=\"selected\" ");
+ printf("VALUE=\"");
+ html_print(req->p[i]);
printf("\">");
- html_print(req->p[i].name);
+ html_print(req->p[i]);
puts("</OPTION>");
}
puts("</SELECT>");
}
- puts(".\n"
- "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"
+
+ /* Write search radio button */
+
+ printf( "</TD><TD>\n"
+ "<INPUT TYPE=\"radio\" ");
+ if (0 == req->q.equal)
+ printf("CHECKED=\"checked\" ");
+ printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
+ "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
+
+ puts("</TD></TR></TABLE>\n"
"</FIELDSET>\n"
"</FORM>\n"
"</DIV>");
puts("<!-- End search form. //-->");
}
-static void
-resp_index(const struct req *req)
+static int
+validate_urifrag(const char *frag)
{
- resp_begin_html(200, NULL);
- resp_searchform(req);
- resp_end_html();
+ while ('\0' != *frag) {
+ if ( ! (isalnum((unsigned char)*frag) ||
+ '-' == *frag || '.' == *frag ||
+ '/' == *frag || '_' == *frag))
+ return(0);
+ frag++;
+ }
+ return(1);
}
-static void
-resp_error400(void)
+static int
+validate_manpath(const struct req *req, const char* manpath)
{
+ size_t i;
- resp_begin_html(400, "Query Malformed");
- printf("<H1>Malformed Query</H1>\n"
- "<P>\n"
- "The query your entered was malformed.\n"
- "Try again from the\n"
- "<A HREF=\"%s/index.html\">main page</A>.\n"
- "</P>", progname);
- resp_end_html();
+ if ( ! strcmp(manpath, "mandoc"))
+ return(1);
+
+ for (i = 0; i < req->psz; i++)
+ if ( ! strcmp(manpath, req->p[i]))
+ return(1);
+
+ return(0);
+}
+
+static int
+validate_filename(const char *file)
+{
+
+ if ('.' == file[0] && '/' == file[1])
+ file += 2;
+
+ return ( ! (strstr(file, "../") || strstr(file, "/..") ||
+ (strncmp(file, "man", 3) && strncmp(file, "cat", 3))));
}
static void
-resp_error404(const char *page)
+pg_index(const struct req *req)
{
- resp_begin_html(404, "Not Found");
- puts("<H1>Page Not Found</H1>\n"
- "<P>\n"
- "The page you're looking for, ");
- printf("<B>");
- html_print(page);
- printf("</B>,\n"
- "could not be found.\n"
- "Try searching from the\n"
- "<A HREF=\"%s/index.html\">main page</A>.\n"
- "</P>", progname);
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ printf("<P>\n"
+ "This web interface is documented in the\n"
+ "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
+ "manual, and the\n"
+ "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
+ "manual explains the query syntax.\n"
+ "</P>\n",
+ scriptname, scriptname);
resp_end_html();
}
static void
-resp_bad(void)
+pg_noresult(const struct req *req, const char *msg)
{
- resp_begin_html(500, "Internal Server Error");
- puts("<P>Generic badness happened.</P>");
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ puts("<P>");
+ puts(msg);
+ puts("</P>");
resp_end_html();
}
static void
-resp_baddb(void)
+pg_error_badrequest(const char *msg)
{
- resp_begin_html(500, "Internal Server Error");
- puts("<P>Your database is broken.</P>");
+ resp_begin_html(400, "Bad Request");
+ puts("<H1>Bad Request</H1>\n"
+ "<P>\n");
+ puts(msg);
+ printf("Try again from the\n"
+ "<A HREF=\"%s\">main page</A>.\n"
+ "</P>", scriptname);
resp_end_html();
}
static void
-resp_search(struct res *r, size_t sz, void *arg)
+pg_error_internal(void)
{
- size_t i, matched;
- const struct req *req;
+ resp_begin_html(500, "Internal Server Error");
+ puts("<P>Internal Server Error</P>");
+ resp_end_html();
+}
- req = (const struct req *)arg;
+static void
+pg_searchres(const struct req *req, struct manpage *r, size_t sz)
+{
+ char *arch, *archend;
+ size_t i, iuse, isec;
+ int archprio, archpriouse;
+ int prio, priouse;
+ char sec;
- if (sz > 0)
- assert(req->q.manroot >= 0);
+ for (i = 0; i < sz; i++) {
+ if (validate_filename(r[i].file))
+ continue;
+ fprintf(stderr, "invalid filename %s in %s database\n",
+ r[i].file, req->q.manpath);
+ pg_error_internal();
+ return;
+ }
- for (matched = i = 0; i < sz; i++)
- if (r[i].matched)
- matched++;
-
- if (1 == matched) {
- for (i = 0; i < sz; i++)
- if (r[i].matched)
- break;
+ if (1 == sz) {
/*
* If we have just one result, then jump there now
* without any delay.
*/
- puts("Status: 303 See Other");
- printf("Location: http://%s%s/show/%d/%u/%u.html?",
- host, progname, req->q.manroot,
- r[i].volume, r[i].rec);
- http_printquery(req);
- puts("\n"
- "Content-Type: text/html; charset=utf-8\n");
+ printf("Status: 303 See Other\r\n");
+ printf("Location: http://%s%s/%s/%s?",
+ HTTP_HOST, scriptname, req->q.manpath, r[0].file);
+ http_printquery(req, "&");
+ printf("\r\n"
+ "Content-Type: text/html; charset=utf-8\r\n"
+ "\r\n");
return;
}
resp_begin_html(200, NULL);
resp_searchform(req);
-
puts("<DIV CLASS=\"results\">");
-
- if (0 == matched) {
- puts("<P>\n"
- "No results found.\n"
- "</P>\n"
- "</DIV>");
- resp_end_html();
- return;
- }
-
- qsort(r, sz, sizeof(struct res), cmp);
-
puts("<TABLE>");
for (i = 0; i < sz; i++) {
- if ( ! r[i].matched)
- continue;
printf("<TR>\n"
"<TD CLASS=\"title\">\n"
- "<A HREF=\"%s/show/%d/%u/%u.html?",
- progname, req->q.manroot,
- r[i].volume, r[i].rec);
- html_printquery(req);
+ "<A HREF=\"%s/%s/%s?",
+ scriptname, req->q.manpath, r[i].file);
+ http_printquery(req, "&amp;");
printf("\">");
- html_print(r[i].title);
- putchar('(');
- html_print(r[i].cat);
- if (r[i].arch && '\0' != *r[i].arch) {
- putchar('/');
- html_print(r[i].arch);
- }
- printf(")</A>\n"
+ html_print(r[i].names);
+ printf("</A>\n"
"</TD>\n"
"<TD CLASS=\"desc\">");
- html_print(r[i].desc);
+ html_print(r[i].output);
puts("</TD>\n"
"</TR>");
}
puts("</TABLE>\n"
"</DIV>");
- resp_end_html();
-}
-/* ARGSUSED */
-static void
-pg_index(const struct req *req, char *path)
-{
+ /*
+ * In man(1) mode, show one of the pages
+ * even if more than one is found.
+ */
- resp_index(req);
+ if (req->q.equal) {
+ puts("<HR>");
+ iuse = 0;
+ priouse = 10;
+ archpriouse = 3;
+ for (i = 0; i < sz; i++) {
+ isec = strcspn(r[i].file, "123456789");
+ sec = r[i].file[isec];
+ if ('\0' == sec)
+ continue;
+ prio = sec_prios[sec - '1'];
+ if (NULL == req->q.arch) {
+ archprio =
+ (NULL == (arch = strchr(
+ r[i].file + isec, '/'))) ? 3 :
+ (NULL == (archend = strchr(
+ arch + 1, '/'))) ? 0 :
+ strncmp(arch, "amd64/",
+ archend - arch) ? 2 : 1;
+ if (archprio < archpriouse) {
+ archpriouse = archprio;
+ priouse = prio;
+ iuse = i;
+ continue;
+ }
+ if (archprio > archpriouse)
+ continue;
+ }
+ if (prio >= priouse)
+ continue;
+ priouse = prio;
+ iuse = i;
+ }
+ resp_show(req, r[iuse].file);
+ }
+
+ resp_end_html();
}
static void
@@ -597,12 +700,10 @@ catman(const struct req *req, const char *file)
int italic, bold;
if (NULL == (f = fopen(file, "r"))) {
- resp_baddb();
+ puts("<P>You specified an invalid manual file.</P>");
return;
}
- resp_begin_html(200, NULL);
- resp_searchform(req);
puts("<DIV CLASS=\"catman\">\n"
"<PRE>");
@@ -716,9 +817,7 @@ catman(const struct req *req, const char *file)
}
puts("</PRE>\n"
- "</DIV>\n"
- "</BODY>\n"
- "</HTML>");
+ "</DIV>");
fclose(f);
}
@@ -727,42 +826,49 @@ static void
format(const struct req *req, const char *file)
{
struct mparse *mp;
- int fd;
struct mdoc *mdoc;
struct man *man;
void *vp;
+ char *opts;
enum mandoclevel rc;
- char opts[PATH_MAX + 128];
+ int fd;
+ int usepath;
if (-1 == (fd = open(file, O_RDONLY, 0))) {
- resp_baddb();
+ puts("<P>You specified an invalid manual file.</P>");
return;
}
- mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_FATAL, NULL,
+ req->q.manpath);
rc = mparse_readfd(mp, fd, file);
close(fd);
if (rc >= MANDOCLEVEL_FATAL) {
- resp_baddb();
+ fprintf(stderr, "fatal mandoc error: %s/%s\n",
+ req->q.manpath, file);
+ pg_error_internal();
return;
}
- snprintf(opts, sizeof(opts), "fragment,"
- "man=%s/search.html?sec=%%S&expr=Nm~^%%N$,"
- /*"includes=/cgi-bin/man.cgi/usr/include/%%I"*/,
- progname);
+ usepath = strcmp(req->q.manpath, req->p[0]);
+ mandoc_asprintf(&opts,
+ "fragment,man=%s?query=%%N&sec=%%S%s%s%s%s",
+ scriptname,
+ req->q.arch ? "&arch=" : "",
+ req->q.arch ? req->q.arch : "",
+ usepath ? "&manpath=" : "",
+ usepath ? req->q.manpath : "");
- mparse_result(mp, &mdoc, &man);
+ mparse_result(mp, &mdoc, &man, NULL);
if (NULL == man && NULL == mdoc) {
- resp_baddb();
+ fprintf(stderr, "fatal mandoc error: %s/%s\n",
+ req->q.manpath, file);
+ pg_error_internal();
mparse_free(mp);
return;
}
- resp_begin_html(200, NULL);
- resp_searchform(req);
-
vp = html_alloc(opts);
if (NULL != mdoc)
@@ -770,145 +876,87 @@ format(const struct req *req, const char *file)
else
html_man(vp, man);
- puts("</BODY>\n"
- "</HTML>");
-
html_free(vp);
mparse_free(mp);
+ free(opts);
+}
+
+static void
+resp_show(const struct req *req, const char *file)
+{
+
+ if ('.' == file[0] && '/' == file[1])
+ file += 2;
+
+ if ('c' == *file)
+ catman(req, file);
+ else
+ format(req, file);
}
static void
-pg_show(const struct req *req, char *path)
+pg_show(struct req *req, const char *fullpath)
{
- struct manpaths ps;
- size_t sz;
- char *sub;
- char file[PATH_MAX];
- const char *cp;
- int rc, catm;
- unsigned int vol, rec, mr;
- DB *idx;
- DBT key, val;
-
- idx = NULL;
-
- /* Parse out mroot, volume, and record from the path. */
-
- if (NULL == path || NULL == (sub = strchr(path, '/'))) {
- resp_error400();
+ char *manpath;
+ const char *file;
+
+ if ((file = strchr(fullpath, '/')) == NULL) {
+ pg_error_badrequest(
+ "You did not specify a page to show.");
return;
}
- *sub++ = '\0';
- if ( ! atou(path, &mr)) {
- resp_error400();
- return;
- }
- path = sub;
- if (NULL == (sub = strchr(path, '/'))) {
- resp_error400();
- return;
- }
- *sub++ = '\0';
- if ( ! atou(path, &vol) || ! atou(sub, &rec)) {
- resp_error400();
- return;
- } else if (mr >= (unsigned int)req->psz) {
- resp_error400();
+ manpath = mandoc_strndup(fullpath, file - fullpath);
+ file++;
+
+ if ( ! validate_manpath(req, manpath)) {
+ pg_error_badrequest(
+ "You specified an invalid manpath.");
+ free(manpath);
return;
}
/*
- * Begin by chdir()ing into the manroot.
+ * Begin by chdir()ing into the manpath.
* This way we can pick up the database files, which are
* relative to the manpath root.
*/
- if (-1 == chdir(req->p[(int)mr].path)) {
- perror(req->p[(int)mr].path);
- resp_baddb();
+ if (chdir(manpath) == -1) {
+ fprintf(stderr, "chdir %s: %s\n",
+ manpath, strerror(errno));
+ pg_error_internal();
+ free(manpath);
return;
}
- memset(&ps, 0, sizeof(struct manpaths));
- manpath_manconf(&ps, "etc/catman.conf");
-
- if (vol >= (unsigned int)ps.sz) {
- resp_error400();
- goto out;
- }
-
- sz = strlcpy(file, ps.paths[vol], PATH_MAX);
- assert(sz < PATH_MAX);
- strlcat(file, "/", PATH_MAX);
- strlcat(file, MANDOC_IDX, PATH_MAX);
-
- /* Open the index recno(3) database. */
-
- idx = dbopen(file, O_RDONLY, 0, DB_RECNO, NULL);
- if (NULL == idx) {
- perror(file);
- resp_baddb();
- goto out;
- }
-
- key.data = &rec;
- key.size = 4;
+ if (strcmp(manpath, "mandoc")) {
+ free(req->q.manpath);
+ req->q.manpath = manpath;
+ } else
+ free(manpath);
- if (0 != (rc = (*idx->get)(idx, &key, &val, 0))) {
- rc < 0 ? resp_baddb() : resp_error400();
- goto out;
- } else if (0 == val.size) {
- resp_baddb();
- goto out;
+ if ( ! validate_filename(file)) {
+ pg_error_badrequest(
+ "You specified an invalid manual file.");
+ return;
}
- cp = (char *)val.data;
- catm = 'c' == *cp++;
-
- if (NULL == memchr(cp, '\0', val.size - 1))
- resp_baddb();
- else {
- file[(int)sz] = '\0';
- strlcat(file, "/", PATH_MAX);
- strlcat(file, cp, PATH_MAX);
- if (catm)
- catman(req, file);
- else
- format(req, file);
- }
-out:
- if (idx)
- (*idx->close)(idx);
- manpath_free(&ps);
+ resp_begin_html(200, NULL);
+ resp_searchform(req);
+ resp_show(req, file);
+ resp_end_html();
}
static void
-pg_search(const struct req *req, char *path)
+pg_search(const struct req *req)
{
- size_t tt, ressz;
- struct manpaths ps;
- int i, sz, rc;
- const char *ep, *start;
- struct res *res;
- char **cp;
- struct opts opt;
- struct expr *expr;
-
- if (req->q.manroot < 0 || 0 == req->psz) {
- resp_search(NULL, 0, (void *)req);
- return;
- }
-
- memset(&opt, 0, sizeof(struct opts));
-
- ep = req->q.expr;
- opt.arch = req->q.arch;
- opt.cat = req->q.sec;
- rc = -1;
- sz = 0;
- cp = NULL;
- ressz = 0;
- res = NULL;
+ struct mansearch search;
+ struct manpaths paths;
+ struct manpage *res;
+ char **cp;
+ const char *ep, *start;
+ size_t ressz;
+ int i, sz;
/*
* Begin by chdir()ing into the root of the manpath.
@@ -916,26 +964,35 @@ pg_search(const struct req *req, char *path)
* relative to the manpath root.
*/
- assert(req->q.manroot < (int)req->psz);
- if (-1 == (chdir(req->p[req->q.manroot].path))) {
- perror(req->p[req->q.manroot].path);
- resp_search(NULL, 0, (void *)req);
+ if (-1 == (chdir(req->q.manpath))) {
+ fprintf(stderr, "chdir %s: %s\n",
+ req->q.manpath, strerror(errno));
+ pg_error_internal();
return;
}
- memset(&ps, 0, sizeof(struct manpaths));
- manpath_manconf(&ps, "etc/catman.conf");
+ search.arch = req->q.arch;
+ search.sec = req->q.sec;
+ search.deftype = req->q.equal ? TYPE_Nm : (TYPE_Nm | TYPE_Nd);
+ search.flags = req->q.equal ? MANSEARCH_MAN : 0;
+
+ paths.sz = 1;
+ paths.paths = mandoc_malloc(sizeof(char *));
+ paths.paths[0] = mandoc_strdup(".");
/*
* Poor man's tokenisation: just break apart by spaces.
* Yes, this is half-ass. But it works for now.
*/
+ ep = req->q.query;
while (ep && isspace((unsigned char)*ep))
ep++;
+ sz = 0;
+ cp = NULL;
while (ep && '\0' != *ep) {
- cp = mandoc_realloc(cp, (sz + 1) * sizeof(char *));
+ cp = mandoc_reallocarray(cp, sz + 1, sizeof(char *));
start = ep;
while ('\0' != *ep && ! isspace((unsigned char)*ep))
ep++;
@@ -946,288 +1003,148 @@ pg_search(const struct req *req, char *path)
ep++;
}
- /*
- * Pump down into apropos backend.
- * The resp_search() function is called with the results.
- */
-
- expr = req->q.legacy ?
- termcomp(sz, cp, &tt) : exprcomp(sz, cp, &tt);
-
- if (NULL != expr)
- rc = apropos_search
- (ps.sz, ps.paths, &opt, expr, tt,
- (void *)req, &ressz, &res, resp_search);
-
- /* ...unless errors occured. */
-
- if (0 == rc)
- resp_baddb();
- else if (-1 == rc)
- resp_search(NULL, 0, NULL);
+ if (0 == mansearch(&search, &paths, sz, cp, "Nd", &res, &ressz))
+ pg_noresult(req, "You entered an invalid query.");
+ else if (0 == ressz)
+ pg_noresult(req, "No results found.");
+ else
+ pg_searchres(req, res, ressz);
for (i = 0; i < sz; i++)
free(cp[i]);
-
free(cp);
- resfree(res, ressz);
- exprfree(expr);
- manpath_free(&ps);
+
+ for (i = 0; i < (int)ressz; i++) {
+ free(res[i].file);
+ free(res[i].names);
+ free(res[i].output);
+ }
+ free(res);
+
+ free(paths.paths[0]);
+ free(paths.paths);
}
int
main(void)
{
- int i;
- char buf[PATH_MAX];
- DIR *cwd;
struct req req;
- char *p, *path, *subpath;
+ const char *path;
+ const char *querystring;
+ int i;
/* Scan our run-time environment. */
- if (NULL == (cache = getenv("CACHE_DIR")))
- cache = "/cache/man.cgi";
-
- if (NULL == (progname = getenv("SCRIPT_NAME")))
- progname = "";
+ if (NULL == (scriptname = getenv("SCRIPT_NAME")))
+ scriptname = "";
- if (NULL == (css = getenv("CSS_DIR")))
- css = "";
-
- if (NULL == (host = getenv("HTTP_HOST")))
- host = "localhost";
+ if ( ! validate_urifrag(scriptname)) {
+ fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
+ scriptname);
+ pg_error_internal();
+ return(EXIT_FAILURE);
+ }
/*
- * First we change directory into the cache directory so that
+ * First we change directory into the MAN_DIR so that
* subsequent scanning for manpath directories is rooted
* relative to the same position.
*/
- if (-1 == chdir(cache)) {
- perror(cache);
- resp_bad();
- return(EXIT_FAILURE);
- } else if (NULL == (cwd = opendir(cache))) {
- perror(cache);
- resp_bad();
+ if (-1 == chdir(MAN_DIR)) {
+ fprintf(stderr, "MAN_DIR: %s: %s\n",
+ MAN_DIR, strerror(errno));
+ pg_error_internal();
return(EXIT_FAILURE);
}
memset(&req, 0, sizeof(struct req));
-
- strlcpy(buf, ".", PATH_MAX);
- pathgen(cwd, buf, &req);
- closedir(cwd);
+ pathgen(&req);
/* Next parse out the query string. */
- if (NULL != (p = getenv("QUERY_STRING")))
- http_parse(&req, p);
-
- /*
- * Now juggle paths to extract information.
- * We want to extract our filetype (the file suffix), the
- * initial path component, then the trailing component(s).
- * Start with leading subpath component.
- */
+ if (NULL != (querystring = getenv("QUERY_STRING")))
+ http_parse(&req, querystring);
- subpath = path = NULL;
- req.page = PAGE__MAX;
-
- if (NULL == (path = getenv("PATH_INFO")) || '\0' == *path)
- req.page = PAGE_INDEX;
-
- if (NULL != path && '/' == *path && '\0' == *++path)
- req.page = PAGE_INDEX;
-
- /* Strip file suffix. */
-
- if (NULL != path && NULL != (p = strrchr(path, '.')))
- if (NULL != p && NULL == strchr(p, '/'))
- *p++ = '\0';
-
- /* Resolve subpath component. */
-
- if (NULL != path && NULL != (subpath = strchr(path, '/')))
- *subpath++ = '\0';
-
- /* Map path into one we recognise. */
-
- if (NULL != path && '\0' != *path)
- for (i = 0; i < (int)PAGE__MAX; i++)
- if (0 == strcmp(pages[i], path)) {
- req.page = (enum page)i;
- break;
- }
-
- /* Route pages. */
-
- switch (req.page) {
- case (PAGE_INDEX):
- pg_index(&req, subpath);
- break;
- case (PAGE_SEARCH):
- pg_search(&req, subpath);
- break;
- case (PAGE_SHOW):
- pg_show(&req, subpath);
- break;
- default:
- resp_error404(path);
- break;
+ if ( ! (NULL == req.q.manpath ||
+ validate_manpath(&req, req.q.manpath))) {
+ pg_error_badrequest(
+ "You specified an invalid manpath.");
+ return(EXIT_FAILURE);
}
- for (i = 0; i < (int)req.psz; i++) {
- free(req.p[i].path);
- free(req.p[i].name);
+ if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
+ pg_error_badrequest(
+ "You specified an invalid architecture.");
+ return(EXIT_FAILURE);
}
- free(req.p);
- return(EXIT_SUCCESS);
-}
-
-static int
-cmp(const void *p1, const void *p2)
-{
-
- return(strcasecmp(((const struct res *)p1)->title,
- ((const struct res *)p2)->title));
-}
+ /* Dispatch to the three different pages. */
-/*
- * Check to see if an "etc" path consists of a catman.conf file. If it
- * does, that means that the path contains a tree created by catman(8)
- * and should be used for indexing.
- */
-static int
-pathstop(DIR *dir)
-{
- struct dirent *d;
-#if defined(__sun)
- struct stat sb;
-#endif
+ path = getenv("PATH_INFO");
+ if (NULL == path)
+ path = "";
+ else if ('/' == *path)
+ path++;
- while (NULL != (d = readdir(dir))) {
-#if defined(__sun)
- stat(d->d_name, &sb);
- if (S_IFREG & sb.st_mode)
-#else
- if (DT_REG == d->d_type)
-#endif
- if (0 == strcmp(d->d_name, "catman.conf"))
- return(1);
- }
-
- return(0);
+ if ('\0' != *path)
+ pg_show(&req, path);
+ else if (NULL != req.q.query)
+ pg_search(&req);
+ else
+ pg_index(&req);
+
+ free(req.q.manpath);
+ free(req.q.arch);
+ free(req.q.sec);
+ free(req.q.query);
+ for (i = 0; i < (int)req.psz; i++)
+ free(req.p[i]);
+ free(req.p);
+ return(EXIT_SUCCESS);
}
/*
* Scan for indexable paths.
- * This adds all paths with "etc/catman.conf" to the buffer.
*/
static void
-pathgen(DIR *dir, char *path, struct req *req)
+pathgen(struct req *req)
{
- struct dirent *d;
- char *cp;
- DIR *cd;
- int rc;
- size_t sz, ssz;
-#if defined(__sun)
- struct stat sb;
-#endif
-
- sz = strlcat(path, "/", PATH_MAX);
- if (sz >= PATH_MAX) {
- fprintf(stderr, "%s: Path too long", path);
- return;
- }
-
- /*
- * First, scan for the "etc" directory.
- * If it's found, then see if it should cause us to stop. This
- * happens when a catman.conf is found in the directory.
- */
-
- rc = 0;
- while (0 == rc && NULL != (d = readdir(dir))) {
-#if defined(__sun)
- stat(d->d_name, &sb);
- if (!(S_IFDIR & sb.st_mode)
-#else
- if (DT_DIR != d->d_type
-#endif
- || strcmp(d->d_name, "etc"))
- continue;
-
- path[(int)sz] = '\0';
- ssz = strlcat(path, d->d_name, PATH_MAX);
-
- if (ssz >= PATH_MAX) {
- fprintf(stderr, "%s: Path too long", path);
- return;
- } else if (NULL == (cd = opendir(path))) {
- perror(path);
- return;
- }
-
- rc = pathstop(cd);
- closedir(cd);
+ FILE *fp;
+ char *dp;
+ size_t dpsz;
+
+ if (NULL == (fp = fopen("manpath.conf", "r"))) {
+ fprintf(stderr, "%s/manpath.conf: %s\n",
+ MAN_DIR, strerror(errno));
+ pg_error_internal();
+ exit(EXIT_FAILURE);
}
- if (rc > 0) {
- /* This also strips the trailing slash. */
- path[(int)--sz] = '\0';
- req->p = mandoc_realloc
- (req->p,
- (req->psz + 1) * sizeof(struct paths));
- /*
- * Strip out the leading "./" unless we're just a ".",
- * in which case use an empty string as our name.
- */
- req->p[(int)req->psz].path = mandoc_strdup(path);
- req->p[(int)req->psz].name =
- cp = mandoc_strdup(path + (1 == sz ? 1 : 2));
- req->psz++;
- /*
- * The name is just the path with all the slashes taken
- * out of it. Simple but effective.
- */
- for ( ; '\0' != *cp; cp++)
- if ('/' == *cp)
- *cp = ' ';
- return;
- }
-
- /*
- * If no etc/catman.conf was found, recursively enter child
- * directory and continue scanning.
- */
-
- rewinddir(dir);
- while (NULL != (d = readdir(dir))) {
-#if defined(__sun)
- stat(d->d_name, &sb);
- if (!(S_IFDIR & sb.st_mode)
-#else
- if (DT_DIR != d->d_type
-#endif
- || '.' == d->d_name[0])
- continue;
-
- path[(int)sz] = '\0';
- ssz = strlcat(path, d->d_name, PATH_MAX);
-
- if (ssz >= PATH_MAX) {
- fprintf(stderr, "%s: Path too long", path);
- return;
- } else if (NULL == (cd = opendir(path))) {
- perror(path);
- return;
+ while (NULL != (dp = fgetln(fp, &dpsz))) {
+ if ('\n' == dp[dpsz - 1])
+ dpsz--;
+ req->p = mandoc_realloc(req->p,
+ (req->psz + 1) * sizeof(char *));
+ dp = mandoc_strndup(dp, dpsz);
+ if ( ! validate_urifrag(dp)) {
+ fprintf(stderr, "%s/manpath.conf contains "
+ "unsafe path \"%s\"\n", MAN_DIR, dp);
+ pg_error_internal();
+ exit(EXIT_FAILURE);
+ }
+ if (NULL != strchr(dp, '/')) {
+ fprintf(stderr, "%s/manpath.conf contains "
+ "path with slash \"%s\"\n", MAN_DIR, dp);
+ pg_error_internal();
+ exit(EXIT_FAILURE);
}
+ req->p[req->psz++] = dp;
+ }
- pathgen(cd, path, req);
- closedir(cd);
+ if ( req->p == NULL ) {
+ fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
+ pg_error_internal();
+ exit(EXIT_FAILURE);
}
}
diff --git a/cgi.h.example b/cgi.h.example
new file mode 100644
index 000000000000..f4c783186751
--- /dev/null
+++ b/cgi.h.example
@@ -0,0 +1,9 @@
+/* Example compile-time configuration file for man.cgi(8). */
+
+#define HTTP_HOST "mdocml.bsd.lv"
+#define MAN_DIR "/var/www/man"
+#define CSS_DIR ""
+#define CUSTOMIZE_TITLE "Manual pages with mandoc"
+#define CUSTOMIZE_BEGIN "<H2>\nManual pages with " \
+ "<A HREF=\"http://mdocml.bsd.lv/\">mandoc</A>\n</H2>"
+#define COMPAT_OLDURI Yes
diff --git a/chars.c b/chars.c
index 3ad1f57471c6..d758d0ccbd1b 100644
--- a/chars.c
+++ b/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.54 2013/06/20 22:39:30 schwarze Exp $ */
+/* $Id: chars.c,v 1.58 2014/07/23 15:00:08 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
@@ -25,6 +25,7 @@
#include <string.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libmandoc.h"
#define PRINT_HI 126
@@ -37,7 +38,7 @@ struct ln {
int unicode;
};
-#define LINES_MAX 329
+#define LINES_MAX 330
#define CHAR(in, ch, code) \
{ NULL, (in), (ch), (code) },
@@ -51,9 +52,10 @@ struct mchars {
struct ln **htab;
};
-static const struct ln *find(const struct mchars *,
+static const struct ln *find(const struct mchars *,
const char *, size_t);
+
void
mchars_free(struct mchars *arg)
{
@@ -110,27 +112,38 @@ mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
char
mchars_num2char(const char *p, size_t sz)
{
- int i;
+ int i;
if ((i = mandoc_strntoi(p, sz, 10)) < 0)
return('\0');
- return(i > 0 && i < 256 && isprint(i) ?
- /* LINTED */ i : '\0');
+
+ return(i > 0 && i < 256 && isprint(i) ? i : '\0');
}
int
mchars_num2uc(const char *p, size_t sz)
{
- int i;
+ int i;
if ((i = mandoc_strntoi(p, sz, 16)) < 0)
return('\0');
- /* FIXME: make sure we're not in a bogus range. */
+
+ /*
+ * Security warning:
+ * Never extend the range of accepted characters
+ * to overlap with the ASCII range, 0x00-0x7F
+ * without re-auditing the callers of this function.
+ * Some callers might rely on the fact that we never
+ * return ASCII characters for their escaping decisions.
+ *
+ * XXX Code is missing here to exclude bogus ranges.
+ */
+
return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
}
const char *
-mchars_spec2str(const struct mchars *arg,
+mchars_spec2str(const struct mchars *arg,
const char *p, size_t sz, size_t *rsz)
{
const struct ln *ln;
@@ -159,8 +172,8 @@ find(const struct mchars *tab, const char *p, size_t sz)
hash = (int)p[0] - PRINT_LO;
for (pp = tab->htab[hash]; pp; pp = pp->next)
- if (0 == strncmp(pp->code, p, sz) &&
- '\0' == pp->code[(int)sz])
+ if (0 == strncmp(pp->code, p, sz) &&
+ '\0' == pp->code[(int)sz])
return(pp);
return(NULL);
diff --git a/chars.in b/chars.in
index cc6549e7e5be..098504fa1648 100644
--- a/chars.in
+++ b/chars.in
@@ -1,6 +1,7 @@
-/* $Id: chars.in,v 1.43 2013/06/20 22:39:30 schwarze Exp $ */
+/* $Id: chars.in,v 1.46 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -16,7 +17,7 @@
*/
/*
- * The ASCII translation tables.
+ * The ASCII translation tables.
*
* The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx
* and so on) whose length is listed second element. The right-hand
@@ -27,39 +28,42 @@
* XXX - update LINES_MAX if adding more!
*/
-/* Non-breaking, non-collapsing space uses unit separator. */
+/* Special break control characters. */
static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
+static const char ascii_break[2] = { ASCII_BREAK, '\0' };
CHAR_TBL_START
/* Spacing. */
-CHAR("c", "", 0)
-CHAR("0", " ", 8194)
CHAR(" ", ascii_nbrsp, 160)
CHAR("~", ascii_nbrsp, 160)
-CHAR("%", "", 0)
-CHAR("&", "", 0)
-CHAR("^", "", 0)
+CHAR("0", " ", 8194)
CHAR("|", "", 0)
-CHAR("}", "", 0)
+CHAR("^", "", 0)
+CHAR("&", "", 0)
+CHAR("%", "", 0)
+CHAR(":", ascii_break, 0)
+/* XXX The following three do not really belong in this file. */
CHAR("t", "", 0)
+CHAR("c", "", 0)
+CHAR("}", "", 0)
/* Accents. */
-CHAR("a\"", "\"", 779)
+CHAR("a\"", "\"", 733)
CHAR("a-", "-", 175)
CHAR("a.", ".", 729)
-CHAR("a^", "^", 770)
-CHAR("\'", "\'", 769)
-CHAR("aa", "\'", 769)
-CHAR("ga", "`", 768)
-CHAR("`", "`", 768)
-CHAR("ab", "`", 774)
-CHAR("ac", ",", 807)
-CHAR("ad", "\"", 776)
+CHAR("a^", "^", 94)
+CHAR("\'", "\'", 180)
+CHAR("aa", "\'", 180)
+CHAR("ga", "`", 96)
+CHAR("`", "`", 96)
+CHAR("ab", "`", 728)
+CHAR("ac", ",", 184)
+CHAR("ad", "\"", 168)
CHAR("ah", "v", 711)
CHAR("ao", "o", 730)
-CHAR("a~", "~", 771)
-CHAR("ho", ",", 808)
+CHAR("a~", "~", 126)
+CHAR("ho", ",", 731)
CHAR("ha", "^", 94)
CHAR("ti", "~", 126)
diff --git a/compat_ohash.c b/compat_ohash.c
new file mode 100644
index 000000000000..0992b3657dde
--- /dev/null
+++ b/compat_ohash.c
@@ -0,0 +1,339 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_OHASH
+
+int dummy;
+
+#else
+
+/* $OpenBSD: ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */
+
+/* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "compat_ohash.h"
+
+struct _ohash_record {
+ uint32_t hv;
+ const char *p;
+};
+
+#define DELETED ((const char *)h)
+#define NONE (h->size)
+
+/* Don't bother changing the hash table if the change is small enough. */
+#define MINSIZE (1UL << 4)
+#define MINDELETED 4
+
+static void ohash_resize(struct ohash *);
+
+
+/* This handles the common case of variable length keys, where the
+ * key is stored at the end of the record.
+ */
+void *
+ohash_create_entry(struct ohash_info *i, const char *start, const char **end)
+{
+ char *p;
+
+ if (!*end)
+ *end = start + strlen(start);
+ p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data);
+ if (p) {
+ memcpy(p+i->key_offset, start, *end-start);
+ p[i->key_offset + (*end - start)] = '\0';
+ }
+ return (void *)p;
+}
+
+/* ohash_delete only frees the hash structure. Use ohash_first/ohash_next
+ * to free entries as well. */
+void
+ohash_delete(struct ohash *h)
+{
+ (h->info.free)(h->t, h->info.data);
+#ifndef NDEBUG
+ h->t = NULL;
+#endif
+}
+
+static void
+ohash_resize(struct ohash *h)
+{
+ struct _ohash_record *n;
+ size_t ns;
+ unsigned int j;
+ unsigned int i, incr;
+
+ if (4 * h->deleted < h->total) {
+ if (h->size >= (UINT_MAX >> 1U))
+ ns = UINT_MAX;
+ else
+ ns = h->size << 1U;
+ } else if (3 * h->deleted > 2 * h->total)
+ ns = h->size >> 1U;
+ else
+ ns = h->size;
+ if (ns < MINSIZE)
+ ns = MINSIZE;
+#ifdef STATS_HASH
+ STAT_HASH_EXPAND++;
+ STAT_HASH_SIZE += ns - h->size;
+#endif
+
+ n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data);
+ if (!n)
+ return;
+
+ for (j = 0; j < h->size; j++) {
+ if (h->t[j].p != NULL && h->t[j].p != DELETED) {
+ i = h->t[j].hv % ns;
+ incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
+ while (n[i].p != NULL) {
+ i += incr;
+ if (i >= ns)
+ i -= ns;
+ }
+ n[i].hv = h->t[j].hv;
+ n[i].p = h->t[j].p;
+ }
+ }
+ (h->info.free)(h->t, h->info.data);
+ h->t = n;
+ h->size = ns;
+ h->total -= h->deleted;
+ h->deleted = 0;
+}
+
+void *
+ohash_remove(struct ohash *h, unsigned int i)
+{
+ void *result = (void *)h->t[i].p;
+
+ if (result == NULL || result == DELETED)
+ return NULL;
+
+#ifdef STATS_HASH
+ STAT_HASH_ENTRIES--;
+#endif
+ h->t[i].p = DELETED;
+ h->deleted++;
+ if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
+ ohash_resize(h);
+ return result;
+}
+
+void *
+ohash_find(struct ohash *h, unsigned int i)
+{
+ if (h->t[i].p == DELETED)
+ return NULL;
+ else
+ return (void *)h->t[i].p;
+}
+
+void *
+ohash_insert(struct ohash *h, unsigned int i, void *p)
+{
+#ifdef STATS_HASH
+ STAT_HASH_ENTRIES++;
+#endif
+ if (h->t[i].p == DELETED) {
+ h->deleted--;
+ h->t[i].p = p;
+ } else {
+ h->t[i].p = p;
+ /* Arbitrary resize boundary. Tweak if not efficient enough. */
+ if (++h->total * 4 > h->size * 3)
+ ohash_resize(h);
+ }
+ return p;
+}
+
+unsigned int
+ohash_entries(struct ohash *h)
+{
+ return h->total - h->deleted;
+}
+
+void *
+ohash_first(struct ohash *h, unsigned int *pos)
+{
+ *pos = 0;
+ return ohash_next(h, pos);
+}
+
+void *
+ohash_next(struct ohash *h, unsigned int *pos)
+{
+ for (; *pos < h->size; (*pos)++)
+ if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
+ return (void *)h->t[(*pos)++].p;
+ return NULL;
+}
+
+void
+ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info)
+{
+ h->size = 1UL << size;
+ if (h->size < MINSIZE)
+ h->size = MINSIZE;
+#ifdef STATS_HASH
+ STAT_HASH_CREATION++;
+ STAT_HASH_SIZE += h->size;
+#endif
+ /* Copy info so that caller may free it. */
+ h->info.key_offset = info->key_offset;
+ h->info.calloc = info->calloc;
+ h->info.free = info->free;
+ h->info.alloc = info->alloc;
+ h->info.data = info->data;
+ h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record),
+ h->info.data);
+ h->total = h->deleted = 0;
+}
+
+uint32_t
+ohash_interval(const char *s, const char **e)
+{
+ uint32_t k;
+
+ if (!*e)
+ *e = s + strlen(s);
+ if (s == *e)
+ k = 0;
+ else
+ k = *s++;
+ while (s != *e)
+ k = ((k << 2) | (k >> 30)) ^ *s++;
+ return k;
+}
+
+unsigned int
+ohash_lookup_interval(struct ohash *h, const char *start, const char *end,
+ uint32_t hv)
+{
+ unsigned int i, incr;
+ unsigned int empty;
+
+#ifdef STATS_HASH
+ STAT_HASH_LOOKUP++;
+#endif
+ empty = NONE;
+ i = hv % h->size;
+ incr = ((hv % (h->size-2)) & ~1) + 1;
+ while (h->t[i].p != NULL) {
+#ifdef STATS_HASH
+ STAT_HASH_LENGTH++;
+#endif
+ if (h->t[i].p == DELETED) {
+ if (empty == NONE)
+ empty = i;
+ } else if (h->t[i].hv == hv &&
+ strncmp(h->t[i].p+h->info.key_offset, start,
+ end - start) == 0 &&
+ (h->t[i].p+h->info.key_offset)[end-start] == '\0') {
+ if (empty != NONE) {
+ h->t[empty].hv = hv;
+ h->t[empty].p = h->t[i].p;
+ h->t[i].p = DELETED;
+ return empty;
+ } else {
+#ifdef STATS_HASH
+ STAT_HASH_POSITIVE++;
+#endif
+ return i;
+ }
+ }
+ i += incr;
+ if (i >= h->size)
+ i -= h->size;
+ }
+
+ /* Found an empty position. */
+ if (empty != NONE)
+ i = empty;
+ h->t[i].hv = hv;
+ return i;
+}
+
+unsigned int
+ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv)
+{
+ unsigned int i, incr;
+ unsigned int empty;
+
+#ifdef STATS_HASH
+ STAT_HASH_LOOKUP++;
+#endif
+ empty = NONE;
+ i = hv % h->size;
+ incr = ((hv % (h->size-2)) & ~1) + 1;
+ while (h->t[i].p != NULL) {
+#ifdef STATS_HASH
+ STAT_HASH_LENGTH++;
+#endif
+ if (h->t[i].p == DELETED) {
+ if (empty == NONE)
+ empty = i;
+ } else if (h->t[i].hv == hv &&
+ memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) {
+ if (empty != NONE) {
+ h->t[empty].hv = hv;
+ h->t[empty].p = h->t[i].p;
+ h->t[i].p = DELETED;
+ return empty;
+ } else {
+#ifdef STATS_HASH
+ STAT_HASH_POSITIVE++;
+#endif
+ } return i;
+ }
+ i += incr;
+ if (i >= h->size)
+ i -= h->size;
+ }
+
+ /* Found an empty position. */
+ if (empty != NONE)
+ i = empty;
+ h->t[i].hv = hv;
+ return i;
+}
+
+unsigned int
+ohash_qlookup(struct ohash *h, const char *s)
+{
+ const char *e = NULL;
+ return ohash_qlookupi(h, s, &e);
+}
+
+unsigned int
+ohash_qlookupi(struct ohash *h, const char *s, const char **e)
+{
+ uint32_t hv;
+
+ hv = ohash_interval(s, e);
+ return ohash_lookup_interval(h, s, *e, hv);
+}
+
+#endif /*!HAVE_OHASH*/
diff --git a/compat_ohash.h b/compat_ohash.h
new file mode 100644
index 000000000000..e3124c96b124
--- /dev/null
+++ b/compat_ohash.h
@@ -0,0 +1,73 @@
+/* $OpenBSD: ohash.h,v 1.2 2014/06/02 18:52:03 deraadt Exp $ */
+
+/* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef OHASH_H
+#define OHASH_H
+
+/* Open hashing support.
+ * Open hashing was chosen because it is much lighter than other hash
+ * techniques, and more efficient in most cases.
+ */
+
+/* user-visible data structure */
+struct ohash_info {
+ ptrdiff_t key_offset;
+ void *data; /* user data */
+ void *(*calloc)(size_t, size_t, void *);
+ void (*free)(void *, void *);
+ void *(*alloc)(size_t, void *);
+};
+
+struct _ohash_record;
+
+/* private structure. It's there just so you can do a sizeof */
+struct ohash {
+ struct _ohash_record *t;
+ struct ohash_info info;
+ unsigned int size;
+ unsigned int total;
+ unsigned int deleted;
+};
+
+/* For this to be tweakable, we use small primitives, and leave part of the
+ * logic to the client application. e.g., hashing is left to the client
+ * application. We also provide a simple table entry lookup that yields
+ * a hashing table index (opaque) to be used in find/insert/remove.
+ * The keys are stored at a known position in the client data.
+ */
+__BEGIN_DECLS
+void ohash_init(struct ohash *, unsigned, struct ohash_info *);
+void ohash_delete(struct ohash *);
+
+unsigned int ohash_lookup_interval(struct ohash *, const char *,
+ const char *, uint32_t);
+unsigned int ohash_lookup_memory(struct ohash *, const char *,
+ size_t, uint32_t);
+void *ohash_find(struct ohash *, unsigned int);
+void *ohash_remove(struct ohash *, unsigned int);
+void *ohash_insert(struct ohash *, unsigned int, void *);
+void *ohash_first(struct ohash *, unsigned int *);
+void *ohash_next(struct ohash *, unsigned int *);
+unsigned int ohash_entries(struct ohash *);
+
+void *ohash_create_entry(struct ohash_info *, const char *, const char **);
+uint32_t ohash_interval(const char *, const char **);
+
+unsigned int ohash_qlookupi(struct ohash *, const char *, const char **);
+unsigned int ohash_qlookup(struct ohash *, const char *);
+__END_DECLS
+#endif
diff --git a/compat_reallocarray.c b/compat_reallocarray.c
new file mode 100644
index 000000000000..e25d8374bd53
--- /dev/null
+++ b/compat_reallocarray.c
@@ -0,0 +1,45 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_REALLOCARRAY
+
+int dummy;
+
+#else
+
+/* $OpenBSD: malloc.c,v 1.158 2014/04/23 15:07:27 tedu Exp $ */
+/*
+ * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4))
+
+void *
+reallocarray(void *optr, size_t nmemb, size_t size)
+{
+ if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
+ nmemb > 0 && SIZE_MAX / nmemb < size) {
+ errno = ENOMEM;
+ return NULL;
+ }
+ return realloc(optr, size * nmemb);
+}
+
+#endif /*!HAVE_REALLOCARRAY*/
diff --git a/compat_sqlite3_errstr.c b/compat_sqlite3_errstr.c
new file mode 100644
index 000000000000..b8d6eb58f1cc
--- /dev/null
+++ b/compat_sqlite3_errstr.c
@@ -0,0 +1,18 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SQLITE3_ERRSTR
+
+int dummy;
+
+#else
+
+const char *
+sqlite3_errstr(int rc)
+{
+	/* Built only without HAVE_SQLITE3_ERRSTR: just zero versus nonzero is distinguished. */
+	return(0 == rc ? "not an error" : "unknown error");
+}
+
+#endif
diff --git a/compat_strcasestr.c b/compat_strcasestr.c
new file mode 100644
index 000000000000..5216d0215753
--- /dev/null
+++ b/compat_strcasestr.c
@@ -0,0 +1,74 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STRCASESTR
+
+int dummy;
+
+#else
+
+/* ($)NetBSD: strcasestr.c,v 1.2 2005/02/09 21:35:47 kleink Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <ctype.h>
+#include <string.h>
+
+#define __UNCONST(a) ((void *)(unsigned long)(const void *)(a))
+
+/*
+ * Case-insensitively locate the first occurrence of find in s.
+ * Returns s itself for an empty find, or NULL when there is no match.
+ */
+char *
+strcasestr(const char *s, const char *find)
+{
+	size_t taillen;
+	char first;
+
+	if (*find == '\0')
+		return __UNCONST(s);
+	first = tolower((unsigned char)*find);
+	taillen = strlen(find + 1);
+	for (; *s != '\0'; s++) {
+		if ((char)tolower((unsigned char)*s) != first)
+			continue;
+		if (strncasecmp(s + 1, find + 1, taillen) == 0)
+			return __UNCONST(s);
+	}
+	return (NULL);
+}
+
+#endif
diff --git a/compat_strsep.c b/compat_strsep.c
new file mode 100644
index 000000000000..a5c58c625326
--- /dev/null
+++ b/compat_strsep.c
@@ -0,0 +1,80 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STRSEP
+
+int dummy;
+
+#else
+
+/* ($)OpenBSD: strsep.c,v 1.6 2005/08/08 08:05:37 espie Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Get next token from string *stringp, where tokens are possibly-empty
+ * strings separated by characters from delim.
+ *
+ * Writes NULs into the string at *stringp to end tokens.
+ * delim need not remain constant from call to call.
+ * On return, *stringp points past the last NUL written (if there might
+ * be further tokens), or is NULL (if there are definitely no more tokens).
+ *
+ * If *stringp is NULL, strsep returns NULL.
+ */
+char *
+strsep(char **stringp, const char *delim)
+{
+	char *tok, *p;
+	const char *d;
+
+	tok = *stringp;
+	if (tok == NULL)
+		return (NULL);
+	for (p = tok; ; p++) {
+		/* delim's own NUL is compared too, so the end of the string always matches. */
+		for (d = delim; ; d++) {
+			if (*d == *p) {
+				if (*p == '\0')
+					*stringp = NULL;
+				else {
+					*p = '\0';
+					*stringp = p + 1;
+				}
+				return (tok);
+			}
+			if (*d == '\0')
+				break;
+		}
+	}
+}
+
+#endif
diff --git a/config.h.post b/config.h.post
index 9a33671b568e..e95f5f5311d4 100644
--- a/config.h.post
+++ b/config.h.post
@@ -1,5 +1,3 @@
-#include <sys/types.h>
-
#if !defined(__BEGIN_DECLS)
# ifdef __cplusplus
# define __BEGIN_DECLS extern "C" {
@@ -15,30 +13,30 @@
# endif
#endif
-#ifndef HAVE_BETOH64
-# if defined(__APPLE__)
-# define betoh64(x) OSSwapBigToHostInt64(x)
-# define htobe64(x) OSSwapHostToBigInt64(x)
-# elif defined(__sun)
-# define betoh64(x) BE_64(x)
-# define htobe64(x) BE_64(x)
-# else
-# define betoh64(x) be64toh(x)
-# endif
+#ifndef HAVE_FGETLN
+extern char *fgetln(FILE *, size_t *);
+#endif
+#ifndef HAVE_GETSUBOPT
+extern int getsubopt(char **, char * const *, char **);
+extern char *suboptarg;
+#endif
+#ifndef HAVE_REALLOCARRAY
+extern void *reallocarray(void *, size_t, size_t);
+#endif
+#ifndef HAVE_SQLITE3_ERRSTR
+extern const char *sqlite3_errstr(int);
+#endif
+#ifndef HAVE_STRCASESTR
+extern char *strcasestr(const char *, const char *);
#endif
-
#ifndef HAVE_STRLCAT
extern size_t strlcat(char *, const char *, size_t);
#endif
#ifndef HAVE_STRLCPY
extern size_t strlcpy(char *, const char *, size_t);
#endif
-#ifndef HAVE_GETSUBOPT
-extern int getsubopt(char **, char * const *, char **);
-extern char *suboptarg;
-#endif
-#ifndef HAVE_FGETLN
-extern char *fgetln(FILE *, size_t *);
+#ifndef HAVE_STRSEP
+extern char *strsep(char **, const char *);
#endif
#endif /* MANDOC_CONFIG_H */
diff --git a/config.h.pre b/config.h.pre
index bc594784856c..1c3940de5cc0 100644
--- a/config.h.pre
+++ b/config.h.pre
@@ -2,7 +2,8 @@
#define MANDOC_CONFIG_H
#if defined(__linux__) || defined(__MINT__)
-# define _GNU_SOURCE /* strptime(), getsubopt() */
+# define _GNU_SOURCE /* getsubopt(), strcasestr(), strptime() */
#endif
+#include <sys/types.h>
#include <stdio.h>
diff --git a/configure b/configure
new file mode 100755
index 000000000000..5b987ebef60d
--- /dev/null
+++ b/configure
@@ -0,0 +1,49 @@
+#!/bin/sh
+#
+# Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+echo "/* RUNNING ./CONFIGURE - SHOULD BE USED ONLY VIA MAKE, READ INSTALL */"
+
+set -e
+exec > config.h 2> config.log
+
+CFLAGS="${CFLAGS} -Wno-unused -Werror"
+
+runtest() { # usage: runtest <test name> <HAVE_ suffix> [extra compiler arguments]
+ echo ${CC} ${CFLAGS} ${3} -o test-${1} test-${1}.c 1>&2 # log the compile command on stderr
+ ${CC} ${CFLAGS} ${3} -o "test-${1}" "test-${1}.c" 1>&2 || return 0 # compile failure: emit no HAVE_ macro
+ "./test-${1}" && echo "#define HAVE_${2}" \
+ || echo FAILURE: test-${1} returned $? 1>&2 # test built but exited nonzero: log it, emit no macro
+ rm "test-${1}" # remove the test binary
+}
+
+cat config.h.pre
+echo
+echo "#define VERSION \"${VERSION}\""
+runtest fgetln FGETLN
+runtest getsubopt GETSUBOPT
+runtest mmap MMAP
+runtest ohash OHASH "${DBLIB}"
+runtest reallocarray REALLOCARRAY
+runtest sqlite3_errstr SQLITE3_ERRSTR "${DBLIB}"
+runtest strcasestr STRCASESTR
+runtest strlcat STRLCAT
+runtest strlcpy STRLCPY
+runtest strptime STRPTIME
+runtest strsep STRSEP
+echo
+cat config.h.post
+
+exit 0
diff --git a/demandoc.c b/demandoc.c
index aad42085b185..4a7b979e9225 100644
--- a/demandoc.c
+++ b/demandoc.c
@@ -1,4 +1,4 @@
-/* $Id: demandoc.c,v 1.7 2012/05/31 22:27:14 schwarze Exp $ */
+/* $Id: demandoc.c,v 1.10 2014/03/19 22:20:43 schwarze Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -76,7 +76,7 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;
- mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_FATAL, NULL, NULL);
assert(mp);
if (0 == argc)
@@ -110,7 +110,7 @@ pmandoc(struct mparse *mp, int fd, const char *fn, int list)
return;
}
- mparse_result(mp, &mdoc, &man);
+ mparse_result(mp, &mdoc, &man, NULL);
line = 1;
col = 0;
diff --git a/eqn.c b/eqn.c
index 37f01bcb5b6e..cda0db5d26e4 100644
--- a/eqn.c
+++ b/eqn.c
@@ -1,4 +1,4 @@
-/* $Id: eqn.c,v 1.38 2011/07/25 15:37:00 kristaps Exp $ */
+/* $Id: eqn.c,v 1.44 2014/07/06 19:09:00 schwarze Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -26,6 +26,7 @@
#include <time.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libmandoc.h"
#include "libroff.h"
@@ -137,12 +138,11 @@ struct eqnsym {
const char *sym;
};
-
static enum eqn_rest eqn_box(struct eqn_node *, struct eqn_box *);
-static struct eqn_box *eqn_box_alloc(struct eqn_node *,
+static struct eqn_box *eqn_box_alloc(struct eqn_node *,
struct eqn_box *);
static void eqn_box_free(struct eqn_box *);
-static struct eqn_def *eqn_def_find(struct eqn_node *,
+static struct eqn_def *eqn_def_find(struct eqn_node *,
const char *, size_t);
static int eqn_do_gfont(struct eqn_node *);
static int eqn_do_gsize(struct eqn_node *);
@@ -156,7 +156,7 @@ static enum eqn_rest eqn_list(struct eqn_node *, struct eqn_box *);
static enum eqn_rest eqn_matrix(struct eqn_node *, struct eqn_box *);
static const char *eqn_nexttok(struct eqn_node *, size_t *);
static const char *eqn_nextrawtok(struct eqn_node *, size_t *);
-static const char *eqn_next(struct eqn_node *,
+static const char *eqn_next(struct eqn_node *,
char, size_t *, int);
static void eqn_rewind(struct eqn_node *);
@@ -277,9 +277,9 @@ static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
{ { ">=", 2 }, ">=" }, /* EQNSYM_moreequal */
};
-/* ARGSUSED */
+
enum rofferr
-eqn_read(struct eqn_node **epp, int ln,
+eqn_read(struct eqn_node **epp, int ln,
const char *p, int pos, int *offs)
{
size_t sz;
@@ -298,9 +298,10 @@ eqn_read(struct eqn_node **epp, int ln,
p += 3;
while (' ' == *p || '\t' == *p)
p++;
- if ('\0' == *p)
+ if ('\0' == *p)
return(er);
- mandoc_msg(MANDOCERR_ARGSLOST, ep->parse, ln, pos, NULL);
+ mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
+ ln, pos, "EN %s", p);
return(er);
}
@@ -413,11 +414,11 @@ eqn_matrix(struct eqn_node *ep, struct eqn_box *last)
while (EQN_OK == (c = eqn_box(ep, bp)))
switch (bp->last->pile) {
- case (EQNPILE_LCOL):
+ case EQNPILE_LCOL:
/* FALLTHROUGH */
- case (EQNPILE_CCOL):
+ case EQNPILE_CCOL:
/* FALLTHROUGH */
- case (EQNPILE_RCOL):
+ case EQNPILE_RCOL:
continue;
default:
EQN_MSG(MANDOCERR_EQNSYNT, ep);
@@ -512,9 +513,8 @@ eqn_box(struct eqn_node *ep, struct eqn_box *last)
for (i = 0; i < (int)EQN__MAX; i++) {
if ( ! EQNSTREQ(&eqnparts[i].str, start, sz))
continue;
- return((*eqnparts[i].fp)(ep) ?
- EQN_OK : EQN_ERR);
- }
+ return((*eqnparts[i].fp)(ep) ? EQN_OK : EQN_ERR);
+ }
if (STRNEQ(start, sz, "{", 1)) {
if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) {
@@ -529,7 +529,7 @@ eqn_box(struct eqn_node *ep, struct eqn_box *last)
return(EQN_OK);
EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
return(EQN_ERR);
- }
+ }
for (i = 0; i < (int)EQNPILE__MAX; i++) {
if ( ! EQNSTREQ(&eqnpiles[i], start, sz))
@@ -575,7 +575,7 @@ eqn_box(struct eqn_node *ep, struct eqn_box *last)
if (NULL == last->last) {
EQN_MSG(MANDOCERR_EQNSYNT, ep);
return(EQN_ERR);
- }
+ }
last->last->pos = (enum eqn_post)i;
if (EQN_EOF == (c = eqn_box(ep, last))) {
EQN_MSG(MANDOCERR_EQNEOF, ep);
@@ -590,7 +590,7 @@ eqn_box(struct eqn_node *ep, struct eqn_box *last)
if (NULL == last->last) {
EQN_MSG(MANDOCERR_EQNSYNT, ep);
return(EQN_ERR);
- }
+ }
last->last->mark = (enum eqn_markt)i;
if (EQN_EOF == (c = eqn_box(ep, last))) {
EQN_MSG(MANDOCERR_EQNEOF, ep);
@@ -629,7 +629,7 @@ eqn_box(struct eqn_node *ep, struct eqn_box *last)
for (i = 0; i < (int)EQNSYM__MAX; i++)
if (EQNSTREQ(&eqnsyms[i].str, start, sz)) {
sym[63] = '\0';
- snprintf(sym, 62, "\\[%s]", eqnsyms[i].sym);
+ (void)snprintf(sym, 62, "\\[%s]", eqnsyms[i].sym);
bp->text = mandoc_strdup(sym);
return(EQN_OK);
}
@@ -762,13 +762,13 @@ again:
if (q)
ep->cur++;
while (' ' == ep->data[(int)ep->cur] ||
- '\t' == ep->data[(int)ep->cur] ||
- '^' == ep->data[(int)ep->cur] ||
- '~' == ep->data[(int)ep->cur])
+ '\t' == ep->data[(int)ep->cur] ||
+ '^' == ep->data[(int)ep->cur] ||
+ '~' == ep->data[(int)ep->cur])
ep->cur++;
} else {
if (q)
- EQN_MSG(MANDOCERR_BADQUOTE, ep);
+ EQN_MSG(MANDOCERR_ARG_QUOTE, ep);
next = strchr(start, '\0');
*sz = (size_t)(next - start);
ep->cur += *sz;
@@ -790,8 +790,8 @@ again:
}
diff = def->valsz - *sz;
- memmove(start + *sz + diff, start + *sz,
- (strlen(start) - *sz) + 1);
+ memmove(start + *sz + diff, start + *sz,
+ (strlen(start) - *sz) + 1);
memcpy(start, def->val, def->valsz);
goto again;
}
@@ -852,8 +852,8 @@ eqn_do_define(struct eqn_node *ep)
return(0);
}
- /*
- * Search for a key that already exists.
+ /*
+ * Search for a key that already exists.
* Create a new key if none is found.
*/
@@ -865,15 +865,14 @@ eqn_do_define(struct eqn_node *ep)
if (i == (int)ep->defsz) {
ep->defsz++;
- ep->defs = mandoc_realloc
- (ep->defs, ep->defsz *
- sizeof(struct eqn_def));
+ ep->defs = mandoc_reallocarray(ep->defs,
+ ep->defsz, sizeof(struct eqn_def));
ep->defs[i].key = ep->defs[i].val = NULL;
}
ep->defs[i].keysz = sz;
- ep->defs[i].key = mandoc_realloc
- (ep->defs[i].key, sz + 1);
+ ep->defs[i].key = mandoc_realloc(
+ ep->defs[i].key, sz + 1);
memcpy(ep->defs[i].key, start, sz);
ep->defs[i].key[(int)sz] = '\0';
@@ -901,7 +900,7 @@ eqn_do_gfont(struct eqn_node *ep)
if (NULL == eqn_nextrawtok(ep, NULL)) {
EQN_MSG(MANDOCERR_EQNEOF, ep);
return(0);
- }
+ }
return(1);
}
@@ -914,7 +913,7 @@ eqn_do_gsize(struct eqn_node *ep)
if (NULL == (start = eqn_nextrawtok(ep, &sz))) {
EQN_MSG(MANDOCERR_EQNEOF, ep);
return(0);
- }
+ }
ep->gsize = mandoc_strntoi(start, sz, 10);
return(1);
}
@@ -940,9 +939,9 @@ eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
{
int i;
- for (i = 0; i < (int)ep->defsz; i++)
- if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
- ep->defs[i].keysz, key, sz))
+ for (i = 0; i < (int)ep->defsz; i++)
+ if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
+ ep->defs[i].keysz, key, sz))
return(&ep->defs[i]);
return(NULL);
diff --git a/eqn_html.c b/eqn_html.c
index 80c82f1de5b5..3e58ab5880b5 100644
--- a/eqn_html.c
+++ b/eqn_html.c
@@ -1,4 +1,4 @@
-/* $Id: eqn_html.c,v 1.2 2011/07/24 10:09:03 kristaps Exp $ */
+/* $Id: eqn_html.c,v 1.3 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -35,9 +35,9 @@ static const enum htmltag fontmap[EQNFONT__MAX] = {
TAG_I /* EQNFONT_ITALIC */
};
-
static void eqn_box(struct html *, const struct eqn_box *);
+
void
print_eqn(struct html *p, const struct eqn *ep)
{
@@ -59,12 +59,12 @@ eqn_box(struct html *p, const struct eqn_box *bp)
{
struct tag *t;
- t = EQNFONT_NONE == bp->font ? NULL :
- print_otag(p, fontmap[(int)bp->font], 0, NULL);
+ t = EQNFONT_NONE == bp->font ? NULL :
+ print_otag(p, fontmap[(int)bp->font], 0, NULL);
if (bp->left)
print_text(p, bp->left);
-
+
if (bp->text)
print_text(p, bp->text);
diff --git a/eqn_term.c b/eqn_term.c
index cfbd8d48f807..889c5c6586f0 100644
--- a/eqn_term.c
+++ b/eqn_term.c
@@ -1,4 +1,4 @@
-/* $Id: eqn_term.c,v 1.4 2011/07/24 10:09:03 kristaps Exp $ */
+/* $Id: eqn_term.c,v 1.5 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -37,6 +37,7 @@ static const enum termfont fontmap[EQNFONT__MAX] = {
static void eqn_box(struct termp *, const struct eqn_box *);
+
void
term_eqn(struct termp *p, const struct eqn *ep)
{
@@ -68,7 +69,7 @@ eqn_box(struct termp *p, const struct eqn_box *bp)
term_word(p, ")");
if (bp->right)
term_word(p, bp->right);
- if (EQNFONT_NONE != bp->font)
+ if (EQNFONT_NONE != bp->font)
term_fontpop(p);
if (bp->next)
diff --git a/external.png b/external.png
deleted file mode 100644
index 419c06fb960b..000000000000
--- a/external.png
+++ /dev/null
Binary files differ
diff --git a/gmdiff b/gmdiff
index a5bca9d31c9a..2c7ba4b343ca 100644
--- a/gmdiff
+++ b/gmdiff
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
+# Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
@@ -27,8 +27,11 @@ while [ -n "$1" ]; do
file=$1
shift
echo " ========== $file ========== "
- tbl $file | groff -mandoc -Tascii -P -c 2>&1 > /tmp/groff.out
- mandoc -Ios='OpenBSD ports' -Werror $file 2>&1 > /tmp/mandoc.out
+ tbl $file | groff -mandoc -Tascii -P -c 2> /tmp/groff.err > /tmp/groff.out
+ mandoc -Ios='OpenBSD ports' -Werror $file 2> /tmp/mandoc.err > /tmp/mandoc.out
+	for i in groff mandoc; do
+		[ -s "/tmp/$i.err" ] && echo "$i errors:" && cat "/tmp/$i.err"  # POSIX test: "[[" is not available in every /bin/sh
+	done
diff -au /tmp/groff.out /tmp/mandoc.out 2>&1
done
diff --git a/html.c b/html.c
index 9d28b4270e4c..d4783ee06fdc 100644
--- a/html.c
+++ b/html.c
@@ -1,7 +1,7 @@
-/* $Id: html.c,v 1.152 2013/08/08 20:07:47 schwarze Exp $ */
+/* $Id: html.c,v 1.159 2014/07/23 15:00:08 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -31,6 +31,7 @@
#include <unistd.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libmandoc.h"
#include "out.h"
#include "html.h"
@@ -109,11 +110,13 @@ static const char *const roffscales[SCALE_MAX] = {
static void bufncat(struct html *, const char *, size_t);
static void print_ctag(struct html *, enum htmltag);
+static int print_escape(char);
static int print_encode(struct html *, const char *, int);
static void print_metaf(struct html *, enum mandoc_esc);
static void print_attr(struct html *, const char *, const char *);
static void *ml_alloc(char *, enum htmltype);
+
static void *
ml_alloc(char *outopts, enum htmltype type)
{
@@ -135,16 +138,16 @@ ml_alloc(char *outopts, enum htmltype type)
while (outopts && *outopts)
switch (getsubopt(&outopts, UNCONST(toks), &v)) {
- case (0):
+ case 0:
h->style = v;
break;
- case (1):
+ case 1:
h->base_man = v;
break;
- case (2):
+ case 2:
h->base_includes = v;
break;
- case (3):
+ case 3:
h->oflags |= HTML_FRAGMENT;
break;
default:
@@ -161,7 +164,6 @@ html_alloc(char *outopts)
return(ml_alloc(outopts, HTML_HTML_4_01_STRICT));
}
-
void *
xhtml_alloc(char *outopts)
{
@@ -169,7 +171,6 @@ xhtml_alloc(char *outopts)
return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT));
}
-
void
html_free(void *p)
{
@@ -179,17 +180,16 @@ html_free(void *p)
h = (struct html *)p;
while ((tag = h->tags.head) != NULL) {
- h->tags.head = tag->next;
+ h->tags.head = tag->next;
free(tag);
}
-
+
if (h->symtab)
mchars_free(h->symtab);
free(h);
}
-
void
print_gen_head(struct html *h)
{
@@ -226,21 +226,21 @@ print_metaf(struct html *h, enum mandoc_esc deco)
enum htmlfont font;
switch (deco) {
- case (ESCAPE_FONTPREV):
+ case ESCAPE_FONTPREV:
font = h->metal;
break;
- case (ESCAPE_FONTITALIC):
+ case ESCAPE_FONTITALIC:
font = HTMLFONT_ITALIC;
break;
- case (ESCAPE_FONTBOLD):
+ case ESCAPE_FONTBOLD:
font = HTMLFONT_BOLD;
break;
- case (ESCAPE_FONTBI):
+ case ESCAPE_FONTBI:
font = HTMLFONT_BI;
break;
- case (ESCAPE_FONT):
+ case ESCAPE_FONT:
/* FALLTHROUGH */
- case (ESCAPE_FONTROMAN):
+ case ESCAPE_FONTROMAN:
font = HTMLFONT_NONE;
break;
default:
@@ -257,13 +257,13 @@ print_metaf(struct html *h, enum mandoc_esc deco)
h->metac = font;
switch (font) {
- case (HTMLFONT_ITALIC):
+ case HTMLFONT_ITALIC:
h->metaf = print_otag(h, TAG_I, 0, NULL);
break;
- case (HTMLFONT_BOLD):
+ case HTMLFONT_BOLD:
h->metaf = print_otag(h, TAG_B, 0, NULL);
break;
- case (HTMLFONT_BI):
+ case HTMLFONT_BI:
h->metaf = print_otag(h, TAG_B, 0, NULL);
print_otag(h, TAG_I, 0, NULL);
break;
@@ -302,19 +302,19 @@ html_strlen(const char *cp)
break;
cp++;
switch (mandoc_escape(&cp, NULL, NULL)) {
- case (ESCAPE_ERROR):
+ case ESCAPE_ERROR:
return(sz);
- case (ESCAPE_UNICODE):
+ case ESCAPE_UNICODE:
/* FALLTHROUGH */
- case (ESCAPE_NUMBERED):
+ case ESCAPE_NUMBERED:
/* FALLTHROUGH */
- case (ESCAPE_SPECIAL):
+ case ESCAPE_SPECIAL:
if (skip)
skip = 0;
else
sz++;
break;
- case (ESCAPE_SKIPCHAR):
+ case ESCAPE_SKIPCHAR:
skip = 1;
break;
default:
@@ -325,13 +325,45 @@ html_strlen(const char *cp)
}
static int
+print_escape(char c)
+{
+	/* Emit the HTML-safe form of c if it needs one; return 1 if c was consumed here, 0 if the caller must print it. */
+	switch (c) {
+	case '<':
+		printf("&lt;");
+		break;
+	case '>':
+		printf("&gt;");
+		break;
+	case '&':
+		printf("&amp;");
+		break;
+	case '"':
+		printf("&quot;");
+		break;
+	case ASCII_NBRSP:
+		printf("&#160;");	/* no-break space; was wrongly printed as a hyphen */
+		break;
+	case ASCII_HYPH:
+		putchar('-');
+		/* FALLTHROUGH */
+	case ASCII_BREAK:	/* consumed without producing output */
+		break;
+	default:
+		return(0);
+	}
+	return(1);
+}
+
+static int
print_encode(struct html *h, const char *p, int norecurse)
{
size_t sz;
int c, len, nospace;
const char *seq;
enum mandoc_esc esc;
- static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' };
+ static const char rejs[9] = { '\\', '<', '>', '&', '"',
+ ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
nospace = 0;
@@ -350,43 +382,29 @@ print_encode(struct html *h, const char *p, int norecurse)
if ('\0' == *p)
break;
- switch (*p++) {
- case ('<'):
- printf("&lt;");
+ if (print_escape(*p++))
continue;
- case ('>'):
- printf("&gt;");
- continue;
- case ('&'):
- printf("&amp;");
- continue;
- case (ASCII_HYPH):
- putchar('-');
- continue;
- default:
- break;
- }
esc = mandoc_escape(&p, &seq, &len);
if (ESCAPE_ERROR == esc)
break;
switch (esc) {
- case (ESCAPE_FONT):
+ case ESCAPE_FONT:
/* FALLTHROUGH */
- case (ESCAPE_FONTPREV):
+ case ESCAPE_FONTPREV:
/* FALLTHROUGH */
- case (ESCAPE_FONTBOLD):
+ case ESCAPE_FONTBOLD:
/* FALLTHROUGH */
- case (ESCAPE_FONTITALIC):
+ case ESCAPE_FONTITALIC:
/* FALLTHROUGH */
- case (ESCAPE_FONTBI):
+ case ESCAPE_FONTBI:
/* FALLTHROUGH */
- case (ESCAPE_FONTROMAN):
+ case ESCAPE_FONTROMAN:
if (0 == norecurse)
print_metaf(h, esc);
continue;
- case (ESCAPE_SKIPCHAR):
+ case ESCAPE_SKIPCHAR:
h->flags |= HTML_SKIPCHAR;
continue;
default:
@@ -399,25 +417,26 @@ print_encode(struct html *h, const char *p, int norecurse)
}
switch (esc) {
- case (ESCAPE_UNICODE):
- /* Skip passed "u" header. */
+ case ESCAPE_UNICODE:
+ /* Skip past "u" header. */
c = mchars_num2uc(seq + 1, len - 1);
if ('\0' != c)
printf("&#x%x;", c);
break;
- case (ESCAPE_NUMBERED):
+ case ESCAPE_NUMBERED:
c = mchars_num2char(seq, len);
- if ('\0' != c)
+ if ( ! ('\0' == c || print_escape(c)))
putchar(c);
break;
- case (ESCAPE_SPECIAL):
+ case ESCAPE_SPECIAL:
c = mchars_spec2cp(h->symtab, seq, len);
if (c > 0)
printf("&#%d;", c);
- else if (-1 == c && 1 == len)
+ else if (-1 == c && 1 == len &&
+ !print_escape(*seq))
putchar((int)*seq);
break;
- case (ESCAPE_NOSPACE):
+ case ESCAPE_NOSPACE:
if ('\0' == *p)
nospace = 1;
break;
@@ -429,7 +448,6 @@ print_encode(struct html *h, const char *p, int norecurse)
return(nospace);
}
-
static void
print_attr(struct html *h, const char *key, const char *val)
{
@@ -438,9 +456,8 @@ print_attr(struct html *h, const char *key, const char *val)
putchar('\"');
}
-
struct tag *
-print_otag(struct html *h, enum htmltag tag,
+print_otag(struct html *h, enum htmltag tag,
int sz, const struct htmlpair *p)
{
int i;
@@ -490,7 +507,7 @@ print_otag(struct html *h, enum htmltag tag,
if (HTML_AUTOCLOSE & htmltags[tag].flags)
switch (h->type) {
- case (HTML_XHTML_1_0_STRICT):
+ case HTML_XHTML_1_0_STRICT:
putchar('/');
break;
default:
@@ -507,16 +524,15 @@ print_otag(struct html *h, enum htmltag tag,
return(t);
}
-
static void
print_ctag(struct html *h, enum htmltag tag)
{
-
+
printf("</%s>", htmltags[tag].name);
if (HTML_CLRLINE & htmltags[tag].flags) {
h->flags |= HTML_NOSPACE;
putchar('\n');
- }
+ }
}
void
@@ -527,7 +543,7 @@ print_gen_decls(struct html *h)
const char *name;
switch (h->type) {
- case (HTML_HTML_4_01_STRICT):
+ case HTML_HTML_4_01_STRICT:
name = "HTML";
doctype = "-//W3C//DTD HTML 4.01//EN";
dtd = "http://www.w3.org/TR/html4/strict.dtd";
@@ -540,8 +556,8 @@ print_gen_decls(struct html *h)
break;
}
- printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
- name, doctype, dtd);
+ printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n",
+ name, doctype, dtd);
}
void
@@ -560,13 +576,13 @@ print_text(struct html *h, const char *word)
assert(NULL == h->metaf);
switch (h->metac) {
- case (HTMLFONT_ITALIC):
+ case HTMLFONT_ITALIC:
h->metaf = print_otag(h, TAG_I, 0, NULL);
break;
- case (HTMLFONT_BOLD):
+ case HTMLFONT_BOLD:
h->metaf = print_otag(h, TAG_B, 0, NULL);
break;
- case (HTMLFONT_BI):
+ case HTMLFONT_BI:
h->metaf = print_otag(h, TAG_B, 0, NULL);
print_otag(h, TAG_I, 0, NULL);
break;
@@ -589,14 +605,13 @@ print_text(struct html *h, const char *word)
h->flags &= ~HTML_IGNDELIM;
}
-
void
print_tagq(struct html *h, const struct tag *until)
{
struct tag *tag;
while ((tag = h->tags.head) != NULL) {
- /*
+ /*
* Remember to close out and nullify the current
* meta-font and table, if applicable.
*/
@@ -612,7 +627,6 @@ print_tagq(struct html *h, const struct tag *until)
}
}
-
void
print_stagq(struct html *h, const struct tag *suntil)
{
@@ -621,7 +635,7 @@ print_stagq(struct html *h, const struct tag *suntil)
while ((tag = h->tags.head) != NULL) {
if (suntil && tag == suntil)
return;
- /*
+ /*
* Remember to close out and nullify the current
* meta-font and table, if applicable.
*/
@@ -657,6 +671,12 @@ void
bufcat(struct html *h, const char *p)
{
+ /*
+ * XXX This is broken and not easy to fix.
+ * When using the -Oincludes option, buffmt_includes()
+ * may pass in strings overrunning BUFSIZ, causing a crash.
+ */
+
h->buflen = strlcat(h->buf, p, BUFSIZ);
assert(h->buflen < BUFSIZ);
}
@@ -667,8 +687,8 @@ bufcat_fmt(struct html *h, const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
- (void)vsnprintf(h->buf + (int)h->buflen,
- BUFSIZ - h->buflen - 1, fmt, ap);
+ (void)vsnprintf(h->buf + (int)h->buflen,
+ BUFSIZ - h->buflen - 1, fmt, ap);
va_end(ap);
h->buflen = strlen(h->buf);
}
@@ -688,12 +708,12 @@ buffmt_includes(struct html *h, const char *name)
const char *p, *pp;
pp = h->base_includes;
-
+
bufinit(h);
while (NULL != (p = strchr(pp, '%'))) {
bufncat(h, pp, (size_t)(p - pp));
switch (*(p + 1)) {
- case('I'):
+ case'I':
bufcat(h, name);
break;
default:
@@ -707,22 +727,21 @@ buffmt_includes(struct html *h, const char *name)
}
void
-buffmt_man(struct html *h,
- const char *name, const char *sec)
+buffmt_man(struct html *h, const char *name, const char *sec)
{
const char *p, *pp;
pp = h->base_man;
-
+
bufinit(h);
while (NULL != (p = strchr(pp, '%'))) {
bufncat(h, pp, (size_t)(p - pp));
switch (*(p + 1)) {
- case('S'):
+ case 'S':
bufcat(h, sec ? sec : "1");
break;
- case('N'):
- bufcat_fmt(h, name);
+ case 'N':
+ bufcat_fmt(h, "%s", name);
break;
default:
bufncat(h, p, 2);
diff --git a/html.h b/html.h
index 894cfc4cff47..ca15f0f32164 100644
--- a/html.h
+++ b/html.h
@@ -1,4 +1,4 @@
-/* $Id: html.h,v 1.49 2013/08/08 20:07:47 schwarze Exp $ */
+/* $Id: html.h,v 1.51 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -105,7 +105,7 @@ struct htmlpair {
#define PAIR_STYLE_INIT(p, h) PAIR_INIT(p, ATTR_STYLE, (h)->buf)
#define PAIR_SUMMARY_INIT(p, v) PAIR_INIT(p, ATTR_SUMMARY, v)
-enum htmltype {
+enum htmltype {
HTML_HTML_4_01_STRICT,
HTML_XHTML_1_0_STRICT
};
@@ -127,7 +127,7 @@ struct html {
char *base_includes; /* base for include href */
char *style; /* style-sheet URI */
char buf[BUFSIZ]; /* see bufcat and friends */
- size_t buflen;
+ size_t buflen;
struct tag *metaf; /* current open font scope */
enum htmlfont metal; /* last used font */
enum htmlfont metac; /* current font mode */
@@ -138,7 +138,7 @@ struct html {
void print_gen_decls(struct html *);
void print_gen_head(struct html *);
-struct tag *print_otag(struct html *, enum htmltag,
+struct tag *print_otag(struct html *, enum htmltag,
int, const struct htmlpair *);
void print_tagq(struct html *, const struct tag *);
void print_stagq(struct html *, const struct tag *);
@@ -147,15 +147,18 @@ void print_tblclose(struct html *);
void print_tbl(struct html *, const struct tbl_span *);
void print_eqn(struct html *, const struct eqn *);
+#if __GNUC__ - 0 >= 4
+__attribute__((__format__ (__printf__, 2, 3)))
+#endif
void bufcat_fmt(struct html *, const char *, ...);
void bufcat(struct html *, const char *);
void bufcat_id(struct html *, const char *);
-void bufcat_style(struct html *,
+void bufcat_style(struct html *,
const char *, const char *);
-void bufcat_su(struct html *, const char *,
+void bufcat_su(struct html *, const char *,
const struct roffsu *);
void bufinit(struct html *);
-void buffmt_man(struct html *,
+void buffmt_man(struct html *,
const char *, const char *);
void buffmt_includes(struct html *, const char *);
diff --git a/index.css b/index.css
deleted file mode 100644
index d98316eaf311..000000000000
--- a/index.css
+++ /dev/null
@@ -1,48 +0,0 @@
-html { min-width: 40em;
- margin-top: 2em;
- margin-left: auto;
- margin-right: auto;
- width: 80%; }
-
-body { text-align: justify;
- font-family: Helvetica,Arial,sans-serif;
- line-height: 120%;
- font-size: small; }
-
-p,ul,table { margin-left: 3em; }
-
-p.head,
-p.subhead,
-p.foot { margin-left: 0.0em; margin-right: 0.0em; }
-
-p.news { margin-left: 2.0em; }
-
-li { margin: 0.25em; }
-
-h1 { font-size: 110%; }
-h2 { font-size: 105%; margin-left: 1.5em }
-
-p.head { margin-bottom: 0.5em;
- border-bottom: 1px solid #dddddd;
- padding-bottom: 0.2em; }
-
-p.subhead { margin-top: 0em;
- margin-bottom: 1.75em; }
-
-p.foot { border-top: 1px solid #dddddd;
- color: #666666;
- padding-top: 0.2em;
- margin-top: 1.75em; }
-
-span.nm { color: green; }
-
-span.file { font-style: italic; }
-
-span.attn { font-weight: bold; }
-
-span.flag { font-weight: bold; }
-
-a { text-decoration: none; }
-
-a.external { background: transparent url(external.png) center right no-repeat;
- padding-right: 12px; }
diff --git a/index.sgml b/index.sgml
deleted file mode 100644
index 83a43a737d73..000000000000
--- a/index.sgml
+++ /dev/null
@@ -1,438 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<HTML>
- <HEAD>
- <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
- <LINK REL="stylesheet" HREF="index.css" TYPE="text/css" MEDIA="all">
- <TITLE>mdocml | UNIX manpage compiler</TITLE>
- </HEAD>
- <BODY>
- <P CLASS="head">
- <A HREF="http://www.openbsd.org/"><IMG SRC="puffy.gif" ALT="Puffy" WIDTH="100" HEIGHT="91" STYLE="float: right"></A>
- <B>mdocml</B> &#8211; UNIX manpage compiler, current version @VERSION@ (@VDATE@)
- </P>
- <P CLASS="subhead">
- Sources: <A HREF="/snapshots/mdocml.tar.gz">current</A>,
- <A HREF="/cgi-bin/cvsweb/?cvsroot=mdocml">cvsweb</A>
- (<A HREF="/snapshots/">archives</A>)
- </P>
- <H1>
- <A NAME="description">Description</A>
- </H1>
- <P>
- <SPAN CLASS="nm">mdocml</SPAN> is a suite of tools compiling <I><A HREF="mdoc.7.html">mdoc</A></I>, the roff macro
- package of choice for BSD manual pages, and <I><A HREF="man.7.html">man</A></I>, the predominant historical package for
- UNIX manuals.
- It is small, ISO C, <A CLASS="external" HREF="http://www.isc.org/software/license">ISC</A>-licensed, and quite fast.
- </P>
- <P>
- The tool set features <A HREF="mandoc.1.html">mandoc</A>,
- based on the <A HREF="mandoc.3.html">libmandoc</A> validating compiler,
- to format output for UNIX terminals (with
- support for wide-character locales), XHTML, HTML, PostScript, and PDF.
- It also includes <A HREF="preconv.1.html">preconv</A>, for recoding multibyte manuals;
- <A HREF="demandoc.1.html">demandoc</A>, for emitting only text parts of manuals;
- <A HREF="mandocdb.8.html">mandocdb</A>, for indexing manuals; and
- <A HREF="apropos.1.html">apropos</A>, <A HREF="whatis.1.html">whatis</A>, and
- <A HREF="man.cgi.7.html">man.cgi</A> (via <A HREF="catman.8.html">catman</A>) for semantic search of manual content.
- </P>
- <P>
- <SPAN CLASS="nm">mdocml</SPAN> has predominantly been developed on OpenBSD
- and is both an <A CLASS="external" HREF="http://www.openbsd.org/">OpenBSD</A>
- and a <A CLASS="external" HREF="http://bsd.lv/">BSD.lv</A> project.
- We strive to support all interested free operating systems, in particular
- <A CLASS="external" HREF="http://www.dragonflybsd.org/">DragonFly</A>,
- <A CLASS="external" HREF="http://www.netbsd.org/">NetBSD</A>,
- <A CLASS="external" HREF="http://www.freebsd.org/">FreeBSD</A>,
- <A CLASS="external" HREF="http://www.minix3.org/">Minix 3</A>,
- and <A CLASS="external" HREF="http://www.gnu.org/">GNU</A>/Linux,
- as well as all systems running the <A CLASS="external" HREF="http://www.pkgsrc.org/">pkgsrc</A> portable package build system.
- All of these projects have helped to make <SPAN CLASS="nm">mdocml</SPAN> better, by providing feedback and advice,
- bug reports, and patches.
- </P>
- <P>
- <I>Disambiguation</I>: <SPAN CLASS="nm">mdocml</SPAN> is often referred to by its installed binary, <Q>mandoc</Q>.
- </P>
- <H2>
- <A NAME="sources">Sources</A>
- </H2>
- <P>
- <SPAN CLASS="nm">mdocml</SPAN> should build and run on any modern system with
- <A HREF="http://www.oracle.com/technetwork/database/berkeleydb/overview/index.html">libdb</A>
- (this is installed by default on BSD UNIX systems &mdash; see the <I>Makefile</I> if you're running Linux).
- To build and install into <I>/usr/local/</I>, just run <CODE>make install</CODE>.
- Be careful: the <B>preconv</B>, <B>apropos</B>, and <B>whatis</B> installed binary names
- may be taken by existing utilities.
- </P>
- <H2>
- Downstream
- </H2>
- <P>
- Several systems come bundled with <SPAN CLASS="nm">mdocml</SPAN> utilities.
- If your system does not appear below, the maintainers have not contacted me and it should not be considered
- <Q>official</Q>, so please <A HREF="#contact">contact us</A> if you plan on maintaining a downstream version!
- </P>
- <TABLE WIDTH="100%" SUMMARY="Downstream Sources">
- <COL WIDTH="175">
- <COL>
- <TBODY>
- <TR>
- <TD>DragonFly BSD</TD>
- <TD>
- <A HREF="http://gitweb.dragonflybsd.org/dragonfly.git/tree/HEAD:/contrib/mdocml" CLASS="external">contrib/mdocml</A> (1.12.3 sources)
- <A HREF="http://gitweb.dragonflybsd.org/dragonfly.git/tree/HEAD:/lib/libmandoc" CLASS="external">lib/libmandoc</A>
- <A HREF="http://gitweb.dragonflybsd.org/dragonfly.git/tree/HEAD:/usr.bin/mandoc" CLASS="external">usr.bin/mandoc</A> (build system)
- </TD>
- </TR>
- <TR>
- <TD>FreeBSD 10.0, -CURRENT</TD>
- <TD>
- <A HREF="http://svnweb.freebsd.org/base/head/contrib/mdocml/" CLASS="external">contrib/mdocml</A> (1.12.1 sources)
- <A HREF="http://svnweb.freebsd.org/base/head/usr.bin/mandoc/" CLASS="external">usr.bin/mandoc</A> (build system)
- </TD>
- </TR>
- <TR>
- <TD>FreeBSD 9.x, 8.x</TD>
- <TD>
- <A HREF="http://svnweb.freebsd.org/ports/head/textproc/mdocml/" CLASS="external">ports/textproc/mdocml</A> (1.12.2 port)
- </TD>
- </TR>
- <TR>
- <TD>NetBSD</TD>
- <TD>
- <A HREF="http://cvsweb.netbsd.org/bsdweb.cgi/src/external/bsd/mdocml/" CLASS="external">src/external/bsd/mdocml</A> (1.12.1 sources plus patches and build system)
- </TD>
- </TR>
- <TR>
- <TD>OpenBSD</TD>
- <TD>
- <A HREF="http://www.openbsd.org/cgi-bin/cvsweb/src/usr.bin/mandoc/" CLASS="external">src/usr.bin/mandoc</A> (1.12.3 sources under active development and build system)
- </TD>
- </TR>
- <TR>
- <TD>pkgsrc</TD>
- <TD>
- <A HREF="http://pkgsrc.se/textproc/mdocml" CLASS="external">textproc/mdocml</A> (1.12.2 port)
- </TD>
- </TR>
- <TR>
- <TD>Minix3</TD>
- <TD>
- <A HREF="http://git.minix3.org/?p=minix.git;a=tree;f=external/bsd/mdocml" CLASS="external">external/bsd/mdocml</A> (1.10.9 sources and build system)
- </TD>
- </TR>
- <TR>
- <TD>Alpine Linux</TD>
- <TD>
- <A HREF="http://git.alpinelinux.org/cgit/aports/tree/main/mdocml" CLASS="external">aports/main/mdocml</A> (1.12.2 port)
- </TD>
- </TR>
- </TBODY>
- </TABLE>
- <H1>
- <A NAME="documentation">Documentation</A>
- </H1>
- <P>
- These manuals are generated automatically and refer to the current release.
- They are the authoritative documentation for the <SPAN CLASS="nm">mdocml</SPAN> system.
- </P>
-
- <TABLE WIDTH="100%" SUMMARY="Documentation">
- <COL WIDTH="175">
- <COL>
- <TBODY>
- <TR>
- <TD VALIGN="top"><A HREF="apropos.1.html">apropos(1)</A></TD>
- <TD VALIGN="top">
- search the manual page database
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="demandoc.1.html">demandoc(1)</A></TD>
- <TD VALIGN="top">
- emit only text of UNIX manuals
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="mandoc.1.html">mandoc(1)</A></TD>
- <TD VALIGN="top">
- format and display UNIX manuals
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="preconv.1.html">preconv(1)</A></TD>
- <TD VALIGN="top">
- recode multibyte UNIX manuals
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="whatis.1.html">whatis(1)</A></TD>
- <TD VALIGN="top">
- search the manual page database
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="mandoc.3.html">mandoc(3)</A></TD>
- <TD VALIGN="top">
- mandoc macro compiler library
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="tbl.3.html">tbl(3)</A></TD>
- <TD VALIGN="top">
- roff table parser library for mandoc
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="eqn.7.html">eqn(7)</A></TD>
- <TD VALIGN="top">
- eqn-mandoc language reference
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="man.7.html">man(7)</A></TD>
- <TD VALIGN="top">
- man language reference
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="man.cgi.7.html">man.cgi(7)</A></TD>
- <TD VALIGN="top">
- cgi for manpage query and display
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="mandoc_char.7.html">mandoc_char(7)</A></TD>
- <TD VALIGN="top">
- mandoc special characters
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="mdoc.7.html">mdoc(7)</A></TD>
- <TD VALIGN="top">
- mdoc language reference
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="roff.7.html">roff(7)</A></TD>
- <TD VALIGN="top">
- roff-mandoc language reference
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="tbl.7.html">tbl(7)</A></TD>
- <TD VALIGN="top">
- tbl-mandoc language reference
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="catman.8.html">catman(8)</A></TD>
- <TD VALIGN="top">
- update a man.cgi manpage cache
- </TD>
- </TR>
- <TR>
- <TD VALIGN="top"><A HREF="mandocdb.8.html">mandocdb(8)</A></TD>
- <TD VALIGN="top">
- index UNIX manuals
- </TD>
- </TR>
- </TBODY>
- </TABLE>
- <H2>
- <A NAME="links">Supplementary Information</A>
- </H2>
- <UL>
- <LI>
- <A HREF="http://manpages.bsd.lv/">Practical UNIX Manuals</A>: mdoc tutorial by Kristaps Dzonsons
- </LI>
- <LI>
- <A HREF="http://www.openbsd.org/faq/ports/specialtopics.html#Mandoc" CLASS="external">OpenBSD porting guide</A>
- chapter regarding manual pages
- </LI>
- <LI>
- <A HREF="press.html">Publications and media coverage</A>
- concerning mdocml and mandoc
- </LI>
- <LI>
- <A HREF="http://manpages.bsd.lv/history.html">History of UNIX Manpages</A>: a comprehensive overview by Kristaps Dzonsons
- </LI>
- </UL>
- <H1>
- <A NAME="contact">Contact</A>
- </H1>
- <P>
- Use the mailing lists for bug-reports, patches, questions, etc. Please check the
- <A HREF="http://mdocml.bsd.lv/cgi-bin/cvsweb/TODO?cvsroot=mdocml">TODO</A> for known issues
- before posting. All lists are subscription-only: send a blank e-mail to the listed address to subscribe. Beyond that,
- contact Kristaps at <A HREF="http://mailhide.recaptcha.net/d?k=01M6h_w7twDp58ZgH57eWC_w==&amp;c=Q2DBUt401ePlSeupJFrq_Q==" TITLE="Reveal
- this e-mail address">kris...</A>@bsd.lv. Archives are available at <A HREF="http://gmane.org/" CLASS="external">Gmane</A>.
- </P>
- <TABLE WIDTH="100%" SUMMARY="Mailing Lists">
- <COL WIDTH="175">
- <COL>
- <TBODY>
- <TR>
- <TD>
- disc<A CLASS="external" TITLE="Reveal this e-mail address"
- HREF="http://www.google.com/recaptcha/mailhide/d?k=01KQ80PFH5n3BBNpF5Gs4sRg==&amp;c=EV1QytpQqTHSItc2IXvZyocgYLPnG5K0JKw_gwMC9yc=">...</A>@mdocml.bsd.lv
- </TD>
- <TD>
- bug-reports, general questions, and announcements
- </TD>
- </TR>
- <TR>
- <TD>
- tec<A CLASS="external" TITLE="Reveal this e-mail address"
- HREF="http://www.google.com/recaptcha/mailhide/d?k=01qDX_iV0RlUOarEvb6mR28g==&amp;c=gRXsTjza0NNCFPaYu-Taj2tF0pmYZSc90EZkFkhkxgo=">...</A>@mdocml.bsd.lv
- </TD>
- <TD>
- patches and system discussions
- </TD>
- </TR>
- <TR>
- <TD>
- sou<A CLASS="external" TITLE="Reveal this e-mail address"
- HREF="http://www.google.com/recaptcha/mailhide/d?k=01prQrAZhhl2EbIwVcRfABsQ==&amp;c=KtTW4Yic9xk-8g40KzJoca4fR3MYXv28g8NC6OQV-T8=">...</A>@mdocml.bsd.lv
- </TD>
- <TD>
- source commit messages
- </TD>
- </TR>
- </TBODY>
- </TABLE>
- <H1>
- <A NAME="news">News</A>
- </H1>
- <P CLASS="news">
- 31-12-2013: version 1.12.3
- </P>
- <P>
- In the <A HREF="mdoc.7.html">mdoc(7)</A> SYNOPSIS, line breaks and hanging indentation
- now work correctly for .Fo/.Fa/.Fc and .Fn blocks.
- Thanks to Franco Fichtner for doing part of the work.
- </P>
- <P>
- The <A HREF="mdoc.7.html">mdoc(7)</A> .Bk macro got some addititonal bugfixes.
- </P>
- <P>
- In <A HREF="mdoc.7.html">mdoc(7)</A> macro arguments, double quotes can now be quoted
- by doubling them, just like in <A HREF="man.7.html">man(7)</A>.
- Thanks to Tsugutomo ENAMI for the patch.
- </P>
- <P>
- At the end of <A HREF="man.7.html">man(7)</A> macro lines, end-of-sentence spacing
- now works. Thanks to Franco Fichtner for the patch.
- </P>
- <P>
- For backward compatibility, the <A HREF="man.7.html">man(7)</A> parser now supports the
- man-ext .UR/.UE (uniform resource identifier) block macros.
- </P>
- <P>
- The <A HREF="man.7.html">man(7)</A> parser now handles closing blocks that are not open
- more gracefully.
- </P>
- <P>
- The <A HREF="man.7.html">man(7)</A> parser now ignores blank lines right after .SH and .SS.
- </P>
- <P>
- In the <A HREF="man.7.html">man(7)</A> formatter, reset indentation when leaving a block,
- not just when entering the next one.
- </P>
- <P>
- The <A HREF="roff.7.html">roff(7)</A> .nr request now supports incrementing and decrementing
- number registers and stops parsing the number right before the first non-digit character.
- </P>
- <P>
- The <A HREF="roff.7.html">roff(7)</A> parser now supports the alternative escape sequence
- syntax \C'uXXXX' for Unicode characters.
- </P>
- <P>
- The <A HREF="roff.7.html">roff(7)</A> parser now parses and ignores the .fam (font family)
- and .hw (hyphenation points) requests and the \d and \u escape sequences.
- </P>
- <P>
- The <A HREF="roff.7.html">roff(7)</A> manual got a new ESCAPE SEQUENCE REFERENCE.
- </P>
- <P CLASS="news">
- 05-10-2013: version 1.12.2
- </P>
- <P>
- The <A HREF="mdoc.7.html">mdoc(7)</A> to <A HREF="man.7.html">man(7)</A> converter,
- to be called as <CODE>mandoc -Tman</CODE>, is now fully functional.
- </P>
- <P>
- The <A HREF="mandoc.1.html">mandoc(1)</A> utility now supports the <CODE>-Ios</CODE> (default operating system)
- input option, and the <CODE>-Tutf8</CODE> output mode now actually works.
- </P>
- <P>
- The <A HREF="mandocdb.8.html">mandocdb(8)</A> utility no longer truncates existing databases when starting to build new ones,
- but only replaces them when the build actually succeeds.
- </P>
- <P>
- The <A HREF="man.7.html">man(7)</A> parser now supports the <EM>PD</EM> macro (paragraph distance),
- and (for GNU man-ext compatibility only) <EM>EX</EM> (example block) and <EM>EE</EM> (example end).
- Plus several bugfixes regarding indentation, line breaks, and vertical spacing,
- and regarding <EM>RS</EM> following <EM>TP</EM>.
- </P>
- <P>
- The <A HREF="roff.7.html">roff(7)</A> parser now supports the <EM>\f(BI</EM> (bold+italic) font escape,
- the <EM>\z</EM> (zero cursor advance) escape and the <EM>cc</EM> (change control character)
- and <EM>it</EM> (input line trap) requests.
- Plus bugfixes regarding the <EM>\t</EM> (tab) escape, nested escape sequences, and conditional requests.
- </P>
- <P>
- In <A HREF="mdoc.7.html">mdoc(7)</A>, several bugs were fixed related to UTF-8 output of quoting enclosures,
- delimiter handling, list indentation and horizontal and vertical spacing,
- formatting of the <EM>Lk</EM>, <EM>%U</EM>, and <EM>%C</EM> macros,
- plus some bugfixes related to the handling of syntax errors like badly nested font blocks,
- stray <EM>Ta</EM> macros outside column lists, unterminated <EM>It Xo</EM> blocks,
- and non-text children of <EM>Nm</EM> blocks.
- </P>
- <P>
- In <A HREF="tbl.7.html">tbl(7)</A>, the width of horizontal spans and the vertical spacing around tables was corrected,
- and in <A HREF="man.7.html">man(7)</A> files, a crash was fixed that was triggered by some particular unclosed <EM>T{</EM> macros.
- </P>
- <P>
- For mandoc developers, we now provide a <A HREF="tbl.3.html">tbl(3)</A> library manual and <CODE>gmdiff</CODE>,
- a very small, very simplistic groff-versus-mandoc output comparison tool.
- </P>
- <H2>
- <A>History</A>
- </H2>
- <UL>
- <LI>
- <A HREF="NEWS">Release notes</A> going back to release 1.9.15, February 18, 2010.
- Briefly explaining the most important changes in each release in relatively easy terms.
- Very many changes are not mentioned here.
- </LI>
- <LI>
- <A HREF="history.html">Development history</A> going back to the beginning of the project, November 22, 2008.
- One-line entries for important commits, releases, merges, hackathons and talks.
- Makes it easy to find out who did what, and when, and when it became available where.
- However, this is still incomplete, mentioning only a small fraction of all commits,
- and to keep the size down, the individual entries are extremely terse and technical.
- Feel free to look up more details and longer explanations about individual entries
- in the ChangeLog or in CVS.
- </LI>
- <LI>
- <A HREF="ChangeLog">CVS ChangeLog</A> going back to the beginning of the project.
- Very technical information of varying quality, strictly chronological.
- All commits are mentioned, but some messages neglect to mention some changes.
- Partly terse, partly detailed and verbose. In any case, the ChangeLog is very long -
- more than 25,000 lines, more than 700 kB.
- </LI>
- <LI>
- <A HREF="/cgi-bin/cvsweb/?cvsroot=mdocml">CVS</A> web interface, going back to the beginning of the project.
- Source code, diffs and commit messages for each source file. The real thing.
- </LI>
- </UL>
- <P CLASS="foot">
- <SMALL>
- Copyright &#169; 2008&#8211;2011
- <A CLASS="external" HREF="http://kristaps.bsd.lv">Kristaps Dzonsons</A>,
- &#169; 2013 Ingo Schwarze,
- $Date: 2013/12/31 $
- </SMALL>
- </P>
- </BODY>
-</HTML>
diff --git a/lib.c b/lib.c
index 7a18a5dd4fe6..8cc8a778a690 100644
--- a/lib.c
+++ b/lib.c
@@ -1,4 +1,4 @@
-/* $Id: lib.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */
+/* $Id: lib.c,v 1.10 2014/03/23 11:25:26 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -18,12 +18,9 @@
#include "config.h"
#endif
-#include <stdlib.h>
#include <string.h>
-#include <time.h>
#include "mdoc.h"
-#include "mandoc.h"
#include "libmdoc.h"
#define LINE(x, y) \
diff --git a/lib.in b/lib.in
index 334e093f727c..f65c9c2f69ed 100644
--- a/lib.in
+++ b/lib.in
@@ -1,6 +1,7 @@
-/* $Id: lib.in,v 1.17 2013/10/13 15:24:03 schwarze Exp $ */
+/* $Id: lib.in,v 1.18 2014/01/06 00:53:33 schwarze Exp $ */
/*
* Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2009, 2012 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
diff --git a/libman.h b/libman.h
index f2ba6a125630..e852927cd2cd 100644
--- a/libman.h
+++ b/libman.h
@@ -1,4 +1,4 @@
-/* $Id: libman.h,v 1.56 2012/11/17 00:26:33 schwarze Exp $ */
+/* $Id: libman.h,v 1.63 2014/08/01 21:24:17 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -24,13 +24,11 @@ enum man_next {
struct man {
struct mparse *parse; /* parse pointer */
+ int quick; /* abort parse early */
int flags; /* parse flags */
-#define MAN_HALT (1 << 0) /* badness happened: die */
#define MAN_ELINE (1 << 1) /* Next-line element scope. */
#define MAN_BLINE (1 << 2) /* Next-line block scope. */
-#define MAN_ILINE (1 << 3) /* Ignored in next-line scope. */
#define MAN_LITERAL (1 << 4) /* Literal input. */
-#define MAN_BPLINE (1 << 5)
#define MAN_NEWLINE (1 << 6) /* first macro/text in a line */
enum man_next next; /* where to put the next node */
struct man_node *last; /* the last parsed node */
@@ -61,10 +59,6 @@ extern const struct man_macro *const man_macros;
__BEGIN_DECLS
-#define man_pmsg(man, l, p, t) \
- mandoc_msg((t), (man)->parse, (l), (p), NULL)
-#define man_nmsg(man, n, t) \
- mandoc_msg((t), (man)->parse, (n)->line, (n)->pos, NULL)
int man_word_alloc(struct man *, int, int, const char *);
int man_block_alloc(struct man *, int, int, enum mant);
int man_head_alloc(struct man *, int, int, enum mant);
@@ -76,9 +70,7 @@ void man_hash_init(void);
enum mant man_hash_find(const char *);
int man_macroend(struct man *);
int man_valid_post(struct man *);
-int man_valid_pre(struct man *, struct man_node *);
-int man_unscope(struct man *,
- const struct man_node *, enum mandocerr);
+int man_unscope(struct man *, const struct man_node *);
__END_DECLS
diff --git a/libmandoc.h b/libmandoc.h
index 3c005e106da9..1011cc502147 100644
--- a/libmandoc.h
+++ b/libmandoc.h
@@ -1,7 +1,7 @@
-/* $Id: libmandoc.h,v 1.35 2013/12/15 21:23:52 schwarze Exp $ */
+/* $Id: libmandoc.h,v 1.42 2014/07/09 11:31:43 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -36,46 +36,50 @@ struct roff;
struct mdoc;
struct man;
-void mandoc_msg(enum mandocerr, struct mparse *,
+void mandoc_msg(enum mandocerr, struct mparse *,
int, int, const char *);
-void mandoc_vmsg(enum mandocerr, struct mparse *,
+#if __GNUC__ - 0 >= 4
+__attribute__((__format__ (__printf__, 5, 6)))
+#endif
+void mandoc_vmsg(enum mandocerr, struct mparse *,
int, int, const char *, ...);
char *mandoc_getarg(struct mparse *, char **, int, int *);
char *mandoc_normdate(struct mparse *, char *, int, int);
-int mandoc_eos(const char *, size_t, int);
+int mandoc_eos(const char *, size_t);
int mandoc_strntoi(const char *, size_t, int);
const char *mandoc_a2msec(const char*);
-void mdoc_free(struct mdoc *);
-struct mdoc *mdoc_alloc(struct roff *, struct mparse *, char *);
+void mdoc_free(struct mdoc *);
+struct mdoc *mdoc_alloc(struct roff *, struct mparse *,
+ const char *, int);
void mdoc_reset(struct mdoc *);
-int mdoc_parseln(struct mdoc *, int, char *, int);
+int mdoc_parseln(struct mdoc *, int, char *, int);
int mdoc_endparse(struct mdoc *);
int mdoc_addspan(struct mdoc *, const struct tbl_span *);
int mdoc_addeqn(struct mdoc *, const struct eqn *);
-void man_free(struct man *);
-struct man *man_alloc(struct roff *, struct mparse *);
+void man_free(struct man *);
+struct man *man_alloc(struct roff *, struct mparse *, int);
void man_reset(struct man *);
-int man_parseln(struct man *, int, char *, int);
+int man_parseln(struct man *, int, char *, int);
int man_endparse(struct man *);
int man_addspan(struct man *, const struct tbl_span *);
int man_addeqn(struct man *, const struct eqn *);
-void roff_free(struct roff *);
-struct roff *roff_alloc(enum mparset, struct mparse *);
+void roff_free(struct roff *);
+struct roff *roff_alloc(struct mparse *, int);
void roff_reset(struct roff *);
-enum rofferr roff_parseln(struct roff *, int,
+enum rofferr roff_parseln(struct roff *, int,
char **, size_t *, int, int *);
void roff_endparse(struct roff *);
void roff_setreg(struct roff *, const char *, int, char sign);
int roff_getreg(const struct roff *, const char *);
char *roff_strdup(const struct roff *, const char *);
-int roff_getcontrol(const struct roff *,
+int roff_getcontrol(const struct roff *,
const char *, int *);
#if 0
char roff_eqndelim(const struct roff *);
-void roff_openeqn(struct roff *, const char *,
+void roff_openeqn(struct roff *, const char *,
int, int, const char *);
int roff_closeeqn(struct roff *);
#endif
diff --git a/libmdoc.h b/libmdoc.h
index 3f14519d3b40..1507a8c26fbb 100644
--- a/libmdoc.h
+++ b/libmdoc.h
@@ -1,4 +1,4 @@
-/* $Id: libmdoc.h,v 1.82 2013/10/21 23:47:58 schwarze Exp $ */
+/* $Id: libmdoc.h,v 1.88 2014/08/01 17:40:34 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
@@ -25,9 +25,9 @@ enum mdoc_next {
struct mdoc {
struct mparse *parse; /* parse pointer */
- char *defos; /* default argument for .Os */
+ const char *defos; /* default argument for .Os */
+ int quick; /* abort parse early */
int flags; /* parse flags */
-#define MDOC_HALT (1 << 0) /* error in parse: halt */
#define MDOC_LITERAL (1 << 1) /* in a literal scope */
#define MDOC_PBODY (1 << 2) /* in the document body */
#define MDOC_NEWLINE (1 << 3) /* first macro/text in a line */
@@ -40,6 +40,7 @@ struct mdoc {
enum mdoc_next next; /* where to put the next node */
struct mdoc_node *last; /* the last node parsed */
struct mdoc_node *first; /* the first node parsed */
+ struct mdoc_node *last_es; /* the most recent Es node */
struct mdoc_meta meta; /* document meta-data */
enum mdoc_sec lastnamed;
enum mdoc_sec lastsec;
@@ -103,17 +104,13 @@ extern const struct mdoc_macro *const mdoc_macros;
__BEGIN_DECLS
-#define mdoc_pmsg(mdoc, l, p, t) \
- mandoc_msg((t), (mdoc)->parse, (l), (p), NULL)
-#define mdoc_nmsg(mdoc, n, t) \
- mandoc_msg((t), (mdoc)->parse, (n)->line, (n)->pos, NULL)
int mdoc_macro(MACRO_PROT_ARGS);
-int mdoc_word_alloc(struct mdoc *,
+int mdoc_word_alloc(struct mdoc *,
int, int, const char *);
void mdoc_word_append(struct mdoc *, const char *);
-int mdoc_elem_alloc(struct mdoc *, int, int,
+int mdoc_elem_alloc(struct mdoc *, int, int,
enum mdoct, struct mdoc_arg *);
-int mdoc_block_alloc(struct mdoc *, int, int,
+int mdoc_block_alloc(struct mdoc *, int, int,
enum mdoct, struct mdoc_arg *);
int mdoc_head_alloc(struct mdoc *, int, int, enum mdoct);
int mdoc_tail_alloc(struct mdoc *, int, int, enum mdoct);
@@ -136,7 +133,7 @@ enum margverr mdoc_argv(struct mdoc *, int, enum mdoct,
void mdoc_argv_free(struct mdoc_arg *);
enum margserr mdoc_args(struct mdoc *, int,
int *, char *, enum mdoct, char **);
-enum margserr mdoc_zargs(struct mdoc *, int,
+enum margserr mdoc_zargs(struct mdoc *, int,
int *, char *, char **);
int mdoc_macroend(struct mdoc *);
enum mdelim mdoc_isdelim(const char *);
diff --git a/libroff.h b/libroff.h
index 5b84c5fc4541..f917b14926f9 100644
--- a/libroff.h
+++ b/libroff.h
@@ -1,4 +1,4 @@
-/* $Id: libroff.h,v 1.28 2013/05/31 21:37:17 schwarze Exp $ */
+/* $Id: libroff.h,v 1.29 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -66,7 +66,7 @@ struct tbl_node *tbl_alloc(int, int, struct mparse *);
void tbl_restart(int, int, struct tbl_node *);
void tbl_free(struct tbl_node *);
void tbl_reset(struct tbl_node *);
-enum rofferr tbl_read(struct tbl_node *, int, const char *, int);
+enum rofferr tbl_read(struct tbl_node *, int, const char *, int);
int tbl_option(struct tbl_node *, int, const char *);
int tbl_layout(struct tbl_node *, int, const char *);
int tbl_data(struct tbl_node *, int, const char *);
@@ -76,7 +76,7 @@ void tbl_end(struct tbl_node **);
struct eqn_node *eqn_alloc(const char *, int, int, struct mparse *);
enum rofferr eqn_end(struct eqn_node **);
void eqn_free(struct eqn_node *);
-enum rofferr eqn_read(struct eqn_node **, int,
+enum rofferr eqn_read(struct eqn_node **, int,
const char *, int, int *);
__END_DECLS
diff --git a/main.c b/main.c
index 7e5c7a98aef2..d97c8e2393b0 100644
--- a/main.c
+++ b/main.c
@@ -1,7 +1,8 @@
-/* $Id: main.c,v 1.167 2012/11/19 17:22:26 schwarze Exp $ */
+/* $Id: main.c,v 1.177 2014/06/21 22:24:01 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2011, 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -27,6 +28,7 @@
#include <unistd.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "main.h"
#include "mdoc.h"
#include "man.h"
@@ -58,18 +60,18 @@ struct curparse {
struct mparse *mp;
enum mandoclevel wlevel; /* ignore messages below this */
int wstop; /* stop after a file with a warning */
- enum outt outtype; /* which output to use */
+ enum outt outtype; /* which output to use */
out_mdoc outmdoc; /* mdoc output ptr */
- out_man outman; /* man output ptr */
+ out_man outman; /* man output ptr */
out_free outfree; /* free output ptr */
void *outdata; /* data for output */
char outopts[BUFSIZ]; /* buf of output opts */
};
-static int moptions(enum mparset *, char *);
+static int moptions(int *, char *);
static void mmsg(enum mandocerr, enum mandoclevel,
const char *, int, int, const char *);
-static void parse(struct curparse *, int,
+static void parse(struct curparse *, int,
const char *, enum mandoclevel *);
static int toptions(struct curparse *, char *);
static void usage(void) __attribute__((noreturn));
@@ -78,12 +80,13 @@ static int woptions(struct curparse *, char *);
static const char *progname;
+
int
main(int argc, char *argv[])
{
int c;
struct curparse curp;
- enum mparset type;
+ int options;
enum mandoclevel rc;
char *defos;
@@ -95,44 +98,45 @@ main(int argc, char *argv[])
memset(&curp, 0, sizeof(struct curparse));
- type = MPARSE_AUTO;
+ options = MPARSE_SO;
curp.outtype = OUTT_ASCII;
curp.wlevel = MANDOCLEVEL_FATAL;
defos = NULL;
- /* LINTED */
while (-1 != (c = getopt(argc, argv, "I:m:O:T:VW:")))
switch (c) {
- case ('I'):
+ case 'I':
if (strncmp(optarg, "os=", 3)) {
- fprintf(stderr, "-I%s: Bad argument\n",
- optarg);
+ fprintf(stderr,
+ "%s: -I%s: Bad argument\n",
+ progname, optarg);
return((int)MANDOCLEVEL_BADARG);
}
if (defos) {
- fprintf(stderr, "-I%s: Duplicate argument\n",
- optarg);
+ fprintf(stderr,
+ "%s: -I%s: Duplicate argument\n",
+ progname, optarg);
return((int)MANDOCLEVEL_BADARG);
}
defos = mandoc_strdup(optarg + 3);
break;
- case ('m'):
- if ( ! moptions(&type, optarg))
+ case 'm':
+ if ( ! moptions(&options, optarg))
return((int)MANDOCLEVEL_BADARG);
break;
- case ('O'):
+ case 'O':
(void)strlcat(curp.outopts, optarg, BUFSIZ);
(void)strlcat(curp.outopts, ",", BUFSIZ);
break;
- case ('T'):
+ case 'T':
if ( ! toptions(&curp, optarg))
return((int)MANDOCLEVEL_BADARG);
break;
- case ('W'):
+ case 'W':
if ( ! woptions(&curp, optarg))
return((int)MANDOCLEVEL_BADARG);
break;
- case ('V'):
+ case 'V':
version();
/* NOTREACHED */
default:
@@ -140,7 +144,7 @@ main(int argc, char *argv[])
/* NOTREACHED */
}
- curp.mp = mparse_alloc(type, curp.wlevel, mmsg, &curp, defos);
+ curp.mp = mparse_alloc(options, curp.wlevel, mmsg, defos);
/*
* Conditionally start up the lookaside buffer before parsing.
@@ -191,15 +195,15 @@ usage(void)
"[-Ooption] "
"[-Toutput] "
"[-Wlevel]\n"
- "\t [file ...]\n",
+ "\t [file ...]\n",
progname);
exit((int)MANDOCLEVEL_BADARG);
}
static void
-parse(struct curparse *curp, int fd,
- const char *file, enum mandoclevel *level)
+parse(struct curparse *curp, int fd, const char *file,
+ enum mandoclevel *level)
{
enum mandoclevel rc;
struct mdoc *mdoc;
@@ -229,31 +233,31 @@ parse(struct curparse *curp, int fd,
if ( ! (curp->outman && curp->outmdoc)) {
switch (curp->outtype) {
- case (OUTT_XHTML):
+ case OUTT_XHTML:
curp->outdata = xhtml_alloc(curp->outopts);
curp->outfree = html_free;
break;
- case (OUTT_HTML):
+ case OUTT_HTML:
curp->outdata = html_alloc(curp->outopts);
curp->outfree = html_free;
break;
- case (OUTT_UTF8):
+ case OUTT_UTF8:
curp->outdata = utf8_alloc(curp->outopts);
curp->outfree = ascii_free;
break;
- case (OUTT_LOCALE):
+ case OUTT_LOCALE:
curp->outdata = locale_alloc(curp->outopts);
curp->outfree = ascii_free;
break;
- case (OUTT_ASCII):
+ case OUTT_ASCII:
curp->outdata = ascii_alloc(curp->outopts);
curp->outfree = ascii_free;
break;
- case (OUTT_PDF):
+ case OUTT_PDF:
curp->outdata = pdf_alloc(curp->outopts);
curp->outfree = pspdf_free;
break;
- case (OUTT_PS):
+ case OUTT_PS:
curp->outdata = ps_alloc(curp->outopts);
curp->outfree = pspdf_free;
break;
@@ -262,29 +266,29 @@ parse(struct curparse *curp, int fd,
}
switch (curp->outtype) {
- case (OUTT_HTML):
+ case OUTT_HTML:
/* FALLTHROUGH */
- case (OUTT_XHTML):
+ case OUTT_XHTML:
curp->outman = html_man;
curp->outmdoc = html_mdoc;
break;
- case (OUTT_TREE):
+ case OUTT_TREE:
curp->outman = tree_man;
curp->outmdoc = tree_mdoc;
break;
- case (OUTT_MAN):
+ case OUTT_MAN:
curp->outmdoc = man_mdoc;
curp->outman = man_man;
break;
- case (OUTT_PDF):
+ case OUTT_PDF:
/* FALLTHROUGH */
- case (OUTT_ASCII):
+ case OUTT_ASCII:
/* FALLTHROUGH */
- case (OUTT_UTF8):
+ case OUTT_UTF8:
/* FALLTHROUGH */
- case (OUTT_LOCALE):
+ case OUTT_LOCALE:
/* FALLTHROUGH */
- case (OUTT_PS):
+ case OUTT_PS:
curp->outman = terminal_man;
curp->outmdoc = terminal_mdoc;
break;
@@ -293,7 +297,7 @@ parse(struct curparse *curp, int fd,
}
}
- mparse_result(curp->mp, &mdoc, &man);
+ mparse_result(curp->mp, &mdoc, &man, NULL);
/* Execute the out device, if it exists. */
@@ -311,17 +315,18 @@ parse(struct curparse *curp, int fd,
}
static int
-moptions(enum mparset *tflags, char *arg)
+moptions(int *options, char *arg)
{
if (0 == strcmp(arg, "doc"))
- *tflags = MPARSE_MDOC;
+ *options |= MPARSE_MDOC;
else if (0 == strcmp(arg, "andoc"))
- *tflags = MPARSE_AUTO;
+ /* nothing to do */;
else if (0 == strcmp(arg, "an"))
- *tflags = MPARSE_MAN;
+ *options |= MPARSE_MAN;
else {
- fprintf(stderr, "%s: Bad argument\n", arg);
+ fprintf(stderr, "%s: -m%s: Bad argument\n",
+ progname, arg);
return(0);
}
@@ -354,7 +359,8 @@ toptions(struct curparse *curp, char *arg)
else if (0 == strcmp(arg, "pdf"))
curp->outtype = OUTT_PDF;
else {
- fprintf(stderr, "%s: Bad argument\n", arg);
+ fprintf(stderr, "%s: -T%s: Bad argument\n",
+ progname, arg);
return(0);
}
@@ -365,7 +371,7 @@ static int
woptions(struct curparse *curp, char *arg)
{
char *v, *o;
- const char *toks[6];
+ const char *toks[6];
toks[0] = "stop";
toks[1] = "all";
@@ -377,22 +383,23 @@ woptions(struct curparse *curp, char *arg)
while (*arg) {
o = arg;
switch (getsubopt(&arg, UNCONST(toks), &v)) {
- case (0):
+ case 0:
curp->wstop = 1;
break;
- case (1):
+ case 1:
/* FALLTHROUGH */
- case (2):
+ case 2:
curp->wlevel = MANDOCLEVEL_WARNING;
break;
- case (3):
+ case 3:
curp->wlevel = MANDOCLEVEL_ERROR;
break;
- case (4):
+ case 4:
curp->wlevel = MANDOCLEVEL_FATAL;
break;
default:
- fprintf(stderr, "-W%s: Bad argument\n", o);
+ fprintf(stderr, "%s: -W%s: Bad argument\n",
+ progname, o);
return(0);
}
}
@@ -401,14 +408,20 @@ woptions(struct curparse *curp, char *arg)
}
static void
-mmsg(enum mandocerr t, enum mandoclevel lvl,
+mmsg(enum mandocerr t, enum mandoclevel lvl,
const char *file, int line, int col, const char *msg)
{
+ const char *mparse_msg;
+
+ fprintf(stderr, "%s: %s:", progname, file);
+
+ if (line)
+ fprintf(stderr, "%d:%d:", line, col + 1);
+
+ fprintf(stderr, " %s", mparse_strlevel(lvl));
- fprintf(stderr, "%s:%d:%d: %s: %s",
- file, line, col + 1,
- mparse_strlevel(lvl),
- mparse_strerror(t));
+ if (NULL != (mparse_msg = mparse_strerror(t)))
+ fprintf(stderr, ": %s", mparse_msg);
if (msg)
fprintf(stderr, ": %s", msg);
diff --git a/main.h b/main.h
index 79dcf489ae65..beb0481cab83 100644
--- a/main.h
+++ b/main.h
@@ -1,4 +1,4 @@
-/* $Id: main.h,v 1.15 2011/10/06 22:29:12 kristaps Exp $ */
+/* $Id: main.h,v 1.16 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -25,7 +25,7 @@ struct man;
#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a))
-/*
+/*
* Definitions for main.c-visible output device functions, e.g., -Thtml
* and -Tascii. Note that ascii_alloc() is named as such in
* anticipation of latin1_alloc() and so on, all of which map into the
diff --git a/makewhatis.8 b/makewhatis.8
new file mode 100644
index 000000000000..02c4cc354bd6
--- /dev/null
+++ b/makewhatis.8
@@ -0,0 +1,217 @@
+.\" $Id: makewhatis.8,v 1.2 2014/04/25 12:13:15 schwarze Exp $
+.\"
+.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+.\" Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: April 25 2014 $
+.Dt MAKEWHATIS 8
+.Os
+.Sh NAME
+.Nm makewhatis
+.Nd index UNIX manuals
+.Sh SYNOPSIS
+.Nm
+.Op Fl aDnpQ
+.Op Fl T Cm utf8
+.Op Fl C Ar file
+.Nm
+.Op Fl aDnpQ
+.Op Fl T Cm utf8
+.Ar dir ...
+.Nm
+.Op Fl DnpQ
+.Op Fl T Cm utf8
+.Fl d Ar dir
+.Op Ar
+.Nm
+.Op Fl Dnp
+.Op Fl T Cm utf8
+.Fl u Ar dir
+.Op Ar
+.Nm
+.Op Fl DQ
+.Fl t Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility extracts keywords from
+.Ux
+manuals and indexes them in a database for fast retrieval by
+.Xr apropos 1 ,
+.Xr whatis 1 ,
+and
+.Xr man 1 Ns 's
+.Fl k
+option.
+.Pp
+By default,
+.Nm
+creates a database in each
+.Ar dir
+using the files
+.Sm off
+.Sy man Ar section Li /
+.Op Ar arch Li /
+.Ar title . section
+.Sm on
+and
+.Sm off
+.Sy cat Ar section Li /
+.Op Ar arch Li /
+.Ar title . Sy 0
+.Sm on
+in that directory.
+Existing databases are replaced.
+If
+.Ar dir
+is not provided,
+.Nm
+uses the default paths stipulated by
+.Xr manpath 1 ,
+or
+.Xr man.conf 5 .
+.Pp
+The arguments are as follows:
+.Bl -tag -width "-C file"
+.It Fl a
+Use all directories and files found below
+.Ar dir ... .
+.It Fl C Ar file
+Specify an alternative configuration
+.Ar file
+in
+.Xr man.conf 5
+format.
+.It Fl D
+Display all files added to or removed from the index.
+With a second
+.Fl D ,
+also show all keywords added for each file.
+.It Fl d Ar dir
+Merge (remove and re-add)
+.Ar
+to the database in
+.Ar dir .
+.It Fl n
+Do not create or modify any database; scan and parse only,
+and print manual page names and descriptions to standard output.
+.It Fl p
+Print warnings about potential problems with manual pages
+to the standard error output.
+.It Fl Q
+Quickly build reduced-size databases
+by reading only the NAME sections of manuals.
+The resulting databases will usually contain names and descriptions only.
+.It Fl T Cm utf8
+Use UTF-8 encoding instead of ASCII for strings stored in the databases.
+.It Fl t Ar
+Check the given
+.Ar files
+for potential problems.
+Implies
+.Fl a ,
+.Fl n ,
+and
+.Fl p .
+All diagnostic messages are printed to the standard output;
+the standard error output is not used.
+.It Fl u Ar dir
+Remove
+.Ar
+from the database in
+.Ar dir .
+.El
+.Pp
+If fatal parse errors are encountered while parsing, the offending file
+is printed to stderr, omitted from the index, and the parse continues
+with the next input file.
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa mandoc.db
+A database of manpages relative to the directory of the file.
+This file is portable across architectures and systems, so long as the
+manpage hierarchy it indexes does not change.
+.It Pa /etc/man.conf
+The default
+.Xr man 1
+configuration file.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -compact
+.It 0
+No errors occurred.
+.It 5
+Invalid command line arguments were specified.
+No input files have been read.
+.It 6
+An operating system error occurred, for example memory exhaustion or an
+error accessing input files.
+Such errors cause
+.Nm
+to exit at once, possibly in the middle of parsing or formatting a file.
+The output databases are corrupt and should be removed.
+.El
+.Sh SEE ALSO
+.Xr apropos 1 ,
+.Xr man 1 ,
+.Xr whatis 1 ,
+.Xr man.conf 5
+.Sh HISTORY
+A
+.Nm
+utility first appeared in
+.Bx 2 .
+It was rewritten in
+.Xr perl 1
+for
+.Ox 2.7
+and in C for
+.Ox 5.6 .
+.Pp
+The
+.Ar dir
+argument first appeared in
+.Nx 1.0 ;
+the options
+.Fl dpt
+in
+.Ox 2.7 ;
+the option
+.Fl u
+in
+.Ox 3.4 ;
+and the options
+.Fl aCDnQT
+in
+.Ox 5.6 .
+.Sh AUTHORS
+.An -nosplit
+.An Bill Joy
+wrote the original
+.Bx
+.Nm
+in February 1979,
+.An Marc Espie
+started the Perl version in 2000,
+and the current version of
+.Nm
+was written by
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
+and
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org .
diff --git a/man.7 b/man.7
index f2f4d1d8c099..4b64d7f442f4 100644
--- a/man.7
+++ b/man.7
@@ -1,7 +1,8 @@
-.\" $Id: man.7,v 1.120 2013/09/16 22:58:57 schwarze Exp $
+.\" $Id: man.7,v 1.127 2014/06/22 16:39:45 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
-.\" Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -15,7 +16,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: September 16 2013 $
+.Dd $Mdocdate: June 22 2014 $
.Dt MAN 7
.Os
.Sh NAME
@@ -97,30 +98,32 @@ file for a utility
.Bd -literal -offset indent
\&.TH PROGNAME 1 2009-10-10
\&.SH NAME
-\efBprogname\efR \e(en a description goes here
+\efBprogname\efR \e(en one line about what it does
\&.\e\(dq .SH LIBRARY
-\&.\e\(dq For sections 2 & 3 only.
+\&.\e\(dq For sections 2, 3, and 9 only.
\&.\e\(dq Not used in OpenBSD.
\&.SH SYNOPSIS
-\efBprogname\efR [\efB\e-options\efR] arguments...
+\efBprogname\efR [\efB\e-options\efR] \efIfile ...\efR
\&.SH DESCRIPTION
-The \efBfoo\efR utility processes files...
+The \efBfoo\efR utility processes files ...
+\&.\e\(dq .SH CONTEXT
+\&.\e\(dq For section 9 functions only.
\&.\e\(dq .SH IMPLEMENTATION NOTES
\&.\e\(dq Not used in OpenBSD.
\&.\e\(dq .SH RETURN VALUES
-\&.\e\(dq For sections 2, 3, & 9 only.
+\&.\e\(dq For sections 2, 3, and 9 function return values only.
\&.\e\(dq .SH ENVIRONMENT
-\&.\e\(dq For sections 1, 6, 7, & 8 only.
+\&.\e\(dq For sections 1, 6, 7, and 8 only.
\&.\e\(dq .SH FILES
\&.\e\(dq .SH EXIT STATUS
-\&.\e\(dq For sections 1, 6, & 8 only.
+\&.\e\(dq For sections 1, 6, and 8 only.
\&.\e\(dq .SH EXAMPLES
\&.\e\(dq .SH DIAGNOSTICS
-\&.\e\(dq For sections 1, 4, 6, 7, & 8 only.
+\&.\e\(dq For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only.
\&.\e\(dq .SH ERRORS
-\&.\e\(dq For sections 2, 3, & 9 only.
+\&.\e\(dq For sections 2, 3, 4, and 9 errno settings only.
\&.\e\(dq .SH SEE ALSO
-\&.\e\(dq .BR foo ( 1 )
+\&.\e\(dq .BR foobar ( 1 )
\&.\e\(dq .SH STANDARDS
\&.\e\(dq .SH HISTORY
\&.\e\(dq .SH AUTHORS
@@ -170,6 +173,9 @@ This expands upon the brief, one-line description in
.Em NAME .
It usually contains a break-down of the options (if documenting a
command).
+.It Em CONTEXT
+This section lists the contexts in which functions can be called in section 9.
+The contexts are autoconf, process, or interrupt.
.It Em IMPLEMENTATION NOTES
Implementation-specific notes should be kept here.
This is useful when implementing standard functions that may have side
@@ -196,13 +202,19 @@ well-tested invocations.
Make sure that examples work properly!
.It Em DIAGNOSTICS
Documents error conditions.
-This is most useful in section 4 manuals.
+In section 4 and 9 manuals, these are usually messages
+printed by the kernel to the console and to the kernel log.
+In section 1, 6, 7, and 8, these are usually messages
+printed by userland programs to the standard error output.
+.Pp
Historically, this section was used in place of
.Em EXIT STATUS
for manuals in sections 1, 6, and 8; however, this practise is
discouraged.
.It Em ERRORS
-Documents error handling in sections 2, 3, and 9.
+Documents
+.Xr errno 2
+settings in sections 2, 3, 4, and 9.
.It Em SEE ALSO
References other manuals with related topics.
This section should exist for most manuals.
@@ -280,7 +292,7 @@ For the scoping of individual macros, see
.Sx MACRO SYNTAX .
.Ss \&AT
Sets the volume for the footer for compatibility with man pages from
-.Tn AT&T UNIX
+.At
releases.
The optional arguments specify which release it is from.
.Ss \&B
@@ -656,6 +668,20 @@ Sets the volume for the footer for compatibility with man pages from
.Bx
releases.
The optional first argument specifies which release it is from.
+.Ss \&UE
+End a uniform resource identifier block.
+This is a non-standard GNU extension, included only for compatibility.
+See
+.Sx \&UR .
+.Ss \&UR
+Begin a uniform resource identifier block.
+This is a non-standard GNU extension, included only for compatibility.
+It has the following syntax:
+.Bd -literal -offset indent
+.Pf \. Sx \&UR Ar uri
+link description to be shown
+.Pf \. Sx UE
+.Ed
.Ss \&br
Breaks the current line.
Consecutive invocations have no further effect.
@@ -665,11 +691,6 @@ See also
.Ss \&fi
End literal mode begun by
.Sx \&nf .
-.Ss \&ft
-Change the current font mode.
-See
-.Sx Text Decoration
-for a listing of available font modes.
.Ss \&in
Indent relative to the current indentation:
.Pp
@@ -750,10 +771,13 @@ The syntax is as follows:
.It Sx \&BI Ta n Ta current Ta \&
.It Sx \&BR Ta n Ta current Ta \&
.It Sx \&DT Ta 0 Ta current Ta \&
+.It Sx \&EE Ta 0 Ta current Ta compat
+.It Sx \&EX Ta 0 Ta current Ta compat
.It Sx \&I Ta n Ta next-line Ta \&
.It Sx \&IB Ta n Ta current Ta \&
.It Sx \&IR Ta n Ta current Ta \&
.It Sx \&OP Ta 0, 1 Ta current Ta compat
+.It Sx \&PD Ta 1 Ta current Ta \&
.It Sx \&R Ta n Ta next-line Ta \&
.It Sx \&RB Ta n Ta current Ta \&
.It Sx \&RI Ta n Ta current Ta \&
@@ -763,7 +787,6 @@ The syntax is as follows:
.It Sx \&UC Ta <=1 Ta current Ta \&
.It Sx \&br Ta 0 Ta current Ta compat
.It Sx \&fi Ta 0 Ta current Ta compat
-.It Sx \&ft Ta 1 Ta current Ta compat
.It Sx \&in Ta 1 Ta current Ta compat
.It Sx \&na Ta 0 Ta current Ta compat
.It Sx \&nf Ta 0 Ta current Ta compat
@@ -823,6 +846,8 @@ implicitly closed, is syntactically incorrect.
.It Sx \&SH Ta >0 Ta next-line Ta section Ta \&
.It Sx \&SS Ta >0 Ta next-line Ta sub-section Ta \&
.It Sx \&TP Ta n Ta next-line Ta paragraph Ta \&
+.It Sx \&UE Ta 0 Ta current Ta none Ta compat
+.It Sx \&UR Ta 1 Ta current Ta part Ta compat
.El
.Pp
Macros marked
@@ -848,10 +873,11 @@ Note that macros like
.Sx \&BR
open and close a font scope for each argument.
.Sh COMPATIBILITY
-This section documents areas of questionable portability between
+This section mentions some areas of questionable portability between
implementations of the
.Nm
language.
+More incompatibilities exist.
.Pp
.Bl -dash -compact
.It
@@ -863,47 +889,12 @@ to close out a literal context opened with
.Sx \&nf .
This behaviour may not be portable.
.It
-In quoted literals, GNU troff allowed pair-wise double-quotes to produce
-a standalone double-quote in formatted output.
-It is not known whether this behaviour is exhibited by other formatters.
-.It
troff suppresses a newline before
.Sq \(aq
macro output; in mandoc, it is an alias for the standard
.Sq \&.
control character.
.It
-The
-.Sq \eh
-.Pq horizontal position ,
-.Sq \ev
-.Pq vertical position ,
-.Sq \em
-.Pq text colour ,
-.Sq \eM
-.Pq text filling colour ,
-.Sq \ez
-.Pq zero-length character ,
-.Sq \ew
-.Pq string length ,
-.Sq \ek
-.Pq horizontal position marker ,
-.Sq \eo
-.Pq text overstrike ,
-and
-.Sq \es
-.Pq text size
-escape sequences are all discarded in mandoc.
-.It
-The
-.Sq \ef
-scaling unit is accepted by mandoc, but rendered as the default unit.
-.It
-The
-.Sx \&sp
-macro does not accept negative values in mandoc.
-In GNU troff, this would result in strange behaviour.
-.It
In page header lines, GNU troff versions up to and including 1.21
only print
.Ar volume
@@ -919,8 +910,13 @@ is given, like in
.El
.Pp
The
-.Sx OP
-macro is part of the extended
+.Sx EE ,
+.Sx EX ,
+.Sx OP ,
+.Sx UE ,
+and
+.Sx UR
+macros are part of the GNU extended
.Nm
macro set, and may not be portable to non-GNU troff implementations.
.Sh SEE ALSO
diff --git a/man.c b/man.c
index e6e1c2899209..75a6350577ba 100644
--- a/man.c
+++ b/man.c
@@ -1,6 +1,8 @@
-/* $Id: man.c,v 1.121 2013/11/10 22:54:40 schwarze Exp $ */
+/* $Id: man.c,v 1.137 2014/08/01 21:24:17 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -21,6 +23,7 @@
#include <sys/types.h>
#include <assert.h>
+#include <ctype.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
@@ -28,12 +31,13 @@
#include "man.h"
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libman.h"
#include "libmandoc.h"
-const char *const __man_macronames[MAN_MAX] = {
+const char *const __man_macronames[MAN_MAX] = {
"br", "TH", "SH", "SS",
- "TP", "LP", "PP", "P",
+ "TP", "LP", "PP", "P",
"IP", "HP", "SM", "SB",
"BI", "IB", "BR", "RB",
"R", "B", "I", "IR",
@@ -41,17 +45,17 @@ const char *const __man_macronames[MAN_MAX] = {
"fi", "RE", "RS", "DT",
"UC", "PD", "AT", "in",
"ft", "OP", "EX", "EE",
- "UR", "UE"
+ "UR", "UE", "ll"
};
const char * const *man_macronames = __man_macronames;
-static struct man_node *man_node_alloc(struct man *, int, int,
+static struct man_node *man_node_alloc(struct man *, int, int,
enum man_type, enum mant);
-static int man_node_append(struct man *,
+static int man_node_append(struct man *,
struct man_node *);
static void man_node_free(struct man_node *);
-static void man_node_unlink(struct man *,
+static void man_node_unlink(struct man *,
struct man_node *);
static int man_ptext(struct man *, int, char *, int);
static int man_pmacro(struct man *, int, char *, int);
@@ -64,20 +68,16 @@ const struct man_node *
man_node(const struct man *man)
{
- assert( ! (MAN_HALT & man->flags));
return(man->first);
}
-
const struct man_meta *
man_meta(const struct man *man)
{
- assert( ! (MAN_HALT & man->flags));
return(&man->meta);
}
-
void
man_reset(struct man *man)
{
@@ -86,7 +86,6 @@ man_reset(struct man *man)
man_alloc1(man);
}
-
void
man_free(struct man *man)
{
@@ -95,9 +94,8 @@ man_free(struct man *man)
free(man);
}
-
struct man *
-man_alloc(struct roff *roff, struct mparse *parse)
+man_alloc(struct roff *roff, struct mparse *parse, int quick)
{
struct man *p;
@@ -105,39 +103,31 @@ man_alloc(struct roff *roff, struct mparse *parse)
man_hash_init();
p->parse = parse;
+ p->quick = quick;
p->roff = roff;
man_alloc1(p);
return(p);
}
-
int
man_endparse(struct man *man)
{
- assert( ! (MAN_HALT & man->flags));
- if (man_macroend(man))
- return(1);
- man->flags |= MAN_HALT;
- return(0);
+ return(man_macroend(man));
}
-
int
man_parseln(struct man *man, int ln, char *buf, int offs)
{
man->flags |= MAN_NEWLINE;
- assert( ! (MAN_HALT & man->flags));
-
return (roff_getcontrol(man->roff, buf, &offs) ?
- man_pmacro(man, ln, buf, offs) :
- man_ptext(man, ln, buf, offs));
+ man_pmacro(man, ln, buf, offs) :
+ man_ptext(man, ln, buf, offs));
}
-
static void
man_free1(struct man *man)
{
@@ -156,7 +146,6 @@ man_free1(struct man *man)
free(man->meta.msec);
}
-
static void
man_alloc1(struct man *man)
{
@@ -180,12 +169,12 @@ man_node_append(struct man *man, struct man_node *p)
assert(MAN_ROOT != p->type);
switch (man->next) {
- case (MAN_NEXT_SIBLING):
+ case MAN_NEXT_SIBLING:
man->last->next = p;
p->prev = man->last;
p->parent = man->last->parent;
break;
- case (MAN_NEXT_CHILD):
+ case MAN_NEXT_CHILD:
man->last->child = p;
p->parent = man->last;
break;
@@ -193,23 +182,24 @@ man_node_append(struct man *man, struct man_node *p)
abort();
/* NOTREACHED */
}
-
+
assert(p->parent);
p->parent->nchild++;
- if ( ! man_valid_pre(man, p))
- return(0);
-
switch (p->type) {
- case (MAN_HEAD):
+ case MAN_BLOCK:
+ if (p->tok == MAN_SH || p->tok == MAN_SS)
+ man->flags &= ~MAN_LITERAL;
+ break;
+ case MAN_HEAD:
assert(MAN_BLOCK == p->parent->type);
p->parent->head = p;
break;
- case (MAN_TAIL):
+ case MAN_TAIL:
assert(MAN_BLOCK == p->parent->type);
p->parent->tail = p;
break;
- case (MAN_BODY):
+ case MAN_BODY:
assert(MAN_BLOCK == p->parent->type);
p->parent->body = p;
break;
@@ -220,9 +210,9 @@ man_node_append(struct man *man, struct man_node *p)
man->last = p;
switch (p->type) {
- case (MAN_TBL):
+ case MAN_TBL:
/* FALLTHROUGH */
- case (MAN_TEXT):
+ case MAN_TEXT:
if ( ! man_valid_post(man))
return(0);
break;
@@ -233,9 +223,8 @@ man_node_append(struct man *man, struct man_node *p)
return(1);
}
-
static struct man_node *
-man_node_alloc(struct man *man, int line, int pos,
+man_node_alloc(struct man *man, int line, int pos,
enum man_type type, enum mant tok)
{
struct man_node *p;
@@ -252,7 +241,6 @@ man_node_alloc(struct man *man, int line, int pos,
return(p);
}
-
int
man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
{
@@ -265,7 +253,6 @@ man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
return(1);
}
-
int
man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
{
@@ -278,7 +265,6 @@ man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
return(1);
}
-
int
man_head_alloc(struct man *man, int line, int pos, enum mant tok)
{
@@ -291,7 +277,6 @@ man_head_alloc(struct man *man, int line, int pos, enum mant tok)
return(1);
}
-
int
man_body_alloc(struct man *man, int line, int pos, enum mant tok)
{
@@ -304,7 +289,6 @@ man_body_alloc(struct man *man, int line, int pos, enum mant tok)
return(1);
}
-
int
man_block_alloc(struct man *man, int line, int pos, enum mant tok)
{
@@ -332,7 +316,6 @@ man_word_alloc(struct man *man, int line, int pos, const char *word)
return(1);
}
-
/*
* Free all of the resources held by a node. This does NOT unlink a
* node from its context; for that, see man_node_unlink().
@@ -346,7 +329,6 @@ man_node_free(struct man_node *p)
free(p);
}
-
void
man_node_delete(struct man *man, struct man_node *p)
{
@@ -363,8 +345,6 @@ man_addeqn(struct man *man, const struct eqn *ep)
{
struct man_node *n;
- assert( ! (MAN_HALT & man->flags));
-
n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
n->eqn = ep;
@@ -380,8 +360,6 @@ man_addspan(struct man *man, const struct tbl_span *sp)
{
struct man_node *n;
- assert( ! (MAN_HALT & man->flags));
-
n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
n->span = sp;
@@ -403,7 +381,7 @@ man_descope(struct man *man, int line, int offs)
if (MAN_ELINE & man->flags) {
man->flags &= ~MAN_ELINE;
- if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
+ if ( ! man_unscope(man, man->last->parent))
return(0);
}
@@ -411,7 +389,7 @@ man_descope(struct man *man, int line, int offs)
return(1);
man->flags &= ~MAN_BLINE;
- if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
+ if ( ! man_unscope(man, man->last->parent))
return(0);
return(man_body_alloc(man, line, offs, man->last->tok));
}
@@ -448,9 +426,9 @@ man_ptext(struct man *man, int line, char *buf, int offs)
return(1);
}
- /*
+ /*
* Warn if the last un-escaped character is whitespace. Then
- * strip away the remaining spaces (tabs stay!).
+ * strip away the remaining spaces (tabs stay!).
*/
i = (int)strlen(buf);
@@ -458,7 +436,8 @@ man_ptext(struct man *man, int line, char *buf, int offs)
if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
if (i > 1 && '\\' != buf[i - 2])
- man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE);
+ mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
+ line, i - 1, NULL);
for (--i; i && ' ' == buf[i]; i--)
/* Spin back to non-space. */ ;
@@ -479,7 +458,7 @@ man_ptext(struct man *man, int line, char *buf, int offs)
*/
assert(i);
- if (mandoc_eos(buf, (size_t)i, 0))
+ if (mandoc_eos(buf, (size_t)i))
man->last->flags |= MAN_EOS;
return(man_descope(man, line, offs));
@@ -488,13 +467,15 @@ man_ptext(struct man *man, int line, char *buf, int offs)
static int
man_pmacro(struct man *man, int ln, char *buf, int offs)
{
- int i, ppos;
- enum mant tok;
char mac[5];
struct man_node *n;
+ enum mant tok;
+ int i, ppos;
+ int bline;
if ('"' == buf[offs]) {
- man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT);
+ mandoc_msg(MANDOCERR_COMMENT_BAD, man->parse,
+ ln, offs, NULL);
return(1);
} else if ('\0' == buf[offs])
return(1);
@@ -507,8 +488,8 @@ man_pmacro(struct man *man, int ln, char *buf, int offs)
*/
i = 0;
- while (i < 4 && '\0' != buf[offs] &&
- ' ' != buf[offs] && '\t' != buf[offs])
+ while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] &&
+ '\t' != buf[offs])
mac[i++] = buf[offs++];
mac[i] = '\0';
@@ -516,8 +497,8 @@ man_pmacro(struct man *man, int ln, char *buf, int offs)
tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
if (MAN_MAX == tok) {
- mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln,
- ppos, "%s", buf + ppos - 1);
+ mandoc_msg(MANDOCERR_MACRO, man->parse,
+ ln, ppos, buf + ppos - 1);
return(1);
}
@@ -526,15 +507,16 @@ man_pmacro(struct man *man, int ln, char *buf, int offs)
while (buf[offs] && ' ' == buf[offs])
offs++;
- /*
+ /*
* Trailing whitespace. Note that tabs are allowed to be passed
* into the parser as "text", so we only warn about spaces here.
*/
if ('\0' == buf[offs] && ' ' == buf[offs - 1])
- man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE);
+ mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
+ ln, offs - 1, NULL);
- /*
+ /*
* Remove prior ELINE macro, as it's being clobbered by a new
* macro. Note that NSCOPED macros do not close out ELINE
* macros---they don't print text---so we let those slip by.
@@ -550,7 +532,7 @@ man_pmacro(struct man *man, int ln, char *buf, int offs)
if (MAN_NSCOPED & man_macros[n->tok].flags)
n = n->parent;
- mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
+ mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line,
n->pos, "%s breaks %s", man_macronames[tok],
man_macronames[n->tok]);
@@ -581,7 +563,7 @@ man_pmacro(struct man *man, int ln, char *buf, int offs)
assert(MAN_BLOCK == n->type);
assert(MAN_SCOPED & man_macros[n->tok].flags);
- mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
+ mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line,
n->pos, "%s breaks %s", man_macronames[tok],
man_macronames[n->tok]);
@@ -589,63 +571,41 @@ man_pmacro(struct man *man, int ln, char *buf, int offs)
man->flags &= ~MAN_BLINE;
}
- /*
- * Save the fact that we're in the next-line for a block. In
- * this way, embedded roff instructions can "remember" state
- * when they exit.
- */
+ /* Remember whether we are in next-line scope for a block head. */
- if (MAN_BLINE & man->flags)
- man->flags |= MAN_BPLINE;
+ bline = man->flags & MAN_BLINE;
/* Call to handler... */
assert(man_macros[tok].fp);
if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
- goto err;
+ return(0);
- /*
- * We weren't in a block-line scope when entering the
- * above-parsed macro, so return.
- */
+ /* In quick mode (for mandocdb), abort after the NAME section. */
- if ( ! (MAN_BPLINE & man->flags)) {
- man->flags &= ~MAN_ILINE;
- return(1);
+ if (man->quick && MAN_SH == tok) {
+ n = man->last;
+ if (MAN_BODY == n->type &&
+ strcmp(n->prev->child->string, "NAME"))
+ return(2);
}
- man->flags &= ~MAN_BPLINE;
/*
- * If we're in a block scope, then allow this macro to slip by
- * without closing scope around it.
+ * If we are in a next-line scope for a block head,
+ * close it out now and switch to the body,
+ * unless the next-line scope is allowed to continue.
*/
- if (MAN_ILINE & man->flags) {
- man->flags &= ~MAN_ILINE;
+ if ( ! bline || man->flags & MAN_ELINE ||
+ man_macros[tok].flags & MAN_NSCOPED)
return(1);
- }
-
- /*
- * If we've opened a new next-line element scope, then return
- * now, as the next line will close out the block scope.
- */
-
- if (MAN_ELINE & man->flags)
- return(1);
-
- /* Close out the block scope opened in the prior line. */
assert(MAN_BLINE & man->flags);
man->flags &= ~MAN_BLINE;
- if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
+ if ( ! man_unscope(man, man->last->parent))
return(0);
return(man_body_alloc(man, ln, ppos, man->last->tok));
-
-err: /* Error out. */
-
- man->flags |= MAN_HALT;
- return(0);
}
/*
@@ -696,3 +656,49 @@ man_mparse(const struct man *man)
assert(man && man->parse);
return(man->parse);
}
+
+/*
+ * Collect the text found at and below node n into *dest.
+ * Non-text nodes are handled by recursing into their children.
+ * For a text node, leading whitespace and escape sequences and
+ * trailing whitespace are stripped; a string that ends up empty
+ * contributes nothing.  If *dest is NULL, it is set to a copy of
+ * the stripped text; otherwise a new string "<old> <text>" replaces
+ * *dest and the old string is freed.
+ */
+void
+man_deroff(char **dest, const struct man_node *n)
+{
+	char	*cp;
+	size_t	 sz;
+
+	if (MAN_TEXT != n->type) {
+		for (n = n->child; n; n = n->next)
+			man_deroff(dest, n);
+		return;
+	}
+
+	/* Skip leading whitespace and escape sequences. */
+
+	cp = n->string;
+	while ('\0' != *cp) {
+		if ('\\' == *cp) {
+			cp++;
+			mandoc_escape((const char **)&cp, NULL, NULL);
+		} else if (isspace((unsigned char)*cp))
+			cp++;
+		else
+			break;
+	}
+
+	/* Skip trailing whitespace. */
+
+	for (sz = strlen(cp); sz; sz--)
+		if (0 == isspace((unsigned char)cp[sz-1]))
+			break;
+
+	/* Skip empty strings. */
+
+	if (0 == sz)
+		return;
+
+	if (NULL == *dest) {
+		*dest = mandoc_strndup(cp, sz);
+		return;
+	}
+
+	/*
+	 * Use a precision, not a field width: "%*s" would only set a
+	 * minimum width and copy all of cp, including the trailing
+	 * whitespace excluded from sz above.
+	 */
+
+	mandoc_asprintf(&cp, "%s %.*s", *dest, (int)sz, cp);
+	free(*dest);
+	*dest = cp;
+}
diff --git a/man.cgi.7 b/man.cgi.7
deleted file mode 100644
index ec927ca9b10f..000000000000
--- a/man.cgi.7
+++ /dev/null
@@ -1,122 +0,0 @@
-.Dd $Mdocdate: July 13 2013 $
-.Dt MAN.CGI 7
-.Os
-.Sh NAME
-.Nm man.cgi
-.Nd cgi for manpage query and display
-.Sh SYNOPSIS
-.Nm
-.Sh DESCRIPTION
-The
-.Nm
-script queries and displays manual pages.
-It interfaces with
-.Xr mandocdb 8
-databases cached with
-.Xr catman 8 .
-.Pp
-To use
-.Nm ,
-create a manual cache in
-.Xr catman 8 .
-Assign this directory to the environment variable
-.Ev CACHE_DIR ,
-defaulting to
-.Pa /cache/man.cgi .
-Copy the
-.Pa man.cgi
-script into your CGI directory (see
-.Sx FILES
-for other relevant files).
-.Pp
-Multiple
-.Xr catman 8
-trees may be managed by
-.Nm :
-directories under
-.Ev CACHE_DIR
-containing
-.Pa etc/catman.conf
-are identified as
-.Qq manroots .
-The path of a manroot under
-.Ev CACHE_DIR
-is converted to a name by replacing path separators with spaces.
-.Pp
-Thus, if
-.Ev CACHE_DIR
-is the default
-.Pa /cache/man.cgi ,
-the web-server is jailed to
-.Pa /var/www ,
-and cache subdirectories
-.Pa ./foo/1
-and
-.Pa ./bar/2
-contain
-.Pa etc/catman.conf ,
-.Nm
-will assign these to manroots
-.Qq foo 1
-and
-.Qq bar 2 ,
-respectively.
-These names will appear as choices when searching for manuals.
-.Pp
-If
-.Nm
-finds only one manroot, or none, then the selection box is omitted.
-If no manroot is specified during search, the first manroot is used by
-default.
-.Sh ENVIRONMENT
-.Bl -tag -width Ds
-.It Ev CACHE_DIR
-The absolute path of the
-.Xr catman 8
-cache directory.
-This must not have a trailing slash.
-.It Ev CSS_DIR
-Prepended to CSS file links in outputted HTML files.
-This must not have a trailing slash.
-.El
-.Sh FILES
-.Bl -tag -width Ds
-.It Pa etc/catman.conf
-Built by
-.Xr catman 8
-and must exist at least once under the configuration directory root.
-.It Pa man.css
-Should be visible in the server document root or within
-.Ev CSS_DIR .
-Included in each page after
-.Pa man-cgi.css ,
-ostensibly for
-.Xr mandoc 1
-HTML output styling.
-.It Pa man.cgi.css
-Should be visible in the server document root or within
-.Ev CSS_DIR .
-Included in each page, ostensibly for general
-.Nm
-styling.
-.El
-.Sh COMPATIBILITY
-The
-.Nm
-script is call-compatible with queries from the traditional
-.Pa man.cgi
-script by Wolfram Schneider.
-However, the results may not be quite the same.
-.Sh SEE ALSO
-.Xr catman 8 ,
-.Xr mandocdb 8
-.Sh AUTHORS
-The
-.Nm
-utility was written by
-.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
-.Sh CAVEATS
-If you're running in a jailed web-server, make sure the
-.Pa /tmp
-directory exists and is writable.
-The databases may need this for scratch space.
diff --git a/man.cgi.8 b/man.cgi.8
new file mode 100644
index 000000000000..69335e632ec9
--- /dev/null
+++ b/man.cgi.8
@@ -0,0 +1,409 @@
+.\" $Id: man.cgi.8,v 1.9 2014/07/22 18:14:13 schwarze Exp $
+.\"
+.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: July 22 2014 $
+.Dt MAN.CGI 8
+.Os
+.Sh NAME
+.Nm man.cgi
+.Nd CGI program to search and display manual pages
+.Sh DESCRIPTION
+The
+.Nm
+CGI program searches for manual pages on a WWW server
+and displays them to HTTP clients,
+providing functionality equivalent to the
+.Xr apropos 1
+and
+.Xr man 1
+utilities.
+It can use multiple manual trees in parallel.
+.Ss HTML search interface
+At the top of each generated HTML page,
+.Nm
+displays a search form containing these elements:
+.Bl -enum
+.It
+An input box for search queries, expecting
+either a name of a manual page or an
+.Ar expression
+using the syntax described in the
+.Xr apropos 1
+manual; filling this in is required for each search.
+.It
+A
+.Dq Submit
+button to send a search request from the client to the server.
+.It
+A
+.Dq Reset
+button to undo any changes to the input boxes and the dropdown menus
+and reset them to the values contained in the
+.Ev QUERY_STRING .
+.It
+Radio buttons to select pages either by name like in
+.Xr man 1
+or using
+.Xr apropos 1
+queries.
+.It
+A dropdown menu to optionally select a manual section.
+If one is provided, it has the same effect as the
+.Xr man 1
+and
+.Xr apropos 1
+.Fl s
+option.
+Otherwise, pages from all sections are shown.
+.It
+A dropdown menu to optionally select an architecture.
+If one is provided, it has the same effect as the
+.Xr man 1
+and
+.Xr apropos 1
+.Fl S
+option.
+By default, pages for all architectures are shown.
+.It
+A dropdown menu to select a manual tree.
+If the configuration file
+.Pa /var/www/man/manpath.conf
+contains only one manpath, the dropdown menu is not shown.
+By default, the first manpath given in the file is used.
+.El
+.Ss Program output
+The
+.Nm
+program generates five kinds of output pages:
+.Bl -tag -width Ds
+.It The index page.
+This is returned when calling
+.Nm
+without
+.Ev PATH_INFO
+and without a
+.Ev QUERY_STRING .
+It serves as a starting point for using the program
+and shows the search form only.
+.It A list page.
+Lists are returned when searches match more than one manual page.
+The first column shows the names and section numbers of manuals
+as clickable links.
+The second column shows the one-line descriptions of the manuals.
+.It A manual page.
+This output format is used when a search matches exactly one
+manual page, or when a link on a list page or an
+.Ic \&Xr
+link on another manual page is followed.
+.It A no-result page.
+This is shown when a search request returns no results -
+either because it violates the query syntax, or because
+the search does not match any manual pages.
+.It \&An error page.
+This cannot happen by merely clicking the
+.Dq Submit
+button, but only by manually entering an invalid URI.
+It does not show the search form, but only an error message
+and a link back to the index page.
+.El
+.Ss Setup
+For each manual tree, create one first-level subdirectory below
+.Pa /var/www/man .
+The name of one of these directories is called a
+.Dq manpath
+in the context of
+.Nm .
+Create a single ASCII text file
+.Pa /var/www/man/manpath.conf
+containing the names of these directories, one per line.
+The directory given first is used as the default manpath.
+.Pp
+Inside each of these directories, use the same directory and file
+structure as found below
+.Pa /usr/share/man ,
+that is, second-level subdirectories
+.Pa /var/www/man/*/man1 , /var/www/man/*/man2
+etc. containing source
+.Xr mdoc 7
+and
+.Xr man 7
+manuals with file name extensions matching the section numbers,
+second-level subdirectories
+.Pa /var/www/man/*/cat1 , /var/www/man/*/cat2
+etc. containing preformatted manuals with the file name extension
+.Sq 0 ,
+and optional third-level subdirectories for architectures.
+Use
+.Xr makewhatis 8
+to create a
+.Xr mandoc.db 5
+database inside each manpath.
+.Pp
+Configure your web server to execute CGI programs located in
+.Pa /cgi-bin .
+When using
+.Xr nginx 8 ,
+the
+.Xr slowcgi 8
+proxy daemon is needed to translate FastCGI requests to plain old CGI.
+.Pp
+To compile
+.Nm ,
+first copy
+.Pa cgi.h.example
+to
+.Pa cgi.h
+and edit it according to your needs.
+It contains the following compile-time definitions:
+.Bl -tag -width Ds
+.It Ev COMPAT_OLDURI
+Only useful for running on www.openbsd.org to deal with old URIs containing
+.Qq "manpath=OpenBSD "
+where the blank character has to be translated to a hyphen.
+When compiling for other sites, this definition can be deleted.
+.It Ev CSS_DIR
+An optional path to the directory containing the CSS files,
+to be specified relative to the server's document root,
+and to be specified without a trailing slash.
+When not specified, the CSS files
+are assumed to be in the document root.
+This is used in generated HTML code.
+.It Ev CUSTOMIZE_BEGIN
+An HTML string to be inserted right after opening the
+.Aq BODY
+element.
+.It Ev CUSTOMIZE_TITLE
+An ASCII string to be used for the HTML
+.Aq TITLE
+element.
+.It Ev HTTP_HOST
+The FQDN of the (possibly virtual) host the HTTP server is running on.
+This is used for
+.Ic Location:
+headers in HTTP 303 responses.
+.It Ev MAN_DIR
+A path to the
+.Nm
+data directory to be used instead of
+.Pa /var/www/man ,
+relative to the web server
+.Xr chroot 2
+directory, to be specified without a trailing slash.
+This is prepended to the manpath when opening
+.Xr mandoc.db 5
+and manual page files.
+.El
+.Pp
+After editing
+.Pa cgi.h ,
+run
+.Pp
+.Dl make man.cgi
+.Pp
+and copy the files to the proper locations.
+Reading the
+.Cm installcgi
+target in the
+.Pa Makefile
+can help with that, but do not run it without carefully checking it
+because the directory layouts of web servers vary greatly.
+.Ss URI interface
+.Nm
+uniform resource identifiers are not needed for interactive use,
+but can be useful for deep linking.
+They consist of:
+.Bl -enum
+.It
+The
+.Cm http://
+protocol specifier.
+.It
+The host name and a following slash.
+.It
+The path to the program, normally
+.Pa cgi-bin/man.cgi/ .
+.It
+To show a single page, a slash, the manpath, another slash,
+and the name of the requested file, for example
+.Pa /OpenBSD-current/man1/mandoc.1 .
+.It
+For searches, a query string starting with a question mark
+and consisting of
+.Ar key Ns = Ns Ar value
+pairs, separated by ampersands, for example
+.Pa ?manpath=OpenBSD-current&query=mandoc .
+Supported keys are
+.Cm manpath ,
+.Cm query ,
+.Cm sec ,
+.Cm arch ,
+corresponding to
+.Xr apropos 1
+.Fl M ,
+.Ar expression ,
+.Fl s ,
+.Fl S ,
+respectively, and
+.Cm apropos ,
+which is a boolean parameter to select or deselect the
+.Xr apropos 1
+query mode.
+For backward compatibility with the traditional
+.Nm ,
+.Cm sektion
+is supported as an alias for
+.Cm sec .
+.El
+.Ss Restricted character set
+For security reasons, in particular to prevent cross site scripting
+attacks, some strings used by
+.Nm
+can only contain the following characters:
+.Pp
+.Bl -dash -compact -offset indent
+.It
+lower case and upper case ASCII letters
+.It
+the ten decimal digits
+.It
+the dash
+.Pq Sq -
+.It
+the dot
+.Pq Sq \&.
+.It
+the slash
+.Pq Sq /
+.It
+the underscore
+.Pq Sq _
+.El
+.Pp
+In particular, this applies to the
+.Ev SCRIPT_NAME ,
+to all manpaths, and to all architecture names.
+.Sh ENVIRONMENT
+The web server may pass the following CGI variables to
+.Nm :
+.Bl -tag -width Ds
+.It Ev PATH_INFO
+The final part of the URI path passed from the client to the server,
+starting after the
+.Ev SCRIPT_NAME
+and ending before the
+.Ev QUERY_STRING .
+It is used by the
+.Cm show
+page to acquire the manpath and filename it needs.
+.It Ev QUERY_STRING
+The HTTP query string passed from the client to the server.
+It is the final part of the URI, after the question mark.
+It is used by the
+.Cm search
+page to acquire the named parameters it needs.
+.It Ev SCRIPT_NAME
+The path to the
+.Nm
+binary relative to the server root, usually
+.Pa /cgi-bin/man.cgi .
+This is used for generating URIs to be embedded
+in generated HTML code and HTTP headers.
+If this contains any character not contained in the
+.Sx Restricted character set ,
+.Nm
+reports an internal server error and exits without doing anything.
+.El
+.Sh FILES
+.Bl -tag -width Ds
+.It Pa /var/www
+Default web server
+.Xr chroot 2
+directory.
+All the following paths are specified relative to this directory.
+.It Pa /cgi-bin/man.cgi
+The path to the
+.Nm
+program relative to the server root.
+Can be overridden by
+.Ev SCRIPT_NAME .
+.It Pa /htdocs
+The path to the server document root relative to the server root.
+This is part of the web server configuration and not specific to
+.Nm .
+.It Pa /htdocs/man-cgi.css
+A style sheet for general
+.Nm
+styling, referenced from each generated HTML page.
+.It Pa /htdocs/man.css
+A style sheet for
+.Xr mandoc 1
+HTML styling, referenced from each generated HTML page after
+.Pa man-cgi.css .
+.It Pa /man
+Default
+.Nm
+data directory containing all the manual trees.
+Can be overridden by
+.Ev MAN_DIR .
+.It Pa /man/mandoc/man1/apropos.1 , /man/mandoc/man8/man.cgi.8
+Manual pages documenting
+.Nm
+itself, linked from the index page.
+.It Pa /man/manpath.conf
+The list of available manpaths, one per line.
+If any of the lines in this file contains a slash
+.Pq Sq /
+or any character not contained in the
+.Sx Restricted character set ,
+.Nm
+reports an internal server error and exits without doing anything.
+.It Pa /man/OpenBSD-current/man1/mandoc.1
+An example
+.Xr mdoc 7
+source file located below the
+.Dq OpenBSD-current
+manpath.
+.El
+.Sh COMPATIBILITY
+The
+.Nm
+CGI program is call-compatible with queries from the traditional
+.Pa man.cgi
+script by Wolfram Schneider.
+However, the output may not be quite the same.
+.Sh SEE ALSO
+.Xr apropos 1 ,
+.Xr mandoc.db 5 ,
+.Xr makewhatis 8 ,
+.Xr slowcgi 8
+.Sh HISTORY
+A version of
+.Nm
+based on
+.Xr mandoc 1
+first appeared in mdocml-1.12.1 (March 2012).
+The current SQLite3-based version first appeared in
+.Ox 5.6 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+program was written by
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
+and ported to the SQLite3-based
+.Xr mandoc.db 5
+backend by
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org .
diff --git a/man.h b/man.h
index ef9480f2768c..aa80b6732eae 100644
--- a/man.h
+++ b/man.h
@@ -1,6 +1,7 @@
-/* $Id: man.h,v 1.62 2013/10/17 20:54:58 schwarze Exp $ */
+/* $Id: man.h,v 1.65 2014/06/20 23:02:31 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -56,6 +57,7 @@ enum mant {
MAN_EE,
MAN_UR,
MAN_UE,
+ MAN_ll,
MAN_MAX
};
@@ -77,6 +79,7 @@ struct man_meta {
char *vol; /* `TH' volume */
char *title; /* `TH' title (e.g., FOO) */
char *source; /* `TH' source (e.g., GNU) */
+ int hasbody; /* document is not empty */
};
struct man_node {
@@ -111,6 +114,7 @@ struct man;
const struct man_node *man_node(const struct man *);
const struct man_meta *man_meta(const struct man *);
const struct mparse *man_mparse(const struct man *);
+void man_deroff(char **, const struct man_node *);
__END_DECLS
diff --git a/man_hash.c b/man_hash.c
index 86c5c40a199b..ab887226b5de 100644
--- a/man_hash.c
+++ b/man_hash.c
@@ -1,4 +1,4 @@
-/* $Id: man_hash.c,v 1.25 2011/07/24 18:15:14 kristaps Exp $ */
+/* $Id: man_hash.c,v 1.27 2014/04/20 16:46:04 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -23,7 +23,6 @@
#include <assert.h>
#include <ctype.h>
#include <limits.h>
-#include <stdlib.h>
#include <string.h>
#include "man.h"
@@ -49,6 +48,7 @@
*/
static unsigned char table[26 * HASH_DEPTH];
+
/*
* XXX - this hash has global scope, so if intended for use as a library
* with multiple callers, it will need re-invocation protection.
@@ -60,8 +60,7 @@ man_hash_init(void)
memset(table, UCHAR_MAX, sizeof(table));
- assert(/* LINTED */
- MAN_MAX < UCHAR_MAX);
+ assert(MAN_MAX < UCHAR_MAX);
for (i = 0; i < (int)MAN_MAX; i++) {
x = man_macronames[i][0];
@@ -80,7 +79,6 @@ man_hash_init(void)
}
}
-
enum mant
man_hash_find(const char *tmp)
{
diff --git a/man_html.c b/man_html.c
index 2c4e220a1181..9689cc261e5f 100644
--- a/man_html.c
+++ b/man_html.c
@@ -1,7 +1,7 @@
-/* $Id: man_html.c,v 1.90 2013/10/17 20:54:58 schwarze Exp $ */
+/* $Id: man_html.c,v 1.96 2014/08/01 19:25:52 schwarze Exp $ */
/*
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -28,6 +28,7 @@
#include <string.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "out.h"
#include "html.h"
#include "man.h"
@@ -53,7 +54,7 @@ struct htmlman {
int (*post)(MAN_ARGS);
};
-static void print_bvspace(struct html *,
+static void print_bvspace(struct html *,
const struct man_node *);
static void print_man(MAN_ARGS);
static void print_man_head(MAN_ARGS);
@@ -90,7 +91,7 @@ static const struct htmlman mans[MAN_MAX] = {
{ man_PP_pre, NULL }, /* PP */
{ man_PP_pre, NULL }, /* P */
{ man_IP_pre, NULL }, /* IP */
- { man_HP_pre, NULL }, /* HP */
+ { man_HP_pre, NULL }, /* HP */
{ man_SM_pre, NULL }, /* SM */
{ man_SM_pre, NULL }, /* SB */
{ man_alt_pre, NULL }, /* BI */
@@ -119,8 +120,10 @@ static const struct htmlman mans[MAN_MAX] = {
{ man_literal_pre, NULL }, /* EE */
{ man_UR_pre, NULL }, /* UR */
{ NULL, NULL }, /* UE */
+ { man_ign_pre, NULL }, /* ll */
};
+
/*
* Printing leading vertical space before a block.
* This is used for the paragraph macros.
@@ -155,7 +158,7 @@ html_man(void *arg, const struct man *man)
}
static void
-print_man(MAN_ARGS)
+print_man(MAN_ARGS)
{
struct tag *t, *tt;
struct htmlpair tag;
@@ -170,15 +173,13 @@ print_man(MAN_ARGS)
print_tagq(h, tt);
print_otag(h, TAG_BODY, 0, NULL);
print_otag(h, TAG_DIV, 1, &tag);
- } else
+ } else
t = print_otag(h, TAG_DIV, 1, &tag);
print_man_nodelist(man, n, mh, h);
print_tagq(h, t);
}
-
-/* ARGSUSED */
static void
print_man_head(MAN_ARGS)
{
@@ -191,7 +192,6 @@ print_man_head(MAN_ARGS)
print_text(h, h->buf);
}
-
static void
print_man_nodelist(MAN_ARGS)
{
@@ -201,7 +201,6 @@ print_man_nodelist(MAN_ARGS)
print_man_nodelist(man, n->next, mh, h);
}
-
static void
print_man_node(MAN_ARGS)
{
@@ -212,10 +211,10 @@ print_man_node(MAN_ARGS)
t = h->tags.head;
switch (n->type) {
- case (MAN_ROOT):
+ case MAN_ROOT:
man_root_pre(man, n, mh, h);
break;
- case (MAN_TEXT):
+ case MAN_TEXT:
/*
* If we have a blank line, output a vertical space.
* If we have a space as the first character, break
@@ -233,10 +232,10 @@ print_man_node(MAN_ARGS)
print_text(h, n->string);
return;
- case (MAN_EQN):
+ case MAN_EQN:
print_eqn(h, n->eqn);
break;
- case (MAN_TBL):
+ case MAN_TBL:
/*
* This will take care of initialising all of the table
* state data for the first table, then tearing it down
@@ -245,7 +244,7 @@ print_man_node(MAN_ARGS)
print_tbl(h, n->span);
return;
default:
- /*
+ /*
* Close out scope of font prior to opening a macro
* scope.
*/
@@ -275,10 +274,10 @@ print_man_node(MAN_ARGS)
print_stagq(h, t);
switch (n->type) {
- case (MAN_ROOT):
+ case MAN_ROOT:
man_root_post(man, n, mh, h);
break;
- case (MAN_EQN):
+ case MAN_EQN:
break;
default:
if (mans[n->tok].post)
@@ -287,7 +286,6 @@ print_man_node(MAN_ARGS)
}
}
-
static int
a2width(const struct man_node *n, struct roffsu *su)
{
@@ -300,22 +298,16 @@ a2width(const struct man_node *n, struct roffsu *su)
return(0);
}
-
-/* ARGSUSED */
static void
man_root_pre(MAN_ARGS)
{
struct htmlpair tag[3];
struct tag *t, *tt;
- char b[BUFSIZ], title[BUFSIZ];
-
- b[0] = 0;
- if (man->vol)
- (void)strlcat(b, man->vol, BUFSIZ);
+ char *title;
assert(man->title);
assert(man->msec);
- snprintf(title, BUFSIZ - 1, "%s(%s)", man->title, man->msec);
+ mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
PAIR_SUMMARY_INIT(&tag[0], "Document Header");
PAIR_CLASS_INIT(&tag[1], "head");
@@ -338,7 +330,8 @@ man_root_pre(MAN_ARGS)
PAIR_CLASS_INIT(&tag[0], "head-vol");
PAIR_INIT(&tag[1], ATTR_ALIGN, "center");
print_otag(h, TAG_TD, 2, tag);
- print_text(h, b);
+ if (NULL != man->vol)
+ print_text(h, man->vol);
print_stagq(h, tt);
PAIR_CLASS_INIT(&tag[0], "head-rtitle");
@@ -346,10 +339,9 @@ man_root_pre(MAN_ARGS)
print_otag(h, TAG_TD, 2, tag);
print_text(h, title);
print_tagq(h, t);
+ free(title);
}
-
-/* ARGSUSED */
static void
man_root_post(MAN_ARGS)
{
@@ -383,7 +375,6 @@ man_root_post(MAN_ARGS)
}
-/* ARGSUSED */
static int
man_br_pre(MAN_ARGS)
{
@@ -397,7 +388,7 @@ man_br_pre(MAN_ARGS)
if ( ! a2roffsu(n->string, &su, SCALE_VS))
SCALE_VS_INIT(&su, atoi(n->string));
} else
- su.scale = 0;
+ su.scale = 0.0;
bufinit(h);
bufcat_su(h, "height", &su);
@@ -410,7 +401,6 @@ man_br_pre(MAN_ARGS)
return(0);
}
-/* ARGSUSED */
static int
man_SH_pre(MAN_ARGS)
{
@@ -428,7 +418,6 @@ man_SH_pre(MAN_ARGS)
return(1);
}
-/* ARGSUSED */
static int
man_alt_pre(MAN_ARGS)
{
@@ -437,7 +426,7 @@ man_alt_pre(MAN_ARGS)
enum htmltag fp;
struct tag *t;
- if ((savelit = mh->fl & MANH_LITERAL))
+ if ((savelit = mh->fl & MANH_LITERAL))
print_otag(h, TAG_BR, 0, NULL);
mh->fl &= ~MANH_LITERAL;
@@ -445,22 +434,22 @@ man_alt_pre(MAN_ARGS)
for (i = 0, nn = n->child; nn; nn = nn->next, i++) {
t = NULL;
switch (n->tok) {
- case (MAN_BI):
+ case MAN_BI:
fp = i % 2 ? TAG_I : TAG_B;
break;
- case (MAN_IB):
+ case MAN_IB:
fp = i % 2 ? TAG_B : TAG_I;
break;
- case (MAN_RI):
+ case MAN_RI:
fp = i % 2 ? TAG_I : TAG_MAX;
break;
- case (MAN_IR):
+ case MAN_IR:
fp = i % 2 ? TAG_MAX : TAG_I;
break;
- case (MAN_BR):
+ case MAN_BR:
fp = i % 2 ? TAG_MAX : TAG_B;
break;
- case (MAN_RB):
+ case MAN_RB:
fp = i % 2 ? TAG_B : TAG_MAX;
break;
default:
@@ -486,18 +475,16 @@ man_alt_pre(MAN_ARGS)
return(0);
}
-/* ARGSUSED */
static int
man_SM_pre(MAN_ARGS)
{
-
+
print_otag(h, TAG_SMALL, 0, NULL);
if (MAN_SB == n->tok)
print_otag(h, TAG_B, 0, NULL);
return(1);
}
-/* ARGSUSED */
static int
man_SS_pre(MAN_ARGS)
{
@@ -515,7 +502,6 @@ man_SS_pre(MAN_ARGS)
return(1);
}
-/* ARGSUSED */
static int
man_PP_pre(MAN_ARGS)
{
@@ -528,13 +514,12 @@ man_PP_pre(MAN_ARGS)
return(1);
}
-/* ARGSUSED */
static int
man_IP_pre(MAN_ARGS)
{
const struct man_node *nn;
- if (MAN_BODY == n->type) {
+ if (MAN_BODY == n->type) {
print_otag(h, TAG_DD, 0, NULL);
return(1);
} else if (MAN_HEAD != n->type) {
@@ -553,15 +538,19 @@ man_IP_pre(MAN_ARGS)
/* For TP, only print next-line header elements. */
- if (MAN_TP == n->tok)
- for (nn = n->child; nn; nn = nn->next)
- if (nn->line > n->line)
- print_man_node(man, nn, mh, h);
+ if (MAN_TP == n->tok) {
+ nn = n->child;
+ while (NULL != nn && 0 == (MAN_LINE & nn->flags))
+ nn = nn->next;
+ while (NULL != nn) {
+ print_man_node(man, nn, mh, h);
+ nn = nn->next;
+ }
+ }
return(0);
}
-/* ARGSUSED */
static int
man_HP_pre(MAN_ARGS)
{
@@ -590,7 +579,6 @@ man_HP_pre(MAN_ARGS)
return(1);
}
-/* ARGSUSED */
static int
man_OP_pre(MAN_ARGS)
{
@@ -620,8 +608,6 @@ man_OP_pre(MAN_ARGS)
return(0);
}
-
-/* ARGSUSED */
static int
man_B_pre(MAN_ARGS)
{
@@ -630,16 +616,14 @@ man_B_pre(MAN_ARGS)
return(1);
}
-/* ARGSUSED */
static int
man_I_pre(MAN_ARGS)
{
-
+
print_otag(h, TAG_I, 0, NULL);
return(1);
}
-/* ARGSUSED */
static int
man_literal_pre(MAN_ARGS)
{
@@ -653,7 +637,6 @@ man_literal_pre(MAN_ARGS)
return(0);
}
-/* ARGSUSED */
static int
man_in_pre(MAN_ARGS)
{
@@ -662,7 +645,6 @@ man_in_pre(MAN_ARGS)
return(0);
}
-/* ARGSUSED */
static int
man_ign_pre(MAN_ARGS)
{
@@ -670,7 +652,6 @@ man_ign_pre(MAN_ARGS)
return(0);
}
-/* ARGSUSED */
static int
man_RS_pre(MAN_ARGS)
{
@@ -693,7 +674,6 @@ man_RS_pre(MAN_ARGS)
return(1);
}
-/* ARGSUSED */
static int
man_UR_pre(MAN_ARGS)
{
diff --git a/man_macro.c b/man_macro.c
index 479d0484c41f..ea45a504db4c 100644
--- a/man_macro.c
+++ b/man_macro.c
@@ -1,4 +1,4 @@
-/* $Id: man_macro.c,v 1.79 2013/12/25 00:50:05 schwarze Exp $ */
+/* $Id: man_macro.c,v 1.87 2014/07/30 23:01:39 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
@@ -40,17 +40,15 @@ static int blk_close(MACRO_PROT_ARGS);
static int blk_exp(MACRO_PROT_ARGS);
static int blk_imp(MACRO_PROT_ARGS);
static int in_line_eoln(MACRO_PROT_ARGS);
-static int man_args(struct man *, int,
+static int man_args(struct man *, int,
int *, char *, char **);
-static int rew_scope(enum man_type,
+static int rew_scope(enum man_type,
struct man *, enum mant);
-static enum rew rew_dohalt(enum mant, enum man_type,
+static enum rew rew_dohalt(enum mant, enum man_type,
const struct man_node *);
-static enum rew rew_block(enum mant, enum man_type,
+static enum rew rew_block(enum mant, enum man_type,
const struct man_node *);
-static void rew_warn(struct man *,
- struct man_node *, enum mandocerr);
const struct man_macro __man_macros[MAN_MAX] = {
{ in_line_eoln, MAN_NSCOPED }, /* br */
@@ -91,85 +89,80 @@ const struct man_macro __man_macros[MAN_MAX] = {
{ in_line_eoln, MAN_BSCOPE }, /* EE */
{ blk_exp, MAN_BSCOPE | MAN_EXPLICIT }, /* UR */
{ blk_close, 0 }, /* UE */
+ { in_line_eoln, 0 }, /* ll */
};
const struct man_macro * const man_macros = __man_macros;
-/*
- * Warn when "n" is an explicit non-roff macro.
- */
-static void
-rew_warn(struct man *man, struct man_node *n, enum mandocerr er)
-{
-
- if (er == MANDOCERR_MAX || MAN_BLOCK != n->type)
- return;
- if (MAN_VALID & n->flags)
- return;
- if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags))
- return;
-
- assert(er < MANDOCERR_FATAL);
- man_nmsg(man, n, er);
-}
-
-
-/*
- * Rewind scope. If a code "er" != MANDOCERR_MAX has been provided, it
- * will be used if an explicit block scope is being closed out.
- */
int
-man_unscope(struct man *man, const struct man_node *to,
- enum mandocerr er)
+man_unscope(struct man *man, const struct man_node *to)
{
struct man_node *n;
- assert(to);
-
man->next = MAN_NEXT_SIBLING;
+ to = to->parent;
+ n = man->last;
+ while (n != to) {
+
+ /* Reached the end of the document? */
+
+ if (to == NULL && ! (n->flags & MAN_VALID)) {
+ if (man->flags & (MAN_BLINE | MAN_ELINE) &&
+ man_macros[n->tok].flags & MAN_SCOPED) {
+ mandoc_vmsg(MANDOCERR_BLK_LINE,
+ man->parse, n->line, n->pos,
+ "EOF breaks %s",
+ man_macronames[n->tok]);
+ if (man->flags & MAN_ELINE)
+ man->flags &= ~MAN_ELINE;
+ else {
+ assert(n->type == MAN_HEAD);
+ n = n->parent;
+ man->flags &= ~MAN_BLINE;
+ }
+ man->last = n;
+ n = n->parent;
+ man_node_delete(man, man->last);
+ continue;
+ }
+ if (n->type == MAN_BLOCK &&
+ man_macros[n->tok].flags & MAN_EXPLICIT)
+ mandoc_msg(MANDOCERR_BLK_NOEND,
+ man->parse, n->line, n->pos,
+ man_macronames[n->tok]);
+ }
- /* LINTED */
- while (man->last != to) {
/*
- * Save the parent here, because we may delete the
- * man->last node in the post-validation phase and reset
- * it to man->last->parent, causing a step in the closing
- * out to be lost.
+ * We might delete the man->last node
+ * in the post-validation phase.
+ * Save a pointer to the parent such that
+ * we know where to continue the iteration.
*/
- n = man->last->parent;
- rew_warn(man, man->last, er);
+ man->last = n;
+ n = n->parent;
if ( ! man_valid_post(man))
return(0);
- man->last = n;
- assert(man->last);
}
-
- rew_warn(man, man->last, er);
- if ( ! man_valid_post(man))
- return(0);
-
return(1);
}
-
static enum rew
rew_block(enum mant ntok, enum man_type type, const struct man_node *n)
{
- if (MAN_BLOCK == type && ntok == n->parent->tok &&
- MAN_BODY == n->parent->type)
+ if (MAN_BLOCK == type && ntok == n->parent->tok &&
+ MAN_BODY == n->parent->type)
return(REW_REWIND);
return(ntok == n->tok ? REW_HALT : REW_NOHALT);
}
-
/*
* There are three scope levels: scoped to the root (all), scoped to the
* section (all less sections), and scoped to subsections (all less
* sections and subsections).
*/
-static enum rew
+static enum rew
rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
{
enum rew c;
@@ -196,20 +189,20 @@ rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
return(REW_REWIND);
}
- /*
+ /*
* Next follow the implicit scope-smashings as defined by man.7:
* section, sub-section, etc.
*/
switch (tok) {
- case (MAN_SH):
+ case MAN_SH:
break;
- case (MAN_SS):
+ case MAN_SS:
/* Rewind to a section, if a block. */
if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
return(c);
break;
- case (MAN_RS):
+ case MAN_RS:
/* Preserve empty paragraphs before RS. */
if (0 == n->nchild && (MAN_P == n->tok ||
MAN_PP == n->tok || MAN_LP == n->tok))
@@ -237,7 +230,6 @@ rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
return(REW_NOHALT);
}
-
/*
* Rewinding entails ascending the parse tree until a coherent point,
* for example, the `SH' macro will close out any intervening `SS'
@@ -249,9 +241,8 @@ rew_scope(enum man_type type, struct man *man, enum mant tok)
struct man_node *n;
enum rew c;
- /* LINTED */
for (n = man->last; n; n = n->parent) {
- /*
+ /*
* Whether we should stop immediately (REW_HALT), stop
* and rewind until this point (REW_REWIND), or keep
* rewinding (REW_NOHALT).
@@ -263,31 +254,30 @@ rew_scope(enum man_type type, struct man *man, enum mant tok)
break;
}
- /*
+ /*
* Rewind until the current point. Warn if we're a roff
* instruction that's mowing over explicit scopes.
*/
assert(n);
- return(man_unscope(man, n, MANDOCERR_MAX));
+ return(man_unscope(man, n));
}
/*
* Close out a generic explicit macro.
*/
-/* ARGSUSED */
int
blk_close(MACRO_PROT_ARGS)
{
- enum mant ntok;
+ enum mant ntok;
const struct man_node *nn;
switch (tok) {
- case (MAN_RE):
+ case MAN_RE:
ntok = MAN_RS;
break;
- case (MAN_UE):
+ case MAN_UE:
ntok = MAN_UR;
break;
default:
@@ -300,17 +290,16 @@ blk_close(MACRO_PROT_ARGS)
break;
if (NULL == nn) {
- man_pmsg(man, line, ppos, MANDOCERR_NOSCOPE);
+ mandoc_msg(MANDOCERR_BLK_NOTOPEN, man->parse,
+ line, ppos, man_macronames[tok]);
if ( ! rew_scope(MAN_BLOCK, man, MAN_PP))
return(0);
- } else
- man_unscope(man, nn, MANDOCERR_MAX);
+ } else
+ man_unscope(man, nn);
return(1);
}
-
-/* ARGSUSED */
int
blk_exp(MACRO_PROT_ARGS)
{
@@ -343,22 +332,19 @@ blk_exp(MACRO_PROT_ARGS)
if (n->tok != tok)
continue;
assert(MAN_HEAD == n->type);
- man_unscope(man, n, MANDOCERR_MAX);
+ man_unscope(man, n);
break;
}
return(man_body_alloc(man, line, ppos, tok));
}
-
-
/*
* Parse an implicit-block macro. These contain a MAN_HEAD and a
* MAN_BODY contained within a MAN_BLOCK. Rules for closing out other
* scopes, such as `SH' closing out an `SS', are defined in the rew
* routines.
*/
-/* ARGSUSED */
int
blk_imp(MACRO_PROT_ARGS)
{
@@ -410,8 +396,6 @@ blk_imp(MACRO_PROT_ARGS)
return(man_body_alloc(man, line, ppos, tok));
}
-
-/* ARGSUSED */
int
in_line_eoln(MACRO_PROT_ARGS)
{
@@ -438,7 +422,7 @@ in_line_eoln(MACRO_PROT_ARGS)
*/
if (n != man->last &&
- mandoc_eos(man->last->string, strlen(man->last->string), 0))
+ mandoc_eos(man->last->string, strlen(man->last->string)))
man->last->flags |= MAN_EOS;
/*
@@ -451,18 +435,11 @@ in_line_eoln(MACRO_PROT_ARGS)
assert( ! (MAN_NSCOPED & man_macros[tok].flags));
man->flags |= MAN_ELINE;
return(1);
- }
-
- /* Set ignorable context, if applicable. */
-
- if (MAN_NSCOPED & man_macros[tok].flags) {
- assert( ! (MAN_SCOPED & man_macros[tok].flags));
- man->flags |= MAN_ILINE;
}
assert(MAN_ROOT != man->last->type);
man->next = MAN_NEXT_SIBLING;
-
+
/*
* Rewind our element scope. Note that when TH is pruned, we'll
* be back at the root, so make sure that we don't clobber as
@@ -481,7 +458,7 @@ in_line_eoln(MACRO_PROT_ARGS)
assert(man->last);
/*
- * Same here regarding whether we're back at the root.
+ * Same here regarding whether we're back at the root.
*/
if (man->last->type != MAN_ROOT && ! man_valid_post(man))
@@ -495,7 +472,7 @@ int
man_macroend(struct man *man)
{
- return(man_unscope(man, man->first, MANDOCERR_SCOPEEXIT));
+ return(man_unscope(man, man->first));
}
static int
diff --git a/man_term.c b/man_term.c
index 4bd62443b430..c91c0746201a 100644
--- a/man_term.c
+++ b/man_term.c
@@ -1,7 +1,7 @@
-/* $Id: man_term.c,v 1.139 2013/12/22 23:34:13 schwarze Exp $ */
+/* $Id: man_term.c,v 1.149 2014/06/20 23:02:31 schwarze Exp $ */
/*
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -28,6 +28,7 @@
#include <string.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "out.h"
#include "man.h"
#include "term.h"
@@ -45,7 +46,7 @@ struct mtermp {
int pardist; /* vert. space before par., unit: [v] */
};
-#define DECL_ARGS struct termp *p, \
+#define DECL_ARGS struct termp *p, \
struct mtermp *mt, \
const struct man_node *n, \
const struct man_meta *meta
@@ -64,7 +65,7 @@ static void print_man_nodelist(DECL_ARGS);
static void print_man_node(DECL_ARGS);
static void print_man_head(struct termp *, const void *);
static void print_man_foot(struct termp *, const void *);
-static void print_bvspace(struct termp *,
+static void print_bvspace(struct termp *,
const struct man_node *, int);
static int pre_B(DECL_ARGS);
@@ -84,6 +85,7 @@ static int pre_ft(DECL_ARGS);
static int pre_ign(DECL_ARGS);
static int pre_in(DECL_ARGS);
static int pre_literal(DECL_ARGS);
+static int pre_ll(DECL_ARGS);
static int pre_sp(DECL_ARGS);
static void post_IP(DECL_ARGS);
@@ -104,7 +106,7 @@ static const struct termact termacts[MAN_MAX] = {
{ pre_PP, NULL, 0 }, /* PP */
{ pre_PP, NULL, 0 }, /* P */
{ pre_IP, post_IP, 0 }, /* IP */
- { pre_HP, post_HP, 0 }, /* HP */
+ { pre_HP, post_HP, 0 }, /* HP */
{ NULL, NULL, 0 }, /* SM */
{ pre_B, NULL, 0 }, /* SB */
{ pre_alternate, NULL, 0 }, /* BI */
@@ -133,10 +135,10 @@ static const struct termact termacts[MAN_MAX] = {
{ pre_literal, NULL, 0 }, /* EE */
{ pre_UR, post_UR, 0 }, /* UR */
{ NULL, NULL, 0 }, /* UE */
+ { pre_ll, NULL, MAN_NOTEXT }, /* ll */
};
-
void
terminal_man(void *arg, const struct man *man)
{
@@ -187,7 +189,6 @@ a2height(const struct termp *p, const char *cp)
return(term_vspan(p, &su));
}
-
static int
a2width(const struct termp *p, const char *cp)
{
@@ -226,7 +227,7 @@ print_bvspace(struct termp *p, const struct man_node *n, int pardist)
term_vspace(p);
}
-/* ARGSUSED */
+
static int
pre_ign(DECL_ARGS)
{
@@ -234,8 +235,14 @@ pre_ign(DECL_ARGS)
return(0);
}
+static int
+pre_ll(DECL_ARGS)
+{
+
+ term_setwidth(p, n->nchild ? n->child->string : NULL);
+ return(0);
+}
-/* ARGSUSED */
static int
pre_I(DECL_ARGS)
{
@@ -244,8 +251,6 @@ pre_I(DECL_ARGS)
return(1);
}
-
-/* ARGSUSED */
static int
pre_literal(DECL_ARGS)
{
@@ -266,14 +271,13 @@ pre_literal(DECL_ARGS)
p->offset = p->rmargin;
p->rmargin = p->maxrmargin;
p->trailspace = 0;
- p->flags &= ~TERMP_NOBREAK;
+ p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND);
p->flags |= TERMP_NOSPACE;
}
return(0);
}
-/* ARGSUSED */
static int
pre_PD(DECL_ARGS)
{
@@ -288,7 +292,6 @@ pre_PD(DECL_ARGS)
return(0);
}
-/* ARGSUSED */
static int
pre_alternate(DECL_ARGS)
{
@@ -297,27 +300,27 @@ pre_alternate(DECL_ARGS)
int savelit, i;
switch (n->tok) {
- case (MAN_RB):
+ case MAN_RB:
font[0] = TERMFONT_NONE;
font[1] = TERMFONT_BOLD;
break;
- case (MAN_RI):
+ case MAN_RI:
font[0] = TERMFONT_NONE;
font[1] = TERMFONT_UNDER;
break;
- case (MAN_BR):
+ case MAN_BR:
font[0] = TERMFONT_BOLD;
font[1] = TERMFONT_NONE;
break;
- case (MAN_BI):
+ case MAN_BI:
font[0] = TERMFONT_BOLD;
font[1] = TERMFONT_UNDER;
break;
- case (MAN_IR):
+ case MAN_IR:
font[0] = TERMFONT_UNDER;
font[1] = TERMFONT_NONE;
break;
- case (MAN_IB):
+ case MAN_IB:
font[0] = TERMFONT_UNDER;
font[1] = TERMFONT_BOLD;
break;
@@ -340,7 +343,6 @@ pre_alternate(DECL_ARGS)
return(0);
}
-/* ARGSUSED */
static int
pre_B(DECL_ARGS)
{
@@ -349,7 +351,6 @@ pre_B(DECL_ARGS)
return(1);
}
-/* ARGSUSED */
static int
pre_OP(DECL_ARGS)
{
@@ -372,7 +373,6 @@ pre_OP(DECL_ARGS)
return(0);
}
-/* ARGSUSED */
static int
pre_ft(DECL_ARGS)
{
@@ -385,26 +385,26 @@ pre_ft(DECL_ARGS)
cp = n->child->string;
switch (*cp) {
- case ('4'):
+ case '4':
/* FALLTHROUGH */
- case ('3'):
+ case '3':
/* FALLTHROUGH */
- case ('B'):
+ case 'B':
term_fontrepl(p, TERMFONT_BOLD);
break;
- case ('2'):
+ case '2':
/* FALLTHROUGH */
- case ('I'):
+ case 'I':
term_fontrepl(p, TERMFONT_UNDER);
break;
- case ('P'):
+ case 'P':
term_fontlast(p);
break;
- case ('1'):
+ case '1':
/* FALLTHROUGH */
- case ('C'):
+ case 'C':
/* FALLTHROUGH */
- case ('R'):
+ case 'R':
term_fontrepl(p, TERMFONT_NONE);
break;
default:
@@ -413,7 +413,6 @@ pre_ft(DECL_ARGS)
return(0);
}
-/* ARGSUSED */
static int
pre_in(DECL_ARGS)
{
@@ -447,7 +446,7 @@ pre_in(DECL_ARGS)
p->offset -= p->offset > v ? v : p->offset;
else if (less > 0)
p->offset += v;
- else
+ else
p->offset = v;
/* Don't let this creep beyond the right margin. */
@@ -458,8 +457,6 @@ pre_in(DECL_ARGS)
return(0);
}
-
-/* ARGSUSED */
static int
pre_sp(DECL_ARGS)
{
@@ -469,15 +466,15 @@ pre_sp(DECL_ARGS)
if ((NULL == n->prev && n->parent)) {
switch (n->parent->tok) {
- case (MAN_SH):
+ case MAN_SH:
/* FALLTHROUGH */
- case (MAN_SS):
+ case MAN_SS:
/* FALLTHROUGH */
- case (MAN_PP):
+ case MAN_PP:
/* FALLTHROUGH */
- case (MAN_LP):
+ case MAN_LP:
/* FALLTHROUGH */
- case (MAN_P):
+ case MAN_P:
/* FALLTHROUGH */
return(0);
default:
@@ -487,7 +484,7 @@ pre_sp(DECL_ARGS)
neg = 0;
switch (n->tok) {
- case (MAN_br):
+ case MAN_br:
len = 0;
break;
default:
@@ -515,8 +512,6 @@ pre_sp(DECL_ARGS)
return(0);
}
-
-/* ARGSUSED */
static int
pre_HP(DECL_ARGS)
{
@@ -525,17 +520,17 @@ pre_HP(DECL_ARGS)
const struct man_node *nn;
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
print_bvspace(p, n, mt->pardist);
return(1);
- case (MAN_BODY):
+ case MAN_BODY:
break;
default:
return(0);
}
if ( ! (MANT_LITERAL & mt->fl)) {
- p->flags |= TERMP_NOBREAK;
+ p->flags |= TERMP_NOBREAK | TERMP_BRIND;
p->trailspace = 2;
}
@@ -561,16 +556,14 @@ pre_HP(DECL_ARGS)
return(1);
}
-
-/* ARGSUSED */
static void
post_HP(DECL_ARGS)
{
switch (n->type) {
- case (MAN_BODY):
+ case MAN_BODY:
term_newln(p);
- p->flags &= ~TERMP_NOBREAK;
+ p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND);
p->trailspace = 0;
p->offset = mt->offset;
p->rmargin = p->maxrmargin;
@@ -580,14 +573,12 @@ post_HP(DECL_ARGS)
}
}
-
-/* ARGSUSED */
static int
pre_PP(DECL_ARGS)
{
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
print_bvspace(p, n, mt->pardist);
break;
@@ -599,8 +590,6 @@ pre_PP(DECL_ARGS)
return(MAN_HEAD != n->type);
}
-
-/* ARGSUSED */
static int
pre_IP(DECL_ARGS)
{
@@ -609,14 +598,14 @@ pre_IP(DECL_ARGS)
int savelit, ival;
switch (n->type) {
- case (MAN_BODY):
+ case MAN_BODY:
p->flags |= TERMP_NOSPACE;
break;
- case (MAN_HEAD):
+ case MAN_HEAD:
p->flags |= TERMP_NOBREAK;
p->trailspace = 1;
break;
- case (MAN_BLOCK):
+ case MAN_BLOCK:
print_bvspace(p, n, mt->pardist);
/* FALLTHROUGH */
default:
@@ -633,7 +622,7 @@ pre_IP(DECL_ARGS)
len = (size_t)ival;
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
/* Handle zero-width lengths. */
if (0 == len)
len = term_len(p, 1);
@@ -656,9 +645,10 @@ pre_IP(DECL_ARGS)
mt->fl |= MANT_LITERAL;
return(0);
- case (MAN_BODY):
+ case MAN_BODY:
p->offset = mt->offset + len;
- p->rmargin = p->maxrmargin;
+ p->rmargin = p->maxrmargin > p->offset ?
+ p->maxrmargin : p->offset;
break;
default:
break;
@@ -667,20 +657,18 @@ pre_IP(DECL_ARGS)
return(1);
}
-
-/* ARGSUSED */
static void
post_IP(DECL_ARGS)
{
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
term_flushln(p);
p->flags &= ~TERMP_NOBREAK;
p->trailspace = 0;
p->rmargin = p->maxrmargin;
break;
- case (MAN_BODY):
+ case MAN_BODY:
term_newln(p);
p->offset = mt->offset;
break;
@@ -689,8 +677,6 @@ post_IP(DECL_ARGS)
}
}
-
-/* ARGSUSED */
static int
pre_TP(DECL_ARGS)
{
@@ -699,14 +685,14 @@ pre_TP(DECL_ARGS)
int savelit, ival;
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
p->flags |= TERMP_NOBREAK;
p->trailspace = 1;
break;
- case (MAN_BODY):
+ case MAN_BODY:
p->flags |= TERMP_NOSPACE;
break;
- case (MAN_BLOCK):
+ case MAN_BLOCK:
print_bvspace(p, n, mt->pardist);
/* FALLTHROUGH */
default:
@@ -719,12 +705,12 @@ pre_TP(DECL_ARGS)
/* Calculate offset. */
if (NULL != (nn = n->parent->head->child))
- if (nn->string && nn->parent->line == nn->line)
+ if (nn->string && 0 == (MAN_LINE & nn->flags))
if ((ival = a2width(p, nn->string)) >= 0)
len = (size_t)ival;
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
/* Handle zero-length properly. */
if (0 == len)
len = term_len(p, 1);
@@ -736,9 +722,14 @@ pre_TP(DECL_ARGS)
mt->fl &= ~MANT_LITERAL;
/* Don't print same-line elements. */
- for (nn = n->child; nn; nn = nn->next)
- if (nn->line > n->line)
- print_man_node(p, mt, nn, meta);
+ nn = n->child;
+ while (NULL != nn && 0 == (MAN_LINE & nn->flags))
+ nn = nn->next;
+
+ while (NULL != nn) {
+ print_man_node(p, mt, nn, meta);
+ nn = nn->next;
+ }
if (savelit)
mt->fl |= MANT_LITERAL;
@@ -746,9 +737,10 @@ pre_TP(DECL_ARGS)
mt->lmargin[mt->lmargincur] = (size_t)ival;
return(0);
- case (MAN_BODY):
+ case MAN_BODY:
p->offset = mt->offset + len;
- p->rmargin = p->maxrmargin;
+ p->rmargin = p->maxrmargin > p->offset ?
+ p->maxrmargin : p->offset;
p->trailspace = 0;
p->flags &= ~TERMP_NOBREAK;
break;
@@ -759,17 +751,15 @@ pre_TP(DECL_ARGS)
return(1);
}
-
-/* ARGSUSED */
static void
post_TP(DECL_ARGS)
{
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
term_flushln(p);
break;
- case (MAN_BODY):
+ case MAN_BODY:
term_newln(p);
p->offset = mt->offset;
break;
@@ -778,15 +768,13 @@ post_TP(DECL_ARGS)
}
}
-
-/* ARGSUSED */
static int
pre_SS(DECL_ARGS)
{
int i;
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
mt->fl &= ~MANT_LITERAL;
mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
mt->offset = term_len(p, p->defindent);
@@ -799,11 +787,11 @@ pre_SS(DECL_ARGS)
for (i = 0; i < mt->pardist; i++)
term_vspace(p);
break;
- case (MAN_HEAD):
+ case MAN_HEAD:
term_fontrepl(p, TERMFONT_BOLD);
p->offset = term_len(p, 3);
break;
- case (MAN_BODY):
+ case MAN_BODY:
p->offset = mt->offset;
break;
default:
@@ -813,17 +801,15 @@ pre_SS(DECL_ARGS)
return(1);
}
-
-/* ARGSUSED */
static void
post_SS(DECL_ARGS)
{
-
+
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
term_newln(p);
break;
- case (MAN_BODY):
+ case MAN_BODY:
term_newln(p);
break;
default:
@@ -831,15 +817,13 @@ post_SS(DECL_ARGS)
}
}
-
-/* ARGSUSED */
static int
pre_SH(DECL_ARGS)
{
int i;
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
mt->fl &= ~MANT_LITERAL;
mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
mt->offset = term_len(p, p->defindent);
@@ -853,11 +837,11 @@ pre_SH(DECL_ARGS)
for (i = 0; i < mt->pardist; i++)
term_vspace(p);
break;
- case (MAN_HEAD):
+ case MAN_HEAD:
term_fontrepl(p, TERMFONT_BOLD);
p->offset = 0;
break;
- case (MAN_BODY):
+ case MAN_BODY:
p->offset = mt->offset;
break;
default:
@@ -867,17 +851,15 @@ pre_SH(DECL_ARGS)
return(1);
}
-
-/* ARGSUSED */
static void
post_SH(DECL_ARGS)
{
-
+
switch (n->type) {
- case (MAN_HEAD):
+ case MAN_HEAD:
term_newln(p);
break;
- case (MAN_BODY):
+ case MAN_BODY:
term_newln(p);
break;
default:
@@ -885,7 +867,6 @@ post_SH(DECL_ARGS)
}
}
-/* ARGSUSED */
static int
pre_RS(DECL_ARGS)
{
@@ -893,10 +874,10 @@ pre_RS(DECL_ARGS)
size_t sz;
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
term_newln(p);
return(1);
- case (MAN_HEAD):
+ case MAN_HEAD:
return(0);
default:
break;
@@ -905,12 +886,13 @@ pre_RS(DECL_ARGS)
sz = term_len(p, p->defindent);
if (NULL != (n = n->parent->head->child))
- if ((ival = a2width(p, n->string)) >= 0)
+ if ((ival = a2width(p, n->string)) >= 0)
sz = (size_t)ival;
mt->offset += sz;
- p->rmargin = p->maxrmargin;
- p->offset = mt->offset < p->rmargin ? mt->offset : p->rmargin;
+ p->offset = mt->offset;
+ p->rmargin = p->maxrmargin > p->offset ?
+ p->maxrmargin : p->offset;
if (++mt->lmarginsz < MAXMARGINS)
mt->lmargincur = mt->lmarginsz;
@@ -919,7 +901,6 @@ pre_RS(DECL_ARGS)
return(1);
}
-/* ARGSUSED */
static void
post_RS(DECL_ARGS)
{
@@ -927,9 +908,9 @@ post_RS(DECL_ARGS)
size_t sz;
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
return;
- case (MAN_HEAD):
+ case MAN_HEAD:
return;
default:
term_newln(p);
@@ -938,8 +919,8 @@ post_RS(DECL_ARGS)
sz = term_len(p, p->defindent);
- if (NULL != (n = n->parent->head->child))
- if ((ival = a2width(p, n->string)) >= 0)
+ if (NULL != (n = n->parent->head->child))
+ if ((ival = a2width(p, n->string)) >= 0)
sz = (size_t)ival;
mt->offset = mt->offset < sz ? 0 : mt->offset - sz;
@@ -949,7 +930,6 @@ post_RS(DECL_ARGS)
mt->lmargincur = mt->lmarginsz;
}
-/* ARGSUSED */
static int
pre_UR(DECL_ARGS)
{
@@ -957,7 +937,6 @@ pre_UR(DECL_ARGS)
return (MAN_HEAD != n->type);
}
-/* ARGSUSED */
static void
post_UR(DECL_ARGS)
{
@@ -982,7 +961,7 @@ print_man_node(DECL_ARGS)
int c;
switch (n->type) {
- case(MAN_TEXT):
+ case MAN_TEXT:
/*
* If we have a blank line, output a vertical space.
* If we have a space as the first character, break
@@ -997,15 +976,15 @@ print_man_node(DECL_ARGS)
term_word(p, n->string);
goto out;
- case (MAN_EQN):
+ case MAN_EQN:
term_eqn(p, n->eqn);
return;
- case (MAN_TBL):
+ case MAN_TBL:
/*
* Tables are preceded by a newline. Then process a
* table line, which will cause line termination.
*/
- if (TBL_SPAN_FIRST & n->span->flags)
+ if (TBL_SPAN_FIRST & n->span->flags)
term_newln(p);
term_tbl(p, n->span);
return;
@@ -1037,7 +1016,7 @@ out:
* more specific than this.
*/
if (MANT_LITERAL & mt->fl && ! (TERMP_NOBREAK & p->flags) &&
- (NULL == n->next || n->next->line > n->line)) {
+ (NULL == n->next || MAN_LINE & n->next->flags)) {
rm = p->rmargin;
rmax = p->maxrmargin;
p->rmargin = p->maxrmargin = TERM_MAXMARGIN;
@@ -1068,13 +1047,12 @@ print_man_nodelist(DECL_ARGS)
print_man_nodelist(p, mt, n->next, meta);
}
-
static void
print_man_foot(struct termp *p, const void *arg)
{
- char title[BUFSIZ];
- size_t datelen;
- const struct man_meta *meta;
+ const struct man_meta *meta;
+ char *title;
+ size_t datelen;
meta = (const struct man_meta *)arg;
assert(meta->title);
@@ -1083,7 +1061,8 @@ print_man_foot(struct termp *p, const void *arg)
term_fontrepl(p, TERMFONT_NONE);
- term_vspace(p);
+ if (meta->hasbody)
+ term_vspace(p);
/*
* Temporary, undocumented option to imitate mdoc(7) output.
@@ -1092,13 +1071,16 @@ print_man_foot(struct termp *p, const void *arg)
*/
if ( ! p->mdocstyle) {
- term_vspace(p);
- term_vspace(p);
- snprintf(title, BUFSIZ, "%s(%s)", meta->title, meta->msec);
+ if (meta->hasbody) {
+ term_vspace(p);
+ term_vspace(p);
+ }
+ mandoc_asprintf(&title, "%s(%s)",
+ meta->title, meta->msec);
} else if (meta->source) {
- strlcpy(title, meta->source, BUFSIZ);
+ title = mandoc_strdup(meta->source);
} else {
- title[0] = '\0';
+ title = mandoc_strdup("");
}
datelen = term_strlen(p, meta->date);
@@ -1134,38 +1116,35 @@ print_man_foot(struct termp *p, const void *arg)
term_word(p, title);
term_flushln(p);
+ free(title);
}
-
static void
print_man_head(struct termp *p, const void *arg)
{
- char buf[BUFSIZ], title[BUFSIZ];
- size_t buflen, titlen;
- const struct man_meta *meta;
+ const struct man_meta *meta;
+ const char *volume;
+ char *title;
+ size_t vollen, titlen;
meta = (const struct man_meta *)arg;
assert(meta->title);
assert(meta->msec);
- if (meta->vol)
- strlcpy(buf, meta->vol, BUFSIZ);
- else
- buf[0] = '\0';
- buflen = term_strlen(p, buf);
+ volume = NULL == meta->vol ? "" : meta->vol;
+ vollen = term_strlen(p, volume);
/* Top left corner: manual title and section. */
- snprintf(title, BUFSIZ, "%s(%s)", meta->title, meta->msec);
+ mandoc_asprintf(&title, "%s(%s)", meta->title, meta->msec);
titlen = term_strlen(p, title);
p->flags |= TERMP_NOBREAK | TERMP_NOSPACE;
p->trailspace = 1;
p->offset = 0;
- p->rmargin = 2 * (titlen+1) + buflen < p->maxrmargin ?
- (p->maxrmargin -
- term_strlen(p, buf) + term_len(p, 1)) / 2 :
- p->maxrmargin - buflen;
+ p->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ?
+ (p->maxrmargin - vollen + term_len(p, 1)) / 2 :
+ p->maxrmargin - vollen;
term_word(p, title);
term_flushln(p);
@@ -1174,10 +1153,10 @@ print_man_head(struct termp *p, const void *arg)
p->flags |= TERMP_NOSPACE;
p->offset = p->rmargin;
- p->rmargin = p->offset + buflen + titlen < p->maxrmargin ?
+ p->rmargin = p->offset + vollen + titlen < p->maxrmargin ?
p->maxrmargin - titlen : p->maxrmargin;
- term_word(p, buf);
+ term_word(p, volume);
term_flushln(p);
/* Top right corner: title and section, again. */
@@ -1196,7 +1175,7 @@ print_man_head(struct termp *p, const void *arg)
p->offset = 0;
p->rmargin = p->maxrmargin;
- /*
+ /*
* Groff prints three blank lines before the content.
* Do the same, except in the temporary, undocumented
* mode imitating mdoc(7) output.
@@ -1207,4 +1186,5 @@ print_man_head(struct termp *p, const void *arg)
term_vspace(p);
term_vspace(p);
}
+ free(title);
}
diff --git a/man_validate.c b/man_validate.c
index da2e557ebb5e..c17eb9ecdac6 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -1,7 +1,7 @@
-/* $Id: man_validate.c,v 1.86 2013/10/17 20:54:58 schwarze Exp $ */
+/* $Id: man_validate.c,v 1.105 2014/08/06 15:09:05 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -32,6 +32,7 @@
#include "man.h"
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libman.h"
#include "libmandoc.h"
@@ -39,21 +40,14 @@
typedef int (*v_check)(CHKARGS);
-struct man_valid {
- v_check *pres;
- v_check *posts;
-};
-
static int check_eq0(CHKARGS);
static int check_eq2(CHKARGS);
static int check_le1(CHKARGS);
-static int check_ge2(CHKARGS);
static int check_le5(CHKARGS);
-static int check_head1(CHKARGS);
static int check_par(CHKARGS);
static int check_part(CHKARGS);
static int check_root(CHKARGS);
-static void check_text(CHKARGS);
+static int check_text(CHKARGS);
static int post_AT(CHKARGS);
static int post_IP(CHKARGS);
@@ -61,174 +55,122 @@ static int post_vs(CHKARGS);
static int post_fi(CHKARGS);
static int post_ft(CHKARGS);
static int post_nf(CHKARGS);
-static int post_sec(CHKARGS);
static int post_TH(CHKARGS);
static int post_UC(CHKARGS);
-static int pre_sec(CHKARGS);
-
-static v_check posts_at[] = { post_AT, NULL };
-static v_check posts_br[] = { post_vs, check_eq0, NULL };
-static v_check posts_eq0[] = { check_eq0, NULL };
-static v_check posts_eq2[] = { check_eq2, NULL };
-static v_check posts_fi[] = { check_eq0, post_fi, NULL };
-static v_check posts_ft[] = { post_ft, NULL };
-static v_check posts_ip[] = { post_IP, NULL };
-static v_check posts_le1[] = { check_le1, NULL };
-static v_check posts_nf[] = { check_eq0, post_nf, NULL };
-static v_check posts_par[] = { check_par, NULL };
-static v_check posts_part[] = { check_part, NULL };
-static v_check posts_sec[] = { post_sec, NULL };
-static v_check posts_sp[] = { post_vs, check_le1, NULL };
-static v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL };
-static v_check posts_uc[] = { post_UC, NULL };
-static v_check posts_ur[] = { check_head1, check_part, NULL };
-static v_check pres_sec[] = { pre_sec, NULL };
-
-static const struct man_valid man_valids[MAN_MAX] = {
- { NULL, posts_br }, /* br */
- { NULL, posts_th }, /* TH */
- { pres_sec, posts_sec }, /* SH */
- { pres_sec, posts_sec }, /* SS */
- { NULL, NULL }, /* TP */
- { NULL, posts_par }, /* LP */
- { NULL, posts_par }, /* PP */
- { NULL, posts_par }, /* P */
- { NULL, posts_ip }, /* IP */
- { NULL, NULL }, /* HP */
- { NULL, NULL }, /* SM */
- { NULL, NULL }, /* SB */
- { NULL, NULL }, /* BI */
- { NULL, NULL }, /* IB */
- { NULL, NULL }, /* BR */
- { NULL, NULL }, /* RB */
- { NULL, NULL }, /* R */
- { NULL, NULL }, /* B */
- { NULL, NULL }, /* I */
- { NULL, NULL }, /* IR */
- { NULL, NULL }, /* RI */
- { NULL, posts_eq0 }, /* na */
- { NULL, posts_sp }, /* sp */
- { NULL, posts_nf }, /* nf */
- { NULL, posts_fi }, /* fi */
- { NULL, NULL }, /* RE */
- { NULL, posts_part }, /* RS */
- { NULL, NULL }, /* DT */
- { NULL, posts_uc }, /* UC */
- { NULL, posts_le1 }, /* PD */
- { NULL, posts_at }, /* AT */
- { NULL, NULL }, /* in */
- { NULL, posts_ft }, /* ft */
- { NULL, posts_eq2 }, /* OP */
- { NULL, posts_nf }, /* EX */
- { NULL, posts_fi }, /* EE */
- { NULL, posts_ur }, /* UR */
- { NULL, NULL }, /* UE */
+static int post_UR(CHKARGS);
+
+static v_check man_valids[MAN_MAX] = {
+ post_vs, /* br */
+ post_TH, /* TH */
+ NULL, /* SH */
+ NULL, /* SS */
+ NULL, /* TP */
+ check_par, /* LP */
+ check_par, /* PP */
+ check_par, /* P */
+ post_IP, /* IP */
+ NULL, /* HP */
+ NULL, /* SM */
+ NULL, /* SB */
+ NULL, /* BI */
+ NULL, /* IB */
+ NULL, /* BR */
+ NULL, /* RB */
+ NULL, /* R */
+ NULL, /* B */
+ NULL, /* I */
+ NULL, /* IR */
+ NULL, /* RI */
+ check_eq0, /* na */
+ post_vs, /* sp */
+ post_nf, /* nf */
+ post_fi, /* fi */
+ NULL, /* RE */
+ check_part, /* RS */
+ NULL, /* DT */
+ post_UC, /* UC */
+ check_le1, /* PD */
+ post_AT, /* AT */
+ NULL, /* in */
+ post_ft, /* ft */
+ check_eq2, /* OP */
+ post_nf, /* EX */
+ post_fi, /* EE */
+ post_UR, /* UR */
+ NULL, /* UE */
+ NULL, /* ll */
};
int
-man_valid_pre(struct man *man, struct man_node *n)
-{
- v_check *cp;
-
- switch (n->type) {
- case (MAN_TEXT):
- /* FALLTHROUGH */
- case (MAN_ROOT):
- /* FALLTHROUGH */
- case (MAN_EQN):
- /* FALLTHROUGH */
- case (MAN_TBL):
- return(1);
- default:
- break;
- }
-
- if (NULL == (cp = man_valids[n->tok].pres))
- return(1);
- for ( ; *cp; cp++)
- if ( ! (*cp)(man, n))
- return(0);
- return(1);
-}
-
-
-int
man_valid_post(struct man *man)
{
+ struct man_node *n;
v_check *cp;
- if (MAN_VALID & man->last->flags)
+ n = man->last;
+ if (n->flags & MAN_VALID)
return(1);
- man->last->flags |= MAN_VALID;
+ n->flags |= MAN_VALID;
- switch (man->last->type) {
- case (MAN_TEXT):
- check_text(man, man->last);
- return(1);
- case (MAN_ROOT):
- return(check_root(man, man->last));
- case (MAN_EQN):
+ switch (n->type) {
+ case MAN_TEXT:
+ return(check_text(man, n));
+ case MAN_ROOT:
+ return(check_root(man, n));
+ case MAN_EQN:
/* FALLTHROUGH */
- case (MAN_TBL):
+ case MAN_TBL:
return(1);
default:
- break;
+ cp = man_valids + n->tok;
+ return(*cp ? (*cp)(man, n) : 1);
}
-
- if (NULL == (cp = man_valids[man->last->tok].posts))
- return(1);
- for ( ; *cp; cp++)
- if ( ! (*cp)(man, man->last))
- return(0);
-
- return(1);
}
-
static int
-check_root(CHKARGS)
+check_root(CHKARGS)
{
- if (MAN_BLINE & man->flags)
- man_nmsg(man, n, MANDOCERR_SCOPEEXIT);
- else if (MAN_ELINE & man->flags)
- man_nmsg(man, n, MANDOCERR_SCOPEEXIT);
+ assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
- man->flags &= ~MAN_BLINE;
- man->flags &= ~MAN_ELINE;
+ if (NULL == man->first->child)
+ mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
+ n->line, n->pos, NULL);
+ else
+ man->meta.hasbody = 1;
- if (NULL == man->first->child) {
- man_nmsg(man, n, MANDOCERR_NODOCBODY);
- return(0);
- } else if (NULL == man->meta.title) {
- man_nmsg(man, n, MANDOCERR_NOTITLE);
+ if (NULL == man->meta.title) {
+ mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
+ n->line, n->pos, NULL);
/*
* If a title hasn't been set, do so now (by
* implication, date and section also aren't set).
*/
- man->meta.title = mandoc_strdup("unknown");
- man->meta.msec = mandoc_strdup("1");
- man->meta.date = mandoc_normdate
- (man->parse, NULL, n->line, n->pos);
+ man->meta.title = mandoc_strdup("");
+ man->meta.msec = mandoc_strdup("");
+ man->meta.date = man->quick ? mandoc_strdup("") :
+ mandoc_normdate(man->parse, NULL, n->line, n->pos);
}
return(1);
}
-static void
+static int
check_text(CHKARGS)
{
char *cp, *p;
if (MAN_LITERAL & man->flags)
- return;
+ return(1);
cp = n->string;
for (p = cp; NULL != (p = strchr(p, '\t')); p++)
- man_pmsg(man, n->line, (int)(p - cp), MANDOCERR_BADTAB);
+ mandoc_msg(MANDOCERR_FI_TAB, man->parse,
+ n->line, n->pos + (p - cp), NULL);
+ return(1);
}
#define INEQ_DEFINE(x, ineq, name) \
@@ -238,26 +180,25 @@ check_##name(CHKARGS) \
if (n->nchild ineq (x)) \
return(1); \
mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \
- "line arguments %s %d (have %d)", \
- #ineq, (x), n->nchild); \
+ "line arguments %s %d (have %d)", \
+ #ineq, (x), n->nchild); \
return(1); \
}
INEQ_DEFINE(0, ==, eq0)
INEQ_DEFINE(2, ==, eq2)
INEQ_DEFINE(1, <=, le1)
-INEQ_DEFINE(2, >=, ge2)
INEQ_DEFINE(5, <=, le5)
static int
-check_head1(CHKARGS)
+post_UR(CHKARGS)
{
if (MAN_HEAD == n->type && 1 != n->nchild)
mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
n->pos, "line arguments eq 1 (have %d)", n->nchild);
- return(1);
+ return(check_part(man, n));
}
static int
@@ -272,27 +213,27 @@ post_ft(CHKARGS)
ok = 0;
cp = n->child->string;
switch (*cp) {
- case ('1'):
+ case '1':
/* FALLTHROUGH */
- case ('2'):
+ case '2':
/* FALLTHROUGH */
- case ('3'):
+ case '3':
/* FALLTHROUGH */
- case ('4'):
+ case '4':
/* FALLTHROUGH */
- case ('I'):
+ case 'I':
/* FALLTHROUGH */
- case ('P'):
+ case 'P':
/* FALLTHROUGH */
- case ('R'):
+ case 'R':
if ('\0' == cp[1])
ok = 1;
break;
- case ('B'):
+ case 'B':
if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
ok = 1;
break;
- case ('C'):
+ case 'C':
if ('W' == cp[1] && '\0' == cp[2])
ok = 1;
break;
@@ -301,69 +242,51 @@ post_ft(CHKARGS)
}
if (0 == ok) {
- mandoc_vmsg
- (MANDOCERR_BADFONT, man->parse,
- n->line, n->pos, "%s", cp);
+ mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
+ n->line, n->pos, "ft %s", cp);
*cp = '\0';
}
if (1 < n->nchild)
- mandoc_vmsg
- (MANDOCERR_ARGCOUNT, man->parse, n->line,
- n->pos, "want one child (have %d)",
- n->nchild);
-
- return(1);
-}
-
-static int
-pre_sec(CHKARGS)
-{
+ mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
+ n->pos, "want one child (have %d)", n->nchild);
- if (MAN_BLOCK == n->type)
- man->flags &= ~MAN_LITERAL;
return(1);
}
static int
-post_sec(CHKARGS)
-{
-
- if ( ! (MAN_HEAD == n->type && 0 == n->nchild))
- return(1);
-
- man_nmsg(man, n, MANDOCERR_SYNTARGCOUNT);
- return(0);
-}
-
-static int
check_part(CHKARGS)
{
if (MAN_BODY == n->type && 0 == n->nchild)
- mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line,
- n->pos, "want children (have none)");
+ mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line,
+ n->pos, "want children (have none)");
return(1);
}
-
static int
check_par(CHKARGS)
{
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
if (0 == n->body->nchild)
man_node_delete(man, n);
break;
- case (MAN_BODY):
+ case MAN_BODY:
if (0 == n->nchild)
- man_nmsg(man, n, MANDOCERR_IGNPAR);
+ mandoc_vmsg(MANDOCERR_PAR_SKIP,
+ man->parse, n->line, n->pos,
+ "%s empty", man_macronames[n->tok]);
break;
- case (MAN_HEAD):
+ case MAN_HEAD:
if (n->nchild)
- man_nmsg(man, n, MANDOCERR_ARGSLOST);
+ mandoc_vmsg(MANDOCERR_ARG_SKIP,
+ man->parse, n->line, n->pos,
+ "%s %s%s", man_macronames[n->tok],
+ n->child->string,
+ n->nchild > 1 ? " ..." : "");
break;
default:
break;
@@ -377,13 +300,15 @@ post_IP(CHKARGS)
{
switch (n->type) {
- case (MAN_BLOCK):
+ case MAN_BLOCK:
if (0 == n->head->nchild && 0 == n->body->nchild)
man_node_delete(man, n);
break;
- case (MAN_BODY):
+ case MAN_BODY:
if (0 == n->parent->head->nchild && 0 == n->nchild)
- man_nmsg(man, n, MANDOCERR_IGNPAR);
+ mandoc_vmsg(MANDOCERR_PAR_SKIP,
+ man->parse, n->line, n->pos,
+ "%s empty", man_macronames[n->tok]);
break;
default:
break;
@@ -394,8 +319,10 @@ post_IP(CHKARGS)
static int
post_TH(CHKARGS)
{
+ struct man_node *nb;
const char *p;
- int line, pos;
+
+ check_le5(man, n);
free(man->meta.title);
free(man->meta.vol);
@@ -403,10 +330,10 @@ post_TH(CHKARGS)
free(man->meta.msec);
free(man->meta.date);
- line = n->line;
- pos = n->pos;
man->meta.title = man->meta.vol = man->meta.date =
- man->meta.msec = man->meta.source = NULL;
+ man->meta.msec = man->meta.source = NULL;
+
+ nb = n;
/* ->TITLE<- MSEC DATE SOURCE VOL */
@@ -414,15 +341,21 @@ post_TH(CHKARGS)
if (n && n->string) {
for (p = n->string; '\0' != *p; p++) {
/* Only warn about this once... */
- if (isalpha((unsigned char)*p) &&
- ! isupper((unsigned char)*p)) {
- man_nmsg(man, n, MANDOCERR_UPPERCASE);
+ if (isalpha((unsigned char)*p) &&
+ ! isupper((unsigned char)*p)) {
+ mandoc_vmsg(MANDOCERR_TITLE_CASE,
+ man->parse, n->line,
+ n->pos + (p - n->string),
+ "TH %s", n->string);
break;
}
}
man->meta.title = mandoc_strdup(n->string);
- } else
+ } else {
man->meta.title = mandoc_strdup("");
+ mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
+ nb->line, nb->pos, "TH");
+ }
/* TITLE ->MSEC<- DATE SOURCE VOL */
@@ -430,19 +363,27 @@ post_TH(CHKARGS)
n = n->next;
if (n && n->string)
man->meta.msec = mandoc_strdup(n->string);
- else
+ else {
man->meta.msec = mandoc_strdup("");
+ mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
+ nb->line, nb->pos, "TH %s", man->meta.title);
+ }
/* TITLE MSEC ->DATE<- SOURCE VOL */
if (n)
n = n->next;
if (n && n->string && '\0' != n->string[0]) {
- pos = n->pos;
- man->meta.date = mandoc_normdate
- (man->parse, n->string, line, pos);
- } else
+ man->meta.date = man->quick ?
+ mandoc_strdup(n->string) :
+ mandoc_normdate(man->parse, n->string,
+ n->line, n->pos);
+ } else {
man->meta.date = mandoc_strdup("");
+ mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
+ n ? n->line : nb->line,
+ n ? n->pos : nb->pos, "TH");
+ }
/* TITLE MSEC DATE ->SOURCE<- VOL */
@@ -470,8 +411,11 @@ static int
post_nf(CHKARGS)
{
+ check_eq0(man, n);
+
if (MAN_LITERAL & man->flags)
- man_nmsg(man, n, MANDOCERR_SCOPEREP);
+ mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
+ n->line, n->pos, "nf");
man->flags |= MAN_LITERAL;
return(1);
@@ -481,8 +425,11 @@ static int
post_fi(CHKARGS)
{
+ check_eq0(man, n);
+
if ( ! (MAN_LITERAL & man->flags))
- man_nmsg(man, n, MANDOCERR_WNOSCOPE);
+ mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
+ n->line, n->pos, "fi");
man->flags &= ~MAN_LITERAL;
return(1);
@@ -568,17 +515,24 @@ static int
post_vs(CHKARGS)
{
+ if (n->tok == MAN_br)
+ check_eq0(man, n);
+ else
+ check_le1(man, n);
+
if (NULL != n->prev)
return(1);
switch (n->parent->tok) {
- case (MAN_SH):
+ case MAN_SH:
/* FALLTHROUGH */
- case (MAN_SS):
- man_nmsg(man, n, MANDOCERR_IGNPAR);
+ case MAN_SS:
+ mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
+ "%s after %s", man_macronames[n->tok],
+ man_macronames[n->parent->tok]);
/* FALLTHROUGH */
- case (MAN_MAX):
- /*
+ case MAN_MAX:
+ /*
* Don't warn about this because it occurs in pod2man
* and would cause considerable (unfixable) warnage.
*/
diff --git a/mandoc.1 b/mandoc.1
index 0657bc66c559..e9f7375777bc 100644
--- a/mandoc.1
+++ b/mandoc.1
@@ -1,7 +1,7 @@
-.\" $Id: mandoc.1,v 1.103 2013/07/13 19:41:16 schwarze Exp $
+.\" $Id: mandoc.1,v 1.106 2014/08/08 01:50:59 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
-.\" Copyright (c) 2012 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: July 13 2013 $
+.Dd $Mdocdate: August 8 2014 $
.Dt MANDOC 1
.Os
.Sh NAME
@@ -496,30 +496,28 @@ parser:
.Pp
.Dl $ mandoc \-Tman foo.mdoc \*(Gt foo.man
.Sh DIAGNOSTICS
-Standard error messages reporting parsing errors are prefixed by
+Messages displayed by
+.Nm
+follow this format:
.Pp
-.Sm off
-.D1 Ar file : line : column : \ level :
-.Sm on
+.D1 Nm Ns : Ar file : Ns Ar line : Ns Ar column : level : message : macro args
.Pp
-where the fields have the following meanings:
-.Bl -tag -width "column"
-.It Ar file
-The name of the input file causing the message.
-.It Ar line
-The line number in that input file.
-Line numbering starts at 1.
-.It Ar column
-The column number in that input file.
-Column numbering starts at 1.
-If the issue is caused by a word, the column number usually
-points to the first character of the word.
-.It Ar level
-The message level, printed in capital letters.
-.El
+Line and column numbers start at 1.
+Both are omitted for messages referring to an input file as a whole.
+Macro names and arguments are omitted where meaningless.
+Fatal messages about invalid command line arguments
+or operating system errors, for example when memory is exhausted,
+may also omit the
+.Ar file
+and
+.Ar level
+fields.
.Pp
Message levels have the following meanings:
.Bl -tag -width "warning"
+.It Cm syserr
+Opening or reading an input file failed, so the parser cannot
+even be started and no output is produced from that input file.
.It Cm fatal
The parser is unable to parse a given input file at all.
No formatted output is produced from that input file.
@@ -551,13 +549,836 @@ levels are hidden unless their level, or a lower level, is requested using a
option or
.Fl T Ns Cm lint
output mode.
-.Pp
+.Ss Warnings related to the document prologue
+.Bl -ohang
+.It Sy "missing manual title, using UNTITLED"
+.Pq mdoc
+A
+.Ic \&Dt
+macro has no arguments, or there is no
+.Ic \&Dt
+macro before the first non-prologue macro.
+.It Sy "missing manual title, using \(dq\(dq"
+.Pq man
+There is no
+.Ic \&TH
+macro, or it has no arguments.
+.It Sy "lower case character in document title"
+.Pq mdoc , man
+The title is still used as given in the
+.Ic \&Dt
+or
+.Ic \&TH
+macro.
+.It Sy "missing manual section, using \(dq\(dq"
+.Pq mdoc , man
+A
+.Ic \&Dt
+or
+.Ic \&TH
+macro lacks the mandatory section argument.
+.It Sy "unknown manual section"
+.Pq mdoc
+The section number in a
+.Ic \&Dt
+line is invalid, but still used.
+.It Sy "unknown manual volume or arch"
+.Pq mdoc
+The volume name in a
+.Ic \&Dt
+line is invalid, but still used.
+The manual is assumed to be architecture-independent.
+.It Sy "missing date, using today's date"
+.Pq mdoc , man
+The document was parsed as
+.Xr mdoc 7
+and it has no
+.Ic \&Dd
+macro, or the
+.Ic \&Dd
+macro has no arguments or only empty arguments;
+or the document was parsed as
+.Xr man 7
+and it has no
+.Ic \&TH
+macro, or the
+.Ic \&TH
+macro has fewer than three arguments or its third argument is empty.
+.It Sy "cannot parse date, using it verbatim"
+.Pq mdoc , man
+The date given in a
+.Ic \&Dd
+or
+.Ic \&TH
+macro does not follow the conventional format.
+.It Sy "missing Os macro, using \(dq\(dq"
+.Pq mdoc
+The default or current system is not shown in this case.
+.It Sy "duplicate prologue macro"
+.Pq mdoc
+One of the prologue macros occurs more than once.
+The last instance overrides all previous ones.
+.It Sy "late prologue macro"
+.Pq mdoc
+A
+.Ic \&Dd
+or
+.Ic \&Os
+macro occurs after some non-prologue macro, but still takes effect.
+.It Sy "skipping late title macro"
+.Pq mdoc
+The
+.Ic \&Dt
+macro can only occur before the first non-prologue macro
+because traditional formatters write the page header
+before parsing the document body.
+Even though this technical restriction does not apply to
+.Nm ,
+traditional semantics is preserved.
+The late macro is discarded including its arguments.
+.It Sy "prologue macros out of order"
+.Pq mdoc
+The prologue macros are not given in the conventional order
+.Ic \&Dd ,
+.Ic \&Dt ,
+.Ic \&Os .
+All three macros are used even when given in another order.
+.El
+.Ss Warnings regarding document structure
+.Bl -ohang
+.It Sy ".so is fragile, better use ln(1)"
+.Pq roff
+Including files only works when the parser program runs with the correct
+current working directory.
+.It Sy "no document body"
+.Pq mdoc , man
+The document body contains neither text nor macros.
+An empty document is shown, consisting only of a header and a footer line.
+.It Sy "content before first section header"
+.Pq mdoc , man
+Some macros or text precede the first
+.Ic \&Sh
+or
+.Ic \&SH
+section header.
+The offending macros and text are parsed and added to the top level
+of the syntax tree, outside any section block.
+.It Sy "first section is not NAME"
+.Pq mdoc
+The argument of the first
+.Ic \&Sh
+macro is not
+.Sq NAME .
+This may confuse
+.Xr makewhatis 8
+and
+.Xr apropos 1 .
+.It Sy "bad NAME section contents"
+.Pq mdoc
+The last node in the NAME section is not an
+.Ic \&Nd
+macro, or any preceding macro is not
+.Ic \&Nm ,
+or the NAME section is completely empty.
+This may confuse
+.Xr makewhatis 8
+and
+.Xr apropos 1 .
+.It Sy "sections out of conventional order"
+.Pq mdoc
+A standard section occurs after another section it usually precedes.
+All section titles are used as given,
+and the order of sections is not changed.
+.It Sy "duplicate section title"
+.Pq mdoc
+The same standard section title occurs more than once.
+.It Sy "unexpected section"
+.Pq mdoc
+A standard section header occurs in a section of the manual
+where it normally isn't useful.
+.El
+.Ss "Warnings related to macros and nesting"
+.Bl -ohang
+.It Sy "obsolete macro"
+.Pq mdoc
+See the
+.Xr mdoc 7
+manual for replacements.
+.It Sy "skipping paragraph macro"
+In
+.Xr mdoc 7
+documents, this happens
+.Bl -dash -compact
+.It
+at the beginning and end of sections and subsections
+.It
+right before non-compact lists and displays
+.It
+at the end of items in non-column, non-compact lists
+.It
+and for multiple consecutive paragraph macros.
+.El
+In
+.Xr man 7
+documents, it happens
+.Bl -dash -compact
+.It
+for empty
+.Ic \&P ,
+.Ic \&PP ,
+and
+.Ic \&LP
+macros
+.It
+for
+.Ic \&IP
+macros having neither head nor body arguments
+.It
+for
+.Ic \&br
+or
+.Ic \&sp
+right after
+.Ic \&SH
+or
+.Ic \&SS
+.El
+.It Sy "moving paragraph macro out of list"
+.Pq mdoc
+A list item in a
+.Ic \&Bl
+list contains a trailing paragraph macro.
+The paragraph macro is moved after the end of the list.
+.It Sy "skipping no-space macro"
+.Pq mdoc
+An input line begins with an
+.Ic \&Ns
+macro.
+The macro is ignored.
+.It Sy "blocks badly nested"
+.Pq mdoc
+If two blocks intersect, one should completely contain the other.
+Otherwise, rendered output is likely to look strange in any output
+format, and rendering in SGML-based output formats is likely to be
+outright wrong because such languages do not support badly nested
+blocks at all.
+Typical examples of badly nested blocks are
+.Qq Ic \&Ao \&Bo \&Ac \&Bc
+and
+.Qq Ic \&Ao \&Bq \&Ac .
+In these examples,
+.Ic \&Ac
+breaks
+.Ic \&Bo
+and
+.Ic \&Bq ,
+respectively.
+.It Sy "nested displays are not portable"
+.Pq mdoc
+A
+.Ic \&Bd ,
+.Ic \&D1 ,
+or
+.Ic \&Dl
+display occurs nested inside another
+.Ic \&Bd
+display.
+This works with
+.Nm ,
+but fails with most other implementations.
+.It Sy "moving content out of list"
+.Pq mdoc
+A
+.Ic \&Bl
+list block contains text or macros before the first
+.Ic \&It
+macro.
+The offending children are moved before the beginning of the list.
+.It Sy ".Vt block has child macro"
+.Pq mdoc
+The
+.Ic \&Vt
+macro supports plain text arguments only.
+Formatting may be ugly and semantic searching
+for the affected content might not work.
+.It Sy "fill mode already enabled, skipping"
+.Pq man
+A
+.Ic \&fi
+request occurs even though the document is still in fill mode,
+or already switched back to fill mode.
+It has no effect.
+.It Sy "fill mode already disabled, skipping"
+.Pq man
+An
+.Ic \&nf
+request occurs even though the document already switched to no-fill mode
+and did not switch back to fill mode yet.
+It has no effect.
+.It Sy "line scope broken"
+.Pq man
+While parsing the next-line scope of the previous macro,
+another macro is found that prematurely terminates the previous one.
+The previous, interrupted macro is deleted from the parse tree.
+.El
+.Ss "Warnings related to missing arguments"
+.Bl -ohang
+.It Sy "skipping empty request"
+.Pq roff
+The macro name is missing from a macro definition request.
+.It Sy "conditional request controls empty scope"
+.Pq roff
+A conditional request is only useful if any of the following
+follows it on the same logical input line:
+.Bl -dash -compact
+.It
+The
+.Sq \e{
+keyword to open a multi-line scope.
+.It
+A request or macro or some text, resulting in a single-line scope.
+.It
+The immediate end of the logical line without any intervening whitespace,
+resulting in next-line scope.
+.El
+Here, a conditional request is followed by trailing whitespace only,
+and there is no other content on its logical input line.
+Note that it doesn't matter whether the logical input line is split
+across multiple physical input lines using
+.Sq \e
+line continuation characters.
+This is one of the rare cases
+where trailing whitespace is syntactically significant.
+The conditional request controls a scope containing whitespace only,
+so it is unlikely to have a significant effect,
+except that it may control a following
+.Ic \&el
+clause.
+.It Sy "skipping empty macro"
+.Pq mdoc
+The indicated macro has no arguments and hence no effect.
+.It Sy "empty argument, using 0n"
+.Pq mdoc
+The required width is missing after
+.Ic \&Bd
+or
+.Ic \&Bl
+.Fl offset
+or
+.Fl width .
+.It Sy "argument count wrong"
+.Pq mdoc , man
+The indicated macro has too few or too many arguments.
+The syntax tree will contain the wrong number of arguments as given.
+Formatting behaviour depends on the specific macro in question.
+Note that the same message may also occur as an ERROR, see below.
+.It Sy "missing display type, using -ragged"
+.Pq mdoc
+The
+.Ic \&Bd
+macro is invoked without the required display type.
+.It Sy "list type is not the first argument"
+.Pq mdoc
+In a
+.Ic \&Bl
+macro, at least one other argument precedes the type argument.
+The
+.Nm
+utility copes with any argument order, but some other
+.Xr mdoc 7
+implementations do not.
+.It Sy "missing -width in -tag list, using 8n"
+.Pq mdoc
+Every
+.Ic \&Bl
+macro having the
+.Fl tag
+argument requires
+.Fl width ,
+too.
+.It Sy "missing utility name, using \(dq\(dq"
+.Pq mdoc
+The
+.Ic \&Ex Fl std
+macro is called without an argument before
+.Ic \&Nm
+has first been called with an argument.
+.It Sy "empty head in list item"
+.Pq mdoc
+In a
+.Ic \&Bl
+.Fl diag ,
+.Fl hang ,
+.Fl inset ,
+.Fl ohang ,
+or
+.Fl tag
+list, an
+.Ic \&It
+macro lacks the required argument.
+The item head is left empty.
+.It Sy "empty list item"
+.Pq mdoc
+In a
+.Ic \&Bl
+.Fl bullet ,
+.Fl dash ,
+.Fl enum ,
+or
+.Fl hyphen
+list, an
+.Ic \&It
+block is empty.
+An empty list item is shown.
+.It Sy "missing font type"
+.Pq mdoc
+A
+.Ic \&Bf
+macro has no argument.
+It switches to the default font,
+.Cm \efR .
+.It Sy "unknown font type"
+.Pq mdoc
+The
+.Ic \&Bf
+argument is invalid.
+The default font
+.Cm \efR
+is used instead.
+.It Sy "missing -std argument, adding it"
+.Pq mdoc
+An
+.Ic \&Ex
+or
+.Ic \&Rv
+macro lacks the required
+.Fl std
+argument.
+The
+.Nm
+utility assumes
+.Fl std
+even when it is not specified, but other implementations may not.
+.El
+.Ss "Warnings related to bad macro arguments"
+.Bl -ohang
+.It Sy "unterminated quoted argument"
+.Pq roff
+Macro arguments can be enclosed in double quote characters
+such that space characters and macro names contained in the quoted
+argument need not be escaped.
+The closing quote of the last argument of a macro can be omitted.
+However, omitting it is not recommended because it makes the code
+harder to read.
+.It Sy "duplicate argument"
+.Pq mdoc
+A
+.Ic \&Bd
+or
+.Ic \&Bl
+macro has more than one
+.Fl compact ,
+more than one
+.Fl offset ,
+or more than one
+.Fl width
+argument.
+All but the last instances of these arguments are ignored.
+.It Sy "skipping duplicate argument"
+.Pq mdoc
+An
+.Ic \&An
+macro has more than one
+.Fl split
+or
+.Fl nosplit
+argument.
+All but the first of these arguments are ignored.
+.It Sy "skipping duplicate display type"
+.Pq mdoc
+A
+.Ic \&Bd
+macro has more than one type argument; the first one is used.
+.It Sy "skipping duplicate list type"
+.Pq mdoc
+A
+.Ic \&Bl
+macro has more than one type argument; the first one is used.
+.It Sy "skipping -width argument"
+.Pq mdoc
+A
+.Ic \&Bl
+.Fl column ,
+.Fl diag ,
+.Fl ohang ,
+.Fl inset ,
+or
+.Fl item
+list has a
+.Fl width
+argument.
+That has no effect.
+.It Sy "unknown AT&T UNIX version"
+.Pq mdoc
+An
+.Ic \&At
+macro has an invalid argument.
+It is used verbatim, with
+.Qq "AT&T UNIX "
+prefixed to it.
+.It Sy "invalid content in Rs block"
+.Pq mdoc
+An
+.Ic \&Rs
+block contains plain text or non-% macros.
+The bogus content is left in the syntax tree.
+Formatting may be poor.
+.It Sy "invalid Boolean argument"
+.Pq mdoc
+An
+.Ic \&Sm
+macro has an argument other than
+.Cm on
+or
+.Cm off .
+The invalid argument is moved out of the macro, which leaves the macro
+empty, causing it to toggle the spacing mode.
+.It Sy "unknown font, skipping request"
+.Pq man
+A
+.Xr roff 7
+.Ic \&ft
+request has an invalid argument.
+.El
+.Ss "Warnings related to plain text"
+.Bl -ohang
+.It Sy "blank line in fill mode, using .sp"
+.Pq mdoc
+The meaning of blank input lines is only well-defined in non-fill mode:
+In fill mode, line breaks of text input lines are not supposed to be
+significant.
+However, for compatibility with groff, blank lines in fill mode
+are replaced with
+.Ic \&sp
+requests.
+.It Sy "tab in filled text"
+.Pq mdoc , man
+The meaning of tab characters is only well-defined in non-fill mode:
+In fill mode, whitespace is not supposed to be significant
+on text input lines.
+As an implementation dependent choice, tab characters on text lines
+are passed through to the formatters in any case.
+Given that the text before the tab character will be filled,
+it is hard to predict which tab stop position the tab will advance to.
+.It Sy "whitespace at end of input line"
+.Pq mdoc , man , roff
+Whitespace at the end of input lines is almost never semantically
+significant \(em but in the odd case where it might be, it is
+extremely confusing when reviewing and maintaining documents.
+.It Sy "bad comment style"
+.Pq roff
+Comment lines start with a dot, a backslash, and a double-quote character.
The
.Nm
-utility may also print messages related to invalid command line arguments
-or operating system errors, for example when memory is exhausted or
-input files cannot be read.
-Such messages do not carry the prefix described above.
+utility treats the line as a comment line even without the backslash,
+but leaving out the backslash might not be portable.
+.It Sy "invalid escape sequence"
+.Pq roff
+An escape sequence has an invalid opening argument delimiter, lacks the
+closing argument delimiter, or the argument has too few characters.
+If the argument is incomplete,
+.Ic \e*
+and
+.Ic \en
+expand to an empty string,
+.Ic \eB
+to the digit
+.Sq 0 ,
+and
+.Ic \ew
+to the length of the incomplete argument.
+All other invalid escape sequences are ignored.
+.It Sy "undefined string, using \(dq\(dq"
+.Pq roff
+If a string is used without being defined before,
+its value is implicitly set to the empty string.
+However, defining strings explicitly before use
+keeps the code more readable.
+.El
+.Ss "Errors related to equations"
+.Bl -inset -compact
+.It "unexpected equation scope closure"
+.It "equation scope open on exit"
+.It "overlapping equation scopes"
+.It "unexpected end of equation"
+.It "equation syntax error"
+.El
+.Ss "Errors related to tables"
+.Bl -inset -compact
+.It "bad table syntax"
+.It "bad table option"
+.It "bad table layout"
+.It "no table layout cells specified"
+.It "no table data cells specified"
+.It "ignore data in cell"
+.It "data block still open"
+.It "ignoring extra data cells"
+.El
+.Ss "Errors related to roff, mdoc, and man code"
+.Bl -ohang
+.It Sy "input stack limit exceeded, infinite loop?"
+.Pq roff
+Explicit recursion limits are implemented for the following features,
+in order to prevent infinite loops:
+.Bl -dash -compact
+.It
+expansion of nested escape sequences
+including expansion of strings and number registers,
+.It
+expansion of nested user-defined macros,
+.It
+and
+.Ic \&so
+file inclusion.
+.El
+When a limit is hit, the output is incorrect, typically losing
+some content, but the parser can continue.
+.It Sy "skipping bad character"
+.Pq mdoc , man , roff
+The input file contains a byte that is not a printable
+.Xr ascii 7
+character.
+The message mentions the character number.
+The offending byte is replaced with a question mark
+.Pq Sq \&? .
+Consider editing the input file to replace the byte with an ASCII
+transliteration of the intended character.
+.It Sy "skipping unknown macro"
+.Pq mdoc , man , roff
+The first identifier on a request or macro line is neither recognized as a
+.Xr roff 7
+request, nor as a user-defined macro, nor, respectively, as an
+.Xr mdoc 7
+or
+.Xr man 7
+macro.
+It may be mistyped or unsupported.
+The request or macro is discarded including its arguments.
+.It Sy "skipping item outside list"
+.Pq mdoc
+An
+.Ic \&It
+macro occurs outside any
+.Ic \&Bl
+list.
+It is discarded including its arguments.
+.It Sy "skipping column outside column list"
+.Pq mdoc
+A
+.Ic \&Ta
+macro occurs outside any
+.Ic \&Bl Fl column
+block.
+It is discarded including its arguments.
+.It Sy "skipping end of block that is not open"
+.Pq mdoc , man , eqn , tbl , roff
+Various syntax elements can only be used to explicitly close blocks
+that have previously been opened.
+An
+.Xr mdoc 7
+block closing macro, a
+.Xr man 7
+.Ic \&RE
+or
+.Ic \&UE
+macro, or the end of an equation, table, or
+.Xr roff 7
+conditional request is encountered but no matching block is open.
+The offending request or macro is discarded.
+.It Sy "inserting missing end of block"
+.Pq mdoc , tbl
+Various
+.Xr mdoc 7
+macros as well as tables require explicit closing by dedicated macros.
+A block that doesn't support bad nesting
+ends before all of its children are properly closed.
+The open child nodes are closed implicitly.
+.It Sy "scope open on exit"
+.Pq mdoc , man , eqn , tbl , roff
+At the end of the document, an explicit
+.Xr mdoc 7
+block, a
+.Xr man 7
+next-line scope or
+.Ic \&RS
+or
+.Ic \&UR
+block, an equation, table, or
+.Xr roff 7
+conditional or ignore block is still open.
+The open block is closed implicitly.
+.It Sy "escaped character not allowed in a name"
+.Pq roff
+Macro, string and register identifiers consist of printable,
+non-whitespace ASCII characters.
+Escape sequences and characters and strings expressed in terms of them
+cannot form part of a name.
+The first argument of an
+.Ic \&am ,
+.Ic \&as ,
+.Ic \&de ,
+.Ic \&ds ,
+.Ic \&nr ,
+or
+.Ic \&rr
+request, or any argument of an
+.Ic \&rm
+request, or the name of a request or user-defined macro being called,
+is terminated by an escape sequence.
+In the cases of
+.Ic \&as ,
+.Ic \&ds ,
+and
+.Ic \&nr ,
+the request has no effect at all.
+In the cases of
+.Ic \&am ,
+.Ic \&de ,
+.Ic \&rr ,
+and
+.Ic \&rm ,
+what was parsed up to this point is used as the arguments to the request,
+and the rest of the input line is discarded including the escape sequence.
+When parsing for a request or a user-defined macro name to be called,
+only the escape sequence is discarded.
+The characters preceding it are used as the request or macro name,
+the characters following it are used as the arguments to the request or macro.
+.It Sy "argument count wrong"
+.Pq mdoc , man , roff
+The indicated request or macro has too few or too many arguments.
+The syntax tree will contain the wrong number of arguments as given.
+Formatting behaviour depends on the specific request or macro in question.
+Note that the same message may also occur as a WARNING, see above.
+.It Sy "missing list type, using -item"
+.Pq mdoc
+A
+.Ic \&Bl
+macro fails to specify the list type.
+.It Sy "missing manual name, using \(dq\(dq"
+.Pq mdoc
+The first call to
+.Ic \&Nm
+lacks the required argument.
+.It Sy "uname(3) system call failed, using UNKNOWN"
+.Pq mdoc
+The
+.Ic \&Os
+macro is called without arguments, and the
+.Xr uname 3
+system call failed.
+As a workaround,
+.Nm
+can be compiled with
+.Sm off
+.Fl D Cm OSNAME=\(dq\e\(dq Ar string Cm \e\(dq\(dq .
+.Sm on
+.It Sy "unknown standard specifier"
+.Pq mdoc
+An
+.Ic \&St
+macro has an unknown argument and is discarded.
+.It Sy "skipping request without numeric argument"
+.Pq roff
+An
+.Ic \&it
+request has a non-numeric or negative argument or no argument at all.
+The invalid request is ignored.
+.It Sy "skipping all arguments"
+.Pq mdoc , man , eqn , roff
+An
+.Xr mdoc 7
+.Ic \&Bt ,
+.Ic \&Ed ,
+.Ic \&Ef ,
+.Ic \&Ek ,
+.Ic \&El ,
+.Ic \&Re ,
+or
+.Ic \&Ud
+macro, an
+.Ic \&It
+macro in a list that doesn't support item heads, a
+.Xr man 7
+.Ic \&LP ,
+.Ic \&P ,
+or
+.Ic \&PP
+macro, an
+.Xr eqn 7
+.Ic \&EN
+macro, or a
+.Xr roff 7
+.Sq \&..
+block closing request is invoked with at least one argument.
+All arguments are ignored.
+.It Sy "skipping excess arguments"
+.Pq mdoc , roff
+The
+.Ic \&Bf
+macro is invoked with more than one argument, or a request of the
+.Ic \&de
+family is invoked with more than two arguments.
+The excess arguments are ignored.
+.El
+.Ss FATAL errors
+.Bl -ohang
+.It Sy "input too large"
+.Pq mdoc , man
+Currently,
+.Nm
+cannot handle input files larger than its arbitrary size limit
+of 2^31 bytes (2 Gigabytes).
+Since useful manuals are always small, this is not a problem in practice.
+Parsing is aborted as soon as the condition is detected.
+.It Sy "NOT IMPLEMENTED: Bd -file"
+.Pq mdoc
+For security reasons, the
+.Ic \&Bd
+macro does not support the
+.Fl file
+argument.
+By requesting the inclusion of a sensitive file, a malicious document
+might otherwise trick a privileged user into inadvertently displaying
+the file on the screen, revealing the file content to bystanders.
+The parser exits immediately.
+.It Sy "NOT IMPLEMENTED: .so with absolute path or \(dq..\(dq"
+.Pq roff
+For security reasons,
+.Nm
+allows
+.Ic \&so
+file inclusion requests only with relative paths
+and only without ascending to any parent directory.
+By requesting the inclusion of a sensitive file, a malicious document
+might otherwise trick a privileged user into inadvertently displaying
+the file on the screen, revealing the file content to bystanders.
+The parser exits immediately.
+.It Sy ".so request failed"
+.Pq roff
+Servicing a
+.Ic \&so
+request requires reading an external file.
+While trying to do so, an
+.Xr open 2 ,
+.Xr stat 2 ,
+or
+.Xr read 2
+system call failed.
+The parser exits immediately.
+Before showing this message,
+.Nm
+always shows another message explaining why the system call failed.
+.El
.Sh COMPATIBILITY
This section summarises
.Nm
diff --git a/mandoc.3 b/mandoc.3
index fe6503d5477e..8f76ad21fff4 100644
--- a/mandoc.3
+++ b/mandoc.3
@@ -1,4 +1,4 @@
-.\" $Id: mandoc.3,v 1.22 2013/10/06 17:01:52 schwarze Exp $
+.\" $Id: mandoc.3,v 1.25 2014/08/05 05:48:56 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,21 +15,16 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: October 6 2013 $
+.Dd $Mdocdate: August 5 2014 $
.Dt MANDOC 3
.Os
.Sh NAME
.Nm mandoc ,
-.Nm mandoc_escape ,
+.Nm man_deroff ,
.Nm man_meta ,
.Nm man_mparse ,
.Nm man_node ,
-.Nm mchars_alloc ,
-.Nm mchars_free ,
-.Nm mchars_num2char ,
-.Nm mchars_num2uc ,
-.Nm mchars_spec2cp ,
-.Nm mchars_spec2str ,
+.Nm mdoc_deroff ,
.Nm mdoc_meta ,
.Nm mdoc_node ,
.Nm mparse_alloc ,
@@ -45,68 +40,32 @@
.Sh LIBRARY
.Lb libmandoc
.Sh SYNOPSIS
-.In man.h
-.In mdoc.h
+.In sys/types.h
.In mandoc.h
-.Ft "enum mandoc_esc"
-.Fo mandoc_escape
-.Fa "const char const **end"
-.Fa "const char const **start"
-.Fa "int *sz"
-.Fc
-.Ft "const struct man_meta *"
-.Fo man_meta
-.Fa "const struct man *man"
-.Fc
-.Ft "const struct mparse *"
-.Fo man_mparse
-.Fa "const struct man *man"
-.Fc
-.Ft "const struct man_node *"
-.Fo man_node
-.Fa "const struct man *man"
-.Fc
-.Ft "struct mchars *"
-.Fn mchars_alloc "void"
-.Ft void
-.Fn mchars_free "struct mchars *p"
-.Ft char
-.Fn mchars_num2char "const char *cp" "size_t sz"
-.Ft int
-.Fn mchars_num2uc "const char *cp" "size_t sz"
-.Ft "const char *"
-.Fo mchars_spec2str
-.Fa "const struct mchars *p"
-.Fa "const char *cp"
-.Fa "size_t sz"
-.Fa "size_t *rsz"
-.Fc
-.Ft int
-.Fo mchars_spec2cp
-.Fa "const struct mchars *p"
-.Fa "const char *cp"
-.Fa "size_t sz"
-.Fc
-.Ft "const struct mdoc_meta *"
-.Fo mdoc_meta
-.Fa "const struct mdoc *mdoc"
-.Fc
-.Ft "const struct mdoc_node *"
-.Fo mdoc_node
-.Fa "const struct mdoc *mdoc"
-.Fc
-.Ft void
+.Fd "#define ASCII_NBRSP"
+.Fd "#define ASCII_HYPH"
+.Fd "#define ASCII_BREAK"
+.Ft struct mparse *
.Fo mparse_alloc
-.Fa "enum mparset type"
+.Fa "int options"
.Fa "enum mandoclevel wlevel"
-.Fa "mandocmsg msg"
-.Fa "void *msgarg"
+.Fa "mandocmsg mmsg"
+.Fa "char *defos"
+.Fc
+.Ft void
+.Fo (*mandocmsg)
+.Fa "enum mandocerr errtype"
+.Fa "enum mandoclevel level"
+.Fa "const char *file"
+.Fa "int line"
+.Fa "int col"
+.Fa "const char *msg"
.Fc
.Ft void
.Fo mparse_free
.Fa "struct mparse *parse"
.Fc
-.Ft void
+.Ft const char *
.Fo mparse_getkeep
.Fa "const struct mparse *parse"
.Fc
@@ -129,6 +88,7 @@
.Fa "struct mparse *parse"
.Fa "struct mdoc **mdoc"
.Fa "struct man **man"
+.Fa "char **sodest"
.Fc
.Ft "const char *"
.Fo mparse_strerror
@@ -138,11 +98,45 @@
.Fo mparse_strlevel
.Fa "enum mandoclevel"
.Fc
-.Vt extern const char * const * man_macronames;
+.In sys/types.h
+.In mandoc.h
+.In mdoc.h
+.Ft void
+.Fo mdoc_deroff
+.Fa "char **dest"
+.Fa "const struct mdoc_node *node"
+.Fc
+.Ft "const struct mdoc_meta *"
+.Fo mdoc_meta
+.Fa "const struct mdoc *mdoc"
+.Fc
+.Ft "const struct mdoc_node *"
+.Fo mdoc_node
+.Fa "const struct mdoc *mdoc"
+.Fc
.Vt extern const char * const * mdoc_argnames;
.Vt extern const char * const * mdoc_macronames;
-.Fd "#define ASCII_NBRSP"
-.Fd "#define ASCII_HYPH"
+.In sys/types.h
+.In mandoc.h
+.In man.h
+.Ft void
+.Fo man_deroff
+.Fa "char **dest"
+.Fa "const struct man_node *node"
+.Fc
+.Ft "const struct man_meta *"
+.Fo man_meta
+.Fa "const struct man *man"
+.Fc
+.Ft "const struct mparse *"
+.Fo man_mparse
+.Fa "const struct man *man"
+.Fc
+.Ft "const struct man_node *"
+.Fo man_node
+.Fa "const struct man *man"
+.Fc
+.Vt extern const char * const * man_macronames;
.Sh DESCRIPTION
The
.Nm mandoc
@@ -184,37 +178,22 @@ or invoke
.Fn mparse_reset
and parse new files.
.El
-.Pp
-The
-.Nm
-library also contains routines for translating character strings into glyphs
-.Pq see Fn mchars_alloc
-and parsing escape sequences from strings
-.Pq see Fn mandoc_escape .
.Sh REFERENCE
This section documents the functions, types, and variables available
via
-.In mandoc.h .
+.In mandoc.h ,
+with the exception of those documented in
+.Xr mandoc_escape 3
+and
+.Xr mchars_alloc 3 .
.Ss Types
.Bl -ohang
-.It Vt "enum mandoc_esc"
-An escape sequence classification.
.It Vt "enum mandocerr"
A fatal error, error, or warning message during parsing.
.It Vt "enum mandoclevel"
A classification of an
-.Vt "enum mandoclevel"
+.Vt "enum mandocerr"
as regards system operation.
-.It Vt "struct mchars"
-An opaque pointer to an object allowing for translation between
-character strings and glyphs.
-See
-.Fn mchars_alloc .
-.It Vt "enum mparset"
-The type of parser when reading input.
-This should usually be
-.Dv MPARSE_AUTO
-for auto-detection.
.It Vt "struct mparse"
An opaque pointer to a running parse sequence.
Created with
@@ -230,38 +209,20 @@ messages emitted by the parser.
.El
.Ss Functions
.Bl -ohang
-.It Fn mandoc_escape
-Scan an escape sequence, i.e., a character string beginning with
-.Sq \e .
-Pass a pointer to the character after the
-.Sq \e
-as
-.Va end ;
-it will be set to the supremum of the parsed escape sequence unless
-returning
-.Dv ESCAPE_ERROR ,
-in which case the string is bogus and should be
-thrown away.
-If not
-.Dv ESCAPE_ERROR
-or
-.Dv ESCAPE_IGNORE ,
-.Va start
-is set to the first relevant character of the substring (font, glyph,
-whatever) of length
-.Va sz .
-Both
-.Va start
-and
-.Va sz
-may be
-.Dv NULL .
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa mandoc.c .
+.It Fn man_deroff
+Obtain a text-only representation of a
+.Vt struct man_node ,
+including text contained in its child nodes.
+To be used on children of the pointer returned from
+.Fn man_node .
+When it is no longer needed, the pointer returned from
+.Fn man_deroff
+can be passed to
+.Xr free 3 .
.It Fn man_meta
-Obtain the meta-data of a successful parse.
+Obtain the meta-data of a successful
+.Xr man 7
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
Declared in
@@ -275,67 +236,29 @@ Declared in
implemented in
.Pa man.c .
.It Fn man_node
-Obtain the root node of a successful parse.
+Obtain the root node of a successful
+.Xr man 7
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
Declared in
.In man.h ,
implemented in
.Pa man.c .
-.It Fn mchars_alloc
-Allocate an
-.Vt "struct mchars *"
-object for translating special characters into glyphs.
-See
-.Xr mandoc_char 7
-for an overview of special characters.
-The object must be freed with
-.Fn mchars_free .
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa chars.c .
-.It Fn mchars_free
-Free an object created with
-.Fn mchars_alloc .
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa chars.c .
-.It Fn mchars_num2char
-Convert a character index (e.g., the \eN\(aq\(aq escape) into a
-printable ASCII character.
-Returns \e0 (the nil character) if the input sequence is malformed.
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa chars.c .
-.It Fn mchars_num2uc
-Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
-a Unicode codepoint.
-Returns \e0 (the nil character) if the input sequence is malformed.
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa chars.c .
-.It Fn mchars_spec2cp
-Convert a special character into a valid Unicode codepoint.
-Returns \-1 on failure or a non-zero Unicode codepoint on success.
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa chars.c .
-.It Fn mchars_spec2str
-Convert a special character into an ASCII string.
-Returns
-.Dv NULL
-on failure.
-Declared in
-.In mandoc.h ,
-implemented in
-.Pa chars.c .
+.It Fn mdoc_deroff
+Obtain a text-only representation of a
+.Vt struct mdoc_node ,
+including text contained in its child nodes.
+To be used on children of the pointer returned from
+.Fn mdoc_node .
+When it is no longer needed, the pointer returned from
+.Fn mdoc_deroff
+can be passed to
+.Xr free 3 .
.It Fn mdoc_meta
-Obtain the meta-data of a successful parse.
+Obtain the meta-data of a successful
+.Xr mdoc 7
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
Declared in
@@ -343,7 +266,9 @@ Declared in
implemented in
.Pa mdoc.c .
.It Fn mdoc_node
-Obtain the root node of a successful parse.
+Obtain the root node of a successful
+.Xr mdoc
+parse.
This may only be used on a pointer returned by
.Fn mparse_result .
Declared in
@@ -352,6 +277,57 @@ implemented in
.Pa mdoc.c .
.It Fn mparse_alloc
Allocate a parser.
+The arguments have the following effect:
+.Bl -tag -offset 5n -width inttype
+.It Ar options
+When the
+.Dv MPARSE_MDOC
+or
+.Dv MPARSE_MAN
+bit is set, only that parser is used.
+Otherwise, the document type is automatically detected.
+.Pp
+When the
+.Dv MPARSE_SO
+bit is set,
+.Xr roff 7
+.Ic \&so
+file inclusion requests are always honoured.
+Otherwise, if the request is the only content in an input file,
+only the file name is remembered, to be returned in the
+.Fa sodest
+argument of
+.Fn mparse_result .
+.Pp
+When the
+.Dv MPARSE_QUICK
+bit is set, parsing is aborted after the NAME section.
+This is for example useful in
+.Xr makewhatis 8
+.Fl Q
+to quickly build minimal databases.
+.It Ar wlevel
+Can be set to
+.Dv MANDOCLEVEL_FATAL ,
+.Dv MANDOCLEVEL_ERROR ,
+or
+.Dv MANDOCLEVEL_WARNING .
+Messages below the selected level will be suppressed.
+.It Ar mmsg
+A callback function to handle errors and warnings.
+See
+.Pa main.c
+for an example.
+.It Ar defos
+A default string for the
+.Xr mdoc 7
+.Sq \&Os
+macro, overriding the
+.Dv OSNAME
+preprocessor definition and the results of
+.Xr uname 3 .
+.El
+.Pp
The same parser may be used for multiple files so long as
.Fn mparse_reset
is called between parses.
@@ -419,7 +395,7 @@ i.e., those where
.Fn mparse_readfd
returned less than MANDOCLEVEL_FATAL
.Pc
-should invoke this function, in which case one of the two pointers will
+should invoke this function, in which case one of the three pointers will
be filled in.
Declared in
.In mandoc.h ,
@@ -473,6 +449,8 @@ The following non-printing characters may be embedded in text strings:
A non-breaking space character.
.It Dv ASCII_HYPH
A soft hyphen.
+.It Dv ASCII_BREAK
+A breakable zero-width space.
.El
.Pp
Escape characters are also passed verbatim into text strings.
@@ -480,11 +458,9 @@ An escape character is a sequence of characters beginning with the
backslash
.Pq Sq \e .
To construct human-readable text, these should be intercepted with
-.Fn mandoc_escape
-and converted with one of
-.Fn mchars_num2char ,
-.Fn mchars_spec2str ,
-and so on.
+.Xr mandoc_escape 3
+and converted with one of the functions described in
+.Xr mchars_alloc 3 .
.Ss Man Abstract Syntax Tree
This AST is governed by the ontological rules dictated in
.Xr man 7
@@ -529,7 +505,7 @@ where capitalised non-terminals represent nodes.
.El
.Pp
The only elements capable of nesting other elements are those with
-next-lint scope as documented in
+next-line scope as documented in
.Xr man 7 .
.Ss Mdoc Abstract Syntax Tree
This AST is governed by the ontological
@@ -665,10 +641,13 @@ front-ends to
.Xr mandoc 1
are unable to render them in any meaningful way.
Furthermore, behaviour when encountering badly-nested blocks is not
-consistent across troff implementations, especially when using multiple
+consistent across troff implementations, especially when using multiple
levels of badly-nested blocks.
.Sh SEE ALSO
.Xr mandoc 1 ,
+.Xr mandoc_escape 3 ,
+.Xr mandoc_malloc 3 ,
+.Xr mchars_alloc 3 ,
.Xr eqn 7 ,
.Xr man 7 ,
.Xr mandoc_char 7 ,
diff --git a/mandoc.c b/mandoc.c
index da738f20fa72..dd2227299931 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,7 +1,7 @@
-/* $Id: mandoc.c,v 1.74 2013/12/30 18:30:32 schwarze Exp $ */
+/* $Id: mandoc.c,v 1.83 2014/07/06 19:09:00 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -31,6 +31,7 @@
#include <time.h>
#include "mandoc.h"
+#include "mandoc_aux.h"
#include "libmandoc.h"
#define DATESIZE 32
@@ -45,7 +46,7 @@ mandoc_escape(const char **end, const char **start, int *sz)
const char *local_start;
int local_sz;
char term;
- enum mandoc_esc gly;
+ enum mandoc_esc gly;
/*
* When the caller doesn't provide return storage,
@@ -74,11 +75,11 @@ mandoc_escape(const char **end, const char **start, int *sz)
* these, but each eventually returns a substring of the glyph
* name.
*/
- case ('('):
+ case '(':
gly = ESCAPE_SPECIAL;
*sz = 2;
break;
- case ('['):
+ case '[':
gly = ESCAPE_SPECIAL;
/*
* Unicode escapes are defined in groff as \[uXXXX] to
@@ -90,7 +91,7 @@ mandoc_escape(const char **end, const char **start, int *sz)
gly = ESCAPE_UNICODE;
term = ']';
break;
- case ('C'):
+ case 'C':
if ('\'' != **start)
return(ESCAPE_ERROR);
*start = ++*end;
@@ -104,50 +105,50 @@ mandoc_escape(const char **end, const char **start, int *sz)
/*
* Escapes taking no arguments at all.
*/
- case ('d'):
+ case 'd':
/* FALLTHROUGH */
- case ('u'):
+ case 'u':
return(ESCAPE_IGNORE);
/*
* The \z escape is supposed to output the following
- * character without advancing the cursor position.
+ * character without advancing the cursor position.
* Since we are mostly dealing with terminal mode,
* let us just skip the next character.
*/
- case ('z'):
+ case 'z':
return(ESCAPE_SKIPCHAR);
/*
* Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
* 'X' is the trigger. These have opaque sub-strings.
*/
- case ('F'):
+ case 'F':
/* FALLTHROUGH */
- case ('g'):
+ case 'g':
/* FALLTHROUGH */
- case ('k'):
+ case 'k':
/* FALLTHROUGH */
- case ('M'):
+ case 'M':
/* FALLTHROUGH */
- case ('m'):
+ case 'm':
/* FALLTHROUGH */
- case ('n'):
+ case 'n':
/* FALLTHROUGH */
- case ('V'):
+ case 'V':
/* FALLTHROUGH */
- case ('Y'):
+ case 'Y':
gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('f'):
+ case 'f':
if (ESCAPE_ERROR == gly)
gly = ESCAPE_FONT;
switch (**start) {
- case ('('):
+ case '(':
*start = ++*end;
*sz = 2;
break;
- case ('['):
+ case '[':
*start = ++*end;
term = ']';
break;
@@ -160,60 +161,59 @@ mandoc_escape(const char **end, const char **start, int *sz)
/*
* These escapes are of the form \X'Y', where 'X' is the trigger
* and 'Y' is any string. These have opaque sub-strings.
+ * The \B and \w escapes are handled in roff.c, roff_res().
*/
- case ('A'):
+ case 'A':
/* FALLTHROUGH */
- case ('b'):
+ case 'b':
/* FALLTHROUGH */
- case ('B'):
+ case 'D':
/* FALLTHROUGH */
- case ('D'):
+ case 'o':
/* FALLTHROUGH */
- case ('o'):
+ case 'R':
/* FALLTHROUGH */
- case ('R'):
+ case 'X':
/* FALLTHROUGH */
- case ('w'):
- /* FALLTHROUGH */
- case ('X'):
- /* FALLTHROUGH */
- case ('Z'):
- if ('\'' != **start)
+ case 'Z':
+ if ('\0' == **start)
return(ESCAPE_ERROR);
gly = ESCAPE_IGNORE;
+ term = **start;
*start = ++*end;
- term = '\'';
break;
/*
* These escapes are of the form \X'N', where 'X' is the trigger
* and 'N' resolves to a numerical expression.
*/
- case ('h'):
+ case 'h':
/* FALLTHROUGH */
- case ('H'):
+ case 'H':
/* FALLTHROUGH */
- case ('L'):
+ case 'L':
/* FALLTHROUGH */
- case ('l'):
+ case 'l':
/* FALLTHROUGH */
- case ('S'):
+ case 'S':
/* FALLTHROUGH */
- case ('v'):
+ case 'v':
/* FALLTHROUGH */
- case ('x'):
- if ('\'' != **start)
+ case 'x':
+ if (strchr(" %&()*+-./0123456789:<=>", **start)) {
+ ++*end;
return(ESCAPE_ERROR);
+ }
gly = ESCAPE_IGNORE;
+ term = **start;
*start = ++*end;
- term = '\'';
break;
/*
* Special handling for the numbered character escape.
* XXX Do any other escapes need similar handling?
*/
- case ('N'):
+ case 'N':
if ('\0' == **start)
return(ESCAPE_ERROR);
(*end)++;
@@ -229,10 +229,10 @@ mandoc_escape(const char **end, const char **start, int *sz)
(*end)++;
return(ESCAPE_NUMBERED);
- /*
+ /*
* Sizes get a special category of their own.
*/
- case ('s'):
+ case 's':
gly = ESCAPE_IGNORE;
/* See +/- counts as a sign. */
@@ -240,15 +240,15 @@ mandoc_escape(const char **end, const char **start, int *sz)
(*end)++;
switch (**end) {
- case ('('):
+ case '(':
*start = ++*end;
*sz = 2;
break;
- case ('['):
+ case '[':
*start = ++*end;
term = ']';
break;
- case ('\''):
+ case '\'':
*start = ++*end;
term = '\'';
break;
@@ -280,9 +280,9 @@ mandoc_escape(const char **end, const char **start, int *sz)
if ('\0' != term) {
while (**end != term) {
switch (**end) {
- case ('\0'):
+ case '\0':
return(ESCAPE_ERROR);
- case ('\\'):
+ case '\\':
(*end)++;
if (ESCAPE_ERROR ==
mandoc_escape(end, NULL, NULL))
@@ -304,7 +304,7 @@ mandoc_escape(const char **end, const char **start, int *sz)
/* Run post-processors. */
switch (gly) {
- case (ESCAPE_FONT):
+ case ESCAPE_FONT:
if (2 == *sz) {
if ('C' == **start) {
/*
@@ -322,27 +322,27 @@ mandoc_escape(const char **end, const char **start, int *sz)
break;
switch (**start) {
- case ('3'):
+ case '3':
/* FALLTHROUGH */
- case ('B'):
+ case 'B':
gly = ESCAPE_FONTBOLD;
break;
- case ('2'):
+ case '2':
/* FALLTHROUGH */
- case ('I'):
+ case 'I':
gly = ESCAPE_FONTITALIC;
break;
- case ('P'):
+ case 'P':
gly = ESCAPE_FONTPREV;
break;
- case ('1'):
+ case '1':
/* FALLTHROUGH */
- case ('R'):
+ case 'R':
gly = ESCAPE_FONTROMAN;
break;
}
break;
- case (ESCAPE_SPECIAL):
+ case ESCAPE_SPECIAL:
if (1 == *sz && 'c' == **start)
gly = ESCAPE_NOSPACE;
break;
@@ -353,74 +353,6 @@ mandoc_escape(const char **end, const char **start, int *sz)
return(gly);
}
-void *
-mandoc_calloc(size_t num, size_t size)
-{
- void *ptr;
-
- ptr = calloc(num, size);
- if (NULL == ptr) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
-
- return(ptr);
-}
-
-
-void *
-mandoc_malloc(size_t size)
-{
- void *ptr;
-
- ptr = malloc(size);
- if (NULL == ptr) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
-
- return(ptr);
-}
-
-
-void *
-mandoc_realloc(void *ptr, size_t size)
-{
-
- ptr = realloc(ptr, size);
- if (NULL == ptr) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
-
- return(ptr);
-}
-
-char *
-mandoc_strndup(const char *ptr, size_t sz)
-{
- char *p;
-
- p = mandoc_malloc(sz + 1);
- memcpy(p, ptr, sz);
- p[(int)sz] = '\0';
- return(p);
-}
-
-char *
-mandoc_strdup(const char *ptr)
-{
- char *p;
-
- p = strdup(ptr);
- if (NULL == p) {
- perror(NULL);
- exit((int)MANDOCLEVEL_SYSERR);
- }
-
- return(p);
-}
-
/*
* Parse a quoted or unquoted roff-style request or macro argument.
* Return a pointer to the parsed argument, which is either the original
@@ -442,7 +374,7 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
if ('"' == *start) {
quoted = 1;
start++;
- }
+ }
pairs = 0;
white = 0;
@@ -461,14 +393,14 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
* backslashes and backslash-t to literal tabs.
*/
switch (cp[1]) {
- case ('t'):
+ case 't':
cp[0] = '\t';
/* FALLTHROUGH */
- case ('\\'):
+ case '\\':
pairs++;
cp++;
break;
- case (' '):
+ case ' ':
/* Skip escaped blanks. */
if (0 == quoted)
cp++;
@@ -497,7 +429,7 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
/* Quoted argument without a closing quote. */
if (1 == quoted)
- mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
+ mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL);
/* NUL-terminate this argument and move to the next one. */
if (pairs)
@@ -511,7 +443,7 @@ mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
*cpp = cp;
if ('\0' == *cp && (white || ' ' == cp[-1]))
- mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
+ mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL);
return(start);
}
@@ -579,14 +511,14 @@ mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
if (NULL == in || '\0' == *in ||
0 == strcmp(in, "$" "Mdocdate$")) {
- mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
+ mandoc_msg(MANDOCERR_DATE_MISSING, parse, ln, pos, NULL);
time(&t);
}
else if (a2time(&t, "%Y-%m-%d", in))
t = 0;
else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
!a2time(&t, "%b %d, %Y", in)) {
- mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
+ mandoc_msg(MANDOCERR_DATE_BAD, parse, ln, pos, in);
t = 0;
}
out = t ? time2a(t) : NULL;
@@ -594,10 +526,10 @@ mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
}
int
-mandoc_eos(const char *p, size_t sz, int enclosed)
+mandoc_eos(const char *p, size_t sz)
{
- const char *q;
- int found;
+ const char *q;
+ int enclosed, found;
if (0 == sz)
return(0);
@@ -608,24 +540,24 @@ mandoc_eos(const char *p, size_t sz, int enclosed)
* propagate outward.
*/
- found = 0;
+ enclosed = found = 0;
for (q = p + (int)sz - 1; q >= p; q--) {
switch (*q) {
- case ('\"'):
+ case '\"':
/* FALLTHROUGH */
- case ('\''):
+ case '\'':
/* FALLTHROUGH */
- case (']'):
+ case ']':
/* FALLTHROUGH */
- case (')'):
+ case ')':
if (0 == found)
enclosed = 1;
break;
- case ('.'):
+ case '.':
/* FALLTHROUGH */
- case ('!'):
+ case '!':
/* FALLTHROUGH */
- case ('?'):
+ case '?':
found = 1;
break;
default:
diff --git a/mandoc.db.5 b/mandoc.db.5
new file mode 100644
index 000000000000..8d5649578f54
--- /dev/null
+++ b/mandoc.db.5
@@ -0,0 +1,144 @@
+.\" $Id: mandoc.db.5,v 1.1 2014/04/15 20:18:26 schwarze Exp $
+.\"
+.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: April 15 2014 $
+.Dt MANDOC.DB 5
+.Os
+.Sh NAME
+.Nm mandoc.db
+.Nd manual page database
+.Sh DESCRIPTION
+The
+.Nm
+SQLite3 file format is used to store information about installed manual
+pages to facilitate semantic searching for manuals.
+Each manual page tree contains its own
+.Nm
+file; see
+.Sx FILES
+for examples.
+.Pp
+Such database files are generated by
+.Xr makewhatis 8
+and used by
+.Xr apropos 1
+and
+.Xr whatis 1 .
+.Pp
+One line in the following tables describes:
+.Bl -tag -width Ds
+.It Sy mpages
+One physical manual page file, no matter how many times and under which
+names it may appear in the file system.
+.It Sy mlinks
+One entry in the file system, no matter which content it points to.
+.It Sy names
+One manual page name, no matter whether it appears in a page header,
+in a NAME or SYNOPSIS section, or as a file name.
+.It Sy keys
+One chunk of text from some macro invocation.
+.El
+.Pp
+Each record in the latter three tables uses its
+.Va pageid
+column to point to a record in the
+.Sy mpages
+table.
+.Pp
+The other columns are as follows; unless stated otherwise, they are
+of type
+.Vt TEXT .
+.Bl -tag -width mpages.desc
+.It Sy mpages.desc
+The description line
+.Pq Sq \&Nd
+of the page.
+.It Sy mpages.form
+The
+.Vt INTEGER
+1 if the page is unformatted, i.e. in
+.Xr mdoc 7
+or
+.Xr man 7
+format, and 2 if it is formatted, i.e. a
+.Sq cat
+page.
+.It Sy mlinks.sec
+The manual section as found in the subdirectory name.
+.It Sy mlinks.arch
+The manual architecture as found in the subdirectory name, or
+.Qq any .
+.It Sy mlinks.name
+The manual name as found in the file name.
+.It Sy names.bits
+An
+.Vt INTEGER
+bit mask telling whether the name came from a header line, from the
+NAME or SYNOPSIS section, or from a file name.
+Bits are defined in
+.In mansearch.h .
+.It Sy names.name
+The name itself.
+.It Sy keys.bits
+An
+.Vt INTEGER
+bit mask telling which semantic contexts the key was found in;
+defined in
+.In mansearch.h ,
+documented in
+.Xr apropos 1 .
+.It Sy keys.key
+The string found in those contexts.
+.El
+.Sh FILES
+.Bl -tag -width /usr/share/mandoc.db -compact
+.It Pa /usr/share/mandoc.db
+The manual page database for the base system.
+.It Pa /usr/X11R6/mandoc.db
+The same for the
+.Xr X 7
+Window System.
+.It Pa /usr/local/mandoc.db
+The same for
+.Xr packages 7 .
+.El
+.Sh SEE ALSO
+.Xr apropos 1 ,
+.Xr man 1 ,
+.Xr sqlite3 1 ,
+.Xr whatis 1 ,
+.Xr mansearch 3 ,
+.Xr makewhatis 8
+.Sh HISTORY
+A manual page database
+.Pa /usr/lib/whatis
+first appeared in
+.Bx 2 .
+The present format first appeared in
+.Ox 5.6 .
+.Sh AUTHORS
+.An -nosplit
+The original version of
+.Xr makewhatis 8
+was written by
+.An Bill Joy
+in 1979.
+An SQLite3 version was first implemented by
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
+in 2012.
+The present database format was designed by
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org
+in 2014.
diff --git a/mandoc.h b/mandoc.h
index 4c6a32f7a62d..fd91314d76e8 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,7 +1,7 @@
-/* $Id: mandoc.h,v 1.112 2013/12/30 18:30:32 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.152 2014/08/06 15:09:05 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +20,7 @@
#define ASCII_NBRSP 31 /* non-breaking space */
#define ASCII_HYPH 30 /* breakable hyphen */
+#define ASCII_BREAK 29 /* breakable zero-width space */
/*
* Status level. This refers to both internal status (i.e., whilst
@@ -48,66 +49,78 @@ enum mandocerr {
MANDOCERR_WARNING, /* ===== start of warnings ===== */
/* related to the prologue */
- MANDOCERR_NOTITLE, /* no title in document */
- MANDOCERR_UPPERCASE, /* document title should be all caps */
- MANDOCERR_BADMSEC, /* unknown manual section */
- MANDOCERR_BADVOLARCH, /* unknown manual volume or arch */
- MANDOCERR_NODATE, /* date missing, using today's date */
- MANDOCERR_BADDATE, /* cannot parse date, using it verbatim */
- MANDOCERR_PROLOGOOO, /* prologue macros out of order */
- MANDOCERR_PROLOGREP, /* duplicate prologue macro */
- MANDOCERR_BADPROLOG, /* macro not allowed in prologue */
- MANDOCERR_BADBODY, /* macro not allowed in body */
+ MANDOCERR_DT_NOTITLE, /* missing manual title, using UNTITLED: line */
+ MANDOCERR_TH_NOTITLE, /* missing manual title, using "": [macro] */
+ MANDOCERR_TITLE_CASE, /* lower case character in document title */
+ MANDOCERR_MSEC_MISSING, /* missing manual section, using "": macro */
+ MANDOCERR_MSEC_BAD, /* unknown manual section: Dt ... section */
+ MANDOCERR_ARCH_BAD, /* unknown manual volume or arch: Dt ... volume */
+ MANDOCERR_DATE_MISSING, /* missing date, using today's date */
+ MANDOCERR_DATE_BAD, /* cannot parse date, using it verbatim: date */
+ MANDOCERR_OS_MISSING, /* missing Os macro, using "" */
+ MANDOCERR_PROLOG_REP, /* duplicate prologue macro: macro */
+ MANDOCERR_PROLOG_LATE, /* late prologue macro: macro */
+ MANDOCERR_DT_LATE, /* skipping late title macro: Dt args */
+ MANDOCERR_PROLOG_ORDER, /* prologue macros out of order: macros */
/* related to document structure */
- MANDOCERR_SO, /* .so is fragile, better use ln(1) */
- MANDOCERR_NAMESECFIRST, /* NAME section must come first */
- MANDOCERR_BADNAMESEC, /* bad NAME section contents */
- MANDOCERR_SECOOO, /* sections out of conventional order */
- MANDOCERR_SECREP, /* duplicate section name */
- MANDOCERR_SECMSEC, /* section header suited to sections ... */
+ MANDOCERR_SO, /* .so is fragile, better use ln(1): so path */
+ MANDOCERR_DOC_EMPTY, /* no document body */
+ MANDOCERR_SEC_BEFORE, /* content before first section header: macro */
+ MANDOCERR_NAMESEC_FIRST, /* first section is not NAME: Sh title */
+ MANDOCERR_NAMESEC_BAD, /* bad NAME section contents: macro */
+ MANDOCERR_SEC_ORDER, /* sections out of conventional order: Sh title */
+ MANDOCERR_SEC_REP, /* duplicate section title: Sh title */
+ MANDOCERR_SEC_MSEC, /* unexpected section: Sh title for ... only */
/* related to macros and nesting */
- MANDOCERR_MACROOBS, /* skipping obsolete macro */
- MANDOCERR_IGNPAR, /* skipping paragraph macro */
- MANDOCERR_MOVEPAR, /* moving paragraph macro out of list */
- MANDOCERR_IGNNS, /* skipping no-space macro */
- MANDOCERR_SCOPENEST, /* blocks badly nested */
- MANDOCERR_CHILD, /* child violates parent syntax */
- MANDOCERR_NESTEDDISP, /* nested displays are not portable */
- MANDOCERR_SCOPEREP, /* already in literal mode */
- MANDOCERR_LINESCOPE, /* line scope broken */
-
- /* related to missing macro arguments */
- MANDOCERR_MACROEMPTY, /* skipping empty macro */
+ MANDOCERR_MACRO_OBS, /* obsolete macro: macro */
+ MANDOCERR_PAR_SKIP, /* skipping paragraph macro: macro ... */
+ MANDOCERR_PAR_MOVE, /* moving paragraph macro out of list: macro */
+ MANDOCERR_NS_SKIP, /* skipping no-space macro */
+ MANDOCERR_BLK_NEST, /* blocks badly nested: macro ... */
+ MANDOCERR_BD_NEST, /* nested displays are not portable: macro ... */
+ MANDOCERR_BL_MOVE, /* moving content out of list: macro */
+ MANDOCERR_VT_CHILD, /* .Vt block has child macro: macro */
+ MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */
+ MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */
+ MANDOCERR_BLK_LINE, /* line scope broken: macro breaks macro */
+
+ /* related to missing arguments */
+ MANDOCERR_REQ_EMPTY, /* skipping empty request: request */
+ MANDOCERR_COND_EMPTY, /* conditional request controls empty scope */
+ MANDOCERR_MACRO_EMPTY, /* skipping empty macro: macro */
+ MANDOCERR_ARG_EMPTY, /* empty argument, using 0n: macro arg */
MANDOCERR_ARGCWARN, /* argument count wrong */
- MANDOCERR_DISPTYPE, /* missing display type */
- MANDOCERR_LISTFIRST, /* list type must come first */
- MANDOCERR_NOWIDTHARG, /* tag lists require a width argument */
- MANDOCERR_FONTTYPE, /* missing font type */
- MANDOCERR_WNOSCOPE, /* skipping end of block that is not open */
-
- /* related to bad macro arguments */
- MANDOCERR_IGNARGV, /* skipping argument */
- MANDOCERR_ARGVREP, /* duplicate argument */
- MANDOCERR_DISPREP, /* duplicate display type */
- MANDOCERR_LISTREP, /* duplicate list type */
- MANDOCERR_BADATT, /* unknown AT&T UNIX version */
- MANDOCERR_BADBOOL, /* bad Boolean value */
- MANDOCERR_BADFONT, /* unknown font */
- MANDOCERR_BADSTANDARD, /* unknown standard specifier */
- MANDOCERR_BADWIDTH, /* bad width argument */
+ MANDOCERR_BD_NOTYPE, /* missing display type, using -ragged: Bd */
+ MANDOCERR_BL_LATETYPE, /* list type is not the first argument: Bl arg */
+ MANDOCERR_BL_NOWIDTH, /* missing -width in -tag list, using 8n */
+ MANDOCERR_EX_NONAME, /* missing utility name, using "": Ex */
+ MANDOCERR_IT_NOHEAD, /* empty head in list item: Bl -type It */
+ MANDOCERR_IT_NOBODY, /* empty list item: Bl -type It */
+ MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */
+ MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */
+ MANDOCERR_ARG_STD, /* missing -std argument, adding it: macro */
+
+ /* related to bad arguments */
+ MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */
+ MANDOCERR_ARG_REP, /* duplicate argument: macro arg */
+ MANDOCERR_AN_REP, /* skipping duplicate argument: An -arg */
+ MANDOCERR_BD_REP, /* skipping duplicate display type: Bd -type */
+ MANDOCERR_BL_REP, /* skipping duplicate list type: Bl -type */
+ MANDOCERR_BL_SKIPW, /* skipping -width argument: Bl -type */
+ MANDOCERR_AT_BAD, /* unknown AT&T UNIX version: At version */
+ MANDOCERR_RS_BAD, /* invalid content in Rs block: macro */
+ MANDOCERR_SM_BAD, /* invalid Boolean argument: macro arg */
+ MANDOCERR_FT_BAD, /* unknown font, skipping request: ft font */
/* related to plain text */
- MANDOCERR_NOBLANKLN, /* blank line in non-literal context */
- MANDOCERR_BADTAB, /* tab in non-literal context */
- MANDOCERR_EOLNSPACE, /* end of line whitespace */
- MANDOCERR_BADCOMMENT, /* bad comment style */
- MANDOCERR_BADESCAPE, /* unknown escape sequence */
- MANDOCERR_BADQUOTE, /* unterminated quoted string */
-
- /* related to equations */
- MANDOCERR_EQNQUOTE, /* unexpected literal in equation */
+ MANDOCERR_FI_BLANK, /* blank line in fill mode, using .sp */
+ MANDOCERR_FI_TAB, /* tab in filled text */
+ MANDOCERR_SPACE_EOL, /* whitespace at end of input line */
+ MANDOCERR_COMMENT_BAD, /* bad comment style */
+ MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */
+ MANDOCERR_STR_UNDEF, /* undefined string, using "": name */
MANDOCERR_ERROR, /* ===== start of errors ===== */
@@ -128,40 +141,40 @@ enum mandocerr {
MANDOCERR_TBLBLOCK, /* data block still open */
MANDOCERR_TBLEXTRADAT, /* ignoring extra data cells */
+ /* related to document structure and macros */
MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */
- MANDOCERR_BADCHAR, /* skipping bad character */
- MANDOCERR_NAMESC, /* escaped character not allowed in a name */
- MANDOCERR_NONAME, /* manual name not yet set */
- MANDOCERR_NOTEXT, /* skipping text before the first section header */
- MANDOCERR_MACRO, /* skipping unknown macro */
- MANDOCERR_REQUEST, /* NOT IMPLEMENTED: skipping request */
+ MANDOCERR_BADCHAR, /* skipping bad character: number */
+ MANDOCERR_MACRO, /* skipping unknown macro: macro */
+ MANDOCERR_IT_STRAY, /* skipping item outside list: It ... */
+ MANDOCERR_TA_STRAY, /* skipping column outside column list: Ta */
+ MANDOCERR_BLK_NOTOPEN, /* skipping end of block that is not open */
+ MANDOCERR_BLK_BROKEN, /* inserting missing end of block: macro ... */
+ MANDOCERR_BLK_NOEND, /* appending missing end of block: macro */
+
+ /* related to request and macro arguments */
+ MANDOCERR_NAMESC, /* escaped character not allowed in a name: name */
MANDOCERR_ARGCOUNT, /* argument count wrong */
- MANDOCERR_STRAYTA, /* skipping column outside column list */
- MANDOCERR_NOSCOPE, /* skipping end of block that is not open */
- MANDOCERR_SCOPEBROKEN, /* missing end of block */
- MANDOCERR_SCOPEEXIT, /* scope open on exit */
- MANDOCERR_UNAME, /* uname(3) system call failed */
- /* FIXME: merge following with MANDOCERR_ARGCOUNT */
- MANDOCERR_NOARGS, /* macro requires line argument(s) */
- MANDOCERR_NOBODY, /* macro requires body argument(s) */
- MANDOCERR_NOARGV, /* macro requires argument(s) */
- MANDOCERR_NUMERIC, /* request requires a numeric argument */
- MANDOCERR_LISTTYPE, /* missing list type */
- MANDOCERR_ARGSLOST, /* line argument(s) will be lost */
- MANDOCERR_BODYLOST, /* body argument(s) will be lost */
+ MANDOCERR_BL_NOTYPE, /* missing list type, using -item: Bl */
+ MANDOCERR_NM_NONAME, /* missing manual name, using "": Nm */
+ MANDOCERR_OS_UNAME, /* uname(3) system call failed, using UNKNOWN */
+ MANDOCERR_ST_BAD, /* unknown standard specifier: St standard */
+ MANDOCERR_IT_NONUM, /* skipping request without numeric argument */
+ MANDOCERR_ARG_SKIP, /* skipping all arguments: macro args */
+ MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */
MANDOCERR_FATAL, /* ===== start of fatal errors ===== */
- MANDOCERR_NOTMANUAL, /* manual isn't really a manual */
- MANDOCERR_COLUMNS, /* column syntax is inconsistent */
- MANDOCERR_BADDISP, /* NOT IMPLEMENTED: .Bd -file */
- MANDOCERR_SYNTARGVCOUNT, /* argument count wrong, violates syntax */
- MANDOCERR_SYNTCHILD, /* child violates parent syntax */
- MANDOCERR_SYNTARGCOUNT, /* argument count wrong, violates syntax */
- MANDOCERR_SOPATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */
- MANDOCERR_NODOCBODY, /* no document body */
- MANDOCERR_NODOCPROLOG, /* no document prologue */
- MANDOCERR_MEM, /* static buffer exhausted */
+ MANDOCERR_TOOLARGE, /* input too large */
+ MANDOCERR_BD_FILE, /* NOT IMPLEMENTED: Bd -file */
+ MANDOCERR_SO_PATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */
+ MANDOCERR_SO_FAIL, /* .so request failed */
+
+ /* ===== system errors ===== */
+
+ MANDOCERR_SYSOPEN, /* cannot open file */
+ MANDOCERR_SYSSTAT, /* cannot stat file */
+ MANDOCERR_SYSREAD, /* cannot read file */
+
MANDOCERR_MAX
};
@@ -231,6 +244,7 @@ struct tbl_row {
struct tbl_row *next;
struct tbl_cell *first;
struct tbl_cell *last;
+ int vert; /* trailing vertical line */
};
enum tbl_datt {
@@ -353,7 +367,7 @@ struct eqn_box {
/*
* An equation consists of a tree of expressions starting at a given
- * line and position.
+ * line and position.
*/
struct eqn {
char *name; /* identifier (or NULL) */
@@ -363,15 +377,12 @@ struct eqn {
};
/*
- * The type of parse sequence. This value is usually passed via the
- * mandoc(1) command line of -man and -mdoc. It's almost exclusively
- * -mandoc but the others have been retained for compatibility.
+ * Parse options.
*/
-enum mparset {
- MPARSE_AUTO, /* magically determine the document type */
- MPARSE_MDOC, /* assume -mdoc */
- MPARSE_MAN /* assume -man */
-};
+#define MPARSE_MDOC 1 /* assume -mdoc */
+#define MPARSE_MAN 2 /* assume -man */
+#define MPARSE_SO 4 /* honour .so requests */
+#define MPARSE_QUICK 8 /* abort the parse early */
enum mandoc_esc {
ESCAPE_ERROR = 0, /* bail! unparsable escape */
@@ -399,30 +410,25 @@ struct man;
__BEGIN_DECLS
-void *mandoc_calloc(size_t, size_t);
enum mandoc_esc mandoc_escape(const char **, const char **, int *);
-void *mandoc_malloc(size_t);
-void *mandoc_realloc(void *, size_t);
-char *mandoc_strdup(const char *);
-char *mandoc_strndup(const char *, size_t);
struct mchars *mchars_alloc(void);
void mchars_free(struct mchars *);
-char mchars_num2char(const char *, size_t);
+char mchars_num2char(const char *, size_t);
int mchars_num2uc(const char *, size_t);
-int mchars_spec2cp(const struct mchars *,
+int mchars_spec2cp(const struct mchars *,
const char *, size_t);
-const char *mchars_spec2str(const struct mchars *,
+const char *mchars_spec2str(const struct mchars *,
const char *, size_t, size_t *);
-struct mparse *mparse_alloc(enum mparset, enum mandoclevel,
- mandocmsg, void *, char *);
+struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg,
+ const char *);
void mparse_free(struct mparse *);
void mparse_keep(struct mparse *);
enum mandoclevel mparse_readfd(struct mparse *, int, const char *);
enum mandoclevel mparse_readmem(struct mparse *, const void *, size_t,
const char *);
void mparse_reset(struct mparse *);
-void mparse_result(struct mparse *,
- struct mdoc **, struct man **);
+void mparse_result(struct mparse *,
+ struct mdoc **, struct man **, char **);
const char *mparse_getkeep(const struct mparse *);
const char *mparse_strerror(enum mandocerr);
const char *mparse_strlevel(enum mandoclevel);
diff --git a/mandoc_aux.c b/mandoc_aux.c
new file mode 100644
index 000000000000..b5376735ceea
--- /dev/null
+++ b/mandoc_aux.c
@@ -0,0 +1,121 @@
+/* $Id: mandoc_aux.c,v 1.3 2014/07/09 08:20:34 schwarze Exp $ */
+/*
+ * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "mandoc_aux.h"
+
+int
+mandoc_asprintf(char **dest, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = vasprintf(dest, fmt, ap);
+ va_end(ap);
+
+ if (-1 == ret) {
+ perror(NULL);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ return(ret);
+}
+
+void *
+mandoc_calloc(size_t num, size_t size)
+{
+ void *ptr;
+
+ ptr = calloc(num, size);
+ if (NULL == ptr) {
+ perror(NULL);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ return(ptr);
+}
+
+void *
+mandoc_malloc(size_t size)
+{
+ void *ptr;
+
+ ptr = malloc(size);
+ if (NULL == ptr) {
+ perror(NULL);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ return(ptr);
+}
+
+void *
+mandoc_realloc(void *ptr, size_t size)
+{
+
+ ptr = realloc(ptr, size);
+ if (NULL == ptr) {
+ perror(NULL);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ return(ptr);
+}
+
+void *
+mandoc_reallocarray(void *ptr, size_t num, size_t size)
+{
+
+ ptr = reallocarray(ptr, num, size);
+ if (NULL == ptr) {
+ perror(NULL);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ return(ptr);
+}
+
+char *
+mandoc_strdup(const char *ptr)
+{
+ char *p;
+
+ p = strdup(ptr);
+ if (NULL == p) {
+ perror(NULL);
+ exit((int)MANDOCLEVEL_SYSERR);
+ }
+ return(p);
+}
+
+char *
+mandoc_strndup(const char *ptr, size_t sz)
+{
+ char *p;
+
+ p = mandoc_malloc(sz + 1);
+ memcpy(p, ptr, sz);
+ p[(int)sz] = '\0';
+ return(p);
+}
diff --git a/mandoc_aux.h b/mandoc_aux.h
new file mode 100644
index 000000000000..04f4baff6065
--- /dev/null
+++ b/mandoc_aux.h
@@ -0,0 +1,33 @@
+/* $Id: mandoc_aux.h,v 1.2 2014/04/23 21:06:41 schwarze Exp $ */
+/*
+ * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef MANDOC_AUX_H
+#define MANDOC_AUX_H
+
+__BEGIN_DECLS
+
+int mandoc_asprintf(char **, const char *, ...);
+void *mandoc_calloc(size_t, size_t);
+void *mandoc_malloc(size_t);
+void *mandoc_realloc(void *, size_t);
+void *mandoc_reallocarray(void *, size_t, size_t);
+char *mandoc_strdup(const char *);
+char *mandoc_strndup(const char *, size_t);
+
+__END_DECLS
+
+#endif /*!MANDOC_AUX_H*/
diff --git a/mandoc_escape.3 b/mandoc_escape.3
new file mode 100644
index 000000000000..84243fdeb6eb
--- /dev/null
+++ b/mandoc_escape.3
@@ -0,0 +1,362 @@
+.\" $Id: mandoc_escape.3,v 1.1 2014/08/05 05:48:56 schwarze Exp $
+.\"
+.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: August 5 2014 $
+.Dt MANDOC_ESCAPE 3
+.Os
+.Sh NAME
+.Nm mandoc_escape
+.Nd parse roff escape sequences
+.Sh LIBRARY
+.Lb libmandoc
+.Sh SYNOPSIS
+.In sys/types.h
+.In mandoc.h
+.Ft "enum mandoc_esc"
+.Fo mandoc_escape
+.Fa "const char **end"
+.Fa "const char **start"
+.Fa "int *sz"
+.Fc
+.Sh DESCRIPTION
+This function scans a
+.Xr roff 7
+escape sequence.
+.Pp
+An escape sequence consists of
+.Bl -dash -compact -width 2n
+.It
+an initial backslash character
+.Pq Sq \e ,
+.It
+a single ASCII character called the escape sequence identifier,
+.It
+and, with only a few exceptions, an argument.
+.El
+.Pp
+Arguments can be given in the following forms; some escape sequence
+identifiers only accept some of these forms as specified below.
+The first three forms are called the standard forms.
+.Bl -tag -width 2n
+.It \&In brackets: Ic \&[ Ns Ar argument Ns Ic \&]
+The argument starts after the initial
+.Sq \&[ ,
+ends before the final
+.Sq \&] ,
+and the escape sequence ends with the final
+.Sq \&] .
+.It Two-character argument short form: Ic \&( Ns Ar ar
+This form can only be used for arguments
+consisting of exactly two characters.
+It has the same effect as
+.Ic \&[ Ns Ar ar Ns Ic \&] .
+.It One-character argument short form: Ar a
+This form can only be used for arguments
+consisting of exactly one character.
+It has the same effect as
+.Ic \&[ Ns Ar a Ns Ic \&] .
+.It Delimited form: Ar C Ns Ar argument Ns Ar C
+The argument starts after the initial delimiter character
+.Ar C ,
+ends before the next occurrence of the delimiter character
+.Ar C ,
+and the escape sequence ends with that second
+.Ar C .
+Some escape sequences allow arbitrary characters
+.Ar C
+as quoting characters, some restrict the range of characters
+that can be used as quoting characters.
+.El
+.Pp
+Upon function entry,
+.Fa end
+is expected to point to the escape sequence identifier.
+The values passed in as
+.Fa start
+and
+.Fa sz
+are ignored and overwritten.
+.Pp
+By design, this function cannot handle those
+.Xr roff 7
+escape sequences that require in-place expansion, in particular
+user-defined strings
+.Ic \e* ,
+number registers
+.Ic \en ,
+width measurements
+.Ic \ew ,
+and numerical expression control
+.Ic \eB .
+These are handled by
+.Fn roff_res ,
+a private preprocessor function called from
+.Fn roff_parseln ,
+see the file
+.Pa roff.c .
+.Pp
+The function
+.Fn mandoc_escape
+is used
+.Bl -dash -compact -width 2n
+.It
+recursively by itself, because some escape sequence arguments can
+in turn contain other escape sequences,
+.It
+for error detection internally by the
+.Xr roff 7
+parser part of the
+.Lb libmandoc ,
+see the file
+.Pa roff.c ,
+.It
+above all externally by the
+.Xr mandoc 1
+formatting modules, in particular
+.Fl Tascii
+and
+.Fl Thtml ,
+for formatting purposes, see the files
+.Pa term.c
+and
+.Pa html.c ,
+.It
+and rarely externally by high-level utilities using the mandoc library,
+for example
+.Xr makewhatis 8 ,
+to purge escape sequences from text.
+.El
+.Sh RETURN VALUES
+Upon function return, the pointer
+.Fa end
+is set to the character after the end of the escape sequence,
+such that the calling higher-level parser can easily continue.
+.Pp
+For escape sequences taking an argument, the pointer
+.Fa start
+is set to the beginning of the argument and
+.Fa sz
+is set to the length of the argument.
+For escape sequences not taking an argument,
+.Fa start
+is set to the character after the end of the sequence and
+.Fa sz
+is set to 0.
+Both
+.Fa start
+and
+.Fa sz
+may be
+.Dv NULL ;
+in that case, the argument and the length are not returned.
+.Pp
+For sequences taking an argument, the function
+.Fn mandoc_escape
+returns one of the following values:
+.Bl -tag -width 2n
+.It Dv ESCAPE_FONT
+The escape sequence
+.Ic \ef
+taking an argument in standard form:
+.Ic \ef[ , \ef( , \ef Ns Ar a .
+Two-character arguments starting with the character
+.Sq C
+are reduced to one-character arguments by skipping the
+.Sq C .
+More specific values are returned for the most commonly used arguments:
+.Bl -column "argument" "ESCAPE_FONTITALIC"
+.It argument Ta return value
+.It Cm R No or Cm 1 Ta Dv ESCAPE_FONTROMAN
+.It Cm I No or Cm 2 Ta Dv ESCAPE_FONTITALIC
+.It Cm B No or Cm 3 Ta Dv ESCAPE_FONTBOLD
+.It Cm P Ta Dv ESCAPE_FONTPREV
+.It Cm BI Ta Dv ESCAPE_FONTBI
+.El
+.It Dv ESCAPE_SPECIAL
+The escape sequence
+.Ic \eC
+taking an argument delimited with the single quote character
+and, as a special exception, the escape sequences
+.Em not
+having an identifier, that is, those where the argument, in standard
+form, directly follows the initial backslash:
+.Ic \eC' , \e[ , \e( , \e Ns Ar a .
+Note that the one-character argument short form can only be used for
+argument characters that do not clash with escape sequence identifiers.
+.Pp
+If the argument consists of more than one character
+and starts with the character
+.Sq u ,
+.Dv ESCAPE_UNICODE
+is returned as described below.
+If the argument is just the single character
+.Sq u ,
+.Dv ESCAPE_ERROR
+is returned.
+.Pp
+The
+.Dv ESCAPE_SPECIAL
+special character escape sequences can be rendered using the functions
+.Fn mchars_spec2cp
+and
+.Fn mchars_spec2str
+described in the
+.Xr mchars_alloc 3
+manual.
+.It Dv ESCAPE_UNICODE
+Escape sequences of the same format as described above under
+.Dv ESCAPE_SPECIAL ,
+but with an argument starting with the character
+.Sq u :
+.Ic \eC'u , \e[u .
+As a special exception,
+.Fa start
+is set to the character after the
+.Sq u ,
+and the
+.Fa sz
+return value does not include the
+.Sq u
+either.
+.Pp
+Such Unicode character escape sequences can be rendered using the function
+.Fn mchars_num2uc
+described in the
+.Xr mchars_alloc 3
+manual.
+.It Dv ESCAPE_NUMBERED
+The escape sequence
+.Ic \eN
+followed by a delimited argument.
+The delimiter character is arbitrary except that digits cannot be used.
+If a digit is encountered instead of the opening delimiter, that
+digit is considered to be the argument and the end of the sequence, and
+.Dv ESCAPE_IGNORE
+is returned.
+.Pp
+Such ASCII character escape sequences can be rendered using the function
+.Fn mchars_num2char
+described in the
+.Xr mchars_alloc 3
+manual.
+.It Dv ESCAPE_IGNORE
+.Bl -bullet -width 2n
+.It
+The escape sequence
+.Ic \es
+followed by an argument in standard form or by an argument delimited
+by the single quote character:
+.Ic \es' , \es[ , \es( , \es Ns Ar a .
+As a special exception, an optional
+.Sq +
+or
+.Sq \-
+character is allowed after the
+.Sq s
+for all forms.
+.It
+The escape sequences
+.Ic \eF ,
+.Ic \eg ,
+.Ic \ek ,
+.Ic \eM ,
+.Ic \em ,
+.Ic \en ,
+.Ic \eV ,
+and
+.Ic \eY
+followed by an argument in standard form.
+.It
+The escape sequences
+.Ic \eA ,
+.Ic \eb ,
+.Ic \eD ,
+.Ic \eo ,
+.Ic \eR ,
+.Ic \eX ,
+and
+.Ic \eZ
+followed by an argument delimited by an arbitrary character.
+.It
+The escape sequences
+.Ic \eH ,
+.Ic \eh ,
+.Ic \eL ,
+.Ic \el ,
+.Ic \eS ,
+.Ic \ev ,
+and
+.Ic \ex
+followed by an argument delimited by a character that cannot occur
+in numerical expressions.
+However, if any character that can occur in numerical expressions
+is found instead of a delimiter, the sequence is considered to end
+with that character, and
+.Dv ESCAPE_ERROR
+is returned.
+.El
+.It Dv ESCAPE_ERROR
+Escape sequences taking an argument but not matching any of the above patterns.
+In particular, that happens if the end of the logical input line
+is reached before the end of the argument.
+.El
+.Pp
+For sequences that do not take an argument, the function
+.Fn mandoc_escape
+returns one of the following values:
+.Bl -tag -width 2n
+.It Dv ESCAPE_SKIPCHAR
+The escape sequence
+.Qq \ez .
+.It Dv ESCAPE_NOSPACE
+The escape sequence
+.Qq \ec .
+.It Dv ESCAPE_IGNORE
+The escape sequences
+.Qq \ed
+and
+.Qq \eu .
+.El
+.Sh FILES
+This function is implemented in
+.Pa mandoc.c .
+.Sh SEE ALSO
+.Xr mchars_alloc 3 ,
+.Xr mandoc_char 7 ,
+.Xr roff 7
+.Sh HISTORY
+This function has been available since mandoc 1.11.2.
+.Sh AUTHORS
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org
+.Sh BUGS
+The function doesn't cleanly distinguish between sequences that are
+valid and supported, valid and ignored, valid and unsupported,
+syntactically invalid, or undefined.
+For sequences that are ignored or unsupported, it doesn't tell
+whether that deficiency is likely to cause major formatting problems
+and/or loss of document content.
+The function is already rather complicated and still parses some
+sequences incorrectly.
+.
+.ig
+For these sequences, the list given below specifies a starting string
+and either the length of the argument or an ending character.
+The argument starts after the starting string.
+In the former case, the sequence ends with the end of the argument.
+In the latter case, the argument ends before the ending character,
+and the sequence ends with the ending character.
+..
diff --git a/mandoc_html.3 b/mandoc_html.3
new file mode 100644
index 000000000000..994eb3a288e7
--- /dev/null
+++ b/mandoc_html.3
@@ -0,0 +1,249 @@
+.\" $Id: mandoc_html.3,v 1.1 2014/07/23 18:13:09 schwarze Exp $
+.\"
+.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: July 23 2014 $
+.Dt MANDOC_HTML 3
+.Os
+.Sh NAME
+.Nm mandoc_html
+.Nd internals of the mandoc HTML formatter
+.Sh SYNOPSIS
+.In "html.h"
+.Ft void
+.Fn print_gen_decls "struct html *h"
+.Ft void
+.Fn print_gen_head "struct html *h"
+.Ft struct tag *
+.Fo print_otag
+.Fa "struct html *h"
+.Fa "enum htmltag tag"
+.Fa "int sz"
+.Fa "const struct htmlpair *p"
+.Fc
+.Ft void
+.Fo print_tagq
+.Fa "struct html *h"
+.Fa "const struct tag *until"
+.Fc
+.Ft void
+.Fo print_stagq
+.Fa "struct html *h"
+.Fa "const struct tag *suntil"
+.Fc
+.Ft void
+.Fo print_text
+.Fa "struct html *h"
+.Fa "const char *word"
+.Fc
+.Sh DESCRIPTION
+The mandoc HTML formatter is not a formal library.
+However, as it is compiled into more than one program, in particular
+.Xr mandoc 1
+and
+.Xr man.cgi 8 ,
+and because it may be security-critical in some contexts,
+some documentation is useful to help to use it correctly and
+to prevent XSS vulnerabilities.
+.Pp
+The formatter produces HTML output on the standard output.
+Since proper escaping is usually required and best taken care of
+at one central place, the language-specific formatters
+.Po
+.Pa *_html.c ,
+see
+.Sx FILES
+.Pc
+are not supposed to print directly to
+.Dv stdout
+using functions like
+.Xr printf 3 ,
+.Xr putc 3 ,
+.Xr puts 3 ,
+or
+.Xr write 2 .
+Instead, they are expected to use the output functions declared in
+.Pa html.h
+and implemented as part of the main HTML formatting engine in
+.Pa html.c .
+.Ss Data structures
+These structures are declared in
+.Pa html.h .
+.Bl -tag -width Ds
+.It Vt struct html
+Internal state of the HTML formatter.
+.It Vt struct htmlpair
+Holds one HTML attribute.
+Members are
+.Fa "enum htmlattr key"
+and
+.Fa "const char *val" .
+Helper macros
+.Fn PAIR_*
+are provided to support initialization of such structures.
+.It Vt struct tag
+One entry for the LIFO stack of HTML elements.
+Members are
+.Fa "enum htmltag tag"
+and
+.Fa "struct tag *next" .
+.El
+.Ss Private interface functions
+The function
+.Fn print_gen_decls
+prints the opening
+.Ao Pf \&? Ic xml ? Ac
+and
+.Aq Pf \&! Ic DOCTYPE
+declarations required for the current document type.
+.Pp
+The function
+.Fn print_gen_head
+prints the opening
+.Aq Ic META
+and
+.Aq Ic LINK
+elements for the document
+.Aq Ic HEAD ,
+using the
+.Fa style
+member of
+.Fa h
+unless that is
+.Dv NULL .
+It uses
+.Fn print_otag
+which takes care of properly encoding attributes,
+which is relevant for the
+.Fa style
+link in particular.
+.Pp
+The function
+.Fn print_otag
+prints the start tag of an HTML element with the name
+.Fa tag ,
+including the
+.Fa sz
+attributes that can optionally be provided in the
+.Fa p
+array.
+It uses the private function
+.Fn print_attr
+which in turn uses the private function
+.Fn print_encode
+to take care of HTML encoding.
+If required by the element type, it remembers in
+.Fa h
+that the element is open.
+The function
+.Fn print_tagq
+is used to close out all open elements up to and including
+.Fa until ;
+.Fn print_stagq
+is a variant to close out all open elements up to but excluding
+.Fa suntil .
+.Pp
+The function
+.Fn print_text
+prints HTML element content.
+It uses the private function
+.Fn print_encode
+to take care of HTML encoding.
+If the document has requested a non-standard font, for example using a
+.Xr roff 7
+.Ic \ef
+font escape sequence,
+.Fn print_text
+wraps
+.Fa word
+in an HTML font selection element using the
+.Fn print_otag
+and
+.Fn print_tagq
+functions.
+.Pp
+The functions
+.Fn bufinit ,
+.Fn bufcat* ,
+and
+.Fn buffmt*
+do not directly produce output but buffer text in the
+.Fa buf
+member of
+.Fa h .
+They are not used internally by
+.Pa html.c
+but intended for use by the language-specific formatters
+to ease preparation of strings for the
+.Fa p
+argument of
+.Fn print_otag
+and for the
+.Fa word
+argument of
+.Fn print_text .
+Consequently, these functions do not do any HTML encoding.
+.Pp
+The functions
+.Fn html_strlen ,
+.Fn print_eqn ,
+.Fn print_tbl ,
+and
+.Fn print_tblclose
+are not yet documented.
+.Sh FILES
+.Bl -tag -width mandoc_aux.c -compact
+.It Pa main.h
+declarations of public functions for use by the main program,
+not yet documented
+.It Pa html.h
+declarations of data types and private functions
+for use by language-specific HTML formatters
+.It Pa html.c
+main HTML formatting engine and utility functions
+.It Pa mdoc_html.c
+.Xr mdoc 7
+HTML formatter
+.It Pa man_html.c
+.Xr man 7
+HTML formatter
+.It Pa tbl_html.c
+.Xr tbl 7
+HTML formatter
+.It Pa eqn_html.c
+.Xr eqn 7
+HTML formatter
+.It Pa out.h
+declarations of data types and private functions
+for shared use by all mandoc formatters,
+not yet documented
+.It Pa out.c
+private functions for shared use by all mandoc formatters
+.It Pa mandoc_aux.h
+declarations of common mandoc utility functions, see
+.Xr mandoc 3
+.It Pa mandoc_aux.c
+implementation of common mandoc utility functions
+.El
+.Sh SEE ALSO
+.Xr mandoc 1 ,
+.Xr mandoc 3 ,
+.Xr man.cgi 8
+.Sh AUTHORS
+.An -nosplit
+The mandoc HTML formatter was written by
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
+This manual was written by
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org .
diff --git a/mandoc_malloc.3 b/mandoc_malloc.3
new file mode 100644
index 000000000000..c16798424af6
--- /dev/null
+++ b/mandoc_malloc.3
@@ -0,0 +1,197 @@
+.\" $Id: mandoc_malloc.3,v 1.1 2014/08/05 05:48:56 schwarze Exp $
+.\"
+.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: August 5 2014 $
+.Dt MANDOC_MALLOC 3
+.Os
+.Sh NAME
+.Nm mandoc_malloc ,
+.Nm mandoc_realloc ,
+.Nm mandoc_reallocarray ,
+.Nm mandoc_calloc ,
+.Nm mandoc_strdup ,
+.Nm mandoc_strndup ,
+.Nm mandoc_asprintf
+.Nd memory allocation function wrappers used in the mandoc library
+.Sh LIBRARY
+.Lb libmandoc
+.Sh SYNOPSIS
+.In sys/types.h
+.In mandoc_aux.h
+.Ft "void *"
+.Fo mandoc_malloc
+.Fa "size_t size"
+.Fc
+.Ft "void *"
+.Fo mandoc_realloc
+.Fa "void *ptr"
+.Fa "size_t size"
+.Fc
+.Ft "void *"
+.Fo mandoc_reallocarray
+.Fa "void *ptr"
+.Fa "size_t nmemb"
+.Fa "size_t size"
+.Fc
+.Ft "void *"
+.Fo mandoc_calloc
+.Fa "size_t nmemb"
+.Fa "size_t size"
+.Fc
+.Ft "char *"
+.Fo mandoc_strdup
+.Fa "const char *s"
+.Fc
+.Ft "char *"
+.Fo mandoc_strndup
+.Fa "const char *s"
+.Fa "size_t maxlen"
+.Fc
+.Ft int
+.Fo mandoc_asprintf
+.Fa "char **ret"
+.Fa "const char *format"
+.Fa "..."
+.Fc
+.Sh DESCRIPTION
+These functions call the
+.Lb libc
+functions of the same names, passing through their return values when
+successful.
+In case of failure, they do not return, but instead call
+.Xr perror 3
+and
+.Xr exit 3 .
+They can be used both internally by any code in the
+.Lb libmandoc
+and externally by programs using that library, for example
+.Xr mandoc 1 ,
+.Xr apropos 1 ,
+and
+.Xr makewhatis 8 .
+.Pp
+The function
+.Fn mandoc_malloc
+allocates one new object, leaving the memory uninitialized.
+The functions
+.Fn mandoc_realloc
+and
+.Fn mandoc_reallocarray
+change the size of an existing object or array, possibly moving it.
+When shrinking the size, existing data is truncated; when growing,
+the additional memory is not initialized.
+The function
+.Fn mandoc_calloc
+allocates a new array, initializing it to zero.
+.Pp
+The argument
+.Fa size
+is the size of each object.
+The argument
+.Fa nmemb
+is the new number of objects in the array.
+The argument
+.Fa ptr
+is a pointer to the existing object or array to be resized; if it is
+.Dv NULL ,
+a new object or array is allocated.
+.Pp
+The functions
+.Fn mandoc_strdup
+and
+.Fn mandoc_strndup
+copy a string into newly allocated memory.
+For
+.Fn mandoc_strdup ,
+the string pointed to by
+.Fa s
+needs to be NUL-terminated.
+For
+.Fn mandoc_strndup ,
+at most
+.Fa maxlen
+bytes are copied.
+The function
+.Fn mandoc_asprintf
+writes output formatted according to
+.Fa format
+into newly allocated memory and returns a pointer to the result in
+.Fa ret .
+For all three string functions, the result is always NUL-terminated.
+.Pp
+When the objects and strings are no longer needed,
+the pointers returned by these functions can be passed to
+.Xr free 3 .
+.Sh RETURN VALUES
+The function
+.Fn mandoc_asprintf
+always returns the number of characters written, excluding the
+final NUL byte.
+It never returns -1.
+.Pp
+The other functions always return a valid pointer; they never return
+.Dv NULL .
+.Sh FILES
+These functions are implemented in
+.Pa mandoc_aux.c .
+.Sh SEE ALSO
+.Xr asprintf 3 ,
+.Xr exit 3 ,
+.Xr malloc 3 ,
+.Xr perror 3 ,
+.Xr strdup 3
+.Sh STANDARDS
+The functions
+.Fn malloc ,
+.Fn realloc ,
+and
+.Fn calloc
+are required by
+.St -ansiC .
+The functions
+.Fn strdup
+and
+.Fn strndup
+are required by
+.St -p1003.1-2008 .
+The function
+.Fn asprintf
+is a widespread extension that first appeared in the GNU C library.
+.Pp
+The function
+.Fn reallocarray
+is an extension that first appeared in
+.Ox 5.6 .
+If it is not provided by the operating system, the mandoc build system
+uses a bundled portable implementation.
+.Sh HISTORY
+The functions
+.Fn mandoc_malloc ,
+.Fn mandoc_realloc ,
+.Fn mandoc_calloc ,
+and
+.Fn mandoc_strdup
+have been available since mandoc 1.9.12,
+.Fn mandoc_strndup
+since 1.11.5,
+and
+.Fn mandoc_asprintf
+and
+.Fn mandoc_reallocarray
+since 1.12.4 and 1.13.0.
+.Sh AUTHORS
+.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
+.An Ingo Schwarze Aq Mt schwarze@openbsd.org
diff --git a/mandocdb.8 b/mandocdb.8
deleted file mode 100644
index 83b6a0102e2a..000000000000
--- a/mandocdb.8
+++ /dev/null
@@ -1,324 +0,0 @@
-.\" $Id: mandocdb.8,v 1.17.2.1 2013/09/18 01:04:07 schwarze Exp $
-.\"
-.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
-.\"
-.\" Permission to use, copy, modify, and distribute this software for any
-.\" purpose with or without fee is hereby granted, provided that the above
-.\" copyright notice and this permission notice appear in all copies.
-.\"
-.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-.\"
-.Dd $Mdocdate: September 18 2013 $
-.Dt MANDOCDB 8
-.Os
-.Sh NAME
-.Nm mandocdb
-.Nd index UNIX manuals
-.Sh SYNOPSIS
-.Nm
-.Op Fl avW
-.Op Fl C Ar file
-.Nm
-.Op Fl avW
-.Ar dir ...
-.Nm
-.Op Fl vW
-.Fl d Ar dir
-.Op Ar
-.Nm
-.Op Fl vW
-.Fl u Ar dir
-.Op Ar
-.Nm
-.Fl t Ar
-.Sh DESCRIPTION
-The
-.Nm
-utility extracts keywords from
-.Ux
-manuals and indexes them in a
-.Sx Keyword Database
-and
-.Sx Index Database
-for fast retrieval by
-.Xr apropos 1 ,
-.Xr whatis 1 ,
-and
-.Xr man 1 Ns 's
-.Fl k
-option.
-.Pp
-By default,
-.Nm
-creates databases in each
-.Ar dir
-using the files
-.Sm off
-.Sy man Ar section Li /
-.Op Ar arch Li /
-.Ar title . section
-.Sm on
-and
-.Sm off
-.Sy cat Ar section Li /
-.Op Ar arch Li /
-.Ar title . Sy 0
-.Sm on
-in that directory;
-existing databases are truncated.
-If
-.Ar dir
-is not provided,
-.Nm
-uses the default paths stipulated by
-.Xr man 1 .
-.Pp
-The arguments are as follows:
-.Bl -tag -width "-C file"
-.It Fl a
-Use all directories and files found below
-.Ar dir ... .
-.It Fl C Ar file
-Specify an alternative configuration
-.Ar file
-in
-.Xr man.conf 5
-format.
-.It Fl d Ar dir
-Merge (remove and re-add)
-.Ar
-to the database in
-.Ar dir
-without truncating it.
-.It Fl t Ar
-Check the given
-.Ar files
-for potential problems.
-No databases are modified.
-Implies
-.Fl a
-and
-.Fl W .
-All diagnostic messages are printed to the standard output;
-the standard error output is not used.
-.It Fl u Ar dir
-Remove
-.Ar
-from the database in
-.Ar dir
-without truncating it.
-.It Fl v
-Display all files added or removed to the index.
-.It Fl W
-Print warnings about potential problems with manual pages
-to the standard error output.
-.El
-.Pp
-If fatal parse errors are encountered while parsing, the offending file
-is printed to stderr, omitted from the index, and the parse continues
-with the next input file.
-.Ss Index Database
-The index database,
-.Pa mandoc.index ,
-is a
-.Xr recno 3
-database with record values consisting of
-.Pp
-.Bl -enum -compact
-.It
-the character
-.Cm d ,
-.Cm a ,
-or
-.Cm c
-to indicate the file type
-.Po
-.Xr mdoc 7 ,
-.Xr man 7 ,
-and post-formatted, respectively
-.Pc ,
-.It
-the filename relative to the databases' path,
-.It
-the manual section,
-.It
-the manual title,
-.It
-the architecture
-.Pq often empty ,
-.It
-and the description.
-.El
-.Pp
-Each of the above is NUL-terminated.
-.Pp
-If the record value is zero-length, it is unassigned.
-.Ss Keyword Database
-The keyword database,
-.Pa mandoc.db ,
-is a
-.Xr btree 3
-database of NUL-terminated keywords (record length is non-zero string
-length plus one) mapping to a 16-byte binary field consisting of the
-64-bit keyword type and the 64-bit
-.Sx Index Database
-record number, both in network-byte order.
-.Pp
-The type bit-mask consists of the following
-values mapping into
-.Xr mdoc 7
-macro identifiers:
-.Pp
-.Bl -column "x0x0000000000000001ULLx" "xLix" -offset indent -compact
-.It Li 0x0000000000000001ULL Ta \&An
-.It Li 0x0000000000000002ULL Ta \&Ar
-.It Li 0x0000000000000004ULL Ta \&At
-.It Li 0x0000000000000008ULL Ta \&Bsx
-.It Li 0x0000000000000010ULL Ta \&Bx
-.It Li 0x0000000000000020ULL Ta \&Cd
-.It Li 0x0000000000000040ULL Ta \&Cm
-.It Li 0x0000000000000080ULL Ta \&Dv
-.It Li 0x0000000000000100ULL Ta \&Dx
-.It Li 0x0000000000000200ULL Ta \&Em
-.It Li 0x0000000000000400ULL Ta \&Er
-.It Li 0x0000000000000800ULL Ta \&Ev
-.It Li 0x0000000000001000ULL Ta \&Fa
-.It Li 0x0000000000002000ULL Ta \&Fl
-.It Li 0x0000000000004000ULL Ta \&Fn
-.It Li 0x0000000000008000ULL Ta \&Ft
-.It Li 0x0000000000010000ULL Ta \&Fx
-.It Li 0x0000000000020000ULL Ta \&Ic
-.It Li 0x0000000000040000ULL Ta \&In
-.It Li 0x0000000000080000ULL Ta \&Lb
-.It Li 0x0000000000100000ULL Ta \&Li
-.It Li 0x0000000000200000ULL Ta \&Lk
-.It Li 0x0000000000400000ULL Ta \&Ms
-.It Li 0x0000000000800000ULL Ta \&Mt
-.It Li 0x0000000001000000ULL Ta \&Nd
-.It Li 0x0000000002000000ULL Ta \&Nm
-.It Li 0x0000000004000000ULL Ta \&Nx
-.It Li 0x0000000008000000ULL Ta \&Ox
-.It Li 0x0000000010000000ULL Ta \&Pa
-.It Li 0x0000000020000000ULL Ta \&Rs
-.It Li 0x0000000040000000ULL Ta \&Sh
-.It Li 0x0000000080000000ULL Ta \&Ss
-.It Li 0x0000000100000000ULL Ta \&St
-.It Li 0x0000000200000000ULL Ta \&Sy
-.It Li 0x0000000400000000ULL Ta \&Tn
-.It Li 0x0000000800000000ULL Ta \&Va
-.It Li 0x0000001000000000ULL Ta \&Vt
-.It Li 0x0000002000000000ULL Ta \&Xr
-.El
-.Sh IMPLEMENTATION NOTES
-The time to construct a new database pair grows linearly with the
-number of keywords in the input files.
-However, removing or updating entries with
-.Fl u
-or
-.Fl d ,
-respectively, grows as a multiple of the index length and input size.
-.Sh FILES
-.Bl -tag -width Ds
-.It Pa mandoc.db
-A
-.Xr btree 3
-keyword database mapping keywords to a type and file reference in
-.Pa mandoc.index .
-.It Pa mandoc.index
-A
-.Xr recno 3
-database of indexed file-names.
-.It Pa /etc/man.conf
-The default
-.Xr man 1
-configuration file.
-.El
-.Sh EXIT STATUS
-The
-.Nm
-utility exits with one of the following values:
-.Pp
-.Bl -tag -width Ds -compact
-.It 0
-No errors occurred.
-.It 5
-Invalid command line arguments were specified.
-No input files have been read.
-.It 6
-An operating system error occurred, for example memory exhaustion or an
-error accessing input files.
-Such errors cause
-.Nm
-to exit at once, possibly in the middle of parsing or formatting a file.
-The output databases are corrupt and should be removed.
-.El
-.Sh DIAGNOSTICS
-If the following errors occur, the
-.Nm
-databases should be rebuilt.
-.Bl -diag
-.It "%s: Corrupt database"
-The keyword database file indicated by
-.Pa %s
-is unreadable.
-.It "%s: Corrupt index"
-The index database file indicated by
-.Pa %s
-is unreadable.
-.It "%s: Path too long"
-The file
-.Pa %s
-is too long.
-This usually indicates database corruption or invalid command-line
-arguments.
-.El
-.Sh SEE ALSO
-.Xr apropos 1 ,
-.Xr man 1 ,
-.Xr whatis 1 ,
-.Xr btree 3 ,
-.Xr recno 3 ,
-.Xr man.conf 5
-.Sh HISTORY
-A
-.Nm makewhatis
-utility first appeared in
-.Bx 2 .
-It was rewritten in
-.Xr perl 1
-for
-.Ox 2.7
-and in C for
-.Ox 5.1 .
-.Pp
-The
-.Ar dir
-argument first appeared in
-.Nx 1.0 ;
-the options
-.Fl dtu
-in
-.Ox 2.7 ;
-and the options
-.Fl aCvW
-in
-.Ox 5.1 .
-.Sh AUTHORS
-.An -nosplit
-.An Bill Joy
-wrote the original
-.Bx
-.Nm makewhatis
-in February 1979,
-.An Marc Espie
-started the Perl version in 2000,
-and the current version of
-.Nm
-was written by
-.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
diff --git a/mandocdb.c b/mandocdb.c
index a7491cafa6ab..a604b468ff74 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -1,7 +1,7 @@
-/* $Id: mandocdb.c,v 1.49.2.10 2013/11/21 01:53:48 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.155 2014/08/06 15:09:05 schwarze Exp $ */
/*
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -19,299 +19,328 @@
#include "config.h"
#endif
-#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
#include <assert.h>
#include <ctype.h>
-#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
+#include <fts.h>
#include <getopt.h>
#include <limits.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#if defined(__APPLE__)
-# include <libkern/OSByteOrder.h>
-#elif defined(__linux__)
-# include <endian.h>
-#elif defined(__sun)
-# include <sys/byteorder.h>
-# include <sys/stat.h>
+#ifdef HAVE_OHASH
+#include <ohash.h>
#else
-# include <sys/endian.h>
+#include "compat_ohash.h"
#endif
+#include <sqlite3.h>
-#if defined(__linux__) || defined(__sun)
-# include <db_185.h>
-#else
-# include <db.h>
-#endif
-
-#include "man.h"
#include "mdoc.h"
+#include "man.h"
#include "mandoc.h"
-#include "mandocdb.h"
+#include "mandoc_aux.h"
#include "manpath.h"
+#include "mansearch.h"
+
+extern int mansearch_keymax;
+extern const char *const mansearch_keynames[];
+
+#define SQL_EXEC(_v) \
+ if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \
+ say("", "%s: %s", (_v), sqlite3_errmsg(db))
+#define SQL_BIND_TEXT(_s, _i, _v) \
+ if (SQLITE_OK != sqlite3_bind_text \
+ ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \
+ say(mlink->file, "%s", sqlite3_errmsg(db))
+#define SQL_BIND_INT(_s, _i, _v) \
+ if (SQLITE_OK != sqlite3_bind_int \
+ ((_s), (_i)++, (_v))) \
+ say(mlink->file, "%s", sqlite3_errmsg(db))
+#define SQL_BIND_INT64(_s, _i, _v) \
+ if (SQLITE_OK != sqlite3_bind_int64 \
+ ((_s), (_i)++, (_v))) \
+ say(mlink->file, "%s", sqlite3_errmsg(db))
+#define SQL_STEP(_s) \
+ if (SQLITE_DONE != sqlite3_step((_s))) \
+ say(mlink->file, "%s", sqlite3_errmsg(db))
-#define MANDOC_BUFSZ BUFSIZ
-#define MANDOC_SLOP 1024
-
-#define MANDOC_SRC 0x1
-#define MANDOC_FORM 0x2
-
-/* Access to the mandoc database on disk. */
-
-struct mdb {
- char idxn[PATH_MAX]; /* index db filename */
- char dbn[PATH_MAX]; /* keyword db filename */
- DB *idx; /* index recno database */
- DB *db; /* keyword btree database */
+enum op {
+ OP_DEFAULT = 0, /* new dbs from dir list or default config */
+ OP_CONFFILE, /* new databases from custom config file */
+ OP_UPDATE, /* delete/add entries in existing database */
+ OP_DELETE, /* delete entries from existing database */
+ OP_TEST /* change no databases, report potential problems */
};
-/* Stack of temporarily unused index records. */
-
-struct recs {
- recno_t *stack; /* pointer to a malloc'ed array */
- size_t size; /* number of allocated slots */
- size_t cur; /* current number of empty records */
- recno_t last; /* last record number in the index */
+enum form {
+ FORM_NONE, /* format is unknown */
+ FORM_SRC, /* format is -man or -mdoc */
+ FORM_CAT /* format is cat */
};
-/* Tiny list for files. No need to bring in QUEUE. */
-
-struct of {
- char *fname; /* heap-allocated */
- char *sec;
- char *arch;
- char *title;
- int src_form;
- struct of *next; /* NULL for last one */
- struct of *first; /* first in list */
+struct str {
+ char *rendered; /* key in UTF-8 or ASCII form */
+ const struct mpage *mpage; /* if set, the owning parse */
+ uint64_t mask; /* bitmask in sequence */
+ char key[]; /* may contain escape sequences */
};
-/* Buffer for storing growable data. */
+struct inodev {
+ ino_t st_ino;
+ dev_t st_dev;
+};
-struct buf {
- char *cp;
- size_t len; /* current length */
- size_t size; /* total buffer size */
+struct mpage {
+ struct inodev inodev; /* used for hashing routine */
+ int64_t pageid; /* pageid in mpages SQL table */
+ enum form form; /* format from file content */
+ char *sec; /* section from file content */
+ char *arch; /* architecture from file content */
+ char *title; /* title from file content */
+ char *desc; /* description from file content */
+ struct mlink *mlinks; /* singly linked list */
};
-/* Operation we're going to perform. */
+struct mlink {
+ char file[PATH_MAX]; /* filename rel. to manpath */
+ enum form dform; /* format from directory */
+ enum form fform; /* format from file name suffix */
+ char *dsec; /* section from directory */
+ char *arch; /* architecture from directory */
+ char *name; /* name from file name (not empty) */
+ char *fsec; /* section from file name suffix */
+ struct mlink *next; /* singly linked list */
+ struct mpage *mpage; /* parent */
+ int gzip; /* filename has a .gz suffix */
+};
-enum op {
- OP_DEFAULT = 0, /* new dbs from dir list or default config */
- OP_CONFFILE, /* new databases from custom config file */
- OP_UPDATE, /* delete/add entries in existing database */
- OP_DELETE, /* delete entries from existing database */
- OP_TEST /* change no databases, report potential problems */
+enum stmt {
+ STMT_DELETE_PAGE = 0, /* delete mpage */
+ STMT_INSERT_PAGE, /* insert mpage */
+ STMT_INSERT_LINK, /* insert mlink */
+ STMT_INSERT_NAME, /* insert name */
+ STMT_INSERT_KEY, /* insert parsed key */
+ STMT__MAX
};
-#define MAN_ARGS DB *hash, \
- struct buf *buf, \
- struct buf *dbuf, \
- const struct man_node *n
-#define MDOC_ARGS DB *hash, \
- struct buf *buf, \
- struct buf *dbuf, \
- const struct mdoc_node *n, \
- const struct mdoc_meta *m
-
-static void buf_appendmdoc(struct buf *,
- const struct mdoc_node *, int);
-static void buf_append(struct buf *, const char *);
-static void buf_appendb(struct buf *,
- const void *, size_t);
-static void dbt_put(DB *, const char *, DBT *, DBT *);
-static void hash_put(DB *, const struct buf *, uint64_t);
-static void hash_reset(DB **);
-static void index_merge(const struct of *, struct mparse *,
- struct buf *, struct buf *, DB *,
- struct mdb *, struct recs *);
-static void index_prune(const struct of *, struct mdb *,
- struct recs *);
-static void ofile_argbuild(int, char *[], struct of **,
- const char *);
-static void ofile_dirbuild(const char *, const char *,
- const char *, int, struct of **);
-static void ofile_free(struct of *);
-static void pformatted(DB *, struct buf *,
- struct buf *, const struct of *);
-static int pman_node(MAN_ARGS);
-static void pmdoc_node(MDOC_ARGS);
-static int pmdoc_head(MDOC_ARGS);
-static int pmdoc_body(MDOC_ARGS);
-static int pmdoc_Fd(MDOC_ARGS);
-static int pmdoc_In(MDOC_ARGS);
-static int pmdoc_Fn(MDOC_ARGS);
-static int pmdoc_Nd(MDOC_ARGS);
-static int pmdoc_Nm(MDOC_ARGS);
-static int pmdoc_Sh(MDOC_ARGS);
-static int pmdoc_St(MDOC_ARGS);
-static int pmdoc_Xr(MDOC_ARGS);
-
-#define MDOCF_CHILD 0x01 /* Automatically index child nodes. */
+typedef int (*mdoc_fp)(struct mpage *, const struct mdoc_node *);
struct mdoc_handler {
- int (*fp)(MDOC_ARGS); /* Optional handler. */
- uint64_t mask; /* Set unless handler returns 0. */
- int flags; /* For use by pmdoc_node. */
+ mdoc_fp fp; /* optional handler */
+ uint64_t mask; /* set unless handler returns 0 */
};
+static void dbclose(int);
+static void dbadd(struct mpage *, struct mchars *);
+static void dbadd_mlink(const struct mlink *mlink);
+static int dbopen(int);
+static void dbprune(void);
+static void filescan(const char *);
+static void *hash_alloc(size_t, void *);
+static void hash_free(void *, void *);
+static void *hash_calloc(size_t, size_t, void *);
+static void mlink_add(struct mlink *, const struct stat *);
+static void mlink_check(struct mpage *, struct mlink *);
+static void mlink_free(struct mlink *);
+static void mlinks_undupe(struct mpage *);
+static void mpages_free(void);
+static void mpages_merge(struct mchars *, struct mparse *);
+static void names_check(void);
+static void parse_cat(struct mpage *, int);
+static void parse_man(struct mpage *, const struct man_node *);
+static void parse_mdoc(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_body(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_head(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_Fd(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_Fn(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_Nd(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_Nm(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_Sh(struct mpage *, const struct mdoc_node *);
+static int parse_mdoc_Xr(struct mpage *, const struct mdoc_node *);
+static void putkey(const struct mpage *, char *, uint64_t);
+static void putkeys(const struct mpage *,
+ const char *, size_t, uint64_t);
+static void putmdockey(const struct mpage *,
+ const struct mdoc_node *, uint64_t);
+static void render_key(struct mchars *, struct str *);
+static void say(const char *, const char *, ...);
+static int set_basedir(const char *);
+static int treescan(void);
+static size_t utf8(unsigned int, char [7]);
+
+static char tempfilename[32];
+static char *progname;
+static int nodb; /* no database changes */
+static int mparse_options; /* abort the parse early */
+static int use_all; /* use all found files */
+static int debug; /* print what we're doing */
+static int warnings; /* warn about crap */
+static int write_utf8; /* write UTF-8 output; else ASCII */
+static int exitcode; /* to be returned by main */
+static enum op op; /* operational mode */
+static char basedir[PATH_MAX]; /* current base directory */
+static struct ohash mpages; /* table of distinct manual pages */
+static struct ohash mlinks; /* table of directory entries */
+static struct ohash names; /* table of all names */
+static struct ohash strings; /* table of all strings */
+static sqlite3 *db = NULL; /* current database */
+static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */
+static uint64_t name_mask;
+
static const struct mdoc_handler mdocs[MDOC_MAX] = {
- { NULL, 0, 0 }, /* Ap */
- { NULL, 0, 0 }, /* Dd */
- { NULL, 0, 0 }, /* Dt */
- { NULL, 0, 0 }, /* Os */
- { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
- { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
- { NULL, 0, 0 }, /* Pp */
- { NULL, 0, 0 }, /* D1 */
- { NULL, 0, 0 }, /* Dl */
- { NULL, 0, 0 }, /* Bd */
- { NULL, 0, 0 }, /* Ed */
- { NULL, 0, 0 }, /* Bl */
- { NULL, 0, 0 }, /* El */
- { NULL, 0, 0 }, /* It */
- { NULL, 0, 0 }, /* Ad */
- { NULL, TYPE_An, MDOCF_CHILD }, /* An */
- { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */
- { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */
- { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */
- { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */
- { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */
- { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */
- { NULL, 0, 0 }, /* Ex */
- { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */
- { pmdoc_Fd, TYPE_In, 0 }, /* Fd */
- { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */
- { pmdoc_Fn, 0, 0 }, /* Fn */
- { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */
- { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */
- { pmdoc_In, TYPE_In, 0 }, /* In */
- { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */
- { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */
- { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */
- { NULL, 0, 0 }, /* Op */
- { NULL, 0, 0 }, /* Ot */
- { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */
- { NULL, 0, 0 }, /* Rv */
- { pmdoc_St, TYPE_St, 0 }, /* St */
- { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */
- { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */
- { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */
- { NULL, 0, 0 }, /* %A */
- { NULL, 0, 0 }, /* %B */
- { NULL, 0, 0 }, /* %D */
- { NULL, 0, 0 }, /* %I */
- { NULL, 0, 0 }, /* %J */
- { NULL, 0, 0 }, /* %N */
- { NULL, 0, 0 }, /* %O */
- { NULL, 0, 0 }, /* %P */
- { NULL, 0, 0 }, /* %R */
- { NULL, 0, 0 }, /* %T */
- { NULL, 0, 0 }, /* %V */
- { NULL, 0, 0 }, /* Ac */
- { NULL, 0, 0 }, /* Ao */
- { NULL, 0, 0 }, /* Aq */
- { NULL, TYPE_At, MDOCF_CHILD }, /* At */
- { NULL, 0, 0 }, /* Bc */
- { NULL, 0, 0 }, /* Bf */
- { NULL, 0, 0 }, /* Bo */
- { NULL, 0, 0 }, /* Bq */
- { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */
- { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */
- { NULL, 0, 0 }, /* Db */
- { NULL, 0, 0 }, /* Dc */
- { NULL, 0, 0 }, /* Do */
- { NULL, 0, 0 }, /* Dq */
- { NULL, 0, 0 }, /* Ec */
- { NULL, 0, 0 }, /* Ef */
- { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */
- { NULL, 0, 0 }, /* Eo */
- { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */
- { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */
- { NULL, 0, 0 }, /* No */
- { NULL, 0, 0 }, /* Ns */
- { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */
- { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */
- { NULL, 0, 0 }, /* Pc */
- { NULL, 0, 0 }, /* Pf */
- { NULL, 0, 0 }, /* Po */
- { NULL, 0, 0 }, /* Pq */
- { NULL, 0, 0 }, /* Qc */
- { NULL, 0, 0 }, /* Ql */
- { NULL, 0, 0 }, /* Qo */
- { NULL, 0, 0 }, /* Qq */
- { NULL, 0, 0 }, /* Re */
- { NULL, 0, 0 }, /* Rs */
- { NULL, 0, 0 }, /* Sc */
- { NULL, 0, 0 }, /* So */
- { NULL, 0, 0 }, /* Sq */
- { NULL, 0, 0 }, /* Sm */
- { NULL, 0, 0 }, /* Sx */
- { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */
- { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */
- { NULL, 0, 0 }, /* Ux */
- { NULL, 0, 0 }, /* Xc */
- { NULL, 0, 0 }, /* Xo */
- { pmdoc_head, TYPE_Fn, 0 }, /* Fo */
- { NULL, 0, 0 }, /* Fc */
- { NULL, 0, 0 }, /* Oo */
- { NULL, 0, 0 }, /* Oc */
- { NULL, 0, 0 }, /* Bk */
- { NULL, 0, 0 }, /* Ek */
- { NULL, 0, 0 }, /* Bt */
- { NULL, 0, 0 }, /* Hf */
- { NULL, 0, 0 }, /* Fr */
- { NULL, 0, 0 }, /* Ud */
- { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */
- { NULL, 0, 0 }, /* Lp */
- { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */
- { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */
- { NULL, 0, 0 }, /* Brq */
- { NULL, 0, 0 }, /* Bro */
- { NULL, 0, 0 }, /* Brc */
- { NULL, 0, 0 }, /* %C */
- { NULL, 0, 0 }, /* Es */
- { NULL, 0, 0 }, /* En */
- { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */
- { NULL, 0, 0 }, /* %Q */
- { NULL, 0, 0 }, /* br */
- { NULL, 0, 0 }, /* sp */
- { NULL, 0, 0 }, /* %U */
- { NULL, 0, 0 }, /* Ta */
+ { NULL, 0 }, /* Ap */
+ { NULL, 0 }, /* Dd */
+ { NULL, 0 }, /* Dt */
+ { NULL, 0 }, /* Os */
+ { parse_mdoc_Sh, TYPE_Sh }, /* Sh */
+ { parse_mdoc_head, TYPE_Ss }, /* Ss */
+ { NULL, 0 }, /* Pp */
+ { NULL, 0 }, /* D1 */
+ { NULL, 0 }, /* Dl */
+ { NULL, 0 }, /* Bd */
+ { NULL, 0 }, /* Ed */
+ { NULL, 0 }, /* Bl */
+ { NULL, 0 }, /* El */
+ { NULL, 0 }, /* It */
+ { NULL, 0 }, /* Ad */
+ { NULL, TYPE_An }, /* An */
+ { NULL, TYPE_Ar }, /* Ar */
+ { NULL, TYPE_Cd }, /* Cd */
+ { NULL, TYPE_Cm }, /* Cm */
+ { NULL, TYPE_Dv }, /* Dv */
+ { NULL, TYPE_Er }, /* Er */
+ { NULL, TYPE_Ev }, /* Ev */
+ { NULL, 0 }, /* Ex */
+ { NULL, TYPE_Fa }, /* Fa */
+ { parse_mdoc_Fd, 0 }, /* Fd */
+ { NULL, TYPE_Fl }, /* Fl */
+ { parse_mdoc_Fn, 0 }, /* Fn */
+ { NULL, TYPE_Ft }, /* Ft */
+ { NULL, TYPE_Ic }, /* Ic */
+ { NULL, TYPE_In }, /* In */
+ { NULL, TYPE_Li }, /* Li */
+ { parse_mdoc_Nd, 0 }, /* Nd */
+ { parse_mdoc_Nm, 0 }, /* Nm */
+ { NULL, 0 }, /* Op */
+ { NULL, 0 }, /* Ot */
+ { NULL, TYPE_Pa }, /* Pa */
+ { NULL, 0 }, /* Rv */
+ { NULL, TYPE_St }, /* St */
+ { NULL, TYPE_Va }, /* Va */
+ { parse_mdoc_body, TYPE_Va }, /* Vt */
+ { parse_mdoc_Xr, 0 }, /* Xr */
+ { NULL, 0 }, /* %A */
+ { NULL, 0 }, /* %B */
+ { NULL, 0 }, /* %D */
+ { NULL, 0 }, /* %I */
+ { NULL, 0 }, /* %J */
+ { NULL, 0 }, /* %N */
+ { NULL, 0 }, /* %O */
+ { NULL, 0 }, /* %P */
+ { NULL, 0 }, /* %R */
+ { NULL, 0 }, /* %T */
+ { NULL, 0 }, /* %V */
+ { NULL, 0 }, /* Ac */
+ { NULL, 0 }, /* Ao */
+ { NULL, 0 }, /* Aq */
+ { NULL, TYPE_At }, /* At */
+ { NULL, 0 }, /* Bc */
+ { NULL, 0 }, /* Bf */
+ { NULL, 0 }, /* Bo */
+ { NULL, 0 }, /* Bq */
+ { NULL, TYPE_Bsx }, /* Bsx */
+ { NULL, TYPE_Bx }, /* Bx */
+ { NULL, 0 }, /* Db */
+ { NULL, 0 }, /* Dc */
+ { NULL, 0 }, /* Do */
+ { NULL, 0 }, /* Dq */
+ { NULL, 0 }, /* Ec */
+ { NULL, 0 }, /* Ef */
+ { NULL, TYPE_Em }, /* Em */
+ { NULL, 0 }, /* Eo */
+ { NULL, TYPE_Fx }, /* Fx */
+ { NULL, TYPE_Ms }, /* Ms */
+ { NULL, 0 }, /* No */
+ { NULL, 0 }, /* Ns */
+ { NULL, TYPE_Nx }, /* Nx */
+ { NULL, TYPE_Ox }, /* Ox */
+ { NULL, 0 }, /* Pc */
+ { NULL, 0 }, /* Pf */
+ { NULL, 0 }, /* Po */
+ { NULL, 0 }, /* Pq */
+ { NULL, 0 }, /* Qc */
+ { NULL, 0 }, /* Ql */
+ { NULL, 0 }, /* Qo */
+ { NULL, 0 }, /* Qq */
+ { NULL, 0 }, /* Re */
+ { NULL, 0 }, /* Rs */
+ { NULL, 0 }, /* Sc */
+ { NULL, 0 }, /* So */
+ { NULL, 0 }, /* Sq */
+ { NULL, 0 }, /* Sm */
+ { NULL, 0 }, /* Sx */
+ { NULL, TYPE_Sy }, /* Sy */
+ { NULL, TYPE_Tn }, /* Tn */
+ { NULL, 0 }, /* Ux */
+ { NULL, 0 }, /* Xc */
+ { NULL, 0 }, /* Xo */
+ { parse_mdoc_head, 0 }, /* Fo */
+ { NULL, 0 }, /* Fc */
+ { NULL, 0 }, /* Oo */
+ { NULL, 0 }, /* Oc */
+ { NULL, 0 }, /* Bk */
+ { NULL, 0 }, /* Ek */
+ { NULL, 0 }, /* Bt */
+ { NULL, 0 }, /* Hf */
+ { NULL, 0 }, /* Fr */
+ { NULL, 0 }, /* Ud */
+ { NULL, TYPE_Lb }, /* Lb */
+ { NULL, 0 }, /* Lp */
+ { NULL, TYPE_Lk }, /* Lk */
+ { NULL, TYPE_Mt }, /* Mt */
+ { NULL, 0 }, /* Brq */
+ { NULL, 0 }, /* Bro */
+ { NULL, 0 }, /* Brc */
+ { NULL, 0 }, /* %C */
+ { NULL, 0 }, /* Es */
+ { NULL, 0 }, /* En */
+ { NULL, TYPE_Dx }, /* Dx */
+ { NULL, 0 }, /* %Q */
+ { NULL, 0 }, /* br */
+ { NULL, 0 }, /* sp */
+ { NULL, 0 }, /* %U */
+ { NULL, 0 }, /* Ta */
};
-static const char *progname;
-static int use_all; /* Use all directories and files. */
-static int verb; /* Output verbosity level. */
-static int warnings; /* Potential problems in manuals. */
int
main(int argc, char *argv[])
{
- struct mparse *mp; /* parse sequence */
- struct manpaths dirs;
- struct mdb mdb;
- struct recs recs;
- enum op op; /* current operation */
- const char *dir;
- char *cp;
- char pbuf[PATH_MAX];
- int ch, i, flags;
- DB *hash; /* temporary keyword hashtable */
- BTREEINFO info; /* btree configuration */
- size_t sz1, sz2, ipath;
- struct buf buf, /* keyword buffer */
- dbuf; /* description buffer */
- struct of *of; /* list of files for processing */
- extern int optind;
- extern char *optarg;
+ int ch, i;
+ size_t j, sz;
+ const char *path_arg;
+ struct mchars *mc;
+ struct manpaths dirs;
+ struct mparse *mp;
+ struct ohash_info mpages_info, mlinks_info;
+
+ memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *));
+ memset(&dirs, 0, sizeof(struct manpaths));
+
+ mpages_info.alloc = mlinks_info.alloc = hash_alloc;
+ mpages_info.calloc = mlinks_info.calloc = hash_calloc;
+ mpages_info.free = mlinks_info.free = hash_free;
+
+ mpages_info.key_offset = offsetof(struct mpage, inodev);
+ mlinks_info.key_offset = offsetof(struct mlink, file);
progname = strrchr(argv[0], '/');
if (progname == NULL)
@@ -319,64 +348,70 @@ main(int argc, char *argv[])
else
++progname;
- memset(&dirs, 0, sizeof(struct manpaths));
- memset(&mdb, 0, sizeof(struct mdb));
- memset(&recs, 0, sizeof(struct recs));
-
- of = NULL;
- mp = NULL;
- hash = NULL;
+ /*
+ * We accept a few different invocations.
+ * The CHECKOP macro makes sure that invocation styles don't
+ * clobber each other.
+ */
+#define CHECKOP(_op, _ch) do \
+ if (OP_DEFAULT != (_op)) { \
+ fprintf(stderr, "%s: -%c: Conflicting option\n", \
+ progname, (_ch)); \
+ goto usage; \
+ } while (/*CONSTCOND*/0)
+
+ path_arg = NULL;
op = OP_DEFAULT;
- dir = NULL;
- while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
+ while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")))
switch (ch) {
- case ('a'):
+ case 'a':
use_all = 1;
break;
- case ('C'):
- if (op) {
- fprintf(stderr,
- "-C: conflicting options\n");
- goto usage;
- }
- dir = optarg;
+ case 'C':
+ CHECKOP(op, ch);
+ path_arg = optarg;
op = OP_CONFFILE;
break;
- case ('d'):
- if (op) {
- fprintf(stderr,
- "-d: conflicting options\n");
- goto usage;
- }
- dir = optarg;
+ case 'D':
+ debug++;
+ break;
+ case 'd':
+ CHECKOP(op, ch);
+ path_arg = optarg;
op = OP_UPDATE;
break;
- case ('t'):
- dup2(STDOUT_FILENO, STDERR_FILENO);
- if (op) {
- fprintf(stderr,
- "-t: conflicting options\n");
- goto usage;
- }
- op = OP_TEST;
- use_all = 1;
+ case 'n':
+ nodb = 1;
+ break;
+ case 'p':
warnings = 1;
break;
- case ('u'):
- if (op) {
- fprintf(stderr,
- "-u: conflicting options\n");
+ case 'Q':
+ mparse_options |= MPARSE_QUICK;
+ break;
+ case 'T':
+ if (strcmp(optarg, "utf8")) {
+ fprintf(stderr, "%s: -T%s: "
+ "Unsupported output format\n",
+ progname, optarg);
goto usage;
}
- dir = optarg;
- op = OP_DELETE;
+ write_utf8 = 1;
break;
- case ('v'):
- verb++;
+ case 't':
+ CHECKOP(op, ch);
+ dup2(STDOUT_FILENO, STDERR_FILENO);
+ op = OP_TEST;
+ nodb = warnings = 1;
break;
- case ('W'):
- warnings = 1;
+ case 'u':
+ CHECKOP(op, ch);
+ path_arg = optarg;
+ op = OP_DELETE;
+ break;
+ case 'v':
+ /* Compatibility with espie@'s makewhatis. */
break;
default:
goto usage;
@@ -386,729 +421,1208 @@ main(int argc, char *argv[])
argv += optind;
if (OP_CONFFILE == op && argc > 0) {
- fprintf(stderr, "-C: too many arguments\n");
+ fprintf(stderr, "%s: -C: Too many arguments\n",
+ progname);
goto usage;
}
- memset(&info, 0, sizeof(BTREEINFO));
- info.lorder = 4321;
- info.flags = R_DUP;
+ exitcode = (int)MANDOCLEVEL_OK;
+ mp = mparse_alloc(mparse_options, MANDOCLEVEL_FATAL, NULL, NULL);
+ mc = mchars_alloc();
- mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
+ ohash_init(&mpages, 6, &mpages_info);
+ ohash_init(&mlinks, 6, &mlinks_info);
- memset(&buf, 0, sizeof(struct buf));
- memset(&dbuf, 0, sizeof(struct buf));
+ if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
- buf.size = dbuf.size = MANDOC_BUFSZ;
-
- buf.cp = mandoc_malloc(buf.size);
- dbuf.cp = mandoc_malloc(dbuf.size);
-
- if (OP_TEST == op) {
- ofile_argbuild(argc, argv, &of, NULL);
- if (NULL == of)
+ /*
+ * Most of these deal with a specific directory.
+ * Jump into that directory first.
+ */
+ if (OP_TEST != op && 0 == set_basedir(path_arg))
goto out;
- index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
- goto out;
- }
-
- if (OP_UPDATE == op || OP_DELETE == op) {
- if (NULL == realpath(dir, pbuf)) {
- perror(dir);
- exit((int)MANDOCLEVEL_BADARG);
- }
- if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) {
- fprintf(stderr, "%s: path too long\n", pbuf);
- exit((int)MANDOCLEVEL_BADARG);
- }
-
- strlcat(mdb.dbn, pbuf, PATH_MAX);
- sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX);
- strlcat(mdb.idxn, pbuf, PATH_MAX);
- sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX);
-
- if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) {
- fprintf(stderr, "%s: path too long\n", mdb.idxn);
- exit((int)MANDOCLEVEL_BADARG);
+ if (dbopen(1)) {
+ /*
+ * The existing database is usable. Process
+ * all files specified on the command-line.
+ */
+ use_all = 1;
+ for (i = 0; i < argc; i++)
+ filescan(argv[i]);
+ if (OP_TEST != op)
+ dbprune();
+ } else {
+ /*
+ * Database missing or corrupt.
+ * Recreate from scratch.
+ */
+ exitcode = (int)MANDOCLEVEL_OK;
+ op = OP_DEFAULT;
+ if (0 == treescan())
+ goto out;
+ if (0 == dbopen(0))
+ goto out;
}
+ if (OP_DELETE != op)
+ mpages_merge(mc, mp);
+ dbclose(OP_DEFAULT == op ? 0 : 1);
+ } else {
+ /*
+ * If we have arguments, use them as our manpaths.
+ * If we don't, grok from manpath(1) or however else
+ * manpath_parse() wants to do it.
+ */
+ if (argc > 0) {
+ dirs.paths = mandoc_reallocarray(NULL,
+ argc, sizeof(char *));
+ dirs.sz = (size_t)argc;
+ for (i = 0; i < argc; i++)
+ dirs.paths[i] = mandoc_strdup(argv[i]);
+ } else
+ manpath_parse(&dirs, path_arg, NULL, NULL);
- flags = O_CREAT | O_RDWR;
- mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
- mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
-
- if (NULL == mdb.db) {
- perror(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (NULL == mdb.idx) {
- perror(mdb.idxn);
- exit((int)MANDOCLEVEL_SYSERR);
+ if (0 == dirs.sz) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say("", "Empty manpath");
}
- ofile_argbuild(argc, argv, &of, pbuf);
-
- if (NULL == of)
- goto out;
-
- index_prune(of, &mdb, &recs);
-
/*
- * Go to the root of the respective manual tree.
- * This must work or no manuals may be found (they're
- * indexed relative to the root).
+ * First scan the tree rooted at a base directory, then
+ * build a new database and finally move it into place.
+ * Ignore zero-length directories and strip trailing
+ * slashes.
*/
+ for (j = 0; j < dirs.sz; j++) {
+ sz = strlen(dirs.paths[j]);
+ if (sz && '/' == dirs.paths[j][sz - 1])
+ dirs.paths[j][--sz] = '\0';
+ if (0 == sz)
+ continue;
- if (OP_UPDATE == op) {
- if (-1 == chdir(dir)) {
- perror(dir);
- exit((int)MANDOCLEVEL_SYSERR);
+ if (j) {
+ ohash_init(&mpages, 6, &mpages_info);
+ ohash_init(&mlinks, 6, &mlinks_info);
}
- index_merge(of, mp, &dbuf, &buf, hash,
- &mdb, &recs);
- }
- goto out;
- }
+ if (0 == set_basedir(dirs.paths[j]))
+ goto out;
+ if (0 == treescan())
+ goto out;
+ if (0 == dbopen(0))
+ goto out;
- /*
- * Configure the directories we're going to scan.
- * If we have command-line arguments, use them.
- * If not, we use man(1)'s method (see mandocdb.8).
- */
+ mpages_merge(mc, mp);
+ if (warnings && !nodb &&
+ ! (MPARSE_QUICK & mparse_options))
+ names_check();
+ dbclose(0);
- if (argc > 0) {
- dirs.paths = mandoc_calloc(argc, sizeof(char *));
- dirs.sz = argc;
- for (i = 0; i < argc; i++) {
- if (NULL == (cp = realpath(argv[i], pbuf))) {
- perror(argv[i]);
- goto out;
+ if (j + 1 < dirs.sz) {
+ mpages_free();
+ ohash_delete(&mpages);
+ ohash_delete(&mlinks);
}
- dirs.paths[i] = mandoc_strdup(cp);
}
- } else
- manpath_parse(&dirs, dir, NULL, NULL);
+ }
+out:
+ manpath_free(&dirs);
+ mchars_free(mc);
+ mparse_free(mp);
+ mpages_free();
+ ohash_delete(&mpages);
+ ohash_delete(&mlinks);
+ return(exitcode);
+usage:
+ fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n"
+ " %s [-aDnpQ] [-Tutf8] dir ...\n"
+ " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n"
+ " %s [-Dnp] -u dir [file ...]\n"
+ " %s [-Q] -t file ...\n",
+ progname, progname, progname,
+ progname, progname);
- for (ipath = 0; ipath < dirs.sz; ipath++) {
+ return((int)MANDOCLEVEL_BADARG);
+}
- /*
- * Go to the root of the respective manual tree.
- * This must work or no manuals may be found:
- * They are indexed relative to the root.
- */
+/*
+ * Scan a directory tree rooted at "basedir" for manpages.
+ * We use fts(), scanning directory parts along the way for clues to our
+ * section and architecture.
+ *
+ * If use_all has been specified, grok all files.
+ * If not, sanitise paths to the following:
+ *
+ * [./]man*[/<arch>]/<name>.<section>
+ * or
+ * [./]cat<section>[/<arch>]/<name>.0
+ *
+ * TODO: accommodate multi-language directories.
+ */
+static int
+treescan(void)
+{
+ char buf[PATH_MAX];
+ FTS *f;
+ FTSENT *ff;
+ struct mlink *mlink;
+ int dform, gzip;
+ char *dsec, *arch, *fsec, *cp;
+ const char *path;
+ const char *argv[2];
+
+ argv[0] = ".";
+ argv[1] = (char *)NULL;
+
+ f = fts_open((char * const *)argv,
+ FTS_PHYSICAL | FTS_NOCHDIR, NULL);
+ if (NULL == f) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&fts_open");
+ return(0);
+ }
- if (-1 == chdir(dirs.paths[ipath])) {
- perror(dirs.paths[ipath]);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ dsec = arch = NULL;
+ dform = FORM_NONE;
- /* Create a new database in two temporary files. */
+ while (NULL != (ff = fts_read(f))) {
+ path = ff->fts_path + 2;
+ switch (ff->fts_info) {
- flags = O_CREAT | O_EXCL | O_RDWR;
- while (NULL == mdb.db) {
- strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX);
- strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX);
- if (NULL == mktemp(mdb.dbn)) {
- perror(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
- mdb.db = dbopen(mdb.dbn, flags, 0644,
- DB_BTREE, &info);
- if (NULL == mdb.db && EEXIST != errno) {
- perror(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
+ /*
+ * Symbolic links require various sanity checks,
+ * then get handled just like regular files.
+ */
+ case FTS_SL:
+ if (NULL == realpath(path, buf)) {
+ if (warnings)
+ say(path, "&realpath");
+ continue;
}
- }
- while (NULL == mdb.idx) {
- strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX);
- strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX);
- if (NULL == mktemp(mdb.idxn)) {
- perror(mdb.idxn);
- unlink(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
+ if (strstr(buf, basedir) != buf) {
+ if (warnings) say("",
+ "%s: outside base directory", buf);
+ continue;
}
- mdb.idx = dbopen(mdb.idxn, flags, 0644,
- DB_RECNO, NULL);
- if (NULL == mdb.idx && EEXIST != errno) {
- perror(mdb.idxn);
- unlink(mdb.dbn);
- exit((int)MANDOCLEVEL_SYSERR);
+ /* Use logical inode to avoid mpages dupe. */
+ if (-1 == stat(path, ff->fts_statp)) {
+ if (warnings)
+ say(path, "&stat");
+ continue;
}
- }
+ /* FALLTHROUGH */
/*
- * Search for manuals and fill the new database.
+ * If we're a regular file, add an mlink by using the
+ * stored directory data and handling the filename.
*/
+ case FTS_F:
+ if (0 == strcmp(path, MANDOC_DB))
+ continue;
+ if ( ! use_all && ff->fts_level < 2) {
+ if (warnings)
+ say(path, "Extraneous file");
+ continue;
+ }
+ gzip = 0;
+ fsec = NULL;
+ while (NULL == fsec) {
+ fsec = strrchr(ff->fts_name, '.');
+ if (NULL == fsec || strcmp(fsec+1, "gz"))
+ break;
+ gzip = 1;
+ *fsec = '\0';
+ fsec = NULL;
+ }
+ if (NULL == fsec) {
+ if ( ! use_all) {
+ if (warnings)
+ say(path,
+ "No filename suffix");
+ continue;
+ }
+ } else if (0 == strcmp(++fsec, "html")) {
+ if (warnings)
+ say(path, "Skip html");
+ continue;
+ } else if (0 == strcmp(fsec, "ps")) {
+ if (warnings)
+ say(path, "Skip ps");
+ continue;
+ } else if (0 == strcmp(fsec, "pdf")) {
+ if (warnings)
+ say(path, "Skip pdf");
+ continue;
+ } else if ( ! use_all &&
+ ((FORM_SRC == dform && strcmp(fsec, dsec)) ||
+ (FORM_CAT == dform && strcmp(fsec, "0")))) {
+ if (warnings)
+ say(path, "Wrong filename suffix");
+ continue;
+ } else
+ fsec[-1] = '\0';
+
+ mlink = mandoc_calloc(1, sizeof(struct mlink));
+ if (strlcpy(mlink->file, path,
+ sizeof(mlink->file)) >=
+ sizeof(mlink->file)) {
+ say(path, "Filename too long");
+ free(mlink);
+ continue;
+ }
+ mlink->dform = dform;
+ mlink->dsec = dsec;
+ mlink->arch = arch;
+ mlink->name = ff->fts_name;
+ mlink->fsec = fsec;
+ mlink->gzip = gzip;
+ mlink_add(mlink, ff->fts_statp);
+ continue;
- ofile_dirbuild(".", "", "", 0, &of);
+ case FTS_D:
+ /* FALLTHROUGH */
+ case FTS_DP:
+ break;
- if (NULL != of) {
- index_merge(of, mp, &dbuf, &buf, hash,
- &mdb, &recs);
- ofile_free(of);
- of = NULL;
+ default:
+ if (warnings)
+ say(path, "Not a regular file");
+ continue;
}
- (*mdb.db->close)(mdb.db);
- (*mdb.idx->close)(mdb.idx);
- mdb.db = NULL;
- mdb.idx = NULL;
+ switch (ff->fts_level) {
+ case 0:
+ /* Ignore the root directory. */
+ break;
+ case 1:
+ /*
+ * This might contain manX/ or catX/.
+ * Try to infer this from the name.
+ * If we're not in use_all, enforce it.
+ */
+ cp = ff->fts_name;
+ if (FTS_DP == ff->fts_info)
+ break;
- /*
- * Replace the old database with the new one.
- * This is not perfectly atomic,
- * but i cannot think of a better way.
- */
+ if (0 == strncmp(cp, "man", 3)) {
+ dform = FORM_SRC;
+ dsec = cp + 3;
+ } else if (0 == strncmp(cp, "cat", 3)) {
+ dform = FORM_CAT;
+ dsec = cp + 3;
+ } else {
+ dform = FORM_NONE;
+ dsec = NULL;
+ }
- if (-1 == rename(mdb.dbn, MANDOC_DB)) {
- perror(MANDOC_DB);
- unlink(mdb.dbn);
- unlink(mdb.idxn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
- if (-1 == rename(mdb.idxn, MANDOC_IDX)) {
- perror(MANDOC_IDX);
- unlink(MANDOC_DB);
- unlink(MANDOC_IDX);
- unlink(mdb.idxn);
- exit((int)MANDOCLEVEL_SYSERR);
+ if (NULL != dsec || use_all)
+ break;
+
+ if (warnings)
+ say(path, "Unknown directory part");
+ fts_set(f, ff, FTS_SKIP);
+ break;
+ case 2:
+ /*
+ * Possibly our architecture.
+ * If we're descending, keep tabs on it.
+ */
+ if (FTS_DP != ff->fts_info && NULL != dsec)
+ arch = ff->fts_name;
+ else
+ arch = NULL;
+ break;
+ default:
+ if (FTS_DP == ff->fts_info || use_all)
+ break;
+ if (warnings)
+ say(path, "Extraneous directory part");
+ fts_set(f, ff, FTS_SKIP);
+ break;
}
}
-out:
- if (mdb.db)
- (*mdb.db->close)(mdb.db);
- if (mdb.idx)
- (*mdb.idx->close)(mdb.idx);
- if (hash)
- (*hash->close)(hash);
- if (mp)
- mparse_free(mp);
+ fts_close(f);
+ return(1);
+}
- manpath_free(&dirs);
- ofile_free(of);
- free(buf.cp);
- free(dbuf.cp);
- free(recs.stack);
+/*
+ * Add a file to the mlinks table.
+ * Do not verify that it's a "valid" looking manpage (we'll do that
+ * later).
+ *
+ * Try to infer the manual section, architecture, and page name from the
+ * path, assuming it looks like
+ *
+ * [./]man*[/<arch>]/<name>.<section>
+ * or
+ * [./]cat<section>[/<arch>]/<name>.0
+ *
+ * See treescan() for the fts(3) version of this.
+ */
+static void
+filescan(const char *file)
+{
+ char buf[PATH_MAX];
+ struct stat st;
+ struct mlink *mlink;
+ char *p, *start;
- return(MANDOCLEVEL_OK);
+ assert(use_all);
-usage:
- fprintf(stderr,
- "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
- " -d dir [file ...] | "
- "-u dir [file ...]\n",
- progname);
+ if (0 == strncmp(file, "./", 2))
+ file += 2;
- return((int)MANDOCLEVEL_BADARG);
-}
+ /*
+ * We have to do lstat(2) before realpath(3) loses
+ * the information whether this is a symbolic link.
+ * We need to know that because for symbolic links,
+ * we want to use the original file name, while for
+ * regular files, we want to use the real path.
+ */
+ if (-1 == lstat(file, &st)) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say(file, "&lstat");
+ return;
+ } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say(file, "Not a regular file");
+ return;
+ }
-void
-index_merge(const struct of *of, struct mparse *mp,
- struct buf *dbuf, struct buf *buf, DB *hash,
- struct mdb *mdb, struct recs *recs)
-{
- recno_t rec;
- int ch, skip;
- DBT key, val;
- DB *files; /* temporary file name table */
- struct mdoc *mdoc;
- struct man *man;
- const char *fn, *msec, *march, *mtitle;
- char *p;
- uint64_t mask;
- size_t sv;
- unsigned seq;
- uint64_t vbuf[2];
- char type;
-
- static char emptystring[] = "";
-
- if (warnings) {
- files = NULL;
- hash_reset(&files);
+ /*
+ * We have to resolve the file name to the real path
+ * in any case for the base directory check.
+ */
+ if (NULL == realpath(file, buf)) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say(file, "&realpath");
+ return;
}
- rec = 0;
- for (of = of->first; of; of = of->next) {
- fn = of->fname;
+ if (OP_TEST == op)
+ start = buf;
+ else if (strstr(buf, basedir) == buf)
+ start = buf + strlen(basedir);
+ else {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say("", "%s: outside base directory", buf);
+ return;
+ }
- /*
- * Try interpreting the file as mdoc(7) or man(7)
- * source code, unless it is already known to be
- * formatted. Fall back to formatted mode.
- */
+ /*
+ * Now we are sure the file is inside our tree.
+ * If it is a symbolic link, ignore the real path
+ * and use the original name.
+ * This implies passing stuff like "cat1/../man1/foo.1"
+ * on the command line won't work. So don't do that.
+ * Note the stat(2) can still fail if the link target
+ * doesn't exist.
+ */
+ if (S_IFLNK & st.st_mode) {
+ if (-1 == stat(buf, &st)) {
+ exitcode = (int)MANDOCLEVEL_BADARG;
+ say(file, "&stat");
+ return;
+ }
+ if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
+ say(file, "Filename too long");
+ return;
+ }
+ start = buf;
+ if (OP_TEST != op && strstr(buf, basedir) == buf)
+ start += strlen(basedir);
+ }
- mparse_reset(mp);
- mdoc = NULL;
- man = NULL;
+ mlink = mandoc_calloc(1, sizeof(struct mlink));
+ if (strlcpy(mlink->file, start, sizeof(mlink->file)) >=
+ sizeof(mlink->file)) {
+ say(start, "Filename too long");
+ return;
+ }
- if ((MANDOC_SRC & of->src_form ||
- ! (MANDOC_FORM & of->src_form)) &&
- MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
- mparse_result(mp, &mdoc, &man);
+ /*
+ * First try to guess our directory structure.
+ * If we find a separator, try to look for man* or cat*.
+ * If we find one of these and what's underneath is a directory,
+ * assume it's an architecture.
+ */
+ if (NULL != (p = strchr(start, '/'))) {
+ *p++ = '\0';
+ if (0 == strncmp(start, "man", 3)) {
+ mlink->dform = FORM_SRC;
+ mlink->dsec = start + 3;
+ } else if (0 == strncmp(start, "cat", 3)) {
+ mlink->dform = FORM_CAT;
+ mlink->dsec = start + 3;
+ }
- if (NULL != mdoc) {
- msec = mdoc_meta(mdoc)->msec;
- march = mdoc_meta(mdoc)->arch;
- if (NULL == march)
- march = "";
- mtitle = mdoc_meta(mdoc)->title;
- } else if (NULL != man) {
- msec = man_meta(man)->msec;
- march = "";
- mtitle = man_meta(man)->title;
- } else {
- msec = of->sec;
- march = of->arch;
- mtitle = of->title;
+ start = p;
+ if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
+ *p++ = '\0';
+ mlink->arch = start;
+ start = p;
}
+ }
- /*
- * Check whether the manual section given in a file
- * agrees with the directory where the file is located.
- * Some manuals have suffixes like (3p) on their
- * section number either inside the file or in the
- * directory name, some are linked into more than one
- * section, like encrypt(1) = makekey(8). Do not skip
- * manuals for such reasons.
- */
+ /*
+ * Now check the file suffix.
+ * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
+ */
+ p = strrchr(start, '\0');
+ while (p-- > start && '/' != *p && '.' != *p)
+ /* Loop. */ ;
- skip = 0;
- assert(of->sec);
- assert(msec);
- if (warnings)
- if (strcasecmp(msec, of->sec))
- fprintf(stderr, "%s: "
- "section \"%s\" manual "
- "in \"%s\" directory\n",
- fn, msec, of->sec);
+ if ('.' == *p) {
+ *p++ = '\0';
+ mlink->fsec = p;
+ }
- /*
- * Manual page directories exist for each kernel
- * architecture as returned by machine(1).
- * However, many manuals only depend on the
- * application architecture as returned by arch(1).
- * For example, some (2/ARM) manuals are shared
- * across the "armish" and "zaurus" kernel
- * architectures.
- * A few manuals are even shared across completely
- * different architectures, for example fdformat(1)
- * on amd64, i386, sparc, and sparc64.
- * Thus, warn about architecture mismatches,
- * but don't skip manuals for this reason.
- */
+ /*
+ * Now try to parse the name.
+ * Use the filename portion of the path.
+ */
+ mlink->name = start;
+ if (NULL != (p = strrchr(start, '/'))) {
+ mlink->name = p + 1;
+ *p = '\0';
+ }
+ mlink_add(mlink, &st);
+}
- assert(of->arch);
- assert(march);
- if (warnings)
- if (strcasecmp(march, of->arch))
- fprintf(stderr, "%s: "
- "architecture \"%s\" manual "
- "in \"%s\" directory\n",
- fn, march, of->arch);
+static void
+mlink_add(struct mlink *mlink, const struct stat *st)
+{
+ struct inodev inodev;
+ struct mpage *mpage;
+ unsigned int slot;
+
+ assert(NULL != mlink->file);
+
+ mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : "");
+ mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : "");
+ mlink->name = mandoc_strdup(mlink->name ? mlink->name : "");
+ mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : "");
+
+ if ('0' == *mlink->fsec) {
+ free(mlink->fsec);
+ mlink->fsec = mandoc_strdup(mlink->dsec);
+ mlink->fform = FORM_CAT;
+ } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
+ mlink->fform = FORM_SRC;
+ else
+ mlink->fform = FORM_NONE;
+
+ slot = ohash_qlookup(&mlinks, mlink->file);
+ assert(NULL == ohash_find(&mlinks, slot));
+ ohash_insert(&mlinks, slot, mlink);
+
+ inodev.st_ino = st->st_ino;
+ inodev.st_dev = st->st_dev;
+ slot = ohash_lookup_memory(&mpages, (char *)&inodev,
+ sizeof(struct inodev), inodev.st_ino);
+ mpage = ohash_find(&mpages, slot);
+ if (NULL == mpage) {
+ mpage = mandoc_calloc(1, sizeof(struct mpage));
+ mpage->inodev.st_ino = inodev.st_ino;
+ mpage->inodev.st_dev = inodev.st_dev;
+ ohash_insert(&mpages, slot, mpage);
+ } else
+ mlink->next = mpage->mlinks;
+ mpage->mlinks = mlink;
+ mlink->mpage = mpage;
+}
- /*
- * By default, skip a file if the title given
- * in the file disagrees with the file name.
- * Do not warn, this happens for all MLINKs.
- */
+static void
+mlink_free(struct mlink *mlink)
+{
- assert(of->title);
- assert(mtitle);
- if (strcasecmp(mtitle, of->title))
- skip = 1;
+ free(mlink->dsec);
+ free(mlink->arch);
+ free(mlink->name);
+ free(mlink->fsec);
+ free(mlink);
+}
- /*
- * Build a title string for the file. If it matches
- * the location of the file, remember the title as
- * found; else, remember it as missing.
- */
+static void
+mpages_free(void)
+{
+ struct mpage *mpage;
+ struct mlink *mlink;
+ unsigned int slot;
+
+ mpage = ohash_first(&mpages, &slot);
+ while (NULL != mpage) {
+ while (NULL != (mlink = mpage->mlinks)) {
+ mpage->mlinks = mlink->next;
+ mlink_free(mlink);
+ }
+ free(mpage->sec);
+ free(mpage->arch);
+ free(mpage->title);
+ free(mpage->desc);
+ free(mpage);
+ mpage = ohash_next(&mpages, &slot);
+ }
+}
- if (warnings) {
- buf->len = 0;
- buf_appendb(buf, mtitle, strlen(mtitle));
- buf_appendb(buf, "(", 1);
- buf_appendb(buf, msec, strlen(msec));
- if ('\0' != *march) {
- buf_appendb(buf, "/", 1);
- buf_appendb(buf, march, strlen(march));
- }
- buf_appendb(buf, ")", 2);
- for (p = buf->cp; '\0' != *p; p++)
- *p = tolower((unsigned char)*p);
- key.data = buf->cp;
- key.size = buf->len;
- val.data = NULL;
- val.size = 0;
- if (0 == skip)
- val.data = emptystring;
- else {
- ch = (*files->get)(files, &key, &val, 0);
- if (ch < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (ch > 0) {
- val.data = (void *)fn;
- val.size = strlen(fn) + 1;
- } else
- val.data = NULL;
- }
- if (NULL != val.data &&
- (*files->put)(files, &key, &val, 0) < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- }
+/*
+ * For each mlink to the mpage, check whether the path looks like
+ * it is formatted, and if it does, check whether a source manual
+ * exists by the same name, ignoring the suffix.
+ * If both conditions hold, drop the mlink.
+ */
+static void
+mlinks_undupe(struct mpage *mpage)
+{
+ char buf[PATH_MAX];
+ struct mlink **prev;
+ struct mlink *mlink;
+ char *bufp;
+
+ mpage->form = FORM_CAT;
+ prev = &mpage->mlinks;
+ while (NULL != (mlink = *prev)) {
+ if (FORM_CAT != mlink->dform) {
+ mpage->form = FORM_NONE;
+ goto nextlink;
}
+ (void)strlcpy(buf, mlink->file, sizeof(buf));
+ bufp = strstr(buf, "cat");
+ assert(NULL != bufp);
+ memcpy(bufp, "man", 3);
+ if (NULL != (bufp = strrchr(buf, '.')))
+ *++bufp = '\0';
+ (void)strlcat(buf, mlink->dsec, sizeof(buf));
+ if (NULL == ohash_find(&mlinks,
+ ohash_qlookup(&mlinks, buf)))
+ goto nextlink;
+ if (warnings)
+ say(mlink->file, "Man source exists: %s", buf);
+ if (use_all)
+ goto nextlink;
+ *prev = mlink->next;
+ mlink_free(mlink);
+ continue;
+nextlink:
+ prev = &(*prev)->next;
+ }
+}
- if (skip && !use_all)
- continue;
+static void
+mlink_check(struct mpage *mpage, struct mlink *mlink)
+{
+ struct str *str;
+ unsigned int slot;
- /*
- * The index record value consists of a nil-terminated
- * filename, a nil-terminated manual section, and a
- * nil-terminated description. Use the actual
- * location of the file, such that the user can find
- * it with man(1). Since the description may not be
- * set, we set a sentinel to see if we're going to
- * write a nil byte in its place.
- */
+ /*
+ * Check whether the manual section given in a file
+ * agrees with the directory where the file is located.
+ * Some manuals have suffixes like (3p) on their
+ * section number either inside the file or in the
+ * directory name, some are linked into more than one
+ * section, like encrypt(1) = makekey(8).
+ */
- dbuf->len = 0;
- type = mdoc ? 'd' : (man ? 'a' : 'c');
- buf_appendb(dbuf, &type, 1);
- buf_appendb(dbuf, fn, strlen(fn) + 1);
- buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
- buf_appendb(dbuf, of->title, strlen(of->title) + 1);
- buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
+ if (FORM_SRC == mpage->form &&
+ strcasecmp(mpage->sec, mlink->dsec))
+ say(mlink->file, "Section \"%s\" manual in %s directory",
+ mpage->sec, mlink->dsec);
- sv = dbuf->len;
+ /*
+ * Manual page directories exist for each kernel
+ * architecture as returned by machine(1).
+ * However, many manuals only depend on the
+ * application architecture as returned by arch(1).
+ * For example, some (2/ARM) manuals are shared
+ * across the "armish" and "zaurus" kernel
+ * architectures.
+ * A few manuals are even shared across completely
+ * different architectures, for example fdformat(1)
+ * on amd64, i386, sparc, and sparc64.
+ */
- /*
- * Collect keyword/mask pairs.
- * Each pair will become a new btree node.
- */
+ if (strcasecmp(mpage->arch, mlink->arch))
+ say(mlink->file, "Architecture \"%s\" manual in "
+ "\"%s\" directory", mpage->arch, mlink->arch);
- hash_reset(&hash);
- if (mdoc)
- pmdoc_node(hash, buf, dbuf,
- mdoc_node(mdoc), mdoc_meta(mdoc));
- else if (man)
- pman_node(hash, buf, dbuf, man_node(man));
- else
- pformatted(hash, buf, dbuf, of);
+ /*
+ * XXX
+ * parse_cat() doesn't set NAME_TITLE yet.
+ */
+
+ if (FORM_CAT == mpage->form)
+ return;
+
+ /*
+ * Check whether this mlink
+ * appears as a name in the NAME section.
+ */
- /* Test mode, do not access any database. */
+ slot = ohash_qlookup(&names, mlink->name);
+ str = ohash_find(&names, slot);
+ assert(NULL != str);
+ if ( ! (NAME_TITLE & str->mask))
+ say(mlink->file, "Name missing in NAME section");
+}
- if (NULL == mdb->db || NULL == mdb->idx)
+/*
+ * Run through the files in the global hash table "mpages"
+ * and add them to the database specified in "basedir".
+ *
+ * This handles the parsing scheme itself, using the cues of directory
+ * and filename to determine whether the file is parsable or not.
+ */
+static void
+mpages_merge(struct mchars *mc, struct mparse *mp)
+{
+ char any[] = "any";
+ struct ohash_info str_info;
+ int fd[2];
+ struct mpage *mpage, *mpage_dest;
+ struct mlink *mlink, *mlink_dest;
+ struct mdoc *mdoc;
+ struct man *man;
+ char *sodest;
+ char *cp;
+ pid_t child_pid;
+ int status;
+ unsigned int pslot;
+ enum mandoclevel lvl;
+
+ str_info.alloc = hash_alloc;
+ str_info.calloc = hash_calloc;
+ str_info.free = hash_free;
+ str_info.key_offset = offsetof(struct str, key);
+
+ if (0 == nodb)
+ SQL_EXEC("BEGIN TRANSACTION");
+
+ mpage = ohash_first(&mpages, &pslot);
+ while (NULL != mpage) {
+ mlinks_undupe(mpage);
+ if (NULL == mpage->mlinks) {
+ mpage = ohash_next(&mpages, &pslot);
continue;
+ }
- /*
- * Make sure the file name is always registered
- * as an .Nm search key.
- */
- buf->len = 0;
- buf_append(buf, of->title);
- hash_put(hash, buf, TYPE_Nm);
+ name_mask = NAME_MASK;
+ ohash_init(&names, 4, &str_info);
+ ohash_init(&strings, 6, &str_info);
+ mparse_reset(mp);
+ mdoc = NULL;
+ man = NULL;
+ sodest = NULL;
+ child_pid = 0;
+ fd[0] = -1;
+ fd[1] = -1;
+
+ if (mpage->mlinks->gzip) {
+ if (-1 == pipe(fd)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file, "&pipe gunzip");
+ goto nextpage;
+ }
+ switch (child_pid = fork()) {
+ case -1:
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file, "&fork gunzip");
+ child_pid = 0;
+ close(fd[1]);
+ close(fd[0]);
+ goto nextpage;
+ case 0:
+ close(fd[0]);
+ if (-1 == dup2(fd[1], STDOUT_FILENO)) {
+ say(mpage->mlinks->file,
+ "&dup gunzip");
+ exit(1);
+ }
+ execlp("gunzip", "gunzip", "-c",
+ mpage->mlinks->file, NULL);
+ say(mpage->mlinks->file, "&exec gunzip");
+ exit(1);
+ default:
+ close(fd[1]);
+ break;
+ }
+ }
/*
- * Reclaim an empty index record, if available.
- * Use its record number for all new btree nodes.
+ * Try interpreting the file as mdoc(7) or man(7)
+ * source code, unless it is already known to be
+ * formatted. Fall back to formatted mode.
*/
+ if (FORM_CAT != mpage->mlinks->dform ||
+ FORM_CAT != mpage->mlinks->fform) {
+ lvl = mparse_readfd(mp, fd[0], mpage->mlinks->file);
+ if (lvl < MANDOCLEVEL_FATAL)
+ mparse_result(mp, &mdoc, &man, &sodest);
+ }
- if (recs->cur > 0) {
- recs->cur--;
- rec = recs->stack[(int)recs->cur];
- } else if (recs->last > 0) {
- rec = recs->last;
- recs->last = 0;
- } else
- rec++;
- vbuf[1] = htobe64(rec);
+ if (NULL != sodest) {
+ mlink_dest = ohash_find(&mlinks,
+ ohash_qlookup(&mlinks, sodest));
+ if (NULL != mlink_dest) {
- /*
- * Copy from the in-memory hashtable of pending
- * keyword/mask pairs into the database.
- */
+ /* The .so target exists. */
- seq = R_FIRST;
- while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
- seq = R_NEXT;
- assert(sizeof(uint64_t) == val.size);
- memcpy(&mask, val.data, val.size);
- vbuf[0] = htobe64(mask);
- val.size = sizeof(vbuf);
- val.data = &vbuf;
- dbt_put(mdb->db, mdb->dbn, &key, &val);
- }
- if (ch < 0) {
- perror("hash");
- unlink(mdb->dbn);
- unlink(mdb->idxn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ mpage_dest = mlink_dest->mpage;
+ mlink = mpage->mlinks;
+ while (1) {
+ mlink->mpage = mpage_dest;
- /*
- * Apply to the index. If we haven't had a description
- * set, put an empty one in now.
- */
+ /*
+ * If the target was already
+ * processed, add the links
+ * to the database now.
+ * Otherwise, this will
+ * happen when we come
+ * to the target.
+ */
- if (dbuf->len == sv)
- buf_appendb(dbuf, "", 1);
+ if (mpage_dest->pageid)
+ dbadd_mlink(mlink);
- key.data = &rec;
- key.size = sizeof(recno_t);
+ if (NULL == mlink->next)
+ break;
+ mlink = mlink->next;
+ }
- val.data = dbuf->cp;
- val.size = dbuf->len;
+ /* Move all links to the target. */
- if (verb)
- printf("%s: adding to index\n", fn);
+ mlink->next = mlink_dest->next;
+ mlink_dest->next = mpage->mlinks;
+ mpage->mlinks = NULL;
+ }
+ goto nextpage;
+ } else if (NULL != mdoc) {
+ mpage->form = FORM_SRC;
+ mpage->sec = mdoc_meta(mdoc)->msec;
+ mpage->sec = mandoc_strdup(
+ NULL == mpage->sec ? "" : mpage->sec);
+ mpage->arch = mdoc_meta(mdoc)->arch;
+ mpage->arch = mandoc_strdup(
+ NULL == mpage->arch ? "" : mpage->arch);
+ mpage->title =
+ mandoc_strdup(mdoc_meta(mdoc)->title);
+ } else if (NULL != man) {
+ mpage->form = FORM_SRC;
+ mpage->sec =
+ mandoc_strdup(man_meta(man)->msec);
+ mpage->arch =
+ mandoc_strdup(mpage->mlinks->arch);
+ mpage->title =
+ mandoc_strdup(man_meta(man)->title);
+ } else {
+ mpage->form = FORM_CAT;
+ mpage->sec =
+ mandoc_strdup(mpage->mlinks->dsec);
+ mpage->arch =
+ mandoc_strdup(mpage->mlinks->arch);
+ mpage->title =
+ mandoc_strdup(mpage->mlinks->name);
+ }
+ putkey(mpage, mpage->sec, TYPE_sec);
+ putkey(mpage, '\0' == *mpage->arch ?
+ any : mpage->arch, TYPE_arch);
+
+ for (mlink = mpage->mlinks; mlink; mlink = mlink->next) {
+ if ('\0' != *mlink->dsec)
+ putkey(mpage, mlink->dsec, TYPE_sec);
+ if ('\0' != *mlink->fsec)
+ putkey(mpage, mlink->fsec, TYPE_sec);
+ putkey(mpage, '\0' == *mlink->arch ?
+ any : mlink->arch, TYPE_arch);
+ putkey(mpage, mlink->name, NAME_FILE);
+ }
- dbt_put(mdb->idx, mdb->idxn, &key, &val);
+ assert(NULL == mpage->desc);
+ if (NULL != mdoc) {
+ if (NULL != (cp = mdoc_meta(mdoc)->name))
+ putkey(mpage, cp, NAME_HEAD);
+ parse_mdoc(mpage, mdoc_node(mdoc));
+ } else if (NULL != man)
+ parse_man(mpage, man_node(man));
+ else
+ parse_cat(mpage, fd[0]);
+ if (NULL == mpage->desc)
+ mpage->desc = mandoc_strdup(mpage->mlinks->name);
+
+ if (warnings && !use_all)
+ for (mlink = mpage->mlinks; mlink;
+ mlink = mlink->next)
+ mlink_check(mpage, mlink);
+
+ dbadd(mpage, mc);
+
+nextpage:
+ if (child_pid) {
+ if (-1 == waitpid(child_pid, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file, "&wait gunzip");
+ } else if (WIFSIGNALED(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file,
+ "gunzip died from signal %d",
+ WTERMSIG(status));
+ } else if (WEXITSTATUS(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(mpage->mlinks->file,
+ "gunzip failed with code %d",
+ WEXITSTATUS(status));
+ }
+ }
+ ohash_delete(&strings);
+ ohash_delete(&names);
+ mpage = ohash_next(&mpages, &pslot);
}
- /*
- * Iterate the remembered file titles and check that
- * all files can be found by their main title.
- */
+ if (0 == nodb)
+ SQL_EXEC("END TRANSACTION");
+}
- if (warnings) {
- seq = R_FIRST;
- while (0 == (*files->seq)(files, &key, &val, seq)) {
- seq = R_NEXT;
- if (val.size)
- fprintf(stderr, "%s: probably "
- "unreachable, title is %s\n",
- (char *)val.data, (char *)key.data);
- }
- (*files->close)(files);
+static void
+names_check(void)
+{
+ sqlite3_stmt *stmt;
+ const char *name, *sec, *arch, *key;
+ int irc;
+
+ sqlite3_prepare_v2(db,
+ "SELECT name, sec, arch, key FROM ("
+ "SELECT name AS key, pageid FROM names "
+ "WHERE bits & ? AND NOT EXISTS ("
+ "SELECT pageid FROM mlinks "
+ "WHERE mlinks.pageid == names.pageid "
+ "AND mlinks.name == names.name"
+ ")"
+ ") JOIN ("
+ "SELECT sec, arch, name, pageid FROM mlinks "
+ "GROUP BY pageid"
+ ") USING (pageid);",
+ -1, &stmt, NULL);
+
+ if (SQLITE_OK != sqlite3_bind_int64(stmt, 1, NAME_TITLE))
+ say("", "%s", sqlite3_errmsg(db));
+
+ while (SQLITE_ROW == (irc = sqlite3_step(stmt))) {
+ name = (const char *)sqlite3_column_text(stmt, 0);
+ sec = (const char *)sqlite3_column_text(stmt, 1);
+ arch = (const char *)sqlite3_column_text(stmt, 2);
+ key = (const char *)sqlite3_column_text(stmt, 3);
+ say("", "%s(%s%s%s) lacks mlink \"%s\"", name, sec,
+ '\0' == *arch ? "" : "/",
+ '\0' == *arch ? "" : arch, key);
}
+ sqlite3_finalize(stmt);
}
-/*
- * Scan through all entries in the index file `idx' and prune those
- * entries in `ofile'.
- * Pruning consists of removing from `db', then invalidating the entry
- * in `idx' (zeroing its value size).
- */
static void
-index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
+parse_cat(struct mpage *mpage, int fd)
{
- const struct of *of;
- const char *fn;
- uint64_t vbuf[2];
- unsigned seq, sseq;
- DBT key, val;
- int ch;
+ FILE *stream;
+ char *line, *p, *title;
+ size_t len, plen, titlesz;
- recs->cur = 0;
- seq = R_FIRST;
- while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
- seq = R_NEXT;
- assert(sizeof(recno_t) == key.size);
- memcpy(&recs->last, key.data, key.size);
+ stream = (-1 == fd) ?
+ fopen(mpage->mlinks->file, "r") :
+ fdopen(fd, "r");
+ if (NULL == stream) {
+ if (warnings)
+ say(mpage->mlinks->file, "&fopen");
+ return;
+ }
- /* Deleted records are zero-sized. Skip them. */
+ /* Skip to first blank line. */
- if (0 == val.size)
- goto cont;
+ while (NULL != (line = fgetln(stream, &len)))
+ if ('\n' == *line)
+ break;
- /*
- * Make sure we're sane.
- * Read past our mdoc/man/cat type to the next string,
- * then make sure it's bounded by a NUL.
- * Failing any of these, we go into our error handler.
- */
+ /*
+ * Assume the first line that is not indented
+ * is the first section header. Skip to it.
+ */
- fn = (char *)val.data + 1;
- if (NULL == memchr(fn, '\0', val.size - 1))
+ while (NULL != (line = fgetln(stream, &len)))
+ if ('\n' != *line && ' ' != *line)
break;
- /*
- * Search for the file in those we care about.
- * XXX: build this into a tree. Too slow.
- */
+ /*
+ * Read up until the next section into a buffer.
+ * Strip leading whitespace and the trailing newline from each
+ * line read, appending a trailing space.
+ * Ignore empty (whitespace-only) lines.
+ */
- for (of = ofile->first; of; of = of->next)
- if (0 == strcmp(fn, of->fname))
- break;
+ titlesz = 0;
+ title = NULL;
- if (NULL == of)
+ while (NULL != (line = fgetln(stream, &len))) {
+ if (' ' != *line || '\n' != line[len - 1])
+ break;
+ while (len > 0 && isspace((unsigned char)*line)) {
+ line++;
+ len--;
+ }
+ if (1 == len)
continue;
+ title = mandoc_realloc(title, titlesz + len);
+ memcpy(title + titlesz, line, len);
+ titlesz += len;
+ title[titlesz - 1] = ' ';
+ }
- /*
- * Search through the keyword database, throwing out all
- * references to our file.
- */
+ /*
+ * If no page content can be found, or the input line
+ * is already the next section header, or there is no
+ * trailing newline, reuse the page title as the page
+ * description.
+ */
- sseq = R_FIRST;
- while (0 == (ch = (*mdb->db->seq)(mdb->db,
- &key, &val, sseq))) {
- sseq = R_NEXT;
- if (sizeof(vbuf) != val.size)
- break;
+ if (NULL == title || '\0' == *title) {
+ if (warnings)
+ say(mpage->mlinks->file,
+ "Cannot find NAME section");
+ fclose(stream);
+ free(title);
+ return;
+ }
- memcpy(vbuf, val.data, val.size);
- if (recs->last != betoh64(vbuf[1]))
- continue;
+ title = mandoc_realloc(title, titlesz + 1);
+ title[titlesz] = '\0';
- if ((ch = (*mdb->db->del)(mdb->db,
- &key, R_CURSOR)) < 0)
- break;
- }
+ /*
+ * Skip to the first dash.
+ * Use the remaining line as the description (no more than 70
+ * bytes).
+ */
- if (ch < 0) {
- perror(mdb->dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (1 != ch) {
- fprintf(stderr, "%s: corrupt database\n",
- mdb->dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ if (NULL != (p = strstr(title, "- "))) {
+ for (p += 2; ' ' == *p || '\b' == *p; p++)
+ /* Skip to next word. */ ;
+ } else {
+ if (warnings)
+ say(mpage->mlinks->file,
+ "No dash in title line");
+ p = title;
+ }
- if (verb)
- printf("%s: deleting from index\n", fn);
+ plen = strlen(p);
- val.size = 0;
- ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
+ /* Strip backspace-encoding from line. */
- if (ch < 0)
- break;
-cont:
- if (recs->cur >= recs->size) {
- recs->size += MANDOC_SLOP;
- recs->stack = mandoc_realloc(recs->stack,
- recs->size * sizeof(recno_t));
+ while (NULL != (line = memchr(p, '\b', plen))) {
+ len = line - p;
+ if (0 == len) {
+ memmove(line, line + 1, plen--);
+ continue;
}
-
- recs->stack[(int)recs->cur] = recs->last;
- recs->cur++;
- }
-
- if (ch < 0) {
- perror(mdb->idxn);
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (1 != ch) {
- fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
- exit((int)MANDOCLEVEL_SYSERR);
+ memmove(line - 1, line + 1, plen - len);
+ plen -= 2;
}
- recs->last++;
+ mpage->desc = mandoc_strdup(p);
+ fclose(stream);
+ free(title);
}
/*
- * Grow the buffer (if necessary) and copy in a binary string.
+ * Put a type/word pair into the word database for this particular file.
*/
static void
-buf_appendb(struct buf *buf, const void *cp, size_t sz)
+putkey(const struct mpage *mpage, char *value, uint64_t type)
{
+ char *cp;
+
+ assert(NULL != value);
+ if (TYPE_arch == type)
+ for (cp = value; *cp; cp++)
+ if (isupper((unsigned char)*cp))
+ *cp = _tolower((unsigned char)*cp);
+ putkeys(mpage, value, strlen(value), type);
+}
- /* Overshoot by MANDOC_BUFSZ. */
+/*
+ * Grok all nodes at or below a certain mdoc node into putkey().
+ */
+static void
+putmdockey(const struct mpage *mpage,
+ const struct mdoc_node *n, uint64_t m)
+{
- while (buf->len + sz >= buf->size) {
- buf->size = buf->len + sz + MANDOC_BUFSZ;
- buf->cp = mandoc_realloc(buf->cp, buf->size);
+ for ( ; NULL != n; n = n->next) {
+ if (NULL != n->child)
+ putmdockey(mpage, n->child, m);
+ if (MDOC_TEXT == n->type)
+ putkey(mpage, n->string, m);
}
-
- memcpy(buf->cp + (int)buf->len, cp, sz);
- buf->len += sz;
}
-/*
- * Append a nil-terminated string to the buffer.
- * This can be invoked multiple times.
- * The buffer string will be nil-terminated.
- * If invoked multiple times, a space is put between strings.
- */
static void
-buf_append(struct buf *buf, const char *cp)
+parse_man(struct mpage *mpage, const struct man_node *n)
{
+ const struct man_node *head, *body;
+ char *start, *title;
+ char byte;
size_t sz;
- if (0 == (sz = strlen(cp)))
+ if (NULL == n)
return;
- if (buf->len)
- buf->cp[(int)buf->len - 1] = ' ';
+ /*
+ * We're only searching for one thing: the first text child in
+ * the BODY of a NAME section. Since we don't keep track of
+ * sections in -man, jump through some hoops to find out whether we're in
+ * the correct section or not.
+ */
- buf_appendb(buf, cp, sz + 1);
-}
+ if (MAN_BODY == n->type && MAN_SH == n->tok) {
+ body = n;
+ assert(body->parent);
+ if (NULL != (head = body->parent->head) &&
+ 1 == head->nchild &&
+ NULL != (head = (head->child)) &&
+ MAN_TEXT == head->type &&
+ 0 == strcmp(head->string, "NAME") &&
+ NULL != body->child) {
-/*
- * Recursively add all text from a given node.
- * This is optimised for general mdoc nodes in this context, which do
- * not consist of subexpressions and having a recursive call for n->next
- * would be wasteful.
- * The "f" variable should be 0 unless called from pmdoc_Nd for the
- * description buffer, which does not start at the beginning of the
- * buffer.
- */
-static void
-buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
-{
+ /*
+ * Suck the entire NAME section into memory.
+ * Yes, we might run away.
+ * But too many manuals have big, spread-out
+ * NAME sections over many lines.
+ */
- for ( ; n; n = n->next) {
- if (n->child)
- buf_appendmdoc(buf, n->child, f);
+ title = NULL;
+ man_deroff(&title, body);
+ if (NULL == title)
+ return;
- if (MDOC_TEXT == n->type && f) {
- f = 0;
- buf_appendb(buf, n->string,
- strlen(n->string) + 1);
- } else if (MDOC_TEXT == n->type)
- buf_append(buf, n->string);
+ /*
+ * Go through a special heuristic dance here.
+ * Conventionally, one or more manual names are
+ * comma-separated, followed by whitespace, then a
+ * dash, then a description. Try to puzzle out
+ * the name parts here.
+ */
- }
-}
+ start = title;
+ for ( ;; ) {
+ sz = strcspn(start, " ,");
+ if ('\0' == start[sz])
+ break;
-static void
-hash_reset(DB **db)
-{
- DB *hash;
+ byte = start[sz];
+ start[sz] = '\0';
- if (NULL != (hash = *db))
- (*hash->close)(hash);
+ /*
+ * Assume a stray trailing comma in the
+ * name list if a name begins with a dash.
+ */
- *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
- if (NULL == *db) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- }
-}
+ if ('-' == start[0] ||
+ ('\\' == start[0] && '-' == start[1]))
+ break;
-/* ARGSUSED */
-static int
-pmdoc_head(MDOC_ARGS)
-{
+ putkey(mpage, start, NAME_TITLE);
- return(MDOC_HEAD == n->type);
+ if (' ' == byte) {
+ start += sz + 1;
+ break;
+ }
+
+ assert(',' == byte);
+ start += sz + 1;
+ while (' ' == *start)
+ start++;
+ }
+
+ if (start == title) {
+ putkey(mpage, start, NAME_TITLE);
+ free(title);
+ return;
+ }
+
+ while (isspace((unsigned char)*start))
+ start++;
+
+ if (0 == strncmp(start, "-", 1))
+ start += 1;
+ else if (0 == strncmp(start, "\\-\\-", 4))
+ start += 4;
+ else if (0 == strncmp(start, "\\-", 2))
+ start += 2;
+ else if (0 == strncmp(start, "\\(en", 4))
+ start += 4;
+ else if (0 == strncmp(start, "\\(em", 4))
+ start += 4;
+
+ while (' ' == *start)
+ start++;
+
+ mpage->desc = mandoc_strdup(start);
+ free(title);
+ return;
+ }
+ }
+
+ for (n = n->child; n; n = n->next) {
+ if (NULL != mpage->desc)
+ break;
+ parse_man(mpage, n);
+ }
}
-/* ARGSUSED */
-static int
-pmdoc_body(MDOC_ARGS)
+static void
+parse_mdoc(struct mpage *mpage, const struct mdoc_node *n)
{
- return(MDOC_BODY == n->type);
+ assert(NULL != n);
+ for (n = n->child; NULL != n; n = n->next) {
+ switch (n->type) {
+ case MDOC_ELEM:
+ /* FALLTHROUGH */
+ case MDOC_BLOCK:
+ /* FALLTHROUGH */
+ case MDOC_HEAD:
+ /* FALLTHROUGH */
+ case MDOC_BODY:
+ /* FALLTHROUGH */
+ case MDOC_TAIL:
+ if (NULL != mdocs[n->tok].fp)
+ if (0 == (*mdocs[n->tok].fp)(mpage, n))
+ break;
+ if (mdocs[n->tok].mask)
+ putmdockey(mpage, n->child,
+ mdocs[n->tok].mask);
+ break;
+ default:
+ assert(MDOC_ROOT != n->type);
+ continue;
+ }
+ if (NULL != n->child)
+ parse_mdoc(mpage, n);
+ }
}
-/* ARGSUSED */
static int
-pmdoc_Fd(MDOC_ARGS)
+parse_mdoc_Fd(struct mpage *mpage, const struct mdoc_node *n)
{
const char *start, *end;
size_t sz;
- if (SEC_SYNOPSIS != n->sec)
- return(0);
- if (NULL == (n = n->child) || MDOC_TEXT != n->type)
+ if (SEC_SYNOPSIS != n->sec ||
+ NULL == (n = n->child) ||
+ MDOC_TEXT != n->type)
return(0);
/*
* Only consider those `Fd' macro fields that begin with an
* "inclusion" token (versus, e.g., #define).
*/
+
if (strcmp("#include", n->string))
return(0);
@@ -1131,884 +1645,847 @@ pmdoc_Fd(MDOC_ARGS)
if ('>' == *end || '"' == *end)
end--;
- assert(end >= start);
-
- buf_appendb(buf, start, (size_t)(end - start + 1));
- buf_appendb(buf, "", 1);
- return(1);
-}
-
-/* ARGSUSED */
-static int
-pmdoc_In(MDOC_ARGS)
-{
-
- if (NULL == n->child || MDOC_TEXT != n->child->type)
- return(0);
-
- buf_append(buf, n->child->string);
- return(1);
+ if (end > start)
+ putkeys(mpage, start, end - start + 1, TYPE_In);
+ return(0);
}
-/* ARGSUSED */
static int
-pmdoc_Fn(MDOC_ARGS)
+parse_mdoc_Fn(struct mpage *mpage, const struct mdoc_node *n)
{
- struct mdoc_node *nn;
- const char *cp;
-
- nn = n->child;
+ char *cp;
- if (NULL == nn || MDOC_TEXT != nn->type)
+ if (NULL == (n = n->child) || MDOC_TEXT != n->type)
return(0);
- /* .Fn "struct type *name" "char *arg" */
-
- cp = strrchr(nn->string, ' ');
- if (NULL == cp)
- cp = nn->string;
+ /*
+ * Parse: .Fn "struct type *name" "char *arg".
+ * First strip away pointer symbol.
+ * Then store the function name, then type.
+ * Finally, store the arguments.
+ */
- /* Strip away pointer symbol. */
+ if (NULL == (cp = strrchr(n->string, ' ')))
+ cp = n->string;
while ('*' == *cp)
cp++;
- /* Store the function name. */
-
- buf_append(buf, cp);
- hash_put(hash, buf, TYPE_Fn);
+ putkey(mpage, cp, TYPE_Fn);
- /* Store the function type. */
+ if (n->string < cp)
+ putkeys(mpage, n->string, cp - n->string, TYPE_Ft);
- if (nn->string < cp) {
- buf->len = 0;
- buf_appendb(buf, nn->string, cp - nn->string);
- buf_appendb(buf, "", 1);
- hash_put(hash, buf, TYPE_Ft);
- }
-
- /* Store the arguments. */
-
- for (nn = nn->next; nn; nn = nn->next) {
- if (MDOC_TEXT != nn->type)
- continue;
- buf->len = 0;
- buf_append(buf, nn->string);
- hash_put(hash, buf, TYPE_Fa);
- }
+ for (n = n->next; NULL != n; n = n->next)
+ if (MDOC_TEXT == n->type)
+ putkey(mpage, n->string, TYPE_Fa);
return(0);
}
-/* ARGSUSED */
static int
-pmdoc_St(MDOC_ARGS)
+parse_mdoc_Xr(struct mpage *mpage, const struct mdoc_node *n)
{
+ char *cp;
- if (NULL == n->child || MDOC_TEXT != n->child->type)
+ if (NULL == (n = n->child))
return(0);
- buf_append(buf, n->child->string);
- return(1);
+ if (NULL == n->next) {
+ putkey(mpage, n->string, TYPE_Xr);
+ return(0);
+ }
+
+ mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string);
+ putkey(mpage, cp, TYPE_Xr);
+ free(cp);
+ return(0);
}
-/* ARGSUSED */
static int
-pmdoc_Xr(MDOC_ARGS)
+parse_mdoc_Nd(struct mpage *mpage, const struct mdoc_node *n)
{
- if (NULL == (n = n->child))
- return(0);
-
- buf_appendb(buf, n->string, strlen(n->string));
-
- if (NULL != (n = n->next)) {
- buf_appendb(buf, ".", 1);
- buf_appendb(buf, n->string, strlen(n->string) + 1);
- } else
- buf_appendb(buf, ".", 2);
-
- return(1);
+ if (MDOC_BODY == n->type)
+ mdoc_deroff(&mpage->desc, n);
+ return(0);
}
-/* ARGSUSED */
static int
-pmdoc_Nd(MDOC_ARGS)
+parse_mdoc_Nm(struct mpage *mpage, const struct mdoc_node *n)
{
- if (MDOC_BODY != n->type)
- return(0);
-
- buf_appendmdoc(dbuf, n->child, 1);
- return(1);
+ if (SEC_NAME == n->sec)
+ putmdockey(mpage, n->child, NAME_TITLE);
+ else if (SEC_SYNOPSIS == n->sec && MDOC_HEAD == n->type)
+ putmdockey(mpage, n->child, NAME_SYN);
+ return(0);
}
-/* ARGSUSED */
static int
-pmdoc_Nm(MDOC_ARGS)
+parse_mdoc_Sh(struct mpage *mpage, const struct mdoc_node *n)
{
- if (SEC_NAME == n->sec)
- return(1);
- else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
- return(0);
+ return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
+}
- if (NULL == n->child)
- buf_append(buf, m->name);
+static int
+parse_mdoc_head(struct mpage *mpage, const struct mdoc_node *n)
+{
- return(1);
+ return(MDOC_HEAD == n->type);
}
-/* ARGSUSED */
static int
-pmdoc_Sh(MDOC_ARGS)
+parse_mdoc_body(struct mpage *mpage, const struct mdoc_node *n)
{
- return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
+ return(MDOC_BODY == n->type);
}
+/*
+ * Add a string to the hash table for the current manual.
+ * Each string has a bitmask telling which macros it belongs to.
+ * When we finish the manual, we'll dump the table.
+ */
static void
-hash_put(DB *db, const struct buf *buf, uint64_t mask)
+putkeys(const struct mpage *mpage,
+ const char *cp, size_t sz, uint64_t v)
{
- uint64_t oldmask;
- DBT key, val;
- int rc;
+ struct ohash *htab;
+ struct str *s;
+ const char *end;
+ unsigned int slot;
+ int i;
- if (buf->len < 2)
+ if (0 == sz)
return;
- key.data = buf->cp;
- key.size = buf->len;
-
- if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- } else if (0 == rc) {
- assert(sizeof(uint64_t) == val.size);
- memcpy(&oldmask, val.data, val.size);
- mask |= oldmask;
+ if (TYPE_Nm & v) {
+ htab = &names;
+ v &= name_mask;
+ name_mask &= ~NAME_FIRST;
+ if (debug > 1)
+ say(mpage->mlinks->file,
+ "Adding name %*s", sz, cp);
+ } else {
+ htab = &strings;
+ if (debug > 1)
+ for (i = 0; i < mansearch_keymax; i++)
+ if (1 << i & v)
+ say(mpage->mlinks->file,
+ "Adding key %s=%*s",
+ mansearch_keynames[i], sz, cp);
}
- val.data = &mask;
- val.size = sizeof(uint64_t);
+ end = cp + sz;
+ slot = ohash_qlookupi(htab, cp, &end);
+ s = ohash_find(htab, slot);
- if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
- perror("hash");
- exit((int)MANDOCLEVEL_SYSERR);
- }
+ if (NULL != s && mpage == s->mpage) {
+ s->mask |= v;
+ return;
+ } else if (NULL == s) {
+ s = mandoc_calloc(1, sizeof(struct str) + sz + 1);
+ memcpy(s->key, cp, sz);
+ ohash_insert(htab, slot, s);
+ }
+ s->mpage = mpage;
+ s->mask = v;
}
-static void
-dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
+/*
+ * Take a Unicode codepoint and produce its UTF-8 encoding.
+ * This isn't the best way to do this, but it works.
+ * The magic numbers are from the UTF-8 packaging.
+ * They're not as scary as they seem: read the UTF-8 spec for details.
+ */
+static size_t
+utf8(unsigned int cp, char out[7])
{
+ size_t rc;
+
+ rc = 0;
+ if (cp <= 0x0000007F) {
+ rc = 1;
+ out[0] = (char)cp;
+ } else if (cp <= 0x000007FF) {
+ rc = 2;
+ out[0] = (cp >> 6 & 31) | 192;
+ out[1] = (cp & 63) | 128;
+ } else if (cp <= 0x0000FFFF) {
+ rc = 3;
+ out[0] = (cp >> 12 & 15) | 224;
+ out[1] = (cp >> 6 & 63) | 128;
+ out[2] = (cp & 63) | 128;
+ } else if (cp <= 0x001FFFFF) {
+ rc = 4;
+ out[0] = (cp >> 18 & 7) | 240;
+ out[1] = (cp >> 12 & 63) | 128;
+ out[2] = (cp >> 6 & 63) | 128;
+ out[3] = (cp & 63) | 128;
+ } else if (cp <= 0x03FFFFFF) {
+ rc = 5;
+ out[0] = (cp >> 24 & 3) | 248;
+ out[1] = (cp >> 18 & 63) | 128;
+ out[2] = (cp >> 12 & 63) | 128;
+ out[3] = (cp >> 6 & 63) | 128;
+ out[4] = (cp & 63) | 128;
+ } else if (cp <= 0x7FFFFFFF) {
+ rc = 6;
+ out[0] = (cp >> 30 & 1) | 252;
+ out[1] = (cp >> 24 & 63) | 128;
+ out[2] = (cp >> 18 & 63) | 128;
+ out[3] = (cp >> 12 & 63) | 128;
+ out[4] = (cp >> 6 & 63) | 128;
+ out[5] = (cp & 63) | 128;
+ } else
+ return(0);
- assert(key->size);
- assert(val->size);
-
- if (0 == (*db->put)(db, key, val, 0))
- return;
-
- perror(dbn);
- exit((int)MANDOCLEVEL_SYSERR);
- /* NOTREACHED */
+ out[rc] = '\0';
+ return(rc);
}
/*
- * Call out to per-macro handlers after clearing the persistent database
- * key. If the macro sets the database key, flush it to the database.
+ * Store the rendered version of a key, or alias the pointer
+ * if the key contains no escape sequences.
*/
static void
-pmdoc_node(MDOC_ARGS)
+render_key(struct mchars *mc, struct str *key)
{
+ size_t sz, bsz, pos;
+ char utfbuf[7], res[6];
+ char *buf;
+ const char *seq, *cpp, *val;
+ int len, u;
+ enum mandoc_esc esc;
- if (NULL == n)
- return;
+ assert(NULL == key->rendered);
- switch (n->type) {
- case (MDOC_HEAD):
- /* FALLTHROUGH */
- case (MDOC_BODY):
- /* FALLTHROUGH */
- case (MDOC_TAIL):
- /* FALLTHROUGH */
- case (MDOC_BLOCK):
- /* FALLTHROUGH */
- case (MDOC_ELEM):
- buf->len = 0;
+ res[0] = '\\';
+ res[1] = '\t';
+ res[2] = ASCII_NBRSP;
+ res[3] = ASCII_HYPH;
+ res[4] = ASCII_BREAK;
+ res[5] = '\0';
- /*
- * Both NULL handlers and handlers returning true
- * request using the data. Only skip the element
- * when the handler returns false.
- */
+ val = key->key;
+ bsz = strlen(val);
- if (NULL != mdocs[n->tok].fp &&
- 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
- break;
+ /*
+ * Pre-check: if we have no stop-characters, then set the
+ * pointer as ourselves and get out of here.
+ */
+ if (strcspn(val, res) == bsz) {
+ key->rendered = key->key;
+ return;
+ }
- /*
- * For many macros, use the text from all children.
- * Set zero flags for macros not needing this.
- * In that case, the handler must fill the buffer.
- */
+ /* Pre-allocate by the length of the input */
- if (MDOCF_CHILD & mdocs[n->tok].flags)
- buf_appendmdoc(buf, n->child, 0);
+ buf = mandoc_malloc(++bsz);
+ pos = 0;
+ while ('\0' != *val) {
/*
- * Cover the most common case:
- * Automatically stage one string per element.
- * Set a zero mask for macros not needing this.
- * Additional staging can be done in the handler.
+ * Halt on the first escape sequence.
+ * This also halts on the end of string, in which case
+ * we just copy, fallthrough, and exit the loop.
*/
+ if ((sz = strcspn(val, res)) > 0) {
+ memcpy(&buf[pos], val, sz);
+ pos += sz;
+ val += sz;
+ }
- if (mdocs[n->tok].mask)
- hash_put(hash, buf, mdocs[n->tok].mask);
- break;
- default:
- break;
- }
-
- pmdoc_node(hash, buf, dbuf, n->child, m);
- pmdoc_node(hash, buf, dbuf, n->next, m);
-}
-
-static int
-pman_node(MAN_ARGS)
-{
- const struct man_node *head, *body;
- char *start, *sv, *title;
- size_t sz, titlesz;
-
- if (NULL == n)
- return(0);
-
- /*
- * We're only searching for one thing: the first text child in
- * the BODY of a NAME section. Since we don't keep track of
- * sections in -man, run some hoops to find out whether we're in
- * the correct section or not.
- */
-
- if (MAN_BODY == n->type && MAN_SH == n->tok) {
- body = n;
- assert(body->parent);
- if (NULL != (head = body->parent->head) &&
- 1 == head->nchild &&
- NULL != (head = (head->child)) &&
- MAN_TEXT == head->type &&
- 0 == strcmp(head->string, "NAME") &&
- NULL != (body = body->child) &&
- MAN_TEXT == body->type) {
-
- title = NULL;
- titlesz = 0;
- /*
- * Suck the entire NAME section into memory.
- * Yes, we might run away.
- * But too many manuals have big, spread-out
- * NAME sections over many lines.
- */
- for ( ; NULL != body; body = body->next) {
- if (MAN_TEXT != body->type)
- break;
- if (0 == (sz = strlen(body->string)))
- continue;
- title = mandoc_realloc
- (title, titlesz + sz + 1);
- memcpy(title + titlesz, body->string, sz);
- titlesz += sz + 1;
- title[(int)titlesz - 1] = ' ';
- }
- if (NULL == title)
- return(0);
-
- title = mandoc_realloc(title, titlesz + 1);
- title[(int)titlesz] = '\0';
+ switch (*val) {
+ case ASCII_HYPH:
+ buf[pos++] = '-';
+ val++;
+ continue;
+ case '\t':
+ /* FALLTHROUGH */
+ case ASCII_NBRSP:
+ buf[pos++] = ' ';
+ val++;
+ /* FALLTHROUGH */
+ case ASCII_BREAK:
+ continue;
+ default:
+ break;
+ }
+ if ('\\' != *val)
+ break;
- /* Skip leading space. */
+ /* Read past the slash. */
- sv = title;
- while (isspace((unsigned char)*sv))
- sv++;
+ val++;
- if (0 == (sz = strlen(sv))) {
- free(title);
- return(0);
- }
+ /*
+ * Parse the escape sequence and see if it's a
+ * predefined character or special character.
+ */
- /* Erase trailing space. */
+ esc = mandoc_escape((const char **)&val,
+ &seq, &len);
+ if (ESCAPE_ERROR == esc)
+ break;
+ if (ESCAPE_SPECIAL != esc)
+ continue;
- start = &sv[sz - 1];
- while (start > sv && isspace((unsigned char)*start))
- *start-- = '\0';
+ /*
+ * Render the special character
+ * as either UTF-8 or ASCII.
+ */
- if (start == sv) {
- free(title);
- return(0);
+ if (write_utf8) {
+ if (0 == (u = mchars_spec2cp(mc, seq, len)))
+ continue;
+ cpp = utfbuf;
+ if (0 == (sz = utf8(u, utfbuf)))
+ continue;
+ sz = strlen(cpp);
+ } else {
+ cpp = mchars_spec2str(mc, seq, len, &sz);
+ if (NULL == cpp)
+ continue;
+ if (ASCII_NBRSP == *cpp) {
+ cpp = " ";
+ sz = 1;
}
+ }
- start = sv;
-
- /*
- * Go through a special heuristic dance here.
- * This is why -man manuals are great!
- * (I'm being sarcastic: my eyes are bleeding.)
- * Conventionally, one or more manual names are
- * comma-specified prior to a whitespace, then a
- * dash, then a description. Try to puzzle out
- * the name parts here.
- */
-
- for ( ;; ) {
- sz = strcspn(start, " ,");
- if ('\0' == start[(int)sz])
- break;
+ /* Copy the rendered glyph into the stream. */
- buf->len = 0;
- buf_appendb(buf, start, sz);
- buf_appendb(buf, "", 1);
+ bsz += sz;
+ buf = mandoc_realloc(buf, bsz);
+ memcpy(&buf[pos], cpp, sz);
+ pos += sz;
+ }
- hash_put(hash, buf, TYPE_Nm);
+ buf[pos] = '\0';
+ key->rendered = buf;
+}
- if (' ' == start[(int)sz]) {
- start += (int)sz + 1;
- break;
- }
+static void
+dbadd_mlink(const struct mlink *mlink)
+{
+ size_t i;
+
+ i = 1;
+ SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name);
+ SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->pageid);
+ SQL_STEP(stmts[STMT_INSERT_LINK]);
+ sqlite3_reset(stmts[STMT_INSERT_LINK]);
+}
- assert(',' == start[(int)sz]);
- start += (int)sz + 1;
- while (' ' == *start)
- start++;
+/*
+ * Flush the current page's terms (and their bits) into the database.
+ * Wrap the entire set of additions in a transaction to make sqlite be a
+ * little faster.
+ * Also, handle escape sequences at the last possible moment.
+ */
+static void
+dbadd(struct mpage *mpage, struct mchars *mc)
+{
+ struct mlink *mlink;
+ struct str *key;
+ size_t i;
+ unsigned int slot;
+
+ mlink = mpage->mlinks;
+
+ if (nodb) {
+ for (key = ohash_first(&names, &slot); NULL != key;
+ key = ohash_next(&names, &slot)) {
+ if (key->rendered != key->key)
+ free(key->rendered);
+ free(key);
+ }
+ for (key = ohash_first(&strings, &slot); NULL != key;
+ key = ohash_next(&strings, &slot)) {
+ if (key->rendered != key->key)
+ free(key->rendered);
+ free(key);
+ }
+ if (0 == debug)
+ return;
+ while (NULL != mlink) {
+ fputs(mlink->name, stdout);
+ if (NULL == mlink->next ||
+ strcmp(mlink->dsec, mlink->next->dsec) ||
+ strcmp(mlink->fsec, mlink->next->fsec) ||
+ strcmp(mlink->arch, mlink->next->arch)) {
+ putchar('(');
+ if ('\0' == *mlink->dsec)
+ fputs(mlink->fsec, stdout);
+ else
+ fputs(mlink->dsec, stdout);
+ if ('\0' != *mlink->arch)
+ printf("/%s", mlink->arch);
+ putchar(')');
}
+ mlink = mlink->next;
+ if (NULL != mlink)
+ fputs(", ", stdout);
+ }
+ printf(" - %s\n", mpage->desc);
+ return;
+ }
- buf->len = 0;
+ if (debug)
+ say(mlink->file, "Adding to database");
- if (sv == start) {
- buf_append(buf, start);
- free(title);
- return(1);
- }
+ i = strlen(mpage->desc) + 1;
+ key = mandoc_calloc(1, sizeof(struct str) + i);
+ memcpy(key->key, mpage->desc, i);
+ render_key(mc, key);
- while (isspace((unsigned char)*start))
- start++;
-
- if (0 == strncmp(start, "-", 1))
- start += 1;
- else if (0 == strncmp(start, "\\-\\-", 4))
- start += 4;
- else if (0 == strncmp(start, "\\-", 2))
- start += 2;
- else if (0 == strncmp(start, "\\(en", 4))
- start += 4;
- else if (0 == strncmp(start, "\\(em", 4))
- start += 4;
+ i = 1;
+ SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, key->rendered);
+ SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, FORM_SRC == mpage->form);
+ SQL_STEP(stmts[STMT_INSERT_PAGE]);
+ mpage->pageid = sqlite3_last_insert_rowid(db);
+ sqlite3_reset(stmts[STMT_INSERT_PAGE]);
- while (' ' == *start)
- start++;
+ if (key->rendered != key->key)
+ free(key->rendered);
+ free(key);
- sz = strlen(start) + 1;
- buf_appendb(dbuf, start, sz);
- buf_appendb(buf, start, sz);
+ while (NULL != mlink) {
+ dbadd_mlink(mlink);
+ mlink = mlink->next;
+ }
+ mlink = mpage->mlinks;
+
+ for (key = ohash_first(&names, &slot); NULL != key;
+ key = ohash_next(&names, &slot)) {
+ assert(key->mpage == mpage);
+ if (NULL == key->rendered)
+ render_key(mc, key);
+ i = 1;
+ SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, key->mask);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, key->rendered);
+ SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mpage->pageid);
+ SQL_STEP(stmts[STMT_INSERT_NAME]);
+ sqlite3_reset(stmts[STMT_INSERT_NAME]);
+ if (key->rendered != key->key)
+ free(key->rendered);
+ free(key);
+ }
+ for (key = ohash_first(&strings, &slot); NULL != key;
+ key = ohash_next(&strings, &slot)) {
+ assert(key->mpage == mpage);
+ if (NULL == key->rendered)
+ render_key(mc, key);
+ i = 1;
+ SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask);
+ SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->rendered);
+ SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->pageid);
+ SQL_STEP(stmts[STMT_INSERT_KEY]);
+ sqlite3_reset(stmts[STMT_INSERT_KEY]);
+ if (key->rendered != key->key)
+ free(key->rendered);
+ free(key);
+ }
+}
- hash_put(hash, buf, TYPE_Nd);
- free(title);
+static void
+dbprune(void)
+{
+ struct mpage *mpage;
+ struct mlink *mlink;
+ size_t i;
+ unsigned int slot;
+
+ if (0 == nodb)
+ SQL_EXEC("BEGIN TRANSACTION");
+
+ for (mpage = ohash_first(&mpages, &slot); NULL != mpage;
+ mpage = ohash_next(&mpages, &slot)) {
+ mlink = mpage->mlinks;
+ if (debug)
+ say(mlink->file, "Deleting from database");
+ if (nodb)
+ continue;
+ for ( ; NULL != mlink; mlink = mlink->next) {
+ i = 1;
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->dsec);
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->arch);
+ SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE],
+ i, mlink->name);
+ SQL_STEP(stmts[STMT_DELETE_PAGE]);
+ sqlite3_reset(stmts[STMT_DELETE_PAGE]);
}
}
- for (n = n->child; n; n = n->next)
- if (pman_node(hash, buf, dbuf, n))
- return(1);
-
- return(0);
+ if (0 == nodb)
+ SQL_EXEC("END TRANSACTION");
}
/*
- * Parse a formatted manual page.
- * By necessity, this involves rather crude guesswork.
+ * Close an existing database and its prepared statements.
+ * If "real" is not set, rename the temporary file into the real one.
*/
static void
-pformatted(DB *hash, struct buf *buf,
- struct buf *dbuf, const struct of *of)
+dbclose(int real)
{
- FILE *stream;
- char *line, *p, *title;
- size_t len, plen, titlesz;
+ size_t i;
+ int status;
+ pid_t child;
- if (NULL == (stream = fopen(of->fname, "r"))) {
- if (warnings)
- perror(of->fname);
+ if (nodb)
return;
- }
-
- /*
- * Always use the title derived from the filename up front,
- * do not even try to find it in the file. This also makes
- * sure we don't end up with an orphan index record, even if
- * the file content turns out to be completely unintelligible.
- */
-
- buf->len = 0;
- buf_append(buf, of->title);
- hash_put(hash, buf, TYPE_Nm);
-
- /* Skip to first blank line. */
- while (NULL != (line = fgetln(stream, &len)))
- if ('\n' == *line)
- break;
-
- /*
- * Assume the first line that is not indented
- * is the first section header. Skip to it.
- */
+ for (i = 0; i < STMT__MAX; i++) {
+ sqlite3_finalize(stmts[i]);
+ stmts[i] = NULL;
+ }
- while (NULL != (line = fgetln(stream, &len)))
- if ('\n' != *line && ' ' != *line)
- break;
-
- /*
- * Read up until the next section into a buffer.
- * Strip the leading and trailing newline from each read line,
- * appending a trailing space.
- * Ignore empty (whitespace-only) lines.
- */
+ sqlite3_close(db);
+ db = NULL;
- titlesz = 0;
- title = NULL;
+ if (real)
+ return;
- while (NULL != (line = fgetln(stream, &len))) {
- if (' ' != *line || '\n' != line[(int)len - 1])
- break;
- while (len > 0 && isspace((unsigned char)*line)) {
- line++;
- len--;
+ if ('\0' == *tempfilename) {
+ if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB, "&rename");
}
- if (1 == len)
- continue;
- title = mandoc_realloc(title, titlesz + len);
- memcpy(title + titlesz, line, len);
- titlesz += len;
- title[(int)titlesz - 1] = ' ';
+ return;
}
-
- /*
- * If no page content can be found, or the input line
- * is already the next section header, or there is no
- * trailing newline, reuse the page title as the page
- * description.
- */
-
- if (NULL == title || '\0' == *title) {
- if (warnings)
- fprintf(stderr, "%s: cannot find NAME section\n",
- of->fname);
- buf_appendb(dbuf, buf->cp, buf->size);
- hash_put(hash, buf, TYPE_Nd);
- fclose(stream);
- free(title);
+ switch (child = fork()) {
+ case -1:
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&fork cmp");
return;
+ case 0:
+ execlp("cmp", "cmp", "-s",
+ tempfilename, MANDOC_DB, NULL);
+ say("", "&exec cmp");
+ exit(0);
+ default:
+ break;
}
-
- title = mandoc_realloc(title, titlesz + 1);
- title[(int)titlesz] = '\0';
-
- /*
- * Skip to the first dash.
- * Use the remaining line as the description (no more than 70
- * bytes).
- */
-
- if (NULL != (p = strstr(title, "- "))) {
- for (p += 2; ' ' == *p || '\b' == *p; p++)
- /* Skip to next word. */ ;
- } else {
- if (warnings)
- fprintf(stderr, "%s: no dash in title line\n",
- of->fname);
- p = title;
+ if (-1 == waitpid(child, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&wait cmp");
+ } else if (WIFSIGNALED(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "cmp died from signal %d", WTERMSIG(status));
+ } else if (WEXITSTATUS(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB,
+ "Data changed, but cannot replace database");
}
- plen = strlen(p);
-
- /* Strip backspace-encoding from line. */
-
- while (NULL != (line = memchr(p, '\b', plen))) {
- len = line - p;
- if (0 == len) {
- memmove(line, line + 1, plen--);
- continue;
- }
- memmove(line - 1, line + 1, plen - len);
- plen -= 2;
+ *strrchr(tempfilename, '/') = '\0';
+ switch (child = fork()) {
+ case -1:
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&fork rm");
+ return;
+ case 0:
+ execlp("rm", "rm", "-rf", tempfilename, NULL);
+ say("", "&exec rm");
+ exit((int)MANDOCLEVEL_SYSERR);
+ default:
+ break;
+ }
+ if (-1 == waitpid(child, &status, 0)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&wait rm");
+ } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "%s: Cannot remove temporary directory",
+ tempfilename);
}
-
- buf_appendb(dbuf, p, plen + 1);
- buf->len = 0;
- buf_appendb(buf, p, plen + 1);
- hash_put(hash, buf, TYPE_Nd);
- fclose(stream);
- free(title);
}
-static void
-ofile_argbuild(int argc, char *argv[], struct of **of,
- const char *basedir)
+/*
+ * This is straightforward stuff.
+ * Open a database connection to a "temporary" database, then open a set
+ * of prepared statements we'll use over and over again.
+ * If "real" is set, we use the existing database; if not, we truncate a
+ * temporary one.
+ * Must be matched by dbclose().
+ */
+static int
+dbopen(int real)
{
- char buf[PATH_MAX];
- char pbuf[PATH_MAX];
- const char *sec, *arch, *title;
- char *relpath, *p;
- int i, src_form;
- struct of *nof;
-
- for (i = 0; i < argc; i++) {
- if (NULL == (relpath = realpath(argv[i], pbuf))) {
- perror(argv[i]);
- continue;
- }
- if (NULL != basedir) {
- if (strstr(pbuf, basedir) != pbuf) {
- fprintf(stderr, "%s: file outside "
- "base directory %s\n",
- pbuf, basedir);
- continue;
- }
- relpath = pbuf + strlen(basedir);
- }
-
- /*
- * Try to infer the manual section, architecture and
- * page title from the path, assuming it looks like
- * man*[/<arch>]/<title>.<section> or
- * cat<section>[/<arch>]/<title>.0
- */
+ const char *sql;
+ int rc, ofl;
- if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) {
- fprintf(stderr, "%s: path too long\n", relpath);
- continue;
- }
- sec = arch = title = "";
- src_form = 0;
- p = strrchr(buf, '\0');
- while (p-- > buf) {
- if ('\0' == *sec && '.' == *p) {
- sec = p + 1;
- *p = '\0';
- if ('0' == *sec)
- src_form |= MANDOC_FORM;
- else if ('1' <= *sec && '9' >= *sec)
- src_form |= MANDOC_SRC;
- continue;
- }
- if ('/' != *p)
- continue;
- if ('\0' == *title) {
- title = p + 1;
- *p = '\0';
- continue;
- }
- if (0 == strncmp("man", p + 1, 3))
- src_form |= MANDOC_SRC;
- else if (0 == strncmp("cat", p + 1, 3))
- src_form |= MANDOC_FORM;
- else
- arch = p + 1;
- break;
- }
- if ('\0' == *title) {
- if (warnings)
- fprintf(stderr,
- "%s: cannot deduce title "
- "from filename\n",
- relpath);
- title = buf;
- }
+ if (nodb)
+ return(1);
- /*
- * Build the file structure.
- */
+ *tempfilename = '\0';
+ ofl = SQLITE_OPEN_READWRITE;
- nof = mandoc_calloc(1, sizeof(struct of));
- nof->fname = mandoc_strdup(relpath);
- nof->sec = mandoc_strdup(sec);
- nof->arch = mandoc_strdup(arch);
- nof->title = mandoc_strdup(title);
- nof->src_form = src_form;
+ if (real) {
+ rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL);
+ if (SQLITE_OK != rc) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ if (SQLITE_CANTOPEN != rc)
+ say(MANDOC_DB, "%s", sqlite3_errstr(rc));
+ return(0);
+ }
+ goto prepare_statements;
+ }
- /*
- * Add the structure to the list.
- */
+ ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE;
- if (NULL == *of) {
- *of = nof;
- (*of)->first = nof;
- } else {
- nof->first = (*of)->first;
- (*of)->next = nof;
- *of = nof;
- }
+ remove(MANDOC_DB "~");
+ rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL);
+ if (SQLITE_OK == rc)
+ goto create_tables;
+ if (MPARSE_QUICK & mparse_options) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB "~", "%s", sqlite3_errstr(rc));
+ return(0);
}
-}
-/*
- * Recursively build up a list of files to parse.
- * We use this instead of ftw() and so on because I don't want global
- * variables hanging around.
- * This ignores the mandoc.db and mandoc.index files, but assumes that
- * everything else is a manual.
- * Pass in a pointer to a NULL structure for the first invocation.
- */
-static void
-ofile_dirbuild(const char *dir, const char* psec, const char *parch,
- int p_src_form, struct of **of)
-{
- char buf[PATH_MAX];
-#if defined(__sun)
- struct stat sb;
-#endif
- size_t sz;
- DIR *d;
- const char *fn, *sec, *arch;
- char *p, *q, *suffix;
- struct of *nof;
- struct dirent *dp;
- int src_form;
-
- if (NULL == (d = opendir(dir))) {
- if (warnings)
- perror(dir);
- return;
+ (void)strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX",
+ sizeof(tempfilename));
+ if (NULL == mkdtemp(tempfilename)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "&%s", tempfilename);
+ return(0);
+ }
+ (void)strlcat(tempfilename, "/" MANDOC_DB,
+ sizeof(tempfilename));
+ rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL);
+ if (SQLITE_OK != rc) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say("", "%s: %s", tempfilename, sqlite3_errstr(rc));
+ return(0);
}
- while (NULL != (dp = readdir(d))) {
- fn = dp->d_name;
+create_tables:
+ sql = "CREATE TABLE \"mpages\" (\n"
+ " \"desc\" TEXT NOT NULL,\n"
+ " \"form\" INTEGER NOT NULL,\n"
+ " \"pageid\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n"
+ ");\n"
+ "\n"
+ "CREATE TABLE \"mlinks\" (\n"
+ " \"sec\" TEXT NOT NULL,\n"
+ " \"arch\" TEXT NOT NULL,\n"
+ " \"name\" TEXT NOT NULL,\n"
+ " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) "
+ "ON DELETE CASCADE\n"
+ ");\n"
+ "CREATE INDEX mlinks_pageid_idx ON mlinks (pageid);\n"
+ "\n"
+ "CREATE TABLE \"names\" (\n"
+ " \"bits\" INTEGER NOT NULL,\n"
+ " \"name\" TEXT NOT NULL,\n"
+ " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) "
+ "ON DELETE CASCADE\n"
+ ");\n"
+ "\n"
+ "CREATE TABLE \"keys\" (\n"
+ " \"bits\" INTEGER NOT NULL,\n"
+ " \"key\" TEXT NOT NULL,\n"
+ " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) "
+ "ON DELETE CASCADE\n"
+ ");\n"
+ "CREATE INDEX keys_pageid_idx ON keys (pageid);\n";
+
+ if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB, "%s", sqlite3_errmsg(db));
+ sqlite3_close(db);
+ return(0);
+ }
- if ('.' == *fn)
- continue;
+prepare_statements:
+ if (SQLITE_OK != sqlite3_exec(db,
+ "PRAGMA foreign_keys = ON", NULL, NULL, NULL)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB, "PRAGMA foreign_keys: %s",
+ sqlite3_errmsg(db));
+ sqlite3_close(db);
+ return(0);
+ }
- src_form = p_src_form;
+ sql = "DELETE FROM mpages WHERE pageid IN "
+ "(SELECT pageid FROM mlinks WHERE "
+ "sec=? AND arch=? AND name=?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL);
+ sql = "INSERT INTO mpages "
+ "(desc,form) VALUES (?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL);
+ sql = "INSERT INTO mlinks "
+ "(sec,arch,name,pageid) VALUES (?,?,?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL);
+ sql = "INSERT INTO names "
+ "(bits,name,pageid) VALUES (?,?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_NAME], NULL);
+ sql = "INSERT INTO keys "
+ "(bits,key,pageid) VALUES (?,?,?)";
+ sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL);
+
+#ifndef __APPLE__
+ /*
+ * When opening a new database, we can turn off
+ * synchronous mode for much better performance.
+ */
-#if defined(__sun)
- stat(dp->d_name, &sb);
- if (S_IFDIR & sb.st_mode) {
-#else
- if (DT_DIR == dp->d_type) {
+ if (real && SQLITE_OK != sqlite3_exec(db,
+ "PRAGMA synchronous = OFF", NULL, NULL, NULL)) {
+ exitcode = (int)MANDOCLEVEL_SYSERR;
+ say(MANDOC_DB, "PRAGMA synchronous: %s",
+ sqlite3_errmsg(db));
+ sqlite3_close(db);
+ return(0);
+ }
#endif
- sec = psec;
- arch = parch;
- /*
- * By default, only use directories called:
- * man<section>/[<arch>/] or
- * cat<section>/[<arch>/]
- */
+ return(1);
+}
- if ('\0' == *sec) {
- if(0 == strncmp("man", fn, 3)) {
- src_form |= MANDOC_SRC;
- sec = fn + 3;
- } else if (0 == strncmp("cat", fn, 3)) {
- src_form |= MANDOC_FORM;
- sec = fn + 3;
- } else {
- if (warnings) fprintf(stderr,
- "%s/%s: bad section\n",
- dir, fn);
- if (use_all)
- sec = fn;
- else
- continue;
- }
- } else if ('\0' == *arch) {
- if (NULL != strchr(fn, '.')) {
- if (warnings) fprintf(stderr,
- "%s/%s: bad architecture\n",
- dir, fn);
- if (0 == use_all)
- continue;
- }
- arch = fn;
- } else {
- if (warnings) fprintf(stderr, "%s/%s: "
- "excessive subdirectory\n", dir, fn);
- if (0 == use_all)
- continue;
- }
+static void *
+hash_calloc(size_t n, size_t sz, void *arg)
+{
- buf[0] = '\0';
- strlcat(buf, dir, PATH_MAX);
- strlcat(buf, "/", PATH_MAX);
- sz = strlcat(buf, fn, PATH_MAX);
+ return(mandoc_calloc(n, sz));
+}
- if (PATH_MAX <= sz) {
- if (warnings) fprintf(stderr, "%s/%s: "
- "path too long\n", dir, fn);
- continue;
- }
+static void *
+hash_alloc(size_t sz, void *arg)
+{
- ofile_dirbuild(buf, sec, arch, src_form, of);
- continue;
- }
+ return(mandoc_malloc(sz));
+}
-#if defined(__sun)
- if (0 == S_IFREG & sb.st_mode) {
-#else
- if (DT_REG != dp->d_type) {
-#endif
- if (warnings)
- fprintf(stderr,
- "%s/%s: not a regular file\n",
- dir, fn);
- continue;
- }
- if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
- continue;
- if ('\0' == *psec) {
- if (warnings)
- fprintf(stderr,
- "%s/%s: file outside section\n",
- dir, fn);
- if (0 == use_all)
- continue;
- }
+static void
+hash_free(void *p, void *arg)
+{
- /*
- * By default, skip files where the file name suffix
- * does not agree with the section directory
- * they are located in.
- */
+ free(p);
+}
- suffix = strrchr(fn, '.');
- if (NULL == suffix) {
- if (warnings)
- fprintf(stderr,
- "%s/%s: no filename suffix\n",
- dir, fn);
- if (0 == use_all)
- continue;
- } else if ((MANDOC_SRC & src_form &&
- strcmp(suffix + 1, psec)) ||
- (MANDOC_FORM & src_form &&
- strcmp(suffix + 1, "0"))) {
- if (warnings)
- fprintf(stderr,
- "%s/%s: wrong filename suffix\n",
- dir, fn);
- if (0 == use_all