#!/bin/sh -e
# makewhatis: create the whatis database
# Created: Sun Jun 14 10:49:37 1992
# Revised: Sat Jan  8 14:12:37 1994 by faith@cs.unc.edu
# Revised: Sat Mar 23 17:56:18 1996 by michael@actrix.gen.nz
# ALT Linux adaptations by Dmitry V. Levin <ldv@altlinux.org>
# Copyright 1992, 1993, 1994 Rickard E. Faith (faith@cs.unc.edu)
# May be freely distributed and modified as long as copyright is retained.
#
# Wed Dec 23 13:27:50 1992: Rik Faith (faith@cs.unc.edu) applied changes
# based on Mitchum DSouza (mitchum.dsouza@mrc-apu.cam.ac.uk) cat patches.
# Also, cleaned up code and made it work with NET-2 doc pages.
#
# makewhatis-1.4: aeb 940802, 941007, 950417
# Fixed so that the -c option works correctly for the cat pages
# on my machine. Fix for -u by Nan Zou (nan@ksu.ksu.edu).
# Many minor changes.
# The -s option is undocumented, and may well disappear again.
#
# Sat Mar 23 1996: Michael Hamilton (michael@actrix.gen.nz).
# I changed the script to invoke gawk only once for each directory tree.
# This speeds things up considerably (from 30 minutes down to 1.5 minutes
# on my 486DX66).
# 960401 - aeb: slight adaptation to work correctly with cat pages.
# 960510 - added fixes by brennan@raven.ca.boeing.com, author of mawk.
# 971012 - replaced "test -z" - it doesn't work on SunOS 4.1.3_U1.
# 980710 - be more careful with TMPFILE
# 000323 - do not change PATH, better treatment of catpages - Bryan Henderson
# 011117 - avoid suspicious filenames
# 030310 - find files only; fix LAPACK cruft; no /usr/man default;
#	use /dev/stderr instead of /dev/tty; handle files with strange names;
#	add support for chinese, hungarian, indonesian, japanese, korean,
#	polish, russian (Thierry Vignaud);
#
# Note for Slackware users: "makewhatis -v -w -c" will work.
#
# makewhatis aeb 030801 (from man-1.5m)

# Short name of this script (path stripped), used in messages and as the
# prefix of the temporary file name.
PROG=${0##*/}
# Run every external command from a fixed search path.
export PATH=/bin:/usr/bin
# Files created by this script are writable by their owner only.
umask 0022

# Cleanup hook installed via trap: removes the temporary file (if one was
# created) and terminates the script with the status that was current when
# the handler was entered.
# Globals: TMPFILE (read)
exit_handler()
{
	local status=$?
	# Disarm the EXIT trap so the final exit below does not re-enter us.
	trap '' EXIT
	if [ -n "$TMPFILE" ]; then
		rm -f "$TMPFILE"
	fi
	exit $status
}

# Directory for the scratch file: $TMPDIR when already set, otherwise
# $HOME/tmp, falling back to /tmp when the chosen directory does not exist.
: "${TMPDIR:=$HOME/tmp}"
[ -d "$TMPDIR" ] || TMPDIR=/tmp
# mktemp -t takes the directory from the environment, so the choice made
# above has to be exported to have any effect.
export TMPDIR

# Arm the cleanup handler before the file is created so there is no window
# in which a signal could leave it behind.  TMPFILE is cleared first so the
# handler never removes a file named by an inherited variable.
TMPFILE=
# Signal names are given without the SIG prefix, which every POSIX shell
# accepts.
trap exit_handler HUP PIPE INT QUIT TERM EXIT
TMPFILE=$(mktemp -t "$PROG.XXXXXXXXXX")

# Source the system language setup script when it exists and is non-empty
# (presumably it sets the locale variables - confirm against the system).
[ ! -s /etc/profile.d/lang.sh ] || . /etc/profile.d/lang.sh
# Colon-separated default search path as reported by man(1).
DEFMANPATH=$(man --path)

# Section suffixes; each one names a manN subdirectory to look for.
SECTIONS="1 8 2 3 4 5 6 7 9 tcl n l p o"

# Print the usage summary and terminate the script.
# Globals:   PROG, DEFMANPATH (read)
# Arguments: $1 - optional exit status; defaults to 1 when missing or empty
# Outputs:   the help text, on stdout when the status is 0 (help was asked
#            for explicitly), on stderr otherwise
USAGE()
{
	local status="${1:-1}"
	local out=2
	[ "$status" != 0 ] || out=1
	cat >&"$out" <<EOF
Build the whatis database for the man pages.

Usage: $PROG [options] [manpath]

Valid options are:
-h: show this help and exit
-u: update database with new pages
-v: verbose
[manpath]: man directories (default: $DEFMANPATH)
EOF
	exit "$status"
}

# Normalise the command line with getopt(1); on a parse error show the usage
# text and quit.  The program name is quoted so that a script path containing
# blanks cannot be split into extra getopt arguments.
TEMP=$(getopt -n "$PROG" -o huv -- "$@") || USAGE
# getopt emits a shell-quoted argument list; eval re-installs it as "$@".
eval set -- "$TEMP"

# Extra find(1) tests: deffindarg holds "-size +0" (non-empty files),
# findarg is filled in by -u.
deffindarg='-size +0'
findarg=
# Non-empty when -u (update mode) was given.
update=
# Never set by any option below; kept because later code reads it.
pages=
# Non-empty when -v was given.
export verbose=
while :; do
	case "$1" in
		-u)
			# -ctime 0: status changed within the last 24 hours.
			findarg='-ctime 0'
			update=1
			shift
			;;
		-v)
			verbose=1
			shift
			;;
		-h)
			USAGE 0
			;;
		--)
			# End of options; what remains are man directories.
			shift
			break
			;;
		*)
			echo "$PROG: unrecognized option: $1" >&2
			exit 1
			;;
	esac
done

# Directories to process, as one space-separated string: the remaining
# command-line arguments when there are any, otherwise the default man path
# with its colons turned into blanks.
case "$*" in
	'')
		manpath=$(echo "$DEFMANPATH" | tr ':' ' ')
		;;
	*)
		manpath="$*"
		;;
esac

# First truncate every whatis file that is going to be rebuilt from scratch;
# after that files are only ever merged into, because the same directory may
# be visited twice.  Skipped in update (-u) mode.
if [ -z "$update" ]; then
	# $manpath is a space-separated list, so word splitting is intended here.
	for mandir in $manpath; do
		[ -d "$mandir" ] || continue
		# /usr/man is skipped.
		[ "$mandir" != "/usr/man" ] || continue
		# Map the page tree to the directory that holds its whatis file;
		# trees not listed here keep the file inside the tree itself.
		case "$mandir" in
			/usr/share/man*)
				mandir="/var/cache/man${mandir#/usr/share/man}"
				;;
			/usr/X11R6/man)
				mandir=/var/cache/man/X11R6
				;;
			/usr/lib/perl5/man)
				mandir=/var/cache/man/perl
				;;
			/usr/local/man)
				mandir=/var/cache/man/local
				;;
		esac
		[ -d "$mandir" ] || continue
		: > "$mandir/whatis"
	done
fi

# Path of the iconv binary, or empty when it is not present and executable.
ICONV=/usr/bin/iconv
[ -x "$ICONV" ] || ICONV=

# Main pass: for every man directory, extract the NAME section of each page
# in the known section subdirectories and merge the resulting lines into the
# whatis file of that directory.
# $manpath is a space-separated list, so word splitting is intended here.
for mandir in $manpath; do
	[ -d "$mandir" ] || continue
	[ "$mandir" != "/usr/man" ] || continue
	# Map the page tree to the directory that holds its whatis file.
	case "$mandir" in
		/usr/share/man*)
			destdir="/var/cache/man${mandir#/usr/share/man}"
			;;
		/usr/X11R6/man)
			destdir=/var/cache/man/X11R6
			;;
		/usr/lib/perl5/man)
			destdir=/var/cache/man/perl
			;;
		/usr/local/man)
			destdir=/var/cache/man/local
			;;
		*)
			destdir="$mandir"
			;;
	esac
	[ -d "$destdir" ] || continue

	if [ -n "$verbose" ]; then
		echo "about to enter $mandir" >&2
	fi

	# When rebuilding from scratch, a non-empty whatis file means this
	# destination has already been filled during the current run.
	if [ -z "$update" ] && [ -s "$destdir/whatis" ]; then
		if [ -n "$verbose" ]; then
			echo "skipping $mandir - we did it already" >&2
		fi
	else
		here=$PWD
		cd "$mandir"

		# Pages are passed through iconv when the tree declares its
		# character set in a .charset file; plain cat otherwise.  The file
		# is read relative to the directory just entered so that relative
		# man directories work too.
		iconv_cmd=cat
		if [ -f .charset ]; then
			charset=$(cat .charset)
			# The name ends up in a command line that awk hands to the
			# shell, so accept nothing but ordinary charset-name characters.
			case "$charset" in
				*[!A-Za-z0-9_.:/+-]*) charset= ;;
			esac
			if [ -n "$charset" ] && [ -n "$ICONV" ]; then
				iconv_cmd="$ICONV -f $charset -t utf-8"
			fi
			# Fall back to cat when the converter does not work.
			if [ "$(echo TEST | $iconv_cmd)" != "TEST" ]; then
				iconv_cmd=cat
			fi
		fi

		for i in $SECTIONS; do
			if [ -d "man$i" ]; then
				cd "man$i"
				export section=$i
				export curdir="$mandir/man$i"
				# One gawk process per section directory: find supplies the
				# file names, gawk opens each page through a pipe.
				# $findarg and $deffindarg hold several words each and are
				# left unquoted on purpose.
				find . -type f -name '*.*' $findarg $deffindarg -print | gawk '
	    # Read the next line of the current page into $0.
	    # Returns the getline status (1 line read, 0 end of input, <0 error).
	    function readline() {
	      result = (pipe_cmd | getline);
	      if (result < 0) {
	        print "Pipe error: " pipe_cmd " " ERRNO > "/dev/stderr";
	      }
	      return result;
	    }

	    # Close the pipe of the current page.
	    function closeline() {
	      return close(pipe_cmd);
	    }

	    # Process the page named by the global variable filename and print
	    # its whatis lines.
	    function do_one() {
	      insh = 0; thisjoin = 1; done = 0;
	      entire_line = "";

	      if (verbose) {
	        print "adding " filename > "/dev/stderr"
	      }

	      use_bzcat = (filename ~ /\.bz2$/);
	      use_zcat = match(filename,"\\.Z$") ||
	                 match(filename,"\\.z$") || match(filename,"\\.gz$");
	      if (use_zcat) {
	        # awk strings start at index 1.  A start of 0 made gawk drop the
	        # last character in front of the suffix, which hid sub-sections
	        # such as 1x or 3pm on gzip-compressed pages.
	        filename_no_gz = substr(filename, 1, RSTART - 1);
	      } else {
	        filename_no_gz = filename;
	      }
	      # progname: last path component, without the compression suffix.
	      match(filename_no_gz, "/[^/]+$");
	      progname = substr(filename, RSTART + 1, RLENGTH - 1);
	      # A suffix like .3pm in directory man3 gives section 3pm.
	      if (match(progname, "\\." section "[A-Za-z]+")) {
	        actual_section = substr(progname, RSTART + 1, RLENGTH - 1);
	      } else {
	        actual_section = section;
	      }
	      sub(/\..*/, "", progname);
	      # try to avoid suspicious stuff
	      if (filename ~ /[;&|`$(]/) {
	        print "ignored strange file name " filename " in " curdir > "/dev/stderr";
	        return;
	      }
	      # Backslash-escape shell metacharacters in the file name, then
	      # prefix the command that matches the compression.
	      pattern="[\\&;()<>!|{}$?*`\"'\'' ]"
	      pipe_cmd = filename;
	      gsub(pattern, "\\\\&", pipe_cmd);
	      if (use_zcat) {
	        pipe_cmd = "zcat " pipe_cmd;
	      } else if (use_bzcat) {
	        pipe_cmd = "bzcat " pipe_cmd;
	      } else {
	        pipe_cmd = "cat " pipe_cmd;
	      }
	      pipe_cmd = pipe_cmd " | " iconv_cmd;

	      while (!done && readline() > 0) {
	        # Drop overstrike pairs: any character followed by a backspace.
	        gsub(/.\b/, "");
	        # Start of the NAME section, in any of the known languages.
	        if (($1 ~ /^\.[Ss][Hh]/ &&
	          ($2 ~ /[Nn][Aa][Mm][Ee]/ ||
	           $2 ~ /(И|и)(М|м)(Я|я)/ || $2 ~ /(Н|н)(А|а)(З|з)(В|в)(А|а)(Н|н)(И|и)(Е|е)/ ||
	           $2 ~ /^JMNO/ || $2 ~ /^NAVN/ ||
	           $2 ~ /^BEZEICHNUNG/ || $2 ~ /^NOMBRE/ ||
	           $2 ~ /^NIMI/ || $2 ~ /^NOM/ || $2 ~ /^IME/ ||
	           $2 ~ /^N[E]V/ || $2 ~ /^NAMA/ ||
	           $2 ~ /^NOME/ || $2 ~ /^NAAM/)) ||
	          (pages == "cat" && $1 ~ /^NAME/)) {
	          if (!insh) {
	            insh = 1;
	          } else {
	            # A second NAME header ends the scan.
	            done = 1;
	          }
	        } else if (insh) {
	          if ($1 ~ /^\.[Ss][HhYS]/ ||
	            (pages == "cat" &&
	            ($1 ~ /^S[yYeE]/ || $1 ~ /^DESCRIPTION/ ||
	             $1 ~ /^COMMAND/ || $1 ~ /^OVERVIEW/ ||
	             $1 ~ /^STRUCTURES/ || $1 ~ /^INTRODUCTION/ ||
	             $0 ~ /^[^ ]/))) {
	            # end insh for Synopsis, Syntax, but also for
	            # DESCRIPTION (e.g., XFree86.1x),
	            # COMMAND (e.g., xspread.1)
	            # OVERVIEW (e.g., TclCommandWriting.3)
	            # STRUCTURES (e.g., XEvent.3x)
	            # INTRODUCTION (e.g., TclX.n)
	            # and anything at all that begins in Column 1, so
	            # is probably a section header.
	            done = 1;
	          } else {
	            if ($0 ~ progname"-") {  # Fix old cat pages
	              sub(progname"-", progname" - ");
	            }
	            if ($0 ~ /[^ \\]-$/) {
	              sub(/-$/, "");	  # Handle Hyphenations
	              nextjoin = 1;
	            } else if ($0 ~ /\\c$/) {
	              sub(/\\c$/, "");	  # Handle Continuations
	              nextjoin = 1;
	            } else
	              nextjoin = 0;

	            sub(/^.[IB] /, "");       # Kill bold and italics
	            sub(/^.BI /, "");         #
	            sub(/^.SM /, "");         # Kill small
	            sub(/^.Nm /, "");         # Kill bold
	            sub(/^.Tn /, "");         # Kill normal
	            sub(/^.Li /, "");         # Kill .Li
	            sub(/^.Dq /, "");         # Kill .Dq
	            sub(/^.Nd */, "- ");      # Convert .Nd to dash
	            sub(/\\\".*/, "");        # Trim pending comments
	            sub(/  *$/, "");          # Trim pending spaces
	            sub(/^\.$/, "");          # Kill blank comments
	            sub(/^'"'"'.*/, "");      # Kill comment/troff lines
	            sub(/^.in .*/, "");       # Kill various macros
	            sub(/^.ti .*/, "");
	            sub(/^.ta .*/, "");
	            sub(/^.Vb .*/, "");
	            sub(/^.[PLTH]P$/, "");    # .PP/.LP/.TP/.HP
	            sub(/^.Pp$/, "");
	            sub(/^.IX .*$/, "");
	            sub(/^.nolinks$/, "");
	            sub(/^.B$/, "");
	            sub(/^.nf$/, "");

	            if (($1 ~ /^\.../ || $1 == "") &&
	                (entire_line ~ / - / || entire_line ~ / \\- /)) {
	              # Assume that this ends the description of one line
	              # Sometimes there are several descriptions in one page,
	              # as in outb(2).
	              handle_entire_line();
	              entire_line = "";
	              thisjoin = 1;
	            } else {
	              # Glue directly after a hyphenation or continuation,
	              # otherwise separate the pieces with a blank.
	              if (thisjoin) {
	                entire_line = entire_line $0;
	              } else {
	                entire_line = entire_line " " $0;
	              }
	              thisjoin = nextjoin;
	            }
	          }
	        }
	      }
	      handle_entire_line();
	      closeline();
	    }

	    # Clean up the collected description in entire_line and print one
	    # whatis line per comma-separated name found in front of the dash.
	    function handle_entire_line() {
	      x = entire_line;             # Keep it short

	      gsub(/\015/, "", x);         # Kill DOS remains
	      gsub(/\t/, " ", x);          # Translate tabs to spaces
	      gsub(/  +/, " ", x);         # Collapse spaces
	      gsub(/ *, */, ", ", x);      # Fix comma spacings
	      sub(/^ /, "", x);            # Kill initial spaces
	      sub(/ $/, "", x);            # Kill trailing spaces
	      sub(/__+/, "_", x);          # Collapse underscores

	      gsub(/\\f\(../, "", x);         # Kill font changes
	      gsub(/\\f[PRIB0123]/, "", x);   # Kill font changes
	      gsub(/\\s[-+0-9]*/, "", x);     # Kill size changes
	      gsub(/\\&/, "", x);             # Kill \&
	      gsub(/\\\|/, "", x);            # Kill \|
	      gsub(/\\\((ru|ul)/, "_", x);    # Translate
	      gsub(/\\\((mi|hy|em)/, "-", x); # Translate
	      gsub(/\\\*\(../, "", x);        # Kill troff strings
	      gsub(/\\/, "", x);              # Kill all backslashes
	      gsub(/"/, "", x);               # Kill quotes (from .Nd "foo bar")
	      sub(/<h1 align=center>/, "", x);# Yuk! HTML cruft
	      gsub(/\000.*/, "X", x);         # Binary cruft in LAPACK pages
	      gsub(/  +/, " ", x);            # Collapse spaces (again)
	      sub(/^ /, "", x);               # Kill initial spaces (again)
	      sub(/ $/, "", x);               # Kill trailing spaces (again)
	      sub(/\.$/, "", x);              # Kill trailing period

	      # Without a " - " separator there is nothing to report.
	      if (!match(x, / - /))
	        return;

	      after_dash = substr(x, RSTART);
	      head = substr(x, 1, RSTART-1) ", ";
	      while (match(head, /, /)) {
	        prog = substr(head, 1, RSTART-1);
	        head = substr(head, RSTART+2);
	        # Names other than the file name get the file name appended.
	        if (prog != progname)
	          prog = prog " [" progname "]";
	        printf "%-*s (%s) %s\n", 20, prog, actual_section, after_dash;
	      }
	    }

	    {			# Main action - process each filename read in.
	      filename = $0;
	      do_one();
	    }
	    ' pages="$pages" section="$section" verbose="$verbose" curdir="$curdir" iconv_cmd="$iconv_cmd"
				cd ..
			fi
		done > "$TMPFILE"

		cd "$here"

		# Keep the entries already on file, drop empty lines, and write
		# the sorted, de-duplicated result back.
		[ ! -s "$destdir/whatis" ] || cat "$destdir/whatis" >> "$TMPFILE"
		sed '/^$/d' < "$TMPFILE" | LC_COLLATE=C sort -u > "$destdir/whatis"
	fi
done
