#!/usr/bin/guile \
-e "(scripts doc-snarf)" -s
!#
;;; doc-snarf --- Extract documentation from Scheme files

;; 	Copyright (C) 2001,2003,2004,2005 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Martin Grabmueller

;;; Commentary:

;; Usage: doc-snarf [OPTIONS] FILE
;;
;;   -o, --output FILE  -- Specify output file [default=stdout]
;;   -t, --texinfo      -- Format output as texinfo
;;   -D, --dot-doc      -- Format output as GDFv1 .doc
;;
;; This program reads in a Scheme source file and extracts docstrings
;; in the format specified below.  Additionally, a procedure protoype
;; is inferred from the procedure definition line starting with
;; (define... ).
;;
;; Currently, three output modi are implemented: plaintext, texinfo
;; and GDFv1.  Default is plaintext, texinfo can be switched on with the
;; `--texinfo, -t' command line option, and GDFv1 with `--dot-doc, -D'.
;;
;; A docstring is recognized: (1) as a comment block preceding and
;; "touching" a top-level `define' (or similar) form, with each line in
;; the comment beginning flush left and starting with two semicolons; or
;; (2) as a "standard internal docstring", i.e., a string occuring
;; immediately after the formals in the form.  If a form has both kinds
;; of docstring, then (2) takes precedence.  For example, the fragment:
;;
;;   ;; This procedure foos, or bars, depending on @var{braz}.
;;   ;;-Author: Martin Grabmueller
;;   (define (foo/bar braz)
;;     (if braz 'foo 'bar))
;;
;; results in the following docstring if texinfo output is enabled:
;;
;;   ^Lfoo/bar
;;   @c snarfed from doc-snarf:81
;;   @deffn procedure foo/bar braz
;;   This procedure foos, or bars, depending on @var{braz}.
;;   @c Author: Martin Grabmueller
;;   @end deffn
;;
;; or in this if plaintext output is used:
;;
;;   Procedure: foo/bar braz
;;   This procedure foos, or bars, depending on BRAZ.
;;   ;; Author: Martin Grabmueller
;;   Snarfed from doc-snarf:81
;;   ^L
;;
;; Note that for plaintext output, @var{foo} is replaced by FOO, in
;; both the docstring and the options.  Normally, if the signature of
;; a procedure ends in a dotted pair, the last symbol S is rendered as
;; "[S...]".  You can use the special option `sig' to override the
;; inferred signature.  For example:
;;
;;   ;; Do something w/ @var{a} and optional arg @var{b}.
;;   ;;-sig: (a [b [c]])
;;   (define (my-proc a . rest) ...)
;;
;; yields: (my-proc a [b [c]]).  Without `sig': (my-proc a [rest...]).
;;
;;
;; For Scheme programs, this module exports the following procs:
;;   (entry-symbol e) => symbol
;;   (entry-signature e) => string
;;   (entry-docstring e) => string
;;   (entry-options e [split]) => list of elements, either
;;                                "NAME: VALUE" or ("NAME" . "VALUE")
;;   (entry-filename e) => string
;;   (entry-line e) => integer
;;   (display-untexinfoized s)           ; see docstrings for these
;;   (format-texinfo entry)
;;   (format-dot-doc entry)
;;   (format-plain entry)
;;   (make-doc-snarfer formatter) => (lambda (file) ...)
;;
;; If the provided format-* procs are not to your liking, you can use the
;; entry-* procs to create a custom formatter to pass to `make-doc-snarfer',
;; for example:
;;
;;   (define (display-sig-and-docstring-only entry)
;;     (simple-format #t "(~A)\n~A\n"
;;                    (entry-signature entry)
;;                    (entry-docstring entry)))
;;
;;   (define my-snarf (make-doc-snarfer display-sig-and-docstring-only))
;;
;;   (for-each my-snarf (command-line-args))
;;
;; The proc `display-untexinfoized' is useful for simple rendering of
;; @var and @code constructs, likely to be present in docstrings targeted
;; for "guile-tools twerp2texi" or other texinfo-based methodologies.
;;
;;
;; TODO: More parameterization.

;;; Code:

(define-module (scripts doc-snarf)
  #:autoload (scripts PROGRAM) (HVQC-MAIN)
  #:use-module ((scripts read-scheme-source)
                #:select ((read-scheme-source-silently . file-forms)
                          quoted? clump-comments))
  #:use-module ((ice-9 regex) #:select (match:start
                                        match:end
                                        match:substring
                                        match:suffix))
  #:use-module ((srfi srfi-13) #:select (string-join
                                         string-trim-both
                                         substring/shared))
  #:use-module ((srfi srfi-14) #:select (char-set))
  #:export (entry-symbol
            entry-signature
            entry-docstring
            entry-options
            entry-filename
            entry-line
            display-untexinfoized
            format-texinfo
            format-dot-doc
            format-plain
            make-doc-snarfer))

(define (make-entry symbol signature docstring options filename line)
  (vector 'entry symbol signature docstring options filename line))

;; Return the symbol of entry @var{e}.
;; This is usually the name of the procedure or macro.
;;
(define (entry-symbol e)
  (vector-ref e 1))

;; Return the rendered signature (sans parens) of entry @var{e}.
;; See also @code{entry-symbol}.
;;
(define (entry-signature e)
  (vector-ref e 2))

;; Return the docstring of entry @var{e}.
;; This may have internal newlines but never at the beginning or end.
;;
(define (entry-docstring e)
  (vector-ref e 3))

;; Return a (possibly empty) list of options for entry @var{e}.
;; Each element is a string of the form: "NAME: VALUE".
;; Optional arg @var{split} non-#f means to split each element
;; into a pair of the form @code{("NAME" . "VALUE")}, instead.
;;
;;-sig: (e [split])
;;
(define (entry-options e . split)
  (let ((ls (vector-ref e 4)))
    (if (or (null? split) (not (car split)))
        (map (lambda (pair)
               (simple-format #f "~A: ~A" (car pair) (cdr pair)))
             ls)
        ls)))

;; Return the filename entry @var{e}.
;;
(define (entry-filename e)
  (vector-ref e 5))

;; Return the line number of entry @var{e}.
;;
(define (entry-line e)
  (vector-ref e 6))

;; Write an @var{entry} using Guile Documentation Format Version 1.
;;
(define (format-dot-doc entry)
  (simple-format #t "\f\n(~A)\n~A\n~A[~A:~A]\n"
                 (entry-signature entry)
                 (entry-docstring entry)
                 #\soh                  ; control-A
                 (entry-filename entry)
                 (entry-line entry)))

;; Display an @var{entry} using texinfo format.
;;
(define (format-texinfo entry)
  (simple-format #t "\n\f~A\n@c snarfed from ~A:~A\n@deffn procedure ~A\n~A\n"
                 (entry-symbol entry)
                 (entry-filename entry)
                 (entry-line entry)
                 (entry-signature entry)
                 (entry-docstring entry))
  (for-each (lambda (s)
              (simple-format #t "@c ~A\n" s))
            (entry-options entry))
  (simple-format #t "@end deffn\n"))

;; Display string @var{s} to stdout, converting @var and @code constructs.
;; The @var arg is upcased.  The @code arg is surrouned by left and right
;; single quotes.
;;
(define display-untexinfoized
  (let ((rx (make-regexp "@(var|code)\\{([^\{\}]*)\\}")))
    (lambda (s)
      (let loop ((start 0))
        (cond ((regexp-exec rx s start)
               => (lambda (m)
                    (let* ((beg (match:start m))
                           (txt (match:start m 2))
                           (bod (match:substring m 2))
                           (v? (= 5 (- txt beg)))) ; otherwise 6 for code
                      (display (substring/shared s start (match:start m)))
                      (or v? (display "`"))
                      (display (if v? (string-upcase bod) bod))
                      (or v? (display "'")))
                    (loop (match:end m))))
              (else
               (display (substring/shared s start))))))))

;; Write an @var{entry} using plain format.
;; Skip output for the special option @code{sig}.
;;
(define (format-plain entry)
  (simple-format #t "Procedure: ~A\n" (entry-signature entry))
  (display-untexinfoized (entry-docstring entry))
  (newline)
  (for-each (lambda (pair)
              (or (string=? "sig" (car pair))
                  (begin
                    (simple-format #t ";; ~A: " (car pair))
                    (display-untexinfoized (cdr pair))
                    (newline))))
            (entry-options entry #t))
  (simple-format #t "Snarfed from ~A:~A\n\f\n"
                 (entry-filename entry)
                 (entry-line entry)))

(define display-procedure-doc-entry
  (let ((parens (char-set #\( #\))))
    (lambda (f invocation doc opts file line)
      (f (make-entry
          (car invocation)
          (string-trim-both (simple-format #f "~A" invocation) parens)
          doc
          opts
          file
          line)))))

(define option-prefix-rx (make-regexp "^;+-([-A-Za-z]+):[ \t]*"))

(define --opts (make-object-property))

(define (process-file f file)

  (define (->line x) (assq-ref x 'line))
  (define (->sid  x) (assq-ref x 'std-int-doc))
  (define (->type x) (assq-ref x 'type))

  (define (make-chop level)
    (let* ((rx (make-regexp "[- ]*")))
      (lambda (s)
        (cond ((= level (string-length s))
               99999)
              ((regexp-exec rx s level)
               => (lambda (m)
                    (- (match:end m) (match:start m))))
              (else 0)))))

  (define (clean level lines)
    (let* ((chop (make-chop level))
           (bye-bye (+ level (apply min (map chop lines)))))
      (let loop ((lines lines) (opts '()) (after '()))
        (if (null? lines)
            (let ((chunk (string-join (reverse after) "\n")))
              (or (null? opts) (set! (--opts chunk) opts))
              (cons level chunk))       ; retval
            (let ((line (car lines)))
              (cond ((regexp-exec option-prefix-rx line)
                     => (lambda (m)
                          (loop (cdr lines)
                                (acons (string->symbol
                                        (match:substring m 1))
                                       (match:suffix m)
                                       opts)
                                after)))
                    (else
                     (loop (cdr lines)
                           opts
                           (cons (make-shared-substring
                                  line (if (= level (string-length line))
                                           level
                                           bye-bye))
                                 after)))))))))

  (define (->sig ls chunk)
    (or (and=> (and=> (--opts chunk)
                      (lambda (opts)
                        (assq-ref opts 'sig)))
               (lambda (sig)
                 (cons (car ls) (read (open-input-string sig)))))
        (let ((tail (last-pair ls)))
          (or (list? tail)
              (set-cdr! tail (list (simple-format #f "[~A...]" (cdr tail)))))
          ls)))

  ;; do it!
  (let loop ((forms (clump-comments (file-forms file) clean))
             (stash #f))
    (or (null? forms)
        (let ((form (car forms)))
          (cond ((quoted? 'following-form-properties form)
                 => (lambda (alist)
                      (case (->type alist)
                        ((procedure syntax)
                         (let ((sid (cond ((->sid alist))
                                          (else #f))))
                           (and (or sid stash)
                                (display-procedure-doc-entry
                                 f
                                 (->sig (assq-ref alist 'signature) stash)
                                 (string-trim-both
                                  ;; venerate tradition
                                  (cond (sid) (else stash))
                                  #\newline)
                                 (or (--opts stash) '())
                                 file
                                 (1- (->line alist)))))))
                      (loop (cdr forms) #f)))
                ((and (pair? form) (number? (car form))) ; level
                 (loop (cdr forms) (cdr form)))
                (else
                 (loop (cdr forms) #f)))))))

;; Return a procedure that snarfs docstrings and displays each entry with
;; proc @var{formatter} (such as, for example, @code{format-dot-doc}).
;; The returned procedure takes one arg, the name of a file to process.
;;
(define (make-doc-snarfer formatter)
  (lambda (file)
    (process-file formatter file)))

(define (main args)
  (HVQC-MAIN
   args (lambda (qop)
          (let ((go (make-doc-snarfer (cond ((qop 'texinfo) format-texinfo)
                                            ((qop 'dot-doc) format-dot-doc)
                                            (else format-plain)))))
            (with-output-to-port (or (qop 'output open-output-file)
                                     (current-output-port))
              (lambda () (for-each go (qop '()))))))
   '(usage . commentary)
   '(package . "Guile")
   '(version . "0.2")                   ; update before publishing!
   '(option-spec (output  (single-char #\o) (value #t))
                 (texinfo (single-char #\t))
                 (dot-doc (single-char #\D)))))

;;; doc-snarf ends here
