#!/usr/bin/guile \
-e "(scripts twerp2texi)" -s
!#
;;; twerp2texi --- Process .twerp to make .texi

;;	Copyright (C) 2002,2003,2004,2005 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Usage: twerp2texi -I INDEX [...] [-d DEPS] [-o OUTFILE] TWERP
;;
;; Process TWERP (a Texi With Eval-Requiring Predelictions file) and
;; write the result to OUTFILE, typically named w/ .texi extension, or
;; stdout if "-o OUTFILE" is not specified.  Processing requires one or
;; more index files prepared by "guile-tools make-twerp2texi-index";
;; it is an error to omit "-I INDEX".
;;
;; * Options
;;
;;   At this time there is only one additional option: "-d DEPS" means
;;   write to the file DEPS a makefile fragment suitable for "include
;;   DEPS".  This fragment is typically named w/ extension .Ptexi to
;;   indicate the prerequisites (aka dependencies) of OUTFILE determined
;;   during twerp2texi operation (and not before!).  Note that DEPS only
;;   lists source files w/ no mention of .doc files.  Dependency
;;   tracking requires you to prepare the Makefile in a special way; see
;;   "guile-tools prep-Ptexi" for more info.
;;
;; * Twerp Processing
;;
;;   A .twerp file is mostly texinfo, w/ a some directives thrown in.
;;   At this time, the entire directive must be on one line.  This
;;   restriction will probably be lifted in the future.
;;
;;   In its simplest form a directive names a single symbol:
;;
;;      @twerpdoc (SYMBOL)
;;
;;   This is replaced by the header
;;
;;      @deffn {Scheme Procedure} SYMBOL SIG ...
;;
;;   followed by text associated with SYMBOL from the appropriate .doc
;;   file.  The text is inserted verbatim.  SIG is also taken from the
;;   .doc file.
;;
;;   The list following the "@twerpdoc" directive can name additional
;;   symbols or sublists beginning w/ `C'.  These inherit the SIG and
;;   are formatted appropriately[1], depending on whether the language
;;   is Scheme (default) or C.  For example, the directive:
;;
;;      @twerpdoc (acons (C scm_acons))
;;
;;   results in the header:
;;
;;      @deffn {Scheme Procedure} acons key value alist
;;      @deffnx {C Function} scm_acons (key, value, alist)
;;
;;   Scheme syntax and macros macros are likewise handled with
;;   @twerpsyntaxdoc and @twerpmacdoc, respectively.  For example:
;;
;;      @twerpmacdoc (begin-thread)
;;
;;   produces header:
;;
;;      @deffn {Scheme Macro} begin-thread first . rest
;;
;;   followed by the snarfed documentation.  C macros are handled
;;   similarly, using @twerpcmacdoc (note the "c").  For example:
;;
;;      @twerpcmacdoc (GH_USE_MODULE)
;;
;;   produces header:
;;
;;      @deffn {C Macro} GH_USE_MODULE (cvar, fullname)
;;
;;   If the macro does not have an arglist, that part is omitted.
;;
;;   [1] Style issues like what is "appropriate" are not yet finalized.
;;       There is much room for generalization here.
;;
;;   To include commentary from a module, use a "commentary directive"
;;   followed by a sexp of the form: (STYLE MODULE-NAME).  STYLE specifies
;;   STYLE specifies how to handle the raw text of the commentary, and
;;   MODULE-NAME specifies which module to consult.  For example:
;;
;;      @twerpcommentary (example (ice-9 documentation))
;;
;;   Currently the following styles are supported, corresponding to the
;;   same-named texinfo commands for enclosing blocks of text:
;;
;;      verbatim, quotation, example, smallexample,
;;      display, smalldisplay, format, smallformat
;;
;;   For example (literally), the `example' style results in something
;;   like the following to be generated:
;;
;;      @example
;;      COMMENTARY TEXT
;;      @end example
;;
;;   Additionally, the built-in `include' style inserts COMMENTARY TEXT
;;   directly, flush-left, without any "@" tags.  All styles except for
;;   `verbatim' and `include' also perform "@"-escaping for at-sign,
;;   left-curly-brace and right-curly-brace ("@", "{" and "}", respectively)
;;   to prevent texinfo misinterpretation.  You can inhibit this processing
;;   with the name pure-STYLE, for example `pure-example'.  NOTE: There is
;;   no `pure-include' style, since that combination is redundant.
;;
;;   You can specify a particular path to use for module searching
;;   with a setsearchpath directive, which takes a sexp: (DIR DIR...).
;;   For example:
;;
;;      @twerpsetsearchpath ("..")
;;
;;   This sets the search path to simply the parent directory.  This
;;   path is actually used as a prefix; if the search fails for the
;;   specified path, the normal `%load-path' is then searched.
;;
;; * Errors
;;
;;   The indexes are read first.  If there is an error during this
;;   process, the output file is not opened.  After index reading, any
;;   kind of error results in the output file being deleted (if not to
;;   stdout).
;;
;; * More Twerp Processing
;;
;;   All directives beginning w/ "@twerp" are reserved for future use.
;;
;;
;; TODO: Handle multi-line directives.
;;       Clean up error handling.
;;       Make `keep-going-on-missing-tag?' optional.

;;; Code:

(define-module (scripts twerp2texi)
  #:autoload (scripts PROGRAM) (HVQC-MAIN)
  #:autoload (ice-9 common-list) (some)
  #:autoload (ice-9 rdelim) (read-line write-line)
  #:autoload (ice-9 regex) (match:suffix)
  #:autoload (ice-9 rw) (read-string!/partial)
  #:autoload (scripts split-string-no-nulls) (split-string-no-nulls)
  #:autoload (srfi srfi-13) (string-join string-delete string-index
                                         string-trim-both)
  #:autoload (srfi srfi-14) (char-set)
  #:autoload (ice-9 documentation) (file-commentary)
  ;; nothing for now
  ;; #:export (twerp2texi)
  )

(define check-collision? #f)            ; for now

(define (hasherr!! dir file tag tv-cdr cur)
  (simple-format (current-error-port)
                 "ERROR: hash collision: ~A\nnew: ~A/~A ~A\ncurrent: ~A\n"
                 tag
                 dir file tv-cdr
                 cur)
  (throw 'hash-collision))

(define (read-one-index ht dir port)
  (for-each (lambda (file-info)
              (let ((file (car file-info)))
                (for-each (lambda (tag-info)
                            (let ((tag (car tag-info))
                                  (tv-cdr (cdr tag-info)))
                              (and check-collision?
                                   (hashq-ref ht tag)
                                   (not (equal? (cdr (hashq-ref ht tag))
                                                tv-cdr))
                                   (hasherr!! dir file tag tv-cdr
                                              (hashq-ref ht tag)))
                              (hashq-set! ht tag (cons (cons dir file)
                                                       tv-cdr))))
                          (cdr file-info))))
            (read port)))

(define (read-ext-indexes ports)
  (let ((ht (make-hash-table #:size 1031)))
    (for-each (lambda (port)
                (read-one-index ht (dirname (port-filename port)) port))
              ports)
    ht))

;;; {tag-info extraction}

(define (tv:dir tv)
  (caar tv))

(define (tv:file tv)                    ; "tag value" field extraction
  (in-vicinity (tv:dir tv) (cdar tv)))

(define (tv:sig-raw tv)
  (cadr tv))

(define (tv:sig-parts tv)
  (split-string-no-nulls (cadr tv)))

(define (tv:offsets tv)
  (cddr tv))

;;; {dependencies}

;; Return a proc WARD that knows how to write to MKFRAG (a filename) the
;; source dependencies of CHILD (the input filename) collected over standard
;; twerp2texi processing.  The output is in standard makefile format.  WARD
;; takes a command keyword and additional args, performing different tasks
;; depending on the command.
;;
;;  #:note! dep -- Add DEP to the list of dependencies.
;;                 This command can be called many times.
;;
;;  #:write!    -- Write the collected dependencies to MKFRAG.
;;                 This command can only be called once;
;;                 subsequent calls signal an error.
;;
;; For more info on why things are done in this way, see Automake info pages,
;; specifically the philosophy for the "depcomp" program.  (Eventually,
;; twerp2texi or some more generalized cousin will probably be merged into
;; automake, proper.  Probably this should be made into its own module...)
;;
(define (dep-ward child mkfrag)
  (let* ((deps '())
         (note! (lambda (dep)
                  (or (member dep deps)
                      (set! deps (cons dep deps)))))
         (write! (lambda ()
                   (cond ((not deps)
                          (error "write! called multiply!"))
                         ((null? deps)) ; do nothing
                         (else
                          (let ((p (open-file mkfrag OPEN_WRITE)))
                            (simple-format p "~A :" (basename child))
                            (for-each (lambda (dep)
                                        (simple-format p " \\\n ~A" dep))
                                      deps)
                            (newline p))))
                   (set! deps #f))))
    ;; retval
    (lambda (command . args)
      (case command
        ((#:note!) (note! (car args)))
        ((#:write!) (write!))))))

(define file-port-cache '())            ; todo: s/alist/weak-hash/

(define (file-frag tv ward)
  (let* ((file (tv:file tv))
         (offsets (tv:offsets tv))
         (port (or (assoc-ref file-port-cache file)
                   (let ((port (open-file file OPEN_READ)))
                     (set! file-port-cache
                           (acons file port file-port-cache))
                     port))))
    (seek port (car offsets) SEEK_SET)
    (let* ((len (- (cadr offsets) (car offsets)))
           (s (make-string len)))
      (or (= len (read-string!/partial s port))
          (error "lame (non-robust) file-frag implementation!"))
      (cond (ward
             (or (char=? #\soh (read-char port)) ; control-A
                 (error "corrupt .doc file!"))
             (ward #:note!
                   (let ((mystery (car (split-string-no-nulls
                                        (read-line port) ":[]")))
                         (hint (tv:dir tv)))
                     (if (char=? #\/ (string-ref mystery 0))
                         mystery
                         (string-append hint "/" mystery))))))
      s)))

(define (read-string s)
  (or (false-if-exception (with-input-from-string s (lambda () (read))))
      (throw 'twerp2texi-error 'bad-directive-data)))

(define (mapconcat proc ls sep)
  (string-join (map proc ls) sep))

(define (>> . args)
  (for-each (lambda (x)
              (if (pair? x)
                  (apply >> x)
                  (display x)))
            args))

;; Each entry in handlers is a triple (see `define-handler' below):
;;
;;   MATCH? ARG-DISCIPLINE BACKEND
;;
;; MATCH? is a procedure that takes a line of input (string) and
;; performs some kind of regexp-exec on it, returning that value.
;; ARG-DISCIPLINE is a keyword, either #:stylized-lookup or #:sexp.
;; If ARG-DISCIPLINE is #:stylized-lookup, then BACKEND should be
;; a procedure that accepts four arguments: tv full tag ward.  If
;; ARG-DISCIPLINE is #:sexp, then BACKEND should be a procedure
;; that takes the sexp read from the remainder of the line
;; (after the @twerpFOO keyword) and a second arg `ward'.
;;
;; If BACKEND returns #f, the handler is considered inapplicable
;; and the next one will be tried; thus an exclusive handler must
;; both match the input line and have a backend that returns non-#f.

(define *handlers*
  '())

(define (define-handler meta handler)
  (let* ((key (car meta))
         (arg-discipline (cadr meta))
         (rx (make-regexp (simple-format #f "^@twerp~A" key))))
    (set! *handlers* (cons (list (lambda (line)
                                   (regexp-exec rx line))
                                 arg-discipline
                                 handler)
                           *handlers*))))

(define-handler '(doc #:stylized-lookup)
  (let* (([]-set (char-set #\[ #\]))
         (del-[] (lambda (s) (string-delete s []-set)))
         (zonk-ellipsis (lambda (ls)
                          (if (null? ls)
                              ls
                              (let ((r (reverse ls)))
                                (if (string=? "..." (car r))
                                    (reverse (cdr r))
                                    ls))))))
    (lambda (tv full tag ward)
      (define (extra ls tv)
        (if (null? ls)
            ""
            (string-append
             (mapconcat
              (lambda (spec)
                (cond ((symbol? spec)
                       (string-append
                        "@deffnx {Scheme Procedure} "
                        (symbol->string spec)
                        " "
                        (tv:sig-raw tv)))
                      ((eq? 'C (car spec))
                       (mapconcat
                        (lambda (sym)
                          (string-append
                           "@deffnx {C Function} "
                           (symbol->string sym)
                           " ("
                           (string-join (zonk-ellipsis
                                         (map del-[] (tv:sig-parts tv)))
                                        ", ")
                           ")"))
                        (cdr spec)
                        "\n"))
                      (else
                       (error "bad make-extra stuff!"))))
              ls
              "\n")
             "\n")))
      (>>
       ;; be nice to humans
       "@c " (tv:file tv) "\n"
       ;; header(s)
       "@deffn {Scheme Procedure} " tag " " (tv:sig-raw tv) "\n"
       (extra (cdr full) tv)
       ;; doc
       (file-frag tv ward)
       "@end deffn\n"))))

(define-handler '(macdoc #:stylized-lookup)
  (lambda (tv full tag ward)
    (>>
     ;; be nice to humans
     "@c " (tv:file tv) "\n"
     ;; header
     "@deffn {Scheme Macro} " tag " " (tv:sig-raw tv) "\n"
     ;; doc
     (file-frag tv ward)
     "@end deffn\n")))

(define-handler '(syntaxdoc #:stylized-lookup)
  (lambda (tv full tag ward)
    (>>
     ;; be nice to humans
     "@c " (tv:file tv) "\n"
     ;; header
     "@deffn {Syntax} " tag " " (tv:sig-raw tv) "\n"
     ;; doc
     (file-frag tv ward)
     "@end deffn")))

(define-handler '(cmacdoc #:stylized-lookup)
  (lambda (tv full tag ward)
    (>>
     ;; be nice to humans
     "@c " (tv:file tv) "\n"
     ;; header
     "@deffn {C Macro} " tag
     (let ((ls (split-string-no-nulls (tv:sig-raw tv))))
       (if (null? ls)
           ""
           (let loop ((ls ls) (acc '(" (")))
             (if (null? (cdr ls))
                 (apply string-append
                        (reverse (append (list ")" (car ls)) acc)))
                 (loop (cdr ls)
                       (append (list ", " (car ls)) acc))))))
     "\n"
     ;; doc
     (file-frag tv ward)
     "@end deffn\n")))

(define *search-path* #f)

(define-handler '(setsearchpath #:sexp)
  (lambda (x ward)
    (set! *search-path* x)))

(define (module-name->filename-fragment ls)
  (string-append                        ; yuk!
   (symbol->string (car ls))
   (if (null? (cdr ls))
       ""
       (string-append                   ; double yuk!
        "/"
        (module-name->filename-fragment (cdr ls))))))

(define texinfo-quote
  (let ((dangerous "{}@"))
    (lambda (s)
      (let* ((cs (cond ((char-set? dangerous) dangerous)
                       (else (set! dangerous (string->char-set dangerous))
                             dangerous)))
             (holes (let loop ((hole (string-index s cs 0))
                               (acc '()))
                      (if (not hole)
                          acc
                          (loop (string-index s cs (1+ hole))
                                (cons hole acc))))))
        (if (null? holes)
            s                           ; optimization retval
            (let* ((len (string-length s))
                   (hole-count (length holes))
                   (new-s (make-string (+ len hole-count))))
              (let ((end (car holes)))
                (string-copy! new-s (+ end hole-count) s end len))
              (let loop ((holes holes)
                         (offset (1- hole-count)))
                (if (> 0 offset)
                    new-s               ; normal retval
                    (let* ((end (car holes))
                           (beg (if (= 0 offset) 0 (cadr holes))))
                      (string-copy! new-s (+ beg offset) s beg end)
                      (string-set! new-s (+ end offset) #\@)
                      (loop (cdr holes)
                            (1- offset)))))))))))

(define-handler '(commentary #:sexp)
  (lambda (form ward)
    (define trimmed-c
      (let ((nl (char-set #\newline)))
        (lambda (file)
          (string-trim-both (file-commentary file) nl))))
    (let* ((style (car form))
           (tag (let ((s (symbol->string style)))
                  (if (and (< 5 (string-length s))
                           (string=? "pure-" (substring s 0 5)))
                      (substring s 5)
                      s)))
           (name (cadr form)))
      (cond ((search-path (or *search-path* %load-path)
                          (module-name->filename-fragment name)
                          '(".scm" ""))
             => (lambda (file)
                  (>> (if (eq? 'include style) "" (list "@" tag "\n"))
                      (case style
                        ((include verbatim)
                         (trimmed-c file))
                        ((pure-quotation pure-example pure-smallexample
                                         pure-display pure-smalldisplay
                                         pure-format pure-smallformat)
                         (trimmed-c file))
                        ((quotation example smallexample
                                    display smalldisplay
                                    format smallformat)
                         (texinfo-quote (trimmed-c file)))
                        (else
                         (error "bad @twerpcommentary style:" style)))
                      "\n"
                      (if (eq? 'include style) "" (list "@end " tag "\n")))
                  (and ward (ward #:note! file))
                  #t))
            (else
             (>> "[could not find commentary for "
                 (object->string name) "]@*\n"
                 "[*search-path* => " *search-path* "]\n"))))))

(define keep-going-on-missing-tag? #t)  ; for now

(define (process inport index ward)
  (let loop ((line (read-line inport)))
    (or (eof-object? line)
        (begin
          (or (some (lambda (triple)
                      (let ((match? (car triple))
                            (arg-discipline (cadr triple))
                            (handle (caddr triple)))
                        (cond ((match? line)
                               => (lambda (m)
                                    (let* ((full (read-string (match:suffix m)))
                                           (tag (car full)))
                                      ;; be nice to humans
                                      (>> "@c " line "\n")
                                      (case arg-discipline
                                        ((#:sexp)
                                         (handle full ward))
                                        ((#:stylized-lookup)
                                         (cond ((hashq-ref index tag)
                                                => (lambda (tv)
                                                     (handle tv full tag ward)))
                                               (keep-going-on-missing-tag?
                                                (>> "\n[NOTE: docs missing"
                                                    " for " tag "]\n\n"))
                                               (else
                                                (throw 'twerp2texi-error
                                                       "no such tag: ~A" tag))))))))
                              (else #f))))
                    *handlers*)
              (write-line line))
          (loop (read-line inport))))))

;; maybe later
;; (define (twerp2texi ...) ...)

(define (twerp2texi/qop qop)
  (let* ((index (qop 'index-file (lambda (filenames)
                                   (read-ext-indexes
                                    (map open-input-file filenames)))))
         (infile (or (false-if-exception (car (qop '())))
                     (error "no input file specified")))
         (inport (open-file infile OPEN_READ))
         (outfile (or (qop 'output-file) #f))
         (outport (if outfile
                      (open-file outfile OPEN_WRITE)
                      (current-output-port)))
         (ward (qop 'write-deps
                    (lambda (mkfrag)
                      (dep-ward outfile mkfrag)))))
    (catch #t
           (lambda ()
             (write-line "@c generated file -- do not edit!" outport)
             (set-current-output-port outport)
             (process inport index ward)
             (for-each close-port (list inport outport)))
           (lambda (type . args)
             (cond (outfile
                    (close-port outport)
                    (delete-file outfile)
                    (simple-format (current-error-port)
                                   "twerp2texi: error caught: ~S ~S\ndeleted ~A\n"
                                   type args outfile)))
             ;; re-throw
             (scm-error type #f (car args) (cdr args) #f)))
    (and ward (ward #:write!)))
  #t)

(define (main args)
  (HVQC-MAIN args twerp2texi/qop
             '(usage . commentary)
             '(package . "Guile")
             '(option-spec (index-file (single-char #\I)
                                       (merge-multiple? #t)
                                       (required? #t)
                                       (value #t))
                           (output-file (single-char #\o)
                                        (value #t))
                           (write-deps (single-char #\d)
                                       (value #t)))))

;;; twerp2texi ends here
