#!/usr/bin/guile \
-e "(scripts c2doc)" -s
!#
;;; c2doc --- Extract documentation from .c files

;;	Copyright (C) 2002,2003,2004 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Usage: c2doc [--cpp CPP-PROG] -o OUTFILE INFILE -- [CPP-OPTIONS ...]
;;
;; Process INFILE using the C pre-processor, passing CPP-OPTIONS ... to it.
;; Note the "--" used to mark the start of the CPP-OPTIONS.  Write output
;; to a file named OUTFILE, typically named with .doc extension.  If errors
;; occur during C pre-processor operation, OUTFILE is not written and c2doc
;; exits failurefully.
;;
;; During snarfing, the pre-processor macro SCM_MAGIC_SNARFER is defined,
;; enabling recognition of SCM_REGISTER_PROC, SCM_DEFINE and SCM_DEFINE1
;; macros (through tokens SCM__DR, SCM__DP and SCM__D1, respectively).
;; See the Guile manual and libguile/snarf.h for details.
;;
;; By default c2doc uses the C pre-processor determined at Guile
;; configure time (use "guile-tools guile-config-data CPP" to display
;; this value).  You can specify an alternative program CPP-PROG using
;; either the command line option "--cpp CPP-PROG", or by setting the env
;; var CPP to CPP-PROG (command line option takes precendece).  CPP-PROG
;; may name either a program or a program and some initial command-line
;; arguments.  In the latter case, remember to quote CPP-PROG to protect
;; against shell mishaps.
;;
;; If the documentation string ends with the sequence:
;;
;;   \n\n-sig: (SIGNATURE-HINT)
;;
;; (that is, two newlines, hyphen, #\s, #\i, #\g, colon, space, left
;; paren, arbitrary non-paren text, and right paren), then SIGNATURE-HINT
;; is used instead of the signature normally inferred from the req/opt/var
;; numbers and given C arglist.  This signature hint portion is omitted
;; from the resulting docstring, as well.  SIGNATURE-HINT can contain
;; space-separated symbols, square braces and ellipses, for example:
;; "a b [c [d]] [e f g ...]".

;;; Code:

(define-module (scripts c2doc)
  #:autoload (scripts PROGRAM) (HVQC-MAIN)
  #:autoload (scripts guile-config-data) (guile-config-data)
  #:autoload (srfi srfi-13) (string-join string-trim-right)
  #:autoload (ice-9 popen) (open-input-pipe)
  #:autoload (ice-9 rdelim) (read-line)
  #:autoload (ice-9 regex) (match:substring match:start match:end)
  #:autoload (database tmpfile) (tmpfile)
  ;; nothing for now
  ;; #:export (c2doc)
  )

;; support

(define subs make-shared-substring)

(define quote-space-quote-rx "\" *\"")

(define (clean-func-name func-name)
  (let loop ((start 0) (acc '()))
    (cond ((regexp-exec quote-space-quote-rx func-name start)
           => (lambda (m)
                (loop (match:end m)
                      (cons (subs func-name start (match:start m)) acc))))
          (else
           (reverse (cons (subs func-name start) acc))))))

(define SCM-arg-rx "SCM *([^,]+),* *")

(define (clean-sig sig rov-sum)
  (let ((len (string-length sig)))
    (let loop ((start 0) (acc '()))
      (if (>= start len)
          (or (and (not (= 0 len))
                   (not (= (length acc) rov-sum))
                   (error (simple-format #f "sig not clean: exp:~A actual:~A"
                                         rov-sum (reverse acc))))
              (reverse acc))            ; retval
          (cond ((regexp-exec SCM-arg-rx sig start)
                 => (lambda (m)
                      (loop (match:end m 0)
                            (cons (match:substring m 1) acc))))
                (loop len acc))))))

(define (read-docs port)
  (let loop ((obj (read port)) (acc '()))
    (cond ((or (eof-object? obj)
               (eq? 'SCM__E obj))       ; end symbol
           (let ((rv (string-trim-right
                      (apply string-append (reverse acc)))))
             (cond ((string=? "" rv)
                    rv)
                   ((let ((len (string-length rv)))
                      (and (> len 10)
                           (char=? #\) (string-ref rv (1- len)))
                           (let ((maybe (string-rindex rv #\()))
                             (and maybe
                                  (> maybe 8)
                                  (string=? "\n\n-sig: "
                                            (substring rv (- maybe 8) maybe))
                                  maybe))))
                    => (lambda (lparen)
                         (string-set! rv lparen #\space)
                         (cons (substring rv lparen)            ;;; sig hint
                               (substring rv 0 (- lparen 7))))) ;;; docstring
                   (else
                    (string-append rv "\n")))))
          ((string? obj)
           (loop (read port) (cons obj acc)))
          (else
           (error "weird doc! what is this object? =>" obj)))))

(define (format-args/names req opt var . names)
  ;; e.g.: (format-args 2 2 2 'some 'body 'wants 'me 'to 'hurl 'badly)
  ;;        => " some body [wants [me]] [to hurl ...]"
  ;; use `names' if available, otherwise "argN"
  (let* ((v (list->vector names))
         (count (vector-length v)))

    (define (arg n) (if (<= n count)
                        (vector-ref v (1- n))
                        (list "arg" (number->string n))))
    (define (buddy-req n) (list " " (arg (+ 1 n))))
    (define (buddy-opt n) (list " [" (arg (+ 1 n req))))
    (define (buddy-var n) (list (arg (+ 1 n req opt)) " "))
    (define (proper-friends buddy all) (map (lambda (n) (buddy n)) all))

    (list
     (proper-friends buddy-req (iota req))
     (proper-friends buddy-opt (iota opt))
     (make-list opt "]")
     (or (and (= 0 var) '())
         (list " [" (proper-friends buddy-var (iota var)) "...]")))))

(define (format-args sig req opt var)
  (apply format-args/names req opt var (clean-sig sig (+ req opt var))))

(define (find+format-args m sig-get req-get opt-get var-get)
  (apply format-args (map (lambda (proc) (proc m))
                          (list (or sig-get (lambda (m) ""))
                                req-get
                                opt-get
                                var-get))))

(define (walk-proc port)
  (letrec ((walk (lambda (tree)
                   (if (list? tree)
                       (for-each walk tree)
                       (display tree port)))))
    (lambda tree
      (walk tree))))

;; SCM__DP

(define DP-regexp
  (string-append " *SCM__I.*SCM__DP"
                 " *\"(.+)\""           ; 1 -- func-name
                 " *\"[(](.*)[)]\""     ; 2 -- sig
                 " *\\| *([0-9]*)"      ; 3 -- required-count
                 " *\\| *([0-9]*)"      ; 4 -- optional-count
                 " *\\| *([0-9]*)"      ; 5 -- variable-count
                 " *\\| *\"([^ ]+)\":"  ; 6 -- filename
                 " *([0-9]+)"           ; 7 -- line number
                 ".*SCM__S"))

(define (DP-func-name m)                 (match:substring m 1))
(define (DP-sig       m)                 (match:substring m 2))
(define (DP-req-count m) (string->number (match:substring m 3)))
(define (DP-opt-count m) (string->number (match:substring m 4)))
(define (DP-var-count m) (string->number (match:substring m 5)))
(define (DP-filename  m)                 (match:substring m 6))
(define (DP-line-num  m)                 (match:substring m 7))

(define (>>DP-proc inp outp)
  (let ((walk (walk-proc outp)))
    (lambda (m)
      (walk "\f\n("
            (clean-func-name (DP-func-name m))
            (let ((inferred (find+format-args m DP-sig
                                              DP-req-count
                                              DP-opt-count
                                              DP-var-count))
                  (raw (read-docs inp)))
              (if (string? raw)
                  (list inferred ")\n" raw)
                  (list (car raw) "\n" (cdr raw))))
            #\soh "[" (DP-filename m) ":" (DP-line-num m) "]\n"))))

;; SCM__DR

(define DR-regexp
  (string-append " *SCM__I.*SCM__DR"
                 " *\"(.+)\""           ; 1 -- func-name
                 " *\\| *([0-9]*)"      ; 2 -- required-count
                 " *\\| *([0-9]*)"      ; 3 -- optional-count
                 " *\\| *([0-9]*)"      ; 4 -- variable-count
                 " *\\| *\"([^ ]+)\":"  ; 5 -- filename
                 " *([0-9]+)"           ; 6 -- line number
                 ".*SCM__S *([^ ]+)"))  ; 7 -- C-func-name

(define (DR-func-name m)                 (match:substring m 1))
(define (DR-req-count m) (string->number (match:substring m 2)))
(define (DR-opt-count m) (string->number (match:substring m 3)))
(define (DR-var-count m) (string->number (match:substring m 4)))
(define (DR-filename  m)                 (match:substring m 5))
(define (DR-line-num  m)                 (match:substring m 6))
(define (DR-C-fn-name m)                 (match:substring m 7))

(define (>>DR-proc outp)
  (let ((walk (walk-proc outp)))
    (lambda (m)
      (walk "\f\n("
            (clean-func-name (DR-func-name m))
            (find+format-args m #f
                              DP-req-count
                              DR-opt-count
                              DR-var-count)
            ")\n"
            (DR-C-fn-name m) "\n"
            #\soh "[" (DR-filename m) ":" (DR-line-num m) "]\n"))))

;; SCM__D1

(define D1-regexp
  (string-append " *SCM__I.*SCM__D1"
                 " *\"(.+)\""           ; 1 -- func-name
                 " *\"[(](.*)[)]\""     ; 2 -- sig
                 " *\\| *2"             ;   -- required-count always 2
                 " *\\| *0"             ;   -- optional-count always 0
                 " *\\| *0"             ;   -- variable-count always 0
                 " *\\| *\"([^ ]+)\":"  ; 3 -- filename
                 " *([0-9]+)"           ; 4 -- line number
                 ".*SCM__S"))

(define (D1-func-name m) (match:substring m 1))
(define (D1-sig       m) (match:substring m 2))
(define (D1-filename  m) (match:substring m 3))
(define (D1-line-num  m) (match:substring m 4))

(define (>>D1-proc inp outp)
  (let ((walk (walk-proc outp)))
    (lambda (m)
      (walk "\f\n("
            (clean-func-name (D1-func-name m))
            (let ((inferred (format-args (D1-sig m) 2 0 0))
                  (raw (read-docs inp)))
              (if (string? raw)
                  (list inferred ")\n" raw)
                  (list (car raw) "\n" (cdr raw))))
            #\soh
            "[" (D1-filename m) ":" (D1-line-num m) "]\n"))))

;; stages

(define save-on-rx-len #f)
(define save-on-rx #f)
(define save-off-rx #f)

(define (cpp-output-stash cpp infile cpp-options)
  (cond ((not save-on-rx)
         (let ((save-on-trigger "SCM__I"))
           (set! save-on-rx-len (string-length save-on-trigger))
           (set! save-on-rx (make-regexp save-on-trigger)))
         (set! save-off-rx (make-regexp "SCM__E"))))
  (let* ((inp (open-input-pipe
               (simple-format #f "~A -DSCM_MAGIC_SNARFER ~A ~A"
                              cpp infile
                              (if (list? cpp-options)
                                  (string-join cpp-options " ")
                                  cpp-options))))
         (next (lambda () (read-line inp)))
         (stash (tmpfile)))
    (let loop ((line (next)) (save?/start #f))
      (cond ((eof-object? line)
             (and (= 0 (status:exit-val (close-pipe inp)))
                  (begin
                    (seek stash 0 SEEK_SET)
                    stash)))            ; retval
            (save?/start
             (cond ((and (number? save?/start)
                         (regexp-exec save-on-rx line
                                      (+ save?/start save-on-rx-len)))
                    => (lambda (m)
                         (let* ((snip (match:start m 0))
                                (rest (subs line snip)))
                           (write-line (subs line 0 snip) stash)
                           (loop rest
                                 (cond ((regexp-exec save-on-rx rest)
                                        => (lambda (mm) (match:start mm 0))))))))
                   (else
                    (write-line line stash)
                    (loop (next) (not (regexp-exec save-off-rx line))))))
            (else
             (cond ((regexp-exec save-on-rx line)
                    => (lambda (m) (loop line (match:start m 0))))
                   (else
                    (loop (next) #f))))))))

(define DP-rx #f)
(define DR-rx #f)
(define D1-rx #f)

(define (spin-thunk inp >>DP >>DR >>D1)
  (cond ((string? quote-space-quote-rx)
         (set! quote-space-quote-rx (make-regexp quote-space-quote-rx))
         (set! SCM-arg-rx (make-regexp SCM-arg-rx))
         (set! DP-rx (make-regexp DP-regexp))
         (set! DR-rx (make-regexp DR-regexp))
         (set! D1-rx (make-regexp D1-regexp))))
  (let ((try-rx (lambda (rx line bol)
                  (cond ((regexp-exec rx line)
                         => (lambda (m)
                              (seek inp (+ bol (match:end m)) SEEK_SET)
                              m))
                        (else #f)))))
    (lambda ()                          ; retval
      (let loop ((bol  0)
                 (line (read-line inp)))
        (or (eof-object? line)
            (begin
              (cond ((try-rx DP-rx line bol) => >>DP)
                    ((try-rx DR-rx line bol) => >>DR)
                    ((try-rx D1-rx line bol) => >>D1))
              (loop (seek inp 0 SEEK_CUR)
                    (read-line inp))))))))

;; dispatch

(define (c2doc outp infile cpp cpp-options)
  (let ((inp (or (cpp-output-stash cpp infile cpp-options)
                 (error "C pre-processor had problems"))))
    (catch #t                           ; everything
           (spin-thunk inp
                       (>>DP-proc inp outp)
                       (>>DR-proc     outp)
                       (>>D1-proc inp outp))
           (lambda args                 ; handler
             (let ((file (port-filename outp)))
               (close-port outp)
               (and file
                    (file-exists? file)
                    (delete-file file)))
             (close-port inp)
             (apply scm-error args))))) ; re-throw

(define (c2doc/qop qop)
  (c2doc (open-output-file
          (or (qop 'outfile)
              (error "no output file specified")))
         (car (qop '()))                ; infile
         (or (qop 'cpp)
             (getenv "CPP")
             (assq-ref guile-config-data 'CPP))
         (cdr (qop '()))))              ; cpp-options

(define (main args)
  (HVQC-MAIN args c2doc/qop
             '(usage . commentary)
             '(package . "Guile")
             '(option-spec (cpp     (value #t))
                           (outfile (value #t)
                                    (single-char #\o)))))

;;; c2doc ends here
