#!/usr/bin/guile \
-e "(scripts fspec2c)" -s
!#
;;; fspec2c --- translate flag spec to C code

;;	Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Usage: fspec2c [OPTIONS] FSPEC
;;
;; Write C fragment to stdout derived from running gperf (GNU perfect
;; hash function generator) on the flags scanned from the header as
;; specified in FSPEC.  OPTIONS are zero or more of:
;;
;;  -o, --output FILE    -- write to FILE instead of stdout
;;  -I, --include DIR    -- look in DIR instead of /usr/include
;;  -n, --no-cgen        -- write generated gperf input instead
;;                          of actually sending it to gperf
;;
;; The FSPEC file contents is a Scheme list with alternating keyword and
;; data elements.  These configure the two phases of fspec2c operation:
;; scanning for entries and feeding properly formatted entries to gperf.
;; Entries may be extracted by regular expression matching of lines in a
;; file (or a region of the file) or by a custom (user-defined) procedure.
;; Formatting includes addition of boilerplate C code and specification of
;; command-line options.
;;
;; Currently, these keywords are recognized:
;;
;;  #:init (BODY...)     -- evaluate BODY forms prior to scan
;;  #:one-custom THUNK   -- call THUNK for a single entry
;;  #:acc-custom THUNK   -- call THUNK for a list of entries
;;  #:infile RELPATH     -- which header to scan (string)
;;  #:region (BEG . END) -- only scan lines inside BEG-END regexps (strings)
;;  #:regexp REGEXP      -- regexp w/ at least one subexpression (string)
;;  #:key-match-num NUM  -- which subexpression is the key
;;  #:struct SPEC        -- struct-member specifiers (list)
;;  #:struct-name NAME   -- for "struct NAME { ... }" (string)
;;  #:gperf-options OPT  -- additional options for gperf (string)
;;  #:pre-boilerplate S  -- gperf "%{ ... %}" declarations (symbol)
;;  #:post-boilerplate S -- mostly direct C inclusion (symbol)
;;  #:subst (SUBSPEC...) -- compute some of the boilerplate dynamically
;;
;; For #:one-custom and #:acc-custom, THUNK is evaluated in the #:init
;; environment.  If #:one-custom is specified, THUNK should return an
;; entry or #f when there are no more entries.  Otherwise, if #:acc-custom
;; is specified, THUNK should return a list of entries.  Otherwise, the
;; default is to scan #:infile (in #:region if specified) for #:regexp,
;; and each entry is match data as returned by `regexp-exec'.
;;
;; SPEC is a list of struct-member specifiers, each a list of the form:
;;
;;   (MATCH C-TYPE-COMPONENT-1 [C-TYPE-COMPONENT-2 ...] C-VAR-NAME)
;;
;; MATCH can be a number to specify a subexpression of REGEXP to use for
;; static-data initialization, a string to be used for same (note that
;; to indicate a C string, you need to escape its double-quotes), or #f
;; to indicate no initialization.  If MATCH is a negative number N, the
;; the init data is formed by taking the subexpression `(abs N)' of
;; REGEXP and formatting it with `~S', thus quoting it.  Note: Numerical
;; MATCH is only useful when the entries are match data.  If MATCH is a
;; symbol, the procedure it names (in the #:init) is applied to the
;; entry to returned the desired output.
;;
;; MATCH can also be a list of the form:
;;
;;   (FORMATTER ACCESS [REST...])
;;
;; In this case, a value V is extracted by `(apply ACCESS entry REST)'
;; and furthermore formatted depending on the FORMATTER keyword, one of:
;;
;;   #:~A          -- Use `(simple-format #f "~A" V)'.
;;   #:~S          -- Use `(simple-format #f "~S" V)'.
;;   #:SCM_MAKINUM -- Use `(simple-format #f "SCM_MAKINUM (~A)" V)'.
;;
;; ACCESS and REST are evaluated in the #:init environment.  Per info node
;; "(gperf)User-supplied Struct", the first element of SPEC must be:
;;
;;   (MATCH char * name)
;;
;; This restriction may be lifted in the future, as fspec2c is enhanced
;; to interoperate better with those features of gperf that allow
;; customization of this struct element.  Specifying `#:key-match-num N'
;; is the same as specifying `-N' for MATCH.
;;
;; The #:pre-boilerplate and #:post-boilerplate data elements are
;; symbols rather than strings, in order to minimize quoting headaches.
;; Such symbols have the syntax: #{ TEXT }# where TEXT can include
;; anything (including spaces, newlines, and quote characters) except
;; the closing curly-brace-hash token.  The only other stipulation is
;; that backslashes MUST be doubled.  Boilerplate text is processed
;; by #:subst specifications (see below) and then passed more or less
;; straight-through by both fspec2c and gperf; errors in the code will
;; only be flagged during compilation.
;;
;; The #:subst keyword takes a list of "substitution specification"
;; forms, the general form of these being `("TAG" BODY...)', where "TAG"
;; is a string and BODY is a series of Scheme expressions.  In the
;; boilerplate (both pre and post), regions of text that look like:
;;
;;   /*@TAG@*/        (NOTE: no double-quotes)
;;
;; are deleted.  The BODY expressions are evaluated in the #:init
;; environment and their output (data sent to the current-output-port)
;; is used to replace the deleted text.  Two convenience procs are
;; pre-defined in the #:init environment to facilitate formatted output:
;;
;;   (fs s . args)  -- Send format-string `s' to the current output port,
;;                     replacing `~A' with the `display', and `~S' with
;;                     the `write' representation, respectively, of `args'.
;;   (fln s . args) -- Like `fs' but also send a newline afterwards.
;;
;; As a special shorthand notation, if SUBSPEC is a single symbol VAR, it
;; is taken to be equivalent to: `("VAR" (fs "~S" VAR))'.  This is useful
;; for concisely splicing in Scheme variable values, a common occurance.

;;; Code:

(define-module (scripts fspec2c)
  #:use-module (scripts PROGRAM)
  #:use-module (ice-9 rdelim)
  #:use-module (ice-9 regex)
  #:autoload (ice-9 editing-buffer) (editing-buffer)
  #:autoload (ice-9 popen) (open-output-pipe))

  ;;; Do this here so that the first form in the file is the
  ;;; `define-module' for the file.
  (define-module (scripts fspec2c user)
    #:use-module ((ice-9 gumm) #:select (eval-in-current-module-proc))
    #:use-module (ice-9 regex))
  (define-public fspec2c-user-eval (eval-in-current-module-proc))
  (define (fs s . args) (apply simple-format #t s args))
  (define (fln s . args) (apply fs s args) (newline))

  ;;; Ideally, we could use something like:
  ;;; + (define-module (scripts fspec2c user)
  ;;; +   #:use-module (ice-9 regex)
  ;;; +   #:eval-in-current-module-proc (fspec2c-user-eval)
  ;;; +   #:export (fspec2c-user-eval))

(define-module (scripts fspec2c)
  #:use-module (scripts fspec2c user))

(define (spec-proc filename)
  (let ((info (read (open-input-file filename))))
    (lambda (kw)
      (and=> (memq kw info) cadr))))

(define (scan-file filename spec)
  (let* ((rv '())
         (p (open-input-file filename))
         (rx (make-regexp (spec #:regexp)))
         (beg-rx (and=> (and=> (spec #:region) car) make-regexp))
         (end-rx (and=> (and=> (spec #:region) cdr) make-regexp))
         (in? #f))
    (let loop ((line (read-line p)))
      (or (eof-object? line)
          (number? in?)
          (begin
            (cond ((and end-rx in? (regexp-exec end-rx line))
                   (set! in? 0))
                  ((and beg-rx (not in?) (regexp-exec beg-rx line))
                   (set! in? #t))
                  ((and (or (and beg-rx in?)
                            (not beg-rx))
                        (regexp-exec rx line))
                   => (lambda (m)
                        (set! rv (cons m rv)))))
            (loop (read-line p)))))
    (close-port p)
    rv))

(define (render init)
  (cond ((pair? init)
         (let* ((access (fspec2c-user-eval (cadr init)))
                (rest (map fspec2c-user-eval (cddr init)))
                (extract (lambda (ent)
                           (apply access ent rest))))
           (case (car init)
             ((#:~A)
              (lambda (ent)
                (simple-format #f "~A" (extract ent))))
             ((#:~S)
              (lambda (ent)
                (simple-format #f "~S" (extract ent))))
             ((#:SCM_MAKINUM)
              (lambda (ent)
                (simple-format #f "SCM_MAKINUM (~A)" (extract ent))))
             (else (error "bad init:" init)))))
        ((symbol? init)
         (fspec2c-user-eval init))
        ((number? init)
         (if (positive? init)
             (lambda (m)
               (match:substring m init))
             (let ((n-init (- init)))
               (lambda (m)
                 (simple-format
                  #f "~S"
                  (match:substring m n-init))))))
        ((string? init)
         (lambda (m) init))
        (else
         (error "bad init:" init))))

(define (specs->munge-proc specs)
  (let ((work (map (lambda (spec)
                     (and (symbol? spec)
                          (set! spec `(,(symbol->string spec)
                                       (fs "~S" ,spec))))
                     (cons (simple-format #f "/*@~A@*/" (car spec))
                           (fspec2c-user-eval `(lambda ()
                                                 ,@(cdr spec)))))
                   specs)))
    (lambda (s)                         ; rv
      (editing-buffer s
        (for-each (lambda (w)
                    (goto-char (point-min))
                    (while (search-forward (car w) (point-max) #t)
                           (replace-match (with-output-to-string (cdr w)))))
                  work)
        (buffer-string)))))

(define (feed-gperf ofile no-cgen? spec gperf-input)
  (let* ((kw-num (spec #:key-match-num))
         (struct (or (spec #:struct) '()))
         (op (if no-cgen?
                 (if (string=? "-" ofile)
                     (current-output-port)
                     (open-output-file ofile))
                 (open-output-pipe
                  (simple-format #f "gperf~A~A --output-file=~A"
                                 (if (null? struct) "" " -t")
                                 (cond ((spec #:gperf-options)
                                        => (lambda (o) (simple-format #f " ~A" o)))
                                       (else ""))
                                 ofile))))
         (munge (cond ((spec #:subst) => specs->munge-proc)
                      (else identity))))
    (cond ((spec #:pre-boilerplate)
           => (lambda (x)
                (display "%{\n" op)
                (display (munge (symbol->string x)) op)
                (display "\n%}\n" op))))
    (cond ((null? struct))
          (else
           (simple-format op "struct ~A {" (or (spec #:struct-name)
                                               "randomstructname"))
           (for-each (lambda (x)
                       (for-each (lambda (xx)
                                   (simple-format op " ~A" xx))
                                 (cdr x))
                       (simple-format op ";"))
                     struct)
           (simple-format op " };\n%%\n")))
    (let* ((raw (map (lambda (struct-spec)
                       (and=> (car struct-spec) render))
                     struct))
           (out (map (lambda (proc)
                       (if proc
                           (lambda (m) (simple-format op ", ~A" (proc m)))
                           (lambda (m) #f)))
                     raw)))
      (for-each (lambda (m)
                  (simple-format op "~A" ((car raw) m))
                  (or (null? struct)
                      (for-each (lambda (proc) (proc m))
                                (cdr out)))
                  (newline op))
                gperf-input))
    (cond ((spec #:post-boilerplate)
           => (lambda (x)
                (display "%%\n" op)
                (display (munge (symbol->string x)) op)
                (newline op))))
    (if no-cgen?
        (or (eq? op (current-output-port))
            (close-port op))
        (close-pipe op))
    (or (string=? "-" ofile)
        (file-exists? ofile)
        (error "Problem with gperf, output file not written"))))

(define (fspec2c/qop qop)
  (let ((spec (spec-proc (let ((args (qop '())))
                           (or (and (pair? args) (car args))
                               (error "no input specified"))))))
    (and=> (spec #:init)
           (lambda (forms)
             (for-each fspec2c-user-eval forms)))
    (feed-gperf (or (qop 'output) "-")
                (qop 'no-cgen)
                spec
                (cond ((spec #:one-custom)
                       => (lambda (pname)
                            (let ((next (fspec2c-user-eval pname)))
                              ;; fixme: use stream
                              (let loop ((v (next)) (acc '()))
                                (if v
                                    (loop (next) (cons v acc))
                                    acc)))))
                      ((spec #:acc-custom)
                       => (lambda (pname)
                            ((fspec2c-user-eval pname))))
                      (else
                       (scan-file (in-vicinity
                                   (or (qop 'include) "/usr/include")
                                   (spec #:infile))
                                  spec)))))
  #t)

(define (main args)
  (HVQC-MAIN args fspec2c/qop
             '(usage . commentary)
             '(package . "Guile")
             '(version . "1.2")
             ;; 1.0 -- first release
             ;; 1.1 -- close-pipe bugfix; support string init; support #:region
             ;; 1.2 -- new support: init, custom scanning
             '(option-spec (output  (single-char #\o) (value #t))
                           (include (single-char #\I) (value #t))
                           (no-cgen (single-char #\n)))))

;;; fspec2c ends here
