From: Damien Kick
Subject: Fun with DCGs in Allegro
Date: 
Message-ID: <AqWji.4762$rR.1331@newsread2.news.pas.earthlink.net>
And so of course Norvig is much smarter than I am 
<http://tinyurl.com/2yapyj>.  But he did leave an exercise for the 
reader in PAIP, Chapter 20, Exercise 20.1, "make-dcg violates one of the 
cardinal rules of macros.  What does it do wrong?  How would you fix it?"

(defmacro rule (head &optional (arrow ':-) &body body)
   (funcall (get arrow 'rule-function) head body))

(setf (get ':- 'rule-function)
       #'(lambda (head body) `(<- ,head ,body)))

(setf (get ':-- 'rule-function)
       #'(lambda (head body) `(<-- ,head ,body)))

(setf (get '--> 'rule-function)
       #'(lambda (head body) `(<- ,@(rewrite-dcg-term head body))))

(setf (get '---> 'rule-function)
       #'(lambda (head body) `(<-- ,@(rewrite-dcg-term head body))))

(defmacro ---> (head &body body)
   `(<-- ,@(rewrite-dcg-term head body)))

(defmacro --> (head &body body)
   `(<- ,@(rewrite-dcg-term head body)))

(defun dcg-normal-goal-p (x) (or (eq x '!) (eq (car x) :test)))

(defun dcg-word-list-p (x) (eq (car x) :word))

;; PAIP calls it make-dcg.
(defun rewrite-dcg-term (head body)
   (let* ((n (count-if (complement #'dcg-normal-goal-p) body))
          (symbols (use make-array (1+ n) :initial-contents
                        (loop repeat (1+ n) collect (gensym "?S/")))))
     (labels ((lemma (body n)
                (if (null body)
                    nil
                    (let ((goal (car body)))
                      (cond
                        ((eq goal '!) (cons '! (lemma (cdr body) n)))
                        ((dcg-normal-goal-p goal)
                         (append (cdr goal)
                                 (lemma (cdr body) n)))
                        ((dcg-word-list-p goal)
                         (cons ` (= ,(aref symbols n)
                                    (,@(cdr goal)
                                     ,@(aref symbols (1+ n))))
                               (lemma (cdr body) (1+ n))))
                        (t (cons (append goal
                                         (list (aref symbols n)
                                               (aref symbols (1+ n))))
                                 (lemma (cdr body) (1+ n)))))))))
       `((,@head ,(aref symbols 0) ,(aref symbols n))
         ,@(lemma body 0)))))

I love being able to weave back and forth between Lisp and Prolog.

(defvar *strict-eol* t
   "Whether or not to be strict about end-of-line being a carriage
return followed by a newline.")

;; Says that an MM3 message has a from, to, cc, subject, some other
;; MM3 headers, and a body.
(---> (mm3-message)
   (from) (to) (cc) (subject)
   (mm3-headers) (eol) (mm3-body) (eol))

;; From is the sequence "From: " followed by an address and
;; end-of-line.  The out of the box DCG only supports lists, as this
;; is what basic Prolog provides.  Expanding sexp-logic to allow for
;; "From: " to be a shorthand for (list #\F #\r #\o #\m #\:) would be
;; very easy.
(---> (from)
   (:word #\F #\r #\o #\m #\: #\Space) (address) (eol))

(---> (to)
   (:word #\T #\o #\: #\Space) (address) (eol))

;;; Cc can be either empty or "Cc: " follwed by an address and EOL.
(---> (cc))
(--> (cc)
   (:word #\C #\c \: #\Space) (address) (eol))

;;; Just use two fake addresses for this little demonstration.
(---> (address)
   (:word #\j #\o #\h #\n #\- #\d #\o #\e ··@ #\h #\e #\r #\e))
(--> (address)
   (:word #\j #\a #\n #\e #\- #\d #\o #\e ··@ #\t #\h #\e #\r #\e))

;; And a fake subject.
(---> (subject)
   (:word #\S #\u #\b #\j #\e #\c #\t #\: #\Space #\H #\e #\l #\l #\o)
   (eol))

;;; MM3 headers got to be a little more complicated to express the
;;; zero or more without any repeated headers.  Part of this could be
;;; my inexperience with DCGs, though.  There is probably a more
;;; succint way of expressing this.
(---> (mm3-headers))
(--> (mm3-headers)
   (mm3-headers-lemma () ?headers))
(---> (mm3-headers-lemma ?headers (?header . ?headers))
   (mm3-header ?header)
   (:test (not (member ?header ?headers))))
(--> (mm3-headers-lemma ?headers (?header/1 ?header/2 . ?headers))
   (mm3-headers-lemma ?headers (?header/1 . ?headers))
   (mm3-header ?header/2)
   (:test (not (= ?header/2 ?header/1)))
   (:test (not (member ?header/2 ?headers))))

;;; Just invent two fake headers for the purpose of demonstration.
(---> (mm3-header x-mms-foo)
   (:word #\X #\- #\m #\m #\s #\- #\f #\o #\o #\: #\Space #\b #\a #\r)
   (eol))
(--> (mm3-header x-mms-bar)
   (:word #\X #\- #\m #\m #\s #\- #\b #\a #\r #\: #\Space #\b #\a #\z)
   (eol))

;;; Blah blah blah, the body.
(---> (mm3-body)
   (:word #\b #\l #\a #\h #\Space #\b #\l #\a #\h #\Space #\b #\l #\a #\h))

;;; And the end-of-line for an example of restricting or expanding on
;;; the allowed "sentences".  Only allow a blank newline when we are
;;; not being strict about end-of-line.
(---> (eol)
   (:word #\Return #\Linefeed))
(--> (eol)
   (:test (lispp (not *strict-eol*)))
   (:word #\Newline))

;;; A quick example of having lisp call out into the prolog and the
;;; prolog calling out into the lisp.
(defgeneric mm3-message-p (message))

(defmethod mm3-message-p ((message string))
   (mm3-message-p (coerce message 'list)))

(defmethod mm3-message-p ((message list))
   (prolog
     (lisp ?message message)
     (mm3-message ?message ())
     (lisp (return-from mm3-message-p ?message))))

(defun valid-message ()
   (use concatenate 'string
        "From: ········@here" (eol) "To: ········@there" (eol)
        "Subject: Hello" (eol) (eol) "blah blah blah" (eol)))

(defun invalid-message ()
   (use concatenate 'string
        "Form: ········@here" (eol) "To: ········@there" (eol)
        "Subject: Hello" (eol) (eol) "blah blah blah" (eol)))

(defun eol ()
   (coerce (list #\Return #\Linefeed) 'string))

 From the REPL:

USER> (mm3-message-p (valid-message))
(#\F #\r #\o #\m #\: #\Space #\j #\o #\h #\n #\- #\d #\o #\e ··@ #\h
  #\e #\r #\e #\Return #\Newline #\T #\o #\: #\Space #\j #\a #\n #\e #\-
  #\d #\o #\e ··@ #\t #\h #\e #\r #\e #\Return #\Newline #\S #\u #\b #\j
  #\e #\c #\t #\: #\Space #\H #\e #\l #\l #\o #\Return #\Newline
  #\Return #\Newline #\b #\l #\a #\h #\Space #\b #\l #\a #\h #\Space #\b
  #\l #\a #\h #\Return #\Newline)
USER> (mm3-message-p (invalid-message))
NIL
USER> (?- (mm3-message ?m ()))
?M = (F r o m :   j o h n - d o e @ h e r e

  T o :   j o h n - d o e @ h e r e

  S u b j e c t :   H e l l o



  b l a h   b l a h   b l a h

)

?M = (F r o m :   j o h n - d o e @ h e r e

  T o :   j o h n - d o e @ h e r e

  S u b j e c t :   H e l l o

  X - m m s - f o o :   b a r



  b l a h   b l a h   b l a h

)

?M = (F r o m :   j o h n - d o e @ h e r e

  T o :   j o h n - d o e @ h e r e

  S u b j e c t :   H e l l o

  X - m m s - b a r :   b a z



  b l a h   b l a h   b l a h

)

?M = (F r o m :   j o h n - d o e @ h e r e

  T o :   j o h n - d o e @ h e r e

  S u b j e c t :   H e l l o

  X - m m s - f o o :   b a r

  X - m m s - b a r :   b a z



  b l a h   b l a h   b l a h

).

No.
; No value
USER>

Now I just need to find an example of using DCGs with binary formats.