;;;; -*- Mode: Lisp; Base: 10; Syntax: ANSI-Common-Lisp; Package: kira; Coding: utf-8 -*-
;;;; Copyright © 2018 David Mullen. All Rights Reserved. Origin: <https://cl-pdx.com/kira/>

(in-package :kira)

(defstatic *registered-markup-table*
  (make-hash-table :test 'equalp))

(defstruct markup name tag)
(defstruct (inline-markup (:include markup)))
(defstruct (block-markup (:include markup)))

(defmacro define-inline-markup (name . tag)
  `(setf (gethash (symbol-name ',name) *registered-markup-table*)
         (make-inline-markup :name ',name :tag ',(or tag `(,name)))))

(define-inline-markup
  :a :a (:href uri))

(define-inline-markup :b)
(define-inline-markup :cite)
(define-inline-markup :code)
(define-inline-markup :del)
(define-inline-markup :em)
(define-inline-markup :i)
(define-inline-markup :ins)
(define-inline-markup :kbd)
(define-inline-markup :mark)
(define-inline-markup :q)
(define-inline-markup :s)
(define-inline-markup :samp)
(define-inline-markup :small)
(define-inline-markup :strike)
(define-inline-markup :strong)
(define-inline-markup :sub)
(define-inline-markup :sup)
(define-inline-markup :tt)
(define-inline-markup :var)

(define-inline-markup
  :img :img (:src uri))

(define-inline-markup :time
  :time (:datetime local-time))

(defun sanitize-attribute (value type)
  (unless value (return-from sanitize-attribute "empty"))
  (when (and (eq type 'uri) (stringp (uri-scheme value)))
    (return-from sanitize-attribute "#uri-scheme-restricted"))
  (grab (:attribute value)))

(defun clean-markup-tag (tag arguments)
  (loop with (name . attributes) = tag
        for argument in arguments
        for (attribute-name attribute-type) in attributes
        for value = (sanitize-attribute argument attribute-type)
        nconc `(,attribute-name ,value) into sanitized-attributes
        finally (return (values name sanitized-attributes))))

(defmacro define-block-markup (name . tag)
  `(setf (gethash ,(symbol-name name) *registered-markup-table*)
         (make-block-markup :name ',name :tag ',(or tag `(,name)))))

(define-block-markup :blockquote)
(define-block-markup :bq :blockquote)
(define-block-markup :h1)
(define-block-markup :h2)
(define-block-markup :h3)
(define-block-markup :h4)
(define-block-markup :h5)
(define-block-markup :h6)
(define-block-markup :pre)

(defmacro get-balanced-token (&rest additional-stop-characters)
  `(loop with open-count fixnum = 0 and start fixnum = $lexer-position
         do (lexer-skip (not (or whitespace "()<>{}" ,@additional-stop-characters)))
            (cond ((lexer-match #\() (incf open-count))
                  ((and (> open-count 0) (lexer-match #\))) (decf open-count))
                  (t (return (when (> $lexer-position start) ; Have token?
                               (subseq $lexer-string start $lexer-position)))))))

(defmacro get-markup-tag (markup)
  `(loop with tag = (markup-tag ,markup)
         with count = (length (rest tag))
         for i fixnum from 0 below count
         do (lexer-skip whitespace)
         collecting (get-balanced-token) into arguments
         finally (return (clean-markup-tag tag arguments))))

(defmacro print-text (pending-break plain-text-start)
  ;; The pending break applies to text and inline elements, but not block-level elements.
  `(progn (when ,pending-break (start-tag ,pending-break nil) (setq ,pending-break nil))
          (when (and ,plain-text-start (not (eq ,plain-text-start :print-pending-break-only)))
            (write-string $lexer-string stream :start ,plain-text-start :end $lexer-position)
            (setq ,plain-text-start $lexer-position))))

(defmacro start-markup-tag (operator-name &optional (op 'list))
  `(whereas ((markup (gethash ,operator-name *registered-markup-table*)))
     (multiple-value-bind (name attributes) (get-markup-tag markup)
       (typecase markup
         (block-markup (setq pending-break nil))
         (t (print-text pending-break :print-pending-break-only)))
       (push (,op (start-tag name attributes)) stack)
       (prog1 name (lexer-skip horizontal-whitespace)))))

(deftype operator ()
  "Paragraph or bracketed extent."
  '(or keyword (cons keyword null)))

(defun end-markup-tag (operator)
  (cond ((keywordp operator) (end-tag operator))
        ((consp operator) (end-tag (car operator)))))

(defun verbatim-mode-p (stack)
  (loop for operator in stack
        for name = (parse-html-tag operator)
        thereis (member name '(:pre :code))))

(defun %get-li-marker (string start end)
  (declare (type array-index start end))
  (declare (optimize (safety 0) (speed 3)))
  (let ((li-marker (make-string (- end start))))
    (loop for i of-type array-index from start below end
          for j of-type array-index = 0 then (1+ j) finally (return li-marker)
          do (setf (schar li-marker j) (if (eql (schar string i) #\#) #\# #\-)))))

(defmacro match-li-marker ()
  (with-gensyms (marker-start marker-end)
    `(lexer-advance-if (let ((,marker-start (lexer-skip "-*+#")))
                         (declare (type array-index ,marker-start))
                         (when (> $lexer-position ,marker-start)
                           (let ((,marker-end $lexer-position))
                             (declare (type array-index ,marker-end))
                             (lexer-skip horizontal-whitespace)
                             (when (> $lexer-position ,marker-end)
                               (%get-li-marker $lexer-string
                                               ,marker-start
                                               ,marker-end))))))))

(defun compute-li-level (stack)
  (loop with level = '() for operator in stack
        if (eq operator :ul) do (push #\- level)
        else if (eq operator :ol) do (push #\# level)
        finally (return (coerce level 'simple-string))))

(defmacro %l-close (old-n old-level mismatch)
  `(loop for i of-type array-index downfrom (1- ,old-n) downto ,mismatch
         do (whereas ((tail (member (if (eql (schar ,old-level i) #\-) :ul :ol) stack)))
              (end-tag (first tail)) (setq stack (nconc (ldiff stack tail) (rest tail))))))

(defmacro %l-open (new-n new-level mismatch)
  `(loop for i of-type array-index from ,mismatch below ,new-n
         for tag = (if (eql (schar ,new-level i) #\-) :ul :ol)
         do (push (start-tag tag nil) stack)))

(defmacro %update-list-level (old-level new-level mismatch)
  `(let ((old-n (length ,old-level)) (new-n (length ,new-level)))
     (declare (type array-index old-n) (type array-index new-n))
     (when (<= new-n old-n) (%l-close old-n ,old-level ,mismatch))
     (when (>= new-n old-n) (%l-open new-n ,new-level ,mismatch))))

(defmacro pop-inside-list (stack)
  (with-gensyms (operator st tail)
    `(loop for ,st on ,stack as (,operator . ,tail) = ,st
           until (or (eq ,operator :ul) (eq ,operator :ol))
           when (keywordp ,operator) do (end-markup-tag ,operator)
           and do (setq ,stack (nconc (ldiff ,stack ,st) ,tail))
           finally (return ,stack))))

(defun render-text (string &optional inline-text-p)
  (with-lexer ((or string (return-from render-text)))
    (loop with stream = *html-output* and li-marker = (match-li-marker)
          with pending-break = (if (or inline-text-p li-marker) nil :p)
          with stack = '() for scheme = nil as token = nil
          for plain-text-start of-type array-index = $lexer-position
          for verbatim-mode-p = nil then (verbatim-mode-p stack)
          do (unless verbatim-mode-p
               (when li-marker
                 (let ((old-level (compute-li-level stack)))
                   (whereas ((mismatch (mismatch li-marker old-level)))
                     (%update-list-level old-level li-marker mismatch)
                     (when (eql mismatch 0) (setq pending-break nil)))
                   (start-tag :li nil)
                   (setq li-marker nil)
                   (if (and (eq pending-break :p) (plusp (length old-level)))
                       (print-text pending-break :print-pending-break-only)
                       (setq pending-break nil))))
               (lexer-advance-if
                (setq scheme (match-symbol (:http :https :ftp) :terminated-by #\:))
                (lexer-match #\/) (lexer-match #\/) (setq token (get-balanced-token)))
               (whereas ((full-link (if token (format nil "~(~A~)://~A" scheme token))))
                 (print-text pending-break :print-pending-break-only)
                 (setq plain-text-start $lexer-position)
                 (html ((:a :href full-link) full-link))))
             (lexer-skip (not (or "\\()<>&:" whitespace)))
             (when (< plain-text-start $lexer-position)
               (print-text pending-break plain-text-start))
             (lexer-case (char (loop-finish))
               (#\Newline (let* ((start (1- (the array-index (lexer-skip #\Newline))))
                                 (line-break-p (eql (- $lexer-position start) 1)))
                            (declare (type array-index start) (type boolean line-break-p))
                            (when (and stack (not line-break-p)) ; UL/OL backstops auto-closing.
                              (setq verbatim-mode-p (verbatim-mode-p (pop-inside-list stack))))
                            (setq li-marker (if verbatim-mode-p nil (match-li-marker)))
                            (unless (or li-marker line-break-p)
                              (loop for operator in stack if (keywordp operator) do (end-markup-tag operator))
                              (setq verbatim-mode-p (verbatim-mode-p (setq stack (delete-if #'keywordp stack)))))
                            (cond ((not verbatim-mode-p) (setq pending-break (if line-break-p :br :p)))
                                  ((write-string $lexer-string stream :start start :end $lexer-position)))))
               (#\< (print-text pending-break :print-pending-break-only) (write-string "&lt;" stream))
               (#\> (print-text pending-break :print-pending-break-only) (write-string "&gt;" stream))
               (#\& (print-text pending-break :print-pending-break-only) (write-string "&amp;" stream))
               (whitespace (print-text pending-break plain-text-start))
               (#\) (or (loop for (operator . tail) on stack
                              if (typep operator '(or cons (eql #\()))
                              do (setq stack (nconc stack-front tail))
                                 (end-markup-tag operator)
                                 (return (consp operator))
                              ;; Remember the operators we bypassed.
                              else collect operator into stack-front)
                        ;; Plain-text parenthesis, which may or may
                        ;; not have had its counterpart on the stack.
                        (print-text pending-break plain-text-start)))
               ;; Operator with explicit (bracketed) extent.
               (#\( (let ((operator-name (and (lexer-match #\:) (get-balanced-token #\&))))
                      (or (and operator-name (not verbatim-mode-p) (start-markup-tag operator-name))
                          (progn (print-text pending-break plain-text-start) (push #\( stack)))))
               ;; Operator with implicit (paragraph) extent.
               (#\: (let ((operator-name (get-balanced-token #\&)))
                      (or (when (and operator-name (not verbatim-mode-p))
                            (start-markup-tag operator-name prog1))
                          (print-text pending-break plain-text-start))))
               ;; Potential escape sequence (only for special characters).
               (#\\ (print-text pending-break :print-pending-break-only)
                    (write-char (or (lexer-match "\\:()") #\\) stream)))
          ;; Any open tags get closed automatically.
          finally (mapc #'end-markup-tag stack))))