;;;; -*- Mode: Lisp; Base: 10; Syntax: ANSI-Common-Lisp; Package: kira; Coding: utf-8 -*-
;;;; Copyright © 2024 David Mullen. All Rights Reserved. Origin: <https://cl-pdx.com/kira/>

(in-package :kira)

(defstatic *registered-markup-table*
  (make-hash-table :test 'equalp))

(defstruct markup name tag)
(defstruct (inline-markup (:include markup)))
(defstruct (block-markup (:include markup)))
(define-char-class special () "\\:()[]|{}")

(defmacro define-inline-markup (name &optional tag)
  `(setf (gethash (symbol-name ',name) *registered-markup-table*)
         (make-inline-markup :name ',name :tag ',tag)))

(define-inline-markup :b)
(define-inline-markup :cite)
(define-inline-markup :code)
(define-inline-markup :del)
(define-inline-markup :em)
(define-inline-markup :hr)
(define-inline-markup :i)
(define-inline-markup :ins)
(define-inline-markup :kbd)
(define-inline-markup :mark)
(define-inline-markup :q)
(define-inline-markup :s)
(define-inline-markup :samp)
(define-inline-markup :small)
(define-inline-markup :strike)
(define-inline-markup :strong)
(define-inline-markup :sub)
(define-inline-markup :sup)
(define-inline-markup :tt)
(define-inline-markup :var)

(define-inline-markup :a (href))
(define-inline-markup :img (src . alt))
(define-inline-markup :time (datetime))

(defmacro define-block-markup (name &optional tag)
  `(setf (gethash ,(symbol-name name) *registered-markup-table*)
         (make-block-markup :name ',name :tag ',tag)))

(define-block-markup :blockquote)
(define-block-markup :bq :blockquote)
(define-block-markup :h1)
(define-block-markup :h2)
(define-block-markup :h3)
(define-block-markup :h4)
(define-block-markup :h5)
(define-block-markup :h6)
(define-block-markup :pre)
  
(defmacro get-balanced-token (&optional (class '(not whitespace)) (close #\)))
  (let ((open (or (whereas ((cl-pos (position close ")]}"))) (char "([{" cl-pos)) close)))
    `(loop with start of-type array-index = lexer-position initially (reset-buffer buffer)
           with open-count fixnum = 0 for char = (lexer-match (and ,class (not (or ,close #\\))))
           do (cond (char (vector-push-extend char buffer) (when (eql char ,open) (incf open-count)))
                    ((lexer-match #\\) (vector-push-extend (or (lexer-match special) #\\) buffer))
                    ((and (> open-count 0) (lexer-match ,close))
                     (vector-push-extend ,close buffer) (decf open-count))
                    ((return (when (> lexer-position start) buffer)))))))

(defun sanitize-attribute (string)
  (with-lexer ((or string (return-from sanitize-attribute "")))
    (grab (:when (match-symbol :javascript :terminated-by #\:) "noscript:")
          (escape-html lexer-string t :start lexer-position :end lexer-end))))

(defmacro parse-attribute (name value-form)
  `(let ((value (progn (lexer-skip whitespace) ,value-form)))
     (collect (ensure-keyword ,name) :into markup-attributes)
     (collect (sanitize-attribute value) :into markup-attributes)))

(defmacro get-markup-tag (markup)
  `(collecting (markup-attributes)
     (loop with tag do (setq tag (markup-tag ,markup)) until (listp tag)
           do (setq ,markup (gethash (symbol-name tag) *registered-markup-table*))
           finally (destructuring-bind (prefix-name . text-name) (or tag '(nil))
                     (when prefix-name (parse-attribute prefix-name (get-balanced-token)))
                     (when text-name (parse-attribute text-name (get-balanced-token t)))
                     (return (values (markup-name ,markup) markup-attributes))))))

(defmacro print-text (pending-break plain-text-start)
  ;; The pending break applies to text and inline elements, but not block-level elements.
  `(progn (when ,pending-break (start-tag ,pending-break nil) (setq ,pending-break nil))
          (when (and ,plain-text-start (not (eq ,plain-text-start :print-pending-break-only)))
            (write-string lexer-string stream :start ,plain-text-start :end lexer-position)
            (setq ,plain-text-start lexer-position))))

(defmacro start-markup-tag (operator-name &optional (op 'list))
  `(whereas ((markup (gethash ,operator-name *registered-markup-table*)))
     (multiple-value-bind (name attributes) (get-markup-tag markup)
       (typecase markup
         (block-markup (setq pending-break nil))
         (t (print-text pending-break :print-pending-break-only)))
       (push (,op (start-tag name attributes)) stack)
       (prog1 name (lexer-skip horizontal-whitespace)))))

(deftype operator ()
  "Paragraph or bracketed extent."
  '(or keyword (cons keyword null)))

(defun end-markup-tag (operator)
  (cond ((keywordp operator) (end-tag operator))
        ((consp operator) (end-tag (car operator)))))

(defun verbatim-mode-p (stack)
  (loop for operator in stack
        for name = (parse-html-tag operator)
        thereis (member name '(:pre :code))))

(defmacro match-li-marker ()
  (with-gensyms (marker-start marker-end)
    `(lexer-advance-if (let ((,marker-start (lexer-skip "*#")))
                         (declare (type array-index ,marker-start))
                         (when (> lexer-position ,marker-start)
                           (let ((,marker-end lexer-position))
                             (declare (type array-index ,marker-end))
                             (lexer-skip horizontal-whitespace)
                             (when (> lexer-position ,marker-end)
                               (subseq lexer-string
                                       ,marker-start
                                       ,marker-end))))))))

(defun compute-li-level (stack)
  (loop with level = '() for operator in stack
        if (eq operator :ul) do (push #\* level)
        else if (eq operator :ol) do (push #\# level)
        finally (return (coerce level 'simple-string))))

(defmacro %l-close (old-n old-level mismatch)
  `(loop for i of-type array-index downfrom (1- ,old-n) downto ,mismatch
         do (whereas ((tail (member (if (eql (schar ,old-level i) #\*) :ul :ol) stack)))
              (end-tag (first tail)) (setq stack (nconc (ldiff stack tail) (rest tail))))))

(defmacro %l-open (new-n new-level mismatch)
  `(loop for i of-type array-index from ,mismatch below ,new-n
         for tag = (if (eql (schar ,new-level i) #\*) :ul :ol)
         do (push (start-tag tag nil) stack)))

(defmacro %update-list-level (old-level new-level mismatch)
  `(let ((old-n (length ,old-level)) (new-n (length ,new-level)))
     (declare (type array-index old-n) (type array-index new-n))
     (when (<= new-n old-n) (%l-close old-n ,old-level ,mismatch))
     (when (>= new-n old-n) (%l-open new-n ,new-level ,mismatch))))

(defmacro pop-inside-list (stack)
  (with-gensyms (operator st tail)
    `(loop for ,st on ,stack as (,operator . ,tail) = ,st
           until (or (eq ,operator :ul) (eq ,operator :ol))
           when (keywordp ,operator) do (end-markup-tag ,operator)
           and do (setq ,stack (nconc (ldiff ,stack ,st) ,tail))
           finally (return ,stack))))

(defmacro escape-link (&optional in-attribute-p (start 'plain-text-start))
  `(escape-html lexer-string ,in-attribute-p :start ,start :end lexer-position))

(defun render-text (string &optional inline-text-p)
  (with-lexer ((or string (return-from render-text)))
    (loop with buffer = (make-buffer 80 'character)
          with stream = *html-output* and li-marker = (match-li-marker)
          with pending-break = (if (or inline-text-p li-marker) nil :p)
          with page-name and linked-object and stack = '()
          for plain-text-start of-type array-index = lexer-position
          for verbatim-mode-p = nil then (verbatim-mode-p stack)
          do (unless verbatim-mode-p
               (when li-marker
                 (let ((old-level (compute-li-level stack)))
                   (whereas ((mismatch (mismatch li-marker old-level)))
                     (%update-list-level old-level li-marker mismatch)
                     (when (eql mismatch 0) (setq pending-break nil)))
                   (start-tag :li nil)
                   (setq li-marker nil)
                   (if (and (eq pending-break :p) (plusp (length old-level)))
                       (print-text pending-break :print-pending-break-only)
                       (setq pending-break nil))))
               (lexer-advance-if
                (match-symbol (:http :https :ftp) :terminated-by #\:)
                (lexer-match #\/) (lexer-match #\/) (get-balanced-token)
                (progn (print-text pending-break :print-pending-break-only)
                       (html ((:a :href (escape-link t)) (escape-link)))
                       (setq plain-text-start lexer-position))))
             (lexer-skip (not (or "\\()[<>&:" whitespace)))
             (when (< plain-text-start lexer-position)
               (print-text pending-break plain-text-start))
             (lexer-case (char (loop-finish))
               (#\Newline (let* ((start (1- (the array-index (lexer-skip #\Newline))))
                                 (line-break-p (eql (- lexer-position start) 1)))
                            (declare (type array-index start) (type boolean line-break-p))
                            (when (and stack (not line-break-p)) ; UL/OL backstops auto-closing.
                              (setq verbatim-mode-p (verbatim-mode-p (pop-inside-list stack))))
                            (setq li-marker (if verbatim-mode-p nil (match-li-marker)))
                            (unless (or li-marker line-break-p)
                              (loop for operator in stack if (keywordp operator) do (end-markup-tag operator))
                              (setq verbatim-mode-p (verbatim-mode-p (setq stack (delete-if #'keywordp stack)))))
                            (cond ((not verbatim-mode-p) (setq pending-break (if line-break-p :br :p)))
                                  (t (write-string lexer-string stream :start start :end lexer-position)))))
               (#\< (print-text pending-break :print-pending-break-only) (write-string "&lt;" stream))
               (#\> (print-text pending-break :print-pending-break-only) (write-string "&gt;" stream))
               (#\& (print-text pending-break :print-pending-break-only) (write-string "&amp;" stream))
               (whitespace (print-text pending-break plain-text-start))
               (#\) (or (loop for (operator . tail) on stack
                              if (typep operator '(or cons (eql #\()))
                              do (setq stack (nconc stack-front tail))
                                 (end-markup-tag operator)
                                 (return (consp operator))
                              ;; Remember the operators we bypassed.
                              else collect operator into stack-front)
                        ;; Plain-text parenthesis, which may or may
                        ;; not have had its counterpart on the stack.
                        (print-text pending-break plain-text-start)))
               ;; Operator with explicit (bracketed) extent.
               (#\( (let ((operator-name (and (lexer-match #\:) (get-balanced-token alphanumeric))))
                      (or (and operator-name (not verbatim-mode-p) (start-markup-tag operator-name))
                          (progn (print-text pending-break plain-text-start) (push #\( stack)))))
               ;; Operator with implicit (paragraph) extent.
               (#\: (let ((operator-name (get-balanced-token alphanumeric)))
                      (or (when (and operator-name (not verbatim-mode-p))
                            (start-markup-tag operator-name prog1))
                          (print-text pending-break plain-text-start))))
               ;; Wiki-style bracketed link.
               (#\[ (or (lexer-advance-if
                         (lexer-match #\[)
                         (setq page-name (and (not verbatim-mode-p) (get-balanced-token (not #\|) #\])))
                         (setq linked-object (find-object (collapse (setq page-name (copy-seq page-name)))))
                         (let ((page-title (or (and (lexer-match #\|) (get-balanced-token t #\])) page-name)))
                           (when (lexer-match #\] #\])
                             (print-text pending-break :print-pending-break-only)
                             (html ((:a :href (uri-to linked-object)) page-title)))))
                        (print-text pending-break plain-text-start)))
               ;; Potential escape sequence (only for special characters).
               (#\\ (print-text pending-break :print-pending-break-only)
                    (write-char (or (lexer-match special) #\\) stream)))
          ;; Any open tags get closed automatically.
          finally (mapc #'end-markup-tag stack))))

(defmacro %push-line (end-of-line &aux (extent `(:start start :end ,end-of-line)))
  `(vector-push-extend (make-indirect-vector lexer-string ,@extent) lines))

(defun get-lines (string)
  (with-lexer (string)
    (loop with length of-type fixnum = (- lexer-end lexer-start)
          with lines = (make-buffer (floor length 50) 'string)
          with start of-type array-index = lexer-start
          for char = (or (lexer-match t) (loop-finish))
          when (eql char #\Newline)
          do (if (= lexer-position (1+ start))
                 (vector-push-extend "" lines)
                 (%push-line (1- lexer-position)))
             ;; Start the next line.
             (setq start lexer-position)
          finally (unless (= start lexer-end)
                    (%push-line lexer-end))
                  (return lines))))

(defun escape-lines (lines stream &key (start 0) end)
  (with-array-data ((lines lines) (start start) (end end))
    (loop for i of-type array-index from start below end
          do (escape-html (svref lines i)) (terpri stream))))

(defun %compute-lcs-lengths (a b &optional (n (length b)))
  (declare (optimize (safety 0) (speed 3)) (type fixnum n))
  (loop with curr = (make-array (1+ n) :initial-element 0)
        with prev = (make-array (1+ n) :initial-element 0)
        for index of-type array-index below (length a)
        for item = (aref a index) finally (return curr)
        do (replace prev curr)
           (loop for j of-type array-index from 1
                 for i of-type array-index below n
                 do (setf (svref curr j) (if (equal item (aref b i))
                                             (1+ (the array-index (svref prev i)))
                                             (max (the array-index (svref curr i))
                                                  (the array-index (svref prev j))))))))

(defun hirschberg-lcs (a b &optional (m (length a)) (n (length b)))
  (cond ((<= m 1) (if (and (eql m 1) (find (aref a 0) b :test #'equal)) a #()))
        (t (let* ((i (ash m -1)) (a-left (subseq a 0 i)) (a-right (subseq a i)))
             (loop with left-table = (%compute-lcs-lengths a-left b) and max fixnum = -1
                   with right-table = (%compute-lcs-lengths (reverse a-right) (reverse b))
                   with k of-type array-index = 0 for j of-type array-index from 0 to n
                   for score fixnum = (+ (svref left-table j) (svref right-table (- n j)))
                   if (> score max) do (setq max score k j) ; Keep track of best split.
                   finally (return (let ((b-left (subseq b 0 k)) (b-right (subseq b k))
                                         (o (- (the array-index m) (the array-index i)))
                                         (p (- (the array-index n) (the array-index k))))
                                     (declare (type fixnum o p))
                                     (concatenate 'simple-vector
                                                  (hirschberg-lcs a-left  b-left  i k)
                                                  (hirschberg-lcs a-right b-right o p)))))))))

(defun render-diff (new old)
  (setq new (get-lines (or new "")))
  (setq old (get-lines (or old "")))
  (loop with stream = *html-output*
        with lcs = (hirschberg-lcs new old)
        with new-start of-type array-index = 0
        with old-start of-type array-index = 0
        with lcs-start of-type array-index = 0
        for lcs-element across lcs
        for new-end = (position lcs-element new :start new-start :test #'equal)
        for old-end = (position lcs-element old :start old-start :test #'equal)
        for lcs-end of-type array-index from 0
        do (unless (and (= old-end old-start) (= new-end new-start))
             (escape-lines lcs stream :start lcs-start :end lcs-end)
             (setq lcs-start lcs-end))
           (when (> old-end old-start) (html (:del (escape-lines old stream :start old-start :end old-end))))
           (when (> new-end new-start) (html (:ins (escape-lines new stream :start new-start :end new-end))))
           (setq new-start (1+ (the array-index new-end)) old-start (1+ (the array-index old-end)))
        finally (escape-lines lcs stream :start lcs-start)
                (when (> (length old) old-start) (html (:del (escape-lines old stream :start old-start))))
                (when (> (length new) new-start) (html (:ins (escape-lines new stream :start new-start))))))