;;;; -*- Mode: Lisp; Base: 10; Syntax: ANSI-Common-Lisp; Package: kira; Coding: utf-8 -*-
;;;; Copyright © 2024 David Mullen. All Rights Reserved. Origin: <https://cl-pdx.com/kira/>

(in-package :kira)

(defstruct* (uri (:conc-name %uri-))
  scheme authority user host port
  path segments query alist fragment
  string)

;;; RFC 2396 included the exclamation point, asterisk,
;;; single-quote, and parentheses in the 'unreserved' set.
;;; RFC 3986 designated those characters sub-delims instead.

(define-char-class gen-delims () ":/?#[]@")
(define-char-class sub-delims () "!$&'()*+,;=")
(define-char-class reserved () '(or gen-delims sub-delims))
(define-char-class unreserved () '(or alphanumeric "-._~"))

(defun uri-scheme (uri)
  (%uri-scheme (uri uri)))

(defun uri-authority (uri)
  (orf (%uri-authority (setq uri (uri uri)))
       (let ((user (%uri-user uri)) (port (%uri-port uri)))
         (unless (or user port) (return-from uri-authority (%uri-host uri)))
         (format nil "~@[~A@~]~@[~A~]~@[:~D~]" user (%uri-host uri) port))))

(defun %authority (uri)
  "Parse URI authority if it has not been parsed."
  (when (%uri-host uri) (return-from %authority))
  (whereas ((authority (%uri-authority uri)))
    (setf (values (%uri-user uri)
                  (%uri-host uri)
                  (%uri-port uri))
          (parse-authority authority))))

(defun uri-user (uri)
  (or (%uri-user (setq uri (uri uri)))
      (nth-value 0 (%authority uri))))

(defun uri-host (uri)
  (or (%uri-host (setq uri (uri uri)))
      (nth-value 1 (%authority uri))))

(defun uri-port (uri)
  (or (%uri-port (setq uri (uri uri)))
      (nth-value 2 (%authority uri))))

(defun parse-authority (string)
  "Get user, host, and port."
  (with-lexer (string)
    (let (port start-of-host end-of-host)
      (loop (lexer-case (char (return))
              (#\@ (unless start-of-host ; Seen already?
                     (setq start-of-host lexer-position)))
              ;; Integer following the colon is a port
              ;; only if it's at the end of the string.
              (#\: (let ((start-of-port lexer-position))
                     (setq port (lexer-unsigned))
                     (if (= lexer-position lexer-end)
                         (setq end-of-host (1- start-of-port))
                         (setq port nil))))
              ;; Either part of the user
              ;; or the host components.
              (otherwise)))
      ;; We have a user only if START-OF-HOST was explicitly set.
      (values (when start-of-host (subseq lexer-string 0 (the fixnum (1- start-of-host))))
              (subseq lexer-string (or start-of-host 0) (or end-of-host lexer-end))
              port))))

(defun uri-path (uri)
  (orf (%uri-path  (setq uri (uri uri)))
       (cond ((null (rest (%uri-segments uri))) "")
             (t (let* ((segments (%uri-segments uri))
                       (start 0) (car (pop segments))
                       (total-size 0) path)
                  (declare (fixnum start))
                  (declare (fixnum total-size))
                  (loop for segment in segments
                        for length fixnum = (length segment)
                        for total-length fixnum = (1+ length)
                        do (incf total-size total-length))
                  ;; Prepend leading slash?
                  (cond ((eq car :relative)
                         (setq path (make-string (decf total-size))))
                        (t (setq path (make-string total-size))
                           (setf (schar path 0) #\/ start 1)))
                  ;; Splice in segments.
                  (loop finally (return path)
                        for (segment . tail) on segments
                        as end fixnum = (+ start (length segment))
                        do (replace path segment :start1 start)
                        when tail do (setf (schar path end) #\/)
                                     (setq start (1+ end))))))))

(defun uri-segments (uri)
  (orf (%uri-segments (setq uri (uri uri)))
       (with-lexer ((or (%uri-path uri) ""))
         (cond ((= lexer-end lexer-start) '(:relative))
               (t (let ((leading-slash-p (lexer-match #\/)))
                    (list* (if leading-slash-p :absolute :relative)
                           (loop collecting (get-token (not #\/))
                                 while (lexer-match #\/)))))))))

(defun uri-query (uri)
  (orf (%uri-query (setq uri (uri uri)))
       (whereas ((alist (%uri-alist uri)))
         (with-output-to-string (stream)
           (loop for ((key . value) . tail) on alist
                 do (format stream "~A~@[=~A~]~@[&~]"
                            key value tail))))))

(defun uri-alist (uri)
  (orf (%uri-alist (setq uri (uri uri)))
       (with-lexer ((or (%uri-query uri) ""))
         (unless (= lexer-position lexer-end)
           (loop for key = (get-token (not "=&"))
                 for parameter = (cons key nil)
                 when (lexer-match #\=)
                 do (rplacd parameter (get-token (not #\&)))
                 collect parameter while (lexer-match #\&))))))

(defun uri-fragment (uri)
  (%uri-fragment (uri uri)))

(defun uri (designator)
  (typecase designator
    (uri designator)
    (t (parse-uri
        designator))))

;;; Appendix B of RFC 3986 gives this regexp for parsing a URI:
;;; -----------------------------------------------------------
;;; ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
;;;   scheme          authority  path       query      fragment
;;; -----------------------------------------------------------

(defun %make-uri-from-string (string)
  (let ((uri (make-uri :string string)))
    (with-lexer ((%uri-string uri))
      (orf (%uri-scheme uri)
           (match-symbol (:http :https :mailto :ftp) :terminated-by #\:)
           (when (lexer-advance-if (lexer-skip (not ":/?#")) (lexer-match #\:))
             (subseq lexer-string lexer-start (the array-index (1- lexer-position)))))
      (with-accessors ((path %uri-path)) uri
        ;; "A component is undefined if its associated delimiter
        ;; does not appear in the URI reference; the path component
        ;; is never undefined, though it may be empty."
        (let ((has-authority-p (lexer-match #\/ #\/)))
          (setf (%uri-authority uri)
                (when has-authority-p
                  (get-token (not "/?#")))))      
        (setq path (get-token (not "?#")))
        (let ((query-string-p (lexer-match #\?)))
          (setf (%uri-query uri) (when query-string-p (get-token (not #\#))))
          (setf (%uri-fragment uri) (when (lexer-match #\#) (get-token t)))
          uri)))))

(defun uri-string (uri)
  (orf (%uri-string (setq uri (uri uri)))
       (format nil "~@[~(~A~):~]~@[//~A~]~@[~A~]~@[?~A~]~@[#~A~]"
               (uri-scheme uri) (uri-authority uri) (uri-path uri)
               (uri-query uri) (uri-fragment uri))))

(defun uri= (uri1 uri2)
  (setq uri1 (uri uri1))
  (setq uri2 (uri uri2))
  (and (equalp (%uri-scheme uri1)
               (%uri-scheme uri2))
       (equal (uri-user uri1) (uri-user uri2))
       (equalp (uri-host uri1) (uri-host uri2))
       (eql (uri-port uri1) (uri-port uri2))
       (equal (uri-path uri1) (uri-path uri2))
       (equal (uri-query uri1) (uri-query uri2))
       (equal (%uri-fragment uri1)
              (%uri-fragment uri2))))

(defun percent-encode (string &key (start 0) end (external-format +content-external-format+))
  (let ((octets (encode-string-to-octets string :start start :end end :external-format external-format)))
    (declare (type (simple-array (unsigned-byte 8) (*)) octets) (optimize (safety 0) (speed 3)))
    (let* ((length-of-octets (length octets)) (encoded-length length-of-octets))
      (declare (type array-index length-of-octets encoded-length))
      (loop for i of-type fixnum below length-of-octets
            for octet of-type octet = (aref octets i)
            for char of-type character = (code-char octet)
            do (unless (char-of-class-p char 'unreserved)
                 (incf encoded-length 2)))
      ;; Now we know the exact length needed.
      (when (= encoded-length length-of-octets)
        (unless (or end (/= 0 (the fixnum start)))
          (return-from percent-encode string)))
      ;; Need to do at least some encoding.
      (symbol-macrolet ((digits "0123456789ABCDEF"))
        (loop with result of-type simple-string = (make-string encoded-length)
              for j of-type fixnum = 0 then (1+ j) finally (return result)
              for i of-type fixnum below length-of-octets
              for octet of-type octet = (aref octets i)
              for char of-type character = (code-char octet)
              for unreserved-char-p = (char-of-class-p char 'unreserved)
              when unreserved-char-p do (setf (schar result j) char)
              else do (setf (schar result j) #\%)
                      (setf (schar result (the fixnum (incf j)))
                            (schar digits (ldb (byte 4 4) octet))
                            (schar result (the fixnum (incf j)))
                            (schar digits (ldb (byte 4 0) octet))))))))

(defmacro lexer-hex-digit (&aux (lexer `(lexer-case (char) ((digit 16) char))))
  `(the (mod 16) (aref +digits+ (char-code (the character (or ,lexer (lexer-error)))))))

(defun %decoded-octets (string start end external-format plus-to-space-p)
  (setq external-format (normalize-external-format t external-format))
  (let ((character-encoding (external-format-character-encoding external-format)))
    (with-output-to-vector (stream nil :external-format character-encoding)
      (with-lexer (string start end)
        (with-lexer-error ("percent-encoding")
          (loop (lexer-case (char (return))
                  (#\% (let* ((octet (ash (lexer-hex-digit) 4))
                              (octet (logior octet (lexer-hex-digit))))
                         (declare (type octet octet))
                         (write-byte octet stream)))
                  (otherwise
                   (if (when plus-to-space-p
                         (char= char #\+))
                       (write-char #\  stream)
                       (write-char char stream))))))))))

(defun decode-content-from-octets (octets &key (start 0) end (external-format +content-external-format+))
  (let (buffer (size (- (the array-index (check-vector-bounds octets start end)) (the array-index start))))
    (with-input-from-vector (octet-stream octets :start start :end end :external-format external-format)
      (subseq (setq buffer (make-string size)) 0 (read-sequence buffer octet-stream)))))

(defun percent-decode (string &key (start 0) end (external-format +content-external-format+) plus-to-space-p)
  (let ((string-as-percent-decoded-octets (%decoded-octets string start end external-format plus-to-space-p)))
    (decode-content-from-octets string-as-percent-decoded-octets :external-format external-format)))

(defmethod print-object ((uri uri) stream)
  (if (or *print-escape* *print-readably*)
      (let ((string (uri-string uri)))
        (write-string "#u" stream)
        (write-escaped-string string stream))
      (write-string (uri-string uri) stream)))

(defun parse-uri (string &key (start 0) end)
  (let ((uri-string (subseq string start end)))
    (%make-uri-from-string uri-string)))

(set-dispatch-macro-character
 #\# #\u #'(lambda (stream sub-char parameter)
             (declare (ignore sub-char parameter))
             (%make-uri-from-string (read stream))))

(defmethod make-load-form ((uri uri) &optional environment)
  (declare (ignore environment)) `(uri ,(uri-string uri)))

(defvar *base-uri*
  (%make-uri-from-string
   #+localhost "http://localhost/"
   #-localhost "https://cl-pdx.com/"))

(defun remove-dot-segments (segments)
  (let* ((last-segment (last segments)) (dot-end-p (equal last-segment '("."))))
    (removef "." segments :test #'equal) (when dot-end-p (appendf segments '(""))))
  (loop for up-position = (position ".." segments :test #'equal) while up-position
        finally (return (if (equal segments '(:relative "")) '(:relative) segments))
        do (let* ((x (nthcdr (1- up-position) segments)) (y (or (cddr x) '(""))))
             (when (eq x segments) (setq x (rest segments)))
             (setq segments (append (ldiff segments x) y)))))

(defun %uri-for-resolution (uri)
  (with-accessors ((authority uri-authority)
                   (segments uri-segments)
                   (fragment %uri-fragment)
                   (query uri-query))
      (setq uri (uri uri))
    (unless (%uri-scheme uri)
      (return-from %uri-for-resolution
        (make-uri :authority authority
                  :segments segments
                  :fragment fragment
                  :query query)))
    ;; If the URI has a scheme then it will be
    ;; resolved in-place but we need to ensure:
    ;; (1) slots used for resolution are set
    ;; (2) all the other slots are cleared.
    (prog1 uri
      (setf (values
             (%uri-user uri)
             (%uri-host uri)
             (%uri-port uri)
             (%uri-path uri)
             (%uri-alist uri)
             (%uri-string uri))
            (progn authority
                   segments
                   query
                   nil)))))

(defun resolve-uri (uri &optional (base *base-uri*))
  (setq uri (%uri-for-resolution uri) base (uri base))
  (check-type base (satisfies %uri-scheme) "a base-uri")
  (let ((segments (%uri-segments uri)))
    (unless (or (%uri-scheme uri) (%uri-authority uri))
      (setf (%uri-authority uri) (uri-authority base))
      (let ((base-segments (uri-segments base)))
        (if (endp (rest segments))
            (prog1 (setq segments base-segments)
              (orf (%uri-query uri) (uri-query base)))
            (unless (eq (first segments) :absolute)
              (cond ((rest base-segments)
                     (setq segments (nconc (butlast base-segments) (rest segments))))
                    ((%uri-authority uri) (pop segments) (push :absolute segments)))))))
    (orf (%uri-scheme uri) (%uri-scheme base))
    (setq segments (remove-dot-segments segments))
    (setf (%uri-segments uri) segments)
    uri))

(defun pathname-uri (pathname &rest initargs)
  (assert (pathname-match-p pathname "kira:www;**;") (pathname))
  (let* ((namestring (enough-namestring pathname "kira:www;"))
         (directory-levels (cdr (pathname-directory namestring)))
         (segments (list* :absolute directory-levels)))
    (appendf segments (list (file-namestring namestring)))
    (apply #'make-uri :segments segments initargs)))

(defun versioned-uri (pathname)
  (let* ((write-date (file-write-date pathname))
         (version (format nil "~(~36R~)" write-date)))
    (pathname-uri pathname :alist `(("v" . ,version)))))

(defmacro pop-segment (place &environment environment)
  (multiple-value-bind (temps vals news writer reader)
      (get-setf-expansion place environment)
    (with-gensyms (segment segments)
      `(let* (,.(mapcar #'list temps vals) ,segment)
         (loop for ,segments = ,reader as ,(car news) = (cdr ,segments)
               do ,writer (setq ,segment (if ,segments (car ,segments)))
               unless (equal ,segment "") return ,segment)))))

(defun find-object (uri &aux (host (uri-host uri)))
  (when (or (not host) (equalp host (uri-host *base-uri*)))
    (let ((object 'home) (segments (rest (uri-segments uri))))
      (loop as segment = (or (pop-segment segments) (return object))
            always (setq object (get-inferior-object segment object))))))

(defgeneric canonical-name (object)
  (:method ((any-object t)) nil))

(defgeneric canonical-path (object)
  (:method ((home-object (eql 'home))) "/")
  (:method ((handler (eql 'kira))) "/kira/")
  (:method ((handler (eql 'system))) "/system/")
  (:method ((handler (eql 'contact))) "/contact")
  (:method ((object object))
    (with-slots (object-id) object
      (cond ((eql object-id 0) "/")
            ((format nil "/~D" object-id))))))

(defmethod canonical-path ((handler symbol))
  "Cache handler path as a symbol property."
  (orf (get handler 'canonical-path)
       (concatenate 'simple-string "/system/"
                    (string-downcase handler))))

(defmacro %as-string (object)
  "Equivalent to PRINC-TO-STRING."
  `(typecase ,object
     (string ,object)
     (uri (uri-string ,object))
     (character (string ,object))
     (pathname (namestring ,object))
     (t (princ-to-string ,object))))

(defun parameter-string (object)
  (cond ((eq object nil) object)
        (t (if (typep object 'object)
               (format nil "~D" (object-id object))
               (percent-encode (%as-string object))))))

(defun uri-to (object &rest parameters)
  (make-uri :path (canonical-path object)
            :alist (loop with *print-case* = :downcase
                         for (name value) on parameters by #'cddr
                         for name-string = (parameter-string name)
                         for value-string = (parameter-string value)
                         collecting (cons name-string value-string))))

(defun absolute-uri-to (object &rest parameters)
  (resolve-uri (apply #'uri-to object parameters)))