From: Pascal Bourguignon
Subject: Three implementations of sets with bits.
Date: 
Message-ID: <87smguzd4m.fsf@thalassa.informatimago.com>
I  compared  three  ways  of  implementating  sets  with  bits,  using
integers, bit vectors, or arrays of small integers.


-----------------------------------------------------------
loops x #bits
                         CLISP    CLISP    SBCL     SBCL    programming 
                         time(s)  space(B) time(s)  space(B)    
100x65536            
  integer(func)          1.79     8480k     0.30     1195k  ultra simple
  bvector(func)          1.64        0k     1.36    45892k  simple
  bvector(proc)          1.36      360k     1.31    13854k  simple
  bset(proc)             0.25      440k     0.03        0k  complex
1000x256             
  integer(func)          1.24     6344k     0.31     7685k 
  bvector(func)          1.27        0k     1.18    35096k 
  bvector(proc)          1.02      120k     0.87     8617k 
  bset(proc)             0.15      128k     0.21     7850k 
10000x32             
  integer(func)         30.52   154891k     7.62   194816k 
  bvector(func)         32.51        0k    27.94   892479k 
  bvector(proc)         26.31     2460k    21.38   212498k 
  bset(proc)             3.70     2460k     5.26   198094k
-----------------------------------------------------------


Normalized (by run and by cl implementation):
-----------------------------------------------------------
loops x #bits
                         CLISP    CLISP    SBCL     SBCL     
                         time     space    time     space    
100x65536            
  integer(func)          1.00     1.00     0.22     0.03 
  bvector(func)          0.92     0.00     1.00     1.00 
  bvector(proc)          0.76     0.04     0.96     0.30 
  bset(proc)             0.14     0.05     0.02     0.00 
1000x256             
  integer(func)          0.98     1.00     0.26     0.22 
  bvector(func)          1.00     0.00     1.00     1.00 
  bvector(proc)          0.80     0.02     0.74     0.25 
  bset(proc)             0.12     0.02     0.18     0.22 
10000x32             
  integer(func)          0.94     1.00     0.27     0.22 
  bvector(func)          1.00     0.00     1.00     1.00 
  bvector(proc)          0.81     0.02     0.77     0.24 
  bset(proc)             0.11     0.02     0.19     0.22
-----------------------------------------------------------


I  find  it somewhat  depressing  that  the  ultra-simple and  elegant
formulation  be also  the slowest  and  greediest (on  clisp, but  the
second nicest implementation is worse on sbcl).  

That integer representation of sets could be much more efficient (both
in space  and time)  if it  was possible to  modify an  integer value.
Must all integer values absolutely be immutable objects?


In the case of the bit  vector representation, it could probably be as
efficient as the most efficient if we could manipulate several bits at
once.  Is  there a  way to extract  an integer  from a bit  vector? Or
"displace"  bits  to  an  integer?   

In any case, I'd suggest the implementations to make a special case in
map and  map-into when the  function is identity,  or one of  the log*
family, and when the arguments  are bit vectors, to process them words
by words.  The speed gain  on 32bit architecture would be good enough,
on 64bit it would be impressive!



integer(functional):
--------------------
(defun integer-intersection (p q)  (logand p q))
(defun integer-union        (p q)  (logior p q))
(defun integer-difference   (p q)  (logandc2 p q))
(defun integer-contains     (s e)  (logbitp e s))
(defun integer-singleton    (e)    (dpb 1 (byte 1 e) 0))
(defun integer-include      (s e)  (dpb 1 (byte 1 e) s))
(defun integer-exclude      (s e)  (dpb 0 (byte 1 e) s))
(defun integer-cardinal     (s)    (logcount s))


bitvector(functional):
----------------------
(defun bit-vector-intersection (p q) 
  (map '(array bit (*)) (function logand) p q))

(defun bit-vector-union (p q)
  (map '(array bit (*)) (function logior) p q))

(defun bit-vector-difference (p q)
  (map '(array bit (*)) (function logandc2) p q))

(defun bit-vector-contains (v e)  (not (zerop (aref v e))))
(defun bit-vector-include  (v e)  (setf (aref v e) 1))
(defun bit-vector-exclude  (v e)  (setf (aref v e) 0))


bitvector(procedural):
----------------------
(defun bit-vector-assign-2       (p q)  (map-into p (function identity) q))
(defun bit-vector-intersection-2 (p q)  (map-into p (function logand) p q))
(defun bit-vector-union-2        (p q)  (map-into p (function logior) p q))
(defun bit-vector-difference-2   (p q)  (map-into p (function logandc2) p q))


bset(procedural):
-----------------

(defconstant +bit-per-bitset+ 32)
(deftype bitset () `(unsigned-byte ,+bit-per-bitset+))

(defstruct bset
  (bitsets (make-array :type (array bitset *))
  (cardinal      nil :type (or null (integer 0)))
  (first-element 0   :type (integer 0)) ;; approximate
  (last-element  0   :type (integer 0)) ;; approximate
  ;; (for all i (==> (< i (bset-first-element bset)) (not (is-element i bset))))
  ;; (for all i (==> (> i (bset-last-element  bset)) (not (is-element i bset))))
  );;bset

(proclaim '(inline elem-to-bit))
(defun elem-to-bit    (element) (mod element +bit-per-bitset+))
(proclaim '(inline bitset-to-elem))
(defun bitset-to-elem (index)   (* +bit-per-bitset+ (1+ index)))

(defun intersection (set1 set2)
  "
DO:      set1 := set1 inter set2 inter
         Accumulate in set1 the intersection of set1 and set2
         (elements in set1 and in set2).
RETURN:  SET1
"
  (let ((bits1 (bset-bitsets set1))
        (bits2 (bset-bitsets set2)))
    (for (i
         (elem-to-bitset (max (bset-first-element set1)
                              (bset-first-element set2)))
         (elem-to-bitset (min (bset-last-element set1)
                              (bset-last-element set2))))
         (setf (bsref bits1 i) (logand (bsref bits1 i) (bsref bits2 i)))))
  (setf (bset-cardinal set1) nil
        (bset-first-element set1) (max (bset-first-element set1)
                                       (bset-first-element set2))
        (bset-last-element set1)  (min (bset-last-element set1)
                                       (bset-last-element set2)))
  set1)

(defun include (bset element)
  "
PRE:    (<= 0 element (size bset))
POST:   (is-element element bset)
RETURN: BSET
"
  (declare (type (integer 0) element))
  (let ((bits (bset-bitsets bset)))
    (setf (bsref bits (elem-to-bitset element))
          (dpb 1 (byte 1 (elem-to-bit element))
               (bsref bits (elem-to-bitset element)))))
  (setf (bset-cardinal bset) nil
        (bset-first-element bset) (cond
                                   ((is-element 0 bset)               0)
                                   ((zerop (bset-first-element bset)) element)
                                   (t (min element (bset-first-element bset))))
        (bset-last-element  bset) (max element (bset-last-element  bset)))
  bset)



    


-- 
__Pascal_Bourguignon__                     http://www.informatimago.com/
There is no worse tyranny than to force a man to pay for what he doesn't
want merely because you think it would be good for him.--Robert Heinlein
http://www.theadvocates.org/

From: Raymond Toy
Subject: Re: Three implementations of sets with bits.
Date: 
Message-ID: <404197FE.7090909@earthlink.net>
Pascal Bourguignon wrote:
> bitvector(functional):
> ----------------------
> (defun bit-vector-intersection (p q) 
>   (map '(array bit (*)) (function logand) p q))
> 
> (defun bit-vector-union (p q)
>   (map '(array bit (*)) (function logior) p q))
> 
> (defun bit-vector-difference (p q)
>   (map '(array bit (*)) (function logandc2) p q))

Why not use bit-and, bit-ior and bit-andc2 for these?  A good 
implementation will probably make these operate on words at a time 
instead of a bit at a time, as you wanted.

> (defstruct bset
>   (bitsets (make-array :type (array bitset *))

Some kind of typo here.  I guess you wanted to some array with element 
type bitset?

Ray
From: Pascal Bourguignon
Subject: Re: Three implementations of sets with bits.
Date: 
Message-ID: <87oerhxnev.fsf@thalassa.informatimago.com>
Raymond Toy <ยทยทยทยท@earthlink.net> writes:

> Pascal Bourguignon wrote:
> > bitvector(functional):
> > ----------------------
> > (defun bit-vector-intersection (p q)   (map '(array bit (*))
> > (function logand) p q))
> > (defun bit-vector-union (p q)
> >   (map '(array bit (*)) (function logior) p q))
> > (defun bit-vector-difference (p q)
> >   (map '(array bit (*)) (function logandc2) p q))
>
> Why not use bit-and, bit-ior and bit-andc2 for these?  A good
> implementation will probably make these operate on words at a time
> instead of a bit at a time, as you wanted.

I overlooked it.   Thank you for recalling it to me.

Here are the results using implementations based on bit-* functions.


> > (defstruct bset
> >   (bitsets (make-array :type (array bitset *))
>
> Some kind of typo here.  I guess you wanted to some array with element
> type bitset?


Yes, I did not cut enough, I wanted to avoid you the burden of the
initial array creation.

(defstruct bset
   (bitsets :type (array bitset *))
    ;;...
    )



Well,  using (bit-and vector-1  vector-2 t)  [bvector-bit(proc)] looks
like the most efficient.  The only gain we could have with an array of
(byte 32),  is that we could  keep information about  ranges (at least
min-elem/max-elem)  to  avoid  processing  the whole  array  when  the
elements are not spread over the whole range (imagine: [0-9a-z]).


                     CLISP    CLISP          SBCL     SBCL
                     time     space          time     space
100x65536
  integer(func)          1.68        0k          1.41    45900k
  bvector-bit(func)      0.68        0k          0.66    10898k
  bvector-map(func)      1.39      360k          1.32    13859k
  bvector-bit(proc)      0.22      360k          0.25      719k
  bvector-map(proc)      0.21      440k          0.29    11291k
1000x256
  integer(func)          1.31        0k          1.10    35100k
  bvector-bit(func)      0.48        0k          0.36     8261k
  bvector-map(func)      1.03      120k          0.88     8638k
  bvector-bit(proc)      0.17      120k          0.08      138k
  bvector-map(proc)      0.13      128k          0.24     8775k
10000x32
  integer(func)         32.90        0k         28.00   892483k
  bvector-bit(func)     13.09        0k          9.12   209989k
  bvector-map(func)     26.94     2460k         21.79   212497k
  bvector-bit(proc)      4.32     2460k          1.51     2459k
  bvector-map(proc)      3.42     2460k          5.12   197241k



------------------------------------------------------------------------
For the interested here is  my evaluation code:


(DEFUN MAKE-LIST-OF-RANDOM-NUMBERS (LENGTH &key (modulo MOST-POSITIVE-FIXNUM))
  "
RETURN:  A list of length `length' filled with random numbers
MODULO:  The argument to RANDOM.
"
    (LOOP WHILE (< 0 LENGTH)
          COLLECT (RANDOM MODULO) INTO RESULT
          DO (SETQ LENGTH (1- LENGTH))
          FINALLY (RETURN RESULT))
    );;MAKE-LIST-OF-RANDOM-NUMBERS


(defun make-random-bit-vector (size)
  (let ((result (make-array (list size) :element-type 'bit)))
    (map nil (lambda (element) (setf (aref result element) 1))
         (MAKE-LIST-OF-RANDOM-NUMBERS (/ size 2) :modulo size))
    result));;make-random-bit-vector


(defun integer-intersection (p q)  (logand p q))
(defun integer-union        (p q)  (logior p q))
(defun integer-difference   (p q)  (logandc2 p q))
(defun integer-contains     (s e)  (logbitp e s))
(defun integer-singleton    (e)    (dpb 1 (byte 1 e) 0))
(defun integer-include      (s e)  (dpb 1 (byte 1 e) s))
(defun integer-exclude      (s e)  (dpb 0 (byte 1 e) s))
(defun integer-cardinal     (s)    (logcount s))

(DEFUN COPY-BIT-VECTOR-TO-integer (BV)
  (let ((integer-set 0))
    (DOTIMES (ELEMENT (LENGTH BV))
      (unless (ZEROP (AREF BV ELEMENT))
        (setf integer-set (integer-include integer-set element))))
    integer-set));;COPY-BIT-VECTOR-TO-integer




;; bvector-bit(func)
(defun bit-vector-intersection    (p q) (bit-and p q))
(defun bit-vector-union           (p q) (bit-ior p q))
(defun bit-vector-difference      (p q) (bit-andc2 p q))

;; bvector-map(func) 
(defun bit-vector-intersection-m(p q)(map'(array bit (*))(function logand) p q))
(defun bit-vector-union-m       (p q)(map'(array bit (*))(function logior) p q))
(defun bit-vector-difference-m(p q)(map'(array bit (*))(function logandc2) p q))

;; bvector-bit(proc) 
(defun bit-vector-assign-b2       (p q) (map-into p (function identity) q))
(defun bit-vector-intersection-b2 (p q) (bit-and   p q t))
(defun bit-vector-union-b2        (p q) (bit-ior   p q t))
(defun bit-vector-difference-b2   (p q) (bit-andc2 p q t))

;; bvector-map(proc) 
(defun bit-vector-assign-m2       (p q) (map-into p (function identity) q))
(defun bit-vector-intersection-m2 (p q) (map-into p (function logand) p q))
(defun bit-vector-union-m2        (p q) (map-into p (function logior) p q))
(defun bit-vector-difference-m2   (p q) (map-into p (function logandc2) p q))

(defun bit-vector-contains     (v e) (not (zerop (aref v e))))
(defun bit-vector-include      (v e) (setf (aref v e) 1))
(defun bit-vector-exclude      (v e) (setf (aref v e) 0))


(defun bit-vector-cardinal (v)
  (let ((cardinal 0))
    (DOTIMES (ELEMENT (LENGTH v))
      (UNLESS (ZEROP (AREF v ELEMENT)) (incf cardinal)))
    cardinal));;bit-vector-cardinal

#+bset
(DEFUN COPY-BIT-VECTOR-TO-BSET (BV BS)
  (BSET:ASSIGN-EMPTY BS)
  (DOTIMES (ELEMENT (LENGTH BV))
    (UNLESS (ZEROP (AREF BV ELEMENT)) (BSET:INCLUDE BS ELEMENT)))
  bs);;COPY-BIT-VECTOR-TO-BSET




#+clisp (ext:gc)
#+sbcl  (sb-ext:gc)

(defparameter stat-text
  (with-output-to-string (*trace-output*)
    (dolist (ls '((100 65536) (1000 256) (10000 32)))
      (let ((loops (first ls))(size (second ls)))
        (defparameter p (make-random-bit-vector size))
        (defparameter q (make-random-bit-vector size))
        (defparameter r (make-random-bit-vector size))
        (defparameter s (make-random-bit-vector size))
        #+bset
        (progn
          (DEFPARAMETER BP (BSET:MAKE-BSET size))
          (DEFPARAMETER BQ (BSET:MAKE-BSET size))
          (DEFPARAMETER BR (BSET:MAKE-BSET size))
          (DEFPARAMETER BS (BSET:MAKE-BSET size))
          (COPY-BIT-VECTOR-TO-BSET P BP)
          (COPY-BIT-VECTOR-TO-BSET Q BQ))
        (defparameter ip (copy-bit-vector-to-integer p))
        (defparameter iq (copy-bit-vector-to-integer q))
        (format *trace-output* "~&:try (:loops ~D :size ~D)~%" loops size)
        (format *trace-output* "~&:kind (integer func)~%")
        (finish-output *trace-output*)
        (let ((c 0))
          (time (dotimes (i loops)
                  (let ((s (integer-difference
                            (integer-union ip iq)
                            (integer-intersection ip iq))))
                    (dotimes (e size)
                      (when (integer-contains s e) (incf c)))))))
        (format *trace-output* "~&:kind (bit-vector-bit  func)~%")
        (finish-output *trace-output*)
        (let ((c 0))
          (time (dotimes (i loops)
                  (let ((s (bit-vector-difference
                            (bit-vector-union p q)
                            (bit-vector-intersection p q))))
                    (dotimes (e size)
                      (when (bit-vector-contains s e) (incf c)))))))
        (format *trace-output* "~&:kind (bit-vector-map  func)~%")
        (finish-output *trace-output*)
        (let ((c 0))
          (time (dotimes (i loops)
                  (let ((s (bit-vector-difference-m
                            (bit-vector-union-m p q)
                            (bit-vector-intersection-m p q))))
                    (dotimes (e size)
                      (when (bit-vector-contains s e) (incf c)))))))
        (format *trace-output* "~&:kind (bit-vector-bit  proc)~%")
        (finish-output *trace-output*)
        (let ((c 0))
          (time (dotimes (i loops)
                  (bit-vector-assign-b2 r p)
                  (bit-vector-intersection-b2 r q)
                  (bit-vector-assign-b2 s p)
                  (bit-vector-union-b2 s q)
                  (bit-vector-difference-b2 s r)
                  (dotimes (e size)
                    (when (bit-vector-contains s e) (incf c))))))
        (format *trace-output* "~&:kind (bit-vector-map  proc)~%")
        (finish-output *trace-output*)
        (let ((c 0))
          (time (dotimes (i loops)
                  (bit-vector-assign-m2 r p)
                  (bit-vector-intersection-m2 r q)
                  (bit-vector-assign-m2 s p)
                  (bit-vector-union-m2 s q)
                  (bit-vector-difference-m2 s r)
                  (dotimes (e size)
                    (when (bit-vector-contains s e) (incf c))))))
        #+bset
        (progn
          (format *trace-output* "~&:kind (bset proc)~%")
          (finish-output *trace-output*)
          (let ((c 0))
            (time (dotimes (i loops)
                    (bset:assign br bp)
                    (bset:intersection br bq)
                    (bset:assign bs bp)
                    (bset:union bs bq)
                    (bset:difference bs br)
                    (dotimes (e size)
                      (when (bset:is-element e bs) (incf c))))))
          (finish-output *trace-output*))
        )))) ;;stat-text


(defparameter stat-sexp
  (block :post
    (with-input-from-string (*standard-input* stat-text)
      (do  ((line (read-line *standard-input* nil nil)
                  (read-line *standard-input* nil nil))
            (result '())
            (word))
          ((null line) (nreverse result))
        (block :got-value
          (let* ((eof (gensym)))
            (macrolet ((read-word
                        ()
                        `(let ((val (read *standard-input* nil eof)))
                           (if (eq eof val) (return-from :got-value) val))))
              (with-input-from-string
                  (*standard-input* (substitute (character " ")
                                                (character ":") line))
                (setf word (read-word))
                (case word
                  ((kind try)  (push (list(intern (string word)
                                                  (find-package "KEYWORD"))
                                          (read-word)) result))
                  ((real  gc evaluation))
                  ((run) (read-word) (push (list :time  (read-word)) result))
                  ((space) (push (list :space  (read-word)) result))
                  (otherwise
                   (when (numberp word)
                     (read-word)
                     (cond
                      ((eq (read-word) 'consed.)
                       (push (list :space word) result))
                      ((eq (read-word) 'user)
                       (push (list :time word) result))))))))))))));;stat-sexp

;; (format t "~&~S~%" stat-sexp)

(defparameter stats
  (read-from-string
   (format nil "#4A~S"
           (list
            (let ((result (list)))
              (dolist (item stat-sexp)
                (case (car item)
                  ((:try) (push (list) result))
                  ((:kind) (push (list) (car result)))
                  ((:time) (push (cadr item)  (caar result)))
                  ((:space) (push (cadr item) (cdaar result)))))
              result)))));;stats

;;(format t "~&~S~%" stats)


(defun normalize-stats (stats)
  (let ((normalized (make-array (array-dimensions stats))))
    (dotimes (try (array-dimension stats 1))
      (dotimes (cl (array-dimension stats 0))
        (dotimes (kind (array-dimension stats 3))
          (let ((max (apply
                      (function max)
                      (loop for imp
                            from 0 to (1- (array-dimension stats 2))
                            collect (aref stats cl try imp kind)
                            into res
                            return res) )))
            (dotimes (imp (array-dimension stats 2))
              (setf (aref normalized cl try imp kind)
                    (coerce
                     (if (= 0 max)
                       0 (/ (aref  stats cl try imp kind) max)) 'float))))
          )))
    normalized));;normalize-stats


(defun print-stats (titles stats &key normalized order)
  ;;order: '(0 1 2 3) '(1 2 0 3)
  (let ((ivar (make-array '(4)))
        (iorder (map 'vector (function identity) order)))
    (format t "~&~20A " "")
    (dotimes (k (length (elt titles (aref iorder 2))))
      (dotimes (l (length (elt titles (aref iorder 3))))
        (format t "~8A " (elt  (elt titles (aref iorder 2)) k))))
    (format t "~&~20A " "")
    (dotimes (k (length (elt titles (aref iorder 2))))
      (dotimes (l (length (elt titles (aref iorder 3))))
        (format t "~8A " (elt  (elt titles (aref iorder 3)) l))))
    (dotimes (i (length (elt titles (aref iorder 0))))
      (setf (aref ivar (aref iorder 0)) i)
      (format t "~&~20A "(elt (elt titles (aref iorder 0)) i))
      (dotimes (j (length (elt titles (aref iorder 1))))
        (setf (aref ivar (aref iorder 1)) j)
        (format t "~&  ~18A "(elt (elt titles (aref iorder 1)) j))
        (dotimes (k (length (elt titles (aref iorder 2))))
          (setf (aref ivar (aref iorder 2)) k)
          (dotimes (l (length (elt titles (aref iorder 3))))
            (setf (aref ivar (aref iorder 3)) l)
            (let ((value (aref stats (aref ivar 0)(aref ivar 1)(aref ivar 2)(aref ivar 3))))
              (cond
               (normalized (format t "~8,2f " value))
               ((integerp value) (format t "~8Dk " (truncate value 1000)))
               (t (format t "~8,2f " value))))))))));;print-stats


(defparameter titles
  '((#+clisp clisp #+sbcl sbcl)
    ("100x65536" "1000x256" "10000x32")
    ("integer(func)"
     "bvector-bit(func)" "bvector-map(func)"
     "bvector-bit(proc)" "bvector-map(proc)"
     #+bset "bset(proc)")
    ("time" "space")));;titles


(defparameter normalized (normalize-stats stats))


(progn (print-stats titles stats :order '(1 2 0 3))
       (print-stats titles normalized  :order '(1 2 0 3)))

;;;; bv-test.lisp                     -- 2004-03-01 04:09:45 -- pascal   ;;;;


--
__Pascal_Bourguignon__                     http://www.informatimago.com/
There is no worse tyranny than to force a man to pay for what he doesn't
want merely because you think it would be good for him.--Robert Heinlein
http://www.theadvocates.org/