From: Rainer Joswig
Subject: Re: Bottleneck rule
Date: 
Message-ID: <joswig-465263.21495616072008@news-europe.giganews.com>
In article 
<····································@a1g2000hsb.googlegroups.com>,
 Francogrex <······@grex.org> wrote:

> I wrote a function which loops through a long list of strings (about a
> 10000) modifies some according to some specified rules and finally it
> groups similar strings together. When this is run (even loaded from a
> compiled .fas file), it takes about 2 hours to complete (it's true at
> some level there is an iteration through an array of 10 million rows
> (10000*10000). I was sreading Paul Graham's book and in it he was
> talking about the bottleneck rule and optimizing speed and says: If a
> major bottleneck occurred in the inner loop of some function, we might
> add
> a declaration like the following:
> (defun bottleneck (...)
>   (do (...)
>     (...)
>     (do (...)
>       (...)
>       (declare (optimize (speed 3) (safety 0)))
>       ...) ) )
> 
> But I have no idea how to use that in my code? Anyone has any hint?
> thanks. Below I attach the function just for info.

Wow, what a mess! ;-)

To Do:

1) make sure that your news client does not break lines 

2) Don't program pascal/c/... in Lisp

3) DEFUN is not there to define local functions. Use FLET and LABELS.

4) DEFMACRO is not for defining local macros. There's MACROLET.

5) You need to introduce variables before you assign something with SETQ or SETF.
   Use local parameters of a function, or LET or something else to introduce a variable.

6) 'the Scan function' is not a useful comment.

7) use documentation to describe what a function does, not what's its name.

...

Let's stop here. What is next?

First:

break up the code in smaller pieces. Start getting rid of the large function and
bring all those DEFUNs and DEFMACROs to the top. Don't assign random variables - declare them.
Reformat your code. Document the functions.

Second:

Post again...


-------------

(defun GroupModifyStrings (intres)
  "Function to group strings by removing the last chars
  and using the edit distance (modified) to correct typing errors.
  (GroupModifystrings 'false) prints out only the output,
(GroupModifystrings 'true)
  prints out an intermediate store file to view intermediate results.
  List should be sorted by decreasing counter order so that the
modification is done on   strings with lower counts."
  ;;Remove last chars and group.
  ;;The Scan function.
  (defun scan (file)
    (with-open-file (stream file :direction :input)
      (setq m (loop for input = (read stream nil stream)
                    until (eq input stream) collect input))))
  ;;Read the files ("string" and "counter").
  (setf x (scan "C:/string.txt"))
  (setf counter (scan "C:/counter.txt"))
  ;;The while macro.
  (defmacro while (condition &rest body)
    (let ((var (gensym)))
      `(do ((,var nil (progn ,@body)))
           ((null ,condition) ,var))))
  ;;First remove the last chars and store in an array.
  (setf store (make-array (list (length x) 1)))
  (dotimes (i (length x))
    (if (not (alpha-char-p (char (nth i x) (1- (length (nth i x))))))
        (setf (nth i x) (concatenate 'string (nth i x) "Z"))))
  (setf i 0)
  (while (< i (length x))
         (setf (aref store i 0)
               (while (AND (alpha-char-p (char (nth i x) (1- (length (nth i x))))) (> (length (nth i x)) 1))
                      (setf (nth i x) (string-right-trim (list (char (nth i x) (1- (length (nth i x))))) (nth i x)))))
         (incf i))
  ;;Store the array in a list.
  (setf ls nil)
  (dotimes (i (array-dimension store 0))
    (setf ls (append ls (list (aref store i 0)))))
  ;;Tabulate like the pivot table.
  (setf temp (make-array (list (length ls)) :initial-element '0))
  (dotimes (i (length ls))
    (setf j (1+ i))
    (if (not (equal (nth i ls) 'zzz)) (setf (aref temp i) (+ (aref temp i)(nth i counter))))
    (while (< j (length ls))
           (if (AND (equal (nth i ls) (nth j ls)) (not (equal (nth j ls) 'zzz)))
               (AND (setf (aref temp i) (+ (aref temp i) (nth j counter)))
                    (setf (nth j ls) 'zzz) (setf (nth j counter) 'zzz)))
           (incf j)))
  ;;Remove the indicators.
  (setf counter (coerce temp 'list))
  (delete 'zzz ls)
  (delete 0 counter)
  ;;Edit Distance.
  (defun distance (s1 s2)
    (let* ((width (1+ (length s1)))
           (height (1+ (length s2)))
           (d (make-array (list height width))))
      (dotimes (x width)
        (setf (aref d 0 x) x))
      (dotimes (y height)
        (setf (aref d y 0) y))
      (dotimes (x (length s1))
        (dotimes (y (length s2))
          (setf (aref d (1+ y) (1+ x))
                (min (1+ (aref d y (1+ x)))
                     (1+ (aref d (1+ y) x))
                     (+ (aref d y x)
                        (if (char= (aref s1 x) (aref s2 y))
                            0
                          1))))))
      (aref d (1- height) (1- width))))
  ;;Test the Edit distance and store raw results.
  (setf rows (*(1- (length ls))(length ls)))
  (setf store (make-array (list rows 4)))
  (setf k 0)
  (dotimes (i (length ls))
    (dotimes (j (length ls))
      (when (not (equal (nth i ls) (nth j ls)))
        (setf (aref store k 0) (nth i ls))
        (setf (aref store k 1) (nth j ls) )
        (setf (aref store k 2) (distance (nth i ls)(nth j ls)))
        (incf k)
        )))
  (dotimes (i (array-dimension store 0))
    (dotimes (j (length ls))
      (if (eq (aref store i 0) (nth j ls)) (setf (aref store i 3) (nth j counter)))
      ))
  ;;Extra Rule: The last 3 digits must be equal.
  (dotimes (i (array-dimension store 0))
    (if (AND (not (equal (loop for n from 1 to 3 collect (digit-char-p
                                                          (char (aref store i 0) (- (length (aref store i 0)) n))))
                         (loop for n from 1 to 3 collect (digit-char-p
                                                          (char (aref store i 1) (- (length (aref store i 1)) n))))))
             (< (aref store i 2) 2))
        (setf (aref store i 2) 20)))
  ;;conditional printout of the intermediate array store after applying rule.
  (when (equal intres 'true)
    (setf out-stream (open "storeint.txt" :direction :output))
    (print store out-stream)
    (close out-stream))
  ;;Map (redefine) similar strings to just one
  (dotimes (j (array-dimension store 0))
    (if (< (aref store j 2) 2)
        (dotimes (i (array-dimension store 0))
          (if (AND (equal (aref store i 0) (aref store j 1)) (< (aref store i 2) 2))
              (setf (aref store i 0) (aref store j 0))))))
  ;;The position helper function
  (defun position-of-min (list ordering)
    (let ((min-index (length ordering))
          (min-value nil)
          (position -1)
          (i 0))
      (dolist (x list (values position min-value))
        (let ((pos (position x ordering)))
          (when (and pos (<= pos min-index))
            (setf position i min-index pos min-value x)))
        (incf i))))
  ;;Grouped into a list.
  (setf chunck (/ (array-dimension store 0) (length ls)))
  (setf chorder (loop for i from 0 to chunck collect i))
  (setf jj 0)
  (setf n 0)
  (setf arr (make-array (list (length ls) 2)))
  (while (< jj rows)
         (setf place (loop for i from jj below (+ jj chunck) collect (aref store i 2)))
         (setf (aref arr n 0) (aref store (+ jj (position-of-min place chorder)) 0))
         (setf (aref arr n 1)  (aref store (+ jj (position-of-min place chorder)) 3))
         (setf n (1+ n))
         (setf jj (+ jj chunck)))
  ;;Store the arr array in a list.
  ;;Reset ls and Counter first.
  (setf ls nil)
  (setf counter nil)
  (dotimes (i (array-dimension arr 0))
    (setf ls (append ls (list (aref arr i 0))))
    (setf counter (append counter (list (aref arr i 1))))
    )
  ;;Tabulate like the pivot table.
  (setf temp (make-array (list (length ls)) :initial-element '0))
  (dotimes (i (length ls))
    (setf j (1+ i))
    (if (not (equal (nth i ls) 'zzz)) (setf (aref temp i) (+ (aref temp i)(nth i counter))))
    (while (< j (length ls))
           (if (AND (equal (nth i ls) (nth j ls)) (not (equal (nth j ls) 'zzz)))
               (AND (setf (aref temp i) (+ (aref temp i) (nth j counter)))
                    (setf (nth j ls) 'zzz) (setf (nth j counter) 'zzz)))
           (incf j)))
  ;;Remove the indicators.
  (setf counter (coerce temp 'list))
  (delete 'zzz ls)
  (delete 0 counter)
  ;;Print output the lists of grouped strings and counts.
  (setf out-stream (open "output.txt" :direction :output))
  (dotimes (n (length ls))
    (print (list (nth n ls)(nth n counter)) out-stream))
  (close out-stream)
  )


> ----------------------------------------------------------------------------------------------------------------------------------------
> (defun GroupModifyStrings (intres)
>   "Function to group strings by removing the last chars
>   and using the edit distance (modified) to correct typing errors.
>   (GroupModifystrings 'false) prints out only the output,
> (GroupModifystrings 'true)
>   prints out an intermediate store file to view intermediate results.
>   List should be sorted by decreasing counter order so that the
> modification is done on   strings with lower counts."
>   ;;Remove last chars and group.
>   ;;The Scan function.
>   (defun scan (file)
>     (with-open-file (stream file :direction :input)
>       (setq m (loop for input = (read stream nil stream)
>                 until (eq input stream) collect input))))
>   ;;Read the files ("string" and "counter").
>   (setf x (scan "C:/string.txt"))
>   (setf counter (scan "C:/counter.txt"))
>   ;;The while macro.
>   (defmacro while (condition &rest body)
>     (let ((var (gensym)))
>       `(do ((,var nil (progn ,@body)))
>          ((null ,condition) ,var))))
>   ;;First remove the last chars and store in an array.
>   (setf store (make-array (list (length x) 1)))
>   (dotimes (i (length x))
>     (if (not (alpha-char-p (char (nth i x) (1- (length (nth i x))))))
>         (setf (nth i x) (concatenate 'string (nth i x) "Z"))))
>   (setf i 0)
>   (while (< i (length x))
>     (setf (aref store i 0)
>       (while (AND (alpha-char-p (char (nth i x) (1- (length (nth i
> x))))) (> (length (nth i x)) 1))
>         (setf (nth i x) (string-right-trim (list (char (nth i x) (1-
> (length (nth i x))))) (nth i x)))))
>     (incf i))
>   ;;Store the array in a list.
>   (setf ls nil)
>   (dotimes (i (array-dimension store 0))
>     (setf ls (append ls (list (aref store i 0)))))
>   ;;Tabulate like the pivot table.
>   (setf temp (make-array (list (length ls)) :initial-element '0))
>   (dotimes (i (length ls))
>     (setf j (1+ i))
>     (if (not (equal (nth i ls) 'zzz)) (setf (aref temp i) (+ (aref
> temp i)(nth i counter))))
>     (while (< j (length ls))
>       (if (AND (equal (nth i ls) (nth j ls)) (not (equal (nth j ls)
> 'zzz)))
>           (AND (setf (aref temp i) (+ (aref temp i) (nth j counter)))
>             (setf (nth j ls) 'zzz) (setf (nth j counter) 'zzz)))
>       (incf j)))
>   ;;Remove the indicators.
>   (setf counter (coerce temp 'list))
>   (delete 'zzz ls)
>   (delete 0 counter)
>   ;;Edit Distance.
>   (defun distance (s1 s2)
>     (let* ((width (1+ (length s1)))
>            (height (1+ (length s2)))
>            (d (make-array (list height width))))
>       (dotimes (x width)
>         (setf (aref d 0 x) x))
>       (dotimes (y height)
>         (setf (aref d y 0) y))
>       (dotimes (x (length s1))
>         (dotimes (y (length s2))
>           (setf (aref d (1+ y) (1+ x))
>             (min (1+ (aref d y (1+ x)))
>               (1+ (aref d (1+ y) x))
>               (+ (aref d y x)
>                  (if (char= (aref s1 x) (aref s2 y))
>                      0
>                      1))))))
>       (aref d (1- height) (1- width))))
>   ;;Test the Edit distance and store raw results.
>   (setf rows (*(1- (length ls))(length ls)))
>   (setf store (make-array (list rows 4)))
>   (setf k 0)
>   (dotimes (i (length ls))
>     (dotimes (j (length ls))
>       (when (not (equal (nth i ls) (nth j ls)))
>         (setf (aref store k 0) (nth i ls))
>         (setf (aref store k 1) (nth j ls) )
>         (setf (aref store k 2) (distance (nth i ls)(nth j ls)))
>         (incf k)
>         )))
>   (dotimes (i (array-dimension store 0))
>     (dotimes (j (length
> ls))
>       (if (eq (aref store i 0) (nth j ls)) (setf (aref store i 3) (nth
> j counter)))
>       ))
>   ;;Extra Rule: The last 3 digits must be equal.
>   (dotimes (i (array-dimension store 0))
>     (if (AND (not (equal (loop for n from 1 to 3 collect (digit-char-p
> (char (aref store i 0) (- (length (aref store i 0)) n))))
>                     (loop for n from 1 to 3 collect (digit-char-p
> (char (aref store i 1) (- (length (aref store i 1)) n))))))
>           (< (aref store i 2) 2))
>         (setf (aref store i 2) 20)))
>   ;;conditional printout of the intermediate array store after
> applying rule.
>   (when (equal intres 'true)
>     (setf out-stream (open "storeint.txt" :direction :output))
>     (print store out-stream)
>     (close out-stream))
>   ;;Map (redefine) similar strings to just one
>   (dotimes (j (array-dimension store 0))
>     (if (< (aref store j 2) 2)
>         (dotimes (i (array-dimension store 0))
>           (if (AND (equal (aref store i 0) (aref store j 1)) (< (aref
> store i 2) 2))
>               (setf (aref store i 0) (aref store j 0))))))
>   ;;The position helper function
>   (defun position-of-min (list ordering)
>     (let ((min-index (length ordering))
>           (min-value nil)
>           (position -1)
>           (i 0))
>       (dolist (x list (values position min-value))
>         (let ((pos (position x ordering)))
>           (when (and pos (<= pos min-index))
>             (setf position i min-index pos min-value x)))
>         (incf i))))
>   ;;Grouped into a list.
>   (setf chunck (/ (array-dimension store 0) (length ls)))
>   (setf chorder (loop for i from 0 to chunck collect i))
>   (setf jj 0)
>   (setf n 0)
>   (setf arr (make-array (list (length ls) 2)))
>   (while (< jj rows)
>     (setf place (loop for i from jj below (+ jj chunck) collect (aref
> store i 2)))
>     (setf (aref arr n 0) (aref store (+ jj (position-of-min place
> chorder)) 0))
>     (setf (aref arr n 1)  (aref store (+ jj (position-of-min place
> chorder)) 3))
>     (setf n (1+ n))
>     (setf jj (+ jj chunck)))
>   ;;Store the arr array in a list.
>   ;;Reset ls and Counter first.
>   (setf ls nil)
>   (setf counter nil)
>   (dotimes (i (array-dimension arr 0))
>     (setf ls (append ls (list (aref arr i 0))))
>     (setf counter (append counter (list (aref arr i 1))))
>     )
>   ;;Tabulate like the pivot table.
>   (setf temp (make-array (list (length ls)) :initial-element '0))
>   (dotimes (i (length ls))
>     (setf j (1+ i))
>     (if (not (equal (nth i ls) 'zzz)) (setf (aref temp i) (+ (aref
> temp i)(nth i counter))))
>     (while (< j (length ls))
>       (if (AND (equal (nth i ls) (nth j ls)) (not (equal (nth j ls)
> 'zzz)))
>           (AND (setf (aref temp i) (+ (aref temp i) (nth j counter)))
>             (setf (nth j ls) 'zzz) (setf (nth j counter) 'zzz)))
>       (incf j)))
>   ;;Remove the indicators.
>   (setf counter (coerce temp 'list))
>   (delete 'zzz ls)
>   (delete 0 counter)
>   ;;Print output the lists of grouped strings and counts.
>   (setf out-stream (open "output.txt" :direction :output))
>   (dotimes (n (length ls))
>     (print (list (nth n ls)(nth n counter)) out-stream))
>   (close out-stream)
>   )
> 
> Example:
> string.txt would contain:
> "string1F"
> "string1L"
> "strinf1P"
> "test2A"
> "trest2Z"
> "string2G"
> "string2T"
> 
> counter.txt would contain:
> 42
> 25
> 12
> 9
> 6
> 4
> 3
> (GROUPMODIFYSTRINGS 'true) would output:
> ("string1" 79)
> ("test2" 15)
> ("string2" 7)

-- 
http://lispm.dyndns.org/