From: Eric Dedieu
Subject: Lucid 3.0 bug: compiled "dotimes"
Date: 
Message-ID: <7270@lifia.imag.fr>
Hello,

Another bug found in Lucid Common List 3.0.1 on sun 3
(It was a good day for me)
It seems to be the "dotimes" instruction that wants
to try our sense of humor when compiled in production mode

The sun4 version works correctly -- I was wondering why my
program didn't do the same thing on sun3, when it fell on
an floating-point-underflow-error. I examined this and found
a bug, but I'd still like to know what is exactly a
floating-point UNDERFLOW error...

Full info about what I found:

> ;;; float vector constructor

(defmacro vf (&body x)
  `(make-array ,(length x) :initial-contents (list ,.x) :element-type 'float))
Vf
> ;;; A well-known distance

(defun dist1 (v1 v2 &aux (res 0.0))
  (declare (type (simple-array float (*)) v1 v2) (float res))
  (dotimes (i 2) ; originally (length v1)
    (incf res
	  (the float (abs (the float (- (the float (aref v1 i))
					(the float (aref v2 i))))))))
  res)
Dist1
>
;;; Examples

(dist1 (vf 0.0 0.0) (vf 0.0 3.0))
3.0
> (dist1 (vf 1.0 1.0) (vf 3.0 3.0))
4.0
> ;;; Compiler in production mode

(report-compiler-options)
;;;  Compiler options are:
;;;    Target.............SUN-68881
;;;    Egc........................T
;;;    Show-Optimizations.......Nil
;;;    Undef-Warnings.............T
;;;    Warnings...................T
;;;    Documentation..............T
;;;    Read-Safety..............Nil
;;;    Write-Safety.............Nil
;;;    Bounds-Check.............Nil
;;;    File-Messages..............T
;;;    Messages.................Nil
;;;    Fast-Entry...............Nil
;;;    Tail-Merge.................T
;;;    Notinline................Nil
;;;  Compiler optimizations are:
;;;    Speed......................3
;;;    Safety.....................1
;;;    Space......................0
;;;    Compilation-Speed..........0

> (compile 'dist1)
Dist1
>
;;; The starting point of my investigations...
;;; v1 & v2 obtained via my application

(dist1 v1 v2)

>>Error: A condition of type FLOATING-POINT-UNDERFLOW occurred.

DIST1:
   Required arg 0 (V1): #(0.0 0.0)
   Required arg 1 (V2): #(0.0 2.1457270408163267)
:C  0: Use de-normalized (or zero) result
:A  1: Abort to Lisp Top Level

-> :c
Use de-normalized (or zero) result
3.666E-321
>
;;; Enjoy: same examples as above

(dist1 (vf 0.0 0.0) (vf 0.0 3.0))
0.0
>
(dist1 (vf 1.0 1.0) (vf 3.0 3.0))
3.0
>
;;; The same bound to variables

(setq vv1 (vf 0.0 0.0)
      vv2 (vf 0.0 3.0))
3.0
> (dist1 vv1 vv2)
>>Error: A condition of type FLOATING-POINT-UNDERFLOW occurred.

DIST1:
   Required arg 0 (V1): #(0.0 0.0)
   Required arg 1 (V2): #(0.0 3.0)
:C  0: Use de-normalized (or zero) result
:A  1: Abort to Lisp Top Level

-> :a
Abort to Lisp Top Level
Back to Lisp Top Level

---------------------------------------------

Disassembling dist1 and other:

;;; Tata works well
;;; The code is intersting because it shows that aref 0 and 1
;;; are separated by 8 bytes (= 13 - 5)

(defun tata (v1 v2 &aux (res 0.0))
  (declare (type (simple-array float (*)) v1 v2) (float res))
  (incf res
	(the float (abs (the float (- (the float (aref v1 0))
				      (the float (aref v2 0)))))))
  (incf res
	(the float (abs (the float (- (the float (aref v1 1))
				      (the float (aref v2 1)))))))
  res)

> (disassemble 'tata)
        CMPI.W      #2, D6
        BEQ         L1
        MOVEA.L     (227,A4), A2        ; Sq-Incorrect-No-Args
        JSR         (5,A2)
L1:     MOVEA.L     (-12,A6), A0
        MOVEA.L     (17,A5), A2 ; 0.0
        FMOVE.D     (6,A2), FP1
        FMOVE.D     (5,A0), FP2
        MOVEA.L     (-16,A6), A1
        FMOVE.D     (5,A1), FP3
        FSUB.X      FP3, FP2
        FABS.X      FP2, FP3
        FADD.X      FP3, FP1
        FMOVE.D     (13,A0), FP3
        FMOVE.D     (13,A1), FP2
        FSUB.X      FP2, FP3
        FABS.X      FP3, FP2
        FADD.X      FP2, FP1
        MOVEA.L     (907,A4), A2        ; Sq-Cons-Float
        JSR         (5,A2)
        FMOVE.D     FP1, (6,A2)
        MOVE.L      A2, (4,A6)
        MOVEA.L     (-4,A6), A5
        LEA         (-8,A6), A7
        MOVEA.L     (A7), A3
        JMP         (A3)

;;; Dist1 now:
;;; The index for dotimes hasa step of 4, I guess the LSL instruction
;;; is to achieve the 8 bytes seen above
;;; I think the bug is that there should not be applied to the counter
;;; (here D0) but to a copy. (see the === marks)

> (disassemble 'dist1)
        CMPI.W      #2, D6
        BEQ         L1
        MOVEA.L     (227,A4), A2        ; Sq-Incorrect-No-Args
        JSR         (5,A2)
L1:     MOVEA.L     (-12,A6), A0
        MOVEA.L     (17,A5), A2 ; 0.0
        FMOVE.D     (6,A2), FP1
        MOVEQ       #0, D0
L3:     CMPI.L      #8, D0
        BGE         L2
        LSL.L       #1, D0               ===
        FMOVE.D     (5,A0,D0.L), FP2
        MOVEA.L     (-16,A6), A1
        LSL.L       #1, D0               ===
        FMOVE.D     (5,A1,D0.L), FP3
        FSUB.X      FP3, FP2
        FABS.X      FP2, FP3
        FADD.X      FP3, FP1
        ADDQ.L      #4, D0
        BRA         L3
L2:     MOVEA.L     (907,A4), A2        ; Sq-Cons-Float
        JSR         (5,A2)
        FMOVE.D     FP1, (6,A2)
        MOVE.L      A2, (4,A6)
        MOVEA.L     (-4,A6), A5
        LEA         (-8,A6), A7
        MOVEA.L     (A7), A3
        JMP         (A3)

;;; The final test
;;; It is the same function, with i being put in variables
;;; at the critical point. This version works.

>
(defun dist1 (v1 v2 &aux (res 0.0))
  (declare (type (simple-array double-float (*)) v1 v2) (float res))
  (dotimes (i 2) 
    (incf res  
          (the float (abs (the float (- (the float (aref v1 (setq x i)))
                                        (the float (aref v2 (setq y i))))))))
    )
  res) 
Dist1
> (compile 'dist1) 
;;; Warning: Free variable X assumed to be special
;;; Warning: Free variable Y assumed to be special
Dist1

;;; Here, the LSL instruction is found twice too,
;;; but they're not applied to the counter (now D2)
;;; So the preceding code was a real bug...

> (disassemble 'dist1)
        CMPI.W      #2, D6
        BEQ         L1
        MOVEA.L     (227,A4), A2        ; Sq-Incorrect-No-Args
        JSR         (5,A2)
L1:     MOVEA.L     (-12,A6), A0
        MOVEA.L     (17,A5), A2 ; 0.0
        FMOVE.D     (6,A2), FP5
        MOVEQ       #0, D2
L3:     CMPI.L      #8, D2
        BGE         L2
        MOVEA.L     (21,A5), A1 ; X
        MOVE.L      D2, (3,A1)
        MOVE.L      D2, D1
        LSL.L       #1, D1                   ===
        FMOVE.D     (5,A0,D1.L), FP6
        FMOVE.X     FP6, FP4
        LEA         (-16,A6), A7
        MOVEA.L     (25,A5), A1 ; Y
        MOVE.L      D2, (3,A1)
        MOVE.L      D2, D0
        MOVEA.L     (-16,A6), A1
        LSL.L       #1, D0                   ===
        FMOVE.D     (5,A1,D0.L), FP6
        FMOVE.X     FP6, FP3
        LEA         (-16,A6), A7
        FMOVE.X     FP4, FP6
        FSUB.X      FP3, FP6
        FMOVE.X     FP6, FP2
        LEA         (-16,A6), A7
        FABS.X      FP2, FP6
        FMOVE.X     FP6, FP1
        LEA         (-16,A6), A7
        FMOVE.X     FP5, FP6
        FADD.X      FP1, FP6
        FMOVE.X     FP6, FP5
        LEA         (-16,A6), A7
        ADDQ.L      #4, D2
        BRA         L3
L2:     MOVEA.L     (907,A4), A2        ; Sq-Cons-Float
        JSR         (5,A2)
        FMOVE.D     FP5, (6,A2)
        MOVE.L      A2, (4,A6)
        MOVEA.L     (-4,A6), A5
        LEA         (-8,A6), A7
        MOVEA.L     (A7), A3
        JMP         (A3)
Nil

------------------------------------