From: An[z]elmus
Subject: How do I shorten or split this function
Date: 
Message-ID: <k2b8o4d6gvg8f0hd43guirto4i7fdvd210@4ax.com>
Maybe I am asking too much, I apologize.
Using structure here would make the code clearer, but lists are more
handy.
 
;;The input is a list of lists each representing an entry
;;in the catalog of a library.
;;Attempt to normalize the records so that every one is composed by
;;exactly 6 fields.
;;Where a field is missing, an "empty-field" sign is inserted
;;The final structure of a record is like this: <header> <author>
;;<title1> <title2> <title3> <publication data>.
;;In the input lists the fields <header> <title> <publication data>
;;are always present while the others may be missing.
;;The field <author> when present always starts with the second
;;name all in capital letters.
;;The field <title3> when present always start with "["  
   
(defun normalize-rec2 (lst)
  (let ((result '())
        (empty "<empty>"))
    (dolist (rec lst (reverse result))
      (cond
       ((= (length rec) 3)
        (let ((norm-rec '()))
          (setq norm-rec (cons (first rec) norm-rec))
          (setq norm-rec (cons empty norm-rec))
          (setq norm-rec (cons (second rec) norm-rec))
          (setq norm-rec (cons empty norm-rec))
          (setq norm-rec (cons empty norm-rec))
          (setq norm-rec (cons (third rec) norm-rec))
          (setq result(cons (reverse norm-rec) result))))
       ((= (length rec) 4)
        (cond
         ((check-uppercase 3 (second rec))
          (let ((norm-rec '()))
            (setq norm-rec (cons (first rec) norm-rec))
            (setq norm-rec (cons (second rec) norm-rec))
            (setq norm-rec (cons (third rec) norm-rec))
            (setq norm-rec (cons empty norm-rec))
            (setq norm-rec (cons empty norm-rec))
            (setq norm-rec (cons (fourth rec) norm-rec))
            (setq result(cons (reverse norm-rec) result))))
         (t
          (cond
           ((equal (aref (third rec) 0) #\[) 
            (let ((norm-rec '()))
              (setq norm-rec (cons (first rec) norm-rec))
              (setq norm-rec (cons empty norm-rec))
              (setq norm-rec (cons (second rec) norm-rec))
              (setq norm-rec (cons empty norm-rec))
              (setq norm-rec (cons (third rec) norm-rec))
              (setq norm-rec (cons (fourth rec) norm-rec))
              (setq result (cons (reverse norm-rec) result))))
           (t
            (let ((norm-rec '()))
              (setq norm-rec (cons (first rec) norm-rec))
              (setq norm-rec (cons empty norm-rec))
              (setq norm-rec (cons (second rec) norm-rec))
              (setq norm-rec (cons (third rec) norm-rec))
              (setq norm-rec (cons empty norm-rec))            
              (setq norm-rec (cons (fourth rec) norm-rec))
              (setq result (cons (reverse norm-rec) result))))))))
       ((= (length rec) 5)
        (let ((norm-rec '()))
          (setq norm-rec (cons (first rec) norm-rec))
          (setq norm-rec (cons (second rec) norm-rec))
          (setq norm-rec (cons (third rec) norm-rec))
          (cond
           ((equal (aref (fourth rec) 0) #\[)
            (setq norm-rec (cons empty norm-rec))
            (setq norm-rec (cons (fourth rec) norm-rec)))
           (t
            (setq norm-rec (cons (fourth rec) norm-rec))
            (setq norm-rec (cons empty norm-rec))))
          (setq norm-rec (cons (fifth rec) norm-rec))
          (setq result (cons (reverse norm-rec) result))))
       (t (setq result (cons rec result)))))))   


;;check if n charatacters at the beginning of a string are all upper
;;case
(defun check-uppercase (n str)
  (dotimes (x n t)
    (if (not (upper-case-p (aref str x)))
      (return))))

From: Marco Antoniotti
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <8c2ab61e-9d78-42a9-be1f-e2e03a07df48@r36g2000prf.googlegroups.com>
On Jan 31, 12:14 pm, "An[z]elmus" <·······@somewhere.org> wrote:

Untested...

Cheers
Marco


(defun check-uppercase (n str)
  (loop for i below n always (upper-case-p (char str i)))


(defstruct (rec (:type list)
                (:constructor make-rec (header
                                        author
                                        title1
                                        title2
                                        title3
                                        publication-data)))
  header
  author
  title1
  title2
  title3
  publication-data)


(defun normalize-rec2 (recs-list)
  (loop with empty = "<empty>"
        for rec in recs-list
        collect (case (length rec)
                  (3 (make-rec (first rec) empty (second rec) empty
empty (third rec)))
                  (4 (cond ((check-uppercase 3 (second rec))
                            (make-rec (first rec) (second rec) (third
rec) empty empty (fourth rec)))
                           ((char= (char (third rec) 0) #\[)
                            (make-rec (first rec) empty (second rec)
empty (third rec) (fourth rec)))
                           (t
                            (make-rec (first rec) empty (second rec)
(third rec) empty (fourth rec)))
                           ))
                  (5 (let ((fourth-starts-with-[(char= (char (fourth
rec) 0) #\[)))
                       (make-rec (first rec)
                                 (second rec)
                                 (third rec)
                                 (if fourth-starts-with-[
                                     empty
                                     (fourth rec))
                                 (if (not fourth-starts-with-[)
                                     (fourth rec)
                                     empty)
                                 (fifth rec))))
                  (t (apply 'make-rec rec)))))



> Maybe I am asking too much, I apologize.
> Using structure here would make the code clearer, but lists are more
> handy.
>
> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.
> ;;Where a field is missing, an "empty-field" sign is inserted
> ;;The final structure of a record is like this: <header> <author>
> ;;<title1> <title2> <title3> <publication data>.
> ;;In the input lists the fields <header> <title> <publication data>
> ;;are always present while the others may be missing.
> ;;The field <author> when present always starts with the second
> ;;name all in capital letters.
> ;;The field <title3> when present always start with "["  
>
> (defun normalize-rec2 (lst)
>   (let ((result '())
>         (empty "<empty>"))
>     (dolist (rec lst (reverse result))
>       (cond
>        ((= (length rec) 3)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (setq result(cons (reverse norm-rec) result))))
>        ((= (length rec) 4)
>         (cond
>          ((check-uppercase 3 (second rec))
>           (let ((norm-rec '()))
>             (setq norm-rec (cons (first rec) norm-rec))
>             (setq norm-rec (cons (second rec) norm-rec))
>             (setq norm-rec (cons (third rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq result(cons (reverse norm-rec) result))))
>          (t
>           (cond
>            ((equal (aref (third rec) 0) #\[)
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))
>            (t
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))            
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))))))
>        ((= (length rec) 5)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (cond
>            ((equal (aref (fourth rec) 0) #\[)
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec)))
>            (t
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))))
>           (setq norm-rec (cons (fifth rec) norm-rec))
>           (setq result (cons (reverse norm-rec) result))))
>        (t (setq result (cons rec result)))))))  
>
> ;;check if n charatacters at the beginning of a string are all upper
> ;;case
> (defun check-uppercase (n str)
>   (dotimes (x n t)
>     (if (not (upper-case-p (aref str x)))
>       (return))))
From: Marco Antoniotti
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <d29729b7-1f74-4c38-9486-825aba5bbd0a@n2g2000vbl.googlegroups.com>
On Jan 31, 12:14 pm, "An[z]elmus" <·······@somewhere.org> wrote:
> Maybe I am asking too much, I apologize.
> Using structure here would make the code clearer, but lists are more
> handy.
>
> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.
> ;;Where a field is missing, an "empty-field" sign is inserted
> ;;The final structure of a record is like this: <header> <author>
> ;;<title1> <title2> <title3> <publication data>.
> ;;In the input lists the fields <header> <title> <publication data>
> ;;are always present while the others may be missing.
> ;;The field <author> when present always starts with the second
> ;;name all in capital letters.
> ;;The field <title3> when present always start with "["  
>
> (defun normalize-rec2 (lst)
>   (let ((result '())
>         (empty "<empty>"))
>     (dolist (rec lst (reverse result))
>       (cond
>        ((= (length rec) 3)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (setq result(cons (reverse norm-rec) result))))
>        ((= (length rec) 4)
>         (cond
>          ((check-uppercase 3 (second rec))
>           (let ((norm-rec '()))
>             (setq norm-rec (cons (first rec) norm-rec))
>             (setq norm-rec (cons (second rec) norm-rec))
>             (setq norm-rec (cons (third rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq result(cons (reverse norm-rec) result))))
>          (t
>           (cond
>            ((equal (aref (third rec) 0) #\[)
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))
>            (t
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))            
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))))))
>        ((= (length rec) 5)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (cond
>            ((equal (aref (fourth rec) 0) #\[)
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec)))
>            (t
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))))
>           (setq norm-rec (cons (fifth rec) norm-rec))
>           (setq result (cons (reverse norm-rec) result))))
>        (t (setq result (cons rec result)))))))  
>
> ;;check if n charatacters at the beginning of a string are all upper
> ;;case
> (defun check-uppercase (n str)
>   (dotimes (x n t)
>     (if (not (upper-case-p (aref str x)))
>       (return))))

Of course... a Ruby version will appear shortly.

Cheers
--
Marco
From: William James
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <gm262d028cq@enews4.newsguy.com>
Marco Antoniotti wrote:

> On Jan 31, 12:14�pm, "An[z]elmus" <·······@somewhere.org> wrote:
> > Maybe I am asking too much, I apologize.
> > Using structure here would make the code clearer, but lists are more
> > handy.
> > 
> > ;;The input is a list of lists each representing an entry
> > ;;in the catalog of a library.
> > ;;Attempt to normalize the records so that every one is composed by
> > ;;exactly 6 fields.
> > ;;Where a field is missing, an "empty-field" sign is inserted
> > ;;The final structure of a record is like this: <header> <author>
> > ;;<title1> <title2> <title3> <publication data>.
> > ;;In the input lists the fields <header> <title> <publication data>
> > ;;are always present while the others may be missing.
> > ;;The field <author> when present always starts with the second
> > ;;name all in capital letters.
> > ;;The field <title3> when present always start with "[" �
> > 
> > (defun normalize-rec2 (lst)
> > � (let ((result '())
> > � � � � (empty "<empty>"))
> > � � (dolist (rec lst (reverse result))
> > � � � (cond
> > � � � �((= (length rec) 3)
> > � � � � (let ((norm-rec '()))
> > � � � � � (setq norm-rec (cons (first rec) norm-rec))
> > � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � (setq norm-rec (cons (second rec) norm-rec))
> > � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � (setq norm-rec (cons (third rec) norm-rec))
> > � � � � � (setq result(cons (reverse norm-rec) result))))
> > � � � �((= (length rec) 4)
> > � � � � (cond
> > � � � � �((check-uppercase 3 (second rec))
> > � � � � � (let ((norm-rec '()))
> > � � � � � � (setq norm-rec (cons (first rec) norm-rec))
> > � � � � � � (setq norm-rec (cons (second rec) norm-rec))
> > � � � � � � (setq norm-rec (cons (third rec) norm-rec))
> > � � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � � (setq norm-rec (cons (fourth rec) norm-rec))
> > � � � � � � (setq result(cons (reverse norm-rec) result))))
> > � � � � �(t
> > � � � � � (cond
> > � � � � � �((equal (aref (third rec) 0) #\[)
> > � � � � � � (let ((norm-rec '()))
> > � � � � � � � (setq norm-rec (cons (first rec) norm-rec))
> > � � � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � � � (setq norm-rec (cons (second rec) norm-rec))
> > � � � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � � � (setq norm-rec (cons (third rec) norm-rec))
> > � � � � � � � (setq norm-rec (cons (fourth rec) norm-rec))
> > � � � � � � � (setq result (cons (reverse norm-rec) result))))
> > � � � � � �(t
> > � � � � � � (let ((norm-rec '()))
> > � � � � � � � (setq norm-rec (cons (first rec) norm-rec))
> > � � � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � � � (setq norm-rec (cons (second rec) norm-rec))
> > � � � � � � � (setq norm-rec (cons (third rec) norm-rec))
> > � � � � � � � (setq norm-rec (cons empty norm-rec)) � � � � � �
> > � � � � � � � (setq norm-rec (cons (fourth rec) norm-rec))
> > � � � � � � � (setq result (cons (reverse norm-rec) result))))))))
> > � � � �((= (length rec) 5)
> > � � � � (let ((norm-rec '()))
> > � � � � � (setq norm-rec (cons (first rec) norm-rec))
> > � � � � � (setq norm-rec (cons (second rec) norm-rec))
> > � � � � � (setq norm-rec (cons (third rec) norm-rec))
> > � � � � � (cond
> > � � � � � �((equal (aref (fourth rec) 0) #\[)
> > � � � � � � (setq norm-rec (cons empty norm-rec))
> > � � � � � � (setq norm-rec (cons (fourth rec) norm-rec)))
> > � � � � � �(t
> > � � � � � � (setq norm-rec (cons (fourth rec) norm-rec))
> > � � � � � � (setq norm-rec (cons empty norm-rec))))
> > � � � � � (setq norm-rec (cons (fifth rec) norm-rec))
> > � � � � � (setq result (cons (reverse norm-rec) result))))
> > � � � �(t (setq result (cons rec result))))))) �
> > 
> > ;;check if n charatacters at the beginning of a string are all upper
> > ;;case
> > (defun check-uppercase (n str)
> > � (dotimes (x n t)
> > � � (if (not (upper-case-p (aref str x)))
> > � � � (return))))
> 
> Of course... a Ruby version will appear shortly.

I think you're right.

Ruby:


def normalize list
  return list  if list.size == 6
  empty = "<empty>"
  if 3 == list.size
    h, t, p = list
    return [h, empty, t, empty, empty, p]
  end
  norm_rec = [ list.first, empty, empty, empty, empty, list.last ]
  i = 2
  list[1...-1].each{|item|
    first_word = item[ /^[a-z][ a-z-]*/i ]
    if first_word and first_word == first_word.upcase
      norm_rec[1] = item
    elsif item[0,1] == "["
      norm_rec[4] = item
    else
      norm_rec[i] = item
      i += 1
    end
  }
  norm_rec
end


# Test.
[
  %w(header title pub-data),
  %w(header title title2 pub-data),
  %w(header author title title2 title3 pub-data),
  %w(header BROWN,Tom title pub-data),
  %w(header title BROWN,Tom title2 pub-data),
  %w(header title [title3] pub-data),

  "403 Ha 104
  DE SEIDLITZ, W.
  Les estampes japonaises
  PARIS: Librairie Hachette, 1911 - p. 271".split(/\s*\n\s*/),

  "403 Ha 105
  FAHR-BECKER, Gabriele
  Japanese prints
  KOELN: Taschen, 2007 - p. 200".split(/\s*\n\s*/)

].each{|x|
  p normalize(x)
}
  

--- output ---
["header", "<empty>", "title", "<empty>", "<empty>", "pub-data"]
["header", "<empty>", "title", "title2", "<empty>", "pub-data"]
["header", "author", "title", "title2", "title3", "pub-data"]
["header", "BROWN,Tom", "title", "<empty>", "<empty>", "pub-data"]
["header", "BROWN,Tom", "title", "title2", "<empty>", "pub-data"]
["header", "<empty>", "title", "<empty>", "[title3]", "pub-data"]
["403 Ha 104", "DE SEIDLITZ, W.", "Les estampes japonaises", "<empty>",
"<empty>", "PARIS: Librairie Hachette, 1911 - p. 271"]
["403 Ha 105", "FAHR-BECKER, Gabriele", "Japanese prints", "<empty>",
"<empty>", "KOELN: Taschen, 2007 - p. 200"]
From: Adam "Dodek" Michalik
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <4dfa4c76-4a85-4cad-8a92-4e0422ae234c@x38g2000yqj.googlegroups.com>
On 31 Sty, 19:42, "William James" <·········@yahoo.com> wrote:
> I think you're right.
>
> Ruby:

What I really don't get about this group is why on every topic some
guy shows Ruby solution to problem, without even being asked. It's
like going to car mechanic, who constantly keeps saying "I don't like
your Smart, so I'll fix it the way I would fix my Hummer".
From: Raymond Wiker
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <m2wsca59s1.fsf@RAWMBP.local>
"Adam \"Dodek\" Michalik" <·······@gmail.com> writes:

> On 31 Sty, 19:42, "William James" <·········@yahoo.com> wrote:
>> I think you're right.
>>
>> Ruby:
>
> What I really don't get about this group is why on every topic some
> guy shows Ruby solution to problem, without even being asked. It's
> like going to car mechanic, who constantly keeps saying "I don't like
> your Smart, so I'll fix it the way I would fix my Hummer".

	Dogs have fleas, sharks have remora, comp.lang.lisp has the
Ruby guy, the Mathematica guy and the F# guy. The R, M and F guys can
safely be ignored (killfiled), as they contribute just as little to
cll as fleas and remora contribute to the glory of dogs and sharks.
From: Kenneth Tilton
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <49860ff5$0$18010$607ed4bc@cv.net>
Raymond Wiker wrote:
> "Adam \"Dodek\" Michalik" <·······@gmail.com> writes:
> 
>> On 31 Sty, 19:42, "William James" <·········@yahoo.com> wrote:
>>> I think you're right.
>>>
>>> Ruby:
>> What I really don't get about this group is why on every topic some
>> guy shows Ruby solution to problem, without even being asked. It's
>> like going to car mechanic, who constantly keeps saying "I don't like
>> your Smart, so I'll fix it the way I would fix my Hummer".
> 
> 	Dogs have fleas, sharks have remora, comp.lang.lisp has the
> Ruby guy, the Mathematica guy and the F# guy. The R, M and F guys can
> safely be ignored (killfiled), as they contribute just as little to
> cll as fleas and remora contribute to the glory of dogs and sharks.

What is the sound of no trolls posting?

You just listed the three most popular c.l.l denizens based on responses 
thereto. I suppose you could just read the responses thereto. Me, I like 
the studies in social ineptitude personified by Rubytwit and Frogleg.

Especially Rubytwit. He uses a toy language compared to CL, spends all 
his time on c.l.l, and is learning nothing. Kinda the poster boy for 
"Your cup is full." Has he ever disclosed his purpose in posting? 
Educational? The Joy of Fighting? Other?

curiously,kth
From: GP lisper
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <slrngod9bk.kq5.spambait@phoenix.clouddancer.com>
On Sun, 01 Feb 2009 16:11:24 -0500, <·········@gmail.com> wrote:
>
> Especially Rubytwit. He uses a toy language compared to CL, spends all 
> his time on c.l.l, and is learning nothing. Kinda the poster boy for 
> "Your cup is full." Has he ever disclosed his purpose in posting? 
> Educational? The Joy of Fighting? Other?

Response post count.  Perhaps global killfile ranking.

But most of the trolls are not a single person, some post under
multiple names-du-jour.  Some of the giveaways are a 'help me' post
with an OP followup straight into some worn out argument.  The good
side is that many of the responses to the troll of the moment have
been rather informative more times than I expected.

-- 
Lisp : 'My God, it's full of cars!'
From: TomSW
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <1036628c-beca-4a06-b2eb-d7f1dda2b263@w39g2000prb.googlegroups.com>
On Jan 31, 12:14 pm, "An[z]elmus" <·······@somewhere.org> wrote:

> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.

Is the input produced by parsing a string or list of strings? If it
is, you could almost certainly write a regular expression to handle
the various cases.
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <4749o41618lkdcoe779t7829ehs9emg1vv@4ax.com>
On Sat, 31 Jan 2009 08:28:51 -0800 (PST), TomSW
<·············@gmail.com> wrote:
>> ;;The input is a list of lists each representing an entry
>> ;;in the catalog of a library.
>> ;;Attempt to normalize the records so that every one is composed by
>> ;;exactly 6 fields.
>
>Is the input produced by parsing a string or list of strings? If it
>is, you could almost certainly write a regular expression to handle
>the various cases.

The input list is produced by reading text files where each "card" is
(should be) separated by a blank line. In the "cards" each line
(should) represent a different field.

......................................................,
403 Ha 104
DE SEIDLITZ, W.
Les estampes japonaises
PARIS: Librairie Hachette, 1911 � p. 271

403 Ha 105
FAHR-BECKER, Gabriele
Japanese prints
KOELN: Taschen, 2007 � p. 200
.....................................................,

Unfortunetly the structure of the cards is not  entirely consistent
throughout the files.
From: Thomas A. Russ
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <ymifxiwogha.fsf@blackcat.isi.edu>
"An[z]elmus" <·······@somewhere.org> writes:


> 403 Ha 104
> DE SEIDLITZ, W.
> Les estampes japonaises
> PARIS: Librairie Hachette, 1911 � p. 271

Um, doesn't this record violate the rules for detecting author names?

  (upper-case-p #\Space)  ==>  NIL

Perhaps you need a more sophisticated test?  Maybe something like

 (notany #'lower-case-p (subseq string 0 (position #\, string)))

This is, of course, where having a nice AUTHOR-FIELD-P function comes in
handy, since you only need to improve that one function to make sure
that every invocation gets fixed.

-- 
Thomas A. Russ,  USC/Information Sciences Institute
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <34ieo4dlmghhv4fmlotm2fo6apdtkh5sji@4ax.com>
On 02 Feb 2009 10:12:33 -0800, ···@sevak.isi.edu (Thomas A. Russ)
wrote:

>> 403 Ha 104
>> DE SEIDLITZ, W.
>> Les estampes japonaises
>> PARIS: Librairie Hachette, 1911 � p. 271
>
>Um, doesn't this record violate the rules for detecting author names?
>
>  (upper-case-p #\Space)  ==>  NIL
>

Right, see how carefull I choosed the example?

>Perhaps you need a more sophisticated test?  Maybe something like
>
> (notany #'lower-case-p (subseq string 0 (position #\, string)))
>
>This is, of course, where having a nice AUTHOR-FIELD-P function comes in
>handy, since you only need to improve that one function to make sure
>that every invocation gets fixed.

Indeed.
From: Pascal J. Bourguignon
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <87y6wru10c.fsf@galatea.local>
"An[z]elmus" <·······@somewhere.org> writes:

> Maybe I am asking too much, I apologize.
> Using structure here would make the code clearer, but lists are more
> handy.

Then why are you using cons cells?  Use LIST!
  
> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.
> ;;Where a field is missing, an "empty-field" sign is inserted
> ;;The final structure of a record is like this: <header> <author>
> ;;<title1> <title2> <title3> <publication data>.
> ;;In the input lists the fields <header> <title> <publication data>
> ;;are always present while the others may be missing.
> ;;The field <author> when present always starts with the second
> ;;name all in capital letters.
> ;;The field <title3> when present always start with "["  

(defun normalize-rec2 (list-of-records)
  (let ((empty "<empty>"))
    (mapcar
     (lambda (record)
       (case (length record)
         ((3) (list (first record)  empty (second record) empty empty (third record)))
         ((4) 
          (cond
            ((check-uppercase 3 (second record))
             (list (first record) (second record) (third record) empty empty (fourth record)))
            ((equal (aref (third record) 0) #\[)
             (list (first record) empty (second record) empty (third record) (fourth record)))
            (t
             (list (first record) empty (second record) (third record) empty (fourth record)))))
         ((5)
          (if (equal (aref (fourth record) 0) #\[)
              (list (first record) (second record) (third record) empty (fourth record) (fifth record))
              (list (first record) (second record) (third record) (fourth record) empty (fifth record))))
         (otherwise record)))
     list-of-records)))


Now even if you don't like structures, it's not a reason not to use them:

(defstruct (bib (:type list)) ; butter & butter money.
  (header "<empty>")
  (author "<empty>")
  (title-1 "<empty>")
  (title-2 "<empty>")
  (title-3 "<empty>")
  (pubdate "<empty>")) 

;; Do you really need "<empty>"?  In lisp the convention is to use NIL for empty stuff.


(defun normalize-rec2 (list-of-records)
  (mapcar
   (lambda (record)
     (case (length record)
       ((3)
        (make-bib :header (first record) :title-1 (second record) :author (third record)))
       ((4)
        (make-bib :header (first record)
                  (if (check-update 3 (second record)) :author :title-1) (second record)
                  (cond
                    ((check-update 3 (second record))    :title-1)
                    ((equal (aref (third record) 0) #\[) :title-3)
                    (t                                   :title-2)) (third record)
                  :pubdate (fourth record)))
       ((5)
        (make-bib :header  (first record)
                  :author  (second record)
                  :title-1 (third record)
                  (if (equal (aref (fourth record) 0) #\[)
                      :title-3 :title-2) (fourth record)
                  :pubdate (fifth record)))
       (otherwise record)))
   list-of-records))



> ;;check if n charatacters at the beginning of a string are all upper
> ;;case
> (defun check-uppercase (n str)
>   (dotimes (x n t)
>     (if (not (upper-case-p (aref str x)))
>       (return))))

(defun check-uppercase (n str)
    (loop :repeat n :for c :across str :always (upper-case-p c)))

Applied on a string of uppercase shorter than N, your function signals
an error, mine returns T.

-- 
__Pascal Bourguignon__
From: Barry Fishman
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <m3vdrvhy5f.fsf@barry_fishman.acm.org>
···@informatimago.com (Pascal J. Bourguignon) writes:

> "An[z]elmus" <·······@somewhere.org> writes:
>> ;;The input is a list of lists each representing an entry
>> ;;in the catalog of a library.
>> ;;Attempt to normalize the records so that every one is composed by
>> ;;exactly 6 fields.
>> ;;Where a field is missing, an "empty-field" sign is inserted
>> ;;The final structure of a record is like this: <header> <author>
>> ;;<title1> <title2> <title3> <publication data>.
>> ;;In the input lists the fields <header> <title> <publication data>
>> ;;are always present while the others may be missing.
>> ;;The field <author> when present always starts with the second
>> ;;name all in capital letters.
>> ;;The field <title3> when present always start with "["  
>
> (defun normalize-rec2 (list-of-records)
>   (let ((empty "<empty>"))
>     (mapcar
>      (lambda (record)
>        (case (length record)
>          ((3) (list (first record)  empty (second record) empty empty (third record)))
>          ((4) 
>           (cond
>             ((check-uppercase 3 (second record))
>              (list (first record) (second record) (third record) empty empty (fourth record)))
>             ((equal (aref (third record) 0) #\[)
>              (list (first record) empty (second record) empty (third record) (fourth record)))
>             (t
>              (list (first record) empty (second record) (third record) empty (fourth record)))))
>          ((5)
>           (if (equal (aref (fourth record) 0) #\[)
>               (list (first record) (second record) (third record) empty (fourth record) (fifth record))
>               (list (first record) (second record) (third record) (fourth record) empty (fifth record))))
>          (otherwise record)))
>      list-of-records)))
>
>...
> (defun check-uppercase (n str)
>     (loop :repeat n :for c :across str :always (upper-case-p c)))
>
> Applied on a string of uppercase shorter than N, your function signals
> an error, mine returns T.

My code was similar to Pascal's, but I think readability is helped by
breaking out functions with domain specific names.  I would generally
use nil for the empty value but, I made it a parameter so the decision
is at least localized.  It also help if your tests need to be
redone.  For example you find an author named "KY, John".

(defparameter *empty* "<empty>")

(defun author-p (string)
  "Check for author name (last name first and in uppercase)"
  (every #'upper-case-p (subseq string 0 3)))

(defun title3-p (string)
  "Check for third title (begins with bracket)"
  (char= (char string 0) #\[))

Then a seperate function to process each entry.

(defun normalize-entry (entry)
  "Normalize a catalog entry"
  (case (length entry)
    (3 (list (first entry) *empty* (second entry)
             *empty* *empty* (third entry)))
    (4 (if (author-p (second entry))
           (list (first entry) (second entry) (third entry)
                 *empty* *empty* (fourth entry))
           (if (title3-p (third entry))
               (list (first entry) *empty* (second entry)
                     *empty* (third entry) (fourth entry))
               (list (first entry) *empty* (second entry)
                     (third entry) *empty* (fourth entry)))))
    (5 (if (title3-p (fourth entry))
           (list (first entry) (second entry) (third entry)
                 *empty* (fourth entry) (fifth entry))
           (list (first entry) (second entry) (third entry)
                 (fourth entry) *empty* (fifth entry))))
    (6 entry)
    (t (format t "Bad entry: ~s~%" entry) entry)))

(defun normalize-rec (list)
  "Normalize the catalog"
  (mapcar #'normalize-entry list))

I agree that using a structure or a class for the entries is clearer in
the long run, but adds to the amount of code.

One can always later change to:

(defun normalize-rec2 (list)
  "Normalize the catalog as bib instances"
  (mapcar (lambda (entry)
            (apply #'make-bib-entry (normalize-entry entry)))
          list))

-- 
Barry Fishman
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <o75bo41c6o9nt97irmna4fi2lp3a3igrdh@4ax.com>
On Sat, 31 Jan 2009 18:08:12 -0500, Barry Fishman
<·············@acm.org> wrote:

>I agree that using a structure or a class for the entries is clearer in
>the long run, but adds to the amount of code.

Not so much after all: the functions written by Marco and by Pascal
(the second version) both use a structure, but they are short enough
and fit very comfortably in my sceen whithout having to scroll to see
the end (which is a pain because matching parenthesis no longer works
in LW).
Your solution which improves the first one by Pascal is very clear
too.
From: Thomas A. Russ
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <ymik588ogoz.fsf@blackcat.isi.edu>
Barry Fishman <·············@acm.org> writes:

Some code snipped, but I note that it is very similar to what I had also
devised.  So that is evidence that the basic principles of code
organization in Lisp are fairly standard.  ;-)

-- 
Thomas A. Russ,  USC/Information Sciences Institute
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <6td9o41hopg0v34hmkmeac6me768up29cg@4ax.com>
On Sat, 31 Jan 2009 13:13:23 +0100, ···@informatimago.com (Pascal J.
Bourguignon) wrote:

>(defun normalize-rec2 (list-of-records)
....
....
>         ((5)
>          (if (equal (aref (fourth record) 0) #\[)
>              (list (first record) (second record) (third record) empty (fourth record) (fifth record))
>              (list (first record) (second record) (third record) (fourth record) empty (fifth record))))
>         (otherwise record)))
>     list-of-records)))

Looking at the solutions proposed  by you and the others who kindly
replied to my post, I realized that I did not handled properly the
entry with five elements. I noticed that theoretically the field
missing could still be <author> and not only one between <title2> and
<title3>.
If this is true (I will check tomorrow again), to modifie your code is
much simpler than to modifie mine:


(defun normalize-rec2 (list-of-records)
.....
.....
         ((5)
          (cond
           ((check-uppercase 3 (second record))            
            (if (equal (aref (fourth record) 0) #\[)
              (list (first record) (second record) (third record)
empty (fourth record) (fifth record))
              (list (first record) (second record) (third record)
(fourth record) empty (fifth record))))
           (t
            (list (first record) empty (second record) (third record)
(fourth record) (fifth record)))))            
         (otherwise record)))
     list-of-records)))

I will take my time to check, complete and test all the solutions
propsed in this thread. Thanks to every one.
From: TomSW
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <1945b2ae-7626-4431-a229-1c44df28c69e@w24g2000prd.googlegroups.com>
> Looking at the solutions proposed  by you and the others who kindly
> replied to my post, I realized that I did not handled properly the
> entry with five elements. I noticed that theoretically the field
> missing could still be <author> and not only one between <title2> and
> <title3>.
> If this is true (I will check tomorrow again), to modifie your code is
> much simpler than to modifie mine:

Why not find a way to declare the various types of entries you expect,
then create something to turn that declaration into a parser. That way
you avoid having to look through chunks of logic to figure out if
you've missed a particular combination. In your case a regular
expression would probably be sufficient, and it doesn't have to look
like perl...
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <df6bo4digc79rs13corrj33ssacbcqjml9@4ax.com>
On Sat, 31 Jan 2009 16:57:26 -0800 (PST), TomSW
<·············@gmail.com> wrote:

>Why not find a way to declare the various types of entries you expect,
>then create something to turn that declaration into a parser. That way
>you avoid having to look through chunks of logic to figure out if
>you've missed a particular combination. In your case a regular
>expression would probably be sufficient, and it doesn't have to look
>like perl...

Yes, it is a kind of logic quite convoluted and fragile. I'll think
about your suggestion.
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <8g0co4t0beqs83fgpdd8fjvl8g2vkl8glh@4ax.com>
On Sat, 31 Jan 2009 16:57:26 -0800 (PST), TomSW
<·············@gmail.com> wrote:

>Why not find a way to declare the various types of entries you expect,
>then create something to turn that declaration into a parser.

Maybe the following could be a starting point in this direction. I
still use <empty> instead of nil for better visibilty:


-----------------------------------------------
;an entry distributed in 3 lines
'(header <empty> title1 <empty> <empty> publication-data)

;an entry distributed in 4 lines
'(header author title1 <empty> <empty> publication-data)
'(header <empty> title1 title2 <empty> publication-data)
'(header <empty> title1 <empty> title3 publication-data)

;an entry distributed in 5 lines
'(header author title1 title2 <empty> publication-data)
'(header author title1 <empty> title3 publication-data)
'(header <empty> title1 title title3 publication-data)

;an entry distributed in 6 lines
'(header author title1 title2 title3 publication-data)
----------------------------------------------


To recapitulate the (now admittedly boring) "facts":

1)The minimum number of lines is 3, the maximum is 6

2)"header" "title1" and "publication-data" are always
present: "header" is always the first line;
"publication-data" is always the last; "title1" may
be the second line if "author" is not present or the
third line otherwise.

3)"author" when present always start with a second
name all in capital letters

4)"title3" when present always start with an open
square parenthesis.

I have to warn, like I did already in a previous thread,
that this is more an academic exercise because in the
reality these rules are violated in every possible
imaginable way.

If someone by chance and out of curiosity is interested,
the catalog of the library in a small zipped file is here:
http://csaeo.altervista.org/biblio.htm

Beware that the files are in MS Word format. 
From: TomSW
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <bf00df78-762f-4165-a775-0a51e415420b@g39g2000pri.googlegroups.com>
On 1 Fev, 21:23, "An[z]elmus" <·······@somewhere.org> wrote:

> >Why not find a way to declare the various types of entries you expect,
> >then create something to turn that declaration into a parser.
>
> Maybe the following could be a starting point in this direction. I
> still use <empty> instead of nil for better visibilty:

How about:


(defvar *bib-fields*
  '((header author title1 title2 title3 pub-data)
    ;; 5 items
    (header author title1 title3 pub-data)
    (header author title1 title2 pub-data)
    (header title1 title2 title3 pub-data)
    ;; 4 items
    (header author title1 pub-data)
    (header title1 title3 pub-data)
    (header title1 title2 pub-data)
    ;; 3 items
    (header title1 pub-data))
  "A list of the possible ways fields can be distributed
in a bibliography record, in descending order of preference.")


(defvar *bib-regexes*
  '((author . "^[A-Z]+,")
    (title3 . "^\\["))
  "Map field types to the regular expression that the corresponding
field must match")


(defun field-matches-p (string field-type)
  "Predicate to test if STRING could be a field of type FIELD-TYPE"
  (let ((regex (cdr (assoc field-name *bib-regexes*))))
    (if regex
	(scan regex string)
      t)))


(defun make-bib-entry (strings pattern)
  "Create a bibliography entry from STRINGS, a list of
strings that match PATTERN"
  ;; just create an alist
  (mapcar #'cons pattern strings))


(defun read-bib-record (record)
  "Transform RECORD, a list of strings, into a
bibliography entry, or raise an error if it isn't of
the correct form."
  (let* ((record-length (length record))
	 (pattern (find-if #'(lambda (pattern)
			       (every #'field-matches-p
				      record
				      pattern))
			   (remove record-length *bib-fields* :test #'/= :key #'length))))
    (if pattern
	(make-bib-record record pattern)
      ;; nothing matched!
      (error 'unmatched-record :record record))))
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <apudo41kv34v3r1q861aop7q5ctn5uhuj8@4ax.com>
On Mon, 2 Feb 2009 01:46:30 -0800 (PST), TomSW
<·············@gmail.com> wrote:
>How about:
>

My rather convoluted "ermeneutics":

>(defvar *bib-fields*
>  '((header author title1 title2 title3 pub-data)
>    ;; 5 items
>    (header author title1 title3 pub-data)
>    (header author title1 title2 pub-data)
>    (header title1 title2 title3 pub-data)
>    ;; 4 items
>    (header author title1 pub-data)
>    (header title1 title3 pub-data)
>    (header title1 title2 pub-data)
>    ;; 3 items
>    (header title1 pub-data))
>  "A list of the possible ways fields can be distributed
>in a bibliography record, in descending order of preference.")

Ok, this is what you meant with "...find a way to declare the various
types of entries you expect ...". I had thought something like that my
self.

>(defvar *bib-regexes*
>  '((author . "^[A-Z]+,")
>    (title3 . "^\\["))
>  "Map field types to the regular expression that the corresponding
>field must match")

Ok

>(defun field-matches-p (string field-type)
>  "Predicate to test if STRING could be a field of type FIELD-TYPE"
>  (let ((regex (cdr (assoc field-name *bib-regexes*))))	;<<<<<<
>    (if regex
>	(scan regex string)
>      t)))

Here you are using SCAN from the CL-PPCRE package.
Instead of field-name you mean field-type.
I don't understand (yet) why the function returns true (if (not
regex))

>(defun make-bib-entry (strings pattern)
>  "Create a bibliography entry from STRINGS, a list of
>strings that match PATTERN"
>  ;; just create an alist
>  (mapcar #'cons pattern strings))
>

I will undestand more probably when I see a call to the function.

>(defun read-bib-record (record)
>  "Transform RECORD, a list of strings, into a
>bibliography entry, or raise an error if it isn't of
>the correct form."
>  (let* ((record-length (length record))
>	 (pattern (find-if #'(lambda (pattern)
>			       (every #'field-matches-p
>				      record
>				      pattern))
>			   (remove record-length *bib-fields* :test #'/= :key #'length))))
>    (if pattern
>	(make-bib-record record pattern)
>      ;; nothing matched!
>      (error 'unmatched-record :record record))))

I need first to understand what the call to REMOVE does: not so
difficult after all, it removes every member of *bib-fields* where
LENGHT /= record-length.

Then on the list of lists returned by REMOVE, FIND-IF one of them
matches the structure of record. Here is where we use FIELD-MATCHES-P,
but still I have troubles understanding how it works.
If a corrisponding pattern was found we build a record as an
associative list calling MAKE-BIB-RECORD. Otherwise an error is
raised. We don't have yet a normalized record but we are probably one
step away.
 
Back again to FIELD-MATCHES-P (and EVERY): in this case EVERY returns
true if and only if each element of record FIELD-MATCHES-P each
element of pattern. In particular I think this ensure that the fields
"detected" by mean of regular expressions <author> and <title3> are
matched in the right position inside the record.
I think I understand now how it works, but still I have some
difficulties to explain it.
From: TomSW
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <fe8c6a95-31d6-4e33-9d04-946b6c70f0ad@z6g2000pre.googlegroups.com>
On Feb 2, 5:10 pm, "An[z]elmus" <·······@somewhere.org> wrote:
> >(defun field-matches-p (string field-type)
> >  "Predicate to test if STRING could be a field of type FIELD-TYPE"
> >  (let ((regex (cdr (assoc field-name *bib-regexes*))))     ;<<<<<<
> >    (if regex
> >    (scan regex string)
> >      t)))
>
> Here you are using SCAN from the CL-PPCRE package.
> Instead of field-name you mean field-type.

oops

> I don't understand (yet) why the function returns true (if (not
> regex))

If there's no regular expression associated with the field type, then
it doesn't test the string to see if it's of the suitable form for the
field, it just assumes that it is.

> Back again to FIELD-MATCHES-P (and EVERY): in this case EVERY returns
> true if and only if each element of record FIELD-MATCHES-P each
> element of pattern.

Exactly. So you only test patterns of the same length as the input,
and you find the first one where every string in the input matches the
corresponding field of the pattern. The patterns are ordered so that
the more specific fields (author, title3) take precedence over the
less specific ones.

If you want to change the type of the records created, you just need
to change MAKE-BIB-ENTRY, similarly, you can set up various ways to
handle unmatched inputs (have a look at the section on error handling
in "Practical Common Lisp".

btw I had a look at the catalogue files & can only commiserate :)
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <iobeo49slbru1ehsjsqi15mtmt0g242hae@4ax.com>
On Mon, 2 Feb 2009 09:04:31 -0800 (PST), TomSW
<·············@gmail.com> wrote:

>btw I had a look at the catalogue files & can only commiserate :)

Fortunetly I have no obligation and nobody is asking me to do this.
Thanks a lot.
From: Barry Fishman
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <m34ozczqjj.fsf@barry_fishman.acm.org>
"An[z]elmus" <·······@somewhere.org> writes:
> Maybe the following could be a starting point in this direction. I
> still use <empty> instead of nil for better visibilty:
>
>
> -----------------------------------------------
> ;an entry distributed in 3 lines
> '(header <empty> title1 <empty> <empty> publication-data)
>
> ;an entry distributed in 4 lines
> '(header author title1 <empty> <empty> publication-data)
> '(header <empty> title1 title2 <empty> publication-data)
> '(header <empty> title1 <empty> title3 publication-data)
>
> ;an entry distributed in 5 lines
> '(header author title1 title2 <empty> publication-data)
> '(header author title1 <empty> title3 publication-data)
> '(header <empty> title1 title title3 publication-data)
>
> ;an entry distributed in 6 lines
> '(header author title1 title2 title3 publication-data)
> ----------------------------------------------

On can also use a recursive descent approach, looking at the strings
in sequence.


(defparameter *empty* "<empty>")

(defun author-p (string)
  "Check for author name (last name first and in uppercase)"
  (every #'upper-case-p (subseq string 0 3)))

(defun title3-p (string)
  "Check for third title (begins with bracket)"
  (char= (char string 0) #\[))

(defun normalize-entry-r (entry)
  "Normalize a catalog entry"
  (labels ((at-header (data)
             (cons (car data) (at-author (cdr data))))
           (at-author (data)
             (if (author-p (car data))
                 (cons (car data) (at-title1 (cdr data)))
                 (cons *empty* (at-title1 data))))
           (at-title1 (data)
             (cons (car data) (at-title2 (cdr data))))
           (at-title2 (data)
             (if (title3-p (car data))
                 (list* *empty* (car data) (at-pub (cdr data)))
                 (if (cdr data)
                     (cons (car data) (at-title3 (cdr data)))
                     (cons *empty* (at-title3 data)))))
           (at-title3 (data)
             (if (title3-p (car data))
                 (cons (car data) (at-pub (cdr data)))
                 (cons *empty* (at-pub data))))
           (at-pub (data)
             (list (car data))))
    (at-header entry)))

(defun normalize-rec (list)
  "Normalize the catalog"
  (mapcar #'normalize-entry-r list))

-- 
Barry Fishman
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <h1cgo4df2p9a9iev9br29gndhck9krrkfo@4ax.com>
On Mon, 02 Feb 2009 12:39:44 -0500, Barry Fishman
<·············@acm.org> wrote:
>(defun normalize-entry-r (entry)
>  "Normalize a catalog entry"
>  (labels ((at-header (data)
>             (cons (car data) (at-author (cdr data))))
>           (at-author (data)
>             (if (author-p (car data))
>                 (cons (car data) (at-title1 (cdr data)))
>                 (cons *empty* (at-title1 data))))
>           (at-title1 (data)
>             (cons (car data) (at-title2 (cdr data))))
>           (at-title2 (data)
>             (if (title3-p (car data))
>                 (list* *empty* (car data) (at-pub (cdr data)))
>                 (if (cdr data)
>                     (cons (car data) (at-title3 (cdr data)))
>                     (cons *empty* (at-title3 data)))))
>           (at-title3 (data)
>             (if (title3-p (car data))
>                 (cons (car data) (at-pub (cdr data)))
>                 (cons *empty* (at-pub data))))
>           (at-pub (data)
>             (list (car data))))
>    (at-header entry)))

Nice example of how to use LABELS, I think I understand how it works.
From: Rainer Joswig
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <joswig-4398CA.13463831012009@news-europe.giganews.com>
In article <··································@4ax.com>,
 "An[z]elmus" <·······@somewhere.org> wrote:

> Maybe I am asking too much, I apologize.
> Using structure here would make the code clearer, but lists are more
> handy.
>  
> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.
> ;;Where a field is missing, an "empty-field" sign is inserted
> ;;The final structure of a record is like this: <header> <author>
> ;;<title1> <title2> <title3> <publication data>.
> ;;In the input lists the fields <header> <title> <publication data>
> ;;are always present while the others may be missing.
> ;;The field <author> when present always starts with the second
> ;;name all in capital letters.
> ;;The field <title3> when present always start with "["  
>    
> (defun normalize-rec2 (lst)
>   (let ((result '())
>         (empty "<empty>"))
>     (dolist (rec lst (reverse result))
>       (cond
>        ((= (length rec) 3)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (setq result(cons (reverse norm-rec) result))))
>        ((= (length rec) 4)
>         (cond
>          ((check-uppercase 3 (second rec))
>           (let ((norm-rec '()))
>             (setq norm-rec (cons (first rec) norm-rec))
>             (setq norm-rec (cons (second rec) norm-rec))
>             (setq norm-rec (cons (third rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq result(cons (reverse norm-rec) result))))
>          (t
>           (cond
>            ((equal (aref (third rec) 0) #\[) 
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))
>            (t
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))            
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))))))
>        ((= (length rec) 5)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (cond
>            ((equal (aref (fourth rec) 0) #\[)
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec)))
>            (t
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))))
>           (setq norm-rec (cons (fifth rec) norm-rec))
>           (setq result (cons (reverse norm-rec) result))))
>        (t (setq result (cons rec result)))))))   
> 
> 
> ;;check if n charatacters at the beginning of a string are all upper
> ;;case
> (defun check-uppercase (n str)
>   (dotimes (x n t)
>     (if (not (upper-case-p (aref str x)))
>       (return))))


An example. You need to expand on it.

; write a function that works on a list
(loop for rec in list collect (normalize-record rec))

;write a function that works on an individual record

(defun normalize-record (record)
   "normalizes publication records.
returns (header author title1 title2 title3 publication-data)"

  (let* ((empty "<empty")
         (record-length (length record))

         ; new record data
         (header           (first record))
         (author           empty)
         (title1           empty)
         (title2           empty)
         (title3           empty)
         (publication-data empty)))

  (cond ; describe first alternative
        ((= record-length 3)
        (setf title1 (second record)
              publication-data (third record)))

        ; describe second alternative
        ((and (= record-length 4)
              (check-uppercase 3 (second record)))
         (setf author (second record)
               title1 (third record)
               publication-data (sixth record)))

        ; describe third alternative
        ((and (= record-length 4)
              (equal (aref (third rec) 0) #\[))
         (setf title1 (second record)
         ; more of your code
               )))

  ; the result
  (list header author title1 title2 title3 publication-data))


  
  
Usually I would have the external data described in lists,
but the result of 'normalizing' them would be a CLOS object:

(defclass publication-record ()
  ((header :initarg :header :initform "<empty")
   (author :initarg :author :initform "<empty")
   ...
  ))

Then the code might be changed to this:


(defun normalize-record (record)
  "normalizes publication records.
returns (header author title1 title2 title3 publication-data)"

  (let ((empty "<empty")
        (record-length (length record))
        (new-record (make-instance 'publication-record)))
    (with-slots (header author title1 title2 title3 publication-data)
            new-record
      (setf header (first record))

      (cond ; describe first alternative
       ((= record-length 3)
        (setf title1 (second record)
              publication-data (third record)))

       ; describe second alternative
       ((and (= record-length 4)
             (check-uppercase 3 (second record)))
        (setf author (second record)
              title1 (third record)
              publication-data (sixth record)))

       ; describe third alternative
       ((and (= record-length 4)
             (equal (aref (third rec) 0) #\[))
        (setf title1 (second record)
         ; more of your code
              )))

      ; the result
      new-record )))

This would return a CLOS object.

-- 
http://lispm.dyndns.org/
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <tsebo49rhs9l96jnesjivfqin8d9ormisp@4ax.com>
On Sat, 31 Jan 2009 13:46:38 +0100, Rainer Joswig <······@lisp.de>
wrote:
>An example. You need to expand on it.
>....
>(defun normalize-record (record)
>   "normalizes publication records.
>returns (header author title1 title2 title3 publication-data)"
>
>  (let* ((empty "<empty")
>         (record-length (length record))
>
>         ; new record data
>         (header           (first record))
>         (author           empty)
>         (title1           empty)
>         (title2           empty)
>         (title3           empty)
>         (publication-data empty)))
>.....

I like the idea of a template where we have to fill the blanks.
To be exact the (unfortunetly abstract) logic of the algorithm
guarantees that also the last field is always present. So the template
could be modified as follow:

....
 (let* ((empty "<empty")
         (record-length (length record))

         ; new record data
         (header           (first record))
         (author           empty)
         (title1           empty)
         (title2           empty)
         (title3           empty)
         (publication-data (first (last record)))))	;<<<<<<
....
 
A part from that remark, which probably might even allow a different
organisation of the logic of the function, it was easy for me to
complete your skeleton using repeatedly  the operator AND for the
rather convoluted description of alternatives. The code (yet to be
tested) again is shorter and clearer than mine.  
From: Rainer Joswig
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <joswig-EF1DA8.21055101022009@news-europe.giganews.com>
In article <··································@4ax.com>,
 "An[z]elmus" <·······@somewhere.org> wrote:

> On Sat, 31 Jan 2009 13:46:38 +0100, Rainer Joswig <······@lisp.de>
> wrote:
> >An example. You need to expand on it.
> >....
> >(defun normalize-record (record)
> >   "normalizes publication records.
> >returns (header author title1 title2 title3 publication-data)"
> >
> >  (let* ((empty "<empty")
> >         (record-length (length record))
> >
> >         ; new record data
> >         (header           (first record))
> >         (author           empty)
> >         (title1           empty)
> >         (title2           empty)
> >         (title3           empty)
> >         (publication-data empty)))
> >.....
> 
> I like the idea of a template where we have to fill the blanks.
> To be exact the (unfortunetly abstract) logic of the algorithm
> guarantees that also the last field is always present. So the template
> could be modified as follow:
> 
> ....
>  (let* ((empty "<empty")
>          (record-length (length record))
> 
>          ; new record data
>          (header           (first record))
>          (author           empty)
>          (title1           empty)
>          (title2           empty)
>          (title3           empty)
>          (publication-data (first (last record)))))	;<<<<<<
> ....

Right, that's the idea. Put everything at the top which
holds for all records. For the others provide a default.

> A part from that remark, which probably might even allow a different
> organisation of the logic of the function, it was easy for me to
> complete your skeleton using repeatedly  the operator AND for the
> rather convoluted description of alternatives. The code (yet to be
> tested) again is shorter and clearer than mine.  

Sounds good.

You saw also my CLOSified version. There is also another (more
advanced) idea that you could use if the effort would be justified:

  Make it more data/description driven.

You need:

1 the basic data type(s) you want to generate.

  As a side note, the DEFSTRUCT macro also allows
  you to define accessors, make-function, etc.
  for lists (and not only for structures).

2 a basic engine for taking a record, checking what
  kind it is and applying the correspoding
  transformation

3 descriptions for the checks and transformations.

So for 3 you would have a bunch of descriptions like this:


(def-publ-transform four-element-record
   "this transformation is for records with
    the four elements: header title author publication-date"
  :length 4
  :test (some-test-here record)
  :transformation ((header first)
                   (author third)
                   (title1 second)
                   (publication-date fourth)))

The macro DEF-PUBL-TRANSFORM would generate
a test function, the corresponding transformation
function and a link between both, so that when
the test is true, it returns the transformation
function.


This can be coding overhead, but for code that needs
to be maintained over a long time, with
new record types added, it may make sense.
The idea is that the code is clearly
separated into the data types, the engine
and the driving data descriptions.
For most part of the maintenance you will
only need to add, change or remove
the transformation descriptions. Those
are easy to write.

-- 
http://lispm.dyndns.org/
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <48tdo4lu1dj21f9tq1d8296sro0j4nru8u@4ax.com>
On Sun, 01 Feb 2009 21:05:51 +0100, Rainer Joswig <······@lisp.de>
wrote:
>You need:
>
>1 the basic data type(s) you want to generate.
> ...
>
>2 a basic engine for taking a record, checking what
>  kind it is and applying the correspoding
>  transformation
>
>3 descriptions for the checks and transformations.
>
>So for 3 you would have a bunch of descriptions like this:
>
>
>(def-publ-transform four-element-record
>   "this transformation is for records with
>    the four elements: header title author publication-date"
>  :length 4
>  :test (some-test-here record)
>  :transformation ((header first)
>                   (author third)
>                   (title1 second)
>                   (publication-date fourth)))
>
>The macro DEF-PUBL-TRANSFORM would generate
>a test function, the corresponding transformation
>function and a link between both, so that when
>the test is true, it returns the transformation
>function.

I am afraid this is a little too advanced for me, but if you are
willing to satisfy my curiosity, please show me something more. 
From: Rainer Joswig
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <joswig-B1BA7C.15355102022009@news-europe.giganews.com>
In article <··································@4ax.com>,
 "An[z]elmus" <·······@somewhere.org> wrote:

> On Sun, 01 Feb 2009 21:05:51 +0100, Rainer Joswig <······@lisp.de>
> wrote:
> >You need:
> >
> >1 the basic data type(s) you want to generate.
> > ...
> >
> >2 a basic engine for taking a record, checking what
> >  kind it is and applying the correspoding
> >  transformation
> >
> >3 descriptions for the checks and transformations.
> >
> >So for 3 you would have a bunch of descriptions like this:
> >
> >
> >(def-publ-transform four-element-record
> >   "this transformation is for records with
> >    the four elements: header title author publication-date"
> >  :length 4
> >  :test (some-test-here record)
> >  :transformation ((header first)
> >                   (author third)
> >                   (title1 second)
> >                   (publication-date fourth)))
> >
> >The macro DEF-PUBL-TRANSFORM would generate
> >a test function, the corresponding transformation
> >function and a link between both, so that when
> >the test is true, it returns the transformation
> >function.
> 
> I am afraid this is a little too advanced for me, but if you are
> willing to satisfy my curiosity, please show me something more. 

Those who ask might get an answer. ;-)


This is a sketch. The idea is to write a small transformation engine
and let the maintainer then only maintain the transformations.
So there is a machinery part and then there is the 'domain knowledge'
enter with a nice macro.
This idea is basic to lots of AI programming.



; primitive transformation engine, once written, hopefully never touched again.

(defparameter *fields* '(header author title1 title2 title3 publication-date))

(defun make-transformation (alist)
  "the alist has entries of (field-name field-transformation-function-name).
it returns the transformation function for a single record"
  `(lambda (record)
     (list ,@(loop for field in *fields*
                   collect (let ((function (second (assoc field alist))))
                             (if function
                                 `(,function record)
                               "<empty>"))))))
 
(defun make-transformation-entry (name documentation &key length test transformation)
  "creates a list of two functions. the first function
is the test for the record and the second is the transformation function"
  (declare (ignore name documentation))
  (list (coerce `(lambda (record)
                   (and (= (length record) ,length)
                        ,test))
                'function)
        (coerce (make-transformation transformation) 'function)))

(defvar *transformations* nil)

(defun intern-transformation (name documentation &key length test transformation)
  "creates a new transformation and enters it into the list *transformations*.
if a transformation with a given name exists, then the check and transformation
function will be replaced."
  (let  ((old-transformation (assoc name *transformations*)))
    (if old-transformation
        (setf (cdr (assoc name *transformations*) )
              (make-transformation-entry name documentation
                                         :length length :test test :transformation transformation))
      (push (list name (make-transformation-entry name documentation
                                         :length length :test test :transformation transformation))
            *transformations*))))


(defmacro def-publ-transform (name documentation &key length test transformation)
  "User macro to describe a transformation.
NAME is a symbol.
DOCUMENTATION is a string.
LENGTH is an integer. the length matches against the record length.
TEST is an expression, which can access the variable named RECORD.
TRANSFORMATION is an alist of (header-name extraction-function)."
  `(intern-transformation ',name ',documentation :length ,length 
                          :test ',test
                          :transformation ',transformation))

(defun transform-record (record)
  "Transform a single record. Uses the first transformation whose test is T for this record."
  (loop for (nil test-function transformation-function) in *transformations*
        when (funcall test-function record)
        do (return-from transform-record (funcall transformation-function record))))



; the maintainer then only writes domain specific code:

(def-publ-transform four-element-record 
   "this transformation is for records with 
    the four elements: header title author publication-date, where the header starts with capital F." 
  :length 4 
  :test (char= #\F (aref (first record) 0))
  :transformation ((header first) 
                   (author third) 
                   (title1 second) 
                   (publication-date fourth)))



Let's try it. First example does not match, because "foo1" starts with a lower case f.

CL-USER 14 > (transform-record '("foo1" "foo2" "foo3" "foo4"))
NIL

CL-USER 15 > (transform-record '("Foo1" "foo2" "foo3" "foo4"))
("Foo1" "foo3" "foo2" "<empty>" "<empty>" "foo4")


The stuff is kind of lengthy, uses macros and removes the domain knowledge from the
engine. If you are comfortable with this kind of code, then Lisp is the
perfect language for you. ;-) Now you got a much more complicated solution
to your original problem. But that's the way I would write it, when there
is lots of domain knowledge to maintain frequently.

-- 
http://lispm.dyndns.org/
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <9k4go459u4pt5b9sp4ekmmcn0hvr2ge3l5@4ax.com>
On Mon, 02 Feb 2009 15:35:52 +0100, Rainer Joswig <······@lisp.de>
wrote:

>This is a sketch. The idea is to write a small transformation engine
>and let the maintainer then only maintain the transformations.
>So there is a machinery part and then there is the 'domain knowledge'
>enter with a nice macro.

Infact given the template I could easily write 8 "transformations"
which cover all the alternatives so far as in the other solutions, And
I got the oveall design of the enginery part.  I have to evaluate
twice every "tranformation" in LispWorks before it works, but this may
be because of the macro (I usually don't compile any of my small
chunks of code). 

>This idea is basic to lots of AI programming.

I just started a few weeks ago to read "On Lisp" by Paul Graham, then
I temporarily suspended because I found a copy of Paradigms of
Artificial Intelligence Programming by Peter Norvig. It will take some
times though before I reach the end of these two very "dense" books. 
From: William James
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <gm235l0266o@enews4.newsguy.com>
An[z]elmus wrote:

> Maybe I am asking too much, I apologize.
> Using structure here would make the code clearer, but lists are more
> handy.
>  
> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.
> ;;Where a field is missing, an "empty-field" sign is inserted
> ;;The final structure of a record is like this: <header> <author>
> ;;<title1> <title2> <title3> <publication data>.
> ;;In the input lists the fields <header> <title> <publication data>
> ;;are always present while the others may be missing.
> ;;The field <author> when present always starts with the second
> ;;name all in capital letters.
> ;;The field <title3> when present always start with "["  
>    

A few examples would be helpful.
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <c769o4dfneeqbdevdopkmmi18up29li0ju@4ax.com>
On 31 Jan 2009 17:52:53 GMT, "William James" <·········@yahoo.com>
wrote:

>A few examples would be helpful.

Here is an example of a normalized entry. 
The fields here are still one per line for clarity, but the final
result should be comma-delimited files with one record per line where
the fields are separated by a "pipe". 
 
400 MUNAKATA c 1|
<empty>|
Shiko Munakata|
<empty>|
<empty>|
TORINO: Galleria Civica d�Arte Moderna, 1960 � p.18 + tav.18

Here is the entry before beeing processed:

400 MUNAKATA c 1
Shiko Munakata
TORINO: Galleria Civica d�Arte Moderna, 1960 � p.18 + tav.18
From: Thomas A. Russ
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <ymibptkog5k.fsf@blackcat.isi.edu>
"An[z]elmus" <·······@somewhere.org> writes:

> On 31 Jan 2009 17:52:53 GMT, "William James" <·········@yahoo.com>
> wrote:
> 
> >A few examples would be helpful.
> 
> Here is an example of a normalized entry. 
> The fields here are still one per line for clarity, but the final
> result should be comma-delimited files with one record per line where
> the fields are separated by a "pipe". 
>  
> 400 MUNAKATA c 1|
> <empty>|
> Shiko Munakata|
> <empty>|
> <empty>|
> TORINO: Galleria Civica d�Arte Moderna, 1960 � p.18 + tav.18

Fortunately, that is easy to produce from a list of strings by using
just a small bit of FORMAT wizardry:

  (format nil "~{~A~^|~}~%" record)

For doing a list of records, one could either iterate over that or even
go so far as to put the entire iteration into the format statement:

(defun process-record-files (input-file-name output-file-name)
  (with-open-file (in input-file-name :direction :input)
    (with-open-file (out output-file-name :direction :output)
      (format out "~{~{~A~^|~}~%~}"
              (normalize-records (collect-records in))))))

-- 
Thomas A. Russ,  USC/Information Sciences Institute
From: Thomas A. Russ
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <ymiocxkogss.fsf@blackcat.isi.edu>
"An[z]elmus" <·······@somewhere.org> writes:

> Maybe I am asking too much, I apologize.
> Using structure here would make the code clearer, but lists are more
> handy.

Well, in addition to the nice solutions posted by others, I will just
point up some more lisp-like ways to structure the code that you have
written.  This is mainly to help you see more of the lisp style, since I
think that the nice declarative approach suggested by Tom Weissman is a
much nicer solution to this particular problem.

As a general overview, it really seems like you are programming in a C
or FORTRAN style, but encoding it in Common Lisp.  One sign of that is
repeated assignment statements to the same variable.  Generally in lisp
one would make a lot more use of function composition (that is, nested
calls) and return values.

So, for example, you write this snippet with repeated assignments, a
reverse and assignment to the output variable:

    (let ((norm-rec '()))
      (setq norm-rec (cons (first rec) norm-rec))
      (setq norm-rec (cons empty norm-rec))
      (setq norm-rec (cons (second rec) norm-rec))
      (setq norm-rec (cons empty norm-rec))
      (setq norm-rec (cons empty norm-rec))
      (setq norm-rec (cons (third rec) norm-rec))
      (setq result (cons (reverse norm-rec) result)))

A much simpler, and more lisp-like method would be to just build up the
list structure of your answer in a single call to the LIST function:

  (list (first rec) empty (second rec) empty empty (third rec))

So now you have a single line of codde instead of eight, and the
structure of what you return is easily visible by inspection instead of
needing to actually work through the process of evaluating the code.

Secondly, you should abstract your testing functions for field types
into their own function calls.  You've started this a little bit by
introducing the CHECK-UPPERCASE function.  But you should also go one
step further and introduce nice named functions to detect author or
title fields.  In addition to making maintenance easier, this ends up
making the code self-documenting:

  (defun author-field-p (field-value)
     (check-uppercase 3 field-value))

Something similar should be done for the title3 field.

Of course, one now wonders why you are only checking 3 characters?  Is
this because of efficiency concerns, or is it really needed by your
domain.  We can now also turn our attention to the CHECK-UPPERCASE
function:

   (defun check-uppercase (n str)
      (dotimes (x n t)
        (if (not (upper-case-p (aref str x)))
           (return))))

First of all, it can be generalized to apply ANY test to the first N
characters, giving you a much more useful function.  You would do this
by making the test function one of the parameters.  I also make a few
other nice style changes, such as replacing IF with UNLESS, and making
the NIL explicit in the return statement, since we really care about
that value.

   (defun test-characters (string n test-function)
      (dotimes (index n t)
        (unless (funcall test (aref string index))
           (return nil))))

 (check-uppercase n str) ==> (test-characters string n #'upper-case-p)

But by using some of the nice sequence functions that are present in
Common Lisp, you might be able to simplify this even more, particularly
if you don't mind testing every character:

   (every #'upper-case-p string)

I would still use the AUTHOR-FIELD-P function because that makes it
easier to update if the criteria for being an author field ever changes.

One other oddity that I noticed in your code was a curious nesting of
COND statements in the default (T) branch:

  (cond ((test1 ...) ...)
        ((test2 ...) ...)
        (t (cond ((test3 ...) ...))
                 ((test4 ...) ...)))

This can be simplfied by eliminating the trailing cond form and just
promoting its tests into the main body of the COND statement.

I will also move the collection part of your main loop outside the
individual elements.  That also makes it clearer.  It also allows you to
spin off the single record normalization into its own function, again a
good way to make things both more modular and reusable as well as to
make the structure of the computation clearer.

So, here is a rewrite.  Notice also the use of more informative variable
names.


> ;;The input is a list of lists each representing an entry
> ;;in the catalog of a library.
> ;;Attempt to normalize the records so that every one is composed by
> ;;exactly 6 fields.
> ;;Where a field is missing, an "empty-field" sign is inserted
> ;;The final structure of a record is like this: <header> <author>
> ;;<title1> <title2> <title3> <publication data>.
> ;;In the input lists the fields <header> <title> <publication data>
> ;;are always present while the others may be missing.
> ;;The field <author> when present always starts with the second
> ;;name all in capital letters.
> ;;The field <title3> when present always start with "["  

;; Make this a global constant:

(defconstant +empty* "<empty>")

(defun author-field-p (field-value)
  "Tests if the field-value is an author field.
   Author fields are all upper-case"
  (every #'upper-case-p field-value))

(defun title3-field-p (field-value)
  "Tests if field-value is a title3 field, marked by a leading [ character."
  (and (> (length field-value) 0)
       (char= (aref field-value 0) #\[))))

(defun normalize-records (record-list)
  (let ((result nil))
     (dolist (record record-list (nreverse result))
        (push (normalize-one-record record) result))))

;; Here are some alternate looping constructs.  Note how abstracting the
;; normalization of a single record makes it easy to define these
;; alternate iteration constructs.
;; 
(defun normalize-records2 (record-list)
  (loop for record in list 
        collect (normalize-one-record record)))

(defun normalize-records3 (record-list)
   (mapcar #'normalize-one-record record-list))

(defun normalize-one-record (record)
  (case (length record)
    (3 
     (list (first record) +empty+ (second record)
	   +empty+        +empty+ (third record)))
    (4 (cond ((author-field-p (second record))
	      (list (first record) (second record) (third record)
		    +empty+        +empty+         (fourth record)))
	     ((title3-field-p (third record))
	      (list (first record) +empty+        (second record)
		    +empty+       (third record)  (fourth record)))
	     (t
	      (list (first record) +empty+        (second record)
		    (third record) +empty+        (fourth record)))))
    (5 (cond ((title3-field-p (fourth record))
	      (list (first record) (second record) (third record)
		    +empty+        (fourth record) (fifth record)))
	     (t
	      (list (first record)  (second record) (third record)
		    (fourth record) +empty+         (fifth record)))))
     
    (6 record)))



> (defun normalize-rec2 (lst)
>   (let ((result '())
>         (empty "<empty>"))
>     (dolist (rec lst (reverse result))
>       (cond
>        ((= (length rec) 3)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons empty norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (setq result(cons (reverse norm-rec) result))))
>        ((= (length rec) 4)
>         (cond
>          ((check-uppercase 3 (second rec))
>           (let ((norm-rec '()))
>             (setq norm-rec (cons (first rec) norm-rec))
>             (setq norm-rec (cons (second rec) norm-rec))
>             (setq norm-rec (cons (third rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq result(cons (reverse norm-rec) result))))
>          (t
>           (cond
>            ((equal (aref (third rec) 0) #\[) 
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))
>            (t
>             (let ((norm-rec '()))
>               (setq norm-rec (cons (first rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))
>               (setq norm-rec (cons (second rec) norm-rec))
>               (setq norm-rec (cons (third rec) norm-rec))
>               (setq norm-rec (cons empty norm-rec))            
>               (setq norm-rec (cons (fourth rec) norm-rec))
>               (setq result (cons (reverse norm-rec) result))))))))
>        ((= (length rec) 5)
>         (let ((norm-rec '()))
>           (setq norm-rec (cons (first rec) norm-rec))
>           (setq norm-rec (cons (second rec) norm-rec))
>           (setq norm-rec (cons (third rec) norm-rec))
>           (cond
>            ((equal (aref (fourth rec) 0) #\[)
>             (setq norm-rec (cons empty norm-rec))
>             (setq norm-rec (cons (fourth rec) norm-rec)))
>            (t
>             (setq norm-rec (cons (fourth rec) norm-rec))
>             (setq norm-rec (cons empty norm-rec))))
>           (setq norm-rec (cons (fifth rec) norm-rec))
>           (setq result (cons (reverse norm-rec) result))))
>        (t (setq result (cons rec result)))))))   
> 
> 
> ;;check if n charatacters at the beginning of a string are all upper
> ;;case
> (defun check-uppercase (n str)
>   (dotimes (x n t)
>     (if (not (upper-case-p (aref str x)))
>       (return))))
> 

-- 
Thomas A. Russ,  USC/Information Sciences Institute
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <vtieo4hsbrta12t44nfts08kstcbncnvij@4ax.com>
On 02 Feb 2009 10:05:39 -0800, ···@sevak.isi.edu (Thomas A. Russ)
wrote:
>Well, in addition to the nice solutions posted by others, I will just
>point up some more lisp-like ways to structure the code that you have
>written.  This is mainly to help you see more of the lisp style, since I
>think that the nice declarative approach suggested by Tom Weissman is a
>much nicer solution to this particular problem.
> ...

Well, you wrote a nice and perfect resume' of many if not all the
suggestions I received in the previous posts and added more.
There is really plenty of material in this thread for me to elaborate
on.
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <tbago4dd36vb734jtaagigr76am31ggauq@4ax.com>
On 02 Feb 2009 10:05:39 -0800, ···@sevak.isi.edu (Thomas A. Russ)
wrote:
> ...
>Secondly, you should abstract your testing functions for field types
>into their own function calls.  You've started this a little bit by
>introducing the CHECK-UPPERCASE function.  But you should also go one
>step further and introduce nice named functions to detect author or
>title fields.  In addition to making maintenance easier, this ends up
>making the code self-documenting:
>
>  (defun author-field-p (field-value)
>     (check-uppercase 3 field-value))
>
>Something similar should be done for the title3 field.
>
>Of course, one now wonders why you are only checking 3 characters?  Is
>this because of efficiency concerns, or is it really needed by your
>domain.

No, I don't need to check exactly 3 characters. I just thought that
this should be enough. But it could even be too much ... as you
noticed in one of your previous post.

> ...
> ...
>But by using some of the nice sequence functions that are present in
>Common Lisp, you might be able to simplify this even more, particularly
>if you don't mind testing every character:
>
>   (every #'upper-case-p string)
>

Here we are dealing with the name of the author which normally should
be in the form of  "SECOND_NAME, first_name". But there could be more
than one second name, or some punctuation mark in between. So I thing
it is better just to check if the first letters are all capitals, for
example

(every #'upper-case-p (subseq string 0 2)

 
>I would still use the AUTHOR-FIELD-P function because that makes it
>easier to update if the criteria for being an author field ever changes.

Yes, absolutely.
From: An[z]elmus
Subject: Re: How do I shorten or split this function
Date: 
Message-ID: <5u9go41n7mmcbklqrcdgv5156f93u7lisb@4ax.com>
On 02 Feb 2009 10:05:39 -0800, ···@sevak.isi.edu (Thomas A. Russ)
wrote:
>So, for example, you write this snippet with repeated assignments, a
>reverse and assignment to the output variable:
>
>    (let ((norm-rec '()))
>      (setq norm-rec (cons (first rec) norm-rec))
>      (setq norm-rec (cons empty norm-rec))
>      (setq norm-rec (cons (second rec) norm-rec))
>      (setq norm-rec (cons empty norm-rec))
>      (setq norm-rec (cons empty norm-rec))
>      (setq norm-rec (cons (third rec) norm-rec))
>      (setq result (cons (reverse norm-rec) result)))
>
>A much simpler, and more lisp-like method would be to just build up the
>list structure of your answer in a single call to the LIST function:
>
>  (list (first rec) empty (second rec) empty empty (third rec))

Yes,  here LIST is the right choice.