caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* [Caml-list] An interesting sorting algorithm for list
@ 2017-10-02  4:11 Christophe Raffalli
  2017-10-02 20:05 ` Van Chan Ngo
  0 siblings, 1 reply; 3+ messages in thread
From: Christophe Raffalli @ 2017-10-02  4:11 UTC (permalink / raw)
  To: caml-list


[-- Attachment #1.1: Type: text/plain, Size: 1598 bytes --]

Hello,

Here is an algorithm I found nice to sort lists.  It is in O(n ln(k))
where n is the size and k the number of changes of direction in the list.

for [ 1; 5; 10; 15; 20; 10; 2; 3; 5; 7], k = 3.

This implementation is a stable sort.

It is "almost" as fast as List.sort in the bad cases (Random lists)
(when k ~ n) and can be much faster if k is small as expected...

A challenge: find a similar algorithm, for lists, that is always
faster than List.sort ... I have tried a lot and I was always 5% slower
on the bas cases ... (Try to remain stable)

Enjoy,
Christophe

PS: the benchmark:

Correctness test passed
Stability test passed
Random lists:
          random: tf = 1.53, tg = 1.56, factor = 0.98x, gain = -2.33%
    random small: tf = 1.37, tg = 1.44, factor = 0.95x, gain = -4.88%
Worst cases:
          worst1: tf = 1.31, tg = 1.38, factor = 0.95x, gain = -5.18%
          worst2: tf = 1.32, tg = 1.36, factor = 0.98x, gain = -2.49%
Sorted (partially) lists:
          sorted: tf = 1.28, tg = 0.01, factor = 97.21x, gain = 98.97%
        reversed: tf = 1.31, tg = 0.17, factor = 7.76x, gain = 87.11%
      sorted@rev: tf = 1.33, tg = 0.37, factor = 3.60x, gain = 72.23%
      rev@sorted: tf = 1.30, tg = 0.38, factor = 3.44x, gain = 70.94%
Shuffled lists (permute k times 2 elements in a sorted list):
      shuffle 10: tf = 1.35, tg = 0.80, factor = 1.68x, gain = 40.64%
     shuffle 100: tf = 1.36, tg = 1.07, factor = 1.27x, gain = 21.56%
    shuffle 1000: tf = 1.38, tg = 1.20, factor = 1.15x, gain = 13.17%
   shuffle 10000: tf = 1.41, tg = 1.25, factor = 1.13x, gain = 11.45%

[-- Attachment #1.2: block_sort.ml --]
[-- Type: text/plain, Size: 8464 bytes --]

(** Implementation of a sorting algorithm for lists
    in O(n ln(k)), where
    - n is the list size
    - k is the number of changes of direction *)

(** The first phase of the algorithm is
    to split the list in block which are either
    in order of in reverse order.

    The block have a size and a pointer to the original list.
 *)

type 'a blocks =
  | Ord of { size : int
           ; list : 'a list (* beginning of the block *)
           ; next : 'a blocks
           }
  | Rev of { size : int
           ; list : 'a list (* beginning of the block *)
           ; next : 'a blocks
           }
  | Fin

(** [split cmp x l l0] the list [x::l as l0] with [cmp] as comparison.
    Beware, the block are returned with first the first block being
    the last in the list.
 *)
let split cmp x l l0 =
  (** Start of a new block *)
  let rec split_ini len next x l l0 =
    match l with
    | [] -> (len, Ord { size=1; list=l0; next })
    | y :: l' ->
       if cmp x y <= 0 then
         split_sam len next 2 y l' l0
       else
         split_rev len next 2 y l' l0

  (** We are building a reverse block *)
  and split_rev len next size x l l0 =
    match l with
    | [] -> (len, Rev { size; list=l0; next })
    | y :: l' ->
       (** Note: < 0 here is a bit faster but not stable *)
       if cmp x y <= 0 then
         let next = Rev { size; list=l0; next } in
         split_ini (len+1) next y l' l
       else
         split_rev len next (size+1) y l' l0

  and split_sam len next size x l l0 =
    match l with
    | [] -> (len, Ord { size; list=l0; next })
    | y :: l' ->
       if cmp x y <= 0 then
         split_sam len next (size+1) y l' l0
       else
         let next = Ord { size; list=l0; next } in
         split_ini (len+1) next y l' l

  in
  split_ini 1 Fin x l l0

(* The two next functions are used to convert block cells in list *)

(** [rev_heads n l] gives the reversal of the n firsts elements of l.*)
let rev_heads n l =
  let rec gn acc n l =
    match n,l with
    | 0, _ -> acc
    | _, x::l -> gn (x::acc) (n-1) l
    | _ -> assert false
  in
  gn [] n l

(** [heads n l] gives the n firsts elements of l *)
let rec heads n l =
  let rec fn n l =
    match n,l with
    | 0, _ -> []
    | _, x::l -> x::fn (n-1) l
    | _ -> assert false
  in
  if n >= 100_000 then (** use a tail rec version if n is large *)
    List.rev (rev_heads n l)
  else
    fn n l

(** Now a merge sort with a spacial treatment of the leafs *)

(** reversal of the merge of l1 l2 which are sorted,
    the result is reverse sorted*)
let rec rev_merge cmp acc l1 l2 =
  match l1, l2 with
  | ([], l) | (l, []) -> List.rev_append l acc
  | (x::l1'), (y::l2') ->
     if cmp x y <= 0 then rev_merge cmp (x::acc) l1' l2
     else rev_merge cmp (y::acc) l1 l2'

(** merge of l1 l2 which are reverse sorted, the result is sorted *)
let rec merge_rev cmp acc l1 l2 =
  match l1, l2 with
  | ([], l) | (l, []) -> List.rev_append l acc
  | (x::l1'), (y::l2') ->
     if cmp x y > 0 then merge_rev cmp (x::acc) l1' l2
     else merge_rev cmp (y::acc) l1 l2'

(** the two mutually recursive sort functions, one returning a sorted
    list the second a reverse sorted list. The first argument is a
    reference to the blocks *)
let rec sort cur cmp n =
  match n with
  | 1 -> (* 1 block, we transform it into a list *)
     begin
       match !cur with
       | Ord{size;list;next} ->
          cur := next;
          (* This case is a bit of a waste ... *)
          heads size list;
       | Rev{size;list;next} ->
          cur := next;
          rev_heads size list
       | Fin ->
          assert false
     end
  | _ -> (* standard merge *)
     let n1 = n lsr 1 in
     let n2 = n - n1 in
     (** end of the list first as cur point to the end first *)
     let l2 = rev_sort cur cmp n2 in
     let l1 = rev_sort cur cmp n1 in
     merge_rev cmp [] l1 l2

and rev_sort cur cmp n =
  match n with
  | 1 -> (* 1 block, we transform it into a list *)
     begin
       match !cur with
       | Ord{size;list;next} ->
          cur := next;
          rev_heads size list;
       | Rev{size;list;next} ->
          cur := next;
          (* This case is a bit of a waste ... *)
          heads size list
       | Fin ->
          assert false
     end
  | _ ->
     let n1 = n lsr 1 in
     let n2 = n - n1 in
     (** end of the list first as cur point to the end first *)
     let l2 = sort cur cmp n2 in
     let l1 = sort cur cmp n1 in
     rev_merge cmp [] l1 l2

(** Final sorting algorithm *)
let sort : type a. (a -> a -> int) -> a list -> a list = fun cmp l0 ->
  match l0 with [] | [_] -> l0 | x::l ->
  let (len, blocks) = split cmp x l l0 in
  match blocks with
  | Ord{list;next=Fin} -> list
  | Rev{list;next=Fin} -> List.rev list
  | _ -> sort (ref blocks) cmp len

(* TESTS *)

(* Printing *)
let print_list f ch =
  Printf.fprintf ch "[%a]" (fun ch l ->
                   List.iteri (fun i x ->
                       Printf.fprintf ch "%s%a" (if i > 0 then ";" else "") f x) l)

let print_il = print_list (fun ch -> Printf.fprintf ch "%d")
let print_ill = print_list (fun ch -> Printf.fprintf ch "%a" print_il)

(* Timing *)
let chrono f x =
  Gc.compact ();
  let t1 = Sys.time () in
  let r = f x in
  let t2 = Sys.time () in
  (t2 -. t1, r)

let chronos msg f g x =
  let tf, lf = chrono f x in
  let tg, lg = chrono g x in
  assert(lf = lg);
  let g = 100. *. (tf -. tg) /. tf in
  let f = tf /. tg in
  Printf.printf "%16s: tf = %.2f, tg = %.2f, factor = %.2fx, gain = %.2f%%\n%!" msg tf tg f g

(* TEST CORRECTNESS *)

let alea n p =
  let rec fn acc n =
    if n <= 0 then acc else fn (Random.int p :: acc) (n - 1)
  in fn [] n

let _ =
  for i = 0 to 1000 do
    let l = alea i 10_000 in
    assert (sort compare l = List.sort compare l)
  done

let _ = Printf.printf "Correctness test passed\n%!"

(* TEST STABILITY *)

let alea2 n p =
  let rec fn acc n =
    if n <= 0 then acc else fn ((Random.int p, Random.int p) :: acc) (n - 1)
  in fn [] n

let _ =
  for i = 0 to 1000 do
    let l = alea2 i 100 in
    let cmp (x,_) (y,_) = compare x y in
    assert (sort cmp l = List.sort cmp l)
  done

let _ = Printf.printf "Stability test passed\n%!"

(** Random lists *)

let _ = Printf.printf "Random lists:\n%!"

let l0 = alea 2_000_000 100_000_000
let _ = chronos "random" (List.stable_sort compare) (sort compare) l0

let l0 = alea 2_000_000 5
let _ = chronos "random small" (List.stable_sort compare) (sort compare) l0

(** TWO WORST CASES *)
let _ = Printf.printf "Worst cases:\n%!"

let worst1 n =
  let rec fn acc n =
    if n <= 0 then acc else
    fn (n-3::n::acc) (n-2)
  in fn [] n

let worst2 n =
  let rec fn acc n =
    if n <= 0 then acc else
    fn (n-4::n+1::n::acc) (n-3)
  in fn [] n

let l0 = worst1 2_000_000
let _ = chronos "worst1" (List.stable_sort compare) (sort compare) l0

let l0 = worst2 2_000_000
let _ = chronos "worst2" (List.stable_sort compare) (sort compare) l0

(** SORTED LISTS *)
let _ = Printf.printf "Sorted (partially) lists:\n%!"

let sorted n a b =
  let rec fn acc n =
    if n <= 0 then acc else fn (a*n+b :: acc) (n - 1)
  in fn [] n

(** Almost sorted *)
let l0 = sorted 2_000_000 1 0
let _ = chronos "sorted" (List.stable_sort compare) (sort compare) l0

let l0 = sorted 2_000_000 (-1) 0
let _ = chronos "reversed" (List.stable_sort compare) (sort compare) l0

let l0 = sorted 1_000_000 1 0 @ sorted 1_000_000 (-1) 0
let _ = chronos "sorted@rev" (List.stable_sort compare) (sort compare) l0

let l0 = sorted 1_000_000 (-1) 0 @ sorted 1_000_000 1 0
let _ = chronos "rev@sorted" (List.stable_sort compare) (sort compare) l0

(** Shuffled lists (permute k times 2 elements in a sorted list) *)
let _ = Printf.printf
          "Shuffled lists (permute k times 2 elements in a sorted list):\n%!"

let shuffle n k =
  let array = Array.init n (fun i -> i) in
  for i = 1 to k; do
    let a = Random.int n and b = Random.int n in
    let tmp = array.(a) in
    array.(a) <- array.(b);
    array.(b) <- tmp
  done;
  Array.to_list array

let l0 = shuffle 2_000_000 10
let _ = chronos "shuffle 10" (List.stable_sort compare) (sort compare) l0

let l0 = shuffle 2_000_000 100
let _ = chronos "shuffle 100" (List.stable_sort compare) (sort compare) l0

let l0 = shuffle 2_000_000 1_000
let _ = chronos "shuffle 1000" (List.stable_sort compare) (sort compare) l0

let l0 = shuffle 2_000_000 10_000
let _ = chronos "shuffle 10000" (List.stable_sort compare) (sort compare) l0

let l0 = []

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-10-03  1:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-02  4:11 [Caml-list] An interesting sorting algorithm for list Christophe Raffalli
2017-10-02 20:05 ` Van Chan Ngo
2017-10-03  1:21   ` Christophe Raffalli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).