caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* [Caml-list] An interesting sorting algorithm for list
@ 2017-10-02  4:11 Christophe Raffalli
  2017-10-02 20:05 ` Van Chan Ngo
  0 siblings, 1 reply; 3+ messages in thread
From: Christophe Raffalli @ 2017-10-02  4:11 UTC (permalink / raw)
  To: caml-list


[-- Attachment #1.1: Type: text/plain, Size: 1598 bytes --]

Hello,

Here is an algorithm I found nice to sort lists.  It is in O(n ln(k))
where n is the size and k the number of changes of direction in the list.

for [ 1; 5; 10; 15; 20; 10; 2; 3; 5; 7], k = 3.

This implementation is a stable sort.

It is "almost" as fast as List.sort in the bad cases (Random lists)
(when k ~ n) and can be much faster if k is small as expected...

A challenge: find a similar algorithm, for lists, that is always
faster than List.sort ... I have tried a lot and I was always 5% slower
on the bas cases ... (Try to remain stable)

Enjoy,
Christophe

PS: the benchmark:

Correctness test passed
Stability test passed
Random lists:
          random: tf = 1.53, tg = 1.56, factor = 0.98x, gain = -2.33%
    random small: tf = 1.37, tg = 1.44, factor = 0.95x, gain = -4.88%
Worst cases:
          worst1: tf = 1.31, tg = 1.38, factor = 0.95x, gain = -5.18%
          worst2: tf = 1.32, tg = 1.36, factor = 0.98x, gain = -2.49%
Sorted (partially) lists:
          sorted: tf = 1.28, tg = 0.01, factor = 97.21x, gain = 98.97%
        reversed: tf = 1.31, tg = 0.17, factor = 7.76x, gain = 87.11%
      sorted@rev: tf = 1.33, tg = 0.37, factor = 3.60x, gain = 72.23%
      rev@sorted: tf = 1.30, tg = 0.38, factor = 3.44x, gain = 70.94%
Shuffled lists (permute k times 2 elements in a sorted list):
      shuffle 10: tf = 1.35, tg = 0.80, factor = 1.68x, gain = 40.64%
     shuffle 100: tf = 1.36, tg = 1.07, factor = 1.27x, gain = 21.56%
    shuffle 1000: tf = 1.38, tg = 1.20, factor = 1.15x, gain = 13.17%
   shuffle 10000: tf = 1.41, tg = 1.25, factor = 1.13x, gain = 11.45%

[-- Attachment #1.2: block_sort.ml --]
[-- Type: text/plain, Size: 8464 bytes --]

(** Implementation of a sorting algorithm for lists
    in O(n ln(k)), where
    - n is the list size
    - k is the number of changes of direction *)

(** The first phase of the algorithm is
    to split the list in block which are either
    in order of in reverse order.

    The block have a size and a pointer to the original list.
 *)

type 'a blocks =
  | Ord of { size : int
           ; list : 'a list (* beginning of the block *)
           ; next : 'a blocks
           }
  | Rev of { size : int
           ; list : 'a list (* beginning of the block *)
           ; next : 'a blocks
           }
  | Fin

(** [split cmp x l l0] the list [x::l as l0] with [cmp] as comparison.
    Beware, the block are returned with first the first block being
    the last in the list.
 *)
let split cmp x l l0 =
  (** Start of a new block *)
  let rec split_ini len next x l l0 =
    match l with
    | [] -> (len, Ord { size=1; list=l0; next })
    | y :: l' ->
       if cmp x y <= 0 then
         split_sam len next 2 y l' l0
       else
         split_rev len next 2 y l' l0

  (** We are building a reverse block *)
  and split_rev len next size x l l0 =
    match l with
    | [] -> (len, Rev { size; list=l0; next })
    | y :: l' ->
       (** Note: < 0 here is a bit faster but not stable *)
       if cmp x y <= 0 then
         let next = Rev { size; list=l0; next } in
         split_ini (len+1) next y l' l
       else
         split_rev len next (size+1) y l' l0

  and split_sam len next size x l l0 =
    match l with
    | [] -> (len, Ord { size; list=l0; next })
    | y :: l' ->
       if cmp x y <= 0 then
         split_sam len next (size+1) y l' l0
       else
         let next = Ord { size; list=l0; next } in
         split_ini (len+1) next y l' l

  in
  split_ini 1 Fin x l l0

(* The two next functions are used to convert block cells in list *)

(** [rev_heads n l] gives the reversal of the n firsts elements of l.*)
let rev_heads n l =
  let rec gn acc n l =
    match n,l with
    | 0, _ -> acc
    | _, x::l -> gn (x::acc) (n-1) l
    | _ -> assert false
  in
  gn [] n l

(** [heads n l] gives the n firsts elements of l *)
let rec heads n l =
  let rec fn n l =
    match n,l with
    | 0, _ -> []
    | _, x::l -> x::fn (n-1) l
    | _ -> assert false
  in
  if n >= 100_000 then (** use a tail rec version if n is large *)
    List.rev (rev_heads n l)
  else
    fn n l

(** Now a merge sort with a spacial treatment of the leafs *)

(** reversal of the merge of l1 l2 which are sorted,
    the result is reverse sorted*)
let rec rev_merge cmp acc l1 l2 =
  match l1, l2 with
  | ([], l) | (l, []) -> List.rev_append l acc
  | (x::l1'), (y::l2') ->
     if cmp x y <= 0 then rev_merge cmp (x::acc) l1' l2
     else rev_merge cmp (y::acc) l1 l2'

(** merge of l1 l2 which are reverse sorted, the result is sorted *)
let rec merge_rev cmp acc l1 l2 =
  match l1, l2 with
  | ([], l) | (l, []) -> List.rev_append l acc
  | (x::l1'), (y::l2') ->
     if cmp x y > 0 then merge_rev cmp (x::acc) l1' l2
     else merge_rev cmp (y::acc) l1 l2'

(** the two mutually recursive sort functions, one returning a sorted
    list the second a reverse sorted list. The first argument is a
    reference to the blocks *)
let rec sort cur cmp n =
  match n with
  | 1 -> (* 1 block, we transform it into a list *)
     begin
       match !cur with
       | Ord{size;list;next} ->
          cur := next;
          (* This case is a bit of a waste ... *)
          heads size list;
       | Rev{size;list;next} ->
          cur := next;
          rev_heads size list
       | Fin ->
          assert false
     end
  | _ -> (* standard merge *)
     let n1 = n lsr 1 in
     let n2 = n - n1 in
     (** end of the list first as cur point to the end first *)
     let l2 = rev_sort cur cmp n2 in
     let l1 = rev_sort cur cmp n1 in
     merge_rev cmp [] l1 l2

and rev_sort cur cmp n =
  match n with
  | 1 -> (* 1 block, we transform it into a list *)
     begin
       match !cur with
       | Ord{size;list;next} ->
          cur := next;
          rev_heads size list;
       | Rev{size;list;next} ->
          cur := next;
          (* This case is a bit of a waste ... *)
          heads size list
       | Fin ->
          assert false
     end
  | _ ->
     let n1 = n lsr 1 in
     let n2 = n - n1 in
     (** end of the list first as cur point to the end first *)
     let l2 = sort cur cmp n2 in
     let l1 = sort cur cmp n1 in
     rev_merge cmp [] l1 l2

(** Final sorting algorithm *)
let sort : type a. (a -> a -> int) -> a list -> a list = fun cmp l0 ->
  match l0 with [] | [_] -> l0 | x::l ->
  let (len, blocks) = split cmp x l l0 in
  match blocks with
  | Ord{list;next=Fin} -> list
  | Rev{list;next=Fin} -> List.rev list
  | _ -> sort (ref blocks) cmp len

(* TESTS *)

(* Printing *)
let print_list f ch =
  Printf.fprintf ch "[%a]" (fun ch l ->
                   List.iteri (fun i x ->
                       Printf.fprintf ch "%s%a" (if i > 0 then ";" else "") f x) l)

let print_il = print_list (fun ch -> Printf.fprintf ch "%d")
let print_ill = print_list (fun ch -> Printf.fprintf ch "%a" print_il)

(* Timing *)
let chrono f x =
  Gc.compact ();
  let t1 = Sys.time () in
  let r = f x in
  let t2 = Sys.time () in
  (t2 -. t1, r)

let chronos msg f g x =
  let tf, lf = chrono f x in
  let tg, lg = chrono g x in
  assert(lf = lg);
  let g = 100. *. (tf -. tg) /. tf in
  let f = tf /. tg in
  Printf.printf "%16s: tf = %.2f, tg = %.2f, factor = %.2fx, gain = %.2f%%\n%!" msg tf tg f g

(* TEST CORRECTNESS *)

let alea n p =
  let rec fn acc n =
    if n <= 0 then acc else fn (Random.int p :: acc) (n - 1)
  in fn [] n

let _ =
  for i = 0 to 1000 do
    let l = alea i 10_000 in
    assert (sort compare l = List.sort compare l)
  done

let _ = Printf.printf "Correctness test passed\n%!"

(* TEST STABILITY *)

let alea2 n p =
  let rec fn acc n =
    if n <= 0 then acc else fn ((Random.int p, Random.int p) :: acc) (n - 1)
  in fn [] n

let _ =
  for i = 0 to 1000 do
    let l = alea2 i 100 in
    let cmp (x,_) (y,_) = compare x y in
    assert (sort cmp l = List.sort cmp l)
  done

let _ = Printf.printf "Stability test passed\n%!"

(** Random lists *)

let _ = Printf.printf "Random lists:\n%!"

let l0 = alea 2_000_000 100_000_000
let _ = chronos "random" (List.stable_sort compare) (sort compare) l0

let l0 = alea 2_000_000 5
let _ = chronos "random small" (List.stable_sort compare) (sort compare) l0

(** TWO WORST CASES *)
let _ = Printf.printf "Worst cases:\n%!"

let worst1 n =
  let rec fn acc n =
    if n <= 0 then acc else
    fn (n-3::n::acc) (n-2)
  in fn [] n

let worst2 n =
  let rec fn acc n =
    if n <= 0 then acc else
    fn (n-4::n+1::n::acc) (n-3)
  in fn [] n

let l0 = worst1 2_000_000
let _ = chronos "worst1" (List.stable_sort compare) (sort compare) l0

let l0 = worst2 2_000_000
let _ = chronos "worst2" (List.stable_sort compare) (sort compare) l0

(** SORTED LISTS *)
let _ = Printf.printf "Sorted (partially) lists:\n%!"

let sorted n a b =
  let rec fn acc n =
    if n <= 0 then acc else fn (a*n+b :: acc) (n - 1)
  in fn [] n

(** Almost sorted *)
let l0 = sorted 2_000_000 1 0
let _ = chronos "sorted" (List.stable_sort compare) (sort compare) l0

let l0 = sorted 2_000_000 (-1) 0
let _ = chronos "reversed" (List.stable_sort compare) (sort compare) l0

let l0 = sorted 1_000_000 1 0 @ sorted 1_000_000 (-1) 0
let _ = chronos "sorted@rev" (List.stable_sort compare) (sort compare) l0

let l0 = sorted 1_000_000 (-1) 0 @ sorted 1_000_000 1 0
let _ = chronos "rev@sorted" (List.stable_sort compare) (sort compare) l0

(** Shuffled lists (permute k times 2 elements in a sorted list) *)
let _ = Printf.printf
          "Shuffled lists (permute k times 2 elements in a sorted list):\n%!"

let shuffle n k =
  let array = Array.init n (fun i -> i) in
  for i = 1 to k; do
    let a = Random.int n and b = Random.int n in
    let tmp = array.(a) in
    array.(a) <- array.(b);
    array.(b) <- tmp
  done;
  Array.to_list array

let l0 = shuffle 2_000_000 10
let _ = chronos "shuffle 10" (List.stable_sort compare) (sort compare) l0

let l0 = shuffle 2_000_000 100
let _ = chronos "shuffle 100" (List.stable_sort compare) (sort compare) l0

let l0 = shuffle 2_000_000 1_000
let _ = chronos "shuffle 1000" (List.stable_sort compare) (sort compare) l0

let l0 = shuffle 2_000_000 10_000
let _ = chronos "shuffle 10000" (List.stable_sort compare) (sort compare) l0

let l0 = []

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Caml-list] An interesting sorting algorithm for list
  2017-10-02  4:11 [Caml-list] An interesting sorting algorithm for list Christophe Raffalli
@ 2017-10-02 20:05 ` Van Chan Ngo
  2017-10-03  1:21   ` Christophe Raffalli
  0 siblings, 1 reply; 3+ messages in thread
From: Van Chan Ngo @ 2017-10-02 20:05 UTC (permalink / raw)
  To: Christophe Raffalli; +Cc: caml-list

[-- Attachment #1: Type: text/plain, Size: 1921 bytes --]

Hi,

It sounds interesting. However, I suppose they have the same worst-case
complexity O(nlogn).
Could we formally have the average complexity?

Best,
-Van Chan


On Mon, Oct 2, 2017 at 12:11 AM, Christophe Raffalli <christophe@raffalli.eu
> wrote:

> Hello,
>
> Here is an algorithm I found nice to sort lists.  It is in O(n ln(k))
> where n is the size and k the number of changes of direction in the list.
>
> for [ 1; 5; 10; 15; 20; 10; 2; 3; 5; 7], k = 3.
>
> This implementation is a stable sort.
>
> It is "almost" as fast as List.sort in the bad cases (Random lists)
> (when k ~ n) and can be much faster if k is small as expected...
>
> A challenge: find a similar algorithm, for lists, that is always
> faster than List.sort ... I have tried a lot and I was always 5% slower
> on the bas cases ... (Try to remain stable)
>
> Enjoy,
> Christophe
>
> PS: the benchmark:
>
> Correctness test passed
> Stability test passed
> Random lists:
>           random: tf = 1.53, tg = 1.56, factor = 0.98x, gain = -2.33%
>     random small: tf = 1.37, tg = 1.44, factor = 0.95x, gain = -4.88%
> Worst cases:
>           worst1: tf = 1.31, tg = 1.38, factor = 0.95x, gain = -5.18%
>           worst2: tf = 1.32, tg = 1.36, factor = 0.98x, gain = -2.49%
> Sorted (partially) lists:
>           sorted: tf = 1.28, tg = 0.01, factor = 97.21x, gain = 98.97%
>         reversed: tf = 1.31, tg = 0.17, factor = 7.76x, gain = 87.11%
>       sorted@rev: tf = 1.33, tg = 0.37, factor = 3.60x, gain = 72.23%
>       rev@sorted: tf = 1.30, tg = 0.38, factor = 3.44x, gain = 70.94%
> Shuffled lists (permute k times 2 elements in a sorted list):
>       shuffle 10: tf = 1.35, tg = 0.80, factor = 1.68x, gain = 40.64%
>      shuffle 100: tf = 1.36, tg = 1.07, factor = 1.27x, gain = 21.56%
>     shuffle 1000: tf = 1.38, tg = 1.20, factor = 1.15x, gain = 13.17%
>    shuffle 10000: tf = 1.41, tg = 1.25, factor = 1.13x, gain = 11.45%
>

[-- Attachment #2: Type: text/html, Size: 2475 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Caml-list] An interesting sorting algorithm for list
  2017-10-02 20:05 ` Van Chan Ngo
@ 2017-10-03  1:21   ` Christophe Raffalli
  0 siblings, 0 replies; 3+ messages in thread
From: Christophe Raffalli @ 2017-10-03  1:21 UTC (permalink / raw)
  To: Van Chan Ngo; +Cc: caml-list

[-- Attachment #1: Type: text/plain, Size: 2699 bytes --]

Hi

On 17-10-02 16:05:25, Van Chan Ngo wrote:
> Hi,
>
> It sounds interesting. However, I suppose they have the same worst-case
> complexity O(nlogn).

Yes sure word case is k=O(n).

> Could we formally have the average complexity?

Yes, O(n log n). It is not too hard to see that lists have k = O(n) in
average. No miracle to hope here. However there are a lot of practical
cases where k is lower in average.

The only fun challenge is to have a O(n ln(k) algorithm that is always faster
than the current merge sort of OCaml ... This is hard because the
merge sort is very well suited for a language like OCaml, that can allocates
memory really fast.

Cheers,
Christophe

> Best,
> -Van Chan
>
>
> On Mon, Oct 2, 2017 at 12:11 AM, Christophe Raffalli <christophe@raffalli.eu>
> wrote:
>
>     Hello,
>
>     Here is an algorithm I found nice to sort lists.  It is in O(n ln(k))
>     where n is the size and k the number of changes of direction in the list.
>
>     for [ 1; 5; 10; 15; 20; 10; 2; 3; 5; 7], k = 3.
>
>     This implementation is a stable sort.
>
>     It is "almost" as fast as List.sort in the bad cases (Random lists)
>     (when k ~ n) and can be much faster if k is small as expected...
>
>     A challenge: find a similar algorithm, for lists, that is always
>     faster than List.sort ... I have tried a lot and I was always 5% slower
>     on the bas cases ... (Try to remain stable)
>
>     Enjoy,
>     Christophe
>
>     PS: the benchmark:
>
>     Correctness test passed
>     Stability test passed
>     Random lists:
>               random: tf = 1.53, tg = 1.56, factor = 0.98x, gain = -2.33%
>         random small: tf = 1.37, tg = 1.44, factor = 0.95x, gain = -4.88%
>     Worst cases:
>               worst1: tf = 1.31, tg = 1.38, factor = 0.95x, gain = -5.18%
>               worst2: tf = 1.32, tg = 1.36, factor = 0.98x, gain = -2.49%
>     Sorted (partially) lists:
>               sorted: tf = 1.28, tg = 0.01, factor = 97.21x, gain = 98.97%
>             reversed: tf = 1.31, tg = 0.17, factor = 7.76x, gain = 87.11%
>           sorted@rev: tf = 1.33, tg = 0.37, factor = 3.60x, gain = 72.23%
>           rev@sorted: tf = 1.30, tg = 0.38, factor = 3.44x, gain = 70.94%
>     Shuffled lists (permute k times 2 elements in a sorted list):
>           shuffle 10: tf = 1.35, tg = 0.80, factor = 1.68x, gain = 40.64%
>          shuffle 100: tf = 1.36, tg = 1.07, factor = 1.27x, gain = 21.56%
>         shuffle 1000: tf = 1.38, tg = 1.20, factor = 1.15x, gain = 13.17%
>        shuffle 10000: tf = 1.41, tg = 1.25, factor = 1.13x, gain = 11.45%
>
>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-10-03  1:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-02  4:11 [Caml-list] An interesting sorting algorithm for list Christophe Raffalli
2017-10-02 20:05 ` Van Chan Ngo
2017-10-03  1:21   ` Christophe Raffalli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).