caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: jean-vincent.loddo@lipn.univ-paris13.fr
To: <caml-list@inria.fr>
Subject: [Caml-list] Memory leaks generated by Scanf.fscanf?
Date: Fri, 20 Jun 2014 14:29:00 +0200	[thread overview]
Message-ID: <e9dba6f33347ae197b0fc1f1974001b0@lipn.univ-paris13.fr> (raw)

Hi,

while working on Marionnet (https://launchpad.net/marionnet), I noticed a
serious memory leak that makes the system unusable after a few tens of
minutes. After investigation, the problem seems to be related to
Scanf.fscanf. This hypothesis is supported by the fact that the problem
disappears when fscanf is replaced with the composition of
Pervasives.input_line and Scanf.sscanf. I wrote a small program (included
at the end of this message) that illustrates the problem with a thread
repeatedly scanning the content of a file (with fscanf or sscanf). The
result is the same with OCaml 3.12.1 and 4.01.0: with sscanf
(~with_sscanf:true), the function `start_thread' shows that the data are
successfully collected:

Iteration #01: stat called 256 times: live blocks: 109590
Iteration #02: stat called 256 times: live blocks: 103063
Iteration #03: stat called 256 times: live blocks: 104091
Iteration #04: stat called 256 times: live blocks: 105119
Iteration #05: stat called 256 times: live blocks: 106147
Iteration #06: stat called 256 times: live blocks: 107175
Iteration #07: stat called 256 times: live blocks: 108203
Iteration #08: stat called 256 times: live blocks: 99637
Iteration #09: stat called 256 times: live blocks: 100665
Iteration #10: stat called 256 times: live blocks: 101693
Iteration #11: stat called 256 times: live blocks: 102721
Iteration #12: stat called 256 times: live blocks: 103749
Iteration #13: stat called 256 times: live blocks: 99637
Iteration #14: stat called 256 times: live blocks: 100665
Iteration #15: stat called 256 times: live blocks: 101693

With fscanf, however, memory usage keeps growing (blocks are
apparently not collected, even though they should be):

Iteration #01: stat called 256 times: live blocks: 114469
Iteration #02: stat called 256 times: live blocks: 107613
Iteration #03: stat called 256 times: live blocks: 111456
Iteration #04: stat called 256 times: live blocks: 115299
Iteration #05: stat called 256 times: live blocks: 118890
Iteration #06: stat called 256 times: live blocks: 116601
Iteration #07: stat called 256 times: live blocks: 120235
Iteration #08: stat called 256 times: live blocks: 123925
Iteration #09: stat called 256 times: live blocks: 127615
Iteration #10: stat called 256 times: live blocks: 131179
Iteration #11: stat called 256 times: live blocks: 135023
Iteration #12: stat called 256 times: live blocks: 135583
Iteration #13: stat called 256 times: live blocks: 140450
Iteration #14: stat called 256 times: live blocks: 144197
Iteration #15: stat called 256 times: live blocks: 147790

Sorry if the problem is well known or if something is wrong in my
analysis, but I cannot find anything about it on this list or elsewhere
on the net.
Best regards,
Jean-Vincent Loddo

---
(* [stat_with_fscanf pid] parses the leading fields of /proc/<pid>/stat
   straight from the input channel with Scanf.fscanf.
   Returns [Some (pid, comm, state, ppid, pgrp)] on success and [None]
   on any failure (the file cannot be opened, the input does not match
   the format, ...). A format mismatch (Scanf.Scan_failure) is also
   reported on stderr.
   This is the fscanf-based variant for which the message above reports
   a growing number of live blocks; fscanf is kept on purpose.
   The channel is closed on every path, including when the scan raises
   an exception other than Scan_failure. *)
let stat_with_fscanf pid =
   let filename = Printf.sprintf "/proc/%d/stat" pid in
   try
     let ch = open_in filename in
     let result =
       try
         let obj =
           Scanf.fscanf ch "%d %s %c %d %d %s@\n"
             (fun pid comm state ppid pgrp _ ->
                (pid, comm, state, ppid, pgrp))
         in
         Some obj
       with
       | Scanf.Scan_failure(msg) ->
           (Printf.kfprintf flush stderr
              "failed scanning file %s: %s\n" filename msg;
            None)
       | e ->
           (* Any other exception raised by the scan (e.g. Failure or
              End_of_file): release the descriptor before the outer
              handler turns the exception into None. *)
           (close_in_noerr ch; raise e)
     in
     let () = close_in ch in
     result
   (* Deliberately best-effort: every failure is reported as None. *)
   with _ -> None

(* [stat_with_sscanf pid] reads the first line of /proc/<pid>/stat with
   input_line and then parses that string with Scanf.sscanf.
   Returns [Some (pid, comm, state, ppid, pgrp)] on success and [None]
   if the line cannot be read or parsed. A format mismatch
   (Scanf.Scan_failure) is also reported on stderr. *)
let stat_with_sscanf pid =
   (* First line of [filename], or None on any error; the channel is
      closed as soon as the read attempt is over. *)
   let input_line_from_file filename =
     try
       let ch = open_in filename in
       let result = try Some (input_line ch) with _ -> None in
       let () = close_in ch in
       result
     with _ -> None
   in
   let filename = Printf.sprintf "/proc/%d/stat" pid in
   match (input_line_from_file filename) with
   | None -> None
   | Some line ->
       try
         let obj =
           Scanf.sscanf line "%d %s %c %d %d %s@\n"
             (fun pid comm state ppid pgrp _ ->
                (pid, comm, state, ppid, pgrp))
         in
         Some obj
       with
       | Scanf.Scan_failure(msg) ->
           (Printf.kfprintf flush stderr
              "failed scanning file %s: %s\n" filename msg;
            None)
       (* The scanner may also raise Failure (numeric conversion) or
          End_of_file (input too short). Answer None in those cases too,
          as stat_with_fscanf does, instead of letting them escape. *)
       | Failure _ | End_of_file -> None

(* Just for testing: stat the `init' process (pid 1) 256 times.
   Uses stat_with_sscanf when [with_sscanf] is true and stat_with_fscanf
   otherwise (the default). Returns the array of the 256 results. *)
let get_some_stats ?(with_sscanf=false) () =
   let stat =
     if with_sscanf then stat_with_sscanf else stat_with_fscanf
   in
   let init_pid = 1 in
   (* Array.make is the non-deprecated name of Array.create. *)
   let zs = Array.make 256 init_pid in
   Array.map (stat) zs

(* [start_thread ?with_sscanf ()] spawns a thread that loops forever:
   each iteration calls get_some_stats (forwarding [with_sscanf]),
   prints the iteration number, the number of results and the current
   Gc live-block count on stderr, then sleeps for 2 seconds.
   Returns unit immediately; the thread handle is discarded. *)
let start_thread ?with_sscanf () =
   (* One progress line per iteration, flushed right away. *)
   let report iteration stats =
     Printf.kfprintf flush stderr
       "Iteration #%02d: stat called %d times: live blocks: %d\n"
       iteration (Array.length stats) (Gc.stat ()).Gc.live_blocks
   in
   let rec loop iteration =
     report iteration (get_some_stats ?with_sscanf ());
     Thread.delay 2.;
     loop (iteration + 1)
   in
   ignore (Thread.create loop 1)

(* Usage:
    start_thread ~with_sscanf:true ();;
    start_thread ();;
*)


             reply	other threads:[~2014-06-20 12:29 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-20 12:29 jean-vincent.loddo [this message]
2014-06-20 13:01 ` Jeremy Yallop
2014-06-20 15:35   ` Gabriel Scherer
2014-06-22 17:11     ` Benoît Vaugon
2014-06-23  9:06       ` François Bobot
2014-06-27 14:32   ` Jeremy Yallop

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e9dba6f33347ae197b0fc1f1974001b0@lipn.univ-paris13.fr \
    --to=jean-vincent.loddo@lipn.univ-paris13.fr \
    --cc=caml-list@inria.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).