How can I download, uncompress and process a gzip file in OCaml? -


I want my OCaml application to directly download, uncompress (gzip), and process the resulting text file line by line, without using a temporary file or external programs.

The libraries I looked at are cohttp, ocurl, and camlzip. Unfortunately, I found no nice way to make them work together.

What is the OCaml way to achieve this?

You can make ocurl and camlzip work together using pipes and threads. Proof of concept:

#use "topfind";;
#thread;;
#require "unix";;
#require "curl";;
#require "zip";;

(* libcurl must be initialised once before any handle is created. *)
let () = Curl.(global_init CURLINIT_GLOBALALL)

(** [download url oc] fetches [url] with libcurl and writes every received
    chunk to the output channel [oc].

    The curl handle is released even when the transfer fails. [oc] is not
    closed here; that is left to the caller.

    Exceptions raised by [Curl.setopt] / [Curl.perform] propagate to the
    caller. *)
let download url oc =
  let h = Curl.init () in
  Fun.protect
    ~finally:(fun () -> Curl.cleanup h)
    (fun () ->
       Curl.setopt h (Curl.CURLOPT_URL url);
       (* The write callback reports how many bytes it consumed; returning
          the full chunk length signals that the whole chunk was handled. *)
       Curl.setopt h
         (Curl.CURLOPT_WRITEFUNCTION
            (fun chunk ->
               output_string oc chunk;
               String.length chunk));
       Curl.perform h)

(** [read_line really_input] reads one line, one byte at a time, using
    [really_input buf pos len] to fill [len] bytes of [buf] at [pos].

    Returns the line without its terminating ['\n']. A final line that is
    not newline-terminated is returned as is.

    @raise End_of_file if the input is exhausted before any byte of a new
    line could be read. *)
let read_line really_input =
  let line = Buffer.create 256 in
  (* Strings are immutable, so the one-byte scratch area must be [Bytes]. *)
  let byte = Bytes.create 1 in
  let rec loop () =
    match really_input byte 0 1 with
    | () ->
      (match Bytes.get byte 0 with
       | '\n' -> Buffer.contents line
       | c ->
         Buffer.add_char line c;
         loop ())
    | exception End_of_file ->
      if Buffer.length line = 0 then raise End_of_file
      else Buffer.contents line
  in
  loop ()

(** [curl_gzip_iter f url] downloads the gzip-compressed resource at [url]
    and calls [f] on every line of the decompressed text, without a
    temporary file or an external program.

    A background thread downloads into the write end of a pipe while the
    calling thread decompresses from the read end.

    NOTE(review): an exception raised by [f] or by the gzip decoder
    propagates immediately, without closing the gzip channel or joining
    the download thread; callers that need robust error handling must add
    it themselves. *)
let curl_gzip_iter f url =
  let fd_in, fd_out = Unix.pipe () in
  let ic = Unix.in_channel_of_descr fd_in
  and oc = Unix.out_channel_of_descr fd_out in
  let producer =
    Thread.create
      (fun () ->
         (* Always close the write end, even when the download fails, so
            the reader sees end-of-file instead of blocking forever. *)
         Fun.protect
           ~finally:(fun () -> close_out oc)
           (fun () -> download url oc))
      ()
  in
  let zic = Gzip.open_in_chan ic in
  let really_input = Gzip.really_input zic in
  let rec consume () =
    (* Only the read is guarded: an [End_of_file] raised by [f] itself is
       not mistaken for the end of the stream. *)
    match read_line really_input with
    | line ->
      f line;
      consume ()
    | exception End_of_file -> ()
  in
  consume ();
  Gzip.close_in zic;
  Thread.join producer

let () = curl_gzip_iter print_endline "file:///tmp/toto.gz"

It becomes painful when one has to handle errors, though.


Comments