(* Matrix Multiplication and Markov Chains: Checking safety with respect to the restriction in circle-shift The present code is based upon the code written by Walid Taha. See band_markov_orig.ml for the original code. We re-write the original code using delimited control rather than mutation, checking the circle-shift restriction. Problem: Given a particular Markov model (defined in detail below) during a transition a state can remain unchanged with probability "p" or move to some close-by state with another given probability, compute p so that there is a 50% chance after 2 transitions that if we start from s0 we will still be in s0. Notes: mkmatrix assumes n greater than 3. *) Trx.init_times ();; (* Abstract data type of matrices *) module type Matrix = sig type el type w type t type trow (* a particular row of the matrix *) val make : int -> int -> el -> t (* make nrows ncols init_val *) val get_row : t -> int -> trow val getj : trow -> int -> el val set : trow -> int -> el -> unit val getij : t -> int -> int -> el val top : (unit -> w) -> w end;; (* One implementation, using Array *) module MArray(EL:sig type el type w end) : Matrix with type el = EL.el and type w = EL.w = struct open Array type el = EL.el type w = EL.w type t = el array array type trow = el array let make = make_matrix let get_row = Array.get let getj = Array.get let set = Array.set let getij a i j = a.(i).(j) let top th = th () end;; (* Another implementation, using delimited continuations and pure-functional maps. Alas, OCaml does not have IntMap. The latter are essentially just as fast as mutable vectors and matrices: the time to access or modify an element is bounded by a constant. In any case, the implementation below does not have to be efficient; it is only needed to work. If it works, the more efficient implementation shall work as well: note the signature ascription and sealing. To match the formalization in the circle-shift paper, we explicitly use only one prompt, as the mark for the `local' heap. In the present example, all matrices have the same element type. For simplicity, we take the advantage of the fact. This is not a restriction as we can always use more than one prompt (i.e., multiple local heaps) or the appropriate universal type encoding. The matrix is still typed, even with the universal encoding of the heap (we would use a phantom type), so typing is not compromised. In the present example, the general approach is not necessary. *) module MShift(EL:sig type el type w end) : Matrix with type el = EL.el and type w = EL.w = struct type matrix_id = int type el = EL.el type w = EL.w module Coll = (* The key is: matrix number * row * col *) Map.Make(struct type t = matrix_id * int * int let compare = Pervasives.compare end) open Delimcc let p = new_prompt () type heap = matrix_id * el Coll.t (* allocation counter * content *) type t = matrix_id type trow = matrix_id * int (* specific row *) let get_row m i = (m,i) let make i j init = (* we disregard init, it never matters in the present application *) shift p (fun k -> function (alloc_cnt,coll) -> k alloc_cnt (succ alloc_cnt,coll)) let set (m,i) j el = shift p (fun k -> function (alloc_cnt,coll) -> let coll' = Coll.add (m,i,j) el coll in k () (alloc_cnt,coll')) let getij m i j : el = shift p (fun k -> function (_,coll) as heap -> k (Coll.find (m,i,j) coll) heap) let getj (m,i) j = getij m i j let top (thunk : unit -> w) = (push_prompt p (fun () -> let v = thunk () in fun heap -> v)) (0,Coll.empty) end;; (**************************************************************************) (* Building a model parameterized by a probability (Unstaged) *) (**************************************************************************) module Unstaged(M: Matrix with type el = float and type w = float) = struct let mkmatrix rows cols p = let last_col = cols - 1 and m = M.make rows cols 0.0 in for i = 0 to rows - 1 do let mi = M.get_row m i in for j = 0 to last_col do (* stay probability is p *) (* choices are max(0,i-5)...min(n-1,i+5), divide remain prob *) let r = rows / 4 in let mn = max 0 (i-r) in let mx = min (rows-1) (i+r) in let rn = mx-mn in let q =(if i=j then p else if (j>=mn)&&(j<=mx) then (1.0 -. p)/.(float rn) else 0.0) in M.set mi j q; done; done; m let rec inner_loop k v m1i m2 j = if k < 0 then v else inner_loop (k - 1) (v +. M.getj m1i k *. (M.getij m2 k j)) m1i m2 j let mmult rows cols m1 m2 m3 = let last_col = cols - 1 and last_row = rows - 1 in for i = 0 to last_row do let m1i = M.get_row m1 i and m3i = M.get_row m3 i in for j = 0 to last_col do M.set m3i j (inner_loop last_row 0.0 m1i m2 j) done; done let ans1 = fun p -> M.top (fun () -> let size = 30 in let m1 = mkmatrix size size p and m2 = mkmatrix size size p in let m3 = M.make size size 0.0 in mmult size size m1 m2 m3; M.getij m3 0 0) end;; (**************************************************************************) (* Staged version *) (**************************************************************************) (* Staged version uses an option type to elimination additions, subtractions, and multiplications that involve a known 0.0 value *) type 'el cde = {cd : 'a. ('a,'el) code};; let ( -..) x y = match (x,y) with (None, None) -> None |(None, Some b) -> Some {cd = .< -. .~b.cd>.} |(Some a, None) -> Some a |(Some a, Some b) -> Some {cd = .< .~(a.cd) -. .~(b.cd)>.} let ( +..) x y = match (x,y) with (None, None) -> None |(None, Some b) -> Some b |(Some a, None) -> Some a |(Some a, Some b) -> Some {cd = .< .~(a.cd) +. .~(b.cd)>.} let ( *..) x y = match (x,y) with (None, None) -> None |(None, Some b) -> None |(Some a, None) -> None |(Some a, Some b) -> Some {cd = .< .~(a.cd) *. .~(b.cd)>.} (* Staged version proper *) let lift x = ..;; let cast_classifier (x : ('a,'el) code) : 'el cde = {cd = Obj.magic x};; module Staged(M: Matrix with type el = float cde option and type w = float cde option) = struct let mkmatrix rows cols p = let last_col = cols - 1 and m = M.make rows cols None in for i = 0 to rows - 1 do let mi = M.get_row m i in for j = 0 to last_col do (* stay probability is p *) (* choices are max(0,i-5)...min(n-1,i+5), divide remain prob *) let r = rows / 4 in let mn = max 0 (i-r) in let mx = min (rows-1) (i+r) in let rn = mx-mn in let q =(if i=j then Some p else if (j>=mn)&&(j<=mx) then Some {cd = .<(1.0 -. .~p.cd)/.(.~(lift (float rn)))>.} else None) in M.set mi j q; done; done; m let rec inner_loop k v m1i m2 j = if k < 0 then v else inner_loop (k - 1) (v +.. M.getj m1i k *.. (M.getij m2 k j)) m1i m2 j let mmult rows cols m1 m2 m3 = let last_col = cols - 1 and last_row = rows - 1 in for i = 0 to last_row do let m1i = M.get_row m1 i and m3i = M.get_row m3 i in for j = 0 to last_col do M.set m3i j (inner_loop last_row None m1i m2 j) done; done let ans = Trx.timenew "ans" (fun () -> . .~( match (M.top (fun () -> let size = 30 in let pcde = cast_classifier .

. in let m1 = mkmatrix size size pcde and m2 = mkmatrix size size pcde in let m3 = M.make size size None in mmult size size m1 m2 m3; M.getij m3 0 0)) with Some q -> q.cd) >.) let ans2 = Trx.timenew "ans2" (fun () -> .! ans) end;; (**************************************************************************) (* Simple Solver *) (**************************************************************************) (* A function that numerically solves for x such that f(x)=y *) let ab x = if x>0.0 then x else -. x;; let rec solve f y x0 y0 x1 y1 e n = let xb = (x0 +. x1)/.2.0 in let yg = f xb in let en = ab(y-.yg) in if (en<=e) then (n+1,xb) else if (yg solve ans1 0.5 0.0 (ans1 0.0) 1.0 (ans1 1.0) (1E-15) 0);; let r1_shift = let module A = Unstaged(MShift(UnstagedEL)) in let ans1 = A.ans1 in Trx.timenew "r1_shift" (fun () -> solve ans1 0.5 0.0 (ans1 0.0) 1.0 (ans1 1.0) (1E-15) 0);; Trx.print_times ();; module StagedEL = struct type el = float cde option type w = float cde option end;; let r2_arr = let module A = Staged(MArray(StagedEL)) in let ans2 = A.ans2 in Trx.timenew "r2_arr" (fun () -> solve ans2 0.5 0.0 (ans2 0.0) 1.0 (ans2 1.0) (1E-15) 0);; Trx.print_times ();; let r2_arr = let module A = Staged(MShift(StagedEL)) in let ans2 = A.ans2 in Trx.timenew "r2_shift" (fun () -> solve ans2 0.5 0.0 (ans2 0.0) 1.0 (ans2 1.0) (1E-15) 0);; Trx.print_times ();; (* Timings on my machine: First, the baseline, band_markov.ml val r1 : int * float = (49, 0.701138422300628505) val r2 : int * float = (49, 0.701138422300628505) __ r1 __________________________ 2x avg= 7.524974E+02 msec __ ans ________________________ 16x avg= 9.165629E+01 msec __ ans2 ______________________ 256x avg= 5.907093E+00 msec __ r2 _______________________ 8192x avg= 2.160367E-01 msec r1 is solving using the unstaged version ans is the time needed to generate for the staged version ans2 is the time needed to compile the staged version r2 is solving using the staged version Notice that the sum of the last three times is still almost an order of magnitude less than the time needed to computer r1. If we just compare r2 to r1, we have a speedup of over 3,500 times. val r1_arr : int * float = (49, 0.701138422300628505) __ r1_arr ______________________ 2x avg= 9.713190E+02 msec We notice that making the model functorial (a functor depending on the Matrix representation) adds a bit of overhead: (9.7-7.5)/7.5, about 30% val r1_shift : int * float = (49, 0.701138422300628505) __ r1_shift ____________________ 1x avg= 3.767327E+04 msec Using delimited continuations _and_ Map to emulate mutable arrays is about 50 times slower. But it gives the same result. Again, the efficiency doesn't matter: because of the sealing, the exact implementation of mutable array does not matter for the user code. Staging, using arrays: val r2_arr : int * float = (49, 0.701138422300628505) __ ans ________________________ 16x avg= 8.059752E+01 msec __ ans2 ______________________ 256x avg= 6.016140E+00 msec __ r2_arr ___________________ 8192x avg= 2.167637E-01 msec Staging, using shift: val r2_arr : int * float = (49, 0.701138422300628505) __ ans _________________________ 2x avg= 8.376485E+02 msec __ ans2 ______________________ 256x avg= 6.023433E+00 msec __ r2_shift _________________ 8192x avg= 2.147052E-01 msec Note that r2_arr and r2_shift are understandably the same. Also notice that r1_shift/r2_shift is about 10^5. That is quite a speed-up... *)