type 'a arbre = Feuille of 'a | Noeud of 'a arbre * 'a arbre;;
(* Question 2 *)
let nb_occurrences phrase =
let n = String.length phrase in
let occ = Array.make 256 0 in
for i = 0 to n-1 do
occ.(Char.code phrase.[i]) <- occ.(Char.code phrase.[i]) + 1
done;
occ
;;
(* Question 3 *)
let rec split g d = function
| [] -> g, d
| [x] -> x::g, d
| h :: i :: t -> split (h::g) (i::d) t
;;
let rec merge compare a b = match a, b with
| [], l | l, [] -> l
| h :: t, i :: _ when compare h i -> h :: merge compare t b
| _, h :: t -> h :: merge compare a t
;;
let rec merge_sort compare = function
| [] -> []
| [x] -> [x]
| l -> let g, d = split [] [] l in
merge compare (merge_sort compare g) (merge_sort compare d)
;;
(* Question 4 *)
let list_of_occ occ =
let rec list_of_occ i =
if i >= 256 then []
else if occ.(i) > 0 then
(Feuille (Char.chr i), occ.(i)) :: list_of_occ (i+1)
else list_of_occ (i+1)
in
list_of_occ 0
;;
(* Question 5 *)
let compare (_, a) (_, b) = a < b
;;
(* Question 6 *)
let rec insert compare e = function
| [] -> [e]
| h :: t when compare h e -> h :: insert compare e t
| l -> e :: l
;;
let rec arbre_huffman = function
| [] -> invalid_arg "arbre_huffman"
| [a, p] -> a
| (a1, p1) :: (a2, p2) :: t -> arbre_huffman (insert compare (Noeud(a1, a2), p1+p2) t)
;;
(* Question 7 *)
let table_huffman arbre =
let rec table_huffman chemin acc = function
| Feuille c -> (c, chemin) :: acc
| Noeud(a, b) -> table_huffman (chemin^"0") (table_huffman (chemin^"1") acc b) a
in
table_huffman "" [] arbre
;;
(* Question 8 *)
let encode phrase =
let n = String.length phrase in
let arbre = arbre_huffman (merge_sort compare (list_of_occ (nb_occurrences phrase))) in
let table = table_huffman arbre in
let rec encode acc i =
if i >= n then acc
else encode (acc^(List.assoc phrase.[i] table)) (i+1)
in
encode "" 0, arbre
;;
(* Question 9 *)
let decode message arbre =
let n = String.length message in
let rec decode acc i = function
| Feuille c ->
if i >= n then acc^(String.make 1 c)
else decode (acc^(String.make 1 c)) i arbre
| Noeud(a, b) ->
if message.[i] = '0' then
decode acc (i+1) a
else if message.[i] = '1' then
decode acc (i+1) b
else invalid_arg "decode"
in
decode "" 0 arbre
;;