Correction de Shannon, Run-Length Encoding et Lempel-Ziv-Welsh

from math import log
from collections import Counter
import sys

s1 = "CODAGEINFORMATIONETCALCUL"
s2 = "TOBEORNOTTOBEORTOBEORNOT"

# print(len(s1) * 8)
# print(len(s2) * 8)

# sys.exit()

def entropy(p):
    return -sum(pi * log(pi, 2) for pi in p)

"""print(entropy([0.5, 0.5]))
print(entropy([0.4, 0.6]))
print(entropy([0.01, 0.99]))
print(entropy([0.99, 0.01]))"""

# sys.exit()

def string_entropy(s):
    n = len(s)
    c = Counter(s)
    p = []
    for nb_occ in c.values():
        p.append(nb_occ / n)
    return entropy(p)

# print(Counter("coucou"))

# print(string_entropy(s1), 'bits pour', s1)
# print(string_entropy(s2), 'bits pour', s2)

# sys.exit()

img1 = ["11111111",
        "11111111",
        "00000000",
        "00000000",
        "11111111",
        "11111111",
        "00000000",
        "00000000"]

# sys.exit()

img2 = [("1" * 4 + "0" * 4) * 8]

# Comment couper une liste
l = list(range(20))
print(l)
print(l[5:15:2])
#print(l[a:b:-1])
print(l[:-1])
print(l[::-1])  # Inverser

# sys.exit()

img3 = ["10" * 4, "01" * 4, "0" * 16, "1" * 16, "0" * 16]

print(bin(1)[2:].zfill(3))
# sys.exit()

def binary(n):
    return bin(n - 1)[2:].zfill(3)  # Rempli avec des 0 à gauche

def rle(img):
    seq = ''.join(img)
    n = len(seq)
    current = ''
    nb_occ = 0
    encoding = ""
    for i in range(n):
        if seq[i] != current or nb_occ == 8:
            if current:
                # print(current, nb_occ)
                encoding += current + binary(nb_occ)
            current = seq[i]
            nb_occ = 1
        else:
            nb_occ += 1
    encoding += current + binary(nb_occ)
    return encoding

print(rle(img1), len(rle(img1)))
print(rle(img2), len(rle(img2)))
print(rle(img3), len(rle(img3)))

# sys.exit()

new_img3 = ["2" * 4 + "1" * 4 + "0" * 8 + "3" * 8 + "0" * 8]

# print(rle(new_img3), len(rle(new_img3)))

N = 1000000
freq = [0.45, 0.13, 0.12, 0.16, 0.09, 0.05]
length = [1, 3, 3, 3, 4, 4]
# print(sum(freq[i] * length[i] * N for i in range(6)) / (3 * N))
print('huffman', sum(freq[i] * length[i] for i in range(6)))

print('shannon', entropy(freq))

def lzw(s):
    dico = {}
    # Initialize
    for value in range(255):
        dico[chr(value)] = value
    new_value = 256
    acc = ""
    encoding = []
    for letter in s:
        acc += letter
        if acc not in dico:
            dico[acc] = new_value
            this_code = dico[acc[:-1]]
            new_value += 1
            encoding.append(this_code)
            acc = acc[-1]
    this_code = dico[acc]
    for letter, v in dico.items():
        if v > 255:
            print(letter, v)
    encoding.append(this_code)
    return encoding

encoding = lzw("TOBEORNOTTOBEORTOBEORNOT")
print(encoding)
print(''.join(list(map(chr, encoding))))

# sys.exit()

def decode_lzw(encoding):
    dico = {}
    # Initialize
    for value in range(255):
        dico[value] = chr(value)
    new_value = 256
    plain = ""
    last_output = ""
    for code in encoding:
        if code in dico:
            output = dico[code]
            plain += output
            if last_output:
                dico[new_value] = last_output + output[0]
                new_value += 1
        last_output = output
    return plain

print(decode_lzw([84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263]))