def sort_rename(sequence):
    """Name the elements of ``sequence`` by the sorted order of their values.

    Equal elements receive the same name. Names are consecutive integers
    starting at 0 and grow with the sorted order of the distinct values.

    Args:
        sequence: Sequence of mutually comparable elements.

    Returns:
        A tuple ``(name, pos)``: ``name[i]`` is the name assigned to
        ``sequence[i]``, and ``pos[n]`` is the smallest index at which an
        element named ``n`` occurs. An empty sequence yields ``([], {})``.
    """
    name = [None] * len(sequence)
    pos = {}
    cluster_index = 0
    last_value = None
    # Attach the index to every element and sort; ties on the value are
    # broken by index, so the first element of each cluster is its leftmost
    # occurrence.
    ordered = sorted((value, index) for index, value in enumerate(sequence))
    for rank, (value, index) in enumerate(ordered):
        if rank == 0:
            pos[0] = index
        elif value != last_value:
            # Compare explicitly instead of relying on the truthiness of the
            # previous value: falsy values such as 0, "" or () must still be
            # able to close a cluster.
            cluster_index += 1
            pos[cluster_index] = index

        # Fill in the table of names.
        name[index] = cluster_index
        last_value = value
    return (name, pos)


# Example: name a sequence of pairs. The call returns a tuple made of the
# list of names and the dict mapping each name to its first position.
sort_rename([(1,2), (3,1), (2,2), (1,1), (2,3), (1,2)])

([1, 4, 2, 0, 3, 1], {0: 3, 1: 0, 2: 2, 3: 4, 4: 1})


from random import randint


# Counting-sort walkthrough on a random list of integers from
# range(max_value); every intermediate table is printed.
size = 10
max_value = 5
a = [randint(0, max_value - 1) for _ in range(size)]
print(a)

# Histogram: count[v] is the number of occurrences of v in a.
count = [0] * max_value
for value in a:
    count[value] += 1
print(count)

# Exclusive prefix sums: count[v] becomes the first output slot for value v.
total = 0
for v in range(max_value):
    occurrences = count[v]
    count[v] = total
    total += occurrences
print(count)

# Give every element its output slot, advancing the slot of its value so
# that equal elements keep their input order.
indices = []
for value in a:
    slot = count[value]
    count[value] = slot + 1
    indices.append(slot)
print(indices)

# Scatter the elements into their slots.
result = [0] * size
for slot, value in zip(indices, a):
    result[slot] = value

print(result)

[0, 3, 2, 3, 3, 3, 0, 0, 1, 1]
[3, 2, 1, 4, 0]
[0, 3, 5, 6, 10]
[0, 6, 5, 7, 8, 9, 1, 2, 3, 4]
[0, 0, 0, 1, 1, 2, 3, 3, 3, 3]


import math

def kmr(text):
    """Build Karp-Miller-Rosenberg naming tables for ``text``.

    The text is padded with ``"z"`` up to length ``2 ** (factor + 1) - 1``,
    where ``factor`` is the largest exponent with
    ``2 ** factor <= len(text)``. Substrings of length 1 are named with
    ``sort_rename``; the names for length ``2 * p`` are then obtained by
    naming the pairs ``(names[p][j], names[p][j + p])``.

    Note that padded positions cannot be told apart from real ``"z"``
    characters of the text.

    Args:
        text: The string to index.

    Returns:
        A tuple ``(names, entries)`` of dicts keyed by substring length
        ``1, 2, 4, ..., 2 ** (factor - 1)`` (only ``1`` when the text has
        fewer than 4 characters). ``names[k][j]`` is the name of the
        length-``k`` substring of the padded text starting at ``j``;
        ``entries[k]`` maps each name to the first position where it occurs.
        Empty text yields ``({1: []}, {1: {}})``.
    """
    original_length = len(text)
    # Integer floor(log2(n)); unlike math.log2 it is exact for any length
    # and defined for 0 (giving -1, which results in no padding and no
    # doubling rounds).
    factor = original_length.bit_length() - 1
    padding_length = 2 ** (factor + 1) - 1 - original_length
    text += "z" * padding_length

    name, pos = sort_rename(list(text))
    names = {1: name}
    entries = {1: pos}
    for i in range(1, factor):
        power = 2 ** (i - 1)
        previous = names[power]
        # A substring of length 2 * power is identified by the names of its
        # two halves; positions whose second half would fall outside the
        # previous table are dropped.
        new_sequence = [
            (previous[j], previous[j + power])
            for j in range(len(previous) - power)
        ]
        name, pos = sort_rename(new_sequence)
        names[power * 2] = name
        entries[power * 2] = pos
    return (names, entries)


# Run KMR on a sample text and print the tables using 1-based numbering.
text = "abaabbaa"
names, entries = kmr(text)

print("names:")
for length, labels in names.items():
    # Show only the names of positions inside the original, unpadded text.
    print(length, [label + 1 for label in labels[:len(text)]])

print("\npositions:")
for length, first_positions in entries.items():
    # The highest name is left out; for this input it is the cluster that
    # consists only of the "z" padding.
    starts = [first_positions[label] for label in range(len(first_positions) - 1)]
    print(length, [start + 1 for start in starts])
    # Substrings that run past the end of the text are shown with their padding.
    padded = [text[start:start + length].ljust(length, "z") for start in starts]
    print(" ", padded)

names:
1 [1, 2, 1, 1, 2, 2, 1, 1]
2 [2, 4, 1, 2, 5, 4, 1, 3]
4 [3, 6, 1, 4, 8, 7, 2, 5]

positions:
1 [1, 2]
  ['a', 'b']
2 [3, 1, 8, 2, 5]
  ['aa', 'ab', 'az', 'ba', 'bb']
4 [3, 7, 1, 4, 8, 2, 6, 5]
  ['aabb', 'aazz', 'abaa', 'abba', 'azzz', 'baab', 'baaz', 'bbaa']


# NOTE(review): regular_pairs is not defined in the visible part of this
# file; confirm it is defined or imported before this call runs.
print(regular_pairs(50))

[(0, 49), (0, 32, 49), (0, 16, 32, 48, 49), (0, 8, 16, 24, 32, 40, 48, 49), (0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 49), (0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 49), (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)]

Algorytm Karpa-Millera-Rosenberga (KMR)¶

dr inż. Aleksander Smywiński-Pohl¶

konsultacje: pt. 16:30 - 17:30¶

Etykietowanie (numerowanie) ciągu¶

$k$-ekwiwalencja¶

Pozycje 1 i 4 są 5-ekwiwalentne.¶

Pozycje 2 i 5 są 4-ekwiwalentne.¶

Pozycje 3 i 6 są 3-ekwiwalentne.¶

Tablica etykiet¶

Tablica odesłań¶

Słownik podstawowych składowych¶

Wyszukiwanie w oparciu o DBF¶

Przykład¶

sort-rename¶

Algorytm KMR¶

Złożoność obliczeniowa KMR¶

Co ze składowymi o długościach niebędących potęgą 2?¶

Zastosowanie¶

Ad. 1,2¶

Ad. 2¶