Неправильный процесс при переводе MD5 в Python - PullRequest
0 голосов
/ 28 февраля 2020

Вот моя реализация хеширования MD5:

md5.py

from math import floor, sin, fabs
import struct
from enum import Enum
from bitarray import bitarray


def rotate_left(x, c):
    return (x << c) or (x >> (32-c))


def modular_add(a, b):
    return (a + b) % pow(2, 32)


# Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
s = [None] * 64
K = [None] * 64
i = 0


class MD5Buffer(Enum):
    # Initialize variables:
    a0 = 0x67452301
    b0 = 0xefcdab89
    c0 = 0x98badcfe
    d0 = 0x10325476


string = "The quick brown fox jumps over the lazy dog"
buffers = {
    MD5Buffer.a0: None,
    MD5Buffer.b0: None,
    MD5Buffer.c0: None,
    MD5Buffer.d0: None,
}

# s specifies the per-round shift amounts
s[0:16] = [7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22]
s[16:32] = [5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20]
s[32:48] = [4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23]
s[48:64] = [6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21]

# Use binary integer part of the sines of integers (Radians) as constants:
K = [floor(pow(2, 32) * fabs(sin(i + 1))) for i in range(64)]

# Convert the string to a bit array.
bit_array = bitarray(endian="big")
bit_array.frombytes(string.encode("utf-8"))

# append "1" bit to message
bit_array.append(1)

# append "0" bit until message length in bits ≡ 448 (mod 512)
while bit_array.length() % 512 != 448:
    bit_array.append(0)

# Transform the bit array to little endian
temp_array = bitarray(bit_array, endian="little")

# append original length in bits mod 264 to message
# Extend the result from step 1 with a 64-bit little endian
length = (len(string) * 8) % pow(2, 64)
length_bit_array = bitarray(endian="little")
length_bit_array.frombytes(struct.pack("<Q", int(length)))
temp_array.extend(length_bit_array)
bit_array = temp_array

# Initialize the buffers to their default values.
for buffer_type in buffers.keys():
    buffers[buffer_type] = buffer_type.value


# The total number of 32-bit words to process, N, is always a multiple of 16.
N = len(bit_array) // 32

# Process the message in successive 512-bit chunks:
for chunk_index in range(N // 16):
    # break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15
    # Convert the `bitarray` objects to integers.
    start = chunk_index * 512
    M = [bit_array[start + (x * 32): start + (x * 32) + 32] for x in range(16)]
    M = [int.from_bytes(word.tobytes(), byteorder="little") for word in M]

    # Initialize hash value for this chunk
    A = buffers[MD5Buffer.a0]
    B = buffers[MD5Buffer.b0]
    C = buffers[MD5Buffer.c0]
    D = buffers[MD5Buffer.d0]

    # Main loop
    # (four rounds with 16 operations each)
    for i in range(64):
        F = 0
        g = 0
        if 0 <= i <= 15:
            F = (B and C) or ((not B) and D)
            g = i
        elif 16 <= i <= 31:
            F = (D and B) or ((not D) and C)
            g = (5*i + 1) % 16
        elif 32 <= i <= 47:
            F = B ^ C ^ D
            g = (3*i + 5) % 16
        elif 48 <= i <= 63:
            F = C ^ (B or (not D))
            g = (7*i) % 16

        # Be wary of the below definitions of a,b,c,d
        # M[g] must be a 32-bits block

        # The MD5 algorithm uses modular addition. Note that we need a
        # temporary variable here. If we would put the result in `A`, then
        # the expression `A = D` below would overwrite it. We also cannot
        # move `A = D` lower because the original `D` would already have
        # been overwritten by the `D = C` expression.
        F = modular_add(F, F)
        F = modular_add(F, A)
        F = modular_add(F, K[i])
        F = modular_add(F, M[g])

        A = D
        D = C
        C = B
        B = B + rotate_left(F, s[i])

    # Add this chunk's hash to result so far:
    buffers[MD5Buffer.a0] = modular_add(buffers[MD5Buffer.a0], A)
    buffers[MD5Buffer.b0] = modular_add(buffers[MD5Buffer.b0], B)
    buffers[MD5Buffer.c0] = modular_add(buffers[MD5Buffer.c0], C)
    buffers[MD5Buffer.d0] = modular_add(buffers[MD5Buffer.d0], D)


# convert buffers to little endian
A1 = struct.unpack("<I", struct.pack(">I", buffers[MD5Buffer.a0]))[0]
B1 = struct.unpack("<I", struct.pack(">I", buffers[MD5Buffer.b0]))[0]
C1 = struct.unpack("<I", struct.pack(">I", buffers[MD5Buffer.c0]))[0]
D1 = struct.unpack("<I", struct.pack(">I", buffers[MD5Buffer.d0]))[0]

print(f"{format(A1, '08x')}{format(B1, '08x')}{format(C1, '08x')}{format(D1, '08x')}")

output

(input: "The quick brown fox jumps over the lazy dog")
9a035e7e224496f4977523d40f6ddb31

Так же, как я думал, что все идет хорошо, я обнаружил, что мои результаты не совпадают с теми, что были предварительно предоставлены на этом сайте Википедии , то есть на странице, где я смоделировал свой код после.

Может кто-нибудь сказать мне, где я сбился с пути? Любая помощь приветствуется.

...