Source code for multibase.converters

from io import BytesIO
from itertools import zip_longest

from baseconv import BaseConverter
from morphys import ensure_bytes


class BaseStringConverter(BaseConverter):
    """Converter that treats the whole payload as one big-endian integer.

    The payload is turned into an unsigned integer and rendered with the
    digit alphabet held in ``self.digits``. Because the intermediate value
    is an integer, leading zero bytes of the payload are not represented
    and do not survive an encode/decode round trip.
    """

    def encode(self, bytes):
        """Encode a bytes-like payload into this base.

        :param bytes: payload to encode
        :return: encoded representation, passed through ``ensure_bytes``
        """
        number = int.from_bytes(bytes, byteorder="big", signed=False)
        return ensure_bytes(super().encode(number))

    def bytes_to_int(self, bytes):
        """Interpret encoded digits (most significant first) as an integer.

        :param bytes: encoded digits; iterating must yield integer code points
        :return: the non-negative integer the digits represent
        :raises ValueError: if a character is not part of ``self.digits``
        """
        base = len(self.digits)
        value = 0
        # Horner's scheme: one multiply-add per digit instead of recomputing
        # ``base ** position`` for every character.
        for x in bytes:
            value = value * base + self.digits.index(chr(x))
        return value

    def decode(self, bytes):
        """Decode digits of this base back into bytes.

        :param bytes: encoded digits
        :return: big-endian bytes of the decoded integer, using the minimal
            number of bytes (empty for the value zero)
        """
        decoded_int = self.bytes_to_int(bytes)
        # Smallest byte count that can hold the value; see the documentation
        # of ``int.to_bytes`` for the rounding expression.
        byte_length = (decoded_int.bit_length() + 7) // 8
        return decoded_int.to_bytes(byte_length, byteorder="big")
class Base16StringConverter(BaseStringConverter):
    """Hexadecimal converter that keeps a fixed two digits per byte.

    The case of the configured ``digits`` selects upper- or lower-case
    output; decoding accepts either case.
    """

    def __init__(self, digits):
        super().__init__(digits)
        # Remember which case to emit and to normalise to when decoding.
        self.uppercase = digits.isupper()

    def encode(self, bytes):
        """Encode each byte as exactly two hex digits.

        :param bytes: iterable of integers in ``range(256)``
        :return: hex representation, passed through ``ensure_bytes``
        """
        result = "".join([f"{byte:02x}" for byte in bytes])
        if self.uppercase:
            result = result.upper()
        return ensure_bytes(result)

    def decode(self, data):
        """Decode a hex string back into bytes, preserving leading zeros.

        :param data: hex digits as ``bytes`` or ``str``, in either case
        :return: decoded bytes; every pair of digits yields one byte, and an
            odd-length input is treated as if it had one leading ``0``
        :raises ValueError: if a character is not a valid digit
        """
        if isinstance(data, bytes):
            data_str = data.decode("utf-8")
        else:
            data_str = data

        # Base16 decoding is case-insensitive: normalise to our digit case.
        data_str = data_str.upper() if self.uppercase else data_str.lower()

        decoded_int = self.bytes_to_int(data_str.encode("utf-8"))
        # Size the output from the digit count, not from the integer's bit
        # length, so leading "00" pairs are not silently dropped.
        byte_length = (len(data_str) + 1) // 2
        return decoded_int.to_bytes(byte_length, byteorder="big")
class BaseByteStringConverter:
    """Shared bit-regrouping machinery for fixed-width encodings.

    Subclasses supply the group sizes and bit widths and implement
    :meth:`encode` / :meth:`decode` on top of :meth:`_encode_bytes` and
    :meth:`_decode_bytes`.

    :param digits: alphabet; position ``i`` is the symbol for value ``i``
    :param pad: when true, ``=`` padding is appended on encode and stripped
        on decode
    """

    ENCODE_GROUP_BYTES = 1
    ENCODING_BITS = 1
    DECODING_BITS = 1

    def __init__(self, digits, pad=False):
        self.digits = digits
        self.pad = pad

    def _chunk_with_padding(self, iterable, n, fillvalue=None):
        """Collect data into fixed-length chunks, filling up the last one."""
        # _chunk_with_padding('ABCDEFG', 3, 'x') --> ABC DEF Gxx
        args = [iter(iterable)] * n
        return zip_longest(*args, fillvalue=fillvalue)

    def _chunk_without_padding(self, iterable, n):
        """Collect strings into ``n``-character chunks, dropping any remainder."""
        # _chunk_without_padding('ABCDEFG', 3) --> ABC DEF
        return map("".join, zip(*[iter(iterable)] * n))

    def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits, output_chars):
        """Regroup input bytes into ``decoding_bits``-wide alphabet symbols.

        :param bytes_: payload to encode
        :param group_bytes: number of input bytes processed per group
        :param encoding_bits: bits contributed by each input byte
        :param decoding_bits: bits represented by each output symbol
        :param output_chars: symbols in a full output group (used for padding)
        :return: the encoded symbols as bytes
        """
        buffer = BytesIO(bytes_)
        encoded_bytes = BytesIO()
        input_length = len(bytes_)
        # Loop-invariant: fixed-width binary format for one input byte.
        binstringfmt = f"{{:0{encoding_bits}b}}"

        while True:
            byte_ = buffer.read(group_bytes)
            if not byte_:
                break

            # Concatenate the group's bytes into a single string of bits.
            binstring = "".join([binstringfmt.format(x) for x in byte_])
            # Split the bit string into symbol-sized pieces; a short final
            # piece is right-filled with zero bits.
            digits = (int("".join(x), 2) for x in self._chunk_with_padding(binstring, decoding_bits, "0"))

            for digit in digits:
                encoded_bytes.write(ensure_bytes(self.digits[digit]))

        result = encoded_bytes.getvalue()

        # Add padding if needed (RFC 4648): only a trailing partial group
        # leaves the output short of a multiple of ``output_chars``.
        if self.pad:
            remainder = input_length % group_bytes
            if remainder > 0:
                chars_produced = len(result)
                padding_needed = output_chars - (chars_produced % output_chars)
                result += ensure_bytes("=" * padding_needed)

        return result

    def _decode_bytes(self, bytes_, group_bytes, decoding_bits, encoding_bits):
        """Regroup alphabet symbols back into ``encoding_bits``-wide bytes.

        :param bytes_: encoded symbols as bytes
        :param group_bytes: number of symbols processed per group
        :param decoding_bits: bits represented by each symbol
        :param encoding_bits: bits per output byte
        :return: the decoded payload
        :raises ValueError: if a symbol is not part of ``self.digits``
        """
        # Remove padding if present
        if self.pad:
            bytes_ = bytes_.rstrip(b"=")

        buffer = BytesIO()
        decoded_bytes = BytesIO()

        # Translate every symbol into its index within the alphabet.
        for byte_ in bytes_.decode():
            idx = self.digits.index(byte_)
            buffer.write(bytes([idx]))

        buffer.seek(0)
        # Loop-invariant: fixed-width binary format for one symbol index.
        binstringfmt = f"{{:0{decoding_bits}b}}"

        while True:
            byte_ = buffer.read(group_bytes)
            if not byte_:
                break

            # Concatenate the group's symbol indices into one string of bits.
            binstring = "".join([binstringfmt.format(x) for x in byte_])
            # Cut the bit string into whole output bytes; leftover bits that
            # do not fill a byte are discarded.
            digits = [int(x, 2) for x in self._chunk_without_padding(binstring, encoding_bits)]

            for digit in digits:
                decoded_bytes.write(bytes([digit]))

        return decoded_bytes.getvalue()

    def encode(self, bytes):
        """Encode ``bytes``; must be provided by subclasses."""
        raise NotImplementedError

    def decode(self, bytes):
        """Decode ``bytes``; must be provided by subclasses."""
        # Raise (rather than return the exception class) so a missing
        # override fails loudly instead of handing back a bogus value.
        raise NotImplementedError
class Base64StringConverter(BaseByteStringConverter):
    """Converter mapping groups of 3 bytes to 4 six-bit symbols."""

    def encode(self, bytes):
        """Encode ``bytes`` using 3-byte input groups and 6-bit symbols."""
        payload = ensure_bytes(bytes)
        return self._encode_bytes(
            payload,
            group_bytes=3,
            encoding_bits=8,
            decoding_bits=6,
            output_chars=4,
        )

    def decode(self, bytes):
        """Decode ``bytes`` using 4-symbol groups back into 8-bit bytes."""
        encoded = ensure_bytes(bytes)
        return self._decode_bytes(
            encoded,
            group_bytes=4,
            decoding_bits=6,
            encoding_bits=8,
        )
class Base32StringConverter(BaseByteStringConverter):
    """Converter mapping groups of 5 bytes to 8 five-bit symbols."""

    def encode(self, bytes):
        """Encode ``bytes`` using 5-byte input groups and 5-bit symbols."""
        payload = ensure_bytes(bytes)
        return self._encode_bytes(
            payload,
            group_bytes=5,
            encoding_bits=8,
            decoding_bits=5,
            output_chars=8,
        )

    def decode(self, bytes):
        """Decode ``bytes`` using 8-symbol groups back into 8-bit bytes."""
        encoded = ensure_bytes(bytes)
        return self._decode_bytes(
            encoded,
            group_bytes=8,
            decoding_bits=5,
            encoding_bits=8,
        )
class Base256EmojiConverter:
    """Base256 emoji encoding using 256 unique emoji characters.

    This implementation uses the exact same hardcoded emoji alphabet as
    js-multiformats and go-multibase reference implementations to ensure
    full compatibility. The alphabet is curated from Unicode emoji frequency
    data, excluding modifier-based emojis (such as flags) that are bigger
    than one single code point.
    """

    # Hardcoded emoji alphabet matching js-multiformats and go-multibase
    # This is the exact same alphabet used in reference implementations
    # Source: js-multiformats/src/bases/base256emoji.ts and go-multibase/base256emoji.go
    #
    # Every entry is a single code point (no variation selectors), so the
    # string index of a character is the byte value it encodes.
    _EMOJI_ALPHABET = (
        "🚀🪐☄🛰🌌"  # Space
        "🌑🌒🌓🌔🌕🌖🌗🌘"  # Moon
        "🌍🌏🌎"  # Earth
        "🐉"  # Dragon
        "☀"  # Sun
        "💻🖥💾💿"  # Computer
        # Rest from Unicode emoji frequency data (most used first)
        "😂❤😍🤣😊🙏💕😭😘👍"
        "😅👏😁🔥🥰💔💖💙😢🤔"
        "😆🙄💪😉☺👌🤗💜😔😎"
        "😇🌹🤦🎉💞✌✨🤷😱😌"
        "🌸🙌😋💗💚😏💛🙂💓🤩"
        "😄😀🖤😃💯🙈👇🎶😒🤭"
        "❣😜💋👀😪😑💥🙋😞😩"
        "😡🤪👊🥳😥🤤👉💃😳✋"
        "😚😝😴🌟😬🙃🍀🌷😻😓"
        "⭐✅🥺🌈😈🤘💦✔😣🏃"
        "💐☹🎊💘😠☝😕🌺🎂🌻"
        "😐🖕💝🙊😹🗣💫💀👑🎵"
        "🤞😛🔴😤🌼😫⚽🤙☕🏆"
        "🤫👈😮🙆🍻🍃🐶💁😲🌿"
        "🧡🎁⚡🌞🎈❌✊👋😰🤨"
        "😶🤝🚶💰🍓💢🤟🙁🚨💨"
        "🤬✈🎀🍺🤓😙💟🌱😖👶"
        "🥴▶➡❓💎💸⬇😨🌚🦋"
        "😷🕺⚠🙅😟😵👎🤲🤠🤧"
        "📌🔵💅🧐🐾🍒😗🤑🌊🤯"
        "🐷☎💧😯💆👆🎤🙇🍑❄"
        "🌴💣🐸💌📍🥀🤢👅💡💩"
        "👐📸👻🤐🤮🎼🥵🚩🍎🍊"
        "👼💍📣🥂"
    )

    def __init__(self):
        # Verify alphabet length
        if len(self._EMOJI_ALPHABET) != 256:
            raise ValueError(f"EMOJI_ALPHABET must contain exactly 256 characters, got {len(self._EMOJI_ALPHABET)}")

        # Create mapping from byte value to emoji character
        self.byte_to_emoji = {i: self._EMOJI_ALPHABET[i] for i in range(256)}
        # Create reverse mapping from emoji character to byte value
        # This matches the approach in js-multiformats and go-multibase
        self.emoji_to_byte = {emoji: byte for byte, emoji in self.byte_to_emoji.items()}

    def encode(self, bytes_) -> bytes:
        """Encode bytes to emoji string.

        :param bytes_: Bytes to encode
        :type bytes_: bytes or str
        :return: UTF-8 encoded emoji string
        :rtype: bytes
        """
        bytes_ = ensure_bytes(bytes_)
        return "".join(self.byte_to_emoji[byte_val] for byte_val in bytes_).encode("utf-8")

    def decode(self, bytes_) -> bytes:
        """Decode emoji string to bytes.

        Decodes character-by-character, matching the behavior of
        js-multiformats and go-multibase reference implementations. Each
        emoji in the alphabet is a single Unicode code point, so we can
        safely iterate character by character.

        :param bytes_: UTF-8 encoded emoji string
        :type bytes_: bytes or str
        :return: Decoded bytes
        :rtype: bytes
        :raises ValueError: if an invalid emoji character is encountered
        """
        bytes_ = ensure_bytes(bytes_, "utf8")
        # Decode UTF-8 to get emoji string
        emoji_str = bytes_.decode("utf-8")

        result = bytearray()
        # Python string iteration yields one code point at a time, which is
        # exactly one alphabet entry.
        for char in emoji_str:
            if char not in self.emoji_to_byte:
                raise ValueError(f"Non-base256emoji character: {char}")
            result.append(self.emoji_to_byte[char])
        return bytes(result)
class IdentityConverter:
    """Pass-through converter: data is returned unchanged in both directions."""

    def encode(self, x):
        """Return ``x`` as-is; no transformation is applied."""
        return x

    def decode(self, x):
        """Return ``x`` as-is; no transformation is applied."""
        return x