Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ This category also contains `ascii85`, `adobe`, `[x]btoa`, `zeromq` with the `ba
- [X] `rotN`: aka Caesar cipher (*N* belongs to [1,25])
- [X] `scytaleN`: encrypts using the number of letters on the rod (*N* belongs to [1,[)
- [X] `shiftN`: shift ordinals (*N* belongs to [1,255])
- [X] `vigenere`: aka Vigenere Cipher
- [X] `xorN`: XOR with a single byte (*N* belongs to [1,255])

> :warning: Crypto functions are of course definitely **NOT** encoding functions ; they are implemented for leveraging the `.encode(...)` API from `codecs`.
Expand Down
18 changes: 18 additions & 0 deletions docs/pages/enc/crypto.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,24 @@ This is a dynamic encoding, that is, it can be called with an integer to define

-----

### Vigenere Cipher

This is a dynamic encoding, that is, it holds the key. There is no default key, meaning that `vigenere` as the encoding scheme throws a `LookupError` indicating that the _key must be a non-empty alphabetic string_.

**Codec** | **Conversions** | **Aliases** | **Comment**
:---: | :---: | --- | ---
`vigenere` | text <-> Vigenere ciphertext | `vigenere-abcdef`, `vigenere_MySuperSecret` | key only consists of characters, not digits

```python
>>> codext.encode("This is a test !", "vigenere-abababa")
'Tiit it a tfsu !'
>>> codext.encode("This is a test !", "vigenere_MySuperSecret")
'Ffam xw r liuk !'
>>> codext.decode("Tiit it a tfsu !", "vigenere-abababa")
```

-----

### XOR with 1 byte

This is a dynamic encoding, that is, it can be called with an integer to define the ordinal of the byte to XOR with the input text.
Expand Down
2 changes: 2 additions & 0 deletions src/codext/crypto/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: UTF-8 -*-
from .affine import *
from .atbash import *
from .bazeries import *
from .bacon import *
from .barbie import *
from .citrix import *
Expand All @@ -9,5 +10,6 @@
from .rot import *
from .scytale import *
from .shift import *
from .vigenere import *
from .xor import *

177 changes: 177 additions & 0 deletions src/codext/crypto/bazeries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# -*- coding: UTF-8 -*-
"""Bazeries Cipher Codec - bazeries content encoding.

The Bazeries cipher is an encryption system created by Étienne Bazeries that combines
two Polybius grids (5×5 square arrays of letters) and a transposition based on a
numeric key. The plaintext is split into groups whose sizes are the digits of the key,
each group is reversed, and then a substitution is applied by mapping each letter's
position in the first (standard) Polybius square to the same position in the second
(key-based) Polybius square. When the key is a keyword instead of a number, the
lengths of the words in the keyword are used as group sizes.

This codec:
- en/decodes strings from str to str
- en/decodes strings from bytes to bytes
- decodes file content to str (read)
- encodes file content from str to bytes (write)

Reference: https://www.dcode.fr/bazeries-cipher
"""
from ..__common__ import *


__examples__ = {
'enc(bazeries-137)': {'HELLO': 'TSSUB', 'ATTACK': 'OOLLYE'},
'dec(bazeries-137)': {'TSSUB': 'HELLO', 'OOLLYE': 'ATTACK'},
}
__guess__ = ["bazeries-137"]


_DEFAULT_KEY = "137"
# Standard 5×5 Polybius square alphabet (I and J share the same cell)
_DEFAULT_ALPHABET = "ABCDEFGHIKLMNOPQRSTUVWXYZ"

_ONES = ["", "ONE", "TWO", "THREE", "FOUR", "FIVE", "SIX", "SEVEN", "EIGHT", "NINE",
"TEN", "ELEVEN", "TWELVE", "THIRTEEN", "FOURTEEN", "FIFTEEN", "SIXTEEN",
"SEVENTEEN", "EIGHTEEN", "NINETEEN"]
_TENS = ["", "", "TWENTY", "THIRTY", "FORTY", "FIFTY", "SIXTY", "SEVENTY", "EIGHTY", "NINETY"]


def _num_to_words(n):
""" Convert a non-negative integer to its English word representation (uppercase). """
if n == 0:
return "ZERO"
if n < 20:
return _ONES[n]
if n < 100:
rest = n % 10
return (_TENS[n // 10] + (" " + _ONES[rest] if rest else "")).strip()
if n < 1000:
rest = n % 100
return (_ONES[n // 100] + " HUNDRED" + (" " + _num_to_words(rest) if rest else "")).strip()
if n < 1_000_000:
rest = n % 1000
return (_num_to_words(n // 1000) + " THOUSAND" + (" " + _num_to_words(rest) if rest else "")).strip()
if n < 1_000_000_000:
rest = n % 1_000_000
return (_num_to_words(n // 1_000_000) + " MILLION" + (" " + _num_to_words(rest) if rest else "")).strip()
rest = n % 1_000_000_000
return (_num_to_words(n // 1_000_000_000) + " BILLION" + (" " + _num_to_words(rest) if rest else "")).strip()


def _parse_key(key):
""" Parse the key into (phrase, group_sizes).

For a numeric key, it is written in English words to build the phrase, and its
individual non-zero digits form the group sizes for transposition.
For a keyword, the key itself is the phrase and word lengths are the group sizes.
"""
if not key:
key = _DEFAULT_KEY
key_str = str(key).upper().replace("-", " ").replace("_", " ").strip()
if key_str.replace(" ", "").isdigit():
n = int(key_str.replace(" ", ""))
phrase = _num_to_words(n)
digits = [int(d) for d in str(n) if d != '0']
if not digits:
digits = [1]
else:
phrase = key_str
digits = [len(w) for w in key_str.split() if w]
if not digits:
digits = [1]
return phrase, digits


def _build_key_alphabet(phrase):
""" Build a 25-character cipher alphabet from the key phrase for the second Polybius square.

Letters appear in the order they first occur in the phrase (with J merged into I),
followed by the remaining letters of the standard alphabet.
"""
seen = []
for c in phrase.upper():
if c == 'J':
c = 'I'
if c.isalpha() and c not in seen:
seen.append(c)
for c in _DEFAULT_ALPHABET:
if c not in seen:
seen.append(c)
return "".join(seen)


def _build_squares(key_alphabet):
""" Build position maps and lookup maps for the two 5×5 Polybius squares.

Returns (sq1_pos, sq2_pos, sq1_lkp, sq2_lkp) where:
- sq1_pos / sq2_pos map a letter to its (row, col) 1-indexed coordinate
- sq1_lkp / sq2_lkp map a (row, col) coordinate to its letter
"""
alph1 = _DEFAULT_ALPHABET
alph2 = key_alphabet
sq1_pos = {alph1[i]: (i // 5 + 1, i % 5 + 1) for i in range(25)}
sq2_pos = {alph2[i]: (i // 5 + 1, i % 5 + 1) for i in range(25)}
sq1_lkp = {(i // 5 + 1, i % 5 + 1): alph1[i] for i in range(25)}
sq2_lkp = {(i // 5 + 1, i % 5 + 1): alph2[i] for i in range(25)}
# J shares the cell with I in both squares
sq1_pos['J'] = sq1_pos['I']
sq2_pos['J'] = sq2_pos['I']
return sq1_pos, sq2_pos, sq1_lkp, sq2_lkp


def _transpose(chars, digits):
""" Split chars into consecutive groups of sizes given by digits (cycling) and reverse each group. """
result, i, grp_idx = [], 0, 0
while i < len(chars):
size = digits[grp_idx % len(digits)]
grp_idx += 1
group = chars[i:i + size]
result.extend(reversed(group))
i += size
return result


def bazeries_encode(key=""):
phrase, digits = _parse_key(key)
key_alph = _build_key_alphabet(phrase)
sq1_pos, sq2_pos, sq1_lkp, sq2_lkp = _build_squares(key_alph)

def encode(text, errors="strict"):
_h = handle_error("bazeries", errors)
alpha = [('I' if c == 'J' else c) for c in ensure_str(text).upper() if c.isalpha()]
transposed = _transpose(alpha, digits)
result = []
for pos, c in enumerate(transposed):
if c in sq1_pos:
result.append(sq2_lkp[sq1_pos[c]])
else:
result.append(_h(c, pos, "".join(result)))
r = "".join(result)
return r, len(text)
return encode


def bazeries_decode(key=""):
phrase, digits = _parse_key(key)
key_alph = _build_key_alphabet(phrase)
sq1_pos, sq2_pos, sq1_lkp, sq2_lkp = _build_squares(key_alph)

def decode(text, errors="strict"):
_h = handle_error("bazeries", errors, decode=True)
alpha = [c for c in ensure_str(text).upper() if c.isalpha()]
sub = []
for pos, c in enumerate(alpha):
if c in sq2_pos:
sub.append(sq1_lkp[sq2_pos[c]])
else:
sub.append(_h(c, pos, "".join(sub)))
result = _transpose(sub, digits)
r = "".join(result)
return r, len(text)
return decode


add("bazeries", bazeries_encode, bazeries_decode,
r"^bazeries(?:[-_](.+))?$",
printables_rate=1., expansion_factor=1.)
65 changes: 65 additions & 0 deletions src/codext/crypto/vigenere.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# -*- coding: UTF-8 -*-
"""Vigenere Cipher Codec - vigenere content encoding.

This codec:
- en/decodes strings from str to str
- en/decodes strings from bytes to bytes
- decodes file content to str (read)
- encodes file content from str to bytes (write)
"""
from string import ascii_lowercase as LC, ascii_uppercase as UC

from ..__common__ import *


__examples__ = {
'enc(vigenere)': None,
'enc(vigenere-lemon)': {'ATTACKATDAWN': 'LXFOPVEFRNHR'},
'enc(vigenere-key)': {'hello': 'rijvs'},
'enc(vigenère_key)': {'Hello World': 'Rijvs Uyvjn'},
'enc-dec(vigenere-secret)': ['hello world', 'ATTACK AT DAWN', 'Test 1234!'],
}
__guess__ = ["vigenere-key", "vigenere-secret", "vigenere-password"]


__char = lambda c, k, i, d=False: (LC if (b := c in LC) else UC)[(ord(c) - ord("Aa"[b]) + \
[1, -1][d] * (ord(k[i % len(k)]) - ord('a'))) % 26]


def __check(key):
key = key.lower()
if not key or not key.isalpha():
raise LookupError("Bad parameter for encoding 'vigenere': key must be a non-empty alphabetic string")
return key


def vigenere_encode(key):
def encode(text, errors="strict"):
result, i, k = [], 0, __check(key)
for c in ensure_str(text):
if c in LC or c in UC:
result.append(__char(c, k, i))
i += 1
else:
result.append(c)
r = "".join(result)
return r, len(r)
return encode


def vigenere_decode(key):
def decode(text, errors="strict"):
result, i, k = [], 0, __check(key)
for c in ensure_str(text):
if c in LC or c in UC:
result.append(__char(c, k, i, True))
i += 1
else:
result.append(c)
r = "".join(result)
return r, len(r)
return decode


add("vigenere", vigenere_encode, vigenere_decode, r"vigen[eè]re(?:[-_]cipher)?(?:[-_]([a-zA-Z]+))?$", penalty=.1)

Loading