Pseudonymization and Depseudonymization
generate keys
[ ]:
%%bash
# Generate the two key sets used by the rest of this notebook.
# The later cells read ./analyst.crt / ./analyst.key and ./depseudo.crt / ./depseudo.key,
# so -f is presumably the output path prefix and the trailing number the key
# length in bits (TODO confirm against `logprep pseudo generate --help`).
logprep pseudo generate -f ./analyst 1024
logprep pseudo generate -f ./depseudo 2048
pseudonymize a string
[ ]:
%%bash
# Pseudonymize the literal value "mystring" using both .crt files from the
# key-generation cell (analyst first, depseudo second).
logprep pseudo pseudonymize ./analyst.crt ./depseudo.crt mystring
depseudonymize the string
[ ]:
%%bash
# Reverse the pseudonymization using both .key files (analyst first, depseudo second).
# NOTE: "< cyphertext from above >" is a placeholder, not runnable syntax -- replace it
# (including the angle brackets, which the shell would otherwise treat as
# redirections) with the output printed by the pseudonymize cell.
logprep pseudo depseudonymize ./analyst.key ./depseudo.key < cyphertext from above >
Pseudonymizer Processor
[ ]:
import json
import uuid
from logprep.factory import Factory
from logprep.util.time import TimeParser
from logprep.ng.event.log_event import LogEvent
from logprep.ng.event.event_state import EventStateType
import logging
import sys
# Route log records of level DEBUG and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Sample log document; "user.name" is the field targeted by the rule mapping below.
user = {
    "name": "Hubert K. Kabal",
    "email": "kabal@example.com",
    "id": 12345,
}
document = {
    "id": str(uuid.uuid4()),
    "@timestamp": str(TimeParser.now()),
    "user": user,
}
event = LogEvent(document, original=b"", state=EventStateType.RECEIVED)
pretty_event = json.dumps(event.data, indent=2)
print(f"Event before processing: {pretty_event}")

# Pseudonymization
# Single rule: filter "*" with a mapping that assigns RE_WHOLE_FIELD to "user.name".
# RE_WHOLE_FIELD is presumably defined in the regex mapping file referenced below
# (TODO confirm).
whole_name_rule = {
    "filter": "*",
    "pseudonymizer": {"mapping": {"user.name": "RE_WHOLE_FIELD"}},
}
pseudonymizer_settings = {
    "type": "ng_pseudonymizer",
    # Certificates produced by the key-generation cell of this notebook.
    "pubkey_analyst": "./analyst.crt",
    "pubkey_depseudo": "./depseudo.crt",
    "regex_mapping": "../../../../../examples/exampledata/rules/pseudonymizer/regex_mapping.yml",
    "hash_salt": "a_secret_tasty_ingredient",
    "outputs": [{"opensearch": "pseudonyms"}],
    "rules": [whole_name_rule],
    "max_cached_pseudonyms": 1_000_000,
}
config = {"almighty pseudonymizer": pseudonymizer_settings}

# Build the processor from the config, set it up, and run it on the event.
processor = Factory.create(config)
processor.setup()
processor.process(event)
[ ]:
# Show the event data after processing, then how many extra-data items are
# attached to the event and the data of the first one.
processed_json = json.dumps(event.data, indent=2)
print(f"Event after processing: {processed_json}")
print(f"{len(event.extra_data)=}")
first_extra = event.extra_data[0]
print(f"Event extra data: {json.dumps(first_extra.data, indent=2)}")