import hashlib
import string
from typing import List
from cas.accession.base_accession_manager import BaseAccessionManager
[docs]
class HashAccessionManager(BaseAccessionManager):
    """
    Accession manager that derives accession IDs from a Blake2b hash of the cell IDs.

    Every ID produced by hashing is remembered in ``accession_ids`` so that a repeated ID can be reported.
    """

    def __init__(self, accession_prefix=None, digest_size=5):
        """
        Initializer.

        Params:
            accession_prefix: accession_id prefix. Stored on the instance; it is not read by
                generate_accession_id.
            digest_size: output hash size in bytes, passed to hashlib.blake2b. The hexadecimal accession
                is twice this many characters long.
        """
        self.accession_prefix = accession_prefix
        self.digest_size = digest_size
        # IDs generated by hashing so far; consulted to detect conflicts.
        self.accession_ids = list()

    def generate_accession_id(
        self, id_recommendation: str = None, cell_ids: List = None, labelset: str = None, suppress_warnings=False, cellset_name: str = None
    ) -> str:
        """
        Generates a Blake2b hashing algorithm based hash for the given cell IDs.

        Params:
            id_recommendation: pre-calculated hash accession recommendation. Returns this value if
                recommendation is a valid accession id. When a labelset is given and the recommendation
                contains no ":", the labelset is prepended ("labelset:recommendation") first.
            cell_ids: Cell IDs list. Algorithm sorts cell ids internally, so the input order does not
                affect the result.
            labelset: Labelset name. If provided, uses it as a prefix to the accession id.
            suppress_warnings: If True, suppresses the conflict message printed for a repeated ID.
            cellset_name: this parameter is not utilized in this implementation.

        Return: accession_id

        Raises:
            ValueError: if no valid recommendation is given and cell_ids is empty or None.
        """
        if id_recommendation and labelset and ":" not in id_recommendation:
            id_recommendation = labelset + ":" + id_recommendation

        # NOTE(review): an accepted recommendation is returned without being recorded in
        # self.accession_ids, so it does not take part in conflict detection -- confirm this is intended.
        if is_hash_accession(id_recommendation):
            return id_recommendation

        if not cell_ids:
            # ValueError is a subclass of Exception, so callers catching Exception keep working.
            raise ValueError("Cell IDs list is empty.")

        # Sorting makes the hash independent of the order in which the cell IDs were supplied.
        hash_input = " ".join(sorted(cell_ids)).encode()
        accession_id = hashlib.blake2b(hash_input, digest_size=self.digest_size).hexdigest()
        if labelset:
            accession_id = labelset + ":" + accession_id

        if accession_id in self.accession_ids:
            # A repeated ID is only reported, never raised; the ID is still returned to the caller.
            if not suppress_warnings:
                print("ERROR: Hash ID conflict occurred: " + accession_id)
        else:
            self.accession_ids.append(accession_id)
        return accession_id
[docs]
def is_hash_accession(accession_id: str, digest_size: int = 5) -> bool:
    """
    Checks if the given accession is a valid hash accession.

    Only the part after the last ":" is examined, so an optional "prefix:" is ignored. That part must
    consist solely of hexadecimal digits (upper or lower case) and be exactly ``2 * digest_size``
    characters long, i.e. 10 characters for the default digest size of 5 bytes.

    Args:
        accession_id: accession to check
        digest_size: size in bytes of the hash digest the accession is expected to encode; each byte
            is written as two hexadecimal characters.

    Returns: True if value is a valid hash accession id, false otherwise (including for None, empty
        and non-string values).
    """
    # Reject None, "" and non-string values up front instead of failing on .split().
    if not accession_id or not isinstance(accession_id, str):
        return False

    hash_part = accession_id.split(":")[-1]
    if len(hash_part) != 2 * digest_size:
        return False
    return all(char in string.hexdigits for char in hash_part)