2
0
Fork 0

Type annotations for bookwyrm.isbn

This commit is contained in:
Joeri de Ruiter 2023-08-21 15:46:50 +02:00
parent 3760e3b45c
commit f6d8786179
3 changed files with 69 additions and 19 deletions

View file

@ -1,11 +1,20 @@
""" Use the range message from isbn-international to hyphenate ISBNs """ """ Use the range message from isbn-international to hyphenate ISBNs """
import os import os
from typing import Optional
from xml.etree import ElementTree from xml.etree import ElementTree
from xml.etree.ElementTree import Element
import requests import requests
from bookwyrm import settings from bookwyrm import settings
def _get_rules(element: Element) -> list[Element]:
    """Return the ``Rule`` children of the first ``Rules`` child of *element*.

    An empty list is returned when *element* has no ``Rules`` child.
    """
    rules_container = element.find("Rules")
    if rules_container is None:
        return []
    return rules_container.findall("Rule")
class IsbnHyphenator: class IsbnHyphenator:
"""Class to manage the range message xml file and use it to hyphenate ISBNs""" """Class to manage the range message xml file and use it to hyphenate ISBNs"""
@ -15,58 +24,94 @@ class IsbnHyphenator:
) )
__element_tree = None __element_tree = None
def update_range_message(self) -> None:
    """Download the range message xml file and save it locally

    Raises a ``requests`` exception when the download fails, times out or the
    server answers with an HTTP error status. In those cases nothing is
    written, so the local file and the cached element tree stay as they were.
    """
    # Bound the wait and reject HTTP error responses so that an error page is
    # never written over a previously downloaded range file.
    response = requests.get(self.__range_message_url, timeout=15)
    response.raise_for_status()

    with open(self.__range_file_path, "w", encoding="utf-8") as file:
        file.write(response.text)
    # Forget the cached tree so it is not used in place of the new file.
    self.__element_tree = None
def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]:
    """hyphenate the given ISBN-13 number using the range message

    Returns ``None`` for ``None``. When no registration group or registrant
    can be determined the ISBN is returned unchanged.
    """
    if isbn_13 is None:
        return None

    # Parse the range message file on first use and keep the tree around.
    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    prefix = isbn_13[:3]

    group = self.__find_reg_group(isbn_13, prefix)
    if group is None:
        return isbn_13  # failed to hyphenate

    holder = self.__find_registrant(isbn_13, prefix, group)
    if holder is None:
        return isbn_13  # failed to hyphenate

    # Everything between the registrant and the final check digit.
    publication_start = len(prefix) + len(group) + len(holder)
    parts = (
        prefix,
        group,
        holder,
        isbn_13[publication_start:-1],
        isbn_13[-1:],
    )
    return "-".join(parts)
def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]:
    """Find the registration group element of ``isbn_13``.

    Looks up the first ``EAN.UCC`` entry whose ``Prefix`` equals
    ``gs1_prefix`` and returns the digits following the prefix that fall
    inside one of that entry's rule ranges. Returns ``None`` when there is no
    such entry, no rule matches, or the ISBN is too short or not numeric at
    the group position.
    """
    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes")
    if ucc_prefixes_el is None:
        return None

    group_start = len(gs1_prefix)
    for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"):
        prefix_el = ean_ucc_el.find("Prefix")
        if prefix_el is None or prefix_el.text != gs1_prefix:
            continue

        for rule_el in _get_rules(ean_ucc_el):
            length_el = rule_el.find("Length")
            if length_el is None:
                continue
            length = int(text) if (text := length_el.text) else 0
            if length == 0:
                continue

            range_el = rule_el.find("Range")
            if range_el is None or range_el.text is None:
                continue

            reg_group = isbn_13[group_start : group_start + length]
            # A truncated or non-numeric ISBN cannot satisfy this rule; skip
            # it rather than letting int() raise ValueError to the caller.
            if len(reg_group) != length or not reg_group.isdecimal():
                continue

            reg_grp_range = [int(x[:length]) for x in range_el.text.split("-")]
            if reg_grp_range[0] <= int(reg_group) <= reg_grp_range[1]:
                return reg_group
        # Only the first entry with a matching prefix is consulted.
        return None
    return None
def __find_registrant(self, isbn_13, gs1_prefix, reg_group): def __find_registrant(
self, isbn_13: str, gs1_prefix: str, reg_group: str
) -> Optional[str]:
from_ind = len(gs1_prefix) + len(reg_group) from_ind = len(gs1_prefix) + len(reg_group)
for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"):
if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)): if self.__element_tree is None:
for rule_el in group_el.find("Rules").findall("Rule"): self.__element_tree = ElementTree.parse(self.__range_file_path)
length = int(rule_el.find("Length").text)
reg_groups_el = self.__element_tree.find("RegistrationGroups")
if reg_groups_el is None:
return None
for group_el in reg_groups_el.findall("Group"):
if (
prefix_el := group_el.find("Prefix")
) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)):
for rule_el in _get_rules(group_el):
length_el = rule_el.find("Length")
if length_el is None:
continue
length = int(text) if (text := length_el.text) else 0
if length == 0: if length == 0:
continue continue
range_el = rule_el.find("Range")
if range_el is None or range_el.text is None:
continue
registrant_range = [ registrant_range = [
int(x[:length]) for x in rule_el.find("Range").text.split("-") int(x[:length]) for x in range_el.text.split("-")
] ]
registrant = isbn_13[from_ind : from_ind + length] registrant = isbn_13[from_ind : from_ind + length]
if registrant_range[0] <= int(registrant) <= registrant_range[1]: if registrant_range[0] <= int(registrant) <= registrant_range[1]:

View file

@ -1,5 +1,7 @@
""" bookwyrm settings and configuration """ """ bookwyrm settings and configuration """
import os import os
from typing import AnyStr
from environs import Env from environs import Env
import requests import requests
@ -37,7 +39,7 @@ EMAIL_SENDER_DOMAIN = env("EMAIL_SENDER_DOMAIN", DOMAIN)
EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}" EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}"
# Build paths inside the project like this: os.path.join(BASE_DIR, ...) # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
# __file__ is a str, so the os.path calls below return str. AnyStr is a type
# variable and is not valid as a module-level variable annotation.
BASE_DIR: str = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LOCALE_PATHS = [ LOCALE_PATHS = [
os.path.join(BASE_DIR, "locale"), os.path.join(BASE_DIR, "locale"),
] ]

View file

@ -13,6 +13,9 @@ implicit_reexport = True
[mypy-bookwyrm.connectors.*] [mypy-bookwyrm.connectors.*]
ignore_errors = False ignore_errors = False
[mypy-bookwyrm.isbn.*]
ignore_errors = False
[mypy-celerywyrm.*] [mypy-celerywyrm.*]
ignore_errors = False ignore_errors = False