From 640bf229812c48874633a968fb0ca728ef1b702d Mon Sep 17 00:00:00 2001 From: Kostiantyn Chumachenko Date: Wed, 26 Apr 2023 17:39:06 +0300 Subject: [PATCH 1/3] encoding parameter for parsing method --- .gitignore | 2 +- gedcom/parser.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index f041b4f..fc7226c 100644 --- a/.gitignore +++ b/.gitignore @@ -101,7 +101,7 @@ venv/ ENV/ env.bak/ venv.bak/ - +.idea # Spyder project settings .spyderproject .spyproject diff --git a/gedcom/parser.py b/gedcom/parser.py index 74f40c7..5c715a6 100644 --- a/gedcom/parser.py +++ b/gedcom/parser.py @@ -134,18 +134,20 @@ def get_root_child_elements(self): """ return self.get_root_element().get_child_elements() - def parse_file(self, file_path, strict=True): + def parse_file(self, file_path, strict=True, encoding='utf-8-sig'): """Opens and parses a file, from the given file path, as GEDCOM 5.5 formatted data :type file_path: str :type strict: bool + :type encoding: string """ with open(file_path, 'rb') as gedcom_stream: - self.parse(gedcom_stream, strict) + self.parse(gedcom_stream, strict, encoding) - def parse(self, gedcom_stream, strict=True): + def parse(self, gedcom_stream, strict=True, encoding='utf-8-sig'): """Parses a stream, or an array of lines, as GEDCOM 5.5 formatted data :type gedcom_stream: a file stream, or str array of lines with new line at the end :type strict: bool + :type encoding: string """ self.invalidate_cache() self.__root_element = RootElement() @@ -154,7 +156,7 @@ def parse(self, gedcom_stream, strict=True): last_element = self.get_root_element() for line in gedcom_stream: - last_element = self.__parse_line(line_number, line.decode('utf-8-sig'), last_element, strict) + last_element = self.__parse_line(line_number, line.decode(encoding), last_element, strict) line_number += 1 # Private methods From f05776005f64f0b9f805cf90ebc3f17fd4e3caf0 Mon Sep 17 00:00:00 2001 From: Kostiantyn Chumachenko Date: Wed, 26 Apr 
2023 18:12:22 +0300 Subject: [PATCH 2/3] ignore decode errors when in non-strict mode --- gedcom/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gedcom/parser.py b/gedcom/parser.py index 5c715a6..dfeda15 100644 --- a/gedcom/parser.py +++ b/gedcom/parser.py @@ -154,9 +154,9 @@ def parse(self, gedcom_stream, strict=True, encoding='utf-8-sig'): line_number = 1 last_element = self.get_root_element() - + decode_errors_handling = 'strict' if strict else 'ignore' for line in gedcom_stream: - last_element = self.__parse_line(line_number, line.decode(encoding), last_element, strict) + last_element = self.__parse_line(line_number, line.decode(encoding, errors=decode_errors_handling), last_element, strict) line_number += 1 # Private methods From a479d609dbef46eb6496b984776322a085950c71 Mon Sep 17 00:00:00 2001 From: Kostiantyn Chumachenko Date: Wed, 26 Apr 2023 18:13:25 +0300 Subject: [PATCH 3/3] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 72771ec..3247870 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='python-gedcom', - version='1.0.0', + version='1.0.1', description='A Python module for parsing, analyzing, and manipulating GEDCOM files.', long_description=long_description, long_description_content_type='text/markdown',