Add an `encoding` option to parse_file() and parse() in gedcom/parser.py.

Both methods keep 'utf-8-sig' as the default encoding. When `strict` is
false, bytes that cannot be decoded are ignored instead of raising.
Also adds .idea to .gitignore and bumps the package version to 1.0.1.

diff --git a/.gitignore b/.gitignore
index f041b4f..fc7226c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,7 +101,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
-
+.idea
 # Spyder project settings
 .spyderproject
 .spyproject
diff --git a/gedcom/parser.py b/gedcom/parser.py
index 74f40c7..dfeda15 100644
--- a/gedcom/parser.py
+++ b/gedcom/parser.py
@@ -134,27 +134,29 @@ def get_root_child_elements(self):
         """
         return self.get_root_element().get_child_elements()
 
-    def parse_file(self, file_path, strict=True):
+    def parse_file(self, file_path, strict=True, encoding='utf-8-sig'):
         """Opens and parses a file, from the given file path, as GEDCOM 5.5 formatted data
         :type file_path: str
         :type strict: bool
+        :type encoding: str
         """
         with open(file_path, 'rb') as gedcom_stream:
-            self.parse(gedcom_stream, strict)
+            self.parse(gedcom_stream, strict, encoding)
 
-    def parse(self, gedcom_stream, strict=True):
+    def parse(self, gedcom_stream, strict=True, encoding='utf-8-sig'):
         """Parses a stream, or an array of lines, as GEDCOM 5.5 formatted data
         :type gedcom_stream: a file stream, or str array of lines with new line at the end
         :type strict: bool
+        :type encoding: str
         """
         self.invalidate_cache()
         self.__root_element = RootElement()
 
         line_number = 1
         last_element = self.get_root_element()
-
+        decode_errors_handling = 'strict' if strict else 'ignore'
         for line in gedcom_stream:
-            last_element = self.__parse_line(line_number, line.decode('utf-8-sig'), last_element, strict)
+            last_element = self.__parse_line(line_number, line.decode(encoding, errors=decode_errors_handling), last_element, strict)
             line_number += 1
 
     # Private methods
diff --git a/setup.py b/setup.py
index 72771ec..3247870 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name='python-gedcom',
-    version='1.0.0',
+    version='1.0.1',
     description='A Python module for parsing, analyzing, and manipulating GEDCOM files.',
     long_description=long_description,
     long_description_content_type='text/markdown',