From 8a6249d14dc45d47eb672f587602f861c5f2cd80 Mon Sep 17 00:00:00 2001 From: Guy Khmelnitsky Date: Wed, 20 Mar 2024 12:39:35 +0200 Subject: [PATCH 1/5] New MyHeritage - Get PersonName, Gender and Photo --- collections/myheritage.js | 92 +++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/collections/myheritage.js b/collections/myheritage.js index f34cf25..19c4d48 100644 --- a/collections/myheritage.js +++ b/collections/myheritage.js @@ -10,7 +10,7 @@ registerCollection({ return url; }, "collectionMatch": function(url) { - return (startsWithMH(url,"person-") || startsWithMH(url,"member-") || startsWithMH(url,"site-family-tree-")); + return (startsWithMH(url,"person-") || startsWithMH(url,"member-") || startsWithMH(url,"site-family-tree-") || startsWithMH(url,"profile-")); }, "parseData": function(url) { if (startsWithHTTP(url,"https://www.myheritage.com/site-family-tree-") && !url.endsWith("-info")) { @@ -22,9 +22,9 @@ registerCollection({ focusURLid = focusURLid.substring(0, focusURLid.indexOf('-')); } else if (url.contains("rootIndivudalID=")) { focusURLid = getParameterByName('rootIndivudalID', url); - } else { + } else if (url.contains("profile-")) { focusURLid = url.substring(url.indexOf('-') + 1); - focusURLid = focusURLid.substring(0, focusURLid.indexOf('_')); + focusURLid = focusURLid.substring(focusURLid.indexOf('-') + 1, focusURLid.indexOf('/')) } getPageCode(); } @@ -37,8 +37,8 @@ registerCollection({ this.parseProfileData = ""; return; } - var parsed = $(request.source.replace(/]*>/ig, "")); - var fperson = parsed.find("span.FL_LabelxxLargeBold"); + const parsed = $(request.source.replace(/]*>/ig, "")); + const fperson = parsed.find("span.FL_LabelxxLargeBold"); focusname = fperson.text(); focusrange = ""; }, @@ -47,40 +47,52 @@ registerCollection({ function parseMyHeritage(htmlstring, familymembers, relation) { relation = relation || ""; - var splitdata = htmlstring.replace(/ 2) { splitdata[1] += splitdata.pop(); } - var aboutdata = ""; - var profiledata = {}; - var focusdaterange = ""; - var fperson = parsed.find("span.FL_LabelxxLargeBold"); - var focusperson = fperson.text(); + + const header = parsed.find("div.profile_page_header"); + + const aboutdata = ""; + const profiledata = {}; + const focusdaterange = ""; + + let fperson = header.find("div.person_name"); + + const focusperson = fperson.text(); $("#readstatus").html(escapeHtml(focusperson)); - var genderval = "unknown"; + let genderval = "unknown"; if (htmlstring.contains("PK_Silhouette PK_SilhouetteSize192 PK_Silhouette_S_192_F_A_LTR") || htmlstring.contains("PK_Silhouette PK_SilhouetteSize150 PK_Silhouette_S_150_F_A_LTR") || - htmlstring.contains("PK_Silhouette PK_SilhouetteSize96 PK_Silhouette_S_96_F_A_LTR")) { + htmlstring.contains("PK_Silhouette PK_SilhouetteSize96 PK_Silhouette_S_96_F_A_LTR") || + htmlstring.contains("profile_photo_element svg_silhouette svg_silhouette_F_A")) { genderval = "female"; } else if (htmlstring.contains("PK_Silhouette PK_SilhouetteSize192 PK_Silhouette_S_192_M_A_LTR") || htmlstring.contains("PK_Silhouette PK_SilhouetteSize150 PK_Silhouette_S_150_M_A_LTR") || - htmlstring.contains("PK_Silhouette PK_SilhouetteSize96 PK_Silhouette_S_96_M_A_LTR")) { + htmlstring.contains("PK_Silhouette PK_SilhouetteSize96 PK_Silhouette_S_96_M_A_LTR") || + htmlstring.contains("profile_photo_element svg_silhouette svg_silhouette_M_A")) { genderval = "male"; } else if (focusperson.contains("(born")) { genderval = "female"; } else if (isPartner(relation.title)) { genderval = reverseGender(focusgender); } - var imagedata = parsed.find("#profilePhotoImg"); + + const photoWrapper = header.find(".profile_photo_wrapper"); + + const imagedata = photoWrapper.find(".profile_photo_element.actual_photo"); if (exists(imagedata[0])) { - var imglink = $(imagedata[0]).attr('src'); - if (exists(imglink[0])) { - var thumb = imglink; - var image = thumb; + const styleAttribute = $(photoWrapper[0]).attr('style'); + + if (exists(styleAttribute[0])) { + const imglink = styleAttribute.match(/url\((.*?)\)/)[1].replace(/['"]/g, ''); + const thumb = imglink; + let image = thumb; if (!thumb.endsWith("spacer.gif")) { if (htmlstring.contains("profilePhotoFullUrl")) { - var imgtemp = htmlstring.match(/profilePhotoFullUrl = '(.*?)';/i); + const imgtemp = htmlstring.match(/profilePhotoFullUrl = '(.*?)';/i); if (exists(imgtemp) && imgtemp.length > 1) { image = imgtemp[1]; } @@ -101,17 +113,17 @@ function parseMyHeritage(htmlstring, familymembers, relation) { if (focusdaterange !== "") { profiledata["daterange"] = focusdaterange; } - var burialdtflag = false; - var buriallcflag = false; - var deathdtflag = false; + const burialdtflag = false; + let buriallcflag = false; + let deathdtflag = false; fperson = parsed.find('tr'); for (var i = 0; i < fperson.length; i++) { - var row = $(fperson[i]).find('td.FL_LabelBold'); + const row = $(fperson[i]).find('td.FL_LabelBold'); if (row.length > 0) { - var rowtitle = $(row[0]).text().toLowerCase(); - var dateval = $(row[0]).next('td').text().trim(); - var eventval = $(fperson[i]).find('span.map_callout_link'); + const rowtitle = $(row[0]).text().toLowerCase(); + let dateval = $(row[0]).next('td').text().trim(); + const eventval = $(fperson[i]).find('span.map_callout_link'); data = []; if (exists(dateval)) { @@ -124,7 +136,7 @@ function parseMyHeritage(htmlstring, familymembers, relation) { } } if (exists(eventval) && eventval.length > 0) { - var eventlocation = $(eventval).text().trim(); + const eventlocation = $(eventval).text().trim(); if (eventlocation !== "") { data.push({id: geoid, location: eventlocation}); geoid++; @@ -164,18 +176,18 @@ function parseMyHeritage(htmlstring, familymembers, relation) { var famid = 0; } - var siblingparents = []; + const siblingparents = []; if (exists(splitdata[1])) { splitdata = splitdata[1].split("FirstColumn"); parsed = $(splitdata[0]); // ---------------------- Family Data -------------------- fperson = parsed.find('a.FL_LinkBold'); for (var i = 0; i < fperson.length; i++) { - var member = $(fperson[i]); - var title = member.next('br').next('span.FL_LabelDimmed').text().trim(); + const member = $(fperson[i]); + let title = member.next('br').next('span.FL_LabelDimmed').text().trim(); title = title.replace("His", "").replace("Her", "").trim().toLowerCase(); - var url = member.attr("href"); - var itemid = ""; + const url = member.attr("href"); + let itemid = ""; if (url.contains("#!profile-")) { itemid = url.substring(url.indexOf('#!profile-') + 10); itemid = itemid.substring(0, itemid.indexOf('-')); @@ -189,12 +201,12 @@ function parseMyHeritage(htmlstring, familymembers, relation) { if (exists(title)) { if (familymembers) { if (isParent(title) || isSibling(title) || isChild(title) || isPartner(title)) { - var name = member.text().trim(); + const name = member.text().trim(); if (exists(url)) { if (!exists(alldata["family"][title])) { alldata["family"][title] = []; } - var subdata = {name: name, title: title}; + const subdata = {name: name, title: title}; subdata["url"] = url; subdata["itemId"] = itemid; subdata["profile_id"] = famid; @@ -257,8 +269,12 @@ function getMyHeritageFamily(famid, url, subdata) { url: url, variable: subdata }, function (response) { - var arg = response.variable; - var person = parseMyHeritage(response.source, false, {"title": arg.title, "proid": arg.profile_id, "itemId": arg.itemId}); + const arg = response.variable; + let person = parseMyHeritage(response.source, false, { + "title": arg.title, + "proid": arg.profile_id, + "itemId": arg.itemId + }); if (person === "") { familystatus.pop(); return; From ac088c664582de2ab134c8d56c83c6bdbccb3ef0 Mon Sep 17 00:00:00 2001 From: Guy Khmelnitsky Date: Wed, 20 Mar 2024 15:23:26 +0200 Subject: [PATCH 2/5] New MyHeritage - Parse Birth/Death Dates --- collections/myheritage.js | 100 ++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 43 deletions(-) diff --git a/collections/myheritage.js b/collections/myheritage.js index 19c4d48..061219c 100644 --- a/collections/myheritage.js +++ b/collections/myheritage.js @@ -117,56 +117,70 @@ function parseMyHeritage(htmlstring, familymembers, relation) { let buriallcflag = false; let deathdtflag = false; - fperson = parsed.find('tr'); - for (var i = 0; i < fperson.length; i++) { - const row = $(fperson[i]).find('td.FL_LabelBold'); - if (row.length > 0) { - const rowtitle = $(row[0]).text().toLowerCase(); - let dateval = $(row[0]).next('td').text().trim(); - const eventval = $(fperson[i]).find('span.map_callout_link'); + const events = header.find(".events").find(".event") + const eventsDic = [] - data = []; - if (exists(dateval)) { - if (dateval.indexOf("(") !== -1) { - dateval = dateval.substring(0, dateval.indexOf("(")); - } - dateval = cleanDate(dateval); - if (dateval !== "") { - data.push({date: dateval}); - } + events.each(function(index, element) { + const label = element.getElementsByClassName("label")[0].textContent.replaceAll(":", "").trim(); + const dateElements = element.getElementsByClassName("date"); + let date = null; + if (dateElements && dateElements.length > 0) { + date = dateElements[0].textContent; + } + + const placeElements = element.getElementsByClassName("place"); + let place = null; + if (placeElements && placeElements.length > 0) { + place = placeElements[0].textContent; + } + + eventsDic.push({label: label, date: date, place: place}); + }); + + eventsDic.forEach((value) => { + + let label = value.label.toLowerCase(); + let date = value.date; + let place = value.place; + + data = []; + if (exists(date)) { + if (date.indexOf("(") !== -1) { + date = date.substring(0, dateval.indexOf("(")); } - if (exists(eventval) && eventval.length > 0) { - const eventlocation = $(eventval).text().trim(); - if (eventlocation !== "") { - data.push({id: geoid, location: eventlocation}); - geoid++; - } + date = cleanDate(date); + if (dateval !== "") { + data.push({date: date}); } - if (rowtitle.startsWith("born")) { - if (!$.isEmptyObject(data)) { - profiledata["birth"] = data; - } - } else if (rowtitle.startsWith("died")) { - if (!$.isEmptyObject(data)) { - if (exists(getDate(data))) { - deathdtflag = true; - } - profiledata["death"] = data; - } - } else if (rowtitle.startsWith("burial")) { - if (!$.isEmptyObject(data)) { - if (exists(getLocation(data))) { - buriallcflag = true; - } - profiledata["burial"] = data; + } + if (exists(place) && place.length > 0) { + data.push({id: geoid, location: place}); + geoid++; + } + if (label.startsWith("born")) { + if (!$.isEmptyObject(data)) { + profiledata["birth"] = data; + } + } else if (label.startsWith("died")) { + if (!$.isEmptyObject(data)) { + if (exists(getDate(data))) { + deathdtflag = true; } - } else if (rowtitle.startsWith("baptism") || rowtitle.startsWith("christening")) { - if (!$.isEmptyObject(data)) { - profiledata["baptism"] = data; + profiledata["death"] = data; + } + } else if (label.startsWith("burial")) { + if (!$.isEmptyObject(data)) { + if (exists(getLocation(data))) { + buriallcflag = true; } + profiledata["burial"] = data; + } + } else if (label.startsWith("baptism") || label.startsWith("christening")) { + if (!$.isEmptyObject(data)) { + profiledata["baptism"] = data; } } - } + }); profiledata["name"] = focusperson; profiledata["status"] = relation.title; From e9afc29b405e3b931ffb5f57aa7d98fac811ed11 Mon Sep 17 00:00:00 2001 From: Guy Khmelnitsky Date: Wed, 20 Mar 2024 16:57:15 +0200 Subject: [PATCH 3/5] New MyHeritage - Parse family members (without URL) --- collections/myheritage.js | 56 ++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/collections/myheritage.js b/collections/myheritage.js index 061219c..0336639 100644 --- a/collections/myheritage.js +++ b/collections/myheritage.js @@ -47,11 +47,12 @@ registerCollection({ function parseMyHeritage(htmlstring, familymembers, relation) { relation = relation || ""; - let splitdata = htmlstring.replace(/ 2) { - splitdata[1] += splitdata.pop(); - } + const parsed = $(htmlstring.replace(/ 2) { + // splitdata[1] += splitdata.pop(); + // } const header = parsed.find("div.profile_page_header"); @@ -191,17 +192,18 @@ function parseMyHeritage(htmlstring, familymembers, relation) { } const siblingparents = []; - if (exists(splitdata[1])) { - splitdata = splitdata[1].split("FirstColumn"); - parsed = $(splitdata[0]); + const immediateFamily = parsed.find("div.immediate_family"); + + if (exists(immediateFamily[0])) { // ---------------------- Family Data -------------------- - fperson = parsed.find('a.FL_LinkBold'); - for (var i = 0; i < fperson.length; i++) { - const member = $(fperson[i]); - let title = member.next('br').next('span.FL_LabelDimmed').text().trim(); - title = title.replace("His", "").replace("Her", "").trim().toLowerCase(); - const url = member.attr("href"); - let itemid = ""; + const relatives = immediateFamily[0].getElementsByClassName("family_relative") + + relatives.forEach((value) => { + const name = value.getElementsByClassName("relative_name")[0].textContent.trim(); + const relationship = value.getElementsByClassName("relative_relationship")[0].textContent.replace("His", "").replace("Her", "").trim().toLowerCase(); + const years = value.getElementsByClassName("relative_years")[0].textContent.trim(); + const url = ""; // How to get URLs? + let itemid; if (url.contains("#!profile-")) { itemid = url.substring(url.indexOf('#!profile-') + 10); itemid = itemid.substring(0, itemid.indexOf('-')); @@ -209,24 +211,23 @@ function parseMyHeritage(htmlstring, familymembers, relation) { itemid = getParameterByName('rootIndivudalID', url); } else { itemid = decodeURIComponent(url.substring(url.indexOf('-') + 1)); - itemid = itemid.substring(0, itemid.indexOf('_')); + itemid = itemid.substring(focusURLid.indexOf('-') + 1, focusURLid.indexOf('/')) } - if (exists(title)) { + if (exists(relationship)) { if (familymembers) { - if (isParent(title) || isSibling(title) || isChild(title) || isPartner(title)) { - const name = member.text().trim(); + if (isParent(relationship) || isSibling(relationship) || isChild(relationship) || isPartner(relationship)) { if (exists(url)) { - if (!exists(alldata["family"][title])) { - alldata["family"][title] = []; + if (!exists(alldata["family"][relationship])) { + alldata["family"][relationship] = []; } - const subdata = {name: name, title: title}; + const subdata = {name: name, title: relationship}; subdata["url"] = url; subdata["itemId"] = itemid; subdata["profile_id"] = famid; if (isParent(title)) { parentlist.push(itemid); - } else if (isPartner(title)) { + } else if (isPartner(relationship)) { myhspouse.push(famid); } unionurls[famid] = itemid; @@ -235,15 +236,15 @@ function parseMyHeritage(htmlstring, familymembers, relation) { } } } else if (isChild(relation.title)) { - if (isParent(title)) { + if (isParent(relationship)) { if (focusURLid !== itemid) { childlist[relation.proid] = $.inArray(itemid, unionurls); profiledata["parent_id"] = $.inArray(itemid, unionurls); - break; + return; } } } else if (isSibling(relation.title)) { - if (isParent(title)) { + if (isParent(relationship)) { siblingparents.push(itemid); } } else if (isPartner(relation.title)) { @@ -252,8 +253,9 @@ function parseMyHeritage(htmlstring, familymembers, relation) { //marriage data - parse event tab } } - } + }); } + if (exists(relation.title) && isSibling(relation.title) && siblingparents.length > 0) { profiledata["halfsibling"] = !recursiveCompare(parentlist, siblingparents); } From e81d694f16249c2e571061b0820070828a7dc648 Mon Sep 17 00:00:00 2001 From: Guy Khmelnitsky Date: Thu, 21 Mar 2024 10:12:07 +0200 Subject: [PATCH 4/5] New MyHeritage - Fix gender parsing and family --- collections/myheritage.js | 48 ++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/collections/myheritage.js b/collections/myheritage.js index 0336639..0ad30f4 100644 --- a/collections/myheritage.js +++ b/collections/myheritage.js @@ -20,11 +20,15 @@ registerCollection({ if (url.contains("#!profile-")) { focusURLid = url.substring(url.indexOf('#!profile-') + 10); focusURLid = focusURLid.substring(0, focusURLid.indexOf('-')); + } else if (url.contains("/profile-")) { + focusURLid = url.substring(url.indexOf('/profile-') + 9) + focusURLid = focusURLid.substring(0, focusURLid.indexOf("/")) + focusURLid = focusURLid.substring(focusURLid.indexOf('-') + 1); // - } else if (url.contains("rootIndivudalID=")) { focusURLid = getParameterByName('rootIndivudalID', url); - } else if (url.contains("profile-")) { + } else { focusURLid = url.substring(url.indexOf('-') + 1); - focusURLid = focusURLid.substring(focusURLid.indexOf('-') + 1, focusURLid.indexOf('/')) + focusURLid = focusURLid.substring(0, focusURLid.indexOf('_')); } getPageCode(); } @@ -65,20 +69,22 @@ function parseMyHeritage(htmlstring, familymembers, relation) { const focusperson = fperson.text(); $("#readstatus").html(escapeHtml(focusperson)); let genderval = "unknown"; - if (htmlstring.contains("PK_Silhouette PK_SilhouetteSize192 PK_Silhouette_S_192_F_A_LTR") || - htmlstring.contains("PK_Silhouette PK_SilhouetteSize150 PK_Silhouette_S_150_F_A_LTR") || - htmlstring.contains("PK_Silhouette PK_SilhouetteSize96 PK_Silhouette_S_96_F_A_LTR") || - htmlstring.contains("profile_photo_element svg_silhouette svg_silhouette_F_A")) { - genderval = "female"; - } else if (htmlstring.contains("PK_Silhouette PK_SilhouetteSize192 PK_Silhouette_S_192_M_A_LTR") || - htmlstring.contains("PK_Silhouette PK_SilhouetteSize150 PK_Silhouette_S_150_M_A_LTR") || - htmlstring.contains("PK_Silhouette PK_SilhouetteSize96 PK_Silhouette_S_96_M_A_LTR") || - htmlstring.contains("profile_photo_element svg_silhouette svg_silhouette_M_A")) { - genderval = "male"; - } else if (focusperson.contains("(born")) { - genderval = "female"; - } else if (isPartner(relation.title)) { - genderval = reverseGender(focusgender); + + let photo_element = parsed.find(".profile_page_header").find(".person_photo").find(".profile_photo_element.svg_silhouette") + if (photo_element.length === 1) { + if (photo_element[0].classList.contains("svg_silhouette_F_A")) { + genderval = "female"; + } else if (photo_element[0].classList.contains("svg_silhouette_M_A")) { + genderval = "male"; + } + } + + if (!genderval) { + if (focusperson.contains("(born")) { + genderval = "female"; + } else if (isPartner(relation.title)) { + genderval = reverseGender(focusgender); + } } const photoWrapper = header.find(".profile_photo_wrapper"); @@ -147,10 +153,10 @@ function parseMyHeritage(htmlstring, familymembers, relation) { data = []; if (exists(date)) { if (date.indexOf("(") !== -1) { - date = date.substring(0, dateval.indexOf("(")); + date = date.substring(0, date.indexOf("(")); } date = cleanDate(date); - if (dateval !== "") { + if (date !== "") { data.push({date: date}); } } @@ -198,7 +204,7 @@ function parseMyHeritage(htmlstring, familymembers, relation) { // ---------------------- Family Data -------------------- const relatives = immediateFamily[0].getElementsByClassName("family_relative") - relatives.forEach((value) => { + for (let value of relatives) { const name = value.getElementsByClassName("relative_name")[0].textContent.trim(); const relationship = value.getElementsByClassName("relative_relationship")[0].textContent.replace("His", "").replace("Her", "").trim().toLowerCase(); const years = value.getElementsByClassName("relative_years")[0].textContent.trim(); @@ -225,7 +231,7 @@ function parseMyHeritage(htmlstring, familymembers, relation) { subdata["url"] = url; subdata["itemId"] = itemid; subdata["profile_id"] = famid; - if (isParent(title)) { + if (isParent(relationship)) { parentlist.push(itemid); } else if (isPartner(relationship)) { myhspouse.push(famid); @@ -253,7 +259,7 @@ function parseMyHeritage(htmlstring, familymembers, relation) { //marriage data - parse event tab } } - }); + } } if (exists(relation.title) && isSibling(relation.title) && siblingparents.length > 0) { From 7413d1e67e876ea4a6e05c432f00113d6fdbdfa2 Mon Sep 17 00:00:00 2001 From: Guy Khmelnitsky <3136012+GuyKh@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:52:38 +0300 Subject: [PATCH 5/5] FamilySearch Json - Fix no spouse => null --- collections/familysearchjson.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collections/familysearchjson.js b/collections/familysearchjson.js index 82e4317..d92c109 100644 --- a/collections/familysearchjson.js +++ b/collections/familysearchjson.js @@ -318,7 +318,7 @@ function parseFamilySearchJSON(htmlstring, familymembers, relation) { spouse = jsonrel[x]["parent1"]["id"]; image = jsonrel[x]["parent1"]["portraitUrl"] || ""; } - if (spouse !== "") { + if (spouse) { var data = parseFSJSONUnion(jsonrel[x]["event"]); var valid = processFamilySearchJSON(spouse, "spouse", famid, image, data); if (valid) {