diff --git a/docs/_build/doctrees/environment.pickle b/docs/_build/doctrees/environment.pickle index d3ea12c69..6a2b35637 100644 Binary files a/docs/_build/doctrees/environment.pickle and b/docs/_build/doctrees/environment.pickle differ diff --git a/docs/_build/doctrees/tools/indexing.doctree b/docs/_build/doctrees/tools/indexing.doctree index 2d4155e12..2531f628a 100644 Binary files a/docs/_build/doctrees/tools/indexing.doctree and b/docs/_build/doctrees/tools/indexing.doctree differ diff --git a/docs/_build/doctrees/tools/metadata.doctree b/docs/_build/doctrees/tools/metadata.doctree index 131c5ba49..fac6d8fe5 100644 Binary files a/docs/_build/doctrees/tools/metadata.doctree and b/docs/_build/doctrees/tools/metadata.doctree differ diff --git a/docs/_build/html/_modules/gen3/tools/indexing/index_manifest.html b/docs/_build/html/_modules/gen3/tools/indexing/index_manifest.html index a198daa86..8b9050049 100644 --- a/docs/_build/html/_modules/gen3/tools/indexing/index_manifest.html +++ b/docs/_build/html/_modules/gen3/tools/indexing/index_manifest.html @@ -532,6 +532,72 @@

Source code for gen3.tools.indexing.index_manifest

def populate_object_manifest_with_valid_guids(
    commons_url, manifest_file, output_filename=None
):
    """
    Given a minimal file object manifest, populate any missing GUIDs with valid GUIDs
    for the given commons.

    NOTE: This DOES NOT index anything, it only works client side to populate the manifest
          with valid GUIDs (which are obtained from the server). No records are created
          as part of this function call.

    Args:
        commons_url (str): root domain for commons where indexd lives
        manifest_file (str): file path for input manifest file to populate empty GUIDs
        output_filename (str): output file name for manifest. When falsy, defaults to
            the input path with "_populated_guids" inserted before the extension.

    Raises:
        Exception: whatever get_and_verify_fileinfos_from_manifest raises while
            reading the input manifest is logged and re-raised unchanged.
        RuntimeError: if the server returns no GUIDs while some are still needed.
    """
    if not output_filename:
        base, extension = os.path.splitext(manifest_file)
        output_filename = base + "_populated_guids" + extension

    try:
        records, headers = get_and_verify_fileinfos_from_manifest(
            manifest_file, manifest_file_delimiter=None, include_additional_columns=True
        )
    except Exception:
        logging.error(
            f"Can not read records and headers from input manifest: {manifest_file}."
        )
        raise

    # ensure GUID column exists (as the first column when it has to be added)
    if GUID_STANDARD_KEY not in headers:
        headers.insert(0, GUID_STANDARD_KEY)

    guid_batch_size = 10000
    index = Gen3Index(commons_url)

    # GUIDs are requested lazily, one batch at a time, so no request is made when
    # every record already has a GUID and no surplus batch is fetched at the end.
    valid_guids = []

    # records are updated in place; only those without a GUID are touched
    for record in records:
        if record.get(GUID_STANDARD_KEY):
            continue

        if not valid_guids:
            valid_guids = index.get_valid_guids(count=guid_batch_size)
            if not valid_guids:
                # fail with a clear message instead of an IndexError from pop()
                raise RuntimeError(
                    f"No valid GUIDs were returned by {commons_url}; "
                    "unable to populate the manifest."
                )

        record[GUID_STANDARD_KEY] = valid_guids.pop()

    output_filename = os.path.abspath(output_filename)
    logging.info(f"Writing output to {output_filename}")

    # remove existing output if it exists
    if os.path.isfile(output_filename):
        os.unlink(output_filename)

    # output_filename is already absolute, so joining it onto another directory
    # would be a no-op; pass it through directly.
    _write_csv(output_filename, records, headers)
+ + + @click.command() @click.option( "--commons-url", diff --git a/docs/_build/html/genindex.html b/docs/_build/html/genindex.html index 3c8704b10..93160852c 100644 --- a/docs/_build/html/genindex.html +++ b/docs/_build/html/genindex.html @@ -527,10 +527,12 @@

O

P

diff --git a/docs/_build/html/index.html b/docs/_build/html/index.html index bf4d4bba6..d5fedd197 100644 --- a/docs/_build/html/index.html +++ b/docs/_build/html/index.html @@ -250,6 +250,7 @@

Welcome to Gen3 SDK’s documentation!ThreadControl
  • delete_all_guids()
  • index_object_manifest()
  • +
  • populate_object_manifest_with_valid_guids()
  • Verify
  • Verify
  • Verify