Skip to content

Commit

Permalink
Merge branch 'ziotom78:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
ziotom78 authored Dec 4, 2023
2 parents 84761d6 + 6847fd4 commit 5b7d2a3
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 26 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# HEAD

- Permit cross-references in dependencies when importing databases [#124](https://github.com/ziotom78/instrumentdb/pull/124)

- Permit longer file names [#123](https://github.com/ziotom78/instrumentdb/pull/123)

- Return more useful errors when wrong paths are used [#121](https://github.com/ziotom78/instrumentdb/pull/121)
Expand Down
106 changes: 91 additions & 15 deletions browse/management/commands/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pathlib import Path
import json
from typing import Any, List, Dict
from uuid import UUID
import yaml

Expand Down Expand Up @@ -48,7 +49,13 @@ class Command(BaseCommand):
output_transaction = True
requires_migrations_checks = True

def create_entities(self, entities, parent=None, nest_level=0):
def create_entities(
self,
entities,
parent=None,
nest_level=0,
dependencies_to_add: Dict[UUID, List[UUID]] = {},
):
for entity_dict in entities:
cur_entity_name = entity_dict.get("name")
uuid = entity_dict.get("uuid")
Expand Down Expand Up @@ -81,6 +88,7 @@ def create_entities(self, entities, parent=None, nest_level=0):
entity_dict["quantities"],
parent_entity=cur_entity,
nest_level=nest_level + 1,
dependencies_to_add=dependencies_to_add,
)

if not self.dry_run:
Expand All @@ -91,6 +99,7 @@ def create_entities(self, entities, parent=None, nest_level=0):
entity_dict.get("children", []),
parent=cur_entity,
nest_level=nest_level + 1,
dependencies_to_add=dependencies_to_add,
)

def create_format_specifications(self, specs):
Expand Down Expand Up @@ -159,10 +168,17 @@ def create_format_specifications(self, specs):
if fp:
fp.close()

def create_quantities(self, quantities, parent_entity=None, nest_level=0):
def create_quantities(
self,
quantities,
parent_entity=None,
nest_level=0,
dependencies_to_add: Dict[UUID, List[UUID]] = {},
):
for quantity_dict in quantities:
name = quantity_dict.get("name")
uuid = quantity_dict.get("uuid")

if uuid:
uuid = UUID(uuid)
if self.no_overwrite and Quantity.objects.filter(uuid=uuid):
Expand Down Expand Up @@ -227,9 +243,20 @@ def create_quantities(self, quantities, parent_entity=None, nest_level=0):
nest_level=nest_level + 1,
)

for cur_dict in quantity_dict["data_files"]:
deps = cur_dict.get("dependencies", [])
if deps:
dependencies_to_add[UUID(cur_dict["uuid"])] = deps

quantity.save()

def create_data_files(self, data_files, parent_quantity=None, nest_level=0):
def create_data_files(
self,
data_files,
parent_quantity=None,
nest_level=0,
dependencies_to_add: Dict[UUID, List[UUID]] = {},
):
for data_file_dict in data_files:
name = data_file_dict.get("name")
uuid = data_file_dict.get("uuid")
Expand All @@ -243,10 +270,13 @@ def create_data_files(self, data_files, parent_quantity=None, nest_level=0):
)
continue

dependencies = data_file_dict.get("dependencies", [])
if dependencies:
dependencies_to_add[uuid] = dependencies

metadata = json.dumps(data_file_dict.get("metadata", {}))
filename = data_file_dict.get("file_name")
plot_filename = data_file_dict.get("plot_file")
dependencies = data_file_dict.get("dependencies", [])

try:
upload_date = parse_datetime(data_file_dict.get("upload_date"))
Expand Down Expand Up @@ -299,9 +329,6 @@ def create_data_files(self, data_files, parent_quantity=None, nest_level=0):
spaces(nest_level) + f'Data file "{name}" ({filename})'
)

for cur_dep in dependencies:
self.stdout.write(spaces(nest_level) + f" Depends on {cur_dep[0:6]}")

if self.dry_run:
continue

Expand All @@ -323,18 +350,50 @@ def create_data_files(self, data_files, parent_quantity=None, nest_level=0):
"plot_mime_type": data_file_dict.get("plot_mime_type"),
},
)

for cur_dep in dependencies:
reference = DataFile.objects.get(uuid=cur_dep)
cur_data_file.dependencies.add(reference)

cur_data_file.save()

if fp:
fp.close()
if plot_fp:
plot_fp.close()

def update_dependencies(self, dependencies_to_add: Dict[UUID, List[UUID]]):
    """Link each imported data file to the data files it depends on.

    This is called after all the ``create_*`` steps, so a dependency can
    refer to a data file regardless of where it appears in the input.

    In dry-run mode the planned links are only reported and the database
    is never queried or modified.

    Args:
        dependencies_to_add: maps the UUID of a data file to the UUIDs of
            the data files it depends on. Entries with no dependencies
            are skipped.

    Raises:
        CommandError: if a data file, or one of its dependencies, is not
            found in the database.
    """
    if self.dry_run:
        # During a dry run the data files are not created, so looking
        # them up would fail (or would alter pre-existing rows). Just
        # report what would have been linked.
        for data_file_uuid, dependencies in dependencies_to_add.items():
            for cur_dep in dependencies:
                self.stdout.write(
                    "Data file {parent_uuid} depends on {dep_uuid}".format(
                        parent_uuid=str(data_file_uuid)[0:6],
                        dep_uuid=str(cur_dep)[0:6],
                    )
                )
        return

    for data_file_uuid, dependencies in dependencies_to_add.items():
        if not dependencies:
            continue

        try:
            cur_data_file = DataFile.objects.get(uuid=data_file_uuid)
        except DataFile.DoesNotExist as err:
            raise CommandError(
                "There is no data file with UUID {}".format(
                    data_file_uuid.hex[0:6],
                )
            ) from err

        for cur_dep in dependencies:
            # Only the lookup can raise DoesNotExist: keep the "try" body
            # minimal so that unrelated failures are not misreported.
            try:
                reference = DataFile.objects.get(uuid=cur_dep)
            except DataFile.DoesNotExist as err:
                raise CommandError(
                    (
                        'Object with UUID "{cur_dep}" does not exist but is '
                        + 'listed in the dependencies for "{name}"'
                    ).format(cur_dep=cur_dep, name=cur_data_file.name)
                ) from err

            cur_data_file.dependencies.add(reference)
            self.stdout.write(
                (
                    'Adding "{dep_name}" ({dep_uuid}) as a dependency '
                    + 'to "{parent_name}" ({parent_uuid})'
                ).format(
                    dep_name=reference.name,
                    dep_uuid=reference.uuid.hex[0:6],
                    parent_name=cur_data_file.name,
                    parent_uuid=cur_data_file.uuid.hex[0:6],
                )
            )
        cur_data_file.save()

def create_releases(self, releases):
for rel_dict in releases:
tag = rel_dict.get("tag")
Expand Down Expand Up @@ -435,9 +494,26 @@ def handle(self, *args, **options):
schema = json.load(inpf)

self.create_format_specifications(schema.get("format_specifications", []))
self.create_entities(schema.get("entities", []))
self.create_quantities(schema.get("quantities", []))
self.create_data_files(schema.get("data_files", []))

# FIRST add all the data files, THEN update the dependencies, otherwise
# some dependencies might not be found because they refer to data files
# that have not been added yet. Note that data files can appear either
# in the entity/quantity tree or in a separate "data_files" section
# in the JSON/YAML file, so we must gather all of them before calling
# self.update_dependencies(). That's the reason why we pass the
# dictionary "dependencies_to_add" to all the self.create_* methods
dependencies_to_add = {} # type: Dict[UUID, List[UUID]]
self.create_entities(
schema.get("entities", []), dependencies_to_add=dependencies_to_add
)
self.create_quantities(
schema.get("quantities", []), dependencies_to_add=dependencies_to_add
)
self.create_data_files(
schema.get("data_files", []), dependencies_to_add=dependencies_to_add
)
self.update_dependencies(dependencies_to_add)

self.create_releases(schema.get("releases", []))

update_release_file_dumps()
4 changes: 4 additions & 0 deletions examples/schema1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ entities:
upload_date: "2019-12-13 20:44:51"
file_data: "horn01_grasp_v0.fits"
spec_version: "0.1"
dependencies:
# This is a dependency on a data file that comes
# later in this YAML file
- "37bb70e4-29b2-4657-ba0b-4ccefbc5ae36"

- uuid: "01eeffe6-ac90-4a55-8b72-8d49e5dbe7ad"
name: "horn01_synth"
Expand Down
22 changes: 11 additions & 11 deletions examples/schema2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,17 @@ quantities:
entity: "43377e3d-e457-43fe-baef-4017b0db53b2"

data_files:
- uuid: "37bb70e4-29b2-4657-ba0b-4ccefbc5ae36"
name: "horn01_synth.json"
metadata:
fwhm_deg: 1.0
ellipticity: 0.0
quantity: "01eeffe6-ac90-4a55-8b72-8d49e5dbe7ad"
upload_date: "2020-01-01 17:21:54"
dependencies:
- "a6dd07ee-9721-4453-abb1-e58aa53a9c01"
spec_version: "0.2"

- uuid: "a6dd07ee-9721-4453-abb1-e58aa53a9c01"
name: "horn01_grasp.fits"
metadata:
Expand All @@ -121,17 +132,6 @@ data_files:
quantity: "e9916db9-a234-4921-adfd-6c3bb4f816e9"
spec_version: "0.1"

- uuid: "37bb70e4-29b2-4657-ba0b-4ccefbc5ae36"
name: "horn01_synth.json"
metadata:
fwhm_deg: 1.0
ellipticity: 0.0
quantity: "01eeffe6-ac90-4a55-8b72-8d49e5dbe7ad"
upload_date: "2020-01-01 17:21:54"
dependencies:
- "a6dd07ee-9721-4453-abb1-e58aa53a9c01"
spec_version: "0.2"

releases:
- tag: 1.0
release_date: "2020-02-01 14:03:12"
Expand Down
12 changes: 12 additions & 0 deletions tests/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ def check_db_size(
test.assertEqual(len(Release.objects.all()), release_len)


def check_deps_in_schema(test):
    """Assert that "horn01_synth" lists "horn01_grasp" as a dependency.

    Both data files are looked up by the UUIDs used in the example schemas;
    ``test`` is the running test case, whose ``assertTrue`` is used.
    """
    synth_uuid = "37bb70e4-29b2-4657-ba0b-4ccefbc5ae36"
    grasp_uuid = "a6dd07ee-9721-4453-abb1-e58aa53a9c01"

    dependent = DataFile.objects.get(uuid=synth_uuid)
    dependency = DataFile.objects.get(uuid=grasp_uuid)

    is_linked = dependent.dependencies.filter(uuid=dependency.uuid).exists()
    test.assertTrue(is_linked)


class TestNestedYamlIO(TestCase):
def setUp(self):
self.input_file = Path(__file__).parent / ".." / "examples" / "schema1.yaml"
Expand Down Expand Up @@ -43,6 +52,7 @@ def test_import_nested_yaml(self):
data_file_len=3,
release_len=1,
)
check_deps_in_schema(self)

def test_import_nested_yaml_no_overwrite(self):
# Test that --no-overwrite works
Expand All @@ -55,6 +65,7 @@ def test_import_nested_yaml_no_overwrite(self):
data_file_len=3,
release_len=1,
)
check_deps_in_schema(self)


class TestPlainYamlIO(TestCase):
Expand All @@ -69,6 +80,7 @@ def test_import_plain_yaml(self):
data_file_len=3,
release_len=1,
)
check_deps_in_schema(self)


class TestTutorial(TestCase):
Expand Down

0 comments on commit 5b7d2a3

Please sign in to comment.