# Source code for ocflcore.domain.ocflobj
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 CERN.
# Copyright (C) 2021 Data Futures.
#
# OCFL Core is free software; you can redistribute it and/or modify it under the
# terms of the MIT License; see LICENSE file for more details.
"""Logical representation of an OCFL Object."""
from ..errors import LogicalPathError
#
# Validators
#
def validate_fixity_algo(algo):
    """Validate that ``algo`` is an accepted fixity algorithm.

    :param algo: Name of the fixity algorithm to check.
    :raises AssertionError: If ``algo`` is not one of ``md5``, ``sha1``,
        ``sha256``, ``sha512`` or ``blake2b-512``.
    """
    # Raise explicitly instead of using ``assert``: assert statements are
    # removed when Python runs with ``-O``, which would silently disable the
    # validation. AssertionError is kept so existing callers are unaffected.
    if algo not in ("md5", "sha1", "sha256", "sha512", "blake2b-512"):
        raise AssertionError(f"Unsupported fixity algorithm: {algo!r}")
def validate_digest_algo(algo):
    """Validate that ``algo`` is an accepted digest algorithm.

    :param algo: Name of the digest algorithm to check.
    :raises AssertionError: If ``algo`` is neither ``sha256`` nor ``sha512``.
    """
    # Raise explicitly instead of using ``assert`` so the check is still
    # enforced when Python runs with ``-O`` (which strips assert statements).
    if algo not in ("sha256", "sha512"):
        raise AssertionError(f"Unsupported digest algorithm: {algo!r}")
def validate_spec(spec):
    """Validate a version string of the OCFL specification.

    :param spec: Specification version to check.
    :raises AssertionError: If ``spec`` is neither ``"1.0"`` nor ``"1.1"``.
    """
    # Raise explicitly instead of using ``assert`` so the check is still
    # enforced when Python runs with ``-O`` (which strips assert statements).
    if spec not in ("1.0", "1.1"):
        raise AssertionError(f"Unsupported OCFL spec version: {spec!r}")
def validate_path(path):
    """Validate a path according to OCFL.

    The path must be non-empty, must not begin or end with ``/``, and every
    ``/``-separated element must pass :func:`validate_path_elem`.

    :param path: The path to check.
    :raises AssertionError: If the path is empty, starts or ends with a
        slash, or contains an invalid path element.
    """
    # See 3.5.3.1
    # An empty path previously failed with IndexError on ``path[0]``; report
    # it as a validation failure like every other invalid path.
    if not path:
        raise AssertionError("Path must not be empty.")
    # Explicit raises (rather than ``assert``) keep the checks active under
    # ``python -O``.
    if path[0] == "/":
        raise AssertionError(f"Path must not begin with '/': {path!r}")
    if path[-1] == "/":
        raise AssertionError(f"Path must not end with '/': {path!r}")
    for path_elem in path.split("/"):
        validate_path_elem(path_elem)
def validate_path_elem(path_elem):
    """Validate a single path element according to OCFL.

    :param path_elem: One element of a path (the text between separators).
    :raises AssertionError: If the element contains ``/`` or is empty,
        ``.`` or ``..``.
    """
    # See 3.5.3.1
    # Explicit raises (rather than ``assert``) keep the checks active under
    # ``python -O``.
    if "/" in path_elem:
        raise AssertionError(
            f"Path element must not contain '/': {path_elem!r}"
        )
    if path_elem in ("", ".", ".."):
        raise AssertionError(f"Invalid path element: {path_elem!r}")
#
# Logical files for a version
#
class VersionFile:
    """A single logical file that belongs to a version."""

    def __init__(self, logical_path, stream, digest, fixity=None):
        """Keep the given logical path, stream, digest and fixity as-is.

        No validation happens here; the values are stored unchanged.
        """
        self._fixity = fixity
        self._digest = digest
        self._stream = stream
        self._logical_path = logical_path

    @property
    def logical_path(self):
        """The logical path inside the version."""
        return self._logical_path

    @property
    def stream(self):
        """The stream object given to the constructor."""
        return self._stream

    @property
    def digest(self):
        """The digest value given to the constructor."""
        return self._digest

    @property
    def fixity(self):
        """The fixity information given to the constructor, or ``None``."""
        return self._fixity

    def content_path(self, idx, content_directory):
        """Build this file's content path relative to the object root.

        The result has the form ``v<idx>/<content_directory>/<logical_path>``.
        """
        version_dir = f"v{idx}"
        return f"{version_dir}/{content_directory}/{self.logical_path}"
class FilesManager:
    """Collection of the files in an OCFL version, keyed by logical path."""

    def __init__(self, inventory=None):
        """Create an empty files manager.

        The ``inventory`` argument is accepted but not used.
        """
        self._files = {}

    def __len__(self):
        """Number of files held by the manager."""
        return len(self._files)

    def __iter__(self):
        """Yield the stored files in insertion order."""
        yield from self._files.values()

    def add(self, logical_path, stream, digest, fixity=None):
        """Register a new file under ``logical_path``.

        Raises ``LogicalPathError`` when the logical path is already present;
        otherwise the path is checked with ``validate_path`` before the file
        is stored.
        """
        files = self._files
        if logical_path in files:
            raise LogicalPathError("Logical path already present in version.")
        validate_path(logical_path)
        entry = VersionFile(logical_path, stream, digest, fixity=fixity)
        files[logical_path] = entry
#
# Versions
#
class OCFLVersion:
    """Logical representation of a version."""

    def __init__(self, creation_time):
        """Create a version with the given creation time and no files.

        The version index, user and message all start out as ``None``.
        """
        self._created = creation_time
        self._files = FilesManager()
        self._version_index = self._user = self._message = None

    @property
    def created(self):
        """Creation time given to the constructor."""
        return self._created

    @property
    def files(self):
        """Get the files manager of this version."""
        return self._files

    @property
    def index(self):
        """Get the version index."""
        return self._version_index

    @property
    def user(self):
        """Get the user."""
        return self._user

    @property
    def message(self):
        """Get the message."""
        return self._message

    @property
    def state(self):
        """Version state: a dict mapping each digest to its logical paths.

        Paths are listed in the order the files are iterated.
        """
        paths_by_digest = {}
        for version_file in self.files:
            paths_by_digest.setdefault(version_file.digest, []).append(
                version_file.logical_path
            )
        return paths_by_digest
class VersionManager:
    """Version manager for OCFL objects.

    Versions are kept in insertion order. Internally they are stored
    0-indexed, while :meth:`enumerated` reports 1-indexed version numbers.
    """

    def __init__(self):
        """Constructor for a version manager."""
        self._versions = []

    def append(self, version):
        """Add a version to the version manager."""
        self._versions.append(version)

    def __len__(self):
        """Number of versions."""
        return len(self._versions)

    def __getitem__(self, index):
        """Get a version by its 0-based position (list indexing rules)."""
        return self._versions[index]

    def __iter__(self):
        """Iterate the versions."""
        yield from self._versions

    def enumerated(self, version=None):
        """Iterate over ``(version_number, version)`` pairs.

        :param version: Optional highest version number to include. When
            ``None`` every version is yielded.
        :returns: Generator of ``(version_number, version)`` tuples, with
            version numbers starting at 1.
        """
        # OCFL is 1-indexed.
        for version_number, v in enumerate(self._versions, start=1):
            # Stop once we are past the requested version. The previous
            # condition (``version == version_number + 1``) stopped two
            # versions too early and never stopped at all for ``version=1``.
            if version is not None and version_number > version:
                break
            yield version_number, v
#
# OCFL object
#
class OCFLObject:
    """Logical representation of an OCFL Object."""

    def __init__(
        self,
        object_id,
        content_directory="content",
        digest_algorithm="sha512",
        spec="1.1",
    ):
        """OCFL Object constructor.

        :param object_id: Identifier of the object (currently not validated).
        :param content_directory: Name of the content directory; must be a
            valid single path element.
        :param digest_algorithm: Digest algorithm name, checked with
            ``validate_digest_algo``.
        :param spec: OCFL specification version, checked with
            ``validate_spec``.
        """
        validate_path_elem(content_directory)
        validate_digest_algo(digest_algorithm)
        validate_spec(spec)
        # TODO validate object_id
        self._object_id = object_id
        self._versions = VersionManager()
        self._content_directory = content_directory
        self._digest_algorithm = digest_algorithm
        # Store the validated argument; this used to be hard-coded to "1.1",
        # which silently discarded a caller-supplied spec version.
        self._spec = spec

    @property
    def id(self):
        """Get the object identifier."""
        return self._object_id

    @property
    def versions(self):
        """Get the version manager holding this object's versions."""
        return self._versions

    @property
    def version_numbers(self):
        """Return a range of version numbers (1-indexed)."""
        return range(1, len(self.versions) + 1)

    @property
    def head(self):
        """Get the most recent version.

        Indexes the last element of the versions, so an object without any
        version raises ``IndexError``.
        """
        return self._versions[-1]

    @property
    def content_directory(self):
        """Get the content directory."""
        return self._content_directory

    @property
    def digest_algorithm(self):
        """Get the digest algorithm."""
        return self._digest_algorithm

    @property
    def spec(self):
        """Get OCFL specification for this object."""
        return self._spec

    def content_files(self, version=None):
        """Iterate over deduplicated list of content files.

        Only the first file seen for each digest is yielded, together with
        the content path computed for the version it first appears in.

        :param version: Passed through to ``self.versions.enumerated`` to
            limit which versions are considered.
        :returns: Generator of ``(content_path, file)`` tuples.
        """
        seen_digests = set()
        for idx, v in self.versions.enumerated(version=version):
            for f in v.files:
                if f.digest in seen_digests:
                    # Same content already emitted for an earlier file.
                    continue
                seen_digests.add(f.digest)
                yield f.content_path(idx, self._content_directory), f