import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Union
from randonneur.licenses import LICENSES
from randonneur.validation import DatapackageMetadata, MappingFields, validate_data_for_verb
class Datapackage:
    """Descriptive metadata plus per-verb lists of data, serializable to JSON.

    The constructor stores and validates the metadata; `add_data` appends
    validated records under a verb; `to_json` / `from_json` convert the whole
    package to and from a single JSON document.
    """

    def __init__(
        self,
        *,
        name: str,
        description: str,
        contributors: list,
        mapping_source: dict,
        mapping_target: dict,
        source_id: Optional[str] = None,
        target_id: Optional[str] = None,
        homepage: Optional[str] = None,
        created: Optional[datetime] = None,
        version: str = "1.0.0",
        licenses: Optional[list] = None,
        graph_context: Optional[list] = None,
    ):
        """Store the metadata and validate it.

        Args:
            name: Name of the data package.
            description: Description of the data package.
            contributors: Contributor entries, passed to `DatapackageMetadata`.
            mapping_source: Keyword arguments accepted by `MappingFields`;
                stored as `mapping["source"]`.
            mapping_target: Keyword arguments accepted by `MappingFields`;
                stored as `mapping["target"]`.
            source_id: Optional identifier of the source.
            target_id: Optional identifier of the target.
            homepage: Optional homepage.
            created: Creation time. When falsy, the current UTC time is stored
                as an ISO 8601 string.
            version: Version string of the package.
            licenses: License entries. When falsy, `[LICENSES["CC-BY-4.0"]]`
                is used.
            graph_context: Graph context entries. When falsy, `["edges"]` is
                used.

        Raises:
            Whatever `MappingFields` or `DatapackageMetadata` raise when the
            given values do not validate.
        """
        self.name = name
        self.description = description
        self.contributors = contributors
        self.source_id = source_id
        self.target_id = target_id
        self.homepage = homepage
        # Either the caller's value as given, or "now" as an ISO string.
        self.created = created or datetime.now(timezone.utc).isoformat()
        self.version = version
        self.mapping = {"source": mapping_source, "target": mapping_target}
        self.licenses = licenses or [LICENSES["CC-BY-4.0"]]
        self.graph_context = graph_context or ["edges"]
        # Maps verb -> list of records; filled by `add_data` and `from_json`.
        self.data = {}

        # The instances are discarded: they are built only so that invalid
        # input raises here instead of surfacing later during serialization.
        MappingFields(**mapping_source)
        MappingFields(**mapping_target)
        DatapackageMetadata(
            name=self.name,
            description=self.description,
            source_id=self.source_id,
            target_id=self.target_id,
            homepage=self.homepage,
            created=self.created,
            version=self.version,
            licenses=self.licenses,
            graph_context=self.graph_context,
            contributors=self.contributors,
        )

    def metadata(self) -> dict:
        """Return the package metadata as a JSON-serializable dict.

        The keys are the constructor's keyword names plus `"mapping"` (holding
        `"source"` and `"target"`), which is the layout `from_json` reads back.
        `source_id`, `target_id` and `homepage` are left out when they are
        `None`; the constructor defaults restore them on load.
        """
        created = self.created
        if isinstance(created, datetime):
            # `json` cannot serialize datetime objects.
            created = created.isoformat()

        meta = {
            "name": self.name,
            "description": self.description,
            "contributors": self.contributors,
            "created": created,
            "version": self.version,
            "licenses": self.licenses,
            "graph_context": self.graph_context,
            "mapping": self.mapping,
        }
        optional = {
            "source_id": self.source_id,
            "target_id": self.target_id,
            "homepage": self.homepage,
        }
        meta.update({key: value for key, value in optional.items() if value is not None})
        return meta

    def add_data(self, verb: str, data: list) -> None:
        """Validate `data` for `verb` and append it to the package.

        Args:
            verb: Key under which the records are stored in `self.data`.
            data: Non-empty list of records.

        Raises:
            ValueError: If `data` is empty.
            Whatever `validate_data_for_verb` raises for invalid input.
        """
        if not data:
            raise ValueError(f"Provided data `{data}` is empty")
        validate_data_for_verb(verb=verb, data=data, mapping=self.mapping)
        self.data.setdefault(verb, []).extend(data)

    def to_json(self, filepath: Optional[Union[Path, str]] = None) -> Union[Path, str]:
        """Serialize metadata and data to JSON.

        Args:
            filepath: Where to write the file. When `None`, nothing is written
                and the JSON text is returned instead. A `.json` suffix is
                appended when the path does not already end in one
                (case-insensitive).

        Returns:
            The JSON string when `filepath` is `None`, otherwise the `Path`
            that was written.

        Raises:
            OSError: If the target directory is not writable.
        """
        payload = self.metadata() | self.data
        if filepath is None:
            return json.dumps(payload, indent=2, ensure_ascii=False)

        filepath = Path(filepath)
        if filepath.suffix.lower() != ".json":
            filepath = filepath.parent / f"{filepath.name}.json"
        if not os.access(filepath.parent, os.W_OK):
            raise OSError(f"Can't write to directory {filepath.parent}")

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        return filepath

    @staticmethod
    def from_json(filepath: Union[Path, str]) -> "Datapackage":
        """Build a `Datapackage` from a JSON file such as `to_json` writes.

        Args:
            filepath: Path of an existing JSON file.

        Returns:
            A new `Datapackage` whose `data` holds the lists found under the
            keys `create`, `replace`, `update`, `delete` and `disaggregate`.

        Raises:
            AssertionError: If `filepath` is not an existing file.
        """
        filepath = Path(filepath)
        if not filepath.is_file():
            # Raised explicitly instead of via `assert` so the check still
            # runs under `python -O`; the exception type is kept for callers.
            raise AssertionError("Given path isn't a file")

        # `to_json` writes UTF-8 with non-ASCII characters unescaped, so read
        # with the same encoding rather than the platform default.
        with open(filepath, encoding="utf-8") as f:
            file_data = json.load(f)

        file_data["created"] = datetime.fromisoformat(file_data["created"])
        mapping = file_data.pop("mapping")
        file_data["mapping_source"] = mapping.pop("source")
        file_data["mapping_target"] = mapping.pop("target")

        # Strip the verb sections: everything left over must be a constructor
        # keyword argument.
        verbs = ("create", "replace", "update", "delete", "disaggregate")
        data = {verb: file_data.pop(verb) for verb in verbs if verb in file_data}

        dp = Datapackage(**file_data)
        dp.data = data
        return dp