Source code for randonneur.datapackage

import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Union

from randonneur.licenses import LICENSES
from randonneur.validation import DatapackageMetadata, MappingFields, validate_data_for_verb


class Datapackage:
    """Container for datapackage metadata plus lists of data grouped by verb.

    Metadata is held in plain attributes; data is accumulated per verb in
    ``self.data`` via :meth:`add_data`. The package can be serialized with
    :meth:`to_json` and re-created with :meth:`from_json`.
    """

    def __init__(
        self,
        *,
        name: str,
        description: str,
        contributors: list,
        mapping_source: dict,
        mapping_target: dict,
        source_id: Optional[str] = None,
        target_id: Optional[str] = None,
        homepage: Optional[str] = None,
        created: Optional[datetime] = None,
        version: str = "1.0.0",
        licenses: Optional[list] = None,
        graph_context: Optional[list] = None,
    ):
        """Store the metadata and run the validation models on it.

        All arguments are keyword-only.

        Args:
            name: Stored as ``self.name``.
            description: Stored as ``self.description``.
            contributors: Stored as ``self.contributors``.
            mapping_source: Stored under ``self.mapping["source"]``.
            mapping_target: Stored under ``self.mapping["target"]``.
            source_id: Optional; stored as ``self.source_id``.
            target_id: Optional; stored as ``self.target_id``.
            homepage: Optional; only emitted by :meth:`metadata` when truthy.
            created: Optional creation time. When falsy, the current UTC time
                is stored as an ISO 8601 *string* (not a ``datetime``).
            version: Stored as ``self.version``.
            licenses: Optional; defaults to ``[LICENSES["CC-BY-4.0"]]``.
            graph_context: Optional; defaults to ``["edges"]``.
        """
        self.name = name
        self.description = description
        self.contributors = contributors
        self.source_id = source_id
        self.target_id = target_id
        self.homepage = homepage
        # May end up as either a `datetime` (caller-supplied) or an ISO string
        # (default); `metadata()` normalizes both to a string.
        self.created = created or datetime.now(timezone.utc).isoformat()
        self.mapping = {"source": mapping_source, "target": mapping_target}
        self.licenses = licenses or [LICENSES["CC-BY-4.0"]]
        self.version = version
        self.data = {}
        self.graph_context = graph_context or ["edges"]

        # The results are discarded: these are instantiated only for their side
        # effects. NOTE(review): presumably they raise on invalid input, since
        # they come from `randonneur.validation` - confirm there.
        MappingFields(**mapping_source)
        MappingFields(**mapping_target)
        DatapackageMetadata(
            name=self.name,
            description=self.description,
            source_id=self.source_id,
            target_id=self.target_id,
            homepage=self.homepage,
            created=self.created,
            version=self.version,
            licenses=self.licenses,
            graph_context=self.graph_context,
            contributors=self.contributors,
        )

    def metadata(self) -> dict:
        """Return the metadata as a dict with ``created`` rendered as a string.

        ``homepage`` is included only when it is truthy.
        """
        data = {
            "name": self.name,
            "description": self.description,
            "contributors": self.contributors,
            "created": (
                self.created.isoformat() if isinstance(self.created, datetime) else self.created
            ),
            "version": self.version,
            "licenses": self.licenses,
            "graph_context": self.graph_context,
            "mapping": self.mapping,
            "source_id": self.source_id,
            "target_id": self.target_id,
        }
        if self.homepage:
            data["homepage"] = self.homepage
        return data

    def add_data(self, verb: str, data: list) -> None:
        """Append ``data`` to the list stored for ``verb``.

        Args:
            verb: Key under which the data is stored in ``self.data``.
            data: Non-empty list of items to add.

        Raises:
            ValueError: If ``data`` is empty.
        """
        if not data:
            raise ValueError(f"Provided data `{data}`is empty")
        # Runs before anything is stored, so an exception here leaves
        # `self.data` untouched.
        validate_data_for_verb(verb=verb, data=data, mapping=self.mapping)
        self.data.setdefault(verb, []).extend(data)

    def to_json(self, filepath: Optional[Union[Path, str]] = None) -> Union[Path, str]:
        """Serialize metadata and data to JSON.

        Args:
            filepath: Destination file. When ``None``, nothing is written and
                the JSON text is returned. A ``.json`` suffix is appended when
                the given name does not already end in one (case-insensitive).

        Returns:
            The JSON string when ``filepath`` is ``None``; otherwise the
            ``Path`` that was written.

        Raises:
            OSError: If the destination directory is not writable.
        """
        # Data keys are merged on top of the metadata keys.
        payload = self.metadata() | self.data

        if filepath is None:
            return json.dumps(payload, indent=2, ensure_ascii=False)

        if not isinstance(filepath, Path):
            filepath = Path(filepath)
        if filepath.suffix.lower() != ".json":
            # Append rather than replace the suffix, so "foo.bar" -> "foo.bar.json".
            filepath = filepath.parent / f"{filepath.name}.json"
        if not os.access(filepath.parent, os.W_OK):
            raise OSError(f"Can't write to directory {filepath.parent}")

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        return filepath

    @staticmethod
    def from_json(filepath: Union[Path, str]) -> "Datapackage":
        """Build a ``Datapackage`` from a JSON file such as ``to_json`` writes.

        Args:
            filepath: Path to an existing JSON file (``Path`` or ``str``).

        Returns:
            A new ``Datapackage`` whose ``data`` holds the verb lists found in
            the file.

        Raises:
            AssertionError: If ``filepath`` is not an existing file.
        """
        filepath = Path(filepath)
        if not filepath.is_file():
            # Raised explicitly instead of via `assert` so the check survives
            # `python -O`; the exception type is kept for existing callers.
            raise AssertionError("Given path isn't a file")

        # `to_json` writes UTF-8 with `ensure_ascii=False`, so read it back as
        # UTF-8 regardless of the platform default; `with` closes the handle.
        with open(filepath, encoding="utf-8") as f:
            file_data = json.load(f)

        file_data["created"] = datetime.fromisoformat(file_data["created"])
        mapping = file_data.pop("mapping")
        file_data["mapping_source"] = mapping.pop("source")
        file_data["mapping_target"] = mapping.pop("target")

        # Verb lists are not constructor arguments: pull them out before
        # passing the remaining keys to `__init__`, then attach them directly.
        data = {
            verb: file_data.pop(verb)
            for verb in ("create", "replace", "update", "delete", "disaggregate")
            if verb in file_data
        }
        dp = Datapackage(**file_data)
        dp.data = data
        return dp