# Source code for idaes.dmf.resource

##############################################################################
# Institute for the Design of Advanced Energy Systems Process Systems
# Engineering Framework (IDAES PSE Framework) Copyright (c) 2018, by the
# software owners: The Regents of the University of California, through
# Lawrence Berkeley National Laboratory,  National Technology & Engineering
# Solutions of Sandia, LLC, Carnegie Mellon University, West Virginia
# University Research Corporation, et al. All rights reserved.
#
# Please see the files COPYRIGHT.txt and LICENSE.txt for full copyright and
# license information, respectively. Both files are also available online
# at the URL "https://github.com/IDAES/idaes".
##############################################################################
"""
Resource representations.
"""
# stdlib
from collections import namedtuple
from datetime import datetime
import getpass
import os
import pprint
import re
import uuid
# third-party
import jsonschema
import pendulum
import six
# local
from .util import get_logger, datetime_timestamp

# Module-level log object, obtained from the package's `get_logger` helper
# under the name 'resource'
_log = get_logger('resource')

#: Names of the predicates that may link two resources in a relation
PR_DERIVED = 'derived'  # derivedFrom
PR_CONTAINS = 'contains'
PR_USES = 'uses'
PR_VERSION = 'version'
#: Every predicate accepted in a relation
RELATION_PREDICATES = {
    PR_DERIVED,
    PR_CONTAINS,
    PR_USES,
    PR_VERSION,
}

#: Names of the kinds of resource (values of the resource 'type' field)
TY_EXPERIMENT = 'experiment'
TY_TABULAR = 'tabular_data'
TY_PROPERTY = 'propertydb'
TY_FLOWSHEET = 'flowsheet'
TY_NOTEBOOK = 'notebook'
TY_CODE = 'code'
TY_SURRMOD = 'surrogate_model'
TY_DATA = 'data'
TY_OTHER = 'other'
#: Every value accepted for the resource 'type' field
RESOURCE_TYPES = {
    TY_EXPERIMENT,
    TY_TABULAR,
    TY_PROPERTY,
    TY_FLOWSHEET,
    TY_NOTEBOOK,
    TY_CODE,
    TY_SURRMOD,
    TY_DATA,
    TY_OTHER,
}

# Constants for fields in stored relations, i.e. the keys of each dict
# in a resource's 'relations' list (see the 'relations' entry of
# RESOURCE_SCHEMA and `create_relation()`)
RR_PRED = 'predicate'  # key: the relation predicate
RR_SUBJ = 'subject'  # value of the role field: resource is the subject
RR_OBJ = 'object'  # value of the role field: resource is the object
RR_ID = 'identifier'  # key: identifier of the other resource
RR_ROLE = 'role'  # key: role of the containing resource

# Small builders for the repeated JSON-schema fragments used below.
# Every call returns a new dict, so no fragment is shared between
# different places in the schema.

def _typed(type_name, **keywords):
    """Return ``{"type": type_name}`` plus any extra schema keywords."""
    fragment = {"type": type_name}
    fragment.update(keywords)
    return fragment


def _array_of(items):
    """Return the schema for an array whose elements match `items`."""
    return {"type": "array", "items": items}


def _object_of(properties, required=None):
    """Return the schema for an object with the given properties.

    The "required" keyword is only added when `required` is given.
    """
    fragment = {"type": "object", "properties": properties}
    if required is not None:
        fragment["required"] = required
    return fragment


def _person():
    """Return the schema for a person: required name, optional email."""
    return _object_of(
        {
            "email": _typed("string", format="email"),
            "name": _typed("string"),
        },
        required=["name"])


def _semver_ref():
    """Return a reference to the SemanticVersion definition."""
    return {"$ref": "#/definitions/SemanticVersion"}


#: JSON schema (draft 4) for the values stored in a resource
RESOURCE_SCHEMA = {
    "$schema": "http://json-schema.org/draft-04/schema#",
    "id": "http://idaes.org",
    "definitions": {
        # [major, minor, patch, release-type]
        "SemanticVersion": {
            "type": "array",
            "items": [
                _typed("integer"),
                _typed("integer"),
                _typed("integer"),
                _typed("string"),
            ],
            "minItems": 4
        }
    },
    "type": "object",
    "properties": {
        "aliases": _array_of(_typed("string")),
        "codes": _array_of(_object_of(
            {
                "type": _typed(
                    "string",
                    enum=["method", "function", "module", "class",
                          "file",
                          "package", "repository", "notebook"]),
                "desc": _typed("string"),
                "name": _typed("string"),
                "language": _typed("string"),
                "idhash": _typed("string"),
                "location": _typed("string"),
                "version": _semver_ref(),
            },
            required=["name"])),
        "collaborators": _array_of(_person()),
        "created": _typed("number"),
        "creator": _person(),
        "data": _typed("object"),
        "datafiles": _array_of(_object_of(
            {
                "desc": _typed("string"),
                "metadata": _typed("object"),
                "mimetype": _typed("string"),
                "path": _typed("string"),
                "is_copy": _typed("boolean"),
            },
            required=["path"])),
        "datafiles_dir": _typed("string"),
        "desc": _typed("string"),
        "id_": _typed("string"),
        "modified": _typed("number"),
        "relations": _array_of(_object_of(
            {
                RR_PRED: _typed("string", enum=list(RELATION_PREDICATES)),
                RR_ID: _typed("string"),
                RR_ROLE: _typed("string", enum=[RR_SUBJ, RR_OBJ]),
            },
            required=[RR_PRED, RR_ID, RR_ROLE])),
        "sources": _array_of(_object_of(
            {
                "date": _typed("number"),
                "doi": _typed("string"),
                "isbn": _typed("string"),
                "language": _typed("string"),
                "source": _typed("string"),
            })),
        "tags": _array_of(_typed("string")),
        "type": _typed("string", enum=list(RESOURCE_TYPES)),
        "version_info": _object_of(
            {
                "created": _typed("number"),
                "name": _typed("string"),
                "version": _semver_ref(),
            }),
    },
    "required": ["id_"],
    "additionalProperties": False
}


class Dict(dict):
    """Subclass of dict that has a 'dirty' bit.

    The bit is set when the dict is created and whenever one of its
    own (top-level) keys is added, replaced or removed. It is cleared
    only by :meth:`set_clean`. Changes made *inside* a stored value
    (e.g. appending to a list held in the dict) are not detected.
    """

    def __init__(self, *args, **kwargs):
        super(Dict, self).__init__(*args, **kwargs)
        self._dirty = True

    def __setitem__(self, key, value):
        self._dirty = True
        super(Dict, self).__setitem__(key, value)

    # The built-in dict does not route the methods below through an
    # overridden __setitem__, so each one must set the bit itself.

    def __delitem__(self, key):
        super(Dict, self).__delitem__(key)
        self._dirty = True

    def update(self, *args, **kwargs):
        self._dirty = True
        super(Dict, self).update(*args, **kwargs)

    def setdefault(self, key, default=None):
        if key not in self:
            # only an actual insertion changes the contents
            self._dirty = True
        return super(Dict, self).setdefault(key, default)

    def pop(self, *args):
        self._dirty = True
        return super(Dict, self).pop(*args)

    def popitem(self):
        self._dirty = True
        return super(Dict, self).popitem()

    def clear(self):
        self._dirty = True
        super(Dict, self).clear()

    def set_clean(self):
        """Clear the dirty bit."""
        self._dirty = False

    def is_dirty(self):
        """Return True if the dict was modified since the last
        :meth:`set_clean` (or was never marked clean)."""
        return self._dirty
class Resource(object):
    """Core object for the Data Management Framework.

    The resource's content is held in the attribute `v`, a :class:`Dict`
    whose layout is described by `RESOURCE_SCHEMA`.
    """
    ID_FIELD = 'id_'  #: Identifier field name constant
    TYPE_FIELD = 'type'  #: Resource type field name constant

    def __init__(self, value=None, type_=None):
        """Create a resource from default values, optionally overridden.

        Args:
            value (dict): Top-level values that replace the defaults.
            type_ (str): Resource type; takes precedence over a type
                         given in `value`.
        """
        self._set_defaults()
        if value:
            self.v.update(value)
        if type_ is not None:
            self.v[self.TYPE_FIELD] = type_
        self._validator = jsonschema.Draft4Validator(RESOURCE_SCHEMA)
        self._validations = 0  # count validations; mostly for testing
        self.do_copy = self.is_tmp = False  # flags for copying datafiles

    def _set_defaults(self):
        """Set `self.v` to a new Dict of default values, with a new
        random identifier and the current time as all timestamps."""
        now = date_float(pendulum.utcnow())
        self.v = Dict({
            self.ID_FIELD: identifier_str(),
            self.TYPE_FIELD: TY_OTHER,
            'aliases': [],
            'codes': [],
            'collaborators': [],
            'created': now,
            'modified': now,
            'creator': {'name': getpass.getuser()},
            'data': {},
            'datafiles': [],
            'datafiles_dir': '',
            'desc': '',
            'relations': [],
            'sources': [],
            'tags': [],
            'version_info': {
                'created': now,
                'version': (0, 0, 0),
                'name': ''
            }
        })

    @staticmethod
    def _convert_if_present(container, key, type_, convert):
        """Replace `container[key]` with `convert(container[key])`,
        unless the key is absent or the value already is a `type_`."""
        if key in container and not isinstance(container[key], type_):
            container[key] = convert(container[key])

    def _massage_values(self):
        """Convert dates to floats and versions to lists, in place,
        then mark the values as clean.

        Only fields that are present are converted: the schema does not
        require any of them, so deciding whether a missing field is an
        error is left to schema validation.

        Raises:
            ValueError: if a value cannot be converted.
        """
        try:
            # convert dates
            for item in self.v.get('sources', ()):
                self._convert_if_present(item, 'date', float, date_float)
            self._convert_if_present(self.v, 'created', float, date_float)
            self._convert_if_present(self.v, 'modified', float, date_float)
            version_info = self.v.get('version_info', {})
            self._convert_if_present(version_info, 'created', float,
                                     date_float)
            # convert versions
            self._convert_if_present(version_info, 'version', list,
                                     version_list)
            for code in self.v.get('codes', ()):
                self._convert_if_present(code, 'version', list,
                                         version_list)
        except (TypeError, ValueError, KeyError) as err:
            raise ValueError('While converting resource values: {}'
                             .format(err))
        self.v.set_clean()

    def validate(self):
        """Convert and validate the values, if they have been modified
        since the last successful validation.

        Raises:
            ValueError: if a date or version cannot be converted.
            jsonschema.ValidationError: if the values do not conform
                to `RESOURCE_SCHEMA`.
        """
        if self.v.is_dirty():
            self._massage_values()
            try:
                self._validator.validate(self.v)
            except Exception:
                # _massage_values() cleared the dirty bit; restore it so
                # the next validate() does not skip the invalid values
                self.v._dirty = True
                raise
            self._validations += 1

    @property
    def id(self):
        """Get resource identifier.
        """
        return self.v[self.ID_FIELD]

    def set_id(self, value=None):
        """Set the resource identifier.

        Args:
            value (str): New identifier, checked by `identifier_str()`.
                         If None, a new random identifier is used.
        """
        self.v[self.ID_FIELD] = identifier_str(value)

    @property
    def type(self):
        """Get resource type.
        """
        return self.v[self.TYPE_FIELD]

    @property
    def data(self):
        """Get JSON data for this resource.
        """
        return self.v['data']

    @data.setter
    def data(self, value):
        """Set JSON data for this resource.
        """
        self.v['data'] = value

    def get_datafiles(self, mode='r'):
        """Generate readable file objects for 'datafiles' in resource.

        Each file is opened when the generator reaches it and is not
        closed here; closing it is up to the caller.

        Args:
            mode (str): Mode for `open()`
        Returns:
            generator: Generates `file`s.
        """
        dfdir = self.v['datafiles_dir']
        for datafile in self.v['datafiles']:
            if not dfdir:
                path = datafile['path']
            else:
                path = os.path.join(dfdir, datafile['path'])
            fp = open(path, mode=mode)
            yield fp

    def _repr_text_(self):
        """Return the resource values as pretty-printed text."""
        return pprint.pformat(self.v, indent=2)
#
# Function(s) to help creating [two-way] relations
# between resources
#

#: Provide attribute access to an RDF subject, predicate, object triple
Triple = namedtuple('Triple', ['subject', 'predicate', 'object'])
def create_relation(rel):
    """Create a relationship between two Resource instances.

    Relations are stored in both the `subject` and `object` resources, in
    the following way::

        If R = (subject)S, (predicate)P, and (object)O
        then store the following:
          In S.relations: {predicate: P, identifier:O.id, role:subject}
          In O.relations: {predicate: P, identifier:S.id, role:object}

    Both resources are checked before either one is changed, so a
    failure never leaves a relation stored on one side only.

    Args:
        rel (Triple): Relation triple. The 'subject' and 'object' parts
                      should be :class:`Resource`, and the 'predicate' should
                      be a simple string.
    Returns:
        None
    Raises:
        ValueError: if this relation already exists in the subject or
                    object resource, or the predicate is not in the list
                    of valid ones in RELATION_PREDICATES
    """
    if rel.predicate not in RELATION_PREDICATES:
        # sorted, so that the message does not depend on set ordering
        raise ValueError('Bad predicate: "{}" not in: {}'.format(
            rel.predicate, ', '.join(sorted(RELATION_PREDICATES))))
    subject_rel = {RR_PRED: rel.predicate,
                   RR_ID: rel.object.v[Resource.ID_FIELD],
                   RR_ROLE: RR_SUBJ}
    object_rel = {RR_PRED: rel.predicate,
                  RR_ID: rel.subject.v[Resource.ID_FIELD],
                  RR_ROLE: RR_OBJ}
    if subject_rel in rel.subject.v['relations']:
        raise ValueError('Duplicate relation for subject: {}'.format(rel))
    # note: hard for this to happen unless the relation was added manually
    if object_rel in rel.object.v['relations']:
        raise ValueError('Duplicate relation for object: {}'.format(rel))
    rel.subject.v['relations'].append(subject_rel)
    rel.object.v['relations'].append(object_rel)
def create_relation_args(*args):
    """Convenience form of `create_relation()`: takes the subject,
    predicate and object as three arguments instead of one Triple.
    """
    triple = Triple(*args)
    return create_relation(triple)
def triple_from_resource_relations(id_, rrel):
    """Build the Triple described by one entry of resource['relations'].

    Args:
        id_ (str): Identifier of the resource that holds the entry.
        rrel (dict): Stored relation with three keys,
                     see `create_relation()`.
    Return:
        Triple: Identifiers of subject and object, with the predicate.
    """
    if rrel[RR_ROLE] != RR_SUBJ:
        # the holding resource is the object of the relation
        return Triple(subject=rrel[RR_ID], predicate=rrel[RR_PRED],
                      object=id_)
    return Triple(subject=id_, predicate=rrel[RR_PRED],
                  object=rrel[RR_ID])
#
# Some handy-dandy conversion functions.
#
def date_float(value):
    """Convert a date, given in one of several forms, to a timestamp.

    Args:
        value: One of:

            - `pendulum.Pendulum`: its own `timestamp()` is returned
            - `datetime.datetime`
            - tuple: positional arguments for `datetime.datetime()`
            - string: parsed with `pendulum.parse()`
            - float or integer: passed to `datetime.fromtimestamp()`

    Returns:
        For a `pendulum.Pendulum`, the result of its `timestamp()`;
        otherwise the result of `datetime_timestamp()` for the datetime
        obtained from `value`.
    Raises:
        ValueError: if the value has an unsupported type or cannot be
                    converted.
    """
    def bad_date(e):
        raise ValueError('Cannot convert date "{}" to float: {}'
                         .format(value, e))

    dt = None
    if isinstance(value, pendulum.Pendulum):
        return value.timestamp()
    elif isinstance(value, datetime):
        dt = value
    elif isinstance(value, tuple):
        try:
            dt = datetime(*value)
        except (TypeError, ValueError) as err:
            # TypeError: wrong number/type of fields; ValueError: a
            # field is out of range
            bad_date(err)
    elif isinstance(value, six.string_types):
        try:
            dt = pendulum.parse(value)
        except pendulum.exceptions.ParserError as err:
            bad_date(err)
    elif isinstance(value, (float,) + six.integer_types):
        try:
            dt = datetime.fromtimestamp(value)
        except (ValueError, OverflowError, OSError) as err:
            # fromtimestamp() raises any of these for values outside
            # the range the platform supports
            bad_date(err)
    if dt is None:
        raise ValueError('Cannot convert date, unsupported type "{}" for '
                         'value "{}"'.format(type(value).__name__, value))
    return datetime_timestamp(dt)
# def isoformat(ts): # return datetime.fromtimestamp(ts).isoformat()
def version_list(value):
    """Semantic version.

    Three numeric identifiers, separated by a dot.
    Trailing non-numeric characters allowed.

    Inputs, string or tuple, may have less than three numeric
    identifiers, but internally the value will be padded with
    zeros to always be of length four.

    A list or tuple may also have four parts, in which case the fourth
    part (integer or string) is the release type.

    A leading dash or underscore in the trailing non-numeric characters
    is removed.

    Some examples:

    - 1 => valid => [1, 0, 0, '']
    - rc3 => invalid: no number
    - 1.1 => valid => [1, 1, 0, '']
    - 1a => valid => [1, 0, 0, 'a']
    - 1.a.1 => invalid: non-numeric can only go at end
    - 1.12.1 => valid => [1, 12, 1, '']
    - 1.12.13-1 => valid => [1, 12, 13, '1']
    - 1.12.13.x => invalid: too many parts
    - (1, 2, 3, 4) => valid => [1, 2, 3, '4']
    - (1, 2, 3, 4, 5) => invalid: too many parts

    Args:
        value: Version as a string, an integer, or a list/tuple of parts.
    Returns:
        list: [major:int, minor:int, patch:int, release-type:str]
    Raises:
        ValueError: if the value is not a valid version.
    """
    def bad_version(v):
        raise ValueError("Bad version: {}".format(v))

    ver = ()
    if isinstance(value, (list, tuple)):
        ver = value
    elif isinstance(value, int):
        ver = (value, 0, 0)
    elif isinstance(value, six.string_types):
        ver = value.split('.', 2)
    else:
        bad_version(value)
    # at most three numbers plus a release type
    if not 1 <= len(ver) <= 4:
        bad_version(value)
    verlist = []
    # leading version numbers
    for part in ver[:-1]:
        try:
            verlist.append(int(part))
        except (TypeError, ValueError):
            bad_version(value)
    # last part
    s = ver[-1]
    extra = ''
    if len(verlist) == 3:
        # all three numbers are known, so the whole of the last part
        # is the release type
        if isinstance(s, int):
            extra = str(s)
        elif isinstance(s, six.string_types):
            extra = s
        else:
            bad_version(value)
    elif isinstance(s, int):
        verlist.append(s)
    elif isinstance(s, six.string_types):
        if s:
            # optional number, then everything else is the release type
            m = re.match('([0-9]+)?(.*)', s)
            if m.group(1) is not None:
                verlist.append(int(m.group(1)))
            extra = m.group(2)
    else:
        # last version must be int or str
        bad_version(value)
    # must have at least one numbered version
    if len(verlist) == 0:
        bad_version(value)
    # pad with zeros, and add non-numeric ID
    while len(verlist) < 3:
        verlist.append(0)
    if extra and extra[0] == '.':
        # cannot start extra version with '.'
        bad_version(value)
    if extra and extra[0] in ('-', '_'):
        extra = extra[1:]
    verlist.append(extra)
    return verlist
def format_version(values):
    """Format a version as the string 'major.minor.patch[-release]'.

    Args:
        values (list|tuple): The three version numbers, optionally
            followed by a release type (as returned by `version_list()`).
            A missing or empty release type is left out of the result.
    Returns:
        str: The formatted version.
    """
    s = '{}.{}.{}'.format(*values[:3])
    # the release type is optional, e.g. for a plain (0, 0, 0) tuple
    if len(values) > 3 and values[3]:
        s += '-{}'.format(values[3])
    return s
def identifier_str(value=None):
    """Unique identifier.

    Args:
        value (str): If given, validate that it is a 32-byte str
                     If not given or None, set new random value.
    Returns:
        str: The validated or newly generated identifier.
    Raises:
        ValueError: if `value` is not exactly 32 lowercase hexadecimal
                    characters.
    """
    # regular expression for identifier: hex string len=32
    id_expr = '[0-9a-f]{32}'
    if value is None:
        value = uuid.uuid4().hex
    # re.match() only anchors the start; \Z rejects anything after
    # the 32 characters
    elif not re.match(id_expr + r'\Z', value):
        raise ValueError('Bad format for identifier "{}", must match '
                         'regular exprresion "{}"'.format(value, id_expr))
    return value