"""
Common models for all clld apps
"""
from __future__ import unicode_literals
import os
from collections import OrderedDict
from datetime import date
from itertools import product, groupby
from sqlalchemy import (
Column,
Float,
Integer,
String,
Boolean,
Unicode,
Date,
CheckConstraint,
UniqueConstraint,
ForeignKey,
desc,
)
from sqlalchemy.orm import (
relationship,
validates,
backref,
joinedload_all,
)
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.ext.associationproxy import association_proxy
from zope.interface import implementer
from clld.db.meta import Base, PolymorphicBaseMixin, DBSession
from clld.db.versioned import Versioned
from clld import interfaces
from clld.util import DeclEnum, cached_property
from clld.lib import bibtex
from clld.lib import coins
from clld.web.util.htmllib import HTML
from clld.web.icon import ORDERED_ICONS
class Config(Base):
    """Key-value store for configuration data kept in the database.

    The model is also (ab-)used to link database objects of arbitrary type without
    enforcing referential integrity, e.g. to model chains of superseding objects
    where a referenced object may itself become obsolete.
    """
    key = Column(Unicode)
    value = Column(Unicode)

    #: Value stored by :py:meth:`add_replacement` when no replacement is given.
    gone = '__gone__'

    @staticmethod
    def replacement_key(model, id_):
        """Build the key under which a replacement for an object is stored.

        :param model: Mapper name as string, or a model class or instance providing \
        a ``mapper_name`` method.
        :param id_: Identifier of a class instance.
        :return: ``str`` representation identifying a database object.
        """
        if isinstance(model, basestring):
            mapper_name = model
        else:
            mapper_name = model.mapper_name()
        return '__%s_%s__' % (mapper_name, id_)

    @classmethod
    def get_replacement_id(cls, model, id_):
        """Look up the replacement registered for the specified resource.

        :return: stored value (id of the replacement resource) or ``None`` if no \
        matching row exists.
        """
        key = cls.replacement_key(model, id_)
        row = DBSession.query(cls.value).filter(cls.key == key).first()
        return row[0] if row else None

    @classmethod
    def add_replacement(cls, replaced, replacement, model=None, session=None):
        """Register a replacement relation.

        :param replaced: db object or identifier of the object to be replaced.
        :param replacement: db object or identifier of the superseding object; a \
        false value registers the object as gone (see :py:attr:`gone`).
        :param model: If only an identifier is passed as ``replaced`` or \
        ``replacement`` the corresponding model class must be passed as ``model``.
        :param session: Db session the relation is added to (defaults to \
        ``DBSession``).
        """
        if replacement:
            value = getattr(replacement, 'id', replacement)
        else:
            value = cls.gone
        key = cls.replacement_key(
            model or replaced, getattr(replaced, 'id', replaced))
        (session or DBSession).add(cls(key=key, value=value))
class IdNameDescriptionMixin(object):
    """Mixin for 'visible' objects, i.e. anything that is displayed to humans or
    machines; in particular all :doc:`resources` belong to this category.

    .. note::

        :py:attr:`clld.db.models.common.IdNameDescriptionMixin.description` and
        :py:attr:`clld.db.models.common.IdNameDescriptionMixin.markup_description`
        are used mutually exclusively, so only one of them should be supplied.
    """
    #: ``str`` identifier of the object, usable for sorting and as part of a URL
    #: path. It should therefore only contain characters valid in URLs and must not
    #: contain '.' or '/', because these may trip up route matching.
    id = Column(String, unique=True)

    #: Human readable 'identifier' of the object.
    name = Column(Unicode)

    #: Plain description of the object.
    description = Column(Unicode)

    #: Description of the object containing HTML markup.
    markup_description = Column(Unicode)
#-----------------------------------------------------------------------------
# We augment mapper classes for basic objects using mixins to add the ability
# to store arbitrary key-value pairs and files associated with an object.
#-----------------------------------------------------------------------------
class FilesMixin(IdNameDescriptionMixin):
    """Mixin to associate files with instances of another model class.

    The owning model is derived from the name of the concrete class: it is the
    part of the class name before the first underscore.

    .. note::

        The file itself is not stored in the database but must be created in the
        file system, e.g. using :py:meth:`create`.
    """
    @classmethod
    def owner_class(cls):
        """Return the class name up to (not including) the first underscore."""
        return cls.__name__.partition('_')[0]

    #: Ordinal to control sorting of files associated with one db object.
    ord = Column(Integer, default=1)

    #: Mime-type of the file content.
    mime_type = Column(String)

    @declared_attr
    def object_pk(cls):
        """Foreign key to ``<owner>.pk`` with the lower-cased owner class name."""
        owner = cls.owner_class().lower()
        return Column(Integer, ForeignKey('%s.pk' % owner))

    @property
    def relpath(self):
        """OS file path of the file relative to the application's file-system
        directory, composed of lower-cased owner name, owner id and file id.
        """
        segments = (self.owner_class().lower(), str(self.object.id), str(self.id))
        return os.path.join(*segments)

    def create(self, dir_, content):
        """Write ``content`` to a file below ``dir_``, creating parent directories.

        :param dir_: Directory object providing ``joinpath``; the joined path must \
        support ``dirname().makedirs_p()`` (presumably a path.py object - confirm).
        :param content: Data written to the file, which is opened in binary mode.
        :return: File-system path of the file that was created.
        """
        target = dir_.joinpath(self.relpath)
        target.dirname().makedirs_p()
        with open(target, 'wb') as stream:
            stream.write(content)
        return target
class HasFilesMixin(object):
    """Mixin for model classes which may have associated files."""

    @property
    def files(self):
        """
        :return: ``dict`` of associated files keyed by ``id``.
        """
        return {f.id: f for f in self._files}

    @declared_attr
    def _files(cls):
        """Relationship to the ``<ClassName>_files`` model; the file objects get a
        backref ``object`` pointing to their owner.
        """
        target = cls.__name__ + '_files'
        return relationship(target, backref='object')
class DataMixin(object):
    """Mixin to attach arbitrary key-value pairs to another model class.

    The owning model is identified by class name: it is the part of the concrete
    class' name before the first underscore.
    """
    @classmethod
    def owner_class(cls):
        """Return the class name up to (not including) the first underscore."""
        return cls.__name__.partition('_')[0]

    key = Column(Unicode)
    value = Column(Unicode)

    #: Ordinal; used by ``HasDataMixin.data`` to order the pairs of one object.
    ord = Column(Integer, default=1)

    @declared_attr
    def object_pk(cls):
        """Foreign key to ``<owner>.pk`` with the lower-cased owner class name."""
        owner = cls.owner_class().lower()
        return Column(Integer, ForeignKey('%s.pk' % owner))
class HasDataMixin(object):
    """Adds a ``data`` relationship and a convenience method to retrieve the
    key-value pairs as ``dict``.

    .. note::

        It is the responsibility of the programmer to make sure conversion to a
        ``dict`` makes sense, i.e. the keys in data are actually unique and thus
        usable as dictionary keys.
    """
    def datadict(self):
        """
        :return: ``dict`` of associated key-value pairs.
        """
        return {d.key: d.value for d in self.data}

    @declared_attr
    def data(cls):
        """Relationship to the ``<ClassName>_data`` model, ordered by its ``ord``."""
        target = cls.__name__ + '_data'
        return relationship(target, order_by=target + '.ord')
class LanguageSource(Base, Versioned):
    """Association table linking languages and sources; each (language, source)
    pair may occur only once.
    """
    __table_args__ = (UniqueConstraint('language_pk', 'source_pk'),)

    language_pk = Column(Integer, ForeignKey('language.pk'))
    source_pk = Column(Integer, ForeignKey('source.pk'))
def _add_solr_language_info(res, obj):
"""
:param res: Solr document, i.e. a dict to which new keys will be added.
:param obj: object which is searched for language information.
:return: mutated dict res
"""
if getattr(obj, 'language', None):
res['language_t'] = obj.language.name
obj = obj.language
for attr in ['iso_code', 'glottocode']:
value = getattr(obj, attr, None)
if value:
res.update({attr + '_s': value})
return res
#-----------------------------------------------------------------------------
# The mapper classes for basic objects of the clld db model are marked as
# implementers of the related interface.
#-----------------------------------------------------------------------------
class Dataset_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Dataset`` objects (see ``DataMixin``)."""
class Dataset_files(Base, Versioned, FilesMixin):
    """Files associated with ``Dataset`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IDataset)
class Dataset(Base,
              PolymorphicBaseMixin,
              Versioned,
              IdNameDescriptionMixin,
              HasDataMixin,
              HasFilesMixin):
    """Each project (e.g. WALS, APiCS) is regarded as one dataset; thus, each app
    will have exactly one Dataset object.
    """
    published = Column(Date, default=date.today)
    publisher_name = Column(Unicode)
    publisher_place = Column(Unicode)
    publisher_url = Column(String)
    license = Column(String, default="http://creativecommons.org/licenses/by/3.0/")
    domain = Column(String, nullable=False)
    contact = Column(String)

    def get_stats(self, resources, **filters):
        """Count the objects of each resource type.

        :param resources: iterable of resource specs providing ``name`` and \
        ``model``; the resource named 'combination' is skipped.
        :param filters: filter criteria keyed by resource name; a criterion is \
        applied to the count query of the matching resource.
        :return: ``OrderedDict`` mapping resource name to number of objects.
        """
        stats = OrderedDict()
        for rsc in resources:
            if rsc.name == 'combination':
                continue
            query = DBSession.query(rsc.model)
            if rsc.name in filters:
                query = query.filter(filters[rsc.name])
            stats[rsc.name] = query.count()
        return stats

    def formatted_editors(self):
        """Return the editors' names in "last, first" format, joined by ' & '."""
        names = [editor.contributor.last_first() for editor in self.editors]
        return ' & '.join(names)

    def formatted_name(self):
        """Return the dataset name wrapped in a ``span`` carrying Dublin Core
        title/type attributes.
        """
        attrs = {
            'xmlns:dct': "http://purl.org/dc/terms/",
            'href': "http://purl.org/dc/dcmitype/Dataset",
            'property': "dct:title",
            'rel': "dct:type",
            'class': 'Dataset',
        }
        return HTML.span(self.name, **attrs)
class Language_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Language`` objects (see ``DataMixin``)."""
class Language_files(Base, Versioned, FilesMixin):
    """Files associated with ``Language`` objects (see ``FilesMixin``)."""
@implementer(interfaces.ILanguage)
class Language(Base,
               PolymorphicBaseMixin,
               Versioned,
               IdNameDescriptionMixin,
               HasDataMixin,
               HasFilesMixin):
    """Languages are the main objects of discourse. A geo-coordinate is attached
    to them, so that they can be put on maps.
    """
    #__table_args__ = (UniqueConstraint('name'),)

    #: Latitude, constrained to the range [-90, 90].
    latitude = Column(
        Float(), CheckConstraint('-90 <= latitude and latitude <= 90'))

    #: Longitude, constrained to the range [-180, 180].
    longitude = Column(
        Float(), CheckConstraint('-180 <= longitude and longitude <= 180 '))

    #: ``Identifier`` objects linked via the ``languageidentifier`` association.
    identifiers = association_proxy('languageidentifier', 'identifier')

    def get_identifier_objs(self, type_):
        """Return the identifiers of the given type.

        :param type_: type name or an object with a ``value`` attribute holding it \
        (e.g. an ``IdentifierType`` member).
        :return: ``list`` of matching identifier objects.
        """
        wanted = getattr(type_, 'value', type_)
        return [ident for ident in self.identifiers if ident.type == wanted]

    def get_identifier(self, type_):
        """Return the name of the first identifier of the given type, or ``None``."""
        matches = self.get_identifier_objs(type_)
        return matches[0].name if matches else None

    @property
    def iso_code(self):
        """Name of the first identifier of type ``IdentifierType.iso`` or ``None``."""
        return self.get_identifier(IdentifierType.iso)

    @property
    def glottocode(self):
        """Name of the first identifier of type ``IdentifierType.glottolog`` or
        ``None``.
        """
        return self.get_identifier(IdentifierType.glottolog)

    def __solr__(self, req):
        """Extend the base Solr document with alternative names (identifiers of
        type 'name') and language codes.
        """
        doc = Base.__solr__(self, req)
        doc['altname_txt'] = [
            ident.name for ident in self.identifiers if ident.type == 'name']
        return _add_solr_language_info(doc, self)
class DomainElement_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``DomainElement`` objects (see ``DataMixin``)."""
class DomainElement_files(Base, Versioned, FilesMixin):
    """Files associated with ``DomainElement`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IDomainElement)
class DomainElement(Base,
                    PolymorphicBaseMixin,
                    Versioned,
                    IdNameDescriptionMixin,
                    HasDataMixin,
                    HasFilesMixin):
    """DomainElements can be used to model controlled lists of values for a
    Parameter. Both name and number are unique within one parameter.
    """
    __table_args__ = (
        UniqueConstraint('name', 'parameter_pk'),
        UniqueConstraint('number', 'parameter_pk'),
    )

    parameter_pk = Column(Integer, ForeignKey('parameter.pk'))

    #: Used to sort domain elements within the domain of one parameter.
    number = Column(Integer)

    #: Abbreviated name, e.g. as label for map legends.
    abbr = Column(Unicode)
class Parameter_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Parameter`` objects (see ``DataMixin``)."""
class Parameter_files(Base, Versioned, FilesMixin):
    """Files associated with ``Parameter`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IParameter)
class Parameter(Base,
                PolymorphicBaseMixin,
                Versioned,
                IdNameDescriptionMixin,
                HasDataMixin,
                HasFilesMixin):
    """A measurable attribute of a language. Parameter names are unique."""
    __table_args__ = (UniqueConstraint('name'),)

    #: Domain elements of the parameter ordered by ``number``; each element gets a
    #: backref ``parameter``.
    domain = relationship(
        'DomainElement',
        backref='parameter',
        order_by=DomainElement.number)
class CombinationDomainElement(object):
    """Element of the domain of a combination of parameters, i.e. one combination
    of domain elements of the individual parameters.
    """
    def __init__(self, combination, domainelements, icon=None):
        """
        :param combination: the combination the element belongs to (not stored).
        :param domainelements: objects providing ``number`` and ``name``; iterated \
        twice, so a re-iterable sequence is expected.
        :param icon: optional icon associated with the element.
        """
        # the tuple of numbers serves as key identifying the element.
        self.number = tuple(de.number for de in domainelements)
        self.id = '-'.join(str(number) for number in self.number)
        self.name = ' / '.join(de.name for de in domainelements)
        self.icon = icon
        # languages are appended by the code building the domain.
        self.languages = []
@implementer(interfaces.ICombination)
class Combination(object):
    """A combination of parameters.

    This is not a mapped class; instances are assembled from ``Parameter`` objects.
    """
    #: Separator of the parameter ids within the id of a combination.
    delimiter = '_'

    def __init__(self, *parameters):
        """
        :param parameters: distinct Parameter instances (at most four).
        """
        assert len(parameters) < 5
        assert len(set(parameters)) == len(parameters)

        self.id = self.delimiter.join(str(p.id) for p in parameters)
        self.name = ' / '.join(p.name for p in parameters)
        self.parameters = parameters
        # we keep track of languages with multiple values; filled (and converted
        # to a set) when the domain is computed.
        self.multiple = []

    @classmethod
    def mapper_name(cls):
        """Return the name identifying this pseudo-model as native ``str``."""
        return str('combination')

    @classmethod
    def get(cls, id_, **kw):
        """Create the combination for an id made of delimiter-separated parameter
        ids.

        Duplicate ids are collapsed via ``set``, so the order of the parameters is
        the iteration order of that set, not necessarily the order within ``id_``.

        :param id_: combination id, e.g. ``'1_2'``.
        :param kw: ignored.
        :return: ``Combination`` instance.
        """
        def load(pid):
            # ``one()`` raises if there is no (unique) parameter with this id.
            return DBSession.query(Parameter)\
                .filter(Parameter.id == pid)\
                .options(joinedload_all(Parameter.domain))\
                .one()

        return cls(*[load(pid) for pid in set(id_.split(cls.delimiter))])

    @cached_property()
    def domain(self):
        """Domain of the combination: one ``CombinationDomainElement`` for each
        element of the cartesian product of the parameters' domains, each listing
        the languages exhibiting this combination of values.

        As a side effect ``self.multiple`` is set to the ``set`` of languages
        which contribute more than one combination of values.

        .. note::

            This does only work well with parameters which have a discrete domain.
        """
        elements = OrderedDict()
        domains = [p.domain for p in self.parameters]
        for i, des in enumerate(product(*domains)):
            # icons are assigned round-robin.
            cde = CombinationDomainElement(
                self, des, icon=ORDERED_ICONS[i % len(ORDERED_ICONS)])
            elements[cde.number] = cde

        by_language = groupby(
            sorted(self.values, key=lambda v: v.valueset.language_pk),
            lambda i: i.valueset.language,
        )
        for language, values in by_language:
            # values may contain multiple values for the same parameter, so we
            # have to group those, too.
            values_by_parameter = OrderedDict(
                (p.pk, []) for p in self.parameters)
            for value in values:
                values_by_parameter[value.valueset.parameter_pk].append(value)

            for i, cv in enumerate(product(*values_by_parameter.values())):
                key = tuple(v.domainelement.number for v in cv)
                elements[key].languages.append(language)
                if i > 0:
                    # a language with multiple values, store a reference.
                    self.multiple.append(language)

        self.multiple = set(self.multiple)
        return elements.values()

    @cached_property()
    def values(self):
        """Values for the parameters of the combination, restricted to languages
        which have a valueset for each of the parameters.
        """
        def _combine(query, operation):
            # apply ``operation`` (a query method name like 'intersect' or 'union')
            # to the per-parameter restrictions of ``query``.
            first = query.filter(Parameter.pk == self.parameters[0].pk)
            others = [
                query.filter(Parameter.pk == p.pk) for p in self.parameters[1:]]
            return getattr(first, operation)(*others)

        # determine relevant languages, i.e. languages having a value for all
        # parameters:
        languages = _combine(
            DBSession.query(Language.pk).join(ValueSet).join(Parameter),
            'intersect').subquery()

        # value query:
        value_query = DBSession.query(Value)\
            .join(Value.valueset)\
            .join(ValueSet.parameter)\
            .filter(ValueSet.language_pk.in_(languages))\
            .options(
                joinedload_all(Value.domainelement),
                joinedload_all(Value.valueset, ValueSet.language))
        return _combine(value_query, 'union').all()
class Source_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Source`` objects (see ``DataMixin``)."""
class Source_files(Base, Versioned, FilesMixin):
    """Files associated with ``Source`` objects (see ``FilesMixin``)."""
@implementer(interfaces.ISource)
class Source(Base,
             PolymorphicBaseMixin,
             Versioned,
             IdNameDescriptionMixin,
             HasDataMixin,
             HasFilesMixin):
    """A bibliographic record, cited as source for some statement."""
    glottolog_id = Column(String)
    google_book_search_id = Column(String)

    #
    # BibTeX fields:
    #
    bibtex_type = Column(bibtex.EntryType.db_type())
    author = Column(Unicode)
    year = Column(Unicode)
    title = Column(Unicode)
    type = Column(Unicode)
    booktitle = Column(Unicode)
    editor = Column(Unicode)
    pages = Column(Unicode)
    edition = Column(Unicode)
    journal = Column(Unicode)
    school = Column(Unicode)
    address = Column(Unicode)
    url = Column(Unicode)
    note = Column(Unicode)
    number = Column(Unicode)
    series = Column(Unicode)
    volume = Column(Unicode)
    publisher = Column(Unicode)
    organization = Column(Unicode)
    chapter = Column(Unicode)
    howpublished = Column(Unicode)

    # typed information we might want to use for searching or sorting:
    year_int = Column(Integer)
    startpage_int = Column(Integer)
    pages_int = Column(Integer)

    #: Languages linked via the ``LanguageSource`` table; languages get a backref
    #: ``sources``.
    languages = relationship(
        Language, backref='sources', secondary=LanguageSource.__table__)

    @property
    def gbs_identifier(self):
        """Identifier taken from the ``gbs`` entry of ``jsondata`` (presumably
        Google Book Search metadata - confirm).

        :return: ``'ISBN:<number>'`` for an ISBN_13 or else ISBN_10 identifier, \
        the bare identifier of the last entry if there is no ISBN, or ``None`` if \
        no identifiers are available.
        """
        jsondata = self.jsondata
        if not jsondata or not jsondata.get('gbs'):
            return

        identifiers = jsondata['gbs']['volumeInfo'].get('industryIdentifiers')
        if not identifiers:
            return

        id_ = None
        for identifier in identifiers:
            # prefer ISBN_13 over ISBN_10 over anything else
            kind = identifier['type']
            if kind == 'ISBN_13':
                id_ = 'ISBN:' + identifier['identifier']
            elif kind == 'ISBN_10' and not id_:
                id_ = 'ISBN:' + identifier['identifier']

        if not id_:
            # grab the last one in the list (most probably the only one!)
            id_ = identifier['identifier']
        return id_

    def __bibtex__(self):
        """Hook to supply additional BibTeX fields; the base class adds none."""
        return {}

    def bibtex(self):
        """Return a ``bibtex.Record`` for the source, built from the object, the
        items of ``jsondatadict`` (except 'gbs' and 'glottolog_ref_id') and the
        fields supplied by :py:meth:`__bibtex__`.
        """
        exclude = ['gbs', 'glottolog_ref_id']
        data = self.jsondatadict
        kw = {key: data[key] for key in data if key not in exclude}
        kw.update(self.__bibtex__())
        return bibtex.Record.from_object(self, **kw)

    def coins(self, req):
        """Return a ``span`` element whose attributes are computed by
        ``coins.ContextObject`` from the BibTeX record (presumably COinS
        metadata - confirm).
        """
        context = coins.ContextObject.from_bibtex(req.dataset.name, self.bibtex())
        return HTML.span(' ', **context.span_attrs())
class Contribution_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Contribution`` objects (see ``DataMixin``)."""
class Contribution_files(Base, Versioned, FilesMixin):
    """Files associated with ``Contribution`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IContribution)
class Contribution(Base,
                   PolymorphicBaseMixin,
                   Versioned,
                   IdNameDescriptionMixin,
                   HasDataMixin,
                   HasFilesMixin):
    """A set of data contributed within the same context by the same set of
    contributors. Contribution names are unique.
    """
    __table_args__ = (UniqueConstraint('name'),)

    date = Column(Date)

    def _sorted_contributor_assocs(self):
        """Return the contributor associations sorted by ``ord`` and contributor id."""
        return sorted(
            self.contributor_assocs, key=lambda a: (a.ord, a.contributor.id))

    @property
    def primary_contributors(self):
        """Contributors flagged as primary, in association order."""
        return [
            assoc.contributor for assoc in self._sorted_contributor_assocs()
            if assoc.primary]

    @property
    def secondary_contributors(self):
        """Contributors not flagged as primary, in association order."""
        return [
            assoc.contributor for assoc in self._sorted_contributor_assocs()
            if not assoc.primary]

    def formatted_contributors(self):
        """Return the contributor names as "A and B with C and D", where the part
        after 'with' lists the secondary contributors and is omitted if there are
        none.
        """
        groups = [' and '.join(c.name for c in self.primary_contributors)]
        secondary = self.secondary_contributors
        if secondary:
            groups.append(' and '.join(c.name for c in secondary))
        return ' with '.join(groups)
class ValueSet_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``ValueSet`` objects (see ``DataMixin``)."""
class ValueSet_files(Base, Versioned, FilesMixin):
    """Files associated with ``ValueSet`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IValueSet)
class ValueSet(Base,
               PolymorphicBaseMixin,
               Versioned,
               IdNameDescriptionMixin,
               HasDataMixin,
               HasFilesMixin):
    """The intersection of Language and Parameter."""
    language_pk = Column(Integer, ForeignKey('language.pk'))
    parameter_pk = Column(Integer, ForeignKey('parameter.pk'))
    contribution_pk = Column(Integer, ForeignKey('contribution.pk'))
    source = Column(Unicode)

    #: The parameter; parameters get a backref ``valuesets``.
    parameter = relationship('Parameter', backref='valuesets')

    @declared_attr
    def contribution(cls):
        """The contribution; contributions get a backref ``valuesets`` ordered by
        ``parameter_pk``.
        """
        valuesets = backref('valuesets', order_by=cls.parameter_pk)
        return relationship('Contribution', backref=valuesets)

    @declared_attr
    def language(cls):
        """The language; languages get a backref ``valuesets`` ordered by
        ``language_pk``.
        """
        valuesets = backref('valuesets', order_by=cls.language_pk)
        return relationship('Language', backref=valuesets)

    @property
    def name(self):
        """Name composed of language name and parameter name."""
        language_name = self.language.name
        parameter_name = self.parameter.name
        return language_name + ' / ' + parameter_name
class Value_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Value`` objects (see ``DataMixin``)."""
class Value_files(Base, Versioned, FilesMixin):
    """Files associated with ``Value`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IValue)
class Value(Base,
            PolymorphicBaseMixin,
            Versioned,
            IdNameDescriptionMixin,
            HasDataMixin,
            HasFilesMixin):
    """A measurement of a parameter for a particular language."""
    # we must override the pk col declaration from Base to have it available for
    # ordering.
    pk = Column(Integer, primary_key=True)
    valueset_pk = Column(Integer, ForeignKey('valueset.pk'))

    # Values may be taken from a domain.
    domainelement_pk = Column(Integer, ForeignKey('domainelement.pk'))

    # Languages may have multiple values for the same parameter. Their relative
    # frequency can be stored here.
    frequency = Column(Float)
    confidence = Column(Unicode)

    #: The domain element, if any; domain elements get a backref ``values``.
    domainelement = relationship('DomainElement', backref='values')

    @declared_attr
    def valueset(cls):
        """The valueset; valuesets get a backref ``values`` ordered by descending
        frequency, then confidence, then pk.
        """
        ordering = [desc(cls.frequency), cls.confidence, cls.pk]
        return relationship(ValueSet, backref=backref('values', order_by=ordering))

    def __json__(self, req):
        """Extend the base JSON representation with the serialized domain element
        (``None`` if there is none) and valueset.
        """
        res = Base.__json__(self, req)
        if self.domainelement:
            res['domainelement'] = self.domainelement.__json__(req)
        else:
            res['domainelement'] = None
        res['valueset'] = self.valueset.__json__(req)
        return res

    def __unicode__(self):
        """Return the name of the domain element if there is one, otherwise the
        value's own name, falling back to its id.
        """
        if self.domainelement:
            return self.domainelement.name
        return self.name or self.id
class Contributor_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Contributor`` objects (see ``DataMixin``)."""
class Contributor_files(Base, Versioned, FilesMixin):
    """Files associated with ``Contributor`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IContributor)
class Contributor(Base,
                  PolymorphicBaseMixin,
                  Versioned,
                  IdNameDescriptionMixin,
                  HasDataMixin,
                  HasFilesMixin):
    """Creator of a contribution. Contributor names are unique."""
    __table_args__ = (UniqueConstraint('name'),)

    url = Column(Unicode())
    email = Column(String)
    address = Column(Unicode)

    def last_first(self):
        """ad hoc - possibly incorrect - way of formatting the name as "last, first"

        The last whitespace-separated word of the name is regarded as last name,
        everything before it as first name(s).

        :return: ``'last, first'``; the bare name if it consists of a single word \
        and ``''`` if there is no name.
        """
        parts = (self.name or '').split()
        if len(parts) < 2:
            # nothing to reorder; in particular do not emit a dangling ', ' after
            # a single-word name.
            return ''.join(parts)
        return ', '.join([parts[-1], ' '.join(parts[:-1])])
class Sentence_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Sentence`` objects (see ``DataMixin``)."""
class Sentence_files(Base, Versioned, FilesMixin):
    """Files associated with ``Sentence`` objects (see ``FilesMixin``)."""
@implementer(interfaces.ISentence)
class Sentence(Base,
               PolymorphicBaseMixin,
               Versioned,
               IdNameDescriptionMixin,
               HasDataMixin,
               HasFilesMixin):
    """Sentence of a language serving as example for some statement."""

    #: Inherited from IdNameDescriptionMixin:
    #: name: The text of the sentence in object language
    #: description: A translation of the sentence

    analyzed = Column(Unicode)
    gloss = Column(Unicode)
    type = Column(Unicode)
    source = Column(Unicode)
    comment = Column(Unicode)
    original_script = Column(Unicode)
    xhtml = Column(Unicode)

    #: The following columns store data which contains markup and should be looked
    #: at first, when rendering a sentence:
    markup_text = Column(Unicode)
    markup_analyzed = Column(Unicode)
    markup_gloss = Column(Unicode)
    markup_comment = Column(Unicode)

    language_pk = Column(Integer, ForeignKey('language.pk'))

    @declared_attr
    def language(cls):
        """The language; languages get a backref ``sentences`` ordered by
        ``language_pk``.
        """
        return relationship(
            'Language', backref=backref('sentences', order_by=cls.language_pk))

    def __solr__(self, req):
        """Return the base Solr document enriched with language information."""
        return _add_solr_language_info(Base.__solr__(self, req), self)

    @property
    def audio(self):
        """First associated file with an ``audio/*`` mime-type, or ``None``."""
        for f in self._files:
            # mime_type is a nullable column; a file without mime-type cannot be
            # identified as audio and must not break the lookup.
            if f.mime_type and f.mime_type.split('/')[0] == 'audio':
                return f
        return None
class Unit_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``Unit`` objects (see ``DataMixin``)."""
class Unit_files(Base, Versioned, FilesMixin):
    """Files associated with ``Unit`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IUnit)
class Unit(Base,
           PolymorphicBaseMixin,
           Versioned,
           IdNameDescriptionMixin,
           HasDataMixin,
           HasFilesMixin):
    """A linguistic unit of a language."""
    language_pk = Column(Integer, ForeignKey('language.pk'))

    #: The language the unit belongs to.
    language = relationship(Language)

    def __solr__(self, req):
        """Return the base Solr document enriched with language information."""
        doc = Base.__solr__(self, req)
        return _add_solr_language_info(doc, self)
class UnitDomainElement_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``UnitDomainElement`` objects (see
    ``DataMixin``).
    """
class UnitDomainElement_files(Base, Versioned, FilesMixin):
    """Files associated with ``UnitDomainElement`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IUnitDomainElement)
class UnitDomainElement(Base,
                        PolymorphicBaseMixin,
                        Versioned,
                        IdNameDescriptionMixin,
                        HasDataMixin,
                        HasFilesMixin):
    """Element of the domain of a ``UnitParameter``."""
    unitparameter_pk = Column(Integer, ForeignKey('unitparameter.pk'))

    #: Ordinal of the element.
    ord = Column(Integer)

    # do we need a numeric value for these?
class UnitParameter_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``UnitParameter`` objects (see ``DataMixin``)."""
class UnitParameter_files(Base, Versioned, FilesMixin):
    """Files associated with ``UnitParameter`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IUnitParameter)
class UnitParameter(Base,
                    PolymorphicBaseMixin,
                    Versioned,
                    IdNameDescriptionMixin,
                    HasDataMixin,
                    HasFilesMixin):
    """A measurable attribute of a unit."""

    #: Domain elements of the parameter ordered by ``id``; each element gets a
    #: backref ``parameter``.
    domain = relationship(
        'UnitDomainElement',
        backref='parameter',
        order_by=UnitDomainElement.id)
class UnitValue_data(Base, Versioned, DataMixin):
    """Key-value pairs attached to ``UnitValue`` objects (see ``DataMixin``)."""
class UnitValue_files(Base, Versioned, FilesMixin):
    """Files associated with ``UnitValue`` objects (see ``FilesMixin``)."""
@implementer(interfaces.IUnitValue)
class UnitValue(Base,
                PolymorphicBaseMixin,
                Versioned,
                IdNameDescriptionMixin,
                HasDataMixin,
                HasFilesMixin):
    """A value of a ``UnitParameter`` for a particular ``Unit``."""
    unit_pk = Column(Integer, ForeignKey('unit.pk'))
    unitparameter_pk = Column(Integer, ForeignKey('unitparameter.pk'))
    contribution_pk = Column(Integer, ForeignKey('contribution.pk'))

    # Values may be taken from a domain.
    unitdomainelement_pk = Column(Integer, ForeignKey('unitdomainelement.pk'))

    # Languages may have multiple values for the same parameter. Their relative
    # frequency can be stored here.
    frequency = Column(Float)

    # each of the related classes gets a backref ``unitvalues``.
    unitparameter = relationship('UnitParameter', backref='unitvalues')
    unitdomainelement = relationship('UnitDomainElement', backref='unitvalues')
    contribution = relationship('Contribution', backref='unitvalues')

    @declared_attr
    def unit(cls):
        """The unit; units get a backref ``unitvalues`` ordered by ``unit_pk``."""
        unitvalues = backref('unitvalues', order_by=cls.unit_pk)
        return relationship('Unit', backref=unitvalues)

    @validates('unitparameter_pk')
    def validate_parameter_pk(self, key, unitparameter_pk):
        """We have to make sure, the parameter a value is tied to and the parameter
        a possible domainelement is tied to stay in sync.

        :raises AssertionError: if the domain element belongs to another parameter.
        :return: the validated ``unitparameter_pk``.
        """
        domainelement = self.unitdomainelement
        if domainelement and domainelement.unitparameter_pk:
            assert domainelement.unitparameter_pk == unitparameter_pk
        return unitparameter_pk

    def __unicode__(self):
        """Return the name of the domain element if there is one, otherwise the
        value's own name, falling back to its id.
        """
        if self.unitdomainelement:
            return self.unitdomainelement.name
        return self.name or self.id
#-----------------------------------------------------------------------------
# Non-core mappers and association tables
#-----------------------------------------------------------------------------
class GlossAbbreviation(Base, Versioned, IdNameDescriptionMixin):
    """Gloss abbreviation, optionally tied to a language; the combination of id and
    language is unique.
    """
    __table_args__ = (UniqueConstraint('id', 'language_pk'),)

    language_pk = Column(Integer, ForeignKey('language.pk'))

    #: The language; languages get a backref ``gloss_abbreviations``.
    language = relationship(Language, backref="gloss_abbreviations")
class IdentifierType(DeclEnum):
    """Known types of language identifiers.

    Each member is declared as (value, description, URL template); the template
    is formatted with an ``Identifier`` object by ``Identifier.url``.
    """
    iso = (
        'iso639-3',
        'ISO 639-3',
        'http://www.sil.org/iso639-3/documentation.asp?id={0.name}')
    wals = (
        'wals',
        'WALS Code',
        'http://wals.info/languoid/lect/wals_code_{0.name}')
    glottolog = (
        'glottolog',
        'Glottocode',
        'http://glottolog.org/resource/languoid/id/{0.name}')
    ethnologue = (
        'ethnologue',
        'Ethnologue',
        'http://www.ethnologue.com/language/{0.name}')
class Identifier(Base, Versioned, IdNameDescriptionMixin):
    """We want to be able to link languages to languages in other systems. Thus,
    we store identifiers of various types like 'wals', 'iso639-3', 'glottolog'.
    But we might as well just store alternative names for languages.

    The combination of name, type and description is unique.
    """
    __table_args__ = (UniqueConstraint('name', 'type', 'description'),)

    # re-declared without the unique flag used in IdNameDescriptionMixin.
    id = Column(String)
    type = Column(String)
    lang = Column(String(3), default='en')

    def url(self):
        """Return the URL for the identifier, computed from the URL template of
        the matching ``IdentifierType``, or ``None`` if a ``ValueError`` is raised
        while resolving the type or formatting the template.
        """
        try:
            identifier_type = IdentifierType.from_string(self.type)
            return identifier_type.args[0].format(self)
        except ValueError:
            return None
class LanguageIdentifier(Base, Versioned):
    """Languages are linked to identifiers with an optional description of this
    linkage, e.g. 'is dialect of'.
    """
    language_pk = Column(Integer, ForeignKey('language.pk'))
    identifier_pk = Column(Integer, ForeignKey('identifier.pk'))
    description = Column(Unicode)

    identifier = relationship(Identifier)

    #: The language; languages get a backref ``languageidentifier`` which cascades
    #: all operations including deletion of orphaned associations.
    language = relationship(
        Language,
        backref=backref("languageidentifier", cascade="all, delete-orphan"))
#
# Several objects can be linked to sources, i.e. they can have references.
#
class HasSourceMixin(object):
    """Mixin for models representing a reference, i.e. a link to a ``Source``."""

    #: The citation key, specific (and unique) within a contribution.
    key = Column(Unicode)

    #: Description of the reference, e.g. page numbers.
    description = Column(Unicode)

    @declared_attr
    def source_pk(cls):
        """Foreign key to the source."""
        return Column(Integer, ForeignKey('source.pk'))

    @declared_attr
    def source(cls):
        """The source; sources get a backref named after the lower-cased class
        name with 's' appended.
        """
        backref_name = cls.__name__.lower() + 's'
        return relationship(Source, backref=backref_name)
class SentenceReference(Base, Versioned, HasSourceMixin):
    """Reference linking a sentence to a source."""
    sentence_pk = Column(Integer, ForeignKey('sentence.pk'))

    #: The sentence; sentences get a backref ``references``.
    sentence = relationship(Sentence, backref="references")
class ContributionReference(Base, Versioned, HasSourceMixin):
    """Reference linking a contribution to a source."""
    contribution_pk = Column(Integer, ForeignKey('contribution.pk'))

    #: The contribution; contributions get a backref ``references``.
    contribution = relationship(Contribution, backref="references")
class ValueSetReference(Base, Versioned, HasSourceMixin):
    """References for a set of values (related to one parameter and one language).

    These references can also be interpreted as justifications why a language does
    not "have" certain values for a parameter.
    """
    valueset_pk = Column(Integer, ForeignKey('valueset.pk'))

    #: The valueset; valuesets get a backref ``references``.
    valueset = relationship(ValueSet, backref="references")
class ContributionContributor(Base, PolymorphicBaseMixin, Versioned):
    """Many-to-many association between contributors and contributions."""
    contribution_pk = Column(Integer, ForeignKey('contribution.pk'))
    contributor_pk = Column(Integer, ForeignKey('contributor.pk'))

    #: Contributors are ordered.
    ord = Column(Integer, default=1)

    #: We distinguish between primary and secondary (a.k.a. 'with ...')
    #: contributors.
    primary = Column(Boolean, default=True)

    #: The contribution; contributions get a backref ``contributor_assocs``.
    contribution = relationship(Contribution, backref='contributor_assocs')

    #: The contributor (``lazy=False``); contributors get a backref
    #: ``contribution_assocs``.
    contributor = relationship(
        Contributor, lazy=False, backref='contribution_assocs')
class Editor(Base, PolymorphicBaseMixin, Versioned):
    """Many-to-many association between contributors and dataset."""
    dataset_pk = Column(Integer, ForeignKey('dataset.pk'))
    contributor_pk = Column(Integer, ForeignKey('contributor.pk'))

    #: Contributors are ordered.
    ord = Column(Integer, default=1)

    #: We distinguish between primary and secondary (a.k.a. 'with ...')
    #: contributors.
    primary = Column(Boolean, default=True)

    #: The contributor (``lazy=False``).
    contributor = relationship(Contributor, lazy=False)

    @declared_attr
    def dataset(cls):
        """The dataset; datasets get a backref ``editors`` (``lazy=False``) ordered
        by ``primary`` and ``ord``.
        """
        editors = backref('editors', order_by=[cls.primary, cls.ord], lazy=False)
        return relationship(Dataset, backref=editors)
class ValueSentence(Base, PolymorphicBaseMixin, Versioned):
    """Many-to-many association between values and sentences given as explanation
    of a value.
    """
    value_pk = Column(Integer, ForeignKey('value.pk'))
    sentence_pk = Column(Integer, ForeignKey('sentence.pk'))
    description = Column(Unicode())

    #: The value; values get a backref ``sentence_assocs``.
    value = relationship(Value, backref='sentence_assocs')

    #: The sentence (ordered by sentence id); sentences get a backref
    #: ``value_assocs``.
    sentence = relationship(
        Sentence, backref='value_assocs', order_by=Sentence.id)
#
# TODO: UnitValueSentence!
#
class UnitParameterUnit(
        Base, PolymorphicBaseMixin, Versioned, IdNameDescriptionMixin):
    """Association between units and unit parameters."""
    unit_pk = Column(Integer, ForeignKey('unit.pk'))
    unitparameter_pk = Column(Integer, ForeignKey('unitparameter.pk'))

    #: The unit; units get a backref ``unitparameter_assocs``.
    unit = relationship(Unit, backref='unitparameter_assocs')

    #: The unit parameter; unit parameters get a backref ``unit_assocs``.
    unitparameter = relationship(UnitParameter, backref='unit_assocs')