Source code for clld.web.views.sitemap

"""
views implementing the sitemap protocol

.. seealso:: http://www.sitemaps.org/
"""
from itertools import groupby

from pyramid.response import Response
from pyramid.httpexceptions import HTTPNotFound

from clld import RESOURCES
from clld.db.meta import DBSession
from clld.db.models import common
from clld.web.util.helpers import get_url_template


# http://www.sitemaps.org/protocol.html#index
LIMIT = 50000


[docs]def robots(req): """ .. seealso:: http://www.sitemaps.org/protocol.html#submit_robots """ return Response( "Sitemap: %s\n" % req.route_url('sitemapindex'), content_type="text/plain")
def _query(req, rsc): """we must make sure, each query is ordered, so that limit and offset does make sense. """ return DBSession.query(rsc.model.id, rsc.model.updated).order_by(rsc.model.pk) def _e(name, *content): return '<{0}>{1}</{0}>'.format(name, ''.join(content)) def _response(type_, itemiter): def serialize(item): name = 'url' if type_ == 'urlset' else 'sitemap' return _e(name, *[_e(k, v) for k, v in item.items()]) return Response( """\ <?xml version="1.0" encoding="UTF-8"?> <{0} xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> {1} </{0}>""".format(type_, '\n'.join(map(serialize, itemiter))), content_type="application/xml")
[docs]def sitemapindex(req): """ .. seealso:: http://www.sitemaps.org/protocol.html#index """ def _iter(sitemaps): for r in RESOURCES: if r.with_index and r.name in sitemaps: n, m = divmod(_query(req, r).count(), LIMIT) if m: n += 1 for i in range(n): yield dict(loc=req.route_url('sitemap', rsc=r.name, n=i)) return _response('sitemapindex', _iter(req.registry.settings.get('sitemaps', [])))
[docs]def sitemap(req): """ .. seealso:: http://www.sitemaps.org/protocol.html#xmlTagDefinitions """ def _iter(): for r in RESOURCES: if r.name == req.matchdict['rsc']: query = _query(req, r)\ .offset(LIMIT * int(req.matchdict['n']))\ .limit(LIMIT) for id_, updated in query: yield dict( loc=req.route_url(r.name, id=id_), lastmod=str(updated).split(' ')[0]) return _response('urlset', _iter())
def resourcemap(req): res = {'properties': {'dataset': req.dataset.id}, 'resources': []} rsc = req.params.get('rsc') if rsc: res['properties']['uri_template'] = get_url_template(req, rsc, relative=False) if rsc == 'language': q = DBSession.query( common.Language.id, common.Language.name, common.Language.latitude, common.Language.longitude, common.Identifier.type, common.Identifier.name)\ .join(common.Language.languageidentifier)\ .join(common.LanguageIdentifier.identifier)\ .filter(common.Language.active == True)\ .filter(common.Identifier.type != 'name')\ .order_by(common.Language.id) for lang, codes in groupby(q, lambda r: (r[0], r[1], r[2], r[3])): res['resources'].append({ 'id': lang[0], 'name': lang[1], 'latitude': lang[2], 'longitude': lang[3], 'identifiers': [ {'type': c.type, 'identifier': c.name.lower() if c.type.startswith('WALS') else c.name} for c in codes]}) return res if rsc == 'parameter': for id, name in DBSession.query( common.Parameter.id, common.Parameter.name, ).order_by(common.Parameter.pk): res['resources'].append({'id': id, 'name': name}) return res return HTTPNotFound()