"""
pelican-rdf
===========
This plugin integrates a new type of media, the vocabulary, into Pelican.
Vocabularies are .rdf or .owl files; metadata about them is collected
through SPARQL queries.
"""
import logging
from os import listdir
from os.path import isfile, join

import requests
from blinker import signal

from pelican import signals
from pelican.contents import Page, is_valid_content
from pelican.generators import CachingGenerator
from pelican.readers import BaseReader
try:
    import rdflib
    # Import the query processor eagerly so missing SPARQL support is
    # detected here rather than at query time.
    from rdflib.query import Processor
    rdflib_loaded = True
except ImportError:
    rdflib_loaded = False

logger = logging.getLogger(__name__)

voc_generator_init = signal('voc_generator_init')
voc_generator_finalized = signal('voc_generator_finalized')
voc_writer_finalized = signal('voc_writer_finalized')
voc_generator_preread = signal('voc_generator_preread')
voc_generator_context = signal('voc_generator_context')


class VocabularyGenerator(CachingGenerator):
    """Generate vocabulary descriptions."""

    # Temporary directory into which remote vocabularies are dereferenced.
    _local_vocabulary_path = "/tmp/"

    def __init__(self, *args, **kwargs):
        logger.debug("Vocabulary generator called")
        self.vocabularies = []
        super(VocabularyGenerator, self).__init__(*args, **kwargs)

    # Called for both local and remote vocabulary context creation.
    # Performs the actual vocabulary generation.
    def generate_vocabulary_context(
            self, vocabulary_file_name, path_to_vocabulary):
        logger.debug(
            "Generating vocabulary context for %s/%s",
            path_to_vocabulary, vocabulary_file_name)
        voc = self.get_cached_data(vocabulary_file_name, None)
        if voc is None:
            try:
                voc = self.readers.read_file(
                    base_path=path_to_vocabulary,
                    path=vocabulary_file_name,
                    content_class=Vocabulary,
                    context=self.context,
                    preread_signal=voc_generator_preread,
                    preread_sender=self,
                    context_signal=voc_generator_context,
                    context_sender=self)
            except Exception as e:
                logger.error(
                    'Could not process %s\n%s', vocabulary_file_name, e,
                    exc_info=self.settings.get('DEBUG', False))
                self._add_failed_source_path(vocabulary_file_name)
                return

        if not is_valid_content(voc, vocabulary_file_name):
            self._add_failed_source_path(vocabulary_file_name)
            return

        self.cache_data(vocabulary_file_name, voc)
        self.vocabularies.append(voc)
        self.add_source_path(voc)

    def generate_local_context(self):
        for f in self.get_files(
                self.settings['VOC_PATHS'],
                exclude=self.settings['VOC_EXCLUDES']):
            self.generate_vocabulary_context(f, self.path)

    def dereference(self, uri, local_file):
        logger.debug("Dereferencing %s into %s", uri, local_file)
        # Ask the server for an RDF/XML serialization via content negotiation.
        headers = {"Accept": "application/rdf+xml"}
        r = requests.get(uri, headers=headers)
        with open(self._local_vocabulary_path + local_file, 'w') as f:
            f.write(r.text)

    def generate_remote_context(self):
        for uri in self.settings["VOC_URIS"]:
            logger.debug("Generating context for remote %s", uri)
            local_name = uri.split("/")[-1] + ".rdf"
            self.dereference(uri, local_name)
            self.generate_vocabulary_context(
                local_name,
                self._local_vocabulary_path)

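    # For illustration (hypothetical URI): a remote vocabulary such as
    # "http://purl.org/dc/terms" is fetched to /tmp/terms.rdf and then read
    # through the same pipeline as local files.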
    def generate_context(self):
        self.generate_local_context()
        self.generate_remote_context()
        self._update_context(('vocabularies',))
        self.save_cache()
        self.readers.save_cache()

    def generate_output(self, writer):
        for voc in self.vocabularies:
            writer.write_file(
                voc.save_as, self.get_template(voc.template),
                self.context, voc=voc,
                relative_urls=self.settings['RELATIVE_URLS'],
                override_output=hasattr(voc, 'override_save_as'))
        voc_writer_finalized.send(self, writer=writer)


class RdfReader(BaseReader):
    file_extensions = ['rdf', 'owl']
    enabled = bool(rdflib_loaded)

    def __init__(self, *args, **kwargs):
        super(RdfReader, self).__init__(*args, **kwargs)

    def read(self, source_path):
        """Parse content and metadata of an RDF file."""
        logger.debug("Loading graph described in %s", source_path)
        graph = rdflib.Graph()
        # parse() accepts a local path (Graph.load was removed in rdflib 6).
        graph.parse(source_path)
        meta = {}
        queries = [
            f for f in listdir(self.settings["VOC_QUERIES_PATH"])
            if (isfile(join(self.settings["VOC_QUERIES_PATH"], f))
                and f.endswith(".sparql"))]
        for query_path in queries:
            query_file_path = join(
                self.settings["VOC_QUERIES_PATH"], query_path)
            with open(query_file_path, "r") as query_file:
                query = query_file.read()
            # The name of the query file identifies the elements in the
            # context.
            query_key = query_path.split(".")[0]
            result_set = graph.query(query)
            # Each query result is stored as a dictionary in the vocabulary
            # context, referenced by the query name as its key. Multiple
            # results for the same query are collected in a list.
            for result in result_set:
                if query_key not in meta:
                    meta[query_key] = result.asdict()
                elif isinstance(meta[query_key], list):
                    meta[query_key].append(result.asdict())
                else:
                    meta[query_key] = [meta[query_key], result.asdict()]
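
        # A minimal sketch of one such query file. The file name (minus
        # ".sparql") becomes the key in `meta`, and the SELECT variables
        # become the dict keys. The query text is an illustrative assumption,
        # not a file shipped with the plugin:
        #
        #     # voc_queries/lov_metadata.sparql
        #     PREFIX dcterms: <http://purl.org/dc/terms/>
        #     PREFIX owl: <http://www.w3.org/2002/07/owl#>
        #     SELECT ?iri ?title ?description ?version WHERE {
        #         ?iri a owl:Ontology ;
        #              dcterms:title ?title ;
        #              dcterms:description ?description ;
        #              owl:versionInfo ?version .
        #     }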
- meta["iri"] = meta["lov_metadata"]["iri"]
- meta["description"] = meta["lov_metadata"]["description"]
- meta["version"] = meta["lov_metadata"]["version"]
- meta["title"] = meta["lov_metadata"]["title"]
- return "", meta

class Vocabulary(Page):
    mandatory_properties = ('iri', 'description', 'version', 'title')
    default_template = 'vocabulary'


def add_reader(readers):
    for ext in RdfReader.file_extensions:
        readers.reader_classes[ext] = RdfReader


def add_generator(pelican_object):
    logger.debug("Adding the vocabulary generator")
    return VocabularyGenerator


def register():
    signals.get_generators.connect(add_generator)
    signals.readers_init.connect(add_reader)
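
# To enable the plugin, reference it from pelicanconf.py. The module name and
# plugin path below are assumptions for this sketch, not values mandated by
# the plugin itself:
#
#     PLUGIN_PATHS = ['plugins']
#     PLUGINS = ['pelican_rdf']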