|
@@ -0,0 +1,175 @@
|
|
|
+from pelican.readers import BaseReader
|
|
|
+from pelican.generators import CachingGenerator
|
|
|
+from pelican.contents import Page, is_valid_content
|
|
|
+from pelican import signals
|
|
|
+import logging
|
|
|
+from blinker import signal
|
|
|
+import requests
|
|
|
+from os import listdir
|
|
|
+from os.path import isfile, join
|
|
|
+
|
|
|
+"""
|
|
|
+pelican-rdf
|
|
|
+===============
|
|
|
+
|
|
|
This plugin integrates into Pelican a new type of media: the vocabulary.
|
|
|
+Vocabularies are .rdf or .owl files, and metadata about them is collected
|
|
|
+through sparql queries.
|
|
|
+"""
|
|
|
+
|
|
|
+
|
|
|
# rdflib is an optional dependency: when it is missing, the flag below
# disables the reader (see RdfReader.enabled) instead of crashing pelican.
try:
    import rdflib
    from rdflib.query import Processor
    rdflib_loaded=True
except ImportError:
    rdflib_loaded=False

logger = logging.getLogger(__name__)

# Plugin-specific blinker signals, mirroring pelican's own generator
# signal names so themes/plugins can hook into the vocabulary lifecycle.
voc_generator_init = signal('voc_generator_init')
voc_generator_finalized = signal('voc_generator_finalized')
voc_writer_finalized = signal('voc_writer_finalized')
voc_generator_preread = signal('voc_generator_preread')
voc_generator_context = signal('voc_generator_context')
|
|
|
+
|
|
|
class VocabularyGenerator(CachingGenerator):
    """Generate vocabulary descriptions.

    Vocabularies are collected from local ``VOC_PATHS`` files and from
    remote ``VOC_URIS`` (dereferenced over HTTP), read through
    :class:`RdfReader`, and rendered with the ``vocabulary`` template.
    """

    # Temporary directory a remote vocabulary is dereferenced to
    # before being read like a local file.
    _local_vocabulary_path = "/tmp/"

    def __init__(self, *args, **kwargs):
        logger.debug("Vocabulary generator called")
        self.vocabularies = []
        super(VocabularyGenerator, self).__init__(*args, **kwargs)

    def generate_vocabulary_context(
            self, vocabulary_file_name, path_to_vocabulary):
        """Read one vocabulary file and register it in the context.

        Called both for local and remote vocabularies; performs the
        actual Vocabulary generation.
        """
        # Lazy %-style args avoid building the message when DEBUG is off.
        logger.debug("Generating vocabulary context for %s/%s",
                     path_to_vocabulary, vocabulary_file_name)
        voc = self.get_cached_data(vocabulary_file_name, None)
        if voc is None:
            try:
                voc = self.readers.read_file(
                    base_path=path_to_vocabulary,
                    path=vocabulary_file_name,
                    content_class=Vocabulary,
                    context=self.context,
                    preread_signal=voc_generator_preread,
                    preread_sender=self,
                    context_signal=voc_generator_context,
                    context_sender=self)
            except Exception as e:
                logger.error(
                    'Could not process %s\n%s', vocabulary_file_name, e,
                    exc_info=self.settings.get('DEBUG', False))
                self._add_failed_source_path(vocabulary_file_name)
                # BUG FIX: stop here — the original fell through and
                # validated/cached/appended an unread (None) vocabulary.
                return

            if not is_valid_content(voc, vocabulary_file_name):
                self._add_failed_source_path(vocabulary_file_name)
                # BUG FIX: likewise, never publish an invalid vocabulary.
                return

            self.cache_data(vocabulary_file_name, voc)
        self.vocabularies.append(voc)
        self.add_source_path(voc)

    def generate_local_context(self):
        """Build contexts for every vocabulary file found in VOC_PATHS."""
        for f in self.get_files(
                self.settings['VOC_PATHS'],
                exclude=self.settings['VOC_EXCLUDES']):
            self.generate_vocabulary_context(f, self.path)

    def dereference(self, uri, local_file):
        """Download *uri* as RDF/XML into the temporary directory."""
        logger.debug("Dereferencing %s into %s", uri, local_file)
        headers = {"Accept": "application/rdf+xml"}
        r = requests.get(uri, headers=headers)
        # NOTE(review): no status-code check — an HTML error page would
        # be written as-is and only fail later at parse time.
        with open(self._local_vocabulary_path + local_file, 'w') as f:
            f.write(r.text)

    def generate_remote_context(self):
        """Dereference each VOC_URIS entry, then build its context."""
        for uri in self.settings["VOC_URIS"]:
            logger.debug("Generating context for remote %s", uri)
            local_name = uri.split("/")[-1] + ".rdf"
            self.dereference(uri, local_name)
            self.generate_vocabulary_context(
                local_name,
                self._local_vocabulary_path)

    def generate_context(self):
        """Pelican entry point: collect local then remote vocabularies."""
        self.generate_local_context()
        self.generate_remote_context()
        self._update_context(('vocabularies',))
        self.save_cache()
        self.readers.save_cache()

    def generate_output(self, writer):
        """Write one output file per collected vocabulary."""
        for voc in self.vocabularies:
            writer.write_file(
                voc.save_as, self.get_template(voc.template),
                self.context, voc=voc,
                relative_urls=self.settings['RELATIVE_URLS'],
                override_output=hasattr(voc, 'override_save_as'))
        voc_writer_finalized.send(self, writer=writer)
|
|
|
+
|
|
|
class RdfReader(BaseReader):
    """Pelican reader for RDF vocabularies (.rdf / .owl files).

    Metadata is collected by running every ``*.sparql`` query found in
    the ``VOC_QUERIES_PATH`` directory against the parsed graph.
    """

    file_extensions = ['rdf', 'owl']
    # Only enable the reader when rdflib could actually be imported.
    enabled = bool(rdflib_loaded)

    def __init__(self, *args, **kwargs):
        super(RdfReader, self).__init__(*args, **kwargs)

    def read(self, source_path):
        """Parse content and metadata of an rdf file.

        Returns an empty content string and a metadata dict holding one
        entry per query (a dict for a single result, a list of dicts for
        several), plus the flattened mandatory ``lov_metadata`` keys.
        """
        logger.debug("Loading graph described in %s", source_path)
        graph = rdflib.Graph()
        # Graph.parse() supersedes Graph.load(), a deprecated alias that
        # was removed in rdflib 6.x; the format is guessed from the
        # file extension as before.
        graph.parse(source_path)

        queries_dir = self.settings["VOC_QUERIES_PATH"]
        queries = [
            f for f in listdir(queries_dir)
            if isfile(join(queries_dir, f)) and f.endswith(".sparql")]

        meta = {}
        for query_name in queries:
            with open(join(queries_dir, query_name), "r") as query_file:
                query = query_file.read()

            # The name of the query identifies the elements in the context.
            query_key = query_name.split(".")[0]
            # Each query result is stored as a dictionary in the
            # vocabulary context, referenced by the query name as its
            # key; multiple results are collected into a list.
            for result in graph.query(query):
                if query_key not in meta:
                    meta[query_key] = result.asdict()
                elif isinstance(meta[query_key], list):
                    meta[query_key].append(result.asdict())
                else:
                    meta[query_key] = [meta[query_key], result.asdict()]

        # The lov_metadata query must supply Vocabulary's mandatory
        # properties. NOTE(review): raises KeyError when it is missing
        # or returned several results — confirm intended behaviour.
        for key in ("iri", "description", "version", "title"):
            meta[key] = meta["lov_metadata"][key]
        return "", meta
|
|
|
+
|
|
|
class Vocabulary(Page):
    """A pelican Page backed by an RDF vocabulary description."""

    # Metadata keys every vocabulary must carry; RdfReader.read()
    # flattens them out of the lov_metadata query results.
    mandatory_properties = ('iri', 'description', 'version', 'title')
    default_template = 'vocabulary'
|
|
|
+
|
|
|
def add_reader(readers):
    """Register RdfReader for every file extension it supports."""
    for extension in RdfReader.file_extensions:
        readers.reader_classes[extension] = RdfReader
|
|
|
+
|
|
|
def add_generator(pelican_object):
    """Return the vocabulary generator class to pelican's get_generators."""
    # Use the module logger instead of print(), consistent with the
    # rest of the plugin (and silent unless DEBUG logging is enabled).
    logger.debug("Adding the generator")
    return VocabularyGenerator
|
|
|
+
|
|
|
+
|
|
|
def register():
    """Plugin entry point: hook the reader and generator into pelican."""
    signals.readers_init.connect(add_reader)
    signals.get_generators.connect(add_generator)
|