pelican_rdf.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. from pelican.readers import BaseReader
  2. from pelican.generators import CachingGenerator
  3. from pelican.contents import Page, is_valid_content
  4. from pelican import signals
  5. import logging
  6. from blinker import signal
  7. import requests
  8. from os import listdir
  9. from os.path import isfile, join
"""
pelican-rdf
===========
This plugin adds a new type of media to Pelican: the vocabulary.
Vocabularies are .rdf or .owl files, and metadata about them is
collected through SPARQL queries.
"""
  17. try:
  18. import rdflib
  19. from rdflib.query import Processor
  20. rdflib_loaded=True
  21. except ImportError:
  22. rdflib_loaded=False
  23. logger = logging.getLogger(__name__)
  24. voc_generator_init = signal('voc_generator_init')
  25. voc_generator_finalized = signal('voc_generator_finalized')
  26. voc_writer_finalized = signal('voc_writer_finalized')
  27. voc_generator_preread = signal('voc_generator_preread')
  28. voc_generator_context = signal('voc_generator_context')
  29. class VocabularyGenerator(CachingGenerator):
  30. """Generate vocabulary descriptions"""
  31. # temporary file where the vocabulary is dereferenced to
  32. # when collected online
  33. _local_vocabulary_path = "/tmp/"
  34. def __init__(self, *args, **kwargs):
  35. logger.debug("Vocabulary generator called")
  36. self.vocabularies =[]
  37. super(VocabularyGenerator, self).__init__(*args, **kwargs)
  38. # Called both for local and remote vocabulary context creation.
  39. # Performs the actual Vocabulary generation.
  40. def generate_vocabulary_context(
  41. self, vocabulary_file_name, path_to_vocabulary):
  42. logger.debug("Generating__ vocabulary context for "+
  43. path_to_vocabulary+"/"+vocabulary_file_name)
  44. voc = self.get_cached_data(vocabulary_file_name, None)
  45. if voc is None:
  46. try:
  47. voc = self.readers.read_file(
  48. base_path=path_to_vocabulary,
  49. path=vocabulary_file_name,
  50. content_class=Vocabulary,
  51. context=self.context,
  52. preread_signal=voc_generator_preread,
  53. preread_sender=self,
  54. context_signal=voc_generator_context,
  55. context_sender=self)
  56. except Exception as e:
  57. logger.error(
  58. 'Could not process %s\n%s', vocabulary_file_name, e,
  59. exc_info=self.settings.get('DEBUG', False))
  60. self._add_failed_source_path(vocabulary_file_name)
  61. if not is_valid_content(voc, vocabulary_file_name):
  62. self._add_failed_source_path(vocabulary_file_name)
  63. self.cache_data(vocabulary_file_name, voc)
  64. self.vocabularies.append(voc)
  65. self.add_source_path(voc)
  66. def generate_local_context(self):
  67. for f in self.get_files(
  68. self.settings['VOC_PATHS'],
  69. exclude=self.settings['VOC_EXCLUDES']):
  70. self.generate_vocabulary_context(f, self.path)
  71. def dereference(self, uri, local_file):
  72. logger.debug("Dereferencing "+uri+" into "+local_file)
  73. headers={"Accept":"application/rdf+xml"}
  74. r = requests.get(uri, headers=headers)
  75. with open(self._local_vocabulary_path+local_file, 'w') as f:
  76. f.write(r.text)
  77. def generate_remote_context(self):
  78. for uri in self.settings["VOC_URIS"]:
  79. logger.debug("Generating context for remote "+uri)
  80. local_name = uri.split("/")[-1]+".rdf"
  81. self.dereference(uri, local_name)
  82. self.generate_vocabulary_context(
  83. local_name,
  84. self._local_vocabulary_path)
  85. def generate_context(self):
  86. self.generate_local_context()
  87. self.generate_remote_context()
  88. self._update_context(('vocabularies',))
  89. self.save_cache()
  90. self.readers.save_cache()
  91. def generate_output(self, writer):
  92. for voc in self.vocabularies:
  93. writer.write_file(
  94. voc.save_as, self.get_template(voc.template),
  95. self.context, voc=voc,
  96. relative_urls=self.settings['RELATIVE_URLS'],
  97. override_output=hasattr(voc, 'override_save_as'))
  98. voc_writer_finalized.send(self, writer=writer)
  99. class RdfReader(BaseReader):
  100. file_extensions = ['rdf', 'owl']
  101. enabled = bool(rdflib_loaded)
  102. def __init__(self, *args, **kwargs):
  103. super(RdfReader, self).__init__(*args, **kwargs)
  104. def read(self, source_path):
  105. """Parse content and metadata of an rdf file"""
  106. logger.debug("Loading graph described in "+source_path)
  107. graph = rdflib.Graph()
  108. graph.load(source_path)
  109. meta = {}
  110. queries = [
  111. f for f in listdir(self.settings["VOC_QUERIES_PATH"])
  112. if (isfile(join(self.settings["VOC_QUERIES_PATH"], f))
  113. and f.endswith(".sparql"))]
  114. for query_path in queries:
  115. query_file_path = self.settings["VOC_QUERIES_PATH"]+"/"+query_path
  116. with open(query_file_path, "r") as query_file:
  117. query = query_file.read()
  118. # The name of the query identifies the elements in the context
  119. query_key=query_path.split(".")[0]
  120. result_set = graph.query(query)
  121. # Each query result will be stored as a dictionnary in the
  122. # vocabulary context, referenced by the query name as its key.
  123. # Multiple results are stored in a list.
  124. for result in result_set:
  125. if not query_key in meta.keys():
  126. meta[query_key]=result.asdict()
  127. elif type(meta[query_key]) == list:
  128. meta[query_key].append(result.asdict())
  129. else:
  130. meta[query_key]=[meta[query_key], result.asdict()]
  131. meta["iri"] = meta["lov_metadata"]["iri"]
  132. meta["description"] = meta["lov_metadata"]["description"]
  133. meta["version"] = meta["lov_metadata"]["version"]
  134. meta["title"] = meta["lov_metadata"]["title"]
  135. return "", meta
  136. class Vocabulary(Page):
  137. mandatory_properties = ('iri','description','version', 'title')
  138. default_template = 'vocabulary'
  139. def add_reader(readers):
  140. for ext in RdfReader.file_extensions:
  141. readers.reader_classes[ext] = RdfReader
  142. def add_generator(pelican_object):
  143. print("Adding the generator")
  144. return VocabularyGenerator
  145. def register():
  146. signals.get_generators.connect(add_generator)
  147. signals.readers_init.connect(add_reader)