Source code for pycopia.XML.DTD

#!/usr/bin/python
# vim:ts=4:sw=4:softtabstop=0:smarttab
#
#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.

"""
Deals with DTD files. Primarily, provides the DTD parser and Python
"compiler".
"""

import sys, os

from pycopia import sourcegen
from pycopia.textutils import identifier, keyword_identifier

import pycopia.XML.POM
from pycopia.XML.POM import (ContentModel, ElementNode, Notation, ValidationError,
                             normalize_unicode)
from pycopia.XML.POMparse import (XMLAttribute, ANY, PCDATA, EMPTY)

### DTD compiler components ###

def get_dtd_compiler(fo, mixinmodule=None, doctype=None):
    """Build an xmlproc DTDParser wired to the source-generating consumer.

    fo          -- handed to sourcegen.get_sourcefile() to create the source
                   generator (presumably a writable file object -- confirm
                   against sourcegen).
    mixinmodule -- optional module object forwarded to the consumer as its
                   mixin source.
    doctype     -- optional doctype object forwarded to the consumer.

    Returns the DTDParser instance with its DTD consumer already set.
    """
    import xml
    if hasattr(xml, "use_pyxml"):
        # per Gentoo bug #367729
        xml.use_pyxml()
    from xml.parsers.xmlproc import dtdparser

    source_generator = sourcegen.get_sourcefile(fo)
    consumer = DTDConsumerForSourceGeneration(source_generator, mixinmodule, doctype)
    dtd_parser = dtdparser.DTDParser()
    dtd_parser.set_dtd_consumer(consumer)
    return dtd_parser
def get_identifier(uname):
    """Return identifier() applied to the normalize_unicode() form of `uname`."""
    normalized = normalize_unicode(uname)
    return identifier(normalized)
class AttributeMap(dict):
    """dict with a multi-line repr used when emitting generated source.

    Each item is rendered as "key: value, " with the key formatted by
    repr() and the value by str(); the entries are wrapped in braces and
    joined with a newline followed by one space.
    """

    def __repr__(self):
        entries = ["%r: %s, " % pair for pair in self.items()]
        return "\n ".join(["{"] + entries + ["}"])

# this DTD parser consumer generates the Python source code from the DTD.
class DTDConsumerForSourceGeneration(object):
    """DTD parser consumer that generates Python source code from a DTD.

    The DTD parser invokes the callback methods below as it meets each
    declaration.  Every callback either records what it saw in one of the
    bookkeeping dictionaries or emits code through the source generator
    supplied at construction time.
    """

    def __init__(self, generator, mixins=None, doctype=None):
        """Store the collaborators and create empty bookkeeping tables.

        generator -- source generator object; all output goes through it.
        mixins    -- optional module object searched for "<Name>Mixin"
                     classes to use as extra base classes.
        doctype   -- optional object whose `name` attribute identifies the
                     root element.
        """
        self.generator = generator
        self._code_index = 0
        self.elements = {}
        self.parameter_entities = {}
        self.general_entities = {}
        self._forwardattributes = {}
        self._allattributes = {}
        self.mixins = mixins # should be a module object
        self.doctype = doctype

    def dtd_start(self):
        """Emit the header comment and imports of the generated file."""
        # No newline here: the "done parsing" message printed by dtd_end()
        # continues on the same line.
        sys.stdout.write("Starting to parse DTD... ")
        self.generator.add_comment("This file generated by a program. do not edit.")
        self.generator.add_import(pycopia.XML.POM)
        if self.mixins:
            self.generator.add_import(self.mixins)
        self.generator.add_blank()
        self._code_index = self.generator.get_current_index()

    def dtd_end(self):
        """Emit the collected attribute and entity tables and write the file."""
        print("done parsing. Writing file.")
        gen = self.generator
        # NOTE(review): the insertion index is hard-coded to 2, while
        # dtd_start() records self._code_index, which is never read --
        # confirm which of the two is intended.
        for name, value in self._allattributes.items():
            gen.add_code("%s = %r" % (name, value), index=2)
        gen.add_instance("GENERAL_ENTITIES", self.general_entities)
        gen.add_comment("Cache for dynamic classes for this dtd.")
        gen.add_instance("_CLASSCACHE", {})
        gen.write()

    def new_element_type(self, elem_name, elem_cont):
        "Receives the declaration of an element type."
        # Only the first declaration of a given element creates a class.
        if elem_name not in self.elements:
            self.make_new_element(elem_name, elem_cont)

    def make_new_element(self, elem_name, contentmodel):
        """Generate the class for `elem_name` and register it in self.elements.

        Attributes that were declared before the element itself are attached
        to the new class, and the element matching the doctype name is also
        bound to the generic name _Root.
        """
        class_name = get_identifier(elem_name)
        parents = [ElementNode]
        if self.mixins:
            mixinname = "%sMixin" % (class_name,)
            if hasattr(self.mixins, mixinname):
                parents.insert(0, getattr(self.mixins, mixinname))
        # Original note: class name is capitalized to avoid clashes with
        # Python key words.
        ch = self.generator.add_class(class_name, tuple(parents))
        ch.add_attribute("_name", elem_name)
        ch.add_attribute("CONTENTMODEL", _ContentModelGenerator(contentmodel))
        self.elements[elem_name] = ch
        # Add any previously seen attributes
        forwarded = self._forwardattributes.pop(elem_name, None)
        if forwarded is not None:
            fwdattribs, fwdkwattribs = forwarded
            ch.add_attribute("ATTRIBUTES", fwdattribs)
            ch.add_attribute("KWATTRIBUTES", fwdkwattribs)
        # identify the root element with a generic name (_Root).
        if self.doctype and elem_name.lower() == self.doctype.name.lower():
            self.generator.add_code("\n_Root = %s\n" % (class_name,))

    def new_attribute(self, elem, a_name, a_type, a_decl, a_def):
        "Receives the declaration of a new attribute."
        attr = XMLAttribute(a_name, a_type, a_decl, a_def)
        ident = attr.get_identifier()
        self._allattributes[ident] = attr
        if elem in self.elements:
            self._add_element_attlist(self.elements[elem], attr, ident)
            return
        # Got a forward attribute definition (defined before element): park
        # it until make_new_element() sees the element.
        fwdattribs, fwdkwattribs = self._forwardattributes.setdefault(
            elem, (AttributeMap(), AttributeMap()))
        fwdattribs[a_name] = ident
        keywordname = keyword_identifier(normalize_unicode(a_name))
        fwdkwattribs[keywordname] = ident

    def _add_element_attlist(self, element, xmlattribute, ident):
        """Record `ident` under the attribute's name in the element's maps.

        The ATTRIBUTES and KWATTRIBUTES maps are created on the element the
        first time an attribute is added to it.
        """
        try:
            attrmap = element.get_attribute("ATTRIBUTES")
            kwattrmap = element.get_attribute("KWATTRIBUTES")
        except KeyError:
            element.add_attribute("ATTRIBUTES", AttributeMap())
            element.add_attribute("KWATTRIBUTES", AttributeMap())
            attrmap = element.get_attribute("ATTRIBUTES")
            kwattrmap = element.get_attribute("KWATTRIBUTES")
        attrmap[xmlattribute.name] = ident
        keywordname = keyword_identifier(normalize_unicode(xmlattribute.name))
        kwattrmap[keywordname] = ident

    def handle_comment(self, contents):
        "Receives the contents of a comment."
        self.generator.add_comment(contents)

    def new_parameter_entity(self,name,val):
        "Receives internal parameter entity declarations."
        # these are handled internally by the DTD parser. but.. save it anyway.
        self.parameter_entities[name] = val

    def new_external_pe(self, name, pubid, sysid):
        "Receives external parameter entity declarations."
        # these are handled internally by the DTD parser.

    def new_general_entity(self, name, val):
        "Receives internal general entity declarations."
        self.general_entities[normalize_unicode(name)] = val

    def new_external_entity(self, ent_name, pub_id, sys_id, ndata):
        """Receives external general entity declarations. 'ndata' is the
        empty string if the entity is parsed."""
        # XXX do we need to handle this?
        print("XXX external entity:")
        print("%s %s %s %s" % (ent_name, pub_id, sys_id, ndata))

    def new_notation(self,name, pubid, sysid):
        "Receives notation declarations."
        n = Notation(name, pubid, sysid)
        self.generator.add_instance(get_identifier(name), n)

    def handle_pi(self, target, data):
        "Receives the target and data of processing instructions."
        # XXX do we need to handle this?
        print("XXX unhandled PI: target=%r; data=%r" % (target, data))
class _ContentModelGenerator(object):
    """_ContentModelGenerator(rawmodel)
    The DTD parser generated and final content model are so different that
    a different content model generator is used for this object.

    rawmodel may be one of the strings "EMPTY", "#PCDATA" or "ANY", a tuple,
    or None.  Its repr() is a ContentModel constructor call, so the object
    can be written straight into generated source.

    Raises ValidationError for any other string and RuntimeError for any
    other type.
    """

    def __init__(self, rawmodel=None):
        if isinstance(rawmodel, str):
            if rawmodel == "EMPTY":
                self.model = EMPTY
            elif rawmodel == "#PCDATA":
                self.model = PCDATA
            elif rawmodel == "ANY":
                self.model = ANY
            else:
                raise ValidationError("ContentModelGenerator: unknown special type")
        elif isinstance(rawmodel, tuple):
            # XXX the raw tuple model is not translated yet; every
            # structured model is emitted as (ANY,).
            self.model = (ANY,) # rawmodel # XXX
        elif rawmodel is None:
            self.model = None
        else:
            raise RuntimeError("Unknown content model type: %r" % (rawmodel,))

    def __repr__(self):
        return "%s.%s(%r)" % (ContentModel.__module__, ContentModel.__name__, self.model)