Source code for pycopia.XML.DTD

# vim:ts=4:sw=4:softtabstop=0:smarttab
#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.

"""
Deals with DTD files. Primarily, provides the DTD parser and the Python
source code generator that is driven by it.
"""

import sys, os

from pycopia import sourcegen
from pycopia.textutils import identifier, keyword_identifier

import pycopia.XML.POM
from pycopia.XML.POM import (ContentModel, ElementNode, Notation, ValidationError,
from pycopia.XML.POMparse import (XMLAttribute, ANY, PCDATA, EMPTY)

### DTD compiler components ###

def get_dtd_compiler(fo, mixinmodule=None, doctype=None):
    """Return an xmlproc DTD parser that emits Python source while parsing.

    A source generator is obtained from ``sourcegen.get_sourcefile(fo)`` and
    wrapped in a ``DTDConsumerForSourceGeneration``, which is then installed
    as the parser's DTD consumer.

    fo          -- passed straight to ``sourcegen.get_sourcefile``.
    mixinmodule -- optional module object searched for ``<Element>Mixin``
                   classes by the consumer.
    doctype     -- optional doctype forwarded to the consumer.
    """
    # The xml package is imported lazily so that merely importing this module
    # does not require the xmlproc parser to be installed.
    import xml
    if hasattr(xml, "use_pyxml"):  # per Gentoo bug #367729
        xml.use_pyxml()
    from xml.parsers.xmlproc.dtdparser import DTDParser

    source_writer = sourcegen.get_sourcefile(fo)
    consumer = DTDConsumerForSourceGeneration(source_writer, mixinmodule, doctype)
    dtd_parser = DTDParser()
    dtd_parser.set_dtd_consumer(consumer)
    return dtd_parser
def get_identifier(uname):
    """Return ``identifier()`` applied to the unicode-normalized ``uname``."""
    normalized = normalize_unicode(uname)
    return identifier(normalized)
class AttributeMap(dict):
    """A dict whose repr lists one ``key: value`` entry per line.

    Keys are rendered with ``%r`` and values with ``%s``, so a value appears
    as its plain string form rather than as a quoted literal.
    """

    def __repr__(self):
        entries = ["%r: %s, " % pair for pair in self.items()]
        return "\n ".join(["{"] + entries + ["}"])


# this DTD parser consumer generates the Python source code from the DTD.
class DTDConsumerForSourceGeneration(object):
    """DTD parser consumer that generates Python source code from a DTD.

    An instance is installed on the DTD parser (see ``get_dtd_compiler``),
    which then calls the methods below as it meets each declaration. Every
    element type becomes a class added to ``generator``; attribute
    declarations become ``ATTRIBUTES`` / ``KWATTRIBUTES`` maps on those
    classes.

    generator -- source generator object that receives the emitted code.
    mixins    -- optional module object; a class named ``<Element>Mixin``
                 found in it is inserted as the first base of that element.
    doctype   -- optional doctype; the element whose name matches it is also
                 bound to the generic name ``_Root``.
    """

    def __init__(self, generator, mixins=None, doctype=None):
        self.generator = generator
        self._code_index = 0
        self.elements = {}
        self.parameter_entities = {}
        self.general_entities = {}
        # Attribute maps seen before their element was declared, keyed by
        # element name: {name: (ATTRIBUTES map, KWATTRIBUTES map)}.
        self._forwardattributes = {}
        # Every declared attribute, keyed by its generated identifier.
        self._allattributes = {}
        self.mixins = mixins  # should be a module object
        self.doctype = doctype

    def dtd_start(self):
        """Emit the generated file's header and remember the code index."""
        # Progress message without a newline; dtd_end() finishes the line.
        # sys.stdout.write is used because it works on Python 2 and 3 alike.
        sys.stdout.write("Starting to parse DTD... ")
        self.generator.add_comment("This file generated by a program. do not edit.")
        self.generator.add_import(pycopia.XML.POM)
        if self.mixins:
            self.generator.add_import(self.mixins)
        self.generator.add_blank()
        self._code_index = self.generator.get_current_index()

    def dtd_end(self):
        """Emit the collected attributes and entities, then write the file."""
        print("done parsing. Writing file.")
        gen = self.generator
        # NOTE(review): the insertion index is hard-coded to 2 here; the
        # _code_index saved by dtd_start() is not used.
        for name, value in self._allattributes.items():
            gen.add_code("%s = %r" % (name, value), index=2)
        gen.add_instance("GENERAL_ENTITIES", self.general_entities)
        gen.add_comment("Cache for dynamic classes for this dtd.")
        gen.add_instance("_CLASSCACHE", {})
        gen.write()

    def new_element_type(self, elem_name, elem_cont):
        "Receives the declaration of an element type."
        # A repeated declaration of an already known element is ignored.
        if elem_name not in self.elements:
            self.make_new_element(elem_name, elem_cont)

    def make_new_element(self, elem_name, contentmodel):
        """Add a class for element ``elem_name`` to the generated source."""
        parents = [ElementNode]
        if self.mixins:
            mixinname = "%sMixin" % (get_identifier(elem_name))
            if hasattr(self.mixins, mixinname):
                parents.insert(0, getattr(self.mixins, mixinname))
        # class name is capitalized to avoid clashes with Python key words.
        ch = self.generator.add_class(get_identifier(elem_name), tuple(parents))
        ch.add_attribute("_name", elem_name)
        ch.add_attribute("CONTENTMODEL", _ContentModelGenerator(contentmodel))
        self.elements[elem_name] = ch
        # Add any previously seen attributes
        try:
            fwdattribs, fwdkwattribs = self._forwardattributes[elem_name]
        except KeyError:
            pass
        else:
            ch.add_attribute("ATTRIBUTES", fwdattribs)
            ch.add_attribute("KWATTRIBUTES", fwdkwattribs)
            del self._forwardattributes[elem_name]
        # identify the root element with a generic name (_Root).
        # NOTE(review): the right-hand side of this comparison was missing
        # from the source as received; reconstructed as the doctype's
        # lower-cased name -- confirm against upstream.
        if self.doctype and elem_name.lower() == self.doctype.name.lower():
            self.generator.add_code("\n_Root = %s\n" % (get_identifier(elem_name),))

    def new_attribute(self, elem, a_name, a_type, a_decl, a_def):
        "Receives the declaration of a new attribute."
        attr = XMLAttribute(a_name, a_type, a_decl, a_def)
        ident = attr.get_identifier()
        self._allattributes[ident] = attr
        try:
            element = self.elements[elem]
        except KeyError:
            # Got a forward attribute definition (defined before element)
            try:
                fwdattribs, fwdkwattribs = self._forwardattributes[elem]
            except KeyError:
                fwdattribs = AttributeMap()
                fwdkwattribs = AttributeMap()
                self._forwardattributes[elem] = (fwdattribs, fwdkwattribs)
            fwdattribs[a_name] = ident
            keywordname = keyword_identifier(normalize_unicode(a_name))
            fwdkwattribs[keywordname] = ident
        else:
            self._add_element_attlist(element, attr, ident)

    def _add_element_attlist(self, element, xmlattribute, ident):
        """Record ``xmlattribute`` in the attribute maps of ``element``.

        The ``ATTRIBUTES`` and ``KWATTRIBUTES`` maps are created on the
        element the first time an attribute is added to it.
        """
        try:
            attrmap = element.get_attribute("ATTRIBUTES")
            kwattrmap = element.get_attribute("KWATTRIBUTES")
        except KeyError:
            element.add_attribute("ATTRIBUTES", AttributeMap())
            element.add_attribute("KWATTRIBUTES", AttributeMap())
            attrmap = element.get_attribute("ATTRIBUTES")
            kwattrmap = element.get_attribute("KWATTRIBUTES")
        # NOTE(review): the map key and the normalize_unicode() argument were
        # missing from the source as received; reconstructed as the
        # attribute's name, mirroring the forward-declaration path in
        # new_attribute() -- confirm against upstream.
        attrmap[xmlattribute.name] = ident
        keywordname = keyword_identifier(normalize_unicode(xmlattribute.name))
        kwattrmap[keywordname] = ident

    def handle_comment(self, contents):
        "Receives the contents of a comment."
        self.generator.add_comment(contents)

    def new_parameter_entity(self, name, val):
        "Receives internal parameter entity declarations."
        # these are handled internally by the DTD parser. but.. save it anyway.
        self.parameter_entities[name] = val

    def new_external_pe(self, name, pubid, sysid):
        "Receives external parameter entity declarations."
        # these are handled internally by the DTD parser.

    def new_general_entity(self, name, val):
        "Receives internal general entity declarations."
        self.general_entities[normalize_unicode(name)] = val

    def new_external_entity(self, ent_name, pub_id, sys_id, ndata):
        """Receives external general entity declarations. 'ndata' is the
        empty string if the entity is parsed."""
        # XXX do we need to handle this?
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("XXX external entity:")
        print("%s %s %s %s" % (ent_name, pub_id, sys_id, ndata))

    def new_notation(self, name, pubid, sysid):
        "Receives notation declarations."
        n = Notation(name, pubid, sysid)
        self.generator.add_instance(get_identifier(name), n)

    def handle_pi(self, target, data):
        "Receives the target and data of processing instructions."
        # XXX do we need to handle this?
        print("XXX unhandled PI: target=%r; data=%r" % (target, data))
class _ContentModelGenerator(object):
    """_ContentModelGenerator(rawmodel)

    The DTD parser generated and final content model are so different that a
    different content model generator is used for this object.

    ``rawmodel`` is the content model as handed over by the DTD parser:

    * ``None``    -- no model; ``self.model`` is ``None``.
    * a string    -- one of ``"EMPTY"``, ``"#PCDATA"`` or ``"ANY"``, mapped to
                     the corresponding ``EMPTY`` / ``PCDATA`` / ``ANY`` object.
    * a tuple     -- a structured model; currently not translated and stored
                     as ``(ANY,)``.

    Raises ``ValidationError`` for an unknown string and ``RuntimeError`` for
    any other type. The repr of an instance is a ``ContentModel(...)``
    constructor expression.
    """

    def __init__(self, rawmodel=None):
        if rawmodel is None:
            self.model = None
        elif isinstance(rawmodel, str):
            if rawmodel == "EMPTY":
                self.model = EMPTY
            elif rawmodel == "#PCDATA":
                self.model = PCDATA
            elif rawmodel == "ANY":
                self.model = ANY
            else:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise ValidationError("ContentModelGenerator: unknown special type")
        elif isinstance(rawmodel, tuple):
            # XXX the raw tuple model is discarded; anything is allowed.
            self.model = (ANY,)  # rawmodel
        else:
            raise RuntimeError("Unknown content model type: %r" % (rawmodel,))

    def __repr__(self):
        return "%s.%s(%r)" % (ContentModel.__module__, ContentModel.__name__, self.model)