Package translate :: Package storage :: Module dtd
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.dtd

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """classes that hold units of .dtd files (dtdunit) or entire files (dtdfile) 
 23  these are specific .dtd files for localisation used by mozilla""" 
 24   
 25  from translate.storage import base 
 26  from translate.misc import quote 
 27   
 28  import re 
 29  import sys 
 30  import warnings 
 31   
def quotefordtd(source):
    """Return source wrapped in quotes suitable for a DTD entity value.

    A DTD entity value may be delimited by either single or double quotes,
    so the delimiter is chosen to avoid clashing with the content:

    - no double quote in source: delegate to quote.quotestr
    - double quotes but no single quote: delegate to quote.singlequotestr
    - both kinds present: wrap in single quotes and escape every embedded
      single quote as the XML entity ``&apos;`` so the value stays well formed
    """
    if '"' not in source:
        return quote.quotestr(source)
    if "'" not in source:
        return quote.singlequotestr(source)
    # Both quote characters occur: the apostrophes must be written as the
    # entity reference, otherwise the first one would terminate the value.
    return "'" + source.replace("'", '&apos;') + "'"
40
def unquotefromdtd(source):
    """Return the content of a quoted DTD entity value, without its quotes.

    The first character of source is taken as the quote character and the
    text between the delimiters is extracted with
    quote.extractwithoutquotes. An empty source is treated as an empty
    double-quoted string. For single-quoted values, ``&apos;`` references
    are turned back into literal apostrophes.

    The quote characters are expected to be the first and last characters of
    the string; quote characters appearing inside the value are not handled
    specially here.
    """
    if not source:
        source = '""'
    quotechar = source[0]
    # The second element of the result (used by parse-style callers as a
    # "still inside the string" flag) is not needed here.
    extracted, _ = quote.extractwithoutquotes(
        source, quotechar, quotechar, allowreentry=False)
    if quotechar == "'":
        # Undo the apostrophe escaping applied to single-quoted values.
        extracted = extracted.replace("&apos;", "'")
    return extracted
52
class dtdunit(base.TranslationUnit):
    """An entity definition from a .dtd file, plus any associated comments.

    The quoted entity value is kept in ``self.definition``; ``source`` and
    ``target`` are both views onto that one value (the format is
    monolingual). Comments are stored in ``self.comments`` as
    ``(commenttype, text)`` tuples, and lines that are neither comments nor
    part of an entity are kept verbatim in ``self.unparsedlines``.
    """

    def __init__(self, source=""):
        """Construct the dtdunit and prepare it for parsing."""
        super(dtdunit, self).__init__(source)
        self.comments = []
        self.unparsedlines = []
        # parser state flags, carried from one line to the next by parse()
        self.incomment = 0
        self.inentity = 0
        self.entity = "FakeEntityOnlyForInitialisationAndTesting"
        # Assigned again after the base initialiser has run.
        # NOTE(review): presumably the base class also assigns target, which
        # would overwrite the definition - confirm in base.TranslationUnit.
        self.source = source

    def _setdefinition(self, text):
        """Store text (None is treated as empty) as the quoted definition."""
        if text is None:
            text = ""
        self.definition = quotefordtd(text)

    # Note that source and target are equivalent for monolingual units
    def setsource(self, source):
        """Set the definition to the quoted value of source.

        None is accepted and treated as an empty string, matching settarget.
        """
        self._setdefinition(source)

    def getsource(self):
        """Return the unquoted source string."""
        return unquotefromdtd(self.definition)
    source = property(getsource, setsource)

    def settarget(self, target):
        """Set the definition to the quoted value of target (None means empty)."""
        self._setdefinition(target)

    def gettarget(self):
        """Return the unquoted target string."""
        return unquotefromdtd(self.definition)
    target = property(gettarget, settarget)

    def isnull(self):
        """Return whether this dtdunit has no entity definition at all."""
        # parse() resets self.entity to None and only fills it in once an
        # entity name has been read.
        # TODO: this needs to work better with base class expectations
        return self.entity is None

    def parse(self, dtdsrc):
        """Read the first dtd element from dtdsrc into this object.

        Lines are consumed until one complete entity definition has been
        read (or the input runs out). Comments met on the way are appended
        to self.comments and other non-entity lines to self.unparsedlines.

        Returns the number of lines processed; 0 when dtdsrc is empty.
        Raises ValueError when the entity value does not start with a
        single or double quote.
        """
        self.comments = []
        # All comment categories deliberately share one list, so the
        # comments keep their original relative order on output.
        self.locfilenotes = self.comments
        self.locgroupstarts = self.comments
        self.locgroupends = self.comments
        self.locnotes = self.comments
        self.entity = None
        self.definition = ''
        if not dtdsrc:
            return 0
        lines = dtdsrc.split("\n")
        linesprocessed = 0
        comment = ""
        for line in lines:
            line += "\n"
            linesprocessed += 1
            if not self.incomment:
                if line.find('<!--') != -1:
                    self.incomment = 1
                    self.continuecomment = 0
                    # work out the type of comment and remember it (we are
                    # not "in" the comment yet as far as extraction goes)
                    (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                    if comment.find('LOCALIZATION NOTE') != -1:
                        pos = quote.findend(comment, 'LOCALIZATION NOTE')
                        # bounds-checked like the other scanning loops below
                        while pos < len(comment) and comment[pos] == ' ':
                            pos += 1
                        if comment.find('FILE', pos) == pos:
                            self.commenttype = "locfile"
                        elif comment.find('BEGIN', pos) == pos:
                            self.commenttype = "locgroupstart"
                        elif comment.find('END', pos) == pos:
                            self.commenttype = "locgroupend"
                        else:
                            self.commenttype = "locnote"
                    else:
                        # plain comment
                        self.commenttype = "comment"

            if self.incomment:
                # some kind of comment; the second result says whether the
                # comment is still open at the end of this line
                (comment, self.incomment) = quote.extract(
                    line, "<!--", "-->", None, self.continuecomment)
                self.continuecomment = self.incomment
                # strip the comment out of what will be parsed
                line = line.replace(comment, "", 1)
                # add an end of line if this is the end of the comment
                if not self.incomment:
                    if line.isspace():
                        comment += line
                        line = ''
                    else:
                        comment += '\n'
                # TODO: entity definitions that are commented out could be
                # parsed here and stored as obsolete messages
                # record the comment together with the type worked out above
                commentpair = (self.commenttype, comment)
                if self.commenttype == "locfile":
                    self.locfilenotes.append(commentpair)
                elif self.commenttype == "locgroupstart":
                    self.locgroupstarts.append(commentpair)
                elif self.commenttype == "locgroupend":
                    self.locgroupends.append(commentpair)
                elif self.commenttype == "locnote":
                    self.locnotes.append(commentpair)
                elif self.commenttype == "comment":
                    self.comments.append(commentpair)

            if not self.inentity and not self.incomment:
                entitypos = line.find('<!ENTITY')
                if entitypos != -1:
                    self.inentity = 1
                    beforeentity = line[:entitypos].strip()
                    if beforeentity.startswith("#"):
                        self.hashprefix = beforeentity
                    self.entitypart = "start"
                else:
                    self.unparsedlines.append(line)

            if self.inentity:
                # Column index into the current line. It is reset for every
                # line so that a part continued from a previous line (for
                # example the parameter of an external entity) is not read
                # using an index left over from that previous line.
                e = 0
                if self.entitypart == "start":
                    # the entity definition: drop everything up to and
                    # including the <!ENTITY keyword
                    e = quote.findend(line, '<!ENTITY')
                    line = line[e:]
                    self.entitypart = "name"
                    self.entitytype = "internal"
                if self.entitypart == "name":
                    e = 0
                    while e < len(line) and line[e].isspace():
                        e += 1
                    self.entity = ''
                    if e < len(line) and line[e] == '%':
                        # a leading % marks a parameter (external) entity
                        self.entitytype = "external"
                        self.entityparameter = ""
                        e += 1
                        while e < len(line) and line[e].isspace():
                            e += 1
                    while e < len(line) and not line[e].isspace():
                        self.entity += line[e]
                        e += 1
                    while e < len(line) and line[e].isspace():
                        e += 1
                    if self.entity:
                        if self.entitytype == "external":
                            self.entitypart = "parameter"
                        else:
                            self.entitypart = "definition"
                        # remember the start position and the quote character
                        if e == len(line):
                            # nothing more on this line; the next part starts
                            # on a following line
                            self.entityhelp = None
                            continue
                        elif self.entitypart == "definition":
                            self.entityhelp = (e, line[e])
                            self.instring = 0
                if self.entitypart == "parameter":
                    # skip indentation when the parameter starts its own line
                    # (a no-op when it follows the name on the same line)
                    while e < len(line) and line[e].isspace():
                        e += 1
                    paramstart = e
                    while e < len(line) and line[e].isalnum():
                        e += 1
                    self.entityparameter += line[paramstart:e]
                    while e < len(line) and line[e].isspace():
                        e += 1
                    line = line[e:]
                    e = 0
                    if not line:
                        continue
                    if line[0] in ('"', "'"):
                        self.entitypart = "definition"
                        self.entityhelp = (e, line[e])
                        self.instring = 0
                if self.entitypart == "definition":
                    if self.entityhelp is None:
                        # the value starts on this line: find its first
                        # non-blank character, which is the quote character
                        e = 0
                        while e < len(line) and line[e].isspace():
                            e += 1
                        if e == len(line):
                            continue
                        self.entityhelp = (e, line[e])
                        self.instring = 0
                    # actually the lines below should remember instring,
                    # rather than using it as dummy
                    e = self.entityhelp[0]
                    if self.entityhelp[1] == "'":
                        (defpart, self.instring) = quote.extract(
                            line[e:], "'", "'",
                            startinstring=self.instring, allowreentry=False)
                    elif self.entityhelp[1] == '"':
                        (defpart, self.instring) = quote.extract(
                            line[e:], '"', '"',
                            startinstring=self.instring, allowreentry=False)
                    else:
                        raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                    # for any following lines, start at the beginning of the
                    # line, remembering the quote character
                    self.entityhelp = (0, self.entityhelp[1])
                    self.definition += defpart
                    if not self.instring:
                        # closing quote reached: this unit is complete
                        self.inentity = 0
                        break

        # change the condition to a true value to dump the parser state into
        # the comments for debugging
        if 0:
            for attr in dir(self):
                r = repr(getattr(self, attr))
                if len(r) > 60:
                    r = r[:57] + "..."
                self.comments.append(("comment", "self.%s = %s" % (attr, r)))
        return linesprocessed

    def __str__(self):
        """Convert to a byte string, encoding unicode output if necessary."""
        source = self.getoutput()
        if isinstance(source, unicode):
            # falls back to UTF-8 when the unit has no encoding attribute
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Convert the dtd entity back to string form.

        The output is the stored comments, then the unparsed lines, then
        (when there is an entity) the ``<!ENTITY ...>`` line itself. A unit
        without an entity yields just the comments and unparsed lines with
        trailing whitespace reduced to a single newline.
        """
        lines = []
        lines.extend([comment for _, comment in self.comments])
        lines.extend(self.unparsedlines)
        if self.isnull():
            result = "".join(lines)
            return result.rstrip() + "\n"
        if len(self.entity) > 0:
            if getattr(self, 'entitytype', None) == 'external':
                entityline = '<!ENTITY % ' + self.entity + ' ' + self.entityparameter + ' ' + self.definition + '>'
            else:
                entityline = '<!ENTITY ' + self.entity + ' ' + self.definition + '>'
            if getattr(self, 'hashprefix', None):
                # text starting with "#" that preceded <!ENTITY on its line
                entityline = self.hashprefix + " " + entityline
            if isinstance(entityline, unicode):
                entityline = entityline.encode('UTF-8')
            lines.append(entityline + '\n')
        return "".join(lines)
287
class dtdfile(base.TranslationStore):
    """A .dtd file, made up of dtdunits held in ``self.units``."""
    UnitClass = dtdunit

    def __init__(self, inputfile=None):
        """Construct a dtdfile, optionally reading in from inputfile.

        inputfile, when given, must provide ``read()``; its ``name``
        attribute (if any) is recorded as ``self.filename``. After reading,
        the entity index is built.
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            dtdsrc = inputfile.read()
            self.parse(dtdsrc)
            self.makeindex()

    def parse(self, dtdsrc):
        """Read dtd source code and store it as dtdunits in self.units.

        Any existing units are lost. The source is cut into chunks of lines
        that each run up to (and include) a line beginning with a closing
        quote followed by ``>`` once an ``<!ENTITY`` has been seen; each
        chunk is then handed to dtdunit.parse repeatedly until it reports
        that no lines were processed. Errors raised while parsing a unit are
        reported with warnings.warn instead of being propagated.
        """
        self.units = []
        start = 0
        end = 0
        lines = dtdsrc.split("\n")
        # a line that *starts* with the closing quote and > of an entity
        entity_end = re.compile(r"""["']\s*>""")
        while end < len(lines):
            if start == end:
                end += 1
            foundentity = False
            while end < len(lines):
                if '<!ENTITY' in lines[end]:
                    foundentity = True
                if foundentity and entity_end.match(lines[end]):
                    end += 1
                    break
                end += 1

            linesprocessed = 1  # to initialise loop
            while linesprocessed >= 1:
                newdtd = dtdunit()
                try:
                    linesprocessed = newdtd.parse("\n".join(lines[start:end]))
                    # keep the unit if it holds an entity or at least some
                    # unparsed lines
                    if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines):
                        self.units.append(newdtd)
                except Exception as e:
                    # best effort: report the problem and carry on;
                    # linesprocessed keeps its previous value here
                    warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, "\n".join(lines[start:end])))
                start += linesprocessed

    def __str__(self):
        """Convert to a byte string, encoding unicode output if necessary."""
        source = self.getoutput()
        if isinstance(source, unicode):
            # falls back to UTF-8 when the store has no encoding attribute
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Convert the units back to source by concatenating str() of each."""
        sources = [str(dtd) for dtd in self.units]
        return "".join(sources)

    def makeindex(self):
        """Build the self.index dictionary, keyed on entity names."""
        self.index = {}
        for dtd in self.units:
            # units without an entity (comments only) are not indexed
            if not dtd.isnull():
                self.index[dtd.entity] = dtd

    def rewrap(self):
        """Join multi-line definitions of all units onto a single line.

        Lines are concatenated directly when either side of the join already
        has whitespace; otherwise a single space is inserted between them.
        """
        for dtd in self.units:
            lines = dtd.definition.split("\n")
            if len(lines) > 1:
                definition = lines[0]
                for line in lines[1:]:
                    if definition[-1:].isspace() or line[:1].isspace():
                        definition += line
                    else:
                        definition += " " + line
                dtd.definition = definition
if __name__ == "__main__":
    # Filter mode: read a .dtd file on standard input, join wrapped entity
    # definitions onto single lines, and write the result to standard output.
    import sys
    store = dtdfile(sys.stdin)
    store.rewrap()
    sys.stdout.write(str(store))