Package translate :: Package storage :: Module xliff
[hide private]
[frames | no frames]

Source Code for Module translate.storage.xliff

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2005-2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """Module for handling XLIFF files for translation. 
 24   
 25  The official recommendation is to use the extension .xlf for XLIFF files. 
 26  """ 
 27   
 28  from translate.storage import base 
 29  from translate.storage import lisa 
 30  from lxml import etree 
 31   
 32  # TODO: handle translation types 
 33   
class xliffunit(lisa.LISAunit):
    """A single term in the xliff file.

    The unit wraps one XML element (C{self.xmlelement}, whose tag is given by
    C{rootNode}) and adds the XLIFF specific handling of alternative
    translations, notes, approval/review state, ids and context groups.
    """

    rootNode = "trans-unit"
    languageNode = "source"
    textNode = ""
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    #TODO: id and all the trans-unit level stuff

    def createlanguageNode(self, lang, text, purpose):
        """Returns an xml Element setup with given parameters.

        @param lang: Language of the text. Currently unused, see the TODOs.
        @param text: Text to store in the new element.
        @param purpose: Tag name of the element to create; must be non-empty.
        @return: The new element. It is not attached to the unit here.
        """
        #TODO: for now we do source, but we have to test if it is target, perhaps
        # with parameter. Alternatively, we can use lang, if supplied, since an xliff
        #file has to conform to the bilingual nature promised by the header.
        assert purpose
        langset = etree.Element(self.namespaced(purpose))
        #TODO: check language
        # lisa.setXMLlang(langset, lang)

        # self.createPHnodes(langset, text)
        langset.text = text
        return langset

    def getlanguageNodes(self):
        """We override this to get source and target nodes.

        @return: A list that interleaves the source and target elements found
            anywhere below the unit (source, target, source, target, ...),
            followed by any remaining sources that have no matching target.
        """
        sources = self.xmlelement.findall('.//%s' % self.namespaced(self.languageNode))
        targets = self.xmlelement.findall('.//%s' % self.namespaced('target'))
        nodes = []
        for pair in zip(sources, targets):
            nodes.extend(pair)
        # zip() stops at the shorter list: append the sources left unpaired.
        # Surplus targets are deliberately not added.
        nodes.extend(sources[len(targets):])
        return nodes

    def addalttrans(self, txt, origin=None, lang=None):
        """Adds an alt-trans tag and alt-trans components to the unit.

        @type txt: String
        @param txt: Alternative translation of the source text.
        @param origin: Optional value for the "origin" attribute of the new
            alt-trans element.
        @param lang: Optional language, set on the alt-trans element through
            lisa.setXMLlang.
        """

        #TODO: support adding a source tag ad match quality attribute.  At
        # the source tag is needed to inject fuzzy matches from a TM.
        if isinstance(txt, str):
            # Byte strings are taken to be UTF-8 encoded.
            txt = txt.decode("utf-8")
        alttrans = etree.SubElement(self.xmlelement, self.namespaced("alt-trans"))
        alttarget = etree.SubElement(alttrans, self.namespaced("target"))
        alttarget.text = txt
        if origin:
            alttrans.set("origin", origin)
        if lang:
            lisa.setXMLlang(alttrans, lang)

    def getalttrans(self, origin=None):
        """Returns <alt-trans> for the given origin as a list of units. No
        origin means all alternatives.

        @param origin: Only alternatives accepted by L{correctorigin} for this
            origin are returned.
        @return: A list of base.TranslationUnit objects. Each one carries the
            matching alt-trans element in its C{xmlelement} attribute.
        """
        translist = []
        for node in self.xmlelement.findall(".//%s" % self.namespaced("alt-trans")):
            if not self.correctorigin(node, origin):
                continue
            # We build some mini units that keep the xmlelement. This
            # makes it easier to delete it if it is passed back to us.
            newunit = base.TranslationUnit(self.source)

            # the source tag is optional
            sourcenode = node.find(".//%s" % self.namespaced("source"))
            if sourcenode is not None:
                newunit.source = lisa.getText(sourcenode)

            # must have one or more targets
            targetnode = node.find(".//%s" % self.namespaced("target"))
            newunit.target = lisa.getText(targetnode)
            #TODO: support multiple targets better
            #TODO: support notes in alt-trans
            newunit.xmlelement = node

            translist.append(newunit)
        return translist

    def delalttrans(self, alternative):
        """Removes the supplied alternative from the list of alt-trans tags

        @param alternative: A unit as returned by L{getalttrans}; its
            C{xmlelement} is removed from this unit's element.
        """
        self.xmlelement.remove(alternative.xmlelement)

    def addnote(self, text, origin=None):
        """Add a note specifically in a "note" tag

        @param text: Text of the note; surrounding whitespace is stripped.
        @param origin: Optional value for the "from" attribute of the note.
        """
        if isinstance(text, str):
            # Byte strings are taken to be UTF-8 encoded.
            text = text.decode("utf-8")
        note = etree.SubElement(self.xmlelement, self.namespaced("note"))
        note.text = text.strip()
        if origin:
            note.set("from", origin)

    def getnotelist(self, origin=None):
        """Private method that returns the text from notes matching 'origin' or all notes.

        @return: The note texts in document order, without duplicates.
        """
        notenodes = self.xmlelement.findall(".//%s" % self.namespaced("note"))
        # TODO: consider using xpath to construct initial_list directly
        # or to simply get the correct text from the outset (just remember to
        # check for duplication.
        notelist = []
        seen = {}
        for notenode in notenodes:
            if not self.correctorigin(notenode, origin):
                continue
            text = lisa.getText(notenode)
            # Keep only the first occurrence of each text, preserving order.
            if text not in seen:
                seen[text] = True
                notelist.append(text)
        return notelist

    def getnotes(self, origin=None):
        """Returns the notes matching 'origin' (or all notes) joined by newlines."""
        return '\n'.join(self.getnotelist(origin=origin))

    def removenotes(self):
        """Remove all the translator notes."""
        notes = self.xmlelement.findall(".//%s" % self.namespaced("note"))
        for note in notes:
            if self.correctorigin(note, origin="translator"):
                # The search above also finds nested notes, so remove each
                # note from its real parent: removing a nested note from the
                # unit element itself would raise ValueError.
                note.getparent().remove(note)

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit.

        The error is stored as a note from "pofilter" with the text
        "errorname: errortext".
        """
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        text = errorname + ': ' + errortext
        self.addnote(text, origin="pofilter")

    def geterrors(self):
        """Get all error messages.

        @return: A dictionary mapping error names to error texts, built from
            the "pofilter" notes of this unit.
        """
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        errordict = {}
        for note in self.getnotelist(origin="pofilter"):
            # Split on the first separator only, so that an error text which
            # itself contains ': ' stays intact.
            parts = note.split(': ', 1)
            if len(parts) != 2:
                # Not in "name: text" form; skip it instead of failing.
                continue
            errorname, errortext = parts
            errordict[errorname] = errortext
        return errordict

    def isapproved(self):
        """States whether this unit is approved."""
        return self.xmlelement.get("approved") == "yes"

    def markapproved(self, value=True):
        """Mark this unit as approved.

        @param value: A true value sets approved="yes", a false one "no".
        """
        if value:
            self.xmlelement.set("approved", "yes")
        else:
            self.xmlelement.set("approved", "no")

    def isreview(self):
        """States whether this unit needs to be reviewed"""
        targetnode = self.getlanguageNode(lang=None, index=1)
        return targetnode is not None and \
                "needs-review" in targetnode.get("state", "")

    def markreviewneeded(self, needsreview=True, explanation=None):
        """Marks the unit to indicate whether it needs review. Adds an optional explanation as a note.

        Nothing happens when the unit has no target node.

        @param needsreview: True sets the target state to
            "needs-review-translation"; False removes the state attribute.
        @param explanation: Optional text, added as a note from "translator"
            when the unit is marked as needing review.
        """
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if needsreview:
            targetnode.set("state", "needs-review-translation")
            if explanation:
                self.addnote(explanation, origin="translator")
        elif "state" in targetnode.attrib:
            # Only delete what is there: a target without a state attribute
            # would otherwise raise KeyError.
            del targetnode.attrib["state"]

    def isfuzzy(self):
        """States whether this unit is fuzzy, which here means not approved."""
#        targetnode = self.getlanguageNode(lang=None, index=1)
#        return not targetnode is None and \
#                (targetnode.get("state-qualifier") == "fuzzy-match" or \
#                targetnode.get("state") == "needs-review-translation")
        return not self.isapproved()

    def markfuzzy(self, value=True):
        """Marks the unit as fuzzy or not.

        A fuzzy unit is unapproved and its target (if any) gets the state
        "needs-review-translation"; a non-fuzzy unit is approved and the
        "state" and "state-qualifier" attributes of its target are removed.
        """
        self.markapproved(not value)
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if value:
            targetnode.set("state", "needs-review-translation")
        else:
            for attribute in ["state", "state-qualifier"]:
                if attribute in targetnode.attrib:
                    del targetnode.attrib[attribute]

    def settarget(self, text, lang='xx', append=False):
        """Sets the target string to the given value."""
        super(xliffunit, self).settarget(text, lang, append)
        if text:
            self.marktranslated()

# This code is commented while this will almost always return false.
# This way pocount, etc. works well.
#    def istranslated(self):
#        targetnode = self.getlanguageNode(lang=None, index=1)
#        return not targetnode is None and \
#                (targetnode.get("state") == "translated")

    def istranslatable(self):
        """States whether the unit may be translated.

        @return: False only if the "translate" attribute of the unit is "no"
            (in any letter case), True otherwise.
        """
        value = self.xmlelement.get("translate")
        return not (value and value.lower() == 'no')

    def marktranslated(self):
        """Sets the state of the target node (if there is one) to "translated"."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        # Test for None explicitly: an lxml element without child elements is
        # falsy, so a truth test would skip every target holding plain text.
        if targetnode is None:
            return
        if self.isfuzzy() and "state-qualifier" in targetnode.attrib:
            #TODO: consider
            del targetnode.attrib["state-qualifier"]
        targetnode.set("state", "translated")

    def setid(self, id):
        """Sets the "id" attribute of the unit."""
        self.xmlelement.set("id", id)

    def getid(self):
        """Returns the "id" attribute of the unit, or "" if there is none."""
        return self.xmlelement.get("id") or ""

    def addlocation(self, location):
        """Stores the location as the id of the unit (replacing any old id)."""
        self.setid(location)

    def getlocations(self):
        """Returns a one-element list holding the id of the unit."""
        return [self.getid()]

    def createcontextgroup(self, name, contexts=None, purpose=None):
        """Add the context group to the trans-unit with contexts a list with
        (type, text) tuples describing each context.

        @param name: Value for the "name" attribute of the context-group.
        @param contexts: Non-empty list of (type, text) tuples. Although it
            has a default, a value is required (asserted below).
        @param purpose: Optional value for the "purpose" attribute.
        """
        assert contexts
        group = etree.SubElement(self.xmlelement, self.namespaced("context-group"))
        group.set("name", name)
        if purpose:
            group.set("purpose", purpose)
        for contexttype, text in contexts:
            if isinstance(text, str):
                # Byte strings are taken to be UTF-8 encoded.
                text = text.decode("utf-8")
            context = etree.SubElement(group, self.namespaced("context"))
            context.text = text
            context.set("context-type", contexttype)

    def getcontextgroups(self, name):
        """Returns the contexts in the context groups with the specified name

        @return: A list with one entry per matching context-group; each entry
            is a list of (context-type, text) tuples.
        """
        groups = []
        grouptags = self.xmlelement.findall(".//%s" % self.namespaced("context-group"))
        for group in grouptags:
            if group.get("name") != name:
                continue
            contexts = group.findall(".//%s" % self.namespaced("context"))
            pairs = [(context.get("context-type"), lisa.getText(context))
                     for context in contexts]
            groups.append(pairs) #not extend
        return groups

    def getrestype(self):
        """returns the restype attribute in the trans-unit tag"""
        return self.xmlelement.get("restype")

    def merge(self, otherunit, overwrite=False, comments=True):
        """Merges otherunit into this unit using the parent class, then
        updates the translated and fuzzy state of this unit."""
        #TODO: consider other attributes like "approved"
        super(xliffunit, self).merge(otherunit, overwrite, comments)
        if self.target:
            self.marktranslated()
        if otherunit.isfuzzy():
            self.markfuzzy()

    def correctorigin(self, node, origin):
        """Check against node tag's origin (e.g note or alt-trans)

        @param node: Element whose "from" and "origin" attributes are checked.
        @param origin: Origin to look for; None accepts every node.
        @return: True if origin is None or is contained (substring test) in
            the "from" or "origin" attribute of the node, False otherwise.
        """
        if origin is None:
            return True
        return origin in node.get("from", "") or origin in node.get("origin", "")
308
class xlifffile(lisa.LISAfile):
    """Class representing a XLIFF file store.

    Besides the parsed document the store keeps track of the <file> it is
    currently adding to (C{self._filename}), the matching body node
    (C{self.body}) and a counter used to number new units
    (C{self._messagenum}).
    """
    UnitClass = xliffunit
    rootNode = "xliff"
    bodyNode = "body"
    XMLskeleton = '''<?xml version="1.0" ?>
<xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
 <file original='NoName' source-language='en' datatype='plaintext'>
  <body>
  </body>
 </file>
</xliff>'''
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    def __init__(self, *args, **kwargs):
        """Initialises the store and takes the languages from the first
        <file> node of the document, when they are given there."""
        lisa.LISAfile.__init__(self, *args, **kwargs)
        self._filename = "NoName"
        self._messagenum = 0

        # Allow the inputfile to override defaults for source and target language.
        filenode = self.document.find('.//%s' % self.namespaced('file'))
        if filenode is None:
            # A document without any <file> node has nothing to override.
            return
        sourcelanguage = filenode.get('source-language')
        if sourcelanguage:
            self.setsourcelanguage(sourcelanguage)
        targetlanguage = filenode.get('target-language')
        if targetlanguage:
            self.settargetlanguage(targetlanguage)

    def addheader(self):
        """Initialise the file header.

        Writes the source language and, if set, the target language of the
        store to the first <file> node directly below the document root.
        """
        filenode = self.document.find(self.namespaced("file"))
        filenode.set("source-language", self.sourcelanguage)
        if self.targetlanguage:
            filenode.set("target-language", self.targetlanguage)

    def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
        """creates a filenode with the given filename. All parameters are needed
        for XLIFF compliance.

        @param filename: Value for the "original" attribute.
        @param sourcelanguage: Defaults to the source language of the store.
        @param targetlanguage: Defaults to the target language of the store;
            the attribute is left out when no target language is known.
        @param datatype: Value for the "datatype" attribute.
        @return: The new <file> element containing an empty body. It is not
            attached to the document here.
        """
        self.removedefaultfile()
        if sourcelanguage is None:
            sourcelanguage = self.sourcelanguage
        if targetlanguage is None:
            targetlanguage = self.targetlanguage
        filenode = etree.Element(self.namespaced("file"))
        filenode.set("original", filename)
        filenode.set("source-language", sourcelanguage)
        if targetlanguage:
            filenode.set("target-language", targetlanguage)
        filenode.set("datatype", datatype)
        etree.SubElement(filenode, self.namespaced(self.bodyNode))
        return filenode

    def getfilename(self, filenode):
        """returns the name of the given file"""
        return filenode.get("original")

    def getfilenames(self):
        """returns all filenames in this XLIFF file

        File nodes with a missing or empty "original" attribute are left out.
        """
        filenodes = self.document.findall(self.namespaced("file"))
        filenames = [self.getfilename(filenode) for filenode in filenodes]
        return [filename for filename in filenames if filename]

    def getfilenode(self, filename):
        """finds the filenode with the given name

        @return: The first matching <file> element, or None.
        """
        filenodes = self.document.findall(self.namespaced("file"))
        for filenode in filenodes:
            if self.getfilename(filenode) == filename:
                return filenode
        return None

    def getdatatype(self, filename=None):
        """Returns the datatype of the stored file. If no filename is given,
        the datatype of the first file is given.

        @return: The "datatype" attribute of the file node, or "" when the
            file is not found or, with no filename given, when there is no
            file or the first one is the default "NoName" file.
        """
        if filename:
            node = self.getfilenode(filename)
            if node is not None:
                return node.get("datatype")
        else:
            filenames = self.getfilenames()
            if filenames and filenames[0] != "NoName":
                return self.getdatatype(filenames[0])
        return ""

    def removedefaultfile(self):
        """We want to remove the default file-tag as soon as possible if we
        know if still present and empty.

        At most one "NoName" file node without units is removed, and only if
        the document holds more than one file node.
        """
        filenodes = self.document.findall(self.namespaced("file"))
        if len(filenodes) <= 1:
            return
        unitpath = ".//%s" % self.namespaced(self.UnitClass.rootNode)
        for filenode in filenodes:
            if filenode.get("original") == "NoName" and not filenode.findall(unitpath):
                self.document.getroot().remove(filenode)
                break

    def getheadernode(self, filenode, createifmissing=False):
        """finds the header node for the given filenode

        @param createifmissing: Create an empty header when there is none.
        @return: The header element, or None if it is missing and was not
            created.
        """
        # TODO: Deprecated?
        headernode = filenode.find(self.namespaced("header"))
        if headernode is not None:
            return headernode
        if not createifmissing:
            return None
        headernode = etree.Element(self.namespaced("header"))
        # XLIFF expects the header before the body, so put it first.
        filenode.insert(0, headernode)
        return headernode

    def getbodynode(self, filenode, createifmissing=False):
        """finds the body node for the given filenode

        @param createifmissing: Create an empty body when there is none.
        @return: The body element, or None if it is missing and was not
            created.
        """
        bodynode = filenode.find(self.namespaced("body"))
        if bodynode is not None:
            return bodynode
        if not createifmissing:
            return None
        bodynode = etree.SubElement(filenode, self.namespaced("body"))
        return bodynode

    def addsourceunit(self, source, filename="NoName", createifmissing=False):
        """adds the given trans-unit to the last used body node if the filename
        has changed it uses the slow method instead (will create the nodes
        required if asked).

        The new unit gets the next message number as its id.

        @return: The new unit, or None if the file could not be switched to.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        unit = super(xlifffile, self).addsourceunit(source)
        self._messagenum += 1
        unit.setid("%d" % self._messagenum)
        lisa.setXMLspace(unit.xmlelement, "preserve")
        return unit

    def switchfile(self, filename, createifmissing=False):
        """makes the file with the given name the current one (will create the
        nodes required if asked). Returns success

        On failure the current file, body and message number are left
        untouched, so that a later call for the same filename is not mistaken
        for a file that is already selected.
        """
        filenode = self.getfilenode(filename)
        if filenode is None:
            if not createifmissing:
                return False
            filenode = self.createfilenode(filename)
            self.document.getroot().append(filenode)

        body = self.getbodynode(filenode, createifmissing=createifmissing)
        if body is None:
            return False
        self._filename = filename
        self.body = body
        self._messagenum = len(self.body.findall(".//%s" % self.namespaced("trans-unit")))
        #TODO: was 0 based before - consider
    #    messagenum = len(self.units)
        #TODO: we want to number them consecutively inside a body/file tag
        #instead of globally in the whole XLIFF file, but using len(self.units)
        #will be much faster
        return True

    def creategroup(self, filename="NoName", createifmissing=False, restype=None):
        """adds a group tag into the specified file

        @param restype: Optional value for the "restype" attribute.
        @return: The new group element, or None if the file could not be
            switched to.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        group = etree.SubElement(self.body, self.namespaced("group"))
        if restype:
            group.set("restype", restype)
        return group

    def __str__(self):
        """Serialises the store (through the parent class) after dropping an
        unused default file node."""
        self.removedefaultfile()
        return super(xlifffile, self).__str__()

    def parsestring(cls, storestring):
        """Parses the string to return the correct file object

        If the first unit carries a gettext domain header restype, or the
        datatype of the file is "po", the string is parsed again as a
        poxliff.PoXliffFile (unless that is the class already in use).
        """
        xliff = super(xlifffile, cls).parsestring(storestring)
        if xliff.units:
            header = xliff.units[0]
            if ("gettext-domain-header" in (header.getrestype() or "") \
                    or xliff.getdatatype() == "po") \
                    and cls.__name__.lower() != "poxlifffile":
                # Imported here rather than at module level.
                # NOTE(review): presumably to avoid a circular import -- confirm.
                import poxliff
                xliff = poxliff.PoXliffFile.parsestring(storestring)
        return xliff
    parsestring = classmethod(parsestring)
485