# Package translate :: Package convert :: Module test_html2po
#
# Source code for module translate.convert.test_html2po

  1  #!/usr/bin/env python 
  2   
  3  from translate.convert import html2po 
  4  from translate.convert import po2html 
  5  from translate.convert import test_convert 
  6  from translate.misc import wStringIO 
  7   
class TestHTML2PO:
    """Tests for converting HTML markup to PO units with html2po.

    The helpers at the top run the convertors on in-memory strings; the
    ``test_*`` methods feed small HTML snippets through them and check the
    extracted units.

    This is Python 2 code (it relies on ``unicode``).  ``print`` is always
    called with a single parenthesised argument so that the statements
    print the same text under Python 2 and still parse under Python 3.

    NOTE(review): the multi-line HTML literals were rebuilt from a listing
    whose whitespace had been collapsed; any leading indentation inside
    those literals is lost -- compare with the upstream file if the exact
    whitespace matters.
    """

    def html2po(self, markup):
        """Helper to convert html to po without a file.

        Wraps ``markup`` in an in-memory file and returns whatever
        ``html2po.html2po().convertfile`` produces for it.
        """
        inputfile = wStringIO.StringIO(markup)
        convertor = html2po.html2po()
        outputpo = convertor.convertfile(inputfile, "test", False, False)
        return outputpo

    def po2html(self, posource, htmltemplate):
        """Helper to convert po to html without a file.

        Asserts that ``po2html.converthtml`` returns a true value and
        returns the text written to the in-memory output file.
        """
        inputfile = wStringIO.StringIO(posource)
        outputfile = wStringIO.StringIO()
        templatefile = wStringIO.StringIO(htmltemplate)
        assert po2html.converthtml(inputfile, outputfile, templatefile)
        return outputfile.getvalue()

    def countunits(self, pofile, expected):
        """Helper to check that we got the expected number of messages.

        A leading header unit, if present, is not counted.
        """
        actual = len(pofile.units)
        if actual > 0 and pofile.units[0].isheader():
            actual -= 1
        # Printed so that a failing assertion shows the generated store.
        print(pofile)
        assert actual == expected

    def compareunit(self, pofile, unitnumber, expected):
        """Helper to validate a PO message.

        ``unitnumber`` is 1-based.  When the first unit is a header it
        occupies index 0, so the number can be used as an index directly;
        otherwise it is shifted down by one.
        """
        if not pofile.units[0].isheader():
            unitnumber = unitnumber - 1
        print('unit source: ' + str(pofile.units[unitnumber].source) + '|')
        print('expected: ' + expected.encode('utf-8') + '|')
        assert unicode(pofile.units[unitnumber].source) == unicode(expected)

    def check_single(self, markup, itemtext):
        """Check that converting this markup produces a single element with value itemtext."""
        pofile = self.html2po(markup)
        self.countunits(pofile, 1)
        self.compareunit(pofile, 1, itemtext)

    def check_null(self, markup):
        """Check that converting this markup produces no elements."""
        pofile = self.html2po(markup)
        self.countunits(pofile, 0)

    def test_htmllang(self):
        """Test to ensure that we no longer use the lang attribute."""
        markup = '''<html lang="en"><head><title>My title</title></head><body></body></html>'''
        pofile = self.html2po(markup)
        self.countunits(pofile, 1)
        # Check that the first item is the <title> not <head>
        self.compareunit(pofile, 1, "My title")

    def test_title(self):
        """Test that we can extract the <title> tag."""
        self.check_single("<html><head><title>My title</title></head><body></body></html>", "My title")

    # NOTE(review): the def line of this test was missing from the extracted
    # listing; the method name is reconstructed -- confirm against upstream.
    def test_title_with_linebreak(self):
        """Test a linebreak in the <title> tag."""
        htmltext = '''<html>
<head>
<title>My
title</title>
</head>
<body>
</body>
</html>
'''
        self.check_single(htmltext, "My title")

    def test_meta(self):
        """Test that we can extract certain <meta> info from <head>."""
        self.check_single('''<html><head><meta name="keywords" content="these are keywords"></head><body></body></html>''', "these are keywords")

    def test_tag_p(self):
        """Test that we can extract the <p> tag."""
        self.check_single("<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph.")
        markup = "<p>First line.<br>Second line.</p>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    # NOTE(review): the def line of this test was missing from the extracted
    # listing; the method name is reconstructed -- confirm against upstream.
    def test_tag_p_with_linebreak(self):
        """Test newlines within the <p> tag."""
        htmltext = '''<html>
<head>
</head>
<body>
<p>
A paragraph is a section in a piece of writing, usually highlighting a
particular point or topic. It always begins on a new line and usually
with indentation, and it consists of at least one sentence.
</p>
</body>
</html>
'''
        self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
        markup = "<p>First\nline.<br>Second\nline.</p>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    def test_tag_div(self):
        """Test that we can extract the <div> tag."""
        self.check_single("<html><head></head><body><div>A paragraph.</div></body></html>", "A paragraph.")
        markup = "<div>First line.<br>Second line.</div>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    # NOTE(review): the def line of this test was missing from the extracted
    # listing; the method name is reconstructed -- confirm against upstream.
    def test_tag_div_with_linebreaks(self):
        """Test linebreaks within a <div> tag."""
        htmltext = '''<html>
<head>
</head>
<body>
<div>
A paragraph is a section in a piece of writing, usually highlighting a
particular point or topic. It always begins on a new line and usually
with indentation, and it consists of at least one sentence.
</div>
</body>
</html>
'''
        self.check_single(htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.")
        markup = "<div>First\nline.<br>Second\nline.</div>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    def test_tag_a(self):
        """Test that we can extract the <a> tag."""
        self.check_single('<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>', 'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.')

    # NOTE(review): the def line of this test was missing from the extracted
    # listing; the method name is reconstructed -- confirm against upstream.
    def test_tag_a_with_linebreak(self):
        """Test that we can extract the <a> tag with newlines in it."""
        htmltext = '''<html>
<head>
</head>
<body>
<p>A
paragraph
with <a
href="http://translate.org.za/">hyperlink</a>
and
newlines.</p></body></html>
'''
        self.check_single(htmltext, 'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.')

    def test_tag_img(self):
        """Test that we can extract the alt attribute from the <img> tag."""
        self.check_single('''<html><head></head><body><img src="picture.png" alt="A picture"></body></html>''', "A picture")

    def test_img_empty(self):
        """Test that an <img> tag without an alt attribute produces no units."""
        htmlsource = '''<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>'''
        self.check_null(htmlsource)

    def test_tag_table_summary(self):
        """Test that we can extract the summary attribute."""
        self.check_single('''<html><head></head><body><table summary="Table summary"></table></body></html>''', "Table summary")

    def test_table_simple(self):
        """Test that we can fully extract a simple table."""
        markup = '''<html><head></head><body><table><tr><th>Heading One</th><th>Heading Two</th><tr><td>One</td><td>Two</td></tr></table></body></html>'''
        pofile = self.html2po(markup)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "One")
        self.compareunit(pofile, 4, "Two")

    def test_table_complex(self):
        """Test extraction from a table with summary, caption, abbr, thead, tfoot and tbody."""
        markup = '''<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>'''
        pofile = self.html2po(markup)
        self.countunits(pofile, 9)
        self.compareunit(pofile, 1, "This is the summary")
        self.compareunit(pofile, 2, "A caption")
        self.compareunit(pofile, 3, "Head 1")
        self.compareunit(pofile, 4, "Heading One")
        self.compareunit(pofile, 5, "Heading Two")
        self.compareunit(pofile, 6, "Foot One")
        self.compareunit(pofile, 7, "Foot Two")
        self.compareunit(pofile, 8, "One")
        self.compareunit(pofile, 9, "Two")

    def test_table_empty(self):
        """Test that we ignore tables that are empty.

        A table is deemed empty if it has no translatable content.
        """

        self.check_null('''<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>''')
        self.check_null('''<html><head></head><body><table><tr><td>&nbsp;</td></tr></table></body></html>''')
        self.check_null('''<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>''')

    def test_address(self):
        """Test to see if the address element is extracted."""
        self.check_single("<body><address>My address</address></body>", "My address")

    def test_headings(self):
        """Test to see if the h* elements are extracted."""
        markup = "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 6)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "Heading Three")
        self.compareunit(pofile, 4, "Heading Four")
        self.compareunit(pofile, 5, "Heading Five")
        self.compareunit(pofile, 6, "Heading Six")

    # NOTE(review): the def line of this test was missing from the extracted
    # listing; the method name is reconstructed -- confirm against upstream.
    def test_headings_with_linebreaks(self):
        """Test to see if h* elements with newlines can be extracted."""
        markup = "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 6)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "Heading Three")
        self.compareunit(pofile, 4, "Heading Four")
        self.compareunit(pofile, 5, "Heading Five")
        self.compareunit(pofile, 6, "Heading Six")

    def test_dt(self):
        """Test to see if the definition list title (dt) element is extracted."""
        self.check_single("<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>", "Definition List Item Title")

    def test_dd(self):
        """Test to see if the definition list description (dd) element is extracted."""
        self.check_single("<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>", "Definition List Item Description")

    def test_span(self):
        """Test to check that we don't double extract a span item."""
        self.check_single("<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>", "You are a <span>Spanish</span> sentence.")

    def test_ul(self):
        """Test to see if the list item <li> is extracted."""
        markup = "<html><head></head><body><ul><li>Unordered One</li><li>Unordered Two</li></ul><ol><li>Ordered One</li><li>Ordered Two</li></ol></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "Unordered One")
        self.compareunit(pofile, 2, "Unordered Two")
        self.compareunit(pofile, 3, "Ordered One")
        self.compareunit(pofile, 4, "Ordered Two")

    def test_duplicates(self):
        """Check that we use the default style of msgid_comments to disambiguate duplicate messages."""
        markup = "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        # FIXME change this so that we check that the KDE comment is correctly added
        self.compareunit(pofile, 1, "Duplicate")
        self.compareunit(pofile, 2, "Duplicate")

    # NOTE(review): the "wtest_" prefix means a runner that collects "test_*"
    # methods will skip this one -- presumably disabled on purpose; confirm.
    def wtest_multiline_reflow(self):
        """Check that we reflow multiline content to make it more readable for translators."""
        self.check_single('''<td valign="middle" width="96%"><font class="headingwhite">South
Africa</font></td>''', '''<font class="headingwhite">South Africa</font>''')

    # NOTE(review): the "wtest_" prefix means a runner that collects "test_*"
    # methods will skip this one -- presumably disabled on purpose; confirm.
    def wtest_nested_tags(self):
        """Check that we can extract items within nested tags."""
        markup = "<div><p>Extract this</p>And this</div>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        self.compareunit(pofile, 1, "Extract this")
        self.compareunit(pofile, 2, "And this")

    def test_carriage_return(self):
        """Remove carriage returns from files in dos format."""
        # Every line of the literal ends in an explicit "\r" escape followed
        # by the real newline, giving "\r\n" line endings in the input.
        htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r
<html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r
<head>\r
<!-- InstanceBeginEditable name="doctitle" -->\r
<link href="fmfi.css" rel="stylesheet" type="text/css">\r
</head>\r
\r
<body>\r
<p>The rapid expansion of telecommunications infrastructure in recent\r
years has helped to bridge the digital divide to a limited extent.</p> \r
</body>\r
<!-- InstanceEnd --></html>\r
'''

        self.check_single(htmlsource, 'The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.')

    def test_encoding_latin1(self):
        """Convert HTML input in iso-8859-1 correctly to unicode."""
        htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->
<head>
<!-- InstanceBeginEditable name="doctitle" -->
<title>FMFI - South Africa - CSIR Openphone - Overview</title>
<!-- InstanceEndEditable -->
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community">

<!-- InstanceBeginEditable name="head" -->
<!-- InstanceEndEditable -->
<link href="../../../fmfi.css" rel="stylesheet" type="text/css">
</head>

<body>
<p>We aim to please \x96 will you aim too, please?</p>
<p>South Africa\x92s language diversity can be challenging.</p>
</body>
</html>
'''
        pofile = self.html2po(htmlsource)

        # Units 1 and 2 are not inspected; only the two paragraphs carrying
        # the \x96 and \x92 bytes are compared.
        self.countunits(pofile, 4)
        self.compareunit(pofile, 3, u'We aim to please \x96 will you aim too, please?')
        self.compareunit(pofile, 4, u'South Africa\x92s language diversity can be challenging.')

    def test_strip_html(self):
        """Ensure that unnecessary html is stripped from the resulting unit."""

        htmlsource = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>FMFI - Contact</title>
</head>
<body>
<table width="100%" border="0" cellpadding="0" cellspacing="0">
<tr align="left" valign="top">
<td width="150" height="556">
<table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color">
<tr>
<td align="left" valign="top" height="555">
<table width="100%" border="0" cellspacing="0" cellpadding="2">
<tr align="left" valign="top" bgcolor="#660000">
<td width="4%"><strong></strong></td>
<td width="96%"><strong><font class="headingwhite">Projects</font></strong></td>
</tr>
<tr align="left" valign="top">
<td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td>
<td width="96%"><a href="index.html">Home Page</a></td>
</tr>
</table>
</td>
</tr>
</table></td>
</table>
</body>
</html>
'''
        pofile = self.html2po(htmlsource)
        self.countunits(pofile, 3)
        self.compareunit(pofile, 2, u'Projects')
        self.compareunit(pofile, 3, u'Home Page')

        # Translate and convert back:
        pofile.units[1].target = 'Projekte'
        pofile.units[2].target = 'Tuisblad'
        # Normalise newlines and spacing in the output so the snippets below
        # can be matched with a plain substring test.
        htmlresult = self.po2html(str(pofile), htmlsource).replace('\n', ' ').replace('= "', '="').replace('> <', '><')
        snippet = '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>'
        assert snippet in htmlresult
        snippet = '<td width="96%"><a href="index.html">Tuisblad</a></td>'
        assert snippet in htmlresult
362
class TestHTML2POCommand(test_convert.TestConvertCommand, TestHTML2PO):
    """Run the html2po tests through the command-level test harness.

    Combines the generic ``TestConvertCommand`` machinery with the
    ``TestHTML2PO`` cases, binding them to the ``html2po`` module.
    """
    convertmodule = html2po
    defaultoptions = {"progress": "none"}

    def test_help(self):
        """Check the help output for the html2po-specific options.

        Starts from the value returned by the base class ``test_help`` and
        hands it to ``help_check`` once per option string, feeding each
        returned value into the next call; the final call passes
        ``last=True``.
        """
        remaining = test_convert.TestConvertCommand.test_help(self)
        for optionhelp in ("-P, --pot", "--duplicates=DUPLICATESTYLE"):
            remaining = self.help_check(remaining, optionhelp)
        self.help_check(remaining, "-u, --untagged", last=True)
374