# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, dis- # tribute, sublicense, and/or sell copies of the Software, and to permit # persons to whom the Software is furnished to do so, subject to the fol- # lowing conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. import pytest import botocore.docs.bcdoc.docstringparser as parser from botocore.docs.bcdoc.restdoc import ReSTDocument from tests import mock, unittest class TestDocStringParser(unittest.TestCase): def parse(self, html): docstring_parser = parser.DocStringParser(ReSTDocument()) docstring_parser.feed(html) docstring_parser.close() return docstring_parser.doc.getvalue() def assert_contains_exact_lines_in_order(self, actual, expected): # Get each line and filter out empty lines contents = actual.split(b'\n') contents = [line for line in contents if line and not line.isspace()] for line in expected: self.assertIn(line, contents) beginning = contents.index(line) contents = contents[beginning:] def test_tag_with_collapsible_spaces(self): html = "

a bcd efg

" result = self.parse(html) self.assert_contains_exact_lines_in_order(result, [b'a bcd efg']) def test_nested_lists(self): html = "" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b'* Wello', b' * Horld'] ) def test_nested_lists_with_extra_white_space(self): html = "" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b'* Wello', b' * Horld'] ) def test_link_with_no_period(self): html = "

This is a test Link

" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b'This is a test `Link `__'] ) def test_link_with_period(self): html = "

This is a test Link.

" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b'This is a test `Link `__.'] ) def test_code_with_empty_link(self): html = "

Foo Link

" result = self.parse(html) self.assert_contains_exact_lines_in_order(result, [b'Foo ``Link``']) def test_code_with_link_spaces(self): html = "

Foo Link

" result = self.parse(html) self.assert_contains_exact_lines_in_order(result, [b'Foo ``Link``']) def test_code_with_link_no_spaces(self): html = "

Foo Link

" result = self.parse(html) self.assert_contains_exact_lines_in_order(result, [b'Foo ``Link``']) def test_href_with_spaces(self): html = "

Link

" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b' `Link `__'] ) def test_bold_with_nested_formatting(self): html = "TesttestLink" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b'``Test``test `Link `__'] ) def test_link_with_nested_formatting(self): html = "Test" result = self.parse(html) self.assert_contains_exact_lines_in_order( result, [b'`Test `__'] ) def test_indentation_with_spaces_between_tags(self): # Leading spaces inserted as padding between HTML tags can lead to # unexpected indentation in RST. For example, consider the space between # ``

`` and ```` in the third line of this example: html = ( "

First paragraph

" "

Second paragraph in note

" "

Bold statement: Third paragraph

" "

Last paragraph

" ) # If kept, it will appear as indentation in RST and have the effect of # pulling the third paragraph into the note: # # ``` # First paragraph # # ..note:: # Second paragraph in note <-- intentionally indented # # **Bold statement:** Third paragraph <-- unintentionally indented # # Last paragraph <-- first non-indented paragraph after note # ``` result = self.parse(html) self.assert_contains_exact_lines_in_order( result, # ↓ no whitespace here [b'**Bold statement:** Third paragraph'], ) class TestHTMLTree(unittest.TestCase): def setUp(self): self.style = mock.Mock() self.doc = mock.Mock() self.doc.style = self.style self.tree = parser.HTMLTree(self.doc) def test_add_tag(self): self.tree.add_tag('foo') self.assertIsInstance(self.tree.current_node, parser.TagNode) self.assertEqual(self.tree.current_node.tag, 'foo') def test_add_unsupported_tag(self): del self.style.start_foo del self.style.end_foo self.tree.add_tag('foo') self.assertIn('foo', self.tree.unhandled_tags) def test_add_data(self): self.tree.add_data('foo') self.assertNotIsInstance(self.tree.current_node, parser.DataNode) node = self.tree.head.children[0] self.assertIsInstance(node, parser.DataNode) self.assertEqual(node.data, 'foo') class TestStemNode(unittest.TestCase): def setUp(self): self.style = mock.Mock() self.doc = mock.Mock() self.doc.style = self.style self.node = parser.StemNode() def test_add_child(self): child = parser.StemNode() self.node.add_child(child) self.assertIn(child, self.node.children) self.assertEqual(child.parent, self.node) def test_write(self): self.node.add_child(mock.Mock()) self.node.add_child(mock.Mock()) self.node.write(mock.Mock()) for child in self.node.children: self.assertTrue(child.write.called) class TestTagNode(unittest.TestCase): def setUp(self): self.style = mock.Mock() self.doc = mock.Mock() self.doc.style = self.style self.tag = 'foo' self.node = parser.TagNode(self.tag) def test_write_calls_style(self): self.node.write(self.doc) self.assertTrue(self.style.start_foo.called) self.assertTrue(self.style.end_foo.called) def test_write_unsupported_tag(self): del self.style.start_foo del self.style.end_foo try: self.node.write(self.doc) except AttributeError as e: self.fail(str(e)) class TestDataNode(unittest.TestCase): def setUp(self): self.style = mock.Mock() self.doc = mock.Mock() self.doc.style = self.style self.doc.translate_words.return_value = [] def test_string_data(self): node = parser.DataNode('foo') self.assertEqual(node.data, 'foo') def test_non_string_data_raises_error(self): with self.assertRaises(ValueError): parser.DataNode(5) def test_lstrip(self): node = parser.DataNode(' foo') node.lstrip() self.assertEqual(node.data, 'foo') def test_write(self): node = parser.DataNode('foo bar baz') self.doc.translate_words.return_value = ['foo', 'bar', 'baz'] node.write(self.doc) self.doc.handle_data.assert_called_once_with('foo bar baz') def test_write_empty_string(self): node = parser.DataNode('') node.write(self.doc) self.assertFalse(self.doc.handle_data.called) @pytest.mark.parametrize( 'data, lstrip, rstrip, both', [ ('foo', 'foo', 'foo', 'foo'), (' foo', 'foo', ' foo', 'foo'), (' foo', 'foo', ' foo', 'foo'), ('\tfoo', 'foo', '\tfoo', 'foo'), ('\t \t foo', 'foo', '\t \t foo', 'foo'), ('foo ', 'foo ', 'foo', 'foo'), ('foo ', 'foo ', 'foo', 'foo'), ('foo\t\t', 'foo\t\t', 'foo', 'foo'), ], ) def test_datanode_stripping(data, lstrip, rstrip, both): doc = mock.Mock() doc.style = mock.Mock() doc.translate_words.side_effect = lambda words: words node = parser.DataNode(data) node.lstrip() node.write(doc) doc.handle_data.assert_called_once_with(lstrip) doc.handle_data.reset_mock() node = parser.DataNode(data) node.rstrip() node.write(doc) doc.handle_data.assert_called_once_with(rstrip) doc.handle_data.reset_mock() node = parser.DataNode(data) node.lstrip() node.rstrip() node.write(doc) doc.handle_data.assert_called_once_with(both) @pytest.mark.parametrize( 'data', [ (' '), (' '), ('\t'), ('\t \t '), ], ) def test_datanode_stripping_empty_string(data): doc = mock.Mock() doc.style = mock.Mock() doc.translate_words.side_effect = lambda words: words node = parser.DataNode(data) node.lstrip() node.write(doc) doc.handle_data.assert_not_called() @pytest.mark.parametrize( 'html, expected_lines', [ ('

foo

', [b'foo']), ('

\tfoo

', [b'foo']), ('

foo

', [b'foo']), ('

foo

', [b'foo']), ('

foo\t

', [b'foo']), ('

foo

', [b'foo']), ('

foo

', [b'foo']), ('

foo

', [b'foo']), ('

foo

', [b'foo']), ('

foo

', [b'foo']), # various nested markup examples ('italic', [b'*italic*']), ('

italic

', [b'*italic*']), ('

italic

', [b'*italic*']), ('

italic

', [b'*italic*']), ('

foo italic bar

', [b'foo *italic* bar']), ('

foo bar

', [b'foo *bar*']), ('

foo bar

', [b'foo* bar*']), ('

foo bar

', [b'foo *bar*']), # links ('foo bar', [b'`foo `__ *bar*']), # ReST does not support link text starting with whitespace ('

abc foo

', [b'abc `foo `__']), ('

abc foo bar

', [b'abc `foo `__ bar']), # code-in-a removed and whitespace removed (' foo bar', [b'`foo `__ bar']), # list items ('
  • foo
  • ', [b'* foo']), ('
  • foo
  • ', [b'* foo']), ('
  • foo
  • ', [b'* foo']), ('
  • foo bar
  • ', [b'* foo bar']), ('
  • foo bar
  • ', [b'* foo bar']), ('
  • foo bar
  • ', [b'* foo bar']), # multiple block tags in sequence are each left and right stripped ('

    foo

    bar\t

    ', [b'foo', b'bar']), ('

    foo

  • bar
  • ', [b'foo', b'* bar']), # nested block tags also work ( '

    foo

    bar

    ', [b'foo', b'bar'], ), ], ) def test_whitespace_collapsing(html, expected_lines): docstring_parser = parser.DocStringParser(ReSTDocument()) docstring_parser.feed(html) docstring_parser.close() actual = docstring_parser.doc.getvalue() # Get each line and filter out empty lines contents = actual.split(b'\n') contents = [line for line in contents if line and not line.isspace()] for line in expected_lines: assert line in contents beginning = contents.index(line) contents = contents[beginning:]