37 lines
885 B
Python
37 lines
885 B
Python
import html.parser
|
|
|
|
|
|
class Parser(html.parser.HTMLParser):
    """Flatten an HTML document into a comparable list of events.

    Every event is a 3-tuple ``(kind, value, attrs)``:

    * ``('<', tag, attrs)`` for a start tag, where ``attrs`` is the tag's
      attributes sorted by name and joined as ``name:value|name:value``;
    * ``('>', tag, '')`` for an end tag;
    * ``('_', data, '')`` for text data.
    """

    def __init__(self):
        super().__init__()
        # Events recorded so far, in the order the parser reported them.
        self._stream = []

    def handle_starttag(self, tag, attrs):
        """Record a start tag together with its attributes in name order."""
        # Sort on the name only, so the order in which attributes were
        # written in the markup does not affect the recorded event.
        ordered = sorted(attrs, key=lambda pair: pair[0])
        # HTMLParser reports a valueless attribute (``<input disabled>``) as
        # ``(name, None)``; render it as an empty value instead of failing
        # on ``str + None``.
        rendered = '|'.join(
            name + ':' + ('' if value is None else value)
            for name, value in ordered
        )
        self._stream.append(('<', tag, rendered))

    def handle_endtag(self, tag):
        """Record an end tag."""
        self._stream.append(('>', tag, ''))

    def handle_data(self, data):
        """Record a run of text data."""
        self._stream.append(('_', data, ''))

    @property
    def stream(self):
        """The list of events recorded so far."""
        return self._stream
|
|
|
|
|
|
def _serialize(t):
    """Parse the HTML text *t* and return the event list recorded by ``Parser``."""
    parser = Parser()
    parser.feed(t)
    # feed() may hold back incomplete trailing input (for example an
    # unterminated character reference); close() forces it to be processed
    # so the tail of the document is not silently dropped from the stream.
    parser.close()
    return parser.stream
|
|
|
|
|
|
def structural_diff(a, b):
    """Check if there is a structural difference between two HTML files.

    Both inputs are serialized with ``_serialize`` and the resulting event
    lists are compared position by position.

    Args:
        a: The expected HTML text.
        b: The HTML text to check against *a*.

    Raises:
        AssertionError: At the first position where the two event lists
            differ, or when one document has events left over after the
            other one has ended.
    """
    a_s = _serialize(a)
    b_s = _serialize(b)

    for e, f in zip(a_s, b_s):
        # Raise explicitly instead of using ``assert`` so the check is not
        # stripped when Python runs with -O.
        if e != f:
            raise AssertionError(f'Expected: {e}, found: {f}')

    # zip() stops at the shorter list, so leftover events in either document
    # have to be reported separately.
    common = min(len(a_s), len(b_s))
    if len(a_s) > common:
        raise AssertionError(
            f'Expected: {a_s[common]}, found: end of document'
        )
    if len(b_s) > common:
        raise AssertionError(
            f'Expected: end of document, found: {b_s[common]}'
        )
|