-
Notifications
You must be signed in to change notification settings - Fork 846
/
wc3_validate.py
76 lines (60 loc) · 2.13 KB
/
wc3_validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: utf-8 -*-
"""
W3C HTML Validator plugin for generated content.
"""
from pelican import signals
import logging
import os
LOG = logging.getLogger(__name__)
INCLUDE_TYPES = ['html']
def validate_files(pelican):
    """
    Walk the output directory and validate every eligible file found.

    :param pelican: pelican object; its ``settings['OUTPUT_PATH']`` is the
        root directory that gets walked
    """
    output_root = pelican.settings['OUTPUT_PATH']
    for folder, _subdirs, entries in os.walk(output_root):
        for entry in entries:
            # Skip anything that is not one of the validated file types.
            if not should_validate(entry):
                continue
            validate(os.path.join(folder, entry))
def validate(filename):
    """
    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .

    The file is submitted to the validator; every reported error is logged
    at ERROR level and every reported warning at WARNING level, with the
    HTML entities in the message text unescaped.

    :param filename: the filename to validate
    """
    # ``HTMLParser.unescape`` was removed in Python 3.9, so prefer the
    # ``html.unescape`` function and only fall back to the legacy parser
    # method on interpreters that do not provide it.
    try:
        from html import unescape
    except ImportError:
        try:
            from html.parser import HTMLParser
        except ImportError:  # fallback for Python 2:
            from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape  # for unescaping W3C messages

    from py_w3c.validators.html.validator import HTMLValidator

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warnings; both lists share the same message layout,
    # so a single loop handles them and only the logging call differs
    for emit, messages in ((LOG.error, vld.errors),
                           (LOG.warning, vld.warnings)):
        for msg in messages:
            # Use the 'line'/'col' keys when present and truthy, otherwise
            # fall back to 'lastLine' and a 'firstColumn-lastColumn' range.
            line = msg.get('line') or msg['lastLine']
            col = msg.get('col') or '{}-{}'.format(
                msg['firstColumn'], msg['lastColumn'])
            emit(u'line: {0}; col: {1}; message: {2}'.format(
                line, col, unescape(msg['message'])))
def should_validate(filename):
    """Tell whether a file name ends with one of the validated types.

    :param filename: A file name to check against
    :return: True when the name ends with an entry of ``INCLUDE_TYPES``,
        False otherwise
    """
    return any(filename.endswith(suffix) for suffix in INCLUDE_TYPES)
def register():
    """
    Hook ``validate_files`` up to Pelican's ``finalized`` signal.
    """
    finalized_signal = signals.finalized
    finalized_signal.connect(validate_files)