Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor for performance and features #32

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ PATH
remote: .
specs:
deadweight (0.2.2)
bisect
css_parser (~> 1.3.5)
nokogiri

GEM
remote: https://rubygems.org/
specs:
addressable (2.3.6)
css_parser (1.3.5)
addressable (2.3.8)
bisect (0.1)
css_parser (1.3.6)
addressable
domain_name (0.5.3)
unf (~> 0.0.3)
Expand All @@ -22,9 +24,11 @@ GEM
ntlm-http (~> 0.1, >= 0.1.1)
webrobots (~> 0.0, >= 0.0.9)
mime-types (1.19)
mini_portile (0.6.2)
net-http-digest_auth (1.2.1)
net-http-persistent (2.7)
nokogiri (1.4.4)
nokogiri (1.6.6.2)
mini_portile (~> 0.6.0)
ntlm-http (0.1.1)
rake (0.9.2)
shoulda (2.11.3)
Expand All @@ -39,5 +43,9 @@ PLATFORMS
DEPENDENCIES
deadweight!
mechanize (~> 2.5.1)
nokogiri (~> 1.6.6.2)
rake
shoulda

BUNDLED WITH
1.10.4
2 changes: 2 additions & 0 deletions deadweight.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ Gem::Specification.new do |s|
s.license = 'MIT'

s.add_dependency 'nokogiri'
s.add_dependency 'bisect'
s.add_dependency 'css_parser', '~> 1.3.5'

s.add_development_dependency "shoulda"
s.add_development_dependency "mechanize", "~> 2.5.1"
s.add_development_dependency "rake"
s.add_development_dependency "nokogiri", '~> 1.6.6.2'

s.files = `git ls-files LICENSE README.md bin lib vendor`.split
s.require_path = 'lib'
Expand Down
174 changes: 138 additions & 36 deletions lib/deadweight.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
require 'css_parser'
require 'bisect'
require 'nokogiri'
require 'open-uri'
require 'deadweight/deadweight_helper'

begin
require 'colored'
Expand All @@ -12,9 +14,11 @@ class String
end
end


class Deadweight
attr_accessor :root, :stylesheets, :rules, :pages, :ignore_selectors, :mechanize, :log_file
attr_reader :unused_selectors, :parsed_rules
attr_reader :selector_nodes, :selector_tree_root, :unused_selector_nodes, :unsupported_selector_nodes
include DeadweightHelper

def initialize
@root = 'http://localhost:3000'
Expand All @@ -27,46 +31,86 @@ def initialize
yield self and run if block_given?
end

def analyze(html)
def analyze(html, selector_nodes=nil)
doc = Nokogiri::HTML(html)

@unused_selectors.collect do |selector, declarations|
# We test against the selector stripped of any pseudo classes,
# but we report on the selector with its pseudo classes.
stripped_selector = strip(selector)
selector_nodes ||= @unused_selector_nodes.dup

found_nodes = selector_nodes.collect do |selector_node|
selector = selector_node.selector

begin
if doc.css(selector).any?
log.puts(" #{selector.green}") if selector_node.from_css?
selector_node
end
rescue
@unused_selector_nodes.delete(selector_node)
@unsupported_selector_nodes << selector_node
nil
end
end

next if stripped_selector.empty?
found_nodes.compact
end

if doc.search(stripped_selector).any?
log.puts(" #{selector.green}")
selector
# Marks as used every selector node that matches +html+, walking the selector
# tree one level at a time.
#
# Each node that analyze reports as found is removed from
# +@unused_selector_nodes+ and replaced there by its children; those children
# are then tested against the same document on the next pass. The loop ends
# when a pass finds nothing left to descend into.
#
# html - the HTML source of the page to test the selectors against.
#
# Returns nil.
def process!(html)
  # Iterate over a copy: analyze deletes unsupported nodes from
  # @unused_selector_nodes while it walks the list it was handed, and deleting
  # from an array that is being iterated makes the next element get skipped.
  selector_nodes = @unused_selector_nodes.dup

  until selector_nodes.empty?
    next_level = []

    analyze(html, selector_nodes).each do |found_node|
      @unused_selector_nodes.delete(found_node)
      @unused_selector_nodes.push(*found_node.children)
      next_level.push(*found_node.children)
    end

    selector_nodes = next_level
  end
end

def add_css!(css)
parser = CssParser::Parser.new
parser.add_block!(css)

selector_count = 0
new_selectors_count = 0

first_nodes = @selector_tree_root.children.dup

parser.each_selector do |selector, declarations, specificity|
next if @unused_selectors.include?(selector)
next if selector =~ @ignore_selectors
next if has_pseudo_classes(selector) and @unused_selectors.include?(strip(selector))
next if selector =~ @ignore_selectors || normalize_whitespace(selector) =~ @ignore_selectors
normalized_selector = normalize(selector)

selector_node = @selector_nodes[normalized_selector]
selector_node ||= SelectorTreeNode.new(normalized_selector)

selector_node.original_selectors << selector
selector_node.declarations << declarations

next if @selector_nodes[normalized_selector]

@selector_nodes[normalized_selector] = selector_node
new_selectors_count += 1

@unused_selectors << selector
@parsed_rules[selector] = declarations
if known_unsupported_selector?(normalized_selector)
@unsupported_selector_nodes << selector_node
else
@selector_tree_root.add_node(selector_node)
end

selector_count += 1
end

selector_count
new_root_nodes = @selector_tree_root.children - first_nodes
@unused_selector_nodes.push(*new_root_nodes)

new_selectors_count
end

def reset!
@parsed_rules = {}
@unused_selectors = []
@unused_selector_nodes = []
@unsupported_selector_nodes = []
@selector_nodes = {}
@selector_tree_root = SelectorTreeRoot.new

@stylesheets.each do |path|
new_selector_count = add_css!(fetch(path))
Expand All @@ -79,12 +123,12 @@ def reset!
log.puts("Added #{new_selector_count} extra selectors".yellow)
end

@total_selectors = @unused_selectors.size
@total_selectors = selectors_from_nodes(@selector_tree_root.children + @unsupported_selector_nodes).size
end

def report
log.puts
log.puts "found #{@unused_selectors.size} unused selectors out of #{@total_selectors} total".yellow
log.puts "found #{selectors_to_review.size} unused selectors out of #{@total_selectors} total".yellow
log.puts
end

Expand Down Expand Up @@ -120,17 +164,28 @@ def run

report

@unused_selectors
selectors_to_review
end

def dump(output)
output.puts(@unused_selectors)
# Combines the unused and the unsupported selectors into one duplicate-free
# list, unused ones first. An optional block is forwarded to both lookups.
def selectors_to_review(&block)
  unused = unused_selectors(&block)
  unsupported = unsupported_selectors(&block)
  unused | unsupported
end

def process!(html)
analyze(html).each do |selector|
@unused_selectors.delete(selector)
end
# Expands +selector_nodes+ (possibly nested arrays of nodes) into each node plus
# its descendants, keeps the ones for which from_css? is true, maps them through
# the block and returns the flattened, duplicate-free result.
#
# Without a block every node is mapped to its original_selectors.
def selectors_from_nodes(selector_nodes, &block)
  block ||= :original_selectors.to_proc

  every_node = selector_nodes.flatten.map { |node| node.and_descendants }.flatten
  css_nodes = every_node.select { |node| node.from_css? }

  css_nodes.map(&block).flatten.uniq
end

# Returns the selectors_from_nodes result for @unused_selector_nodes,
# forwarding the optional block unchanged.
def unused_selectors(&block)
selectors_from_nodes(@unused_selector_nodes, &block)
end

# Returns the selectors_from_nodes result for @unsupported_selector_nodes,
# forwarding the optional block unchanged.
def unsupported_selectors(&block)
selectors_from_nodes(@unsupported_selector_nodes, &block)
end

# Writes the list returned by selectors_to_review to +output+ using puts.
def dump(output)
output.puts(selectors_to_review)
end

# Returns the Mechanize instance, if +mechanize+ is set to +true+.
Expand Down Expand Up @@ -169,14 +224,60 @@ def fetch(path)

private

def has_pseudo_classes(selector)
selector =~ /::?[\w\-]+/
# Builds the normalized form of +selector+: remove_simple_pseudo is applied
# first, then the result goes through normalize_whitespace.
def normalize(selector)
normalize_whitespace(remove_simple_pseudo(selector))
end

# Rebuilds +selector+ from its tokens with canonical spacing.
#
# A token of type :S (a significant space) becomes exactly one space; the text
# of every other token is stripped of surrounding whitespace. Leading and
# trailing whitespace of the final string is removed as well.
def normalize_whitespace(selector)
  pieces = tokenize_selector(selector).map do |type, text|
    type == :S ? ' ' : text.strip
  end

  pieces.join.strip
end

# Returns +selector+ with the "simple" pseudo-classes and all pseudo-elements
# removed.
#
# Nokogiri understands many pseudo-classes; the ones it does not are marked as
# unsupported later, when a page actually reaches them. Some of them, and every
# pseudo-element, can instead be inferred from the base rule: if ".hello" is
# found, ".hello:hover" can reasonably be considered used too (unless it is
# never actually displayed).
def remove_simple_pseudo(selector)
  pseudo_classes = %w(active checked disabled enabled focus hover in-range invalid lang link optional out-of-range read-only read-write required target valid visited)

  # Pseudo-elements. CSS3 spells them with "::", but the single-colon form is
  # still valid, so they are listed here too.
  pseudo_elements = %w(after before first-letter first-line selection)

  droppable = pseudo_classes + pseudo_elements

  # [type, text] pairs of the tokens retained so far.
  kept = []

  tokenize_selector(selector).each do |type, text|
    trailing_types = kept.last(2).map(&:first)

    if trailing_types == [':', ':']
      # Whatever follows "::" is a pseudo-element: drop it and both colons.
      kept.pop(2)
    elsif trailing_types.last == ':' && droppable.include?(text)
      # A listed pseudo such as :hover or :valid: drop it and its colon.
      kept.pop
    else
      kept << [type, text]
    end
  end

  kept.map(&:last).join
end

def strip(selector)
selector = selector.gsub(/^@.*/, '') # @-webkit-keyframes ...
selector = selector.gsub(/:.*/, '') # input#x:nth-child(2):not(#z.o[type='file'])
selector.strip
# Tells whether +selector+ is an at-rule, which this tool does not know how to
# evaluate. That is the case when any token is an :IDENT whose text starts with
# "@", or has the type "@".
def known_unsupported_selector?(selector)
  tokenize_selector(selector).any? do |type, text|
    (type == :IDENT && text.start_with?('@')) || type == '@'
  end
end

def log
Expand Down Expand Up @@ -215,6 +316,7 @@ def initialize_agent

class FetchError < StandardError; end
end

require 'deadweight/selector_tree_node'
require 'deadweight/selector_tree_root'
require 'deadweight/rake_task'

17 changes: 17 additions & 0 deletions lib/deadweight/deadweight_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
class Deadweight
  # Selector tokenizing helper, usable both as a module-level function and as an
  # instance method of any class that includes this module.
  module DeadweightHelper
    # Runs +selector+ through Nokogiri's CSS tokenizer and returns every token it
    # yields, in order, as an array. Reading stops at the first nil/false token.
    def self.tokenize_selector(selector)
      scanner = Nokogiri::CSS::Tokenizer.new
      scanner.scan_setup(selector)

      collected = []
      current = scanner.next_token
      while current
        collected << current
        current = scanner.next_token
      end
      collected
    end

    # Instance-level shortcut for DeadweightHelper.tokenize_selector.
    def tokenize_selector(*args)
      DeadweightHelper.tokenize_selector(*args)
    end
  end
end
66 changes: 66 additions & 0 deletions lib/deadweight/selector_tree_node.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
class Deadweight
  # One node of the selector tree.
  #
  # A node carries a normalized selector, the original selectors and the
  # declarations recorded for it, and a list of child nodes. Nodes compare by
  # their +selector+ string (see <=>).
  class SelectorTreeNode
    include Comparable
    include DeadweightHelper

    attr_accessor :selector, :original_selectors, :declarations, :children

    # selector - the selector string this node stands for.
    def initialize(selector)
      self.selector = selector
      self.original_selectors = []
      self.declarations = []
      self.children = []
    end

    # True when at least one original selector has been recorded on this node.
    def from_css?
      !original_selectors.empty?
    end

    # Orders nodes by their selector string.
    #
    # Returns nil (not false) for anything that is not a SelectorTreeNode: that
    # is what Comparable expects for incomparable operands, and a false result
    # makes Comparable#== raise instead of answering false.
    def <=>(other)
      return nil unless other.is_a?(SelectorTreeNode)
      selector <=> other.selector
    end

    def inspect
      "{#{selector.inspect} => #{children.inspect}}"
    end

    # Adds +node+ below this node.
    #
    # The position comes from Bisect.bisect_left over +children+ (presumably the
    # leftmost index that keeps the list sorted - confirm against the bisect
    # gem). Then:
    # * an equal node already at that position: nothing happens;
    # * +node+ implies the child just before it: +node+ is added under that child;
    # * the child at that position implies +node+: +node+ takes its place and
    #   adopts it as a child;
    # * otherwise +node+ is inserted at that position.
    #
    # Returns self, or nil when an equal node was already present.
    def add_node(node)
      insert_location = Bisect.bisect_left(children, node)

      # Already there
      return if children[insert_location] == node

      # Only look at a real predecessor: at index 0, children[-1] would wrap
      # around to the last child.
      previous = insert_location > 0 ? children[insert_location - 1] : nil
      following = children[insert_location]

      if previous && node.implies?(previous)
        previous.add_node(node)
      elsif following && following.implies?(node)
        node.add_node(following)
        children[insert_location] = node
      else
        children.insert(insert_location, node)
      end
      self
    end

    # This node followed by all of its descendants.
    def and_descendants
      [self] + descendants
    end

    # Direct children first, then the descendants of each child in turn.
    def descendants
      children + children.flat_map(&:descendants)
    end

    # True when a match for this node's selector guarantees a match for
    # +other+'s selector, judged on the selector strings only.
    #
    # .hello.world implies .hello: anything matching .hello.world also matches
    # .hello. A plain prefix is not enough, though - .hello_world starts with
    # .hello without implying it - so the prefix has to end on a boundary.
    def implies?(other)
      return true if other.selector == selector
      return false unless selector.start_with?(other.selector)
      # If other ends on a symbol like ] or ), the prefix cannot stop in the
      # middle of a name, so it cannot be the .hello / .hello_world case.
      return true if other.selector[-1] =~ /[^\w-]/

      # Otherwise the character right after the prefix must not continue a name.
      !!(selector[other.selector.size] =~ /[^\w-]/)
    end

  end

end
Loading