# lib/htmldiff/diff_builder.rb -- definitions touched by this patch, restored
# from the whitespace-collapsed hunk.

# Baseline option values.
#
# @return [Hash{Symbol => Object}] +:block_tag_classes+ is an empty list and
#   +:compare_tag_attributes+ is +false+.
def default_options
  {
    block_tag_classes: [],
    compare_tag_attributes: false
  }
end

# Reads the +:compare_tag_attributes+ option. +replace+ forwards this value to
# +Operation#same_tag?+.
#
# @return [Boolean] +true+ when the option holds a truthy value; a missing or
#   +nil+ entry reads as +false+ rather than leaking +nil+ to callers.
def compare_tag_attributes?
  @options[:compare_tag_attributes] ? true : false
end

<p> becomes <p class="...">

# lib/htmldiff/list_of_words.rb -- private helpers added by this patch,
# restored from the whitespace-collapsed hunk.

# Merges every opening tag that is directly followed by its own closing tag
# (e.g. "<p>" then "</p>") into one Word, so an empty element travels through
# the diff as a single unit. Replaces +@words+ with the regrouped list.
#
# Only the very next word is inspected; whitespace between the two tags is
# not skipped.
#
# @return [Array, nil] the new word list, or +nil+ when there are no words.
def group_empty_tags!
  return if @words.empty?

  grouped = []
  index = 0

  # A manual index is needed because a merged pair consumes two words.
  while index < @words.length
    word = @words[index]
    following = @words[index + 1]

    if empty_tag_pair?(word, following)
      grouped << Word.new("#{word}#{following}")
      index += 2
    else
      grouped << word
      index += 1
    end
  end

  @words = grouped
end

# Tells whether +opening+ is an opening tag and +closing+ is the closing tag
# with the same name (names are compared case-insensitively).
#
# @param opening [#to_s] candidate opening tag
# @param closing [#to_s, nil] the word right after it, +nil+ at the end
# @return [Boolean]
def empty_tag_pair?(opening, closing)
  return false if closing.nil?

  tag_name = opening.to_s[%r{^<([^\s>/]+)[^>]*>$}i, 1]
  return false unless tag_name

  closing.to_s.match?(%r{^</#{Regexp.escape(tag_name)}>$}i)
end

# Finds the first word at or after +start_index+ that is not blank once
# stripped. Not called by +group_empty_tags!+, which looks only at the
# immediately following word.
#
# @param start_index [Integer] position to start scanning from
# @return [Integer, nil] index of that word, or +nil+ when none remains
def find_next_non_whitespace_word(start_index)
  (start_index...@words.length).find { |idx| !@words[idx].to_s.strip.empty? }
end

<p> and - # <p class="...">

# lib/htmldiff/operation.rb -- post-patch definition, restored from the
# whitespace-collapsed hunk.

# Tells whether the first tag found in +old_text+ and the first tag found in
# +new_text+ have the same tag name. By default attributes are ignored, so
# <p> and <p class="x"> count as the same tag. When +compare_attributes+ is
# true the attribute text (stripped of surrounding whitespace) must match as
# well; it is compared as a plain string, so reordered attributes differ.
#
# @param compare_attributes [Boolean] also require identical attribute text
# @return [Boolean] +false+ when either text contains no tag
def same_tag?(compare_attributes = false)
  # The attribute group is optional so attribute-less tags such as <p> still
  # match; +attrs+ is nil for them.
  pattern = /<(?<name>[^>\s]+)(?:\s+(?<attrs>[^>]*))?[\s>]/
  first_tag = pattern.match(old_text)
  second_tag = pattern.match(new_text)

  return false unless first_tag && second_tag
  return false unless first_tag[:name] == second_tag[:name]
  return true unless compare_attributes

  first_tag[:attrs].to_s.strip == second_tag[:attrs].to_s.strip
end

# lib/htmldiff/word.rb -- post-patch definitions, restored from the
# whitespace-collapsed hunk.

# Tells whether this word is an opening tag followed only by optional
# whitespace and its own closing tag, e.g. <p class="x"></p>. Tag names are
# matched case-insensitively so pairs merged by
# +ListOfWords#group_empty_tags!+ (which ignores case) are recognised.
#
# @return [Boolean]
def closed_empty_tag?
  @word.match?(%r{^<([^/\s>]+)(?:\s[^>]*)?>(?:\s*)</\1>$}i)
end

# Tells whether this word is an opening, closing or standalone tag.
#
# @return [Boolean]
def tag?
  opening_tag? || closing_tag? || standalone_tag?
end