820. Short Encoding of Words #

Problem #

A valid encoding of an array of words is any reference string s and array of indices indices such that:

words.length == indices.length
The reference string s ends with the '#' character.
For each index indices[i], the substring of s starting from indices[i] and up to (but not including) the next '#' character is equal to words[i].

Given an array of words, return the length of the shortest reference string s possible of any valid encoding of words*.*

Example 1:

Input: words = ["time", "me", "bell"]
Output: 10
Explanation: A valid encoding would be s = "time#bell#" and indices = [0, 2, 5].
words[0] = "time", the substring of s starting from indices[0] = 0 to the next '#' is underlined in "time#bell#"
words[1] = "me", the substring of s starting from indices[1] = 2 to the next '#' is underlined in "time#bell#"
words[2] = "bell", the substring of s starting from indices[2] = 5 to the next '#' is underlined in "time#bell#"

Example 2:

Input: words = ["t"]
Output: 2
Explanation: A valid encoding would be s = "t#" and indices = [0].

Constraints:

1 <= words.length <= 2000
1 <= words[i].length <= 7
words[i] consists of only lowercase letters.

Problem Summary #

A valid encoding of the word array words consists of any reference string s and index array indices, and satisfies:

words.length == indices.length
The reference string s ends with the ‘#’ character
For each index indices[i], a substring of s starting from indices[i] and ending at the next ‘#’ character (but not including ‘#') is exactly equal to words[i]

Given a word array words, return the length of the shortest reference string s that can successfully encode words.

Solution Approach #

Brute force solution. First put all words into a dictionary. Then for each word in the dictionary, delete its substrings from the dictionary one by one. In this way, strings with the same suffix are removed, and what remains in the dictionary are words without common suffixes. The final answer is the total length after joining all remaining words with #.
Trie solution. Build a Trie tree, and identical suffixes will be placed on some path from the root to a leaf node. Finally, traverse all words one by one. If the last letter of a word is a leaf node, it means this word should be selected, because it may be the longest word containing some word suffixes. Accumulate the length of this word and add 1 (the length of the # character). The final accumulated length is the answer required by the problem.

Code #

package leetcode

// Solution 1 Brute Force
func minimumLengthEncoding(words []string) int {
	res, m := 0, map[string]bool{}
	for _, w := range words {
		m[w] = true
	}
	for w := range m {
		for i := 1; i < len(w); i++ {
			delete(m, w[i:])
		}
	}
	for w := range m {
		res += len(w) + 1
	}
	return res
}

// Solution 2 Trie
type node struct {
	value byte
	sub   []*node
}

func (t *node) has(b byte) (*node, bool) {
	if t == nil {
		return nil, false
	}
	for i := range t.sub {
		if t.sub[i] != nil && t.sub[i].value == b {
			return t.sub[i], true
		}
	}
	return nil, false
}

func (t *node) isLeaf() bool {
	if t == nil {
		return false
	}
	return len(t.sub) == 0
}

func (t *node) add(s []byte) {
	now := t
	for i := len(s) - 1; i > -1; i-- {
		if v, ok := now.has(s[i]); ok {
			now = v
			continue
		}
		temp := new(node)
		temp.value = s[i]
		now.sub = append(now.sub, temp)
		now = temp
	}
}

func (t *node) endNodeOf(s []byte) *node {
	now := t
	for i := len(s) - 1; i > -1; i-- {
		if v, ok := now.has(s[i]); ok {
			now = v
			continue
		}
		return nil
	}
	return now
}

func minimumLengthEncoding1(words []string) int {
	res, tree, m := 0, new(node), make(map[string]bool)
	for i := range words {
		if !m[words[i]] {
			tree.add([]byte(words[i]))
			m[words[i]] = true
		}
	}
	for s := range m {
		if tree.endNodeOf([]byte(s)).isLeaf() {
			res += len(s)
			res++
		}
	}
	return res
}