Substring with Concatenation of All Words

Problem Id: 30 Difficulty: Hard Tag: Hash Table Tag: Two Pointers Tag: String


Intuition

Let's consider a simplified problem. If the giving words all has length 1, then we could simply use a sliding window with lenth of words, and check if it contains all words given by words.

Now the words has the same length, but the length of each word could be greater than 1, so we just need to do it multiple times with different start point.

Solution


class Solution:
    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        keys = set(words)

        positions = []
        for i in range(len(words[0])):
            self._search(i, s[i:], words, keys, positions)
        return positions

    def _search(self, offset, s, words, keys, positions):
        wordLen = len(words[0])
        if len(s) < wordLen * len(words):
            return []

        counts = {}
        for word in words:
            if word not in counts:
                counts[word] = 0
            counts[word] += 1

        start = 0
        end = 0
        while end < wordLen * len(words):
            current = s[end:end + wordLen]
            if current in keys:
                if current not in counts:
                    counts[current] = 0
                counts[current] -= 1
                if counts[current] == 0:
                    counts.pop(current)
            end += wordLen

        while end <= len(s):
            if len(counts) == 0:
                positions.append(start + offset)

            removed = s[start:start + wordLen]
            if removed in keys:
                if removed not in counts:
                    counts[removed] = 0
                counts[removed] += 1
                if counts[removed] == 0:
                    counts.pop(removed)


            added = s[end:end + wordLen]
            if added in keys:
                if added not in counts:
                    counts[added] = 0
                counts[added] -= 1
                if counts[added] == 0:
                    counts.pop(added)

            start += wordLen
            end += wordLen