140. Word Break II

  • 23.3%

https://leetcode.com/problems/word-break-ii/

Given a non-empty string s and a dictionary wordDict containing a list of non-empty words, add spaces in s to construct a sentence where each word is a valid dictionary word. You may assume the dictionary does not contain duplicate words.

Return all such possible sentences.

1
2
3
4
5
For example, given
s = "catsanddog",
dict = ["cat", "cats", "and", "sand", "dog"].

A solution is ["cats and dog", "cat sand dog"].

UPDATE (2017/1/4):
The wordDict parameter had been changed to a list of strings (instead of a set of strings). Please reload the code definition to get the latest changes.


还需要思考

方法一:

https://discuss.leetcode.com/topic/12997/11ms-c-solution-concise

11ms C++ solution (concise)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class Solution {
unordered_map<string, vector<string>> m;

vector<string> combine(string word, vector<string> prev){
for(int i=0;i<prev.size();++i){
prev[i]+=" "+word;
}
return prev;
}

public:
vector<string> wordBreak(string s, unordered_set<string>& dict) {
if(m.count(s)) return m[s]; //take from memory
vector<string> result;
if(dict.count(s)){ //a whole string is a word
result.push_back(s);
}
for(int i=1;i<s.size();++i){
string word=s.substr(i);
if(dict.count(word)){
string rem=s.substr(0,i);
vector<string> prev=combine(word,wordBreak(rem,dict));
result.insert(result.end(),prev.begin(), prev.end());
}
}
m[s]=result; //memorize
return result;
}
};

方法二:

https://discuss.leetcode.com/topic/35762/9-lines-python-10-lines-c

9 lines Python, 10 lines C++

1
2
3
4
5
6
7
8
9
10
11
12
vector<string> wordBreak(string s, unordered_set<string>& wordDict) {
unordered_map<int, vector<string>> memo {{s.size(), {""}}};
function<vector<string>(int)> sentences = [&](int i) {
if (!memo.count(i))
for (int j=i+1; j<=s.size(); j++)
if (wordDict.count(s.substr(i, j-i)))
for (string tail : sentences(j))
memo[i].push_back(s.substr(i, j-i) + (tail=="" ? "" : ' ' + tail));
return memo[i];
};
return sentences(0);
}

方法三:

我的代码实现:

超时了

“aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa”
[“a”,”aa”,”aaa”,”aaaa”,”aaaaa”,”aaaaaa”,”aaaaaaa”,”aaaaaaaa”,”aaaaaaaaa”,”aaaaaaaaaa”]

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class Solution {
public:
vector<string> wordBreak(string s, vector<string>& wordDict) {
int n = s.size();
unordered_set<string> wordSet(wordDict.begin(), wordDict.end());
unordered_map<int, vector<vector<string>>> memo;
vector<vector<string>> tmp{vector<vector<string>>(1, vector<string>(1, ""))};
memo[0] = tmp;
for(int i=1; i<=n; i++){
// 严格区分 ++ --
vector<vector<string>> v;
bool flag = false;
for(int j=i-1; j>=0; j--){
string word = s.substr(j, i-j);
if(wordSet.find(word)!=wordSet.end() && memo.find(j)!=memo.end()){
flag = true;
for(auto cur:memo[j]){
cur.push_back(word);
v.push_back(cur);
}
}
}
if(flag)
memo[i] = v;
}
vector<string> res;
for(int i=0; i<memo[n].size(); i++){
string t = memo[n][i][1];
for(int j=2; j<memo[n][i].size(); j++)
t += " " + memo[n][i][j];
res.push_back(t);
}
return res;
}
};

python


https://discuss.leetcode.com/topic/35762/9-lines-python-10-lines-c

9 lines Python, 10 lines C++

1
2
3
4
5
6
7
8
9
10
def wordBreak(self, s, wordDict):
memo = {len(s): ['']}
def sentences(i):
if i not in memo:
memo[i] = [s[i:j] + (tail and ' ' + tail)
for j in range(i+1, len(s)+1)
if s[i:j] in wordDict
for tail in sentences(j)]
return memo[i]
return sentences(0)

java


https://discuss.leetcode.com/topic/27855/my-concise-java-solution-based-on-memorized-dfs

My concise JAVA solution based on memorized DFS

18ms, 34.39%, Jan.13 2017

Explanation

Using DFS directly will lead to TLE, so I just used HashMap to save the previous results to prune duplicated branches, as the following:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
public class Solution {
public List<String> wordBreak(String s, List<String> wordDict) {
return DFS(s, wordDict, new HashMap<String, LinkedList<String>>());
}

// DFS function returns an array including all substrings derived from s.
List<String> DFS(String s, List<String> wordDict, HashMap<String, LinkedList<String>>map) {
if (map.containsKey(s))
return map.get(s);

LinkedList<String>res = new LinkedList<String>();
if (s.length() == 0) {
res.add("");
return res;
}
for (String word : wordDict) {
if (s.startsWith(word)) {
List<String>sublist = DFS(s.substring(word.length()), wordDict, map);
for (String sub : sublist)
res.add(word + (sub.isEmpty() ? "" : " ") + sub);
}
}
map.put(s, res);
return res;
}
}

Brilliant idea, also can be simplified like this.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
public class Solution {
HashMap<String, LinkedList<String>> map = new HashMap<String, LinkedList<String>>();

public List<String> wordBreak(String s, Set<String> wordDict) {
if (map.containsKey(s))
return map.get(s);

LinkedList<String> res = new LinkedList<String>();
if (s.length() == 0) {
res.add("");
return res;
}
for (String word : wordDict) {
if (s.startsWith(word)) {
List<String> sublist = wordBreak(s.substring(word.length()), wordDict);
for (String sub : sublist)
res.add(word + (sub.isEmpty() ? "" : " ") + sub);
}
}
map.put(s, res);
return res;
}
}

https://discuss.leetcode.com/topic/9837/my-concise-answer

My concise answer.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
public class Solution {
public List<String> wordBreak(String s, Set<String> dict) {
List<String> result = new ArrayList<String>();
for(int j = s.length() - 1; j >= 0; j--){
if(dict.contains(s.substring(j)))
break;
else{
if(j == 0)
return result;
}
}
for(int i = 0; i < s.length()-1; i++)
{
if(dict.contains(s.substring(0,i+1)))
{
List<String> strs = wordBreak(s.substring(i+1,s.length()),dict);
if(strs.size() != 0)
for(Iterator<String> it = strs.iterator();it.hasNext();)
{
result.add(s.substring(0,i+1)+" "+it.next());
}
}
}
if(dict.contains(s)) result.add(s);
return result;
}
}

https://discuss.leetcode.com/topic/8178/slightly-modified-dp-java-solution

Slightly modified DP Java solution

Hi guys!

There’s a lot of concern in other posts about “aaaa…aab” test case that causes TLE when we run through our string not in reverse but from start to end. I’ve thought a bit on how to add a tiny modification and make just the whole thing more effective, not only pass the TLE case.

The approach is the same as before: we loop through all possible prefixes checking if it in the dictionary and caching the results.

But just before jumping into recursion we could also check that the right reminder has a prefix from the dictionary, because if it hasn’t then there’s no sense in splitting the reminder into sub-strings. It’s just a linear check, which I think also could be optimized with some caching but even without optimization the solution is accepted. And also the code looks quite understandable.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
public class Solution {

private final Map<String, List<String>> cache = new HashMap<>();

private boolean containsSuffix(Set<String> dict, String str) {
for (int i = 0; i < str.length(); i++) {
if (dict.contains(str.substring(i))) return true;
}
return false;
}

public List<String> wordBreak(String s, Set<String> dict) {
if (cache.containsKey(s)) return cache.get(s);
List<String> result = new LinkedList<>();
if (dict.contains(s)) result.add(s);
for (int i = 1; i < s.length(); i++) {
String left = s.substring(0,i), right = s.substring(i);
if (dict.contains(left) && containsSuffix(dict, right)) {
for (String ss : wordBreak(right, dict)) {
result.add(left + " " + ss);
}
}
}
cache.put(s, result);
return result;
}
}

https://discuss.leetcode.com/topic/39833/java-6ms-simple-solution-beating-88

Java 6ms simple solution beating 88%

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
public class Solution {
HashMap<Integer, List<String>> dp = new HashMap<>();

public List<String> wordBreak(String s, Set<String> wordDict) {
int maxLength = -1;
for(String ss : wordDict) maxLength = Math.max(maxLength, ss.length());
return addSpaces(s, wordDict, 0, maxLength);
}

private List<String> addSpaces(String s, Set<String> wordDict, int start, int max){
List<String> words = new ArrayList<>();
if(start == s.length()) {
words.add("");
return words;
}
for(int i = start + 1; i <= max + start && i <= s.length(); i++){
String temp = s.substring(start, i);
if(wordDict.contains(temp)){
List<String> ll;
if(dp.containsKey(i)) ll = dp.get(i);
else ll = addSpaces(s, wordDict, i, max);
for(String ss : ll) words.add(temp + (ss.equals("") ? "" : " ") + ss);
}

}
dp.put(start, words);
return words;
}
}

https://discuss.leetcode.com/topic/3495/my-dp-solution-in-java

My DP solution in JAVA

Basically my idea is the following:

  1. Scan the the string from the tail
  2. Build possible solution for the current index based on DP results
  3. Return the solution when index==0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
public class Solution {
public List<String> wordBreak(String s, Set<String> dict) {
Map<Integer, List<String>> validMap = new HashMap<Integer, List<String>>();

// initialize the valid values
List<String> l = new ArrayList<String>();
l.add("");
validMap.put(s.length(), l);

// generate solutions from the end
for(int i = s.length() - 1; i >= 0; i--) {
List<String> values = new ArrayList<String>();
for(int j = i + 1; j <= s.length(); j++) {
if (dict.contains(s.substring(i, j))) {
for(String word : validMap.get(j)) {
values.add(s.substring(i, j) + (word.isEmpty() ? "" : " ") + word);
}
}
}
validMap.put(i, values);
}
return validMap.get(0);
}
}

https://discuss.leetcode.com/topic/34260/java-dp-dfs-memoization-dfs-and-dp-pruning-solutions-with-analysis

Java DP+DFS, Memoization+DFS, and DP Pruning Solutions with Analysis

I’ve been struggling with this problem for a long time, and I’d love to share three different strategies I have tried to solve it. All of them are ACed.

Method 1: DP + DFS. Very similar to Word Break I, but instead of using a boolean dp array, I used an array of Lists to maintain all of the valid start positions for every end position. Then just do classic backtracking to find all solutions. The time complexity is O(n*m) + O(n * number of solutions), where n is the length of the input string, m is the length of the longest word in the dictionary. The run time was 6ms. It is very efficient because DP is used to find out all the valid answers, and no time is wasted on doing the backtracking.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
public List<String> wordBreak(String s, Set<String> wordDict) {
List<Integer>[] starts = new List[s.length() + 1]; // valid start positions
starts[0] = new ArrayList<Integer>();

int maxLen = getMaxLen(wordDict);
for (int i = 1; i <= s.length(); i++) {
for (int j = i - 1; j >= i - maxLen && j >= 0; j--) {
if (starts[j] == null) continue;
String word = s.substring(j, i);
if (wordDict.contains(word)) {
if (starts[i] == null) {
starts[i] = new ArrayList<Integer>();
}
starts[i].add(j);
}
}
}

List<String> rst = new ArrayList<>();
if (starts[s.length()] == null) {
return rst;
}

dfs(rst, "", s, starts, s.length());
return rst;
}


private void dfs(List<String> rst, String path, String s, List<Integer>[] starts, int end) {
if (end == 0) {
rst.add(path.substring(1));
return;
}

for (Integer start: starts[end]) {
String word = s.substring(start, end);
dfs(rst, " " + word + path, s, starts, start);
}
}

private int getMaxLen(Set<String> wordDict) {
int max = 0;
for (String s : wordDict) {
max = Math.max(max, s.length());
}
return max;
}

Method 2: Memoization + Backtracking. Before I came up with Method 1, I also tried using a HashMap to memoize all the possible strings that can be formed starting from index i. I referred to this post from @Pixel_
The time complexity is O(len(wordDict) ^ len(s / minWordLenInDict)) as @Pixel_ mentioned. The space complexity would be larger than other methods though. Here is my code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
public List<String> wordBreak(String s, Set<String> wordDict) {
HashMap<Integer, List<String>> memo = new HashMap<>(); // <Starting index, rst list>
return dfs(s, 0, wordDict, memo);
}

private List<String> dfs(String s, int start, Set<String> dict, HashMap<Integer, List<String>> memo) {
if (memo.containsKey(start)) {
return memo.get(start);
}

List<String> rst = new ArrayList<>();
if (start == s.length()) {
rst.add("");
return rst;
}

String curr = s.substring(start);
for (String word: dict) {
if (curr.startsWith(word)) {
List<String> sublist = dfs(s, start + word.length(), dict, memo);
for (String sub : sublist) {
rst.add(word + (sub.isEmpty() ? "" : " ") + sub);
}
}
}

memo.put(start, rst);
return rst;
}

Method 3: DP Prunning + Backtracking. My very first solution is like this: using a boolean array to memoize whether a substring starting from position i to the end is breakable. This works well for worst cases like: s = “aaaaaaaaaaaab”, dict = [“a”, “aa”, “aaa”, “aaaa”]. However, for cases like: s = “aaaaaaaaaaaaa”, dict = [“a”, “aa”, “aaa”, “aaaa”], the time complexity is still O(2^n). Here is the code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
public List<String> wordBreak(String s, Set<String> wordDict) {
List<String> rst = new ArrayList<>();
if (s == null || s.length() == 0 || wordDict == null) {
return rst;
}

boolean[] canBreak = new boolean[s.length()];
Arrays.fill(canBreak, true);
StringBuilder sb = new StringBuilder();
dfs(rst, sb, s, wordDict, canBreak, 0);
return rst;
}

private void dfs(List<String> rst, StringBuilder sb, String s, Set<String> dict,
boolean[] canBreak, int start) {
if (start == s.length()) {
rst.add(sb.substring(1));
return;
}

if (!canBreak[start]) {
return;
}

for (int i = start + 1; i <= s.length(); i++) {
String word = s.substring(start, i);
if (!dict.contains(word)) continue;

int sbBeforeAdd = sb.length();
sb.append(" " + word);

int rstBeforeDFS = rst.size();
dfs(rst, sb, s, dict, canBreak, i);
if (rst.size() == rstBeforeDFS) {
canBreak[i] = false;
}
sb.delete(sbBeforeAdd, sb.length());
}
}

private int getMaxLen(Set<String> wordDict) {
int max = 0;
for (String s : wordDict) {
max = Math.max(max, s.length());
}
return max;
}

谢谢你,可爱的朋友。