算法题--找出n元组

image.png

0. 链接

1. 题目

Given an array of strings, group anagrams together.

Example:

Input: ["eat", "tea", "tan", "ate", "nat", "bat"],
Output:
[
  ["ate","eat","tea"],
  ["nat","tan"],
  ["bat"]
]

Note:

All inputs will be in lowercase.
The order of your output does not matter.

2. 思路1：将每个单词升序排列+通过键值收集n元组

3. 代码

# coding:utf8
from typing import List
import collections
import time
from functools import wraps
import copy


class Solution:
    def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
        ans = collections.defaultdict(list)
        for i in strs:
            ans[tuple(sorted(i))].append(i)
        return list(ans.values())


input_1 = ['eat', 'tea', 'tan', 'ate', 'nat', 'bat']
input_2 = ['hello', 'elloh', 'hello', 'world', 'dorlw']


def cost_time_100(func):
    @wraps(func)
    def _log(*args, **kwargs):
        time_start = time.time()
        count = 1000
        while count > 0:
            if count == 1:
                results = func(*args, **kwargs)
                for result in results:
                    print(result)
            else:
                func(*args, *kwargs)
            count -= 1
        print('耗时{:.4f}秒\n'.format(time.time() - time_start))

    return _log


@cost_time_100
def test(solution, in_1, in_2):
    results = list()
    results.append(solution.groupAnagrams(copy.deepcopy(in_1)))
    results.append(solution.groupAnagrams(copy.deepcopy(in_2)))
    return results


test(Solution(), input_1, input_2)

输出结果

[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]
[['hello', 'elloh', 'hello'], ['world', 'dorlw']]
耗时0.0270秒

4. 结果

image.png

5. 思路2: 哈希+分组

先对每个单词进行哈希, 保证

hash('eat') == hash('ate')

其中哈希算法如下:
- 统计['a', 'b', 'c', ..., 'z']中每个字母的出现次数,
- 哈希结果长度固定为26, 每个位置上的值即为该字母出现次数
如

hash('eeat') = '10002000000000000001000000'

再从左到右根据哈希值，在字典的帮助下，进行分组

6. 代码

# coding:utf8
from typing import List
import collections
import time
from functools import wraps
import copy


class Solution1:
    def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
        code_dict = dict()

        for i, each_str in enumerate(strs):
            hash = self.str_hash(each_str)
            if hash not in code_dict:
                code_dict[hash] = list()
            code_dict[hash].append(each_str)

        rtn_list = list()
        for each_str_list in code_dict.values():
            rtn_list.append(each_str_list)

        return rtn_list

    def str_hash(self, each_str):
        ch_array = [0 for _ in range(26)]
        for ch in each_str:
            ch_ord = ord(ch) - ord('a')
            ch_array[ch_ord] += 1
        return tuple(ch_array)


input_1 = ['eat', 'tea', 'tan', 'ate', 'nat', 'bat']
input_2 = ['hello', 'elloh', 'hello', 'world', 'dorlw']


def cost_time_100(func):
    @wraps(func)
    def _log(*args, **kwargs):
        time_start = time.time()
        count = 1000
        while count > 0:
            if count == 1:
                results = func(*args, **kwargs)
                for result in results:
                    print(result)
            else:
                func(*args, *kwargs)
            count -= 1
        print('耗时{:.4f}秒\n'.format(time.time() - time_start))

    return _log


@cost_time_100
def test(solution, in_1, in_2):
    results = list()
    results.append(solution.groupAnagrams(copy.deepcopy(in_1)))
    results.append(solution.groupAnagrams(copy.deepcopy(in_2)))
    return results


test(Solution1(), input_1, input_2)

输出结果

[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]
[['hello', 'elloh', 'hello'], ['world', 'dorlw']]
耗时0.0600秒

7. 结果

image.png

8. 思路3: 哈希(利用素数)+分组

先对每个单词进行哈希, 保证

hash('eat') == hash('ate')

其中哈希算法如下:
- 定义一个素数组primes, 长度为26, 分别对应26个英文字母。对于一个单词的所有字母, 利用
hash *= primes[ord(ch) - ord('a')] % 1e12
计算出哈希值

再对原始列表从左到右根据哈希值，在字典的帮助下，进行分组

9. 代码

# coding:utf8
from typing import List
import collections
import time
from functools import wraps
import copy


class Solution2:
    def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
        code_dict = dict()

        for i, each_str in enumerate(strs):
            hash = self.str_hash(each_str)
            if hash not in code_dict:
                code_dict[hash] = list()
            code_dict[hash].append(each_str)

        return list(code_dict.values())

    def str_hash(self, each_str):
        constant = 1e12
        primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
                  59, 61, 67, 71, 73, 79, 83, 89, 97, 101]
        hash = 1
        for ch in each_str:
            hash *= primes[ord(ch) - ord('a')]
            if hash > constant:
                hash %= constant
        return hash


input_1 = ['eat', 'tea', 'tan', 'ate', 'nat', 'bat']
input_2 = ['hello', 'elloh', 'hello', 'world', 'dorlw']


def cost_time_100(func):
    @wraps(func)
    def _log(*args, **kwargs):
        time_start = time.time()
        count = 1000
        while count > 0:
            if count == 1:
                results = func(*args, **kwargs)
                for result in results:
                    print(result)
            else:
                func(*args, *kwargs)
            count -= 1
        print('耗时{:.4f}秒\n'.format(time.time() - time_start))

    return _log


@cost_time_100
def test(solution, in_1, in_2):
    results = list()
    results.append(solution.groupAnagrams(copy.deepcopy(in_1)))
    results.append(solution.groupAnagrams(copy.deepcopy(in_2)))
    return results


test(Solution2(), input_1, input_2)

输出结果

[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]
[['hello', 'elloh', 'hello'], ['world', 'dorlw']]
耗时0.0430秒

10. 结果

image.png

算法题--找出n元组

0. 链接

1. 题目

2. 思路1：将每个单词升序排列+通过键值收集n元组

3. 代码

4. 结果

5. 思路2: 哈希+分组

6. 代码

7. 结果

8. 思路3: 哈希(利用素数)+分组

9. 代码

10. 结果

推荐阅读更多精彩内容