一 什么是集合和映射?
集合是高级数据结构的一种,它们的底层可以用多种方式来实现,就像之前的栈和队列一样。集合就像一个容器一样,我们这里实现的集合里边存储的数据是不能重复的,这样的数据结构可以帮助我们更加快速的进行统计。讲到这里是不是发现和我们之前的二分搜索树非常相似,其实这一次我们就会使用二分搜索树作为集合的底层实现。
二 二分搜索树实现集合Set
1⃣️ 实现集合Set接口
package com.mufeng.set;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public interface Set<E> {
/**
* 向集合中添加元素(实现去重的操作)
* @param e
*/
void add(E e);
/**
* 从集合中删除元素
* @param e
*/
void remove(E e);
/**
* 查询集合中是否有相同元素
* @param e
* @return
*/
boolean contains(E e);
/**
* 获取集合中元素的个数
* @return
*/
int getSize();
/**
* 查看集合是否为空
* @return
*/
boolean isEmpty();
}
2⃣️ 实现具体的Set类
package com.mufeng.set;
import com.mufeng.binarySearchTree.BST;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public class BSTSet<E extends Comparable<E>> implements Set<E> {
/**
* 引入二分搜索树
*/
private BST<E> bst;
/**
* 初始化bst
*/
public BSTSet(){
bst = new BST<>();
}
/**
* 向集合中添加元素(实现去重添加)
* @param e
*/
@Override
public void add(E e) {
bst.add(e);
}
/**
* 从集合中删除一个元素
* @param e
*/
@Override
public void remove(E e) {
bst.remove(e);
}
/**
* 查看集合中是否包含元素e
* @param e
* @return
*/
@Override
public boolean contains(E e) {
return bst.contains(e);
}
/**
* 获取集合中元素的个数
* @return
*/
@Override
public int getSize() {
return bst.size();
}
/**
* 查看集合是否为空
* @return
*/
@Override
public boolean isEmpty() {
return bst.isEmpty();
}
}
其实不难发现,在使用二分搜索树实现集合的时候,我们没有做多余的操作,因为我们自己实现的二分搜索树已经完全包含了集合的所有特性;
三 测试
1⃣️ 编写测试工具类
package com.mufeng.set;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Locale;
import java.io.File;
import java.io.BufferedInputStream;
import java.io.IOException;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public class FileOperation {
// 读取文件名称为filename中的内容,并将其中包含的所有词语放进words中
public static boolean readFile(String filename, ArrayList<String> words){
if (filename == null || words == null){
System.out.println("filename is null or words is null");
return false;
}
// 文件读取
Scanner scanner;
try {
File file = new File(filename);
if(file.exists()){
FileInputStream fis = new FileInputStream(file);
scanner = new Scanner(new BufferedInputStream(fis), "UTF-8");
scanner.useLocale(Locale.ENGLISH);
}
else
return false;
}
catch(IOException ioe){
System.out.println("Cannot open " + filename);
return false;
}
// 简单分词
// 这个分词方式相对简陋, 没有考虑很多文本处理中的特殊问题
// 在这里只做demo展示用
if (scanner.hasNextLine()) {
String contents = scanner.useDelimiter("\\A").next();
int start = firstCharacterIndex(contents, 0);
for (int i = start + 1; i <= contents.length(); )
if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
String word = contents.substring(start, i).toLowerCase();
words.add(word);
start = firstCharacterIndex(contents, i);
i = start + 1;
} else
I++;
}
return true;
}
// 寻找字符串s中,从start的位置开始的第一个字母字符的位置
private static int firstCharacterIndex(String s, int start){
for( int i = start ; i < s.length() ; i ++ )
if( Character.isLetter(s.charAt(i)) )
return I;
return s.length();
}
}
2⃣️ 引入测试文件
a-tale-of-two-cities.txt
pride-and-prejudice.txt
3⃣️ 编写测试程序
package com.mufeng;
import com.mufeng.set.BSTSet;
import com.mufeng.set.FileOperation;
import java.util.ArrayList;
public class Main {
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList<String> words1 = new ArrayList<>();
if(FileOperation.readFile("pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
BSTSet<String> set1 = new BSTSet<>();
for (String word : words1)
set1.add(word);
System.out.println("Total different words: " + set1.getSize());
}
System.out.println();
System.out.println("A Tale of Two Cities");
ArrayList<String> words2 = new ArrayList<>();
if(FileOperation.readFile("a-tale-of-two-cities.txt", words2)){
System.out.println("Total words: " + words2.size());
BSTSet<String> set2 = new BSTSet<>();
for(String word: words2)
set2.add(word);
System.out.println("Total different words: " + set2.getSize());
}
}
}
四 链表实现集合Set
package com.mufeng.set;
import com.mufeng.linkedList.LinkedList;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public class LinkedListSet<E> implements Set<E> {
/**
* 引入链表
*/
private LinkedList<E> linkedList;
/**
* 初始化
*/
public LinkedListSet(){
linkedList = new LinkedList<>();
}
/**
* 向集合中添加元素(去重添加)
* @param e
*/
@Override
public void add(E e) {
// 实现去重添加元素e
if (!linkedList.contains(e)){
linkedList.addFrist(e);
}
}
/**
* 从集合中删除元素
* @param e
*/
@Override
public void remove(E e) {
linkedList.removeElement(e);
}
/**
* 查看集合是否包含元素e
* @param e
* @return
*/
@Override
public boolean contains(E e) {
return linkedList.contains(e);
}
/**
* 获取集合中元素的个数
* @return
*/
@Override
public int getSize() {
return linkedList.getSize();
}
/**
* 查看集合是否为空
* @return
*/
@Override
public boolean isEmpty() {
return linkedList.isEmpty();
}
}
五 测试
package com.mufeng;
import com.mufeng.set.FileOperation;
import com.mufeng.set.LinkedListSet;
import java.util.ArrayList;
public class Main {
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList<String> words1 = new ArrayList<>();
if(FileOperation.readFile("pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
LinkedListSet<String> set1 = new LinkedListSet<>();
for (String word : words1)
set1.add(word);
System.out.println("Total different words: " + set1.getSize());
}
System.out.println();
System.out.println("A Tale of Two Cities");
ArrayList<String> words2 = new ArrayList<>();
if(FileOperation.readFile("a-tale-of-two-cities.txt", words2)){
System.out.println("Total words: " + words2.size());
LinkedListSet<String> set2 = new LinkedListSet<>();
for(String word: words2)
set2.add(word);
System.out.println("Total different words: " + set2.getSize());
}
}
}
六 两种实现方式的性能对比
1⃣️ 测试类
package com.mufeng;
import com.mufeng.set.BSTSet;
import com.mufeng.set.FileOperation;
import com.mufeng.set.LinkedListSet;
import com.mufeng.set.Set;
import java.util.ArrayList;
public class Main {
private static double testSet(Set<String> set, String filename){
long startTime = System.nanoTime();
System.out.println(filename);
ArrayList<String> words = new ArrayList<>();
if(FileOperation.readFile(filename, words)) {
System.out.println("Total words: " + words.size());
for (String word : words)
set.add(word);
System.out.println("Total different words: " + set.getSize());
}
long endTime = System.nanoTime();
return (endTime - startTime) / 1000000000.0;
}
public static void main(String[] args) {
String filename = "pride-and-prejudice.txt";
BSTSet<String> bstSet = new BSTSet<>();
double time1 = testSet(bstSet, filename);
System.out.println("BST Set: " + time1 + " s");
System.out.println();
LinkedListSet<String> linkedListSet = new LinkedListSet<>();
double time2 = testSet(linkedListSet, filename);
System.out.println("Linked List Set: " + time2 + " s");
}
}