以下用 String 自身的方法来实现(ES6 新增的方法不太熟悉,基本上用的都是 ES5 的方法):
- 使用String的match方法
- 用带 g 标志的正则匹配时,返回的是由所有匹配到的字符串组成的数组
- 用string匹配,则返回一个类似
["a", index: 2, input: "dfas"]
- 没有匹配,则返回null
/**
 * Finds the most frequent character of a string using String.prototype.match
 * and logs a summary to the console (distinct characters, most frequent
 * character, per-character counts, highest count). Returns nothing.
 *
 * Every input length goes through the same counting loop, so a
 * single-character string also gets an entry in the count array.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // Occurrences of each character in char_arr (same index).
  var count_arr = [];
  var most_char = '';
  var most_count = 0;

  for (var i = 0; i < str.length; i++) {
    var char = str[i];
    if (char_arr.indexOf(char) !== -1) {
      // Already counted on its first appearance.
      continue;
    }
    char_arr.push(char);

    // Escape regex metacharacters so that '.', '(', '$', '\' ... are matched
    // literally instead of being interpreted (or throwing a SyntaxError).
    var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    var result = str.match(pattern);
    // match() returns null when nothing matches; the character at index i
    // always matches, so this is only a safety net.
    var count = result ? result.length : 0;
    count_arr.push(count);

    // Strict '<' keeps the earliest character when counts tie.
    if (most_count < count) {
      most_count = count;
      most_char = char;
    }
  }

  console.log('拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}
- indexOf 和
do{}while()
- indexOf() 返回字符出现的第一次的位置,没有匹配则返回-1
- indexOf(str, startpos) 有两个参数
- str 匹配的字符
- startpos
{type number}
设置从字符串的开始匹配位置(包含这个startpos)
- do{}while() 即使条件不成立,循环体也至少会执行一次
/**
 * Reports the most frequent character of a string, counting occurrences with
 * String.prototype.indexOf inside a do...while loop.
 *
 * The summary (last search position, distinct characters, most frequent
 * character, per-character counts, highest count) is written to the console;
 * nothing is returned. Inputs whose length is not greater than 1 skip the
 * counting loop, so their count array stays empty.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  var total = str.length;
  // Distinct characters in order of first appearance, and their tallies.
  var distinct = [];
  var tallies = [];
  var winner = '';
  var winnerTally = 0;
  // Index of the last indexOf hit; the next search starts right after it.
  var cursor = -1;

  if (total > 1) {
    for (var idx = 0; idx < total; idx += 1) {
      var current = str[idx];
      // Stays 0 for characters that were already counted earlier.
      var tally = 0;
      var known = distinct.some(function (seen) {
        return seen == current;
      });

      if (!known) {
        distinct.push(current);
        do {
          tally += 1;
          cursor = str.indexOf(current, cursor + 1);
        } while (cursor > -1);
        // At idx 0 the cursor starts at -1, so the first search finds the
        // character itself and the loop runs one pass too many. For later
        // indexes the search starts after idx and the first pass stands for
        // the character at idx.
        if (idx === 0) {
          tally -= 1;
        }
        tallies.push(tally);
      }

      if (winnerTally < tally) {
        winnerTally = tally;
        winner = current;
      }

      // Positions up to idx are done; make the next search start after idx + 1.
      cursor = idx + 1;
    }
  } else {
    distinct.push(str);
    winnerTally = total;
    winner = str;
  }

  console.log('pos: ' + cursor, ';拥有的字符的种类:' + distinct, ';出现最多次数的字符:' + winner, ';出现的次数的数组:' + tallies, ';出现最多次数:' + winnerTally);
}
- 用 while 实现,逻辑与上面的 do{}while() 类似
/**
 * Reports the most frequent character of a string, counting occurrences with
 * String.prototype.indexOf inside a plain while loop.
 *
 * The summary (last search position, distinct characters, most frequent
 * character, per-character counts, highest count) is written to the console;
 * nothing is returned. Inputs whose length is not greater than 1 skip the
 * counting loop, so their count array stays empty.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  var size = str.length;
  var uniqueChars = [];
  var occurrences = [];
  var topChar = '';
  var topCount = 0;
  // Always equals the current index when a new character is inspected, so
  // each search starts one position after that character.
  var at = 0;

  if (size > 1) {
    for (var n = 0; n < size; n += 1) {
      var ch = str[n];
      var hits = 0;
      var seenBefore = uniqueChars.some(function (entry) {
        return entry == ch;
      });

      if (!seenBefore) {
        uniqueChars.push(ch);
        // The first pass counts the character at n itself; every later pass
        // counts one more occurrence found to its right.
        while (at > -1) {
          hits += 1;
          at = str.indexOf(ch, at + 1);
        }
        occurrences.push(hits);
      }

      if (topCount < hits) {
        topCount = hits;
        topChar = ch;
      }

      // Re-arm the search position for the next index.
      at = n + 1;
    }
  } else {
    uniqueChars.push(str);
    topCount = size;
    topChar = str;
  }

  console.log('pos: ' + at, ';拥有的字符的种类:' + uniqueChars, ';出现最多次数的字符:' + topChar, ';出现的次数的数组:' + occurrences, ';出现最多次数:' + topCount);
}
- 用search 方法实现,其实逻辑同indexOf是一样的。只不过要利用字符串的截取方法。
/**
 * Finds the most frequent character of a string using String.prototype.search
 * on successive slices of the input, and logs a summary to the console
 * (final search position, distinct characters, most frequent character,
 * per-character counts, highest count). Returns nothing.
 *
 * Every input length goes through the same counting loop, so a
 * single-character string also gets an entry in the count array and the
 * logged position is always the input length.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // Occurrences of each character in char_arr (same index).
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Absolute index of the most recent occurrence found in str.
  var pos = 0;

  for (var i = 0; i < str.length; i++) {
    var char = str[i];
    if (char_arr.indexOf(char) === -1) {
      char_arr.push(char);

      // Escape regex metacharacters so the character is matched literally;
      // unescaped, '.' matches anything and '(' or '\' throw a SyntaxError.
      var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

      // str[i] is the first occurrence: count it, then keep searching the
      // part of the string that lies to the right of the last hit.
      var count = 1;
      pos = i;
      var sub_str_pos = str.slice(pos + 1).search(pattern);
      while (sub_str_pos > -1) {
        count++;
        // search() is relative to the slice; convert back to an index in str.
        pos = pos + 1 + sub_str_pos;
        sub_str_pos = str.slice(pos + 1).search(pattern);
      }
      count_arr.push(count);

      // Strict '<' keeps the earliest character when counts tie.
      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }
    // Everything up to and including i has been handled.
    pos = i + 1;
  }

  console.log('pos: ' + pos, ';拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}
- replace ,这是最最简单的,最好理解。
/**
 * Finds the most frequent character of a string using String.prototype.replace:
 * the first remaining character is removed everywhere, and the drop in length
 * is its number of occurrences. A summary (distinct characters, most frequent
 * character, per-character counts, highest count) is logged to the console.
 * Returns nothing.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // Occurrences of each character in char_arr (same index).
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Part of the input whose characters have not been counted yet.
  var remaining = str;

  while (remaining) { // i.e. while (remaining !== '')
    var new_char = remaining[0];
    var len = remaining.length;

    // Escape regex metacharacters so the character is removed literally.
    // Unescaped, '$', '^' or '|' match the empty string, nothing gets removed
    // and this loop would never terminate; '(' or '\' would throw.
    var pattern = new RegExp(new_char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    remaining = remaining.replace(pattern, '');

    var count = len - remaining.length;
    char_arr.push(new_char);
    count_arr.push(count);

    // Strict '<' keeps the earliest character when counts tie.
    if (most_count < count) {
      most_count = count;
      most_char = new_char;
    }
  }

  console.log('拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}
- split 这个方法使用起来实际上是把字符串变成数组,那就不是String方法。所以没做下去。
这个有什么好玩的呢?可以用它来统计一位作者的用字习惯,看看他最常用哪些字。
例如,我找来汪曾祺的《异秉》做测试:
// Demo run: the literal is a stand-in for the text to analyse; the function
// declaration for getMostCharSort that follows is hoisted, so calling it here works.
var str = '小说的内容';
getMostCharSort(str);
/**
 * Counts how often every character occurs in a text, ignoring whitespace and
 * the punctuation listed in the filter below, logs a summary to the console,
 * then passes the { count, char } entries to des() and show().
 *
 * Counting works by removing all occurrences of the first remaining character
 * and measuring how much shorter the text became.
 *
 * @param {string} str - Text to analyse.
 */
function getMostCharSort(str) {
  // One { count, char } entry per counted character, in order of appearance.
  var kv_arr = [];
  var char_arr = [];
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Part of the input whose characters have not been processed yet.
  var remaining = str;

  while (remaining) {
    var char = remaining[0];
    var len = remaining.length;

    // Escape regex metacharacters so the character is removed literally.
    // Unescaped, '$', '^' or '|' match the empty string, nothing gets removed
    // and this loop would never terminate; '(' or '\' would throw.
    var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    remaining = remaining.replace(pattern, '');

    // Punctuation and whitespace are removed from the text but not reported.
    if (!(/\,|\。|\:|\“|\”|\?|\!|\s|\、|\—|\(|\)/ig.test(char))) {
      var count = len - remaining.length;
      char_arr.push(char);
      count_arr.push(count);
      kv_arr.push({
        count: count,
        char: char
      });
      // Strict '<' keeps the earliest character when counts tie.
      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }
  }

  console.log('拥有的字符的种类:' + char_arr);
  console.log('出现最多次数的字符:' + most_char);
  console.log('出现的次数的数组:' + count_arr);
  console.log('出现最多次数:' + most_count);
  des(kv_arr);
  show(kv_arr);
}
/**
 * Renders the character statistics as an HTML table appended to document.body.
 *
 * The table starts with a header row and a row holding the number of entries,
 * followed by one row per entry. Values are concatenated into the markup as-is
 * (no HTML escaping).
 *
 * @param {Array<{char: string, count: number}>} arr - Entries to display.
 */
function show(arr) {
  var tableEl = document.createElement('table');
  var headerRows = '<tr><td>字符</td><td>次数</td></tr>'
    + '<tr><td>' + '字的种类:' + '</td><td>' + arr.length + '</td></tr>';

  // Fold every entry into the markup, one <tr> per entry.
  var markup = arr.reduce(function (html, entry) {
    return html + ('<tr ><td>' + entry.char + '</td><td> ' + entry.count + '</td></tr>');
  }, headerRows);

  tableEl.innerHTML = markup;
  document.body.appendChild(tableEl);
}
/**
 * Sorts the entries in place by `count`, highest first.
 *
 * Uses the built-in sort instead of a hand-written O(n^2) exchange loop. The
 * entry objects are reordered as a whole, so any additional fields stay with
 * their record, and on engines with a stable sort entries with equal counts
 * keep their relative order.
 *
 * @param {Array<{count: number, char: string}>} arr - Entries to sort; mutated.
 */
function des(arr) {
  arr.sort(function (a, b) {
    return b.count - a.count;
  });
}
我们将算法改进,不再用冒泡排序。
/**
 * Counts how often every character occurs in a text, ignoring whitespace and
 * the punctuation listed in the filter below, and hands the { count, char }
 * entries to show(), ordered by count with the highest first.
 *
 * Counting works by removing all occurrences of the first remaining character
 * and measuring how much shorter the text became. Ordering is done once at
 * the end with the built-in sort, so no entry can be lost while inserting.
 *
 * @param {string} str - Text to analyse.
 */
function getMostCharSort(str) {
  // One { count, char } entry per counted character.
  var kv_arr = [];
  // Part of the input whose characters have not been processed yet.
  var remaining = str;

  while (remaining) { // i.e. while (remaining !== '')
    var char = remaining[0];
    var len = remaining.length;

    // Escape regex metacharacters so the character is removed literally.
    // Unescaped, '$', '^' or '|' match the empty string, nothing gets removed
    // and this loop would never terminate; '(' or '\' would throw.
    var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    remaining = remaining.replace(pattern, '');

    // Punctuation and whitespace are removed from the text but not reported.
    if (!(/\,|\。|\:|\“|\”|\?|\!|\s|\、|\—|\(|\)/ig.test(char))) {
      kv_arr.push({
        count: len - remaining.length,
        char: char
      });
    }
  }

  // Highest count first; on engines with a stable sort, ties stay in order
  // of first appearance.
  kv_arr.sort(function (a, b) {
    return b.count - a.count;
  });

  show(kv_arr);
}
/**
 * Builds an HTML table from the character statistics and appends it to
 * document.body.
 *
 * Row 1 is the header, row 2 shows how many entries there are, and each
 * following row shows one entry's character and count. Values are inserted
 * into the markup without HTML escaping.
 *
 * @param {Array<{char: string, count: number}>} arr - Entries to display.
 */
function show(arr) {
  var tableNode = document.createElement('table');

  // Collect the markup row by row and join it once at the end.
  var rows = [
    '<tr><td>字符</td><td>次数</td></tr>',
    '<tr><td>' + '字的种类:' + '</td><td>' + arr.length + '</td></tr>'
  ];
  arr.forEach(function (record) {
    rows.push('<tr ><td>' + record.char + '</td><td> ' + record.count + '</td></tr>');
  });

  tableNode.innerHTML = rows.join('');
  document.body.appendChild(tableNode);
}