出现最多的字符串

这是用String本身的方法实现（es6的新增方法不太熟悉，基本上es5的）;

使用String的match方法

用带 g 标志的正则匹配，返回的是所有匹配到的字符串组成的数组
用string匹配，则返回一个类似 ["a", index: 2, input: "dfas"] 的数组（只包含第一个匹配）
没有匹配，则返回null

/**
 * Logs the most frequent character of `str`, counting occurrences with
 * String.prototype.match and a global regular expression.
 *
 * The log line contains: the distinct characters (in order of first
 * appearance), the most frequent character, the per-character counts (same
 * order as the distinct characters) and the highest count. When several
 * characters share the highest count, the one seen first wins.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  // Characters
  var most_char = '';
  var char_arr = [];

  // Occurrence counts
  var most_count = 0;
  var count_arr = [];

  var len = str.length;
  if (len > 1) {
    for (var i = 0; i < len; i++) {
      var char = str[i];

      // Each distinct character is counted only once.
      if (char_arr.indexOf(char) !== -1) {
        continue;
      }
      char_arr.push(char);

      // Escape regex metacharacters: unescaped, '.' would match every
      // character and '(' / '[' / '\' would make the RegExp constructor throw.
      var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
      var result = str.match(pattern);
      var count = result ? result.length : 0;
      count_arr.push(count);

      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }
  } else {
    // Empty or one-character input: the string itself is the answer.
    char_arr.push(str);
    most_count = len;
    most_char = str;
  }
  console.log('拥有的字符的种类：' + char_arr, '；出现最多次数的字符：' + most_char, '；出现的次数的数组：' + count_arr, '；出现最多次数：' + most_count);
}

indexOf 和 do{}while()

indexOf() 返回字符出现的第一次的位置，没有匹配则返回-1
indexOf(str, startpos) 有两个参数
- str 匹配的字符
- startpos ｛type number｝ 设置从字符串的开始匹配位置（包含这个startpos）
do{}while()：即使条件不成立，循环体也至少会执行一次

/**
 * Logs the most frequent character of `str`, counting occurrences by
 * repeatedly calling String.prototype.indexOf with a moving start position.
 *
 * The log line contains: the final scan position, the distinct characters
 * (in order of first appearance), the most frequent character, the
 * per-character counts and the highest count. On a tie the character seen
 * first wins.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  var total = str.length;

  // Distinct characters and their tallies, index-aligned.
  var distinct = [];
  var tallies = [];

  var winner = '';
  var winnerHits = 0;

  // Position of the last indexOf probe; stays -1 for inputs shorter than 2.
  var cursor = -1;

  if (total <= 1) {
    distinct.push(str);
    winnerHits = total;
    winner = str;
  } else {
    for (var idx = 0; idx < total; idx += 1) {
      var current = str[idx];
      var hits = 0;

      if (distinct.indexOf(current) === -1) {
        distinct.push(current);

        // The character at idx is its own first occurrence; only the part of
        // the string after idx still has to be scanned.
        hits = 1;
        cursor = str.indexOf(current, idx + 1);
        while (cursor > -1) {
          hits += 1;
          cursor = str.indexOf(current, cursor + 1);
        }

        tallies.push(hits);
      }

      if (hits > winnerHits) {
        winnerHits = hits;
        winner = current;
      }

      // Everything up to idx has been handled; park the cursor after it.
      cursor = idx + 1;
    }
  }

  console.log('pos: ' + cursor, '；拥有的字符的种类：' + distinct, '；出现最多次数的字符：' + winner, '；出现的次数的数组：' + tallies, '；出现最多次数：' + winnerHits);
}

while 类似

/**
 * Logs the most frequent character of `str`, counting occurrences with an
 * indexOf scan that starts at the character's own position.
 *
 * The log line contains: the final scan position, the distinct characters
 * (in order of first appearance), the most frequent character, the
 * per-character counts and the highest count. On a tie the character seen
 * first wins.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  var size = str.length;

  // Distinct characters and their hit counts, index-aligned.
  var uniqueChars = [];
  var hitsPerChar = [];

  var topChar = '';
  var topHits = 0;

  // Scan position; stays 0 for inputs shorter than 2.
  var scanPos = 0;

  if (size > 1) {
    for (var at = 0; at < size; at += 1) {
      var letter = str[at];
      var hits = 0;

      if (uniqueChars.indexOf(letter) < 0) {
        uniqueChars.push(letter);

        // Start on the character itself (one hit), then hop from match to
        // match until indexOf reports -1.
        for (scanPos = at; scanPos > -1; scanPos = str.indexOf(letter, scanPos + 1)) {
          hits += 1;
        }

        hitsPerChar.push(hits);
      }

      if (topHits < hits) {
        topHits = hits;
        topChar = letter;
      }

      // Prepare the scan position for the next character.
      scanPos = at + 1;
    }
  } else {
    uniqueChars.push(str);
    topHits = size;
    topChar = str;
  }

  console.log('pos: ' + scanPos, '；拥有的字符的种类：' + uniqueChars, '；出现最多次数的字符：' + topChar, '；出现的次数的数组：' + hitsPerChar, '；出现最多次数：' + topHits);
}

用search 方法实现，其实逻辑同indexOf是一样的。只不过要利用字符串的截取方法。

/**
 * Logs the most frequent character of `str`, counting occurrences with
 * String.prototype.search on successive slices of the string.
 *
 * The log line contains: the final scan position, the distinct characters
 * (in order of first appearance), the most frequent character, the
 * per-character counts and the highest count. On a tie the character seen
 * first wins.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  // Characters
  var most_char = '';
  var char_arr = [];

  // Occurrence counts
  var most_count = 0;
  var count_arr = [];

  // Absolute position of the most recent match in `str`.
  var pos = 0;

  var len = str.length;
  if (len > 1) {
    for (var i = 0; i < len; i++) {
      var char = str[i];

      if (char_arr.indexOf(char) === -1) {
        char_arr.push(char);

        // Escape regex metacharacters: unescaped, '.' would match every
        // character and '(' / '[' / '\' would make the RegExp constructor
        // throw. No 'g' flag is needed because search() ignores it.
        var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

        // The character at i is the first hit; search() then reports offsets
        // relative to the slice, so convert each one back to an absolute
        // position before slicing again.
        var count = 1;
        pos = i;
        var offset = str.slice(pos + 1).search(pattern);
        while (offset > -1) {
          count++;
          pos = pos + 1 + offset;
          offset = str.slice(pos + 1).search(pattern);
        }

        count_arr.push(count);

        if (most_count < count) {
          most_count = count;
          most_char = char;
        }
      }

      // Reset for the next character.
      pos = i + 1;
    }
  } else {
    // Empty or one-character input: the string itself is the answer.
    char_arr.push(str);
    most_count = len;
    most_char = str;
  }
  console.log('pos: ' + pos, '；拥有的字符的种类：' + char_arr, '；出现最多次数的字符：' + most_char, '；出现的次数的数组：' + count_arr, '；出现最多次数：' + most_count);
}

replace ,这是最最简单的，最好理解。

/**
 * Logs the most frequent character of `str` by repeatedly stripping every
 * occurrence of the leading character with String.prototype.replace and
 * measuring how much shorter the string became.
 *
 * The log line contains: the distinct characters (in order of first
 * appearance), the most frequent character, the per-character counts and the
 * highest count. On a tie the character seen first wins.
 *
 * @param {string} str - Text to analyse.
 */
function getMostChar(str) {
  // Characters
  var most_char = '';
  var char_arr = [];

  // Occurrence counts
  var most_count = 0;
  var count_arr = [];

  // Part of the input whose characters have not been counted yet.
  var rest = str;

  while (rest) { // i.e. while (rest !== '')
    var new_char = rest[0];
    var len = rest.length;

    // Escape regex metacharacters. Unescaped, '.' would strip the whole
    // string at once, '(' would throw, and '^', '$' or '|' would match the
    // empty string, remove nothing and make this loop spin forever.
    var pattern = new RegExp(new_char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    rest = rest.replace(pattern, '');

    // The length difference is the number of occurrences just removed.
    var count = len - rest.length;

    char_arr.push(new_char);
    count_arr.push(count);
    if (most_count < count) {
      most_count = count;
      most_char = new_char;
    }
  }

  console.log('拥有的字符的种类：' + char_arr, '；出现最多次数的字符：' + most_char, '；出现的次数的数组：' + count_arr, '；出现最多次数：' + most_count);
}

split 这个方法实际上是把字符串变成数组，之后用到的就不再是 String 的方法了，所以没有继续做下去。

这个有什么好玩的呢？可以用来检测作者的用词习惯。

例如，我找汪曾祺的《异秉》,测试：

// Sample input: a placeholder string standing in for the text to analyse.
var str = '小说的内容';

// Run the character-frequency analysis on the sample text.
getMostCharSort(str);

/**
 * Counts how often each character occurs in `str`, ignoring whitespace and
 * common Chinese punctuation, logs a summary, then passes the
 * { count, char } entries to `des` and `show`.
 *
 * Occurrences are counted by stripping every copy of the leading character
 * with String.prototype.replace and measuring the length difference.
 *
 * @param {string} str - Text to analyse.
 */
function getMostCharSort(str) {
  var kv_arr = [];

  // Characters
  var most_char = '';
  var char_arr = [];

  // Occurrence counts
  var most_count = 0;
  var count_arr = [];

  // Characters that are stripped but never reported. Built once, without the
  // 'g' flag, so test() carries no lastIndex state between iterations.
  var ignored = /[，。：“”？！\s、—（）]/;

  // Part of the input whose characters have not been counted yet.
  var rest = str;

  while (rest) {
    var char = rest[0];
    var len = rest.length;

    // Escape regex metacharacters. Unescaped, '.' would strip the whole
    // string at once, '(' would throw, and '^', '$' or '|' would match the
    // empty string, remove nothing and make this loop spin forever.
    var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    rest = rest.replace(pattern, '');

    if (!ignored.test(char)) {
      // The length difference is the number of occurrences just removed.
      var count = len - rest.length;

      char_arr.push(char);
      count_arr.push(count);
      kv_arr.push({
        count: count,
        char: char
      });

      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }
  }

  console.log('拥有的字符的种类：' + char_arr);
  console.log('出现最多次数的字符：' + most_char);
  console.log('出现的次数的数组：' + count_arr);
  console.log('出现最多次数：' + most_count);

  des(kv_arr);

  show(kv_arr);
}

/**
 * Renders the character statistics as a <table> appended to document.body.
 *
 * The first row is a header, the second row shows the number of distinct
 * characters (arr.length), and every following row shows one entry.
 *
 * Cells are filled through textContent rather than innerHTML so that
 * characters such as '<' or '&' taken from the analysed text are displayed
 * literally instead of being parsed as markup. Rows are appended directly to
 * the <table> element.
 *
 * @param {Array<{char: string, count: number}>} arr - Entries to display.
 */
function show(arr) {
  // Appends one <tr> to `table` with a text-only <td> per value.
  function appendRow(table, values) {
    var row = document.createElement('tr');
    values.forEach(function(value) {
      var cell = document.createElement('td');
      cell.textContent = String(value);
      row.appendChild(cell);
    });
    table.appendChild(row);
  }

  var table = document.createElement('table');

  appendRow(table, ['字符', '次数']);
  appendRow(table, ['字的种类：', arr.length]);

  arr.forEach(function(item) {
    appendRow(table, [item.char, item.count]);
  });

  document.body.appendChild(table);
}

/**
 * Sorts `arr` in place into descending order of `count`.
 *
 * For each slot, the remaining entries are scanned from the end of the array
 * towards that slot; whenever the slot's count is less than or equal to the
 * scanned entry's count, the two entries exchange their `count` and `char`
 * values (the objects themselves stay where they are).
 *
 * @param {Array<{char: string, count: number}>} arr - Entries to sort.
 */
function des(arr) {
  var slot = 0;

  while (slot < arr.length) {
    var probe = arr.length - 1;

    while (probe > slot) {
      var front = arr[slot];
      var back = arr[probe];

      if (front.count <= back.count) {
        // Exchange the payload of the two entries.
        var heldCount = back.count;
        var heldChar = back.char;

        back.count = front.count;
        back.char = front.char;

        front.count = heldCount;
        front.char = heldChar;
      }

      probe -= 1;
    }

    slot += 1;
  }
}

我们将算法改进，不再用冒泡。

/**
 * Counts how often each character occurs in `str`, ignoring whitespace and
 * common Chinese punctuation, and passes the { count, char } entries to
 * `show`, ordered by descending count.
 *
 * Instead of sorting afterwards, every new entry is inserted directly at its
 * sorted position, found with a binary search. Entries with equal counts keep
 * the order in which their characters first appeared in the text.
 *
 * @param {string} str - Text to analyse.
 */
function getMostCharSort(str) {
  // Entries kept sorted by descending count at all times.
  var kv_arr = [];

  // Characters that are stripped but never reported. Built once, without the
  // 'g' flag, so test() carries no lastIndex state between iterations.
  var ignored = /[，。：“”？！\s、—（）]/;

  // Part of the input whose characters have not been counted yet.
  var rest = str;

  while (rest) { // i.e. while (rest !== '')
    var char = rest[0];
    var len = rest.length;

    // Escape regex metacharacters. Unescaped, '.' would strip the whole
    // string at once, '(' would throw, and '^', '$' or '|' would match the
    // empty string, remove nothing and make this loop spin forever.
    var pattern = new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    rest = rest.replace(pattern, '');

    // Skip whitespace and punctuation.
    if (!ignored.test(char)) {
      // The length difference is the number of occurrences just removed.
      var count = len - rest.length;

      // Binary search for the first index whose count is strictly smaller
      // than the new count; inserting there keeps the array descending and
      // places the new entry after existing entries with the same count.
      var low = 0;
      var high = kv_arr.length;
      while (low < high) {
        var mid = (low + high) >> 1;
        if (kv_arr[mid].count >= count) {
          low = mid + 1;
        } else {
          high = mid;
        }
      }

      kv_arr.splice(low, 0, {
        count: count,
        char: char
      });
    }
  }

  show(kv_arr);
}

/**
 * Renders the character statistics as a <table> appended to document.body.
 *
 * The first row is a header, the second row shows the number of distinct
 * characters (arr.length), and every following row shows one entry.
 *
 * Cells are filled through textContent rather than innerHTML so that
 * characters such as '<' or '&' taken from the analysed text are displayed
 * literally instead of being parsed as markup. Rows are appended directly to
 * the <table> element.
 *
 * @param {Array<{char: string, count: number}>} arr - Entries to display.
 */
function show(arr) {
  // Appends one <tr> to `table` with a text-only <td> per value.
  function appendRow(table, values) {
    var row = document.createElement('tr');
    values.forEach(function(value) {
      var cell = document.createElement('td');
      cell.textContent = String(value);
      row.appendChild(cell);
    });
    table.appendChild(row);
  }

  var table = document.createElement('table');

  appendRow(table, ['字符', '次数']);
  appendRow(table, ['字的种类：', arr.length]);

  arr.forEach(function(item) {
    appendRow(table, [item.char, item.count]);
  });

  document.body.appendChild(table);
}

出现最多的字符串

出现最多的字符串

这个有什么好玩的呢？可以用来检测作者的用词习惯。

相关阅读更多精彩内容

友情链接更多精彩内容