一、String 内部实际的数据存储结构是 char 数组,源码(JDK 1.8)如下:
public final class String implements java.io.Serializable, Comparable<String>, CharSequence{
//用于存储字符串值
/** The value is used for character storage. */
private final charvalue[];
//缓存字符串的哈希值
/** Cache the hash code for the string */
private inthash; // Default to 0
}
二、String 五类构造函数:
/**
 * Builds a string from a whole byte array by delegating to the
 * three-argument constructor with offset 0 and the array's full length.
 *
 * @param bytes the source bytes; its length is read here, so a null
 *              argument fails with a NullPointerException
 */
public String(byte[] bytes) {
this(bytes, 0, bytes.length);
}
/**
 * Builds a string that holds its own copy of the given characters, so the
 * new string does not share the caller's array.
 *
 * @param value the initial characters; its length is read here, so a null
 *              argument fails with a NullPointerException
 */
public String(char[] value) {
    this.value = Arrays.copyOf(value, value.length);
}
/**
 * Copy constructor: the new string takes the same backing array reference
 * as {@code original} (no array copy is made) and its cached hash value.
 *
 * @param original the string to duplicate
 */
public String(String original) {
this.value = original.value;
this.hash = original.hash;
}
/**
 * Builds a string from the current contents of a {@code StringBuffer}.
 * The first {@code buffer.length()} characters of the buffer's array are
 * copied while holding the buffer's monitor.
 *
 * @param buffer the buffer whose contents are copied
 */
public String(StringBuffer buffer) {
synchronized (buffer) {
this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
}
}
/**
 * Builds a string from the current contents of a {@code StringBuilder} by
 * copying the first {@code builder.length()} characters of its array.
 * Unlike the {@code StringBuffer} variant, no lock is taken around the copy.
 *
 * @param builder the builder whose contents are copied
 */
public String(StringBuilder builder) {
    this.value = Arrays.copyOf(builder.getValue(), builder.length());
}
三、equals方法:
/**
 * Compares this string with another object for content equality.
 *
 * @param anObject the object to compare against
 * @return {@code true} if {@code anObject} is this same instance, or is a
 *         {@code String} with the same length and the same char at every
 *         index; {@code false} otherwise (including for null)
 */
public boolean equals(Object anObject) {
    // Same reference: equal without looking at the contents.
    if (this == anObject) {
        return true;
    }
    // Anything that is not a String (null included) can never be equal.
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] mine = value;
    final char[] theirs = ((String) anObject).value;
    final int count = mine.length;
    // Different lengths rule out equality before any char is compared.
    if (count != theirs.length) {
        return false;
    }
    // Same length: equal only if every position holds the same char.
    for (int pos = 0; pos < count; pos++) {
        if (mine[pos] != theirs[pos]) {
            return false;
        }
    }
    return true;
}
四、compareTo方法:
/**
 * Compares two strings char by char.
 *
 * @param anotherString the string to compare with
 * @return the difference of the first pair of differing chars (this minus
 *         other); if one string is a prefix of the other, the difference of
 *         the lengths (this minus other); 0 when both are identical
 */
public int compareTo(String anotherString) {
    final char[] self = value;
    final char[] other = anotherString.value;
    final int selfLen = self.length;
    final int otherLen = other.length;
    // Only the positions present in both strings can be compared pairwise.
    final int shared = Math.min(selfLen, otherLen);
    for (int idx = 0; idx < shared; idx++) {
        final char a = self[idx];
        final char b = other[idx];
        if (a != b) {
            // First mismatch decides the order: difference of the char values.
            return a - b;
        }
    }
    // The shorter string is a prefix of the longer one: order by length.
    return selfLen - otherLen;
}
五、indexOf方法:
/**
 * Finds the first occurrence of {@code str}, searching from index 0.
 * Delegates to {@code indexOf(String, int)}.
 *
 * @param str the substring to look for
 * @return whatever {@code indexOf(str, 0)} returns
 */
public int indexOf(String str) {
return indexOf(str, 0);
}
/**
 * Overload that starts the search at {@code fromIndex}. It hands this
 * string's full char array and the full char array of {@code str} to the
 * static array-based {@code indexOf}.
 *
 * @param str       the substring to look for
 * @param fromIndex the index to start searching from
 * @return the value returned by the array-based {@code indexOf}
 */
public int indexOf(String str, int fromIndex) {
    final char[] haystack = value;
    final char[] needle = str.value;
    return indexOf(haystack, 0, haystack.length, needle, 0, needle.length, fromIndex);
}
/**
 * Array-based search that does the actual work for the {@code indexOf}
 * overloads. Declared without an access modifier, so it is only visible
 * inside the package.
 *
 * @param source       the characters being searched
 * @param sourceOffset offset of the searched region within {@code source}
 * @param sourceCount  number of characters in the searched region
 * @param target       the characters being searched for
 * @param targetOffset offset of the pattern within {@code target}
 * @param targetCount  number of characters in the pattern
 * @param fromIndex    index (relative to {@code sourceOffset}) to start from;
 *                     negative values are treated as 0
 * @return the index of the first match relative to {@code sourceOffset},
 *         or -1 if there is none; for an empty pattern, {@code fromIndex}
 *         clamped to the range {@code [0, sourceCount]}
 */
static int indexOf(char[] source, int sourceOffset, int sourceCount,
        char[] target, int targetOffset, int targetCount, int fromIndex) {
    // Start at or beyond the end: only an empty pattern can match, at the end.
    if (fromIndex >= sourceCount) {
        return (targetCount == 0 ? sourceCount : -1);
    }
    // A negative start is clamped to 0.
    if (fromIndex < 0) {
        fromIndex = 0;
    }
    // An empty pattern matches right at the start index.
    if (targetCount == 0) {
        return fromIndex;
    }

    // First pattern char, used to locate candidate positions quickly.
    char first = target[targetOffset];
    // Last position at which a full match could still begin.
    int max = sourceOffset + (sourceCount - targetCount);

    for (int i = sourceOffset + fromIndex; i <= max; i++) {
        /* Look for first character. */
        if (source[i] != first) {
            while (++i <= max && source[i] != first);
        }

        /* Found first character, now look at the rest of v2 */
        if (i <= max) {
            int j = i + 1;
            int end = j + targetCount - 1;
            // Advance while the remaining pattern chars keep matching.
            for (int k = targetOffset + 1; j < end && source[j] == target[k]; j++, k++);

            if (j == end) {
                /* Found whole string. */
                return i - sourceOffset;
            }
        }
    }
    return -1;
}
六、contains方法:
/**
 * Tells whether this string contains the given character sequence.
 *
 * @param s the sequence to look for; converted with {@code toString()}
 * @return {@code true} when {@code indexOf} finds the sequence (result is
 *         not negative), {@code false} otherwise
 */
public boolean contains(CharSequence s) {
    // A non-negative index means the sequence occurs somewhere in this string.
    final int position = indexOf(s.toString());
    return position >= 0;
}
七、toLowerCase方法:
/**
 * Converts this string to lower case using the rules of the given locale.
 *
 * The method first scans for the first character that lowercasing would
 * change. If there is none it returns this same instance. Otherwise it
 * copies the unchanged prefix and converts the remainder code point by code
 * point, growing the result array when a mapping produces more chars than
 * it consumed.
 *
 * @param locale the locale whose case rules apply; must not be null
 * @return this instance when nothing changes, otherwise a new string built
 *         from the converted characters
 * @throws NullPointerException if {@code locale} is null
 */
public String toLowerCase(Locale locale) {
// A null locale is rejected outright; no default locale is substituted here.
if (locale == null) {
throw new NullPointerException();
}
int firstUpper;
final int len = value.length;
// Scan from the start for the first character that lowercasing would change.
// Everything before that position is later copied into result unchanged.
/* Now check if there are any characters that need to be changed. */
scan: {
for (firstUpper = 0 ; firstUpper < len; ) {
char c = value[firstUpper];
// A char in the high-surrogate range is handled as a full code point
// (read via codePointAt); all other chars are checked individually.
if ((c >= Character.MIN_HIGH_SURROGATE)
&& (c <= Character.MAX_HIGH_SURROGATE)) {
int supplChar = codePointAt(firstUpper);
if (supplChar != Character.toLowerCase(supplChar)) {
break scan;
}
firstUpper += Character.charCount(supplChar);
} else {
if (c != Character.toLowerCase(c)) {
break scan;
}
firstUpper++;
}
}
// The loop finished without finding a character to change.
return this;
}
char[] result = new char[len];
int resultOffset = 0; /* result may grow, so i+resultOffset
* is the write location in result */
/* Just copy the first few lowerCase characters. */
System.arraycopy(value, 0, result, 0, firstUpper);
String lang = locale.getLanguage();
// NOTE(review): the language codes are compared by reference (==), not equals();
// presumably this relies on getLanguage() returning interned strings — confirm.
boolean localeDependent =
(lang == "tr" || lang == "az" || lang == "lt");
char[] lowerCharArray;
int lowerChar;
int srcChar;
int srcCount;
// Convert the rest of the string; srcCount is how many chars the current
// source code point occupies (2 for a surrogate pair, otherwise 1).
for (int i = firstUpper; i < len; i += srcCount) {
srcChar = (int)value[i];
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
&& (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
srcChar = codePointAt(i);
srcCount = Character.charCount(srcChar);
} else {
srcCount = 1;
}
// Locale-dependent languages and the two listed code points go through
// ConditionalSpecialCasing instead of the plain Character mapping.
if (localeDependent ||
srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA
srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE
lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
} else {
// Ordinary case: use Character.toLowerCase to get the lower-case code point.
lowerChar = Character.toLowerCase(srcChar);
}
// Character.ERROR or a supplementary code point cannot be stored as one char.
if ((lowerChar == Character.ERROR)
|| (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
if (lowerChar == Character.ERROR) {
lowerCharArray =
ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
} else if (srcCount == 2) {
// Source was a surrogate pair: write the mapped chars in place and
// adjust the offset by the difference in char count.
resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
continue;
} else {
lowerCharArray = Character.toChars(lowerChar);
}
/* Grow result if needed */
int mapLen = lowerCharArray.length;
if (mapLen > srcCount) {
char[] result2 = new char[result.length + mapLen - srcCount];
System.arraycopy(result, 0, result2, 0, i + resultOffset);
result = result2;
}
for (int x = 0; x < mapLen; ++x) {
result[i + resultOffset + x] = lowerCharArray[x];
}
resultOffset += (mapLen - srcCount);
} else {
result[i + resultOffset] = (char)lowerChar;
}
}
// Only the first len + resultOffset chars of result are used.
return new String(result, 0, len + resultOffset);
}
八、length()方法:
/**
 * Returns the length of this string.
 *
 * @return the length of the backing char array
 */
public int length() {
// The length is the size of the backing char array, i.e. the number of chars.
return value.length;
}
九、trim()方法:
/**
 * Strips leading and trailing characters whose value is less than or equal
 * to the space character.
 *
 * @return this same instance when nothing needs stripping, otherwise the
 *         substring between the first and last retained characters
 */
public String trim() {
    final char[] chars = value;
    int begin = 0;
    int end = chars.length;
    // Skip leading chars that are <= ' '.
    while (begin < end && chars[begin] <= ' ') {
        begin++;
    }
    // Skip trailing chars that are <= ' '.
    while (begin < end && chars[end - 1] <= ' ') {
        end--;
    }
    // Nothing removed at either end: hand back this instance unchanged.
    if (begin == 0 && end == chars.length) {
        return this;
    }
    return substring(begin, end);
}
十、replace()方法:
/**
 * Replaces every occurrence of {@code oldChar} with {@code newChar}.
 *
 * @param oldChar the char to be replaced
 * @param newChar the replacement char
 * @return this same instance when the two chars are equal or
 *         {@code oldChar} does not occur; otherwise a new string with all
 *         occurrences replaced
 */
public String replace(char oldChar, char newChar) {
    // Replacing a char with itself changes nothing.
    if (oldChar == newChar) {
        return this;
    }
    final char[] src = value;
    final int total = src.length;
    // Locate the first occurrence; everything before it can be copied as is.
    int firstHit = 0;
    while (firstHit < total && src[firstHit] != oldChar) {
        firstHit++;
    }
    if (firstHit == total) {
        return this;
    }
    final char[] out = new char[total];
    System.arraycopy(src, 0, out, 0, firstHit);
    // From the first occurrence onward, substitute while copying.
    for (int pos = firstHit; pos < total; pos++) {
        final char current = src[pos];
        out[pos] = (current == oldChar) ? newChar : current;
    }
    // Wrap the freshly built array in a new string.
    return new String(out, true);
}
/**
 * Replaces every literal occurrence of {@code target} with
 * {@code replacement}. The target is compiled as a literal pattern and the
 * replacement is quoted, so neither is interpreted as regex syntax.
 *
 * NOTE(review): the author of these notes recommends the
 * {@code replace(char, char)} overload when only a single character is
 * replaced, reporting roughly a tenfold difference (about 2 ms versus
 * 0.2 ms). The figure is the author's own measurement and is not verified here.
 *
 * @param target      the sequence to be replaced
 * @param replacement the sequence to substitute
 * @return the resulting string
 */
public String replace(CharSequence target, CharSequence replacement) {
    // Implemented on top of the regex engine, in literal mode.
    return Pattern.compile(target.toString(), Pattern.LITERAL).matcher(
            this).replaceAll(Matcher.quoteReplacement(replacement.toString()));
}
/**
 * Replaces every substring matching the regular expression {@code regex}
 * with {@code replacement}. The pattern is compiled on each call.
 *
 * NOTE(review): the original note ranked replaceAll "higher" than replace,
 * presumably meaning performance. Nothing in this code establishes that —
 * confirm by measurement.
 *
 * @param regex       the regular expression to match
 * @param replacement the replacement passed to {@code Matcher.replaceAll}
 * @return the resulting string
 */
public String replaceAll(String regex, String replacement) {
    return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}
十一、split方法:
/**
 * Splits this string around matches of {@code regex}.
 * Delegates to {@code split(String, int)} with a limit of 0.
 *
 * @param regex the delimiting regular expression
 * @return whatever {@code split(regex, 0)} returns
 */
public String[] split(String regex) {
return split(regex, 0);
}
/**
 * Splits this string around matches of {@code regex}.
 *
 * When the delimiter is effectively a single plain character, a fast path
 * splits with {@code indexOf}/{@code substring} and never builds a
 * {@code Pattern}. Every other delimiter is handed to {@code Pattern.split}.
 *
 * Fast-path handling of {@code limit}:
 * a positive limit caps the result at {@code limit} entries, the last one
 * holding the unsplit remainder; a limit of 0 applies no cap and drops
 * trailing empty strings; a negative limit applies no cap and keeps them.
 *
 * @param regex the delimiting regular expression
 * @param limit the result threshold described above
 * @return the pieces of this string; a one-element array holding this
 *         string when the fast path finds no delimiter
 */
public String[] split(String regex, int limit) {
    /* fastpath if the regex is a
     (1)one-char String and this character is not one of the
        RegEx's meta characters ".$|()[{^?*+\\", or
     (2)two-char String and the first char is the backslash and
        the second is not the ascii digit or ascii letter.
     */
    char ch = 0;
    if (((regex.value.length == 1 &&
         ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
         (regex.length() == 2 &&
          regex.charAt(0) == '\\' &&
          (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
          ((ch-'a')|('z'-ch)) < 0 &&
          ((ch-'A')|('Z'-ch)) < 0)) &&
        (ch < Character.MIN_HIGH_SURROGATE ||
         ch > Character.MAX_LOW_SURROGATE))
    {
        int off = 0;
        int next = 0;
        boolean limited = limit > 0;
        // Pieces are collected in an ArrayList and converted to an array at the end.
        ArrayList<String> list = new ArrayList<>();
        while ((next = indexOf(ch, off)) != -1) {
            if (!limited || list.size() < limit - 1) {
                list.add(substring(off, next));
                off = next + 1;
            } else {    // last one
                //assert (list.size() == limit - 1);
                list.add(substring(off, value.length));
                off = value.length;
                break;
            }
        }
        // If no match was found, return this
        if (off == 0)
            return new String[]{this};

        // Add remaining segment
        if (!limited || list.size() < limit)
            list.add(substring(off, value.length));

        // Construct result
        int resultSize = list.size();
        // With limit == 0, trailing empty strings are left out of the result.
        if (limit == 0) {
            while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
                resultSize--;
            }
        }
        String[] result = new String[resultSize];
        return list.subList(0, resultSize).toArray(result);
    }
    // The delimiter did not qualify for the fast path: use the regex engine.
    return Pattern.compile(regex).split(this, limit);
}
十二、startsWith方法:
/**
 * Tests whether the part of this string beginning at {@code toffset}
 * starts with {@code prefix}.
 *
 * @param prefix  the prefix to look for
 * @param toffset the index in this string at which to start comparing
 * @return {@code true} if every char of {@code prefix} matches the
 *         corresponding char of this string from {@code toffset} onward;
 *         {@code false} if {@code toffset} is negative, if the prefix would
 *         run past the end of this string, or if any char differs
 */
public boolean startsWith(String prefix, int toffset) {
    final char[] text = value;
    final char[] head = prefix.value;
    final int headLen = head.length;
    // toffset may be close to Integer.MAX_VALUE, so the prefix length is
    // subtracted on the right-hand side rather than added to toffset.
    if (toffset < 0 || toffset > text.length - headLen) {
        return false;
    }
    // Compare the prefix against this string, shifted by toffset.
    for (int i = 0; i < headLen; i++) {
        if (text[toffset + i] != head[i]) {
            return false;
        }
    }
    return true;
}
十三、join方法(jdk1.8才开始有此方法):
/**
 * Joins the given elements into one string, using {@code delimiter} as the
 * separator handed to a {@code StringJoiner}.
 *
 * @param delimiter the separator; must not be null
 * @param elements  the elements to join, in order; the array must not be null
 * @return the text produced by the joiner after all elements are added
 * @throws NullPointerException if {@code delimiter} or {@code elements} is null
 */
public static String join(CharSequence delimiter, CharSequence... elements) {
    // Both arguments are validated up front; null fails with NullPointerException.
    Objects.requireNonNull(delimiter);
    Objects.requireNonNull(elements);
    // A plain loop is used; the element count rarely justifies stream overhead.
    final StringJoiner assembled = new StringJoiner(delimiter);
    for (int i = 0; i < elements.length; i++) {
        assembled.add(elements[i]);
    }
    return assembled.toString();
}