Python入门:Regular Expressions

Code

## 1. Regular expressions ##

# Sample inputs: every entry contains the literal substring "data".
strings = [
    "data science",
    "big data",
    "metadata",
]

# A pattern with no special characters matches that exact character sequence.
regex = "data"

## 2. Special characters ##

# Sample inputs: each contains "b", one arbitrary character, then "t"
# ("bat", "bot" in "robotics", "byt" in "megabyte").
strings = [
    "bat",
    "robotics",
    "megabyte",
]

# "." stands for any single character except a newline.
regex = "b.t"

## 3. Beginnings and ends of string ##

# Sample inputs that all begin with "b", one character, then "tter".
strings = [
    "better not put too much",
    "butter in the",
    "batter",
]

# Counter-example: "bitter" appears only at the end, not at the start.
bad_string = "We also wouldn't want it to be bitter"

# "^" anchors the match to the beginning of the string, so bad_string
# does not match even though it contains "bitter".
regex = "^b.tter"

## 5. Reading and printing the dataset ##

import csv
# Load every row of the CSV into memory. The context manager guarantees the
# file handle is closed, and newline="" is what the csv module documentation
# requires so that quoted fields containing line breaks are parsed correctly.
with open("askreddit_2015.csv", "r", newline="") as csv_file:
    posts_with_header = csv.reader(csv_file)
    # The first row is treated as the header and dropped.
    posts = list(posts_with_header)[1:]

# Preview at most the first ten data rows; slicing (rather than indexing
# 0..9) avoids an IndexError when the file holds fewer than ten rows.
for row in posts[:10]:
    print(row)

## 6. Testing for matches ##

import re

# Count the rows whose first field (presumably the post title -- confirm
# against the CSV layout) contains the exact, case-sensitive text "of Reddit".
of_reddit_pattern = re.compile("of Reddit")
of_reddit_count = sum(
    1 for post in posts if of_reddit_pattern.search(post[0])
)

## 7. Accounting for inconsistencies ##

import re

# Count rows whose first field contains "of Reddit" or "of reddit".
# The character class [Rr] accepts either capitalisation of the "R".
of_reddit_pattern = re.compile(r"of [Rr]eddit")

of_reddit_count = 0
for row in posts:
    # re.search returns None when nothing matches; compare by identity.
    if of_reddit_pattern.search(row[0]) is not None:
        of_reddit_count += 1

## 8. Escaping special characters ##

import re

# Count rows whose first field contains the literal tag "[Serious]".
# Square brackets are regex metacharacters, so they are backslash-escaped to
# match literally; the raw string (r"...") hands the backslashes to the regex
# engine untouched instead of relying on Python tolerating an invalid
# string escape such as "\[".
serious_pattern = re.compile(r"\[Serious\]")

serious_count = 0

for row in posts:
    if serious_pattern.search(row[0]) is not None:
        serious_count += 1

## 9. Refining the search ##

import re

# Count rows whose first field contains "[Serious]" or "[serious]".
# [Ss] accepts either case for the first letter; the surrounding brackets are
# escaped so they match literally. A raw string keeps the backslashes intact.
serious_pattern = re.compile(r"\[[Ss]erious\]")

serious_count = 0
for row in posts:
    if serious_pattern.search(row[0]) is not None:
        serious_count += 1

## 10. More inconsistency ##

import re

# Count rows whose first field contains the "serious" tag in any of its
# observed spellings: opening "[" or "(", "Serious" or "serious", closing
# "]" or ")". Note the opening and closing delimiters are matched
# independently, so mixed forms such as "[serious)" also count.
# A raw string keeps the regex backslashes intact.
serious_pattern = re.compile(r"[\[\(][Ss]erious[\]\)]")

serious_count = 0
for row in posts:
    if serious_pattern.search(row[0]) is not None:
        serious_count += 1

## 11. Multiple regular expressions ##

import re

# The "serious" tag: "[" or "(", then "Serious"/"serious", then "]" or ")".
# Defined once as a raw string so the backslashes reach the regex engine
# unchanged and the three patterns below cannot drift apart.
serious_tag = r"[\[\(][Ss]erious[\]\)]"

# "^" anchors to the start of the string, "$" to the end, and "|" matches
# either alternative.
starts_with_tag = re.compile("^" + serious_tag)
ends_with_tag = re.compile(serious_tag + "$")
starts_or_ends_with_tag = re.compile("^" + serious_tag + "|" + serious_tag + "$")

serious_start_count = 0   # rows whose first field begins with the tag
serious_end_count = 0     # rows whose first field ends with the tag
serious_count_final = 0   # rows where the tag is at the start or the end

for post in posts:
    post_string = post[0]
    if starts_with_tag.search(post_string) is not None:
        serious_start_count += 1
    if ends_with_tag.search(post_string) is not None:
        serious_end_count += 1
    if starts_or_ends_with_tag.search(post_string) is not None:
        serious_count_final += 1

## 12. Substituting strings ##

import re
# Normalise every spelling of the "serious" tag -- "[" or "(", upper- or
# lower-case "s", "]" or ")" -- to the canonical "[Serious]".
# The raw string keeps the regex backslashes intact.
serious_tag_pattern = re.compile(r"[\[\(][Ss]erious[\]\)]")

posts_new = []
for row in posts:
    # NOTE: the row is modified in place, so `posts` and `posts_new` end up
    # holding the very same (already normalised) row lists.
    row[0] = serious_tag_pattern.sub("[Serious]", row[0])
    posts_new.append(row)

## 13. Matching years ##

import re

# Keep the strings that contain a "1" or "2" followed by three digits
# (i.e. something shaped like 1000-2999) anywhere inside them. The pattern is
# not anchored, so such a run inside a longer number also counts.
year_strings = [
    text
    for text in strings
    if re.search(r"[1-2][0-9][0-9][0-9]", text) is not None
]

## 14. Repeating regular expressions ##

import re

# Same filter as spelling out [0-9] three times, written with a repetition
# count: "{3}" repeats the preceding character class exactly three times.
year_pattern = re.compile(r"[1-2][0-9]{3}")

year_strings = [
    text for text in strings if year_pattern.search(text) is not None
]

## 15. Extracting years ##

import re

# Collect every non-overlapping occurrence of a "1" or "2" followed by three
# digits in years_string, in left-to-right order, as a list of strings.
year_pattern = re.compile("[1-2][0-9]{3}")
years = year_pattern.findall(years_string)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • error code(错误代码)=0是操作成功完成。error code(错误代码)=1是功能错误。error c...
    Heikki_阅读 3,457评论 1 9
  • error code(错误代码)=2000是无效的像素格式。error code(错误代码)=2001是指定的驱动...
    Heikki_阅读 1,874评论 0 4
  • intKEYCODE_0Key code constant: '0' key. intKEYCODE_1Key c...
    几千里也阅读 1,669评论 0 1
  • 8086汇编 本笔记是笔者观看小甲鱼老师(鱼C论坛)《零基础入门学习汇编语言》系列视频的笔记,在此感谢他和像他一样...
    Gibbs基阅读 37,422评论 8 114
  • “小时候,幸福是件很简单的事。长大后,简单是件很幸福的事。”——某某某云 “妈妈,什么是幸福?” ...
    公民崔阅读 703评论 0 2