"""30 - Python 3 regular expressions: a runnable tour of the ``re`` module.

Running this file prints the result of every example, section by section:
``re.match``, ``re.search``, ``re.sub`` (string and function replacement),
compiled patterns, ``findall``, ``finditer`` and ``re.split``.
"""
import re

# ---------------------------------------------------------------------------
# re.match: succeeds only if the pattern matches at the start of the string
# ---------------------------------------------------------------------------
print(re.match('www', 'www.runoob.com').span())
print(re.match('ww', 'www.runoob.com').span())
print(re.match('w', 'www.runoob.com').span())
# 'com' does not occur at index 0, so this prints None.
print(re.match('com', 'www.runoob.com'))

line = 'Cats are smarter than dogs'
matchObj = re.match(r'(.*)are(.*?).*', line, re.M | re.I)
if matchObj:
    print('matchObj.group():', matchObj.group())
    print('matchObj.group(1):', matchObj.group(1))
    print('matchObj.group(2):', matchObj.group(2))
else:
    print('No match')

# ---------------------------------------------------------------------------
# re.search: scans the whole string and returns the first match
# ---------------------------------------------------------------------------
print(re.search('www', 'www.runoob.com').span())
print(re.search('com', 'www.runoob.com').span())

line1 = 'Cats are smarter than dogs'
searchObj = re.search(r'(.*)are(.*?).*', line1, re.M | re.I)
if searchObj:
    print('searchObj.group():', searchObj.group())
    print('searchObj.group(1):', searchObj.group(1))
    print('searchObj.group(2):', searchObj.group(2))
else:
    print('Nothing found!')

# ---------------------------------------------------------------------------
# Difference between re.match and re.search
# ---------------------------------------------------------------------------
line2 = 'Cats are smarter than dogs'

# 'dogs' is at the end of the string, so match() fails ...
matchObj = re.match(r'dogs', line2, re.M | re.I)
if matchObj:
    print('re.match:', matchObj.group())
else:
    print('no match1')

# ... while search() finds it.
matchObj = re.search(r'dogs', line2, re.M | re.I)
if matchObj:
    print('re.search:', matchObj.group())
else:
    print('no match2')

# ---------------------------------------------------------------------------
# Search and replace with re.sub
# ---------------------------------------------------------------------------
phone = '2004-959-559 #这是一个电话号码'

# Strip the trailing "#..." comment.
num = re.sub(r'#.*$', '', phone)
print('电话号码1:', num)

# Remove every non-digit character.
num = re.sub(r'\D', '', phone)
print('电话号码2:', num)


# ---------------------------------------------------------------------------
# The repl argument of re.sub may be a function
# ---------------------------------------------------------------------------
def double(matched):
    """Return the matched number multiplied by 2, as a string.

    Intended as the ``repl`` callback of ``re.sub``.

    Args:
        matched: a match object whose pattern defines a named group
            ``value`` containing decimal digits.

    Returns:
        The decimal string of ``int(matched.group('value')) * 2``.
    """
    value = int(matched.group('value'))
    return str(value * 2)


s = 'QAA342RFDFD56FGFG'
# Raw string: '\d' in a normal literal is an invalid escape sequence.
print(re.sub(r'(?P<value>\d+)', double, s))

# ---------------------------------------------------------------------------
# re.compile: build a reusable pattern object
# ---------------------------------------------------------------------------
pattern1 = re.compile(r'\d+')
m = pattern1.match('one12twothree34four')    # index 0 is 'o' -> None
m1 = pattern1.search('one12twothree34four')  # first run of digits: '12'
print('m', m)
print('m1', m1)

m2 = pattern1.match('one12twothree34four', 2, 10)  # index 2 is 'e' -> None
print('m2', m2)

m3 = pattern1.match('one12twothree34four', 3, 10)  # index 3 is '1' -> '12'
print('m3:', m3)

print('m3.group():', m3.group())
print('m3.start():', m3.start())
print('m3.end():', m3.end())
print('m3.span():', m3.span())

pattern2 = re.compile(r'([a-z]+)([a-z])', re.I)  # re.I: ignore case
# The greedy first group gives back one letter so the second group can
# match, which yields the groups ('Hell', 'o').
mm = pattern2.match('Hello World Wide Web')
print('mm:', mm)

print('mm.group(0):', mm.group(0))
print('mm.span(0):', mm.span(0))
print('mm.group(1):', mm.group(1))
print('mm.span(1):', mm.span(1))
print('mm.group(2):', mm.group(2))
print('mm.span(2):', mm.span(2))
print('mm.groups():', mm.groups())
# mm.group(3) would raise IndexError: the pattern defines only two groups.

# ---------------------------------------------------------------------------
# findall
# ---------------------------------------------------------------------------
# Finds every substring matched by the pattern and returns them as a list;
# returns an empty list when nothing matches.
# Note: match and search return a single match, findall returns all of them.
# Signature on a compiled pattern:
#     findall(string[, pos[, endpos]])
pattern3 = re.compile(r'\d+')
result1 = pattern3.findall('runoob 123 google 456')
result2 = pattern3.findall('run88oob123google456', 0, 10)
print('result1:', result1)
print('result2:', result2)

# ---------------------------------------------------------------------------
# re.finditer: like findall, but returns the matches as an iterator
# ---------------------------------------------------------------------------
it = re.finditer(r'\d+', 'qaz12edc34edc4rfv56')
for match in it:
    print(match.group())

# ---------------------------------------------------------------------------
# re.split
# ---------------------------------------------------------------------------
print('1:', re.split(r'\W+', 'runoob,runoob,runoob.'))
# A capturing group keeps the separators in the result list.
print('2:', re.split(r'(\W+)', 'runoob,runoob,runoob.'))
# maxsplit is passed by keyword: the positional form is deprecated.
print('3', re.split(r'\W+', 'runoob,runoob,runoob.', maxsplit=1))

# ---------------------------------------------------------------------------
# Sections left as headings only (no examples yet):
#   - Regular expression objects
#   - Regular expression modifiers (optional flags)
#   - Regular expression patterns
#   - Regular expression examples
# ---------------------------------------------------------------------------