Python Cheatsheet: Regular Expression

re module
HTML tags
re.findall()
Substitute String
Full

re module

Python's built-in `re` module provides functions for working with regular expressions.

# -*- coding: UTF-8 -*-
import re

# re.match: the pattern must match at the very start of the string
print(re.match('www', 'www.runoob.com').span())  # (0, 3)
print(re.match('com', 'www.runoob.com'))  # None: 'com' is not at the start

# re.search
## pattern
key = r"<html><body><h1>hello world<h1></body></html>"
# The lookbehind/lookahead keep the <h1> markers out of the matched text.
# Note that the sample text closes with <h1> (not </h1>) and the pattern
# mirrors that.
p1 = r"(?<=<h1>).+?(?=<h1>)"  ## regular expression
pattern1 = re.compile(p1)
match1 = pattern1.search(key)
print(match1.group(0))  # hello world

# re.match only tries to match at the beginning of the string; if the
# pattern does not match there it fails and returns None.
# re.search scans the whole string and returns the first match found
# anywhere in it (use re.findall / re.finditer to get every match).


# re.sub replaces every part of the string that matches the pattern
phone = "2004-959-559 # 这是一个国外电话号码"

# Strip the trailing Python-style comment: everything from '#' to end of string
num = re.sub(r'#.*$', "", phone)
print("电话号码是: ", num)  # num == "2004-959-559 " (trailing space is kept)

# Remove every non-digit character (\D): this drops the dashes and, because
# it is applied to the original `phone`, the comment text as well
num = re.sub(r'\D', "", phone)
print("电话号码是 : ", num)  # num == "2004959559"

HTML tags

re.findall()

# Split the text into words: re.findall returns every non-overlapping run of
# word characters as a list of strings
source = "Hello World Ker HAHA"
# Raw string so that \w reaches the regex engine instead of being treated as
# an (invalid) string escape sequence
words = re.findall(r'\w+', source)
print(words)  # ['Hello', 'World', 'Ker', 'HAHA']

# Download a page and show the first two tags found in it.
# NOTE: this performs a live HTTP request and therefore needs network access.
import urllib.request

# The context manager closes the connection even if read() raises.
with urllib.request.urlopen('https://www.python.org') as s:
    # read() returns bytes; decode with the charset announced by the server
    # (falling back to UTF-8) so the str pattern below can be applied.
    charset = s.headers.get_content_charset() or 'utf-8'
    html = s.read().decode(charset, errors='replace')

# '<[^/>][^>]*>' matches a '<', then one character that is neither '/' nor
# '>', then everything up to the next '>' -- so closing tags such as </p>
# are skipped.
print(re.findall(r'<[^/>][^>]*>', html)[0:2])

Substitute String

# Substitute String: replace every lowercase ASCII letter with a space
res = "1a2b3c"
print(re.sub(r'[a-z]', ' ', res))  # '1 2 3 '

# Substitute with group references: \1, \2 and \3 in the replacement refer to
# the captured year, month and day, so the date is reordered to month/day/year
date = r'2016-01-01'
print(re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\2/\3/\1/', date))  # '01/01/2016/'

Python Cheatsheet: Regular Expression

re module

HTML tags

re.findall()

Substitute String

Full

Comments