在处理文本、代码等过程中,我们经常需要进行单词的获取,此处介绍一个简单的方式:
遍历输入字符串,遇到 a~z、A~Z、0~9、_ 等字符,就认为是单词的一部分,其它字符则视为分隔符并忽略;
#coding=utf-8
class getwords:
    """Split text into identifier-like words.

    A word is a maximal run of characters drawn from ``basechareters``
    (ASCII letters, digits and the underscore). Every other character
    acts as a separator and is discarded.
    """

    # members
    # Characters allowed inside a word. Kept as a list of single-character
    # strings so existing code that reads or extends it keeps working.
    basechareters = list(
        'abcdefghijklmnopqrstuvwxyz'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        '0123456789'
        '_'
    )
    # Words found by the most recent getWords() call. getWords() rebinds
    # this on the instance, so the class-level list is never mutated.
    words = []

    # functions
    def getWords(self, contents):
        """Extract the words contained in ``contents``.

        Args:
            contents: Iterable of characters (normally a ``str``) to scan.

        Returns:
            The list of words in order of appearance. The same list is
            also stored in ``self.words``.
        """
        # Build the lookup set once per call: O(1) membership tests while
        # still honouring any per-instance change to basechareters.
        allowed = set(self.basechareters)
        self.words = []
        oneword = ''
        for c in contents:
            if c in allowed:
                oneword += c
            elif oneword:
                # A separator ends the word collected so far.
                self.words.append(oneword)
                oneword = ''
        # Flush the trailing word: input that ends on a word character has
        # no following separator to trigger the append above.
        if oneword:
            self.words.append(oneword)
        return self.words
调用:
# Example usage: split a sample line of Python source into words.
# NOTE(review): `getwords.getwords()` assumes the class lives in a module
# named `getwords` that has already been imported — confirm against the caller.
content = 'def __init__(self, type)'
gw = getwords.getwords()
# The extracted words are stored on the instance as `gw.words`.
gw.getWords(content)
输入:
def __init__(self, type)
输出:
['def', '__init__', 'self', 'type']