ECUST - 2022年下半年大二上学期外语学院 Python 备考资料

#### 文件相关
```python
# Three ways to spell a Windows file path as a Python string literal.
path1 = r"C:\windows\temp\readme.txt"  # raw string: backslashes are taken literally
path2 = "C:\\windows\\temp\\readme.txt"  # normal string: each backslash is escaped as \\
path3 = "c:/windows/temp/readme.txt"  # forward slashes need no escaping

def read_txt_to_list(path=None):
    """Read a delimited text file into a list of rows.

    Args:
        path: File to read. Defaults to the module-level ``path1``.

    Returns:
        A list with one entry per line of the file. Each entry is the list
        of fields obtained by stripping the line and splitting it on ",".
    """
    if path is None:
        path = path1
    with open(path, 'r') as file:
        lines_list = file.readlines()
    # The strings returned by readlines() still carry their trailing "\n",
    # so strip each line before splitting; otherwise the last field keeps it.
    # Use split("\t") or split(" ") instead for tab- or space-separated files.
    return [line.strip().split(",") for line in lines_list]

def write_list_to_txt(lines_list=(), path=None):
    """Write a list of strings to a text file, one element per line.

    Args:
        lines_list: Strings to write. A newline is appended to every element
            that does not already end with one.
        path: Destination file; any existing content is overwritten.
            Defaults to the module-level ``path1``.
    """
    if path is None:
        path = path1
    with open(path, 'w') as file:
        # file.write() accepts a single string only; writelines() takes an
        # iterable of strings but does not add line terminators itself.
        file.writelines(
            line if line.endswith("\n") else line + "\n"
            for line in lines_list
        )

def accum(Dir, allFile):
    """Append the contents of every file matching a glob pattern to one file.

    Args:
        Dir: Glob pattern selecting the input files, e.g. r'review*.txt'.
        allFile: Path of the merged output file. It is opened in append
            mode, so content already in it is kept.
    """
    # Imported locally so this snippet runs on its own.
    import glob
    import os

    out_path = os.path.normcase(os.path.abspath(allFile))
    # Sort for a reproducible merge order, and never feed the output file
    # back into itself when it happens to match the pattern.
    targetfile = [
        name for name in sorted(glob.glob(pathname=Dir))
        if os.path.normcase(os.path.abspath(name)) != out_path
    ]
    print(targetfile)
    if not targetfile:
        return
    # Open the output once instead of re-opening it for every input file.
    with open(allFile, 'a', encoding='utf-8') as nf:
        for file in targetfile:
            with open(file, 'r', encoding='utf-8') as f:
                nf.writelines(f)
# Example: accum(r'review*.txt', r'allreviews.txt')

def read_txt_to_str_list(path=None):
    """Read a text file into a list of per-line word lists.

    Args:
        path: File to read. Defaults to the module-level ``path1``.

    Returns:
        A list containing, for every line of the file, the list of strings
        obtained by stripping the line and splitting it on ",". Flatten the
        result (e.g. with itertools.chain.from_iterable) if a single flat
        word list is needed for frequency counting.
    """
    if path is None:
        path = path1
    # The accumulator must exist before the first append.
    str_list = []
    with open(path, 'r') as file:
        for line in file:
            # Strip first so the last word of a line does not keep its "\n".
            # Use split("\t") or split(" ") for other separators.
            line_str_list = line.strip().split(",")
            str_list.append(line_str_list)
    return str_list
```
#### 列表相关
```python
def func1():
    """Demonstrate basic list operations: index, insert, del and assignment."""
    fruit_list = ['banana', 'orange', 'mango', 'lemon']

    # index() returns the position of the first match; it raises ValueError
    # when the value is absent, e.g. fruit_list.index("nihao").
    print(fruit_list.index("mango"))

    fruit_list.insert(1, "apple")  # insert "apple" at index 1 (second position)
    print(fruit_list)

    del fruit_list[3]              # remove the element at index 3 (fourth position)
    print(fruit_list)

    fruit_list[-1] = "pear"        # index -1 addresses the last element
    print(fruit_list)

```
#### 字典相关
```python

def func1():
    """Show basic dict operations: insert, update, delete and iterate."""
    first, second, third, fourth = "alan", "biden", "candy", "jacky"
    quotes = {}
    # Plain item assignment adds one key at a time.
    quotes["Alan"] = first
    quotes["Biden"] = second
    quotes["Candy"] = third
    # update() merges another mapping into the dict.
    quotes.update({"Jacky": fourth})
    print(quotes)

    # Remove a single key/value pair.
    del quotes["Alan"]
    print(quotes)

    # Walk the remaining pairs; speaker.title() would capitalise the name.
    for speaker in quotes:
        words = quotes[speaker]
        print(f"{speaker} said:{words}")

def func2():
    """Print the intersection, union and difference of two word sets."""
    left = {'computer','python','language','intelligence','word','sentence'}
    right = {'sentence','text','corpus','token','word','computer'}
    common = left.intersection(right)     # same as left & right
    combined = left.union(right)          # same as left | right
    only_left = left.difference(right)    # same as left - right
    for result in (common, combined, only_left):
        print(result)

```
#### 字符串相关
```python
def fun1():
    """Count digits, whitespace, letters and other characters in a sample string.

    Prints the four counts in the order: digits, whitespace, letters, rest.
    """
    str1 = "nihao woshizhanghengyu jinnian 18 sui !!! di_er_hang\n"
    digit_num, space_num, alpha_num = 0, 0, 0
    for c in str1:
        # A character falls into at most one of these classes, so elif is enough.
        if c.isdigit():
            digit_num += 1
        elif c.isspace():
            space_num += 1
        elif c.isalpha():
            alpha_num += 1
    # Whatever is left over (punctuation, underscores, ...); computed once
    # after the loop so it is also defined for an empty string.
    rest_num = len(str1) - (digit_num + space_num + alpha_num)

    print(digit_num, space_num, alpha_num, rest_num)

def fun2():
    """Count occurrences of specific characters ("o" and " ") in a sample string.

    Prints the two counts in that order.
    """
    text = "zhy:'hello world'"
    # str.count() replaces the manual character-by-character loop.
    o_num = text.count("o")
    space_num = text.count(" ")

    print(o_num, space_num)

def panduan(_str="", suffix="'d"):
    """Check whether a string ends with a given suffix.

    Args:
        _str: The string to test.
        suffix: The ending to look for; defaults to "'d".

    Returns:
        True if ``_str`` ends with ``suffix``, otherwise False.
    """
    # endswith() already returns a bool, so no comparison with True is needed.
    return _str.endswith(suffix)
```

#### nltk库相关
```python
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

"""
词性还原
"""
def get_wordnet_pos(tag):
    """Map a part-of-speech tag to the matching WordNet POS constant.

    Tags starting with J, V, N or R map to wordnet.ADJ, wordnet.VERB,
    wordnet.NOUN and wordnet.ADV respectively; any other tag yields None.
    """
    if tag.startswith('J'):
        return wordnet.ADJ
    if tag.startswith('V'):
        return wordnet.VERB
    if tag.startswith('N'):
        return wordnet.NOUN
    if tag.startswith('R'):
        return wordnet.ADV
    # No WordNet counterpart for this tag.
    return None

sentence = 'football is a family of team sports that involve, to varying degrees, kicking a ball to score a goal.'
# Split the sentence into tokens, then tag every token with its part of speech.
tokens = word_tokenize(sentence)
tagged_sent = pos_tag(tokens)

wnl = WordNetLemmatizer()
for word, pos_label in tagged_sent:
    # Fall back to NOUN when the tag has no WordNet equivalent.
    lemma = wnl.lemmatize(word, pos=get_wordnet_pos(pos_label) or wordnet.NOUN)
    # Only report the words that lemmatization actually changed.
    if lemma != word:
        print(word, "还原为", lemma)
"""
词性还原结束
"""

###############################################

"""
考点二
"""
# xxxxxxx  (占位符：考点二的内容尚未填写)
"""
考点二结束
"""

```
#### 词频统计
```python
"""
统计词频方法一
"""
def tongji1():
    """Count how often each word occurs in a sample list and print the counts."""
    # The same list can also be used to find a word's position (list.index).
    str_list = ["nihao", "jacky", "hello", "jacky"]
    dict1 = {}

    for key in str_list:
        # dict.get(key, 0) returns the current count, or the default 0 the
        # first time a word is seen; add 1 for this occurrence.
        dict1[key] = dict1.get(key, 0) + 1
    print(dict1)

```
#### 词清洗
```python
"""
对字符串 例如“i love py”
以空格split()分为列表，
对列表里的每个字符串进行判断
"""

"""
对txt文件
见文件相关.py 将其读为字符串后操作
然后字符串还原为列表
列表还原为二维列表
二维列表还原为txt文件
"""

# 清洗时常用的判断：元素是否等于某个值（==），或是否包含在某个列表/字符串中（in）
# 如果是，则将其删除或替换

```