我爱python...
生工学院py大一实验报告
生工学院大二py实验报告2(sklearn)
外国语学院大二py实验报告1(NLP-nltk)
外国语学院py实验报告
外国语学院大二py实验报告4.1
外国语学院大二py实验报告4.2
豆瓣爬虫
ECUST - 2022年下半年大二上学期外语学院python期末考试真题题库(python与语言智能)
ECUST - 2022年下半年大二上学期外语学院python备考资料
c语言求解n的阶乘1
c语言随机生成数组并排序
c语言编写递归,求解阶乘之和
2025 openvpn入门:用openvpn+云服务器实现私有网络代理
本文档使用 MrDoc 发布
-
+
首页
ECUST - 2022年下半年大二上学期外语学院python备考资料
#### 文件相关

```python
import glob

# Three ways to write a Windows path in Python.
path1 = r"C:\windows\temp\readme.txt"    # raw string: backslashes are literal
path2 = "C:\\windows\\temp\\readme.txt"  # escaped backslashes
path3 = "c:/windows/temp/readme.txt"     # forward slashes


def read_txt_to_list(path=path1, sep=","):
    """Read a text file into a list of rows, each row a list of fields.

    Args:
        path: File to read; defaults to ``path1``.
        sep: Field separator (for example a comma, a tab or a space).

    Returns:
        One list of fields per line of the file.
    """
    with open(path, 'r') as file:
        lines_list = file.readlines()
    # readlines() keeps the trailing newline on every element, so remove it
    # before splitting; only the newline is removed so that tab- or
    # space-separated fields at the ends of a line are preserved.
    return [line.rstrip("\n").split(sep) for line in lines_list]


def write_list_to_txt(lines_list=(), path=path1):
    """Write a list of strings to a text file, one string per line.

    Args:
        lines_list: Strings to write; a missing trailing newline is added.
        path: File to overwrite; defaults to ``path1``.
    """
    with open(path, 'w') as file:
        # write() only accepts a single string; writelines() takes an iterable.
        file.writelines(
            line if line.endswith("\n") else line + "\n" for line in lines_list
        )


def accum(Dir, allFile):
    """Append the contents of every file matching a glob pattern to one file.

    Args:
        Dir: Glob pattern of the files to merge, e.g. ``r'review*.txt'``.
        allFile: Output file, opened in append mode. Pick a name that the
            pattern does not match, otherwise the output is merged into itself.
    """
    # glob.glob() returns matches in arbitrary order; sort for a stable result.
    targetfile = sorted(glob.glob(pathname=Dir))
    print(targetfile)
    with open(allFile, 'a', encoding='utf-8') as nf:
        for file in targetfile:
            with open(file, 'r', encoding='utf-8') as f:
                nf.writelines(f.readlines())

# Example: accum(r'review*.txt', r'allreviews.txt')


def read_txt_to_str_list(path=path1, sep=","):
    """Read a text file into a list of token lists for word-frequency counting.

    Args:
        path: File to read; defaults to ``path1``.
        sep: Token separator (for example a comma, a tab or a space).

    Returns:
        A list with one list of tokens per line of the file.
    """
    str_list = []
    with open(path, 'r') as file:
        for line in file.readlines():
            # Drop the trailing newline so the last token of each line is clean.
            str_list.append(line.rstrip("\n").split(sep))
    return str_list
```

#### 列表相关

```python
def func1():
    """Demonstrate index lookup, insertion, deletion and assignment on a list."""
    fruit_list = ['banana', 'orange', 'mango', 'lemon']
    print(fruit_list.index("mango"))
    # fruit_list.index("nihao") would raise ValueError: the item is not present
    fruit_list.insert(1, "apple")  # insert "apple" as the 2nd element (index 1)
    print(fruit_list)
    del fruit_list[3]  # delete the 4th element (index 3)
    print(fruit_list)
    fruit_list[-1] = "pear"  # replace the last element
    print(fruit_list)
```

#### 字典相关

```python
def func1():
    """Demonstrate adding, updating, deleting and iterating dict entries."""
    dic1 = {}
    s1, s2, s3, s4 = "alan", "biden", "candy", "jacky"
    dic1["Alan"] = s1
    dic1["Biden"] = s2  # assign a value by key
    dic1["Candy"] = s3  # assign a value by key
    dic1.update({"Jacky": s4})  # update() merges another mapping in
    print(dic1)
    del dic1["Alan"]  # remove one key/value pair
    print(dic1)
    for name, sentence in dic1.items():  # iterate over key/value pairs
        print(f"{name} said:{sentence}")  # name.title() capitalises the first letter


def func2():
    """Demonstrate set intersection, union and difference."""
    s1 = {'computer', 'python', 'language', 'intelligence', 'word', 'sentence'}
    s2 = {'sentence', 'text', 'corpus', 'token', 'word', 'computer'}
    print(s1 & s2)  # intersection
    print(s1 | s2)  # union
    print(s1 - s2)  # difference
```

#### 字符串相关

```python
def fun1():
    """Count digits, whitespace, letters and all remaining characters in a string."""
    str1 = "nihao woshizhanghengyu jinnian 18 sui !!! di_er_hang\n"
    digit_num, space_num, alpha_num = 0, 0, 0
    for c in str1:
        if c.isdigit():
            digit_num += 1
        if c.isspace():
            space_num += 1
        if c.isalpha():
            alpha_num += 1
    # Everything that is not a digit, whitespace or a letter.
    rest_num = len(str1) - (digit_num + space_num + alpha_num)
    print(digit_num, space_num, alpha_num, rest_num)


def fun2():
    """Count the occurrences of specific characters ("o" and " ") in a string."""
    text = "zhy:'hello world'"
    o_num, space_num = 0, 0
    for c in text:
        if c == "o":
            o_num += 1
        if c == " ":
            space_num += 1
    print(o_num, space_num)


def panduan(_str=""):
    """Return True when ``_str`` ends with the suffix ``'d``.

    Args:
        _str: String to test; defaults to the empty string.
    """
    # endswith() already returns a bool, so no comparison with True is needed.
    return _str.endswith("'d")
```

#### nltk库相关

```python
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

# NOTE: word_tokenize, pos_tag and WordNetLemmatizer rely on NLTK data
# packages that must be installed beforehand (see nltk.download()).

# --- Lemmatization ---


def get_wordnet_pos(tag):
    """Map a POS tag to the matching WordNet POS constant.

    The tag is matched by its first letter: J (adjective), V (verb),
    N (noun), R (adverb).

    Returns:
        The WordNet POS constant, or None when the tag matches none of them.
    """
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return None


sentence = 'football is a family of team sports that involve, to varying degrees, kicking a ball to score a goal.'
tokens = word_tokenize(sentence)
tagged_sent = pos_tag(tokens)  # list of (word, tag) pairs

wnl = WordNetLemmatizer()
for word, tag in tagged_sent:
    # Fall back to NOUN when the tag has no WordNet equivalent.
    wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
    lemma = wnl.lemmatize(word, pos=wordnet_pos)
    if word != lemma:
        print(word, "还原为", lemma)

# --- End of lemmatization ---

###############################################

# --- Exam topic 2 ---
# (placeholder: content still to be filled in)
# --- End of exam topic 2 ---
```

#### 词频统计

```python
# --- Word-frequency counting, method 1 ---
def tongji1():
    """Count how often each word occurs in a list, using dict.get()."""
    # The same list can also be used to look up the position of a word.
    str_list = ["nihao", "jacky", "hello", "jacky"]
    dict1 = {}
    for key in str_list:
        # get(key, 0) returns the current count, or the default 0 for a word
        # not seen yet; each occurrence adds 1.
        dict1[key] = dict1.get(key, 0) + 1
    print(dict1)
```

#### 词清洗

```python
# For a string, e.g. "i love py":
#   split() it on spaces into a list,
#   then test each string in the list.

# For a txt file (see 文件相关.py):
#   read it into strings and process them,
#   then turn the strings back into a list,
#   the list back into a 2-D list,
#   and the 2-D list back into a txt file.

# Cleaning usually tests whether a token equals a given value or is contained
# in a given collection; if it does, delete or replace it.
```
zhy@@ldy
2022年12月24日 15:42
转发文档
收藏文档
上一篇
下一篇
手机扫码
复制链接
手机扫一扫转发分享
复制链接
Markdown文件
PDF文档(打印)
分享
链接
类型
密码
更新密码