- 文件操作
- 文件夹和路径
- csv文件
- ini文件
- xml文件
- excel文件
- 压缩文件
day09 函数模块
目的
- 文件操作
- 文件夹和路径
- csv文件
- ini文件
- xml文件
- excel文件
- 压缩文件
函数
函数相当于是代码块
内置函数
- bin
- oct
- int
- hex
- len
自定义函数
def hello_py():
    """Print a fixed greeting to standard output."""
    print('你好呀')
模块
- 模块可包含很多自定义函数
- 模块是一个py文件
- 可以按照功能分类放入py文件
- 使用import导入并使用
内置模块
内置模块解决: ini文件 xml文件 压缩文件 文件夹和路径
import random
# randint(a, b) returns a random integer N with a <= N <= b (both ends included).
data = random.randint(1,10)
print(data)
a = [1,2,3,4,5]
# choice() returns one randomly selected element of a non-empty sequence.
data1 = random.choice(a)
print(data1)
第三方模块
- excel
自定义模块
文件操作
- 字符串以unicode二进制储存
字节以utf-8和gbk形式等编码二进制储存
- 图片,视频,文件直接以二进制字节储存,不转译
读取
- rb,读取二进制
# Read the file as raw bytes, then decode them to text explicitly.
# The with-block closes the handle even if read() raises.
with open('1.txt', mode='rb') as f:
    data = f.read()
data = data.decode('utf-8')
print(data)
- rt 读取文本字符串
# Text mode: the file object decodes UTF-8 itself, so read() returns str.
# The with-block closes the handle even if read() raises.
with open('1.txt', mode='rt', encoding='utf-8') as f:
    data = f.read()
print(data)
- 相对路径
需要先切换到文件所在的工作目录,否则会报 FileNotFoundError
绝对路径
- mac,linux读取 /
windows读取
避免反斜杠被当作转义符
\
# Windows absolute path written with every backslash escaped as "\\".
with open('E:\\python\\pythonProject\\classroom\\day07\\1.txt', mode='rb') as f:
    data = f.read()
data = data.decode('utf-8')
print(data)
r
# Same Windows path as a raw string: the r prefix keeps backslashes literal.
with open(r'E:\python\pythonProject\classroom\day07\1.txt', mode='rb') as f:
    data = f.read()
data = data.decode('utf-8')
print(data)
判断路径是否存在
os.path.exists()
import os

# os.path.exists() returns True when the path refers to an existing file or directory.
file_exists = os.path.exists(r'E:\python\pythonProject\classroom\day07\1.txt')
print(file_exists)
写入
wb
# Binary mode accepts only bytes, so the text is encoded to UTF-8 before writing.
with open('2.txt', mode='wb') as f:
    f.write('武沛齐'.encode('utf-8'))
wt
# Text mode: pass str and let the file object encode it as UTF-8.
with open('3.txt', mode='wt', encoding='utf-8') as f:
    f.write('武沛齐')
图片使用rb复制
# Copy an image byte-for-byte: read the source in binary mode, write it back in binary mode.
# The bytes are written unchanged; only the file name differs.
with open('91.jpg', mode='rb') as f:
    content = f.read()
with open('1.png', mode='wb') as f:
    f.write(content)
案例
用户注册
# Register one user: prompt for the credentials and store them as "user-password".
user = input('输入用户名').strip()
pwd = input('输入密码').strip()
data = f'{user}-{pwd}'
# Mode 'wt' truncates the file, so any earlier content is replaced.
with open(r'files\1.txt', mode='wt', encoding='utf-8') as f:
    f.write(data)
批量用户注册
# Register users in a loop until the user name "q"/"Q" is entered.
# The with-block closes the file even if input() is interrupted.
with open(r'files\1.txt', mode='wt', encoding='utf-8') as f:
    while True:
        # Collect the input.
        user = input('输入用户名').strip()
        if user.upper() == 'Q':
            break
        pwd = input('输入密码').strip()
        # One "user-password" record per line.
        data = f'{user}-{pwd}\n'
        f.write(data)
高级案例
网络下载(请求)文件,存到文件
# Download data from a website (through a local proxy) and store the raw bytes in files\logs1.txt.
import requests

res = requests.get(
    'https://blog.asuna.one/sitemap.xml',
    headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'},
    proxies={'http': '127.0.0.1:10808', 'https': '127.0.0.1:10808'},
)
# Raw string: '\l' is not a valid escape sequence, so the non-raw literal only
# worked by accident and triggers a warning on newer Python versions.
with open(r'files\logs1.txt', mode='wb') as files_object:
    files_object.write(res.content)
网络请求图片,存到文件
# Download an image (through a local proxy) and store the raw bytes in files\sakura1.webp.
import requests

res = requests.get(
    'https://baidu.com/i/107567203.webp',
    headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'},
    proxies={'http': '127.0.0.1:10808', 'https': '127.0.0.1:10808'},
)
# Raw string: '\s' is not a valid escape sequence, so the non-raw literal only
# worked by accident and triggers a warning on newer Python versions.
with open(r'files\sakura1.webp', mode='wb') as files_object:
    files_object.write(res.content)
文件打开模式
r
- 默认rt
- rb
w
- 默认wt
- wb
x
- 文件已存在时报错(FileExistsError),不存在时新建文件并写入,相当于 w 的保守版
- 默认xt
- xb
a 追加
# Keep appending "name-password" records until the name "q"/"Q" is entered.
# The encoding is explicit so the records match the UTF-8 reads of the same file elsewhere;
# without it the platform default encoding would be used.
with open('files\\1.txt', mode='a', encoding='utf-8') as f:
    while True:
        user_input = input('enter your name').strip()
        if user_input.upper() == 'Q':
            break
        pwd_input = input('enter your password').strip()
        data = f'{user_input}-{pwd_input}\n'
        f.write(data)
- t
- b
r+
- 可读,可写
光标根据操作移动
# r+ opens an existing file for both reading and writing.
# Path fix: inside a raw string a doubled backslash is two literal backslashes.
with open(r'files\1.txt', mode='rt+', encoding='utf-8') as f:
    data = f.read()  # read() leaves the cursor at the end of the file
    print(data)
    f.write('eric-166')  # so this write lands after the existing content
w+
- 覆写,新建文件,光标在起点
x+
- 新建文件,光标在起点
a+
打开时光标在结尾,读取前需要先用 seek(0) 把光标移动到起点;写入时一定追加到末尾,seek(0) 对写入位置无效
# a+ appends on every write; seek(0) is only needed to read the file back from the start.
# Path fix: inside a raw string a doubled backslash is two literal backslashes.
with open(r'files\1.txt', mode='at+', encoding='utf-8') as f:
    f.write('\nleon-588')
    f.seek(0)
    data = f.read()
    print(data)
文件功能
读取功能
read读取
rb 模式下 read(n) 按字节读取,返回 bytes(utf-8 编码中一个汉字占三个字节)
# Binary mode: read(3) returns at most 3 bytes, the size of one Chinese character in UTF-8.
with open('1.txt', mode='rb') as f:
    f.read(3)
rt 模式下 read(n) 按字符读取,返回 str(read(3) 读取三个字符,而不是三个字节)
# Text mode: read(3) returns at most 3 characters (not 3 bytes).
with open('1.txt', mode='rt', encoding='utf-8') as f:
    f.read(3)
readline
# Read a single line; the returned string keeps its trailing newline.
# Path fix: inside a raw string a doubled backslash is two literal backslashes.
with open(r'files\1.txt', mode='rt+', encoding='utf-8') as f:
    data = f.readline()
    print(data)
readlines
# Read every line into a list of strings (the whole file is loaded into memory).
# Path fix: inside a raw string a doubled backslash is two literal backslashes.
with open(r'files\1.txt', mode='rt+', encoding='utf-8') as f:
    data = f.readlines()
    print(data)
for循环读取大文件
# Iterating the file object yields one line at a time, so a large file is never fully in memory.
# Path fix: inside a raw string a doubled backslash is two literal backslashes.
with open(r'files\1.txt', mode='rt+', encoding='utf-8') as f:
    for i in f:
        data = i.strip()
        print(data)
写入功能
write()
- wt
- wb
flush()
缓冲区刷新到硬盘
# Append account records until the name "q"/"Q" is entered.
with open('files\\account.txt', mode='at+', encoding='utf-8') as f:
    while True:
        user_input = input('enter your name').strip()
        if user_input.upper() == 'Q':
            break
        pwd_input = input('enter your password').strip()
        data = f'{user_input}-{pwd_input}\n'
        f.write(data)
        # Push the buffered record to disk now instead of waiting for close().
        f.flush()
seek()
以字节为计数单位
# Move the cursor before reading; the seek() offset is counted in bytes, not characters.
with open('files\\1.txt', mode='rt+', encoding='utf-8') as f:
    f.seek(9)
    data = f.read()
    print(data)
当 seek 移动到的位置落在一个汉字(三个字节)的中间时,后续读取会报 UnicodeDecodeError
tell()
以字节为计数单位
# Text mode: read(3) consumes three characters; tell() then reports the position in bytes.
with open('files\\1.txt', mode='rt+', encoding='utf-8') as f:
    f.read(3)
    data = f.tell()  # 9 when those three characters each take 3 bytes in UTF-8 (e.g. Chinese)
    print(data)

# Binary mode: read(3) consumes three bytes.
# No encoding argument here: open() raises ValueError if one is passed in binary mode.
with open('files\\1.txt', mode='rb+') as f:
    f.read(3)
    data = f.tell()  # 3
    print(data)
上下文管理
打开文件后容易忘记关闭(造成资源占用),使用 with … as … 可以在代码块结束时自动关闭文件
打开一个文件
# The file is closed automatically when the with-block exits.
with open('files/1.txt', 'rt+', encoding='utf-8') as f:
    data = f.read()
    print(data)
打开多个文件
# One with-statement can manage several files; both are closed when the block exits.
with open('files/1.txt', 'rt+', encoding='utf-8') as f, \
        open('files/logs1.txt', 'rt+', encoding='utf-8') as f2:
    data = f.read()
    data2 = f2.read()
    print(data, data2)
练习题
文件字符串替换
写入到内存,并replace(小文件)
# Replace text in a small file by loading it fully into memory and rewriting it in place.
with open('files/ha.conf', 'rt+', encoding='utf-8') as f:
    # Read the whole content.
    data = f.read()
    data_replace = data.replace('luffycity', 'pythonav')
    # Rewrite from the start of the file.
    f.seek(0)
    f.write(data_replace)
    # The replacement is shorter than the original text, so cut off the stale
    # tail that would otherwise remain after the new content.
    f.truncate()
读取每一行并替换,将内容写入到新文件(大文件)
# Replace text in a large file by streaming it line by line into a new file.
# Both files use UTF-8 explicitly; the source previously relied on the platform default encoding.
with open('files/ha.conf', 'rt', encoding='utf-8') as f, \
        open('files/replace.ha.conf', mode='wt', encoding='utf-8') as f2:
    for i in f:
        # replace() returns the line unchanged when the text is absent,
        # so no separate membership check is needed.
        f2.write(i.replace('luffycity', 'pythonav'))
文件重命名(移动)
import shutil

# Move the rewritten copy over the original name.
# NOTE(review): an existing destination file may be overwritten; this depends on os.rename() semantics.
shutil.move('files/replace.ha.conf', 'files/ha.conf')
将股票当前价格大于20取出
股票代码,股票名称,当前价,涨跌额,涨跌幅,年初至今,成交量,成交额,换手率,市盈率(TTM),股息率,市值
SH601778,N晶科,6.29,+1.92,+43.94%,+43.94%,259.66万,1625.52万,0.44%,22.32,-,173.95亿
SH688566,吉贝尔,52.66,+6.96,+15.23%,+122.29%,1626.58万,8.09亿,42.29%,89.34,-,98.44亿
SH688268,华特气体,88.80,+11.72,+15.20%,+102.51%,622.60万,5.13亿,22.87%,150.47,-,106.56亿
SH600734,实达集团,2.60,+0.24,+10.17%,-61.71%,1340.27万,3391.14万,2.58%,亏损,0.00%,16.18亿
SH900957,凌云B股,0.36,+0.033,+10.09%,-35.25%,119.15万,42.10万,0.65%,44.65,0.00%,1.26亿
SZ000584,哈工智能,6.01,+0.55,+10.07%,-4.15%,2610.86万,1.53亿,4.36%,199.33,0.26%,36.86亿
SH600599,熊猫金控,6.78,+0.62,+10.06%,-35.55%,599.64万,3900.23万,3.61%,亏损,0.00%,11.25亿
SH600520,文一科技,8.21,+0.75,+10.05%,-24.05%,552.34万,4464.69万,3.49%,亏损,0.00%,13.01亿
SH603682,锦和商业,11.73,+1.07,+10.04%,+48.29%,2746.63万,3.15亿,29.06%,29.62,-,55.42亿
SZ300831,派瑞股份,12.27,+1.12,+10.04%,+208.29%,25.38万,311.41万,0.32%,60.59,-,39.26亿
# Collect the stocks whose current price is above 20, mapping stock name -> price.
plus_20_stock = {}
with open('files/stock.txt', 'rt', encoding='utf-8') as f:
    # Consume the header row so the loop starts at the first data row.
    f.readline()
    for i in f:
        # A blank line (e.g. at the end of the file) has no price column; skip it.
        if not i.strip():
            continue
        # Columns: code, name, current price, ...
        data = i.split(',')
        price = float(data[2])
        if not price > 20:
            continue
        plus_20_stock[data[1]] = price
print(plus_20_stock)
读取access1.log的单独ip访问次数
# Count how many times each client IP appears in files/access1.log,
# keeping IPv4 and IPv6 addresses in separate dictionaries.
count_ip = {}
count_ipv6 = {}
# NOTE(review): assumes the log is UTF-8 (or plain ASCII) - confirm against the real file.
with open('files/access1.log', 'rt', encoding='utf-8') as f:
    for line in f:
        data = line.split(' ')
        # Blank or truncated lines have no fourth field; skip them.
        if len(data) < 4:
            continue
        # Presumably the fourth space-separated field is "address:port".
        ip_addr = data[3]
        if ip_addr.startswith('['):
            # Bracketed IPv6: the address itself contains ':', so only the last
            # ':'-separated part is stripped.
            ipv6_addr = ip_addr.rsplit(':', 1)[0]
            count_ipv6[ipv6_addr] = count_ipv6.get(ipv6_addr, 0) + 1
        else:
            ipv4_addr = ip_addr.split(':')[0]
            count_ip[ipv4_addr] = count_ip.get(ipv4_addr, 0) + 1

# Write both counters to files/count_ip.txt, one "ip:...,次数:...," record per line.
with open('files/count_ip.txt', 'wt', encoding='utf-8') as f1:
    f1.write('[ipv4访问次数] \n')
    for ip, hits in count_ip.items():
        f1.write(f'ip:{ip},次数:{hits},\n')
    f1.write('[ipv6访问次数] \n')
    for ip, hits in count_ipv6.items():
        f1.write(f'ip:{ip},次数:{hits},\n')