文件操作
文件夹和路径
csv文件
ini文件
xml文件
excel文件
压缩文件

day09 函数模块

目的

文件操作
文件夹和路径
csv文件
ini文件
xml文件
excel文件
压缩文件

函数

函数相当于是代码块

内置函数

自定义函数

def hello_py():
    """Print a fixed greeting to standard output."""
    greeting = '你好呀'
    print(greeting)

模块

模块可包含很多自定义函数
模块是一个py文件
可以按照功能分类放入py文件
使用import导入并使用

内置模块

内置模块可以解决: ini文件、xml文件、压缩文件、文件夹和路径等操作

import random

# randint(a, b) returns a random integer N with a <= N <= b (both ends included).
lower, upper = 1, 10
data = random.randint(lower, upper)
print(data)

# choice() returns one randomly selected element of a non-empty sequence.
a = list(range(1, 6))
data1 = random.choice(a)
print(data1)

第三方模块

excel

自定义模块

文件操作

字符串(str)在内存中以unicode形式储存
字节(bytes)是字符串按utf-8、gbk等编码转换后的二进制数据
- 图片、视频等文件本身就是二进制字节，直接按字节读写，不需要编码转换

读取

rb,读取二进制

# Binary read: 'rb' hands back raw bytes, so decoding them to str is done by hand.
f = open('1.txt', mode='rb')
raw = f.read()
f.close()
data = raw.decode('utf-8')
print(data)

rt 读取文本字符串

# Text read: with 'rt' plus an encoding, open() does the decoding and read() returns str.
f = open(file='1.txt', mode='rt', encoding='utf-8')
data = f.read()
f.close()
print(data)

相对路径

使用相对路径时，需要先切换到对应目录再运行，否则会报FileNotFoundError

绝对路径

mac,linux读取 /

windows读取

Windows路径中的\会被当作转义符，可写成\\或使用r''原始字符串来避免转义

# Two equivalent spellings of the same Windows path: doubled backslashes in a
# normal string literal, or single backslashes in a raw (r'...') literal.
escaped_path = 'E:\\python\\pythonProject\\classroom\\day07\\1.txt'
raw_path = r'E:\python\pythonProject\classroom\day07\1.txt'

# Read and print the file once through each spelling.
for path in (escaped_path, raw_path):
    f = open(path, mode='rb')
    data = f.read()
    data = data.decode('utf-8')
    print(data)
    f.close()

判断路径是否存在

os.path.exists()

import os

# os.path.exists() reports whether the given path currently exists.
target_path = r'E:\python\pythonProject\classroom\day07\1.txt'
file_exists = os.path.exists(target_path)
print(file_exists)

写入

payload = '武沛齐'

# Binary write: the caller encodes the text to bytes before writing.
f = open('2.txt', mode='wb')
f.write(payload.encode('utf-8'))
f.close()

# Text write: open() encodes the str using the encoding given here.
f = open('3.txt', mode='wt', encoding='utf-8')
f.write(payload)
f.close()

图片使用rb复制

# Copy an image by reading every byte of the source and writing the bytes back out.
source = open('91.jpg', mode='rb')
content = source.read()
source.close()

# The bytes are written unchanged, so no format conversion happens here.
# NOTE(review): the target is named .png while the source is .jpg - presumably
# the copy still holds JPEG data; confirm the extension is intended.
target = open('1.png', mode='wb')
target.write(content)
target.close()

案例

用户注册

# User registration: prompt for a name and a password, then store them as "name-password".
user = input('输入用户名').strip()
pwd = input('输入密码').strip()

# Mode 'wt' starts from an empty file, so only this one record is kept.
f = open(r'files\1.txt', mode='wt', encoding='utf-8')
f.write('-'.join((user, pwd)))
f.close()

批量用户注册

# Batch registration: keep collecting "name-password" records, one per line,
# until the user types Q (or q) as the name.
f = open(r'files\1.txt', mode='wt', encoding='utf-8')

while True:
    user = input('输入用户名').strip()
    quit_requested = user.upper() == 'Q'
    if quit_requested:
        break
    pwd = input('输入密码').strip()

    # One record per line.
    f.write('{}-{}\n'.format(user, pwd))

f.close()

高级案例

网络下载(请求)文件,存到文件

# Download a document over HTTP and save the raw response body to files/logs1.txt.
import requests

# The request is sent with a browser-like User-Agent through a local proxy on port 10808.
res = requests.get('https://blog.asuna.one/sitemap.xml',
                   headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'},
                   proxies={'http': '127.0.0.1:10808', 'https': '127.0.0.1:10808'},
                   timeout=10)  # without a timeout a stalled connection would block forever
# Fail loudly on a 4xx/5xx answer instead of saving an error page as if it were the data.
res.raise_for_status()

# A forward slash avoids the invalid '\l' escape the old path relied on and
# also works on Windows.
files_object = open('files/logs1.txt', mode='wb')
files_object.write(res.content)  # .content is the undecoded body, hence 'wb'
files_object.close()

网络请求图片,存到文件

# Download an image over HTTP and save the raw response body to files/sakura1.webp.
import requests

# The request is sent with a browser-like User-Agent through a local proxy on port 10808.
res = requests.get('https://baidu.com/i/107567203.webp',
                   headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'},
                   proxies={'http': '127.0.0.1:10808', 'https': '127.0.0.1:10808'},
                   timeout=10)  # without a timeout a stalled connection would block forever
# Fail loudly on a 4xx/5xx answer instead of saving an error page as if it were an image.
res.raise_for_status()

# A forward slash avoids the invalid '\s' escape the old path relied on and
# also works on Windows.
files_object = open('files/sakura1.webp', mode='wb')
files_object.write(res.content)  # image bytes are written as-is, hence 'wb'
files_object.close()

文件打开模式

r
- 默认rt
- rb
w
- 默认wt
- wb
x
- 文件存在则报错，不存在则新建并写入(w的保守版)
- 默认xt
- xb

a 追加

# Append-mode registration: each run adds new "name-password" lines after the
# existing content; typing Q (or q) as the name ends the loop.
# The encoding is stated explicitly so the records are always written as UTF-8
# rather than in the platform default encoding.
f = open('files\\1.txt', mode='a', encoding='utf-8')

while True:
    user_input = input('enter your name').strip()
    if user_input.upper() == 'Q':
        break
    pwd_input = input('enter your password').strip()

    data = f'{user_input}-{pwd_input}\n'
    f.write(data)
f.close()

可读，可写

光标根据操作移动

# 'rt+' opens an existing text file for both reading and writing.
# The path uses a forward slash: a raw string with doubled backslashes would
# contain two literal backslashes.
f = open('files/1.txt', mode='rt+', encoding='utf-8')
data = f.read()  # read() leaves the cursor at the end of the file
print(data)
f.write('eric-166')  # so this text is written after the existing content
f.close()

w+
- 覆写,新建文件，光标在起点
x+
- 新建文件，光标在起点

a+：打开时光标在结尾，读取前需先用seek(0)把光标移动到起点(写入时一定追加到末尾，seek(0)对写入无效)

# 'at+' opens for appending and reading; writes always go to the end of the file.
# The path uses a forward slash: a raw string with doubled backslashes would
# contain two literal backslashes.
f = open('files/1.txt', mode='at+', encoding='utf-8')
f.write('\nleon-588')
f.seek(0)  # move the cursor back to the start so read() returns the whole file
data = f.read()
print(data)
f.close()

文件功能

读取功能

read读取

rb模式下read(n)按字节读取(utf-8编码中一个汉字占三个字节)

# Binary mode: read(n) counts bytes.
f = open('1.txt', mode='rb')
f.read(3)  # 3 bytes - one Chinese character, assuming the file is UTF-8 and starts with one
f.close()  # release the file handle once the demo is done

rt模式下read(n)按字符读取(一个汉字算一个字符)

# Text mode: read(n) counts characters, not bytes.
f = open('1.txt', mode='rt', encoding='utf-8')
f.read(3)  # up to 3 characters (three Chinese characters would be 9 bytes in UTF-8)
f.close()  # release the file handle once the demo is done

readline

# readline() returns the next single line, including its trailing newline if present.
# The path uses a forward slash: a raw string with doubled backslashes would
# contain two literal backslashes.
f = open('files/1.txt', mode='rt+', encoding='utf-8')
data = f.readline()
print(data)
f.close()

readlines

# readlines() reads the remaining lines and returns them as a list of strings.
# The path uses a forward slash: a raw string with doubled backslashes would
# contain two literal backslashes.
f = open('files/1.txt', mode='rt+', encoding='utf-8')
data = f.readlines()
print(data)
f.close()

for循环读取大文件

# Iterating over the file object yields one line at a time, so the whole file
# never has to be held in memory at once.
# The path uses a forward slash: a raw string with doubled backslashes would
# contain two literal backslashes.
f = open('files/1.txt', mode='rt+', encoding='utf-8')
for i in f:
    data = i.strip()  # drop the trailing newline and surrounding whitespace
    print(data)
f.close()

写入功能

write()
- wt
- wb

flush()

缓冲区刷新到硬盘

# Keep appending "name-password" lines to the account file until Q (or q) is entered.
f = open('files\\account.txt', mode='at+', encoding='utf-8')

while True:
    user_input = input('enter your name').strip()
    if user_input.upper() == 'Q':
        break
    pwd_input = input('enter your password').strip()
    f.write('{}-{}\n'.format(user_input, pwd_input))
    # flush() hands the buffered text over right away instead of waiting for close().
    f.flush()
f.close()

seek()

以字节为计数单位

# Move the cursor, then read from the new position to the end of the file.
f = open('files\\1.txt', mode='rt+', encoding='utf-8')
# seek() offsets are counted in bytes.
# NOTE(review): 9 presumably skips three 3-byte UTF-8 characters - confirm against the file content.
f.seek(9)
data = f.read()
f.close()
print(data)

当seek移动到的位置不在汉字三字节的边界上时，读取会报错(UnicodeDecodeError)

tell()

以字节为计数单位

# Cursor position in text mode: read(3) consumes three characters.
f = open('files\\1.txt', mode='rt+', encoding='utf-8')
f.read(3)
# NOTE(review): expected to be 9 when the first three characters are 3-byte
# UTF-8 characters - confirm against the file content.
data = f.tell()
f.close()
print(data)

# Cursor position in binary mode: read(3) consumes three bytes.
# Binary modes must not be given an encoding - open() raises ValueError if one
# is passed - so the argument is left out here.
f = open('files\\1.txt', mode='rb+')
f.read(3)
data = f.tell()  # 3: the cursor sits right after the three bytes just read
print(data)
f.close()

上下文管理

在打开文件时，会忘记关闭(造成资源占用)，引用with…as…可解决

打开一个文件

# The with-statement closes the file automatically when its block is left.
with open('files/1.txt', mode='rt+', encoding='utf-8') as f:
    data = f.read()
print(data)

打开多个文件

# One with-statement can manage several files; both are closed when the block is left.
with open('files/1.txt', mode='rt+', encoding='utf-8') as f, \
        open('files/logs1.txt', mode='rt+', encoding='utf-8') as f2:
    data = f.read()
    data2 = f2.read()
print(data, data2)

练习题

文件字符串替换

读取到内存，replace后写回原文件(小文件)

# In-place replacement for small files: load everything, replace, write it back.
with open('files/ha.conf', 'rt+', encoding='utf-8') as f:
    # Read the whole file into memory.
    data = f.read()
    data_replace = data.replace('luffycity', 'pythonav')
    # Rewind and overwrite the file from the beginning.
    f.seek(0)
    f.write(data_replace)
    # The replacement is shorter than the original word, so the new text ends
    # before the old text did; cut the file here to drop the stale tail.
    f.truncate()

读取每一行并替换，将内容写入到新文件(大文件)

# Replacement for large files: stream line by line into a new file so the
# whole content never has to fit in memory.
# The source is only read, so plain 'rt' is enough, and it is decoded with the
# same explicit encoding the output is written in.
with open('files/ha.conf', mode='rt', encoding='utf-8') as f, \
        open('files/replace.ha.conf', mode='wt', encoding='utf-8') as f2:
    for i in f:
        # replace() returns the line unchanged when the word is absent, so no
        # separate membership test is needed.
        f2.write(i.replace('luffycity', 'pythonav'))

文件重命名(用新文件覆盖原文件)

import shutil

# Move the rewritten copy onto the original file's path.
# NOTE(review): per the shutil docs an existing destination file may be
# overwritten depending on os.rename() semantics - confirm on the target platform.
source_path = 'files/replace.ha.conf'
target_path = 'files/ha.conf'
shutil.move(source_path, target_path)

将股票当前价格大于20取出

股票代码,股票名称,当前价,涨跌额,涨跌幅,年初至今,成交量,成交额,换手率,市盈率(TTM),股息率,市值
SH601778,N晶科,6.29,+1.92,+43.94%,+43.94%,259.66万,1625.52万,0.44%,22.32,-,173.95亿
SH688566,吉贝尔,52.66,+6.96,+15.23%,+122.29%,1626.58万,8.09亿,42.29%,89.34,-,98.44亿
SH688268,华特气体,88.80,+11.72,+15.20%,+102.51%,622.60万,5.13亿,22.87%,150.47,-,106.56亿
SH600734,实达集团,2.60,+0.24,+10.17%,-61.71%,1340.27万,3391.14万,2.58%,亏损,0.00%,16.18亿
SH900957,凌云B股,0.36,+0.033,+10.09%,-35.25%,119.15万,42.10万,0.65%,44.65,0.00%,1.26亿
SZ000584,哈工智能,6.01,+0.55,+10.07%,-4.15%,2610.86万,1.53亿,4.36%,199.33,0.26%,36.86亿
SH600599,熊猫金控,6.78,+0.62,+10.06%,-35.55%,599.64万,3900.23万,3.61%,亏损,0.00%,11.25亿
SH600520,文一科技,8.21,+0.75,+10.05%,-24.05%,552.34万,4464.69万,3.49%,亏损,0.00%,13.01亿
SH603682,锦和商业,11.73,+1.07,+10.04%,+48.29%,2746.63万,3.15亿,29.06%,29.62,-,55.42亿
SZ300831,派瑞股份,12.27,+1.12,+10.04%,+208.29%,25.38万,311.41万,0.32%,60.59,-,39.26亿

# Collect every stock whose current price is above 20, as {name: price}.
plus_20_stock = {}
with open('files/stock.txt', 'rt', encoding='utf-8') as f:
    # Consume the header row so the loop below starts at the first data row.
    f.readline()
    for i in f:
        # A blank line (e.g. at the end of the file) has no price column; skip it.
        if not i.strip():
            continue
        # Split the comma-separated row: index 1 is the name, index 2 the current price.
        data = i.split(',')
        price = float(data[2])
        if price > 20:
            plus_20_stock[data[1]] = price
print(plus_20_stock)

读取access1.log中每个ip的访问次数

# Count how many times each client address appears in files/access1.log,
# keeping IPv4 and IPv6 addresses in separate dictionaries.
count_ip = {}
count_ipv6 = {}

# Decode explicitly instead of relying on the platform default; undecodable
# bytes elsewhere in a line are replaced so they cannot abort the count.
with open('files/access1.log', 'rt', encoding='utf-8', errors='replace') as f:
    for line in f:
        data = line.split(' ')
        # Blank or truncated lines have no 4th field; skip them instead of crashing.
        if len(data) < 4:
            continue
        # NOTE(review): assumes the 4th space-separated field is "address:port" - confirm against the log format.
        ip_addr = data[3]
        if ip_addr.startswith('['):
            # Bracketed IPv6 contains colons itself, so only the last colon
            # separates the port; the brackets stay part of the key.
            ipv6_addr = ip_addr.rsplit(':', 1)[0]
            count_ipv6[ipv6_addr] = count_ipv6.get(ipv6_addr, 0) + 1
        else:
            # IPv4: everything before the first colon is the address.
            ipv4_addr = ip_addr.split(':')[0]
            count_ip[ipv4_addr] = count_ip.get(ipv4_addr, 0) + 1

# Write both tallies to files/count_ip.txt: a section header, then one line per address.
with open('files/count_ip.txt', 'wt', encoding='utf-8') as f1:
    f1.write('[ipv4访问次数] \n')
    for ip, hits in count_ip.items():
        f1.write(f'ip:{ip},次数：{hits},\n')
    f1.write('[ipv6访问次数] \n')
    for ip, hits in count_ipv6.items():
        f1.write(f'ip:{ip},次数:{hits},\n')

Python的文件操作|基础七