File Handling in Python
File handling is an essential part of any programming language. Python provides several functions and methods for creating, reading, updating, and deleting files. This guide covers the basics of file operations in Python, including working with text and binary files, using different file modes, and implementing best practices.
Basic File Operations
Opening and Closing Files
In Python, files are opened using the open() function, which returns a file object.
It's important to properly close files after operations to free up system resources.
# Opening and closing files: manual close, the 'with' statement, and file modes.

# Basic file opening and closing
file = open('example.txt', 'r')  # Open file for reading
try:
    # Perform operations...
    pass
finally:
    # Explicitly close the file; 'finally' guarantees this runs even if an
    # operation above raised, so the file handle is never leaked.
    file.close()

# Using 'with' statement (recommended)
# File is automatically closed when exiting the block
with open('example.txt', 'r') as file:
    # Perform operations...
    content = file.read()
# File is now closed, even if an exception occurred

# File modes
# 'r': Read (default) - Open for reading
# 'w': Write - Create new file or truncate existing file
# 'a': Append - Open for writing, append to end of file
# 'x': Exclusive creation - Create a new file, fail if exists
# 'b': Binary mode (e.g., 'rb' for reading binary)
# 't': Text mode (default)
# '+': Update mode (e.g., 'r+' for reading and writing)

# Examples of different modes
with open('file.txt', 'w') as f:  # Write mode
    f.write('Hello, World!')

with open('file.txt', 'a') as f:  # Append mode
    f.write('\nAppending line')

with open('file.txt', 'r+') as f:  # Read and write mode
    content = f.read()
    # read() left the position at end of file, so this write appends.
    f.write('\nUpdating file')

with open('image.jpg', 'rb') as f:  # Binary read mode
    binary_data = f.read()  # bytes object, no text decoding
Reading Files
Python provides several methods to read data from files.
# Reading a file: whole-file read, readlines(), and line-by-line iteration.
# All three reads happen inside the same 'with' block because seek() and
# iteration require the file to still be open.
with open('example.txt', 'r') as file:  # 'r' is for reading (default)
    content = file.read()  # Read the entire file
    print(content)

    file.seek(0)  # Reset file pointer to beginning
    lines = file.readlines()  # Read all lines into a list
    for line in lines:
        print(line.strip())  # strip() removes leading/trailing whitespace

    file.seek(0)
    for line in file:  # Iterate through the file line by line
        print(line.strip())
Writing Files
Python provides methods to write data to files.
# Writing files: write(), writelines(), appending, in-place update, flushing.
import os

# Writing a string to a file
with open('output.txt', 'w') as f:
    f.write('Hello, World!\n')  # Write a string
    f.write('Second line')  # Write another string

# Writing multiple lines at once
lines = ['First line\n', 'Second line\n', 'Third line\n']
with open('output.txt', 'w') as f:
    # writelines() adds no separators; each string carries its own '\n'.
    f.writelines(lines)  # Write a list of strings

# Appending to a file
with open('output.txt', 'a') as f:
    f.write('\nAppended text')  # Add to end of file

# Writing at a specific position
with open('output.txt', 'r+') as f:  # Read and write mode
    content = f.read()
    f.seek(0)  # Go back to the beginning
    # The new text is longer than the old content, so it fully replaces it.
    f.write('REPLACED: ' + content)  # Overwrite from the beginning

# Flushing data to disk
with open('output.txt', 'w') as f:
    f.write('Critical data')
    f.flush()  # Push Python's internal buffer to the operating system
    # flush() alone does not guarantee the data has reached the disk;
    # fsync() asks the OS to commit its own buffers as well.
    os.fsync(f.fileno())
Working with File Paths
Python's os and pathlib modules provide functions for working with file paths in a platform-independent way.
# Path handling with os.path and pathlib. None of these calls require the
# paths to exist; the existence/type checks simply report False if they don't.
import os
from pathlib import Path

# Using os.path for path operations

# Join path components
file_path = os.path.join('folder', 'subfolder', 'file.txt')
print(file_path)  # 'folder/subfolder/file.txt' on Unix
                  # 'folder\subfolder\file.txt' on Windows

# Get absolute path
abs_path = os.path.abspath('file.txt')
print(abs_path)

# Split path into directory and file
dirname, filename = os.path.split('/path/to/file.txt')
print(dirname)   # '/path/to'
print(filename)  # 'file.txt'

# Split file name and extension
file_name, file_ext = os.path.splitext('file.txt')
print(file_name)  # 'file'
print(file_ext)   # '.txt'

# Check if path exists
exists = os.path.exists('file.txt')
print(exists)

# Check if path is a file
is_file = os.path.isfile('file.txt')
print(is_file)

# Check if path is a directory
is_dir = os.path.isdir('folder')
print(is_dir)

# Using pathlib (modern approach, Python 3.4+)

# Create path object; the '/' operator joins path components
path = Path('folder') / 'subfolder' / 'file.txt'
print(path)

# Get absolute path
abs_path = path.absolute()
print(abs_path)

# Get parent directory
parent = path.parent
print(parent)

# Get file name and suffix
print(path.name)    # 'file.txt'
print(path.suffix)  # '.txt'
print(path.stem)    # 'file'

# Check if path exists
exists = path.exists()
print(exists)

# Check type
is_file = path.is_file()
is_dir = path.is_dir()

# List directory contents
current_dir = Path('.')
for item in current_dir.iterdir():
    print(item)
File and Directory Operations
Python's os and shutil modules provide functions for file system operations like creating, moving, copying, and deleting files and directories.
# File system operations with os and shutil. These calls act on real paths
# and raise (e.g. FileNotFoundError, FileExistsError) if the named files or
# directories are not in the expected state.
import os
import shutil

# File operations with os module

# Create directory
os.mkdir('new_folder')  # Create single directory
os.makedirs('path/to/nested/folders', exist_ok=True)  # Create nested directories

# List directory contents
files = os.listdir('folder')
print(files)

# Rename file or directory
os.rename('old_name.txt', 'new_name.txt')

# Remove file
os.remove('file_to_delete.txt')

# Remove directory (must be empty)
os.rmdir('empty_folder')

# Get file information
file_stat = os.stat('file.txt')
print(f"Size: {file_stat.st_size} bytes")
print(f"Modified time: {file_stat.st_mtime}")  # seconds since the epoch

# Advanced file operations with shutil

# Copy file
shutil.copy('source.txt', 'destination.txt')   # Copy contents and permission bits
shutil.copy2('source.txt', 'destination.txt')  # Copy with metadata

# Copy directory
shutil.copytree('source_dir', 'destination_dir')

# Move file or directory
shutil.move('source', 'destination')

# Remove directory and contents
shutil.rmtree('folder_to_delete')

# Get disk usage (values are in bytes; 2**30 bytes per unit below)
total, used, free = shutil.disk_usage('/')
print(f"Total: {total // (2**30)} GB")
print(f"Used: {used // (2**30)} GB")
print(f"Free: {free // (2**30)} GB")
Working with CSV Files
Python's csv module simplifies working with CSV (Comma-Separated Values) files.
# Reading and writing CSV files with the csv module. Files are opened with
# newline='' so the csv module controls line endings itself.
import csv

# Reading CSV files

# Reading as lists
with open('data.csv', 'r', newline='') as file:
    csv_reader = csv.reader(file)
    # Skip header row; the None default avoids StopIteration on an empty file
    header = next(csv_reader, None)
    for row in csv_reader:
        print(row)  # Each row is a list

# Reading as dictionaries
with open('data.csv', 'r', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        print(row)  # Each row is a dict with keys from header

# Writing CSV files

# Writing with lists
data = [
    ['Name', 'Age', 'City'],
    ['Alice', '30', 'New York'],
    ['Bob', '25', 'Chicago'],
]
with open('output.csv', 'w', newline='') as file:
    csv_writer = csv.writer(file)
    for row in data:
        csv_writer.writerow(row)
    # Alternative: write all rows at once
    # csv_writer.writerows(data)

# Writing with dictionaries
data = [
    {'Name': 'Alice', 'Age': '30', 'City': 'New York'},
    {'Name': 'Bob', 'Age': '25', 'City': 'Chicago'},
]
with open('output.csv', 'w', newline='') as file:
    fieldnames = ['Name', 'Age', 'City']
    csv_writer = csv.DictWriter(file, fieldnames=fieldnames)
    csv_writer.writeheader()  # Write header row
    for row in data:
        csv_writer.writerow(row)
    # Alternative: write all rows at once
    # csv_writer.writerows(data)

# Working with different delimiters
with open('data.tsv', 'r', newline='') as file:
    tsv_reader = csv.reader(file, delimiter='\t')
    for row in tsv_reader:
        print(row)
Working with JSON Files
Python's json module makes it easy to work with JSON (JavaScript Object Notation) data.
# Working with JSON: converting between Python objects and JSON text, and
# reading/writing JSON files.
import json

# Python object to JSON string
data = {
    'name': 'Alice',
    'age': 30,
    'city': 'New York',
    'languages': ['Python', 'JavaScript'],
    'active': True,
    'height': 5.5,
}

# Convert Python object to JSON string
json_string = json.dumps(data)
print(json_string)

# Pretty print with indentation
pretty_json = json.dumps(data, indent=4, sort_keys=True)
print(pretty_json)

# Write JSON to file
# JSON files are UTF-8 by convention; naming the encoding avoids relying on
# the platform default, which differs between systems.
with open('data.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

# Read JSON from string
json_string = '{"name": "Bob", "age": 25, "city": "Chicago"}'
python_obj = json.loads(json_string)
print(python_obj['name'])  # Bob

# Read JSON from file
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
print(data)
Best Practices and Error Handling
Proper error handling is crucial when working with files: it lets a program respond gracefully to unexpected situations such as missing files or insufficient permissions.
# Error handling with file operations, followed by a list of best practices.
import csv
import os
import tempfile

# Handling file not found
try:
    with open('nonexistent_file.txt', 'r') as file:
        content = file.read()
except FileNotFoundError:
    print("File not found")
    # Create an empty file
    with open('nonexistent_file.txt', 'w') as file:
        pass

# Handling permission errors
try:
    with open('/root/system_file.txt', 'w') as file:
        file.write('This will likely fail')
except PermissionError:
    print("Permission denied")

# Checking if file exists before opening
# Note: the file can still disappear between the check and the open(), so
# the try/except form above is the more robust of the two approaches.
if os.path.exists('file.txt'):
    with open('file.txt', 'r') as file:
        content = file.read()
else:
    print("File does not exist")

# Best practices

# 1. Always use 'with' statement

# 2. Specify encoding for text files
with open('file.txt', 'r', encoding='utf-8') as file:
    content = file.read()

# 3. Be specific with error handling
# Most specific exceptions come first; the final clause is a last-resort
# catch-all that reports anything unexpected.
try:
    with open('file.txt', 'r') as file:
        content = file.read()
except FileNotFoundError:
    print("File not found")
except PermissionError:
    print("Permission denied")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

# 4. Use 'newline' parameter for CSV files
with open('data.csv', 'r', newline='') as file:
    csv_reader = csv.reader(file)
    # Process CSV data

# 5. Use context managers for temporary files
with tempfile.NamedTemporaryFile(delete=True) as temp:
    temp.write(b"Temporary data")  # opened in binary mode by default
    temp.flush()
    # Do something with temp.name
# File is automatically deleted after the block

# 6. Using exclusive creation mode to prevent overwriting
try:
    with open('important_data.txt', 'x') as file:
        file.write('New data')
except FileExistsError:
    print("File already exists. Will not overwrite.")