File Handling in Python

File handling is an essential part of any programming language. Python provides several functions and methods for creating, reading, updating, and deleting files. This guide covers the basics of file operations in Python, including working with text and binary files, using different file modes, and implementing best practices.

Basic File Operations

Opening and Closing Files

In Python, files are opened using the open() function, which returns a file object. It's important to properly close files after operations to free up system resources.

# Basic file opening and closing
# open() returns a file object that the caller is responsible for closing.
# try/finally guarantees close() runs even if an operation in between raises;
# without it the file handle would leak on error.
file = open('example.txt', 'r')  # Open file for reading
try:
    # Perform operations...
    content = file.read()
finally:
    file.close()  # Explicitly close the file

# Using 'with' statement (recommended)
# File is automatically closed when exiting the block
with open('example.txt', 'r') as file:
    # Perform operations...
    content = file.read()
# File is now closed, even if an exception occurred

# File modes
# 'r': Read (default) - Open for reading
# 'w': Write - Create new file or truncate existing file
# 'a': Append - Open for writing, append to end of file
# 'x': Exclusive creation - Create a new file, fail if exists
# 'b': Binary mode (e.g., 'rb' for reading binary)
# 't': Text mode (default)
# '+': Update mode (e.g., 'r+' for reading and writing)

# Examples of different modes
# Write mode: creates file.txt, or empties it first if it already exists
with open('file.txt', 'w') as out:
    out.write('Hello, World!')

# Append mode: every write lands after the existing content
with open('file.txt', 'a') as out:
    out.write('\nAppending line')

# Read and write mode: read() leaves the position at the end of the file,
# so the write() that follows extends the file
with open('file.txt', 'r+') as handle:
    content = handle.read()
    handle.write('\nUpdating file')

# Binary read mode: read() returns bytes rather than str
with open('image.jpg', 'rb') as image:
    binary_data = image.read()

Reading Files

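File objects offer several ways to read data: read() returns the whole file as one string, readlines() returns a list of lines, and iterating over the file object yields one line at a time.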

# Reading a file
# Three ways to consume the same file; seek(0) rewinds between them because
# each approach reads through to the end.
with open('example.txt', 'r') as source:  # 'r' is for reading (default)
    # 1) The whole file as a single string
    content = source.read()
    print(content)

    # 2) Every line collected into a list
    source.seek(0)  # Reset file pointer to beginning
    lines = source.readlines()
    for text in lines:
        print(text.strip())  # strip() removes leading/trailing whitespace

    # 3) Line-by-line iteration over the file object itself
    source.seek(0)
    for text in source:
        print(text.strip())

Writing Files

File objects provide write() for a single string and writelines() for a sequence of strings; neither adds newline characters automatically.

# Writing a string to a file
with open('output.txt', 'w') as out:
    out.write('Hello, World!\n')  # write() adds no newline; include it yourself
    out.write('Second line')      # This line therefore ends without one

# Writing multiple lines at once
lines = ['First line\n', 'Second line\n', 'Third line\n']
with open('output.txt', 'w') as out:
    out.writelines(lines)  # Each string is written as-is, no separators added

# Appending to a file
with open('output.txt', 'a') as out:
    out.write('\nAppended text')  # Lands after the current end of the file

# Writing at a specific position
with open('output.txt', 'r+') as out:  # Read and write mode
    content = out.read()
    out.seek(0)  # Rewind so the next write starts at the first character
    out.write(f'REPLACED: {content}')  # Overwrite from the beginning

# Flushing data to disk
# flush() alone only empties Python's internal buffer into the operating
# system; the OS may still hold the data in its own cache. os.fsync() asks
# the OS to commit that cache to the storage device.
import os

with open('output.txt', 'w') as f:
    f.write('Critical data')
    f.flush()             # Python buffer -> operating system
    os.fsync(f.fileno())  # Operating system cache -> disk

Working with File Paths

Python's os and pathlib modules provide functions for working with file paths in a platform-independent way.

# Using os.path for path operations
import os

# Join path components
# join() places the running platform's separator between the pieces
segments = ('folder', 'subfolder', 'file.txt')
file_path = os.path.join(*segments)
print(file_path)  # 'folder/subfolder/file.txt' on Unix
                  # 'folder\subfolder\file.txt' on Windows

# Get absolute path
# A relative name is resolved against the current working directory
abs_path = os.path.abspath('file.txt')
print(abs_path)

# Split path into directory and file
head_and_tail = os.path.split('/path/to/file.txt')
dirname, filename = head_and_tail
print(dirname)   # '/path/to'
print(filename)  # 'file.txt'

# Split file name and extension
# The extension keeps its leading dot
root_and_ext = os.path.splitext('file.txt')
file_name, file_ext = root_and_ext
print(file_name)  # 'file'
print(file_ext)   # '.txt'

# Check if path exists
# True for files and directories alike
exists = os.path.exists('file.txt')
print(exists)

# Check if path is a file
is_file = os.path.isfile('file.txt')
print(is_file)

# Check if path is a directory
is_dir = os.path.isdir('folder')
print(is_dir)

# Using pathlib (modern approach, Python 3.4+)
from pathlib import Path

# Create path object
# The / operator appends segments using the platform's separator
base = Path('folder')
path = base / 'subfolder' / 'file.txt'
print(path)

# Get absolute path
# Anchored at the current working directory
abs_path = path.absolute()
print(abs_path)

# Get parent directory
parent = path.parent
print(parent)

# Get file name and suffix
print(path.name)    # 'file.txt'  (final component)
print(path.suffix)  # '.txt'      (extension, dot included)
print(path.stem)    # 'file'      (final component without the suffix)

# Check if path exists
exists = path.exists()
print(exists)

# Check type
is_file = path.is_file()
is_dir = path.is_dir()

# List directory contents
# iterdir() yields one Path object per entry of the directory
current_dir = Path('.')
for entry in current_dir.iterdir():
    print(entry)

File and Directory Operations

Python's os and shutil modules provide functions for file system operations like creating, moving, copying, and deleting files and directories.

# File operations with os module
import os

# Create directory
# os.mkdir() creates exactly one level and raises FileExistsError if the
# directory is already there; os.makedirs() also creates missing parents, and
# exist_ok=True makes it succeed when the target already exists.
os.mkdir('new_folder')  # Create single directory
os.makedirs('path/to/nested/folders', exist_ok=True)  # Create nested directories

# List directory contents
# Returns the entry names (not full paths) in arbitrary order
files = os.listdir('folder')
print(files)

# Rename file or directory
os.rename('old_name.txt', 'new_name.txt')

# Remove file
# os.remove() is for files only; directories go through os.rmdir()
os.remove('file_to_delete.txt')

# Remove directory (must be empty)
# Raises OSError if the directory still has contents
os.rmdir('empty_folder')

# Get file information
# st_size is the size in bytes; st_mtime is the last-modification time as
# seconds since the epoch, so it prints as a raw number
file_stat = os.stat('file.txt')
print(f"Size: {file_stat.st_size} bytes")
print(f"Modified time: {file_stat.st_mtime}")

# Advanced file operations with shutil
import shutil

# Copy file
# copy() copies the data and permission bits; copy2() additionally tries to
# preserve file metadata such as timestamps. Both overwrite the destination.
shutil.copy('source.txt', 'destination.txt')  # Copy file
shutil.copy2('source.txt', 'destination.txt')  # Copy with metadata

# Copy directory
# Copies the whole tree recursively; by default the destination must not
# already exist
shutil.copytree('source_dir', 'destination_dir')

# Move file or directory
shutil.move('source', 'destination')

# Remove directory and contents
# Deletes the whole tree recursively - there is no undo
shutil.rmtree('folder_to_delete')

# Get disk usage
# The three values are byte counts. Dividing by 2**30 converts them to
# binary gigabytes (GiB), and // drops the fractional part.
total, used, free = shutil.disk_usage('/')
print(f"Total: {total // (2**30)} GB")
print(f"Used: {used // (2**30)} GB")
print(f"Free: {free // (2**30)} GB")

Working with CSV Files

Python's csv module simplifies working with CSV (Comma-Separated Values) files.

# Reading CSV files
import csv

# Reading as lists
# newline='' lets the csv module handle line endings itself, which is needed
# for quoted fields that contain embedded newlines.
with open('data.csv', 'r', newline='') as file:
    csv_reader = csv.reader(file)
    # Consume the header row. The None default keeps an empty file from
    # raising StopIteration here; the loop below then simply does nothing.
    header = next(csv_reader, None)
    for row in csv_reader:
        print(row)  # Each row is a list of strings

# Reading as dictionaries
with open('data.csv', 'r', newline='') as file:
    csv_reader = csv.DictReader(file)  # The first row supplies the dict keys
    for row in csv_reader:
        print(row)  # Each row is a dict with keys from header

# Writing CSV files
# Writing with lists: the first inner list serves as the header row
data = [
    ['Name', 'Age', 'City'],
    ['Alice', '30', 'New York'],
    ['Bob', '25', 'Chicago'],
]

with open('output.csv', 'w', newline='') as out:
    list_writer = csv.writer(out)
    # Write all rows at once; equivalent to calling
    # list_writer.writerow(row) for every row in data
    list_writer.writerows(data)

# Writing with dictionaries: each dict is one row, keyed by column name
data = [
    {'Name': 'Alice', 'Age': '30', 'City': 'New York'},
    {'Name': 'Bob', 'Age': '25', 'City': 'Chicago'},
]

fieldnames = ['Name', 'Age', 'City']  # Fixes the column order of the output
with open('output.csv', 'w', newline='') as out:
    dict_writer = csv.DictWriter(out, fieldnames=fieldnames)
    dict_writer.writeheader()  # Write header row
    # Write all rows at once; equivalent to calling
    # dict_writer.writerow(row) for every row in data
    dict_writer.writerows(data)

# Working with different delimiters
# Tab-separated values use the same reader with the delimiter overridden
with open('data.tsv', 'r', newline='') as tsv_file:
    tsv_reader = csv.reader(tsv_file, delimiter='\t')
    for fields in tsv_reader:
        print(fields)

Working with JSON Files

Python's json module makes it easy to work with JSON (JavaScript Object Notation) data.

# Working with JSON
import json

# Python object to JSON string
data = {
    'name': 'Alice',
    'age': 30,
    'city': 'New York',
    'languages': ['Python', 'JavaScript'],
    'active': True,
    'height': 5.5
}

# Convert Python object to JSON string
json_string = json.dumps(data)
print(json_string)

# Pretty print with indentation
pretty_json = json.dumps(data, indent=4, sort_keys=True)
print(pretty_json)

# Write JSON to file
# JSON text is UTF-8 by specification, so the encoding is named explicitly
# instead of relying on the platform default.
with open('data.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

# Read JSON from string
json_string = '{"name": "Bob", "age": 25, "city": "Chicago"}'
python_obj = json.loads(json_string)
print(python_obj['name'])  # Bob

# Read JSON from file
# Same explicit encoding as when writing, so non-ASCII text decodes
# identically on every platform.
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
    print(data)

Best Practices and Error Handling

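Proper error handling is crucial when working with files: a file may be missing, unreadable, or already present at the moment you access it, and the program should respond gracefully instead of crashing.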

# Error handling with file operations
# Handling file not found
try:
    with open('nonexistent_file.txt', 'r') as reader:
        content = reader.read()
except FileNotFoundError:
    print("File not found")
    # Recover by creating an empty file: opening in 'w' mode and writing
    # nothing is enough
    with open('nonexistent_file.txt', 'w'):
        pass

# Handling permission errors
# Writing to a location the current user may not modify
try:
    with open('/root/system_file.txt', 'w') as writer:
        writer.write('This will likely fail')
except PermissionError:
    print("Permission denied")

# Checking if file exists before opening
# isfile() is used rather than exists() because exists() is also True for a
# directory, which open() cannot read.
# NOTE: the file can still vanish between this check and the open() call, so
# code that must be robust should catch FileNotFoundError as well.
import os
if os.path.isfile('file.txt'):
    with open('file.txt', 'r') as file:
        content = file.read()
else:
    print("File does not exist")

# Best practices
# 1. Always use 'with' statement
# 2. Specify encoding for text files
#    (otherwise the platform's default encoding is used)
with open('file.txt', 'r', encoding='utf-8') as text_file:
    content = text_file.read()

# 3. Be specific with error handling
#    Narrow exception types come first; the general handler is the last resort
try:
    with open('file.txt', 'r') as text_file:
        content = text_file.read()
except FileNotFoundError:
    print("File not found")
except PermissionError:
    print("Permission denied")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

# 4. Use 'newline' parameter for CSV files
#    newline='' leaves line-ending handling to the csv module
with open('data.csv', 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    # Process CSV data

# 5. Use context managers for temporary files
import tempfile

payload = b"Temporary data"  # bytes, because the default mode is binary ('w+b')
with tempfile.NamedTemporaryFile(delete=True) as temp:
    temp.write(payload)
    temp.flush()  # Push buffered data out so code opening temp.name can see it
    # Do something with temp.name
# File is automatically deleted after the block

# 6. Using exclusive creation mode to prevent overwriting
# 'x' makes open() raise FileExistsError instead of truncating an existing file
try:
    with open('important_data.txt', 'x') as new_file:
        new_file.write('New data')
except FileExistsError:
    print("File already exists. Will not overwrite.")
Back to Cheat Sheet