Massive training corpus for AI coding models containing: - 10 JSONL training datasets (641+ examples across coding, reasoning, planning, architecture, communication, debugging, security, workflows, error handling, UI/UX) - 11 agent behavior specifications (explorer, planner, reviewer, debugger, executor, UI designer, Linux admin, kernel engineer, security architect, automation engineer, API architect) - 6 skill definition files (coding, API engineering, kernel, Linux server, security architecture, server automation, UI/UX) - Master README with project origin story and philosophy Built by Pony Alpha 2 to help AI models learn expert-level coding approaches.
1293 lines
30 KiB
Markdown
1293 lines
30 KiB
Markdown
# Kernel Engineering Expert Skill
|
|
|
|
## Activation Criteria
|
|
Activate this skill when the user:
|
|
- Requests kernel module development or loading
|
|
- Needs device driver implementation (char, block, network)
|
|
- Asks for kernel debugging techniques (ftrace, perf, eBPF)
|
|
- Requires memory management optimization
|
|
- Needs synchronization primitives (spinlocks, mutexes, RCU)
|
|
- Asks for filesystem development or VFS integration
|
|
- Requires network stack programming
|
|
- Needs interrupt handling or bottom half implementation
|
|
- Asks for kernel security mechanisms (LSM, SELinux)
|
|
- Requires kernel performance optimization
|
|
- Needs kernel-space / user-space communication
|
|
- Is working on: embedded systems, high-performance computing, custom hardware drivers, real-time systems
|
|
|
|
## Core Methodology
|
|
|
|
### 1. Kernel Module Development
|
|
|
|
#### Basic Module Structure
|
|
|
|
```c
|
|
// hello_kernel.c - Basic kernel module skeleton.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

/*
 * BUG FIX: the original #define'd MODULE_LICENSE, MODULE_AUTHOR,
 * MODULE_DESCRIPTION and MODULE_VERSION as string constants. Those are
 * function-like macros provided by <linux/module.h>, so redefining them
 * breaks the MODULE_*() declarations at the bottom of this file
 * (MODULE_LICENSE(MODULE_LICENSE) cannot compile). Local constants now
 * use a distinct DRV_ prefix.
 */
#define DRV_NAME "hello_kernel"

/* Module parameters - tunable at load time and via
 * /sys/module/hello_kernel/parameters/ (mode 0644). */
static int debug_level = 0;
module_param(debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Debug level (0-3)");

static char *device_name = "kernel0";
module_param(device_name, charp, 0644);
MODULE_PARM_DESC(device_name, "Device name");

/* Called at insmod; returns 0 on success, negative errno on failure. */
static int __init hello_init(void)
{
	pr_info("%s: Module loaded\n", DRV_NAME);
	pr_info("%s: Device name: %s, debug level: %d\n",
		DRV_NAME, device_name, debug_level);
	return 0;
}

/* Called at rmmod; must undo everything hello_init() did. */
static void __exit hello_exit(void)
{
	pr_info("%s: Module unloaded\n", DRV_NAME);
}

module_init(hello_init);
module_exit(hello_exit);

/*
 * BUG FIX: MODULE_INFO(intree, "Y") was removed. The "intree" tag is set
 * by the kernel build system for in-tree modules only; claiming it for an
 * out-of-tree example is misleading.
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kernel Engineer");
MODULE_DESCRIPTION("A simple kernel module");
MODULE_VERSION("1.0");
```
|
|
|
|
#### Makefile for Kernel Modules
|
|
|
|
```makefile
|
|
# Kbuild wrapper Makefile for the hello_kernel out-of-tree module.
# Delegates the real build to the running kernel's build tree.
obj-m += hello_kernel.o

KDIR := /lib/modules/$(shell uname -r)/build
PWD  := $(shell pwd)

.PHONY: all clean load unload reload info

# Build the module against the running kernel.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

# Remove all build artifacts.
clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
	rm -f Module.symvers modules.order

# Insert the module, then show the tail of the kernel log.
load:
	insmod hello_kernel.ko
	dmesg | tail -10

# Remove the module, then show the tail of the kernel log.
unload:
	rmmod hello_kernel
	dmesg | tail -10

# Convenience target: unload then load.
reload: unload load

# Print the module's embedded metadata.
info:
	modinfo hello_kernel.ko
```
|
|
|
|
### 2. Character Device Driver
|
|
|
|
#### Complete Character Driver Implementation
|
|
|
|
```c
|
|
// char_dev.c - Character device driver.
//
// Exposes /dev/chardev backed by a 1 KiB in-kernel buffer. A single
// opener at a time is enforced with a mutex; read/write/llseek operate
// on the buffer, where buffer_pointer tracks the amount of valid data.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/mutex.h>

#define DEVICE_NAME "chardev"
#define CLASS_NAME "chardev_class"
#define BUF_LEN 1024

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kernel Engineer");
MODULE_DESCRIPTION("Character device driver");
MODULE_VERSION("1.0");

// Global driver state
static int major_number;
static struct class *chardev_class;
static struct device *chardev_device;
/* Note: the original also declared an unused struct cdev; removed. */

static char device_buffer[BUF_LEN];
static int buffer_pointer;	/* number of valid bytes in device_buffer */
/*
 * DEFINE_MUTEX statically initializes the mutex. BUG FIX: the original
 * additionally called mutex_init() in chardev_init() AFTER the device
 * node was already visible - re-initializing a mutex a concurrent
 * open() may already hold is a race. The extra call is removed.
 */
static DEFINE_MUTEX(chardev_mutex);

// Function prototypes
static int chardev_open(struct inode *, struct file *);
static int chardev_release(struct inode *, struct file *);
static ssize_t chardev_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t chardev_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t chardev_llseek(struct file *, loff_t, int);

static const struct file_operations fops = {
	.owner   = THIS_MODULE,
	.open    = chardev_open,
	.release = chardev_release,
	.read    = chardev_read,
	.write   = chardev_write,
	.llseek  = chardev_llseek,
};

/* Allow only one concurrent opener; returns -EBUSY otherwise. */
static int chardev_open(struct inode *inodep, struct file *filep)
{
	if (!mutex_trylock(&chardev_mutex)) {
		pr_alert("%s: Device busy\n", DEVICE_NAME);
		return -EBUSY;
	}

	buffer_pointer = 0;
	pr_info("%s: Device opened\n", DEVICE_NAME);
	return 0;
}

/* Release the single-opener lock. */
static int chardev_release(struct inode *inodep, struct file *filep)
{
	mutex_unlock(&chardev_mutex);
	pr_info("%s: Device closed\n", DEVICE_NAME);
	return 0;
}

/* Copy valid data to userspace; returns bytes copied, 0 at EOF. */
static ssize_t chardev_read(struct file *filep, char __user *buffer,
			    size_t len, loff_t *offset)
{
	if (*offset >= buffer_pointer)
		return 0; /* EOF */

	/*
	 * BUG FIX: the original clamped against BUF_LEN, allowing reads
	 * past buffer_pointer and leaking stale/uninitialized buffer
	 * bytes to userspace. Clamp against the valid-data length.
	 */
	if (*offset + len > buffer_pointer)
		len = buffer_pointer - *offset;

	if (copy_to_user(buffer, device_buffer + *offset, len) != 0)
		return -EFAULT;

	*offset += len;
	pr_info("%s: Sent %zu bytes to user\n", DEVICE_NAME, len);
	return len;
}

/* Accept data from userspace; returns bytes written or -errno. */
static ssize_t chardev_write(struct file *filep, const char __user *buffer,
			     size_t len, loff_t *offset)
{
	/*
	 * BUG FIX: when the buffer was full the original silently clamped
	 * len to 0 and returned 0, which makes a userspace write() loop
	 * forever. Report -ENOSPC instead.
	 */
	if (*offset >= BUF_LEN)
		return -ENOSPC;

	if (*offset + len > BUF_LEN)
		len = BUF_LEN - *offset;

	if (copy_from_user(device_buffer + *offset, buffer, len) != 0)
		return -EFAULT;

	*offset += len;
	/* Grow (never shrink) the valid-data watermark, so seeking back
	 * and rewriting a prefix does not discard data after it. */
	if (*offset > buffer_pointer)
		buffer_pointer = *offset;

	pr_info("%s: Received %zu bytes from user\n", DEVICE_NAME, len);
	return len;
}

/* Reposition the file offset; SEEK_END is relative to valid data. */
static loff_t chardev_llseek(struct file *filep, loff_t offset, int orig)
{
	loff_t new_pos;

	switch (orig) {
	case SEEK_SET:
		new_pos = offset;
		break;
	case SEEK_CUR:
		new_pos = filep->f_pos + offset;
		break;
	case SEEK_END:
		new_pos = buffer_pointer + offset;
		break;
	default:
		return -EINVAL;
	}

	if (new_pos < 0 || new_pos > BUF_LEN)
		return -EINVAL;

	filep->f_pos = new_pos;
	return new_pos;
}

/* Register the char device and create /dev/chardev. */
static int __init chardev_init(void)
{
	pr_info("%s: Initializing\n", DEVICE_NAME);

	/* 0 asks the kernel to pick a free major number. */
	major_number = register_chrdev(0, DEVICE_NAME, &fops);
	if (major_number < 0) {
		pr_err("%s: Failed to register major number\n", DEVICE_NAME);
		return major_number;
	}

	/* NOTE(review): class_create() takes a single argument since
	 * v6.4; older kernels need class_create(THIS_MODULE, CLASS_NAME). */
	chardev_class = class_create(CLASS_NAME);
	if (IS_ERR(chardev_class)) {
		unregister_chrdev(major_number, DEVICE_NAME);
		pr_err("%s: Failed to register device class\n", DEVICE_NAME);
		return PTR_ERR(chardev_class);
	}

	chardev_device = device_create(chardev_class, NULL,
				       MKDEV(major_number, 0), NULL,
				       DEVICE_NAME);
	if (IS_ERR(chardev_device)) {
		class_destroy(chardev_class);
		unregister_chrdev(major_number, DEVICE_NAME);
		pr_err("%s: Failed to create device\n", DEVICE_NAME);
		return PTR_ERR(chardev_device);
	}

	pr_info("%s: Device created with major %d\n", DEVICE_NAME, major_number);
	return 0;
}

/* Tear down in reverse order of creation. */
static void __exit chardev_exit(void)
{
	device_destroy(chardev_class, MKDEV(major_number, 0));
	/*
	 * BUG FIX: the original called both class_unregister() and
	 * class_destroy(), unregistering the class twice. class_destroy()
	 * alone performs the full teardown.
	 */
	class_destroy(chardev_class);
	unregister_chrdev(major_number, DEVICE_NAME);
	mutex_destroy(&chardev_mutex);
	pr_info("%s: Exiting\n", DEVICE_NAME);
}

module_init(chardev_init);
module_exit(chardev_exit);
```
|
|
|
|
### 3. Memory Management
|
|
|
|
#### Kernel Memory Allocation Patterns
|
|
|
|
```c
|
|
// memory_management.c - Kernel memory allocation patterns.
//
// Demonstrates kmalloc/vmalloc/DMA/page allocations with full error
// unwinding. All buffers live at file scope so free_memory() can
// release them and so a failed allocate_memory() leaves no leaks.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/dma-mapping.h>

// Small fixed-size allocation (kmalloc)
static void *small_buffer;

// Larger kmalloc allocation
static void *large_buffer;

/* BUG FIX: the original kept the vmalloc'd buffer in a LOCAL variable
 * of allocate_memory(), so it leaked on every successful call. */
static void *very_large_buffer;

// DMA-coherent allocation
static void *dma_buffer;
static dma_addr_t dma_handle;
/* Must point at a real device (e.g. &pdev->dev) before DMA allocation.
 * BUG FIX: the original passed NULL to dma_alloc_coherent(), which is
 * invalid and crashes on modern kernels; the DMA step is now skipped
 * until a device is bound. */
static struct device *dma_dev;

static struct page *pages;
static void *vaddr;

/* Perform one of each allocation type; on any failure, unwind all
 * earlier allocations and return -ENOMEM. */
static int allocate_memory(void)
{
	// 1. Small allocations - kmalloc (physically contiguous)
	small_buffer = kmalloc(64, GFP_KERNEL);
	if (!small_buffer) {
		pr_err("Failed to allocate small buffer\n");
		return -ENOMEM;
	}
	pr_info("Allocated small buffer: %p\n", small_buffer);

	/*
	 * 2. Medium allocations - still kmalloc. GFP flags:
	 *  - GFP_KERNEL:  normal allocation, may sleep
	 *  - GFP_ATOMIC:  cannot sleep (interrupt context)
	 *  - GFP_DMA:     ISA-DMA-capable (low 16 MB) memory
	 *  - GFP_HIGHUSER: highmem for userspace pages
	 */
	large_buffer = kmalloc(8192, GFP_KERNEL);
	if (!large_buffer) {
		pr_err("Failed to allocate large buffer\n");
		goto err_small;
	}
	pr_info("Allocated large buffer: %p\n", large_buffer);

	// 3. Very large allocations - vmalloc (virtually contiguous only)
	very_large_buffer = vmalloc(1024 * 1024); // 1 MB
	if (!very_large_buffer) {
		pr_err("Failed to allocate very large buffer\n");
		goto err_large;
	}
	pr_info("Allocated very large buffer: %p\n", very_large_buffer);

	// 4. DMA-coherent allocation - physically contiguous, device-visible
	if (dma_dev) {
		dma_buffer = dma_alloc_coherent(dma_dev, 4096, &dma_handle,
						GFP_KERNEL);
		if (!dma_buffer) {
			pr_err("Failed to allocate DMA buffer\n");
			goto err_vmalloc;
		}
		pr_info("Allocated DMA buffer: %p (phys: %pad)\n",
			dma_buffer, &dma_handle);
	}

	// 5. Page-based allocation: order 2 => 2^2 = 4 pages
	pages = alloc_pages(GFP_KERNEL, 2);
	if (!pages) {
		pr_err("Failed to allocate pages\n");
		goto err_dma;
	}
	vaddr = page_address(pages);
	pr_info("Allocated pages: %p\n", vaddr);

	return 0;

err_dma:
	if (dma_buffer) {
		dma_free_coherent(dma_dev, 4096, dma_buffer, dma_handle);
		dma_buffer = NULL;
	}
err_vmalloc:
	vfree(very_large_buffer);
	very_large_buffer = NULL;
err_large:
	kfree(large_buffer);
	large_buffer = NULL;
err_small:
	kfree(small_buffer);
	small_buffer = NULL;
	return -ENOMEM;
}

/* Free everything in reverse order. Each pointer is NULLed afterwards
 * so a second call is a harmless no-op (double-free guard). */
static void free_memory(void)
{
	if (pages) {
		__free_pages(pages, 2);
		pages = NULL;
		vaddr = NULL;
	}

	if (dma_buffer) {
		dma_free_coherent(dma_dev, 4096, dma_buffer, dma_handle);
		dma_buffer = NULL;
	}

	vfree(very_large_buffer);	/* vfree(NULL) is a no-op */
	very_large_buffer = NULL;

	kfree(large_buffer);		/* kfree(NULL) is a no-op */
	large_buffer = NULL;

	kfree(small_buffer);
	small_buffer = NULL;
}

/* Demonstrates kernel-to-kernel and user<->kernel copy primitives. */
static void memory_copy_example(void)
{
	char *src, *dst;
	size_t size = 1024;

	src = kmalloc(size, GFP_KERNEL);
	dst = kmalloc(size, GFP_KERNEL);

	if (src && dst) {
		// Kernel-to-kernel copy
		memcpy(dst, src, size);

		// User<->kernel copies (used in fops read/write):
		//   copy_from_user(to, from, n);
		//   copy_to_user(to, from, n);
	}

	/* BUG FIX: the original freed the buffers only when BOTH
	 * allocations succeeded, leaking one buffer when only the other
	 * failed. kfree(NULL) is safe, so free unconditionally. */
	kfree(src);
	kfree(dst);
}

// ---- Memory mapping support (mmap) --------------------------------

static void *mmap_buffer;
static size_t mmap_size = PAGE_SIZE;

/* Allocate the zeroed buffer that chardev_mmap() exposes to userspace.
 * kzalloc of PAGE_SIZE returns page-aligned memory on slab allocators,
 * which remap_pfn_range() requires; get_zeroed_page() also works. */
static int mmap_allocate(void)
{
	mmap_buffer = kzalloc(mmap_size, GFP_KERNEL);
	if (!mmap_buffer)
		return -ENOMEM;
	return 0;
}

static void mmap_free(void)
{
	kfree(mmap_buffer);
	mmap_buffer = NULL;
}

/* Map mmap_buffer into the caller's VMA (uncached). */
static int chardev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	if (size > mmap_size)
		return -EINVAL;

	// Mark the mapping uncached (device-memory-like semantics)
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	// Hand the physical pages to userspace
	if (remap_pfn_range(vma, vma->vm_start,
			    virt_to_phys(mmap_buffer) >> PAGE_SHIFT,
			    size, vma->vm_page_prot))
		return -EAGAIN;

	return 0;
}
```
|
|
|
|
### 4. Synchronization and Concurrency
|
|
|
|
#### Synchronization Primitives
|
|
|
|
```c
|
|
// synchronization.c - Tour of kernel synchronization primitives.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/rwlock.h>
#include <linux/semaphore.h>
#include <linux/completion.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/delay.h>	/* BUG FIX: msleep() was used without this */
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/seqlock.h>
#include <linux/percpu.h>

// 1. Spinlock - short critical sections; the holder must never sleep.
static DEFINE_SPINLOCK(my_spinlock);

void spinlock_example(void)
{
	unsigned long flags;

	/* irqsave variant: safe when the lock is also taken from
	 * interrupt context (disables local interrupts). */
	spin_lock_irqsave(&my_spinlock, flags);
	pr_info("In spinlock critical section\n");
	spin_unlock_irqrestore(&my_spinlock, flags);

	/* Plain variant: only when never taken from interrupt context. */
	spin_lock(&my_spinlock);
	spin_unlock(&my_spinlock);

	/* Non-blocking attempt. */
	if (spin_trylock(&my_spinlock)) {
		spin_unlock(&my_spinlock);
	}
}

// 2. Mutex - longer critical sections; the holder may sleep.
static DEFINE_MUTEX(my_mutex);

void mutex_example(void)
{
	mutex_lock(&my_mutex);
	msleep(100);		/* sleeping is allowed under a mutex */
	mutex_unlock(&my_mutex);

	if (mutex_trylock(&my_mutex)) {
		mutex_unlock(&my_mutex);
	}
}

// 3. Read-write semaphore - concurrent readers, exclusive writer.
static DECLARE_RWSEM(rwsem);

void rwsem_example(void)
{
	down_read(&rwsem);	/* multiple readers may hold this */
	up_read(&rwsem);

	down_write(&rwsem);	/* exclusive */
	up_write(&rwsem);

	if (down_read_trylock(&rwsem)) {
		up_read(&rwsem);
	}
	if (down_write_trylock(&rwsem)) {
		up_write(&rwsem);
	}
}

// 4. Atomic operations - lock-free counters and flags.
static atomic_t counter = ATOMIC_INIT(0);

void atomic_example(void)
{
	/* Renamed from old/new: 'new' is a C++ keyword and checkpatch
	 * rejects it in kernel code. */
	int oldval, newval;

	atomic_inc(&counter);
	atomic_dec(&counter);
	atomic_add(5, &counter);

	oldval = atomic_read(&counter);

	newval = atomic_xchg(&counter, 100);

	oldval = atomic_read(&counter);
	atomic_cmpxchg(&counter, oldval, 200);

	/*
	 * BUG FIX: the kernel signature is
	 *   atomic_fetch_add(int i, atomic_t *v)
	 * (value first, pointer second); the original reversed them.
	 */
	oldval = atomic_fetch_add(10, &counter);
	(void)newval;
}

// 5. Completion - one-shot "event happened" signalling.
static struct completion my_completion;

static int wait_thread(void *data)
{
	pr_info("Worker: Waiting for signal\n");
	wait_for_completion(&my_completion);
	pr_info("Worker: Received signal, proceeding\n");
	return 0;
}

void completion_example(void)
{
	init_completion(&my_completion);

	// Start thread that waits:
	// kthread_run(wait_thread, NULL, "waiter");

	msleep(1000);

	complete(&my_completion);
}

// 6. Wait queue - producer/consumer handshake.
static DECLARE_WAIT_QUEUE_HEAD(my_wait_queue);
static int data_ready = 0;

void wait_queue_example(void)
{
	/*
	 * BUG FIX: wait_event_interruptible() returns -ERESTARTSYS when
	 * interrupted by a signal; the original ignored that and
	 * proceeded as if data were ready.
	 */
	if (wait_event_interruptible(my_wait_queue, data_ready != 0))
		return;
	pr_info("Consumer: Data is ready\n");
	data_ready = 0;

	// Producer side: publish, then wake waiters.
	data_ready = 1;
	wake_up_interruptible(&my_wait_queue);
}

// 7. RCU (Read-Copy-Update) - read-mostly data structures.
struct rcu_data {
	int value;
	struct rcu_head rcu;
};

static struct rcu_data *global_data;

/* Grace-period callback: frees the retired version. */
static void rcu_reclaim(struct rcu_head *rp)
{
	struct rcu_data *data = container_of(rp, struct rcu_data, rcu);

	kfree(data);
}

void rcu_example(void)
{
	struct rcu_data *new_data, *old_data, *p;

	// Reader side - very fast, no locks.
	rcu_read_lock();
	/* BUG FIX: RCU-protected pointers must be read through
	 * rcu_dereference() inside the read-side critical section. */
	p = rcu_dereference(global_data);
	if (p) {
		pr_info("RCU data: %d\n", p->value);
	}
	rcu_read_unlock();

	// Writer side - publish new version, reclaim old after grace period.
	new_data = kmalloc(sizeof(*new_data), GFP_KERNEL);
	if (new_data) {
		new_data->value = 42;

		old_data = global_data;
		rcu_assign_pointer(global_data, new_data);

		/*
		 * call_rcu() does NOT wait: it schedules rcu_reclaim()
		 * after the grace period (synchronize_rcu() is the
		 * blocking variant). BUG FIX: on the first publication
		 * old_data is NULL and &old_data->rcu dereferenced NULL.
		 */
		if (old_data)
			call_rcu(&old_data->rcu, rcu_reclaim);
	}
}

// 8. Seqlock - many readers, rare writers; readers retry on conflict.
static DEFINE_SEQLOCK(seqlock_data);
static unsigned long seqlock_timestamp;

void seqlock_example(void)
{
	unsigned int seq;
	unsigned long timestamp;

	// Reader: loop until a consistent snapshot is observed.
	do {
		seq = read_seqbegin(&seqlock_data);
		timestamp = seqlock_timestamp;
	} while (read_seqretry(&seqlock_data, seq));
	(void)timestamp;

	// Writer
	write_seqlock(&seqlock_data);
	seqlock_timestamp = jiffies;
	write_sequnlock(&seqlock_data);
}

// 9. Per-CPU variables - avoid cross-CPU sharing entirely.
static DEFINE_PER_CPU(unsigned long, per_cpu_counter);

void percpu_example(void)
{
	unsigned long *cnt;
	unsigned long sum = 0;
	int cpu;

	/* Pin to the current CPU while touching its copy. */
	preempt_disable();
	cnt = this_cpu_ptr(&per_cpu_counter);
	(*cnt)++;
	preempt_enable();

	// Aggregate across every possible CPU.
	for_each_possible_cpu(cpu) {
		cnt = per_cpu_ptr(&per_cpu_counter, cpu);
		sum += *cnt;
	}

	pr_info("Total count: %lu\n", sum);
}
```
|
|
|
|
### 5. Interrupt Handling
|
|
|
|
#### Interrupt Handler Implementation
|
|
|
|
```c
|
|
// interrupt_handler.c - Interrupt handling: top halves and bottom halves.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/gpio.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/workqueue.h>

/* Example IRQ line only; real drivers obtain the IRQ from the device or
 * platform description, never hard-code it. */
#define IRQ_NUMBER 42

// State shared between interrupt context and process context
static atomic_t irq_count = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(irq_wait_queue);

/*
 * Top half (hard-IRQ context): must be short and must not sleep.
 * NOTE(review): printing from a hard-IRQ handler is fine for a demo but
 * far too slow for a real hot path.
 */
static irqreturn_t my_isr(int irq, void *dev_id)
{
	atomic_inc(&irq_count);
	wake_up_interruptible(&irq_wait_queue);

	pr_info("IRQ %d handled\n", irq);

	/* IRQ_HANDLED: our device raised this interrupt.
	 * IRQ_NONE: not ours - essential on shared lines. */
	return IRQ_HANDLED;
}

/* Threaded bottom half: runs in a kernel thread, so it may sleep. */
static irqreturn_t my_isr_thread(int irq, void *dev_id)
{
	pr_info("Threaded IRQ handler running\n");
	msleep(100);	/* allowed here, never in the top half */

	return IRQ_HANDLED;
}

/* Register the handler. IRQF_SHARED requires a unique non-NULL dev_id
 * (the handler address serves here) and that all sharers agree on the
 * trigger flags. */
static int request_interrupt(void)
{
	int ret;

	ret = request_irq(IRQ_NUMBER,
			  my_isr,
			  IRQF_TRIGGER_RISING | IRQF_SHARED,
			  "my_interrupt",
			  (void *)my_isr);
	if (ret) {
		pr_err("Failed to request IRQ %d\n", IRQ_NUMBER);
		return ret;
	}

	/* Alternative: split top/bottom half with a threaded IRQ:
	 * ret = request_threaded_irq(IRQ_NUMBER,
	 *                            my_isr,        // top half
	 *                            my_isr_thread, // bottom half (thread)
	 *                            IRQF_TRIGGER_RISING | IRQF_ONESHOT,
	 *                            "my_interrupt",
	 *                            (void *)my_isr);
	 */

	return 0;
}

/* dev_id must match the one passed to request_irq(). */
static void free_interrupt(void)
{
	free_irq(IRQ_NUMBER, (void *)my_isr);
}

/* Softirqs are reserved for core subsystems; shown for completeness only. */
static void softirq_handler(struct softirq_action *h)
{
	pr_info("Softirq handler\n");
}

/*
 * Tasklet bottom half.
 * NOTE(review): this is the pre-5.10 DECLARE_TASKLET prototype (handler
 * takes unsigned long data). On kernels >= 5.10 use DECLARE_TASKLET_OLD
 * for this form, or the new tasklet_setup() callback style. Tasklets are
 * deprecated in favor of threaded IRQs and workqueues.
 */
static void my_tasklet_handler(unsigned long data);
static DECLARE_TASKLET(my_tasklet, my_tasklet_handler, 0);

static void my_tasklet_handler(unsigned long data)
{
	pr_info("Tasklet handler\n");
}

/* Defer work to softirq context via the tasklet. */
static void schedule_tasklet(void)
{
	tasklet_schedule(&my_tasklet);
}

// ---- Workqueue bottom half ----------------------------------------

static struct workqueue_struct *my_workqueue;
static void my_work_handler(struct work_struct *work);

static DECLARE_WORK(my_work, my_work_handler);
static DECLARE_DELAYED_WORK(my_delayed_work, my_work_handler);

/* Runs in process context: sleeping is allowed. */
static void my_work_handler(struct work_struct *work)
{
	pr_info("Workqueue handler\n");
	msleep(100);
}

/* Create a dedicated workqueue and queue both immediate and delayed work. */
static int init_workqueue(void)
{
	my_workqueue = create_singlethread_workqueue("my_workqueue");
	if (!my_workqueue)
		return -ENOMEM;

	// Queue work on the system workqueue
	schedule_work(&my_work);

	// Queue delayed work (fires after ~1 s)
	schedule_delayed_work(&my_delayed_work, msecs_to_jiffies(1000));

	// Dedicated-queue variant:
	// queue_work(my_workqueue, &my_work);

	return 0;
}

/* Cancel outstanding work, then destroy the queue. */
static void cleanup_workqueue(void)
{
	/* *_sync variants wait for in-flight handlers to finish. */
	cancel_work_sync(&my_work);
	cancel_delayed_work_sync(&my_delayed_work);

	if (my_workqueue)
		destroy_workqueue(my_workqueue);
}

// ---- GPIO interrupt example (legacy gpio_* API; new code should use
// the gpiod_* descriptor API) ---------------------------------------
#define GPIO_PIN 17

static irqreturn_t gpio_isr(int irq, void *data)
{
	int state = gpio_get_value(GPIO_PIN);

	pr_info("GPIO %d changed to %d\n", GPIO_PIN, state);
	return IRQ_HANDLED;
}

static int setup_gpio_interrupt(void)
{
	int ret, irq;

	ret = gpio_request(GPIO_PIN, "my_gpio");
	if (ret) {
		pr_err("Failed to request GPIO %d\n", GPIO_PIN);
		return ret;
	}

	gpio_direction_input(GPIO_PIN);

	/*
	 * BUG FIX: gpio_to_irq() returns a negative errno on failure; the
	 * original passed that value straight into request_irq().
	 */
	irq = gpio_to_irq(GPIO_PIN);
	if (irq < 0) {
		ret = irq;
		goto err_gpio;
	}

	ret = request_irq(irq, gpio_isr,
			  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
			  "gpio_irq", NULL);
	if (ret)
		goto err_gpio;

	return 0;

err_gpio:
	gpio_free(GPIO_PIN);
	return ret;
}

static void cleanup_gpio_interrupt(void)
{
	int irq = gpio_to_irq(GPIO_PIN);

	free_irq(irq, NULL);
	gpio_free(GPIO_PIN);
}
```
|
|
|
|
### 6. Kernel Debugging
|
|
|
|
#### Ftrace Usage
|
|
|
|
```bash
|
|
#!/bin/bash
# Kernel tracing with ftrace.
# Note: on modern kernels prefer the tracefs mount at /sys/kernel/tracing;
# /sys/kernel/debug/tracing is kept for compatibility.

TRACING=/sys/kernel/debug/tracing

# Enable ftrace globally
echo 1 > /proc/sys/kernel/ftrace_enabled

# List the tracers this kernel supports
cat $TRACING/available_tracers

# Select the function tracer
echo function > $TRACING/current_tracer

# Restrict tracing to functions matching a glob
echo '*sched*' > $TRACING/set_ftrace_filter

# View the trace buffer
cat $TRACING/trace

# Clear the trace buffer
echo > $TRACING/trace

# Switch to the function-graph tracer
echo function_graph > $TRACING/current_tracer

# Limit call-graph depth
echo 3 > $TRACING/max_graph_depth

# View graph output
# BUG FIX: there is no 'trace_graph' file; graph output also appears in 'trace'
cat $TRACING/trace

# Trace one specific function
# BUG FIX: function names belong in set_ftrace_filter;
# set_ftrace_pid expects a process ID, not a function name
echo do_sys_open > $TRACING/set_ftrace_filter
```
|
|
|
|
#### Kernel Code with Tracepoints
|
|
|
|
```c
|
|
// tracing_example.c - Kernel tracing helpers (tracepoints, trace_printk).
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>

/*
 * CREATE_TRACE_POINTS must be defined in exactly one translation unit
 * before including the trace header: it emits the tracepoint bodies.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/sample.h>

/* Fire the custom sample tracepoint with a demo payload. */
static void trace_events_example(void)
{
	int value = 42;

	trace_sample_event(value, "example data");

	pr_info("Generated trace event\n");
}

/* trace_printk() writes to the ftrace ring buffer; debugging use only -
 * it is a no-op signal when tracing is disabled. */
static void trace_printk_example(void)
{
	trace_printk("Debug message: value=%d\n", 42);
}

/* noinline keeps a distinct symbol so the function tracer can hook it. */
static noinline void traced_function(void)
{
	pr_info("This function will be traced\n");
}

/* Emits a marker usable as an anchor when counting with perf/ftrace. */
static void perf_counter_example(void)
{
	trace_printk("Performance marker\n");
}
```
|
|
|
|
#### eBPF Programs
|
|
|
|
```c
|
|
// eBPF program example (requires bpf() syscall)
|
|
// This would be loaded via bpftool or libbpf
|
|
// Example: Socket filter eBPF program
|
|
|
|
#include <linux/bpf.h>
|
|
#include <linux/pkt_cls.h>
|
|
|
|
// BPF program for packet counting
|
|
SEC("socket")
|
|
int bpf_prog1(struct __sk_buff *skb)
|
|
{
|
|
void *data_end = (void *)(long)skb->data_end;
|
|
void *data = (void *)(long)skb->data;
|
|
__u32 nh_off;
|
|
|
|
nh_off = sizeof(struct ethhdr);
|
|
|
|
if (data + nh_off > data_end)
|
|
return 0;
|
|
|
|
// Count packets
|
|
__u64 *counter = bpf_map_lookup_elem(&my_map, &key);
|
|
if (counter)
|
|
__sync_fetch_and_add(counter, 1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Maps definition
|
|
struct bpf_map_def SEC("maps") my_map = {
|
|
.type = BPF_MAP_TYPE_ARRAY,
|
|
.key_size = sizeof(__u32),
|
|
.value_size = sizeof(__u64),
|
|
.max_entries = 256,
|
|
};
|
|
|
|
char _license[] SEC("license") = "GPL";
|
|
```
|
|
|
|
### 7. Kernel Security
|
|
|
|
#### Linux Security Module (LSM) Hook
|
|
|
|
```c
|
|
// lsm_example.c - Minimal LSM hook registration (illustrative).
//
// NOTE(review): LSMs cannot be loadable modules in mainline kernels -
// security_add_hooks() is not exported to modules, and real LSMs are
// built in and registered at boot (see DEFINE_LSM in
// include/linux/lsm_hooks.h). This file shows the hook shape only.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/security.h>
#include <linux/lsm_hooks.h>
#include <linux/binfmts.h>

/* Called before a binary is executed.
 * Return 0 to allow, a negative errno to deny. */
static int my_security_bprm_check(struct linux_binprm *bprm)
{
	const char *filename = bprm->filename;

	pr_info("LSM: Executing %s\n", filename);

	// Security policy decision goes here

	return 0;
}

/*
 * BUG FIX: the hook slot in union security_list_options is named
 * bprm_check_security, not bprm_check; LSM_HOOK_INIT() must use the
 * exact member name or the initializer fails to compile.
 */
static struct security_hook_list my_hooks[] = {
	LSM_HOOK_INIT(bprm_check_security, my_security_bprm_check),
};

/* Register the hooks. NOTE(review): recent kernels take a
 * struct lsm_id * as the third argument instead of a name string -
 * confirm against the target kernel version. */
static int __init my_lsm_init(void)
{
	pr_info("LSM: Initializing\n");
	security_add_hooks(my_hooks, ARRAY_SIZE(my_hooks), "my_lsm");
	return 0;
}

/* LSM hooks cannot be unregistered; see the note at the top. */
static void __exit my_lsm_exit(void)
{
	pr_info("LSM: Exiting\n");
}

module_init(my_lsm_init);
module_exit(my_lsm_exit);

MODULE_LICENSE("GPL");
```
|
|
|
|
### 8. Network Programming
|
|
|
|
#### Kernel Socket Programming
|
|
|
|
```c
|
|
// kernel_socket.c - Kernel-space networking examples.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/net.h>
#include <net/sock.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/uaccess.h>	/* BUG FIX: <asm/uaccess.h> was removed in v4.12 */
#include <linux/socket.h>

/*
 * BUG FIX: the original kept the listening socket in a local variable
 * and leaked it on the success path. It now lives at file scope so it
 * can be released at cleanup.
 */
static struct socket *listen_sock;

/* Create, bind (port 8080, any address) and listen on a kernel TCP
 * socket. Returns 0 on success, negative errno on failure. */
static int create_kernel_socket(void)
{
	struct sockaddr_in addr;
	int ret;

	ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP,
			       &listen_sock);
	if (ret < 0) {
		pr_err("Failed to create socket: %d\n", ret);
		return ret;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8080);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);

	ret = kernel_bind(listen_sock, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		pr_err("Failed to bind socket: %d\n", ret);
		goto err_release;
	}

	ret = kernel_listen(listen_sock, 10);
	if (ret < 0) {
		pr_err("Failed to listen: %d\n", ret);
		goto err_release;
	}

	pr_info("Socket created and bound\n");
	return 0;

err_release:
	sock_release(listen_sock);
	listen_sock = NULL;
	return ret;
}

/* Release the listening socket, if any (safe to call twice). */
static void destroy_kernel_socket(void)
{
	if (listen_sock) {
		sock_release(listen_sock);
		listen_sock = NULL;
	}
}

/* Send a buffer over a kernel socket.
 * Returns bytes sent or negative errno. */
static int send_kernel_data(struct socket *sock, const char *data, size_t len)
{
	struct msghdr msg;
	struct kvec iov;
	int ret;

	memset(&msg, 0, sizeof(msg));
	iov.iov_base = (void *)data;
	iov.iov_len = len;

	ret = kernel_sendmsg(sock, &msg, &iov, 1, len);
	if (ret < 0)
		pr_err("Failed to send data: %d\n", ret);

	return ret;
}

/* Receive up to len bytes from a kernel socket.
 * Returns bytes received or negative errno. */
static int recv_kernel_data(struct socket *sock, char *data, size_t len)
{
	struct msghdr msg;
	struct kvec iov;
	int ret;

	memset(&msg, 0, sizeof(msg));
	iov.iov_base = data;
	iov.iov_len = len;

	ret = kernel_recvmsg(sock, &msg, &iov, 1, len, 0);
	if (ret < 0)
		pr_err("Failed to receive data: %d\n", ret);

	return ret;
}

// ---- Netfilter hook example ---------------------------------------
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>

/* Logs every IPv4 packet at PRE_ROUTING; always accepts. */
static unsigned int nf_hook_func(void *priv, struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	struct iphdr *iph;

	if (!skb)
		return NF_ACCEPT;

	iph = ip_hdr(skb);
	pr_info("Packet: protocol=%d, saddr=%pI4, daddr=%pI4\n",
		iph->protocol, &iph->saddr, &iph->daddr);

	return NF_ACCEPT;
}

/* Renamed from nf_hook_ops, which shadowed the struct tag. */
static struct nf_hook_ops my_nf_ops = {
	.hook     = nf_hook_func,
	.pf       = NFPROTO_IPV4,
	.hooknum  = NF_INET_PRE_ROUTING,
	.priority = NF_IP_PRI_FIRST,
};

static int __init netfilter_init(void)
{
	return nf_register_net_hook(&init_net, &my_nf_ops);
}

static void __exit netfilter_exit(void)
{
	nf_unregister_net_hook(&init_net, &my_nf_ops);
	destroy_kernel_socket();
}
```
|
|
|
|
### 9. Decision Trees
|
|
|
|
#### Synchronization Primitive Selection
|
|
|
|
```
|
|
Critical section characteristics?
|
|
│
|
|
├─ Very short (< microsecond) → Spinlock
|
|
├─ Can sleep, exclusive access → Mutex
|
|
├─ Many readers, few writers → RCU or Seqlock
|
|
├─ Readers and writers → RW Semaphore
|
|
├─ Simple signal → Completion
|
|
└─ Producer-consumer → Wait queue
|
|
```
|
|
|
|
#### Memory Allocation Strategy
|
|
|
|
```
|
|
Allocation size and context?
|
|
│
|
|
├─ < 128 bytes → kmalloc with appropriate GFP flags
|
|
├─ 128 bytes - 128 KB → kmalloc
|
|
├─ > 128 KB → vmalloc (or alloc_pages if contiguous)
|
|
├─ DMA required → dma_alloc_coherent
|
|
├─ Highmem → alloc_pages with GFP_HIGHUSER, then kmap_local_page (highmem pages have no permanent kernel mapping, so __get_free_pages cannot return them)
|
|
└─ Interrupt context → GFP_ATOMIC (no sleep)
|
|
```
|
|
|
|
### 10. Anti-Patterns to Avoid
|
|
|
|
1. **Sleeping in atomic context**: Never use sleeping functions while holding spinlock
|
|
2. **Race conditions**: Always use proper synchronization
|
|
3. **Memory leaks**: Track and free all allocations
|
|
4. **Use after free**: Be careful with RCU and freeing
|
|
5. **Integer overflow**: Check arithmetic operations
|
|
6. **Buffer overflows**: Validate all user input
|
|
7. **Deadlocks**: Acquire locks in consistent order
|
|
8. **Priority inversion**: Use proper priority inheritance
|
|
9. **Ignoring return values**: Always check error codes
|
|
10. **Missing module_put**: Match get/put operations
|
|
|
|
### 11. Quality Checklist
|
|
|
|
Before considering kernel code production-ready:
|
|
|
|
- [ ] All error paths properly handled
|
|
- [ ] Memory allocations checked for failure
|
|
- [ ] Synchronization primitives correctly used
|
|
- [ ] No sleeping in atomic context
|
|
- [ ] No use of deprecated APIs
|
|
- [ ] Module metadata complete
|
|
- [ ] Coding style follows kernel standards
|
|
- [ ] Sparse checking passes
|
|
- [ ] Tested with lockdep
|
|
- [ ] Memory leak testing performed
|
|
- [ ] Performance testing completed
|
|
- [ ] Security review conducted
|
|
- [ ] Documentation complete
|
|
- [ ] Backward compatibility considered
|
|
- [ ] API stability maintained
|
|
- [ ] Tested on multiple architectures
|
|
- [ ] Kernel version compatibility verified
|
|
- [ ] Static analysis performed
|
|
- [ ] Stress testing completed
|
|
- [ ] Integration testing done
|
|
|
|
This comprehensive skill definition provides complete guidance for kernel engineering across Linux environments.
|