# Kernel Engineering Expert Skill ## Activation Criteria Activate this skill when the user: - Requests kernel module development or loading - Needs device driver implementation (char, block, network) - Asks for kernel debugging techniques (ftrace, perf, eBPF) - Requires memory management optimization - Needs synchronization primitives (spinlocks, mutexes, RCU) - Asks for filesystem development or VFS integration - Requires network stack programming - Needs interrupt handling or bottom half implementation - Asks for kernel security mechanisms (LSM, SELinux) - Requires kernel performance optimization - Needs kernel-space / user-space communication - Is working on: embedded systems, high-performance computing, custom hardware drivers, real-time systems ## Core Methodology ### 1. Kernel Module Development #### Basic Module Structure ```c // hello_kernel.c - Basic Kernel Module #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #define MODULE_NAME "hello_kernel" // Use DRV_* names: redefining MODULE_LICENSE/MODULE_AUTHOR/etc. would clash with the kernel's MODULE_* macros from <linux/module.h> #define DRV_LICENSE "GPL" #define DRV_AUTHOR "Kernel Engineer" #define DRV_DESCRIPTION "A simple kernel module" #define DRV_VERSION "1.0" // Module parameters static int debug_level = 0; module_param(debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Debug level (0-3)"); static char *device_name = "kernel0"; module_param(device_name, charp, 0644); MODULE_PARM_DESC(device_name, "Device name"); // Module initialization static int __init hello_init(void) { pr_info("%s: Module loaded\n", MODULE_NAME); pr_info("%s: Device name: %s, debug level: %d\n", MODULE_NAME, device_name, debug_level); return 0; } // Module cleanup static void __exit hello_exit(void) { pr_info("%s: Module unloaded\n", MODULE_NAME); } module_init(hello_init); module_exit(hello_exit); // Do NOT add MODULE_INFO(intree, "Y") to out-of-tree modules: it falsely suppresses the out-of-tree taint flag MODULE_LICENSE(DRV_LICENSE); MODULE_AUTHOR(DRV_AUTHOR); MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_VERSION(DRV_VERSION); ``` #### Makefile for Kernel Modules ```makefile # Makefile for kernel module obj-m += hello_kernel.o # Kernel 
build directory KDIR := /lib/modules/$(shell uname -r)/build # Current directory PWD := $(shell pwd) # Default target all: $(MAKE) -C $(KDIR) M=$(PWD) modules # Clean build artifacts clean: $(MAKE) -C $(KDIR) M=$(PWD) clean rm -f Module.symvers modules.order # Load module load: insmod hello_kernel.ko dmesg | tail -10 # Unload module unload: rmmod hello_kernel dmesg | tail -10 # Reload module reload: unload load # Show module info info: modinfo hello_kernel.ko .PHONY: all clean load unload reload info ``` ### 2. Character Device Driver #### Complete Character Driver Implementation ```c // char_dev.c - Character Device Driver #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/cdev.h> #include <linux/uaccess.h> #include <linux/mutex.h> #define DEVICE_NAME "chardev" #define CLASS_NAME "chardev_class" #define BUF_LEN 1024 MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kernel Engineer"); MODULE_DESCRIPTION("Character device driver"); MODULE_VERSION("1.0"); // Global variables static int major_number; static struct class *chardev_class = NULL; static struct device *chardev_device = NULL; static char device_buffer[BUF_LEN]; static int buffer_pointer = 0; static DEFINE_MUTEX(chardev_mutex); // Function prototypes static int chardev_open(struct inode *, struct file *); static int chardev_release(struct inode *, struct file *); static ssize_t chardev_read(struct file *, char __user *, size_t, loff_t *); static ssize_t chardev_write(struct file *, const char __user *, size_t, loff_t *); static loff_t chardev_llseek(struct file *, loff_t, int); // File operations structure static const struct file_operations fops = { .owner = THIS_MODULE, .open = chardev_open, .release = chardev_release, .read = chardev_read, .write = chardev_write, .llseek = chardev_llseek, }; // Device open (single-open policy enforced via trylock) static int chardev_open(struct inode *inodep, struct file *filep) { if (!mutex_trylock(&chardev_mutex)) { pr_alert("%s: Device busy\n", DEVICE_NAME); return -EBUSY; } buffer_pointer = 0; pr_info("%s: Device opened\n", 
DEVICE_NAME); return 0; } // Device release static int chardev_release(struct inode *inodep, struct file *filep) { mutex_unlock(&chardev_mutex); pr_info("%s: Device closed\n", DEVICE_NAME); return 0; } // Device read static ssize_t chardev_read(struct file *filep, char __user *buffer, size_t len, loff_t *offset) { int bytes_read = 0; if (*offset >= buffer_pointer) { return 0; // EOF } if (*offset + len > BUF_LEN) { len = BUF_LEN - *offset; } if (copy_to_user(buffer, device_buffer + *offset, len) != 0) { return -EFAULT; } *offset += len; bytes_read = len; pr_info("%s: Sent %d bytes to user\n", DEVICE_NAME, bytes_read); return bytes_read; } // Device write static ssize_t chardev_write(struct file *filep, const char __user *buffer, size_t len, loff_t *offset) { if (*offset + len > BUF_LEN) { len = BUF_LEN - *offset; } if (copy_from_user(device_buffer + *offset, buffer, len) != 0) { return -EFAULT; } *offset += len; buffer_pointer = *offset; pr_info("%s: Received %zu bytes from user\n", DEVICE_NAME, len); return len; } // Device seek static loff_t chardev_llseek(struct file *filep, loff_t offset, int orig) { loff_t new_pos = 0; switch (orig) { case 0: // SEEK_SET new_pos = offset; break; case 1: // SEEK_CUR new_pos = filep->f_pos + offset; break; case 2: // SEEK_END new_pos = buffer_pointer + offset; break; default: return -EINVAL; } if (new_pos < 0 || new_pos > BUF_LEN) { return -EINVAL; } filep->f_pos = new_pos; return new_pos; } // Module initialization static int __init chardev_init(void) { int ret = 0; pr_info("%s: Initializing\n", DEVICE_NAME); // Allocate dynamic major number major_number = register_chrdev(0, DEVICE_NAME, &fops); if (major_number < 0) { pr_err("%s: Failed to register major number\n", DEVICE_NAME); return major_number; } // Create device class chardev_class = class_create(CLASS_NAME); if (IS_ERR(chardev_class)) { unregister_chrdev(major_number, DEVICE_NAME); pr_err("%s: Failed to register device class\n", DEVICE_NAME); return 
PTR_ERR(chardev_class); } // Create device chardev_device = device_create(chardev_class, NULL, MKDEV(major_number, 0), NULL, DEVICE_NAME); if (IS_ERR(chardev_device)) { class_destroy(chardev_class); unregister_chrdev(major_number, DEVICE_NAME); pr_err("%s: Failed to create device\n", DEVICE_NAME); return PTR_ERR(chardev_device); } /* chardev_mutex is already statically initialized by DEFINE_MUTEX(); re-initializing it here would be redundant (and racy once the device node exists) */ pr_info("%s: Device created with major %d\n", DEVICE_NAME, major_number); return 0; } // Module cleanup static void __exit chardev_exit(void) { device_destroy(chardev_class, MKDEV(major_number, 0)); class_destroy(chardev_class); /* class_destroy() unregisters the class itself; calling class_unregister() first would double-unregister */ unregister_chrdev(major_number, DEVICE_NAME); mutex_destroy(&chardev_mutex); pr_info("%s: Exiting\n", DEVICE_NAME); } module_init(chardev_init); module_exit(chardev_exit); ``` ### 3. Memory Management #### Kernel Memory Allocation Patterns ```c // memory_management.c - Kernel Memory Management #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/gfp.h> #include <linux/mm.h> #include <linux/dma-mapping.h> // Small fixed-size allocation static void *small_buffer; // Large allocation example static void *large_buffer; // DMA-capable allocation example static void *dma_buffer; dma_addr_t dma_handle; struct page *pages; void *vaddr; // Allocation strategies static int allocate_memory(void) { // 1. Small allocations (< 128 bytes) - use kmalloc small_buffer = kmalloc(64, GFP_KERNEL); if (!small_buffer) { pr_err("Failed to allocate small buffer\n"); return -ENOMEM; } pr_info("Allocated small buffer: %p\n", small_buffer); // 2. Medium allocations (128 bytes - 4KB) - use kmalloc // GFP flags: // - GFP_KERNEL: Normal kernel allocation, can sleep // - GFP_ATOMIC: Atomic allocation, cannot sleep (interrupt context) // - GFP_DMA: DMA-able memory (low 16MB) // - GFP_HIGHUSER: High memory for userspace pages large_buffer = kmalloc(8192, GFP_KERNEL); if (!large_buffer) { pr_err("Failed to allocate large buffer\n"); kfree(small_buffer); return -ENOMEM; } pr_info("Allocated large buffer: %p\n", large_buffer); // 3. 
Very large allocations (> 128KB) - use vmalloc // Note: vmalloc memory is not contiguous in physical memory void *very_large = vmalloc(1024 * 1024); // 1 MB if (!very_large) { pr_err("Failed to allocate very large buffer\n"); kfree(small_buffer); kfree(large_buffer); return -ENOMEM; } pr_info("Allocated very large buffer: %p\n", very_large); // 4. DMA-coherent allocation // For device DMA, must use physically contiguous memory dma_buffer = dma_alloc_coherent(NULL, 4096, &dma_handle, GFP_KERNEL); if (!dma_buffer) { pr_err("Failed to allocate DMA buffer\n"); vfree(very_large); kfree(small_buffer); kfree(large_buffer); return -ENOMEM; } pr_info("Allocated DMA buffer: %p (phys: %pad)\n", dma_buffer, &dma_handle); // 5. Page-based allocation pages = alloc_pages(GFP_KERNEL, 2); // 2^2 = 4 pages if (!pages) { pr_err("Failed to allocate pages\n"); dma_free_coherent(NULL, 4096, dma_buffer, dma_handle); vfree(very_large); kfree(small_buffer); kfree(large_buffer); return -ENOMEM; } vaddr = page_address(pages); pr_info("Allocated pages: %p\n", vaddr); return 0; } static void free_memory(void) { // Free in reverse order if (pages) { __free_pages(pages, 2); } if (dma_buffer) { dma_free_coherent(NULL, 4096, dma_buffer, dma_handle); } if (large_buffer) { kfree(large_buffer); } if (small_buffer) { kfree(small_buffer); } } // Memory copy example static void memory_copy_example(void) { char *src, *dst; size_t size = 1024; src = kmalloc(size, GFP_KERNEL); dst = kmalloc(size, GFP_KERNEL); if (src && dst) { // Kernel-to-kernel copy memcpy(dst, src, size); // User-kernel copy (in read/write operations) // copy_from_user(to, from, n); // copy_to_user(to, from, n); // Optimized copy for large data // __memcpy(dst, src, size); kfree(src); kfree(dst); } } // Memory mapping example (mmap support) static void *mmap_buffer; static size_t mmap_size = PAGE_SIZE; static int mmap_allocate(void) { // Allocate memory that will be mapped to userspace mmap_buffer = kmalloc(mmap_size, GFP_KERNEL); if 
(!mmap_buffer) { return -ENOMEM; } memset(mmap_buffer, 0, mmap_size); return 0; } static void mmap_free(void) { kfree(mmap_buffer); } static int chardev_mmap(struct file *filp, struct vm_area_struct *vma) { unsigned long size = vma->vm_end - vma->vm_start; if (size > mmap_size) { return -EINVAL; } // Set page attributes vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); // Map memory to userspace if (remap_pfn_range(vma, vma->vm_start, virt_to_phys(mmap_buffer) >> PAGE_SHIFT, size, vma->vm_page_prot)) { return -EAGAIN; } return 0; } ``` ### 4. Synchronization and Concurrency #### Synchronization Primitives ```c // synchronization.c - Kernel Synchronization #include #include #include #include #include #include #include #include #include // 1. Spinlock - for short critical sections, cannot sleep static DEFINE_SPINLOCK(my_spinlock); void spinlock_example(void) { unsigned long flags; // Spinlock variant: can be used in interrupt context spin_lock_irqsave(&my_spinlock, flags); // Critical section - cannot sleep, no blocking calls pr_info("In spinlock critical section\n"); spin_unlock_irqrestore(&my_spinlock, flags); // Regular spinlock (not in interrupt context) spin_lock(&my_spinlock); // Critical section spin_unlock(&my_spinlock); // Try lock - non-blocking if (spin_trylock(&my_spinlock)) { // Got the lock spin_unlock(&my_spinlock); } } // 2. Mutex - for longer critical sections, can sleep static DEFINE_MUTEX(my_mutex); void mutex_example(void) { // Lock mutex - can sleep mutex_lock(&my_mutex); // Critical section - can sleep, blocking calls allowed msleep(100); // OK to sleep mutex_unlock(&my_mutex); // Try lock - non-blocking if (mutex_trylock(&my_mutex)) { // Got the lock mutex_unlock(&my_mutex); } } // 3. 
Read-Write Semaphore static DECLARE_RWSEM(rwsem); void rwsem_example(void) { // Reader lock - multiple readers allowed down_read(&rwsem); // Read critical section up_read(&rwsem); // Writer lock - exclusive access down_write(&rwsem); // Write critical section up_write(&rwsem); // Try variants if (down_read_trylock(&rwsem)) { up_read(&rwsem); } if (down_write_trylock(&rwsem)) { up_write(&rwsem); } } // 4. Atomic Operations - lock-free synchronization static atomic_t counter = ATOMIC_INIT(0); void atomic_example(void) { int old, new; // Atomic increment atomic_inc(&counter); // Atomic decrement atomic_dec(&counter); // Atomic add atomic_add(5, &counter); // Atomic read old = atomic_read(&counter); // Atomic exchange new = atomic_xchg(&counter, 100); // Atomic compare and exchange old = atomic_read(&counter); atomic_cmpxchg(&counter, old, 200); // Atomic add and return old value old = atomic_fetch_add(&counter, 10); } // 5. Completion - one-to-one synchronization static struct completion my_completion; static int wait_thread(void *data) { pr_info("Worker: Waiting for signal\n"); wait_for_completion(&my_completion); pr_info("Worker: Received signal, proceeding\n"); return 0; } void completion_example(void) { init_completion(&my_completion); // Start thread that waits // kthread_run(wait_thread, NULL, "waiter"); msleep(1000); // Signal completion complete(&my_completion); } // 6. Wait Queue - producer-consumer pattern static DECLARE_WAIT_QUEUE_HEAD(my_wait_queue); static int data_ready = 0; void wait_queue_example(void) { // Consumer - wait for data wait_event_interruptible(my_wait_queue, data_ready != 0); pr_info("Consumer: Data is ready\n"); data_ready = 0; // Producer - signal data ready data_ready = 1; wake_up_interruptible(&my_wait_queue); } // 7. 
RCU (Read-Copy-Update) - read-mostly data structures #include struct rcu_data { int value; struct rcu_head rcu; }; static struct rcu_data *global_data; static void rcu_reclaim(struct rcu_head *rp) { struct rcu_data *data = container_of(rp, struct rcu_data, rcu); kfree(data); } void rcu_example(void) { struct rcu_data *new_data, *old_data; // Reader side - very fast, no locks rcu_read_lock(); if (global_data) { pr_info("RCU data: %d\n", global_data->value); } rcu_read_unlock(); // Writer side - update with grace period new_data = kmalloc(sizeof(*new_data), GFP_KERNEL); if (new_data) { new_data->value = 42; // Swap pointers atomically old_data = global_data; rcu_assign_pointer(global_data, new_data); // Wait for readers to finish call_rcu(&old_data->rcu, rcu_reclaim); } } // 8. Seqlock - for data with many readers and few writers static DEFINE_SEQLOCK(seqlock_data); static unsigned long seqlock_timestamp; void seqlock_example(void) { unsigned int seq; unsigned long timestamp; // Reader do { seq = read_seqbegin(&seqlock_data); timestamp = seqlock_timestamp; } while (read_seqretry(&seqlock_data, seq)); // Writer write_seqlock(&seqlock_data); seqlock_timestamp = jiffies; write_sequnlock(&seqlock_data); } // Per-CPU variables - avoid locking entirely #include static DEFINE_PER_CPU(unsigned long, per_cpu_counter); void percpu_example(void) { unsigned long *counter; unsigned long sum = 0; int cpu; // Access per-CPU variable preempt_disable(); // Prevent CPU migration counter = this_cpu_ptr(&per_cpu_counter); (*counter)++; preempt_enable(); // Sum all per-CPU counters for_each_possible_cpu(cpu) { counter = per_cpu_ptr(&per_cpu_counter, cpu); sum += *counter; } pr_info("Total count: %lu\n", sum); } ``` ### 5. 
Interrupt Handling #### Interrupt Handler Implementation ```c // interrupt_handler.c - Interrupt Handling #include #include #include #include #include #include #define IRQ_NUMBER 42 // Example IRQ number // Shared data between interrupt handler and process context static atomic_t irq_count = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(irq_wait_queue); // Top-half: Interrupt Service Routine (ISR) static irqreturn_t my_isr(int irq, void *dev_id) { atomic_inc(&irq_count); wake_up_interruptible(&irq_wait_queue); pr_info("IRQ %d handled\n", irq); // Return IRQ_HANDLED if we handled this interrupt // Return IRQ_NONE if we didn't handle it return IRQ_HANDLED; } // Alternative: Threaded interrupt handler static irqreturn_t my_isr_thread(int irq, void *dev_id) { pr_info("Threaded IRQ handler running\n"); msleep(100); // Can sleep in threaded handler return IRQ_HANDLED; } // Request interrupt static int request_interrupt(void) { int ret; // Request standard IRQ ret = request_irq(IRQ_NUMBER, my_isr, IRQF_TRIGGER_RISING | IRQF_SHARED, "my_interrupt", (void *)my_isr); if (ret) { pr_err("Failed to request IRQ %d\n", IRQ_NUMBER); return ret; } // Alternative: Request threaded IRQ // ret = request_threaded_irq(IRQ_NUMBER, // my_isr, // Top half // my_isr_thread, // Bottom half (thread) // IRQF_TRIGGER_RISING | IRQF_ONESHOT, // "my_interrupt", // (void *)my_isr); return 0; } // Free interrupt static void free_interrupt(void) { free_irq(IRQ_NUMBER, (void *)my_isr); } // Bottom half: Softirq (not commonly used directly) static void softirq_handler(struct softirq_action *h) { pr_info("Softirq handler\n"); } // Bottom half: Tasklet static void my_tasklet_handler(unsigned long data); static DECLARE_TASKLET(my_tasklet, my_tasklet_handler, 0); static void my_tasklet_handler(unsigned long data) { pr_info("Tasklet handler\n"); } // Schedule tasklet static void schedule_tasklet(void) { tasklet_schedule(&my_tasklet); } // Bottom half: Workqueue #include static struct workqueue_struct 
*my_workqueue; static void my_work_handler(struct work_struct *work); static DECLARE_WORK(my_work, my_work_handler); static DECLARE_DELAYED_WORK(my_delayed_work, my_work_handler); static void my_work_handler(struct work_struct *work) { pr_info("Workqueue handler\n"); // Can sleep here msleep(100); } // Initialize workqueue static int init_workqueue(void) { // Create dedicated workqueue my_workqueue = create_singlethread_workqueue("my_workqueue"); if (!my_workqueue) { return -ENOMEM; } // Queue work on system workqueue schedule_work(&my_work); // Queue delayed work schedule_delayed_work(&my_delayed_work, msecs_to_jiffies(1000)); // Queue work on dedicated workqueue // queue_work(my_workqueue, &my_work); return 0; } // Cleanup workqueue static void cleanup_workqueue(void) { // Wait for work to complete cancel_work_sync(&my_work); cancel_delayed_work_sync(&my_delayed_work); // Destroy workqueue if (my_workqueue) { destroy_workqueue(my_workqueue); } } // GPIO Interrupt example #define GPIO_PIN 17 static irqreturn_t gpio_isr(int irq, void *data) { int state = gpio_get_value(GPIO_PIN); pr_info("GPIO %d changed to %d\n", GPIO_PIN, state); return IRQ_HANDLED; } static int setup_gpio_interrupt(void) { int ret, irq; // Request GPIO ret = gpio_request(GPIO_PIN, "my_gpio"); if (ret) { pr_err("Failed to request GPIO %d\n", GPIO_PIN); return ret; } // Set direction gpio_direction_input(GPIO_PIN); // Get IRQ number for GPIO irq = gpio_to_irq(GPIO_PIN); // Request IRQ ret = request_irq(irq, gpio_isr, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, "gpio_irq", NULL); if (ret) { gpio_free(GPIO_PIN); return ret; } return 0; } static void cleanup_gpio_interrupt(void) { int irq = gpio_to_irq(GPIO_PIN); free_irq(irq, NULL); gpio_free(GPIO_PIN); } ``` ### 6. 
Kernel Debugging #### Ftrace Usage ```bash #!/bin/bash # Kernel Tracing with Ftrace # Enable ftrace echo 1 > /proc/sys/kernel/ftrace_enabled # Available tracers cat /sys/kernel/debug/tracing/available_tracers # Use function tracer echo function > /sys/kernel/debug/tracing/current_tracer # Filter functions echo '*sched*' > /sys/kernel/debug/tracing/set_ftrace_filter # View trace cat /sys/kernel/debug/tracing/trace # Clear trace echo > /sys/kernel/debug/tracing/trace # Function graph tracer echo function_graph > /sys/kernel/debug/tracing/current_tracer # Set graph depth echo 3 > /sys/kernel/debug/tracing/max_graph_depth # View graph trace (graph output is read from the same 'trace' file) cat /sys/kernel/debug/tracing/trace # Graph-trace a specific function (set_ftrace_pid takes a PID, not a function name) echo do_sys_open > /sys/kernel/debug/tracing/set_graph_function ``` #### Kernel Code with Tracepoints ```c // tracing_example.c - Kernel Tracing #include <linux/module.h> #include <linux/kernel.h> #include <linux/tracepoint.h> // Tracepoint example #define CREATE_TRACE_POINTS #include <trace/events/sample.h> /* project-local trace-event header defining trace_sample_event() — adjust path */ // Trace events static void trace_events_example(void) { int value = 42; // Custom trace event trace_sample_event(value, "example data"); pr_info("Generated trace event\n"); } // Using trace_printk() (for debugging only) static void trace_printk_example(void) { // Only works when tracing is enabled trace_printk("Debug message: value=%d\n", 42); } // Function tracing static noinline void traced_function(void) { pr_info("This function will be traced\n"); } // Performance counters static void perf_counter_example(void) { // Use perf events to count occurrences trace_printk("Performance marker\n"); } ``` #### eBPF Programs ```c // eBPF program example (requires bpf() syscall) // This would be loaded via bpftool or libbpf // Example: Socket filter eBPF program #include <linux/bpf.h> #include <linux/if_ether.h> #include <bpf/bpf_helpers.h> // BPF program for packet counting SEC("socket") int bpf_prog1(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; __u32 nh_off; nh_off = sizeof(struct ethhdr); if (data + nh_off > data_end) return 0; // Count packets 
__u32 key = 0; __u64 *counter = bpf_map_lookup_elem(&my_map, &key); if (counter) __sync_fetch_and_add(counter, 1); return 0; } // Maps definition // NOTE(review): in a real build, place this map definition ABOVE the program that uses it — C requires declaration before use struct bpf_map_def SEC("maps") my_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u64), .max_entries = 256, }; char _license[] SEC("license") = "GPL"; ``` ### 7. Kernel Security #### Linux Security Module (LSM) Hook ```c // lsm_example.c - Simple LSM Hook // NOTE(review): upstream kernels do not support loadable-module LSMs; real LSMs are built in and register at boot via DEFINE_LSM() #include <linux/kernel.h> #include <linux/lsm_hooks.h> #include <linux/security.h> #include <linux/binfmts.h> // Security hook for file execution static int my_security_bprm_check(struct linux_binprm *bprm) { const char *filename = bprm->filename; pr_info("LSM: Executing %s\n", filename); // Security check logic here // Return 0 to allow, negative error to deny return 0; } // Security operations list (the hook is named bprm_check_security in lsm_hooks.h) static struct security_hook_list my_hooks[] = { LSM_HOOK_INIT(bprm_check_security, my_security_bprm_check), }; // Initialize LSM static int __init my_lsm_init(void) { pr_info("LSM: Initializing\n"); security_add_hooks(my_hooks, ARRAY_SIZE(my_hooks), "my_lsm"); return 0; } // Cleanup LSM static void __exit my_lsm_exit(void) { pr_info("LSM: Exiting\n"); } module_init(my_lsm_init); module_exit(my_lsm_exit); MODULE_LICENSE("GPL"); ``` ### 8. 
Network Programming #### Kernel Socket Programming ```c // kernel_socket.c - Kernel Network Programming #include #include #include #include #include #include #include #include // Create socket in kernel static int create_kernel_socket(void) { struct socket *sock; struct sockaddr_in addr; int ret; // Create TCP socket ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) { pr_err("Failed to create socket: %d\n", ret); return ret; } // Set address memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = htons(8080); addr.sin_addr.s_addr = htonl(INADDR_ANY); // Bind socket ret = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr)); if (ret < 0) { pr_err("Failed to bind socket: %d\n", ret); sock_release(sock); return ret; } // Listen ret = kernel_listen(sock, 10); if (ret < 0) { pr_err("Failed to listen: %d\n", ret); sock_release(sock); return ret; } pr_info("Socket created and bound\n"); return 0; } // Send data from kernel static int send_kernel_data(struct socket *sock, const char *data, size_t len) { struct msghdr msg; struct kvec iov; int ret; memset(&msg, 0, sizeof(msg)); iov.iov_base = (void *)data; iov.iov_len = len; ret = kernel_sendmsg(sock, &msg, &iov, 1, len); if (ret < 0) { pr_err("Failed to send data: %d\n", ret); return ret; } return ret; } // Receive data in kernel static int recv_kernel_data(struct socket *sock, char *data, size_t len) { struct msghdr msg; struct kvec iov; int ret; memset(&msg, 0, sizeof(msg)); iov.iov_base = data; iov.iov_len = len; ret = kernel_recvmsg(sock, &msg, &iov, 1, len, 0); if (ret < 0) { pr_err("Failed to receive data: %d\n", ret); return ret; } return ret; } // Netfilter hook example #include #include #include static unsigned int nf_hook_func(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct iphdr *iph; if (!skb) return NF_ACCEPT; iph = ip_hdr(skb); pr_info("Packet: protocol=%d, saddr=%pI4, daddr=%pI4\n", iph->protocol, &iph->saddr, 
&iph->daddr); return NF_ACCEPT; } static struct nf_hook_ops nf_hook_ops = { .hook = nf_hook_func, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_FIRST, }; static int __init netfilter_init(void) { return nf_register_net_hook(&init_net, &nf_hook_ops); } static void __exit netfilter_exit(void) { nf_unregister_net_hook(&init_net, &nf_hook_ops); } ``` ### 9. Decision Trees #### Synchronization Primitive Selection ``` Critical section characteristics? │ ├─ Very short (< microsecond) → Spinlock ├─ Can sleep, exclusive access → Mutex ├─ Many readers, few writers → RCU or Seqlock ├─ Readers and writers → RW Semaphore ├─ Simple signal → Completion └─ Producer-consumer → Wait queue ``` #### Memory Allocation Strategy ``` Allocation size and context? │ ├─ < 128 bytes → kmalloc with appropriate GFP flags ├─ 128 bytes - 128 KB → kmalloc ├─ > 128 KB → vmalloc (or alloc_pages if contiguous) ├─ DMA required → dma_alloc_coherent ├─ Highmem → __get_free_pages with GFP_HIGHUSER └─ Interrupt context → GFP_ATOMIC (no sleep) ``` ### 10. Anti-Patterns to Avoid 1. **Sleeping in atomic context**: Never use sleeping functions while holding spinlock 2. **Race conditions**: Always use proper synchronization 3. **Memory leaks**: Track and free all allocations 4. **Use after free**: Be careful with RCU and freeing 5. **Integer overflow**: Check arithmetic operations 6. **Buffer overflows**: Validate all user input 7. **Deadlocks**: Acquire locks in consistent order 8. **Priority inversion**: Use proper priority inheritance 9. **Ignoring return values**: Always check error codes 10. **Missing module_put**: Match get/put operations ### 11. 
Quality Checklist Before considering kernel code production-ready: - [ ] All error paths properly handled - [ ] Memory allocations checked for failure - [ ] Synchronization primitives correctly used - [ ] No sleeping in atomic context - [ ] No use of deprecated APIs - [ ] Module metadata complete - [ ] Coding style follows kernel standards - [ ] Sparse checking passes - [ ] Tested with lockdep - [ ] Memory leak testing performed - [ ] Performance testing completed - [ ] Security review conducted - [ ] Documentation complete - [ ] Backward compatibility considered - [ ] API stability maintained - [ ] Tested on multiple architectures - [ ] Kernel version compatibility verified - [ ] Static analysis performed - [ ] Stress testing completed - [ ] Integration testing done This comprehensive skill definition provides complete guidance for kernel engineering across Linux environments.