#!/usr/bin/env python3
"""
OS-Copilot Self-Learner - Learning from Completed Tasks
Implements pattern detection, knowledge extraction, and self-improvement
"""

import hashlib
import json
import os
import re
import subprocess
import sys
from collections import Counter, defaultdict
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union


@dataclass
class TaskExecution:
    """Record of a task execution"""
    id: str
    timestamp: str
    request: str
    agent: str
    commands: List[str]
    files_modified: List[str]
    success: bool
    duration: float
    tokens_used: int = 0
    user_feedback: str = ""
    # Names of the learned patterns this execution contributed to.
    patterns: List[str] = field(default_factory=list)


@dataclass
class LearnedPattern:
    """A learned pattern from task execution"""
    pattern: str
    context: str
    frequency: int
    last_seen: str
    success_rate: float
    examples: List[str] = field(default_factory=list)


@dataclass
class Improvement:
    """A suggested improvement"""
    type: str  # "workflow", "code", "prompt"
    suggestion: str
    rationale: str
    priority: int
    applied: bool = False


class SelfLearner:
    """Self-learning system for continuous improvement.

    State is kept in three files under ``data_path``:
    ``executions.jsonl``, ``patterns.json`` and ``improvements.json``.
    """

    # Only this many of the most recent executions are written to disk.
    MAX_SAVED_EXECUTIONS = 1000

    def __init__(self, data_path: Optional[Union[str, Path]] = None):
        """Create the data directory if needed and load any saved state.

        Args:
            data_path: Directory for the learner's files. A falsy value
                selects ``~/.claude/self-learner``.
        """
        # Coerce to Path so a plain string works with mkdir() and "/".
        if data_path:
            self.data_path = Path(data_path)
        else:
            self.data_path = Path.home() / ".claude" / "self-learner"
        self.data_path.mkdir(parents=True, exist_ok=True)

        self.executions: List[TaskExecution] = []
        self.patterns: Dict[str, LearnedPattern] = {}
        self.improvements: List[Improvement] = []

        self._load_data()

    def _load_data(self):
        """Load executions, patterns and improvements from disk.

        Files that do not exist are skipped. Malformed JSON or records
        with unexpected fields raise from ``json`` / the dataclass
        constructors.
        """
        # Load executions (one JSON object per non-blank line)
        exec_file = self.data_path / "executions.jsonl"
        if exec_file.exists():
            with open(exec_file, encoding="utf-8") as f:
                for line in f:
                    if line.strip():
                        self.executions.append(TaskExecution(**json.loads(line)))

        # Load patterns
        pattern_file = self.data_path / "patterns.json"
        if pattern_file.exists():
            with open(pattern_file, encoding="utf-8") as f:
                data = json.load(f)
            for pattern, pattern_data in data.items():
                self.patterns[pattern] = LearnedPattern(**pattern_data)

        # Load improvements
        improvement_file = self.data_path / "improvements.json"
        if improvement_file.exists():
            with open(improvement_file, encoding="utf-8") as f:
                data = json.load(f)
            self.improvements = [Improvement(**imp) for imp in data]

    def _save_data(self):
        """Write executions, patterns and improvements to disk."""
        # Save executions; the slice keeps only the most recent records.
        exec_file = self.data_path / "executions.jsonl"
        with open(exec_file, 'w', encoding="utf-8") as f:
            for execution in self.executions[-self.MAX_SAVED_EXECUTIONS:]:
                f.write(json.dumps(asdict(execution)) + "\n")

        # Save patterns
        pattern_file = self.data_path / "patterns.json"
        with open(pattern_file, 'w', encoding="utf-8") as f:
            patterns_data = {p: asdict(pat) for p, pat in self.patterns.items()}
            json.dump(patterns_data, f, indent=2)

        # Save improvements
        improvement_file = self.data_path / "improvements.json"
        with open(improvement_file, 'w', encoding="utf-8") as f:
            json.dump([asdict(imp) for imp in self.improvements], f, indent=2)

    def record_execution(self, request: str, agent: str, commands: List[str],
                         files_modified: List[str], success: bool,
                         duration: float, tokens_used: int = 0) -> str:
        """Record a task execution, update patterns and improvements, and save.

        Returns:
            The 12-character id assigned to the new execution.
        """
        now = datetime.now().isoformat()
        execution = TaskExecution(
            # md5 is used only to derive a short identifier, not for security.
            id=hashlib.md5(f"{request}{now}".encode()).hexdigest()[:12],
            timestamp=now,
            request=request,
            agent=agent,
            commands=commands,
            files_modified=files_modified,
            success=success,
            duration=duration,
            tokens_used=tokens_used
        )

        # Append first: _update_pattern derives success rates from
        # self.executions and must see this execution too.
        self.executions.append(execution)

        # Extract patterns from this execution
        self._extract_patterns(execution)

        # Generate improvements
        self._generate_improvements()

        self._save_data()
        return execution.id

    def _extract_patterns(self, execution: TaskExecution):
        """Extract patterns from an execution"""
        # Pattern 1: Common command sequences (each adjacent pair)
        for first, second in zip(execution.commands, execution.commands[1:]):
            self._update_pattern(f"{first} → {second}", "command_sequence", execution)

        # Pattern 2: File operation patterns, keyed by file suffix
        for file_path in execution.files_modified:
            ext = Path(file_path).suffix
            self._update_pattern(f"modify_{ext}_file", "file_operation", execution)

        # Pattern 3: Request type patterns (first matching category wins)
        request_lower = execution.request.lower()
        if any(word in request_lower for word in ["fix", "bug", "error"]):
            self._update_pattern("debugging_task", "request_type", execution)
        elif any(word in request_lower for word in ["add", "create", "implement"]):
            self._update_pattern("feature_implementation", "request_type", execution)
        elif any(word in request_lower for word in ["refactor", "clean", "improve"]):
            self._update_pattern("refactoring_task", "request_type", execution)

    def _update_pattern(self, pattern: str, context: str, execution: TaskExecution):
        """Update a pattern with new execution data.

        Increments the pattern's frequency, tags ``execution`` with the
        pattern name, and recomputes the success rate over all executions
        in ``self.executions`` carrying that tag.
        """
        if pattern not in self.patterns:
            self.patterns[pattern] = LearnedPattern(
                pattern=pattern,
                context=context,
                frequency=0,
                last_seen="",
                success_rate=0.0
            )

        pat = self.patterns[pattern]
        pat.frequency += 1
        pat.last_seen = execution.timestamp

        # Tag the execution so the success-rate scan below can find it.
        # Without this tag no execution ever matches and the rate stays 0.
        if pattern not in execution.patterns:
            execution.patterns.append(pattern)

        # Update success rate (exact membership, not substring matching)
        relevant_execs = [e for e in self.executions if pattern in e.patterns]
        if relevant_execs:
            pat.success_rate = sum(1 for e in relevant_execs if e.success) / len(relevant_execs)

        # Add example if successful
        if execution.success and len(pat.examples) < 5:
            pat.examples.append(execution.request[:100])

    def _generate_improvements(self):
        """Generate improvement suggestions.

        A suggestion is added at most once; duplicates are detected by
        comparing the full suggestion text against existing improvements.
        """
        existing_suggestions = {imp.suggestion for imp in self.improvements}

        # Check for repeated failures
        for pattern, pat in self.patterns.items():
            if pat.success_rate >= 0.5 or pat.frequency < 3:
                continue
            suggestion = f"Review pattern: {pattern}"
            if suggestion in existing_suggestions:
                continue
            self.improvements.append(Improvement(
                type="workflow",
                suggestion=suggestion,
                rationale=f"Success rate only {pat.success_rate*100:.0f}% over {pat.frequency} attempts",
                priority=3
            ))
            existing_suggestions.add(suggestion)

        # Check for frequently used commands that could be optimized
        command_counts = Counter(
            cmd for execution in self.executions[-100:] for cmd in execution.commands
        )

        for cmd, count in command_counts.most_common(5):
            if count < 10:
                continue
            suggestion = f"Create shortcut for: {cmd}"
            if suggestion in existing_suggestions:
                continue
            self.improvements.append(Improvement(
                type="workflow",
                suggestion=suggestion,
                rationale=f"Used {count} times recently",
                priority=2
            ))
            existing_suggestions.add(suggestion)

    def get_patterns(self, context: Optional[str] = None) -> List[LearnedPattern]:
        """Get learned patterns, optionally filtered by context.

        Results are sorted by frequency, most frequent first.
        """
        patterns = list(self.patterns.values())
        if context:
            patterns = [p for p in patterns if p.context == context]
        return sorted(patterns, key=lambda p: p.frequency, reverse=True)

    def get_improvements(self, priority: Optional[int] = None) -> List[Improvement]:
        """Get pending (not yet applied) improvements.

        Args:
            priority: When given, only improvements with at least this
                priority are returned.

        Returns:
            Pending improvements sorted by priority, highest first.
        """
        improvements = [imp for imp in self.improvements if not imp.applied]
        if priority is not None:
            improvements = [imp for imp in improvements if imp.priority >= priority]
        return sorted(improvements, key=lambda imp: imp.priority, reverse=True)

    def apply_improvement(self, improvement_id: int):
        """Mark an improvement as applied and save.

        ``improvement_id`` is an index into ``self.improvements``, not a
        position in the sorted list returned by ``get_improvements``.
        Out-of-range ids are ignored.
        """
        if 0 <= improvement_id < len(self.improvements):
            self.improvements[improvement_id].applied = True
            self._save_data()

    def get_statistics(self) -> Dict[str, Any]:
        """Get learning statistics.

        Rates and averages are 0 when there are no executions.
        """
        total = len(self.executions)
        successful = sum(1 for e in self.executions if e.success)

        return {
            "total_executions": total,
            "successful_executions": successful,
            "success_rate": successful / total if total > 0 else 0,
            "total_patterns": len(self.patterns),
            "pending_improvements": sum(1 for imp in self.improvements if not imp.applied),
            "unique_agents": len(set(e.agent for e in self.executions)),
            "average_duration": sum(e.duration for e in self.executions) / total if total > 0 else 0,
            "total_tokens": sum(e.tokens_used for e in self.executions)
        }

    def learn_from_git(self) -> int:
        """Learn from recent git history.

        Runs ``git log`` for the last 20 commits in the current working
        directory and records each commit subject as a successful
        execution.

        Returns:
            The number of commits recorded; 0 when git cannot be run,
            times out, or exits with a non-zero status.
        """
        try:
            # Get recent commits
            result = subprocess.run(
                ["git", "log", "--oneline", "-20", "--pretty=%H %s"],
                capture_output=True,
                text=True,
                timeout=10
            )
        except (OSError, subprocess.SubprocessError):
            # Best effort: git missing/not executable, or the call timed out.
            return 0

        if result.returncode != 0:
            return 0

        count = 0
        for line in result.stdout.strip().split('\n'):
            # Each line is "<hash> <subject>"; skip lines without a separator.
            _commit_hash, separator, message = line.partition(' ')
            if not separator:
                continue
            self.record_execution(
                request=message,
                agent="git",
                commands=[f"git commit -m '{message}'"],
                files_modified=[],
                success=True,
                duration=0
            )
            count += 1

        return count


def main():
    """Main entry point: dispatch on the first command-line argument."""
    learner = SelfLearner()

    # No command: print a short summary.
    if len(sys.argv) < 2:
        stats = learner.get_statistics()
        print("OS-Copilot Self-Learner")
        print("=" * 40)
        print(f"Total Executions: {stats['total_executions']}")
        print(f"Success Rate: {stats['success_rate']*100:.1f}%")
        print(f"Patterns Learned: {stats['total_patterns']}")
        print(f"Pending Improvements: {stats['pending_improvements']}")
        print()
        return

    command = sys.argv[1]

    if command == "record":
        if len(sys.argv) < 5:
            print("Usage: self-learner.py record REQUEST AGENT success|failure")
            return
        learner.record_execution(
            request=sys.argv[2],
            agent=sys.argv[3],
            commands=[],
            files_modified=[],
            # Anything other than "success" (case-insensitive) is a failure.
            success=sys.argv[4].lower() == "success",
            duration=0
        )
        print("✓ Execution recorded")

    elif command == "patterns":
        context = sys.argv[2] if len(sys.argv) > 2 else None
        patterns = learner.get_patterns(context)
        print("Learned Patterns:")
        for pat in patterns[:10]:
            print(f" • {pat.pattern} ({pat.frequency}x, {pat.success_rate*100:.0f}% success)")

    elif command == "improvements":
        improvements = learner.get_improvements()
        print("Pending Improvements:")
        for i, imp in enumerate(improvements):
            print(f" [{i}] {imp.suggestion}")
            print(f" Priority: {imp.priority}/5 - {imp.rationale}")

    elif command == "learn-git":
        count = learner.learn_from_git()
        print(f"✓ Learned from {count} git commits")

    elif command == "stats":
        print(json.dumps(learner.get_statistics(), indent=2))

    else:
        print(f"Unknown command: {command}")


if __name__ == "__main__":
    main()