Memory Management for AI Agents
Effective memory management is crucial for AI agents to maintain context across interactions and execute complex multi-stage tasks. At VrealSoft, we’ve developed sophisticated memory architectures to address these challenges.
Short-term Memory
Holds recent conversation history and immediate context
Working Memory
Stores task-specific information needed for current execution
Long-term Memory
Archives persistent knowledge and past interactions
# High-level memory architectureclass AgentMemorySystem: def __init__(self): # Short-term conversation buffer self.conversation_buffer = ConversationBuffer(max_messages=20)
# Working memory for task execution self.working_memory = WorkingMemory()
# Long-term storage self.semantic_memory = VectorStore() # For conceptual knowledge self.episodic_memory = EpisodicStore() # For past interactions self.procedural_memory = ToolRepository() # For action knowledge
def update(self, interaction): # Update conversation buffer self.conversation_buffer.add(interaction)
# Extract and store important information entities = extract_entities(interaction) self.working_memory.update_entities(entities)
# Archive to long-term memory if significant if is_significant(interaction): self.episodic_memory.store(interaction)
# Extract knowledge for semantic memory knowledge = extract_knowledge(interaction) self.semantic_memory.store(knowledge)
def retrieve_context(self, query, task): context = { "conversation": self.conversation_buffer.get_recent(), "working_memory": self.working_memory.get_relevant(task), "semantic_knowledge": self.semantic_memory.query(query), "relevant_episodes": self.episodic_memory.find_similar(query, task), "relevant_tools": self.procedural_memory.suggest_tools(task) }
return prioritize_and_format(context)Our approach to session persistence includes:
class ConversationBuffer: def __init__(self, max_messages=20, max_tokens=4000): self.messages = [] self.summary = "" self.max_messages = max_messages self.max_tokens = max_tokens
def add(self, message): self.messages.append(message)
# If we exceed our message limit, summarize older messages if len(self.messages) > self.max_messages: # Summarize the oldest messages to_summarize = self.messages[:len(self.messages) - self.max_messages + 1] new_summary = summarize_messages(to_summarize, self.summary)
# Update our storage self.summary = new_summary self.messages = self.messages[len(self.messages) - self.max_messages + 1:]
def get_recent(self, num_messages=None): if num_messages is None or num_messages >= len(self.messages): return {"summary": self.summary, "messages": self.messages} else: return {"summary": self.summary, "messages": self.messages[-num_messages:]}class EntityTracker: def __init__(self): self.entities = {} # Maps entity IDs to their current state self.mentions = {} # Maps entity IDs to most recent mention position self.relationships = {} # Maps entity pairs to their relationships
def update_entity(self, entity_id, properties): if entity_id not in self.entities: self.entities[entity_id] = {}
# Update properties, preserving existing ones self.entities[entity_id].update(properties)
# Update mention position self.mentions[entity_id] = get_current_position()
def add_relationship(self, entity1_id, entity2_id, relationship): key = (entity1_id, entity2_id) self.relationships[key] = relationship
def get_entity_context(self, query): # Find relevant entities relevant_entities = find_relevant_entities(query, self.entities)
# Sort by recency of mention relevant_entities.sort(key=lambda e: self.mentions.get(e, 0), reverse=True)
# Return formatted context return format_entity_context(relevant_entities, self.entities, self.relationships)One of the key challenges in agent memory is determining what information to keep accessible versus what to archive or discard.
Recency Weighting
Prioritizing recently discussed information
Relevance Scoring
Keeping information most related to current goals
Importance Detection
Identifying critical facts regardless of recency
User-flagged Content
Explicitly marked important information
# Memory prioritization systemdef prioritize_context(items, query, task_state, max_items=10): scored_items = []
for item in items: # Calculate different factors recency_score = calculate_recency(item.timestamp) relevance_score = calculate_relevance(item.content, query) importance_score = calculate_importance(item.content) user_attention_score = calculate_user_attention(item)
# Combine scores with learned weights combined_score = ( 0.2 * recency_score + 0.4 * relevance_score + 0.3 * importance_score + 0.1 * user_attention_score )
scored_items.append((item, combined_score))
# Sort by score and return top items scored_items.sort(key=lambda x: x[1], reverse=True) return [item for item, score in scored_items[:max_items]]Our knowledge integration system handles:
def fuse_knowledge(query, memories): # Extract different types of memories conversation = memories.get("conversation", []) entity_states = memories.get("entities", {}) semantic_facts = memories.get("semantic", []) episodic_memories = memories.get("episodic", [])
# Resolve conflicts resolved_facts = resolve_conflicts(semantic_facts, entity_states)
# Organize by topic topics = organize_by_topic(resolved_facts, conversation, episodic_memories)
# Create integrated knowledge representation integrated_knowledge = [] for topic in topics: # Combine information about this topic topic_info = { "topic": topic.name, "facts": topic.facts, "relevant_history": topic.episodes, "current_state": topic.current_state, "confidence": topic.confidence } integrated_knowledge.append(topic_info)
# Sort by relevance to query integrated_knowledge.sort( key=lambda k: calculate_relevance(k, query), reverse=True )
return integrated_knowledgeclass MemoryManager: def __init__(self, memory_system): self.memory = memory_system
def process_new_information(self, info, source): # Immediate storage in short-term memory self.memory.short_term.add(info)
# Check if information should be in working memory if is_task_relevant(info): self.memory.working.add(extract_task_info(info))
# Check if information should be stored long-term if should_store_long_term(info): # Determine appropriate long-term storage if is_episodic(info): self.memory.episodic.add(format_episodic(info))
if contains_facts(info): facts = extract_facts(info) self.memory.semantic.add(facts)
if contains_procedural(info): procedures = extract_procedures(info) self.memory.procedural.add(procedures)
def consolidate_memories(self): # Identify patterns in short-term memory to consolidate patterns = identify_patterns(self.memory.short_term.get_all())
# Create consolidated representations for pattern in patterns: consolidated = create_consolidated_memory(pattern)
# Store in appropriate long-term memory self.store_consolidated(consolidated)
# Clear consolidated short-term memories self.memory.short_term.clear_consolidated(patterns)Effective memory systems must efficiently retrieve relevant information when needed:
# Example of multi-query retrievaldef retrieve_multi_perspective(base_query, memory_system): # Generate different perspective queries queries = generate_perspective_queries(base_query)
# Retrieve from each perspective results = {} for perspective, query in queries.items(): results[perspective] = memory_system.retrieve(query)
# Combine and deduplicate combined = combine_retrieval_results(results)
# Rerank by relevance to original query reranked = rerank_by_relevance(combined, base_query)
return rerankedWe evaluate our memory systems on several dimensions:
Retention Accuracy
Correctly remembering past information
Retrieval Relevance
Finding the most useful information for the current context
Consistency
Maintaining coherent knowledge without contradictions
Temporal Awareness
Understanding when events occurred relative to each other
We continue to explore: