# CockroachDB Performance Testing Rule
## Overview
This rule provides a comprehensive approach to performance testing CockroachDB by populating tables with large datasets to simulate real-world scenarios with millions of users. It includes data generation strategies, performance monitoring techniques, and bottleneck identification methods.
## Implementation
### 1. Create Performance Test Schema
```sql
-- Users table for 1M+ users
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    email STRING UNIQUE NOT NULL,
    username STRING UNIQUE NOT NULL,
    first_name STRING NOT NULL,
    last_name STRING NOT NULL,
    created_at TIMESTAMP DEFAULT now(),
    updated_at TIMESTAMP DEFAULT now(),
    is_active BOOLEAN DEFAULT true,
    metadata JSONB
);
-- User sessions table (high volume)
CREATE TABLE user_sessions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL REFERENCES users(id),
    session_token STRING NOT NULL,
    ip_address INET,
    user_agent TEXT,
    created_at TIMESTAMP DEFAULT now(),
    expires_at TIMESTAMP NOT NULL,
    is_active BOOLEAN DEFAULT true
);
-- Orders table (transaction-heavy)
CREATE TABLE orders (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL REFERENCES users(id),
    order_number STRING UNIQUE NOT NULL,
    total_amount DECIMAL(10,2) NOT NULL,
    status STRING NOT NULL DEFAULT 'pending',
    created_at TIMESTAMP DEFAULT now(),
    updated_at TIMESTAMP DEFAULT now()
);
-- Order items (high cardinality)
CREATE TABLE order_items (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    order_id UUID NOT NULL REFERENCES orders(id),
    product_id UUID NOT NULL,
    quantity INT NOT NULL,
    price DECIMAL(10,2) NOT NULL,
    created_at TIMESTAMP DEFAULT now()
);
-- Activity logs (massive volume)
CREATE TABLE activity_logs (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID REFERENCES users(id),
    action STRING NOT NULL,
    resource_type STRING,
    resource_id UUID,
    ip_address INET,
    created_at TIMESTAMP DEFAULT now(),
    metadata JSONB
);
```
### 2. Create Performance Indexes
```sql
-- Critical indexes for performance
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_created_at ON users(created_at);
CREATE INDEX idx_user_sessions_user_id ON user_sessions(user_id);
CREATE INDEX idx_user_sessions_expires_at ON user_sessions(expires_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_created_at ON orders(created_at);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_activity_logs_user_id ON activity_logs(user_id);
CREATE INDEX idx_activity_logs_created_at ON activity_logs(created_at);
```
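Indexes on monotonically increasing values such as `created_at` funnel all new writes into the last range, which can become a hot spot during bulk loads. If that shows up in testing, a hash-sharded index is one mitigation (a sketch; hash-sharded indexes require CockroachDB v22.1+ and trade write hot spots for slightly costlier range scans):
```sql
-- Spread sequential timestamp writes across multiple ranges
CREATE INDEX idx_activity_logs_created_at_sharded
    ON activity_logs (created_at) USING HASH;
```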
### 3. Data Generation Scripts
Note: these helpers use PL/pgSQL, which CockroachDB supports in user-defined functions starting in v23.2; construct coverage (e.g. `RAISE NOTICE`, `FOR ... IN query` loops) varies by release. A set-based alternative that needs no PL/pgSQL is sketched at the end of this section.
```sql
-- Function to generate realistic test data
CREATE OR REPLACE FUNCTION generate_performance_test_data(num_users INT)
RETURNS VOID AS $$
DECLARE
    i INT := 1;
    user_uuid UUID;
    order_uuid UUID;
    batch_size INT := 1000;
BEGIN
    -- Generate users in batches
    WHILE i <= num_users LOOP
        INSERT INTO users (email, username, first_name, last_name, created_at, metadata)
        SELECT 
            'user' || generate_series(i, LEAST(i + batch_size - 1, num_users)) || '@example.com',
            'username' || generate_series(i, LEAST(i + batch_size - 1, num_users)),
            'FirstName' || generate_series(i, LEAST(i + batch_size - 1, num_users)),
            'LastName' || generate_series(i, LEAST(i + batch_size - 1, num_users)),
            now() - (random() * interval '365 days'),
            ('{"signup_source": "web", "preferences": {"notifications": ' || (random() > 0.5)::text || '}}')::jsonb;
        
        i := i + batch_size;
        
        -- Log progress every 10 batches. A function body runs in a single
        -- transaction, so it cannot commit per batch; keep calls modest
        -- or use the set-based alternative below.
        IF (i - 1) % (batch_size * 10) = 0 THEN
            RAISE NOTICE 'Generated % users', i - 1;
        END IF;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
-- Generate sessions (multiple per user)
CREATE OR REPLACE FUNCTION generate_user_sessions(session_multiplier INT DEFAULT 5)
RETURNS VOID AS $$
DECLARE
    user_record RECORD;
    j INT;
BEGIN
    FOR user_record IN SELECT id FROM users LOOP
        FOR j IN 1..session_multiplier LOOP
            INSERT INTO user_sessions (user_id, session_token, ip_address, user_agent, expires_at)
            VALUES (
                user_record.id,
                md5(random()::text || clock_timestamp()::text),
                ('192.168.' || floor(random() * 255) || '.' || floor(random() * 255))::inet,
                'Mozilla/5.0 (compatible; TestAgent/' || j || ')',
                now() + interval '24 hours'
            );
        END LOOP;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
-- Generate orders with items
CREATE OR REPLACE FUNCTION generate_orders_with_items(orders_per_user INT DEFAULT 3)
RETURNS VOID AS $$
DECLARE
    user_record RECORD;
    order_uuid UUID;
    r FLOAT8;
    i INT;
    j INT;
BEGIN
    FOR user_record IN SELECT id FROM users LOOP
        FOR i IN 1..orders_per_user LOOP
            -- Draw the status from a single random value so the buckets have
            -- fixed probabilities (20/20/20/40); calling random() in each
            -- WHEN branch would skew the distribution
            r := random();
            INSERT INTO orders (user_id, order_number, total_amount, status, created_at)
            VALUES (
                user_record.id,
                'ORD-' || user_record.id || '-' || i,
                (random() * 500 + 10)::decimal(10,2),
                CASE 
                    WHEN r > 0.8 THEN 'cancelled'
                    WHEN r > 0.6 THEN 'shipped'
                    WHEN r > 0.4 THEN 'processing'
                    ELSE 'delivered'
                END,
                now() - (random() * interval '90 days')
            ) RETURNING id INTO order_uuid;
            
            -- Add 1-5 items per order
            FOR j IN 1..(1 + floor(random() * 5))::int LOOP
                INSERT INTO order_items (order_id, product_id, quantity, price)
                VALUES (
                    order_uuid,
                    gen_random_uuid(),
                    (1 + floor(random() * 10))::int,
                    (random() * 100 + 5)::decimal(10,2)
                );
            END LOOP;
        END LOOP;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
-- Generate activity logs (high volume)
CREATE OR REPLACE FUNCTION generate_activity_logs(logs_per_user INT DEFAULT 50)
RETURNS VOID AS $$
DECLARE
    user_record RECORD;
    i INT;
    r FLOAT8;
    actions TEXT[] := ARRAY['login', 'logout', 'view_product', 'add_to_cart', 'purchase', 'update_profile', 'search'];
BEGIN
    FOR user_record IN SELECT id FROM users LOOP
        FOR i IN 1..logs_per_user LOOP
            -- One random value per row keeps the resource_type buckets at
            -- fixed probabilities (50/20/30)
            r := random();
            INSERT INTO activity_logs (user_id, action, resource_type, resource_id, ip_address, created_at, metadata)
            VALUES (
                user_record.id,
                -- Array subscripts must be integers, hence the cast
                actions[1 + floor(random() * array_length(actions, 1))::int],
                CASE 
                    WHEN r > 0.5 THEN 'product'
                    WHEN r > 0.3 THEN 'order'
                    ELSE 'user'
                END,
                gen_random_uuid(),
                ('10.' || floor(random() * 255) || '.' || floor(random() * 255) || '.' || floor(random() * 255))::inet,
                now() - (random() * interval '30 days'),
                ('{"page": "/app/page' || floor(random() * 100) || '", "duration": ' || floor(random() * 10000) || '}')::jsonb
            );
        END LOOP;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
```
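If your CockroachDB version predates PL/pgSQL support, the same users can be produced with plain set-based SQL. A minimal sketch that inserts one slice of the population per statement; shift the `generate_series` bounds for each subsequent slice to keep transactions small:
```sql
-- Set-based alternative to generate_performance_test_data: users 1..100000
INSERT INTO users (email, username, first_name, last_name, created_at, metadata)
SELECT
    'user' || g || '@example.com',
    'username' || g,
    'FirstName' || g,
    'LastName' || g,
    now() - (random() * interval '365 days'),
    ('{"signup_source": "web", "preferences": {"notifications": ' || (random() > 0.5)::text || '}}')::jsonb
FROM generate_series(1, 100000) AS g;
```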
### 4. Execute Performance Test Data Generation
```sql
-- Generate 1 million users (a single function call runs as one large
-- transaction at this size; the set-based alternative above can be run
-- in slices instead)
SELECT generate_performance_test_data(1000000);
-- Generate sessions (5 million sessions)
SELECT generate_user_sessions(5);
-- Generate orders with items (3 million orders, ~9 million items)
SELECT generate_orders_with_items(3);
-- Generate activity logs (50 million logs)
SELECT generate_activity_logs(50);
```
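Each call can run for a long time at these volumes, so verify progress between steps. Exact counts require full scans, while `SHOW STATISTICS` returns cheap estimates from collected table statistics:
```sql
-- Exact row counts (full scans; slow on the largest tables)
SELECT count(*) FROM users;
SELECT count(*) FROM orders;
-- Cheap estimate from collected table statistics
SHOW STATISTICS FOR TABLE activity_logs;
```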
### 5. Performance Monitoring Queries
```sql
-- Table size and range distribution (CockroachDB does not implement
-- pg_total_relation_size; use SHOW RANGES or the DB Console instead.
-- Note that SHOW RANGES output columns changed in v23.1)
SHOW RANGES FROM TABLE users;
SHOW RANGES FROM TABLE activity_logs;
-- Monitor query performance (CockroachDB has no pg_stat_statements;
-- per-node statement statistics live in crdb_internal, with column
-- names that may vary slightly by version)
SELECT 
    key AS query,
    count AS calls,
    service_lat_avg,
    rows_avg
FROM crdb_internal.node_statement_statistics
WHERE key LIKE '%users%' OR key LIKE '%orders%' OR key LIKE '%activity_logs%'
ORDER BY service_lat_avg DESC
LIMIT 10;
-- Check slow queries currently executing across the cluster
SELECT 
    query_id,
    node_id,
    now() - start AS duration,
    query,
    phase
FROM crdb_internal.cluster_queries
WHERE now() - start > interval '5 minutes'
ORDER BY duration DESC;
```
### 6. Performance Test Scenarios
```sql
-- Test 1: User lookup performance
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user500000@example.com';
-- Test 2: Recent orders query
EXPLAIN ANALYZE 
SELECT u.username, o.order_number, o.total_amount, o.created_at
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.created_at > now() - interval '7 days'
ORDER BY o.created_at DESC
LIMIT 100;
-- Test 3: Activity logs aggregation
EXPLAIN ANALYZE
SELECT 
    action,
    DATE_TRUNC('day', created_at) as day,
    COUNT(*) as action_count
FROM activity_logs
WHERE created_at > now() - interval '30 days'
GROUP BY action, DATE_TRUNC('day', created_at)
ORDER BY day DESC, action_count DESC;
-- Test 4: Complex join performance
EXPLAIN ANALYZE
SELECT 
    u.username,
    COUNT(DISTINCT o.id) as total_orders,
    SUM(o.total_amount) as total_spent,
    COUNT(DISTINCT s.id) as active_sessions
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN user_sessions s ON u.id = s.user_id AND s.is_active = true
WHERE u.created_at > now() - interval '90 days'
GROUP BY u.id, u.username
HAVING COUNT(DISTINCT o.id) > 2
ORDER BY total_spent DESC
LIMIT 50;
```
## Best Practices
### Data Generation
- **Batch Processing**: Generate data in batches of 1,000-10,000 records to avoid memory issues
- **Realistic Distributions**: Use random functions to simulate real-world data patterns
- **Incremental Loading**: Load data incrementally and monitor system resources
- **Transaction Management**: Commit frequently during bulk operations
### Performance Monitoring
- **Enable Query Statistics**: CockroachDB does not ship `pg_stat_statements`; use `crdb_internal.node_statement_statistics` or the DB Console's SQL Activity page for query analysis
- **Monitor Resource Usage**: Track CPU, memory, and disk I/O during tests
- **Index Usage**: Monitor index effectiveness with `crdb_internal.index_usage_statistics` (see the sketch after this list)
- **Connection Pooling**: Use connection pooling for high-concurrency tests
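For the index-usage check above, CockroachDB exposes read counters in `crdb_internal`; a sketch joining them to index names (column names in `crdb_internal` can shift between versions, so treat this as a starting point):
```sql
-- Rank indexes by read traffic; unused indexes show total_reads = 0
SELECT
    ti.descriptor_name AS table_name,
    ti.index_name,
    us.total_reads,
    us.last_read
FROM crdb_internal.index_usage_statistics AS us
JOIN crdb_internal.table_indexes AS ti
    ON us.table_id = ti.descriptor_id AND us.index_id = ti.index_id
ORDER BY us.total_reads DESC;
```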
### Test Scenarios
- **Gradual Load Increase**: Start with smaller datasets and gradually increase
- **Mixed Workloads**: Test read-heavy, write-heavy, and mixed scenarios
- **Concurrent Users**: Simulate multiple concurrent connections (see the mixed-workload sketch after this list)
- **Peak Load Testing**: Test system behavior during peak usage periods
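For the concurrent-user scenario, one approach is a small mixed read/write transaction that a load generator (for example `pgbench` or `cockroach workload`) runs in many parallel sessions; the email below is a placeholder drawn from the generated dataset:
```sql
-- One iteration of a hypothetical mixed workload: point read + two writes
BEGIN;
SELECT id, username FROM users WHERE email = 'user123456@example.com';
INSERT INTO user_sessions (user_id, session_token, expires_at)
    SELECT id, md5(random()::text || clock_timestamp()::text), now() + interval '24 hours'
    FROM users WHERE email = 'user123456@example.com';
INSERT INTO activity_logs (user_id, action)
    SELECT id, 'login' FROM users WHERE email = 'user123456@example.com';
COMMIT;
```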
## Common Issues
### Performance Bottlenecks
```sql
-- Issue: Slow bulk inserts
-- Solution: batch multiple rows per INSERT inside an explicit transaction
BEGIN;
INSERT INTO users (...) VALUES (...), (...), (...); -- Batch multiple rows
COMMIT;
-- Issue: Index contention during data loading
-- Solution: Create indexes after bulk data loading
DROP INDEX IF EXISTS idx_users_email;
-- Load data
CREATE INDEX idx_users_email ON users(email);
```
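At the multi-million-row scale, pre-generating CSVs and bulk-loading them is usually far faster than any INSERT loop. A sketch using CockroachDB's `IMPORT INTO` (the file path is a placeholder; `IMPORT INTO` takes the target table offline during the load and has version-specific restrictions, e.g. around foreign keys, so check the docs for your release):
```sql
-- Bulk-load pre-generated users from a CSV staged in the node's
-- external I/O directory
IMPORT INTO users (id, email, username, first_name, last_name, created_at)
    CSV DATA ('nodelocal://1/users.csv');
```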
### Memory and Storage Issues
```sql
-- Monitor cluster storage usage
SELECT 
    store_id,
    node_id,
    used,
    available,
    capacity,
    (used::FLOAT / capacity) * 100 AS usage_percent
FROM crdb_internal.kv_store_status;
-- Ensure automatic table statistics collection is enabled
-- (the optimizer depends on fresh statistics at these data volumes)
SHOW CLUSTER SETTING sql.stats.automatic_collection.enabled;
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = true;
```
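Node-level memory metrics are also queryable in SQL via `crdb_internal.node_metrics`; the `LIKE` pattern below is an assumption to adjust, since metric names vary by version:
```sql
-- Inspect SQL memory accounting on the gateway node
-- (metric name prefix is an assumption; adjust per your version)
SELECT name, value
FROM crdb_internal.node_metrics
WHERE name LIKE 'sql.mem%'
ORDER BY name;
```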
### Query Optimization
```sql
-- Issue: Sequential scans on large tables
-- Solution: Add appropriate indexes and use LIMIT (CONCURRENTLY is accepted
-- only for Postgres compatibility; CockroachDB builds indexes online anyway)
CREATE INDEX CONCURRENTLY idx_activity_logs_user_created 
ON activity_logs(user_id, created_at DESC);
-- Issue: Inefficient JOINs
-- Solution: Verify join order and index usage with EXPLAIN ANALYZE
-- (CockroachDB does not support Postgres's BUFFERS option)
EXPLAIN ANALYZE
SELECT * FROM users u 
JOIN orders o ON u.id = o.user_id 
WHERE u.created_at > now() - interval '30 days';
```
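When a query stays slow after indexing, the statement-diagnostics variant of `EXPLAIN ANALYZE` collects the plan, trace, and environment into a bundle for offline analysis in the DB Console:
```sql
-- Collect a statement diagnostics bundle for deeper analysis
EXPLAIN ANALYZE (DEBUG)
SELECT u.username, o.total_amount
FROM users u JOIN orders o ON u.id = o.user_id
WHERE o.created_at > now() - interval '7 days';
```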
### Connection Management
```sql
-- Monitor active sessions per application
SELECT
    application_name,
    count(*)
FROM [SHOW CLUSTER SESSIONS]
GROUP BY application_name;
-- Cap client connections per gateway node (setting names vary by version;
-- check SHOW ALL CLUSTER SETTINGS for the exact name in your release)
SET CLUSTER SETTING server.max_connections_per_gateway = 500;
```
### Cleanup and Maintenance
```sql
-- Clean up test data
TRUNCATE activity_logs, order_items, orders, user_sessions, users CASCADE;
-- Update table statistics after large data changes
ANALYZE users;
ANALYZE orders;
ANALYZE activity_logs;
-- CockroachDB has no VACUUM or dead-tuple bookkeeping; old MVCC versions
-- are reclaimed automatically once they age past the zone's gc.ttlseconds
SHOW ZONE CONFIGURATION FROM TABLE activity_logs;
```
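Truncated data still occupies space until MVCC garbage collection runs; on a disposable test cluster, temporarily lowering the GC window reclaims it sooner (the 10-minute value is arbitrary):
```sql
-- Shorten the MVCC GC window so truncated test data is reclaimed faster
ALTER TABLE activity_logs CONFIGURE ZONE USING gc.ttlseconds = 600;
```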
Created: 6/1/2025