CockroachDB Performance Testing Rule

This rule provides a comprehensive approach to performance testing CockroachDB by populating tables with large datasets to simulate real-world scenarios with millions of users. It includes data generation strategies, performance monitoring techniques, and bottleneck identification methods.

# CockroachDB Performance Testing Rule

## Overview

This rule provides a comprehensive approach to performance testing CockroachDB by populating tables with large datasets to simulate real-world scenarios with millions of users. It includes data generation strategies, performance monitoring techniques, and bottleneck identification methods.

## Implementation

### 1. Create Performance Test Schema

```sql
-- Users table for 1M+ users
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    email STRING UNIQUE NOT NULL,
    username STRING UNIQUE NOT NULL,
    first_name STRING NOT NULL,
    last_name STRING NOT NULL,
    created_at TIMESTAMP DEFAULT now(),
    updated_at TIMESTAMP DEFAULT now(),
    is_active BOOLEAN DEFAULT true,
    metadata JSONB
);

-- User sessions table (high volume)
CREATE TABLE user_sessions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL REFERENCES users(id),
    session_token STRING NOT NULL,
    ip_address INET,
    user_agent TEXT,
    created_at TIMESTAMP DEFAULT now(),
    expires_at TIMESTAMP NOT NULL,
    is_active BOOLEAN DEFAULT true
);

-- Orders table (transaction-heavy)
CREATE TABLE orders (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL REFERENCES users(id),
    order_number STRING UNIQUE NOT NULL,
    total_amount DECIMAL(10,2) NOT NULL,
    status STRING NOT NULL DEFAULT 'pending',
    created_at TIMESTAMP DEFAULT now(),
    updated_at TIMESTAMP DEFAULT now()
);

-- Order items (high cardinality)
CREATE TABLE order_items (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    order_id UUID NOT NULL REFERENCES orders(id),
    product_id UUID NOT NULL,
    quantity INT NOT NULL,
    price DECIMAL(10,2) NOT NULL,
    created_at TIMESTAMP DEFAULT now()
);

-- Activity logs (massive volume)
CREATE TABLE activity_logs (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID REFERENCES users(id),
    action STRING NOT NULL,
    resource_type STRING,
    resource_id UUID,
    ip_address INET,
    created_at TIMESTAMP DEFAULT now(),
    metadata JSONB
);
```

### 2. Create Performance Indexes

```sql
-- Critical indexes for performance.
-- Note: the UNIQUE constraints on users.email and users.username already
-- create unique indexes, so idx_users_email largely duplicates one of them.
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_created_at ON users(created_at);
CREATE INDEX idx_user_sessions_user_id ON user_sessions(user_id);
CREATE INDEX idx_user_sessions_expires_at ON user_sessions(expires_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_created_at ON orders(created_at);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_activity_logs_user_id ON activity_logs(user_id);
CREATE INDEX idx_activity_logs_created_at ON activity_logs(created_at);
```
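Before loading data, it can be worth a quick sanity check that the schema and indexes landed as intended. A minimal sketch using CockroachDB's standard introspection statements:

```sql
-- List the tables created above
SHOW TABLES;

-- Inspect the indexes on a table (includes the implicit unique
-- indexes created by the UNIQUE constraints)
SHOW INDEXES FROM users;

-- View the full normalized definition of a table
SHOW CREATE TABLE orders;
```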
### 3. Data Generation Scripts

Note: these generators are written as PL/pgSQL user-defined functions. CockroachDB added PL/pgSQL support only in recent releases (v23.2+), and some constructs may still be unsupported depending on version; on older clusters, run equivalent set-based `INSERT ... SELECT` statements directly (see the sketch after this block).

```sql
-- Function to generate realistic test data
CREATE OR REPLACE FUNCTION generate_performance_test_data(num_users INT)
RETURNS VOID AS $$
DECLARE
    i INT := 1;
    batch_size INT := 1000;
BEGIN
    -- Generate users in batches
    WHILE i <= num_users LOOP
        INSERT INTO users (email, username, first_name, last_name, created_at, metadata)
        SELECT
            'user' || generate_series(i, LEAST(i + batch_size - 1, num_users)) || '@example.com',
            'username' || generate_series(i, LEAST(i + batch_size - 1, num_users)),
            'FirstName' || generate_series(i, LEAST(i + batch_size - 1, num_users)),
            'LastName' || generate_series(i, LEAST(i + batch_size - 1, num_users)),
            now() - (random() * interval '365 days'),
            ('{"signup_source": "web", "preferences": {"notifications": ' || (random() > 0.5)::text || '}}')::jsonb;

        i := i + batch_size;

        -- Log progress every 10 batches. (The whole function body runs in a
        -- single transaction, so there is no per-batch commit here; i is
        -- offset by 1 after each increment, hence the i - 1.)
        IF (i - 1) % (batch_size * 10) = 0 THEN
            RAISE NOTICE 'Generated % users', i - 1;
        END IF;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

-- Generate sessions (multiple per user)
CREATE OR REPLACE FUNCTION generate_user_sessions(session_multiplier INT DEFAULT 5)
RETURNS VOID AS $$
DECLARE
    user_record RECORD;
    j INT;
BEGIN
    FOR user_record IN SELECT id FROM users LOOP
        FOR j IN 1..session_multiplier LOOP
            INSERT INTO user_sessions (user_id, session_token, ip_address, user_agent, expires_at)
            VALUES (
                user_record.id,
                md5(random()::text || clock_timestamp()::text),
                ('192.168.' || floor(random() * 255) || '.' || floor(random() * 255))::inet,
                'Mozilla/5.0 (compatible; TestAgent/' || j || ')',
                now() + interval '24 hours'
            );
        END LOOP;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

-- Generate orders with items
CREATE OR REPLACE FUNCTION generate_orders_with_items(orders_per_user INT DEFAULT 3)
RETURNS VOID AS $$
DECLARE
    user_record RECORD;
    order_uuid UUID;
    i INT;
    j INT;
BEGIN
    FOR user_record IN SELECT id FROM users LOOP
        FOR i IN 1..orders_per_user LOOP
            INSERT INTO orders (user_id, order_number, total_amount, status, created_at)
            VALUES (
                user_record.id,
                'ORD-' || user_record.id || '-' || i,
                (random() * 500 + 10)::decimal(10,2),
                CASE
                    WHEN random() > 0.8 THEN 'cancelled'
                    WHEN random() > 0.6 THEN 'shipped'
                    WHEN random() > 0.4 THEN 'processing'
                    ELSE 'delivered'
                END,
                now() - (random() * interval '90 days')
            )
            RETURNING id INTO order_uuid;

            -- Add 1-5 items per order
            FOR j IN 1..(1 + floor(random() * 5))::int LOOP
                INSERT INTO order_items (order_id, product_id, quantity, price)
                VALUES (
                    order_uuid,
                    gen_random_uuid(),
                    (1 + floor(random() * 10))::int,
                    (random() * 100 + 5)::decimal(10,2)
                );
            END LOOP;
        END LOOP;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

-- Generate activity logs (high volume)
CREATE OR REPLACE FUNCTION generate_activity_logs(logs_per_user INT DEFAULT 50)
RETURNS VOID AS $$
DECLARE
    user_record RECORD;
    i INT;
    actions TEXT[] := ARRAY['login', 'logout', 'view_product', 'add_to_cart', 'purchase', 'update_profile', 'search'];
BEGIN
    FOR user_record IN SELECT id FROM users LOOP
        FOR i IN 1..logs_per_user LOOP
            INSERT INTO activity_logs (user_id, action, resource_type, resource_id, ip_address, created_at, metadata)
            VALUES (
                user_record.id,
                -- cast the subscript to INT: floor() returns a FLOAT,
                -- and array subscripts must be integers
                actions[(1 + floor(random() * array_length(actions, 1)))::int],
                CASE
                    WHEN random() > 0.5 THEN 'product'
                    WHEN random() > 0.3 THEN 'order'
                    ELSE 'user'
                END,
                gen_random_uuid(),
                ('10.' || floor(random() * 255) || '.' || floor(random() * 255) || '.' || floor(random() * 255))::inet,
                now() - (random() * interval '30 days'),
                ('{"page": "/app/page' || floor(random() * 100) || '", "duration": ' || floor(random() * 10000) || '}')::jsonb
            );
        END LOOP;
    END LOOP;
END;
$$ LANGUAGE plpgsql;
```
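Where PL/pgSQL is unavailable, or simply for raw loading speed, the same users can be generated with a single set-based statement instead of a row-at-a-time loop. A minimal sketch of one 10,000-row batch, assuming the `users` schema above (the batch size is an arbitrary example; a client script would repeat it with shifted ranges such as 10001-20000):

```sql
-- One set-based batch of 10,000 synthetic users; no UDF required.
-- Repeat from the client with shifted ranges for larger volumes.
INSERT INTO users (email, username, first_name, last_name, created_at, metadata)
SELECT
    'user' || n || '@example.com',
    'username' || n,
    'FirstName' || n,
    'LastName' || n,
    now() - (random() * interval '365 days'),
    ('{"signup_source": "web", "preferences": {"notifications": '
        || (random() > 0.5)::text || '}}')::jsonb
FROM generate_series(1, 10000) AS g(n);
```

Set-based statements give the database a whole batch at once rather than thousands of single-row inserts, which usually loads noticeably faster.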
### 4. Execute Performance Test Data Generation

```sql
-- Generate 1 million users
SELECT generate_performance_test_data(1000000);

-- Generate sessions (5 million sessions)
SELECT generate_user_sessions(5);

-- Generate orders with items (3 million orders, ~9 million items)
SELECT generate_orders_with_items(3);

-- Generate activity logs (50 million logs)
SELECT generate_activity_logs(50);
```

Each call runs as a single statement, and therefore a single transaction; for volumes this large it may be preferable to split the work into several smaller calls, or to use the set-based approach sketched above.

### 5. Performance Monitoring Queries

Note: several of the views and functions below (`pg_stat_statements`, `pg_stat_activity`, the `pg_stat_get_*` functions) are PostgreSQL facilities that CockroachDB implements only partially, if at all, for compatibility. The native CockroachDB equivalents are the DB Console, `SHOW STATEMENTS`, and `crdb_internal` tables such as `crdb_internal.statement_statistics` and `crdb_internal.cluster_queries`.

```sql
-- Monitor table sizes and row counts
SELECT
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS size,
    pg_stat_get_tuples_inserted(c.oid) AS inserts,
    pg_stat_get_tuples_updated(c.oid) AS updates,
    pg_stat_get_tuples_deleted(c.oid) AS deletes
FROM pg_tables t
JOIN pg_class c ON c.relname = t.tablename
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname || '.' || tablename) DESC;

-- Monitor query performance
SELECT
    query,
    calls,
    total_time,
    rows,
    100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
WHERE query LIKE '%users%'
   OR query LIKE '%orders%'
   OR query LIKE '%activity_logs%'
ORDER BY total_time DESC
LIMIT 10;

-- Check for long-running queries
SELECT
    pid,
    now() - pg_stat_activity.query_start AS duration,
    query,
    state
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
ORDER BY duration DESC;
```

### 6. Performance Test Scenarios

```sql
-- Test 1: User lookup performance
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user500000@example.com';

-- Test 2: Recent orders query
EXPLAIN ANALYZE
SELECT u.username, o.order_number, o.total_amount, o.created_at
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.created_at > now() - interval '7 days'
ORDER BY o.created_at DESC
LIMIT 100;

-- Test 3: Activity logs aggregation
EXPLAIN ANALYZE
SELECT
    action,
    DATE_TRUNC('day', created_at) AS day,
    COUNT(*) AS action_count
FROM activity_logs
WHERE created_at > now() - interval '30 days'
GROUP BY action, DATE_TRUNC('day', created_at)
ORDER BY day DESC, action_count DESC;

-- Test 4: Complex join performance
EXPLAIN ANALYZE
SELECT
    u.username,
    COUNT(DISTINCT o.id) AS total_orders,
    SUM(o.total_amount) AS total_spent,
    COUNT(DISTINCT s.id) AS active_sessions
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN user_sessions s ON u.id = s.user_id AND s.is_active = true
WHERE u.created_at > now() - interval '90 days'
GROUP BY u.id, u.username
HAVING COUNT(DISTINCT o.id) > 2
ORDER BY total_spent DESC
LIMIT 50;
```
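For read-heavy scenarios that tolerate slightly stale results, CockroachDB can also serve historical reads that avoid contention with concurrent writers. A minimal sketch using `AS OF SYSTEM TIME` (the `follower_read_timestamp()` variant in the comment has historically required an Enterprise license):

```sql
-- Test 5: Stale read that does not contend with in-flight writes;
-- reads table state as of 10 seconds ago
SELECT action, COUNT(*) AS action_count
FROM activity_logs
AS OF SYSTEM TIME '-10s'
WHERE created_at > now() - interval '30 days'
GROUP BY action
ORDER BY action_count DESC;

-- Variant: let CockroachDB pick the nearest timestamp that follower
-- replicas can serve:
-- ... AS OF SYSTEM TIME follower_read_timestamp() ...
```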
## Best Practices

### Data Generation

- **Batch Processing**: Generate data in batches of 1,000-10,000 records to avoid memory issues
- **Realistic Distributions**: Use random functions to simulate real-world data patterns
- **Incremental Loading**: Load data incrementally and monitor system resources
- **Transaction Management**: Commit frequently during bulk operations

### Performance Monitoring

- **Enable Query Statistics**: Use the `pg_stat_statements` extension for query analysis (on CockroachDB, use the DB Console and `crdb_internal.statement_statistics` instead)
- **Monitor Resource Usage**: Track CPU, memory, and disk I/O during tests
- **Index Usage**: Monitor index effectiveness with `pg_stat_user_indexes` (CockroachDB's native view is `crdb_internal.index_usage_statistics`)
- **Connection Pooling**: Use connection pooling for high-concurrency tests

### Test Scenarios

- **Gradual Load Increase**: Start with smaller datasets and gradually increase
- **Mixed Workloads**: Test read-heavy, write-heavy, and mixed scenarios
- **Concurrent Users**: Simulate multiple concurrent connections
- **Peak Load Testing**: Test system behavior during peak usage periods

## Common Issues

### Performance Bottlenecks

```sql
-- Issue: Slow bulk inserts
-- Solution: Batch multiple rows per INSERT inside an explicit transaction
BEGIN;
INSERT INTO users (...) VALUES (...), (...), (...); -- batch multiple rows
COMMIT;

-- Issue: Index contention during data loading
-- Solution: Create secondary indexes after bulk data loading
DROP INDEX IF EXISTS idx_users_email;
-- Load data
CREATE INDEX idx_users_email ON users(email);
```

### Memory and Storage Issues

```sql
-- Monitor cluster storage usage
SELECT
    store_id,
    node_id,
    used_bytes,
    available_bytes,
    (used_bytes::float / (used_bytes + available_bytes)) * 100 AS usage_percent
FROM crdb_internal.kv_store_status;

-- Ensure automatic table statistics collection is enabled
SHOW CLUSTER SETTING sql.stats.automatic_collection.enabled;
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = true;
```

### Query Optimization

```sql
-- Issue: Sequential scans on large tables
-- Solution: Add appropriate indexes and use LIMIT
-- (CockroachDB builds indexes online; CONCURRENTLY is accepted for
-- PostgreSQL compatibility but is effectively a no-op)
CREATE INDEX CONCURRENTLY idx_activity_logs_user_created
    ON activity_logs(user_id, created_at DESC);

-- Issue: Inefficient JOINs
-- Solution: Use proper join order and indexes
EXPLAIN ANALYZE
SELECT *
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.created_at > now() - interval '30 days';
```

### Connection Management

```sql
-- Monitor active connections
SELECT application_name, state, COUNT(*)
FROM pg_stat_activity
GROUP BY application_name, state;

-- Set appropriate connection limits
SET CLUSTER SETTING server.max_connections_per_gateway = 500;
```

### Cleanup and Maintenance

```sql
-- Clean up test data
TRUNCATE activity_logs, order_items, orders, user_sessions, users CASCADE;

-- Update table statistics after large data changes
ANALYZE users;
ANALYZE orders;
ANALYZE activity_logs;

-- Monitor dead-tuple accumulation (a PostgreSQL view; CockroachDB
-- garbage-collects old MVCC versions automatically per its GC TTL)
SELECT schemaname, tablename, n_tup_ins, n_tup_upd, n_tup_del, n_dead_tup
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000;
```
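After refreshing statistics, CockroachDB's `SHOW STATISTICS` can confirm that the optimizer actually has fresh row counts and histograms to work with, and `SHOW RANGES` shows how the test data ended up distributed across the cluster. A short sketch against the tables above:

```sql
-- Confirm fresh optimizer statistics after ANALYZE
SHOW STATISTICS FOR TABLE users;

-- Inspect how the loaded data is split and replicated across ranges
SHOW RANGES FROM TABLE activity_logs;
```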

Created: 6/1/2025
