-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinit.sql
More file actions
167 lines (150 loc) · 5.22 KB
/
Copy pathinit.sql
File metadata and controls
167 lines (150 loc) · 5.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
-- Initialize test database with sample data
--
-- TODO: Consider increasing test data volume for better LLM query analysis testing.
-- Current data sizes (users: 1000, products: 500, orders: 5000) are small enough
-- that the LLM correctly approves full table scans and leading wildcard LIKE queries.
-- For testing rejection scenarios, consider:
-- - 100K+ rows in users table to trigger full scan rejection
-- - Add a "large_events" table with 1M+ rows for clear rejection cases
-- See tests/features/analysis/real_bedrock.feature for related test comments.
-- Categories
CREATE TABLE IF NOT EXISTS categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
description TEXT
);
-- Users
CREATE TABLE IF NOT EXISTS users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) NOT NULL UNIQUE,
email VARCHAR(100) NOT NULL UNIQUE,
password_hash VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
is_active BOOLEAN DEFAULT TRUE
);
-- Products
CREATE TABLE IF NOT EXISTS products (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
description TEXT,
price DECIMAL(10, 2) NOT NULL,
stock_quantity INTEGER DEFAULT 0,
category_id INTEGER REFERENCES categories(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Orders
CREATE TABLE IF NOT EXISTS orders (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
status VARCHAR(20) DEFAULT 'pending',
total_amount DECIMAL(10, 2),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
shipped_at TIMESTAMP WITH TIME ZONE
);
-- Order Items
CREATE TABLE IF NOT EXISTS order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER NOT NULL,
unit_price DECIMAL(10, 2) NOT NULL
);
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
CREATE INDEX IF NOT EXISTS idx_orders_user_id ON orders(user_id);
CREATE INDEX IF NOT EXISTS idx_orders_status ON orders(status);
CREATE INDEX IF NOT EXISTS idx_order_items_order_id ON order_items(order_id);
CREATE INDEX IF NOT EXISTS idx_products_category_id ON products(category_id);
-- Insert sample data
INSERT INTO categories (name, description) VALUES
('Electronics', 'Electronic devices and accessories'),
('Clothing', 'Apparel and fashion items'),
('Books', 'Books and publications'),
('Home & Garden', 'Home improvement and garden supplies'),
('Sports', 'Sports equipment and gear'),
('Toys', 'Toys and games'),
('Food', 'Food and beverages')
ON CONFLICT DO NOTHING;
-- Insert 1000 users
INSERT INTO users (username, email, password_hash, is_active)
SELECT
'user' || i,
'user' || i || '@example.com',
'hashed_password_' || i,
TRUE
FROM generate_series(1, 1000) AS i
ON CONFLICT DO NOTHING;
-- Insert 500 products
INSERT INTO products (name, description, price, stock_quantity, category_id)
SELECT
'Product ' || i,
'Description for product ' || i,
(random() * 1000)::decimal(10,2),
(random() * 100)::integer,
(random() * 6 + 1)::integer
FROM generate_series(1, 500) AS i
ON CONFLICT DO NOTHING;
-- Insert 5000 orders
INSERT INTO orders (user_id, status, total_amount, created_at)
SELECT
(random() * 999 + 1)::integer,
(ARRAY['pending', 'processing', 'shipped', 'delivered'])[floor(random() * 4 + 1)::integer],
(random() * 500)::decimal(10,2),
NOW() - (random() * 365 || ' days')::interval
FROM generate_series(1, 5000) AS i
ON CONFLICT DO NOTHING;
-- Insert 15000 order items
INSERT INTO order_items (order_id, product_id, quantity, unit_price)
SELECT
(random() * 4999 + 1)::integer,
(random() * 499 + 1)::integer,
(random() * 5 + 1)::integer,
(random() * 100)::decimal(10,2)
FROM generate_series(1, 15000) AS i
ON CONFLICT DO NOTHING;
-- Test table for NULL handling in masking
-- This table has nullable sensitive columns to verify mask expressions preserve NULLs
CREATE TABLE IF NOT EXISTS test_nullable_pii (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(100), -- nullable email
phone VARCHAR(20), -- nullable phone
notes TEXT
);
-- Insert test data with NULLs
INSERT INTO test_nullable_pii (name, email, phone, notes) VALUES
('Alice', 'alice@example.com', '+1-555-1234', 'Has all fields'),
('Bob', NULL, '+1-555-5678', 'No email'),
('Charlie', 'charlie@example.com', NULL, 'No phone'),
('Diana', NULL, NULL, 'No email or phone'),
('Eve', 'eve@example.com', '+1-555-9999', NULL)
ON CONFLICT DO NOTHING;
-- Views
-- Simple view exposing sensitive data (email)
CREATE OR REPLACE VIEW user_emails AS
SELECT id, username, email FROM users;
-- View joining tables with sensitive data
CREATE OR REPLACE VIEW order_summary AS
SELECT
o.id AS order_id,
u.username,
u.email,
o.total_amount,
o.status,
o.created_at
FROM orders o
JOIN users u ON u.id = o.user_id;
-- View with no sensitive data (for contrast)
CREATE OR REPLACE VIEW product_catalog AS
SELECT
p.id,
p.name,
p.description,
p.price,
p.stock_quantity,
c.name AS category_name
FROM products p
LEFT JOIN categories c ON c.id = p.category_id;
-- Analyze tables for query planning
ANALYZE;