Files
shorturl-analytics/scripts/db/sql/clickhouse/seed-clickhouse-analytics.sql
2025-03-21 12:08:37 +08:00

828 lines
16 KiB
SQL

-- Clear existing data (optional).
-- Each statement uses IF EXISTS, so a table that has not been created yet is
-- skipped instead of raising an error.
TRUNCATE TABLE IF EXISTS limq.link_events;
TRUNCATE TABLE IF EXISTS limq.link_daily_stats;
TRUNCATE TABLE IF EXISTS limq.link_hourly_patterns;
TRUNCATE TABLE IF EXISTS limq.links;
-- Insert the four demo links using fixed UUID values.
-- The ids are hard-coded (not generated) because the link_events,
-- link_daily_stats and link_hourly_patterns seed statements in this script
-- reference them as string literals.
-- All rows go in one multi-row INSERT: ClickHouse recommends batching rows
-- into as few INSERT statements as possible rather than issuing one per row.
INSERT INTO
    limq.links (
        link_id,
        original_url,
        created_at,
        created_by,
        title,
        description,
        tags,
        is_active
    )
VALUES
    (
        '11111111-1111-1111-1111-111111111111',
        'https://example.com/page1',
        now(),
        'user-1',
        '产品页面',
        '我们的主要产品页面',
        ['产品', '营销'],
        true
    ),
    (
        '22222222-2222-2222-2222-222222222222',
        'https://example.com/promo',
        now(),
        'user-1',
        '促销活动',
        '夏季特别促销活动',
        ['促销', '活动'],
        true
    ),
    (
        '33333333-3333-3333-3333-333333333333',
        'https://example.com/blog',
        now(),
        'user-2',
        '公司博客',
        '公司新闻和更新',
        ['博客', '内容'],
        true
    ),
    (
        '44444444-4444-4444-4444-444444444444',
        'https://example.com/signup',
        now(),
        'user-2',
        '注册页面',
        '新用户注册页面',
        ['转化', '注册'],
        true
    );
-- Create 500 synthetic events for the first link
-- (11111111-1111-1111-1111-111111111111).
--
-- ClickHouse collapses identical rand() calls within one query into a single
-- value per row (common subexpression elimination). With bare rand() every
-- "random" column is a function of the same draw, e.g. every bounce is also a
-- China / mobile / Chrome / google click. Each independent attribute therefore
-- uses its own rand(<tag>); the argument is ignored by rand() and only keeps
-- the calls distinct. Repeating the same tag inside one multiIf is deliberate:
-- the branches then compare one draw against cumulative thresholds.
INSERT INTO
    limq.link_events (
        event_id,
        event_time,
        date,
        link_id,
        channel_id,
        visitor_id,
        session_id,
        event_type,
        ip_address,
        country,
        city,
        referrer,
        utm_source,
        utm_medium,
        utm_campaign,
        user_agent,
        device_type,
        browser,
        os,
        time_spent_sec,
        is_bounce,
        is_qr_scan,
        qr_code_id,
        conversion_type,
        conversion_value,
        custom_data
    )
SELECT
    generateUUIDv4() AS event_id,
    -- Shift now() back by 0..29 whole days; the time of day is unchanged.
    subtractDays(now(), rand(1) % 30) AS event_time,
    toDate(event_time) AS date,
    '11111111-1111-1111-1111-111111111111' AS link_id,
    'channel-1' AS channel_id,
    concat('visitor-', toString(rand(2) % 100 + 1)) AS visitor_id,
    -- Sessions are assigned round-robin over 50 ids, not randomly.
    concat('session-', toString(number % 50 + 1)) AS session_id,
    -- 70% click, 20% redirect, 8% conversion, 2% error.
    multiIf(
        rand(3) % 100 < 70, 'click',
        rand(3) % 100 < 90, 'redirect',
        rand(3) % 100 < 98, 'conversion',
        'error'
    ) AS event_type,
    concat('192.168.1.', toString(rand(4) % 255)) AS ip_address,
    -- country and city share draw 5 so that they stay paired
    -- (China/Beijing, US/New York, Japan/Tokyo, Other/Other).
    multiIf(
        rand(5) % 100 < 60, 'China',
        rand(5) % 100 < 85, 'US',
        rand(5) % 100 < 95, 'Japan',
        'Other'
    ) AS country,
    multiIf(
        rand(5) % 100 < 60, 'Beijing',
        rand(5) % 100 < 85, 'New York',
        rand(5) % 100 < 95, 'Tokyo',
        'Other'
    ) AS city,
    multiIf(
        rand(6) % 100 < 30, 'https://google.com',
        rand(6) % 100 < 50, 'https://facebook.com',
        rand(6) % 100 < 65, 'https://twitter.com',
        rand(6) % 100 < 75, 'https://instagram.com',
        rand(6) % 100 < 85, 'https://linkedin.com',
        rand(6) % 100 < 90, 'https://bing.com',
        rand(6) % 100 < 95, 'https://baidu.com',
        'direct'
    ) AS referrer,
    -- utm_source and utm_medium share draw 7 so that they stay paired
    -- (google/cpc, facebook/social, email/email, direct/direct).
    multiIf(
        rand(7) % 100 < 40, 'google',
        rand(7) % 100 < 70, 'facebook',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_source,
    multiIf(
        rand(7) % 100 < 40, 'cpc',
        rand(7) % 100 < 70, 'social',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_medium,
    multiIf(
        rand(8) % 100 < 40, 'summer_sale',
        rand(8) % 100 < 70, 'product_launch',
        rand(8) % 100 < 90, 'newsletter',
        'brand'
    ) AS utm_campaign,
    'Mozilla/5.0' AS user_agent,
    multiIf(
        rand(9) % 100 < 60, 'mobile',
        rand(9) % 100 < 85, 'desktop',
        rand(9) % 100 < 95, 'tablet',
        'other'
    ) AS device_type,
    multiIf(
        rand(10) % 100 < 50, 'Chrome',
        rand(10) % 100 < 80, 'Safari',
        rand(10) % 100 < 95, 'Firefox',
        'Edge'
    ) AS browser,
    multiIf(
        rand(11) % 100 < 50, 'iOS',
        rand(11) % 100 < 90, 'Android',
        'Windows'
    ) AS os,
    rand(12) % 300 AS time_spent_sec,
    -- 25% of events are bounces, 20% are QR scans.
    rand(13) % 100 < 25 AS is_bounce,
    rand(14) % 100 < 20 AS is_qr_scan,
    concat('qr-', toString(rand(15) % 10 + 1)) AS qr_code_id,
    multiIf(
        rand(16) % 100 < 50, 'visit',
        rand(16) % 100 < 70, 'stay',
        rand(16) % 100 < 85, 'interact',
        rand(16) % 100 < 93, 'signup',
        rand(16) % 100 < 97, 'subscription',
        'purchase'
    ) AS conversion_type,
    rand(17) % 100 * 1.5 AS conversion_value,
    '{}' AS custom_data
FROM
    numbers(500);
-- Create 300 synthetic events for the second link
-- (22222222-2222-2222-2222-222222222222).
--
-- ClickHouse collapses identical rand() calls within one query into a single
-- value per row (common subexpression elimination). With bare rand() every
-- "random" column is a function of the same draw, e.g. every bounce is also a
-- China / mobile / Chrome / google click. Each independent attribute therefore
-- uses its own rand(<tag>); the argument is ignored by rand() and only keeps
-- the calls distinct. Repeating the same tag inside one multiIf is deliberate:
-- the branches then compare one draw against cumulative thresholds.
INSERT INTO
    limq.link_events (
        event_id,
        event_time,
        date,
        link_id,
        channel_id,
        visitor_id,
        session_id,
        event_type,
        ip_address,
        country,
        city,
        referrer,
        utm_source,
        utm_medium,
        utm_campaign,
        user_agent,
        device_type,
        browser,
        os,
        time_spent_sec,
        is_bounce,
        is_qr_scan,
        qr_code_id,
        conversion_type,
        conversion_value,
        custom_data
    )
SELECT
    generateUUIDv4() AS event_id,
    -- Shift now() back by 0..29 whole days; the time of day is unchanged.
    subtractDays(now(), rand(1) % 30) AS event_time,
    toDate(event_time) AS date,
    '22222222-2222-2222-2222-222222222222' AS link_id,
    'channel-1' AS channel_id,
    concat('visitor-', toString(rand(2) % 100 + 1)) AS visitor_id,
    -- Sessions are assigned round-robin over 40 ids, not randomly.
    concat('session-', toString(number % 40 + 1)) AS session_id,
    -- 70% click, 20% redirect, 8% conversion, 2% error.
    multiIf(
        rand(3) % 100 < 70, 'click',
        rand(3) % 100 < 90, 'redirect',
        rand(3) % 100 < 98, 'conversion',
        'error'
    ) AS event_type,
    concat('192.168.1.', toString(rand(4) % 255)) AS ip_address,
    -- country and city share draw 5 so that they stay paired
    -- (China/Beijing, US/New York, Japan/Tokyo, Other/Other).
    multiIf(
        rand(5) % 100 < 60, 'China',
        rand(5) % 100 < 85, 'US',
        rand(5) % 100 < 95, 'Japan',
        'Other'
    ) AS country,
    multiIf(
        rand(5) % 100 < 60, 'Beijing',
        rand(5) % 100 < 85, 'New York',
        rand(5) % 100 < 95, 'Tokyo',
        'Other'
    ) AS city,
    multiIf(
        rand(6) % 100 < 30, 'https://google.com',
        rand(6) % 100 < 50, 'https://facebook.com',
        rand(6) % 100 < 65, 'https://twitter.com',
        rand(6) % 100 < 75, 'https://instagram.com',
        rand(6) % 100 < 85, 'https://linkedin.com',
        rand(6) % 100 < 90, 'https://bing.com',
        rand(6) % 100 < 95, 'https://baidu.com',
        'direct'
    ) AS referrer,
    -- utm_source and utm_medium share draw 7 so that they stay paired
    -- (google/cpc, facebook/social, email/email, direct/direct).
    multiIf(
        rand(7) % 100 < 40, 'google',
        rand(7) % 100 < 70, 'facebook',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_source,
    multiIf(
        rand(7) % 100 < 40, 'cpc',
        rand(7) % 100 < 70, 'social',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_medium,
    multiIf(
        rand(8) % 100 < 40, 'summer_sale',
        rand(8) % 100 < 70, 'product_launch',
        rand(8) % 100 < 90, 'newsletter',
        'brand'
    ) AS utm_campaign,
    'Mozilla/5.0' AS user_agent,
    multiIf(
        rand(9) % 100 < 60, 'mobile',
        rand(9) % 100 < 85, 'desktop',
        rand(9) % 100 < 95, 'tablet',
        'other'
    ) AS device_type,
    multiIf(
        rand(10) % 100 < 50, 'Chrome',
        rand(10) % 100 < 80, 'Safari',
        rand(10) % 100 < 95, 'Firefox',
        'Edge'
    ) AS browser,
    multiIf(
        rand(11) % 100 < 50, 'iOS',
        rand(11) % 100 < 90, 'Android',
        'Windows'
    ) AS os,
    rand(12) % 300 AS time_spent_sec,
    -- 25% of events are bounces, 15% are QR scans.
    rand(13) % 100 < 25 AS is_bounce,
    rand(14) % 100 < 15 AS is_qr_scan,
    concat('qr-', toString(rand(15) % 10 + 1)) AS qr_code_id,
    multiIf(
        rand(16) % 100 < 50, 'visit',
        rand(16) % 100 < 70, 'stay',
        rand(16) % 100 < 85, 'interact',
        rand(16) % 100 < 93, 'signup',
        rand(16) % 100 < 97, 'subscription',
        'purchase'
    ) AS conversion_type,
    rand(17) % 100 * 2.5 AS conversion_value,
    '{}' AS custom_data
FROM
    numbers(300);
-- Create 200 synthetic events for the third link
-- (33333333-3333-3333-3333-333333333333).
--
-- ClickHouse collapses identical rand() calls within one query into a single
-- value per row (common subexpression elimination). With bare rand() every
-- "random" column is a function of the same draw, e.g. every bounce is also a
-- China / mobile / Chrome / google click. Each independent attribute therefore
-- uses its own rand(<tag>); the argument is ignored by rand() and only keeps
-- the calls distinct. Repeating the same tag inside one multiIf is deliberate:
-- the branches then compare one draw against cumulative thresholds.
INSERT INTO
    limq.link_events (
        event_id,
        event_time,
        date,
        link_id,
        channel_id,
        visitor_id,
        session_id,
        event_type,
        ip_address,
        country,
        city,
        referrer,
        utm_source,
        utm_medium,
        utm_campaign,
        user_agent,
        device_type,
        browser,
        os,
        time_spent_sec,
        is_bounce,
        is_qr_scan,
        qr_code_id,
        conversion_type,
        conversion_value,
        custom_data
    )
SELECT
    generateUUIDv4() AS event_id,
    -- Shift now() back by 0..29 whole days; the time of day is unchanged.
    subtractDays(now(), rand(1) % 30) AS event_time,
    toDate(event_time) AS date,
    '33333333-3333-3333-3333-333333333333' AS link_id,
    'channel-2' AS channel_id,
    concat('visitor-', toString(rand(2) % 100 + 1)) AS visitor_id,
    -- Sessions are assigned round-robin over 30 ids, not randomly.
    concat('session-', toString(number % 30 + 1)) AS session_id,
    -- 70% click, 20% redirect, 8% conversion, 2% error.
    multiIf(
        rand(3) % 100 < 70, 'click',
        rand(3) % 100 < 90, 'redirect',
        rand(3) % 100 < 98, 'conversion',
        'error'
    ) AS event_type,
    concat('192.168.1.', toString(rand(4) % 255)) AS ip_address,
    -- country and city share draw 5 so that they stay paired
    -- (China/Beijing, US/New York, Japan/Tokyo, Other/Other).
    multiIf(
        rand(5) % 100 < 60, 'China',
        rand(5) % 100 < 85, 'US',
        rand(5) % 100 < 95, 'Japan',
        'Other'
    ) AS country,
    multiIf(
        rand(5) % 100 < 60, 'Beijing',
        rand(5) % 100 < 85, 'New York',
        rand(5) % 100 < 95, 'Tokyo',
        'Other'
    ) AS city,
    multiIf(
        rand(6) % 100 < 30, 'https://google.com',
        rand(6) % 100 < 50, 'https://facebook.com',
        rand(6) % 100 < 65, 'https://twitter.com',
        rand(6) % 100 < 75, 'https://instagram.com',
        rand(6) % 100 < 85, 'https://linkedin.com',
        rand(6) % 100 < 90, 'https://bing.com',
        rand(6) % 100 < 95, 'https://baidu.com',
        'direct'
    ) AS referrer,
    -- utm_source and utm_medium share draw 7 so that they stay paired
    -- (google/cpc, facebook/social, email/email, direct/direct).
    multiIf(
        rand(7) % 100 < 40, 'google',
        rand(7) % 100 < 70, 'facebook',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_source,
    multiIf(
        rand(7) % 100 < 40, 'cpc',
        rand(7) % 100 < 70, 'social',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_medium,
    multiIf(
        rand(8) % 100 < 40, 'summer_sale',
        rand(8) % 100 < 70, 'product_launch',
        rand(8) % 100 < 90, 'newsletter',
        'brand'
    ) AS utm_campaign,
    'Mozilla/5.0' AS user_agent,
    multiIf(
        rand(9) % 100 < 60, 'mobile',
        rand(9) % 100 < 85, 'desktop',
        rand(9) % 100 < 95, 'tablet',
        'other'
    ) AS device_type,
    multiIf(
        rand(10) % 100 < 50, 'Chrome',
        rand(10) % 100 < 80, 'Safari',
        rand(10) % 100 < 95, 'Firefox',
        'Edge'
    ) AS browser,
    multiIf(
        rand(11) % 100 < 50, 'iOS',
        rand(11) % 100 < 90, 'Android',
        'Windows'
    ) AS os,
    rand(12) % 600 AS time_spent_sec,
    -- 15% of events are bounces, 10% are QR scans.
    rand(13) % 100 < 15 AS is_bounce,
    rand(14) % 100 < 10 AS is_qr_scan,
    concat('qr-', toString(rand(15) % 10 + 1)) AS qr_code_id,
    multiIf(
        rand(16) % 100 < 50, 'visit',
        rand(16) % 100 < 70, 'stay',
        rand(16) % 100 < 85, 'interact',
        rand(16) % 100 < 93, 'signup',
        rand(16) % 100 < 97, 'subscription',
        'purchase'
    ) AS conversion_type,
    rand(17) % 100 * 1.2 AS conversion_value,
    '{}' AS custom_data
FROM
    numbers(200);
-- Create 400 synthetic events for the fourth link
-- (44444444-4444-4444-4444-444444444444).
--
-- ClickHouse collapses identical rand() calls within one query into a single
-- value per row (common subexpression elimination). With bare rand() every
-- "random" column is a function of the same draw, e.g. every bounce is also a
-- China / mobile / Chrome / google click. Each independent attribute therefore
-- uses its own rand(<tag>); the argument is ignored by rand() and only keeps
-- the calls distinct. Repeating the same tag inside one multiIf is deliberate:
-- the branches then compare one draw against cumulative thresholds.
INSERT INTO
    limq.link_events (
        event_id,
        event_time,
        date,
        link_id,
        channel_id,
        visitor_id,
        session_id,
        event_type,
        ip_address,
        country,
        city,
        referrer,
        utm_source,
        utm_medium,
        utm_campaign,
        user_agent,
        device_type,
        browser,
        os,
        time_spent_sec,
        is_bounce,
        is_qr_scan,
        qr_code_id,
        conversion_type,
        conversion_value,
        custom_data
    )
SELECT
    generateUUIDv4() AS event_id,
    -- Shift now() back by 0..29 whole days; the time of day is unchanged.
    subtractDays(now(), rand(1) % 30) AS event_time,
    toDate(event_time) AS date,
    '44444444-4444-4444-4444-444444444444' AS link_id,
    'channel-2' AS channel_id,
    concat('visitor-', toString(rand(2) % 100 + 1)) AS visitor_id,
    -- Sessions are assigned round-robin over 60 ids, not randomly.
    concat('session-', toString(number % 60 + 1)) AS session_id,
    -- 70% click, 20% redirect, 8% conversion, 2% error.
    multiIf(
        rand(3) % 100 < 70, 'click',
        rand(3) % 100 < 90, 'redirect',
        rand(3) % 100 < 98, 'conversion',
        'error'
    ) AS event_type,
    concat('192.168.1.', toString(rand(4) % 255)) AS ip_address,
    -- country and city share draw 5 so that they stay paired
    -- (China/Beijing, US/New York, Japan/Tokyo, Other/Other).
    multiIf(
        rand(5) % 100 < 60, 'China',
        rand(5) % 100 < 85, 'US',
        rand(5) % 100 < 95, 'Japan',
        'Other'
    ) AS country,
    multiIf(
        rand(5) % 100 < 60, 'Beijing',
        rand(5) % 100 < 85, 'New York',
        rand(5) % 100 < 95, 'Tokyo',
        'Other'
    ) AS city,
    multiIf(
        rand(6) % 100 < 30, 'https://google.com',
        rand(6) % 100 < 50, 'https://facebook.com',
        rand(6) % 100 < 65, 'https://twitter.com',
        rand(6) % 100 < 75, 'https://instagram.com',
        rand(6) % 100 < 85, 'https://linkedin.com',
        rand(6) % 100 < 90, 'https://bing.com',
        rand(6) % 100 < 95, 'https://baidu.com',
        'direct'
    ) AS referrer,
    -- utm_source and utm_medium share draw 7 so that they stay paired
    -- (google/cpc, facebook/social, email/email, direct/direct).
    multiIf(
        rand(7) % 100 < 40, 'google',
        rand(7) % 100 < 70, 'facebook',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_source,
    multiIf(
        rand(7) % 100 < 40, 'cpc',
        rand(7) % 100 < 70, 'social',
        rand(7) % 100 < 90, 'email',
        'direct'
    ) AS utm_medium,
    multiIf(
        rand(8) % 100 < 40, 'summer_sale',
        rand(8) % 100 < 70, 'product_launch',
        rand(8) % 100 < 90, 'newsletter',
        'brand'
    ) AS utm_campaign,
    'Mozilla/5.0' AS user_agent,
    multiIf(
        rand(9) % 100 < 60, 'mobile',
        rand(9) % 100 < 85, 'desktop',
        rand(9) % 100 < 95, 'tablet',
        'other'
    ) AS device_type,
    multiIf(
        rand(10) % 100 < 50, 'Chrome',
        rand(10) % 100 < 80, 'Safari',
        rand(10) % 100 < 95, 'Firefox',
        'Edge'
    ) AS browser,
    multiIf(
        rand(11) % 100 < 50, 'iOS',
        rand(11) % 100 < 90, 'Android',
        'Windows'
    ) AS os,
    rand(12) % 400 AS time_spent_sec,
    -- 20% of events are bounces, 25% are QR scans.
    rand(13) % 100 < 20 AS is_bounce,
    rand(14) % 100 < 25 AS is_qr_scan,
    concat('qr-', toString(rand(15) % 10 + 1)) AS qr_code_id,
    multiIf(
        rand(16) % 100 < 50, 'visit',
        rand(16) % 100 < 70, 'stay',
        rand(16) % 100 < 85, 'interact',
        rand(16) % 100 < 93, 'signup',
        rand(16) % 100 < 97, 'subscription',
        'purchase'
    ) AS conversion_type,
    rand(17) % 100 * 3.5 AS conversion_value,
    '{}' AS custom_data
FROM
    numbers(400);
-- Insert synthetic rows into link_daily_stats.
--
-- numbers(30) yields number = 0..29. Row `number` is dated `number` days
-- before today and assigned to one of the four seeded links round-robin, so
-- each link only has a row on every fourth day.
-- NOTE(review): if a complete 30-day series per link is intended, this needs
-- 120 rows (30 days x 4 links) - confirm against the consumers of this table.
--
-- ClickHouse collapses identical rand() calls within one query into a single
-- value per row, which would make every metric a function of the same draw.
-- Independent metrics therefore use distinct rand(<tag>) calls; the argument
-- is ignored by rand() and only keeps the calls distinct.
-- The former `WHERE number < 30` was dropped: numbers(30) never yields more.
INSERT INTO
    limq.link_daily_stats (
        date,
        link_id,
        total_clicks,
        unique_visitors,
        unique_sessions,
        total_time_spent,
        avg_time_spent,
        bounce_count,
        conversion_count,
        unique_referrers,
        mobile_count,
        tablet_count,
        desktop_count,
        qr_scan_count,
        total_conversion_value
    )
SELECT
    subtractDays(today(), number) AS date,
    multiIf(
        number % 4 = 0, '11111111-1111-1111-1111-111111111111',
        number % 4 = 1, '22222222-2222-2222-2222-222222222222',
        number % 4 = 2, '33333333-3333-3333-3333-333333333333',
        '44444444-4444-4444-4444-444444444444'
    ) AS link_id,
    -- Clicks, visitors and sessions deliberately share draw 1: with one draw
    -- unique_visitors and unique_sessions can never exceed total_clicks.
    50 + rand(1) % 100 AS total_clicks,
    30 + rand(1) % 50 AS unique_visitors,
    20 + rand(1) % 40 AS unique_sessions,
    (500 + rand(2) % 1000) * 60 AS total_time_spent,
    -- 0..9 whole minutes plus 0..59 seconds, drawn independently.
    (rand(3) % 10) * 60 + rand(4) % 60 AS avg_time_spent,
    5 + rand(5) % 20 AS bounce_count,
    rand(6) % 30 AS conversion_count,
    3 + rand(7) % 8 AS unique_referrers,
    20 + rand(8) % 40 AS mobile_count,
    5 + rand(9) % 15 AS tablet_count,
    15 + rand(10) % 30 AS desktop_count,
    rand(11) % 10 AS qr_scan_count,
    rand(12) % 1000 * 2.5 AS total_conversion_value
FROM
    numbers(30);
-- Insert synthetic rows into link_hourly_patterns:
-- 7 days x 24 hours x 4 links = 672 rows, one per (date, hour, link).
--
-- How `number` (0..671) maps to a unique combination:
--   * 7 and 24 are coprime, so (number % 7, number % 24) visits each of the
--     168 day/hour pairs exactly once within every run of 168 numbers;
--   * intDiv(number, 24) % 4 yields a different link index in each of the
--     four 168-number runs for the same day/hour pair.
-- Both rand() calls are identical, so ClickHouse evaluates them as a single
-- draw per row; as a result unique_visitors never exceeds visits.
-- numbers(672) already limits the rows, so no extra WHERE filter is needed.
INSERT INTO limq.link_hourly_patterns (
    date,
    hour,
    link_id,
    visits,
    unique_visitors
)
SELECT
    subtractDays(today(), number % 7) AS date,
    number % 24 AS hour,
    -- Array subscripts are 1-based, hence the "+ 1".
    [
        '11111111-1111-1111-1111-111111111111',
        '22222222-2222-2222-2222-222222222222',
        '33333333-3333-3333-3333-333333333333',
        '44444444-4444-4444-4444-444444444444'
    ][intDiv(number, 24) % 4 + 1] AS link_id,
    5 + rand() % 20 AS visits,
    3 + rand() % 10 AS unique_visitors
FROM numbers(672);
-- Show the row count of each seeded table to verify the inserts succeeded.
-- The column names (metric, value) come from the first SELECT of the union.
SELECT 'link_events 表行数:' AS metric, count() AS value
FROM limq.link_events
UNION ALL
SELECT 'link_daily_stats 表行数:', count()
FROM limq.link_daily_stats
UNION ALL
SELECT 'link_hourly_patterns 表行数:', count()
FROM limq.link_hourly_patterns;