cleaning up db

This commit is contained in:
2025-03-12 14:08:38 +08:00
parent ae7ea3b567
commit 285a0c780a
11 changed files with 1011 additions and 70 deletions

View File

@@ -0,0 +1,251 @@
-- 删除旧表
DROP TABLE IF EXISTS events;
DROP TABLE IF EXISTS mv_kol_performance;
DROP TABLE IF EXISTS mv_platform_distribution;
DROP TABLE IF EXISTS mv_sentiment_analysis;
DROP TABLE IF EXISTS mv_interaction_time;
DROP TABLE IF EXISTS mv_conversion_funnel;
-- 创建新的events表
CREATE TABLE events (
-- 基本信息
event_id UUID DEFAULT generateUUIDv4(),
timestamp DateTime DEFAULT now(),
date Date DEFAULT toDate(now()),
hour UInt8 DEFAULT toHour(now()),
-- 实体关联
user_id String,
influencer_id String,
content_id String,
project_id String,
-- 事件信息
event_type Enum8(
'view' = 1,
-- 浏览
'like' = 2,
-- 点赞
'unlike' = 3,
-- 取消点赞
'follow' = 4,
-- 关注
'unfollow' = 5,
-- 取消关注
'comment' = 6,
-- 评论
'share' = 7,
-- 分享
'click' = 8,
-- 点击链接
'impression' = 9,
-- 曝光
'purchase' = 10,
-- 购买
'signup' = 11 -- 注册
),
-- 转化漏斗
funnel_stage Enum8(
'exposure' = 1,
-- 曝光
'interest' = 2,
-- 兴趣
'consideration' = 3,
-- 考虑
'intent' = 4,
-- 意向
'evaluation' = 5,
-- 评估
'purchase' = 6 -- 购买
),
-- 内容信息
platform String,
-- 社交平台
content_type Enum8(
'video' = 1,
'image' = 2,
'text' = 3,
'story' = 4,
'reel' = 5,
'live' = 6
),
content_status Enum8(
-- 审核状态
'approved' = 1,
'pending' = 2,
'rejected' = 3
),
-- 互动分析
sentiment Enum8(
-- 情感分析
'positive' = 1,
'neutral' = 2,
'negative' = 3
),
comment_text String,
-- 评论文本
keywords Array(String),
-- 关键词
-- 数值指标
interaction_value Float64,
-- 互动价值
followers_count UInt32,
-- 粉丝数
followers_change Int32,
-- 粉丝变化量
likes_count UInt32,
-- 点赞数
likes_change Int32,
-- 点赞变化量
views_count UInt32,
-- 观看数
-- 设备信息
ip String,
user_agent String,
device_type String,
referrer String,
-- 地理信息
geo_country String,
geo_city String,
-- 会话信息
session_id String
) ENGINE = MergeTree() PARTITION BY toYYYYMM(timestamp)
ORDER BY
(event_type, influencer_id, date, hour) SETTINGS index_granularity = 8192;
-- 创建物化视图KOL表现概览
CREATE MATERIALIZED VIEW mv_kol_performance ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(influencer_id, date) AS
SELECT
influencer_id,
date,
sum(if(event_type = 'follow', 1, 0)) - sum(if(event_type = 'unfollow', 1, 0)) AS new_followers,
sum(if(event_type = 'like', 1, 0)) - sum(if(event_type = 'unlike', 1, 0)) AS new_likes,
sum(if(event_type = 'view', 1, 0)) AS views,
sum(if(event_type = 'comment', 1, 0)) AS comments,
sum(if(event_type = 'share', 1, 0)) AS shares
FROM
events
GROUP BY
influencer_id,
date;
-- 创建物化视图:平台分布
CREATE MATERIALIZED VIEW mv_platform_distribution ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(platform, date) AS
SELECT
platform,
date,
count() AS events_count,
uniqExact(user_id) AS unique_users,
uniqExact(content_id) AS unique_contents
FROM
events
GROUP BY
platform,
date;
-- 创建物化视图:情感分析
CREATE MATERIALIZED VIEW mv_sentiment_analysis ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(sentiment, date) AS
SELECT
sentiment,
date,
count() AS count
FROM
events
WHERE
sentiment IS NOT NULL
AND event_type = 'comment'
GROUP BY
sentiment,
date;
-- 创建物化视图:用户互动时间
CREATE MATERIALIZED VIEW mv_interaction_time ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(date, hour) AS
SELECT
date,
hour,
count() AS events_count,
uniqExact(user_id) AS unique_users
FROM
events
GROUP BY
date,
hour;
-- 创建物化视图:内容审核状态
CREATE MATERIALIZED VIEW mv_content_status ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(content_status, date) AS
SELECT
content_status,
date,
count() AS count
FROM
events
WHERE
content_status IS NOT NULL
GROUP BY
content_status,
date;
-- 创建物化视图:转化漏斗
CREATE MATERIALIZED VIEW mv_conversion_funnel ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(funnel_stage, date) AS
SELECT
funnel_stage,
date,
count() AS stage_count,
uniqExact(user_id) AS unique_users
FROM
events
WHERE
funnel_stage IS NOT NULL
GROUP BY
funnel_stage,
date;
-- 创建物化视图:热门内容
CREATE MATERIALIZED VIEW mv_popular_content ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(content_id, date) AS
SELECT
content_id,
influencer_id,
date,
sum(if(event_type = 'view', 1, 0)) AS views,
sum(if(event_type = 'like', 1, 0)) AS likes,
sum(if(event_type = 'comment', 1, 0)) AS comments,
sum(if(event_type = 'share', 1, 0)) AS shares
FROM
events
GROUP BY
content_id,
influencer_id,
date;
-- 创建物化视图:关键词分析
CREATE MATERIALIZED VIEW mv_keywords ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
ORDER BY
(keyword, date) AS
SELECT
arrayJoin(keywords) AS keyword,
date,
count() AS frequency
FROM
events
WHERE
length(keywords) > 0
GROUP BY
keyword,
date;