MySQL:
database:数据库 =》 文件夹
table:表 =》excel
1.字段类型
数值类型:
int 整型
long 长整型
bigint
float 单精度
double 双精度
decimal 小数 跟钱有关
字符串
char 字符 0-255 长度 zuoshaoxxxxxxx 255 自动补齐
varchar 字符串 变长 zuoshao
text 文本
日期:
date 日期 YYYY-MM-DD
time 时间 HH:mm:SS
datetime 年月日时分秒 YYYY-MM-DD HH:mm:SS
timestamp 年月日时分秒【时间戳】 YYYY-MM-DD HH:mm:SS
2.sql类型
ddl 数据定义语言:create drop
dml 数据操作语言:select insert update delete
dcl 数据控制语言:grant
3.基本语法【sql】
1.创建数据库
CREATE {DATABASE | SCHEMA} [IF NOT EXISTS] db_name
create database gh;
2.查看数据库
show databases;
3.使用数据库 [切换数据库] default
use gh;
4.创建表
CREATE [TEMPORARY] TABLE [IF NOT EXISTS] tbl_name
(create_definition,...)
create_definition: {
col_name data_type
}
create table student(
id int(11) not null auto_increment,
name varchar(255),
age int(3),
create_user varchar(255),
create_time timestamp not null default current_timestamp,
update_user varchar(255),
update_time timestamp not null default current_timestamp on update current_timestamp,
primary key(id)
);
要求:规范
1.表名字 不要写中文
2.建表风格:
1.id这个字段 一般是自增主键
2.
5.查看表
show tables;
6.插入数据
insert
INSERT
[INTO] tbl_name
[(col_name [, col_name] ...)]
{ {VALUES | VALUE} (value_list) [, (value_list)] ... }
insert into gh.student (name,age) VALUES ("haoge",21),("bangzhang",20);
insert into gh.student (name,age) VALUES ("zuoshao",22);
insert into gh.student VALUES 全部指定;
insert into gh.student (name,age) VALUES ("zuoshao",22);
update gh.student set name="左少" where name="zuoshao";
insert into gh.student (name,age) VALUES ("浩哥",22);
7.查看数据
select 语法结构:
select 字段名字 from 表名字 [where]
* 表示 显示表里面所有字段
select * from gh.student;
8.更新数据
update
UPDATE table_reference
SET assignment_list
[WHERE where_condition]
update gh.student set age=80 where name="bangzhang";
9.删除数据
delete
DELETE FROM tbl_name
[WHERE where_condition]
delete from gh.student where name="banzhang";
update 、delete 思考 是否加 where
update gh.student set age=80 ;
delete from gh.student;
其他语法:
1.条件过滤 where
=
>
<
and
or
in
not in
select * from gh.student where age <20;
select * from gh.student where age=20 and name="zuoshao" ;
select * from gh.student where age=20 or name="zuoshao" ;
select * from gh.student where age in (20,12,11,99) ;
select * from gh.student where age not in (20,12,11,99) ;
2.排序语法
order by
select * from gh.student order by age asc;
select * from gh.student order by age desc;
select * from gh.student order by age desc,name desc;
3.模糊查询
like :
1.%
2._ 占位符
regexp 【正则表达式】【课后作业】
select * from gh.student where name like 'z%' ;
select * from gh.student where name like '%z%' ;
select * from gh.student where name like '%z' ;
先思考: name 第三个字母是n
select * from gh.student where name like '__n%' ;
4.合并表
union 去重
union all 不去重
create table a(id int ,name varchar(255));
create table b(id int ,name varchar(255));
insert into a VALUES(1,'左少');
insert into b VALUES(1,'左少');
insert into b VALUES(1,'左少');
select * from a
union
select * from b;
select * from a
union all
select * from b
5.null
"" '' 'null' null
1.过滤:
is
is not
select * from student where create_user is null;
2.数据清洗(etl)
脏数据 =》 规范的数据
函数 :ifnull ,coalesce
null -> --
select
id
,name
,age
,ifnull(create_user,"--") as elt_create_user
,create_time
, ifnull(update_user,"--") as elt_update_user
,update_time
from gh.student ;
select
id
,name
,age
,coalesce(create_user,"--") as elt_create_user
,create_time
, coalesce(update_user,"--") as elt_update_user
,update_time
from gh.student ;
6.聚合语法 【聚合函数】
1.聚合函数
sum
count
max
min
avg
聚合函数:多行数据 按照一定规则【聚合函数】 聚合为一行
理论上说:
聚合后的行数 <=聚合前的行数
2.group by 【分组】
select avg(age) from student; //整个班 平均年龄
需求:
1.求name中带有zuoshao 的平均年龄
select avg(age) from student where name like "%zuoshao%";
2.求每个name 的平均年龄
name :zuoshaoxx
name:其他
指标:avg age
select
'zuoshao' as name_1,
avg(age) as avg_age
from student
where
name like "%zuoshao%"
group by name_1
union all
select
name,
avg(age) as avg_age
from student
where
name not like "%zuoshao%"
group by name;
select
if(name like "%zuoshao%","zuoshao",name) as name_1,
avg(age) as avg_age
from student
group by name_1;
select
case when
name like "%zuoshao%" then "zuoshao"
else name end as name_1,
avg(age) as avg_age
from student
group by name_1;
3.2求每个name 的平均年龄
select
name,
avg(age) as avg_age
from student
group by
name;
4. 查询 平均年龄 20
select
case when
name like "%zuoshao%" then "zuoshao"
else name end as name_1,
avg(age) as avg_age
from student
group by name_1
聚合之后 过滤不能用where having
select
case when
name like "%zuoshao%" then "zuoshao"
else name end as name_1,
avg(age) as avg_age
from student
group by name_1
having avg_age>20;
select
name_1 as name ,
round(avg_age,2) as age
from
(
select
case when
name like "%zuoshao%" then "zuoshao"
else name end as name_1,
avg(age) as avg_age
from student
group by name_1
) as tmp
where
avg_age >20;
子查询:
sql 套sql
select
sum(age) as sum_age,
count(1) as cnt, -- 统计 表中数据条数
max(age) as max_age,
min(age) as min_age,
avg(age) as avg_age
from student;
统计表中name字段数据条数【name不能重复】
1.select count(distinct name) as cnt from student;
select count(distinct name) as cnt from student;
2.使用group by 执行效率高
select
count(name) as cnt
from
(
select
name
from student
group by name
) as a;
group by :分组
select
name_1,
avg(age) as avg_age
from student
group by name_1
group by :
x,1
y,1
z,1
x,1
=> 分组+聚合函数 =》 “拉倒一起” “去做一些事情””
“拉倒一起”: group by
x,<1,1>
y,<1>
z,<1>
“去做一些事情””avg
x,<1,1> => x,(1+1) /2
y,<1> => y, 1/1
子查询:
select
from
(
select
from
) as a ; ***
select
from xx
where
column in (select id from xxxx );
7.join(多表联查)
种类:7大类
create table a1(id int ,name varchar(255),address varchar(255));
create table b1(id int ,name varchar(255),age int(3));
insert into a1 VALUES(1,'zuoshao','长春');
insert into a1 VALUES(2,'haoge','苏州');
insert into a1 VALUES(4,'banzhang','山东');
insert into b1 VALUES(1,'zuoshao',21);
insert into b1 VALUES(2,'haoge',21);
insert into b1 VALUES(3,'xuanxuan',29);
sql 7 join:
1.内连接 inner join (join)
select
a.*,
b.*
from a1 as a join b1 as b
on a.id = b.id and a.name=b.name;
select
a.id,
a.name,
a.address,
b.age
from a1 as a join b1 as b
on a.id = b.id and a.name=b.name;
2.左连接 left join
以左表为主,数据是全的 ,右表来匹配,匹配不上就是 null
select
a.*,
b.*
from a1 as a left join b1 as b
on a.id = b.id and a.name=b.name;
3.右连接 right join
以右表为主,数据是全的 ,左表来匹配,匹配不上就是 null
select
a.*,
b.*
from a1 as a right join b1 as b
on a.id = b.id and a.name=b.name;
4.全连接 full outer join
全连接表都为主,数据是全的
左表来匹配,匹配不上就是 null
右表来匹配,匹配不上就是 null
左连接: 99.99 00.1 inner join
主表 维表 =》 事实表 维度表
主表 维表
维度组合:
通过维度去分析指标
维度:column
指标:sum max min count
三个维度:
id | name | age
指标 : count条数
select
id,
name,
age,
count(1) as cnt
from student
group by
id,
name,
age
注意:
select 字段、指标【聚合函数】 from xx where
group by 字段
有视频=》 大数据 => 电商的进销存 erp =》 指标查询
grouping sets
java
linux shell mysql hadoop** hive***
zookeeper kafka* flume* hbase** scala spark**** flink ***
maxwell cannel sqoop* datax* clickhouse*
sql on hbase => phoniex
spark****:
sparkcore 离线
sparksql 离线
sparkstreaming 实时
structed streaming 实时 ***
flink :
flink
flinkcdc
flinksql ****
离线数仓: *****
实时数仓:***
实时同步***
大数据同步工具**sparksql
maxwell 底层开发 issue
t5 =>
极客时间 =》 可以
官网 =》
大厂分享博客=》
公众号=》
问 同事