Are you getting ready for an upcoming technical interview, but the cheatsheets don't really help with the more complex codes? This SQL code review is by no means a beginners guide. However, if you have learned all the basics and have worked on SQL, then this will refresh the practical use of SQL in your memory. Enjoy!
CREATION
-- Create table from scratch
CREATE TABLE letscreatetable (
id string,
code int,
name string,
amount double,
buy_date date,
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
-- add data load from hdfs directory to hive tables
LOAD DATA INPATH '/user/hive/newtable.csv' OVERWRITE INTO TABLE letscreatetable;
-- add data load from local directory to hive tables
LOAD DATA LOCAL INPATH '/user/local/newtable.csv' OVERWRITE INTO TABLE letscreatetable;
-- Create table from another tables as base
CREATE TABLE
database.anothertable AS
SELECT
id,
DAY(date) AS day_of_month,
DAYOFTHEWEEK(date) AS day_of_week_method1,
WEEKDAY(date) AS day_of_week_method2,
WEEKOFTHEYEAR(date) AS week_of_year, -- useful if you have weekly running things
MONTH(date) AS month_method1,
DATE_FORMAT(date, "%m") AS month_method2, -- also works to extract day, year only
DATE_FORMAT(date, "%M %d %Y") AS date_format_change, -- suppose date is "2017-06-15" this form
CURRENT_TIMESTAMP AS current_time, -- useful for when you have different sanpshots of the same features
CURRENT_DATE AS current_date,
DATEDIFF(wk, CURRENT_TIMESTAMP, date), -- difference in dates, you can specify the interval
CASE
WHEN
WEEKDAY(date) > 5
or date='2018-01-01'
or date='2018-12-25'
THEN 0 ELSE 1
END AS workday_indicator,
AVG(price) OVER (ORDER BY date ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS avg_price_over_prev_now,
MAX(price) OVER (PARTITION BY id ORDER BY date)
FROM
(SELECT * FROM source_table WHERE date IS NOT NULL)
-- Create table from multiple tables as base
CREATE TABLE database.anothernewtable AS
WITH source1 AS
(SELECT
id,
date,
sum(sold_price) as sales
FROM
another_db.sourcetb1
WHERE
date >= to_date(from_unixtime(messedup_date_of_unixtime))
GROUP BY id, date),
WITH source2 AS
(SELECT
id,
date,
sum(bought_price) as cost
FROM
another_db.sourcetb2
WHERE
date >= to_date(from_unixtime(messedup_date_of_unixtime))
GROUP BY id, date),
SELECT *
FROM source1 AS A
JOIN source2 AS B ON A.id = B.id, A.date = B.date
JOIN (SELECT id, date, descp FROM another_db.source3) AS C ON C.id = A.id, C.date = A.date
JOINS
SELECT DISTINCT *
FROM table1
INNER JOIN table2 ON table1.id = table2.id
WHERE tabel1.id LIKE "%s" --search for other regex patterns (a%, %a, "%a%"")
LIMIT 500
ORDER BY table1.id
SELECT
CONCAT(TRIM(t1.id_pt1), TRIM(t2.id_pt2)) AS id,
ROUND(t1.price) AS price
FROM table1 AS t1
LEFT JOIN table2 AS t2 ON t1.id = t2.id
UNION ALL
SELECT
CONCAT(TRIM(t3.id_pt1), TRIM(t4.id_pt2)) AS id,
ROUND(t4.price) AS price
FROM table3 AS t3
RIGHT JOIN table4 AS t4 ON t3.id = t4.id
SELECT *
FROM table1 AS t1
FULL OUTER JOIN table2 AS t2 ON t1.id = t2.id
WHERE (t1.id IS NOT NULL) AND (t2.id IS NOT NULL)
SELECT name
FROM person AS t1
CROSS JOIN (SELECT price FROM pricelist) AS t2
--for every t1 matches to every t2
UPDATE & INSERT & ALTER
INSERT INTO table (col1, col2, col3)
VALUES (val1, val2, val3)
ALTER TABLE table
ADD newcol String
UPDATE table
SET col1 = value1, col2 = val2
WHERE col1 = "idddd"
OUTPUT
INSERT OVERWRITE DIRECTORY '/user/data'ROW FORMAT DELIMITEDFIELDS TERMINATED BY '|'NULL DEFINED AS ''STORED AS PARQUET
SELECT * FROM data
Comentários