17  SQL in Excel: Example

SQL Commands & Clauses

17.1 Data: Tables

17.2 Connection using MS Excel

  • All examples in this chapter are presented using MS Excel

  • For running the SQL statements in MS Excel, see the previous chapter for Options 1 and 2.

  • If you select Option 2, check that all cells are blank in the range where outputs will be displayed.

  • In principle, just copy and paste the SQL statement.

  • The SQL commands and clauses are not case sensitive; in general, they are written in upper case.

  • Note that Excel requires SQL query table names to include a $ sign and be enclosed in [].

  • Here is an example: SELECT * FROM [tblStudent$]

  • Options 1 and 2 cannot accept multiline SQL statements. Submit the SQL statement in one line.

Code
SELECT * FROM [tblStudent$]

17.3 SELECT

Code
SELECT * FROM [tblStudent$]

SELECT SName, Gender, Age FROM [tblStudent$]

SELECT SName AS StudentName, Gender, Age FROM [tblStudent$]

17.4 WHERE

Code
sSQL = "
SELECT SName AS StudentName, Gender, Age
FROM [tblStudent$]
WHERE Age > 16 AND Attendance > 85
"

17.5 ORDER BY

Code
sSQL = "
SELECT SName AS StudentName, Gender, Age, Attendance
FROM [tblStudent$]
WHERE Age > 16 AND Attendance > 85
ORDER BY Attendance ASC
"

17.6 SELECT with DISTINCT

Code
sSQL = "
SELECT COUNT(DISTINCT Age)
FROM [tblStudent$]
"

sSQL = "
SELECT DISTINCT ClassID
FROM [tblStudent$]
WHERE Age > 16 AND Attendance > 85
ORDER BY Attendance ASC
"

17.7 SELECT with Aggregate function

Code
sSQL = "
SELECT COUNT(DISTINCT Age)
FROM [tblStudent$]
"

sSQL = "
SELECT COUNT(Age)
FROM [tblStudent$]
"

sSQL = "
SELECT COUNT(Age) AS N
FROM [tblStudent$]
WHERE Attendance > 85
"


sSQL = "
SELECT COUNT(Age) AS N
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
"

sSQL = "
SELECT COUNT(Age) AS N, AVG(Age) as AvgAge
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
"

sSQL = "
SELECT MIN(Age) AS Min, MAX(Age) AS Max, 
       SUM(Age) AS Sum, AVG(Age) AS Avg, COUNT(Age) AS N
FROM [tblStudent$] 
ORDER BY N ASC
"

17.8 GROUP BY

Code
sSQL = "
SELECT Gender, COUNT(DISTINCT ClassID) AS N
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
ORDER BY Gender ASC
"

sSQL = "
SELECT Gender, ClassID, COUNT(DISTINCT ClassID) AS N
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender, ClassID
ORDER BY ClassID, Gender ASC
"


sSQL = "
SELECT Gender, COUNT(DISTINCT ClassID) AS N, AVG(Age) AS AvgAge
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
ORDER BY N ASC
"


sSQL = "
SELECT Gender,
       COUNT(DISTINCT ClassID) AS N,
       MIN(Age) AS Min, MAX(Age) AS Max, 
       SUM(Age) AS Sum, AVG(Age) AS Avg
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
ORDER BY N ASC
"

17.9 HAVING

Code
sSQL = "
SELECT Age, COUNT(ClassID) AS N
FROM [tblStudent$]
WHERE Attendance > 85
GROUP BY Age
HAVING N > 1
"

sSQL = "
SELECT Gender, COUNT(ClassID) AS N, AVG(Age) as AvgAge
FROM [tblStudent$] 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
HAVING AvgAge > 16
ORDER BY N ASC
"

17.10 FROM with JOIN

  • Read all tables as data.frame

  • Register the connection with duckdb

  • Note separate table names for each table

Code
tblClass = read.table(file = "tblClass.txt", sep = "\t", header = TRUE)
tblTeacher = read.table(file = "tblTeacher.txt", sep = "\t", header = TRUE)
tblStudent = read.table(file = "tblStudent.txt", sep = "\t", header = TRUE)
tblMarks = read.table(file = "tblMarks.txt", sep = "\t", header = TRUE)

conn = dbConnect(duckdb())

duckdb_register(conn = conn, name = "tblClass", df = tblClass)
duckdb_register(conn = conn, name = "tblTeacher", df = tblTeacher)
duckdb_register(conn = conn, name = "tblStudent", df = tblStudent)
duckdb_register(conn = conn, name = "tblMarks", df = tblMarks)

17.10.1 LEFT JOIN

  • LEFT JOIN: Everything on the LEFT AND those matches on the RIGHT

LEFT JOIN

Code
sSQL = "
SELECT * 
FROM tblClass
LEFT JOIN [tblTeacher$] 
ON [tblClass$].TeacherID = [tblTeacher$].TeacherID
"

sSQL = "
SELECT * 
FROM [tblClass$] AS C
LEFT JOIN [tblTeacher$] AS T
ON C.TeacherID = T.TeacherID
"

sSQL = "
SELECT [tblClass$].ClassID, [tblClass$].Subject, [tblClass$].TeacherID 
FROM tblClass
LEFT JOIN [tblTeacher$] AS T
ON tblClass.TeacherID = T.TeacherID
"

sSQL = "
SELECT C.ClassID, C.Subject, C.TeacherID 
FROM [tblClass$] AS C
LEFT JOIN [tblTeacher$] AS T
ON C.TeacherID = T.TeacherID
"

sSQL = "
SELECT C.ClassID, C.Subject, C.TeacherID, 
       T.TeacherID, T.TName, T.Experience
FROM [tblClass$] AS C
LEFT JOIN [tblTeacher$] AS T
ON C.TeacherID = T.TeacherID
"

sSQL = "
SELECT T.TeacherID, T.TName, T.Experience,
       C.ClassID, C.Subject, C.TeacherID
FROM [tblTeacher$] AS T
LEFT JOIN [tblClass$] AS C 
ON T.TeacherID = C.TeacherID
"

17.10.2 Anti LEFT JOIN

  • Anti LEFT JOIN or LEFT JOIN Exclusive

  • Everything on the LEFT that is NOT on the RIGHT

LEFT JOIN Exclusive

Code
sSQL = "
SELECT T.TeacherID, T.TName, T.Experience,
       C.ClassID, C.Subject, C.TeacherID
FROM [tblTeacher$] AS T
LEFT JOIN [tblClass$] AS C 
ON T.TeacherID = C.TeacherID
WHERE C.TeacherID IS NULL
"

17.10.3 RIGHT JOIN

  • RIGHT JOIN: Everything on the RIGHT AND those matches on the LEFT

RIGHT JOIN

Code
sSQL = "
SELECT C.ClassID, C.Subject,
       S.StudentID, S.SName, S.ClassID
FROM [tblClass$] AS C 
RIGHT JOIN [tblStudent$] AS S
ON C.ClassID = S.ClassID
ORDER BY S.StudentID
"

17.10.4 Anti RIGHT JOIN

  • Anti RIGHT JOIN or RIGHT JOIN Exclusive

  • Everything on the RIGHT that is NOT on the LEFT

RIGHT JOIN Exclusive

Code
sSQL = "
SELECT C.ClassID, C.Subject,
       S.StudentID, S.SName, S.ClassID
FROM [tblClass$] AS C 
RIGHT JOIN [tblStudent$] AS S
ON C.ClassID = S.ClassID
WHERE C.ClassID IS NULL
ORDER BY S.StudentID
"

17.10.5 INNER JOIN

  • INNER JOIN: Only those matches on the LEFT AND the RIGHT

INNER JOIN

Code
sSQL = "
SELECT S.StudentID, S.SName, S.ClassID,
       M.StudentID, M.Marks
FROM [tblStudent$] AS S
INNER JOIN [tblMarks$] AS M
ON S.StudentID = M.StudentID
ORDER BY S.StudentID
"

17.10.6 FULL JOIN

  • FULL JOIN or OUTER JOIN: Everything on the LEFT AND everything on the RIGHT

FULL JOIN

Code
sSQL = "
SELECT S.StudentID, S.SName, S.ClassID,
       M.StudentID, M.Marks
FROM [tblStudent$] AS S
FULL JOIN [tblMarks$] AS M
ON S.StudentID = M.StudentID
ORDER BY S.StudentID
"