How to invoke Python functions from SQLite?
Romain Clement - Meetup Python Grenoble - 27/04/2023
sqlite3.Connection.create_function(
    name,
    narg,
    func,
    *,
    deterministic=False
)
import sqlite3

def hello(name):
    return f"Hello, {name}!"
conn = sqlite3.connect(":memory:")
conn.create_function("hello", 1, hello)
conn.execute("SELECT hello('Romain')").fetchone()
('Hello, Romain!',)
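The deterministic flag is easy to overlook: declaring a function deterministic lets SQLite use it in index expressions (Python 3.8+, SQLite 3.8.3+). A minimal sketch, with upper_ascii as a made-up example function:

import sqlite3

def upper_ascii(name):
    return name.upper()

conn = sqlite3.connect(":memory:")
# deterministic=True: same inputs always give the same output,
# so SQLite may use the function in index expressions
conn.create_function("upper_ascii", 1, upper_ascii, deterministic=True)
conn.execute("CREATE TABLE people(name TEXT)")
conn.execute("CREATE INDEX idx_people_upper ON people(upper_ascii(name))")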
import base64

def base64_encode(payload):
    return base64.urlsafe_b64encode(str(payload).encode()).decode()

def base64_decode(payload):
    return base64.urlsafe_b64decode(str(payload).encode()).decode()
conn = sqlite3.connect(":memory:")
conn.create_function("base64_encode", 1, base64_encode)
conn.create_function("base64_decode", 1, base64_decode)
conn.execute(
"SELECT base64_encode('Some stuff to be encoded')"
).fetchone()
('U29tZSBzdHVmZiB0byBiZSBlbmNvZGVk',)
conn.execute(
"SELECT base64_decode('U29tZSBzdHVmZiB0byBiZSBlbmNvZGVk')"
).fetchone()
('Some stuff to be encoded',)
conn.execute(
"SELECT base64_decode(base64_encode('Some stuff to be encoded'))"
).fetchone()
('Some stuff to be encoded',)
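One caveat worth knowing: when a registered function raises, sqlite3 only surfaces a generic OperationalError. During development, sqlite3.enable_callback_tracebacks prints the original Python traceback to stderr (always_fail below is a deliberately broken example function):

import sqlite3

sqlite3.enable_callback_tracebacks(True)

def always_fail(value):
    raise ValueError("boom")

conn = sqlite3.connect(":memory:")
conn.create_function("always_fail", 1, always_fail)
try:
    conn.execute("SELECT always_fail(1)")
except sqlite3.OperationalError as exc:
    print(exc)  # generic message; the real traceback goes to stderr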
sqlite3.Connection.create_aggregate(
    name,
    /,
    n_arg,
    aggregate_class
)
SQLite has no built-in median aggregate: let's add one!
import statistics

class Median:
    def __init__(self):
        self.values = []

    def step(self, value):
        # called once per input row
        self.values.append(value)

    def finalize(self):
        # called once, when the aggregation is complete
        return statistics.median(self.values)
conn = sqlite3.connect(":memory:")
conn.create_aggregate("median", 1, Median)
conn.execute("DROP TABLE IF EXISTS test")
conn.execute("CREATE TABLE test(v)")
conn.execute("INSERT INTO test(v) VALUES (1), (2), (3), (100)")
conn.execute("SELECT * FROM test").fetchall()
[(1,), (2,), (3,), (100,)]
conn.execute("SELECT median(v) FROM test").fetchone()
(2.5,)
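Since Python 3.11, Connection.create_window_function registers aggregate window functions as well (requires SQLite 3.25+). A minimal running-sum sketch, reusing the test table from above:

class WindowSum:
    def __init__(self):
        self.total = 0

    def step(self, value):
        # a row enters the window frame
        self.total += value

    def value(self):
        # current aggregate value for the frame
        return self.total

    def inverse(self, value):
        # a row leaves the window frame
        self.total -= value

    def finalize(self):
        return self.total

conn.create_window_function("window_sum", 1, WindowSum)
conn.execute(
    "SELECT v, window_sum(v) OVER (ORDER BY v) FROM test"
).fetchall()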
What if I want to fetch data from a URL? Let's fire HTTP requests directly from a SQL function!
import json
import requests

def request_get(url, headers=None):
    if isinstance(headers, str):
        headers = json.loads(headers)  # headers from SQL arrive as JSON text
    rv = requests.get(url, headers=headers)
    return rv.content.decode()
conn = sqlite3.connect(":memory:")
conn.create_function("request_get", -1, request_get)
conn.execute(
"""
SELECT
request_get(
"https://www.data.gouv.fr/fr/datasets/r/34fc7b52-ef11-4ab0-bc16-e1aae5c942e7"
)
"""
).fetchone()
("code_region,nom_region\n01,Guadeloupe\n02,Martinique\n03,Guyane\n04,La Réunion\n06,Mayotte\n11,Île-de-France\n24,Centre-Val de Loire\n27,Bourgogne-Franche-Comté\n28,Normandie\n32,Hauts-de-France\n44,Grand Est\n52,Pays de la Loire\n53,Bretagne\n75,Nouvelle-Aquitaine\n76,Occitanie\n84,Auvergne-Rhône-Alpes\n93,Provence-Alpes-Côte d'Azur\n94,Corse\n",)
Extracting data from a JSON payload (the -> / ->> operators require SQLite 3.38+):
conn.execute(
"""
SELECT
repo ->> '$.name',
repo ->> '$.stargazers_count',
repo ->> '$.watchers_count'
FROM (
SELECT
request_get(
'https://api.github.com/repos/meetup-python-grenoble/meetup-python-grenoble.github.io'
) AS repo
)
"""
).fetchone()
('meetup-python-grenoble.github.io', 6, 6)
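On SQLite builds older than 3.38, json_extract() does the same job, provided the JSON1 extension is compiled in (which it usually is):

conn.execute(
    """
    SELECT json_extract(repo, '$.name')
    FROM (
        SELECT
            request_get(
                'https://api.github.com/repos/meetup-python-grenoble/meetup-python-grenoble.github.io'
            ) AS repo
    )
    """
).fetchone()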
A mini ETL pipeline from a JSON REST API:
_ = conn.execute(
"""
CREATE TABLE IF NOT EXISTS github_events (
created_at DATETIME NOT NULL,
event_id TEXT NOT NULL,
event_type TEXT NOT NULL,
username TEXT NOT NULL,
payload TEXT NOT NULL
)
"""
)
_ = conn.execute(
"""
INSERT INTO github_events
SELECT
value ->> '$.created_at' AS created_at,
value ->> '$.id' AS event_id,
value ->> '$.type' AS event_type,
value ->> '$.actor.login',
value ->> '$.payload'
FROM
json_each(
request_get(
'https://api.github.com/repos/meetup-python-grenoble/meetup-python-grenoble.github.io/events'
)
)
"""
)
conn.execute(
"""
SELECT created_at, event_id, event_type, username
FROM github_events
LIMIT 5
"""
).fetchall()
[('2023-04-26T12:52:59Z', '28676051673', 'PushEvent', 'renovate[bot]'), ('2023-04-26T09:00:23Z', '28670247485', 'CreateEvent', 'rclement'), ('2023-04-26T08:58:49Z', '28670202743', 'PushEvent', 'rclement'), ('2023-04-26T08:48:09Z', '28669921736', 'PushEvent', 'rclement'), ('2023-04-25T06:42:23Z', '28638542667', 'PushEvent', 'renovate[bot]')]
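Note that rerunning this INSERT duplicates rows. One way to make the load idempotent (an addition to the original, keyed on event_id) is a unique index plus INSERT OR IGNORE:

_ = conn.execute(
    """
    CREATE UNIQUE INDEX IF NOT EXISTS idx_github_events_id
    ON github_events (event_id)
    """
)
# then use INSERT OR IGNORE INTO github_events ... in the pipeline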
What if I want to extract HTML content in SQL?
import json
import bs4

def html_extract(text, selector):
    # explicit parser avoids bs4's "no parser specified" warning
    soup = bs4.BeautifulSoup(text, "html.parser")
    # return matching elements' text as a JSON array (consumable via json_each)
    elements = [el.get_text() for el in soup.select(selector)]
    return json.dumps(elements)
conn = sqlite3.connect(":memory:")
conn.create_function("request_get", -1, request_get)
conn.create_function("html_extract", -1, html_extract)
conn.execute(
"""
SELECT
html_extract(
'<body><h1>Title</h1><p>This is a paragraph</p></body>',
'h1'
)
"""
).fetchone()
('["Title"]',)
Combined with an HTTP request, we get a web scraper:
conn.execute(
"""
SELECT
trim(replace(value, '\n', ''))
FROM
json_each(
html_extract(
request_get('https://www.scrapethissite.com/pages/simple/'),
'.country > .country-name'
)
)
LIMIT 10
"""
).fetchall()
[('Andorra',), ('United Arab Emirates',), ('Afghanistan',), ('Antigua and Barbuda',), ('Anguilla',), ('Albania',), ('Armenia',), ('Angola',), ('Antarctica',), ('Argentina',)]
Going further: machine learning directly in SQL. The sqml_* functions below come from the sqlite-ml extension:

SELECT sqml_load_dataset('iris');
SELECT sqml_train(
'Iris prediction',
'classification',
'logistic_regression',
'dataset_iris',
'target'
);
SELECT
dataset_iris.*,
sqml_predict(
'Iris prediction',
json_object(
'sepal length (cm)', [sepal length (cm)],
'sepal width (cm)', [sepal width (cm)],
'petal length (cm)', [petal length (cm)],
'petal width (cm)', [petal width (cm)]
)
) AS prediction
FROM dataset_iris
LIMIT 1;
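More generally, compiled SQLite extensions can be loaded from Python at runtime; a generic sketch, not specific to sqlite-ml's actual distribution (the ./extension path is a placeholder):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.enable_load_extension(True)    # allow loading extensions
conn.load_extension("./extension")  # placeholder path to a compiled extension
conn.enable_load_extension(False)   # good hygiene: disable afterwards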