# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
"""Utility functions used across Superset"""
import decimal
import errno
import functools
import json
import logging
import os
import signal
import smtplib
import traceback
import uuid
import zlib
from datetime import date, datetime, time, timedelta
from email.mime.application import MIMEApplication
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
from enum import Enum
from time import struct_time
from typing import Any, Dict, Iterator, List, NamedTuple, Optional, Set, Tuple, Union
from urllib.parse import unquote_plus

import bleach
import markdown as md
import numpy as np
import pandas as pd
import parsedatetime
import sqlalchemy as sa
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from flask import current_app, flash, Flask, g, Markup, render_template
from flask_appbuilder import SQLA
from flask_appbuilder.security.sqla.models import User
from flask_babel import gettext as __, lazy_gettext as _
from sqlalchemy import event, exc, select, Text
from sqlalchemy.dialects.mysql import MEDIUMTEXT
from sqlalchemy.sql.type_api import Variant
from sqlalchemy.types import TEXT, TypeDecorator

from superset.exceptions import SupersetException, SupersetTimeoutException
from superset.utils.dates import datetime_to_epoch, EPOCH

try:
    from pydruid.utils.having import Having
except ImportError:
    pass

logging.getLogger("MARKDOWN").setLevel(logging.INFO)
logger = logging.getLogger(__name__)

DTTM_ALIAS = "__timestamp"

ADHOC_METRIC_EXPRESSION_TYPES = {"SIMPLE": "SIMPLE", "SQL": "SQL"}

JS_MAX_INTEGER = 9007199254740991  # Largest int JavaScript can handle (2^53 - 1)

try:
    # Having might not have been imported.
    class DimSelector(Having):
        def __init__(self, **args):
            # Just a hack to prevent any exceptions
            Having.__init__(self, type="equalTo", aggregation=None, value=None)
            self.having = {
                "having": {
                    "type": "dimSelector",
                    "dimension": args["dimension"],
                    "value": args["value"],
                }
            }

except NameError:
    pass

def flasher(msg, severity=None):
    """Flask's flash if available, logging call if not"""
    try:
        flash(msg, severity)
    except RuntimeError:
        if severity == "danger":
            logger.error(msg)
        else:
            logger.info(msg)

class _memoized:
    """Decorator that caches a function's return value each time it is called.

    If called later with the same arguments, the cached value is returned and
    not re-evaluated.

    Define ``watch`` as a tuple of attribute names if this decorator
    should account for instance variable changes.
    """

    def __init__(self, func, watch=()):
        self.func = func
        self.cache = {}
        self.is_method = False
        self.watch = watch

    def __call__(self, *args, **kwargs):
        key = [args, frozenset(kwargs.items())]
        if self.is_method:
            key.append(tuple([getattr(args[0], v, None) for v in self.watch]))
        key = tuple(key)
        if key in self.cache:
            return self.cache[key]
        try:
            value = self.func(*args, **kwargs)
            self.cache[key] = value
            return value
        except TypeError:
            # uncachable -- for instance, passing a list as an argument.
            # Better to not cache than to blow up entirely.
            return self.func(*args, **kwargs)

    def __repr__(self):
        """Return the function's docstring."""
        return self.func.__doc__

    def __get__(self, obj, objtype):
        # Support instance methods.
        if not self.is_method:
            self.is_method = True
        return functools.partial(self.__call__, obj)

def memoized(func=None, watch=None):
    if func:
        return _memoized(func)
    else:

        def wrapper(f):
            return _memoized(f, watch)

        return wrapper
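
# Illustrative usage (editor's sketch, not part of the original module).
# ``load_rows`` is hypothetical; the point is that with ``watch`` the cache
# key also includes the named instance attributes, so changing
# ``self.source`` bypasses the stale cached value.
#
#   @memoized
#   def fib(n):
#       return n if n < 2 else fib(n - 1) + fib(n - 2)
#
#   class Loader:
#       @memoized(watch=("source",))
#       def rows(self):
#           return load_rows(self.source)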

def parse_js_uri_path_item(
    item: Optional[str], unquote: bool = True, eval_undefined: bool = False
) -> Optional[str]:
    """Parse a URI path item made with JavaScript.

    :param item: a URI path component
    :param unquote: unquote the string using urllib.parse.unquote_plus()
    :param eval_undefined: when set to True and item is either 'null' or
        'undefined', assume item is undefined and return None.
    :return: either None, the original item, or the unquoted item
    """
    item = None if eval_undefined and item in ("null", "undefined") else item
    return unquote_plus(item) if unquote and item else item

def string_to_num(s: str):
    """Converts a string to an int/float

    Returns ``None`` if it can't be converted

    >>> string_to_num('5')
    5
    >>> string_to_num('5.2')
    5.2
    >>> string_to_num(10)
    10
    >>> string_to_num(10.1)
    10.1
    >>> string_to_num('this is not a string') is None
    True
    """
    if isinstance(s, (int, float)):
        return s
    if s.isdigit():
        return int(s)
    try:
        return float(s)
    except ValueError:
        return None


def list_minus(l: List, minus: List) -> List:
    """Returns l without what is in minus

    >>> list_minus([1, 2, 3], [2])
    [1, 3]
    """
    return [o for o in l if o not in minus]

def parse_human_datetime(s):
    """
    Returns ``datetime.datetime`` from human readable strings

    >>> from datetime import date, timedelta
    >>> from dateutil.relativedelta import relativedelta
    >>> parse_human_datetime('2015-04-03')
    datetime.datetime(2015, 4, 3, 0, 0)
    >>> parse_human_datetime('2/3/1969')
    datetime.datetime(1969, 2, 3, 0, 0)
    >>> parse_human_datetime('now') <= datetime.now()
    True
    >>> parse_human_datetime('yesterday') <= datetime.now()
    True
    >>> date.today() - timedelta(1) == parse_human_datetime('yesterday').date()
    True
    >>> year_ago_1 = parse_human_datetime('one year ago').date()
    >>> year_ago_2 = (datetime.now() - relativedelta(years=1)).date()
    >>> year_ago_1 == year_ago_2
    True
    """
    if not s:
        return None
    try:
        dttm = parse(s)
    except Exception:
        try:
            cal = parsedatetime.Calendar()
            parsed_dttm, parsed_flags = cal.parseDT(s)
            # when time is not extracted, we 'reset to midnight'
            if parsed_flags & 2 == 0:
                parsed_dttm = parsed_dttm.replace(hour=0, minute=0, second=0)
            dttm = dttm_from_timetuple(parsed_dttm.utctimetuple())
        except Exception as e:
            logger.exception(e)
            raise ValueError("Couldn't parse date string [{}]".format(s))
    return dttm


def dttm_from_timetuple(d: struct_time) -> datetime:
    return datetime(d.tm_year, d.tm_mon, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec)

class DashboardEncoder(json.JSONEncoder):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.sort_keys = True

    # pylint: disable=E0202
    def default(self, o):
        try:
            vals = {k: v for k, v in o.__dict__.items() if k != "_sa_instance_state"}
            return {"__{}__".format(o.__class__.__name__): vals}
        except Exception:
            if type(o) == datetime:
                return {"__datetime__": o.replace(microsecond=0).isoformat()}
            return json.JSONEncoder(sort_keys=True).default(self, o)

def parse_human_timedelta(s: Optional[str]) -> timedelta:
    """
    Returns ``datetime.timedelta`` from natural language time deltas

    >>> parse_human_timedelta('1 day') == timedelta(days=1)
    True
    """
    cal = parsedatetime.Calendar()
    dttm = dttm_from_timetuple(datetime.now().timetuple())
    d = cal.parse(s or "", dttm)[0]
    d = datetime(d.tm_year, d.tm_mon, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec)
    return d - dttm
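
# Illustrative usage (editor's examples; the exact repr depends on the
# Python version, and deltas are computed relative to "now", so calendar
# grains such as months depend on the current date):
#
#   >>> parse_human_timedelta('2 days')
#   datetime.timedelta(days=2)
#   >>> parse_past_timedelta('1 year')   # defined below; always positive
#   datetime.timedelta(days=365)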

def parse_past_timedelta(delta_str: str) -> timedelta:
    """
    Takes a delta like '1 year' and finds the timedelta for that period in
    the past, then represents that past timedelta in positive terms.

    parse_human_timedelta('1 year') finds the timedelta 1 year in the future.
    parse_past_timedelta('1 year') returns -datetime.timedelta(-365)
    or datetime.timedelta(365).
    """
    return -parse_human_timedelta(
        delta_str if delta_str.startswith("-") else f"-{delta_str}"
    )

class JSONEncodedDict(TypeDecorator):
    """Represents an immutable structure as a json-encoded string."""

    impl = TEXT

    def process_bind_param(self, value, dialect):
        if value is not None:
            value = json.dumps(value)
        return value

    def process_result_value(self, value, dialect):
        if value is not None:
            value = json.loads(value)
        return value
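
# Illustrative usage (editor's sketch, not part of the original module).
# ``ExampleModel`` and its declarative base are hypothetical; the point is
# that the column accepts a dict and transparently stores it as JSON text.
#
#   class ExampleModel(Model):
#       __tablename__ = "example"
#       id = sa.Column(sa.Integer, primary_key=True)
#       extra = sa.Column(JSONEncodedDict())
#
#   row = ExampleModel(extra={"key": "value"})  # persisted as '{"key": "value"}'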

def datetime_f(dttm):
    """Formats datetime to take less room when it is recent"""
    if dttm:
        dttm = dttm.isoformat()
        now_iso = datetime.now().isoformat()
        if now_iso[:10] == dttm[:10]:
            dttm = dttm[11:]
        elif now_iso[:4] == dttm[:4]:
            dttm = dttm[5:]
    return "<nobr>{}</nobr>".format(dttm)


def format_timedelta(td: timedelta) -> str:
    """
    Ensures negative time deltas are easily interpreted by humans

    >>> td = timedelta(0) - timedelta(days=1, hours=5, minutes=6)
    >>> str(td)
    '-2 days, 18:54:00'
    >>> format_timedelta(td)
    '-1 day, 5:06:00'
    """
    if td < timedelta(0):
        return "-" + str(abs(td))
    else:
        # Change this to format positive time deltas the way you want
        return str(td)

def base_json_conv(obj):
    if isinstance(obj, memoryview):
        obj = obj.tobytes()
    if isinstance(obj, np.int64):
        return int(obj)
    elif isinstance(obj, np.bool_):
        return bool(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, set):
        return list(obj)
    elif isinstance(obj, decimal.Decimal):
        return float(obj)
    elif isinstance(obj, uuid.UUID):
        return str(obj)
    elif isinstance(obj, timedelta):
        return format_timedelta(obj)
    elif isinstance(obj, bytes):
        try:
            return obj.decode("utf-8")
        except Exception:
            return "[bytes]"
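
# Illustrative behavior (editor's examples): values JSON can't encode are
# converted to plain Python equivalents; anything unhandled falls through
# and the function implicitly returns None.
#
#   >>> base_json_conv(np.int64(7))
#   7
#   >>> base_json_conv(decimal.Decimal("1.5"))
#   1.5
#   >>> base_json_conv(b"bytes")
#   'bytes'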

def json_iso_dttm_ser(obj, pessimistic: Optional[bool] = False):
    """
    json serializer that deals with dates

    >>> dttm = datetime(1970, 1, 1)
    >>> json.dumps({'dttm': dttm}, default=json_iso_dttm_ser)
    '{"dttm": "1970-01-01T00:00:00"}'
    """
    val = base_json_conv(obj)
    if val is not None:
        return val
    if isinstance(obj, (datetime, date, time, pd.Timestamp)):
        obj = obj.isoformat()
    else:
        if pessimistic:
            return "Unserializable [{}]".format(type(obj))
        else:
            raise TypeError(
                "Unserializable object {} of type {}".format(obj, type(obj))
            )
    return obj


def pessimistic_json_iso_dttm_ser(obj):
    """Proxy to call json_iso_dttm_ser in a pessimistic way

    If an object is not serializable to json, serialization will still succeed"""
    return json_iso_dttm_ser(obj, pessimistic=True)


def json_int_dttm_ser(obj):
    """json serializer that deals with dates"""
    val = base_json_conv(obj)
    if val is not None:
        return val
    if isinstance(obj, (datetime, pd.Timestamp)):
        obj = datetime_to_epoch(obj)
    elif isinstance(obj, date):
        obj = (obj - EPOCH.date()).total_seconds() * 1000
    else:
        raise TypeError("Unserializable object {} of type {}".format(obj, type(obj)))
    return obj


def json_dumps_w_dates(payload):
    return json.dumps(payload, default=json_int_dttm_ser)

def error_msg_from_exception(e: Exception) -> str:
    """Translate exception into error message

    Databases have different ways to handle exceptions. This function attempts
    to make sense of the exception object and construct a human readable
    sentence.

    TODO(bkyryliuk): parse the Presto error message from the connection
        created via create_engine.
        engine = create_engine('presto://localhost:3506/silver') -
          gives an e.message as the str(dict)
        presto.connect('localhost', port=3506, catalog='silver') - as a dict.
        The latter version is parsed correctly by this function.
    """
    msg = ""
    if hasattr(e, "message"):
        if isinstance(e.message, dict):  # type: ignore
            msg = e.message.get("message")  # type: ignore
        elif e.message:  # type: ignore
            msg = e.message  # type: ignore
    return msg or str(e)

def markdown(s: str, markup_wrap: Optional[bool] = False) -> str:
    safe_markdown_tags = [
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "b",
        "i",
        "strong",
        "em",
        "tt",
        "p",
        "br",
        "span",
        "div",
        "blockquote",
        "code",
        "hr",
        "ul",
        "ol",
        "li",
        "dd",
        "dt",
        "img",
        "a",
    ]
    safe_markdown_attrs = {
        "img": ["src", "alt", "title"],
        "a": ["href", "alt", "title"],
    }
    s = md.markdown(
        s or "",
        extensions=[
            "markdown.extensions.tables",
            "markdown.extensions.fenced_code",
            "markdown.extensions.codehilite",
        ],
    )
    s = bleach.clean(s, safe_markdown_tags, safe_markdown_attrs)
    if markup_wrap:
        s = Markup(s)
    return s
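
# Illustrative behavior (editor's example; output shown approximately):
# the markdown is rendered first, then bleach escapes any tag outside the
# whitelist above, so embedded scripts are neutralized rather than executed.
#
#   >>> markdown("**bold** <script>alert(1)</script>")
#   '<p><strong>bold</strong> &lt;script&gt;alert(1)&lt;/script&gt;</p>'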

def readfile(file_path: str) -> Optional[str]:
    with open(file_path) as f:
        content = f.read()
    return content


def generic_find_constraint_name(
    table: str, columns: Set[str], referenced: str, db: SQLA
):
    """Utility to find a constraint name in alembic migrations"""
    t = sa.Table(table, db.metadata, autoload=True, autoload_with=db.engine)
    for fk in t.foreign_key_constraints:
        if fk.referred_table.name == referenced and set(fk.column_keys) == columns:
            return fk.name


def generic_find_fk_constraint_name(
    table: str, columns: Set[str], referenced: str, insp
):
    """Utility to find a foreign-key constraint name in alembic migrations"""
    for fk in insp.get_foreign_keys(table):
        if (
            fk["referred_table"] == referenced
            and set(fk["referred_columns"]) == columns
        ):
            return fk["name"]


def generic_find_fk_constraint_names(table, columns, referenced, insp):
    """Utility to find foreign-key constraint names in alembic migrations"""
    names = set()
    for fk in insp.get_foreign_keys(table):
        if (
            fk["referred_table"] == referenced
            and set(fk["referred_columns"]) == columns
        ):
            names.add(fk["name"])
    return names


def generic_find_uq_constraint_name(table, columns, insp):
    """Utility to find a unique constraint name in alembic migrations"""
    for uq in insp.get_unique_constraints(table):
        if columns == set(uq["column_names"]):
            return uq["name"]


def get_datasource_full_name(database_name, datasource_name, schema=None):
    if not schema:
        return "[{}].[{}]".format(database_name, datasource_name)
    return "[{}].[{}].[{}]".format(database_name, schema, datasource_name)


def validate_json(obj):
    if obj:
        try:
            json.loads(obj)
        except Exception as e:
            logger.error(f"JSON is not valid {e}")
            raise SupersetException("JSON is not valid")


def table_has_constraint(table, name, db):
    """Utility to find a constraint name in alembic migrations"""
    t = sa.Table(table, db.metadata, autoload=True, autoload_with=db.engine)
    for c in t.constraints:
        if c.name == name:
            return True
    return False

class timeout:
    """
    To be used in a ``with`` block and timeout its content.
    """

    def __init__(self, seconds=1, error_message="Timeout"):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        logger.error("Process timed out")
        raise SupersetTimeoutException(self.error_message)

    def __enter__(self):
        try:
            signal.signal(signal.SIGALRM, self.handle_timeout)
            signal.alarm(self.seconds)
        except ValueError as e:
            logger.warning("timeout can't be used in the current context")
            logger.exception(e)

    def __exit__(self, type, value, traceback):
        try:
            signal.alarm(0)
        except ValueError as e:
            logger.warning("timeout can't be used in the current context")
            logger.exception(e)
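
# Illustrative usage (editor's sketch; ``run_long_query`` is hypothetical).
# The timeout is SIGALRM-based, so it only works in the main thread on
# POSIX systems; elsewhere __enter__ logs a warning and does nothing.
#
#   with timeout(seconds=30, error_message="Query timed out"):
#       run_long_query()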

def pessimistic_connection_handling(some_engine):
    @event.listens_for(some_engine, "engine_connect")
    def ping_connection(connection, branch):
        if branch:
            # 'branch' refers to a sub-connection of a connection,
            # we don't want to bother pinging on these.
            return

        # turn off 'close with result'. This flag is only used with
        # 'connectionless' execution, otherwise will be False in any case
        save_should_close_with_result = connection.should_close_with_result
        connection.should_close_with_result = False

        try:
            # run a SELECT 1. use a core select() so that
            # the SELECT of a scalar value without a table is
            # appropriately formatted for the backend
            connection.scalar(select([1]))
        except exc.DBAPIError as err:
            # catch SQLAlchemy's DBAPIError, which is a wrapper
            # for the DBAPI's exception. It includes a .connection_invalidated
            # attribute which specifies if this connection is a 'disconnect'
            # condition, which is based on inspection of the original exception
            # by the dialect in use.
            if err.connection_invalidated:
                # run the same SELECT again - the connection will re-validate
                # itself and establish a new connection. The disconnect detection
                # here also causes the whole connection pool to be invalidated
                # so that all stale connections are discarded.
                connection.scalar(select([1]))
            else:
                raise
        finally:
            # restore 'close with result'
            connection.should_close_with_result = save_should_close_with_result

class QueryStatus:
    """Enum-type class for query statuses"""

    STOPPED: str = "stopped"
    FAILED: str = "failed"
    PENDING: str = "pending"
    RUNNING: str = "running"
    SCHEDULED: str = "scheduled"
    SUCCESS: str = "success"
    TIMED_OUT: str = "timed_out"

def notify_user_about_perm_udate(granter, user, role, datasource, tpl_name, config):
    msg = render_template(
        tpl_name, granter=granter, user=user, role=role, datasource=datasource
    )
    logger.info(msg)
    subject = __(
        "[Superset] Access to the datasource %(name)s was granted",
        name=datasource.full_name,
    )
    send_email_smtp(
        user.email,
        subject,
        msg,
        config,
        bcc=granter.email,
        dryrun=not config["EMAIL_NOTIFICATIONS"],
    )

def send_email_smtp(
    to,
    subject,
    html_content,
    config,
    files=None,
    data=None,
    images=None,
    dryrun=False,
    cc=None,
    bcc=None,
    mime_subtype="mixed",
):
    """
    Send an email with html content, eg:
    send_email_smtp(
        'test@example.com', 'foo', '<b>Foo</b> bar', ['/dev/null'], dryrun=True)
    """
    smtp_mail_from = config["SMTP_MAIL_FROM"]
    to = get_email_address_list(to)

    msg = MIMEMultipart(mime_subtype)
    msg["Subject"] = subject
    msg["From"] = smtp_mail_from
    msg["To"] = ", ".join(to)
    msg.preamble = "This is a multi-part message in MIME format."

    recipients = to
    if cc:
        cc = get_email_address_list(cc)
        msg["CC"] = ", ".join(cc)
        recipients = recipients + cc

    if bcc:
        # don't add bcc in header
        bcc = get_email_address_list(bcc)
        recipients = recipients + bcc

    msg["Date"] = formatdate(localtime=True)
    mime_text = MIMEText(html_content, "html")
    msg.attach(mime_text)

    # Attach files by reading them from disk
    for fname in files or []:
        basename = os.path.basename(fname)
        with open(fname, "rb") as f:
            msg.attach(
                MIMEApplication(
                    f.read(),
                    Content_Disposition="attachment; filename='%s'" % basename,
                    Name=basename,
                )
            )

    # Attach any files passed directly
    for name, body in (data or {}).items():
        msg.attach(
            MIMEApplication(
                body, Content_Disposition="attachment; filename='%s'" % name, Name=name
            )
        )

    # Attach any inline images, which may be required for display in
    # HTML content (inline)
    for msgid, body in (images or {}).items():
        image = MIMEImage(body)
        image.add_header("Content-ID", "<%s>" % msgid)
        image.add_header("Content-Disposition", "inline")
        msg.attach(image)

    send_MIME_email(smtp_mail_from, recipients, msg, config, dryrun=dryrun)

def send_MIME_email(e_from, e_to, mime_msg, config, dryrun=False):
    SMTP_HOST = config["SMTP_HOST"]
    SMTP_PORT = config["SMTP_PORT"]
    SMTP_USER = config["SMTP_USER"]
    SMTP_PASSWORD = config["SMTP_PASSWORD"]
    SMTP_STARTTLS = config["SMTP_STARTTLS"]
    SMTP_SSL = config["SMTP_SSL"]

    if not dryrun:
        s = (
            smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT)
            if SMTP_SSL
            else smtplib.SMTP(SMTP_HOST, SMTP_PORT)
        )
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        logger.info("Sent an email to " + str(e_to))
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
    else:
        logger.info("Dryrun enabled, email notification content is below:")
        logger.info(mime_msg.as_string())

def get_email_address_list(address_string: str) -> List[str]:
    address_string_list: List[str] = []
    if isinstance(address_string, str):
        if "," in address_string:
            address_string_list = address_string.split(",")
        elif "\n" in address_string:
            address_string_list = address_string.split("\n")
        elif ";" in address_string:
            address_string_list = address_string.split(";")
        else:
            address_string_list = [address_string]
    return [x.strip() for x in address_string_list if x.strip()]
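
# Illustrative behavior (editor's examples): the first matching separator
# (comma, then newline, then semicolon) wins, and entries are trimmed.
#
#   >>> get_email_address_list("a@example.com; b@example.com")
#   ['a@example.com', 'b@example.com']
#   >>> get_email_address_list("a@example.com, b@example.com")
#   ['a@example.com', 'b@example.com']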

def choicify(values):
    """Takes an iterable and makes an iterable of tuples with it"""
    return [(v, v) for v in values]


def zlib_compress(data):
    """
    Compress things in a py2/3 safe fashion

    >>> json_str = '{"test": 1}'
    >>> blob = zlib_compress(json_str)
    """
    if isinstance(data, str):
        return zlib.compress(bytes(data, "utf-8"))
    return zlib.compress(data)


def zlib_decompress(blob: bytes, decode: Optional[bool] = True) -> Union[bytes, str]:
    """
    Decompress things to a string in a py2/3 safe fashion

    >>> json_str = '{"test": 1}'
    >>> blob = zlib_compress(json_str)
    >>> got_str = zlib_decompress(blob)
    >>> got_str == json_str
    True
    """
    if isinstance(blob, bytes):
        decompressed = zlib.decompress(blob)
    else:
        decompressed = zlib.decompress(bytes(blob, "utf-8"))
    return decompressed.decode("utf-8") if decode else decompressed

def to_adhoc(filt, expressionType="SIMPLE", clause="where"):
    result = {
        "clause": clause.upper(),
        "expressionType": expressionType,
        "filterOptionName": str(uuid.uuid4()),
    }

    if expressionType == "SIMPLE":
        result.update(
            {
                "comparator": filt.get("val"),
                "operator": filt.get("op"),
                "subject": filt.get("col"),
            }
        )
    elif expressionType == "SQL":
        result.update({"sqlExpression": filt.get(clause)})

    return result
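
# Illustrative output (editor's example; filterOptionName is a random UUID,
# shown here as '...'):
#
#   >>> to_adhoc({"col": "gender", "op": "==", "val": "girl"})
#   {'clause': 'WHERE', 'expressionType': 'SIMPLE', 'filterOptionName': '...',
#    'comparator': 'girl', 'operator': '==', 'subject': 'gender'}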

def merge_extra_filters(form_data: dict):
    # extra_filters are temporary/contextual filters (using the legacy constructs)
    # that are external to the slice definition. We use those for dynamic
    # interactive filters like the ones emitted by the "Filter Box" visualization.
    # Note extra_filters only support simple filters.
    if "extra_filters" in form_data:
        # __from and __to are special extra_filters that target time
        # boundaries. The rest of extra_filters are simple
        # [column_name in list_of_values]. The `__` prefix is there to avoid
        # potential conflicts with columns that would be named `from` or `to`.
        if "adhoc_filters" not in form_data or not isinstance(
            form_data["adhoc_filters"], list
        ):
            form_data["adhoc_filters"] = []
        date_options = {
            "__time_range": "time_range",
            "__time_col": "granularity_sqla",
            "__time_grain": "time_grain_sqla",
            "__time_origin": "druid_time_origin",
            "__granularity": "granularity",
        }

        # Grab list of existing filters 'keyed' on the column and operator
        def get_filter_key(f):
            if "expressionType" in f:
                return "{}__{}".format(f["subject"], f["operator"])
            else:
                return "{}__{}".format(f["col"], f["op"])

        existing_filters = {}
        for existing in form_data["adhoc_filters"]:
            if (
                existing["expressionType"] == "SIMPLE"
                and existing["comparator"] is not None
                and existing["subject"] is not None
            ):
                existing_filters[get_filter_key(existing)] = existing["comparator"]

        for filtr in form_data["extra_filters"]:
            # Pull out time filters/options and merge into form data
            if date_options.get(filtr["col"]):
                if filtr.get("val"):
                    form_data[date_options[filtr["col"]]] = filtr["val"]
            elif filtr["val"]:
                # Merge column filters
                filter_key = get_filter_key(filtr)
                if filter_key in existing_filters:
                    # Check if the filter already exists
                    if isinstance(filtr["val"], list):
                        if isinstance(existing_filters[filter_key], list):
                            # Add filters for unequal lists
                            # order doesn't matter
                            if set(existing_filters[filter_key]) != set(filtr["val"]):
                                form_data["adhoc_filters"].append(to_adhoc(filtr))
                        else:
                            form_data["adhoc_filters"].append(to_adhoc(filtr))
                    else:
                        # Do not add filter if same value already exists
                        if filtr["val"] != existing_filters[filter_key]:
                            form_data["adhoc_filters"].append(to_adhoc(filtr))
                else:
                    # Filter not found, add it
                    form_data["adhoc_filters"].append(to_adhoc(filtr))
        # Remove extra filters from the form data since no longer needed
        del form_data["extra_filters"]
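
# Illustrative behavior (editor's example): a Filter Box extra filter is
# folded into adhoc_filters and the extra_filters key is removed, while
# time-related pseudo columns (e.g. __time_range) land directly in form_data.
#
#   >>> fd = {"extra_filters": [
#   ...     {"col": "gender", "op": "in", "val": ["girl"]},
#   ...     {"col": "__time_range", "op": "in", "val": "Last week"},
#   ... ]}
#   >>> merge_extra_filters(fd)
#   >>> fd["time_range"], fd["adhoc_filters"][0]["subject"]
#   ('Last week', 'gender')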

def merge_request_params(form_data: Dict[str, Any], params: Dict[str, Any]) -> None:
    """
    Merge request parameters to the key `url_params` in form_data. Only updates
    or appends parameters to `form_data` that are defined in `params`; pre-existing
    parameters not defined in params are left unchanged.

    :param form_data: object to be updated
    :param params: request parameters received via query string
    """
    url_params = form_data.get("url_params", {})
    for key, value in params.items():
        if key in ("form_data", "r"):
            continue
        url_params[key] = value
    form_data["url_params"] = url_params

def user_label(user: User) -> Optional[str]:
    """Given a user ORM FAB object, returns a label"""
    if user:
        if user.first_name and user.last_name:
            return user.first_name + " " + user.last_name
        else:
            return user.username
    return None


def get_or_create_db(database_name, sqlalchemy_uri, *args, **kwargs):
    from superset import db
    from superset.models import core as models

    database = (
        db.session.query(models.Database).filter_by(database_name=database_name).first()
    )
    if not database:
        logger.info(f"Creating database reference for {database_name}")
        database = models.Database(database_name=database_name, *args, **kwargs)
        db.session.add(database)

    database.set_sqlalchemy_uri(sqlalchemy_uri)
    db.session.commit()
    return database


def get_example_database():
    from superset import conf

    db_uri = conf.get("SQLALCHEMY_EXAMPLES_URI") or conf.get("SQLALCHEMY_DATABASE_URI")
    return get_or_create_db("examples", db_uri)


def is_adhoc_metric(metric) -> bool:
    return bool(
        isinstance(metric, dict)
        and (
            (
                metric["expressionType"] == ADHOC_METRIC_EXPRESSION_TYPES["SIMPLE"]
                and metric["column"]
                and metric["aggregate"]
            )
            or (
                metric["expressionType"] == ADHOC_METRIC_EXPRESSION_TYPES["SQL"]
                and metric["sqlExpression"]
            )
        )
        and metric["label"]
    )


def get_metric_name(metric):
    return metric["label"] if is_adhoc_metric(metric) else metric


def get_metric_names(metrics):
    return [get_metric_name(metric) for metric in metrics]


def ensure_path_exists(path: str):
    try:
        os.makedirs(path)
    except OSError as exc:
        if not (os.path.isdir(path) and exc.errno == errno.EEXIST):
            raise

def get_since_until(
    time_range: Optional[str] = None,
    since: Optional[str] = None,
    until: Optional[str] = None,
    time_shift: Optional[str] = None,
    relative_start: Optional[str] = None,
    relative_end: Optional[str] = None,
) -> Tuple[datetime, datetime]:
    """Return `since` and `until` date time tuple from string representations of
    time_range, since, until and time_shift.

    This function supports both reading the keys separately (from `since` and
    `until`), as well as the new `time_range` key. Valid formats are:

        - ISO 8601
        - X days/years/hours/day/year/weeks
        - X days/years/hours/day/year/weeks ago
        - X days/years/hours/day/year/weeks from now
        - freeform

    Additionally, for `time_range` (these specify both `since` and `until`):

        - Last day
        - Last week
        - Last month
        - Last quarter
        - Last year
        - No filter
        - Last X seconds/minutes/hours/days/weeks/months/years
        - Next X seconds/minutes/hours/days/weeks/months/years
    """
    separator = " : "
    relative_start = parse_human_datetime(relative_start if relative_start else "today")
    relative_end = parse_human_datetime(relative_end if relative_end else "today")
    common_time_frames = {
        "Last day": (
            relative_start - relativedelta(days=1),  # type: ignore
            relative_end,
        ),
        "Last week": (
            relative_start - relativedelta(weeks=1),  # type: ignore
            relative_end,
        ),
        "Last month": (
            relative_start - relativedelta(months=1),  # type: ignore
            relative_end,
        ),
        "Last quarter": (
            relative_start - relativedelta(months=3),  # type: ignore
            relative_end,
        ),
        "Last year": (
            relative_start - relativedelta(years=1),  # type: ignore
            relative_end,
        ),
    }

    if time_range:
        if separator in time_range:
            since, until = time_range.split(separator, 1)
            if since and since not in common_time_frames:
                since = add_ago_to_since(since)
            since = parse_human_datetime(since)
            until = parse_human_datetime(until)
        elif time_range in common_time_frames:
            since, until = common_time_frames[time_range]
        elif time_range == "No filter":
            since = until = None
        else:
            rel, num, grain = time_range.split()
            if rel == "Last":
                since = relative_start - relativedelta(  # type: ignore
                    **{grain: int(num)}
                )
                until = relative_end
            else:  # rel == 'Next'
                since = relative_start
                until = relative_end + relativedelta(  # type: ignore
                    **{grain: int(num)}
                )
    else:
        since = since or ""
        if since:
            since = add_ago_to_since(since)
        since = parse_human_datetime(since)
        until = parse_human_datetime(until) if until else relative_end

    if time_shift:
        time_delta = parse_past_timedelta(time_shift)
        since = since if since is None else (since - time_delta)  # type: ignore
        until = until if until is None else (until - time_delta)  # type: ignore

    if since and until and since > until:
        raise ValueError(_("From date cannot be larger than to date"))

    return since, until  # type: ignore
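
# Illustrative usage (editor's examples, evaluated relative to "today"):
#
#   >>> get_since_until("2018-01-01 : 2019-01-01")   # explicit " : " range
#   (datetime.datetime(2018, 1, 1, 0, 0), datetime.datetime(2019, 1, 1, 0, 0))
#   >>> get_since_until(time_range="Last week")       # common time frame
#   >>> get_since_until(since="7 days")               # treated as "7 days ago"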

def add_ago_to_since(since: str) -> str:
    """
    Backwards compatibility hack. Without this, slices with `since: 7 days`
    would be treated as 7 days in the future.

    :param str since:
    :returns: since with " ago" appended if necessary
    :rtype: str
    """
    since_words = since.split(" ")
    grains = ["days", "years", "hours", "day", "year", "weeks"]
    if len(since_words) == 2 and since_words[1] in grains:
        since += " ago"
    return since

def convert_legacy_filters_into_adhoc(fd):
    mapping = {"having": "having_filters", "where": "filters"}

    if not fd.get("adhoc_filters"):
        fd["adhoc_filters"] = []

        for clause, filters in mapping.items():
            if clause in fd and fd[clause] != "":
                fd["adhoc_filters"].append(to_adhoc(fd, "SQL", clause))

            if filters in fd:
                for filt in filter(lambda x: x is not None, fd[filters]):
                    fd["adhoc_filters"].append(to_adhoc(filt, "SIMPLE", clause))

    for key in ("filters", "having", "having_filters", "where"):
        if key in fd:
            del fd[key]
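
# Illustrative behavior (editor's example): legacy `where`/`filters` keys are
# folded into adhoc_filters and then removed from the form data.
#
#   >>> fd = {"where": "value > 5",
#   ...       "filters": [{"col": "city", "op": "==", "val": "SF"}]}
#   >>> convert_legacy_filters_into_adhoc(fd)
#   >>> sorted(f["expressionType"] for f in fd["adhoc_filters"])
#   ['SIMPLE', 'SQL']
#   >>> "where" in fd
#   False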

def split_adhoc_filters_into_base_filters(fd):
    """
    Mutates form data to restructure the adhoc filters in the form of the four
    base filters, `where`, `having`, `filters`, and `having_filters`, which
    represent free form where sql, free form having sql, structured where
    clauses and structured having clauses.
    """
    adhoc_filters = fd.get("adhoc_filters")
    if isinstance(adhoc_filters, list):
        simple_where_filters = []
        simple_having_filters = []
        sql_where_filters = []
        sql_having_filters = []
        for adhoc_filter in adhoc_filters:
            expression_type = adhoc_filter.get("expressionType")
            clause = adhoc_filter.get("clause")
            if expression_type == "SIMPLE":
                if clause == "WHERE":
                    simple_where_filters.append(
                        {
                            "col": adhoc_filter.get("subject"),
                            "op": adhoc_filter.get("operator"),
                            "val": adhoc_filter.get("comparator"),
                        }
                    )
                elif clause == "HAVING":
                    simple_having_filters.append(
                        {
                            "col": adhoc_filter.get("subject"),
                            "op": adhoc_filter.get("operator"),
                            "val": adhoc_filter.get("comparator"),
                        }
                    )
            elif expression_type == "SQL":
                if clause == "WHERE":
                    sql_where_filters.append(adhoc_filter.get("sqlExpression"))
                elif clause == "HAVING":
                    sql_having_filters.append(adhoc_filter.get("sqlExpression"))
        fd["where"] = " AND ".join(["({})".format(sql) for sql in sql_where_filters])
        fd["having"] = " AND ".join(["({})".format(sql) for sql in sql_having_filters])
        fd["having_filters"] = simple_having_filters
        fd["filters"] = simple_where_filters

def get_username() -> Optional[str]:
    """Get username if within the flask context, otherwise return None"""
    try:
        return g.user.username
    except Exception:
        return None


def MediumText() -> Variant:
    return Text().with_variant(MEDIUMTEXT(), "mysql")


def shortid() -> str:
    return "{}".format(uuid.uuid4())[-12:]


class DatasourceName(NamedTuple):
    table: str
    schema: str


def get_stacktrace():
    if current_app.config["SHOW_STACKTRACE"]:
        return traceback.format_exc()

def split(
    s: str, delimiter: str = " ", quote: str = '"', escaped_quote: str = r"\""
) -> Iterator[str]:
    """
    A split function that is aware of quotes and parentheses.

    :param s: string to split
    :param delimiter: string defining where to split, usually a comma or space
    :param quote: string, either a single or a double quote
    :param escaped_quote: string representing an escaped quote
    :return: list of strings
    """
    parens = 0
    quotes = False
    i = 0
    for j, c in enumerate(s):
        complete = parens == 0 and not quotes
        if complete and c == delimiter:
            yield s[i:j]
            i = j + len(delimiter)
        elif c == "(":
            parens += 1
        elif c == ")":
            parens -= 1
        elif c == quote:
            if quotes and s[j - len(escaped_quote) + 1 : j + 1] != escaped_quote:
                quotes = False
            elif not quotes:
                quotes = True
    yield s[i:]
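
# Illustrative behavior (editor's examples): delimiters inside quotes or
# parentheses do not split the string.
#
#   >>> list(split('a b "c d"'))
#   ['a', 'b', '"c d"']
#   >>> list(split("f(a, b), c", delimiter=","))
#   ['f(a, b)', ' c']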

class TimeRangeEndpoint(str, Enum):
    """
    The time range endpoint types which represent inclusive, exclusive, or
    unknown.

    Unknown represents endpoints which are ill-defined: even though the
    interval may be [start, end], the filter may behave like (start, end] due
    to mixed data types and lexicographical ordering.

    :see: https://github.com/apache/incubator-superset/issues/6360
    """

    EXCLUSIVE = "exclusive"
    INCLUSIVE = "inclusive"
    UNKNOWN = "unknown"


class ReservedUrlParameters(Enum):
    """
    Reserved URL parameters that are used internally by Superset. These will not be
    passed to chart queries, as they control the behavior of the UI.
    """

    STANDALONE = "standalone"
    EDIT_MODE = "edit"


class QuerySource(Enum):
    """
    The source of a SQL query.
    """

    CHART = 0
    DASHBOARD = 1
    SQL_LAB = 2