utils.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026
  1. # pylint: disable=C,R,W
  2. """Utility functions used across Superset"""
  3. from builtins import object
  4. from datetime import date, datetime, time, timedelta
  5. import decimal
  6. from email.mime.application import MIMEApplication
  7. from email.mime.multipart import MIMEMultipart
  8. from email.mime.text import MIMEText
  9. from email.utils import formatdate
  10. import errno
  11. import functools
  12. import json
  13. import logging
  14. import os
  15. import signal
  16. import smtplib
  17. import sys
  18. import uuid
  19. import zlib
  20. import bleach
  21. import celery
  22. from dateutil.parser import parse
  23. from dateutil.relativedelta import relativedelta
  24. from flask import flash, g, Markup, render_template
  25. from flask_babel import gettext as __
  26. from flask_caching import Cache
  27. import markdown as md
  28. import numpy
  29. import pandas as pd
  30. import parsedatetime
  31. from past.builtins import basestring
  32. from pydruid.utils.having import Having
  33. import pytz
  34. import sqlalchemy as sa
  35. from sqlalchemy import event, exc, select, Text
  36. from sqlalchemy.dialects.mysql import MEDIUMTEXT
  37. from sqlalchemy.types import TEXT, TypeDecorator
  38. from superset.exceptions import SupersetException, SupersetTimeoutException
# The markdown library logs verbosely; cap its logger at INFO.
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
# True when running under Python 3 (this file still supports Python 2).
PY3K = sys.version_info >= (3, 0)
# Naive-UTC reference point for epoch-millisecond conversions below.
EPOCH = datetime(1970, 1, 1)
# Alias used for the main temporal column in generated queries.
DTTM_ALIAS = '__timestamp'
# Valid values for an adhoc metric's 'expressionType' field.
ADHOC_METRIC_EXPRESSION_TYPES = {
    'SIMPLE': 'SIMPLE',
    'SQL': 'SQL',
}
JS_MAX_INTEGER = 9007199254740991   # Largest int Java Script can handle 2^53-1
  48. def flasher(msg, severity=None):
  49. """Flask's flash if available, logging call if not"""
  50. try:
  51. flash(msg, severity)
  52. except RuntimeError:
  53. if severity == 'danger':
  54. logging.error(msg)
  55. else:
  56. logging.info(msg)
  57. class _memoized(object): # noqa
  58. """Decorator that caches a function's return value each time it is called
  59. If called later with the same arguments, the cached value is returned, and
  60. not re-evaluated.
  61. Define ``watch`` as a tuple of attribute names if this Decorator
  62. should account for instance variable changes.
  63. """
  64. def __init__(self, func, watch=()):
  65. self.func = func
  66. self.cache = {}
  67. self.is_method = False
  68. self.watch = watch
  69. def __call__(self, *args, **kwargs):
  70. key = [args, frozenset(kwargs.items())]
  71. if self.is_method:
  72. key.append(tuple([getattr(args[0], v, None) for v in self.watch]))
  73. key = tuple(key)
  74. if key in self.cache:
  75. return self.cache[key]
  76. try:
  77. value = self.func(*args, **kwargs)
  78. self.cache[key] = value
  79. return value
  80. except TypeError:
  81. # uncachable -- for instance, passing a list as an argument.
  82. # Better to not cache than to blow up entirely.
  83. return self.func(*args, **kwargs)
  84. def __repr__(self):
  85. """Return the function's docstring."""
  86. return self.func.__doc__
  87. def __get__(self, obj, objtype):
  88. if not self.is_method:
  89. self.is_method = True
  90. """Support instance methods."""
  91. return functools.partial(self.__call__, obj)
  92. def memoized(func=None, watch=None):
  93. if func:
  94. return _memoized(func)
  95. else:
  96. def wrapper(f):
  97. return _memoized(f, watch)
  98. return wrapper
  99. def js_string_to_python(item):
  100. return None if item in ('null', 'undefined') else item
  101. def string_to_num(s):
  102. """Converts a string to an int/float
  103. Returns ``None`` if it can't be converted
  104. >>> string_to_num('5')
  105. 5
  106. >>> string_to_num('5.2')
  107. 5.2
  108. >>> string_to_num(10)
  109. 10
  110. >>> string_to_num(10.1)
  111. 10.1
  112. >>> string_to_num('this is not a string') is None
  113. True
  114. """
  115. if isinstance(s, (int, float)):
  116. return s
  117. if s.isdigit():
  118. return int(s)
  119. try:
  120. return float(s)
  121. except ValueError:
  122. return None
class DimSelector(Having):
    """pydruid Having filter that matches a dimension against a value."""

    def __init__(self, **args):
        # Just a hack to prevent any exceptions
        Having.__init__(self, type='equalTo', aggregation=None, value=None)
        # Overwrite the spec built by the base class with a dimSelector
        # having clause; requires 'dimension' and 'value' kwargs.
        self.having = {
            'having': {
                'type': 'dimSelector',
                'dimension': args['dimension'],
                'value': args['value'],
            },
        }
  134. def list_minus(l, minus):
  135. """Returns l without what is in minus
  136. >>> list_minus([1, 2, 3], [2])
  137. [1, 3]
  138. """
  139. return [o for o in l if o not in minus]
def parse_human_datetime(s):
    """
    Returns ``datetime.datetime`` from human readable strings

    >>> from datetime import date, timedelta
    >>> from dateutil.relativedelta import relativedelta
    >>> parse_human_datetime('2015-04-03')
    datetime.datetime(2015, 4, 3, 0, 0)
    >>> parse_human_datetime('2/3/1969')
    datetime.datetime(1969, 2, 3, 0, 0)
    >>> parse_human_datetime('now') <= datetime.now()
    True
    >>> parse_human_datetime('yesterday') <= datetime.now()
    True
    >>> date.today() - timedelta(1) == parse_human_datetime('yesterday').date()
    True
    >>> year_ago_1 = parse_human_datetime('one year ago').date()
    >>> year_ago_2 = (datetime.now() - relativedelta(years=1) ).date()
    >>> year_ago_1 == year_ago_2
    True
    """
    if not s:
        return None
    try:
        # strict dateutil parsing first (ISO dates, '2/3/1969', ...)
        dttm = parse(s)
    except Exception:
        try:
            # fall back to natural language ('yesterday', 'one year ago')
            cal = parsedatetime.Calendar()
            parsed_dttm, parsed_flags = cal.parseDT(s)
            # when time is not extracted, we 'reset to midnight'
            # (bit 2 of the parse flags marks a parsed time component)
            if parsed_flags & 2 == 0:
                parsed_dttm = parsed_dttm.replace(hour=0, minute=0, second=0)
            dttm = dttm_from_timtuple(parsed_dttm.utctimetuple())
        except Exception as e:
            logging.exception(e)
            raise ValueError("Couldn't parse date string [{}]".format(s))
    return dttm
  176. def dttm_from_timtuple(d):
  177. return datetime(
  178. d.tm_year, d.tm_mon, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec)
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    # imported here rather than at module level — presumably to avoid a
    # circular import at load time (verify)
    import superset.models.core as models
    from superset.connectors.sqla.models import (
        SqlaTable, SqlMetric, TableColumn,
    )

    # Each serialized object is keyed by '__ClassName__'; rebuild a bare
    # instance and restore its attribute dict directly (bypasses __init__
    # validation — attributes come straight from the JSON payload).
    if '__Dashboard__' in o:
        d = models.Dashboard()
        d.__dict__.update(o['__Dashboard__'])
        return d
    elif '__Slice__' in o:
        d = models.Slice()
        d.__dict__.update(o['__Slice__'])
        return d
    elif '__TableColumn__' in o:
        d = TableColumn()
        d.__dict__.update(o['__TableColumn__'])
        return d
    elif '__SqlaTable__' in o:
        d = SqlaTable()
        d.__dict__.update(o['__SqlaTable__'])
        return d
    elif '__SqlMetric__' in o:
        d = SqlMetric()
        d.__dict__.update(o['__SqlMetric__'])
        return d
    elif '__datetime__' in o:
        # counterpart of DashboardEncoder's '__datetime__' representation
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    else:
        # plain dict — pass through unchanged
        return o
  212. class DashboardEncoder(json.JSONEncoder):
  213. # pylint: disable=E0202
  214. def default(self, o):
  215. try:
  216. vals = {
  217. k: v for k, v in o.__dict__.items() if k != '_sa_instance_state'}
  218. return {'__{}__'.format(o.__class__.__name__): vals}
  219. except Exception:
  220. if type(o) == datetime:
  221. return {'__datetime__': o.replace(microsecond=0).isoformat()}
  222. return json.JSONEncoder.default(self, o)
def parse_human_timedelta(s):
    """
    Returns ``datetime.timedelta`` from natural language time deltas
    (the return value is the offset of `s` relative to 'now').

    >>> parse_human_datetime('now') <= datetime.now()
    True
    """
    cal = parsedatetime.Calendar()
    # anchor parsing at 'now', truncated to whole seconds
    dttm = dttm_from_timtuple(datetime.now().timetuple())
    d = cal.parse(s, dttm)[0]
    d = datetime(d.tm_year, d.tm_mon, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec)
    # difference between the parsed point in time and the anchor
    return d - dttm
  234. class JSONEncodedDict(TypeDecorator):
  235. """Represents an immutable structure as a json-encoded string."""
  236. impl = TEXT
  237. def process_bind_param(self, value, dialect):
  238. if value is not None:
  239. value = json.dumps(value)
  240. return value
  241. def process_result_value(self, value, dialect):
  242. if value is not None:
  243. value = json.loads(value)
  244. return value
  245. def datetime_f(dttm):
  246. """Formats datetime to take less room when it is recent"""
  247. if dttm:
  248. dttm = dttm.isoformat()
  249. now_iso = datetime.now().isoformat()
  250. if now_iso[:10] == dttm[:10]:
  251. dttm = dttm[11:]
  252. elif now_iso[:4] == dttm[:4]:
  253. dttm = dttm[5:]
  254. return '<nobr>{}</nobr>'.format(dttm)
  255. def base_json_conv(obj):
  256. if isinstance(obj, numpy.int64):
  257. return int(obj)
  258. elif isinstance(obj, numpy.bool_):
  259. return bool(obj)
  260. elif isinstance(obj, set):
  261. return list(obj)
  262. elif isinstance(obj, decimal.Decimal):
  263. return float(obj)
  264. elif isinstance(obj, uuid.UUID):
  265. return str(obj)
  266. elif isinstance(obj, timedelta):
  267. return str(obj)
  268. elif isinstance(obj, bytes):
  269. try:
  270. return '{}'.format(obj)
  271. except Exception:
  272. return '[bytes]'
  273. def json_iso_dttm_ser(obj, pessimistic=False):
  274. """
  275. json serializer that deals with dates
  276. >>> dttm = datetime(1970, 1, 1)
  277. >>> json.dumps({'dttm': dttm}, default=json_iso_dttm_ser)
  278. '{"dttm": "1970-01-01T00:00:00"}'
  279. """
  280. val = base_json_conv(obj)
  281. if val is not None:
  282. return val
  283. if isinstance(obj, (datetime, date, time, pd.Timestamp)):
  284. obj = obj.isoformat()
  285. else:
  286. if pessimistic:
  287. return 'Unserializable [{}]'.format(type(obj))
  288. else:
  289. raise TypeError(
  290. 'Unserializable object {} of type {}'.format(obj, type(obj)))
  291. return obj
def pessimistic_json_iso_dttm_ser(obj):
    """Proxy for json_iso_dttm_ser that never raises.

    Objects that cannot be serialized are rendered as
    'Unserializable [...]' instead of raising TypeError.
    """
    return json_iso_dttm_ser(obj, pessimistic=True)
  296. def datetime_to_epoch(dttm):
  297. if dttm.tzinfo:
  298. epoch_with_tz = pytz.utc.localize(EPOCH)
  299. return (dttm - epoch_with_tz).total_seconds() * 1000
  300. return (dttm - EPOCH).total_seconds() * 1000
def now_as_float():
    """Current UTC time as milliseconds since the Unix epoch (float)."""
    return datetime_to_epoch(datetime.utcnow())
  303. def json_int_dttm_ser(obj):
  304. """json serializer that deals with dates"""
  305. val = base_json_conv(obj)
  306. if val is not None:
  307. return val
  308. if isinstance(obj, (datetime, pd.Timestamp)):
  309. obj = datetime_to_epoch(obj)
  310. elif isinstance(obj, date):
  311. obj = (obj - EPOCH.date()).total_seconds() * 1000
  312. else:
  313. raise TypeError(
  314. 'Unserializable object {} of type {}'.format(obj, type(obj)))
  315. return obj
def json_dumps_w_dates(payload):
    """json.dumps with date/datetime support (epoch-ms via json_int_dttm_ser)."""
    return json.dumps(payload, default=json_int_dttm_ser)
  318. def error_msg_from_exception(e):
  319. """Translate exception into error message
  320. Database have different ways to handle exception. This function attempts
  321. to make sense of the exception object and construct a human readable
  322. sentence.
  323. TODO(bkyryliuk): parse the Presto error message from the connection
  324. created via create_engine.
  325. engine = create_engine('presto://localhost:3506/silver') -
  326. gives an e.message as the str(dict)
  327. presto.connect('localhost', port=3506, catalog='silver') - as a dict.
  328. The latter version is parsed correctly by this function.
  329. """
  330. msg = ''
  331. if hasattr(e, 'message'):
  332. if isinstance(e.message, dict):
  333. msg = e.message.get('message')
  334. elif e.message:
  335. msg = '{}'.format(e.message)
  336. return msg or '{}'.format(e)
  337. def markdown(s, markup_wrap=False):
  338. safe_markdown_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'b', 'i',
  339. 'strong', 'em', 'tt', 'p', 'br', 'span',
  340. 'div', 'blockquote', 'code', 'hr', 'ul', 'ol',
  341. 'li', 'dd', 'dt', 'img', 'a']
  342. safe_markdown_attrs = {'img': ['src', 'alt', 'title'],
  343. 'a': ['href', 'alt', 'title']}
  344. s = md.markdown(s or '', extensions=[
  345. 'markdown.extensions.tables',
  346. 'markdown.extensions.fenced_code',
  347. 'markdown.extensions.codehilite',
  348. ])
  349. s = bleach.clean(s, safe_markdown_tags, safe_markdown_attrs)
  350. if markup_wrap:
  351. s = Markup(s)
  352. return s
  353. def readfile(file_path):
  354. with open(file_path) as f:
  355. content = f.read()
  356. return content
  357. def generic_find_constraint_name(table, columns, referenced, db):
  358. """Utility to find a constraint name in alembic migrations"""
  359. t = sa.Table(table, db.metadata, autoload=True, autoload_with=db.engine)
  360. for fk in t.foreign_key_constraints:
  361. if fk.referred_table.name == referenced and set(fk.column_keys) == columns:
  362. return fk.name
  363. def generic_find_fk_constraint_name(table, columns, referenced, insp):
  364. """Utility to find a foreign-key constraint name in alembic migrations"""
  365. for fk in insp.get_foreign_keys(table):
  366. if fk['referred_table'] == referenced and set(fk['referred_columns']) == columns:
  367. return fk['name']
  368. def generic_find_fk_constraint_names(table, columns, referenced, insp):
  369. """Utility to find foreign-key constraint names in alembic migrations"""
  370. names = set()
  371. for fk in insp.get_foreign_keys(table):
  372. if fk['referred_table'] == referenced and set(fk['referred_columns']) == columns:
  373. names.add(fk['name'])
  374. return names
  375. def generic_find_uq_constraint_name(table, columns, insp):
  376. """Utility to find a unique constraint name in alembic migrations"""
  377. for uq in insp.get_unique_constraints(table):
  378. if columns == set(uq['column_names']):
  379. return uq['name']
  380. def get_datasource_full_name(database_name, datasource_name, schema=None):
  381. if not schema:
  382. return '[{}].[{}]'.format(database_name, datasource_name)
  383. return '[{}].[{}].[{}]'.format(database_name, schema, datasource_name)
  384. def validate_json(obj):
  385. if obj:
  386. try:
  387. json.loads(obj)
  388. except Exception:
  389. raise SupersetException('JSON is not valid')
  390. def table_has_constraint(table, name, db):
  391. """Utility to find a constraint name in alembic migrations"""
  392. t = sa.Table(table, db.metadata, autoload=True, autoload_with=db.engine)
  393. for c in t.constraints:
  394. if c.name == name:
  395. return True
  396. return False
class timeout(object):
    """
    To be used in a ``with`` block and timeout its content.

    Relies on SIGALRM; signal.signal raises ValueError when it cannot be
    installed (e.g. outside the main thread), which is downgraded to a
    warning so the block simply runs without a timeout.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        # seconds: alarm delay; error_message: message for the raised exception
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        """SIGALRM handler: abort the guarded block."""
        logging.error('Process timed out')
        raise SupersetTimeoutException(self.error_message)

    def __enter__(self):
        try:
            signal.signal(signal.SIGALRM, self.handle_timeout)
            signal.alarm(self.seconds)
        except ValueError as e:
            # could not install the handler — run unguarded
            logging.warning("timeout can't be used in the current context")
            logging.exception(e)

    def __exit__(self, type, value, traceback):
        try:
            # cancel any pending alarm
            signal.alarm(0)
        except ValueError as e:
            logging.warning("timeout can't be used in the current context")
            logging.exception(e)
def pessimistic_connection_handling(some_engine):
    """Install a connect-time ping on `some_engine` so stale pooled
    connections are detected and re-established before use."""
    @event.listens_for(some_engine, 'engine_connect')
    def ping_connection(connection, branch):
        if branch:
            # 'branch' refers to a sub-connection of a connection,
            # we don't want to bother pinging on these.
            return

        # turn off 'close with result'. This flag is only used with
        # 'connectionless' execution, otherwise will be False in any case
        save_should_close_with_result = connection.should_close_with_result
        connection.should_close_with_result = False

        try:
            # run a SELECT 1. use a core select() so that
            # the SELECT of a scalar value without a table is
            # appropriately formatted for the backend
            connection.scalar(select([1]))
        except exc.DBAPIError as err:
            # catch SQLAlchemy's DBAPIError, which is a wrapper
            # for the DBAPI's exception. It includes a .connection_invalidated
            # attribute which specifies if this connection is a 'disconnect'
            # condition, which is based on inspection of the original exception
            # by the dialect in use.
            if err.connection_invalidated:
                # run the same SELECT again - the connection will re-validate
                # itself and establish a new connection. The disconnect detection
                # here also causes the whole connection pool to be invalidated
                # so that all stale connections are discarded.
                connection.scalar(select([1]))
            else:
                raise
        finally:
            # restore 'close with result'
            connection.should_close_with_result = save_should_close_with_result
class QueryStatus(object):
    """Enum-type class for query statuses"""

    # NOTE(review): these string values appear to be compared/stored
    # verbatim elsewhere — do not rename the values casually (verify usages).
    STOPPED = 'stopped'
    FAILED = 'failed'
    PENDING = 'pending'
    RUNNING = 'running'
    SCHEDULED = 'scheduled'
    SUCCESS = 'success'
    TIMED_OUT = 'timed_out'
def notify_user_about_perm_udate(
        granter, user, role, datasource, tpl_name, config):
    """Render and send an 'access granted' email to `user`, bcc'ing `granter`.

    NOTE(review): 'udate' looks like a typo for 'update'; the name is kept
    since external callers reference it.
    """
    msg = render_template(tpl_name, granter=granter, user=user, role=role,
                          datasource=datasource)
    logging.info(msg)
    subject = __('[Superset] Access to the datasource %(name)s was granted',
                 name=datasource.full_name)
    # send for real only when email notifications are enabled in config
    send_email_smtp(user.email, subject, msg, config, bcc=granter.email,
                    dryrun=not config.get('EMAIL_NOTIFICATIONS'))
def send_email_smtp(to, subject, html_content, config, files=None,
                    dryrun=False, cc=None, bcc=None, mime_subtype='mixed'):
    """
    Send an email with html content, eg:
    send_email_smtp(
        'test@example.com', 'foo', '<b>Foo</b> bar',['/dev/null'], dryrun=True)

    `to`, `cc` and `bcc` accept either a list or a ','/';'-delimited string.
    `files` is an iterable of paths attached as application MIME parts.
    """
    smtp_mail_from = config.get('SMTP_MAIL_FROM')
    # normalize delimited strings into lists
    to = get_email_address_list(to)

    msg = MIMEMultipart(mime_subtype)
    msg['Subject'] = subject
    msg['From'] = smtp_mail_from
    msg['To'] = ', '.join(to)
    recipients = to
    if cc:
        cc = get_email_address_list(cc)
        msg['CC'] = ', '.join(cc)
        recipients = recipients + cc

    if bcc:
        # don't add bcc in header
        bcc = get_email_address_list(bcc)
        recipients = recipients + bcc

    msg['Date'] = formatdate(localtime=True)
    mime_text = MIMEText(html_content, 'html')
    msg.attach(mime_text)

    # attach each file, named after its basename
    for fname in files or []:
        basename = os.path.basename(fname)
        with open(fname, 'rb') as f:
            msg.attach(
                MIMEApplication(
                    f.read(),
                    Content_Disposition="attachment; filename='%s'" % basename,
                    Name=basename))

    send_MIME_email(smtp_mail_from, recipients, msg, config, dryrun=dryrun)
  505. def send_MIME_email(e_from, e_to, mime_msg, config, dryrun=False):
  506. SMTP_HOST = config.get('SMTP_HOST')
  507. SMTP_PORT = config.get('SMTP_PORT')
  508. SMTP_USER = config.get('SMTP_USER')
  509. SMTP_PASSWORD = config.get('SMTP_PASSWORD')
  510. SMTP_STARTTLS = config.get('SMTP_STARTTLS')
  511. SMTP_SSL = config.get('SMTP_SSL')
  512. if not dryrun:
  513. s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL else \
  514. smtplib.SMTP(SMTP_HOST, SMTP_PORT)
  515. if SMTP_STARTTLS:
  516. s.starttls()
  517. if SMTP_USER and SMTP_PASSWORD:
  518. s.login(SMTP_USER, SMTP_PASSWORD)
  519. logging.info('Sent an alert email to ' + str(e_to))
  520. s.sendmail(e_from, e_to, mime_msg.as_string())
  521. s.quit()
  522. else:
  523. logging.info('Dryrun enabled, email notification content is below:')
  524. logging.info(mime_msg.as_string())
  525. def get_email_address_list(address_string):
  526. if isinstance(address_string, basestring):
  527. if ',' in address_string:
  528. address_string = address_string.split(',')
  529. elif ';' in address_string:
  530. address_string = address_string.split(';')
  531. else:
  532. address_string = [address_string]
  533. return address_string
  534. def choicify(values):
  535. """Takes an iterable and makes an iterable of tuples with it"""
  536. return [(v, v) for v in values]
  537. def setup_cache(app, cache_config):
  538. """Setup the flask-cache on a flask app"""
  539. if cache_config and cache_config.get('CACHE_TYPE') != 'null':
  540. return Cache(app, config=cache_config)
  541. def zlib_compress(data):
  542. """
  543. Compress things in a py2/3 safe fashion
  544. >>> json_str = '{"test": 1}'
  545. >>> blob = zlib_compress(json_str)
  546. """
  547. if PY3K:
  548. if isinstance(data, str):
  549. return zlib.compress(bytes(data, 'utf-8'))
  550. return zlib.compress(data)
  551. return zlib.compress(data)
  552. def zlib_decompress_to_string(blob):
  553. """
  554. Decompress things to a string in a py2/3 safe fashion
  555. >>> json_str = '{"test": 1}'
  556. >>> blob = zlib_compress(json_str)
  557. >>> got_str = zlib_decompress_to_string(blob)
  558. >>> got_str == json_str
  559. True
  560. """
  561. if PY3K:
  562. if isinstance(blob, bytes):
  563. decompressed = zlib.decompress(blob)
  564. else:
  565. decompressed = zlib.decompress(bytes(blob, 'utf-8'))
  566. return decompressed.decode('utf-8')
  567. return zlib.decompress(blob)
# Process-wide singleton so repeated calls share one Celery app.
_celery_app = None


def get_celery_app(config):
    """Return the shared Celery app, creating and configuring it on first use.

    The app is configured from config['CELERY_CONFIG'] and installed as
    Celery's default app.
    """
    global _celery_app
    if _celery_app:
        return _celery_app
    _celery_app = celery.Celery()
    _celery_app.config_from_object(config.get('CELERY_CONFIG'))
    _celery_app.set_default()
    return _celery_app
  577. def to_adhoc(filt, expressionType='SIMPLE', clause='where'):
  578. result = {
  579. 'clause': clause.upper(),
  580. 'expressionType': expressionType,
  581. 'filterOptionName': str(uuid.uuid4()),
  582. }
  583. if expressionType == 'SIMPLE':
  584. result.update({
  585. 'comparator': filt.get('val'),
  586. 'operator': filt.get('op'),
  587. 'subject': filt.get('col'),
  588. })
  589. elif expressionType == 'SQL':
  590. result.update({
  591. 'sqlExpression': filt.get(clause),
  592. })
  593. return result
def merge_extra_filters(form_data):
    """Merge contextual `extra_filters` into `adhoc_filters`/time fields.

    extra_filters are temporary/contextual filters (using the legacy
    constructs) that are external to the slice definition. We use those for
    dynamic interactive filters like the ones emitted by the "Filter Box"
    visualization. Note extra_filters only support simple filters.
    """
    if 'extra_filters' in form_data:
        # __form and __to are special extra_filters that target time
        # boundaries. The rest of extra_filters are simple
        # [column_name in list_of_values]. `__` prefix is there to avoid
        # potential conflicts with column that would be named `from` or `to`
        if (
            'adhoc_filters' not in form_data or
            not isinstance(form_data['adhoc_filters'], list)
        ):
            form_data['adhoc_filters'] = []
        # special columns that map onto time/granularity form fields
        # instead of becoming adhoc filters
        date_options = {
            '__time_range': 'time_range',
            '__time_col': 'granularity_sqla',
            '__time_grain': 'time_grain_sqla',
            '__time_origin': 'druid_time_origin',
            '__granularity': 'granularity',
        }

        # Grab list of existing filters 'keyed' on the column and operator
        def get_filter_key(f):
            if 'expressionType' in f:
                return '{}__{}'.format(f['subject'], f['operator'])
            else:
                return '{}__{}'.format(f['col'], f['op'])

        existing_filters = {}
        for existing in form_data['adhoc_filters']:
            if (
                existing['expressionType'] == 'SIMPLE' and
                existing['comparator'] is not None and
                existing['subject'] is not None
            ):
                existing_filters[get_filter_key(existing)] = existing['comparator']

        for filtr in form_data['extra_filters']:
            # Pull out time filters/options and merge into form data
            if date_options.get(filtr['col']):
                if filtr.get('val'):
                    form_data[date_options[filtr['col']]] = filtr['val']
            elif filtr['val'] and len(filtr['val']):
                # Merge column filters
                filter_key = get_filter_key(filtr)
                if filter_key in existing_filters:
                    # Check if the filter already exists
                    if isinstance(filtr['val'], list):
                        if isinstance(existing_filters[filter_key], list):
                            # Add filters for unequal lists
                            # order doesn't matter
                            if (
                                sorted(existing_filters[filter_key]) !=
                                sorted(filtr['val'])
                            ):
                                form_data['adhoc_filters'].append(to_adhoc(filtr))
                        else:
                            form_data['adhoc_filters'].append(to_adhoc(filtr))
                    else:
                        # Do not add filter if same value already exists
                        if filtr['val'] != existing_filters[filter_key]:
                            form_data['adhoc_filters'].append(to_adhoc(filtr))
                else:
                    # Filter not found, add it
                    form_data['adhoc_filters'].append(to_adhoc(filtr))
        # Remove extra filters from the form data since no longer needed
        del form_data['extra_filters']
  660. def merge_request_params(form_data, params):
  661. url_params = {}
  662. for key, value in params.items():
  663. if key in ('form_data', 'r'):
  664. continue
  665. url_params[key] = value
  666. form_data['url_params'] = url_params
  667. def get_update_perms_flag():
  668. val = os.environ.get('SUPERSET_UPDATE_PERMS')
  669. return val.lower() not in ('0', 'false', 'no') if val else True
  670. def user_label(user):
  671. """Given a user ORM FAB object, returns a label"""
  672. if user:
  673. if user.first_name and user.last_name:
  674. return user.first_name + ' ' + user.last_name
  675. else:
  676. return user.username
def get_or_create_main_db():
    """Return the 'main' Database record, creating it if missing.

    Always (re)points the record at superset's own metadata database URI
    and enables SQL Lab / sync / CSV-upload flags, then commits.
    """
    from superset import conf, db
    from superset.models import core as models

    logging.info('Creating database reference')
    dbobj = get_main_database(db.session)
    if not dbobj:
        dbobj = models.Database(database_name='main')
    dbobj.set_sqlalchemy_uri(conf.get('SQLALCHEMY_DATABASE_URI'))
    dbobj.expose_in_sqllab = True
    dbobj.allow_run_sync = True
    dbobj.allow_csv_upload = True
    db.session.add(dbobj)
    db.session.commit()
    return dbobj
def get_main_database(session):
    """Return the Database record named 'main', or None if absent."""
    from superset.models import core as models
    return (
        session.query(models.Database)
        .filter_by(database_name='main')
        .first()
    )
def is_adhoc_metric(metric):
    """Return truthy when `metric` is a well-formed adhoc-metric dict.

    NOTE: this returns the value of a chained boolean expression, so the
    truthy result may be the label string itself rather than True; callers
    use it in boolean context. Dict inputs are indexed directly, so a dict
    missing these keys raises KeyError — presumably all adhoc metric dicts
    carry them (verify at callers).
    """
    return (
        isinstance(metric, dict) and
        (
            (
                # SIMPLE metrics need a column and an aggregate
                metric['expressionType'] == ADHOC_METRIC_EXPRESSION_TYPES['SIMPLE'] and
                metric['column'] and
                metric['aggregate']
            ) or
            (
                # SQL metrics need a free-form expression
                metric['expressionType'] == ADHOC_METRIC_EXPRESSION_TYPES['SQL'] and
                metric['sqlExpression']
            )
        ) and
        # every adhoc metric must carry a label
        metric['label']
    )
def get_metric_name(metric):
    """Return the label for adhoc metrics, or the metric itself (a name string)."""
    return metric['label'] if is_adhoc_metric(metric) else metric
def get_metric_names(metrics):
    """Map a list of metrics (adhoc dicts or name strings) to their names."""
    return [get_metric_name(metric) for metric in metrics]
  718. def ensure_path_exists(path):
  719. try:
  720. os.makedirs(path)
  721. except OSError as exc:
  722. if not (os.path.isdir(path) and exc.errno == errno.EEXIST):
  723. raise
def get_since_until(form_data):
    """Return `since` and `until` from form_data.

    This functiom supports both reading the keys separately (from `since` and
    `until`), as well as the new `time_range` key. Valid formats are:

        - ISO 8601
        - X days/years/hours/day/year/weeks
        - X days/years/hours/day/year/weeks ago
        - X days/years/hours/day/year/weeks from now
        - freeform

    Additionally, for `time_range` (these specify both `since` and `until`):

        - Last day
        - Last week
        - Last month
        - Last quarter
        - Last year
        - No filter
        - Last X seconds/minutes/hours/days/weeks/months/years
        - Next X seconds/minutes/hours/days/weeks/months/years
    """
    # 'since : until' pairs are joined with this separator
    separator = ' : '
    today = parse_human_datetime('today')
    common_time_frames = {
        'Last day': (today - relativedelta(days=1), today),
        'Last week': (today - relativedelta(weeks=1), today),
        'Last month': (today - relativedelta(months=1), today),
        'Last quarter': (today - relativedelta(months=3), today),
        'Last year': (today - relativedelta(years=1), today),
    }

    if 'time_range' in form_data:
        time_range = form_data['time_range']
        if separator in time_range:
            # explicit 'since : until' range
            since, until = time_range.split(separator, 1)
            since = parse_human_datetime(since)
            until = parse_human_datetime(until)
        elif time_range in common_time_frames:
            since, until = common_time_frames[time_range]
        elif time_range == 'No filter':
            since = until = None
        else:
            # 'Last|Next <num> <grain>' — grain must be a valid
            # relativedelta keyword (e.g. 'days', 'months')
            rel, num, grain = time_range.split()
            if rel == 'Last':
                since = today - relativedelta(**{grain: int(num)})
                until = today
            else:  # rel == 'Next'
                since = today
                until = today + relativedelta(**{grain: int(num)})
    else:
        # legacy separate `since`/`until` keys
        since = form_data.get('since', '')
        if since:
            since_words = since.split(' ')
            grains = ['days', 'years', 'hours', 'day', 'year', 'weeks']
            # a bare '7 days' means '7 days ago'
            if len(since_words) == 2 and since_words[1] in grains:
                since += ' ago'
            since = parse_human_datetime(since)
        until = parse_human_datetime(form_data.get('until', 'now'))
    return since, until
  780. def convert_legacy_filters_into_adhoc(fd):
  781. mapping = {'having': 'having_filters', 'where': 'filters'}
  782. if not fd.get('adhoc_filters'):
  783. fd['adhoc_filters'] = []
  784. for clause, filters in mapping.items():
  785. if clause in fd and fd[clause] != '':
  786. fd['adhoc_filters'].append(to_adhoc(fd, 'SQL', clause))
  787. if filters in fd:
  788. for filt in filter(lambda x: x is not None, fd[filters]):
  789. fd['adhoc_filters'].append(to_adhoc(filt, 'SIMPLE', clause))
  790. for key in ('filters', 'having', 'having_filters', 'where'):
  791. if key in fd:
  792. del fd[key]
  793. def split_adhoc_filters_into_base_filters(fd):
  794. """
  795. Mutates form data to restructure the adhoc filters in the form of the four base
  796. filters, `where`, `having`, `filters`, and `having_filters` which represent
  797. free form where sql, free form having sql, structured where clauses and structured
  798. having clauses.
  799. """
  800. adhoc_filters = fd.get('adhoc_filters')
  801. if isinstance(adhoc_filters, list):
  802. simple_where_filters = []
  803. simple_having_filters = []
  804. sql_where_filters = []
  805. sql_having_filters = []
  806. for adhoc_filter in adhoc_filters:
  807. expression_type = adhoc_filter.get('expressionType')
  808. clause = adhoc_filter.get('clause')
  809. if expression_type == 'SIMPLE':
  810. if clause == 'WHERE':
  811. simple_where_filters.append({
  812. 'col': adhoc_filter.get('subject'),
  813. 'op': adhoc_filter.get('operator'),
  814. 'val': adhoc_filter.get('comparator'),
  815. })
  816. elif clause == 'HAVING':
  817. simple_having_filters.append({
  818. 'col': adhoc_filter.get('subject'),
  819. 'op': adhoc_filter.get('operator'),
  820. 'val': adhoc_filter.get('comparator'),
  821. })
  822. elif expression_type == 'SQL':
  823. if clause == 'WHERE':
  824. sql_where_filters.append(adhoc_filter.get('sqlExpression'))
  825. elif clause == 'HAVING':
  826. sql_having_filters.append(adhoc_filter.get('sqlExpression'))
  827. fd['where'] = ' AND '.join(['({})'.format(sql) for sql in sql_where_filters])
  828. fd['having'] = ' AND '.join(['({})'.format(sql) for sql in sql_having_filters])
  829. fd['having_filters'] = simple_having_filters
  830. fd['filters'] = simple_where_filters
  831. def get_username():
  832. """Get username if within the flask context, otherwise return noffin'"""
  833. try:
  834. return g.user.username
  835. except Exception:
  836. pass
def MediumText():
    """Generic Text column type that upgrades to MEDIUMTEXT on MySQL."""
    return Text().with_variant(MEDIUMTEXT(), 'mysql')
  839. def shortid():
  840. return '{}'.format(uuid.uuid4())[-12:]