12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827 |
- # pylint: disable=C,R,W
- """This module contains the 'Viz' objects
- These objects represent the backend of all the visualizations that
- Superset can render.
- """
- from collections import defaultdict, OrderedDict
- import copy
- from datetime import datetime, timedelta
- from functools import reduce
- import hashlib
- import inspect
- from itertools import product
- import logging
- import math
- import pickle as pkl
- import re
- import traceback
- import uuid
- import os
- from dateutil import relativedelta as rdelta
- from flask import request
- from flask_babel import lazy_gettext as _
- import geohash
- from geopy.point import Point
- from markdown import markdown
- import numpy as np
- import pandas as pd
- from pandas.tseries.frequencies import to_offset
- from past.builtins import basestring
- import polyline
- import simplejson as json
- from superset import app, cache, get_css_manifest_files, utils
- from superset.exceptions import NullValueException, SpatialException
- from superset.utils import (
- DTTM_ALIAS,
- JS_MAX_INTEGER,
- merge_extra_filters,
- to_adhoc,
- )
config = app.config
# Stats logger backend (e.g. StatsD); pluggable via the STATS_LOGGER config key
stats_logger = config.get('STATS_LOGGER')

# All form_data keys that may carry metric definitions.  Order matters:
# BaseViz.process_metrics walks these in order to build an OrderedDict,
# and TableViz relies on that ordering for its column layout.
METRIC_KEYS = [
    'metric', 'metrics', 'percent_metrics', 'metric_2', 'secondary_metric',
    'x', 'y', 'size',
]
class BaseViz(object):

    """All visualizations derive this base class"""

    viz_type = None
    verbose_name = 'Base Viz'
    credits = ''
    is_timeseries = False
    default_fillna = 0
    cache_type = 'df'
    # metrics read back from SQL can come through as strings; coerce to numeric
    enforce_numerical_metrics = True

    def __init__(self, datasource, form_data, force=False):
        """Bind the viz to its datasource and the explore-view form_data.

        :param datasource: table/druid datasource object being queried
        :param form_data: dict of chart controls posted by the frontend
        :param force: when True, bypass the cache and query the source
        """
        if not datasource:
            raise Exception(_('Viz is missing a datasource'))
        self.datasource = datasource
        self.request = request
        self.viz_type = form_data.get('viz_type')
        self.form_data = form_data

        self.query = ''
        self.token = self.form_data.get(
            'token', 'token_' + uuid.uuid4().hex[:8])

        self.groupby = self.form_data.get('groupby') or []
        self.time_shift = timedelta()

        self.status = None
        self.error_message = None
        self.force = force

        # Keeping track of whether some data came from cache
        # this is useful to trigger the <CachedLabel /> when
        # in the cases where visualization have many queries
        # (FilterBox for instance)
        self._some_from_cache = False
        self._any_cache_key = None
        self._any_cached_dttm = None
        self._extra_chart_data = []

        self.process_metrics()

    def process_metrics(self):
        """Collect every metric referenced in form_data.

        Populates ``self.metric_dict`` (label -> metric), ``self.all_metrics``
        and ``self.metric_labels``.
        """
        # metrics in TableViz is order sensitive, so metric_dict should be
        # OrderedDict
        self.metric_dict = OrderedDict()
        fd = self.form_data
        for mkey in METRIC_KEYS:
            val = fd.get(mkey)
            if val:
                if not isinstance(val, list):
                    val = [val]
                for o in val:
                    label = self.get_metric_label(o)
                    if isinstance(o, dict):
                        o['label'] = label
                    self.metric_dict[label] = o

        # Cast to list needed to return serializable object in py3
        self.all_metrics = list(self.metric_dict.values())
        self.metric_labels = list(self.metric_dict.keys())

    def get_metric_label(self, metric):
        """Return the display label for a metric (plain string or adhoc dict)."""
        if isinstance(metric, str):
            return metric

        if isinstance(metric, dict):
            metric = metric.get('label')

        if self.datasource.type == 'table':
            # some engines need expression labels mutated (e.g. quoting rules)
            db_engine_spec = self.datasource.database.db_engine_spec
            metric = db_engine_spec.mutate_expression_label(metric)
        return metric

    @staticmethod
    def handle_js_int_overflow(data):
        """Stringify any int too large for JavaScript's Number type.

        ``data`` is a payload dict whose 'records' key holds a list of row
        dicts; values beyond ``JS_MAX_INTEGER`` are converted to strings so
        they survive the trip through JSON to the browser.
        """
        # 'records' holds a list of row dicts; default to an empty list
        # (the original defaulted to dict(), which iterates keys)
        for d in data.get('records', []):
            for k, v in list(d.items()):
                if isinstance(v, int):
                    # if an int is too big for JavaScript to handle
                    # convert it to a string
                    if abs(v) > JS_MAX_INTEGER:
                        d[k] = str(v)
        return data

    def run_extra_queries(self):
        """Lifecycle method to use when more than one query is needed

        In rare-ish cases, a visualization may need to execute multiple
        queries. That is the case for FilterBox or for time comparison
        in Line chart for instance.

        In those cases, we need to make sure these queries run before the
        main `get_payload` method gets called, so that the overall caching
        metadata can be right. The way it works here is that if any of
        the previous `get_df_payload` calls hit the cache, the main
        payload's metadata will reflect that.

        The multi-query support may need more work to become a first class
        use case in the framework, and for the UI to reflect the subtleties
        (show that only some of the queries were served from cache for
        instance). In the meantime, since multi-query is rare, we treat
        it with a bit of a hack. Note that the hack became necessary
        when moving from caching the visualization's data itself, to caching
        the underlying query(ies).
        """
        pass

    def handle_nulls(self, df):
        """Fill NULLs in ``df`` using per-column fill values."""
        fillna = self.get_fillna_for_columns(df.columns)
        return df.fillna(fillna)

    def get_fillna_for_col(self, col):
        """Returns the value to use as filler for a specific Column.type"""
        if col:
            if col.is_string:
                return ' NULL'
        return self.default_fillna

    def get_fillna_for_columns(self, columns=None):
        """Returns a dict or scalar that can be passed to DataFrame.fillna"""
        if columns is None:
            return self.default_fillna
        columns_dict = {col.column_name: col for col in self.datasource.columns}
        fillna = {
            c: self.get_fillna_for_col(columns_dict.get(c))
            for c in columns
        }
        return fillna

    def get_samples(self):
        """Return up to 1000 raw rows (all columns, no aggregation)."""
        query_obj = self.query_obj()
        query_obj.update({
            'groupby': [],
            'metrics': [],
            'row_limit': 1000,
            'columns': [o.column_name for o in self.datasource.columns],
        })
        df = self.get_df(query_obj)
        return df.to_dict(orient='records')

    def get_df(self, query_obj=None):
        """Returns a pandas dataframe based on the query object"""
        if not query_obj:
            query_obj = self.query_obj()
        if not query_obj:
            return None

        self.error_msg = ''

        timestamp_format = None
        if self.datasource.type == 'table':
            dttm_col = self.datasource.get_col(query_obj['granularity'])
            if dttm_col:
                timestamp_format = dttm_col.python_date_format

        # The datasource here can be different backend but the interface is common
        self.results = self.datasource.query(query_obj)
        self.query = self.results.query
        self.status = self.results.status
        self.error_message = self.results.error_message

        df = self.results.df
        # Transform the timestamp we received from database to pandas supported
        # datetime format. If no python_date_format is specified, the pattern will
        # be considered as the default ISO date format
        # If the datetime format is unix, the parse will use the corresponding
        # parsing logic.
        if df is not None and not df.empty:
            if DTTM_ALIAS in df.columns:
                if timestamp_format in ('epoch_s', 'epoch_ms'):
                    # Column has already been formatted as a timestamp.
                    df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
                else:
                    df[DTTM_ALIAS] = pd.to_datetime(
                        df[DTTM_ALIAS], utc=False, format=timestamp_format)
                if self.datasource.offset:
                    df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
                df[DTTM_ALIAS] += self.time_shift

            if self.enforce_numerical_metrics:
                self.df_metrics_to_num(df)

            # fix: DataFrame.replace returns a new frame; the original
            # discarded the result, so infinities were never scrubbed
            df = df.replace([np.inf, -np.inf], np.nan)
            df = self.handle_nulls(df)
        return df

    def df_metrics_to_num(self, df):
        """Converting metrics to numeric when pandas.read_sql cannot"""
        metrics = self.metric_labels
        for col, dtype in df.dtypes.items():
            if dtype.type == np.object_ and col in metrics:
                df[col] = pd.to_numeric(df[col], errors='coerce')

    def process_query_filters(self):
        """Normalize legacy/extra filters in form_data (mutates in place)."""
        utils.convert_legacy_filters_into_adhoc(self.form_data)
        merge_extra_filters(self.form_data)
        utils.split_adhoc_filters_into_base_filters(self.form_data)

    def query_obj(self):
        """Building a query object"""
        form_data = self.form_data
        self.process_query_filters()
        gb = form_data.get('groupby') or []
        metrics = self.all_metrics or []
        columns = form_data.get('columns') or []
        # dedupe while preserving order
        groupby = []
        for o in gb + columns:
            if o not in groupby:
                groupby.append(o)

        is_timeseries = self.is_timeseries
        if DTTM_ALIAS in groupby:
            groupby.remove(DTTM_ALIAS)
            is_timeseries = True

        granularity = (
            form_data.get('granularity') or
            form_data.get('granularity_sqla')
        )
        limit = int(form_data.get('limit') or 0)
        timeseries_limit_metric = form_data.get('timeseries_limit_metric')
        row_limit = int(form_data.get('row_limit') or config.get('ROW_LIMIT'))

        # default order direction
        order_desc = form_data.get('order_desc', True)

        since, until = utils.get_since_until(form_data)
        time_shift = form_data.get('time_shift', '')
        self.time_shift = utils.parse_human_timedelta(time_shift)
        from_dttm = None if since is None else (since - self.time_shift)
        to_dttm = None if until is None else (until - self.time_shift)
        if from_dttm and to_dttm and from_dttm > to_dttm:
            raise Exception(_('From date cannot be larger than to date'))

        self.from_dttm = from_dttm
        self.to_dttm = to_dttm

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        extras = {
            'where': form_data.get('where', ''),
            'having': form_data.get('having', ''),
            'having_druid': form_data.get('having_filters', []),
            'time_grain_sqla': form_data.get('time_grain_sqla', ''),
            'druid_time_origin': form_data.get('druid_time_origin', ''),
        }

        d = {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'is_timeseries': is_timeseries,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': self.form_data.get('filters', []),
            'timeseries_limit': limit,
            'extras': extras,
            'timeseries_limit_metric': timeseries_limit_metric,
            'order_desc': order_desc,
            'prequeries': [],
            'is_prequery': False,
        }
        return d

    @property
    def cache_timeout(self):
        """Cache TTL precedence: form_data > datasource > database > default."""
        if self.form_data.get('cache_timeout') is not None:
            return int(self.form_data.get('cache_timeout'))
        if self.datasource.cache_timeout is not None:
            return self.datasource.cache_timeout
        # fix: the original had `(hasattr(...) and timeout) is not None`,
        # which is always true and raised AttributeError for datasources
        # without a `database` attribute
        if (
                hasattr(self.datasource, 'database') and
                self.datasource.database.cache_timeout is not None):
            return self.datasource.database.cache_timeout
        return config.get('CACHE_DEFAULT_TIMEOUT')

    def get_json(self):
        """Serialize the full payload for the frontend."""
        return json.dumps(
            self.get_payload(),
            default=utils.json_int_dttm_ser, ignore_nan=True)

    def cache_key(self, query_obj, **extra):
        """
        The cache key is made out of the key/values in `query_obj`, plus any
        other key/values in `extra`.

        We remove datetime bounds that are hard values, and replace them with
        the use-provided inputs to bounds, which may be time-relative (as in
        "5 days ago" or "now").

        The `extra` arguments are currently used by time shift queries, since
        different time shifts wil differ only in the `from_dttm` and `to_dttm`
        values which are stripped.
        """
        cache_dict = copy.copy(query_obj)
        cache_dict.update(extra)

        for k in ['from_dttm', 'to_dttm']:
            del cache_dict[k]

        cache_dict['time_range'] = self.form_data.get('time_range')
        cache_dict['datasource'] = self.datasource.uid
        json_data = self.json_dumps(cache_dict, sort_keys=True)
        return hashlib.md5(json_data.encode('utf-8')).hexdigest()

    def get_payload(self, query_obj=None):
        """Returns a payload of metadata and data"""
        self.run_extra_queries()
        payload = self.get_df_payload(query_obj)

        df = payload.get('df')
        if self.status != utils.QueryStatus.FAILED:
            if df is not None and df.empty:
                payload['error'] = 'No data'
            else:
                payload['data'] = self.get_data(df)
        if 'df' in payload:
            del payload['df']
        return payload

    def get_df_payload(self, query_obj=None, **kwargs):
        """Handles caching around the df payload retrieval"""
        if not query_obj:
            query_obj = self.query_obj()
        cache_key = self.cache_key(query_obj, **kwargs) if query_obj else None
        logging.info('Cache key: {}'.format(cache_key))
        is_loaded = False
        stacktrace = None
        df = None
        cached_dttm = datetime.utcnow().isoformat().split('.')[0]
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr('loaded_from_cache')
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value['df']
                    self.query = cache_value['query']
                    self._any_cached_dttm = cache_value['dttm']
                    self._any_cache_key = cache_key
                    self.status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                except Exception as e:
                    # a corrupt cache entry should not break the chart;
                    # fall through and re-query the source
                    logging.exception(e)
                    logging.error('Error reading cache: ' +
                                  utils.error_msg_from_exception(e))
                logging.info('Serving from cache')

        if query_obj and not is_loaded:
            try:
                df = self.get_df(query_obj)
                if self.status != utils.QueryStatus.FAILED:
                    stats_logger.incr('loaded_from_source')
                    is_loaded = True
            except Exception as e:
                logging.exception(e)
                if not self.error_message:
                    self.error_message = '{}'.format(e)
                self.status = utils.QueryStatus.FAILED
                stacktrace = traceback.format_exc()

            if (
                    is_loaded and
                    cache_key and
                    cache and
                    self.status != utils.QueryStatus.FAILED):
                try:
                    cache_value = dict(
                        dttm=cached_dttm,
                        df=df,
                        query=self.query,
                    )
                    cache_value = pkl.dumps(
                        cache_value, protocol=pkl.HIGHEST_PROTOCOL)

                    logging.info('Caching {} chars at key {}'.format(
                        len(cache_value), cache_key))

                    stats_logger.incr('set_cache_key')
                    cache.set(
                        cache_key,
                        cache_value,
                        timeout=self.cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning('Could not cache key {}'.format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        return {
            'cache_key': self._any_cache_key,
            'cached_dttm': self._any_cached_dttm,
            'cache_timeout': self.cache_timeout,
            'df': df,
            'error': self.error_message,
            'form_data': self.form_data,
            'is_cached': self._any_cache_key is not None,
            'query': self.query,
            'status': self.status,
            'stacktrace': stacktrace,
            'rowcount': len(df.index) if df is not None else 0,
        }

    def json_dumps(self, obj, sort_keys=False):
        """Serialize with NaN handling and int-epoch datetime encoding."""
        return json.dumps(
            obj,
            default=utils.json_int_dttm_ser,
            ignore_nan=True,
            sort_keys=sort_keys,
        )

    @property
    def data(self):
        """This is the data object serialized to the js layer"""
        content = {
            'form_data': self.form_data,
            'token': self.token,
            'viz_name': self.viz_type,
            'filter_select_enabled': self.datasource.filter_select_enabled,
        }
        return content

    def get_csv(self):
        """Render the query result as CSV text."""
        df = self.get_df()
        include_index = not isinstance(df.index, pd.RangeIndex)
        csv = df.to_csv(index=include_index, **config.get('CSV_EXPORT'))
        return csv

    def get_xlsx(self):
        """Render the query result as the bytes of an .xlsx workbook.

        The previous implementation deleted *every* .xlsx file found under
        the current working directory, then wrote a timestamped workbook to
        disk and read it back without closing the handle.  Building the
        workbook in memory avoids touching the filesystem entirely.
        """
        import io

        df = self.get_df()
        output = io.BytesIO()
        writer = pd.ExcelWriter(output)
        df.to_excel(writer, 'Sheet1')
        writer.save()
        return output.getvalue()

    def get_data(self, df):
        """Default data payload: the dataframe as a list of row dicts."""
        # fix: use the dataframe handed in by get_payload; the original
        # called self.get_df() again, re-running the query
        return df.to_dict(orient='records')

    @property
    def json_data(self):
        return json.dumps(self.data)
# Additional ECharts polar-coordinate bar chart
class EchartsBarPolar(BaseViz):

    """Bar chart on polar coordinates, rendered client-side by ECharts"""

    viz_type = 'echarts_bar_polar'  # must match the frontend viz key
    is_timeseries = False

    def should_be_timeseries(self):
        """Return the 'include_time' flag after validating granularity."""
        fd = self.form_data
        conditions_met = (
            (fd.get('granularity') and fd.get('granularity') != 'all') or
            (fd.get('granularity_sqla') and fd.get('time_grain_sqla'))
        )
        if fd.get('include_time') and not conditions_met:
            raise Exception(_(
                'Pick a granularity in the Time section or '
                "uncheck 'Include Time'"))
        return fd.get('include_time')

    def query_obj(self):
        """Build the query object, supporting raw-columns or groupby modes."""
        d = super(EchartsBarPolar, self).query_obj()
        fd = self.form_data

        if fd.get('all_columns') and (fd.get('groupby') or fd.get('metrics')):
            raise Exception(_(
                'Choose either fields to [Group By] and [Metrics] or '
                '[Columns], not both'))

        sort_by = fd.get('timeseries_limit_metric')
        if fd.get('all_columns'):
            d['columns'] = fd.get('all_columns')
            d['groupby'] = []
            order_by_cols = fd.get('order_by_cols') or []
            d['orderby'] = [json.loads(t) for t in order_by_cols]
        elif sort_by:
            if sort_by not in d['metrics']:
                d['metrics'] += [sort_by]
            d['orderby'] = [(sort_by, not fd.get('order_desc', True))]

        if 'percent_metrics' in fd:
            # fix: guard against a null percent_metrics value, which
            # would make filter() raise TypeError
            d['metrics'] = d['metrics'] + list(filter(
                lambda m: m not in d['metrics'],
                fd['percent_metrics'] or [],
            ))

        d['is_timeseries'] = self.should_be_timeseries()
        return d

    def get_data(self, df):
        """Return the frame as records/columns for the ECharts renderer."""
        if not self.should_be_timeseries() and DTTM_ALIAS in df:
            del df[DTTM_ALIAS]
        return dict(
            records=df.to_dict(orient='records'),
            columns=list(df.columns),
        )
# China map
class ChinaMap(BaseViz):

    """ ChinaMap Viz """

    viz_type = "ChinaMap"
    verbose_name = _("ChinaMap")
    is_timeseries = False  # queried without a required time column

    def get_data(self, df):
        """Return [data, names, max, min] for the ECharts China map.

        Expects the first column to hold region names and the second
        the metric value — TODO confirm against the frontend contract.
        """
        df.sort_values(by=df.columns[0], inplace=True)
        # fix: removed leftover debug print() of df.values
        rows = df.values.tolist()
        data = [{'name': row[0], 'value': row[1]} for row in rows]
        data_name = [row[0] for row in rows]
        values = [row[1] for row in rows]
        return [data, data_name, max(values), min(values)]
# Custom ECharts chart added as an experiment
class MyEchartsBar(BaseViz):

    """ MyEchartsBar """

    viz_type = "MyEchartsBar"
    verbose_name = _("MyEchartsBar")
    is_timeseries = False

    # Build the query based on the user's selections (fields, order by,
    # group by, etc.)
    def query_obj(self):
        d = super(MyEchartsBar, self).query_obj()
        fd = self.form_data

        if fd.get('all_columns') and (fd.get('groupby') or fd.get('metrics')):
            raise Exception(_(
                'Choose either fields to [Group By] and [Metrics] or '
                '[Columns], not both'))

        sort_by = fd.get('timeseries_limit_metric')
        if fd.get('all_columns'):
            d['columns'] = fd.get('all_columns')
            d['groupby'] = []
            order_by_cols = fd.get('order_by_cols') or []
            d['orderby'] = [json.loads(t) for t in order_by_cols]
        elif sort_by:
            if sort_by not in d['metrics']:
                d['metrics'] += [sort_by]
            d['orderby'] = [(sort_by, not fd.get('order_desc', True))]

        if 'percent_metrics' in fd:
            # fix: guard against a null percent_metrics value, which
            # would make filter() raise TypeError
            d['metrics'] = d['metrics'] + list(filter(
                lambda m: m not in d['metrics'],
                fd['percent_metrics'] or [],
            ))

        d['is_timeseries'] = False
        return d

    # Shape the result for the frontend
    def get_data(self, df):
        return dict(
            records=df.to_dict(orient='records'),
            columns=list(df.columns),
        )
class TableViz(BaseViz):

    """A basic html table that is sortable and searchable"""

    viz_type = 'table'
    verbose_name = _('Table View')
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False
    enforce_numerical_metrics = False

    def should_be_timeseries(self):
        """Return the 'include_time' flag after validating granularity."""
        fd = self.form_data
        # TODO handle datasource-type-specific code in datasource
        conditions_met = (
            (fd.get('granularity') and fd.get('granularity') != 'all') or
            (fd.get('granularity_sqla') and fd.get('time_grain_sqla'))
        )
        if fd.get('include_time') and not conditions_met:
            raise Exception(_(
                'Pick a granularity in the Time section or '
                "uncheck 'Include Time'"))
        return fd.get('include_time')

    def query_obj(self):
        """Build the query object for raw-columns or groupby/metrics mode."""
        d = super(TableViz, self).query_obj()
        fd = self.form_data

        if fd.get('all_columns') and (fd.get('groupby') or fd.get('metrics')):
            raise Exception(_(
                'Choose either fields to [Group By] and [Metrics] or '
                '[Columns], not both'))

        sort_by = fd.get('timeseries_limit_metric')
        if fd.get('all_columns'):
            d['columns'] = fd.get('all_columns')
            d['groupby'] = []
            order_by_cols = fd.get('order_by_cols') or []
            d['orderby'] = [json.loads(t) for t in order_by_cols]
        elif sort_by:
            sort_by_label = utils.get_metric_name(sort_by)
            if sort_by_label not in utils.get_metric_names(d['metrics']):
                d['metrics'] += [sort_by]
            d['orderby'] = [(sort_by, not fd.get('order_desc', True))]

        # Add all percent metrics that are not already in the list
        if 'percent_metrics' in fd:
            d['metrics'] = d['metrics'] + list(filter(
                lambda m: m not in d['metrics'],
                fd['percent_metrics'] or [],
            ))

        d['is_timeseries'] = self.should_be_timeseries()
        return d

    def get_data(self, df):
        """Build the records/columns payload, computing percent metrics."""
        fd = self.form_data
        if (
                not self.should_be_timeseries() and
                df is not None and
                DTTM_ALIAS in df
        ):
            del df[DTTM_ALIAS]

        # Sum up and compute percentages for all percent metrics
        percent_metrics = fd.get('percent_metrics') or []
        percent_metrics = [self.get_metric_label(m) for m in percent_metrics]

        if len(percent_metrics):
            percent_metrics = list(filter(lambda m: m in df, percent_metrics))
            # fix: use Series.sum() instead of reduce(lambda a, b: a + b, ...)
            # -- same result on non-empty numeric columns, but robust for
            # empty columns (reduce raises on an empty iterable) and runs in C
            metric_sums = {
                m: df[m].sum()
                for m in percent_metrics
            }
            metric_percents = {
                m: list(map(
                    lambda a: None if metric_sums[m] == 0 else a / metric_sums[m], df[m]))
                for m in percent_metrics
            }
            for m in percent_metrics:
                m_name = '%' + m
                df[m_name] = pd.Series(metric_percents[m], name=m_name)
            # Remove metrics that are not in the main metrics list
            metrics = fd.get('metrics') or []
            metrics = [self.get_metric_label(m) for m in metrics]
            for m in filter(
                lambda m: m not in metrics and m in df.columns,
                percent_metrics,
            ):
                del df[m]

        data = self.handle_js_int_overflow(
            dict(
                records=df.to_dict(orient='records'),
                columns=list(df.columns),
            ))

        return data

    def json_dumps(self, obj, sort_keys=False):
        """ISO-date serialization for raw-column mode, else the default."""
        if self.form_data.get('all_columns'):
            return json.dumps(
                obj,
                default=utils.json_iso_dttm_ser,
                sort_keys=sort_keys,
                ignore_nan=True)
        else:
            # fix: forward sort_keys; the original dropped it, so
            # cache_key()'s sort_keys=True was silently ignored
            return super(TableViz, self).json_dumps(obj, sort_keys)
class TimeTableViz(BaseViz):

    """A data table with rich time-series related columns"""

    viz_type = 'time_table'
    verbose_name = _('Time Table View')
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = True

    def query_obj(self):
        """Validate controls: at least one metric, single metric if grouped."""
        d = super(TimeTableViz, self).query_obj()
        fd = self.form_data
        if not fd.get('metrics'):
            raise Exception(_('Pick at least one metric'))

        if fd.get('groupby') and len(fd.get('metrics')) > 1:
            raise Exception(_(
                "When using 'Group By' you are limited to use a single metric"))
        return d

    def get_data(self, df):
        """Pivot on the time column and return records keyed by timestamp."""
        fd = self.form_data
        columns = None
        values = self.metric_labels
        if fd.get('groupby'):
            values = self.metric_labels[0]
            columns = fd.get('groupby')
        pt = df.pivot_table(
            index=DTTM_ALIAS,
            columns=columns,
            values=values)
        pt.index = pt.index.map(str)
        pt = pt.sort_index()
        return dict(
            records=pt.to_dict(orient='index'),
            columns=list(pt.columns),
            # fix: the original used len(fd.get('groupby')) > 0, which
            # raises TypeError when groupby is absent (None)
            is_group_by=bool(fd.get('groupby')),
        )
class PivotTableViz(BaseViz):

    """A pivot table view, define your rows, columns and metrics"""

    viz_type = 'pivot_table'
    verbose_name = _('Pivot Table')
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def query_obj(self):
        """Validate the pivot configuration and build the query object.

        Raises when no 'Group by' field or metric is selected, or when
        the same column appears in both 'Group by' and 'Columns'.
        """
        d = super(PivotTableViz, self).query_obj()
        groupby = self.form_data.get('groupby') or []
        columns = self.form_data.get('columns') or []
        metrics = self.form_data.get('metrics')
        if not groupby:
            raise Exception(_("Please choose at least one 'Group by' field "))
        if not metrics:
            raise Exception(_('Please choose at least one metric'))
        # A single membership test suffices: a value overlaps iff it is
        # in both lists, so checking one direction covers the other too
        # (the original duplicated the check in both directions).
        if any(v in groupby for v in columns):
            # Fixed the missing opening quote in the original message
            raise Exception(_("'Group By' and 'Columns' can't overlap"))
        return d

    def get_data(self, df):
        """Pivot the result set and render it as an HTML table."""
        if (
            self.form_data.get('granularity') == 'all' and
            DTTM_ALIAS in df):
            # No time grain selected: the time column carries no information
            del df[DTTM_ALIAS]
        df = df.pivot_table(
            index=self.form_data.get('groupby'),
            columns=self.form_data.get('columns'),
            values=[self.get_metric_label(m) for m in self.form_data.get('metrics')],
            aggfunc=self.form_data.get('pandas_aggfunc'),
            margins=self.form_data.get('pivot_margins'),
        )
        # Display metrics side by side with each column
        if self.form_data.get('combine_metric'):
            df = df.stack(0).unstack()
        return dict(
            columns=list(df.columns),
            html=df.to_html(
                na_rep='',
                classes=(
                    'dataframe table table-striped table-bordered '
                    'table-condensed table-hover').split(' ')),
        )
class MarkupViz(BaseViz):

    """Use html or markdown to create a free form widget"""

    viz_type = 'markup'
    verbose_name = _('Markup')
    is_timeseries = False

    def query_obj(self):
        # Pure markup widget: no data is ever queried
        return None

    def get_df(self, query_obj=None):
        # No dataframe either; all content comes from the form data
        return None

    def get_data(self, df):
        """Return the rendered markup plus the theme CSS manifest."""
        code = self.form_data.get('code', '')
        if self.form_data.get('markup_type') == 'markdown':
            code = markdown(code)
        return dict(html=code, theme_css=get_css_manifest_files('theme'))
class SeparatorViz(MarkupViz):

    """Use to create section headers in a dashboard, similar to `Markup`"""

    # Inherits all markup rendering behavior; only the type and name differ
    viz_type = 'separator'
    verbose_name = _('Separator')
class WordCloudViz(BaseViz):

    """Build a colorful word cloud

    Uses the nice library at:
    https://github.com/jasondavies/d3-cloud
    """

    viz_type = 'word_cloud'
    verbose_name = _('Word Cloud')
    is_timeseries = False

    def query_obj(self):
        # Group by the selected series column; each group becomes a word
        d = super(WordCloudViz, self).query_obj()
        d['groupby'] = [self.form_data.get('series')]
        return d
class TreemapViz(BaseViz):

    """Tree map visualisation for hierarchical data."""

    viz_type = 'treemap'
    verbose_name = _('Treemap')
    credits = '<a href="https://d3js.org">d3.js</a>'
    is_timeseries = False

    def _nest(self, metric, df):
        """Recursively convert a (Multi)Indexed frame into d3 nest format.

        At the deepest level each index entry becomes a ``{name, value}``
        leaf; higher levels become ``{name, children}`` nodes built by
        recursing into ``df.loc[label]``.
        """
        nlevels = df.index.nlevels
        if nlevels == 1:
            result = [{'name': n, 'value': v}
                      for n, v in zip(df.index, df[metric])]
        else:
            result = [{'name': l, 'children': self._nest(metric, df.loc[l])}
                      for l in df.index.levels[0]]
        return result

    def get_data(self, df):
        """Return one tree per metric column, rooted at the metric name."""
        df = df.set_index(self.form_data.get('groupby'))
        chart_data = [{'name': metric, 'children': self._nest(metric, df)}
                      for metric in df.columns]
        return chart_data
class CalHeatmapViz(BaseViz):

    """Calendar heatmap."""

    viz_type = 'cal_heatmap'
    verbose_name = _('Calendar Heatmap')
    credits = (
        '<a href=https://github.com/wa0x6e/cal-heatmap>cal-heatmap</a>')
    is_timeseries = True

    def get_data(self, df):
        """Build a ``{metric: {epoch_seconds: value}}`` payload plus the
        domain/range information cal-heatmap needs to draw the calendar.

        Raises when either time bound is missing, as the number of domain
        units to render is derived from the since/until span.
        """
        form_data = self.form_data

        data = {}
        records = df.to_dict('records')
        for metric in self.metric_labels:
            data[metric] = {
                # Timestamp.value is nanoseconds since epoch; floor-divide
                # so the keys are integral epoch seconds (true division on
                # Python 3 yields float keys like '1514764800.0').
                str(obj[DTTM_ALIAS].value // 10**9): obj.get(metric)
                for obj in records
            }

        start, end = utils.get_since_until(form_data)
        if not start or not end:
            raise Exception('Please provide both time bounds (Since and Until)')
        domain = form_data.get('domain_granularity')
        diff_delta = rdelta.relativedelta(end, start)
        diff_secs = (end - start).total_seconds()

        # Number of domain units spanned by [start, end], inclusive
        if domain == 'year':
            range_ = diff_delta.years + 1
        elif domain == 'month':
            range_ = diff_delta.years * 12 + diff_delta.months + 1
        elif domain == 'week':
            range_ = diff_delta.years * 53 + diff_delta.weeks + 1
        elif domain == 'day':
            range_ = int(diff_secs // (24 * 60 * 60) + 1)
        else:
            range_ = int(diff_secs // (60 * 60) + 1)

        return {
            'data': data,
            'start': start,
            'domain': domain,
            'subdomain': form_data.get('subdomain_granularity'),
            'range': range_,
        }

    def query_obj(self):
        """Query all selected metrics over the configured time range."""
        d = super(CalHeatmapViz, self).query_obj()
        fd = self.form_data
        d['metrics'] = fd.get('metrics')
        return d
class NVD3Viz(BaseViz):

    """Base class for all nvd3 vizs"""

    # Abstract base: concrete subclasses set their own viz_type
    credits = '<a href="http://nvd3.org/">NVD3.org</a>'
    viz_type = None
    verbose_name = 'Base NVD3 Viz'
    is_timeseries = False
class BoxPlotViz(NVD3Viz):

    """Box plot viz from ND3"""

    viz_type = 'box_plot'
    verbose_name = _('Box Plot')
    sort_series = False
    is_timeseries = True

    def to_series(self, df, classed='', title_suffix=''):
        """Convert the aggregated frame into NVD3 box-plot series.

        Each row index becomes a label; the (metric, statistic) column
        pairs are regrouped into one dict of statistics per metric.
        """
        label_sep = ' - '
        chart_data = []
        for index_value, row in zip(df.index, df.to_dict(orient='records')):
            if isinstance(index_value, tuple):
                index_value = label_sep.join(index_value)
            boxes = defaultdict(dict)
            for (label, key), value in row.items():
                if key == 'median':
                    key = 'Q2'  # NVD3 expects Q1/Q2/Q3 naming
                boxes[label][key] = value
            for label, box in boxes.items():
                if len(self.form_data.get('metrics')) > 1:
                    # need to render data labels with metrics
                    chart_label = label_sep.join([index_value, label])
                else:
                    chart_label = index_value
                chart_data.append({
                    'label': chart_label,
                    'values': box,
                })
        return chart_data

    def get_data(self, df):
        """Aggregate each group into box-plot statistics.

        The whisker functions are chosen from the form's whisker option;
        values outside the whiskers are reported as outliers.
        """
        form_data = self.form_data
        df = df.fillna(0)

        # conform to NVD3 names
        def Q1(series):  # need to be named functions - can't use lambdas
            return np.percentile(series, 25)

        def Q3(series):
            return np.percentile(series, 75)

        whisker_type = form_data.get('whisker_options')
        if whisker_type == 'Tukey':

            def whisker_high(series):
                upper_outer_lim = Q3(series) + 1.5 * (Q3(series) - Q1(series))
                series = series[series <= upper_outer_lim]
                return series[np.abs(series - upper_outer_lim).argmin()]

            def whisker_low(series):
                lower_outer_lim = Q1(series) - 1.5 * (Q3(series) - Q1(series))
                # find the closest value above the lower outer limit
                series = series[series >= lower_outer_lim]
                return series[np.abs(series - lower_outer_lim).argmin()]

        elif whisker_type == 'Min/max (no outliers)':

            def whisker_high(series):
                return series.max()

            def whisker_low(series):
                return series.min()

        elif whisker_type and ' percentiles' in whisker_type:
            # e.g. '10/90 percentiles'; the truthiness guard keeps a None
            # whisker_type from raising TypeError on the `in` test so the
            # ValueError below is reached instead.
            low, high = whisker_type.replace(' percentiles', '').split('/')

            def whisker_high(series):
                return np.percentile(series, int(high))

            def whisker_low(series):
                return np.percentile(series, int(low))

        else:
            raise ValueError('Unknown whisker type: {}'.format(whisker_type))

        def outliers(series):
            above = series[series > whisker_high(series)]
            below = series[series < whisker_low(series)]
            # pandas sometimes doesn't like getting lists back here
            return set(above.tolist() + below.tolist())

        aggregate = [Q1, np.median, Q3, whisker_high, whisker_low, outliers]
        df = df.groupby(form_data.get('groupby')).agg(aggregate)
        chart_data = self.to_series(df)
        return chart_data
class BubbleViz(NVD3Viz):

    """Based on the NVD3 bubble chart"""

    viz_type = 'bubble'
    verbose_name = _('Bubble Chart')
    is_timeseries = False

    def query_obj(self):
        """Build the query for entity (+ optional series) with x/y/size metrics.

        Raises unless the entity and all three metrics are selected.
        """
        form_data = self.form_data
        d = super(BubbleViz, self).query_obj()
        d['groupby'] = [
            form_data.get('entity'),
        ]
        if form_data.get('series'):
            d['groupby'].append(form_data.get('series'))

        # Stashed on self so get_data() can pick the columns back out
        self.x_metric = form_data.get('x')
        self.y_metric = form_data.get('y')
        self.z_metric = form_data.get('size')
        self.entity = form_data.get('entity')
        self.series = form_data.get('series') or self.entity
        d['row_limit'] = form_data.get('limit')

        d['metrics'] = [
            self.z_metric,
            self.x_metric,
            self.y_metric,
        ]
        if not all(d['metrics'] + [self.entity]):
            raise Exception(_('Pick a metric for x, y and size'))
        return d

    def get_data(self, df):
        """Group rows into one NVD3 series per value of the series column."""
        df['x'] = df[[utils.get_metric_name(self.x_metric)]]
        df['y'] = df[[utils.get_metric_name(self.y_metric)]]
        df['size'] = df[[utils.get_metric_name(self.z_metric)]]
        df['shape'] = 'circle'
        df['group'] = df[[self.series]]

        series = defaultdict(list)
        for row in df.to_dict(orient='records'):
            series[row['group']].append(row)
        chart_data = []
        for k, v in series.items():
            chart_data.append({
                'key': k,
                'values': v})
        return chart_data
class BulletViz(NVD3Viz):

    """Based on the NVD3 bullet chart"""

    viz_type = 'bullet'
    verbose_name = _('Bullet Chart')
    is_timeseries = False

    def query_obj(self):
        """Parse the comma-separated range/marker fields and query the metric."""
        form_data = self.form_data
        d = super(BulletViz, self).query_obj()
        self.metric = form_data.get('metric')

        def as_strings(field):
            # Comma-separated text field -> list of strings ([] when empty)
            value = form_data.get(field)
            return value.split(',') if value else []

        def as_floats(field):
            return [float(x) for x in as_strings(field)]

        self.ranges = as_floats('ranges')
        self.range_labels = as_strings('range_labels')
        self.markers = as_floats('markers')
        self.marker_labels = as_strings('marker_labels')
        self.marker_lines = as_floats('marker_lines')
        self.marker_line_labels = as_strings('marker_line_labels')

        d['metrics'] = [
            self.metric,
        ]
        if not self.metric:
            raise Exception(_('Pick a metric to display'))
        return d

    def get_data(self, df):
        """Shape the metric values and configured ranges/markers for NVD3."""
        df = df.fillna(0)
        df['metric'] = df[[self.get_metric_label(self.metric)]]
        values = df['metric'].values
        return {
            'measures': values.tolist(),
            # Default range: 0 up to 110% of the largest measure
            'ranges': self.ranges or [0, values.max() * 1.1],
            'rangeLabels': self.range_labels or None,
            'markers': self.markers or None,
            'markerLabels': self.marker_labels or None,
            'markerLines': self.marker_lines or None,
            'markerLineLabels': self.marker_line_labels or None,
        }
class BigNumberViz(BaseViz):

    """Put emphasis on a single metric with this big number viz"""

    viz_type = 'big_number'
    verbose_name = _('Big Number with Trendline')
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = True

    def query_obj(self):
        """Require a metric and restrict the query to just that metric."""
        query = super(BigNumberViz, self).query_obj()
        metric = self.form_data.get('metric')
        if not metric:
            raise Exception(_('Pick a metric!'))
        query['metrics'] = [metric]
        self.form_data['metric'] = metric
        return query
class BigNumberTotalViz(BaseViz):

    """Put emphasis on a single metric with this big number viz"""

    viz_type = 'big_number_total'
    verbose_name = _('Big Number')
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def query_obj(self):
        """Require a metric and restrict the query to just that metric."""
        query = super(BigNumberTotalViz, self).query_obj()
        metric = self.form_data.get('metric')
        if not metric:
            raise Exception(_('Pick a metric!'))
        query['metrics'] = [metric]
        self.form_data['metric'] = metric
        return query
class NVD3TimeSeriesViz(NVD3Viz):

    """A rich line chart component with tons of options"""

    viz_type = 'line'
    verbose_name = _('Time Series - Line Chart')
    sort_series = False
    is_timeseries = True

    def to_series(self, df, classed='', title_suffix=''):
        """Convert a pivoted dataframe into a list of NVD3 series dicts.

        Each numeric column becomes one series keyed by its (possibly
        tuple) title; non-numeric columns are skipped.
        """
        cols = []
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        series = df.to_dict('series')

        chart_data = []
        for name in df.T.index.tolist():
            ys = series[name]
            if df[name].dtype.kind not in 'biufc':
                # not a numeric dtype (bool/int/uint/float/complex)
                continue
            if isinstance(name, list):
                series_title = [str(title) for title in name]
            elif isinstance(name, tuple):
                series_title = tuple(str(title) for title in name)
            else:
                series_title = str(name)
            if (
                    isinstance(series_title, (list, tuple)) and
                    len(series_title) > 1 and
                    len(self.metric_labels) == 1):
                # Removing metric from series name if only one metric
                series_title = series_title[1:]
            if title_suffix:
                if isinstance(series_title, str):
                    series_title = (series_title, title_suffix)
                elif isinstance(series_title, (list, tuple)):
                    series_title = series_title + (title_suffix,)

            values = []
            for ds in df.index:
                if ds in ys:
                    d = {
                        'x': ds,
                        'y': ys[ds],
                    }
                else:
                    # no data point for this timestamp in this series
                    d = {}
                values.append(d)

            d = {
                'key': series_title,
                'values': values,
            }
            if classed:
                d['classed'] = classed
            chart_data.append(d)
        return chart_data

    def process_data(self, df, aggregate=False):
        """Pivot on time, then apply the configured resampling, series
        sorting, contribution, and rolling-window transforms in order."""
        fd = self.form_data
        df = df.fillna(0)
        if fd.get('granularity') == 'all':
            raise Exception(_('Pick a time granularity for your time series'))

        if not aggregate:
            df = df.pivot_table(
                index=DTTM_ALIAS,
                columns=fd.get('groupby'),
                values=self.metric_labels)
        else:
            df = df.pivot_table(
                index=DTTM_ALIAS,
                columns=fd.get('groupby'),
                values=self.metric_labels,
                fill_value=0,
                aggfunc=sum)

        fm = fd.get('resample_fillmethod')
        if not fm:
            fm = None
        how = fd.get('resample_how')
        rule = fd.get('resample_rule')
        if how and rule:
            # NOTE(review): how=/fill_method= is the legacy resample API,
            # removed in newer pandas — verify against the pinned version.
            df = df.resample(rule, how=how, fill_method=fm)
            if not fm:
                df = df.fillna(0)

        if self.sort_series:
            # Order series by their overall sum, descending
            dfs = df.sum()
            dfs.sort_values(ascending=False, inplace=True)
            df = df[dfs.index]

        if fd.get('contribution'):
            # Express each point as its share of the row total
            dft = df.T
            df = (dft / dft.sum()).T

        rolling_type = fd.get('rolling_type')
        rolling_periods = int(fd.get('rolling_periods') or 0)
        min_periods = int(fd.get('min_periods') or 0)

        if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
            kwargs = dict(
                window=rolling_periods,
                min_periods=min_periods)
            if rolling_type == 'mean':
                df = df.rolling(**kwargs).mean()
            elif rolling_type == 'std':
                df = df.rolling(**kwargs).std()
            elif rolling_type == 'sum':
                df = df.rolling(**kwargs).sum()
        elif rolling_type == 'cumsum':
            df = df.cumsum()
        if min_periods:
            # Trim the leading rows that lack a full rolling window
            df = df[min_periods:]
        return df

    def run_extra_queries(self):
        """Run one extra query per configured time shift and stash the
        shifted, processed dataframes for get_data() to merge in."""
        fd = self.form_data

        time_compare = fd.get('time_compare') or []
        # backwards compatibility
        if not isinstance(time_compare, list):
            time_compare = [time_compare]

        for option in time_compare:
            query_object = self.query_obj()
            delta = utils.parse_human_timedelta(option)
            query_object['inner_from_dttm'] = query_object['from_dttm']
            query_object['inner_to_dttm'] = query_object['to_dttm']

            if not query_object['from_dttm'] or not query_object['to_dttm']:
                raise Exception(_(
                    '`Since` and `Until` time bounds should be specified '
                    'when using the `Time Shift` feature.'))
            query_object['from_dttm'] -= delta
            query_object['to_dttm'] -= delta

            df2 = self.get_df_payload(query_object, time_compare=option).get('df')
            if df2 is not None and DTTM_ALIAS in df2:
                label = '{} offset'. format(option)
                # Shift the timestamps forward so both frames align
                df2[DTTM_ALIAS] += delta
                df2 = self.process_data(df2)
                self._extra_chart_data.append((label, df2))

    def get_data(self, df):
        """Build the chart series, folding in any time-shift comparisons.

        comparison_type 'values' appends the shifted series as-is; the
        other types chart the difference/ratio against the main series.
        """
        fd = self.form_data
        comparison_type = fd.get('comparison_type') or 'values'
        df = self.process_data(df)
        if comparison_type == 'values':
            chart_data = self.to_series(df)
            for i, (label, df2) in enumerate(self._extra_chart_data):
                chart_data.extend(
                    self.to_series(
                        df2, classed='time-shift-{}'.format(i), title_suffix=label))
        else:
            chart_data = []
            for i, (label, df2) in enumerate(self._extra_chart_data):
                # reindex df2 into the df2 index
                combined_index = df.index.union(df2.index)
                df2 = df2.reindex(combined_index) \
                    .interpolate(method='time') \
                    .reindex(df.index)

                if comparison_type == 'absolute':
                    diff = df - df2
                elif comparison_type == 'percentage':
                    diff = (df - df2) / df2
                elif comparison_type == 'ratio':
                    diff = df / df2
                else:
                    raise Exception(
                        'Invalid `comparison_type`: {0}'.format(comparison_type))

                # remove leading/trailing NaNs from the time shift difference
                diff = diff[diff.first_valid_index():diff.last_valid_index()]
                chart_data.extend(
                    self.to_series(
                        diff, classed='time-shift-{}'.format(i), title_suffix=label))

        return sorted(chart_data, key=lambda x: tuple(x['key']))
class MultiLineViz(NVD3Viz):

    """Pile on multiple line charts"""

    viz_type = 'line_multi'
    verbose_name = _('Time Series - Multiple Line Charts')
    is_timeseries = True

    def query_obj(self):
        # The underlying slices run their own queries; nothing to do here
        return None

    def get_data(self, df):
        """Return the slice metadata for both configured axes."""
        fd = self.form_data
        # Late imports to avoid circular import issues
        from superset.models.core import Slice
        from superset import db
        axis1_ids = fd.get('line_charts')
        axis1_slices = db.session.query(Slice).filter(Slice.id.in_(axis1_ids)).all()
        axis2_ids = fd.get('line_charts_2')
        axis2_slices = db.session.query(Slice).filter(Slice.id.in_(axis2_ids)).all()
        return {
            'slices': {
                'axis1': [slc.data for slc in axis1_slices],
                'axis2': [slc.data for slc in axis2_slices],
            },
        }
class NVD3DualLineViz(NVD3Viz):

    """A rich line chart with dual axis"""

    viz_type = 'dual_line'
    verbose_name = _('Time Series - Dual Axis Line Chart')
    sort_series = False
    is_timeseries = True

    def query_obj(self):
        """Require two distinct metrics, one per axis."""
        d = super(NVD3DualLineViz, self).query_obj()
        m1 = self.form_data.get('metric')
        m2 = self.form_data.get('metric_2')
        d['metrics'] = [m1, m2]
        if not m1:
            raise Exception(_('Pick a metric for left axis!'))
        if not m2:
            raise Exception(_('Pick a metric for right axis!'))
        if m1 == m2:
            raise Exception(_('Please choose different metrics'
                              ' on left and right axis'))
        return d

    def to_series(self, df, classed=''):
        """Build one NVD3 line series per metric, mapped to its own y-axis."""
        cols = []
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        series = df.to_dict('series')
        chart_data = []
        metrics = [
            self.form_data.get('metric'),
            self.form_data.get('metric_2'),
        ]
        for i, m in enumerate(metrics):
            m = utils.get_metric_name(m)
            ys = series[m]
            if df[m].dtype.kind not in 'biufc':
                # skip non-numeric columns
                continue
            series_title = m
            d = {
                'key': series_title,
                'classed': classed,
                'values': [
                    {'x': ds, 'y': ys[ds] if ds in ys else None}
                    for ds in df.index
                ],
                # yAxis 1 is the left axis, 2 the right
                'yAxis': i + 1,
                'type': 'line',
            }
            chart_data.append(d)
        return chart_data

    def get_data(self, df):
        """Pivot both metrics on time and emit the dual-axis series."""
        fd = self.form_data
        df = df.fillna(0)
        if self.form_data.get('granularity') == 'all':
            raise Exception(_('Pick a time granularity for your time series'))
        metric = self.get_metric_label(fd.get('metric'))
        metric_2 = self.get_metric_label(fd.get('metric_2'))
        df = df.pivot_table(
            index=DTTM_ALIAS,
            values=[metric, metric_2])
        chart_data = self.to_series(df)
        return chart_data
class NVD3TimeSeriesBarViz(NVD3TimeSeriesViz):

    """A bar chart where the x axis is time"""

    viz_type = 'bar'
    sort_series = True  # bars read better ordered by series totals
    verbose_name = _('Time Series - Bar Chart')
class NVD3TimePivotViz(NVD3TimeSeriesViz):

    """Time Series - Periodicity Pivot"""

    viz_type = 'time_pivot'
    sort_series = True
    verbose_name = _('Time Series - Period Pivot')

    def query_obj(self):
        # Single-metric viz: only the selected metric is queried
        d = super(NVD3TimePivotViz, self).query_obj()
        d['metrics'] = [self.form_data.get('metric')]
        return d

    def get_data(self, df):
        """Overlay each past period on top of the most recent one.

        Rows are ranked by how many 'freq' periods back they fall
        ('current' for the latest), their timestamps are shifted onto the
        current period, and the frame is pivoted into one series per rank.
        """
        fd = self.form_data
        df = self.process_data(df)
        freq = to_offset(fd.get('freq'))
        freq.normalize = True
        df[DTTM_ALIAS] = df.index.map(freq.rollback)
        # 0 = most recent period, 1 = one period back, etc.
        df['ranked'] = df[DTTM_ALIAS].rank(method='dense', ascending=False) - 1
        df.ranked = df.ranked.map(int)
        df['series'] = '-' + df.ranked.map(str)
        df['series'] = df['series'].str.replace('-0', 'current')
        rank_lookup = {
            row['series']: row['ranked']
            for row in df.to_dict(orient='records')
        }
        max_ts = df[DTTM_ALIAS].max()
        max_rank = df['ranked'].max()
        # Shift every period onto the latest period's time range
        df[DTTM_ALIAS] = df.index + (max_ts - df[DTTM_ALIAS])
        df = df.pivot_table(
            index=DTTM_ALIAS,
            columns='series',
            values=self.get_metric_label(fd.get('metric')))
        chart_data = self.to_series(df)
        for serie in chart_data:
            serie['rank'] = rank_lookup[serie['key']]
            serie['perc'] = 1 - (serie['rank'] / (max_rank + 1))
        return chart_data
class NVD3CompareTimeSeriesViz(NVD3TimeSeriesViz):

    """A line chart component where you can compare the % change over time"""

    # Reuses the full timeseries pipeline; only the renderer differs
    viz_type = 'compare'
    verbose_name = _('Time Series - Percent Change')
class NVD3TimeSeriesStackedViz(NVD3TimeSeriesViz):

    """A rich stack area chart"""

    viz_type = 'area'
    verbose_name = _('Time Series - Stacked')
    sort_series = True  # stacking order follows series totals
class DistributionPieViz(NVD3Viz):

    """Annoy visualization snobs with this controversial pie chart"""

    viz_type = 'pie'
    verbose_name = _('Distribution - NVD3 - Pie Chart')
    is_timeseries = False

    def get_data(self, df):
        """Aggregate the first metric per group and emit x/y records,
        sorted by value descending."""
        metric = self.metric_labels[0]
        df = df.pivot_table(
            index=self.groupby,
            values=[metric])
        df.sort_values(by=metric, ascending=False, inplace=True)
        df = df.reset_index()
        df.columns = ['x', 'y']
        return df.to_dict(orient='records')
class HistogramViz(BaseViz):

    """Histogram"""

    viz_type = 'histogram'
    verbose_name = _('Histogram')
    is_timeseries = False

    def query_obj(self):
        """Returns the query object for this visualization"""
        d = super(HistogramViz, self).query_obj()
        # NOTE(review): dict.get's default only applies when the key is
        # absent, not when it is present but None — confirm intent.
        d['row_limit'] = self.form_data.get(
            'row_limit', int(config.get('VIZ_ROW_LIMIT')))
        numeric_columns = self.form_data.get('all_columns_x')
        if numeric_columns is None:
            raise Exception(_('Must have at least one numeric column specified'))
        self.columns = numeric_columns
        d['columns'] = numeric_columns + self.groupby
        # override groupby entry to avoid aggregation
        d['groupby'] = []
        return d

    def get_data(self, df):
        """Returns the chart data"""
        chart_data = []
        if len(self.groupby) > 0:
            groups = df.groupby(self.groupby)
        else:
            # Single unnamed group covering the whole frame
            groups = [((), df)]
        for keys, data in groups:
            if isinstance(keys, str):
                keys = (keys,)
            # removing undesirable characters
            keys = [re.sub(r'\W+', r'_', k) for k in keys]
            chart_data.extend([{
                'key': '__'.join([c] + keys),
                'values': data[c].tolist()}
                for c in self.columns])
        return chart_data
class DistributionBarViz(DistributionPieViz):

    """A good old bar chart"""

    viz_type = 'dist_bar'
    verbose_name = _('Distribution - Bar Chart')
    is_timeseries = False

    def query_obj(self):
        """Validate that series and breakdowns don't overlap and that a
        metric and at least one series column are selected."""
        d = super(DistributionBarViz, self).query_obj()  # noqa
        fd = self.form_data
        if (
            len(d['groupby']) <
            len(fd.get('groupby') or []) + len(fd.get('columns') or [])
        ):
            # The deduplicated groupby is shorter than the two inputs
            # combined, so some column appears in both
            raise Exception(
                _("Can't have overlap between Series and Breakdowns"))
        if not fd.get('metrics'):
            raise Exception(_('Pick at least one metric'))
        if not fd.get('groupby'):
            raise Exception(_('Pick at least one field for [Series]'))
        return d

    def get_data(self, df):
        """Pivot into one series per (metric, breakdown) combination,
        ordered by the first metric's per-group totals."""
        fd = self.form_data
        metrics = self.metric_labels

        # Ordering of the groups: sum of the first metric, descending
        row = df.groupby(self.groupby).sum()[metrics[0]].copy()
        row.sort_values(ascending=False, inplace=True)
        columns = fd.get('columns') or []
        pt = df.pivot_table(
            index=self.groupby,
            columns=columns,
            values=metrics)
        if fd.get('contribution'):
            # Normalize each group's values to fractions of its total
            pt = pt.fillna(0)
            pt = pt.T
            pt = (pt / pt.sum()).T
        pt = pt.reindex(row.index)
        chart_data = []
        for name, ys in pt.items():
            if pt[name].dtype.kind not in 'biufc' or name in self.groupby:
                # skip non-numeric columns and the index columns themselves
                continue
            if isinstance(name, str):
                series_title = name
            else:
                # Drop the metric from the label when there's only one
                offset = 0 if len(metrics) > 1 else 1
                series_title = ', '.join([str(s) for s in name[offset:]])
            values = []
            for i, v in ys.items():
                x = i
                if isinstance(x, (tuple, list)):
                    x = ', '.join([str(s) for s in x])
                else:
                    x = str(x)
                values.append({
                    'x': x,
                    'y': v,
                })
            d = {
                'key': series_title,
                'values': values,
            }
            chart_data.append(d)
        return chart_data
class SunburstViz(BaseViz):

    """A multi level sunburst chart"""

    viz_type = 'sunburst'
    verbose_name = _('Sunburst')
    is_timeseries = False
    credits = (
        'Kerry Rodden '
        '@<a href="https://bl.ocks.org/kerryrodden/7090426">bl.ocks.org</a>')

    def get_data(self, df):
        """Return the rows as value arrays: groupby levels then m1/m2.

        When no distinct secondary metric is selected, the primary metric
        is duplicated into m2 so the frontend always sees two measures.
        """
        fd = self.form_data
        cols = fd.get('groupby')
        metric = self.get_metric_label(fd.get('metric'))
        secondary_metric = self.get_metric_label(fd.get('secondary_metric'))
        if metric == secondary_metric or secondary_metric is None:
            df.columns = cols + ['m1']
            df['m2'] = df['m1']
        return json.loads(df.to_json(orient='values'))

    def query_obj(self):
        """Query the primary metric plus a distinct secondary one if set."""
        qry = super(SunburstViz, self).query_obj()
        fd = self.form_data
        qry['metrics'] = [fd['metric']]
        secondary_metric = fd.get('secondary_metric')
        if secondary_metric and secondary_metric != fd['metric']:
            qry['metrics'].append(secondary_metric)
        return qry
class SankeyViz(BaseViz):

    """A Sankey diagram that requires a parent-child dataset"""

    viz_type = 'sankey'
    verbose_name = _('Sankey')
    is_timeseries = False
    credits = '<a href="https://www.npmjs.com/package/d3-sankey">d3-sankey on npm</a>'

    def query_obj(self):
        """Require exactly two groupby columns (source and target)."""
        qry = super(SankeyViz, self).query_obj()
        if len(qry['groupby']) != 2:
            raise Exception(_('Pick exactly 2 columns as [Source / Target]'))
        qry['metrics'] = [
            self.form_data['metric']]
        return qry

    def get_data(self, df):
        """Return source/target/value records, rejecting cyclic graphs.

        d3-sankey requires an acyclic graph, so a depth-first search is
        run over the source -> target edges before returning the records.
        """
        df.columns = ['source', 'target', 'value']
        # Use the builtin `str` rather than the Python 2-only `basestring`,
        # which raises NameError on Python 3.
        df['source'] = df['source'].astype(str)
        df['target'] = df['target'].astype(str)
        recs = df.to_dict(orient='records')

        hierarchy = defaultdict(set)
        for row in recs:
            hierarchy[row['source']].add(row['target'])

        def find_cycle(g):
            """Whether there's a cycle in a directed graph"""
            path = set()

            def visit(vertex):
                path.add(vertex)
                for neighbour in g.get(vertex, ()):
                    if neighbour in path or visit(neighbour):
                        return (vertex, neighbour)
                path.remove(vertex)

            for v in g:
                cycle = visit(v)
                if cycle:
                    return cycle

        cycle = find_cycle(hierarchy)
        if cycle:
            raise Exception(_(
                "There's a loop in your Sankey, please provide a tree. "
                "Here's a faulty link: {}").format(cycle))
        return recs
class DirectedForceViz(BaseViz):

    """An animated directed force layout graph visualization"""

    viz_type = 'directed_force'
    verbose_name = _('Directed Force Layout')
    credits = 'd3noob @<a href="http://bl.ocks.org/d3noob/5141278">bl.ocks.org</a>'
    is_timeseries = False

    def query_obj(self):
        """Require exactly two 'Group By' columns plus one metric."""
        query = super(DirectedForceViz, self).query_obj()
        if len(self.form_data['groupby']) != 2:
            raise Exception(_("Pick exactly 2 columns to 'Group By'"))
        query['metrics'] = [self.form_data['metric']]
        return query

    def get_data(self, df):
        """Rename the columns to the source/target/value triplet d3 expects."""
        df.columns = ['source', 'target', 'value']
        return df.to_dict(orient='records')
class ChordViz(BaseViz):

    """A Chord diagram"""

    viz_type = 'chord'
    # Fixed copy-paste error: this viz was mislabeled 'Directed Force Layout'
    verbose_name = _('Chord Diagram')
    credits = '<a href="https://github.com/d3/d3-chord">Bostock</a>'
    is_timeseries = False

    def query_obj(self):
        """Query one metric grouped by the source and target columns."""
        qry = super(ChordViz, self).query_obj()
        fd = self.form_data
        qry['groupby'] = [fd.get('groupby'), fd.get('columns')]
        qry['metrics'] = [self.get_metric_label(fd.get('metric'))]
        return qry

    def get_data(self, df):
        """Build the square connectivity matrix d3.chord expects."""
        df.columns = ['source', 'target', 'value']

        # Preparing a symetrical matrix like d3.chords calls for
        nodes = list(set(df['source']) | set(df['target']))
        matrix = {}
        for source, target in product(nodes, nodes):
            matrix[(source, target)] = 0
        for source, target, value in df.to_records(index=False):
            matrix[(source, target)] = value
        m = [[matrix[(n1, n2)] for n1 in nodes] for n2 in nodes]
        return {
            'nodes': list(nodes),
            'matrix': m,
        }
class CountryMapViz(BaseViz):

    """A country centric"""

    viz_type = 'country_map'
    verbose_name = _('Country Map')
    is_timeseries = False
    credits = 'From bl.ocks.org By john-guerra'

    def query_obj(self):
        """Query a single metric grouped by the entity (region id) column."""
        qry = super(CountryMapViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric']]
        qry['groupby'] = [self.form_data['entity']]
        return qry

    def get_data(self, df):
        """Return {country_id, metric} records for the map component."""
        fd = self.form_data
        metric = self.metric_labels[0]
        ndf = df[[fd.get('entity'), metric]]
        ndf.columns = ['country_id', 'metric']
        return ndf.to_dict(orient='records')
class WorldMapViz(BaseViz):

    """A country centric world map"""

    viz_type = 'world_map'
    verbose_name = _('World Map')
    is_timeseries = False
    credits = 'datamaps on <a href="https://www.npmjs.com/package/datamaps">npm</a>'

    def query_obj(self):
        """Group by the country entity column."""
        qry = super(WorldMapViz, self).query_obj()
        qry['groupby'] = [self.form_data['entity']]
        return qry

    def get_data(self, df):
        """Map rows to country records with m1/m2 measures and lat/long.

        Country codes that can't be resolved through the configured
        country_fieldtype are flagged as 'XXX'.
        """
        from superset.data import countries
        fd = self.form_data
        cols = [fd.get('entity')]
        metric = self.get_metric_label(fd.get('metric'))
        secondary_metric = self.get_metric_label(fd.get('secondary_metric'))
        columns = ['country', 'm1', 'm2']
        if metric == secondary_metric:
            ndf = df[cols]
            # df[metric] will be a DataFrame
            # because there are duplicate column names
            ndf['m1'] = df[metric].iloc[:, 0]
            ndf['m2'] = ndf['m1']
        else:
            if secondary_metric:
                cols += [metric, secondary_metric]
            else:
                cols += [metric]
                columns = ['country', 'm1']
            ndf = df[cols]
        df = ndf
        df.columns = columns
        d = df.to_dict(orient='records')
        for row in d:
            country = None
            if isinstance(row['country'], str):
                country = countries.get(
                    fd.get('country_fieldtype'), row['country'])

            if country:
                row['country'] = country['cca3']
                row['latitude'] = country['lat']
                row['longitude'] = country['lng']
                row['name'] = country['name']
            else:
                # unresolvable code: flagged so the frontend can skip it
                row['country'] = 'XXX'
        return d
class FilterBoxViz(BaseViz):

    """A multi filter, multi-choice filter box to make dashboards interactive"""

    viz_type = 'filter_box'
    verbose_name = _('Filters')
    is_timeseries = False
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    cache_type = 'get_data'

    def query_obj(self):
        # No single main query; one query per filter field runs in
        # run_extra_queries() instead
        return None

    def run_extra_queries(self):
        """Run one query per filter field and keep each result frame."""
        qry = self.filter_query_obj()
        filters = [g for g in self.form_data['groupby']]
        self.dataframes = {}
        for flt in filters:
            qry['groupby'] = [flt]
            df = self.get_df_payload(query_obj=qry).get('df')
            self.dataframes[flt] = df

    def filter_query_obj(self):
        """Base query object shared by all per-filter queries."""
        qry = super(FilterBoxViz, self).query_obj()
        groupby = self.form_data.get('groupby')
        if len(groupby) < 1 and not self.form_data.get('date_filter'):
            raise Exception(_('Pick at least one filter field'))
        qry['metrics'] = [
            self.form_data['metric']]
        return qry

    def get_data(self, df):
        """Build the filter options from the stashed per-filter frames.

        Each option carries the value (as id/text), its filter field, and
        the metric value used alongside it.
        """
        d = {}
        filters = [g for g in self.form_data['groupby']]
        for flt in filters:
            df = self.dataframes[flt]
            d[flt] = [{
                'id': row[0],
                'text': row[0],
                'filter': flt,
                'metric': row[1]}
                for row in df.itertuples(index=False)
            ]
        return d
class IFrameViz(BaseViz):

    """You can squeeze just about anything in this iFrame component"""

    viz_type = 'iframe'
    verbose_name = _('iFrame')
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def query_obj(self):
        # The content is an external URL; no query is issued
        return None

    def get_df(self, query_obj=None):
        # No dataframe: the frontend renders the configured URL directly
        return None
class ParallelCoordinatesViz(BaseViz):

    """Interactive parallel coordinate implementation

    Uses this amazing javascript library
    https://github.com/syntagmatic/parallel-coordinates
    """

    viz_type = 'para'
    verbose_name = _('Parallel Coordinates')
    credits = (
        '<a href="https://syntagmatic.github.io/parallel-coordinates/">'
        "Syntagmatic's library</a>")
    is_timeseries = False

    def query_obj(self):
        """Group by the selected series column."""
        query = super(ParallelCoordinatesViz, self).query_obj()
        query['groupby'] = [self.form_data.get('series')]
        return query

    def get_data(self, df):
        """Hand the raw records straight to the frontend."""
        return df.to_dict(orient='records')
class HeatmapViz(BaseViz):

    """A nice heatmap visualization that support high density through canvas"""

    viz_type = 'heatmap'
    verbose_name = _('Heatmap')
    is_timeseries = False
    credits = (
        'inspired from mbostock @<a href="http://bl.ocks.org/mbostock/3074470">'
        'bl.ocks.org</a>')

    def query_obj(self):
        """Query one metric grouped by the x and y axis columns."""
        d = super(HeatmapViz, self).query_obj()
        fd = self.form_data
        d['metrics'] = [fd.get('metric')]
        d['groupby'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
        return d

    def get_data(self, df):
        """Return x/y/v records with normalized 'perc' and 'rank' columns.

        Normalization is either over the whole heatmap or within each
        group of the 'normalize_across' dimension.
        """
        fd = self.form_data
        x = fd.get('all_columns_x')
        y = fd.get('all_columns_y')
        v = self.metric_labels[0]
        if x == y:
            df.columns = ['x', 'y', 'v']
        else:
            df = df[[x, y, v]]
            df.columns = ['x', 'y', 'v']
        norm = fd.get('normalize_across')
        overall = False
        max_ = df.v.max()
        min_ = df.v.min()
        if norm == 'heatmap':
            overall = True
        else:
            gb = df.groupby(norm, group_keys=False)
            if len(gb) <= 1:
                # A single group normalizes the same as the whole heatmap
                overall = True
            else:
                # Per-group min-max scaling and percentile ranks
                df['perc'] = (
                    gb.apply(
                        lambda x: (x.v - x.v.min()) / (x.v.max() - x.v.min()))
                )
                df['rank'] = gb.apply(lambda x: x.v.rank(pct=True))
        if overall:
            # Min-max scale and percentile-rank across all cells
            df['perc'] = (df.v - min_) / (max_ - min_)
            df['rank'] = df.v.rank(pct=True)
        return {
            'records': df.to_dict(orient='records'),
            'extents': [min_, max_],
        }
class HorizonViz(NVD3TimeSeriesViz):

    """Horizon chart

    https://www.npmjs.com/package/d3-horizon-chart
    """

    # Reuses the full NVD3 time-series query/data pipeline; only the
    # client-side rendering differs.
    viz_type = 'horizon'
    verbose_name = _('Horizon Charts')
    credits = (
        '<a href="https://www.npmjs.com/package/d3-horizon-chart">'
        'd3-horizon-chart</a>')
class MapboxViz(BaseViz):

    """Rich maps made with Mapbox"""

    viz_type = 'mapbox'
    verbose_name = _('Mapbox')
    is_timeseries = False
    credits = (
        '<a href=https://www.mapbox.com/mapbox-gl-js/api/>Mapbox GL JS</a>')

    def query_obj(self):
        """Build the query, validating label/radius choices against groupby.

        Without a groupby, the longitude/latitude columns (plus optional
        label and point-radius columns) are fetched as plain columns; with
        a groupby, every referenced column must already be part of it.
        """
        d = super(MapboxViz, self).query_obj()
        fd = self.form_data
        label_col = fd.get('mapbox_label')
        if not fd.get('groupby'):
            d['columns'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
            if label_col and len(label_col) >= 1:
                if label_col[0] == 'count':
                    raise Exception(_(
                        "Must have a [Group By] column to have 'count' as the [Label]"))
                d['columns'].append(label_col[0])
            if fd.get('point_radius') != 'Auto':
                d['columns'].append(fd.get('point_radius'))
            # De-duplicate in case the same column was picked for two roles.
            d['columns'] = list(set(d['columns']))
        else:
            # Ensuring columns chosen are all in group by
            if (label_col and len(label_col) >= 1 and
                label_col[0] != 'count' and
                label_col[0] not in fd.get('groupby')):
                raise Exception(_(
                    'Choice of [Label] must be present in [Group By]'))
            if (fd.get('point_radius') != 'Auto' and
                fd.get('point_radius') not in fd.get('groupby')):
                raise Exception(_(
                    'Choice of [Point Radius] must be present in [Group By]'))
            if (fd.get('all_columns_x') not in fd.get('groupby') or
                fd.get('all_columns_y') not in fd.get('groupby')):
                raise Exception(_(
                    '[Longitude] and [Latitude] columns must be present in [Group By]'))
        return d

    def get_data(self, df):
        """Convert rows to a GeoJSON FeatureCollection plus map settings.

        Returns None when no dataframe is available.
        """
        if df is None:
            return None
        fd = self.form_data
        label_col = fd.get('mapbox_label')
        has_custom_metric = label_col is not None and len(label_col) > 0
        metric_col = [None] * len(df.index)
        if has_custom_metric:
            # The label column may alias the x/y columns themselves.
            if label_col[0] == fd.get('all_columns_x'):
                metric_col = df[fd.get('all_columns_x')]
            elif label_col[0] == fd.get('all_columns_y'):
                metric_col = df[fd.get('all_columns_y')]
            else:
                metric_col = df[label_col[0]]
        point_radius_col = (
            [None] * len(df.index)
            if fd.get('point_radius') == 'Auto'
            else df[fd.get('point_radius')])
        # using geoJSON formatting
        geo_json = {
            'type': 'FeatureCollection',
            'features': [
                {
                    'type': 'Feature',
                    'properties': {
                        'metric': metric,
                        'radius': point_radius,
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [lon, lat],
                    },
                }
                for lon, lat, metric, point_radius
                in zip(
                    df[fd.get('all_columns_x')],
                    df[fd.get('all_columns_y')],
                    metric_col, point_radius_col)
            ],
        }
        x_series, y_series = df[fd.get('all_columns_x')], df[fd.get('all_columns_y')]
        # Bounding box used for the initial viewport.
        south_west = [x_series.min(), y_series.min()]
        north_east = [x_series.max(), y_series.max()]
        return {
            'geoJSON': geo_json,
            'hasCustomMetric': has_custom_metric,
            'mapboxApiKey': config.get('MAPBOX_API_KEY'),
            'mapStyle': fd.get('mapbox_style'),
            'aggregatorName': fd.get('pandas_aggfunc'),
            'clusteringRadius': fd.get('clustering_radius'),
            'pointRadiusUnit': fd.get('point_radius_unit'),
            'globalOpacity': fd.get('global_opacity'),
            'bounds': [south_west, north_east],
            'renderWhileDragging': fd.get('render_while_dragging'),
            'tooltip': fd.get('rich_tooltip'),
            'color': fd.get('mapbox_color'),
        }
class DeckGLMultiLayer(BaseViz):

    """Pile on multiple DeckGL layers"""

    viz_type = 'deck_multi'
    verbose_name = _('Deck.gl - Multiple Layers')
    is_timeseries = False
    credits = '<a href="https://uber.github.io/deck.gl/">deck.gl</a>'

    def query_obj(self):
        # This viz only composes other slices; it runs no query of its own.
        return None

    def get_data(self, df):
        """Fetch the data of the composed slices by their configured ids."""
        # Late imports to avoid circular import issues
        from superset.models.core import Slice
        from superset import db
        slice_ids = self.form_data.get('deck_slices')
        slices = db.session.query(Slice).filter(Slice.id.in_(slice_ids)).all()
        return {
            'mapboxApiKey': config.get('MAPBOX_API_KEY'),
            'slices': [slc.data for slc in slices],
        }
class BaseDeckGLViz(BaseViz):

    """Base class for deck.gl visualizations"""

    is_timeseries = False
    credits = '<a href="https://uber.github.io/deck.gl/">deck.gl</a>'
    # Subclasses list the form-data keys that hold spatial (lon/lat) controls.
    spatial_control_keys = []

    def handle_nulls(self, df):
        # NULL spatial rows are filtered out at query time (add_null_filters),
        # so no row-level null handling is applied here.
        return df

    def get_metrics(self):
        """Return the point-size metric as a list, or [] when unset."""
        self.metric = self.form_data.get('size')
        return [self.metric] if self.metric else []

    def process_spatial_query_obj(self, key, group_by):
        """Append the physical columns backing spatial control `key`."""
        group_by.extend(self.get_spatial_columns(key))

    def get_spatial_columns(self, key):
        """Map a spatial control to the list of source columns it reads.

        NOTE(review): falls through (returns None) for an unknown spatial
        type; callers extend/iterate the result and would then fail —
        confirm the form only ever submits the three known types.
        """
        spatial = self.form_data.get(key)
        if spatial is None:
            raise ValueError(_('Bad spatial key'))
        if spatial.get('type') == 'latlong':
            return [spatial.get('lonCol'), spatial.get('latCol')]
        elif spatial.get('type') == 'delimited':
            return [spatial.get('lonlatCol')]
        elif spatial.get('type') == 'geohash':
            return [spatial.get('geohashCol')]

    @staticmethod
    def parse_coordinates(s):
        """Parse a delimited coordinate string; returns None for empty input.

        Raises SpatialException when the string cannot be parsed as a Point.
        """
        if not s:
            return None
        try:
            p = Point(s)
        except Exception:
            raise SpatialException(
                _('Invalid spatial point encountered: %s' % s))
        return (p.latitude, p.longitude)

    @staticmethod
    def reverse_geohash_decode(geohash_code):
        """Decode a geohash, returning the coordinates as (lng, lat)."""
        lat, lng = geohash.decode(geohash_code)
        return (lng, lat)

    @staticmethod
    def reverse_latlong(df, key):
        # Swap the coordinate order of each list/tuple entry in the column.
        df[key] = [
            tuple(reversed(o))
            for o in df[key]
            if isinstance(o, (list, tuple))
        ]

    def process_spatial_data_obj(self, key, df):
        """Collapse a spatial control's source columns into one tuple column.

        Raises NullValueException when the resulting column is missing/NULL.
        """
        spatial = self.form_data.get(key)
        if spatial is None:
            raise ValueError(_('Bad spatial key'))
        if spatial.get('type') == 'latlong':
            # Coerce to numeric; unparseable values become NaN.
            df[key] = list(zip(
                pd.to_numeric(df[spatial.get('lonCol')], errors='coerce'),
                pd.to_numeric(df[spatial.get('latCol')], errors='coerce'),
            ))
        elif spatial.get('type') == 'delimited':
            lon_lat_col = spatial.get('lonlatCol')
            df[key] = df[lon_lat_col].apply(self.parse_coordinates)
            del df[lon_lat_col]
        elif spatial.get('type') == 'geohash':
            df[key] = df[spatial.get('geohashCol')].map(self.reverse_geohash_decode)
            del df[spatial.get('geohashCol')]
        if spatial.get('reverseCheckbox'):
            self.reverse_latlong(df, key)
        if df.get(key) is None:
            raise NullValueException(_('Encountered invalid NULL spatial entry, \
please consider filtering those out'))
        return df

    def add_null_filters(self):
        """Add IS NOT NULL adhoc filters for every spatial/line column."""
        fd = self.form_data
        spatial_columns = set()
        for key in self.spatial_control_keys:
            for column in self.get_spatial_columns(key):
                spatial_columns.add(column)
        if fd.get('adhoc_filters') is None:
            fd['adhoc_filters'] = []
        line_column = fd.get('line_column')
        if line_column:
            spatial_columns.add(line_column)
        # Sorted for a deterministic filter order (stable cache keys).
        for column in sorted(spatial_columns):
            filter_ = to_adhoc({
                'col': column,
                'op': 'IS NOT NULL',
                'val': '',
            })
            fd['adhoc_filters'].append(filter_)

    def query_obj(self):
        """Build the query; spatial/dimension/js columns go into `groupby`
        when a metric is present, otherwise into plain `columns`."""
        fd = self.form_data
        # add NULL filters
        if fd.get('filter_nulls', True):
            self.add_null_filters()
        d = super(BaseDeckGLViz, self).query_obj()
        gb = []
        for key in self.spatial_control_keys:
            self.process_spatial_query_obj(key, gb)
        if fd.get('dimension'):
            gb += [fd.get('dimension')]
        if fd.get('js_columns'):
            gb += fd.get('js_columns')
        metrics = self.get_metrics()
        # De-duplicate columns gathered from multiple controls.
        gb = list(set(gb))
        if metrics:
            d['groupby'] = gb
            d['metrics'] = metrics
            d['columns'] = []
        else:
            d['columns'] = gb
        return d

    def get_js_columns(self, d):
        """Extract the JS-accessible extra column values from a row dict."""
        cols = self.form_data.get('js_columns') or []
        return {col: d.get(col) for col in cols}

    def get_data(self, df):
        """Build one deck.gl feature per row via the subclass's get_properties.

        Returns None when no dataframe is available.
        """
        if df is None:
            return None
        # Processing spatial info
        for key in self.spatial_control_keys:
            df = self.process_spatial_data_obj(key, df)
        features = []
        for d in df.to_dict(orient='records'):
            feature = self.get_properties(d)
            extra_props = self.get_js_columns(d)
            if extra_props:
                feature['extraProps'] = extra_props
            features.append(feature)
        return {
            'features': features,
            'mapboxApiKey': config.get('MAPBOX_API_KEY'),
            'metricLabels': self.metric_labels,
        }

    def get_properties(self, d):
        """Subclasses map a row dict to a deck.gl feature dict."""
        raise NotImplementedError()
class DeckScatterViz(BaseDeckGLViz):

    """deck.gl's ScatterLayer"""

    viz_type = 'deck_scatter'
    verbose_name = _('Deck.gl - Scatter plot')
    spatial_control_keys = ['spatial']
    is_timeseries = True

    def query_obj(self):
        """Resolve time-series mode and the point-radius config, then defer
        to the base class."""
        fd = self.form_data
        self.is_timeseries = bool(
            fd.get('time_grain_sqla') or fd.get('granularity'))
        self.point_radius_fixed = (
            fd.get('point_radius_fixed') or {'type': 'fix', 'value': 500})
        return super(DeckScatterViz, self).query_obj()

    def get_metrics(self):
        """Return the radius metric as a list, or [] when the radius is fixed.

        BUGFIX: the fixed-radius path previously returned None, violating
        the BaseDeckGLViz contract that get_metrics yields a list.
        """
        self.metric = None
        if self.point_radius_fixed.get('type') == 'metric':
            self.metric = self.point_radius_fixed.get('value')
            return [self.metric]
        return []

    def get_properties(self, d):
        """Map a row to a scatter feature (position, radius, color, time)."""
        return {
            'metric': d.get(self.metric_label),
            'radius': self.fixed_value if self.fixed_value else d.get(self.metric_label),
            'cat_color': d.get(self.dim) if self.dim else None,
            'position': d.get('spatial'),
            DTTM_ALIAS: d.get(DTTM_ALIAS),
        }

    def get_data(self, df):
        """Resolve metric label / fixed radius before building features."""
        fd = self.form_data
        self.metric_label = \
            self.get_metric_label(self.metric) if self.metric else None
        # Guard against a missing control (e.g. payload served from cache)
        # instead of crashing on None.get below.
        self.point_radius_fixed = fd.get('point_radius_fixed') or {}
        self.fixed_value = None
        self.dim = self.form_data.get('dimension')
        if self.point_radius_fixed.get('type') != 'metric':
            self.fixed_value = self.point_radius_fixed.get('value')
        return super(DeckScatterViz, self).get_data(df)
class DeckScreengrid(BaseDeckGLViz):

    """deck.gl's ScreenGridLayer"""

    viz_type = 'deck_screengrid'
    verbose_name = _('Deck.gl - Screen Grid')
    spatial_control_keys = ['spatial']
    is_timeseries = True

    def query_obj(self):
        """Mark the query as a time series when a grain/granularity is set."""
        fd = self.form_data
        # CONSISTENCY: coerce to bool like DeckScatterViz/DeckArc do; the
        # raw grain string previously leaked into is_timeseries.
        self.is_timeseries = bool(
            fd.get('time_grain_sqla') or fd.get('granularity'))
        return super(DeckScreengrid, self).query_obj()

    def get_properties(self, d):
        """Map a row to position/weight/timestamp for the screen grid."""
        return {
            'position': d.get('spatial'),
            'weight': d.get(self.metric_label) or 1,
            '__timestamp': d.get(DTTM_ALIAS) or d.get('__time'),
        }

    def get_data(self, df):
        """Resolve the metric label before the base class builds features."""
        self.metric_label = self.get_metric_label(self.metric)
        return super(DeckScreengrid, self).get_data(df)
class DeckGrid(BaseDeckGLViz):

    """deck.gl's DeckLayer"""

    viz_type = 'deck_grid'
    verbose_name = _('Deck.gl - 3D Grid')
    spatial_control_keys = ['spatial']

    def get_properties(self, d):
        """One grid point per row: a position plus a weight (default 1)."""
        weight = d.get(self.metric_label) or 1
        return {'position': d.get('spatial'), 'weight': weight}

    def get_data(self, df):
        """Resolve the metric label before the base class builds features."""
        self.metric_label = self.get_metric_label(self.metric)
        return super(DeckGrid, self).get_data(df)
def geohash_to_json(geohash_code):
    """Expand a geohash into its bounding box as a closed polygon.

    Returns a list of [lon, lat] corners, repeating the first corner at the
    end to close the ring.
    """
    bounds = geohash.bbox(geohash_code)
    west, east = bounds.get('w'), bounds.get('e')
    north, south = bounds.get('n'), bounds.get('s')
    return [
        [west, north],
        [east, north],
        [east, south],
        [west, south],
        [west, north],
    ]
class DeckPathViz(BaseDeckGLViz):

    """deck.gl's PathLayer"""

    viz_type = 'deck_path'
    verbose_name = _('Deck.gl - Paths')
    deck_viz_key = 'path'
    # Maps the `line_type` control to a decoder for the line column payload.
    deser_map = {
        'json': json.loads,
        'polyline': polyline.decode,
        'geohash': geohash_to_json,
    }

    def query_obj(self):
        """Attach the line column to groupby (with metrics) or columns."""
        d = super(DeckPathViz, self).query_obj()
        line_col = self.form_data.get('line_column')
        if d['metrics']:
            self.has_metrics = True
            d['groupby'].append(line_col)
        else:
            self.has_metrics = False
            d['columns'].append(line_col)
        return d

    def get_properties(self, d):
        """Decode the serialized path and store it under `deck_viz_key`."""
        fd = self.form_data
        line_type = fd.get('line_type')
        deserialize = self.deser_map[line_type]
        line_column = fd.get('line_column')
        path = deserialize(d[line_column])
        if fd.get('reverse_long_lat'):
            # Swap the two coordinates of every point along the path.
            path = [(point[1], point[0]) for point in path]
        d[self.deck_viz_key] = path
        if line_type != 'geohash':
            del d[line_column]
        return d
class DeckPolygon(DeckPathViz):

    """deck.gl's Polygon Layer"""

    # Inherits the full decode pipeline from DeckPathViz; only the viz
    # type and the feature key differ.
    viz_type = 'deck_polygon'
    verbose_name = _('Deck.gl - Polygon')
    deck_viz_key = 'polygon'
class DeckHex(BaseDeckGLViz):

    """deck.gl's DeckLayer"""

    viz_type = 'deck_hex'
    verbose_name = _('Deck.gl - 3D HEX')
    spatial_control_keys = ['spatial']

    def get_properties(self, d):
        """One hex-bin point per row: a position plus a weight (default 1)."""
        weight = d.get(self.metric_label) or 1
        return {'position': d.get('spatial'), 'weight': weight}

    def get_data(self, df):
        """Resolve the metric label before the base class builds features."""
        self.metric_label = self.get_metric_label(self.metric)
        return super(DeckHex, self).get_data(df)
class DeckGeoJson(BaseDeckGLViz):

    """deck.gl's GeoJSONLayer"""

    viz_type = 'deck_geojson'
    verbose_name = _('Deck.gl - GeoJSON')

    def query_obj(self):
        """Fetch only the GeoJSON column; no metrics and no grouping."""
        d = super(DeckGeoJson, self).query_obj()
        d['columns'] += [self.form_data.get('geojson')]
        d['metrics'] = []
        d['groupby'] = []
        return d

    def get_properties(self, d):
        """Parse a row's serialized GeoJSON payload into a feature dict."""
        raw = d.get(self.form_data.get('geojson'))
        return json.loads(raw)
class DeckArc(BaseDeckGLViz):

    """deck.gl's Arc Layer"""

    viz_type = 'deck_arc'
    verbose_name = _('Deck.gl - Arc')
    spatial_control_keys = ['start_spatial', 'end_spatial']
    is_timeseries = True

    def query_obj(self):
        """Mark the query as a time series when a grain/granularity is set."""
        fd = self.form_data
        self.is_timeseries = bool(
            fd.get('time_grain_sqla') or fd.get('granularity'))
        return super(DeckArc, self).query_obj()

    def get_properties(self, d):
        """Map a row to an arc: start/end positions plus color and time."""
        dim = self.form_data.get('dimension')
        cat_color = d.get(dim) if dim else None
        return {
            'sourcePosition': d.get('start_spatial'),
            'targetPosition': d.get('end_spatial'),
            'cat_color': cat_color,
            DTTM_ALIAS: d.get(DTTM_ALIAS),
        }

    def get_data(self, df):
        """Trim the base payload down to features plus the Mapbox API key."""
        payload = super(DeckArc, self).get_data(df)
        return {
            'features': payload['features'],
            'mapboxApiKey': config.get('MAPBOX_API_KEY'),
        }
class EventFlowViz(BaseViz):

    """A visualization to explore patterns in event sequences"""

    viz_type = 'event_flow'
    verbose_name = _('Event flow')
    credits = 'from <a href="https://github.com/williaster/data-ui">@data-ui</a>'
    is_timeseries = True

    def query_obj(self):
        """Select the event and entity columns plus any remaining meta columns.

        Optionally orders by the entity column when the control is enabled.
        """
        query = super(EventFlowViz, self).query_obj()
        form_data = self.form_data
        event_key = form_data.get('all_columns_x')
        entity_key = form_data.get('entity')
        # ROBUSTNESS: `all_columns` may be unset (None); default to no meta
        # columns instead of raising a TypeError while iterating.
        meta_keys = [
            col for col in form_data.get('all_columns') or []
            if col != event_key and col != entity_key
        ]
        query['columns'] = [event_key, entity_key] + meta_keys
        # ROBUSTNESS: `.get()` avoids a KeyError when the control is absent.
        if form_data.get('order_by_entity'):
            query['orderby'] = [(entity_key, True)]
        return query

    def get_data(self, df):
        """Return one record per event row."""
        return df.to_dict(orient='records')
class PairedTTestViz(BaseViz):

    """A table displaying paired t-test values"""

    viz_type = 'paired_ttest'
    verbose_name = _('Time Series - Paired t-test')
    sort_series = False
    is_timeseries = True

    def get_data(self, df):
        """
        Transform received data frame into an object of the form:
        {
            'metric1': [
                {
                    groups: ('groupA', ... ),
                    values: [ {x, y}, ... ],
                }, ...
            ], ...
        }
        """
        fd = self.form_data
        groups = fd.get('groupby')
        metrics = fd.get('metrics')
        # BUGFIX: fillna returns a new frame; the result was previously
        # discarded, so NaNs leaked into the pivoted payload.
        df = df.fillna(0)
        df = df.pivot_table(
            index=DTTM_ALIAS,
            columns=groups,
            values=metrics)
        cols = []
        # Be rid of falsey keys
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        data = {}
        series = df.to_dict('series')
        for nameSet in df.columns:
            # If no groups are defined, nameSet will be the metric name
            hasGroup = not isinstance(nameSet, str)
            Y = series[nameSet]
            d = {
                'group': nameSet[1:] if hasGroup else 'All',
                'values': [
                    {'x': t, 'y': Y[t] if t in Y else None}
                    for t in df.index
                ],
            }
            key = nameSet[0] if hasGroup else nameSet
            if key in data:
                data[key].append(d)
            else:
                data[key] = [d]
        return data
class RoseViz(NVD3TimeSeriesViz):

    viz_type = 'rose'
    verbose_name = _('Time Series - Nightingale Rose Chart')
    sort_series = False
    is_timeseries = True

    def get_data(self, df):
        """Regroup the NVD3 time-series payload by timestamp.

        Returns {timestamp_value: [{key, value, name, time}, ...]}, with
        NaN y-values coerced to 0.
        """
        series = super(RoseViz, self).get_data(df)
        grouped = {}
        for datum in series:
            key = datum['key']
            display = ', '.join(key) if isinstance(key, list) else key
            for point in datum['values']:
                timestamp = point['x'].value
                if not grouped.get(timestamp):
                    grouped[timestamp] = []
                y = point['y']
                grouped[timestamp].append({
                    'key': key,
                    'value': 0 if math.isnan(y) else y,
                    'name': display,
                    'time': point['x'],
                })
        return grouped
class PartitionViz(NVD3TimeSeriesViz):

    """
    A hierarchical data visualization with support for time series.
    """

    viz_type = 'partition'
    verbose_name = _('Partition Diagram')

    def query_obj(self):
        """Flag the query as a time series only when a time option is chosen."""
        query_obj = super(PartitionViz, self).query_obj()
        time_op = self.form_data.get('time_series_option', 'not_time')
        # Return time series data if the user specifies so
        query_obj['is_timeseries'] = time_op != 'not_time'
        return query_obj

    def levels_for(self, time_op, groups, df):
        """
        Compute the partition at each `level` from the dataframe.
        """
        # Level 0 aggregates the whole frame; level i aggregates by the
        # first i groupby columns.
        levels = {}
        for i in range(0, len(groups) + 1):
            agg_df = df.groupby(groups[:i]) if i else df
            levels[i] = (
                agg_df.mean() if time_op == 'agg_mean'
                else agg_df.sum(numeric_only=True))
        return levels

    def levels_for_diff(self, time_op, groups, df):
        """Compute per-level change between the first and last time points."""
        # Obtain a unique list of the time grains
        times = list(set(df[DTTM_ALIAS]))
        times.sort()
        until = times[len(times) - 1]
        since = times[0]
        # Function describing how to calculate the difference
        # ([0] operates on Series/frames, [1] on scalar values).
        func = {
            'point_diff': [
                pd.Series.sub,
                lambda a, b, fill_value: a - b,
            ],
            'point_factor': [
                pd.Series.div,
                lambda a, b, fill_value: a / float(b),
            ],
            'point_percent': [
                lambda a, b, fill_value=0: a.div(b, fill_value=fill_value) - 1,
                lambda a, b, fill_value: a / float(b) - 1,
            ],
        }[time_op]
        agg_df = df.groupby(DTTM_ALIAS).sum()
        levels = {0: pd.Series({
            m: func[1](agg_df[m][until], agg_df[m][since], 0)
            for m in agg_df.columns})}
        for i in range(1, len(groups) + 1):
            agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum()
            levels[i] = pd.DataFrame({
                m: func[0](agg_df[m][until], agg_df[m][since], fill_value=0)
                for m in agg_df.columns})
        return levels

    def levels_for_time(self, groups, df):
        """Run the time-series processing once per partition level."""
        procs = {}
        for i in range(0, len(groups) + 1):
            # Temporarily narrow `groupby` so process_data aggregates at
            # this level; restored after the loop.
            self.form_data['groupby'] = groups[:i]
            # NOTE(review): positional axis argument (columns) to drop.
            df_drop = df.drop(groups[i:], 1)
            procs[i] = self.process_data(df_drop, aggregate=True).fillna(0)
        self.form_data['groupby'] = groups
        return procs

    def nest_values(self, levels, level=0, metric=None, dims=()):
        """
        Nest values at each level on the back-end with
        access and setting, instead of summing from the bottom.
        """
        if not level:
            # Top of the tree: one node per metric.
            return [{
                'name': m,
                'val': levels[0][m],
                'children': self.nest_values(levels, 1, m),
            } for m in levels[0].index]
        if level == 1:
            # First groupby level, indexed by the metric column.
            return [{
                'name': i,
                'val': levels[1][metric][i],
                'children': self.nest_values(levels, 2, metric, (i,)),
            } for i in levels[1][metric].index]
        if level >= len(levels):
            return []
        # Deeper levels are indexed by the tuple of ancestor dimensions.
        return [{
            'name': i,
            'val': levels[level][metric][dims][i],
            'children': self.nest_values(
                levels, level + 1, metric, dims + (i,),
            ),
        } for i in levels[level][metric][dims].index]

    def nest_procs(self, procs, level=-1, dims=(), time=None):
        """Nest the per-level time-series frames into the same tree shape."""
        if level == -1:
            # Entry point: one subtree per metric column.
            return [{
                'name': m,
                'children': self.nest_procs(procs, 0, (m,)),
            } for m in procs[0].columns]
        if not level:
            # Level 0: one node per time index value.
            return [{
                'name': t,
                'val': procs[0][dims[0]][t],
                'children': self.nest_procs(procs, 1, dims, t),
            } for t in procs[0].index]
        if level >= len(procs):
            return []
        return [{
            'name': i,
            'val': procs[level][dims][i][time],
            'children': self.nest_procs(procs, level + 1, dims + (i,), time),
        } for i in procs[level][dims].columns]

    def get_data(self, df):
        """Dispatch to the aggregation strategy picked by `time_series_option`."""
        fd = self.form_data
        groups = fd.get('groupby', [])
        time_op = fd.get('time_series_option', 'not_time')
        if not len(groups):
            raise ValueError('Please choose at least one groupby')
        if time_op == 'not_time':
            levels = self.levels_for('agg_sum', groups, df)
        elif time_op in ['agg_sum', 'agg_mean']:
            levels = self.levels_for(time_op, groups, df)
        elif time_op in ['point_diff', 'point_factor', 'point_percent']:
            levels = self.levels_for_diff(time_op, groups, df)
        elif time_op == 'adv_anal':
            procs = self.levels_for_time(groups, df)
            return self.nest_procs(procs)
        else:
            # Any other option nests time itself as the outermost level.
            levels = self.levels_for('agg_sum', [DTTM_ALIAS] + groups, df)
        return self.nest_values(levels)
# Registry mapping each viz_type string to its class, built by scanning this
# module for BaseViz subclasses and excluding blacklisted types.
viz_types = {
    o.viz_type: o for o in globals().values()
    if (
        inspect.isclass(o) and
        issubclass(o, BaseViz) and
        o.viz_type not in config.get('VIZ_TYPE_BLACKLIST'))}
|