pinot.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing,
  12. # software distributed under the License is distributed on an
  13. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. # KIND, either express or implied. See the License for the
  15. # specific language governing permissions and limitations
  16. # under the License.
  17. import datetime
  18. from typing import Dict, List, Optional
  19. from sqlalchemy.sql.expression import ColumnClause, ColumnElement
  20. from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression
  21. class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
  22. engine = "pinot"
  23. allows_subqueries = False
  24. allows_joins = False
  25. allows_column_aliases = False
  26. # Pinot does its own conversion below
  27. _time_grain_functions: Dict[Optional[str], str] = {
  28. "PT1S": "1:SECONDS",
  29. "PT1M": "1:MINUTES",
  30. "PT1H": "1:HOURS",
  31. "P1D": "1:DAYS",
  32. "P1W": "1:WEEKS",
  33. "P1M": "1:MONTHS",
  34. "P0.25Y": "3:MONTHS",
  35. "P1Y": "1:YEARS",
  36. }
  37. _python_to_java_time_patterns: Dict[str, str] = {
  38. "%Y": "yyyy",
  39. "%m": "MM",
  40. "%d": "dd",
  41. "%H": "HH",
  42. "%M": "mm",
  43. "%S": "ss",
  44. }
  45. @classmethod
  46. def get_timestamp_expr(
  47. cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
  48. ) -> TimestampExpression:
  49. is_epoch = pdf in ("epoch_s", "epoch_ms")
  50. # The DATETIMECONVERT pinot udf is documented at
  51. # Per https://github.com/apache/incubator-pinot/wiki/dateTimeConvert-UDF
  52. # We are not really converting any time units, just bucketing them.
  53. tf = ""
  54. if not is_epoch:
  55. try:
  56. today = datetime.datetime.today()
  57. today.strftime(str(pdf))
  58. except ValueError:
  59. raise ValueError(f"Invalid column datetime format:{str(pdf)}")
  60. java_date_format = str(pdf)
  61. for (
  62. python_pattern,
  63. java_pattern,
  64. ) in cls._python_to_java_time_patterns.items():
  65. java_date_format.replace(python_pattern, java_pattern)
  66. tf = f"1:SECONDS:SIMPLE_DATE_FORMAT:{java_date_format}"
  67. else:
  68. seconds_or_ms = "MILLISECONDS" if pdf == "epoch_ms" else "SECONDS"
  69. tf = f"1:{seconds_or_ms}:EPOCH"
  70. granularity = cls.get_time_grain_functions().get(time_grain)
  71. if not granularity:
  72. raise NotImplementedError("No pinot grain spec for " + str(time_grain))
  73. # In pinot the output is a string since there is no timestamp column like pg
  74. time_expr = f'DATETIMECONVERT({{col}}, "{tf}", "{tf}", "{granularity}")'
  75. return TimestampExpression(time_expr, col)
  76. @classmethod
  77. def make_select_compatible(
  78. cls, groupby_exprs: Dict[str, ColumnElement], select_exprs: List[ColumnElement]
  79. ) -> List[ColumnElement]:
  80. # Pinot does not want the group by expr's to appear in the select clause
  81. select_sans_groupby = []
  82. # We want identity and not equality, so doing the filtering manually
  83. for sel in select_exprs:
  84. for gr in groupby_exprs:
  85. if sel is gr:
  86. break
  87. else:
  88. select_sans_groupby.append(sel)
  89. return select_sans_groupby