TemporalGraphOperators.java

  1. /*
  2.  * Copyright © 2014 - 2021 Leipzig University (Database Research Group)
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *     http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16. package org.gradoop.temporal.model.api;

  17. import org.gradoop.flink.model.api.epgm.BaseGraphOperators;
  18. import org.gradoop.flink.model.api.functions.AggregateFunction;
  19. import org.gradoop.flink.model.api.functions.KeyFunction;
  20. import org.gradoop.flink.model.impl.epgm.LogicalGraph;
  21. import org.gradoop.flink.model.impl.operators.keyedgrouping.KeyedGrouping;
  22. import org.gradoop.flink.model.impl.operators.matching.common.MatchStrategy;
  23. import org.gradoop.temporal.model.api.functions.TemporalPredicate;
  24. import org.gradoop.temporal.model.impl.TemporalGraph;
  25. import org.gradoop.temporal.model.impl.TemporalGraphCollection;
  26. import org.gradoop.temporal.model.impl.functions.predicates.AsOf;
  27. import org.gradoop.temporal.model.impl.functions.predicates.Between;
  28. import org.gradoop.temporal.model.impl.functions.predicates.ContainedIn;
  29. import org.gradoop.temporal.model.impl.functions.predicates.CreatedIn;
  30. import org.gradoop.temporal.model.impl.functions.predicates.DeletedIn;
  31. import org.gradoop.temporal.model.impl.functions.predicates.FromTo;
  32. import org.gradoop.temporal.model.impl.functions.predicates.ValidDuring;
  33. import org.gradoop.temporal.model.impl.operators.aggregation.functions.MaxEdgeTime;
  34. import org.gradoop.temporal.model.impl.operators.aggregation.functions.MaxVertexTime;
  35. import org.gradoop.temporal.model.impl.operators.aggregation.functions.MinEdgeTime;
  36. import org.gradoop.temporal.model.impl.operators.aggregation.functions.MinVertexTime;
  37. import org.gradoop.temporal.model.impl.operators.diff.Diff;
  38. import org.gradoop.temporal.model.impl.operators.matching.common.query.postprocessing.CNFPostProcessing;
  39. import org.gradoop.temporal.model.impl.operators.matching.common.statistics.TemporalGraphStatistics;
  40. import org.gradoop.temporal.model.impl.operators.matching.common.statistics.dummy.DummyTemporalGraphStatistics;
  41. import org.gradoop.temporal.model.impl.operators.matching.single.cypher.CypherTemporalPatternMatching;
  42. import org.gradoop.temporal.model.impl.operators.snapshot.Snapshot;
  43. import org.gradoop.temporal.model.impl.operators.verify.VerifyAndUpdateEdgeValidity;
  44. import org.gradoop.temporal.model.impl.pojo.TemporalEdge;
  45. import org.gradoop.temporal.model.impl.pojo.TemporalGraphHead;
  46. import org.gradoop.temporal.model.impl.pojo.TemporalVertex;

  47. import java.util.ArrayList;
  48. import java.util.List;

  49. /**
  50.  * Defines the operators that are available on a {@link TemporalGraph}.
  51.  */
  52. public interface TemporalGraphOperators extends BaseGraphOperators<TemporalGraphHead, TemporalVertex,
  53.   TemporalEdge, TemporalGraph, TemporalGraphCollection> {

  54.   //----------------------------------------------------------------------------
  55.   // Unary Operators
  56.   //----------------------------------------------------------------------------

  57.   /**
  58.    * Extracts a snapshot of this temporal graph using a given temporal predicate. The predicate is applied
  59.    * on the valid time dimension by default. To use the transaction time dimension, use
  60.    * {@link TemporalGraphOperators#snapshot(TemporalPredicate, TimeDimension)} instead.
  61.    * This operator will calculate the subgraph induced by the predicate.
  62.    *
  63.    * @param predicate the temporal predicate to apply on the valid times
  64.    * @return the snapshot as a temporal graph
  65.    */
  66.   default TemporalGraph snapshot(TemporalPredicate predicate) {
  67.     return snapshot(predicate, TimeDimension.VALID_TIME);
  68.   }

  69.   /**
  70.    * Extracts a snapshot of this temporal graph using a given temporal predicate. The predicate is applied
  71.    * on the given time dimension.
  72.    * This operator will calculate the subgraph induced by the predicate.
  73.    *
  74.    * @param predicate the temporal predicate to apply
  75.    * @param dimension the dimension that is used
  76.    * @return the snapshot as a temporal graph
  77.    */
  78.   default TemporalGraph snapshot(TemporalPredicate predicate, TimeDimension dimension) {
  79.     return callForGraph(new Snapshot(predicate, dimension));
  80.   }

  81.   /**
  82.    * Extracts a snapshot of this temporal graph using the temporal predicate {@code AS OF timestamp}
  83.    * where {@code timestamp} is a timestamp in milliseconds.
  84.    *
  85.    * @param timestamp the timestamp in milliseconds to query
  86.    * @return the snapshot as a temporal graph
  87.    * @see Snapshot
  88.    * @see AsOf
  89.    */
  90.   default TemporalGraph asOf(long timestamp) {
  91.     return snapshot(new AsOf(timestamp));
  92.   }

  93.   /**
  94.    * Extracts a snapshot of this temporal graph using the temporal predicate
  95.    * {@code FROM fromTimestamp TO toTimestamp} where both values are timestamps in milliseconds.
  96.    *
  97.    * @param fromTimestamp the from timestamp in milliseconds to query
  98.    * @param toTimestamp   the to timestamp in milliseconds to query
  99.    * @return the snapshot as a temporal graph
  100.    * @see Snapshot
  101.    * @see FromTo
  102.    */
  103.   default TemporalGraph fromTo(long fromTimestamp, long toTimestamp) {
  104.     return snapshot(new FromTo(fromTimestamp, toTimestamp));
  105.   }

  106.   /**
  107.    * Extracts a snapshot of this temporal graph using the temporal predicate
  108.    * {@code BETWEEN fromTimestamp AND toTimestamp} where both values are timestamps in milliseconds.
  109.    *
  110.    * @param fromTimestamp the from timestamp in milliseconds to query
  111.    * @param toTimestamp   the to timestamp in milliseconds to query
  112.    * @return the snapshot as a temporal graph
  113.    * @see Snapshot
  114.    * @see Between
  115.    */
  116.   default TemporalGraph between(long fromTimestamp, long toTimestamp) {
  117.     return snapshot(new Between(fromTimestamp, toTimestamp));
  118.   }

  119.   /**
  120.    * Extracts a snapshot of this temporal graph using the temporal predicate
  121.    * {@code CONTAINED IN (fromTimestamp, toTimestamp)} where both values are timestamps in milliseconds.
  122.    *
  123.    * @param fromTimestamp the from timestamp in milliseconds to query
  124.    * @param toTimestamp   the to timestamp in milliseconds to query
  125.    * @return the snapshot as a temporal graph
  126.    * @see Snapshot
  127.    * @see ContainedIn
  128.    */
  129.   default TemporalGraph containedIn(long fromTimestamp, long toTimestamp) {
  130.     return snapshot(new ContainedIn(fromTimestamp, toTimestamp));
  131.   }

  132.   /**
  133.    * Extracts a snapshot of this temporal graph using the temporal predicate
  134.    * {@code VALID DURING (fromTimestamp, toTimestamp)} where both values are timestamps in milliseconds.
  135.    *
  136.    * @param fromTimestamp the from timestamp in milliseconds to query
  137.    * @param toTimestamp   the to timestamp in milliseconds to query
  138.    * @return the snapshot as a temporal graph
  139.    * @see Snapshot
  140.    * @see ValidDuring
  141.    */
  142.   default TemporalGraph validDuring(long fromTimestamp, long toTimestamp) {
  143.     return snapshot(new ValidDuring(fromTimestamp, toTimestamp));
  144.   }

  145.   /**
  146.    * Extracts a snapshot of this temporal graph using the temporal predicate
  147.    * {@code CREATED IN (fromTimestamp, toTimestamp)} where both values are timestamps in milliseconds.
  148.    *
  149.    * @param fromTimestamp the from timestamp in milliseconds to query
  150.    * @param toTimestamp   the to timestamp in milliseconds to query
  151.    * @return the snapshot as a temporal graph
  152.    * @see Snapshot
  153.    * @see CreatedIn
  154.    */
  155.   default TemporalGraph createdIn(long fromTimestamp, long toTimestamp) {
  156.     return snapshot(new CreatedIn(fromTimestamp, toTimestamp));
  157.   }

  158.   /**
  159.    * Extracts a snapshot of this temporal graph using the temporal predicate
  160.    * {@code DELETED IN (fromTimestamp, toTimestamp)} where both values are timestamps in milliseconds.
  161.    *
  162.    * @param fromTimestamp the from timestamp in milliseconds to query
  163.    * @param toTimestamp   the to timestamp in milliseconds to query
  164.    * @return the snapshot as a temporal graph
  165.    * @see Snapshot
  166.    * @see DeletedIn
  167.    */
  168.   default TemporalGraph deletedIn(long fromTimestamp, long toTimestamp) {
  169.     return snapshot(new DeletedIn(fromTimestamp, toTimestamp));
  170.   }

  171.   /**
  172.    * Compares two snapshots of this graph. Given two temporal predicates, this operation
  173.    * will check if a graph element (vertex or edge) was added, removed or persists in the second
  174.    * snapshot compared to the first snapshot. The predicates are applied on the valid times. To use
  175.    * transaction time dimension, use
  176.    * {@link TemporalGraphOperators#diff(TemporalPredicate, TemporalPredicate, TimeDimension)}.
  177.    * <p>
  178.    * This operation returns the union of both snapshots with the following changes:
  179.    * A property with key {@value Diff#PROPERTY_KEY}
  180.    * will be set on each graph element. Its value will be set to
  181.    * <ul>
  182.    *   <li>{@code 0}, if the element is present in both snapshots.</li>
  183.    *   <li>{@code 1}, if the element is present in the second, but not the first snapshot
  184.    *   (i.e. it was added since the first snapshot).</li>
  185.    *   <li>{@code -1}, if the element is present in the first, but not the second snapshot
  186.    *   (i.e. it was removed since the first snapshot).</li>
  187.    * </ul>
  188.    * Graph elements present in neither snapshot will be discarded.
  189.    * The resulting graph will not be verified, i.e. dangling edges could occur. Use the
  190.    * {@code verify()} operator to validate the graph. The graph head is preserved.
  191.    *
  192.    * @param firstSnapshot  The predicate used to determine the first snapshot.
  193.    * @param secondSnapshot The predicate used to determine the second snapshot.
  194.    * @return A temporal graph containing the union of vertex and edge sets of both snapshots,
  195.    * defined by the given two predicate functions. A property with key
  196.    * {@link Diff#PROPERTY_KEY} is set on each graph element with a numerical value (-1, 0, 1) defined above.
  197.    */
  198.   default TemporalGraph diff(TemporalPredicate firstSnapshot, TemporalPredicate secondSnapshot) {
  199.     return diff(firstSnapshot, secondSnapshot, TimeDimension.VALID_TIME);
  200.   }

  201.   /**
  202.    * Compares two snapshots of this graph. Given two temporal predicates, this operation will check if a
  203.    * graph element (vertex or edge) was added, removed or persists in the second snapshot compared to the
  204.    * first snapshot. The predicates are applied on the specified time dimension.
  205.    * <p>
  206.    * This operation returns the union of both snapshots with the following changes:
  207.    * A property with key {@value Diff#PROPERTY_KEY} will be set on each graph element.
  208.    * Its value will be set to
  209.    * <ul>
  210.    *   <li>{@code 0}, if the element is present in both snapshots.</li>
  211.    *   <li>{@code 1}, if the element is present in the second, but not the first snapshot
  212.    *   (i.e. it was added since the first snapshot).</li>
  213.    *   <li>{@code -1}, if the element is present in the first, but not the second snapshot
  214.    *   (i.e. it was removed since the first snapshot).</li>
  215.    * </ul>
  216.    * Graph elements present in neither snapshot will be discarded.
  217.    * The resulting graph will not be verified, i.e. dangling edges could occur. Use the
  218.    * {@code verify()} operator to validate the graph. The graph head is preserved.
  219.    *
  220.    * @param firstSnapshot  The predicate used to determine the first snapshot.
  221.    * @param secondSnapshot The predicate used to determine the second snapshot.
  222.    * @param dimension      The time dimension that will be considered by the predicates.
  223.    * @return A temporal graph containing the union of vertex and edge sets of both snapshots, defined by the
  224.    * given two predicate functions. A property with key {@link Diff#PROPERTY_KEY} is set on each graph
  225.    * element with a numerical value (-1, 0, 1) defined above.
  226.    */
  227.   default TemporalGraph diff(TemporalPredicate firstSnapshot, TemporalPredicate secondSnapshot,
  228.                              TimeDimension dimension) {
  229.     return callForGraph(new Diff(firstSnapshot, secondSnapshot, dimension));
  230.   }

  231.   /**
  232.    * Evaluates the given query using the Temporal-GDL query engine. The engine uses default morphism
  233.    * strategies, which is vertex homomorphism and edge isomorphism. The vertex and edge data of the graph
  234.    * elements is attached to the resulting vertices.
  235.    * <p>
  236.    * Note, that this method used no statistics about the data graph which may result in bad runtime
  237.    * performance. Use {@link #temporalQuery(String, TemporalGraphStatistics)} to provide statistics for the
  238.    * query planner.
  239.    *
  240.    * @param temporalGdlQuery A Temporal-GDL query as {@link String}
  241.    * @return graph collection containing matching subgraphs
  242.    */
  243.   default TemporalGraphCollection temporalQuery(String temporalGdlQuery) {
  244.     return temporalQuery(temporalGdlQuery, new DummyTemporalGraphStatistics());
  245.   }

  246.   /**
  247.    * Evaluates the given query using the Temporal-GDL query engine. The engine uses default morphism
  248.    * strategies, which is vertex homomorphism and edge isomorphism. The vertex and edge data of the data graph
  249.    * elements is attached to the resulting vertices.
  250.    * <p>
  251.    * Note, that this method used no statistics about the data graph which may result in bad runtime
  252.    * performance. Use {@link #temporalQuery(String, String, TemporalGraphStatistics)} to provide statistics
  253.    * for the query planner.
  254.    * <p>
  255.    * In addition, the operator can be supplied with a construction pattern allowing the creation of new graph
  256.    * elements based on variable bindings of the match pattern. Consider the following example:
  257.    * <br>
  258.    * {@code graph.query(
  259.    * "MATCH (a:Author)-[:WROTE]->(:Paper)<-[:WROTE]-(b:Author) WHERE a <> b",
  260.    * "(a)-[:CO_AUTHOR]->(b)")}
  261.    * <p>
  262.    * The query pattern is looking for pairs of authors that worked on the same paper.
  263.    * The construction pattern defines a new edge of type CO_AUTHOR between the two entities.
  264.    *
  265.    * @param temporalGdlQuery A Temporal-GDL query as {@link String}
  266.    * @param constructionPattern Construction pattern in Temporal-GDL format
  267.    * @return graph collection containing the output of the construct pattern
  268.    */
  269.   default TemporalGraphCollection temporalQuery(String temporalGdlQuery, String constructionPattern) {
  270.     return temporalQuery(temporalGdlQuery, constructionPattern, new DummyTemporalGraphStatistics());
  271.   }

  272.   /**
  273.    * Evaluates the given query using the Temporal-GDL query engine. The engine uses default morphism
  274.    * strategies, which is vertex homomorphism and edge isomorphism. The vertex and edge data of the data graph
  275.    * elements is attached to the resulting vertices.
  276.    *
  277.    * @param temporalGdlQuery A Temporal-GDL query as {@link String}
  278.    * @param graphStatistics statistics about the data graph
  279.    * @return graph collection containing matching subgraphs
  280.    */
  281.   default TemporalGraphCollection temporalQuery(String temporalGdlQuery,
  282.     TemporalGraphStatistics graphStatistics) {
  283.     return temporalQuery(temporalGdlQuery, null, graphStatistics);
  284.   }

  285.   /**
  286.    * Evaluates the given query using the Temporal-GDL query engine. The engine uses default morphism
  287.    * strategies, which is vertex homomorphism and edge isomorphism. The vertex and edge data of the data graph
  288.    * elements is attached to the resulting vertices.
  289.    * <p>
  290.    * In addition, the operator can be supplied with a construction pattern allowing the creation of new graph
  291.    * elements based on variable bindings of the match pattern. Consider the following example:
  292.    * <br>
  293.    * {@code graph.query(
  294.    * "MATCH (a:Author)-[:WROTE]->(:Paper)<-[:WROTE]-(b:Author) WHERE a <> b",
  295.    * "(a)-[:CO_AUTHOR]->(b)")}
  296.    * <p>
  297.    * The query pattern is looking for pairs of authors that worked on the same paper.
  298.    * The construction pattern defines a new edge of type CO_AUTHOR between the two entities.
  299.    *
  300.    * @param temporalGdlQuery A Temporal-GDL query as {@link String}
  301.    * @param constructionPattern Construction pattern in Temporal-GDL format
  302.    * @param graphStatistics Statistics about the data graph
  303.    * @return graph collection containing the output of the construct pattern
  304.    */
  305.   default TemporalGraphCollection temporalQuery(String temporalGdlQuery, String constructionPattern,
  306.     TemporalGraphStatistics graphStatistics) {
  307.     return temporalQuery(temporalGdlQuery, constructionPattern, true, MatchStrategy.HOMOMORPHISM,
  308.       MatchStrategy.ISOMORPHISM, graphStatistics);
  309.   }

  310.   /**
  311.    * Evaluates the given query using the Temporal-GDL query engine.
  312.    *
  313.    * @param temporalGdlQuery A Temporal-GDL query as {@link String}
  314.    * @param constructionPattern Construction pattern in Temporal-GDL format
  315.    * @param attachData attach original vertex and edge data to the result
  316.    * @param vertexStrategy morphism setting for vertex mapping
  317.    * @param edgeStrategy morphism setting for edge mapping
  318.    * @param stats statistics about the data graph
  319.    * @return graph collection containing the output of the construct pattern or a graph collection containing
  320.    *         matching subgraphs if the construction pattern is {@code null}.
  321.    */
  322.   default TemporalGraphCollection temporalQuery(String temporalGdlQuery, String constructionPattern,
  323.     boolean attachData, MatchStrategy vertexStrategy, MatchStrategy edgeStrategy,
  324.     TemporalGraphStatistics stats) {
  325.     return callForCollection(
  326.       new CypherTemporalPatternMatching(temporalGdlQuery, constructionPattern, attachData, vertexStrategy,
  327.         edgeStrategy, stats, new CNFPostProcessing()));
  328.   }

  329.   /**
  330.    * Grouping operator that aggregates valid times per group and sets it as new valid time.
  331.    * The grouped validFrom value will be computed by min over all validFrom values.
  332.    * The grouped validTo value will be computed by max over all validTo values.
  333.    *
  334.    * @param vertexGroupingKeys property keys to group vertices
  335.    * @return summary graph
  336.    * @see KeyedGrouping
  337.    */
  338.   default TemporalGraph temporalGroupBy(List<KeyFunction<TemporalVertex, ?>> vertexGroupingKeys) {
  339.     return temporalGroupBy(vertexGroupingKeys, null);
  340.   }

  341.   /**
  342.    * Grouping operator that aggregates valid times per group and sets it as new valid time.
  343.    * The grouped validFrom value will be computed by min over all validFrom values.
  344.    * The grouped validTo value will be computed by max over all validTo values.
  345.    *
  346.    * @param vertexGroupingKeys property keys to group vertices
  347.    * @param edgeGroupingKeys   property keys to group edges
  348.    * @return summary graph
  349.    * @see KeyedGrouping
  350.    */
  351.   default TemporalGraph temporalGroupBy(List<KeyFunction<TemporalVertex, ?>> vertexGroupingKeys,
  352.     List<KeyFunction<TemporalEdge, ?>> edgeGroupingKeys) {
  353.     return temporalGroupBy(vertexGroupingKeys, new ArrayList<>(), edgeGroupingKeys, new ArrayList<>());
  354.   }

  355.   /**
  356.    * Grouping operator that aggregates valid times per group and sets it as new valid time.
  357.    * The grouped validFrom value will be computed by min over all validFrom values.
  358.    * The grouped validTo value will be computed by max over all validTo values.
  359.    *
  360.    * @param vertexGroupingKeys       property keys to group vertices
  361.    * @param vertexAggregateFunctions aggregate functions to apply on super vertices
  362.    * @param edgeGroupingKeys         property keys to group edges
  363.    * @param edgeAggregateFunctions   aggregate functions to apply on super edges
  364.    * @return summary graph
  365.    * @see KeyedGrouping
  366.    */
  367.   default TemporalGraph temporalGroupBy(List<KeyFunction<TemporalVertex, ?>> vertexGroupingKeys,
  368.     List<AggregateFunction> vertexAggregateFunctions, List<KeyFunction<TemporalEdge, ?>> edgeGroupingKeys,
  369.     List<AggregateFunction> edgeAggregateFunctions) {
  370.     // Add min/max valid time aggregations that will result in the new valid times
  371.     List<AggregateFunction> tempVertexAgg = new ArrayList<>(vertexAggregateFunctions);
  372.     tempVertexAgg.add(new MinVertexTime(TimeDimension.VALID_TIME, TimeDimension.Field.FROM)
  373.       .setAsValidTime(TimeDimension.Field.FROM));
  374.     tempVertexAgg.add(new MaxVertexTime(TimeDimension.VALID_TIME, TimeDimension.Field.TO)
  375.       .setAsValidTime(TimeDimension.Field.TO));
  376.     List<AggregateFunction> tempEdgeAgg = new ArrayList<>(edgeAggregateFunctions);
  377.     tempEdgeAgg.add(new MinEdgeTime(TimeDimension.VALID_TIME, TimeDimension.Field.FROM)
  378.       .setAsValidTime(TimeDimension.Field.FROM));
  379.     tempEdgeAgg.add(new MaxEdgeTime(TimeDimension.VALID_TIME, TimeDimension.Field.TO)
  380.       .setAsValidTime(TimeDimension.Field.TO));

  381.     return callForGraph(new KeyedGrouping<>(vertexGroupingKeys, tempVertexAgg, edgeGroupingKeys,
  382.       tempEdgeAgg));
  383.   }

  384.   //----------------------------------------------------------------------------
  385.   // Utilities
  386.   //----------------------------------------------------------------------------

  387.   /**
  388.    * Converts the {@link TemporalGraph} to a {@link LogicalGraph} instance by discarding all
  389.    * temporal information from the graph elements. All Ids (graphs, vertices, edges) are kept
  390.    * during the transformation.
  391.    *
  392.    * @return the logical graph instance
  393.    */
  394.   LogicalGraph toLogicalGraph();

  395.   /**
  396.    * Updates edges of this graph to set their validity such that an edge is only valid if both of
  397.    * its adjacent vertices are valid at that time. Edges that can never be valid since the
  398.    * validity of both vertices does not overlap, are discarded.<p>
  399.    * Note that this will also remove dangling edges, in the same way {@link #verify()} would.
  400.    *
  401.    * @return This graph with invalid or dangling edges removed.
  402.    */
  403.   default TemporalGraph updateEdgeValidity() {
  404.     return callForGraph(new VerifyAndUpdateEdgeValidity());
  405.   }

  406. }