CREATE QUERY tg_pagerank (
STRING v_type,
STRING e_type,
FLOAT max_change=0.001,
INT max_iter=25,
FLOAT damping=0.85,
INT top_k = 100,
BOOL print_accum = TRUE,
STRING result_attr = "",
STRING file_path = "",
BOOL display_edges = FALSE
)
SYNTAX V1 {
/*
Compute the pageRank score for each vertex in the GRAPH
In each iteration, compute a score for each vertex:
score = (1-damping) + damping*sum(received scores FROM its neighbors).
The pageRank algorithm stops when either of the following is true:
a) it reaches max_iter iterations;
b) the max score change for any vertex compared to the last iteration <= max_change.
v_type: vertex types to traverse print_accum: print JSON output
e_type: edge types to traverse result_attr: INT attr to store results to
max_iter; max #iterations file_path: file to write CSV output to
top_k: #top scores to output display_edges: output edges for visualization
max_change: max allowed change between iterations to achieve convergence
damping: importance of traversal vs. random teleport
This query supports only taking in a single edge for the time being (8/13/2020).
*/
TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
MaxAccum<FLOAT> @@max_diff = 9999; # max score change in an iteration
SumAccum<FLOAT> @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors
SumAccum<FLOAT> @sum_score = 1; # initial score for every vertex is 1.
SetAccum<EDGE> @@edge_set; # list of all edges, if display is needed
FILE f (file_path);
# PageRank iterations
Start = {v_type}; # Start with all vertices of specified type(s)
WHILE @@max_diff > max_change LIMIT max_iter
DO @@max_diff = 0;
V = SELECT s FROM Start:s -(e_type:e)- v_type:t
ACCUM t.@sum_recvd_score += s.@sum_score/(s.outdegree(e_type))
POST-ACCUM s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score,
s.@sum_recvd_score = 0,
@@max_diff += abs(s.@sum_score - s.@sum_score')
;
END; # END WHILE loop
# Output
IF file_path != "" THEN f.println("Vertex_ID", "PageRank"); END;
V = SELECT s FROM Start:s
POST-ACCUM IF result_attr != "" THEN s.setAttr(result_attr, s.@sum_score) END,
IF file_path != "" THEN f.println(s, s.@sum_score) END,
IF print_accum THEN @@top_scores_heap += Vertex_Score(s, s.@sum_score) END
;
IF print_accum THEN PRINT @@top_scores_heap;
IF display_edges THEN PRINT Start[Start.@sum_score];
Start = SELECT s FROM Start:s -(e_type:e)- v_type:t
ACCUM @@edge_set += e;
PRINT @@edge_set;
END;
END;
}