tranlv · pradeepjnayak · Jun 6, 2019 · Jun 10, 2019 · Jun 10, 2019 · May 27, 2020
diff --git a/__init__.py b/__init__.py
diff --git a/wikilink/wiki_link.py b/wikilink/wiki_link.py
@@ -25,7 +25,8 @@
 from .db import Page
 from .db import Link
 
-__author__ = "Tran Ly Vu ([email protected])"
+
+__author__ = "Tran Ly Vu ([email protected]), Pradeep Nayak ([email protected]"
 __copyright__ = "Copyright (c) 2016 - 2019 Tran Ly Vu. All Rights Reserved."
 __credits__ = ["Tranlyvu"]
 __license__ = "Apache License 2.0"
@@ -396,6 +397,48 @@ def _worker(self, execution_queue, storage_queue, separation_queue, event):
 						storage_queue.put(n)
 						separation_queue.put(number_of_sep + 1)
 
+	def print_path(self):
+		""" 
+			Function prints the sequence of paths from source to destination 
+			with the shortest number of link.
+		Args:
+			None
+		Returns:
+			None
+		"""
+		dest_node = LinkNode(self.dest_id)
+
+		successful_paths = []
+
+		with self._session_scope() as session:
+			def find_route_to_parent(node, source_root_id, depth=0, session=session):
+				parents = get_all_my_parents(node.node_id, session)
+				parent_ids = get_parent_id_list(parents)
+				depth += 1
+				if source_root_id in parent_ids and depth < self.limit:
+					node.path.append(node.node_id)
+					node.path.append(source_root_id) 
+					successful_paths.append(node.path)
+					return True
+
+				if depth > self.limit:
+					return
+
+				for every_parent in parents:
+					every_parent.path.extend(node.path)
+					every_parent.path.append(node.node_id)
+					if find_route_to_parent(every_parent, source_root_id, depth):
+						break
+
+			#recursively backtrack from dest id to source id
+			find_route_to_parent(dest_node, self.source_id)
+
+			success_path_url = [] #store url paths from page id
+			if successful_paths:
+				success_path_url = get_url_path_from_page_ids(session,min(successful_paths, key=len))
+				print("{}".format("=>".join(success_path_url)))
+
+			return
 
 def _scraper(session, url_id):
 
@@ -525,3 +568,32 @@ def _insert_link(session, from_page_id, to_page_id, no_of_separation):
 			"Many rows found in DB to store link from {} to {}\
 			 with number of seperation {}".format(from_page_id, to_page_id,
 		 								          no_of_separation))
+
+
+class LinkNode(object):
+    def __init__(self, node_id):
+        self.node_id = node_id
+        self.path = [] #parent urls
+
+def get_all_my_parents(node_id, session):
+    parent_list = []
+	try:
+		for every_parent in session.query(Link).filter(Link.to_page_id==node_id,Link.number_of_separation==1):
+			parent_node = LinkNode(every_parent.from_page_id)
+			parent_list.append(parent_node)
+	except DisconnectionError:
+		raise DisconnectionError("There is error with DB connection")
+    return parent_list
+
+
+def get_parent_id_list(parent_list):
+    return [parent_node.node_id for parent_node in parent_list]
+
+
+def get_url_path_from_page_ids(session, page_ids_list):
+    path_url = []
+    for page_id in page_ids_list:
+        page_url = session.query(Page).filter(Page.id == page_id).first()
+        path_url.append(page_url.url)
+    path_url.reverse()
+    return path_url