You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trie.py 3.0 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Jan 30 10:48:49 2019
  5. Trie (prefix tree)
  6. @author: ljia
  7. @references:
  8. `NLP: Build a Trie Data structure from scratch with python <https://viblo.asia/p/nlp-build-a-trie-data-structure-from-scratch-with-python-3P0lPzroKox>`__, 2019.1
  9. """
  10. import pickle
  11. import json
  12. """ Trie class
  13. """
  14. class Trie:
  15. """
  16. """
  17. # init Trie class
  18. def __init__(self):
  19. self.root = self.getNode()
  20. def getNode(self):
  21. return {"isEndOfWord": False, "children": {}}
  22. def insertWord(self, word):
  23. current = self.root
  24. for ch in word:
  25. if ch in current["children"]:
  26. node = current["children"][ch]
  27. else:
  28. node = self.getNode()
  29. current["children"][ch] = node
  30. current = node
  31. current["isEndOfWord"] = True
  32. if 'count' in current:
  33. current['count'] += 1
  34. else:
  35. current['count'] = 1
  36. def searchWord(self, word):
  37. current = self.root
  38. for ch in word:
  39. if ch not in current["children"]:
  40. return 0
  41. node = current["children"][ch]
  42. current = node
  43. if 'count' in current:
  44. return current["count"]
  45. else:
  46. return 0
  47. def searchWordPrefix(self, word):
  48. current = self.root
  49. for ch in word:
  50. if not current["children"].has_key(ch):
  51. return False
  52. node = current["children"][ch]
  53. current = node
  54. # return True if children contain keys and values
  55. return bool(current["children"])
  56. def deleteWord(self, word):
  57. self._delete(self.root, word, 0)
  58. def _delete(self, current, word, index):
  59. if(index == len(word)):
  60. if not current["isEndOfWord"]:
  61. return False
  62. current["isEndOfWord"] = False
  63. return len(current["children"].keys()) == 0
  64. ch = word[index]
  65. if not current["children"].has_key(ch):
  66. return False
  67. node = current["children"][ch]
  68. should_delete_current_node = self._delete(node, word, index + 1)
  69. if should_delete_current_node:
  70. current["children"].pop(ch)
  71. return len(current["children"].keys()) == 0
  72. return False
  73. def save_to_pickle(self, file_name):
  74. f = open(file_name + ".pkl", "wb")
  75. pickle.dump(self.root, f)
  76. f.close()
  77. def load_from_pickle(self, file_name):
  78. f = open(file_name + ".pkl", "rb")
  79. self.root = pickle.load(f)
  80. f.close()
  81. def to_json(self):
  82. return json.dump(self.root)
  83. def save_to_json(self, file_name):
  84. json_data = json.dumps(self.root)
  85. f = open(file_name + ".json", "w")
  86. f.write(json_data)
  87. f.close()
  88. def load_from_json(self, file_name):
  89. json_file = open(file_name + ".json", "r")
  90. self.root = json.load(json_file)
  91. json_file.close()

A Python package for graph kernels, graph edit distances and graph pre-image problem.