You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

trie.py 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Jan 30 10:48:49 2019
  5. Trie (prefix tree)
  6. @author: ljia
  7. @references:
  8. https://viblo.asia/p/nlp-build-a-trie-data-structure-from-scratch-with-python-3P0lPzroKox, 2019.1
  9. """
  10. import pickle
  11. import json
  12. """ Trie class
  13. """
  14. class Trie:
  15. # init Trie class
  16. def __init__(self):
  17. self.root = self.getNode()
  18. def getNode(self):
  19. return {"isEndOfWord": False, "children": {}}
  20. def insertWord(self, word):
  21. current = self.root
  22. for ch in word:
  23. if ch in current["children"]:
  24. node = current["children"][ch]
  25. else:
  26. node = self.getNode()
  27. current["children"][ch] = node
  28. current = node
  29. current["isEndOfWord"] = True
  30. if 'count' in current:
  31. current['count'] += 1
  32. else:
  33. current['count'] = 1
  34. def searchWord(self, word):
  35. current = self.root
  36. for ch in word:
  37. if ch not in current["children"]:
  38. return 0
  39. node = current["children"][ch]
  40. current = node
  41. if 'count' in current:
  42. return current["count"]
  43. else:
  44. return 0
  45. def searchWordPrefix(self, word):
  46. current = self.root
  47. for ch in word:
  48. if not current["children"].has_key(ch):
  49. return False
  50. node = current["children"][ch]
  51. current = node
  52. # return True if children contain keys and values
  53. return bool(current["children"])
  54. def deleteWord(self, word):
  55. self._delete(self.root, word, 0)
  56. def _delete(self, current, word, index):
  57. if(index == len(word)):
  58. if not current["isEndOfWord"]:
  59. return False
  60. current["isEndOfWord"] = False
  61. return len(current["children"].keys()) == 0
  62. ch = word[index]
  63. if not current["children"].has_key(ch):
  64. return False
  65. node = current["children"][ch]
  66. should_delete_current_node = self._delete(node, word, index + 1)
  67. if should_delete_current_node:
  68. current["children"].pop(ch)
  69. return len(current["children"].keys()) == 0
  70. return False
  71. def save_to_pickle(self, file_name):
  72. f = open(file_name + ".pkl", "wb")
  73. pickle.dump(self.root, f)
  74. f.close()
  75. def load_from_pickle(self, file_name):
  76. f = open(file_name + ".pkl", "rb")
  77. self.root = pickle.load(f)
  78. f.close()
  79. def to_json(self):
  80. return json.dump(self.root)
  81. def save_to_json(self, file_name):
  82. json_data = json.dumps(self.root)
  83. f = open(file_name + ".json", "w")
  84. f.write(json_data)
  85. f.close()
  86. def load_from_json(self, file_name):
  87. json_file = open(file_name + ".json", "r")
  88. self.root = json.load(json_file)
  89. json_file.close()

A Python package for graph kernels, graph edit distances and the graph pre-image problem.