Source code for gismo.post_processing

from gismo.clustering import get_sim


[docs]def post_documents_item_raw(gismo, i): """ Document indice to document entry Parameters ---------- gismo: Gismo Gismo instance i : int document indice Returns ------- object Document i from corpus """ return gismo.corpus[i]
[docs]def post_documents_item_content(gismo, i, max_size=None): """ Document indice to document content. Assumes that document has a 'content' key. Parameters ---------- gismo: Gismo Gismo instance i : int document indice max_size: int, optional Maximum number of chars to return Returns ------- str Content of document i from corpus """ return gismo.corpus[i]['content'][:max_size]
[docs]def post_features_item_raw(gismo, i): """ Feature indice to feature name Parameters ---------- gismo: Gismo Gismo instance i : int feature indice Returns ------- str Feature i from embedding """ return gismo.embedding.features[i]
[docs]def post_documents_cluster_json(gismo, cluster): """ Convert cluster of documents into basic json Parameters ---------- gismo: Gismo Gismo instance cluster: Cluster Cluster of documents Returns ------- dict dictionary with keys 'document', 'focus', and recursive 'children' """ return {'document': gismo.corpus[cluster.indice], 'focus': cluster.focus, 'children': [post_documents_cluster_json(gismo, c) for c in cluster.children]}
[docs]def post_documents_cluster_print(gismo, cluster, post_item=None, depth=""): """ Print an ASCII view of a document cluster with metrics (focus, relevance, similarity) Parameters ---------- gismo: Gismo Gismo instance cluster: Cluster Cluster of documents post_item: function, optional Post-processing function for individual documents depth: str, optional Current depth string used in recursion """ if post_item is None: post_item = gismo.post_documents_item sim = get_sim(cluster.vector, gismo.diteration.y_relevance) if len(cluster.children) == 0: txt = post_item(gismo, cluster.indice) print(f"{depth} {txt} " f"(R: {gismo.diteration.x_relevance[cluster.indice]:.2f}; " f"S: {sim:.2f})") else: print(f"{depth} F: {cluster.focus:.2f}. " f"R: {sum(gismo.diteration.x_relevance[cluster.members]):.2f}. " f"S: {sim:.2f}.") for c in cluster.children: post_documents_cluster_print(gismo, c, post_item=post_item, depth=depth + '-')
[docs]def post_features_cluster_json(gismo, cluster): """ Convert feature cluster into basic json Parameters ---------- gismo: Gismo Gismo instance cluster: Cluster Cluster of features Returns ------- dict dictionary with keys 'feature', 'focus', and recursive 'children' """ return {'feature': gismo.embedding.features[cluster.indice], 'focus': cluster.focus, 'children': [post_features_cluster_json(gismo, c) for c in cluster.children]}
[docs]def post_features_cluster_print(gismo, cluster, post_item=None, depth=""): """ Print an ASCII view of a feature cluster with metrics (focus, relevance, similarity) Parameters ---------- gismo: Gismo Gismo instance cluster: Cluster Cluster of features post_item: function, optional Post-processing function for individual features depth: str, optional Current depth string used in recursion """ if post_item is None: post_item = gismo.post_features_item sim = get_sim(cluster.vector, gismo.diteration.x_relevance) if len(cluster.children) == 0: print(f"{depth} {post_item(gismo, cluster.indice)} " f"(R: {gismo.diteration.y_relevance[cluster.indice]:.2f}; " f"S: {sim:.2f})") else: print(f"{depth} F: {cluster.focus:.2f}. " f"R: {sum(gismo.diteration.y_relevance[cluster.members]):.2f}. " f"S: {sim:.2f}.") for c in cluster.children: post_features_cluster_print(gismo, c, depth=depth + '-')
[docs]def post_landmarks_item_raw(landmark, i): """ Default post processor for individual landmarks. Parameters ---------- landmark: Landmarks A Landmarks instance i: int Indice of the landmark to process. Returns ------- object The landmark of indice i. """ return landmark[i]
[docs]def post_landmarks_cluster_json(landmark, cluster): """ Default post processor for a cluster of landmarks. Parameters ---------- landmark: Landmarks A Landmarks instance cluster: Cluster Cluster of the landmarks to process. Returns ------- dict A dict with the head landmark, cluster focus, and list of children. """ return {'landmark': landmark[cluster.indice], 'focus': cluster.focus, 'children': [post_landmarks_cluster_json(landmark, child) for child in cluster.children]}
[docs]def post_landmarks_cluster_print(landmark, cluster, post_item=None, depth=""): """ ASCII display post processor for a cluster of landmarks. Parameters ---------- landmark: Landmarks A Landmarks instance cluster: Cluster Cluster of the landmarks to process. post_item: function, optional Post-processing function for individual landmarks depth: str, optional Current depth string used in recursion """ if post_item is None: post_item = landmark.post_item if len(cluster.children) == 0: txt = post_item(landmark, cluster.indice) print(f"{depth} {txt} ") else: print(f"{depth} F: {cluster.focus:.2f}. ") for c in cluster.children: post_landmarks_cluster_print(landmark, c, post_item=post_item, depth=depth + '-')