Elasticsearch Hadoop installation on top of hadoop?
I have installed Hadoop and the ELK stack. Now I want to install ES-Hadoop on top of Hadoop so I can fetch data from Elasticsearch and store it in Hadoop via ES-Hadoop. If anyone has an idea of how to do this, please let me know.
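ES-Hadoop (the elasticsearch-hadoop connector) is not a separate service that is installed on top of Hadoop; it is a JAR that you add to the classpath of your MapReduce, Hive or Spark jobs. As a rough sketch only, where the node address localhost:9200, the index/type news_abc/myitem and the output path are placeholder assumptions rather than values from this setup, a MapReduce driver that reads documents from Elasticsearch through EsInputFormat and dumps them to HDFS might look like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.elasticsearch.hadoop.mr.EsInputFormat;

public class EsToHdfs {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("es.nodes", "localhost:9200");     // assumption: adjust to your Elasticsearch host
        conf.set("es.resource", "news_abc/myitem"); // assumption: index/type to read from
        conf.set("es.query", "?q=*");               // read everything

        Job job = Job.getInstance(conf, "es-to-hdfs");
        job.setJarByClass(EsToHdfs.class);
        job.setInputFormatClass(EsInputFormat.class);   // keys = doc ids (Text), values = documents (MapWritable)
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MapWritable.class);
        job.setNumReduceTasks(0);                       // identity map only: dump each document as text
        TextOutputFormat.setOutputPath(job, new Path("/user/hadoop/es_dump")); // assumption: HDFS output dir
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

To run something like this, the elasticsearch-hadoop JAR has to be available to the job, for example via HADOOP_CLASSPATH or by shipping it alongside the job JAR, and its version should match your Elasticsearch release.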
See also questions close to this topic
-
Elasticsearch: No feature for name [myitem]
I get the following info about the index:
curl -XGET 'http://XXX.XX.XX.XX:9200/_cat/indices?v'

health status index    pri rep docs.count docs.deleted store.size pri.store.size
yellow open   news_abc   1   1    8983616      3008123      6.5gb          6.5gb
It seems that there is data. Then I want to retrieve some of it, so I run this:
curl -XGET 'http://XXX.XX.XX.XX:9200/news_abc'
and get the following result:
{"news_abc":{"aliases":{},"mappings":{"myitem":{"include_in_all":false,"properties":{"audience":{"type":"long","store":true,"include_in_all":false},"author":{"type":"integer","store":true,"include_in_all":false},"author_signature":{"type":"string","store":true,"analyzer":"generic_text_field","include_in_all":false},"body":{"type":"string","store":true,"term_vector":"with_positions_offsets","analyzer":"generic_text_field","include_in_all":false},"channel_name":{"type":"string","store":true,"fields":{"sort":{"type":"string","analyzer":"sortable_text_field"}},"analyzer":"generic_text_field","include_in_all":false},"client":{"type":"integer","store":true,"include_in_all":false},"cluster":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"creation_time":{"type":"date","store":true,"format":"date_time","include_in_all":false},"expanded_outbound_link":{"type":"string","store":true,"fields":{"raw":{"type":"string","index":"not_analyzed"}},"analyzer":"url_text_field","include_in_all":false},"hashtag":{"type":"string","store":true,"analyzer":"lowercase_keyword","include_in_all":false},"is_supplement":{"type":"boolean","store":true},"klout_score":{"type":"short","store":true,"include_in_all":false},"language_id":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"media_type":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"project":{"type":"integer","store":true,"include_in_all":false},"publication_date":{"type":"date","store":true,"format":"date_time","include_in_all":false},"publishing_platform":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"remote_code":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"remote_media_code":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"scope":{"type":"integer","store":true,"include_in_all":false},"source_country_id":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"source_name":{"type":"string","store":true,"fields":{"sort":{"type":"string","analyzer":"sortable_text_field"}},"analyzer":"generic_text_field","include_in_all":false},"source_rank":{"type":"integer","store":true,"include_in_all":false},"source_region_id":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"source_url":{"type":"string","store":true,"term_vector":"with_positions","analyzer":"url_text_field","include_in_all":false},"summary":{"type":"string","store":true,"term_vector":"with_positions_offsets","analyzer":"generic_text_field","include_in_all":false},"title":{"type":"string","store":true,"fields":{"sort":{"type":"string","analyzer":"sortable_truncated_text_field"}},"analyzer":"generic_text_field","include_in_all":false},"twitter_in_reply_to_status_id":{"type":"long","store":true,"include_in_all":false},"twitter_in_reply_to_user":{"type":"string","store":true,"analyzer":"lowercase_keyword","include_in_all":false},"twitter_in_reply_to_user_id":{"type":"long","store":true,"include_in_all":false},"twitter_mentioned_user":{"type":"string","store":true,"analyzer":"lowercase_keyword","include_in_all":false},"twitter_mentioned_user_id":{"type":"long","store":true,"include_in_all":false},"twitter_quoted_status_id":{"type":"long","store":true,"include_in_all":false},"twitter_quoted_user":{"type":"string","store":true,"analyzer":"lowercase_keyword","include_in_all":false},"twitter_quoted_user_id":{"type":"long","store":true,"include_in_all":false},"twitter_retweet_of_status_id":{"type
":"long","store":true,"include_in_all":false},"twitter_retweeted_user":{"type":"string","store":true,"analyzer":"lowercase_keyword","include_in_all":false},"twitter_retweeted_user_id":{"type":"long","store":true,"include_in_all":false},"twitter_status_id":{"type":"long","store":true,"include_in_all":false},"twitter_user":{"type":"string","store":true,"analyzer":"lowercase_keyword","include_in_all":false},"twitter_user_id":{"type":"long","store":true,"include_in_all":false},"typology":{"type":"string","index":"not_analyzed","store":true,"include_in_all":false},"uri_hash":{"type":"short","store":true,"include_in_all":false},"url":{"type":"string","store":true,"term_vector":"with_positions","analyzer":"url_text_field","include_in_all":false}}},"annotation":{"include_in_all":false,"_parent":{"type":"myitem","fielddata":{"loading":"eager_global_ordinals"}},"_routing":{"required":true},"properties":{"category":{"type":"integer","store":true,"include_in_all":false},"category_annotations":{"type":"nested","include_in_root":true,"include_in_all":false,"properties":{"category":{"type":"integer","store":true,"include_in_all":false},"category_value":{"type":"integer","store":true,"include_in_all":false},"creation_time":{"type":"date","store":true,"format":"date_time","include_in_all":false},"is_automatic":{"type":"boolean","store":true},"rule_id":{"type":"integer","store":true,"include_in_all":false},"tone":{"type":"short","store":true,"include_in_all":false},"user_id":{"type":"integer","store":true,"include_in_all":false}}},"category_value":{"type":"integer","store":true,"include_in_all":false},"client":{"type":"integer","store":true,"include_in_all":false},"creation_time":{"type":"date","store":true,"format":"date_time","include_in_all":false},"draft":{"type":"boolean","store":true},"entity":{"type":"integer","store":true,"include_in_all":false},"is_automatic":{"type":"boolean","store":true},"project":{"type":"integer","store":true,"include_in_all":false},"publication_date":{"type":"date","store":true,"format":"date_time","include_in_all":false},"rule_id":{"type":"integer","store":true,"include_in_all":false},"user_id":{"type":"integer","store":true,"include_in_all":false}}},"project_category_annotation":{"include_in_all":false,"_parent":{"type":"status","fielddata":{"loading":"eager_global_ordinals"}},"_routing":{"required":true},"properties":{"annotation_rule":{"type":"integer","store":true,"include_in_all":false},"category":{"type":"integer","store":true,"include_in_all":false},"category_value":{"type":"integer","store":true,"include_in_all":false},"creation_time":{"type":"date","store":true,"format":"date_time","include_in_all":false},"origin":{"type":"integer","store":true,"include_in_all":false}}},"status":{"include_in_all":false,"_parent":{"type":"myitem","fielddata":{"loading":"eager_global_ordinals"}},"_routing":{"required":true},"properties":{"auto_status":{"type":"byte","store":true,"include_in_all":false},"client":{"type":"integer","store":true,"include_in_all":false},"confidence":{"type":"float","store":true,"include_in_all":false},"control_item":{"type":"boolean","store":true},"creation_time":{"type":"date","store":true,"format":"date_time","include_in_all":false},"origin":{"type":"integer","store":true,"include_in_all":false},"project":{"type":"integer","store":true,"include_in_all":false},"publication_date":{"type":"date","store":true,"format":"date_time","include_in_all":false},"reason":{"type":"short","store":true,"include_in_all":false},"sample_rule":{"type":"integer","store":true,
"include_in_all":false},"status":{"type":"integer","store":true,"include_in_all":false},"validated":{"type":"boolean","store":true}}}},"settings":{"index":{"number_of_shards":"1","mapper":{"dynamic":"false"},"creation_date":"1482908398811","analysis":{"filter":{"sortable_truncate":{"length":"10","type":"truncate"},"spanish_stemmer":{"type":"stemmer","language":"light_spanish"},"spanish_stop":{"type":"stop","stopwords":"_spanish_"},"custom_icu_folding":{"type":"icu_folding","unicodeSetFilter":"[^åäöÅÄÖñÑ]"}},"analyzer":{"url_text_field":{"filter":["lowercase"],"pattern":"#|&|\\+|\\,|-|\\.|/|:|\\;|=|\\?|_","type":"pattern","stopwords":["http","https","ftp","www"],"tokenizer":"keyword"},"sortable_truncated_text_field":{"filter":["standard","lowercase","sortable_truncate","custom_icu_folding"],"char_filter":["html_strip"],"type":"custom","tokenizer":"keyword"},"spanish_unstemmed":{"filter":["lowercase"],"char_filter":["html_strip"],"type":"custom","stopwords":"_none_","tokenizer":"standard"},"lowercase_keyword":{"filter":["lowercase"],"type":"custom","tokenizer":"keyword"},"generic_text_field":{"filter":["standard","lowercase","custom_icu_folding"],"char_filter":["html_strip"],"type":"custom","tokenizer":"standard"},"spanish_html_strip":{"filter":["lowercase","spanish_stop","spanish_stemmer"],"char_filter":["html_strip"],"type":"custom","tokenizer":"standard"},"sortable_text_field":{"filter":["standard","lowercase","custom_icu_folding"],"char_filter":["html_strip"],"type":"custom","tokenizer":"keyword"}}},"number_of_replicas":"1","uuid":"j3gp_JcIR6q8HyLUT4sSfg","version":{"created":"2020199"}}},"warmers":{}}}
However, I want to obtain the data from the mapping myitem:
curl -XGET 'http://XXX.XX.XX.XX:9200/news_abc/myitem'
Error:
{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"No feature for name [myitem]"}],"type":"illegal_argument_exception","reason":"No feature for name [myitem]"},"status":400}
-
ElasticSearch Specific Split series Aggregation
I am new to Elastic Search.
I have this visualization which shows the total nº of requests and the split series which takes a key and split the bar on different values of a
keyword.Now
what I want is only to split on a specific value of that key.And remove all the items which do not have that entry from the visualization Please refer the screenshot.
Here I want only the bars with that purple tip on top and not the light blue ones. Nothing is working. Nor the son input neither the exclude pattern fields please help
I have only this section to edit.I can't alter the data of the search.
-
Laravel how to send requests to an elasticsearch node?
I'm trying to send a request from my Laravel application to my Elasticsearch node to get an index, but the application returns Elasticsearch\Common\Exceptions\Forbidden403Exception (403).
This is my PHP code:
$client = ClientBuilder::create()->build();
$params = ['index' => '$message'];
$response = $client->indices()->getSettings($params);
print_r($response);
-
How to login to real time company hadoop cluster
I am new to the Hadoop environment. I recently joined a company and was given KT and the required documents for the project. They asked me to log in to the cluster and start work immediately. Can anyone suggest the steps to log in?
-
How does namenode recognize changes in namespace after checkpointing
After checkpointing in Hadoop, the new fsimage is copied back to the NameNode and the previous edit logs are truncated. My question is: how does the NameNode know about changes made after the new fsimage is created, given that it still has the older fsimage in memory while the edit logs were truncated during checkpointing?
It is only after a NameNode restart that the new fsimage is loaded into memory, so how does the NameNode recognize a change when, during checkpointing, a new fsimage is constructed that is not in memory?
-
Hive UDAF: "ClassCastException: java.lang.String cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyString"
As a learning exercise, I'm writing a Hive UDAF that is similar to the built-in collect_set, but instead of returning only the distinct items, I also want to return the associated counts (so it returns a map instead of a set). When running this in my pseudo-distributed Hadoop 2.8.2 setup, I run into the following error and have no idea what's causing it. Can someone be a hero and help me understand the fix for this?
I'm including all the relevant info below. Thank you so much for your time!
The query:
select collect_count(words) from word_table;
The error message:
Vertex killed, vertexName=Reducer 2, vertexId=vertex_1524519000431_0001_1_01, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:1, Vertex vertex_1524519000431_0001_1_01 [Reducer 2] killed/failed due to:OTHER_VERTEX_FAILURE] DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:1 FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Map 1, vertexId=vertex_1524519000431_0001_1_00, diagnostics=[Task failed, taskId=task_1524519000431_0001_1_00_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_1524519000431_0001_1_00_000000_0:java.lang.RuntimeException: java.lang.RuntimeException: Hive Runtime Error while closing operators at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1836) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.RuntimeException: Hive Runtime Error while closing operators at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:488) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:199) ... 14 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: org.apache.hadoop.hive.ql.metadata.HiveException: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyString at org.apache.hadoop.hive.ql.exec.GroupByOperator.closeOp(GroupByOperator.java:1126) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:711) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:464) ... 15 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyString at org.apache.hadoop.hive.ql.exec.GroupByOperator.flush(GroupByOperator.java:1084) at org.apache.hadoop.hive.ql.exec.GroupByOperator.closeOp(GroupByOperator.java:1123) ... 
20 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyString at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:397) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) at org.apache.hadoop.hive.ql.exec.GroupByOperator.forward(GroupByOperator.java:1047) at org.apache.hadoop.hive.ql.exec.GroupByOperator.flush(GroupByOperator.java:1067) ... 21 more Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyString at org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector.getPrimitiveWritableObject(LazyStringObjectInspector.java:51) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.serialize(LazyBinarySerDe.java:473) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.serialize(LazyBinarySerDe.java:641) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.serializeStruct(LazyBinarySerDe.java:283) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.serializeStruct(LazyBinarySerDe.java:243) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.serialize(LazyBinarySerDe.java:205) at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.makeValueWritable(ReduceSinkOperator.java:534) at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:376) ... 24 more
The code:
public class CollectCountUDAF extends AbstractGenericUDAFResolver {

    private static Logger LOG = LoggerFactory.getLogger(CollectCountUDAF.class);

    public static int DEFAULT_MAX_VALUES = 10;

    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] params) throws SemanticException {
        if (params.length != 1) {
            throw new UDFArgumentTypeException(params.length - 1,
                "One argument is expected: the column to collect distinct value counts from");
        }
        return new CollectCountEvaluator();
    }

    public static class CollectCountEvaluator extends GenericUDAFEvaluator {

        int numValues = DEFAULT_MAX_VALUES;

        // For PARTIAL1 and COMPLETE: ObjectInspectors for original data
        private PrimitiveObjectInspector inputOI;

        // For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations (list of objs)
        private StandardMapObjectInspector mapOI;

        public CollectCountEvaluator() {
        }

        public ObjectInspector init(Mode m, ObjectInspector[] params) throws HiveException {
            super.init(m, params);

            // init input object inspectors
            if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
                inputOI = (PrimitiveObjectInspector) params[0];
            } else {
                mapOI = (StandardMapObjectInspector) params[0];
            }

            // init output object inspectors
            if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
                // The output of a partial aggregation is a map of {value: count}
                // with "value" being of the same type as the original input column,
                // and count being an integer
                return ObjectInspectorFactory.getStandardMapObjectInspector(inputOI,
                    PrimitiveObjectInspectorFactory.writableIntObjectInspector);
            } else {
                // The output of FINAL and COMPLETE are also a map of {value: count}
                return ObjectInspectorFactory.getStandardMapObjectInspector(inputOI,
                    PrimitiveObjectInspectorFactory.writableIntObjectInspector);
            }
        }

        @Override
        public void iterate(AggregationBuffer agg, Object[] params) throws HiveException {
            Object p = params[0];
            LOG.info("iterating: [{}]", p.toString());
            if (p != null) {
                addToBuffer(agg, p);
            }
        }

        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            LOG.info("terminatePartial");
            return ((CountAggBuffer) agg).serialize();
        }

        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            LOG.info("terminate");
            return ((CountAggBuffer) agg).serialize();
        }

        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            LOG.info("merge");
            Map<Object, IntWritable> other = (HashMap<Object, IntWritable>) mapOI.getMap(partial);
            ((CountAggBuffer) agg).mergePartial(other);
        }

        private void addToBuffer(AggregationBuffer agg, Object obj) {
            CountAggBuffer buff = (CountAggBuffer) agg;
            buff.add(ObjectInspectorUtils.copyToStandardObject(obj, inputOI).toString());
        }

        static class CountAggBuffer extends AbstractAggregationBuffer {

            Map<String, Integer> counts = Maps.newHashMap();

            public void clear() {
                counts.clear();
            }

            public void add(String key) {
                counts.merge(key, 1, Integer::sum);
            }

            public Map<Object, IntWritable> serialize() {
                Map<Object, IntWritable> m = Maps.newHashMap();
                counts.forEach((k, v) -> m.put(k, new IntWritable(counts.get(k))));
                return m;
                // return Maps.asMap(counts.keySet(), k -> new IntWritable(counts.get(k)));
            }

            public void mergePartial(Map<Object, IntWritable> partialCounts) {
                // merge the partialCount from serialize()
                partialCounts.forEach(
                    (k, pCount) -> counts.merge(k.toString(), pCount.get(), Integer::sum)
                );
            }
        }

        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            return new CountAggBuffer();
        }

        @Override
        public void reset(AggregationBuffer buff) throws HiveException {
            ((CountAggBuffer) buff).clear();
        }
    }
}
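Not a verified fix, but one plausible reading of the stack trace: the buffer's keys are plain java.lang.String objects (CountAggBuffer uses Map<String, Integer> and serialize() copies those keys straight into the result map), while the map ObjectInspector returned from init() uses inputOI as the key inspector. For a table read through Hive's lazy SerDe, inputOI is a LazyStringObjectInspector, so when the ReduceSinkOperator serializes the partial result it tries to treat the String keys as LazyString and fails with exactly the ClassCastException shown above. In FINAL mode inputOI is not even set, which is a second problem on the same line. A sketch of an init() that declares standard Java string keys instead, reusing the field names from the code above, might look like this:

// Sketch only: declare the output map with a plain Java string key inspector so that
// the java.lang.String keys produced by CountAggBuffer.serialize() match the declared type.
public ObjectInspector init(Mode m, ObjectInspector[] params) throws HiveException {
    super.init(m, params);

    if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) params[0];  // original column values
    } else {
        mapOI = (StandardMapObjectInspector) params[0];  // partial {value: count} maps
    }

    // In every mode the result is a map of {String value: IntWritable count},
    // so the key inspector is a standard Java string inspector, not inputOI.
    return ObjectInspectorFactory.getStandardMapObjectInspector(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableIntObjectInspector);
}

If that reading is right, addToBuffer() already converts the incoming value to a standard object via ObjectInspectorUtils.copyToStandardObject, so the rest of the evaluator should not need to change.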