Monday, August 29, 2016

Comparison of Space Used Between SQL Server and Vertica


Table Size in SQL Server


SELECT
    t.NAME AS TableName,
    s.Name AS SchemaName,
    p.rows AS RowCounts,
    SUM(a.total_pages) * 8 AS TotalSpaceKB,    -- SQL Server pages are 8 KB each
    SUM(a.used_pages) * 8 AS UsedSpaceKB,
    (SUM(a.total_pages) - SUM(a.used_pages)) * 8 AS UnusedSpaceKB
FROM
    sys.tables t
INNER JOIN
    sys.indexes i ON t.OBJECT_ID = i.object_id
INNER JOIN
    sys.partitions p ON i.object_id = p.OBJECT_ID AND i.index_id = p.index_id
INNER JOIN
    sys.allocation_units a ON p.partition_id = a.container_id
LEFT OUTER JOIN
    sys.schemas s ON t.schema_id = s.schema_id
WHERE
    t.NAME = 'RETENTION_METRICS_Vertica'
    AND t.is_ms_shipped = 0    -- skip objects shipped by Microsoft
    AND i.OBJECT_ID > 255      -- skip system objects
GROUP BY
    t.Name, s.Name, p.Rows
ORDER BY
    t.Name;
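
For a quick cross-check, SQL Server's built-in sp_spaceused procedure reports similar figures (rows, reserved, data, index_size, unused). The dbo schema below is an assumption; qualify with the table's actual schema:

EXEC sp_spaceused N'dbo.RETENTION_METRICS_Vertica';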

Table Size in Vertica


SELECT t.table_name AS table_name,
       SUM(ps.wos_row_count + ps.ros_row_count) AS row_count,
       SUM(ps.wos_used_bytes + ps.ros_used_bytes) AS byte_count,
       SUM(ps.used_bytes) AS sum_used_bytes
FROM tables t
JOIN projections p ON t.table_id = p.anchor_table_id
JOIN projection_storage ps ON p.projection_name = ps.projection_name
WHERE t.table_name = 'subscription'
GROUP BY t.table_name
ORDER BY byte_count DESC;
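
If only the total size per table is needed, projection_storage can be aggregated directly through its anchor_table_name column, skipping the joins. A minimal sketch; like the query above, it sums every projection on every node, so replicated projections count once per node:

SELECT anchor_table_name,
       SUM(used_bytes) AS total_used_bytes
FROM projection_storage
WHERE anchor_table_name = 'subscription'
GROUP BY anchor_table_name;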

Tuesday, July 12, 2016

How to load data from HDFS into Vertica

Through the COPY Command

Data: hdfsdata.txt (space-delimited)

1 amazon
2 google
3 microsoft

 

drop table if exists test.hdfsdata;

create table test.hdfsdata(
  col1 int,
  col2 varchar(100)
);

hadoop fs -put hdfsdata.txt  /user/tickmoh/test/.


vsql> COPY test.hdfsdata SOURCE Hdfs(url='localhost:50070/webhdfs/v1/user/tickmoh/test/hdfsdata.txt', username='tickmoh') DELIMITER AS ' ' NULL AS 'null';
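
To verify the load (the three rows come from the sample file above):

vsql> SELECT * FROM test.hdfsdata ORDER BY col1;
 col1 |   col2
------+-----------
    1 | amazon
    2 | google
    3 | microsoft
(3 rows)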

 

Through an External Table

 
drop table if exists test.hdfsdata_ext;
create external table test.hdfsdata_ext(
  col1 int,
  col2 varchar(100)
)
AS COPY SOURCE Hdfs(url='localhost:50070/webhdfs/v1/user/tickmoh/test/hdfsdata.txt', username='tickmoh') DELIMITER AS ' ' NULL AS 'null';
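
Unlike COPY, an external table leaves the data in HDFS; every query re-reads the file:

select * from test.hdfsdata_ext;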

 

Replace localhost with your Hadoop NameNode host; 50070 is the default WebHDFS port.

Thursday, June 30, 2016

Important queries


Run a SQL script in the background from the shell:

vsql -U dbadmin -f test200agg_8_6.sql &
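
If the script must survive the terminal closing (assuming a Unix shell), nohup with redirected output is the usual wrapper:

nohup vsql -U dbadmin -f test200agg_8_6.sql > test200agg_8_6.log 2>&1 &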

Projection state and storage for a table:

SELECT p.projection_name, p.node_id, p.is_up_to_date, p.has_statistics, p.is_super_projection,
       ps.used_bytes, ps.wos_row_count, ps.ros_row_count,
       ps.wos_used_bytes, ps.ros_used_bytes, ps.ros_count
FROM tables t
JOIN projections p ON t.table_id = p.anchor_table_id
JOIN projection_storage ps ON p.projection_name = ps.projection_name
WHERE t.table_name ILIKE 'subscription';

drop table if exists test.tt1;
create external table test.tt1(
  col1 int
)
AS COPY SOURCE Hdfs(url='hdfs://sjvqnhdpl010.mcafee.int:50070/user/smohapa1/testfile.txt', username='smohapa1', password='smohapa1');


COPY testTable SOURCE Hdfs(url='http://sjvqnhdpl010.mcafee.int:8888/user/smohapa1/testfile.txt', username='dbadmin');

DROP PROJECTION IF EXISTS sales_p1;
CREATE PROJECTION sales_p1 (code, amount, product_id, sales_date)
AS
(
  SELECT code, amount, product_id, sales_date
  FROM sales
  WHERE sales_date > now() - 30
) UNSEGMENTED ALL NODES;
SELECT refresh('sales');

[Error Code: 5600, SQL State: 42601] [Vertica][VJDBC](5600) ERROR: Invalid predicate in projection-select. Only PK=FK equijoins are allowed

The WHERE clause is the problem: a projection's SELECT may not contain arbitrary predicates. The only predicates Vertica accepts are PK=FK equijoins, as in pre-join projections.

DROP PROJECTION IF EXISTS sales_p1;
CREATE PROJECTION sales_p1 (sales_date, code_count, amount_sum)
AS
(
  SELECT sales_date, count(code), sum(amount)
  FROM sales
  GROUP BY sales_date
) UNSEGMENTED ALL NODES;

ERROR: SEGMENTED BY / UNSEGMENTED is not allowed in aggregate projection. The aggregate projection is automatically segmented on group by columns
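
Dropping the segmentation clause, as the error message directs, gives an aggregate projection that compiles. A sketch; the alias names are assumptions:

DROP PROJECTION IF EXISTS sales_p1;
CREATE PROJECTION sales_p1
AS
SELECT sales_date, count(code) AS code_count, sum(amount) AS amount_sum
FROM sales
GROUP BY sales_date;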

Exporting the sales table script:
dbadmin=> select export_tables('/home/dbadmin/scr/sql_tables_empty.sql','sales');

Exporting scripts for all tables (an empty scope exports everything):
dbadmin=> select export_tables('/home/dbadmin/scr/sql_tables_empty.sql','');

Check the table script:

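One way, a sketch using vsql's \! shell escape (which runs a host command), is to cat the file written above:

dbadmin=> \! cat /home/dbadmin/scr/sql_tables_empty.sql
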
Resource usage for a given transaction and statement:

SELECT query_profiles.transaction_id,
          query_profiles.statement_id, 
          query_profiles.query,
          resource_acquisitions.open_file_handle_count,
          resource_acquisitions.thread_count,
          resource_acquisitions.memory_inuse_kb,
          resource_acquisitions.duration_ms
   FROM query_profiles, resource_acquisitions 
   WHERE (
         resource_acquisitions.transaction_id = query_profiles.transaction_id AND
         resource_acquisitions.statement_id = query_profiles.statement_id
         ) 
         AND 
         (
         query_profiles.transaction_id =  49539595901078494 AND
         query_profiles.statement_id = 1
         );
Which projections recent queries used:

SELECT transaction_id, statement_id, io_type, projection_name
FROM projection_usage
ORDER BY query_start_timestamp DESC;
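
The transaction_id and statement_id values used above can be captured by running the statement under PROFILE first; Vertica reports both IDs in the hint it prints (sales is a stand-in table here):

dbadmin=> PROFILE SELECT count(*) FROM sales;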

Checking whether the autocommit flag is on or off
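
In vsql, autocommit is exposed as a variable. A minimal sketch, assuming vsql's psql-style \set handling; JDBC clients would call Connection.getAutoCommit() / setAutoCommit() instead:

vsql> \set AUTOCOMMIT on
vsql> \echo :AUTOCOMMIT
on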



Spark Streaming example: counting words in new text files that appear in a directory.

// scalastyle:off println
package org.apache.spark.examples.streaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Counts words in new text files created in the given directory
 * Usage: HdfsWordCount <directory>
 *   <directory> is the directory that Spark Streaming will use to find and read new text files.
 *
 * To run this on your local machine on directory `localdir`, run this example
 *    $ bin/run-example \
 *       org.apache.spark.examples.streaming.HdfsWordCount localdir
 *
 * Then create a text file in `localdir` and the words in the file will get counted.
 */
object HdfsWordCount {
  def main(args: Array[String]) {
    if (args.length < 1) {
      System.err.println("Usage: HdfsWordCount <directory>")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()
    val sparkConf = new SparkConf().setAppName("HdfsWordCount")
    // Create the context
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Create the FileInputDStream on the directory and use the
    // stream to count words in new files created
    val lines = ssc.textFileStream(args(0))
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
    wordCounts.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
// scalastyle:on println


Export table script

To extract a table's DDL script in Vertica, use EXPORT_TABLES:

EXPORT_TABLES( [ 'destination' ] , [ 'scope' ] )

Example:

 
dbadmin=> select EXPORT_TABLES ( '' , 'public.sales' );
 
dbadmin=> select EXPORT_TABLES ( '/home/dbadmin/scripts/sales.txt' , 'public.sales' );
 
 
The default destination '' means standard output: the script is printed on the screen.