Monday 6 November 2017

Python - Scrape data from a tabular sheet and insert it into a SQLite3 database.

#Scrape data from a tabular sheet and insert it into a SQLite3 database.
#List the inserted records to the console.

from bs4 import BeautifulSoup
import sys
import requests
from datetime import date
import sqlite3

now = date.today()

startdt = sys.argv[1]
enddt = sys.argv[2]

url = 'coinmarketcap.com/currencies/bitcoin/historical-data/?start='+startdt+'&end='+enddt+''

r  = requests.get("https://" +url)
print(url)
data = r.text
soup = BeautifulSoup(data,"lxml")
table = soup.find("table", { "class" : "table" })
currencytype = soup.find("select",{"class" : "pointer"})
row=table.findAll('tr')
row1 = table.findAll('li')

records = []
count = 0

def writedb():            
    conn = sqlite3.connect('bitcoin.db')
    cursor = conn.cursor()
    #Drop table if already exists in the database.
    cursor.execute('DROP TABLE IF EXISTS bitcoin')
    #create table
    cursor.execute('''CREATE TABLE bitcoin
                 (Date text, Open text, High text, Low text, Close text, Volume text,MarketCap text)''')
    conn.commit()

    #reading tabular data
    for row in table.findAll("tr")[1:]:
        cells = row.findAll("td")
        Date = cells[0].find(text=True)
        Open = cells[1].find(text=True)
        High = cells[2].find(text=True)
        Low = cells[3].find(text=True)
        Close = cells[4].find(text=True)
        Volume = cells[5].find(text=True)
        MarketCap = cells[6].find(text=True)
        record = (Date,Open,High,Low,Close,Volume,MarketCap)
        #insert data to table
        cursor.execute('INSERT INTO bitcoin VALUES (?,?,?,?,?,?,?)', record )
   
    #listing data from tables.
    cursor.execute("SELECT * FROM bitcoin") 
    result = cursor.fetchall() 
    
    for r in result:
        print(r)
                
    print("No of Records:%s"%len(result))    

#calling function
writedb();


#executing the script in command prompt
python e:\learning\python\beautifulsoup_sqlite.py 20160101 20171106

#output

Sunday 5 November 2017

SQL Server - Extract specific value from string.

Extract specific values from given string.

For example, I have some data as follows in the LAB table.

Value
LDL=86
HDL-48  LDL-108
CHO=235  LDL=135  TRI=237
HDL=45  LDL=134  
HDL=82  LDL=99 N
CHO=259  LDL=157  TRI=282
LDL:  123
See scanned report.  CHOLEST: 211  LDL: 211  TRIG: 208  HDL DIR: 56  VLDL: 42

Want to extract only LDL values from above and show it as follows.

LDL=86 86
HDL-48  LDL-108 108
CHO=235  LDL=135  TRI=237 135
HDL=45  LDL=134   134
HDL=82  LDL=99 N 99
CHO=259  LDL=157  TRI=282 157
LDL:  123 123
See scanned report.  CHOLEST: 211  LDL: 211  TRIG: 208  HDL DIR: 56  VLDL: 42 211

Below is sample T-SQL code to extract the required data.

Script:

CREATE FUNCTION [dbo].[parseint]
(@string VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
declare @p_string varchar(256)
set @p_string = substring(@string,CHARINDEX('LDL',replace(upper(@string),'VLDL','VXDL')),len(@string))
DECLARE @isalpha INT
SET @isalpha = PATINDEX('%[^0-9]%', @p_string)
BEGIN
WHILE @isalpha > 0
BEGIN
SET @p_string = STUFF(@p_string, @isalpha, 1, '' )
SET @isalpha = PATINDEX('%[^0-9]%', @p_string )
if @isalpha<>1 
break
END
END
RETURN coalesce(substring(@p_string,1,(case when @isalpha=0 then 3 else (@isalpha-1) end)),0)
END

Calling above function:

SELECT value,
                dbo.parseint(value) as LDL 
FROM dbo.lab

Output:



Saturday 4 November 2017

Python - Using BeautifulSoup4 to scrape tabular data and export it to a pipe-separated text file

Requirement:
Need to scrape cryptocurrency details for a given date range. At run time we pass the dates as parameters to the Python script. The data is available in tabular format.



Code to execute:

# parameters in yyyymmdd format
>>>python beautifulsoup.py 20160101 20171104

Sample Output:


----------------------------------------------------------------------------------------------------------------

Code:

#file name: beautifulsoup.py
from bs4 import BeautifulSoup
import sys
import requests
from datetime import date


now = date.today()

#read run time parameters
startdt = sys.argv[1]
enddt = sys.argv[2]

#Website details
url = 'coinmarketcap.com/currencies/bitcoin/historical-data/?start='+startdt+'&end='+enddt+''

r  = requests.get("https://" +url)
data = r.text
soup = BeautifulSoup(data,"lxml")
table = soup.find("table", { "class" : "table" })
currencytype = soup.find("select",{"class" : "pointer"})
row=table.findAll('tr')
row1 = table.findAll('li')
records = []
count = 0

#code to write data to text file.
def writetext():
    with open('c:\output_%s.txt'%now, 'w') as f:
        f.write("Date".ljust(20,' ')+
                "|Open".ljust(21,' ')+
                "|High".ljust(21,' ')+
                "|Low".ljust(21,' ')+
                "|Close".ljust(21,' ')+
                "|Volume".ljust(21,' ')+
                "|MarketCap".ljust(20,' ')+
                "\n")
        for row in table.findAll("tr")[1:]:
            cells = row.findAll("td")
            Date = cells[0].find(text=True)
            Open = cells[1].find(text=True)
            High = cells[2].find(text=True)
            Low = cells[3].find(text=True)
            Close = cells[4].find(text=True)
            Volume = cells[5].find(text=True)
            MarketCap = cells[6].find(text=True)
            record = (Date,Open,High,Low,Close,Volume,MarketCap)
            f.write("%s|%s|%s|%s|%s|%s|%s \n" % \
                  (Date.ljust(20,' '),
                   Open.ljust(20,' '),
                   High.ljust(20,' '),
                   Low.ljust(20,' '),
                   Close.ljust(20,' '),
                   Volume.ljust(20,' '),
                   MarketCap.ljust(20,' ')
                   )
                  )

#calling function
writetext();



Facebook