Extreme Thinking
Yelp 作業

2020-10-28


結構請參考 :

https://www.yelp.com/dataset/documentation/main

還沒改完這邊沒有外鍵 …..

SET NAMES utf8;
SET time_zone = '+00:00';
SET foreign_key_checks = 0;
SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO';

SET NAMES utf8mb4;

CREATE TABLE `business` (
  `business_id` varchar(256) DEFAULT NULL,
  `name` varchar(256) DEFAULT NULL,
  `address` varchar(256) DEFAULT NULL,
  `city` varchar(256) DEFAULT NULL,
  `state` varchar(256) DEFAULT NULL,
  `postal_code` varchar(11) DEFAULT NULL,
  `latitude` float DEFAULT NULL,
  `longitude` float DEFAULT NULL,
  `stars` float DEFAULT NULL,
  `review_count` int(11) DEFAULT NULL,
  `is_open` int(11) DEFAULT NULL,
  `attributes` longtext DEFAULT NULL,
  `categories` longtext DEFAULT NULL,
  `hours` longtext NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


CREATE TABLE `checkin` (
  `business_id` varchar(256) DEFAULT NULL,
  `date` longtext DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


CREATE TABLE `photo` (
  `photo_id` varchar(256) DEFAULT NULL,
  `business_id` varchar(256) DEFAULT NULL,
  `caption` longtext DEFAULT NULL,
  `label` varchar(256) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


CREATE TABLE `review` (
  `review_id` varchar(256) DEFAULT NULL,
  `user_id` varchar(256) DEFAULT NULL,
  `business_id` varchar(256) DEFAULT NULL,
  `stars` float DEFAULT NULL,
  `useful` int(11) DEFAULT NULL,
  `funny` int(11) DEFAULT NULL,
  `cool` int(11) DEFAULT NULL,
  `text` longtext DEFAULT NULL,
  `date` datetime DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


CREATE TABLE `tip` (
  `user_id` varchar(256) DEFAULT NULL,
  `business_id` varchar(256) DEFAULT NULL,
  `text` varchar(1000) DEFAULT NULL,
  `date` datetime DEFAULT NULL,
  `compliment_count` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


CREATE TABLE `user` (
  `user_id` varchar(256) DEFAULT NULL,
  `name` varchar(256) DEFAULT NULL,
  `review_count` int(11) DEFAULT NULL,
  `yelping_since` varchar(256) DEFAULT NULL,
  `useful` int(11) DEFAULT NULL,
  `funny` int(11) DEFAULT NULL,
  `cool` int(11) DEFAULT NULL,
  `elite` varchar(256) DEFAULT NULL,
  `friends` longtext DEFAULT NULL,
  `fans`int(11) DEFAULT NULL,
  `average_stars` float DEFAULT NULL,
  `compliment_hot` int(11) DEFAULT NULL,
  `compliment_more` int(11) DEFAULT NULL,
  `compliment_profile` int(11) DEFAULT NULL,
  `compliment_cute` int(11) DEFAULT NULL,
  `compliment_list` int(11) DEFAULT NULL,
  `compliment_note` int(11) DEFAULT NULL,
  `compliment_plain` int(11) DEFAULT NULL,
  `compliment_cool` int(11) DEFAULT NULL,
  `compliment_funny` int(11) DEFAULT NULL,
  `compliment_writer` int(11) DEFAULT NULL,
  `compliment_photos` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

下載 json 後匯入

business.py

import json
import pymysql
def prem(db):
    cursor = db.cursor()
    cursor.execute("SELECT VERSION()")
    data = cursor.fetchone()
    print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
    with open('yelp_academic_dataset_business.json', encoding='utf-8') as f:
        i = 0
        while True:
            i += 1
            print(u'正在載入第%s行......' % i)
            try:
                lines = f.readline() # 使用逐行讀取的方法
                review_text = json.loads(lines) # 解析每一行資料
                sql = 'INSERT INTO business('
                for k,v in review_text.items():
                    print (k,v)

                result = []
                result.append((review_text['business_id'], 
                review_text['name'],review_text['address'],
                review_text['city'], review_text['state'],
                review_text['postal_code'], review_text['latitude'],
                review_text['longitude'], review_text['stars'],
                review_text['review_count'], review_text['is_open'],
                str(review_text['attributes']),str(review_text['categories']), 
                str(review_text['hours'])
                ))
                print(result)
                sql = 'INSERT INTO business(business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours)'
                sql = sql + 'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
                cursor = db.cursor()
                cursor.executemany(sql, result)
                db.commit()
            except Exception as e:
                db.rollback()
                print(str(e))
                break

db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()

checkin.py

import json
import pymysql
def prem(db):
    cursor = db.cursor()
    cursor.execute("SELECT VERSION()")
    data = cursor.fetchone()
    print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
    with open('yelp_academic_dataset_checkin.json', encoding='utf-8') as f:
        i = 0
        while True:
            i += 1
            print(u'正在載入第%s行......' % i)
            try:
                lines = f.readline() # 使用逐行讀取的方法
                review_text = json.loads(lines) # 解析每一行資料
                result = []
                result.append((review_text['business_id'], str(review_text['date'])[0:500]))
                print(result)
                sql = 'INSERT INTO checkin(business_id, date) VALUES (%s, %s)'
                cursor = db.cursor()
                cursor.executemany(sql, result)
                db.commit()
            except Exception as e:
                db.rollback()
                print(str(e))
                break

db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()

photos.py

import json
import pymysql
def prem(db):
    cursor = db.cursor()
    cursor.execute("SELECT VERSION()")
    data = cursor.fetchone()
    print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
    with open('photos.json', encoding='utf-8') as f:
        i = 0
        while True:
            i += 1
            print(u'正在載入第%s行......' % i)
            try:
                lines = f.readline() # 使用逐行讀取的方法
                review_text = json.loads(lines) # 解析每一行資料
                result = []
                result.append((review_text['photo_id'], review_text['business_id'],str(review_text['caption']), str(review_text['label'])))
                print(result)
                sql = 'INSERT INTO photo(photo_id, business_id,caption,label) VALUES (%s, %s,%s, %s)'
                cursor = db.cursor()
                cursor.executemany(sql, result)
                db.commit()
            except Exception as e:
                db.rollback()
                print(str(e))
                break

db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()

review.py

import json
import pymysql
def prem(db):
    cursor = db.cursor()
    cursor.execute("SELECT VERSION()")
    data = cursor.fetchone()
    print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
    with open('yelp_academic_dataset_review.json', encoding='utf-8') as f:
        i = 0
        while True:
            i += 1
            print(u'正在載入第%s行......' % i)
            try:
                lines = f.readline() # 使用逐行讀取的方法
                review_text = json.loads(lines) # 解析每一行資料
                result = []
                result.append((review_text['review_id'], review_text['user_id'],review_text['business_id'],review_text['stars'], review_text['useful'],review_text['funny'],review_text['cool'],review_text['text'],review_text['date']))
                print(result)
                sql = 'INSERT INTO review(review_id, user_id, business_id, stars, useful,funny,cool,text,date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
                cursor = db.cursor()
                cursor.executemany(sql, result)
                db.commit()
            except Exception as e:
                db.rollback()
                print(str(e))
                break

db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()

tip.py

import json
import pymysql
def prem(db):
    cursor = db.cursor()
    cursor.execute("SELECT VERSION()")
    data = cursor.fetchone()
    print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
    with open('yelp_academic_dataset_tip.json', encoding='utf-8') as f:
        i = 0
        while True:
            i += 1
            print(u'正在載入第%s行......' % i)
            try:
                lines = f.readline() # 使用逐行讀取的方法
                review_text = json.loads(lines) # 解析每一行資料
                result = []
                result.append((review_text['text'], review_text['date'],review_text['compliment_count'],review_text['business_id'], review_text['user_id']))
                print(result)
                sql = 'INSERT INTO tip(text, date, compliment_count, business_id, user_id) VALUES (%s, %s, %s, %s, %s)'
                cursor = db.cursor()
                cursor.executemany(sql, result)
                db.commit()
            except Exception as e:
                db.rollback()
                print(str(e))
                break

db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()

user.py

import json
import pymysql
def prem(db):
    cursor = db.cursor()
    cursor.execute("SELECT VERSION()")
    data = cursor.fetchone()
    print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
    with open('yelp_academic_dataset_user.json', encoding='utf-8') as f:
        i = 0
        while True:
            i += 1
            print(u'正在載入第%s行......' % i)
            try:
                lines = f.readline() # 使用逐行讀取的方法
                review_text = json.loads(lines) # 解析每一行資料
                result = []
                result.append((review_text['user_id'], 
                review_text['name'],review_text['review_count'],
                review_text['yelping_since'], review_text['useful'],
                review_text['funny'], review_text['cool'],
                review_text['elite'],str(review_text['friends']),
                review_text['fans'], review_text['average_stars'],
                review_text['compliment_hot'], review_text['compliment_more'],
                review_text['compliment_profile'], review_text['compliment_cute'],
                review_text['compliment_list'], review_text['compliment_note'],
                review_text['compliment_plain'], review_text['compliment_cool'],
                review_text['compliment_funny'], review_text['compliment_writer'],
                review_text['compliment_photos'],
                ))
                print(result)
                sql = 'INSERT INTO user(user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
                cursor = db.cursor()
                cursor.executemany(sql, result)
                db.commit()
            except Exception as e:
                db.rollback()
                print(str(e))
                break

db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()