2020-10-28
結構請參考 :
https://www.yelp.com/dataset/documentation/main
還沒改完這邊沒有外鍵 …..
SET NAMES utf8;
SET time_zone = '+00:00';
SET foreign_key_checks = 0;
SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO';
SET NAMES utf8mb4;
CREATE TABLE `business` (
`business_id` varchar(256) DEFAULT NULL,
`name` varchar(256) DEFAULT NULL,
`address` varchar(256) DEFAULT NULL,
`city` varchar(256) DEFAULT NULL,
`state` varchar(256) DEFAULT NULL,
`postal_code` varchar(11) DEFAULT NULL,
`latitude` float DEFAULT NULL,
`longitude` float DEFAULT NULL,
`stars` float DEFAULT NULL,
`review_count` int(11) DEFAULT NULL,
`is_open` int(11) DEFAULT NULL,
`attributes` longtext DEFAULT NULL,
`categories` longtext DEFAULT NULL,
`hours` longtext NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `checkin` (
`business_id` varchar(256) DEFAULT NULL,
`date` longtext DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `photo` (
`photo_id` varchar(256) DEFAULT NULL,
`business_id` varchar(256) DEFAULT NULL,
`caption` longtext DEFAULT NULL,
`label` varchar(256) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `review` (
`review_id` varchar(256) DEFAULT NULL,
`user_id` varchar(256) DEFAULT NULL,
`business_id` varchar(256) DEFAULT NULL,
`stars` float DEFAULT NULL,
`useful` int(11) DEFAULT NULL,
`funny` int(11) DEFAULT NULL,
`cool` int(11) DEFAULT NULL,
`text` longtext DEFAULT NULL,
`date` datetime DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `tip` (
`user_id` varchar(256) DEFAULT NULL,
`business_id` varchar(256) DEFAULT NULL,
`text` varchar(1000) DEFAULT NULL,
`date` datetime DEFAULT NULL,
`compliment_count` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `user` (
`user_id` varchar(256) DEFAULT NULL,
`name` varchar(256) DEFAULT NULL,
`review_count` int(11) DEFAULT NULL,
`yelping_since` varchar(256) DEFAULT NULL,
`useful` int(11) DEFAULT NULL,
`funny` int(11) DEFAULT NULL,
`cool` int(11) DEFAULT NULL,
`elite` varchar(256) DEFAULT NULL,
`friends` longtext DEFAULT NULL,
`fans`int(11) DEFAULT NULL,
`average_stars` float DEFAULT NULL,
`compliment_hot` int(11) DEFAULT NULL,
`compliment_more` int(11) DEFAULT NULL,
`compliment_profile` int(11) DEFAULT NULL,
`compliment_cute` int(11) DEFAULT NULL,
`compliment_list` int(11) DEFAULT NULL,
`compliment_note` int(11) DEFAULT NULL,
`compliment_plain` int(11) DEFAULT NULL,
`compliment_cool` int(11) DEFAULT NULL,
`compliment_funny` int(11) DEFAULT NULL,
`compliment_writer` int(11) DEFAULT NULL,
`compliment_photos` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
下載 json 後匯入
business.py
import json
import pymysql
def prem(db):
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
data = cursor.fetchone()
print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
with open('yelp_academic_dataset_business.json', encoding='utf-8') as f:
i = 0
while True:
i += 1
print(u'正在載入第%s行......' % i)
try:
lines = f.readline() # 使用逐行讀取的方法
review_text = json.loads(lines) # 解析每一行資料
sql = 'INSERT INTO business('
for k,v in review_text.items():
print (k,v)
result = []
result.append((review_text['business_id'],
review_text['name'],review_text['address'],
review_text['city'], review_text['state'],
review_text['postal_code'], review_text['latitude'],
review_text['longitude'], review_text['stars'],
review_text['review_count'], review_text['is_open'],
str(review_text['attributes']),str(review_text['categories']),
str(review_text['hours'])
))
print(result)
sql = 'INSERT INTO business(business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours)'
sql = sql + 'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
cursor = db.cursor()
cursor.executemany(sql, result)
db.commit()
except Exception as e:
db.rollback()
print(str(e))
break
db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()
checkin.py
import json
import pymysql
def prem(db):
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
data = cursor.fetchone()
print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
with open('yelp_academic_dataset_checkin.json', encoding='utf-8') as f:
i = 0
while True:
i += 1
print(u'正在載入第%s行......' % i)
try:
lines = f.readline() # 使用逐行讀取的方法
review_text = json.loads(lines) # 解析每一行資料
result = []
result.append((review_text['business_id'], str(review_text['date'])[0:500]))
print(result)
sql = 'INSERT INTO checkin(business_id, date) VALUES (%s, %s)'
cursor = db.cursor()
cursor.executemany(sql, result)
db.commit()
except Exception as e:
db.rollback()
print(str(e))
break
db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()
photos.py
import json
import pymysql
def prem(db):
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
data = cursor.fetchone()
print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
with open('photos.json', encoding='utf-8') as f:
i = 0
while True:
i += 1
print(u'正在載入第%s行......' % i)
try:
lines = f.readline() # 使用逐行讀取的方法
review_text = json.loads(lines) # 解析每一行資料
result = []
result.append((review_text['photo_id'], review_text['business_id'],str(review_text['caption']), str(review_text['label'])))
print(result)
sql = 'INSERT INTO photo(photo_id, business_id,caption,label) VALUES (%s, %s,%s, %s)'
cursor = db.cursor()
cursor.executemany(sql, result)
db.commit()
except Exception as e:
db.rollback()
print(str(e))
break
db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()
review.py
import json
import pymysql
def prem(db):
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
data = cursor.fetchone()
print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
with open('yelp_academic_dataset_review.json', encoding='utf-8') as f:
i = 0
while True:
i += 1
print(u'正在載入第%s行......' % i)
try:
lines = f.readline() # 使用逐行讀取的方法
review_text = json.loads(lines) # 解析每一行資料
result = []
result.append((review_text['review_id'], review_text['user_id'],review_text['business_id'],review_text['stars'], review_text['useful'],review_text['funny'],review_text['cool'],review_text['text'],review_text['date']))
print(result)
sql = 'INSERT INTO review(review_id, user_id, business_id, stars, useful,funny,cool,text,date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
cursor = db.cursor()
cursor.executemany(sql, result)
db.commit()
except Exception as e:
db.rollback()
print(str(e))
break
db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()
tip.py
import json
import pymysql
def prem(db):
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
data = cursor.fetchone()
print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
with open('yelp_academic_dataset_tip.json', encoding='utf-8') as f:
i = 0
while True:
i += 1
print(u'正在載入第%s行......' % i)
try:
lines = f.readline() # 使用逐行讀取的方法
review_text = json.loads(lines) # 解析每一行資料
result = []
result.append((review_text['text'], review_text['date'],review_text['compliment_count'],review_text['business_id'], review_text['user_id']))
print(result)
sql = 'INSERT INTO tip(text, date, compliment_count, business_id, user_id) VALUES (%s, %s, %s, %s, %s)'
cursor = db.cursor()
cursor.executemany(sql, result)
db.commit()
except Exception as e:
db.rollback()
print(str(e))
break
db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()
user.py
import json
import pymysql
def prem(db):
cursor = db.cursor()
cursor.execute("SELECT VERSION()")
data = cursor.fetchone()
print("Database version : %s " % data) # 結果表明已經連線成功
def reviewdata_insert(db):
with open('yelp_academic_dataset_user.json', encoding='utf-8') as f:
i = 0
while True:
i += 1
print(u'正在載入第%s行......' % i)
try:
lines = f.readline() # 使用逐行讀取的方法
review_text = json.loads(lines) # 解析每一行資料
result = []
result.append((review_text['user_id'],
review_text['name'],review_text['review_count'],
review_text['yelping_since'], review_text['useful'],
review_text['funny'], review_text['cool'],
review_text['elite'],str(review_text['friends']),
review_text['fans'], review_text['average_stars'],
review_text['compliment_hot'], review_text['compliment_more'],
review_text['compliment_profile'], review_text['compliment_cute'],
review_text['compliment_list'], review_text['compliment_note'],
review_text['compliment_plain'], review_text['compliment_cool'],
review_text['compliment_funny'], review_text['compliment_writer'],
review_text['compliment_photos'],
))
print(result)
sql = 'INSERT INTO user(user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
cursor = db.cursor()
cursor.executemany(sql, result)
db.commit()
except Exception as e:
db.rollback()
print(str(e))
break
db = pymysql.connect(unix_socket="/opt/lampp/var/mysql/mysql.sock", user="root", passwd="root", db="yelp", charset='utf8')
cursor = db.cursor()
prem(db)
reviewdata_insert(db)
cursor.close()