方案实现
库表设计
在 MySQL 上创建采集表
-- Collection table for goods records: one row per collected item.
-- Column meanings are inferred from the column names only; confirm them
-- against the collector that populates this table.
CREATE TABLE `test`.`goods_data` (
  -- Surrogate primary key, assigned by MySQL.
  `mid` int(11) NOT NULL AUTO_INCREMENT,
  -- Who collected the row; the Sqoop import in this document filters on it.
  `collector` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  -- Presumably the time the row was collected -- TODO confirm time zone.
  `coll_time` datetime DEFAULT NULL,
  `title` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
  `raw_title` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
  -- NOTE(review): 145 characters may be too short for some URLs; verify.
  `pic_url` varchar(145) COLLATE utf8_unicode_ci DEFAULT NULL,
  `detail_url` varchar(145) COLLATE utf8_unicode_ci DEFAULT NULL,
  -- Exact decimals with two fraction digits; default to zero when absent.
  `view_price` decimal(10,2) DEFAULT 0.00,
  `view_fee` decimal(10,2) DEFAULT 0.00,
  `item_loc` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  -- Stored as text, not numbers; presumably the raw scraped strings -- TODO confirm.
  `view_sales` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `comment_count` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`mid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
在 MySQL 上创建收集表
-- Table of words with an associated integer count per row.
-- Presumably a word count over goods names (from the table name) -- TODO confirm.
-- NOTE(review): the table name is not schema-qualified, so it is created in
-- whichever database is current when the statement runs.
CREATE TABLE `wc_goods_name` (
  -- Surrogate primary key, assigned by MySQL.
  `mid` int(11) NOT NULL AUTO_INCREMENT,
  `collector` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `coll_time` datetime DEFAULT NULL,
  `word` varchar(500) COLLATE utf8_unicode_ci DEFAULT NULL,
  -- Presumably the number of occurrences of `word` -- TODO confirm.
  `acount` int(11) DEFAULT 0,
  PRIMARY KEY (`mid`)
-- NOTE(review): AUTO_INCREMENT=1767 makes the first generated `mid` 1767 on a
-- fresh table; it looks like residue from a SHOW CREATE TABLE dump -- confirm
-- whether it is intentional.
) ENGINE=InnoDB AUTO_INCREMENT=1767 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
数据存储
MySQL 采集到 Hive
# Import the rows of MySQL table test.goods_data whose collector is '张三'
# into Hive table myname.goods_data, using a single map task.
#   --delete-target-dir          remove the HDFS staging directory if it already exists
#   --hive-overwrite             replace any existing data in the Hive table
#   --hive-drop-import-delims    strip \n, \r and \01 from string fields so they
#                                cannot break Hive's row/field delimiters
# NOTE(review): --password puts the credential on the command line, where it is
# visible in shell history and the process list; prefer -P or --password-file.
sqoop import \
  --connect "jdbc:mysql://home.hddly.cn:53306/test?useSSL=false&useUnicode=true&characterEncoding=utf-8" \
  --username test \
  --password test \
  --table goods_data \
  --where "collector='张三'" \
  --delete-target-dir \
  --target-dir /user/myname/sqoop_hive_goods \
  --hive-database myname \
  --hive-import \
  --hive-overwrite \
  --hive-table goods_data \
  --hive-drop-import-delims \
  --num-mappers 1