2、点击 Statistics | Linear models and related | Linear regression 菜单。
4、在结果界面中,_cons 为 .5205279,表示回归方程的截距;回归方程整体是否具有统计学意义应看 F 检验的 Prob > F,而不是截距本身。R-squared 和 Adj R-squared 分别为 0.9905 和 0.9893,说明回归方程拟合效果很好。
5、回归拟合图。点击 Statistics | Linear models and related | Regression diagnostics | Added-variable plot。
6、在弹出的avplot/avplots中,选择“all variables”,点确定。
/* Lecture 1: How to import/export data */
// Set the working directory; every relative file name below resolves against it.
cd "//stata application"
// Load the auto dataset shipped with Stata; clear discards whatever is in memory first.
sysuse "auto.dta", clear

// Export the whole dataset as delimited text.
help export delimited
export delimited using "auto.txt", replace

// Export a CSV file; only the two listed variables are written.
export delimited make price using "auto.csv", replace

// Export to Excel.
help import excel
// Rows 11 through the last observation (L means the last row).
export excel using "auto.xlsx" in 11/L, replace
// Only the observations whose price is at least 3000.
export excel using "auto.xlsx" if price>=3000, replace
help saveold

// Repair a dataset whose Chinese text shows up garbled.
use "Data_luanma.dta" , clear
help unicode
// The documented unicode workflow starts from an empty memory, so drop the data
// that was loaded above only to look at the garbled text.
clear
unicode encoding set gb18030 // source encoding of the garbled file
unicode translate "Data_luanma.dta"
use "Data_luanma.dta" , clear

// Allow import excel to read very large .xlsx workbooks.
set excelxlsxlargefile on
// Template only: the notes left the file name blank, which always fails with
// "file not found". Fill in the workbook name and remove the leading slashes.
// import excel using "your_workbook.xlsx", firstrow clear

// chunky is a user-written command: install it, then read its help file.
ssc install chunky
findit chunky
help chunky
// Split the auto data into five CSV files: one file for each of the first four
// observations and a fifth file holding observation 5 through the last one.
// Each piece needs its own file name, because the loops below read
// auto1.csv ... auto5.csv; writing every piece to one name with replace would
// leave only the last piece on disk.
sysuse auto.dta, clear
forvalues i = 1/4 {
    export delimited using "auto`i'.csv" in `i', replace
}
export delimited using "auto5.csv" in 5/L, replace

help foreach
// Very important: use a loop whenever the same step is repeated over many items.
// numlist syntax: a/b counts from a to b in steps of 1; a(step)b counts in steps of step.
foreach num of numlist 1/10 3 5 8 9(10)100 {
    display `num'
}

// Import one file by hand, then convert all five CSV files to .dta in a loop.
import delimited "auto1.csv", clear
foreach num of numlist 1/5 {
    // If the text comes in garbled, add the option encoding(gb18030).
    import delimited "auto`num'.csv", clear
    save "auto`num'.dta", replace
}

help append
// Stack the five pieces back together, first by naming every file ...
use "auto1.dta", clear
append using "auto2.dta" "auto3.dta" "auto4.dta" ///
    "auto5.dta"

// ... and then with a loop, starting again from the first piece.
use "auto1.dta", clear
foreach num of numlist 2/5 {
    append using "auto`num'.dta"
}
save "auto_new.dta", replace

help erase
// Delete the intermediate .dta pieces.
foreach num of numlist 1/5 {
    erase "auto`num'.dta"
}

// openall is a user-written command; findit locates it for installation.
// NOTE(review): the call below is kept exactly as in the notes; confirm its
// syntax against the help file of the installed version.
help openall
findit openall
openall "auto?",insheet
save "auto_new.dta",replace
// Lecture 3: display formats, labels and listing observations
cd "/Users/Victor/stata"
set more off

help format
// clear lets the load succeed even when unsaved data is in memory.
sysuse auto.dta, clear
format %5s make
edit make price headroom
format %-20s make      // the minus sign left-aligns the string
format %10.0g price
format %3.2f headroom

help list
list make price headroom in 1/L

sysuse auto.dta, clear
help label
// Change the label of the dataset itself.
label data "US auto data美国汽车数据"
// Define or change the label of one variable (here price).
label var price "auto price汽车价格"
// Define a new value label called origin_f that maps 0 and 1 to text.
// A fresh name is used because auto.dta already ships with a label named origin.
label define origin_f 0 "国产" 1 "进口"
// Attach origin_f to foreign. The variable still stores 0 and 1, but the label
// text is shown wherever those values are displayed.
label values foreign origin_f

// Introduce a third code and a value label that covers it.
replace foreign = 2 in 1/8
label define origin_m 0 "国产" 1 "进口" 2 "unknown"
label values foreign origin_m

// saveold writes the data in the format of an older Stata release and needs a
// file name. NOTE(review): the name below is a placeholder; the notes gave none.
saveold "auto_old.dta", replace

list foreign
list foreign in 70/L
// Relational operators: == ~= (or !=) < > <= >= ; a single = is assignment only.
list make if foreign == 0
list make price if make == "AMC Concord" ///
    | make == "Merc. Cougar" ///
    | make == "Olds Toronado"
list make foreign price if (foreign == 1 & price <= 5000) ///
    | (foreign == 0 & price > 3000)
codebook make price
// Round-trip the auto data through Excel. The export writes no header row, so
// the import names the columns A, B, ..., L and they have to be renamed.
sysuse auto.dta, clear
export excel using "auto.xlsx", nolabel replace
import excel using "auto.xlsx", clear

help rename
rename A make
rename B price
// Rename a whole group at once: the i-th old name maps to the i-th new name.
rename (C D E F G H I J K L) ///
    (mpg rep78 headroom trunk weight length ///
    turn displacement gear_ratio foreign)
// The names are back, but the variable labels were lost in the round trip.

// Capitalize the first letter of every variable name, lower-case the rest.
rename _all, proper
save auto.dta , replace

// Batch operation: give every variable a label equal to its own name.
// _all expands to every variable in the dataset.
foreach v of varlist _all {
    label var `v' " `v' "
}
// Creating variables with generate / replace / egen
sysuse auto.dta, clear
gen price2 = price^2
// Interaction for imported cars only; the other rows start missing and are set to 0.
gen price_mpg = price*mpg if foreign == 1
replace price_mpg = 0 if price_mpg == .
// log() and ln() are both the natural logarithm. A price of 0 yields a missing
// value, so check whether a 0 is a true zero or a data-entry error.
gen logprice = log(price)
gen lnprice = ln(price)

// Three price categories built with generate + replace.
// Missing values compare as larger than any number, so a missing price would
// land in category 2.
gen pricecat = 0
replace pricecat = 1 if price >= 5000 & price < 10000
replace pricecat = 2 if price >= 10000
edit pricecat
label define category5 0 "less than 5k" 1 "between 5k and 10k" 2 "more than 10k"
label values pricecat category5
edit price pricecat

help egen // egen = extensions to generate
// mean() is an egen function, not a generate function, so
//     gen priceavg = mean(price)
// stops with an error. Use egen for the overall mean:
egen priceavg = mean(price)
gen price_dev = price - priceavg // deviation of each price from the overall mean
edit price priceavg price_dev

// How to get the mean price separately for domestic and imported cars?
// Manual way: one egen per group, then merge the two partial columns.
sort foreign
egen price_avg = mean(price) if foreign == 1 // imported cars; domestic rows stay missing
egen price_avg2 = mean(price) if foreign == 0
replace price_avg = price_avg2 if price_avg == .
drop price_avg2

// Shorter way: the by prefix. by needs the data sorted on the by-variable.
by foreign: egen priceavg_by6 = mean(price)
sort foreign
by foreign: egen priceavg_by4 = mean(price)

// Number of nonmissing price values within each (turn, length) combination.
// The variable is a count, despite its name.
bys turn length: egen priceavg2 = count(price)

// priceavg and price_dev already exist from above; creating them a second time
// would stop the do-file with "variable already defined".
sort foreign // sort ascending on the 0/1 variable
by foreign : egen priceavg_by = mean(price)   // by: compute within each group
bys foreign : egen priceavg_by1 = mean(price) // bys sorts first, then applies by
// NOTE(review): this takes the median of foreign itself, which is constant within
// each group; the name suggests median(price) may have been intended. Confirm.
bysort foreign : egen pricemed = median (foreign)
// Other egen functions mentioned in the lecture (ec is a variable from another dataset):
//   std(ec) standardizes ec
//   egen highec = anyvalue(ec), v(13/18) keeps ec where it is 13 to 18, missing otherwise
edit price foreign priceavg priceavg_by
// Converting between numeric and string variables
help tostring
help destring
sysuse auto.dta, clear
edit mpg
tostring mpg, gen(mpg_str)   // string copy in a new variable
tostring mpg, replace force  // convert in place; force overrides the safety checks
destring mpg_str, replace    // back to numeric

// encode creates a NEW numeric variable from a string variable and stores the
// original text as its value label. Use it for strings that are not numbers
// (e.g. A, B, A1); it cannot convert in place:
//     encode stringvar, gen(newnumericvar)
help encode
sysuse auto.dta, clear
// destring only works on strings that hold numbers. make contains text, so
// this call reports nonnumeric characters and creates nothing.
destring make, gen(make_str)
encode make, gen(make_num)
edit make make_num
// Building indicator (dummy) and categorical variables from price
sysuse auto.dta, clear

// Way 1: start from 0 and switch the expensive cars to 1.
generate mmy_high = 0
replace mmy_high = 1 if price>10000

// Way 2: a logical expression evaluates to 1 when true and 0 when false.
generate indicator_hi = (price>10000)

// Both ways should agree: inspect them, show any rows that differ, summarize.
edit price mmy_high indicator_hi
edit mmy_high indicator_hi if mmy_high != indicator_hi
summarize mmy_high indicator_hi

help recode
help autocode

// Quartile cut points of price, each stored as a constant column.
foreach q of numlist 25 50 75 {
    egen price_pc`q' = pctile(price), p(`q')
}

// Quartile category 0-3: count how many cut points the price has reached.
generate price_4cat = (price>=price_pc25) + (price>=price_pc50) + (price>=price_pc75)
// Combining datasets: append (vertical), merge (horizontal), joinby.
// Horizontal merge: the variables of two files are put side by side. The
// observations stay the same while the number of variables grows (wider data).
// Vertical append: both files hold the same variables; the result simply has
// more observations (longer data).

// Append: split the data by origin, then stack the two parts again.
sysuse auto.dta, clear
keep if foreign == 0
save auto_domestic.dta, replace
sysuse auto.dta, clear
keep if foreign == 1
append using auto_domestic.dta

// Merge: put the technical variables in their own file, keyed by id ...
sysuse auto.dta, clear
gen id = _n
keep make id mpg weight length
save auto_tech.dta, replace

// ... and join them back onto the remaining variables.
sysuse auto.dta, clear
gen id = _n
drop make mpg weight length
// id is unique in both files, so this is a one-to-one merge. The type is written
// master:using; m:1 (many master rows per using row) would also run here, but
// 1:1 makes Stata verify uniqueness on both sides.
merge 1:1 id using "auto_tech.dta"
// Lecture 5: sorting, ordering and cleaning string variables
// NOTE(review): no dataset is loaded in this part of the file. The commands
// assume one containing newid, year, so2, co, facilityname_origin and zipcode;
// load it before running. In the data editor string variables appear in red.
edit newid year so2

// Sorting numeric variables.
sort newid year   // ascending: first by newid, then by year
gsort newid -year // newid ascending, year descending (minus sign = descending)
edit newid year facilityname_origin
// Strings sort in dictionary order: punctuation before digits, digits before letters.
gsort -facilityname_origin

// Reorder columns so the variables of interest fit on screen.
order so2 co newid year
order newid, before(co) // move newid directly in front of co

// String variables
help string
edit newid facilityname_origin year
sort newid facilityname_origin year
// Goal: remove stray punctuation and blanks and unify the letter case, so that
// one facility always carries one name and can be given an id.
gen facility_name = facilityname_origin
edit facility_name facilityname_origin
format %30s facility_name facilityname_origin // show 30 characters per cell

// Unify the letter case; lower(), upper() and proper() are available.
replace facility_name = lower(facility_name)
replace facility_name = proper(facility_name)
// Blanks: trim() strips leading and trailing blanks only, itrim() collapses
// runs of interior blanks to one, ltrim()/rtrim() strip one side.
replace facility_name = itrim(facility_name)

// subinstr replaces the text wherever it occurs, even inside a word;
// subinword replaces whole words only. A final argument of . means every occurrence.
help subinstr
// Replace punctuation with a blank.
foreach ch in "," "." "/" "#" "-" {
    replace facility_name = subinstr(facility_name, "`ch'", " ", .)
}

help subinword
// Drop legal-form words (llc, inc, corp, company, co, ...). After proper()
// they are capitalized, hence the spelling used here.
foreach w in Co Llc Inc Corp Company Corporation {
    replace facility_name = subinword(facility_name, "`w'", " ", .)
}

replace facility_name = subinstr(facility_name,"U S","u s",.)
// Flag the rows that contain "u s" for further handling (regexm returns 1 on a match).
gen flag2 = 1 if regexm(facility_name, "u s") == 1
// Same, but only when the name starts with "u s" (^ anchors at the beginning).
gen flag3 = 1 if regexm(facility_name, "^u s") == 1

// The replacements above leave extra blanks behind; clean them up.
replace facility_name = trim(facility_name)
replace facility_name = itrim(facility_name)
help string functions

// Continue cleaning facility_name and the street name.
help split
// split cuts on blanks and creates facility_name1, facility_name2, ...
split facility_name
gen fac_name = facility_name1 + " " + facility_name2
edit zipcode
// NOTE(review): split needs a string variable and cuts on blanks by default;
// confirm zipcode is a string and whether parse("-") was intended.
split zipcode