发布网友 发布时间:2022-04-24 12:13
共4个回答
热心网友 时间:2023-10-12 11:28
1、生成一个自变量和一个因变量。
2、点击Statistics|Linear models and related|Linear regression菜单。
3、在弹出的regress中设置相关变量,然后再点确定。
4、在结果界面中,_cons为.5205279,表示回归截距;回归方程是否具有统计学意义要看输出中的Prob > F(小于0.05即表示显著)。R-squared和Adj R-squared分别为0.9905和0.9893,说明回归方程拟合效果很好。
5、回归拟合图。点击Statistics|Linear models and related|Regression diagnostics|Added-variable plot。
6、在弹出的avplot/avplots中,选择“all variables”,点确定。
7、结果如下图。
热心网友 时间:2023-10-12 11:28
1、首先在软件中生成自变量和因变量。
2、单击Statistics |线性模型和相关|线性回归菜单。
3、在弹出回归中设置相关变量,然后单击“确定”的功能按钮。
4、在结果界面中,_cons是.5205279,表示回归截距;回归方程是否具有统计显著性要看Prob > F(小于0.05即显著)。 R平方和Adj R平方分别为0.9905和0.9893,表明回归方程拟合良好。
5、回归拟合图。单击统计|线性模型和相关|回归诊断|添加变量图。
6、在弹出的avplot / avplots中,选择“所有变量”,然后单击“确定”。
7、stata回归就已经完成,结果如下界面。
热心网友 时间:2023-10-12 11:29
回归有很多种呀,你要做哪种回归?
热心网友 时间:2023-10-12 11:30
/* Lecture 1: How to import and export data */
// Switch to the working directory used for these exercises.
cd "//stata application"
// Load the example dataset shipped with Stata; clear drops the data currently in memory first.
sysuse "auto.dta", clear
edit

// Export to a delimited text file
help export delimited
export delimited using "auto.txt", replace

// Export a CSV file; only the two listed variables (make, price) are written
export delimited make price using "auto.csv", replace

// Export to Excel (help import excel also documents export excel)
help import excel
// Observations 11 through the last one; L stands for the last observation
export excel using "auto.xlsx" in 11/L, replace
// Only the observations with price >= 3000 (this overwrites the workbook written just above)
export excel using "auto.xlsx" if price>=3000, replace
help saveold

// Repair a dataset whose text displays as garbled characters
use "Data_luanma.dta", clear
help unicode
// Empty memory before running the translation
clear
// Declare the encoding the file was originally written in
unicode encoding set gb18030
unicode translate "Data_luanma.dta"
use "Data_luanma.dta", clear

// Allow very large .xlsx workbooks to be imported
set excelxlsxlargefile on
// Template only: put the real workbook name here before running, e.g.
//     import excel using "yourfile.xlsx", firstrow clear
// (the notes originally had an empty file name, which stops the do-file)

/* When memory is too small: open a big file and split it into pieces */
ssc install chunky
findit chunky
help chunky   // documentation of the newly installed command
// Lecture 2: split a dataset into several files, then read them back in bulk
sysuse auto.dta, clear

// Split the built-in dataset by hand: observations 1-4 get one file each,
// observations 5 through the last go into the fifth file.
// Each piece needs its own file name (auto1.csv ... auto5.csv); the loops below read those names.
export delimited using "auto1.csv" in 1, replace
export delimited using "auto2.csv" in 2, replace
export delimited using "auto3.csv" in 3, replace
export delimited using "auto4.csv" in 4, replace
export delimited using "auto5.csv" in 5/L, replace

// How to process many files or variables quickly: loops
help foreach
// Very important: whenever the same operation is applied to many items, use a loop.
// numlist syntax: a/b counts from a to b in steps of 1; a(d)b counts from a to b in steps of d.
foreach num of numlist 1/10 3 5 8 9(10)100 {
    display `num'
}

import delimited "auto1.csv", clear
// Only the number 1..5 changes between file names, so that is the numlist to loop over.
foreach num of numlist 1/5 {
    import delimited "auto`num'.csv", clear
    // If the text comes in garbled, try an encoding option such as encoding(gb18030)
    save "auto`num'.dta", replace
}

help append   // stacks datasets on top of each other
use "auto1.dta", clear
append using "auto2.dta" "auto3.dta" "auto4.dta" "auto5.dta"

// The same result with a loop
use "auto1.dta", clear
foreach num of numlist 2/5 {
    append using "auto`num'.dta"
}
save "auto_new.dta", replace

help erase
// Remove the intermediate auto1.dta ... auto5.dta files (erase can be used on .csv files too)
foreach num of numlist 1/5 {
    erase "auto`num'.dta"
}

// openall is a user-written command; install it first if help cannot find it
help openall
findit openall
openall "auto?", insheet
// Mind the difference between ? and * in the file pattern
// (conventionally ? stands for one character and * for any number — confirm in help openall)
clear
openall "auto*", insheet
save "auto_new.dta", replace
// Delete the files that are no longer needed
// Lecture 3: display formats, labels, listing and describing data
cd "/Users/Victor/stata"
set more off

help format
sysuse auto.dta, clear
// %5s: string format, right-aligned (the default alignment)
format %5s make
// Open the editor showing only these three columns
edit make price headroom
// %-20s: left-aligned, easier to read when browsing
format %-20s make
// %10.0g drops the thousands separator (comma) from price
format %10.0g price
format %3.2f headroom

help list
// list prints the data in the Results window
list make price headroom in 1/L

sysuse auto.dta, clear
// describe prints a compact summary table of the dataset
describe

help label
// Dataset label: replaced by the text in double quotes
label data "US auto data美国汽车数据"
// Variable label: variable name, a blank, then the label text
label var price "auto price汽车价格"
// Define a new value label named origin_f: 0 means "国产", 1 means "进口".
// (The data already carry a value label called origin, so a new name is used.)
label define origin_f 0 "国产" 1 "进口"
// Attach origin_f to foreign, which stores 0/1; wherever those values are shown,
// the label text now appears in place of the previous Domestic/Foreign.
label values foreign origin_f
describe

// Add a third category and a value label that covers it
replace foreign = 2 in 1/8
label define origin_m 0 "国产" 1 "进口" 2 "unknown"
label values foreign origin_m

// Quick reference of everyday commands
// edit: open the Data Editor
edit
// browse: look at the data without being able to change them
browse
// save: store the dataset; an explicit file name avoids writing over the shipped auto.dta
save "auto_labeled.dta", replace
// saveold: store the dataset in the format of an older Stata release (a file name is required)
saveold "auto_labeled_old.dta", replace
// describe: summary table
describe
// list: display observations
list foreign
list foreign in 70/L
list make if foreign == 0 // relational operators: == ~= < > <= >=
// | means OR
list make price if make == "AMC Concord" ///
    | make == "Merc. Cougar" ///
    | make == "Olds Toronado"
// & means AND
list make foreign price if (foreign == 1 & price <=5000) ///
    | (foreign ==0 & price>3000)
// codebook describes variables in detail: name, label and values
codebook make price
// Exercise: send the data through Excel and rebuild the variable names and labels
sysuse auto.dta, clear
export excel using "auto.xlsx", replace nolabel
// The workbook holds neither value labels nor a header row of variable names,
// which is why the steps below are needed.
import excel using "auto.xlsx", clear
describe

help rename
// The imported columns are called A, B, ..., L; give them their original names back.
rename (A B) (make price)
rename (C D E F G H I J K L) ///
    (mpg rep78 headroom trunk weight length ///
    turn displacement gear_ratio foreign)
// The names are restored, but the variable labels are still gone.

// proper: first letter upper case, the remaining letters lower case
rename _all, proper
save auto.dta, replace

// Bulk operation: _all stands for every variable, and each one is labelled with its own name.
foreach v of varlist _all {
    label variable `v' " `v' "
}
// Creating and changing variables
help generate   // generate creates a new variable
help replace    // replace changes an existing variable

sysuse auto.dta, clear
// Square of price
gen price2 = price^2
// Interaction term, computed for foreign cars only; all other observations are missing
gen price_mpg = price*mpg if foreign == 1
// Turn those missing values into 0
replace price_mpg = 0 if price_mpg == .
// Logarithm of price. Be aware of zeros: the log of 0 is missing, so check whether a 0 in the
// raw data is a genuine zero or a data-entry stand-in for "missing".
gen logprice = log(price)
// ln() and log() are the same function in Stata (natural logarithm)
gen lnprice = ln(price)

// Define pricecat from price bands
gen pricecat = 0
replace pricecat = 1 if price >=5000 & price < 10000
replace pricecat = 2 if price >=10000
edit pricecat
label define category5 0 "less than 5k" 1 "between 5k and 10k" 2 "more than 10k"
label values pricecat category5
edit price pricecat

help egen
// Deliberate demonstration: mean() is not available to gen, so this line fails.
// capture noisily shows the error message but lets the do-file continue.
capture noisily gen priceavg = mean(price)
// egen provides mean(): overall mean of price
egen priceavg = mean(price)
// Deviation of each price from the mean
gen price_dev = price-priceavg
edit price priceavg price_dev

// How to compute the mean price separately for domestic and foreign cars?
sort foreign
// Mean over foreign cars; domestic cars are left missing for the moment
egen price_avg = mean(price) if foreign == 1
egen price_avg2 = mean(price) if foreign == 0
// Fill the gaps with the domestic mean
replace price_avg = price_avg2 if price_avg == .
// Drop the helper column
drop price_avg2

// A shorter way: one line is enough, the mean is computed within each value of foreign
// (the data are already sorted by foreign at this point)
by foreign: egen priceavg_by6 = mean(price)
// sort and by combined into a single prefix
bysort foreign: egen priceavg_by4 = mean(price)

// A classmate's version of the same material.
// priceavg and price_dev already exist from the steps above, so remove them before re-creating them.
drop priceavg price_dev
help egen // extensions to generate
egen priceavg = mean(price) // overall mean of price
// Number of non-missing prices within each turn-length combination
// (2 where two cars share both values, otherwise 1)
bys turn length: egen priceavg2 = count(price)
gen price_dev = price-priceavg // deviation of price from priceavg
edit price priceavg price_dev
// Mean price for domestic and for foreign cars
sort foreign /* order the data by this 0/1 variable, ascending */
by foreign : egen priceavg_by = mean(price) /* by: compute within groups of a variable */
bys foreign : egen priceavg_by1 = mean(price) /* bys sorts first, then computes by group */
// Median price within each group. The notes took the median of foreign itself, which inside
// by-groups of foreign just reproduces foreign; the variable name points to price.
bysort foreign : egen pricemed = median(price)
// Further egen functions mentioned in the notes (ec is a variable from another dataset):
//   std(ec) standardizes ec
//   egen highec = anyvalue(ec), v(13/18) keeps the value of ec when it is in 13..18, missing otherwise
edit price foreign priceavg priceavg_by
// End of this part
help tostring   // numeric variable -> string variable
help destring   // the other direction: string variable -> numeric variable

sysuse auto.dta, clear
edit mpg
// Keep mpg and create a string copy of it
tostring mpg, gen(mpg_str)
// Convert mpg itself; force carries out the conversion even where information could be lost
tostring mpg, replace force
// Turn the string copy back into a number
destring mpg_str, replace
// Strings cannot be used in arithmetic, but they can be compared with relational operators.

// Converting between labelled numbers and strings
help encode   // builds a new labelled numeric variable from an existing string variable
help decode   // builds a string variable from a numeric variable and its value label
/* A purely textual variable (values such as A, B, A1) cannot be converted in place; a new
   numeric variable has to be generated:  encode stringvar, gen(newnumvar)
   The original text becomes the value label of the new variable, which the Data Editor
   shows in blue. */
sysuse auto.dta, clear
// make holds text, so destring has nothing to convert and creates no make_str;
// capture noisily keeps the do-file running whatever destring reports.
capture noisily destring make, gen(make_str)
// encode is the right tool for a text variable
encode make, gen(make_num)
edit make make_num
// Building dummy (indicator) variables and categorical variables
sysuse auto.dta, clear

// Approach 1: generate a column of zeros, then switch the qualifying rows to 1
generate mmy_high = 0
replace mmy_high = 1 if price > 10000

// Approach 2: store the truth value of the condition, newvar = (varname > #)
generate indicator_hi = (price > 10000)

edit price mmy_high indicator_hi
// If no rows show up here, both approaches agree and the code is fine
edit mmy_high indicator_hi if mmy_high != indicator_hi
summarize mmy_high indicator_hi

// Categorical variables
help recode
help autocode

// Quartile cut points of price: 25th, 50th and 75th percentiles
foreach q of numlist 25 50 75 {
    egen price_pc`q' = pctile(price), p(`q')
}

// One approach: generate plus replace, one band per replace
// (& means AND)
generate price_4cat = 0
replace price_4cat = 1 if price >= price_pc25 & price < price_pc50
replace price_4cat = 2 if price >= price_pc50 & price < price_pc75
replace price_4cat = 3 if price >= price_pc75
// Combining datasets: append (vertical), merge (horizontal), joinby
// Horizontal combination puts the variables of two files side by side: the observations stay
// the same while the number of variables grows, i.e. the dataset becomes wider.
// Vertical combination stacks two files that share the same variables: the dataset becomes
// longer because observations are added.

// Vertical combination
sysuse auto.dta, clear
// Keep the domestic cars only (drops every foreign car)
keep if foreign == 0
save auto_domestic.dta, replace
sysuse auto.dta, clear
keep if foreign == 1
// Memory now holds only the foreign cars; stack the domestic cars saved on disk underneath
append using auto_domestic.dta

// Horizontal combination
sysuse auto.dta, clear
// Running observation number used as the merge key
gen id = _n
keep make id mpg weight length
save auto_tech.dta, replace
sysuse auto.dta, clear
gen id = _n
drop make mpg weight length
// Match on id. id identifies exactly one observation in the data in memory (master) and in
// the file on disk (using), so this is a 1:1 merge.
merge 1:1 id using "auto_tech.dta"
// Lecture 5: sorting, column order and cleaning string variables
// NOTE(review): this part needs a dataset containing newid, year, so2, co,
// facilityname_origin and zipcode; no command that loads it appears in these notes,
// so load that file before running this section.

// In the Data Editor, string variables are shown in red
edit newid year so2

// Sorting on numeric variables
// Ascending: first by newid, then by year within newid
sort newid year
// gsort: a minus sign in front of a variable sorts it in descending order
// (newid ascending, year from largest to smallest)
gsort newid -year

// Sorting on a string variable
edit newid year facilityname_origin
// Dictionary order: digits come before letters, punctuation comes before digits
// Reverse dictionary order
gsort -facilityname_origin

// Rearrange the order of the columns
order so2 co newid year
// Move one variable in front of another; handy because the screen is only so wide
order newid, before(co)

// String variables
help string
edit newid facilityname_origin year
sort newid facilityname_origin year
// Goal: tidy the name strings — strip punctuation and surplus blanks, unify the case —
// so that the same firm always carries the same name and can be given one id.
// Work on a copy of the original name
gen facility_name = facilityname_origin
edit facility_name facilityname_origin
// Show up to 30 characters per value
format %30s facility_name facilityname_origin
// Exam outline: given data -> clean, merge, descriptive statistics, graphs, model design,
// output of results, write down the regression model and explain the choices.

// Unify the case; available functions: lower(), upper(), proper()
replace facility_name = lower(facility_name)
replace facility_name = proper(facility_name)
// Blanks: trim() strips leading and trailing blanks only, itrim() collapses runs of blanks
// inside the string ("surplus" means more than one), ltrim() works on the left, rtrim() on the right
replace facility_name = itrim(facility_name)

// Punctuation is not wanted
// subinstr() replaces text wherever it occurs, even inside a word; subinword() replaces whole words only
help subinstr
// . as the last argument: replace every occurrence; here each comma becomes a blank
replace facility_name = subinstr(facility_name,","," ",.)
replace facility_name = subinstr(facility_name,"."," ",.)
replace facility_name = subinstr(facility_name,"/"," ",.)
replace facility_name = subinstr(facility_name,"#"," ",.)
replace facility_name = subinstr(facility_name,"-"," ",.)

help subinword
// Legal-form words to remove: llc, incorporated, inc, corp, company, co.
// The capitalisation to search for follows from the proper() call above.
replace facility_name = subinword(facility_name,"Co"," ",.)
replace facility_name = subinword(facility_name,"Llc"," ",.)
replace facility_name = subinword(facility_name,"Inc"," ",.)
replace facility_name = subinword(facility_name,"Corp"," ",.)
replace facility_name = subinword(facility_name,"Company"," ",.)
replace facility_name = subinword(facility_name,"Corporation"," ",.)

replace facility_name = subinstr(facility_name,"U S","u s",.)
// Flag observations whose name contains "u s" for further treatment (== 1: the pattern matched)
gen flag2 = 1 if regexm(facility_name, "u s") == 1
// Flag observations whose name starts with "u s"
gen flag3 = 1 if regexm(facility_name, "^u s") == 1

// Remove the surplus blanks left behind by the replacements
replace facility_name = trim(facility_name)
replace facility_name = itrim(facility_name)

help string functions
// Continue cleaning facility_name and the street name
help split
// split cuts at blanks by default and creates facility_name1, facility_name2, ...
split facility_name
gen fac_name = facility_name1 + " " + facility_name2
edit zipcode
// NOTE(review): split works on string variables only — confirm that zipcode is stored as a string
split zipcode